Apache-Tika-Async
view release on metacpan or search on metacpan
lib/Apache/Tika/Server.pm view on Meta::CPAN
'-jar',
$self->jarfile,
#'--port', $self->port,
'--config', $self->tika_config_temp_file,
@{$self->tika_args},
};
# Start @cmd as a background process on Windows.
#
# Uses the special system(1, LIST) form, which (per perlport) spawns the
# program without waiting for it to finish.
#
# Parameters:
#   $self - invocant (not used)
#   @cmd  - program and its arguments
# Returns: whatever system(1, @cmd) returns; the caller stores it as the pid.
sub spawn_child_win32( $self, @cmd ) {
    my $process_id = system(1, @cmd);
    return $process_id;
}
# Spawn @cmd as a detached background process on POSIX-like systems.
#
# Forks; the parent immediately gets the child's pid back. The child changes
# directory to "/", starts a new session, re-plumbs its standard handles and
# then exec()s @cmd. The child never returns into the caller's code: every
# failure path ends in POSIX::_exit(1), so END blocks and object destructors
# inherited from the parent are not run a second time inside the child.
#
# Parameters:
#   $self - invocant (not used)
#   @cmd  - program and its arguments, handed to exec() as a list
# Returns: the pid of the forked child (in the parent only).
# Dies:    in the parent, if fork() fails.
sub spawn_child_posix( $self, @cmd ) {
    require POSIX;

    # daemonize
    defined(my $pid = fork()) || die "can't fork: $!";
    if( $pid ) { # non-zero now means I am the parent
        return $pid;
    };

    # We are the child, close about everything, then exec.
    # The eval keeps a failure from unwinding into the caller's stack frames,
    # which would leave two processes running the parent's program.
    eval {
        chdir("/") || die "can't chdir to /: $!";
        (POSIX::setsid() != -1) || die "Can't start a new session: $!";
        # STDERR is duplicated before STDOUT is redirected, so it keeps
        # pointing at the STDOUT that was inherited from the parent.
        open(STDERR, '>&', \*STDOUT) || die "can't dup stdout: $!";
        open(STDIN, '<', '/dev/null') || die "can't read /dev/null: $!";
        open(STDOUT, '>', '/dev/null') || die "can't write to /dev/null: $!";
        no warnings 'exec'; # the failure is reported below instead
        exec(@cmd) || die "can't exec [@cmd]: $!";
    };

    # Only reached when the setup or the exec itself failed.
    warn $@ if $@;
    # _exit instead of exit: skip END blocks and destructors of the parent's
    # objects (temp files, child-process guards, ...) in this forked copy.
    POSIX::_exit(1);
}
# Start @cmd in the background using the strategy that fits the platform.
#
# Dispatches to spawn_child_win32 when $^O matches /mswin/i and to
# spawn_child_posix otherwise.
#
# Parameters:
#   $self - invocant providing the two platform-specific spawn methods
#   @cmd  - program and its arguments
# Returns: the value returned by the platform-specific method (the pid).
sub spawn_child( $self, @cmd ) {
    my $spawner = $^O =~ /mswin/i
                ? 'spawn_child_win32'
                : 'spawn_child_posix';
    my $child_pid = $self->$spawner(@cmd);
    return $child_pid
}
# Launch the server process unless one has already been recorded.
#
# When $self->pid is false, the command line from $self->cmdline is handed
# to $self->spawn_child, the resulting pid is stored via $self->pid, and the
# call then sleeps for two seconds to give the Java process time to start.
#
# Parameters:
#   $self - invocant providing pid, cmdline and spawn_child
# Croaks: if spawn_child returns a false value.
sub launch( $self ) {
    if( ! $self->pid ) {
        # Flattened command line, only needed for the error message
        my $printable = join " ", $self->cmdline;
        my $child = $self->spawn_child( $self->cmdline );
        if( ! $child ) {
            croak "Couldn't launch [$printable]: $!/$^E";
        };
        $self->pid( $child );
        sleep 2; # Java...
    };
}
# Build the server URL for a given request type.
#
# Parameters:
#   $self - invocant providing host and port
#   $type - one of 'text', 'test', 'meta', 'language' or 'all';
#           a false value (undef, '', 0) is treated as 'text'
# Returns: a string of the form http://HOST:PORT/ENDPOINT
# Croaks:  if $type is not one of the known request types. Previously an
#          unknown type silently produced a URL with an empty path (plus an
#          "uninitialized value" warning under warnings).
sub url {
    # Should return URI instead
    my( $self, $type )= @_;
    $type||= 'text';

    my %endpoint_for= (
        text     => 'rmeta',
        test     => 'tika', # but GET instead of PUT
        meta     => 'rmeta',
        #all     => 'all',
        language => 'language/string',
        all      => 'rmeta',
        # unpack
    );
    exists $endpoint_for{ $type }
        or croak "Unknown request type '$type'";

    return sprintf
        'http://%s:%s/%s',
        $self->host,
        $self->port,
        $endpoint_for{ $type }
};
# /rmeta
# /unpacker
# /all
# /tika
# /language
# hello world
sub fetch {
my( $self, %options )= @_;
$options{ type }||= 'text';
my $url= $self->url( $options{ type } );
if(! $options{ content } and $options{ filename }) {
# read $options{ filename }
open my $fh, '<', $options{ filename }
or croak "Couldn't read '$options{ filename }': $!";
binmode $fh;
local $/;
$options{ content } = <$fh>;
};
my $method;
if( 'test' eq $options{ type } ) {
$method= 'get';
} else {
$method= 'put';
;
};
my $headers = $options{ headers } || {};
#my ($code,$res) = await
# $self->ua->request( $method, $url, $options{ content }, %$headers );
return $self->ua->request( $method, $url, $options{ content }, %$headers )
->then(sub( $code, $res ) {
my $info;
if( 'all' eq $options{ type }
or 'text' eq $options{ type }
or 'meta' eq $options{ type } ) {
if( $code !~ /^2..$/ ) {
croak "Got HTTP error code $code for '$options{ filename }'";
};
my $item = $res->[0];
# Should/could this be lazy?
my $c = delete $item->{'X-TIKA:content'};
# Ghetto-strip HTML we don't want:
( run in 0.811 second using v1.01-cache-2.11-cpan-cdf2f3d4e48 )