Alt-CWB-ambs
view release on metacpan or search on metacpan
lib/CWB/CQP.pm view on Meta::CPAN
=item I<$cqp> = B<new> CWB::CQP;
=item I<$cqp> = B<new> CWB::CQP '-r /corpora/registry', '-l /data/cqpresults';
Spawn new CQP background process. The object I<$cqp> can then be used to communicate with
this CQP instance. Optional arguments of the B<new> method are passed as command-line
options to CQP. Use at your own risk.
=cut
## CWB::CQP object constructor
sub new {
my $class = shift; # class name
my $self = {}; # namespace for new CQP class object
my @options = @_; # CQP command-line options (use at your own risk)
# split options with values, e.g. "-r /my/registry" => "-r", "/my/registry" (doesn't work for multiple options in one string)
@options = map { (/^(--?[A-Za-z0-9]+)\s+(.+)$/) ? ($1, $2) : $_ } @options;
## run CQP server in the background
my $in = $self->{'in'} = new FileHandle; # stdin of CQP
my $out = $self->{'out'} = new FileHandle; # stdout of CQP
my $err = $self->{'err'} = new FileHandle; # stderr of CQP
my $pid = open3($in, $out, $err, $CWB::CQP, @CQP_options, @options);
$self->{'pid'} = $pid; # child process ID (so process can be killed if necessary)
$in->autoflush(1); # make sure that commands sent to CQP are always flushed immediately
my ($need_major, $need_minor, $need_beta) = split /\./, $CQP_version; # required CQP version
$need_beta = 0 unless $need_beta;
my $version_string = $out->getline; # child mode (-c) should print version on startup
chomp $version_string;
croak "ERROR: CQP backend startup failed ('$CWB::CQP @CQP_options @options')\n"
unless $version_string =~ /^CQP\s+(?:\w+\s+)*([0-9]+)\.([0-9]+)(?:\.b?([0-9]+))?(?:\s+(.*))?$/;
$self->{'major_version'} = $1;
$self->{'minor_version'} = $2;
$self->{'beta_version'} = $3 || 0;
$self->{'compile_date'} = $4 || "unknown";
croak "ERROR: CQP version too old, need at least v$CQP_version ($version_string)\n"
unless ($1 > $need_major or
$1 == $need_major
and ($2 > $need_minor or
($2 == $need_minor and $3 >= $need_beta)));
## command execution
$self->{'command'} = undef; # CQP command string that is currently being processed (undef = last command has been completed)
$self->{'lines'} = []; # array of output lines read from CQP process
$self->{'buffer'} = ""; # read buffer for standard output from CQP process
$self->{'block_size'} = 256; # block size for reading from CQP's output and error streams
$self->{'query_lock'} = undef;# holds random key while query lock mode is active
## error handling (messages on stderr)
$self->{'error_handler'} = undef; # set to subref for user-defined error handler
$self->{'status'} = 'ok'; # status of last executed command ('ok' or 'error')
$self->{'error_message'} = []; # arrayref to array containing message produced by last command (if any)
## handling of CQP progress messages
$self->{'progress'} = 0; # whether progress messages are activated
$self->{'progress_handler'} = undef; # optional callback for progress messages
$self->{'progress_info'} = []; # contains last available progress information: [$total_percent, $pass, $n_passes, $message, $percent]
## debugging (prints more or less everything on stdout)
$self->{'debug'} = 0;
## select vectors for CQP output (stdout, stderr, stdout|stderr)
$self->{'select_err'} = new IO::Select($err);
$self->{'select_out'} = new IO::Select($out);
$self->{'select_any'} = new IO::Select($err, $out);
## CQP object setup complete
bless($self, $class);
## the following command will collect and ignore any output which may have been produced during startup
$self->exec("set PrettyPrint off"); # pretty-printing should be turned off for non-interactive use
return $self;
}
=item B<undef> I<$cqp>;
Exit CQP background process gracefully by issuing an C<exit;> command.
This is done automatically when the variable I<$cqp> goes out of scope.
Note that there may be a slight delay while B<CWB::CQP> waits for the CQP
process to terminate.
=cut
sub DESTROY {
my $self = shift;
if ($self->{'command'}) {
while ($self->_update) {} # read pending output from active command
}
my $out = $self->{'out'};
if (defined $out) {
$out->print("exit"); # exit CQP backend
$out->close;
}
my $in = $self->{'in'};
if (defined $in) {
$in->close;
}
my $pid = $self->{'pid'};
waitpid $pid, 0; # wait for CQP to exit and reap background process
## **TODO** -- this may hang in some cases; is there a safe workaround?
}
=item I<$ok> = I<$cqp>->B<check_version>(I<$major>, I<$minor>, I<$beta>);
Check for minimum required CQP version, i.e. the background process has
to be CQP version I<$major>.I<$minor>.I<$beta> or newer.
I<$minor> and I<$beta> may be omitted, in which case they default to 0.
Note that the B<CWB::CQP> module automatically checks whether the CQP version
is compatible with its own requirements when a new object is created.
The B<check_version> method can subsequently be used to check for a more
recent release that provides functionality needed by the Perl script.
=cut
sub check_version {
my $self = shift;
my ($major, $minor, $beta) = @_;
$minor = 0 unless defined $minor;
lib/CWB/CQP.pm view on Meta::CPAN
sysread $self->{'out'}, $self->{'buffer'}, $self->{'block_size'}, length($self->{'buffer'}); # append to object's input buffer
if ($self->{'buffer'} =~ /\n/) {
## if there's a complete line in the input buffer, split off all lines
my @new_lines = split /\n/, $self->{'buffer'}, -1; # make sure that last line is handled correctly if buffer ends in \n
$self->{'buffer'} = pop @new_lines; # last entry is incomplete line ("" if buffer ended in \n) => return to input buffer
foreach my $line (@new_lines) {
## skip blank line printed after each CQP command
next if $line eq "";
## handle progress messages if ProgressBar has been activated
if ($self->{'progress'} and $line =~ /^-::-PROGRESS-::-/) {
my ($pass, $n_passes, $message);
(undef, $pass, $n_passes, $message) = split /\t/, $line;
my $percent = ($message =~ /([0-9]+)\%\s*complete/) ? $1+0 : undef; # extract progress percentage, if present
my $total_percent = (100 * ($pass - 1) + ($percent || 0)) / $n_passes; # estimate total progress ($percent assumed to be 0% if not given)
$self->{'progress_info'} = [$total_percent, $pass, $n_passes, $message, $percent];
my $handler = $self->{'progress_handler'};
if (ref($handler) eq 'CODE') {
$handler->($total_percent, $pass, $n_passes, $message, $percent); # call user-defined progress handler
}
}
## regular output lines are collected in object's line buffer
else {
push @{$self->{'lines'}}, $line;
$lines++;
}
}
}
last if $lines > 0; # if we have read a line and there is no output on stderr, return from function
}
## ERROR -- we should never reach this point
else {
die "CWB::CQP: INTERNAL ERROR in _update() -- no data on stdout or stderr of CQP child process";
}
}
if ($stderr_buffer ne "") {
$self->{'status'} = 'error'; # any output on stderr indicates that something went wrong
push @{$self->{'error_message'}}, split /\n/, $stderr_buffer; # append output on stderr to error message
$self->error(@{$self->{'error_message'}}); # may call error handler and abort, or print message and continue
}
return $lines;
}
=item I<$cqp>->B<run>(I<$cmd>);
Start a single CQP command I<$cmd> in the background. This method returns immediately.
Command output can then be read with the B<getline>, B<getlines> and B<getrow> methods.
If asynchronous communication is desired, use B<ready> to check whether output is available.
It is an error to B<run> a new command before the output of the previous command has completely
been processed.
=cut
sub run {
croak 'USAGE: $cqp->run($cmd]);'
unless @_ == 2;
my $self = shift;
my $cmd = shift;
my $debug = $self->{'debug'};
$cmd =~ s/\n+/ /g; # make sure there are no newline characters (to be on the safe side)
$cmd =~ s/(;\s*)+$//; # ";" will be added when $cmd is sent to CQP
my $active_cmd = $self->{'command'};
croak "Error: new CQP command issued while '$active_cmd' is still being processed"
if $active_cmd;
$self->{'command'} = "$cmd;";
$self->{'status'} = 'ok';
$self->{'buffer'} = "";
$self->{'lines'} = [];
$self->{'error_message'} = [];
print "CQP << $cmd;\n"
if $debug;
$self->{'in'}->print("$cmd;\n .EOL.;\n"); # append .EOL. command to mark end of CQP output
}
=item I<$num_of_lines> = I<$cqp>->B<ready>;
=item I<$num_of_lines> = I<$cqp>->B<ready>(I<$timeout>);
Check if output from current CQP command is available for reading with B<getline> etc.,
returning the number of lines currently held in the input buffer (possibly including an
end-of-output marker line that will not be returned by B<getline> etc.). If there is no
active command, returns B<undef>.
The first form of the command returns immediately. The second form waits up to I<$timeout>
seconds for CQP output to become available. Use a negative I<$timeout> for blocking mode.
=cut
sub ready {
my $self = shift;
my $timeout = shift;
my $lines = @{$self->{'lines'}};
return $lines # output has already been buffered => ready to read
if $lines > 0;
return undef # no command active => undefined state
unless $self->{'command'};
return $self->_update($timeout); # try to read from CQP process & return number of lines available (NB: line buffer was empty before)
}
## INTERNAL: reset internal status after command has been completed, check that there is no extra output
sub _eol {
my $self = shift;
while ($self->_update > 0) { 1 } # check for any pending output from CQP process
carp "CWB::CQP: Unexpected CQP output after end of command:\n",
map {" | $_\n"} @{$self->{'lines'}},
"(command was: ".$self->{'command'}.")"
if @{$self->{'lines'}} > 0;
$self->{'lines'} = [];
$self->{'command'} = undef; # no active command now
}
=item I<$line> = I<$cqp>->B<getline>;
Read one line of output from CQP process, blocking if necessary until output beomes available.
Returns B<undef> when all output from the current CQP command has been read.
=cut
sub getline {
croak 'USAGE: $line = $cqp->getline;'
unless @_ == 1;
my $self = shift;
croak 'CWB::CQP: $cqp->getline called without active CQP command'
unless $self->{'command'};
my $debug = $self->{'debug'};
$self->_update(-1) # fill line buffer if necessary (blocking mode)
unless @{$self->{'lines'}} > 0;
my $line = shift @{$self->{'lines'}};
if ($line eq '-::-EOL-::-') {
## special line printed by ".EOL.;" marks end of CQP output
print "CQP ", "-" x 60, "\n"
if $debug;
$self->_eol;
return undef; # undef return value marks end of output
}
else {
print "CQP >> $line\n"
if $debug;
return $line;
}
}
=item I<@lines> = I<$cqp>->B<getlines>(I<$n>);
Read I<$n> lines of output from the CQP process, blocking as long as necessary. An explicit B<undef> element is included at the end of the output of a CQP command. Note that B<getlines> may return fewer than I<$n> lines if the end of output is reac...
Set C<I<$n> = 0> to read all complete lines currently held in the input buffer (as indicated by the B<ready> method), or specify a negative value to read the complete output of the active CQP command.
=cut
sub getlines {
croak 'USAGE: @lines = $cqp->getlines($n);'
unless @_ == 2 and $_[1] =~ /^-?[0-9]+$/ and wantarray;
my $self = shift;
my $n_lines = shift;
my @lines = ();
if ($n_lines == 0) {
while (my $line = shift @{$self->{'lines'}}) {
if ($line eq '-::-EOL-::-') {
$self->_eol;
push @lines, undef;
}
else {
push @lines, $line;
}
}
}
else {
while ($n_lines != 0) { # if $n_lines < 0, reads complete output of CQP command
while ($n_lines != 0 and @{$self->{'lines'}} > 0) {
my $line = shift @{$self->{'lines'}};
if ($line eq '-::-EOL-::-') {
$self->_eol;
push @lines, undef;
$n_lines = 0;
}
else {
push @lines, $line;
$n_lines--;
}
}
$self->_update(-1) # wait for CQP output to become available (in $self's input buffer)
if $n_lines != 0;
}
return (wantarray) ? @lines : shift @lines;
}
}
=item I<@lines> = I<$cqp>->B<exec>(I<$cmd>);
A convenience function that executes CQP command I<$cmd>, waits for it to complete, and returns all lines of
output from the command.
Fully equivalent to the following two commands, except that the trailing B<undef> returnd by B<getlines> is not included in the output:
$cqp->run($cmd);
@lines = $cqp->getlines(-1);
=cut
sub exec {
croak 'USAGE: $cqp->exec($cmd);'
unless @_ == 2;
my $self = shift;
my $cmd = shift;
my $debug = $self->{'debug'};
$self->run($cmd);
my @result = $self->getlines(-1);
my $eol = pop @result;
if (defined $eol) {
die "CWB::CQP: INTERNAL ERROR in _exec() -- missing 'undef' at end of command output (ignored)";
push @result, $eol; # seems to be regular line, so push it back onto result list
}
return @result;
}
=item I<@fields> = I<$cqp>->B<getrow>;
=item I<@rows> = I<$cqp>->B<exec_rows>(I<$cmd>);
Convenience functions for reading TAB-delimited tables, which are generated by CQP commands such as B<count>, B<group>, B<tabulate> and B<show cd>.
B<getrow> returns a single row of output, split into TAB-delimited fields. If the active CQP command has completed, it returns an empty list.
B<exec_rows> executes the CQP command I<$cmd>, waits for it to complete, and then returns the TAB-delimited table as an array of array references. You can then use multiple indices to access a specific element of the table, e.g. C<I<@rows>[41][2]> f...
=cut
sub getrow {
croak 'USAGE: @fields = $cqp->getrow;'
unless @_ == 1 and wantarray;
my $self = shift;
my $line = $self->getline;
return ()
unless defined $line;
return split /\t/, $line;
}
sub exec_rows {
croak 'USAGE: @rows = $cqp->exec_rows($cmd);'
unless @_ == 2 and wantarray;
my $self = shift;
my $cmd = shift;
my @lines = $self->exec($cmd); ## **TODO** this function could be optimised to collect arrayrefs directly
return map { [ split /\t/ ] } @lines;
}
=item I<$cqp>->B<begin_query>;
=item I<$cqp>->B<end_query>;
Enter/exit query lock mode for safe execution of CQP queries entered by an untrusted user (e.g. from a Web interface). In query lock mode, all interactive CQP commands are temporarily disabled; in particular, it is impossible to access files or exec...
=cut
sub begin_query {
my $self = shift;
croak 'CWB::CQP: $cqp->begin_query; has been called while query lock mode is already active'
if $self->{'query_lock'};
my $key = 1 + int rand(1_000_000); # make sure this is a TRUE value
$self->exec("set QueryLock $key");
$self->{'query_lock'} = $key;
}
sub end_query {
lib/CWB/CQP.pm view on Meta::CPAN
=item I<$cqp>->B<set_error_handler>(I<&my_error_handler>);
=item I<$cqp>->B<set_error_handler>('die' | 'warn' | 'ignore');
The first form of the B<set_error_handler> method activates a user-defined error handler. The argument is a reference to a named or anonymous subroutine, which will be called whenever a CQP error is detected (or an error is raised explicitly with th...
The second form of the method activates one of the built-in error handlers:
=over 4
=item *
B<C<'die'>> aborts program execution with an error message; this handler is particularly convenient for one-off scripts or command-line utilities that do not need to recover from error conditions.
=item *
B<C<'warn'>> prints the error message on STDERR, but continues program execution. This is the default error handler of a new B<CWB::CQP> object.
=item *
B<C<'ignore'>> silently ignores all errors. The application script should check for error conditions after every CQP command, using the B<ok> or B<status> method.
=back
=cut
## set user-defined error handler (or built-in handlers 'die', 'warn' [default], 'ignore')
sub set_error_handler {
my $self = shift;
my $handler = shift;
if (defined $handler) {
my $type = ref $handler;
if ($type ne 'CODE') {
$handler = lc($handler);
croak 'USAGE: $cqp->set_error_handler( \&my_error_handler | "die" | "warn" | "ignore" );'
unless $handler =~ /^(die|warn|ignore)$/;
if ($handler eq 'die') {
$handler = \&_error_handler_die;
}
elsif ($handler eq 'warn') {
$handler = undef; # default behaviour if no error handler is specified
}
elsif ($handler eq 'ignore') {
$handler = \&_error_handler_ignore;
}
}
}
$self->{'error_handler'} = $handler;
}
## INTERNAL: built in error handlers for 'die' and 'ignore' modes
sub _error_handler_die {
croak "\n", "=+===CWB::CQP ERROR=====\n", (map {" | $_\n"} @_), "=+== occurred";
}
sub _error_handler_ignore {
# do nothing
}
=item I<$cqp>->B<debug>(1);
=item I<$cqp>->B<debug>(0);
Activate/deactivate debugging mode, which logs all executed commands and their complete output on STDOUT. The B<debug> method returns the previous status for convenience.
=cut
sub debug {
croak 'USAGE: $prev_status = $cqp->debug( 1 | 0 ) ;'
unless @_ == 2;
my $self = shift;
my $on = shift;
my $prev = $self->{'debug'};
$self->{'debug'} = $on;
return $prev;
}
=item I<$cqp>->B<progress_on>;
=item I<$cqp>->B<progress_off>;
=item I<$message> = I<$cqp>->B<progress>;
=item (I<$total>, I<$pass>, I<$n_passes>, I<$msg>, I<$percent>) = I<$cqp>->B<progress_info>;
CQP progress messages can be activated and deactivated with the B<progress_on> and B<progress_off> methods (corresponding to C<set ProgressBar on|off;> in CQP).
If active, progress information can be obtained with the method B<progress>, which returns the last progress message received from CQP. The B<progress_info> returns pre-parsed progress information, consisting of estimated total percentage of complet...
It is an error to call B<progress> or B<progress_info> without activating progress messages first.
=cut
## activate / deactivate CQP progress messages
sub progress_on {
my $self = shift;
if ($self->{'progress'}) {
carp 'CWB::CQP: Progress messages have already been activated (ignored)';
}
else {
$self->exec("set ProgressBar on");
$self->{'progress'} = 1;
}
}
sub progress_off {
my $self = shift;
if ( $self->{'progress'}) {
$self->exec("set ProgressBar off");
$self->{'progress'} = 0;
}
else {
carp 'CWB::CQP: Progress messages have not been turned on yet (ignored)';
}
}
## poll current progress status (estimated total percentage, or detailed information)
sub progress {
my $self = shift;
croak 'CWB::CQP: No progress() information available, please call progress_on() first'
unless $self->{'progress'};
if (not $self->{'command'}) {
carp 'CWB::CQP: No active command, progress() does not make sense';
return undef;
}
else {
## if input is already available, return corresponding progress state; otherwise check for new progress messages
$self->_update
unless @{$self->{'lines'}} > 0;
return $self->{'progress_info'}->[0];
}
}
sub progress_info {
( run in 1.892 second using v1.01-cache-2.11-cpan-39bf76dae61 )