Alt-CWB-ambs

 view release on metacpan or  search on metacpan

lib/CWB/CQP.pm  view on Meta::CPAN

=item I<$cqp> = B<new> CWB::CQP;

=item I<$cqp> = B<new> CWB::CQP '-r /corpora/registry', '-l /data/cqpresults';

Spawn new CQP background process.  The object I<$cqp> can then be used to communicate with 
this CQP instance.  Optional arguments of the B<new> method are passed as command-line
options to CQP.  Use at your own risk.

=cut

## CWB::CQP object constructor
sub new {
  my $class = shift;            # class name
  my $self = {};                # namespace for new CQP class object
  my @options = @_;             # CQP command-line options (use at your own risk)
  # split options with values, e.g. "-r /my/registry" => "-r", "/my/registry" (doesn't work for multiple options in one string)
  @options = map { (/^(--?[A-Za-z0-9]+)\s+(.+)$/) ? ($1, $2) : $_ } @options;

  ## run CQP server in the background
  my $in = $self->{'in'} = new FileHandle;   # stdin of CQP
  my $out = $self->{'out'} = new FileHandle; # stdout of CQP
  my $err = $self->{'err'} = new FileHandle; # stderr of CQP
  my $pid = open3($in, $out, $err, $CWB::CQP, @CQP_options, @options);
  $self->{'pid'} = $pid; # child process ID (so process can be killed if necessary)
  $in->autoflush(1); # make sure that commands sent to CQP are always flushed immediately

  my ($need_major, $need_minor, $need_beta) = split /\./, $CQP_version; # required CQP version
  $need_beta = 0 unless $need_beta;

  my $version_string = $out->getline; # child mode (-c) should print version on startup
  chomp $version_string;
  croak "ERROR: CQP backend startup failed ('$CWB::CQP @CQP_options @options')\n"
    unless $version_string =~ /^CQP\s+(?:\w+\s+)*([0-9]+)\.([0-9]+)(?:\.b?([0-9]+))?(?:\s+(.*))?$/;
  $self->{'major_version'} = $1;
  $self->{'minor_version'} = $2;
  $self->{'beta_version'} = $3 || 0;
  $self->{'compile_date'} = $4 || "unknown";
  croak "ERROR: CQP version too old, need at least v$CQP_version ($version_string)\n"
    unless ($1 > $need_major or
            $1 == $need_major
            and ($2 > $need_minor or
                 ($2 == $need_minor and $3 >= $need_beta)));

  ## command execution
  $self->{'command'} = undef; # CQP command string that is currently being processed (undef = last command has been completed)
  $self->{'lines'} = [];      # array of output lines read from CQP process
  $self->{'buffer'} = "";     # read buffer for standard output from CQP process
  $self->{'block_size'} = 256;  # block size for reading from CQP's output and error streams
  $self->{'query_lock'} = undef;# holds random key while query lock mode is active

  ## error handling (messages on stderr)
  $self->{'error_handler'} = undef; # set to subref for user-defined error handler
  $self->{'status'} = 'ok';         # status of last executed command ('ok' or 'error')
  $self->{'error_message'} = [];    # arrayref to array containing message produced by last command (if any)

  ## handling of CQP progress messages
  $self->{'progress'} = 0;             # whether progress messages are activated
  $self->{'progress_handler'} = undef; # optional callback for progress messages
  $self->{'progress_info'} = [];       # contains last available progress information: [$total_percent, $pass, $n_passes, $message, $percent]

  ## debugging (prints more or less everything on stdout)
  $self->{'debug'} = 0;

  ## select vectors for CQP output (stdout, stderr, stdout|stderr)
  $self->{'select_err'} = new IO::Select($err);
  $self->{'select_out'} = new IO::Select($out);
  $self->{'select_any'} = new IO::Select($err, $out);

  ## CQP object setup complete
  bless($self, $class);

  ## the following command will collect and ignore any output which may have been produced during startup
  $self->exec("set PrettyPrint off"); # pretty-printing should be turned off for non-interactive use

  return $self;
}

=item B<undef> I<$cqp>;

Exit CQP background process gracefully by issuing an C<exit;> command.
This is done automatically when the variable I<$cqp> goes out of scope.
Note that there may be a slight delay while B<CWB::CQP> waits for the CQP
process to terminate.

=cut

sub DESTROY {
  my $self = shift;

  if ($self->{'command'}) {
    while ($self->_update) {} # read pending output from active command
  }
  my $out = $self->{'out'};
  if (defined $out) {
    $out->print("exit");        # exit CQP backend
    $out->close;
  }
  my $in = $self->{'in'};
  if (defined $in) {
    $in->close;
  }
  my $pid = $self->{'pid'};
  waitpid $pid, 0; # wait for CQP to exit and reap background process
  ## **TODO** -- this may hang in some cases; is there a safe workaround?
}

=item I<$ok> = I<$cqp>->B<check_version>(I<$major>, I<$minor>, I<$beta>);

Check for minimum required CQP version, i.e. the background process has
to be CQP version I<$major>.I<$minor>.I<$beta> or newer.
I<$minor> and I<$beta> may be omitted, in which case they default to 0.
Note that the B<CWB::CQP> module automatically checks whether the CQP version
is compatible with its own requirements when a new object is created.
The B<check_version> method can subsequently be used to check for a more
recent release that provides functionality needed by the Perl script.

=cut

sub check_version {
  my $self = shift;
  my ($major, $minor, $beta) = @_;
  $minor = 0 unless defined $minor;

lib/CWB/CQP.pm  view on Meta::CPAN

      sysread $self->{'out'}, $self->{'buffer'}, $self->{'block_size'}, length($self->{'buffer'}); # append to object's input buffer
      if ($self->{'buffer'} =~ /\n/) {
        ## if there's a complete line in the input buffer, split off all lines
        my @new_lines = split /\n/, $self->{'buffer'}, -1; # make sure that last line is handled correctly if buffer ends in \n
        $self->{'buffer'} = pop @new_lines; # last entry is incomplete line ("" if buffer ended in \n) => return to input buffer
        foreach my $line (@new_lines) {
          ## skip blank line printed after each CQP command
          next if $line eq "";
          ## handle progress messages if ProgressBar has been activated
          if ($self->{'progress'} and $line =~ /^-::-PROGRESS-::-/) {
              my ($pass, $n_passes, $message); 
              (undef, $pass, $n_passes, $message) = split /\t/, $line;
              my $percent = ($message =~ /([0-9]+)\%\s*complete/) ? $1+0 : undef; # extract progress percentage, if present
              my $total_percent = (100 * ($pass - 1) + ($percent || 0)) / $n_passes; # estimate total progress ($percent assumed to be 0% if not given)
              $self->{'progress_info'} = [$total_percent, $pass, $n_passes, $message, $percent];
              my $handler = $self->{'progress_handler'};
              if (ref($handler) eq 'CODE') {
                $handler->($total_percent, $pass, $n_passes, $message, $percent); # call user-defined progress handler
              }
          }
          ## regular output lines are collected in object's line buffer
          else {
            push @{$self->{'lines'}}, $line;
            $lines++;
          }
        }
      }
      last if $lines > 0;       # if we have read a line and there is no output on stderr, return from function
    }

    ## ERROR -- we should never reach this point
    else {
      die "CWB::CQP: INTERNAL ERROR in _update() -- no data on stdout or stderr of CQP child process";
    }
  }

  if ($stderr_buffer ne "") {
    $self->{'status'} = 'error'; # any output on stderr indicates that something went wrong
    push @{$self->{'error_message'}}, split /\n/, $stderr_buffer;  # append output on stderr to error message
    $self->error(@{$self->{'error_message'}});                     # may call error handler and abort, or print message and continue
  }
  return $lines;
}

=item I<$cqp>->B<run>(I<$cmd>);

Start a single CQP command I<$cmd> in the background.  This method returns immediately.
Command output can then be read with the B<getline>, B<getlines> and B<getrow> methods.
If asynchronous communication is desired, use B<ready> to check whether output is available.

It is an error to B<run> a new command before the output of the previous command has completely
been processed.

=cut

sub run {
  croak 'USAGE:  $cqp->run($cmd]);'
    unless @_ == 2;
  my $self = shift;
  my $cmd = shift;
  my $debug = $self->{'debug'};

  $cmd =~ s/\n+/ /g;            # make sure there are no newline characters (to be on the safe side)
  $cmd =~ s/(;\s*)+$//;         # ";" will be added when $cmd is sent to CQP

  my $active_cmd = $self->{'command'};
  croak "Error: new CQP command issued while '$active_cmd' is still being processed"
    if $active_cmd;

  $self->{'command'} = "$cmd;";
  $self->{'status'} = 'ok';
  $self->{'buffer'} = "";
  $self->{'lines'} = [];
  $self->{'error_message'} = [];

  print "CQP << $cmd;\n"
    if $debug;
  $self->{'in'}->print("$cmd;\n .EOL.;\n"); # append .EOL. command to mark end of CQP output
}

=item I<$num_of_lines> = I<$cqp>->B<ready>;

=item I<$num_of_lines> = I<$cqp>->B<ready>(I<$timeout>);

Check if output from current CQP command is available for reading with B<getline> etc.,
returning the number of lines currently held in the input buffer (possibly including an
end-of-output marker line that will not be returned by B<getline> etc.).  If there is no
active command, returns B<undef>.

The first form of the command returns immediately.  The second form waits up to I<$timeout>
seconds for CQP output to become available.  Use a negative I<$timeout> for blocking mode.

=cut

sub ready {
  my $self = shift;
  my $timeout = shift;

  my $lines = @{$self->{'lines'}};
  return $lines            # output has already been buffered => ready to read
    if $lines > 0;
  return undef             # no command active => undefined state
    unless $self->{'command'};
  return $self->_update($timeout); # try to read from CQP process & return number of lines available (NB: line buffer was empty before)
}

## INTERNAL: reset internal status after command has been completed, check that there is no extra output
sub _eol {
  my $self = shift;
  while ($self->_update > 0) { 1 } # check for any pending output from CQP process
  carp "CWB::CQP:  Unexpected CQP output after end of command:\n",
    map {" | $_\n"} @{$self->{'lines'}},
      "(command was: ".$self->{'command'}.")"
        if @{$self->{'lines'}} > 0;
  $self->{'lines'} = [];
  $self->{'command'} = undef; # no active command now
}

=item I<$line> = I<$cqp>->B<getline>;

Read one line of output from CQP process, blocking if necessary until output beomes available.
Returns B<undef> when all output from the current CQP command has been read.

=cut

sub getline {
  croak 'USAGE:  $line = $cqp->getline;'
    unless @_ == 1;
  my $self = shift;
  croak 'CWB::CQP:  $cqp->getline called without active CQP command'
    unless $self->{'command'};
  my $debug = $self->{'debug'};

  $self->_update(-1)            # fill line buffer if necessary (blocking mode)
    unless @{$self->{'lines'}} > 0;

  my $line = shift @{$self->{'lines'}};
  if ($line eq '-::-EOL-::-') { 
    ## special line printed by ".EOL.;" marks end of CQP output
    print "CQP ", "-" x 60, "\n"
      if $debug;
    $self->_eol;
    return undef;               # undef return value marks end of output
  }
  else {
    print "CQP >> $line\n"
      if $debug;
    return $line;
  }
}

=item I<@lines> = I<$cqp>->B<getlines>(I<$n>);

Read I<$n> lines of output from the CQP process, blocking as long as necessary.  An explicit B<undef> element is included at the end of the output of a CQP command.  Note that B<getlines> may return fewer than I<$n> lines if the end of output is reac...

Set C<I<$n> = 0> to read all complete lines currently held in the input buffer (as indicated by the B<ready> method), or specify a negative value to read the complete output of the active CQP command.

=cut

sub getlines {
  croak 'USAGE:  @lines = $cqp->getlines($n);'
    unless @_ == 2 and $_[1] =~ /^-?[0-9]+$/ and wantarray;
  my $self = shift;
  my $n_lines = shift;
  my @lines = ();
  if ($n_lines == 0) {
    while (my $line = shift @{$self->{'lines'}}) {
      if ($line eq '-::-EOL-::-') {
        $self->_eol;
        push @lines, undef;
      }
      else {
        push @lines, $line;
      }
    }
  }
  else {
    while ($n_lines != 0) {     # if $n_lines < 0, reads complete output of CQP command
      while ($n_lines != 0 and @{$self->{'lines'}} > 0) {
        my $line = shift @{$self->{'lines'}};
        if ($line eq '-::-EOL-::-') {
          $self->_eol;
          push @lines, undef;
          $n_lines = 0;
        }
        else {
          push @lines, $line;
          $n_lines--;
        }
      }
      $self->_update(-1)     # wait for CQP output to become available (in $self's input buffer)
        if $n_lines != 0;
    }
    return (wantarray) ? @lines : shift @lines;
  }
}

=item I<@lines> = I<$cqp>->B<exec>(I<$cmd>);

A convenience function that executes CQP command I<$cmd>, waits for it to complete, and returns all lines of
output from the command.

Fully equivalent to the following two commands, except that the trailing B<undef> returnd by B<getlines> is not included in the output:

  $cqp->run($cmd);
  @lines = $cqp->getlines(-1);

=cut

sub exec {
  croak 'USAGE:  $cqp->exec($cmd);'
    unless @_ == 2;
  my $self = shift;
  my $cmd = shift;
  my $debug = $self->{'debug'};

  $self->run($cmd);
  my @result = $self->getlines(-1);
  my $eol = pop @result;
  if (defined $eol) {
    die "CWB::CQP:  INTERNAL ERROR in _exec() -- missing 'undef' at end of command output (ignored)";
    push @result, $eol; # seems to be regular line, so push it back onto result list
  }
  return @result;
}

=item I<@fields> = I<$cqp>->B<getrow>;

=item I<@rows> = I<$cqp>->B<exec_rows>(I<$cmd>);

Convenience functions for reading TAB-delimited tables, which are generated by CQP commands such as B<count>, B<group>, B<tabulate> and B<show cd>.

B<getrow> returns a single row of output, split into TAB-delimited fields.  If the active CQP command has completed, it returns an empty list.

B<exec_rows> executes the CQP command I<$cmd>, waits for it to complete, and then returns the TAB-delimited table as an array of array references.  You can then use multiple indices to access a specific element of the table, e.g. C<I<@rows>[41][2]> f...

=cut

sub getrow {
  croak 'USAGE:  @fields = $cqp->getrow;'
    unless @_ == 1 and wantarray;
  my $self = shift;
  my $line = $self->getline;
  return ()
    unless defined $line;
  return split /\t/, $line;
}

sub exec_rows {
  croak 'USAGE:  @rows = $cqp->exec_rows($cmd);'
    unless @_ == 2 and wantarray;
  my $self = shift;
  my $cmd = shift;
  my @lines = $self->exec($cmd); ## **TODO** this function could be optimised to collect arrayrefs directly
  return map { [ split /\t/ ] } @lines;
}

=item I<$cqp>->B<begin_query>;

=item I<$cqp>->B<end_query>;

Enter/exit query lock mode for safe execution of CQP queries entered by an untrusted user (e.g. from a Web interface).  In query lock mode, all interactive CQP commands are temporarily disabled; in particular, it is impossible to access files or exec...

=cut

sub begin_query {
  my $self = shift;
  croak 'CWB::CQP:  $cqp->begin_query; has been called while query lock mode is already active'
    if $self->{'query_lock'};
  my $key = 1 + int rand(1_000_000); # make sure this is a TRUE value
  $self->exec("set QueryLock $key");
  $self->{'query_lock'} = $key;
}

sub end_query {

lib/CWB/CQP.pm  view on Meta::CPAN

=item I<$cqp>->B<set_error_handler>(I<&my_error_handler>);

=item I<$cqp>->B<set_error_handler>('die' | 'warn' | 'ignore');

The first form of the B<set_error_handler> method activates a user-defined error handler.  The argument is a reference to a named or anonymous subroutine, which will be called whenever a CQP error is detected (or an error is raised explicitly with th...

The second form of the method activates one of the built-in error handlers:

=over 4

=item *

B<C<'die'>> aborts program execution with an error message; this handler is particularly convenient for one-off scripts or command-line utilities that do not need to recover from error conditions.

=item *

B<C<'warn'>> prints the error message on STDERR, but continues program execution.  This is the default error handler of a new B<CWB::CQP> object.

=item *

B<C<'ignore'>> silently ignores all errors.  The application script should check for error conditions after every CQP command, using the B<ok> or B<status> method.

=back 

=cut

## set user-defined error handler (or built-in handlers 'die', 'warn' [default], 'ignore')
sub set_error_handler {
  my $self = shift;
  my $handler = shift;

  if (defined $handler) {
    my $type = ref $handler;
    if ($type ne 'CODE') {
      $handler = lc($handler);
      croak 'USAGE:  $cqp->set_error_handler( \&my_error_handler | "die" | "warn" | "ignore" );'
        unless $handler =~ /^(die|warn|ignore)$/;
      if ($handler eq 'die') {
        $handler = \&_error_handler_die;
      }
      elsif ($handler eq 'warn') {
        $handler = undef;       # default behaviour if no error handler is specified
      }
      elsif ($handler eq 'ignore') {
        $handler = \&_error_handler_ignore;
      }
    }
  }
  $self->{'error_handler'} = $handler;
}

## INTERNAL: built in error handlers for 'die' and 'ignore' modes
sub _error_handler_die {
  croak "\n", "=+===CWB::CQP ERROR=====\n", (map {" | $_\n"} @_), "=+== occurred";
}

sub _error_handler_ignore {
  # do nothing
}

=item I<$cqp>->B<debug>(1);

=item I<$cqp>->B<debug>(0);

Activate/deactivate debugging mode, which logs all executed commands and their complete output on STDOUT.  The B<debug> method returns the previous status for convenience.

=cut

sub debug {
  croak 'USAGE:  $prev_status = $cqp->debug( 1 | 0 ) ;'
    unless @_ == 2;
  my $self = shift;
  my $on = shift;
  my $prev = $self->{'debug'};
  $self->{'debug'} = $on;
  return $prev;
}

=item I<$cqp>->B<progress_on>;

=item I<$cqp>->B<progress_off>;

=item I<$message> = I<$cqp>->B<progress>;

=item (I<$total>, I<$pass>, I<$n_passes>, I<$msg>, I<$percent>) = I<$cqp>->B<progress_info>;

CQP progress messages can be activated and deactivated with the B<progress_on> and B<progress_off> methods (corresponding to C<set ProgressBar on|off;> in CQP).

If active, progress information can be obtained with the method B<progress>, which returns the last progress message received from CQP.  The B<progress_info> returns pre-parsed progress information, consisting of estimated total percentage of complet...

It is an error to call B<progress> or B<progress_info> without activating progress messages first.

=cut

## activate / deactivate CQP progress messages
sub progress_on {
  my $self = shift;
  if ($self->{'progress'}) {
    carp 'CWB::CQP:  Progress messages have already been activated (ignored)';
  }
  else {
    $self->exec("set ProgressBar on");
    $self->{'progress'} = 1;
  }
}

sub progress_off {
  my $self = shift;
  if ( $self->{'progress'}) {
    $self->exec("set ProgressBar off");
    $self->{'progress'} = 0;
  }
  else {
    carp 'CWB::CQP:  Progress messages have not been turned on yet (ignored)';
  }
}

## poll current progress status (estimated total percentage, or detailed information)
sub progress {
  my $self = shift;
  croak 'CWB::CQP:  No progress() information available, please call progress_on() first'
    unless $self->{'progress'};
  if (not $self->{'command'}) {
    carp 'CWB::CQP:  No active command, progress() does not make sense';
    return undef;
  }
  else {
    ## if input is already available, return corresponding progress state; otherwise check for new progress messages
    $self->_update
      unless @{$self->{'lines'}} > 0;
    return $self->{'progress_info'}->[0];
  }
}

sub progress_info {



( run in 1.892 second using v1.01-cache-2.11-cpan-39bf76dae61 )