CWB-CQP-More
view release on metacpan or search on metacpan
lib/CWB/CQP/More.pm view on Meta::CPAN
package CWB::CQP::More;
$CWB::CQP::More::VERSION = '0.08';
use parent CWB::CQP;
use CWB;
use Carp;
use Try::Tiny;
use Encode;
use warnings;
use strict;
use POSIX::Open3;
use CWB::CQP::More::Iterator;
our $DEBUG = 0;
sub import {
my @ops = @_;
$DEBUG = grep { $_ eq "DEBUG" } @ops;
}
=head1 NAME
CWB::CQP::More - A higher level interface for CWB::CQP
=head1 SYNOPSIS
use CWB::CQP::More;
my $cqp = CWB::CQP::More->new( { utf8 => 1 } );
$cqp->change_corpus('HANSARDS');
# This needs to get fixed... not nice to say "'<b>'"
$cqp->set(Context => [20, 'words'],
LD => "'<b>'",
RD => "'</b>'");
# using Try::Tiny...
try {
$cqp->exec('A = "dog";');
my $result_size = $cqp->size('A');
my @lines = $cqp->cat('A');
} catch {
print "Error: $_\n";
}
$cqp->annotation_show("pos");
$details = $cqp->corpora_details('hansards');
$available_corpora = $cqp->show_corpora;
# for debug
use CWB::CQP::More 'DEBUG';
=head1 METHODS
This class superclasses CWB::CQP and adds some higher-order
functionalities.
=head2 new
The C<new> constructor has the same behavior has the C<CWB::CQP>
C<new> method, unless the first argument is a hash reference. In that
case, it is shifted and used as configuration for
C<CWB::CQP::More>. The remaining arguments are sent unaltered to
C<CWB::CQP> constructor.
=cut
sub _super_hacked_new {
my @options = @_;
my $self = {};
# split options with values, e.g. "-r /my/registry" => "-r", "/my/registry"
# (doesn't work for multiple options in one string)
@options = map { (/^(--?[A-Za-z0-9]+)\s+(.+)$/) ? ($1, $2) : $_ } @options;
## run CQP server in the background
my $in = $self->{'in'} = new FileHandle; # stdin of CQP
my $out = $self->{'out'} = new FileHandle; # stdout of CQP
my $err = $self->{'err'} = new FileHandle; # stderr of CQP
my $pid = open3($in, $out, $err, $CWB::CQP, @CWB::CQP::CQP_options, @options);
$self->{'pid'} = $pid; # child process ID (so process can be killed if necessary)
$in->autoflush(1); # make sure that commands sent to CQP are always flushed immediately
my ($need_major, $need_minor, $need_beta) = split /\./, $CWB::CQP::CQP_version;
$need_beta = 0 unless $need_beta;
my $version_string = $out->getline; # child mode (-c) should print version on startup
chomp $version_string;
croak "ERROR: CQP backend startup failed ('$CWB::CQP @CWB::CQP::CQP_options @options')\n"
unless $version_string =~
m/^CQP\s+(?:\w+\s+)*([0-9]+)\.([0-9]+)(?:\.b?([0-9]+))?(?:\s+(.*))?$/;
$self->{'major_version'} = $1;
$self->{'minor_version'} = $2;
lib/CWB/CQP/More.pm view on Meta::CPAN
=cut
sub annotation_show($@) {
my ($self, @annotations) = @_;
my $annots = join(" ", map { "+$_" } @annotations);
$self->exec("show $annots;");
}
=head2 annotation_hide
Use this method to specify what annotations to make CQP to not show
(hide). Pass it a list of the annotation names.
=cut
sub annotation_hide($@) {
my ($self, @annotations) = @_;
my $annots = join(" ", map { "-$_" } @annotations);
$self->exec("show $annots;");
}
=head2 change_corpus
Change current active corpus. Pass the corpus name as the argument.
=cut
sub change_corpus($$) {
my ($self, $cname) = @_;
$cname = uc $cname;
$self->exec("$cname;");
}
=head2 set
Set CQP properties. Pass a hash (not a reference) of key/values to be
set. Note that at the moment string values should be double quoted
(see example in the synopsis).
=cut
sub set($%) {
my ($self, %vars) = @_;
for my $key (keys %vars) {
my $values;
if (ref($vars{$key}) eq "ARRAY") {
$values = join(" ", @{$vars{$key}});
} else {
$values = $vars{$key};
}
try {
$self->exec("set $key $values;");
};
}
}
=head2 exec
Similar to CWB::CQP->exec, but dying in case of error with the error
message. Useful for use with C<Try::Tiny>. Check the synopsis above
for an example.
=cut
sub exec {
my ($self, @args) = @_;
@args = map { Encode::_utf8_off($_); $_ } @args if $self->{__utf8};
print STDERR join(' || ', @args), "\n" if $DEBUG;
my @answer = $self->SUPER::exec(@args);
die $self->error_message unless $self->ok;
@answer = map { Encode::_utf8_on($_); $_ } @answer if $self->{__utf8};
return @answer;
}
=head2 corpora_details
Returns a reference to a hash with details about a specific corpus,
like name, id, home directory, properties and attributes;
=cut
sub corpora_details {
my ($self, $cname) = @_;
return undef unless $cname;
$cname = lc $cname unless $cname =~ m{[/\\]};
my $details = {};
my $reg = new CWB::RegistryFile $cname;
return undef unless $reg;
$details->{filename} = $reg->filename;
$details->{name} = $reg->name;
$details->{corpus_id} = $reg->id;
$details->{home_dir} = $reg->home;
$details->{info_file} = $reg->info;
my @properties = $reg->list_properties;
for my $property (@properties) {
$details->{property}{$property} = $reg->property($property);
}
my @attributes = $reg->list_attributes;
for my $attr (@attributes) {
$details->{attribute}{$reg->attribute($attr)}{$attr} = $reg->attribute_path($attr);
}
return $details;
}
=head2 show_corpora
Returns a reference to a list of the available corpora;
=cut
sub show_corpora {
my $self = shift;
my $ans;
try {
( run in 1.648 second using v1.01-cache-2.11-cpan-22024b96cdf )