CWB-CQP-More

 view release on metacpan or  search on metacpan

lib/CWB/CQP/More.pm  view on Meta::CPAN

package CWB::CQP::More;
$CWB::CQP::More::VERSION = '0.08';
use parent CWB::CQP;
use CWB;

use Carp;
use Try::Tiny;
use Encode;
use warnings;
use strict;
use POSIX::Open3;
use CWB::CQP::More::Iterator;

our $DEBUG = 0;

sub import {
    my @ops = @_;
    $DEBUG = grep { $_ eq "DEBUG" } @ops;
}

=head1 NAME

CWB::CQP::More - A higher level interface for CWB::CQP

=head1 SYNOPSIS

    use CWB::CQP::More;

    my $cqp = CWB::CQP::More->new( { utf8 => 1 } );

    $cqp->change_corpus('HANSARDS');

    # This needs to get fixed... not nice to say "'<b>'"
    $cqp->set(Context  => [20, 'words'],
              LD       => "'<b>'",
              RD       => "'</b>'");

    # using Try::Tiny...
    try {
        $cqp->exec('A = "dog";');
        my $result_size = $cqp->size('A');
        my @lines = $cqp->cat('A');
    } catch {
        print "Error: $_\n";
    }

    $cqp->annotation_show("pos");

    $details = $cqp->corpora_details('hansards');

    $available_corpora = $cqp->show_corpora;

    # for debug
    use CWB::CQP::More 'DEBUG';

=head1 METHODS

This class superclasses CWB::CQP and adds some higher-order
functionalities.

=head2 new

The C<new> constructor has the same behavior has the C<CWB::CQP>
C<new> method, unless the first argument is a hash reference. In that
case, it is shifted and used as configuration for
C<CWB::CQP::More>. The remaining arguments are sent unaltered to
C<CWB::CQP> constructor.

=cut

sub _super_hacked_new {
    my @options = @_;
    my $self = {};

    # split options with values, e.g. "-r /my/registry" => "-r", "/my/registry"
    # (doesn't work for multiple options in one string)
    @options = map { (/^(--?[A-Za-z0-9]+)\s+(.+)$/) ? ($1, $2) : $_ } @options;

    ## run CQP server in the background
    my $in  = $self->{'in'}  = new FileHandle; # stdin  of CQP
    my $out = $self->{'out'} = new FileHandle; # stdout of CQP
    my $err = $self->{'err'} = new FileHandle; # stderr of CQP

    my $pid = open3($in, $out, $err, $CWB::CQP, @CWB::CQP::CQP_options, @options);

    $self->{'pid'} = $pid; # child process ID (so process can be killed if necessary)
    $in->autoflush(1); # make sure that commands sent to CQP are always flushed immediately

    my ($need_major, $need_minor, $need_beta) = split /\./, $CWB::CQP::CQP_version;
    $need_beta = 0 unless $need_beta;

    my $version_string = $out->getline; # child mode (-c) should print version on startup
    chomp $version_string;
    croak "ERROR: CQP backend startup failed ('$CWB::CQP @CWB::CQP::CQP_options @options')\n"
      unless $version_string =~
        m/^CQP\s+(?:\w+\s+)*([0-9]+)\.([0-9]+)(?:\.b?([0-9]+))?(?:\s+(.*))?$/;
    $self->{'major_version'} = $1;
    $self->{'minor_version'} = $2;

lib/CWB/CQP/More.pm  view on Meta::CPAN

=cut

sub annotation_show($@) {
    my ($self, @annotations) = @_;
    my $annots = join(" ", map { "+$_" } @annotations);
    $self->exec("show $annots;");
}

=head2 annotation_hide

Use this method to specify what annotations to make CQP to not show
(hide). Pass it a list of the annotation names.

=cut

sub annotation_hide($@) {
    my ($self, @annotations) = @_;
    my $annots = join(" ", map { "-$_" } @annotations);
    $self->exec("show $annots;");
}

=head2 change_corpus

Change current active corpus. Pass the corpus name as the argument.

=cut

sub change_corpus($$) {
    my ($self, $cname) = @_;
    $cname = uc $cname;
    $self->exec("$cname;");
}

=head2 set

Set CQP properties. Pass a hash (not a reference) of key/values to be
set. Note that at the moment string values should be double quoted
(see example in the synopsis).

=cut

sub set($%) {
    my ($self, %vars) = @_;
    for my $key (keys %vars) {
        my $values;
        if (ref($vars{$key}) eq "ARRAY") {
            $values = join(" ", @{$vars{$key}});
        } else {
            $values = $vars{$key};
        }

        try {
            $self->exec("set $key $values;");
        };
    }
}

=head2 exec

Similar to CWB::CQP->exec, but dying in case of error with the error
message. Useful for use with C<Try::Tiny>. Check the synopsis above
for an example.

=cut

sub exec {
    my ($self, @args) = @_;
    @args = map { Encode::_utf8_off($_); $_ } @args if $self->{__utf8};
    print STDERR join(' || ', @args), "\n" if $DEBUG;
    my @answer = $self->SUPER::exec(@args);
    die $self->error_message unless $self->ok;
    @answer = map { Encode::_utf8_on($_); $_ } @answer if $self->{__utf8};
    return @answer;
}

=head2 corpora_details

Returns a reference to a hash with details about a specific corpus,
like name, id, home directory, properties and attributes;

=cut

sub corpora_details {
    my ($self, $cname) = @_;
    return undef unless $cname;

    $cname = lc $cname unless $cname =~ m{[/\\]};

    my $details = {};
    my $reg = new CWB::RegistryFile $cname;
    return undef unless $reg;

    $details->{filename}  = $reg->filename;
    $details->{name}      = $reg->name;
    $details->{corpus_id} = $reg->id;
    $details->{home_dir}  = $reg->home;
    $details->{info_file} = $reg->info;

    my @properties = $reg->list_properties;
    for my $property (@properties) {
        $details->{property}{$property} = $reg->property($property);
    }

    my @attributes = $reg->list_attributes;
    for my $attr (@attributes) {
        $details->{attribute}{$reg->attribute($attr)}{$attr} = $reg->attribute_path($attr);
    }

    return $details;
}

=head2 show_corpora

Returns a reference to a list of the available corpora;

=cut

sub show_corpora {
    my $self = shift;
    my $ans;
    try {



( run in 1.648 second using v1.01-cache-2.11-cpan-22024b96cdf )