Alt-CWB-ambs

 view release on metacpan or  search on metacpan

lib/CWB.pm  view on Meta::CPAN

  my $name = $basedir.$prefix.".$$".$suffix;
  my $num = 1;
  while (-e $name) {            # choose unique name in case file already exists
    $name = $basedir.$prefix.".$$-".$num.$suffix;
    $num++;
  }
  my $fh = CWB::OpenFile "> $name";
  $self->{NAME} = $name;
  $self->{FH} = $fh;
  $self->{STATUS} = "W";        # W = writing, F = finished, R = reading, D = deleted
  return bless($self, $class);
}

sub DESTROY {
  my ($self) = @_;
  # Destructor: invoke the object's close() method when it goes out of scope,
  # unless its status flag is already "D" (= deleted).
  $self->close
    unless $self->{STATUS} eq "D";
}

=item $tf->close;

lib/CWB.pm  view on Meta::CPAN

     INFO => undef,                             # info file (optional, but highly recommended)
     PROPERTIES => [],                          # corpus properties ([property, value] pairs)
     ATT => {},                                 # attributes (att => 'p' / 's' / 'a')
     ATT_PATH => {},                            # data paths for attributes
     SERIALIZE => [],                           # order in which attributes are listed in the registry entry
     COMMENTS =>  {},                           # comments and/or blank lines preceding each content line
                                                # (att => [comment1, comment2, ...], ':NAME' => [...], '::property' => ...)
     LINECOMMENT => {},                         # line comments on content lines (att => comment, ':NAME' => comment, ...)
     FILENAME => undef,                         # filename of registry file (if loaded from file)
    };
  bless($self, $class);

  # if filename was specified, try loading registry entry (searches in registry directories if necessary)
  if (defined $filename) {
    if ($filename !~ /\// and not -f $filename) {
      my @dirs = CWB::RegistryDirectory();
      my @files = grep { -f $_ } map { "$_/".lc($filename) } @dirs; # corpus ID may be specified in uppercase
      return $self->error("Found multiple registry entries for corpus ".uc($filename).":", @files)
        if @files > 1;
      $filename = shift @files
        if @files;

lib/CWB/CEQL.pm  view on Meta::CPAN

# Constructor: create a CWB::CEQL::Parser object, register the grammar
# parameters with their default values, and re-bless the object into the
# requested class.
#
#   $class  - class name the new object is blessed into
#
# Returns the new grammar object.
sub new {
  my $class = shift;
  # Explicit arrow syntax: the indirect form "new CWB::CEQL::Parser" depends
  # on parser heuristics (this package defines its own new()) and is not
  # available under "use v5.36".
  my $self = CWB::CEQL::Parser->new;
  $self->NewParam("pos_attribute", "pos");
  $self->NewParam("lemma_attribute", "lemma");
  $self->NewParam("simple_pos", undef);
  $self->NewParam("simple_pos_attribute", undef);
  $self->NewParam("s_attributes", { "s" => 1 });
  $self->NewParam("default_ignore_case", 1);
  $self->NewParam("default_ignore_diac", 0);
  return bless($self, $class);
}

=item I<$cqp_query> = I<$CEQL>->B<Parse>(I<$simple_query>);

Parses simple query in CEQL syntax and returns equivalent CQP code.  If there
is a syntax error in I<$simple_query> or parsing fails for some other reason,
an B<undef>ined value is returned.

=item @text_lines = I<$CEQL>->B<ErrorMessage>;

lib/CWB/CEQL/Parser.pm  view on Meta::CPAN


A typical skeleton of a DPP grammar with parameters looks as follows:

  package MyGrammar;
  use base 'CWB::CEQL::Parser';

  sub new {
    my $class = shift;
    my $self = new CWB::CEQL::Parser;
    $self->NewParam("pos_attribute", "pos");
    return bless($self, $class);
  }

  sub pos_tag {
    my ($self, $input) = @_;
    my $pos_att = $self->GetParam("pos_attribute");
    die "'$input' does not appear to be a valid POS tag\n"
      unless $input =~ /^[A-Z0-9]+$/;
    return "$pos_att = '$input'"; # CQP constraint for POS tag
  }

lib/CWB/CEQL/Parser.pm  view on Meta::CPAN

  my $class = shift;
  my $self = {
              'PARAM_DEFAULTS' => {},  # globally set default values for parameters
              'PARAM' => undef,        # working copies of parameters during parse
              'INPUT' => undef,        # input string (defined while parsing)
              'ERROR' => undef,        # error message generated by last parse (undef = no error)
              'CALLSTACK' => [],       # call stack for backtrace in case of error
              'GROUPS' => undef,       # group structure for shift-reduce parser (undef if not active)
              'GROUPSTACK' => undef,   # stack of nested bracketing groups (undef if not active)
             };
  bless($self, $class);
}

=item I<$result> = I<$grammar>->B<Parse>(I<$string> [, I<$rule>]);

Parse input string I<$string> as a constituent of type I<$rule> (if
unspecified, the C<default> rule will be used).  The return value I<$result>
is typically a string containing the transformed query, but may also be an
arbitrary data structure or object (such as a parse tree for I<$input>).
Consult the relevant grammar documentation for details.  If parsing fails,
B<undef> is returned.

lib/CWB/CEQL/Parser.pm  view on Meta::CPAN

  confess "CWB::CEQL::Parser: parameter '$name' already exists, cannot create with NewParam()"
    if exists $param_set->{$name};
  $param_set->{$name} = $value;
}

=item I<$result> = I<$self>->B<Call>(I<$rule>, I<$input>);

Apply rule I<$rule> to input string I<$input>.  The return value I<$result>
depends on the grammar rule, but is usually a string containing a translated
version of I<$input>.  Grammar rules may also annotate this string with
B<attributes> or by B<bless>ing it into a custom class, or return a complex
data structure such as a parse tree for I<$input>.  The caller has to be aware
what kind of value I<$rule> returns.

Note that B<Call> never returns B<undef>.  In case of an error, the entire
parse is aborted.

=cut

sub Call {
  confess 'Usage:  $result = $self->Call($rule, $input);'

lib/CWB/CEQL/String.pm  view on Meta::CPAN


=cut

sub new {
  my $class = shift;
  my $value = shift;            # string value wrapped by the object
  my $type = shift;             # optional type; stays undef if omitted
  # every object starts out with an empty attribute table
  my %object = (VALUE => $value, TYPE => $type, ATTRIBUTE => {});
  return bless(\%object, $class);
}

=item I<$string> = I<$obj>->B<value>;

=item I<$string> = "I<$obj>";

Return string value of B<CWB::CEQL::String> object I<$obj>.  Overloading
ensures that this value is accessed automatically if I<$obj> is used in a
string context (such as interpolation).

lib/CWB/CEQL/String.pm  view on Meta::CPAN


=cut

sub copy {
  my ($self) = @_;
  # one-level copy of the attribute table, so that adding or changing
  # attributes on the clone does not affect the original object
  my %attributes = %{$self->{ATTRIBUTE}};
  my %clone = (
               VALUE => $self->{VALUE},
               TYPE => $self->{TYPE},
               ATTRIBUTE => \%attributes,
              );
  # the clone belongs to the same class as the original
  return bless(\%clone, ref($self));
}

=item I<$result> = I<$obj>->B<cmp>(I<$obj2> [, I<$reverse>]);

The B<cmp> method implements string comparison operators for
B<CWB::CEQL::String> objects.  The second operand I<$obj2> must either be a
plain string or another B<CWB::CEQL::String> object.  If the optional argument
I<$reverse> is TRUE, the comparison is reversed (so a string as first operand
can be compared with a B<CWB::CEQL::String> object).

lib/CWB/CQP.pm  view on Meta::CPAN


  ## debugging (prints more or less everything on stdout)
  $self->{'debug'} = 0;

  ## select vectors for CQP output (stdout, stderr, stdout|stderr)
  $self->{'select_err'} = new IO::Select($err);
  $self->{'select_out'} = new IO::Select($out);
  $self->{'select_any'} = new IO::Select($err, $out);

  ## CQP object setup complete
  bless($self, $class);

  ## the following command will collect and ignore any output which may have been produced during startup
  $self->exec("set PrettyPrint off"); # pretty-printing should be turned off for non-interactive use

  return $self;
}

=item B<undef> I<$cqp>;

Exit CQP background process gracefully by issuing an C<exit;> command.

lib/CWB/Encoder.pm  view on Meta::CPAN

      $comp = $attr = "";       # reset to check for syntax errors
    }
    elsif (/Type:\s+([A-Z])/) {
      carp "CWB::Indexer: Missing attribute name in output of cwb-describe-corpus $name (skipped).\n"
        unless $attr;
      $self->{TYPES}->{$attr} = $1;
    }
    # all other lines are ignored
  }

  return bless($self, $class);
}

=item $idx->group($group);

=item $idx->perm($permission);

Optional group membership and access permissions for newly created
files (otherwise, neither B<chgrp> nor B<chmod> will be called). Note
that I<$permission> must be a string rather than an octal number (as
for the built-in B<chmod> function). Indexing will fail if the

lib/CWB/Encoder.pm  view on Meta::CPAN

              PERM => undef,    # permissions for created files
              OVERWRITE => undef, # can I overwrite existing files?
              MEMORY => 75,     # passed to CWB::Indexer
              VALIDATE => 1,    # passed to CWB::Indexer
              ENTITIES => 1,    # whether to decode XML entities (and skip comments etc.)
              UNDEF_SYMBOL => "", # string to insert for missing values of p-attributes
              VERBOSE => 0,     # print some progress information (stdout)
              DEBUG => 0,
              PIPE => undef,    # pipe to cwb-encode (for encode_pipe() method)
             };
  bless($self, $class);
  $self->name(shift)
    if @_;
  return $self;
}

=item $enc->name($corpus);

Change the CWB name of a corpus after the encoder object I<$enc> has been created.
Has to be used if the constructor was called without arguments.

t/44_ceql_bncweb.t  view on Meta::CPAN

               "STOP" => "STOP",
               "UNC" => "UNC",
              };
  $self->SetParam("simple_pos", $table);
  $self->SetParam("simple_pos_attribute", "class");
  my %xml_tags = map { $_ => 1 } # list of s-attribute regions in the BNC version used by BNCweb
    (qw(text u div head quote sp speaker stage lg l list label item note bibl corr hi trunc p s mw), # from CWB registry file
     # nested attributes are accepted, but should perhaps better be inserted automagically
     qw(div1 div2 div3 quote1 list1 list2 item1 item2 hi1 p1 p2));
  $self->SetParam("s_attributes", \%xml_tags);
  return bless($self, $class);
}

# BNCweb::CEQL expects its input to be in the canonical BNCweb encoding, i.e. Latin-1 + HTML entities;
# the "default" rule first converts the input to a Perl Unicode string, and then re-encodes the resulting CQP query in Latin-1
sub default {
  my ($self, $input) = @_;
  my $unicode = decode("iso-8859-1", $input);
  ##-- # the real implementation uses the HTML::Entities module to decode HTML entities
  ##-- decode_entities($unicode);
  # here, dummy rules covering all entities in the test suite help us to avoid a dependency on the non-standard HTML::Entities module



( run in 2.747 seconds using v1.01-cache-2.11-cpan-de7293f3b23 )