Alt-CWB-ambs

 view release on metacpan or  search on metacpan

lib/CWB/Encoder.pm  view on Meta::CPAN

}

=item $enc->debug(1);

Activate debugging output (on STDERR).

=cut

sub debug {
  my ($self, $yesno) = @_;
  $self->{DEBUG} = $yesno;
  $self->{VERBOSE} = 1          # debugging also activates verbose output
    if $yesno;
}

# internal method: called _before_ running cwb-encode
sub prepare_encode {
  my $self = shift;
  my $overwrite = $self->{OVERWRITE};
  
  my $name = $self->{NAME};     # check that setup is complete
  croak "CWB::Encoder: Corpus ID hasn't been specified (with name() method)\n"
    unless $name;
  croak "CWB::Encoder: No positional attributes specified.\n"
    unless @{$self->{PATT}} > 0;

  my $reg = $self->{REGISTRY};
  if (not defined $reg) {
    $reg = CWB::RegistryDirectory(); # try to guess registry if not specified
    $self->{REGISTRY} = $reg;
  }
  croak "CWB::Encoder: Can't determine unique registry directory (path is $reg).\n"
    if $reg =~ /:/;
  croak "CWB::Encoder: Registry directory $reg does not exist.\n"
    unless -d $reg;
  print STDERR "CWB::Encoder: registry directory is $reg\n"
    if $self->{DEBUG};

  my $regfile = "$reg/$name";   # remove registry entry if it exists
  if (-f $regfile) {
    croak "CWB::Encoder: Registry file already exists (overwriting not enabled).\n"
      unless $overwrite;
    print "Removing registry file $reg/$name ...\n"
      if $self->{VERBOSE};
    unlink "$reg/$name";
    croak "CWB::Encoder: Can't delete registry file $reg/$name\n"
      if -f "$reg/$name";
    print STDERR "CWB::Encoder: deleting file $reg/$name\n"
      if $self->{DEBUG};
  }

  my $dir = $self->{DIR};       # check/create data directory
  croak "CWB::Encoder: Data directory has not been set.\n"
    unless $dir;
  if (-d $dir) {
    croak "CWB::Encoder: Data directory already exists (overwriting not enabled).\n"
      unless $overwrite;
    print "Cleaning up data directory $dir ...\n"
      if $self->{VERBOSE};
    my $dh = new DirHandle $dir;
    my @files = grep {-f $_} (glob("$dir/*"), glob("$dir/.*"));
    my ($file, $filename);
    while (defined($filename = $dh->read)) {
      $file = "$dir/$filename";
      next unless -f $file;     # skip subdirectories etc.
      unlink $file;
      carp "CWB::Encoder: Can't delete file $file (trying to continue).\n"
        if -f $file;
      print STDERR "CWB::Encoder: deleting file $file\n"
        if $self->{DEBUG};
    }
    $dh->close;
  }
  else {
    print "Creating data directory $dir ...\n"
      if $self->{VERBOSE};
    croak "CWB::Encoder: Can't create data directory $dir\n"
      unless mkdir $dir;
    my $perm = $self->{PERM};
    if ($perm) {
      $perm =~ tr[642][753];    # derive directory permissions
      CWB::Shell::Cmd("chmod $perm '$dir'");
      $perm = "(chmod $perm)";
    }
    else {
      $perm = "";
    }
    my $group = $self->{GROUP};
    if ($group) {
      CWB::Shell::Cmd("chgrp $group '$dir'");
      $group = "(chgrp $group)";
    }
    else {
      $group = "";
    }
    print STDERR "CWB::Encoder: created directory $dir $perm $group\n"
      if $self->{DEBUG};
  }

}

# internal method: used to construct a cwb-encode command line for the specified input files
sub make_encode_cmd {
  my $self = shift;
  my @files = @_;
  my %attr = ();                # check for duplicate attributes

  my $cmd = "'$CWB::Encode' -s"; # build encode command (-xsB flags are always recommended!)
  $cmd .= " -B" unless $self->{UNDEF_SYMBOL} =~ /^\s*$/;  # assume that whitespace-only strings are allowed unless undef symbol is set
  $cmd .= " -x" if $self->{ENTITIES};                     # -x only if we're allowed to decode entities
  $cmd .= " -U '".$self->{UNDEF_SYMBOL}."'";
  $cmd .= " -R '".$self->{REGISTRY}."/".$self->{NAME}."'"; # has been set and checked by prepare_encode()
  $cmd .= " -d '".$self->{DIR}."'";
  $cmd .= " -c ".$self->{CHARSET};                         # from version 2.2.101, cwb-encode can set character set in registry file
  foreach my $file (@files) {   # check that all input files exist
    croak "CWB::Encoder: Input file $file does not exist.\n"
      unless -f $file;
    $cmd .= " -f '$file'";
  }
  $cmd .= " -p -";              # declare all p-attributes explicitly
  foreach my $att (@{$self->{PATT}}) { # declare p-attributes



( run in 1.014 second using v1.01-cache-2.11-cpan-ceb78f64989 )