Treex-PML

 view release on metacpan or  search on metacpan

lib/Treex/PML/Backend/CSTS/Csts2fs.pm  view on Meta::CPAN

           'A arabspec' => [\&to_node_attr,'|','arabspec'],
           'A arabclause' => [\&to_node_attr,'|','arabclause'],
           'MDt w' => [\&to_composed_node_attr,'_','|','src','wMDt'],
           'MDl w' => [\&to_composed_node_attr,'_','|','src','wMDl'],
           'MDA w' => [\&to_composed_node_attr,'_','|','src','wMDA'],
           'MDA parallel' => [\&to_composed_node_attr,'_','|','src','parallelMD'],
           'MDA paren' => [\&to_composed_node_attr,'_','|','src','parenMD'],
           'MDA arabfa' => [\&to_composed_node_attr,'_','|','src','arabfaMD'],
           'MDA arabspec' => [\&to_composed_node_attr,'_','|','src','arabspecMD'],
           'MDA arabclause' => [\&to_composed_node_attr,'_','|','src','arabclauseMD'],
           'MDg w' => [\&to_composed_node_attr,'_','|','src','wMDg'],
           'wsd s' => [\&to_node_attr,'|','wsds'],
           'wsd ewn' => [\&to_node_attr,'|','wsdewn'],
           'wsd ili' => [\&to_node_attr,'|','wsdili'],
           'wsd iliOffset' => [\&to_node_attr,'|','wsdiliOffset'],
           's id' => [\&to_attr,'root','|','ID1'],
           'salt id' => [\&to_attr,'root','|','ID1'],
           'csts lang' => [\&to_node_attr,'|','cstslang'],
           'f case' => [\&to_node_attr,'|','formtype'],
           'f id' => [\&to_node_attr,'','AID'],
           'd type' => [\&to_node_attr,'|','formtype'],
           'd id' => [\&to_node_attr,'|','AID'],
           'w kind' => [\&to_next_node_attr,'|','origfkind'],
           't w'=> [\&to_node_attr,'|','wt'],
           'fadd id' => [\&to_node_attr,'','TID'],
           'fadd del' => [sub {
                            my ($s,$data)=@_;
                            &to_node_attr($s,uc($data),'|','del');
                            &to_node_attr($s,'hide','','ARhide');
                          }],
           'MTRl quot' => [\&to_composed_node_attr,'_','|','src','quotMTRl'],
           'TRl quot' => [\&assign_quot_dsp],
           'coref ref' => [\&to_node_attr,'|','coref'],
           'coref type' => [\&to_node_attr,'|','cortype'],
           'TRl status' => [sub {
                            my ($s,$data)=@_;
                            &to_node_attr($s,'hide','','TR')
                              if ($data eq 'hidden');
                          }],
           'MTRl status' => [sub {
                               my ($s,$data)=@_;
                               &to_composed_node_attr($s,'hide','_','|','src','MTR')
                                 if ($data eq 'hidden');
                             }],
           'TRl origin' => [sub {
                              my ($s,$data)=@_;
                              $data=~s/\s+/|/g;
                              &to_node_attr($s,$data,'','AIDREFS');
                            }],
           'MTRl origin' => [sub {
                               my ($s,$data)=@_;
                               $data=~s/\s+/|/g;
                               &to_composed_node_attr($s,$data,'_','','src','MAIDREFS');
                             }],
           'x name' => [sub {
                          my ($s,$data)=@_;
                          $s->{node}->{X_hide}='' if ($data eq 'TNT');
                        }]
          );

my %pcdata = (
              'source' => [\&to_node_attr,'','cstssource'],
              'mauth' => [sub {
                my ($s)=@_;
                if (($s->{elements}->[-3]||'') eq 'h') {
                  to_attr(@_,'following_root','','cstsmarkup');
                } else {
                  to_attr(@_,'following_root','','docmarkup');
                }
              }],
              'mdate' => [sub {
                my ($s)=@_;
                if (($s->{elements}->[-3]||'') eq 'h') {
                  to_attr(@_,'following_root','','cstsmarkup');
                } else {
                  to_attr(@_,'following_root','','docmarkup');
                }
              }],
              'mdesc' => [sub {
                my ($s)=@_;
                if (($s->{elements}->[-3]||'') eq 'h') {
                  to_attr(@_,'following_root','','cstsmarkup');
                } else {
                  to_attr(@_,'following_root','','docmarkup');
                }
              }],
              'mod' => [\&to_attr,'following_root','','docprolog'],
              'txtype' => [\&to_attr,'following_root','','docprolog'],
              'genre' => [\&to_attr,'following_root','','docprolog'],
              'verse' => [\&to_attr,'following_root','','docprolog'],
              'med' => [\&to_attr,'following_root','','docprolog'],
              'authsex' => [\&to_attr,'following_root','','docprolog'],
              'lang' => [\&to_attr,'following_root','','docprolog'],
              'transsex' => [\&to_attr,'following_root','','docprolog'],
              'srclang' => [\&to_attr,'following_root','','docprolog'],
              'temp' => [\&to_attr,'following_root','','docprolog'],
              'firsted' => [\&to_attr,'following_root','','docprolog'],
              'authname' => [\&to_attr,'following_root','','docprolog'],
              'transname' => [\&to_attr,'following_root','','docprolog'],
              'opus' => [\&to_attr,'following_root','','docprolog'],
              'id' => [\&to_attr,'following_root','','docprolog'],
              'i' => [\&to_node_attr,'','!GAP'],
              'iref' => [\&to_node_attr,'','!GAP'],
              MDt => [\&to_composed_node_attr,'_','|','src','tagMD'],
              MDl => [\&to_composed_node_attr,'_','|','src','lemmaMD'],
              MMt => [sub {
                        my ($s,$data) = @_;
                        # dirty hack to have the same number of MMl and MMt values
                        to_composed_node_attr(@_,'_','|','src','tagMM');
                        my $name;
                        for my $a (@{$s->{attributes}[-1]}) {
                          if ($a->[0] eq 'src') {
                            $name='MM_'.$a->[1];
                            last;
                          }
                        }
                        my @l = split /\|/,$s->{node}->{'lemma'.$name};
                        my @t = split /\|/,$s->{node}->{'tag'.$name};
                        $s->{node}->{'lemma'.$name}.='|'.$l[$#l] if (@l == scalar(@t)-1);
                      }],
              MMl => [\&to_composed_node_attr,'_','|','src','lemmaMM'],

lib/Treex/PML/Backend/CSTS/Csts2fs.pm  view on Meta::CPAN

      "\n"
    } elsif ($case eq '|') { # SDATA bracket (ignoring)
      ''
    } else {
      chr(oct($1))
    }
  }gex;
  return $data;
}

# Parse the line-oriented output of nsgmls read from $fh and store the
# resulting trees and FS header in $fsfile.
#
# Every input line consists of a one-character record type followed by the
# record data.  The record types handled here are:
#   (   start of the element named by the data
#   )   end of the element named by the data
#   -   character data belonging to the innermost open element
#   A   attribute ("NAME TYPE VALUE") of the next element to be started
#   C   the document is conforming
# All other record types are ignored.  Start tags, end tags, attributes and
# character data are dispatched to the callbacks registered in %start_tag,
# %end_tag, %att and %pcdata; each callback gets the shared parser state as
# its first argument, the record value as its second, and the extra
# arguments stored in the dispatch table after that.
#
# Arguments:
#   $fh     - filehandle to read the nsgmls output from
#   $fsfile - object that receives the result through changeFS, changeTail,
#             changeTrees, changePatterns and changeHint
#
# Returns nothing when $fsfile is not a reference; otherwise 1 if a 'C'
# record was seen and 0 if it was not.
sub read {
  my ($fh,$fsfile) = @_;
  return unless ref($fsfile);

  my (@elements, @attributes);
  my $next_attributes=[];
  my $state = {
               elements => \@elements,      # element name stack
               attributes => \@attributes,  # element attribute-list stack
               file => undef, # fixme
               event => undef,
               root => undef,
               following_root => {},
               node => undef,
               following => {},
               trees => [],
               nodes => []
              };

  my $document_is_conforming=0;
  # we parse the output of nsgmls
  # and dispatch as needed
  while (defined(my $line = <$fh>)) {
    # chomp (not chop) so that a final line lacking a newline keeps its
    # last character
    chomp($line);
    # an empty line has no record type; skipping it keeps the following
    # record from being consumed as this record's data
    next if $line eq '';
    my $type = substr($line,0,1);
    my $data = substr($line,1);
    if ($type eq '(') { # start element
      push @elements, $data;
      push @attributes, $next_attributes;
      if (exists($start_tag{$data})) {
        my ($cb,@args)=@{ $start_tag{$data} };
        $cb->($state,$data,@args);
      }
      # attribute callbacks run after the start-tag callback
      for my $attribute (@$next_attributes) {
        my $key = "$data $attribute->[0]";
        if (exists $att{$key}) {
          my ($cb,@args)=@{ $att{$key} };
          $cb->($state,$attribute->[1],@args);
        }
      }
      $next_attributes=[];
    } elsif ($type eq ')') { # end element
      if (exists($end_tag{$data})) {
        my ($cb,@args)=@{ $end_tag{$data} };
        $cb->($state,$data,@args);
      }
      pop @elements;
      pop @attributes;
    } elsif ($type eq '-') { # character data
      # character data outside of any element has nothing to dispatch to
      my $element = $elements[-1];
      if (defined($element) && exists($pcdata{$element})) {
        my ($cb,@args)=@{ $pcdata{$element} };
        $cb->($state,unescape_data($data),@args);
      }
    } elsif ($type eq 'A') { # attribute of the next element
      my ($name,$value)= $data=~m{^(\S+) (?:IMPLIED$|(?:CDATA|NOTATION|ENTITY|TOKEN|ID) (.*))};
      # ignore attribute records we cannot parse instead of queueing an
      # attribute without a name
      next unless defined $name;
      # IMPLIED attributes carry no value; they are queued with undef
      push @$next_attributes,[$name, defined($value) ? unescape_data($value) : undef];
    } elsif ($type eq 'C') { # document is conforming
      $document_is_conforming=1;
    }
  }

  # FS header: the configured base header plus one @P declaration per
  # composed attribute; sorted so that the header is reproducible
  my @header=@{$header};
  push @header, map { "\@P $_" } sort keys %composed_attrs;
  $fsfile->changeFS(Treex::PML::Factory->createFSFormat(\@header));
  $fsfile->changeTail("$fs_tail\n");
  $fsfile->changeTrees(@{$state->{trees}});
  $fsfile->changePatterns(@fs_patterns);
  $fsfile->changeHint($fs_hint);

  return $document_is_conforming;
}

# Switch the converter's file-level settings to the TR configuration:
# 'govTR' as the governor attribute, the TR header, initial node/root
# values marking TR trees, and no display patterns or hint.
sub setupTR {
  ($gov, $header) = ('govTR', \@TRheader);

  $initial_node_values{'TR'}       = 'hide';
  $initial_root_values{'reserve1'} = 'TR_TREE';

  $fs_tail     = '(2,3)';
  @fs_patterns = ();    # proper patterns added by TrEd's hook
  $fs_hint     = undef;
}

# Switch the converter's file-level settings to the AR configuration:
# 'ordorig' as the governor attribute, the AR header, no TR-specific
# initial values, and form/afun display patterns with a tag/lemma hint.
sub setupAR {
  ($gov, $header) = ('ordorig', \@ARheader);

  # drop the initial values that setupTR installs
  delete $initial_node_values{'TR'};
  delete $initial_root_values{'reserve1'};

  $fs_tail     = '(2,3)';
  @fs_patterns = ('${form}', '${afun}');
  $fs_hint     = join "\n",
                   "tag:\t" . '${tag}',
                   "lemma:\t" . '${lemma}';
}

# Switch to the PADT AR configuration: start from the plain AR settings,
# then replace the header, the display patterns and the hint.
sub setupPADTAR {
  setupAR();
  $header = \@PADTARheader;

  # second pattern: a code fragment stored as text that joins the listed
  # attributes with "_", keeping those that are non-empty and do not
  # start with "no-"
  my $function_pattern = join '',
    '#{custom1}<? join "_", map { "\${$_}" }',
    '   grep { $this->{$_}=~/./ && $this->{$_}!~/^no-/ }',
    '   qw(afun parallel paren arabfa arabspec arabclause) ?>';
  @fs_patterns = ('${form}', $function_pattern);

  $fs_hint = join "\n",
               "tag:\t" . '${tag}',
               "lemma:\t" . '${lemma}',
               "gloss:\t" . '${x_gloss}',
               "commentA:\t" . '${commentA}';
}

# hackish stuff follows...
# people should stop using CSTS |-/
sub setupSpec {
  $gov = $_[0];
  if (@_>1) {
    $header = [ @TRheader ];



( run in 0.443 second using v1.01-cache-2.11-cpan-524268b4103 )