Treex-PML
view release on metacpan or search on metacpan
lib/Treex/PML/Backend/CSTS/Csts2fs.pm view on Meta::CPAN
'A arabspec' => [\&to_node_attr,'|','arabspec'],
'A arabclause' => [\&to_node_attr,'|','arabclause'],
'MDt w' => [\&to_composed_node_attr,'_','|','src','wMDt'],
'MDl w' => [\&to_composed_node_attr,'_','|','src','wMDl'],
'MDA w' => [\&to_composed_node_attr,'_','|','src','wMDA'],
'MDA parallel' => [\&to_composed_node_attr,'_','|','src','parallelMD'],
'MDA paren' => [\&to_composed_node_attr,'_','|','src','parenMD'],
'MDA arabfa' => [\&to_composed_node_attr,'_','|','src','arabfaMD'],
'MDA arabspec' => [\&to_composed_node_attr,'_','|','src','arabspecMD'],
'MDA arabclause' => [\&to_composed_node_attr,'_','|','src','arabclauseMD'],
'MDg w' => [\&to_composed_node_attr,'_','|','src','wMDg'],
'wsd s' => [\&to_node_attr,'|','wsds'],
'wsd ewn' => [\&to_node_attr,'|','wsdewn'],
'wsd ili' => [\&to_node_attr,'|','wsdili'],
'wsd iliOffset' => [\&to_node_attr,'|','wsdiliOffset'],
's id' => [\&to_attr,'root','|','ID1'],
'salt id' => [\&to_attr,'root','|','ID1'],
'csts lang' => [\&to_node_attr,'|','cstslang'],
'f case' => [\&to_node_attr,'|','formtype'],
'f id' => [\&to_node_attr,'','AID'],
'd type' => [\&to_node_attr,'|','formtype'],
'd id' => [\&to_node_attr,'|','AID'],
'w kind' => [\&to_next_node_attr,'|','origfkind'],
't w'=> [\&to_node_attr,'|','wt'],
'fadd id' => [\&to_node_attr,'','TID'],
'fadd del' => [sub {
my ($s,$data)=@_;
&to_node_attr($s,uc($data),'|','del');
&to_node_attr($s,'hide','','ARhide');
}],
'MTRl quot' => [\&to_composed_node_attr,'_','|','src','quotMTRl'],
'TRl quot' => [\&assign_quot_dsp],
'coref ref' => [\&to_node_attr,'|','coref'],
'coref type' => [\&to_node_attr,'|','cortype'],
'TRl status' => [sub {
my ($s,$data)=@_;
&to_node_attr($s,'hide','','TR')
if ($data eq 'hidden');
}],
'MTRl status' => [sub {
my ($s,$data)=@_;
&to_composed_node_attr($s,'hide','_','|','src','MTR')
if ($data eq 'hidden');
}],
'TRl origin' => [sub {
my ($s,$data)=@_;
$data=~s/\s+/|/g;
&to_node_attr($s,$data,'','AIDREFS');
}],
'MTRl origin' => [sub {
my ($s,$data)=@_;
$data=~s/\s+/|/g;
&to_composed_node_attr($s,$data,'_','','src','MAIDREFS');
}],
'x name' => [sub {
my ($s,$data)=@_;
$s->{node}->{X_hide}='' if ($data eq 'TNT');
}]
);
my %pcdata = (
'source' => [\&to_node_attr,'','cstssource'],
'mauth' => [sub {
my ($s)=@_;
if (($s->{elements}->[-3]||'') eq 'h') {
to_attr(@_,'following_root','','cstsmarkup');
} else {
to_attr(@_,'following_root','','docmarkup');
}
}],
'mdate' => [sub {
my ($s)=@_;
if (($s->{elements}->[-3]||'') eq 'h') {
to_attr(@_,'following_root','','cstsmarkup');
} else {
to_attr(@_,'following_root','','docmarkup');
}
}],
'mdesc' => [sub {
my ($s)=@_;
if (($s->{elements}->[-3]||'') eq 'h') {
to_attr(@_,'following_root','','cstsmarkup');
} else {
to_attr(@_,'following_root','','docmarkup');
}
}],
'mod' => [\&to_attr,'following_root','','docprolog'],
'txtype' => [\&to_attr,'following_root','','docprolog'],
'genre' => [\&to_attr,'following_root','','docprolog'],
'verse' => [\&to_attr,'following_root','','docprolog'],
'med' => [\&to_attr,'following_root','','docprolog'],
'authsex' => [\&to_attr,'following_root','','docprolog'],
'lang' => [\&to_attr,'following_root','','docprolog'],
'transsex' => [\&to_attr,'following_root','','docprolog'],
'srclang' => [\&to_attr,'following_root','','docprolog'],
'temp' => [\&to_attr,'following_root','','docprolog'],
'firsted' => [\&to_attr,'following_root','','docprolog'],
'authname' => [\&to_attr,'following_root','','docprolog'],
'transname' => [\&to_attr,'following_root','','docprolog'],
'opus' => [\&to_attr,'following_root','','docprolog'],
'id' => [\&to_attr,'following_root','','docprolog'],
'i' => [\&to_node_attr,'','!GAP'],
'iref' => [\&to_node_attr,'','!GAP'],
MDt => [\&to_composed_node_attr,'_','|','src','tagMD'],
MDl => [\&to_composed_node_attr,'_','|','src','lemmaMD'],
MMt => [sub {
my ($s,$data) = @_;
# dirty hack to have the same number of MMl and MMt values
to_composed_node_attr(@_,'_','|','src','tagMM');
my $name;
for my $a (@{$s->{attributes}[-1]}) {
if ($a->[0] eq 'src') {
$name='MM_'.$a->[1];
last;
}
}
my @l = split /\|/,$s->{node}->{'lemma'.$name};
my @t = split /\|/,$s->{node}->{'tag'.$name};
$s->{node}->{'lemma'.$name}.='|'.$l[$#l] if (@l == scalar(@t)-1);
}],
MMl => [\&to_composed_node_attr,'_','|','src','lemmaMM'],
lib/Treex/PML/Backend/CSTS/Csts2fs.pm view on Meta::CPAN
"\n"
} elsif ($case eq '|') { # SDATA bracket (ignoring)
''
} else {
chr(oct($1))
}
}gex;
return $data;
}
# Parse the line-oriented output of nsgmls read from $fh and store the
# result in $fsfile.
#
# Every input record is one line: its first character is the event type
# and the rest of the line is the event data.  Records are dispatched via
# the %start_tag, %att, %end_tag and %pcdata tables; each table entry is
# an array reference [ callback, extra arguments... ] and the callback is
# invoked as callback($state, $data_or_value, extra arguments...).
#
# Arguments:
#   $fh     - handle the records are read from
#   $fsfile - object receiving the result via changeFS, changeTail,
#             changeTrees, changePatterns and changeHint
#
# Returns nothing if $fsfile is not a reference; otherwise returns 1 if a
# 'C' (document is conforming) record was seen and 0 if not.
sub read {
  my ($fh,$fsfile) = @_;
  return unless ref($fsfile);

  my (@elements, @attributes);
  my $next_attributes = [];
  my $state = {
    elements => \@elements,     # element name stack
    attributes => \@attributes, # element attribute-list stack
    file => undef,              # fixme
    event => undef,
    root => undef,
    following_root => {},
    node => undef,
    following => {},
    trees => [],
    nodes => []
  };
  my $document_is_conforming=0;

  # we parse the output of nsgmls and dispatch as needed
  #
  # Records are read a whole line at a time so that a blank line cannot
  # shift the type/data alignment of the following record, and chomp is
  # used so that a final record lacking a newline keeps its last character.
  while (defined(my $record = <$fh>)) {
    chomp($record);
    next if $record eq '';
    my $type = substr($record,0,1);
    my $data = substr($record,1);

    if ($type eq '(') {         # start element
      push @elements, $data;
      push @attributes, $next_attributes;
      if (exists($start_tag{$data})) {
        my ($cb,@args)=@{ $start_tag{$data} };
        $cb->($state,$data,@args);
      }
      # attribute handlers are keyed by "element-name attribute-name"
      for my $attribute (@$next_attributes) {
        my $key = "$data $attribute->[0]";
        if (exists $att{$key}) {
          my ($cb,@args)=@{ $att{$key} };
          $cb->($state,$attribute->[1],@args);
        }
      }
      $next_attributes=[];
    } elsif ($type eq ')') {    # end element
      if (exists($end_tag{$data})) {
        my ($cb,@args)=@{ $end_tag{$data} };
        $cb->($state,$data,@args);
      }
      pop @elements;
      pop @attributes;
    } elsif ($type eq '-') {    # character data
      my $element = $elements[-1];
      # character data outside of any element has no handler to go to
      if (defined($element) and exists($pcdata{$element})) {
        my ($cb,@args)=@{ $pcdata{$element} };
        $cb->($state,unescape_data($data),@args);
      }
    } elsif ($type eq 'A') {    # attribute of the next element
      my ($name,$value)= $data=~m{^(\S+) (?:IMPLIED$|(?:CDATA|NOTATION|ENTITY|TOKEN|ID) (.*))};
      # skip attribute records of a shape we do not recognize instead of
      # queueing an entry without a name
      next unless defined $name;
      # $value is left undefined for IMPLIED attributes
      push @$next_attributes,[$name,unescape_data($value)];
    } elsif ($type eq 'C') {    # document is conforming
      $document_is_conforming=1;
    }
  }

  # the FS header is the currently selected header followed by one @P
  # line per key of %composed_attrs
  my @header = (@{$header}, map { "\@P $_" } keys %composed_attrs);

  $fsfile->changeFS(Treex::PML::Factory->createFSFormat(\@header));
  $fsfile->changeTail("$fs_tail\n");
  $fsfile->changeTrees(@{$state->{trees}});
  $fsfile->changePatterns(@fs_patterns);
  $fsfile->changeHint($fs_hint);
  return $document_is_conforming;
}
# Select the TR configuration of the shared conversion settings: the
# governing attribute becomes "govTR", the header is @TRheader, new nodes
# get TR='hide' and new roots get reserve1='TR_TREE'.  The pattern list is
# emptied and the hint is cleared.
sub setupTR {
  ($gov, $header) = ("govTR", \@TRheader);

  $initial_root_values{reserve1} = 'TR_TREE';
  $initial_node_values{TR}       = 'hide';

  $fs_tail     = '(2,3)';
  @fs_patterns = ();    # proper patterns added by TrEd's hook
  $fs_hint     = undef;
}
# Select the AR configuration of the shared conversion settings: the
# governing attribute becomes "ordorig", the header is @ARheader, and the
# TR-specific initial node/root values are removed.  Display patterns show
# the form and afun attributes; the hint shows tag and lemma.
sub setupAR {
  # drop the initial values that only the TR setup installs
  delete $initial_root_values{reserve1};
  delete $initial_node_values{TR};

  ($gov, $header, $fs_tail) = ("ordorig", \@ARheader, '(2,3)');

  @fs_patterns = ('${form}', '${afun}');
  $fs_hint     = "tag:\t\${tag}\nlemma:\t\${lemma}";
}
# Select the PADTAR configuration: start from the AR settings and then
# replace the header with @PADTARheader, the display patterns and the hint.
sub setupPADTAR {
  setupAR();
  $header = \@PADTARheader;

  # second display pattern: an embedded expression joining, with "_",
  # references to those of the listed attributes whose value is non-empty
  # and does not start with "no-"
  my $function_pattern =
      '#{custom1}<? join "_", map { "\${$_}" }'
    . ' grep { $this->{$_}=~/./ && $this->{$_}!~/^no-/ }'
    . ' qw(afun parallel paren arabfa arabspec arabclause) ?>';

  @fs_patterns = ('${form}', $function_pattern);
  $fs_hint = "tag:\t\${tag}\nlemma:\t\${lemma}\ngloss:\t\${x_gloss}\ncommentA:\t\${commentA}";
}
# hackish stuff follows...
# people should stop using CSTS |-/
sub setupSpec {
$gov = $_[0];
if (@_>1) {
$header = [ @TRheader ];
( run in 0.443 second using v1.01-cache-2.11-cpan-524268b4103 )