XML-TMX
view release on metacpan or search on metacpan
lib/XML/TMX/Reader.pm view on Meta::CPAN
}
},
tuv => sub {
    # Fold the already-processed children of this <tuv> (found in $c)
    # into a single hash.  Each child is an array reference whose first
    # element is either a tag ("-prop", "-note", "-cdata") or, for a
    # plain segment, the segment content itself.
    my $tuv;
    for my $child (@$c) {
        my $tag = $child->[0];
        if ($tag eq "-prop") {
            # ["-prop", type, value]: values are grouped per type.
            push @{ $tuv->{$tag}{ $child->[1] } }, $child->[2];
        } elsif ($tag eq "-note") {
            # ["-note", value]
            push @{ $tuv->{$tag} }, $child->[1];
        } elsif ($tag eq "-cdata") {
            # ["-cdata", content]: remember that the segment was CDATA.
            $tuv->{-iscdata} = 1;
            $tuv->{-seg}     = $child->[1];
        } else {
            # [content]: an ordinary segment.
            $tuv->{-seg} = $tag;
        }
    }
    # %v is the attribute hash of the current element (distinct from the
    # loop variable above); "_" is used when no language attribute is set.
    my $lang = $v{lang} || $v{'xml:lang'} || "_";
    return [ $lang => $tuv ];
},
# <prop>: tagged triple ["-prop", type attribute (or "_" when absent), content].
prop => sub { ["-prop", $v{type} || "_", $c] },
# <note>: tagged pair ["-note", content].
note => sub { ["-note" , $c] },
# <seg>: [-cdata => content] when the element's `iscdata` attribute is set
# (the -cdata handler below sets it), otherwise a one-element [content].
seg => sub {
return ($v{iscdata}) ? [ -cdata => $c ] : [ $c ]
},
# CDATA section: flags `iscdata` on the hash returned by `father` and passes
# the content through unchanged.
# NOTE(review): `father` is not defined in this excerpt; presumably XML::DT's
# accessor for the parent element's attribute hash -- confirm.
-cdata => sub {
father->{'iscdata'} = 1; $c },
# Inline markup (<hi>, <ph>): with the reader's `ignore_markup` option set only
# the content is kept; otherwise the result of `toxml` is returned.
# NOTE(review): `toxml` is not defined in this excerpt; presumably it
# re-serialises the current element -- confirm.
hi => sub { $self->{ignore_markup}?$c:toxml },
ph => sub { $self->{ignore_markup}?$c:toxml },
);
# NOTE(review): this assigns the global input record separator without
# `local`, so the value stays in effect for the rest of the program after
# this code runs -- confirm that is intended.
$/ = "\n";
# Use the reader's configured encoding for both the input and output
# encoding options of the handler table.
$h{-outputenc} = $h{-inputenc} = $self->{encoding};
# NOTE(review): presumably a carry-over buffer ("resto" = remainder); its use
# is not visible in this excerpt.
my $resto = "";
lib/XML/TMX/Writer.pm view on Meta::CPAN
# add_tu(%tuv) -- write one translation unit.
#
# The arguments are a flat key/value list copied into %tuv.  Visible here:
#   -raw   if present, its value is written as-is and nothing else is done;
#   id, datatype, segtype, srclang, creationid, creationdate, changedate,
#   changeid   keys the loop below checks for one by one.
# Further down, the keys that remain in %tuv are treated as language codes,
# each producing one <tuv> element.
sub add_tu {
my $self = shift;
my %tuv = @_;
my %prop = ();
my @note = ();
my %opt;
my $verbatim = 0;
# NOTE(review): the per-language loop further down declares its own
# `my $cdata`, so this outer variable appears unused in the visible code.
my $cdata = 0;
if (exists($tuv{-raw})) {
# The value is expected to already contain the complete <tu> markup,
# so it is written untouched and the method returns early.
$self->_write($tuv{-raw});
return;
}
# Look for each known <tu> attribute name among the arguments.
for my $key (qw'id datatype segtype srclang creationid creationdate changedate changeid') {
if (exists($tuv{$key})) {
lib/XML/TMX/Writer.pm view on Meta::CPAN
delete $tuv{"-n"};
}
# Open the <tu> element, passing the collected %opt pairs to _startTag.
# NOTE(review): the leading numeric arguments of _startTag / _write_props /
# _write_notes are presumably indentation levels -- confirm in those helpers.
$self->_startTag(0,'tu', %opt)->_nl;
# Write the unit-level properties and notes, e.g.
#   <prop type="x-name">problemas 23</prop>
$self->_write_props(3, \%prop);
$self->_write_notes(3, \@note);
# Emit one <tuv> element per remaining key of %tuv, in sorted key order.
# Each value is either a plain string (the segment text) or a hash that may
# carry -seg (the text), -prop, -note and -iscdata.
for my $lang (sort keys %tuv) {
    my $cdata = 0;
    $self->_startTag(1, 'tuv', 'xml:lang' => $lang);
    if (ref($tuv{$lang}) eq "HASH") {
        my $variant = $tuv{$lang};

        # Test the flag's value, not merely its presence: an explicit
        # `-iscdata => 0` must not request a CDATA section.  The flag is
        # no longer deleted from the hash, because %tuv is a shallow copy
        # and the delete would alter the caller's data structure.
        $cdata = 1 if $variant->{-iscdata};

        $self->_write_props(2, $variant->{-prop}) if exists $variant->{-prop};
        $self->_write_notes(2, $variant->{-note}) if exists $variant->{-note};

        # Fall back to "" only when there is no segment at all; a segment
        # whose text is "0" is false in Perl but must still be written.
        $tuv{$lang} = defined $variant->{-seg} ? $variant->{-seg} : "";
    }
    $self->_startTag(0, 'seg');
    if ($verbatim) {
        # Caller asked for the text to be written untouched.
        $self->_write($tuv{$lang});
    } elsif ($cdata) {
        $self->_write("<![CDATA[");
        $self->_write($tuv{$lang});
        $self->_write("]]>");
    } else {
        $self->_characters($tuv{$lang});
    }
    $self->_endTag('seg');
    $self->_endTag('tuv')->_nl;
}
$self->_endTag('tu')->_nl->_nl;
scripts/tmx-POStagger view on Meta::CPAN
# Split the text on whitespace and wrap every piece in a FreeLing word object.
# NOTE(review): `split /\s+/` yields an empty leading field when $txt starts
# with whitespace; confirm $txt is trimmed before this point.
my @tokens = map { Lingua::FreeLing3::Word->new($_) } split /\s+/, $txt;
# Run the tokens through three steps in turn: sentence splitting, analysis by
# the `morph` object and tagging by the `hmm` object, each looked up for $ln.
# NOTE(review): splitter/morph/hmm are defined outside this excerpt.
my $sentences = splitter($ln)->split(\@tokens);
$sentences = morph($ln)->analyze($sentences);
$sentences = hmm($ln)->tag($sentences);
# Append every sentence to $seg, wrapped in <s>...</s> when $s is set, using
# the compact or the regular dump routine depending on $compact.
for my $stc (@$sentences) {
$seg .= "<s>\n" if $s;
$seg .= $compact ? _dump_compact($stc->words)
: _dump_words($stc->words);
$seg .= "</s>\n" if $s;
}
# Record the CDATA flag: 0 for compact output, 1 otherwise.
$tu->{$lang}{-iscdata} = $compact ? 0 : 1;
# NOTE(review): presumably closes a "<![CDATA[" that was put at the start of
# $seg outside this excerpt -- confirm.
$seg .= "]]>" unless $compact;
$tu->{$lang}{-seg} = $seg;
}
}
return $tu;
});
$reader = XML::TMX::Reader->new($tmpName);
$reader->for_tu({
( run in 0.528 second using v1.01-cache-2.11-cpan-454fe037f31 )