XML-TMX

 view release on metacpan or  search on metacpan

lib/XML/TMX/Reader.pm  view on Meta::CPAN

                 }
             },

             # <tuv>: fold the already-processed children into one record and
             # return it paired with the variant's language code.
             # Children arrive as array refs whose first element says what they
             # are: "-prop" (type, value), "-note" (text), "-cdata" (segment
             # text that came from a CDATA section); anything else is taken to
             # be the plain segment text itself.
             tuv  => sub {
                 my $record;
                 for my $child (@$c) {
                     my $kind = $child->[0];
                     if ($kind eq "-prop") {
                         # Properties are grouped by type; a type may repeat.
                         push @{ $record->{$kind}{ $child->[1] } }, $child->[2];
                     }
                     elsif ($kind eq "-note") {
                         push @{ $record->{$kind} }, $child->[1];
                     }
                     elsif ($kind eq "-cdata") {
                         $record->{-iscdata} = 1;
                         $record->{-seg}     = $child->[1];
                     }
                     else {
                         # Untagged child: the first element is the segment.
                         $record->{-seg} = $kind;
                     }
                 }
                 # Language comes from lang, then xml:lang, else "_".
                 my $language = $v{lang} || $v{'xml:lang'} || "_";
                 return [ $language => $record ];
             },
             # The handlers below tag their results with a leading marker
             # string so that the tuv handler can tell its children apart by
             # looking at the first array element.

             # <prop>: yields ["-prop", TYPE, CONTENT]; TYPE falls back to "_"
             # when the type attribute is missing or false.
             prop => sub { ["-prop", $v{type} || "_", $c] },
             # <note>: yields ["-note", CONTENT].
             note => sub { ["-note" , $c] },
             # <seg>: yields ["-cdata", CONTENT] when the iscdata attribute is
             # true, otherwise the one-element [CONTENT].
             # NOTE(review): iscdata is presumably the flag the -cdata handler
             # below sets on its parent element -- confirm against XML::DT.
             seg  => sub {
                 return ($v{iscdata}) ? [ -cdata => $c ] : [ $c ]
             },
             # CDATA section: sets iscdata on whatever father() returns and
             # passes the text through unchanged.
             -cdata => sub { 
                father->{'iscdata'} = 1; $c },
             # <hi>/<ph> inline markup: return only the content when
             # $self->{ignore_markup} is true, otherwise the result of toxml.
             hi   => sub { $self->{ignore_markup}?$c:toxml },
             ph   => sub { $self->{ignore_markup}?$c:toxml },
            );


    $/ = "\n";

    $h{-outputenc} = $h{-inputenc} = $self->{encoding};

    my $resto = "";

lib/XML/TMX/Writer.pm  view on Meta::CPAN



sub add_tu {
    my $self = shift;
    my %tuv = @_;
    my %prop = ();
    my @note = ();
    my %opt;

    my $verbatim = 0;
    my $cdata = 0;

    if (exists($tuv{-raw})) {
        # value already includes <tu> tags, hopefully, at least!
        # so we will not mess with it.
        $self->_write($tuv{-raw});
        return;
    }

    for my $key (qw'id datatype segtype srclang creationid creationdate changedate changeid') {
        if (exists($tuv{$key})) {

lib/XML/TMX/Writer.pm  view on Meta::CPAN

        delete $tuv{"-n"};
    }

    $self->_startTag(0,'tu', %opt)->_nl;

    ### write the props and notes, e.g. <prop type="x-name">problemas 23</prop>
    $self->_write_props(3, \%prop);
    $self->_write_notes(3, \@note);

    # Emit one <tuv> element per remaining key of %tuv, in sorted language
    # order.  Each value is either a plain string (the segment text) or a
    # hash ref that may carry -prop, -note, -seg and -iscdata entries.
    for my $lang (sort keys %tuv) {
        my $entry    = $tuv{$lang};
        my $segment  = $entry;
        my $is_cdata = 0;

        $self->_startTag(1, 'tuv', 'xml:lang' => $lang);
        if (ref($entry) eq "HASH") {
            # Honour the value of the flag, not merely its presence, so that
            # -iscdata => 0 yields a normal, escaped segment.  The caller's
            # hash is only read here; it is never modified.
            $is_cdata = $entry->{-iscdata} ? 1 : 0;

            $self->_write_props(2, $entry->{-prop}) if exists $entry->{-prop};
            $self->_write_notes(2, $entry->{-note}) if exists $entry->{-note};

            # Only a missing segment defaults to ""; a literal "0" is kept.
            $segment = defined $entry->{-seg} ? $entry->{-seg} : "";
        }
        $self->_startTag(0, 'seg');
        if ($verbatim) {
            # Caller asked for the text to be written as-is, unescaped.
            $self->_write($segment);
        } elsif ($is_cdata) {
            # A literal "]]>" inside the text would close the CDATA section
            # early, so split it across two adjacent sections.
            (my $safe = $segment) =~ s/\]\]>/]]]]><![CDATA[>/g;
            $self->_write("<![CDATA[");
            $self->_write($safe);
            $self->_write("]]>");
        } else {
            $self->_characters($segment);
        }
        $self->_endTag('seg');
        $self->_endTag('tuv')->_nl;
    }
    $self->_endTag('tu')->_nl->_nl;

scripts/tmx-POStagger  view on Meta::CPAN

              my @tokens = map { Lingua::FreeLing3::Word->new($_) } split /\s+/, $txt;
              my $sentences = splitter($ln)->split(\@tokens);
              $sentences = morph($ln)->analyze($sentences);
              $sentences = hmm($ln)->tag($sentences);
              for my $stc (@$sentences) {
                  $seg .= "<s>\n" if $s;
                  $seg .= $compact ? _dump_compact($stc->words)
                                   : _dump_words($stc->words);
                  $seg .= "</s>\n" if $s;
              }
              $tu->{$lang}{-iscdata} = $compact ? 0 : 1;
              $seg .= "]]>" unless $compact;
              $tu->{$lang}{-seg} = $seg;
          }
      }
      return $tu;
   });

$reader = XML::TMX::Reader->new($tmpName);

$reader->for_tu({



( run in 0.263 second using v1.01-cache-2.11-cpan-ec4f86ec37b )