XML-TMX

 view release on metacpan or  search on metacpan

lib/XML/TMX/Reader.pm  view on Meta::CPAN


sub for_tu {
    my $self = shift;
    my $conf = { -header => 1 };
    my $i = 0;

    ref($_[0]) eq "HASH" and $conf = {%$conf , %{shift(@_)}};

    my $code = shift;
    die "invalid processor" unless ref($code) eq "CODE";

    local $/;

    my $outputingTMX = 0;
    my $tmx;
    my $data;
    my $gen=0;
    my %h = (
             -type => { tu => 'SEQ', tuv => 'SEQ' },
             tu  => sub {
                 my $tu;
                 for my $va (@$c) {
                     if ($va->[0] eq "-prop") {
                         push @{$tu->{$va->[0]}{$va->[1]}}, $va->[2]
                     } elsif ($va->[0] eq "-note") {
                         push @{$tu->{$va->[0]}}, $va->[1]
                     } else {
                         $tu->{$va->[0]} = $va->[1]
                     }
                 }
                 my ($ans, $v) = $code->($tu, \%v);

                 # Check if the user wants to create a TMX and
                 # forgot to say us
                 if (!$outputingTMX && $ans && ref($ans) eq "HASH") {
                     $outputingTMX = 1;
                     $tmx = XML::TMX::Writer->new();
                     if ($conf->{-header}) {
                         my $header = _compute_header($self->{header}, $conf);
                         $tmx->start_tmx(encoding => $self->{encoding}, %$header);
                     }
                 }
                 # Add the translation unit
                 if ($ans && ref($ans) eq "HASH") {
                     $gen++;
                     %v = %$v if ($v && ref($v) eq "HASH");

                     my %ans = (%v, %$ans);
                     $ans{"-n"}=$i if $conf->{n} ;
                     $tmx->add_tu(-verbatim => $conf->{-verbatim}, %ans);
                 }
             },

             tuv  => sub {
                 my $tuv;
                 for my $v (@$c) {
                     if ($v->[0] eq "-prop") {
                         push @{$tuv->{$v->[0]}{$v->[1]}}, $v->[2]
                     } elsif ($v->[0] eq "-note") {
                         push @{$tuv->{$v->[0]}}, $v->[1]
                     } elsif ($v->[0] eq "-cdata") {
                         $tuv->{-iscdata} = 1;
                         $tuv->{-seg} = $v->[1];
                     } else {
                         $tuv->{-seg} = $v->[0];
                     }
                 }
                 [ $v{lang} || $v{'xml:lang'} || "_" => $tuv ]
             },
             prop => sub { ["-prop", $v{type} || "_", $c] },
             note => sub { ["-note" , $c] },
             seg  => sub {
                 return ($v{iscdata}) ? [ -cdata => $c ] : [ $c ]
             },
             -cdata => sub { 
                father->{'iscdata'} = 1; $c },
             hi   => sub { $self->{ignore_markup}?$c:toxml },
             ph   => sub { $self->{ignore_markup}?$c:toxml },
            );


    $/ = "\n";

    $h{-outputenc} = $h{-inputenc} = $self->{encoding};

    my $resto = "";
    ## Go through the header...
    my $fh;
    open_bom($fh, $self->{filename},_enc2bin($self->{encoding})) ;#  or die "$!";
    #print STDERR "Debug2: defuse.ENC= $! ; enc=" ,_enc2bin($self->{encoding}),"\n";
    while (<$fh>) {
        next if /^\s*$/;
        last if /<body\b/;
    }

    if (m!(.*?)(<body.*?>)(.*)!s) {
        $resto = $3;
    }


    # If we have an output filename, user wants to output a TMX
    $conf->{-output} = $conf->{output} if defined($conf->{output});
    if (defined($conf->{-output})) {
        $outputingTMX = 1;
        $tmx = XML::TMX::Writer->new();
        if ($conf->{-header}) {
            my $header = _compute_header($self->{header}, $conf);
            $tmx->start_tmx(encoding => $self->{encoding},
                            -output  => $conf->{-output},
                            %$header);
        }
    }

    $/ = "</tu>";
    $conf->{-verbose}++ if $conf->{verbose};
    print STDERR "." if $conf->{-verbose};
    while (<$fh>) {
        ($_ = $resto . $_ and $resto = "" ) if $resto;
        last if /<\/body>/;
        $i++;
        print STDERR "\r$i" if $conf->{-verbose} && !($i % 10);
        last if defined $conf->{proc_tu} && $i > $conf->{proc_tu} ;
        last if defined $conf->{gen_tu}  && $gen > $conf->{gen_tu};
#       next if defined $conf->{patt}    && !m/$conf->{patt}/     ;

        if (defined $conf->{patt}){      ## FIXME untested
           if(ref($conf->{patt})){       ## EN=>/cat/ PT=>/gato/
              my $ok = 1;
              my $textli = "";
              for my $li (keys %{$conf->{patt}}){
                 my $patli= $conf->{patt}{$li};
                 if (m!lang=["']$li['"](.*?)</tuv>!is ) { $textli = $1 };
                 $ok &&= ($textli =~ /$patli/)
              }
              next unless $ok
           }



( run in 0.539 second using v1.01-cache-2.11-cpan-39bf76dae61 )