XML-TMX
view release on metacpan or search on metacpan
lib/XML/TMX/Reader.pm view on Meta::CPAN
sub for_tu {
my $self = shift;
my $conf = { -header => 1 };
my $i = 0;
ref($_[0]) eq "HASH" and $conf = {%$conf , %{shift(@_)}};
my $code = shift;
die "invalid processor" unless ref($code) eq "CODE";
local $/;
my $outputingTMX = 0;
my $tmx;
my $data;
my $gen=0;
my %h = (
-type => { tu => 'SEQ', tuv => 'SEQ' },
tu => sub {
my $tu;
for my $va (@$c) {
if ($va->[0] eq "-prop") {
push @{$tu->{$va->[0]}{$va->[1]}}, $va->[2]
} elsif ($va->[0] eq "-note") {
push @{$tu->{$va->[0]}}, $va->[1]
} else {
$tu->{$va->[0]} = $va->[1]
}
}
my ($ans, $v) = $code->($tu, \%v);
# Check if the user wants to create a TMX and
# forgot to say us
if (!$outputingTMX && $ans && ref($ans) eq "HASH") {
$outputingTMX = 1;
$tmx = XML::TMX::Writer->new();
if ($conf->{-header}) {
my $header = _compute_header($self->{header}, $conf);
$tmx->start_tmx(encoding => $self->{encoding}, %$header);
}
}
# Add the translation unit
if ($ans && ref($ans) eq "HASH") {
$gen++;
%v = %$v if ($v && ref($v) eq "HASH");
my %ans = (%v, %$ans);
$ans{"-n"}=$i if $conf->{n} ;
$tmx->add_tu(-verbatim => $conf->{-verbatim}, %ans);
}
},
tuv => sub {
my $tuv;
for my $v (@$c) {
if ($v->[0] eq "-prop") {
push @{$tuv->{$v->[0]}{$v->[1]}}, $v->[2]
} elsif ($v->[0] eq "-note") {
push @{$tuv->{$v->[0]}}, $v->[1]
} elsif ($v->[0] eq "-cdata") {
$tuv->{-iscdata} = 1;
$tuv->{-seg} = $v->[1];
} else {
$tuv->{-seg} = $v->[0];
}
}
[ $v{lang} || $v{'xml:lang'} || "_" => $tuv ]
},
prop => sub { ["-prop", $v{type} || "_", $c] },
note => sub { ["-note" , $c] },
seg => sub {
return ($v{iscdata}) ? [ -cdata => $c ] : [ $c ]
},
-cdata => sub {
father->{'iscdata'} = 1; $c },
hi => sub { $self->{ignore_markup}?$c:toxml },
ph => sub { $self->{ignore_markup}?$c:toxml },
);
$/ = "\n";
$h{-outputenc} = $h{-inputenc} = $self->{encoding};
my $resto = "";
## Go through the header...
my $fh;
open_bom($fh, $self->{filename},_enc2bin($self->{encoding})) ;# or die "$!";
#print STDERR "Debug2: defuse.ENC= $! ; enc=" ,_enc2bin($self->{encoding}),"\n";
while (<$fh>) {
next if /^\s*$/;
last if /<body\b/;
}
if (m!(.*?)(<body.*?>)(.*)!s) {
$resto = $3;
}
# If we have an output filename, user wants to output a TMX
$conf->{-output} = $conf->{output} if defined($conf->{output});
if (defined($conf->{-output})) {
$outputingTMX = 1;
$tmx = XML::TMX::Writer->new();
if ($conf->{-header}) {
my $header = _compute_header($self->{header}, $conf);
$tmx->start_tmx(encoding => $self->{encoding},
-output => $conf->{-output},
%$header);
}
}
$/ = "</tu>";
$conf->{-verbose}++ if $conf->{verbose};
print STDERR "." if $conf->{-verbose};
while (<$fh>) {
($_ = $resto . $_ and $resto = "" ) if $resto;
last if /<\/body>/;
$i++;
print STDERR "\r$i" if $conf->{-verbose} && !($i % 10);
last if defined $conf->{proc_tu} && $i > $conf->{proc_tu} ;
last if defined $conf->{gen_tu} && $gen > $conf->{gen_tu};
# next if defined $conf->{patt} && !m/$conf->{patt}/ ;
if (defined $conf->{patt}){ ## FIXME untested
if(ref($conf->{patt})){ ## EN=>/cat/ PT=>/gato/
my $ok = 1;
my $textli = "";
for my $li (keys %{$conf->{patt}}){
my $patli= $conf->{patt}{$li};
if (m!lang=["']$li['"](.*?)</tuv>!is ) { $textli = $1 };
$ok &&= ($textli =~ /$patli/)
}
next unless $ok
}
( run in 0.539 second using v1.01-cache-2.11-cpan-39bf76dae61 )