Alvis-Convert
view release on metacpan or search on metacpan
bin/alvisXMLmerge view on Meta::CPAN
if (!defined($skip) && defined($id) && defined($extra->{$id})) {
$skip = 1;
}
# trim
my $oring_str = $str;
$str =~ s/^\s+//;
$str =~ s/\s+$//;
if (defined($skip)) {
for my $node (@nodes) {
if (!defined($printed{$node})
&& defined($extra->{$id}{$node}))
{
#if ($str =~ /^\s*<\/$node>/) { # node already defined in xml
if ($str eq "<\/$node>") {
my $x = $extra->{$id}{$node};
$x =~ s/\n*<$node>\n*//s;
$x =~ s/\n*<\/$node>\n*//s;
print OUT $x;
$printed{$node} = 1;
}
#elsif ($str =~ /^\s*$config{$node}/) {
elsif ($str eq $config{$node}) {
print OUT $extra->{$id}{$node};
$printed{$node} = 1;
}
}
}
}
print OUT $oring_str;
#if ($str =~ /^\s*<documentRecord id="(.+)">/) {
if ($str =~ /^<documentRecord id="([^"]+)"/) {
$id = $1;
undef $skip;
%printed = ();
print "$n records merged\n" if ($VERBOSE && (++$n % 100 == 0));
}
}
close OUT;
close $IN;
}
################################################################################
# TODO: move this to Alvis::Utils
#
# Derive the "extra" path and the output path that correspond to one original
# input file, and make sure the directory of the output path exists.
#
# Arguments:
#   $orig_filename - path of the original file. A trailing ".bz2" and/or
#                    ".gz" suffix is removed before the paths are derived.
#
# Uses the file-level variables $orig_dir, $out_dir, $extra_dir and
# $extra_file:
#   * the output path is the stripped input path with the first occurrence
#     of $orig_dir replaced by $out_dir;
#   * the extra path is built the same way with $extra_dir, unless
#     $extra_file is defined, in which case it is left equal to the stripped
#     input path.
#
# Returns:
#   the list ($extra_filename, $out_filename).
sub mk_paths
{
    my $orig_filename = shift;

    my $uorig_filename = $orig_filename;
    $uorig_filename =~ s/\.bz2$//;
    $uorig_filename =~ s/\.gz$//;

    my $extra_filename = $uorig_filename;
    my $out_filename   = $uorig_filename;

    # Only the pattern side needs escaping. The replacement side of s///
    # inserts the variable's content verbatim, so an escaped value there
    # would put literal backslashes into the resulting path.
    my $orig_dir_p = escape($orig_dir);
    $out_filename =~ s/$orig_dir_p/$out_dir/;

    # A full block instead of "my ... unless": a conditionally executed
    # "my" declaration has undefined behaviour in Perl.
    unless (defined($extra_file)) {
        $extra_filename =~ s/$orig_dir_p/$extra_dir/;
    }

    # Create the directory part of the output path. The captured value is
    # used only when the match succeeded; otherwise $1 would still hold
    # whatever an earlier, unrelated match left behind.
    if ($out_filename =~ /(.+)\/(.+?)$/) {
        my $target_dir = $1;
        mkpath($target_dir) unless (-e $target_dir);
    }

    return ($extra_filename, $out_filename);
}
# TODO: move this to Alvis::Utils
#
# Return a copy of the given string in which every occurrence of one of the
# characters  ' | " + ? *  is preceded by a backslash. All other characters,
# including other regex metacharacters such as "." or "(", are copied as is.
sub escape
{
    my ($text) = @_;
    $text =~ s{(['|"\+?*])}{\\$1}g;
    return $text;
}
################################################################################
# Collect, per document record, the XML fragments of the requested nodes
# from an "extra" file.
#
# Arguments:
#   $filename - file to read; it is opened through Alvis::Utils::open_file.
#   @nodes    - element names (without angle brackets) whose fragments are
#               to be collected.
#
# The file is read line by line. A line starting (after optional whitespace)
# with <documentRecord id="..."> sets the current record id. A line that
# contains "<NODE>" for one of the requested nodes starts a fragment; all
# lines up to and including the one that contains "</NODE>" are concatenated
# and stored as $extra{$id}{NODE}.
#
# Returns:
#   the hash (as a list) id => { node => xml_text }.
sub read_extra_file
{
    my $filename = shift;
    my @nodes    = @_;

    my %extra = ();
    my $IN    = Alvis::Utils::open_file("$filename");
    my ($id, $tag, $xml);

    while (defined(my $str = <$IN>)) {
        $id = $1 if ($str =~ /^\s*<documentRecord id="([^"]+)"/);

        # Not inside a fragment: check whether this line opens one. Node
        # names are matched literally (\Q...\E), as the merge step also
        # compares them literally. If several requested nodes open on the
        # same line, the last one in @nodes is the one that is tracked.
        unless (defined $tag) {
            for my $node (@nodes) {
                $tag = $node if ($str =~ /<\Q$node\E>/);
            }
        }
        next unless (defined $tag);

        $xml .= $str;
        if ($str =~ /<\/\Q$tag\E>/) {
            # A fragment seen before any documentRecord line has no record
            # to belong to; it is dropped rather than stored under an
            # undefined key.
            $extra{$id}{$tag} = $xml if (defined $id);
            undef $xml;
            undef $tag;
        }
    }
    close $IN;

    return %extra;
}
################################################################################
# Read the node configuration file.
#
# Arguments:
#   $config_file - path of the configuration file.
#
# Each meaningful line has the form
#     NAME "^TEXT"
# i.e. a run of non-blank characters, whitespace, and a double-quoted value
# that begins with "^". The text between the "^" and the last double quote
# on the line is stored under NAME. Lines that do not have this form (blank
# lines, comments, ...) are ignored.
#
# Returns:
#   the hash (as a list) NAME => TEXT.
#
# Dies if the file cannot be opened.
sub read_config
{
    my %config      = ();
    my $config_file = shift;

    open(my $FP, '<', $config_file)
        or die "Cannot open config file '$config_file': $!";
    while (defined(my $str = <$FP>)) {
        # Use the captures only after a successful match; after a failed
        # match $1 and $2 still hold the values of an earlier match.
        next unless ($str =~ /(\S+?)\s+"\^(.+)"/);
        $config{$1} = $2;
    }
    close $FP;

    return %config;
}
################################################################################
sub compress
{
my $file = shift;
unless ($file =~ /\.bz2$/) {
( run in 0.732 second using v1.01-cache-2.11-cpan-39bf76dae61 )