Alvis-Convert

 view release on metacpan or  search on metacpan

bin/alvisXMLmerge  view on Meta::CPAN


        if (!defined($skip) && defined($id) && defined($extra->{$id})) {
            $skip = 1;
        }

        # trim
        my $oring_str = $str;
        $str =~ s/^\s+//;
        $str =~ s/\s+$//;

        if (defined($skip)) {
            for my $node (@nodes) {
                if (!defined($printed{$node})
                    && defined($extra->{$id}{$node}))
                {

                   #if ($str =~ /^\s*<\/$node>/) { # node already defined in xml
                    if ($str eq "<\/$node>") {
                        my $x = $extra->{$id}{$node};
                        $x =~ s/\n*<$node>\n*//s;
                        $x =~ s/\n*<\/$node>\n*//s;
                        print OUT $x;
                        $printed{$node} = 1;
                    }

                    #elsif ($str =~ /^\s*$config{$node}/) {
                    elsif ($str eq $config{$node}) {
                        print OUT $extra->{$id}{$node};
                        $printed{$node} = 1;
                    }
                }
            }
        }

        print OUT $oring_str;

        #if ($str =~ /^\s*<documentRecord id="(.+)">/) {
        if ($str =~ /^<documentRecord id="([^"]+)"/) {
            $id = $1;
            undef $skip;
            %printed = ();
            print "$n records merged\n" if ($VERBOSE && (++$n % 100 == 0));
        }
    }

    close OUT;
    close $IN;
}

################################################################################
# TODO: move this to Alvis::Utils
sub mk_paths
{
    # Derive the "extra" and output file names for one original input file
    # and make sure the output file's directory exists.
    #
    # Argument: the path of the original (possibly .bz2/.gz compressed) file.
    # Returns:  the list ($extra_filename, $out_filename).
    #
    # Reads $orig_dir, $out_dir, $extra_dir and $extra_file, which are
    # defined outside this sub (NOTE(review): presumably file-level option
    # variables -- their declarations are not visible from here).
    my $orig_filename = shift;

    # Work on the name without its compression suffix.
    my $uorig_filename = $orig_filename;
    $uorig_filename =~ s/\.bz2$//;
    $uorig_filename =~ s/\.gz$//;

    my $extra_filename = $uorig_filename;
    my $out_filename   = $uorig_filename;

    # \Q...\E quotes every regex metacharacter in the directory name, and
    # only on the pattern side: the replacement is an ordinary string, so
    # it must receive the raw directory (an escaped one would put literal
    # backslashes into the resulting path).
    $out_filename =~ s/\Q$orig_dir\E/$out_dir/;

    # When a single extra file was given, the per-file extra name is left
    # untouched.
    unless (defined($extra_file)) {
        $extra_filename =~ s/\Q$orig_dir\E/$extra_dir/;
    }

    # Create the directory part of the output name.  The capture is only
    # trusted when the match succeeded; a name without '/' has no
    # directory component to create.
    if ($out_filename =~ m{(.+)/.+?$}) {
        my $target_dir = $1;
        mkpath($target_dir) unless (-e $target_dir);
    }

    return ($extra_filename, $out_filename);
}

# TODO: move this to Alvis::Utils
sub escape
{
    # Return a copy of the argument in which each of the characters
    # ' | " + ? * is preceded by a backslash.  All other characters,
    # including other regex metacharacters, are left as they are.
    my ($raw) = @_;

    (my $escaped = $raw) =~ s/(['|"\+?*])/\\$1/g;

    return $escaped;
}

################################################################################
sub read_extra_file
{
    # Collect, per document id, the XML fragments of the requested nodes
    # from an "extra" file.
    #
    # Arguments: the file name, followed by the list of node (element)
    #            names to collect.
    # Returns:   a hash (as a list) of the form
    #            ( $id => { $node => $xml_fragment, ... }, ... )
    #            where each fragment runs from the line containing
    #            <node> up to and including the line containing </node>.
    my ($filename, @nodes) = @_;
    my %extra = ();

    # Compile the tag patterns once instead of once per input line, and
    # quote the node names so that characters such as '.' in an element
    # name are matched literally.
    my (%open_re, %close_re);
    for my $node (@nodes) {
        $open_re{$node}  = qr/<\Q$node\E>/;
        $close_re{$node} = qr/<\/\Q$node\E>/;
    }

    # NOTE(review): Alvis::Utils::open_file is defined elsewhere; it is
    # assumed to return a readable filehandle -- confirm how it reports
    # failure.
    my $IN = Alvis::Utils::open_file("$filename");
    my ($id, $tag, $xml);
    while (defined(my $str = <$IN>)) {

        # Remember the id of the record we are currently inside.
        $id = $1 if ($str =~ /^\s*<documentRecord id="([^"]+)"/);

        # Not inside a node yet: see whether this line opens one.  All
        # nodes are tried, so if several open on the same line the last
        # one in @nodes wins.
        unless (defined $tag) {
            for my $node (@nodes) {
                $tag = $node if ($str =~ $open_re{$node});
            }
        }

        next unless (defined $tag);

        $xml .= $str;
        if ($str =~ $close_re{$tag}) {

            # A fragment seen before any documentRecord line has no id to
            # be filed under, so it is dropped instead of being stored
            # under an undefined key.
            $extra{$id}{$tag} = $xml if (defined $id);
            undef $xml;
            undef $tag;
        }
    }
    close $IN;

    return %extra;
}

################################################################################
sub read_config
{
    # Read the node configuration file.
    #
    # Argument: the path of the configuration file.
    # Returns:  a hash (as a list) mapping the first whitespace-free token
    #           of each configuration line to the text that follows the
    #           '^' inside the double-quoted value on that line.
    # Dies if the file cannot be opened.
    my $config_file = shift;
    my %config      = ();

    # Three-argument open with a lexical handle: the file name can never
    # be interpreted as an open mode, and the handle is not a global.
    open(my $fp, '<', $config_file)
      or die "Cannot open config file '$config_file': $!";

    while (defined(my $str = <$fp>)) {

        # Only lines of the form   name "^value"   define an entry.  The
        # captures are valid only after a successful match, so any other
        # line (blank, comment, malformed) is skipped.
        next unless ($str =~ /(\S+?)\s+"\^(.+)"/);
        $config{$1} = $2;
    }
    close $fp;

    return %config;
}

################################################################################
sub compress
{
    my $file = shift;
    unless ($file =~ /\.bz2$/) {



( run in 0.732 second using v1.01-cache-2.11-cpan-39bf76dae61 )