AnyData

 view release on metacpan or  search on metacpan

lib/AnyData/Format/XML.pm  view on Meta::CPAN

sub DESTROY {
    return;
    print "XML DESTROYED";
    my $self = shift;
    if ( $self->{storage}->{fh}
      && $self->{storage}->{open_mode} ne 'r'
     ){
        $self->export( $self->{storage} );
    }
    #undef $self->{twig};
    #undef $self->{storage}->{fh};
}

sub read_fields {
    my $self = shift;
    my $c = $self->{current_element};
    return undef unless defined $c;
    $c = $self->{current_element} = $c->next_elt($c->gi)
         if $c->att('record_tag__');
    $self->{prev_element} = $self->{current_element};
    $self->{current_element} = $c->next_elt($c->gi) if $c;
    return $self->process_element( $self->{prev_element} );
}

sub process_element {
    my $self = shift;
    my $element   = shift;
    my @col_names  = @{ $self->{col_structure}->{col_names} };
    my @row;
    my $parent = $element->parent;
    my $values = { $element->path => $element->text };
    my $par_ats = {};
       $par_ats = $parent->atts if $parent;
    my $elt_ats = $element->atts || {};
    while( my($att_key,$att_val) = each %$par_ats) {
        $values->{$parent->path.'/'.$att_key} = $att_val;
    }
    while( my($att_key,$att_val) = each %$elt_ats) {
        $values->{$element->path.'/'.$att_key} = $att_val;
    }
    for my $kid($element->children) {
        if ( defined $values->{$kid->path} ) {
 	    if (!ref $values->{$kid->path}) {
               $values->{$kid->path} = [ $values->{$kid->path} ] ;
	    }
            push ( @{ $values->{$kid->path} }, $kid->text );
        } 
        else {
	  $values->{$kid->path} = $kid->text;
	}
    }
    for my $col(@col_names) {
        if (ref $values->{$col}) {
           @row = (@row,@{$values->{$col}});
        }
        else {
           push @row, $values->{$col};
	}

    }
    # use Data::Dumper; print Dumper $values, Dumper \@row; exit;
    return  @row;
}


sub seek_first_record {
    my $self = shift;
    return unless $self->{twig} and $self->{twig}->root;
    $self->{current_element} = $self->{record_tag};
}
sub push_names {
    my $self      = shift;
    my $col_names = shift || $self->{col_names};
    #my @c= caller 1; die $c[3]."!!!";
    my $str = "<table>\n  <row record_tag__='1'>\n";
    #print "CREATING";
    for (@$col_names) {
        $str .= "    <$_>dummy__</$_>\n";
    }
    $str .= "  </row>\n</table>\n";
    $str = $self->{template} if $self->{template};
    if ( $self->{dtd} ) {
       $str = $self->{dtd} if $self->{dtd};
       my $root = $str;
       $root =~ s/.*<!DOCTYPE\s+(\S+)\s+.*/$1/ms;
       $str .= "\n\n<$root></$root>";
       #die $str;
    }
    $self->get_data( $str );
    return $self->{col_names};
}

sub import { 
    my $self = shift; 
    my $data = shift; 
    my $storage = shift; 
    $self->init_parser($storage,$data); 
    return $self->get_data($data,$storage->{col_names});
}

####
# GET DATA FROM STRING
###
sub init_parser {
    my $self    = shift;
    my $storage = shift;
    my $fh_or_str = shift;
    return if 'co' =~ /$storage->{open_mode}/;
#print "   INIT ...";
#print "HAS RECS\n" if $storage->{recs};
#print "HAS DATA\n" if ref $storage->{file} eq 'HASH';
    $fh_or_str ||= $storage->{fh} if $storage->{fh};
    $fh_or_str  ||= $storage->{file}->{data} if ref $storage->{file} eq 'HASH';
    $fh_or_str  ||= $storage->{recs};
#    $fh_or_str ||= join('',@$fh_or_str) if ref $fh_or_str eq 'ARRAY';
#print $fh_or_str; exit;
###z    $self->create_new_twig( $self->{col_names} );
    my $rv = $self->get_data( $fh_or_str,$self->{col_names} );
    return undef unless $rv;
    $self->{current_element} = $self->{twig}->root;
    $storage->{col_names} = $self->{col_names};

lib/AnyData/Format/XML.pm  view on Meta::CPAN

#
    #if ($has_record_tag) { $twig->print; exit; } 
    return($record_tag,$col_names,$atts) if $has_record_tag;

    my $cols;
    @$cols = map {$_} @$col_names;
    my $elt= new XML::Twig::Elt($record_tag->gi);
    for my $a(keys %$atts ) {
        $a =~ s".*/([^/]+)$"$1";
        next if $a eq 'record_tag__';
        next unless $record_tag->att($a);
        $elt->set_att($a,'');
    }
    for my $c(@$cols ) {
        next if $atts->{$c};
        next if $c =~ m"/#PCDATA";
        $c =~ s"/#PCDATA"";
        $c =~ s".*/([^/]+)$"$1";
        my $e= new XML::Twig::Elt($c);
        $e->paste('last_child',$elt);
    }
    my $par;
    if ($record_tag->parent and $record_tag->parent->parent) {
        $par= new XML::Twig::Elt($record_tag->parent->gi);
        for my $a(keys %$atts ) {
            $a =~ s".*/([^/]+)$"$1";
            next unless $record_tag->parent->att($a);
            $par->set_att($a,'');
        }
###z# print 3;
        $elt->set_att('record_tag__','true');
        $elt->paste('first_child',$par);
        $par->paste('before',$record_tag->parent);
#        $record_tag = $self->{record_tag} = $record_tag->parent->prev_sibling->first_child;
        $record_tag ||= $self->{record_tag} = $record_tag->parent->prev_sibling->first_child;
    }
    else {
        for my $a(keys %$atts ) {
            $a =~ s".*/([^/]+)$"$1";
            next unless $record_tag->att($a);
            $elt->set_att($a,'');
        }
###z# print 4;
        $record_tag ||= $self->{record_tag} = $record_tag->prev_sibling;
#        $record_tag = $self->{record_tag} = $record_tag->prev_sibling;
    }
    $record_tag ||= $twig->root->first_child;
    my $old = $record_tag->next_elt($record_tag->gi);
#    $old->delete if $self->{create};
#    $old->set_att('frump','foo') if $old;
    $old->del_att('record_tag__') if $old;
#$twig->print;
    #print "\n"; 
    #$old->print if $old;
    ##print "\n"; 
#    my $par = $self->create_record;
#    $self->{blank_element} = $par;
    #printf "\n%s\n   %s\n", $elt->path, "@$col_names";
    @$col_names = map {s"/#PCDATA""; $_}  @$col_names;
#$twig->print;  print "\n\n";
#use Data::Dumper; print Dumper $record_tag->gi,$col_names,$atts;
    return $record_tag,$col_names,$atts;
}

sub check_twig_options {
    my $flags = shift;
    my $new_flags;
    my %twig_opt = %XML::Twig::valid_option;
    return $flags unless scalar (keys %twig_opt);
    while (my($k,$v) = each %$flags) {
        $new_flags->{$k} = $v if $twig_opt{$k};
    }
    return $new_flags;
}

sub get_structure_from_map {
    my $self = shift;
    my $twig = shift;
    my $col_map = shift;
    my($amap,$map,$multi,$col_names,$pretty_cols,$col2tag);
    for my $col(@$col_map) {
        my($tag_name,$col_name) = ($col,$col);
        ($tag_name,$col_name) = each %$col if ref $col eq 'HASH';
        my($tname,$tparent) = ($tag_name,$tag_name);
        if ($tname =~ m!(.*)/([^/]*)$! ) {
            $tparent = $1;
            $tname   = $2;
            $tparent =~ s!.*/([^/]*)$!$1!;
	}
        my $tag  = $twig->first_elt($tname);
        $tag_name=$tag->path if $tag;
        if (!$tag) {
            my $new_tag  = $twig->first_elt($tparent);
            # die "No such element '$tname'!" unless $tag;
            if (!$new_tag) {
                $tag_name = $tname;
	    }
            else {
                $tag_name=$new_tag->path . '/' . $tname;
            }
            $amap->{$tag_name}++;
	}
        if (ref $col_name eq 'ARRAY') {
	  for my $col2(@$col_name) {
              $col2tag->{$col2} = $tag_name;
              $multi->{$tag_name}++;
              push @$pretty_cols, $col2;
	  }
	}
        push @$col_names, $tag_name;
        push @$pretty_cols, $col_name unless ref $col_name eq 'ARRAY';
        $map->{$tag_name} = $col_name;
    }
    my $record_tag;
    my $record_tag_path = '';
    for my $col(@$col_names) {
        my($rt) = $col =~ m!(.*)/[^/]*$!;
        next unless $rt;
        $record_tag_path = $rt if length $rt > length $record_tag_path;
    }
    my @children = $twig->root->descendants;
    for my $e(@children) {
        next unless $e->path eq $record_tag_path;
        $record_tag = $e;
        last;
    }
    if (!$record_tag) {
       $record_tag = $twig->root->first_child;
       my $p = $record_tag->path;
       @$col_names = map {$p.'/'.$_}@$col_names;
#       use Data::Dumper; print Dumper $amap;
       my $newmap;
       $newmap->{ $p.'/'.$_ }++ for keys %{$amap};
       $amap = $newmap;
       $newmap = {};
       $newmap->{ $p.'/'.$_ } = $map->{$_} for keys %{$map};
       $map = $newmap;
    }
##
#=pod
#paste into parent record_tag__
#    my $rt_atts = $record_tag->atts;
#    if (!$rt_atts->{record_tag__}) {
#       my $new_rt = $record_tag->copy;
#       $new_rt->set_att('record_tag__','1');
#       $new_rt->set_att('xstruct__','1');
#       $new_rt->paste('first_child',$record_tag->parent);
#       $record_tag = $new_rt;
#    }
#=cut
    my $col_structure = {
        amap => $amap,
        map  => $map,
        multi => $multi,
        col_names => $col_names,
        pretty_cols => $pretty_cols,
        col2tag     => $col2tag,
    };
# print $record_tag->path, "\n";
# use Data::Dumper; print Dumper $col_structure; 
# exit;
    return $record_tag, $col_structure;
}

sub get_data {
    my $self = shift;
    my $fh_or_str  = shift;
    my $url = $self->{url};
    if ( $url ) {
      $fh_or_str = AnyData::Storage::RAM::get_remote_data({},$url);
    }
    return if( ! defined( $fh_or_str ) );
    my $col_names = shift || [];
    $col_names = []; #### IGNORE USER COLUMN NAMES FOR NOW
    my $flags;
    while (my($k,$v)=each %$self) {
        $flags->{$k}=$v;
    }
    my $root_tag            = $flags->{root_tag};
    my $depth_limit         = $flags->{depth_limit};
    my $supplied_col_names  = $flags->{col_names};
    my $have_col_names      = 1 if $supplied_col_names;
    my $pretty_col_names    = $supplied_col_names;
    my $col_structure = $self->{col_structure};
    undef $col_structure unless $col_structure->{col_names}
                            and scalar @{$col_structure->{col_names}};
    my %multi;
    my %map;
    my %amap;
    $flags->{LoadDTD} = 1;
    $flags->{TwigRoots} = {$root_tag=>'1'} if $root_tag;
#
# DEFAULTS : KeepEncoding OFF to mirror XML::Twig
#            ProtocolEncoding 'ISO-8859-1'
#
#    $flags->{KeepEncoding}     ||= 1;
#
    $flags->{ProtocolEncoding} ||= 'ISO-8859-1';
#use Data::Dumper; die Dumper $flags;
    $flags = check_twig_options($flags);

    my $twig= new XML::Twig(%{$flags});
    my $success = $twig->safe_parse($fh_or_str);
    $self->{errstr} = $@ unless $success;
    die $self->{errstr} if $self->{errstr};
    return undef unless $success;
    $self->{dtd} = $twig->dtd;
    my $root = $twig->first_elt($root_tag) || $twig->root;
    my $name = $root->path;
    my $element= $twig->root;
    my($record_tag,$colZ,$atts);

    my $col_map = $self->{col_map};
    if ($col_map) {
      ($record_tag,$col_structure) =
          $self->get_structure_from_map($twig,$col_map);
    } 
    else {
      ($record_tag,$colZ,$atts) = $self->get_structure($twig);
      if (!$col_structure) {
          $have_col_names++;
          $col_structure = build_column_names($colZ,$root,$root_tag,$colZ);
          $col_structure->{amap} = $atts;
        }
    }

    # CREATE A DUMMY RECORD TAG
    #
    my $rt_atts = $record_tag->atts;
    if (!$rt_atts->{record_tag__}) {
       my $new_rt = $record_tag->copy;
       $new_rt->set_att('record_tag__','1');
       $new_rt->set_att('xstruct__','1');
       $new_rt->paste('first_child',$record_tag->parent);
       $record_tag = $new_rt;
    }

    # $twig->print;
    # use Data::Dumper; print Dumper $col_structure;
  #  print $self->{record_tag}->path;

    $self->{record_tag}    = $record_tag;
    $self->{twig}          = $twig;
    $self->{col_names}     = $col_structure->{pretty_cols};
    $self->{col_structure} = $col_structure;
    return 1;
}


###############################################################
# MAP A ROW HASH ONTO A COLUMN NAMES ARRAY
###############################################################
sub rowhash_to_array {
    my $row           = shift;
    my $col_structure = shift;
#die Dumper $col_structure;
    my $col_names        = $col_structure->{col_names};
    my %map              = %{ $col_structure->{map} } if $col_structure->{map};
    my %multi            = %{ $col_structure->{multi} } if $col_structure->{multi};
    my $pretty_col_names = $col_structure->{pretty_cols} if $col_structure->{pretty_cols};
    my @newvals;
    my %visited;
    for my $coln(@$col_names) {
 	my $tag = $map{$coln};
        #next unless $tag;
        if (!$multi{$tag}) {
            $row->{$tag} ? push @newvals, $row->{$tag} : push @newvals, undef;
	}
        else {
            if (!$visited{$tag}) {
                $visited{$tag}++;
                my @multi_col = ref $row->{$tag} eq 'ARRAY'
 	                  ? @{$row->{$tag}}
                          : ($row->{$tag});
                push @multi_col,undef unless scalar @multi_col;
	        my $dif = ($multi{$tag}) - (scalar @multi_col);
                push @multi_col,undef for 0 .. $dif;
                push @newvals,$_ for @multi_col;
	    }
        }
    }
    return( \@newvals );
}
###############################################################
# BUILD A COLUMN NAMES LIST IF NONE HAS BEEN BUILT YET
###############################################################
sub build_column_names {
    my $tags = shift;
    my $root = shift;
    my $root_tag = shift;
    my $col_names = shift || [];
    my %multi;
    my %map;
    for my $col(@$col_names) {
        $multi{$col}++;
    }
    my %num;
    my $newcolz;
    for my $col(@$col_names) {



( run in 0.602 second using v1.01-cache-2.11-cpan-39bf76dae61 )