AnyData
view release on metacpan or search on metacpan
lib/AnyData/Format/XML.pm view on Meta::CPAN
sub DESTROY {
return;
print "XML DESTROYED";
my $self = shift;
if ( $self->{storage}->{fh}
&& $self->{storage}->{open_mode} ne 'r'
){
$self->export( $self->{storage} );
}
#undef $self->{twig};
#undef $self->{storage}->{fh};
}
sub read_fields {
my $self = shift;
my $c = $self->{current_element};
return undef unless defined $c;
$c = $self->{current_element} = $c->next_elt($c->gi)
if $c->att('record_tag__');
$self->{prev_element} = $self->{current_element};
$self->{current_element} = $c->next_elt($c->gi) if $c;
return $self->process_element( $self->{prev_element} );
}
sub process_element {
my $self = shift;
my $element = shift;
my @col_names = @{ $self->{col_structure}->{col_names} };
my @row;
my $parent = $element->parent;
my $values = { $element->path => $element->text };
my $par_ats = {};
$par_ats = $parent->atts if $parent;
my $elt_ats = $element->atts || {};
while( my($att_key,$att_val) = each %$par_ats) {
$values->{$parent->path.'/'.$att_key} = $att_val;
}
while( my($att_key,$att_val) = each %$elt_ats) {
$values->{$element->path.'/'.$att_key} = $att_val;
}
for my $kid($element->children) {
if ( defined $values->{$kid->path} ) {
if (!ref $values->{$kid->path}) {
$values->{$kid->path} = [ $values->{$kid->path} ] ;
}
push ( @{ $values->{$kid->path} }, $kid->text );
}
else {
$values->{$kid->path} = $kid->text;
}
}
for my $col(@col_names) {
if (ref $values->{$col}) {
@row = (@row,@{$values->{$col}});
}
else {
push @row, $values->{$col};
}
}
# use Data::Dumper; print Dumper $values, Dumper \@row; exit;
return @row;
}
sub seek_first_record {
my $self = shift;
return unless $self->{twig} and $self->{twig}->root;
$self->{current_element} = $self->{record_tag};
}
sub push_names {
my $self = shift;
my $col_names = shift || $self->{col_names};
#my @c= caller 1; die $c[3]."!!!";
my $str = "<table>\n <row record_tag__='1'>\n";
#print "CREATING";
for (@$col_names) {
$str .= " <$_>dummy__</$_>\n";
}
$str .= " </row>\n</table>\n";
$str = $self->{template} if $self->{template};
if ( $self->{dtd} ) {
$str = $self->{dtd} if $self->{dtd};
my $root = $str;
$root =~ s/.*<!DOCTYPE\s+(\S+)\s+.*/$1/ms;
$str .= "\n\n<$root></$root>";
#die $str;
}
$self->get_data( $str );
return $self->{col_names};
}
sub import {
my $self = shift;
my $data = shift;
my $storage = shift;
$self->init_parser($storage,$data);
return $self->get_data($data,$storage->{col_names});
}
####
# GET DATA FROM STRING
###
sub init_parser {
my $self = shift;
my $storage = shift;
my $fh_or_str = shift;
return if 'co' =~ /$storage->{open_mode}/;
#print " INIT ...";
#print "HAS RECS\n" if $storage->{recs};
#print "HAS DATA\n" if ref $storage->{file} eq 'HASH';
$fh_or_str ||= $storage->{fh} if $storage->{fh};
$fh_or_str ||= $storage->{file}->{data} if ref $storage->{file} eq 'HASH';
$fh_or_str ||= $storage->{recs};
# $fh_or_str ||= join('',@$fh_or_str) if ref $fh_or_str eq 'ARRAY';
#print $fh_or_str; exit;
###z $self->create_new_twig( $self->{col_names} );
my $rv = $self->get_data( $fh_or_str,$self->{col_names} );
return undef unless $rv;
$self->{current_element} = $self->{twig}->root;
$storage->{col_names} = $self->{col_names};
lib/AnyData/Format/XML.pm view on Meta::CPAN
#
#if ($has_record_tag) { $twig->print; exit; }
return($record_tag,$col_names,$atts) if $has_record_tag;
my $cols;
@$cols = map {$_} @$col_names;
my $elt= new XML::Twig::Elt($record_tag->gi);
for my $a(keys %$atts ) {
$a =~ s".*/([^/]+)$"$1";
next if $a eq 'record_tag__';
next unless $record_tag->att($a);
$elt->set_att($a,'');
}
for my $c(@$cols ) {
next if $atts->{$c};
next if $c =~ m"/#PCDATA";
$c =~ s"/#PCDATA"";
$c =~ s".*/([^/]+)$"$1";
my $e= new XML::Twig::Elt($c);
$e->paste('last_child',$elt);
}
my $par;
if ($record_tag->parent and $record_tag->parent->parent) {
$par= new XML::Twig::Elt($record_tag->parent->gi);
for my $a(keys %$atts ) {
$a =~ s".*/([^/]+)$"$1";
next unless $record_tag->parent->att($a);
$par->set_att($a,'');
}
###z# print 3;
$elt->set_att('record_tag__','true');
$elt->paste('first_child',$par);
$par->paste('before',$record_tag->parent);
# $record_tag = $self->{record_tag} = $record_tag->parent->prev_sibling->first_child;
$record_tag ||= $self->{record_tag} = $record_tag->parent->prev_sibling->first_child;
}
else {
for my $a(keys %$atts ) {
$a =~ s".*/([^/]+)$"$1";
next unless $record_tag->att($a);
$elt->set_att($a,'');
}
###z# print 4;
$record_tag ||= $self->{record_tag} = $record_tag->prev_sibling;
# $record_tag = $self->{record_tag} = $record_tag->prev_sibling;
}
$record_tag ||= $twig->root->first_child;
my $old = $record_tag->next_elt($record_tag->gi);
# $old->delete if $self->{create};
# $old->set_att('frump','foo') if $old;
$old->del_att('record_tag__') if $old;
#$twig->print;
#print "\n";
#$old->print if $old;
##print "\n";
# my $par = $self->create_record;
# $self->{blank_element} = $par;
#printf "\n%s\n %s\n", $elt->path, "@$col_names";
@$col_names = map {s"/#PCDATA""; $_} @$col_names;
#$twig->print; print "\n\n";
#use Data::Dumper; print Dumper $record_tag->gi,$col_names,$atts;
return $record_tag,$col_names,$atts;
}
sub check_twig_options {
my $flags = shift;
my $new_flags;
my %twig_opt = %XML::Twig::valid_option;
return $flags unless scalar (keys %twig_opt);
while (my($k,$v) = each %$flags) {
$new_flags->{$k} = $v if $twig_opt{$k};
}
return $new_flags;
}
sub get_structure_from_map {
my $self = shift;
my $twig = shift;
my $col_map = shift;
my($amap,$map,$multi,$col_names,$pretty_cols,$col2tag);
for my $col(@$col_map) {
my($tag_name,$col_name) = ($col,$col);
($tag_name,$col_name) = each %$col if ref $col eq 'HASH';
my($tname,$tparent) = ($tag_name,$tag_name);
if ($tname =~ m!(.*)/([^/]*)$! ) {
$tparent = $1;
$tname = $2;
$tparent =~ s!.*/([^/]*)$!$1!;
}
my $tag = $twig->first_elt($tname);
$tag_name=$tag->path if $tag;
if (!$tag) {
my $new_tag = $twig->first_elt($tparent);
# die "No such element '$tname'!" unless $tag;
if (!$new_tag) {
$tag_name = $tname;
}
else {
$tag_name=$new_tag->path . '/' . $tname;
}
$amap->{$tag_name}++;
}
if (ref $col_name eq 'ARRAY') {
for my $col2(@$col_name) {
$col2tag->{$col2} = $tag_name;
$multi->{$tag_name}++;
push @$pretty_cols, $col2;
}
}
push @$col_names, $tag_name;
push @$pretty_cols, $col_name unless ref $col_name eq 'ARRAY';
$map->{$tag_name} = $col_name;
}
my $record_tag;
my $record_tag_path = '';
for my $col(@$col_names) {
my($rt) = $col =~ m!(.*)/[^/]*$!;
next unless $rt;
$record_tag_path = $rt if length $rt > length $record_tag_path;
}
my @children = $twig->root->descendants;
for my $e(@children) {
next unless $e->path eq $record_tag_path;
$record_tag = $e;
last;
}
if (!$record_tag) {
$record_tag = $twig->root->first_child;
my $p = $record_tag->path;
@$col_names = map {$p.'/'.$_}@$col_names;
# use Data::Dumper; print Dumper $amap;
my $newmap;
$newmap->{ $p.'/'.$_ }++ for keys %{$amap};
$amap = $newmap;
$newmap = {};
$newmap->{ $p.'/'.$_ } = $map->{$_} for keys %{$map};
$map = $newmap;
}
##
#=pod
#paste into parent record_tag__
# my $rt_atts = $record_tag->atts;
# if (!$rt_atts->{record_tag__}) {
# my $new_rt = $record_tag->copy;
# $new_rt->set_att('record_tag__','1');
# $new_rt->set_att('xstruct__','1');
# $new_rt->paste('first_child',$record_tag->parent);
# $record_tag = $new_rt;
# }
#=cut
my $col_structure = {
amap => $amap,
map => $map,
multi => $multi,
col_names => $col_names,
pretty_cols => $pretty_cols,
col2tag => $col2tag,
};
# print $record_tag->path, "\n";
# use Data::Dumper; print Dumper $col_structure;
# exit;
return $record_tag, $col_structure;
}
sub get_data {
my $self = shift;
my $fh_or_str = shift;
my $url = $self->{url};
if ( $url ) {
$fh_or_str = AnyData::Storage::RAM::get_remote_data({},$url);
}
return if( ! defined( $fh_or_str ) );
my $col_names = shift || [];
$col_names = []; #### IGNORE USER COLUMN NAMES FOR NOW
my $flags;
while (my($k,$v)=each %$self) {
$flags->{$k}=$v;
}
my $root_tag = $flags->{root_tag};
my $depth_limit = $flags->{depth_limit};
my $supplied_col_names = $flags->{col_names};
my $have_col_names = 1 if $supplied_col_names;
my $pretty_col_names = $supplied_col_names;
my $col_structure = $self->{col_structure};
undef $col_structure unless $col_structure->{col_names}
and scalar @{$col_structure->{col_names}};
my %multi;
my %map;
my %amap;
$flags->{LoadDTD} = 1;
$flags->{TwigRoots} = {$root_tag=>'1'} if $root_tag;
#
# DEFAULTS : KeepEncoding OFF to mirror XML::Twig
# ProtocolEncoding 'ISO-8859-1'
#
# $flags->{KeepEncoding} ||= 1;
#
$flags->{ProtocolEncoding} ||= 'ISO-8859-1';
#use Data::Dumper; die Dumper $flags;
$flags = check_twig_options($flags);
my $twig= new XML::Twig(%{$flags});
my $success = $twig->safe_parse($fh_or_str);
$self->{errstr} = $@ unless $success;
die $self->{errstr} if $self->{errstr};
return undef unless $success;
$self->{dtd} = $twig->dtd;
my $root = $twig->first_elt($root_tag) || $twig->root;
my $name = $root->path;
my $element= $twig->root;
my($record_tag,$colZ,$atts);
my $col_map = $self->{col_map};
if ($col_map) {
($record_tag,$col_structure) =
$self->get_structure_from_map($twig,$col_map);
}
else {
($record_tag,$colZ,$atts) = $self->get_structure($twig);
if (!$col_structure) {
$have_col_names++;
$col_structure = build_column_names($colZ,$root,$root_tag,$colZ);
$col_structure->{amap} = $atts;
}
}
# CREATE A DUMMY RECORD TAG
#
my $rt_atts = $record_tag->atts;
if (!$rt_atts->{record_tag__}) {
my $new_rt = $record_tag->copy;
$new_rt->set_att('record_tag__','1');
$new_rt->set_att('xstruct__','1');
$new_rt->paste('first_child',$record_tag->parent);
$record_tag = $new_rt;
}
# $twig->print;
# use Data::Dumper; print Dumper $col_structure;
# print $self->{record_tag}->path;
$self->{record_tag} = $record_tag;
$self->{twig} = $twig;
$self->{col_names} = $col_structure->{pretty_cols};
$self->{col_structure} = $col_structure;
return 1;
}
###############################################################
# MAP A ROW HASH ONTO A COLUMN NAMES ARRAY
###############################################################
sub rowhash_to_array {
my $row = shift;
my $col_structure = shift;
#die Dumper $col_structure;
my $col_names = $col_structure->{col_names};
my %map = %{ $col_structure->{map} } if $col_structure->{map};
my %multi = %{ $col_structure->{multi} } if $col_structure->{multi};
my $pretty_col_names = $col_structure->{pretty_cols} if $col_structure->{pretty_cols};
my @newvals;
my %visited;
for my $coln(@$col_names) {
my $tag = $map{$coln};
#next unless $tag;
if (!$multi{$tag}) {
$row->{$tag} ? push @newvals, $row->{$tag} : push @newvals, undef;
}
else {
if (!$visited{$tag}) {
$visited{$tag}++;
my @multi_col = ref $row->{$tag} eq 'ARRAY'
? @{$row->{$tag}}
: ($row->{$tag});
push @multi_col,undef unless scalar @multi_col;
my $dif = ($multi{$tag}) - (scalar @multi_col);
push @multi_col,undef for 0 .. $dif;
push @newvals,$_ for @multi_col;
}
}
}
return( \@newvals );
}
###############################################################
# BUILD A COLUMN NAMES LIST IF NONE HAS BEEN BUILT YET
###############################################################
sub build_column_names {
my $tags = shift;
my $root = shift;
my $root_tag = shift;
my $col_names = shift || [];
my %multi;
my %map;
for my $col(@$col_names) {
$multi{$col}++;
}
my %num;
my $newcolz;
for my $col(@$col_names) {
( run in 0.602 second using v1.01-cache-2.11-cpan-39bf76dae61 )