XML-DT

 view release on metacpan or  search on metacpan

lib/XML/DT.pm  view on Meta::CPAN


   father("x"); # returns value for attribute "x" on father element
   father("x", "value"); # sets value for attribute "x" on father
                                 # element

You can also use it directly as a reference to C<@dtattributes>:

   father->{"x"};           # gets the attribute
   father->{"x"} = "value"; # sets the attribute
   $attributes = father;            # gets all attributes reference


=item C<root>

You can use it as a function to access to your tree root element.

   root("x");          # gets attribute C<x> on root element
   root("x", "value"); # sets value for attribute C<x> on root

You can also use it directly as a reference to C<$dtattributes[-1]>:

   root->{"x"};           # gets the attribute x
   root->{"x"} = "value"; # sets the attribute x
   $attributes = root;    # gets all attributes reference

=back

=head1 User provided element processing functions

The user must provide an HASH with a function for each element, that
computes element output. Functions can use the element name C<$q>, the
element content C<$c> and the attribute values hash C<%v>.

All those global variables are defined in C<$CALLER::>.

Each time an element is find the associated function is called.

Content is calculated by concatenation of element contents strings and
interior elements return values.

=head2 C<-default> function

When a element has no associated function, the function associated
with C<-default> called. If no C<-default> function is defined the
default function returns a XML like string for the element.

When you use C</-type> definitions, you often need do set C<-default>
function to return just the contents: C<sub{$c}>.

=head2 C<-outputenc> option

C<-outputenc> defines the output encoding (default is Unicode UTF8).

=head2 C<-inputenc> option

C<-inputenc> forces a input encoding type. Whenever that is possible,
define the input encoding in the XML file:

 <?xml version='1.0' encoding='ISO-8859-1'?>

=head2 C<-pcdata> function

C<-pcdata> function is used to define transformation over the
contents.  Typically this function should look at context (see
C<inctxt> function)

The default C<-pcdata> function is the identity

=head2 C<-cdata> function

You can process C<<CDATA>> in a way different from pcdata. If you
define a C<-cdata> method, it will be used. Otherwise, the C<-pcdata>
method is called.

=head2 C<-begin> function

Function to be executed before processing XML file.

Example of use: initialization of side-effect variables

=head2 C<-end> function

Function to be executed after processing XML file.  I can use C<$c>
content value.  The value returned by C<-end> will be the C<dt> return
value.

Example of use: post-processing of returned contents

=head2 C<-recover> option

If set, the parser will try to recover in XML errors.

=head2 C<-html> option

If set, the parser will try to recover in errors. Note that this
differs from the previous one in the sense it uses some knowledge of
the HTML structure for the recovery.

=head2 C<-userdata> option

Use this to pass any information you like to your handlers. The data
structure you pass in this option will be available as C<< $u >> in
your code. -- New in 0.62.


=head1 Elements with values other than strings (C<-type>)

By default all elements return strings, and contents (C<$c>) is the
concatenation of the strings returned by the sub-elements.

In some situations the XML text contains values that are better
processed as a structured type.

The following types (functors) are available:

=over 4

=item THE_CHILD

Return the result of processing the only child of the element.

=item LAST_CHILD

Returns the result of processing the last child of the element.

=item STR

concatenates all the sub-elements returned values (DEFAULT) all the
sub-element should return strings to be concatenated;

=item SEQ

lib/XML/DT.pm  view on Meta::CPAN

  for $z (keys %aux){
    my $code = sub {
      my $l;
      for $l (@{$aux{$z}}) {
	my $prefix = "";
	$prefix = "^" unless (($l->[0]) or ($l->[1]));
	$prefix = "^" if (($l->[0] eq "/") && ($l->[1]));
	if ($l->[3]) {
	  if(inctxt("$prefix$l->[1]") && _testAttr($l->[3])) 
	    {return &{$l->[2]}; }
	} else {
	  if(inctxt("$prefix$l->[1]")) {return &{$l->[2]};}
	}
      }
      return &{ $aux2{$z}} if $aux2{$z} ;
      return &{ $h{-default}} if $h{-default};
      &toxml();
    };
    $n{$z} = $code;
  }
  for $z (keys %aux2){
    $n{$z} ||= $aux2{$z} ;
  }
  return %n;
}



sub _omni {
    my ($par, $xml, @l) = @_;
    my $defaulttype =
      (exists($xml->{-type}) && exists($xml->{-type}{-default}))
        ?
          $xml->{-type}{-default} : "STR";
    my $type = $ty{$par} || $defaulttype;
    my %typeargs = ();

  if (ref($type) eq "mmapon") {
      $typeargs{$_} = 1  for (@$type);
      $type = "MMAPON";
  }

  my $r ;
  if( $type eq 'STR')                                   { $r = "" }
  elsif( $type eq 'THE_CHILD' or $type eq 'LAST_CHILD') { $r = 0  }
  elsif( $type eq 'SEQ'  or $type eq "ARRAY")           { $r = [] }
  elsif( $type eq 'SEQH' or $type eq "ARRAYOFHASH")     { $r = [] }
  elsif( $type eq 'MAP'  or $type eq "HASH")            { $r = {} }
  elsif( $type eq 'MULTIMAP')                           { $r = {} }
  elsif( $type eq 'MMAPON' or $type eq "HASHOFARRAY")   { $r = {} }
  elsif( $type eq 'NONE')                               { $r = "" }
  elsif( $type eq 'ZERO')                               { return "" }

  my ($name, $val, @val, $atr, $aux);

    $u = $xml->{-userdata};
  while(@l) {
      my $tree = shift @l;
      next unless $tree;

      $name = ref($tree) eq "XML::LibXML::CDATASection" ? "-pcdata" : $tree->getName();

      if (ref($tree) eq "XML::LibXML::CDATASection") {
          $val = $tree->getData();

          $name = "-cdata";
          $aux = (defined($xml->{-outputenc}))?_fromUTF8($val,$xml->{-outputenc}):$val;

          if (defined($xml->{-cdata})) {
              push(@dtcontext,"-cdata");
              $c = $aux;
              $aux = &{$xml->{-cdata}};
              pop(@dtcontext);
          } elsif (defined($xml->{-pcdata})) {
              push(@dtcontext,"-pcdata");
              $c = $aux;
              $aux = &{$xml->{-pcdata}};
              pop(@dtcontext);
          }

      } elsif (ref($tree) eq "XML::LibXML::Comment") {
          ### At the moment, treat as Text
          ### We will need to change this, I hope!
          $val = "";
          $name = "-pcdata";
          $aux= (defined($xml->{-outputenc}))?_fromUTF8($val, $xml->{-outputenc}):$val;
          if (defined($xml->{-pcdata})) {
              push(@dtcontext,"-pcdata");
              $c = $aux;
              $aux = &{$xml->{-pcdata}};
              pop(@dtcontext);
          }
      }
      elsif (ref($tree) eq "XML::LibXML::Text") {
          $val = $tree->getData();

          $name = "-pcdata";
          $aux = (defined($xml->{-outputenc}))?_fromUTF8($val,$xml->{-outputenc}):$val;

          if (defined($xml->{-pcdata})) {
              push(@dtcontext,"-pcdata");
              $c = $aux;
              $aux = &{$xml->{-pcdata}};
              pop(@dtcontext);
          }

      } elsif (ref($tree) eq "XML::LibXML::Element") {
          my %atr = _nodeAttributes($tree);
          $atr = \%atr;

          if (exists($xml->{-ignorecase})) {
              $name = lc($name);
              for (keys %$atr) {
                  my ($k,$v) = (lc($_),$atr->{$_});
                  delete($atr->{$_});
                  $atr->{$k} = $v;
              }
          }

          push(@dtcontext,$name);
          $dtcontextcount{$name}++;
          unshift(@dtatributes, $atr);
          unshift(@dtattributes, $atr);
          $aux = _omniele($xml, $name, _omni($name, $xml, ($tree->getChildnodes())), $atr);
          shift(@dtatributes);
          shift(@dtattributes);
          pop(@dtcontext); $dtcontextcount{$name}--;
      } elsif (ref($tree) eq "XML::LibXML::Node") {
          if ($tree->nodeType == XML_ENTITY_REF_NODE) {
              # if we get here, is because we are not expanding entities (I think)
              if ($tree->textContent) {
                  $aux = $tree->textContent;
              } else {
                  $aux = '&'.$tree->nodeName.';';
              }
          } else {
              print STDERR "Not handled, generic node of type: [",$tree->nodeType,"]\n";
          }
      } else {
          print STDERR "Not handled: [",ref($tree),"]\n";
      }

      if    ($type eq "STR"){ if (defined($aux)) {$r .= $aux} ;}
      elsif ($type eq "THE_CHILD" or $type eq "LAST_CHILD"){
          $r = $aux unless _whitepc($aux, $name); }
      elsif ($type eq "SEQ" or $type eq "ARRAY"){
          push(@$r, $aux) unless _whitepc($aux, $name);}
      elsif ($type eq "SEQH" or $type eq "ARRAYHASH"){
          push(@$r,{"-c" => $aux,
                    "-q" => $name,
                    _nodeAttributes($tree)
                   }) unless _whitepc($aux,$name);
      }
      elsif($type eq "MMAPON"){
          if(not _whitepc($aux,$name)){
              if(! $typeargs{$name}) {
                  warn "duplicated tag '$name'\n" if(defined($r->{$name}));
                  $r->{$name} = $aux }
              else { push(@{$r->{$name}},$aux) unless _whitepc($aux,$name)}}
      }
      elsif($type eq "MAP" or $type eq "HASH"){
          if(not _whitepc($aux,$name)){
              warn "duplicated tag '$name'\n" if(defined($r->{$name}));
              $r->{$name} = $aux }}
      elsif($type eq "MULTIMAP"){
          push(@{$r->{$name}},$aux) unless _whitepc($aux,$name)}
      elsif($type eq "NONE"){ $r = $aux;}
      else { $r="undefined type !!!"}
  }
  $r;
}



sub _omniele {
  my $xml = shift;
  my $aux;
  ($q, $c, $aux) = @_;

  %v = %$aux;

  if (defined($xml->{-outputenc})) {
    for (keys %v){
      $v{$_} = _fromUTF8($v{$_}, $xml->{-outputenc})
    }
  }

  if (defined $xml->{$q})
    { &{$xml->{$q}} }
  elsif (defined $xml->{'-default'})
    { &{$xml->{'-default'}} }
  elsif (defined $xml->{'-tohtml'})
    { tohtml() }
  else
    { toxml() }
}



sub xmltree { +{'-c' => $c, '-q' => $q, %v} }

sub tohtml {
    my ($q,$v,$c);
	
    if (not @_) {
        ($q,$v,$c) = ($XML::DT::q, \%XML::DT::v, $XML::DT::c);
    } elsif (ref($_[0])) {
        $c = shift;
    } else {
        ($q,$v,$c) = @_;
    }
	
    if (not ref($c)) {
        if ($q eq "-pcdata") {
            return $c
        } elsif ($q eq "link" || $q eq "br" || $q eq "hr" || $q eq "img") {
            return _openTag($q,$v)
	} else {
            return _openTag($q,$v) . "$c</$q>"
        }
    }
    elsif (ref($c) eq "HASH" && $c->{'-q'} && $c->{'-c'}) {
        my %a = %$c;
        my ($q,$c) = delete @a{"-q","-c"};
        tohtml($q,\%a,(ref($c)?tohtml($c):$c));
    }
    elsif (ref($c) eq "HASH") {
        _openTag($q,$v).
          join("",map {($_ ne "-pcdata")
                         ? ( (ref($c->{$_}) eq "ARRAY")
                             ? "<$_>".
                             join("</$_>\n<$_>", @{$c->{$_}}).
                             "</$_>\n" 
                             : tohtml($_,{},$c->{$_})."\n" )
                           : () }
               keys %{$c} ) .
                 "$c->{-pcdata}</$q>" } ########  "NOTYetREady"
    elsif (ref($c) eq "ARRAY") {
        if (defined($q) && exists($ty{$q}) && $ty{$q} eq "SEQH") {
            tohtml($q,$v,join("\n",map {tohtml($_)} @$c))
        } elsif (defined $q) {
            tohtml($q,$v,join("",@{$c}))
        } else {
            join("\n",map {(ref($_)?tohtml($_):$_)} @$c)
        }
    }
}

sub toxml {
  my ($q,$v,$c);

  if (not @_) {
    ($q, $v, $c) = ($XML::DT::q, \%XML::DT::v, $XML::DT::c);
  } elsif (ref($_[0])) {
    $c = shift;
  } else {
    ($q, $v, $c) = @_;
  }

  if (not ref($c)) {
    if ($q eq "-pcdata") {
      return $c
    } elsif ($c eq "") {
      return _emptyTag($q,$v)
    } else {
      return _openTag($q,$v) . "$c</$q>"
    }
  }
  elsif (ref($c) eq "HASH" && $c->{'-q'} && $c->{'-c'}) {
    my %a = %$c;
    my ($q,$c) = delete @a{"-q","-c"};
    ###   _openTag($q,\%a).toxml($c).).
    ###   toxml($q,\%a,join("\n",map {toxml($_)} @$c))
    toxml($q,\%a,(ref($c)?toxml($c):$c));
  }
  elsif (ref($c) eq "HASH") {
    _openTag($q,$v).
      join("",map {($_ ne "-pcdata")
		     ? ( (ref($c->{$_}) eq "ARRAY")
			 ? "<$_>".
			 join("</$_>\n<$_>", @{$c->{$_}}).
			 "</$_>\n" 
			 : toxml($_,{},$c->{$_})."\n" )
		       : () }
	   keys %{$c} ) .
	     "$c->{-pcdata}</$q>" } ########  "NOTYetREady"
  elsif (ref($c) eq "ARRAY") {
    if (defined($q) && exists($ty{$q}) && $ty{$q} eq "SEQH") {
      toxml($q,$v,join("\n",map {toxml($_)} @$c))
    } elsif (defined $q) {
      toxml($q,$v,join("",@{$c}))
    } else {
      join("\n",map {(ref($_)?toxml($_):$_)} @$c)
    }
  }
}


sub _openTag{
  "<$_[0]". join("",map {" $_=\"$_[1]{$_}\""} keys %{$_[1]} ).">"
}

sub _emptyTag{
  "<$_[0]". join("",map {" $_=\"$_[1]{$_}\""} keys %{$_[1]} )."/>"
}


sub mkdtskel_fromDTD {
  my $filename = shift;
  my $file = ParseDTDFile($filename);

  print <<'PERL';
#!/usr/bin/perl
use warnings;
use strict;
use XML::DT;
my $filename = shift;

# Variable Reference
#
# $c - contents after child processing
# $q - element name (tag)
# %v - hash of attributes

my %handler=(
#    '-outputenc' => 'ISO-8859-1',
#    '-default'   => sub{"<$q>$c</$q>"},
PERL


  for (sort keys %{$file}) {
    print "     '$_' => sub { },";
    print " # attributes: ",
      join(", ", keys %{$file->{$_}{attributes}}) if exists($file->{$_}{attributes});
    print "\n";
  }


  print <<'PERL';
);

print dt($filename, %handler);
PERL

}

lib/XML/DT.pm  view on Meta::CPAN

	 print "     '$name' => sub{ }, #";
	 print " $element{$name} occurrences;";
	 print ' attributes: ',
	   join(', ', keys %{$att{$name}}) if $att{$name};
#	 print "       \"\$q:\$c\"\n";
	 print "\n";
       }
       print <<'END';
);
print dt($filename, %handler);
END
     }
    );

  my $file = shift(@files);
  while($file =~ /^-/){
    if   ($file eq "-html")   {
        $HTML = "     '-html' => 1,\n";
        $mkdtskel{'-html'} = 1;} 
    elsif($file eq "-latin1") { $mkdtskel{'-inputenc'}='ISO-8859-1';}
    else { die("usage mktskel [-html] [-latin1] file \n")}
    $file=shift(@files)}

  dt($file,%mkdtskel)
}



sub _nodeAttributes {
  my $node = shift;
  my %answer = ();
  my @attrs = $node->getAttributes();
  for (@attrs) {
    if (ref($_) eq "XML::LibXML::Namespace") {
      # TODO: This should not be ignored, I think.
      # This sould be converted on a standard attribute with
      # key 'namespace' and respective contents
    } else {
      $answer{$_->getName()} = $_->getValue();
    }
  }
  return %answer;
}


sub mkdtdskel {
  my @files = @_; 
  my $name;
  my %att;
  my %ele;
  my %elel;
  my $root;
  my %handler=(
    '-outputenc' => 'ISO-8859-1',
    '-default'   => sub{ 
          $elel{$q}++;
          $root = $q unless ctxt(1);
          $ele{ctxt(1)}{$q} ++;
          for(keys(%v)){$att{$q}{$_} ++ } ;
        },
    '-pcdata'    => sub{ if ($c =~ /[^ \t\n]/){ $ele{ctxt(1)}{"#PCDATA"}=1 }},
  );

  while($files[0] =~ /^-/){
    if   ($files[0] eq "-html")   { $handler{'-html'} = 1;} 
    elsif($files[0] eq "-latin1") { $handler{'-inputenc'}='ISO-8859-1';}
    else { die("usage mkdtdskel [-html] [-latin1] file* \n")}
    shift(@files)}

  for my $filename (@files){
    dt($filename,%handler); 
  }

  print "<!-- DTD $root ... -->\n<!-- (C) ... " . localtime(time) ." -->\n";
  delete $elel{$root};

  for ($root, keys %elel){
    _putele($_, \%ele);
    for $name (keys(%{$att{$_}})) {
       print( "\t<!-- $name : ... -->\n");
       print( "\t<!ATTLIST $_ $name CDATA #IMPLIED >\n");
    }
  }
}

sub _putele {
  my ($e,$ele) = @_;
  my @f ;
  if ($ele->{$e}) {
    @f = keys %{$ele->{$e}};
    print "<!ELEMENT $e (", join("|", @f ),")",
      (@f >= 1 && $f[0] eq "#PCDATA" ? "" : "*"),
	" >\n";
    print "<!-- ", join(" | ", (map {"$_=$ele->{$e}{$_}"} @f )), " -->\n";
  }
  else {
    print "<!ELEMENT $e  EMPTY >\n";
  }
}

sub _whitepc {
  $_[1] eq '-pcdata' and $_[0] =~ /^[ \t\r\n]*$/
}

sub MMAPON {
  bless([@_],"mmapon")
}


sub _fromUTF8 {
  my $string = shift;
  my $encode = shift;
  my $ans = eval { XML::LibXML::decodeFromUTF8($encode, $string) };
  if ($@) {
    return $string
  } else {
    return $ans
  }
}

1;



( run in 0.935 second using v1.01-cache-2.11-cpan-39bf76dae61 )