CFDI

 view release on metacpan or  search on metacpan

lib/CFDI/Parser/XML.pm  view on Meta::CPAN

package CFDI::Parser::XML;

use strict;
use CFDI::Constants::Class;
use CFDI::Regex::XML;
require Exporter;
our @EXPORT = qw(parse);
our @ISA = qw(Exporter);
our $VERSION = 0.85;
our $BUFLEN = 256;

=todo
namespaces...
<?proc some="calue"?>     #processing instructions
#entities()    <     &     &amp &quot   &#something;
$attr{'xml:space'} eq 'default'){ #remove space
=cut

sub parse(_){
  my $file = shift;
  die "file required$/" unless defined $file;
  local $_ = '';
  die "cannot access file $file$/" unless -e $file && -r _;
  open(XML,'<:encoding(UTF-8)',$file) or die "cannot open file $file as UTF-8: $!$/";
  my ($t,$squote,$dquote,$cmntOpen,$char,$buf,@tokns,$dec,$hasTags) = (0,0,0,0);
  local $SIG{__DIE__} = sub {close XML or warn "cannot close file $file: $!$/"};
  my ($chars,$buffer,$BOM);
  die "file required$/" unless defined $file;
  die "cannot access file $file$/" unless -e $file && -r _;
  $chars = sysread XML,$buffer,1;
  die "error reading first char$/" unless defined $chars;
  die "file $file is empty$/" unless $chars;
  $BOM = 65279 == ord $buffer ? 1 : 0;
  local $_;
  # RD1: $chars = sysread XML,$buffer,1;
  # die "error reading file $file$/" unless defined $chars;
  # die "parsing error at: $_$/" unless $chars;
  # $_ .= $buffer;
  # goto RD1 if -1 == index $_,'>';
  # die "declaration error: $_$/" unless s/^<\?xml($qr_at*)\?>//s;
  # $attr = $1;
  # push @attr,$1,substr$2,1,-1 while defined $attr && $attr=~s/\s*($qr_na)\s*=\s*($qr_va)\s*//;
  # exists $n{$_} ? die "attribute '$_' is not unique$/" : $n{$_}++ for grep ++$i%2, @attr;
  # %attr = @attr;
  # die "bad xml 1.0 declaration$/" if grep !/^(?:version|encoding|standalone)$/, keys %attr;
  # if(exists $attr{version}){
  #   if(!defined $attr{version} || $attr{version} ne '1.0'){
  #     die "xml version 1.0 only$/"}}
  # if(exists $attr{standalone}){
  #   if(!defined $attr{standalone} || $attr{standalone} !~ /^(?:yes|no)$/){
  #     die "standalone error declaration$/"}}
  # if(exists $attr{encoding}){
  #   die "encoding error declaration$/" if !defined $attr{encoding} || $attr{encoding} !~ m!^UTF[-_ /]?8$!i}
  # $dec = bless \@attr,DECLARATION;
  my ($buffer2,$buffer1) = ($BOM ? '' : $buffer);
  while(length($buffer2) || ($char = sysread XML,$buffer1,$BUFLEN) || length){
    if(length $buffer2){
      $char = 0;
    }elsif($char){
      $buffer2 = $buffer1;
      undef $buffer1;
    }else{
      s/^\s*|\s*$//;
      $_ = "<$_" if $t;
      die "parsing error: $_$/" if length;
      last;
    }
    $buf = substr $buffer2,0,1,'';
    if($buf eq '<' && !$cmntOpen){
      die "parsing error: <$_<$buffer2$/" if $t == 1;
      $t = 1;
      if(length){
        die "parsing error: $_<$buffer2$/" if !$hasTags && /\S/;
        my $text = $_;
        $tokns[$#tokns+1] = bless \$text,TEXT;
        $_ = '';
      }
    }elsif($t && $buf eq '>' && !$squote && !$dquote && (!$cmntOpen || (5 <= length $_ && '--' eq substr $_,-2)) ){
      die "parsing error: <$_>$buffer2$/" unless /$qr_ta/;
      $t = 0;
      if(defined $1 && length $1){
        my ($name,$attr,$slsh,@attr,%n,$i) = ($1,$2,$3);
        push @attr,$1,substr$2,1,-1 while defined $attr && $attr=~s/\s*($qr_na)\s*=\s*($qr_va)\s*//;
        my $data = $_;
        exists $n{$_} ? die "parsing error: attribute '$_' is not unique at <$data>$buffer2$/" : $n{$_}++ for grep ++$i%2, @attr;
        #parse namespaces
        $attr = $#attr+1 ? bless \@attr,ATTRIBUTES : undef;
        my $Name = bless \$name,NAME;
        my $token = $attr ? [$Name,$attr] : [$Name];
        bless $token,ELEMENT if defined $slsh && length $slsh;
        $hasTags = 1;
        $tokns[$#tokns+1] = $token;
      }elsif(defined $4 && length $4){#closing tag - check for content and former opening tag
        my $name = $4;
        my $i = $#tokns;
        my $found = 0;
        my @content;
        while($i >= 0){
          my $token = $tokns[$i];
          if(ref $token eq 'ARRAY'){
            die "parsing error: <$_>$buffer2$/" unless ${$$token[0]} eq $name;
            $found = 1;
            if(0 && (my ($attr) = grep ref eq ATTRIBUTES,@$token)){
              my %attr = @$attr;
              if(defined $attr{'xml:space'} && $attr{'xml:space'} eq 'default'){
                #remove space
              }
            }
            $$token[$#$token+1] = bless \@content,CONTENT;
            bless $token,ELEMENT;
            last;
          }else{
            unshift @content,splice @tokns,$i,1;
          }
          $i--;



( run in 0.520 second using v1.01-cache-2.11-cpan-524268b4103 )