XML-ParseDTD

 view release on metacpan or  search on metacpan

ParseDTD.pm  view on Meta::CPAN

  else {
    open DTD, "<$dtd" or die "Cannot open file $dtd : $!\n";
    {
      local $/;
      $DTD = <DTD>;
    }
    close DTD;
    $pdtd{lmod} = (stat($dtd))[9];
  }

  $DTD =~ s/<!--.*?-->//gs;
  
  my %IntEntity;
  while($DTD =~ s/<!ENTITY\s*%\s*(\S+)\s*[A-Z]*\s*(?:"([^"]*?)"\s*)+>//os) {
    $IntEntity{$1} = $2;
  }
  
  my $entity;
  foreach $_ (keys(%IntEntity)) {
    #$IntEntity{$_} =~ s/%(\S+);/$IntEntity{$1}/gs;
    while($IntEntity{$_} =~ s/%(\S+);/$IntEntity{$1}/s) {}
  }
  
  #$DTD =~ s/%(\S+);/$IntEntity{$1}/gs;
  while($DTD =~ s/%(\S+);/$IntEntity{$1}/s) {}

  while($DTD =~ s/<!ELEMENT\s*(\S+)\s*(?:(\([^<>]*\)(\*|\+)?)|(EMPTY)|(ANY))\s*>//s) {
    if(!$4) {
      $_ = $1;
      $pdtd{'Element'}->{$_} = $2;
      $pdtd{'Element'}->{$_} =~ s/\s*//gs;
      $pdtd{'Element'}->{$_} =~ s/([a-zA-Z0-9#]+)(?!(,|[a-zA-Z0-9#]))/$1,/gs;
      $pdtd{'Element'}->{$_} =~ s/([a-zA-Z0-9#]+,)/($1)/gs;
      $pdtd{'Element'}->{$_} =~ s/([^a-zA-Z0-9#]{1}),/$1/gs;
    }
    else {
      if($4 eq 'EMPTY') {
	$pdtd{'Element'}->{$1} = 1;
	$pdtd{'Empty'}->{$1} = 1;
      }
      elsif($4 eq 'ANY') {
	$pdtd{'Element'}->{$1} = '.*';
	$pdtd{'Any'}->{$1} = 1;
      }
    }
  }
  
  my $elem;
  while($DTD =~ s/<!ATTLIST\s*(\S+)\s*([^<>]*)>//s) {
    $elem = $1;
    $pdtd{'Attr'}->{$elem} = {};
    $_ = $2;
    my ($attr,$type,$some,$default);
    while(s/\s*(\S+)\s*((?:\([^\(\)]+\))|(?:[^\(\) \t\n]+))\s*(\S+)?\s*((?:"|')\S+(?:'|"))?\s*//s) {
      ($attr,$type,$some,$default) = ($1,$2,$3,$4);
      for($type) {
	#/^ID(REF)?$/ && do { $pdtd{'Attr'}->{$elem}->{$attr} = \&XML::ParseDTD::_check_id; last; };
	/^ID(REF)?$/ && do { $pdtd{'Attr'}->{$elem}->{$attr} = ['ID', '^[A-Za-z_]{1}[A-Za-z0-9_:.-]*$']; last; };
	#/^IDREFS$/ && do { $pdtd{'Attr'}->{$elem}->{$attr} = \&XML::ParseDTD::_check_idrefs; last; };
	/^IDREFS$/ && do { $pdtd{'Attr'}->{$elem}->{$attr} = ['IDREFS', '^[A-Za-z_]{1}[A-Za-z0-9_:. -]*$']; last; };
	#/^CDATA$/ && do { $pdtd{'Attr'}->{$elem}->{$attr} = \&XML::ParseDTD::_check_cdata; last; };
	/^CDATA$/ && do { $pdtd{'Attr'}->{$elem}->{$attr} = ['CDATA', '.*']; last; };
	#/^PCDATA$/ && do { $pdtd{'Attr'}->{$elem}->{$attr} = \&XML::ParseDTD::_check_pcdata; last; };
	/^PCDATA$/ && do { $pdtd{'Attr'}->{$elem}->{$attr} = ['PCDATA', '.*']; last; };
	#/^NMTOKEN$/ && do { $pdtd{'Attr'}->{$elem}->{$attr} = \&XML::ParseDTD::_check_nmtoken; last; };
	/^NMTOKEN$/ && do { $pdtd{'Attr'}->{$elem}->{$attr} = ['NMTOKEN', '^[A-Za-z0-9_:.-]{1}\S*$']; last; };
	/^\((.*)\)$/s && do {
	  $_ = $1;
	  s/\s//gs;
	  my @allowed = split(/\|/s, $_);
	  if(@allowed > 1) {
	    $pdtd{'Attr'}->{$elem}->{$attr} = {};
	    foreach my $value (@allowed) {
	      $pdtd{'Attr'}->{$elem}->{$attr}->{$value} = 1;
	    }
	  }
	  else {
	    $pdtd{'Attr'}->{$elem}->{$attr} = $allowed[0];
	  }
	  last;
	};
      }
      for($some) {
	/#IMPLIED/ && do { last; };
	/#REQUIRED/ && do { $pdtd{'ReqAtt'}->{$elem}->{$attr} = 1; last; };
	/#FIXED/ && do { $pdtd{'FixAtt'}->{$elem}->{$attr} = 1; last; };
	($pdtd{'DefAtt'}->{$elem}->{$attr} = $some) =~ s/("|')//g if($some);
      }
      ($pdtd{'DefAtt'}->{$elem}->{$attr} = $default)  =~ s/("|')//g if($default);
    }
  }
  return \%pdtd;
}

#this method proves whether the dtd is already cached and if so if it should be refetched (and reparsed)
sub _validate {
  my ($dtd,$rec,$checklm,$timeout) = @_;
  my $lmod;
  if($dtd =~ m/^([A-za-z]+):\/\//i) {
    $lmod = ($checklm < 0 || int(rand($checklm))) ? $rec->{lmod} : LWP::UserAgent->new(timeout => $timeout)->head($dtd)->last_modified;
  }
  else {
    $lmod = (stat($dtd))[9];
  }
  return ($lmod == $rec->{lmod}) ? 1 : 0;
}

######################################################################
return 1;
__END__

=head1 BUGS

Send bug reports to: bug-XML-ParseDTD@rt.cpan.org (if that doesn't
work feel free to send directly to moritz@freesources.org). Or use the
webinterface at http://rt.cpan.org/NoAuth/Bugs.html?Dist=XML-ParseDTD.

Thanks!

=head1 AUTHOR

(c) 2003, Moritz Sinn. This module is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License (see http://www.gnu.org/licenses/gpl.txt) as published by the Free Software Foundation; either version 2 o...



( run in 1.448 second using v1.01-cache-2.11-cpan-fe3c2283af0 )