XML-ParseDTD
view release on metacpan or search on metacpan
ParseDTD.pm view on Meta::CPAN
else {
open DTD, "<$dtd" or die "Cannot open file $dtd : $!\n";
{
local $/;
$DTD = <DTD>;
}
close DTD;
$pdtd{lmod} = (stat($dtd))[9];
}
$DTD =~ s/<!--.*?-->//gs;
my %IntEntity;
while($DTD =~ s/<!ENTITY\s*%\s*(\S+)\s*[A-Z]*\s*(?:"([^"]*?)"\s*)+>//os) {
$IntEntity{$1} = $2;
}
my $entity;
foreach $_ (keys(%IntEntity)) {
#$IntEntity{$_} =~ s/%(\S+);/$IntEntity{$1}/gs;
while($IntEntity{$_} =~ s/%(\S+);/$IntEntity{$1}/s) {}
}
#$DTD =~ s/%(\S+);/$IntEntity{$1}/gs;
while($DTD =~ s/%(\S+);/$IntEntity{$1}/s) {}
while($DTD =~ s/<!ELEMENT\s*(\S+)\s*(?:(\([^<>]*\)(\*|\+)?)|(EMPTY)|(ANY))\s*>//s) {
if(!$4) {
$_ = $1;
$pdtd{'Element'}->{$_} = $2;
$pdtd{'Element'}->{$_} =~ s/\s*//gs;
$pdtd{'Element'}->{$_} =~ s/([a-zA-Z0-9#]+)(?!(,|[a-zA-Z0-9#]))/$1,/gs;
$pdtd{'Element'}->{$_} =~ s/([a-zA-Z0-9#]+,)/($1)/gs;
$pdtd{'Element'}->{$_} =~ s/([^a-zA-Z0-9#]{1}),/$1/gs;
}
else {
if($4 eq 'EMPTY') {
$pdtd{'Element'}->{$1} = 1;
$pdtd{'Empty'}->{$1} = 1;
}
elsif($4 eq 'ANY') {
$pdtd{'Element'}->{$1} = '.*';
$pdtd{'Any'}->{$1} = 1;
}
}
}
my $elem;
while($DTD =~ s/<!ATTLIST\s*(\S+)\s*([^<>]*)>//s) {
$elem = $1;
$pdtd{'Attr'}->{$elem} = {};
$_ = $2;
my ($attr,$type,$some,$default);
while(s/\s*(\S+)\s*((?:\([^\(\)]+\))|(?:[^\(\) \t\n]+))\s*(\S+)?\s*((?:"|')\S+(?:'|"))?\s*//s) {
($attr,$type,$some,$default) = ($1,$2,$3,$4);
for($type) {
#/^ID(REF)?$/ && do { $pdtd{'Attr'}->{$elem}->{$attr} = \&XML::ParseDTD::_check_id; last; };
/^ID(REF)?$/ && do { $pdtd{'Attr'}->{$elem}->{$attr} = ['ID', '^[A-Za-z_]{1}[A-Za-z0-9_:.-]*$']; last; };
#/^IDREFS$/ && do { $pdtd{'Attr'}->{$elem}->{$attr} = \&XML::ParseDTD::_check_idrefs; last; };
/^IDREFS$/ && do { $pdtd{'Attr'}->{$elem}->{$attr} = ['IDREFS', '^[A-Za-z_]{1}[A-Za-z0-9_:. -]*$']; last; };
#/^CDATA$/ && do { $pdtd{'Attr'}->{$elem}->{$attr} = \&XML::ParseDTD::_check_cdata; last; };
/^CDATA$/ && do { $pdtd{'Attr'}->{$elem}->{$attr} = ['CDATA', '.*']; last; };
#/^PCDATA$/ && do { $pdtd{'Attr'}->{$elem}->{$attr} = \&XML::ParseDTD::_check_pcdata; last; };
/^PCDATA$/ && do { $pdtd{'Attr'}->{$elem}->{$attr} = ['PCDATA', '.*']; last; };
#/^NMTOKEN$/ && do { $pdtd{'Attr'}->{$elem}->{$attr} = \&XML::ParseDTD::_check_nmtoken; last; };
/^NMTOKEN$/ && do { $pdtd{'Attr'}->{$elem}->{$attr} = ['NMTOKEN', '^[A-Za-z0-9_:.-]{1}\S*$']; last; };
/^\((.*)\)$/s && do {
$_ = $1;
s/\s//gs;
my @allowed = split(/\|/s, $_);
if(@allowed > 1) {
$pdtd{'Attr'}->{$elem}->{$attr} = {};
foreach my $value (@allowed) {
$pdtd{'Attr'}->{$elem}->{$attr}->{$value} = 1;
}
}
else {
$pdtd{'Attr'}->{$elem}->{$attr} = $allowed[0];
}
last;
};
}
for($some) {
/#IMPLIED/ && do { last; };
/#REQUIRED/ && do { $pdtd{'ReqAtt'}->{$elem}->{$attr} = 1; last; };
/#FIXED/ && do { $pdtd{'FixAtt'}->{$elem}->{$attr} = 1; last; };
($pdtd{'DefAtt'}->{$elem}->{$attr} = $some) =~ s/("|')//g if($some);
}
($pdtd{'DefAtt'}->{$elem}->{$attr} = $default) =~ s/("|')//g if($default);
}
}
return \%pdtd;
}
#this method proves whether the dtd is already cached and if so if it should be refetched (and reparsed)
sub _validate {
my ($dtd,$rec,$checklm,$timeout) = @_;
my $lmod;
if($dtd =~ m/^([A-za-z]+):\/\//i) {
$lmod = ($checklm < 0 || int(rand($checklm))) ? $rec->{lmod} : LWP::UserAgent->new(timeout => $timeout)->head($dtd)->last_modified;
}
else {
$lmod = (stat($dtd))[9];
}
return ($lmod == $rec->{lmod}) ? 1 : 0;
}
######################################################################
return 1;
__END__
=head1 BUGS
Send bug reports to: bug-XML-ParseDTD@rt.cpan.org (if that doesn't
work feel free to send directly to moritz@freesources.org). Or use the
webinterface at http://rt.cpan.org/NoAuth/Bugs.html?Dist=XML-ParseDTD.
Thanks!
=head1 AUTHOR
(c) 2003, Moritz Sinn. This module is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License (see http://www.gnu.org/licenses/gpl.txt) as published by the Free Software Foundation; either version 2 o...
( run in 1.448 second using v1.01-cache-2.11-cpan-fe3c2283af0 )