SGML-DTDParse

 view release on metacpan or  search on metacpan

lib/SGML/DTDParse/DTD.pm  view on Meta::CPAN

# -*- Perl -*-

package SGML::DTDParse::DTD;

use strict;
use vars qw($VERSION $CVS);

$VERSION = do { my @r=(q$Revision: 2.2 $=~/\d+/g); sprintf "%d."."%03d"x$#r,@r };
$CVS = '$Id: DTD.pm,v 2.2 2005/07/16 03:21:35 ehood Exp $ ';

use Text::DelimMatch;
use SGML::DTDParse;
use SGML::DTDParse::Catalog;
use SGML::DTDParse::Tokenizer;
use SGML::DTDParse::ContentModel;
use SGML::DTDParse::Util qw(entify);

my $DTDVERSION = "1.0";
my $DTDPUBID = "-//Norman Walsh//DTD DTDParse V2.0//EN";
my $DTDSYSID = "dtd.dtd";
my $debug = 0;

{
    package SGML::DTDParse::DTD::ENTITY;

    sub new {
	my($type, $dtd, $entity, $etype, $pub, $sys, $text) = @_;
	my $class = ref($type) || $type;
	my $self = {};

	$text = $dtd->fix_entityrefs($text);

	if ($dtd->{'XML'} && ($pub && !$sys)) {
	    $dtd->status("External entity declaration without system "
			 . "identifer found in XML DTD. "
			 . "This isn't an XML DTD.", 1);
	    $dtd->{'XML'} = 0;
	}

	$self->{'DTD'} = $dtd;
	$self->{'NAME'} = $entity;
	$self->{'TYPE'} = $etype;
	$self->{'NOTATION'} = "";
	$self->{'PUBLIC'} = $pub;
	$self->{'SYSTEM'} = $sys;
	$self->{'TEXT'} = $text;

	if ($etype =~ /^ndata (\S+)$/i) {
	    $self->{'TYPE'} = 'ndata';
	    $self->{'NOTATION'} = $1;
	}

	if ($etype =~ /^cdata (\S+)$/i) {
	    $self->{'TYPE'} = 'cdata';
	    $self->{'NOTATION'} = $1;
	}

	bless $self, $class;
    }

    sub name {
	my $self = shift;
	my $value = shift;
	$self->{'NAME'} = $value if defined($value);
	return $self->{'NAME'};
    }

    sub type {
	my $self = shift;
	my $value = shift;
	$self->{'TYPE'} = $value if defined($value);
	return $self->{'TYPE'};
    }

    sub notation {
	my $self = shift;
	my $value = shift;
	$self->{'NOTATION'} = $value if defined($value);
	return $self->{'NOTATION'};
    }

    sub public {
	my $self = shift;
	my $value = shift;
	$self->{'PUBLIC'} = $value if defined($value);
	return $self->{'PUBLIC'};
    }

    sub system {
	my $self = shift;
	my $value = shift;
	$self->{'SYSTEM'} = $value if defined($value);
	return $self->{'SYSTEM'};
    }

    sub text {
	my $self = shift;
	my $value = shift;
	$self->{'TEXT'} = $value if defined($value);
	return $self->{'TEXT'};
    }

    sub xml {
	my $self = shift;
	my $xml = "";

	$xml .= "<entity name=\"" . $self->name() . "\"\n";
	$xml .= "        type=\"" . $self->type() . "\"\n";
	$xml .= "        notation=\"" . $self->notation() . "\"\n"
	    if $self->notation();

	if ($self->public() || $self->system()) {
	    $xml .= "        public=\"" . $self->public() . "\"\n"
		if $self->public();

lib/SGML/DTDParse/DTD.pm  view on Meta::CPAN

	    $self->{'TYPE'}->{$name} = $attrtype;
	    $self->{'VALS'}->{$name} = $values;
	    $self->{'DEFV'}->{$name} = $defval;
	}
    }

    sub name {
	my $self = shift;
	my $value = shift;
	$self->{'NAME'} = $value if defined($value);
	return $self->{'NAME'};
    }

    sub type {
	return "attlist";
    }

    sub text {
	my $self = shift;
	return $self->{'DECL'};
    }

    sub attribute_list {
	my $self = shift;
	my(@attr) = keys %{$self->{'TYPE'}};
	return @attr;
    }

    sub attribute_type {
	my $self = shift;
	my $attr = shift;
	my $value = shift;
	$self->{'TYPE'}->{$attr} = $value if defined($value);
	return $self->{'TYPE'}->{$attr};
    }

    sub attribute_values {
	my $self = shift;
	my $attr = shift;
	my $value = shift;
	$self->{'VALS'}->{$attr} = $value if defined($value);
	return $self->{'VALS'}->{$attr};
    }

    sub attribute_default {
	my $self = shift;
	my $attr = shift;
	my $value = shift;
	$self->{'DEFV'}->{$attr} = $value if defined($value);
	return $self->{'DEFV'}->{$attr};
    }

    sub xml {
	my $self = shift;
	my $xml = "";
	my(@attr) = $self->attribute_list();
	my($attr, $text);

	$xml .= "<attlist name=\"" . $self->name() . "\">\n";

	my $cdata = $self->{'DECL'};
	$cdata =~ s/&/&amp;/sg;
	$cdata =~ s/</&lt;/sg;

	$xml .= "<attdecl>$cdata</attdecl>\n";

	foreach $attr (@attr) {
	    $xml .= "<attribute name=\"$attr\"\n";

	    $text = $self->attribute_type($attr);
	    # $text =~ s/\%/\&/sg;
	    $xml .= "           type=\"$text\"\n";

	    $text = $self->attribute_values($attr);
	    # $text =~ s/\%/\&/sg;

	    my $enumtype = undef;
	    if ($text =~ /^NOTATION \(/) {
		$enumtype = "notation";
		$text = "(" . $'; # '
	    }

	    if ($text =~ /^\(/) {
		$enumtype = "yes" if !defined($enumtype);
		$xml .= "           enumeration=\"$enumtype\"\n";
		$text =~ s/[\(\)\|]/ /g;
		$text =~ s/\s+/ /g;
		$text =~ s/^\s*//;
		$text =~ s/\s*$//;
	    }

	    $xml .= "           value=\"$text\"\n";

	    $text = $self->attribute_default($attr);
	    # $text =~ s/\%/\&/sg;
	    $xml .= "           default=\"$text\"/>\n";
	}

	$xml .= "</attlist>\n";

	return $xml;
    }
}

{
    package SGML::DTDParse::DTD::NOTATION;

    sub new {
	my($type, $dtd, $notation, $pub, $sys, $text) = @_;
	my $class = ref($type) || $type;
	my $self = {};

	$self->{'DTD'} = $dtd;
	$self->{'NAME'} = $notation;
	$self->{'PUBLIC'} = $pub;
	$self->{'SYSTEM'} = $sys;

	bless $self, $class;
    }

    sub name {
	my $self = shift;
	my $value = shift;
	$self->{'NAME'} = $value if defined($value);
	return $self->{'NAME'};

lib/SGML/DTDParse/DTD.pm  view on Meta::CPAN

    print $fh "     namecase-general=\"", $self->{'NAMECASE_GEN'}, "\"\n";
    print $fh "     namecase-entity=\"", $self->{'NAMECASE_ENT'}, "\"\n";
    print $fh "     xml=\"", $self->{'XML'}, "\"\n";
    print $fh "     system-id=\"", entify($self->{'SYSTEM_ID'}), "\"\n";
    print $fh "     public-id=\"", entify($self->{'PUBLIC_ID'}), "\"\n";
    print $fh "     declaration=\"", $self->{'DECLARATION'}, "\"\n";
    print $fh "     created-by=\"DTDParse V$SGML::DTDParse::VERSION\"\n";
    print $fh "     created-on=\"", scalar(localtime()), "\"\n";
    print $fh ">\n";

    $self->xml_elements($fh);
    print $fh "</dtd>\n";
}

# ======================================================================

sub parse_entity {
    my $self = shift;
    my $dtd = shift;
    my($type, $name) = ('gen', undef);
    my($public, $system, $text) = ("", "", "");
    my($tok);

    ($tok, $dtd) = $self->next_token($dtd);

    if ($tok eq '%') {
	$type = 'param';
	($tok, $dtd) = $self->next_token($dtd);
    }

    $name = $tok;

    $tok = $self->peek_token($dtd);

    if ($tok =~ /^[\"\']/) {
	# we're looking at text...
	($text, $dtd) = $self->next_token($dtd);
	$text = $self->trim_quotes($text);
    } else {
	($tok, $dtd) = $self->next_token($dtd);

	if ($tok =~ /public/i) {
	    ($public, $dtd) = $self->next_token($dtd);
	    $public = $self->trim_quotes($public);
	    $tok = $self->peek_token($dtd);
	    if ($tok ne '>') {
		($system, $dtd) = $self->next_token($dtd);
		$system = $self->trim_quotes($system);
	    }
	} elsif ($tok =~ /system/i) {
	    ($system, $dtd) = $self->next_token($dtd);
	    $system = $self->trim_quotes($system);
	} elsif ($tok =~ /^sdata$/i) {
	    $type = 'sdata';
	    ($text, $dtd) = $self->next_token($dtd);
	    $text = $self->trim_quotes($text);
	} elsif ($tok =~ /^pi$/i) {
	    $type = 'pi';
	    ($text, $dtd) = $self->next_token($dtd);
	    $text = $self->trim_quotes($text);
	} elsif ($tok =~ /^cdata$/i) {
	    $type = 'cdata';
	    ($text, $dtd) = $self->next_token($dtd);
	    $text = $self->trim_quotes($text);
	} else {
	    die "Error: Unexpected declared entity type ($name): $tok\n";
	}
    }

    ($tok, $dtd) = $self->next_token($dtd);

    if ($tok =~ /ndata/i) {
	($tok, $dtd) = $self->next_token($dtd);
	# now $tok contains the notation name
	$type = "ndata $tok";
	($tok, $dtd) = $self->next_token($dtd);
	# now $tok should contain the token after the notation
    } elsif ($tok =~ /cdata/i) {
	($tok, $dtd) = $self->next_token($dtd);
	# now $tok contains the notation name
	$type = "cdata $tok";
	($tok, $dtd) = $self->next_token($dtd);
	# now $tok should contain the token after the notation
    }

    if ($tok ne '>') {
	print "[[", substr($dtd, 0, 100), "]]\n";
	die "Error: Unexpected token in ENTITY declaration: $tok\n";
    }

    print STDERR "ENT: $type $name (P: $public) (S: $system) [$text]\n" if $debug>1;

    $self->status("Entity $name");

    $self->add_entity($name, $type, $public, $system, $text);

    return $dtd;
}

sub parse_element {
    my $self = shift;
    my $dtd = shift;
    my(@names) = ();
    my($stagm, $etagm) = ('', '');
    my $mc = new Text::DelimMatch '\(', '\)[\?\+\*\,]*';
    my($tok, $cm, $expand, $rest);
    my($incl, $excl, $name);

    ($tok, $dtd) = $self->next_token($dtd);

    if ($tok =~ /^\(/) {
	my($pre, $namegrp, $ntok, $rest);
	($pre, $namegrp, $dtd) = $mc->match($tok . $dtd);

	($ntok, $rest) = $self->next_token($namegrp);
	while ($ntok) {
	    if ($ntok =~ /[\|\(\)]/) {
		# nop
	    } else {
		push (@names, $ntok);
	    }
	    ($ntok, $rest) = $self->next_token($rest);
	}
    } else {
	push (@names, $tok);
    }

    # we need to look ahead a little bit here so that we can handle
    # the case where the start/end tag minimization flags are in
    # a parameter entity without accidentally expanding parameter
    # entities in the content model...

    ($tok, $dtd) = $self->next_token($dtd, 1);

    if ($tok =~ /^\%/) {
	# check to see what this is...
	($expand, $rest) = $self->next_token($tok);

	if ($expand =~ /^[\-o]/is) {
	    $stagm = $expand;
	    $dtd = $rest . $dtd;



( run in 1.176 second using v1.01-cache-2.11-cpan-39bf76dae61 )