perlSGML.1997Sep

 view release on metacpan or  search on metacpan

doc/SGML..Parser.sgml  view on Meta::CPAN


<p>SGML::Parser - SGML instance parser</p>

<!-- =================================================================== -->
<hr><h2><a name="Synopsis">Synopsis</a></h2>
<pre>
  package MyParser;
  use SGML::Parser;
  @ISA = qw( SGML::Parser );

  sub cdata { ... }
  sub char_ref { ... }
  sub comment_decl { ... }
  sub end_tag { ... }
  sub entity_ref { ... }
  sub ignored_data { ... }
  sub marked_sect_close { ... }
  sub marked_sect_open { ... }
  sub parm_entity_ref { ... }
  sub processing_inst { ... }
  sub start_tag { ... }

doc/SGML..Parser.sgml  view on Meta::CPAN

</p>

<!-- =================================================================== -->
<hr><h2><a name="Callback_Methods">Callback Methods</a></h2>

<p>The following methods are intended to be redefined by a derived
class to handle the processing events generated by the
<strong>parse_data</strong> method.
</p>
<ul>
<li><a href="#cdata">cdata</a></li>
<li><a href="#char_ref">char_ref</a></li>
<li><a href="#comment_decl">comment_decl</a></li>
<li><a href="#end_tag">end_tag</a></li>
<li><a href="#entity_ref">entity_ref</a></li>
<li><a href="#error">error</a></li>
<li><a href="#ignored_data">ignored_data</a></li>
<li><a href="#marked_sect_close">marked_sect_close</a></li>
<li><a href="#marked_sect_open">marked_sect_open</a></li>
<li><a href="#parm_entity_ref">parm_entity_ref</a></li>
<li><a href="#processing_inst">processing_inst</a></li>
<li><a href="#start_tag">start_tag</a></li>
</ul>

<!-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -->
<hr size=0 width="50%" align=left noshade>
<h3><a name="cdata">cdata</a></h3>
<pre>
    $parser-><strong>cdata</strong>(<var>$data</var>);
</pre>

<h4>Arguments:</h4>
<dl>
<dt><var>$data</var>
<dd>Character data.
</dl>

<h4>Return:</h4>
<p>N/A
</p>

<h4>Description:</h4>
<p><strong>cdata</strong> is invoked when character data is encountered.
The character data is passed into the method.  Multiple lines of
character data may generate multiple <strong>cdata</strong> calls.
</p>

<!-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -->
<hr size=0 width="50%" align=left noshade>
<h3><a name="char_ref">char_ref</a></h3>
<pre>
    $parser-><strong>char_ref</strong>(<var>$value</var>);
</pre>

<h4>Arguments:</h4>

doc/html/SGML..Parser.html  view on Meta::CPAN


<p>SGML::Parser - SGML instance parser</p>

<!-- =================================================================== -->
<hr><h2><a name="Synopsis">Synopsis</a></h2>
<pre>
  package MyParser;
  use SGML::Parser;
  @ISA = qw( SGML::Parser );

  sub cdata { ... }
  sub char_ref { ... }
  sub comment_decl { ... }
  sub end_tag { ... }
  sub entity_ref { ... }
  sub ignored_data { ... }
  sub marked_sect_close { ... }
  sub marked_sect_open { ... }
  sub parm_entity_ref { ... }
  sub processing_inst { ... }
  sub start_tag { ... }

doc/html/SGML..Parser.html  view on Meta::CPAN

</p>

<!-- =================================================================== -->
<hr><h2><a name="Callback_Methods">Callback Methods</a></h2>

<p>The following methods are intended to be redefined by a derived
class to handle the processing events generated by the
<strong>parse_data</strong> method.
</p>
<ul>
<li><a href="#cdata">cdata</a></li>
<li><a href="#char_ref">char_ref</a></li>
<li><a href="#comment_decl">comment_decl</a></li>
<li><a href="#end_tag">end_tag</a></li>
<li><a href="#entity_ref">entity_ref</a></li>
<li><a href="#error">error</a></li>
<li><a href="#ignored_data">ignored_data</a></li>
<li><a href="#marked_sect_close">marked_sect_close</a></li>
<li><a href="#marked_sect_open">marked_sect_open</a></li>
<li><a href="#parm_entity_ref">parm_entity_ref</a></li>
<li><a href="#processing_inst">processing_inst</a></li>
<li><a href="#start_tag">start_tag</a></li>
</ul>

<!-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -->
<hr size=0 width="50%" align=left noshade>
<h3><a name="cdata">cdata</a></h3>
<pre>
    $parser-><strong>cdata</strong>(<var>$data</var>);
</pre>

<h4>Arguments:</h4>
<dl>
<dt><var>$data</var>
<dd>Character data.
</dl>

<h4>Return:</h4>
<p>N/A
</p>

<h4>Description:</h4>
<p><strong>cdata</strong> is invoked when character data is encountered.
The character data is passed into the method.  Multiple lines of
character data may generate multiple <strong>cdata</strong> calls.
</p>

<!-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -->
<hr size=0 width="50%" align=left noshade>
<h3><a name="char_ref">char_ref</a></h3>
<pre>
    $parser-><strong>char_ref</strong>(<var>$value</var>);
</pre>

<h4>Arguments:</h4>

doc/html/sgml.pl.html  view on Meta::CPAN

<dl>
<dt>An open tag</dt>
<dd><code>&amp;$sgml'OpenTagFunc($gi, $attribute_list);</code></dd>
<dt>An end tag</dt>
<dd><code>&amp;$sgml'EndTagFunc($gi);</code></dd>
<dt>A comment</dt>
<dd><code>&amp;$sgml'CommentFunc(*comment_text);</code></dd>
<dt>Processing instruction</dt>
<dd><code>&amp;$sgml'ProcInsFunc(*pi_text);</code></dd>
<dt>Character data</dt>
<dd><code>&amp;$sgml'CdataFunc(*cdata);</code></dd>
</dl>

<h4>Notes</h4>
<ul>
<li><p><code>SGMLread_sgml</code> is not intended to parse a DTD, or an
SGML declaration statement, '<code>&lt;!SGML ...&gt;</code>'.  It is
designed to parse SGML instances.  If a "<code>&lt;!</code>" sequence
is encountered (and not part of a comment declaration),
<code>SGMLread_sgml</code> tries to ignore the declaration.
</p></li>

doc/sgml.pl.sgml  view on Meta::CPAN

<dl>
<dt>An open tag</dt>
<dd><code>&amp;$sgml'OpenTagFunc($gi, $attribute_list);</code></dd>
<dt>An end tag</dt>
<dd><code>&amp;$sgml'EndTagFunc($gi);</code></dd>
<dt>A comment</dt>
<dd><code>&amp;$sgml'CommentFunc(*comment_text);</code></dd>
<dt>Processing instruction</dt>
<dd><code>&amp;$sgml'ProcInsFunc(*pi_text);</code></dd>
<dt>Character data</dt>
<dd><code>&amp;$sgml'CdataFunc(*cdata);</code></dd>
</dl>

<h4>Notes</h4>
<ul>
<li><p><code>SGMLread_sgml</code> is not intended to parse a DTD, or an
SGML declaration statement, '<code>&lt;!SGML ...&gt;</code>'.  It is
designed to parse SGML instances.  If a "<code>&lt;!</code>" sequence
is encountered (and not part of a comment declaration),
<code>SGMLread_sgml</code> tries to ignore the declaration.
</p></li>

lib/SGML/DTD.pm  view on Meta::CPAN


##---------------------------------------------------------------------------
##	reset() initializes all instance variables.
##
sub reset {
    my $this = shift;

    $this->{ParEntity} 		= {}; # Int parameter entities
    $this->{PubParEntity} 	= {}; # Ext public parameter entities
    $this->{SysParEntity} 	= {}; # Ext system parameter entities
    $this->{GenEntity} 		= {}; # (pcdata) general entities
    $this->{StartTagEntity} 	= {}; # Start tag entities (STARTTAG)
    $this->{EndTagEntity} 	= {}; # End tag entities (ENDTAG)
    $this->{MSEntity} 		= {}; # Marked section ents (MS)
    $this->{MDEntity} 		= {}; # Markup declaration ents (MD)
    $this->{PIEntity} 		= {}; # Processing instructions ents (PI)
    $this->{CDataEntity} 	= {}; # Character data entities (CDATA)
    $this->{SDataEntity} 	= {}; # System data ents (SDATA)

    ## Following ent structures currently not used.
    $this->{PubEntity} 		= {}; # External public ents (PUBLIC)
    $this->{SysEntity} 		= {}; # External system ents (SYSTEM)
    $this->{SysCDEntity} 	= {}; # Ext cdata ents (SYSTEM CDATA)
    $this->{SysNDEntity} 	= {}; # Ext non-SGML ents (SYSTEM NDATA)
    $this->{SysSDEntity} 	= {}; # Ext sdata ents (SYSTEM SDATA)
    $this->{SysSubDEntity} 	= {}; # Ext sub doc ents (SYSTEM SUBDOC)

    $this->{SysNotation} 	= {}; # Notations w/SYSTEM ids
    $this->{PubNotation} 	= {}; # Notations w/PUBLIC ids

    $this->{ShortRef} 		= {}; # Short ref mappings
    $this->{UseMap} 		= {}; # Maps in use (<!USEMAP ...)

lib/SGML/DTD.pm  view on Meta::CPAN

	$this->do_ge_starttag($name, $line), last GENSW
	    if $tmp =~ /^\s*$STARTTAG\s*$/io;
	$this->do_ge_endtag($name, $line), last GENSW
	    if $tmp =~ /^\s*$ENDTAG\s*$/io;
	$this->do_ge_ms($name, $line), last GENSW
	    if $tmp =~ /^\s*$MS\s*$/io;
	$this->do_ge_md($name, $line), last GENSW
	    if $tmp =~ /^\s*$MD\s*$/io;
	$this->do_ge_pi($name, $line), last GENSW
	    if $tmp =~ /^\s*$PI\s*$/io;
	$this->do_ge_cdata($name, $line), last GENSW
	    if $tmp =~ /^\s*$CDATA\s*$/io;
	$this->do_ge_sdata($name, $line), last GENSW
	    if $tmp =~ /^\s*$SDATA\s*$/io;
	$this->do_ge_public($name, $line), last GENSW
	    if $tmp =~ /^\s*$PUBLIC\s*$/io;
	$this->do_ge_system($name, $line), last GENSW
	    if $tmp =~ /^\s*$SYSTEM\s*$/io;
	$this->{_AGE}{$name} = $this->{GenEntity}{$name} = $tmp;
    }
    push(@{$this->{GenEntities}}, $name);

lib/SGML/DTD.pm  view on Meta::CPAN

##	do_ge_pi() records a general entity whose declaration matched $PI.
##	The next group taken from $line is stored as the entity's text in
##	{PIEntity}; the same text wrapped in $pio_ and $pic_ is stored
##	in {_AGE}.
##
sub do_ge_pi {
    my($this, $name, $line) = @_;
    my $pi_text = &get_next_group($line);

    $this->{PIEntity}{$name} = $pi_text;
    $this->{_AGE}{$name} = $pio_ . $pi_text . $pic_;
}

##	do_ge_cdata() records a general entity whose declaration matched
##	$CDATA.  The next group taken from $line is stored as the entity's
##	text in {CDataEntity}.
##
sub do_ge_cdata {
    my($this, $name, $line) = @_;
    my $cdata_text = &get_next_group($line);

    $this->{CDataEntity}{$name} = $cdata_text;
}

sub do_ge_sdata {
    my $this = shift;

lib/SGML/Parser.pm  view on Meta::CPAN

##				$line_no_start);
##
##	Only the first argument is required.  The others are optional.
##
##	The routine calls callback methods for the various events
##	that can occur.  It is up to the methods to make sense of
##      the data.
##
##	The following lists the various methods invoked during parsing:
##
##		   $this->cdata($cdata);
##		   $this->char_ref($funcname_or_charnum);
##		   $this->comment_decl(\@comments);
##		   $this->end_tag($gi);
##	    $txt = $this->entity_ref($entname);
##		   $this->ignored_data($data);
##		   $this->marked_sect_close();
##		   $this->marked_sect_open($status_keyword, $status_spec);
##	    $txt = $this->parm_entity_ref($entname);
##		   $this->processing_inst($pidata);
##		   $this->start_tag($gi, $attr_spec);

lib/SGML/Parser.pm  view on Meta::CPAN

		last LOOP  unless defined($buf = $this->_get_line());
	    }

	    #--------------------------------------------------------------
	    # Check for markup.  Choose match that occurs earliest in
	    # string.
	    #--------------------------------------------------------------

	    ($before, $after, $type, $m1) = (undef,'','','');

	    # Pcdata mode checks
	    if ($this->{'mode'} == $ModePCData) {
		if ($buf =~ m@<([!?/>$namestart])@o) {
		    $before = $`;  $m1 = $1;  $after = $';
		    BLK: {
			if ($m1 eq '!') { $type = $TypeMDO;  last BLK; }
			if ($m1 eq '?') { $type = $TypePIO;  last BLK; }
			if ($m1 eq '/') { $type = $TypeETagO;  last BLK; }
			if ($m1 eq '>') { $type = $TypeSTagO;  last BLK; }
			$type = $TypeSTagO;
		    }

lib/SGML/Parser.pm  view on Meta::CPAN

		$this->{'mode'} == $ModeMSRCData) {

		if ($buf =~ m@\&([#$namestart])@o) {
		    if (!defined($before) or length($before) > length($`)) {
			$before = $`;  $m1 = $1;  $after = $';
			$type = $TypeERO;
		    }
		}
	    }

	    # Check for cdata mode
	    if ($this->{'mode'} == $ModeCData) {
		if ($buf =~ m|<(/)|) {
		    if (!defined($before) or length($before) > length($`)) {
			$before = $`;  $m1 = $1;  $after = $';
			$type = $TypeETagO;
		    }
		}
	    }

	    # Check for marked section close

lib/SGML/Parser.pm  view on Meta::CPAN

		    if ($type == $TypeMSC and length($before) > length($`)) {
			$this->{'_open_ms_ign'}++;
		    }
		}
	    }

	    #--------------------------------------------------------------
	    # Now, check what the type is and process accordingly.
	    #--------------------------------------------------------------
	    
	    ## Invoke cdata callback if any before text -------------------
	    if ($before ne '') {
		$this->{'mode'} == $ModeIgnore ?
		    $this->ignored_data($before) : $this->cdata($before);
	    }

	    ## Entity reference -------------------------------------------
	    if ($type == $TypeERO) {
		$buf = $after;
		$name = $m1;
	
		if ($name eq '#') {	# Character reference
		    if ($buf =~ s/^([$namechars]+);?//o) {
			$name = $1;

lib/SGML/Parser.pm  view on Meta::CPAN

					       "Comment declaration " .
					       "not closed");
				last COMDCL;
			    }
			}
			if ($buf =~ s/^\s*--//o) {
			    next COMDCL;
			} elsif ($buf =~ s/^\s*>//o) {
			    last COMDCL;
			} else {	# punt
			    $this->error("Invalid cdata outside of comment");
			    next COMDCL;
			}
		    }
		    $this->comment_decl(\@comms);

		    next LOOP;
		} # end comment

		$buf = "<!" . $buf;

	    } # end markup declaration

	
	    ## If not markup, invoke cdata callback -----------------------
	    $this->{'mode'} == $ModeIgnore ?
		$this->ignored_data($buf) :
		$this->cdata($buf);
	    $buf = '';
	}

    }; # End eval

    $this->{'_input'} = pop(@{$this->{'_input_stack'}});

    # Return buffer.  May contain data if parsing was aborted, otherwise
    # should be undef.
    $buf;

lib/SGML/Parser.pm  view on Meta::CPAN


##########################################################################

##**********************************************************************##
##	CALLBACK METHODS
##**********************************************************************##
##	Subclasses are to redefine callback methods to perform
##	whatever actions are desired.
##**********************************************************************##

##	Default (do-nothing) callbacks.  The call signature used by the
##	parser is noted above each stub.  entity_ref and parm_entity_ref
##	are the only callbacks whose return value is captured by the
##	parser ($txt = ...); their default stubs return undef.

##	$this->cdata($cdata);
sub cdata { }
##	$this->char_ref($funcname_or_charnum);
sub char_ref { }
##	$this->comment_decl(\@comments);
sub comment_decl { }
##	$this->end_tag($gi);
sub end_tag { }
##	$txt = $this->entity_ref($entname);
sub entity_ref { undef }
##	$this->ignored_data($data);
sub ignored_data { }
##	$this->marked_sect_close();
sub marked_sect_close { }
##	$this->marked_sect_open($status_keyword, $status_spec);
sub marked_sect_open { }
##	$txt = $this->parm_entity_ref($entname);
sub parm_entity_ref { undef }
##	$this->processing_inst($pidata);
sub processing_inst { }
##	$this->start_tag($gi, $attr_spec);
sub start_tag { }

lib/SGML/StripParser.pm  view on Meta::CPAN


##	set_ign_parm_ents() flags every parameter entity name passed in
##	as 'IGNORE' in the {_stripout}{_ignents} table.
##
##	Usage:	$parser->set_ign_parm_ents(@entity_names);
##
sub set_ign_parm_ents {
    my $this = shift;

    # A hash slice is required here: one key per name, each mapped to
    # 'IGNORE'.  A scalar element subscript ({@_} after a '$' sigil)
    # would instead use the argument count as a single key and assign
    # the string 'IGNORE' repeated that many times.
    @{$this->{'_stripout'}{_ignents}}{@_} = ('IGNORE') x scalar(@_);
}

##**********************************************************************##
##	Redefined SGML::Parser Callback Methods
##**********************************************************************##

##	cdata callback: writes the character data (first argument after
##	the object) to the filehandle held in {_stripout}{_fh}.
sub cdata {
    my($this, $text) = @_;
    my $out = $this->{'_stripout'}{'_fh'};

    print $out $text;
}

sub char_ref {
    my $this = shift;
    my $val = shift;
    my $str = '';

    if ($this->{'_stripout'}{'_charset'}) {

lib/dtd.pl  view on Meta::CPAN

	&do_ge_starttag($name, *line), last GENSW
	    if $tmp =~ /^\s*$STARTTAG\s*$/io;
	&do_ge_endtag($name, *line), last GENSW
	    if $tmp =~ /^\s*$ENDTAG\s*$/io;
	&do_ge_ms($name, *line), last GENSW
	    if $tmp =~ /^\s*$MS\s*$/io;
	&do_ge_md($name, *line), last GENSW
	    if $tmp =~ /^\s*$MD\s*$/io;
	&do_ge_pi($name, *line), last GENSW
	    if $tmp =~ /^\s*$PI\s*$/io;
	&do_ge_cdata($name, *line), last GENSW
	    if $tmp =~ /^\s*$CDATA\s*$/io;
	&do_ge_sdata($name, *line), last GENSW
	    if $tmp =~ /^\s*$SDATA\s*$/io;
	&do_ge_public($name, *line), last GENSW
	    if $tmp =~ /^\s*$PUBLIC\s*$/io;
	&do_ge_system($name, *line), last GENSW
	    if $tmp =~ /^\s*$SYSTEM\s*$/io;
	$_AGE{$name} = $GenEntity{$name} = $tmp;
    }
    push(@GenEntities, $name);

lib/dtd.pl  view on Meta::CPAN


##	do_ge_pi() handles a general entity whose declaration matched $PI.
##	Arguments: the entity name and a typeglob (*line) for the text
##	being parsed.  Stores the entity's text in %PIEntity and the text
##	wrapped in $pio_/$pic_ in %_AGE.
##
sub do_ge_pi {
    # *line is aliased to the caller's glob, so $line here is the
    # caller's variable, not a copy.
    local($name, *line) = @_;
    local($tmp);

    # Take the next group from the aliased line.
    $tmp = &get_next_group(*line);
    $PIEntity{$name} = $tmp;
    # NOTE(review): $pio_ and $pic_ are presumably the processing
    # instruction open/close delimiters -- confirm where they are defined.
    $_AGE{$name} = $pio_ . $tmp . $pic_;
}

##	do_ge_cdata() handles a general entity whose declaration matched
##	$CDATA.  Arguments: the entity name and a typeglob (*line) for the
##	text being parsed.  Stores the entity's text in %CDataEntity.
##
sub do_ge_cdata {
    # *line is aliased to the caller's glob, so $line here is the
    # caller's variable, not a copy.
    local($name, *line) = @_;
    local($tmp);

    # Take the next group from the aliased line.
    $tmp = &get_next_group(*line);
    $CDataEntity{$name} = $tmp;
}

sub do_ge_sdata {
    local($name, *line) = @_;
    local($tmp);

lib/sgml.pl  view on Meta::CPAN


##---------------------------------------------------------------------------
##	SGMLread_sgml() reads SGML markup.  A callback is called when
##	the following occurs:
##
##	    o	An open tag:	&$OpenTagFunc($gi, $attribute_list)
##	    o	An end tag:	&$EndTagFunc($gi)
##	    o	A comment:	&$CommentFunc(*comment_text);
##	    o	Processing instruction:
##				&$ProcInsFunc(*pi_text);
##	    o	Character data: &$CdataFunc(*cdata);
##
##	Argument descriptions:
##	    $handle :	Filehandle containing the SGML instance.
##
##	Notes:
##	    o	read_sgml() is not intended to parse a DTD, or an
##		SGML declaration statement, '<!SGML ...>'.  It is
##		designed to parse SGML instances.  If a "<!" sequence
##		is encountered (and not part of a comment declaration),
##		read_sgml() tries to ignore the declaration.

old/stripsgml  view on Meta::CPAN

);

##---------------------------------------------------------------------------##
##	Globals
##---------------------------------------------------------------------------##

##	Variable to hold current URL
$Url	= '';

##	Register callbacks to sgml.pl
$sgml'CdataFunc		= "main'cdata_cb";
$sgml'OpenTagFunc	= "main'open_tag_cb";
$sgml'EndTagFunc	= "main'close_tag_cb";

##---------------------------------------------------------------------------##
				##------------##
				## Begin MAIN ##
				##------------##
{

&get_cli_opts();

old/stripsgml  view on Meta::CPAN

    &Usage() unless
    &NGetOpt(
	"html",		# Generate compact listing

	"help"    	# Help message
    );
    &Usage()	if defined($opt_help);
    $HTML = 1	if defined($opt_html);
}

##	cdata_cb() is the character data callback (registered through
##	$sgml'CdataFunc).  It receives a typeglob for the character data,
##	replaces every "&name;" reference with the result of
##	&expand_ent(name), and prints the resulting text to STDOUT.
##
sub cdata_cb {
    # $data is aliased to the caller's scalar through the glob, so the
    # substitution below edits the caller's text in place.
    local(*data) = shift;

    # Reference names may contain '#', word characters, '.', '_' and '-'.
    # The hyphen is placed last so that it is a literal; written as
    # "\w-." it forms a false range and triggers a regexp warning.
    $data =~ s/\&([#\w._-]+);/&expand_ent($1)/ge;
    print STDOUT $data;
}

sub open_tag_cb {
    local($gi, $attr) = ($_[0], $_[1]);

    if ($HTML && ($gi =~ /^A$/i)) {

sgm/IBMIDDoc.dtd  view on Meta::CPAN

<!ELEMENT  FragmentRef          - O (Title)     -- Fragment Reference -->
<!ATTLIST  FragmentRef
    FragID           NAME         #CONREF      -- Must be Fragment ID --
    Repeat           CDATA        #IMPLIED           -- Default is NO --
    RepID            NAME         #IMPLIED       -- Must be REPSEP ID --
    RepSep           CDATA        #IMPLIED  -- Repeat separator chars --
    OptReq           (Opt | Req| Def)  Req
    %Common.Atts.NoLexMdl.NoRefType;

    LexModel         CDATA        #FIXED
                          "REPEAT ('YES' | 'NO' | number | rcdata)
                           &Common.LexModel;"
    RefType          CDATA        #FIXED
                          "RepID RepSep FragID Fragment &Common.RefType;"
    TargetType       CDATA        #FIXED    "Fragment"
    InfoMaster       NAME         #FIXED    "SymbolicReference"
>

<!ELEMENT  Range            - O (Title, RangeStart?, RangeEnd?) >
<!ATTLIST  Range
    Repeat           CDATA        #IMPLIED
    RepID            NAME         #IMPLIED -- Must be REPSEP ID --
    SetID            NAME         #IMPLIED -- Must be SET ID --
    RepSep           CDATA        #IMPLIED -- Repeat separator chars --
    OptReq           (Opt | Req| Def)  Req
    %Common.Atts.NoLexMdl.NoRefType;

    LexModel         CDATA        #FIXED
                          "REPEAT ('YES' | 'NO' | number | rcdata)
                           &Common.LexModel;"
    RefType          CDATA        #FIXED
                          "REPID REPSEP SETID SET &Common.RefType;"
>

<!ELEMENT  (RangeStart | RangeEnd)
                            - O (%DataPool;)* >
<!ATTLIST  (RangeStart | RangeEnd)
    %Common.Atts;
>



( run in 0.356 second using v1.01-cache-2.11-cpan-454fe037f31 )