PLJava
view release on metacpan - search on metacpan
view release on metacpan or search on metacpan
basiclib/XML/Smart/HTMLParser.pm-txt view on Meta::CPAN
#############################################################################
## Name: HTMLParser.pm
## Purpose: XML::Smart::HTMLParser
## Author: Graciliano M. P.
## Modified by:
## Created: 29/05/2003
## RCS-ID:
## Copyright: (c) 2003 Graciliano M. P.
## Licence: This program is free software; you can redistribute it and/or
## modify it under the same terms as Perl itself
#############################################################################
package XML::Smart::HTMLParser ;
use 5.006 ;              # minimum Perl version required to load this module
use strict qw(vars) ;    # only the 'vars' stricture is enabled; 'refs' and 'subs' stay off
no warnings ;            # all warnings are disabled for the rest of this lexical scope
our ($VERSION , @ISA) ;  # package globals; @ISA is declared here but not assigned in the visible code
$VERSION = '1.0' ;
#######
# NEW #
#######
sub new {
    # Constructor.
    #   Class call  : XML::Smart::HTMLParser->new(Init => \&cb, Start => \&cb, ...)
    #                 builds a fresh object, installs the given handlers through
    #                 setHandlers() and sets the NOENTITY slot to 1.
    #   Object call : $obj->new(...) returns $obj itself, unchanged.
    # Returns the parser object.
    my ($invocant , @rest) = @_ ;

    # Invoked on an existing instance: hand it straight back.
    return $invocant if ref($invocant) ;

    my %handler_for = @rest ;
    my $parser = bless( {} , $invocant ) ;

    $parser->setHandlers(%handler_for) ;
    # NOTE(review): the consumer of NOENTITY is not visible in this part of the file.
    $parser->{NOENTITY} = 1 ;

    return $parser ;
}
###############
# SETHANDLERS #
###############
sub setHandlers {
    # Install the event callbacks on the object.
    #   Arguments : key/value pairs; the recognised keys are
    #               Init, Start, Char, End and Final.
    #   Any of those keys that is missing (or has a false value) is set to
    #   an empty anonymous sub, so every slot always holds a code ref
    #   provided the caller only passes code refs.
    # Returns 1.
    my ($this , %handler_for) = @_ ;

    foreach my $event (qw(Init Start Char End Final)) {
        my $callback = $handler_for{$event} ;
        $callback = sub{} unless $callback ;
        $this->{$event} = $callback ;
    }

    return 1 ;
}
#########
# PARSE #
#########
sub parse {
my $this = shift ;
my $data = shift ;
$data =~ s/\r\n?/\n/gs ;
$data =~ s/^\s*<\?xml.*?>//gsi ;
my @parsed ;
while( $data =~ /(.*?)<(.*?)>/gsi ) {
my $cont = $1 ;
my $markup = $2 ;
my ($more_q , @args) = &parse_tag($markup) ;
while ($more_q) {
my $more ;
($more) = ( $data =~ /\G(.*?)>/s ) ;
pos($data) += length($more) + 1 ;
$markup = $markup.'>'.$more ;
($more_q , @args) = &parse_tag($markup) ;
}
if ($cont =~ /\S/s) { push(@parsed , 'Char' , $cont) ;}
if ($args[0] =~ /^\/(.*)/) { push(@parsed , 'End' , $1) ;}
elsif (@args[-1] =~ /^\/$/) {
pop @args ;
push(@parsed , 'StartEnd' , [@args]) ;
view all matches for this distribution - view release on metacpan - search on metacpan
( run in 1.632 second using v1.00-cache-2.02-grep-82fe00e-cpan-f5108d614456 )