PLJava

 view release on metacpan or  search on metacpan

basiclib/XML/Smart/HTMLParser.pm-txt  view on Meta::CPAN

#############################################################################
## Name:        HTMLParser.pm
## Purpose:     XML::Smart::HTMLParser
## Author:      Graciliano M. P.
## Modified by:
## Created:     29/05/2003
## RCS-ID:      
## Copyright:   (c) 2003 Graciliano M. P.
## Licence:     This program is free software; you can redistribute it and/or
##              modify it under the same terms as Perl itself
#############################################################################

package XML::Smart::HTMLParser ;
# Minimum Perl version required by this module.
use 5.006 ;

# Only the 'vars' stricture is enabled; 'refs' and 'subs' are left off.
use strict qw(vars) ;
# All warnings are disabled for the rest of this lexical scope.
no warnings ;

# Package globals: the module version and the inheritance list.
# @ISA is only declared here; nothing in this part of the file populates it.
our ($VERSION , @ISA) ;
$VERSION = '1.0' ;

#######
# NEW #
#######

# Constructor.
#
# Usage:   my $parser = XML::Smart::HTMLParser->new( Start => \&cb , ... ) ;
#
# Arguments after the invocant are treated as a key/value list and passed
# straight to setHandlers().
#
# Returns: a new object blessed into the invoking class, with its handlers
#          installed and the NOENTITY flag set to 1. When invoked on an
#          existing object, that same object is returned unchanged.
sub new {
  my ($proto , %args) = @_ ;

  # Called as an instance method: hand the instance back untouched.
  return $proto if ref $proto ;

  # Called as a class method: $proto is the class name.
  my $self = bless {} , $proto ;

  $self->setHandlers(%args) ;
  $self->{NOENTITY} = 1 ;

  return $self ;
}

###############
# SETHANDLERS #
###############

# Install the event callbacks on the object.
#
# Arguments: a key/value list; the recognised keys are Init, Start, Char,
#            End and Final.
#
# Every recognised slot is (re)assigned on each call: a key that is missing
# or holds a false value gets an empty no-op sub, so slots can always be
# invoked as code references.
#
# Returns: 1.
sub setHandlers {
  my ($this , %args) = @_ ;

  foreach my $event (qw(Init Start Char End Final)) {
    $this->{$event} = $args{$event} || sub{} ;
  }

  return 1 ;
}

#########
# PARSE #
#########

sub parse {
  my $this = shift ;
  my $data = shift ;
  
  $data =~ s/\r\n?/\n/gs ;
  
  $data =~ s/^\s*<\?xml.*?>//gsi ;
  
  my @parsed ;
  
  while( $data =~ /(.*?)<(.*?)>/gsi ) {
    my $cont = $1 ;
    my $markup = $2 ;
    
    my ($more_q , @args) = &parse_tag($markup) ;
    
    while ($more_q) {
      my $more ;
      ($more) = ( $data =~ /\G(.*?)>/s ) ;
      pos($data) += length($more) + 1 ;
      $markup = $markup.'>'.$more ;
      ($more_q , @args) = &parse_tag($markup) ;
    }
    
    if ($cont =~ /\S/s) { push(@parsed , 'Char' , $cont) ;}
    
    if ($args[0] =~ /^\/(.*)/) { push(@parsed , 'End' , $1) ;}
    elsif (@args[-1] =~ /^\/$/) {
      pop @args ;
      push(@parsed , 'StartEnd' , [@args]) ;

 view all matches for this distribution
 view release on metacpan -  search on metacpan

( run in 1.632 second using v1.00-cache-2.02-grep-82fe00e-cpan-f5108d614456 )