XML-Smart

 view release on metacpan or  search on metacpan

lib/XML/Smart/Tree.pm  view on Meta::CPAN

    elsif ($module eq 'XML_Smart_Parser') { $xml = XML::Smart::Parser->new() ;}
    elsif ($module eq 'XML_Smart_HTMLParser') { $xml = XML::Smart::HTMLParser->new() ;}
    else { croak("Can't find a parser for XML!") ;}
    
    shift(@_) ;
    if ( $_[0] && ( $_[0] =~ /^\s*(?:XML_\w+|html?|re\w+|smart)\s*$/i ) ) { shift(@_) ;}
    
    _unset_sig_warn() ;
    my ( %args ) = @_ ;
    _reset_sig_warn() ;
    
    if ( $args{lowtag} ) { $xml->{SMART}{tag} = 1 ;}
    if ( $args{upertag} ) { $xml->{SMART}{tag} = 2 ;}
    if ( $args{lowarg} ) { $xml->{SMART}{arg} = 1 ;}
    if ( $args{uperarg} ) { $xml->{SMART}{arg} = 2 ;}
    if ( $args{arg_single} ) { $xml->{SMART}{arg_single} = 1 ;}  
    
    if ( $args{no_order} ) { $xml->{SMART}{no_order} = 1 ;}
    if ( $args{no_nodes} ) { $xml->{SMART}{no_nodes} = 1 ;}
    
    if ( $args{use_spaces} ) { $xml->{SMART}{use_spaces} = 1 ;}
    
    $xml->{SMART}{on_start} = $args{on_start} if ref($args{on_start}) eq 'CODE' ;
    $xml->{SMART}{on_char}  = $args{on_char}  if ref($args{on_char})  eq 'CODE' ;
    $xml->{SMART}{on_end}   = $args{on_end}   if ref($args{on_end})   eq 'CODE' ;
    
    $xml->setHandlers(
	Init => \&_Init ,
	Start => \&_Start ,
	Char  => \&_Char ,
	End   => \&_End ,
	Final => \&_Final ,
	) ;
    
    my $tree ;
    eval { 
	$tree = $xml->parse($data);
    }; croak( $@ ) if( $@ );
    return( $tree ) ;
}




##################################################
##            UNUSED - DEPRECATED.              ##
##################################################

# _clean_data_with_lt( $data )
#
# Scans $data one character at a time looking for a '<' that shows up in
# element text (i.e. a second '<' opened before the previous one was closed
# by '>').  When that happens, the text between the last completed tag and
# the most recent '<' is wrapped in a CDATA section by rewriting two
# characters in place:
#     the '>' at 'last_tag_close'  becomes  '><![CDATA['
#     the '<' at 'prev_lt'         becomes  ']]><'
# Existing '<![CDATA[' ... ']]>' sections are skipped untouched.
#
# Parameters: $data - the XML text to scan.
# Returns:    the (possibly rewritten) text as a single string.
# Croaks:     when a '>' is met before any '<', or when a rewrite would be
#             needed before any tag has been closed.
#
# NOTE(review): a stray '>' in text after a completed tag drives
# 'tag_balance' negative and takes the same rewrite path; that behaviour is
# left exactly as it was.
sub _clean_data_with_lt {

    my $data = shift ;

    # One element per input character.  Indices stay stable for the whole
    # scan because rewrites replace an element's content, never its position.
    my @data = split( //, $data ) ;

    # State Machine Definition:

    my %state_machine =
        (
         'in_cdata_block'            =>  0 ,   # inside <![CDATA[ ... ]]>
         'seen_some_tag'             =>  0 ,   # at least one '<' counted so far
         'need_to_cdata_this'        =>  0 ,   # a nested '<' is waiting for its '>'
         'prev_lt'                   => -1 ,   # index of the most recent counted '<'
         'last_tag_start'            => -1 ,   # recorded only; never read in this sub
         'last_tag_close'            => -1 ,   # index of the last '>' that balanced a tag
         'tag_balance'               =>  0 ,   # counted '<' minus counted '>'
        );

    CHAR: for my $index ( 0 .. $#data ) {

        my $char = $data[ $index ] ;
        next CHAR unless( $char eq '<' or $char eq '>' ) ;

        if( $char eq '<' ) {

            next CHAR if( $state_machine{ 'in_cdata_block' } ) ;

            # Check for possibility of this being a cdata block.  The probe
            # is only attempted when nine characters are really available:
            # slicing past the end of @data would hand undef elements to
            # join() and warn for every '<' near the end of the input.
            my $probe_end = $index + 8 ;
            if(     $probe_end <= $#data
                and join( '', @data[ $index .. $probe_end ] ) eq '<![CDATA[' ) {
                $state_machine{ 'in_cdata_block' } = 1 ;
                next CHAR                              ;
            }

            $state_machine{ 'tag_balance' }++    ;
            $state_machine{ 'prev_lt' } = $index ;

            # Already waiting for the '>' that triggers the rewrite.
            next CHAR if( $state_machine{ 'need_to_cdata_this' } ) ;

            unless( $state_machine{ 'seen_some_tag' } ) {
                $state_machine{ 'seen_some_tag' }  = 1      ;
                $state_machine{ 'last_tag_start' } = $index ;
                next CHAR                                   ;
            }

            if( $state_machine{ 'tag_balance' } == 1 ) {
                $state_machine{ 'last_tag_start' } = $index ;
                next CHAR ;
            }

            ## Seen a < and
            #    1. We are not in a CDATA block
            #    2. This is not the start of a CDATA block
            #    3. An earlier < is still waiting for its >
            $state_machine{ 'need_to_cdata_this' } = 1 ;

        } elsif( $char eq '>' ) {

            if( $state_machine{ 'in_cdata_block' } ) {

                # A CDATA section can only have been entered at least nine
                # characters back, so $index - 2 is never negative here.
                my $possible_cdata_close = join( '', @data[ ( $index - 2 ) .. $index ] ) ;
                if( $possible_cdata_close eq ']]>' ) {
                    $state_machine{ 'in_cdata_block' } = 0 ;
                    $state_machine{ 'tag_balance'    } = 0 ;
                }

                next CHAR ;
            }

            unless( $state_machine{ 'seen_some_tag' } ) {
                croak " > found before < - Input XML seems to have errors!\n";
            }

            $state_machine{ 'tag_balance' }-- ;

            unless( $state_machine{ 'tag_balance' } ) {
                $state_machine{ 'last_tag_close' } = $index ;
                next CHAR                                   ;
            }

            ## Need to add CDATA now.

            my $last_tag_close = $state_machine{ 'last_tag_close' } ;
            my $prev_lt        = $state_machine{ 'prev_lt'        } ;

            # No tag has been closed yet, so there is nowhere to open the
            # CDATA section.  Writing through index -1 would silently
            # overwrite the last character of the input instead.
            if( $last_tag_close < 0 ) {
                croak " Unbalanced < found before any complete tag - Input XML seems to have errors!\n";
            }

            $data[ $last_tag_close ] = '><![CDATA[' ;
            $data[ $prev_lt        ] = ']]><'       ;

            $state_machine{ 'last_tag_close'     } = $index ;
            $state_machine{ 'need_to_cdata_this' } = 0      ;

            $state_machine{ 'tag_balance'        } = 0      ;

        }

    }

    $data = join( '', @data ) ;

    return $data;

}


###########
# GET_URL #
###########


# get_url( $url )
#
# Fetches $url with an HTTP GET through LWP::UserAgent.  The user agent
# string is the LWP default prefixed with "XML::Smart/<version>".
#
# Returns the response content when the request succeeds, undef otherwise.
sub get_url {

  my ( $url ) = @_ ;

  # Loaded on demand, so LWP is only needed when a URL is actually fetched.
  require LWP ;
  require LWP::UserAgent ;

  my $ua = LWP::UserAgent->new() ;

  # Put our own identification in front of LWP's default agent string.
  my $default_agent = $ua->agent() ;
  $ua->agent( "XML::Smart/$XML::Smart::VERSION $default_agent" ) ;

  my $res = $ua->request( HTTP::Request->new(GET => $url) ) ;

  return $res->content if $res->is_success ;
  return undef ;
}

##########
# MODULE #
##########

# module()
#
# Walks %PARSERS and returns the first key with a true value, with every
# '_' replaced by '::' (e.g. 'XML_Smart_Parser' becomes 'XML::Smart::Parser').
# Returns an empty string when no entry is true.
sub module {
  for my $name ( keys %PARSERS ) {
    next unless $PARSERS{$name} ;

    ( my $package = $name ) =~ s/_/::/g ;
    return $package ;
  }

  return '' ;
}

#########
# _INIT #
#########

sub _Init {



( run in 1.185 second using v1.01-cache-2.11-cpan-39bf76dae61 )