XML-Smart
view release on metacpan or search on metacpan
lib/XML/Smart/Tree.pm view on Meta::CPAN
elsif ($module eq 'XML_Smart_Parser') { $xml = XML::Smart::Parser->new() ;}
elsif ($module eq 'XML_Smart_HTMLParser') { $xml = XML::Smart::HTMLParser->new() ;}
else { croak("Can't find a parser for XML!") ;}
shift(@_) ;
if ( $_[0] && ( $_[0] =~ /^\s*(?:XML_\w+|html?|re\w+|smart)\s*$/i ) ) { shift(@_) ;}
_unset_sig_warn() ;
my ( %args ) = @_ ;
_reset_sig_warn() ;
if ( $args{lowtag} ) { $xml->{SMART}{tag} = 1 ;}
if ( $args{upertag} ) { $xml->{SMART}{tag} = 2 ;}
if ( $args{lowarg} ) { $xml->{SMART}{arg} = 1 ;}
if ( $args{uperarg} ) { $xml->{SMART}{arg} = 2 ;}
if ( $args{arg_single} ) { $xml->{SMART}{arg_single} = 1 ;}
if ( $args{no_order} ) { $xml->{SMART}{no_order} = 1 ;}
if ( $args{no_nodes} ) { $xml->{SMART}{no_nodes} = 1 ;}
if ( $args{use_spaces} ) { $xml->{SMART}{use_spaces} = 1 ;}
$xml->{SMART}{on_start} = $args{on_start} if ref($args{on_start}) eq 'CODE' ;
$xml->{SMART}{on_char} = $args{on_char} if ref($args{on_char}) eq 'CODE' ;
$xml->{SMART}{on_end} = $args{on_end} if ref($args{on_end}) eq 'CODE' ;
$xml->setHandlers(
Init => \&_Init ,
Start => \&_Start ,
Char => \&_Char ,
End => \&_End ,
Final => \&_Final ,
) ;
my $tree ;
eval {
$tree = $xml->parse($data);
}; croak( $@ ) if( $@ );
return( $tree ) ;
}
##################################################
## UNUSED - DEPRECATED. ##
##################################################
# _clean_data_with_lt( $data )
#
# Scans an XML string character by character and, where a '<' shows up while
# an earlier '<' is still unclosed, wraps the text between the previous tag's
# closing '>' and the most recent '<' in a CDATA section.
#
#   $data   - the XML text to scan (string).
#   returns - the text re-joined from the (possibly rewritten) characters.
#   croaks  - when a '>' is met outside a CDATA block before any '<' was seen.
#
# Example traced through the code below:
#   '<a>1 < 2</a>'  becomes  '<a><![CDATA[1 < 2]]></a>'
#
# Existing '<![CDATA[ ... ]]>' sections are skipped untouched.
sub _clean_data_with_lt {
my $data = shift ;
# One array element per character; the CDATA markers are later spliced in by
# overwriting single elements with longer strings, so indexes stay stable.
my @data = split( //, $data ) ;
my $data_len = @data ;
# State Machine Definition:
#   in_cdata_block     - true while between '<![CDATA[' and ']]>'
#   seen_some_tag      - true once the first non-CDATA '<' has been met
#   need_to_cdata_this - true once a '<' was met while another was still open
#   prev_lt            - index of the most recent non-CDATA '<'
#   last_tag_start     - index of the '<' that opened the current tag
#                        (written below but never read inside this sub)
#   last_tag_close     - index of the '>' that last brought the balance to zero
#                        (or of the '>' that triggered the last CDATA wrap)
#   tag_balance        - count of '<' seen minus '>' seen since the last reset
my %state_machine =
(
'in_cdata_block' => 0 ,
'seen_some_tag' => 0 ,
'need_to_cdata_this' => 0 ,
'prev_lt' => -1 ,
'last_tag_start' => -1 ,
'last_tag_close' => -1 ,
'tag_balance' => 0 ,
);
CHAR: for( my $index = 0; $index < $data_len; $index++ ) {
{
# Only '<' and '>' drive the state machine; everything else is skipped.
# NOTE(review): warnings are silenced for this comparison; the reason is
# not evident from this sub - confirm before removing.
no warnings ;
next CHAR unless( $data[ $index ] eq '<' or $data[ $index ] eq '>' ) ;
}
if( $data[ $index ] eq '<' ) {
# A '<' inside an existing CDATA block is literal text.
next CHAR if( $state_machine{ 'in_cdata_block' } ) ;
{
# Check for possibility of this being a cdata block
# (compares the 9 characters starting here with '<![CDATA[').
# NOTE(review): within the last 8 characters of the input this slice runs
# past the end of @data and yields undef elements.
my $possible_cdata_block = join( '', @data[ $index .. ( $index + 8 ) ] ) ;
if( $possible_cdata_block eq '<![CDATA[' ) {
$state_machine{ 'in_cdata_block' } = 1 ;
next CHAR ;
}
}
$state_machine{ 'tag_balance' }++ ;
$state_machine{ 'prev_lt' } = $index ;
# Already inside a span flagged for wrapping: just keep tracking prev_lt.
next CHAR if( $state_machine{ 'need_to_cdata_this' } ) ;
# Very first '<' of the document.
unless( $state_machine{ 'seen_some_tag' } ) {
$state_machine{ 'seen_some_tag' } = 1 ;
$state_machine{ 'last_tag_start' } = $index ;
next CHAR ;
}
# Balance of exactly 1 means no other '<' is pending: a regular tag start.
if( $state_machine{ 'tag_balance' } == 1 ) {
$state_machine{ 'last_tag_start' } = $index ;
next CHAR ;
}
# Otherwise a '<' arrived while a previous one is still unclosed; flag the
# span so the next unbalanced '>' triggers the CDATA wrap.
$state_machine{ 'need_to_cdata_this' } = 1 ;
## Seen a < and
# 1. We are not in a CDATA block
# 2. This is not the start of a CDATA block
} elsif( $data[ $index ] eq '>' ) {
if( $state_machine{ 'in_cdata_block' } ) {
# Inside CDATA a '>' only matters when it completes the ']]>' terminator.
# NOTE(review): for $index < 2 the slice start is negative and wraps
# around to the end of @data.
my $possible_cdata_close = join( '', @data[ ( $index - 2 ) .. $index ] ) ;
if( $possible_cdata_close eq ']]>' ) {
$state_machine{ 'in_cdata_block' } = 0 ;
$state_machine{ 'tag_balance' } = 0 ;
next CHAR ;
}
next CHAR ;
}
unless( $state_machine{ 'seen_some_tag' } ) {
croak " > found before < - Input XML seems to have errors!\n";
}
$state_machine{ 'tag_balance' }-- ;
# Balance back to zero: this '>' closes a well-formed tag.
unless( $state_machine{ 'tag_balance' } ) {
$state_machine{ 'last_tag_close' } = $index ;
next CHAR ;
}
## Need to add CDATA now.
# The balance is non-zero, so the text after the previous tag's '>' held a
# stray '<'. Open CDATA right after that '>' and close it right before the
# most recent '<', by replacing those two single-character elements.
# NOTE(review): last_tag_close starts at -1, so if no tag has been closed
# yet the first assignment overwrites the last element of @data.
my $last_tag_close = $state_machine{ 'last_tag_close' } ;
my $prev_lt = $state_machine{ 'prev_lt' } ;
$data[ $last_tag_close ] = '><![CDATA[' ;
$data[ $prev_lt ] = ']]><' ;
# The current '>' closes the tag that started at prev_lt; reset tracking.
$state_machine{ 'last_tag_close' } = $index ;
$state_machine{ 'need_to_cdata_this' } = 0 ;
$state_machine{ 'tag_balance' } = 0 ;
}
}
$data = join( '', @data ) ;
return $data;
}
###########
# GET_URL #
###########
# get_url( $url )
#
# Issues an HTTP GET for $url through LWP::UserAgent. The agent string sent is
# the LWP default prefixed with "XML::Smart/<version>".
#
#   $url    - the address to fetch.
#   returns - the response content when the response reports success,
#             undef otherwise.
sub get_url {
    my ( $url ) = @_ ;

    # LWP is loaded on demand, only when a URL actually has to be fetched.
    require LWP ;
    require LWP::UserAgent ;

    my $ua = LWP::UserAgent->new() ;

    # Keep the stock LWP agent string, prefixed with this module's identity.
    $ua->agent( "XML::Smart/$XML::Smart::VERSION " . $ua->agent() ) ;

    my $res = $ua->request( HTTP::Request->new(GET => $url) ) ;

    return $res->is_success ? $res->content : undef ;
}
##########
# MODULE #
##########
# module()
#
# Looks through %PARSERS for an entry with a true value and returns its key
# with every '_' replaced by '::' (e.g. 'XML_Smart_Parser' becomes
# 'XML::Smart::Parser'). Returns the empty string when no entry is true.
# Keys are visited in hash order, so which true entry wins is not fixed here.
sub module {
    for my $parser_key ( keys %PARSERS ) {
        next unless $PARSERS{$parser_key} ;

        # Work on a copy so the hash key itself is left untouched.
        ( my $package = $parser_key ) =~ s/_/::/g ;
        return( $package ) ;
    }

    return('') ;
}
#########
# _INIT #
#########
sub _Init {
( run in 1.185 second using v1.01-cache-2.11-cpan-39bf76dae61 )