XML-Twig
view release on metacpan or search on metacpan
lib/XML/Twig.pm view on Meta::CPAN
my $left_string= substr( $string, 0, $offset);
my $right_string= substr( $string, $offset);
$text_elt->set_pcdata( $left_string);
my $new_elt= $elt->new( $elt->gi, $right_string);
$new_elt->paste( after => $elt);
return $new_elt;
}
# split an element or its text descendants into several, in place
# all elements (new and untouched) are returned
sub split
{ my $elt= shift;
my @text_chunks;
my @result;
if( $elt->is_text) { @text_chunks= ($elt); }
else { @text_chunks= $elt->descendants( $TEXT); }
foreach my $text_chunk (@text_chunks)
{ push @result, $text_chunk->_split( 1, @_); }
return @result;
}
# split an element or its text descendants into several, in place
# created elements (those which match the regexp) are returned
sub mark
{ my $elt= shift;
my @text_chunks;
my @result;
if( $elt->is_text) { @text_chunks= ($elt); }
else { @text_chunks= $elt->descendants( $TEXT); }
foreach my $text_chunk (@text_chunks)
{ push @result, $text_chunk->_split( 0, @_); }
return @result;
}
# split a single text element
# return_all defines what is returned: if it is true
# only returns the elements created by matches in the split regexp
# otherwise all elements (new and untouched) are returned
{
sub _split
{ my $elt= shift;
my $return_all= shift;
my $regexp= shift;
my @tags;
while( @_)
{ my $tag= shift();
if( ref $_[0])
{ push @tags, { tag => $tag, atts => shift }; }
else
{ push @tags, { tag => $tag }; }
}
unless( @tags) { @tags= { tag => $elt->_parent->gi }; }
my @result; # the returned list of elements
my $text= $elt->text;
my $gi= $elt->gi;
# 2 uses: if split matches then the first substring reuses $elt
# once a split has occurred then the last match needs to be put in
# a new element
my $previous_match= 0;
while( my( $pre_match, @matches)= $text=~ /^(.*?)$regexp(.*)$/gcs)
{ $text= pop @matches;
if( $previous_match)
{ # match, not the first one, create a new text ($gi) element
$elt= $elt->insert_new_elt( after => $gi, $pre_match);
push @result, $elt if( $return_all);
}
else
{ # first match in $elt, re-use $elt for the first sub-string
$elt->set_text( $pre_match);
$previous_match++; # store the fact that there was a match
push @result, $elt if( $return_all);
}
# now deal with matches captured in the regexp
if( @matches)
{ # match, with capture
my $i=0;
foreach my $match (@matches)
{ # create new element, text is the match
my $tag = _repl_match( $tags[$i]->{tag}, @matches) || '#PCDATA';
my $atts = \%{$tags[$i]->{atts}} || {};
my %atts= map { _repl_match( $_, @matches) => _repl_match( $atts->{$_}, @matches) } keys %$atts;
$elt= $elt->insert_new_elt( after => $tag, \%atts, $match);
push @result, $elt;
$i= ($i + 1) % @tags;
}
}
else
{ # match, no captures
my $tag = $tags[0]->{tag};
my $atts = \%{$tags[0]->{atts}} || {};
$elt= $elt->insert_new_elt( after => $tag, $atts);
push @result, $elt;
}
}
if( $previous_match && $text)
{ # there was at least 1 match, and there is text left after the match
$elt= $elt->insert_new_elt( after => $gi, $text);
}
push @result, $elt if( $return_all);
return @result; # return all elements
}
sub _repl_match
{ my( $val, @matches)= @_;
$val=~ s{\$(\d+)}{$matches[$1-1]}g;
return $val;
}
}
{ my %replace_sub; # cache for complex expressions (expression => sub)
sub subs_text
{ my( $elt, $regexp, $replace)= @_;
my $replacement_string;
my $is_string= _is_string( $replace);
my @parents;
foreach my $text_elt ($elt->descendants_or_self( $TEXT))
{
if( $is_string)
{ my $text= $text_elt->text;
$text=~ s{$regexp}{ _replace_var( $replace, $1, $2, $3, $4, $5, $6, $7, $8, $9)}egx;
$text_elt->set_text( $text);
}
else
{
my $replace_sub= ( $replace_sub{$replace} ||= _install_replace_sub( $replace));
my $text= $text_elt->text;
my $pos=0; # used to skip text that was previously matched
my $found_hit;
while( my( $pre_match_string, $match_string, @var)= ($text=~ m{(.*?)($regexp)}sg))
{ $found_hit=1;
my $match_start = length( $pre_match_string);
my $match = $match_start ? $text_elt->split_at( $match_start + $pos) : $text_elt;
my $match_length = length( $match_string);
my $post_match = $match->split_at( $match_length);
$replace_sub->( $match, @var);
# go to next
$text_elt= $post_match;
$text= $post_match->text;
if( $found_hit) { push @parents, $text_elt->parent unless $parents[-1] && $parents[-1]== $text_elt->parent; }
}
}
}
foreach my $parent (@parents) { $parent->normalize; }
return $elt;
}
sub _is_string
{ return ($_[0]=~ m{&e[ln]t}) ? 0: 1 }
sub _replace_var
{ my( $string, @var)= @_;
unshift @var, undef;
$string=~ s{\$(\d)}{$var[$1]}g;
return $string;
}
sub _install_replace_sub
{ my $replace_exp= shift;
my @item= split m{(&e[ln]t\s*\([^)]*\))}, $replace_exp;
my $sub= q{ my( $match, @var)= @_; my $new; my $last_inserted=$match;};
my( $gi, $exp);
foreach my $item (@item)
{ next if ! length $item;
if( $item=~ m{^&elt\s*\(([^)]*)\)})
{ $exp= $1; }
elsif( $item=~ m{^&ent\s*\(\s*([^\s)]*)\s*\)})
{ $exp= " '#ENT' => $1"; }
else
{ $exp= qq{ '#PCDATA' => "$item"}; }
$exp=~ s{\$(\d)}{my $i= $1-1; "\$var[$i]"}eg; # replace references to matches
$sub.= qq{ \$new= \$match->new( $exp); };
$sub .= q{ $new->paste( after => $last_inserted); $last_inserted=$new;};
}
$sub .= q{ $match->delete; };
#$sub=~ s/;/;\n/g; warn "subs: $sub";
my $coderef= eval "sub { $NO_WARNINGS; $sub }"; ## no critic (ProhibitStringyEval)
if( $@) {
my $msg = "invalid replacement expression $replace_exp: " . $@;
if( $@ =~ m{Global symbol .* requires explicit package name} )
{ my $suggested = $replace_exp;
$suggested =~ s{\$(\d)}{\\\$$1}g;
$msg = join "\n ",
$msg,
"the problem seems to be that the replacement expression is a single-quoted string".
"that includes variables. Those need to be expanded.",
"ie replace the following:",
" '$replace_exp'",
"with:",
" qq{$suggested}",
;
}
croak $msg;
}
return $coderef;
}
}
sub merge_text
{ my( $e1, $e2)= @_;
croak "invalid merge: can only merge 2 elements"
unless( isa( $e2, 'XML::Twig::Elt'));
croak "invalid merge: can only merge 2 text elements"
unless( $e1->is_text && $e2->is_text && ($e1->gi eq $e2->gi));
my $t1_length= length( $e1->text);
$e1->set_text( $e1->text . $e2->text);
if( my $extra_data_in_pcdata= $e2->_extra_data_in_pcdata)
{ foreach my $data (@$extra_data_in_pcdata) { $e1->_push_extra_data_in_pcdata( $data->{text}, $data->{offset} + $t1_length); } }
$e2->delete;
return $e1;
}
sub merge
{ my( $e1, $e2)= @_;
( run in 2.023 seconds using v1.01-cache-2.11-cpan-71847e10f99 )