XML-Twig

 view release on metacpan or  search on metacpan

lib/XML/Twig.pm  view on Meta::CPAN

    my $left_string= substr( $string, 0, $offset);
    my $right_string= substr( $string, $offset);
    $text_elt->set_pcdata( $left_string);
    my $new_elt= $elt->new( $elt->gi, $right_string);
    $new_elt->paste( after => $elt);
    return $new_elt;
  }

# split an element or its text descendants into several, in place
# all elements (new and untouched) are returned
sub split
  { my $elt= shift;
    my @text_chunks;
    my @result;
    if( $elt->is_text) { @text_chunks= ($elt); }
    else               { @text_chunks= $elt->descendants( $TEXT); }
    foreach my $text_chunk (@text_chunks)
      { push @result, $text_chunk->_split( 1, @_); }
    return @result;
  }

# split an element or its text descendants into several, in place
# created elements (those which match the regexp) are returned
sub mark
  { my $elt= shift;
    my @text_chunks;
    my @result;
    if( $elt->is_text) { @text_chunks= ($elt); }
    else               { @text_chunks= $elt->descendants( $TEXT); }
    foreach my $text_chunk (@text_chunks)
      { push @result, $text_chunk->_split( 0, @_); }
    return @result;
  }

# split a single text element
# return_all defines what is returned: if it is true
# only returns the elements created by matches in the split regexp
# otherwise all elements (new and untouched) are returned
{

  sub _split
    { my $elt= shift;
      my $return_all= shift;
      my $regexp= shift;
      my @tags;

      while( @_)
        { my $tag= shift();
          if( ref $_[0])
            { push @tags, { tag => $tag, atts => shift }; }
          else
            { push @tags, { tag => $tag }; }
        }

      unless( @tags) { @tags= { tag => $elt->_parent->gi }; }

      my @result;                                 # the returned list of elements
      my $text= $elt->text;
      my $gi= $elt->gi;

      # 2 uses: if split matches then the first substring reuses $elt
      #         once a split has occurred then the last match needs to be put in
      #         a new element
      my $previous_match= 0;

      while( my( $pre_match, @matches)= $text=~ /^(.*?)$regexp(.*)$/gcs)
        { $text= pop @matches;
          if( $previous_match)
            { # match, not the first one, create a new text ($gi) element
              $elt= $elt->insert_new_elt( after => $gi, $pre_match);
              push @result, $elt if( $return_all);
            }
          else
            { # first match in $elt, re-use $elt for the first sub-string
              $elt->set_text( $pre_match);
              $previous_match++;                # store the fact that there was a match
              push @result, $elt if( $return_all);
            }

          # now deal with matches captured in the regexp
          if( @matches)
            { # match, with capture
              my $i=0;
              foreach my $match (@matches)
                { # create new element, text is the match
                  my $tag  = _repl_match( $tags[$i]->{tag}, @matches) || '#PCDATA';
                  my $atts = \%{$tags[$i]->{atts}} || {};
                  my %atts= map { _repl_match( $_, @matches) => _repl_match( $atts->{$_}, @matches) } keys %$atts;
                  $elt= $elt->insert_new_elt( after => $tag, \%atts, $match);
                  push @result, $elt;
                  $i= ($i + 1) % @tags;
                }
            }
          else
            { # match, no captures
              my $tag  = $tags[0]->{tag};
              my $atts = \%{$tags[0]->{atts}} || {};
              $elt=  $elt->insert_new_elt( after => $tag, $atts);
              push @result, $elt;
            }
        }
      if( $previous_match && $text)
        { # there was at least 1 match, and there is text left after the match
          $elt= $elt->insert_new_elt( after => $gi, $text);
        }

      push @result, $elt if( $return_all);

      return @result; # return all elements
   }

sub _repl_match
  { my( $val, @matches)= @_;
    $val=~ s{\$(\d+)}{$matches[$1-1]}g;
    return $val;
  }

}

{ my %replace_sub; # cache for complex expressions (expression => sub)

  sub subs_text
    { my( $elt, $regexp, $replace)= @_;

      my $replacement_string;
      my $is_string= _is_string( $replace);

      my @parents;

      foreach my $text_elt ($elt->descendants_or_self( $TEXT))
        {
          if( $is_string)
            { my $text= $text_elt->text;
              $text=~ s{$regexp}{ _replace_var( $replace, $1, $2, $3, $4, $5, $6, $7, $8, $9)}egx;
              $text_elt->set_text( $text);
           }
          else
            {
              my $replace_sub= ( $replace_sub{$replace} ||= _install_replace_sub( $replace));
              my $text= $text_elt->text;
              my $pos=0;  # used to skip text that was previously matched
              my $found_hit;
              while( my( $pre_match_string, $match_string, @var)= ($text=~ m{(.*?)($regexp)}sg))
                { $found_hit=1;
                  my $match_start  = length( $pre_match_string);
                  my $match        = $match_start ? $text_elt->split_at( $match_start + $pos) : $text_elt;
                  my $match_length = length( $match_string);
                  my $post_match   = $match->split_at( $match_length);
                  $replace_sub->( $match, @var);

                  # go to next
                  $text_elt= $post_match;
                  $text= $post_match->text;

                  if( $found_hit) { push @parents, $text_elt->parent unless $parents[-1] && $parents[-1]== $text_elt->parent; }

                }
            }
        }

      foreach my $parent (@parents) { $parent->normalize; }

      return $elt;
    }

  sub _is_string
    { return ($_[0]=~ m{&e[ln]t}) ? 0: 1 }

  sub _replace_var
    { my( $string, @var)= @_;
      unshift @var, undef;
      $string=~ s{\$(\d)}{$var[$1]}g;
      return $string;
    }

  sub _install_replace_sub
    { my $replace_exp= shift;
      my @item= split m{(&e[ln]t\s*\([^)]*\))}, $replace_exp;
      my $sub= q{ my( $match, @var)= @_; my $new; my $last_inserted=$match;};
      my( $gi, $exp);
      foreach my $item (@item)
        { next if ! length $item;
          if(    $item=~ m{^&elt\s*\(([^)]*)\)})
            { $exp= $1; }
          elsif( $item=~ m{^&ent\s*\(\s*([^\s)]*)\s*\)})
            { $exp= " '#ENT' => $1"; }
          else
            { $exp= qq{ '#PCDATA' => "$item"}; }
          $exp=~ s{\$(\d)}{my $i= $1-1; "\$var[$i]"}eg; # replace references to matches
          $sub.= qq{ \$new= \$match->new( $exp); };
          $sub .= q{ $new->paste( after => $last_inserted); $last_inserted=$new;};
        }
      $sub .= q{ $match->delete; };
      #$sub=~ s/;/;\n/g; warn "subs: $sub";
      my $coderef= eval "sub { $NO_WARNINGS; $sub }"; ## no critic (ProhibitStringyEval)
      if( $@) {
          my $msg = "invalid replacement expression $replace_exp: " . $@;
          if( $@ =~ m{Global symbol .* requires explicit package name} )
            { my $suggested = $replace_exp;
              $suggested =~ s{\$(\d)}{\\\$$1}g;
              $msg = join "\n    ",
                  $msg,
                  "the problem seems to be that the replacement expression is a single-quoted string".
                  "that includes variables. Those need to be expanded.",
                  "ie replace the following:",
                  "    '$replace_exp'",
                  "with:",
                  "    qq{$suggested}",
                  ;
            }
          croak $msg;
        }
      return $coderef;
    }

  }

sub merge_text
  { my( $e1, $e2)= @_;
    croak "invalid merge: can only merge 2 elements"
        unless( isa( $e2, 'XML::Twig::Elt'));
    croak "invalid merge: can only merge 2 text elements"
        unless( $e1->is_text && $e2->is_text && ($e1->gi eq $e2->gi));

    my $t1_length= length( $e1->text);

    $e1->set_text( $e1->text . $e2->text);

    if( my $extra_data_in_pcdata= $e2->_extra_data_in_pcdata)
      { foreach my $data (@$extra_data_in_pcdata) { $e1->_push_extra_data_in_pcdata( $data->{text}, $data->{offset} + $t1_length); } }

    $e2->delete;

    return $e1;
  }

sub merge
  { my( $e1, $e2)= @_;



( run in 2.023 seconds using v1.01-cache-2.11-cpan-71847e10f99 )