Cindy

 view release on metacpan or  search on metacpan

lib/Cindy.pm  view on Meta::CPAN


our @EXPORT= qw(get_html_doc get_xml_doc 
                parse_html_string parse_xml_string 
                parse_cis parse_cis_string
                inject dump_xpath_profile);

use XML::LibXML;
use Cindy::Sheet;
use Cindy::Log;
 
sub get_html_doc($)
{
  my ($file)  = @_;
  my $parser = XML::LibXML->new();

  return $parser->parse_html_file($file);
}

sub get_xml_doc($)
{
  my ($file)  = @_;
  my $parser = XML::LibXML->new();

  return $parser->parse_file($file);
}

sub omit_nodes {
  my ($doc, $tag) = @_; 

lib/Cindy.pm  view on Meta::CPAN

    my $parent = $node->parentNode;

    foreach my $child ($node->childNodes()) {
      $parent->insertBefore($child->cloneNode(1), $node);
    }
  
    $parent->removeChild($node);
  }
}

sub parse_html_string($;$)
{
  my ($string, $ropt)  = @_;
  $ropt ||= {};
  
  my $html_parse_noimplied = $ropt->{html_parse_noimplied}
                             || $ropt->{no_implied};

  my $dont_omit =  !$html_parse_noimplied 
               ||  ($string =~ /<html|<body/i);

lib/Cindy.pm  view on Meta::CPAN

    # Until HTML_PARSE_NOIMPLIED is implemented by 
    # libxml2 (and passed by XML::LibXML) we need
    # to remove html/body tags that have been added to 
    # fragments.
    omit_nodes($doc, 'html');
    omit_nodes($doc, 'body');
  }
  return $doc;
}

sub parse_xml_string($)
{
  my $parser = XML::LibXML->new();

  return $parser->parse_string($_[0]);
}

sub parse_cis($)
{
  return Cindy::Sheet::parse_cis($_[0]);
}

sub parse_cis_string($)
{
  return Cindy::Sheet::parse_cis_string($_[0]);
}

#
# Get a copied doc. root for modification.
#
sub get_root_copy($)
{
  my ($doc)   = @_;
  my $root  = $doc->documentElement();
  my $rtn = $root->cloneNode( 1 );
  return $rtn;
}

sub dump_xpath_profile()
{
  Cindy::Injection::dump_profile();
}

sub inject($$$)
{
  my ($data, $doc, $descriptions) = @_;
  my $docroot = get_root_copy($doc);
#  my $dataroot = get_root_copy($data);
  my $dataroot = $data->getDocumentElement();
  # Create a root description with action none 
  # to hold the description list 
  my $descroot = Cindy::Injection->new(
      '.', 'none', '.', 'xpath', 
      sublist => $descriptions);

lib/Cindy/Action.pm  view on Meta::CPAN


use XML::LibXML;

#
# Helpers/Wrappers
#

#
# Evaluate data node as boolean
#
sub is_true($)
{
  my ($data) = @_;

  return 0 if (!$data); 
  return $data->textContent if ($data->can('textContent'));
  return $data->value if ($data->can('value')); 
}

#
# Evaluate data node text
#
sub text($)
{
  my ($data) = @_;

  return $data->textContent if ($data->can('textContent'));
  return $data->value if ($data->can('value')); 
  # This may be text by some perl magic
  return $data;
}


#
# Get list of child nodes
#
sub copy_children($$)
{
  my ($data, $node) =@_;

  if (defined($data)) {
    if ($data->isa('XML::LibXML::Attr') ) {
      # Replace an attribute node with a text node
      return ($node->ownerDocument->createTextNode(
                      $data->textContent));
    } else {
      return map {$_->cloneNode(1);} $data->childNodes() ;
    }
  } else {
    return ();
  }
}

#
# The node only survives if data exists and its content
# evalutes to true. 
#
sub condition($$) 
{
  my ($node, $data) = @_;  

	#	remove node 
  if  (!is_true($data)) {
    my $parent = $node->parentNode;
    $parent->removeChild( $node );
  }

  return 0;
}

#
# The node gets a copy of the data children to replace
# the existing ones. This copies the text held by data
# as well as possible element nodes (e.g. <b>). If data
# is not a node its treated as text.
#
sub content($$) 
{
  my ($node, $data) = @_;  

  # An a node without children will remove all
  # target children. If however no node matched,
  # the target node will be left unchanged. 
  if (defined($data)) {
    $node->removeChildNodes();	
    if ( $data->can('childNodes')
      || $data->isa('XML::LibXML::Attr')) {

lib/Cindy/Action.pm  view on Meta::CPAN

    }
  }  

  return 0;
}

#
# Appends a comment as a child of the node. Data is
# interpreted as the text for the comment.
#
sub comment($$) 
{
  my ($node, $data) = @_;  

  if (defined($data)) {
    $node->appendChild(
      $node->ownerDocument->createComment(text($data)));
  }  

  return 0;
}

#
# The node is removed and the parent node gets 
# the data children instead. 
#
sub replace($$) 
{
  my ($node, $data) = @_;  

  my $parent = $node->parentNode;
  
  foreach my $child (copy_children($data, $node)) {
    $parent->insertBefore($child, $node);
  }

  # An a node without children will remove all

lib/Cindy/Action.pm  view on Meta::CPAN

    $parent->removeChild($node);
  }

  return 0;
}

#
# The node is removed and the parent node gets 
# the data node and its children instead. 
#
sub copy($$) 
{
  my ($node, $data) = @_;  
  
  # If no node matched,
  # the target node will be left unchanged. 
  if (defined($data)) {
    my $parent = $node->parentNode;
    $parent->insertBefore($data->cloneNode(1), $node);
    $parent->removeChild($node);
  }

  return 0;
}


#
# If data and its text content evaluate to true the node is 
# removed and the parent node gets the children instead.
#
sub omit_tag($$) 
{
  my ($node, $data) = @_;  

  if (is_true($data)) {
    my $parent = $node->parentNode;

    foreach my $child ($node->childNodes()) {
      $parent->insertBefore($child->cloneNode(1), $node);
    }
  

lib/Cindy/Action.pm  view on Meta::CPAN

  return 0;
}

#
# Sets or removes an attribute from an element node.
# If data is undefined the element is removed, otherwise
# the data text content is used as the attribute value. 
# Note the additional parameter name which passes the
# attribute name. 
#
sub attribute($$$) 
{
  my ($node, $data, $name) = @_;  

  if ($data) {
    $node->setAttribute($name, text($data));    
  } else {
    $node->removeAttribute($name);
  }

  return 0;
}

#
# Copies the doc node and inserts the copy before
# the original. 
# The actual repetion is done by the data xpath.
#
# return The cloned node
#
sub repeat($$) 
{
  my ($node, $data) = @_;  

  if (defined($data)) {
    my $parent = $node->parentNode;
    # Note that we do a deep copy here.
    my $new = $node->cloneNode(1);
  
    $parent->insertBefore($new, $node);
    return $new;

lib/Cindy/Action.pm  view on Meta::CPAN

  }
}

#
# Special actions for internal use
#

#
# Removes the given node. Data is ignored. 
#
sub remove($$) 
{
  my ($node, $data) = @_;  
    
  my $parent = $node->parentNode;
  $parent->removeChild($node);

  return 0;
}

#
# Does nothing. Used for subsheet holders.
#
sub none($$) 
{
}


1;

lib/Cindy/Injection.pm  view on Meta::CPAN

        or $action eq 'none' );
  $self->{xfilter} = $parms{xfilter} 
      if ( $action eq 'repeat' );

  return bless($self, $class); 
}

#
# Make a copy
#
sub clone($)
{
  my ($self) = @_;

  my %rtn = %{$self};

  return bless(\%rtn, ref($self));
}

my $prof = Cindy::Profile->new();
sub dump_profile()
{
  $prof = Cindy::Profile->new();
}
END {
  $prof = undef;
}

#
# Wrapper for find.
#
sub find_matches($$) {
  my ($data, $xpath) = @_;

  my @data_nodes = ();

  DEBUG "Matching '$xpath'.";

  # No xpath, no results
  return @data_nodes unless ($xpath);
  # No data, no results
  return @data_nodes unless (defined $data);

lib/Cindy/Injection.pm  view on Meta::CPAN

    @data_nodes = ($found);
  }


  return @data_nodes;
}

#
# Helper for debugging
#
sub debugNode($)
{
  my ($nd) = @_;
  return $nd. '/' .$nd->nodeName." (".$nd->nodeType.")";
}

#
# Matches all doc nodes
#
sub matchDoc($$)
{
  my ($self, $doc) = @_;
  return $self->match($doc, 'doc');
}

#
# Matches all data nodes 
# Note that "no data found" is expressed by 
# returning an injection where data is undef.
# This leaves the decision what to do to the action.
# Note that it differs from the handling of doc.
# As a result a data node that is not found 
# generally triggers removal.
# 
sub matchData($$)
{
  my ($self, $data) = @_;
  my @matches = $self->match($data, 'data');
  if (scalar(@matches) == 0) {
    my $rtn = $self->clone();
    $rtn->{data} = undef;
    return ($rtn);
  } else {
    return @matches;
  }

lib/Cindy/Injection.pm  view on Meta::CPAN

# Does doc/data matching. The xpath from xdoc/xdata
# is used to match nodes that are then stored as doc/data
# properties of cloned nodes. A list of such nodes is 
# returned.
#
# self - This injection.
# $context - The context node for the match.
# $what - One of 'doc' or 'data'.
# return - A list of self clones holding the matches.
#
sub match($$$)
{
  my ($self, $context, $what) = @_;

  # Find the nodes matching the xpath
  my @nodes = find_matches($context, $self->{"x$what"}); 

  my $cnt = scalar(@nodes);
  DEBUG "Matched $cnt $what nodes for action "
    .$self->{action}.".";

lib/Cindy/Injection.pm  view on Meta::CPAN

    DEBUG "No match. Removed.";
    return;
  }
}

#
# Execute a member function on all subsheet elements
# and replace the subsheet with the concatenated returns
# of the calls.
#
sub subsheetsDo($$)
{
  my ($self, $do) = @_;
  DEBUG "Entered subsheetsDo.";

  # Without a subsheet, nothing is done.
  if ($self->{subsheet}) {
    DEBUG "Found subsheet.";

    my @subsheets = ();
    foreach my $inj (@{$self->{subsheet}}) {

lib/Cindy/Injection.pm  view on Meta::CPAN

          if ($cnt_bef != $cnt_aft);
    }
    $self->{subsheet} = \@subsheets;
  }
}

#
# Returns an additional remove action to remove the original 
# of the target doc node after a sequence of replace actions.
#
sub appendRemoveToRepeat()
{
  my ($self) = @_;

  if ($self->{'action'} eq 'repeat') {
    DEBUG "Appending remove.";

    # rmv has the same doc node as inj.
    my $rmv = $self->clone();

    # We need a cheap match, since matchData

lib/Cindy/Injection.pm  view on Meta::CPAN

    return ($self, $rmv); 
  }
  
  return ($self);
}
  
#
# Executes nodes where doc and data have been matched 
# before. Execution directly changes the doc.
#
sub execute()
{
  my ($self) = @_;

  DEBUG "Will execute $self->{action}.";

  if ($self->{action} eq 'repeat') {
    my $newdoc = 
    action($self->{action},
           $self->{data},
           $self->{doc},

lib/Cindy/Injection.pm  view on Meta::CPAN

  return ($self);
}

#
# This does all the work on the subsheet.
# The subsheet is a list of injections. It
# may get longer during the steps of run.
# The doc side is matched first because the 
# in case of repeat the matched doc fragments
# are copied.
sub run($;$$)
{
  my ($self, $dataroot, $docroot) = @_;
  $dataroot ||= $self->{data}; 
  $docroot  ||= $self->{doc};

  return ($self) unless $self->{subsheet};

  # Match all doc nodes.
  DEBUG "WILL MATCH DOC";
  $self->subsheetsDo(sub {$_[0]->matchDoc($docroot)});

lib/Cindy/Injection.pm  view on Meta::CPAN

  DEBUG ">>>>> WILL RUN";
  $self->subsheetsDo(sub {$_[0]->run();});  
  DEBUG ">>>>> DID RUN";  

  return ($self);
}

#
# Stringifies a node.
#
sub dbg_dump($)
{
  my ($x) = @_; 
  return 'undef' if (!defined($x));
  return $x->toString() if ($x->can('toString'));
  return $x;
}

#
# A funtion to execute the named action by calling the
# Action::<action> function.
#
sub action($$$;$)
{
  my ($action, $data, $node, $opt) = @_;

  DEBUG "Doing $action on ".dbg_dump($node)." with ".
            dbg_dump($data).":";

  $action =~ s/-/_/g;
  # This is possibel with strict refs
  my $call = \&{"Cindy::Action::$action"};
  my $rtn = &$call($node, $data, $opt);

lib/Cindy/Profile.pm  view on Meta::CPAN


#
# Constructor
#
sub new ($)
{
  my $class = shift;
  return bless({}, $class);  
}

sub before() {
  my @rtn = gettimeofday();
  return \@rtn;  
}

sub after($$$)
{
  my ($self, $r_before, $name) = @_;
  my @now = gettimeofday();
  # Time difference
  my $delta = tv_interval($r_before, \@now);

  if (not exists($self->{$name})) {
    # An array [count, sum] is initialised
    $self->{$name} = [0, 0];
  }
  $self->{$name}[0]++;
  $self->{$name}[1] += $delta;  
}  

sub DESTROY($) {
  my ($self) = shift;

  # This is needed by apache (that loads the 
  # module at configuration time without a request).
  return if not (keys(%{$self}));

  INFO "Outputting profile:";
  my @top = sort {$self->{$b}[1] <=> $self->{$a}[1];}  
              keys(%{$self});
  foreach my $name (@top[0 .. 9]) {

lib/Cindy/Sheet.pm  view on Meta::CPAN

}

#
# parse_cis
#
# file - The name of the file to read the injection sheet from
#
# return: A reference to a array of injections obtained from 
#         parsing. 
#
sub parse_cis($)
{
  my ($file) = @_;
  open(my $CIS, '<', $file) 
  or die "Failed to open $file:$!";
  my $text;
  read($CIS, $text, -s $CIS);
  close($CIS);
  my $parser = PARSER();
  my $rtn = $parser->complete_injection_list($text);
  die_on_errors($parser->{__error_collector});

lib/Cindy/Sheet.pm  view on Meta::CPAN

}

#
# parse_cis_string
#
# $ - The injection sheet as a string
#
# return: A reference to a array of injections obtained from 
#         parsing. 
#
sub parse_cis_string($)
{
  my $parser = PARSER();
  my $rtn = $parser->complete_injection_list($_[0]);
  die_on_errors($parser->{__error_collector});
  return $rtn;
}

1;

t/Cindy.t  view on Meta::CPAN


# change 'tests => 1' to 'tests => last_test_to_print';

use Test::More tests => 6;
BEGIN { use_ok('Cindy') };

#########################

use Cindy;

sub strip_ws($) {
  my ($str) = @_;
  # Remove all whitespace
  $str =~ s/\s+//gm;
  return $str;
}

sub is_up_to_ws
{
  my $got = strip_ws(shift(@_));
  my $exp = strip_ws(shift(@_));
  my $name = shift(@_);
  is($got, $exp, $name);
}

sub test($$$) {
  my ($doc, $data, $cis) = @_;
  my $xdoc; 
  my $is_xml_doc = ($doc =~ /^<\?xml/);
  if ($is_xml_doc) {
    $xdoc  = parse_xml_string($doc);
  } else {
    $xdoc  = parse_html_string($doc);
  }
  my $xdata = parse_xml_string ($data);
  my $xcis  = parse_cis_string ($cis);



( run in 0.373 second using v1.01-cache-2.11-cpan-1f129e94a17 )