view release on metacpan or search on metacpan
lib/Cindy.pm view on Meta::CPAN
our @EXPORT= qw(get_html_doc get_xml_doc
parse_html_string parse_xml_string
parse_cis parse_cis_string
inject dump_xpath_profile);
use XML::LibXML;
use Cindy::Sheet;
use Cindy::Log;
sub get_html_doc($)
{
my ($file) = @_;
my $parser = XML::LibXML->new();
return $parser->parse_html_file($file);
}
sub get_xml_doc($)
{
my ($file) = @_;
my $parser = XML::LibXML->new();
return $parser->parse_file($file);
}
sub omit_nodes {
my ($doc, $tag) = @_;
lib/Cindy.pm view on Meta::CPAN
my $parent = $node->parentNode;
foreach my $child ($node->childNodes()) {
$parent->insertBefore($child->cloneNode(1), $node);
}
$parent->removeChild($node);
}
}
sub parse_html_string($;$)
{
my ($string, $ropt) = @_;
$ropt ||= {};
my $html_parse_noimplied = $ropt->{html_parse_noimplied}
|| $ropt->{no_implied};
my $dont_omit = !$html_parse_noimplied
|| ($string =~ /<html|<body/i);
lib/Cindy.pm view on Meta::CPAN
# Until HTML_PARSE_NOIMPLIED is implemented by
# libxml2 (and passed by XML::LibXML) we need
# to remove html/body tags that have been added to
# fragments.
omit_nodes($doc, 'html');
omit_nodes($doc, 'body');
}
return $doc;
}
sub parse_xml_string($)
{
my $parser = XML::LibXML->new();
return $parser->parse_string($_[0]);
}
sub parse_cis($)
{
return Cindy::Sheet::parse_cis($_[0]);
}
sub parse_cis_string($)
{
return Cindy::Sheet::parse_cis_string($_[0]);
}
#
# Get a copied doc. root for modification.
#
sub get_root_copy($)
{
my ($doc) = @_;
my $root = $doc->documentElement();
my $rtn = $root->cloneNode( 1 );
return $rtn;
}
sub dump_xpath_profile()
{
Cindy::Injection::dump_profile();
}
sub inject($$$)
{
my ($data, $doc, $descriptions) = @_;
my $docroot = get_root_copy($doc);
# my $dataroot = get_root_copy($data);
my $dataroot = $data->getDocumentElement();
# Create a root description with action none
# to hold the description list
my $descroot = Cindy::Injection->new(
'.', 'none', '.', 'xpath',
sublist => $descriptions);
lib/Cindy/Action.pm view on Meta::CPAN
use XML::LibXML;
#
# Helpers/Wrappers
#
#
# Evaluate data node as boolean
#
sub is_true($)
{
my ($data) = @_;
return 0 if (!$data);
return $data->textContent if ($data->can('textContent'));
return $data->value if ($data->can('value'));
}
#
# Evaluate data node text
#
sub text($)
{
my ($data) = @_;
return $data->textContent if ($data->can('textContent'));
return $data->value if ($data->can('value'));
# This may be text by some perl magic
return $data;
}
#
# Get list of child nodes
#
sub copy_children($$)
{
my ($data, $node) =@_;
if (defined($data)) {
if ($data->isa('XML::LibXML::Attr') ) {
# Replace an attribute node with a text node
return ($node->ownerDocument->createTextNode(
$data->textContent));
} else {
return map {$_->cloneNode(1);} $data->childNodes() ;
}
} else {
return ();
}
}
#
# The node only survives if data exists and its content
# evalutes to true.
#
sub condition($$)
{
my ($node, $data) = @_;
# remove node
if (!is_true($data)) {
my $parent = $node->parentNode;
$parent->removeChild( $node );
}
return 0;
}
#
# The node gets a copy of the data children to replace
# the existing ones. This copies the text held by data
# as well as possible element nodes (e.g. <b>). If data
# is not a node its treated as text.
#
sub content($$)
{
my ($node, $data) = @_;
# An a node without children will remove all
# target children. If however no node matched,
# the target node will be left unchanged.
if (defined($data)) {
$node->removeChildNodes();
if ( $data->can('childNodes')
|| $data->isa('XML::LibXML::Attr')) {
lib/Cindy/Action.pm view on Meta::CPAN
}
}
return 0;
}
#
# Appends a comment as a child of the node. Data is
# interpreted as the text for the comment.
#
sub comment($$)
{
my ($node, $data) = @_;
if (defined($data)) {
$node->appendChild(
$node->ownerDocument->createComment(text($data)));
}
return 0;
}
#
# The node is removed and the parent node gets
# the data children instead.
#
sub replace($$)
{
my ($node, $data) = @_;
my $parent = $node->parentNode;
foreach my $child (copy_children($data, $node)) {
$parent->insertBefore($child, $node);
}
# An a node without children will remove all
lib/Cindy/Action.pm view on Meta::CPAN
$parent->removeChild($node);
}
return 0;
}
#
# The node is removed and the parent node gets
# the data node and its children instead.
#
sub copy($$)
{
my ($node, $data) = @_;
# If no node matched,
# the target node will be left unchanged.
if (defined($data)) {
my $parent = $node->parentNode;
$parent->insertBefore($data->cloneNode(1), $node);
$parent->removeChild($node);
}
return 0;
}
#
# If data and its text content evaluate to true the node is
# removed and the parent node gets the children instead.
#
sub omit_tag($$)
{
my ($node, $data) = @_;
if (is_true($data)) {
my $parent = $node->parentNode;
foreach my $child ($node->childNodes()) {
$parent->insertBefore($child->cloneNode(1), $node);
}
lib/Cindy/Action.pm view on Meta::CPAN
return 0;
}
#
# Sets or removes an attribute from an element node.
# If data is undefined the element is removed, otherwise
# the data text content is used as the attribute value.
# Note the additional parameter name which passes the
# attribute name.
#
sub attribute($$$)
{
my ($node, $data, $name) = @_;
if ($data) {
$node->setAttribute($name, text($data));
} else {
$node->removeAttribute($name);
}
return 0;
}
#
# Copies the doc node and inserts the copy before
# the original.
# The actual repetion is done by the data xpath.
#
# return The cloned node
#
sub repeat($$)
{
my ($node, $data) = @_;
if (defined($data)) {
my $parent = $node->parentNode;
# Note that we do a deep copy here.
my $new = $node->cloneNode(1);
$parent->insertBefore($new, $node);
return $new;
lib/Cindy/Action.pm view on Meta::CPAN
}
}
#
# Special actions for internal use
#
#
# Removes the given node. Data is ignored.
#
sub remove($$)
{
my ($node, $data) = @_;
my $parent = $node->parentNode;
$parent->removeChild($node);
return 0;
}
#
# Does nothing. Used for subsheet holders.
#
sub none($$)
{
}
1;
lib/Cindy/Injection.pm view on Meta::CPAN
or $action eq 'none' );
$self->{xfilter} = $parms{xfilter}
if ( $action eq 'repeat' );
return bless($self, $class);
}
#
# Make a copy
#
sub clone($)
{
my ($self) = @_;
my %rtn = %{$self};
return bless(\%rtn, ref($self));
}
my $prof = Cindy::Profile->new();
sub dump_profile()
{
$prof = Cindy::Profile->new();
}
END {
$prof = undef;
}
#
# Wrapper for find.
#
sub find_matches($$) {
my ($data, $xpath) = @_;
my @data_nodes = ();
DEBUG "Matching '$xpath'.";
# No xpath, no results
return @data_nodes unless ($xpath);
# No data, no results
return @data_nodes unless (defined $data);
lib/Cindy/Injection.pm view on Meta::CPAN
@data_nodes = ($found);
}
return @data_nodes;
}
#
# Helper for debugging
#
sub debugNode($)
{
my ($nd) = @_;
return $nd. '/' .$nd->nodeName." (".$nd->nodeType.")";
}
#
# Matches all doc nodes
#
sub matchDoc($$)
{
my ($self, $doc) = @_;
return $self->match($doc, 'doc');
}
#
# Matches all data nodes
# Note that "no data found" is expressed by
# returning an injection where data is undef.
# This leaves the decision what to do to the action.
# Note that it differs from the handling of doc.
# As a result a data node that is not found
# generally triggers removal.
#
sub matchData($$)
{
my ($self, $data) = @_;
my @matches = $self->match($data, 'data');
if (scalar(@matches) == 0) {
my $rtn = $self->clone();
$rtn->{data} = undef;
return ($rtn);
} else {
return @matches;
}
lib/Cindy/Injection.pm view on Meta::CPAN
# Does doc/data matching. The xpath from xdoc/xdata
# is used to match nodes that are then stored as doc/data
# properties of cloned nodes. A list of such nodes is
# returned.
#
# self - This injection.
# $context - The context node for the match.
# $what - One of 'doc' or 'data'.
# return - A list of self clones holding the matches.
#
sub match($$$)
{
my ($self, $context, $what) = @_;
# Find the nodes matching the xpath
my @nodes = find_matches($context, $self->{"x$what"});
my $cnt = scalar(@nodes);
DEBUG "Matched $cnt $what nodes for action "
.$self->{action}.".";
lib/Cindy/Injection.pm view on Meta::CPAN
DEBUG "No match. Removed.";
return;
}
}
#
# Execute a member function on all subsheet elements
# and replace the subsheet with the concatenated returns
# of the calls.
#
sub subsheetsDo($$)
{
my ($self, $do) = @_;
DEBUG "Entered subsheetsDo.";
# Without a subsheet, nothing is done.
if ($self->{subsheet}) {
DEBUG "Found subsheet.";
my @subsheets = ();
foreach my $inj (@{$self->{subsheet}}) {
lib/Cindy/Injection.pm view on Meta::CPAN
if ($cnt_bef != $cnt_aft);
}
$self->{subsheet} = \@subsheets;
}
}
#
# Returns an additional remove action to remove the original
# of the target doc node after a sequence of replace actions.
#
sub appendRemoveToRepeat()
{
my ($self) = @_;
if ($self->{'action'} eq 'repeat') {
DEBUG "Appending remove.";
# rmv has the same doc node as inj.
my $rmv = $self->clone();
# We need a cheap match, since matchData
lib/Cindy/Injection.pm view on Meta::CPAN
return ($self, $rmv);
}
return ($self);
}
#
# Executes nodes where doc and data have been matched
# before. Execution directly changes the doc.
#
sub execute()
{
my ($self) = @_;
DEBUG "Will execute $self->{action}.";
if ($self->{action} eq 'repeat') {
my $newdoc =
action($self->{action},
$self->{data},
$self->{doc},
lib/Cindy/Injection.pm view on Meta::CPAN
return ($self);
}
#
# This does all the work on the subsheet.
# The subsheet is a list of injections. It
# may get longer during the steps of run.
# The doc side is matched first because the
# in case of repeat the matched doc fragments
# are copied.
sub run($;$$)
{
my ($self, $dataroot, $docroot) = @_;
$dataroot ||= $self->{data};
$docroot ||= $self->{doc};
return ($self) unless $self->{subsheet};
# Match all doc nodes.
DEBUG "WILL MATCH DOC";
$self->subsheetsDo(sub {$_[0]->matchDoc($docroot)});
lib/Cindy/Injection.pm view on Meta::CPAN
DEBUG ">>>>> WILL RUN";
$self->subsheetsDo(sub {$_[0]->run();});
DEBUG ">>>>> DID RUN";
return ($self);
}
#
# Stringifies a node.
#
sub dbg_dump($)
{
my ($x) = @_;
return 'undef' if (!defined($x));
return $x->toString() if ($x->can('toString'));
return $x;
}
#
# A funtion to execute the named action by calling the
# Action::<action> function.
#
sub action($$$;$)
{
my ($action, $data, $node, $opt) = @_;
DEBUG "Doing $action on ".dbg_dump($node)." with ".
dbg_dump($data).":";
$action =~ s/-/_/g;
# This is possibel with strict refs
my $call = \&{"Cindy::Action::$action"};
my $rtn = &$call($node, $data, $opt);
lib/Cindy/Profile.pm view on Meta::CPAN
#
# Constructor
#
sub new ($)
{
my $class = shift;
return bless({}, $class);
}
sub before() {
my @rtn = gettimeofday();
return \@rtn;
}
sub after($$$)
{
my ($self, $r_before, $name) = @_;
my @now = gettimeofday();
# Time difference
my $delta = tv_interval($r_before, \@now);
if (not exists($self->{$name})) {
# An array [count, sum] is initialised
$self->{$name} = [0, 0];
}
$self->{$name}[0]++;
$self->{$name}[1] += $delta;
}
sub DESTROY($) {
my ($self) = shift;
# This is needed by apache (that loads the
# module at configuration time without a request).
return if not (keys(%{$self}));
INFO "Outputting profile:";
my @top = sort {$self->{$b}[1] <=> $self->{$a}[1];}
keys(%{$self});
foreach my $name (@top[0 .. 9]) {
lib/Cindy/Sheet.pm view on Meta::CPAN
}
#
# parse_cis
#
# file - The name of the file to read the injection sheet from
#
# return: A reference to a array of injections obtained from
# parsing.
#
sub parse_cis($)
{
my ($file) = @_;
open(my $CIS, '<', $file)
or die "Failed to open $file:$!";
my $text;
read($CIS, $text, -s $CIS);
close($CIS);
my $parser = PARSER();
my $rtn = $parser->complete_injection_list($text);
die_on_errors($parser->{__error_collector});
lib/Cindy/Sheet.pm view on Meta::CPAN
}
#
# parse_cis_string
#
# $ - The injection sheet as a string
#
# return: A reference to a array of injections obtained from
# parsing.
#
sub parse_cis_string($)
{
my $parser = PARSER();
my $rtn = $parser->complete_injection_list($_[0]);
die_on_errors($parser->{__error_collector});
return $rtn;
}
1;
# change 'tests => 1' to 'tests => last_test_to_print';
use Test::More tests => 6;
BEGIN { use_ok('Cindy') };
#########################
use Cindy;
sub strip_ws($) {
my ($str) = @_;
# Remove all whitespace
$str =~ s/\s+//gm;
return $str;
}
sub is_up_to_ws
{
my $got = strip_ws(shift(@_));
my $exp = strip_ws(shift(@_));
my $name = shift(@_);
is($got, $exp, $name);
}
sub test($$$) {
my ($doc, $data, $cis) = @_;
my $xdoc;
my $is_xml_doc = ($doc =~ /^<\?xml/);
if ($is_xml_doc) {
$xdoc = parse_xml_string($doc);
} else {
$xdoc = parse_html_string($doc);
}
my $xdata = parse_xml_string ($data);
my $xcis = parse_cis_string ($cis);