XML-DocStats
view release on metacpan or search on metacpan
lib/XML/DocStats.pm view on Meta::CPAN
}
# Handler invoked when an element ends.
#
# Sets the entry of the chars hash keyed by "<level><element>" to undef
# (the key itself is kept), lowers the level by one, and hands the value
# popped off elestack to element().
#
# $element is accepted as the second argument but is not used here.
# Returns whatever the final element() call returns.
sub end_element {
    my ($self, $element) = @_;

    my $depth = $self->level;
    my $chars = $self->chars;

    # Blank out the value stored for the element being closed.
    $chars->{ $depth . $self->element } = undef;

    # Step back out one level.
    --$depth;
    $self->level($depth);

    $self->element(pop @{ $self->elestack });
}
# Handler for a processing instruction.
#
# $pi is a hash reference; its Target and Data entries are read.
# Newlines are deleted from the data and every remaining run of
# whitespace is collapsed to a single space. When ok_print('pi') is true,
# one line is printed made of the colored 'PI: ' label, the colored
# current element, and the colored Target/Data pair. The '!PI' statistic
# is bumped whether or not anything was printed.
#
# Returns whatever stats() returns.
sub processing_instruction {
    my ($self, $pi) = @_;

    my $target = $pi->{Target};
    my $data   = $pi->{Data};

    # Newlines are removed outright (not turned into spaces) before the
    # remaining whitespace runs are squeezed.
    $data =~ s/\n//g;
    $data =~ s/\s+/ /g;

    my @attrs = ("Target='$target'", "Data='$data'");

    if ($self->ok_print('pi')) {
        my $label   = $self->color('PI', 'PI: ');
        my $element = $self->color('element', $self->element);
        my $details = $self->color('ATTR', " @attrs\n");
        $self->print($label . $element . $details);
    }

    $self->stats('!PI');
}
# Handler for a comment.
#
# $comment is a hash reference; its Data entry is passed through trim()
# and then escape(). When ok_print('comment') is true, one line is printed
# made of the colored 'COMMENT: ' label, the colored current element, and
# the processed text in single quotes (the text itself is not colored).
# The '!COMMENT' statistic is bumped whether or not anything was printed.
#
# Returns whatever stats() returns.
sub comment {
    my ($self, $comment) = @_;

    my $trimmed = $self->trim($comment->{Data});
    my $text    = $self->escape($trimmed);

    if ($self->ok_print('comment')) {
        my $label   = $self->color('COMMENT', 'COMMENT: ');
        my $element = $self->color('element', $self->element);
        $self->print($label . $element . " '$text'\n");
    }

    $self->stats('!COMMENT');
}
# Add $amount to the counter named $stat in the STATS hash.
#
#   $stat   - counter key, stored exactly as given (no '!' prefix is
#             added here; callers pass the prefix themselves).
#   $amount - how much to add. Defaults to 1 when omitted, undef, or an
#             empty string. An explicit 0 is honored and adds nothing;
#             previously any false amount, including 0, was counted as 1.
#
# A counter that does not exist yet is created with $amount as its value.
# Returns the counter's new value.
sub stats {
    my ($self, $stat, $amount) = @_;

    # Only fall back to 1 when no usable amount was supplied; a plain
    # truth test would also swallow a legitimate 0.
    $amount = 1 unless defined($amount) && length($amount);

    my $totals = $self->STATS;
    if (exists $totals->{$stat}) {
        $totals->{$stat} += $amount;
    }
    else {
        $totals->{$stat} = $amount;
    }

    return $totals->{$stat};
}
# Print one summary line for a group of counters.
#
#   $label - text printed (colored as 'STATS') in front of the list.
#   $quote - string placed on both sides of each displayed name.
#   @keys  - keys of the STATS hash to report, in display order.
#
# Each entry is rendered as "<count> <quote><name><quote>", where the name
# is the key with one leading '!', '@', '^' or '&' removed; entries are
# joined with ', '. The label and list are printed only when there is at
# least one key, but the trailing newline is always printed.
#
# Returns whatever the final prnt() call returns.
sub printstat {
    my ($self, $label, $quote, @keys) = @_;

    my @entries = map {
        my $key = $_;
        (my $shown = $key) =~ s/^[!@^&]//;
        # NOTE(review): as written this replaces '&' with '&', i.e. it
        # changes nothing. Possibly meant to emit an escaped entity -
        # confirm against the intended output before altering it.
        $shown =~ s{\&}{&}g;
        $self->STATS->{$key} . " $quote$shown$quote";
    } @keys;

    if (@entries) {
        my $heading = $self->color('STATS', $label);
        my $listing = $self->color('ATTR', join(', ', @entries));
        $self->prnt($heading . $listing);
    }

    $self->prnt("\n");
}
# Print the full statistics summary.
#
# Emits a newline, then sorts the keys of the STATS hash and reports them
# through printstat() in five groups chosen by the key's first character:
#
#   '!'            TOTALS
#   none of !@^&   ELEMENTS
#   '@'            ATTRIBUTES
#   '^'            ATTRVALUES (names shown inside single quotes)
#   '&'            ENTITIES
#
# Returns whatever the final printstat() call returns.
sub printstats {
    my ($self) = @_;

    $self->prnt("\n");

    my @names = sort keys %{ $self->STATS };

    my @totals     = grep { m/^!/ } @names;
    my @elements   = grep { not m/^[!@^&]/ } @names;
    my @attributes = grep { m/^@/ } @names;
    my @attrvalues = grep { m/^\^/ } @names;
    my @entities   = grep { m/^&/ } @names;

    $self->printstat('TOTALS: ',     '',  @totals);
    $self->printstat('ELEMENTS: ',   '',  @elements);
    $self->printstat('ATTRIBUTES: ', '',  @attributes);
    $self->printstat('ATTRVALUES: ', "'", @attrvalues);
    $self->printstat('ENTITIES: ',   '',  @entities);
}
# Handler for the start of a CDATA section: bumps the '!CDATA' statistic.
#
# Any argument after the invocant is ignored.
# Returns whatever stats() returns.
sub start_cdata {
    my $self = shift;

    $self->stats('!CDATA');
}
1;
__END__
=head1 NAME
XML::DocStats - produce a simple analysis of an XML document
=head1 SYNOPSIS
Analyze the xml document on STDIN; the output written to STDOUT is in html format:
use XML::DocStats;
my $parse = XML::DocStats->new;
$parse->analyze;
Analyze an in-memory xml document:
use XML::DocStats;
my ($xmldata) = @_;
my $parse = XML::DocStats->new(xmlsource=>{String => $xmldata},
BYTES => length($xmldata));
$parse->analyze;
Analyze an xml document read from an IO stream; the output format is plain text:
use XML::DocStats;
use IO::File;
my $xmlsource = IO::File->new("< document.xml");
my $parse = XML::DocStats->new(xmlsource=>{ByteStream => $xmlsource});
$parse->format('text');
$parse->analyze;
=head1 DESCRIPTION
=over 4
XML::DocStats parses an xml document using a SAX handler built using Ken MacLeod's XML::Parser::PerlSAX. It produces a listing indented to show the element hierarchy, and collects counts of various xml components along the way. A summary of the count...
The output listing is either in plain text or html.
Each xml thingy is color-coded in the html output for easy reading:
=begin text
- purple denotes elements.
- blue denotes text (character data). The text itself is black.
- olive denotes attributes and attribute values in elements,
XML-DCL, DOCTYPE, and PIs.
- fuchsia denotes entity references. The name of the entity is
in black. fuchsia is also used to denote the root element, and
to mark the start and finish of the parse, as well as to label
the statistics at the end.
- teal denotes the XML declaration.
- navy denotes the DOCTYPE declaration.
- maroon denotes processing instructions.
- green denotes comments. The text of the comment is black.
( run in 2.075 seconds using v1.01-cache-2.11-cpan-140bd7fdf52 )