PApp
view release on metacpan or search on metacpan
PApp/XML.pm view on Meta::CPAN
##########################################################################
## All portions of this code are copyright (c) 2003,2004 nethype GmbH ##
##########################################################################
## Using, reading, modifying or copying this code requires a LICENSE ##
## from nethype GmbH, Franz-Werfel-Str. 11, 74078 Heilbronn, ##
## Germany. If you happen to have questions, feel free to contact us at ##
## license@nethype.de. ##
##########################################################################
=head1 NAME
PApp::XML - pxml sections and more
=head1 SYNOPSIS
use PApp::XML;
=head1 DESCRIPTION
Apart from providing XML convinience functions, the PApp::XML module
manages XML templates containing pappxml directives and perl code similar
to phtml sections. Together with stylesheets (L<PApp::XSLT>) this can be
used to almost totally seperate content from layout. Image a database
containing XML documents with customized tags. A stylesheet can then be
used to transform this XML document into html + special pappxml directives
that can be used to create links etc...
=cut
package PApp::XML;
use Convert::Scalar ':utf8';
use PApp::Util;
use PApp::Exception qw(fancydie);
use base 'Exporter';
$VERSION = 2.4;
@EXPORT_OK = qw(
xml_quote xml_attr xml_unquote xml_tag xml_cdata
xml_check xml_encoding xml2utf8 pod2xml
xml_include expand_pi xml_errorparser
);
=head2 Functions for XML-Generation
=over 4
=item xml_quote $string
Quotes (and returns) the given string so that it's contents won't be
interpreted by an XML parser (quotes ', ", <, & and > to avoid ]]>). Example:
print xml_quote q( <xx> & <[[]]> );
=> <xx> & <[[]]>
=item xml_cdata $string
Does the same thing as C<xml_quote>, but using CDATA constructs, rather
than quoting individual characters. Example:
print xml_cdata q(hi ]]> there);
=> <![CDATA[hi ]]]]><![CDATA[> there ]]>
=item xml_unquote $string
Unquotes (and returns) an XML string (by resolving it's entities and
CDATA sections). Currently, only the named predefined xml entities and
numerical character entities are resolved. Everything else is silently
ignored. Example:
print xml_unquote q( <![CDATA[text1]]> & text2! );
=> text1 & text2!
=item xml_attr $attr => $value [, $attr2 => $value2, ...]
Returns fully quoted $attr => $value pairs. Example:
print xml_attr authors => q(Alan Cox & Linus "kubys" Torvalds);
=> authors="Alan Cox & Linus "kubys" Torvalds"
=item xml_tag $element_name, [$attr => $value, ...] [, $content_or_undef]
Generates a tag from the given element name, content and attribute
name => value pairs. If content is undef, an empty tag will be
generated. Example:
print xml_tag "p", align => "center"
=> <p align="center"/>
As a very special courtesy hack for you, if you omit the content argument
entirely, only an opening tag will be generated.
=cut
sub xml_quote {
local $_ = shift;
s/&/&/g;
s/</</g;
s/>/>/g;
#s/]]>/]]>/g; # avoids problems when ]] and > are quoted in seperate calls
$_;
}
sub xml_cdata {
local $_ = shift;
s/]]>/]]]]><![CDATA[>/g;
"<![CDATA[$_]]>";
}
sub xml_attr {
my $attrs;
for (my $i = 0; $i < $#_; $i += 2) {
local $_ = $_[$i+1];
s/&/&/g;
s/"/"/g;
s/</</g;
$attrs .= " $_[$i]=\"$_\"";
}
substr $attrs, 1;
}
sub xml_tag {
my $element = shift;
my $tag = "<$element";
$tag .= " ".&xml_attr if @_ > 1;
if (@_ & 1) {
if (defined $_[-1]) {
"$tag>$_[-1]</$element>";
} else {
"$tag/>";
}
} else {
"$tag>";
}
}
sub xml_unquote($) {
local $_ = shift;
s{&([^;]+);|<!\[CDATA\[(.*?)]]>}{
if (defined $2) {
$2;
} elsif ("#" eq substr $1, 0, 1) {
if ("x" eq substr $1, 1, 1) {
chr hex substr $1, 2;
} else {
chr substr $1,1;
}
} else {
{ gt => '>', lt => '<', amp => '&', quot => '"', apos => "'" }->{$1}
}
}ge;
$_;
}
=back
=head2 Functions for Analyzing XML
=over 4
=item ($msg, $line, $col, $byte) = xml_check $string [, $prolog, $epilog]
Checks wether the given document is well-formed (as opposed to
valid). This merely tries to parse the string as an xml-document. Nothing
is returned if the document is well-formed.
PApp/XML.pm view on Meta::CPAN
h2 => 0,
table => 0,
tr => 0,
);
%delay_error = ();
sub xml_errorparser {
require HTML::Parser;
# fix any invalid xml-"names"
my $xmlname = sub {
local $_ = $_[0];
s/([^:]*):([^:]*):/$1:$2_illegal-colon-in-name_/g;
s/^([^\p{Letter}_:])/"illegal-xml-start-character_" . (ord $1)/e;
s/([^\p{Letter}\p{Digit}\-_.:])/"_illegal-character-" . (ord $1) . "-in-name_"/ge;
$_;
};
my ($xml, $errofs, $errmsg) = @_;
defined $errofs or $errofs = 1e99;
my $output = "";
my $delayed;
my @tag; # open elements
my $err = sub {
$delayed .= $_[0] if @_;
return if exists $delay_error{$tag[-1]};
for (my $i = @tag; --$i >= 0; ) {
return if $delay_error{$tag[$i]};
}
$output .= $delayed;
$delayed = "";
};
$xml =~ s%
([\x{0}-\x{8}\x{b}\x{c}\x{e}-\x{1f}\x{fffe}])
%
"illegal-character-" . (ord $1) . "-skipped";
%gex;
# HTML::Parser can't cope with unicode :(, unfortunately
# this destroys position information quite severly
utf8_upgrade $xml;
$xml = (utf8_to PApp::Recode "iso-8859-1", \&PApp::_unicode_to_entity)->($xml);
utf8_downgrade $xml;
my $parser = new HTML::Parser
api_version => 3,
strict_names => 1,
xml_mode => 1,
unbroken_text => 1,
case_sensitive => 1,
ignore_elements=> [qw(script)],
text_h => [sub {
if ($_[1] >= $errofs) {
$err->("<error>$errmsg, source<pre>\n"
. (xml_cdata substr $xml, $errofs >= 160 ? $errofs - 160 : 0, $errofs >= 160 ? 160 : $errofs)
. "÷"
. (xml_cdata substr $xml, $errofs, 160)
. "\n</pre></error>");
$errofs = 1e99;
} else {
$delayed and $err->();
}
$output .= PApp::XML::xml_quote $_[0];
}, "dtext, offset"],
start_h => [sub {
my $tag = $xmlname->($_[0]);
push @tag, $tag;
$output .= PApp::XML::xml_tag $tag, map +($xmlname->($_), $_[1]{$_}), keys %{$_[1]};
$delayed and $err->();
}, "tagname, attr"],
end_h => [sub {
my $tag = $xmlname->($_[0]);
if ($tag[-1] eq $tag) {
pop @tag;
$output .= "</$tag>";
$delayed and $err->();
} else {
for (my $i = @tag; --$i >= 0; ) {
if ($tag[$i] eq $tag) {
my $errmsg = "<error>ERROR: end-tag for element '$tag', which is not open, closing tag(s)";
while (@tag > $i) {
my $tag = pop @tag;
$output .= "</$tag>";
$delayed and $err->();
$errmsg .= " $tag";
}
$err->("$errmsg instead. </error>");
return;
}
}
$err->("<error>ERROR: skipping end-tag for element '$tag', which is not open. </error>");
}
}, "tagname"],
end_document_h => [sub {
while (@tag) {
my $tag = pop @tag;
$output .= "</$tag>" ;
$delayed and $err->();
}
}],
declaration_h => [sub {
}],
comment_h => [sub {
}],
process_h => [sub {
}],
;
$parser->parse($xml);
$parser->eof;
utf8_upgrade $output; # just for your convinience
}
=item xml_encoding xml-string [DEPRECATED]
Convenience function to detect the encoding used by the given xml
PApp/XML.pm view on Meta::CPAN
$self->parse($_[0]);
};
$@ and fancydie "xml_include expansion failed", $@,
info => [source => PApp::Util::format_source $_[0]];
{ local $@; $self->release }
$doc;
}
=item pod2xml $pod
Converts a POD string (which can be either a fragment or a whole document)
=cut
{
package PApp::XML::Pod2xml;
sub stag { (PApp::XML::xml_tag @_) }
sub title_tag {
my ($name, $title, $cont, @a) = @_;
stag $name, @a,
(stag 'title' => $title)
. (stag 'content' => $cont)
}
sub view_item {
my $t = $_[1]->title->present ($_[0]);
my $bullet;
if ($t =~ s/^\s*\*\s+//) {
$bullet = "*";
} elsif ($t =~ s/^\s*(\d+\.)\s+//) {
$bullet = $1;
}
title_tag item => $t
=> $_[1]->content->present ($_[0]),
$bullet ? (bullet => $bullet) : ()
}
sub view_begin {
$_[1]->format eq "xmlpod"
? $_[1]->content->present ($_[0])
: stag for => format => $_[1]->format, $_[1]->content->present ($_[0])
}
sub view_for {
$_[1]->format eq "xmlpod"
? $_[1]->text
: stag for => $_[1]->text;
}
sub view_pod { stag pod => xmlns => "http://www.nethype.de/xmlns/xmlpod" => $_[1]->content->present ($_[0]) }
sub view_head1 { title_tag head1 => $_[1]->title->present ($_[0]) => $_[1]->content->present ($_[0]) }
sub view_head2 { title_tag head2 => $_[1]->title->present ($_[0]) => $_[1]->content->present ($_[0]) }
sub view_head3 { title_tag head3 => $_[1]->title->present ($_[0]) => $_[1]->content->present ($_[0]) }
sub view_head4 { title_tag head4 => $_[1]->title->present ($_[0]) => $_[1]->content->present ($_[0]) }
sub view_over { stag over => indent => $_[1]->indent, $_[1]->content->present ($_[0]) }
sub view_begin { stag begin => format => $_[1]->format, $_[1]->content->present ($_[0]) }
sub view_verbatim { stag verbatim => PApp::XML::xml_cdata $_[1] }
sub view_textblock { stag para => $_[1] }
sub view_seq_code { stag code => $_[1] }
sub view_seq_bold { stag bold => $_[1] }
sub view_seq_italic { stag italic => $_[1] }
sub view_seq_link { stag link => $_[1] }
sub view_seq_index { stag index => $_[1] }
sub view_seq_file { stag file => $_[1] }
sub view_seq_zero { "" }
sub view_seq_space { PApp::XML::xml_quote $_[1] }
sub view_seq_text { PApp::XML::xml_quote $_[1] }
sub view_seq_entity { PApp::XML::xml_quote $_[1] }
}
sub pod2xml($) {
my ($pod) = @_;
return "" if not $pod;
require Pod::POM;
my $parser = Pod::POM->new
or die "Couldn't create POM object";
my $pom = $parser->parse_text ("=pod\n\n".$pod)
or die $parser->error ();
$pom->present (PApp::XML::Pod2xml::);
}
=back
=head2 The PApp::XML Factory Class
=over 4
=item new PApp::XML parameter => value...
Creates a new PApp::XML template object with the specified behaviour. It
can be used as an object factory to create new C<PApp::XML::Template>
objects.
special a hashref containing special => coderef pairs. If a
special is encountered, the given coderef will be compiled
in instead (i.e. it will be called each time the fragment
is print'ed). The coderef will be called with a reference
to the attribute hash, the element's contents (as a
string) and the PApp::XML::Template object used to print
the string.
If a reference to a coderef is given (e.g. C<\sub {}>),
the coderef will be called during parsing and the
resulting string will be added to the compiled subroutine.
The arguments are the same, except that the contents are
not given as string but as a magic token that must be
inserted into the return value.
The return value is expected to be in "phtml"
(L<PApp::Parser>) format, the magic "contents" token must
not occur in code sections.
( run in 0.441 second using v1.01-cache-2.11-cpan-39bf76dae61 )