view release on metacpan or search on metacpan
lib/Serge/Engine/Plugin/parse_php_xhtml.pm view on Meta::CPAN
my $s = substr($$textref, $start_pos-$around, $end_pos - $start_pos + $around * 2);
$s =~ s/\n/ /sg;
my $message = $error.":\n".
"$s\n".
('-' x $around)."^\n";
$self->{errors}->{$self->get_current_file_rel} = $message;
die $message;
}
# Scanner state shared by parse() and fix_php_blocks(): parse() clears all
# three flags and then hands every matched token to fix_php_blocks() in
# document order.
#   $in_cdata - true between '<![CDATA[' and ']]>'
#   $in_tag   - true between '<' and '>' (outside CDATA)
#   $in_attr  - true between a pair of '"' inside a tag
my ($in_cdata, $in_tag, $in_attr);

# Replacement callback for a single token found in the document text.
#
# Parameters:
#   $self    - plugin object; only used to call die_with_error()
#   $s       - the token: '<![CDATA[', ']]>', '<', '>', '"', or anything else,
#              which is treated as a PHP block placeholder
#   $textref - reference to the text being scanned, forwarded to
#              die_with_error() for error reporting
#
# Returns the text to substitute for the token. Every token comes back
# unchanged except a PHP block placeholder that sits directly inside a tag
# (not in CDATA, not inside a quoted attribute value): that one is turned
# into a dummy attribute, ' <placeholder>="" '.
#
# Structural problems (nested CDATA start, stray ']]>', '<' inside a tag,
# '>' outside a tag or inside an open attribute value) are reported through
# $self->die_with_error() and leave the scanner state untouched.
sub fix_php_blocks {
    my ($self, $s, $textref) = @_;

    # --- CDATA section start ---
    if ($s eq '<![CDATA[') {
        if ($in_cdata) {
            $self->die_with_error(q{Premature '<![CDATA['}, $textref);
        }
        else {
            $in_cdata = 1;
        }
        return $s;
    }

    # --- CDATA section end ---
    if ($s eq ']]>') {
        if ($in_cdata) {
            $in_cdata = undef;
        }
        else {
            $self->die_with_error(q{Premature ']]>'}, $textref);
        }
        return $s;
    }

    # --- tag start (plain text when inside CDATA) ---
    if ($s eq '<') {
        return $s if $in_cdata;

        if ($in_tag) {
            $self->die_with_error(q{Premature '<'}, $textref);
        }
        else {
            $in_tag = 1;
        }
        return $s;
    }

    # --- tag end (plain text when inside CDATA) ---
    if ($s eq '>') {
        return $s if $in_cdata;

        if ($in_tag && !$in_attr) {
            $in_tag = undef;
        }
        elsif ($in_tag) {
            # the tag is closing while an attribute value is still open
            $self->die_with_error(q{Premature '>' before '"'}, $textref);
        }
        else {
            $self->die_with_error(q{Premature '>'}, $textref);
        }
        return $s;
    }

    # --- double quote: toggles "inside attribute value", but only in a tag ---
    if ($s eq '"') {
        $in_attr = !$in_attr if $in_tag && !$in_cdata;
        return $s;
    }

    # --- anything else is a PHP block placeholder ---
    my $directly_in_tag = $in_tag && !$in_cdata && !$in_attr;
    return $directly_in_tag ? qq{ $s="" } : $s;
}
sub parse {
my ($self, $textref, $callbackref, $lang) = @_;
lib/Serge/Engine/Plugin/parse_php_xhtml.pm view on Meta::CPAN
$text =~ s/(<\!\[CDATA\[.*?\]\]>)/<__CDATA>$1<\/__CDATA>/sg;
# Wrapping HTML comments inside special '__COMMENT' tag
$text =~ s/<\!--(.*?)-->/<__COMMENT><\!\[CDATA\[$1\]\]><\/__COMMENT>/sg;
# Now we should properly handle the situation when PHP blocks are inside the <...>
# This violates the XML rules, so we should dance around this, converting
# all '__PHP__BLOCK__#__' to ' __PHP__BLOCK__#__="" '. Yeah, weird.
$in_cdata = undef;
$in_tag = undef;
$in_attr = undef;
$text =~ s/(<\!\[CDATA\[|\]\]>|<|>|"|__PHP__BLOCK__\d+__)/$self->fix_php_blocks($1, \$text)/ge;
# Extracting strings out of PHP blocks
foreach my $block (@{$self->{php_blocks}}) {
# Parsing the $pageTitle variable value
lib/Serge/Engine/Plugin/parse_php_xhtml.pm view on Meta::CPAN
}
if ($prohibit_translation) {
$attrs->{'.prohibit'} = 1;
}
return ($will_translate or $some_child_will_translate or $prohibit_translation or $prohibit_children_translation, $contains_translatables);
}
sub render_tag_recursively {
my ($self, $name, $subtree, $callbackref, $lang, $prohibit, $cdata, $context) = @_;
my $attrs = $subtree->[0];
my $translate = (exists $attrs->{'.translate'}) && (!exists $attrs->{'.prohibit'}) && !$prohibit;
# if translation is prohibited for an entire subtree, or if the node is going to be translated
# as a whole, then prohibit translation of children
my $prohibit_children = $prohibit || $translate;
$cdata = 1 if (($name eq '__CDATA') || ($name eq '__COMMENT'));
# if context or hint attribute is defined, use that instead of current value, even if the new value is empty;
# for values that represent empty strings, use `undef`
if (exists $attrs->{context}) {
$context = $attrs->{context} ne '' ? $attrs->{context} : undef;
}
if (exists $attrs->{'data-l10n-context'}) {
$context = $attrs->{'data-l10n-context'} ne '' ? $attrs->{'data-l10n-context'} : undef;
lib/Serge/Engine/Plugin/parse_php_xhtml.pm view on Meta::CPAN
my $inner_xml = '';
my $subnodes_count = (scalar(@$subtree) - 1) / 2;
for (my $i = 0; $i < $subnodes_count; $i++) {
my $tagname = $subtree->[1 + $i*2];
my $tagtree = $subtree->[1 + $i*2 + 1];
if ($tagname ne '0') {
# if we are going to translate this tag as a whole, then prohibit translation for the entire subtree
$inner_xml .= $self->render_tag_recursively($tagname, $tagtree, $callbackref, $lang, $prohibit_children, $cdata, $context);
} else {
# tagtree holds a string for text nodes
my $str = $tagtree;
# Escaping unsafe xml chars (excluding quotes)
xml_escape_strref(\$str, 1) unless $cdata;
# Reconstructing original XML with PHP blocks and symbolic entities
$self->reconstruct_xml(\$str);
# Add the string to a resulting xml
$inner_xml .= $str;
}
}
lib/Serge/Engine/Plugin/parse_xml.pm view on Meta::CPAN
# Wrap processing instruction inside special '__PI' tag
# to be able to reconstruct it later
$text =~ s/<\?(.*?)\?>/<__PI><\!\[CDATA\[$1\]\]><\/__PI>/sg;
# Wrap HTML comment inside special '__COMMENT' tag
# to be able to reconstruct it later
$text =~ s/<\!--(.*?)-->/<__COMMENT><\!\[CDATA\[$1\]\]><\/__COMMENT>/sg;
# Restore escaped processing instructions and comments inside cdata
$text = _unescape_pi_and_comments($text);
# Add the dummy root tag for XML to be valid
$text = '<__ROOT>'.$text.'</__ROOT>';
# Create XML parser object
use XML::Parser;
lib/Serge/Engine/Plugin/parse_xml.pm view on Meta::CPAN
$text =~ s/__PI_START__/<\?/sg;
$text =~ s/__PI_END__/\?>/sg;
$text =~ s/__COMMENT_START__/<\!--/sg;
$text =~ s/__COMMENT_END__/-->/sg;
return $text;
}
sub process_text_node {
my ($self, $path, $attrs, $strref, $callbackref, $lang, $cdata, $noquotes) = @_;
# Check if node path matches our expectations
my $ok = undef;
# Test if node path matches the mask
foreach my $rule (@{$self->{data}->{node_match}}) {
if (ref($rule) eq "HASH") {
my $prule = $rule->{path};
lib/Serge/Engine/Plugin/parse_xml.pm view on Meta::CPAN
# in InDesign mode, preserve the leading and trailing whitespace
my ($leading_whitespace, $trailing_whitespace);
if ($self->{data}->{xml_kind_indesign}) {
($$strref =~ m/^(\s+)/) && ($leading_whitespace = $1);
($$strref =~ m/(\s+)$/) && ($trailing_whitespace = $1);
}
$$strref = $trimmed;
# unescape basic XML entities unless we're inside CDATA block
xml_unescape_strref($strref) unless $cdata;
if ($is_html) {
# if node is html, pass its text to html parser for string extraction
# if html_parser fails to parse the XML due to errors,
# it will die(), and this will be caught in the main application
# lazy-load html parser plugin
# (parse_php_xhtml or the one specified in html_parser config node)
if (!$self->{html_parser}) {
if (exists $self->{data}->{html_parser}) {
lib/Serge/Engine/Plugin/parse_xml.pm view on Meta::CPAN
($@) && die "Can't load parser plugin 'parse_php_xhtml': $@";
print "Loaded HTML parser plugin for HTML nodes\n" if $self->{parent}->{debug};
}
}
$self->{html_parser}->{current_file_rel} = $self->{parent}->{engine}->{current_file_rel}.":$path";
if ($lang) {
$$strref = $self->{html_parser}->parse($strref, $callbackref, $lang);
if (defined $$strref) {
# escape unsafe xml chars unless we're in CDATA block
xml_escape_strref($strref, $noquotes) unless $cdata;
} else {
$$strref = $trimmed;
}
} else {
$self->{html_parser}->parse($strref, $callbackref);
}
} else {
# additionally unescape Android-specific stuff, if requested
_android_unescape($strref) if ($self->{data}->{xml_kind_android});
lib/Serge/Engine/Plugin/parse_xml.pm view on Meta::CPAN
}
# escape Android-specific stuff if requested
_android_escape($strref) if ($self->{data}->{xml_kind_android});
# preserve symbolic entities from escaping
$$strref =~ s/&(\w+);/'__HTML__ENTITY__'.$1.'__'/ge;
# escape unsafe xml chars (in Android mode, do not xml-escape quotes)
$noquotes = $noquotes || $self->{data}->{xml_kind_android};
xml_escape_strref($strref, $noquotes) unless $cdata;
# restore symbolic entities
$$strref =~ s/__HTML__ENTITY__(\w+?)__/&$1;/g;
# in InDesign mode, make sure the leading and trailing whitespace
# is restored to the original values
if ($self->{data}->{xml_kind_indesign}) {
$$strref =~ s/^(\s+)/$leading_whitespace/e;
$$strref =~ s/(\s+)$/$trailing_whitespace/e;
}
lib/Serge/Engine/Plugin/parse_xml.pm view on Meta::CPAN
$$strref =~ s/"/\\"/g; # Android-specific quote escaping
}
# Identity callback: returns its first argument untouched and ignores any
# others. render_tag_recursively() passes it in place of the real callback
# when descending into '__PI' nodes, so that text is not processed.
sub _dummy_callback {
    my $passthrough = $_[0];
    return $passthrough;
}
sub render_tag_recursively {
my ($self, $name, $subtree, $callbackref, $lang, $path, $cdata, $parent_attrs) = @_;
my $attrs = $subtree->[0];
$cdata = 1 if (($name eq '__CDATA') || ($name eq '__COMMENT') || ($name eq '__PI'));
my $inner_xml = '';
for (my $i = 0; $i < (scalar(@$subtree) - 1) / 2; $i++) {
my $tagname = $subtree->[1 + $i*2];
my $tagtree = $subtree->[1 + $i*2 + 1];
# do not process text inside processing instructions
# TODO: this can potentially be a conditional option, disabled by default
if ($tagname eq '__PI') {
$inner_xml .= $self->render_tag_recursively($tagname, $tagtree, \&_dummy_callback, $lang, $path, $cdata, $attrs);
next;
}
if ($tagname ne '0') {
# node does not contain plain text, render the subtree
my $tagpath;
if (($tagname eq '__ROOT') || ($tagname eq '__CDATA') || ($tagname eq '__COMMENT') || ($tagname eq '__PI')) {
$tagpath = $path;
} else {
$tagpath = $path.'/'.$tagname;
}
if ($lang) {
$inner_xml .= $self->render_tag_recursively($tagname, $tagtree, $callbackref, $lang, $tagpath, $cdata, $attrs);
} else {
$self->render_tag_recursively($tagname, $tagtree, $callbackref, $lang, $tagpath, $cdata, $attrs);
}
} else {
# tagtree holds a string for text nodes
my $str = $tagtree;
$self->process_text_node($path, $parent_attrs, \$str, $callbackref, $lang, $cdata, 1);
if ($lang) {
$inner_xml .= $str;
}
}
}
# Generating the string consisting of [ attr="value"] pairs
my $attrs_text;