Serge

 view release on metacpan or  search on metacpan

lib/Serge/Engine/Plugin/parse_php_xhtml.pm  view on Meta::CPAN

    my $s = substr($$textref, $start_pos-$around, $end_pos - $start_pos + $around * 2);
    $s =~ s/\n/ /sg;
    my $message = $error.":\n".
        "$s\n".
        ('-' x $around)."^\n";

    $self->{errors}->{$self->get_current_file_rel} = $message;
    die $message;
}

# Scanner state shared between parse() and fix_php_blocks(). parse() clears
# all three flags right before running the substitution that invokes
# fix_php_blocks() for every matched token.
my $in_cdata; # set between '<![CDATA[' and ']]>'
my $in_tag;   # set between '<' and '>' (only tracked outside of CDATA)
my $in_attr;  # toggled by every '"' seen inside a tag

# Substitution callback: receives one token ('<![CDATA[', ']]>', '<', '>', '"'
# or a PHP block placeholder) and returns the text that should replace it.
#
#   $self    - parser object; only used to report errors via die_with_error()
#   $s       - the matched token
#   $textref - reference to the text being processed, handed over to
#              die_with_error() untouched
#
# Markup tokens are returned unchanged and only update the scanner state.
# A PHP block placeholder that sits inside a tag but outside of a quoted
# attribute value is returned as ' placeholder="" '; anywhere else it is
# returned as is. Unbalanced markup is reported through die_with_error().
#
# NOTE(review): the error reporter presumably depends on the match offsets
# of the calling substitution, so no regex matching is done in here -- confirm
# before introducing any pattern match into this sub.
sub fix_php_blocks {
    my ($self, $s, $textref) = @_;

    my $problem;

    if ($s eq '<![CDATA[') {
        if ($in_cdata) {
            $problem = "Premature '<![CDATA['";
        } else {
            $in_cdata = 1;
        }
    } elsif ($s eq ']]>') {
        if ($in_cdata) {
            $in_cdata = undef;
        } else {
            $problem = "Premature ']]>'";
        }
    } elsif ($in_cdata) {
        # inside CDATA every other token is plain text: no state change,
        # no rewriting
    } elsif ($s eq '<') {
        if ($in_tag) {
            $problem = "Premature '<'";
        } else {
            $in_tag = 1;
        }
    } elsif ($s eq '>') {
        $problem = !$in_tag ? "Premature '>'"
                 : $in_attr ? "Premature '>' before '\"'"
                 :            undef;
        $in_tag = undef unless defined $problem;
    } elsif ($s eq '"') {
        $in_attr = !$in_attr if $in_tag;
    } elsif ($in_tag && !$in_attr) {
        # PHP block directly inside <...>: disguise it as an empty attribute
        $s = " $s=\"\" ";
    }

    # state is never modified on the error paths above
    $self->die_with_error($problem, $textref) if defined $problem;

    return $s;
}

sub parse {
    my ($self, $textref, $callbackref, $lang) = @_;

lib/Serge/Engine/Plugin/parse_php_xhtml.pm  view on Meta::CPAN

    $text =~ s/(<\!\[CDATA\[.*?\]\]>)/<__CDATA>$1<\/__CDATA>/sg;

    # Wrapping HTML comments inside special '__COMMENT' tag

    $text =~ s/<\!--(.*?)-->/<__COMMENT><\!\[CDATA\[$1\]\]><\/__COMMENT>/sg;

    # Now we should properly handle the situation when PHP blocks are inside the <...>
    # This violates the XML rules, so we should dance around this, converting
    # all '__PHP__BLOCK__#__' to ' __PHP__BLOCK__#__="" '. Yeah, weird.

    $in_cdata = undef;
    $in_tag = undef;
    $in_attr = undef;

    $text =~ s/(<\!\[CDATA\[|\]\]>|<|>|"|__PHP__BLOCK__\d+__)/$self->fix_php_blocks($1, \$text)/ge;

    # Extracting strings out of PHP blocks

    foreach my $block (@{$self->{php_blocks}}) {

        # Parsing the $pageTitle variable value

lib/Serge/Engine/Plugin/parse_php_xhtml.pm  view on Meta::CPAN

    }

    if ($prohibit_translation) {
        $attrs->{'.prohibit'} = 1;
    }

    return ($will_translate or $some_child_will_translate or $prohibit_translation or $prohibit_children_translation, $contains_translatables);
}

sub render_tag_recursively {
    my ($self, $name, $subtree, $callbackref, $lang, $prohibit, $cdata, $context) = @_;
    my $attrs = $subtree->[0];

    my $translate = (exists $attrs->{'.translate'}) && (!exists $attrs->{'.prohibit'}) && !$prohibit;

    # if translation is prohibited for an entire subtree, or if the node is going to be translated
    # as a whole, then prohibit translation of children
    my $prohibit_children = $prohibit || $translate;

    $cdata = 1 if (($name eq '__CDATA') || ($name eq '__COMMENT'));

    # if context or hint attribute is defined, use that instead of current value, even if the new value is empty;
    # for values that represent empty strings, use `undef`

    if (exists $attrs->{context}) {
        $context = $attrs->{context} ne '' ? $attrs->{context} : undef;
    }

    if (exists $attrs->{'data-l10n-context'}) {
        $context = $attrs->{'data-l10n-context'} ne '' ? $attrs->{'data-l10n-context'} : undef;

lib/Serge/Engine/Plugin/parse_php_xhtml.pm  view on Meta::CPAN


    my $inner_xml = '';

    my $subnodes_count = (scalar(@$subtree) - 1) / 2;
    for (my $i = 0; $i < $subnodes_count; $i++) {
        my $tagname = $subtree->[1 + $i*2];
        my $tagtree = $subtree->[1 + $i*2 + 1];

        if ($tagname ne '0') {
            # if we are going to translate this tag as a whole, then prohibit translation for the entire subtree
            $inner_xml .= $self->render_tag_recursively($tagname, $tagtree, $callbackref, $lang, $prohibit_children, $cdata, $context);
        } else {
            # tagtree holds a string for text nodes

            my $str = $tagtree;

            # Escaping unsafe xml chars (excluding quotes)

            xml_escape_strref(\$str, 1) unless $cdata;

            # Reconstructing original XML with PHP blocks and symbolic entities

            $self->reconstruct_xml(\$str);

            # Add the string to a resulting xml

            $inner_xml .= $str;
        }
    }

lib/Serge/Engine/Plugin/parse_xml.pm  view on Meta::CPAN

    # Wrap processing instruction inside special '__PI' tag
    # to be able to reconstruct it later

    $text =~ s/<\?(.*?)\?>/<__PI><\!\[CDATA\[$1\]\]><\/__PI>/sg;

    # Wrap HTML comment inside special '__COMMENT' tag
    # to be able to reconstruct it later

    $text =~ s/<\!--(.*?)-->/<__COMMENT><\!\[CDATA\[$1\]\]><\/__COMMENT>/sg;

    # Restore escaped processing instructions and comments inside cdata

    $text = _unescape_pi_and_comments($text);

    # Add the dummy root tag for XML to be valid

    $text = '<__ROOT>'.$text.'</__ROOT>';

    # Create XML parser object

    use XML::Parser;

lib/Serge/Engine/Plugin/parse_xml.pm  view on Meta::CPAN


    $text =~ s/__PI_START__/<\?/sg;
    $text =~ s/__PI_END__/\?>/sg;
    $text =~ s/__COMMENT_START__/<\!--/sg;
    $text =~ s/__COMMENT_END__/-->/sg;

    return $text;
}

sub process_text_node {
    my ($self, $path, $attrs, $strref, $callbackref, $lang, $cdata, $noquotes) = @_;

    # Check if node path matches our expectations

    my $ok = undef;

    # Test if node path matches the mask

    foreach my $rule (@{$self->{data}->{node_match}}) {
        if (ref($rule) eq "HASH") {
            my $prule = $rule->{path};

lib/Serge/Engine/Plugin/parse_xml.pm  view on Meta::CPAN

        # in InDesign mode, preserve the leading and trailing whitespace
        my ($leading_whitespace, $trailing_whitespace);
        if ($self->{data}->{xml_kind_indesign}) {
            ($$strref =~ m/^(\s+)/) && ($leading_whitespace = $1);
            ($$strref =~ m/(\s+)$/) && ($trailing_whitespace = $1);
        }

        $$strref = $trimmed;

        # unescape basic XML entities unless we're inside CDATA block
        xml_unescape_strref($strref) unless $cdata;

        if ($is_html) {
            # if node is html, pass its text to html parser for string extraction
            # if html_parser fails to parse the XML due to errors,
            # it will die(), and this will be caught in main application

            # lazy-load html parser plugin
            # (parse_php_xhtml or the one specified in html_parser config node)
            if (!$self->{html_parser}) {
                if (exists $self->{data}->{html_parser}) {

lib/Serge/Engine/Plugin/parse_xml.pm  view on Meta::CPAN

                    ($@) && die "Can't load parser plugin 'parse_php_xhtml': $@";
                    print "Loaded HTML parser plugin for HTML nodes\n" if $self->{parent}->{debug};
                }
            }

            $self->{html_parser}->{current_file_rel} = $self->{parent}->{engine}->{current_file_rel}.":$path";
            if ($lang) {
                $$strref = $self->{html_parser}->parse($strref, $callbackref, $lang);
                if (defined $$strref) {
                    # escape unsafe xml chars unless we're in CDATA block
                    xml_escape_strref($strref, $noquotes) unless $cdata;
                } else {
                    $$strref = $trimmed;
                }
            } else {
                $self->{html_parser}->parse($strref, $callbackref);
            }
        } else {
            # additionally unescape Android-specific stuff, if requested
            _android_unescape($strref) if ($self->{data}->{xml_kind_android});

lib/Serge/Engine/Plugin/parse_xml.pm  view on Meta::CPAN

            }

            # escape Android-specific stuff if requested
            _android_escape($strref) if ($self->{data}->{xml_kind_android});

            # preserve symbolic entities from escaping
            $$strref =~ s/&(\w+);/'__HTML__ENTITY__'.$1.'__'/ge;

            # escape unsafe xml chars (in Android mode, do not xml-escape quotes)
            $noquotes = $noquotes || $self->{data}->{xml_kind_android};
            xml_escape_strref($strref, $noquotes) unless $cdata;

            # restore symbolic entities
            $$strref =~ s/__HTML__ENTITY__(\w+?)__/&$1;/g;

            # in InDesign mode, make sure the leading and trailing whitespace
            # is restored to the original values
            if ($self->{data}->{xml_kind_indesign}) {
                $$strref =~ s/^(\s+)/$leading_whitespace/e;
                $$strref =~ s/(\s+)$/$trailing_whitespace/e;
            }

lib/Serge/Engine/Plugin/parse_xml.pm  view on Meta::CPAN

    $$strref =~ s/"/\\"/g; # Android-specific quote escaping

}

# Pass-through callback: hands back its first argument and ignores any
# other arguments. Used where a callback is required but the text must
# stay as it is (e.g. for the contents of processing instructions).
sub _dummy_callback {
    my $passthrough = shift;

    return $passthrough;
}

sub render_tag_recursively {
    my ($self, $name, $subtree, $callbackref, $lang, $path, $cdata, $parent_attrs) = @_;
    my $attrs = $subtree->[0];

    $cdata = 1 if (($name eq '__CDATA') || ($name eq '__COMMENT') || ($name eq '__PI'));

    my $inner_xml = '';

    for (my $i = 0; $i < (scalar(@$subtree) - 1) / 2; $i++) {
        my $tagname = $subtree->[1 + $i*2];
        my $tagtree = $subtree->[1 + $i*2 + 1];

        # do not process text inside processing instructions
        # TODO: this can potentially be a conditional option, disabled by default
        if ($tagname eq '__PI') {
            $inner_xml .= $self->render_tag_recursively($tagname, $tagtree, \&_dummy_callback, $lang, $path, $cdata, $attrs);
            next;
        }

        if ($tagname ne '0') {
            # node does not contain plain text, render the subtree

            my $tagpath;
            if (($tagname eq '__ROOT') || ($tagname eq '__CDATA') || ($tagname eq '__COMMENT') || ($tagname eq '__PI')) {
                $tagpath = $path;
            } else {
                $tagpath = $path.'/'.$tagname;
            }

            if ($lang) {
                $inner_xml .= $self->render_tag_recursively($tagname, $tagtree, $callbackref, $lang, $tagpath, $cdata, $attrs);
            } else {
                $self->render_tag_recursively($tagname, $tagtree, $callbackref, $lang, $tagpath, $cdata, $attrs);
            }
        } else {
            # tagtree holds a string for text nodes

            my $str = $tagtree;

            $self->process_text_node($path, $parent_attrs, \$str, $callbackref, $lang, $cdata, 1);

            if ($lang) {
                $inner_xml .= $str;
            }
        }
    }

    # Generating the string consisting of [ attr="value"] pairs

    my $attrs_text;



( run in 0.255 second using v1.01-cache-2.11-cpan-454fe037f31 )