Serge

 view release on metacpan or  search on metacpan

lib/Serge/Engine/Plugin/parse_php_xhtml.pm  view on Meta::CPAN

</html>
|;
        print 'Errors found. Sending error report to '.join(', ', @$email_to)."\n";

        Serge::Mail::send_html_message(
            $email_from, # from
            $email_to, # to (list)
            $email_subject, # subject
            $text # message body
        );
    }

}

sub add_php_block {
    my ($self, $text) = @_;
    push @{$self->{php_blocks}}, $text;
    return scalar(@{$self->{php_blocks}});
}

sub reconstruct_xml {
    my ($self, $strref) = @_;

    # Substituting PHP blocks

    for (my $i = 1; $i <= scalar(@{$self->{php_blocks}}); $i++) {
        my $block = $self->{php_blocks}->[$i - 1];
        $$strref =~ s/__PHP__BLOCK__($i)__/<\?$block\?>/;
    }

    # Substituting symbolic entities

    $$strref =~ s/__HTML__ENTITY__(\w+?)__/&$1;/g;

    # Recreating DOCTYPE declaration

    $$strref =~ s/__DOCTYPE__(.*?)__END_DOCTYPE__/<!DOCTYPE$1>/sg;

}

sub get_current_file_rel {
    my ($self) = @_;
    return $self->{current_file_rel} || $self->{parent}->{engine}->{current_file_rel};
}

sub die_with_error {
    my ($self, $error, $textref) = @_;
    my $start_pos = $-[0];
    my $end_pos = $+[0];
    my $around = 40;
    my $s = substr($$textref, $start_pos-$around, $end_pos - $start_pos + $around * 2);
    $s =~ s/\n/ /sg;
    my $message = $error.":\n".
        "$s\n".
        ('-' x $around)."^\n";

    $self->{errors}->{$self->get_current_file_rel} = $message;
    die $message;
}

my $in_cdata;
my $in_tag;
my $in_attr;
sub fix_php_blocks {
    my ($self, $s, $textref) = @_;

    #print $-[0].':'.$+[0]."\n";

    if ($s eq '<![CDATA[') {
        if ($in_cdata) {
            my $error = "Premature '<![CDATA['";
            $self->die_with_error($error, $textref);
        } else {
            $in_cdata = 1;
        }
    } elsif ($s eq ']]>') {
        if (!$in_cdata) {
            my $error = "Premature ']]>'";
            $self->die_with_error($error, $textref);
        } else {
            $in_cdata = undef;
        }
    } elsif ($s eq '<') {
        if (!$in_cdata) {
            if ($in_tag) {
                my $error = "Premature '<'";
                $self->die_with_error($error, $textref);
            } else {
                $in_tag = 1;
            }
        }
    } elsif ($s eq '>') {
        if (!$in_cdata) {
            if (!$in_tag) {
                my $error = "Premature '>'";
                $self->die_with_error($error, $textref);
            } elsif ($in_attr) {
                my $error = "Premature '>' before '\"'";
                $self->die_with_error($error, $textref);
            } else {
                $in_tag = undef;
            }
        }
    } elsif ($s eq '"') {
        if (!$in_cdata && $in_tag) {
            $in_attr = !$in_attr;
        }
    } else { # PHP block
        if (!$in_cdata && $in_tag && !$in_attr) {
            $s = " $s=\"\" ";
        }
    }

    return $s;
}

sub parse {
    my ($self, $textref, $callbackref, $lang) = @_;

    die 'callbackref not specified' unless $callbackref;

    # Making a copy of the string as we will change it

    my $text = $$textref;

    # Clearing the php_blocks array

    $self->{php_blocks} = [];

    # Replacing the '<?' and '?>' with different markers to make XML valid,
    # and wrapping the PHP code into CDATA block

    $text =~ s/<\?(.*?)\?>/'__PHP__BLOCK__'.$self->add_php_block($1).'__'/sge;

    # Temporarily replacing DOCTYPE declaration

    $text =~ s/<\!DOCTYPE(.*?)>/'__DOCTYPE__'.$1.'__END_DOCTYPE__'/sge;

    # Replacing the symbolic entities as we are not going to expand them

    $text =~ s/&(\w+);/'__HTML__ENTITY__'.$1.'__'/ge;

    # Wrapping CDATA blocks inside special '__CDATA' tag

    $text =~ s/(<\!\[CDATA\[.*?\]\]>)/<__CDATA>$1<\/__CDATA>/sg;

    # Wrapping HTML comments inside special '__COMMENT' tag

    $text =~ s/<\!--(.*?)-->/<__COMMENT><\!\[CDATA\[$1\]\]><\/__COMMENT>/sg;

    # Now we should properly handle the situation when PHP blocks are inside the <...>
    # This violates the XML rules, so we should dance around this, converting
    # all '__PHP__BLOCK__#__' to ' __PHP__BLOCK__#__="" '. Yeah, weird.

    $in_cdata = undef;
    $in_tag = undef;
    $in_attr = undef;

    $text =~ s/(<\!\[CDATA\[|\]\]>|<|>|"|__PHP__BLOCK__\d+__)/$self->fix_php_blocks($1, \$text)/ge;

    # Extracting strings out of PHP blocks

    foreach my $block (@{$self->{php_blocks}}) {

        # Parsing the $pageTitle variable value
        $block =~ s/(\$pageTitle = ")(.*?)(";)/$1.&$callbackref($self->expand_entities($2), undef, 'Page title', undef, $lang).$3/ge;

        # Parsing underscore functions

        $self->parse_underscore_functions(\$block, $callbackref, $lang);
    }

    # Adding the dummy root tag for XML to be valid
    # (no extra line breaks should be added around these dummy tags,
    # as they will appear in the output before any opening PHP script blocks
    # and may break sending headers from the script)

    $text = "<root>".$text."</root>";

    # Creating XML parser object

    use XML::Parser;
    my $parser = new XML::Parser(Style => 'IxTree', ErrorContext => 4);

    # Parsing XML

    my $tree;
    eval {
        $tree = $parser->parse($text);
    };
    if ($@) {
        my $error_text = $@;
        $error_text =~ s/\t/ /g;
        $error_text =~ s/^\s+//s;

        $self->{errors}->{$self->get_current_file_rel} = $error_text;
        die $error_text;
    }

    # Adding the empty attributes hash to the root tag (for uniform processing)

    unshift @$tree, {};

    # For dummy root tag, reset this tag name to empty value, so it won't get exported

    $tree->[1] = '';

    # Analyze all tags recursively to decide which ones require localization

    $self->analyze_tag_recursively('', $tree);

    # Now, in a second pass, export all localizable strings and generate the localized output

    my $out = $self->render_tag_recursively('', $tree, $callbackref, $lang);

lib/Serge/Engine/Plugin/parse_php_xhtml.pm  view on Meta::CPAN

        if ($attrs->{lang} eq 'en') {
            $prohibit_translation = undef;
            $will_translate = 1;
        }

        if ($attrs->{lang} ne 'en') {
            $prohibit_translation = 1;
            $will_translate = undef;
        }
    }

    my $some_child_will_translate = undef;

    #print "[1][$name: proh=$prohibit_translation, can=$can_translate, some=$some_child_will_translate, will=$will_translate]\n" if $self->{parent}->{debug};

    if (!$will_translate) {
        for (my $i = 0; $i < (scalar(@$subtree) - 1) / 2; $i++) {
            my $tagname = $subtree->[1 + $i*2];
            my $tagtree = $subtree->[1 + $i*2 + 1];

            if ($tagname ne '0') {
                my ($child_will_translate, $child_contains_translatables) =
                    $self->analyze_tag_recursively($tagname, $tagtree, $prohibit_translation);
                $contains_translatables = 1 if ($child_contains_translatables && !$child_will_translate);
                $some_child_will_translate = 1 if $child_will_translate;
            } else {
                my $str = $tagtree; # this is a string for text nodes

                # Trim the string

                $str =~ s/^[\r\n\t ]+//sg;
                $str =~ s/[\r\n\t ]+$//sg;

                # Only non-empty strings which do not contain php blocks can be translated by default

                $contains_translatables = 1 if $str ne '';
                $prohibit_children_translation = 1 if $str =~ m/\b__PHP__BLOCK__(\d+)__\b/;

                #print "** [$name: can=$can_translate, ctr=$contains_translatables] [$str]\n" if $self->{parent}->{debug};
            }
        }
    }

    $will_translate = 1 if $can_translate && $contains_translatables;
    $will_translate = undef if $prohibit_translation or $prohibit_children_translation or $some_child_will_translate;

    #print "[2][$name: can=$can_translate, ctr=$contains_translatables, some=$some_child_will_translate, will=$will_translate]\n" if $self->{parent}->{debug};

    if ($will_translate) {
        $attrs->{'.translate'} = 1;
    }

    if ($prohibit_translation) {
        $attrs->{'.prohibit'} = 1;
    }

    return ($will_translate or $some_child_will_translate or $prohibit_translation or $prohibit_children_translation, $contains_translatables);
}

sub render_tag_recursively {
    my ($self, $name, $subtree, $callbackref, $lang, $prohibit, $cdata, $context) = @_;
    my $attrs = $subtree->[0];

    my $translate = (exists $attrs->{'.translate'}) && (!exists $attrs->{'.prohibit'}) && !$prohibit;

    # if translation is prohibited for an entire subtree, or if the node is going to be translated
    # as a whole, then prohibit translation of children
    my $prohibit_children = $prohibit || $translate;

    $cdata = 1 if (($name eq '__CDATA') || ($name eq '__COMMENT'));

    # if context or hint attribute is defined, use that instead of current value, even if the new value is empty;
    # for values that represent empty strings, use `undef`

    if (exists $attrs->{context}) {
        $context = $attrs->{context} ne '' ? $attrs->{context} : undef;
    }

    if (exists $attrs->{'data-l10n-context'}) {
        $context = $attrs->{'data-l10n-context'} ne '' ? $attrs->{'data-l10n-context'} : undef;
    }

    my $hint;

    if (exists $attrs->{hint}) {
        $hint = $attrs->{hint} ne '' ? $attrs->{hint} : undef;
    }

    if (exists $attrs->{'data-l10n-hint'}) {
        $hint = $attrs->{'data-l10n-hint'} ne '' ? $attrs->{'data-l10n-hint'} : undef;
    }

    my $inner_xml = '';

    my $subnodes_count = (scalar(@$subtree) - 1) / 2;
    for (my $i = 0; $i < $subnodes_count; $i++) {
        my $tagname = $subtree->[1 + $i*2];
        my $tagtree = $subtree->[1 + $i*2 + 1];

        if ($tagname ne '0') {
            # if we are going to translate this tag as a whole, then prohibit translation for the entire subtree
            $inner_xml .= $self->render_tag_recursively($tagname, $tagtree, $callbackref, $lang, $prohibit_children, $cdata, $context);
        } else {
            # tagtree holds a string for text nodes

            my $str = $tagtree;

            # Escaping unsafe xml chars (excluding quotes)

            xml_escape_strref(\$str, 1) unless $cdata;

            # Reconstructing original XML with PHP blocks and symbolic entities

            $self->reconstruct_xml(\$str);

            # Add the string to a resulting xml

            $inner_xml .= $str;
        }
    }

    # Once the inner html is prepared, pass it through localizer if necessary
    # (We do one exception for <object> tag which we extract as a whole)

    if ($translate && ($name ne 'object') && ($inner_xml ne '')) {
        $inner_xml = &$callbackref($self->expand_entities($inner_xml), $context, $hint, undef, $lang);
    }

    # If this is a <script> tag which is not a part of a translatable string,
    # then extract _("...") style strings from it

    if (!$translate && ($name eq 'script')) {
        $self->parse_underscore_functions(\$inner_xml, $callbackref, $lang);
    }

    #print "::<$name>: $translate\n===\n$inner_xml\n===\n";

    # Determine if attributes require localization.
    # This happens when this is not prohibited explicitly,
    # and there is no explicit instruction to localize the tag
    # or there is an explicit instruction to localize the non-terminal tag
    # (as terminal localizable tags will be extracted later as a whole, with all attributes,
    # so there is no need to extract attributes separately)

    my $e = exists $attrs->{'.translate'};
    my $translate_attrs = (!exists $attrs->{'.prohibit'}) && !$prohibit && (!$e || ($e && $inner_xml));

    # Deleting temporary attributes and special 'lang' and 'context' attributes

    delete $attrs->{'.translate'};
    delete $attrs->{'.prohibit'};
    if (!$self->{leave_attrs}) {
        delete $attrs->{lang};
        delete $attrs->{context};
    }

    # Adjusting <meta http-equiv="Content-Language" content="..." /> (if exists)
    # to have the proper content value, e.g. "pt-br"

    if ((lc($name) eq 'meta') && (lc($attrs->{'http-equiv'}) eq 'content-language')) {
        $attrs->{content} = $lang;
    }

    # Generating the string consisting of [attr="value"] pairs

    my $locale = locale_from_lang($lang);
    my $attrs_text;

    foreach my $key (keys %$attrs) {



( run in 0.535 second using v1.01-cache-2.11-cpan-39bf76dae61 )