Text-MediawikiFormat
view release on metacpan or search on metacpan
lib/Text/MediawikiFormat.pm view on Meta::CPAN
# CGI::escapeHTML escapes single quotes.
$attr->{$_} = CGI::escapeHTML $attr->{$_};
$newtag .= "='" . $attr->{$_} . "'";
}
}
}
$newtag .= " /" if $HTML::Tagset::emptyElement{$tagname} || $isEmptyTag;
$newtag .= ">";
# If this isn't a block level element, there's no need to track nesting.
if ( $HTML::Tagset::isPhraseMarkup{$tagname}
|| $HTML::Tagset::emptyElement{$tagname} )
{
_append_processed_line $parser, $newtag;
return;
}
# Some elements can close implicitly
if (@$tagstack) {
if ( $tagname eq $stacktop
&& $HTML::Tagset::optionalEndTag{$tagname} )
{
pop @$tagstack;
}
elsif ( !$HTML::Tagset::is_Possible_Strict_P_Content{$tagname} ) {
# Need to check more than the last item for paragraphs.
for ( my $i = $#{$tagstack}; $i >= 0; $i-- ) {
my $checking = $tagstack->[$i];
last if grep /^\Q$checking\E$/, @HTML::Tagset::p_closure_barriers;
if ( $checking eq 'p' ) {
# pop 'em all.
splice @$tagstack, $i;
last;
}
}
}
}
# Could verify here that <li> and <table> sub-elements only appear where
# they belong.
# Push the new tag onto the stack.
push @$tagstack, $tagname
unless $isEmptyTag;
_append_processed_line $parser, $newtag, $tagname eq 'pre' ? 'nowiki' : 'html';
return;
}
sub _html_comment {
my ( $parser, $text ) = @_;
_append_processed_line $parser, $text, 'nowiki';
}
sub _html_text {
my ( $parser, $dtext, $skipped_text, $is_cdata ) = @_;
my $tagstack = $parser->{tag_stack};
my ( $newtext, $newstate );
warnings::warnif("Got skipped_text: `$skipped_text'")
if $skipped_text;
if (@$tagstack) {
if ( grep /\Q$tagstack->[-1]\E/, qw{nowiki pre} ) {
$newstate = 'nowiki';
}
elsif ( $is_cdata && $HTML::Tagset::isCDATA_Parent{ $tagstack->[-1] } ) {
# If the user hadn't specifically allowed a tag which contains
# CDATA, then it won't be on the tag stack.
$newtext = $dtext;
}
}
unless ( defined $newtext ) {
$newtext = CGI::escapeHTML $dtext unless defined $newtext;
# CGI::escapeHTML escapes single quotes so the text may be included
# in attribute values, but we know we aren't processing an attribute
# value here.
$newtext =~ s/'/'/g;
}
_append_processed_line $parser, $newtext, $newstate;
}
sub _find_blocks_in_html {
my ( $text, $tags, $opts ) = @_;
my $parser = HTML::Parser->new(
start_h => [ \&_html_tag, 'self, "S", tagname, text, attr' ],
end_h => [ \&_html_tag, 'self, "E", tagname, text' ],
comment_h => [ \&_html_comment, 'self, text' ],
text_h => [ \&_html_text, 'self, dtext, skipped_text, is_cdata' ],
marked_sections => 1,
boolean_attribute_value => '__TEXT_MEDIAWIKIFORMAT_BOOL__',
);
$parser->{opts} = $opts;
$parser->{tags} = $tags;
$parser->{processed_lines} = [];
$parser->{tag_stack} = [];
my @blocks;
my @lines = split /\r?\n/, $text;
for ( my $i = 0; $i < @lines; $i++ ) {
$parser->parse( $lines[$i] );
$parser->parse("\n");
$parser->eof if $i == $#lines;
# @{$parser->{processed_lines}} may be empty when tags are
# still open.
while ( @{ $parser->{processed_lines} }
&& $parser->{processed_lines}->[0]->[2] )
{
my ( $type, $dtext )
= @{ shift @{ $parser->{processed_lines} } };
my $block;
if ($type) {
$block = _start_block( $dtext, $tags, $opts, $type );
}
else {
chomp $dtext;
$block = _start_block( $dtext, $tags, $opts );
}
push @blocks, $block if $block;
}
}
return @blocks;
}
sub _find_blocks {
my ( $text, $tags, $opts ) = @_;
my @blocks;
if ( $opts->{process_html} ) {
@blocks = _find_blocks_in_html $text, $tags, $opts;
}
else {
# The original behavior.
for my $line ( split /\r?\n/, $text ) {
my $block = _start_block( $line, $tags, $opts );
push @blocks, $block if $block;
}
}
return @blocks;
}
sub _start_block {
my ( $text, $tags, $opts, $type ) = @_;
return new_block( 'end', level => 0 ) unless $text;
( run in 1.314 second using v1.01-cache-2.11-cpan-13bb782fe5a )