Text-MediawikiFormat

 view release on metacpan or  search on metacpan

lib/Text/MediawikiFormat.pm  view on Meta::CPAN


				# CGI::escapeHTML escapes single quotes.
				$attr->{$_} = CGI::escapeHTML $attr->{$_};
				$newtag .= "='" . $attr->{$_} . "'";
			}
		}
	}
	$newtag .= " /" if $HTML::Tagset::emptyElement{$tagname} || $isEmptyTag;
	$newtag .= ">";

	# If this isn't a block level element, there's no need to track nesting.
	if (   $HTML::Tagset::isPhraseMarkup{$tagname}
		|| $HTML::Tagset::emptyElement{$tagname} )
	{
		_append_processed_line $parser, $newtag;
		return;
	}

	# Some elements can close implicitly
	if (@$tagstack) {
		if (   $tagname eq $stacktop
			&& $HTML::Tagset::optionalEndTag{$tagname} )
		{
			pop @$tagstack;
		}
		elsif ( !$HTML::Tagset::is_Possible_Strict_P_Content{$tagname} ) {

			# Need to check more than the last item for paragraphs.
			for ( my $i = $#{$tagstack}; $i >= 0; $i-- ) {
				my $checking = $tagstack->[$i];
				last if grep /^\Q$checking\E$/, @HTML::Tagset::p_closure_barriers;

				if ( $checking eq 'p' ) {

					# pop 'em all.
					splice @$tagstack, $i;
					last;
				}
			}
		}
	}

	# Could verify here that <li> and <table> sub-elements only appear where
	# they belong.

	# Push the new tag onto the stack.
	push @$tagstack, $tagname
		unless $isEmptyTag;

	_append_processed_line $parser, $newtag, $tagname eq 'pre' ? 'nowiki' : 'html';
	return;
}

sub _html_comment {
	my ( $parser, $text ) = @_;

	_append_processed_line $parser, $text, 'nowiki';
}

sub _html_text {
	my ( $parser, $dtext, $skipped_text, $is_cdata ) = @_;
	my $tagstack = $parser->{tag_stack};
	my ( $newtext, $newstate );

	warnings::warnif("Got skipped_text: `$skipped_text'")
		if $skipped_text;

	if (@$tagstack) {
		if ( grep /\Q$tagstack->[-1]\E/, qw{nowiki pre} ) {
			$newstate = 'nowiki';
		}
		elsif ( $is_cdata && $HTML::Tagset::isCDATA_Parent{ $tagstack->[-1] } ) {

			# If the user hadn't specifically allowed a tag which contains
			# CDATA, then it won't be on the tag stack.
			$newtext = $dtext;
		}
	}

	unless ( defined $newtext ) {
		$newtext = CGI::escapeHTML $dtext unless defined $newtext;

		# CGI::escapeHTML escapes single quotes so the text may be included
		# in attribute values, but we know we aren't processing an attribute
		# value here.
		$newtext =~ s/&#39;/'/g;
	}

	_append_processed_line $parser, $newtext, $newstate;
}

sub _find_blocks_in_html {
	my ( $text, $tags, $opts ) = @_;

	my $parser = HTML::Parser->new(
		start_h                 => [ \&_html_tag,     'self, "S", tagname, text, attr' ],
		end_h                   => [ \&_html_tag,     'self, "E", tagname, text' ],
		comment_h               => [ \&_html_comment, 'self, text' ],
		text_h                  => [ \&_html_text,    'self, dtext, skipped_text, is_cdata' ],
		marked_sections         => 1,
		boolean_attribute_value => '__TEXT_MEDIAWIKIFORMAT_BOOL__',
	);
	$parser->{opts}            = $opts;
	$parser->{tags}            = $tags;
	$parser->{processed_lines} = [];
	$parser->{tag_stack}       = [];

	my @blocks;
	my @lines = split /\r?\n/, $text;
	for ( my $i = 0; $i < @lines; $i++ ) {
		$parser->parse( $lines[$i] );
		$parser->parse("\n");
		$parser->eof if $i == $#lines;

		# @{$parser->{processed_lines}} may be empty when tags are
		# still open.
		while ( @{ $parser->{processed_lines} }
			&& $parser->{processed_lines}->[0]->[2] )
		{
			my ( $type, $dtext )
				= @{ shift @{ $parser->{processed_lines} } };

			my $block;
			if ($type) {
				$block = _start_block( $dtext, $tags, $opts, $type );
			}
			else {
				chomp $dtext;
				$block = _start_block( $dtext, $tags, $opts );
			}
			push @blocks, $block if $block;
		}
	}

	return @blocks;
}

sub _find_blocks {
	my ( $text, $tags, $opts ) = @_;
	my @blocks;

	if ( $opts->{process_html} ) {
		@blocks = _find_blocks_in_html $text, $tags, $opts;
	}
	else {
		# The original behavior.
		for my $line ( split /\r?\n/, $text ) {
			my $block = _start_block( $line, $tags, $opts );
			push @blocks, $block if $block;
		}
	}

	return @blocks;
}

sub _start_block {
	my ( $text, $tags, $opts, $type ) = @_;

	return new_block( 'end', level => 0 ) unless $text;



( run in 1.314 second using v1.01-cache-2.11-cpan-13bb782fe5a )