XHTML-MediaWiki
view release on metacpan or search on metacpan
lib/XHTML/MediaWiki.pm view on Meta::CPAN
if ($info->{empty}) {
warn "empty tags";
#skip empty tags;
} elsif ($info->{nowiki}) {
# my $text = _close_to($parser, $tagname);
$parser->end_nowiki();
} elsif ($info->{block}) {
$parser->close_block();
} elsif ($info->{phrase}) {
my $text = _close_to($parser, $tagname);
$parser->append_text($text);
} elsif ($info->{special}) {
$parser->close_block();
my $text = _close_to($parser, $tagname);
$parser->add_block($text);
} else {
die "helpme $tagname";
}
} else {
if ($info->{empty}) {
$parser->append_text("<$tagname/>");
} elsif ($info->{nowiki}) {
$parser->start_nowiki();
# push @$tagstack, $tagname;
} elsif (my $blockname = $info->{block}) {
$parser->close_block( new_state => $blockname );
# $parser->{state} = $blockname;
unless ($info->{notag}) {
$parser->append_text("<$tagname>");
}
push @$tagstack, $tagname;
} elsif ($info->{phrase}) {
push(@$tagstack, $tagname);
my $text = "<$tagname>";
$parser->append_text($text);
} elsif ($info->{special}) {
$parser->close_block();
push(@$tagstack, $tagname);
my $text = "<$tagname>";
$parser->add_block($text);
} else {
die "helpme $tagname";
push @$tagstack, $tagname;
}
}
} else {
$parser->append_text($parser, encode($orig));
}
return;
}
# Comment handler registered with the parser. Comments are deliberately
# dropped: nothing is recorded and nothing is returned.
sub _html_comment
{
    # Debug aid, kept disabled:
    #   warn "_html_comment: " . join(' ', @_);
    return;
}
# Text handler registered with the parser.
#
# Arguments:
#   $parser       - the parser object; must provide end_line, can_cdata
#                   and append_text.
#   $dtext        - the text chunk to store.
#   $skipped_text - accepted for the handler signature, currently unused.
#   $is_cdata     - true when the chunk is CDATA.
#
# Newlines at the very start of the chunk are removed and their count is
# reported through $parser->end_line. The remainder is appended verbatim
# when it is CDATA and the parser allows CDATA here; otherwise it is run
# through encode() first.
#
# Returns whatever $parser->append_text returns.
sub _html_text
{
    my ($parser, $dtext, $skipped_text, $is_cdata) = @_;

    # Anchor with \A rather than ^ under /m: the latter also matches a run
    # of newlines in the middle of the chunk, which made the substr below
    # chop real characters off the front of the text.
    if ($dtext =~ /\A(\n+)/) {
        my $newline_count = length $1;
        $parser->end_line($newline_count);
        $dtext = substr($dtext, $newline_count);
    }

    my $newtext = ($is_cdata && $parser->can_cdata)
        ? $dtext
        : encode($dtext);

    # Debug aid, kept disabled:
    #   warn "Got skipped_text: `$skipped_text'\n[$dtext]\n" if $skipped_text;
    return $parser->append_text($newtext);
}
{
package XHTML::MediaWiki::Parser;
use base 'HTML::Parser';
use Params::Validate qw (validate);
# Report whether the current block's tag type is flagged can_cdata in the
# parser's {tags} table. Returns 0 when there is no current block.
sub can_cdata
{
    my ($self) = @_;
    my $active = $self->check_current_block;
    return 0 unless $active;
    return $self->{tags}{ $active->{type} }{can_cdata};
}
# Forward an end-of-line notification, with any arguments given, to the
# block returned by get_last_line_block.
sub end_line
{
    my $self = shift;
    my $target = $self->get_last_line_block;
    return $target->set_end_line(@_);
}
# Name of the state of the current block, or the string "none" when no
# block is currently open.
sub state
{
    my ($self) = @_;
    my $active = $self->check_current_block;
    return $active ? $active->get_state : "none";
}
# True when the parser's current state name equals $state.
# Takes exactly one argument besides the invocant; dies if more are given.
sub in_state
{
    my ($self, $state, @unexpected) = @_;
    die if @unexpected;
    my $current = $self->state;
    return $current && $current eq $state;
}
# Ask the current block whether it is a paragraph.
# Returns 0 when no block is open.
sub in_paragraph
{
    my ($self) = @_;
    my $active = $self->check_current_block;
    return 0 unless $active;
    return scalar $active->is_paragraph;
}
# Ask the current block whether it is a prewiki block.
# Returns 0 when no block is open.
sub in_prewiki
{
    my ($self) = @_;
    my $active = $self->check_current_block;
    return 0 unless $active;
    return scalar $active->is_prewiki;
}
# True when wiki formatting must be left alone: the parser is either in
# the 'pre' state or inside a nowiki section.
sub noformat
{
    my ($self) = @_;
    return $self->in_state('pre') || $self->in_nowiki();
}
sub add_block
lib/XHTML/MediaWiki.pm view on Meta::CPAN
}
# Pick the block that should receive end-of-line information: the current
# block when get_current_block yields a defined value, otherwise the last
# entry of the parser's {blocks} list (which may itself be undef).
sub get_last_line_block
{
    my ($self) = @_;
    my $candidate = $self->get_current_block;
    return defined $candidate ? $candidate : $self->{blocks}[-1];
}
# Append $text to the block returned by get_current_block.
# Only the first argument after the invocant is used.
sub append_text
{
    my ($self, $text) = @_;
    return $self->get_current_block->append_text($text);
}
# Return the stored blocks that are worth keeping, in their original order.
# False slots are skipped silently. A block of type 'paragraph' with an
# empty {lines} list and a false {line} is skipped with a "fix" warning.
sub get_blocks
{
    my ($self) = @_;
    my @kept;
    foreach my $candidate (@{ $self->{blocks} }) {
        next if !$candidate;
        my $is_blank_paragraph =
               $candidate->{type} eq 'paragraph'
            && !@{ $candidate->{lines} }
            && !$candidate->{line};
        if ($is_blank_paragraph) {
            warn "fix";
            next;
        }
        push @kept, $candidate;
    }
    return @kept;
}
# Finish parsing: close the open block, emit closing tags for every tag
# still on the tag stack, then hand over to the parent class's eof.
#
# Any arguments are forwarded to SUPER::eof, whose return value is
# returned.
sub eof
{
    my $self = shift;
    $self->close_block();

    # The tag stack grows by push, so the last entry is the innermost open
    # element. Close innermost first; closing in push order would produce
    # mis-nested markup such as <b><i>...</b></i>.
    for my $tag (reverse @{$self->{tag_stack}}) {
        $self->append_text("</$tag>\n");
    }

    return $self->SUPER::eof(@_);
}
}
sub _find_blocks_in_html
{
my $self = shift;
my $text = shift || "";
die if @_;
my $parser = XHTML::MediaWiki::Parser->new
(start_h => [\&_html_tag, 'self, "S", tagname, text, attr'],
end_h => [\&_html_tag, 'self, "E", tagname, text'],
comment_h => [\&_html_comment, 'self, text'],
text_h => [\&_html_text, 'self, dtext, skipped_text, is_cdata'],
marked_sections => 1,
boolean_attribute_value => '__TEXT_MEDIAWIKIFORMAT_BOOL__',
);
$parser->{opts} = {},
$parser->{tags} = {
b => { phrase => 1 },
big => { phrase => 1 },
blockquote => { phrase => 1 },
br => { empty => 1 },
caption => {},
center => {},
cite => {},
code => { phrase => 1 },
dd => {},
div => {
special => 1,
},
dl => {},
dt => {},
em => {},
font => {},
h1 => { block => 'header' },
h2 => { block => 'header' },
h3 => { block => 'header' },
h4 => { block => 'header' },
h5 => { block => 'header' },
h6 => { block => 'header' },
hr => { empty => 1 },
i => { },
li => { },
nowiki => {
nowiki => 1,
notag => 1,
},
ol => { },
p => { block => 'p' },
paragraph => {
block => 'paragraph',
notag => 1
},
pre => {
block => 'pre',
# nowiki => 1,
},
rb => {},
rp => {},
rt => {},
ruby => {
block => 'ruby',
can_cdata => 1,
},
s => {},
samp => {},
small => {},
strike => {},
strong => {},
sub => {},
sup => {},
table => {},
td => {},
th => {},
tr => {},
tt => {},
u => {},
ul => {},
var => {},
};
$parser->{tag_stack} = [];
$parser->{blocks} = [];
$parser->{current_block} = undef;
my @lines = split(/\r?\n/, $text);
for my $line (@lines) {
my $close = 0;
die if chomp $line;
if ($parser->noformat) {
# we are in nowiki or pre block
} else {
if ($parser->in_prewiki && $line && $line !~ m/^\s+/) {
$parser->close_block();
}
if ($line =~ qr/^(={1,6})\s*(.+?)\s*\1$/) {
my $x = length $1;
$line = sprintf("<h%d>%s</h%d>\n", $x, $2, $x);
$parser->{last} = 'header';
} elsif ($line =~ /^$/) {
if ($parser->check_current_block) {
if ($parser->in_paragraph) {
$parser->close_block();
} elsif ($parser->in_prewiki) {
$parser->close_block();
} else {
}
} else {
unless ({header => 1, prewiki => 1}->{$parser->{last} || ''}) {
$line = "<br/>";
}
}
} elsif ($line =~ m/^\s(\s*.*)$/) {
$line = $1;
$parser->close_block( new_state => 'prewiki', auto_merge => 1 );
$parser->{last} = 'prewiki';
} elsif ($line =~ m/^(#+)\s*(.*)\s*$/) {
my $x = length $1;
$parser->close_block( new_state => 'ordered', indent => $x );
$close = 1;
$line = $2;
$parser->{last} = 'nested';
( run in 1.773 second using v1.01-cache-2.11-cpan-d7f47b0818f )