XHTML-MediaWiki

 view release on metacpan or  search on metacpan

lib/XHTML/MediaWiki.pm  view on Meta::CPAN

            if ($info->{empty}) {
warn "empty tags";
#skip empty tags;
            } elsif ($info->{nowiki}) {
#               my $text = _close_to($parser, $tagname);
                $parser->end_nowiki();
            } elsif ($info->{block}) {
                $parser->close_block();
            } elsif ($info->{phrase}) {
                my $text = _close_to($parser, $tagname);
                $parser->append_text($text);
            } elsif ($info->{special}) {
                $parser->close_block();
                my $text = _close_to($parser, $tagname);
                $parser->add_block($text);
            } else {
die "helpme $tagname";
            }
        } else {
            if ($info->{empty}) {
                $parser->append_text("<$tagname/>");
            } elsif ($info->{nowiki}) {
                $parser->start_nowiki();
#               push @$tagstack, $tagname;
            } elsif (my $blockname = $info->{block}) {
                $parser->close_block( new_state => $blockname );

#               $parser->{state} = $blockname;
                unless ($info->{notag}) {
                    $parser->append_text("<$tagname>");
                }
                push @$tagstack, $tagname;
            } elsif ($info->{phrase}) { 
                push(@$tagstack, $tagname);
                my $text = "<$tagname>";
                $parser->append_text($text);
            } elsif ($info->{special}) { 
                $parser->close_block();
                push(@$tagstack, $tagname);
                my $text = "<$tagname>";
                $parser->add_block($text);
            } else {
die "helpme $tagname";
                push @$tagstack, $tagname;
            }
        }
    } else {
        $parser->append_text($parser, encode($orig));
    }

    return;
}

# Comment handler registered with the HTML parser (comment_h).
# Comments are deliberately dropped: nothing is recorded and nothing
# is returned.
sub _html_comment
{
    # Debugging aid, disabled:
    #    warn "_html_comment: " . join(' ', @_);
    return;
}

# Text handler registered with the HTML parser (text_h, argspec
# 'self, dtext, skipped_text, is_cdata').
#
# Parameters:
#   $parser       - the parser object receiving the text
#   $dtext        - the text reported by the parser
#   $skipped_text - accepted for the handler signature; not used here
#   $is_cdata     - true when the parser flags the text as CDATA
#
# Newlines at the very start of the text are reported to the parser via
# end_line(<count>) and removed.  The remainder is appended to the
# parser: verbatim when it is CDATA and the parser's can_cdata allows
# it, otherwise after being passed through encode().
#
# Returns whatever $parser->append_text returns.
sub _html_text
{
    my ($parser, $dtext, $skipped_text, $is_cdata) = @_;

    # Anchor with \A (and no /m): under /m, '^' also matches after an
    # interior newline, so a blank line in the middle of the text used
    # to be mistaken for leading newlines and the substr() below then
    # chopped real characters off the front of the text.
    if ($dtext =~ /\A(\n+)/) {
        my $newline_count = length $1;
        $parser->end_line($newline_count);
        $dtext = substr($dtext, $newline_count);
    }

    my $newtext;
    if ($is_cdata && $parser->can_cdata) {
        $newtext = $dtext;
    } else {
        $newtext = encode($dtext);
    }

    return $parser->append_text($newtext);

#    warn "Got skipped_text: `$skipped_text'\n[$dtext]\n" if $skipped_text;
}

{
    package XHTML::MediaWiki::Parser;

    use base 'HTML::Parser';

    use Params::Validate qw (validate);

    # Report whether the current block's type permits raw CDATA text.
    #
    # Returns 0 when there is no current block or when the block's type
    # has no entry in $self->{tags}; otherwise returns that entry's
    # 'can_cdata' value (which may be undef when the flag is not set).
    sub can_cdata
    {
        my $self = shift;

        my $current = $self->check_current_block
            or return 0;

        # Fetch the tag entry first instead of chaining
        # {tags}{$type}{can_cdata}: the chained rvalue lookup would
        # autovivify an empty entry in the tag table for block types
        # that are not tags at all, making them look like known tags.
        my $tag_info = $self->{tags}{ $current->{type} };
        return 0 unless $tag_info;

        return $tag_info->{can_cdata};
    }

    # Forward an end-of-line notification to the block returned by
    # get_last_line_block; all remaining arguments are passed through
    # unchanged to that block's set_end_line.
    sub end_line
    {
        my $self = shift;

        return $self->get_last_line_block->set_end_line(@_);
    }

    # Name of the current block's state, as reported by its get_state
    # method, or the string "none" when there is no current block.
    sub state
    {
        my ($self) = @_;

        my $current = $self->check_current_block;

        return $current ? $current->get_state : "none";
    }

    # True when the parser's current state (see ->state) equals $state.
    # Takes exactly one argument besides the invocant and dies if any
    # extra arguments are supplied.
    sub in_state
    {
        my ($self, $state, @extra) = @_;
        die if @extra;

        my $current = $self->state;

        return $current && $current eq $state;
    }

    # Value of the current block's is_paragraph, or 0 when there is no
    # current block.
    sub in_paragraph
    {
        my ($self) = @_;

        my $current = $self->check_current_block;

        return $current ? scalar($current->is_paragraph) : 0;
    }

    # Value of the current block's is_prewiki, or 0 when there is no
    # current block.
    sub in_prewiki
    {
        my ($self) = @_;

        my $current = $self->check_current_block;

        return $current ? scalar($current->is_prewiki) : 0;
    }

    # True when wiki formatting must not be applied: the parser is
    # either in the 'pre' state or reports in_nowiki.
    sub noformat
    {
        my ($self) = @_;

        my $in_pre = $self->in_state('pre');
        return $in_pre if $in_pre;

        return $self->in_nowiki();
    }

    sub add_block

lib/XHTML/MediaWiki.pm  view on Meta::CPAN

    }

    # Block that should receive end-of-line information: the result of
    # get_current_block when that is defined, otherwise the last entry
    # of $self->{blocks} (undef when there is none).
    sub get_last_line_block
    {
        my ($self) = @_;

        my $current = $self->get_current_block;
        return $current if defined $current;

        return $self->{blocks}[-1];
    }

    # Append $text to the block returned by get_current_block by calling
    # that block's own append_text.  Only the first argument after the
    # invocant is used.
    sub append_text
    {
        my ($self, $text) = @_;

        return $self->get_current_block->append_text($text);
    }

    # List the parser's blocks, leaving out false entries and paragraph
    # blocks that hold no text (no entries in {lines} and a false
    # {line}).  A "fix" warning is emitted for every such empty
    # paragraph that is dropped.
    sub get_blocks
    {
        my ($self) = @_;
        my @kept;

        BLOCK:
        for my $candidate (@{ $self->{blocks} }) {
            next BLOCK if !$candidate;

            my $is_hollow_paragraph =
                   $candidate->{type} eq 'paragraph'
                && 0 == @{ $candidate->{lines} }
                && !$candidate->{line};

            if ($is_hollow_paragraph) {
                warn "fix";
                next BLOCK;
            }

            push @kept, $candidate;
        }

        return @kept;
    }

    # Finish parsing: close the current block, emit a closing tag for
    # every tag still recorded on the tag stack, then hand over to the
    # parent class's eof (the package inherits from HTML::Parser).
    #
    # Any arguments are passed through to SUPER::eof, whose return value
    # is returned.
    sub eof
    {
        my $self = shift;

        $self->close_block();

        # Tags are pushed onto the stack as they are opened, so the
        # innermost open tag is last.  Unwind in reverse so the closing
        # tags nest properly (<b><i> is closed as </i></b>).
        for my $tag (reverse @{ $self->{tag_stack} }) {
            $self->append_text("</$tag>\n");
        }

        return $self->SUPER::eof(@_);
    }
}

sub _find_blocks_in_html
{
    my $self = shift;
    my $text = shift || "";
    die if @_;

    my $parser = XHTML::MediaWiki::Parser->new
        (start_h   => [\&_html_tag, 'self, "S", tagname, text, attr'],
         end_h     => [\&_html_tag, 'self, "E", tagname, text'],
         comment_h => [\&_html_comment, 'self, text'],
         text_h    => [\&_html_text, 'self, dtext, skipped_text, is_cdata'],
         marked_sections => 1,
         boolean_attribute_value => '__TEXT_MEDIAWIKIFORMAT_BOOL__',
        );
    $parser->{opts} = {},
    $parser->{tags} = {
        b => { phrase => 1 },
        big => { phrase => 1 },
        blockquote => { phrase => 1 },
        br => { empty => 1 },
        caption => {},
        center => {},
        cite => {},
        code => { phrase => 1 },
        dd => {},
        div => {
            special => 1,
        },
        dl => {},
        dt => {},
        em => {},
        font => {},

        h1 => { block => 'header' },
        h2 => { block => 'header' },
        h3 => { block => 'header' },
        h4 => { block => 'header' },
        h5 => { block => 'header' },
        h6 => { block => 'header' },

        hr => { empty => 1 },
        i => { },
        li => { },
        nowiki => { 
            nowiki => 1, 
            notag => 1,
        },
        ol => { },
        p => { block => 'p' },
        paragraph => {
            block => 'paragraph',
            notag => 1 
        },
        pre => { 
            block => 'pre',
#           nowiki => 1,
        },
        rb => {},
        rp => {},
        rt => {},
        ruby => { 
            block => 'ruby',
            can_cdata => 1,
        },
        s => {},
        samp => {},
        small => {},
        strike => {},
        strong => {},
        sub => {},
        sup => {},
        table => {},
        td => {},
        th => {},
        tr => {},
        tt => {},
        u => {},
        ul => {},
        var => {},
    };
    $parser->{tag_stack} = [];
    $parser->{blocks} = [];
    $parser->{current_block} = undef;

    my @lines = split(/\r?\n/, $text);

    for my $line (@lines) {
        my $close = 0;
        die if chomp $line;
        if ($parser->noformat) {
# we are in nowiki or pre block
        } else {
            if ($parser->in_prewiki && $line && $line !~ m/^\s+/) {
                $parser->close_block();
            }
            if ($line =~ qr/^(={1,6})\s*(.+?)\s*\1$/) {
                my $x = length $1;
                $line = sprintf("<h%d>%s</h%d>\n", $x, $2, $x);
                $parser->{last} = 'header';
            } elsif ($line =~ /^$/) {
                if ($parser->check_current_block) {
                    if ($parser->in_paragraph) {
                        $parser->close_block();
                    } elsif ($parser->in_prewiki) {
                        $parser->close_block();
                    } else {
                    }
                } else {
                    unless ({header => 1, prewiki => 1}->{$parser->{last} || ''}) {
                        $line = "<br/>";
                    }
                }
            } elsif ($line =~ m/^\s(\s*.*)$/) {
                $line = $1;
                $parser->close_block( new_state => 'prewiki', auto_merge => 1 );

                $parser->{last} = 'prewiki';
            } elsif ($line =~ m/^(#+)\s*(.*)\s*$/) {
                my $x = length $1;
                $parser->close_block( new_state => 'ordered', indent => $x );
                $close = 1;
                $line = $2;
                $parser->{last} = 'nested';



( run in 1.773 second using v1.01-cache-2.11-cpan-d7f47b0818f )