Tripletail
view release on metacpan or search on metacpan
ext/Tripletail-HtmlFilter/HtmlFilter.pm view on Meta::CPAN
use Tripletail;
use DynaLoader;
use base 'DynaLoader';
our @XSUBS = qw(next _next_elem Element::parse Element::attr);
our $XS_LOADERROR;
Tripletail::HtmlFilter->my_bootstrap($Tripletail::XS_VERSION);
use constant {
# 注æ: ããã夿´ããæã¯ XS å´ãä¿®æ£ããäºã
INTEREST => 0,
TRACK => 1,
FILTER_TEXT => 2,
FILTER_COMMENT => 3,
CONTEXT => 4,
HTML => 5,
OUTPUT => 6,
};
my %_MATCHER_CACHE;
sub my_bootstrap
{
my $pkg = shift;
if( !$PURE_PERL )
{
local ($@);
eval
{
local($SIG{__DIE__}) = 'DEFAULT';
$pkg->SUPER::bootstrap(@_);
};
$XS_LOADERROR = $@;
}else
{
$XS_LOADERROR = 'disabled';
}
do
{
no strict 'refs';
#$err and chomp $err;
#warn "warning: $err";
foreach my $name (@XSUBS)
{
my $xsub = __PACKAGE__.'::'.$name;
if( !defined(&$xsub) )
{
(my $ppsub = $xsub) =~ s/(\w+)$/_$1_pp/;
*$xsub = \&$ppsub;
}
}
}
}
1;
sub _new {
my $class = shift;
my $opts = { @_ };
my $this = bless [] => $class;
$this->[INTEREST] = $opts->{interest};
$this->[TRACK] = $opts->{track};
$this->[FILTER_TEXT] = $opts->{filter_text};
$this->[FILTER_COMMENT] = $opts->{filter_comment};
$this->[CONTEXT] = Tripletail::HtmlFilter::Context->_new;
$this->[HTML] = undef; # æåå
$this->[OUTPUT] = []; # Tripletail::HtmlFilter::{Element,Text,Comment}
# interest, trackã«æ¸¡ãããæ£è¦è¡¨ç¾ã¯ãã®æç¹ã§ CODE ã«ã³ã³ãã¤ã«ãã¦ããã
if ($this->[INTEREST]) {
$this->[INTEREST] = $this->_compile_matcher($this->[INTEREST]);
}
if ($this->[TRACK]) {
$this->[TRACK] = $this->_compile_matcher($this->[TRACK]);
}
$this;
}
sub set {
my $this = shift;
my $html = shift;
if (not defined $html) {
die __PACKAGE__."#set: ARG[1] is not defined.\n";
}
elsif (ref $html) {
die __PACKAGE__."#set: ARG[1] is a Ref.\n";
}
#@{$this->[HTML]} = split m/(<.+?>)/s, $html;
# âã§ã¯ã<!-- <hoge> -->ãæ£ããè§£æã§ããªããçé¢ç®ã«ãã¼ãºããå¿
è¦ããã
# ããããperlã§çé¢ç®ã«ãã¼ã¶ãæ¸ãã®ã¯é常ã«é¢åãªã®ã§æ£è¦è¡¨ç¾ã§èª¤éåã
# NB: ä»ã«ããæ£ããè§£æã§ããªããã¿ã¼ã³ãåå¨ãããã
@{$this->[HTML]} = split m/((?:<!--.*?-->)|(?:<.+?>))/s, $html;
@{$this->[OUTPUT]} = ();
$this;
}
sub toStr {
my $this = shift;
$this->[CONTEXT]->_flush($this); # æªç¢ºå®ã®é¨åã確å®ãã
join('', map {ref($_)?$_->toStr:$_} @{$this->[OUTPUT]});
}
sub _compile_matcher {
my $this = shift;
my $regexes = shift;
my $joined = join('', @$regexes);
if (my $cached = $_MATCHER_CACHE{$joined}) {
return $cached;
}
my $ret = [];
ext/Tripletail-HtmlFilter/HtmlFilter.pm view on Meta::CPAN
if ($m->($str)) {
return 1;
}
}
else {
if ($m eq $str) {
return 1;
}
}
}
undef;
};
my ($interested,$parsed);
if (defined $elem_name) {
my ($close,$nameonly) = $elem_name =~ /^(\/?)(.*)/;
if ($this->[TRACK] and $is_matched->($this->[TRACK], $nameonly)) {
$parsed = $elem;
if ($close) {
$this->[CONTEXT]->removein($nameonly);
}
else {
$this->[CONTEXT]->addin($nameonly => $parsed);
}
}
if ($this->[INTEREST] and $is_matched->($this->[INTEREST], $elem_name)) {
$interested = $elem;
}
}
($interested,$parsed);
}
sub _output {
my $this = shift;
my $elem = shift;
# 渡ããããªãã¸ã§ã¯ã(è¥ããã¯ããã¹ã)ã
# $this->[OUTPUT] ã«è¿½å ãã¦ããã ã.
# ç´æ¥pushãã¦ããã³ã¼ããããã®ã§ä¿®æ£ããéã«ã¯æ³¨æ.
push @{$this->[OUTPUT]}, $elem;
$this;
}
# =============================================================================
# Tripletail::HtmlFilter::Context.
#
package Tripletail::HtmlFilter::Context;
use constant {
IN => 0,
ADDED => 1,
DELETED => 2,
CURRENT => 3,
};
sub _new {
my $class = shift;
my $this = bless [] => $class;
$this->[IN] = [];
$this->[ADDED] = [];
$this->[DELETED] = undef;
$this->[CURRENT] = undef; # Tripletail::HtmlFilter::{Element,Comment,Text}
$this;
}
sub newElement {
my $this = shift;
my $name = shift;
Tripletail::HtmlFilter::Element->_new($name);
}
sub newText {
my $this = shift;
my $str = shift;
Tripletail::HtmlFilter::Text->_new($str);
}
sub newComment {
my $this = shift;
my $str = shift;
Tripletail::HtmlFilter::Comment->_new($str);
}
sub addin {
my $this = shift;
my $name = lc shift;
my $elem = shift;
unshift(@{$this->[IN]}, [$name, $elem]);
$this;
}
sub removein {
my $this = shift;
my $name = lc shift;
while(my $elem = shift(@{$this->[IN]})) {
last if($elem->[0] eq $name);
}
$this;
}
sub in {
my $this = shift;
my $name = lc shift;
foreach my $elem (@{$this->[IN]}) {
if($elem->[0] eq $name) {
return $elem->[1];
}
}
ext/Tripletail-HtmlFilter/HtmlFilter.pm view on Meta::CPAN
return $this; # ä½ãããå¿
è¦ãç¡ã
}
if ($this->[DELETED]) {
# åé¤ããããã«æç¤ºãããã
$this->[DELETED] = undef;
}
else {
$filter->_output($this->[CURRENT]);
}
foreach (@{$this->[ADDED]}) {
$filter->_output($_);
}
@{$this->[ADDED]} = ();
$this->[CURRENT] = undef;
$this;
}
# =============================================================================
# Tripletail::HtmlFilter::ElementBase.
#
package Tripletail::HtmlFilter::ElementBase;
sub isElement {
my $this = shift;
(ref $this) eq 'Tripletail::HtmlFilter::Element';
}
sub isText {
my $this = shift;
(ref $this) eq 'Tripletail::HtmlFilter::Text';
}
sub isComment {
my $this = shift;
(ref $this) eq 'Tripletail::HtmlFilter::Comment';
}
# =============================================================================
# Tripletail::HtmlFilter::Element.
#
package Tripletail::HtmlFilter::Element;
use constant {
# 注æ: ããã夿´ããæã¯ XS å´ãä¿®æ£ããäºã
NAME => 0,
ATTRS => 1,
ATTR_H => 2,
TAIL => 3,
};
our @ISA = qw(Tripletail::HtmlFilter::ElementBase);
sub _new {
my $class = shift;
my $name = shift; # undefå¯
if (ref $name) {
die __PACKAGE__."#_new, ARG[1] was bad Ref. [$name]\n";
}
my $this = bless [] => $class;
$this->[NAME] = $name;
$this->[ATTRS] = []; # [[key, val], [key, val], ...]
$this->[ATTR_H] = {}; # key => [key, val] ($this->[ATTRS]ã®è¦ç´ ã¨å
±æ)
$this->[TAIL] = undef;
$this;
}
sub name {
# 注æ: ãã®ã¡ã½ãã㯠XS å´ã§ã¯ä½¿ç¨ãããªãã
my $this = shift;
if (@_) {
$this->[NAME] = shift;
if (ref $this->[NAME]) {
die __PACKAGE__."#name: ARG[1] is a Ref. [".$this->[NAME]."]\n";
}
}
$this->[NAME];
}
sub _parse_pp {
my $this = shift;
local($_) = shift;
if (ref) {
die __PACKAGE__."#parse: ARG[1] is a Ref. [$_]\n";
}
s/^<//;
(s/^\s*(\/?\w+)//) and ($this->[NAME] = $1);
while(1) {
(s/([\w:\-]+)\s*=\s*"([^"]*)"//) ? ($this->attr($1 => $2)) :
(s/([\w:\-]+)\s*=\s*'([^']*)'//) ? ($this->attr($1 => $2)) :
(s/([\w:\-]+)\s*=\s*([^\s>]+)//) ? ($this->attr($1 => $2)) :
(s~(\w+|/)~~) ? ($this->end($1)) :
last;
}
$this;
}
sub _attr_pp {
my $this = shift;
my $key = shift;
if (not defined $key) {
die __PACKAGE__."#attr: ARG[1] is not defined.\n";
}
elsif (ref $key) {
die __PACKAGE__."#attr: ARG[1] is a Ref. [$key]\n";
}
if (@_) {
my $val = shift;
if (ref $val) {
die __PACKAGE__."#attr: ARG[2] is a Ref. [$val]\n";
ext/Tripletail-HtmlFilter/HtmlFilter.pm view on Meta::CPAN
undef; # åå¨ããªã
}
}
}
sub attrList {
my $this = shift;
map { $_->[0] } @{$this->[ATTRS]}
}
sub tail {
goto &end;
}
sub end {
# 注æ: ãã®ã¡ã½ãã㯠XS å´ã§ã¯ä½¿ç¨ãããªãã
my $this = shift;
if (@_) {
$this->[TAIL] = shift;
if (ref $this->[TAIL]) {
die __PACKAGE__."#end: ARG[1] is a Ref. [$this->[TAIL]]\n";
}
}
$this->[TAIL];
}
sub toStr {
my $this = shift;
my $str = '<' . $this->[NAME];
foreach my $attr (@{$this->[ATTRS]}) {
my $key = $attr->[0];
my $value = $attr->[1];
$value =~ s/"/"/g;
$str .= sprintf(qq{ %s="%s"}, $key, $value);
}
if( defined $this->[TAIL] and length $this->[TAIL] )
{
$str .= ' ' . $this->[TAIL];
}
$str .= '>';
}
# =============================================================================
# Tripletail::HtmlFilter::Text.
#
package Tripletail::HtmlFilter::Text;
use constant {
STR => 0,
};
our @ISA = qw(Tripletail::HtmlFilter::ElementBase);
sub _new {
my $class = shift;
my $str = shift;
my $this = bless [] => $class;
$this->[STR] = $str;
$this;
}
sub str {
my $this = shift;
if (@_) {
$this->[STR] = shift;
if (ref $this->[STR]) {
die ref($this)."#str: ARG[1] is a Ref. [".$this->[STR]."]\n";
}
}
$this->[STR];
}
sub toStr {
my $this = shift;
$this->[STR];
}
# =============================================================================
# Tripletail::HtmlFilter::Comment.
#
package Tripletail::HtmlFilter::Comment;
our @ISA = qw(Tripletail::HtmlFilter::Text);
use constant {
STR => Tripletail::HtmlFilter::Text::STR(),
};
sub toStr {
my $this = shift;
sprintf '<!-- %s -->', $this->[STR];
}
__END__
=encoding utf-8
=head1 NAME
Tripletail::HtmlFilter - HTMLã®ãã¼ã¹ã¨æ¸ãæã
=head1 SYNOPSIS
my $filter = $TL->newHtmlFilter(
interest => ['form', 'textarea'],
);
$filter->set($html);
while (my ($context, $elem) = $filter->next) {
...
}
print $filter->toStr;
=head1 DESCRIPTION
( run in 0.333 second using v1.01-cache-2.11-cpan-ec4f86ec37b )