Tripletail

 view release on metacpan or  search on metacpan

ext/Tripletail-HtmlFilter/HtmlFilter.pm  view on Meta::CPAN

use Tripletail;
use DynaLoader;
use base 'DynaLoader';
our @XSUBS = qw(next _next_elem Element::parse Element::attr);
our $XS_LOADERROR;
Tripletail::HtmlFilter->my_bootstrap($Tripletail::XS_VERSION);

use constant {
	# 注意: ここを変更した時は XS 側も修正する事。
	INTEREST       => 0,
	TRACK          => 1,
	FILTER_TEXT    => 2,
	FILTER_COMMENT => 3,
	CONTEXT        => 4,
	HTML           => 5,
	OUTPUT         => 6,
};
my %_MATCHER_CACHE;

sub my_bootstrap
{
  my $pkg = shift;
  
  if( !$PURE_PERL )
  {
    local ($@);
    eval
    {
      local($SIG{__DIE__}) = 'DEFAULT';
      $pkg->SUPER::bootstrap(@_);
    };
    $XS_LOADERROR = $@;
  }else
  {
    $XS_LOADERROR = 'disabled';
  }
  
  do
  {
    no strict 'refs';
    #$err and chomp $err;
    #warn "warning: $err";
    foreach my $name (@XSUBS)
    {
      my $xsub = __PACKAGE__.'::'.$name;
      if( !defined(&$xsub) )
      {
        (my $ppsub = $xsub) =~ s/(\w+)$/_$1_pp/;
        *$xsub = \&$ppsub;
      }
    }
  }
}

1;

sub _new {
    my $class = shift;
    my $opts = { @_ };

	my $this = bless [] => $class;

	$this->[INTEREST]       = $opts->{interest};
	$this->[TRACK]          = $opts->{track};
	$this->[FILTER_TEXT]    = $opts->{filter_text};
	$this->[FILTER_COMMENT] = $opts->{filter_comment};
	
    $this->[CONTEXT]        = Tripletail::HtmlFilter::Context->_new;
    $this->[HTML]           = undef; # 文字列
    $this->[OUTPUT]         = []; # Tripletail::HtmlFilter::{Element,Text,Comment}

    # interest, trackに渡された正規表現はこの時点で CODE にコンパイルしておく。
    if ($this->[INTEREST]) {
		$this->[INTEREST] = $this->_compile_matcher($this->[INTEREST]);
    }

    if ($this->[TRACK]) {
		$this->[TRACK] = $this->_compile_matcher($this->[TRACK]);
    }

    $this;
}

sub set {
    my $this = shift;
    my $html = shift;

    if (not defined $html) {
		die __PACKAGE__."#set: ARG[1] is not defined.\n";
    }
    elsif (ref $html) {
		die __PACKAGE__."#set: ARG[1] is a Ref.\n";
    }

    #@{$this->[HTML]} = split m/(<.+?>)/s, $html;
    # ↑では、<!-- <hoge> -->を正しく解析できない。真面目にパーズする必要がある
    # しかし、perlで真面目にパーザを書くのは非常に面倒なので正規表現で誤魔化す
    # NB: 他にも、正しく解析できないパターンが存在するかも
    @{$this->[HTML]} = split m/((?:<!--.*?-->)|(?:<.+?>))/s, $html;
    @{$this->[OUTPUT]} = ();
    $this;
}

sub toStr {
    my $this = shift;

    $this->[CONTEXT]->_flush($this); # 未確定の部分を確定する

    join('', map {ref($_)?$_->toStr:$_} @{$this->[OUTPUT]});
}

sub _compile_matcher {
	my $this = shift;
	my $regexes = shift;

	my $joined = join('', @$regexes);
	if (my $cached = $_MATCHER_CACHE{$joined}) {
		return $cached;
	}

	my $ret = [];

 view all matches for this distribution
 view release on metacpan -  search on metacpan

( run in 1.915 second using v1.00-cache-2.02-grep-82fe00e-cpan-48ebf85a1963 )