Aion-Format
view release on metacpan or search on metacpan
lib/Aion/Format/Html.pm view on Meta::CPAN
"p" => "\n",
"table" => "\n",
"ol" => "\n",
"ul" => "\n",
);
# Replacement text for self-closing tags (`<tag/>`), keyed by lower-cased tag
# name. from_html consults this table when the tag was written with a
# trailing slash.
our %WITH_CLOSE_TAG2SPACE = (
    p   => "\n",
    br  => "\n",
    img => " ",
);
# Converts HTML to text
# from_html($html) -> $text
#
# Renders an HTML fragment as plain text and returns the result. The argument
# itself is not modified. An undefined argument is returned as is.
#
# The conversion runs in two passes:
#
#   1. Character references are decoded. Named references are looked up in
#      %ENTITIES and the stored value is passed to chr; unknown names are left
#      exactly as written. Decimal (`&#NN`) and hexadecimal (`&#xHH`)
#      references are passed to chr directly. The trailing `;` is optional.
#
#   2. Markup is stripped and whitespace is normalised:
#        - <script>, <style> and <template> elements vanish with their content;
#        - <xhr>...</xhr> is replaced by its inner content;
#        - comments are removed;
#        - an opening tag is replaced by its %TAG2SPACE entry, or by its
#          %WITH_CLOSE_TAG2SPACE entry when written as `<tag/>`;
#        - a closing tag is replaced by its %CLOSE_TAG2SPACE entry;
#        - tags missing from those tables are replaced by nothing;
#        - a run of whitespace collapses to one space and \xa0 becomes a
#          space, except between <pre> and </pre>, where whitespace is kept.
#      When a replacement contains a newline, the whitespace around the tag
#      is dropped.
#
# NOTE(review): references are decoded before markup is stripped, so escaped
# markup such as `&lt;b&gt;` is treated as a real tag by the second pass.
sub from_html {
    my ($text) = @_;
    return $text unless defined $text;

    # Pass 1: decode character references. The callback reads %+ and $& of
    # the substitution that invoked it, so it must not run a regex itself.
    my $decode = sub {
        if (exists $+{word}) {
            my $name = $+{word};
            return exists $ENTITIES{$name} ? chr $ENTITIES{$name} : $&;
        }
        return chr $+{num}     if exists $+{num};
        return chr hex $+{hex} if exists $+{hex};
        return "";
    };
    $text =~ s{&(
        (?<word>\w+)
        |\#(?<num>\d+)
        |\#x(?<hex>[a-f\d]+)
    );?
    }{$decode->()}genix;

    # Pass 2: strip markup. $in_pre is toggled by <pre> / </pre> as the
    # substitution walks the text from left to right.
    my $in_pre = 0;
    my $render = sub {
        # s1/s2 are undef on the `space` and `nbsp` branches; default them so
        # the comparisons and the join below stay warning-free.
        my $lead  = $+{s1} // "";
        my $trail = $+{s2} // "";

        # Leading whitespace is judged with the <pre> state from *before*
        # this tag is processed.
        my $before = $in_pre ? $lead : ($lead eq "" ? "" : " ");

        my $body;
        if (exists $+{space}) {
            $body = $in_pre ? $+{space} : " ";
        }
        elsif (exists $+{nbsp}) {
            $body = " ";
        }
        elsif (exists $+{xhr}) {
            $body = $+{xhr};
        }
        elsif (exists $+{tag}) {
            my $tag = lc $+{tag};
            $in_pre = 1 if $tag eq "pre";
            $body = exists $+{close} ? $WITH_CLOSE_TAG2SPACE{$tag} : $TAG2SPACE{$tag};
        }
        elsif (exists $+{ctag}) {
            my $tag = lc $+{ctag};
            $in_pre = 0 if $tag eq "pre";
            $body = $CLOSE_TAG2SPACE{$tag};
        }
        # Tags without a table entry, script/style/template and comments
        # contribute nothing.
        $body //= "";

        # Trailing whitespace is judged with the <pre> state from *after*
        # this tag. Outside <pre> it is dropped when leading whitespace
        # already produced the separator.
        my $after = $in_pre ? $trail : (($trail eq "" || $before) ? "" : " ");

        # A replacement that carries its own line break swallows the
        # whitespace around the tag.
        return index($body, "\n") >= 0 ? $body : join "", $before, $body, $after;
    };

    # Under /n plain parentheses do not capture and numbered backreferences
    # count named groups only, so the closing raw-text tag must be matched
    # through a named group: \g1 would refer to `s1`.
    $text =~ s{
        (?<s1> \s*) (
            <(?<raw> script|style|template)\b [^<>]*> .*? </ \k<raw> \s* >
          | <xhr \b [^<>]*> (?<xhr> .*? ) </xhr \s* >
          | < (?<tag> [a-z]\w* ) [^<>]*? (?<close> / )? \s*>
          | </ (?<ctag> [a-z]\w* ) \s*>
          | <!--.*?-->
        ) (?<s2> \s*)
        | (?<space> [\ \t\n\r\f]+)
        | (?<nbsp> \xa0)
    }{$render->()}genisx;

    return $text;
}
# All tags are allowed except the forbidden ones:
# applet, script, style, embed, object, param,
# video, audio, source, track, frame, frameset, iframe, comment
# html, head, body, title, meta, base, basefont, bgsound, link
# form, keygen, output, textarea, select, option, optgroup, legend, label, input
# plaintext, xmp
# It also removes attributes whose names start with "on", as well as name, for, formaction, etc.
my %SAFE_TAG = map {$_=>1} qw/
a
abbr
acronym
address
area
article
aside
b
bdi
bdo
blockquote
big
blink
br
button
canvas
caption
center
cite
code
col
colgroup
command
datalist
dd
del
details
dfn
dir
div
dl
dt
( run in 0.244 second using v1.01-cache-2.11-cpan-bf8d7bb2d05 )