Aion-Format
view release on metacpan or search on metacpan
lib/Aion/Format/Html.pm view on Meta::CPAN
"p" => "\n",
"table" => "\n",
"ol" => "\n",
"ul" => "\n",
);
# Text that from_html emits in place of a tag written with a trailing
# slash (<br/>, <img .../>, ...), keyed by the lowercased tag name.
our %WITH_CLOSE_TAG2SPACE = (
    (map { $_ => "\n" } qw/p br/),
    "img" => " ",
);
# Converts HTML to plain text.
#
# Takes the HTML string as its only argument; when called without arguments
# it works on the current $_ (the caller's $_ is localized, not modified).
# Returns the resulting text.
#
# Step 1 decodes character references: &name; through %ENTITIES (whose values
# are passed to chr), &#NN; and &#xHH;.
# Step 2 removes comments and <script>/<style>/<template> elements together
# with their bodies, keeps the body of <xhr> elements as is, replaces every
# other tag with the string registered for it in %TAG2SPACE,
# %WITH_CLOSE_TAG2SPACE (tags written as <x/>) or %CLOSE_TAG2SPACE (</x>),
# and collapses whitespace runs outside <pre> into a single space.
sub from_html (;$) {
    local ($_) = @_? @_: $_;

    # 1. Decode entities. Unknown named entities are left untouched ($&).
    my $ent = sub {
        exists $+{word}? (exists $ENTITIES{$+{word}}? chr $ENTITIES{$+{word}}: $&):
        exists $+{num}? chr $+{num}:
        exists $+{hex}? chr hex $+{hex}: ""
    };
    s{&(
        (?<word>\w+)
        |\#(?<num>\d+)
        |\#x(?<hex>[a-f\d]+)
    );?
    }{$ent->()}genix;

    # 2. Replace tags and normalize whitespace.
    my $pre;    # true between <pre> and </pre>: whitespace is kept verbatim there
    my $to = sub {
        # s1/s2 are only set by the tag alternatives; default them so the
        # plain-whitespace alternatives do not trigger "uninitialized" warnings.
        my $lead  = $+{s1} // "";
        my $trail = $+{s2} // "";

        # Leading whitespace is judged with the <pre> state *before* this tag.
        my $s1 = $pre? $lead: ($lead eq ""? "": " ");
        my $x =
            exists $+{space}? ($pre? $+{space}: " "):
            exists $+{nbsp}? " ":
            exists $+{xhr}? $+{xhr}:
            exists $+{tag}? do {
                my $tag = lc $+{tag};
                $pre = 1 if $tag eq "pre";
                exists $+{close}? $WITH_CLOSE_TAG2SPACE{$tag}: $TAG2SPACE{$tag}
            }:
            exists $+{ctag}? do {
                my $tag = lc $+{ctag};
                $pre = 0 if $tag eq "pre";
                $CLOSE_TAG2SPACE{$tag}
            }:
            "";
        $x //= "";    # tag without an entry in the lookup tables

        # Trailing whitespace is judged with the <pre> state *after* this tag;
        # outside <pre> at most one space is emitted around the replacement.
        my $s2 = $pre? $trail: ($trail eq "" || $s1? "": " ");

        # A replacement that contains a newline swallows the whitespace around it.
        $x =~ /\n/ ? $x: join "", $s1, $x, $s2
    };
    s{
        (?<s1> \s*) (
            # The closing tag must refer to the opening one by name: a numeric
            # \g1 would point at (?<s1>...), the first capture group.
            <(?<rawtag> script|style|template)\b [^<>]*> .*? </ \k<rawtag> \s* >
          | <xhr \b [^<>]*> (?<xhr> .*? ) </xhr \s* >
          | < (?<tag> [a-z]\w* ) [^<>]*? (?<close> / )? \s*>
          | </ (?<ctag> [a-z]\w* ) \s*>
          | <!--.*?-->
        ) (?<s2> \s*)
        | (?<space> [\ \t\n\r\f]+)
        | (?<nbsp> \xa0)
    }{$to->()}genisx;
    $_
}
# All tags are allowed except the forbidden ones:
# applet, script, style, embed, object, param,
# video, audio, source, track, frame, frameset, iframe, comment
# html, head, body, title, meta, base, basefont, bgsound, link
# form, keygen, output, textarea, select, option, optgroup, legend, label, input
# plaintext, xmp
# Attributes starting with "on", as well as name, for, formaction, etc., are removed too.
# NOTE(review): `name` is nevertheless listed in %SAFE_ATTR below - confirm which is intended.
# Allow-list of tag names: safe_html keeps a tag only when its lowercased
# name is a key of this hash; every other tag is removed.
# NOTE(review): the list below contains a stray
# "lib/Aion/Format/Html.pm view on Meta::CPAN" line that looks like
# page-extraction residue, and entries between "hgroup" and "main" may be
# missing - verify against the upstream file.
my %SAFE_TAG = map {$_=>1} qw/
a
abbr
acronym
address
area
article
aside
b
bdi
bdo
blockquote
big
blink
br
button
canvas
caption
center
cite
code
col
colgroup
command
datalist
dd
del
details
dfn
dir
div
dl
dt
em
figcaption
figure
font
footer
h1
h2
h3
h4
h5
h6
header
hgroup
lib/Aion/Format/Html.pm view on Meta::CPAN
main
map
marquee
mark
menu
meter
nav
nobr
noembed
noframes
noscript
ol
p
pre
progress
q
rp
rt
ruby
s
samp
section
small
span
strike
strong
sub
summary
sup
table
tbody
td
tfoot
th
thead
time
tr
tt
u
ul
var
wbr
/;
# Allow-list of attribute names: safe_html copies an attribute through only
# when its lowercased name is a key of this hash. Every listed name maps to 1.
my %SAFE_ATTR;
$SAFE_ATTR{$_} = 1 for qw/
    align alt axis
    bgcolor border
    char charoff cite class color colspan coords crossorigin
    datetime decoding dir
    height high href hreflang hspace
    icon importance intrinsicsize
    lang language loading longdesc low
    max media min
    name
    open optimum
    ping pubdate
    rel rev rowspan
    sizes src srcset style summary
    tabindex title type
    vspace
    width
/;
# Strips dangerous as well as unknown tags from HTML.
#
#   safe_html($html, $link)
#
# $html - markup to clean.
# $link - optional; handed to Aion::Format::Url::normalize_url as second
#         argument together with every src/href value.
# Returns the cleaned markup.
#
# Comments and tags whose lowercased name is not in %SAFE_TAG are removed
# (the text between them stays). Closing tags of allowed elements are emitted
# without attributes. Opening tags keep only attributes whose lowercased name
# is in %SAFE_ATTR, and every <a> gets " target=_blank" appended.
sub safe_html($;$) {
    (local $_, my $link) = @_;

    # Attributes whose value is a URL and goes through normalize_url.
    my %is_url_attr = map { $_ => 1 } qw/src href/;

    my $f = sub {
        my ($slash, $tag, $rest) = ($1, $2, $3);

        # $tag is undefined when the comment alternative matched.
        return "" if !defined $tag || !exists $SAFE_TAG{lc $tag};
        return "</$tag>" if defined $slash && $slash ne "";

        my @attrs;
        # NOTE(review): \b also matches after "-" or ":", so a name such as
        # data-href is seen as "data" followed by "href=..." - confirm intended.
        while($rest =~ /
            \b (?<attr> [a-z][a-z\d]*) ( \s*=\s* ( (?<quot> ") (?<val> [^"]*)" | (?<quot> ') (?<val> [^']*)' | (?<val> \S*) ) )?
        /gixn) {
            my $name = $+{attr};
            my $val  = $+{val}  // "";    # attribute without a value
            my $quot = $+{quot} // "";    # unquoted value
            next if !exists $SAFE_ATTR{lc $name};

            # An empty value is emitted as a bare attribute name.
            push @attrs, $val eq ""? " $name"
                : join "", " ", $name, "=", $quot,
                    $is_url_attr{lc $name}
                        ? Aion::Format::Url::normalize_url($val, $link)
                        : $val,
                    $quot;
        }
        push @attrs, " target=_blank" if lc $tag eq "a";

        # Every element of @attrs starts with a space and the array is
        # interpolated with the list separator (a space unless $" was changed).
        "<$tag@attrs>"
    };
    s{<(/\s*)?([a-z][a-z\d:-]*)([^<>]*)>|<!--(?:.*?)-->}{ $f->() }igse;
    $_
}
1;
__END__
=encoding utf-8
=head1 NAME
( run in 0.725 second using v1.01-cache-2.11-cpan-39bf76dae61 )