Aion-Format
view release on metacpan - search on metacpan
view release on metacpan or search on metacpan
lib/Aion/Format/Html.pm view on Meta::CPAN
"p" => "\n",
"table" => "\n",
"ol" => "\n",
"ul" => "\n",
);
# Replacement text that from_html substitutes for a tag written with a
# trailing "/" (for example <br/>), keyed by the lowercased tag name.
our %WITH_CLOSE_TAG2SPACE = (
    p   => "\n",
    br  => "\n",
    img => q{ },
);
# Converts HTML to text
sub from_html {
    # Converts an HTML string into plain text and returns the result.
    #
    # Argument: the HTML source (first element of @_).
    # Returns:  the text with character references decoded, markup removed
    #           and whitespace normalised.
    #
    # Processing happens in two passes over a localised copy in $_:
    #   1. character references (&name; &#NNN; &#xHHH;) are decoded;
    #   2. tags, comments and whitespace runs are rewritten by $to.
    #
    # NOTE(review): because references are decoded first, escaped markup such
    # as "&lt;b&gt;" becomes "<b>" and is then treated as a tag by pass 2.
    # Confirm whether that is intended before relying on it.
    local ($_) = @_;

    # 1. Decode character references.
    #    Named references are looked up in %ENTITIES (values are code points).
    #    Unknown names are left untouched. Numeric references above the Unicode
    #    maximum are left untouched as well: feeding them to chr() warns or
    #    dies depending on the value, and the input is untrusted markup.
    my $ent = sub {
        my $code =
            exists $+{word} ? $ENTITIES{ $+{word} }
          : exists $+{num}  ? $+{num}
          : exists $+{hex}  ? hex $+{hex}
          :                   undef;
        return defined $code && $code <= 0x10FFFF ? chr $code : $&;
    };

    s{&(
        (?<word>\w+)
        |\#(?<num>\d+)
        |\#x(?<hex>[a-f\d]+)
    );?
    }{$ent->()}genix;

    # 2. Replace markup and whitespace.
    #    $pre is true while we are between <pre ...> and </pre>; inside it
    #    whitespace is kept verbatim instead of being collapsed.
    my $pre;
    my $to = sub {
        # s1/s2 only take part in the match for the tag alternatives.
        my $lead  = $+{s1} // "";
        my $trail = $+{s2} // "";

        # Leading whitespace is evaluated with the $pre state that was in
        # effect before this tag, trailing whitespace with the state after it.
        my $s1 = $pre ? $lead : ( $lead eq "" ? "" : " " );

        my $x =
            exists $+{space} ? ( $pre ? $+{space} : " " )
          : exists $+{nbsp}  ? " "
          : exists $+{xhr}   ? $+{xhr}
          : exists $+{tag}   ? do {
                my $tag = lc $+{tag};
                $pre = 1 if $tag eq "pre";
                exists $+{close}
                    ? $WITH_CLOSE_TAG2SPACE{$tag}
                    : $TAG2SPACE{$tag};
            }
          : exists $+{ctag} ? do {
                my $tag = lc $+{ctag};
                $pre = 0 if $tag eq "pre";
                $CLOSE_TAG2SPACE{$tag};
            }
          : "";

        # Tags without an entry in the lookup tables contribute nothing.
        $x //= "";

        # Emit at most one space around a tag: the trailing one is dropped
        # when a leading one is already being emitted.
        my $s2 = $pre ? $trail : ( $trail eq "" || $s1 ? "" : " " );

        # A replacement containing a newline swallows the adjacent whitespace.
        return $x =~ /\n/ ? $x : join "", $s1, $x, $s2;
    };

    # The raw-text elements use a named group plus \k<raw> for the closing
    # tag. A numbered backreference would be wrong here: under /n unnamed
    # groups do not capture, so group 1 is (?<s1>...), not the tag name.
    s{
        (?<s1> \s*) (
            <(?<raw> script|style|template)\b [^<>]*> .*? </ \k<raw> \s* >
            | <xhr \b [^<>]*> (?<xhr> .*? ) </xhr \s* >
            | < (?<tag> [a-z]\w* ) [^<>]*? (?<close> / )? \s*>
            | </ (?<ctag> [a-z]\w* ) \s*>
            | <!--.*?-->
        ) (?<s2> \s*)
        | (?<space> [\ \t\n\r\f]+)
        | (?<nbsp> \xa0)
    }{$to->()}genisx;

    return $_;
}
# Everything except the forbidden tags:
# applet, script, style, embed, object, param,
# video, audio, source, track, frame, frameset, iframe, comment
# html, head, body, title, meta, base, basefont, bgsound, link
# form, keygen, output, textarea, select, option, optgroup, legend, label, input
# plaintext, xmp
# Also removes attributes starting with "on", as well as name, for, formaction, etc.
my %SAFE_TAG = map {$_=>1} qw/
a
abbr
acronym
address
area
article
aside
b
bdi
bdo
blockquote
big
blink
br
button
canvas
caption
center
cite
code
col
colgroup
command
datalist
dd
del
details
dfn
dir
div
dl
dt
view all matches for this distributionview release on metacpan - search on metacpan
( run in 1.289 second using v1.00-cache-2.02-grep-82fe00e-cpan-d29e8ade9f55 )