Aion-Format

 view release on metacpan or  search on metacpan

lib/Aion/Format/Html.pm  view on Meta::CPAN

	# 1. Убираем энтитиес:
	my $ent = sub {
		exists $+{word}? (exists $ENTITIES{$+{word}}? chr $ENTITIES{$+{word}}: $&):
		exists $+{num}? chr $+{num}:
		exists $+{hex}? chr hex $+{hex}: ""
	};
	s{&(
		(?<word>\w+)
		|\#(?<num>\d+)
		|\#x(?<hex>[a-f\d]+)
	);?
	}{$ent->()}genix;

	my $pre;

	my $to = sub {
		my $s1 = $pre? $+{s1}: ($+{s1} eq ""? "": " ");
		
		my $x =
		exists $+{space}? ($pre? $+{space}: " "):
		exists $+{nbsp}? " ":
		exists $+{xhr}? $+{xhr}:
		exists $+{tag}? do {
			my $tag = lc $+{tag};
			$pre = 1 if $tag eq "pre";
			exists $+{close}? $WITH_CLOSE_TAG2SPACE{$tag}: $TAG2SPACE{$tag} 
		}:
		exists $+{ctag}? do {
			my $tag = lc $+{ctag};
			$pre = 0 if $tag eq "pre";
			
			$CLOSE_TAG2SPACE{$tag}
		}:
		"";
		
		my $s2 = $pre? $+{s2}: ($+{s2} eq "" || $s1? "": " ");
		
		$x =~ /\n/ ? $x: join "", $s1, $x, $s2
	};

	s{
		(?<s1> \s*) (
		
			  <(script|style|template)\b [^<>]*> .*? </ \g1 \s* >
			| <xhr \b [^<>]*> (?<xhr> .*? ) </xhr \s* >
			| < (?<tag> [a-z]\w* ) [^<>]*? (?<close> / )? \s*>
			| </ (?<ctag> [a-z]\w* ) \s*>
			| <!--.*?-->
		
		) (?<s2> \s*)
		| (?<space> [\ \t\n\r\f]+)
		| (?<nbsp> \xa0)
	}{$to->()}genisx;

	$_
}

# Все, кроме запрещённых:
#  applet, script, style, embed, object, param,
#  video, audio, source, track, frame, frameset, iframe, comment
#  html, head, body, title, meta, base, basefont, bgsound, link
#  form, keygen, output, textarea, select, option, optgroup, legend, label, input
#  plaintext, xmp
# А так же удаляет атрибуты начинающиеся на "on", name, for, formaction и др..
my %SAFE_TAG = map {$_=>1} qw/
a
abbr
acronym
address

area
article
aside

b
bdi
bdo

blockquote
big

blink
br
button

canvas
caption
center
cite
code
col
colgroup
command

datalist
dd
del
details
dfn
dir
div
dl
dt

em

figcaption
figure
font
footer

h1
h2
h3
h4
h5
h6
header
hgroup
hr

i
img

ins
isindex

kbd
keygen

li

main
map
marquee
mark
menu

meter

nav
nobr
noembed
noframes
noscript

ol

p
pre
progress

q

rp
rt
ruby

s
samp
section
small
span
strike
strong
   sub
summary
sup



( run in 1.682 second using v1.01-cache-2.11-cpan-5735350b133 )