Aion-Format

 view release on metacpan or  search on metacpan

lib/Aion/Format/Html.pm  view on Meta::CPAN

pubdate datetime
open optimum

dir lang language style tabindex title high low hreflang icon

max min

href media ping rel rev name type

class

src

alt crossorigin decoding height width importance  intrinsicsize loading sizes srcset

align border hspace vspace longdesc  axis  char charoff summary

colspan rowspan

border cite bgcolor color

coords
/;

# Strips dangerous and unknown tags from HTML, and unknown attributes from
# the tags that are kept.
#
# Arguments:
#   $html - HTML text to sanitize.
#   $link - optional; handed as the second argument to
#           Aion::Format::Url::normalize_url for every src/href value.
#
# Returns the sanitized HTML:
#   * tags whose lower-cased name is not a key of %SAFE_TAG are removed
#     (the text around them is kept), and so are HTML comments;
#   * attributes whose lower-cased name is not a key of %SAFE_ATTR are dropped;
#   * every kept opening <a> tag gets target=_blank appended.
#
# NOTE(review): src/href values are only passed through normalize_url; whether
# that rejects "javascript:" and similar schemes is not visible here - confirm.
sub safe_html($;$) {
	(local $_, my $link) = @_;

	my $f = sub {
		# Copy the captures right away: the attribute regex below overwrites
		# the capture variables of the outer substitution.
		my ($closing, $tag, $attr_text) = @_;

		# $tag is undefined when the comment alternative matched.
		return "" if !defined $tag || !exists $SAFE_TAG{lc $tag};
		return "</$tag>" if defined $closing && $closing ne "";

		my @attrs;
		# The name must not start in the middle of a longer name and consumes
		# hyphens, colons, dots and underscores, so that "data-href" is looked
		# up as a whole instead of being re-read as a plain "href".
		while($attr_text =~ /
			(?<! [a-z\d_:.-] )
			(?<attr> [a-z][a-z\d_:.-]* )
			( \s*=\s* ( (?<quot> ") (?<val> [^"]*)" | (?<quot> ') (?<val> [^']*)' | (?<val> \S*) ) )?
		/gixn) {
			my $name  = $+{attr};
			my $quote = $+{quot} // "";    # undefined for unquoted values
			my $value = $+{val}  // "";    # undefined for valueless attributes

			my $key = lc $name;
			next if !exists $SAFE_ATTR{$key};

			if($value eq "") {
				push @attrs, " $name";
				next;
			}

			my $is_url = $key eq "src" || $key eq "href";
			push @attrs, join "", " ", $name, "=", $quote,
				($is_url ? Aion::Format::Url::normalize_url($value, $link) : $value),
				$quote;
		}

		push @attrs, " target=_blank" if lc $tag eq "a";

		# Every element of @attrs already starts with a space, so join with an
		# empty separator (array interpolation would insert a second space).
		join "", "<$tag", @attrs, ">"
	};

	s{<(/\s*)?([a-z][a-z\d:-]*)([^<>]*)>|<!--(?:.*?)-->}{ $f->($1, $2, $3) }igse;

	$_
}

1;

__END__

=encoding utf-8

=head1 NAME

Aion::Format::Html - library for HTML formatting

=head1 SYNOPSIS

	use Aion::Format::Html;
	
	from_html "<b>&excl;</b>" # => !
	to_html "<a>"             # => &lt;a&gt;

=head1 DESCRIPTION

Library for formatting HTML documents.

=head1 SUBROUTINES

=head2 from_html ($html)

Converts HTML to text.

	from_html "Basic is <b>superlanguage</b>!<br>"  # => Basic is superlanguage!\n

=head2 to_html ($html)

Escapes HTML characters.

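=head2 safe_html ($html, $link)

Trims dangerous and unknown HTML tags, as well as unknown attributes from known tags. The optional C<$link> is passed, together with the value of each C<src> and C<href> attribute, to C<Aion::Format::Url::normalize_url>.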

	safe_html "-<em>-</em><br>-" # => -<em>-</em><br>-
	safe_html "-<em onclick='  '>-</em><br onmouseout=1>-" # => -<em>-</em><br>-
	safe_html "-<xx24>-</xx24>" # => --
	safe_html "-< applet >-</ applet >" # => -< applet >-

=head2 split_on_pages ($html, $symbols_on_page, $by)

Breaks text into pages taking into account html tags.

	[split_on_pages "Alice in wonderland. This is book", 17]  # --> ["Alice in wonderland. ", "This is book"]

=head1 AUTHOR

Yaroslav O. Kosmina L<mailto:darviarush@mail.ru>

=head1 LICENSE

⚖ B<GPLv3>

=head1 COPYRIGHT

The Aion::Format::Html module is copyright © 2023 Yaroslav O. Kosmina. Rusland. All rights reserved.



( run in 0.644 second using v1.01-cache-2.11-cpan-39bf76dae61 )