HTML-ExtractText
view release on metacpan or search on metacpan
lib/HTML/ExtractText.pm view on Meta::CPAN
# Turn the current node into a text chunk, dispatching on the node type.
# NOTE(review): $type, $node, $trim, $chunk and $text are all set up outside
# this excerpt; presumably $type is $node->[0] -- confirm in the enclosing sub.
# Text.
if ( $type eq 'text' ) {
$chunk = $node->[1];
# When trimming is requested: strip leading whitespace, strip trailing
# whitespace, then collapse every remaining whitespace run to one space.
if ( $trim ) {
$chunk =~ s/^\s+//;
$chunk =~ s/\s+$//;
$chunk =~ s/\s+/ /g;
}
}
# CDATA or raw text: the payload is taken as-is, regardless of $trim.
elsif ( $type eq 'cdata' || $type eq 'raw' ) {
$chunk = $node->[1];
}
# Nested tag.
elsif ( $type eq 'tag' ) {
# Silence Perl's "Deep recursion" warning for the call below.
no warnings 'recursion';
# Build the chunk from the tag's child nodes (as returned by
# Mojo::DOM::_nodes). Trimming is switched off when $node->[1] is 'pre',
# otherwise the current $trim setting is passed through.
# NOTE(review): _text is presumably the enclosing sub (hence the recursion
# pragma) and the literal 1 presumably its "recurse" flag -- confirm.
$chunk = _text( [Mojo::DOM::_nodes($node)], 1, $node->[1] eq 'pre' ? 0 : $trim );
}
# Add leading whitespace if punctuation allows it: a single space is
# prepended only when $text already ends in a non-whitespace character and
# $chunk starts with a character that is neither whitespace nor one of
# . ! ? , ; :
$chunk = " $chunk" if $text =~ /\S\z/ && $chunk =~ /^[^.!?,;:\s]+/;
( run in 0.487 second using v1.01-cache-2.11-cpan-454fe037f31 )