HTML-ExtractText

 view release on metacpan or  search on metacpan

lib/HTML/ExtractText.pm  view on Meta::CPAN

        # Build $chunk, the text contribution of the current node, according
        # to the node's type.
        # NOTE(review): $node appears to be an array ref whose element 1 holds
        # the node's content (text nodes) or tag name (tag nodes) -- confirm
        # against the Mojo::DOM tree format.

        # Text: take the content as-is; when $trim is true, strip leading and
        # trailing whitespace and collapse every inner whitespace run into a
        # single space.
        if ( $type eq 'text' ) {
            $chunk = $node->[1];
            if ( $trim ) {
                $chunk =~ s/^\s+//;
                $chunk =~ s/\s+$//;
                $chunk =~ s/\s+/ /g;
            }
        }
        # CDATA or raw text: copied verbatim, never trimmed.
        elsif ( $type eq 'cdata' || $type eq 'raw' ) {
            $chunk = $node->[1];
        }
        # Nested tag: recurse into the nodes returned by Mojo::DOM::_nodes.
        # Trimming is forced off when $node->[1] is 'pre' (presumably the tag
        # name, so <pre> content keeps its whitespace); otherwise the current
        # $trim setting is passed down unchanged.
        elsif ( $type eq 'tag' ) {
           # Silence Perl's "Deep recursion" warning for this recursive call.
           no warnings 'recursion';
           # NOTE(review): the second argument (1) is presumably a "recurse"
           # flag -- confirm against the sub's parameter list.
           $chunk = _text( [Mojo::DOM::_nodes($node)], 1, $node->[1] eq 'pre' ? 0 : $trim );
        }

        # Add leading whitespace if punctuation allows it: a single space is
        # prepended only when $text currently ends in a non-whitespace
        # character and $chunk starts with a character that is neither
        # whitespace nor one of . ! ? , ; :
        $chunk = " $chunk" if $text =~ /\S\z/ && $chunk =~ /^[^.!?,;:\s]+/;



( run in 0.487 second using v1.01-cache-2.11-cpan-454fe037f31 )