Dallycot

 view release on metacpan or search on metacpan

lib/Dallycot/Library/Linguistics.pm  view on Meta::CPAN

          $result -> [0] -> [0]
        }
      );
    }
    when ('Dallycot::Value::URI') {
      return $text -> resolve_content -> then(
        sub {
          my ($body) = @_;
          my $content = '';
          given ( blessed $body ) {
            when ('HTML') {    # content-type: text/html
                               # we want to strip out the HTML and keep only text in the
                               # <body /> outside of <script/> tags
              my $dom = Mojo::DOM->new( $body->{'value'} );
              $content = $dom->find('body')->all_text;
            }
            when ('Dallycot::Value::String') {    # content-type: text/plain
              $content = $body->value;
            }
            when ('XML') {       # content-type: text/xml (TEI, etc.)
              my $dom = Mojo::DOM->new->xml(1)->parse( $body->{'value'} );
              $content = $dom->all_text;
            }
            default {
              croak "Unable to extract text from " . $text->{'value'};
            }
          }
          my $worked = eval {
            # TODO: make '4096' a tunable parameter
            # algorithm takes a *long* time with large strings



( run in 1.417 seconds using v1.01-cache-2.11-cpan-524268b4103 )