Mojolicious

 view release on metacpan or  search on metacpan

lib/Mojo/DOM.pm  view on Meta::CPAN

# Offset lookup keyed on the node type stored in slot 0 of the node array:
# 1 for a "root" node, 4 for every other node type.
# NOTE(review): presumably the index at which child nodes begin - confirm
# against the callers.
sub _start {
  my ($node) = @_;
  return 1 if $node->[0] eq 'root';
  return 4;
}

# Concatenate the textual content of a queue of nodes.
#
#   $nodes - array reference of node arrays; it is consumed (emptied) while
#            being processed
#   $xml   - true to treat "script" and "style" tags like any other tag
#   $all   - true to also descend into nested tags
#
# Returns the joined content of all "text", "cdata" and "raw" nodes in the
# order they are encountered.
sub _text {
  my ($nodes, $xml, $all) = @_;

  my %textual = map { $_ => 1 } qw(text cdata raw);
  my @chunks;

  while (my $node = shift @$nodes) {
    my ($type, $value) = @$node[0, 1];

    # Text
    if ($textual{$type}) { push @chunks, $value; next }

    # Everything that is not a tag is ignored, and so are tags unless
    # nested content was requested
    next unless $type eq 'tag' && $all;

    # Outside of XML mode the content of "script" and "style" is not text
    next unless $xml || ($value ne 'script' && $value ne 'style');

    # Nested tag: put whatever _nodes yields for it (presumably its child
    # nodes) at the front of the queue so document order is preserved
    unshift @$nodes, @{_nodes($node)};
  }

  return join '', @chunks;
}

lib/Mojo/DOM.pm  view on Meta::CPAN

=head1 NODES AND ELEMENTS

When we parse an HTML/XML fragment, it gets turned into a tree of nodes.

  <!DOCTYPE html>
  <html>
    <head><title>Hello</title></head>
    <body>World!</body>
  </html>

There are currently eight different kinds of nodes, C<cdata>, C<comment>, C<doctype>, C<pi>, C<raw>, C<root>, C<tag>
and C<text>. Elements are nodes of the type C<tag>.

  root
  |- doctype (html)
  +- tag (html)
     |- tag (head)
     |  +- tag (title)
     |     +- raw (Hello)
     +- tag (body)
        +- text (World!)

lib/Mojo/DOM.pm  view on Meta::CPAN


  my $tree = $dom->tree;
  $dom     = $dom->tree(['root']);

Document Object Model. Note that this structure should only be used very carefully since it is very dynamic.

=head2 type

  my $type = $dom->type;

This node's type, usually C<cdata>, C<comment>, C<doctype>, C<pi>, C<raw>, C<root>, C<tag> or C<text>.

  # "cdata"
  $dom->parse('<![CDATA[Test]]>')->child_nodes->first->type;

  # "comment"
  $dom->parse('<!-- Test -->')->child_nodes->first->type;

  # "doctype"
  $dom->parse('<!DOCTYPE html>')->child_nodes->first->type;

  # "pi"
  $dom->parse('<?xml version="1.0"?>')->child_nodes->first->type;

lib/Mojo/DOM/HTML.pm  view on Meta::CPAN

  qw(pre rp rt s script section select small strike strong style summary table tbody td template textarea tfoot th),
  qw(thead title tr tt u ul xmp)
);

sub parse {
  my ($self, $html) = (shift, "$_[0]");

  my $xml     = $self->xml;
  my $current = my $tree = ['root'];
  while ($html =~ /\G$TOKEN_RE/gcso) {
    my ($text, $doctype, $comment, $cdata, $pi, $tag, $runaway) = ($1, $2, $3, $4, $5, $6, $11);

    # Text (and runaway "<")
    $text .= '<'                                 if defined $runaway;
    _node($current, 'text', html_unescape $text) if defined $text;

    # Tag
    if (defined $tag) {

      # End
      if ($tag =~ /^\/\s*(\S+)/) {

lib/Mojo/DOM/HTML.pm  view on Meta::CPAN

      }
    }

    # DOCTYPE
    elsif (defined $doctype) { _node($current, 'doctype', $doctype) }

    # Comment
    elsif (defined $comment) { _node($current, 'comment', $comment) }

    # CDATA
    elsif (defined $cdata) { _node($current, 'cdata', $cdata) }

    # Processing instruction (try to detect XML)
    elsif (defined $pi) {
      $self->xml($xml = 1) if !exists $self->{xml} && $pi =~ /xml/i;
      _node($current, 'pi', $pi);
    }
  }

  return $self->tree($tree);
}

lib/Mojo/DOM/HTML.pm  view on Meta::CPAN

  # Root
  return join '', map { _render($_, $xml) } @$tree[1 .. $#$tree] if $type eq 'root';

  # DOCTYPE
  return '<!DOCTYPE' . $tree->[1] . '>' if $type eq 'doctype';

  # Comment
  return '<!--' . $tree->[1] . '-->' if $type eq 'comment';

  # CDATA
  return '<![CDATA[' . $tree->[1] . ']]>' if $type eq 'cdata';

  # Processing instruction
  return '<?' . $tree->[1] . '?>' if $type eq 'pi';

  # Everything else
  return '';
}

sub _start {
  my ($start, $attrs, $xml, $current) = @_;

lib/Mojolicious/Guides/Cookbook.pod  view on Meta::CPAN


  # Extract headings
  $res->dom('h1, h2, h3')->each(sub ($dom, $i) {
    say 'Heading: ', $dom->all_text;
  });

  # Visit all nodes recursively to extract more than just text
  for my $n ($res->dom->descendant_nodes->each) {

    # Text or CDATA node
    print $n->content if $n->type eq 'text' || $n->type eq 'cdata';

    # Also include alternate text for images
    print $n->{alt} if $n->type eq 'tag' && $n->tag eq 'img';
  }

For a full list of available CSS selectors see L<Mojo::DOM::CSS/"SELECTORS">.

=head2 JSON web services

Most web services these days are based on the JSON data-interchange format. That's why L<Mojolicious> comes with the

t/mojo/dom.t  view on Meta::CPAN

  is $dom->descendant_nodes->[5]->content,                                    'after',             'right content';
  is $dom->at('p')->descendant_nodes->[0]->type,                              'text',              'right type';
  is $dom->at('p')->descendant_nodes->[0]->content,                           'test',              'right type';
  is $dom->at('p')->descendant_nodes->last->type,                             'comment',           'right type';
  is $dom->at('p')->descendant_nodes->last->content,                          ' 456 ',             'right type';
  is $dom->child_nodes->[1]->child_nodes->first->parent->tag,                 'p',                 'right tag';
  is $dom->child_nodes->[1]->child_nodes->first->content,                     'test',              'right content';
  is $dom->child_nodes->[1]->child_nodes->first,                              'test',              'right content';
  is $dom->at('p')->child_nodes->first->type,                                 'text',              'right type';
  is $dom->at('p')->child_nodes->first->remove->tag,                          'p',                 'right tag';
  is $dom->at('p')->child_nodes->first->type,                                 'cdata',             'right type';
  is $dom->at('p')->child_nodes->first->content,                              '123',               'right content';
  is $dom->at('p')->child_nodes->[1]->type,                                   'comment',           'right type';
  is $dom->at('p')->child_nodes->[1]->content,                                ' 456 ',             'right content';
  is $dom->[0]->type,                                                         'doctype',           'right type';
  is $dom->[0]->content,                                                      ' before',           'right content';
  is $dom->child_nodes->[2]->type,                                            'pi',                'right type';
  is $dom->child_nodes->[2]->content,                                         'after',             'right content';
  is $dom->child_nodes->first->content(' again')->content,                    ' again',            'right content';
  is $dom->child_nodes->grep(sub { $_->type eq 'pi' })->map('remove')->first->type, 'root',        'right type';
  is "$dom", '<!DOCTYPE again><p><![CDATA[123]]><!-- 456 --></p>',                                 'right result';



( run in 0.319 second using v1.01-cache-2.11-cpan-454fe037f31 )