Mojo-DOM58

 view release on metacpan or  search on metacpan

README  view on Meta::CPAN


    When we parse an HTML/XML fragment, it gets turned into a tree of
    nodes.

      <!DOCTYPE html>
      <html>
        <head><title>Hello</title></head>
        <body>World!</body>
      </html>

    There are currently eight different kinds of nodes: cdata, comment,
    doctype, pi, raw, root, tag and text. Elements are nodes of the type
    tag.

      root
      |- doctype (html)
      +- tag (html)
         |- tag (head)
         |  +- tag (title)
         |     +- raw (Hello)
         +- tag (body)

README  view on Meta::CPAN

      my $tree = $dom->tree;
      $dom     = $dom->tree(['root']);

    Document Object Model. Note that this structure should only be used
    very carefully since it is very dynamic.

 type

      my $type = $dom->type;

    This node's type, usually cdata, comment, doctype, pi, raw, root, tag
    or text.

      # "cdata"
      $dom->parse('<![CDATA[Test]]>')->child_nodes->first->type;
    
      # "comment"
      $dom->parse('<!-- Test -->')->child_nodes->first->type;
    
      # "doctype"
      $dom->parse('<!DOCTYPE html>')->child_nodes->first->type;
    
      # "pi"
      $dom->parse('<?xml version="1.0"?>')->child_nodes->first->type;

lib/Mojo/DOM58.pm  view on Meta::CPAN

# Return a numeric offset chosen by the node's type tag (element 0 of the
# node array): 1 for a "root" node, 4 for every other node type.
# NOTE(review): presumably the index at which child nodes begin inside the
# node array -- confirm against the callers.
sub _start {
  my $type = $_[0][0];
  return 1 if $type eq 'root';
  return 4;
}

sub _text {
  my ($nodes, $xml, $all) = @_;

  my $text = '';
  while (my $node = shift @$nodes) {
    my $type = $node->[0];

    # Text
    if ($type eq 'text' || $type eq 'cdata' || $type eq 'raw') {
      $text .= $node->[1];
    }

    # Nested tag
    elsif ($type eq 'tag' && $all) {
      unshift @$nodes, @{_nodes($node)} if $xml || ($node->[1] ne 'script' && $node->[1] ne 'style');
    }
  }

  return $text;

lib/Mojo/DOM58.pm  view on Meta::CPAN

=head1 NODES AND ELEMENTS

When we parse an HTML/XML fragment, it gets turned into a tree of nodes.

  <!DOCTYPE html>
  <html>
    <head><title>Hello</title></head>
    <body>World!</body>
  </html>

There are currently eight different kinds of nodes: C<cdata>, C<comment>,
C<doctype>, C<pi>, C<raw>, C<root>, C<tag> and C<text>. Elements are nodes of
the type C<tag>.

  root
  |- doctype (html)
  +- tag (html)
     |- tag (head)
     |  +- tag (title)
     |     +- raw (Hello)
     +- tag (body)

lib/Mojo/DOM58.pm  view on Meta::CPAN

  my $tree = $dom->tree;
  $dom     = $dom->tree(['root']);

Document Object Model. Note that this structure should only be used very
carefully since it is very dynamic.

=head2 type

  my $type = $dom->type;

This node's type, usually C<cdata>, C<comment>, C<doctype>, C<pi>, C<raw>,
C<root>, C<tag> or C<text>.

  # "cdata"
  $dom->parse('<![CDATA[Test]]>')->child_nodes->first->type;

  # "comment"
  $dom->parse('<!-- Test -->')->child_nodes->first->type;

  # "doctype"
  $dom->parse('<!DOCTYPE html>')->child_nodes->first->type;

  # "pi"
  $dom->parse('<?xml version="1.0"?>')->child_nodes->first->type;

lib/Mojo/DOM58/_HTML.pm  view on Meta::CPAN

  $self->{xml} = shift;
  return $self;
}

sub parse {
  my ($self, $html) = (shift, "$_[0]");

  my $xml = $self->xml;
  my $current = my $tree = ['root'];
  while ($html =~ /\G$TOKEN_RE/gcso) {
    my ($text, $doctype, $comment, $cdata, $pi, $tag, $runaway)
      = ($1, $2, $3, $4, $5, $6, $11);

    # Text (and runaway "<")
    $text .= '<' if defined $runaway;
    _node($current, 'text', html_unescape $text) if defined $text;

    # Tag
    if (defined $tag) {

      # End

lib/Mojo/DOM58/_HTML.pm  view on Meta::CPAN

      }
    }

    # DOCTYPE
    elsif (defined $doctype) { _node($current, 'doctype', $doctype) }

    # Comment
    elsif (defined $comment) { _node($current, 'comment', $comment) }

    # CDATA
    elsif (defined $cdata) { _node($current, 'cdata', $cdata) }

    # Processing instruction (try to detect XML)
    elsif (defined $pi) {
      $self->xml($xml = 1) if !exists $self->{xml} && $pi =~ /xml/i;
      _node($current, 'pi', $pi);
    }
  }

  return $self->tree($tree);
}

lib/Mojo/DOM58/_HTML.pm  view on Meta::CPAN

  return join '', map { _render($_, $xml) } @$tree[1 .. $#$tree]
    if $type eq 'root';

  # DOCTYPE
  return '<!DOCTYPE' . $tree->[1] . '>' if $type eq 'doctype';

  # Comment
  return '<!--' . $tree->[1] . '-->' if $type eq 'comment';

  # CDATA
  return '<![CDATA[' . $tree->[1] . ']]>' if $type eq 'cdata';

  # Processing instruction
  return '<?' . $tree->[1] . '?>' if $type eq 'pi';

  # Everything else
  return '';
}

sub _start {
  my ($start, $attrs, $xml, $current) = @_;

t/dom.t  view on Meta::CPAN

  is $dom->descendant_nodes->[5]->content, 'after', 'right content';
  is $dom->at('p')->descendant_nodes->[0]->type,    'text', 'right type';
  is $dom->at('p')->descendant_nodes->[0]->content, 'test', 'right type';
  is $dom->at('p')->descendant_nodes->last->type,    'comment', 'right type';
  is $dom->at('p')->descendant_nodes->last->content, ' 456 ',   'right type';
  is $dom->child_nodes->[1]->child_nodes->first->parent->tag, 'p', 'right tag';
  is $dom->child_nodes->[1]->child_nodes->first->content, 'test', 'right content';
  is $dom->child_nodes->[1]->child_nodes->first, 'test', 'right content';
  is $dom->at('p')->child_nodes->first->type, 'text', 'right type';
  is $dom->at('p')->child_nodes->first->remove->tag, 'p', 'right tag';
  is $dom->at('p')->child_nodes->first->type,    'cdata', 'right type';
  is $dom->at('p')->child_nodes->first->content, '123',   'right content';
  is $dom->at('p')->child_nodes->[1]->type,    'comment', 'right type';
  is $dom->at('p')->child_nodes->[1]->content, ' 456 ',   'right content';
  is $dom->[0]->type,    'doctype', 'right type';
  is $dom->[0]->content, ' before', 'right content';
  is $dom->child_nodes->[2]->type,    'pi',    'right type';
  is $dom->child_nodes->[2]->content, 'after', 'right content';
  is $dom->child_nodes->first->content(' again')->content, ' again',
    'right content';
  is $dom->child_nodes->grep(sub { $_->type eq 'pi' })->map('remove')



( run in 0.565 second using v1.01-cache-2.11-cpan-454fe037f31 )