Mojolicious
view release on metacpan or search on metacpan
lib/Mojo/DOM.pm view on Meta::CPAN
# Index at which a node's children begin inside its array representation:
# 1 for a "root" node, 4 for every other node type.
sub _start {
  my $node = shift;
  return 1 if $node->[0] eq 'root';
  return 4;
}
# Concatenate the text found in a list of nodes.
#
#   $nodes - array reference of nodes; it is consumed (emptied) during the walk
#   $xml   - when true, "script" and "style" elements are descended into as well
#   $all   - when true, nested elements are descended into; otherwise only the
#            text-like nodes directly in $nodes are used
#
# Returns the collected text as a single string.
sub _text {
  my ($nodes, $xml, $all) = @_;

  my %is_text = map { $_ => 1 } qw(text cdata raw);
  my @chunks;

  while (my $node = shift @$nodes) {
    my $type = $node->[0];

    # Text-like nodes contribute their content directly
    if ($is_text{$type}) {
      push @chunks, $node->[1];
      next;
    }

    # Only elements can be descended into, and only when requested
    next unless $type eq 'tag' && $all;

    # Outside of XML mode the content of "script" and "style" is skipped
    next if !$xml && ($node->[1] eq 'script' || $node->[1] eq 'style');

    # Put the children at the front of the queue to keep document order
    unshift @$nodes, @{_nodes($node)};
  }

  return join '', @chunks;
}
lib/Mojo/DOM.pm view on Meta::CPAN
=head1 NODES AND ELEMENTS
When we parse an HTML/XML fragment, it gets turned into a tree of nodes.
<!DOCTYPE html>
<html>
<head><title>Hello</title></head>
<body>World!</body>
</html>
There are currently eight different kinds of nodes, C<cdata>, C<comment>, C<doctype>, C<pi>, C<raw>, C<root>, C<tag>
and C<text>. Elements are nodes of the type C<tag>.
  root
  |- doctype (html)
  +- tag (html)
     |- tag (head)
     |  +- tag (title)
     |     +- raw (Hello)
     +- tag (body)
        +- text (World!)
lib/Mojo/DOM.pm view on Meta::CPAN
my $tree = $dom->tree;
$dom = $dom->tree(['root']);
Document Object Model. Note that this structure should only be used very carefully since it is very dynamic.
=head2 type
my $type = $dom->type;
This node's type, usually C<cdata>, C<comment>, C<doctype>, C<pi>, C<raw>, C<root>, C<tag> or C<text>.
# "cdata"
$dom->parse('<![CDATA[Test]]>')->child_nodes->first->type;
# "comment"
$dom->parse('<!-- Test -->')->child_nodes->first->type;
# "doctype"
$dom->parse('<!DOCTYPE html>')->child_nodes->first->type;
# "pi"
$dom->parse('<?xml version="1.0"?>')->child_nodes->first->type;
lib/Mojo/DOM/HTML.pm view on Meta::CPAN
qw(pre rp rt s script section select small strike strong style summary table tbody td template textarea tfoot th),
qw(thead title tr tt u ul xmp)
);
sub parse {
my ($self, $html) = (shift, "$_[0]");
my $xml = $self->xml;
my $current = my $tree = ['root'];
while ($html =~ /\G$TOKEN_RE/gcso) {
my ($text, $doctype, $comment, $cdata, $pi, $tag, $runaway) = ($1, $2, $3, $4, $5, $6, $11);
# Text (and runaway "<")
$text .= '<' if defined $runaway;
_node($current, 'text', html_unescape $text) if defined $text;
# Tag
if (defined $tag) {
# End
if ($tag =~ /^\/\s*(\S+)/) {
lib/Mojo/DOM/HTML.pm view on Meta::CPAN
}
}
# DOCTYPE
elsif (defined $doctype) { _node($current, 'doctype', $doctype) }
# Comment
elsif (defined $comment) { _node($current, 'comment', $comment) }
# CDATA
elsif (defined $cdata) { _node($current, 'cdata', $cdata) }
# Processing instruction (try to detect XML)
elsif (defined $pi) {
$self->xml($xml = 1) if !exists $self->{xml} && $pi =~ /xml/i;
_node($current, 'pi', $pi);
}
}
return $self->tree($tree);
}
lib/Mojo/DOM/HTML.pm view on Meta::CPAN
# Root
return join '', map { _render($_, $xml) } @$tree[1 .. $#$tree] if $type eq 'root';
# DOCTYPE
return '<!DOCTYPE' . $tree->[1] . '>' if $type eq 'doctype';
# Comment
return '<!--' . $tree->[1] . '-->' if $type eq 'comment';
# CDATA
return '<![CDATA[' . $tree->[1] . ']]>' if $type eq 'cdata';
# Processing instruction
return '<?' . $tree->[1] . '?>' if $type eq 'pi';
# Everything else
return '';
}
sub _start {
my ($start, $attrs, $xml, $current) = @_;
lib/Mojolicious/Guides/Cookbook.pod view on Meta::CPAN
# Extract headings
$res->dom('h1, h2, h3')->each(sub ($dom, $i) {
say 'Heading: ', $dom->all_text;
});
# Visit all nodes recursively to extract more than just text
for my $n ($res->dom->descendant_nodes->each) {
# Text or CDATA node
print $n->content if $n->type eq 'text' || $n->type eq 'cdata';
# Also include alternate text for images
print $n->{alt} if $n->type eq 'tag' && $n->tag eq 'img';
}
For a full list of available CSS selectors see L<Mojo::DOM::CSS/"SELECTORS">.
=head2 JSON web services
Most web services these days are based on the JSON data-interchange format. That's why L<Mojolicious> comes with the
t/mojo/dom.t view on Meta::CPAN
# Descendant nodes: type and content of the first and last node below <p>
is $dom->descendant_nodes->[5]->content, 'after', 'right content';
is $dom->at('p')->descendant_nodes->[0]->type, 'text', 'right type';
is $dom->at('p')->descendant_nodes->[0]->content, 'test', 'right content';
is $dom->at('p')->descendant_nodes->last->type, 'comment', 'right type';
is $dom->at('p')->descendant_nodes->last->content, ' 456 ', 'right content';

# Child nodes: navigation, stringification and removal of the first child of <p>
is $dom->child_nodes->[1]->child_nodes->first->parent->tag, 'p', 'right tag';
is $dom->child_nodes->[1]->child_nodes->first->content, 'test', 'right content';
is $dom->child_nodes->[1]->child_nodes->first, 'test', 'right content';
is $dom->at('p')->child_nodes->first->type, 'text', 'right type';
is $dom->at('p')->child_nodes->first->remove->tag, 'p', 'right tag';
is $dom->at('p')->child_nodes->first->type, 'cdata', 'right type';
is $dom->at('p')->child_nodes->first->content, '123', 'right content';
is $dom->at('p')->child_nodes->[1]->type, 'comment', 'right type';
is $dom->at('p')->child_nodes->[1]->content, ' 456 ', 'right content';

# Top-level nodes: doctype and processing instruction, then modify and render
is $dom->[0]->type, 'doctype', 'right type';
is $dom->[0]->content, ' before', 'right content';
is $dom->child_nodes->[2]->type, 'pi', 'right type';
is $dom->child_nodes->[2]->content, 'after', 'right content';
is $dom->child_nodes->first->content(' again')->content, ' again', 'right content';
is $dom->child_nodes->grep(sub { $_->type eq 'pi' })->map('remove')->first->type, 'root', 'right type';
is "$dom", '<!DOCTYPE again><p><![CDATA[123]]><!-- 456 --></p>', 'right result';
( run in 0.319 second using v1.01-cache-2.11-cpan-454fe037f31 )