HTML-Inspect

 view release on metacpan or  search on metacpan

xt/benchmark_xpath.pl  view on Meta::CPAN

use strict;
use warnings;
use utf8;
use FindBin qw($Bin);
use lib "$Bin/../t/lib";
use lib "$Bin/../lib";

#use Test::More;
use XML::LibXML;
use TestUtils qw(slurp);
use Benchmark;

=pod

Using XPath expressions is faster than the getElementsBy* methods, especially
when the result has to be filtered. Here is the output on my computer:

    Benchmark: timing 200000 iterations of DOM, XPATH...
           DOM:  8 wallclock secs ( 7.89 usr +  0.00 sys =  7.89 CPU) @ 25348.54/s (n=200000)
         XPATH:  6 wallclock secs ( 5.78 usr +  0.00 sys =  5.78 CPU) @ 34602.08/s (n=200000)
    Benchmark: timing 200000 iterations of DOM2, XPATH2...
          DOM2:  7 wallclock secs ( 6.77 usr +  0.00 sys =  6.77 CPU) @ 29542.10/s (n=200000)
        XPATH2:  5 wallclock secs ( 4.98 usr +  0.00 sys =  4.98 CPU) @ 40160.64/s (n=200000)

Below is the benchmark.

=cut

# Parse the Open Graph fixture once, up front, so that the benchmarks below
# time only the node lookups and not the HTML parsing.
my $fixture = "$Bin/../t/data/collectOpenGraph.html";

# Options handed to the parser together with the document source; kept as an
# ordered list so they are passed exactly in this sequence.
my @parser_opts = (
    recover           => 2,
    suppress_errors   => 1,
    suppress_warnings => 1,
    no_network        => 1,
    no_xinclude_nodes => 1,
);

# The document source is passed by reference, as slurp() returned it.
my $dom = XML::LibXML->load_html(string => \(slurp($fixture)), @parser_opts);

# Root element of the parsed document; every benchmark queries from here.
my $doc = $dom->documentElement;

# First round: collect the <meta> elements that carry a "property" attribute,
# either by filtering the getElementsByTagName() result in Perl (DOM) or by
# letting a single XPath predicate do the filtering (XPATH).
#
# Benchmark does not use the return value of the subs it times, so each
# result is assigned to a lexical array. That forces list context on both
# candidates: without it the DOM variant builds a list only to throw it away,
# while findnodes(), which is documented to return a NodeList object outside
# list context, takes a different code path, and the two candidates would
# not be doing comparable work.
timethese(
    200000,
    {
        'DOM' => sub {
            # grep is the direct way to say "keep the elements for which
            # the test is true".
            my @found = grep { $_->hasAttribute('property') }
                $doc->getElementsByTagName('meta');
        },
        'XPATH' => sub {
            my @found = $doc->findnodes('//meta[@property]');
        },
    }
);


# Second round: plain lookup of every <meta> element, without any filtering.
my $lookup_runs = 200000;

# Label => code to time; the labels appear in the Benchmark report.
my %lookup_candidates = (
    DOM2   => sub { $doc->getElementsByTagName('meta') },
    XPATH2 => sub { $doc->findnodes('//meta') },
);

timethese($lookup_runs, \%lookup_candidates);

# done_testing;



( run in 0.475 second using v1.01-cache-2.11-cpan-71847e10f99 )