HTML-Query
view release on metacpan or search on metacpan
lib/HTML/Query.pm view on Meta::CPAN
# so we can check we've done something
$comops++;
# dedup the results we've gotten
@elements = $self->_dedup(\@elements);
map { warn $_->as_HTML } @elements if DEBUG;
}
if ($comops) {
$self->debug(
'Added', scalar(@elements), ' elements to results'
) if DEBUG;
my $selector = substr ($query,$startpos, $pos - $startpos);
$self->_add_specificity($selector,$specificity);
#add in the recent pass
push(@result,@elements);
# dedup the results across the result sets, necessary for comma based selectors
@result = $self->_dedup(\@result);
# sort the result set...
@result = sort _by_address @result;
# update op counter for complete query to include ops performed
# in this fragment
$ops += $comops;
}
else {
# looks like we got an empty comma section, e.g. : ",x, ,y,"
# so we'll ignore it
}
last COMMA unless $query =~ / \G \s*,\s* /cgsx;
}
# check for any trailing text in the query that we couldn't parse
if ($query =~ / \G (.+?) \s* $ /cgsx) {
return $self->_report_error( $self->message( bad_spec => $1, $query ) );
}
# check that we performed at least one query operation
unless ($ops) {
return $self->_report_error( $self->message( bad_query => $query ) );
}
return wantarray ? @result : $self->_new_match_self(@result);
}
# Accessor for the elements stored by the last query.
#
# List context:   returns the stored elements as a flat list.
# Scalar context: returns the array reference held in $self->{elements}
#                 itself (not a copy).
sub get_elements {
    my ($self) = @_;
    my $stored = $self->{elements};

    return $stored unless wantarray;
    return @{$stored};
}
###########################################################################################################
# from CSS spec at http://www.w3.org/TR/CSS21/cascade.html#specificity
###########################################################################################################
# A selector's specificity is calculated as follows:
#
# * count the number of ID attributes in the selector (= a)
# * count the number of other attributes and pseudo-classes in the selector (= b)
# * count the number of element names in the selector (= c)
# * ignore pseudo-elements.
#
# Concatenating the three numbers a-b-c (in a number system with a large base) gives the specificity.
#
# Examples:
#
# * {} /* a=0 b=0 c=0 -> specificity = 0 */
# LI {} /* a=0 b=0 c=1 -> specificity = 1 */
# UL LI {} /* a=0 b=0 c=2 -> specificity = 2 */
# UL OL+LI {} /* a=0 b=0 c=3 -> specificity = 3 */
# H1 + *[REL=up]{} /* a=0 b=1 c=1 -> specificity = 11 */
# UL OL LI.red {} /* a=0 b=1 c=3 -> specificity = 13 */
# LI.red.level {} /* a=0 b=2 c=1 -> specificity = 21 */
# #x34y {} /* a=1 b=0 c=0 -> specificity = 100 */
###########################################################################################################
# Return the specificity recorded for $selector in $self->{specificity}.
#
# When no entry exists yet, the selector is run through query() once so
# that an entry can be recorded (presumably via the _add_specificity call
# made while parsing -- confirm against query()). For that run
# $self->{elements} is temporarily swapped for an empty list, so a large
# invoking tree is not searched just to score a selector; `local` restores
# the original elements when the inner block exits.
sub get_specificity {
    my ($self, $selector) = @_;

    # fast path: already computed for this selector
    return $self->{specificity}->{$selector}
        if exists $self->{specificity}->{$selector};

    {
        local $self->{elements} = [];
        $self->query($selector);
    }

    return $self->{specificity}->{$selector};
}
# Combined getter/setter for the suppress_errors flag.
#
# A defined $setting (including false values such as 0) is stored in
# $self->{suppress_errors}; calling with no argument, or with undef, leaves
# the stored value untouched. The current value is always returned.
sub suppress_errors {
    my ($self, $setting) = @_;

    $self->{suppress_errors} = $setting if defined $setting;

    return $self->{suppress_errors};
}
# Read-only accessor: returns whatever is currently held in $self->{error}
# (undef when nothing has been stored there).
sub get_error {
    my $self = shift;
    return $self->{error};
}
# Return the elements held by this query.
#
# List context:   the elements as a flat list.
# Scalar context: a reference to a new, unblessed array containing the same
#                 elements (a shallow copy, so the caller cannot modify the
#                 query's own storage through it).
#
# The other accessors in this class keep the elements in $self->{elements},
# i.e. the object is hash-based. Dereferencing the object itself as an array
# dies with "Not an ARRAY reference" on such an object, so the elements are
# read from that slot instead. An array-based invocant is still accepted and
# dereferenced directly, for backward compatibility.
sub list {
    my ($self) = @_;

    # Core module, loaded on demand so this method does not rely on the
    # file's import block.
    require Scalar::Util;

    my $is_array_based = ( Scalar::Util::reftype($self) || '' ) eq 'ARRAY';
    my $items = $is_array_based ? $self : ( $self->{elements} || [] );

    return wantarray ? @{$items} : [ @{$items} ];
}
lib/HTML/Query.pm view on Meta::CPAN
using named parameters:
$query = HTML::Query->new( tree => $tree );
$query = HTML::Query->new( query => $query );
You can freely mix and match elements, queries and named sources. The
query will be constructed as an aggregate across them all.
$q = HTML::Query->new(
text => $text1,
text => $text2,
file => $file1,
file => $file2,
tree => $tree,
query => $query1,
);
The final, optional argument can be a selector specification. This is
immediately passed to the L<query()> method which will return a new query
with only those elements selected.
my $spec = 'ul.menu li a'; # <ul class="menu">..<li>..<a>
my $query = HTML::Query->new( $tree, $spec );
my $query = HTML::Query->new( text => $text, $spec );
my $query = HTML::Query->new(
text => $text,
file => $file,
$spec
);
The list of arguments can also be passed by reference to a list.
my $query = HTML::Query->new(\@args);
=head2 query($spec)
This method locates the descendant elements identified by the C<$spec>
argument for each element in the query. It then internally stores the results
for requerying or return. See get_elements().
my $query = HTML::Query->new(\@args);
my $results = $query->query($spec);
See L<"QUERY SYNTAX"> for the permitted syntax of the C<$spec> argument.
=head2 get_elements()
This method returns the stored results from a query. In list context it returns a list of
matching L<HTML::Element|HTML::Element> objects. In scalar context it returns a reference to
the results array.
my $query = HTML::Query->new(\@args);
my $results = $query->query($spec);
my @elements = $results->query($spec)->get_elements();
my $elements = $results->query($spec)->get_elements();
=head2 get_specificity()
Calculate the specificity for any given passed selector, a critical factor in determining how best to apply the cascade.
A selector's specificity is calculated as follows:
* count the number of ID attributes in the selector (= a)
* count the number of other attributes and pseudo-classes in the selector (= b)
* count the number of element names in the selector (= c)
* ignore pseudo-elements.
The specificity is based only on the form of the selector. In particular, a selector of the form "[id=p33]" is counted
as an attribute selector (a=0, b=1, c=0), even if the id attribute is defined as an "ID" in the source document's DTD.
See the following spec for additional details:
L<http://www.w3.org/TR/CSS21/cascade.html#specificity>
=head2 size()
Returns the number of elements in the query.
=head2 first()
Returns the first element in the query.
my $elem = $query->first;
If the query is empty then an exception will be thrown. If you would rather
have an undefined value returned then you can use the C<try> method inherited
from L<Badger::Base|Badger::Base>. This effectively wraps the call to
C<first()> in an C<eval> block to catch any exceptions thrown.
my $elem = $query->try('first') || warn "no first element\n";
=head2 last()
Similar to L<first()>, but returning the last element in the query.
my $elem = $query->last;
=head2 list()
Returns a list of the L<HTML::Element|HTML::Element> objects in the query in
list context, or a reference to a list in scalar context.
my @elems = $query->list;
my $elems = $query->list;
=head2 AUTOLOAD
The C<AUTOLOAD> method maps any other method calls to the
L<HTML::Element|HTML::Element> objects in the list. When called in list
context it returns a list of the values returned from calling the method on
each element. In scalar context it returns a reference to a list of return
values.
my @text_blocks = $query->as_trimmed_text;
my $text_blocks = $query->as_trimmed_text;
=head1 KNOWN BUGS
=head2 Attribute Values
It is not possible to use C<]> in an attribute value. This is due to a
limitation in the parser which will be fixed RSN.
=head1 AUTHOR
Andy Wardley L<http://wardley.org>
=head1 MAINTAINER
Kevin Kamel <kamelkev@mailermailer.com>
=head1 CONTRIBUTORS
( run in 0.523 second using v1.01-cache-2.11-cpan-d7f47b0818f )