CSS-DOM
view release on metacpan or search on metacpan
lib/CSS/DOM/Parser.pm view on Meta::CPAN
package CSS::DOM::Parser;
$VERSION = '0.17';
use strict; use warnings; no warnings qw 'utf8 parenthesis';
use re 'taint';
use Carp 1.01 qw 'shortmess croak';
use CSS::DOM;
use CSS::DOM::Rule::Style;
use CSS::DOM::Style;
use CSS::DOM::Util 'unescape';
our @CARP_NOT = qw "CSS::DOM CSS::DOM::Rule::Media";
# Tokeniser regexps
my $token_re;
# This one has to be outside the scope, because we need it in tokenise.
my $_optspace = qr/[ \t\r\n\f]*/;
{
# Vars beginning with _ here are not token regexps, but are used to
# build them.
my $_escape =qr/\\(?:[0-9a-f]{1,6}(?:\r\n|[ \n\r\t\f])?|[^\n\r\f0-9a-f])/i;
my $_id_start = qr/[_a-zA-Z]|[^\0-\177]|$_escape/;
my $_id_cont = qr/[_a-zA-Z0-9-]|[^\0-\177]|$_escape/;
my $_nl = qr/\r\n?|[\n\f]/;
my $_invalid_qq = qr/"[^\n\r\f\\"]*(?:(?:\\$_nl|$_escape)[^\n\r\f\\"]*)*/;
my $_invalid_q = qr/'[^\n\r\f\\']*(?:(?:\\$_nl|$_escape)[^\n\r\f\\']*)*/;
my $ident = qr/-?$_id_start$_id_cont*/;
my $at = qr/\@$ident/;
my $str = qr/$_invalid_qq(?:"|\z)|$_invalid_q(?:'|\z)/;
my $invalid = qr/$_invalid_qq|$_invalid_q/;
my $hash = qr/#$_id_cont+/;
my $num = qr/(?=\.?[0-9])[0-9]*(?:\.[0-9]*)?/;
my $percent = qr/$num%/;
my $dim = qr/$num$ident/;
my $url = qr/url\($_optspace(?:
$str
|
[^\0- "'()\\\x7f]*(?:$_escape[^\0- "'()\\\x7f]*)*
)$_optspace(?:\)|\z)/x;
my $uni_range = qr/U\+[0-9A-F?]{1,6}(?:-[0-9a-f]{1,6})?/i;
my $space = qr/(?:[ \t\r\n\f]+|\/\*.*?(?:\*\/|\z))[ \t\r\n\f]*
(?:\/\*.*?(?:\*\/|\z)[ \t\r\n\f]*)*/xs;
my $function = qr/$ident\(/;
# Literal tokens are as follows:
# <!-- --> ; { } ( ) [ ] ~= |= , :
# The order of some tokens is important. $url, $uni_range and $function
# have to come before $ident. $url has to come before $function. $percent
# and $dim have to come before $num.
$token_re = qr/\G(?:
($url)|($uni_range)|($function)|($ident)|($at)|($str)|($invalid)|
($hash)|($percent)|($dim)|($num)|(<!--|-->)|(;)|(\{)|(})|(\()|(\))
|(\[)|(])|($space)|(~=)|(\|=)|(,)|(:)|(.)
)/xs;
} # end of tokeniser regexps
# tokenise returns a string of token types in addition to the array of
# tokens so that we can apply grammar rules using regexps. The types are
# as follows:
# u url
# U unicode range
# f function
# i identifier
# @ at keyword
# ' string
# " invalid string (unterminated)
# # hash
# % percentage
# D dimension
# 1 number (not 0, because we want it true)
# < html comment delimiter
# s space/comments
# ~ ~=
# | |=
# d delimiter (miscellaneous character)
# The characters ;{}()[],: represent themselves. The comma and colon are
# actually delimiters according to the CSS 2.1 spec, but itâs more conveni-
# ent to have them as their own tokens.
# ~~~ It might actually make the code cleaner if we make them all their own
# tokens, in which case we can provide a $delim_re for matching against a
# token type string.
sub tokenise { warn caller unless defined $_[0];for (''.shift) {
my($tokens,@tokens)='';
while(/$token_re/gc){
my $which = (grep defined $+[$_], 1..$#+)[0];
no strict 'refs';
push @tokens, $$which;
no warnings qw]qw];
$tokens .=
qw/u U f i @ ' " # % D 1 < ; { } ( ) [ ] s ~ | , : d/
[$which-1];
# We need to close unterminated tokens for the sake of
# serialisation. If we donât, then too many other parts of
# the code base have to deal with it.
if($tokens =~ /'\z/) {
$tokens[-1] =~ /^(')[^'\\]*(?:\\.[^'\\]*)*\z
|
^(")[^"\\]*(?:\\.[^"\\]*)*\z/xs
and $tokens[-1] .= $1 || $2;
}
elsif($tokens =~ /u\z/) {
(my $copy = $tokens[-1]) =~ s/^url\($_optspace(?:
(')[^'\\]*(?:\\.[^'\\]*)*
|
(")[^"\\]*(?:\\.[^"\\]*)*
|
[^)\\]*(?:\\.[^)\\]*)*
)//sox;
my $str_delim = $1||$2;
$str_delim and $copy!~s/^['"]$_optspace//o
and $tokens[-1] .= $str_delim;
$copy or $tokens[-1] .= ')';
}
}
# This canât ever happen:
pos and pos() < length
and die "CSS::DOM::Parser internal error (please report this):"
." Can't tokenise " .substr $_,pos;
# close bracketed constructs: again, we do this here so that other
# pieces of code scattered all over the place (including the reg-
# exps below, which would need things like â(?:\)|\z)â)
# donât have to.
my $brack_count = (()=$tokens=~/[(f]/g)-(()=$tokens=~/\)/g)
+ (()=$tokens=~/\[/g)-(()=$tokens=~/]/g)
+ (()=$tokens=~/{/g)-(()=$tokens=~/}/g);
( run in 0.461 second using v1.01-cache-2.11-cpan-39bf76dae61 )