CSS-DOM

 view release on metacpan or  search on metacpan

lib/CSS/DOM.pm  view on Meta::CPAN


Otherwise, you should use C<< decode => 1 >> to instruct CSS::DOM to use
byte order marks or @charset rules.

If neither of those is present, then encoding data in the referencing
document (e.g., <link charset="..."> or an HTML document's own encoding),
if available/applicable, should be used. In this case, you should use the
C<< encoding_hint >> option, so that CSS::DOM has something to fall back
to.

If you use C<< decode => 1 >> with no encoding hint, and no BOM or @charset
is to be found, UTF-8 is assumed.

=head1 SYNTAX ERRORS

The two constructors above, and also
L<C<CSS::DOM::Style::parse>|CSS::DOM::Style/parse>, set C<$@> to the empty 
string upon success. If 
they
encounter a syntax error, they set C<$@> to the error and return an object
that represents whatever was parsed up to that point.

lib/CSS/DOM/Parser.pm  view on Meta::CPAN

sub _decode { my $at; for(''.shift) {
# ~~~ Some of this is repetitive and could probably be compressed.
	require Encode;
	if(/^(\xef\xbb\xbf(\@charset "(.*?)";))/s) {
		my $enc = $3;
		my $dec = eval{Encode::decode($3, $1, 9)};
		if(defined $dec) {
			$dec =~ /^(\x{feff}?)$2\z/
				and return Encode::decode($enc,
					$1 ? substr $_, 3 : $_);
			$@ = $1?"Invalid BOM for $enc: \\xef\\xbb\\xbf"
		      	    :"\"$enc\" is encoded in ASCII but is not"
			     ." ASCII-based";
		}
	}
	elsif(/^\xef\xbb\xbf/) {
		return Encode::decode_utf8(substr $_,3);
	}
	elsif(/^(\@charset "(.*?)";)/s) {
		my $dec = eval{Encode::decode($2, $1, 9)};
		if(defined $dec) {

lib/CSS/DOM/Parser.pm  view on Meta::CPAN

	}
	elsif(
	  /^(\xfe\xff(\0\@\0c\0h\0a\0r\0s\0e\0t\0 \0"((?:\0.)*?)\0"\0;))/s
	) {
		my $enc = Encode::decode('utf16be', $3);
		my $dec = eval{Encode::decode($enc, $1, 9)};
		if(defined $dec) {
			$dec =~ /^(\x{feff}?)\@charset "$enc";\z/
				and return Encode::decode($enc,
					$1 ? substr $_, 2 : $_);
			$@ = $1?"Invalid BOM for $enc: \\xfe\xff"
		      	    :"\"$enc\" is encoded in UCS-2 but is not"
			     ." UCS-2-based";
		}
	}
	elsif(
	  /^(\0\@\0c\0h\0a\0r\0s\0e\0t\0 \0"((?:\0.)*?)\0"\0;)/s
	) {
		my $origenc = my $enc = Encode::decode('utf16be', $2);
		my $dec = eval{Encode::decode($enc, $1, 9)};
		defined $dec or $dec

lib/CSS/DOM/Parser.pm  view on Meta::CPAN

	}
	elsif(
	  /^(\xff\xfe(\@\0c\0h\0a\0r\0s\0e\0t\0 \0"\0((?:.\0)*?)"\0;\0))/s
	) {
		my $enc = Encode::decode('utf16le', $3);
		my $dec = eval{Encode::decode($enc, $1, 9)};
		if(defined $dec) {
			$dec =~ /^(\x{feff}?)\@charset "$enc";\z/
				and return Encode::decode($enc,
					$1 ? substr $_, 2 : $_);
			$@ = $1?"Invalid BOM for $enc: \\xfe\xff"
		      	    :"\"$enc\" is encoded in UCS-2-LE but is not"
			     ." UCS-2-LE-based";
		}
	}
	elsif(
	  /^(\@\0c\0h\0a\0r\0s\0e\0t\0 \0"\0((?:.\0)*?)"\0;\0)/s
	) {
		my $origenc = my $enc = Encode::decode('utf16le', $2);
		my $dec = eval{Encode::decode($enc, $1, 9)};
		!defined $dec || $dec !~ /^\@/ and $dec

lib/CSS/DOM/Parser.pm  view on Meta::CPAN

	elsif(
	  /^(\0\0\xfe\xff(\0{3}\@\0{3}c\0{3}h\0{3}a\0{3}r\0{3}s\0{3}e\0{3}t
	     \0{3}\ \0{3}"((?:\0{3}.)*?)\0{3}"\0{3};))/sx
	) {
		my $enc = Encode::decode('utf32be', $3);
		my $dec = eval{Encode::decode($enc, $1, 9)};
		if(defined $dec) {
			$dec =~ /^(\x{feff}?)\@charset "$enc";\z/
				and return Encode::decode($enc,
					$1 ? substr $_, 2 : $_);
			$@ = $1?"Invalid BOM for $enc: \\xfe\xff"
		      	    :"\"$enc\" is encoded in UTF-32-BE but is not"
			     ." UTF-32-BE-based";
		}
	}
	elsif(
	  /^(\0{3}\@\0{3}c\0{3}h\0{3}a\0{3}r\0{3}s\0{3}e\0{3}t
	     \0{3}\ \0{3}"((?:\0{3}.)*?)\0{3}"\0{3};)/sx
	) {
		my $origenc = my $enc = Encode::decode('utf32be', $2);
		my $dec = eval{Encode::decode($enc, $1, 9)};

lib/CSS/DOM/Parser.pm  view on Meta::CPAN

	elsif(
	  /^(\xff\xfe\0\0(\@\0{3}c\0{3}h\0{3}a\0{3}r\0{3}s\0{3}e\0{3}t
	     \0{3}\ \0{3}"\0{3}((?:.\0{3})*?)"\0{3};\0{3}))/sx
	) {
		my $enc = Encode::decode('utf32le', $3);
		my $dec = eval{Encode::decode($enc, $1, 9)};
		if(defined $dec) {
			$dec =~ /^(\x{feff}?)\@charset "$enc";\z/
				and return Encode::decode($enc,
					$1 ? substr $_, 2 : $_);
			$@ = $1?"Invalid BOM for $enc: \\xfe\xff"
		      	    :"\"$enc\" is encoded in UTF-32-LE but is not"
			     ." UTF-32-LE-based";
		}
	}
	elsif(
	  /^(\@\0{3}c\0{3}h\0{3}a\0{3}r\0{3}s\0{3}e\0{3}t
	     \0{3}\ \0{3}"\0{3}((?:.\0{3})*?)"\0{3};\0{3})/sx
	) {
		my $origenc = my $enc = Encode::decode('utf32le', $2);
		my $dec = eval{Encode::decode($enc, $1, 9)};



( run in 0.499 second using v1.01-cache-2.11-cpan-e9daa2b36ef )