PPI
view release on metacpan or search on metacpan
- Now supports keeping track of line numbers and file names as
affected by the #line directive.
- Now supports UNITCHECK blocks.
- Statement::Include::module_version() implemented.
- Statement::Include::arguments() implemented.
- Statement::Variable::symbols() implemented.
- Token::QuoteLike::Words::literal() implemented.
- Token::Quote::Double::simplify() fixed.
- Element line_number(), column_number(), visual_column_number(),
logical_line_number(), and logical_filename() implemented.
- Support for Unicode byte order marks (PPI::Token::BOM) added.
- Token::Word::method_call() implemented.
- Element::descendant_of() and Element::ancestor_of() implemented.
- Statement::specialized() implemented.
- Now can handle files named "0".
(Perl::Critic got a complaint about this)
- foreach loop variables can be declared using "our".
- Much more comprehensive testing of compound statement detection.
1.204_01 Sun 18 May 2008
Summary:
lib/PPI/Structure/For.pm
lib/PPI/Structure/Given.pm
lib/PPI/Structure/List.pm
lib/PPI/Structure/Signature.pm
lib/PPI/Structure/Subscript.pm
lib/PPI/Structure/Unknown.pm
lib/PPI/Structure/When.pm
lib/PPI/Token.pm
lib/PPI/Token/ArrayIndex.pm
lib/PPI/Token/Attribute.pm
lib/PPI/Token/BOM.pm
lib/PPI/Token/Cast.pm
lib/PPI/Token/Comment.pm
lib/PPI/Token/DashedWord.pm
lib/PPI/Token/Data.pm
lib/PPI/Token/End.pm
lib/PPI/Token/HereDoc.pm
lib/PPI/Token/Label.pm
lib/PPI/Token/Magic.pm
lib/PPI/Token/Number.pm
lib/PPI/Token/Number/Binary.pm
"version" : "1.283"
},
"PPI::Token::ArrayIndex" : {
"file" : "lib/PPI/Token/ArrayIndex.pm",
"version" : "1.283"
},
"PPI::Token::Attribute" : {
"file" : "lib/PPI/Token/Attribute.pm",
"version" : "1.283"
},
"PPI::Token::BOM" : {
"file" : "lib/PPI/Token/BOM.pm",
"version" : "1.283"
},
"PPI::Token::Cast" : {
"file" : "lib/PPI/Token/Cast.pm",
"version" : "1.283"
},
"PPI::Token::Comment" : {
"file" : "lib/PPI/Token/Comment.pm",
"version" : "1.283"
},
version: '1.283'
PPI::Token:
file: lib/PPI/Token.pm
version: '1.283'
PPI::Token::ArrayIndex:
file: lib/PPI/Token/ArrayIndex.pm
version: '1.283'
PPI::Token::Attribute:
file: lib/PPI/Token/Attribute.pm
version: '1.283'
PPI::Token::BOM:
file: lib/PPI/Token/BOM.pm
version: '1.283'
PPI::Token::Cast:
file: lib/PPI/Token/Cast.pm
version: '1.283'
PPI::Token::Comment:
file: lib/PPI/Token/Comment.pm
version: '1.283'
PPI::Token::DashedWord:
file: lib/PPI/Token/DashedWord.pm
version: '1.283'
lib/PPI/Token.pm view on Meta::CPAN
use PPI::Exception ();
our $VERSION = '1.283';
our @ISA = 'PPI::Element';
# We don't load the abstracts, they are loaded
# as part of the inheritance process.
# Load the token classes
use PPI::Token::BOM ();
use PPI::Token::Whitespace ();
use PPI::Token::Comment ();
use PPI::Token::Pod ();
use PPI::Token::Number ();
use PPI::Token::Number::Binary ();
use PPI::Token::Number::Octal ();
use PPI::Token::Number::Hex ();
use PPI::Token::Number::Float ();
use PPI::Token::Number::Exp ();
use PPI::Token::Number::Version ();
lib/PPI/Token/BOM.pm view on Meta::CPAN
package PPI::Token::BOM;
=pod
=head1 NAME
PPI::Token::BOM - Tokens representing Unicode byte order marks
=head1 INHERITANCE
PPI::Token::BOM
isa PPI::Token
isa PPI::Element
=head1 DESCRIPTION
This is a special token in that it can only occur at the beginning of
documents. If a byte order mark occurs elsewhere in a file, it should
be treated as L<PPI::Token::Whitespace>. We recognize the byte order
marks identified at this URL:
L<https://web.archive.org/web/https://www.unicode.org/faq/utf_bom.html#BOM>
UTF-32, big-endian 00 00 FE FF
UTF-32, little-endian FF FE 00 00
UTF-16, big-endian FE FF
UTF-16, little-endian FF FE
UTF-8 EF BB BF
Note that as of this writing, PPI only has support for UTF-8
(namely, in POD and strings) and no support for UTF-16 or UTF-32. We
recognize the BOMs of the latter two for completeness only: the
tokenizer reports an error when it encounters one.
The BOM is considered non-significant, like white space.
=head1 METHODS
There are no additional methods beyond those provided by the parent
L<PPI::Token> and L<PPI::Element> classes.
=cut
use strict;
use PPI::Token ();
lib/PPI/Token/BOM.pm view on Meta::CPAN
\xfe\xff | # UTF-16, big-endian
\xff\xfe | # UTF-16, little-endian
\xef\xbb\xbf) # UTF-8
/xs) {
my $bom = $1;
if ($bom_types{$bom} ne 'UTF-8') {
return $t->_error("$bom_types{$bom} is not supported");
}
$t->_new_token('BOM', $bom) or return undef;
$t->{line_cursor} += length $bom;
}
# Continue just as if there was no BOM
$t->{class} = 'PPI::Token::Whitespace';
return $t->{class}->__TOKENIZER__on_line_start($t);
}
1;
=pod
=head1 SUPPORT
lib/PPI/Tokenizer.pm view on Meta::CPAN
document => undef,
# Line buffer
line => undef,
line_length => undef,
line_cursor => undef,
line_count => 0,
# Parse state
token => undef,
class => 'PPI::Token::BOM',
zone => 'PPI::Token::Whitespace',
# Output token buffer
tokens => [],
token_cursor => 0,
token_eof => 0,
# Perl 6 blocks
perl6 => [],
}, $class;
t/14_charsets.t view on Meta::CPAN
# Guard for the Unicode tests: if "ä" is not matched by \w here, call
# skip() with the message below and a count of 11 instead of running them.
# NOTE(review): skip() is presumably Test::More's, which expects an
# enclosing SKIP: block that is not visible in this excerpt - confirm.
unless ( "ä" =~ /\w/ ) {
skip( "Unicode-incompatible locale in use (apparently)", 11 );
}
# Notorious test case.
# In 1.203 this test case causes a memory leaking infinite loop
# that consumes all available memory and then crashes the process.
good_ok( 'ä¸();', "Function with Chinese characters" );
# Byte order mark with no unicode content
good_ok( "\xef\xbb\xbf1;\n", "BOM without actual unicode content" );
# Testing accented characters in UTF-8
good_ok( 'sub func { }', "Parsed code without accented chars" );
good_ok( 'rätselhaft();', "Function with umlaut" );
good_ok( 'ätselhaft()', "Starting with umlaut" );
good_ok( '"rätselhaft"', "In double quotes" );
good_ok( "'rätselhaft'", "In single quotes" );
good_ok( 'sub func { s/a/ä/g; }', "Regex with umlaut" );
good_ok( 'sub func { $ä=1; }', "Variable with umlaut" );
good_ok( '$ä¸ = "壹";', "Variables with Chinese characters" );
t/data/26_bom/utf8.dump view on Meta::CPAN
PPI::Document
PPI::Token::BOM ''
PPI::Statement
PPI::Token::Word 'print'
PPI::Token::Whitespace ' '
PPI::Token::Number '1'
PPI::Token::Structure ';'
PPI::Token::Whitespace '\n'
xt/author/00-compile.t view on Meta::CPAN
'PPI/Structure/For.pm',
'PPI/Structure/Given.pm',
'PPI/Structure/List.pm',
'PPI/Structure/Signature.pm',
'PPI/Structure/Subscript.pm',
'PPI/Structure/Unknown.pm',
'PPI/Structure/When.pm',
'PPI/Token.pm',
'PPI/Token/ArrayIndex.pm',
'PPI/Token/Attribute.pm',
'PPI/Token/BOM.pm',
'PPI/Token/Cast.pm',
'PPI/Token/Comment.pm',
'PPI/Token/DashedWord.pm',
'PPI/Token/Data.pm',
'PPI/Token/End.pm',
'PPI/Token/HereDoc.pm',
'PPI/Token/Label.pm',
'PPI/Token/Magic.pm',
'PPI/Token/Number.pm',
'PPI/Token/Number/Binary.pm',
( run in 0.752 second using v1.01-cache-2.11-cpan-131fc08a04b )