PPI
view release on metacpan or search on metacpan
lib/PPI/Tokenizer.pm view on Meta::CPAN
endpwent
endservent
fork
getgrent
gethostent
getlogin
getnetent
getppid
getprotoent
getpwent
getservent
setgrent
setpwent
time
times
wait
wantarray
__SUB__
);
#####################################################################
# Creation and Initialization
=pod
=head2 new $file | \@lines | \$source
The main C<new> constructor creates a new Tokenizer object. These
objects have no configuration parameters, and can only be used once,
to tokenize a single Perl source file.
It takes as its argument either a plain scalar holding the name of a
file to read the source code from, a reference to a scalar containing
source code, or a reference to an ARRAY of source code lines (a newline
is appended to each element when the lines are joined).
Returns a new C<PPI::Tokenizer> object on success, or throws a
L<PPI::Exception> exception on error.
=cut
sub new {
my $class = ref($_[0]) || $_[0];
# Create the empty tokenizer struct
my $self = bless {
# Source code
source => undef,
source_bytes => undef,
document => undef,
# Line buffer
line => undef,
line_length => undef,
line_cursor => undef,
line_count => 0,
# Parse state
token => undef,
class => 'PPI::Token::BOM',
zone => 'PPI::Token::Whitespace',
feature_set => undef,
# Output token buffer
tokens => [],
token_cursor => 0,
token_eof => 0,
# Perl 6 blocks
perl6 => [],
}, $class;
if ( ! ref $_[1] ) {
my $source = PPI::Util::_slurp($_[1]);
PPI::Exception->throw("Tokenizer failed to open file: $source")
if not ref $source;
$self->{source} = $$source;
} elsif ( _SCALAR0($_[1]) ) {
PPI::Exception->throw("Did not pass a string: ${$_[1]}")
if _SCALAR0( $self->{source} = ${$_[1]} );
} elsif ( _ARRAY0($_[1]) ) {
$self->{source} = join '', map "$_\n", @{$_[1]};
} else {
# We don't support whatever this is
PPI::Exception->throw(ref($_[1]) . " is not supported as a source provider");
}
# We can't handle a null string
$self->{source_bytes} = length $self->{source};
if ( $self->{source_bytes} ) {
# Split on local newlines
$self->{source} =~ s/(?:\015{1,2}\012|\015|\012)/\n/g;
$self->{source} = [ split /(?<=\n)/, $self->{source} ];
} else {
$self->{source} = [ ];
}
### EVIL
# I'm explaining this earlier than I should so you can understand
# why I'm about to do something that looks very strange. There's
# a problem with the Tokenizer, in that tokens tend to change
# classes as each letter is added, but they don't get allocated
# their definite final class until the "end" of the token, the
# detection of which occurs in about a hundred different places,
# all through various crufty code (that triples the speed).
#
# However, in general, this does not apply to tokens in which a
# whitespace character is valid, such as comments, whitespace and
# big strings.
#
# So what we do is add a space to the end of the source. This
# triggers normal "end of token" functionality for all cases. Then,
# once the tokenizer hits end of file, it examines the last token to
# manually either remove the ' ' token, or chop it off the end of
# a longer one in which the space would be valid.
if ( List::Util::any { /^__(?:DATA|END)__\s*$/ } @{$self->{source}} ) {
( run in 0.859 second using v1.01-cache-2.11-cpan-ceb78f64989 )