PPI

 view release on metacpan or  search on metacpan

lib/PPI/Tokenizer.pm  view on Meta::CPAN

		endpwent
		endservent
		fork
		getgrent
		gethostent
		getlogin
		getnetent
		getppid
		getprotoent
		getpwent
		getservent
		setgrent
		setpwent
		time
		times
		wait
		wantarray
		__SUB__
);



#####################################################################
# Creation and Initialization

=pod

=head2 new $file | \@lines | \$source

The main C<new> constructor creates a new Tokenizer object. These
objects have no configuration parameters, and can only be used once,
to tokenize a single perl source file.

It takes as argument either a plain scalar naming a file to read the
source code from, a reference to a scalar containing source code, or a
reference to an ARRAY of lines of source code. Each line in the ARRAY
has a newline appended to it as the lines are joined together.

Returns a new C<PPI::Tokenizer> object on success, or throws a
L<PPI::Exception> exception on error.

=cut

sub new {
	my $class = ref($_[0]) || $_[0];

	# Create the empty tokenizer struct
	my $self = bless {
		# Source code
		source       => undef,
		source_bytes => undef,
		document     => undef,

		# Line buffer
		line         => undef,
		line_length  => undef,
		line_cursor  => undef,
		line_count   => 0,

		# Parse state
		token        => undef,
		class        => 'PPI::Token::BOM',
		zone         => 'PPI::Token::Whitespace',
		feature_set  => undef,

		# Output token buffer
		tokens       => [],
		token_cursor => 0,
		token_eof    => 0,

		# Perl 6 blocks
		perl6        => [],
	}, $class;

	if ( ! ref $_[1] ) {
		my $source = PPI::Util::_slurp($_[1]);
		PPI::Exception->throw("Tokenizer failed to open file: $source")
		  if not ref $source;
		$self->{source} = $$source;

	} elsif ( _SCALAR0($_[1]) ) {
		PPI::Exception->throw("Did not pass a string: ${$_[1]}")
			if _SCALAR0( $self->{source} = ${$_[1]} );

	} elsif ( _ARRAY0($_[1]) ) {
		$self->{source} = join '', map "$_\n", @{$_[1]};

	} else {
		# We don't support whatever this is
		PPI::Exception->throw(ref($_[1]) . " is not supported as a source provider");
	}

	# We can't handle a null string
	$self->{source_bytes} = length $self->{source};
	if ( $self->{source_bytes} ) {
		# Split on local newlines
		$self->{source} =~ s/(?:\015{1,2}\012|\015|\012)/\n/g;
		$self->{source} = [ split /(?<=\n)/, $self->{source} ];

	} else {
		$self->{source} = [ ];
	}

	### EVIL
	# I'm explaining this earlier than I should so you can understand
	# why I'm about to do something that looks very strange. There's
	# a problem with the Tokenizer, in that tokens tend to change
	# classes as each letter is added, but they don't get allocated
	# their definite final class until the "end" of the token, the
	# detection of which occurs in about a hundred different places,
	# all through various crufty code (that triples the speed).
	#
	# However, in general, this does not apply to tokens in which a
	# whitespace character is valid, such as comments, whitespace and
	# big strings.
	#
	# So what we do is add a space to the end of the source. This
	# triggers normal "end of token" functionality for all cases. Then,
	# once the tokenizer hits end of file, it examines the last token to
	# manually either remove the ' ' token, or chop it off the end of
	# a longer one in which the space would be valid.
	if ( List::Util::any { /^__(?:DATA|END)__\s*$/ } @{$self->{source}} ) {



( run in 0.859 second using v1.01-cache-2.11-cpan-ceb78f64989 )