BOM results from the CPAN

PPI

view release on metacpan or search on metacpan

	- Now supports keeping track of line numbers and file names as
	  affected by the #line directive.
	- Now supports UNITCHECK blocks.
	- Statement::Include::module_version() implemented.
	- Statement::Include::arguments() implemented.
	- Statement::Variable::symbols() implemented.
	- Token::QuoteLike::Words::literal() implemented.
	- Token::Quote::Double::simplify() fixed.
	- Element line_number(), column_number(), visual_column_number(),
	  logical_line_number(), and logical_filename() implemented.
	- Support for Unicode byte order marks (PPI::Token::BOM) added.
	- Token::Word::method_call() implemented.
	- Element::descendant_of() and Element::ancestor_of() implemented.
	- Statement::specialized() implemented.
	- Now can handle files named "0".
	  (Perl::Critic got a complaint about this)
	- foreach loop variables can be declared using "our".
	- Much more comprehensive testing of compound statement detection.

1.204_01 Sun 18 May 2008
	Summary:

MANIFEST view on Meta::CPAN

lib/PPI/Structure/For.pm
lib/PPI/Structure/Given.pm
lib/PPI/Structure/List.pm
lib/PPI/Structure/Signature.pm
lib/PPI/Structure/Subscript.pm
lib/PPI/Structure/Unknown.pm
lib/PPI/Structure/When.pm
lib/PPI/Token.pm
lib/PPI/Token/ArrayIndex.pm
lib/PPI/Token/Attribute.pm
lib/PPI/Token/BOM.pm
lib/PPI/Token/Cast.pm
lib/PPI/Token/Comment.pm
lib/PPI/Token/DashedWord.pm
lib/PPI/Token/Data.pm
lib/PPI/Token/End.pm
lib/PPI/Token/HereDoc.pm
lib/PPI/Token/Label.pm
lib/PPI/Token/Magic.pm
lib/PPI/Token/Number.pm
lib/PPI/Token/Number/Binary.pm

META.json view on Meta::CPAN

         "version" : "1.283"
      },
      "PPI::Token::ArrayIndex" : {
         "file" : "lib/PPI/Token/ArrayIndex.pm",
         "version" : "1.283"
      },
      "PPI::Token::Attribute" : {
         "file" : "lib/PPI/Token/Attribute.pm",
         "version" : "1.283"
      },
      "PPI::Token::BOM" : {
         "file" : "lib/PPI/Token/BOM.pm",
         "version" : "1.283"
      },
      "PPI::Token::Cast" : {
         "file" : "lib/PPI/Token/Cast.pm",
         "version" : "1.283"
      },
      "PPI::Token::Comment" : {
         "file" : "lib/PPI/Token/Comment.pm",
         "version" : "1.283"
      },

META.yml view on Meta::CPAN

    version: '1.283'
  PPI::Token:
    file: lib/PPI/Token.pm
    version: '1.283'
  PPI::Token::ArrayIndex:
    file: lib/PPI/Token/ArrayIndex.pm
    version: '1.283'
  PPI::Token::Attribute:
    file: lib/PPI/Token/Attribute.pm
    version: '1.283'
  PPI::Token::BOM:
    file: lib/PPI/Token/BOM.pm
    version: '1.283'
  PPI::Token::Cast:
    file: lib/PPI/Token/Cast.pm
    version: '1.283'
  PPI::Token::Comment:
    file: lib/PPI/Token/Comment.pm
    version: '1.283'
  PPI::Token::DashedWord:
    file: lib/PPI/Token/DashedWord.pm
    version: '1.283'

lib/PPI/Token.pm view on Meta::CPAN

use PPI::Exception ();

our $VERSION = '1.283';

our @ISA = 'PPI::Element';

# We don't load the abstracts, they are loaded
# as part of the inheritance process.

# Load the token classes
use PPI::Token::BOM                   ();
use PPI::Token::Whitespace            ();
use PPI::Token::Comment               ();
use PPI::Token::Pod                   ();
use PPI::Token::Number                ();
use PPI::Token::Number::Binary        ();
use PPI::Token::Number::Octal         ();
use PPI::Token::Number::Hex           ();
use PPI::Token::Number::Float         ();
use PPI::Token::Number::Exp           ();
use PPI::Token::Number::Version       ();

lib/PPI/Token/BOM.pm view on Meta::CPAN

package PPI::Token::BOM;

=pod

=head1 NAME

PPI::Token::BOM - Tokens representing Unicode byte order marks

=head1 INHERITANCE

  PPI::Token::BOM
  isa PPI::Token
      isa PPI::Element

=head1 DESCRIPTION

This is a special token in that it can only occur at the beginning of
documents.  If a byte order mark occurs elsewhere in a file, it should
be treated as L<PPI::Token::Whitespace>.  We recognize the byte order
marks identified at this URL:
L<https://web.archive.org/web/https://www.unicode.org/faq/utf_bom.html#BOM>

    UTF-32, big-endian     00 00 FE FF
    UTF-32, little-endian  FF FE 00 00
    UTF-16, big-endian     FE FF
    UTF-16, little-endian  FF FE
    UTF-8                  EF BB BF

Note that as of this writing, PPI only has support for UTF-8
(namely, in POD and strings) and no support for UTF-16 or UTF-32.  We
recognize the BOMs of the latter two for completeness only; the
tokenizer reports an error saying that the encoding is not supported.

The BOM is considered non-significant, like white space.

=head1 METHODS

There are no additional methods beyond those provided by the parent
L<PPI::Token> and L<PPI::Element> classes.

=cut

use strict;
use PPI::Token ();

lib/PPI/Token/BOM.pm view on Meta::CPAN

		\xfe\xff         |  # UTF-16, big-endian
		\xff\xfe         |  # UTF-16, little-endian
		\xef\xbb\xbf)       # UTF-8
	    /xs) {
	   my $bom = $1;

	   if ($bom_types{$bom} ne 'UTF-8') {
	      return $t->_error("$bom_types{$bom} is not supported");
	   }

	   $t->_new_token('BOM', $bom) or return undef;
	   $t->{line_cursor} += length $bom;
	}

	# Continue just as if there was no BOM
	$t->{class} = 'PPI::Token::Whitespace';
	return $t->{class}->__TOKENIZER__on_line_start($t);
}

1;

=pod

=head1 SUPPORT

lib/PPI/Tokenizer.pm view on Meta::CPAN

		document     => undef,

		# Line buffer
		line         => undef,
		line_length  => undef,
		line_cursor  => undef,
		line_count   => 0,

		# Parse state
		token        => undef,
		class        => 'PPI::Token::BOM',
		zone         => 'PPI::Token::Whitespace',

		# Output token buffer
		tokens       => [],
		token_cursor => 0,
		token_eof    => 0,

		# Perl 6 blocks
		perl6        => [],
	}, $class;

t/14_charsets.t view on Meta::CPAN

	unless ( "Ã¤" =~ /\w/ ) {
		skip( "Unicode-incompatible locale in use (apparently)", 11 );
	}

	# Notorious test case.
	# In 1.203 this test case causes a memory leaking infinite loop
	# that consumes all available memory and then crashes the process.
	good_ok( 'ä¸€();', "Function with Chinese characters" );

	# Byte order mark with no unicode content
	good_ok( "\xef\xbb\xbf1;\n", "BOM without actual unicode content" );

	# Testing accented characters in UTF-8
	good_ok( 'sub func { }',           "Parsed code without accented chars" );
	good_ok( 'rÃ¤tselhaft();',          "Function with umlaut"               );
	good_ok( 'Ã¤tselhaft()',            "Starting with umlaut"               );
	good_ok( '"rÃ¤tselhaft"',           "In double quotes"                   );
	good_ok( "'rÃ¤tselhaft'",           "In single quotes"                   );
	good_ok( 'sub func { s/a/Ã¤/g; }',  "Regex with umlaut"                  );
	good_ok( 'sub func { $Ã¤=1; }',     "Variable with umlaut"               );
	good_ok( '$ä¸€ = "å£¹";',              "Variables with Chinese characters"  );

t/data/26_bom/utf8.dump view on Meta::CPAN

PPI::Document
  PPI::Token::BOM  	'ï»¿'
  PPI::Statement
    PPI::Token::Word  	'print'
    PPI::Token::Whitespace  	' '
    PPI::Token::Number  	'1'
    PPI::Token::Structure  	';'
  PPI::Token::Whitespace  	'\n'

xt/author/00-compile.t view on Meta::CPAN

    'PPI/Structure/For.pm',
    'PPI/Structure/Given.pm',
    'PPI/Structure/List.pm',
    'PPI/Structure/Signature.pm',
    'PPI/Structure/Subscript.pm',
    'PPI/Structure/Unknown.pm',
    'PPI/Structure/When.pm',
    'PPI/Token.pm',
    'PPI/Token/ArrayIndex.pm',
    'PPI/Token/Attribute.pm',
    'PPI/Token/BOM.pm',
    'PPI/Token/Cast.pm',
    'PPI/Token/Comment.pm',
    'PPI/Token/DashedWord.pm',
    'PPI/Token/Data.pm',
    'PPI/Token/End.pm',
    'PPI/Token/HereDoc.pm',
    'PPI/Token/Label.pm',
    'PPI/Token/Magic.pm',
    'PPI/Token/Number.pm',
    'PPI/Token/Number/Binary.pm',

( run in 0.857 second using v1.01-cache-2.11-cpan-e9daa2b36ef )