String-BOM
view release on metacpan or search on metacpan
use strict;
use warnings;
use Module::Build;
# Constructor arguments for Module::Build, held in an ordered list so they are
# handed to new() exactly as written here.
my @build_args = (
    module_name       => 'String::BOM',
    license           => 'perl',
    dist_author       => 'Daniel Muey <http://drmuey.com/cpan_contact.pl>',
    dist_version_from => 'lib/String/BOM.pm',

    # Declared prerequisites, each with a minimum version of 0.
    requires => {
        'File::Slurp' => 0,
        'Test::More'  => 0,
        'version'     => 0,
    },

    add_to_cleanup => ['String-BOM-*'],
);

my $builder = Module::Build->new(@build_args);

# Emit the Build script for this distribution.
$builder->create_build_script();
Revision history for String-BOM
0.3 Thu Aug 5 22:14:15 2010
- fixed the number of tests to skip() to prevent some systems from erroneously failing tests that should have been skipped
- added dependency on File::Slurp for tests
0.2 Thu Jul 29 22:04:20 2010
fixed test mis-logic
removed utf8 from POD to avoid corruption in display
changed editing to perl's in-place edit
Added support for preserving the original file
Build.PL
Changes
MANIFEST
Makefile.PL
README
lib/String/BOM.pm
t/00.load.t
t/perlcritic.t
t/pod-coverage.t
t/pod.t
META.yml Module meta-data (added by MakeMaker)
--- #YAML:1.0
name: String-BOM
version: 0.3
abstract: simple utilities to check for a BOM and strip a BOM
license: ~
author:
- Daniel Muey <http://drmuey.com/cpan_contact.pl>
generated_by: ExtUtils::MakeMaker version 6.42
distribution_type: module
requires:
File::Slurp: 0
Test::More: 0
version: 0
meta-spec:
Makefile.PL view on Meta::CPAN
use strict;
use warnings;
use ExtUtils::MakeMaker;
# Generate the Makefile for String::BOM.
#
# The name, author, version and abstract are given directly or read from
# lib/String/BOM.pm.  A LICENSE entry is supplied as well so that the
# generated META.yml carries a real license value instead of "~"; it matches
# the license => 'perl' declared in Build.PL.
WriteMakefile(
    NAME          => 'String::BOM',
    AUTHOR        => 'Daniel Muey <http://drmuey.com/cpan_contact.pl>',
    VERSION_FROM  => 'lib/String/BOM.pm',
    ABSTRACT_FROM => 'lib/String/BOM.pm',

    # LICENSE is only recognised by ExtUtils::MakeMaker 6.31 and later; on
    # older releases it is left out so they do not complain about an unknown
    # parameter.  ->VERSION() dies when the installed release is too old.
    ( eval { ExtUtils::MakeMaker->VERSION(6.31); 1 } ? ( LICENSE => 'perl' ) : () ),

    PL_FILES  => {},
    PREREQ_PM => {
        'File::Slurp' => 0,
        'Test::More'  => 0,
        'version'     => 0,
    },
    dist  => { COMPRESS => 'gzip -9f', SUFFIX => 'gz', },
    clean => { FILES    => 'String-BOM-*' },
);
String-BOM version 0.3
DOCUMENTATION
See POD for documentation.
INSTALLATION
To install this module, run the following commands:
perl Makefile.PL
lib/String/BOM.pm view on Meta::CPAN
# String::BOM - utilities to check for a byte order mark (BOM) and strip it.
package String::BOM;
# NOTE(review): strict and warnings are commented out below; presumably this is
# because the export code assigns to globs by name (symbolic references) --
# confirm, and scope any exemption narrowly, before enabling them.
# use warnings;
# use strict;
# Distribution version; Build.PL and Makefile.PL both read the version from
# this file.
$String::BOM::VERSION = '0.3';
# http://www.unicode.org/faq/utf_bom.html#BOM
# http://search.cpan.org/perldoc?PPI::Token::BOM
# Lookup table from the raw bytes of a BOM to the name reported for it.
# UTF-32 and UTF-16 each have two byte sequences that map to the same name.
%String::BOM::bom_types = (
    ( map { ( $_ => 'UTF-32' ) } "\x00\x00\xfe\xff", "\xff\xfe\x00\x00" ),
    ( map { ( $_ => 'UTF-16' ) } "\xfe\xff",         "\xff\xfe" ),
    "\xef\xbb\xbf" => 'UTF-8',
);
# string_has_bom($string)
#
# Returns the BOM type name ('UTF-32', 'UTF-16' or 'UTF-8', looked up in
# %String::BOM::bom_types) when $string starts with one of the known BOM byte
# sequences.  Otherwise returns nothing: undef in scalar context, the empty
# list in list context.
sub string_has_bom {

    # Guard clause: bail out unless a BOM sits at the very start.  The 4-byte
    # alternatives come before the 2-byte ones they share a prefix with.
    return
      unless $_[0] =~ m/^(\x00\x00\xfe\xff|\xff\xfe\x00\x00|\xfe\xff|\xff\xfe|\xef\xbb\xbf)/s;

    # $1 holds the exact BOM bytes that matched.
    return $String::BOM::bom_types{$1};
}
# strip_bom_from_string($string)
#
# Returns a copy of $string with a single leading BOM, if present, removed.
# The caller's value is never modified.
sub strip_bom_from_string {

    # Take a private copy first: substituting directly on $_[0] would fail with
    # "Modification of a read-only value attempted" for arguments like literals.
    my ($stripped) = @_;

    $stripped =~ s/^(\x00\x00\xfe\xff|\xff\xfe\x00\x00|\xfe\xff|\xff\xfe|\xef\xbb\xbf)//s;

    return $stripped;
}
lib/String/BOM.pm view on Meta::CPAN
*{"$caller\::$_"} = \&{$_};
}
}
1;
__END__
=head1 NAME
String::BOM - simple utilities to check for a BOM and strip a BOM
=head1 VERSION
This document describes String::BOM version 0.3
=head1 SYNOPSIS
use String::BOM qw(string_has_bom);
if (my $bom = string_has_bom($string)) {
print "According to the string's BOM it is '$bom'\n";
}
=head1 DESCRIPTION
See if a string or file has a BOM. Remove the BOM from a string or file.
=head2 You [c|sh]ould use PPI to do this if you are looking at a perl file
Something like this modified L<PPI> synopsis example should detect and remove a BOM:
use PPI;
my $Document = PPI::Document->new('Module.pm');
# Does it contain a BOM?
if ( $Document->find_any('PPI::Token::BOM') ) {
print "Module contains BOM!!\n";
$Document->prune('PPI::Token::BOM');
$Document->save('Module.pm.bom_free');
}
=head1 INTERFACE
All of these functions can be imported.
=head2 string_has_bom()
Takes a string and returns true (the type of BOM it is) if there is a BOM.
=head2 strip_bom_from_string()
Takes a string and returns a version with the BOM, if any, removed.
=head2 file_has_bom()
Takes a path and returns true (the type of BOM it is) if there is a BOM.
Check $! for file operation failure when it returns false.
=head2 strip_bom_from_file()
Takes a path and removes the BOM, if any, from it.
Check $! for file operation failure when it returns false.
A second argument with a true value will make it leave the original document on the file system with a .bak extension added.
Note: If the file had no BOM and was thus not edited then there is no .bak file.
=head1 BOM TYPES
The BOM data is the same as that of L<PPI::Token::BOM>, which is taken from L<http://www.unicode.org/faq/utf_bom.html#BOM>.
=head1 DIAGNOSTICS
String::BOM throws no warnings or errors
=head1 CONFIGURATION AND ENVIRONMENT
String::BOM requires no configuration files or environment variables.
=head1 DEPENDENCIES
None.
=head1 INCOMPATIBILITIES
None reported.
=head1 BUGS AND LIMITATIONS
t/00.load.t view on Meta::CPAN
use Test::More tests => 85;
# Load the module at compile time, asking for one real function and one name
# ('fake') that the import checks below expect not to be imported.
BEGIN {
    use_ok( 'String::BOM', 'string_has_bom', 'fake' );
}

diag( "Testing String::BOM $String::BOM::VERSION" );

# Only the requested function that actually exists should have been imported.
ok( defined &string_has_bom,         'imports fine' );
ok( !defined &strip_bom_from_string, 'does not import ungiven' );
ok( !defined &fake,                  'doe not import non existant' );

#### string tests ####

# is() is used instead of ok( ... eq ... ) so that a failure reports the value
# string_has_bom() actually returned next to the expected BOM type.
is( string_has_bom("\x00\x00\xfe\xff miscdata"), 'UTF-32', 'string_has_bom() UTF-32' );
is( string_has_bom("\xff\xfe\x00\x00 miscdata"), 'UTF-32', 'string_has_bom() UTF-32' );
is( string_has_bom("\xfe\xff miscdata"),         'UTF-16', 'string_has_bom() UTF-16' );
is( string_has_bom("\xff\xfe miscdata"),         'UTF-16', 'string_has_bom() UTF-16' );
is( string_has_bom("\xef\xbb\xbf miscdata"),     'UTF-8',  'string_has_bom() UTF-8' );

# BOM byte sequences that do not sit at the very start of the string must not
# be reported as a BOM.
ok( !String::BOM::string_has_bom("miscdata\x00\x00\xfe\xff miscdata"), '!string_has_bom() UTF-32 like' );
ok( !String::BOM::string_has_bom("miscdata\xff\xfe\x00\x00 miscdata"), '!string_has_bom() UTF-32 like' );
ok( !String::BOM::string_has_bom("miscdata\xfe\xff miscdata"),         '!string_has_bom() UTF-16 like' );
ok( !String::BOM::string_has_bom("miscdata\xff\xfe miscdata"),         '!string_has_bom() UTF-16 like' );
ok( !String::BOM::string_has_bom("miscdata\xef\xbb\xbf miscdata"),     '!string_has_bom() UTF-8 like' );
#### file tests ####
eval "require File::Slurp;";
SKIP: {
skip 'Please install File::Slurp', 71 if $@;
my %files = (
'.bom_UTF-32.1' => "\x00\x00\xfe\xff miscdata",
'.bom_UTF-32.2' => "\xff\xfe\x00\x00 miscdata",
'.bom_UTF-16.1' => "\xfe\xff miscdata",
t/00.load.t view on Meta::CPAN
'.bom_UTF-8.1' => "\xef\xbb\xbf miscdata",
);
for my $file (sort keys %files) {
unlink $file, "$file.none";
# TODO: peter out if -e either
File::Slurp::write_file($file,$files{$file});
File::Slurp::write_file("$file.none","miscdata$files{$file}");
# TODO: peter out if !-e either
my ($name) = $file =~ m{\.bom\_(UTF-[0-9]+)\.[0-9]+};
ok(String::BOM::file_has_bom($file) eq $name, "file_has_bom() $file");
ok(!String::BOM::file_has_bom("$file.none"), "!file_has_bom() $file.none");
ok(!String::BOM::file_has_bom("$file.open_will_fail"), "!file_has_bom() $file.open_will_fail");
ok(String::BOM::strip_bom_from_file($file), "strip_bom_from_file() $file");
ok(String::BOM::strip_bom_from_file("$file.none"), "strip_bom_from_file() $file.none");
ok(!-e "$file.bak", ".bak file removed when changed");
ok(!-e "$file.none.bak", "not .bak file to remove when no change");
ok(!String::BOM::strip_bom_from_file("$file.open_will_fail"), "!strip_bom_from_file() $file.open_will_fail");
ok(!String::BOM::file_has_bom($file), "!file_has_bom() after strip $file");
ok(!String::BOM::file_has_bom("$file.none"), "!file_has_bom() (still) after strip $file.none");
File::Slurp::write_file($file,$files{$file});
File::Slurp::write_file("$file.none","miscdata$files{$file}");
ok(String::BOM::strip_bom_from_file($file,1), "strip_bom_from_file() $file");
ok(String::BOM::strip_bom_from_file("$file.none",1), "strip_bom_from_file() $file.none");
ok(-e "$file.bak", ".bak file preserved when requested when changed");
ok(!-e "$file.none.bak", "there is no .bak to preserve when requested when there is no change");
}
ok(!String::BOM::strip_bom_from_file("asfvavadf") && $!, "strip_bom_from_file() !-e file");
};
( run in 0.367 second using v1.01-cache-2.11-cpan-131fc08a04b )