String-BOM

 view release on metacpan or  search on metacpan

Build.PL  view on Meta::CPAN

use strict;
use warnings;
use Module::Build;

# Settings handed to Module::Build, gathered in one place so the constructor
# call below stays short.
my %build_args = (
    module_name       => 'String::BOM',
    dist_version_from => 'lib/String/BOM.pm',
    dist_author       => 'Daniel Muey <http://drmuey.com/cpan_contact.pl>',
    license           => 'perl',

    # Modules that must be installed; 0 means any version is acceptable.
    requires => {
        'version'     => 0,
        'Test::More'  => 0,
        'File::Slurp' => 0,
    },

    # Extra paths removed by the generated script's cleanup action.
    add_to_cleanup => ['String-BOM-*'],
);

my $builder = Module::Build->new(%build_args);

# Write the ./Build script that drives the rest of the build.
$builder->create_build_script();

Changes  view on Meta::CPAN

Revision history for String-BOM

0.3  Thu Aug  5 22:14:15 2010
    - fixed the number of tests to skip() to prevent some systems from erroneously failing tests that should have been skipped
    - added dependency on File::Slurp for tests
    
0.2  Thu Jul 29 22:04:20 2010
    fixed test mis-logic
    removed utf8 from POD to avoid corruption in display
    changed editing to perl's in-place edit
    Added support for preserving the original file

MANIFEST  view on Meta::CPAN

Build.PL
Changes
MANIFEST
Makefile.PL
README
lib/String/BOM.pm
t/00.load.t
t/perlcritic.t
t/pod-coverage.t
t/pod.t
META.yml                                 Module meta-data (added by MakeMaker)

META.yml  view on Meta::CPAN

--- #YAML:1.0
name:                String-BOM
version:             0.3
abstract:            simple utilities to check for a BOM and strip a BOM
license:             ~
author:              
    - Daniel Muey <http://drmuey.com/cpan_contact.pl>
generated_by:        ExtUtils::MakeMaker version 6.42
distribution_type:   module
requires:     
    File::Slurp:                   0
    Test::More:                    0
    version:                       0
meta-spec:

Makefile.PL  view on Meta::CPAN

use strict;
use warnings;
use ExtUtils::MakeMaker;

# ExtUtils::MakeMaker only understands the LICENSE parameter from version 6.31
# onward and warns about parameters it does not know, so the key is supplied
# only when the installed MakeMaker is new enough. Without it the generated
# META.yml records no license for the distribution.
my %license_arg
    = eval { ExtUtils::MakeMaker->VERSION('6.31'); 1 }
    ? ( LICENSE => 'perl' )
    : ();

# Generate the Makefile for the String::BOM distribution.
WriteMakefile(
    NAME                => 'String::BOM',
    AUTHOR              => 'Daniel Muey <http://drmuey.com/cpan_contact.pl>',
    VERSION_FROM        => 'lib/String/BOM.pm',
    ABSTRACT_FROM       => 'lib/String/BOM.pm',
    %license_arg,
    PL_FILES            => {},    # empty on purpose: no *.PL files are run at build time
    PREREQ_PM => {
        'File::Slurp' => 0,
        'Test::More' => 0,
        'version'    => 0,
    },
    dist                => { COMPRESS => 'gzip -9f', SUFFIX => 'gz', },
    clean               => { FILES => 'String-BOM-*' },
);

README  view on Meta::CPAN

String-BOM version 0.3

DOCUMENTATION

See POD for documentation.

INSTALLATION

To install this module, run the following commands:

	perl Makefile.PL

lib/String/BOM.pm  view on Meta::CPAN

package String::BOM;

# use warnings;
# use strict;

$String::BOM::VERSION = '0.3';

# Lookup table: leading byte sequence (the BOM) => name of the encoding it
# announces. The byte sequences are the ones listed at:
#   http://www.unicode.org/faq/utf_bom.html#BOM
#   http://search.cpan.org/perldoc?PPI::Token::BOM
# Both byte orders of an encoding map to the same name, so endianness is not
# reported to callers.
%String::BOM::bom_types = (
    "\xef\xbb\xbf" => 'UTF-8',

    # UTF-16: big-endian, then little-endian
    "\xfe\xff" => 'UTF-16',
    "\xff\xfe" => 'UTF-16',

    # UTF-32: big-endian, then little-endian
    "\x00\x00\xfe\xff" => 'UTF-32',
    "\xff\xfe\x00\x00" => 'UTF-32',
);

# Report which byte order mark, if any, the given string starts with.
#
# Argument: the string to inspect. It is read directly from @_ rather than
#           copied into a lexical.
# Returns:  the %String::BOM::bom_types value stored for the leading BOM, or
#           nothing (undef in scalar context, an empty list in list context)
#           when the string does not begin with one of the known sequences.
sub string_has_bom {

    # The four-byte UTF-32 marks come first in the alternation so that
    # "\xff\xfe\x00\x00" is not cut short as the two-byte "\xff\xfe".
    my ($leading_bom) = $_[0] =~ m/^(\x00\x00\xfe\xff|\xff\xfe\x00\x00|\xfe\xff|\xff\xfe|\xef\xbb\xbf)/s;

    return if !defined $leading_bom;
    return $String::BOM::bom_types{$leading_bom};
}

# Return the given string with any leading byte order mark removed.
#
# Argument: the string to clean. It is never modified.
# Returns:  a copy of the string without its leading BOM, or an unchanged copy
#           when the string does not start with one.
sub strip_bom_from_string {

    # Substitute on a private copy: $_[0] aliases the caller's value, and
    # editing it in place dies with "Modification of a read-only value
    # attempted" when the caller passed a literal.
    ( my $without_bom = $_[0] ) =~ s/^(\x00\x00\xfe\xff|\xff\xfe\x00\x00|\xfe\xff|\xff\xfe|\xef\xbb\xbf)//s;

    return $without_bom;
}

lib/String/BOM.pm  view on Meta::CPAN

        *{"$caller\::$_"} = \&{$_};
    }
}

1;

__END__

=head1 NAME

String::BOM - simple utilities to check for a BOM and strip a BOM

=head1 VERSION

This document describes String::BOM version 0.3

=head1 SYNOPSIS

    use String::BOM qw(string_has_bom);
    
    if (my $bom = string_has_bom($string)) {
        print "According to the string's BOM it is '$bom'\n";
    }

=head1 DESCRIPTION

See if a string or file has a BOM. Remove the BOM from a string or file.

=head2 You [c|sh]ould use PPI to do this if you are looking at a Perl file

Something like this modified L<PPI> synopsis example should detect and remove a BOM:

    use PPI;
    
    my $Document = PPI::Document->new('Module.pm');

    # Does it contain a BOM?
    if ( $Document->find_any('PPI::Token::BOM') ) {
        print "Module contains BOM!!\n";
        $Document->prune('PPI::Token::BOM');
        $Document->save('Module.pm.bom_free');
    }

=head1 INTERFACE 

All of these functions can be imported.

=head2 string_has_bom()

Takes a string and returns true (the type of BOM it is) if there is a BOM.

=head2 strip_bom_from_string()

Takes a string and returns a version with the BOM, if any, removed.

=head2 file_has_bom()

Takes a path and returns true (the type of BOM it is) if there is a BOM.

Check $! for file operation failure when it returns false.

=head2 strip_bom_from_file()

Takes a path and removes the BOM, if any, from it.

Check $! for file operation failure when it returns false.

A second argument with a true value will make it leave the original document on the file system with a .bak extension added.

Note: If the file had no BOM and was thus not edited then there is no .bak file.

=head1 BOM TYPES

The BOM data is the same as in L<PPI::Token::BOM>, which is taken from L<http://www.unicode.org/faq/utf_bom.html#BOM>.

=head1 DIAGNOSTICS

String::BOM throws no warnings or errors.

=head1 CONFIGURATION AND ENVIRONMENT

String::BOM requires no configuration files or environment variables.

=head1 DEPENDENCIES

None.

=head1 INCOMPATIBILITIES

None reported.

=head1 BUGS AND LIMITATIONS

t/00.load.t  view on Meta::CPAN

use Test::More tests => 85;

# Load the module at compile time, asking for one real function and one name
# that does not exist, so the import checks below are meaningful.
BEGIN {
    use_ok( 'String::BOM', 'string_has_bom', 'fake' );
}

diag("Testing String::BOM $String::BOM::VERSION");
ok( defined &string_has_bom,         'imports fine' );
ok( !defined &strip_bom_from_string, 'does not import ungiven' );
ok( !defined &fake,                  'does not import non-existent' );

#### string tests ####

# is() reports got/expected on failure, which ok( X eq Y ) cannot do.
# scalar() is required: string_has_bom() returns an empty list when it finds
# no BOM, and in list context that would shift is()'s remaining arguments.
is( scalar( string_has_bom("\x00\x00\xfe\xff miscdata") ), 'UTF-32', 'string_has_bom() UTF-32' );
is( scalar( string_has_bom("\xff\xfe\x00\x00 miscdata") ), 'UTF-32', 'string_has_bom() UTF-32' );
is( scalar( string_has_bom("\xfe\xff miscdata") ),         'UTF-16', 'string_has_bom() UTF-16' );
is( scalar( string_has_bom("\xff\xfe miscdata") ),         'UTF-16', 'string_has_bom() UTF-16' );
is( scalar( string_has_bom("\xef\xbb\xbf miscdata") ),     'UTF-8',  'string_has_bom() UTF-8' );

# BOM byte sequences that are not at the very start of the string must not count.
ok( !String::BOM::string_has_bom("miscdata\x00\x00\xfe\xff miscdata"), '!string_has_bom() UTF-32 like' );
ok( !String::BOM::string_has_bom("miscdata\xff\xfe\x00\x00 miscdata"), '!string_has_bom() UTF-32 like' );
ok( !String::BOM::string_has_bom("miscdata\xfe\xff miscdata"),         '!string_has_bom() UTF-16 like' );
ok( !String::BOM::string_has_bom("miscdata\xff\xfe miscdata"),         '!string_has_bom() UTF-16 like' );
ok( !String::BOM::string_has_bom("miscdata\xef\xbb\xbf miscdata"),     '!string_has_bom() UTF-8 like' );

#### file tests  ####

eval "require File::Slurp;";
SKIP: {
    skip 'Please install File::Slurp', 71 if $@;
    my %files = (
        '.bom_UTF-32.1' => "\x00\x00\xfe\xff miscdata",
        '.bom_UTF-32.2' => "\xff\xfe\x00\x00 miscdata",
        '.bom_UTF-16.1' => "\xfe\xff miscdata",

t/00.load.t  view on Meta::CPAN

        '.bom_UTF-8.1' => "\xef\xbb\xbf miscdata",
    );
    for my $file (sort keys %files) {
        unlink $file, "$file.none";
        # TODO: peter out if -e either
        File::Slurp::write_file($file,$files{$file});  
        File::Slurp::write_file("$file.none","miscdata$files{$file}");  
        # TODO: peter out if !-e either
        
        my ($name) = $file =~ m{\.bom\_(UTF-[0-9]+)\.[0-9]+};
        ok(String::BOM::file_has_bom($file) eq $name, "file_has_bom() $file");
        ok(!String::BOM::file_has_bom("$file.none"), "!file_has_bom() $file.none");
        ok(!String::BOM::file_has_bom("$file.open_will_fail"), "!file_has_bom() $file.open_will_fail");
        ok(String::BOM::strip_bom_from_file($file), "strip_bom_from_file() $file");
        ok(String::BOM::strip_bom_from_file("$file.none"), "strip_bom_from_file() $file.none");
        ok(!-e "$file.bak", ".bak file removed when changed");
        ok(!-e "$file.none.bak", "not .bak file to remove when no change");
        ok(!String::BOM::strip_bom_from_file("$file.open_will_fail"), "!strip_bom_from_file() $file.open_will_fail");
        ok(!String::BOM::file_has_bom($file), "!file_has_bom() after strip $file");
        ok(!String::BOM::file_has_bom("$file.none"), "!file_has_bom() (still) after strip $file.none");
        
        File::Slurp::write_file($file,$files{$file});  
        File::Slurp::write_file("$file.none","miscdata$files{$file}");
        ok(String::BOM::strip_bom_from_file($file,1), "strip_bom_from_file() $file");
        ok(String::BOM::strip_bom_from_file("$file.none",1), "strip_bom_from_file() $file.none");
        ok(-e "$file.bak", ".bak file preserved when requested when changed");
        ok(!-e "$file.none.bak", "there is no .bak to preserve when requested when there is no change");
    }
    
    ok(!String::BOM::strip_bom_from_file("asfvavadf") && $!, "strip_bom_from_file() !-e file");
};



( run in 0.367 second using v1.01-cache-2.11-cpan-131fc08a04b )