File-BOM
view release on metacpan or search on metacpan
lib/File/BOM.pm view on Meta::CPAN
UTF-32LE
)
);
# Reverse lookup table: encoding name => BOM byte string.
#
# The first part is %bom2enc turned inside out (each value becomes a key).
# The second part adds a few further encoding names, each mapped to whatever
# encode() produces for U+FEFF in that encoding.
#
# NB: the parentheses on reverse() are required; without them reverse would
# also swallow (and reverse) the list produced by map.
Readonly %enc2bom => (
    reverse(%bom2enc),
    (
        map { ( $_ => encode( $_, "\x{feff}" ) ) }
            qw( UCS-2 iso-10646-1 utf8 )
    ),
);
{
    # Known BOM byte strings, longest first.  Ordering matters for the
    # alternation below: Perl tries alternatives left to right, so a longer
    # BOM must be listed before any shorter BOM that is a prefix of it.
    my @bombs = sort { length($b) <=> length($a) } keys %bom2enc;

    # The list is sorted by descending length, so the head is the longest.
    Readonly $MAX_BOM_LENGTH => length($bombs[0]);

    # "@bombs" interpolates using $" as the separator; localising it to '|'
    # turns the interpolated list into a regex alternation.
    local $" = '|';

    Readonly $bom_re => qr/^(@bombs)/o;
}
=head1 FUNCTIONS
=head2 open_bom
$encoding = open_bom(HANDLE, $filename, $default_mode)
($encoding, $spill) = open_bom(HANDLE, $filename, $default_mode)
opens HANDLE for reading on $filename, setting the mode to the appropriate
encoding for the BOM stored in the file.
On failure, a fatal error is raised; see the DIAGNOSTICS section for details on
how to catch these. This is in order to allow the return value(s) to be used for
other purposes.
If the file doesn't contain a BOM, $default_mode is used instead. Hence:
open_bom(FH, 'my_file.txt', ':utf8')
Opens my_file.txt for reading in an appropriate encoding found from the BOM in
that file, or as a UTF-8 file if none is found.
In the absence of a $default_mode argument, the following 2 calls should be equivalent:
open_bom(FH, 'no_bom.txt');
open(FH, '<', 'no_bom.txt');
If an undefined value is passed as the handle, a symbol will be generated for it
like open() does:
# create filehandle on the fly
$enc = open_bom(my $fh, $filename, ':utf8');
$line = <$fh>;
The filehandle will be cued up to read after the BOM. Unseekable files (e.g.
fifos) will cause croaking, unless called in list context to catch spillage
from the handle. Any spillage will be automatically decoded from the encoding,
if found.
e.g.
# croak if my_socket is unseekable
open_bom(FH, 'my_socket');
# keep spillage if my_socket is unseekable
($encoding, $spillage) = open_bom(FH, 'my_socket');
# discard any spillage from open_bom
($encoding) = open_bom(FH, 'my_socket');
=cut
# open_bom(HANDLE, $filename [, $default_mode])
#
# Opens $filename for reading on HANDLE and asks
# get_encoding_from_filehandle() which encoding, if any, is detected for it.
#
#   * If an encoding is reported, an ":encoding(...)" layer for it is applied
#     to the handle and any spillage is decoded from that encoding.
#   * Otherwise $default_mode, when given and true, is applied with binmode.
#
# Returns the encoding in scalar context, or (encoding, spillage) in list
# context.
#
# Every failure is fatal.  If anything goes wrong after the file has been
# opened, the handle is closed again before the error is rethrown, so that a
# failed open_bom() never leaves the caller holding an open handle.
sub open_bom (*$;$) {
    my ($fh, $filename, $mode) = @_;

    if (defined $fh) {
        # Resolve bareword/string handles relative to the calling package.
        # "scalar" makes explicit that only the package name is wanted.
        $fh = qualify_to_ref($fh, scalar caller);
    }
    else {
        # Like open(): create a handle and store it in the caller's variable
        # through the @_ alias.
        $fh = $_[0] = gensym();
    }

    open($fh, '<', $filename)
        or croak "Couldn't read '$filename': $!";

    # wantarray inside eval {} reports the context of the eval itself, so the
    # caller's context has to be captured up front.
    my $want_spill = wantarray;

    my $enc;
    my $spill = '';

    my ($ok, $error);
    {
        # Keep the caller's $@ intact on success.
        local $@;

        $ok = eval {
            if ($want_spill) {
                ($enc, $spill) = get_encoding_from_filehandle($fh);
            }
            else {
                $enc = get_encoding_from_filehandle($fh);
            }

            if ($enc) {
                # A detected encoding always overrides the default mode.
                $mode = ":encoding($enc)";

                $spill = decode($enc, $spill, FB_CROAK) if $spill;
            }

            if ($mode) {
                binmode($fh, $mode)
                    or croak "Couldn't set binmode of handle opened on '$filename' "
                           . "to '$mode': $!";
            }

            1;
        };

        $error = $@;
    }

    if (!$ok) {
        # Release the file before propagating the original error unchanged.
        close $fh;
        die( $error || "Unknown error while opening '$filename'\n" );
    }

    return $want_spill ? ($enc, $spill) : $enc;
}
=head2 defuse
$enc = defuse(FH);
($enc, $spill) = defuse(FH);
FH should be a filehandle opened for reading, it will have the relevant encoding
layer pushed onto it by binmode if a BOM is found. Spillage should be Unicode,
( run in 1.581 second using v1.01-cache-2.11-cpan-e1769b4cff6 )