Badger
view release on metacpan or search on metacpan
lib/Badger/Codec/Unicode.pm view on Meta::CPAN
version => 0.01,
base => 'Badger::Codec::Encode';
use Encode qw();
use bytes;
# Default encoding, used by encode() when no encoding argument is passed
our $ENCODING = 'UTF-8';
# Byte Order Markers for different UTF encodings. This is a flat list of
# alternating (encoding name, BOM byte string) pairs held in an array
# reference (not a hash), which keeps the order fixed. decode() tests the
# BOMs in this order, so the 32-bit BOMs must come before the 16-bit ones:
# the UTF-32LE BOM starts with the same two bytes as the UTF-16LE BOM.
our $UTFBOMS = [
'UTF-8' => "\x{ef}\x{bb}\x{bf}",
'UTF-32BE' => "\x{0}\x{0}\x{fe}\x{ff}",
'UTF-32LE' => "\x{ff}\x{fe}\x{0}\x{0}",
'UTF-16BE' => "\x{fe}\x{ff}",
'UTF-16LE' => "\x{ff}\x{fe}",
];
sub encode {
my ($self, $enc, $data) = @_ == 3 ? @_ : (shift, $ENCODING, shift);
Encode::encode($enc, $data);
lib/Badger/Codec/Unicode.pm view on Meta::CPAN
sub decode {
my $self = shift;
if (@_ >= 2) {
goto &Encode::decode; # not a real GOTO - more like a magic
} # subroutine call - see perldoc -f goto
else {
my $data = shift;
my $count = 0;
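# Try each BOM in turn, looking for one at the start of the data. Order is
# important: the 32-bit BOMs must be tested before the 16-bit ones because
# the UTF-32LE BOM begins with the same bytes as the UTF-16LE BOM.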
while ($count < @$UTFBOMS) {
my $enc = $UTFBOMS->[$count++];
my $bom = $UTFBOMS->[$count++];
# does the string start with the bom?
if ($bom eq substr($data, 0, length($bom))) {
# decode it and hand it back
# return Encode::decode($enc, $data);
return Encode::decode($enc, substr($data, length($bom)), 1);
}
}
return $data;
}
lib/Badger/Codec/Unicode.pm view on Meta::CPAN
$utf8 = $codec->encode($data);
=head2 decode($encoding, $data)
Method for decoding Unicode data. If two arguments are provided then
the first is the encoding and the second the data to decode.
$decoded = $codec->decode( utf8 => $encoded );
If one argument is provided then the method will look for a Byte Order
Mark (BOM) to determine the encoding. If a BOM isn't present, or if the
BOM doesn't match a supported Unicode BOM (any of C<UTF-8>, C<UTF-32BE>,
C<UTF-32LE>, C<UTF-16BE> or C<UTF-16LE>), then the data is returned
unchanged, without being decoded as Unicode.
$decoded = $codec->decode($encoded); # use BOM to detect encoding
=head2 encoder()
This method returns a subroutine reference which can be called to encode
Unicode data. Internally it calls the L<encode()> method.
my $encoder = $codec->encoder;
$encoded = $encoder->($data);
=head2 decoder()
t/filesystem/encoding.t view on Meta::CPAN
use Badger
Filesystem => 'Bin',
Codecs => [ codec => 'utf8' ];
# Directory holding the test data files.
# NOTE(review): must_exist() presumably fails if the directory is missing - confirm.
my $dir = Bin->dir('testfiles')->must_exist;
# This is 'moose...' with a slash through each 'o' (U+00F8) and the '...'
# as a single ellipsis character (U+2026).
my $moose = "m\x{f8}\x{f8}se\x{2026}";
# This is the same data UTF-8 encoded. The commented-out variant starts with
# the UTF-8 BOM (EF BB BF); the variant actually used has no BOM.
#my $data = "\x{ef}\x{bb}\x{bf}m\x{c3}\x{b8}\x{c3}\x{b8}se\x{e2}\x{80}\x{a6}";
my $data = "m\x{c3}\x{b8}\x{c3}\x{b8}se\x{e2}\x{80}\x{a6}";
# write data to file - we do this as raw so that the bytes (including any
# BOM) are passed through as-is
my $file = $dir->file('utf8_data');
$file->raw->print($data);
# read the text back in via $file->utf8
my $text = $file->utf8->text;
# the text read back should be flagged as utf8 and match the original characters
# NOTE(review): reasciify() is defined outside this excerpt - presumably it
# renders non-ASCII characters in a printable form for comparison; confirm.
ok( utf8::is_utf8($text), 'text is utf8' );
is( reasciify($text), reasciify($moose), 'data is unchanged' );
( run in 0.451 second using v1.01-cache-2.11-cpan-f29a10751f0 )