Encode-UTF8-Slow
view release on metacpan or search on metacpan
lib/Encode/UTF8/Slow.pm view on Meta::CPAN
# Decodes the first character of the argument into its Unicode code point.
#
# The argument is first passed through encode('UTF-8', ...), so it is handled
# as a string of characters that is converted to UTF-8 octets and then decoded
# by hand with bit masks.
# NOTE(review): the POD describes the argument as already UTF-8 encoded bytes;
# a scalar holding raw octets would be encoded a second time here - confirm
# what callers are expected to pass.
#
# The sequence length is read from the lead byte rather than from the total
# byte length, so a string holding several characters yields the code point of
# its first character instead of a value mixed from unrelated bytes.
#
# Returns the code point as an integer, or 0 when the argument is empty or
# undefined (the same answer ord('') gives).
sub bytes_to_codepoint {
  my $input = encode('UTF-8', shift);

  # nothing to decode - bail out before touching bytes that do not exist
  return 0 unless defined $input && length $input;

  # a UTF-8 sequence is at most four bytes long, so that is all we look at
  my @bytes = unpack 'C*', substr($input, 0, 4);
  my $lead  = $bytes[0];

  # reverse encoding: the high bits of the lead byte give the sequence length
  # (presumably encode() always hands back well-formed UTF-8, so the
  # continuation bytes each branch reads are present)

  # 0xxxxxxx
  if (($lead & 0b10000000) == 0) {
    return $lead;
  }
  # 110xxxxx 10xxxxxx
  elsif (($lead & 0b11100000) == 0b11000000) {
    return (($lead     & 0b00011111) << 6) +
            ($bytes[1] & 0b00111111);
  }
  # 1110xxxx 10xxxxxx 10xxxxxx
  elsif (($lead & 0b11110000) == 0b11100000) {
    return (($lead     & 0b00001111) << 12) +
           (($bytes[1] & 0b00111111) <<  6) +
            ($bytes[2] & 0b00111111);
  }
  # 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
  else {
    return (($lead     & 0b00000111) << 18) +
           (($bytes[1] & 0b00111111) << 12) +
           (($bytes[2] & 0b00111111) <<  6) +
            ($bytes[3] & 0b00111111);
  }
}
1;
__END__
=encoding utf8
=head1 NAME
Encode::UTF8::Slow - A pure Perl, naive UTF-8 encoder/decoder
=head1 SYNOPSIS
use Encode::UTF8::Slow qw/bytes_to_codepoint codepoint_to_bytes/;
my $bytes = codepoint_to_bytes(0x1F4FA); #television
my $codepoint = bytes_to_codepoint('📼'); #videocassette
=head1 FUNCTIONS
=head2 codepoint_to_bytes
Takes a Unicode codepoint number and returns a scalar of UTF-8 encoded bytes
for it. Exported on request.
=head2 bytes_to_codepoint
Takes UTF-8 encoded bytes in a scalar and returns the Unicode codepoint for it.
Exported on request.
=head1 WARNING
This is a naive encoder - it doesn't handle UTF-16 surrogate pairs, the BOM,
or noncharacters like 0xFFFE. It's also very slow!
=head1 SEE ALSO
=over 4
=item *
L<Unicode::UTF8|https://metacpan.org/pod/Unicode::UTF8> for a super fast UTF-8 encoder.
=item *
L<Building a UTF-8 encoder in Perl|http://perltricks.com/article/building-a-utf-8-encoder-in-perl/> my PerlTricks.com article about this code.
=item *
L<RFC 3629|https://tools.ietf.org/html/rfc3629> - which defines the current UTF-8 standard.
=back
=head1 REPOSITORY
This code is hosted at L<GitHub|https://github.com/dnmfarrell/Encode-UTF8-Slow>.
=head1 AUTHOR
E<copy> 2016 David Farrell
=head1 LICENSE
FreeBSD, see LICENSE.
=cut
( run in 0.534 second using v1.01-cache-2.11-cpan-39bf76dae61 )