Convert-Base85
view release on metacpan or search on metacpan
lib/Convert/Base85.pm view on Meta::CPAN
package Convert::Base85;
use 5.016001;
use warnings;
use strict;
use Carp;
use Math::Int128 qw(
uint128
uint128_to_number
uint128_add
uint128_and
uint128_divmod
uint128_left
uint128_mul
uint128_right);
#
# Three '#' for encoding information, four '#' for decoding, and five '#'
# for the intermediate values of both (the level enabled below).
#
#use Smart::Comments q(#####);
our $VERSION = '1.02';

use Exporter qw(import);

#
# The functions that may be imported on request, either one by one
# or all together through the :all tag.
#
our @EXPORT_OK = qw(base85_check base85_encode base85_decode);
our %EXPORT_TAGS = (all => [@EXPORT_OK]);
=head1 NAME
Convert::Base85 - Encoding and decoding to and from Base 85 strings
=head1 SYNOPSIS
use Convert::Base85;
my $encoded = Convert::Base85::encode($data);
my $decoded = Convert::Base85::decode($encoded);
or
use Convert::Base85 qw(base85_encode base85_decode);
my $encoded = base85_encode($data);
my $decoded = base85_decode($encoded);
=head1 DESCRIPTION
This module implements a I<Base85> conversion for encoding binary
data as text. This is done by interpreting each group of sixteen bytes
as a 128-bit integer, which is then converted to a twenty-digit base 85
representation using the alphanumeric characters 0-9, A-Z, and a-z, in
addition to the punctuation characters !, #, $, %, &, (, ), *, +, -, ;, <, =, >,
?, @, ^, _, `, {, |, }, and ~, in that order.
This creates a string that is five fourths (1.25 times) the size of the original
data, making it more efficient than L<MIME::Base64>'s 3-to-4 ratio (1.3333).
As noted above, the conversion makes use of 128-bit arithmetic, which most
computers can't handle natively, which is why the module L<Math::Int128>
needs to be installed as well.
=cut
#
# The Base85 alphabet, listed in order of digit value:
#
#   digits  0..9    values  0..9
#   letters A..Z    values 10..35
#   letters a..z    values 36..61
#   punctuation     values 62..84
#
# $b85_encode[$value] is the character that represents $value.
#
my @b85_encode = (
    '0' .. '9',
    'A' .. 'Z',
    'a' .. 'z',
    split(//, q[!#$%&()*+-;<=>?@^_`{|}~]),
);

#
# The inverse mapping: $b85_decode[ord($char)] is the digit value
# (0 to 84) of $char. The table covers the ASCII range 0..127, and
# every character that is not in the alphabet maps to -1.
#
my @b85_decode = (-1) x 128;
@b85_decode[map { ord($_) } @b85_encode] = (0 .. $#b85_encode);
=head1 FUNCTIONS
=head3 base85_check
Examine a string for characters that fall outside the Base 85 character set.
Returns the first character position that fails the test, or -1 if no characters fail.
if ((my $d = base85_check($base85str)) >= 0)
{
carp "Incorrect character at position $d; cannot decode input string";
return undef;
}
lib/Convert/Base85.pm view on Meta::CPAN
write 60-character lines (remembering that C<encode()> takes 16 bytes
at a time and encodes to 20 bytes). Remember to save the original length
in case the data had to be padded out to a multiple of 16.
=cut
#
# encode($plain) -- also available as base85_encode().
#
# Returns the Base85 text for the byte string $plain: twenty characters
# for every sixteen bytes, after $plain has been padded with zero bytes
# up to a multiple of sixteen. An empty string encodes to an empty string.
#
# Croaks if $plain contains a character above 0xFF, because such a
# string is not a sequence of bytes.
#
sub encode
{
    my($plain) = @_;

    #
    # unpack('C*') would reduce a character above 0xFF modulo 256, and
    # the result could never be decoded back to the original. Refuse
    # that input instead of encoding it lossily. ($plain is our own
    # copy, so downgrading it does not touch the caller's string.)
    #
    utf8::downgrade($plain, 1)
        or croak "Wide character in base85_encode";

    #
    # Extra zero bytes to bring the length up to a multiple of sixteen.
    #
    my $extra = -length($plain) % 16;
    $plain .= "\0" x $extra;

    #
    # The divisor never changes, and $rem is only a scratch output of
    # uint128_divmod(), so both are created once for all groups.
    #
    my $base = uint128(85);
    my $rem = uint128();
    my $encoded = '';

    for my $str16 (unpack '(a16)*', $plain)
    {
        my $total16 = uint128(0);
        my @plain = unpack('C*', $str16);

        #
        ### @plain: join(", ", @plain)
        #

        #
        # Read the sixteen bytes as one big-endian 128-bit number.
        #
        for my $p (@plain)
        {
            uint128_left($total16, $total16, 8);
            uint128_add($total16, $total16, uint128($p));
        }

        #
        ##### total16: "$total16"
        #

        #
        # Peel off the base 85 digits, least significant first, filling
        # the twenty positions from the right.
        #
        my @digits = (0) x 20;
        for my $j (reverse 0 .. 19)
        {
            uint128_divmod($total16, $rem, $total16, $base);
            $digits[$j] = uint128_to_number($rem);
        }

        $encoded .= join "", @b85_encode[@digits];
    }

    return $encoded;
}

*base85_encode = \&encode;
=head3 base85_decode
=head3 Convert::Base85::decode
Converts the Base85-encoded string back to bytes. Any spaces, linebreaks, or
other whitespace are stripped from the string before decoding.
This function may be exported as C<base85_decode> into the caller's namespace.
If your original data wasn't an even multiple of sixteen in length, the
decoded data may have some padding with null bytes ('\0'), which can be removed.
#
# Decode the string and compare its length with the length of the original data.
#
my $decoded = base85_decode($data);
my $padding = length($decoded) - $datalen;
chop $decoded while ($padding-- > 0);
=cut
#
# decode($encoded) -- also available as base85_decode().
#
# Returns the bytes represented by the Base85 string $encoded: sixteen
# bytes for every group of twenty characters. Spaces, tabs, carriage
# returns, newlines and form feeds are removed before decoding. A final
# group shorter than twenty characters is read as a shorter base 85
# number, i.e. as if it had leading '0' digits.
#
# Croaks if $encoded contains any other character that is not part of
# the Base85 alphabet.
#
sub decode
{
    my($encoded) = @_;

    $encoded =~ tr[ \t\r\n\f][]d;

    #
    # The multiplier and divisor never change, and $rem is only a scratch
    # output of uint128_divmod(), so all three are created once.
    #
    my $imul = uint128(85);
    my $idiv = uint128(256);
    my $rem = uint128();
    my $decoded = '';

    for my $str20 (unpack '(a20)*', $encoded)
    {
        my $total20 = uint128(0);

        #
        # ord() keeps the full code point, so a character above 0xFF
        # cannot be mistaken for a valid one.
        #
        my @coded = map { ord($_) } split(//, $str20);

        #
        #### $str20: $str20
        #### @coded: join(", ", @coded)
        #

        for my $c (@coded)
        {
            #
            # The lookup table only covers code points 0..127 and marks
            # characters outside the alphabet with -1. Neither that
            # marker nor a missing entry may reach the arithmetic.
            #
            my $digit = $c < @b85_decode ? $b85_decode[$c] : -1;
            croak sprintf("Invalid Base85 character (code point 0x%02X) in input", $c)
                if $digit < 0;

            uint128_mul($total20, $total20, $imul);
            uint128_add($total20, $total20, uint128($digit));
        }

        #
        ##### total20: "$total20"
        #

        #
        # Split the 128-bit number into sixteen bytes, least significant
        # first, filling the positions from the right.
        #
        my @tmplist = (0) x 16;
        for my $j (reverse 0 .. 15)
        {
            uint128_divmod($total20, $rem, $total20, $idiv);
            $tmplist[$j] = uint128_to_number($rem);
        }

        #
        ##### @tmplist: join(", ", @tmplist)
        #

        $decoded .= pack('C*', @tmplist);
    }

    return $decoded;
}

*base85_decode = \&decode;
=head1 SEE ALSO
=head2 The Base85 Character Set
The Base85 character set is described by Robert Elz in his RFC1924 of
April 1st 1996,
L<"A Compact Representation of IPv6 Addresses"|https://tools.ietf.org/html/rfc1924>
( run in 0.701 second using v1.01-cache-2.11-cpan-df04353d9ac )