Convert-Base81

 view release on metacpan or  search on metacpan

lib/Convert/Base81.pm  view on Meta::CPAN

package Convert::Base81;

use 5.016001;
use warnings;
use strict;

use Carp;
use Math::Int128 qw(uint128 uint128_to_number
	uint128_add uint128_divmod uint128_left uint128_mul);

#use Smart::Comments q(###);

our $VERSION = '1.02';

use Exporter qw(import);

#
# Nothing is exported by default. Start with the general-purpose
# functions; the tagged groups are appended below.
#
our @EXPORT_OK = qw(base81_check base81_encode base81_decode rwsize);

#
# The :pack and :unpack groups.
#
our %EXPORT_TAGS = (
	pack   => [ qw(b3_pack81 b9_pack81 b27_pack81) ],
	unpack => [ qw(b3_unpack81 b9_unpack81 b27_unpack81) ],
);

#
# Every tagged function may also be imported by name.
#
push @EXPORT_OK, map { @{ $EXPORT_TAGS{$_} } } qw(pack unpack);

#
# Add an :all tag automatically (a copy of the list, not an alias to it).
#
$EXPORT_TAGS{all} = [ @EXPORT_OK ];

=head1 NAME

Convert::Base81 - Encoding and decoding to and from Base 81 strings

=head1 SYNOPSIS

    use Convert::Base81;
 
    my $encoded = Convert::Base81::encode($data);
    my $decoded = Convert::Base81::decode($encoded);

or

    use Convert::Base81 qw(base81_encode base81_decode);
 
    my $encoded = base81_encode($data);
    my $decoded = base81_decode($encoded);

=head1 DESCRIPTION

This module implements a I<Base81> conversion for encoding binary
data as text. This is done by interpreting each group of fifteen bytes
as a 120-bit integer, which is then converted to a nineteen-digit base 81
representation using the alphanumeric characters 0-9, A-Z, and a-z, in
addition to the punctuation characters !, #, $, %, (, ), *,
+, -, ;, =, ?, @, ^, _, {, |, }, and ~, in that order, characters that
are safe to use in JSON and XML formats.

This creates a string that is 1.2666 times the size of the original
data, making it more efficient than L<MIME::Base64>'s 3-to-4 ratio (1.3333)
but slightly less efficient than L<Convert::Ascii85>'s 4-to-5 ratio (1.25).

It does have the advantage of a natural ternary system: if your data is
composed of only three, or nine, or twenty-seven distinct values, its
size can be compressed instead of expanded, and this module has functions
that will do that.

    use Convert::Base81 qw(b3_pack81 b3_unpack81);

    my $input_string = q(rrgrbgggggrrgbrrbbbbrbrgggrggggg);
    my $b81str = b3_pack81("rgb", $input_string);

The returned string will be one-fourth the size of the original. Equivalent
functions exist for 9-digit and 27-digit values, which will return strings
one-half and three-fourths the size of the original, respectively.

=cut

#
# character    value
#  0..9:        0..9
#  A..Z:        10..35
#  a..z:        36..61
#  punc:        62..80
#
# Or, in a 9x9 tabular form, displaying the trits (0, 1, -):
#
#               |    0      1     2      3      4      5      6      7      8
#               +-------------------------------------------------------------
# ('0'..'8')  0 | 0000   0001  000-   0010   0011   001-   00-0   00-1   00--
# ('9'..'H')  9 | 0100   0101  010-   0110   0111   011-   01-0   01-1   01--
# ('I'..'Q') 18 | 0-00   0-01  0-0-   0-10   0-11   0-1-   0--0   0--1   0---
# ('R'..'Z') 27 | 1000   1001  100-   1010   1011   101-   10-0   10-1   10--
# ('a'..'i') 36 | 1100   1101  110-   1110   1111   111-   11-0   11-1   11--
# ('j'..'r') 45 | 1-00   1-01  1-0-   1-10   1-11   1-1-   1--0   1--1   1---
# ('s'..'!') 54 | -000   -001  -00-   -010   -011   -01-   -0-0   -0-1   -0--
# ('#'..';') 63 | -100   -101  -10-   -110   -111   -11-   -1-0   -1-1   -1--
# ('='..'~') 72 | --00   --01  --0-   --10   --11   --1-   ---0   ---1   ----
#
#
# Take a number from 0 to 80, and turn it into a character.
#

# Digit value (array index, 0..80) to output character: the ten digits,
# the upper-case letters, the lower-case letters, then 19 punctuation marks.
my @b81_encode = (
	'0' .. '9',
	'A' .. 'Z',
	'a' .. 'z',
	split(//, '!#$%()*+-;=?@^_{|}~'),
);

lib/Convert/Base81.pm  view on Meta::CPAN


=cut

#
# encode($plain)
#
# Return the Base81 text for the bytes in $plain. The input is taken
# $readsize bytes at a time and each group becomes $writesize output
# characters; both sizes come from rwsize().
#
sub encode
{
	my($plain) = @_;
	my($readsize, $writesize) = rwsize();
	my $base = uint128(81);
	my $digit = uint128();
	my $encoded = "";

	#
	# Null-pad the input so that it splits into whole groups.
	#
	my $shortfall = -length($plain) % $readsize;
	$plain .= "\0" x $shortfall;

	for my $chunk (unpack "(a${readsize})*", $plain)
	{
		#
		# Fold the group's bytes, first byte most significant,
		# into one integer: $ptotal = ($ptotal << 8) + $byte.
		#
		my $ptotal = uint128(0);

		for my $byte (unpack('C*', $chunk))
		{
			uint128_left($ptotal, $ptotal, 8);
			uint128_add($ptotal, $ptotal, uint128($byte));
		}

		#
		### rtotal:  "$ptotal"
		#
		# Repeated division by 81 yields the digits from least
		# to most significant, so each new digit goes in front
		# of the ones already collected.
		#
		my @digits;

		while (@digits < $writesize)
		{
			uint128_divmod($ptotal, $digit, $ptotal, $base);
			unshift @digits, uint128_to_number($digit);
		}

		$encoded .= join "", map{$b81_encode[$_]} @digits;
	}

	return $encoded;
}

*base81_encode = \&encode;

=head3 base81_decode

=head3 Convert::Base81::decode

Converts the Base81-encoded string back to bytes. Any spaces, linebreaks, or
other whitespace are stripped from the string before decoding.

This function may be exported as C<base81_decode> into the caller's namespace.

If the length of your original data wasn't an exact multiple of fifteen, the
decoded data will have some padding with null bytes ('\0'), which can be removed.

    #
    # Decode the string and compare its length with the length of the original data.
    #
    my $decoded = base81_decode($encoded);
    my $padding = length($decoded) - $datalen;
    chop $decoded while ($padding-- > 0);

=cut

#
# decode($encoded)
#
# Return the bytes represented by the Base81 text in $encoded. The text
# is taken $writesize characters at a time and each group becomes
# $readsize bytes; both sizes come from rwsize().
#
# Spaces, tabs, carriage returns, newlines and form feeds are removed
# first. A short final group is padded with '0' characters.
#
sub decode
{
	my($encoded) = @_;
	my($readsize, $writesize) = rwsize();
	my $imul = uint128(81);
	my $bytemod = uint128(256);	# Built once, not once per output byte.
	my $rem = uint128();

	$encoded =~ tr[ \t\r\n\f][]d;

	#
	# Extra '0' characters to bring the length up to the write size.
	#
	my $extra = -length($encoded) % $writesize;
	$encoded .= '0' x $extra if ($extra != 0);

	my @mlist;

	for my $group (unpack "(a${writesize})*", $encoded)
	{
		my $etotal = uint128(0);
		my @tmplist = (q(0)) x $readsize;

		#
		# Calculate $etotal = ($etotal * 81) + digit, first
		# character most significant. @b81_decode is defined
		# elsewhere in this file and is indexed by character code
		# (presumably yielding the digit value 0..80).
		#
		for my $c (unpack('C*', $group))
		{
			my $iadd = uint128($b81_decode[$c]);
			uint128_mul($etotal, $etotal, $imul);
			uint128_add($etotal, $etotal, $iadd);
		}

		#
		### Read string: $group
		### total =  sprintf("0x%0x", $etotal)
		#
		# Calculate the mod 256 list; the least significant byte
		# comes out first, so fill the group from the end.
		#
		for my $j (reverse 0 .. $readsize - 1)
		{
			uint128_divmod($etotal, $rem, $etotal, $bytemod);
			$tmplist[$j] = uint128_to_number($rem);
		}
		push @mlist, @tmplist;
	}

	return join "",	map{chr($_)} @mlist;
}

*base81_decode = \&decode;

=head3 rwsize

By default, the C<encode()> function reads 15 bytes, and writes 19,
resulting in an expansion ratio of 1.2666. It does require 128-bit
integers to calculate this, which are simulated in a library. If your
decoding destination doesn't have such a library available, the encode
function can be reduced to reading 7 bytes and writing 9, giving an
expansion ratio of 1.2857. This only requires 64-bit integers, which
many environments can handle.

Note that this does not affect the operation of this module, which
will use 128-bit integers regardless.

To set the smaller size, use:



( run in 0.698 second using v1.01-cache-2.11-cpan-df04353d9ac )