Algorithm-Huffman
view release on metacpan or search on metacpan
following to the missing parameter validation.
0.05 Thu Aug 29
- improved decode_bitstring method
* exceptions are raised instead of an endless loop
if an unknown character sequence is found
0.04 Wed Aug 28
- implemented decode_bitstring($bitstring) method
- fixed bug: The returned encode_bitstring had
only the length of the $orig_string,
and not longer
0.03 Mon Aug 26
- implemented encode_bitstring($orig_string) method
0.02 Tue Aug 20
- implemented $huff->decode_hash method
0.01 Tue Aug 13
- implemented new and encode_hash method
foreach my $leaf ($root->leaves_under) {
my @bit = reverse map {$_->attribute->{bit}} ($leaf, $leaf->ancestors);
my $bitstr = join "", @bit;
$encode{$leaf->name} = $bitstr;
$decode{$bitstr} = $leaf->name;
}
my $self = {
encode => \%encode,
decode => \%decode,
max_length_encoding_key => max( map length, keys %encode ),
max_length_decoding_key => max( map length, keys %decode ),
min_length_decoding_key => min( map length, keys %decode )
};
bless $self, $class;
}
sub encode_hash {
my $self = shift;
$self->{encode};
}
sub decode_hash {
my $self = shift;
$self->{decode};
}
sub encode_bitstring {
my ($self, $string) = @_;
my $max_length_encoding_key = $self->{max_length_encoding_key};
my %encode_hash = %{$self->encode_hash};
my $bitstring = "";
my ($index, $max_index) = (0, length($string)-1);
while ($index <= $max_index) {
for (my $l = $max_length_encoding_key; $l > 0; $l--) {
if (my $bits = $encode_hash{substr($string, $index, $l)}) {
$bitstring .= $bits;
$index += $l;
last;
}
}
}
return $bitstring;
}
sub encode {
my ($self, $string) = @_;
my $max_length_encoding_key = $self->{max_length_encoding_key};
my %encode_hash = %{$self->encode_hash};
my $bitvector = "";
my $offset = 0;
my ($index, $max_index) = (0, length($string)-1);
while ($index <= $max_index) {
for (my $l = $max_length_encoding_key; $l > 0; $l--) {
if (my $bits = $encode_hash{substr($string, $index, $l)}) {
vec($bitvector, $offset++, 1) = $_ for split //, $bits;
$index += $l;
last;
}
}
}
return $bitvector;
}
sub decode_bitstring {
my ($self, $bitstring) = @_;
my $max_length_decoding_key = $self->{max_length_decoding_key};
my $min_length_decoding_key = $self->{min_length_decoding_key};
my %decode_hash = %{$self->decode_hash};
my $string = "";
my ($index, $max_index) = (0, length($bitstring)-1);
while ($index < $max_index) {
my $decode = undef;
foreach my $l ($min_length_decoding_key .. $max_length_decoding_key) {
if ($decode = $decode_hash{substr($bitstring,$index,$l)}) {
$string .= $decode;
$index += $l;
last;
}
}
defined $decode
or die "Unknown bit sequence starting at index $index in the bitstring";
}
return $string;
}
sub decode {
my ($self, $bitvector) = @_;
my $max_length_decoding_key = $self->{max_length_decoding_key};
my $min_length_decoding_key = $self->{min_length_decoding_key};
my %decode_hash = %{$self->decode_hash};
my $string = "";
my ($offset, $max_offset) = (0, 8 * (length($bitvector)-1));
while ($offset < $max_offset) {
my $decode = undef;
my $bitpattern = "";
my $last_offset_ok = $offset;
foreach my $l (1 .. $max_length_decoding_key) {
$bitpattern .= vec($bitvector,$offset++,1);
if ($decode = $decode_hash{$bitpattern}) {
$string .= $decode;
last;
}
}
defined $decode
or die "Unknown bit sequence starting at offset $last_offset_ok in the bitstring";
}
return $string;
t/create_huffman_tree.t view on Meta::CPAN
"with characters " . Dumper($count_hash);
}
sub reverse_hash {
my %hash = %{shift()};
return { map {($hash{$_} => $_)} keys %hash };
}
my $huff = Algorithm::Huffman->new({a => 15, b => 7, c => 6, d => 6, e => 5});
my $encode = $huff->encode_hash;
is length($encode->{a}), 1, "Length of a";
all_are {length($encode->{shift()})} 3, ['b' .. 'e'], "Length of b, c, d, e";
foreach my $wrong_parameter( [a => 1, b => 1], undef, [[a => 1, b => 1]] ) {
dies_ok { Algorithm::Huffman->new($_) };
}
1;
t/encode_bitstring.t view on Meta::CPAN
# Create a random counting
my %counting = map { random_string('c' x myrand MAX_SUBSTRING_LENGTH)
=> myrand(MAX_COUNT)
}
(1 .. HUFFMAN_ELEMENTS);
$counting{$_} = myrand(MAX_COUNT) for ('a' .. 'z');
my $huff = Algorithm::Huffman->new(\%counting);
my $encode_hash = $huff->encode_hash;
my $max_length = max map length, keys %counting;
for (1 .. 20) {
my $s = random_string('c' x LONG_STRING_LENGTH);
my $c = "";
my $index = 0;
while ($index < LONG_STRING_LENGTH) {
for my $l (reverse (1 .. $max_length)) {
if (my $bitcode = $encode_hash->{substr($s, $index, $l)}) {
$c .= $bitcode;
$index += $l;
last;
}
}
}
my $encoded_with_huffman = $huff->encode_bitstring($s);
is $encoded_with_huffman, $c, "Coded huffman string of '$s'"
or diag Dumper($huff);
my $encoded_with_huffman_bitvector = $huff->encode($s);
is $encoded_with_huffman_bitvector,
pack("b*", $encoded_with_huffman),
"->encode('$s') checked with pack";
cmp_ok length($encoded_with_huffman)/8, "<=", LONG_STRING_LENGTH,
"Encoding produced a compression lower than only the compression of 26 characters";
is $huff->decode_bitstring($encoded_with_huffman),
$s,
"Decoding of encoding bitstring should be the same as the orig";
is $huff->decode($encoded_with_huffman_bitvector),
$s,
"Decoding of encoding (packed) bitvector should be the same as the orig";
}
my $string = random_string('c' x LONG_STRING_LENGTH);
UNKNOWN_CHAR_IN_BITSTRING_SHOULD_PRODUCE_AN_EXCEPTION: {
my $bitstring = $huff->encode_bitstring($string);
substr($bitstring,length($string)/2,1) = "a";
throws_ok {$huff->decode_bitstring($bitstring)}
qr/unknown/i,
"Unknown character (an a instead 0/1) into the bitstring";
}
BITSTRING_TOO_SHORT: {
my $bitstring = $huff->encode_bitstring($string);
$bitstring = substr($bitstring,0,length($bitstring-1));
dies_ok {$huff->decode_bitstring($bitstring)}
"Removed the last bit of the bitstring -> should die";
}
( run in 0.248 second using v1.01-cache-2.11-cpan-65fba6d93b7 )