Algorithm-Huffman
view release on metacpan or search on metacpan
use 5.006;
use strict;
use warnings;
use Carp;
require Exporter;
our @ISA = qw(Exporter);
our $VERSION = '0.09';
use Heap::Fibonacci;
use Tree::DAG_Node;
use List::Util qw/max min first/;
sub new {
my ($proto, $count_hash) = @_;
my $class = ref($proto) || $proto;
__validate_counting_hash($count_hash);
my $heap = Heap::Fibonacci->new;
my $size = 0;
while (my ($str, $count) = each %$count_hash) {
croak "The count for each character/substring must be a number"
unless $count =~ /^(-)?\d+(\.\d+)?$/;
croak "The count for each character/substring must be positive (>= 0)," .
"but found counting '$count' for the string '$str'"
unless $count >= 0;
my $leaf = Tree::DAG_Node->new({name => $str});
$leaf->attribute->{bit} = "";
$heap->add( KeyValuePair->new( $leaf, $count ) );
$size++;
}
while ($size-- >= 2) {
my $right = $heap->extract_minimum;
my $left = $heap->extract_minimum;
$right->key->attribute->{bit} = 1;
$left->key->attribute->{bit} = 0;
my $new_node = Tree::DAG_Node->new({daughters => [$left->key, $right->key]});
$new_node->attribute->{bit} = "";
my $new_count = $left->value + $right->value;
$heap->add( KeyValuePair->new( $new_node, $new_count ) );
}
my $root = $heap->extract_minimum->key;
my %encode;
my %decode;
foreach my $leaf ($root->leaves_under) {
my @bit = reverse map {$_->attribute->{bit}} ($leaf, $leaf->ancestors);
my $bitstr = join "", @bit;
$encode{$leaf->name} = $bitstr;
$decode{$bitstr} = $leaf->name;
}
my $self = {
encode => \%encode,
decode => \%decode,
max_length_encoding_key => max( map length, keys %encode ),
max_length_decoding_key => max( map length, keys %decode ),
min_length_decoding_key => min( map length, keys %decode )
};
bless $self, $class;
}
sub encode_hash {
my $self = shift;
$self->{encode};
}
sub decode_hash {
my $self = shift;
$self->{decode};
}
sub encode_bitstring {
my ($self, $string) = @_;
my $max_length_encoding_key = $self->{max_length_encoding_key};
my %encode_hash = %{$self->encode_hash};
my $bitstring = "";
my ($index, $max_index) = (0, length($string)-1);
while ($index <= $max_index) {
for (my $l = $max_length_encoding_key; $l > 0; $l--) {
if (my $bits = $encode_hash{substr($string, $index, $l)}) {
$bitstring .= $bits;
$index += $l;
last;
}
}
}
return $bitstring;
}
sub encode {
my ($self, $string) = @_;
my $max_length_encoding_key = $self->{max_length_encoding_key};
my %encode_hash = %{$self->encode_hash};
my $bitvector = "";
my $offset = 0;
my ($index, $max_index) = (0, length($string)-1);
while ($index <= $max_index) {
for (my $l = $max_length_encoding_key; $l > 0; $l--) {
if (my $bits = $encode_hash{substr($string, $index, $l)}) {
vec($bitvector, $offset++, 1) = $_ for split //, $bits;
$index += $l;
last;
}
}
}
return $bitvector;
}
sub decode_bitstring {
my ($self, $bitstring) = @_;
my $max_length_decoding_key = $self->{max_length_decoding_key};
my $min_length_decoding_key = $self->{min_length_decoding_key};
my %decode_hash = %{$self->decode_hash};
my $string = "";
my ($index, $max_index) = (0, length($bitstring)-1);
while ($index < $max_index) {
my $decode = undef;
foreach my $l ($min_length_decoding_key .. $max_length_decoding_key) {
if ($decode = $decode_hash{substr($bitstring,$index,$l)}) {
$string .= $decode;
$index += $l;
last;
}
}
defined $decode
or die "Unknown bit sequence starting at index $index in the bitstring";
}
return $string;
}
sub decode {
my ($self, $bitvector) = @_;
my $max_length_decoding_key = $self->{max_length_decoding_key};
my $min_length_decoding_key = $self->{min_length_decoding_key};
my %decode_hash = %{$self->decode_hash};
my $string = "";
my ($offset, $max_offset) = (0, 8 * (length($bitvector)-1));
while ($offset < $max_offset) {
my $decode = undef;
my $bitpattern = "";
my $last_offset_ok = $offset;
foreach my $l (1 .. $max_length_decoding_key) {
$bitpattern .= vec($bitvector,$offset++,1);
if ($decode = $decode_hash{$bitpattern}) {
$string .= $decode;
last;
}
}
defined $decode
or die "Unknown bit sequence starting at offset $last_offset_ok in the bitstring";
}
return $string;
}
sub __validate_counting_hash {
my $c = shift;
my $error_msg = undef;
defined $c
or croak "Undefined counting hash";
ref($c) eq 'HASH'
or croak "The argument for the counting hash is not a hash reference, as expected";
scalar(keys %$c) >= 2
or croak "The counting hash must have at least 2 keys";
}
1;
package KeyValuePair;
use Heap::Elem;
require Exporter;
our @ISA = qw/Exporter Heap::Elem/;
sub new {
my ($proto, $key, $value) = @_;
my $class = ref($proto) || $proto;
my $self = $class->SUPER::new;
$self->{"KeyValuePair::key"} = $key;
$self->{"KeyValuePair::value"} = $value;
return $self;
}
sub cmp {
my ($self, $other) = @_;
$self->{"KeyValuePair::value"} <=> $other->{"KeyValuePair::value"};
}
sub key {
my $self = shift;
return $self->{"KeyValuePair::key"};
}
sub value {
my $self = shift;
return $self->{"KeyValuePair::value"};
}
( run in 1.043 second using v1.01-cache-2.11-cpan-140bd7fdf52 )