Algorithm-Huffman

 view release on metacpan or  search on metacpan

Huffman.pm  view on Meta::CPAN

use 5.006;
use strict;
use warnings;
use Carp;

require Exporter;

our @ISA = qw(Exporter);

our $VERSION = '0.09';

use Heap::Fibonacci;
use Tree::DAG_Node;
use List::Util qw/max min first/;

sub new {
    my ($proto, $count_hash) = @_;
    my $class = ref($proto) || $proto;
    
    __validate_counting_hash($count_hash);
    my $heap = Heap::Fibonacci->new;
    
    my $size = 0;
    while (my ($str, $count) = each %$count_hash) {
        croak "The count for each character/substring must be a number"
            unless $count =~ /^(-)?\d+(\.\d+)?$/;
        croak "The count for each character/substring must be positive (>= 0)," .
              "but found counting '$count' for the string '$str'"
            unless $count >= 0;
        my $leaf = Tree::DAG_Node->new({name => $str});
        $leaf->attribute->{bit} = "";
        $heap->add( KeyValuePair->new( $leaf, $count ) );
        $size++;
    }
    
    while ($size-- >= 2) {        
        my $right = $heap->extract_minimum;
        my $left  = $heap->extract_minimum;
        $right->key->attribute->{bit} = 1;
        $left->key->attribute->{bit}  = 0;
        my $new_node = Tree::DAG_Node->new({daughters => [$left->key, $right->key]});
        $new_node->attribute->{bit} = "";
        my $new_count = $left->value + $right->value;
        $heap->add( KeyValuePair->new( $new_node, $new_count ) );
    }
    
    my $root = $heap->extract_minimum->key;
    
    my %encode;
    my %decode;
    foreach my $leaf ($root->leaves_under) {
        my @bit = reverse map {$_->attribute->{bit}} ($leaf, $leaf->ancestors);
        my $bitstr = join "", @bit;
        $encode{$leaf->name} = $bitstr;
        $decode{$bitstr}     = $leaf->name;
    }
    
    my $self = {
        encode => \%encode,
        decode => \%decode,
        max_length_encoding_key => max( map length, keys %encode ),
        max_length_decoding_key => max( map length, keys %decode ),
        min_length_decoding_key => min( map length, keys %decode )
    };
    
    bless $self, $class;
}

sub encode_hash {
    my $self = shift;
    $self->{encode};
}

sub decode_hash {
    my $self = shift;
    $self->{decode};
}

sub encode_bitstring {
    my ($self, $string) = @_;
    my $max_length_encoding_key = $self->{max_length_encoding_key};
    my %encode_hash = %{$self->encode_hash};

    my $bitstring = "";
    my ($index, $max_index) = (0, length($string)-1);
    while ($index <= $max_index) {
        for (my $l = $max_length_encoding_key; $l > 0; $l--) {
            if (my $bits = $encode_hash{substr($string, $index, $l)}) {
                $bitstring .= $bits;
                $index     += $l;
                last;
            }
        }
    }
    return $bitstring;
}

sub encode {
    my ($self, $string) = @_;
    my $max_length_encoding_key = $self->{max_length_encoding_key};
    my %encode_hash = %{$self->encode_hash};

    my $bitvector = "";
    my $offset = 0;
    my ($index, $max_index) = (0, length($string)-1);
    while ($index <= $max_index) {
        for (my $l = $max_length_encoding_key; $l > 0; $l--) {
            if (my $bits = $encode_hash{substr($string, $index, $l)}) {
                vec($bitvector, $offset++, 1) = $_ for split //, $bits;
                $index     += $l;
                last;
            }
        }
    }
    return $bitvector;
}

sub decode_bitstring {
    my ($self, $bitstring) = @_;
    
    my $max_length_decoding_key = $self->{max_length_decoding_key};
    my $min_length_decoding_key = $self->{min_length_decoding_key};
    my %decode_hash = %{$self->decode_hash};
    
    my $string = "";
    my ($index, $max_index) = (0, length($bitstring)-1);
    while ($index < $max_index) {
        my $decode = undef;
        foreach my $l ($min_length_decoding_key .. $max_length_decoding_key) {
            if ($decode = $decode_hash{substr($bitstring,$index,$l)}) {
                $string .= $decode;
                $index  += $l;
                last;
            }
        }
        defined $decode
            or die "Unknown bit sequence starting at index $index in the bitstring";
    }
    return $string;
}

sub decode {
    my ($self, $bitvector) = @_;
    
    my $max_length_decoding_key = $self->{max_length_decoding_key};
    my $min_length_decoding_key = $self->{min_length_decoding_key};
    my %decode_hash = %{$self->decode_hash};
    
    my $string = "";
    my ($offset, $max_offset) = (0, 8 * (length($bitvector)-1));
    while ($offset < $max_offset) {
        my $decode = undef;
        my $bitpattern = "";
        my $last_offset_ok = $offset;
        foreach my $l (1 .. $max_length_decoding_key) {
            $bitpattern .= vec($bitvector,$offset++,1);
            if ($decode = $decode_hash{$bitpattern}) {
                $string .= $decode;
                last;
            }
        }
        defined $decode
            or die "Unknown bit sequence starting at offset $last_offset_ok in the bitstring";
    }
    return $string;
}


sub __validate_counting_hash {
    my $c = shift;
    my $error_msg = undef;
    defined $c        
        or croak "Undefined counting hash";
    ref($c) eq 'HASH' 
        or croak "The argument for the counting hash is not a hash reference, as expected";
    scalar(keys %$c) >= 2
        or croak "The counting hash must have at least 2 keys";
}

1;

package KeyValuePair;

use Heap::Elem;

require Exporter;

our @ISA = qw/Exporter Heap::Elem/;

sub new {
   my ($proto, $key, $value) = @_;
   my $class = ref($proto) || $proto;

   my $self = $class->SUPER::new;

   $self->{"KeyValuePair::key"}   = $key;
   $self->{"KeyValuePair::value"} = $value;
   
   return $self;
}

sub cmp {
   my ($self, $other) = @_;
   $self->{"KeyValuePair::value"} <=> $other->{"KeyValuePair::value"};
}

sub key {
    my $self = shift;
    return $self->{"KeyValuePair::key"};
}

sub value {
    my $self = shift;
    return $self->{"KeyValuePair::value"};
}



( run in 1.043 second using v1.01-cache-2.11-cpan-140bd7fdf52 )