Algorithm-Huffman

 view release on metacpan or  search on metacpan

Huffman.pm  view on Meta::CPAN

require Exporter;

our @ISA = qw(Exporter);

our $VERSION = '0.09';

use Heap::Fibonacci;
use Tree::DAG_Node;
use List::Util qw/max min first/;

sub new {
    my ($proto, $count_hash) = @_;
    my $class = ref($proto) || $proto;
    
    __validate_counting_hash($count_hash);
    my $heap = Heap::Fibonacci->new;
    
    my $size = 0;
    while (my ($str, $count) = each %$count_hash) {
        croak "The count for each character/substring must be a number"
            unless $count =~ /^(-)?\d+(\.\d+)?$/;
        croak "The count for each character/substring must be positive (>= 0)," .
              "but found counting '$count' for the string '$str'"
            unless $count >= 0;
        my $leaf = Tree::DAG_Node->new({name => $str});
        $leaf->attribute->{bit} = "";
        $heap->add( KeyValuePair->new( $leaf, $count ) );
        $size++;
    }
    
    while ($size-- >= 2) {        
        my $right = $heap->extract_minimum;
        my $left  = $heap->extract_minimum;
        $right->key->attribute->{bit} = 1;
        $left->key->attribute->{bit}  = 0;
        my $new_node = Tree::DAG_Node->new({daughters => [$left->key, $right->key]});
        $new_node->attribute->{bit} = "";
        my $new_count = $left->value + $right->value;
        $heap->add( KeyValuePair->new( $new_node, $new_count ) );
    }
    
    my $root = $heap->extract_minimum->key;
    
    my %encode;
    my %decode;
    foreach my $leaf ($root->leaves_under) {
        my @bit = reverse map {$_->attribute->{bit}} ($leaf, $leaf->ancestors);
        my $bitstr = join "", @bit;
        $encode{$leaf->name} = $bitstr;
        $decode{$bitstr}     = $leaf->name;
    }
    
    my $self = {
        encode => \%encode,
        decode => \%decode,
        max_length_encoding_key => max( map length, keys %encode ),
        max_length_decoding_key => max( map length, keys %decode ),
        min_length_decoding_key => min( map length, keys %decode )
    };
    
    bless $self, $class;
}

sub encode_hash {
    my $self = shift;
    $self->{encode};
}

sub decode_hash {
    my $self = shift;
    $self->{decode};
}

sub encode_bitstring {
    my ($self, $string) = @_;
    my $max_length_encoding_key = $self->{max_length_encoding_key};
    my %encode_hash = %{$self->encode_hash};

    my $bitstring = "";
    my ($index, $max_index) = (0, length($string)-1);
    while ($index <= $max_index) {
        for (my $l = $max_length_encoding_key; $l > 0; $l--) {
            if (my $bits = $encode_hash{substr($string, $index, $l)}) {
                $bitstring .= $bits;
                $index     += $l;
                last;
            }
        }
    }
    return $bitstring;
}

sub encode {
    my ($self, $string) = @_;
    my $max_length_encoding_key = $self->{max_length_encoding_key};
    my %encode_hash = %{$self->encode_hash};

    my $bitvector = "";
    my $offset = 0;
    my ($index, $max_index) = (0, length($string)-1);
    while ($index <= $max_index) {
        for (my $l = $max_length_encoding_key; $l > 0; $l--) {
            if (my $bits = $encode_hash{substr($string, $index, $l)}) {
                vec($bitvector, $offset++, 1) = $_ for split //, $bits;
                $index     += $l;
                last;
            }
        }
    }
    return $bitvector;
}

sub decode_bitstring {
    my ($self, $bitstring) = @_;
    
    my $max_length_decoding_key = $self->{max_length_decoding_key};
    my $min_length_decoding_key = $self->{min_length_decoding_key};
    my %decode_hash = %{$self->decode_hash};
    
    my $string = "";
    my ($index, $max_index) = (0, length($bitstring)-1);



( run in 1.244 second using v1.01-cache-2.11-cpan-df04353d9ac )