Algorithm-Huffman

 view release on metacpan or  search on metacpan

Changes  view on Meta::CPAN

        following to the missing parameter validation.

0.05  Thu Aug 29
        - improved decode_bitstring method
          * exceptions are raised instead of an endless loop
            if an unknown character sequence is found

0.04  Wed Aug 28
        - implemented decode_bitstring($bitstring) method
        - fixed bug: The returned encode_bitstring had 
          only the length of the $orig_string,
          and not longer

0.03  Mon Aug 26
        - implemented encode_bitstring($orig_string) method

0.02  Tue Aug 20
        - implemented $huff->decode_hash method

0.01  Tue Aug 13
        - implemented new and encode_hash method

Huffman.pm  view on Meta::CPAN

    foreach my $leaf ($root->leaves_under) {
        my @bit = reverse map {$_->attribute->{bit}} ($leaf, $leaf->ancestors);
        my $bitstr = join "", @bit;
        $encode{$leaf->name} = $bitstr;
        $decode{$bitstr}     = $leaf->name;
    }
    
    my $self = {
        encode => \%encode,
        decode => \%decode,
        max_length_encoding_key => max( map length, keys %encode ),
        max_length_decoding_key => max( map length, keys %decode ),
        min_length_decoding_key => min( map length, keys %decode )
    };
    
    bless $self, $class;
}

sub encode_hash {
    my $self = shift;
    $self->{encode};
}

sub decode_hash {
    my $self = shift;
    $self->{decode};
}

sub encode_bitstring {
    my ($self, $string) = @_;
    my $max_length_encoding_key = $self->{max_length_encoding_key};
    my %encode_hash = %{$self->encode_hash};

    my $bitstring = "";
    my ($index, $max_index) = (0, length($string)-1);
    while ($index <= $max_index) {
        for (my $l = $max_length_encoding_key; $l > 0; $l--) {
            if (my $bits = $encode_hash{substr($string, $index, $l)}) {
                $bitstring .= $bits;
                $index     += $l;
                last;
            }
        }
    }
    return $bitstring;
}

sub encode {
    my ($self, $string) = @_;
    my $max_length_encoding_key = $self->{max_length_encoding_key};
    my %encode_hash = %{$self->encode_hash};

    my $bitvector = "";
    my $offset = 0;
    my ($index, $max_index) = (0, length($string)-1);
    while ($index <= $max_index) {
        for (my $l = $max_length_encoding_key; $l > 0; $l--) {
            if (my $bits = $encode_hash{substr($string, $index, $l)}) {
                vec($bitvector, $offset++, 1) = $_ for split //, $bits;
                $index     += $l;
                last;
            }
        }
    }
    return $bitvector;
}

sub decode_bitstring {
    my ($self, $bitstring) = @_;
    
    my $max_length_decoding_key = $self->{max_length_decoding_key};
    my $min_length_decoding_key = $self->{min_length_decoding_key};
    my %decode_hash = %{$self->decode_hash};
    
    my $string = "";
    my ($index, $max_index) = (0, length($bitstring)-1);
    while ($index < $max_index) {
        my $decode = undef;
        foreach my $l ($min_length_decoding_key .. $max_length_decoding_key) {
            if ($decode = $decode_hash{substr($bitstring,$index,$l)}) {
                $string .= $decode;
                $index  += $l;
                last;
            }
        }
        defined $decode
            or die "Unknown bit sequence starting at index $index in the bitstring";
    }
    return $string;
}

sub decode {
    my ($self, $bitvector) = @_;
    
    my $max_length_decoding_key = $self->{max_length_decoding_key};
    my $min_length_decoding_key = $self->{min_length_decoding_key};
    my %decode_hash = %{$self->decode_hash};
    
    my $string = "";
    my ($offset, $max_offset) = (0, 8 * (length($bitvector)-1));
    while ($offset < $max_offset) {
        my $decode = undef;
        my $bitpattern = "";
        my $last_offset_ok = $offset;
        foreach my $l (1 .. $max_length_decoding_key) {
            $bitpattern .= vec($bitvector,$offset++,1);
            if ($decode = $decode_hash{$bitpattern}) {
                $string .= $decode;
                last;
            }
        }
        defined $decode
            or die "Unknown bit sequence starting at offset $last_offset_ok in the bitstring";
    }
    return $string;

t/create_huffman_tree.t  view on Meta::CPAN

            "with characters " . Dumper($count_hash);
}

sub reverse_hash {
    my %hash = %{shift()};
    return { map {($hash{$_} => $_)} keys %hash };
}

my $huff = Algorithm::Huffman->new({a => 15, b => 7, c => 6, d => 6, e => 5});
my $encode = $huff->encode_hash;
is length($encode->{a}), 1, "Length of a";
all_are {length($encode->{shift()})} 3, ['b' .. 'e'], "Length of b, c, d, e";

foreach my $wrong_parameter( [a => 1, b => 1], undef, [[a => 1, b => 1]] ) {
    dies_ok { Algorithm::Huffman->new($_) };
}

1;

t/encode_bitstring.t  view on Meta::CPAN


# Create a random counting
my %counting = map {   random_string('c' x myrand MAX_SUBSTRING_LENGTH) 
                    => myrand(MAX_COUNT)
                   }
                   (1 .. HUFFMAN_ELEMENTS);
$counting{$_} = myrand(MAX_COUNT) for ('a' .. 'z');
my $huff = Algorithm::Huffman->new(\%counting);
my $encode_hash = $huff->encode_hash;

my $max_length = max map length, keys %counting;

for (1 .. 20) {
    my $s = random_string('c' x LONG_STRING_LENGTH);
    my $c = "";
    my $index = 0;
    while ($index < LONG_STRING_LENGTH) {
        for my $l (reverse (1 .. $max_length)) {
            if (my $bitcode = $encode_hash->{substr($s, $index, $l)}) {
                $c .= $bitcode;
                $index += $l;
                last;
            }
        }
    }
    my $encoded_with_huffman = $huff->encode_bitstring($s);
    is $encoded_with_huffman, $c, "Coded huffman string of '$s'"
    or diag Dumper($huff);
    
    my $encoded_with_huffman_bitvector = $huff->encode($s);
    is $encoded_with_huffman_bitvector,
       pack("b*", $encoded_with_huffman), 
       "->encode('$s') checked with pack";
    
    cmp_ok length($encoded_with_huffman)/8, "<=", LONG_STRING_LENGTH, 
       "Encoding produced a compression lower than only the compression of 26 characters";
    
    is $huff->decode_bitstring($encoded_with_huffman),
       $s,
       "Decoding of encoding bitstring should be the same as the orig";
       
    is $huff->decode($encoded_with_huffman_bitvector),
       $s,
       "Decoding of encoding (packed) bitvector should be the same as the orig";
}

my $string    = random_string('c' x LONG_STRING_LENGTH);

UNKNOWN_CHAR_IN_BITSTRING_SHOULD_PRODUCE_AN_EXCEPTION: {
    my $bitstring = $huff->encode_bitstring($string);
    substr($bitstring,length($string)/2,1) = "a";
    throws_ok {$huff->decode_bitstring($bitstring)} 
              qr/unknown/i, 
              "Unknown character (an a instead 0/1) into the bitstring";
}

BITSTRING_TOO_SHORT: {
    my $bitstring = $huff->encode_bitstring($string);
    $bitstring = substr($bitstring,0,length($bitstring-1));
    dies_ok {$huff->decode_bitstring($bitstring)}
            "Removed the last bit of the bitstring -> should die";
}



( run in 0.248 second using v1.01-cache-2.11-cpan-65fba6d93b7 )