Compression-Util

 view release on metacpan or  search on metacpan

examples/bzip2_decompressor.pl  view on Meta::CPAN

            say STDERR "Number of selectors: $num_sels";

            my @idxs;
            for (1 .. $num_sels) {
                my $i = 0;
                while (read_bit($fh, \$buffer)) {
                    $i += 1;
                    ($i < $num_trees) or die "error";
                }
                push @idxs, $i;
            }

            my $sels = mtf_decode(\@idxs, [0 .. $num_trees - 1]);
            say STDERR "Selectors: (@$sels)";

            my $MaxHuffmanBits = 20;
            my $num_syms       = scalar(@alphabet) + 2;

            my @trees;
            for (1 .. $num_trees) {
                my @clens;
                my $clen = bits2int($fh, 5, \$buffer);
                for (1 .. $num_syms) {
                    while (1) {

                        ($clen > 0 and $clen <= $MaxHuffmanBits)
                          or warn "Invalid code length: $clen!\n";

                        if (not read_bit($fh, \$buffer)) {
                            last;
                        }

                        $clen -= read_bit($fh, \$buffer) ? 1 : -1;
                    }

                    push @clens, $clen;
                }
                push @trees, \@clens;
                say STDERR "Code lengths: (@clens)";
            }

            foreach my $tree (@trees) {
                my $maxLen = max(@$tree);
                my $sum    = 1 << $maxLen;
                for my $clen (@$tree) {
                    $sum -= (1 << $maxLen) >> $clen;
                }

                $sum == 0 or warn "incomplete tree detected: (@$tree)\n";
            }

            my @huffman_trees = map { (huffman_from_code_lengths($_))[1] } @trees;

            my $eob = @alphabet + 1;

            my @zrle;
            my $code = '';

            my $sel_idx = 0;
            my $tree    = $huffman_trees[$sels->[$sel_idx]];
            my $decoded = 50;

            while (!eof($fh)) {
                $code .= read_bit($fh, \$buffer);

                if (length($code) > $MaxHuffmanBits) {
                    die "[!] Something went wrong: length of LL code `$code` is > $MaxHuffmanBits.\n";
                }

                if (exists($tree->{$code})) {

                    my $sym = $tree->{$code};

                    if ($sym == $eob) {    # end of block marker
                        say STDERR "EOB detected: $sym";
                        last;
                    }

                    push @zrle, $sym;
                    $code = '';

                    if (--$decoded <= 0) {
                        if (++$sel_idx <= $#$sels) {
                            $tree = $huffman_trees[$sels->[$sel_idx]];
                        }
                        else {
                            die "No more selectors";    # should not happen
                        }
                        $decoded = 50;
                    }
                }
            }

            ##say STDERR "ZRLE: (@zrle)";
            my @mtf = reverse @{zrle_decode([reverse @zrle])};
            ##say STDERR "MTF: (@mtf)";

            my $bwt = symbols2string mtf_decode(\@mtf, \@alphabet);
            ## say "BWT: ($bwt, $bwt_idx)";

            my $rle4 = string2symbols bwt_decode($bwt, $bwt_idx);
            my $data = rle4_decode($rle4);
            my $dec  = symbols2string($data);

            my $new_crc32 = oct('0b' . int2bits_lsb(crc32(pack('b*', unpack('B*', $dec))), 32));

            say STDERR "Computed CRC32: $new_crc32";

            if ($crc32 != $new_crc32) {
                warn "CRC32 error: $crc32 (stored) != $new_crc32 (actual)\n";
            }

            $stream_crc32 = ($new_crc32 ^ (0xffffffff & ((0xffffffff & ($stream_crc32 << 1)) | (($stream_crc32 >> 31) & 0x1)))) & 0xffffffff;

            print $dec;
        }
        elsif ($block_magic eq "\27rE8P\x90") {    # BlockFooter
            say STDERR "Block footer detected";
            my $stored_stream_crc32 = bits2int($fh, 32, \$buffer);
            say STDERR "Stream CRC32: $stored_stream_crc32";

            if ($stream_crc32 != $stored_stream_crc32) {
                warn "Stream CRC32 error: $stored_stream_crc32 (stored) != $stream_crc32 (actual)\n";
            }

            $buffer = '';
            last;
        }
        else {
            die "Unknown block magic: $block_magic";
        }

    }

    say STDERR "End of container";
}

say STDERR "End of input";



( run in 0.722 second using v1.01-cache-2.11-cpan-df04353d9ac )