Compression-Util

 view release on metacpan or  search on metacpan

examples/bzip2_compressor.pl  view on Meta::CPAN

use List::Util        qw(max);
use Compression::Util qw(:all);

use constant {CHUNK_SIZE => 1 << 16};

local $| = 1;

binmode(STDIN,  ":raw");
binmode(STDOUT, ":raw");

sub encode_mtf_alphabet($alphabet) {
    my %table;
    @table{@$alphabet} = ();

    my $populated = 0;
    my @marked;

    for (my $i = 0 ; $i <= 255 ; $i += 16) {

        my $enc = 0;
        foreach my $j (0 .. 15) {

examples/bzip2_compressor.pl  view on Meta::CPAN

            push @marked, $enc;
        }
    }

    say STDERR sprintf("Populated: %016b", $populated);
    say STDERR "Marked: (@marked)";

    return ($populated, \@marked);
}

sub encode_code_lengths($dict) {
    my @lengths;

    foreach my $symbol (0 .. max(keys %$dict) // 0) {
        if (exists($dict->{$symbol})) {
            push @lengths, length($dict->{$symbol});
        }
        else {
            die "Incomplete Huffman tree not supported";
            push @lengths, 0;
        }

examples/gzip_decompressor.pl  view on Meta::CPAN

    else {
        print STDERR ":: Chunk length: $len\n";
    }

    read($in_fh, (my $chunk), $len);
    return $chunk;
}

my ($DISTANCE_SYMBOLS, $LENGTH_SYMBOLS) = make_deflate_tables();

sub decode_huffman($in_fh, $buffer, $rev_dict, $dist_rev_dict, $search_window) {

    my $data = '';
    my $code = '';

    my $max_ll_code_len   = max(map { length($_) } keys %$rev_dict);
    my $max_dist_code_len = max(map { length($_) } keys %$dist_rev_dict);

    while (1) {
        $code .= read_bit_lsb($in_fh, $buffer);

examples/gzip_decompressor.pl  view on Meta::CPAN

            $code_lengths[$i] = 8;
        }

        (undef, $rev_dict)      = huffman_from_code_lengths(\@code_lengths);
        (undef, $dist_rev_dict) = huffman_from_code_lengths([(5) x 32]);
    }

    decode_huffman($in_fh, $buffer, $rev_dict, $dist_rev_dict, $search_window);
}

sub decode_CL_lengths($in_fh, $buffer, $CL_rev_dict, $size) {

    my @lengths;
    my $code = '';

    while (1) {
        $code .= read_bit_lsb($in_fh, $buffer);

        if (length($code) > 7) {
            die "[!] Something went wrong: length of CL code `$code` is > 7.\n";
        }

examples/gzip_file_compression.pl  view on Meta::CPAN

            $bitstring .= $dist_dict->{$dist_idx - 1};
            $bitstring .= int2bits_lsb($dist - $min, $bits) if ($bits > 0);
        }
    }

    $bitstring .= $dict->{256};    # end-of-block symbol

    return $bitstring;
}

sub block_type_0($chunk) {

    my $chunk_len = length($chunk);
    my $len       = int2bits_lsb($chunk_len,             16);
    my $nlen      = int2bits_lsb((~$chunk_len) & 0xffff, 16);

    $len . $nlen;
}

sub my_gzip_compress ($in_fh, $out_fh) {

examples/gzip_file_compression.pl  view on Meta::CPAN

        die "[!] The ~length value is not correct: $nlen (actual) != $expected_nlen (expected)\n";
    }
    else {
        print STDERR ":: Chunk length: $len\n";
    }

    read($in_fh, (my $chunk), $len);
    return $chunk;
}

sub decode_huffman($in_fh, $buffer, $rev_dict, $dist_rev_dict, $search_window) {

    my $data = '';
    my $code = '';

    my $max_ll_code_len   = max(map { length($_) } keys %$rev_dict);
    my $max_dist_code_len = max(map { length($_) } keys %$dist_rev_dict);

    while (1) {
        $code .= read_bit_lsb($in_fh, $buffer);

examples/gzip_file_compression.pl  view on Meta::CPAN

            $code_lengths[$i] = 8;
        }

        (undef, $rev_dict)      = huffman_from_code_lengths(\@code_lengths);
        (undef, $dist_rev_dict) = huffman_from_code_lengths([(5) x 32]);
    }

    decode_huffman($in_fh, $buffer, $rev_dict, $dist_rev_dict, $search_window);
}

sub decode_CL_lengths($in_fh, $buffer, $CL_rev_dict, $size) {

    my @lengths;
    my $code = '';

    while (1) {
        $code .= read_bit_lsb($in_fh, $buffer);

        if (length($code) > 7) {
            die "[!] Something went wrong: length of CL code `$code` is > 7.\n";
        }

lib/Compression/Util.pm  view on Meta::CPAN

}

sub int2bytes ($value, $size) {
    pack('B*', sprintf("%0*b", 8 * $size, $value));
}

sub int2bytes_lsb ($value, $size) {
    pack('b*', scalar reverse sprintf("%0*b", 8 * $size, $value));
}

sub bytes2int($fh, $n) {

    if (ref($fh) eq '') {
        open(my $fh2, '<:raw', \$fh) or confess "error: $!";
        return __SUB__->($fh2, $n);
    }

    my $bytes = '';
    $bytes .= getc($fh) for (1 .. $n);
    oct('0b' . unpack('B*', $bytes));
}

lib/Compression/Util.pm  view on Meta::CPAN

    $VERBOSE && say STDERR "Alphabet size: ", scalar(@$alphabet);

    my $rle4    = $entropy_sub->($fh);
    my $rle     = rle4_decode($rle4);
    my $mtf     = zrle_decode($rle);
    my $symbols = mtf_decode($mtf, $alphabet);

    return $symbols;
}

sub mrl_decompress($fh, $entropy_sub = \&decode_huffman_entry) {
    symbols2string(mrl_decompress_symbolic($fh, $entropy_sub));
}

############################################################
# BWT-based compression (BWT + MTF + ZRLE + Huffman coding)
############################################################

sub bwt_compress ($chunk, $entropy_sub = \&create_huffman_entry) {

    if (ref($chunk) ne '') {

lib/Compression/Util.pm  view on Meta::CPAN

        }
    }

    return \@ints;
}

###################
# LZSS SYMBOLIC
###################

sub lzss_encode_symbolic($symbols, %params) {

    if (ref($symbols) eq '') {
        return lzss_encode($symbols, %params);
    }

    my $min_len       = $params{min_len}       // $LZ_MIN_LEN;
    my $max_len       = $params{max_len}       // $LZ_MAX_LEN;
    my $max_dist      = $params{max_dist}      // $LZ_MAX_DIST;
    my $max_chain_len = $params{max_chain_len} // $LZ_MAX_CHAIN_LEN;

lib/Compression/Util.pm  view on Meta::CPAN

            push @distances, (0) x $best_n;
            push @literals, @{$symbols}[$la .. $la + $best_n - 1];

            $la += $best_n;
        }
    }

    return (\@literals, \@distances, \@lengths);
}

sub lzss_encode_fast($str, %params) {

    if (ref($str) ne '') {
        return lzss_encode_fast_symbolic($str, %params);
    }

    my @symbols = unpack('C*', $str);

    my $la  = 0;
    my $end = $#symbols;

lib/Compression/Util.pm  view on Meta::CPAN

        }
    }

    return (\@literals, \@distances, \@lengths);
}

################################
# LZ77 encoding, inspired by LZ4
################################

sub lz77_encode($chunk, $lzss_encoding_sub = \&lzss_encode) {

    local $LZ_MAX_LEN = ~0;    # maximum match length

    my ($literals, $distances, $lengths) = $lzss_encoding_sub->($chunk);

    my $literals_end = $#{$literals};
    my (@symbols, @len_symbols, @match_symbols, @dist_symbols);

    for (my $i = 0 ; $i <= $literals_end ; ++$i) {

lib/Compression/Util.pm  view on Meta::CPAN

        }

        push @dist_symbols, $distances->[$i] // 0;
    }

    return (\@symbols, \@dist_symbols, \@len_symbols, \@match_symbols);
}

*lz77_encode_symbolic = \&lz77_encode;

sub lz77_decode($symbols, $dist_symbols, $len_symbols, $match_symbols) {

    my $data     = '';
    my $data_len = 0;

    my @symbols       = @$symbols;
    my @len_symbols   = @$len_symbols;
    my @match_symbols = @$match_symbols;
    my @dist_symbols  = @$dist_symbols;

    while (@symbols) {

lib/Compression/Util.pm  view on Meta::CPAN

                $data .= substr($data, $data_len + $i - $dist - 1, 1) // confess "bad input";
            }
        }

        $data_len += $match_len;
    }

    return $data;
}

sub lz77_decode_symbolic($symbols, $dist_symbols, $len_symbols, $match_symbols) {

    my @data;
    my $data_len = 0;

    my @symbols       = @$symbols;
    my @len_symbols   = @$len_symbols;
    my @match_symbols = @$match_symbols;
    my @dist_symbols  = @$dist_symbols;

    while (@symbols) {

lib/Compression/Util.pm  view on Meta::CPAN

                push @data, $data[$data_len + $j - $dist - 1];
            }
        }

        $data_len += $match_len;
    }

    return \@data;
}

sub lz77_compress($chunk, $entropy_sub = \&create_huffman_entry, $lzss_encoding_sub = \&lzss_encode) {
    my ($symbols, $dist_symbols, $len_symbols, $match_symbols) = lz77_encode($chunk, $lzss_encoding_sub);
    $entropy_sub->($symbols) . $entropy_sub->($len_symbols) . $entropy_sub->($match_symbols) . obh_encode($dist_symbols, $entropy_sub);
}

*lz77_compress_symbolic = \&lz77_compress;

sub lz77_decompress($fh, $entropy_sub = \&decode_huffman_entry) {

    if (ref($fh) eq '') {
        open(my $fh2, '<:raw', \$fh) or confess "error: $!";
        return __SUB__->($fh2, $entropy_sub);
    }

    my $symbols       = $entropy_sub->($fh);
    my $len_symbols   = $entropy_sub->($fh);
    my $match_symbols = $entropy_sub->($fh);
    my $dist_symbols  = obh_decode($fh, $entropy_sub);

    lz77_decode($symbols, $dist_symbols, $len_symbols, $match_symbols);
}

sub lz77_decompress_symbolic($fh, $entropy_sub = \&decode_huffman_entry) {

    if (ref($fh) eq '') {
        open(my $fh2, '<:raw', \$fh) or confess "error: $!";
        return __SUB__->($fh2, $entropy_sub);
    }

    my $symbols       = $entropy_sub->($fh);
    my $len_symbols   = $entropy_sub->($fh);
    my $match_symbols = $entropy_sub->($fh);
    my $dist_symbols  = obh_decode($fh, $entropy_sub);

    lz77_decode_symbolic($symbols, $dist_symbols, $len_symbols, $match_symbols);
}

#########################
# LZSS + DEFLATE encoding
#########################

sub lzss_compress($chunk, $entropy_sub = \&create_huffman_entry, $lzss_encoding_sub = \&lzss_encode) {
    my ($literals, $distances, $lengths) = $lzss_encoding_sub->($chunk);
    deflate_encode($literals, $distances, $lengths, $entropy_sub);
}

*lzss_compress_symbolic = \&lzss_compress;

sub lzss_decompress($fh, $entropy_sub = \&decode_huffman_entry) {

    if (ref($fh) eq '') {
        open(my $fh2, '<:raw', \$fh) or confess "error: $!";
        return __SUB__->($fh2, $entropy_sub);
    }

    my ($literals, $distances, $lengths) = deflate_decode($fh, $entropy_sub);
    lzss_decode($literals, $distances, $lengths);
}

sub lzss_decompress_symbolic($fh, $entropy_sub = \&decode_huffman_entry) {

    if (ref($fh) eq '') {
        open(my $fh2, '<:raw', \$fh) or confess "error: $!";
        return __SUB__->($fh2, $entropy_sub);
    }

    my ($literals, $distances, $lengths) = deflate_decode($fh, $entropy_sub);
    lzss_decode_symbolic($literals, $distances, $lengths);
}

lib/Compression/Util.pm  view on Meta::CPAN

            $data .= $match_len >= 255 ? "\xff" : chr($match_len);
            $match_len -= 255;
        }

        $data .= pack('B*', sprintf('%016b', $distances->[$i] // 0));
    }

    return fibonacci_encode([length $data]) . $data;
}

sub lzb_decompress($fh) {

    if (ref($fh) eq '') {
        open(my $fh2, '<:raw', \$fh) or confess "error: $!";
        return __SUB__->($fh2);
    }

    my $data               = '';
    my $search_window      = '';
    my $search_window_size = 1 << 16;

lib/Compression/Util.pm  view on Meta::CPAN

            }
            else {
                $k >>= 1;
            }
        }
        push(@table, $k & 0xffffffff);
    }
    return \@table;
}

sub crc32($str, $crc = 0) {
    state $crc_table = _create_crc32_table();
    $crc &= 0xffffffff;
    $crc ^= 0xffffffff;
    foreach my $c (unpack("C*", $str)) {
        $crc = (($crc >> 8) ^ $crc_table->[($crc & 0xff) ^ $c]);
    }
    return (($crc & 0xffffffff) ^ 0xffffffff);
}

sub adler32($str, $adler = 1) {

    # Reference:
    #   https://datatracker.ietf.org/doc/html/rfc1950#section-9

    my $s1 = $adler & 0xffff;
    my $s2 = ($adler >> 16) & 0xffff;

    foreach my $c (unpack('C*', $str)) {
        $s1 = ($s1 + $c) % 65521;
        $s2 = ($s2 + $s1) % 65521;
    }
    return (($s2 << 16) + $s1);
}

#############################
# Bzip2 compression
#############################

sub _bzip2_encode_code_lengths($dict) {
    my @lengths;

    foreach my $symbol (0 .. max(keys %$dict) // 0) {
        if (exists($dict->{$symbol})) {
            push @lengths, length($dict->{$symbol});
        }
        else {
            confess "Incomplete Huffman tree not supported";
            push @lengths, 0;
        }

lib/Compression/Util.pm  view on Meta::CPAN


    foreach my $d (@$deltas) {
        $bitstring .= (($d > 0) ? ('10' x $d) : ('11' x abs($d))) . '0';
    }

    $VERBOSE && say STDERR "Deltas bitstring: $bitstring";

    return $bitstring;
}

sub bzip2_compress($fh) {

    if (ref($fh) eq '') {
        open(my $fh2, '<:raw', \$fh) or confess "error: $!";
        return __SUB__->($fh2);
    }

    my $level = 9;

    # There is a CRC32 issue on some non-compressible inputs, when using very large chunk sizes
    ## my $CHUNK_SIZE = 100_000 * $level;

lib/Compression/Util.pm  view on Meta::CPAN

    $bitstring  .= int2bits($stream_crc32, 32);
    $compressed .= pack('B*', $bitstring);

    return $compressed;
}

#################################
# Bzip2 decompression
#################################

sub bzip2_decompress($fh) {

    if (ref($fh) eq '') {
        open(my $fh2, '<:raw', \$fh) or confess "error: $!";
        return __SUB__->($fh2);
    }

    state $MaxHuffmanBits = 20;
    my $decompressed = '';

    while (!eof($fh)) {

lib/Compression/Util.pm  view on Meta::CPAN

            $bitstring .= $dist_dict->{$dist_idx - 1};
            $bitstring .= int2bits_lsb($dist - $min, $bits) if ($bits > 0);
        }
    }

    $bitstring .= $dict->{256};    # end-of-block symbol

    return $bitstring;
}

sub deflate_create_block_type_0_header($chunk) {

    my $chunk_len = length($chunk);
    my $len       = int2bits_lsb($chunk_len,             16);
    my $nlen      = int2bits_lsb((~$chunk_len) & 0xffff, 16);

    $len . $nlen;
}

sub gzip_compress ($in_fh, $lzss_encoding_sub = \&lzss_encode) {

lib/Compression/Util.pm  view on Meta::CPAN


    read($in_fh, (my $chunk), $len) // confess "Read error: $!";
    $$search_window .= $chunk;

    $$search_window = substr($$search_window, -$LZ_MAX_DIST)
      if (length($$search_window) > 2 * $LZ_MAX_DIST);

    return $chunk;
}

sub _deflate_decode_huffman($in_fh, $buffer, $rev_dict, $dist_rev_dict, $search_window) {

    state $deflate_tables = [make_deflate_tables()];
    my ($DISTANCE_SYMBOLS, $LENGTH_SYMBOLS, $LENGTH_INDICES) = @$deflate_tables;

    my $data = '';
    my $code = '';

    my $max_ll_code_len   = max(map { length($_) } keys %$rev_dict);
    my $max_dist_code_len = max(map { length($_) } keys %$dist_rev_dict);

lib/Compression/Util.pm  view on Meta::CPAN

            $code_lengths[$i] = 8;
        }

        (undef, $rev_dict)      = huffman_from_code_lengths(\@code_lengths);
        (undef, $dist_rev_dict) = huffman_from_code_lengths([(5) x 32]);
    }

    _deflate_decode_huffman($in_fh, $buffer, $rev_dict, $dist_rev_dict, $search_window);
}

sub _decode_CL_lengths($in_fh, $buffer, $CL_rev_dict, $size) {

    my @lengths;
    my $code = '';

    while (1) {
        $code .= read_bit_lsb($in_fh, $buffer);

        if (length($code) > 7) {
            confess "[!] Something went wrong: length of CL code `$code` is > 7.";
        }

lib/Compression/Util.pm  view on Meta::CPAN


    print $out_fh int2bytes($adler32, 4);

    return $compressed;
}

###############################
# ZLIB decompressor
###############################

sub zlib_decompress($in_fh) {

    if (ref($in_fh) eq '') {
        open(my $fh2, '<:raw', \$in_fh) or confess "error: $!";
        return __SUB__->($fh2);
    }

    my $decompressed = '';

    open my $out_fh, '>:raw', \$decompressed;

lib/Compression/Util.pm  view on Meta::CPAN

        return ($decompressed . __SUB__->($in_fh));
    }

    return $decompressed;
}

###############################
# LZ4 compressor
###############################

sub lz4_compress($fh, $lzss_encoding_sub = \&lzss_encode) {

    if (ref($fh) eq '') {
        open(my $fh2, '<:raw', \$fh) or confess "error: $!";
        return __SUB__->($fh2, $lzss_encoding_sub);
    }

    my $compressed = '';

    $compressed .= int2bytes_lsb(0x184D2204, 4);    # LZ4 magic number

lib/Compression/Util.pm  view on Meta::CPAN

    }

    $compressed .= int2bytes_lsb(0x00000000, 4);    # EndMark
    return $compressed;
}

###############################
# LZ4 decompressor
###############################

sub lz4_decompress($fh) {

    if (ref($fh) eq '') {
        open(my $fh2, '<:raw', \$fh) or confess "error: $!";
        return __SUB__->($fh2);
    }

    my $decompressed = '';

    while (!eof($fh)) {



( run in 1.043 second using v1.01-cache-2.11-cpan-65fba6d93b7 )