view release on metacpan or search on metacpan
examples/bzip2_compressor.pl view on Meta::CPAN
use List::Util qw(max);
use Compression::Util qw(:all);
use constant {CHUNK_SIZE => 1 << 16};
local $| = 1;
binmode(STDIN, ":raw");
binmode(STDOUT, ":raw");
sub encode_mtf_alphabet($alphabet) {
my %table;
@table{@$alphabet} = ();
my $populated = 0;
my @marked;
for (my $i = 0 ; $i <= 255 ; $i += 16) {
my $enc = 0;
foreach my $j (0 .. 15) {
examples/bzip2_compressor.pl view on Meta::CPAN
push @marked, $enc;
}
}
say STDERR sprintf("Populated: %016b", $populated);
say STDERR "Marked: (@marked)";
return ($populated, \@marked);
}
sub encode_code_lengths($dict) {
my @lengths;
foreach my $symbol (0 .. max(keys %$dict) // 0) {
if (exists($dict->{$symbol})) {
push @lengths, length($dict->{$symbol});
}
else {
die "Incomplete Huffman tree not supported";
push @lengths, 0;
}
examples/gzip_decompressor.pl view on Meta::CPAN
else {
print STDERR ":: Chunk length: $len\n";
}
read($in_fh, (my $chunk), $len);
return $chunk;
}
my ($DISTANCE_SYMBOLS, $LENGTH_SYMBOLS) = make_deflate_tables();
sub decode_huffman($in_fh, $buffer, $rev_dict, $dist_rev_dict, $search_window) {
my $data = '';
my $code = '';
my $max_ll_code_len = max(map { length($_) } keys %$rev_dict);
my $max_dist_code_len = max(map { length($_) } keys %$dist_rev_dict);
while (1) {
$code .= read_bit_lsb($in_fh, $buffer);
examples/gzip_decompressor.pl view on Meta::CPAN
$code_lengths[$i] = 8;
}
(undef, $rev_dict) = huffman_from_code_lengths(\@code_lengths);
(undef, $dist_rev_dict) = huffman_from_code_lengths([(5) x 32]);
}
decode_huffman($in_fh, $buffer, $rev_dict, $dist_rev_dict, $search_window);
}
sub decode_CL_lengths($in_fh, $buffer, $CL_rev_dict, $size) {
my @lengths;
my $code = '';
while (1) {
$code .= read_bit_lsb($in_fh, $buffer);
if (length($code) > 7) {
die "[!] Something went wrong: length of CL code `$code` is > 7.\n";
}
examples/gzip_file_compression.pl view on Meta::CPAN
$bitstring .= $dist_dict->{$dist_idx - 1};
$bitstring .= int2bits_lsb($dist - $min, $bits) if ($bits > 0);
}
}
$bitstring .= $dict->{256}; # end-of-block symbol
return $bitstring;
}
sub block_type_0($chunk) {
my $chunk_len = length($chunk);
my $len = int2bits_lsb($chunk_len, 16);
my $nlen = int2bits_lsb((~$chunk_len) & 0xffff, 16);
$len . $nlen;
}
sub my_gzip_compress ($in_fh, $out_fh) {
examples/gzip_file_compression.pl view on Meta::CPAN
die "[!] The ~length value is not correct: $nlen (actual) != $expected_nlen (expected)\n";
}
else {
print STDERR ":: Chunk length: $len\n";
}
read($in_fh, (my $chunk), $len);
return $chunk;
}
sub decode_huffman($in_fh, $buffer, $rev_dict, $dist_rev_dict, $search_window) {
my $data = '';
my $code = '';
my $max_ll_code_len = max(map { length($_) } keys %$rev_dict);
my $max_dist_code_len = max(map { length($_) } keys %$dist_rev_dict);
while (1) {
$code .= read_bit_lsb($in_fh, $buffer);
examples/gzip_file_compression.pl view on Meta::CPAN
$code_lengths[$i] = 8;
}
(undef, $rev_dict) = huffman_from_code_lengths(\@code_lengths);
(undef, $dist_rev_dict) = huffman_from_code_lengths([(5) x 32]);
}
decode_huffman($in_fh, $buffer, $rev_dict, $dist_rev_dict, $search_window);
}
sub decode_CL_lengths($in_fh, $buffer, $CL_rev_dict, $size) {
my @lengths;
my $code = '';
while (1) {
$code .= read_bit_lsb($in_fh, $buffer);
if (length($code) > 7) {
die "[!] Something went wrong: length of CL code `$code` is > 7.\n";
}
lib/Compression/Util.pm view on Meta::CPAN
}
sub int2bytes ($value, $size) {
pack('B*', sprintf("%0*b", 8 * $size, $value));
}
sub int2bytes_lsb ($value, $size) {
pack('b*', scalar reverse sprintf("%0*b", 8 * $size, $value));
}
sub bytes2int($fh, $n) {
if (ref($fh) eq '') {
open(my $fh2, '<:raw', \$fh) or confess "error: $!";
return __SUB__->($fh2, $n);
}
my $bytes = '';
$bytes .= getc($fh) for (1 .. $n);
oct('0b' . unpack('B*', $bytes));
}
lib/Compression/Util.pm view on Meta::CPAN
$VERBOSE && say STDERR "Alphabet size: ", scalar(@$alphabet);
my $rle4 = $entropy_sub->($fh);
my $rle = rle4_decode($rle4);
my $mtf = zrle_decode($rle);
my $symbols = mtf_decode($mtf, $alphabet);
return $symbols;
}
sub mrl_decompress($fh, $entropy_sub = \&decode_huffman_entry) {
symbols2string(mrl_decompress_symbolic($fh, $entropy_sub));
}
############################################################
# BWT-based compression (BWT + MTF + ZRLE + Huffman coding)
############################################################
sub bwt_compress ($chunk, $entropy_sub = \&create_huffman_entry) {
if (ref($chunk) ne '') {
lib/Compression/Util.pm view on Meta::CPAN
}
}
return \@ints;
}
###################
# LZSS SYMBOLIC
###################
sub lzss_encode_symbolic($symbols, %params) {
if (ref($symbols) eq '') {
return lzss_encode($symbols, %params);
}
my $min_len = $params{min_len} // $LZ_MIN_LEN;
my $max_len = $params{max_len} // $LZ_MAX_LEN;
my $max_dist = $params{max_dist} // $LZ_MAX_DIST;
my $max_chain_len = $params{max_chain_len} // $LZ_MAX_CHAIN_LEN;
lib/Compression/Util.pm view on Meta::CPAN
push @distances, (0) x $best_n;
push @literals, @{$symbols}[$la .. $la + $best_n - 1];
$la += $best_n;
}
}
return (\@literals, \@distances, \@lengths);
}
sub lzss_encode_fast($str, %params) {
if (ref($str) ne '') {
return lzss_encode_fast_symbolic($str, %params);
}
my @symbols = unpack('C*', $str);
my $la = 0;
my $end = $#symbols;
lib/Compression/Util.pm view on Meta::CPAN
}
}
return (\@literals, \@distances, \@lengths);
}
################################
# LZ77 encoding, inspired by LZ4
################################
sub lz77_encode($chunk, $lzss_encoding_sub = \&lzss_encode) {
local $LZ_MAX_LEN = ~0; # maximum match length
my ($literals, $distances, $lengths) = $lzss_encoding_sub->($chunk);
my $literals_end = $#{$literals};
my (@symbols, @len_symbols, @match_symbols, @dist_symbols);
for (my $i = 0 ; $i <= $literals_end ; ++$i) {
lib/Compression/Util.pm view on Meta::CPAN
}
push @dist_symbols, $distances->[$i] // 0;
}
return (\@symbols, \@dist_symbols, \@len_symbols, \@match_symbols);
}
*lz77_encode_symbolic = \&lz77_encode;
sub lz77_decode($symbols, $dist_symbols, $len_symbols, $match_symbols) {
my $data = '';
my $data_len = 0;
my @symbols = @$symbols;
my @len_symbols = @$len_symbols;
my @match_symbols = @$match_symbols;
my @dist_symbols = @$dist_symbols;
while (@symbols) {
lib/Compression/Util.pm view on Meta::CPAN
$data .= substr($data, $data_len + $i - $dist - 1, 1) // confess "bad input";
}
}
$data_len += $match_len;
}
return $data;
}
sub lz77_decode_symbolic($symbols, $dist_symbols, $len_symbols, $match_symbols) {
my @data;
my $data_len = 0;
my @symbols = @$symbols;
my @len_symbols = @$len_symbols;
my @match_symbols = @$match_symbols;
my @dist_symbols = @$dist_symbols;
while (@symbols) {
lib/Compression/Util.pm view on Meta::CPAN
push @data, $data[$data_len + $j - $dist - 1];
}
}
$data_len += $match_len;
}
return \@data;
}
sub lz77_compress($chunk, $entropy_sub = \&create_huffman_entry, $lzss_encoding_sub = \&lzss_encode) {
my ($symbols, $dist_symbols, $len_symbols, $match_symbols) = lz77_encode($chunk, $lzss_encoding_sub);
$entropy_sub->($symbols) . $entropy_sub->($len_symbols) . $entropy_sub->($match_symbols) . obh_encode($dist_symbols, $entropy_sub);
}
*lz77_compress_symbolic = \&lz77_compress;
sub lz77_decompress($fh, $entropy_sub = \&decode_huffman_entry) {
if (ref($fh) eq '') {
open(my $fh2, '<:raw', \$fh) or confess "error: $!";
return __SUB__->($fh2, $entropy_sub);
}
my $symbols = $entropy_sub->($fh);
my $len_symbols = $entropy_sub->($fh);
my $match_symbols = $entropy_sub->($fh);
my $dist_symbols = obh_decode($fh, $entropy_sub);
lz77_decode($symbols, $dist_symbols, $len_symbols, $match_symbols);
}
sub lz77_decompress_symbolic($fh, $entropy_sub = \&decode_huffman_entry) {
if (ref($fh) eq '') {
open(my $fh2, '<:raw', \$fh) or confess "error: $!";
return __SUB__->($fh2, $entropy_sub);
}
my $symbols = $entropy_sub->($fh);
my $len_symbols = $entropy_sub->($fh);
my $match_symbols = $entropy_sub->($fh);
my $dist_symbols = obh_decode($fh, $entropy_sub);
lz77_decode_symbolic($symbols, $dist_symbols, $len_symbols, $match_symbols);
}
#########################
# LZSS + DEFLATE encoding
#########################
sub lzss_compress($chunk, $entropy_sub = \&create_huffman_entry, $lzss_encoding_sub = \&lzss_encode) {
my ($literals, $distances, $lengths) = $lzss_encoding_sub->($chunk);
deflate_encode($literals, $distances, $lengths, $entropy_sub);
}
*lzss_compress_symbolic = \&lzss_compress;
sub lzss_decompress($fh, $entropy_sub = \&decode_huffman_entry) {
if (ref($fh) eq '') {
open(my $fh2, '<:raw', \$fh) or confess "error: $!";
return __SUB__->($fh2, $entropy_sub);
}
my ($literals, $distances, $lengths) = deflate_decode($fh, $entropy_sub);
lzss_decode($literals, $distances, $lengths);
}
sub lzss_decompress_symbolic($fh, $entropy_sub = \&decode_huffman_entry) {
if (ref($fh) eq '') {
open(my $fh2, '<:raw', \$fh) or confess "error: $!";
return __SUB__->($fh2, $entropy_sub);
}
my ($literals, $distances, $lengths) = deflate_decode($fh, $entropy_sub);
lzss_decode_symbolic($literals, $distances, $lengths);
}
lib/Compression/Util.pm view on Meta::CPAN
$data .= $match_len >= 255 ? "\xff" : chr($match_len);
$match_len -= 255;
}
$data .= pack('B*', sprintf('%016b', $distances->[$i] // 0));
}
return fibonacci_encode([length $data]) . $data;
}
sub lzb_decompress($fh) {
if (ref($fh) eq '') {
open(my $fh2, '<:raw', \$fh) or confess "error: $!";
return __SUB__->($fh2);
}
my $data = '';
my $search_window = '';
my $search_window_size = 1 << 16;
lib/Compression/Util.pm view on Meta::CPAN
}
else {
$k >>= 1;
}
}
push(@table, $k & 0xffffffff);
}
return \@table;
}
sub crc32($str, $crc = 0) {
state $crc_table = _create_crc32_table();
$crc &= 0xffffffff;
$crc ^= 0xffffffff;
foreach my $c (unpack("C*", $str)) {
$crc = (($crc >> 8) ^ $crc_table->[($crc & 0xff) ^ $c]);
}
return (($crc & 0xffffffff) ^ 0xffffffff);
}
sub adler32($str, $adler = 1) {
# Reference:
# https://datatracker.ietf.org/doc/html/rfc1950#section-9
my $s1 = $adler & 0xffff;
my $s2 = ($adler >> 16) & 0xffff;
foreach my $c (unpack('C*', $str)) {
$s1 = ($s1 + $c) % 65521;
$s2 = ($s2 + $s1) % 65521;
}
return (($s2 << 16) + $s1);
}
#############################
# Bzip2 compression
#############################
sub _bzip2_encode_code_lengths($dict) {
my @lengths;
foreach my $symbol (0 .. max(keys %$dict) // 0) {
if (exists($dict->{$symbol})) {
push @lengths, length($dict->{$symbol});
}
else {
confess "Incomplete Huffman tree not supported";
push @lengths, 0;
}
lib/Compression/Util.pm view on Meta::CPAN
foreach my $d (@$deltas) {
$bitstring .= (($d > 0) ? ('10' x $d) : ('11' x abs($d))) . '0';
}
$VERBOSE && say STDERR "Deltas bitstring: $bitstring";
return $bitstring;
}
sub bzip2_compress($fh) {
if (ref($fh) eq '') {
open(my $fh2, '<:raw', \$fh) or confess "error: $!";
return __SUB__->($fh2);
}
my $level = 9;
# There is a CRC32 issue on some non-compressible inputs, when using very large chunk sizes
## my $CHUNK_SIZE = 100_000 * $level;
lib/Compression/Util.pm view on Meta::CPAN
$bitstring .= int2bits($stream_crc32, 32);
$compressed .= pack('B*', $bitstring);
return $compressed;
}
#################################
# Bzip2 decompression
#################################
sub bzip2_decompress($fh) {
if (ref($fh) eq '') {
open(my $fh2, '<:raw', \$fh) or confess "error: $!";
return __SUB__->($fh2);
}
state $MaxHuffmanBits = 20;
my $decompressed = '';
while (!eof($fh)) {
lib/Compression/Util.pm view on Meta::CPAN
$bitstring .= $dist_dict->{$dist_idx - 1};
$bitstring .= int2bits_lsb($dist - $min, $bits) if ($bits > 0);
}
}
$bitstring .= $dict->{256}; # end-of-block symbol
return $bitstring;
}
sub deflate_create_block_type_0_header($chunk) {
my $chunk_len = length($chunk);
my $len = int2bits_lsb($chunk_len, 16);
my $nlen = int2bits_lsb((~$chunk_len) & 0xffff, 16);
$len . $nlen;
}
sub gzip_compress ($in_fh, $lzss_encoding_sub = \&lzss_encode) {
lib/Compression/Util.pm view on Meta::CPAN
read($in_fh, (my $chunk), $len) // confess "Read error: $!";
$$search_window .= $chunk;
$$search_window = substr($$search_window, -$LZ_MAX_DIST)
if (length($$search_window) > 2 * $LZ_MAX_DIST);
return $chunk;
}
sub _deflate_decode_huffman($in_fh, $buffer, $rev_dict, $dist_rev_dict, $search_window) {
state $deflate_tables = [make_deflate_tables()];
my ($DISTANCE_SYMBOLS, $LENGTH_SYMBOLS, $LENGTH_INDICES) = @$deflate_tables;
my $data = '';
my $code = '';
my $max_ll_code_len = max(map { length($_) } keys %$rev_dict);
my $max_dist_code_len = max(map { length($_) } keys %$dist_rev_dict);
lib/Compression/Util.pm view on Meta::CPAN
$code_lengths[$i] = 8;
}
(undef, $rev_dict) = huffman_from_code_lengths(\@code_lengths);
(undef, $dist_rev_dict) = huffman_from_code_lengths([(5) x 32]);
}
_deflate_decode_huffman($in_fh, $buffer, $rev_dict, $dist_rev_dict, $search_window);
}
sub _decode_CL_lengths($in_fh, $buffer, $CL_rev_dict, $size) {
my @lengths;
my $code = '';
while (1) {
$code .= read_bit_lsb($in_fh, $buffer);
if (length($code) > 7) {
confess "[!] Something went wrong: length of CL code `$code` is > 7.";
}
lib/Compression/Util.pm view on Meta::CPAN
print $out_fh int2bytes($adler32, 4);
return $compressed;
}
###############################
# ZLIB decompressor
###############################
sub zlib_decompress($in_fh) {
if (ref($in_fh) eq '') {
open(my $fh2, '<:raw', \$in_fh) or confess "error: $!";
return __SUB__->($fh2);
}
my $decompressed = '';
open my $out_fh, '>:raw', \$decompressed;
lib/Compression/Util.pm view on Meta::CPAN
return ($decompressed . __SUB__->($in_fh));
}
return $decompressed;
}
###############################
# LZ4 compressor
###############################
sub lz4_compress($fh, $lzss_encoding_sub = \&lzss_encode) {
if (ref($fh) eq '') {
open(my $fh2, '<:raw', \$fh) or confess "error: $!";
return __SUB__->($fh2, $lzss_encoding_sub);
}
my $compressed = '';
$compressed .= int2bytes_lsb(0x184D2204, 4); # LZ4 magic number
lib/Compression/Util.pm view on Meta::CPAN
}
$compressed .= int2bytes_lsb(0x00000000, 4); # EndMark
return $compressed;
}
###############################
# LZ4 decompressor
###############################
sub lz4_decompress($fh) {
if (ref($fh) eq '') {
open(my $fh2, '<:raw', \$fh) or confess "error: $!";
return __SUB__->($fh2);
}
my $decompressed = '';
while (!eof($fh)) {