Compress-Stream-Zstd
view release on metacpan or search on metacpan
ext/zstd/doc/educational_decoder/zstd_decompress.c view on Meta::CPAN
u8 mantissa = window_descriptor & 7;
// Use the algorithm from the specification to compute window size
// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor
size_t window_base = (size_t)1 << (10 + exponent);
size_t window_add = (window_base / 8) * mantissa;
header->window_size = window_base + window_add;
}
// decode dictionary id if it exists
if (dictionary_id_flag) {
// "This is a variable size field, which contains the ID of the
// dictionary required to properly decode the frame. Note that this
// field is optional. When it's not present, it's up to the caller to
// make sure it uses the correct dictionary. Format is little-endian."
const int bytes_array[] = {0, 1, 2, 4};
const int bytes = bytes_array[dictionary_id_flag];
header->dictionary_id = (u32)IO_read_bits(in, bytes * 8);
} else {
header->dictionary_id = 0;
}
// decode frame content size if it exists
if (single_segment_flag || frame_content_size_flag) {
// "This is the original (uncompressed) size. This information is
// optional. The Field_Size is provided according to value of
// Frame_Content_Size_flag. The Field_Size can be equal to 0 (not
// present), 1, 2, 4 or 8 bytes. Format is little-endian."
//
// if frame_content_size_flag == 0 but single_segment_flag is set, we
// still have a 1 byte field
const int bytes_array[] = {1, 2, 4, 8};
const int bytes = bytes_array[frame_content_size_flag];
header->frame_content_size = IO_read_bits(in, bytes * 8);
if (bytes == 2) {
// "When Field_Size is 2, the offset of 256 is added."
header->frame_content_size += 256;
}
} else {
header->frame_content_size = 0;
}
if (single_segment_flag) {
// "The Window_Descriptor byte is optional. It is absent when
// Single_Segment_flag is set. In this case, the maximum back-reference
// distance is the content size itself, which can be any value from 1 to
// 2^64-1 bytes (16 EB)."
header->window_size = header->frame_content_size;
}
}
/// Decompress the data from a frame block by block
static void decompress_data(frame_context_t *const ctx, ostream_t *const out,
istream_t *const in) {
// "A frame encapsulates one or multiple blocks. Each block can be
// compressed or not, and has a guaranteed maximum content size, which
// depends on frame parameters. Unlike frames, each block depends on
// previous blocks for proper decoding. However, each block can be
// decompressed without waiting for its successor, allowing streaming
// operations."
int last_block = 0;
do {
// "Last_Block
//
// The lowest bit signals if this block is the last one. Frame ends
// right after this block.
//
// Block_Type and Block_Size
//
// The next 2 bits represent the Block_Type, while the remaining 21 bits
// represent the Block_Size. Format is little-endian."
last_block = (int)IO_read_bits(in, 1);
const int block_type = (int)IO_read_bits(in, 2);
const size_t block_len = IO_read_bits(in, 21);
switch (block_type) {
case 0: {
// "Raw_Block - this is an uncompressed block. Block_Size is the
// number of bytes to read and copy."
const u8 *const read_ptr = IO_get_read_ptr(in, block_len);
u8 *const write_ptr = IO_get_write_ptr(out, block_len);
// Copy the raw data into the output
memcpy(write_ptr, read_ptr, block_len);
ctx->current_total_output += block_len;
break;
}
case 1: {
// "RLE_Block - this is a single byte, repeated N times. In which
// case, Block_Size is the size to regenerate, while the
// "compressed" block is just 1 byte (the byte to repeat)."
const u8 *const read_ptr = IO_get_read_ptr(in, 1);
u8 *const write_ptr = IO_get_write_ptr(out, block_len);
// Copy `block_len` copies of `read_ptr[0]` to the output
memset(write_ptr, read_ptr[0], block_len);
ctx->current_total_output += block_len;
break;
}
case 2: {
// "Compressed_Block - this is a Zstandard compressed block,
// detailed in another section of this specification. Block_Size is
// the compressed size."
// Create a sub-stream for the block
istream_t block_stream = IO_make_sub_istream(in, block_len);
decompress_block(ctx, out, &block_stream);
break;
}
case 3:
// "Reserved - this is not a block. This value cannot be used with
// current version of this specification."
CORRUPTION();
break;
default:
IMPOSSIBLE();
}
( run in 0.593 second using v1.01-cache-2.11-cpan-39bf76dae61 )