Compress-Stream-Zstd

 view release on metacpan or  search on metacpan

ext/zstd/doc/educational_decoder/zstd_decompress.c  view on Meta::CPAN

        u8 mantissa = window_descriptor & 7;

        // Use the algorithm from the specification to compute window size
        // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor
        size_t window_base = (size_t)1 << (10 + exponent);
        size_t window_add = (window_base / 8) * mantissa;
        header->window_size = window_base + window_add;
    }

    // decode dictionary id if it exists
    if (dictionary_id_flag) {
        // "This is a variable size field, which contains the ID of the
        // dictionary required to properly decode the frame. Note that this
        // field is optional. When it's not present, it's up to the caller to
        // make sure it uses the correct dictionary. Format is little-endian."
        const int bytes_array[] = {0, 1, 2, 4};
        const int bytes = bytes_array[dictionary_id_flag];

        header->dictionary_id = (u32)IO_read_bits(in, bytes * 8);
    } else {
        header->dictionary_id = 0;
    }

    // decode frame content size if it exists
    if (single_segment_flag || frame_content_size_flag) {
        // "This is the original (uncompressed) size. This information is
        // optional. The Field_Size is provided according to value of
        // Frame_Content_Size_flag. The Field_Size can be equal to 0 (not
        // present), 1, 2, 4 or 8 bytes. Format is little-endian."
        //
        // if frame_content_size_flag == 0 but single_segment_flag is set, we
        // still have a 1 byte field
        const int bytes_array[] = {1, 2, 4, 8};
        const int bytes = bytes_array[frame_content_size_flag];

        header->frame_content_size = IO_read_bits(in, bytes * 8);
        if (bytes == 2) {
            // "When Field_Size is 2, the offset of 256 is added."
            header->frame_content_size += 256;
        }
    } else {
        header->frame_content_size = 0;
    }

    if (single_segment_flag) {
        // "The Window_Descriptor byte is optional. It is absent when
        // Single_Segment_flag is set. In this case, the maximum back-reference
        // distance is the content size itself, which can be any value from 1 to
        // 2^64-1 bytes (16 EB)."
        header->window_size = header->frame_content_size;
    }
}

/// Decompress the data from a frame block by block
static void decompress_data(frame_context_t *const ctx, ostream_t *const out,
                            istream_t *const in) {
    // "A frame encapsulates one or multiple blocks. Each block can be
    // compressed or not, and has a guaranteed maximum content size, which
    // depends on frame parameters. Unlike frames, each block depends on
    // previous blocks for proper decoding. However, each block can be
    // decompressed without waiting for its successor, allowing streaming
    // operations."
    int last_block = 0;
    do {
        // "Last_Block
        //
        // The lowest bit signals if this block is the last one. Frame ends
        // right after this block.
        //
        // Block_Type and Block_Size
        //
        // The next 2 bits represent the Block_Type, while the remaining 21 bits
        // represent the Block_Size. Format is little-endian."
        last_block = (int)IO_read_bits(in, 1);
        const int block_type = (int)IO_read_bits(in, 2);
        const size_t block_len = IO_read_bits(in, 21);

        switch (block_type) {
        case 0: {
            // "Raw_Block - this is an uncompressed block. Block_Size is the
            // number of bytes to read and copy."
            const u8 *const read_ptr = IO_get_read_ptr(in, block_len);
            u8 *const write_ptr = IO_get_write_ptr(out, block_len);

            // Copy the raw data into the output
            memcpy(write_ptr, read_ptr, block_len);

            ctx->current_total_output += block_len;
            break;
        }
        case 1: {
            // "RLE_Block - this is a single byte, repeated N times. In which
            // case, Block_Size is the size to regenerate, while the
            // "compressed" block is just 1 byte (the byte to repeat)."
            const u8 *const read_ptr = IO_get_read_ptr(in, 1);
            u8 *const write_ptr = IO_get_write_ptr(out, block_len);

            // Copy `block_len` copies of `read_ptr[0]` to the output
            memset(write_ptr, read_ptr[0], block_len);

            ctx->current_total_output += block_len;
            break;
        }
        case 2: {
            // "Compressed_Block - this is a Zstandard compressed block,
            // detailed in another section of this specification. Block_Size is
            // the compressed size."

            // Create a sub-stream for the block
            istream_t block_stream = IO_make_sub_istream(in, block_len);
            decompress_block(ctx, out, &block_stream);
            break;
        }
        case 3:
            // "Reserved - this is not a block. This value cannot be used with
            // current version of this specification."
            CORRUPTION();
            break;
        default:
            IMPOSSIBLE();
        }



( run in 0.593 second using v1.01-cache-2.11-cpan-39bf76dae61 )