ClickHouse-Encoder
view release on metacpan or search on metacpan
t/buffer-grow.t view on Meta::CPAN
#!/usr/bin/env perl
# Force the internal Buffer to grow several times during a single
# encode and verify the final wire bytes match a one-shot encode of
# the same input. Catches realloc bugs in buf_grow (e.g. a stale
# buf->ptr after SvGROW relocates the SV body).
#
# The buffer starts at 256 bytes (see buf_init). Encoding ~10 MiB of
# data forces ~16 doublings (256 -> 512 -> ... -> 16 MiB). Doing it
# both in one shot and chunked via the streamer (which calls do_encode
# repeatedly with fresh buffers) cross-validates that buf_grow's
# pointer refresh is correct and that mortal-buffer cleanup between
# blocks doesn't leak or corrupt state.
use strict;
use warnings;
use lib 'blib/lib', 'blib/arch', 't/lib';
use Test::More;
use ClickHouse::Encoder;
use TestCH qw(read_varint_ref);
*read_varint = \&read_varint_ref;
# Encoder under test: a single String column named 's'.
my $enc = ClickHouse::Encoder->new(
    columns => [ [ 's', 'String' ] ],
);

# Build 50_000 one-field rows whose payload length cycles through
# 100 .. 299 bytes of 'x' (~200 bytes on average, ~10 MiB in total).
# That is far beyond the encoder's initial buffer size, so every encode
# pass has to go through buf_grow repeatedly as the buffer doubles.
my @rows;
for my $i (1 .. 50_000) {
    push @rows, [ 'x' x (100 + $i % 200) ];
}

# One-shot pass: all rows in one encode() call, i.e. a single do_encode
# and a single chain of buffer growths.
my $bin = $enc->encode(\@rows);
cmp_ok(
    length($bin), '>', 8 * 1024 * 1024,
    'one-shot 50k rows produces > 8 MiB of bytes',
);
# Streamed pass: push the same rows through stream() in batches of 1000,
# so do_encode runs many times, each time on a fresh mortal buffer. Any
# state leaked or corrupted by buf_grow between blocks would make the
# concatenated output diverge from the one-shot result.
#
# stream() emits one Native block per batch rather than one continuous
# block, so the two outputs cannot be compared byte for byte. Instead we
# check the overall size here and decode the blocks afterwards.
my $chunked = '';
my @rows2   = @rows;    # scratch copy: the source callback consumes it
$enc->stream(
    # Source: hand out the next row; yields undef once the copy is empty.
    sub { return shift @rows2 },
    # Sink: append each emitted chunk to the accumulated output.
    sub {
        my ($block) = @_;
        $chunked .= $block;
    },
    batch_size => 1000,
);
cmp_ok(
    length($chunked), '>', 0.95 * length($bin),
    'streamed 50k rows in 50 blocks adds up to similar size (per-block overhead is small)',
);
# Round-trip check: parse the concatenated stream output back into its
# strings so they can be confirmed to equal @rows exactly. A match shows
# that every doubling cycle of buf_grow produced wire-correct bytes.
my @decoded;                                  # strings recovered so far
my ($off, $blen) = (0, length($chunked));     # read cursor, total bytes
while ($off < $blen) {
my $ncols = read_varint(\$chunked, \$off);
my $nrows = read_varint(\$chunked, \$off);
is($ncols, 1, 'block has 1 column');
my $name_len = read_varint(\$chunked, \$off); $off += $name_len;
my $type_len = read_varint(\$chunked, \$off); $off += $type_len;
for (1 .. $nrows) {
my $slen = read_varint(\$chunked, \$off);
push @decoded, substr($chunked, $off, $slen);
$off += $slen;
}
( run in 1.878 second using v1.01-cache-2.11-cpan-71847e10f99 )