ClickHouse-Encoder

 view release on metacpan or  search on metacpan

bench/complex_insert_benchmark.pl  view on Meta::CPAN

    ];
}

# Build the encoder for the benchmark's five-column row layout.
# Each column is given as a [name, ClickHouse type] pair.
my $encoder = ClickHouse::Encoder->new(
    columns => [
        [ id       => 'UInt32' ],
        [ name     => 'String' ],
        [ tags     => 'Array(String)' ],
        [ coords   => 'Tuple(Float64, Float64)' ],
        [ optional => 'Nullable(UInt64)' ],
    ],
);

# Pre-encode native
# The whole data set is encoded once, up front, so the insert benchmark can
# reuse the same payload. Only the encode() call sits between the two time()
# readings.
# NOTE(review): sub-second results assume time() is Time::HiRes::time --
# confirm against the file's imports.
print "Encoding Native format...\n";
my $t0 = time();
my $native_data = $encoder->encode(\@data);
my $native_encode_time = time() - $t0;

# Pre-encode JSON (for JSONEachRow format)
# Each row (an array ref) is mapped positionally onto the five column names
# and appended as one JSON object per line. The timed region includes
# constructing the JSON::PP object and building the per-row hash.
print "Encoding JSON format...\n";
$t0 = time();
my $json_data = '';
# utf8: encode() returns the JSON text as UTF-8 encoded bytes
my $json = JSON::PP->new->utf8;
for my $row (@data) {
    my %obj = (
        id       => $row->[0],
        name     => $row->[1],
        tags     => $row->[2],
        coords   => $row->[3],
        optional => $row->[4],
    );
    $json_data .= $json->encode(\%obj) . "\n";
}
my $json_encode_time = time() - $t0;

# Report the pre-encoding phase: encoder speed first, then payload sizes.
# The bare block keeps the helper variables out of file scope.
{
    my $native_bytes = length($native_data);
    my $json_bytes   = length($json_data);

    printf "\nEncoding times:\n";
    printf "  Native: %.3f sec (%.0f rows/sec)\n",
        $native_encode_time,
        $ROWS / $native_encode_time;
    printf "  JSON:   %.3f sec (%.0f rows/sec)\n",
        $json_encode_time,
        $ROWS / $json_encode_time;
    printf "  Native encoding is %.1fx faster\n",
        $json_encode_time / $native_encode_time;

    printf "\nData sizes:\n";
    printf "  Native: %d bytes (%.2f MB)\n",
        $native_bytes,
        $native_bytes / 1024 / 1024;
    printf "  JSON:   %d bytes (%.2f MB)\n",
        $json_bytes,
        $json_bytes / 1024 / 1024;
    printf "  Native is %.0f%% smaller\n",
        (1 - $native_bytes / $json_bytes) * 100;
}

# Create test table
# Drop and recreate bench_complex through clickhouse-client. The exit status
# of each command is checked, because a failed setup would invalidate every
# timing measured afterwards.
print "\nSetting up test table...\n";
for my $setup_cmd (
    "clickhouse-client --port $PORT --query 'drop table if exists bench_complex'",
    "clickhouse-client --port $PORT --query 'create table bench_complex (
    id UInt32,
    name String,
    tags Array(String),
    coords Tuple(Float64, Float64),
    optional Nullable(UInt64)
) engine = Null'",
) {
    my $status = system($setup_cmd);
    next if $status == 0;

    # system() returns -1 when the command could not be started at all;
    # otherwise the value is the wait status, with the exit code in the high byte.
    die "Cannot run clickhouse-client: $!\n" if $status == -1;
    die sprintf(
        "Test table setup failed (clickhouse-client wait status %d, exit code %d)\n",
        $status, $status >> 8,
    );
}

# Benchmark function
#
# Times repeated INSERTs of a pre-encoded payload piped into clickhouse-client.
#
# Arguments:
#   $format     - input format name, interpolated into the
#                 "insert into bench_complex format ..." query
#   $data       - payload written as-is (binmode) to the client's stdin
#   $iterations - number of timed runs
#
# Returns the list of elapsed times, one per run. Each covers spawning the
# client, writing the payload, and waiting for the client to exit.
#
# Dies if the client cannot be started, the payload cannot be written, or the
# client exits with a non-zero status. The client's stderr is discarded, so
# without these checks a failed insert would be indistinguishable from a fast one.
sub bench_insert {
    my ($format, $data, $iterations) = @_;
    my @times;

    for my $i (1 .. $iterations) {
        my $t0 = time();
        open my $fh, '|-', "clickhouse-client --port $PORT --query 'insert into bench_complex format $format' 2>/dev/null"
            or die "Cannot run clickhouse-client: $!";
        binmode $fh;
        print {$fh} $data
            or die "Cannot write $format data to clickhouse-client: $!";

        # close() on a piped handle waits for the child. It returns false on
        # a syscall error ($! set) or when the child exits non-zero ($! is 0
        # and the wait status is in $?).
        unless (close $fh) {
            die "Error closing pipe to clickhouse-client: $!" if $!;
            die sprintf(
                "clickhouse-client failed for format %s (wait status %d, exit code %d)",
                $format, $?, $? >> 8,
            );
        }

        my $elapsed = time() - $t0;
        push @times, $elapsed;
    }

    return @times;
}

# Banner for the insert phase
print "\n", "=" x 60, "\n",
      "INSERT Benchmark (Complex Types)\n",
      "=" x 60, "\n\n";

# Warmup
# One untimed insert per format, Native first. The payloads are passed by
# reference here so the large strings are not copied into the list.
print "Warming up...\n";
for my $warmup ([ Native => \$native_data ], [ JSONEachRow => \$json_data ]) {
    my ($format, $payload_ref) = @$warmup;
    bench_insert($format, $$payload_ref, 1);
}

# Benchmark
# The timed runs: all Native iterations first, then all JSONEachRow iterations.
print "Benchmarking Native format...\n";
my @native_times = bench_insert('Native', $native_data, $ITERATIONS);

print "Benchmarking JSONEachRow format...\n";
my @json_times = bench_insert('JSONEachRow', $json_data, $ITERATIONS);

# Calculate statistics
#
# Takes a flat list of numeric samples and returns a two-element list:
# (arithmetic mean, smallest sample). An empty list dies on the division.
sub stats {
    my @samples = @_;

    # Sum in input order, then divide by the sample count
    my $total = 0;
    foreach my $sample (@samples) {
        $total += $sample;
    }
    my $mean = $total / scalar(@samples);

    # Single pass for the minimum; strict '<' keeps the first of equal values
    my $smallest = $samples[0];
    foreach my $sample (@samples[1 .. $#samples]) {
        $smallest = $sample if $sample < $smallest;
    }

    return ($mean, $smallest);
}

# Collapse the per-run timings of each format into (average, minimum)
my ($native_avg, $native_min) = stats(@native_times);
my ($json_avg,   $json_min)   = stats(@json_times);

print "\n", "-" x 60, "\n",
      "Results (seconds per $ROWS rows with Arrays/Tuples/Nullable):\n",
      "-" x 60, "\n\n";

# The same report is printed for each format. The heading strings carry their
# own leading and trailing newlines.
for my $report (
    [ "Native format:\n",        $native_avg, $native_min, length($native_data) ],
    [ "\nJSONEachRow format:\n", $json_avg,   $json_min,   length($json_data)   ],
) {
    my ($heading, $avg, $min, $bytes) = @$report;

    printf '%s', $heading;
    printf "  avg: %.4f sec, min: %.4f sec\n", $avg, $min;
    printf "  throughput: %.0f rows/sec, %.2f MB/sec\n",
        $ROWS / $avg,
        $bytes / $avg / 1024 / 1024;
}

# Ratio of average insert times; a value above 1 means Native was faster
my $speedup = $json_avg / $native_avg;
print "\n", "=" x 60, "\n";



( run in 0.925 second using v1.01-cache-2.11-cpan-96521ef73a4 )