ClickHouse-Encoder
view release on metacpan or search on metacpan
bench/complex_insert_benchmark.pl view on Meta::CPAN
];
}
# Setup encoder
# Each column is declared as a [name => ClickHouse type] pair.
my $encoder = ClickHouse::Encoder->new(
    columns => [
        [ id       => 'UInt32' ],
        [ name     => 'String' ],
        [ tags     => 'Array(String)' ],
        [ coords   => 'Tuple(Float64, Float64)' ],
        [ optional => 'Nullable(UInt64)' ],
    ],
);

# Pre-encode native
# Only the encode() call sits between the two time() readings.
print "Encoding Native format...\n";
my $t0                 = time();
my $native_data        = $encoder->encode(\@data);
my $native_encode_time = time() - $t0;
# Pre-encode JSON (for JSONEachRow format)
# Produces one JSON object per row, each terminated by a newline, and
# accumulates them in $json_data. The timed span starts before the
# JSON::PP object is built, so $json_encode_time includes that setup as
# well as the per-row encoding.
print "Encoding JSON format...\n";
$t0 = time();
my $json_data = '';
# JSON::PP encoder with the utf8 option switched on.
my $json = JSON::PP->new->utf8;
for my $row (@data) {
# Name the positional row fields. Indices 0..4 line up with the order of
# the column list given to ClickHouse::Encoder->new (presumably the
# encoder reads rows positionally in that same order -- confirm).
my %obj = (
id => $row->[0],
name => $row->[1],
tags => $row->[2],
coords => $row->[3],
optional => $row->[4],
);
$json_data .= $json->encode(\%obj) . "\n";
}
my $json_encode_time = time() - $t0;
# Report how long each pre-encoding step took and how large each payload
# is. The byte counts are taken once and kept local to this block.
{
    my $native_bytes = length($native_data);
    my $json_bytes   = length($json_data);

    printf "\nEncoding times:\n";
    printf " Native: %.3f sec (%.0f rows/sec)\n",
        $native_encode_time,
        $ROWS / $native_encode_time;
    printf " JSON: %.3f sec (%.0f rows/sec)\n",
        $json_encode_time,
        $ROWS / $json_encode_time;
    printf " Native encoding is %.1fx faster\n",
        $json_encode_time / $native_encode_time;

    printf "\nData sizes:\n";
    printf " Native: %d bytes (%.2f MB)\n",
        $native_bytes,
        $native_bytes / 1024 / 1024;
    printf " JSON: %d bytes (%.2f MB)\n",
        $json_bytes,
        $json_bytes / 1024 / 1024;
    printf " Native is %.0f%% smaller\n",
        (1 - $native_bytes / $json_bytes) * 100;
}
# Create test table
# Drops and recreates bench_complex with the same five columns that the
# encoder is configured with. Each statement is run through
# clickhouse-client on port $PORT; the script dies if either one fails,
# because a missing or stale table would make every later measurement
# meaningless.
print "\nSetting up test table...\n";
for my $query (
    'drop table if exists bench_complex',
    'create table bench_complex (
id UInt32,
name String,
tags Array(String),
coords Tuple(Float64, Float64),
optional Nullable(UInt64)
) engine = Null',
) {
    # List-form system() executes the program directly (no shell), so the
    # query text needs no quoting. It returns the wait status: 0 on
    # success, -1 if the program could not be started.
    system('clickhouse-client', '--port', $PORT, '--query', $query) == 0
        or die(
            $? == -1
            ? "Cannot run clickhouse-client: $!"
            : "clickhouse-client failed (wait status $?) running query: $query"
        );
}
# Benchmark function
#
# Pipes $data into "clickhouse-client ... insert into bench_complex format
# $format" once per iteration and records how long each run takes.
#
#   $format     - input format name interpolated into the insert query
#   $data       - payload written unmodified (binmode) to the client's STDIN
#   $iterations - number of timed runs
#
# Returns a list holding one elapsed time per iteration, measured with
# time() from just before the pipe is opened until it has been closed.
#
# Dies if the pipe cannot be opened, if the payload cannot be written, or
# if clickhouse-client finishes with a non-zero status.
sub bench_insert {
    my ($format, $data, $iterations) = @_;
    my @times;
    for (1 .. $iterations) {
        my $t0 = time();
        open my $fh, '|-', "clickhouse-client --port $PORT --query 'insert into bench_complex format $format' 2>/dev/null"
            or die "Cannot run clickhouse-client: $!";
        binmode $fh;
        print {$fh} $data
            or die "Cannot write $format data to clickhouse-client: $!";

        # The command goes through a shell, so open() succeeds even when
        # clickhouse-client is missing or rejects the insert, and stderr is
        # discarded. close() on a pipe waits for the child and is the only
        # place such a failure becomes visible: it returns false with $!
        # set for I/O errors, or with $! == 0 and the status in $? when the
        # command exited non-zero.
        unless (close $fh) {
            die(
                $!
                ? "Error closing pipe to clickhouse-client: $!"
                : "clickhouse-client failed (wait status $?) while inserting $format data"
            );
        }

        my $elapsed = time() - $t0;
        push @times, $elapsed;
    }
    return @times;
}
# Section banner
print "\n", "=" x 60, "\n";
print "INSERT Benchmark (Complex Types)\n";
print "=" x 60, "\n\n";

# Warmup
# One untimed insert per format, Native first; the returned timings are
# discarded.
print "Warming up...\n";
for my $format (qw(Native JSONEachRow)) {
    bench_insert($format, ($format eq 'Native' ? $native_data : $json_data), 1);
}

# Benchmark
# $ITERATIONS timed inserts per format, Native first.
print "Benchmarking Native format...\n";
my @native_times = bench_insert('Native', $native_data, $ITERATIONS);

print "Benchmarking JSONEachRow format...\n";
my @json_times = bench_insert('JSONEachRow', $json_data, $ITERATIONS);
# Calculate statistics
#
# Summarises a list of numeric samples.
#
#   @_ - the samples (at least one)
#
# Returns the two-element list (mean, minimum). Called with no samples it
# dies with Perl's division-by-zero error.
sub stats {
    my @samples = @_;

    # Accumulate the total and track the smallest value in a single pass.
    my $lowest = $samples[0];
    my $total  = 0;
    for my $sample (@samples) {
        $total += $sample;
        $lowest = $sample if $sample < $lowest;
    }

    return ($total / scalar(@samples), $lowest);
}
# Summarise the timed runs: average and best time per format, then the
# derived row and byte throughput based on the average.
my ($native_avg, $native_min) = stats(@native_times);
my ($json_avg,   $json_min)   = stats(@json_times);

print "\n", "-" x 60, "\n";
print "Results (seconds per $ROWS rows with Arrays/Tuples/Nullable):\n";
print "-" x 60, "\n\n";

# Both formats share the same report layout; only the heading and the
# numbers differ.
for my $report (
    [ "Native format:\n",        $native_avg, $native_min, length($native_data) ],
    [ "\nJSONEachRow format:\n", $json_avg,   $json_min,   length($json_data) ],
) {
    my ($heading, $avg, $min, $bytes) = @$report;
    printf($heading);
    printf " avg: %.4f sec, min: %.4f sec\n", $avg, $min;
    printf " throughput: %.0f rows/sec, %.2f MB/sec\n",
        $ROWS / $avg,
        $bytes / $avg / 1024 / 1024;
}

my $speedup = $json_avg / $native_avg;
print "\n", "=" x 60, "\n";
( run in 0.925 second using v1.01-cache-2.11-cpan-96521ef73a4 )