ClickHouse-Encoder
view release on metacpan or search on metacpan
bench/insert_benchmark.pl view on Meta::CPAN
# Title banner followed by the run parameters in effect.
print(('=' x 60) . "\n");
print "ClickHouse Insert Benchmark: Native vs CSV\n";
print(('=' x 60) . "\n\n");
print "Rows per batch: $ROWS\n"
    . "Iterations: $ITERATIONS\n"
    . "ClickHouse port: $PORT\n\n";
# Build $ROWS synthetic rows. Each row is:
#   [ sequential id, random integer below 1e9, random float below 1000,
#     "string_value_<id>" padded with twenty "x" characters ]
print "Generating test data...\n";
my @data = map {
    [
        $_,
        int(rand(1_000_000_000)),
        rand() * 1000,
        "string_value_$_" . ('x' x 20),
    ]
} 1 .. $ROWS;
# Encoder configured with one [ name, type ] pair per column, in row order.
my @column_specs = (
    [ id     => 'UInt32'  ],
    [ bignum => 'UInt64'  ],
    [ value  => 'Float64' ],
    [ name   => 'String'  ],
);
my $encoder = ClickHouse::Encoder->new(columns => \@column_specs);
# Pre-encode data
# Serialize the rows once per format up front, timing each serializer, so the
# insert benchmark itself only has to stream ready-made payloads.
print "Pre-encoding data...\n";
my $t0 = time();
my $native_data = $encoder->encode(\@data);
my $native_encode_time = time() - $t0;

# The "CSV" payload is tab-separated text, one row per line. No escaping is
# applied; the generated values contain no tabs or newlines.
$t0 = time();
my $csv_data = '';
for my $row (@data) {
    $csv_data .= join("\t", @$row) . "\n";
}
my $csv_encode_time = time() - $t0;

# Format rows/sec for an elapsed time. Yields 'n/a' instead of dying with
# "Illegal division by zero" when the clock reports no elapsed time (for
# example a whole-second timer, or a very small batch).
my $encode_rate = sub {
    my ($elapsed) = @_;
    return $elapsed > 0 ? sprintf('%.0f', $ROWS / $elapsed) : 'n/a';
};

print "\nEncoding times:\n";
printf " Native: %.3f sec (%s rows/sec)\n", $native_encode_time, $encode_rate->($native_encode_time);
printf " CSV: %.3f sec (%s rows/sec)\n", $csv_encode_time, $encode_rate->($csv_encode_time);
print "\nData sizes:\n";
printf " Native: %d bytes (%.2f MB)\n", length($native_data), length($native_data) / 1024 / 1024;
printf " CSV: %d bytes (%.2f MB)\n", length($csv_data), length($csv_data) / 1024 / 1024;
# Create test table
# Drop and recreate both target tables through clickhouse-client. Every
# statement is checked: if the client cannot be started or a statement fails,
# stop here rather than go on to time inserts into tables that do not exist.
print "\nSetting up test table...\n";
for my $sql (
    'drop table if exists bench_native',
    'drop table if exists bench_csv',
    'create table bench_native (id UInt32, bignum UInt64, value Float64, name String) engine = Null',
    'create table bench_csv (id UInt32, bignum UInt64, value Float64, name String) engine = Null',
) {
    # List-form system() bypasses the shell, so the SQL needs no shell quoting.
    my $status = system('clickhouse-client', '--port', $PORT, '--query', $sql);
    next if $status == 0;
    die "Cannot run clickhouse-client: $!" if $status == -1;
    die sprintf("clickhouse-client failed (exit code %d) for query: %s", $status >> 8, $sql);
}
# Benchmark function
# Time repeated inserts of one pre-encoded payload piped into clickhouse-client.
#
# Arguments:
#   $table      - target table name, interpolated into the INSERT statement
#   $format     - input format name passed after "format" in the statement
#   $data       - payload, written verbatim (binmode) to the client's stdin
#   $iterations - number of timed inserts to perform
#
# Returns a list with one elapsed time() difference per iteration. Each sample
# covers starting the client, writing the payload and waiting for it to exit.
#
# Dies if the client cannot be started, the payload cannot be written, or the
# client exits unsuccessfully. The client's stderr is discarded, so without
# these checks a failing insert would be timed as if it had succeeded.
sub bench_insert {
    my ($table, $format, $data, $iterations) = @_;

    # If the client exits before reading everything, the default SIGPIPE
    # action would kill this script without a message. Ignoring the signal
    # turns that into a failed print, which is reported below.
    local $SIG{PIPE} = 'IGNORE';

    my @times;
    for (1 .. $iterations) {
        my $t0 = time();
        open my $fh, '|-', "clickhouse-client --port $PORT --query 'insert into $table format $format' 2>/dev/null"
            or die "Cannot run clickhouse-client: $!";
        binmode $fh;
        print {$fh} $data
            or die "Cannot write to clickhouse-client (insert into $table format $format): $!";

        # close() on a pipe waits for the child and returns false when it
        # exited non-zero; $! is then 0 and the wait status is in $?.
        unless (close $fh) {
            die "Cannot close pipe to clickhouse-client: $!" if $!;
            die "clickhouse-client failed with wait status $? (insert into $table format $format)";
        }
        push @times, time() - $t0;
    }
    return @times;
}
# Section banner for the insert runs.
print "\n" . ('=' x 60) . "\n";
print "INSERT Benchmark (into Null engine - measures parsing speed)\n";
print(('=' x 60) . "\n\n");

# One insert per format first; these timings are discarded.
print "Warming up...\n";
bench_insert(qw(bench_native Native), $native_data, 1);
bench_insert(qw(bench_csv TabSeparated), $csv_data, 1);

# Timed runs: Native payload
print "Benchmarking Native format...\n";
my @native_times = bench_insert(qw(bench_native Native), $native_data, $ITERATIONS);

# Timed runs: tab-separated payload
print "Benchmarking TabSeparated (CSV) format...\n";
my @csv_times = bench_insert(qw(bench_csv TabSeparated), $csv_data, $ITERATIONS);
# Calculate statistics
# Summarize a list of numeric samples.
#
# Arguments: the samples, as a flat list.
# Returns:   (average, minimum, maximum), or an empty list when no samples
#            are given (instead of dying on a division by zero).
sub stats {
    my @times = @_;
    return unless @times;

    # Single pass: accumulate the sum while tracking both extremes, rather
    # than sorting the whole list twice just to read its first and last item.
    my ($sum, $min, $max) = (0, $times[0], $times[0]);
    for my $sample (@times) {
        $sum += $sample;
        $min = $sample if $sample < $min;
        $max = $sample if $sample > $max;
    }
    return ($sum / @times, $min, $max);
}
# Summarize the timed runs of each format.
my ($native_avg, $native_min, $native_max) = stats(@native_times);
my ($csv_avg, $csv_min, $csv_max) = stats(@csv_times);

print "\n", "-" x 60, "\n";
print "Results (seconds per $ROWS rows):\n";
print "-" x 60, "\n\n";

# One report section per format: heading, timing summary, then throughput
# derived from the average time and the payload size in bytes.
for my $report (
    [ "Native format:\n",               $native_avg, $native_min, $native_max, length($native_data) ],
    [ "\nTabSeparated (CSV) format:\n", $csv_avg,    $csv_min,    $csv_max,    length($csv_data) ],
) {
    my ($heading, $avg, $min, $max, $bytes) = @$report;

    print $heading;
    printf " avg: %.4f sec min: %.4f max: %.4f\n", $avg, $min, $max;

    # A zero average (timer too coarse to register the run) would otherwise
    # abort the report with "Illegal division by zero".
    if ($avg > 0) {
        printf " throughput: %.0f rows/sec, %.2f MB/sec\n",
            $ROWS / $avg,
            $bytes / $avg / 1024 / 1024;
    }
    else {
        print " throughput: n/a (no measurable elapsed time)\n";
    }
}
( run in 0.708 second using v1.01-cache-2.11-cpan-96521ef73a4 )