Lugh
view release on metacpan or search on metacpan
t/0036-performance-regression.t view on Meta::CPAN
#!/usr/bin/env perl
# t/0036-performance-regression.t - Performance regression testing and benchmarking
use strict;
use warnings;
use Test::More;
use FindBin;
use Time::HiRes qw(time);
use Lugh;
# Path to the GGUF model fixture, resolved relative to this test script.
my $model_file = join '/', $FindBin::Bin, 'data', 'test-model.gguf';

# ============================================================================
# Configuration
# ============================================================================
# Baseline timings (milliseconds) and the regression tolerance are read from
# the environment so they can be calibrated for the hardware running the
# tests; the values assigned below are the fallbacks used when a variable is
# not set.
my ($BASELINE_FORWARD_MS, $BASELINE_DECODE_MS, $BASELINE_TOKENIZE_MS, $TOLERANCE) = @ENV{qw(
    LUGH_BASELINE_FORWARD_MS
    LUGH_BASELINE_DECODE_MS
    LUGH_BASELINE_TOKENIZE_MS
    LUGH_PERF_TOLERANCE
)};
$BASELINE_FORWARD_MS  //= 50;
$BASELINE_DECODE_MS   //= 10;
$BASELINE_TOKENIZE_MS //= 1;
$TOLERANCE            //= 0.50;    # 50% tolerance by default

# Number of untimed warmup calls and of timed calls used by the benchmarks.
my ($WARMUP_ITERS, $BENCH_ITERS) = (2, 5);
# ============================================================================
# Helper Functions
# ============================================================================
# Measure the average wall-clock cost of calling a code reference.
#
# Arguments:
#   $name       - label for the benchmark (accepted but not used in the body)
#   $iterations - number of timed calls to make
#   $code       - code reference to invoke
#
# The code reference is first called $WARMUP_ITERS times without timing, then
# called $iterations times between two time() readings.
#
# Returns the mean time per timed call, in milliseconds.
sub benchmark {
    my ($name, $iterations, $code) = @_;

    # Untimed warmup calls.
    $code->() for 1 .. $WARMUP_ITERS;

    # Timed calls.
    my $t0 = time();
    $code->() for 1 .. $iterations;
    my $total_seconds = time() - $t0;

    my $seconds_per_call = $total_seconds / $iterations;
    return $seconds_per_call * 1000;
}
# Compare a measured timing against its baseline and report the outcome.
#
# Arguments:
#   $name        - label used in the diagnostic line
#   $actual_ms   - measured time in milliseconds
#   $baseline_ms - expected time in milliseconds; must be a positive number
#   $tolerance   - allowed fractional slowdown (0.50 means 50%); defaults to
#                  $TOLERANCE when undefined
#
# Emits one diag() line showing the measurement, the baseline, the relative
# difference and a PASS/REGRESSION status.
#
# Returns true when $actual_ms / $baseline_ms is below 1 + $tolerance.
# Dies with a descriptive message when the baseline is not a positive number.
sub check_regression {
    my ($name, $actual_ms, $baseline_ms, $tolerance) = @_;
    $tolerance //= $TOLERANCE;

    # Baselines may come from LUGH_BASELINE_* environment variables. A zero,
    # empty or non-numeric value would otherwise die below with an opaque
    # "Illegal division by zero", and a negative one would produce a negative
    # ratio that always reports PASS. Reject both with a clear message.
    die sprintf(
        "check_regression(%s): baseline must be a positive number of milliseconds, got '%s'",
        $name // 'unnamed',
        $baseline_ms // 'undef',
    ) unless defined $baseline_ms && $baseline_ms > 0;

    my $ratio            = $actual_ms / $baseline_ms;
    my $within_tolerance = $ratio < (1 + $tolerance);
    my $status           = $within_tolerance ? 'PASS' : 'REGRESSION';
    my $pct              = sprintf("%.1f%%", ($ratio - 1) * 100);
    diag("$name: ${actual_ms}ms (baseline: ${baseline_ms}ms, ${pct} from baseline) [$status]");
    return $within_tolerance;
}
# ============================================================================
# Section A: Basic Timing Benchmarks
# ============================================================================
# Sanity-check the benchmark() helper: time a trivial counting loop and
# confirm the reported per-call average (milliseconds) is plausible.
subtest 'Benchmark Infrastructure' => sub {
    # Cheap workload: count to 1000.
    my $spin = sub {
        my $counter = 0;
        $counter++ for 1 .. 1000;
    };
    my $avg_ms = benchmark("test", 3, $spin);

    ok($avg_ms >= 0,   'Benchmark returns non-negative time');
    ok($avg_ms < 1000, 'Benchmark completes in reasonable time');
    diag("Infrastructure test: ${avg_ms}ms");
};
SKIP: {
skip "No test model at $model_file", 1 unless -f $model_file;
# Load components
my $model = Lugh::Model->new(model => $model_file);
my $tokenizer = Lugh::Tokenizer->new(model => $model);
my $inference = Lugh::Inference->new(model => $model, n_threads => 4);
ok($model, 'Model loaded for benchmarks');
ok($tokenizer, 'Tokenizer created');
ok($inference, 'Inference created');
my $vocab_size = $tokenizer->n_vocab;
diag("Vocab size: $vocab_size");
# ============================================================================
# Section B: Tokenization Performance
# ============================================================================
subtest 'Tokenization Performance' => sub {
my $short_text = "Hello world";
my $medium_text = "The quick brown fox jumps over the lazy dog. " x 5;
my $long_text = "word " x 100;
# Short text encoding
my $short_encode_ms = benchmark("short_encode", $BENCH_ITERS * 10, sub {
my @tokens = $tokenizer->encode($short_text);
});
ok($short_encode_ms > 0, "Short encode: ${short_encode_ms}ms");
( run in 0.606 second using v1.01-cache-2.11-cpan-71847e10f99 )