iterations results from the CPAN

iterations
Lugh
view release on metacpan or search on metacpan
t/0036-performance-regression.t view on Meta::CPAN
#!/usr/bin/env perl
# t/0036-performance-regression.t - Performance regression testing and benchmarking

use strict;
use warnings;
use Test::More;
use FindBin;
use Time::HiRes qw(time);

use Lugh;

my $model_file = $FindBin::Bin . '/data/test-model.gguf';

# ============================================================================
# Configuration
# ============================================================================

# Performance baselines (in milliseconds) and the allowed slack.  Each value
# is read from the named environment variable when that variable is defined;
# otherwise the default listed beside it is used.  Calibrate the baselines
# for the hardware the suite runs on.
my ($BASELINE_FORWARD_MS, $BASELINE_DECODE_MS, $BASELINE_TOKENIZE_MS, $TOLERANCE) =
    map { $ENV{ $_->[0] } // $_->[1] } (
        [ LUGH_BASELINE_FORWARD_MS  => 50   ],
        [ LUGH_BASELINE_DECODE_MS   => 10   ],
        [ LUGH_BASELINE_TOKENIZE_MS => 1    ],
        [ LUGH_PERF_TOLERANCE       => 0.50 ],  # fraction above baseline; 0.50 = 50%
    );

# Iteration counts: untimed warm-up calls made by benchmark(), and the base
# number of timed calls that individual benchmarks scale from.
my ($WARMUP_ITERS, $BENCH_ITERS) = (2, 5);

# ============================================================================
# Helper Functions
# ============================================================================

# Time a code reference and return its average cost per call.
#
# Parameters:
#   $name       - label for the benchmark; used only in the error message
#                 raised for an invalid iteration count.
#   $iterations - number of timed calls.  A fractional value is truncated to
#                 an integer; the result must be at least 1.
#   $code       - code reference to run; it is called with no arguments.
#
# Returns the mean time per timed call in milliseconds, measured with time()
# as imported from Time::HiRes at the top of this file.  The code reference is
# first called $WARMUP_ITERS times without being timed.
#
# Dies if $iterations is undefined or truncates to less than 1.
sub benchmark {
    my ($name, $iterations, $code) = @_;

    # The range operator truncates a fractional count, so truncate up front
    # and divide by the number of calls actually made.  A count below 1 would
    # otherwise end in an unexplained "Illegal division by zero" (for 0) or a
    # meaningless average over zero calls.
    my $runs = int($iterations // 0);
    die 'benchmark(' . ($name // '?') . '): iterations must be at least 1, got '
        . ($iterations // 'undef')
        if $runs < 1;

    # Warmup: untimed calls so one-off start-up costs are not measured.
    $code->() for 1 .. $WARMUP_ITERS;

    # Benchmark: time the whole batch, then average.
    my $start = time();
    $code->() for 1 .. $runs;
    my $elapsed = time() - $start;

    return ($elapsed / $runs) * 1000;
}

# Compare a measured time against its baseline and report the outcome.
#
# Parameters:
#   $name        - label printed in the diagnostic line.
#   $actual_ms   - measured time in milliseconds.
#   $baseline_ms - expected time in milliseconds; must be a number greater
#                  than zero.
#   $tolerance   - optional allowed fractional slowdown (0.5 means 50%);
#                  defaults to the file-level $TOLERANCE.
#
# Returns true when $actual_ms / $baseline_ms is strictly below
# 1 + $tolerance, false otherwise.  Always emits one diag() line showing the
# timing, the baseline, the percentage difference and PASS or REGRESSION.
#
# Dies if $baseline_ms is undefined, not numeric, or not greater than zero.
sub check_regression {
    my ($name, $actual_ms, $baseline_ms, $tolerance) = @_;
    $tolerance //= $TOLERANCE;

    # Baselines come from environment variables, so they may be empty, zero
    # or non-numeric.  Those would crash the division below with a bare
    # "Illegal division by zero", and a negative baseline would make every
    # measurement look like a pass.  Fail with a message that names the
    # benchmark and the offending value instead.
    require Scalar::Util;
    unless (defined $baseline_ms
        && Scalar::Util::looks_like_number($baseline_ms)
        && $baseline_ms > 0)
    {
        die 'check_regression(' . ($name // '?') . '): baseline must be a number '
            . "greater than zero, got '" . ($baseline_ms // 'undef') . "'";
    }

    my $ratio = $actual_ms / $baseline_ms;
    my $within_tolerance = $ratio < (1 + $tolerance);

    my $status = $within_tolerance ? 'PASS' : 'REGRESSION';
    my $pct = sprintf("%.1f%%", ($ratio - 1) * 100);

    diag("$name: ${actual_ms}ms (baseline: ${baseline_ms}ms, ${pct} from baseline) [$status]");

    return $within_tolerance;
}

# ============================================================================
# Section A: Basic Timing Benchmarks
# ============================================================================

subtest 'Benchmark Infrastructure' => sub {
    # Sanity-check the benchmark() helper itself using a trivial counting
    # loop as the workload, before any model-dependent benchmarks run.
    my $count_to_1000 = sub {
        my $x = 0;
        $x++ for 1..1000;
    };
    my $avg_ms = benchmark("test", 3, $count_to_1000);

    # The average must be a sane duration: not negative, and below 1000 ms.
    ok($avg_ms >= 0, 'Benchmark returns non-negative time');
    ok($avg_ms < 1000, 'Benchmark completes in reasonable time');

    diag("Infrastructure test: ${avg_ms}ms");
};

SKIP: {
    skip "No test model at $model_file", 1 unless -f $model_file;

    # Load components
    my $model = Lugh::Model->new(model => $model_file);
    my $tokenizer = Lugh::Tokenizer->new(model => $model);
    my $inference = Lugh::Inference->new(model => $model, n_threads => 4);

    ok($model, 'Model loaded for benchmarks');
    ok($tokenizer, 'Tokenizer created');
    ok($inference, 'Inference created');

    my $vocab_size = $tokenizer->n_vocab;
    diag("Vocab size: $vocab_size");

    # ============================================================================
    # Section B: Tokenization Performance
    # ============================================================================

    subtest 'Tokenization Performance' => sub {
        my $short_text = "Hello world";
        my $medium_text = "The quick brown fox jumps over the lazy dog. " x 5;
        my $long_text = "word " x 100;

        # Short text encoding
        my $short_encode_ms = benchmark("short_encode", $BENCH_ITERS * 10, sub {
            my @tokens = $tokenizer->encode($short_text);
        });
        ok($short_encode_ms > 0, "Short encode: ${short_encode_ms}ms");
( run in 0.606 second using v1.01-cache-2.11-cpan-71847e10f99 )