Lugh
view release on metacpan or search on metacpan
t/0009-performance.t view on Meta::CPAN
backend => 'CPU',
);
my @tokens = $tokenizer->encode("Hello world");
# Warm up
$inference->forward_simple(\@tokens);
# Benchmark
my $start = time();
my $iterations = 5;
for (1..$iterations) {
$inference->forward_simple(\@tokens);
}
my $elapsed = time() - $start;
my $avg_ms = ($elapsed / $iterations) * 1000;
ok($avg_ms > 0, "CPU forward pass takes measurable time (avg: ${avg_ms}ms)");
ok($avg_ms < 10000, "CPU forward pass completes in reasonable time (<10s)");
diag("CPU forward avg: ${avg_ms}ms per iteration");
}
# Test best backend (GPU if available)
{
my $best = Lugh::best_backend();
my $inference = Lugh::Inference->new(
t/0009-performance.t view on Meta::CPAN
backend => $best,
);
my @tokens = $tokenizer->encode("Hello world");
# Warm up
$inference->forward_simple(\@tokens);
# Benchmark
my $start = time();
my $iterations = 5;
for (1..$iterations) {
$inference->forward_simple(\@tokens);
}
my $elapsed = time() - $start;
my $avg_ms = ($elapsed / $iterations) * 1000;
ok($avg_ms > 0, "Best backend ($best) forward pass takes measurable time (avg: ${avg_ms}ms)");
ok($avg_ms < 10000, "Best backend forward pass completes in reasonable time (<10s)");
diag("Best backend ($best) forward avg: ${avg_ms}ms per iteration");
}
# Test KV cache improves decode performance
{
my $inference = Lugh::Inference->new(
model => $model,
t/0009-performance.t view on Meta::CPAN
isa_ok($pool, 'Lugh::MemoryPool', 'Pool is correct type');
my @tokens = $tokenizer->encode("Hello world");
# Forward with pool
my @logits_pool = $inference->forward_pool($pool, \@tokens);
ok(@logits_pool > 0, 'forward_pool returns logits');
# Multiple passes with same pool (should be efficient)
my $start = time();
my $iterations = 5;
for (1..$iterations) {
my @logits = $inference->forward_pool($pool, \@tokens);
}
my $elapsed = time() - $start;
my $avg_ms = ($elapsed / $iterations) * 1000;
ok($avg_ms > 0, "forward_pool avg: ${avg_ms}ms");
ok($pool->reset(), 'Pool reset works');
diag("Memory pool forward avg: ${avg_ms}ms per iteration");
}
# Test batch processing
{
my $inference = Lugh::Inference->new(
model => $model,
t/0036-performance-regression.t view on Meta::CPAN
# Configuration
# ============================================================================
# Baseline timings can be set via environment variables.
# These should be calibrated for your hardware.

# Read a numeric setting from %ENV.
#   $var              - name of the environment variable
#   $default          - value returned when the variable is unset or unusable
#   $must_be_positive - true: only values > 0 are accepted
#                       false: any numeric value is accepted
# Returns the environment value when it is usable, otherwise $default.
# An unusable value (empty string, non-numeric text, or a non-positive number
# where a positive one is required) is reported with a warning rather than
# silently used.
sub _env_number {
    my ($var, $default, $must_be_positive) = @_;

    my $raw = $ENV{$var};
    return $default unless defined $raw;

    require Scalar::Util;
    my $usable = Scalar::Util::looks_like_number($raw)
        && (!$must_be_positive || $raw > 0);
    return $raw if $usable;

    warn "Ignoring invalid $var='$raw'; using default $default\n";
    return $default;
}

# Baselines are used as divisors by check_regression(), so they must be
# strictly positive; a zero or empty value would die with a division by zero.
my $BASELINE_FORWARD_MS  = _env_number('LUGH_BASELINE_FORWARD_MS',  50, 1);
my $BASELINE_DECODE_MS   = _env_number('LUGH_BASELINE_DECODE_MS',   10, 1);
my $BASELINE_TOKENIZE_MS = _env_number('LUGH_BASELINE_TOKENIZE_MS', 1,  1);
my $TOLERANCE            = _env_number('LUGH_PERF_TOLERANCE', 0.50, 0); # 50% tolerance by default
# Number of iterations for benchmarks
my $WARMUP_ITERS = 2;
my $BENCH_ITERS = 5;
# ============================================================================
# Helper Functions
# ============================================================================
# Time a code reference and report the mean wall-clock cost of one call.
#   $name       - label for the benchmark (accepted but not used here)
#   $iterations - number of timed calls
#   $code       - code reference to invoke
# Returns the average duration of a timed call, in milliseconds.
sub benchmark {
    my ($name, $iterations, $code) = @_;

    # Untimed warm-up calls come first; only the loop below is measured.
    $code->() for 1 .. $WARMUP_ITERS;

    my $began = time();
    $code->() for 1 .. $iterations;
    my $total_secs = time() - $began;

    # Seconds per call, scaled to milliseconds.
    return ($total_secs / $iterations) * 1000;
}
sub check_regression {
my ($name, $actual_ms, $baseline_ms, $tolerance) = @_;
$tolerance //= $TOLERANCE;
my $ratio = $actual_ms / $baseline_ms;
my $within_tolerance = $ratio < (1 + $tolerance);
t/0044-full-weight-training.t view on Meta::CPAN
lives_ok { $optimizer->step() } 'optimizer step after accumulation';
done_testing();
};
# =============================================================================
# STRESS TEST - MANY ITERATIONS (REGRESSION TEST FOR MEMORY CORRUPTION)
# =============================================================================
subtest 'stress test many iterations' => sub {
# This catches memory corruption that manifests after many iterations
my $iterations = 50;
for my $i (1..$iterations) {
my $ctx = Lugh::Context->new(size => 64 * 1024 * 1024);
my $logits = Lugh::Train->forward(
inference => $inference,
context => $ctx,
tokens => [1, 65, 66, 67], # 4 tokens
train_lora => 0,
train_full => 1,
);
t/0044-full-weight-training.t view on Meta::CPAN
my $loss = Lugh::Train->cross_entropy_loss($ctx, $logits, [65, 66, 67, 68]);
eval { $loss->backward() };
if ($@) {
fail("iteration $i crashed: $@");
last;
}
}
pass("completed $iterations iterations without crash");
done_testing();
};
# Return the numerically smaller of two values; on a tie the second is returned.
sub min {
    my ($first, $second) = @_;
    return $first if $first < $second;
    return $second;
}
done_testing();
t/1010-memory-stress.t view on Meta::CPAN
my @prompt_tokens = $tokenizer->encode("Once upon a time");
# Warmup: run the prompt once through a plain forward pass and once through a
# forward pass with a freshly created KV cache.
# NOTE(review): presumably this is so one-time initialisation is not counted
# by the leak checks that follow — confirm.
{
# Plain forward pass over the prompt tokens; the logits are not inspected.
my @logits = $inference->forward_simple(\@prompt_tokens);
# New KV cache, lexical to this bare block, so it goes out of scope at the closing brace.
my $cache = $inference->create_kv_cache();
# Same prompt again, this time through the cache-aware forward path.
@logits = $inference->forward_cache($cache, \@prompt_tokens);
}
# Assert that the leak count of a code block does not grow in step with the
# number of times it is run.
#   $code       - block to exercise (the & prototype allows the
#                 `memory_growth_ok { ... } $n, $name` call form)
#   $iterations - iteration count for the full run; the first measured run
#                 uses half of it
#   $name       - test description passed to ok()
# Emits one test result and returns the same true/false value.
sub memory_growth_ok(&$$) {
    my ($code, $iterations, $name) = @_;

    # One unmeasured call before any counting starts.
    $code->();

    # Leak count over half the iterations ...
    my $half_run_leaks = Test::LeakTrace::leaked_count {
        for (1..($iterations / 2)) { $code->() }
    };
    # ... and over the full number of iterations.
    my $full_run_leaks = Test::LeakTrace::leaked_count {
        for (1..$iterations) { $code->() }
    };

    # Doubling the work should not push the leak count to 2.5x or more.
    # With no leaks in the half run the ratio is taken as 1.
    my $growth = 1;
    $growth = $full_run_leaks / $half_run_leaks if $half_run_leaks > 0;
    my $passed = ($growth < 2.5);

    ok($passed, $name)
        or diag("$iterations/2 iters: $half_run_leaks leaks, $iterations iters: $full_run_leaks leaks (ratio: $growth)");
    return $passed;
}
subtest 'Stress: Repeated tokenization' => sub {
memory_growth_ok {
for my $text ("Hello", "Once upon a time", "The quick brown fox") {
my @tokens = $tokenizer->encode($text);
my $decoded = $tokenizer->decode(\@tokens);
}
} 100, 'Tokenization stress';
( run in 1.964 second using v1.01-cache-2.11-cpan-71847e10f99 )