Lugh
view release on metacpan or search on metacpan
t/1010-memory-stress.t view on Meta::CPAN
#!/usr/bin/env perl
# t/1010-memory-stress.t - Memory stress tests for Lugh
use strict;
use warnings;
use Test::More;
use FindBin;
BEGIN {
    # Test::LeakTrace is an optional dependency. Load and import it at
    # compile time; if that fails for any reason, skip this whole test file.
    eval {
        require Test::LeakTrace;
        Test::LeakTrace->import();
    };
    plan(skip_all => 'Test::LeakTrace required for memory leak tests') if $@;
}
use Lugh;
# Model fixture expected in the data/ directory next to this test file.
my $model_file = "$FindBin::Bin/data/test-model.gguf";
plan(skip_all => "No test model at $model_file") unless -e $model_file;

# Shared objects used by every subtest below.
my $model     = Lugh::Model->new(model => $model_file);
my $tokenizer = Lugh::Tokenizer->new(model => $model);
my $inference = Lugh::Inference->new(model => $model);

my @prompt_tokens = $tokenizer->encode("Once upon a time");

# Warmup: exercise both forward paths once before any measurement is taken.
{
    my @simple_logits = $inference->forward_simple(\@prompt_tokens);
    my $warm_cache    = $inference->create_kv_cache();
    my @cached_logits = $inference->forward_cache($warm_cache, \@prompt_tokens);
}
# memory_growth_ok BLOCK, $iterations, $name
#
# Emits one test named $name that checks whether the leak count reported by
# Test::LeakTrace::leaked_count grows with the number of times BLOCK is run.
#
# BLOCK is run once as a warmup, then measured twice: over int($iterations/2)
# runs and over $iterations runs. The test passes when the second count is
# less than 1.5 times the first. If the first count is zero, the second must
# be zero as well.
#
# Returns the boolean result of the check. On failure both counts and their
# ratio are reported through diag().
#
# The (&$$) prototype is what allows callers to write
#   memory_growth_ok { ... } $iterations, $name;
sub memory_growth_ok(&$$) {
    my ($code, $iterations, $name) = @_;

    # The second measurement runs twice as many iterations as the first, so a
    # leak that grows linearly with the iteration count yields a ratio of
    # about 2. The cut-off must therefore sit below 2, otherwise linear growth
    # would be accepted.
    my $max_ratio = 1.5;
    my $half      = int($iterations / 2);

    # Run warmup
    $code->();

    my $before = Test::LeakTrace::leaked_count {
        for (1..$half) { $code->() }
    };
    my $after = Test::LeakTrace::leaked_count {
        for (1..$iterations) { $code->() }
    };

    # Memory should not grow linearly with iterations
    my ($ok, $ratio);
    if ($before > 0) {
        $ratio = $after / $before;
        $ok    = ($ratio < $max_ratio);
    }
    else {
        # No ratio can be formed against a zero baseline; leaks that show up
        # only in the longer run still count as growth.
        $ratio = $after > 0 ? 'inf' : 1;
        $ok    = ($after == 0);
    }

    ok($ok, $name)
        or diag("$half iters: $before leaks, $iterations iters: $after leaks (ratio: $ratio)");
    return $ok;
}
subtest 'Stress: Repeated tokenization' => sub {
    # Each iteration encodes three short strings and decodes the resulting
    # tokens again; the results are discarded.
    memory_growth_ok {
        foreach my $sample ('Hello', 'Once upon a time', 'The quick brown fox') {
            my @ids       = $tokenizer->encode($sample);
            my $roundtrip = $tokenizer->decode(\@ids);
        }
    } 100, 'Tokenization stress';
};
subtest 'Stress: Repeated forward passes' => sub {
    # Each iteration runs forward_simple on the shared prompt and samples one
    # token from the returned logits with sample_top_p.
    memory_growth_ok {
        my @scores = $inference->forward_simple(\@prompt_tokens);
        my $picked = $inference->sample_top_p(\@scores, 0.9, 1.0);
    } 50, 'Forward pass stress';
};
subtest 'Stress: Cache create/destroy cycle' => sub {
    # Each iteration creates a fresh KV cache, runs one forward pass through
    # it, and then drops the reference to the cache explicitly.
    memory_growth_ok {
        my $kv     = $inference->create_kv_cache();
        my @scores = $inference->forward_cache($kv, \@prompt_tokens);
        $kv = undef;
    } 50, 'Cache cycle stress';
};
subtest 'Stress: Pool create/destroy cycle' => sub {
    # Each iteration creates a fresh memory pool, runs one forward pass
    # through it, and then drops the reference to the pool explicitly.
    memory_growth_ok {
        my $mem    = $inference->create_memory_pool();
        my @scores = $inference->forward_pool($mem, \@prompt_tokens);
        $mem = undef;
    } 50, 'Pool cycle stress';
};
subtest 'Stress: Full generation loops' => sub {
    # Seed once, before any measurement starts.
    Lugh::srand(42);

    # Each iteration feeds the prompt through a new KV cache, then five times
    # samples a token and feeds that single token back through the same cache.
    memory_growth_ok {
        my $kv     = $inference->create_kv_cache();
        my @scores = $inference->forward_cache($kv, \@prompt_tokens);
        foreach my $step (1 .. 5) {
            my $next = $inference->sample_top_p(\@scores, 0.9, 1.0);
            @scores = $inference->forward_cache($kv, [$next]);
        }
    } 20, 'Generation stress';
};
subtest 'Stress: Multiple components interleaved' => sub {
Lugh::srand(42);
memory_growth_ok {
# Create various components
my $cache = $inference->create_kv_cache();
my $pool = $inference->create_memory_pool();
# Use them
my @logits1 = $inference->forward_cache($cache, \@prompt_tokens);
my @logits2 = $inference->forward_pool($pool, \@prompt_tokens);
# Sample
my $t1 = $inference->sample_top_p(\@logits1, 0.9, 1.0);
( run in 3.046 seconds using v1.01-cache-2.11-cpan-524268b4103 )