Lugh

 view release on metacpan or  search on metacpan

t/0009-performance.t  view on Meta::CPAN

            backend => 'CPU',
        );
        
        my @tokens = $tokenizer->encode("Hello world");
        
        # Warm up
        $inference->forward_simple(\@tokens);
        
        # Benchmark
        my $start = time();
        my $iterations = 5;
        for (1..$iterations) {
            $inference->forward_simple(\@tokens);
        }
        my $elapsed = time() - $start;
        my $avg_ms = ($elapsed / $iterations) * 1000;
        
        ok($avg_ms > 0, "CPU forward pass takes measurable time (avg: ${avg_ms}ms)");
        ok($avg_ms < 10000, "CPU forward pass completes in reasonable time (<10s)");
        diag("CPU forward avg: ${avg_ms}ms per iteration");
    }
    
    # Test best backend (GPU if available)
    {
        my $best = Lugh::best_backend();
        my $inference = Lugh::Inference->new(

t/0009-performance.t  view on Meta::CPAN

            backend => $best,
        );
        
        my @tokens = $tokenizer->encode("Hello world");
        
        # Warm up
        $inference->forward_simple(\@tokens);
        
        # Benchmark
        my $start = time();
        my $iterations = 5;
        for (1..$iterations) {
            $inference->forward_simple(\@tokens);
        }
        my $elapsed = time() - $start;
        my $avg_ms = ($elapsed / $iterations) * 1000;
        
        ok($avg_ms > 0, "Best backend ($best) forward pass takes measurable time (avg: ${avg_ms}ms)");
        ok($avg_ms < 10000, "Best backend forward pass completes in reasonable time (<10s)");
        diag("Best backend ($best) forward avg: ${avg_ms}ms per iteration");
    }
    
    # Test KV cache improves decode performance
    {
        my $inference = Lugh::Inference->new(
            model => $model,

t/0009-performance.t  view on Meta::CPAN

        isa_ok($pool, 'Lugh::MemoryPool', 'Pool is correct type');
        
        my @tokens = $tokenizer->encode("Hello world");
        
        # Forward with pool
        my @logits_pool = $inference->forward_pool($pool, \@tokens);
        ok(@logits_pool > 0, 'forward_pool returns logits');
        
        # Multiple passes with same pool (should be efficient)
        my $start = time();
        my $iterations = 5;
        for (1..$iterations) {
            my @logits = $inference->forward_pool($pool, \@tokens);
        }
        my $elapsed = time() - $start;
        my $avg_ms = ($elapsed / $iterations) * 1000;
        
        ok($avg_ms > 0, "forward_pool avg: ${avg_ms}ms");
        ok($pool->reset(), 'Pool reset works');
        diag("Memory pool forward avg: ${avg_ms}ms per iteration");
    }
    
    # Test batch processing
    {
        my $inference = Lugh::Inference->new(
            model => $model,

t/0036-performance-regression.t  view on Meta::CPAN

# Configuration
# ============================================================================

# Baseline timings (milliseconds) and the allowed slowdown can be overridden
# via environment variables. These should be calibrated for your hardware.
#
# An override is only honoured when it is a plain non-negative decimal number
# (optional fraction and exponent, surrounding whitespace ignored). Anything
# else -- including an empty string, which `//` alone would accept because it
# is defined -- is reported with a warning and the built-in default is used.
# Baselines must additionally be greater than zero.
# NOTE(review): check_regression() divides by the baseline it is given;
# presumably these values are what gets passed there -- confirm.
my $env_number = sub {
    my ($var, $default, $allow_zero) = @_;

    my $raw = $ENV{$var};
    return $default unless defined $raw;

    if ($raw =~ m{\A \s* (?<num> (?: \d+ (?: [.] \d* )? | [.] \d+ ) (?: [eE] [+-]? \d+ )? ) \s* \z}xms) {
        my $number = $+{num};
        return $number if $allow_zero || $number > 0;
    }

    warn "Ignoring invalid $var='$raw'; using default $default\n";
    return $default;
};

my $BASELINE_FORWARD_MS  = $env_number->('LUGH_BASELINE_FORWARD_MS',  50);
my $BASELINE_DECODE_MS   = $env_number->('LUGH_BASELINE_DECODE_MS',   10);
my $BASELINE_TOKENIZE_MS = $env_number->('LUGH_BASELINE_TOKENIZE_MS', 1);
my $TOLERANCE            = $env_number->('LUGH_PERF_TOLERANCE', 0.50, 1);  # 50% tolerance by default

# Number of iterations for benchmarks
my $WARMUP_ITERS = 2;
my $BENCH_ITERS = 5;

# ============================================================================
# Helper Functions
# ============================================================================

# Measure the average wall-clock time of one call to $code.
#
# Parameters:
#   $name       - label for the benchmark; used only in the error message
#   $iterations - number of timed calls; must be at least 1
#   $code       - code reference to invoke
#
# Returns the mean elapsed time per timed call, in milliseconds. The timing
# resolution is that of whichever time() is in scope in this file.
#
# Dies with a descriptive message when $iterations is undefined or less
# than 1, rather than failing later with "Illegal division by zero".
sub benchmark {
    my ($name, $iterations, $code) = @_;

    die "benchmark '$name': iterations must be at least 1"
        unless defined $iterations && $iterations >= 1;

    # Warmup: untimed calls, count taken from the file-level $WARMUP_ITERS.
    $code->() for 1 .. $WARMUP_ITERS;

    # Benchmark: only these calls fall between the two clock readings.
    my $start = time();
    $code->() for 1 .. $iterations;
    my $elapsed = time() - $start;

    # Seconds per call -> milliseconds per call.
    return ($elapsed / $iterations) * 1000;
}

sub check_regression {
    my ($name, $actual_ms, $baseline_ms, $tolerance) = @_;
    $tolerance //= $TOLERANCE;

    my $ratio = $actual_ms / $baseline_ms;
    my $within_tolerance = $ratio < (1 + $tolerance);

t/0044-full-weight-training.t  view on Meta::CPAN

    
    lives_ok { $optimizer->step() } 'optimizer step after accumulation';
    
    done_testing();
};

# =============================================================================
# STRESS TEST - MANY ITERATIONS (REGRESSION TEST FOR MEMORY CORRUPTION)
# =============================================================================

subtest 'stress test many iterations' => sub {
    # This catches memory corruption that manifests after many iterations
    my $iterations = 50;
    
    for my $i (1..$iterations) {
        my $ctx = Lugh::Context->new(size => 64 * 1024 * 1024);
        
        my $logits = Lugh::Train->forward(
            inference  => $inference,
            context    => $ctx,
            tokens     => [1, 65, 66, 67],  # 4 tokens
            train_lora => 0,
            train_full => 1,
        );
        

t/0044-full-weight-training.t  view on Meta::CPAN

        
        my $loss = Lugh::Train->cross_entropy_loss($ctx, $logits, [65, 66, 67, 68]);
        
        eval { $loss->backward() };
        if ($@) {
            fail("iteration $i crashed: $@");
            last;
        }
    }
    
    pass("completed $iterations iterations without crash");
    
    done_testing();
};

# Return the numerically smaller of the first two arguments. When they
# compare equal, the second argument is the one returned.
sub min {
    my ($first, $second) = @_;
    return $first if $first < $second;
    return $second;
}

done_testing();

t/1010-memory-stress.t  view on Meta::CPAN

# Token list for a fixed prompt, encoded once here at file scope.
my @prompt_tokens = $tokenizer->encode("Once upon a time");

# Warmup: run one forward_simple pass and one forward_cache pass (the latter
# against a newly created KV cache) over the prompt tokens. The results are
# only stored in block-scoped variables and are never inspected.
# NOTE(review): presumably this is here to get one-time setup out of the way
# before the leak measurements further down -- confirm.
{
    my @logits = $inference->forward_simple(\@prompt_tokens);
    my $cache = $inference->create_kv_cache();
    @logits = $inference->forward_cache($cache, \@prompt_tokens);
}

# Assert that the leak count reported by Test::LeakTrace::leaked_count does
# not blow up when the amount of work is doubled.
#
# Usage (block-first call form, enabled by the (&$$) prototype):
#   memory_growth_ok { ... } $iterations, $name;
#
# Parameters:
#   $code       - code block to exercise
#   $iterations - iteration count for the long run; the short run uses half
#   $name       - test description passed to ok()
#
# Emits a single ok() result (with a diag() on failure) and returns the
# boolean outcome.
sub memory_growth_ok(&$$) {
    my ($code, $iterations, $name) = @_;

    # Run warmup
    $code->();

    # Short run: half the iterations.
    my $before = Test::LeakTrace::leaked_count {
        for (1..($iterations / 2)) { $code->() }
    };

    # Long run: the full iteration count.
    my $after = Test::LeakTrace::leaked_count {
        for (1..$iterations) { $code->() }
    };

    # Compare the long run against the short run. A short run with zero leaks
    # is treated as a baseline of 1 so that leaks appearing only in the long
    # run still count against the threshold; previously that case was forced
    # to a ratio of 1 and always passed, however many leaks the long run had.
    # NOTE(review): a leak count strictly proportional to the iteration count
    # gives a ratio of about 2, which is below 2.5 and therefore passes --
    # confirm the threshold matches the intent.
    my $baseline = $before > 0 ? $before : 1;
    my $ratio = $after / $baseline;
    my $ok = ($ratio < 2.5);
    ok($ok, $name) or diag("$iterations/2 iters: $before leaks, $iterations iters: $after leaks (ratio: $ratio)");
    return $ok;
}

subtest 'Stress: Repeated tokenization' => sub {
    memory_growth_ok {
        for my $text ("Hello", "Once upon a time", "The quick brown fox") {
            my @tokens = $tokenizer->encode($text);
            my $decoded = $tokenizer->decode(\@tokens);
        }
    } 100, 'Tokenization stress';



( run in 1.600 second using v1.01-cache-2.11-cpan-71847e10f99 )