streaming results from the CPAN

Lugh
                    indices[j] = j;
                }
                
                /* Apply temperature and softmax */
                for (j = 0; j < n_vocab; j++) {
                    probs[j] = expf((probs[j] - max_logit) / temperature);
                    sum += probs[j];
                }
                for (j = 0; j < n_vocab; j++) {
                    probs[j] /= sum;
                }
                
                /* Sort by probability (bubble sort with early exit) */
                {
                    int swapped, k;
                    for (j = 0; j < n_vocab - 1; j++) {
                        swapped = 0;
                        cumsum = 0;
                        for (k = 0; k < n_vocab - j - 1; k++) {
                            if (probs[k] < probs[k + 1]) {
                                float tmp = probs[k];
                                int tmp_idx = indices[k];
                                probs[k] = probs[k + 1];
                                indices[k] = indices[k + 1];
                                probs[k + 1] = tmp;
                                indices[k + 1] = tmp_idx;
                                swapped = 1;
                            }
                        }
                        if (!swapped) break;
                        for (k = 0; k <= j; k++) cumsum += probs[k];
                        if (cumsum >= top_p) break;
                    }
                }
                
                /* Sample from top_p tokens */
                threshold = lugh_rand_float() * top_p;
                cumsum = 0.0f;
                next_token = indices[0];
                for (j = 0; j < n_vocab; j++) {
                    cumsum += probs[j];
                    if (cumsum >= threshold) {
                        next_token = indices[j];
                        break;
                    }
                    if (cumsum >= top_p) break;
                }
                
                Safefree(probs);
                Safefree(indices);
            }
            
            SvREFCNT_dec((SV*)logits_av);
            
            /* Add to results */
            av_push(result_av, newSViv(next_token));
            tokens[current_len] = next_token;
            current_len++;
            gen_count++;
            
            /* Call streaming callback if provided */
            if (callback) {
                dSP;
                int should_stop;
                
                ENTER;
                SAVETMPS;
                
                PUSHMARK(SP);
                XPUSHs(sv_2mortal(newSViv(next_token)));
                XPUSHs(sv_2mortal(newSViv(gen_count)));
                PUTBACK;
                
                call_sv(callback, G_SCALAR);
                
                SPAGAIN;
                should_stop = POPi;
                
                PUTBACK;
                FREETMPS;
                LEAVE;
                
                /* Callback returns true to stop generation */
                if (should_stop) break;
            }
            
            /* Check for EOS token */
            if (next_token == eos_token) break;
        }
    }
    
    Safefree(tokens);
    
    /* Return generated tokens as list */
    n_result = av_len(result_av) + 1;
    {
        /* Use XSRETURN explicitly */
        int count = 0;
        SP = orig_sp;  /* Restore original stack pointer */
        for (i = 0; i < n_result; i++) {
            SV **svp = av_fetch(result_av, i, 0);
            if (svp) {
                XST_mIV(count, SvIV(*svp));
                count++;
            }
        }
        SvREFCNT_dec(result_av);
        XSRETURN(count);
    }

SV *
create_kv_cache(self)
    SV *self
PREINIT:
    HV *hv;
    SV **svp;
    int n_layer, n_ctx, n_head_kv, n_embd, n_head, head_dim;
    LughKVCache *cache;
CODE:
    /* Get inference object parameters */
    hv = (HV*)SvRV(self);
( run in 0.897 second using v1.01-cache-2.11-cpan-df04353d9ac )