Lugh

 view release on metacpan or  search on metacpan

lib/Lugh.xs  view on Meta::CPAN


PROTOTYPES: DISABLE

BOOT:
    /* Executed as part of the module's boot (load-time) code.
     * INIT_MUTEXES is defined outside this section; presumably it prepares
     * the locks used by the rest of the extension - confirm at its definition. */
    INIT_MUTEXES();

const char *
version()
CODE:
{
    /* Release string reported to Perl callers. The array has static
     * storage, so the pointer stays valid after this XSUB returns. */
    static const char release_string[] = "0.04";
    RETVAL = release_string;
}
OUTPUT:
    RETVAL

void
srand(seed)
    unsigned int seed
CODE:
{
    /* Pass the caller-supplied seed straight through to lugh_srand(). */
    const unsigned int rng_seed = seed;
    lugh_srand(rng_seed);
}

const char *
ggml_version()
CODE:
{
    /* Fixed ggml build identifier; it is a compile-time literal and is not
     * queried from the library at run time. */
    static const char ggml_build[] = "ggml 0.9.5";
    RETVAL = ggml_build;
}
OUTPUT:
    RETVAL

int
has_metal()
CODE:
{
    /* Returns the result of metal_is_available() unchanged; per the original
     * note this reflects whether the Metal backend was linked and registered. */
    const int metal_flag = metal_is_available();
    RETVAL = metal_flag;
}
OUTPUT:
    RETVAL

int
metal_available()
CODE:
{
    /* Thin wrapper: hands back whatever metal_is_available() reports. */
    const int metal_flag = metal_is_available();
    RETVAL = metal_flag;
}
OUTPUT:
    RETVAL

int
backend_count()
CODE:
{
    /* Number of backends in the ggml registry, narrowed to int for Perl. */
    const size_t n_registered = ggml_backend_reg_count();
    RETVAL = (int)n_registered;
}
OUTPUT:
    RETVAL

int
backend_device_count()
CODE:
{
    /* Number of devices ggml reports, narrowed to int for Perl. */
    const size_t n_devices = ggml_backend_dev_count();
    RETVAL = (int)n_devices;
}
OUTPUT:
    RETVAL

void
available_backends()
PPCODE:
{
    const size_t n_registered = ggml_backend_reg_count();
    size_t idx = 0;

    /* Reserve stack room for every registered backend plus the "auto" entry. */
    EXTEND(SP, n_registered + 1);

    /* Push one mortal string per registered backend, in registry order. */
    while (idx < n_registered) {
        const char *backend_name = ggml_backend_reg_name(ggml_backend_reg_get(idx));
        mPUSHs(newSVpv(backend_name, 0));
        idx++;
    }

    /* "auto" does not come from the registry; it is always appended last. */
    mPUSHs(newSVpvn("auto", 4));
}

SV *
backend_info(name)
    const char *name
CODE:
{
    HV *info = newHV();
    size_t i, count;
    int found = 0;
    
    count = ggml_backend_reg_count();
    for (i = 0; i < count; i++) {
        ggml_backend_reg_t reg = ggml_backend_reg_get(i);
        const char *reg_name = ggml_backend_reg_name(reg);
        if (strcmp(name, reg_name) == 0) {
            size_t dev_count = ggml_backend_reg_dev_count(reg);
            hv_store(info, "name", 4, newSVpv(reg_name, 0), 0);
            hv_store(info, "device_count", 12, newSViv(dev_count), 0);
            
            /* Check first device for type info */
            if (dev_count > 0) {
                ggml_backend_dev_t dev = ggml_backend_reg_dev_get(reg, 0);
                const char *desc = ggml_backend_dev_description(dev);
                enum ggml_backend_dev_type dtype = ggml_backend_dev_type(dev);
                const char *type_str;
                int is_gpu = 0;
                
                switch (dtype) {
                    case GGML_BACKEND_DEVICE_TYPE_CPU:
                        type_str = "CPU";
                        break;
                    case GGML_BACKEND_DEVICE_TYPE_GPU:
                        type_str = "GPU";
                        is_gpu = 1;
                        break;
                    default:
                        type_str = "ACCEL";
                        is_gpu = 1;
                }
                hv_store(info, "description", 11, newSVpv(desc, 0), 0);
                hv_store(info, "type", 4, newSVpv(type_str, 0), 0);
                hv_store(info, "is_gpu", 6, newSViv(is_gpu), 0);
            }
            found = 1;

lib/Lugh.xs  view on Meta::CPAN

int
n_vocab(self)
    SV *self
CODE:
{
    /* Read "n_vocab" from the object's hash; 32000 is used when the key is
     * not present. */
    SV **slot = hv_fetch((HV*)SvRV(self), "n_vocab", 7, 0);
    if (slot) {
        RETVAL = SvIV(*slot);
    } else {
        RETVAL = 32000;
    }
}
OUTPUT:
    RETVAL

int
n_embd(self)
    SV *self
CODE:
{
    /* Read "n_embd" from the object's hash; 2048 is used when the key is
     * not present. */
    SV **slot = hv_fetch((HV*)SvRV(self), "n_embd", 6, 0);
    if (slot) {
        RETVAL = SvIV(*slot);
    } else {
        RETVAL = 2048;
    }
}
OUTPUT:
    RETVAL

int
n_layer(self)
    SV *self
CODE:
{
    /* Read "n_layer" from the object's hash; 22 is used when the key is
     * not present. */
    SV **slot = hv_fetch((HV*)SvRV(self), "n_layer", 7, 0);
    if (slot) {
        RETVAL = SvIV(*slot);
    } else {
        RETVAL = 22;
    }
}
OUTPUT:
    RETVAL

int
n_head(self)
    SV *self
CODE:
{
    /* Read "n_head" from the object's hash; 32 is used when the key is
     * not present. */
    SV **slot = hv_fetch((HV*)SvRV(self), "n_head", 6, 0);
    if (slot) {
        RETVAL = SvIV(*slot);
    } else {
        RETVAL = 32;
    }
}
OUTPUT:
    RETVAL

void
forward(self, ...)
    SV *self
PREINIT:
    HV *hv;
    LughForwardOpts opts;
    LughForwardResult result;
    int i, j;
PPCODE:
    /*
     * forward(tokens => \@t, lora => $l, ...)
     *
     * Runs one forward pass through do_forward_unified().
     * Returns the logits as a flat list, or - when result.is_batch is set -
     * a single array ref holding one array ref of logits per sequence.
     * Croaks when no tokens were supplied, when 'cache'/'caches' are used
     * inconsistently with batch mode, or when the forward pass fails.
     *
     * croak() never returns, so every buffer owned by opts (tokens, caches,
     * sequences) is released before each croak below.
     */
    hv = (HV*)SvRV(self);
    Zero(&opts, 1, LughForwardOpts);

    /* Parse named parameters: forward(tokens => \@t, lora => $l, ...) */
    parse_forward_options(aTHX_ &opts, &ST(0), 1, items);

    if (!opts.tokens && !opts.all_tokens) {
        /* Neither token buffer exists here, but 'caches' may have been parsed. */
        if (opts.caches) Safefree(opts.caches);
        croak("forward() requires tokens => \\@tokens");
    }

    /* Validate caches vs cache usage */
    if (opts.all_tokens) {
        if (opts.cache) {
            if (opts.tokens) Safefree(opts.tokens);
            if (opts.caches) Safefree(opts.caches);
            free_sequences(opts.all_tokens, opts.seq_lengths, opts.n_sequences);
            croak("Use 'caches' (array) for batch mode, not 'cache'");
        }
        if (opts.caches && opts.n_caches != opts.n_sequences) {
            if (opts.tokens) Safefree(opts.tokens);
            Safefree(opts.caches);
            free_sequences(opts.all_tokens, opts.seq_lengths, opts.n_sequences);
            /* Only the integer counters are read after the buffers are gone. */
            croak("Number of caches (%d) must match number of sequences (%d)", opts.n_caches, opts.n_sequences);
        }
    }

    if (!do_forward_unified(aTHX_ hv, &opts, &result)) {
        /* Copy the message into a mortal SV first: free_forward_result() is not
         * visible here and may release the buffer result.error points at. */
        SV *msg = sv_2mortal(newSVpv(result.error ? result.error : "Forward pass failed", 0));
        if (opts.tokens) Safefree(opts.tokens);
        if (opts.caches) Safefree(opts.caches);
        free_sequences(opts.all_tokens, opts.seq_lengths, opts.n_sequences);
        free_forward_result(&result);
        croak("%s", SvPV_nolen(msg));
    }

    /* Inputs are no longer needed once the pass has produced its result. */
    if (opts.tokens) Safefree(opts.tokens);
    if (opts.caches) Safefree(opts.caches);
    free_sequences(opts.all_tokens, opts.seq_lengths, opts.n_sequences);

    if (result.is_batch) {
        /* Batch mode: [ [logits for seq 0], [logits for seq 1], ... ] */
        AV *results_av = newAV();
        for (i = 0; i < result.n_sequences; i++) {
            AV *seq_av = newAV();
            for (j = 0; j < result.n_vocab; j++) {
                av_push(seq_av, newSVnv(result.batch_logits[i][j]));
            }
            av_push(results_av, newRV_noinc((SV*)seq_av));
        }
        free_forward_result(&result);
        EXTEND(SP, 1);
        mPUSHs(newRV_noinc((SV*)results_av));
    } else {
        /* Single sequence: flat list of n_vocab logits. */
        EXTEND(SP, result.n_vocab);
        for (j = 0; j < result.n_vocab; j++) {
            mPUSHn(result.logits[j]);
        }
        free_forward_result(&result);
    }

SV *
forward_all(self, ...)
    SV *self
PREINIT:
    HV *hv;
    LughForwardOpts opts;
    LughForwardResult result;

lib/Lugh.xs  view on Meta::CPAN

    }
    
    /* Validate caches vs cache usage */
    if (opts.all_tokens) {
        if (opts.cache) croak("Use 'caches' (array) for batch mode, not 'cache'");
        if (opts.caches && opts.n_caches != opts.n_sequences) {
            Safefree(opts.caches);
            free_sequences(opts.all_tokens, opts.seq_lengths, opts.n_sequences);
            croak("Number of caches (%d) must match number of sequences (%d)", opts.n_caches, opts.n_sequences);
        }
    }
    
    if (!do_forward_unified(aTHX_ hv, &opts, &result)) {
        char *err = result.error ? result.error : "Forward pass failed";
        if (opts.tokens) Safefree(opts.tokens);
        if (opts.caches) Safefree(opts.caches);
        free_sequences(opts.all_tokens, opts.seq_lengths, opts.n_sequences);
        free_forward_result(&result);
        croak("%s", err);
    }
    
    if (opts.tokens) Safefree(opts.tokens);
    if (opts.caches) Safefree(opts.caches);
    free_sequences(opts.all_tokens, opts.seq_lengths, opts.n_sequences);
    
    /* Return array of arrays: [ [logits_pos0], [logits_pos1], ... ] */
    outer_av = newAV();
    
    if (result.all_logits && result.n_tokens > 0) {
        for (i = 0; i < result.n_tokens; i++) {
            AV *pos_av = newAV();
            float *pos_logits = result.all_logits + (i * result.n_vocab);
            for (j = 0; j < result.n_vocab; j++) {
                av_push(pos_av, newSVnv(pos_logits[j]));
            }
            av_push(outer_av, newRV_noinc((SV*)pos_av));
        }
    } else if (result.logits) {
        /* Fallback: only last position available */
        AV *pos_av = newAV();
        for (j = 0; j < result.n_vocab; j++) {
            av_push(pos_av, newSVnv(result.logits[j]));
        }
        av_push(outer_av, newRV_noinc((SV*)pos_av));
    }
    
    free_forward_result(&result);
    RETVAL = newRV_noinc((SV*)outer_av);
OUTPUT:
    RETVAL

void
forward_simple(self, tokens_ref)
    SV *self
    SV *tokens_ref
PREINIT:
    HV *hv;
    LughForwardOpts opts;
    LughForwardResult result;
    int j;
PPCODE:
    /*
     * forward_simple(\@tokens)
     *
     * Runs a forward pass with only a token array (all other options zeroed)
     * and returns the logits as a flat list of result.n_vocab numbers.
     * Croaks when tokens_ref is not an array reference or the pass fails.
     */
    if (!SvROK(tokens_ref) || SvTYPE(SvRV(tokens_ref)) != SVt_PVAV) {
        croak("forward_simple() requires an array reference");
    }

    hv = (HV*)SvRV(self);
    Zero(&opts, 1, LughForwardOpts);
    opts.tokens = parse_tokens_av(aTHX_ (AV*)SvRV(tokens_ref), &opts.n_tokens);

    if (!do_forward_unified(aTHX_ hv, &opts, &result)) {
        /* Copy the message into a mortal SV first: free_forward_result() is not
         * visible here and may release the buffer result.error points at. */
        SV *msg = sv_2mortal(newSVpv(result.error ? result.error : "Forward pass failed", 0));
        Safefree(opts.tokens);
        free_forward_result(&result);
        croak("%s", SvPV_nolen(msg));
    }

    Safefree(opts.tokens);

    EXTEND(SP, result.n_vocab);
    for (j = 0; j < result.n_vocab; j++) {
        mPUSHn(result.logits[j]);
    }
    free_forward_result(&result);

void
forward_cache(self, ...)
    SV *self
PREINIT:
    HV *hv;
    LughForwardOpts opts;
    LughForwardResult result;
    int j;
PPCODE:
    /*
     * forward_cache($cache, \@tokens, ...) or
     * forward_cache(cache => $c, tokens => \@t, ...)
     *
     * Runs a forward pass that uses a KV cache and returns the logits as a
     * flat list. Croaks when tokens or cache are missing or the pass fails.
     * The token buffer is released before every croak that can occur after
     * it was allocated, because croak() never returns.
     */
    hv = (HV*)SvRV(self);
    Zero(&opts, 1, LughForwardOpts);

    /* Detect positional: forward_cache($cache, \@tokens, ...) */
    if (items >= 3 && sv_isobject(ST(1)) && SvROK(ST(2)) && SvTYPE(SvRV(ST(2))) == SVt_PVAV) {
        opts.cache = get_lugh_kvcache(aTHX_ ST(1));
        opts.tokens = parse_tokens_av(aTHX_ (AV*)SvRV(ST(2)), &opts.n_tokens);
        /* Parse remaining as named params */
        parse_forward_options(aTHX_ &opts, &ST(0), 3, items);
    } else {
        /* Named params: forward_cache(cache => $c, tokens => \@t, ...) */
        parse_forward_options(aTHX_ &opts, &ST(0), 1, items);
    }

    if (!opts.tokens) croak("forward_cache() requires tokens");
    if (!opts.cache) {
        /* Tokens were already allocated; do not leak them on this error. */
        Safefree(opts.tokens);
        croak("forward_cache() requires cache");
    }

    if (!do_forward_unified(aTHX_ hv, &opts, &result)) {
        /* Copy the message into a mortal SV first: free_forward_result() is not
         * visible here and may release the buffer result.error points at. */
        SV *msg = sv_2mortal(newSVpv(result.error ? result.error : "Forward pass failed", 0));
        Safefree(opts.tokens);
        free_forward_result(&result);
        croak("%s", SvPV_nolen(msg));
    }

    Safefree(opts.tokens);

    EXTEND(SP, result.n_vocab);
    for (j = 0; j < result.n_vocab; j++) {
        mPUSHn(result.logits[j]);
    }
    free_forward_result(&result);

void
forward_pool(self, ...)
    SV *self
PREINIT:
    HV *hv;
    LughForwardOpts opts;
    LughForwardResult result;
    int j;
    SV **svp;
PPCODE:
    /*
     * forward_pool($pool, \@tokens, ...) or forward_pool(pool => ..., tokens => ...)
     *
     * Runs a forward pass that uses a memory pool and returns the logits as a
     * flat list. In the positional form the pool is looked up through the
     * "_pool_id" entry of the pool object's hash.
     * Croaks when tokens or pool are missing or the pass fails; the token
     * buffer is released before every croak that can occur after it was
     * allocated, because croak() never returns.
     */
    hv = (HV*)SvRV(self);
    Zero(&opts, 1, LughForwardOpts);

    /* Detect positional: forward_pool($pool, \@tokens, ...) */
    if (items >= 3 && sv_isobject(ST(1)) && SvROK(ST(2)) && SvTYPE(SvRV(ST(2))) == SVt_PVAV) {
        SV *pool_sv = ST(1);
        if (SvROK(pool_sv) && SvTYPE(SvRV(pool_sv)) == SVt_PVHV) {
            HV *pool_hv = (HV*)SvRV(pool_sv);
            svp = hv_fetch(pool_hv, "_pool_id", 8, 0);
            if (svp && *svp) opts.pool = get_mempool_by_id(SvIV(*svp));
        }
        opts.tokens = parse_tokens_av(aTHX_ (AV*)SvRV(ST(2)), &opts.n_tokens);
        /* Parse remaining as named params */
        parse_forward_options(aTHX_ &opts, &ST(0), 3, items);
    } else {
        /* Named params */
        parse_forward_options(aTHX_ &opts, &ST(0), 1, items);
    }

    if (!opts.tokens) croak("forward_pool() requires tokens");
    if (!opts.pool) {
        /* Tokens were already allocated; do not leak them on this error. */
        Safefree(opts.tokens);
        croak("forward_pool() requires pool");
    }

    if (!do_forward_unified(aTHX_ hv, &opts, &result)) {
        /* Copy the message into a mortal SV first: free_forward_result() is not
         * visible here and may release the buffer result.error points at. */
        SV *msg = sv_2mortal(newSVpv(result.error ? result.error : "Forward pass failed", 0));
        Safefree(opts.tokens);
        free_forward_result(&result);
        croak("%s", SvPV_nolen(msg));
    }

    Safefree(opts.tokens);

    EXTEND(SP, result.n_vocab);
    for (j = 0; j < result.n_vocab; j++) {
        mPUSHn(result.logits[j]);
    }
    free_forward_result(&result);

void
forward_batch(self, ...)
    SV *self
PREINIT:
    HV *hv;
    LughForwardOpts opts;
    LughForwardResult result;
    int i, j;
PPCODE:
    /*
     * forward_batch(\@sequences, ...) or named parameters.
     *
     * Runs a forward pass over several sequences and returns one array ref
     * containing an array ref of logits per sequence.
     * Croaks on a malformed sequence list, missing sequences, use of the
     * single 'cache' option, a caches/sequences count mismatch, or a failed
     * pass. Buffers owned by opts are released before each croak that can
     * occur after they were allocated, because croak() never returns.
     */
    hv = (HV*)SvRV(self);
    Zero(&opts, 1, LughForwardOpts);

    /* Detect positional: forward_batch(\@sequences, ...) */
    if (items >= 2 && SvROK(ST(1)) && SvTYPE(SvRV(ST(1))) == SVt_PVAV) {
        if (!parse_sequences_av(aTHX_ (AV*)SvRV(ST(1)), &opts.all_tokens, &opts.seq_lengths, &opts.n_sequences)) {
            croak("Invalid sequences format");
        }
        /* Parse remaining as named params */
        parse_forward_options(aTHX_ &opts, &ST(0), 2, items);
    } else {
        /* Named params */
        parse_forward_options(aTHX_ &opts, &ST(0), 1, items);
    }

    if (!opts.all_tokens) {
        /* No sequences, but a 'caches' array may already have been parsed. */
        if (opts.caches) Safefree(opts.caches);
        croak("forward_batch() requires sequences");
    }
    if (opts.cache) {
        if (opts.caches) Safefree(opts.caches);
        free_sequences(opts.all_tokens, opts.seq_lengths, opts.n_sequences);
        croak("Use 'caches' (array) for batch mode, not 'cache'");
    }
    if (opts.caches && opts.n_caches != opts.n_sequences) {
        Safefree(opts.caches);
        free_sequences(opts.all_tokens, opts.seq_lengths, opts.n_sequences);
        /* Only the integer counters are read after the buffers are gone. */
        croak("Number of caches (%d) must match number of sequences (%d)", opts.n_caches, opts.n_sequences);
    }

    if (!do_forward_unified(aTHX_ hv, &opts, &result)) {
        /* Copy the message into a mortal SV first: free_forward_result() is not
         * visible here and may release the buffer result.error points at. */
        SV *msg = sv_2mortal(newSVpv(result.error ? result.error : "Forward pass failed", 0));
        if (opts.caches) Safefree(opts.caches);
        free_sequences(opts.all_tokens, opts.seq_lengths, opts.n_sequences);
        free_forward_result(&result);
        croak("%s", SvPV_nolen(msg));
    }

    if (opts.caches) Safefree(opts.caches);
    free_sequences(opts.all_tokens, opts.seq_lengths, opts.n_sequences);

    /* Return array ref of results */
    {
        AV *results_av = newAV();
        for (i = 0; i < result.n_sequences; i++) {
            AV *seq_av = newAV();
            for (j = 0; j < result.n_vocab; j++) {
                av_push(seq_av, newSVnv(result.batch_logits[i][j]));
            }
            av_push(results_av, newRV_noinc((SV*)seq_av));
        }
        free_forward_result(&result);
        EXTEND(SP, 1);
        mPUSHs(newRV_noinc((SV*)results_av));
    }

void
forward_cache_pool(self, ...)
    SV *self
PREINIT:
    HV *hv;
    LughForwardOpts opts;
    LughForwardResult result;
    int j;
    SV **svp;
PPCODE:
    /*
     * forward_cache_pool($cache, $pool, \@tokens, ...) or named parameters.
     *
     * Runs a forward pass that uses both a KV cache and a memory pool and
     * returns the logits as a flat list. In the positional form the pool is
     * looked up through the "_pool_id" entry of the pool object's hash.
     * Croaks when tokens, cache or pool are missing or the pass fails; the
     * token buffer is released before every croak that can occur after it
     * was allocated, because croak() never returns.
     */
    hv = (HV*)SvRV(self);
    Zero(&opts, 1, LughForwardOpts);

    /* Detect positional: forward_cache_pool($cache, $pool, \@tokens, ...) */
    if (items >= 4 && sv_isobject(ST(1)) && sv_isobject(ST(2)) &&
        SvROK(ST(3)) && SvTYPE(SvRV(ST(3))) == SVt_PVAV) {
        SV *pool_sv = ST(2);
        opts.cache = get_lugh_kvcache(aTHX_ ST(1));
        if (SvROK(pool_sv) && SvTYPE(SvRV(pool_sv)) == SVt_PVHV) {
            HV *pool_hv = (HV*)SvRV(pool_sv);
            svp = hv_fetch(pool_hv, "_pool_id", 8, 0);
            if (svp && *svp) opts.pool = get_mempool_by_id(SvIV(*svp));
        }
        opts.tokens = parse_tokens_av(aTHX_ (AV*)SvRV(ST(3)), &opts.n_tokens);
        /* Parse remaining as named params */
        parse_forward_options(aTHX_ &opts, &ST(0), 4, items);
    } else {
        /* Named params */
        parse_forward_options(aTHX_ &opts, &ST(0), 1, items);
    }

    if (!opts.tokens) croak("forward_cache_pool() requires tokens");
    if (!opts.cache) {
        /* Tokens were already allocated; do not leak them on this error. */
        Safefree(opts.tokens);
        croak("forward_cache_pool() requires cache");
    }
    if (!opts.pool) {
        Safefree(opts.tokens);
        croak("forward_cache_pool() requires pool");
    }

    if (!do_forward_unified(aTHX_ hv, &opts, &result)) {
        /* Copy the message into a mortal SV first: free_forward_result() is not
         * visible here and may release the buffer result.error points at. */
        SV *msg = sv_2mortal(newSVpv(result.error ? result.error : "Forward pass failed", 0));
        Safefree(opts.tokens);
        free_forward_result(&result);
        croak("%s", SvPV_nolen(msg));
    }

    Safefree(opts.tokens);

    EXTEND(SP, result.n_vocab);
    for (j = 0; j < result.n_vocab; j++) {
        mPUSHn(result.logits[j]);
    }
    free_forward_result(&result);

void
forward_batch_pool(self, ...)
    SV *self
PREINIT:
    HV *hv;
    LughForwardOpts opts;
    LughForwardResult result;
    int i, j;
    SV **svp;
PPCODE:
    /*
     * forward_batch_pool($pool, \@sequences, ...) or named parameters.
     *
     * Runs a forward pass over several sequences using a memory pool and
     * returns one array ref containing an array ref of logits per sequence.
     * In the positional form the pool is looked up through the "_pool_id"
     * entry of the pool object's hash.
     * Croaks on a malformed sequence list, missing sequences or pool, a
     * caches/sequences count mismatch, or a failed pass. Buffers owned by
     * opts are released before each croak that can occur after they were
     * allocated, because croak() never returns.
     */
    hv = (HV*)SvRV(self);
    Zero(&opts, 1, LughForwardOpts);

    /* Detect positional: forward_batch_pool($pool, \@sequences, ...) */
    if (items >= 3 && sv_isobject(ST(1)) && SvROK(ST(2)) && SvTYPE(SvRV(ST(2))) == SVt_PVAV) {
        SV *pool_sv = ST(1);
        if (SvROK(pool_sv) && SvTYPE(SvRV(pool_sv)) == SVt_PVHV) {
            HV *pool_hv = (HV*)SvRV(pool_sv);
            svp = hv_fetch(pool_hv, "_pool_id", 8, 0);
            if (svp && *svp) opts.pool = get_mempool_by_id(SvIV(*svp));
        }
        if (!parse_sequences_av(aTHX_ (AV*)SvRV(ST(2)), &opts.all_tokens, &opts.seq_lengths, &opts.n_sequences)) {
            croak("Invalid sequences format");
        }
        /* Parse remaining as named params */
        parse_forward_options(aTHX_ &opts, &ST(0), 3, items);
    } else {
        /* Named params */
        parse_forward_options(aTHX_ &opts, &ST(0), 1, items);
    }

    if (!opts.all_tokens) {
        /* No sequences, but a 'caches' array may already have been parsed. */
        if (opts.caches) Safefree(opts.caches);
        croak("forward_batch_pool() requires sequences");
    }
    if (!opts.pool) {
        if (opts.caches) Safefree(opts.caches);
        free_sequences(opts.all_tokens, opts.seq_lengths, opts.n_sequences);
        croak("forward_batch_pool() requires pool");
    }
    if (opts.caches && opts.n_caches != opts.n_sequences) {
        Safefree(opts.caches);
        free_sequences(opts.all_tokens, opts.seq_lengths, opts.n_sequences);
        /* Only the integer counters are read after the buffers are gone. */
        croak("Number of caches (%d) must match number of sequences (%d)", opts.n_caches, opts.n_sequences);
    }

    if (!do_forward_unified(aTHX_ hv, &opts, &result)) {
        /* Copy the message into a mortal SV first: free_forward_result() is not
         * visible here and may release the buffer result.error points at. */
        SV *msg = sv_2mortal(newSVpv(result.error ? result.error : "Forward pass failed", 0));
        if (opts.caches) Safefree(opts.caches);
        free_sequences(opts.all_tokens, opts.seq_lengths, opts.n_sequences);
        free_forward_result(&result);
        croak("%s", SvPV_nolen(msg));
    }

    if (opts.caches) Safefree(opts.caches);
    free_sequences(opts.all_tokens, opts.seq_lengths, opts.n_sequences);

    /* Return array ref of results */
    {
        AV *results_av = newAV();
        for (i = 0; i < result.n_sequences; i++) {
            AV *seq_av = newAV();
            for (j = 0; j < result.n_vocab; j++) {
                av_push(seq_av, newSVnv(result.batch_logits[i][j]));
            }
            av_push(results_av, newRV_noinc((SV*)seq_av));
        }
        free_forward_result(&result);
        EXTEND(SP, 1);
        mPUSHs(newRV_noinc((SV*)results_av));
    }

int
sample_top_p(self, logits_ref, ...)
    SV *self
    SV *logits_ref
PREINIT:

lib/Lugh.xs  view on Meta::CPAN

                    max_idx = j;
                }
            }
            if (max_idx != k) {
                float tmp = logits[k];
                int tmp_idx = indices[k];
                logits[k] = logits[max_idx];
                indices[k] = indices[max_idx];
                logits[max_idx] = tmp;
                indices[max_idx] = tmp_idx;
            }
        }
    }
    
    /* Renormalize top_k probabilities */
    sum = 0.0f;
    for (i = 0; i < top_k; i++) {
        sum += logits[i];
    }
    
    /* Sample from top_k tokens */
    threshold = lugh_rand_float() * sum;
    float cumsum = 0.0f;
    RETVAL = indices[0];  /* Default to most likely */
    
    for (i = 0; i < top_k; i++) {
        cumsum += logits[i];
        if (cumsum >= threshold) {
            RETVAL = indices[i];
            break;
        }
    }
    
    Safefree(logits);
    Safefree(indices);
OUTPUT:
    RETVAL

void
generate(self, tokens_ref, ...)
    SV *self
    SV *tokens_ref
PREINIT:
    HV *hv;
    SV **svp;
    LughModel *model;
    AV *tokens_av;
    AV *result_av;
    int *tokens = NULL;
    int n_tokens;
    int max_tokens = 128;
    float temperature = 0.8f;
    float top_p = 0.95f;
    int top_k = 40;
    int eos_token = 2;
    int greedy = 0;
    SV *callback = NULL;
    int i;
    int n_result;
    SV **orig_sp;
PPCODE:
    orig_sp = SP;  /* Save original stack pointer */
    
    /* Get model */
    hv = (HV*)SvRV(self);
    svp = hv_fetch(hv, "_model", 6, 0);
    if (!svp || !*svp) croak("No model in inference object");
    model = get_lugh_model(aTHX_ *svp);
    if (!model) croak("Invalid model");
    
    /* Parse input tokens */
    if (!SvROK(tokens_ref) || SvTYPE(SvRV(tokens_ref)) != SVt_PVAV) {
        croak("generate() requires an array reference of tokens");
    }
    tokens_av = (AV*)SvRV(tokens_ref);
    n_tokens = av_len(tokens_av) + 1;
    if (n_tokens == 0) {
        croak("generate() requires at least one token");
    }
    
    /* Parse optional parameters */
    for (i = 2; i < items; i += 2) {
        if (i + 1 < items) {
            const char *key = SvPV_nolen(ST(i));
            if (strEQ(key, "max_tokens")) {
                max_tokens = SvIV(ST(i + 1));
            } else if (strEQ(key, "temperature")) {
                temperature = SvNV(ST(i + 1));
            } else if (strEQ(key, "top_p")) {
                top_p = SvNV(ST(i + 1));
            } else if (strEQ(key, "top_k")) {
                top_k = SvIV(ST(i + 1));
            } else if (strEQ(key, "eos_token")) {
                eos_token = SvIV(ST(i + 1));
            } else if (strEQ(key, "greedy")) {
                greedy = SvTRUE(ST(i + 1));
            } else if (strEQ(key, "callback")) {
                if (SvROK(ST(i + 1)) && SvTYPE(SvRV(ST(i + 1))) == SVt_PVCV) {
                    callback = ST(i + 1);
                }
            }
        }
    }
    
    /* Get EOS from model if not specified */
    {
        int64_t key_id = gguf_find_key(model->gguf, "tokenizer.ggml.eos_token_id");
        if (key_id >= 0) {
            eos_token = gguf_get_val_u32(model->gguf, key_id);
        }
    }
    
    /* Initialize tokens array with prompt */
    Newx(tokens, n_tokens + max_tokens, int);
    for (i = 0; i < n_tokens; i++) {
        SV **elem = av_fetch(tokens_av, i, 0);
        tokens[i] = elem ? SvIV(*elem) : 0;
    }
    
    /* Create result array for generated tokens only */
    result_av = newAV();

lib/Lugh.xs  view on Meta::CPAN

                if (tokp && *tokp) {
                    const char *tok = SvPV_nolen(*tokp);
                    /* Skip special tokens like <s>, </s>, etc if needed */
                    if (tok[0] != '<' || !strchr(tok, '>')) {
                        /* Handle SentencePiece underscore prefix (▁ -> space) */
                        if ((unsigned char)tok[0] == 0xE2 && 
                            (unsigned char)tok[1] == 0x96 && 
                            (unsigned char)tok[2] == 0x81) {
                            sv_catpvn(result, " ", 1);
                            sv_catpv(result, tok + 3);
                        } else {
                            sv_catpv(result, tok);
                        }
                    }
                }
            }
        }
    } else {
        /* List of token ids passed directly */
        for (i = 1; i < items; i++) {
            int token_id = SvIV(ST(i));
            SV **tokp = av_fetch(id_to_token, token_id, 0);
            if (tokp && *tokp) {
                const char *tok = SvPV_nolen(*tokp);
                /* Skip special tokens like <s>, </s>, etc if needed */
                if (tok[0] != '<' || !strchr(tok, '>')) {
                    /* Handle SentencePiece underscore prefix (▁ -> space) */
                    if ((unsigned char)tok[0] == 0xE2 && 
                        (unsigned char)tok[1] == 0x96 && 
                        (unsigned char)tok[2] == 0x81) {
                        sv_catpvn(result, " ", 1);
                        sv_catpv(result, tok + 3);
                    } else {
                        sv_catpv(result, tok);
                    }
                }
            }
        }
    }
    
    RETVAL = result;
OUTPUT:
    RETVAL

void
encode(self, text, ...)
    SV *self
    SV *text
PREINIT:
    HV *hv;
    SV **svp;
    HV *token_to_id;
    AV *id_to_token;
    const char *str;
    STRLEN len;
    AV *tokens;
    int add_bos = 1;
    int bos_id, eos_id, unk_id;
    size_t pos;
    int i;
PPCODE:
    hv = (HV*)SvRV(self);
    
    svp = hv_fetch(hv, "_token_to_id", 12, 0);
    if (!svp || !SvROK(*svp)) croak("Tokenizer not initialized");
    token_to_id = (HV*)SvRV(*svp);
    
    svp = hv_fetch(hv, "_id_to_token", 12, 0);
    if (!svp || !SvROK(*svp)) croak("Tokenizer not initialized");
    id_to_token = (AV*)SvRV(*svp);
    
    svp = hv_fetch(hv, "bos_id", 6, 0);
    bos_id = svp ? SvIV(*svp) : 1;
    svp = hv_fetch(hv, "eos_id", 6, 0);
    eos_id = svp ? SvIV(*svp) : 2;
    svp = hv_fetch(hv, "unk_id", 6, 0);
    unk_id = svp ? SvIV(*svp) : 0;
    
    /* Parse optional add_bos parameter */
    for (i = 2; i < items; i += 2) {
        if (i + 1 < items) {
            const char *key = SvPV_nolen(ST(i));
            if (strEQ(key, "add_bos")) {
                add_bos = SvIV(ST(i + 1));
            }
        }
    }
    
    str = SvPV(text, len);
    
    /* Simple greedy tokenization (longest match first) */
    /* For production, should use proper BPE merge algorithm */
    
    if (add_bos) {
        XPUSHs(sv_2mortal(newSViv(bos_id)));
    }
    
    pos = 0;
    while (pos < len) {
        int best_len = 0;
        int best_id = unk_id;
        int try_len;
        char buf[256];
        int at_word_start = (pos == 0 || str[pos-1] == ' ' || str[pos-1] == '\n' || str[pos-1] == '\t');
        
        /* Skip space - it becomes part of the next token's ▁ prefix */
        if (str[pos] == ' ' || str[pos] == '\t') {
            pos++;
            continue;
        }
        
        /* Try to find longest matching token */
        for (try_len = (len - pos > 255 ? 255 : len - pos); try_len > 0; try_len--) {
            SV **id_ptr;
            
            /* Copy substring to buffer */
            memcpy(buf, str + pos, try_len);
            buf[try_len] = '\0';
            
            /* Try with SentencePiece prefix for word start */
            if (at_word_start) {

lib/Lugh.xs  view on Meta::CPAN

    for (i = 0; i < n_dims; i++) {
        ne[i] = SvIV(ST(i + 2));
        /* Validate: dimensions must be positive */
        if (ne[i] <= 0) {
            croak("Invalid dimension %d at position %d: dimensions must be positive", (int)ne[i], i);
        }
    }

    /* Create tensor based on dimensionality */
    switch (n_dims) {
        case 1:
            tensor = ggml_new_tensor_1d(lctx->ctx, GGML_TYPE_F32, ne[0]);
            break;
        case 2:
            tensor = ggml_new_tensor_2d(lctx->ctx, GGML_TYPE_F32, ne[0], ne[1]);
            break;
        case 3:
            tensor = ggml_new_tensor_3d(lctx->ctx, GGML_TYPE_F32, ne[0], ne[1], ne[2]);
            break;
        case 4:
            tensor = ggml_new_tensor_4d(lctx->ctx, GGML_TYPE_F32, ne[0], ne[1], ne[2], ne[3]);
            break;
    }
    
    if (!tensor) {
        croak("Failed to create tensor");
    }
    
    /* Return tensor pointer as blessed IV */
    RETVAL = sv_bless(
        newRV_noinc(newSViv(PTR2IV(tensor))),
        gv_stashpv(class, GV_ADD)
    );
OUTPUT:
    RETVAL

void
set_f32(self, ...)
    SV *self
CODE:
{
    /* self is a reference to an IV that stores the ggml_tensor pointer. */
    struct ggml_tensor *target = INT2PTR(struct ggml_tensor *, SvIV(SvRV(self)));
    const int64_t expected = ggml_nelements(target);
    int64_t idx = 0;

    /* Exactly one argument per tensor element must follow self. */
    if (items - 1 != expected) {
        croak("Expected %ld values, got %d", (long)expected, (int)(items - 1));
    }

    /* ST(0) is self, so the value for element idx sits at ST(idx + 1). */
    while (idx < expected) {
        ggml_set_f32_1d(target, idx, SvNV(ST(idx + 1)));
        idx++;
    }
}

void
get_f32(self)
    SV *self
PPCODE:
{
    /* self is a reference to an IV that stores the ggml_tensor pointer. */
    struct ggml_tensor *source = INT2PTR(struct ggml_tensor *, SvIV(SvRV(self)));
    const int64_t total = ggml_nelements(source);
    int64_t idx = 0;

    /* Return every element, in 1-d index order, as a list of numbers. */
    EXTEND(SP, total);
    while (idx < total) {
        mPUSHs(newSVnv(ggml_get_f32_1d(source, idx)));
        idx++;
    }
}

int64_t
nelements(self)
    SV *self
CODE:
{
    /* Unwrap the tensor pointer stored in the referenced IV and report
     * ggml_nelements() for it. */
    const IV address = SvIV(SvRV(self));
    struct ggml_tensor *t = INT2PTR(struct ggml_tensor *, address);
    RETVAL = ggml_nelements(t);
}
OUTPUT:
    RETVAL

int
n_dims(self)
    SV *self
CODE:
{
    /* Unwrap the tensor pointer stored in the referenced IV and report
     * ggml_n_dims() for it. */
    const IV address = SvIV(SvRV(self));
    struct ggml_tensor *t = INT2PTR(struct ggml_tensor *, address);
    RETVAL = ggml_n_dims(t);
}
OUTPUT:
    RETVAL

void
shape(self)
    SV *self
PPCODE:
{
    /* self is a reference to an IV that stores the ggml_tensor pointer. */
    struct ggml_tensor *t = INT2PTR(struct ggml_tensor *, SvIV(SvRV(self)));
    const int rank = ggml_n_dims(t);
    int axis = 0;

    /* Return ne[0] .. ne[rank - 1] as a list of integers. */
    EXTEND(SP, rank);
    while (axis < rank) {
        mPUSHs(newSViv(t->ne[axis]));
        axis++;
    }
}

int
type(self)
    SV *self
CODE:
{
    /* Unwrap the tensor pointer stored in the referenced IV and return its
     * type field as a plain integer. */
    const IV address = SvIV(SvRV(self));
    struct ggml_tensor *t = INT2PTR(struct ggml_tensor *, address);
    RETVAL = (int)t->type;
}
OUTPUT:
    RETVAL

const char *
type_name(self)
    SV *self
CODE:
{
    /* Unwrap the tensor pointer stored in the referenced IV and return
     * ggml_type_name() for its type. */
    const IV address = SvIV(SvRV(self));
    struct ggml_tensor *t = INT2PTR(struct ggml_tensor *, address);
    RETVAL = ggml_type_name(t->type);
}
OUTPUT:
    RETVAL

size_t
type_size(self)
    SV *self
CODE:
{
    /* Unwrap the tensor pointer stored in the referenced IV and return
     * ggml_type_size() for its type. */
    const IV address = SvIV(SvRV(self));
    struct ggml_tensor *t = INT2PTR(struct ggml_tensor *, address);
    RETVAL = ggml_type_size(t->type);
}
OUTPUT:
    RETVAL

int64_t
blck_size(self)
    SV *self
CODE:
{
    /* Unwrap the tensor pointer stored in the referenced IV and return
     * ggml_blck_size() for its type. */
    const IV address = SvIV(SvRV(self));
    struct ggml_tensor *t = INT2PTR(struct ggml_tensor *, address);
    RETVAL = ggml_blck_size(t->type);
}
OUTPUT:
    RETVAL

int
is_quantized(self)
    SV *self
CODE:
{
    /* Unwrap the tensor pointer stored in the referenced IV and normalise
     * ggml_is_quantized() for its type to exactly 1 or 0. */
    const IV address = SvIV(SvRV(self));
    struct ggml_tensor *t = INT2PTR(struct ggml_tensor *, address);
    if (ggml_is_quantized(t->type)) {
        RETVAL = 1;
    } else {
        RETVAL = 0;
    }
}
OUTPUT:
    RETVAL

size_t
nbytes(self)
    SV *self
CODE:
    struct ggml_tensor *tensor = INT2PTR(struct ggml_tensor *, SvIV(SvRV(self)));
    RETVAL = ggml_nbytes(tensor);
OUTPUT:

lib/Lugh.xs  view on Meta::CPAN

    LughArchType at = get_arch_type(lm->architecture);
    RETVAL = arch_has_combined_qkv(at);
OUTPUT:
    RETVAL

int
arch_has_ffn_gate(self)
    SV *self
CODE:
{
    /* Map the model's architecture field to a LughArchType, then ask the
     * C helper arch_has_ffn_gate() about that type. */
    LughModel *model = get_lugh_model(aTHX_ self);
    const LughArchType arch = get_arch_type(model->architecture);
    RETVAL = arch_has_ffn_gate(arch);
}
OUTPUT:
    RETVAL

int
arch_has_post_norm(self)
    SV *self
CODE:
{
    /* Map the model's architecture field to a LughArchType, then ask the
     * C helper arch_has_post_norm() about that type. */
    LughModel *model = get_lugh_model(aTHX_ self);
    const LughArchType arch = get_arch_type(model->architecture);
    RETVAL = arch_has_post_norm(arch);
}
OUTPUT:
    RETVAL

int
arch_is_recurrent(self)
    SV *self
CODE:
{
    /* Map the model's architecture field to a LughArchType, then ask the
     * C helper arch_is_recurrent() about that type. */
    LughModel *model = get_lugh_model(aTHX_ self);
    const LughArchType arch = get_arch_type(model->architecture);
    RETVAL = arch_is_recurrent(arch);
}
OUTPUT:
    RETVAL

int64_t
n_tensors(self)
    SV *self
CODE:
{
    /* Return the n_tensors field of the LughModel behind self. */
    LughModel *model = get_lugh_model(aTHX_ self);
    RETVAL = model->n_tensors;
}
OUTPUT:
    RETVAL

int64_t
n_kv(self)
    SV *self
CODE:
{
    /* Return the n_kv field of the LughModel behind self. */
    LughModel *model = get_lugh_model(aTHX_ self);
    RETVAL = model->n_kv;
}
OUTPUT:
    RETVAL

void
tensor_info(self, name)
    SV *self
    const char *name
PPCODE:
{
    LughModel *model = get_lugh_model(aTHX_ self);
    struct ggml_tensor *found = ggml_get_tensor(model->ctx, name);
    int axis;

    /* When the lookup yields nothing the stack is left untouched, so the
     * caller receives an empty list. Otherwise six integers are returned:
     * type, n_dims, ne[0], ne[1], ne[2], ne[3]. */
    if (found) {
        EXTEND(SP, 6);
        mPUSHs(newSViv(found->type));
        mPUSHs(newSViv(ggml_n_dims(found)));
        for (axis = 0; axis < 4; axis++) {
            mPUSHs(newSViv(found->ne[axis]));
        }
    }
}

void
tensor_names(self)
    SV *self
PPCODE:
{
    LughModel *model = get_lugh_model(aTHX_ self);
    int64_t idx = 0;

    /* One string per tensor index 0 .. n_tensors - 1, taken from the gguf
     * handle held by the model. */
    EXTEND(SP, model->n_tensors);
    while (idx < model->n_tensors) {
        const char *tensor_name = gguf_get_tensor_name(model->gguf, idx);
        PUSHs(sv_2mortal(newSVpv(tensor_name, 0)));
        idx++;
    }
}

void
kv_keys(self)
    SV *self
PPCODE:
{
    LughModel *model = get_lugh_model(aTHX_ self);
    int64_t idx = 0;

    /* One string per key index 0 .. n_kv - 1, taken from the gguf handle
     * held by the model. */
    EXTEND(SP, model->n_kv);
    while (idx < model->n_kv) {
        const char *key_name = gguf_get_key(model->gguf, idx);
        PUSHs(sv_2mortal(newSVpv(key_name, 0)));
        idx++;
    }
}

SV *
get_kv(self, key)
    SV *self
    const char *key
PREINIT:
    LughModel *lm;
    int64_t key_id;
    enum gguf_type kv_type;
CODE:
    lm = get_lugh_model(aTHX_ self);
    key_id = gguf_find_key(lm->gguf, key);
    
    if (key_id < 0) {
        RETVAL = &PL_sv_undef;
    } else {
        kv_type = gguf_get_kv_type(lm->gguf, key_id);
        switch (kv_type) {
            case GGUF_TYPE_UINT8:
                RETVAL = newSVuv(gguf_get_val_u8(lm->gguf, key_id));
                break;
            case GGUF_TYPE_INT8:
                RETVAL = newSViv(gguf_get_val_i8(lm->gguf, key_id));
                break;
            case GGUF_TYPE_UINT16:
                RETVAL = newSVuv(gguf_get_val_u16(lm->gguf, key_id));
                break;
            case GGUF_TYPE_INT16:
                RETVAL = newSViv(gguf_get_val_i16(lm->gguf, key_id));
                break;
            case GGUF_TYPE_UINT32:
                RETVAL = newSVuv(gguf_get_val_u32(lm->gguf, key_id));
                break;
            case GGUF_TYPE_INT32:
                RETVAL = newSViv(gguf_get_val_i32(lm->gguf, key_id));
                break;
            case GGUF_TYPE_UINT64:
                RETVAL = newSVuv(gguf_get_val_u64(lm->gguf, key_id));
                break;
            case GGUF_TYPE_INT64:
                RETVAL = newSViv(gguf_get_val_i64(lm->gguf, key_id));
                break;
            case GGUF_TYPE_FLOAT32:
                RETVAL = newSVnv(gguf_get_val_f32(lm->gguf, key_id));
                break;
            case GGUF_TYPE_FLOAT64:
                RETVAL = newSVnv(gguf_get_val_f64(lm->gguf, key_id));
                break;
            case GGUF_TYPE_BOOL:
                RETVAL = gguf_get_val_bool(lm->gguf, key_id) ? &PL_sv_yes : &PL_sv_no;
                break;
            case GGUF_TYPE_STRING:
                RETVAL = newSVpv(gguf_get_val_str(lm->gguf, key_id), 0);
                break;
            case GGUF_TYPE_ARRAY:

lib/Lugh.xs  view on Meta::CPAN


float
scale(self, ...)
    SV *self
CODE:
{
    MAGIC *ext_magic;
    LughLoRAAdapter *adapter;

    /* Resolve the adapter: self must be a reference whose referent carries
     * ext magic tagged with lugh_lora_vtbl; mg_ptr holds the adapter id. */
    if (!SvROK(self)) {
        croak("Not a reference");
    }
    ext_magic = mg_findext(SvRV(self), PERL_MAGIC_ext, &lugh_lora_vtbl);
    if (!ext_magic) {
        croak("Invalid LoRA object");
    }
    adapter = get_lora_by_id((int)(IV)ext_magic->mg_ptr);
    if (!adapter) {
        croak("LoRA adapter not found");
    }

    /* Combined getter/setter: an extra argument overwrites the stored scale
     * before the (possibly updated) value is returned. */
    if (items > 1) {
        adapter->scale = SvNV(ST(1));
    }
    RETVAL = adapter->scale;
}
OUTPUT:
    RETVAL

int
n_weights(self)
    SV *self
CODE:
{
    MAGIC *ext_magic;
    LughLoRAAdapter *adapter;

    /* Resolve the adapter: self must be a reference whose referent carries
     * ext magic tagged with lugh_lora_vtbl; mg_ptr holds the adapter id. */
    if (!SvROK(self)) {
        croak("Not a reference");
    }
    ext_magic = mg_findext(SvRV(self), PERL_MAGIC_ext, &lugh_lora_vtbl);
    if (!ext_magic) {
        croak("Invalid LoRA object");
    }
    adapter = get_lora_by_id((int)(IV)ext_magic->mg_ptr);
    if (!adapter) {
        croak("LoRA adapter not found");
    }

    RETVAL = adapter->n_weights;
}
OUTPUT:
    RETVAL

const char *
format(self)
    SV *self
PREINIT:
    MAGIC *ext_magic;
    LughLoRAAdapter *adapter;
CODE:
    /*
     * Read-only accessor for the adapter's format string.
     * Croaks if self is not a reference, has no lugh_lora_vtbl ext magic,
     * or get_lora_by_id() returns NULL for the id held in that magic.
     */
    if (!SvROK(self))
        croak("Not a reference");

    ext_magic = mg_findext(SvRV(self), PERL_MAGIC_ext, &lugh_lora_vtbl);
    if (ext_magic == NULL)
        croak("Invalid LoRA object");

    adapter = get_lora_by_id((int)(IV)ext_magic->mg_ptr);
    if (adapter == NULL)
        croak("LoRA adapter not found");

    RETVAL = adapter->format;
OUTPUT:
    RETVAL

void
weight_names(self)
    SV *self
PREINIT:
    MAGIC *ext_magic;
    LughLoRAAdapter *adapter;
    int idx;
PPCODE:
    /*
     * Returns the name of every weight entry of the adapter as a list of
     * strings, in storage order (weights[0] first).
     * Croaks if self is not a reference, has no lugh_lora_vtbl ext magic,
     * or get_lora_by_id() returns NULL for the id held in that magic.
     */
    if (!SvROK(self))
        croak("Not a reference");

    ext_magic = mg_findext(SvRV(self), PERL_MAGIC_ext, &lugh_lora_vtbl);
    if (ext_magic == NULL)
        croak("Invalid LoRA object");

    adapter = get_lora_by_id((int)(IV)ext_magic->mg_ptr);
    if (adapter == NULL)
        croak("LoRA adapter not found");

    /* Reserve stack room once, then push one mortal string per weight. */
    EXTEND(SP, adapter->n_weights);
    idx = 0;
    while (idx < adapter->n_weights) {
        mPUSHp(adapter->weights[idx].name, strlen(adapter->weights[idx].name));
        idx++;
    }

bool
trainable(self)
    SV *self
PREINIT:
    MAGIC *ext_magic;
    LughLoRAAdapter *adapter;
CODE:
    /*
     * Read-only accessor for the adapter's trainable flag.
     * Croaks if self is not a reference, has no lugh_lora_vtbl ext magic,
     * or get_lora_by_id() returns NULL for the id held in that magic.
     */
    if (!SvROK(self))
        croak("Not a reference");

    ext_magic = mg_findext(SvRV(self), PERL_MAGIC_ext, &lugh_lora_vtbl);
    if (ext_magic == NULL)
        croak("Invalid LoRA object");

    adapter = get_lora_by_id((int)(IV)ext_magic->mg_ptr);
    if (adapter == NULL)
        croak("LoRA adapter not found");

    RETVAL = adapter->trainable;
OUTPUT:
    RETVAL

SV *
create(class, ...)
    char *class
PREINIT:
    SV *model_sv = NULL;
    LughModel *model = NULL;
    int rank = 16;
    float alpha = 32.0f;
    float scale = 1.0f;
    AV *targets_av = NULL;
    SV *context_sv = NULL;
    LughContext *lctx = NULL;
    int i;
CODE:
    INIT_MUTEXES();
    
    /* Parse arguments */
    for (i = 1; i < items; i += 2) {
        if (i + 1 < items) {
            const char *key = SvPV_nolen(ST(i));
            if (strEQ(key, "model")) {
                model_sv = ST(i + 1);
                model = get_lugh_model(aTHX_ model_sv);
            } else if (strEQ(key, "rank")) {
                rank = SvIV(ST(i + 1));
            } else if (strEQ(key, "alpha")) {
                alpha = SvNV(ST(i + 1));
            } else if (strEQ(key, "scale")) {
                scale = SvNV(ST(i + 1));
            } else if (strEQ(key, "targets")) {
                if (SvROK(ST(i + 1)) && SvTYPE(SvRV(ST(i + 1))) == SVt_PVAV) {
                    targets_av = (AV*)SvRV(ST(i + 1));
                }

lib/Lugh.xs  view on Meta::CPAN

CODE:
    if (!SvROK(self)) croak("Not a reference");
    mg = mg_findext(SvRV(self), PERL_MAGIC_ext, &lugh_lora_vtbl);
    if (!mg) croak("Invalid LoRA object");
    lora = get_lora_by_id((int)(IV)mg->mg_ptr);
    if (!lora) croak("LoRA adapter not found");
    
    /* Check file extension */
    size_t len = strlen(path);
    if (len <= 5 || strcmp(path + len - 5, ".gguf") != 0) {
        croak("LoRA save path must end with .gguf");
    }
    
    /* Create GGUF writer context */
    struct gguf_context *gguf = gguf_init_empty();
    if (!gguf) {
        croak("Failed to create GGUF context");
    }
    
    /* Add metadata */
    gguf_set_val_str(gguf, "general.type", "adapter");
    gguf_set_val_str(gguf, "adapter.type", "lora");
    gguf_set_val_f32(gguf, "adapter.lora.alpha", lora->alpha);
    
    if (lora->architecture) {
        gguf_set_val_str(gguf, "general.architecture", lora->architecture);
    }
    
    /* Add tensor pairs */
    for (int i = 0; i < lora->n_weights; i++) {
        LughLoRAWeight *lw = &lora->weights[i];
        
        /* For loaded adapters, set names if not already set */
        if (!lora->trainable) {
            char tensor_name_a[140];
            char tensor_name_b[140];
            snprintf(tensor_name_a, sizeof(tensor_name_a), "%s.lora_a", lw->name);
            snprintf(tensor_name_b, sizeof(tensor_name_b), "%s.lora_b", lw->name);
            ggml_set_name(lw->a, tensor_name_a);
            ggml_set_name(lw->b, tensor_name_b);
        }
        
        /* Add tensors to GGUF */
        gguf_add_tensor(gguf, lw->a);
        gguf_add_tensor(gguf, lw->b);
    }
    
    /* Write to file */
    gguf_write_to_file(gguf, path, false);
    gguf_free(gguf);

void
trainable_parameters(self)
    SV *self
PREINIT:
    LughLoRAAdapter *adapter;
    LughTensor *tensor;
    MAGIC *ext_magic;
    int idx;
    int n_slots = 0;
PPCODE:
    /*
     * Returns a list of Lugh::Autograd::Tensor objects, one for each A/B
     * tensor of the adapter that has a positive tensor id and is still
     * known to get_tensor_by_id(). For every weight the A tensor comes
     * before the B tensor.
     *
     * Each returned object is a blessed hash with the keys _tensor_id,
     * _context_id and requires_grad (stored as 0 or 1).
     *
     * Croaks if self is not a valid LoRA object or the adapter is not
     * flagged as trainable.
     */
    if (!SvROK(self))
        croak("Not a reference");

    ext_magic = mg_findext(SvRV(self), PERL_MAGIC_ext, &lugh_lora_vtbl);
    if (ext_magic == NULL)
        croak("Invalid LoRA object");

    adapter = get_lora_by_id((int)(IV)ext_magic->mg_ptr);
    if (adapter == NULL)
        croak("LoRA adapter not found");

    if (!adapter->trainable)
        croak("trainable_parameters() only available for trainable LoRA adapters (created with create())");

    /* Upper bound on the number of pushes: ids that are set may still
     * fail the lookup below, in which case fewer values are returned. */
    for (idx = 0; idx < adapter->n_weights; idx++) {
        LughLoRAWeight *pair = &adapter->weights[idx];
        if (pair->tensor_a_id > 0)
            n_slots++;
        if (pair->tensor_b_id > 0)
            n_slots++;
    }
    EXTEND(SP, n_slots);

    for (idx = 0; idx < adapter->n_weights; idx++) {
        LughLoRAWeight *pair = &adapter->weights[idx];

        if (pair->tensor_a_id > 0
            && (tensor = get_tensor_by_id(pair->tensor_a_id)) != NULL) {
            HV *fields = newHV();
            SV *wrapped;
            hv_store(fields, "_tensor_id", 10, newSViv(tensor->id), 0);
            hv_store(fields, "_context_id", 11, newSViv(tensor->context_id), 0);
            hv_store(fields, "requires_grad", 13, newSViv(tensor->requires_grad ? 1 : 0), 0);
            wrapped = newRV_noinc((SV*)fields);
            sv_bless(wrapped, gv_stashpv("Lugh::Autograd::Tensor", GV_ADD));
            PUSHs(sv_2mortal(wrapped));
        }

        if (pair->tensor_b_id > 0
            && (tensor = get_tensor_by_id(pair->tensor_b_id)) != NULL) {
            HV *fields = newHV();
            SV *wrapped;
            hv_store(fields, "_tensor_id", 10, newSViv(tensor->id), 0);
            hv_store(fields, "_context_id", 11, newSViv(tensor->context_id), 0);
            hv_store(fields, "requires_grad", 13, newSViv(tensor->requires_grad ? 1 : 0), 0);
            wrapped = newRV_noinc((SV*)fields);
            sv_bless(wrapped, gv_stashpv("Lugh::Autograd::Tensor", GV_ADD));
            PUSHs(sv_2mortal(wrapped));
        }
    }

void
DESTROY(self)
    SV *self
CODE:
    /* Deliberate no-op: per the original note, cleanup is performed by the
     * magic attached to the object, so nothing is released here. */
    PERL_UNUSED_ARG(self);

# ============================================================================
# Lugh::RoPE - RoPE Scaling Configuration
# ============================================================================

MODULE = Lugh    PACKAGE = Lugh::RoPE

PROTOTYPES: DISABLE

lib/Lugh.xs  view on Meta::CPAN

    LughSpeculative *spec;
CODE:
    if (!SvROK(self) || SvTYPE(SvRV(self)) != SVt_PVHV)
        croak("Invalid Lugh::Speculative object");
    hv = (HV*)SvRV(self);
    svp = hv_fetch(hv, "_spec_id", 8, 0);
    if (!svp) croak("Invalid Lugh::Speculative object");
    spec = get_speculative_by_id(SvIV(*svp));
    if (!spec) croak("Speculative decoder has been destroyed");
    
    SPECULATIVE_LOCK(spec);
    spec->tokens_drafted = 0;
    spec->tokens_accepted = 0;
    spec->total_steps = 0;
    SPECULATIVE_UNLOCK(spec);

void
DESTROY(self)
    SV *self
CODE:
    /* Deliberate no-op: per the original note, cleanup is performed by
     * magic (not visible in this XSUB), so nothing is released here. */
    PERL_UNUSED_ARG(self);

int
init_caches(self)
    SV *self
PREINIT:
    HV *self_hash;
    SV **id_slot;
    LughSpeculative *decoder;
CODE:
    /*
     * Initialises the decoder's KV caches through spec_init_caches().
     * Returns 1 on success and croaks on any failure:
     *   - self is not a hash reference or has no _spec_id key
     *   - get_speculative_by_id() no longer knows that id
     *   - spec_init_caches() reports failure
     */
    if (!(SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV))
        croak("Invalid Lugh::Speculative object");
    self_hash = (HV*)SvRV(self);

    id_slot = hv_fetch(self_hash, "_spec_id", 8, 0);
    if (id_slot == NULL)
        croak("Invalid Lugh::Speculative object");

    decoder = get_speculative_by_id(SvIV(*id_slot));
    if (decoder == NULL)
        croak("Speculative decoder has been destroyed");

    if (!spec_init_caches(aTHX_ self_hash, decoder))
        croak("Failed to initialize KV caches");

    RETVAL = 1;
OUTPUT:
    RETVAL

void
draft_tokens(self, input_tokens_ref, n_draft)
    SV *self
    SV *input_tokens_ref
    int n_draft
PREINIT:
    HV *self_hash;
    SV **id_slot;
    LughSpeculative *decoder;
    AV *prompt_av, *drafted;
    int pos, prompt_len;
    int *prompt_ids;
PPCODE:
    /*
     * Copies the token ids from input_tokens_ref into a temporary C array,
     * hands them to spec_draft_tokens() together with n_draft, and returns
     * the resulting array as a single array reference.
     *
     * Missing array slots are passed on as token id 0.
     * Croaks if self or input_tokens_ref has the wrong shape, if the
     * decoder id is unknown, or if spec_draft_tokens() returns NULL.
     */
    if (!(SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV))
        croak("Invalid Lugh::Speculative object");
    self_hash = (HV*)SvRV(self);

    id_slot = hv_fetch(self_hash, "_spec_id", 8, 0);
    if (id_slot == NULL)
        croak("Invalid Lugh::Speculative object");

    decoder = get_speculative_by_id(SvIV(*id_slot));
    if (decoder == NULL)
        croak("Speculative decoder has been destroyed");

    if (!(SvROK(input_tokens_ref) && SvTYPE(SvRV(input_tokens_ref)) == SVt_PVAV))
        croak("input_tokens must be an array reference");
    prompt_av = (AV*)SvRV(input_tokens_ref);
    prompt_len = av_len(prompt_av) + 1;

    /* Flatten the Perl array into a plain int buffer for the C helper. */
    Newx(prompt_ids, prompt_len, int);
    for (pos = 0; pos < prompt_len; pos++) {
        SV **slot = av_fetch(prompt_av, pos, 0);
        if (slot) {
            prompt_ids[pos] = SvIV(*slot);
        } else {
            prompt_ids[pos] = 0;
        }
    }

    drafted = spec_draft_tokens(aTHX_ self_hash, decoder, prompt_ids, prompt_len, n_draft);

    /* The buffer is released before the failure check so the croak below
     * does not leak it. */
    Safefree(prompt_ids);

    if (drafted == NULL)
        croak("Draft token generation failed");

    XPUSHs(sv_2mortal(newRV_noinc((SV*)drafted)));

void
verify_tokens(self, input_tokens_ref, draft_tokens_ref)
    SV *self
    SV *input_tokens_ref
    SV *draft_tokens_ref
PREINIT:
    HV *self_hash;
    SV **id_slot;
    LughSpeculative *decoder;
    AV *prompt_av, *proposal_av, *accepted;
    int pos, prompt_len, proposal_len;
    int *prompt_ids, *proposal_ids;
PPCODE:
    /*
     * Copies the prompt tokens and the drafted tokens into temporary C
     * arrays, passes both to spec_verify_tokens(), and returns the array
     * it produces as a single array reference.
     *
     * Missing array slots are passed on as token id 0.
     * Croaks if self or either token argument has the wrong shape, if the
     * decoder id is unknown, or if spec_verify_tokens() returns NULL.
     */
    if (!(SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV))
        croak("Invalid Lugh::Speculative object");
    self_hash = (HV*)SvRV(self);

    id_slot = hv_fetch(self_hash, "_spec_id", 8, 0);
    if (id_slot == NULL)
        croak("Invalid Lugh::Speculative object");

    decoder = get_speculative_by_id(SvIV(*id_slot));
    if (decoder == NULL)
        croak("Speculative decoder has been destroyed");

    if (!(SvROK(input_tokens_ref) && SvTYPE(SvRV(input_tokens_ref)) == SVt_PVAV))
        croak("input_tokens must be an array reference");
    if (!(SvROK(draft_tokens_ref) && SvTYPE(SvRV(draft_tokens_ref)) == SVt_PVAV))
        croak("draft_tokens must be an array reference");

    prompt_av = (AV*)SvRV(input_tokens_ref);
    proposal_av = (AV*)SvRV(draft_tokens_ref);
    prompt_len = av_len(prompt_av) + 1;
    proposal_len = av_len(proposal_av) + 1;

    /* Flatten the prompt into a plain int buffer. */
    Newx(prompt_ids, prompt_len, int);
    for (pos = 0; pos < prompt_len; pos++) {
        SV **slot = av_fetch(prompt_av, pos, 0);
        if (slot) {
            prompt_ids[pos] = SvIV(*slot);
        } else {
            prompt_ids[pos] = 0;
        }
    }

    /* Same for the drafted tokens. */
    Newx(proposal_ids, proposal_len, int);
    for (pos = 0; pos < proposal_len; pos++) {
        SV **slot = av_fetch(proposal_av, pos, 0);
        if (slot) {
            proposal_ids[pos] = SvIV(*slot);
        } else {
            proposal_ids[pos] = 0;
        }
    }

    accepted = spec_verify_tokens(aTHX_ self_hash, decoder, prompt_ids, prompt_len, proposal_ids, proposal_len);

    /* Both buffers are released before the failure check so the croak
     * below does not leak them. */
    Safefree(prompt_ids);
    Safefree(proposal_ids);

    if (accepted == NULL)
        croak("Token verification failed");

    XPUSHs(sv_2mortal(newRV_noinc((SV*)accepted)));

void
step(self, input_tokens_ref)
    SV *self
    SV *input_tokens_ref
PREINIT:
    HV *self_hash;
    SV **id_slot;
    LughSpeculative *decoder;
    AV *prompt_av, *accepted;
    int *prompt_ids;
    int prompt_len, pos;
PPCODE:
    /*
     * Runs one spec_step() call on the given token sequence and returns
     * the array it produces as a single array reference.
     *
     * Missing array slots are passed on as token id 0.
     * Croaks if self or input_tokens_ref has the wrong shape, if the
     * decoder id is unknown, or if spec_step() returns NULL; in the last
     * case the text in spec_last_error is appended when it is non-empty.
     */
    if (!(SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV))
        croak("Invalid Lugh::Speculative object");
    self_hash = (HV*)SvRV(self);

    id_slot = hv_fetch(self_hash, "_spec_id", 8, 0);
    if (id_slot == NULL)
        croak("Invalid Lugh::Speculative object");

    decoder = get_speculative_by_id(SvIV(*id_slot));
    if (decoder == NULL)
        croak("Speculative decoder has been destroyed");

    if (!(SvROK(input_tokens_ref) && SvTYPE(SvRV(input_tokens_ref)) == SVt_PVAV))
        croak("input_tokens must be an array reference");

    prompt_av = (AV*)SvRV(input_tokens_ref);
    prompt_len = av_len(prompt_av) + 1;

    /* Flatten the Perl array into a plain int buffer for the C helper. */
    Newx(prompt_ids, prompt_len, int);
    for (pos = 0; pos < prompt_len; pos++) {
        SV **slot = av_fetch(prompt_av, pos, 0);
        if (slot) {
            prompt_ids[pos] = SvIV(*slot);
        } else {
            prompt_ids[pos] = 0;
        }
    }

    accepted = spec_step(aTHX_ self_hash, decoder, prompt_ids, prompt_len);

    /* Released before the failure check so the croak does not leak it. */
    Safefree(prompt_ids);

    if (accepted == NULL) {
        if (spec_last_error[0] != '\0')
            croak("Speculative step failed: %s", spec_last_error);
        croak("Speculative step failed");
    }

    XPUSHs(sv_2mortal(newRV_noinc((SV*)accepted)));

void
generate(self, input_tokens_ref, max_tokens)
    SV *self
    SV *input_tokens_ref
    int max_tokens
PREINIT:
    HV *hv;
    SV **svp;
    LughSpeculative *spec;
    AV *input_av, *output_av, *accepted_av;
    int *current_tokens;
    int n_current, n_generated;
    int i;
PPCODE:
    if (!SvROK(self) || SvTYPE(SvRV(self)) != SVt_PVHV)
        croak("Invalid Lugh::Speculative object");
    hv = (HV*)SvRV(self);
    
    svp = hv_fetch(hv, "_spec_id", 8, 0);
    if (!svp) croak("Invalid Lugh::Speculative object");
    spec = get_speculative_by_id(SvIV(*svp));
    if (!spec) croak("Speculative decoder has been destroyed");
    
    if (!SvROK(input_tokens_ref) || SvTYPE(SvRV(input_tokens_ref)) != SVt_PVAV)
        croak("input_tokens must be an array reference");
    
    input_av = (AV*)SvRV(input_tokens_ref);
    n_current = av_len(input_av) + 1;
    
    if (max_tokens <= 0) max_tokens = 256;
    
    /* Initialize caches directly */
    if (!spec_init_caches(aTHX_ hv, spec)) {
        croak("Failed to initialize caches");
    }
    
    /* Build current tokens array */
    Newx(current_tokens, n_current + max_tokens, int);
    for (i = 0; i < n_current; i++) {
        SV **tv = av_fetch(input_av, i, 0);
        current_tokens[i] = tv ? SvIV(*tv) : 0;
    }
    
    output_av = newAV();
    n_generated = 0;
    
    /* Generation loop */
    while (n_generated < max_tokens) {
        int n_accepted;
        
        /* Call C helper function directly */
        accepted_av = spec_step(aTHX_ hv, spec, current_tokens, n_current);
        
        if (!accepted_av) {
            break;
        }
        
        n_accepted = av_len(accepted_av) + 1;
        
        if (n_accepted == 0) {
            av_undef(accepted_av);
            break;
        }
        
        /* Add accepted tokens to current sequence and output */
        for (i = 0; i < n_accepted && n_generated < max_tokens; i++) {
            SV **tv = av_fetch(accepted_av, i, 0);
            if (tv && *tv) {
                int token = SvIV(*tv);
                current_tokens[n_current++] = token;
                av_push(output_av, newSViv(token));
                n_generated++;
                
                /* Check for EOS token */

lib/Lugh.xs  view on Meta::CPAN

            av_push(grad_av, newSVnv(ggml_get_f32_1d(lt->grad, i)));
        }
        RETVAL = newRV_noinc((SV*)grad_av);
    }
OUTPUT:
    RETVAL

void
zero_grad(self)
    SV *self
PREINIT:
    HV *self_hash;
    SV **id_slot;
    LughTensor *tensor;
CODE:
    /*
     * Looks up the tensor behind self and passes it to the C-level
     * zero_grad() helper.
     * Croaks if self is not a hash reference, has no _tensor_id key, or
     * get_tensor_by_id() no longer knows that id.
     */
    if (!(SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV))
        croak("Invalid Lugh::Autograd::Tensor object");
    self_hash = (HV*)SvRV(self);

    id_slot = hv_fetch(self_hash, "_tensor_id", 10, 0);
    if (id_slot == NULL)
        croak("Invalid tensor object");

    tensor = get_tensor_by_id(SvIV(*id_slot));
    if (tensor == NULL)
        croak("Tensor has been freed");

    zero_grad(tensor);

void
set_data(self, ...)
    SV *self
PREINIT:
    HV *self_hash;
    SV **id_slot;
    LughTensor *tensor;
    int64_t arg, n_elements;
CODE:
    /*
     * Overwrites every element of the tensor with the values given after
     * self, in order; each value is numified with SvNV and written through
     * ggml_set_f32_1d().
     * The number of values must equal ggml_nelements() of the tensor,
     * otherwise the call croaks. Also croaks if self is not a hash
     * reference, has no _tensor_id key, or the tensor id is unknown.
     */
    if (!(SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV))
        croak("Invalid Lugh::Autograd::Tensor object");
    self_hash = (HV*)SvRV(self);

    id_slot = hv_fetch(self_hash, "_tensor_id", 10, 0);
    if (id_slot == NULL)
        croak("Invalid tensor object");

    tensor = get_tensor_by_id(SvIV(*id_slot));
    if (tensor == NULL)
        croak("Tensor has been freed");

    n_elements = ggml_nelements(tensor->tensor);

    if (items - 1 != n_elements) {
        croak("Expected %ld values, got %d", (long)n_elements, (int)(items - 1));
    }

    /* ST(0) is self, so argument number arg fills element arg - 1. */
    for (arg = 1; arg <= n_elements; arg++) {
        ggml_set_f32_1d(tensor->tensor, arg - 1, SvNV(ST(arg)));
    }

void
get_data(self)
    SV *self
PREINIT:
    HV *self_hash;
    SV **id_slot;
    LughTensor *tensor;
    int64_t pos, n_elements;
PPCODE:
    /*
     * Returns every element of the tensor as a flat list of numbers, read
     * in index order through ggml_get_f32_1d().
     * Croaks if self is not a hash reference, has no _tensor_id key, or
     * get_tensor_by_id() no longer knows that id.
     */
    if (!(SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV))
        croak("Invalid Lugh::Autograd::Tensor object");
    self_hash = (HV*)SvRV(self);

    id_slot = hv_fetch(self_hash, "_tensor_id", 10, 0);
    if (id_slot == NULL)
        croak("Invalid tensor object");

    tensor = get_tensor_by_id(SvIV(*id_slot));
    if (tensor == NULL)
        croak("Tensor has been freed");

    n_elements = ggml_nelements(tensor->tensor);

    /* Reserve the whole return list up front, then push one value each. */
    EXTEND(SP, n_elements);
    pos = 0;
    while (pos < n_elements) {
        mPUSHn(ggml_get_f32_1d(tensor->tensor, pos));
        pos++;
    }

int64_t
nelements(self)
    SV *self
PREINIT:
    HV *self_hash;
    SV **id_slot;
    LughTensor *tensor;
CODE:
    /*
     * Returns ggml_nelements() for the tensor behind self.
     * Croaks if self is not a hash reference, has no _tensor_id key, or
     * get_tensor_by_id() no longer knows that id.
     */
    if (!(SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV))
        croak("Invalid Lugh::Autograd::Tensor object");
    self_hash = (HV*)SvRV(self);

    id_slot = hv_fetch(self_hash, "_tensor_id", 10, 0);
    if (id_slot == NULL)
        croak("Invalid tensor object");

    tensor = get_tensor_by_id(SvIV(*id_slot));
    if (tensor == NULL)
        croak("Tensor has been freed");

    RETVAL = ggml_nelements(tensor->tensor);
OUTPUT:
    RETVAL

void
shape(self)
    SV *self
PREINIT:
    HV *self_hash;
    SV **id_slot;
    LughTensor *tensor;
    int axis, n_axes;
PPCODE:
    /*
     * Returns the tensor's extents as a list of integers: the first
     * ggml_n_dims() entries of its ne[] array, in ne[] order.
     * Croaks if self is not a hash reference, has no _tensor_id key, or
     * get_tensor_by_id() no longer knows that id.
     */
    if (!(SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV))
        croak("Invalid Lugh::Autograd::Tensor object");
    self_hash = (HV*)SvRV(self);

    id_slot = hv_fetch(self_hash, "_tensor_id", 10, 0);
    if (id_slot == NULL)
        croak("Invalid tensor object");

    tensor = get_tensor_by_id(SvIV(*id_slot));
    if (tensor == NULL)
        croak("Tensor has been freed");

    n_axes = ggml_n_dims(tensor->tensor);
    EXTEND(SP, n_axes);
    axis = 0;
    while (axis < n_axes) {
        mPUSHi(tensor->tensor->ne[axis]);
        axis++;
    }

bool
is_leaf(self)
    SV *self
PREINIT:
    HV *self_hash;
    SV **id_slot;
    LughTensor *tensor;
CODE:
    /*
     * Returns the is_leaf flag of the tensor behind self.
     * Croaks if self is not a hash reference, has no _tensor_id key, or
     * get_tensor_by_id() no longer knows that id.
     */
    if (!(SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV))
        croak("Invalid Lugh::Autograd::Tensor object");
    self_hash = (HV*)SvRV(self);

    id_slot = hv_fetch(self_hash, "_tensor_id", 10, 0);
    if (id_slot == NULL)
        croak("Invalid tensor object");

    tensor = get_tensor_by_id(SvIV(*id_slot));
    if (tensor == NULL)
        croak("Tensor has been freed");

    RETVAL = tensor->is_leaf;
OUTPUT:
    RETVAL

void
backward(self, ...)
    SV *self
PREINIT:
    HV *hv;
    SV **svp;
    LughTensor *lt;
    LughContext *lctx;
CODE:
    if (!SvROK(self) || SvTYPE(SvRV(self)) != SVt_PVHV)
        croak("Invalid Lugh::Autograd::Tensor object");
    hv = (HV*)SvRV(self);
    svp = hv_fetch(hv, "_tensor_id", 10, 0);
    if (!svp) croak("Invalid tensor object");
    lt = get_tensor_by_id(SvIV(*svp));
    if (!lt) croak("Tensor has been freed");
    
    if (!lt->requires_grad) {
        croak("backward() called on tensor that doesn't require gradients");
    }
    
    lctx = get_context_by_id(lt->context_id);
    if (!lctx) croak("Context has been destroyed");
    
    /* Ensure gradient tensor exists */
    if (!lt->grad) {

lib/Lugh.xs  view on Meta::CPAN

    HV *logits_hv;
    SV **svp;
    LughTrainingCache *tc;
    int cache_id, n_weights, i;
CODE:
    PERL_UNUSED_ARG(class);
    
    /* Get training cache from logits tensor */
    if (!SvROK(logits_sv) || SvTYPE(SvRV(logits_sv)) != SVt_PVHV)
        croak("logits must be an Autograd::Tensor");
    logits_hv = (HV*)SvRV(logits_sv);
    svp = hv_fetch(logits_hv, "_training_cache_id", 18, 0);
    if (!svp) croak("Tensor has no training cache");
    cache_id = SvIV(*svp);
    tc = get_training_cache_by_id(cache_id);
    if (!tc) croak("Training cache not found");
    
    /* Parse weights array */
    n_weights = av_len(weights_av) + 1;
    if (n_weights == 0) return;
    
    /* Allocate storage */
    Newx(tc->weight_tensor_ids, n_weights, int);
    Newx(tc->weight_tensor_names, n_weights, char*);
    tc->n_weight_tensors = n_weights;
    
    for (i = 0; i < n_weights; i++) {
        SV **elem = av_fetch(weights_av, i, 0);
        if (!elem || !SvROK(*elem) || SvTYPE(SvRV(*elem)) != SVt_PVHV)
            croak("Weight %d must be an Autograd::Tensor", i);
        
        HV *weight_hv = (HV*)SvRV(*elem);
        svp = hv_fetch(weight_hv, "_tensor_id", 10, 0);
        if (!svp) croak("Invalid weight tensor %d", i);
        tc->weight_tensor_ids[i] = SvIV(*svp);
        
        /* Get tensor name if available */
        svp = hv_fetch(weight_hv, "name", 4, 0);
        if (svp && SvOK(*svp)) {
            STRLEN len;
            const char *name = SvPV(*svp, len);
            Newx(tc->weight_tensor_names[i], len + 1, char);
            Copy(name, tc->weight_tensor_names[i], len + 1, char);
        } else {
            tc->weight_tensor_names[i] = NULL;
        }
    }
    
    DEBUG_PRINT("Registered %d weight tensors for training\n", n_weights);

void
batch_data(class, data_av, ...)
    SV* class
    AV* data_av
PREINIT:
    STRLEN batch_size = 32;
    int shuffle = 0;
    STRLEN data_len, i, j;
    STRLEN *indices = NULL;
    AV *result_av;
PPCODE:
    /*
     * Splits the elements of data_av into consecutive batches and returns
     * them as a list of array references.
     *
     * Optional key/value arguments after data_av:
     *   batch_size - elements per batch (default 32); must be >= 1
     *   shuffle    - when true, the element order is permuted with a
     *                Fisher-Yates shuffle driven by Perl's Drand01()
     * Unknown keys are ignored.
     *
     * The last batch may be shorter than batch_size, and an empty data_av
     * yields an empty list. Batches hold the original element SVs (with
     * their reference counts incremented), not copies.
     *
     * Croaks on an odd number of option arguments or a batch_size < 1.
     */
    PERL_UNUSED_VAR(class);

    /* items counts class and data_av too, so the remainder must be even. */
    if (items > 2) {
        if ((items - 2) % 2 != 0) croak("Expected key-value pairs after data");
        for (i = 2; i < (STRLEN)items; i += 2) {
            const char *key = SvPV_nolen(ST(i));
            SV *val = ST(i + 1);
            if (strEQ(key, "batch_size")) {
                /* Validate while the value is still signed: 0 would stall
                 * the batching loop below forever, and a negative value
                 * would wrap to a huge unsigned size. */
                IV requested = SvIV(val);
                if (requested <= 0) {
                    croak("batch_size must be a positive integer");
                }
                batch_size = (STRLEN)requested;
            } else if (strEQ(key, "shuffle")) {
                shuffle = SvTRUE(val) ? 1 : 0;
            }
        }
    }

    data_len = av_len(data_av) + 1;
    if (data_len == 0) {
        XSRETURN_EMPTY;
    }

    /* Batches are built from an index permutation so data_av itself is
     * never reordered. */
    Newx(indices, data_len, STRLEN);
    for (i = 0; i < data_len; i++) {
        indices[i] = i;
    }

    if (shuffle) {
        /* Drand01() draws from Perl's own RNG. If neither srand() nor
         * rand() has run yet, that RNG is unseeded and every process
         * would get the same permutation, so seed it once here the same
         * way Perl does on its first rand(). */
        if (!PL_srand_called) {
            (void)seedDrand01((Rand_seed_t)Perl_seed(aTHX));
            PL_srand_called = TRUE;
        }

        /* Fisher-Yates: swap slot i with a random slot in 0..i. */
        for (i = data_len - 1; i > 0; i--) {
            j = (STRLEN)(Drand01() * (i + 1));
            if (j != i) {
                STRLEN tmp = indices[i];
                indices[i] = indices[j];
                indices[j] = tmp;
            }
        }
    }

    /* All batches are collected in result_av before anything is pushed
     * on the Perl stack. */
    result_av = newAV();
    for (i = 0; i < data_len; i += batch_size) {
        AV *batch_av = newAV();
        /* Written as a remaining-length test so i + batch_size is only
         * evaluated when it cannot exceed data_len. */
        STRLEN end = (data_len - i < batch_size) ? data_len : i + batch_size;

        for (j = i; j < end; j++) {
            SV **elem = av_fetch(data_av, indices[j], 0);
            if (elem && *elem) {
                av_push(batch_av, SvREFCNT_inc(*elem));
            }
        }
        av_push(result_av, newRV_noinc((SV*)batch_av));
    }

    Safefree(indices);

    /* Return the batch array references as a flat list. Each gets its own
     * mortal reference so it outlives result_av. */
    {
        STRLEN n = av_len(result_av) + 1;
        EXTEND(SP, (SSize_t)n);
        for (i = 0; i < n; i++) {
            SV **elem = av_fetch(result_av, i, 0);
            if (elem && *elem) {
                PUSHs(sv_2mortal(SvREFCNT_inc(*elem)));
            }
        }
    }
    SvREFCNT_dec(result_av);

void
tokenize_batch(class, tokenizer, texts_av, ...)
    SV* class
    SV* tokenizer
    AV* texts_av
PREINIT:
    STRLEN max_length = 512;
    STRLEN i, n_texts;
    AV *all_input_ids;
    AV *all_targets;
    SV *inputs_ref;
    SV *targets_ref;
PPCODE:
    /*
     * Encodes every text in texts_av with $tokenizer->encode($text) and
     * builds next-token training pairs from the result.
     *
     * Optional key/value arguments after texts_av:
     *   max_length - keep at most this many tokens per text (default 512)
     * Unknown keys are ignored.
     *
     * Returns two array references: (\@all_input_ids, \@all_targets).
     * For each text with more than one token (after truncation) the input
     * row is tokens[0 .. n-2] and the target row is tokens[1 .. n-1].
     * Texts that yield fewer than two tokens add no row, so the outputs
     * can be shorter than texts_av.
     *
     * Croaks if tokenizer is undefined or the option arguments are not
     * key/value pairs; an exception raised by encode() propagates.
     */
    PERL_UNUSED_VAR(class);

    if (!tokenizer || !SvOK(tokenizer)) {
        croak("tokenizer required");
    }

    /* Options must come in pairs, otherwise ST(i + 1) below would read
     * past the last argument. */
    if (items > 3) {
        if ((items - 3) % 2 != 0) croak("Expected key-value pairs after texts");
        for (i = 3; i < (STRLEN)items; i += 2) {
            const char *key = SvPV_nolen(ST(i));
            SV *val = ST(i + 1);
            if (strEQ(key, "max_length")) {
                max_length = SvIV(val);
            }
        }
    }

    n_texts = av_len(texts_av) + 1;
    all_input_ids = newAV();
    all_targets = newAV();

    /* The result references are made mortal before any callback runs, so
     * they are released if encode() dies. They are created outside the
     * per-text SAVETMPS/FREETMPS window and therefore survive it. */
    inputs_ref = sv_2mortal(newRV_noinc((SV*)all_input_ids));
    targets_ref = sv_2mortal(newRV_noinc((SV*)all_targets));

    for (i = 0; i < n_texts; i++) {
        SV **text_svp = av_fetch(texts_av, i, 0);
        if (!text_svp || !*text_svp) continue;

        /* Local stack pointer for the callback. It starts at PL_stack_sp,
         * which is above this XSUB's own arguments, so they stay intact. */
        dSP;
        ENTER;
        SAVETMPS;

        PUSHMARK(SP);
        XPUSHs(tokenizer);
        XPUSHs(*text_svp);
        PUTBACK;

        int count = call_method("encode", G_LIST);

        SPAGAIN;

        if (count > 0) {
            /* The returned tokens sit on the stack in order, first token
             * lowest, so they are read in place. */
            SV **first_tok = SP - count + 1;
            STRLEN n_tokens = (STRLEN)count;
            STRLEN j;

            /* Truncate if needed */
            if (n_tokens > max_length) {
                n_tokens = max_length;
            }

            /* For LM: inputs are tokens[:-1], targets are tokens[1:] */
            if (n_tokens > 1) {
                AV *input_ids = newAV();
                AV *target_ids = newAV();

                for (j = 0; j + 1 < n_tokens; j++) {
                    av_push(input_ids, SvREFCNT_inc(first_tok[j]));
                    av_push(target_ids, SvREFCNT_inc(first_tok[j + 1]));
                }

                av_push(all_input_ids, newRV_noinc((SV*)input_ids));
                av_push(all_targets, newRV_noinc((SV*)target_ids));
            }

            /* Discard the returned values from the stack. */
            SP -= count;
        }

        PUTBACK;
        FREETMPS;
        LEAVE;
    }

    /* call_method() may have reallocated the Perl stack, and the SPAGAIN
     * calls above refreshed only the loop-local stack pointer. Recompute
     * this XSUB's SP from the current stack base before pushing. */
    SP = PL_stack_base + ax - 1;

    /* Return (\@all_input_ids, \@all_targets) */
    EXTEND(SP, 2);
    PUSHs(inputs_ref);
    PUSHs(targets_ref);

void
zero_grad(class, ...)
    SV* class
PREINIT:
    STRLEN i;
PPCODE:
    /*
     * Class-method helper: for every argument after the class name, calls
     * its zero_grad method if the argument reports (via ->can) that it has
     * one. Undefined arguments are skipped. Returns an empty list.
     */
    PERL_UNUSED_VAR(class);
    
    for (i = 1; i < (STRLEN)items; i++) {
        SV *tensor_sv = ST(i);
        if (!SvOK(tensor_sv)) continue;
        
        /* Loop-local stack pointer and temporaries scope for the two
         * method calls made on this argument. */
        dSP;
        ENTER;
        SAVETMPS;
        
        /* Check if tensor->can('zero_grad') */
        PUSHMARK(SP);
        XPUSHs(tensor_sv);
        XPUSHs(sv_2mortal(newSVpv("zero_grad", 0)));
        PUTBACK;
        
        int count = call_method("can", G_SCALAR);
        SPAGAIN;
        
        if (count > 0) {
            SV *can_result = POPs;
            if (SvTRUE(can_result)) {
                /* Publish the popped stack position before starting the
                 * second call. */
                PUTBACK;
                
                /* Call zero_grad method */
                PUSHMARK(SP);
                XPUSHs(tensor_sv);
                PUTBACK;
                
                /* G_DISCARD: any return value of zero_grad is dropped. */
                call_method("zero_grad", G_DISCARD);
                SPAGAIN;
            }
        }
        
        PUTBACK;
        FREETMPS;
        LEAVE;
    }
    
    XSRETURN_EMPTY;

SV*
training_step(class, model, optimizer, inputs, targets, ...)
    SV* class
    SV* model
    SV* optimizer
    SV* inputs
    SV* targets
PREINIT:
    const char *loss_fn = "cross_entropy";
    SV *ctx = NULL;
    SV *logits = NULL;
    SV *loss = NULL;
    float loss_value = 0.0;
    STRLEN i;
CODE:
    PERL_UNUSED_VAR(class);
    
    /* Parse optional args */
    for (i = 5; i < (STRLEN)items; i += 2) {

lib/Lugh.xs  view on Meta::CPAN

        }
    }
    
    /* Update step count */
    hv_store(self_hv, "step_count", 10, newSViv(step_count), 0);

float
get_lr(self)
    SV* self
PREINIT:
    HV* fields;
    SV** lr_slot;
CODE:
    /*
     * Returns the value stored under the "lr" key of the object hash, or
     * 0.001 when that key is absent.
     * Croaks if self is not a hash reference.
     */
    if (!(SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV))
        croak("self must be a hash ref");

    fields = (HV*)SvRV(self);
    lr_slot = hv_fetch(fields, "lr", 2, 0);
    if (lr_slot) {
        RETVAL = SvNV(*lr_slot);
    } else {
        RETVAL = 0.001f;
    }
OUTPUT:
    RETVAL

void
set_lr(self, new_lr)
    SV* self
    float new_lr
PREINIT:
    HV* fields;
    SV* lr_sv;
CODE:
    /*
     * Stores new_lr under the "lr" key of the object hash, replacing any
     * previous value.
     * Croaks if self is not a hash reference.
     */
    if (!(SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV))
        croak("self must be a hash ref");

    fields = (HV*)SvRV(self);
    lr_sv = newSVnv(new_lr);
    hv_store(fields, "lr", 2, lr_sv, 0);

int
get_step_count(self)
    SV* self
PREINIT:
    HV* fields;
    SV** count_slot;
CODE:
    /*
     * Returns the integer stored under the "step_count" key of the object
     * hash, or 0 when that key is absent.
     * Croaks if self is not a hash reference.
     */
    if (!(SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV))
        croak("self must be a hash ref");

    fields = (HV*)SvRV(self);
    count_slot = hv_fetch(fields, "step_count", 10, 0);
    if (count_slot) {
        RETVAL = SvIV(*count_slot);
    } else {
        RETVAL = 0;
    }
OUTPUT:
    RETVAL

MODULE = Lugh  PACKAGE = Lugh::Optimizer

void
clip_grad_norm(class, max_norm, ...)
    SV* class
    float max_norm
PREINIT:
    double sum_sq = 0.0;
    float total_norm;
    float clip_coef;
    STRLEN i;
PPCODE:
    /*
     * Global gradient-norm clipping over the tensors given after max_norm.
     *
     * Computes the L2 norm of all gradient elements of all given tensors.
     * If it exceeds max_norm, every gradient element is multiplied by
     * max_norm / (norm + 1e-6). Returns the norm measured before clipping.
     *
     * Arguments that are not hash references, lack a _tensor_id key, are
     * unknown to get_tensor_by_id(), or have no gradient are skipped. A
     * tensor passed more than once is counted, and scaled, once per
     * occurrence.
     *
     * Croaks if max_norm is not positive.
     *
     * The arguments are walked twice (measure, then scale), so no
     * per-element bookkeeping is allocated. The sum of squares is kept in
     * a double to limit rounding error over many elements.
     */
    PERL_UNUSED_VAR(class);

    if (max_norm <= 0) {
        croak("max_norm must be positive");
    }

    /* Pass 1: accumulate the squared gradient values. */
    for (i = 2; i < (STRLEN)items; i++) {
        SV *tensor_sv = ST(i);
        SV **svp;
        LughTensor *tensor;
        STRLEN n_elem, j;

        /* An undefined value is never a reference, so this also skips
         * undef arguments. */
        if (!SvROK(tensor_sv) || SvTYPE(SvRV(tensor_sv)) != SVt_PVHV) continue;

        svp = hv_fetch((HV*)SvRV(tensor_sv), "_tensor_id", 10, 0);
        if (!svp) continue;

        tensor = get_tensor_by_id(SvIV(*svp));
        if (!tensor || !tensor->grad) continue;

        n_elem = ggml_nelements(tensor->grad);
        for (j = 0; j < n_elem; j++) {
            float g = ggml_get_f32_1d(tensor->grad, j);
            sum_sq += (double)g * (double)g;
        }
    }

    total_norm = (float)sqrt(sum_sq);

    /* Pass 2: rescale in place, only when the norm exceeds the limit. */
    if (total_norm > max_norm) {
        /* The small epsilon keeps the divisor away from zero. */
        clip_coef = max_norm / (total_norm + 1e-6f);

        for (i = 2; i < (STRLEN)items; i++) {
            SV *tensor_sv = ST(i);
            SV **svp;
            LughTensor *tensor;
            STRLEN n_elem, j;

            if (!SvROK(tensor_sv) || SvTYPE(SvRV(tensor_sv)) != SVt_PVHV) continue;

            svp = hv_fetch((HV*)SvRV(tensor_sv), "_tensor_id", 10, 0);
            if (!svp) continue;

            tensor = get_tensor_by_id(SvIV(*svp));
            if (!tensor || !tensor->grad) continue;

            n_elem = ggml_nelements(tensor->grad);
            for (j = 0; j < n_elem; j++) {
                float g = ggml_get_f32_1d(tensor->grad, j);
                ggml_set_f32_1d(tensor->grad, j, g * clip_coef);
            }
        }
    }

    /* Return total norm (before clipping) */
    EXTEND(SP, 1);
    mPUSHn(total_norm);

void
clip_grad_value(class, max_value, ...)
    SV* class
    float max_value
PREINIT:
    STRLEN arg;
PPCODE:
    /*
     * Element-wise gradient clamping: for every tensor given after
     * max_value, each gradient element above max_value is set to
     * max_value and each element below -max_value is set to -max_value.
     * Elements already inside the range are not rewritten.
     *
     * Arguments that are undefined, not hash references, lack a
     * _tensor_id key, are unknown to get_tensor_by_id(), or have no
     * gradient are skipped. Returns an empty list.
     *
     * Croaks if max_value is not positive.
     */
    PERL_UNUSED_VAR(class);

    if (max_value <= 0)
        croak("max_value must be positive");

    for (arg = 2; arg < (STRLEN)items; arg++) {
        SV *candidate = ST(arg);
        SV **id_slot;
        LughTensor *owner;
        STRLEN n_grad, k;

        if (!SvOK(candidate) || !SvROK(candidate) || SvTYPE(SvRV(candidate)) != SVt_PVHV)
            continue;

        id_slot = hv_fetch((HV*)SvRV(candidate), "_tensor_id", 10, 0);
        if (id_slot == NULL)
            continue;

        owner = get_tensor_by_id(SvIV(*id_slot));
        if (owner == NULL || owner->grad == NULL)
            continue;

        n_grad = ggml_nelements(owner->grad);
        for (k = 0; k < n_grad; k++) {
            const float g = ggml_get_f32_1d(owner->grad, k);
            if (g > max_value)
                ggml_set_f32_1d(owner->grad, k, max_value);
            else if (g < -max_value)
                ggml_set_f32_1d(owner->grad, k, -max_value);
        }
    }

    XSRETURN_EMPTY;

MODULE = Lugh  PACKAGE = Lugh::Optimizer::LRScheduler

SV*
new(class, optimizer, ...)
    const char* class
    SV* optimizer
PREINIT:
    HV* self;
    const char* schedule_type = "constant";
    float initial_lr = 0.0f;
    int warmup_steps = 0;
    int total_steps = 1000;
    float min_lr = 0.0f;
    float decay_rate = 0.1f;
    AV* milestones = NULL;
    STRLEN i;
CODE:
    /* Get initial LR from optimizer */
    {
        dSP;
        ENTER;
        SAVETMPS;
        PUSHMARK(SP);
        XPUSHs(optimizer);
        PUTBACK;
        int count = call_method("get_lr", G_SCALAR);
        SPAGAIN;
        if (count > 0) {



( run in 0.674 second using v1.01-cache-2.11-cpan-5511b514fd6 )