Lugh
view release on metacpan or search on metacpan
lib/Lugh.xs view on Meta::CPAN
indices[j] = j;
}
/* Apply temperature and softmax */
for (j = 0; j < n_vocab; j++) {
probs[j] = expf((probs[j] - max_logit) / temperature);
sum += probs[j];
}
for (j = 0; j < n_vocab; j++) {
probs[j] /= sum;
}
/* Order the nucleus (top-p) candidates by descending probability.
 *
 * probs[] holds the softmax probabilities computed above and indices[]
 * the matching token ids; both arrays are permuted together.
 *
 * A partial selection sort is used: pass j moves the largest remaining
 * probability into slot j, so probs[0..j] are always the j+1 most likely
 * tokens in descending order.  That makes it valid to stop as soon as
 * their running sum reaches top_p, which is all the sampling loop below
 * ever reads.
 *
 * The previous bubble sort could not be stopped early like this: one
 * bubble pass only settles the SMALLEST element at the tail, so the head
 * prefix was not guaranteed to contain the most likely tokens and the
 * sampler could be restricted to the wrong candidates.
 */
{
    int k, best;
    cumsum = 0.0f;
    for (j = 0; j < n_vocab; j++) {
        /* Locate the largest probability in the unsorted tail [j, n_vocab). */
        best = j;
        for (k = j + 1; k < n_vocab; k++) {
            if (probs[k] > probs[best]) best = k;
        }
        /* Swap it (and its token id) into position j. */
        if (best != j) {
            float tmp = probs[j];
            int tmp_idx = indices[j];
            probs[j] = probs[best];
            indices[j] = indices[best];
            probs[best] = tmp;
            indices[best] = tmp_idx;
        }
        /* Running mass of the sorted prefix; stop once the nucleus is covered. */
        cumsum += probs[j];
        if (cumsum >= top_p) break;
    }
}
/* Sample from top_p tokens */
threshold = lugh_rand_float() * top_p;
cumsum = 0.0f;
next_token = indices[0];
for (j = 0; j < n_vocab; j++) {
cumsum += probs[j];
if (cumsum >= threshold) {
next_token = indices[j];
break;
}
if (cumsum >= top_p) break;
}
Safefree(probs);
Safefree(indices);
}
SvREFCNT_dec((SV*)logits_av);
/* Add to results */
av_push(result_av, newSViv(next_token));
tokens[current_len] = next_token;
current_len++;
gen_count++;
/* Call streaming callback if provided.
 * The callback receives two integer arguments, (next_token, gen_count),
 * and is invoked in scalar context; a true return value stops generation. */
if (callback) {
/* dSP declares a local copy of the Perl stack pointer for this call */
dSP;
int should_stop;
/* Open a scope so the mortal argument SVs are freed by FREETMPS/LEAVE */
ENTER;
SAVETMPS;
/* Mark the start of the argument list, then push both arguments */
PUSHMARK(SP);
XPUSHs(sv_2mortal(newSViv(next_token)));
XPUSHs(sv_2mortal(newSViv(gen_count)));
/* Publish the local stack pointer before handing control to Perl */
PUTBACK;
/* The return count of call_sv is not inspected here */
call_sv(callback, G_SCALAR);
/* Re-read the stack pointer: the callee may have moved the stack */
SPAGAIN;
/* Take the callback's result off the stack as an integer */
should_stop = POPi;
PUTBACK;
FREETMPS;
LEAVE;
/* Callback returns true to stop generation (leaves the enclosing loop) */
if (should_stop) break;
}
/* Check for EOS token */
if (next_token == eos_token) break;
}
}
Safefree(tokens);
/* Return generated tokens as list.
 * av_len() yields the highest index, so +1 gives the element count. */
n_result = av_len(result_av) + 1;
{
/* Use XSRETURN explicitly: each token is written into return slot
 * ST(count) as a new mortal IV, then XSRETURN reports how many slots
 * were filled. */
int count = 0;
SP = orig_sp; /* Restore original stack pointer */
/* NOTE(review): no EXTEND() is visible in this part of the function;
 * confirm the stack has room for n_result return values before the
 * XST_mIV() writes below. */
for (i = 0; i < n_result; i++) {
SV **svp = av_fetch(result_av, i, 0);
/* Slots for which av_fetch returns NULL are skipped, so count may be
 * smaller than n_result */
if (svp) {
XST_mIV(count, SvIV(*svp));
count++;
}
}
/* The values were copied into fresh mortals above, so the temporary
 * result array can be released before returning */
SvREFCNT_dec(result_av);
XSRETURN(count);
}
SV *
create_kv_cache(self)
SV *self
PREINIT:
HV *hv;
SV **svp;
int n_layer, n_ctx, n_head_kv, n_embd, n_head, head_dim;
LughKVCache *cache;
CODE:
/* Get inference object parameters */
hv = (HV*)SvRV(self);
( run in 0.897 second using v1.01-cache-2.11-cpan-df04353d9ac )