Data-CuckooFilter-Shared
view release on metacpan or search on metacpan
add(self, item)
SV *self
SV *item
PREINIT:
EXTRACT(self);
STRLEN n;
const char *s;
CODE:
s = SvPVbyte(item, n); /* may croak (wide char) -- BEFORE the lock */
cf_rwlock_wrlock(h);
RETVAL = cf_add_locked(h, s, n);
__atomic_fetch_add(&h->hdr->stat_ops, 1, __ATOMIC_RELAXED);
cf_rwlock_wrunlock(h);
OUTPUT:
RETVAL
UV
add_many(self, items)
SV *self
SV *items
PREINIT:
const char **ps = NULL; STRLEN *ls = NULL;
if (cnt) { /* resolve all bytes BEFORE locking */
Newx(ps, cnt, const char *); SAVEFREEPV(ps); /* freed on return OR unwind */
Newx(ls, cnt, STRLEN); SAVEFREEPV(ls);
for (i = 0; i < cnt; i++) { /* a croak here holds NO lock; SAVEFREEPV cleans up */
SV **el = av_fetch(av, (SSize_t)i, 0);
if (el && *el) ps[i] = SvPVbyte(*el, ls[i]);
else { ps[i] = ""; ls[i] = 0; }
}
}
cf_rwlock_wrlock(h); /* locked region: NO croak-capable calls */
for (i = 0; i < cnt; i++) added += (UV)cf_add_locked(h, ps[i], ls[i]);
__atomic_fetch_add(&h->hdr->stat_ops, 1, __ATOMIC_RELAXED); /* a call always counts, even an empty batch */
cf_rwlock_wrunlock(h);
}
RETVAL = added;
OUTPUT:
RETVAL
int
contains(self, item)
SV *self
SV *item
PREINIT:
EXTRACT(self);
STRLEN n;
const char *s;
CODE:
s = SvPVbyte(item, n); /* may croak (wide char) -- BEFORE the lock */
cf_rwlock_rdlock(h);
RETVAL = cf_contains_locked(h, s, n);
cf_rwlock_rdunlock(h);
OUTPUT:
RETVAL
int
remove(self, item)
SV *self
SV *item
PREINIT:
EXTRACT(self);
STRLEN n;
const char *s;
CODE:
s = SvPVbyte(item, n); /* may croak (wide char) -- BEFORE the lock */
cf_rwlock_wrlock(h);
RETVAL = cf_remove_locked(h, s, n);
__atomic_fetch_add(&h->hdr->stat_ops, 1, __ATOMIC_RELAXED);
cf_rwlock_wrunlock(h);
OUTPUT:
RETVAL
void
clear(self)
SV *self
PREINIT:
EXTRACT(self);
CODE:
cf_rwlock_wrlock(h);
cf_clear_locked(h);
__atomic_fetch_add(&h->hdr->stat_ops, 1, __ATOMIC_RELAXED);
cf_rwlock_wrunlock(h);
UV
count(self)
SV *self
PREINIT:
EXTRACT(self);
UV n;
CODE:
static inline void cf_rwlock_spin_pause(void) {
#if defined(__x86_64__) || defined(__i386__)
__asm__ volatile("pause" ::: "memory");
#elif defined(__aarch64__)
__asm__ volatile("yield" ::: "memory");
#else
__asm__ volatile("" ::: "memory");
#endif
}
/* Extract writer PID from rwlock value (lower 31 bits when write-locked). */
#define CF_RWLOCK_WRITER_BIT 0x80000000U
#define CF_RWLOCK_PID_MASK 0x7FFFFFFFU
#define CF_RWLOCK_WR(pid) (CF_RWLOCK_WRITER_BIT | ((uint32_t)(pid) & CF_RWLOCK_PID_MASK))
/* Check if a PID is alive. Returns 1 if alive or unknown, 0 if definitely dead. */
/* Liveness via kill(pid,0). NOTE: cannot detect PID reuse -- if a dead
* lock-holder's PID is recycled to an unrelated live process before recovery
* runs, this reports "alive" and that slot's orphaned contribution is not
* reclaimed until the recycled process exits. Robust detection would require
* a per-slot process-start-time epoch (a header-layout/version change).
if (__atomic_compare_exchange_n(lock, &cur, 1,
1, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
return;
}
if (__builtin_expect(spin < CF_RWLOCK_SPIN_LIMIT, 1)) {
cf_rwlock_spin_pause();
continue;
}
cf_park_reader(h);
cur = __atomic_load_n(lock, __ATOMIC_RELAXED);
/* Sleep when write-locked OR when yielding to waiting writers */
if (cur >= CF_RWLOCK_WRITER_BIT || cur == 0) {
long rc = syscall(SYS_futex, lock, FUTEX_WAIT, cur,
&cf_lock_timeout, NULL, 0);
if (rc == -1 && errno == ETIMEDOUT) {
cf_unpark_reader(h);
cf_recover_after_timeout(h);
spin = 0;
continue;
}
}
*i1_out = i1;
*i2_out = cf_alt(h, i1, fp);
}
/* Add (item,len). Returns 1 on success, 0 if the table is full.
*
* ATOMIC: a failed add is a true no-op (the table is byte-identical to before),
* so a failed add can never introduce a false negative -- every fingerprint
* that was present stays present. The eviction path records every swap and
* rolls them back in reverse if CF_MAX_KICKS is exhausted. */
static int cf_add_locked(CfHandle *h, const void *item, size_t len) {
uint16_t fp;
uint64_t i1, i2;
cf_hash(h, item, len, &fp, &i1, &i2);
if (cf_bucket_insert(h, i1, fp) || cf_bucket_insert(h, i2, fp)) {
h->hdr->count++;
return 1;
}
/* Both candidate buckets are full -> cuckoo eviction with a recorded
* After undoing step 0, carried == fp and nothing was modified. */
for (int n = CF_MAX_KICKS - 1; n >= 0; n--) {
uint16_t tmp = cf_bucket(h, path_i[n])[path_j[n]];
cf_bucket(h, path_i[n])[path_j[n]] = carried;
carried = tmp;
}
return 0; /* full; NO state change */
}
/* return 1 if (item,len) is probably present, else 0 (definitely absent) */
static int cf_contains_locked(CfHandle *h, const void *item, size_t len) {
uint16_t fp;
uint64_t i1, i2;
cf_hash(h, item, len, &fp, &i1, &i2);
return cf_bucket_find(h, i1, fp) >= 0 || cf_bucket_find(h, i2, fp) >= 0;
}
/* remove ONE matching fingerprint of (item,len): clear the slot, return 1 if
* found, else 0. Removing an item that was never added (or one whose 16-bit
* fingerprint collides with a present item) can delete the wrong fingerprint --
* standard cuckoo-filter caveat; only remove items you added. */
static int cf_remove_locked(CfHandle *h, const void *item, size_t len) {
uint16_t fp;
uint64_t i1, i2;
cf_hash(h, item, len, &fp, &i1, &i2);
int j = cf_bucket_find(h, i1, fp);
if (j >= 0) { cf_bucket(h, i1)[j] = 0; h->hdr->count--; return 1; }
j = cf_bucket_find(h, i2, fp);
if (j >= 0) { cf_bucket(h, i2)[j] = 0; h->hdr->count--; return 1; }
return 0;
}
/* reset all slots to 0, count = 0 (caller holds the write lock) */
static inline void cf_clear_locked(CfHandle *h) {
memset(cf_slots(h), 0, (size_t)(h->hdr->num_buckets * (uint64_t)CF_SLOTS * sizeof(uint16_t)));
h->hdr->count = 0;
}
#endif /* CUCKOO_H */
( run in 0.719 second using v1.01-cache-2.11-cpan-bbe5e583499 )