Hash-SharedMem
lib/Hash/SharedMem.xs
static struct pvl THX_string_as_pvl(pTHX_ struct shash *sh, word ptr)
{
word len, *loc, spc, alloclen;
struct pvl pvl;
loc = pointer_loc(sh, ptr, &spc);
len = loc[0];
alloclen = len + WORD_SZ+1;
if(unlikely(alloclen < WORD_SZ+1 || alloclen > spc))
shash_error_data(sh);
if(unlikely((word)(size_t)len != len))
shash_error_errnum(sh, "use", ENOMEM);
pvl.pv = (char*)&loc[1];
pvl.len = len;
if(unlikely(pvl.pv[pvl.len])) shash_error_data(sh);
tally_event(&sh->tally, K_STRING_READ);
return pvl;
}
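/*
 * Illustration only (not part of the module): the stored string
 * record decoded above is laid out as one length word, then the
 * string octets, then a NUL octet, matching the WORD_SZ+1 overhead
 * in the alloclen computation:
 *
 *	+-----------------+------------------+-----+
 *	| length (1 word) | len octets of pv | NUL |
 *	+-----------------+------------------+-----+
 *
 * The first unlikely() check rejects a record whose encoded length
 * wraps or overruns its allocation, and the pv[len] check insists
 * on the NUL terminator.
 */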
static MGVTBL const string_mmapref_mgvtbl;
#define string_as_sv(sh, act, ptr) THX_string_as_sv(aTHX_ sh, act, ptr)
static SV *THX_string_as_sv(pTHX_ struct shash *sh, char const *action,
word ptr)
{
struct pvl pvl = string_as_pvl(sh, ptr);
SV *sv;
if(unlikely((size_t)(STRLEN)pvl.len != pvl.len))
shash_error_errnum(sh, action, ENOMEM);
TAINT;
/*
* There are two strategies available for returning the string
* as an SV. We can copy into a plain string SV, or we can point
into the mmapped space. In the latter case the result SV needs
* magic to keep a reference to the object representing the mmap,
* to keep it mapped. In both time and memory, the overhead of
* pointing into the mmap is pretty much fixed, but the overhead
* of copying is roughly linear in the length of the string.
* The base overhead for copying is much less than the fixed
* overhead of mapping.
*
* We therefore want to copy short strings and map long strings.
* Choosing the threshold at which to switch is a black art.
*
* Empirical result for perl 5.16 on amd64 with glibc 2.11
* is that 119-octet strings are better copied and 120-octet
* strings are better mapped, with a sharp step in the cost of
* copying at that length. This is presumably due to the memory
* allocator switching strategy when allocating 128 octets or more
* (rounded up from 120+1).
*
* The memory allocations of interest are one XPV and the
* buffer for copying, and one XPVMG and one MAGIC for mapping.
* The ugly expression here tries to compare the two sets of
* allocations. The XPVMG+MAGIC - XPV difference is compared
* against the potential buffer size. It is presumed that the
* buffer length will be rounded up to a word-aligned size.
* The structure size difference is rounded up in an attempt to
* find a threshold likely to be used by the memory allocator.
* Ideally this would be rounded to the next power of 2, but we
* can't implement that in a constant expression, so it's actually
* rounded to the next multiple of the XPVMG size. The formula
* is slightly contrived so as to achieve the exact 120-octet
* threshold on the amd64 system used for speed trials (where
* MAGIC is 40 octets, XPV is 32 octets, and XPVMG is 64 octets).
*/
if(pvl.len < sizeof(XPVMG) *
((sizeof(MAGIC)+sizeof(XPVMG)*2-1) / sizeof(XPVMG)) -
sizeof(size_t)) {
sv = newSVpvn_mortal(pvl.pv, pvl.len);
} else {
sv = sv_2mortal(newSV_type(SVt_PVMG));
(void) sv_magicext(sv, sh->data_mmap_sv, PERL_MAGIC_ext,
(MGVTBL*)&string_mmapref_mgvtbl, NULL, 0);
SvPV_set(sv, pvl.pv);
SvCUR_set(sv, pvl.len);
SvPOK_on(sv);
SvTAINTED_on(sv);
}
SvREADONLY_on(sv);
return sv;
}
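/*
 * Illustration only (not part of the module): a minimal sketch
 * evaluating the threshold expression above with the amd64 sizes
 * quoted in the comment (MAGIC 40 octets, XPVMG 64 octets, size_t
 * 8 octets; assumptions about that platform, not portable
 * constants).
 */
#if 0
#include <stdio.h>
int main(void)
{
	size_t sz_magic = 40, sz_xpvmg = 64, sz_sizet = 8;
	size_t threshold =
		sz_xpvmg * ((sz_magic + sz_xpvmg*2 - 1) / sz_xpvmg) - sz_sizet;
	printf("%zu\n", threshold);	/* prints 120 */
	return 0;
}
#endif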
#define string_cmp_pvl(sh, aptr, bpvl) THX_string_cmp_pvl(aTHX_ sh, aptr, bpvl)
static int THX_string_cmp_pvl(pTHX_ struct shash *sh, word aptr,
struct pvl bpvl)
{
struct pvl apvl = string_as_pvl(sh, aptr);
int r;
tally_event(&sh->tally, K_KEY_COMPARE);
r = memcmp(apvl.pv, bpvl.pv, apvl.len < bpvl.len ? apvl.len : bpvl.len);
return r ? r : apvl.len == bpvl.len ? 0 : apvl.len < bpvl.len ? -1 : 1;
}
#define string_eq_pvl(sh, aptr, bpvl) THX_string_eq_pvl(aTHX_ sh, aptr, bpvl)
PERL_STATIC_INLINE int THX_string_eq_pvl(pTHX_ struct shash *sh, word aptr,
struct pvl bpvl)
{
struct pvl apvl = string_as_pvl(sh, aptr);
return apvl.len == bpvl.len && memcmp(apvl.pv, bpvl.pv, apvl.len) == 0;
}
#define string_write_from_pvl(sh, alloc, pvl) \
THX_string_write_from_pvl(aTHX_ sh, alloc, pvl)
static word THX_string_write_from_pvl(pTHX_ struct shash *sh,
struct shash_alloc *alloc, struct pvl pvl)
{
word alloclen, ptr, *loc;
if(unlikely((size_t)(word)pvl.len != pvl.len))
shash_error_toobig(sh, alloc->action);
if(unlikely(pvl.len == 0) &&
likely(sh->sizes->dhd_zeropad_sz >= WORD_SZ+1))
return ZEROPAD_PTR;
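	/*
	 * A zero-length string's record is just a zero length word
	 * followed by a NUL octet, i.e. WORD_SZ+1 zero octets, so when
	 * the header's zero padding is at least that large it can stand
	 * in as a shared record for every empty string, with no
	 * allocation at all.
	 */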
alloclen = ((word)pvl.len) + WORD_SZ + 1;
if(unlikely(alloclen < WORD_SZ+1))
shash_error_toobig(sh, alloc->action);
loc = shash_alloc(sh, alloc, alloclen, &ptr);
loc[0] = pvl.len;
(void) memcpy(&loc[1], pvl.pv, pvl.len);
((byte*)&loc[1])[pvl.len] = 0;
tally_event(&sh->tally, K_STRING_WRITE);
return ptr;
lib/Hash/SharedMem.xs
}
}
#define btree_count(sh, rt) THX_btree_count(aTHX_ sh, rt)
static word THX_btree_count(pTHX_ struct shash *sh, word root)
{
struct cursor cur;
word cnt = 0;
if(!likely(btree_seek_min(sh, &cur, root))) return 0;
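	/*
	 * Each iteration accounts for one layer-0 node: pointing the
	 * cursor's layer-0 index at the node's last entry makes
	 * btree_seek_inc() step straight to the next layer-0 node, so
	 * the loop adds exactly one fanout per node.
	 */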
do {
cnt += cur.ent[0].fanout;
cur.ent[0].index = cur.ent[0].fanout - 1;
} while(btree_seek_inc(sh, &cur));
return cnt;
}
#define btree_size(sh, rt) THX_btree_size(aTHX_ sh, rt)
static word THX_btree_size(pTHX_ struct shash *sh, word root)
{
struct cursor cur;
word sz;
if(!likely(btree_seek_min(sh, &cur, root))) {
if(likely(sh->sizes->dhd_zeropad_sz >= WORD_SZ)) return 0;
sz = WORD_SZ;
} else {
sz = 0;
do {
int i;
word asz;
word *loc = bnode_body_loc(
unchecked_pointer_loc(sh, cur.ent[0].nodeptr));
for(i = cur.ent[0].fanout << 1; i--; ) {
asz = string_size(sh, *loc++);
sz += asz;
if(unlikely(sz < asz)) return ~(word)0;
}
/*
* To account for all the space occupied by the
* btree nodes, we allow a certain number of
* bytes per entry, such that there is space for
* an arbitrarily high btree of minimal fanout.
* The objective is to allow enough space
* per entry that for each minimally-filled
* layer-0 node we allocate space for that node
* and have one entry's allocation left over.
* That one-entry-per-node excess then accounts
* for the size of the layer-1 nodes with one
* entry's allocation per layer-1 node left
* over, and so on recursively. The space to
* allow per entry is theoretically the size
* of the minimally-filled node (WORD_SZ *
* (1+2*MINFANOUT)) divided by MINFANOUT-1;
* we round this up to an integral number of
* bytes per entry.
*
* Nodes that are more than minimally filled lead
* to this being an overestimate, because they
* are more space-efficient both in themselves
* and by using fewer higher-layer entries.
* An underfilled root node can lead to needing
* more bytes than this formula allows, but the
* space allowed for the node will always be
* strictly greater than the two words per entry
* required by the node body. Because the size
* is ultimately rounded up to word alignment
* (actually to line alignment), it is rounded
* up sufficiently to account for the single-word
* header of the root node.
*/
asz = cur.ent[0].fanout *
((WORD_SZ*(1+2*MINFANOUT) + MINFANOUT-2) /
(MINFANOUT-1));
sz += asz;
if(unlikely(sz < asz)) return ~(word)0;
cur.ent[0].index = cur.ent[0].fanout - 1;
} while(btree_seek_inc(sh, &cur));
}
sz = LINE_ALIGN(sh->sizes, sz);
return likely(sz) ? sz : ~(word)0;
}
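/*
 * Illustration only (not part of the module): the per-entry
 * allowance in the loop above, evaluated with WORD_SZ = 8 and a
 * hypothetical MINFANOUT of 4 (an assumed value, for the arithmetic
 * only).
 */
#if 0
#include <stdio.h>
int main(void)
{
	unsigned word_sz = 8, minfanout = 4;
	/* ceil(minimal node size / (MINFANOUT-1)) */
	unsigned per_entry =
		(word_sz*(1 + 2*minfanout) + minfanout-2) / (minfanout-1);
	/* prints 24: four entries' allowance (96 octets) covers the
	   72-octet minimal node and leaves one entry's allowance (24
	   octets) over for the layer above */
	printf("%u\n", per_entry);
	return 0;
}
#endif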
#define btree_migrate_at_layer(shf, ptrf, el, sht, alloct) \
THX_btree_migrate_at_layer(aTHX_ shf, ptrf, el, sht, alloct)
static word THX_btree_migrate_at_layer(pTHX_ struct shash *shf, word ptrf,
int expect_layer, struct shash *sht, struct shash_alloc *alloct)
{
int layer, fanout, i;
word nodebody[MAXFANOUT*2];
word const *locf = bnode_body_loc(bnode_check(shf, ptrf, expect_layer,
&layer, &fanout));
word *loct = nodebody;
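	/*
	 * Layer 0 entries are (key, value) pairs of string pointers,
	 * so both words are migrated as strings.  Entries in higher
	 * layers are (first-key, child) pairs: each child subtree is
	 * migrated first, and its new first-key pointer is then read
	 * back out of the migrated child's body rather than being
	 * migrated again.
	 */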
if(likely(layer == 0)) {
for(i = fanout << 1; i--; ) {
*loct++ = string_migrate(shf, *locf++, sht, alloct);
}
} else {
for(i = fanout; i--; ) {
word spc;
word ptrt = btree_migrate_at_layer(shf, locf[1],
layer-1, sht, alloct);
locf += 2;
*loct++ =
bnode_body_loc(pointer_loc(sht, ptrt, &spc))[0];
*loct++ = ptrt;
}
}
return bnode_write(sht, alloct, layer, fanout, nodebody);
}
#define btree_migrate(shf, ptrf, sht, act) \
THX_btree_migrate(aTHX_ shf, ptrf, sht, act)
static word THX_btree_migrate(pTHX_ struct shash *shf, word ptrf,
struct shash *sht, char const *action)
{
struct shash_alloc new_alloc;
if(unlikely(setjmp(new_alloc.fulljb)))
shash_error_errnum(sht, action, ENOSPC);
new_alloc.action = action;
new_alloc.prealloc_len = 0;
return btree_migrate_at_layer(shf, ptrf, -1, sht, &new_alloc);
}
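/*
 * Illustration only (not part of the module): the setjmp() above
 * establishes the unwind point for the whole recursive migration.
 * A sketch of the matching longjmp() side, assuming shash_alloc()
 * detects exhaustion roughly like this (hypothetical names):
 *
 *	if(needed > space_left_in(sht))
 *		longjmp(alloc->fulljb, 1);
 *
 * so running out of space anywhere in the btree walk surfaces as a
 * single ENOSPC error, with no per-call error plumbing through the
 * recursion.
 */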