Hash-SharedMem


lib/Hash/SharedMem.xs

static struct pvl THX_string_as_pvl(pTHX_ struct shash *sh, word ptr)
{
	word len, *loc, spc, alloclen;
	struct pvl pvl;
	loc = pointer_loc(sh, ptr, &spc);
	len = loc[0];
	alloclen = len + WORD_SZ+1;
	if(unlikely(alloclen < WORD_SZ+1 || alloclen > spc))
		shash_error_data(sh);
	if(unlikely((word)(size_t)len != len))
		shash_error_errnum(sh, "use", ENOMEM);
	pvl.pv = (char*)&loc[1];
	pvl.len = len;
	if(unlikely(pvl.pv[pvl.len])) shash_error_data(sh);
	tally_event(&sh->tally, K_STRING_READ);
	return pvl;
}
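
/*
 * A sketch of the string record that string_as_pvl() decodes, as
 * implied by the code above and by string_write_from_pvl() below
 * (WORD_SZ being the octet size of a word):
 *
 *	loc[0]			payload length in octets
 *	(char *)&loc[1]		payload octets
 *	((char *)&loc[1])[len]	terminating NUL octet
 *
 * The alloclen test rejects both arithmetic wraparound (alloclen <
 * WORD_SZ+1) and a record that would extend past the mapped space
 * (alloclen > spc); the (size_t) cast test catches a length that
 * cannot be represented in this process's size_t; and the final test
 * insists on the terminating NUL.
 */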

static MGVTBL const string_mmapref_mgvtbl;

#define string_as_sv(sh, act, ptr) THX_string_as_sv(aTHX_ sh, act, ptr)
static SV *THX_string_as_sv(pTHX_ struct shash *sh, char const *action,
	word ptr)
{
	struct pvl pvl = string_as_pvl(sh, ptr);
	SV *sv;
	if(unlikely((size_t)(STRLEN)pvl.len != pvl.len))
		shash_error_errnum(sh, action, ENOMEM);
	TAINT;
	/*
	 * There are two strategies available for returning the string
	 * as an SV.  We can copy into a plain string SV, or we can point
	 * into the mmapped space.  In the latter case the result SV needs
	 * magic to keep a reference to the object representing the mmap,
	 * to keep it mapped.  In both time and memory, the overhead of
	 * pointing into the mmap is pretty much fixed, but the overhead
	 * of copying is roughly linear in the length of the string.
	 * The base overhead for copying is much less than the fixed
	 * overhead of mapping.
	 *
	 * We therefore want to copy short strings and map long strings.
	 * Choosing the threshold at which to switch is a black art.
	 *
	 * Empirical result for perl 5.16 on amd64 with glibc 2.11
	 * is that 119-octet strings are better copied and 120-octet
	 * strings are better mapped, with a sharp step in the cost of
	 * copying at that length.  This is presumably due to the memory
	 * allocator switching strategy when allocating 128 octets or more
	 * (rounded up from 120+1).
	 *
	 * The memory allocations of interest are one XPV and the
	 * buffer for copying, and one XPVMG and one MAGIC for mapping.
	 * The ugly expression here tries to compare the two sets of
	 * allocations.  The XPVMG+MAGIC - XPV difference is compared
	 * against the potential buffer size.  It is presumed that the
	 * buffer length will be rounded up to a word-aligned size.
	 * The structure size difference is rounded up in an attempt to
	 * find a threshold likely to be used by the memory allocator.
	 * Ideally this would be rounded to the next power of 2, but we
	 * can't implement that in a constant expression, so it's actually
	 * rounded to the next multiple of the XPVMG size.  The formula
	 * is slightly contrived so as to achieve the exact 120-octet
	 * threshold on the amd64 system used for speed trials (where
	 * MAGIC is 40 octets, XPV is 32 octets, and XPVMG is 64 octets).
	 */
	if(pvl.len < sizeof(XPVMG) *
			((sizeof(MAGIC)+sizeof(XPVMG)*2-1) / sizeof(XPVMG)) -
			sizeof(size_t)) {
		sv = newSVpvn_mortal(pvl.pv, pvl.len);
	} else {
		sv = sv_2mortal(newSV_type(SVt_PVMG));
		(void) sv_magicext(sv, sh->data_mmap_sv, PERL_MAGIC_ext,
				(MGVTBL*)&string_mmapref_mgvtbl, NULL, 0);
		SvPV_set(sv, pvl.pv);
		SvCUR_set(sv, pvl.len);
		SvPOK_on(sv);
		SvTAINTED_on(sv);
	}
	SvREADONLY_on(sv);
	return sv;
}
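
/*
 * Worked example of the threshold expression in string_as_sv() above,
 * using the amd64 sizes quoted in its comment (MAGIC 40 octets, XPVMG
 * 64 octets, size_t 8 octets):
 *
 *	sizeof(XPVMG) * ((sizeof(MAGIC)+sizeof(XPVMG)*2-1) / sizeof(XPVMG))
 *			- sizeof(size_t)
 *	  = 64 * ((40+127)/64) - 8
 *	  = 64 * 2 - 8
 *	  = 120
 *
 * so a 119-octet string takes the copying branch and a 120-octet
 * string takes the mapping branch, matching the empirically measured
 * step in copying cost.
 */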

#define string_cmp_pvl(sh, aptr, bpvl) THX_string_cmp_pvl(aTHX_ sh, aptr, bpvl)
static int THX_string_cmp_pvl(pTHX_ struct shash *sh, word aptr,
	struct pvl bpvl)
{
	struct pvl apvl = string_as_pvl(sh, aptr);
	int r;
	tally_event(&sh->tally, K_KEY_COMPARE);
	r = memcmp(apvl.pv, bpvl.pv, apvl.len < bpvl.len ? apvl.len : bpvl.len);
	return r ? r : apvl.len == bpvl.len ? 0 : apvl.len < bpvl.len ? -1 : 1;
}
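
/*
 * The result contract is memcmp-like, with length as the tiebreak on
 * a common prefix: for example, a stored "ab" compared against a pvl
 * of "abc" yields a negative result, the reverse comparison a positive
 * one, and identical strings yield 0.
 */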

#define string_eq_pvl(sh, aptr, bpvl) THX_string_eq_pvl(aTHX_ sh, aptr, bpvl)
PERL_STATIC_INLINE int THX_string_eq_pvl(pTHX_ struct shash *sh, word aptr,
	struct pvl bpvl)
{
	struct pvl apvl = string_as_pvl(sh, aptr);
	return apvl.len == bpvl.len && memcmp(apvl.pv, bpvl.pv, apvl.len) == 0;
}

#define string_write_from_pvl(sh, alloc, pvl) \
	THX_string_write_from_pvl(aTHX_ sh, alloc, pvl)
static word THX_string_write_from_pvl(pTHX_ struct shash *sh,
	struct shash_alloc *alloc, struct pvl pvl)
{
	word alloclen, ptr, *loc;
	if(unlikely((size_t)(word)pvl.len != pvl.len))
		shash_error_toobig(sh, alloc->action);
	if(unlikely(pvl.len == 0) &&
			likely(sh->sizes->dhd_zeropad_sz >= WORD_SZ+1))
		return ZEROPAD_PTR;
	alloclen = ((word)pvl.len) + WORD_SZ + 1;
	if(unlikely(alloclen < WORD_SZ+1))
		shash_error_toobig(sh, alloc->action);
	loc = shash_alloc(sh, alloc, alloclen, &ptr);
	loc[0] = pvl.len;
	(void) memcpy(&loc[1], pvl.pv, pvl.len);
	((byte*)&loc[1])[pvl.len] = 0;
	tally_event(&sh->tally, K_STRING_WRITE);
	return ptr;
}

lib/Hash/SharedMem.xs

#define btree_count(sh, rt) THX_btree_count(aTHX_ sh, rt)
static word THX_btree_count(pTHX_ struct shash *sh, word root)
{
	struct cursor cur;
	word cnt = 0;
	if(!likely(btree_seek_min(sh, &cur, root))) return 0;
	do {
		cnt += cur.ent[0].fanout;
		cur.ent[0].index = cur.ent[0].fanout - 1;
	} while(btree_seek_inc(sh, &cur));
	return cnt;
}
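
/*
 * A note on the loop above: cur.ent[0] is the cursor's layer-0
 * position, so each iteration credits one whole leaf node's fanout to
 * the count, and forcing the index to fanout-1 before btree_seek_inc()
 * makes the increment step directly to the first entry of the next
 * leaf instead of visiting entries one at a time.
 */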

#define btree_size(sh, rt) THX_btree_size(aTHX_ sh, rt)
static word THX_btree_size(pTHX_ struct shash *sh, word root)
{
	struct cursor cur;
	word sz;
	if(!likely(btree_seek_min(sh, &cur, root))) {
		if(likely(sh->sizes->dhd_zeropad_sz >= WORD_SZ)) return 0;
		sz = WORD_SZ;
	} else {
		sz = 0;
		do {
			int i;
			word asz;
			word *loc = bnode_body_loc(
				unchecked_pointer_loc(sh, cur.ent[0].nodeptr));
			for(i = cur.ent[0].fanout << 1; i--; ) {
				asz = string_size(sh, *loc++);
				sz += asz;
				if(unlikely(sz < asz)) return ~(word)0;
			}
			/*
			 * To account for all the space occupied by the
			 * btree nodes, we allow a certain number of
			 * bytes per entry, such that there is space for
			 * an arbitrarily high btree of minimal fanout.
			 * The objective is to allow enough space
			 * per entry that for each minimally-filled
			 * layer-0 node we allocate space for that node
			 * and have one entry's allocation left over.
			 * That one-entry-per-node excess then accounts
			 * for the size of the layer-1 nodes with one
			 * entry's allocation per layer-1 node left
			 * over, and so on recursively.  The space to
			 * allow per entry is theoretically the size
			 * of the minimally-filled node (WORD_SZ *
			 * (1+2*MINFANOUT)) divided by MINFANOUT-1;
			 * we round this up to an integral number of
			 * bytes per entry.
			 *
			 * Nodes that are more than minimally filled lead
			 * to this being an overestimate, because they
			 * are more space-efficient both in themselves
			 * and by using fewer higher-layer entries.
			 * An underfilled root node can lead to needing
			 * more bytes than this formula allows, but the
			 * space allowed for the node will always be
			 * strictly greater than the two words per entry
			 * required by the node body.  Because the size
			 * is ultimately rounded up to word alignment
			 * (actually to line alignment), it is rounded
			 * up sufficiently to account for the single-word
			 * header of the root node.
			 */
			asz = cur.ent[0].fanout *
				((WORD_SZ*(1+2*MINFANOUT) + MINFANOUT-2) /
					(MINFANOUT-1));
			sz += asz;
			if(unlikely(sz < asz)) return ~(word)0;
			cur.ent[0].index = cur.ent[0].fanout - 1;
		} while(btree_seek_inc(sh, &cur));
	}
	sz = LINE_ALIGN(sh->sizes, sz);
	return likely(sz) ? sz : ~(word)0;
}
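
/*
 * Worked example of the per-entry allowance above, a sketch assuming
 * WORD_SZ == 8 and MINFANOUT == 4 (illustrative values, not taken
 * from this listing):
 *
 *	(WORD_SZ*(1+2*MINFANOUT) + MINFANOUT-2) / (MINFANOUT-1)
 *		= (8*9 + 2) / 3
 *		= 24 octets per entry
 *
 * A minimally-filled layer-0 node with 4 entries is then credited
 * 4*24 = 96 octets: 72 octets cover the node itself (one header word
 * plus two words per entry) and the remaining 24 octets are the
 * one-entry-per-node excess passed up to account for layer 1.
 */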

#define btree_migrate_at_layer(shf, ptrf, el, sht, alloct) \
	THX_btree_migrate_at_layer(aTHX_ shf, ptrf, el, sht, alloct)
static word THX_btree_migrate_at_layer(pTHX_ struct shash *shf, word ptrf,
	int expect_layer, struct shash *sht, struct shash_alloc *alloct)
{
	int layer, fanout, i;
	word nodebody[MAXFANOUT*2];
	word const *locf = bnode_body_loc(bnode_check(shf, ptrf, expect_layer,
							&layer, &fanout));
	word *loct = nodebody;
	if(likely(layer == 0)) {
		for(i = fanout << 1; i--; ) {
			*loct++ = string_migrate(shf, *locf++, sht, alloct);
		}
	} else {
		for(i = fanout; i--; ) {
			word spc;
			word ptrt = btree_migrate_at_layer(shf, locf[1],
					layer-1, sht, alloct);
			locf += 2;
			*loct++ =
				bnode_body_loc(pointer_loc(sht, ptrt, &spc))[0];
			*loct++ = ptrt;
		}
	}
	return bnode_write(sht, alloct, layer, fanout, nodebody);
}
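
/*
 * At layer 0 the node body is fanout (key, value) pairs of string
 * pointers, each migrated individually.  At higher layers each entry
 * is a (separator key, child pointer) pair: the child subtree is
 * migrated first, and the separator slot is then refilled from the
 * first key word of the migrated child's body, so that the separator
 * pointer written to the target node refers to a string in the target
 * shash rather than in the source.
 */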

#define btree_migrate(shf, ptrf, sht, act) \
	THX_btree_migrate(aTHX_ shf, ptrf, sht, act)
static word THX_btree_migrate(pTHX_ struct shash *shf, word ptrf,
	struct shash *sht, char const *action)
{
	struct shash_alloc new_alloc;
	if(unlikely(setjmp(new_alloc.fulljb)))
		shash_error_errnum(sht, action, ENOSPC);
	new_alloc.action = action;
	new_alloc.prealloc_len = 0;
	return btree_migrate_at_layer(shf, ptrf, -1, sht, &new_alloc);
}
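
/*
 * A minimal self-contained sketch of the setjmp-based out-of-space
 * protocol that the function above relies on, assuming (as the code
 * implies) that the allocator longjmp()s to alloc->fulljb when the
 * target shash cannot satisfy a request.  All names below are
 * illustrative, not from this file.
 */
#include <setjmp.h>
#include <stdio.h>
#include <stddef.h>

static jmp_buf fulljb;

static void toy_alloc(size_t len)
{
	/* pretend the arena is exhausted for large requests */
	if(len > 1024) longjmp(fulljb, 1);
}

int main(void)
{
	if(setjmp(fulljb)) {
		/* reached only via longjmp(): report and bail out */
		fprintf(stderr, "migration failed: ENOSPC\n");
		return 1;
	}
	toy_alloc(4096);	/* jumps back to the setjmp above */
	return 0;
}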


