App-RecordStream

 view release on metacpan or  search on metacpan

src/fast-recs-collate/hash.c  view on Meta::CPAN

    newtable = realloc(hash->table,
	    sizeof *newtable * hash->nchains * 2);	/* 4 */

    if (newtable) {	/* 5 */
	hash_val_t mask = (hash->mask << 1) | 1;	/* 3 */
	hash_val_t exposed_bit = mask ^ hash->mask;	/* 6 */
	hash_val_t chain;

	assert (mask != hash->mask);

	for (chain = 0; chain < hash->nchains; chain++) { /* 7 */
	    hnode_t *low_chain = 0, *high_chain = 0, *hptr, *next;

	    for (hptr = newtable[chain]; hptr != 0; hptr = next) {
		next = hptr->next;

		if (hptr->hkey & exposed_bit) {
		    hptr->next = high_chain;
		    high_chain = hptr;
		} else {
		    hptr->next = low_chain;
		    low_chain = hptr;
		}
	    }

	    newtable[chain] = low_chain; 	/* 8 */
	    newtable[chain + hash->nchains] = high_chain;
	}

	hash->table = newtable;			/* 9 */
	hash->mask = mask;
	hash->nchains *= 2;
	hash->lowmark *= 2;
	hash->highmark *= 2;
    }
    assert (hash_verify(hash));
}

/*
 * Cut a table size in half. This is done by folding together adjacent chains
 * and populating the lower half of the table with these chains. The chains are
 * simply spliced together. Once this is done, the whole table is reallocated
 * to a smaller object.
 * Notes:
 * 1.  It is illegal to have a hash table with one slot. This would mean that
 *     hash->shift is equal to hash_val_t_bit, an illegal shift value.
 *     Also, other things could go wrong, such as hash->lowmark becoming zero.
 * 2.  Looping over each pair of sister chains, the low_chain is set to
 *     point to the head node of the chain in the lower half of the table, 
 *     and high_chain points to the head node of the sister in the upper half.
 * 3.  The intent here is to compute a pointer to the last node of the
 *     lower chain into the low_tail variable. If this chain is empty,
 *     low_tail ends up with a null value.
 * 4.  If the lower chain is not empty, we simply tack the upper chain onto it.
 *     If the upper chain is a null pointer, nothing happens.
 * 5.  Otherwise if the lower chain is empty but the upper one is not,
 *     If the low chain is empty, but the high chain is not, then the
 *     high chain is simply transferred to the lower half of the table.
 * 6.  Otherwise if both chains are empty, there is nothing to do.
 * 7.  All the chain pointers are in the lower half of the table now, so
 *     we reallocate it to a smaller object. This, of course, invalidates
 *     all pointer-to-pointers which reference into the table from the
 *     first node of each chain.
 * 8.  Though it's unlikely, the reallocation may fail. In this case we
 *     pretend that the table _was_ reallocated to a smaller object.
 * 9.  Finally, update the various table parameters to reflect the new size.
 */

static void shrink_table(hash_t *hash)
{
    hash_val_t chain, nchains;
    hnode_t **newtable, *low_tail, *low_chain, *high_chain;

    assert (hash->nchains >= 2);			/* 1 */
    nchains = hash->nchains / 2;

    for (chain = 0; chain < nchains; chain++) {
	low_chain = hash->table[chain];		/* 2 */
	high_chain = hash->table[chain + nchains];
	for (low_tail = low_chain; low_tail && low_tail->next; low_tail = low_tail->next)
	    ;	/* 3 */
	if (low_chain != 0)				/* 4 */
	    low_tail->next = high_chain;
	else if (high_chain != 0)			/* 5 */
	    hash->table[chain] = high_chain;
	else
	    assert (hash->table[chain] == NULL);	/* 6 */
    }
    newtable = realloc(hash->table,
	    sizeof *newtable * nchains);		/* 7 */
    if (newtable)					/* 8 */
	hash->table = newtable;
    hash->mask >>= 1;			/* 9 */
    hash->nchains = nchains;
    hash->lowmark /= 2;
    hash->highmark /= 2;
    assert (hash_verify(hash));
}


/*
 * Create a dynamic hash table. Both the hash table structure and the table
 * itself are dynamically allocated. Furthermore, the table is extendible in
 * that it will automatically grow as its load factor increases beyond a
 * certain threshold.
 * Notes:
 * 1. If the number of bits in the hash_val_t type has not been computed yet,
 *    we do so here, because this is likely to be the first function that the
 *    user calls.
 * 2. Allocate a hash table control structure.
 * 3. If a hash table control structure is successfully allocated, we
 *    proceed to initialize it. Otherwise we return a null pointer.
 * 4. We try to allocate the table of hash chains.
 * 5. If we were able to allocate the hash chain table, we can finish
 *    initializing the hash structure and the table. Otherwise, we must
 *    backtrack by freeing the hash structure.
 * 6. INIT_SIZE should be a power of two. The high and low marks are always set
 *    to be twice the table size and half the table size respectively. When the
 *    number of nodes in the table grows beyond the high size (beyond load
 *    factor 2), it will double in size to cut the load factor down to about
 *    about 1. If the table shrinks down to or beneath load factor 0.5,



( run in 0.984 second using v1.01-cache-2.11-cpan-df04353d9ac )