Algorithm-MinPerfHashTwoLevel
view release on metacpan or search on metacpan
MinPerfHashTwoLevel.xs view on Meta::CPAN
if (flags & MPH_F_NO_DEDUPE) { \
ofs_sv= NULL; \
} else { \
HE *ofs= hv_fetch_ent(str_ofs_hv,sv,1,0); \
ofs_sv= ofs ? HeVAL(ofs) : NULL; \
if (!ofs_sv) \
croak("panic: out of memory getting str ofs for " #he "for %u",i); \
} \
if (ofs_sv && SvOK(ofs_sv)){ \
table[i].key_ofs= SvUV(ofs_sv); \
table[i].key_len= sv_len(sv); \
} else { \
pv= SvPV(sv,pv_len); \
table[i].key_len= pv_len; \
if (pv_len) { \
table[i].key_ofs= str_buf_pos - str_buf_start; \
if (str_buf_pos + pv_len > str_buf_end) \
croak("panic: string buffer too small in SETOFS, something went horribly wrong."); \
Copy(pv,str_buf_pos,pv_len,char); \
str_buf_pos += pv_len; \
} else { \
table[i].key_ofs= 1; \
} \
if (ofs_sv) \
sv_setuv(ofs_sv,table[i].key_ofs); \
} \
} else { \
table[i].key_ofs= 0; \
table[i].key_len= 0; \
} \
} else { \
croak("no " #he " for %u",i); \
} \
} STMT_END
MODULE = Algorithm::MinPerfHashTwoLevel PACKAGE = Algorithm::MinPerfHashTwoLevel
BOOT:
{
MPH_INIT_ALL_KEYSV();
}
UV
hash_with_state(str_sv,state_sv)
SV* str_sv
SV* state_sv
PROTOTYPE: $$
CODE:
{
STRLEN str_len;
STRLEN state_len;
U8 *state_pv;
U8 *str_pv= (U8 *)SvPV(str_sv,str_len);
state_pv= (U8 *)SvPV(state_sv,state_len);
if (state_len != MPH_STATE_BYTES) {
croak("Error: state vector must be at exactly %d bytes",(int)MPH_SEED_BYTES);
}
RETVAL= mph_hash_with_state(state_pv,str_pv,str_len);
}
OUTPUT:
RETVAL
SV *
seed_state(base_seed_sv)
SV* base_seed_sv
PROTOTYPE: $
CODE:
{
STRLEN seed_len;
STRLEN state_len;
U8 *seed_pv;
U8 *state_pv;
SV *seed_sv;
if (!SvOK(base_seed_sv))
croak("Error: seed must be defined");
if (SvROK(base_seed_sv))
croak("Error: seed should not be a reference");
seed_sv= base_seed_sv;
seed_pv= (U8 *)SvPV(seed_sv,seed_len);
if (seed_len != MPH_SEED_BYTES) {
if (SvREADONLY(base_seed_sv)) {
if (seed_len < MPH_SEED_BYTES) {
warn("seed passed into seed_state() is readonly and too short, argument has been right padded with %d nulls",
(int)(MPH_SEED_BYTES - seed_len));
}
else if (seed_len > MPH_SEED_BYTES) {
warn("seed passed into seed_state() is readonly and too long, using only the first %d bytes",
(int)MPH_SEED_BYTES);
}
seed_sv= sv_2mortal(newSVsv(base_seed_sv));
}
if (seed_len < MPH_SEED_BYTES) {
sv_grow(seed_sv,MPH_SEED_BYTES+1);
while (seed_len < MPH_SEED_BYTES) {
seed_pv[seed_len] = 0;
seed_len++;
}
}
SvCUR_set(seed_sv,MPH_SEED_BYTES);
seed_pv= (U8 *)SvPV(seed_sv,seed_len);
} else {
seed_sv= base_seed_sv;
}
RETVAL= newSV(MPH_STATE_BYTES+1);
SvCUR_set(RETVAL,MPH_STATE_BYTES);
SvPOK_on(RETVAL);
state_pv= (U8 *)SvPV(RETVAL,state_len);
mph_seed_state(seed_pv,state_pv);
}
OUTPUT:
RETVAL
UV
compute_xs(self_hv)
HV *self_hv
PREINIT:
dMY_CXT;
PROTOTYPE: \%\@
CODE:
{
U8 *state_pv;
STRLEN state_len;
HE *he;
IV len_idx;
U32 bucket_count;
U32 max_xor_val;
U32 i;
U32 variant;
U32 compute_flags;
SV* buf_length_sv;
HV* source_hv;
AV *buckets_av;
AV *keys_av;
AV *by_length_av;
AV *keybuckets_av;
AV *h2_packed_av;
RETVAL = 0;
/**** extract the various reference data we need from $self */
he= hv_fetch_ent_with_keysv(self_hv,MPH_KEYSV_VARIANT,0);
if (he) {
variant= SvUV(HeVAL(he));
} else {
croak("panic: no variant in self?");
}
he= hv_fetch_ent_with_keysv(self_hv,MPH_KEYSV_COMPUTE_FLAGS,0);
if (he) {
compute_flags= SvUV(HeVAL(he));
} else {
croak("panic: no compute_flags in self?");
}
he= hv_fetch_ent_with_keysv(self_hv,MPH_KEYSV_STATE,0);
if (he) {
SV *state_sv= HeVAL(he);
state_pv= (U8 *)SvPV(state_sv,state_len);
if (state_len != MPH_STATE_BYTES) {
croak("Error: state vector must be at exactly %d bytes",(int)MPH_SEED_BYTES);
}
} else {
croak("panic: no state in self?");
}
he= hv_fetch_ent_with_keysv(self_hv,MPH_KEYSV_BUF_LENGTH,1);
if (he) {
buf_length_sv= HeVAL(he);
} else {
croak("panic: out of memory in lvalue fetch for 'buf_length' in self");
}
he= hv_fetch_ent_with_keysv(self_hv,MPH_KEYSV_SOURCE_HASH,0);
if (he) {
source_hv= (HV*)SvRV(HeVAL(he));
} else {
croak("panic: no source_hash in self");
}
he= hv_fetch_ent_with_keysv(self_hv,MPH_KEYSV_BUCKETS,1);
if (he) {
SV *rv= HeVAL(he);
if (SvROK(rv)) {
AV *old_buckets_av= (AV*)SvRV(rv);
SvREFCNT_dec(old_buckets_av);
} else {
sv_upgrade(rv, SVt_RV);
}
buckets_av= newAV();
SvRV_set(rv,(SV*)buckets_av);
SvROK_on(rv);
} else {
croak("panic: out of memory in lvalue fetch for 'buckets' in self");
}
/**** build an array of hashes in keys_av based on the normalized contents of source_hv */
keys_av= (AV *)sv_2mortal((SV*)newAV());
bucket_count= normalize_source_hash(aTHX_ source_hv, keys_av, compute_flags, buf_length_sv, state_pv);
max_xor_val= INT32_MAX;
/* if the caller wants deterministic results we sort the keys_av
* before we compute collisions - depending on the order we process
* the keys we might resolve the collisions differently */
if (compute_flags & MPH_F_DETERMINISTIC)
sortsv(AvARRAY(keys_av),bucket_count,_compare);
/**** find the collisions from the data we just computed, build an AoAoH and AoS of the
**** collision data */
keybuckets_av= (AV*)sv_2mortal((SV*)newAV()); /* AoAoH - hashes from keys_av */
h2_packed_av= (AV*)sv_2mortal((SV*)newAV()); /* AoS - packed h1 */
find_first_level_collisions(aTHX_ bucket_count, keys_av, keybuckets_av, h2_packed_av);
/* Sort the buckets by size by constructing an AoS, with the outer array indexed by length,
* and the inner string being the list of items of that length. (Thus the contents of index
* 0 is empty/undef).
* The end result is we can process the collisions from the most keys to a bucket to the
* least in O(N) and not O(N log2 N).
*
* the length of the array (av_top_index+1) reflect the number of items in the bucket
* with the most collisions - we use this later to size some of our data structures.
*/
by_length_av= idx_by_length(aTHX_ keybuckets_av);
RETVAL= solve_collisions_by_length(aTHX_ bucket_count, max_xor_val, by_length_av, h2_packed_av, keybuckets_av,
variant, buckets_av);
}
OUTPUT:
RETVAL
MODULE = Algorithm::MinPerfHashTwoLevel PACKAGE = Tie::Hash::MinPerfHashTwoLevel::OnDisk
SV *
packed_xs(variant,buf_length_sv,state_sv,comment_sv,flags,buckets_av)
U32 variant
SV* buf_length_sv
SV* state_sv
SV* comment_sv
AV *buckets_av
U32 flags
PREINIT:
dMY_CXT;
PROTOTYPE: $$$$$\@
CODE:
{
U32 buf_length= SvUV(buf_length_sv);
U32 bucket_count= av_top_index(buckets_av) + 1;
U32 header_rlen= _roundup(sizeof(struct mph_header),16);
STRLEN state_len;
char *state_pv= SvPV(state_sv, state_len);
U32 alignment= sizeof(U64);
U32 state_rlen= _roundup(state_len,alignment);
U32 table_rlen= _roundup(sizeof(struct mph_bucket) * bucket_count,alignment);
U32 key_flags_rlen= _roundup((bucket_count * 2 + 7 ) / 8,alignment);
U32 val_flags_rlen= _roundup((bucket_count + 7) / 8,alignment);
U32 str_rlen= _roundup( buf_length
+ 2
+ ( SvOK(comment_sv) ? sv_len(comment_sv) + 1 : 1 )
+ ( 2 + 8 ),
alignment );
U32 total_size;
HV *str_ofs_hv= (HV *)sv_2mortal((SV*)newHV());
SV *sv_buf;
char *start;
struct mph_header *head;
char *state;
struct mph_bucket *table;
char *key_flags;
char *val_flags;
char *str_buf_start;
char *str_buf_end;
char *str_buf_pos;
U32 i;
STRLEN pv_len;
char *pv;
U32 key_is_utf8_count[3]={0,0,0};
U32 val_is_utf8_count[2]={0,0};
U32 used_flags;
U32 the_flag;
IV key_is_utf8_generic=-1;
IV val_is_utf8_generic=-1;
for (i= 0; i < bucket_count; i++) {
SV **got= av_fetch(buckets_av,i,0);
MinPerfHashTwoLevel.xs view on Meta::CPAN
table= (struct mph_bucket *)(start + head->table_ofs);
key_flags= start + head->key_flags_ofs;
val_flags= start + head->val_flags_ofs;
str_buf_start= start + head->str_buf_ofs;
str_buf_end= start + total_size;
str_buf_pos= str_buf_start + 2;
Copy(state_pv,state,state_len,char);
pv= SvPV(comment_sv,pv_len);
Copy(pv,str_buf_pos,pv_len,char);
str_buf_pos += pv_len + 1; /* +1 to add a trailing null */
for (i= 0; i < bucket_count; i++) {
SV **got= av_fetch(buckets_av,i,0);
HV *hv= (HV *)SvRV(*got);
HE *key_normalized_he= hv_fetch_ent_with_keysv(hv,MPH_KEYSV_KEY_NORMALIZED,0);
HE *val_normalized_he= hv_fetch_ent_with_keysv(hv,MPH_KEYSV_VAL_NORMALIZED,0);
HE *xor_val_he= hv_fetch_ent_with_keysv(hv,MPH_KEYSV_XOR_VAL,0);
if (xor_val_he) {
table[i].xor_val= SvUV(HeVAL(xor_val_he));
} else {
table[i].xor_val= 0;
}
SETOFS(i,key_normalized_he,table,key_ofs,key_len,str_buf_start,str_buf_pos,str_buf_end,str_ofs_hv);
SETOFS(i,val_normalized_he,table,val_ofs,val_len,str_buf_start,str_buf_pos,str_buf_end,str_ofs_hv);
if ( key_is_utf8_generic < 0) {
HE *key_is_utf8_he= hv_fetch_ent_with_keysv(hv,MPH_KEYSV_KEY_IS_UTF8,0);
if (key_is_utf8_he) {
UV u= SvUV(HeVAL(key_is_utf8_he));
SETBITS(u,key_flags,i,2);
} else {
croak("panic: out of memory? no key_is_utf8_he for %u",i);
}
}
if ( val_is_utf8_generic < 0 ) {
HE *val_is_utf8_he= hv_fetch_ent_with_keysv(hv,MPH_KEYSV_VAL_IS_UTF8,0);
if (val_is_utf8_he) {
UV u= SvUV(HeVAL(val_is_utf8_he));
SETBITS(u,val_flags,i,1);
} else {
croak("panic: out of memory? no val_is_utf8_he for %u",i);
}
}
}
*str_buf_pos = 0; str_buf_pos++;
*str_buf_pos = 128; str_buf_pos++;
{
U32 r= (str_buf_pos - start) % alignment;
if (r) {
str_buf_pos += (alignment - r);
}
}
*((U64 *)str_buf_pos)= mph_hash_with_state(state, start, str_buf_pos - start);
str_buf_pos += sizeof(U64);
SvCUR_set(sv_buf, str_buf_pos - start);
SvPOK_on(sv_buf);
RETVAL= sv_buf;
}
OUTPUT:
RETVAL
SV*
mount_file(file_sv,error_sv,flags)
SV* file_sv
SV* error_sv
U32 flags
PROTOTYPE: $$$
CODE:
{
struct mph_obj obj;
STRLEN file_len;
char *file_pv= SvPV(file_sv,file_len);
IV mmap_status= mph_mmap(aTHX_ file_pv, &obj, error_sv, flags);
if (mmap_status < 0) {
XSRETURN_UNDEF;
}
/* copy obj into a new SV which we can return */
RETVAL= newSVpvn((char *)&obj,sizeof(struct mph_obj));
SvPOK_on(RETVAL);
SvREADONLY_on(RETVAL);
}
OUTPUT:
RETVAL
void
unmount_file(mount_sv)
SV* mount_sv
PROTOTYPE: $
CODE:
{
struct mph_obj *obj= (struct mph_obj *)SvPV_nolen(mount_sv);
mph_munmap(obj);
SvOK_off(mount_sv);
}
int
fetch_by_index(mount_sv,index,...)
SV* mount_sv
U32 index
PROTOTYPE: $$;$$
CODE:
{
struct mph_obj *obj= (struct mph_obj *)SvPV_nolen(mount_sv);
SV* key_sv= items > 2 ? ST(2) : NULL;
SV* val_sv= items > 3 ? ST(3) : NULL;
if (items > 4)
croak("Error: passed too many arguments to "
"Tie::Hash::MinPerfHashTwoLevel::OnDisk::fetch_by_index(mount_sv, index, key_sv, val_sv)");
RETVAL= lookup_bucket(aTHX_ obj->header,index,key_sv,val_sv);
}
OUTPUT:
RETVAL
int
fetch_by_key(mount_sv,key_sv,...)
SV* mount_sv
SV* key_sv
PROTOTYPE: $$;$
CODE:
{
SV* val_sv= items > 2 ? ST(2) : NULL;
struct mph_obj *obj= (struct mph_obj *)SvPV_nolen(mount_sv);
if (items > 3)
croak("Error: passed too many arguments to "
"Tie::Hash::MinPerfHashTwoLevel::OnDisk::fetch_by_key(mount_sv, index, key_sv)");
RETVAL= lookup_key(aTHX_ obj->header,key_sv,val_sv);
}
OUTPUT:
RETVAL
SV *
get_comment(self_hv)
HV* self_hv
ALIAS:
get_hdr_magic_num = 1
get_hdr_variant = 2
get_hdr_num_buckets = 3
get_hdr_state_ofs = 4
get_hdr_table_ofs = 5
get_hdr_key_flags_ofs = 6
get_hdr_val_flags_ofs = 7
get_hdr_str_buf_ofs = 8
get_hdr_table_checksum = 9
get_hdr_str_buf_checksum = 10
get_state = 11
PREINIT:
dMY_CXT;
PROTOTYPE: $
CODE:
{
struct mph_obj *obj;
SV *mount_sv;
char *start;
HE *got= hv_fetch_ent_with_keysv(self_hv,MPH_KEYSV_MOUNT,0);
if (!got)
croak("must be mounted to use this function");
mount_sv= HeVAL(got);
if (!mount_sv || !SvPOK(mount_sv))
croak("$self->'mount' is expected to be a string!");
obj= (struct mph_obj *)SvPV_nolen(mount_sv);
start= (char *)obj->header;
switch(ix) {
case 0: RETVAL= newSVpv(start + obj->header->str_buf_ofs + 2,0); break;
case 1: RETVAL= newSVuv(obj->header->magic_num); break;
case 2: RETVAL= newSVuv(obj->header->variant); break;
case 3: RETVAL= newSVuv(obj->header->num_buckets); break;
case 4: RETVAL= newSVuv(obj->header->state_ofs); break;
case 5: RETVAL= newSVuv(obj->header->table_ofs); break;
case 6: RETVAL= newSVuv(obj->header->key_flags_ofs); break;
case 7: RETVAL= newSVuv(obj->header->val_flags_ofs); break;
case 8: RETVAL= newSVuv(obj->header->str_buf_ofs); break;
case 9: RETVAL= newSVuv(obj->header->table_checksum); break;
case 10: RETVAL= newSVuv(obj->header->str_buf_checksum); break;
case 11: RETVAL= newSVpvn(start + obj->header->state_ofs, MPH_STATE_BYTES); break;
}
}
OUTPUT:
RETVAL
( run in 0.660 second using v1.01-cache-2.11-cpan-e1769b4cff6 )