Algorithm-HyperLogLog
view release on metacpan or search on metacpan
xs/hyperloglog.xs view on Meta::CPAN
# dump registers
AV*
_dump_register(HLL self)
CODE:
{
RETVAL = (AV*)sv_2mortal((SV*)newAV());
uint32_t i;
for(i = 0;i < self->m; i++){
av_push(RETVAL, newSVuv(self->registers[i]));
}
}
OUTPUT:
RETVAL
# Return number of registers.
uint32_t
register_size(HLL self)
CODE:
RETVAL = self->m;
OUTPUT:
RETVAL
# Add element to the estimator
void
add(HLL self, ...)
PREINIT:
uint32_t hash;
uint32_t index;
uint8_t rank;
I32 arg_index;
STRLEN n_a;
CODE:
{
if(items > 1){
for(arg_index = 1; arg_index < items; ++arg_index){
char* str = SvPV(ST(arg_index), n_a);
MurmurHash3_32((void *) str, strlen(str), HLL_HASH_SEED, (void *) &hash);
index = (hash >> (32 - self->k));
rank = rho( (hash << self->k), 32 - self->k );
if( rank > self->registers[index] ) {
self->registers[index] = rank;
}
}
}
}
# Estimate cardinality
double
estimate(HLL self)
CODE:
{
double estimate;
uint32_t m = self->m;
uint32_t i = 0;
double sum = 0.0;
// Calculate hermonic mean
for (i = 0; i < m; i++) {
sum += 1.0/pow(2.0, self->registers[i]);
}
estimate = self->alphaMM/sum; // E in the original paper
if( estimate <= 2.5 * m ) {
uint32_t zeros = 0;// V in the original paper
uint32_t i = 0;
for (i = 0; i < m; i++) {
if (self->registers[i] == 0) {
zeros++;
}
}
if( zeros != 0 ) {
estimate = m * log((double)m/zeros);
}
} else if (estimate > (1.0/30.0) * two_32) {
estimate = neg_two_32 * log(1.0 - ( estimate/two_32 ) );
}
RETVAL = estimate;
}
OUTPUT:
RETVAL
# Merge two HLLs
double
merge(HLL self, HLL other)
CODE:
{
uint32_t m = self->m;
uint32_t i = 0;
if (m != other->m) {
croak("hll size mismatch: %d != %d\n", m, other->m);
}
for (i = 0; i < m; i++) {
if (self->registers[i] < other->registers[i]) {
self->registers[i] = other->registers[i];
}
}
XSRETURN_UNDEF;
}
OUTPUT:
RETVAL
# Destructor
void
DESTROY(HLL self)
CODE:
{
Safefree(self->registers);
Safefree (self);
}
( run in 0.743 second using v1.01-cache-2.11-cpan-8644d7adfcd )