Algorithm-BloomFilter
view release on metacpan or search on metacpan
{
const uint64_t l = (l1 + i*l2) >> bl->shift;
if (! BIT_TEST(bitfield, l) )
return 0;
}
return 1;
}
static void
S_uint64_to_varint(unsigned char **out, uint64_t value) {
unsigned char *pos = *out;
while (value >= 0x80) { /* while we are larger than 7 bits long */
*pos++ = (value & 0x7f) | 0x80; /* write out the least significant 7 bits, set the high bit */
value >>= 7; /* shift off the 7 least significant bits */
}
*pos++ = (unsigned char)value; /* encode the last 7 bits without the high bit being set */
*out = pos;
}
static uint64_t
S_varint_to_uint64_t(unsigned char **in, size_t max_input_len)
{
uint64_t uv = 0;
unsigned int lshift = 0;
unsigned char *pos = *in;
const unsigned char *end = *in + max_input_len;
while (pos <= end && *pos & 0x80) {
uv |= ((uint64_t)(*pos++ & 0x7F) << lshift);
lshift += 7;
if (lshift > (sizeof(uint64_t) * 8)) {
*in = NULL;
return 0;
}
}
if (pos <= end) {
uv |= ((uint64_t)*pos++ << lshift);
} else {
/* end of packet reached before varint parsed */
*in = NULL;
return 0;
}
*in = pos;
return uv;
}
/* may over-allocate a bit */
int
bl_serialize(bloom_t *bl, char **out, size_t *out_len)
{
/* Format is pretty simple:
* - varint encoding number of hash functions
* - varint encoding significant_bits
* - X bytes - whatever the length in bytes for the bitmap is */
char *cur;
char *start;
const uint64_t plength = MAX_VARINT_LENGTH /* length of packet, this number */
+ bl->nbytes /* the actual data size */
+ MAX_VARINT_LENGTH /* k */
+ MAX_VARINT_LENGTH; /* significant_bits */
*out_len = (size_t)plength; /* to be revised further down */
start = cur = malloc(*out_len);
if (!cur) {
*out_len = 0;
*out = 0;
return 1;
}
*out = cur;
S_uint64_to_varint((unsigned char **)&cur, (uint64_t)bl->k);
S_uint64_to_varint((unsigned char **)&cur, (uint64_t)bl->significant_bits);
memcpy(cur, bl->bitmap, bl->nbytes);
cur += bl->nbytes;
*out_len = (size_t)(cur-start) + 1;
return 0;
}
bloom_t *
bl_deserialize(const char *blob, size_t blob_len, bl_hash_function_t hash_function)
{
bloom_t *bl = NULL;
const char const *end = blob + blob_len - 1;
bl = malloc(sizeof(bloom_t));
if (!bl)
return NULL;
bl->hash_function = hash_function;
bl->k = (unsigned int) S_varint_to_uint64_t((unsigned char **)&blob, (size_t)(end-blob));
if (blob == NULL) {
free(bl);
return NULL;
}
bl->significant_bits = (unsigned int) S_varint_to_uint64_t((unsigned char **)&blob, (size_t)(end-blob));
if (blob == NULL) {
free(bl);
return NULL;
}
bl->shift = 64 - bl->significant_bits;
bl->nbytes = end-blob;
bl->bitmap = malloc(bl->nbytes);
if (!bl->bitmap) {
free(bl);
return NULL;
}
memcpy(bl->bitmap, (char *)blob, bl->nbytes);
blob += bl->nbytes;
return bl;
}
int
bl_merge(bloom_t *into, const bloom_t * other)
{
size_t i;
size_t n;
char *bf1;
const char *bf2;
if (into->k != other->k
|| into->significant_bits != other->significant_bits
|| into->nbytes != other->nbytes /* paranoia */
|| into->hash_function != other->hash_function)
{
return 1;
}
n = into->nbytes;
( run in 1.999 second using v1.01-cache-2.11-cpan-cdf2f3d4e48 )