Alien-Judy
view release on metacpan or search on metacpan
src/judy-1.0.5/test/StringCompare.c view on Meta::CPAN
// cc -O -DCPUMHZ=1299 StringCompare.c -lm -lJudy -o StringCompare
/* Notes:
1) Use '-DCPUMHZ=1299' in cc line if better clock resolution is desired
and it compiles successfully. The 1299 is the cpu MHz : 1298.916 from
cat /proc/cpuinfo in a Linux system.
2) -static will generally get better performance because memcmp(),
memcpy() routines are usually slower with shared librarys.
*/
// Usage:
//
// StringCompare -A Hash <options> textfile
// StringCompare -A JudyHS <options> textfile
// StringCompare -A JLHash <options> textfile
// StringCompare -A JudySL <options> textfile
// StringCompare -A Ternary <options> textfile
// StringCompare -A Redblack <options> textfile
// StringCompare -A Splay <options> textfile
#include <stdlib.h> // malloc(3)
#include <unistd.h> // getopt(3)
#include <stdio.h> // printf(3)
#include <fcntl.h> // open(2)
#include <string.h> // memcmp(3), memcpy(3)
#include <errno.h> // errno(3)
#include <sys/mman.h> // mmap(2)
#include <sys/time.h> // gettimeofday(2)
#include <math.h> // pow(3)
#include <sys/utsname.h> // uname(2)
#include <Judy.h> // Judy arrays
//=======================================================================
// D e f i n e: T O T U R N O F F A S S E R T I O N S !!!!!!!!
//=======================================================================
//#define NDEBUG 1
#include <assert.h> // assert(3)
//=======================================================================
// G L O B A L D A T A
//=======================================================================
int foolflag = 0; // fool compiler from optimizing
static Word_t gStored = 0; // number of strings inserted
static Word_t gChainln = 0; // links traversed during RETRIVE
static Word_t PtsPdec = 40; // default measurement points per decade
#define INFSTRGS 1000000000 // 1 billion strings is infinity
static Word_t nStrg = INFSTRGS; // infinity -- measure all strings
static Word_t TValues = 100000; // max measure points for RETRIVE tests
static int pFlag = 0; // pre-fault hash table pages into RAM
static int rFlag = 0; // do not randomize input file
static int aFlag = 0; // word align string buffers
static int DFlag = 0; // do the delete measurement
static int CFlag = 0; // build sequential Get buffers
static Word_t aCount = 0; // Count of missaligned string buffers
// define the maximum length of a string allowed
#define MAXSTRLEN (100000)
static int MLength = MAXSTRLEN;
static Word_t HTblsz; // 1M default hash table size
static int fileidx; // argv[fileidx] == file string
// for saving input string data
typedef struct STRING_
{
int dt_strlen;
uint8_t *dt_string;
} dt_t , *Pdt_t;
static Pdt_t PdtS_ = NULL; // memory for Cache access Gets
static uint8_t *Strbuf_ = NULL;
static Word_t Strsiz_ = 0;
// Roundup BYTES to an even number of words
/*
On Linux 2.6.3-4mdkenterprise (Mandrake 10.0 Community) printing (even
to a file) makes the timings inaccurate. So, use -L2 or greater to
average (actually save min times) and print results after all tests are
completed.
*/
#define Printf if (Pass == 0) printf
#define ROUNDUPWORD(BYTES) (((BYTES) + sizeof(Word_t) - 1) & (-sizeof(Word_t)))
#define BYTES2WORDS(BYTES) (((BYTES) + sizeof(Word_t) - 1) / (sizeof(Word_t)))
//=======================================================================
// T I M I N G M A C R O S
//=======================================================================
static double DeltaUSec; // Global for remembering delta times
// Some operating systems have get_cycles() in /usr/include/asm/timex.h
#ifdef CPUMHZ
// For a 1.34 nS clock cycle processor (750Mhz)
#define CPUSPEED (1.0 / (CPUMHZ))
#include <asm/timex.h>
#define TIMER_vars(T) cycles_t __TVBeg_##T
#define STARTTm(T) __TVBeg_##T = get_cycles()
#define ENDTm(D,T) { (D) = (double)(get_cycles() - __TVBeg_##T) * CPUSPEED; }
#else // ! CPUMHZ
#define TIMER_vars(T) struct timeval __TVBeg_##T, __TVEnd_##T
#define STARTTm(T) gettimeofday(&__TVBeg_##T, NULL)
#define ENDTm(D,T) \
{ \
src/judy-1.0.5/test/StringCompare.c view on Meta::CPAN
//=======================================================================
// B u i l d s e q u e n c i a l s t r i n g b u f f e r
//=======================================================================
Pdt_t
BuildSeqBuf(Pdt_t Pstrstr, Word_t Len)
{
Word_t SumStrings = 0;
Word_t ii;
Word_t Strlen;
uint8_t *string;
assert(Len <= TValues);
// calculate how much memory needed for strings
for (ii = 0; ii < Len; ii++)
{
Strlen = Pstrstr[ii].dt_strlen;
if (aFlag)
SumStrings += ROUNDUPWORD(Strlen + 1);
else
SumStrings += Strlen + 1;
}
// check if old string buffer is big enough
if (SumStrings > Strsiz_)
{
if (Strbuf_)
free(Strbuf_);
else
SumStrings += SumStrings / 5; // bump 20%
Strbuf_ = (uint8_t *) malloc(SumStrings);
if (Strbuf_ == NULL)
MALLOCERROR;
Strsiz_ = SumStrings;
}
for (ii = 0, string = Strbuf_; ii < Len; ii++)
{
Strlen = Pstrstr[ii].dt_strlen;
PdtS_[ii].dt_strlen = Strlen;
PdtS_[ii].dt_string = string;
memcpy(string, Pstrstr[ii].dt_string, Strlen + 1);
if (aFlag)
string += ROUNDUPWORD(Strlen + 1);
else
string += Strlen + 1;
}
return (PdtS_);
}
//=======================================================================
// H A S H M E T H O D S T R U C T U R E S
//=======================================================================
// These structures are used in Hash() and JLHash() ADTs
// for storing length of string
// Hash chain structure (varible length depending on string)
// static part of the length
#define HSTRUCTOVD (sizeof(hrec_t) - sizeof(int))
typedef struct HASHREC_ *Phrec_t;
typedef struct HASHREC_
{
Phrec_t hr_Next; // collision chain link pointer
Word_t hr_Value; // Data associated with string
int hr_Strlen; // length of string 2 billion max
uint8_t hr_String[sizeof(int)]; // string is allocated with struct
} hrec_t;
// hash head structure to keep hash array information
typedef struct HASHINFO_
{
Pvoid_t hi_Htbl; // Hash table
Word_t hi_tblsize; // Hash table size (Words)
Word_t hi_TotalWords; // Hash array total words
Word_t hi_Pop1; // Hash array total population
} hinfo_t, *Phinfo_t;
// size in words of the header structure
#define HASHHEADSZ (sizeof(hinfo_t) / sizeof(Word_t))
//=======================================================================
// H A S H A L G O R I T H M
//=======================================================================
//
// For CPUs with a slow mod (%) use table size a power of 2. A test is
// made to see if the SIZE is a power of 2, and if so an .AND.(&) is used
// instead of a .MOD.(%) to trim the hash return size. Note: a SIZE == 0,
// results in no trimming of hash return size.
#define HASHSTR(STRING,LENGTH,SIZE) \
((SIZE) == ((SIZE) & -(SIZE))) ? \
(HashStr(STRING, LENGTH) & ((SIZE) -1)) : \
(HashStr(STRING, LENGTH) % (SIZE))
// String hash function. Hash string to a unsigned int (uint32_t) This
// one needs a fast 32 bit mpy, which is often very slow on older(RISC)
// machines. If you are sure you will not over populate the hash table,
// then a poorer/faster hash algorithm should be used. Replace with your
// own, milage may vary. This program measures the speed, whether used
// or not.
static uint32_t
HashStr(void *Str, Word_t Len)
{
uint32_t A = 31415;
uint32_t hashv = Len;
uint8_t *k = (uint8_t *) Str;
while (Len--)
{
hashv = (A * hashv) + *k++;
A *= 27183;
}
return (hashv);
}
//=======================================================================
// S T O R E and R E T R I V E R O U T I N E S
//=======================================================================
//=======================================================================
// H A S H M E T H O D U S I N G J U D Y L A S H A S H T A B L E
//=======================================================================
PWord_t
JLHashGet(Pvoid_t JLHash, uint8_t * String, Word_t Strlen)
{
Phinfo_t PHash = (Phinfo_t) JLHash;
Phrec_t Phrec, *PPhrec;
uint32_t hval;
if (PHash == NULL)
return (NULL);
// get hash value, if mod(%) is slow (in some CPUs), make it a power of 2
hval = HASHSTR(String, Strlen, PHash->hi_tblsize);
JLG(PPhrec, PHash->hi_Htbl, hval); // use JudyL to get &pointer
if (PPhrec == NULL)
return (NULL); // no table entry
// search for matching string
for (Phrec = *PPhrec; Phrec != NULL; Phrec = Phrec->hr_Next)
{
gChainln++; // Hash chain length
if (Phrec->hr_Strlen == Strlen) // length match?
{
if (memcmp(Phrec->hr_String, String, Strlen) == 0)
return (&(Phrec->hr_Value)); // match! pointer to Value
}
}
return (NULL);
}
// Return pointer to struct hrec_t associated with string
PWord_t
JLHashIns(PPvoid_t PPHash, uint8_t * String, Word_t Strlen, Word_t TblSize)
{
Phrec_t Phrec, *PPhrec;
Phinfo_t PHash;
Word_t Len;
uint32_t hval;
PHash = (Phinfo_t) * PPHash; // core-dump if calling error
if (PHash == NULL) // if hash table not allocated
{
// allocate the header
PHash = (Phinfo_t) JudyMalloc(HASHHEADSZ);
if (PHash == NULL)
MALLOCERROR;
// Initialize the header struct
PHash->hi_tblsize = TblSize;
PHash->hi_TotalWords = HASHHEADSZ;
PHash->hi_Pop1 = 0; // none yet
PHash->hi_Htbl = NULL;
*PPHash = (Pvoid_t)PHash; // return header to caller
}
// get hash value, if mod(%) is slow (in some CPUs), make it a power of 2
hval = HASHSTR(String, Strlen, PHash->hi_tblsize);
// get pointer to hash table entry
JLI(PPhrec, PHash->hi_Htbl, hval); // JLI will exit if out of memory
// search for matching string
for (Phrec = *PPhrec; Phrec != NULL; Phrec = Phrec->hr_Next)
{
if (Phrec->hr_Strlen == Strlen) // string length match?
{
if (memcmp(Phrec->hr_String, String, Strlen) == 0)
{
return (&(Phrec->hr_Value)); // match! pointer to Value
}
}
}
// String match not found, so do an insert
Len = BYTES2WORDS(Strlen + HSTRUCTOVD);
Phrec = (Phrec_t) JudyMalloc(Len); // get memory for storing string
if (Phrec == NULL)
MALLOCERROR;
PHash->hi_TotalWords += Len; // keep track of total mallocs
Phrec->hr_Strlen = Strlen; // set string length
memcpy(Phrec->hr_String, String, Strlen);
Phrec->hr_Next = *PPhrec; // pointer to synonym
*PPhrec = Phrec; // place new struct in front of list
(PHash->hi_Pop1)++; // add one to population
Phrec->hr_Value = (Word_t)0; // zero the associated Value
return (&(Phrec->hr_Value)); // return pointer to Value
}
// Return 1 if successful, else 0
int
JLHashDel(PPvoid_t PPHash, uint8_t * String, Word_t Strlen)
{
Phrec_t Phrec, *PPhrec, *PPhrec1;
Phinfo_t PHash;
uint32_t hval;
// avoid an core dump here
if (PPHash == NULL)
return (0);
PHash = (Phinfo_t) (*PPHash); // get header
if (PHash == NULL)
return (0); // not found
// get hash value, if mod(%) is slow (in some CPUs), make it a power of 2
hval = HASHSTR(String, Strlen, PHash->hi_tblsize);
// get pointer hash table entry
JLG(PPhrec, PHash->hi_Htbl, hval);
if (PPhrec == NULL)
return (0); // hash entry not found
PPhrec1 = PPhrec; // save head hash entry ^
// search for matching string
for (Phrec = *PPhrec; Phrec != NULL; Phrec = Phrec->hr_Next)
{
if (Phrec->hr_Strlen == Strlen) // string length match?
{
if (memcmp(Phrec->hr_String, String, Strlen) == 0) // string match?
{
int Rc; // not used
Word_t Len;
*PPhrec = Phrec->hr_Next; // put next in previous
Len = BYTES2WORDS(Strlen + HSTRUCTOVD);
JudyFree(Phrec, Len);
PHash->hi_TotalWords -= Len; // ram usage accounting
(PHash->hi_Pop1)--; // Decrement population
if (*PPhrec1 == NULL) // no chain left
{
// delete hash table entry
JLD(Rc, PHash->hi_Htbl, hval);
assert(Rc == 1);
}
// If last element, free everything
if (PHash->hi_Pop1 == 0)
{
assert(PHash->hi_TotalWords == HASHHEADSZ);
JudyFree(PHash, HASHHEADSZ); // the header table
*PPHash = NULL; // from caller
}
return (1); // successful
}
}
PPhrec = &(Phrec->hr_Next); // previous = current
}
return (0); // string not found
}
// Free the whole JLHash structure
Word_t
JLHashFreeArray(PPvoid_t PPHash)
{
Phrec_t Phrec, *PPhrec;
Phinfo_t PHash;
Word_t DeletedWords, Bytes;
Word_t Index = 0; // for First, Next loop
// avoid an core dump here
if (PPHash == NULL)
return ((Word_t)0);
PHash = (Phinfo_t) (*PPHash); // get header
if (PHash == NULL)
return ((Word_t)0); // not found
// get bytes of memory usage in (JudyL) Hash table
JLMU(Bytes, PHash->hi_Htbl);
DeletedWords = HASHHEADSZ; // start with header
// Get 1st table entry in Hash table
JLF(PPhrec, PHash->hi_Htbl, Index);
// found an entry in hash table?
while (PPhrec != NULL)
{
int Rc; // not used
Phrec = *PPhrec;
// walk the synonym linked list
while (Phrec != NULL)
{
Word_t Len;
Phrec_t Phrecfree = Phrec;
// number of words to free -- struct hrec_t
Len = BYTES2WORDS(Phrec->hr_Strlen + HSTRUCTOVD);
// sum total length of mallocs in words
DeletedWords += Len;
(PHash->hi_Pop1)--; // Decrement population
// get pointer to next synonym on list
Phrec = Phrec->hr_Next;
// free the struct hrec_t
JudyFree(Phrecfree, Len);
}
// delete hash table entry
JLD(Rc, PHash->hi_Htbl, Index);
assert(Rc == 1);
// get next hash table entry
JLN(PPhrec, PHash->hi_Htbl, Index);
}
assert(PHash->hi_TotalWords == DeletedWords);
assert(PHash->hi_Pop1 == 0);
JudyFree(PHash, HASHHEADSZ); // the header table
*PPHash = NULL; // set pointer null
// return total bytes freed
return ((DeletedWords * sizeof(Word_t)) + Bytes);
}
//=======================================================================
// H A S H M E T H O D
//=======================================================================
PWord_t
HashGet(Phinfo_t PHash, uint8_t * String, Word_t Strlen)
{
Phrec_t Phrec, *Htbl;
uint32_t hval;
// avoid an core dump here
if (PHash == NULL)
return (NULL);
// get hash value, if mod(%) is slow (in some CPUs), make it a power of 2
hval = HASHSTR(String, Strlen, PHash->hi_tblsize);
// type Hash table pointer
Htbl = (Phrec_t *) PHash->hi_Htbl;
// search for matching string
for (Phrec = Htbl[hval]; Phrec != NULL; Phrec = Phrec->hr_Next)
{
gChainln++; // Hash chain length
if (Phrec->hr_Strlen == Strlen) // length match?
{
if (memcmp(Phrec->hr_String, String, Strlen) == 0)
return (&(Phrec->hr_Value)); // match! pointer to Value
}
}
return (NULL);
}
// Return pointer to struct hrec_t associated with string
Pvoid_t
HashIns(Phinfo_t * PPHash, uint8_t * String, Word_t Strlen, Word_t TblSize)
{
Phrec_t Phrec, *Htbl;
Phinfo_t PHash;
Word_t Len;
uint32_t hval;
PHash = *PPHash; // core-dump if calling error
if (PHash == NULL) // if hash table not allocated
{
// allocate the header
PHash = (Phinfo_t) JudyMalloc(HASHHEADSZ);
if (PHash == NULL)
MALLOCERROR;
// allocate the hash table
PHash->hi_Htbl = (Pvoid_t)JudyMalloc(TblSize);
if (PHash->hi_Htbl == NULL)
MALLOCERROR;
// you cant beat this with modern compilers/librarys
memset(PHash->hi_Htbl, 0, TblSize * sizeof(Word_t));
// Initialize the header struct
PHash->hi_tblsize = TblSize;
PHash->hi_TotalWords = TblSize + HASHHEADSZ;
PHash->hi_Pop1 = 0; // none yet
*PPHash = PHash; // return header to caller
}
// get hash value, if mod(%) is slow (in some CPUs), make it a power of 2
hval = HASHSTR(String, Strlen, TblSize);
// type Hash table pointer
Htbl = (Phrec_t *) PHash->hi_Htbl;
// search for matching string in hash table entry
for (Phrec = Htbl[hval]; Phrec != NULL; Phrec = Phrec->hr_Next)
{
if (Phrec->hr_Strlen == Strlen) // string length match?
{
if (memcmp(Phrec->hr_String, String, Strlen) == 0) // string match?
{
return (&(Phrec->hr_Value)); // match! pointer to Value
}
}
}
// string not found, so do an insert
Len = BYTES2WORDS(Strlen + HSTRUCTOVD);
Phrec = (Phrec_t) JudyMalloc(Len);
if (Phrec == NULL)
MALLOCERROR;
PHash->hi_TotalWords += Len; // keep track of total mallocs
Phrec->hr_Strlen = Strlen; // set string length
memcpy(Phrec->hr_String, String, Strlen); // copy it
// place new allocation first in chain
Phrec->hr_Next = Htbl[hval];
Htbl[hval] = Phrec;
(PHash->hi_Pop1)++; // add one to population
Phrec->hr_Value = (Word_t)0; // zero the associated Value
return (&(Phrec->hr_Value)); // return pointer to Value
}
// Delete entry in hash array, return 1 if successful, else 0
int
HashDel(Phinfo_t * PPHash, uint8_t * String, Word_t Strlen)
{
Phrec_t Phrec, *PPhrec, *Htbl;
Phinfo_t PHash;
uint32_t hval;
// avoid an core dump here
if (PPHash == NULL)
return (0);
PHash = *PPHash; // get header
if (PHash == NULL)
return (0); // not found
// get hash value, if mod(%) is slow (in some CPUs), make it a power of 2
hval = HASHSTR(String, Strlen, PHash->hi_tblsize);
// type Hash table pointer
Htbl = (Phrec_t *) PHash->hi_Htbl;
// get pointer hash table entry
PPhrec = &Htbl[hval];
// search for matching string
for (Phrec = *PPhrec; Phrec != NULL; Phrec = Phrec->hr_Next)
{
if (Phrec->hr_Strlen == Strlen) // length match?
{
if (memcmp(Phrec->hr_String, String, Strlen) == 0)
{
Word_t Len;
// put next hrec_t in previous hrec_t
*PPhrec = Phrec->hr_Next;
Len = BYTES2WORDS(Strlen + HSTRUCTOVD);
JudyFree(Phrec, Len);
PHash->hi_TotalWords -= Len;
(PHash->hi_Pop1)--; // Decrement population
// If last element, free everything
if (PHash->hi_Pop1 == 0)
{
assert(PHash->hi_TotalWords ==
(HASHHEADSZ + PHash->hi_tblsize));
JudyFree(Htbl, PHash->hi_tblsize); // hash table
JudyFree(PHash, HASHHEADSZ); // header struct
*PPHash = NULL; // from caller
}
return (1); // successful
}
}
PPhrec = &(Phrec->hr_Next); // previous = current
}
return (0); // not found
}
Word_t
HashFreeArray(Phinfo_t * PPHash)
{
int ii;
Phrec_t Phrec, *Htbl;
Phinfo_t PHash;
Word_t DeletedWords;
// avoid an core dump here
if (PPHash == NULL)
return ((Word_t)0);
PHash = (Phinfo_t) (*PPHash); // get header
if (PHash == NULL)
return ((Word_t)0);
// start accumulator of deleted memory
DeletedWords = HASHHEADSZ + PHash->hi_tblsize;
// type Hash table pointer
Htbl = (Phrec_t *) PHash->hi_Htbl;
// walk thru all table entrys
for (ii = 0; ii < PHash->hi_tblsize; ii++)
{
Phrec = Htbl[ii]; // next hash table entry
while (Phrec != NULL) // walk the synonym linked list
{
Word_t Len;
src/judy-1.0.5/test/StringCompare.c view on Meta::CPAN
{
printf("Oops duplicate Ternary string %s\n", instr);
return;
}
p = &(pp->eqkid);
}
else if (d < 0)
p = &(pp->lokid);
else
p = &(pp->hikid);
}
for (;;)
{
*p = (Tptr) malloc(sizeof(Tnode));
pp = *p;
pp->splitchar = *s;
pp->lokid = pp->eqkid = pp->hikid = 0;
if (*s++ == 0)
{
pp->eqkid = (Tptr) instr;
gStored++; // number of strings stored
return;
}
p = &(pp->eqkid);
}
}
int
TernaryGet(Tptr p, uint8_t * s, int Len)
{
while (p)
{
if (*s < p->splitchar)
p = p->lokid;
else if (*s == p->splitchar)
{
if (*s++ == 0)
return 1;
p = p->eqkid;
}
else
p = p->hikid;
}
return 0;
}
//=======================================================================
// M E A S U R E A D T S P E E D and M E M O R Y U S A G E
//=======================================================================
//Word_t TotalJudyMalloc;
#define GETSTRING(PCurStr, Strlen)
int
main(int argc, char *argv[])
{
TIMER_vars(tm); // declare timer variables
FILE *fid; // to read file.
int Chr; // char read from fgetc
Pdt_t Pdt, Pdts; // array of lengths and pointers to str
uint8_t *PCurStr; // Current string pointer
Word_t LineCnt; // line counter
int Strlen; // = strlen();
Word_t StrTot; // Total len of strings
Word_t StrNumb; // current line number
Word_t ReadLin; // strings to read
double Mult; // multiplier between groups
Word_t Groups; // number of measurement groups
Word_t grp; // current group
Pvoid_t JudySL = NULL; // JudySL root pointer
Pvoid_t JudyHS = NULL; // JudyHS root pointer
Pvoid_t JLHash = NULL; // JLHash root pointer
Phinfo_t HRoot = NULL; // hash table root pointer
Tptr Ternary = { 0 }; // Ternary struct root pointer
Method_t Method = M_invalid; // the method to measure
Word_t lines = 0; // to shut up compiler
Word_t Bytes = 0; // Bytes deallocated from FreeArray
Word_t StringMemory; // Bytes allocated for input data
int Pass;
int Passes = 1;
int Opt;
extern char *optarg;
int ErrorFlag = 0;
// un-buffer output
setbuf(stdout, NULL);
//============================================================
// PARSE INPUT PARAMETERS
//============================================================
while ((Opt = getopt(argc, argv, "A:H:L:n:T:P:M:praDC")) != -1)
{
switch (Opt)
{
case 'A':
if (Method != M_invalid)
{
printf("\nOnly ONE '-A<ADT>' is allowed!!!!!!\n");
ErrorFlag++;
break;
}
if (strcmp(optarg, "Print") == 0)
Method = M_Print;
if (strcmp(optarg, "Hash") == 0)
{
Method = M_Hash;
HTblsz = 1LU << 20; // default 1.0+ million
}
if (strcmp(optarg, "JLHash") == 0)
{
Method = M_JLHash;
HTblsz = 0; // max 2^32
}
if (strcmp(optarg, "JudySL") == 0)
Method = M_JudySL;
if (strcmp(optarg, "Splay") == 0)
Method = M_Splay;
if (strcmp(optarg, "Redblack") == 0)
Method = M_Redblack;
if (strcmp(optarg, "JudyHS") == 0)
Method = M_JudyHS;
if (strcmp(optarg, "Ternary") == 0)
Method = M_Ternary;
break;
case 'H': // Size of Hash table
HTblsz = strtoul(optarg, NULL, 0);
break;
case 'L': // Number of Loops
Passes = atoi(optarg);
if (Passes <= 0)
{
printf("\n !! OOps - Number of Loops must be > 0\n");
ErrorFlag++;
}
break;
case 'n': // Max population of arrays
nStrg = strtoul(optarg, NULL, 0); // Size of Linear Array
if (nStrg == 0)
{
printf("\n !! OOps - Number of strings must be > 0\n");
ErrorFlag++;
}
break;
case 'T': // Maximum retrieve tests for timing
TValues = strtoul(optarg, NULL, 0);
break;
case 'P': // measurement points per decade
PtsPdec = strtoul(optarg, NULL, 0);
break;
case 'M': // maximum length of input string
MLength = atoi(optarg);
break;
case 'p': // pre-initialize Hash table
pFlag = 1;
break;
case 'r': // do not randomize input
if (CFlag)
{
printf
("\n !! OOps '-r' and '-C' flag are mutually exclusive\n");
ErrorFlag++;
break;
}
rFlag = 1;
break;
case 'a': // word align string buffers
aFlag = 1;
break;
case 'D': // do a delete at end
DFlag = 1;
break;
case 'C': // build sequential Get string buffers
if (rFlag)
{
printf
("\n !! OOps '-C' and '-r' flag are mutually exclusive\n");
ErrorFlag++;
break;
}
CFlag = 1;
break;
default:
ErrorFlag++;
break;
}
}
if (Method == -1)
{
printf
("\n !! OOps -- '-A <ADT>' I.E. '-AHash' or '-AJudyHS' is a required option\n");
ErrorFlag++;
}
fileidx = optind;
if (optind >= argc)
{
printf("\n !! OOps -- No input file specified\n");
ErrorFlag++;
}
if (ErrorFlag)
{
printf("\n");
printf("$ %s -A<ADT> -n# -H# -P# -T# -p -r -a InputStringFile\n\n",
src/judy-1.0.5/test/StringCompare.c view on Meta::CPAN
printf(" -H%lu", HTblsz);
printf(" -P%lu", PtsPdec);
printf(" -L%d", Passes);
if (pFlag)
printf(" -p");
if (rFlag)
printf(" -r");
if (DFlag)
printf(" -D");
if (CFlag)
printf(" -C");
printf(" -M%d", MLength);
printf(" %s", argv[fileidx]);
printf("\n");
// print some header
printf("# This file is in a format to input to 'jbgraph'\n");
printf("# XLABEL Stored\n");
printf("# YLABEL Microseconds / Index\n");
printf("# COLHEAD 1 Total Insert attempts\n");
printf("# COLHEAD 2 Number Gets\n");
printf("# COLHEAD 3 Duplicate strings\n");
printf("# COLHEAD 4 Insert Time (uS)\n");
printf("# COLHEAD 5 Get Time (uS)\n");
printf("# COLHEAD 6 Hash Chain Length\n");
printf("# COLHEAD 7 Average RAM/String\n");
// uname(2) strings describing the machine
{
struct utsname ubuf; // for system name
if (uname(&ubuf) == -1)
printf("# Uname(2) failed\n");
else
printf("# %s %s %s %s %s\n", ubuf.sysname, ubuf.nodename,
ubuf.release, ubuf.version, ubuf.machine);
}
if (sizeof(Word_t) == 8)
printf("# 64 Bit CPU\n");
else if (sizeof(Word_t) == 4)
printf("# 32 Bit CPU\n");
#ifdef CPUMHZ
printf("# Processor speed compiled at %d Mhz\n", CPUMHZ);
#endif // CPUMHZ
if (Method == M_Hash)
printf("# Hash record struct: sizeof(hrec_t) = %d\n", sizeof(hrec_t));
if (Method == M_Ternary)
printf("# Ternary record struct: sizeof(Tnode) = %d\n", sizeof(Tnode));
// OPEN INPUT FILE:
if ((fid = fopen(argv[fileidx], "r")) == NULL)
FILERROR;
for (StrTot = Strlen = LineCnt = 0; (Chr = fgetc(fid)) != EOF;)
{
if (Chr == '\n')
{
if (Strlen) // eat zero length lines
{
if (Strlen > MLength)
Strlen = MLength;
LineCnt++; // increase string count
Strlen++; // add a \0 for JudySL
if (aFlag) // for word alignment
StrTot += ROUNDUPWORD(Strlen);
else
StrTot += Strlen; // memory needed to store strings
if (LineCnt == nStrg) // shorten if required by -n option
break;
Strlen = 0;
}
}
else
{
Strlen++;
}
}
fclose(fid);
fid = NULL;
nStrg = LineCnt; // adj if necessary
// get struct to keep track of the strings
StringMemory = sizeof(dt_t) * nStrg;
Pdt = (Pdt_t) malloc(sizeof(dt_t) * nStrg);
if (Pdt == NULL)
MALLOCERROR;
// get memory to store the strings
StringMemory += StrTot;
PCurStr = (uint8_t *) malloc(StrTot);
if (PCurStr == NULL)
MALLOCERROR;
// BRING FILE INTO RAM, COUNT LINES and CHECK LENGTH
//============================================================
// CALCULATE NUMBER OF MEASUREMENT GROUPS -- points per decade
//============================================================
// Calculate Multiplier for number of points per decade
Mult = pow(10.0, 1.0 / (double)PtsPdec);
{
double sum;
Word_t numb, prevnumb;
// Count number of measurements needed (10K max)
sum = numb = 1;
for (Groups = 2; Groups < 10000; Groups++)
if (NextNumb(&numb, &sum, Mult, nStrg))
break;
// Get memory for measurements
Pms = (Pms_t) calloc(Groups, sizeof(ms_t));
if (Pms == NULL)
MALLOCERROR;
// Now calculate number of Indexes for each measurement point
numb = sum = 1;
prevnumb = 0;
for (grp = 0; grp < Groups; grp++)
{
Pms[grp].ms_delta = numb - prevnumb;
Pms[grp].ms_mininsert = 10000000.0; // infinity
Pms[grp].ms_minretrive = 10000000.0; // infinity
Pms[grp].ms_Bytes = 0.0;
prevnumb = numb;
NextNumb(&numb, &sum, Mult, nStrg);
}
} // Groups = number of sizes
// print remaining header
if (Method == M_Hash)
{
printf("# Allocate Hash table = %lu elements\n", HTblsz);
}
if (Method == M_JLHash)
{
if (HTblsz)
printf("# JLHash table virtual size = %lu\n", HTblsz);
else
printf("# JLHash table virtual size = 4294967296\n");
}
//=======================================================================
// Read text input file into RAM
//=======================================================================
if ((fid = fopen(argv[fileidx], "r")) == NULL)
FILERROR;
for (Strlen = LineCnt = 0; LineCnt < nStrg;)
{
Chr = fgetc(fid);
if (Chr == '\n')
{
if (Strlen) // eat zero length lines
{
if (Strlen > MLength)
Strlen = MLength;
Pdt[LineCnt].dt_string = PCurStr - Strlen;
Pdt[LineCnt].dt_strlen = Strlen;
LineCnt++;
Strlen = 0;
*PCurStr++ = '\0'; // for JudySL
if (aFlag) // for word alignment
PCurStr = (uint8_t *) ROUNDUPWORD((Word_t)PCurStr);
if ((Word_t)PCurStr % sizeof(Word_t))
aCount++;
}
}
else
{
if (Strlen < MLength)
{
Strlen++;
if (Chr == '\0')
Chr = ' '; // for JudySL
*PCurStr++ = (uint8_t) Chr;
}
}
}
fclose(fid);
fid = NULL;
assert(nStrg == LineCnt);
printf("# %lu (%.1f%%) non-Word_t aligned string buffers\n",
aCount, (double)aCount / (double)LineCnt * 100.0);
printf("# Ram used for input data = %lu bytes\n", StringMemory);
printf("# Average string length = %.1f bytes\n",
(double)(StrTot - LineCnt) / LineCnt);
// Allocate memory for Cached assess to 'Get' (largest delta). This flag
// will put the 'randomized' 'Get' order strings in a sequential buffer.
// Modern processors will 'read ahead' with an access to RAM is sequential
// -- thus saving the 'Get' having to bring the string into cache.
if (CFlag)
{
PdtS_ = (Pdt_t) malloc(TValues * sizeof(dt_t));
if (PdtS_ == NULL)
MALLOCERROR;
// now guess how much memory will be needed for the strings
Strsiz_ = ((StrTot / nStrg) * TValues);
Strsiz_ += Strsiz_; // bump %20
Strbuf_ = (uint8_t *) malloc(Strsiz_);
if (Strbuf_ == NULL)
MALLOCERROR;
printf
("# %lu bytes malloc() for 'cached' strings for Get measurement\n",
Strsiz_);
}
//=======================================================================
// TIME GETSTRING() from Cache (most of the time)
//=======================================================================
STARTTm(tm); // start timer
for (LineCnt = 0; LineCnt < nStrg; LineCnt++)
{
GETSTRING(PCurStr, Strlen);
Strlen = Pdt[LineCnt].dt_strlen;
PCurStr = Pdt[LineCnt].dt_string;
if (strlen(PCurStr) != Strlen) // bring string into Cache
{
// necessary to prevent cc from optimizing out
printf(" !! OOps Bug, wrong string length\n");
exit(1);
}
}
ENDTm(DeltaUSec, tm); // end timer
printf
("# Access Time = %6.3f uS average per string (mostly from Cache)\n",
DeltaUSec / nStrg);
//=======================================================================
// TIME GETSTRING() + HASHSTR() from Cache (most of the time)
//=======================================================================
STARTTm(tm); // start timer
for (LineCnt = 0; LineCnt < nStrg; LineCnt++)
{
uint32_t hval;
GETSTRING(PCurStr, Strlen);
PCurStr = Pdt[LineCnt].dt_string;
Strlen = Pdt[LineCnt].dt_strlen;
hval = HASHSTR(PCurStr, Strlen, HTblsz);
if (foolflag)
printf("OOps foolflag is set, hval = %d\n", hval);
}
ENDTm(DeltaUSec, tm); // end timer
printf
("# HashStr() Time = %6.3f uS average per string (mostly from Cache)\n",
DeltaUSec / nStrg);
// randomize the input strings (adjacent strings will not be on same page)
if (rFlag == 0)
{
Randomize(Pdt, nStrg); // Randomize ALL to be stored
//=======================================================================
// TIME GETSTRING() from RAM (most of the time)
//=======================================================================
STARTTm(tm); // start timer
for (LineCnt = 0; LineCnt < nStrg; LineCnt++)
{
GETSTRING(PCurStr, Strlen);
Strlen = Pdt[LineCnt].dt_strlen;
PCurStr = Pdt[LineCnt].dt_string;
if (strlen(PCurStr) != Strlen) // bring string into Cache
{
// necessary to prevent cc from optimizing out
printf(" !! OOps Bug, wrong string length\n");
exit(1);
}
}
ENDTm(DeltaUSec, tm); // end timer
printf
("# Access Time = %6.3f uS average per string (mostly from RAM)\n",
DeltaUSec / nStrg);
//=======================================================================
// TIME GETSTRING() + HASHSTR() from RAM (most of the time)
//=======================================================================
STARTTm(tm); // start timer
for (LineCnt = 0; LineCnt < nStrg; LineCnt++)
{
uint32_t hval;
GETSTRING(PCurStr, Strlen);
Strlen = Pdt[LineCnt].dt_strlen;
PCurStr = Pdt[LineCnt].dt_string;
hval = HASHSTR(PCurStr, Strlen, HTblsz);
if (foolflag)
printf("OOps foolflag is set, hval = %u\n", hval);
}
ENDTm(DeltaUSec, tm); // end timer
printf
("# HashStr() Time = %6.3f uS average per string (mostly from RAM)\n",
DeltaUSec / nStrg);
}
//=======================================================================
// Insert, Get and Delete loops
//=======================================================================
for (Pass = 0; Pass < Passes; Pass++)
{
printf("# Pass %d\n", Pass);
// heading of table
Printf
("# TotInserts DeltaGets DupStrs InsTime GetTime HChainLen Ram/String\n");
gStored = 0; // number of strings inserted
StrNumb = 0; // number of attempted strings inserted
STARTmem; // current malloc() mem usage
for (grp = 0; grp < Groups; grp++)
{
PWord_t PValue;
Word_t Begin = gStored; // remember current STOREed
Word_t Delta = Pms[grp].ms_delta;
switch (Method)
{
case M_Print:
{
STARTTm(tm); // start timer
for (lines = 0; lines < Delta; lines++, StrNumb++)
{
GETSTRING(PCurStr, Strlen);
src/judy-1.0.5/test/StringCompare.c view on Meta::CPAN
redblackinsert(&rbans, (char *)PCurStr);
}
ENDTm(DeltaUSec, tm); // end timer
break;
}
case M_Ternary:
{
STARTTm(tm); // start timer
for (lines = 0; lines < Delta; lines++, StrNumb++)
{
GETSTRING(PCurStr, Strlen);
Strlen = Pdt[StrNumb].dt_strlen;
PCurStr = Pdt[StrNumb].dt_string;
TernaryIns(&Ternary, PCurStr, Strlen);
}
ENDTm(DeltaUSec, tm); // end timer
break;
}
default:
assert(0); // cant happen
break;
}
ENDmem(DeltaMem); // current malloc() mem usage
ReadLin = StrNumb; // adjust delta
if (ReadLin > TValues)
ReadLin = TValues;
// if (Delta > TValues)
// ReadLin = Delta; // use the Delta
Printf(" %11lu", StrNumb); // Total stored
Printf(" %10lu", ReadLin); // Number to read back
Begin = gStored - Begin; // actual STORED
assert(lines == Delta);
Printf(" %8lu", Delta - Begin); // Duplicate strings
// Average time per line to store (including duplicate strings)
Mult = DeltaUSec / (double)Delta;
if (Mult < Pms[grp].ms_mininsert)
Pms[grp].ms_mininsert = Mult;
Printf(" %7.3f", Mult);
// Bytes allocated thru malloc()
if (TotalJudyMalloc == 0)
Pms[grp].ms_Bytes = (double)DeltaMem;
else
Pms[grp].ms_Bytes = (double)(TotalJudyMalloc * sizeof(Word_t));
Pms[grp].ms_Bytes /= (double)gStored;
fflush(stdout);
//=======================================================================
// READ BACK LOOP
//=======================================================================
Pdts = Pdt; // Strings to 'Get'
gChainln = 0; // total chain lengths
if (rFlag == 0)
{
Randomize(Pdt, StrNumb); // Randomize ONLY those stored
if (CFlag)
{
// Allocate and make sequencial string buffer
Pdts = BuildSeqBuf(Pdt, ReadLin);
}
}
switch (Method)
{
case M_Print:
break;
case M_Hash:
{
STARTTm(tm); // start timer
for (lines = 0; lines < ReadLin; lines++)
{
GETSTRING(PCurStr, Strlen);
Strlen = Pdts[lines].dt_strlen;
PCurStr = Pdts[lines].dt_string;
PValue = HashGet(HRoot, PCurStr, Strlen); // get string
assert(PValue != NULL);
assert(*PValue > 0);
}
ENDTm(DeltaUSec, tm); // end timer
break;
}
case M_JLHash:
{
STARTTm(tm); // start timer
for (lines = 0; lines < ReadLin; lines++)
{
GETSTRING(PCurStr, Strlen);
Strlen = Pdts[lines].dt_strlen;
PCurStr = Pdts[lines].dt_string;
PValue = JLHashGet(JLHash, PCurStr, Strlen); // get string
assert(PValue != NULL);
assert(*PValue > 0);
}
ENDTm(DeltaUSec, tm); // end timer
break;
}
case M_JudySL:
{
STARTTm(tm); // start timer
for (lines = 0; lines < ReadLin; lines++)
{
GETSTRING(PCurStr, Strlen);
PCurStr = Pdts[lines].dt_string;
JSLG(PValue, JudySL, PCurStr); // get string
assert(PValue != NULL);
assert(*PValue > 0);
}
ENDTm(DeltaUSec, tm); // end timer
( run in 0.853 second using v1.01-cache-2.11-cpan-13bb782fe5a )