Alien-Judy
view release on metacpan or search on metacpan
src/judy-1.0.5/test/StringCompare.c view on Meta::CPAN
if ((d = *s - pp->splitchar) == 0)
{
if (*s++ == 0)
{
printf("Oops duplicate Ternary string %s\n", instr);
return;
}
p = &(pp->eqkid);
}
else if (d < 0)
p = &(pp->lokid);
else
p = &(pp->hikid);
}
for (;;)
{
*p = (Tptr) malloc(sizeof(Tnode));
pp = *p;
pp->splitchar = *s;
pp->lokid = pp->eqkid = pp->hikid = 0;
if (*s++ == 0)
{
pp->eqkid = (Tptr) instr;
gStored++; // number of strings stored
return;
}
p = &(pp->eqkid);
}
}
int
TernaryGet(Tptr p, uint8_t * s, int Len)
{
while (p)
{
if (*s < p->splitchar)
p = p->lokid;
else if (*s == p->splitchar)
{
if (*s++ == 0)
return 1;
p = p->eqkid;
}
else
p = p->hikid;
}
return 0;
}
//=======================================================================
// M E A S U R E A D T S P E E D and M E M O R Y U S A G E
//=======================================================================
//Word_t TotalJudyMalloc;
#define GETSTRING(PCurStr, Strlen)
int
main(int argc, char *argv[])
{
TIMER_vars(tm); // declare timer variables
FILE *fid; // to read file.
int Chr; // char read from fgetc
Pdt_t Pdt, Pdts; // array of lengths and pointers to str
uint8_t *PCurStr; // Current string pointer
Word_t LineCnt; // line counter
int Strlen; // = strlen();
Word_t StrTot; // Total len of strings
Word_t StrNumb; // current line number
Word_t ReadLin; // strings to read
double Mult; // multiplier between groups
Word_t Groups; // number of measurement groups
Word_t grp; // current group
Pvoid_t JudySL = NULL; // JudySL root pointer
Pvoid_t JudyHS = NULL; // JudyHS root pointer
Pvoid_t JLHash = NULL; // JLHash root pointer
Phinfo_t HRoot = NULL; // hash table root pointer
Tptr Ternary = { 0 }; // Ternary struct root pointer
Method_t Method = M_invalid; // the method to measure
Word_t lines = 0; // to shut up compiler
Word_t Bytes = 0; // Bytes deallocated from FreeArray
Word_t StringMemory; // Bytes allocated for input data
int Pass;
int Passes = 1;
int Opt;
extern char *optarg;
int ErrorFlag = 0;
// un-buffer output
setbuf(stdout, NULL);
//============================================================
// PARSE INPUT PARAMETERS
//============================================================
while ((Opt = getopt(argc, argv, "A:H:L:n:T:P:M:praDC")) != -1)
{
switch (Opt)
{
case 'A':
if (Method != M_invalid)
{
printf("\nOnly ONE '-A<ADT>' is allowed!!!!!!\n");
ErrorFlag++;
break;
}
if (strcmp(optarg, "Print") == 0)
Method = M_Print;
if (strcmp(optarg, "Hash") == 0)
{
Method = M_Hash;
HTblsz = 1LU << 20; // default 1.0+ million
}
if (strcmp(optarg, "JLHash") == 0)
{
Method = M_JLHash;
HTblsz = 0; // max 2^32
}
if (strcmp(optarg, "JudySL") == 0)
src/judy-1.0.5/test/StringCompare.c view on Meta::CPAN
Strlen = 0;
*PCurStr++ = '\0'; // for JudySL
if (aFlag) // for word alignment
PCurStr = (uint8_t *) ROUNDUPWORD((Word_t)PCurStr);
if ((Word_t)PCurStr % sizeof(Word_t))
aCount++;
}
}
else
{
if (Strlen < MLength)
{
Strlen++;
if (Chr == '\0')
Chr = ' '; // for JudySL
*PCurStr++ = (uint8_t) Chr;
}
}
}
fclose(fid);
fid = NULL;
assert(nStrg == LineCnt);
printf("# %lu (%.1f%%) non-Word_t aligned string buffers\n",
aCount, (double)aCount / (double)LineCnt * 100.0);
printf("# Ram used for input data = %lu bytes\n", StringMemory);
printf("# Average string length = %.1f bytes\n",
(double)(StrTot - LineCnt) / LineCnt);
// Allocate memory for Cached assess to 'Get' (largest delta). This flag
// will put the 'randomized' 'Get' order strings in a sequential buffer.
// Modern processors will 'read ahead' with an access to RAM is sequential
// -- thus saving the 'Get' having to bring the string into cache.
if (CFlag)
{
PdtS_ = (Pdt_t) malloc(TValues * sizeof(dt_t));
if (PdtS_ == NULL)
MALLOCERROR;
// now guess how much memory will be needed for the strings
Strsiz_ = ((StrTot / nStrg) * TValues);
Strsiz_ += Strsiz_; // bump %20
Strbuf_ = (uint8_t *) malloc(Strsiz_);
if (Strbuf_ == NULL)
MALLOCERROR;
printf
("# %lu bytes malloc() for 'cached' strings for Get measurement\n",
Strsiz_);
}
//=======================================================================
// TIME GETSTRING() from Cache (most of the time)
//=======================================================================
STARTTm(tm); // start timer
for (LineCnt = 0; LineCnt < nStrg; LineCnt++)
{
GETSTRING(PCurStr, Strlen);
Strlen = Pdt[LineCnt].dt_strlen;
PCurStr = Pdt[LineCnt].dt_string;
if (strlen(PCurStr) != Strlen) // bring string into Cache
{
// necessary to prevent cc from optimizing out
printf(" !! OOps Bug, wrong string length\n");
exit(1);
}
}
ENDTm(DeltaUSec, tm); // end timer
printf
("# Access Time = %6.3f uS average per string (mostly from Cache)\n",
DeltaUSec / nStrg);
//=======================================================================
// TIME GETSTRING() + HASHSTR() from Cache (most of the time)
//=======================================================================
STARTTm(tm); // start timer
for (LineCnt = 0; LineCnt < nStrg; LineCnt++)
{
uint32_t hval;
GETSTRING(PCurStr, Strlen);
PCurStr = Pdt[LineCnt].dt_string;
Strlen = Pdt[LineCnt].dt_strlen;
hval = HASHSTR(PCurStr, Strlen, HTblsz);
if (foolflag)
printf("OOps foolflag is set, hval = %d\n", hval);
}
ENDTm(DeltaUSec, tm); // end timer
printf
("# HashStr() Time = %6.3f uS average per string (mostly from Cache)\n",
DeltaUSec / nStrg);
// randomize the input strings (adjacent strings will not be on same page)
if (rFlag == 0)
{
Randomize(Pdt, nStrg); // Randomize ALL to be stored
//=======================================================================
// TIME GETSTRING() from RAM (most of the time)
//=======================================================================
STARTTm(tm); // start timer
for (LineCnt = 0; LineCnt < nStrg; LineCnt++)
{
GETSTRING(PCurStr, Strlen);
Strlen = Pdt[LineCnt].dt_strlen;
PCurStr = Pdt[LineCnt].dt_string;
if (strlen(PCurStr) != Strlen) // bring string into Cache
{
// necessary to prevent cc from optimizing out
printf(" !! OOps Bug, wrong string length\n");
exit(1);
}
}
ENDTm(DeltaUSec, tm); // end timer
printf
("# Access Time = %6.3f uS average per string (mostly from RAM)\n",
DeltaUSec / nStrg);
//=======================================================================
// TIME GETSTRING() + HASHSTR() from RAM (most of the time)
//=======================================================================
STARTTm(tm); // start timer
for (LineCnt = 0; LineCnt < nStrg; LineCnt++)
{
uint32_t hval;
GETSTRING(PCurStr, Strlen);
Strlen = Pdt[LineCnt].dt_strlen;
PCurStr = Pdt[LineCnt].dt_string;
hval = HASHSTR(PCurStr, Strlen, HTblsz);
if (foolflag)
printf("OOps foolflag is set, hval = %u\n", hval);
}
ENDTm(DeltaUSec, tm); // end timer
printf
("# HashStr() Time = %6.3f uS average per string (mostly from RAM)\n",
DeltaUSec / nStrg);
}
//=======================================================================
// Insert, Get and Delete loops
//=======================================================================
for (Pass = 0; Pass < Passes; Pass++)
{
printf("# Pass %d\n", Pass);
// heading of table
Printf
("# TotInserts DeltaGets DupStrs InsTime GetTime HChainLen Ram/String\n");
gStored = 0; // number of strings inserted
StrNumb = 0; // number of attempted strings inserted
STARTmem; // current malloc() mem usage
for (grp = 0; grp < Groups; grp++)
{
PWord_t PValue;
Word_t Begin = gStored; // remember current STOREed
Word_t Delta = Pms[grp].ms_delta;
switch (Method)
{
case M_Print:
{
STARTTm(tm); // start timer
for (lines = 0; lines < Delta; lines++, StrNumb++)
{
GETSTRING(PCurStr, Strlen);
PCurStr = Pdt[StrNumb].dt_string;
Printf("%s\n", (char *)PCurStr);
}
ENDTm(DeltaUSec, tm); // end timer
break;
}
case M_Hash:
{
STARTTm(tm); // start timer
for (lines = 0; lines < Delta; lines++, StrNumb++)
{
GETSTRING(PCurStr, Strlen);
Strlen = Pdt[StrNumb].dt_strlen;
PCurStr = Pdt[StrNumb].dt_string;
PValue = HashIns(&HRoot, PCurStr, Strlen, HTblsz);
if ((*PValue)++ == 0)
gStored++; // number of strings stored
}
ENDTm(DeltaUSec, tm); // end timer
break;
}
case M_JLHash:
{
STARTTm(tm); // start timer
for (lines = 0; lines < Delta; lines++, StrNumb++)
{
GETSTRING(PCurStr, Strlen);
Strlen = Pdt[StrNumb].dt_strlen;
PCurStr = Pdt[StrNumb].dt_string;
PValue = JLHashIns(&JLHash, PCurStr, Strlen, HTblsz);
if ((*PValue)++ == 0)
gStored++; // number of strings stored
}
ENDTm(DeltaUSec, tm); // end timer
break;
}
case M_JudySL:
{
STARTTm(tm); // start timer
for (lines = 0; lines < Delta; lines++, StrNumb++)
{
GETSTRING(PCurStr, Strlen);
PCurStr = Pdt[StrNumb].dt_string;
JSLI(PValue, JudySL, PCurStr); // insert string
if ((*PValue)++ == 0)
gStored++; // number of strings stored
}
ENDTm(DeltaUSec, tm); // end timer
break;
}
case M_JudyHS:
{
STARTTm(tm); // start timer
for (lines = 0; lines < Delta; lines++, StrNumb++)
{
GETSTRING(PCurStr, Strlen);
Strlen = Pdt[StrNumb].dt_strlen;
PCurStr = Pdt[StrNumb].dt_string;
JHSI(PValue, JudyHS, PCurStr, Strlen); // insert string
if ((*PValue)++ == 0)
gStored++; // number of strings stored
}
ENDTm(DeltaUSec, tm); // end timer
break;
}
// NOTE: the ADT's below here are so slow, that I did not add much effort
// to clean them up. (dlb)
case M_Splay:
{
STARTTm(tm); // start timer
for (lines = 0; lines < Delta; lines++, StrNumb++)
{
GETSTRING(PCurStr, Strlen);
PCurStr = Pdt[StrNumb].dt_string;
splayinsert(&spans, (char *)PCurStr);
}
ENDTm(DeltaUSec, tm); // end timer
break;
}
case M_Redblack:
{
STARTTm(tm); // start timer
for (lines = 0; lines < Delta; lines++, StrNumb++)
{
GETSTRING(PCurStr, Strlen);
PCurStr = Pdt[StrNumb].dt_string;
redblackinsert(&rbans, (char *)PCurStr);
}
ENDTm(DeltaUSec, tm); // end timer
break;
}
case M_Ternary:
{
STARTTm(tm); // start timer
for (lines = 0; lines < Delta; lines++, StrNumb++)
{
GETSTRING(PCurStr, Strlen);
Strlen = Pdt[StrNumb].dt_strlen;
PCurStr = Pdt[StrNumb].dt_string;
TernaryIns(&Ternary, PCurStr, Strlen);
}
ENDTm(DeltaUSec, tm); // end timer
break;
}
default:
assert(0); // cant happen
break;
}
ENDmem(DeltaMem); // current malloc() mem usage
ReadLin = StrNumb; // adjust delta
if (ReadLin > TValues)
ReadLin = TValues;
// if (Delta > TValues)
// ReadLin = Delta; // use the Delta
Printf(" %11lu", StrNumb); // Total stored
Printf(" %10lu", ReadLin); // Number to read back
Begin = gStored - Begin; // actual STORED
assert(lines == Delta);
Printf(" %8lu", Delta - Begin); // Duplicate strings
// Average time per line to store (including duplicate strings)
Mult = DeltaUSec / (double)Delta;
if (Mult < Pms[grp].ms_mininsert)
Pms[grp].ms_mininsert = Mult;
Printf(" %7.3f", Mult);
// Bytes allocated thru malloc()
if (TotalJudyMalloc == 0)
Pms[grp].ms_Bytes = (double)DeltaMem;
else
Pms[grp].ms_Bytes = (double)(TotalJudyMalloc * sizeof(Word_t));
Pms[grp].ms_Bytes /= (double)gStored;
fflush(stdout);
//=======================================================================
// READ BACK LOOP
//=======================================================================
Pdts = Pdt; // Strings to 'Get'
gChainln = 0; // total chain lengths
if (rFlag == 0)
{
Randomize(Pdt, StrNumb); // Randomize ONLY those stored
if (CFlag)
{
// Allocate and make sequencial string buffer
Pdts = BuildSeqBuf(Pdt, ReadLin);
}
}
switch (Method)
{
case M_Print:
break;
case M_Hash:
{
STARTTm(tm); // start timer
for (lines = 0; lines < ReadLin; lines++)
{
GETSTRING(PCurStr, Strlen);
Strlen = Pdts[lines].dt_strlen;
PCurStr = Pdts[lines].dt_string;
PValue = HashGet(HRoot, PCurStr, Strlen); // get string
assert(PValue != NULL);
assert(*PValue > 0);
}
ENDTm(DeltaUSec, tm); // end timer
break;
}
case M_JLHash:
{
STARTTm(tm); // start timer
for (lines = 0; lines < ReadLin; lines++)
{
GETSTRING(PCurStr, Strlen);
Strlen = Pdts[lines].dt_strlen;
PCurStr = Pdts[lines].dt_string;
PValue = JLHashGet(JLHash, PCurStr, Strlen); // get string
assert(PValue != NULL);
assert(*PValue > 0);
}
ENDTm(DeltaUSec, tm); // end timer
break;
}
case M_JudySL:
{
STARTTm(tm); // start timer
for (lines = 0; lines < ReadLin; lines++)
{
GETSTRING(PCurStr, Strlen);
PCurStr = Pdts[lines].dt_string;
JSLG(PValue, JudySL, PCurStr); // get string
assert(PValue != NULL);
assert(*PValue > 0);
}
ENDTm(DeltaUSec, tm); // end timer
break;
}
case M_JudyHS:
{
STARTTm(tm); // start timer
for (lines = 0; lines < ReadLin; lines++)
{
GETSTRING(PCurStr, Strlen);
Strlen = Pdts[lines].dt_strlen;
PCurStr = Pdts[lines].dt_string;
JHSG(PValue, JudyHS, PCurStr, Strlen); // get string
assert(PValue != NULL);
assert(*PValue > 0);
}
ENDTm(DeltaUSec, tm); // end timer
break;
}
// NOTE: the ADT's below here are so slow, that I did not add much effort
// to clean them up. (dlb)
case M_Splay:
{
STARTTm(tm); // start timer
for (lines = 0; lines < ReadLin; lines++)
{
GETSTRING(PCurStr, Strlen);
PCurStr = Pdts[lines].dt_string;
splaysearch(&spans, (char *)PCurStr);
}
ENDTm(DeltaUSec, tm); // end timer
break;
}
case M_Redblack:
{
STARTTm(tm); // start timer
for (lines = 0; lines < ReadLin; lines++)
{
GETSTRING(PCurStr, Strlen);
PCurStr = Pdts[lines].dt_string;
redblacksearch(&rbans, (char *)PCurStr);
}
ENDTm(DeltaUSec, tm); // end timer
break;
}
case M_Ternary:
{
STARTTm(tm); // start timer
for (lines = 0; lines < ReadLin; lines++)
{
GETSTRING(PCurStr, Strlen);
Strlen = Pdts[lines].dt_strlen;
PCurStr = Pdts[lines].dt_string;
if (TernaryGet(Ternary, PCurStr, Strlen) == 0)
{
printf("\n OOps - Ternary Bug at Line = %d\n",
__LINE__);
exit(1);
}
}
ENDTm(DeltaUSec, tm); // end timer
break;
}
default:
assert(0); // cant happen
break;
}
Mult = DeltaUSec / (double)ReadLin;
// save least value
if (Mult < Pms[grp].ms_minretrive)
Pms[grp].ms_minretrive = Mult;
Printf(" %7.3f", Mult); // RETRIVE per string
Printf(" %9.6f", (double)gChainln / (double)ReadLin);
// RAM USED PER STRING TO STORE DATA
Printf(" %13.1f", (double)Pms[grp].ms_Bytes);
Printf("\n");
fflush(stdout);
}
if (Method == M_Print)
exit(0);
Printf("# Total Duplicate strings = %lu\n", nStrg - gStored);
//=======================================================================
// Delete loop
//=======================================================================
DeltaUSec = -1.0; // set deleted flag
if (rFlag == 0)
{
Randomize(Pdt, StrNumb); // Randomize ONLY those stored
}
switch (Method)
{
case M_JudySL:
{
if (DFlag)
{
Printf("# Begin JudySLDel() loop...\n");
STARTTm(tm); // start timer
for (lines = 0; lines < nStrg; lines++)
{
int Rc;
GETSTRING(PCurStr, Strlen);
PCurStr = Pdt[lines].dt_string;
JSLD(Rc, JudySL, PCurStr); // delete string
assert(Rc != JERR);
}
ENDTm(DeltaUSec, tm); // end timer
}
else
{
Printf("# Begin JudySLFreeArray()...\n");
STARTTm(tm); // start timer
JSLFA(Bytes, JudySL);
ENDTm(DeltaUSec, tm); // end timer
}
break;
}
case M_JudyHS:
{
if (DFlag)
{
int Rc;
Printf("# Begin JudyHSDel() loop...");
STARTTm(tm); // start timer
for (lines = 0; lines < nStrg; lines++)
{
GETSTRING(PCurStr, Strlen);
Strlen = Pdt[lines].dt_strlen;
PCurStr = Pdt[lines].dt_string;
JHSD(Rc, JudyHS, PCurStr, Strlen); // Delete string
assert(Rc != JERR);
}
ENDTm(DeltaUSec, tm); // end timer
}
else
{
Printf("# Begin JudyHSFreeArray()...\n");
STARTTm(tm); // start timer
JHSFA(Bytes, JudyHS);
ENDTm(DeltaUSec, tm); // end timer
}
break;
}
case M_Hash:
{
if (DFlag)
{
Printf("# Begin HashDel() loop...\n");
STARTTm(tm); // start timer
for (lines = 0; lines < nStrg; lines++)
{
GETSTRING(PCurStr, Strlen);
Strlen = Pdt[lines].dt_strlen;
PCurStr = Pdt[lines].dt_string;
HashDel(&HRoot, PCurStr, Strlen); // Delete string
}
ENDTm(DeltaUSec, tm); // end timer
}
else
{
Printf("# Begin HashFreeArray()...\n");
STARTTm(tm); // start timer
Bytes = HashFreeArray(&HRoot);
ENDTm(DeltaUSec, tm); // end timer
}
break;
}
case M_JLHash:
{
if (DFlag)
{
Printf("# Begin JLHashDel() loop...\n");
STARTTm(tm); // start timer
for (lines = 0; lines < nStrg; lines++)
{
GETSTRING(PCurStr, Strlen);
Strlen = Pdt[lines].dt_strlen;
PCurStr = Pdt[lines].dt_string;
JLHashDel(&JLHash, PCurStr, Strlen); // Delete string
}
ENDTm(DeltaUSec, tm); // end timer
}
else
{
Printf("# Begin JLHashFreeArray()...\n");
STARTTm(tm); // start timer
Bytes = JLHashFreeArray(&JLHash);
ENDTm(DeltaUSec, tm); // end timer
}
break;
}
default:
printf("# Delete not implemented yet, so quit\n");
Passes = 1; // No, delete, so quit
break;
}
// average time per line to delete (including duplicate strings)
if (Bytes) // Measured freed bytes?
{
Printf("# returned %lu bytes\n", Bytes);
}
if (TotalJudyMalloc) // Any bytes left after free?
{
printf
("# !!! BUG, %lu bytes not deleted in *Free()\n",
TotalJudyMalloc * sizeof(Word_t));
}
if (DeltaUSec != -1.0) // Measured how long to free?
{
Printf("# Free %lu strings, %0.3f uSecs Ave/string\n",
gStored, DeltaUSec / (double)gStored);
}
Printf("\n");
}
if (Passes != 1)
{
printf("# TotInserts 0 0 InsTime GetTime 0 Ram/String\n");
StrNumb = 0;
for (grp = 0; grp < Groups; grp++)
{
StrNumb += Pms[grp].ms_delta;
printf(" %11lu", StrNumb); // Total stored
printf(" 0 0"); // place holder
printf(" %7.3f", Pms[grp].ms_mininsert);
printf(" %7.3f", Pms[grp].ms_minretrive);
printf(" 0"); // place holder
printf(" %7.3f", Pms[grp].ms_Bytes);
printf("\n");
}
}
exit(0); // done
} // main()
( run in 0.382 second using v1.01-cache-2.11-cpan-9bca49b1385 )