Alien-Judy
view release on metacpan or search on metacpan
src/judy-1.0.5/test/StringCompare.c view on Meta::CPAN
//=======================================================================
// Run-time options and counters shared by the measurement code.

// Tested inside the timing loops (and reported with an "OOps" message if it
// is ever set) so the compiler cannot discard the values computed there.
int foolflag = 0; // fool compiler from optimizing
static Word_t gStored = 0; // number of strings inserted
static Word_t gChainln = 0; // links traversed during RETRIEVE
static Word_t PtsPdec = 40; // default measurement points per decade
#define INFSTRGS 1000000000 // 1 billion strings is infinity
static Word_t nStrg = INFSTRGS; // infinity -- measure all strings
// Max measure points for RETRIEVE tests; also the number of records
// allocated for the sequential 'Get' buffers when CFlag is set.
static Word_t TValues = 100000;
static int pFlag = 0; // pre-fault hash table pages into RAM
static int rFlag = 0; // do not randomize input file
static int aFlag = 0; // word align string buffers
static int DFlag = 0; // do the delete measurement
static int CFlag = 0; // build sequential Get buffers
static Word_t aCount = 0; // count of string buffers that are not Word_t aligned
// define the maximum length of a string allowed
#define MAXSTRLEN (100000)
// Longest string kept; longer input lines are truncated to this length
static int MLength = MAXSTRLEN;
// Hash table size.  A value of 0 is reported as a virtual size of 4294967296.
// NOTE(review): the original comment said "1M default hash table size", but
// no such initializer is visible here -- confirm where the default is set.
static Word_t HTblsz;
static int fileidx; // argv[fileidx] == name of the input text file
// Record describing one input string held in RAM
struct STRING_
{
    int dt_strlen;          // length in bytes, not counting the trailing '\0'
    uint8_t *dt_string;     // first byte of the string
};
typedef struct STRING_ dt_t;    // the record itself
typedef struct STRING_ *Pdt_t;  // pointer to a record
// Buffers for the 'cached' Get measurement; allocated only when CFlag is set.
static Pdt_t PdtS_ = NULL; // memory for Cache access Gets (TValues records)
static uint8_t *Strbuf_ = NULL; // malloc()ed buffer for the 'cached' strings
static Word_t Strsiz_ = 0; // estimated size of Strbuf_ in bytes
/*
   On Linux 2.6.3-4mdkenterprise (Mandrake 10.0 Community) printing (even
   to a file) makes the timings inaccurate.  So, use -L2 or greater to
   average (actually save min times) and print results after all tests are
   completed.

   Printf prints only when Pass == 0.  It is spelled "if ... { } else printf"
   so that the hidden 'if' already owns its 'else': an 'else' written after
   Printf(...); by the caller then binds to the caller's own 'if' instead of
   being captured by the macro.
*/
#define Printf if (Pass != 0) { } else printf

// Round BYTES up to the next multiple of sizeof(Word_t)
#define ROUNDUPWORD(BYTES) (((BYTES) + sizeof(Word_t) - 1) & (-sizeof(Word_t)))

// Number of Word_t words needed to hold BYTES bytes
#define BYTES2WORDS(BYTES) (((BYTES) + sizeof(Word_t) - 1) / (sizeof(Word_t)))
//=======================================================================
// T I M I N G M A C R O S
//=======================================================================
// Most recent elapsed time stored by ENDTm(), in microseconds
static double DeltaUSec;

// TIMER_vars(T) declares the state for timer T, STARTTm(T) records the
// starting time and ENDTm(D,T) stores the time elapsed since STARTTm(T) in D.
#ifndef CPUMHZ

// Default: wall-clock time taken from gettimeofday()
#define TIMER_vars(T) struct timeval __TVBeg_##T, __TVEnd_##T
#define STARTTm(T) gettimeofday(&__TVBeg_##T, NULL)
#define ENDTm(D,T)                                                      \
{                                                                       \
    gettimeofday(&__TVEnd_##T, NULL);                                   \
    (D) = 1E6 * (double)(__TVEnd_##T.tv_sec - __TVBeg_##T.tv_sec) +     \
          (double)(__TVEnd_##T.tv_usec - __TVBeg_##T.tv_usec);          \
}

#else // CPUMHZ

// Some operating systems have get_cycles() in /usr/include/asm/timex.h.
// CPUSPEED is the length of one clock cycle in microseconds, e.g. a 750Mhz
// processor has a 1.34 nS clock cycle.
#define CPUSPEED (1.0 / (CPUMHZ))
#include <asm/timex.h>
#define TIMER_vars(T) cycles_t __TVBeg_##T
#define STARTTm(T) __TVBeg_##T = get_cycles()
#define ENDTm(D,T) { (D) = CPUSPEED * (double)(get_cycles() - __TVBeg_##T); }

#endif // CPUMHZ
//=======================================================================
// M E M O R Y S I Z E M A C R O S
//=======================================================================
// use mallinfo() instead of sbrk() for memory usage measurements
// this should include the RAM that was mmap()ed in malloc()
static Word_t DeltaMem; // for remembering
// Some mallocs have mallinfo()
// #define MALLINFO 1
#ifdef MALLINFO
#include <malloc.h> // mallinfo()
static struct mallinfo malStart;
#define STARTmem malStart = mallinfo()
#define ENDmem(DELTAMEM) \
{ \
struct mallinfo malEnd = mallinfo(); \
/* strange little dance from signed to unsigned to double */ \
unsigned int _un_int = malEnd.arena - malStart.arena; \
(DELTAMEM) = (double)_un_int; /* to double */ \
}
#else // NO MALLINFO
// this usually works for machines with less than 1-2Gb RAM.
// (it does NOT include memory ACQUIRED by mmap())
static char *malStart;
#define STARTmem (malStart = (char *)sbrk(0))
#define ENDmem(DELTAMEM) \
src/judy-1.0.5/test/StringCompare.c view on Meta::CPAN
if (HTblsz)
printf("# JLHash table virtual size = %lu\n", HTblsz);
else
printf("# JLHash table virtual size = 4294967296\n");
}
//=======================================================================
// Read text input file into RAM
//=======================================================================
if ((fid = fopen(argv[fileidx], "r")) == NULL)
FILERROR;
for (Strlen = LineCnt = 0; LineCnt < nStrg;)
{
Chr = fgetc(fid);
if (Chr == '\n')
{
if (Strlen) // eat zero length lines
{
if (Strlen > MLength)
Strlen = MLength;
Pdt[LineCnt].dt_string = PCurStr - Strlen;
Pdt[LineCnt].dt_strlen = Strlen;
LineCnt++;
Strlen = 0;
*PCurStr++ = '\0'; // for JudySL
if (aFlag) // for word alignment
PCurStr = (uint8_t *) ROUNDUPWORD((Word_t)PCurStr);
if ((Word_t)PCurStr % sizeof(Word_t))
aCount++;
}
}
else
{
if (Strlen < MLength)
{
Strlen++;
if (Chr == '\0')
Chr = ' '; // for JudySL
*PCurStr++ = (uint8_t) Chr;
}
}
}
fclose(fid);
fid = NULL;
assert(nStrg == LineCnt);
printf("# %lu (%.1f%%) non-Word_t aligned string buffers\n",
aCount, (double)aCount / (double)LineCnt * 100.0);
printf("# Ram used for input data = %lu bytes\n", StringMemory);
printf("# Average string length = %.1f bytes\n",
(double)(StrTot - LineCnt) / LineCnt);
// Allocate memory for Cached access to 'Get' (largest delta). This flag
// will put the 'randomized' 'Get' order strings in a sequential buffer.
// Modern processors will 'read ahead' when access to RAM is sequential
// -- thus saving the 'Get' from having to bring the string into cache.
if (CFlag)
{
PdtS_ = (Pdt_t) malloc(TValues * sizeof(dt_t));
if (PdtS_ == NULL)
MALLOCERROR;
// now guess how much memory will be needed for the strings
Strsiz_ = ((StrTot / nStrg) * TValues);
Strsiz_ += Strsiz_; // bump %20
Strbuf_ = (uint8_t *) malloc(Strsiz_);
if (Strbuf_ == NULL)
MALLOCERROR;
printf
("# %lu bytes malloc() for 'cached' strings for Get measurement\n",
Strsiz_);
}
//=======================================================================
// TIME GETSTRING() from Cache (most of the time)
//=======================================================================
STARTTm(tm); // start timer
for (LineCnt = 0; LineCnt < nStrg; LineCnt++)
{
GETSTRING(PCurStr, Strlen);
Strlen = Pdt[LineCnt].dt_strlen;
PCurStr = Pdt[LineCnt].dt_string;
if (strlen(PCurStr) != Strlen) // bring string into Cache
{
// necessary to prevent cc from optimizing out
printf(" !! OOps Bug, wrong string length\n");
exit(1);
}
}
ENDTm(DeltaUSec, tm); // end timer
printf
("# Access Time = %6.3f uS average per string (mostly from Cache)\n",
DeltaUSec / nStrg);
//=======================================================================
// TIME GETSTRING() + HASHSTR() from Cache (most of the time)
//=======================================================================
STARTTm(tm); // start timer
for (LineCnt = 0; LineCnt < nStrg; LineCnt++)
{
uint32_t hval;
GETSTRING(PCurStr, Strlen);
PCurStr = Pdt[LineCnt].dt_string;
Strlen = Pdt[LineCnt].dt_strlen;
hval = HASHSTR(PCurStr, Strlen, HTblsz);
if (foolflag)
printf("OOps foolflag is set, hval = %d\n", hval);
}
ENDTm(DeltaUSec, tm); // end timer
( run in 0.769 second using v1.01-cache-2.11-cpan-7e98afdb40f )