Alien-Judy
view release on metacpan or search on metacpan
src/judy-1.0.5/test/SLcompare.c view on Meta::CPAN
performance of JudySL with short (a text word) strings. I have a data
base of all (28.8 million) domain names on the internet as a test case.
wc /data/domnames.txt
28826508 28826508 488227095 /data/domnames.txt
For the curious, JudySL (I.E JSLI/G macros in this program) is simply an
application subroutine that uses JudyL to the work. JudyL is a very
scaleable word to word (or pointer) mapper. JudySL is implemented as a
digital tree using JudyL arrays as 2^32 (or 2^64 in 64 bit machines)
wide (virtual) nodes. Each level in the digital tree decodes the next 4
(or 8) bytes of the line/string until only one entry would be in the
node. Then the remaining undecoded portion of the line is stored as a
string from the parent node. A digital tree does not need rotation or
balancing. In practice, digital trees are rarely use because of their
poor memory efficiency in the nodes and a tendency to have large depths.
These problems are solved in the Judy algorithm. For more details see
application notes on: www.sourcejudy.com. And for the really curious,
a technical paper is available in the application section. If you would
like to be a reviewer, contact Doug Baskins at <doug@sourcejudy.com>
*/
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h> // printf
#include <fcntl.h>
#include <string.h>
#include <errno.h> // errno
#include <sys/mman.h> // mmap()
#include <sys/stat.h> // stat()
#include <sys/time.h> // gettimeofday()
// remove all uses of this in production code.
int gDupCnt = 0; // counter for duplicate strings
//=======================================================================
// T I M I N G M A C R O S
//=======================================================================
// if your machine is one of the supported types in the following header
// file then uncomment this corresponding to what the header file says.
// This will give very high timing resolution.
//
// #define JU_xxxxxxx 1 // read timeit.h
// #define JU_LINUX_IA32 1 // I.E. IA32 Linux
//
//
// optional for high resolution times and compile with timeit.c
// cc -static -O -o Judy -DJUDYMETHOD SLcompare.c timeit.c -lJudy
//
//#include "timeit.h"
double DeltaUSec; // Global for remembering delta times
#ifndef _TIMEIT_H
// Note: I have found some Linux systems (2.4.18-6mdk) to have bugs in the
// gettimeofday() routine. Sometimes it returns a negative ~2840 microseconds
// instead of 0 or 1. If you use the above #include "timeit.h" and compile with
// timeit.c and use -DJU_LINUX_IA32, that problem will be eliminated. This is
// because for delta times less than .1 sec, the hardware free running timer
// is used instead of gettimeofday(). I have found the negative time problem
// appears about 40-50 times per second with consecutive gettimeofday() calls.
#define TIMER_vars(T) struct timeval __TVBeg_##T, __TVEnd_##T
#define STARTTm(T) gettimeofday(&__TVBeg_##T, NULL)
#define ENDTm(D,T) \
{ \
gettimeofday(&__TVEnd_##T, NULL); \
(D) = (double)(__TVEnd_##T.tv_sec - __TVBeg_##T.tv_sec) * 1E6 + \
((double)(__TVEnd_##T.tv_usec - __TVBeg_##T.tv_usec)); \
}
#endif // _TIMEIT_H
//=======================================================================
// M E M O R Y S I Z E M A C R O S
//=======================================================================
// Most mallocs have mallinfo()
// define this if your malloc does not have mallinfo();
#define NOMALLINFO 1
double DeltaMem; // for remembering
#ifndef NOMALLINFO
#include <malloc.h> // mallinfo()
struct mallinfo malStart;
#define STARTmem malStart = mallinfo() /* works with some mallocs */
#define ENDmem \
{ \
struct mallinfo malEnd = mallinfo(); \
/* strange little dance from signed to unsigned to double */ \
unsigned int _un_int = malEnd.arena - malStart.arena; \
DeltaMem = _un_int; /* to double */ \
}
#else // MALLINFO
// this usually works for machines with less than 1-2Gb RAM.
// (it does not include memory ACQUIRED by mmap())
char *malStart;
#define STARTmem (malStart = (char *)sbrk(0))
#define ENDmem \
{ \
char *malEnd = (char *)sbrk(0); \
DeltaMem = malEnd - malStart; \
}
#endif // MALLINFO
//=======================================================================
// G E T S T R I N G F R O M mmap() F I L E M A C R O
src/judy-1.0.5/test/SLcompare.c view on Meta::CPAN
}
if (curr->par == NULL)
ans->root = curr;
ans->root->colour = BLACK;
}
else
gDupCnt++; /* count duplicate strings */
return;
}
/* Create a node to hold a word */
TREEREC *
wcreate(char *word, TREEREC * par)
{
TREEREC *tmp;
tmp = (TREEREC *) malloc(sizeof(TREEREC));
tmp->word = (char *)malloc(strlen(word) + 1);
strcpy(tmp->word, word);
tmp->left = tmp->right = NULL;
tmp->par = par;
return (tmp);
}
#define INITARRAY ANSREC ans = {0}
#define STORESTRING(STRING) redblackinsert(&ans, STRING)
#define GETSTRING(STRING) redblacksearch(&ans, STRING)
#endif // REDBLACKMETHOD
//=======================================================================
// M A I N P R O G R A M -by- Doug Baskins
//=======================================================================
// error routine for system routines for accessing file
#define FILERROR \
{ \
printf("%s: Cannot open file \"%s\": %s " \
"(errno = %d)\n", argv[0], argv[1], strerror(errno), \
errno); \
fprintf(stderr, "%s: Cannot open file \"%s\": %s " \
"(errno = %d)\n", argv[0], argv[1], strerror(errno), \
errno); \
exit(1); \
}
//=======================================================================
// M E A S U R E A D T S P E E D and M E M O R Y U S A G E
//=======================================================================
int
main(int argc, char *argv[])
{
TIMER_vars(tm); // declare timer variables
int fd; // to read file.
struct stat statbuf; // to get size of file
char *Pfile; // ram address of file
size_t fsize; // file size in bytes
char *FSmap; // start address of mapped file
char *FEmap; // end address+1 of mapped file
char String[MAXLINE]; // input buffer
int StrCnt; // line counter
int ii; // temp
INITARRAY;
// CHECK FOR REQUIRED INPUT FILE PARAMETER:
if (argc != 2)
{
printf("Usage: %s <text file>\n", argv[0]);
exit(1);
}
// GET FILE SIZE
if (stat(argv[1], &statbuf) == -1)
FILERROR;
fsize = statbuf.st_size;
// OPEN INPUT FILE:
if ((fd = open(argv[1], O_RDONLY)) == -1)
FILERROR;
// MEMORY MAP FILE
Pfile =
(char *)mmap(NULL, fsize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
if (Pfile == (char *)-1)
FILERROR;
FEmap = Pfile + fsize; // set end+1 address
// print command name + run arguments
printf("# ");
for (ii = 0; ii < argc; ii++)
printf("%s ", argv[ii]);
printf("\n");
fflush(stdout);
// file is in RAM, read again to measure time
STARTTm(tm); // start timer
for (StrCnt = 0, FSmap = Pfile; FSmap < FEmap; )
{
GETLINE(String, FSmap); // 'read' next string
StrCnt++;
}
ENDTm(DeltaUSec, tm); // end timer
//
// print header - 6 fields
printf("# lines avg_linelen getline stored");
printf(" RAMused/line store/ln lookup/ln ADT\n");
// print numb lines filesize/line
printf("%8u", StrCnt); // Number of lines
printf(" %6.1f byts", (double)fsize / (double)StrCnt); // filesize/line
fflush(stdout);
// print time to read a line
printf(" %5.3f uS", DeltaUSec / (double)StrCnt);
fflush(stdout);
// INPUT/STORE LOOP:
// read each input line into String and store into ADT
STARTmem; // current malloc() mem usage
STARTTm(tm); // start timer
for (FSmap = Pfile; FSmap < FEmap; )
{
GETLINE(String, FSmap); // 'read' next string
STORESTRING(String);
}
ENDTm(DeltaUSec, tm); // end timer
ENDmem; // current malloc() mem usage
// print number of non-duplicate lines
printf(" %8u", StrCnt - gDupCnt); // duplicate lines
// print RAM used by malloc() by ADT to store data
printf(" %7.1f byts", (double)(DeltaMem + MEMOVD) /
(double)(StrCnt - gDupCnt));
// print time per line to store the data
printf(" %6.3f uS", DeltaUSec / (double)StrCnt);
fflush(stdout);
// READ BACK LOOP
STARTTm(tm); // start timer
for (FSmap = Pfile; FSmap < FEmap; )
{
GETLINE(String, FSmap); // 'read' next string
GETSTRING(String);
}
ENDTm(DeltaUSec, tm); // end timer
// print time per line to lookup the data from the ADT
printf(" %6.3f uS", DeltaUSec / (double)StrCnt); // store time/line
printf(" %s\n", ADTMETHOD);
return (0); // done
} // main()
( run in 0.586 second using v1.01-cache-2.11-cpan-39bf76dae61 )