Alien-Judy

 view release on metacpan or  search on metacpan

src/judy-1.0.5/test/StringCompare.c  view on Meta::CPAN

//=======================================================================

// Run-time options and counters shared by the measurement code.

int       foolflag = 0;                 // tested after timed work so the compiler cannot optimize it away
static Word_t gStored = 0;              // number of strings inserted
static Word_t gChainln = 0;             // links traversed during retrieval

static Word_t PtsPdec = 40;             // default measurement points per decade

#define INFSTRGS 1000000000             // 1 billion strings is treated as infinity

static Word_t nStrg = INFSTRGS;         // strings to measure; INFSTRGS -- measure all strings
static Word_t TValues = 100000;         // max measure points for retrieval tests
static int pFlag = 0;                   // pre-fault hash table pages into RAM
static int rFlag = 0;                   // do not randomize input file
static int aFlag = 0;                   // word align string buffers
static int DFlag = 0;                   // do the delete measurement
static int CFlag = 0;                   // build sequential Get buffers
static Word_t aCount = 0;               // count of misaligned string buffers

//  Maximum length of a string allowed; longer input lines are truncated
//  to MLength bytes when the input file is read.
#define MAXSTRLEN       (100000)
static int MLength = MAXSTRLEN;
// NOTE(review): original comment says "1M default", but no initializer is
// visible here; a value of 0 is reported as 4294967296 in the summary output.
static Word_t HTblsz;                   // hash table size
static int fileidx;                     // argv[fileidx] == input file name

// One saved input string: its length plus a pointer to its bytes.
struct STRING_
{
    int       dt_strlen;                // length in bytes, terminating NUL not counted
    uint8_t  *dt_string;                // first byte of the string
};

typedef struct STRING_ dt_t;            // a string record
typedef struct STRING_ *Pdt_t;          // pointer to a string record

// Buffers for the 'cached' Get measurement; allocated only when CFlag is set.
static Pdt_t PdtS_ = NULL;              // memory for Cache access Gets (TValues records)
static uint8_t *Strbuf_ = NULL;         // string storage, Strsiz_ bytes
static Word_t Strsiz_ = 0;              // estimated byte size of Strbuf_

// Round BYTES up to a whole number of words (see ROUNDUPWORD / BYTES2WORDS)

/*
 On Linux 2.6.3-4mdkenterprise (Mandrake 10.0 Community) printing (even
 to a file) makes the timings inaccurate.  So, use -L2 or greater to
 average (actually save min times) and print results after all tests are
 completed.

 Printf(...) prints only while Pass == 0.  The empty-if/else form is used
 so that an "else" following Printf(...); in the caller stays attached to
 the caller's own "if" instead of being captured by the macro's "if".
*/
#define Printf if (Pass != 0) { } else printf

// Round BYTES up to the next multiple of sizeof(Word_t); result is in bytes
#define ROUNDUPWORD(BYTES) \
    (((BYTES) + (sizeof(Word_t) - 1)) & ~(sizeof(Word_t) - 1))
// Number of Word_t words needed to hold BYTES bytes
#define BYTES2WORDS(BYTES) \
    (((BYTES) + (sizeof(Word_t) - 1)) / sizeof(Word_t))

//=======================================================================
//      T I M I N G   M A C R O S
//=======================================================================

static double DeltaUSec;                // Global for remembering delta times (elapsed uS stored by ENDTm)

// Some operating systems have get_cycles() in /usr/include/asm/timex.h

#ifdef  CPUMHZ

//  Cycle-counter timing.  CPUMHZ is the clock rate in MHz, so CPUSPEED is
//  microseconds per cycle (e.g. about 1.34 nS per cycle at 750Mhz).

#define CPUSPEED      (1.0 / (CPUMHZ))

#include <asm/timex.h>

//  Declare the start-cycle variable for timer T
#define TIMER_vars(T) cycles_t  __TVBeg_##T

//  Record the starting cycle count of timer T
#define STARTTm(T) __TVBeg_##T = get_cycles()

//  Store in D (a double) the microseconds elapsed since STARTTm(T).
//  Wrapped in do/while(0) so "ENDTm(D,T);" is one statement and can be
//  used in an unbraced if/else.
#define ENDTm(D,T)                                                      \
do {                                                                    \
    (D) = (double)(get_cycles() - __TVBeg_##T) * CPUSPEED;              \
} while (0)

#else  // ! CPUMHZ

//  gettimeofday() timing.

//  Declare the start and end timestamps for timer T
#define TIMER_vars(T) struct timeval __TVBeg_##T, __TVEnd_##T

//  Record the start time of timer T
#define STARTTm(T) gettimeofday(&__TVBeg_##T, NULL)

//  Store in D (a double) the microseconds elapsed since STARTTm(T).
//  Wrapped in do/while(0) so "ENDTm(D,T);" is one statement and can be
//  used in an unbraced if/else.
#define ENDTm(D,T)                                                      \
do {                                                                    \
    gettimeofday(&__TVEnd_##T, NULL);                                   \
    (D) = (double)(__TVEnd_##T.tv_sec  - __TVBeg_##T.tv_sec) * 1E6 +    \
         ((double)(__TVEnd_##T.tv_usec - __TVBeg_##T.tv_usec));         \
} while (0)

#endif // ! CPUMHZ

//=======================================================================
//      M E M O R Y   S I Z E   M A C R O S
//=======================================================================

// use mallinfo() instead of sbrk() for memory usage measurements
// this should include the RAM that was mmap()ed in malloc()

// NOTE(review): presumably receives the ENDmem() result -- its use is not
// visible in this part of the file; confirm.
static Word_t DeltaMem;                 // for remembering

// Some mallocs have mallinfo()
// #define MALLINFO 1

#ifdef MALLINFO
#include <malloc.h>                     // mallinfo()

// mallinfo() snapshot taken by STARTmem
static struct mallinfo malStart;

// STARTmem: remember the current mallinfo() state.
#define STARTmem malStart = mallinfo()
// ENDmem(DELTAMEM): assign to DELTAMEM the change in the mallinfo() 'arena'
// field since STARTmem.  The difference passes through an unsigned int
// before being converted to double.
// NOTE(review): a change too large for unsigned int would wrap -- confirm
// this is acceptable for the data set sizes being measured.
#define ENDmem(DELTAMEM)                                        \
{                                                               \
    struct mallinfo malEnd = mallinfo();                        \
/* strange little dance from signed to unsigned to double */    \
    unsigned int _un_int = malEnd.arena - malStart.arena;       \
    (DELTAMEM) = (double)_un_int;      /* to double */          \
}
#else  // NO MALLINFO

// this usually works for machines with less than 1-2Gb RAM.
// (it does NOT include memory ACQUIRED by mmap())

// Program break captured by STARTmem
static char *malStart;

// STARTmem: remember the current program break, as returned by sbrk(0)
#define STARTmem (malStart = (char *)sbrk(0))
#define ENDmem(DELTAMEM)                                        \

src/judy-1.0.5/test/StringCompare.c  view on Meta::CPAN

        if (HTblsz)
            printf("# JLHash table virtual size = %lu\n", HTblsz);
        else
            printf("# JLHash table virtual size = 4294967296\n");
    }

//=======================================================================
// Read text input file into RAM
//=======================================================================

    if ((fid = fopen(argv[fileidx], "r")) == NULL)
        FILERROR;

    for (Strlen = LineCnt = 0; LineCnt < nStrg;)
    {
        Chr = fgetc(fid);
        if (Chr == '\n')
        {
            if (Strlen)                 // eat zero length lines
            {
                if (Strlen > MLength)
                    Strlen = MLength;
                Pdt[LineCnt].dt_string = PCurStr - Strlen;
                Pdt[LineCnt].dt_strlen = Strlen;
                LineCnt++;

                Strlen = 0;
                *PCurStr++ = '\0';      // for JudySL
                if (aFlag)              // for word alignment
                    PCurStr = (uint8_t *) ROUNDUPWORD((Word_t)PCurStr);

                if ((Word_t)PCurStr % sizeof(Word_t))
                    aCount++;
            }
        }
        else
        {
            if (Strlen < MLength)
            {
                Strlen++;
                if (Chr == '\0')
                    Chr = ' ';          // for JudySL
                *PCurStr++ = (uint8_t) Chr;
            }
        }
    }
    fclose(fid);
    fid = NULL;
    assert(nStrg == LineCnt);

    printf("# %lu (%.1f%%) non-Word_t aligned string buffers\n",
           aCount, (double)aCount / (double)LineCnt * 100.0);

    printf("# Ram used for input data = %lu bytes\n", StringMemory);

    printf("# Average string length = %.1f bytes\n",
           (double)(StrTot - LineCnt) / LineCnt);

// Allocate memory for Cached access to 'Get' (largest delta). This flag
// will put the 'randomized' 'Get' order strings in a sequential buffer.
// Modern processors will 'read ahead' when access to RAM is sequential
// -- thus saving the 'Get' from having to bring the string into cache.
    if (CFlag)
    {
        PdtS_ = (Pdt_t) malloc(TValues * sizeof(dt_t));
        if (PdtS_ == NULL)
            MALLOCERROR;

//      now guess how much memory will be needed for the strings
        Strsiz_ = ((StrTot / nStrg) * TValues);
        Strsiz_ += Strsiz_;             // bump %20

        Strbuf_ = (uint8_t *) malloc(Strsiz_);
        if (Strbuf_ == NULL)
            MALLOCERROR;

        printf
            ("# %lu bytes malloc() for 'cached' strings for Get measurement\n",
             Strsiz_);
    }

//=======================================================================
//  TIME GETSTRING() from Cache (most of the time)
//=======================================================================

    STARTTm(tm);                        // start timer
    for (LineCnt = 0; LineCnt < nStrg; LineCnt++)
    {
        GETSTRING(PCurStr, Strlen);
        Strlen = Pdt[LineCnt].dt_strlen;
        PCurStr = Pdt[LineCnt].dt_string;

        if (strlen(PCurStr) != Strlen)  // bring string into Cache
        {
//          necessary to prevent cc from optimizing out
            printf(" !! OOps Bug, wrong string length\n");
            exit(1);
        }
    }
    ENDTm(DeltaUSec, tm);               // end timer

    printf
        ("# Access Time    = %6.3f uS average per string (mostly from Cache)\n",
         DeltaUSec / nStrg);

//=======================================================================
//  TIME GETSTRING() + HASHSTR() from Cache (most of the time)
//=======================================================================

    STARTTm(tm);                        // start timer
    for (LineCnt = 0; LineCnt < nStrg; LineCnt++)
    {
        uint32_t  hval;
        GETSTRING(PCurStr, Strlen);
        PCurStr = Pdt[LineCnt].dt_string;
        Strlen = Pdt[LineCnt].dt_strlen;
        hval = HASHSTR(PCurStr, Strlen, HTblsz);
        if (foolflag)
            printf("OOps foolflag is set, hval = %d\n", hval);
    }
    ENDTm(DeltaUSec, tm);               // end timer



( run in 0.769 second using v1.01-cache-2.11-cpan-7e98afdb40f )