Alien-Judy

 view release on metacpan or  search on metacpan

src/judy-1.0.5/test/StringCompare.c  view on Meta::CPAN

        if ((d = *s - pp->splitchar) == 0)
        {
            if (*s++ == 0)
            {
                printf("Oops duplicate Ternary string %s\n", instr);
                return;
            }
            p = &(pp->eqkid);
        }
        else if (d < 0)
            p = &(pp->lokid);
        else
            p = &(pp->hikid);
    }
    for (;;)
    {
        *p = (Tptr) malloc(sizeof(Tnode));
        pp = *p;
        pp->splitchar = *s;
        pp->lokid = pp->eqkid = pp->hikid = 0;
        if (*s++ == 0)
        {
            pp->eqkid = (Tptr) instr;
            gStored++;                  // number of strings stored
            return;
        }
        p = &(pp->eqkid);
    }
}

int
TernaryGet(Tptr p, uint8_t * s, int Len)
{
    while (p)
    {
        if (*s < p->splitchar)
            p = p->lokid;
        else if (*s == p->splitchar)
        {
            if (*s++ == 0)
                return 1;
            p = p->eqkid;
        }
        else
            p = p->hikid;
    }
    return 0;
}

//=======================================================================
//      M E A S U R E  A D T   S P E E D  and  M E M O R Y  U S A G E
//=======================================================================

//Word_t    TotalJudyMalloc;

#define         GETSTRING(PCurStr, Strlen)

int
main(int argc, char *argv[])
{
    TIMER_vars(tm);                     // declare timer variables
    FILE     *fid;                      // to read file.
    int       Chr;                      // char read from fgetc
    Pdt_t     Pdt, Pdts;                // array of lengths and pointers to str
    uint8_t  *PCurStr;                  // Current string pointer
    Word_t    LineCnt;                  // line counter
    int       Strlen;                   // = strlen();
    Word_t    StrTot;                   // Total len of strings
    Word_t    StrNumb;                  // current line number
    Word_t    ReadLin;                  // strings to read
    double    Mult;                     // multiplier between groups
    Word_t    Groups;                   // number of measurement groups
    Word_t    grp;                      // current group
    Pvoid_t   JudySL = NULL;            // JudySL root pointer
    Pvoid_t   JudyHS = NULL;            // JudyHS root pointer
    Pvoid_t   JLHash = NULL;            // JLHash root pointer
    Phinfo_t  HRoot = NULL;             // hash table root pointer
    Tptr      Ternary = { 0 };          // Ternary struct root pointer

    Method_t  Method = M_invalid;       // the method to measure
    Word_t    lines = 0;                // to shut up compiler
    Word_t    Bytes = 0;                // Bytes deallocated from FreeArray
    Word_t    StringMemory;             // Bytes allocated for input data
    int       Pass;
    int       Passes = 1;

    int       Opt;
    extern char *optarg;
    int       ErrorFlag = 0;

//  un-buffer output
    setbuf(stdout, NULL);

//============================================================
// PARSE INPUT PARAMETERS
//============================================================

    while ((Opt = getopt(argc, argv, "A:H:L:n:T:P:M:praDC")) != -1)
    {
        switch (Opt)
        {
        case 'A':
            if (Method != M_invalid)
            {
                printf("\nOnly ONE '-A<ADT>' is allowed!!!!!!\n");
                ErrorFlag++;
                break;
            }
            if (strcmp(optarg, "Print") == 0)
                Method = M_Print;
            if (strcmp(optarg, "Hash") == 0)
            {
                Method = M_Hash;
                HTblsz = 1LU << 20;     // default 1.0+ million
            }
            if (strcmp(optarg, "JLHash") == 0)
            {
                Method = M_JLHash;
                HTblsz = 0;             // max 2^32
            }
            if (strcmp(optarg, "JudySL") == 0)

src/judy-1.0.5/test/StringCompare.c  view on Meta::CPAN


                Strlen = 0;
                *PCurStr++ = '\0';      // for JudySL
                if (aFlag)              // for word alignment
                    PCurStr = (uint8_t *) ROUNDUPWORD((Word_t)PCurStr);

                if ((Word_t)PCurStr % sizeof(Word_t))
                    aCount++;
            }
        }
        else
        {
            if (Strlen < MLength)
            {
                Strlen++;
                if (Chr == '\0')
                    Chr = ' ';          // for JudySL
                *PCurStr++ = (uint8_t) Chr;
            }
        }
    }
    fclose(fid);
    fid = NULL;
    assert(nStrg == LineCnt);

    printf("# %lu (%.1f%%) non-Word_t aligned string buffers\n",
           aCount, (double)aCount / (double)LineCnt * 100.0);

    printf("# Ram used for input data = %lu bytes\n", StringMemory);

    printf("# Average string length = %.1f bytes\n",
           (double)(StrTot - LineCnt) / LineCnt);

// Allocate memory for Cached assess to 'Get' (largest delta). This flag
// will put the 'randomized' 'Get' order strings in a sequential buffer.
// Modern processors will 'read ahead' with an access to RAM is sequential
// -- thus saving the 'Get' having to bring the string into cache.
    if (CFlag)
    {
        PdtS_ = (Pdt_t) malloc(TValues * sizeof(dt_t));
        if (PdtS_ == NULL)
            MALLOCERROR;

//      now guess how much memory will be needed for the strings
        Strsiz_ = ((StrTot / nStrg) * TValues);
        Strsiz_ += Strsiz_;             // bump %20

        Strbuf_ = (uint8_t *) malloc(Strsiz_);
        if (Strbuf_ == NULL)
            MALLOCERROR;

        printf
            ("# %lu bytes malloc() for 'cached' strings for Get measurement\n",
             Strsiz_);
    }

//=======================================================================
//  TIME GETSTRING() from Cache (most of the time)
//=======================================================================

    STARTTm(tm);                        // start timer
    for (LineCnt = 0; LineCnt < nStrg; LineCnt++)
    {
        GETSTRING(PCurStr, Strlen);
        Strlen = Pdt[LineCnt].dt_strlen;
        PCurStr = Pdt[LineCnt].dt_string;

        if (strlen(PCurStr) != Strlen)  // bring string into Cache
        {
//          necessary to prevent cc from optimizing out
            printf(" !! OOps Bug, wrong string length\n");
            exit(1);
        }
    }
    ENDTm(DeltaUSec, tm);               // end timer

    printf
        ("# Access Time    = %6.3f uS average per string (mostly from Cache)\n",
         DeltaUSec / nStrg);

//=======================================================================
//  TIME GETSTRING() + HASHSTR() from Cache (most of the time)
//=======================================================================

    STARTTm(tm);                        // start timer
    for (LineCnt = 0; LineCnt < nStrg; LineCnt++)
    {
        uint32_t  hval;
        GETSTRING(PCurStr, Strlen);
        PCurStr = Pdt[LineCnt].dt_string;
        Strlen = Pdt[LineCnt].dt_strlen;
        hval = HASHSTR(PCurStr, Strlen, HTblsz);
        if (foolflag)
            printf("OOps foolflag is set, hval = %d\n", hval);
    }
    ENDTm(DeltaUSec, tm);               // end timer

    printf
        ("# HashStr() Time = %6.3f uS average per string (mostly from Cache)\n",
         DeltaUSec / nStrg);

//  randomize the input strings (adjacent strings will not be on same page)

    if (rFlag == 0)
    {
        Randomize(Pdt, nStrg);          // Randomize ALL to be stored

//=======================================================================
//  TIME GETSTRING() from RAM (most of the time)
//=======================================================================

        STARTTm(tm);                    // start timer
        for (LineCnt = 0; LineCnt < nStrg; LineCnt++)
        {
            GETSTRING(PCurStr, Strlen);
            Strlen = Pdt[LineCnt].dt_strlen;
            PCurStr = Pdt[LineCnt].dt_string;

            if (strlen(PCurStr) != Strlen)      // bring string into Cache
            {
//              necessary to prevent cc from optimizing out
                printf(" !! OOps Bug, wrong string length\n");
                exit(1);
            }
        }
        ENDTm(DeltaUSec, tm);           // end timer

        printf
            ("# Access Time    = %6.3f uS average per string (mostly from RAM)\n",
             DeltaUSec / nStrg);

//=======================================================================
//  TIME GETSTRING() + HASHSTR() from RAM (most of the time)
//=======================================================================

        STARTTm(tm);                    // start timer
        for (LineCnt = 0; LineCnt < nStrg; LineCnt++)
        {
            uint32_t  hval;
            GETSTRING(PCurStr, Strlen);
            Strlen = Pdt[LineCnt].dt_strlen;
            PCurStr = Pdt[LineCnt].dt_string;
            hval = HASHSTR(PCurStr, Strlen, HTblsz);
            if (foolflag)
                printf("OOps foolflag is set, hval = %u\n", hval);
        }
        ENDTm(DeltaUSec, tm);           // end timer

        printf
            ("# HashStr() Time = %6.3f uS average per string (mostly from RAM)\n",
             DeltaUSec / nStrg);
    }

//=======================================================================
//  Insert, Get and Delete loops
//=======================================================================

    for (Pass = 0; Pass < Passes; Pass++)
    {
        printf("# Pass %d\n", Pass);

//      heading of table 
        Printf
            ("# TotInserts  DeltaGets  DupStrs InsTime GetTime HChainLen Ram/String\n");
        gStored = 0;                    // number of strings inserted
        StrNumb = 0;                    // number of attempted strings inserted

        STARTmem;                       // current malloc() mem usage
        for (grp = 0; grp < Groups; grp++)
        {
            PWord_t   PValue;
            Word_t    Begin = gStored;  // remember current STOREed
            Word_t    Delta = Pms[grp].ms_delta;

            switch (Method)
            {
            case M_Print:
            {
                STARTTm(tm);            // start timer
                for (lines = 0; lines < Delta; lines++, StrNumb++)
                {
                    GETSTRING(PCurStr, Strlen);
                    PCurStr = Pdt[StrNumb].dt_string;
                    Printf("%s\n", (char *)PCurStr);
                }
                ENDTm(DeltaUSec, tm);   // end timer
                break;
            }
            case M_Hash:
            {
                STARTTm(tm);            // start timer
                for (lines = 0; lines < Delta; lines++, StrNumb++)
                {
                    GETSTRING(PCurStr, Strlen);
                    Strlen = Pdt[StrNumb].dt_strlen;
                    PCurStr = Pdt[StrNumb].dt_string;

                    PValue = HashIns(&HRoot, PCurStr, Strlen, HTblsz);
                    if ((*PValue)++ == 0)
                        gStored++;      // number of strings stored
                }
                ENDTm(DeltaUSec, tm);   // end timer
                break;
            }
            case M_JLHash:
            {
                STARTTm(tm);            // start timer
                for (lines = 0; lines < Delta; lines++, StrNumb++)
                {
                    GETSTRING(PCurStr, Strlen);
                    Strlen = Pdt[StrNumb].dt_strlen;
                    PCurStr = Pdt[StrNumb].dt_string;
                    PValue = JLHashIns(&JLHash, PCurStr, Strlen, HTblsz);
                    if ((*PValue)++ == 0)
                        gStored++;      // number of strings stored
                }
                ENDTm(DeltaUSec, tm);   // end timer
                break;
            }
            case M_JudySL:
            {
                STARTTm(tm);            // start timer
                for (lines = 0; lines < Delta; lines++, StrNumb++)
                {
                    GETSTRING(PCurStr, Strlen);
                    PCurStr = Pdt[StrNumb].dt_string;

                    JSLI(PValue, JudySL, PCurStr);      // insert string
                    if ((*PValue)++ == 0)
                        gStored++;      // number of strings stored
                }
                ENDTm(DeltaUSec, tm);   // end timer
                break;
            }
            case M_JudyHS:
            {
                STARTTm(tm);            // start timer
                for (lines = 0; lines < Delta; lines++, StrNumb++)
                {
                    GETSTRING(PCurStr, Strlen);
                    Strlen = Pdt[StrNumb].dt_strlen;
                    PCurStr = Pdt[StrNumb].dt_string;

                    JHSI(PValue, JudyHS, PCurStr, Strlen);      // insert string
                    if ((*PValue)++ == 0)
                        gStored++;      // number of strings stored
                }
                ENDTm(DeltaUSec, tm);   // end timer
                break;
            }

// NOTE:  the ADT's below here are so slow, that I did not add much effort 
// to clean them up. (dlb)

            case M_Splay:
            {
                STARTTm(tm);            // start timer
                for (lines = 0; lines < Delta; lines++, StrNumb++)
                {
                    GETSTRING(PCurStr, Strlen);
                    PCurStr = Pdt[StrNumb].dt_string;

                    splayinsert(&spans, (char *)PCurStr);
                }
                ENDTm(DeltaUSec, tm);   // end timer
                break;
            }
            case M_Redblack:
            {
                STARTTm(tm);            // start timer
                for (lines = 0; lines < Delta; lines++, StrNumb++)
                {
                    GETSTRING(PCurStr, Strlen);
                    PCurStr = Pdt[StrNumb].dt_string;

                    redblackinsert(&rbans, (char *)PCurStr);
                }
                ENDTm(DeltaUSec, tm);   // end timer
                break;
            }
            case M_Ternary:
            {
                STARTTm(tm);            // start timer
                for (lines = 0; lines < Delta; lines++, StrNumb++)
                {
                    GETSTRING(PCurStr, Strlen);
                    Strlen = Pdt[StrNumb].dt_strlen;
                    PCurStr = Pdt[StrNumb].dt_string;

                    TernaryIns(&Ternary, PCurStr, Strlen);
                }
                ENDTm(DeltaUSec, tm);   // end timer
                break;
            }
            default:
                assert(0);              // cant happen
                break;
            }
            ENDmem(DeltaMem);           // current malloc() mem usage

            ReadLin = StrNumb;          // adjust delta
            if (ReadLin > TValues)
                ReadLin = TValues;
//            if (Delta > TValues)
//                ReadLin = Delta;        // use the Delta

            Printf(" %11lu", StrNumb);  // Total stored
            Printf(" %10lu", ReadLin);  // Number to read back
            Begin = gStored - Begin;    // actual STORED
            assert(lines == Delta);
            Printf(" %8lu", Delta - Begin);     // Duplicate strings

//          Average time per line to store (including duplicate strings)
            Mult = DeltaUSec / (double)Delta;

            if (Mult < Pms[grp].ms_mininsert)
                Pms[grp].ms_mininsert = Mult;

            Printf(" %7.3f", Mult);

//          Bytes allocated thru malloc()
            if (TotalJudyMalloc == 0)
                Pms[grp].ms_Bytes = (double)DeltaMem;
            else
                Pms[grp].ms_Bytes = (double)(TotalJudyMalloc * sizeof(Word_t));

            Pms[grp].ms_Bytes /= (double)gStored;

            fflush(stdout);

//=======================================================================
//  READ BACK LOOP
//=======================================================================

            Pdts = Pdt;                 // Strings to 'Get'
            gChainln = 0;               // total chain lengths

            if (rFlag == 0)
            {
                Randomize(Pdt, StrNumb);        // Randomize ONLY those stored

                if (CFlag)
                {
//                  Allocate and make sequencial string buffer
                    Pdts = BuildSeqBuf(Pdt, ReadLin);
                }
            }
            switch (Method)
            {
            case M_Print:
                break;
            case M_Hash:
            {
                STARTTm(tm);            // start timer
                for (lines = 0; lines < ReadLin; lines++)
                {
                    GETSTRING(PCurStr, Strlen);
                    Strlen = Pdts[lines].dt_strlen;
                    PCurStr = Pdts[lines].dt_string;

                    PValue = HashGet(HRoot, PCurStr, Strlen);   // get string
                    assert(PValue != NULL);
                    assert(*PValue > 0);
                }
                ENDTm(DeltaUSec, tm);   // end timer
                break;
            }
            case M_JLHash:
            {
                STARTTm(tm);            // start timer
                for (lines = 0; lines < ReadLin; lines++)
                {
                    GETSTRING(PCurStr, Strlen);
                    Strlen = Pdts[lines].dt_strlen;
                    PCurStr = Pdts[lines].dt_string;

                    PValue = JLHashGet(JLHash, PCurStr, Strlen);        // get string
                    assert(PValue != NULL);
                    assert(*PValue > 0);
                }
                ENDTm(DeltaUSec, tm);   // end timer
                break;
            }
            case M_JudySL:
            {
                STARTTm(tm);            // start timer
                for (lines = 0; lines < ReadLin; lines++)
                {
                    GETSTRING(PCurStr, Strlen);
                    PCurStr = Pdts[lines].dt_string;

                    JSLG(PValue, JudySL, PCurStr);      // get string
                    assert(PValue != NULL);
                    assert(*PValue > 0);
                }
                ENDTm(DeltaUSec, tm);   // end timer
                break;
            }
            case M_JudyHS:
            {
                STARTTm(tm);            // start timer
                for (lines = 0; lines < ReadLin; lines++)
                {
                    GETSTRING(PCurStr, Strlen);
                    Strlen = Pdts[lines].dt_strlen;
                    PCurStr = Pdts[lines].dt_string;

                    JHSG(PValue, JudyHS, PCurStr, Strlen);      // get string
                    assert(PValue != NULL);
                    assert(*PValue > 0);
                }
                ENDTm(DeltaUSec, tm);   // end timer
                break;
            }

// NOTE:  the ADT's below here are so slow, that I did not add much effort 
// to clean them up. (dlb)

            case M_Splay:
            {
                STARTTm(tm);            // start timer
                for (lines = 0; lines < ReadLin; lines++)
                {
                    GETSTRING(PCurStr, Strlen);
                    PCurStr = Pdts[lines].dt_string;

                    splaysearch(&spans, (char *)PCurStr);
                }
                ENDTm(DeltaUSec, tm);   // end timer
                break;
            }
            case M_Redblack:
            {
                STARTTm(tm);            // start timer
                for (lines = 0; lines < ReadLin; lines++)
                {
                    GETSTRING(PCurStr, Strlen);
                    PCurStr = Pdts[lines].dt_string;

                    redblacksearch(&rbans, (char *)PCurStr);
                }
                ENDTm(DeltaUSec, tm);   // end timer
                break;
            }
            case M_Ternary:
            {
                STARTTm(tm);            // start timer
                for (lines = 0; lines < ReadLin; lines++)
                {
                    GETSTRING(PCurStr, Strlen);
                    Strlen = Pdts[lines].dt_strlen;
                    PCurStr = Pdts[lines].dt_string;

                    if (TernaryGet(Ternary, PCurStr, Strlen) == 0)
                    {
                        printf("\n OOps - Ternary Bug at Line = %d\n",
                               __LINE__);
                        exit(1);
                    }
                }
                ENDTm(DeltaUSec, tm);   // end timer
                break;
            }
            default:
                assert(0);              // cant happen
                break;
            }
            Mult = DeltaUSec / (double)ReadLin;

//          save least value
            if (Mult < Pms[grp].ms_minretrive)
                Pms[grp].ms_minretrive = Mult;

            Printf(" %7.3f", Mult);     // RETRIVE per string
            Printf(" %9.6f", (double)gChainln / (double)ReadLin);

//      RAM USED PER STRING TO STORE DATA

            Printf(" %13.1f", (double)Pms[grp].ms_Bytes);
            Printf("\n");
            fflush(stdout);
        }
        if (Method == M_Print)
            exit(0);

        Printf("# Total Duplicate strings = %lu\n", nStrg - gStored);

//=======================================================================
//  Delete loop
//=======================================================================

        DeltaUSec = -1.0;               // set deleted flag

        if (rFlag == 0)
        {
            Randomize(Pdt, StrNumb);    // Randomize ONLY those stored
        }
        switch (Method)
        {
        case M_JudySL:
        {
            if (DFlag)
            {
                Printf("# Begin JudySLDel() loop...\n");
                STARTTm(tm);            // start timer
                for (lines = 0; lines < nStrg; lines++)
                {
                    int       Rc;
                    GETSTRING(PCurStr, Strlen);
                    PCurStr = Pdt[lines].dt_string;
                    JSLD(Rc, JudySL, PCurStr);  // delete string
                    assert(Rc != JERR);
                }
                ENDTm(DeltaUSec, tm);   // end timer
            }
            else
            {
                Printf("# Begin JudySLFreeArray()...\n");
                STARTTm(tm);            // start timer
                JSLFA(Bytes, JudySL);
                ENDTm(DeltaUSec, tm);   // end timer
            }
            break;
        }
        case M_JudyHS:
        {
            if (DFlag)
            {
                int       Rc;
                Printf("# Begin JudyHSDel() loop...");
                STARTTm(tm);            // start timer
                for (lines = 0; lines < nStrg; lines++)
                {
                    GETSTRING(PCurStr, Strlen);
                    Strlen = Pdt[lines].dt_strlen;
                    PCurStr = Pdt[lines].dt_string;
                    JHSD(Rc, JudyHS, PCurStr, Strlen);  // Delete string
                    assert(Rc != JERR);
                }
                ENDTm(DeltaUSec, tm);   // end timer
            }
            else
            {
                Printf("# Begin JudyHSFreeArray()...\n");
                STARTTm(tm);            // start timer
                JHSFA(Bytes, JudyHS);
                ENDTm(DeltaUSec, tm);   // end timer
            }
            break;
        }
        case M_Hash:
        {
            if (DFlag)
            {
                Printf("# Begin HashDel() loop...\n");
                STARTTm(tm);            // start timer
                for (lines = 0; lines < nStrg; lines++)
                {
                    GETSTRING(PCurStr, Strlen);
                    Strlen = Pdt[lines].dt_strlen;
                    PCurStr = Pdt[lines].dt_string;
                    HashDel(&HRoot, PCurStr, Strlen);   // Delete string
                }
                ENDTm(DeltaUSec, tm);   // end timer
            }
            else
            {
                Printf("# Begin HashFreeArray()...\n");
                STARTTm(tm);            // start timer
                Bytes = HashFreeArray(&HRoot);
                ENDTm(DeltaUSec, tm);   // end timer
            }
            break;
        }
        case M_JLHash:
        {
            if (DFlag)
            {
                Printf("# Begin JLHashDel() loop...\n");
                STARTTm(tm);            // start timer
                for (lines = 0; lines < nStrg; lines++)
                {
                    GETSTRING(PCurStr, Strlen);
                    Strlen = Pdt[lines].dt_strlen;
                    PCurStr = Pdt[lines].dt_string;
                    JLHashDel(&JLHash, PCurStr, Strlen);        // Delete string
                }
                ENDTm(DeltaUSec, tm);   // end timer
            }
            else
            {
                Printf("# Begin JLHashFreeArray()...\n");
                STARTTm(tm);            // start timer
                Bytes = JLHashFreeArray(&JLHash);
                ENDTm(DeltaUSec, tm);   // end timer
            }
            break;
        }
        default:
            printf("# Delete not implemented yet, so quit\n");
            Passes = 1;                 // No, delete, so quit
            break;
        }
//  average time per line to delete (including duplicate strings)
        if (Bytes)                      // Measured freed bytes?
        {
            Printf("#                      returned %lu bytes\n", Bytes);
        }
        if (TotalJudyMalloc)            // Any bytes left after free?
        {
            printf
                ("# !!! BUG, %lu bytes not deleted in *Free()\n",
                 TotalJudyMalloc * sizeof(Word_t));
        }
        if (DeltaUSec != -1.0)          // Measured how long to free?
        {
            Printf("# Free %lu strings, %0.3f uSecs Ave/string\n",
                   gStored, DeltaUSec / (double)gStored);
        }
        Printf("\n");
    }

    if (Passes != 1)
    {
        printf("# TotInserts      0     0 InsTime GetTime      0 Ram/String\n");
        StrNumb = 0;
        for (grp = 0; grp < Groups; grp++)
        {
            StrNumb += Pms[grp].ms_delta;

            printf(" %11lu", StrNumb);  // Total stored
            printf("      0     0");    // place holder
            printf(" %7.3f", Pms[grp].ms_mininsert);
            printf(" %7.3f", Pms[grp].ms_minretrive);
            printf("      0");          // place holder
            printf(" %7.3f", Pms[grp].ms_Bytes);
            printf("\n");
        }
    }
    exit(0);                            // done
}                                       // main()



( run in 0.382 second using v1.01-cache-2.11-cpan-9bca49b1385 )