Compress-Stream-Zstd

 view release on metacpan or  search on metacpan

ext/zstd/CONTRIBUTING.md  view on Meta::CPAN

    benchmark achieved on each run since that is likely the fastest your process will be
    capable of running your code. In our experience, this (aggregating by just taking the sample
    with the fastest running time) has been the most stable approach.
    * The more samples you have, the more stable your benchmarks should be. You can verify
    your improved stability by looking at the size of your confidence intervals as you
    increase your sample count. These should get smaller and smaller. Eventually hopefully
    smaller than the performance win you are expecting.
    * Most processors will take some time to get `hot` when running anything. The observations
    you collect during that time period will very different from the true performance number. Having
    a very large number of sample will help alleviate this problem slightly but you can also
    address is directly by simply not including the first `n` iterations of your benchmark in
    your aggregations. You can determine `n` by simply looking at the results from each iteration
    and then hand picking a good threshold after which the variance in results seems to stabilize.
2. You cannot really get reliable benchmarks if your host machine is simultaneously running
another cpu/memory-intensive application in the background. If you are running benchmarks on your
personal laptop for instance, you should close all applications (including your code editor and
browser) before running your benchmarks. You might also have invisible background applications
running. You can see what these are by looking at either Activity Monitor on Mac or Task Manager
on Windows. You will get more stable benchmark results of you end those processes as well.
    * If you have multiple cores, you can even run your benchmark on a reserved core to prevent
    pollution from other OS and user processes. There are a number of ways to do this depending

ext/zstd/contrib/largeNbDicts/largeNbDicts.c  view on Meta::CPAN

        if (benchCompression)
            DISPLAY("Compression Speed : %.1f MB/s \r", speed_MBps);
        else
            DISPLAY("Decompression Speed : %.1f MB/s \r", speed_MBps);

        fflush(stdout);
        if (BMK_isCompleted_TimedFn(benchState)) break;
        roundNb++;
    }
    DISPLAY("\n");
    /* BMK_benchTimedFn may not run exactly nbRounds iterations */
    double speedAggregated =
        aggregateData(speedPerRound, roundNb + 1, metricAggregatePref);
    if (metricAggregatePref == fastest)
      DISPLAY("Fastest Speed : %.1f MB/s \n", speedAggregated);
    else
      DISPLAY("Median Speed : %.1f MB/s \n", speedAggregated);

    char* csvFileName = malloc(strlen(exeName) + 5);
    strcpy(csvFileName, exeName);
    strcat(csvFileName, ".csv");

ext/zstd/contrib/match_finders/zstd_edist.c  view on Meta::CPAN

***************************************/

/* Just a sential for the entries of the diagonal matrix */
#define ZSTD_EDIST_DIAG_MAX (S32)(1 << 30)

/* How large should a snake be to be considered a 'big' snake. 
 * For an explanation of what a 'snake' is with respect to the 
 * edit distance matrix, see the linked paper in zstd_edist.h */
#define ZSTD_EDIST_SNAKE_THRESH 20

/* After how many iterations should we start to use the heuristic
 * based on 'big' snakes */
#define ZSTD_EDIST_SNAKE_ITER_THRESH 200

/* After how many iterations should be just give up and take 
 * the best available edit script for this round */ 
#define ZSTD_EDIST_EXPENSIVE_THRESH 1024

/*-*************************************
*  Structures
***************************************/

typedef struct {
    U32 dictIdx;
    U32 srcIdx;

ext/zstd/contrib/match_finders/zstd_edist.c  view on Meta::CPAN

    S32 const diagMin = dictLow - srcHigh;
    S32 const diagMax = dictHigh - srcLow;
    S32 const forwardMid = dictLow - srcLow;
    S32 const backwardMid = dictHigh - srcHigh;

    S32 forwardMin = forwardMid;
    S32 forwardMax = forwardMid;
    S32 backwardMin = backwardMid;
    S32 backwardMax = backwardMid;
    int odd = (forwardMid - backwardMid) & 1;
    U32 iterations;

    forwardDiag[forwardMid] = dictLow;
    backwardDiag[backwardMid] = dictHigh;

    /* Main loop for updating diag entries. Unless useHeuristics is 
     * set to false, this loop will run until it finds the minimal 
     * edit script */ 
    for (iterations = 1;;iterations++) {
        S32 diag;
        int bigSnake = 0;
        
        if (forwardMin > diagMin) {
            forwardMin--;
            forwardDiag[forwardMin - 1] = -1;
        } else {
            forwardMin++;
        }

ext/zstd/contrib/match_finders/zstd_edist.c  view on Meta::CPAN

        if (!useHeuristics)
            continue;

        /* Everything under this point is a heuristic. Using these will 
         * substantially speed up the match finding. In some cases, taking 
         * the total match finding time from several minutes to seconds.
         * Of course, the caveat is that the edit script found may no longer 
         * be optimal */ 

        /* Big snake heuristic */ 
        if (iterations > ZSTD_EDIST_SNAKE_ITER_THRESH && bigSnake) {
            {
                S32 best = 0;
                
                for (diag = forwardMax; diag >= forwardMin; diag -= 2) {
                    S32 diagDiag = diag - forwardMid;
                    S32 dictIdx = forwardDiag[diag];
                    S32 srcIdx = dictIdx - diag;
                    S32 v = (dictIdx - dictLow) * 2 - diagDiag;

                    if (v > 12 * (iterations + (diagDiag < 0 ? -diagDiag : diagDiag))) {
                        if (v > best 
                          && dictLow + ZSTD_EDIST_SNAKE_THRESH <= dictIdx && dictIdx <= dictHigh
                          && srcLow + ZSTD_EDIST_SNAKE_THRESH <= srcIdx && srcIdx <= srcHigh) {
                            S32 k;
                            for (k = 1; dict[dictIdx - k] == src[srcIdx - k]; k++) {
                                if (k == ZSTD_EDIST_SNAKE_THRESH) {
                                    best = v;
                                    partition->dictMid = dictIdx;
                                    partition->srcMid = srcIdx;
                                    break;

ext/zstd/contrib/match_finders/zstd_edist.c  view on Meta::CPAN


            {
                S32 best = 0;

                for (diag = backwardMax; diag >= backwardMin; diag -= 2) {
                    S32 diagDiag = diag - backwardMid;
                    S32 dictIdx = backwardDiag[diag];
                    S32 srcIdx = dictIdx - diag;
                    S32 v = (dictHigh - dictIdx) * 2 + diagDiag;

                    if (v > 12 * (iterations + (diagDiag < 0 ? -diagDiag : diagDiag))) {
                        if (v > best 
                          && dictLow < dictIdx && dictIdx <= dictHigh - ZSTD_EDIST_SNAKE_THRESH
                          && srcLow < srcIdx && srcIdx <= srcHigh - ZSTD_EDIST_SNAKE_THRESH) {
                            int k;
                            for (k = 0; dict[dictIdx + k] == src[srcIdx + k]; k++) {
                                if (k == ZSTD_EDIST_SNAKE_THRESH - 1) { 
                                    best = v;
                                    partition->dictMid = dictIdx;
                                    partition->srcMid = srcIdx;
                                    break; 

ext/zstd/contrib/match_finders/zstd_edist.c  view on Meta::CPAN


                if (best > 0) {
                    partition->lowUseHeuristics = 1;
                    partition->highUseHeuristics = 0;
                    return;
                }
            }
        }

        /* More general 'too expensive' heuristic */ 
        if (iterations >= ZSTD_EDIST_EXPENSIVE_THRESH) {
            S32 forwardDictSrcBest;
            S32 forwardDictBest = 0;
            S32 backwardDictSrcBest;
            S32 backwardDictBest = 0;

            forwardDictSrcBest = -1;
            for (diag = forwardMax; diag >= forwardMin; diag -= 2) {
                S32 dictIdx = MIN(forwardDiag[diag], dictHigh);
                S32 srcIdx = dictIdx - diag;

ext/zstd/lib/compress/zstd_ldm.c  view on Meta::CPAN

         * be split into two sequences. This condition holds when using
         * ZSTD_window_enforceMaxDist(), but if we move to checking offsets
         * against maxDist directly, we'll have to carefully handle that case.
         */
        ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL);
        /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
        newLeftoverSize = ZSTD_ldm_generateSequences_internal(
            ldmState, sequences, params, chunkStart, chunkSize);
        if (ZSTD_isError(newLeftoverSize))
            return newLeftoverSize;
        /* 4. We add the leftover literals from previous iterations to the first
         *    newly generated sequence, or add the `newLeftoverSize` if none are
         *    generated.
         */
        /* Prepend the leftover literals from the last call */
        if (prevSize < sequences->size) {
            sequences->seq[prevSize].litLength += (U32)leftoverSize;
            leftoverSize = newLeftoverSize;
        } else {
            assert(newLeftoverSize == chunkSize);
            leftoverSize += chunkSize;

ext/zstd/lib/decompress/huf_decompress.c  view on Meta::CPAN

        }
#endif
        /* Compute olimit */
        {
            /* Each iteration produces 5 output symbols per stream */
            size_t const oiters = (size_t)(oend - op[3]) / 5;
            /* Each iteration consumes up to 11 bits * 5 = 55 bits < 7 bytes
             * per stream.
             */
            size_t const iiters = (size_t)(ip[0] - ilimit) / 7;
            /* We can safely run iters iterations before running bounds checks */
            size_t const iters = MIN(oiters, iiters);
            size_t const symbols = iters * 5;

            /* We can simply check that op[3] < olimit, instead of checking all
             * of our bounds, since we can't hit the other bounds until we've run
             * iters iterations, which only happens when op[3] == olimit.
             */
            olimit = op[3] + symbols;

            /* Exit fast decoding loop once we get close to the end. */
            if (op[3] + 20 > olimit)
                break;

            /* Exit the decoding loop if any input pointer has crossed the
             * previous one. This indicates corruption, and a precondition
             * to our loop is that ip[i] >= ip[0].

ext/zstd/lib/decompress/huf_decompress.c  view on Meta::CPAN

             * Each table lookup consumes up to 11 bits of input, and produces
             * up to 2 bytes of output.
             */
            /* We can consume up to 7 bytes of input per iteration per stream.
             * We also know that each input pointer is >= ip[0]. So we can run
             * iters loops before running out of input.
             */
            size_t iters = (size_t)(ip[0] - ilimit) / 7;
            /* Each iteration can produce up to 10 bytes of output per stream.
             * Each output stream my advance at different rates. So take the
             * minimum number of safe iterations among all the output streams.
             */
            for (stream = 0; stream < 4; ++stream) {
                size_t const oiters = (size_t)(oend[stream] - op[stream]) / 10;
                iters = MIN(iters, oiters);
            }

            /* Each iteration produces at least 5 output symbols. So until
             * op[3] crosses olimit, we know we haven't executed iters
             * iterations yet. This saves us maintaining an iters counter,
             * at the expense of computing the remaining # of iterations
             * more frequently.
             */
            olimit = op[3] + (iters * 5);

            /* Exit the fast decoding loop if we are too close to the end. */
            if (op[3] + 10 > olimit)
                break;

            /* Exit the decoding loop if any input pointer has crossed the
             * previous one. This indicates corruption, and a precondition

ext/zstd/lib/decompress/huf_decompress_amd64.S  view on Meta::CPAN

    movq 88(%rax), %bits3
    movq 96(%rax), %dtable
    push %rax      /* argument */
    push 104(%rax) /* ilimit */
    push 112(%rax) /* oend */
    push %olimit   /* olimit space */

    subq $24, %rsp

.L_4X1_compute_olimit:
    /* Computes how many iterations we can do safely
     * %r15, %rax may be clobbered
     * rbx, rdx must be saved
     * op3 & ip0 mustn't be clobbered
     */
    movq %rbx, 0(%rsp)
    movq %rdx, 8(%rsp)

    movq 32(%rsp), %rax /* rax = oend */
    subq %op3,    %rax  /* rax = oend - op3 */

ext/zstd/lib/decompress/huf_decompress_amd64.S  view on Meta::CPAN

    movq %op2, %rax
    push %rax /* oend1 */

    movq %op1, %rax
    push %rax /* oend0 */

    /* Scratch space */
    subq $8, %rsp

.L_4X2_compute_olimit:
    /* Computes how many iterations we can do safely
     * %r15, %rax may be clobbered
     * rdx must be saved
     * op[1,2,3,4] & ip0 mustn't be clobbered
     */
    movq %rdx, 0(%rsp)

    /* We can consume up to 7 input bytes each iteration. */
    movq %ip0,     %rax  /* rax = ip0 */
    movq 40(%rsp), %rdx  /* rdx = ilimit */
    subq %rdx,     %rax  /* rax = ip0 - ilimit */

ext/zstd/tests/README.md  view on Meta::CPAN

build comparison (between facebook:dev and facebook:release), onetime will pull all the current
pull requests from the zstd repo and compare facebook:dev to all of them once, continuous
will continuously get pull requests from the zstd repo and run benchmarks against facebook:dev.

```
Example usage: python automated_benchmarking.py
```

```
usage: automated_benchmarking.py [-h] [--directory DIRECTORY]
                                 [--levels LEVELS] [--iterations ITERATIONS]
                                 [--emails EMAILS] [--frequency FREQUENCY]
                                 [--mode MODE] [--dict DICT]

optional arguments:
  -h, --help            show this help message and exit
  --directory DIRECTORY
                        directory with files to benchmark
  --levels LEVELS       levels to test e.g. ('1,2,3')
  --iterations ITERATIONS
                        number of benchmark iterations to run
  --emails EMAILS       email addresses of people who will be alerted upon
                        regression. Only for continuous mode
  --frequency FREQUENCY
                        specifies the number of seconds to wait before each
                        successive check for new PRs in continuous mode
  --mode MODE           'fastmode', 'onetime', 'current', or 'continuous' (see
                        README.md for details)
  --dict DICT           filename of dictionary to use (when set, this
                        dictionary will be used to compress the files provided
                        inside --directory)

ext/zstd/tests/automated_benchmarking.py  view on Meta::CPAN

        )
        .stdout.decode("utf-8")
        .split(" ")
    ))


def benchmark_n(executable, level, filename, n):
    speeds_arr = [benchmark_single(executable, level, filename) for _ in range(n)]
    cspeed, dspeed = max(b[0] for b in speeds_arr), max(b[1] for b in speeds_arr)
    print(
        "Bench (executable={} level={} filename={}, iterations={}):\n\t[cspeed: {} MB/s, dspeed: {} MB/s]".format(
            os.path.basename(executable),
            level,
            os.path.basename(filename),
            n,
            cspeed,
            dspeed,
        )
    )
    return (cspeed, dspeed)


def benchmark(build, filenames, levels, iterations):
    executable = clone_and_build(build)
    return [
        [benchmark_n(executable, l, f, iterations) for f in filenames] for l in levels
    ]


def benchmark_dictionary_single(executable, filenames_directory, dictionary_filename, level, iterations):
    cspeeds, dspeeds = [], []
    for _ in range(iterations):
        output = subprocess.run([executable, "-qb{}".format(level), "-D", dictionary_filename, "-r", filenames_directory], stdout=subprocess.PIPE).stdout.decode("utf-8").split(" ")
        cspeed, dspeed = parse_benchmark_output(output)
        cspeeds.append(cspeed)
        dspeeds.append(dspeed)
    max_cspeed, max_dspeed = max(cspeeds), max(dspeeds)
    print(
        "Bench (executable={} level={} filenames_directory={}, dictionary_filename={}, iterations={}):\n\t[cspeed: {} MB/s, dspeed: {} MB/s]".format(
            os.path.basename(executable),
            level,
            os.path.basename(filenames_directory),
            os.path.basename(dictionary_filename),
            iterations,
            max_cspeed,
            max_dspeed,
        )
    )
    return (max_cspeed, max_dspeed)


def benchmark_dictionary(build, filenames_directory, dictionary_filename, levels, iterations):
    executable = clone_and_build(build)
    return [benchmark_dictionary_single(executable, filenames_directory, dictionary_filename, l, iterations) for l in levels]


def parse_regressions_and_labels(old_cspeed, new_cspeed, old_dspeed, new_dspeed, baseline_build, test_build):
    cspeed_reg = (old_cspeed - new_cspeed) / old_cspeed
    dspeed_reg = (old_dspeed - new_dspeed) / old_dspeed
    baseline_label = "{}:{} ({})".format(
        baseline_build["user"], baseline_build["branch"], baseline_build["hash"]
    )
    test_label = "{}:{} ({})".format(
        test_build["user"], test_build["branch"], test_build["hash"]
    )
    return cspeed_reg, dspeed_reg, baseline_label, test_label


def get_regressions(baseline_build, test_build, iterations, filenames, levels):
    old = benchmark(baseline_build, filenames, levels, iterations)
    new = benchmark(test_build, filenames, levels, iterations)
    regressions = []
    for j, level in enumerate(levels):
        for k, filename in enumerate(filenames):
            old_cspeed, old_dspeed = old[j][k]
            new_cspeed, new_dspeed = new[j][k]
            cspeed_reg, dspeed_reg, baseline_label, test_label = parse_regressions_and_labels(
                old_cspeed, new_cspeed, old_dspeed, new_dspeed, baseline_build, test_build
            )
            if cspeed_reg > CSPEED_REGRESSION_TOLERANCE:
                regressions.append(

ext/zstd/tests/automated_benchmarking.py  view on Meta::CPAN

                        filename,
                        baseline_label,
                        test_label,
                        old_dspeed,
                        new_dspeed,
                        dspeed_reg * 100.0,
                    )
                )
    return regressions

def get_regressions_dictionary(baseline_build, test_build, filenames_directory, dictionary_filename, levels, iterations):
    old = benchmark_dictionary(baseline_build, filenames_directory, dictionary_filename, levels, iterations)
    new = benchmark_dictionary(test_build, filenames_directory, dictionary_filename, levels, iterations)
    regressions = []
    for j, level in enumerate(levels):
        old_cspeed, old_dspeed = old[j]
        new_cspeed, new_dspeed = new[j]
        cspeed_reg, dspeed_reg, baesline_label, test_label = parse_regressions_and_labels(
            old_cspeed, new_cspeed, old_dspeed, new_dspeed, baseline_build, test_build
        )
        if cspeed_reg > CSPEED_REGRESSION_TOLERANCE:
            regressions.append(
                "[COMPRESSION REGRESSION] (level={} filenames_directory={} dictionary_filename={})\n\t{} -> {}\n\t{} -> {} ({:0.2f}%)".format(

ext/zstd/tests/automated_benchmarking.py  view on Meta::CPAN

                    baseline_label,
                    test_label,
                    old_dspeed,
                    new_dspeed,
                    dspeed_reg * 100.0,
                )
            )
        return regressions


def main(filenames, levels, iterations, builds=None, emails=None, continuous=False, frequency=DEFAULT_MAX_API_CALL_FREQUENCY_SEC, dictionary_filename=None):
    if builds == None:
        builds = get_new_open_pr_builds()
    while True:
        for test_build in builds:
            if dictionary_filename == None:
                regressions = get_regressions(
                    RELEASE_BUILD, test_build, iterations, filenames, levels
                )
            else:
                regressions = get_regressions_dictionary(
                    RELEASE_BUILD, test_build, filenames, dictionary_filename, levels, iterations
                )
            body = "\n".join(regressions)
            if len(regressions) > 0:
                if emails != None:
                    os.system(
                        """
                        echo "{}" | mutt -s "[zstd regression] caused by new pr" {}
                    """.format(
                            body, emails
                        )

ext/zstd/tests/automated_benchmarking.py  view on Meta::CPAN

        if not continuous:
            break
        time.sleep(frequency)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()

    parser.add_argument("--directory", help="directory with files to benchmark", default="golden-compression")
    parser.add_argument("--levels", help="levels to test e.g. ('1,2,3')", default="1")
    parser.add_argument("--iterations", help="number of benchmark iterations to run", default="1")
    parser.add_argument("--emails", help="email addresses of people who will be alerted upon regression. Only for continuous mode", default=None)
    parser.add_argument("--frequency", help="specifies the number of seconds to wait before each successive check for new PRs in continuous mode", default=DEFAULT_MAX_API_CALL_FREQUENCY_SEC)
    parser.add_argument("--mode", help="'fastmode', 'onetime', 'current', or 'continuous' (see README.md for details)", default="current")
    parser.add_argument("--dict", help="filename of dictionary to use (when set, this dictionary will be used to compress the files provided inside --directory)", default=None)

    args = parser.parse_args()
    filenames = args.directory
    levels = [int(l) for l in args.levels.split(",")]
    mode = args.mode
    iterations = int(args.iterations)
    emails = args.emails
    frequency = int(args.frequency)
    dictionary_filename = args.dict

    if dictionary_filename == None:
        filenames = glob.glob("{}/**".format(filenames))

    if (len(filenames) == 0):
        print("0 files found")
        quit()

    if mode == "onetime":
        main(filenames, levels, iterations, frequency=frequenc, dictionary_filename=dictionary_filename)
    elif mode == "current":
        builds = [{"user": None, "branch": "None", "hash": None}]
        main(filenames, levels, iterations, builds, frequency=frequency, dictionary_filename=dictionary_filename)
    elif mode == "fastmode":
        builds = [{"user": "facebook", "branch": "release", "hash": None}]
        main(filenames, levels, iterations, builds, frequency=frequency, dictionary_filename=dictionary_filename)
    else:
        main(filenames, levels, iterations, None, emails, True, frequency=frequency, dictionary_filename=dictionary_filename)

ext/zstd/tests/paramgrill.c  view on Meta::CPAN

            }
        }
        winnerInfo = bestFeasible1;
    }

    return winnerInfo;
}

/* Optimizes for a fixed strategy */

/* flexible parameters: iterations of failed climbing (or if we do non-random, maybe this is when everything is close to visited)
   weight more on visit for bad results, less on good results/more on later results / ones with more failures.
   allocate memoTable here.
 */
static winnerInfo_t
optimizeFixedStrategy(const buffers_t buf, const contexts_t ctx,
                      const constraint_t target, paramValues_t paramTarget,
                      const ZSTD_strategy strat,
                      memoTable_t* memoTableArray, const int tries)
{
    int i = 0;

ext/zstd/tests/paramgrill.c  view on Meta::CPAN

            }
        }

        DEBUGOUTPUT("Real Opt\n");
        /* start 'real' optimization */
        {   int bestStrategy = (int)winner.params.vals[strt_ind];
            if (paramTarget.vals[strt_ind] == PARAM_UNSET) {
                int st = bestStrategy;
                int tries = g_maxTries;

                /* one iterations of hill climbing with the level-defined parameters. */
                {   winnerInfo_t const w1 = climbOnce(target, allMT, buf, ctx, winner.params);
                    if (compareResultLT(winner.result, w1.result, target, buf.srcSize)) {
                        winner = w1;
                    }
                    CHECKTIMEGT(ret, 0, _displayCleanUp);
                }

                while(st && tries > 0) {
                    winnerInfo_t wc;
                    DEBUGOUTPUT("StrategySwitch: %s\n", g_stratName[st]);

ext/zstd/tests/zstreamtest.c  view on Meta::CPAN

                case 'q':
                    argument++;
                    g_displayLevel--;
                    break;

                case 'p': /* pause at the end */
                    argument++;
                    mainPause = 1;
                    break;

                case 'i':   /* limit tests by nb of iterations (default) */
                    argument++;
                    nbTests=0; g_clockTime=0;
                    while ((*argument>='0') && (*argument<='9')) {
                        nbTests *= 10;
                        nbTests += *argument - '0';
                        argument++;
                    }
                    break;

                case 'T':   /* limit tests by time */



( run in 1.555 second using v1.01-cache-2.11-cpan-71847e10f99 )