Compress-Stream-Zstd

 view release on metacpan or  search on metacpan

ext/zstd/contrib/match_finders/zstd_edist.c  view on Meta::CPAN

/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

/*-*************************************
*  Dependencies
***************************************/

/* Currently relies on qsort when combining contiguous matches. This can probably 
 * be avoided but would require changes to the algorithm. The qsort is far from 
 * the bottleneck in this algorithm even for medium sized files so it's probably 
 * not worth trying to address */ 
#include <stdlib.h>
#include <assert.h>

#include "zstd_edist.h"
#include "mem.h"

/*-*************************************
*  Constants
***************************************/

/* Just a sential for the entries of the diagonal matrix */
#define ZSTD_EDIST_DIAG_MAX (S32)(1 << 30)

/* How large should a snake be to be considered a 'big' snake. 
 * For an explanation of what a 'snake' is with respect to the 
 * edit distance matrix, see the linked paper in zstd_edist.h */
#define ZSTD_EDIST_SNAKE_THRESH 20

/* After how many iterations should we start to use the heuristic
 * based on 'big' snakes */
#define ZSTD_EDIST_SNAKE_ITER_THRESH 200

/* After how many iterations should be just give up and take 
 * the best available edit script for this round */ 
#define ZSTD_EDIST_EXPENSIVE_THRESH 1024

/*-*************************************
*  Structures
***************************************/

typedef struct {
    U32 dictIdx;
    U32 srcIdx;
    U32 matchLength;
} ZSTD_eDist_match;

typedef struct {
    const BYTE* dict;
    const BYTE* src;
    size_t dictSize;
    size_t srcSize;
    S32* forwardDiag;            /* Entries of the forward diagonal stored here */
    S32* backwardDiag;           /* Entries of the backward diagonal stored here.
                                  *   Note: this buffer and the 'forwardDiag' buffer 
                                  *   are contiguous. See the ZSTD_eDist_genSequences */
    ZSTD_eDist_match* matches;   /* Accumulate matches of length 1 in this buffer. 
                                  *   In a subsequence post-processing step, we combine 
                                  *   contiguous matches. */
    U32 nbMatches;
} ZSTD_eDist_state;

typedef struct {
    S32 dictMid;           /* The mid diagonal for the dictionary */
    S32 srcMid;            /* The mid diagonal for the source */ 
    int lowUseHeuristics;  /* Should we use heuristics for the low part */
    int highUseHeuristics; /* Should we use heuristics for the high part */ 
} ZSTD_eDist_partition;

/*-*************************************
*  Internal
***************************************/

static void ZSTD_eDist_diag(ZSTD_eDist_state* state,
                    ZSTD_eDist_partition* partition,
                    S32 dictLow, S32 dictHigh, S32 srcLow, 
                    S32 srcHigh, int useHeuristics)
{
    S32* const forwardDiag = state->forwardDiag;
    S32* const backwardDiag = state->backwardDiag;
    const BYTE* const dict = state->dict;
    const BYTE* const src = state->src;

    S32 const diagMin = dictLow - srcHigh;
    S32 const diagMax = dictHigh - srcLow;
    S32 const forwardMid = dictLow - srcLow;
    S32 const backwardMid = dictHigh - srcHigh;

    S32 forwardMin = forwardMid;
    S32 forwardMax = forwardMid;
    S32 backwardMin = backwardMid;
    S32 backwardMax = backwardMid;
    int odd = (forwardMid - backwardMid) & 1;
    U32 iterations;

    forwardDiag[forwardMid] = dictLow;
    backwardDiag[backwardMid] = dictHigh;

    /* Main loop for updating diag entries. Unless useHeuristics is 
     * set to false, this loop will run until it finds the minimal 
     * edit script */ 
    for (iterations = 1;;iterations++) {
        S32 diag;
        int bigSnake = 0;
        
        if (forwardMin > diagMin) {
            forwardMin--;
            forwardDiag[forwardMin - 1] = -1;
        } else {
            forwardMin++;
        }

        if (forwardMax < diagMax) {
            forwardMax++;
            forwardDiag[forwardMax + 1] = -1;
        } else {
            forwardMax--;
        }

        for (diag = forwardMax; diag >= forwardMin; diag -= 2) {
            S32 dictIdx;
            S32 srcIdx;
            S32 low = forwardDiag[diag - 1];
            S32 high = forwardDiag[diag + 1];
            S32 dictIdx0 = low < high ? high : low + 1;

            for (dictIdx = dictIdx0, srcIdx = dictIdx0 - diag;
                dictIdx < dictHigh && srcIdx < srcHigh && dict[dictIdx] == src[srcIdx];
                dictIdx++, srcIdx++) continue;

            if (dictIdx - dictIdx0 > ZSTD_EDIST_SNAKE_THRESH)
                bigSnake = 1;

            forwardDiag[diag] = dictIdx;

            if (odd && backwardMin <= diag && diag <= backwardMax && backwardDiag[diag] <= dictIdx) {
                partition->dictMid = dictIdx;
                partition->srcMid = srcIdx;
                partition->lowUseHeuristics = 0;
                partition->highUseHeuristics = 0;
                return;
            }
        }

        if (backwardMin > diagMin) {
            backwardMin--;
            backwardDiag[backwardMin - 1] = ZSTD_EDIST_DIAG_MAX;
        } else {
            backwardMin++;
        }

        if (backwardMax < diagMax) {
            backwardMax++;
            backwardDiag[backwardMax + 1] = ZSTD_EDIST_DIAG_MAX;
        } else {
            backwardMax--;
        }


        for (diag = backwardMax; diag >= backwardMin; diag -= 2) {
            S32 dictIdx;
            S32 srcIdx;
            S32 low = backwardDiag[diag - 1];
            S32 high = backwardDiag[diag + 1];
            S32 dictIdx0 = low < high ? low : high - 1;

            for (dictIdx = dictIdx0, srcIdx = dictIdx0 - diag;
                dictLow < dictIdx && srcLow < srcIdx && dict[dictIdx - 1] == src[srcIdx - 1];
                dictIdx--, srcIdx--) continue;

            if (dictIdx0 - dictIdx > ZSTD_EDIST_SNAKE_THRESH)
                bigSnake = 1;

            backwardDiag[diag] = dictIdx;

            if (!odd && forwardMin <= diag && diag <= forwardMax && dictIdx <= forwardDiag[diag]) {
                partition->dictMid = dictIdx;
                partition->srcMid = srcIdx;
                partition->lowUseHeuristics = 0;
                partition->highUseHeuristics = 0;
                return;
            }
        }

        if (!useHeuristics)
            continue;

        /* Everything under this point is a heuristic. Using these will 
         * substantially speed up the match finding. In some cases, taking 
         * the total match finding time from several minutes to seconds.
         * Of course, the caveat is that the edit script found may no longer 
         * be optimal */ 

        /* Big snake heuristic */ 
        if (iterations > ZSTD_EDIST_SNAKE_ITER_THRESH && bigSnake) {
            {
                S32 best = 0;
                
                for (diag = forwardMax; diag >= forwardMin; diag -= 2) {
                    S32 diagDiag = diag - forwardMid;
                    S32 dictIdx = forwardDiag[diag];
                    S32 srcIdx = dictIdx - diag;
                    S32 v = (dictIdx - dictLow) * 2 - diagDiag;

                    if (v > 12 * (iterations + (diagDiag < 0 ? -diagDiag : diagDiag))) {
                        if (v > best 
                          && dictLow + ZSTD_EDIST_SNAKE_THRESH <= dictIdx && dictIdx <= dictHigh
                          && srcLow + ZSTD_EDIST_SNAKE_THRESH <= srcIdx && srcIdx <= srcHigh) {
                            S32 k;
                            for (k = 1; dict[dictIdx - k] == src[srcIdx - k]; k++) {
                                if (k == ZSTD_EDIST_SNAKE_THRESH) {
                                    best = v;
                                    partition->dictMid = dictIdx;
                                    partition->srcMid = srcIdx;
                                    break;
                                }
                            }
                        }
                    }
                }

                if (best > 0) {
                    partition->lowUseHeuristics = 0;
                    partition->highUseHeuristics = 1;
                    return;
                }
            }

            {
                S32 best = 0;

                for (diag = backwardMax; diag >= backwardMin; diag -= 2) {
                    S32 diagDiag = diag - backwardMid;
                    S32 dictIdx = backwardDiag[diag];
                    S32 srcIdx = dictIdx - diag;
                    S32 v = (dictHigh - dictIdx) * 2 + diagDiag;

                    if (v > 12 * (iterations + (diagDiag < 0 ? -diagDiag : diagDiag))) {
                        if (v > best 
                          && dictLow < dictIdx && dictIdx <= dictHigh - ZSTD_EDIST_SNAKE_THRESH
                          && srcLow < srcIdx && srcIdx <= srcHigh - ZSTD_EDIST_SNAKE_THRESH) {
                            int k;
                            for (k = 0; dict[dictIdx + k] == src[srcIdx + k]; k++) {
                                if (k == ZSTD_EDIST_SNAKE_THRESH - 1) { 
                                    best = v;
                                    partition->dictMid = dictIdx;
                                    partition->srcMid = srcIdx;
                                    break; 
                                }
                            }
                        }
                    }
                }

                if (best > 0) {
                    partition->lowUseHeuristics = 1;
                    partition->highUseHeuristics = 0;
                    return;
                }
            }
        }

        /* More general 'too expensive' heuristic */ 
        if (iterations >= ZSTD_EDIST_EXPENSIVE_THRESH) {
            S32 forwardDictSrcBest;
            S32 forwardDictBest = 0;
            S32 backwardDictSrcBest;
            S32 backwardDictBest = 0;

            forwardDictSrcBest = -1;
            for (diag = forwardMax; diag >= forwardMin; diag -= 2) {
                S32 dictIdx = MIN(forwardDiag[diag], dictHigh);
                S32 srcIdx = dictIdx - diag;

                if (srcHigh < srcIdx) {
                    dictIdx = srcHigh + diag;
                    srcIdx = srcHigh;
                }

                if (forwardDictSrcBest < dictIdx + srcIdx) {
                    forwardDictSrcBest = dictIdx + srcIdx;
                    forwardDictBest = dictIdx;
                }
            }

            backwardDictSrcBest = ZSTD_EDIST_DIAG_MAX;
            for (diag = backwardMax; diag >= backwardMin; diag -= 2) {
                S32 dictIdx = MAX(dictLow, backwardDiag[diag]);
                S32 srcIdx = dictIdx - diag;

                if (srcIdx < srcLow) {
                    dictIdx = srcLow + diag;
                    srcIdx = srcLow;
                }

                if (dictIdx + srcIdx < backwardDictSrcBest) {
                    backwardDictSrcBest = dictIdx + srcIdx;
                    backwardDictBest = dictIdx;
                }
            }

            if ((dictHigh + srcHigh) - backwardDictSrcBest < forwardDictSrcBest - (dictLow + srcLow)) {
                partition->dictMid = forwardDictBest;
                partition->srcMid = forwardDictSrcBest - forwardDictBest;
                partition->lowUseHeuristics = 0;
                partition->highUseHeuristics = 1;
            } else {
                partition->dictMid = backwardDictBest;
                partition->srcMid = backwardDictSrcBest - backwardDictBest;
                partition->lowUseHeuristics = 1;
                partition->highUseHeuristics = 0;
            }
            return;
        }
    }
}

static void ZSTD_eDist_insertMatch(ZSTD_eDist_state* state, 
                    S32 const dictIdx, S32 const srcIdx)
{
    state->matches[state->nbMatches].dictIdx = dictIdx;
    state->matches[state->nbMatches].srcIdx = srcIdx;
    state->matches[state->nbMatches].matchLength = 1;
    state->nbMatches++;



( run in 0.672 second using v1.01-cache-2.11-cpan-71847e10f99 )