Compress-Stream-Zstd
view release on metacpan or search on metacpan
ext/zstd/lib/compress/zstd_compress.c view on Meta::CPAN
BYTE const* iend = ip + blockSize; /* May be adjusted if we decide to process fewer than blockSize bytes */
repcodes_t updatedRepcodes;
U32 bytesAdjustment = 0;
U32 finalMatchSplit = 0;
/* TODO(embg) support fast parsing mode in noBlockDelim mode */
(void)externalRepSearch;
if (cctx->cdict) {
dictSize = cctx->cdict->dictContentSize;
} else if (cctx->prefixDict.dict) {
dictSize = cctx->prefixDict.dictSize;
} else {
dictSize = 0;
}
DEBUGLOG(5, "ZSTD_copySequencesToSeqStoreNoBlockDelim: idx: %u PIS: %u blockSize: %zu", idx, startPosInSequence, blockSize);
DEBUGLOG(5, "Start seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);
ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));
while (endPosInSequence && idx < inSeqsSize && !finalMatchSplit) {
const ZSTD_Sequence currSeq = inSeqs[idx];
U32 litLength = currSeq.litLength;
U32 matchLength = currSeq.matchLength;
U32 const rawOffset = currSeq.offset;
U32 offBase;
/* Modify the sequence depending on where endPosInSequence lies */
if (endPosInSequence >= currSeq.litLength + currSeq.matchLength) {
if (startPosInSequence >= litLength) {
startPosInSequence -= litLength;
litLength = 0;
matchLength -= startPosInSequence;
} else {
litLength -= startPosInSequence;
}
/* Move to the next sequence */
endPosInSequence -= currSeq.litLength + currSeq.matchLength;
startPosInSequence = 0;
} else {
/* This is the final (partial) sequence we're adding from inSeqs, and endPosInSequence
does not reach the end of the match. So, we have to split the sequence */
DEBUGLOG(6, "Require a split: diff: %u, idx: %u PIS: %u",
currSeq.litLength + currSeq.matchLength - endPosInSequence, idx, endPosInSequence);
if (endPosInSequence > litLength) {
U32 firstHalfMatchLength;
litLength = startPosInSequence >= litLength ? 0 : litLength - startPosInSequence;
firstHalfMatchLength = endPosInSequence - startPosInSequence - litLength;
if (matchLength > blockSize && firstHalfMatchLength >= cctx->appliedParams.cParams.minMatch) {
/* Only ever split the match if it is larger than the block size */
U32 secondHalfMatchLength = currSeq.matchLength + currSeq.litLength - endPosInSequence;
if (secondHalfMatchLength < cctx->appliedParams.cParams.minMatch) {
/* Move the endPosInSequence backward so that it creates match of minMatch length */
endPosInSequence -= cctx->appliedParams.cParams.minMatch - secondHalfMatchLength;
bytesAdjustment = cctx->appliedParams.cParams.minMatch - secondHalfMatchLength;
firstHalfMatchLength -= bytesAdjustment;
}
matchLength = firstHalfMatchLength;
/* Flag that we split the last match - after storing the sequence, exit the loop,
but keep the value of endPosInSequence */
finalMatchSplit = 1;
} else {
/* Move the position in sequence backwards so that we don't split match, and break to store
* the last literals. We use the original currSeq.litLength as a marker for where endPosInSequence
* should go. We prefer to do this whenever it is not necessary to split the match, or if doing so
* would cause the first half of the match to be too small
*/
bytesAdjustment = endPosInSequence - currSeq.litLength;
endPosInSequence = currSeq.litLength;
break;
}
} else {
/* This sequence ends inside the literals, break to store the last literals */
break;
}
}
/* Check if this offset can be represented with a repcode */
{ U32 const ll0 = (litLength == 0);
offBase = ZSTD_finalizeOffBase(rawOffset, updatedRepcodes.rep, ll0);
ZSTD_updateRep(updatedRepcodes.rep, offBase, ll0);
}
if (cctx->appliedParams.validateSequences) {
seqPos->posInSrc += litLength + matchLength;
FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, cctx->appliedParams.cParams.minMatch, seqPos->posInSrc,
cctx->appliedParams.cParams.windowLog, dictSize, cctx->appliedParams.useSequenceProducer),
"Sequence validation failed");
}
DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offBase, matchLength, litLength);
RETURN_ERROR_IF(idx - seqPos->idx >= cctx->seqStore.maxNbSeq, externalSequences_invalid,
"Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offBase, matchLength);
ip += matchLength + litLength;
if (!finalMatchSplit)
idx++; /* Next Sequence */
}
DEBUGLOG(5, "Ending seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);
assert(idx == inSeqsSize || endPosInSequence <= inSeqs[idx].litLength + inSeqs[idx].matchLength);
seqPos->idx = idx;
seqPos->posInSequence = endPosInSequence;
ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t));
iend -= bytesAdjustment;
if (ip != iend) {
/* Store any last literals */
U32 lastLLSize = (U32)(iend - ip);
assert(ip <= iend);
DEBUGLOG(6, "Storing last literals of size: %u", lastLLSize);
ZSTD_storeLastLiterals(&cctx->seqStore, ip, lastLLSize);
seqPos->posInSrc += lastLLSize;
}
return bytesAdjustment;
}
typedef size_t (*ZSTD_sequenceCopier) (ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch);
static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode)
{
ZSTD_sequenceCopier sequenceCopier = NULL;
assert(ZSTD_cParam_withinBounds(ZSTD_c_blockDelimiters, mode));
if (mode == ZSTD_sf_explicitBlockDelimiters) {
( run in 1.053 second using v1.01-cache-2.11-cpan-71847e10f99 )