Algorithm-GDiffDelta
view release on metacpan or search on metacpan
GDiffDelta.xs view on Meta::CPAN
s2 += s1;
} while (--k);
s1 %= QEF_BASE;
s2 %= QEF_BASE;
}
return (s2 << 16) | s1;
}
static U32
adler32_file (SV *f, U32 adler, Off_t offset, Off_t size, const char *from)
{
unsigned char buf[QEF_BUFSZ];
Off_t chunk_sz;
careful_fseek(f, offset, from);
while (size > 0) {
chunk_sz = QEF_MIN(QEF_BUFSZ, size);
careful_fread(buf, chunk_sz, f, from);
adler = adler32(adler, buf, chunk_sz);
size -= chunk_sz;
}
return adler;
}
static void
prepare_bdfile (SV *orig, Off_t orig_size, QefBDFile *bdf)
{
unsigned int fphbits;
Off_t hsize, offset;
U32 i;
QefBDRecord *brec;
QefBDRecord **fphash;
fphbits = hashbits(orig_size / QEF_BLK_SIZE + 1);
hsize = (Off_t) 1 << fphbits;
New(0, fphash, hsize, QefBDRecord *);
for (i = 0; i < hsize; ++i)
fphash[i] = 0;
qef_cha_init(&bdf->cha, sizeof(QefBDRecord), hsize / 4 + 1);
if (orig_size == 0)
bdf->size = 0;
else {
offset = 0;
bdf->size = orig_size;
/* Start by looking at the last (possibly incomplete) block */
if ((offset = (orig_size / QEF_BLK_SIZE) * QEF_BLK_SIZE) == orig_size)
offset -= QEF_BLK_SIZE;
while (1) {
brec = qef_cha_alloc(&bdf->cha);
brec->fp = adler32_file(orig, 0, offset,
QEF_MIN(QEF_BLK_SIZE, orig_size - offset),
"original");
brec->offset = offset;
i = QEF_HASHLONG(brec->fp, fphbits);
brec->next = fphash[i];
fphash[i] = brec;
if (offset < QEF_BLK_SIZE)
break;
offset -= QEF_BLK_SIZE;
}
assert(offset == 0);
}
bdf->fphbits = fphbits;
bdf->fphash = fphash;
}
/* Output a GDIFF DATA operation. */
static void
data_op (SV *changed, SV *delta, Off_t offset, Off_t size, unsigned char *buf)
{
size_t headsz = 0;
assert(size > 0);
assert(size <= QEF_INT_MAX);
assert(buf);
if (size <= 246)
buf[headsz++] = size;
else if (size <= QEF_USHORT_MAX) {
buf[headsz++] = 247;
QEF_BE16_PUT(buf, headsz, size);
}
else {
buf[headsz++] = 248;
QEF_BE32_PUT(buf, headsz, size);
}
/* Write the opcode and size argument (if any). */
careful_fwrite(buf, headsz, delta, "delta");
/* Copy the actual data to be inserted into the delta. */
careful_fseek(changed, offset, "changed file");
copy_data(changed, delta, size, buf, "changed file", "delta");
}
/* Output a GDIFF COPY operation. */
static void
copy_op (SV *delta, Off_t offset, Off_t size)
{
unsigned char buf[QEF_COPY_MAX];
size_t headsz = 0;
assert(size > 0);
if (offset <= QEF_USHORT_MAX) {
if (size <= QEF_UBYTE_MAX) {
buf[headsz++] = 249;
QEF_BE16_PUT(buf, headsz, offset);
buf[headsz++] = size;
GDiffDelta.xs view on Meta::CPAN
QEF_BE16_PUT(buf, headsz, offset);
QEF_BE16_PUT(buf, headsz, size);
}
else if (size <= QEF_INT_MAX) {
buf[headsz++] = 251;
QEF_BE16_PUT(buf, headsz, offset);
QEF_BE32_PUT(buf, headsz, size);
}
else {
/* TODO - break copy ops for bigger than 2Gb into smaller ones */
assert(0);
}
}
else if (offset <= QEF_INT_MAX) {
if (size <= QEF_UBYTE_MAX) {
buf[headsz++] = 252;
QEF_BE32_PUT(buf, headsz, offset);
buf[headsz++] = size;
}
else if (size <= QEF_USHORT_MAX) {
buf[headsz++] = 253;
QEF_BE32_PUT(buf, headsz, offset);
QEF_BE16_PUT(buf, headsz, size);
}
else if (size <= QEF_INT_MAX) {
buf[headsz++] = 254;
QEF_BE32_PUT(buf, headsz, offset);
QEF_BE32_PUT(buf, headsz, size);
}
else {
/* TODO - break copy ops for bigger than 2Gb into smaller ones */
assert(0);
}
}
else {
/* TODO - allow 64bit offsets */
assert(0);
}
/* Write the opcode and arguments. */
careful_fwrite(buf, headsz, delta, "delta");
}
static void
do_delta (SV *orig, SV *changed, SV *delta)
{
U32 fp;
Off_t orig_size, changed_size, changed_offset;
Off_t rsize, msize, newmsize;
Off_t off1, off2, moff, newmoff;
QefBDFile bdf;
QefBDRecord *brec;
unsigned char insbuf[QEF_BUFSZ], buf1[QEF_BUFSZ], buf2[QEF_BUFSZ];
size_t pos, sz1, sz2, minsz;
Off_t ins_offset, ins_size;
int stop;
changed_size = file_size(changed, "changed file");
if (changed_size == 0)
return;
orig_size = file_size(orig, "original");
prepare_bdfile(orig, orig_size, &bdf);
ins_size = 0;
changed_offset = 0;
while (changed_offset < changed_size) {
rsize = QEF_MIN(QEF_BLK_SIZE, changed_size - changed_offset);
fp = adler32_file(changed, 0, changed_offset, rsize, "changed file");
brec = bdf.fphash[QEF_HASHLONG(fp, bdf.fphbits)];
for (msize = 0; brec; brec = brec->next) {
if (brec->fp == fp) {
off1 = brec->offset;
off2 = changed_offset;
newmsize = 0;
newmoff = off1;
stop = 0;
while (!stop) {
sz1 = QEF_MIN(QEF_BUFSZ, orig_size - off1);
sz2 = QEF_MIN(QEF_BUFSZ, changed_size - off2);
minsz = QEF_MIN(sz1, sz2);
if (minsz == 0)
break;
/* TODO - is there a better way to buffer these? */
careful_fseek(orig, off1, "original");
careful_fread(buf1, minsz, orig, "original");
careful_fseek(changed, off2, "changed file");
careful_fread(buf2, minsz, changed, "changed file");
for (pos = 0; pos < minsz; ++pos) {
if (buf1[pos] != buf2[pos]) {
stop = 1;
break;
}
}
newmsize += pos;
off1 += minsz;
off2 += minsz;
}
if (newmsize > msize) {
moff = newmoff;
msize = newmsize;
}
}
}
if (msize < QEF_COPY_MIN) {
if (!ins_size)
ins_offset = changed_offset;
++ins_size;
++changed_offset;
if (ins_size == QEF_INT_MAX) {
data_op(changed, delta, ins_offset, ins_size, insbuf);
ins_size = 0;
}
}
else {
if (ins_size) {
data_op(changed, delta, ins_offset, ins_size, insbuf);
ins_size = 0;
}
copy_op(delta, moff, msize);
changed_offset += msize;
}
}
if (ins_size) {
data_op(changed, delta, ins_offset, ins_size, insbuf);
ins_size = 0;
}
}
MODULE = Algorithm::GDiffDelta PACKAGE = Algorithm::GDiffDelta PREFIX = qefgdiff_
PROTOTYPES: DISABLE
U32
qefgdiff_gdiff_adler32 (U32 init, SV *s)
PREINIT:
STRLEN len;
const unsigned char *buf;
CODE:
buf = (unsigned char *) SvPV(s, len);
RETVAL = adler32(init, buf, len);
OUTPUT:
RETVAL
void
qefgdiff_gdiff_delta (SV *orig, SV *changed, SV *delta)
CODE:
careful_fwrite("\xD1\xFF\xD1\xFF\x04", 5, delta, "delta");
do_delta(orig, changed, delta);
careful_fwrite("\0", 1, delta, "delta");
void
qefgdiff_gdiff_apply (SV *orig, SV *delta, SV *output)
PREINIT:
unsigned char buf[QEF_BUFSZ];
size_t r, s;
int c;
CODE:
/* Check that GDIFF header and version number are valid. */
careful_fread(buf, 5, delta, "delta");
if (buf[0] != 0xD1 || buf[1] != 0xFF ||
buf[2] != 0xD1 || buf[3] != 0xFF)
croak("incorrect GDIFF header at start of delta");
if (buf[4] != 4)
croak("wrong version of GDIFF format (%d),"
" only version 4 understood", (int) buf[4]);
while (1) {
careful_fread(buf, 1, delta, "delta");
c = buf[0];
if (c == 0)
return;
else if (c <= 246) {
careful_fread(buf, c, delta, "delta");
careful_fwrite(buf, c, output, "output");
}
else {
switch (c) {
case 247: /* ushort, <n> bytes - append <n> data bytes */
s = read_ushort(delta);
copy_data(delta, output, s, buf, "delta", "output");
break;
case 248: /* int, <n> bytes - append <n> data bytes */
s = read_int(delta);
copy_data(delta, output, s, buf, "delta", "output");
break;
case 249: /* ushort, ubyte - copy <position>, <length> */
r = read_ushort(delta);
s = read_ubyte(delta);
careful_fseek(orig, r, "original");
copy_data(orig, output, s, buf, "original", "output");
break;
case 250: /* ushort, ushort - copy <position>, <length> */
r = read_ushort(delta);
s = read_ushort(delta);
careful_fseek(orig, r, "original");
copy_data(orig, output, s, buf, "original", "output");
break;
case 251: /* ushort, int - copy <position>, <length> */
r = read_ushort(delta);
s = read_int(delta);
careful_fseek(orig, r, "original");
copy_data(orig, output, s, buf, "original", "output");
break;
case 252: /* int, ubyte - copy <position>, <length> */
r = read_int(delta);
s = read_ubyte(delta);
careful_fseek(orig, r, "original");
copy_data(orig, output, s, buf, "original", "output");
break;
case 253: /* int, ushort - copy <position>, <length> */
r = read_int(delta);
s = read_ushort(delta);
careful_fseek(orig, r, "original");
copy_data(orig, output, s, buf, "original", "output");
break;
case 254: /* int, int - copy <position>, <length> */
r = read_int(delta);
s = read_int(delta);
careful_fseek(orig, r, "original");
copy_data(orig, output, s, buf, "original", "output");
break;
case 255: /* long, int - copy <position>, <length> */
/* TODO - 64 seeking */
assert(0);
break;
default: assert(0);
}
}
}
# vi:ts=4 sw=4 expandtab:
( run in 1.635 second using v1.01-cache-2.11-cpan-0068ddc7af1 )