/* Algorithm::GDiffDelta implementation. * * Copyright (C) 2003 Davide Libenzi (code derived from libxdiff) * Copyright 2004, Geoff Richards * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifdef __cplusplus extern "C" { #endif #include "EXTERN.h" #include "perl.h" #include "XSUB.h" #ifdef __cplusplus } #endif #include #include #include #include #undef assert #include #define QEF_BUFSZ 8192 #if 0 #define QEF_DEBUG_IO #endif #include "util.c" static void careful_fread (void *ptr, size_t size, SV *f, const char *from) { #ifdef QEF_DEBUG_IO fprintf(stderr, "read from %p (%s): %u bytes ->%p\n", (void *) f, from, (unsigned int) size, ptr); #endif if (sv_isobject(f)) { I32 n; SV *ret, *buf; STRLEN len; char *str; dSP; ENTER; SAVETMPS; PUSHMARK(SP); XPUSHs(f); /* TODO - possibly use newSVpvn_share to avoid the memcpy * and extra allocation for buf? */ XPUSHs(sv_2mortal(buf = newSVpvn("", 0))); XPUSHs(sv_2mortal(newSVuv(size))); PUTBACK; n = call_method("read", G_SCALAR); assert(n == 0 || n == 1); SPAGAIN; ret = n ? POPs : &PL_sv_undef; if (!SvOK(ret)) croak("error reading from %s: %s", from, SvPV_nolen(get_sv("!", FALSE))); if (SvUV(ret) != size) croak("%s ends unexpectedly", from); if (!SvPOK(buf) || SvCUR(buf) != size) croak("'read' method left buffer badly set up", from); str = SvPV(buf, len); assert(len == size); memcpy(ptr, str, size); PUTBACK; FREETMPS; LEAVE; } else { int r = PerlIO_read(IoIFP(sv_2io(f)), ptr, size); if (r < 0) croak("error reading from %s: %s", from, strerror(errno)); else if ((size_t) r != size) croak("%s ends unexpectedly", from); } } static void careful_fwrite (const void *ptr, size_t size, SV *f, const char *to) { I32 n; SV *ret; #ifdef QEF_DEBUG_IO fprintf(stderr, "write to %p (%s): %u bytes <-%p\n", (void *) f, to, (unsigned int) size, ptr); #endif if (sv_isobject(f)) { dSP; ENTER; SAVETMPS; PUSHMARK(SP); XPUSHs(f); XPUSHs(sv_2mortal(newSVpvn(ptr, size))); XPUSHs(sv_2mortal(newSVuv(size))); PUTBACK; n = call_method("write", G_SCALAR); assert(n == 0 || n == 1); SPAGAIN; ret = n ? POPs : &PL_sv_no; n = SvTRUE(ret); PUTBACK; FREETMPS; LEAVE; if (!n) croak("error writing to %s: %s", to, SvPV_nolen(get_sv("!", FALSE))); } else { if ((size_t) PerlIO_write(IoIFP(sv_2io(f)), ptr, size) != size) croak("error writing to %s: %s", to, strerror(errno)); } } static void careful_fseek_whence (SV *f, Off_t offset, const char *from, int whence) { assert(whence == SEEK_SET || whence == SEEK_CUR || whence == SEEK_END); #ifdef QEF_DEBUG_IO fprintf(stderr, "seek %p (%s): %s %u\n", (void *) f, from, (whence == SEEK_SET ? "SEEK_SET" : whence == SEEK_CUR ? "SEEK_CUR" : "SEEK_END"), (unsigned int) offset); #endif if (sv_isobject(f)) { I32 n; SV *ret; dSP; ENTER; SAVETMPS; PUSHMARK(SP); XPUSHs(f); XPUSHs(sv_2mortal(newSVuv(offset))); XPUSHs(sv_2mortal(newSVuv(whence))); PUTBACK; n = call_method("seek", G_SCALAR); assert(n == 0 || n == 1); SPAGAIN; ret = n ? POPs : &PL_sv_undef; n = SvTRUE(ret); PUTBACK; FREETMPS; LEAVE; if (!n) croak("error seeking in %s: %s", from, SvPV_nolen(get_sv("!", FALSE))); } else { if (PerlIO_seek(IoIFP(sv_2io(f)), offset, whence)) croak("error seeking in %s: %s", from, strerror(errno)); } } QEF_INLINE static void careful_fseek (SV *f, Off_t offset, const char *from) { careful_fseek_whence(f, offset, from, SEEK_SET); } static Off_t careful_ftell (SV *f, const char *from) { Off_t offset; if (sv_isobject(f)) { I32 n; SV *ret; dSP; ENTER; SAVETMPS; PUSHMARK(SP); XPUSHs(f); PUTBACK; n = call_method("tell", G_SCALAR); assert(n == 0 || n == 1); SPAGAIN; offset = (Off_t) -1; if (n) { ret = POPs; if (SvOK(ret)) offset = SvUV(ret); } PUTBACK; FREETMPS; LEAVE; if (offset == (Off_t) -1) croak("error getting position in %s: %s", from, SvPV_nolen(get_sv("!", FALSE))); } else { offset = PerlIO_tell(IoIFP(sv_2io(f))); if (offset == (Off_t) -1) croak("error getting position in %s: %s", from, strerror(errno)); } return offset; } QEF_INLINE static size_t read_ubyte (SV *f) { unsigned char buf; careful_fread(&buf, 1, f, "delta"); return buf; } QEF_INLINE static size_t read_ushort (SV *f) { unsigned char buf[2]; careful_fread(buf, 2, f, "delta"); return buf[0] * 0x100 + buf[1]; } QEF_INLINE static size_t read_int (SV *f) { unsigned char buf[4]; careful_fread(buf, 4, f, "delta"); if (buf[0] >= 0x7F) croak("delta contains negative int value"); return buf[0] * 0x1000000 + buf[1] * 0x10000 + buf[2] * 0x100 + buf[3]; } /* The buffer is supplied by the parent so that we can avoid allocating * it on the stack every time this is called, even though that probably * wouldn't be very expensive in most implementations. */ static void copy_data (SV *in, SV *out, size_t num_bytes, unsigned char *buf, const char *from, const char *to) { assert(buf); while (num_bytes >= QEF_BUFSZ) { careful_fread(buf, QEF_BUFSZ, in, from); careful_fwrite(buf, QEF_BUFSZ, out, to); num_bytes -= QEF_BUFSZ; } careful_fread(buf, num_bytes, in, from); careful_fwrite(buf, num_bytes, out, to); } /* Work out the size of the file by seeking to the end. */ static Off_t file_size (SV *f, const char *from) { careful_fseek_whence(f, 0, from, SEEK_END); return careful_ftell(f, from); } /* largest prime smaller than 65536 */ #define QEF_BASE 65521L /* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ #define QEF_NMAX 5552 #define QEF_DO1(buf, i) { s1 += buf[i]; s2 += s1; } #define QEF_DO2(buf, i) QEF_DO1(buf, i); QEF_DO1(buf, i + 1); #define QEF_DO4(buf, i) QEF_DO2(buf, i); QEF_DO2(buf, i + 2); #define QEF_DO8(buf, i) QEF_DO4(buf, i); QEF_DO4(buf, i + 4); #define QEF_DO16(buf) QEF_DO8(buf, 0); QEF_DO8(buf, 8); static U32 adler32(U32 adler, const unsigned char *buf, size_t len) { int k; U32 s1 = adler & 0xffff; U32 s2 = (adler >> 16) & 0xffff; assert(buf); while (len > 0) { k = len < QEF_NMAX ? len : QEF_NMAX; len -= k; while (k >= 16) { QEF_DO16(buf); buf += 16; k -= 16; } if (k != 0) do { s1 += *buf++; s2 += s1; } while (--k); s1 %= QEF_BASE; s2 %= QEF_BASE; } return (s2 << 16) | s1; } static U32 adler32_file (SV *f, U32 adler, Off_t offset, Off_t size, const char *from) { unsigned char buf[QEF_BUFSZ]; Off_t chunk_sz; careful_fseek(f, offset, from); while (size > 0) { chunk_sz = QEF_MIN(QEF_BUFSZ, size); careful_fread(buf, chunk_sz, f, from); adler = adler32(adler, buf, chunk_sz); size -= chunk_sz; } return adler; } static void prepare_bdfile (SV *orig, Off_t orig_size, QefBDFile *bdf) { unsigned int fphbits; Off_t hsize, offset; U32 i; QefBDRecord *brec; QefBDRecord **fphash; fphbits = hashbits(orig_size / QEF_BLK_SIZE + 1); hsize = (Off_t) 1 << fphbits; New(0, fphash, hsize, QefBDRecord *); for (i = 0; i < hsize; ++i) fphash[i] = 0; qef_cha_init(&bdf->cha, sizeof(QefBDRecord), hsize / 4 + 1); if (orig_size == 0) bdf->size = 0; else { offset = 0; bdf->size = orig_size; /* Start by looking at the last (possibly incomplete) block */ if ((offset = (orig_size / QEF_BLK_SIZE) * QEF_BLK_SIZE) == orig_size) offset -= QEF_BLK_SIZE; while (1) { brec = qef_cha_alloc(&bdf->cha); brec->fp = adler32_file(orig, 0, offset, QEF_MIN(QEF_BLK_SIZE, orig_size - offset), "original"); brec->offset = offset; i = QEF_HASHLONG(brec->fp, fphbits); brec->next = fphash[i]; fphash[i] = brec; if (offset < QEF_BLK_SIZE) break; offset -= QEF_BLK_SIZE; } assert(offset == 0); } bdf->fphbits = fphbits; bdf->fphash = fphash; } /* Output a GDIFF DATA operation. */ static void data_op (SV *changed, SV *delta, Off_t offset, Off_t size, unsigned char *buf) { size_t headsz = 0; assert(size > 0); assert(size <= QEF_INT_MAX); assert(buf); if (size <= 246) buf[headsz++] = size; else if (size <= QEF_USHORT_MAX) { buf[headsz++] = 247; QEF_BE16_PUT(buf, headsz, size); } else { buf[headsz++] = 248; QEF_BE32_PUT(buf, headsz, size); } /* Write the opcode and size argument (if any). */ careful_fwrite(buf, headsz, delta, "delta"); /* Copy the actual data to be inserted into the delta. */ careful_fseek(changed, offset, "changed file"); copy_data(changed, delta, size, buf, "changed file", "delta"); } /* Output a GDIFF COPY operation. */ static void copy_op (SV *delta, Off_t offset, Off_t size) { unsigned char buf[QEF_COPY_MAX]; size_t headsz = 0; assert(size > 0); if (offset <= QEF_USHORT_MAX) { if (size <= QEF_UBYTE_MAX) { buf[headsz++] = 249; QEF_BE16_PUT(buf, headsz, offset); buf[headsz++] = size; } else if (size <= QEF_USHORT_MAX) { buf[headsz++] = 250; QEF_BE16_PUT(buf, headsz, offset); QEF_BE16_PUT(buf, headsz, size); } else if (size <= QEF_INT_MAX) { buf[headsz++] = 251; QEF_BE16_PUT(buf, headsz, offset); QEF_BE32_PUT(buf, headsz, size); } else { /* TODO - break copy ops for bigger than 2Gb into smaller ones */ assert(0); } } else if (offset <= QEF_INT_MAX) { if (size <= QEF_UBYTE_MAX) { buf[headsz++] = 252; QEF_BE32_PUT(buf, headsz, offset); buf[headsz++] = size; } else if (size <= QEF_USHORT_MAX) { buf[headsz++] = 253; QEF_BE32_PUT(buf, headsz, offset); QEF_BE16_PUT(buf, headsz, size); } else if (size <= QEF_INT_MAX) { buf[headsz++] = 254; QEF_BE32_PUT(buf, headsz, offset); QEF_BE32_PUT(buf, headsz, size); } else { /* TODO - break copy ops for bigger than 2Gb into smaller ones */ assert(0); } } else { /* TODO - allow 64bit offsets */ assert(0); } /* Write the opcode and arguments. */ careful_fwrite(buf, headsz, delta, "delta"); } static void do_delta (SV *orig, SV *changed, SV *delta) { U32 fp; Off_t orig_size, changed_size, changed_offset; Off_t rsize, msize, newmsize; Off_t off1, off2, moff, newmoff; QefBDFile bdf; QefBDRecord *brec; unsigned char insbuf[QEF_BUFSZ], buf1[QEF_BUFSZ], buf2[QEF_BUFSZ]; size_t pos, sz1, sz2, minsz; Off_t ins_offset, ins_size; int stop; changed_size = file_size(changed, "changed file"); if (changed_size == 0) return; orig_size = file_size(orig, "original"); prepare_bdfile(orig, orig_size, &bdf); ins_size = 0; changed_offset = 0; while (changed_offset < changed_size) { rsize = QEF_MIN(QEF_BLK_SIZE, changed_size - changed_offset); fp = adler32_file(changed, 0, changed_offset, rsize, "changed file"); brec = bdf.fphash[QEF_HASHLONG(fp, bdf.fphbits)]; for (msize = 0; brec; brec = brec->next) { if (brec->fp == fp) { off1 = brec->offset; off2 = changed_offset; newmsize = 0; newmoff = off1; stop = 0; while (!stop) { sz1 = QEF_MIN(QEF_BUFSZ, orig_size - off1); sz2 = QEF_MIN(QEF_BUFSZ, changed_size - off2); minsz = QEF_MIN(sz1, sz2); if (minsz == 0) break; /* TODO - is there a better way to buffer these? */ careful_fseek(orig, off1, "original"); careful_fread(buf1, minsz, orig, "original"); careful_fseek(changed, off2, "changed file"); careful_fread(buf2, minsz, changed, "changed file"); for (pos = 0; pos < minsz; ++pos) { if (buf1[pos] != buf2[pos]) { stop = 1; break; } } newmsize += pos; off1 += minsz; off2 += minsz; } if (newmsize > msize) { moff = newmoff; msize = newmsize; } } } if (msize < QEF_COPY_MIN) { if (!ins_size) ins_offset = changed_offset; ++ins_size; ++changed_offset; if (ins_size == QEF_INT_MAX) { data_op(changed, delta, ins_offset, ins_size, insbuf); ins_size = 0; } } else { if (ins_size) { data_op(changed, delta, ins_offset, ins_size, insbuf); ins_size = 0; } copy_op(delta, moff, msize); changed_offset += msize; } } if (ins_size) { data_op(changed, delta, ins_offset, ins_size, insbuf); ins_size = 0; } } MODULE = Algorithm::GDiffDelta PACKAGE = Algorithm::GDiffDelta PREFIX = qefgdiff_ PROTOTYPES: DISABLE U32 qefgdiff_gdiff_adler32 (U32 init, SV *s) PREINIT: STRLEN len; const unsigned char *buf; CODE: buf = (unsigned char *) SvPV(s, len); RETVAL = adler32(init, buf, len); OUTPUT: RETVAL void qefgdiff_gdiff_delta (SV *orig, SV *changed, SV *delta) CODE: careful_fwrite("\xD1\xFF\xD1\xFF\x04", 5, delta, "delta"); do_delta(orig, changed, delta); careful_fwrite("\0", 1, delta, "delta"); void qefgdiff_gdiff_apply (SV *orig, SV *delta, SV *output) PREINIT: unsigned char buf[QEF_BUFSZ]; size_t r, s; int c; CODE: /* Check that GDIFF header and version number are valid. */ careful_fread(buf, 5, delta, "delta"); if (buf[0] != 0xD1 || buf[1] != 0xFF || buf[2] != 0xD1 || buf[3] != 0xFF) croak("incorrect GDIFF header at start of delta"); if (buf[4] != 4) croak("wrong version of GDIFF format (%d)," " only version 4 understood", (int) buf[4]); while (1) { careful_fread(buf, 1, delta, "delta"); c = buf[0]; if (c == 0) return; else if (c <= 246) { careful_fread(buf, c, delta, "delta"); careful_fwrite(buf, c, output, "output"); } else { switch (c) { case 247: /* ushort, bytes - append data bytes */ s = read_ushort(delta); copy_data(delta, output, s, buf, "delta", "output"); break; case 248: /* int, bytes - append data bytes */ s = read_int(delta); copy_data(delta, output, s, buf, "delta", "output"); break; case 249: /* ushort, ubyte - copy , */ r = read_ushort(delta); s = read_ubyte(delta); careful_fseek(orig, r, "original"); copy_data(orig, output, s, buf, "original", "output"); break; case 250: /* ushort, ushort - copy , */ r = read_ushort(delta); s = read_ushort(delta); careful_fseek(orig, r, "original"); copy_data(orig, output, s, buf, "original", "output"); break; case 251: /* ushort, int - copy , */ r = read_ushort(delta); s = read_int(delta); careful_fseek(orig, r, "original"); copy_data(orig, output, s, buf, "original", "output"); break; case 252: /* int, ubyte - copy , */ r = read_int(delta); s = read_ubyte(delta); careful_fseek(orig, r, "original"); copy_data(orig, output, s, buf, "original", "output"); break; case 253: /* int, ushort - copy , */ r = read_int(delta); s = read_ushort(delta); careful_fseek(orig, r, "original"); copy_data(orig, output, s, buf, "original", "output"); break; case 254: /* int, int - copy , */ r = read_int(delta); s = read_int(delta); careful_fseek(orig, r, "original"); copy_data(orig, output, s, buf, "original", "output"); break; case 255: /* long, int - copy , */ /* TODO - 64 seeking */ assert(0); break; default: assert(0); } } } # vi:ts=4 sw=4 expandtab: