/*********************************************************** bd.c -- binary diff H.Okumura ***********************************************************/ #include #include #include #include #include #define DEBUG 0 #define THRESHOLD 5 /* minimum match length */ #define PRINTSTEP 100 #if DEBUG #define readable(c) (((c) > ' ') ? (c) : '.') #endif typedef unsigned char *string; FILE *file1, *file2, *file3; string buf1, buf2, buf3; unsigned size1, size2, size3 = 0, matchlen, matchpos, printcount = 0; enum {FALSE, TRUE} afterliteral = FALSE; void Error(char *fmt, ...) { va_list args; va_start(args, fmt); vfprintf(stderr, fmt, args); fprintf(stderr, "\n"); va_end(args); exit(EXIT_FAILURE); } void ReadFile(FILE *file, string *pbuf, unsigned *size) { unsigned i; long lsize; fseek(file, 0L, SEEK_END); lsize = ftell(file); *size = (unsigned) lsize; if (*size != lsize) Error("file too long"); *pbuf = malloc((size_t) *size); if (*pbuf == NULL) Error("out of memory"); rewind(file); for (i = 0; i < *size; i++) (*pbuf)[i] = getc(file); } /* simplified Boyer-Moore string search */ void Search(string pattern, unsigned patlen) { int i, j, k, skip[256]; unsigned len; unsigned char tail; matchlen = 0; len = THRESHOLD; i = len - 1; while (i < size1 && len <= patlen) { tail = pattern[len - 1]; for (j = 0; j < 256; j++) skip[j] = len; k = len - 1; for (j = 0; j < k; j++) skip[pattern[j]] = k - j; while (i < size1) { if (buf1[i] == tail) { j = len - 2; k = i - 1; while (pattern[j] == buf1[k]) { if (j == 0) goto found; j--; k--; } } i += skip[buf1[i]]; } return; found: i++; while (pattern[len] == buf1[i] && len < patlen) { len++; i++; } matchpos = i - len; matchlen = len; len++; i++; } } void OutputUnmatch(unsigned len, unsigned pos) { #if DEBUG { unsigned i; fprintf(stderr, "LIT %u 0x%04X: ", len, pos); for (i = 0; i < len; i++) putc(readable(buf2[pos + i]), stderr); fprintf(stderr, "\n\n"); } #endif size3 += len; if (len <= 127) { putc(len - 1, file3); size3++; } else { putc((len - 128) >> 8, file3); putc((len - 128) & 255, file3); size3 += 2; } while (len-- != 0) { putc(buf2[pos], file3); pos++; } afterliteral = TRUE; } void OutputMatch(void) { #if DEBUG { unsigned i; fprintf(stderr, "COPY %u 0x%04X: ", matchlen, matchpos); for (i = 0; i < matchlen; i++) putc(readable(buf1[matchpos + i]), stderr); fprintf(stderr, "\n\n"); } #endif if (afterliteral) { if (matchlen <= 254 + THRESHOLD) { putc(matchlen - THRESHOLD, file3); size3 += 3; } else { putc(255, file3); putc((matchlen - (THRESHOLD + 255)) >> 8, file3); putc((matchlen - (THRESHOLD + 255)) & 255, file3); size3 += 5; } afterliteral = FALSE; } else { if (matchlen <= 126 + THRESHOLD) { putc(matchlen + (128 - THRESHOLD), file3); size3 += 3; } else { putc(255, file3); putc((matchlen - (THRESHOLD + 127)) >> 8, file3); putc((matchlen - (THRESHOLD + 127)) & 255, file3); size3 += 5; } } putc(matchpos >> 8, file3); putc(matchpos & 255, file3); } void Encode(void) { unsigned pos = 0, unmatchlen = 0, unmatchpos = 0; unsigned long cr; ReadFile(file1, &buf1, &size1); ReadFile(file2, &buf2, &size2); if ((buf3 = malloc(size2)) == NULL) Error("out of memory"); while (pos < size2) { Search(buf2 + pos, size2 - pos); if (matchlen >= THRESHOLD) { if (unmatchlen != 0) { OutputUnmatch(unmatchlen, unmatchpos); unmatchlen = 0; } OutputMatch(); pos += matchlen; } else { if (unmatchlen == 0) unmatchpos = pos; buf3[unmatchlen++] = buf2[pos++]; } if (pos > printcount) { printf("%12u\r", pos); printcount += PRINTSTEP; } } if (unmatchlen != 0) OutputUnmatch(unmatchlen, unmatchpos); printf("old: %u bytes\n", size1); printf("new: %u bytes\n", size2); printf("dif: %u bytes\n", size3); cr = (1000UL * size3 + size2 / 2) / size2; printf("dif/new: %1lu.%03lu\n", cr / 1000, cr % 1000); } void Decode(void) { int c; unsigned len, pos; ReadFile(file1, &buf1, &size1); while ((c = getc(file2)) != EOF) { if (afterliteral) { /* copy */ afterliteral = FALSE; if (c <= 254) len = c + THRESHOLD; else { len = getc(file2) << 8; len += getc(file2) + (255 + THRESHOLD); } pos = getc(file2) << 8; pos += getc(file2); size3 += len; while (len-- != 0) { putc(buf1[pos], file3); pos++; } } else if (c <= 127) { /* literal */ if (c <= 126) len = c + 1; else { len = getc(file2) << 8; len += getc(file2) + 128; } size3 += len; while (len-- != 0) { c = getc(file2); putc(c, file3); } afterliteral = TRUE; } else { /* copy */ if (c <= 254) len = c - (128 - THRESHOLD); else { len = getc(file2) << 8; len += getc(file2) + (127 + THRESHOLD); } pos = getc(file2) << 8; pos += getc(file2); size3 += len; while (len-- != 0) { putc(buf1[pos], file3); pos++; } } if (size3 > printcount) { printf("%12u\r", size3); printcount += PRINTSTEP; } } printf("%12u\n", size3); } int main(int argc, char *argv[]) { char *s; if (argc != 5 || strpbrk(argv[1], "DEde") == NULL) Error("To encode: bd e oldfile newfile diffile\n" "To decode: bd d oldfile diffile newfile"); if ((s = argv[2], (file1 = fopen(s, "rb")) == NULL) || (s = argv[3], (file2 = fopen(s, "rb")) == NULL) || (s = argv[4], (file3 = fopen(s, "wb")) == NULL)) Error("can't open %s", s); if (toupper(*argv[1]) == 'E') Encode(); else Decode(); fclose(file1); fclose(file2); fclose(file3); return EXIT_SUCCESS; }