/*********************************************************** brent.c -- data compression a la Brent ***********************************************************/ #include #define TRUE 1 #define FALSE 0 #define HASHMAX 12345 /* odd number */ int debug; unsigned int pos[HASHMAX]; unsigned char len[HASHMAX]; void output1(unsigned char c) { printf("%c", c); if (debug) printf("\n"); } void output2(int offset, int matchlen) { printf("(%d,%d)", offset, matchlen); if (debug) printf("\n"); } void encode(int n, unsigned char buffer[]) /* Encode buffer[1..n]. */ { int h, h1, i, j, j1, k, k1, m, m1, found; long b; j = 0; while (j < n) { m = 2; m1 = 0; h = buffer[j]; do { /* Look up buffer[j...j+m-1] in hash table. Represent the matching entry in hash table by (k, m). */ h = (int)((256L * h + buffer[j + m - 1]) % HASHMAX); if (len[h] == m) { k = pos[h]; for (i = 0; i < m; i++) if (buffer[k + i] != buffer[j + i]) break; found = (i == m); } else found = FALSE; if (debug) printf("[1] %.*s %sfound\n", m, &buffer[j], (found) ? "" : "not "); if (found) { /* Save the best match found so far. */ k1 = k; m1 = m; /* Replace the representation of the matching entry in hash table by (j, m). */ pos[h] = j; if (k + m <= n) { /* Enter buffer[k...k+m] in hash table (overwriting any matching entry). */ h1 = (int)((256L * h + buffer[k + m]) % HASHMAX); pos[h1] = k; len[h1] = m + 1; if (debug) printf("[1] %.*s entered\n", m + 1, &buffer[k]); } m++; } else { /* Enter buffer[j...j+m-1] in hash table. */ pos[h] = j; len[h] = m; if (debug) printf("[1] %.*s entered\n", m, &buffer[j]); } } while (found && j + m <= n); if (m1 >= 2) { output2(j - k1 - 1, m1); if (j + m1 < n - 1) { /* Enter substrings of maximal match in hash table. */ h = buffer[j + m1]; b = 1L; m = 2; for (j1 = j + m1 - 1; j1 >= j + 1; j1--) { /* Look up buffer[j1...j+m1+1] in hash table. */ b = (b * 256L) % HASHMAX; h = (int)((b * buffer[j1] + h) % HASHMAX); if (len[h] == m) { k = pos[h]; for (i = 0; i < m; i++) if (buffer[k + i] != buffer[j1 + i]) break; found = (i == m); } else found = FALSE; if (debug) printf("[2] %.*s %sfound\n", m, &buffer[j1], (found) ? "" : "not "); if (found) { pos[h] = j1; } else { /* Enter buffer[j1...j+m1+1] in hash table. */ pos[h] = j1; len[h] = m; if (debug) printf("[2] %.*s entered\n", m, &buffer[j1]); } m++; } } j += m1; } else { output1(buffer[j]); j++; } } } #include #include int main(int argc, char *argv[]) { if (argc == 2) debug = 0; else if (argc == 3 && argv[2][0] == '-' && (argv[2][1] == 'D' || argv[2][1] == 'd')) debug = 1; else { printf("Usage: brent somestring [-d]\n -d: debug\n"); return EXIT_FAILURE; } encode(strlen(argv[1]), (unsigned char *)argv[1]); printf("\n"); return EXIT_SUCCESS; }