Diffstat (limited to 'src/crunch.c')
| -rw-r--r-- | src/crunch.c | 164 |
1 file changed, 164 insertions, 0 deletions
diff --git a/src/crunch.c b/src/crunch.c
new file mode 100644
index 0000000..0cc8092
--- /dev/null
+++ b/src/crunch.c
@@ -0,0 +1,164 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "u816.h"
+#include "crunch.h"
+#include "self.h"
+
+#define INITIAL_BITS 9
+#define MAX_BITS 12
+#define MAX_TOKENS (1 << MAX_BITS)
+#define TOK_RESET 256
+#define TOK_END 257
+#define INIT_TOKEN 258
+
+u8 input_buf[MAX_INPUT_SIZE];
+u8 output_buf[MAX_INPUT_SIZE];
+unsigned int input_len, output_len, out_bitpos;
+
+u8 byte_tokens[256];
+typedef struct s_token {
+	u8 *start;
+	u16 length;
+} token_t;
+
+token_t tokentab[MAX_TOKENS];
+
+int token_bits;
+int max_token;
+int curr_token;
+int in_pos;
+
+void init_table(void) {
+	int i;
+
+	memset(tokentab, 0, sizeof(tokentab));
+
+	token_bits = INITIAL_BITS;
+	max_token = 1 << INITIAL_BITS;
+	curr_token = INIT_TOKEN;
+
+	for(i = 0; i < 256; i++) {
+		byte_tokens[i] = (u8)i;
+		tokentab[i].start = &byte_tokens[i];
+		tokentab[i].length = 1;
+	}
+}
+
+void inc_output_len(void) {
+	if(++output_len == MAX_INPUT_SIZE) {
+		fprintf(stderr, "%s: fatal: compressed file would be >16MB.\n", self);
+		exit(1);
+	}
+}
+
+void append_bit(int bit) {
+	output_buf[output_len] |= (bit << (7 - out_bitpos));
+	out_bitpos++;
+	if(out_bitpos == 8) {
+		out_bitpos = 0;
+		inc_output_len();
+	}
+}
+
+void store_token(int tok) {
+	int mask;
+
+	for(mask = 1 << (token_bits - 1); mask; mask >>= 1) {
+		append_bit(tok & mask ? 1 : 0);
+	}
+}
+
+/* match_token() is a brute-force search, which is why alf is so slow.
+   I'll do something smarter at some point.
+   Search backwards: the tokens are stored with longer ones later
+   in the list. */
+int match_token(int pos) {
+	int i, len, maxlen;
+	token_t *t;
+	u8 *p, *q;
+
+	maxlen = input_len - pos;
+
+	for(i = curr_token - 1; i >= INIT_TOKEN; i--) {
+		t = &tokentab[i];
+
+		/* don't search past the end of the input */
+		if(t->length > maxlen) continue;
+
+		/* if the first char doesn't match, don't bother with memcmp.
+		   this is a 5x speedup (!) */
+		if(input_buf[pos] != *(t->start)) continue;
+
+		/* this is where alf spends most of its time.
+		   using memcmp is noticeably slower than the code below. */
+		/*
+		if(memcmp(&input_buf[pos], t->start, t->length) == 0)
+			return i;
+		*/
+
+		/* inline memcmp replacement of sorts. I don't think it's really
+		   faster than memcmp(); it only seems that way because there's
+		   no function call overhead. ~20% speedup.
+		   making it search backwards gives a further ~25% speedup. */
+		len = t->length;
+		p = &input_buf[pos] + len - 1;
+		q = t->start + len - 1;
+		while(len) {
+			if(*p != *q) break;
+			p--; q--;
+			len--;
+		}
+		if(!len) return i;
+	}
+
+	/* hard-coded single-character tokens map to their values, no need
+	   to search. */
+	return input_buf[pos];
+}
+
+void make_token(int start, int end) {
+	/* if the token table is full, reset it. basically start over like
+	   we would with a new file. */
+	if(curr_token == max_token) {
+		if(token_bits == MAX_BITS) {
+			store_token(TOK_RESET); /* stored at the *old*
+						   token size! */
+			token_bits = INITIAL_BITS;
+			init_table();
+			return; /* since we're starting over, *don't* make a token */
+		} else {
+			token_bits++;
+		}
+		max_token = 1 << token_bits;
+	}
+	tokentab[curr_token].start = &input_buf[start];
+	tokentab[curr_token].length = end - start + 1;
+	curr_token++;
+}
+
+void crunch(void) {
+	int new_pos;
+	int token;
+
+	init_table();
+	out_bitpos = 0;
+	in_pos = 0;
+
+	/* 0-byte input files don't get a TOK_RESET */
+	if(input_len)
+		store_token(TOK_RESET);
+
+	while(in_pos < input_len) {
+		token = match_token(in_pos);
+		store_token(token);
+		new_pos = in_pos + tokentab[token].length;
+		if(new_pos < input_len)
+			make_token(in_pos, new_pos);
+		in_pos = new_pos;
+	}
+
+	store_token(TOK_END);
+	if(out_bitpos) inc_output_len();
+}
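
A note for anyone reading the stream format out of this diff: append_bit() fills each output byte from the most significant bit down, and store_token() writes each token most-significant-bit first at whatever token_bits currently is (9 bits until the table grows to 512 entries, then 10, 11, and 12, with TOK_RESET emitted at the old width just before a reset). A decoder therefore has to consume bits in the same order and grow its width on the same schedule. Below is a minimal sketch of the matching bit reader; read_bit(), read_token(), and the rd_* state are hypothetical names, not anything in this commit.

/* Hypothetical decoder-side bit reader; mirrors append_bit()/store_token().
   None of these names exist in this commit. */
static const unsigned char *rd_buf;  /* compressed data */
static unsigned int rd_pos;          /* current byte */
static unsigned int rd_bitpos;       /* current bit within that byte, 0..7 */

static int read_bit(void) {
	/* append_bit() stores the first bit of a byte at the top (bit 7),
	   so read from the top down. */
	int bit = (rd_buf[rd_pos] >> (7 - rd_bitpos)) & 1;
	if(++rd_bitpos == 8) {
		rd_bitpos = 0;
		rd_pos++;
	}
	return bit;
}

static int read_token(int token_bits) {
	/* store_token() writes the most significant bit first */
	int i, tok = 0;
	for(i = 0; i < token_bits; i++)
		tok = (tok << 1) | read_bit();
	return tok;
}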

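For completeness, a sketch of how crunch() is presumably driven. This assumes crunch.h declares input_buf, output_buf, input_len, output_len, and MAX_INPUT_SIZE; the actual driver lives elsewhere in the tree, so the main() below is illustrative only.

#include <stdio.h>
#include "crunch.h"  /* assumed to declare the buffers and MAX_INPUT_SIZE */

/* note: linking against crunch.c also needs the `self` name from self.h */

int main(int argc, char **argv) {
	FILE *f;

	if(argc != 2) {
		fprintf(stderr, "usage: %s file\n", argv[0]);
		return 1;
	}
	if(!(f = fopen(argv[1], "rb"))) {
		perror(argv[1]);
		return 1;
	}
	input_len = fread(input_buf, 1, MAX_INPUT_SIZE, f);
	fclose(f);

	crunch();  /* fills output_buf; output_len includes the final
	              partial byte, since crunch() calls inc_output_len()
	              when out_bitpos is nonzero */

	fwrite(output_buf, 1, output_len, stdout);
	return 0;
}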