#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "u816.h"
#include "crunch.h"
#include "self.h"

#define INITIAL_BITS 9
#define MAX_BITS 12
#define MAX_TOKENS (1 << MAX_BITS)

#define TOK_RESET 256
#define TOK_END 257
#define INIT_TOKEN 258

u8 input_buf[MAX_INPUT_SIZE];
u8 output_buf[MAX_INPUT_SIZE];
unsigned int input_len, output_len, out_bitpos;

u8 byte_tokens[256];

typedef struct s_token {
	u8 *start;
	u16 length;
} token_t;

token_t tokentab[MAX_TOKENS];
int token_bits;
int max_token;
int curr_token;
int in_pos;

/* (re)initialize the token table: 9-bit codes, the 256 hard-coded
   single-byte tokens, and INIT_TOKEN as the next token to create. */
void init_table(void)
{
	int i;

	memset(tokentab, 0, sizeof(tokentab));
	token_bits = INITIAL_BITS;
	max_token = 1 << INITIAL_BITS;
	curr_token = INIT_TOKEN;

	for(i = 0; i < 256; i++) {
		byte_tokens[i] = (u8)i;
		tokentab[i].start = &byte_tokens[i];
		tokentab[i].length = 1;
	}
}

/* count one more output byte; bail out if the result would overflow
   the output buffer. */
void inc_output_len(void)
{
	if(++output_len == MAX_INPUT_SIZE) {
		fprintf(stderr, "%s: fatal: compressed file would be >16MB.\n", self);
		exit(1);
	}
}

/* append one bit to the output stream, MSB first. */
void append_bit(int bit)
{
	output_buf[output_len] |= (bit << (7 - out_bitpos));
	out_bitpos++;
	if(out_bitpos == 8) {
		out_bitpos = 0;
		inc_output_len();
	}
}

/* emit a token at the current code width, high bit first. */
void store_token(int tok)
{
	int mask;

	for(mask = 1 << (token_bits - 1); mask; mask >>= 1) {
		append_bit(tok & mask ? 1 : 0);
	}
}

/* match_token() is a brute-force search, which is why alf is so slow.
   I'll do something smarter at some point.

   search backwards; the tokens are stored with longer ones later in
   the list. */
int match_token(int pos)
{
	int i, len, maxlen;
	token_t *t;
	u8 *p, *q;

	maxlen = input_len - pos;
	for(i = curr_token - 1; i >= INIT_TOKEN; i--) {
		t = &tokentab[i];

		/* don't search past the end of the input */
		if(t->length > maxlen) continue;

		/* if the first char doesn't match, don't bother with
		   memcmp. this is a 5x speedup (!) */
		if(input_buf[pos] != *(t->start)) continue;

		/* this is where alf spends most of its time. using memcmp
		   is noticeably slower than the code below. */
		/*
		if(memcmp(&input_buf[pos], t->start, t->length) == 0)
			return i;
		*/

		/* inline memcmp replacement of sorts. I don't think it's
		   really faster than memcmp(); it only seems that way
		   because there's no function-call overhead. ~20% speedup.
		   making it search backwards gives a further ~25% speedup. */
		len = t->length;
		p = &input_buf[pos] + len - 1;
		q = t->start + len - 1;
		while(len) {
			if(*p != *q) break;
			p--;
			q--;
			len--;
		}
		if(!len) return i;
	}

	/* the hard-coded single-character tokens map to their own
	   values, no need to search. */
	return input_buf[pos];
}

void make_token(int start, int end)
{
	/* if the token table is full, reset it. basically start over
	   like we would with a new file. */
	if(curr_token == max_token) {
		if(token_bits == MAX_BITS) {
			store_token(TOK_RESET); /* stored at the *old* token size! */
			token_bits = INITIAL_BITS;
			init_table();
			return; /* since we're starting over, *don't* make a token */
		} else {
			token_bits++;
		}
		max_token = 1 << token_bits;
	}

	tokentab[curr_token].start = &input_buf[start];
	tokentab[curr_token].length = end - start + 1;
	curr_token++;
}

/* compress input_buf[0..input_len) into output_buf / output_len. */
void crunch(void)
{
	int new_pos;
	int token;

	init_table();
	out_bitpos = 0;
	in_pos = 0;

	/* 0-byte input files don't get a TOK_RESET */
	if(input_len) store_token(TOK_RESET);

	while(in_pos < input_len) {
		token = match_token(in_pos);
		store_token(token);
		new_pos = in_pos + tokentab[token].length;
		if(new_pos < input_len) make_token(in_pos, new_pos);
		in_pos = new_pos;
	}

	store_token(TOK_END);
	if(out_bitpos) inc_output_len();
}
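
/* for reference, a minimal sketch of the matching bit reader: codes
   are packed MSB-first at the current code width, exactly as
   append_bit() and store_token() above write them. nothing in this
   block is part of alf's real decruncher; the names are made up for
   illustration and it only compiles when the guard macro is defined. */
#ifdef CRUNCH_BITREADER_SKETCH
static unsigned int rd_pos;	/* byte position in the stream */
static unsigned int rd_bitpos;	/* bit within that byte, 0 = MSB */

int read_token(const u8 *buf, int bits)
{
	int tok = 0;

	/* pull 'bits' bits, high bit first, mirroring store_token() */
	while(bits--) {
		tok = (tok << 1) | ((buf[rd_pos] >> (7 - rd_bitpos)) & 1);
		if(++rd_bitpos == 8) {
			rd_bitpos = 0;
			rd_pos++;
		}
	}
	return tok;
}
#endif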
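
/* a minimal sketch of driving this module, assuming the real entry
   point lives elsewhere in alf (self.h / crunch.h are the actual
   interface). everything here is hypothetical and only compiled when
   CRUNCH_EXAMPLE_MAIN is defined: read a file into input_buf, crunch
   it, and write output_buf to stdout. */
#ifdef CRUNCH_EXAMPLE_MAIN
char *self = "crunch";	/* normally defined by alf's real main(); type assumed */

int main(int argc, char **argv)
{
	FILE *f;
	size_t n;

	if(argc != 2) {
		fprintf(stderr, "usage: %s file\n", self);
		return 1;
	}
	if(!(f = fopen(argv[1], "rb"))) {
		perror(argv[1]);
		return 1;
	}
	/* note: input past MAX_INPUT_SIZE is silently truncated here */
	n = fread(input_buf, 1, MAX_INPUT_SIZE, f);
	fclose(f);
	input_len = (unsigned int)n;

	crunch();
	if(fwrite(output_buf, 1, output_len, stdout) != output_len) {
		perror("stdout");
		return 1;
	}
	return 0;
}
#endif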