#include #include #include #include "u816.h" #include "crunch.h" #include "self.h" #define INITIAL_BITS 9 #define MAX_BITS 12 #define MAX_TOKENS (1 << MAX_BITS) #define TOK_RESET 256 #define TOK_END 257 #define INIT_TOKEN 258 u8 input_buf[MAX_INPUT_SIZE]; u8 output_buf[MAX_INPUT_SIZE]; unsigned int input_len, output_len, out_bitpos; typedef struct s_token { u8 chr; u16 number; struct s_token *sibling; struct s_token *kids; } token_t; token_t root_tokens[256]; /* not used for lookups, just a fast way to free() everything */ token_t *tokentab[MAX_TOKENS]; int token_bits; int max_token; int curr_token = INIT_TOKEN; int in_pos; int new_pos; void init_table(void) { int i; for(i = INIT_TOKEN; i < curr_token; i++) free(tokentab[i]); for(i = 0; i < 256; i++) { root_tokens[i].chr = root_tokens[i].number = i; root_tokens[i].kids = root_tokens[i].sibling = 0; } token_bits = INITIAL_BITS; max_token = 1 << INITIAL_BITS; curr_token = INIT_TOKEN; } void inc_output_len(void) { if(++output_len == MAX_INPUT_SIZE) { fprintf(stderr, "%s: fatal: compressed file would be >16MB.\n", self); exit(1); } } void append_bit(int bit) { output_buf[output_len] |= (bit << (7 - out_bitpos)); out_bitpos++; if(out_bitpos == 8) { out_bitpos = 0; inc_output_len(); } } void store_token(int tok) { int mask; for(mask = 1 << (token_bits - 1); mask; mask >>= 1) { append_bit(tok & mask ? 1 : 0); } } token_t *get_kid(token_t *t, u8 chr) { token_t *kid; if(!t->kids) return 0; kid = t->kids; while(kid) { if(kid->chr == chr) return kid; kid = kid->sibling; } return 0; } token_t *match_token(int pos) { token_t *t, *p; t = &root_tokens[input_buf[pos]]; new_pos = pos + 1; p = t; while((p = get_kid(t, input_buf[new_pos]))) { t = p; new_pos++; if(new_pos == input_len) break; } return t; } token_t *new_token(u8 chr) { token_t *newtok; newtok = malloc(sizeof(token_t)); if(!newtok) { fprintf(stderr, "%s: fatal: out of memory!\n", self); exit(1); } newtok->chr = chr; newtok->kids = newtok->sibling = 0; newtok->number = curr_token; tokentab[curr_token] = newtok; return newtok; } void add_kid(token_t *oldtok, token_t *newtok) { token_t *kid; if(!oldtok->kids) { oldtok->kids = newtok; return; } kid = oldtok->kids; while(kid->sibling) kid = kid->sibling; kid->sibling = newtok; } void make_token(token_t *oldtok, u8 newchr) { token_t *newtok; /* if the token table is full, reset it. basically start over like we would with a new file. */ if(curr_token == max_token) { if(token_bits == MAX_BITS) { store_token(TOK_RESET); /* stored at the *old* token size! */ token_bits = INITIAL_BITS; init_table(); return; /* since we're starting over, *don't* make a token */ } else { token_bits++; } max_token = 1 << token_bits; } newtok = new_token(newchr); add_kid(oldtok, newtok); curr_token++; } void crunch(void) { token_t *tok; init_table(); out_bitpos = 0; in_pos = new_pos = 0; /* 0-byte input files don't get a TOK_RESET */ if(!input_len) { store_token(TOK_END); return; } store_token(TOK_RESET); while(in_pos < input_len) { tok = match_token(in_pos); store_token(tok->number); if(new_pos < input_len) make_token(tok, input_buf[new_pos]); in_pos = new_pos; } store_token(TOK_END); if(out_bitpos) inc_output_len(); init_table(); }