| author | B. Watson <urchlay@slackware.uk> | 2025-11-30 17:17:46 -0500 |
|---|---|---|
| committer | B. Watson <urchlay@slackware.uk> | 2025-11-30 17:17:46 -0500 |
| commit | 2b21c11d156e5c76c550b61a3b3a33a89d8578f9 (patch) | |
| tree | df06d5bf78486717b34949971131b6d16044e4d3 /src/alf.c | |
| parent | be8cd823dbd9b27889421bb1dc70217d39752663 (diff) | |
| download | alftools-2b21c11d156e5c76c550b61a3b3a33a89d8578f9.tar.gz | |
alf: isolate crunch algo in its own file.
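The crunch implementation (token table, bit packer, match/emit loop) moves out of alf.c, which now only pulls in a new header. Judging by the identifiers alf.c still references after this commit (`crunch()`, the shared I/O buffers, `MAX_INPUT_SIZE` for reading the input), the new `src/crunch.h` presumably looks something like the sketch below. This is an inference from the diff, not the actual header from the repo:

```c
/* Hypothetical reconstruction of src/crunch.h, inferred from the
 * identifiers alf.c still uses after this commit; the real header
 * may differ. */
#ifndef CRUNCH_H
#define CRUNCH_H

#include "u816.h" /* u8/u16 typedefs, as used elsewhere in alftools */

/* 16MB hard cap, same limit the deleted in-file code enforced */
#define MAX_INPUT_SIZE (1 << 24)

/* shared state: alf.c fills input_buf/input_len from the input file,
 * crunch() fills output_buf/output_len with the compressed stream */
extern u8 input_buf[MAX_INPUT_SIZE];
extern u8 output_buf[MAX_INPUT_SIZE];
extern unsigned int input_len, output_len;

/* compress input_buf into output_buf; presumably now responsible for
 * init_table() itself, since crunch_file() no longer calls it */
void crunch(void);

#endif /* CRUNCH_H */
```

Note that `update_header()` stays behind in alf.c (crunch_file() now calls it right after crunch() returns), which keeps the output-header bookkeeping out of the isolated algorithm.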
Diffstat (limited to 'src/alf.c')
| -rw-r--r-- | src/alf.c | 173 |
1 file changed, 3 insertions, 170 deletions
```diff
--- a/src/alf.c
+++ b/src/alf.c
@@ -10,20 +10,7 @@
 #include "u816.h"
 #include "sanity.h"
 #include "self.h"
-
-#define INITIAL_BITS 9
-#define MAX_BITS 12
-#define MAX_TOKENS (1 << MAX_BITS)
-#define TOK_RESET 256
-#define TOK_END 257
-#define INIT_TOKEN 258 /* 256 = reset dict, 257 = token_bits++ */
-
-#define MAX_INPUT_SIZE (1 << 24)
-
-u8 input_buf[MAX_INPUT_SIZE];
-u8 output_buf[MAX_INPUT_SIZE];
-u8 byte_tokens[256];
-unsigned int input_len, output_len, out_bitpos;
+#include "crunch.h"
 
 int opt_append = 0;
 int opt_overwrite = 0;
@@ -34,37 +21,10 @@ int opt_txtconv = 0;
 int opt_verbose = 0;
 
 struct stat in_file_stat;
-
-typedef struct s_token {
-	u8 *start;
-	u16 length;
-} token_t;
-
-token_t tokentab[MAX_TOKENS];
+long hdr_compsize_pos;
 
 FILE *out_file, *in_file;
 const char *out_filename, *in_filename;
-int token_bits;
-int max_token;
-int curr_token;
-long hdr_compsize_pos;
-int in_pos;
-
-void init_table(void) {
-	int i;
-
-	memset(tokentab, 0, sizeof(tokentab));
-
-	token_bits = INITIAL_BITS;
-	max_token = 1 << INITIAL_BITS;
-	curr_token = INIT_TOKEN;
-
-	for(i = 0; i < 256; i++) {
-		byte_tokens[i] = (u8)i;
-		tokentab[i].start = &byte_tokens[i];
-		tokentab[i].length = 1;
-	}
-}
 
 void store_quad(int pos, unsigned long data) {
 	int i;
@@ -185,132 +145,6 @@ void open_input(const char *filename) {
 	}
 }
 
-void inc_output_len(void) {
-	if(++output_len == MAX_INPUT_SIZE) {
-		fprintf(stderr, "%s: fatal: compressed file would be >16MB.\n", self);
-		exit(1);
-	}
-}
-
-void append_bit(int bit) {
-	output_buf[output_len] |= (bit << (7 - out_bitpos));
-	out_bitpos++;
-	if(out_bitpos == 8) {
-		out_bitpos = 0;
-		inc_output_len();
-	}
-}
-
-void store_token(int tok) {
-	int mask;
-
-	for(mask = 1 << (token_bits - 1); mask; mask >>= 1) {
-		append_bit(tok & mask ? 1 : 0);
-	}
-}
-
-/* match_token() is a brute-force search, which is why alf is so slow.
-   I'll do something smarter at some point.
-   search backwards, the tokens are stored with longer ones later
-   in the list. */
-int match_token(int pos) {
-	int i, len, maxlen;
-	token_t *t;
-	u8 *p, *q;
-
-	maxlen = input_len - pos;
-
-	for(i = curr_token - 1; i >= INIT_TOKEN; i--) {
-		t = &tokentab[i];
-
-		/* don't search past the end of the input */
-		if(t->length > maxlen) continue;
-
-		/* if the first char doesn't match, don't bother with memcmp.
-		   this is a 5x speedup (!) */
-		if(input_buf[pos] != *(t->start)) continue;
-
-		/* this is where alf spends most of its time.
-		   using memcmp is noticeably slower than the code below. */
-		/*
-		if(memcmp(&input_buf[pos], t->start, t->length) == 0)
-			return i;
-		*/
-
-		/* inline memcmp replacement of sorts. I don't think it's really
-		   faster than memcmp(), it only seems that way because there's
-		   no function call overhead. ~20% speedup.
-		   making it search backwards gives a further ~25% speedup.
-		*/
-		len = t->length;
-		p = &input_buf[pos] + len - 1;
-		q = t->start + len - 1;
-		while(len) {
-			if(*p != *q) break;
-			p--; q--;
-			len--;
-		}
-		if(!len) return i;
-	}
-
-	/* hard-coded single character tokens map to their values, no need
-	   to search. */
-	return input_buf[pos];
-}
-
-void make_token(int start, int end) {
-	int i;
-	/* if the token table is full, reset it. basically start over like
-	   we would with a new file. */
-	if(curr_token == max_token) {
-		if(token_bits == MAX_BITS) {
-			store_token(TOK_RESET); /* stored at the *old* token size! */
-			token_bits = INITIAL_BITS;
-			init_table();
-			return; /* since we're starting over, *don't* make a token */
-		} else {
-			token_bits++;
-		}
-		max_token = 1 << token_bits;
-	}
-	tokentab[curr_token].start = &input_buf[start];
-	tokentab[curr_token].length = end - start + 1;
-	curr_token++;
-
-	if(opt_verbose) {
-		printf("%d -> %d (%d) = ", start, end, end - start + 1);
-		for(i = start; i < end; i++)
-			putchar(input_buf[i]);
-		putchar('\n');
-	}
-}
-
-void crunch(void) {
-	int new_pos;
-	in_pos = 0;
-	int token;
-
-	out_bitpos = 0;
-
-	/* 0-byte input files don't get a TOK_RESET */
-	if(input_len)
-		store_token(TOK_RESET);
-
-	while(in_pos < input_len) {
-		if(opt_verbose) printf("in_pos==%d\n", in_pos);
-		token = match_token(in_pos);
-		store_token(token);
-		new_pos = in_pos + tokentab[token].length;
-		if(new_pos < input_len)
-			make_token(in_pos, new_pos);
-		in_pos = new_pos;
-	}
-
-	store_token(TOK_END);
-	if(out_bitpos) inc_output_len();
-	update_header();
-}
-
 void make_backup(void) {
 	char bak[PATH_MAX + 2];
 	strncpy(bak, out_filename, PATH_MAX);
@@ -330,8 +164,6 @@ void convert_eols(void) {
 }
 
 void crunch_file(const char *filename) {
-	init_table();
-
 	open_input(filename);
 
 	/* read in entire input, couldn't do it this way on the Atari */
@@ -354,6 +186,7 @@ void crunch_file(const char *filename) {
 
 	/* crunches the entire input to memory! */
 	crunch();
+	update_header();
 
 	/* don't open the output file until crunch() has succeeded once.
 	   this avoids leaving 0-byte turds */
```
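What moved out is a small LZW-style coder: codes start at 9 bits wide (`INITIAL_BITS`) and grow to 12 (`MAX_BITS`) as the dictionary fills, bytes 0-255 encode themselves, code 256 (`TOK_RESET`) flushes the dictionary, and code 257 (`TOK_END`) terminates the stream. To make the stream format concrete, here is a minimal standalone sketch of just the bit-packing side. The constants and the packing logic come from the deleted code; the names and the demo stream are illustrative, not alftools code:

```c
/* Standalone sketch of the bitstream the deleted append_bit()/
 * store_token() produced: tokens written MSB-first at the current
 * code width. Constants are from the deleted #defines; everything
 * else is illustrative. */
#include <stdio.h>
#include <stdint.h>

#define INITIAL_BITS 9
#define MAX_BITS 12   /* width ceiling in the real coder; unused in this demo */
#define TOK_RESET 256 /* reset dictionary */
#define TOK_END 257   /* end of stream */

static uint8_t buf[64];
static unsigned pos, bitpos;

/* same packing as the deleted append_bit(): fill each byte from the
 * high bit down */
static void put_bit(int bit) {
    buf[pos] |= (uint8_t)(bit << (7 - bitpos));
    if (++bitpos == 8) { bitpos = 0; pos++; }
}

/* same loop as the deleted store_token(): emit `bits` bits, MSB first */
static void put_token(int tok, int bits) {
    for (int mask = 1 << (bits - 1); mask; mask >>= 1)
        put_bit((tok & mask) ? 1 : 0);
}

int main(void) {
    /* a tiny stream: reset marker, the literal 'A' (single-byte
     * tokens are their own codes), then end-of-stream */
    put_token(TOK_RESET, INITIAL_BITS);
    put_token('A', INITIAL_BITS);
    put_token(TOK_END, INITIAL_BITS);

    /* flush the partial last byte, like the real coder's
     * "if(out_bitpos) inc_output_len();" */
    unsigned n = pos + (bitpos ? 1 : 0);
    for (unsigned i = 0; i < n; i++)
        printf("%02x ", buf[i]);
    putchar('\n'); /* prints: 80 10 60 20 */
    return 0;
}
```

One subtlety the deleted `make_token()` calls out in its own comment: when the 12-bit table fills, `TOK_RESET` is emitted at the *old* code width before the width drops back to `INITIAL_BITS`, so a decoder has to read the reset code at its current width and only then shrink, or it loses bit sync.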
