diff options
Diffstat (limited to 'src/alf.c')
| -rw-r--r-- | src/alf.c | 441 |
1 files changed, 0 insertions, 441 deletions
diff --git a/src/alf.c b/src/alf.c deleted file mode 100644 index fb79fcf..0000000 --- a/src/alf.c +++ /dev/null @@ -1,441 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <ctype.h> -#include <unistd.h> -#include <limits.h> -#include <sys/stat.h> -#include <time.h> - -#include "self.h" - -#ifndef u8 -#define u8 unsigned char -#define u16 unsigned short -#endif - -#define INITIAL_BITS 9 -#define MAX_BITS 12 -#define MAX_TOKENS (1 << MAX_BITS) -#define TOK_RESET 256 -#define TOK_END 257 -#define INIT_TOKEN 258 /* 256 = reset dict, 257 = token_bits++ */ - -#define MAX_INPUT_SIZE (1 << 24) - -u8 input_buf[MAX_INPUT_SIZE]; -u8 output_buf[MAX_INPUT_SIZE]; -u8 byte_tokens[256]; -unsigned int input_len, output_len, out_bitpos; - -int opt_append = 0; -int opt_overwrite = 0; -int opt_zerotime = 0; -int opt_alftime = 0; -int opt_gmtime = 0; -int opt_txtconv = 0; - -struct stat in_file_stat; - -typedef struct s_token { - u8 *start; - u16 length; -} token_t; - -token_t tokentab[MAX_TOKENS]; - -FILE *out_file, *in_file; -const char *out_filename, *in_filename; -int token_bits; -int max_token; -int curr_token; -long hdr_compsize_pos; -int in_pos; - -void init_table(void) { - int i; - - memset(tokentab, 0, sizeof(tokentab)); - - token_bits = INITIAL_BITS; - max_token = 1 << INITIAL_BITS; - curr_token = INIT_TOKEN; - - for(i = 0; i < 256; i++) { - byte_tokens[i] = (u8)i; - tokentab[i].start = &byte_tokens[i]; - tokentab[i].length = 1; - } -} - -void store_quad(int pos, unsigned long data) { - int i; - - for(i = 0; i < 4; i++) { - output_buf[pos++] = data & 0xff; - data >>= 8; - } -} - -void store_cksum(void) { - int i; - u16 cksum = 0; - - for(i = 0; i < input_len; i++) - cksum += input_buf[i]; - - output_buf[23] = cksum & 0xff; - output_buf[24] = cksum >> 8; -} - -/* examples: - foo => FOO. - toolongfile => TOOLONGF. (really should be TOOLONGF.ILE) - regular.txt => REGULAR.TXT - too.many.dots => TOO.MAN - */ -void atarify_filename(char *result) { - int i; - char name[9] = { 0 }, ext[4] = { 0 }, *p; - - p = strrchr(in_filename, '/'); - if(p) - p++; - else - p = (char *)in_filename; - - strncpy(name, p, 8); - for(i = 0; i < 8; i++) { - if(!name[i]) break; - if(name[i] == '.') { - name[i] = '\0'; - break; - } - } - - strcpy(result, name); - strcat(result, "."); - - p = strchr(in_filename, '.'); - if(p) { - p++; - strncpy(ext, p, 3); - for(p = ext; *p; p++) - if(*p == '.') *p = 0; - strcat(result, ext); - } - - for(p = result; *p; p++) - *p = toupper(*p); -} - -/* see Arcinfo for the gory details. */ -unsigned long get_msdos_date_time(void) { - time_t t = in_file_stat.st_mtime; - struct tm *tm; - int msdos_year; - u16 ms_date, ms_time; - - if(opt_gmtime) - tm = gmtime(&t); - else - tm = localtime(&t); - - msdos_year = tm->tm_year + 1900 - 1980; - - ms_date = tm->tm_mday | (tm->tm_mon << 5) | (msdos_year << 9); - ms_time = (tm->tm_min << 5) | (tm->tm_hour << 11); - return ms_date | (ms_time << 16); -} - -void create_header(void) { - char hdr_filename[13]; - unsigned long time; - - atarify_filename(hdr_filename); - printf("Crunching %s\n", hdr_filename); - - if(opt_alftime) - time = 0x03130588; - else if(opt_zerotime) - time = 0; - else - time = get_msdos_date_time(); - - output_buf[0] = 0x1a; - output_buf[1] = 0x0f; - memset(&output_buf[3], 0x20, 13); - strncat((char *)&output_buf[2], hdr_filename, 13); - output_buf[14] = 0x00; - store_quad(15, 0); /* compressed size, fill in later */ - store_quad(19, time); - store_cksum(); - store_quad(25, input_len); - output_len = 29; -} - -void update_header(void) { - store_quad(15, output_len - 29); -} - -void open_input(const char *filename) { - in_filename = filename; - if(!(in_file = fopen(in_filename, "rb"))) { - perror(in_filename); - exit(1); - } -} - -void inc_output_len(void) { - if(++output_len == MAX_INPUT_SIZE) { - fprintf(stderr, "%s: fatal: compressed file would be >16MB.\n", self); - exit(1); - } -} - -void append_bit(int bit) { - output_buf[output_len] |= (bit << (7 - out_bitpos)); - out_bitpos++; - if(out_bitpos == 8) { - out_bitpos = 0; - inc_output_len(); - } -} - -void store_token(int tok) { - int mask; - - for(mask = 1 << (token_bits - 1); mask; mask >>= 1) { - append_bit(tok & mask ? 1 : 0); - } -} - -/* match_token() is a brute-force search, which is why alf is so slow. - I'll do something smarter at some point. - search backwards, the tokens are stored with longer ones later - in the list. */ -int match_token(int pos) { - int i, len, maxlen; - token_t *t; - u8 *p, *q; - - maxlen = input_len - pos; - - for(i = curr_token - 1; i >= INIT_TOKEN; i--) { - t = &tokentab[i]; - - /* don't search past the end of the input */ - if(t->length > maxlen) continue; - - /* if the first char doesn't match, don't bother with memcmp. - this is a 5x speedup (!) */ - if(input_buf[pos] != *(t->start)) continue; - - /* this is where alf spends most of its time. - using memcmp is noticeably slower than the code below. */ - /* - if(memcmp(&input_buf[pos], t->start, t->length) == 0) - return i; - */ - - /* inline memcmp replacement of sorts. I don't think it's really - faster than memcmp(), it only seems that way because there's - no function call overhead. ~20% speedup. */ - len = t->length; - p = &input_buf[pos]; - q = t->start; - while(len) { - if(*p != *q) break; - p++; q++; - len--; - } - if(!len) return i; - } - - /* hard-coded single character tokens map to their values, no need - to search. */ - return input_buf[pos]; -} - -void make_token(int start, int end) { - /* if the token table is full, reset it. basically start over like - we would with a new file. */ - if(curr_token == max_token) { - if(token_bits == MAX_BITS) { - store_token(TOK_RESET); /* stored at the *old* token size! */ - token_bits = INITIAL_BITS; - init_table(); - return; /* since we're starting over, *don't* make a token */ - } else { - token_bits++; - } - max_token = 1 << token_bits; - } - tokentab[curr_token].start = &input_buf[start]; - tokentab[curr_token].length = end - start + 1; - curr_token++; -} - -void crunch(void) { - int new_pos; - in_pos = 0; - int token; - - out_bitpos = 0; - - store_token(TOK_RESET); - - while(in_pos < input_len) { - token = match_token(in_pos); - store_token(token); - new_pos = in_pos + tokentab[token].length; - // printf(" in_pos %d, new_pos %d\n", in_pos, new_pos); - if(new_pos < input_len) - make_token(in_pos, new_pos); - in_pos = new_pos; - } - - store_token(TOK_END); - if(out_bitpos) inc_output_len(); - update_header(); -} - -void make_backup(void) { - char bak[PATH_MAX + 2]; - strncpy(bak, out_filename, PATH_MAX); - strcat(bak, "~"); - rename(out_filename, bak); -} - -void convert_eols(void) { - int i; - - for(i = 0; i < input_len; i++) { - if(input_buf[i] == '\n') - input_buf[i] = 0x9b; - else if(input_buf[i] == '\t') - input_buf[i] = 0x7f; - } -} - -void crunch_file(const char *filename) { - init_table(); - - open_input(filename); - - /* read in entire input, couldn't do it this way on the Atari */ - input_len = fread(input_buf, 1, MAX_INPUT_SIZE - 1, in_file); - - if(!feof(in_file)) { - fprintf(stderr, "%s: %s: this file is too large; only compressing the first 16MB.\n", self, filename); - } - - if(!input_len) { - fprintf(stderr, "%s: %s: can't compress a 0-byte (empty) file, skipping.\n", self, filename); - return; - } - - if(opt_txtconv) - convert_eols(); - - output_len = 0; - fstat(fileno(in_file), &in_file_stat); /* for timestamp */ - fclose(in_file); - - memset(output_buf, 0, sizeof(output_buf)); - - create_header(); - - /* crunches the entire input to memory! */ - crunch(); - - /* don't open the output file until crunch() has succeeded once. - this avoids leaving 0-byte turds */ - if(!out_file) { - if(!opt_overwrite) make_backup(); - out_file = fopen(out_filename, opt_append ? "ab" : "wb"); - if(!out_file) { - fprintf(stderr, "%s: fatal: ", self); - perror(out_filename); - exit(1); - } - } - - fwrite(output_buf, 1, output_len, out_file); -} - -void usage(void) { - extern char *usage_msg[]; - char **line; - - puts("alf (ALF compressor) v" VERSION " by B. Watson, WTFPL."); - printf("Usage: %s [-a|-o] archive.alf file [file ...]\n", self); - puts("Options:"); - - for(line = usage_msg; *line; line++) - puts(*line); - - exit(0); -} - -int main(int argc, char **argv) { - int opt; - - set_self(argv[0]); - - if(argc < 2 || !strcmp(argv[1], "--help") || !strcmp(argv[1], "-h")) { - usage(); - } - - if(!strcmp(argv[1], "--version")) { - puts(VERSION); - exit(0); - } - - /* don't let getopt() print error message for us. */ - opterr = 0; - - while((opt = getopt(argc, argv, "aAt:oV")) != -1) { - switch(opt) { - case 'A': opt_txtconv = 1; break; - case 'a': opt_append = 1; opt_overwrite = 1; break; - case 'o': opt_overwrite = 1; opt_append = 0; break; - case 't': opt_zerotime = opt_alftime = opt_gmtime = 0; - switch(*optarg) { - case 'z': opt_zerotime = 1; break; - case 'd': opt_alftime = 1; break; - case 'u': opt_gmtime = 1; break; - default: - fprintf(stderr, "%s: fatal: invalid -t suboption '-%c' (try -h or --help)\n", self, *optarg); - exit(1); - } - break; - case 'V': puts(VERSION); exit(0); break; - default: - fprintf(stderr, "%s: fatal: invalid option '-%c' (try -h or --help)\n", self, optopt); - exit(1); - } - } - - if(optind >= argc) { - fprintf(stderr, "%s: fatal: missing alf file argument (try -h or --help)\n", self); - exit(1); - } - - out_filename = argv[optind]; - - optind++; - - if(optind >= argc) { - fprintf(stderr, "%s: fatal: no filenames (nothing to compress) (try -h or --help)\n", self); - exit(1); - } - - while(optind < argc) { - crunch_file(argv[optind++]); - } - - if(out_file) fclose(out_file); - - exit(0); -} |
