#include #include #include #include #include #include #include #include #include "self.h" #ifndef u8 #define u8 unsigned char #define u16 unsigned short #endif #define INITIAL_BITS 9 #define MAX_BITS 12 #define MAX_TOKENS (1 << MAX_BITS) #define TOK_RESET 256 #define TOK_END 257 #define INIT_TOKEN 258 /* 256 = reset dict, 257 = token_bits++ */ #define MAX_INPUT_SIZE (1 << 24) u8 input_buf[MAX_INPUT_SIZE]; u8 output_buf[MAX_INPUT_SIZE]; u8 byte_tokens[256]; unsigned int input_len, output_len, out_bitpos; int opt_append = 0; int opt_overwrite = 0; int opt_zerotime = 0; int opt_alftime = 0; int opt_gmtime = 0; int opt_txtconv = 0; struct stat in_file_stat; typedef struct s_token { u8 *start; u16 length; } token_t; token_t tokentab[MAX_TOKENS]; FILE *out_file, *in_file; const char *out_filename, *in_filename; int token_bits; int max_token; int curr_token; long hdr_compsize_pos; int in_pos; void init_table(void) { int i; memset(tokentab, 0, sizeof(tokentab)); token_bits = INITIAL_BITS; max_token = 1 << INITIAL_BITS; curr_token = INIT_TOKEN; for(i = 0; i < 256; i++) { byte_tokens[i] = (u8)i; tokentab[i].start = &byte_tokens[i]; tokentab[i].length = 1; } } void store_quad(int pos, unsigned long data) { int i; for(i = 0; i < 4; i++) { output_buf[pos++] = data & 0xff; data >>= 8; } } void store_cksum(void) { int i; u16 cksum = 0; for(i = 0; i < input_len; i++) cksum += input_buf[i]; output_buf[23] = cksum & 0xff; output_buf[24] = cksum >> 8; } /* examples: foo => FOO. toolongfile => TOOLONGF. (really should be TOOLONGF.ILE) regular.txt => REGULAR.TXT too.many.dots => TOO.MAN */ void atarify_filename(char *result) { int i; char name[9] = { 0 }, ext[4] = { 0 }, *p; p = strrchr(in_filename, '/'); if(p) p++; else p = (char *)in_filename; strncpy(name, p, 8); for(i = 0; i < 8; i++) { if(!name[i]) break; if(name[i] == '.') { name[i] = '\0'; break; } } strcpy(result, name); strcat(result, "."); p = strchr(in_filename, '.'); if(p) { p++; strncpy(ext, p, 3); for(p = ext; *p; p++) if(*p == '.') *p = 0; strcat(result, ext); } for(p = result; *p; p++) *p = toupper(*p); } /* see Arcinfo for the gory details. */ unsigned long get_msdos_date_time(void) { time_t t = in_file_stat.st_mtime; struct tm *tm; int msdos_year; u16 ms_date, ms_time; if(opt_gmtime) tm = gmtime(&t); else tm = localtime(&t); msdos_year = tm->tm_year + 1900 - 1980; ms_date = tm->tm_mday | (tm->tm_mon << 5) | (msdos_year << 9); ms_time = (tm->tm_min << 5) | (tm->tm_hour << 11); return ms_date | (ms_time << 16); } void create_header(void) { char hdr_filename[13]; unsigned long time; atarify_filename(hdr_filename); printf("Crunching %s\n", hdr_filename); if(opt_alftime) time = 0x03130588; else if(opt_zerotime) time = 0; else time = get_msdos_date_time(); output_buf[0] = 0x1a; output_buf[1] = 0x0f; memset(&output_buf[3], 0x20, 13); strncat((char *)&output_buf[2], hdr_filename, 13); output_buf[14] = 0x00; store_quad(15, 0); /* compressed size, fill in later */ store_quad(19, time); store_cksum(); store_quad(25, input_len); output_len = 29; } void update_header(void) { store_quad(15, output_len - 29); } void open_input(const char *filename) { in_filename = filename; if(!(in_file = fopen(in_filename, "rb"))) { perror(in_filename); exit(1); } } void inc_output_len(void) { if(++output_len == MAX_INPUT_SIZE) { fprintf(stderr, "%s: fatal: compressed file would be >16MB.\n", self); exit(1); } } void append_bit(int bit) { output_buf[output_len] |= (bit << (7 - out_bitpos)); out_bitpos++; if(out_bitpos == 8) { out_bitpos = 0; inc_output_len(); } } void store_token(int tok) { int mask; for(mask = 1 << (token_bits - 1); mask; mask >>= 1) { append_bit(tok & mask ? 1 : 0); } } /* match_token() is a brute-force search, which is why alf is so slow. I'll do something smarter at some point. search backwards, the tokens are stored with longer ones later in the list. */ int match_token(int pos) { int i, len, maxlen; token_t *t; u8 *p, *q; maxlen = input_len - pos; for(i = curr_token - 1; i >= INIT_TOKEN; i--) { t = &tokentab[i]; /* don't search past the end of the input */ if(t->length > maxlen) continue; /* if the first char doesn't match, don't bother with memcmp. this is a 5x speedup (!) */ if(input_buf[pos] != *(t->start)) continue; /* this is where alf spends most of its time. using memcmp is noticeably slower than the code below. */ /* if(memcmp(&input_buf[pos], t->start, t->length) == 0) return i; */ /* inline memcmp replacement of sorts. I don't think it's really faster than memcmp(), it only seems that way because there's no function call overhead. ~20% speedup. */ len = t->length; p = &input_buf[pos]; q = t->start; while(len) { if(*p != *q) break; p++; q++; len--; } if(!len) return i; } /* hard-coded single character tokens map to their values, no need to search. */ return input_buf[pos]; } void make_token(int start, int end) { /* if the token table is full, reset it. basically start over like we would with a new file. */ if(curr_token == max_token) { if(token_bits == MAX_BITS) { store_token(TOK_RESET); /* stored at the *old* token size! */ token_bits = INITIAL_BITS; init_table(); return; /* since we're starting over, *don't* make a token */ } else { token_bits++; } max_token = 1 << token_bits; } tokentab[curr_token].start = &input_buf[start]; tokentab[curr_token].length = end - start + 1; curr_token++; } void crunch(void) { int new_pos; in_pos = 0; int token; out_bitpos = 0; store_token(TOK_RESET); while(in_pos < input_len) { token = match_token(in_pos); store_token(token); new_pos = in_pos + tokentab[token].length; // printf(" in_pos %d, new_pos %d\n", in_pos, new_pos); if(new_pos < input_len) make_token(in_pos, new_pos); in_pos = new_pos; } store_token(TOK_END); if(out_bitpos) inc_output_len(); update_header(); } void make_backup(void) { char bak[PATH_MAX + 2]; strncpy(bak, out_filename, PATH_MAX); strcat(bak, "~"); rename(out_filename, bak); } void convert_eols(void) { int i; for(i = 0; i < input_len; i++) { if(input_buf[i] == '\n') input_buf[i] = 0x9b; else if(input_buf[i] == '\t') input_buf[i] = 0x7f; } } void crunch_file(const char *filename) { init_table(); open_input(filename); /* read in entire input, couldn't do it this way on the Atari */ input_len = fread(input_buf, 1, MAX_INPUT_SIZE - 1, in_file); if(!feof(in_file)) { fprintf(stderr, "%s: %s: this file is too large; only compressing the first 16MB.\n", self, filename); } if(!input_len) { fprintf(stderr, "%s: %s: can't compress a 0-byte (empty) file, skipping.\n", self, filename); return; } if(opt_txtconv) convert_eols(); output_len = 0; fstat(fileno(in_file), &in_file_stat); /* for timestamp */ fclose(in_file); memset(output_buf, 0, sizeof(output_buf)); create_header(); /* crunches the entire input to memory! */ crunch(); /* don't open the output file until crunch() has succeeded once. this avoids leaving 0-byte turds */ if(!out_file) { if(!opt_overwrite) make_backup(); out_file = fopen(out_filename, opt_append ? "ab" : "wb"); if(!out_file) { fprintf(stderr, "%s: fatal: ", self); perror(out_filename); exit(1); } } fwrite(output_buf, 1, output_len, out_file); } void usage(void) { extern char *usage_msg[]; char **line; puts("alf (ALF compressor) v" VERSION " by B. Watson, WTFPL."); printf("Usage: %s [-a|-o] archive.alf file [file ...]\n", self); puts("Options:"); for(line = usage_msg; *line; line++) puts(*line); exit(0); } int main(int argc, char **argv) { int opt; set_self(argv[0]); if(argc < 2 || !strcmp(argv[1], "--help") || !strcmp(argv[1], "-h")) { usage(); } if(!strcmp(argv[1], "--version")) { puts(VERSION); exit(0); } /* don't let getopt() print error message for us. */ opterr = 0; while((opt = getopt(argc, argv, "aAt:oV")) != -1) { switch(opt) { case 'A': opt_txtconv = 1; break; case 'a': opt_append = 1; opt_overwrite = 1; break; case 'o': opt_overwrite = 1; opt_append = 0; break; case 't': opt_zerotime = opt_alftime = opt_gmtime = 0; switch(*optarg) { case 'z': opt_zerotime = 1; break; case 'd': opt_alftime = 1; break; case 'u': opt_gmtime = 1; break; default: fprintf(stderr, "%s: fatal: invalid -t suboption '-%c' (try -h or --help)\n", self, *optarg); exit(1); } break; case 'V': puts(VERSION); exit(0); break; default: fprintf(stderr, "%s: fatal: invalid option '-%c' (try -h or --help)\n", self, optopt); exit(1); } } if(optind >= argc) { fprintf(stderr, "%s: fatal: missing alf file argument (try -h or --help)\n", self); exit(1); } out_filename = argv[optind]; optind++; if(optind >= argc) { fprintf(stderr, "%s: fatal: no filenames (nothing to compress) (try -h or --help)\n", self); exit(1); } while(optind < argc) { crunch_file(argv[optind++]); } if(out_file) fclose(out_file); exit(0); }