diff options
Diffstat (limited to 'unprotbas.c')
-rw-r--r-- | unprotbas.c | 528 |
1 files changed, 256 insertions, 272 deletions
diff --git a/unprotbas.c b/unprotbas.c index 0b11e0e..ae01b50 100644 --- a/unprotbas.c +++ b/unprotbas.c @@ -2,6 +2,10 @@ #include <unistd.h> #include <stdlib.h> #include <string.h> +#include <ctype.h> +#include <time.h> + +#include "bas.h" /* attempt to fix a "list-protected" Atari 8-bit BASIC program. we don't fully detokenize, so this won't fix truly corrupted @@ -14,29 +18,11 @@ or whatever), we "fix" that by making up new variable names. */ -#define STM_OFFSET 0xf2 - -/* entire file gets read into memory (for now) */ -unsigned char data[65536]; - -/* BASIC 14-byte header values */ -unsigned short lomem; -unsigned short vntp; -unsigned short vntd; -unsigned short vvtp; -unsigned short stmtab; -unsigned short stmcur; -unsigned short starp; - -/* positions where various parts of the file start, - derived from the header vars above. */ -unsigned short codestart; -unsigned short vnstart; -unsigned short vvstart; -int filelen; - -/* name of executable, taken from argv[0] */ -char *self; +/* for the -r option */ +#define MAP_FILE "varnames.txt" +unsigned char varnames[BUFSIZE]; +unsigned char *varmap[MAXVARS]; +int varmap_count; /* these are set by the various command-line switches */ int keepvars = 0; @@ -44,64 +30,8 @@ int forcevars = 0; int keepgarbage = 1; int checkonly = 0; int was_protected = 0; -int verbose = 0; - -/* file handles */ -FILE *input_file = NULL; -FILE *output_file = NULL; - -void die(const char *msg) { - fprintf(stderr, "%s: %s\n", self, msg); - exit(1); -} - -/* read entire file into memory */ -int readfile(void) { - int got = fread(data, 1, 65535, input_file); - if(verbose) fprintf(stderr, "read %d bytes\n", got); - fclose(input_file); - return got; -} - -/* get a 16-bit value from the file, in 6502 LSB/MSB order. */ -unsigned short getword(int addr) { - return data[addr] | (data[addr + 1] << 8); -} - -void setword(int addr, int value) { - data[addr] = value & 0xff; - data[addr + 1] = value >> 8; -} - -void dump_header_vars(void) { - fprintf(stderr, "LOMEM $%04x VNTP $%04x VNTD $%04x VVTP $%04x\n", lomem, vntp, vntd, vvtp); - fprintf(stderr, "STMTAB $%04x STMCUR $%04x STARP $%04x\n", stmtab, stmcur, starp); - fprintf(stderr, "vnstart $%04x, vvstart $%04x, codestart $%04x\n", vnstart, vvstart, codestart); -} - -void read_header(void) { - lomem = getword(0); - vntp = getword(2); - vntd = getword(4); - vvtp = getword(6); - stmtab = getword(8); - stmcur = getword(10); - starp = getword(12); - codestart = stmtab - STM_OFFSET - (vntp - 256); - vnstart = vntp - 256 + 14; - vvstart = vvtp - 256 + 14; - if(verbose) dump_header_vars(); -} - -void set_header_vars(void) { - setword(0, lomem); - setword(2, vntp); - setword(4, vntd); - setword(6, vvtp); - setword(8, stmtab); - setword(10, stmcur); - setword(12, starp); -} +int readmap = 0; +int writemap = 0; /* fixline() calculates & sets correct line length, by iterating over the statement(s) within the line. the last statement's @@ -122,30 +52,29 @@ void set_header_vars(void) { 2 09 line length (or, offset to next line) [!] 3 06 offset to next statement *from the start of the line* 4 28 token for "?" - 5 14 token for : (end of statement) + 5 14 token for : (end of statement), we call it TOK_COLON 6 09 offset to next statement [!] 7 15 token for END 8 16 token for end-of-line [*] 9 ?? (line number of next statement) Note the values marked with [!] are equal. + The line length at offset 2 is what gets zeroed out by the + protection. To fix it, we follow the next-statement offsets. If + there's not a colon before the offset, replace the byte at + offset 2 with that statement's offset. [*] end-of-line is $16 *except* for REM and DATA, which are terminated with $9B instead. */ int fixline(int linepos) { /* +3 here to skip the line number + line length */ - int token, done = 0, offset = data[linepos + 3]; - - while(!done) { - offset = data[linepos + offset]; - token = data[linepos + offset - 1]; - /* fprintf(stderr, "offset %02x token %02x\n", offset, token); */ - if(token != 0x14) - done++; - } + int offset = program[linepos + 3]; - data[linepos + 2] = offset; + while(program[linepos + offset - 1] == TOK_COLON) + offset = program[linepos + offset]; + + program[linepos + 2] = offset; return offset; } @@ -159,16 +88,16 @@ int fixcode(void) { while(pos < filelen) { tmpno = getword(pos); if(tmpno <= lineno) { - fprintf(stderr, "Warning: line number %d at offset %04x is <= previous line number %d\n", + fprintf(stderr, "Warning: line number %d at offset $%04x is <= previous line number %d.\n", tmpno, pos, lineno); } lineno = tmpno; - offset = data[pos + 2]; + offset = program[pos + 2]; /* fprintf(stderr, "pos %d, line #%d, offset %d\n", pos, lineno, offset); */ if(offset < 6) { - if(verbose) fprintf(stderr, "Found invalid offset %d (<6) at line %d\n", offset, lineno); - offset += fixline(pos); + if(verbose) fprintf(stderr, "Found invalid offset %d (<6) at line %d, file offset $%04x.\n", offset, lineno, pos); + offset = fixline(pos); result++; } pos += offset; @@ -178,33 +107,29 @@ int fixcode(void) { if(lineno == 32768) break; } - if(verbose) fprintf(stderr, "End program pos $%04x/%d\n", pos, pos); + if(verbose) fprintf(stderr, "End program file offset: $%04x/%d\n", pos, pos); if(filelen > pos) { - if(verbose) fprintf(stderr, "trailing garbage at EOF, %d bytes, %s\n", - filelen - pos, (keepgarbage ? "keeping" : "removing")); + int i, same = 1; + for(i = pos; i < filelen; i++) { + if(program[i] != program[pos]) same = 0; + } + if(verbose) { + fprintf(stderr, "Trailing garbage at EOF, %d bytes, ", filelen - pos); + if(same) + fprintf(stderr, "all $%02x", program[pos]); + else + fprintf(stderr, "maybe valid data"); + fprintf(stderr, ", %s.\n", (keepgarbage ? "keeping" : "removing")); + } if(!keepgarbage) filelen = pos; + } else { + if(verbose) + fprintf(stderr, "No trailing garbage at EOF.\n"); } - return result; } -/* sometimes the variable name table isn't large enough to hold - the generated variable names. move_code() makes more space, - by moving the rest of the program (including the variable value - table) up in memory. */ -void move_code(int offset) { - memmove(data + vvstart + offset, data + vvstart, filelen); - vntd += offset; - vvtp += offset; - stmtab += offset; - stmcur += offset; - starp += offset; - set_header_vars(); - read_header(); - filelen += offset; -} - /* Fixing the variables is a bit more work than it seems like it might be, because the last byte of the name has to match the type (inverse video "(" for numeric array, inverse "$" for @@ -229,52 +154,6 @@ void move_code(int offset) { or letter+number or one-letter string/array names). */ -int vntable_ok(void) { - int vp, bad; - - if(vntp == vntd) { - if(verbose) fprintf(stderr, "No variables\n"); - return 1; - } - - /* first pass: bad = 1 if all the bytes in the table have the same - value, no matter what it is. */ - vp = vnstart + 1; - bad = 1; - while(vp < vvstart - 1) { - if(data[vp] != data[vnstart]) { - bad = 0; - break; - } - vp++; - } - if(bad) return 0; - - /* 2nd pass: bad = 1 if there's any invalid character in the table. */ - vp = vnstart; - while(vp < vvstart) { - unsigned char c = data[vp]; - - /* treat a null byte as end-of-table, ignore any junk between it and VNTP. */ - if(c == 0) break; - - vp++; - - /* inverse $ or ( is OK */ - if(c == 0xa4 || c == 0xa8) continue; - - /* numbers and letters are allowed, inverse or normal. */ - c &= 0x7f; - if(c >= 0x30 && c <= 0x39) continue; - if(c >= 0x41 && c <= 0x5a) continue; - - bad++; - break; - } - - return !bad; -} - /* walk the variable value table, generating variable names. if write is 0, just return the size the table will be. if write is 1, actually write the names to memory. */ @@ -287,28 +166,35 @@ int rebuild_vntable(int write) { while(vv < codestart) { unsigned char sigil = 0; /* type: scalar = 0, array = 1, string = 2 */ - unsigned char type = data[vv] >> 6; - /* fprintf(stderr, "%04x: %04x, %d\n", vv, data[vv], type); */ + unsigned char type = program[vv] >> 6; + /* fprintf(stderr, "%04x: %04x, %d\n", vv, program[vv], type); */ + + if(varnum == MAXVARS) { + fprintf(stderr, "Warning: skipping variable numbers >=%d in value table.\n", MAXVARS); + break; + } - if(varnum != data[vv+1]) { - fprintf(stderr, "Warning: variable value is corrupt!\n"); + if(varnum != program[vv+1]) { + fprintf(stderr, "Warning: variable #%d value is corrupt!\n", varnum); } - varnum++; switch(type) { - case 1: varname = arrays++; sigil = 0xa8; break; - case 2: varname = strings++; sigil = 0xa4; break; - default: varname = scalars++; break; + case TYPE_SCALAR: varname = scalars++; break; + case TYPE_ARRAY: varname = arrays++; sigil = 0xa8; break; + case TYPE_STRING: varname = strings++; sigil = 0xa4; break; + default: + fprintf(stderr, "Warning: variable value #%d has unknown type.\n", varnum); + break; } if(varname < 26) { - if(write) data[vp] = ('A' + varname); + if(write) program[vp] = ('A' + varname); size++; } else { varname -= 26; if(write) { - data[vp++] = 'A' + varname / 9; - data[vp] = '1' + varname % 9; + program[vp++] = 'A' + varname / 9; + program[vp] = '1' + varname % 9; } size += 2; } @@ -316,41 +202,25 @@ int rebuild_vntable(int write) { if(sigil) { size++; vp++; - if(write) data[vp++] = sigil; + if(write) program[vp++] = sigil; } else { - if(write) data[vp] |= 0x80; + if(write) program[vp] |= 0x80; vp++; } vv += 8; + varnum++; } /* there's supposed to be a null byte at the end of the table, unless - all 128 table slots are used. */ - if(write) { - if(varnum < 128) data[vp] = 0; - /* fixup the VNTD pointer */ - /* - vntd = vntp + (vp - vnstart); - fprintf(stderr, "%04x\n", vntd); - data[4] = vntd & 0xff; - data[5] = vntd >> 8; - */ - } + all 128 table slots are used... except really, there can be >=129 + entries, and there's always a null byte. */ + if(write) program[vp] = 0; + size++; return size; } -void adjust_vntable_size(int oldsize, int newsize) { - int move_by; - if(oldsize != newsize) { - move_by = newsize - oldsize; - if(verbose) fprintf(stderr, "need %d bytes for vntable, have %d, moving VVTP by %d to %04x\n", - newsize, oldsize, move_by, vvtp + move_by); - move_code(move_by); - } -} - int fixvars(void) { int old_vntable_size, new_vntable_size; @@ -366,69 +236,172 @@ int fixvars(void) { return 1; } -void print_help(void) { - fprintf(stderr, "Usage: %s [-v] [-f] [-n] [-g] <inputfile> <outputfile>\n", self); - fprintf(stderr, "-v: verbose\n"); - fprintf(stderr, "-f: force variable name table rebuild\n"); - fprintf(stderr, "-n: do not rebuild variable name table, even if it's invalid\n"); - fprintf(stderr, "-g: remove trailing garbage, if present\n"); - fprintf(stderr, "-c: check only; no output file\n"); - fprintf(stderr, "Use - as a filename to read from stdin and/or write to stdout\n"); +void write_var_map(void) { + FILE *f; + int vp, count = 0; + + if(verbose) fprintf(stderr, "Writing variable names to '" MAP_FILE "'.\n"); + f = fopen(MAP_FILE, "w"); + if(!f) { + perror(MAP_FILE); + die("Can't create map file for -w option."); + } + + for(vp = vnstart; (vp < vntd) && (program[vp] != 0); vp++) { + unsigned char c = program[vp]; + if(c < 0x80) { + fputc(c, f); + } else { + fputc(c & 0x7f, f); + fputc('\n', f); + count++; + } + } + + fclose(f); + + if(verbose) fprintf(stderr, "Wrote %d variable names to '" MAP_FILE "'.\n", count); } -void invalid_args(const char *arg) { - fprintf(stderr, "%s: Invalid argument '%s'\n\n", self, arg); - print_help(); +void die_mapfile(char *msg, int num) { + fprintf(stderr, MAP_FILE ": line %d: %s.\n", num, msg); exit(1); } -FILE *open_file(const char *name, const char *mode) { - FILE *fp; - if(!(fp = fopen(name, mode))) { - perror(name); - exit(1); +void check_varname(const unsigned char *name, int line) { + int len = strlen((char *)name); + int i; + unsigned char c = 0, type; + + /* fprintf(stderr, "check_varname(\"%s\", %d)\n", name, line); */ + + if(len < 1) die_mapfile("Blank variable name", line); + if(len > 128) die_mapfile("Variable name >128 characters", line); + if(name[0] < 'A' || name[0] > 'Z') + die_mapfile("Invalid variable name: First character must be a letter", line); + + for(i = 1; i < len; i++) { + c = name[i]; + if(c >= 'A' && c <= 'Z') continue; + if(c >= '0' && c <= '9') continue; + if(c == '$' || c == '(') { + if(i == (len - 1)) + continue; + else + die_mapfile("Invalid variable name: $ and ( only allowed at end", line); + } + die_mapfile("Invalid character in variable name", line); + } + + if(c == 0) c = name[0]; + + /* c now has the last char of the name, make sure it matches the variable type */ + type = program[vvstart + 8 * (line - 1)] >> 6; + /* type: scalar = 0, array = 1, string = 2 */ + if(type == TYPE_SCALAR) { + if(c == '$') + die_mapfile("Type mismatch: numeric variable may not end with $", line); + else if(c == '(') + die_mapfile("Type mismatch: numeric variable may not end with (", line); + } else if(type == TYPE_ARRAY) { + if(c != '(') + die_mapfile("Type mismatch: array variable must end with (", line); + } else if(type == TYPE_STRING) { + if(c != '$') + die_mapfile("Type mismatch: string variable must end with $", line); + } else { + fprintf(stderr, "Warning: variable value table is corrupt (invalid type).\n"); + } + + /* check for dups */ + for(i = 0; i < line - 1; i++) { + if(strcmp((char *)name, (char *)varmap[i]) == 0) + die_mapfile("duplicate variable name", line); } - return fp; } -void open_input(const char *name) { - if(!name) { - if(freopen(NULL, "rb", stdin)) { - input_file = stdin; - return; - } else { - perror("stdin"); - exit(1); +void read_var_map(void) { + FILE *f; + unsigned char *p = varnames, *curname = varnames; + int count = 0, vvcount = (codestart - vvstart) / 8; + + if(verbose) fprintf(stderr, "Reading variable names from " MAP_FILE ".\n"); + f = fopen(MAP_FILE, "r"); + if(!f) { + perror(MAP_FILE); + die("Can't read map file for -r option."); + } + + while(!feof(f)) { + *p = toupper(fgetc(f)); /* allow lowercase */ + + if(*p == ' ' || *p == '\t' || *p == '\r') + continue; /* ignore whitespace */ + + if(*p == '\n') { + *p = '\0'; + varmap[count++] = curname; + check_varname(curname, count); + curname = p + 1; } + p++; + } + fclose(f); + + if(verbose) fprintf(stderr, "Read %d variable names from " MAP_FILE ".\n", count); + + if(vvcount > count) { + fprintf(stderr, MAP_FILE ": not enough variables (have %d, need %d).\n", count, vvcount); + exit(1); + } else if(count > vvcount) { + fprintf(stderr, MAP_FILE ": too many variables (have %d, need %d).\n", count, vvcount); + exit(1); } - input_file = open_file(name, "rb"); + varmap_count = count; } -void open_output(const char *name) { - if(!name) { - if(isatty(fileno(stdout))) { - fprintf(stderr, "%s: refusing to write binary data to standard output\n", self); - exit(1); - } - if(freopen(NULL, "wb", stdout)) { - output_file = stdout; - return; - } else { - perror("stdout"); - exit(1); +void apply_var_map(void) { + unsigned char new_vntable[BUFSIZE]; + int i, newp = 0; + unsigned char *v; + + if(verbose) + fprintf(stderr, "Using variable names from " MAP_FILE ".\n"); + + for(i = 0; i < varmap_count; i++) { + v = varmap[i]; + while(*v) { + new_vntable[newp++] = *v; + v++; } + new_vntable[newp - 1] |= 0x80; } - output_file = open_file(name, "wb"); + new_vntable[newp++] = '\0'; + + i = vvstart - vnstart; + adjust_vntable_size(i, newp); + memmove(program + vnstart, new_vntable, newp); +} + +void print_help(void) { + printf("Usage: %s [-v] [-f] [-n] [-g] [-c] [-r|-w] <inputfile> <outputfile>\n", self); + printf(" -v: Verbose.\n"); + printf(" -f: Force variable name table rebuild.\n"); + printf(" -n: Do not rebuild variable name table, even if it's invalid.\n"); + printf(" -g: Remove trailing garbage, if present.\n"); + printf(" -c: Check only; no output file.\n"); + printf(" -w: Write variable names to 'varnames.txt'.\n"); + printf(" -r: Read variable names from 'varnames.txt'.\n"); + printf("Use - as a filename to read from stdin and/or write to stdout.\n"); } void parse_args(int argc, char **argv) { - self = *argv; - if(argc < 2) { - print_help(); - exit(0); - } + set_self(*argv); + + parse_general_args(argc, argv, print_help); + while(++argv, --argc) { if((*argv)[0] == '-') { switch((*argv)[1]) { @@ -437,73 +410,84 @@ void parse_args(int argc, char **argv) { case 'n': keepvars++; break; case 'g': keepgarbage = 0; break; case 'c': checkonly = 1; break; + case 'r': readmap = 1; break; + case 'w': writemap = 1; break; case 0: if(!input_file) open_input(NULL); - else if(!output_file) - open_output(NULL); + else if(!output_filename) + output_filename = *argv; else invalid_args(*argv); break; default: invalid_args(*argv); break; } } else { + /* arg doesn't start with a -, must be a filename */ if(!input_file) open_input(*argv); - else if(!checkonly && !output_file) - open_output(*argv); + else if(!checkonly && !output_filename) + output_filename = *argv; else invalid_args(*argv); } } - if(!input_file) die("no input file given (use - for stdin)"); - if(!checkonly && !output_file) die("no output file given (use - for stdout)"); - if(keepvars && forcevars) die("-f and -n are mutually exclusive"); + if(!input_file) die("No input file given (use - for stdin)."); + if(!checkonly && !output_filename) die("No output file given (use - for stdout)."); + if(keepvars && forcevars) die("-f and -n are mutually exclusive."); + if(readmap && writemap) die("-r and -w are mutually exclusive."); + if(readmap && keepvars) die("-r and -n are mutually exclusive, maybe you want -w?"); + if(checkonly && (readmap || writemap)) die("-c and -r/-w are mutually exclusive."); } int main(int argc, char **argv) { + int invoffs = 0; parse_args(argc, argv); - filelen = readfile(); - read_header(); - - if(lomem) die("This doesn't look like an Atari BASIC program (no $0000 signature)"); + readfile(); + parse_header(); - if(!keepvars) { - if(fixvars()) { - was_protected = 1; - if(verbose) fprintf(stderr, "Variable names replaced\n"); - } else { - if(verbose) fprintf(stderr, "Variable names were already OK\n"); + if(readmap) { + was_protected = !vntable_ok(); + read_var_map(); + apply_var_map(); + } else { + if(!keepvars) { + if(fixvars()) { + was_protected = 1; + if(verbose) fprintf(stderr, "Variable names replaced.\n"); + } else { + if(verbose) fprintf(stderr, "Variable names were already OK.\n"); + } } } - if(fixcode()) { - if(verbose) fprintf(stderr, "Fixed invalid offset in code\n"); + invoffs = fixcode(); + if(invoffs) { + if(verbose) + fprintf(stderr, "Fixed %d invalid offset%s in code.\n", + invoffs, (invoffs == 1 ? "" : "s")); was_protected = 1; } else { - if(verbose) fprintf(stderr, "No invalid offsets\n"); + if(verbose) fprintf(stderr, "No invalid offsets.\n"); } if(verbose) { - if(was_protected) - fprintf(stderr, "Program was protected.\n"); - else - fprintf(stderr, "Program was NOT protected.\n"); + fprintf(stderr, "Program was %sprotected.\n", (was_protected ? "" : "NOT ")); } if(checkonly) { if(verbose) fprintf(stderr, "Check-only mode; no output written.\n"); - if(was_protected) - return 0; - else - return 2; + return was_protected ? 0 : 2; } - int got = fwrite(data, 1, filelen, output_file); - fclose(output_file); - if(verbose) fprintf(stderr, "wrote %d bytes\n", got); + /* we don't open the output file until all processing is done, to + avoid leaving invalid output files if we exit on error. */ + open_output(output_filename); + writefile(); + + if(writemap) write_var_map(); - return 0; + return was_protected ? 0 : 2; } |