/* fileno() is POSIX rather than ISO C; request it explicitly so this
   file also builds in strict modes such as -std=c11. */
#ifndef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 200809L
#endif

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/* attempt to fix a "list-protected" Atari 8-bit BASIC program.
   we don't fully detokenize, so this won't fix truly corrupted
   files.

   the "fix" is in 2 parts:

   1. fix any invalid (0-byte) offsets after a line number. this is
      what causes BASIC to lock up.

   2. if the variable names were overwritten (e.g. with EOL characters,
      or whatever), we "fix" that by making up new variable names.
*/

#define STM_OFFSET 0xf2

/* size of the BASIC header (7 little-endian words) at the start of the file */
#define HEADER_SIZE 14

/* statement terminators that are followed by another statement on the
   same line. */
#define TOK_COLON 0x14
#define TOK_THEN  0x1b

/* entire file gets read into memory (for now) */
unsigned char data[65536];

/* BASIC 14-byte header values */
unsigned short lomem;
unsigned short vntp;
unsigned short vntd;
unsigned short vvtp;
unsigned short stmtab;
unsigned short stmcur;
unsigned short starp;

/* positions where various parts of the file start,
   derived from the header vars above. */
unsigned short codestart;
unsigned short vnstart;
unsigned short vvstart;
int filelen;

/* name of executable, taken from argv[0] */
char *self;

/* these are set by the various command-line switches */
int keepvars = 0;
int forcevars = 0;
int keepgarbage = 1;
int checkonly = 0;
int was_protected = 0;
int verbose = 0;

/* file handles */
FILE *input_file = NULL;
FILE *output_file = NULL;

/* print "<self>: <msg>" on stderr and exit with status 1. */
void die(const char *msg) {
	fprintf(stderr, "%s: %s\n", self, msg);
	exit(1);
}

/* read entire file into memory (at most sizeof(data) - 1 bytes), close
   the input, and return the number of bytes read. */
int readfile(void) {
	int got = (int)fread(data, 1, sizeof(data) - 1, input_file);

	if(ferror(input_file))
		die("error reading input file");
	if(verbose)
		fprintf(stderr, "read %d bytes\n", got);
	fclose(input_file);
	return got;
}

/* get a 16-bit value from the file, in 6502 LSB/MSB order. */
unsigned short getword(int addr) {
	return data[addr] | (data[addr + 1] << 8);
}

/* store a 16-bit value into the file image, in 6502 LSB/MSB order. */
void setword(int addr, int value) {
	data[addr] = value & 0xff;
	data[addr + 1] = (value >> 8) & 0xff;
}

/* print the header words and the file offsets derived from them. */
void dump_header_vars(void) {
	fprintf(stderr, "LOMEM $%04x VNTP $%04x VNTD $%04x VVTP $%04x\n",
			lomem, vntp, vntd, vvtp);
	fprintf(stderr, "STMTAB $%04x STMCUR $%04x STARP $%04x\n",
			stmtab, stmcur, starp);
	fprintf(stderr, "vnstart $%04x, vvstart $%04x, codestart $%04x\n",
			vnstart, vvstart, codestart);
}

/* load the 7 header words from the start of data[] into the globals,
   and derive the file offsets of the name table, value table and code. */
void read_header(void) {
	lomem = getword(0);
	vntp = getword(2);
	vntd = getword(4);
	vvtp = getword(6);
	stmtab = getword(8);
	stmcur = getword(10);
	starp = getword(12);

	codestart = stmtab - STM_OFFSET - (vntp - 256);
	vnstart = vntp - 256 + HEADER_SIZE;
	vvstart = vvtp - 256 + HEADER_SIZE;

	if(verbose)
		dump_header_vars();
}

/* write the header globals back into the first 14 bytes of data[]. */
void set_header_vars(void) {
	setword(0, lomem);
	setword(2, vntp);
	setword(4, vntd);
	setword(6, vvtp);
	setword(8, stmtab);
	setword(10, stmcur);
	setword(12, starp);
}

/* fixline() calculates & sets correct line length, by iterating over
   the statement(s) within the line. the last statement's offset will
   be the same as the line offset should have been, if it weren't
   zeroed.

   when reading this code, it's helpful to know that the lengths (line
   and statement) are counted from the start of the line in memory.
   A line with only a line number and one token (such as END) would
   have a line length of 6: 2 for the 16-bit line number, 1 for the
   length byte itself, 1 for the statement length byte (also 6), 1 for
   the END token, and one for the end-of-line token.

   A line with two statements:

   10 ?:END

   offset  value  meaning
   0       0A     line number (low byte)
   1       00     line number (high byte)
   2       09     line length (or, offset to next line) [!]
   3       06     offset to next statement *from the start of the line*
   4       28     token for "?"
   5       14     token for : (end of statement)
   6       09     offset to next statement [!]
   7       15     token for END
   8       16     token for end-of-line [*]
   9       ??     (line number of next statement)

   Note the values marked with [!] are equal.

   [*] end-of-line is $16 *except* for REM and DATA, which
       are terminated with $9B instead.

   linepos is the file offset of the line's first byte. Returns the
   corrected line length (which has also been stored at linepos + 2).
   Calls die() if the statement offsets are themselves unusable.
*/
int fixline(int linepos) {
	/* +3 here to skip the line number + line length */
	int offset = data[linepos + 3];

	for(;;) {
		int token, next;

		/* the end of a statement can't lie inside the line header or
		   past the end of the file. */
		if(offset < 5 || linepos + offset > filelen)
			die("corrupt statement offset, can't repair line length");

		/* the last byte of a statement is its terminator. anything other
		   than a "more statements follow" terminator ends the line.
		   NOTE(review): THEN followed by a statement is assumed to start
		   a new statement, like a colon does -- confirm against the
		   tokenizer documentation. */
		token = data[linepos + offset - 1];
		if(token != TOK_COLON && token != TOK_THEN)
			break;

		if(linepos + offset >= filelen)
			die("corrupt statement offset, can't repair line length");

		/* offsets must strictly increase, or we'd loop forever. */
		next = data[linepos + offset];
		if(next <= offset)
			die("corrupt statement offset, can't repair line length");
		offset = next;
	}

	data[linepos + 2] = offset;
	return offset;
}

/* Iterate over all the tokenized lines. If any of them have invalid
   line lengths (<=5), call fixline() on them.

   Returns the number of lines that were repaired. May shrink filelen
   when trailing garbage is being removed. */
int fixcode(void) {
	int result = 0;
	int pos = codestart;
	int offset, lineno = -1, tmpno;

	/* need a whole line header (line number + length byte) to go on */
	while(pos + 3 <= filelen) {
		tmpno = getword(pos);
		if(tmpno <= lineno) {
			fprintf(stderr, "Warning: line number %d at offset %04x is <= previous line number %d\n",
					tmpno, pos, lineno);
		}
		lineno = tmpno;

		offset = data[pos + 2];
		if(offset < 6) {
			if(verbose)
				fprintf(stderr, "Found invalid offset %d (<6) at line %d\n", offset, lineno);
			/* replace (not add to) the bogus length */
			offset = fixline(pos);
			result++;
		}
		pos += offset;

		/* Atari BASIC tolerates garbage after the last tokenized line,
		   so we must do likewise. */
		if(lineno == 32768)
			break;
	}

	if(verbose)
		fprintf(stderr, "End program pos $%04x/%d\n", pos, pos);

	if(filelen > pos) {
		if(verbose)
			fprintf(stderr, "trailing garbage at EOF, %d bytes, %s\n",
					filelen - pos, (keepgarbage ? "keeping" : "removing"));
		if(!keepgarbage)
			filelen = pos;
	}

	return result;
}

/* sometimes the variable name table isn't large enough to hold
   the generated variable names. move_code() makes more space,
   by moving the rest of the program (including the variable value
   table) up in memory.

   offset is the number of bytes to move by; it may be negative, in
   which case the table shrinks. The header pointers and filelen are
   adjusted to match. */
void move_code(int offset) {
	/* only the part of the file from the value table onwards moves */
	int tail = filelen - vvstart;

	if(tail < 0 || vvstart + offset < 0 || filelen + offset > (int)sizeof(data))
		die("can't resize variable name table, program would not fit in memory");

	memmove(data + vvstart + offset, data + vvstart, (size_t)tail);

	vntd += offset;
	vvtp += offset;
	stmtab += offset;
	stmcur += offset;
	starp += offset;

	/* write the new pointers out, then re-derive vvstart/codestart */
	set_header_vars();
	read_header();

	filelen += offset;
}

/* Fixing the variables is a bit more work than it seems like it
   might be, because the last byte of the name has to match the
   type (inverse video "(" for numeric array, inverse "$" for string,
   inverse last character of name for scalars). To do this right, we
   have to examine the variable value table to find out the type of
   each variable.

   Each variable type gets assigned A to Z, then A1 to A9, B1 to B9,
   etc. This means there will be A, A$, and A( variables, which might
   be a bit confusing, but we want to keep the generated name table
   as short as possible.

   We can find the actual table size in the file by subtracting VNTP
   (start of variable name table) from VNTD (end of variable name
   table). It's possible that the table size is too small for the
   generated variable names, in which case we have to call move_code()
   to make more room.

   The maximum number of variable names is 128. If all 128 vars are in
   use, the minimum table size is 230 (26 one-letter names, 102 2-letter
   or letter+number or one-letter string/array names).
*/

/* returns 1 if the variable name table looks sane, 0 if it looks like
   it was overwritten. */
int vntable_ok(void) {
	int vp, bad;

	if(vntp == vntd) {
		if(verbose)
			fprintf(stderr, "No variables\n");
		return 1;
	}

	/* first pass: bad = 1 if all the bytes in the table have the same
	   value, no matter what it is. this only means anything if there
	   is more than one byte to compare: a table holding a single
	   one-character name must not be flagged. */
	if(vvstart - 1 > vnstart + 1) {
		bad = 1;
		for(vp = vnstart + 1; vp < vvstart - 1; vp++) {
			if(data[vp] != data[vnstart]) {
				bad = 0;
				break;
			}
		}
		if(bad)
			return 0;
	}

	/* 2nd pass: the table is bad if there's any invalid character in it. */
	for(vp = vnstart; vp < vvstart; vp++) {
		unsigned char c = data[vp];

		/* treat a null byte as end-of-table, ignore any junk between
		   it and VVTP. */
		if(c == 0)
			break;

		/* inverse $ or ( is OK */
		if(c == 0xa4 || c == 0xa8)
			continue;

		/* numbers and letters are allowed, inverse or normal. */
		c &= 0x7f;
		if(c >= 0x30 && c <= 0x39)
			continue;
		if(c >= 0x41 && c <= 0x5a)
			continue;

		return 0;
	}

	return 1;
}

/* walk the variable value table, generating variable names.
   if write is 0, just return the size the table will be.
   if write is 1, actually write the names to memory.

   the returned size includes the terminating null byte, when there
   is one, so that the space reserved for the table has room for it. */
int rebuild_vntable(int write) {
	int vp = vnstart;
	int vv = vvstart;
	int size = 0;
	int strings = 0, arrays = 0, scalars = 0, varname = 0, varnum = 0;

	while(vv < codestart) {
		unsigned char name[3];
		int len = 0;
		unsigned char sigil = 0;
		/* type: scalar = 0, array = 1, string = 2 */
		unsigned char type = data[vv] >> 6;

		/* the 2nd byte of each entry should be the variable's number */
		if(varnum != data[vv + 1]) {
			fprintf(stderr, "Warning: variable value is corrupt!\n");
		}
		varnum++;

		/* each type has its own name sequence */
		switch(type) {
			case 1:
				varname = arrays++;
				sigil = 0xa8; /* inverse ( */
				break;
			case 2:
				varname = strings++;
				sigil = 0xa4; /* inverse $ */
				break;
			default:
				varname = scalars++;
				break;
		}

		/* A to Z first, then A1 to A9, B1 to B9, etc. */
		if(varname < 26) {
			name[len++] = 'A' + varname;
		} else {
			varname -= 26;
			name[len++] = 'A' + varname / 9;
			name[len++] = '1' + varname % 9;
		}

		/* last byte of the name: the sigil, or for scalars the last
		   character in inverse video. */
		if(sigil)
			name[len++] = sigil;
		else
			name[len - 1] |= 0x80;

		if(write)
			memcpy(data + vp, name, (size_t)len);
		vp += len;
		size += len;

		vv += 8;
	}

	/* there's supposed to be a null byte at the end of the table, unless
	   all 128 table slots are used. it's part of the table, so it has
	   to be counted: otherwise it lands on the first byte of the
	   variable value table. */
	if(varnum < 128) {
		if(write)
			data[vp] = 0;
		size++;
	}

	return size;
}

/* if the name table needs a different amount of space than it has now,
   move the rest of the program to make it fit exactly. */
void adjust_vntable_size(int oldsize, int newsize) {
	int move_by;

	if(oldsize != newsize) {
		move_by = newsize - oldsize;
		if(verbose)
			fprintf(stderr, "need %d bytes for vntable, have %d, moving VVTP by %d to %04x\n",
					newsize, oldsize, move_by, vvtp + move_by);
		move_code(move_by);
	}
}

/* rebuild the variable name table if it looks damaged (or if forced).
   returns 1 if the table was rebuilt, 0 if it was left alone. */
int fixvars(void) {
	int old_vntable_size, new_vntable_size;

	if(vntable_ok() && !forcevars)
		return 0;

	old_vntable_size = vvstart - vnstart;
	new_vntable_size = rebuild_vntable(0);
	adjust_vntable_size(old_vntable_size, new_vntable_size);
	rebuild_vntable(1);
	return 1;
}

/* print the usage message on stderr. */
void print_help(void) {
	fprintf(stderr, "Usage: %s [-v] [-f] [-n] [-g] [-c] <inputfile> [<outputfile>]\n", self);
	fprintf(stderr, "-v: verbose\n");
	fprintf(stderr, "-f: force variable name table rebuild\n");
	fprintf(stderr, "-n: do not rebuild variable name table, even if it's invalid\n");
	fprintf(stderr, "-g: remove trailing garbage, if present\n");
	fprintf(stderr, "-c: check only; no output file\n");
	fprintf(stderr, "Use - as a filename to read from stdin and/or write to stdout\n");
}

/* complain about a bad argument, print the usage message, exit 1. */
void invalid_args(const char *arg) {
	fprintf(stderr, "%s: Invalid argument '%s'\n\n", self, arg);
	print_help();
	exit(1);
}

/* fopen() that reports the error and exits on failure. */
FILE *open_file(const char *name, const char *mode) {
	FILE *fp;

	if(!(fp = fopen(name, mode))) {
		perror(name);
		exit(1);
	}
	return fp;
}

/* open the input file for binary reading. a NULL name means stdin. */
void open_input(const char *name) {
	if(!name) {
		if(freopen(NULL, "rb", stdin)) {
			input_file = stdin;
			return;
		} else {
			perror("stdin");
			exit(1);
		}
	}

	input_file = open_file(name, "rb");
}

/* open the output file for binary writing. a NULL name means stdout,
   which is refused if it's a terminal. */
void open_output(const char *name) {
	if(!name) {
		if(isatty(fileno(stdout))) {
			fprintf(stderr, "%s: refusing to write binary data to standard output\n", self);
			exit(1);
		}
		if(freopen(NULL, "wb", stdout)) {
			output_file = stdout;
			return;
		} else {
			perror("stdout");
			exit(1);
		}
	}

	output_file = open_file(name, "wb");
}

/* process the command line: set the option globals and open the input
   and output files. the first filename is the input, the 2nd is the
   output. exits on any error. */
void parse_args(int argc, char **argv) {
	self = *argv;

	if(argc < 2) {
		print_help();
		exit(0);
	}

	while(++argv, --argc) {
		if((*argv)[0] == '-') {
			switch((*argv)[1]) {
				case 'v': verbose++; break;
				case 'f': forcevars++; break;
				case 'n': keepvars++; break;
				case 'g': keepgarbage = 0; break;
				case 'c': checkonly = 1; break;
				case 0:
					/* a lone "-" means stdin or stdout */
					if(!input_file)
						open_input(NULL);
					else if(!output_file)
						open_output(NULL);
					else
						invalid_args(*argv);
					break;
				default:
					invalid_args(*argv);
					break;
			}
		} else {
			if(!input_file)
				open_input(*argv);
			else if(!checkonly && !output_file)
				open_output(*argv);
			else
				invalid_args(*argv);
		}
	}

	if(!input_file)
		die("no input file given (use - for stdin)");
	if(!checkonly && !output_file)
		die("no output file given (use - for stdout)");
	if(keepvars && forcevars)
		die("-f and -n are mutually exclusive");
}

/* exit status: 0 on success. in check-only mode, 0 if the program was
   protected, 2 if it wasn't. 1 on any error. */
int main(int argc, char **argv) {
	int got;

	parse_args(argc, argv);

	filelen = readfile();
	if(filelen < HEADER_SIZE)
		die("File is too short to be an Atari BASIC program");

	read_header();

	if(lomem)
		die("This doesn't look like an Atari BASIC program (no $0000 signature)");

	/* everything below indexes data[] using these, so make sure they
	   are in order and inside the file. */
	if(vnstart > vvstart || vvstart > codestart || codestart > filelen)
		die("Corrupt header: table pointers are out of order or past end of file");

	if(!keepvars) {
		if(fixvars()) {
			was_protected = 1;
			if(verbose)
				fprintf(stderr, "Variable names replaced\n");
		} else {
			if(verbose)
				fprintf(stderr, "Variable names were already OK\n");
		}
	}

	if(fixcode()) {
		if(verbose)
			fprintf(stderr, "Fixed invalid offset in code\n");
		was_protected = 1;
	} else {
		if(verbose)
			fprintf(stderr, "No invalid offsets\n");
	}

	if(verbose) {
		if(was_protected)
			fprintf(stderr, "Program was protected.\n");
		else
			fprintf(stderr, "Program was NOT protected.\n");
	}

	if(checkonly) {
		if(verbose)
			fprintf(stderr, "Check-only mode; no output written.\n");
		if(was_protected)
			return 0;
		else
			return 2;
	}

	got = (int)fwrite(data, 1, (size_t)filelen, output_file);
	if(got != filelen) {
		fclose(output_file);
		die("error writing output file");
	}
	/* fclose() flushes, so a full disk etc. may only show up here */
	if(fclose(output_file) != 0)
		die("error writing output file");
	if(verbose)
		fprintf(stderr, "wrote %d bytes\n", got);

	return 0;
}