/**** TODO: if the rebuilt variable name table ends up larger than the
      scrambled one, the rest of the program needs to be moved upwards
      in memory to make room for it. currently this isn't done, so the
      variable *value* table gets corrupted by the last few variable
      names overwriting the first few values.

      NOTE(review): fixvars() calls move_code_up() when the name table
      is too small, so this TODO looks stale -- confirm and remove. */

/* stdio: FILE, fprintf, fread, fopen, freopen, perror, fileno
   stdlib: exit
   string: memmove
   unistd: isatty */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/* attempt to fix a "list-protected" Atari 8-bit BASIC program.
   we don't fully detokenize, so this won't fix truly corrupted
   files.

   the "fix" is in 2 parts:

   1. fix any invalid (0-byte) offsets after a line number. this is
      what causes BASIC to lock up.

   2. if the variable names were overwritten (e.g. with EOL characters,
      or whatever), we "fix" that by making up new variable names.
 */

#define STM_OFFSET 0xf2

/* entire file gets read into memory (for now) */
unsigned char data[65536];

/* BASIC 14-byte header values */
unsigned short lomem;
unsigned short vntp;
unsigned short vntd;
unsigned short vvtp;
unsigned short stmtab;
unsigned short stmcur;
unsigned short starp;

/* positions where various parts of the file start,
   derived from the header vars above. */
unsigned short codestart;
unsigned short vnstart;
unsigned short vvstart;
int filelen;

/* name of executable, taken from argv[0] */
char *self;

/* these are set by the various command-line switches */
int keepvars = 0;
int forcevars = 0;
int keepgarbage = 1;
int verbose = 0;

/* file handles */
FILE *input_file = NULL;
FILE *output_file = NULL;

/* print "<program name>: <msg>" on stderr and exit with status 1. */
void die(const char *msg) {
	fprintf(stderr, "%s: %s\n", self, msg);
	exit(1);
}

/* read entire file into memory. at most 65535 bytes are read into
   data[]. returns the number of bytes read; exits via die() if the
   stream reports a read error, so a short count always means EOF. */
int readfile(void) {
	size_t got = fread(data, 1, 65535, input_file);

	if(ferror(input_file))
		die("error reading input file");

	fprintf(stderr, "read %d bytes\n", (int)got);
	return (int)got;
}

/* get a 16-bit value from the file, in 6502 LSB/MSB order.
*/ unsigned short getword(int addr) { return data[addr] | (data[addr + 1] << 8); } void setword(int addr, int value) { data[addr] = value & 0xff; data[addr + 1] = value >> 8; } void dump_header_vars(void) { fprintf(stderr, "LOMEM %04x\n", lomem); fprintf(stderr, "VNTP %04x\n", vntp); fprintf(stderr, "VNTD %04x\n", vntd); fprintf(stderr, "VVTP %04x\n", vvtp); fprintf(stderr, "STMTAB %04x, codestart %04x\n", stmtab, codestart); fprintf(stderr, "STMCUR %04x\n", stmcur); fprintf(stderr, "STARP %04x\n", starp); fprintf(stderr, "vvstart %04x\n", vvstart); } void read_header(void) { lomem = getword(0); vntp = getword(2); vntd = getword(4); vvtp = getword(6); stmtab = getword(8); stmcur = getword(10); starp = getword(12); codestart = stmtab - STM_OFFSET - (vntp - 256); vnstart = vntp - 256 + 14; vvstart = vvtp - 256 + 14; dump_header_vars(); } void set_header_vars(void) { setword(0, lomem); setword(2, vntp); setword(4, vntd); setword(6, vvtp); setword(8, stmtab); setword(10, stmcur); setword(12, starp); } /* fixline() calculates & sets correct line length, by iterating over the statement(s) within the line. the last statement's offset will be the same as the line offset should have been, if it weren't zeroed. when reading this code, it's helpful to know that the lengths (line and statement) are counted from the start of the line in memory. A line with only a line number and one token (such as END) would have a line length of 6: 2 for the 16-bit line number, 1 for the length byte itself, 1 for the statement length byte (also 6), 1 for the END token, and one for the end-of-line token. A line with two statements: 10 ?:END offset value meaning 0 0A line number (low byte) 1 00 line number (high byte) 2 09 line length (or, offset to next line) [!] 3 06 offset to next statement *from the start of the line* 4 28 token for "?" 5 14 token for : (end of statement) 6 09 offset to next statement [!] 7 15 token for END 8 16 token for end-of-line [*] 9 ?? 
(line number of next statement) Note the values marked with [!] are equal. [*] end-of-line is $16 *except* for REM and DATA, which are terminated with $9B instead. */ int fixline(int linepos) { /* +3 here to skip the line number + line length */ int token, done = 0, offset = data[linepos + 3]; while(!done) { offset = data[linepos + offset]; token = data[linepos + offset - 1]; fprintf(stderr, "offset %02x token %02x\n", offset, token); if(token != 0x14) done++; } data[linepos + 2] = offset; return offset; } /* Iterate over all the tokenized lines. If any of them have invalid line lengths (<=5), call fixline() on them. */ int fixcode(void) { int result = 0; int pos = codestart; int offset, lineno = -1, tmpno; while(pos < filelen) { tmpno = getword(pos); if(tmpno <= lineno) { fprintf(stderr, "Warning: line number %d at offset %04x is <= previous line number %d\n", tmpno, pos, lineno); } lineno = tmpno; offset = data[pos + 2]; /* fprintf(stderr, "pos %d, line #%d, offset %d\n", pos, lineno, offset); */ if(offset < 6) { fprintf(stderr, "Found invalid offset %d (<6) at line %d\n", offset, lineno); offset += fixline(pos); result++; } pos += offset; /* Atari BASIC tolerates garbage after the last tokenized line, so we must do likewise. */ if(lineno == 32768) break; } fprintf(stderr, "End program pos %04x/%d\n", pos, pos); if(filelen > pos) { fprintf(stderr, "trailing garbage at EOF, %d bytes, %s\n", filelen - pos, (keepgarbage ? "keeping" : "removing")); if(!keepgarbage) filelen = pos; } return result; } /* sometimes the variable name table isn't large enough to hold the generated variable names. move_code_up() makes more space, by moving the rest of the program (including the variable value table) up in memory. 
*/ void move_code_up(int offset) { memmove(data + vvstart + offset, data + vvstart, filelen); vvtp += offset; stmtab += offset; stmcur += offset; starp += offset; set_header_vars(); read_header(); filelen += offset; } /* Fixing the variables is a bit more work than it seems like it might be, because the last byte of the name has to match the type (inverse video "(" for numeric array, inverse "$" for string, inverse last character of name for scalars). To do this right, we have to examine the variable value table to find out the type of each variable. Each variable type get assigned A to Z, then A1 to A9, B1 to B9, etc. This means there will be A, A$, and A( variables, which might be a bit confusing, but we have to keep the generated name table as short as possible, because we can't extend the size of the table in the file. We can find the actual table size in the file by subtracting VNTP (start of variable name table) from VNTD (end of variable name table). It's possible that the table size is too small for the generated variable names, in which case we have to call move_code_up() to make more room. The maximum number of variable names is 128. If all 128 vars are in use, the minimum table size is 230 (26 one-letter names, 102 2-letter or letter+number or one-letter string/array names). */ int fixvars(void) { int vp = vnstart, vv = vvstart; int strings = 0, arrays = 0, scalars = 0, varname = 0, varnum = 0; int bad = 0; /* See if the variables even need fixing. This code is simpler than it should be: it checks that all characters in the variable name table are valid, but doesn't check that they're in valid sequences. Example: a variable name that's just an inverse dollar sign would be considered OK). Also multiple variables of the same type with the same name would be OK. However, if all the bytes are the same value, even if it's a valid character, that's correctly detected as invalid. 
*/ if(vntp == vntd) { fprintf(stderr, "No variables\n"); return 0; } vp = vnstart + 1; bad = 1; while(vp < vvstart - 1) { if(data[vp] != data[vnstart]) bad = 0; vp++; } vp = vnstart; while(vp < vvstart) { unsigned char c = data[vp]; /* fprintf(stderr, "%04x/%04x: %04x\n", vp, vvstart, c); */ /* allow a null byte only at the end of the table! */ /* if(c == 0 && vp == vvstart - 1) break; */ /* new rule: treat a null byte as end-of-table, ignore any junk between it and VNTP. */ if(c == 0) break; vp++; /* inverse $ or ( is OK */ if(c == 0xa4 || c == 0xa8) continue; /* numbers and letters are allowed, inverse or normal. */ c &= 0x7f; if(c >= 0x30 && c <= 0x39) continue; if(c >= 0x41 && c <= 0x5a) continue; bad++; break; } if(!forcevars && !bad) return 0; /* decide whether we have enough room. pretend every new variable name is 3 bytes (really only true for the 10th and later strings and arrays, but a little wasted space won't hurt anything). */ { int vntblsize = vvstart - vnstart; int varcount = (codestart - vvstart) / 8; int neededsize = varcount * 3 + 1; int move_up_by; fprintf(stderr, "%d variables according to value table\n", varcount); if(neededsize > vntblsize) { move_up_by = neededsize - vntblsize; fprintf(stderr, "need %d bytes for vntable, only have %d, moving up by %d to %04x\n", neededsize, vntblsize, move_up_by, vvtp + move_up_by); move_code_up(move_up_by); } } vp = vnstart; vv = vvstart; while(vv < codestart) { unsigned char sigil = 0; /* type: scalar = 0, array = 1, string = 2 */ unsigned char type = data[vv] >> 6; /* fprintf(stderr, "%04x: %04x, %d\n", vv, data[vv], type); */ if(varnum != data[vv+1]) { fprintf(stderr, "Warning: variable value is corrupt!\n"); } varnum++; switch(type) { case 1: varname = arrays++; sigil = 0xa8; break; case 2: varname = strings++; sigil = 0xa4; break; default: varname = scalars++; break; } if(varname < 26) { data[vp] = ('A' + varname); } else { varname -= 26; data[vp++] = 'A' + ((varname - 26) / 9); data[vp] = ('1' + 
((varname - 26) % 9)); } if(sigil) { vp++; data[vp++] = sigil; } else { data[vp] |= 0x80; vp++; } vv += 8; } /* there's supposed to be a null byte at the end of the table, unless all 128 table slots are used. */ if(varnum < 128) data[vp] = 0; /* fixup the VNTD pointer */ vntd = vntp + (vp - vnstart); data[4] = vntd & 0xff; data[5] = vntd >> 8; fprintf(stderr, "%d variables, VNTD adjusted to %04x\n", varnum, vntd); return 1; } void print_help(void) { fprintf(stderr, "Usage: %s [-v] [-f] [-n] [-g] \n", self); fprintf(stderr, "-v: verbose\n"); fprintf(stderr, "-f: force variable name table rebuild\n"); fprintf(stderr, "-n: do not rebuild variable name table, even if it's invalid\n"); fprintf(stderr, "-g: remove trailing garbage, if present\n"); fprintf(stderr, "Use - as a filename to read from stdin and/or write to stdout\n"); } void invalid_args(const char *arg) { fprintf(stderr, "%s: Invalid argument '%s'\n\n", self, arg); print_help(); exit(1); } FILE *open_file(const char *name, const char *mode) { FILE *fp; if(!(fp = fopen(name, mode))) { perror(name); exit(1); } return fp; } void open_input(const char *name) { if(!name) { if(freopen(NULL, "rb", stdin)) { input_file = stdin; return; } else { perror("stdin"); exit(1); } } input_file = open_file(name, "rb"); } void open_output(const char *name) { if(!name) { if(isatty(fileno(stdout))) { fprintf(stderr, "%s: refusing to write binary data to standard output\n", self); exit(1); } if(freopen(NULL, "wb", stdout)) { output_file = stdout; return; } else { perror("stdout"); exit(1); } } output_file = open_file(name, "wb"); } void parse_args(int argc, char **argv) { self = *argv; if(argc < 2) { print_help(); exit(0); } while(++argv, --argc) { if((*argv)[0] == '-') { switch((*argv)[1]) { case 'v': verbose++; break; case 'f': forcevars++; break; case 'n': keepvars++; break; case 'g': keepgarbage = 0; break; case 0: if(!input_file) open_input(NULL); else if(!output_file) open_output(NULL); else invalid_args(*argv); break; 
default: invalid_args(*argv); break; } } else { if(!input_file) open_input(*argv); else if(!output_file) open_output(*argv); else invalid_args(*argv); } } if(!input_file) die("no input file given (use - for stdin)"); if(!output_file) die("no output file given (use - for stdout)"); if(keepvars && forcevars) die("-f and -n are mutually exclusive"); } int main(int argc, char **argv) { parse_args(argc, argv); filelen = readfile(); read_header(); if(lomem) die("This doesn't look like an Atari BASIC program (no $0000 signature)"); /* fprintf(stderr, "data at STMTAB (we hope):\n"); for(int i=codestart; i