/* NOTE(review): the header names that should follow the first six
   #include directives below are missing in this copy of the file (only
   "bas.h" survived). The code in this file uses stdio, stdlib, string
   and ctype functions; the exact original header list can't be
   determined from here -- TODO restore from the upstream source. */
#include #include #include #include #include #include #include "bas.h"

/* attempt to fix a "list-protected" Atari 8-bit BASIC program.
   we don't fully detokenize, so this won't fix truly corrupted
   files. the "fix" is in 2 parts:

   1. fix any invalid (0-byte) offsets after a line number. this is
      what causes BASIC to lock up.

   2. if the variable names were overwritten (e.g. with EOL characters,
      or whatever), we "fix" that by making up new variable names.
*/

/* for the -r option (write_var_map() uses the same file for -w) */
#define MAP_FILE "varnames.txt"

/* text of the names read from MAP_FILE by read_var_map(): stored back
   to back, uppercased, each one NUL-terminated. */
unsigned char varnames[BUFSIZE];

/* varmap[i] points into varnames[] at the name found on line i+1 of
   MAP_FILE; varmap_count is the number of valid entries. */
unsigned char *varmap[MAXVARS];
int varmap_count;

/* these are set by the various command-line switches */
int keepvars = 0;      /* -n: leave the variable name table alone */
int forcevars = 0;     /* -f: rebuild the name table even if it looks OK */
int keepgarbage = 1;   /* cleared by -g: drop data after the last line */
int checkonly = 0;     /* -c: don't write an output file */
int was_protected = 0; /* not a switch: set by main() when something needed fixing */
int readmap = 0;       /* -r: take variable names from MAP_FILE */
int writemap = 0;      /* -w: write variable names to MAP_FILE */

/* fixline() calculates & sets correct line length, by iterating
   over the statement(s) within the line. the last statement's
   offset will be the same as the line offset should have been,
   if it weren't zeroed.

   when reading this code, it's helpful to know that the lengths
   (line and statement) are counted from the start of the line in
   memory. A line with only a line number and one token (such as END)
   would have a line length of 6: 2 for the 16-bit line number, 1 for
   the length byte itself, 1 for the statement length byte (also 6),
   1 for the END token, and one for the end-of-line token.

   A line with two statements:

   10 ?:END

   offset  value  meaning
   0       0A     line number (low byte)
   1       00     line number (high byte)
   2       09     line length (or, offset to next line) [!]
   3       06     offset to next statement *from the start of the line*
   4       28     token for "?"
   5       14     token for : (end of statement), we call it TOK_COLON
   6       09     offset to next statement [!]
   7       15     token for END
   8       16     token for end-of-line [*]
   9       ??     (line number of next statement)

   Note the values marked with [!] are equal.

   The line length at offset 2 is what gets zeroed out by the
   protection. To fix it, we follow the next-statement offsets.
If there's not a colon before the offset, replace the byte at offset 2 with that statement's offset. [*] end-of-line is $16 *except* for REM and DATA, which are terminated with $9B instead. */ int fixline(int linepos) { /* +3 here to skip the line number + line length */ int offset = program[linepos + 3]; while(program[linepos + offset - 1] == TOK_COLON) offset = program[linepos + offset]; program[linepos + 2] = offset; return offset; } /* Iterate over all the tokenized lines. If any of them have invalid line lengths (<=5), call fixline() on them. */ int fixcode(void) { int result = 0; int pos = codestart; int offset, lineno = -1, tmpno; while(pos < filelen) { tmpno = getword(pos); if(tmpno <= lineno) { fprintf(stderr, "Warning: line number %d at offset $%04x is <= previous line number %d.\n", tmpno, pos, lineno); } lineno = tmpno; offset = program[pos + 2]; /* fprintf(stderr, "pos %d, line #%d, offset %d\n", pos, lineno, offset); */ if(offset < 6) { if(verbose) fprintf(stderr, "Found invalid offset %d (<6) at line %d, file offset $%04x.\n", offset, lineno, pos); offset += fixline(pos); result++; } pos += offset; /* Atari BASIC tolerates garbage after the last tokenized line, so we must do likewise. */ if(lineno == 32768) break; } if(verbose) fprintf(stderr, "End program file offset: $%04x/%d\n", pos, pos); if(filelen > pos) { int i, same = 1; for(i = pos; i < filelen; i++) { if(program[i] != program[pos]) same = 0; } if(verbose) { fprintf(stderr, "Trailing garbage at EOF, %d bytes, ", filelen - pos); if(same) fprintf(stderr, "all $%02x", program[pos]); else fprintf(stderr, "maybe valid data"); fprintf(stderr, ", %s.\n", (keepgarbage ? 
"keeping" : "removing")); } if(!keepgarbage) filelen = pos; } else { if(verbose) fprintf(stderr, "No trailing garbage at EOF.\n"); } return result; } /* Fixing the variables is a bit more work than it seems like it might be, because the last byte of the name has to match the type (inverse video "(" for numeric array, inverse "$" for string, inverse last character of name for scalars). To do this right, we have to examine the variable value table to find out the type of each variable. Each variable type get assigned A to Z, then A1 to A9, B1 to B9, etc. This means there will be A, A$, and A( variables, which might be a bit confusing, but we have to keep the generated name table as short as possible, because we can't extend the size of the table in the file. We can find the actual table size in the file by subtracting VNTP (start of variable name table) from VNTD (end of variable name table). It's possible that the table size is too small for the generated variable names, in which case we have to call move_code() to make more room. The maximum number of variable names is 128. If all 128 vars are in use, the minimum table size is 230 (26 one-letter names, 102 2-letter or letter+number or one-letter string/array names). */ /* walk the variable value table, generating variable names. if write is 0, just return the size the table will be. if write is 1, actually write the names to memory. 
*/ int rebuild_vntable(int write) { int vp = vnstart; int vv = vvstart; int size = 0; int strings = 0, arrays = 0, scalars = 0, varname = 0, varnum = 0; while(vv < codestart) { unsigned char sigil = 0; /* type: scalar = 0, array = 1, string = 2 */ unsigned char type = program[vv] >> 6; /* fprintf(stderr, "%04x: %04x, %d\n", vv, program[vv], type); */ if(varnum == MAXVARS) { fprintf(stderr, "Warning: skipping variable numbers >=%d in value table.\n", MAXVARS); break; } if(varnum != program[vv+1]) { fprintf(stderr, "Warning: variable #%d value is corrupt!\n", varnum); } switch(type) { case TYPE_SCALAR: varname = scalars++; break; case TYPE_ARRAY: varname = arrays++; sigil = 0xa8; break; case TYPE_STRING: varname = strings++; sigil = 0xa4; break; default: fprintf(stderr, "Warning: variable value #%d has unknown type.\n", varnum); break; } if(varname < 26) { if(write) program[vp] = ('A' + varname); size++; } else { varname -= 26; if(write) { program[vp++] = 'A' + varname / 9; program[vp] = '1' + varname % 9; } size += 2; } if(sigil) { size++; vp++; if(write) program[vp++] = sigil; } else { if(write) program[vp] |= 0x80; vp++; } vv += 8; varnum++; } /* there's supposed to be a null byte at the end of the table, unless all 128 table slots are used... except really, there can be >=129 entries, and there's always a null byte. 
*/ if(write) program[vp] = 0; size++; return size; } int fixvars(void) { int old_vntable_size, new_vntable_size; if(vntable_ok() && !forcevars) return 0; old_vntable_size = vvstart - vnstart; new_vntable_size = rebuild_vntable(0); adjust_vntable_size(old_vntable_size, new_vntable_size); rebuild_vntable(1); return 1; } void write_var_map(void) { FILE *f; int vp, count = 0; if(verbose) fprintf(stderr, "Writing variable names to '" MAP_FILE "'.\n"); f = fopen(MAP_FILE, "w"); if(!f) { perror(MAP_FILE); die("Can't create map file for -w option."); } for(vp = vnstart; (vp < vntd) && (program[vp] != 0); vp++) { unsigned char c = program[vp]; if(c < 0x80) { fputc(c, f); } else { fputc(c & 0x7f, f); fputc('\n', f); count++; } } fclose(f); if(verbose) fprintf(stderr, "Wrote %d variable names to '" MAP_FILE "'.\n", count); } void die_mapfile(char *msg, int num) { fprintf(stderr, MAP_FILE ": line %d: %s.\n", num, msg); exit(1); } void check_varname(const unsigned char *name, int line) { int len = strlen((char *)name); int i; unsigned char c = 0, type; /* fprintf(stderr, "check_varname(\"%s\", %d)\n", name, line); */ if(len < 1) die_mapfile("Blank variable name", line); if(len > 128) die_mapfile("Variable name >128 characters", line); if(name[0] < 'A' || name[0] > 'Z') die_mapfile("Invalid variable name: First character must be a letter", line); for(i = 1; i < len; i++) { c = name[i]; if(c >= 'A' && c <= 'Z') continue; if(c >= '0' && c <= '9') continue; if(c == '$' || c == '(') { if(i == (len - 1)) continue; else die_mapfile("Invalid variable name: $ and ( only allowed at end", line); } die_mapfile("Invalid character in variable name", line); } if(c == 0) c = name[0]; /* c now has the last char of the name, make sure it matches the variable type */ type = program[vvstart + 8 * (line - 1)] >> 6; /* type: scalar = 0, array = 1, string = 2 */ if(type == TYPE_SCALAR) { if(c == '$') die_mapfile("Type mismatch: numeric variable may not end with $", line); else if(c == '(') 
die_mapfile("Type mismatch: numeric variable may not end with (", line); } else if(type == TYPE_ARRAY) { if(c != '(') die_mapfile("Type mismatch: array variable must end with (", line); } else if(type == TYPE_STRING) { if(c != '$') die_mapfile("Type mismatch: string variable must end with $", line); } else { fprintf(stderr, "Warning: variable value table is corrupt (invalid type).\n"); } /* check for dups */ for(i = 0; i < line - 1; i++) { if(strcmp((char *)name, (char *)varmap[i]) == 0) die_mapfile("duplicate variable name", line); } } void read_var_map(void) { FILE *f; unsigned char *p = varnames, *curname = varnames; int count = 0, vvcount = (codestart - vvstart) / 8; if(verbose) fprintf(stderr, "Reading variable names from " MAP_FILE ".\n"); f = fopen(MAP_FILE, "r"); if(!f) { perror(MAP_FILE); die("Can't read map file for -r option."); } while(!feof(f)) { *p = toupper(fgetc(f)); /* allow lowercase */ if(*p == ' ' || *p == '\t' || *p == '\r') continue; /* ignore whitespace */ if(*p == '\n') { *p = '\0'; varmap[count++] = curname; check_varname(curname, count); curname = p + 1; } p++; } fclose(f); if(verbose) fprintf(stderr, "Read %d variable names from " MAP_FILE ".\n", count); if(vvcount > count) { fprintf(stderr, MAP_FILE ": not enough variables (have %d, need %d).\n", count, vvcount); exit(1); } else if(count > vvcount) { fprintf(stderr, MAP_FILE ": too many variables (have %d, need %d).\n", count, vvcount); exit(1); } varmap_count = count; } void apply_var_map(void) { unsigned char new_vntable[BUFSIZE]; int i, newp = 0; unsigned char *v; if(verbose) fprintf(stderr, "Using variable names from " MAP_FILE ".\n"); for(i = 0; i < varmap_count; i++) { v = varmap[i]; while(*v) { new_vntable[newp++] = *v; v++; } new_vntable[newp - 1] |= 0x80; } new_vntable[newp++] = '\0'; i = vvstart - vnstart; adjust_vntable_size(i, newp); memmove(program + vnstart, new_vntable, newp); } void print_help(void) { fprintf(stderr, "Usage: %s [-v] [-f] [-n] [-g] [-c] [-r|-w] \n", self); 
fprintf(stderr, " -v: Verbose.\n"); fprintf(stderr, " -f: Force variable name table rebuild.\n"); fprintf(stderr, " -n: Do not rebuild variable name table, even if it's invalid.\n"); fprintf(stderr, " -g: Remove trailing garbage, if present.\n"); fprintf(stderr, " -c: Check only; no output file.\n"); fprintf(stderr, " -w: Write variable names to 'varnames.txt'.\n"); fprintf(stderr, " -r: Read variable names from 'varnames.txt'.\n"); fprintf(stderr, "Use - as a filename to read from stdin and/or write to stdout.\n"); } void parse_args(int argc, char **argv) { set_self(*argv); parse_general_args(argc, argv, print_help); while(++argv, --argc) { if((*argv)[0] == '-') { switch((*argv)[1]) { case 'v': verbose++; break; case 'f': forcevars++; break; case 'n': keepvars++; break; case 'g': keepgarbage = 0; break; case 'c': checkonly = 1; break; case 'r': readmap = 1; break; case 'w': writemap = 1; break; case 0: if(!input_file) open_input(NULL); else if(!output_filename) output_filename = *argv; else invalid_args(*argv); break; default: invalid_args(*argv); break; } } else { /* arg doesn't start with a -, must be a filename */ if(!input_file) open_input(*argv); else if(!checkonly && !output_filename) output_filename = *argv; else invalid_args(*argv); } } if(!input_file) die("No input file given (use - for stdin)."); if(!checkonly && !output_filename) die("No output file given (use - for stdout)."); if(keepvars && forcevars) die("-f and -n are mutually exclusive."); if(readmap && writemap) die("-r and -w are mutually exclusive."); if(readmap && keepvars) die("-r and -n are mutually exclusive, maybe you want -w?"); if(checkonly && (readmap || writemap)) die("-c and -r/-w are mutually exclusive."); } int main(int argc, char **argv) { int invoffs = 0; parse_args(argc, argv); readfile(); parse_header(); if(readmap) { was_protected = !vntable_ok(); read_var_map(); apply_var_map(); } else { if(!keepvars) { if(fixvars()) { was_protected = 1; if(verbose) fprintf(stderr, "Variable 
names replaced.\n"); } else { if(verbose) fprintf(stderr, "Variable names were already OK.\n"); } } } invoffs = fixcode(); if(invoffs) { if(verbose) fprintf(stderr, "Fixed %d invalid offset%s in code.\n", invoffs, (invoffs == 1 ? "" : "s")); was_protected = 1; } else { if(verbose) fprintf(stderr, "No invalid offsets.\n"); } if(verbose) { fprintf(stderr, "Program was %sprotected.\n", (was_protected ? "" : "NOT ")); } if(checkonly) { if(verbose) fprintf(stderr, "Check-only mode; no output written.\n"); return was_protected ? 0 : 2; } /* we don't open the output file until all processing is done, to avoid leaving invalid output files if we exit on error. */ open_output(output_filename); writefile(); if(writemap) write_var_map(); return was_protected ? 0 : 2; }