#include #include #include #include #include #include #include "bas.h" /* attempt to fix a "list-protected" Atari 8-bit BASIC program. we don't fully detokenize, so this won't fix truly corrupted files. the "fix" is in 2 parts: 1. fix any invalid (0-byte) offsets after a line number. this is what causes BASIC to lock up. 2. if the variable names were overwritten (e.g. with EOL characters, or whatever), we "fix" that by making up new variable names. */ /* for the -p/-pc options: 32767 END */ unsigned char badcode[] = { 0xff, 0x7f, /* line number 32767 */ 0x00, /* *bad* next-line offset */ 0x06, /* next-statement offset */ 0x15, /* END token */ 0x16, /* end-of-line token */ }; /* for -p/-pv */ int varname_char = 0x9b; /* for -s */ int shrinktable = 0; /* for the -r option */ #define MAP_FILE "varnames.txt" unsigned char varnames[BUFSIZE]; unsigned char *varmap[MAXVARS]; int varmap_count; /* these are set by the various command-line switches */ int keepvars = 0; int forcevars = 0; int keepgarbage = 1; int checkonly = 0; int was_protected = 0; int readmap = 0; int writemap = 0; int protect_vars = 0; int protect_code = 0; char *output_filename = NULL; /* fixline() calculates & sets correct line length, by iterating over the statement(s) within the line. the last statement's offset will be the same as the line offset should have been, if it weren't zeroed. when reading this code, it's helpful to know that the lengths (line and statement) are counted from the start of the line in memory. A line with only a line number and one token (such as END) would have a line length of 6: 2 for the 16-bit line number, 1 for the length byte itself, 1 for the statement length byte (also 6), 1 for the END token, and one for the end-of-line token. A line with two statements: 10 ?:END offset value meaning 0 0A line number (low byte) 1 00 line number (high byte) 2 09 line length (or, offset to next line) [!] 3 06 offset to next statement *from the start of the line* 4 28 token for "?" 5 14 token for : (end of statement), we call it TOK_COLON 6 09 offset to next statement [!] 7 15 token for END 8 16 token for end-of-line [*] 9 ?? (line number of next statement) Note the values marked with [!] are equal. The line length at offset 2 is what gets zeroed out by the protection. To fix it, we follow the next-statement offsets. If there's not a colon before the offset, replace the byte at offset 2 with that statement's offset. [*] end-of-line is $16 *except* for REM and DATA, which are terminated with $9B instead. */ int fixline(int linepos) { /* +3 here to skip the line number + line length */ int offset = program[linepos + 3]; while(program[linepos + offset - 1] == TOK_COLON) offset = program[linepos + offset]; program[linepos + 2] = offset; return offset; } /* Iterate over all the tokenized lines. If any of them have invalid line lengths (<=5), call fixline() on them. */ int fixcode(void) { int result = 0; int pos = codestart; int offset, lineno = -1, tmpno; while(pos < filelen) { tmpno = getword(pos); if(tmpno <= lineno) { fprintf(stderr, "Warning: line number %d at offset $%04x is <= previous line number %d.\n", tmpno, pos, lineno); } lineno = tmpno; offset = program[pos + 2]; /* fprintf(stderr, "pos %d, line #%d, offset %d\n", pos, lineno, offset); */ if(offset < 6) { if(verbose) fprintf(stderr, "Found invalid offset %d (<6) at line %d, file offset $%04x.\n", offset, lineno, pos); offset += fixline(pos); result++; } pos += offset; /* Atari BASIC tolerates garbage after the last tokenized line, so we must do likewise. */ if(lineno == 32768) break; } if(verbose) fprintf(stderr, "End program file offset: $%04x/%d\n", pos, pos); if(filelen > pos) { int i, same = 1; for(i = pos; i < filelen; i++) { if(program[i] != program[pos]) same = 0; } if(verbose) { fprintf(stderr, "Trailing garbage at EOF, %d bytes, ", filelen - pos); if(same) fprintf(stderr, "all $%02x", program[pos]); else fprintf(stderr, "maybe valid data"); fprintf(stderr, ", %s.\n", (keepgarbage ? "keeping" : "removing")); } if(!keepgarbage) filelen = pos; } else { if(verbose) fprintf(stderr, "No trailing garbage at EOF.\n"); } return result; } /* iterate over all the lines, insert a poisoned line 32767 just before line 32768 */ void breakcode(void) { int pos = codestart, oldpos = 0; int offset, lineno = -1, tmpno = -1; while(pos < filelen) { lineno = tmpno; tmpno = getword(pos); if(tmpno == 32768) { break; } else { offset = program[pos + 2]; if(!offset) { fprintf(stderr, "%s: program already was code-protected.\n", self); exit(2); } oldpos = pos; pos += offset; } } if(!oldpos) die("Can't protect code because there are no lines of code."); if(lineno == 32767) die("Can't protect code because there is already a line 32767."); /* pos is now the start of line 32768, move it up to make room for the new line */ offset = sizeof(badcode); memmove(program + pos + offset, program + pos, filelen); /* insert new line */ memmove(program + pos, badcode, offset); if(verbose) fprintf(stderr, "Inserted line 32767 with invalid offset at file offset $%04x.\n", pos); /* update pointers that would be affected by the code move */ stmcur += offset; starp += offset; update_header(); parse_header(); filelen += offset; } /* Fixing the variables is a bit more work than it seems like it might be, because the last byte of the name has to match the type (inverse video "(" for numeric array, inverse "$" for string, inverse last character of name for scalars). To do this right, we have to examine the variable value table to find out the type of each variable. Each variable type get assigned A to Z, then A1 to A9, B1 to B9, etc. This means there will be A, A$, and A( variables, which might be a bit confusing, but we have to keep the generated name table as short as possible, because we can't extend the size of the table in the file. We can find the actual table size in the file by subtracting VNTP (start of variable name table) from VNTD (end of variable name table). It's possible that the table size is too small for the generated variable names, in which case we have to call move_code() to make more room. The maximum number of variable names is 128. If all 128 vars are in use, the minimum table size is 230 (26 one-letter names, 102 2-letter or letter+number or one-letter string/array names). */ /* return true if the variable name table is OK */ int vntable_ok(void) { int vp, bad; if(vntp == vntd) { if(verbose) fprintf(stderr, "No variables.\n"); return 1; } /* first pass: bad = 1 if all the bytes in the table have the same value, no matter what it is. */ vp = vnstart + 1; bad = 1; while(vp < vvstart - 1) { if(program[vp] != program[vnstart]) { bad = 0; break; } vp++; } if(bad) return 0; /* 2nd pass: bad = 1 if there's any invalid character in the table. */ vp = vnstart; while(vp < vvstart) { unsigned char c = program[vp]; /* treat a null byte as end-of-table, ignore any junk between it and VNTP. */ if(c == 0) break; vp++; /* inverse $ or ( is OK */ if(c == 0xa4 || c == 0xa8) continue; /* numbers and letters are allowed, inverse or normal. */ c &= 0x7f; if(c >= 0x30 && c <= 0x39) continue; if(c >= 0x41 && c <= 0x5a) continue; bad++; break; } return !bad; } /* walk the variable value table, generating variable names. if write is 0, just return the size the table will be. if write is 1, actually write the names to memory. */ int rebuild_vntable(int write) { int vp = vnstart; int vv = vvstart; int size = 0; int strings = 0, arrays = 0, scalars = 0, varname = 0, varnum = 0; while(vv < codestart) { unsigned char sigil = 0; /* type: scalar = 0, array = 1, string = 2 */ unsigned char type = program[vv] >> 6; /* fprintf(stderr, "%04x: %04x, %d\n", vv, program[vv], type); */ if(varnum == MAXVARS) { fprintf(stderr, "Warning: skipping variable numbers >=%d in value table.\n", MAXVARS); break; } if(varnum != program[vv+1]) { fprintf(stderr, "Warning: variable #%d value is corrupt!\n", varnum); } switch(type) { case TYPE_SCALAR: varname = scalars++; break; case TYPE_ARRAY: varname = arrays++; sigil = 0xa8; break; case TYPE_STRING: varname = strings++; sigil = 0xa4; break; default: fprintf(stderr, "Warning: variable value #%d has unknown type.\n", varnum); break; } if(varname < 26) { if(write) program[vp] = ('A' + varname); size++; } else { varname -= 26; if(write) { program[vp++] = 'A' + varname / 9; program[vp] = '1' + varname % 9; } size += 2; } if(sigil) { size++; vp++; if(write) program[vp++] = sigil; } else { if(write) program[vp] |= 0x80; vp++; } vv += 8; varnum++; } /* there's supposed to be a null byte at the end of the table, unless all 128 table slots are used... except really, there can be >=129 entries, and there's always a null byte. */ if(write) program[vp] = 0; size++; return size; } int fixvars(void) { int old_vntable_size, new_vntable_size; if(vntable_ok() && !forcevars) return 0; old_vntable_size = vvstart - vnstart; new_vntable_size = rebuild_vntable(0); adjust_vntable_size(old_vntable_size, new_vntable_size); rebuild_vntable(1); return 1; } void write_var_map(void) { FILE *f; int vp, count = 0; if(verbose) fprintf(stderr, "Writing variable names to '" MAP_FILE "'.\n"); f = fopen(MAP_FILE, "w"); if(!f) { perror(MAP_FILE); die("Can't create map file for -w option."); } for(vp = vnstart; (vp < vntd) && (program[vp] != 0); vp++) { unsigned char c = program[vp]; if(c < 0x80) { fputc(c, f); } else { fputc(c & 0x7f, f); fputc('\n', f); count++; } } fclose(f); if(verbose) fprintf(stderr, "Wrote %d variable names to '" MAP_FILE "'.\n", count); } void die_mapfile(char *msg, int num) { fprintf(stderr, MAP_FILE ": line %d: %s.\n", num, msg); exit(1); } void check_varname(const unsigned char *name, int line) { int len = strlen((char *)name); int i; unsigned char c = 0, type; /* fprintf(stderr, "check_varname(\"%s\", %d)\n", name, line); */ if(len < 1) die_mapfile("Blank variable name", line); if(len > 128) die_mapfile("Variable name >128 characters", line); if(name[0] < 'A' || name[0] > 'Z') die_mapfile("Invalid variable name: First character must be a letter", line); for(i = 1; i < len; i++) { c = name[i]; if(c >= 'A' && c <= 'Z') continue; if(c >= '0' && c <= '9') continue; if(c == '$' || c == '(') { if(i == (len - 1)) continue; else die_mapfile("Invalid variable name: $ and ( only allowed at end", line); } die_mapfile("Invalid character in variable name", line); } if(c == 0) c = name[0]; /* c now has the last char of the name, make sure it matches the variable type */ type = program[vvstart + 8 * (line - 1)] >> 6; /* type: scalar = 0, array = 1, string = 2 */ if(type == TYPE_SCALAR) { if(c == '$') die_mapfile("Type mismatch: numeric variable may not end with $", line); else if(c == '(') die_mapfile("Type mismatch: numeric variable may not end with (", line); } else if(type == TYPE_ARRAY) { if(c != '(') die_mapfile("Type mismatch: array variable must end with (", line); } else if(type == TYPE_STRING) { if(c != '$') die_mapfile("Type mismatch: string variable must end with $", line); } else { fprintf(stderr, "Warning: variable value table is corrupt (invalid type).\n"); } /* check for dups */ for(i = 0; i < line - 1; i++) { if(strcmp((char *)name, (char *)varmap[i]) == 0) die_mapfile("duplicate variable name", line); } } void read_var_map(void) { FILE *f; unsigned char *p = varnames, *curname = varnames; int count = 0, vvcount = (codestart - vvstart) / 8; if(verbose) fprintf(stderr, "Reading variable names from " MAP_FILE ".\n"); f = fopen(MAP_FILE, "r"); if(!f) { perror(MAP_FILE); die("Can't read map file for -r option."); } while(!feof(f)) { *p = toupper(fgetc(f)); /* allow lowercase */ if(*p == ' ' || *p == '\t' || *p == '\r') continue; /* ignore whitespace */ if(*p == '\n') { *p = '\0'; varmap[count++] = curname; check_varname(curname, count); curname = p + 1; } p++; } fclose(f); if(verbose) fprintf(stderr, "Read %d variable names from " MAP_FILE ".\n", count); if(vvcount > count) { fprintf(stderr, MAP_FILE ": not enough variables (have %d, need %d).\n", count, vvcount); exit(1); } else if(count > vvcount) { fprintf(stderr, MAP_FILE ": too many variables (have %d, need %d).\n", count, vvcount); exit(1); } varmap_count = count; } void apply_var_map(void) { unsigned char new_vntable[BUFSIZE]; int i, newp = 0; unsigned char *v; if(verbose) fprintf(stderr, "Using variable names from " MAP_FILE ".\n"); for(i = 0; i < varmap_count; i++) { v = varmap[i]; while(*v) { new_vntable[newp++] = *v; v++; } new_vntable[newp - 1] |= 0x80; } new_vntable[newp++] = '\0'; i = vvstart - vnstart; adjust_vntable_size(i, newp); memmove(program + vnstart, new_vntable, newp); } void scramble_vars(void) { int i; if(!vntable_ok()) { fprintf(stderr, "%s: Program already was variable-protected.\n", self); exit(2); } if(shrinktable) { if(verbose) fprintf(stderr, "Shrinking variable name table.\n"); adjust_vntable_size((vvstart - 1) - vnstart, (codestart - vvstart) / 8); } if(varname_char == -1) srand(time(NULL)); for(i = vnstart; i < vvstart - 1; i++) if(varname_char == -1) program[i] = (rand() >> 8) & 0xff; else program[i] = varname_char & 0xff; if(verbose) { i -= vnstart; if(i) { fprintf(stderr, "Replaced %d byte variable name table with ", i); if(varname_char == -1) fprintf(stderr, "random characters.\n"); else fprintf(stderr, "character $%02x.\n", varname_char); } else { die("Can't protect variables because there are no variables."); } } } void print_help(void) { fprintf(stderr, "Usage: %s [-v] [-f] [-n] [-g] [-c] [-r|-w] \n", self); fprintf(stderr, " %s [-v] [-p|-pc|-pv] [-xr|-xNN] [-s] \n", self); fprintf(stderr, " -v: Verbose.\n"); fprintf(stderr, " -f: Force variable name table rebuild.\n"); fprintf(stderr, " -n: Do not rebuild variable name table, even if it's invalid.\n"); fprintf(stderr, " -g: Remove trailing garbage, if present.\n"); fprintf(stderr, " -c: Check only; no output file.\n"); fprintf(stderr, " -w: Write variable names to 'varnames.txt'.\n"); fprintf(stderr, " -r: Read variable names from 'varnames.txt'.\n"); fprintf(stderr, " -pc/-pv/-p: Protect code/variables/both.\n"); fprintf(stderr, " -s: Shrink variable name table to min size, with -p/-pv.\n"); fprintf(stderr, " -xNN: Hex code NN for variable names, with -p/-pv.\n"); fprintf(stderr, " -xr: Random variable names, with -p/-pv.\n"); fprintf(stderr, "Use - as a filename to read from stdin and/or write to stdout.\n"); } void parse_args(int argc, char **argv) { int xopt_used = 0; set_self(*argv); if(argc < 2) { print_help(); exit(1); } if(strcmp(argv[1], "--help") == 0) { print_help(); exit(0); } if(strcmp(argv[1], "--version") == 0) { printf("%s %s\n", self, VERSION); exit(0); } while(++argv, --argc) { if((*argv)[0] == '-') { switch((*argv)[1]) { case 'v': verbose++; break; case 'f': forcevars++; break; case 'n': keepvars++; break; case 'g': keepgarbage = 0; break; case 'c': checkonly = 1; break; case 'r': readmap = 1; break; case 'w': writemap = 1; break; case 'p': { switch((*argv)[2]) { case 'c': protect_code = 1; break; case 'v': protect_vars = 1; break; case 0: protect_code = protect_vars = 1; break; default: die("Invalid -p suboption (only -p, -pc, -pv are valid)."); } } break; case 'x': { xopt_used++; switch((*argv)[2]) { case 'r': varname_char = -1; break; case 0: die("-x option requires a hex number or 'r' (e.g. -x20, not -x 20)."); break; default: { char *e; varname_char = (int)strtol(&(*argv)[2], &e, 16); if(*e != 0 || varname_char > 0xff) die("invalid hex value for -x option (range is 0 to ff)."); } } } break; case 's': shrinktable = 1; break; case 0: if(!input_file) open_input(NULL); else if(!output_filename) output_filename = *argv; else invalid_args(*argv); break; default: invalid_args(*argv); break; } } else { /* arg doesn't start with a -, must be a filename */ if(!input_file) open_input(*argv); else if(!checkonly && !output_filename) output_filename = *argv; else invalid_args(*argv); } } if(!input_file) die("No input file given (use - for stdin)."); if(!checkonly && !output_filename) die("No output file given (use - for stdout)."); if(keepvars && forcevars) die("-f and -n are mutually exclusive."); if(readmap && writemap) die("-r and -w are mutually exclusive."); if(readmap && keepvars) die("-r and -n are mutually exclusive, maybe you want -w?"); if(checkonly && (readmap || writemap)) die("-c and -r/-w are mutually exclusive."); if(protect_code || protect_vars) { if(checkonly || keepvars || forcevars || readmap || writemap || !keepgarbage) die("-p, -pc, -pv options can only be combined with -v, -x, -s."); } if(xopt_used && !protect_vars) die("-x option requires -p or -pv."); if(shrinktable && !protect_vars) die("-s option requires -p or -pv."); } int main(int argc, char **argv) { int outbytes, invoffs = 0; parse_args(argc, argv); filelen = readfile(); parse_header(); if(lomem) die("This doesn't look like an Atari BASIC program (no $0000 signature)."); if(protect_code || protect_vars) { if(verbose) { fprintf(stderr, "Protecting program, "); if(protect_vars && !protect_code) fprintf(stderr, "variables only.\n"); else if(protect_code && !protect_vars) fprintf(stderr, "code only.\n"); else fprintf(stderr, "both code and variables.\n"); } if(protect_vars) scramble_vars(); if(protect_code) breakcode(); was_protected = 1; /* opposite sense for this one */ } else { if(readmap) { was_protected = !vntable_ok(); read_var_map(); apply_var_map(); } else { if(!keepvars) { if(fixvars()) { was_protected = 1; if(verbose) fprintf(stderr, "Variable names replaced.\n"); } else { if(verbose) fprintf(stderr, "Variable names were already OK.\n"); } } } invoffs = fixcode(); if(invoffs) { if(verbose) fprintf(stderr, "Fixed %d invalid offset%s in code.\n", invoffs, (invoffs == 1 ? "" : "s")); was_protected = 1; } else { if(verbose) fprintf(stderr, "No invalid offsets.\n"); } if(verbose) { fprintf(stderr, "Program was %sprotected.\n", (was_protected ? "" : "NOT ")); } if(checkonly) { if(verbose) fprintf(stderr, "Check-only mode; no output written.\n"); return was_protected ? 0 : 2; } } /* we don't open the output file until all processing is done, to avoid leaving invalid output files if we exit on error. */ open_output(output_filename); outbytes = fwrite(program, 1, filelen, output_file); fclose(output_file); if(verbose) fprintf(stderr, "Wrote %d bytes.\n", outbytes); if(writemap) write_var_map(); return was_protected ? 0 : 2; }