about | summary | refs | log | tree | commit | diff
path: root/unprotbas.c
diff options
context:
space:
mode:
Diffstat (limited to 'unprotbas.c')
-rw-r--r-- unprotbas.c 528
1 files changed, 256 insertions, 272 deletions
diff --git a/unprotbas.c b/unprotbas.c
index 0b11e0e..ae01b50 100644
--- a/unprotbas.c
+++ b/unprotbas.c
@@ -2,6 +2,10 @@
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
+#include <ctype.h>
+#include <time.h>
+
+#include "bas.h"
/* attempt to fix a "list-protected" Atari 8-bit BASIC program.
we don't fully detokenize, so this won't fix truly corrupted
@@ -14,29 +18,11 @@
or whatever), we "fix" that by making up new variable names.
*/
-#define STM_OFFSET 0xf2
-
-/* entire file gets read into memory (for now) */
-unsigned char data[65536];
-
-/* BASIC 14-byte header values */
-unsigned short lomem;
-unsigned short vntp;
-unsigned short vntd;
-unsigned short vvtp;
-unsigned short stmtab;
-unsigned short stmcur;
-unsigned short starp;
-
-/* positions where various parts of the file start,
- derived from the header vars above. */
-unsigned short codestart;
-unsigned short vnstart;
-unsigned short vvstart;
-int filelen;
-
-/* name of executable, taken from argv[0] */
-char *self;
+/* for the -r option */
+#define MAP_FILE "varnames.txt"
+unsigned char varnames[BUFSIZE];
+unsigned char *varmap[MAXVARS];
+int varmap_count;
/* these are set by the various command-line switches */
int keepvars = 0;
@@ -44,64 +30,8 @@ int forcevars = 0;
int keepgarbage = 1;
int checkonly = 0;
int was_protected = 0;
-int verbose = 0;
-
-/* file handles */
-FILE *input_file = NULL;
-FILE *output_file = NULL;
-
-void die(const char *msg) {
- fprintf(stderr, "%s: %s\n", self, msg);
- exit(1);
-}
-
-/* read entire file into memory */
-int readfile(void) {
- int got = fread(data, 1, 65535, input_file);
- if(verbose) fprintf(stderr, "read %d bytes\n", got);
- fclose(input_file);
- return got;
-}
-
-/* get a 16-bit value from the file, in 6502 LSB/MSB order. */
-unsigned short getword(int addr) {
- return data[addr] | (data[addr + 1] << 8);
-}
-
-void setword(int addr, int value) {
- data[addr] = value & 0xff;
- data[addr + 1] = value >> 8;
-}
-
-void dump_header_vars(void) {
- fprintf(stderr, "LOMEM $%04x VNTP $%04x VNTD $%04x VVTP $%04x\n", lomem, vntp, vntd, vvtp);
- fprintf(stderr, "STMTAB $%04x STMCUR $%04x STARP $%04x\n", stmtab, stmcur, starp);
- fprintf(stderr, "vnstart $%04x, vvstart $%04x, codestart $%04x\n", vnstart, vvstart, codestart);
-}
-
-void read_header(void) {
- lomem = getword(0);
- vntp = getword(2);
- vntd = getword(4);
- vvtp = getword(6);
- stmtab = getword(8);
- stmcur = getword(10);
- starp = getword(12);
- codestart = stmtab - STM_OFFSET - (vntp - 256);
- vnstart = vntp - 256 + 14;
- vvstart = vvtp - 256 + 14;
- if(verbose) dump_header_vars();
-}
-
-void set_header_vars(void) {
- setword(0, lomem);
- setword(2, vntp);
- setword(4, vntd);
- setword(6, vvtp);
- setword(8, stmtab);
- setword(10, stmcur);
- setword(12, starp);
-}
+int readmap = 0;
+int writemap = 0;
/* fixline() calculates & sets correct line length, by iterating
over the statement(s) within the line. the last statement's
@@ -122,30 +52,29 @@ void set_header_vars(void) {
2 09 line length (or, offset to next line) [!]
3 06 offset to next statement *from the start of the line*
4 28 token for "?"
- 5 14 token for : (end of statement)
+ 5 14 token for : (end of statement), we call it TOK_COLON
6 09 offset to next statement [!]
7 15 token for END
8 16 token for end-of-line [*]
9 ?? (line number of next statement)
Note the values marked with [!] are equal.
+ The line length at offset 2 is what gets zeroed out by the
+ protection. To fix it, we follow the next-statement offsets for
+ as long as the byte just before each offset is a colon; the first
+ offset not preceded by a colon becomes the byte at offset 2.
[*] end-of-line is $16 *except* for REM and DATA, which are
terminated with $9B instead.
*/
int fixline(int linepos) {
/* +3 here to skip the line number + line length */
- int token, done = 0, offset = data[linepos + 3];
-
- while(!done) {
- offset = data[linepos + offset];
- token = data[linepos + offset - 1];
- /* fprintf(stderr, "offset %02x token %02x\n", offset, token); */
- if(token != 0x14)
- done++;
- }
+ int offset = program[linepos + 3];
- data[linepos + 2] = offset;
+ while(program[linepos + offset - 1] == TOK_COLON)
+ offset = program[linepos + offset];
+
+ program[linepos + 2] = offset;
return offset;
}
@@ -159,16 +88,16 @@ int fixcode(void) {
while(pos < filelen) {
tmpno = getword(pos);
if(tmpno <= lineno) {
- fprintf(stderr, "Warning: line number %d at offset %04x is <= previous line number %d\n",
+ fprintf(stderr, "Warning: line number %d at offset $%04x is <= previous line number %d.\n",
tmpno, pos, lineno);
}
lineno = tmpno;
- offset = data[pos + 2];
+ offset = program[pos + 2];
/* fprintf(stderr, "pos %d, line #%d, offset %d\n", pos, lineno, offset); */
if(offset < 6) {
- if(verbose) fprintf(stderr, "Found invalid offset %d (<6) at line %d\n", offset, lineno);
- offset += fixline(pos);
+ if(verbose) fprintf(stderr, "Found invalid offset %d (<6) at line %d, file offset $%04x.\n", offset, lineno, pos);
+ offset = fixline(pos);
result++;
}
pos += offset;
@@ -178,33 +107,29 @@ int fixcode(void) {
if(lineno == 32768) break;
}
- if(verbose) fprintf(stderr, "End program pos $%04x/%d\n", pos, pos);
+ if(verbose) fprintf(stderr, "End program file offset: $%04x/%d\n", pos, pos);
if(filelen > pos) {
- if(verbose) fprintf(stderr, "trailing garbage at EOF, %d bytes, %s\n",
- filelen - pos, (keepgarbage ? "keeping" : "removing"));
+ int i, same = 1;
+ for(i = pos; i < filelen; i++) {
+ if(program[i] != program[pos]) same = 0;
+ }
+ if(verbose) {
+ fprintf(stderr, "Trailing garbage at EOF, %d bytes, ", filelen - pos);
+ if(same)
+ fprintf(stderr, "all $%02x", program[pos]);
+ else
+ fprintf(stderr, "maybe valid data");
+ fprintf(stderr, ", %s.\n", (keepgarbage ? "keeping" : "removing"));
+ }
if(!keepgarbage) filelen = pos;
+ } else {
+ if(verbose)
+ fprintf(stderr, "No trailing garbage at EOF.\n");
}
-
return result;
}
-/* sometimes the variable name table isn't large enough to hold
- the generated variable names. move_code() makes more space,
- by moving the rest of the program (including the variable value
- table) up in memory. */
-void move_code(int offset) {
- memmove(data + vvstart + offset, data + vvstart, filelen);
- vntd += offset;
- vvtp += offset;
- stmtab += offset;
- stmcur += offset;
- starp += offset;
- set_header_vars();
- read_header();
- filelen += offset;
-}
-
/* Fixing the variables is a bit more work than it seems like
it might be, because the last byte of the name has to match
the type (inverse video "(" for numeric array, inverse "$" for
@@ -229,52 +154,6 @@ void move_code(int offset) {
or letter+number or one-letter string/array names).
*/
-int vntable_ok(void) {
- int vp, bad;
-
- if(vntp == vntd) {
- if(verbose) fprintf(stderr, "No variables\n");
- return 1;
- }
-
- /* first pass: bad = 1 if all the bytes in the table have the same
- value, no matter what it is. */
- vp = vnstart + 1;
- bad = 1;
- while(vp < vvstart - 1) {
- if(data[vp] != data[vnstart]) {
- bad = 0;
- break;
- }
- vp++;
- }
- if(bad) return 0;
-
- /* 2nd pass: bad = 1 if there's any invalid character in the table. */
- vp = vnstart;
- while(vp < vvstart) {
- unsigned char c = data[vp];
-
- /* treat a null byte as end-of-table, ignore any junk between it and VNTP. */
- if(c == 0) break;
-
- vp++;
-
- /* inverse $ or ( is OK */
- if(c == 0xa4 || c == 0xa8) continue;
-
- /* numbers and letters are allowed, inverse or normal. */
- c &= 0x7f;
- if(c >= 0x30 && c <= 0x39) continue;
- if(c >= 0x41 && c <= 0x5a) continue;
-
- bad++;
- break;
- }
-
- return !bad;
-}
-
/* walk the variable value table, generating variable names.
if write is 0, just return the size the table will be.
if write is 1, actually write the names to memory. */
@@ -287,28 +166,35 @@ int rebuild_vntable(int write) {
while(vv < codestart) {
unsigned char sigil = 0;
/* type: scalar = 0, array = 1, string = 2 */
- unsigned char type = data[vv] >> 6;
- /* fprintf(stderr, "%04x: %04x, %d\n", vv, data[vv], type); */
+ unsigned char type = program[vv] >> 6;
+ /* fprintf(stderr, "%04x: %04x, %d\n", vv, program[vv], type); */
+
+ if(varnum == MAXVARS) {
+ fprintf(stderr, "Warning: skipping variable numbers >=%d in value table.\n", MAXVARS);
+ break;
+ }
- if(varnum != data[vv+1]) {
- fprintf(stderr, "Warning: variable value is corrupt!\n");
+ if(varnum != program[vv+1]) {
+ fprintf(stderr, "Warning: variable #%d value is corrupt!\n", varnum);
}
- varnum++;
switch(type) {
- case 1: varname = arrays++; sigil = 0xa8; break;
- case 2: varname = strings++; sigil = 0xa4; break;
- default: varname = scalars++; break;
+ case TYPE_SCALAR: varname = scalars++; break;
+ case TYPE_ARRAY: varname = arrays++; sigil = 0xa8; break;
+ case TYPE_STRING: varname = strings++; sigil = 0xa4; break;
+ default:
+ fprintf(stderr, "Warning: variable value #%d has unknown type.\n", varnum);
+ break;
}
if(varname < 26) {
- if(write) data[vp] = ('A' + varname);
+ if(write) program[vp] = ('A' + varname);
size++;
} else {
varname -= 26;
if(write) {
- data[vp++] = 'A' + varname / 9;
- data[vp] = '1' + varname % 9;
+ program[vp++] = 'A' + varname / 9;
+ program[vp] = '1' + varname % 9;
}
size += 2;
}
@@ -316,41 +202,25 @@ int rebuild_vntable(int write) {
if(sigil) {
size++;
vp++;
- if(write) data[vp++] = sigil;
+ if(write) program[vp++] = sigil;
} else {
- if(write) data[vp] |= 0x80;
+ if(write) program[vp] |= 0x80;
vp++;
}
vv += 8;
+ varnum++;
}
/* there's supposed to be a null byte at the end of the table, unless
- all 128 table slots are used. */
- if(write) {
- if(varnum < 128) data[vp] = 0;
- /* fixup the VNTD pointer */
- /*
- vntd = vntp + (vp - vnstart);
- fprintf(stderr, "%04x\n", vntd);
- data[4] = vntd & 0xff;
- data[5] = vntd >> 8;
- */
- }
+ all 128 table slots are used... except that in practice there can
+ be 129 or more entries, so we always write the null byte. */
+ if(write) program[vp] = 0;
+ size++;
return size;
}
-void adjust_vntable_size(int oldsize, int newsize) {
- int move_by;
- if(oldsize != newsize) {
- move_by = newsize - oldsize;
- if(verbose) fprintf(stderr, "need %d bytes for vntable, have %d, moving VVTP by %d to %04x\n",
- newsize, oldsize, move_by, vvtp + move_by);
- move_code(move_by);
- }
-}
-
int fixvars(void) {
int old_vntable_size, new_vntable_size;
@@ -366,69 +236,172 @@ int fixvars(void) {
return 1;
}
-void print_help(void) {
- fprintf(stderr, "Usage: %s [-v] [-f] [-n] [-g] <inputfile> <outputfile>\n", self);
- fprintf(stderr, "-v: verbose\n");
- fprintf(stderr, "-f: force variable name table rebuild\n");
- fprintf(stderr, "-n: do not rebuild variable name table, even if it's invalid\n");
- fprintf(stderr, "-g: remove trailing garbage, if present\n");
- fprintf(stderr, "-c: check only; no output file\n");
- fprintf(stderr, "Use - as a filename to read from stdin and/or write to stdout\n");
+void write_var_map(void) {
+ FILE *f;
+ int vp, count = 0;
+
+ if(verbose) fprintf(stderr, "Writing variable names to '" MAP_FILE "'.\n");
+ f = fopen(MAP_FILE, "w");
+ if(!f) {
+ perror(MAP_FILE);
+ die("Can't create map file for -w option.");
+ }
+
+ for(vp = vnstart; (vp < vntd) && (program[vp] != 0); vp++) {
+ unsigned char c = program[vp];
+ if(c < 0x80) {
+ fputc(c, f);
+ } else {
+ fputc(c & 0x7f, f);
+ fputc('\n', f);
+ count++;
+ }
+ }
+
+ fclose(f);
+
+ if(verbose) fprintf(stderr, "Wrote %d variable names to '" MAP_FILE "'.\n", count);
}
-void invalid_args(const char *arg) {
- fprintf(stderr, "%s: Invalid argument '%s'\n\n", self, arg);
- print_help();
+void die_mapfile(char *msg, int num) {
+ fprintf(stderr, MAP_FILE ": line %d: %s.\n", num, msg);
exit(1);
}
-FILE *open_file(const char *name, const char *mode) {
- FILE *fp;
- if(!(fp = fopen(name, mode))) {
- perror(name);
- exit(1);
+void check_varname(const unsigned char *name, int line) {
+ int len = strlen((char *)name);
+ int i;
+ unsigned char c = 0, type;
+
+ /* fprintf(stderr, "check_varname(\"%s\", %d)\n", name, line); */
+
+ if(len < 1) die_mapfile("Blank variable name", line);
+ if(len > 128) die_mapfile("Variable name >128 characters", line);
+ if(name[0] < 'A' || name[0] > 'Z')
+ die_mapfile("Invalid variable name: First character must be a letter", line);
+
+ for(i = 1; i < len; i++) {
+ c = name[i];
+ if(c >= 'A' && c <= 'Z') continue;
+ if(c >= '0' && c <= '9') continue;
+ if(c == '$' || c == '(') {
+ if(i == (len - 1))
+ continue;
+ else
+ die_mapfile("Invalid variable name: $ and ( only allowed at end", line);
+ }
+ die_mapfile("Invalid character in variable name", line);
+ }
+
+ if(c == 0) c = name[0];
+
+ /* c now has the last char of the name, make sure it matches the variable type */
+ type = program[vvstart + 8 * (line - 1)] >> 6;
+ /* type: scalar = 0, array = 1, string = 2 */
+ if(type == TYPE_SCALAR) {
+ if(c == '$')
+ die_mapfile("Type mismatch: numeric variable may not end with $", line);
+ else if(c == '(')
+ die_mapfile("Type mismatch: numeric variable may not end with (", line);
+ } else if(type == TYPE_ARRAY) {
+ if(c != '(')
+ die_mapfile("Type mismatch: array variable must end with (", line);
+ } else if(type == TYPE_STRING) {
+ if(c != '$')
+ die_mapfile("Type mismatch: string variable must end with $", line);
+ } else {
+ fprintf(stderr, "Warning: variable value table is corrupt (invalid type).\n");
+ }
+
+ /* check for dups */
+ for(i = 0; i < line - 1; i++) {
+ if(strcmp((char *)name, (char *)varmap[i]) == 0)
+ die_mapfile("duplicate variable name", line);
}
- return fp;
}
-void open_input(const char *name) {
- if(!name) {
- if(freopen(NULL, "rb", stdin)) {
- input_file = stdin;
- return;
- } else {
- perror("stdin");
- exit(1);
+void read_var_map(void) {
+ FILE *f;
+ unsigned char *p = varnames, *curname = varnames;
+ int count = 0, vvcount = (codestart - vvstart) / 8;
+
+ if(verbose) fprintf(stderr, "Reading variable names from " MAP_FILE ".\n");
+ f = fopen(MAP_FILE, "r");
+ if(!f) {
+ perror(MAP_FILE);
+ die("Can't read map file for -r option.");
+ }
+
+ while(!feof(f)) {
+ *p = toupper(fgetc(f)); /* allow lowercase */
+
+ if(*p == ' ' || *p == '\t' || *p == '\r')
+ continue; /* ignore whitespace */
+
+ if(*p == '\n') {
+ *p = '\0';
+ varmap[count++] = curname;
+ check_varname(curname, count);
+ curname = p + 1;
}
+ p++;
+ }
+ fclose(f);
+
+ if(verbose) fprintf(stderr, "Read %d variable names from " MAP_FILE ".\n", count);
+
+ if(vvcount > count) {
+ fprintf(stderr, MAP_FILE ": not enough variables (have %d, need %d).\n", count, vvcount);
+ exit(1);
+ } else if(count > vvcount) {
+ fprintf(stderr, MAP_FILE ": too many variables (have %d, need %d).\n", count, vvcount);
+ exit(1);
}
- input_file = open_file(name, "rb");
+ varmap_count = count;
}
-void open_output(const char *name) {
- if(!name) {
- if(isatty(fileno(stdout))) {
- fprintf(stderr, "%s: refusing to write binary data to standard output\n", self);
- exit(1);
- }
- if(freopen(NULL, "wb", stdout)) {
- output_file = stdout;
- return;
- } else {
- perror("stdout");
- exit(1);
+void apply_var_map(void) {
+ unsigned char new_vntable[BUFSIZE];
+ int i, newp = 0;
+ unsigned char *v;
+
+ if(verbose)
+ fprintf(stderr, "Using variable names from " MAP_FILE ".\n");
+
+ for(i = 0; i < varmap_count; i++) {
+ v = varmap[i];
+ while(*v) {
+ new_vntable[newp++] = *v;
+ v++;
}
+ new_vntable[newp - 1] |= 0x80;
}
- output_file = open_file(name, "wb");
+ new_vntable[newp++] = '\0';
+
+ i = vvstart - vnstart;
+ adjust_vntable_size(i, newp);
+ memmove(program + vnstart, new_vntable, newp);
+}
+
+void print_help(void) {
+ printf("Usage: %s [-v] [-f] [-n] [-g] [-c] [-r|-w] <inputfile> <outputfile>\n", self);
+ printf(" -v: Verbose.\n");
+ printf(" -f: Force variable name table rebuild.\n");
+ printf(" -n: Do not rebuild variable name table, even if it's invalid.\n");
+ printf(" -g: Remove trailing garbage, if present.\n");
+ printf(" -c: Check only; no output file.\n");
+ printf(" -w: Write variable names to 'varnames.txt'.\n");
+ printf(" -r: Read variable names from 'varnames.txt'.\n");
+ printf("Use - as a filename to read from stdin and/or write to stdout.\n");
}
void parse_args(int argc, char **argv) {
- self = *argv;
- if(argc < 2) {
- print_help();
- exit(0);
- }
+ set_self(*argv);
+
+ parse_general_args(argc, argv, print_help);
+
while(++argv, --argc) {
if((*argv)[0] == '-') {
switch((*argv)[1]) {
@@ -437,73 +410,84 @@ void parse_args(int argc, char **argv) {
case 'n': keepvars++; break;
case 'g': keepgarbage = 0; break;
case 'c': checkonly = 1; break;
+ case 'r': readmap = 1; break;
+ case 'w': writemap = 1; break;
case 0:
if(!input_file)
open_input(NULL);
- else if(!output_file)
- open_output(NULL);
+ else if(!output_filename)
+ output_filename = *argv;
else
invalid_args(*argv);
break;
default: invalid_args(*argv); break;
}
} else {
+ /* arg doesn't start with a -, must be a filename */
if(!input_file)
open_input(*argv);
- else if(!checkonly && !output_file)
- open_output(*argv);
+ else if(!checkonly && !output_filename)
+ output_filename = *argv;
else
invalid_args(*argv);
}
}
- if(!input_file) die("no input file given (use - for stdin)");
- if(!checkonly && !output_file) die("no output file given (use - for stdout)");
- if(keepvars && forcevars) die("-f and -n are mutually exclusive");
+ if(!input_file) die("No input file given (use - for stdin).");
+ if(!checkonly && !output_filename) die("No output file given (use - for stdout).");
+ if(keepvars && forcevars) die("-f and -n are mutually exclusive.");
+ if(readmap && writemap) die("-r and -w are mutually exclusive.");
+ if(readmap && keepvars) die("-r and -n are mutually exclusive, maybe you want -w?");
+ if(checkonly && (readmap || writemap)) die("-c and -r/-w are mutually exclusive.");
}
int main(int argc, char **argv) {
+ int invoffs = 0;
parse_args(argc, argv);
- filelen = readfile();
- read_header();
-
- if(lomem) die("This doesn't look like an Atari BASIC program (no $0000 signature)");
+ readfile();
+ parse_header();
- if(!keepvars) {
- if(fixvars()) {
- was_protected = 1;
- if(verbose) fprintf(stderr, "Variable names replaced\n");
- } else {
- if(verbose) fprintf(stderr, "Variable names were already OK\n");
+ if(readmap) {
+ was_protected = !vntable_ok();
+ read_var_map();
+ apply_var_map();
+ } else {
+ if(!keepvars) {
+ if(fixvars()) {
+ was_protected = 1;
+ if(verbose) fprintf(stderr, "Variable names replaced.\n");
+ } else {
+ if(verbose) fprintf(stderr, "Variable names were already OK.\n");
+ }
}
}
- if(fixcode()) {
- if(verbose) fprintf(stderr, "Fixed invalid offset in code\n");
+ invoffs = fixcode();
+ if(invoffs) {
+ if(verbose)
+ fprintf(stderr, "Fixed %d invalid offset%s in code.\n",
+ invoffs, (invoffs == 1 ? "" : "s"));
was_protected = 1;
} else {
- if(verbose) fprintf(stderr, "No invalid offsets\n");
+ if(verbose) fprintf(stderr, "No invalid offsets.\n");
}
if(verbose) {
- if(was_protected)
- fprintf(stderr, "Program was protected.\n");
- else
- fprintf(stderr, "Program was NOT protected.\n");
+ fprintf(stderr, "Program was %sprotected.\n", (was_protected ? "" : "NOT "));
}
if(checkonly) {
if(verbose) fprintf(stderr, "Check-only mode; no output written.\n");
- if(was_protected)
- return 0;
- else
- return 2;
+ return was_protected ? 0 : 2;
}
- int got = fwrite(data, 1, filelen, output_file);
- fclose(output_file);
- if(verbose) fprintf(stderr, "wrote %d bytes\n", got);
+ /* we don't open the output file until all processing is done, to
+ avoid leaving invalid output files if we exit on error. */
+ open_output(output_filename);
+ writefile();
+
+ if(writemap) write_var_map();
- return 0;
+ return was_protected ? 0 : 2;
}