/*
 * unprotbas.c
 *
 * This text was recovered from a git patch; the patch header is kept
 * here for reference:
 *
 *   From 96af9bc891987f6fcc560a6e403c5ada541d8699 Mon Sep 17 00:00:00 2001
 *   From: "B. Watson"
 *   Date: Fri, 17 May 2024 05:09:45 -0400
 *   Subject: unprotbas: added; blob2xex: tweak docs.
 *   ---
 *    unprotbas.c | 429 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 *    1 file changed, 429 insertions(+)
 *    create mode 100644 unprotbas.c
 *   (limited to 'unprotbas.c')
 *   diff --git a/unprotbas.c b/unprotbas.c
 *   new file mode 100644
 *   index 0000000..9c45fbb
 *   --- /dev/null
 *   +++ b/unprotbas.c
 *   @@ -0,0 +1,429 @@
 */

/**** TODO:
  if the rebuilt variable name table ends up larger than the
  scrambled one, the rest of the program needs to be moved upwards
  in memory to make room for it. currently this isn't done, so
  the variable *value* table gets corrupted by the last few
  variable names overwriting the first few values. */

/* NOTE(review): the three header names were lost when the patch was
   scraped. <stdio.h> and <stdlib.h> are required by the code below
   (fprintf/fread/FILE, exit); the third is a placeholder -- confirm
   against the upstream file. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* attempt to fix a "list-protected" Atari 8-bit BASIC program.
   we don't fully detokenize, so this won't fix truly corrupted
   files.

   the "fix" is in 2 parts:
   1. fix any invalid (0-byte) offsets after a line number. this is
      what causes BASIC to lock up.
   2. if the variable names were overwritten (e.g. with EOL characters,
      or whatever), we "fix" that by making up new variable names.
*/

#define STM_OFFSET 0xf2

/* entire file gets read into memory (for now) */
unsigned char data[65536];

/* BASIC 14-byte header values */
unsigned short lomem;
unsigned short vntp;
unsigned short vntd;
unsigned short vvtp;
unsigned short stmtab;
unsigned short stmcur;
unsigned short starp;

/* positions where various parts of the file start,
   derived from the header vars above. */
unsigned short codestart;
unsigned short vnstart;
unsigned short vvstart;
int filelen;

/* name of executable, taken from argv[0] */
char *self;

/* these are set by the various command-line switches */
int keepvars = 0;
int forcevars = 0;
int keepgarbage = 1;
int verbose = 0;

/* file handles */
FILE *input_file = NULL;
FILE *output_file = NULL;

/* print "<self>: <msg>" on stderr and terminate with exit status 1. */
void die(const char *msg) {
	fprintf(stderr, "%s: %s\n", self, msg);
	exit(1);
}

/* read entire file into memory: at most 65535 bytes from input_file
   go into data[]. the byte count is logged to stderr and returned.
   a stream error is fatal, rather than being silently treated as a
   short file. */
int readfile(void) {
	size_t got = fread(data, 1, 65535, input_file);

	if(ferror(input_file))
		die("error reading input file");

	fprintf(stderr, "read %d bytes\n", (int)got);
	return (int)got;
}

/* get a 16-bit value from the file, in 6502 LSB/MSB order. */
unsigned short getword(int addr) {
	return data[addr] | (data[addr + 1] << 8);
}

/* fixline() calculates & sets correct line length, by iterating
   over the statement(s) within the line. the last statement's
   offset will be the same as the line offset should have been,
   if it weren't zeroed. when reading this code, it's helpful to
   know that the lengths (line and statement) are counted from the
   start of the line in memory.

   A line with only a line number and one token (such as END) would have a
   line length of 6: 2 for the 16-bit line number, 1 for the length byte
   itself, 1 for the statement length byte (also 6), 1 for the END token, and one
   for the end-of-line token.

   A line with two statements: 10 ?:END
   offset  value  meaning
   0       0A     line number (low byte)
   1       00     line number (high byte)
   2       09     line length (or, offset to next line) [!]
   3       06     offset to next statement *from the start of the line*
   4       28     token for "?"
   5       14     token for : (end of statement)
   6       09     offset to next statement [!]
   7       15     token for END
   8       16     token for end-of-line [*]
   9       ??     (line number of next statement)

   Note the values marked with [!] are equal.

   [*] end-of-line is $16 *except* for REM and DATA, which are
   terminated with $9B instead.

   linepos is the index in data[] of the line's first byte (the low
   byte of the line number).

   Returns the recovered line length, which is also stored in the
   line's length byte (data[linepos + 2]). Returns 0, leaving data[]
   untouched, if a statement offset fails to move forward or points
   past the end of the file (filelen); that means the line can't be
   repaired this way.

   The walk starts at index 3, the first statement's offset byte, so
   that the first statement's terminator gets examined too. Otherwise
   a line holding a single statement would take its length from
   whatever byte follows the line.
*/
int fixline(int linepos) {
	int offset = 3; /* index of the current statement's offset byte */

	for(;;) {
		int next, token;

		if(linepos + offset >= filelen)
			return 0;

		next = data[linepos + offset];

		/* each statement must end after it starts, and inside the file;
		   this also guarantees the loop terminates. */
		if(next <= offset || linepos + next > filelen)
			return 0;

		offset = next;

		/* last byte of the statement: $14 means another statement follows. */
		token = data[linepos + offset - 1];
		fprintf(stderr, "offset %02x token %02x\n", offset, token);

		if(token != 0x14)
			break;
	}

	data[linepos + 2] = offset;
	return offset;
}

/* Iterate over all the tokenized lines, starting at codestart. If any
   of them have invalid line lengths (<=5), call fixline() on them.

   Returns the number of lines whose length byte was repaired.
   If a line's length can't be recovered, the program exits via die():
   without a usable length there is no way to find the next line.

   Side effects: writes progress/warnings to stderr, and lowers filelen
   to the end of the program when keepgarbage is 0 and extra bytes
   follow the last line. */
int fixcode(void) {
	int result = 0;
	int pos = codestart;
	int offset, lineno = -1, tmpno;

	/* a line header is 3 bytes (line number + length); don't read one
	   that isn't fully inside the file. */
	while(pos + 2 < filelen) {
		tmpno = getword(pos);
		if(tmpno <= lineno) {
			fprintf(stderr, "Warning: line number %d at offset %04x is <= previous line number %d\n",
					tmpno, pos, lineno);
		}
		lineno = tmpno;

		offset = data[pos + 2];
		/* fprintf(stderr, "pos %d, line #%d, offset %d\n", pos, lineno, offset); */
		if(offset < 6) {
			fprintf(stderr, "Found invalid offset %d (<6) at line %d\n", offset, lineno);

			/* the repaired length replaces the bogus one (it must not be
			   added to it). */
			offset = fixline(pos);
			if(offset < 6) {
				fprintf(stderr, "Can't determine length of line %d at offset %04x\n",
						lineno, pos);
				die("program is too badly corrupted to fix");
			}
			result++;
		}
		pos += offset;

		/* Atari BASIC tolerates garbage after the last tokenized line,
		   so we must do likewise. */
		if(lineno == 32768) break;
	}

	fprintf(stderr, "End program pos %04x/%d\n", pos, pos);

	if(filelen > pos) {
		fprintf(stderr, "trailing garbage at EOF, %d bytes, %s\n",
				filelen - pos, (keepgarbage ? "keeping" : "removing"));
		if(!keepgarbage) filelen = pos;
	}

	return result;
}

/* Fixing the variables is a bit more work than it seems like
   it might be, because the last byte of the name has to match
   the type (inverse video "(" for numeric array, inverse "$" for
   string, inverse last character of name for scalars). To do
   this right, we have to examine the variable value table to
   find out the type of each variable.

   Each variable type get assigned A to Z, then A1 to A9, B1 to B9,
   etc. */
This means there will be A, A$, and A( variables, which might + be a bit confusing, but we have to keep the generated name table as + short as possible, because we can't extend the size of the table in + the file. + + We can find the actual table size in the file by subtracting VNTP + (start of variable name table) from VNTD (end of variable name table), + and if we run out of space for the generated names, something is + seriously off... + + The maximum number of variable names is 128. If all 128 vars are in + use, the minimum table size is 230 (26 one-letter names, 102 2-letter + or letter+number or one-letter string/array names). + +*/ + +int fixvars(void) { + int vp = vnstart, vv = vvstart; + int strings = 0, arrays = 0, scalars = 0, varname = 0, varnum = 0; + int bad = 0; + + /* See if the variables even need fixing. + + This code is simpler than it should be: it checks that all + characters in the variable name table are valid, but doesn't + check that they're in valid sequences. Example: a variable name + that's just an inverse dollar sign would be considered OK). + Also multiple variables of the same type with the same name + would be OK. + + However, if all the bytes are the same value, even if it's a + valid character, that's correctly detected as invalid. + */ + + if(vntp == vntd) { + fprintf(stderr, "No variables\n"); + return 0; + } + + vp = vnstart + 1; + bad = 1; + while(vp < vvstart - 1) { + if(data[vp] != data[vnstart]) bad = 0; + vp++; + } + + vp = vnstart; + while(vp < vvstart) { + unsigned char c = data[vp]; + fprintf(stderr, "%04x/%04x: %04x\n", vp, vvstart, c); + + /* allow a null byte only at the end of the table! */ + /* if(c == 0 && vp == vvstart - 1) break; */ + /* new rule: treat a null byte as end-of-table, ignore any + junk between it and VNTP. */ + if(c == 0) break; + + vp++; + + /* inverse $ or ( is OK */ + if(c == 0xa4 || c == 0xa8) continue; + + /* numbers and letters are allowed, inverse or normal. 
*/ + c &= 0x7f; + if(c >= 0x30 && c <= 0x39) continue; + if(c >= 0x41 && c <= 0x5a) continue; + + bad++; + break; + } + if(!forcevars && !bad) return 0; + + vp = vnstart; + while(vv < codestart) { + unsigned char sigil = 0; + /* type: scalar = 0, array = 1, string = 2 */ + unsigned char type = data[vv] >> 6; + /* fprintf(stderr, "%04x: %04x, %d\n", vv, data[vv], type); */ + + if(varnum != data[vv+1]) { + fprintf(stderr, "Warning: variable value is corrupt!\n"); + } + varnum++; + + switch(type) { + case 1: varname = arrays++; sigil = 0xa8; break; + case 2: varname = strings++; sigil = 0xa4; break; + default: varname = scalars++; break; + } + + if(varname < 26) { + data[vp] = ('A' + varname); + } else { + varname -= 26; + data[vp++] = 'A' + (varname / 9); + data[vp] = ('1' + (varname % 9)); + } + + if(sigil) { + vp++; + data[vp++] = sigil; + } else { + data[vp] |= 0x80; + vp++; + } + + vv += 8; + } + + /* there's supposed to be a null byte at the end of the table, unless + all 128 table slots are used. 
*/ + if(varnum < 128) data[vp] = 0; + + /* fixup the VNTD pointer */ + vntd = vntp + (vp - vnstart); + data[4] = vntd & 0xff; + data[5] = vntd >> 8; + + fprintf(stderr, "%d variables, VNTD adjusted to %04x\n", varnum, vntd); + return 1; +} + +void print_help(void) { + fprintf(stderr, "Usage: %s [-v] [-f] [-n] [-g] \n", self); + fprintf(stderr, "-v: verbose\n"); + fprintf(stderr, "-f: force variable name table rebuild\n"); + fprintf(stderr, "-n: do not rebuild variable name table, even if it's invalid\n"); + fprintf(stderr, "-g: remove trailing garbage, if present\n"); + fprintf(stderr, "Use - as a filename to read from stdin and/or write to stdout\n"); +} + +void invalid_args(const char *arg) { + fprintf(stderr, "%s: Invalid argument '%s'\n\n", self, arg); + print_help(); + exit(1); +} + +FILE *open_file(const char *name, const char *mode) { + FILE *fp; + if(!(fp = fopen(name, mode))) { + perror(name); + exit(1); + } + return fp; +} + +void open_input(const char *name) { + if(!name) { + if(freopen(NULL, "rb", stdin)) { + input_file = stdin; + return; + } else { + perror("stdin"); + exit(1); + } + } + + input_file = open_file(name, "rb"); +} + +void open_output(const char *name) { + if(!name) { + if(freopen(NULL, "wb", stdout)) { + output_file = stdout; + return; + } else { + perror("stdout"); + exit(1); + } + } + + output_file = open_file(name, "wb"); +} + +void parse_args(int argc, char **argv) { + self = *argv; + if(argc < 2) { + print_help(); + exit(0); + } + while(++argv, --argc) { + if((*argv)[0] == '-') { + switch((*argv)[1]) { + case 'v': verbose++; break; + case 'f': forcevars++; break; + case 'n': keepvars++; break; + case 'g': keepgarbage = 0; break; + case 0: + if(!input_file) + open_input(NULL); + else if(!output_file) + open_output(NULL); + else + invalid_args(*argv); + break; + default: invalid_args(*argv); break; + } + } else { + if(!input_file) + open_input(*argv); + else if(!output_file) + open_output(*argv); + else + invalid_args(*argv); + } + } + 
+ if(!input_file) die("no input file given (use - for stdin)"); + if(!output_file) die("no output file given (use - for stdout)"); + if(keepvars && forcevars) die("-f and -n are mutually exclusive"); +} + +int main(int argc, char **argv) { + parse_args(argc, argv); + + filelen = readfile(); + + lomem = getword(0); + vntp = getword(2); + vntd = getword(4); + vvtp = getword(6); + stmtab = getword(8); + stmcur = getword(10); + starp = getword(12); + codestart = stmtab - STM_OFFSET - (vntp - 256); + vnstart = vntp - 256 + 14; + vvstart = vvtp - 256 + 14; + + if(lomem) die("This doesn't look like an Atari BASIC program (no $0000 signature)"); + + fprintf(stderr, "LOMEM %04x\n", lomem); + fprintf(stderr, "VNTP %04x\n", vntp); + fprintf(stderr, "VNTD %04x\n", vntd); + fprintf(stderr, "VVTP %04x\n", vvtp); + fprintf(stderr, "STMTAB %04x, codestart %04x\n", stmtab, codestart); + fprintf(stderr, "STMCUR %04x\n", stmcur); + fprintf(stderr, "STARP %04x\n", starp); + fprintf(stderr, "vvstart %04x\n", vvstart); + + /* + fprintf(stderr, "data at STMTAB (we hope):\n"); + for(int i=codestart; i