aboutsummaryrefslogtreecommitdiff
path: root/unprotbas.c
diff options
context:
space:
mode:
authorB. Watson <urchlay@slackware.uk>2024-05-17 05:09:45 -0400
committerB. Watson <urchlay@slackware.uk>2024-05-17 05:09:45 -0400
commit96af9bc891987f6fcc560a6e403c5ada541d8699 (patch)
tree6bdd20a1fdd7f31316d14fb5e233718b48713522 /unprotbas.c
parentd4064b55a7ddbb002ef80dbc0db60cd0d95cb1cd (diff)
downloadbw-atari8-tools-96af9bc891987f6fcc560a6e403c5ada541d8699.tar.gz
unprotbas: added; blob2xex: tweak docs.
Diffstat (limited to 'unprotbas.c')
-rw-r--r--unprotbas.c429
1 files changed, 429 insertions, 0 deletions
diff --git a/unprotbas.c b/unprotbas.c
new file mode 100644
index 0000000..9c45fbb
--- /dev/null
+++ b/unprotbas.c
@@ -0,0 +1,429 @@
+/**** TODO:
+ if the rebuilt variable name table ends up larger than the
+ scrambled one, the rest of the program needs to be moved upwards
+ in memory to make room for it. currently this isn't done, so
+ the variable *value* table gets corrupted by the last few
+ variable names overwriting the first few values. */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+
+/* attempt to fix a "list-protected" Atari 8-bit BASIC program.
+ we don't fully detokenize, so this won't fix truly corrupted
+ files.
+
+ the "fix" is in 2 parts:
+ 1. fix any invalid (0-byte) offsets after a line number. this is
+ what causes BASIC to lock up.
+ 2. if the variable names were overwritten (e.g. with EOL characters,
+ or whatever), we "fix" that by making up new variable names.
+*/
+
+/* 0xf2 is 256 minus the 14-byte file header. main() subtracts this
+   (plus VNTP's distance from 256) from STMTAB to turn it into the
+   file offset where the tokenized code starts. */
+#define STM_OFFSET 0xf2
+
+/* entire file gets read into memory (for now). readfile() stores
+   at most 65535 bytes here. */
+unsigned char data[65536];
+
+/* BASIC 14-byte header values: seven 16-bit words, read by main()
+   with getword() from file offsets 0, 2, 4, 6, 8, 10 and 12 in
+   this order. */
+unsigned short lomem;
+unsigned short vntp;
+unsigned short vntd;
+unsigned short vvtp;
+unsigned short stmtab;
+unsigned short stmcur;
+unsigned short starp;
+
+/* positions (offsets into data[]) where various parts of the file
+   start, derived from the header vars above: tokenized code,
+   variable name table, variable value table. filelen is the number
+   of bytes readfile() returned. */
+unsigned short codestart;
+unsigned short vnstart;
+unsigned short vvstart;
+int filelen;
+
+/* name of executable, taken from argv[0] */
+char *self;
+
+/* these are set by the various command-line switches:
+   keepvars by -n, forcevars by -f, verbose by -v. keepgarbage
+   defaults to on and is cleared by -g. */
+int keepvars = 0;
+int forcevars = 0;
+int keepgarbage = 1;
+int verbose = 0;
+
+/* file handles, opened by parse_args() via open_input() and
+   open_output(). NULL means "not opened yet". */
+FILE *input_file = NULL;
+FILE *output_file = NULL;
+
+/* report a fatal error on stderr, prefixed with the program name,
+   then terminate with exit status 1. never returns. */
+void die(const char *msg) {
+    fprintf(stderr, "%s: %s\n", self, msg);
+    exit(1);
+}
+
+/* read entire file into memory (data[]), up to 65535 bytes.
+   a read error is fatal. input longer than the buffer is truncated,
+   with a warning so the truncation isn't silent.
+   returns the number of bytes actually read. */
+int readfile(void) {
+    size_t got = fread(data, 1, 65535, input_file);
+
+    /* a short count from fread() can mean EOF or an I/O error;
+       only ferror() tells them apart. */
+    if(ferror(input_file))
+        die("error reading input file");
+
+    /* buffer completely filled: peek one byte further to find out
+       whether there was more input than we can hold. */
+    if(got == 65535 && fgetc(input_file) != EOF)
+        fprintf(stderr, "Warning: input is larger than 65535 bytes, the rest was ignored\n");
+
+    fprintf(stderr, "read %d bytes\n", (int)got);
+    return (int)got;
+}
+
+/* fetch the 16-bit value stored at data[addr] (low byte) and
+   data[addr + 1] (high byte), i.e. 6502 LSB/MSB order. */
+unsigned short getword(int addr) {
+    unsigned short lsb = data[addr];
+    unsigned short msb = data[addr + 1];
+
+    return lsb | (msb << 8);
+}
+
+/* fixline() calculates & sets correct line length, by iterating
+   over the statement(s) within the line. the last statement's
+   offset will be the same as the line offset should have been,
+   if it weren't zeroed. when reading this code, it's helpful to
+   know that the lengths (line and statement) are counted from the
+   start of the line in memory.
+
+   A line with only a line number and one token (such as END) would have a
+   line length of 6: 2 for the 16-bit line number, 1 for the length byte
+   itself, 1 for the statement length byte (also 6), 1 for the END token,
+   and one for the end-of-line token.
+
+   A line with two statements: 10 ?:END
+   offset  value  meaning
+     0      0A    line number (low byte)
+     1      00    line number (high byte)
+     2      09    line length (or, offset to next line) [!]
+     3      06    offset to next statement *from the start of the line*
+     4      28    token for "?"
+     5      14    token for : (end of statement)
+     6      09    offset to next statement [!]
+     7      15    token for END
+     8      16    token for end-of-line [*]
+     9      ??    (first byte of the next line)
+
+   Note the values marked with [!] are equal.
+
+   [*] end-of-line is $16 *except* for REM and DATA, which are
+   terminated with $9B instead.
+
+   linepos is the offset in data[] of the line's first byte (the low
+   byte of its line number). the computed length is stored in the
+   line length byte (data[linepos + 2]) and also returned.
+
+   the byte just before each statement offset is that statement's
+   terminator. it is examined *before* the next offset is followed,
+   so a line consisting of a single statement is handled too: there
+   is no further statement offset to read, the byte at that position
+   already belongs to the next line.
+
+   if the first statement offset is unusable the line can't be
+   repaired and the program exits via die(). */
+int fixline(int linepos) {
+    /* +3 here to skip the line number + line length */
+    int offset = data[linepos + 3];
+    int token, next;
+
+    /* the first statement has to cover at least the 3 line header
+       bytes, its own offset byte and one token, and it can't end
+       beyond the end of the file. */
+    if(offset < 5 || linepos + offset > filelen)
+        die("can't repair line: its first statement offset is invalid");
+
+    for(;;) {
+        token = data[linepos + offset - 1];
+        fprintf(stderr, "offset %02x token %02x\n", offset, token);
+
+        /* anything but a colon token means this was the last statement */
+        if(token != 0x14)
+            break;
+
+        /* a colon says another statement follows, but there's no
+           room left in the file for its offset byte. */
+        if(linepos + offset >= filelen) {
+            fprintf(stderr, "Warning: line at offset %04x is cut off by the end of the file\n", linepos);
+            break;
+        }
+
+        /* offsets must strictly increase and stay inside the file,
+           otherwise we'd loop forever or read outside the program. */
+        next = data[linepos + offset];
+        if(next <= offset || linepos + next > filelen) {
+            fprintf(stderr, "Warning: bad statement offset %02x in line at offset %04x\n", next, linepos);
+            break;
+        }
+        offset = next;
+    }
+
+    data[linepos + 2] = offset;
+    return offset;
+}
+
+/* Iterate over all the tokenized lines, starting at codestart. If any
+   of them have invalid line lengths (<=5), call fixline() on them.
+
+   The walk ends at the end of the file, after line 32768, or when a
+   line's length can't be determined. Whatever is left after that is
+   reported as trailing garbage, and cut off (by lowering filelen)
+   unless keepgarbage is set.
+
+   Returns the number of lines whose length was repaired. */
+int fixcode(void) {
+    int result = 0;
+    int pos = codestart;
+    int offset, lineno = -1, tmpno;
+
+    /* a line needs at least its 2-byte line number and its length
+       byte; fewer bytes than that can't be a line. */
+    while(pos + 2 < filelen) {
+        tmpno = getword(pos);
+        if(tmpno <= lineno) {
+            fprintf(stderr, "Warning: line number %d at offset %04x is <= previous line number %d\n",
+                    tmpno, pos, lineno);
+        }
+        lineno = tmpno;
+
+        offset = data[pos + 2];
+        if(offset < 6) {
+            fprintf(stderr, "Found invalid offset %d (<6) at line %d\n", offset, lineno);
+            /* fixline() returns the complete line length, so it
+               replaces the bogus value rather than adding to it
+               (the bogus value isn't necessarily 0). */
+            offset = fixline(pos);
+            result++;
+        }
+
+        /* without a usable length we can't find the next line, and
+           adding 0 to pos would loop forever. */
+        if(offset < 1) {
+            fprintf(stderr, "Warning: can't determine length of line %d, giving up\n", lineno);
+            break;
+        }
+        pos += offset;
+
+        /* Atari BASIC tolerates garbage after the last tokenized line,
+           so we must do likewise. */
+        if(lineno == 32768) break;
+    }
+
+    fprintf(stderr, "End program pos %04x/%d\n", pos, pos);
+
+    if(filelen > pos) {
+        fprintf(stderr, "trailing garbage at EOF, %d bytes, %s\n",
+                filelen - pos, (keepgarbage ? "keeping" : "removing"));
+        if(!keepgarbage) filelen = pos;
+    }
+
+    return result;
+}
+
+/* Fixing the variables is a bit more work than it seems like
+   it might be, because the last byte of the name has to match
+   the type (inverse video "(" for numeric array, inverse "$" for
+   string, inverse last character of name for scalars). To do
+   this right, we have to examine the variable value table to
+   find out the type of each variable.
+
+   Each variable type gets assigned A to Z, then A1 to A9, B1 to B9,
+   etc. This means there will be A, A$, and A( variables, which might
+   be a bit confusing, but we have to keep the generated name table as
+   short as possible, because we can't extend the size of the table in
+   the file.
+
+   The space available for names runs from vnstart up to (but not
+   including) vvstart, where the variable value table begins. If the
+   generated names don't fit in there, something is seriously off:
+   rather than overwriting the first variable values, we give up
+   with an error.
+
+   The maximum number of variable names is 128. If all 128 vars are in
+   use, the minimum table size is 230 (26 one-letter names, 102 2-letter
+   or letter+number or one-letter string/array names).
+
+   Returns 0 if there are no variables or the existing name table
+   looks valid (and -f wasn't given), 1 if the table was rebuilt.
+   When rebuilding, VNTD is updated both in the vntd variable and in
+   the header bytes data[4] and data[5]. */
+
+int fixvars(void) {
+    int vp, vv = vvstart;
+    int strings = 0, arrays = 0, scalars = 0, varname = 0, varnum = 0;
+    int bad = 0, compared = 0;
+
+    /* See if the variables even need fixing.
+
+       This code is simpler than it should be: it checks that all
+       characters in the variable name table are valid, but doesn't
+       check that they're in valid sequences. Example: a variable name
+       that's just an inverse dollar sign would be considered OK).
+       Also multiple variables of the same type with the same name
+       would be OK.
+
+       However, if all the bytes are the same value, even if it's a
+       valid character, that's correctly detected as invalid.
+    */
+
+    if(vntp == vntd) {
+        fprintf(stderr, "No variables\n");
+        return 0;
+    }
+
+    /* "all bytes are the same" check; the last byte of the table
+       (normally the terminating null) is left out. */
+    bad = 1;
+    for(vp = vnstart + 1; vp < vvstart - 1; vp++) {
+        compared++;
+        if(data[vp] != data[vnstart]) bad = 0;
+    }
+
+    /* with only one name byte there's nothing to compare it to, and
+       a table holding a single one-letter variable is perfectly
+       valid: leave the verdict to the character check below. */
+    if(!compared) bad = 0;
+
+    vp = vnstart;
+    while(vp < vvstart) {
+        unsigned char c = data[vp];
+        fprintf(stderr, "%04x/%04x: %04x\n", vp, vvstart, c);
+
+        /* treat a null byte as end-of-table, ignore any junk
+           between it and the variable value table. */
+        if(c == 0) break;
+
+        vp++;
+
+        /* inverse $ or ( is OK */
+        if(c == 0xa4 || c == 0xa8) continue;
+
+        /* numbers and letters are allowed, inverse or normal. */
+        c &= 0x7f;
+        if(c >= 0x30 && c <= 0x39) continue;
+        if(c >= 0x41 && c <= 0x5a) continue;
+
+        bad++;
+        break;
+    }
+    if(!forcevars && !bad) return 0;
+
+    /* rebuild: one generated name per 8-byte entry in the variable
+       value table, which ends where the code starts. */
+    vp = vnstart;
+    while(vv < codestart) {
+        unsigned char name[3];
+        unsigned char sigil = 0;
+        int i, len = 0;
+        /* type: scalar = 0, array = 1, string = 2 */
+        unsigned char type = data[vv] >> 6;
+
+        /* 2nd byte of each entry should be the variable's number */
+        if(varnum != data[vv+1]) {
+            fprintf(stderr, "Warning: variable value is corrupt!\n");
+        }
+        varnum++;
+
+        /* each type has its own name sequence */
+        switch(type) {
+            case 1: varname = arrays++; sigil = 0xa8; break;
+            case 2: varname = strings++; sigil = 0xa4; break;
+            default: varname = scalars++; break;
+        }
+
+        /* A to Z first, then A1 to A9, B1 to B9, ... */
+        if(varname < 26) {
+            name[len++] = 'A' + varname;
+        } else {
+            varname -= 26;
+            name[len++] = 'A' + (varname / 9);
+            name[len++] = '1' + (varname % 9);
+        }
+
+        /* the name ends with the type's sigil, or for scalars with
+           the inverse video form of its own last character. */
+        if(sigil)
+            name[len++] = sigil;
+        else
+            name[len - 1] |= 0x80;
+
+        /* never write into the variable value table */
+        if(vp + len > vvstart)
+            die("rebuilt variable name table doesn't fit in the file (use -n to keep the existing names)");
+
+        for(i = 0; i < len; i++)
+            data[vp++] = name[i];
+
+        vv += 8;
+    }
+
+    /* there's supposed to be a null byte at the end of the table, unless
+       all 128 table slots are used. */
+    if(varnum < 128) {
+        if(vp >= vvstart)
+            die("rebuilt variable name table doesn't fit in the file (use -n to keep the existing names)");
+        data[vp] = 0;
+    }
+
+    /* fixup the VNTD pointer */
+    vntd = vntp + (vp - vnstart);
+    data[4] = vntd & 0xff;
+    data[5] = vntd >> 8;
+
+    fprintf(stderr, "%d variables, VNTD adjusted to %04x\n", varnum, vntd);
+    return 1;
+}
+
+/* write the usage summary and the list of options to stderr. */
+void print_help(void) {
+    /* everything after the usage line is fixed text */
+    static const char *const option_lines[] = {
+        "-v: verbose\n",
+        "-f: force variable name table rebuild\n",
+        "-n: do not rebuild variable name table, even if it's invalid\n",
+        "-g: remove trailing garbage, if present\n",
+        "Use - as a filename to read from stdin and/or write to stdout\n"
+    };
+    size_t i;
+
+    fprintf(stderr, "Usage: %s [-v] [-f] [-n] [-g] <inputfile> <outputfile>\n", self);
+    for(i = 0; i < sizeof(option_lines) / sizeof(option_lines[0]); i++)
+        fputs(option_lines[i], stderr);
+}
+
+/* complain about a command line argument we can't make sense of,
+   show the usage text, then exit with status 1. never returns. */
+void invalid_args(const char *arg) {
+    fprintf(stderr, "%s: Invalid argument '%s'\n\n", self, arg);
+    print_help();
+    exit(1);
+}
+
+/* fopen() wrapper that doesn't return on failure: the error is
+   reported with perror() (using the filename as prefix) and the
+   program exits with status 1. */
+FILE *open_file(const char *name, const char *mode) {
+    FILE *fp = fopen(name, mode);
+
+    if(fp == NULL) {
+        perror(name);
+        exit(1);
+    }
+
+    return fp;
+}
+
+/* set input_file. with a filename, that file is opened for binary
+   reading. with NULL, stdin is reopened in binary mode and used
+   instead. any failure is fatal (exit status 1). */
+void open_input(const char *name) {
+    if(name) {
+        input_file = open_file(name, "rb");
+        return;
+    }
+
+    if(!freopen(NULL, "rb", stdin)) {
+        perror("stdin");
+        exit(1);
+    }
+
+    input_file = stdin;
+}
+
+/* set output_file. with a filename, that file is opened for binary
+   writing. with NULL, stdout is reopened in binary mode and used
+   instead. any failure is fatal (exit status 1). */
+void open_output(const char *name) {
+    if(name) {
+        output_file = open_file(name, "wb");
+        return;
+    }
+
+    if(!freopen(NULL, "wb", stdout)) {
+        perror("stdout");
+        exit(1);
+    }
+
+    output_file = stdout;
+}
+
+/* process the command line: remember the program name in self, set
+   the option flags, and open the input and output files. the first
+   filename (or lone "-") is the input, the second is the output, a
+   third one is an error. only the character right after the "-" of
+   an option is looked at.
+
+   called without any arguments, prints the help and exits with
+   status 0. every problem with the arguments ends the program with
+   status 1. */
+void parse_args(int argc, char **argv) {
+    int i;
+
+    self = *argv;
+    if(argc < 2) {
+        print_help();
+        exit(0);
+    }
+
+    for(i = 1; i < argc; i++) {
+        char *arg = argv[i];
+        /* what to open: NULL stands for stdin/stdout */
+        const char *filename = arg;
+
+        if(arg[0] == '-') {
+            switch(arg[1]) {
+                case 'v': verbose++; continue;
+                case 'f': forcevars++; continue;
+                case 'n': keepvars++; continue;
+                case 'g': keepgarbage = 0; continue;
+                case 0:
+                    /* a lone "-" takes the place of a filename */
+                    filename = NULL;
+                    break;
+                default:
+                    invalid_args(arg);
+                    continue;
+            }
+        }
+
+        if(!input_file)
+            open_input(filename);
+        else if(!output_file)
+            open_output(filename);
+        else
+            invalid_args(arg);
+    }
+
+    if(!input_file) die("no input file given (use - for stdin)");
+    if(!output_file) die("no output file given (use - for stdout)");
+    if(keepvars && forcevars) die("-f and -n are mutually exclusive");
+}
+
+/* program entry point: parse the command line, load the input file,
+   decode and sanity-check the 14-byte header, repair the variable
+   name table (unless -n was given) and the line lengths, then write
+   the result to the output file.
+
+   returns 0 on success. every failure ends the program through
+   die() with exit status 1. */
+int main(int argc, char **argv) {
+    parse_args(argc, argv);
+
+    filelen = readfile();
+
+    /* the header words below are read from the first 14 bytes */
+    if(filelen < 14) die("File is too short to be an Atari BASIC program (no 14-byte header)");
+
+    lomem = getword(0);
+    vntp = getword(2);
+    vntd = getword(4);
+    vvtp = getword(6);
+    stmtab = getword(8);
+    stmcur = getword(10);
+    starp = getword(12);
+
+    if(lomem) die("This doesn't look like an Atari BASIC program (no $0000 signature)");
+
+    /* turn the header's pointers into offsets within data[] */
+    codestart = stmtab - STM_OFFSET - (vntp - 256);
+    vnstart = vntp - 256 + 14;
+    vvstart = vvtp - 256 + 14;
+
+    fprintf(stderr, "LOMEM %04x\n", lomem);
+    fprintf(stderr, "VNTP %04x\n", vntp);
+    fprintf(stderr, "VNTD %04x\n", vntd);
+    fprintf(stderr, "VVTP %04x\n", vvtp);
+    fprintf(stderr, "STMTAB %04x, codestart %04x\n", stmtab, codestart);
+    fprintf(stderr, "STMCUR %04x\n", stmcur);
+    fprintf(stderr, "STARP %04x\n", starp);
+    fprintf(stderr, "vvstart %04x\n", vvstart);
+
+    /* fixvars() and fixcode() trust these offsets: the name table,
+       the value table and the code have to follow each other in
+       this order, all within the bytes we actually read. */
+    if(vnstart > vvstart || vvstart > codestart || codestart > filelen)
+        die("Header pointers are inconsistent with the file contents, can't process this file");
+
+    if(!keepvars) {
+        if(fixvars())
+            fprintf(stderr, "Variable names replaced\n");
+        else
+            fprintf(stderr, "Variable names were already OK\n");
+    }
+
+    if(fixcode())
+        fprintf(stderr, "Fixed invalid offset in code\n");
+    else
+        fprintf(stderr, "No invalid offsets (maybe wasn't protected?)\n");
+
+    /* a short write or a failed flush means the output is incomplete */
+    if(fwrite(data, 1, filelen, output_file) != (size_t)filelen)
+        die("error writing output file");
+    if(fclose(output_file))
+        die("error closing output file");
+    fclose(input_file);
+
+    return 0;
+}