aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorB. Watson <urchlay@slackware.uk>2024-07-07 00:26:20 -0400
committerB. Watson <urchlay@slackware.uk>2024-07-07 00:26:20 -0400
commit37775a900ae8023961dbae3ca4d6273bb18c9352 (patch)
tree130e0c05a19db8d3d2fc2152518f7ef0ea4b160a
parente87e3facc185150db9ac87b2704e1d340b8cdb94 (diff)
downloadbw-atari8-tools-37775a900ae8023961dbae3ca4d6273bb18c9352.tar.gz
whichbas: added.
-rw-r--r--Makefile4
-rw-r--r--whichbas.c408
-rw-r--r--whichbas.rst75
3 files changed, 486 insertions, 1 deletions
diff --git a/Makefile b/Makefile
index 0d1d2da..4cb38be 100644
--- a/Makefile
+++ b/Makefile
@@ -16,7 +16,7 @@ CC=gcc
CFLAGS=-Wall $(COPT) -ansi -D_GNU_SOURCE -DVERSION=\"$(VERSION)\"
# BINS and SCRIPTS go in $BINDIR, DOCS go in $DOCDIR
-BINS=a8eol atr2xfd atrsize axe blob2c blob2xex cart2xex cxrefbas dumpbas fenders protbas renumbas rom2cart unmac65 unprotbas vxrefbas xex1to2 xexamine xexcat xexsplit xfd2atr listbas a8cat a8xd
+BINS=a8eol atr2xfd atrsize axe blob2c blob2xex cart2xex cxrefbas dumpbas fenders protbas renumbas rom2cart unmac65 unprotbas vxrefbas xex1to2 xexamine xexcat xexsplit xfd2atr listbas a8cat a8xd whichbas
SCRIPTS=dasm2atasm
MANS=a8eol.1 xfd2atr.1 atr2xfd.1 blob2c.1 cart2xex.1 fenders.1 xexsplit.1 xexcat.1 atrsize.1 rom2cart.1 unmac65.1 axe.1 dasm2atasm.1 blob2xex.1 xexamine.1 xex1to2.1 unprotbas.1 protbas.1 renumbas.1 dumpbas.1 vxrefbas.1 cxrefbas.1 listbas.1 a8cat.1 a8xd.1
MAN5S=xex.5
@@ -57,6 +57,8 @@ renumbas: bas.o bcdfp.o linetab.o
dumpbas: bas.o
+whichbas: bas.o
+
vxrefbas: bas.o
cxrefbas: bas.o bcdfp.o linetab.o
diff --git a/whichbas.c b/whichbas.c
new file mode 100644
index 0000000..c407c33
--- /dev/null
+++ b/whichbas.c
@@ -0,0 +1,408 @@
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <time.h>
+
+#include "bas.h"
+
+/* Bit flags, one per BASIC dialect. bas_type holds the set of dialects
+   that have not been ruled out yet (see remove_type). */
+#define BT_ATARI 1
+#define BT_TURBO 2
+#define BT_BXL 4
+#define BT_BXE 8
+
+/* both OSS dialects (BXL and BXE) at once */
+#define BT_BXL_BXE (BT_BXL | BT_BXE)
+
+int bas_type = 0x0f; /* start out with all enabled */
+
+int comma_count; /* count of regular commas (not string/array) in statement */
+/* token value of the most recent command, recorded by handle_cmd */
+unsigned char last_cmd;
+/* position of that command token, as passed to handle_cmd */
+unsigned short last_cmd_pos;
+
+/* Write the one-line usage summary to standard output. */
+void print_help(void) {
+    fprintf(stdout, "Usage: %s [-v] <inputfile>\n", self);
+}
+
+/* Process the command line.
+   -v turns on verbose mode; any other option prints the usage message
+   and exits with status 1. The first non-option argument is passed to
+   open_input(); a missing argument, or an input that ends up being
+   stdin, is reported through die(). */
+void parse_args(int argc, char **argv) {
+    int ch;
+
+    for(;;) {
+        ch = getopt(argc, argv, "v");
+        if(ch == -1)
+            break;
+
+        if(ch == 'v') {
+            verbose = 1;
+        } else {
+            print_help();
+            exit(1);
+        }
+    }
+
+    if(optind < argc)
+        open_input(argv[optind]);
+    else
+        die("No input file given (and stdin not supported).");
+
+    if(input_file == stdin)
+        die("Reading from standard input not supported.");
+}
+
+/* don't need this.
+void add_type(int type) {
+ bas_type |= type;
+}
+*/
+
+/* Rule out one or more dialects: clear the bit(s) given in type from
+   bas_type. Only the low four bits of bas_type are ever kept. */
+void remove_type(int type) {
+    int keep = ~type & 0x0f;
+
+    bas_type = bas_type & keep;
+}
+
+/* Print the name of the detected BASIC dialect on standard output,
+   followed by a newline, then exit. Never returns.
+   Exit status is 0 when the result is Atari BASIC, otherwise
+   bas_type + 8. */
+void print_result(void) {
+    const char *name;
+    int is_atari = (bas_type & BT_ATARI) != 0;
+
+    if(is_atari) {
+        name = "Atari BASIC";
+    } else if(bas_type == BT_BXL || bas_type == (BT_BXL | BT_BXE)) {
+        name = "OSS BASIC XL";
+    } else if(bas_type == BT_BXE) {
+        name = "OSS BASIC XE";
+    } else if(bas_type == BT_TURBO) {
+        name = "Turbo BASIC XL";
+    } else {
+        name = "Not Atari BASIC; probably either Turbo or BXL/BXE";
+    }
+
+    fputs(name, stdout);
+    putchar('\n');
+
+    /* Decide the exit status with the same test that picked the name.
+       bas_type starts out as 0x0f, so a program that never had its
+       BT_ATARI bit cleared does not compare equal to BT_ATARI alone;
+       testing for equality made "Atari BASIC" results exit non-zero. */
+    exit(is_atari ? 0 : bas_type + 8);
+}
+
+/* Command-token callback (installed as on_cmd_token in main).
+   Records the command token and its position for handle_end_stmt,
+   resets the per-statement comma counter, and then uses the token
+   (plus whether/what kind of argument follows it) to rule out
+   dialects via remove_type(). Tokens up to CMD_ERROR are treated as
+   plain Atari BASIC and rule nothing out. */
+CALLBACK(handle_cmd) {
+    int has_args = 0;
+    unsigned char nexttok;
+
+    last_cmd = tok;
+    last_cmd_pos = pos;
+    comma_count = 0;
+
+    if(verbose) fprintf(stderr, "handle_cmd: lineno %d, tok $%02x, bas_type was %02x...", lineno, tok, bas_type);
+
+    if(tok <= CMD_ERROR) {
+        /* legal in BASIC, ignore. Finish the verbose line first, the
+           same way handle_op does on its early return. */
+        if(verbose) fprintf(stderr, " now %02x\n", bas_type);
+        return;
+    }
+    remove_type(BT_ATARI);
+    if(tok >= 0x5b) remove_type(BT_BXL);
+
+    nexttok = program[pos + 1];
+    has_args = !(nexttok == OP_EOS || nexttok == OP_EOL);
+
+    /* we have tokens 0x3a to 0x68 in both TB and BXE, or 47
+       of them.
+       Some tokens can't be determined, because they take the
+       same argument (or lack of) in both Turbo and BXL/XE. These
+       are:
+       0x3c: REPEAT or ELSE (no args either way)
+       0x42: Maybe: BPUT or RGET (take the same args... but not quite!)
+       0x43: Maybe: BGET or BPUT (take the same args... but not quite!)
+       0x46: LOOP or CP (no args either way)
+       0x49: LOCK or UNPROTECT (take the same args)
+       0x4B: RENAME in both Turbo and BXL/XE (take the same args)
+       0x60: CLS or HITCLR (no args either way)
+       This leaves 40 we can check.
+       Covered so far: 34 (85%)
+       TODO: Unknown tokens:
+       0x54: ??? in TB (find out what), LVAR in BXL/BXE.
+       0x5A: BLOAD or... what? (Jindroush lists it as ?5A?)
+       TODO:
+       0x5B: BRUN or CALL (both take a string, CALL allows "USING" though)
+       0x5C: GO# (1 arg only) or SORTUP (optional 2nd arg of USING, but no comma)
+       0x5D: # (1 arg only) or SORTDOWN (optional 2nd arg of USING, but no comma)
+       0x5F: PAINT (req 2 args) or NUM (optional 2 args, probly not appear in program)
+     */
+    switch(tok) {
+        case 0x39: /* MOVE <args> or ENDWHILE */
+        case 0x3a: /* -MOVE <args> or TRACEOFF */
+        case 0x3d: /* UNTIL <args> or ENDIF */
+        case 0x56: /* DEL <args> or FAST */
+        case 0x61: /* DSOUND (4 num args) or INVERSE (no args) */
+        case 0x62: /* CIRCLE (3 num args) or NORMAL (no args) */
+            if(has_args) {
+                remove_type(BT_BXL_BXE);
+            } else {
+                remove_type(BT_TURBO);
+            }
+            break;
+        case 0x58: /* TRACE (optional + or -), EXTEND (BXE; no args) */
+            /* EXTEND can't show up in a program except maybe line 32768, e.g.
+               EXTEND:SAVE "D:BLAH". */
+            remove_type(BT_BXL);
+            if(lineno < 32768) {
+                remove_type(BT_BXE);
+            }
+            break;
+        case 0x59: /* TEXT (1st arg is number), PROCEDURE (arg is string const (not var!)) */
+            if(nexttok == OP_STRCONST) {
+                remove_type(BT_TURBO);
+            } else {
+                remove_type(BT_BXL_BXE);
+            }
+            break;
+        case 0x3f: /* WEND or LOMEM <args> */
+        case 0x40: /* ELSE or DEL <args> */
+        case 0x41: /* ENDIF or RPUT <args> */
+        case 0x45: /* DO or TAB <args> */
+        case 0x47: /* EXIT or ERASE <args> */
+        case 0x51: /* ENDPROC or PMMOVE <args> */
+            if(has_args) {
+                remove_type(BT_TURBO);
+            } else {
+                remove_type(BT_BXL_BXE);
+            }
+            break;
+        case 0x48: /* DIR (optional arg) or PROTECT (req'd arg) */
+            /* not conclusive: without args means TB, but with arg,
+               it could be either */
+            if(!has_args) {
+                remove_type(BT_BXL_BXE);
+            }
+            break;
+        case 0x4a: /* UNLOCK (req'd arg) or DIR (optional arg) */
+            /* not conclusive: without args means BXL/BXE (DIR), but
+               with arg, it could be either */
+            if(!has_args) {
+                remove_type(BT_TURBO);
+            }
+            break;
+        case 0x3b: /* *F (optional + or -), TRACE (no arg) */
+        case 0x5e: /* *B (optional + or -) or EXIT (no arg) */
+            if(has_args) {
+                remove_type(BT_BXL_BXE);
+            }
+            break;
+        case 0x44: /* FILLTO or BGET (check for a # after the token) */
+            if(nexttok == OP_HASH) {
+                remove_type(BT_TURBO);
+            } else {
+                remove_type(BT_BXL_BXE);
+            }
+            break;
+        case 0x4e: /* TIME$= (1 string arg) or PMCLR (1 num arg) */
+            /* XXX: this doesn't do anything if the arg is a variable; we
+               could examine the type, but we don't yet */
+            if(nexttok == OP_STRCONST) {
+                remove_type(BT_BXL_BXE);
+            } else if(nexttok == OP_NUMCONST) {
+                remove_type(BT_TURBO);
+            }
+            break;
+        case 0x50: /* EXEC (1 arg, *must* be variable) or PMGRAPHICS (1 num arg, may be const) */
+            if(nexttok < 0x80) {
+                remove_type(BT_TURBO);
+            }
+            break;
+        case 0x57: /* DUMP (1 optional string arg) or LOCAL (1 variable arg) */
+            if(!has_args || (nexttok == OP_STRCONST)) {
+                /* if there's no arg, or one string constant arg... */
+                /* XXX: DUMP A$ not detected */
+                remove_type(BT_BXL_BXE);
+            }
+            break;
+        default: break;
+    }
+    if(verbose) fprintf(stderr, " now %02x\n", bas_type);
+}
+
+/* Expression-token callback (installed as on_exp_token in main).
+   Counts commas for handle_end_stmt and uses operator/function tokens,
+   together with the one or two tokens that follow, to rule out
+   dialects via remove_type(). Tokens up to OP_FUNC_STRIG are treated
+   as plain Atari BASIC, except 0x0d (hex constant), which rules
+   Atari BASIC out. */
+CALLBACK(handle_op) {
+    unsigned char nexttok = program[pos + 1];
+    unsigned char nexttok2 = program[pos + 2];
+
+    if(tok == OP_COMMA) comma_count++;
+
+    if(verbose) fprintf(stderr, "handle_op: lineno %d, tok $%02x, comma_count %d, bas_type was %02x...", lineno, tok, comma_count, bas_type);
+
+    if(tok == 0x0d) remove_type(BT_ATARI); /* hex const (turbo *and* bxl/xe) */
+    if(tok <= OP_FUNC_STRIG) {
+        if(verbose) fprintf(stderr, " now %02x\n", bas_type);
+        return; /* legal in BASIC, ignore */
+    }
+    remove_type(BT_ATARI);
+
+    if(tok >= 0x69) {
+        remove_type(BT_BXL_BXE);
+    }
+
+    if(tok == 0x55) {
+        /* DPEEK (function) TB, USING (infix, not a function) in BXL/BXE */
+        if(nexttok == OP_FUNC_LPAR) {
+            remove_type(BT_BXL_BXE);
+        }
+    }
+
+    if(tok == 0x5c) {
+        /* DEC (function, takes str) in TB, HEX$ (function, takes num) in BXL/BXE */
+        if(nexttok2 == OP_STRCONST) {
+            remove_type(BT_BXL_BXE);
+        } else if(nexttok2 >= 0x80 && (get_vartype(nexttok2) == TYPE_STRING)) {
+            /* Variable tokens start at 0x80 (the other variable checks
+               in this file use >= 0x80 / < 0x80), so 0x80 itself has to
+               be included here or the first variable is never tested. */
+            /* TODO: see if this test is actually valid! */
+            remove_type(BT_BXL_BXE);
+        }
+    }
+
+    if(tok == 0x5f) {
+        /* TIME$ in TB, SYS (function) in BXL/BXE */
+        if(nexttok == OP_FUNC_LPAR) {
+            remove_type(BT_TURBO);
+        }
+    }
+
+    if(tok == 0x60) {
+        /* TIME in TB, VSTICK (function) in BXL/BXE */
+        if(nexttok == OP_FUNC_LPAR) {
+            remove_type(BT_TURBO);
+        }
+    }
+
+    if(tok == 0x61) {
+        /* MOD (infix op) in TB, HSTICK (function) in BXL/BXE */
+        if(nexttok == OP_FUNC_LPAR) {
+            remove_type(BT_TURBO);
+        }
+    }
+
+    if(tok == 0x62) {
+        /* EXEC (infix op, with ON) in TB, PMADR (function) in BXL/BXE */
+        if(nexttok == OP_FUNC_LPAR) {
+            remove_type(BT_TURBO);
+        }
+    }
+
+    if(tok == 0x66 || tok == 0x67 || tok == 0x68) {
+        /* either %0 %1 %2 (TB), or LEFT$ RIGHT$ MID$ (BXL/XE) */
+        if(nexttok == OP_STRCONST || nexttok >= 0x80) {
+            /* %0 %1 %2 can't be followed by a string constant *or* a variable */
+            remove_type(BT_TURBO);
+        }
+    }
+    if(verbose) fprintf(stderr, " now %02x\n", bas_type);
+}
+
+/* End-of-statement callback (installed as on_end_stmt in main).
+   For commands whose meaning differs between Turbo and BXL/BXE only in
+   the number of arguments, uses the comma count gathered by handle_op
+   for the statement that just ended (command recorded in last_cmd /
+   last_cmd_pos by handle_cmd) to rule out dialects. */
+CALLBACK(handle_end_stmt) {
+    if(verbose) fprintf(stderr, "handle_end_stmt: lineno %d, tok $%02x, last_cmd $%02x, comma_count %d, bas_type was %02x...", lineno, tok, last_cmd, comma_count, bas_type);
+    switch(last_cmd) {
+        case 0x38: /* DPOKE (2 args) or WHILE (1 arg) */
+            if(comma_count) {
+                remove_type(BT_BXL_BXE);
+            } else {
+                remove_type(BT_TURBO);
+            }
+            break;
+        case 0x3e: /* WHILE (1 arg) or DPOKE (2 args) */
+        case 0x4c: /* DELETE (1 arg) or MOVE (3 or 4 args) */
+        case 0x4d: /* PAUSE (1 arg) or MISSILE (3 args) */
+        case 0x52: /* FCOLOR (1 arg) or PMWIDTH (2 args) */
+        case 0x53: /* *L (optional + or - only) or SET (req 2 num args) */
+        case 0x4f: /* PROC (1 arg) or PMCOLOR (3 args) */
+            if(comma_count) {
+                remove_type(BT_TURBO);
+            } else {
+                remove_type(BT_BXL_BXE);
+            }
+            break;
+        case 0x55: /* RENUM in both (TB req 3 args, BXL up to two) */
+            if(comma_count == 2) {
+                remove_type(BT_BXL_BXE);
+            } else {
+                remove_type(BT_TURBO);
+            }
+            break;
+        case 0x63: /* %PUT (usually seen with optional #; 1 or 2 args) or BLOAD (1 string arg) */
+            if(comma_count) {
+                /* multiple args */
+                remove_type(BT_BXL_BXE);
+            } else if(program[last_cmd_pos + 1] == OP_STRCONST) {
+                /* Look at the token right after the command, so index
+                   by the command's position (last_cmd_pos), not by its
+                   token value (last_cmd, which is always 0x63 here). */
+                /* one arg, string const. XXX: check var type */
+                remove_type(BT_TURBO);
+            }
+            break;
+        case 0x64: /* %GET (usually seen with optional #; 1 or 2 args) or BSAVE (3 args) */
+            if(comma_count == 2) {
+                remove_type(BT_TURBO);
+            } else {
+                remove_type(BT_BXL_BXE);
+            }
+            break;
+        default: break;
+    }
+    if(verbose) fprintf(stderr, " now %02x\n", bas_type);
+}
+
+/* Report a file that is not handled as tokenized BASIC: close the
+   input file, print the description plus a newline on standard output,
+   and exit with status 0. Never returns. */
+void foreign(const char *name) {
+    fclose(input_file);
+
+    fputs(name, stdout);
+    putchar('\n');
+
+    exit(0); /* TODO: pick a better number */
+}
+
+/* Look at the first bytes (and, for Microsoft BASIC, the last three
+   bytes) of input_file to recognise files that are not handled by the
+   token walker.
+   If the file starts with two zero bytes, the stream is rewound and
+   the function returns so normal processing can continue. A file
+   shorter than two bytes is reported through die(). Every other case
+   ends in foreign(), which prints a description and exits. */
+void detect_foreign(void) {
+    int i, nuls, c, d;
+    int third, fourth;
+
+    c = fgetc(input_file);
+    d = fgetc(input_file);
+
+    if(c == 0 && d == 0) {
+        /* This is why we can't read from stdin. */
+        rewind(input_file);
+        return;
+    }
+
+    if(c == EOF || d == EOF)
+        die("File is too short to be a BASIC program of any kind.");
+
+    if(c == 0xff && d == 0xff)
+        foreign("XEX executable (not BASIC at all!)");
+
+    if(c == 0xfe && d == 0xfe)
+        foreign("Mac/65 tokenized source (not BASIC at all!)");
+
+    if(c == 0xdd && d == 0x00)
+        foreign("EXTENDed OSS BASIC XE");
+
+    if(c == 0x7f && d == 'E') {
+        /* Read bytes 3 and 4 into their own variables: the checks
+           below must still see the first two bytes of the file if
+           this turns out not to be an ELF header. */
+        third = fgetc(input_file);
+        fourth = fgetc(input_file);
+        if(third == 'L' && fourth == 'F')
+            foreign("ELF executable (huh?)");
+    }
+
+    /* The first two bytes are known not to be both zero here (that
+       case returned above), so only the last three bytes need
+       checking. */
+    if(fseek(input_file, -3, SEEK_END) == 0) {
+        nuls = 0;
+        for(i = 0; i < 3; i++) {
+            if(fgetc(input_file) == 0) nuls++;
+        }
+        if(nuls == 3) {
+            foreign("Microsoft BASIC");
+        }
+    }
+
+    if(isdigit(c) && (d == 0x20 || isdigit(d)))
+        foreign("Text file, could be LISTed BASIC (or not)");
+
+    if(isprint(c) && isprint(d))
+        foreign("Text file (not BASIC at all!)");
+
+    foreign("Unknown file type (not BASIC at all!)");
+}
+
+/* Program entry point: handle arguments, bail out early on files that
+   detect_foreign() recognises as something else, then load the program,
+   install the three token callbacks, walk the code and print the
+   verdict. */
+int main(int argc, char **argv) {
+    set_self(argv[0]);
+    parse_general_args(argc, argv, print_help);
+    parse_args(argc, argv);
+
+    detect_foreign();
+
+    readfile();
+    parse_header();
+
+    /* hook up the callbacks used during the walk */
+    on_end_stmt = handle_end_stmt;
+    on_exp_token = handle_op;
+    on_cmd_token = handle_cmd;
+
+    walk_all_code();
+
+    print_result(); /* always exits */
+
+    return 0; /* never happens, shuts up gcc's warning though */
+}
diff --git a/whichbas.rst b/whichbas.rst
new file mode 100644
index 0000000..5e7363c
--- /dev/null
+++ b/whichbas.rst
@@ -0,0 +1,75 @@
+========
+whichbas
+========
+
+----------------------------------------------------------
+Determine BASIC variant of a tokenized Atari 8-bit program
+----------------------------------------------------------
+
+.. include:: manhdr.rst
+
+SYNOPSIS
+========
+whichbas [-v] *input-file*
+
+DESCRIPTION
+===========
+**whichbas** reads a tokenized Atari 8-bit BASIC, Turbo BASIC,
+BASIC XL, BASIC XE, or Atari Microsoft BASIC program and attempts to
+discover which BASIC is required to run it.
+
+NOTES
+=====
+Turbo BASIC, BASIC XL, and BASIC XE are all supersets of Atari BASIC.
+If you wrote a program using one of them, but didn't use any of the
+extra commands or functions, the result is still an Atari BASIC program.
+
+There are two types of BASIC XE programs: regular and *EXTEND*\ed. The
+extended type is detected 100% reliably, because the first byte of the
+file changes from **$00** to **$DD**. Non-extended programs are only
+identified as BASIC XE if they use any of the extra commands BASIC XE
+adds to those found in BASIC XL.
+
+Atari BASIC programs can be detected 100% reliably.
+
+Detection of Turbo vs. BXL/BXE isn't 100% reliable, and probably
+never will be. There's too much overlap between the sets of extra
+tokens added by each. Programs that don't use very many of the extra
+functions provided by Turbo/BXL/BXE may show up as "Not Atari BASIC;
+probably either Turbo or BXL/BXE".
+
+Atari Microsoft BASIC is detected by checking that the first two
+bytes of the file are not zero, and that the last 3 are zero. This
+may result in false positives (files that aren't BASIC programs at
+all might show up as Microsoft). Also, no distinction is made between
+Atari MS BASIC 1.0 and 2.0.
+
+Various non-BASIC files are detected (including Mac/65 source,
+ELF binaries, etc) as a convenience, but I wouldn't rely on
+**whichbas**\'s non-BASIC file type detection if I were you.
+
+LIMITATIONS
+===========
+Currently, **whichbas** doesn't look at the variable name or type
+tables. One problem caused by this: If a program uses only Atari BASIC
+tokens, but uses variable(s) with _ in the name, it will be identified
+as Atari BASIC... even though _ in variable names is illegal in Atari
+BASIC and pretty much guarantees the program is Turbo/BXL/BXE.
+
+Looking at the variable types could also improve detection, since
+Turbo and BXL/BXE support extended variable types.
+
+**whichbas** knows nothing about other BASICs such as Frost BASIC,
+BASIC/A+, Altirra BASIC...
+
+OPTIONS
+=======
+
+.. include:: genopts.rst
+
+EXIT STATUS
+===========
+
+0 for success, 1 for failure.
+
+.. include:: manftr.rst