From 37775a900ae8023961dbae3ca4d6273bb18c9352 Mon Sep 17 00:00:00 2001
From: "B. Watson"
Date: Sun, 7 Jul 2024 00:26:20 -0400
Subject: whichbas: added.

---
 Makefile     |   4 +-
 whichbas.c   | 449 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 whichbas.rst |  78 +++++++++++
 3 files changed, 530 insertions(+), 1 deletion(-)
 create mode 100644 whichbas.c
 create mode 100644 whichbas.rst

diff --git a/Makefile b/Makefile
index 0d1d2da..4cb38be 100644
--- a/Makefile
+++ b/Makefile
@@ -16,7 +16,7 @@ CC=gcc
 CFLAGS=-Wall $(COPT) -ansi -D_GNU_SOURCE -DVERSION=\"$(VERSION)\"
 
 # BINS and SCRIPTS go in $BINDIR, DOCS go in $DOCDIR
-BINS=a8eol atr2xfd atrsize axe blob2c blob2xex cart2xex cxrefbas dumpbas fenders protbas renumbas rom2cart unmac65 unprotbas vxrefbas xex1to2 xexamine xexcat xexsplit xfd2atr listbas a8cat a8xd
+BINS=a8eol atr2xfd atrsize axe blob2c blob2xex cart2xex cxrefbas dumpbas fenders protbas renumbas rom2cart unmac65 unprotbas vxrefbas xex1to2 xexamine xexcat xexsplit xfd2atr listbas a8cat a8xd whichbas
 SCRIPTS=dasm2atasm
 MANS=a8eol.1 xfd2atr.1 atr2xfd.1 blob2c.1 cart2xex.1 fenders.1 xexsplit.1 xexcat.1 atrsize.1 rom2cart.1 unmac65.1 axe.1 dasm2atasm.1 blob2xex.1 xexamine.1 xex1to2.1 unprotbas.1 protbas.1 renumbas.1 dumpbas.1 vxrefbas.1 cxrefbas.1 listbas.1 a8cat.1 a8xd.1
 MAN5S=xex.5
@@ -57,6 +57,8 @@ renumbas: bas.o bcdfp.o linetab.o
 
 dumpbas: bas.o
 
+whichbas: bas.o
+
 vxrefbas: bas.o
 
 cxrefbas: bas.o bcdfp.o linetab.o
diff --git a/whichbas.c b/whichbas.c
new file mode 100644
index 0000000..c407c33
--- /dev/null
+++ b/whichbas.c
@@ -0,0 +1,449 @@
+/* whichbas: work out which BASIC (Atari, Turbo, BASIC XL, BASIC XE) a
+   tokenized program needs, by walking its tokens and ruling out
+   every BASIC that couldn't have produced them. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+
+#include "bas.h"
+
+/* candidate BASICs, one bit each: bas_type is the set not yet ruled out */
+#define BT_ATARI 1
+#define BT_TURBO 2
+#define BT_BXL 4
+#define BT_BXE 8
+
+#define BT_BXL_BXE (BT_BXL | BT_BXE)
+
+int bas_type = 0x0f; /* start out with all enabled */
+
+int comma_count; /* count of regular commas (not string/array) in statement */
+unsigned char last_cmd; /* command token of the current statement */
+unsigned short last_cmd_pos; /* offset of that token within program[] */
+
+/* print the usage message on stdout */
+void print_help(void) {
+    printf("Usage: %s [-v] <input-file>\n", self);
+}
+
+/* handle our own option (-v), then open the input file. dies if no
+   filename was given, or if the input turns out to be stdin. */
+void parse_args(int argc, char **argv) {
+    int opt;
+
+    while( (opt = getopt(argc, argv, "v")) != -1) {
+        switch(opt) {
+            case 'v': verbose = 1; break;
+            default:
+                print_help();
+                exit(1);
+        }
+    }
+
+    if(optind >= argc)
+        die("No input file given (and stdin not supported).");
+    else
+        open_input(argv[optind]);
+    if(input_file == stdin)
+        die("Reading from standard input not supported.");
+}
+
+/* don't need this.
+void add_type(int type) {
+    bas_type |= type;
+}
+*/
+
+/* rule out the BASIC(s) in 'type', which is a mask of BT_* bits */
+void remove_type(int type) {
+    bas_type &= ((~type) & 0x0f);
+}
+
+/* print the verdict on stdout, then exit. exit status is 0 if Atari
+   BASIC hasn't been ruled out, otherwise 8 plus the mask of BT_* bits
+   still set in bas_type. */
+void print_result(void) {
+    const char *name;
+
+    if(bas_type & BT_ATARI) {
+        name = "Atari BASIC";
+    } else if(bas_type == BT_BXL || bas_type == (BT_BXL | BT_BXE)) {
+        name = "OSS BASIC XL";
+    } else if(bas_type == BT_BXE) {
+        name = "OSS BASIC XE";
+    } else if(bas_type == BT_TURBO) {
+        name = "Turbo BASIC XL";
+    } else {
+        name = "Not Atari BASIC; probably either Turbo or BXL/BXE";
+    }
+
+    fputs(name, stdout);
+    putchar('\n');
+
+    /* bas_type starts out as 0x0f and only ever loses bits, so a plain
+       Atari BASIC program still has the other 3 bits set here. test
+       the BT_ATARI bit; an equality test would fail for such a program. */
+    exit((bas_type & BT_ATARI) ? 0 : bas_type + 8);
+}
+
+/* installed as on_cmd_token in main(); gets the command token of a
+   statement. remembers it (and where it was) for handle_end_stmt(),
+   then rules out BASICs based on the token and on what follows it. */
+CALLBACK(handle_cmd) {
+    int has_args = 0;
+    unsigned char nexttok;
+
+    last_cmd = tok;
+    last_cmd_pos = pos;
+    comma_count = 0;
+
+    if(verbose) fprintf(stderr, "handle_cmd: lineno %d, tok $%02x, bas_type was %02x...", lineno, tok, bas_type);
+
+    if(tok <= CMD_ERROR) {
+        /* finish the verbose line before returning, like handle_op() does */
+        if(verbose) fprintf(stderr, " now %02x\n", bas_type);
+        return; /* legal in BASIC, ignore */
+    }
+    remove_type(BT_ATARI);
+    if(tok >= 0x5b) remove_type(BT_BXL);
+
+    nexttok = program[pos + 1];
+    has_args = !(nexttok == OP_EOS || nexttok == OP_EOL);
+
+    /* we have tokens 0x3a to 0x68 in both TB and BXE, or 47
+       of them.
+       Some tokens can't be determined, because they take the
+       same argument (or lack of) in both Turbo and BXL/XE. These
+       are:
+       0x3c: REPEAT or ELSE (no args either way)
+       0x42: Maybe: BPUT or RGET (take the same args... but not quite!)
+       0x43: Maybe: BGET or BPUT (take the same args... but not quite!)
+       0x46: LOOP or CP (no args either way)
+       0x49: LOCK or UNPROTECT (take the same args)
+       0x4B: RENAME in both Turbo and BXL/XE (take the same args)
+       0x60: CLS or HITCLR (no args either way)
+       This leaves 40 we can check.
+       Covered so far: 34 (85%)
+       TODO: Unknown tokens:
+       0x54: ??? in TB (find out what), LVAR in BXL/BXE.
+       0x5A: BLOAD or... what? (Jindroush lists it as ?5A?)
+       TODO:
+       0x5B: BRUN or CALL (both take a string, CALL allows "USING" though)
+       0x5C: GO# (1 arg only) or SORTUP (optional 2nd arg of USING, but no comma)
+       0x5D: # (1 arg only) or SORTDOWN (optional 2nd arg of USING, but no comma)
+       0x5F: PAINT (req 2 args) or NUM (optional 2 args, probly not appear in program)
+     */
+    switch(tok) {
+        case 0x39: /* MOVE or ENDWHILE */
+        case 0x3a: /* -MOVE or TRACEOFF */
+        case 0x3d: /* UNTIL or ENDIF */
+        case 0x56: /* DEL or FAST */
+        case 0x61: /* DSOUND (4 num args) or INVERSE (no args) */
+        case 0x62: /* CIRCLE (3 num args) or NORMAL (no args) */
+            if(has_args) {
+                remove_type(BT_BXL_BXE);
+            } else {
+                remove_type(BT_TURBO);
+            }
+            break;
+        case 0x58: /* TRACE (optional + or -), EXTEND (BXE; no args) */
+            /* EXTEND can't show up in a program except maybe line 32768, e.g.
+               EXTEND:SAVE "D:BLAH". */
+            remove_type(BT_BXL);
+            if(lineno < 32768) {
+                remove_type(BT_BXE);
+            }
+            break;
+        case 0x59: /* TEXT (1st arg is number), PROCEDURE (arg is string const (not var!)) */
+            if(nexttok == OP_STRCONST) {
+                remove_type(BT_TURBO);
+            } else {
+                remove_type(BT_BXL_BXE);
+            }
+            break;
+        case 0x3f: /* WEND or LOMEM */
+        case 0x40: /* ELSE or DEL */
+        case 0x41: /* ENDIF or RPUT */
+        case 0x45: /* DO or TAB */
+        case 0x47: /* EXIT or ERASE */
+        case 0x51: /* ENDPROC or PMMOVE */
+            if(has_args) {
+                remove_type(BT_TURBO);
+            } else {
+                remove_type(BT_BXL_BXE);
+            }
+            break;
+        case 0x48: /* DIR (optional arg) or PROTECT (req'd arg) */
+            /* not conclusive: without args means TB, but with arg,
+               it could be either */
+            if(!has_args) {
+                remove_type(BT_BXL_BXE);
+            }
+            break;
+        case 0x4a: /* UNLOCK (req'd arg) or DIR (optional arg) */
+            /* not conclusive: without args means BXL/BXE, but with arg,
+               it could be either */
+            if(!has_args) {
+                remove_type(BT_TURBO);
+            }
+            break;
+        case 0x3b: /* *F (optional + or -), TRACE (no arg) */
+        case 0x5e: /* *B (optional + or -) or EXIT (no arg) */
+            if(has_args) {
+                remove_type(BT_BXL_BXE);
+            }
+            break;
+        case 0x44: /* FILLTO or BGET (check for a # after the token) */
+            if(nexttok == OP_HASH) {
+                remove_type(BT_TURBO);
+            } else {
+                remove_type(BT_BXL_BXE);
+            }
+            break;
+        case 0x4e: /* TIME$= (1 string arg) or PMCLR (1 num arg) */
+            /* XXX: this doesn't do anything if the arg is a variable; we
+               could examine the type, but we don't yet */
+            if(nexttok == OP_STRCONST) {
+                remove_type(BT_BXL_BXE);
+            } else if(nexttok == OP_NUMCONST) {
+                remove_type(BT_TURBO);
+            }
+            break;
+        case 0x50: /* EXEC (1 arg, *must* be variable) or PMGRAPHICS (1 num arg, may be const) */
+            if(nexttok < 0x80) {
+                remove_type(BT_TURBO);
+            }
+            break;
+        case 0x57: /* DUMP (1 optional string arg) or LOCAL (1 variable arg) */
+            if(!has_args || (nexttok == OP_STRCONST)) {
+                /* if there's no arg, or one string constant arg... */
+                /* XXX: DUMP A$ not detected */
+                remove_type(BT_BXL_BXE);
+            }
+            break;
+        default: break;
+    }
+    if(verbose) fprintf(stderr, " now %02x\n", bas_type);
+}
+
+/* installed as on_exp_token in main(); gets each token of a
+   statement's expression. counts commas for handle_end_stmt(), and
+   rules out BASICs based on the token and the 1 or 2 bytes after it. */
+CALLBACK(handle_op) {
+    unsigned char nexttok = program[pos + 1];
+    unsigned char nexttok2 = program[pos + 2];
+
+    if(tok == OP_COMMA) comma_count++;
+
+    if(verbose) fprintf(stderr, "handle_op: lineno %d, tok $%02x, comma_count %d, bas_type was %02x...", lineno, tok, comma_count, bas_type);
+
+    if(tok == 0x0d) remove_type(BT_ATARI); /* hex const (turbo *and* bxl/xe) */
+    if(tok <= OP_FUNC_STRIG) {
+        if(verbose) fprintf(stderr, " now %02x\n", bas_type);
+        return; /* legal in BASIC, ignore */
+    }
+    remove_type(BT_ATARI);
+
+    if(tok >= 0x69) {
+        remove_type(BT_BXL_BXE);
+    }
+
+    if(tok == 0x55) {
+        /* DPEEK (function) TB, USING (infix, not a function) in BXL/BXE */
+        if(nexttok == OP_FUNC_LPAR) {
+            remove_type(BT_BXL_BXE);
+        }
+    }
+
+    if(tok == 0x5c) {
+        /* DEC (function, takes str) in TB, HEX$ (function, takes num) in BXL/BXE */
+        if(nexttok2 == OP_STRCONST) {
+            remove_type(BT_BXL_BXE);
+        } else if(nexttok2 >= 0x80 && (get_vartype(nexttok2) == TYPE_STRING)) {
+            /* variable tokens start at 0x80 (the other checks here use
+               >= 0x80 / < 0x80 too), so don't skip the first variable */
+            /* TODO: see if this test is actually valid! */
+            remove_type(BT_BXL_BXE);
+        }
+    }
+
+    if(tok == 0x5f) {
+        /* TIME$ in TB, SYS (function) in BXL/BXE */
+        if(nexttok == OP_FUNC_LPAR) {
+            remove_type(BT_TURBO);
+        }
+    }
+
+    if(tok == 0x60) {
+        /* TIME in TB, VSTICK (function) in BXL/BXE */
+        if(nexttok == OP_FUNC_LPAR) {
+            remove_type(BT_TURBO);
+        }
+    }
+
+    if(tok == 0x61) {
+        /* MOD (infix op) in TB, HSTICK (function) in BXL/BXE */
+        if(nexttok == OP_FUNC_LPAR)
+            remove_type(BT_TURBO);
+    }
+
+    if(tok == 0x62) {
+        /* EXEC (infix op, with ON) in TB, PMADR (function) in BXL/BXE */
+        if(nexttok == OP_FUNC_LPAR)
+            remove_type(BT_TURBO);
+    }
+
+    if(tok == 0x66 || tok == 0x67 || tok == 0x68) {
+        /* either %0 %1 %2 (TB), or LEFT$ RIGHT$ MID$ (BXL/XE) */
+        if(nexttok == OP_STRCONST || nexttok >= 0x80) {
+            /* %0 %1 %2 can't be followed by a string constant *or* a variable */
+            remove_type(BT_TURBO);
+        }
+    }
+    if(verbose) fprintf(stderr, " now %02x\n", bas_type);
+}
+
+/* installed as on_end_stmt in main(). by now handle_op() has counted
+   the statement's commas, so commands that differ between Turbo and
+   BXL/BXE only in how many arguments they take can be told apart. */
+CALLBACK(handle_end_stmt) {
+    if(verbose) fprintf(stderr, "handle_end_stmt: lineno %d, tok $%02x, last_cmd $%02x, comma_count %d, bas_type was %02x...", lineno, tok, last_cmd, comma_count, bas_type);
+    switch(last_cmd) {
+        case 0x38: /* DPOKE (2 args) or WHILE (1 arg) */
+            if(comma_count) {
+                remove_type(BT_BXL_BXE);
+            } else {
+                remove_type(BT_TURBO);
+            }
+            break;
+        case 0x3e: /* WHILE (1 arg) or DPOKE (2 args) */
+        case 0x4c: /* DELETE (1 arg) or MOVE (3 or 4 args) */
+        case 0x4d: /* PAUSE (1 arg) or MISSILE (3 args) */
+        case 0x52: /* FCOLOR (1 arg) or PMWIDTH (2 args) */
+        case 0x53: /* *L (optional + or - only) or SET (req 2 num args) */
+        case 0x4f: /* PROC (1 arg) or PMCOLOR (3 args) */
+            if(comma_count) {
+                remove_type(BT_TURBO);
+            } else {
+                remove_type(BT_BXL_BXE);
+            }
+            break;
+        case 0x55: /* RENUM in both (TB req 3 args, BXL up to two) */
+            if(comma_count == 2) {
+                remove_type(BT_BXL_BXE);
+            } else {
+                remove_type(BT_TURBO);
+            }
+            break;
+        case 0x63: /* %PUT (usually seen with optional #; 1 or 2 args) or BLOAD (1 string arg) */
+            if(comma_count) {
+                /* multiple args */
+                remove_type(BT_BXL_BXE);
+            } else if(program[last_cmd_pos + 1] == OP_STRCONST) {
+                /* one arg, string const. XXX: check var type */
+                /* (index by the command's position, not by its token value) */
+                remove_type(BT_TURBO);
+            }
+            break;
+        case 0x64: /* %GET (usually seen with optional #; 1 or 2 args) or BSAVE (3 args) */
+            if(comma_count == 2) {
+                remove_type(BT_TURBO);
+            } else {
+                remove_type(BT_BXL_BXE);
+            }
+            break;
+        default: break;
+    }
+    if(verbose) fprintf(stderr, " now %02x\n", bas_type);
+}
+
+/* report a file that was identified without walking its tokens:
+   close the input, print 'name' on stdout and exit. never returns. */
+void foreign(const char *name) {
+    fclose(input_file);
+    puts(name);
+    exit(0); /* TODO: pick a better number */
+}
+
+/* look at the first 2 bytes of the input. if they're both zero,
+   rewind and return, so main() can carry on and read the file as a
+   tokenized program. anything else is reported via foreign() (or
+   die(), if the file is too short) and never returns. */
+void detect_foreign(void) {
+    int i, nuls, c, d, c3, c4;
+
+    c = fgetc(input_file);
+    d = fgetc(input_file);
+
+    if(c == 0 && d == 0) {
+        /* This is why we can't read from stdin. */
+        rewind(input_file);
+        return;
+    }
+
+    if(c == EOF || d == EOF)
+        die("File is too short to be a BASIC program of any kind.");
+
+    if(c == 0xff && d == 0xff)
+        foreign("XEX executable (not BASIC at all!)");
+
+    if(c == 0xfe && d == 0xfe)
+        foreign("Mac/65 tokenized source (not BASIC at all!)");
+
+    if(c == 0xdd && d == 0x00)
+        foreign("EXTENDed OSS BASIC XE");
+
+    if(c == 0x7f && d == 'E') {
+        /* read bytes 3 and 4 into their own variables: the checks
+           below still need the first 2 bytes in c and d. */
+        c3 = fgetc(input_file);
+        c4 = fgetc(input_file);
+        if(c3 == 'L' && c4 == 'F')
+            foreign("ELF executable (huh?)");
+    }
+
+    /* c and d can't both be zero here (we'd have returned above) */
+    if(fseek(input_file, -3, SEEK_END) == 0) {
+        nuls = 0;
+        for(i = 0; i < 3; i++) {
+            if(fgetc(input_file) == 0) nuls++;
+        }
+        if(nuls == 3) {
+            foreign("Microsoft BASIC");
+        }
+    }
+
+    if(isdigit(c) && (d == 0x20 || isdigit(d)))
+        foreign("Text file, could be LISTed BASIC (or not)");
+
+    if(isprint(c) && isprint(d))
+        foreign("Text file (not BASIC at all!)");
+
+    foreign("Unknown file type (not BASIC at all!)");
+}
+
+/* identify the file if it's something foreign, otherwise walk the
+   tokenized program with our callbacks installed and print the verdict. */
+int main(int argc, char **argv) {
+    set_self(*argv);
+    parse_general_args(argc, argv, print_help);
+    parse_args(argc, argv);
+
+    detect_foreign();
+
+    readfile();
+    parse_header();
+
+    on_cmd_token = handle_cmd;
+    on_exp_token = handle_op;
+    on_end_stmt = handle_end_stmt;
+
+    walk_all_code();
+
+    print_result(); /* always exits */
+    return 0; /* never happens, shuts up gcc's warning though */
+}
diff --git a/whichbas.rst b/whichbas.rst
new file mode 100644
index 0000000..5e7363c
--- /dev/null
+++ b/whichbas.rst
@@ -0,0 +1,78 @@
+========
+whichbas
+========
+
+----------------------------------------------------------
+Determine BASIC variant of a tokenized Atari 8-bit program
+----------------------------------------------------------
+
+.. include:: manhdr.rst
+
+SYNOPSIS
+========
+whichbas [-v] *input-file*
+
+DESCRIPTION
+===========
+**whichbas** reads a tokenized Atari 8-bit BASIC, Turbo BASIC,
+BASIC XL, BASIC XE, or Atari Microsoft BASIC program and attempts to
+discover which BASIC is required to run it.
+
+NOTES
+=====
+Turbo BASIC, BASIC XL, and BASIC XE are all supersets of Atari BASIC.
+If you wrote a program using one of them, but didn't use any of the
+extra commands or functions, the result is still an Atari BASIC program.
+
+There are two types of BASIC XE programs: regular and *EXTEND*\ed. The
+extended type is detected 100% reliably, because the first byte of the
+file changes from **$00** to **$DD**. Non-extended programs are only
+identified as BASIC XE if they use any of the extra commands BASIC XE
+adds to those found in BASIC XL.
+
+Atari BASIC programs can be detected 100% reliably.
+
+Detection of Turbo vs. BXL/BXE isn't 100% reliable, and probably
+never will be. There's too much overlap between the sets of extra
+tokens added by each. Programs that don't use very many of the extra
+functions provided by Turbo/BXL/BXE may show up as "Not Atari BASIC;
+probably either Turbo or BXL/BXE".
+
+Atari Microsoft BASIC is detected by checking that the first two
+bytes of the file are not zero, and that the last 3 are zero. This
+may result in false positives (files that aren't BASIC programs at
+all might show up as Microsoft). Also, no distinction is made between
+Atari MS BASIC 1.0 and 2.0.
+
+Various non-BASIC files are detected (including Mac/65 source,
+ELF binaries, etc.) as a convenience, but I wouldn't rely on
+**whichbas**\'s non-BASIC file type detection if I were you.
+
+LIMITATIONS
+===========
+Currently, **whichbas** doesn't look at the variable name or type
+tables. One problem caused by this: If a program uses only Atari BASIC
+tokens, but uses variable(s) with _ in the name, it will be identified
+as Atari BASIC... even though _ in variable names is illegal in Atari
+BASIC and pretty much guarantees the program is Turbo/BXL/BXE.
+
+Looking at the variable types could also improve detection, since
+Turbo and BXL/BXE support extended variable types.
+
+**whichbas** knows nothing about other BASICs such as Frost BASIC,
+BASIC/A+, Altirra BASIC...
+
+OPTIONS
+=======
+
+.. include:: genopts.rst
+
+EXIT STATUS
+===========
+
+0 if the program is Atari BASIC, or if the file was identified without
+walking its tokens (*EXTEND*\ed BASIC XE, Microsoft BASIC, non-BASIC files).
+1 for failure. Otherwise 8 plus a bitmask of the BASICs that weren't
+ruled out: 2 for Turbo, 4 for BASIC XL, 8 for BASIC XE.
+
+.. include:: manftr.rst
-- 
cgit v1.2.3