aboutsummaryrefslogtreecommitdiff
path: root/whichbas.c
diff options
context:
space:
mode:
Diffstat (limited to 'whichbas.c')
-rw-r--r--whichbas.c1255
1 files changed, 1255 insertions, 0 deletions
diff --git a/whichbas.c b/whichbas.c
new file mode 100644
index 0000000..9f7f6db
--- /dev/null
+++ b/whichbas.c
@@ -0,0 +1,1255 @@
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <time.h>
+#include <sys/wait.h>
+
+#include "bas.h"
+#include "whichbas.h"
+
+#define BT_INVALID 0
+#define BT_ATARI 1
+#define BT_TURBO 2
+#define BT_BXL 4
+#define BT_BXE 8
+
+#define BT_BXL_BXE (BT_BXL | BT_BXE)
+
+int bas_type = 0x0f; /* start out with all enabled */
+
+int script_mode = 0; /* -s flag */
+int script_ret; /* -s mode, exit with one of SRET_* from whichbas.h as status */
+
+int keep_going = 0; /* -k flag */
+
+int comma_count; /* count of regular commas (not string/array) in statement */
+unsigned char last_cmd;
+unsigned char last_op_tok;
+unsigned short last_cmd_pos;
+
+void print_help(void) {
+ printf("Usage: %s [-v] [-h] [-k] [-s] <inputfile> ...\n", self);
+}
+
+int child_errs = 0;
+
+/* return true if the child ran and returned 0 (success) */
+int spawn_child(const char **args) {
+ pid_t pid, status;
+ int wstatus;
+
+ pid = fork();
+ if(pid == -1) {
+ perror("fork()");
+ die("Can't spawn child process");
+ } else if(pid) {
+ /* we are the parent */
+ status = waitpid(pid, &wstatus, 0);
+ if(status < 0) {
+ perror("waitpid()");
+ die("Child process went south");
+ }
+ if(! (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == 0) ) {
+ child_errs++;
+ return 0;
+ }
+ } else {
+ /* we are the child */
+ if(execv(args[0], (char * const *)args) < 0) {
+ perror("exec()");
+ exit(1);
+ }
+ }
+
+ return 1;
+}
+
+/* this is not the ideal way to handle multiple files. it forks a new
+ process for each one. however, I embedded a bunch of die() calls
+ in bas.c, thinking I'd never use some of the functions more than once
+ in the same run. what I get for trying to KISS... */
+void multiple_files(const char *argv0, char **list) {
+ const char *args[5]; /* 5 because eventually we include -k and/or -v */
+ int kidstat;
+
+ /* this isn't needed for things to work, but if I write buggy code,
+ it avoids a forkbomb. */
+ setenv("WHICHBAS_MULTI_PARANOIA", "1", 1);
+
+ args[0] = argv0;
+ args[2] = NULL;
+
+ while(*list) {
+ args[1] = *list;
+ printf("%s:\t", *list);
+ fflush(stdout);
+ kidstat = spawn_child(args);
+ if(!kidstat) puts("(detection failed)");
+ fflush(stdout);
+ list++;
+ }
+
+ if(child_errs) {
+ fprintf(stderr, "%s: exiting with error status because some files failed.\n", self);
+ exit(1);
+ } else {
+ exit(0);
+ }
+}
+
+void parse_args(int argc, char **argv) {
+ int opt;
+
+ while( (opt = getopt(argc, argv, "vksh")) != -1) {
+ switch(opt) {
+ case 'v': verbose = 1; break;
+ case 'k': keep_going = verbose = 1; break;
+ case 's': script_mode = 1; break;
+ case 'h': print_help(); exit(0);
+ default:
+ print_help();
+ exit(1);
+ }
+ }
+
+ if(optind >= argc)
+ die("No input file given (and stdin not supported).");
+
+ if(optind == argc - 1) { /* got one filename only */
+ open_input(argv[optind]);
+ if(input_file == stdin)
+ die("Reading from standard input not supported.");
+ } else { /* got multiple filenames */
+ if(keep_going || verbose || script_mode)
+ die("-k, -v, -s not supported with multiple filenames (yet?)");
+ if(getenv("WHICHBAS_MULTI_PARANOIA"))
+ die("BUG: multiple_files() recursion!");
+ multiple_files(argv[0], argv + optind);
+ }
+}
+
+/* don't need this.
+void add_type(int type) {
+ bas_type |= type;
+}
+*/
+
+void print_result(void) {
+ const char *name;
+
+ if(verbose) fprintf(stderr, " final bas_type %02x\n", bas_type);
+
+ if(bas_type == BT_INVALID) {
+ name = "Unknown variant: Not Atari BASIC, Turbo, BXL, or BXE";
+ script_ret = SRET_UKNOWN_DERIV;
+ } else if(bas_type & BT_ATARI) {
+ name = "Atari BASIC";
+ script_ret = SRET_ATARI;
+ } else if(bas_type & BT_TURBO) {
+ if(bas_type & BT_BXL) {
+ name = "Either Turbo BASIC XL or OSS BASIC XL";
+ script_ret = SRET_TURBO_OR_BXL;
+ } else if(bas_type & BT_BXE) {
+ name = "Either Turbo BASIC XL or OSS BASIC XE";
+ script_ret = SRET_TURBO_OR_BXE;
+ } else { /* bas_type == BT_TURBO */
+ name = "Turbo BASIC XL";
+ script_ret = SRET_TURBO;
+ }
+ } else if(bas_type == BT_BXL || bas_type == (BT_BXL | BT_BXE)) {
+ name = "OSS BASIC XL";
+ script_ret = SRET_BXL;
+ } else if(bas_type == BT_BXE) {
+ name = "OSS BASIC XE";
+ script_ret = SRET_BXE;
+ } else {
+ /* this one should never happen: */
+ name = "Either Turbo BASIC XL, OSS BASIC XL, or OSS BASIC XE";
+ script_ret = SRET_TURBO_OR_BXL_OR_BXE;
+ }
+
+ if(script_mode) {
+ exit(script_ret);
+ } else {
+ puts(name);
+ exit(0);
+ }
+}
+
+/* return true if a token is numeric constant
+ (including TB/BXE/BXL hex) */
+int is_numconst_op(unsigned char tok) {
+ switch(tok) {
+ case OP_NUMCONST:
+ case OP_HEXCONST:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+/* return true if a token is a function that *returns*
+ a numeric value (says nothing about the argument types,
+ though!) */
+int is_numeric_func(unsigned char tok) {
+ switch(tok) {
+ case OP_FUNC_USR:
+ case OP_FUNC_ASC:
+ case OP_FUNC_VAL:
+ case OP_FUNC_LEN:
+ case OP_FUNC_ADR:
+ case OP_FUNC_ATN:
+ case OP_FUNC_COS:
+ case OP_FUNC_PEEK:
+ case OP_FUNC_SIN:
+ case OP_FUNC_RND:
+ case OP_FUNC_FRE:
+ case OP_FUNC_EXP:
+ case OP_FUNC_LOG:
+ case OP_FUNC_CLOG:
+ case OP_FUNC_SQR:
+ case OP_FUNC_SGN:
+ case OP_FUNC_ABS:
+ case OP_FUNC_INT:
+ case OP_FUNC_PADDLE:
+ case OP_FUNC_STICK:
+ case OP_FUNC_PTRIG:
+ case OP_FUNC_STRIG:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+/* return true if a token is an arithmetic operator */
+int is_arith_op(unsigned char tok) {
+ switch(tok) {
+ case OP_UPLUS: /* not sure these two... */
+ case OP_UMINUS: /* ...really belong here */
+ case OP_NUM_LE:
+ case OP_NUM_NE:
+ case OP_NUM_GE:
+ case OP_NUM_LT:
+ case OP_NUM_GT:
+ case OP_NUM_EQ:
+ case OP_POWER:
+ case OP_MULT:
+ case OP_PLUS:
+ case OP_MINUS:
+ case OP_DIVIDE:
+ case OP_NOT:
+ case OP_OR:
+ case OP_AND:
+ case OP_NUM_ASSIGN:
+ case OP_GRP_LPAR: /* yes, this belongs here, (((A$))) is a syntax error! */
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+int is_numeric_var(unsigned char tok) {
+ int vartype;
+
+ if(tok < 0x80)
+ return 0;
+
+ vartype = get_vartype(tok);
+ return (vartype == TYPE_SCALAR || vartype == TYPE_ARRAY);
+}
+
+/* return true if a token is:
+ - a numeric constant (including hex constants),
+ - a numeric variable (including arrays),
+ - a function that returns a numeric (e.g. ASC(), SIN()).
+ for now, only standard Atari BASIC tokens are considered.
+ unary minus and plus make sense here, but binary ops don't.
+ */
+int is_numeric_rval(unsigned char tok) {
+ return
+ (tok == OP_UMINUS) ||
+ (tok == OP_UPLUS) ||
+ is_numconst_op (tok) ||
+ is_numeric_func (tok) ||
+ is_numeric_var (tok) ;
+}
+
+/* return true if a token is:
+ - a numeric constant (including hex constants),
+ - a numeric variable (including arrays),
+ - a math operator (plus, minus, etc),
+ - a function that returns a numeric (e.g. ASC(), SIN()).
+ for now, only standard Atari BASIC tokens are considered.
+ */
+int is_numeric_op(unsigned char tok) {
+ return
+ is_numeric_rval (tok) ||
+ is_arith_op (tok) ;
+}
+
+int is_string_var(unsigned char tok) {
+ return (tok >= 0x80 && (get_vartype(tok) == TYPE_STRING));
+}
+
+int is_string_const(unsigned char tok) {
+ return (tok == OP_STRCONST);
+}
+
+int is_string_exp_op(unsigned char tok) {
+ switch(tok) {
+ case OP_STR_ASSIGN:
+ case OP_STR_LE:
+ case OP_STR_NE:
+ case OP_STR_GE:
+ case OP_STR_LT:
+ case OP_STR_GT:
+ case OP_STR_EQ:
+ case OP_STR_LPAR: /* the ( in: A$(1)="A" */
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+int is_string_func(unsigned char tok) {
+ switch(tok) {
+ case OP_FUNC_STR:
+ case OP_FUNC_CHR:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+/* return true if a token is:
+ - a string constant,
+ - a string variable,
+ - a function that returns a string.
+*/
+int is_string_rval(unsigned char tok) {
+ return
+ is_string_const (tok) ||
+ is_string_func (tok) ||
+ is_string_var (tok) ;
+}
+
+/* return true if a token is:
+ - a string constant,
+ - a string variable,
+ - a string expression operator, like OP_STR_LE,
+ - a function that returns a string.
+*/
+int is_string_op(unsigned char tok) {
+ return
+ is_string_rval (tok) ||
+ is_string_exp_op (tok) ;
+}
+
+/* true if an operator token is a string function in BASIC XL (or XE).
+ these tokens are all numeric functions in Turbo, so be sure you
+ know what you're doing! */
+int is_bxl_string_func(unsigned char tok) {
+ switch(tok) {
+ case 0x5c: /* BXL HEX$, Turbo DEC */
+ case 0x66: /* BXL LEFT$, Turbo %0 */
+ case 0x67: /* BXL RIGHT$, Turbo %1 */
+ case 0x68: /* BXL MID$, Turbo %2 */
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+void remove_type(int type) {
+ bas_type &= ((~type) & 0x0f);
+
+ if(keep_going) return;
+
+ /* without -k, stop if it gets narrowed down to one of these 4. */
+ if(bas_type == BT_ATARI || bas_type == BT_TURBO || bas_type == BT_BXE || bas_type == BT_BXL)
+ print_result();
+}
+
+void set_type(int type) {
+ bas_type = type;
+ if(!keep_going) print_result();
+}
+
+CALLBACK(handle_cmd) {
+ int has_args = 0, has_var_arg = 0, vartype = -1;
+ unsigned char nexttok;
+
+ last_cmd = tok;
+ last_cmd_pos = pos;
+ comma_count = 0;
+
+ if(verbose) fprintf(stderr, "handle_cmd: lineno %d, tok $%02x, bas_type was %02x\n", lineno, tok, bas_type);
+
+ nexttok = program[pos + 1];
+ has_args = !(nexttok == OP_EOS || nexttok == OP_EOL);
+ if(nexttok >= 0x80) {
+ has_var_arg = 1;
+ vartype = get_vartype(nexttok);
+ }
+
+ /* this switch is for tokens that are the same in Atari/Turbo/BXL/BXE, but with
+ different semantics. non-Atari-BASIC tokens go in the switch below, not
+ this one. */
+ switch(tok) {
+ /* TB uses the same token for CLOSE as Atari and BXL/BXE, but it allows
+ it to have no argument (meaning, close all IOCBs). SOUND is the same
+ (no args = silence all POKEY channels). */
+ case CMD_CLOSE:
+ case CMD_SOUND:
+ if(!has_args) {
+ set_type(BT_TURBO);
+ }
+ break;
+ case CMD_INPUT:
+ /* TB, BXL, BXE all support INPUT "Prompt",VAR with the same tokenized
+ form. Atari BASIC doesn't allow string constants in INPUT args. */
+ if(has_args) {
+ if(nexttok == OP_STRCONST) {
+ int pos_after_string;
+ remove_type(BT_ATARI);
+ /* TB only: INPUT "Prompt";VAR is also supported (not in BXL/BXE) */
+ pos_after_string = pos + 3 + program[pos + 2];
+ if(verbose)
+ fprintf(stderr,
+ "===> INPUT with string prompt at line %d, "
+ "pos %04x, pos_after_string %04x (token %02x)\n",
+ lineno, pos, pos_after_string, program[pos_after_string]);
+ if(program[pos_after_string] == OP_SEMICOLON) {
+ set_type(BT_TURBO);
+ }
+ }
+ } else { /* has_args is false, oh shit! */
+ fprintf(stderr, "*** INPUT without variable at line %d.\n*** Rev A BASIC bug, program will crash, better fix it!\n", lineno);
+ set_type(BT_ATARI);
+ }
+ break;
+ case CMD_GET:
+ case CMD_PUT:
+ /* TB uses the same tokens for GET and PUT as Atari/BXL/BXE, but it allows
+ the argument to be a variable without a # in front of it. */
+ if(nexttok != OP_HASH) {
+ set_type(BT_TURBO);
+ }
+ /* PARTIAL: we really should detect GET #1,A$. this is Turbo-only, but
+ probably nobody ever uses it because it doesn't seem to *work*,
+ at least not in TB 1.5. A$ always ends up empty with length 0. */
+ break;
+ case CMD_RESTORE:
+ case CMD_TRAP:
+ /* TB allows RESTORE #LABEL and TRAP #LABEL */
+ if(nexttok == OP_HASH) {
+ set_type(BT_TURBO);
+ }
+ break;
+ default: break;
+ }
+
+ if(tok <= CMD_ERROR) return; /* legal in BASIC, ignore */
+ remove_type(BT_ATARI);
+ if(tok >= 0x59) remove_type(BT_BXL);
+
+ if(tok >= 0x65) {
+ fprintf(stderr, "handle_cmd: invalid command %02x at line %d\n", tok, lineno);
+ keep_going = 0;
+ set_type(BT_INVALID);
+ }
+
+ /* we have tokens 0x3a to 0x68 in both TB and BXE, or 47 of them.
+
+ Some tokens can't be determined, because they take the
+ same argument (or lack of) in both Turbo and BXL/XE. These
+ are:
+
+ 0x3c: REPEAT or ELSE (no args either way)
+ 0x46: LOOP or CP (no args either way)
+ 0x49: LOCK or UNPROTECT (take the same args)
+ 0x4B: RENAME in both Turbo and BXL/XE (same token, same args)
+
+ 4 of them, this leaves 43 we can check.
+
+ Covered so far: 41 (95%).
+ However, some of these are marked PARTIAL because they're not detected
+ under all circumstances.
+
+ Unchecked tokens:
+
+ 0x5B: BRUN or CALL (both take a string, CALL allows "USING" though)
+ This isn't really important, as CALL requires a PROCEDURE to
+ exist, and we *do* catch the PROCEDURE token.
+ 0x5F: PAINT (req 2 args) or NUM (optional 2 args).
+ Again, not important, because it's highly unlikely any BXL/BXE
+ program will contain NUM... because when it executes, it stops the
+ program and goes back to the READY prompt (in auto-numbering mode).
+ */
+ switch(tok) {
+ case 0x39: /* MOVE <args> or ENDWHILE */
+ case 0x3a: /* -MOVE <args> or TRACEOFF */
+ case 0x3d: /* UNTIL <args> or ENDIF */
+ case 0x56: /* DEL <args> or FAST */
+ case 0x62: /* CIRCLE (3 or 4 num args) or NORMAL (no args) */
+ /* COMPLETE */
+ if(has_args) {
+ remove_type(BT_BXL_BXE);
+ } else {
+ remove_type(BT_TURBO);
+ }
+ break;
+
+ case 0x58: /* TRACE (optional + or -), EXTEND (BXE; no args) */
+ /* COMPLETE */
+ /* In BXL, this looks to be an extra END token, that behaves the same
+ as the regular one, but can't be entered in the editor. Assume
+ no BXL program contains this token. */
+ /* In BXE, EXTEND can't actually appear in a program (it's direct
+ mode only). The only way to get EXTEND into a BXE program is
+ to do a direct mode command like:
+ EXTEND:SAVE "D:PROG"
+ ...which of course puts it at line 32768. But this code will
+ never see that, because we'd already detect EXTENDed BXE
+ based on the first 2 bytes of the file.
+ */
+ /* So, if we see this token, it *has* to be Turbo's TRACE, whether
+ or not it has an argument. */
+ set_type(BT_TURBO);
+ break;
+
+ case 0x59: /* TEXT or PROCEDURE */
+ /* COMPLETE */
+ /* Turbo: TEXT (1st arg is number),
+ BXL: invalid token,
+ BXE: PROCEDURE (arg is string const (not var!)) */
+ if(nexttok == OP_STRCONST) {
+ /* this token doesn't seem to be valid in BXL at all */
+ set_type(BT_BXE);
+ } else {
+ remove_type(BT_BXL_BXE);
+ }
+ break;
+
+ case 0x3f: /* WEND or LOMEM <args> */
+ case 0x40: /* ELSE or DEL <args> */
+ case 0x41: /* ENDIF or RPUT <args> */
+ case 0x45: /* DO or TAB <args> */
+ case 0x47: /* EXIT or ERASE <args> */
+ case 0x51: /* ENDPROC or PMMOVE <args> */
+ /* COMPLETE */
+ if(has_args) {
+ remove_type(BT_TURBO);
+ } else {
+ remove_type(BT_BXL_BXE);
+ }
+ break;
+
+ case 0x48: /* DIR (optional arg) or PROTECT (req'd arg) */
+ /* PARTIAL: without args means TB, but with arg,
+ it could be either */
+ if(!has_args) {
+ remove_type(BT_BXL_BXE);
+ }
+ break;
+
+ case 0x4a: /* UNLOCK (req'd arg) or DIR (optional arg) */
+ /* PARTIAL: without args means BXL/BXE, but with arg,
+ it could be either */
+ if(!has_args) {
+ remove_type(BT_TURBO);
+ }
+ break;
+
+ case 0x3b: /* *F (optional + or -), TRACE (no arg) */
+ case 0x5e: /* *B (optional + or -) or EXIT (no arg) */
+ /* PARTIAL: doesn't catch *F or *B by itself with no +/- */
+ if(has_args) {
+ remove_type(BT_BXL_BXE);
+ }
+ break;
+
+ case 0x44: /* FILLTO or BGET (check for a # after the token) */
+ /* COMPLETE */
+ if(nexttok == OP_HASH) {
+ remove_type(BT_TURBO);
+ } else {
+ remove_type(BT_BXL_BXE);
+ }
+ break;
+
+ case 0x4e: /* TIME$= (1 string arg) or PMCLR (1 num arg) */
+ /* PARTIAL: but almost complete. nothing happens if it's
+ TIME$= with a string function (probably rare) or PMCLR
+ with a complex expression. */
+ if(nexttok == OP_STRCONST) {
+ remove_type(BT_BXL_BXE);
+ } else if(has_var_arg && vartype == TYPE_STRING) {
+ remove_type(BT_BXL_BXE);
+ } else if(nexttok == OP_NUMCONST) {
+ remove_type(BT_TURBO);
+ } else if(has_var_arg && vartype == TYPE_SCALAR) {
+ remove_type(BT_TURBO);
+ }
+ break;
+
+ case 0x50: /* EXEC (1 arg, *must* be variable) or PMGRAPHICS (1 num arg, may be const) */
+ /* PARTIAL: PMGRAPHICS VAR won't be detected. but this usage is rare. */
+ /* This check is actually redundant, because EXEC requires Turbo's
+ label type (high bits in var name table both set to 1), which we already
+ detected in check_variables(). */
+ if(!has_var_arg) {
+ remove_type(BT_TURBO);
+ }
+ break;
+
+ case 0x54: /* -- in TB, LVAR in BXL/BXE */
+ /* COMPLETE */
+ /* We can tell these apart because:
+ 1. TB gives us a next-statement offset of 5 if -- is the first (or
+ actually only) statement on a line. Normally, the minimum offset
+ is 6, but there's no OP_EOL after this token for some reason.
+ 2. If -- is the 2nd or or later statement on a line (after a colon)
+ it *does* get a statement terminator, but it's 0x9b (ATASCII EOL,
+ like a REM or DATA gets).
+ Note that it's impossible to put more statements *after* the --,
+ they just get ignored if you type them. This doesn't help us
+ here, but it's interesting anyway.
+ Also, the -- is what you type to enter it into the program, but
+ it get LISTed as a line of 30 dashes.
+ The explanation is a lot longer than the code... */
+ if(program[pos - 1] == 0x05 || nexttok == 0x9b) {
+ set_type(BT_TURBO);
+ } else {
+ remove_type(BT_TURBO);
+ }
+ break;
+
+ case 0x57: /* DUMP (1 optional string arg) or LOCAL (1 *numeric* variable arg) */
+ /* BXL/BXE's LOCAL only works on scalars, not arrays or strings. so if there's
+ no arg, or one string arg... */
+ /* PARTIAL: almost complete, doesn't handle DUMP func$(arg), which I
+ doubt anyone uses anyway. */
+ if(!has_args) {
+ /* only Turbo allows no arg... */
+ remove_type(BT_BXL_BXE);
+ } else if(nexttok == OP_STRCONST) {
+ /* only Turbo allows a string constant arg... */
+ remove_type(BT_BXL_BXE);
+ } else if(has_var_arg && vartype == TYPE_STRING) {
+ /* only Turbo allows a string variable arg... */
+ remove_type(BT_BXL_BXE);
+ } else if(has_var_arg && vartype == TYPE_SCALAR) {
+ /* only BXL/BXL allows a scalar variable arg */
+ remove_type(BT_TURBO);
+ }
+ break;
+
+ case 0x5a: /* TB: BLOAD; BXL: extension mechanism; BXE: invalid. */
+ /* COMPLETE */
+ /* This is the token used for the BXL EXTEND.COM added commands,
+ from the Toolkit disk. It's followed by a byte ranging 0x10
+ to 0x15 that specifies which extended command, e.g. 0x5a 0x11 means
+ EXIT, 0x5a 0x12 is PROCEDURE, 0x5a 0x13 is CALL. Although
+ these look like BXE's extra commands, they aren't the same tokens,
+ and BXE will choke on them (RUN causes "Error- 33", LIST causes
+ a lockup). */
+ if(nexttok >= 0x10 && nexttok <= 0x15) {
+ /* worth mentioning to the user... */
+ fprintf(stderr, "Note: program requires EXTEND.COM from BASIC XL Toolkit disk.\n");
+ set_type(BT_BXL);
+ } else {
+ /* it's BLOAD if followed by e.g. OP_STRCONST or a variable */
+ set_type(BT_TURBO);
+ }
+ break;
+
+ case 0x5c: /* GO# (1 arg only) or SORTUP (optional 2nd arg of USING, but no comma) */
+ case 0x5d: /* # (1 arg only) or SORTDOWN (optional 2nd arg of USING, but no comma) */
+ /* COMPLETE but no longer needed (check_variables() already found the
+ 11xxxxxx variables) */
+ /* Turbo BASIC labels have the high 2 bits set to 11, which is illegal
+ in Atari/BXL/BXE. */
+ if(vartype == 3) {
+ remove_type(BT_BXL_BXE);
+ } else {
+ remove_type(BT_TURBO);
+ }
+ break;
+
+ case 0x60: /* CLS (optional IOCB with #) or HITCLR (no args) */
+ /* PARTIAL: without args, can't tell them apart. */
+ /* I doubt CLS #IOCB is actually used in many Turbo BASIC
+ programs, because it's broken (at least in Turbo 1.5).
+ It's supposed to only clear the screen of output that
+ happened after the OPEN #IOCB, but it really clears the
+ whole screen. */
+ if(nexttok == OP_HASH) {
+ remove_type(BT_BXL_BXE);
+ }
+ break;
+
+ default: break;
+ }
+ if(verbose) fprintf(stderr, " bas_type now %02x\n", bas_type);
+}
+
+CALLBACK(handle_op) {
+ unsigned char nexttok = program[pos + 1];
+ unsigned char nexttok2 = program[pos + 2];
+
+ if(tok == OP_COMMA) comma_count++;
+
+ if(verbose) fprintf(stderr, "handle_op: lineno %d, tok $%02x, comma_count %d, bas_type was %02x\n", lineno, tok, comma_count, bas_type);
+
+ if(tok == 0x00) {
+ /* Turbo allows 256 variables, tokenizes the first 128 normally ($80-$FF).
+ The extra ones above $FF are tokenized as $00, varnum - $80. None of
+ our other BASICs uses $00 as an operator token, so.. */
+ set_type(BT_TURBO);
+ }
+
+ /* attempt to detect BXL/BXE DIM for 2D string arrays.
+ DIM A$(10,10) is illegal in Atari/Turbo.
+ PARTIAL: this only works if the first dimension is either a
+ constant or a scalar variable (not an array element or an
+ expression). fortunately most programs use constants in DIM.
+ */
+ if(tok == OP_DIM_STR_LPAR) {
+ int str2d = 0;
+ if(nexttok >= 0x80 && nexttok2 == OP_ARR_COMMA) {
+ str2d = 1;
+ } else if(nexttok == OP_NUMCONST || nexttok == OP_HEXCONST) {
+ str2d = (program[pos + 8] == OP_ARR_COMMA);
+ }
+ if(str2d) {
+ if(verbose)
+ fprintf(stderr, "===> found 2d string array at line %d\n", lineno);
+ remove_type(BT_ATARI | BT_TURBO);
+ }
+ }
+
+ /* BXL/BXE allows string concatenation in assignment with the comma,
+ A$="FOO","BAR" or A$=C$,D$. */
+ if(last_cmd == CMD_LET || last_cmd == CMD_ILET) {
+ if(program[last_cmd_pos + 2] == OP_STR_ASSIGN) {
+ if(tok == OP_COMMA) {
+ if(is_string_rval(nexttok)) {
+ remove_type(BT_ATARI | BT_TURBO);
+ }
+ }
+ }
+ }
+
+ if(tok == OP_HEXCONST) remove_type(BT_ATARI); /* hex const (turbo *and* bxl/xe) */
+ if(tok <= OP_FUNC_STRIG) {
+ if(verbose) fprintf(stderr, " bas_type now %02x\n", bas_type);
+ return; /* legal in BASIC, ignore */
+ }
+ remove_type(BT_ATARI);
+
+ /* only Turbo has op tokens numbered 0x69 and up. */
+ if(tok >= 0x69) {
+ set_type(BT_TURBO);
+ }
+
+ if(tok >= 0x6E) {
+ fprintf(stderr, "handle_op: invalid operator %02x at line %d\n", tok, lineno);
+ keep_going = 0;
+ set_type(BT_INVALID);
+ }
+
+ /* There are 25 extra operators in Turbo, and 20 of them are shared with
+ BXL/BXE. Of the 20, 4 of them are undecidable, and the rest are
+ covered here, which means 80% coverage of the shared ops.
+ Undecidables are:
+ 0x56 & (logical AND) or % (XOR), both infix numeric ops; can't tell apart
+ 0x57 ! (logical OR) in both Turbo and BXL/BXE, can't tell apart
+ 0x64 RAND (func, 1 num arg) or TAB (func, 1 num arg), can't tell apart
+ 0x65 TRUNC (func, 1 num arg) or PEN (func, 1 num arg), can't tell apart
+ */
+ switch(tok) {
+ case 0x55: /* DPEEK (function) TB, USING (infix, not a function) in BXL/BXE */
+ case 0x58: /* INSTR (function) or & (infix numeric) in BXE. */
+ case 0x5b: /* HEX$ (func, takes 1 num arg) or FIND( (pseudo-func, 3 args */
+ /* COMPLETE */
+ if(nexttok == OP_FUNC_LPAR) {
+ remove_type(BT_BXL_BXE);
+ } else {
+ remove_type(BT_TURBO);
+ }
+ break;
+
+ case 0x59: /* INKEY$ (0 arg pseudo-func) in TB, string array separator semicolon in BXL/BXE */
+ /* PARTIAL: ...but pretty good. we *can't* check nexttok == OP_GRP_RPAR, because
+ VAL(INKEY$) or ASC(INKEY$) are legit Turbo code. */
+ if(nexttok == OP_EOS || nexttok == OP_EOL) {
+ /* the semicolon can't be the last token on the line (needs at least
+ a right-paren), but INKEY$ can. */
+ remove_type(BT_BXL_BXE);
+ } else if(pos == last_cmd_pos + 1) {
+ /* INKEY$ can be the first operator after the command, e.g if the command
+ is IF. The semicolon cannot. */
+ remove_type(BT_BXL_BXE);
+ } else if(is_string_exp_op(last_op_tok) || is_string_exp_op(nexttok)) {
+ /* A$=INKEY$, IF INKEY$=A$, A$(LEN(A$)+1)=INKEY$, INKEY$<>"A"... */
+ remove_type(BT_BXL_BXE);
+ } else if(is_numeric_op(last_op_tok) || is_numeric_op(nexttok)) {
+ remove_type(BT_TURBO);
+ }
+ break;
+
+ case 0x5a: /* EXOR (infix num op) or BUMP( (pseudo-function, no OP_FUNC_LPAR) */
+ case 0x5d: /* DIV (infix num op) or RANDOM( (pseudo-func, 1 or 2 num args) */
+ /* COMPLETE (I think, anyway) */
+ if(last_op_tok == OP_GRP_RPAR || last_op_tok == OP_NUMCONST || last_op_tok == OP_HEXCONST || last_op_tok >= 0x80) {
+ /* if the last token was a variable or a numeric, or a right paren,
+ this is infix (can't be a function, last token would have to have
+ been a command or a math/etc operator). */
+ remove_type(BT_BXL_BXE);
+ } else {
+ remove_type(BT_TURBO);
+ }
+ break;
+
+ case 0x5c: /* DEC (function, takes str) in TB, HEX$ (function, takes num) in BXL/BXE */
+ /* COMPLETE */
+ if(is_string_rval(nexttok2)) {
+ remove_type(BT_BXL_BXE);
+ } else if(is_numeric_op(nexttok2)) {
+ remove_type(BT_TURBO);
+ }
+ break;
+
+ case 0x5e: /* FRAC (num func, 1 arg) or DPEEK (num func, 1 arg) in BXL...
+ however BXE has an optional 2nd arg. */
+ {
+ /* PARTIAL: This detects the 2nd arg for simple cases where the
+ 1st arg is a constant or a numeric variable, but not if the
+ 1st arg is an expression or an array element. */
+ int has2 = 0;
+ if(nexttok2 == OP_NUMCONST || nexttok2 == OP_HEXCONST) {
+ if(program[pos + 9] == OP_ARR_COMMA)
+ has2 = 1;
+ } else if(nexttok2 >= 0x80 && program[pos + 3] == OP_ARR_COMMA) {
+ has2 = (get_vartype(nexttok2) == TYPE_SCALAR);
+ }
+ if(has2) {
+ set_type(BT_BXE);
+ }
+ }
+ break;
+
+ case 0x5f: /* TIME$ in TB, SYS (function) in BXL/BXE */
+ case 0x60: /* TIME in TB, VSTICK (function) in BXL/BXE */
+ case 0x61: /* MOD (infix op) in TB, HSTICK (function) in BXL/BXE */
+ case 0x62: /* EXEC (infix op, with ON) in TB, PMADR (function) in BXL/BXE */
+ /* COMPLETE */
+ if(nexttok == OP_FUNC_LPAR) {
+ remove_type(BT_TURBO);
+ } else {
+ remove_type(BT_BXL_BXE);
+ }
+ break;
+
+ case 0x63: /* RND (pseudo-func, no arg) or ERR (func, 1 num arg) */
+ /* COMPLETE */
+ if(nexttok != OP_FUNC_LPAR) {
+ set_type(BT_TURBO);
+ }
+ break;
+
+ case 0x66: /* %0 in TB, LEFT$( (pseudo-func, takes string) in BXL/BXE */
+ case 0x67: /* %1 in TB, RIGHT$( (pseudo-func, takes string) in BXL/BXE */
+ case 0x68: /* %2 in TB, MID$( (pseudo-func, takes string) in BXL/BXE */
+ /* COMPLETE */
+ /* LEFT$/RIGHT$/MID$ do NOT get OP_FUNC_LPAR (the "(" is part of the
+ token name). They're always followed by a string operator... and
+ it works out that none of the tokens for BXL-only string funcs
+ are allowed to follow %0 %1 %2 in Turbo. */
+ if(is_string_op(nexttok) || is_bxl_string_func(nexttok)) {
+ remove_type(BT_TURBO);
+ } else {
+ remove_type(BT_BXL_BXE);
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ last_op_tok = tok;
+ if(verbose) fprintf(stderr, " bas_type now %02x\n", bas_type);
+}
+
+/* we can count commas, because both Turbo and BXE/BXL use the "array" comma
+ to separate function arguments, not the "regular" comma. */
+CALLBACK(handle_end_stmt) {
+ if(verbose) fprintf(stderr, "handle_end_stmt: lineno %d, tok $%02x, last_cmd $%02x, comma_count %d, bas_type was %02x\n", lineno, tok, last_cmd, comma_count, bas_type);
+ switch(last_cmd) {
+ case 0x38: /* DPOKE (2 args) or WHILE (1 arg) */
+ if(comma_count) {
+ remove_type(BT_BXL_BXE);
+ } else {
+ remove_type(BT_TURBO);
+ }
+ break;
+
+ case 0x3e: /* WHILE (1 arg) or DPOKE (2 or 3 args) */
+ case 0x4c: /* DELETE (1 arg) or MOVE (3 or 4 args) */
+ case 0x4d: /* PAUSE (1 arg) or MISSILE (3 args) */
+ case 0x52: /* FCOLOR (1 arg) or PMWIDTH (2 args) */
+ case 0x53: /* *L (optional + or - only) or SET (req 2 num args) */
+ case 0x4f: /* PROC (1 arg) or PMCOLOR (3 args) */
+ if(comma_count) { /* 1 arg means no commas */
+ remove_type(BT_TURBO);
+ } else {
+ remove_type(BT_BXL_BXE);
+ }
+ break;
+
+ case 0x42: /* BPUT or RGET */
+ /* PARTIAL:
+ Turbo BGET always takes 3 args, BXL/BXE RGET takes 2 or more.
+ We can at least rule out Turbo if there aren't exactly 3 args. */
+ if(comma_count != 2) {
+ remove_type(BT_TURBO);
+ }
+ break;
+
+ case 0x43: /* BGET or BPUT */
+ /* PARTIAL:
+ Turbo BGET and BPUT always take 3 args. So does BXL BPUT.
+ BXE BPUT takes 3 args and an optional 4th. */
+ if(comma_count != 2) {
+ set_type(BT_BXE);
+ }
+ break;
+
+ case 0x55: /* RENUM in both (TB req 3 args, BXL up to two) */
+ if(comma_count == 2) {
+ remove_type(BT_BXL_BXE);
+ } else {
+ remove_type(BT_TURBO);
+ }
+ break;
+
+ case 0x61: /* DSOUND (0 or 4 num args) or INVERSE (no args) */
+ /* PARTIAL: can't tell no-argument DSOUND from INVERSE. */
+ if(comma_count) {
+ remove_type(BT_BXL_BXE);
+ }
+ break;
+
+ case 0x63: /* %PUT (usually seen with optional #; 1 or 2 args) or BLOAD (1 string arg) */
+ if(comma_count) {
+ /* multiple args */
+ remove_type(BT_BXL_BXE);
+ } else if(program[last_cmd_pos + 1] == OP_STRCONST) {
+ /* one arg, string const. XXX: check var type */
+ remove_type(BT_TURBO);
+ }
+ break;
+
+ case 0x64: /* %GET (usually seen with optional #; 1 or 2 args) or BSAVE (3 args) */
+ if(comma_count == 2) {
+ remove_type(BT_TURBO);
+ } else {
+ remove_type(BT_BXL_BXE);
+ }
+ break;
+
+ default: break;
+ }
+ if(verbose) fprintf(stderr, " bas_type now %02x\n", bas_type);
+
+ last_cmd = last_op_tok = 0;
+}
+
+/* return true if input_file is Atari MS BASIC.
+ AMSB files begin with a 3-byte header: 0x00, then 2 byte length
+ (LSB/MSB), which is actually 3 bytes less than the full length of
+ the file (or, it's the length of the file minus the 3-byte header).
+ Also, the files always end with 3 0x00 bytes.
+ We check that the header length is 3 bytes less than the file length,
+ then check for the 3 0x00's at the end.
+ */
+int detect_amsb(void) {
+ int len, c;
+
+ if(verbose) fprintf(stderr, "entering detect_amsb()\n");
+
+ rewind(input_file);
+ c = fgetc(input_file);
+ if(c) return 0;
+ c = fgetc(input_file);
+ if(c == EOF) return 0;
+ len = (fgetc(input_file) << 8) | c;
+
+ if(verbose) fprintf(stderr, "detect_amsb() header len==%d (file size should be %d)\n", len, len + 3);
+
+ fseek(input_file, 0, SEEK_END);
+ c = ftell(input_file);
+ if(verbose) fprintf(stderr, "detect_amsb() file size %d\n", c);
+ if(len != (c - 3)) {
+ if(verbose) fprintf(stderr, "detect_amsb() wrong file size!\n");
+ return 0;
+ }
+
+ if(verbose) fprintf(stderr, "detect_amsb() file size is correct, checking for 3 nulls\n");
+ fseek(input_file, -3, SEEK_END);
+ if(fgetc(input_file)) return 0;
+ if(fgetc(input_file)) return 0;
+ if(fgetc(input_file)) return 0;
+
+ if(verbose) fprintf(stderr, "detect_amsb() found 3 nulls, return 1\n");
+
+ return 1;
+}
+
+void foreign(const char *name, int srval) {
+ if(input_file) fclose(input_file);
+ if(script_mode) {
+ exit(srval);
+ } else {
+ puts(name);
+ exit(0);
+ }
+}
+
+void detect_foreign(void) {
+ int c, d;
+
+ c = fgetc(input_file);
+ d = fgetc(input_file);
+
+ if(c == EOF || d == EOF)
+ die("File is too short to be a BASIC program of any kind.");
+
+ if(c == 0 && d == 0) {
+ /* This is why we can't read from stdin. */
+ rewind(input_file);
+ return;
+ }
+
+ if(c == 0xfb && d == 0xc2)
+ foreign("Compiled Turbo BASIC XL", SRET_COMPILED_TURBO);
+
+ if(c == 0xff && d == 0xff)
+ foreign("XEX executable (not BASIC at all!)", SRET_NOT_BASIC);
+
+ if(c == 0xfe && d == 0xfe)
+ foreign("Mac/65 tokenized source (not BASIC at all!)", SRET_NOT_BASIC);
+
+ if(c == 0xdd && d == 0x00)
+ foreign("EXTENDed OSS BASIC XE", SRET_EXTENDED_BXE);
+
+ if(c == 0x7f && d == 'E') {
+ c = fgetc(input_file);
+ d = fgetc(input_file);
+ if(c == 'L' && d == 'F')
+ foreign("ELF executable (not BASIC at all!)", SRET_NOT_BASIC);
+ }
+
+ if(c == 0 && detect_amsb()) {
+ foreign("Atari Microsoft BASIC", SRET_AMSB);
+ }
+
+ if(isdigit(c) && (d == 0x20 || isdigit(d)))
+ foreign("Text file, could be LISTed BASIC (or not)", SRET_NOT_BASIC);
+
+ if(isprint(c) && isprint(d))
+ foreign("Text file (not BASIC at all!)", SRET_NOT_BASIC);
+
+ foreign("Unknown file type (not BASIC at all!)", SRET_NOT_BASIC);
+}
+
+void check_variables(void) {
+ int pos;
+
+ if(vntp == vntd) return;
+
+ /* Unlike Atari BASIC, Turbo variables can have _ in the names.
+ So can BASIC XE, though it's not documented in the BASIC XE
+ Reference Manual that I have.
+ BXL can't have _ in variable names. */
+ for(pos = vnstart; pos < vvstart; pos++) {
+ if((program[pos] & 0x7f) == '_') {
+ remove_type(BT_ATARI | BT_BXL);
+ }
+ }
+
+ /* Also, Turbo line labels (for PROC/EXEC and #/GO#) are variables
+ with a type that's illegal in Atari/BXL/BXE. */
+ for(pos = vvstart; pos < codestart; pos += 8) {
+ if((program[pos] & 0xc0) == 0xc0) {
+ set_type(BT_TURBO);
+ }
+ }
+
+ /* I was hoping to check for BXL/BXE string arrays here. However,
+ looking at a SAVEd file, they look identical to regular string
+ variables (variable type $80, rest of the VVTP entry all $00).
+ When the program's actually in memory, BXL/BXE sets the
+ variable type byte to $91 for DIMed string array var and
+ $81 (same as Atari/Turbo) for a regular DIMed string var.
+ Unfortunately in the SAVE file, it's always $80. */
+
+ /* Another thing that can't be detected: BXL/BXE's FAST mode changes
+ the program in memory (line number targets become addresses),
+ but programs don't get SAVEd this way: SAVE turns off FAST and
+ restores the program to its original state before writing it
+ to disk. Too bad. */
+}
+
+/* BASIC/A+ support is *very* simple. It's similar to BASIC XL (no
+ surprise there)... but unlike Turbo, BXL, and BXE, it's *not*
+ token-compatible with original Atari BASIC. Rather than add
+ their new tokens to the end of the lists, they're mixed in with
+ the others. So A+ can't even LOAD or RUN Atari BASIC files.
+ Appendix J of the BASIC/A+ manual tells you to LIST in BASIC,
+ reboot, ENTER in A+, to "port" your BASIC programs to A+. I
+ suppose if you upgraded from A+ to BASIC XL or XE, you'd have
+ to do the same thing to use A+ programs in XL/XE.
+
+ While this was probably a PITA for BASIC/A+ users back in the day,
+ it makes it *really* easy to detect A+ here. The last line of
+ every SAVEed program is the direct-mode command, and contains either
+ a SAVE or CSAVE cmd token. Which is the same token in Atari, Turbo,
+ BXL, and BXE... but *different* in A+.
+
+ However... I've run into at least one BASIC program in the Holmes
+ Archive that was missing its line 32768 (wish I could remember which).
+ And it's possible for files to get truncated... so I'll check a few
+ other command tokens, to deal with cases like this.
+ */
+void found_aplus(void) {
+ foreign("OSS BASIC/A+", SRET_APLUS);
+}
+
+CALLBACK(check_aplus_cmd) {
+ int has_args;
+ unsigned char nexttok, nexttok2;
+
+ nexttok = program[pos + 1];
+ nexttok2 = program[pos + 2];
+ has_args = !(nexttok == OP_EOS || nexttok == OP_EOL);
+ if(verbose) fprintf(stderr, "check_aplus_cmd: line %d, pos $%04x, tok $%02x, nexttok $%02x, nexttok2 $%02x\n", lineno, pos, tok, nexttok, nexttok2);
+
+ switch(tok) {
+ case CMD_POP: /* A+ READ */
+ case CMD_DOS: /* A+ GET */
+ case CMD_DEG: /* A+ WHILE */
+ case CMD_CLR: /* A+ DIM */
+ case 0x46: /* A+ SOUND <args>, Turbo LOOP, BXL/XE CP */
+ case 0x3c: /* A+ DIR <req'd arg>, Turbo REPEAT, BXL/XE ELSE */
+ if(has_args)
+ found_aplus();
+ break;
+
+ case CMD_SAVE: /* A+ END */
+ case CMD_GRAPHICS: /* A+ STOP */
+ case CMD_DIM: /* A+ ENDWHILE */
+ /* case CMD_GET: */ /* A+ RETURN, but Turbo allows 0 args too! */
+ case 0x38: /* A+ DOS, Turbo DPOKE, BXL/XE WHILE */
+ if(!has_args)
+ found_aplus();
+ break;
+
+ case CMD_POSITION: /* A+ ? */
+ /* PARTIAL: does nothing if 1st arg is numeric. */
+ if(!has_args)
+ /* POSITION can't have 0 args. */
+ found_aplus();
+ else if(is_string_rval(nexttok))
+ /* ? "STRING" or ? A$, e.g., can't be POSITION */
+ found_aplus();
+ else if(nexttok == OP_NUM_LE) /* numeric <= in BASIC, # in A+ */
+ found_aplus();
+ break;
+
+ case CMD_RUN: /* A+ PRINT */
+ /* PARTIAL: only detects PRINT # or PRINT <num> ... */
+ /* A+'s # token is BASIC's OP_NUM_LE! */
+ if(nexttok == OP_NUM_LE)
+ found_aplus();
+ else if(is_numconst_op(nexttok) || is_numeric_var(nexttok))
+ found_aplus();
+ break;
+
+ case CMD_XIO: /* A+ SAVE */
+ /* most programs, this is enough, because they'll end with
+ 32768 SAVE "D:BLAH" */
+ if(is_string_rval(nexttok))
+ found_aplus();
+ break;
+
+ case CMD_OPEN: /* A+ ELSE */
+ /* case CMD_CLOSE: */ /* A+ DEG */ /* can't check, Turbo allows no args */
+ case CMD_STATUS: /* A+ NEW */
+ case CMD_POINT: /* A+ LOAD */
+ case 0x42: /* A+ POSITION, Turbo BPUT, BXL RGET */
+ case 0x43: /* A+ DRAWTO, Turbo BGET, BXL BPUT */
+ if(nexttok != OP_HASH) /* # in BASIC, USING in A+ */
+ found_aplus();
+ break;
+
+ /* case 0x48: */ /* A+ CSAVE, Turbo DIR, BXL/BXE PROTECT */
+ /* DIR without arg is OK, so we can't really check this. */
+ /* break; */
+
+ case CMD_DRAWTO: /* A+ PUT */
+ case CMD_SOUND: /* A+ RPUT */
+ case CMD_LPRINT: /* A+ RGET */
+ case CMD_CSAVE: /* A+ BPUT */
+ case CMD_CLOAD: /* A+ BGET */
+ case CMD_ON: /* A+ STATUS */
+ case CMD_NOTE: /* A+ OPEN */
+ case CMD_CONT: /* A+ CLOSE */
+ case CMD_RAD: /* A+ XIO */
+ if(nexttok == OP_NUM_LE) /* numeric <= in BASIC, # in A+ */
+ found_aplus();
+ break;
+
+ default: break;
+ }
+
+ last_cmd = tok;
+}
+
+void check_aplus(void) {
+ allow_hex_const = 1;
+
+ on_cmd_token = check_aplus_cmd;
+ walk_all_code();
+}
+
+void check_atari_turbo_oss(void) {
+ allow_hex_const = 1;
+
+ on_cmd_token = handle_cmd;
+ on_exp_token = handle_op;
+ on_end_stmt = handle_end_stmt;
+
+ walk_all_code();
+}
+
+int main(int argc, char **argv) {
+ set_self(*argv);
+ parse_general_args(argc, argv, print_help);
+ parse_args(argc, argv);
+
+ detect_foreign();
+
+ readfile();
+ parse_header();
+
+ check_variables();
+ check_aplus();
+ check_atari_turbo_oss();
+
+ print_result(); /* always exits */
+ return 0; /* never happens, shuts up gcc's warning though */
+}