diff options
Diffstat (limited to 'whichbas.c')
-rw-r--r-- | whichbas.c | 1255 |
1 files changed, 1255 insertions, 0 deletions
diff --git a/whichbas.c b/whichbas.c new file mode 100644 index 0000000..9f7f6db --- /dev/null +++ b/whichbas.c @@ -0,0 +1,1255 @@ +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include <time.h> +#include <sys/wait.h> + +#include "bas.h" +#include "whichbas.h" + +#define BT_INVALID 0 +#define BT_ATARI 1 +#define BT_TURBO 2 +#define BT_BXL 4 +#define BT_BXE 8 + +#define BT_BXL_BXE (BT_BXL | BT_BXE) + +int bas_type = 0x0f; /* start out with all enabled */ + +int script_mode = 0; /* -s flag */ +int script_ret; /* -s mode, exit with one of SRET_* from whichbas.h as status */ + +int keep_going = 0; /* -k flag */ + +int comma_count; /* count of regular commas (not string/array) in statement */ +unsigned char last_cmd; +unsigned char last_op_tok; +unsigned short last_cmd_pos; + +void print_help(void) { + printf("Usage: %s [-v] [-h] [-k] [-s] <inputfile> ...\n", self); +} + +int child_errs = 0; + +/* return true if the child ran and returned 0 (success) */ +int spawn_child(const char **args) { + pid_t pid, status; + int wstatus; + + pid = fork(); + if(pid == -1) { + perror("fork()"); + die("Can't spawn child process"); + } else if(pid) { + /* we are the parent */ + status = waitpid(pid, &wstatus, 0); + if(status < 0) { + perror("waitpid()"); + die("Child process went south"); + } + if(! (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == 0) ) { + child_errs++; + return 0; + } + } else { + /* we are the child */ + if(execv(args[0], (char * const *)args) < 0) { + perror("exec()"); + exit(1); + } + } + + return 1; +} + +/* this is not the ideal way to handle multiple files. it forks a new + process for each one. however, I embedded a bunch of die() calls + in bas.c, thinking I'd never use some of the functions more than once + in the same run. what I get for trying to KISS... */ +void multiple_files(const char *argv0, char **list) { + const char *args[5]; /* 5 because eventually we include -k and/or -v */ + int kidstat; + + /* this isn't needed for things to work, but if I write buggy code, + it avoids a forkbomb. */ + setenv("WHICHBAS_MULTI_PARANOIA", "1", 1); + + args[0] = argv0; + args[2] = NULL; + + while(*list) { + args[1] = *list; + printf("%s:\t", *list); + fflush(stdout); + kidstat = spawn_child(args); + if(!kidstat) puts("(detection failed)"); + fflush(stdout); + list++; + } + + if(child_errs) { + fprintf(stderr, "%s: exiting with error status because some files failed.\n", self); + exit(1); + } else { + exit(0); + } +} + +void parse_args(int argc, char **argv) { + int opt; + + while( (opt = getopt(argc, argv, "vksh")) != -1) { + switch(opt) { + case 'v': verbose = 1; break; + case 'k': keep_going = verbose = 1; break; + case 's': script_mode = 1; break; + case 'h': print_help(); exit(0); + default: + print_help(); + exit(1); + } + } + + if(optind >= argc) + die("No input file given (and stdin not supported)."); + + if(optind == argc - 1) { /* got one filename only */ + open_input(argv[optind]); + if(input_file == stdin) + die("Reading from standard input not supported."); + } else { /* got multiple filenames */ + if(keep_going || verbose || script_mode) + die("-k, -v, -s not supported with multiple filenames (yet?)"); + if(getenv("WHICHBAS_MULTI_PARANOIA")) + die("BUG: multiple_files() recursion!"); + multiple_files(argv[0], argv + optind); + } +} + +/* don't need this. +void add_type(int type) { + bas_type |= type; +} +*/ + +void print_result(void) { + const char *name; + + if(verbose) fprintf(stderr, " final bas_type %02x\n", bas_type); + + if(bas_type == BT_INVALID) { + name = "Unknown variant: Not Atari BASIC, Turbo, BXL, or BXE"; + script_ret = SRET_UKNOWN_DERIV; + } else if(bas_type & BT_ATARI) { + name = "Atari BASIC"; + script_ret = SRET_ATARI; + } else if(bas_type & BT_TURBO) { + if(bas_type & BT_BXL) { + name = "Either Turbo BASIC XL or OSS BASIC XL"; + script_ret = SRET_TURBO_OR_BXL; + } else if(bas_type & BT_BXE) { + name = "Either Turbo BASIC XL or OSS BASIC XE"; + script_ret = SRET_TURBO_OR_BXE; + } else { /* bas_type == BT_TURBO */ + name = "Turbo BASIC XL"; + script_ret = SRET_TURBO; + } + } else if(bas_type == BT_BXL || bas_type == (BT_BXL | BT_BXE)) { + name = "OSS BASIC XL"; + script_ret = SRET_BXL; + } else if(bas_type == BT_BXE) { + name = "OSS BASIC XE"; + script_ret = SRET_BXE; + } else { + /* this one should never happen: */ + name = "Either Turbo BASIC XL, OSS BASIC XL, or OSS BASIC XE"; + script_ret = SRET_TURBO_OR_BXL_OR_BXE; + } + + if(script_mode) { + exit(script_ret); + } else { + puts(name); + exit(0); + } +} + +/* return true if a token is numeric constant + (including TB/BXE/BXL hex) */ +int is_numconst_op(unsigned char tok) { + switch(tok) { + case OP_NUMCONST: + case OP_HEXCONST: + return 1; + default: + return 0; + } +} + +/* return true if a token is a function that *returns* + a numeric value (says nothing about the argument types, + though!) */ +int is_numeric_func(unsigned char tok) { + switch(tok) { + case OP_FUNC_USR: + case OP_FUNC_ASC: + case OP_FUNC_VAL: + case OP_FUNC_LEN: + case OP_FUNC_ADR: + case OP_FUNC_ATN: + case OP_FUNC_COS: + case OP_FUNC_PEEK: + case OP_FUNC_SIN: + case OP_FUNC_RND: + case OP_FUNC_FRE: + case OP_FUNC_EXP: + case OP_FUNC_LOG: + case OP_FUNC_CLOG: + case OP_FUNC_SQR: + case OP_FUNC_SGN: + case OP_FUNC_ABS: + case OP_FUNC_INT: + case OP_FUNC_PADDLE: + case OP_FUNC_STICK: + case OP_FUNC_PTRIG: + case OP_FUNC_STRIG: + return 1; + default: + return 0; + } +} + +/* return true if a token is an arithmetic operator */ +int is_arith_op(unsigned char tok) { + switch(tok) { + case OP_UPLUS: /* not sure these two... */ + case OP_UMINUS: /* ...really belong here */ + case OP_NUM_LE: + case OP_NUM_NE: + case OP_NUM_GE: + case OP_NUM_LT: + case OP_NUM_GT: + case OP_NUM_EQ: + case OP_POWER: + case OP_MULT: + case OP_PLUS: + case OP_MINUS: + case OP_DIVIDE: + case OP_NOT: + case OP_OR: + case OP_AND: + case OP_NUM_ASSIGN: + case OP_GRP_LPAR: /* yes, this belongs here, (((A$))) is a syntax error! */ + return 1; + default: + return 0; + } +} + +int is_numeric_var(unsigned char tok) { + int vartype; + + if(tok < 0x80) + return 0; + + vartype = get_vartype(tok); + return (vartype == TYPE_SCALAR || vartype == TYPE_ARRAY); +} + +/* return true if a token is: + - a numeric constant (including hex constants), + - a numeric variable (including arrays), + - a function that returns a numeric (e.g. ASC(), SIN()). + for now, only standard Atari BASIC tokens are considered. + unary minus and plus make sense here, but binary ops don't. + */ +int is_numeric_rval(unsigned char tok) { + return + (tok == OP_UMINUS) || + (tok == OP_UPLUS) || + is_numconst_op (tok) || + is_numeric_func (tok) || + is_numeric_var (tok) ; +} + +/* return true if a token is: + - a numeric constant (including hex constants), + - a numeric variable (including arrays), + - a math operator (plus, minus, etc), + - a function that returns a numeric (e.g. ASC(), SIN()). + for now, only standard Atari BASIC tokens are considered. + */ +int is_numeric_op(unsigned char tok) { + return + is_numeric_rval (tok) || + is_arith_op (tok) ; +} + +int is_string_var(unsigned char tok) { + return (tok >= 0x80 && (get_vartype(tok) == TYPE_STRING)); +} + +int is_string_const(unsigned char tok) { + return (tok == OP_STRCONST); +} + +int is_string_exp_op(unsigned char tok) { + switch(tok) { + case OP_STR_ASSIGN: + case OP_STR_LE: + case OP_STR_NE: + case OP_STR_GE: + case OP_STR_LT: + case OP_STR_GT: + case OP_STR_EQ: + case OP_STR_LPAR: /* the ( in: A$(1)="A" */ + return 1; + default: + return 0; + } +} + +int is_string_func(unsigned char tok) { + switch(tok) { + case OP_FUNC_STR: + case OP_FUNC_CHR: + return 1; + default: + return 0; + } +} + +/* return true if a token is: + - a string constant, + - a string variable, + - a function that returns a string. +*/ +int is_string_rval(unsigned char tok) { + return + is_string_const (tok) || + is_string_func (tok) || + is_string_var (tok) ; +} + +/* return true if a token is: + - a string constant, + - a string variable, + - a string expression operator, like OP_STR_LE, + - a function that returns a string. +*/ +int is_string_op(unsigned char tok) { + return + is_string_rval (tok) || + is_string_exp_op (tok) ; +} + +/* true if an operator token is a string function in BASIC XL (or XE). + these tokens are all numeric functions in Turbo, so be sure you + know what you're doing! */ +int is_bxl_string_func(unsigned char tok) { + switch(tok) { + case 0x5c: /* BXL HEX$, Turbo DEC */ + case 0x66: /* BXL LEFT$, Turbo %0 */ + case 0x67: /* BXL RIGHT$, Turbo %1 */ + case 0x68: /* BXL MID$, Turbo %2 */ + return 1; + default: + return 0; + } +} + +void remove_type(int type) { + bas_type &= ((~type) & 0x0f); + + if(keep_going) return; + + /* without -k, stop if it gets narrowed down to one of these 4. */ + if(bas_type == BT_ATARI || bas_type == BT_TURBO || bas_type == BT_BXE || bas_type == BT_BXL) + print_result(); +} + +void set_type(int type) { + bas_type = type; + if(!keep_going) print_result(); +} + +CALLBACK(handle_cmd) { + int has_args = 0, has_var_arg = 0, vartype = -1; + unsigned char nexttok; + + last_cmd = tok; + last_cmd_pos = pos; + comma_count = 0; + + if(verbose) fprintf(stderr, "handle_cmd: lineno %d, tok $%02x, bas_type was %02x\n", lineno, tok, bas_type); + + nexttok = program[pos + 1]; + has_args = !(nexttok == OP_EOS || nexttok == OP_EOL); + if(nexttok >= 0x80) { + has_var_arg = 1; + vartype = get_vartype(nexttok); + } + + /* this switch is for tokens that are the same in Atari/Turbo/BXL/BXE, but with + different semantics. non-Atari-BASIC tokens go in the switch below, not + this one. */ + switch(tok) { + /* TB uses the same token for CLOSE as Atari and BXL/BXE, but it allows + it to have no argument (meaning, close all IOCBs). SOUND is the same + (no args = silence all POKEY channels). */ + case CMD_CLOSE: + case CMD_SOUND: + if(!has_args) { + set_type(BT_TURBO); + } + break; + case CMD_INPUT: + /* TB, BXL, BXE all support INPUT "Prompt",VAR with the same tokenized + form. Atari BASIC doesn't allow string constants in INPUT args. */ + if(has_args) { + if(nexttok == OP_STRCONST) { + int pos_after_string; + remove_type(BT_ATARI); + /* TB only: INPUT "Prompt";VAR is also supported (not in BXL/BXE) */ + pos_after_string = pos + 3 + program[pos + 2]; + if(verbose) + fprintf(stderr, + "===> INPUT with string prompt at line %d, " + "pos %04x, pos_after_string %04x (token %02x)\n", + lineno, pos, pos_after_string, program[pos_after_string]); + if(program[pos_after_string] == OP_SEMICOLON) { + set_type(BT_TURBO); + } + } + } else { /* has_args is false, oh shit! */ + fprintf(stderr, "*** INPUT without variable at line %d.\n*** Rev A BASIC bug, program will crash, better fix it!\n", lineno); + set_type(BT_ATARI); + } + break; + case CMD_GET: + case CMD_PUT: + /* TB uses the same tokens for GET and PUT as Atari/BXL/BXE, but it allows + the argument to be a variable without a # in front of it. */ + if(nexttok != OP_HASH) { + set_type(BT_TURBO); + } + /* PARTIAL: we really should detect GET #1,A$. this is Turbo-only, but + probably nobody ever uses it because it doesn't seem to *work*, + at least not in TB 1.5. A$ always ends up empty with length 0. */ + break; + case CMD_RESTORE: + case CMD_TRAP: + /* TB allows RESTORE #LABEL and TRAP #LABEL */ + if(nexttok == OP_HASH) { + set_type(BT_TURBO); + } + break; + default: break; + } + + if(tok <= CMD_ERROR) return; /* legal in BASIC, ignore */ + remove_type(BT_ATARI); + if(tok >= 0x59) remove_type(BT_BXL); + + if(tok >= 0x65) { + fprintf(stderr, "handle_cmd: invalid command %02x at line %d\n", tok, lineno); + keep_going = 0; + set_type(BT_INVALID); + } + + /* we have tokens 0x3a to 0x68 in both TB and BXE, or 47 of them. + + Some tokens can't be determined, because they take the + same argument (or lack of) in both Turbo and BXL/XE. These + are: + + 0x3c: REPEAT or ELSE (no args either way) + 0x46: LOOP or CP (no args either way) + 0x49: LOCK or UNPROTECT (take the same args) + 0x4B: RENAME in both Turbo and BXL/XE (same token, same args) + + 4 of them, this leaves 43 we can check. + + Covered so far: 41 (95%). + However, some of these are marked PARTIAL because they're not detected + under all circumstances. + + Unchecked tokens: + + 0x5B: BRUN or CALL (both take a string, CALL allows "USING" though) + This isn't really important, as CALL requires a PROCEDURE to + exist, and we *do* catch the PROCEDURE token. + 0x5F: PAINT (req 2 args) or NUM (optional 2 args). + Again, not important, because it's highly unlikely any BXL/BXE + program will contain NUM... because when it executes, it stops the + program and goes back to the READY prompt (in auto-numbering mode). + */ + switch(tok) { + case 0x39: /* MOVE <args> or ENDWHILE */ + case 0x3a: /* -MOVE <args> or TRACEOFF */ + case 0x3d: /* UNTIL <args> or ENDIF */ + case 0x56: /* DEL <args> or FAST */ + case 0x62: /* CIRCLE (3 or 4 num args) or NORMAL (no args) */ + /* COMPLETE */ + if(has_args) { + remove_type(BT_BXL_BXE); + } else { + remove_type(BT_TURBO); + } + break; + + case 0x58: /* TRACE (optional + or -), EXTEND (BXE; no args) */ + /* COMPLETE */ + /* In BXL, this looks to be an extra END token, that behaves the same + as the regular one, but can't be entered in the editor. Assume + no BXL program contains this token. */ + /* In BXE, EXTEND can't actually appear in a program (it's direct + mode only). The only way to get EXTEND into a BXE program is + to do a direct mode command like: + EXTEND:SAVE "D:PROG" + ...which of course puts it at line 32768. But this code will + never see that, because we'd already detect EXTENDed BXE + based on the first 2 bytes of the file. + */ + /* So, if we see this token, it *has* to be Turbo's TRACE, whether + or not it has an argument. */ + set_type(BT_TURBO); + break; + + case 0x59: /* TEXT or PROCEDURE */ + /* COMPLETE */ + /* Turbo: TEXT (1st arg is number), + BXL: invalid token, + BXE: PROCEDURE (arg is string const (not var!)) */ + if(nexttok == OP_STRCONST) { + /* this token doesn't seem to be valid in BXL at all */ + set_type(BT_BXE); + } else { + remove_type(BT_BXL_BXE); + } + break; + + case 0x3f: /* WEND or LOMEM <args> */ + case 0x40: /* ELSE or DEL <args> */ + case 0x41: /* ENDIF or RPUT <args> */ + case 0x45: /* DO or TAB <args> */ + case 0x47: /* EXIT or ERASE <args> */ + case 0x51: /* ENDPROC or PMMOVE <args> */ + /* COMPLETE */ + if(has_args) { + remove_type(BT_TURBO); + } else { + remove_type(BT_BXL_BXE); + } + break; + + case 0x48: /* DIR (optional arg) or PROTECT (req'd arg) */ + /* PARTIAL: without args means TB, but with arg, + it could be either */ + if(!has_args) { + remove_type(BT_BXL_BXE); + } + break; + + case 0x4a: /* UNLOCK (req'd arg) or DIR (optional arg) */ + /* PARTIAL: without args means BXL/BXE, but with arg, + it could be either */ + if(!has_args) { + remove_type(BT_TURBO); + } + break; + + case 0x3b: /* *F (optional + or -), TRACE (no arg) */ + case 0x5e: /* *B (optional + or -) or EXIT (no arg) */ + /* PARTIAL: doesn't catch *F or *B by itself with no +/- */ + if(has_args) { + remove_type(BT_BXL_BXE); + } + break; + + case 0x44: /* FILLTO or BGET (check for a # after the token) */ + /* COMPLETE */ + if(nexttok == OP_HASH) { + remove_type(BT_TURBO); + } else { + remove_type(BT_BXL_BXE); + } + break; + + case 0x4e: /* TIME$= (1 string arg) or PMCLR (1 num arg) */ + /* PARTIAL: but almost complete. nothing happens if it's + TIME$= with a string function (probably rare) or PMCLR + with a complex expression. */ + if(nexttok == OP_STRCONST) { + remove_type(BT_BXL_BXE); + } else if(has_var_arg && vartype == TYPE_STRING) { + remove_type(BT_BXL_BXE); + } else if(nexttok == OP_NUMCONST) { + remove_type(BT_TURBO); + } else if(has_var_arg && vartype == TYPE_SCALAR) { + remove_type(BT_TURBO); + } + break; + + case 0x50: /* EXEC (1 arg, *must* be variable) or PMGRAPHICS (1 num arg, may be const) */ + /* PARTIAL: PMGRAPHICS VAR won't be detected. but this usage is rare. */ + /* This check is actually redundant, because EXEC requires Turbo's + label type (high bits in var name table both set to 1), which we already + detected in check_variables(). */ + if(!has_var_arg) { + remove_type(BT_TURBO); + } + break; + + case 0x54: /* -- in TB, LVAR in BXL/BXE */ + /* COMPLETE */ + /* We can tell these apart because: + 1. TB gives us a next-statement offset of 5 if -- is the first (or + actually only) statement on a line. Normally, the minimum offset + is 6, but there's no OP_EOL after this token for some reason. + 2. If -- is the 2nd or or later statement on a line (after a colon) + it *does* get a statement terminator, but it's 0x9b (ATASCII EOL, + like a REM or DATA gets). + Note that it's impossible to put more statements *after* the --, + they just get ignored if you type them. This doesn't help us + here, but it's interesting anyway. + Also, the -- is what you type to enter it into the program, but + it get LISTed as a line of 30 dashes. + The explanation is a lot longer than the code... */ + if(program[pos - 1] == 0x05 || nexttok == 0x9b) { + set_type(BT_TURBO); + } else { + remove_type(BT_TURBO); + } + break; + + case 0x57: /* DUMP (1 optional string arg) or LOCAL (1 *numeric* variable arg) */ + /* BXL/BXE's LOCAL only works on scalars, not arrays or strings. so if there's + no arg, or one string arg... */ + /* PARTIAL: almost complete, doesn't handle DUMP func$(arg), which I + doubt anyone uses anyway. */ + if(!has_args) { + /* only Turbo allows no arg... */ + remove_type(BT_BXL_BXE); + } else if(nexttok == OP_STRCONST) { + /* only Turbo allows a string constant arg... */ + remove_type(BT_BXL_BXE); + } else if(has_var_arg && vartype == TYPE_STRING) { + /* only Turbo allows a string variable arg... */ + remove_type(BT_BXL_BXE); + } else if(has_var_arg && vartype == TYPE_SCALAR) { + /* only BXL/BXL allows a scalar variable arg */ + remove_type(BT_TURBO); + } + break; + + case 0x5a: /* TB: BLOAD; BXL: extension mechanism; BXE: invalid. */ + /* COMPLETE */ + /* This is the token used for the BXL EXTEND.COM added commands, + from the Toolkit disk. It's followed by a byte ranging 0x10 + to 0x15 that specifies which extended command, e.g. 0x5a 0x11 means + EXIT, 0x5a 0x12 is PROCEDURE, 0x5a 0x13 is CALL. Although + these look like BXE's extra commands, they aren't the same tokens, + and BXE will choke on them (RUN causes "Error- 33", LIST causes + a lockup). */ + if(nexttok >= 0x10 && nexttok <= 0x15) { + /* worth mentioning to the user... */ + fprintf(stderr, "Note: program requires EXTEND.COM from BASIC XL Toolkit disk.\n"); + set_type(BT_BXL); + } else { + /* it's BLOAD if followed by e.g. OP_STRCONST or a variable */ + set_type(BT_TURBO); + } + break; + + case 0x5c: /* GO# (1 arg only) or SORTUP (optional 2nd arg of USING, but no comma) */ + case 0x5d: /* # (1 arg only) or SORTDOWN (optional 2nd arg of USING, but no comma) */ + /* COMPLETE but no longer needed (check_variables() already found the + 11xxxxxx variables) */ + /* Turbo BASIC labels have the high 2 bits set to 11, which is illegal + in Atari/BXL/BXE. */ + if(vartype == 3) { + remove_type(BT_BXL_BXE); + } else { + remove_type(BT_TURBO); + } + break; + + case 0x60: /* CLS (optional IOCB with #) or HITCLR (no args) */ + /* PARTIAL: without args, can't tell them apart. */ + /* I doubt CLS #IOCB is actually used in many Turbo BASIC + programs, because it's broken (at least in Turbo 1.5). + It's supposed to only clear the screen of output that + happened after the OPEN #IOCB, but it really clears the + whole screen. */ + if(nexttok == OP_HASH) { + remove_type(BT_BXL_BXE); + } + break; + + default: break; + } + if(verbose) fprintf(stderr, " bas_type now %02x\n", bas_type); +} + +CALLBACK(handle_op) { + unsigned char nexttok = program[pos + 1]; + unsigned char nexttok2 = program[pos + 2]; + + if(tok == OP_COMMA) comma_count++; + + if(verbose) fprintf(stderr, "handle_op: lineno %d, tok $%02x, comma_count %d, bas_type was %02x\n", lineno, tok, comma_count, bas_type); + + if(tok == 0x00) { + /* Turbo allows 256 variables, tokenizes the first 128 normally ($80-$FF). + The extra ones above $FF are tokenized as $00, varnum - $80. None of + our other BASICs uses $00 as an operator token, so.. */ + set_type(BT_TURBO); + } + + /* attempt to detect BXL/BXE DIM for 2D string arrays. + DIM A$(10,10) is illegal in Atari/Turbo. + PARTIAL: this only works if the first dimension is either a + constant or a scalar variable (not an array element or an + expression). fortunately most programs use constants in DIM. + */ + if(tok == OP_DIM_STR_LPAR) { + int str2d = 0; + if(nexttok >= 0x80 && nexttok2 == OP_ARR_COMMA) { + str2d = 1; + } else if(nexttok == OP_NUMCONST || nexttok == OP_HEXCONST) { + str2d = (program[pos + 8] == OP_ARR_COMMA); + } + if(str2d) { + if(verbose) + fprintf(stderr, "===> found 2d string array at line %d\n", lineno); + remove_type(BT_ATARI | BT_TURBO); + } + } + + /* BXL/BXE allows string concatenation in assignment with the comma, + A$="FOO","BAR" or A$=C$,D$. */ + if(last_cmd == CMD_LET || last_cmd == CMD_ILET) { + if(program[last_cmd_pos + 2] == OP_STR_ASSIGN) { + if(tok == OP_COMMA) { + if(is_string_rval(nexttok)) { + remove_type(BT_ATARI | BT_TURBO); + } + } + } + } + + if(tok == OP_HEXCONST) remove_type(BT_ATARI); /* hex const (turbo *and* bxl/xe) */ + if(tok <= OP_FUNC_STRIG) { + if(verbose) fprintf(stderr, " bas_type now %02x\n", bas_type); + return; /* legal in BASIC, ignore */ + } + remove_type(BT_ATARI); + + /* only Turbo has op tokens numbered 0x69 and up. */ + if(tok >= 0x69) { + set_type(BT_TURBO); + } + + if(tok >= 0x6E) { + fprintf(stderr, "handle_op: invalid operator %02x at line %d\n", tok, lineno); + keep_going = 0; + set_type(BT_INVALID); + } + + /* There are 25 extra operators in Turbo, and 20 of them are shared with + BXL/BXE. Of the 20, 4 of them are undecidable, and the rest are + covered here, which means 80% coverage of the shared ops. + Undecidables are: + 0x56 & (logical AND) or % (XOR), both infix numeric ops; can't tell apart + 0x57 ! (logical OR) in both Turbo and BXL/BXE, can't tell apart + 0x64 RAND (func, 1 num arg) or TAB (func, 1 num arg), can't tell apart + 0x65 TRUNC (func, 1 num arg) or PEN (func, 1 num arg), can't tell apart + */ + switch(tok) { + case 0x55: /* DPEEK (function) TB, USING (infix, not a function) in BXL/BXE */ + case 0x58: /* INSTR (function) or & (infix numeric) in BXE. */ + case 0x5b: /* HEX$ (func, takes 1 num arg) or FIND( (pseudo-func, 3 args */ + /* COMPLETE */ + if(nexttok == OP_FUNC_LPAR) { + remove_type(BT_BXL_BXE); + } else { + remove_type(BT_TURBO); + } + break; + + case 0x59: /* INKEY$ (0 arg pseudo-func) in TB, string array separator semicolon in BXL/BXE */ + /* PARTIAL: ...but pretty good. we *can't* check nexttok == OP_GRP_RPAR, because + VAL(INKEY$) or ASC(INKEY$) are legit Turbo code. */ + if(nexttok == OP_EOS || nexttok == OP_EOL) { + /* the semicolon can't be the last token on the line (needs at least + a right-paren), but INKEY$ can. */ + remove_type(BT_BXL_BXE); + } else if(pos == last_cmd_pos + 1) { + /* INKEY$ can be the first operator after the command, e.g if the command + is IF. The semicolon cannot. */ + remove_type(BT_BXL_BXE); + } else if(is_string_exp_op(last_op_tok) || is_string_exp_op(nexttok)) { + /* A$=INKEY$, IF INKEY$=A$, A$(LEN(A$)+1)=INKEY$, INKEY$<>"A"... */ + remove_type(BT_BXL_BXE); + } else if(is_numeric_op(last_op_tok) || is_numeric_op(nexttok)) { + remove_type(BT_TURBO); + } + break; + + case 0x5a: /* EXOR (infix num op) or BUMP( (pseudo-function, no OP_FUNC_LPAR) */ + case 0x5d: /* DIV (infix num op) or RANDOM( (pseudo-func, 1 or 2 num args) */ + /* COMPLETE (I think, anyway) */ + if(last_op_tok == OP_GRP_RPAR || last_op_tok == OP_NUMCONST || last_op_tok == OP_HEXCONST || last_op_tok >= 0x80) { + /* if the last token was a variable or a numeric, or a right paren, + this is infix (can't be a function, last token would have to have + been a command or a math/etc operator). */ + remove_type(BT_BXL_BXE); + } else { + remove_type(BT_TURBO); + } + break; + + case 0x5c: /* DEC (function, takes str) in TB, HEX$ (function, takes num) in BXL/BXE */ + /* COMPLETE */ + if(is_string_rval(nexttok2)) { + remove_type(BT_BXL_BXE); + } else if(is_numeric_op(nexttok2)) { + remove_type(BT_TURBO); + } + break; + + case 0x5e: /* FRAC (num func, 1 arg) or DPEEK (num func, 1 arg) in BXL... + however BXE has an optional 2nd arg. */ + { + /* PARTIAL: This detects the 2nd arg for simple cases where the + 1st arg is a constant or a numeric variable, but not if the + 1st arg is an expression or an array element. */ + int has2 = 0; + if(nexttok2 == OP_NUMCONST || nexttok2 == OP_HEXCONST) { + if(program[pos + 9] == OP_ARR_COMMA) + has2 = 1; + } else if(nexttok2 >= 0x80 && program[pos + 3] == OP_ARR_COMMA) { + has2 = (get_vartype(nexttok2) == TYPE_SCALAR); + } + if(has2) { + set_type(BT_BXE); + } + } + break; + + case 0x5f: /* TIME$ in TB, SYS (function) in BXL/BXE */ + case 0x60: /* TIME in TB, VSTICK (function) in BXL/BXE */ + case 0x61: /* MOD (infix op) in TB, HSTICK (function) in BXL/BXE */ + case 0x62: /* EXEC (infix op, with ON) in TB, PMADR (function) in BXL/BXE */ + /* COMPLETE */ + if(nexttok == OP_FUNC_LPAR) { + remove_type(BT_TURBO); + } else { + remove_type(BT_BXL_BXE); + } + break; + + case 0x63: /* RND (pseudo-func, no arg) or ERR (func, 1 num arg) */ + /* COMPLETE */ + if(nexttok != OP_FUNC_LPAR) { + set_type(BT_TURBO); + } + break; + + case 0x66: /* %0 in TB, LEFT$( (pseudo-func, takes string) in BXL/BXE */ + case 0x67: /* %1 in TB, RIGHT$( (pseudo-func, takes string) in BXL/BXE */ + case 0x68: /* %2 in TB, MID$( (pseudo-func, takes string) in BXL/BXE */ + /* COMPLETE */ + /* LEFT$/RIGHT$/MID$ do NOT get OP_FUNC_LPAR (the "(" is part of the + token name). They're always followed by a string operator... and + it works out that none of the tokens for BXL-only string funcs + are allowed to follow %0 %1 %2 in Turbo. */ + if(is_string_op(nexttok) || is_bxl_string_func(nexttok)) { + remove_type(BT_TURBO); + } else { + remove_type(BT_BXL_BXE); + } + break; + + default: + break; + } + + last_op_tok = tok; + if(verbose) fprintf(stderr, " bas_type now %02x\n", bas_type); +} + +/* we can count commas, because both Turbo and BXE/BXL use the "array" comma + to separate function arguments, not the "regular" comma. */ +CALLBACK(handle_end_stmt) { + if(verbose) fprintf(stderr, "handle_end_stmt: lineno %d, tok $%02x, last_cmd $%02x, comma_count %d, bas_type was %02x\n", lineno, tok, last_cmd, comma_count, bas_type); + switch(last_cmd) { + case 0x38: /* DPOKE (2 args) or WHILE (1 arg) */ + if(comma_count) { + remove_type(BT_BXL_BXE); + } else { + remove_type(BT_TURBO); + } + break; + + case 0x3e: /* WHILE (1 arg) or DPOKE (2 or 3 args) */ + case 0x4c: /* DELETE (1 arg) or MOVE (3 or 4 args) */ + case 0x4d: /* PAUSE (1 arg) or MISSILE (3 args) */ + case 0x52: /* FCOLOR (1 arg) or PMWIDTH (2 args) */ + case 0x53: /* *L (optional + or - only) or SET (req 2 num args) */ + case 0x4f: /* PROC (1 arg) or PMCOLOR (3 args) */ + if(comma_count) { /* 1 arg means no commas */ + remove_type(BT_TURBO); + } else { + remove_type(BT_BXL_BXE); + } + break; + + case 0x42: /* BPUT or RGET */ + /* PARTIAL: + Turbo BGET always takes 3 args, BXL/BXE RGET takes 2 or more. + We can at least rule out Turbo if there aren't exactly 3 args. */ + if(comma_count != 2) { + remove_type(BT_TURBO); + } + break; + + case 0x43: /* BGET or BPUT */ + /* PARTIAL: + Turbo BGET and BPUT always take 3 args. So does BXL BPUT. + BXE BPUT takes 3 args and an optional 4th. */ + if(comma_count != 2) { + set_type(BT_BXE); + } + break; + + case 0x55: /* RENUM in both (TB req 3 args, BXL up to two) */ + if(comma_count == 2) { + remove_type(BT_BXL_BXE); + } else { + remove_type(BT_TURBO); + } + break; + + case 0x61: /* DSOUND (0 or 4 num args) or INVERSE (no args) */ + /* PARTIAL: can't tell no-argument DSOUND from INVERSE. */ + if(comma_count) { + remove_type(BT_BXL_BXE); + } + break; + + case 0x63: /* %PUT (usually seen with optional #; 1 or 2 args) or BLOAD (1 string arg) */ + if(comma_count) { + /* multiple args */ + remove_type(BT_BXL_BXE); + } else if(program[last_cmd_pos + 1] == OP_STRCONST) { + /* one arg, string const. XXX: check var type */ + remove_type(BT_TURBO); + } + break; + + case 0x64: /* %GET (usually seen with optional #; 1 or 2 args) or BSAVE (3 args) */ + if(comma_count == 2) { + remove_type(BT_TURBO); + } else { + remove_type(BT_BXL_BXE); + } + break; + + default: break; + } + if(verbose) fprintf(stderr, " bas_type now %02x\n", bas_type); + + last_cmd = last_op_tok = 0; +} + +/* return true if input_file is Atari MS BASIC. + AMSB files begin with a 3-byte header: 0x00, then 2 byte length + (LSB/MSB), which is actually 3 bytes less than the full length of + the file (or, it's the length of the file minus the 3-byte header). + Also, the files always end with 3 0x00 bytes. + We check that the header length is 3 bytes less than the file length, + then check for the 3 0x00's at the end. + */ +int detect_amsb(void) { + int len, c; + + if(verbose) fprintf(stderr, "entering detect_amsb()\n"); + + rewind(input_file); + c = fgetc(input_file); + if(c) return 0; + c = fgetc(input_file); + if(c == EOF) return 0; + len = (fgetc(input_file) << 8) | c; + + if(verbose) fprintf(stderr, "detect_amsb() header len==%d (file size should be %d)\n", len, len + 3); + + fseek(input_file, 0, SEEK_END); + c = ftell(input_file); + if(verbose) fprintf(stderr, "detect_amsb() file size %d\n", c); + if(len != (c - 3)) { + if(verbose) fprintf(stderr, "detect_amsb() wrong file size!\n"); + return 0; + } + + if(verbose) fprintf(stderr, "detect_amsb() file size is correct, checking for 3 nulls\n"); + fseek(input_file, -3, SEEK_END); + if(fgetc(input_file)) return 0; + if(fgetc(input_file)) return 0; + if(fgetc(input_file)) return 0; + + if(verbose) fprintf(stderr, "detect_amsb() found 3 nulls, return 1\n"); + + return 1; +} + +void foreign(const char *name, int srval) { + if(input_file) fclose(input_file); + if(script_mode) { + exit(srval); + } else { + puts(name); + exit(0); + } +} + +void detect_foreign(void) { + int c, d; + + c = fgetc(input_file); + d = fgetc(input_file); + + if(c == EOF || d == EOF) + die("File is too short to be a BASIC program of any kind."); + + if(c == 0 && d == 0) { + /* This is why we can't read from stdin. */ + rewind(input_file); + return; + } + + if(c == 0xfb && d == 0xc2) + foreign("Compiled Turbo BASIC XL", SRET_COMPILED_TURBO); + + if(c == 0xff && d == 0xff) + foreign("XEX executable (not BASIC at all!)", SRET_NOT_BASIC); + + if(c == 0xfe && d == 0xfe) + foreign("Mac/65 tokenized source (not BASIC at all!)", SRET_NOT_BASIC); + + if(c == 0xdd && d == 0x00) + foreign("EXTENDed OSS BASIC XE", SRET_EXTENDED_BXE); + + if(c == 0x7f && d == 'E') { + c = fgetc(input_file); + d = fgetc(input_file); + if(c == 'L' && d == 'F') + foreign("ELF executable (not BASIC at all!)", SRET_NOT_BASIC); + } + + if(c == 0 && detect_amsb()) { + foreign("Atari Microsoft BASIC", SRET_AMSB); + } + + if(isdigit(c) && (d == 0x20 || isdigit(d))) + foreign("Text file, could be LISTed BASIC (or not)", SRET_NOT_BASIC); + + if(isprint(c) && isprint(d)) + foreign("Text file (not BASIC at all!)", SRET_NOT_BASIC); + + foreign("Unknown file type (not BASIC at all!)", SRET_NOT_BASIC); +} + +void check_variables(void) { + int pos; + + if(vntp == vntd) return; + + /* Unlike Atari BASIC, Turbo variables can have _ in the names. + So can BASIC XE, though it's not documented in the BASIC XE + Reference Manual that I have. + BXL can't have _ in variable names. */ + for(pos = vnstart; pos < vvstart; pos++) { + if((program[pos] & 0x7f) == '_') { + remove_type(BT_ATARI | BT_BXL); + } + } + + /* Also, Turbo line labels (for PROC/EXEC and #/GO#) are variables + with a type that's illegal in Atari/BXL/BXE. */ + for(pos = vvstart; pos < codestart; pos += 8) { + if((program[pos] & 0xc0) == 0xc0) { + set_type(BT_TURBO); + } + } + + /* I was hoping to check for BXL/BXE string arrays here. However, + looking at a SAVEd file, they look identical to regular string + variables (variable type $80, rest of the VVTP entry all $00). + When the program's actually in memory, BXL/BXE sets the + variable type byte to $91 for DIMed string array var and + $81 (same as Atari/Turbo) for a regular DIMed string var. + Unfortunately in the SAVE file, it's always $80. */ + + /* Another thing that can't be detected: BXL/BXE's FAST mode changes + the program in memory (line number targets become addresses), + but programs don't get SAVEd this way: SAVE turns off FAST and + restores the program to its original state before writing it + to disk. Too bad. */ +} + +/* BASIC/A+ support is *very* simple. It's similar to BASIC XL (no + surprise there)... but unlike Turbo, BXL, and BXE, it's *not* + token-compatible with original Atari BASIC. Rather than add + their new tokens to the end of the lists, they're mixed in with + the others. So A+ can't even LOAD or RUN Atari BASIC files. + Appendix J of the BASIC/A+ manual tells you to LIST in BASIC, + reboot, ENTER in A+, to "port" your BASIC programs to A+. I + suppose if you upgraded from A+ to BASIC XL or XE, you'd have + to do the same thing to use A+ programs in XL/XE. + + While this was probably a PITA for BASIC/A+ users back in the day, + it makes it *really* easy to detect A+ here. The last line of + every SAVEed program is the direct-mode command, and contains either + a SAVE or CSAVE cmd token. Which is the same token in Atari, Turbo, + BXL, and BXE... but *different* in A+. + + However... I've run into at least one BASIC program in the Holmes + Archive that was missing its line 32768 (wish I could remember which). + And it's possible for files to get truncated... so I'll check a few + other command tokens, to deal with cases like this. + */ +void found_aplus(void) { + foreign("OSS BASIC/A+", SRET_APLUS); +} + +CALLBACK(check_aplus_cmd) { + int has_args; + unsigned char nexttok, nexttok2; + + nexttok = program[pos + 1]; + nexttok2 = program[pos + 2]; + has_args = !(nexttok == OP_EOS || nexttok == OP_EOL); + if(verbose) fprintf(stderr, "check_aplus_cmd: line %d, pos $%04x, tok $%02x, nexttok $%02x, nexttok2 $%02x\n", lineno, pos, tok, nexttok, nexttok2); + + switch(tok) { + case CMD_POP: /* A+ READ */ + case CMD_DOS: /* A+ GET */ + case CMD_DEG: /* A+ WHILE */ + case CMD_CLR: /* A+ DIM */ + case 0x46: /* A+ SOUND <args>, Turbo LOOP, BXL/XE CP */ + case 0x3c: /* A+ DIR <req'd arg>, Turbo REPEAT, BXL/XE ELSE */ + if(has_args) + found_aplus(); + break; + + case CMD_SAVE: /* A+ END */ + case CMD_GRAPHICS: /* A+ STOP */ + case CMD_DIM: /* A+ ENDWHILE */ + /* case CMD_GET: */ /* A+ RETURN, but Turbo allows 0 args too! */ + case 0x38: /* A+ DOS, Turbo DPOKE, BXL/XE WHILE */ + if(!has_args) + found_aplus(); + break; + + case CMD_POSITION: /* A+ ? */ + /* PARTIAL: does nothing if 1st arg is numeric. */ + if(!has_args) + /* POSITION can't have 0 args. */ + found_aplus(); + else if(is_string_rval(nexttok)) + /* ? "STRING" or ? A$, e.g., can't be POSITION */ + found_aplus(); + else if(nexttok == OP_NUM_LE) /* numeric <= in BASIC, # in A+ */ + found_aplus(); + break; + + case CMD_RUN: /* A+ PRINT */ + /* PARTIAL: only detects PRINT # or PRINT <num> ... */ + /* A+'s # token is BASIC's OP_NUM_LE! */ + if(nexttok == OP_NUM_LE) + found_aplus(); + else if(is_numconst_op(nexttok) || is_numeric_var(nexttok)) + found_aplus(); + break; + + case CMD_XIO: /* A+ SAVE */ + /* most programs, this is enough, because they'll end with + 32768 SAVE "D:BLAH" */ + if(is_string_rval(nexttok)) + found_aplus(); + break; + + case CMD_OPEN: /* A+ ELSE */ + /* case CMD_CLOSE: */ /* A+ DEG */ /* can't check, Turbo allows no args */ + case CMD_STATUS: /* A+ NEW */ + case CMD_POINT: /* A+ LOAD */ + case 0x42: /* A+ POSITION, Turbo BPUT, BXL RGET */ + case 0x43: /* A+ DRAWTO, Turbo BGET, BXL BPUT */ + if(nexttok != OP_HASH) /* # in BASIC, USING in A+ */ + found_aplus(); + break; + + /* case 0x48: */ /* A+ CSAVE, Turbo DIR, BXL/BXE PROTECT */ + /* DIR without arg is OK, so we can't really check this. */ + /* break; */ + + case CMD_DRAWTO: /* A+ PUT */ + case CMD_SOUND: /* A+ RPUT */ + case CMD_LPRINT: /* A+ RGET */ + case CMD_CSAVE: /* A+ BPUT */ + case CMD_CLOAD: /* A+ BGET */ + case CMD_ON: /* A+ STATUS */ + case CMD_NOTE: /* A+ OPEN */ + case CMD_CONT: /* A+ CLOSE */ + case CMD_RAD: /* A+ XIO */ + if(nexttok == OP_NUM_LE) /* numeric <= in BASIC, # in A+ */ + found_aplus(); + break; + + default: break; + } + + last_cmd = tok; +} + +void check_aplus(void) { + allow_hex_const = 1; + + on_cmd_token = check_aplus_cmd; + walk_all_code(); +} + +void check_atari_turbo_oss(void) { + allow_hex_const = 1; + + on_cmd_token = handle_cmd; + on_exp_token = handle_op; + on_end_stmt = handle_end_stmt; + + walk_all_code(); +} + +int main(int argc, char **argv) { + set_self(*argv); + parse_general_args(argc, argv, print_help); + parse_args(argc, argv); + + detect_foreign(); + + readfile(); + parse_header(); + + check_variables(); + check_aplus(); + check_atari_turbo_oss(); + + print_result(); /* always exits */ + return 0; /* never happens, shuts up gcc's warning though */ +} |