#include #include #include #include #include #include #include #include "bas.h" #include "whichbas.h" #define BT_INVALID 0 #define BT_ATARI 1 #define BT_TURBO 2 #define BT_BXL 4 #define BT_BXE 8 #define BT_BXL_BXE (BT_BXL | BT_BXE) int bas_type = 0x0f; /* start out with all enabled */ int script_mode = 0; /* -s flag */ int script_ret; /* -s mode, exit with one of SRET_* from whichbas.h as status */ int keep_going = 0; /* -k flag */ int comma_count; /* count of regular commas (not string/array) in statement */ unsigned char last_cmd; unsigned char last_op_tok; unsigned short last_cmd_pos; void print_help(void) { printf("Usage: %s [-v] [-h] [-k] [-s] ...\n", self); } int child_errs = 0; /* return true if the child ran and returned 0 (success) */ int spawn_child(const char **args) { pid_t pid, status; int wstatus; pid = fork(); if(pid == -1) { perror("fork()"); die("Can't spawn child process"); } else if(pid) { /* we are the parent */ status = waitpid(pid, &wstatus, 0); if(status < 0) { perror("waitpid()"); die("Child process went south"); } if(! (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == 0) ) { child_errs++; return 0; } } else { /* we are the child */ if(execv(args[0], (char * const *)args) < 0) { perror("exec()"); exit(1); } } return 1; } /* this is not the ideal way to handle multiple files. it forks a new process for each one. however, I embedded a bunch of die() calls in bas.c, thinking I'd never use some of the functions more than once in the same run. what I get for trying to KISS... */ void multiple_files(const char *argv0, char **list) { const char *args[5]; /* 5 because eventually we include -k and/or -v */ int kidstat; /* this isn't needed for things to work, but if I write buggy code, it avoids a forkbomb. */ setenv("WHICHBAS_MULTI_PARANOIA", "1", 1); args[0] = argv0; args[2] = NULL; while(*list) { args[1] = *list; printf("%s:\t", *list); fflush(stdout); kidstat = spawn_child(args); if(!kidstat) puts("(detection failed)"); fflush(stdout); list++; } if(child_errs) { fprintf(stderr, "%s: exiting with error status because some files failed.\n", self); exit(1); } else { exit(0); } } void parse_args(int argc, char **argv) { int opt; while( (opt = getopt(argc, argv, "vksh")) != -1) { switch(opt) { case 'v': verbose = 1; break; case 'k': keep_going = verbose = 1; break; case 's': script_mode = 1; break; case 'h': print_help(); exit(0); default: print_help(); exit(1); } } if(optind >= argc) die("No input file given (and stdin not supported)."); if(optind == argc - 1) { /* got one filename only */ open_input(argv[optind]); if(input_file == stdin) die("Reading from standard input not supported."); } else { /* got multiple filenames */ if(keep_going || verbose || script_mode) die("-k, -v, -s not supported with multiple filenames (yet?)"); if(getenv("WHICHBAS_MULTI_PARANOIA")) die("BUG: multiple_files() recursion!"); multiple_files(argv[0], argv + optind); } } /* don't need this. void add_type(int type) { bas_type |= type; } */ void print_result(void) { const char *name; if(verbose) fprintf(stderr, " final bas_type %02x\n", bas_type); if(bas_type == BT_INVALID) { name = "Unknown variant: Not Atari BASIC, Turbo, BXL, or BXE"; script_ret = SRET_UKNOWN_DERIV; } else if(bas_type & BT_ATARI) { name = "Atari BASIC"; script_ret = SRET_ATARI; } else if(bas_type & BT_TURBO) { if(bas_type & BT_BXL) { name = "Either Turbo BASIC XL or OSS BASIC XL"; script_ret = SRET_TURBO_OR_BXL; } else if(bas_type & BT_BXE) { name = "Either Turbo BASIC XL or OSS BASIC XE"; script_ret = SRET_TURBO_OR_BXE; } else { /* bas_type == BT_TURBO */ name = "Turbo BASIC XL"; script_ret = SRET_TURBO; } } else if(bas_type == BT_BXL || bas_type == (BT_BXL | BT_BXE)) { name = "OSS BASIC XL"; script_ret = SRET_BXL; } else if(bas_type == BT_BXE) { name = "OSS BASIC XE"; script_ret = SRET_BXE; } else { /* this one should never happen: */ name = "Either Turbo BASIC XL, OSS BASIC XL, or OSS BASIC XE"; script_ret = SRET_TURBO_OR_BXL_OR_BXE; } if(script_mode) { exit(script_ret); } else { puts(name); exit(0); } } /* return true if a token is numeric constant (including TB/BXE/BXL hex) */ int is_numconst_op(unsigned char tok) { switch(tok) { case OP_NUMCONST: case OP_HEXCONST: return 1; default: return 0; } } /* return true if a token is a function that *returns* a numeric value (says nothing about the argument types, though!) */ int is_numeric_func(unsigned char tok) { switch(tok) { case OP_FUNC_USR: case OP_FUNC_ASC: case OP_FUNC_VAL: case OP_FUNC_LEN: case OP_FUNC_ADR: case OP_FUNC_ATN: case OP_FUNC_COS: case OP_FUNC_PEEK: case OP_FUNC_SIN: case OP_FUNC_RND: case OP_FUNC_FRE: case OP_FUNC_EXP: case OP_FUNC_LOG: case OP_FUNC_CLOG: case OP_FUNC_SQR: case OP_FUNC_SGN: case OP_FUNC_ABS: case OP_FUNC_INT: case OP_FUNC_PADDLE: case OP_FUNC_STICK: case OP_FUNC_PTRIG: case OP_FUNC_STRIG: return 1; default: return 0; } } /* return true if a token is an arithmetic operator */ int is_arith_op(unsigned char tok) { switch(tok) { case OP_UPLUS: /* not sure these two... */ case OP_UMINUS: /* ...really belong here */ case OP_NUM_LE: case OP_NUM_NE: case OP_NUM_GE: case OP_NUM_LT: case OP_NUM_GT: case OP_NUM_EQ: case OP_POWER: case OP_MULT: case OP_PLUS: case OP_MINUS: case OP_DIVIDE: case OP_NOT: case OP_OR: case OP_AND: case OP_NUM_ASSIGN: case OP_GRP_LPAR: /* yes, this belongs here, (((A$))) is a syntax error! */ return 1; default: return 0; } } int is_numeric_var(unsigned char tok) { int vartype; if(tok < 0x80) return 0; vartype = get_vartype(tok); return (vartype == TYPE_SCALAR || vartype == TYPE_ARRAY); } /* return true if a token is: - a numeric constant (including hex constants), - a numeric variable (including arrays), - a function that returns a numeric (e.g. ASC(), SIN()). for now, only standard Atari BASIC tokens are considered. unary minus and plus make sense here, but binary ops don't. */ int is_numeric_rval(unsigned char tok) { return (tok == OP_UMINUS) || (tok == OP_UPLUS) || is_numconst_op (tok) || is_numeric_func (tok) || is_numeric_var (tok) ; } /* return true if a token is: - a numeric constant (including hex constants), - a numeric variable (including arrays), - a math operator (plus, minus, etc), - a function that returns a numeric (e.g. ASC(), SIN()). for now, only standard Atari BASIC tokens are considered. */ int is_numeric_op(unsigned char tok) { return is_numeric_rval (tok) || is_arith_op (tok) ; } int is_string_var(unsigned char tok) { return (tok >= 0x80 && (get_vartype(tok) == TYPE_STRING)); } int is_string_const(unsigned char tok) { return (tok == OP_STRCONST); } int is_string_exp_op(unsigned char tok) { switch(tok) { case OP_STR_ASSIGN: case OP_STR_LE: case OP_STR_NE: case OP_STR_GE: case OP_STR_LT: case OP_STR_GT: case OP_STR_EQ: case OP_STR_LPAR: /* the ( in: A$(1)="A" */ return 1; default: return 0; } } int is_string_func(unsigned char tok) { switch(tok) { case OP_FUNC_STR: case OP_FUNC_CHR: return 1; default: return 0; } } /* return true if a token is: - a string constant, - a string variable, - a function that returns a string. */ int is_string_rval(unsigned char tok) { return is_string_const (tok) || is_string_func (tok) || is_string_var (tok) ; } /* return true if a token is: - a string constant, - a string variable, - a string expression operator, like OP_STR_LE, - a function that returns a string. */ int is_string_op(unsigned char tok) { return is_string_rval (tok) || is_string_exp_op (tok) ; } /* true if an operator token is a string function in BASIC XL (or XE). these tokens are all numeric functions in Turbo, so be sure you know what you're doing! */ int is_bxl_string_func(unsigned char tok) { switch(tok) { case 0x5c: /* BXL HEX$, Turbo DEC */ case 0x66: /* BXL LEFT$, Turbo %0 */ case 0x67: /* BXL RIGHT$, Turbo %1 */ case 0x68: /* BXL MID$, Turbo %2 */ return 1; default: return 0; } } void remove_type(int type) { bas_type &= ((~type) & 0x0f); if(keep_going) return; /* without -k, stop if it gets narrowed down to one of these 4. */ if(bas_type == BT_ATARI || bas_type == BT_TURBO || bas_type == BT_BXE || bas_type == BT_BXL) print_result(); } void set_type(int type) { bas_type = type; if(!keep_going) print_result(); } CALLBACK(handle_cmd) { int has_args = 0, has_var_arg = 0, vartype = -1; unsigned char nexttok; last_cmd = tok; last_cmd_pos = pos; comma_count = 0; if(verbose) fprintf(stderr, "handle_cmd: lineno %d, tok $%02x, bas_type was %02x\n", lineno, tok, bas_type); nexttok = program[pos + 1]; has_args = !(nexttok == OP_EOS || nexttok == OP_EOL); if(nexttok >= 0x80) { has_var_arg = 1; vartype = get_vartype(nexttok); } /* this switch is for tokens that are the same in Atari/Turbo/BXL/BXE, but with different semantics. non-Atari-BASIC tokens go in the switch below, not this one. */ switch(tok) { /* TB uses the same token for CLOSE as Atari and BXL/BXE, but it allows it to have no argument (meaning, close all IOCBs). SOUND is the same (no args = silence all POKEY channels). */ case CMD_CLOSE: case CMD_SOUND: if(!has_args) { set_type(BT_TURBO); } break; case CMD_INPUT: /* TB, BXL, BXE all support INPUT "Prompt",VAR with the same tokenized form. Atari BASIC doesn't allow string constants in INPUT args. */ if(has_args) { if(nexttok == OP_STRCONST) { int pos_after_string; remove_type(BT_ATARI); /* TB only: INPUT "Prompt";VAR is also supported (not in BXL/BXE) */ pos_after_string = pos + 3 + program[pos + 2]; if(verbose) fprintf(stderr, "===> INPUT with string prompt at line %d, " "pos %04x, pos_after_string %04x (token %02x)\n", lineno, pos, pos_after_string, program[pos_after_string]); if(program[pos_after_string] == OP_SEMICOLON) { set_type(BT_TURBO); } } } else { /* has_args is false, oh shit! */ fprintf(stderr, "*** INPUT without variable at line %d.\n*** Rev A BASIC bug, program will crash, better fix it!\n", lineno); set_type(BT_ATARI); } break; case CMD_GET: case CMD_PUT: /* TB uses the same tokens for GET and PUT as Atari/BXL/BXE, but it allows the argument to be a variable without a # in front of it. */ if(nexttok != OP_HASH) { set_type(BT_TURBO); } /* PARTIAL: we really should detect GET #1,A$. this is Turbo-only, but probably nobody ever uses it because it doesn't seem to *work*, at least not in TB 1.5. A$ always ends up empty with length 0. */ break; case CMD_RESTORE: case CMD_TRAP: /* TB allows RESTORE #LABEL and TRAP #LABEL */ if(nexttok == OP_HASH) { set_type(BT_TURBO); } break; default: break; } if(tok <= CMD_ERROR) return; /* legal in BASIC, ignore */ remove_type(BT_ATARI); if(tok >= 0x59) remove_type(BT_BXL); if(tok >= 0x65) { fprintf(stderr, "handle_cmd: invalid command %02x at line %d\n", tok, lineno); keep_going = 0; set_type(BT_INVALID); } /* we have tokens 0x3a to 0x68 in both TB and BXE, or 47 of them. Some tokens can't be determined, because they take the same argument (or lack of) in both Turbo and BXL/XE. These are: 0x3c: REPEAT or ELSE (no args either way) 0x46: LOOP or CP (no args either way) 0x49: LOCK or UNPROTECT (take the same args) 0x4B: RENAME in both Turbo and BXL/XE (same token, same args) 4 of them, this leaves 43 we can check. Covered so far: 41 (95%). However, some of these are marked PARTIAL because they're not detected under all circumstances. Unchecked tokens: 0x5B: BRUN or CALL (both take a string, CALL allows "USING" though) This isn't really important, as CALL requires a PROCEDURE to exist, and we *do* catch the PROCEDURE token. 0x5F: PAINT (req 2 args) or NUM (optional 2 args). Again, not important, because it's highly unlikely any BXL/BXE program will contain NUM... because when it executes, it stops the program and goes back to the READY prompt (in auto-numbering mode). */ switch(tok) { case 0x39: /* MOVE or ENDWHILE */ case 0x3a: /* -MOVE or TRACEOFF */ case 0x3d: /* UNTIL or ENDIF */ case 0x56: /* DEL or FAST */ case 0x62: /* CIRCLE (3 or 4 num args) or NORMAL (no args) */ /* COMPLETE */ if(has_args) { remove_type(BT_BXL_BXE); } else { remove_type(BT_TURBO); } break; case 0x58: /* TRACE (optional + or -), EXTEND (BXE; no args) */ /* COMPLETE */ /* In BXL, this looks to be an extra END token, that behaves the same as the regular one, but can't be entered in the editor. Assume no BXL program contains this token. */ /* In BXE, EXTEND can't actually appear in a program (it's direct mode only). The only way to get EXTEND into a BXE program is to do a direct mode command like: EXTEND:SAVE "D:PROG" ...which of course puts it at line 32768. But this code will never see that, because we'd already detect EXTENDed BXE based on the first 2 bytes of the file. */ /* So, if we see this token, it *has* to be Turbo's TRACE, whether or not it has an argument. */ set_type(BT_TURBO); break; case 0x59: /* TEXT or PROCEDURE */ /* COMPLETE */ /* Turbo: TEXT (1st arg is number), BXL: invalid token, BXE: PROCEDURE (arg is string const (not var!)) */ if(nexttok == OP_STRCONST) { /* this token doesn't seem to be valid in BXL at all */ set_type(BT_BXE); } else { remove_type(BT_BXL_BXE); } break; case 0x3f: /* WEND or LOMEM */ case 0x40: /* ELSE or DEL */ case 0x41: /* ENDIF or RPUT */ case 0x45: /* DO or TAB */ case 0x47: /* EXIT or ERASE */ case 0x51: /* ENDPROC or PMMOVE */ /* COMPLETE */ if(has_args) { remove_type(BT_TURBO); } else { remove_type(BT_BXL_BXE); } break; case 0x48: /* DIR (optional arg) or PROTECT (req'd arg) */ /* PARTIAL: without args means TB, but with arg, it could be either */ if(!has_args) { remove_type(BT_BXL_BXE); } break; case 0x4a: /* UNLOCK (req'd arg) or DIR (optional arg) */ /* PARTIAL: without args means BXL/BXE, but with arg, it could be either */ if(!has_args) { remove_type(BT_TURBO); } break; case 0x3b: /* *F (optional + or -), TRACE (no arg) */ case 0x5e: /* *B (optional + or -) or EXIT (no arg) */ /* PARTIAL: doesn't catch *F or *B by itself with no +/- */ if(has_args) { remove_type(BT_BXL_BXE); } break; case 0x44: /* FILLTO or BGET (check for a # after the token) */ /* COMPLETE */ if(nexttok == OP_HASH) { remove_type(BT_TURBO); } else { remove_type(BT_BXL_BXE); } break; case 0x4e: /* TIME$= (1 string arg) or PMCLR (1 num arg) */ /* PARTIAL: but almost complete. nothing happens if it's TIME$= with a string function (probably rare) or PMCLR with a complex expression. */ if(nexttok == OP_STRCONST) { remove_type(BT_BXL_BXE); } else if(has_var_arg && vartype == TYPE_STRING) { remove_type(BT_BXL_BXE); } else if(nexttok == OP_NUMCONST) { remove_type(BT_TURBO); } else if(has_var_arg && vartype == TYPE_SCALAR) { remove_type(BT_TURBO); } break; case 0x50: /* EXEC (1 arg, *must* be variable) or PMGRAPHICS (1 num arg, may be const) */ /* PARTIAL: PMGRAPHICS VAR won't be detected. but this usage is rare. */ /* This check is actually redundant, because EXEC requires Turbo's label type (high bits in var name table both set to 1), which we already detected in check_variables(). */ if(!has_var_arg) { remove_type(BT_TURBO); } break; case 0x54: /* -- in TB, LVAR in BXL/BXE */ /* COMPLETE */ /* We can tell these apart because: 1. TB gives us a next-statement offset of 5 if -- is the first (or actually only) statement on a line. Normally, the minimum offset is 6, but there's no OP_EOL after this token for some reason. 2. If -- is the 2nd or or later statement on a line (after a colon) it *does* get a statement terminator, but it's 0x9b (ATASCII EOL, like a REM or DATA gets). Note that it's impossible to put more statements *after* the --, they just get ignored if you type them. This doesn't help us here, but it's interesting anyway. Also, the -- is what you type to enter it into the program, but it get LISTed as a line of 30 dashes. The explanation is a lot longer than the code... */ if(program[pos - 1] == 0x05 || nexttok == 0x9b) { set_type(BT_TURBO); } else { remove_type(BT_TURBO); } break; case 0x57: /* DUMP (1 optional string arg) or LOCAL (1 *numeric* variable arg) */ /* BXL/BXE's LOCAL only works on scalars, not arrays or strings. so if there's no arg, or one string arg... */ /* PARTIAL: almost complete, doesn't handle DUMP func$(arg), which I doubt anyone uses anyway. */ if(!has_args) { /* only Turbo allows no arg... */ remove_type(BT_BXL_BXE); } else if(nexttok == OP_STRCONST) { /* only Turbo allows a string constant arg... */ remove_type(BT_BXL_BXE); } else if(has_var_arg && vartype == TYPE_STRING) { /* only Turbo allows a string variable arg... */ remove_type(BT_BXL_BXE); } else if(has_var_arg && vartype == TYPE_SCALAR) { /* only BXL/BXL allows a scalar variable arg */ remove_type(BT_TURBO); } break; case 0x5a: /* TB: BLOAD; BXL: extension mechanism; BXE: invalid. */ /* COMPLETE */ /* This is the token used for the BXL EXTEND.COM added commands, from the Toolkit disk. It's followed by a byte ranging 0x10 to 0x15 that specifies which extended command, e.g. 0x5a 0x11 means EXIT, 0x5a 0x12 is PROCEDURE, 0x5a 0x13 is CALL. Although these look like BXE's extra commands, they aren't the same tokens, and BXE will choke on them (RUN causes "Error- 33", LIST causes a lockup). */ if(nexttok >= 0x10 && nexttok <= 0x15) { /* worth mentioning to the user... */ fprintf(stderr, "Note: program requires EXTEND.COM from BASIC XL Toolkit disk.\n"); set_type(BT_BXL); } else { /* it's BLOAD if followed by e.g. OP_STRCONST or a variable */ set_type(BT_TURBO); } break; case 0x5c: /* GO# (1 arg only) or SORTUP (optional 2nd arg of USING, but no comma) */ case 0x5d: /* # (1 arg only) or SORTDOWN (optional 2nd arg of USING, but no comma) */ /* COMPLETE but no longer needed (check_variables() already found the 11xxxxxx variables) */ /* Turbo BASIC labels have the high 2 bits set to 11, which is illegal in Atari/BXL/BXE. */ if(vartype == 3) { remove_type(BT_BXL_BXE); } else { remove_type(BT_TURBO); } break; case 0x60: /* CLS (optional IOCB with #) or HITCLR (no args) */ /* PARTIAL: without args, can't tell them apart. */ /* I doubt CLS #IOCB is actually used in many Turbo BASIC programs, because it's broken (at least in Turbo 1.5). It's supposed to only clear the screen of output that happened after the OPEN #IOCB, but it really clears the whole screen. */ if(nexttok == OP_HASH) { remove_type(BT_BXL_BXE); } break; default: break; } if(verbose) fprintf(stderr, " bas_type now %02x\n", bas_type); } CALLBACK(handle_op) { unsigned char nexttok = program[pos + 1]; unsigned char nexttok2 = program[pos + 2]; if(tok == OP_COMMA) comma_count++; if(verbose) fprintf(stderr, "handle_op: lineno %d, tok $%02x, comma_count %d, bas_type was %02x\n", lineno, tok, comma_count, bas_type); if(tok == 0x00) { /* Turbo allows 256 variables, tokenizes the first 128 normally ($80-$FF). The extra ones above $FF are tokenized as $00, varnum - $80. None of our other BASICs uses $00 as an operator token, so.. */ set_type(BT_TURBO); } /* attempt to detect BXL/BXE DIM for 2D string arrays. DIM A$(10,10) is illegal in Atari/Turbo. PARTIAL: this only works if the first dimension is either a constant or a scalar variable (not an array element or an expression). fortunately most programs use constants in DIM. */ if(tok == OP_DIM_STR_LPAR) { int str2d = 0; if(nexttok >= 0x80 && nexttok2 == OP_ARR_COMMA) { str2d = 1; } else if(nexttok == OP_NUMCONST || nexttok == OP_HEXCONST) { str2d = (program[pos + 8] == OP_ARR_COMMA); } if(str2d) { if(verbose) fprintf(stderr, "===> found 2d string array at line %d\n", lineno); remove_type(BT_ATARI | BT_TURBO); } } /* BXL/BXE allows string concatenation in assignment with the comma, A$="FOO","BAR" or A$=C$,D$. */ if(last_cmd == CMD_LET || last_cmd == CMD_ILET) { if(program[last_cmd_pos + 2] == OP_STR_ASSIGN) { if(tok == OP_COMMA) { if(is_string_rval(nexttok)) { remove_type(BT_ATARI | BT_TURBO); } } } } if(tok == OP_HEXCONST) remove_type(BT_ATARI); /* hex const (turbo *and* bxl/xe) */ if(tok <= OP_FUNC_STRIG) { if(verbose) fprintf(stderr, " bas_type now %02x\n", bas_type); return; /* legal in BASIC, ignore */ } remove_type(BT_ATARI); /* only Turbo has op tokens numbered 0x69 and up. */ if(tok >= 0x69) { set_type(BT_TURBO); } if(tok >= 0x6E) { fprintf(stderr, "handle_op: invalid operator %02x at line %d\n", tok, lineno); keep_going = 0; set_type(BT_INVALID); } /* There are 25 extra operators in Turbo, and 20 of them are shared with BXL/BXE. Of the 20, 4 of them are undecidable, and the rest are covered here, which means 80% coverage of the shared ops. Undecidables are: 0x56 & (logical AND) or % (XOR), both infix numeric ops; can't tell apart 0x57 ! (logical OR) in both Turbo and BXL/BXE, can't tell apart 0x64 RAND (func, 1 num arg) or TAB (func, 1 num arg), can't tell apart 0x65 TRUNC (func, 1 num arg) or PEN (func, 1 num arg), can't tell apart */ switch(tok) { case 0x55: /* DPEEK (function) TB, USING (infix, not a function) in BXL/BXE */ case 0x58: /* INSTR (function) or & (infix numeric) in BXE. */ case 0x5b: /* HEX$ (func, takes 1 num arg) or FIND( (pseudo-func, 3 args */ /* COMPLETE */ if(nexttok == OP_FUNC_LPAR) { remove_type(BT_BXL_BXE); } else { remove_type(BT_TURBO); } break; case 0x59: /* INKEY$ (0 arg pseudo-func) in TB, string array separator semicolon in BXL/BXE */ /* PARTIAL: ...but pretty good. we *can't* check nexttok == OP_GRP_RPAR, because VAL(INKEY$) or ASC(INKEY$) are legit Turbo code. */ if(nexttok == OP_EOS || nexttok == OP_EOL) { /* the semicolon can't be the last token on the line (needs at least a right-paren), but INKEY$ can. */ remove_type(BT_BXL_BXE); } else if(pos == last_cmd_pos + 1) { /* INKEY$ can be the first operator after the command, e.g if the command is IF. The semicolon cannot. */ remove_type(BT_BXL_BXE); } else if(is_string_exp_op(last_op_tok) || is_string_exp_op(nexttok)) { /* A$=INKEY$, IF INKEY$=A$, A$(LEN(A$)+1)=INKEY$, INKEY$<>"A"... */ remove_type(BT_BXL_BXE); } else if(is_numeric_op(last_op_tok) || is_numeric_op(nexttok)) { remove_type(BT_TURBO); } break; case 0x5a: /* EXOR (infix num op) or BUMP( (pseudo-function, no OP_FUNC_LPAR) */ case 0x5d: /* DIV (infix num op) or RANDOM( (pseudo-func, 1 or 2 num args) */ /* COMPLETE (I think, anyway) */ if(last_op_tok == OP_GRP_RPAR || last_op_tok == OP_NUMCONST || last_op_tok == OP_HEXCONST || last_op_tok >= 0x80) { /* if the last token was a variable or a numeric, or a right paren, this is infix (can't be a function, last token would have to have been a command or a math/etc operator). */ remove_type(BT_BXL_BXE); } else { remove_type(BT_TURBO); } break; case 0x5c: /* DEC (function, takes str) in TB, HEX$ (function, takes num) in BXL/BXE */ /* COMPLETE */ if(is_string_rval(nexttok2)) { remove_type(BT_BXL_BXE); } else if(is_numeric_op(nexttok2)) { remove_type(BT_TURBO); } break; case 0x5e: /* FRAC (num func, 1 arg) or DPEEK (num func, 1 arg) in BXL... however BXE has an optional 2nd arg. */ { /* PARTIAL: This detects the 2nd arg for simple cases where the 1st arg is a constant or a numeric variable, but not if the 1st arg is an expression or an array element. */ int has2 = 0; if(nexttok2 == OP_NUMCONST || nexttok2 == OP_HEXCONST) { if(program[pos + 9] == OP_ARR_COMMA) has2 = 1; } else if(nexttok2 >= 0x80 && program[pos + 3] == OP_ARR_COMMA) { has2 = (get_vartype(nexttok2) == TYPE_SCALAR); } if(has2) { set_type(BT_BXE); } } break; case 0x5f: /* TIME$ in TB, SYS (function) in BXL/BXE */ case 0x60: /* TIME in TB, VSTICK (function) in BXL/BXE */ case 0x61: /* MOD (infix op) in TB, HSTICK (function) in BXL/BXE */ case 0x62: /* EXEC (infix op, with ON) in TB, PMADR (function) in BXL/BXE */ /* COMPLETE */ if(nexttok == OP_FUNC_LPAR) { remove_type(BT_TURBO); } else { remove_type(BT_BXL_BXE); } break; case 0x63: /* RND (pseudo-func, no arg) or ERR (func, 1 num arg) */ /* COMPLETE */ if(nexttok != OP_FUNC_LPAR) { set_type(BT_TURBO); } break; case 0x66: /* %0 in TB, LEFT$( (pseudo-func, takes string) in BXL/BXE */ case 0x67: /* %1 in TB, RIGHT$( (pseudo-func, takes string) in BXL/BXE */ case 0x68: /* %2 in TB, MID$( (pseudo-func, takes string) in BXL/BXE */ /* COMPLETE */ /* LEFT$/RIGHT$/MID$ do NOT get OP_FUNC_LPAR (the "(" is part of the token name). They're always followed by a string operator... and it works out that none of the tokens for BXL-only string funcs are allowed to follow %0 %1 %2 in Turbo. */ if(is_string_op(nexttok) || is_bxl_string_func(nexttok)) { remove_type(BT_TURBO); } else { remove_type(BT_BXL_BXE); } break; default: break; } last_op_tok = tok; if(verbose) fprintf(stderr, " bas_type now %02x\n", bas_type); } /* we can count commas, because both Turbo and BXE/BXL use the "array" comma to separate function arguments, not the "regular" comma. */ CALLBACK(handle_end_stmt) { if(verbose) fprintf(stderr, "handle_end_stmt: lineno %d, tok $%02x, last_cmd $%02x, comma_count %d, bas_type was %02x\n", lineno, tok, last_cmd, comma_count, bas_type); switch(last_cmd) { case 0x38: /* DPOKE (2 args) or WHILE (1 arg) */ if(comma_count) { remove_type(BT_BXL_BXE); } else { remove_type(BT_TURBO); } break; case 0x3e: /* WHILE (1 arg) or DPOKE (2 or 3 args) */ case 0x4c: /* DELETE (1 arg) or MOVE (3 or 4 args) */ case 0x4d: /* PAUSE (1 arg) or MISSILE (3 args) */ case 0x52: /* FCOLOR (1 arg) or PMWIDTH (2 args) */ case 0x53: /* *L (optional + or - only) or SET (req 2 num args) */ case 0x4f: /* PROC (1 arg) or PMCOLOR (3 args) */ if(comma_count) { /* 1 arg means no commas */ remove_type(BT_TURBO); } else { remove_type(BT_BXL_BXE); } break; case 0x42: /* BPUT or RGET */ /* PARTIAL: Turbo BGET always takes 3 args, BXL/BXE RGET takes 2 or more. We can at least rule out Turbo if there aren't exactly 3 args. */ if(comma_count != 2) { remove_type(BT_TURBO); } break; case 0x43: /* BGET or BPUT */ /* PARTIAL: Turbo BGET and BPUT always take 3 args. So does BXL BPUT. BXE BPUT takes 3 args and an optional 4th. */ if(comma_count != 2) { set_type(BT_BXE); } break; case 0x55: /* RENUM in both (TB req 3 args, BXL up to two) */ if(comma_count == 2) { remove_type(BT_BXL_BXE); } else { remove_type(BT_TURBO); } break; case 0x61: /* DSOUND (0 or 4 num args) or INVERSE (no args) */ /* PARTIAL: can't tell no-argument DSOUND from INVERSE. */ if(comma_count) { remove_type(BT_BXL_BXE); } break; case 0x63: /* %PUT (usually seen with optional #; 1 or 2 args) or BLOAD (1 string arg) */ if(comma_count) { /* multiple args */ remove_type(BT_BXL_BXE); } else if(program[last_cmd_pos + 1] == OP_STRCONST) { /* one arg, string const. XXX: check var type */ remove_type(BT_TURBO); } break; case 0x64: /* %GET (usually seen with optional #; 1 or 2 args) or BSAVE (3 args) */ if(comma_count == 2) { remove_type(BT_TURBO); } else { remove_type(BT_BXL_BXE); } break; default: break; } if(verbose) fprintf(stderr, " bas_type now %02x\n", bas_type); last_cmd = last_op_tok = 0; } /* return true if input_file is Atari MS BASIC. AMSB files begin with a 3-byte header: 0x00, then 2 byte length (LSB/MSB), which is actually 3 bytes less than the full length of the file (or, it's the length of the file minus the 3-byte header). Also, the files always end with 3 0x00 bytes. We check that the header length is 3 bytes less than the file length, then check for the 3 0x00's at the end. */ int detect_amsb(void) { int len, c; if(verbose) fprintf(stderr, "entering detect_amsb()\n"); rewind(input_file); c = fgetc(input_file); if(c) return 0; c = fgetc(input_file); if(c == EOF) return 0; len = (fgetc(input_file) << 8) | c; if(verbose) fprintf(stderr, "detect_amsb() header len==%d (file size should be %d)\n", len, len + 3); fseek(input_file, 0, SEEK_END); c = ftell(input_file); if(verbose) fprintf(stderr, "detect_amsb() file size %d\n", c); if(len != (c - 3)) { if(verbose) fprintf(stderr, "detect_amsb() wrong file size!\n"); return 0; } if(verbose) fprintf(stderr, "detect_amsb() file size is correct, checking for 3 nulls\n"); fseek(input_file, -3, SEEK_END); if(fgetc(input_file)) return 0; if(fgetc(input_file)) return 0; if(fgetc(input_file)) return 0; if(verbose) fprintf(stderr, "detect_amsb() found 3 nulls, return 1\n"); return 1; } void foreign(const char *name, int srval) { if(input_file) fclose(input_file); if(script_mode) { exit(srval); } else { puts(name); exit(0); } } void detect_foreign(void) { int c, d; c = fgetc(input_file); d = fgetc(input_file); if(c == EOF || d == EOF) die("File is too short to be a BASIC program of any kind."); if(c == 0 && d == 0) { /* This is why we can't read from stdin. */ rewind(input_file); return; } if(c == 0xfb && d == 0xc2) foreign("Compiled Turbo BASIC XL", SRET_COMPILED_TURBO); if(c == 0xff && d == 0xff) foreign("XEX executable (not BASIC at all!)", SRET_NOT_BASIC); if(c == 0xfe && d == 0xfe) foreign("Mac/65 tokenized source (not BASIC at all!)", SRET_NOT_BASIC); if(c == 0xdd && d == 0x00) foreign("EXTENDed OSS BASIC XE", SRET_EXTENDED_BXE); if(c == 0x7f && d == 'E') { c = fgetc(input_file); d = fgetc(input_file); if(c == 'L' && d == 'F') foreign("ELF executable (not BASIC at all!)", SRET_NOT_BASIC); } if(c == 0 && detect_amsb()) { foreign("Atari Microsoft BASIC", SRET_AMSB); } if(isdigit(c) && (d == 0x20 || isdigit(d))) foreign("Text file, could be LISTed BASIC (or not)", SRET_NOT_BASIC); if(isprint(c) && isprint(d)) foreign("Text file (not BASIC at all!)", SRET_NOT_BASIC); foreign("Unknown file type (not BASIC at all!)", SRET_NOT_BASIC); } void check_variables(void) { int pos; if(vntp == vntd) return; /* Unlike Atari BASIC, Turbo variables can have _ in the names. So can BASIC XE, though it's not documented in the BASIC XE Reference Manual that I have. BXL can't have _ in variable names. */ for(pos = vnstart; pos < vvstart; pos++) { if((program[pos] & 0x7f) == '_') { remove_type(BT_ATARI | BT_BXL); } } /* Also, Turbo line labels (for PROC/EXEC and #/GO#) are variables with a type that's illegal in Atari/BXL/BXE. */ for(pos = vvstart; pos < codestart; pos += 8) { if((program[pos] & 0xc0) == 0xc0) { set_type(BT_TURBO); } } /* I was hoping to check for BXL/BXE string arrays here. However, looking at a SAVEd file, they look identical to regular string variables (variable type $80, rest of the VVTP entry all $00). When the program's actually in memory, BXL/BXE sets the variable type byte to $91 for DIMed string array var and $81 (same as Atari/Turbo) for a regular DIMed string var. Unfortunately in the SAVE file, it's always $80. */ /* Another thing that can't be detected: BXL/BXE's FAST mode changes the program in memory (line number targets become addresses), but programs don't get SAVEd this way: SAVE turns off FAST and restores the program to its original state before writing it to disk. Too bad. */ } /* BASIC/A+ support is *very* simple. It's similar to BASIC XL (no surprise there)... but unlike Turbo, BXL, and BXE, it's *not* token-compatible with original Atari BASIC. Rather than add their new tokens to the end of the lists, they're mixed in with the others. So A+ can't even LOAD or RUN Atari BASIC files. Appendix J of the BASIC/A+ manual tells you to LIST in BASIC, reboot, ENTER in A+, to "port" your BASIC programs to A+. I suppose if you upgraded from A+ to BASIC XL or XE, you'd have to do the same thing to use A+ programs in XL/XE. While this was probably a PITA for BASIC/A+ users back in the day, it makes it *really* easy to detect A+ here. The last line of every SAVEed program is the direct-mode command, and contains either a SAVE or CSAVE cmd token. Which is the same token in Atari, Turbo, BXL, and BXE... but *different* in A+. However... I've run into at least one BASIC program in the Holmes Archive that was missing its line 32768 (wish I could remember which). And it's possible for files to get truncated... so I'll check a few other command tokens, to deal with cases like this. */ void found_aplus(void) { foreign("OSS BASIC/A+", SRET_APLUS); } CALLBACK(check_aplus_cmd) { int has_args; unsigned char nexttok, nexttok2; nexttok = program[pos + 1]; nexttok2 = program[pos + 2]; has_args = !(nexttok == OP_EOS || nexttok == OP_EOL); if(verbose) fprintf(stderr, "check_aplus_cmd: line %d, pos $%04x, tok $%02x, nexttok $%02x, nexttok2 $%02x\n", lineno, pos, tok, nexttok, nexttok2); switch(tok) { case CMD_POP: /* A+ READ */ case CMD_DOS: /* A+ GET */ case CMD_DEG: /* A+ WHILE */ case CMD_CLR: /* A+ DIM */ case 0x46: /* A+ SOUND , Turbo LOOP, BXL/XE CP */ case 0x3c: /* A+ DIR , Turbo REPEAT, BXL/XE ELSE */ if(has_args) found_aplus(); break; case CMD_SAVE: /* A+ END */ case CMD_GRAPHICS: /* A+ STOP */ case CMD_DIM: /* A+ ENDWHILE */ /* case CMD_GET: */ /* A+ RETURN, but Turbo allows 0 args too! */ case 0x38: /* A+ DOS, Turbo DPOKE, BXL/XE WHILE */ if(!has_args) found_aplus(); break; case CMD_POSITION: /* A+ ? */ /* PARTIAL: does nothing if 1st arg is numeric. */ if(!has_args) /* POSITION can't have 0 args. */ found_aplus(); else if(is_string_rval(nexttok)) /* ? "STRING" or ? A$, e.g., can't be POSITION */ found_aplus(); else if(nexttok == OP_NUM_LE) /* numeric <= in BASIC, # in A+ */ found_aplus(); break; case CMD_RUN: /* A+ PRINT */ /* PARTIAL: only detects PRINT # or PRINT ... */ /* A+'s # token is BASIC's OP_NUM_LE! */ if(nexttok == OP_NUM_LE) found_aplus(); else if(is_numconst_op(nexttok) || is_numeric_var(nexttok)) found_aplus(); break; case CMD_XIO: /* A+ SAVE */ /* most programs, this is enough, because they'll end with 32768 SAVE "D:BLAH" */ if(is_string_rval(nexttok)) found_aplus(); break; case CMD_OPEN: /* A+ ELSE */ /* case CMD_CLOSE: */ /* A+ DEG */ /* can't check, Turbo allows no args */ case CMD_STATUS: /* A+ NEW */ case CMD_POINT: /* A+ LOAD */ case 0x42: /* A+ POSITION, Turbo BPUT, BXL RGET */ case 0x43: /* A+ DRAWTO, Turbo BGET, BXL BPUT */ if(nexttok != OP_HASH) /* # in BASIC, USING in A+ */ found_aplus(); break; /* case 0x48: */ /* A+ CSAVE, Turbo DIR, BXL/BXE PROTECT */ /* DIR without arg is OK, so we can't really check this. */ /* break; */ case CMD_DRAWTO: /* A+ PUT */ case CMD_SOUND: /* A+ RPUT */ case CMD_LPRINT: /* A+ RGET */ case CMD_CSAVE: /* A+ BPUT */ case CMD_CLOAD: /* A+ BGET */ case CMD_ON: /* A+ STATUS */ case CMD_NOTE: /* A+ OPEN */ case CMD_CONT: /* A+ CLOSE */ case CMD_RAD: /* A+ XIO */ if(nexttok == OP_NUM_LE) /* numeric <= in BASIC, # in A+ */ found_aplus(); break; default: break; } last_cmd = tok; } void check_aplus(void) { allow_hex_const = 1; on_cmd_token = check_aplus_cmd; walk_all_code(); } void check_atari_turbo_oss(void) { allow_hex_const = 1; on_cmd_token = handle_cmd; on_exp_token = handle_op; on_end_stmt = handle_end_stmt; walk_all_code(); } int main(int argc, char **argv) { set_self(*argv); parse_general_args(argc, argv, print_help); parse_args(argc, argv); detect_foreign(); readfile(); parse_header(); check_variables(); check_aplus(); check_atari_turbo_oss(); print_result(); /* always exits */ return 0; /* never happens, shuts up gcc's warning though */ }