#include #include #include #include #include #include #include "bas.h" #define BT_INVALID 0 #define BT_ATARI 1 #define BT_TURBO 2 #define BT_BXL 4 #define BT_BXE 8 #define BT_BXL_BXE (BT_BXL | BT_BXE) int bas_type = 0x0f; /* start out with all enabled */ #define SRET_ATARI 3 #define SRET_TURBO 4 #define SRET_BXL 5 #define SRET_BXE 6 #define SRET_TURBO_OR_BXL 7 #define SRET_TURBO_OR_BXE 8 #define SRET_TURBO_OR_BXL_OR_BXE 9 #define SRET_UKNOWN_DERIV 10 #define SRET_AMSB 11 #define SRET_EXTENDED_BXE 12 #define SRET_COMPILED_TURBO 13 #define SRET_APLUS 14 #define SRET_NOT_BASIC 64 int script_mode = 0; /* -s flag */ int script_ret; /* -s mode, exit with one of SRET_*, above, as status */ int keep_going = 0; /* -k flag */ int comma_count; /* count of regular commas (not string/array) in statement */ unsigned char last_cmd; unsigned char last_op_tok; unsigned short last_cmd_pos; void print_help(void) { printf("Usage: %s [-v] [-k] [-s] \n", self); } void parse_args(int argc, char **argv) { int opt; while( (opt = getopt(argc, argv, "vks")) != -1) { switch(opt) { case 'v': verbose = 1; break; case 'k': keep_going = verbose = 1; break; case 's': script_mode = 1; break; default: print_help(); exit(1); } } if(optind >= argc) die("No input file given (and stdin not supported)."); else open_input(argv[optind]); if(input_file == stdin) die("Reading from standard input not supported."); } /* don't need this. void add_type(int type) { bas_type |= type; } */ void print_result(void) { const char *name; if(verbose) fprintf(stderr, " final bas_type %02x\n", bas_type); if(bas_type == BT_INVALID) { name = "Unknown variant: Not Atari BASIC, Turbo, BXL, or BXE"; script_ret = SRET_UKNOWN_DERIV; } else if(bas_type & BT_ATARI) { name = "Atari BASIC"; script_ret = SRET_ATARI; } else if(bas_type & BT_TURBO) { if(bas_type & BT_BXL) { name = "Either Turbo BASIC XL or OSS BASIC XL"; script_ret = SRET_TURBO_OR_BXL; } else if(bas_type & BT_BXE) { name = "Either Turbo BASIC XL or OSS BASIC XE"; script_ret = SRET_TURBO_OR_BXE; } else { /* bas_type == BT_TURBO */ name = "Turbo BASIC XL"; script_ret = SRET_TURBO; } } else if(bas_type == BT_BXL || bas_type == (BT_BXL | BT_BXE)) { name = "OSS BASIC XL"; script_ret = SRET_BXL; } else if(bas_type == BT_BXE) { name = "OSS BASIC XE"; script_ret = SRET_BXE; } else { /* this one should never happen: */ name = "Either Turbo BASIC XL, OSS BASIC XL, or OSS BASIC XE"; script_ret = SRET_TURBO_OR_BXL_OR_BXE; } if(script_mode) { exit(script_ret); } else { puts(name); exit(0); } } void remove_type(int type) { bas_type &= ((~type) & 0x0f); if(keep_going) return; /* without -k, stop if it gets narrowed down to one of these 4. */ if(bas_type == BT_ATARI || bas_type == BT_TURBO || bas_type == BT_BXE || bas_type == BT_BXL) print_result(); } void set_type(int type) { bas_type = type; if(!keep_going) print_result(); } CALLBACK(handle_cmd) { int has_args = 0, has_var_arg = 0, vartype = -1; unsigned char nexttok; last_cmd = tok; last_cmd_pos = pos; comma_count = 0; if(verbose) fprintf(stderr, "handle_cmd: lineno %d, tok $%02x, bas_type was %02x\n", lineno, tok, bas_type); nexttok = program[pos + 1]; has_args = !(nexttok == OP_EOS || nexttok == OP_EOL); if(nexttok >= 0x80) { has_var_arg = 1; vartype = get_vartype(nexttok); } /* this switch is for tokens that are the same in Atari/Turbo/BXL/BXE, but with different semantics. non-Atari-BASIC tokens go in the switch below, not this one. */ switch(tok) { /* TB uses the same token for CLOSE as Atari and BXL/BXE, but it allows it to have no argument (meaning, close all IOCBs). SOUND is the same (no args = silence all POKEY channels). */ case CMD_CLOSE: case CMD_SOUND: if(!has_args) { set_type(BT_TURBO); } break; case CMD_INPUT: /* TB, BXL, BXE all support INPUT "Prompt",VAR with the same tokenized form. Atari BASIC doesn't allow string constants in INPUT args. */ if(has_args) { if(nexttok == OP_STRCONST) { int pos_after_string; remove_type(BT_ATARI); /* TB only: INPUT "Prompt";VAR is also supported (not in BXL/BXE) */ pos_after_string = pos + 3 + program[pos + 2]; if(verbose) fprintf(stderr, "===> INPUT with string prompt at line %d, " "pos %04x, pos_after_string %04x (token %02x)\n", lineno, pos, pos_after_string, program[pos_after_string]); if(program[pos_after_string] == OP_SEMICOLON) { set_type(BT_TURBO); } } } else { /* has_args is false, oh shit! */ fprintf(stderr, "*** INPUT without variable at line %d.\n*** Rev A BASIC bug, program will crash, better fix it!\n", lineno); set_type(BT_ATARI); } break; case CMD_GET: case CMD_PUT: /* TB uses the same tokens for GET and PUT as Atari/BXL/BXE, but it allows the argument to be a variable without a # in front of it. */ if(nexttok != OP_HASH) { set_type(BT_TURBO); } /* PARTIAL: we really should detect GET #1,A$. this is Turbo-only, but probably nobody ever uses it because it doesn't seem to *work*, at least not in TB 1.5. A$ always ends up empty with length 0. */ break; case CMD_RESTORE: case CMD_TRAP: /* TB allows RESTORE #LABEL and TRAP #LABEL */ if(nexttok == OP_HASH) { set_type(BT_TURBO); } break; default: break; } if(tok <= CMD_ERROR) return; /* legal in BASIC, ignore */ remove_type(BT_ATARI); if(tok >= 0x59) remove_type(BT_BXL); if(tok >= 0x65) { fprintf(stderr, "handle_cmd: invalid command %02x at line %d\n", tok, lineno); keep_going = 0; set_type(BT_INVALID); } /* we have tokens 0x3a to 0x68 in both TB and BXE, or 47 of them. Some tokens can't be determined, because they take the same argument (or lack of) in both Turbo and BXL/XE. These are: 0x3c: REPEAT or ELSE (no args either way) 0x46: LOOP or CP (no args either way) 0x49: LOCK or UNPROTECT (take the same args) 0x4B: RENAME in both Turbo and BXL/XE (same token, same args) 4 of them, this leaves 43 we can check. Covered so far: 41 (95%) Unchecked tokens: 0x5B: BRUN or CALL (both take a string, CALL allows "USING" though) This isn't really important, as CALL requires a PROCEDURE to exist, and we *do* catch the PROCEDURE token. 0x5F: PAINT (req 2 args) or NUM (optional 2 args). Again, not important, because it's highly unlikely any BXL/BXE program will contain NUM... because when it executes, it stops the program and goes back to the READY prompt (in auto-numbering mode). */ switch(tok) { case 0x39: /* MOVE or ENDWHILE */ case 0x3a: /* -MOVE or TRACEOFF */ case 0x3d: /* UNTIL or ENDIF */ case 0x56: /* DEL or FAST */ case 0x62: /* CIRCLE (3 or 4 num args) or NORMAL (no args) */ /* COMPLETE */ if(has_args) { remove_type(BT_BXL_BXE); } else { remove_type(BT_TURBO); } break; case 0x58: /* TRACE (optional + or -), EXTEND (BXE; no args) */ /* COMPLETE */ /* In BXL, this looks to be an extra END token, that behaves the same as the regular one, but can't be entered in the editor. Assume no BXL program contains this token. */ /* In BXE, EXTEND can't actually appear in a program (it's direct mode only). The only way to get EXTEND into a BXE program is to do a direct mode command like: EXTEND:SAVE "D:PROG" ...which of course puts it at line 32768. But this code will never see that, because we'd already detect EXTENDed BXE based on the first 2 bytes of the file. */ /* So, if we see this token, it *has* to be Turbo's TRACE, whether or not it has an argument. */ set_type(BT_TURBO); break; case 0x59: /* TEXT or PROCEDURE */ /* COMPLETE */ /* Turbo: TEXT (1st arg is number), BXL: invalid token, BXE: PROCEDURE (arg is string const (not var!)) */ if(nexttok == OP_STRCONST) { /* this token doesn't seem to be valid in BXL at all */ set_type(BT_BXE); } else { remove_type(BT_BXL_BXE); } break; case 0x3f: /* WEND or LOMEM */ case 0x40: /* ELSE or DEL */ case 0x41: /* ENDIF or RPUT */ case 0x45: /* DO or TAB */ case 0x47: /* EXIT or ERASE */ case 0x51: /* ENDPROC or PMMOVE */ /* COMPLETE */ if(has_args) { remove_type(BT_TURBO); } else { remove_type(BT_BXL_BXE); } break; case 0x48: /* DIR (optional arg) or PROTECT (req'd arg) */ /* PARTIAL: without args means TB, but with arg, it could be either */ if(!has_args) { remove_type(BT_BXL_BXE); } break; case 0x4a: /* UNLOCK (req'd arg) or DIR (optional arg) */ /* PARTIAL: without args means BXL/BXE, but with arg, it could be either */ if(!has_args) { remove_type(BT_TURBO); } break; case 0x3b: /* *F (optional + or -), TRACE (no arg) */ case 0x5e: /* *B (optional + or -) or EXIT (no arg) */ /* PARTIAL: doesn't catch *F or *B by itself with no +/- */ if(has_args) { remove_type(BT_BXL_BXE); } break; case 0x44: /* FILLTO or BGET (check for a # after the token) */ /* COMPLETE */ if(nexttok == OP_HASH) { remove_type(BT_TURBO); } else { remove_type(BT_BXL_BXE); } break; case 0x4e: /* TIME$= (1 string arg) or PMCLR (1 num arg) */ /* PARTIAL: but almost complete. nothing happens if it's TIME$= with a string function (probably rare) or PMCLR with a complex expression. */ if(nexttok == OP_STRCONST) { remove_type(BT_BXL_BXE); } else if(has_var_arg && vartype == TYPE_STRING) { remove_type(BT_BXL_BXE); } else if(nexttok == OP_NUMCONST) { remove_type(BT_TURBO); } else if(has_var_arg && vartype == TYPE_SCALAR) { remove_type(BT_TURBO); } break; case 0x50: /* EXEC (1 arg, *must* be variable) or PMGRAPHICS (1 num arg, may be const) */ /* PARTIAL: PMGRAPHICS VAR won't be detected. but this usage is rare. */ /* This check is actually redundant, because EXEC requires Turbo's label type (high bits in var name table both set to 1), which we already detected in check_variables(). */ if(!has_var_arg) { remove_type(BT_TURBO); } break; case 0x54: /* -- in TB, LVAR in BXL/BXE */ /* COMPLETE */ /* We can tell these apart because: 1. TB gives us a next-statement offset of 5 if -- is the first (or actually only) statement on a line. Normally, the minimum offset is 6, but there's no OP_EOL after this token for some reason. 2. If -- is the 2nd or or later statement on a line (after a colon) it *does* get a statement terminator, but it's 0x9b (ATASCII EOL, like a REM or DATA gets). Note that it's impossible to put more statements *after* the --, they just get ignored if you type them. This doesn't help us here, but it's interesting anyway. Also, the -- is what you type to enter it into the program, but it get LISTed as a line of 30 dashes. The explanation is a lot longer than the code... */ if(program[pos - 1] == 0x05 || nexttok == 0x9b) { set_type(BT_TURBO); } else { remove_type(BT_TURBO); } break; case 0x57: /* DUMP (1 optional string arg) or LOCAL (1 *numeric* variable arg) */ /* BXL/BXE's LOCAL only works on scalars, not arrays or strings. so if there's no arg, or one string arg... */ /* PARTIAL: almost complete, doesn't handle DUMP func$(arg), which I doubt anyone uses anyway. */ if(!has_args) { /* only Turbo allows no arg... */ remove_type(BT_BXL_BXE); } else if(nexttok == OP_STRCONST) { /* only Turbo allows a string constant arg... */ remove_type(BT_BXL_BXE); } else if(has_var_arg && vartype == TYPE_STRING) { /* only Turbo allows a string variable arg... */ remove_type(BT_BXL_BXE); } else if(has_var_arg && vartype == TYPE_SCALAR) { /* only BXL/BXL allows a scalar variable arg */ remove_type(BT_TURBO); } break; case 0x5a: /* TB: BLOAD; BXL: extension mechanism; BXE: invalid. */ /* This is the token used for the BXL EXTEND.COM added commands, from the Toolkit disk. It's followed by a byte ranging 0x10 to 0x15 that specifies which extended command, e.g. 0x5a 0x11 means EXIT, 0x5a 0x12 is PROCEDURE, 0x5a 0x13 is CALL. Although these look BXE's extra commands, they aren't the same tokens, and BXE will choke on them (RUN causes "Error- 33", LIST causes a lockup). */ if(nexttok >= 0x10 && nexttok <= 0x15) { /* worth mentioning to the user... */ fprintf(stderr, "Note: program requires EXTEND.COM from the Toolkit disk.\n"); set_type(BT_BXL); } else { /* it's BLOAD if followed by e.g. OP_STRCONST or a variable */ set_type(BT_TURBO); } break; case 0x5c: /* GO# (1 arg only) or SORTUP (optional 2nd arg of USING, but no comma) */ case 0x5d: /* # (1 arg only) or SORTDOWN (optional 2nd arg of USING, but no comma) */ /* COMPLETE but no longer needed (check_variables() already found the 11xxxxxx variables) */ /* Turbo BASIC labels have the high 2 bits set to 11, which is illegal in Atari/BXL/BXE. */ if(vartype == 3) { remove_type(BT_BXL_BXE); } else { remove_type(BT_TURBO); } break; case 0x60: /* CLS (optional IOCB with #) or HITCLR (no args) */ /* PARTIAL: without args, can't tell them apart. */ /* I doubt CLS #IOCB is actually used in many Turbo BASIC programs, because it's broken (at least in Turbo 1.5). It's supposed to only clear the screen of output that happened after the OPEN #IOCB, but it really clears the whole screen. */ if(nexttok == OP_HASH) { remove_type(BT_BXL_BXE); } default: break; } if(verbose) fprintf(stderr, " bas_type now %02x\n", bas_type); } CALLBACK(handle_op) { unsigned char nexttok = program[pos + 1]; unsigned char nexttok2 = program[pos + 2]; if(tok == OP_COMMA) comma_count++; if(verbose) fprintf(stderr, "handle_op: lineno %d, tok $%02x, comma_count %d, bas_type was %02x\n", lineno, tok, comma_count, bas_type); if(tok == 0x00) { /* Turbo allows 256 variables, tokenizes the first 128 normally ($80-$FF). The extra ones above $FF are tokenized as $00, varnum - $80. None of our other BASICs uses $00 as an operator token, so.. */ set_type(BT_TURBO); } /* attempt to detect BXL/BXE DIM for 2D string arrays. DIM A$(10,10) is illegal in Atari/Turbo. PARTIAL: this only works if the first dimension is either a constant or a scalar variable (not an array element or an expression). fortunately most programs use constants in DIM. */ if(tok == OP_DIM_STR_LPAR) { int str2d = 0; if(nexttok >= 0x80 && nexttok2 == OP_ARR_COMMA) { str2d = 1; } else if(nexttok == OP_NUMCONST || nexttok == OP_HEXCONST) { str2d = (program[pos + 8] == OP_ARR_COMMA); } if(str2d) { if(verbose) fprintf(stderr, "===> found 2d string array at line %d\n", lineno); remove_type(BT_ATARI | BT_TURBO); } } if(tok == OP_HEXCONST) remove_type(BT_ATARI); /* hex const (turbo *and* bxl/xe) */ if(tok <= OP_FUNC_STRIG) { if(verbose) fprintf(stderr, " bas_type now %02x\n", bas_type); return; /* legal in BASIC, ignore */ } remove_type(BT_ATARI); /* only Turbo has op tokens numbered 0x69 and up. */ if(tok >= 0x69) { set_type(BT_TURBO); } if(tok >= 0x6E) { fprintf(stderr, "handle_op: invalid operator %02x at line %d\n", tok, lineno); keep_going = 0; set_type(BT_INVALID); } /* There are 25 extra operators in Turbo, and 20 of them are shared with BXL/BXE. Of the 20, 4 of them are undecidable, and the rest are covered here, which means 80% coverage of the shared ops. Undecidables are: 0x56 & (logical AND) or % (XOR), both infix numeric ops; can't tell apart 0x57 ! (logical OR) in both Turbo and BXL/BXE, can't tell apart 0x64 RAND (func, 1 num arg) or TAB (func, 1 num arg), can't tell apart 0x65 TRUNC (func, 1 num arg) or PEN (func, 1 num arg), can't tell apart */ switch(tok) { case 0x55: /* DPEEK (function) TB, USING (infix, not a function) in BXL/BXE */ case 0x58: /* INSTR (function) or & (infix numeric) in BXE. */ case 0x5b: /* HEX$ (func, takes 1 num arg) or FIND( (pseudo-func, 3 args */ /* COMPLETE */ if(nexttok == OP_FUNC_LPAR) { remove_type(BT_BXL_BXE); } else { remove_type(BT_TURBO); } break; case 0x59: /* INKEY$ (0 arg pseudo-func) in TB, string array separator semicolon in BXL/BXE */ /* PARTIAL: ...but pretty good. we *can't* check nexttok == OP_GRP_RPAR, because VAL(INKEY$) or ASC(INKEY$) are legit Turbo code. This can fail to catch A$(X;Y) if X and Y are both complex expressions. */ if(nexttok == OP_EOS || nexttok == OP_EOL) { /* the semicolon can't be the last token on the line (needs at least a right-paren), but INKEY$ can. */ remove_type(BT_BXL_BXE); } else if(pos == last_cmd_pos + 1) { /* INKEY$ can be the first operator after the command, e.g if the command is IF. The semicolon cannot. */ remove_type(BT_BXL_BXE); } else if(last_op_tok == OP_STR_ASSIGN) { /* catches A$=INKEY$, for what that's worth. */ remove_type(BT_BXL_BXE); } else if(last_op_tok == OP_GRP_RPAR || last_op_tok == OP_NUMCONST || last_op_tok == OP_HEXCONST || last_op_tok == OP_STRCONST || last_op_tok >= 0x80) { /* INKEY$ can't directly follow a constant, a variable, or a right-paren. */ remove_type(BT_TURBO); } else if(nexttok == OP_NUMCONST || nexttok == OP_HEXCONST || nexttok >= 0x80 || nexttok == OP_GRP_LPAR) { /* INKEY$ may not be followed by a numeric constant or a variable of any kind, or a parenthesized expression. TODO: determine exactly what all it *can* be followed by, check for that. */ remove_type(BT_TURBO); } break; case 0x5a: /* EXOR (infix num op) or BUMP( (pseudo-function, no OP_FUNC_LPAR) */ case 0x5d: /* DIV (infix num op) or RANDOM( (pseudo-func, 1 or 2 num args) */ /* COMPLETE (I think, anyway) */ if(last_op_tok == OP_GRP_RPAR || last_op_tok == OP_NUMCONST || last_op_tok == OP_HEXCONST || last_op_tok >= 0x80) { /* if the last token was a variable or a numeric, or a right paren, this is infix (can't be a function, last token would have to have been a command or a math/etc operator). */ remove_type(BT_BXL_BXE); } else { remove_type(BT_TURBO); } break; case 0x5c: /* DEC (function, takes str) in TB, HEX$ (function, takes num) in BXL/BXE */ /* PARTIAL: won't catch HEX$(ASC("A")) (or any other nested function call) */ if(nexttok2 == OP_STRCONST) { remove_type(BT_BXL_BXE); } else if(nexttok2 == OP_NUMCONST) { remove_type(BT_TURBO); } else if(nexttok2 >= 0x80) { if(get_vartype(nexttok2) == TYPE_STRING) { remove_type(BT_BXL_BXE); } else { remove_type(BT_TURBO); } } break; case 0x5e: /* FRAC (num func, 1 arg) or DPEEK (num func, 1 arg) in BXL... however BXE has an optional 2nd arg. */ { /* PARTIAL: This detects the 2nd arg for simple cases where the 1st arg is a constant or a numeric variable, but not if the 1st arg is an expression or an array element. */ int has2 = 0; if(nexttok2 == OP_NUMCONST || nexttok2 == OP_HEXCONST) { if(program[pos + 9] == OP_ARR_COMMA) has2 = 1; } else if(nexttok2 >= 0x80 && program[pos + 3] == OP_ARR_COMMA) { has2 = (get_vartype(nexttok2) == TYPE_SCALAR); } if(has2) { set_type(BT_BXE); } } break; case 0x5f: /* TIME$ in TB, SYS (function) in BXL/BXE */ case 0x60: /* TIME in TB, VSTICK (function) in BXL/BXE */ case 0x61: /* MOD (infix op) in TB, HSTICK (function) in BXL/BXE */ case 0x62: /* EXEC (infix op, with ON) in TB, PMADR (function) in BXL/BXE */ /* COMPLETE */ if(nexttok == OP_FUNC_LPAR) { remove_type(BT_TURBO); } else { remove_type(BT_BXL_BXE); } break; case 0x63: /* RND (pseudo-func, no arg) or ERR (func, 1 num arg) */ /* COMPLETE */ if(nexttok != OP_FUNC_LPAR) { set_type(BT_TURBO); } case 0x66: /* %0 in TB, LEFT$( (pseudo-func, takes string) in BXL/BXE */ case 0x67: /* %1 in TB, RIGHT$( (pseudo-func, takes string) in BXL/BXE */ case 0x68: /* %2 in TB, MID$( (pseudo-func, takes string) in BXL/BXE */ /* PARTIAL: doesn't handle LEFT$/etc first arg being a string func. */ if(nexttok == OP_STRCONST || nexttok >= 0x80) { /* %0 %1 %2 can't be followed by a string constant *or* a variable */ remove_type(BT_TURBO); /* Can't do, due to LEFT$(HEX$("1234"), 1) (or STR$, etc) */ /* } else { remove_type(BT_BXL_BXE); */ } break; default: break; } last_op_tok = tok; if(verbose) fprintf(stderr, " bas_type now %02x\n", bas_type); } /* we can count commas, because both Turbo and BXE/BXL use the "array" comma to separate function arguments, not the "regular" comma. */ CALLBACK(handle_end_stmt) { if(verbose) fprintf(stderr, "handle_end_stmt: lineno %d, tok $%02x, last_cmd $%02x, comma_count %d, bas_type was %02x\n", lineno, tok, last_cmd, comma_count, bas_type); switch(last_cmd) { case 0x38: /* DPOKE (2 args) or WHILE (1 arg) */ if(comma_count) { remove_type(BT_BXL_BXE); } else { remove_type(BT_TURBO); } break; case 0x3e: /* WHILE (1 arg) or DPOKE (2 or 3 args) */ case 0x4c: /* DELETE (1 arg) or MOVE (3 or 4 args) */ case 0x4d: /* PAUSE (1 arg) or MISSILE (3 args) */ case 0x52: /* FCOLOR (1 arg) or PMWIDTH (2 args) */ case 0x53: /* *L (optional + or - only) or SET (req 2 num args) */ case 0x4f: /* PROC (1 arg) or PMCOLOR (3 args) */ if(comma_count) { /* 1 arg means no commas */ remove_type(BT_TURBO); } else { remove_type(BT_BXL_BXE); } break; case 0x42: /* BPUT or RGET */ /* PARTIAL: Turbo BGET always takes 3 args, BXL/BXE RGET takes 2 or more. We can at least rule out Turbo if there aren't exactly 3 args. */ if(comma_count != 2) { remove_type(BT_TURBO); } break; case 0x43: /* BGET or BPUT */ /* PARTIAL: Turbo BGET and BPUT always take 3 args. So does BXL BPUT. BXE BPUT takes 3 args and an optional 4th. */ if(comma_count != 2) { set_type(BT_BXE); } break; case 0x55: /* RENUM in both (TB req 3 args, BXL up to two) */ if(comma_count == 2) { remove_type(BT_BXL_BXE); } else { remove_type(BT_TURBO); } break; case 0x61: /* DSOUND (0 or 4 num args) or INVERSE (no args) */ /* PARTIAL: can't tell no-argument DSOUND from INVERSE. */ if(comma_count) { remove_type(BT_BXL_BXE); } case 0x63: /* %PUT (usually seen with optional #; 1 or 2 args) or BLOAD (1 string arg) */ if(comma_count) { /* multiple args */ remove_type(BT_BXL_BXE); } else if(program[last_cmd_pos + 1] == OP_STRCONST) { /* one arg, string const. XXX: check var type */ remove_type(BT_TURBO); } break; case 0x64: /* %GET (usually seen with optional #; 1 or 2 args) or BSAVE (3 args) */ if(comma_count == 2) { remove_type(BT_TURBO); } else { remove_type(BT_BXL_BXE); } break; default: break; } if(verbose) fprintf(stderr, " bas_type now %02x\n", bas_type); last_cmd = last_op_tok = 0; } /* return true if input_file is Atari MS BASIC. AMSB files begin with a 3-byte header: 0x00, then 2 byte length (LSB/MSB), which is actually 3 bytes less than the full length of the file (or, it's the length of the file minus the 3-byte header). Also, the files always end with 3 0x00 bytes. We check that the header length is 3 bytes less than the file length, then check for the 3 0x00's at the end. */ int detect_amsb(void) { int len, c; if(verbose) fprintf(stderr, "entering detect_amsb()\n"); rewind(input_file); c = fgetc(input_file); if(c) return 0; c = fgetc(input_file); if(c == EOF) return 0; len = (fgetc(input_file) << 8) | c; if(verbose) fprintf(stderr, "detect_amsb() header len==%d (file size should be %d)\n", len, len + 3); fseek(input_file, 0, SEEK_END); c = ftell(input_file); if(verbose) fprintf(stderr, "detect_amsb() file size %d\n", c); if(len != (c - 3)) { if(verbose) fprintf(stderr, "detect_amsb() wrong file size!\n"); return 0; } if(verbose) fprintf(stderr, "detect_amsb() file size is correct, checking for 3 nulls\n"); fseek(input_file, -3, SEEK_END); if(fgetc(input_file)) return 0; if(fgetc(input_file)) return 0; if(fgetc(input_file)) return 0; if(verbose) fprintf(stderr, "detect_amsb() found 3 nulls, return 1\n"); return 1; } void foreign(const char *name, int srval) { if(input_file) fclose(input_file); if(script_mode) { exit(srval); } else { puts(name); exit(0); } } void detect_foreign(void) { int c, d; c = fgetc(input_file); d = fgetc(input_file); if(c == EOF || d == EOF) die("File is too short to be a BASIC program of any kind."); if(c == 0 && d == 0) { /* This is why we can't read from stdin. */ rewind(input_file); return; } if(c == 0xfb && d == 0xc2) foreign("Compiled Turbo BASIC XL", SRET_COMPILED_TURBO); if(c == 0xff && d == 0xff) foreign("XEX executable (not BASIC at all!)", SRET_NOT_BASIC); if(c == 0xfe && d == 0xfe) foreign("Mac/65 tokenized source (not BASIC at all!)", SRET_NOT_BASIC); if(c == 0xdd && d == 0x00) foreign("EXTENDed OSS BASIC XE", SRET_EXTENDED_BXE); if(c == 0x7f && d == 'E') { c = fgetc(input_file); d = fgetc(input_file); if(c == 'L' && d == 'F') foreign("ELF executable (not BASIC at all!)", SRET_NOT_BASIC); } if(c == 0 && detect_amsb()) { foreign("Atari Microsoft BASIC", SRET_AMSB); } if(isdigit(c) && (d == 0x20 || isdigit(d))) foreign("Text file, could be LISTed BASIC (or not)", SRET_NOT_BASIC); if(isprint(c) && isprint(d)) foreign("Text file (not BASIC at all!)", SRET_NOT_BASIC); foreign("Unknown file type (not BASIC at all!)", SRET_NOT_BASIC); } void check_variables(void) { int pos; if(vntp == vntd) return; /* Unlike Atari BASIC, Turbo variables can have _ in the names. So can BASIC XE, though it's not documented in the BASIC XE Reference Manual that I have. BXL can't have _ in variable names. */ for(pos = vnstart; pos < vvstart; pos++) { if((program[pos] & 0x7f) == '_') { remove_type(BT_ATARI | BT_BXL); } } /* Also, Turbo line labels (for PROC/EXEC and #/GO#) are variables with a type that's illegal in Atari/BXL/BXE. */ for(pos = vvstart; pos < codestart; pos += 8) { if((program[pos] & 0xc0) == 0xc0) { set_type(BT_TURBO); } } /* I was hoping to check for BXL/BXE string arrays here. However, looking at a SAVEd file, they look identical to regular string variables (variable type $80, rest of the VVTP entry all $00). When the program's actually in memory, BXL/BXE sets the variable type byte to $91 for DIMed string array var and $81 (same as Atari/Turbo) for a regular DIMed string var. Unfortunately in the SAVE file, it's always $80. */ /* Another thing that can't be detected: BXL/BXE's FAST mode changes the program in memory (line number targets become addresses), but programs don't get SAVEd this way: SAVE turns off FAST and restores the program to its original state before writing it to disk. Too bad. */ } /* BASIC/A+ support is *very* simple. It's similar to BASIC XL (no surprise there)... but unlike Turbo, BXL, and BXE, it's *not* token-compatible with original Atari BASIC. Rather than add their new tokens to the end of the lists, they're mixed in with the others. So A+ can't even LOAD or RUN Atari BASIC files. I suppose the manual told you to LIST in BASIC, reboot, ENTER in A+, to "port" your BASIC programs to A+. While this was probably a PITA for BASIC/A+ users back in the day, it makes it *really* easy to detect A+ here. The last line of every SAVEed program is the direct-mode command, and contains either a SAVE or CSAVE cmd token. Which is the same token in Atari, Turbo, BXL, and BXE... but *different* in A+. */ CALLBACK(check_aplus_cmd) { unsigned char nexttok; int aplus_found = 0; nexttok = program[pos + 1]; if(tok == 0x1d) { /* SAVE in A+ (OP_STRCONST next). XIO in anything else (OP_HASH next). */ /* Note that A+ still uses the same tokens as BASIC for OP_STRCONST, OP_EOS, and OP_EOL. */ aplus_found = (nexttok == OP_STRCONST); } else if(tok == 0x48) { /* CSAVE in A+ (no arg). RND in anything else (OP_FUNC_LPAR next). */ aplus_found = (nexttok == OP_EOS || nexttok == OP_EOL); } if(aplus_found) { foreign("OSS BASIC/A+", SRET_APLUS); } } void check_aplus(void) { on_cmd_token = check_aplus_cmd; walk_code(32768, 32768); } void check_atari_turbo_oss(void) { allow_hex_const = 1; on_cmd_token = handle_cmd; on_exp_token = handle_op; on_end_stmt = handle_end_stmt; walk_all_code(); } int main(int argc, char **argv) { set_self(*argv); parse_general_args(argc, argv, print_help); parse_args(argc, argv); detect_foreign(); readfile(); parse_header(); check_variables(); check_aplus(); check_atari_turbo_oss(); print_result(); /* always exits */ return 0; /* never happens, shuts up gcc's warning though */ }