#include #include #include #include #include #include #include "bas.h" #define BT_INVALID 0 #define BT_ATARI 1 #define BT_TURBO 2 #define BT_BXL 4 #define BT_BXE 8 #define BT_BXL_BXE (BT_BXL | BT_BXE) int bas_type = 0x0f; /* start out with all enabled */ int comma_count; /* count of regular commas (not string/array) in statement */ unsigned char last_cmd; unsigned char last_op_tok; unsigned short last_cmd_pos; int keep_going = 0; /* -k flag */ void print_help(void) { printf("Usage: %s [-v] [-k] \n", self); } void parse_args(int argc, char **argv) { int opt; while( (opt = getopt(argc, argv, "vk")) != -1) { switch(opt) { case 'v': verbose = 1; break; case 'k': keep_going = verbose = 1; break; default: print_help(); exit(1); } } if(optind >= argc) die("No input file given (and stdin not supported)."); else open_input(argv[optind]); if(input_file == stdin) die("Reading from standard input not supported."); } /* don't need this. void add_type(int type) { bas_type |= type; } */ void print_result(void) { const char *name; if(bas_type == BT_INVALID) { name = "Unknown variant: Not Atari BASIC, Turbo, BXL, or BXE"; } else if(bas_type & BT_ATARI) { name = "Atari BASIC"; } else if(bas_type == BT_BXL || bas_type == (BT_BXL | BT_BXE)) { name = "OSS BASIC XL"; } else if(bas_type == BT_BXE) { name = "OSS BASIC XE"; } else if(bas_type == BT_TURBO) { name = "Turbo BASIC XL"; } else { name = "Not Atari BASIC; probably either Turbo or BXL/BXE"; } if(verbose) fprintf(stderr, " final bas_type %02x\n", bas_type); fputs(name, stdout); putchar('\n'); exit(bas_type == BT_ATARI ? 0 : bas_type + 8); } void remove_type(int type) { bas_type &= ((~type) & 0x0f); if(keep_going) return; /* without -k, stop if it gets narrowed down to one of these two. */ if(bas_type == BT_TURBO || bas_type == BT_BXE) print_result(); } CALLBACK(handle_cmd) { int has_args = 0; unsigned char nexttok; last_cmd = tok; last_cmd_pos = pos; comma_count = 0; if(verbose) fprintf(stderr, "handle_cmd: lineno %d, tok $%02x, bas_type was %02x\n", lineno, tok, bas_type); if(tok <= CMD_ERROR) return; /* legal in BASIC, ignore */ remove_type(BT_ATARI); if(tok >= 0x59) remove_type(BT_BXL); if(tok >= 0x65) { fprintf(stderr, "handle_cmd: invalid command %02x at line %d\n", tok, lineno); bas_type = BT_INVALID; print_result(); } nexttok = program[pos + 1]; has_args = !(nexttok == OP_EOS || nexttok == OP_EOL); /* we have tokens 0x3a to 0x68 in both TB and BXE, or 47 of them. Some tokens can't be determined, because they take the same argument (or lack of) in both Turbo and BXL/XE. These are: 0x3c: REPEAT or ELSE (no args either way) 0x42: Maybe: BPUT or RGET (take the same args... but not quite!) 0x43: Maybe: BGET or BPUT (take the same args... but not quite!) 0x46: LOOP or CP (no args either way) 0x49: LOCK or UNPROTECT (take the same args) 0x4B: RENAME in both Turbo and BXL/XE (take the same args) 0x60: CLS or HITCLR (no args either way) This leaves 40 we can check. Covered so far: 34 (85%) TODO: Unknown tokens: 0x54: ??? in TB (find out what), LVAR in BXL/BXE. 0x5A: BLOAD or... what? (Jindroush lists it as ?5A?) TODO: 0x5B: BRUN or CALL (both take a string, CALL allows "USING" though) 0x5C: GO# (1 arg only) or SORTUP (optional 2nd arg of USING, but no comma) 0x5D: # (1 arg only) or SORTDOWN (optional 2nd arg of USING, but no comma) 0x5F: PAINT (req 2 args) or NUM (optional 2 args, probly not appear in program) */ switch(tok) { case 0x39: /* MOVE or ENDWHILE */ case 0x3a: /* -MOVE or TRACEOFF */ case 0x3d: /* UNTIL or ENDIF */ case 0x56: /* DEL or FAST */ case 0x61: /* DSOUND (4 num args) or INVERSE (no args) */ case 0x62: /* CIRCLE (3 num args) or NORMAL (no args) */ if(has_args) { remove_type(BT_BXL_BXE); } else { remove_type(BT_TURBO); } break; case 0x58: /* TRACE (optional + or -), EXTEND (BXE; no args) */ /* EXTEND can't show up in a program except maybe line 32768, e.g. EXTEND:SAVE "D:BLAH". */ remove_type(BT_BXL); if(lineno < 32768) { remove_type(BT_BXE); } break; case 0x59: /* TEXT (1st arg is number), PROCEDURE (arg is string const (not var!)) */ if(nexttok == OP_STRCONST) { remove_type(BT_TURBO); } else { remove_type(BT_BXL_BXE); } break; case 0x3f: /* WEND or LOMEM */ case 0x40: /* ELSE or DEL */ case 0x41: /* ENDIF or RPUT */ case 0x45: /* DO or TAB */ case 0x47: /* EXIT or ERASE */ case 0x51: /* ENDPROC or PMMOVE */ if(has_args) { remove_type(BT_TURBO); } else { remove_type(BT_BXL_BXE); } break; case 0x48: /* DIR (optional arg) or PROTECT (req'd arg) */ /* not conclusive: without args means TB, but with arg, it could be either */ if(!has_args) { remove_type(BT_BXL_BXE); } break; case 0x4a: /* UNLOCK (req'd arg) or DIR (optional arg) */ /* not conclusive: without args means TB, but with arg, it could be either */ if(!has_args) { remove_type(BT_TURBO); } break; case 0x3b: /* *F (optional + or -), TRACE (no arg) */ case 0x5e: /* *B (optional + or -) or EXIT (no arg) */ if(has_args) { remove_type(BT_BXL_BXE); } break; case 0x44: /* FILLTO or BGET (check for a # after the token) */ if(nexttok == OP_HASH) { remove_type(BT_TURBO); } else { remove_type(BT_BXL_BXE); } break; case 0x4e: /* TIME$= (1 string arg) or PMCLR (1 num arg) */ /* XXX: this doesn't do anything if the arg is a variable; we could examine the type, but we don't yet */ if(nexttok == OP_STRCONST) { remove_type(BT_BXL_BXE); } else if(nexttok == OP_NUMCONST) { remove_type(BT_TURBO); } break; case 0x50: /* EXEC (1 arg, *must* be variable) or PMGRAPHICS (1 num arg, may be const) */ if(nexttok < 0x80) { remove_type(BT_TURBO); } break; case 0x57: /* DUMP (1 optional string arg) or LOCAL (1 variable arg) */ if(!has_args || (nexttok == OP_STRCONST)) { /* if there's no arg, or one string constant arg... */ /* XXX: DUMP A$ not detected */ remove_type(BT_BXL_BXE); } default: break; } if(verbose) fprintf(stderr, " bas_type now %02x\n", bas_type); } CALLBACK(handle_op) { unsigned char nexttok = program[pos + 1]; unsigned char nexttok2 = program[pos + 2]; if(tok == OP_COMMA) comma_count++; if(verbose) fprintf(stderr, "handle_op: lineno %d, tok $%02x, comma_count %d, bas_type was %02x\n", lineno, tok, comma_count, bas_type); if(tok == 0x00) { /* Turbo allows 256 variables, tokenizes the first 128 normally ($80-$FF). The extra ones above $FF are tokenized as $00, varnum - $80. None of our other BASICs uses $00 as an operator token, so.. */ bas_type = BT_TURBO; print_result(); } if(tok == OP_HEXCONST) remove_type(BT_ATARI); /* hex const (turbo *and* bxl/xe) */ if(tok <= OP_FUNC_STRIG) { if(verbose) fprintf(stderr, " bas_type now %02x\n", bas_type); return; /* legal in BASIC, ignore */ } remove_type(BT_ATARI); if(tok >= 0x69) { remove_type(BT_BXL_BXE); } if(tok >= 0x6E) { fprintf(stderr, "handle_op: invalid operator %02x at line %d\n", tok, lineno); bas_type = BT_INVALID; print_result(); } /* There are 25 extra operators in Turbo, and 20 of them are shared with BXL/BXE. Of the 20, 6 of them are undecidable, and the rest are covered here, which means 70% coverage of the shared ops. Undecidables are: 0x56 & (logical AND) or % (XOR), both infix numeric ops; can't tell apart 0x57 ! (logical OR) in both Turbo and BXL/BXE, can't tell apart 0x5e FRAC (num func, 1 arg) or DPEEK (num func, 1 arg (BXE may use 2), can't tell apart... TODO: ...*unless* there are 2 args 0x63 RND (func, 1 num arg) or ERR (func, 1 num arg), can't tell apart 0x64 RAND (func, 1 num arg) or TAB (func, 1 num arg), can't tell apart 0x65 TRUNC (func, 1 num arg) or PEN (func, 1 num arg), can't tell apart */ switch(tok) { case 0x55: /* DPEEK (function) TB, USING (infix, not a function) in BXL/BXE */ case 0x58: /* INSTR (function) or & (infix numeric) in BXE. Note that BXL has this as another END token, which works like the regular END, but cannot be entered in the editor so unlikely to show up in a program (but we handle it here anyway) */ case 0x5b: /* HEX$ (func, takes 1 num arg) or FIND( (pseudo-func, 3 args */ if(nexttok == OP_FUNC_LPAR) { remove_type(BT_BXL_BXE); } else if(nexttok == OP_EOS || nexttok == OP_EOL) { /* weird BXL END token */ remove_type(BT_TURBO | BT_BXE); } else { remove_type(BT_TURBO); } break; case 0x59: /* INKEY$ (0 arg pseudo-func) in TB, string array separator semicolon in BXL/BXE */ if(nexttok == OP_NUMCONST || nexttok >= 0x80) { /* INKEY$ may not be followed by a numeric constant or a variable of any kind */ remove_type(BT_TURBO); } break; case 0x5a: /* EXOR (infix num op) or BUMP( (pseudo-function, no OP_FUNC_LPAR) */ case 0x5d: /* DIV (infix num op) or RANDOM( (pseudo-func, 1 or 2 num args) */ if(last_op_tok == OP_NUMCONST || last_op_tok == OP_HEXCONST || last_op_tok >= 0x80) { /* if the last token was a variable or a numeric, this is infix (can't be a function, last token would have to have been a command or a regular operator). */ remove_type(BT_BXL_BXE); } else { remove_type(BT_TURBO); } break; case 0x5c: /* DEC (function, takes str) in TB, HEX$ (function, takes num) in BXL/BXE */ if(nexttok2 == OP_STRCONST) { remove_type(BT_BXL_BXE); } else if(nexttok2 > 0x80 && (get_vartype(nexttok2) == TYPE_STRING)) { /* TODO: see if this test is actually valid! */ remove_type(BT_BXL_BXE); } break; case 0x5f: /* TIME$ in TB, SYS (function) in BXL/BXE */ case 0x60: /* TIME in TB, VSTICK (function) in BXL/BXE */ case 0x61: /* MOD (infix op) in TB, HSTICK (function) in BXL/BXE */ case 0x62: /* EXEC (infix op, with ON) in TB, PMADR (function) in BXL/BXE */ if(nexttok == OP_FUNC_LPAR) { remove_type(BT_TURBO); } else { remove_type(BT_BXL_BXE); } break; case 0x66: /* %0 in TB, LEFT$ (pseudo-func, takes string) in BXL/BXE */ case 0x67: /* %1 in TB, RIGHT$ (pseudo-func, takes string) in BXL/BXE */ case 0x68: /* %2 in TB, MID$ (pseudo-func, takes string) in BXL/BXE */ if(nexttok == OP_STRCONST || nexttok >= 0x80) { /* %0 %1 %2 can't be followed by a string constant *or* a variable */ remove_type(BT_TURBO); } else { /* whereas LEFT$ RIGHT$ MID$ can't be followed by anything else */ remove_type(BT_BXL_BXE); } break; default: break; } last_op_tok = tok; if(verbose) fprintf(stderr, " bas_type now %02x\n", bas_type); } /* we can count commas, because both Turbo and BXE/BXL use the "array" comma to separate function arguments, not the "regular" comma. */ CALLBACK(handle_end_stmt) { if(verbose) fprintf(stderr, "handle_end_stmt: lineno %d, tok $%02x, last_cmd $%02x, comma_count %d, bas_type was %02x\n", lineno, tok, last_cmd, comma_count, bas_type); switch(last_cmd) { case 0x38: /* DPOKE (2 args) or WHILE (1 arg) */ if(comma_count) { remove_type(BT_BXL_BXE); } else { remove_type(BT_TURBO); } break; case 0x3e: /* WHILE (1 arg) or DPOKE (2 or 3 args) */ case 0x4c: /* DELETE (1 arg) or MOVE (3 or 4 args) */ case 0x4d: /* PAUSE (1 arg) or MISSILE (3 args) */ case 0x52: /* FCOLOR (1 arg) or PMWIDTH (2 args) */ case 0x53: /* *L (optional + or - only) or SET (req 2 num args) */ case 0x4f: /* PROC (1 arg) or PMCOLOR (3 args) */ if(comma_count) { remove_type(BT_TURBO); } else { remove_type(BT_BXL_BXE); } break; case 0x55: /* RENUM in both (TB req 3 args, BXL up to two) */ if(comma_count == 2) { remove_type(BT_BXL_BXE); } else { remove_type(BT_TURBO); } break; case 0x63: /* %PUT (usually seen with optional #; 1 or 2 args) or BLOAD (1 string arg) */ if(comma_count) { /* multiple args */ remove_type(BT_BXL_BXE); } else if(program[last_cmd_pos + 1] == OP_STRCONST) { /* one arg, string const. XXX: check var type */ remove_type(BT_TURBO); } break; case 0x64: /* %GET (usually seen with optional #; 1 or 2 args) or BSAVE (3 args) */ if(comma_count == 2) { remove_type(BT_TURBO); } else { remove_type(BT_BXL_BXE); } break; default: break; } if(verbose) fprintf(stderr, " bas_type now %02x\n", bas_type); last_cmd = last_op_tok = 0; } void foreign(const char *name) { fclose(input_file); puts(name); exit(0); /* TODO: pick a better number */ } void detect_foreign(void) { int i, nuls, c, d; c = fgetc(input_file); d = fgetc(input_file); if(c == 0 && d == 0) { /* This is why we can't read from stdin. */ rewind(input_file); return; } if(c == EOF || d == EOF) die("File is too short to be a BASIC program of any kind."); if(c == 0xff && d == 0xff) foreign("XEX executable (not BASIC at all!)"); if(c == 0xfe && d == 0xfe) foreign("Mac/65 tokenized source (not BASIC at all!)"); if(c == 0xdd && d == 0x00) foreign("EXTENDed OSS BASIC XE"); if(c == 0x7f && d == 'E') { c = fgetc(input_file); d = fgetc(input_file); if(c == 'L' && d == 'F') foreign("ELF executable (huh?)"); } if(!(c == 0 && d == 0)) { if(fseek(input_file, -3, SEEK_END) == 0) { nuls = 0; for(i = 0; i < 3; i++) { if(fgetc(input_file) == 0) nuls++; } if(nuls == 3) { foreign("Microsoft BASIC"); } } } if(isdigit(c) && (d == 0x20 || isdigit(d))) foreign("Text file, could be LISTed BASIC (or not)"); if(isprint(c) && isprint(d)) foreign("Text file (not BASIC at all!)"); foreign("Unknown file type (not BASIC at all!)"); } int main(int argc, char **argv) { set_self(*argv); parse_general_args(argc, argv, print_help); parse_args(argc, argv); detect_foreign(); readfile(); parse_header(); allow_hex_const = 1; on_cmd_token = handle_cmd; on_exp_token = handle_op; on_end_stmt = handle_end_stmt; walk_all_code(); print_result(); /* always exits */ return 0; /* never happens, shuts up gcc's warning though */ }