From 6f1133ef4cd268098d0695dbdf9d285748a4108c Mon Sep 17 00:00:00 2001 From: "B. Watson" Date: Mon, 8 Jul 2024 22:45:32 -0400 Subject: whichbas: add -s (script mode) option. --- bas.c | 8 +++- whichbas.c | 152 +++++++++++++++++++++++++++++++++++++++++++++-------------- whichbas.rst | 94 ++++++++++++++++++++++++++++++------ 3 files changed, 202 insertions(+), 52 deletions(-) diff --git a/bas.c b/bas.c index c729e19..7c7fa34 100644 --- a/bas.c +++ b/bas.c @@ -309,10 +309,14 @@ void walk_code(unsigned int startlineno, unsigned int endlineno) { end = nextpos; pos = linepos; - if(offset < 6) { + if(offset < 5) { + /* actually, real Atari BASIC's minimum offset is 6. however, if you use + the "--" (line of dashes, command token 0x54) in Turbo BASIC XL, you + get an offset of 5, because there's no end-of-line after it. + it seems better to accommodate Turbo here. */ CALL(on_bad_line_length); offset = program[linepos + 2]; /* on_bad_line_length fixed it (we hope) */ - if(offset < 6) + if(offset < 5) die("Fatal: Program is code-protected; unprotect it first."); } diff --git a/whichbas.c b/whichbas.c index 9700457..3459d06 100644 --- a/whichbas.c +++ b/whichbas.c @@ -17,24 +17,40 @@ int bas_type = 0x0f; /* start out with all enabled */ +#define SRET_ATARI 3 +#define SRET_TURBO 4 +#define SRET_BXL 5 +#define SRET_BXE 6 +#define SRET_TURBO_OR_BXL 7 +#define SRET_TURBO_OR_BXE 8 +#define SRET_TURBO_OR_BXL_OR_BXE 9 +#define SRET_UKNOWN_DERIV 10 +#define SRET_AMSB 11 +#define SRET_EXTENDED_BXE 12 +#define SRET_NOT_BASIC 32 + +int script_mode = 0; /* -s flag */ +int script_ret; /* -s mode, exit with one of SRET_*, above, as status */ + +int keep_going = 0; /* -k flag */ + int comma_count; /* count of regular commas (not string/array) in statement */ unsigned char last_cmd; unsigned char last_op_tok; unsigned short last_cmd_pos; -int keep_going = 0; /* -k flag */ - void print_help(void) { - printf("Usage: %s [-v] [-k] \n", self); + printf("Usage: %s [-v] [-k] [-s] \n", self); } void 
parse_args(int argc, char **argv) { int opt; - while( (opt = getopt(argc, argv, "vk")) != -1) { + while( (opt = getopt(argc, argv, "vks")) != -1) { switch(opt) { case 'v': verbose = 1; break; case 'k': keep_going = verbose = 1; break; + case 's': script_mode = 1; break; default: print_help(); exit(1); @@ -62,28 +78,39 @@ void print_result(void) { if(bas_type == BT_INVALID) { name = "Unknown variant: Not Atari BASIC, Turbo, BXL, or BXE"; + script_ret = SRET_UKNOWN_DERIV; } else if(bas_type & BT_ATARI) { name = "Atari BASIC"; + script_ret = SRET_ATARI; } else if(bas_type & BT_TURBO) { if(bas_type & BT_BXL) { name = "Either Turbo BASIC XL or OSS BASIC XL"; + script_ret = SRET_TURBO_OR_BXL; } else if(bas_type & BT_BXE) { name = "Either Turbo BASIC XL or OSS BASIC XE"; + script_ret = SRET_TURBO_OR_BXE; } else { /* bas_type == BT_TURBO */ name = "Turbo BASIC XL"; + script_ret = SRET_TURBO; } } else if(bas_type == BT_BXL || bas_type == (BT_BXL | BT_BXE)) { name = "OSS BASIC XL"; + script_ret = SRET_BXL; } else if(bas_type == BT_BXE) { name = "OSS BASIC XE"; + script_ret = SRET_BXE; } else { /* this one should never happen: */ name = "Either Turbo BASIC XL, OSS BASIC XL, or OSS BASIC XE"; + script_ret = SRET_TURBO_OR_BXL_OR_BXE; } - puts(name); - - exit(bas_type == BT_ATARI ? 0 : bas_type + 8); + if(script_mode) { + exit(script_ret); + } else { + puts(name); + exit(0); + } } void remove_type(int type) { @@ -137,7 +164,7 @@ CALLBACK(handle_cmd) { } /* partial: we really should detect GET #1,A$. this is Turbo-only, but probably nobody ever uses it because it doesn't seem to *work*, - at least not in TB 1.5. */ + at least not in TB 1.5. A$ always ends up empty with length 0. */ break; case CMD_RESTORE: case CMD_TRAP: @@ -171,13 +198,18 @@ CALLBACK(handle_cmd) { 0x4B: RENAME in both Turbo and BXL/XE (take the same args) 0x60: CLS or HITCLR (no args either way) This leaves 42 we can check. - Covered so far: 38 (90%) + Covered so far: 39 (93%) TODO: Unknown tokens: - 0x54: ??? 
in TB (find out what), LVAR in BXL/BXE. - 0x5A: BLOAD or... what? (Jindroush lists it as ?5A?) + 0x5A: BLOAD or... what? (Jindroush lists it as BXL_EXTEND for BXL and + ?5A? for BXE) TODO: 0x5B: BRUN or CALL (both take a string, CALL allows "USING" though) - 0x5F: PAINT (req 2 args) or NUM (optional 2 args, probly not appear in program) + This isn't really important, as CALL requires a PROCEDURE to + exit, and we *do* catch the PROCEDURE token. + 0x5F: PAINT (req 2 args) or NUM (optional 2 args). + Again, not important, because it's highly unlikely any BXL/BXE + program will contain NUM... because when it executes, it stops the + program and goes back to the READY prompt (in auto-numbering mode). */ switch(tok) { case 0x39: /* MOVE or ENDWHILE */ @@ -263,6 +295,17 @@ CALLBACK(handle_cmd) { remove_type(BT_TURBO); } break; + case 0x54: /* -- in TB, LVAR in BXL/BXE */ + /* we can tell these apart because TB gives us a next-statement offset of 5 + when we use this (normally, the minimum offset is 6, but there's no OP_EOS + after this token for some reason). */ + if(program[pos - 1] == 0x05) { + bas_type = BT_TURBO; + print_result(); + } else { + remove_type(BT_TURBO); + } + break; case 0x57: /* DUMP (1 optional string arg) or LOCAL (1 *numeric* variable arg) */ /* BXL/BXE's LOCAL only works on scalars, not arrays or strings. so if there's no arg, or one string arg... */ @@ -539,62 +582,99 @@ CALLBACK(handle_end_stmt) { last_cmd = last_op_tok = 0; } -void foreign(const char *name) { +/* return true if input_file is Atari MS BASIC. + AMSB files begin with a 3-byte header: 0x00, then 2 byte length + (LSB/MSB), which is actually 3 bytes less than the full length of + the file (or, it's the length of the file minus the 3-byte header). + Also, the files always end with 3 0x00 bytes. + We check that the header length is 3 bytes less than the file length, + then check for the 3 0x00's at the end. 
+ */ +int detect_amsb(void) { + int len, c; + + if(verbose) fprintf(stderr, "entering detect_amsb()\n"); + + rewind(input_file); + c = fgetc(input_file); + if(c) return 0; + c = fgetc(input_file); + if(c == EOF) return 0; + len = (fgetc(input_file) << 8) | c; + + if(verbose) fprintf(stderr, "detect_amsb() header len==%d (file size should be %d)\n", len, len + 3); + + fseek(input_file, 0, SEEK_END); + c = ftell(input_file); + if(verbose) fprintf(stderr, "detect_amsb() file size %d\n", c); + if(len != (c - 3)) { + if(verbose) fprintf(stderr, "detect_amsb() wrong file size!\n"); + return 0; + } + + if(verbose) fprintf(stderr, "detect_amsb() file size is correct, checking for 3 nulls\n"); + fseek(input_file, -3, SEEK_END); + if(fgetc(input_file)) return 0; + if(fgetc(input_file)) return 0; + if(fgetc(input_file)) return 0; + + if(verbose) fprintf(stderr, "detect_amsb() found 3 nulls, return 1\n"); + + return 1; +} + +void foreign(const char *name, int srval) { fclose(input_file); - puts(name); - exit(0); /* TODO: pick a better number */ + if(script_mode) { + exit(srval); + } else { + puts(name); + exit(0); + } } void detect_foreign(void) { - int i, nuls, c, d; + int c, d; c = fgetc(input_file); d = fgetc(input_file); + if(c == EOF || d == EOF) + die("File is too short to be a BASIC program of any kind."); + if(c == 0 && d == 0) { /* This is why we can't read from stdin. 
*/ rewind(input_file); return; } - if(c == EOF || d == EOF) - die("File is too short to be a BASIC program of any kind."); - if(c == 0xff && d == 0xff) - foreign("XEX executable (not BASIC at all!)"); + foreign("XEX executable (not BASIC at all!)", SRET_NOT_BASIC); if(c == 0xfe && d == 0xfe) - foreign("Mac/65 tokenized source (not BASIC at all!)"); + foreign("Mac/65 tokenized source (not BASIC at all!)", SRET_NOT_BASIC); if(c == 0xdd && d == 0x00) - foreign("EXTENDed OSS BASIC XE"); + foreign("EXTENDed OSS BASIC XE", SRET_EXTENDED_BXE); if(c == 0x7f && d == 'E') { c = fgetc(input_file); d = fgetc(input_file); if(c == 'L' && d == 'F') - foreign("ELF executable (huh?)"); + foreign("ELF executable (not BASIC at all!)", SRET_NOT_BASIC); } - if(!(c == 0 && d == 0)) { - if(fseek(input_file, -3, SEEK_END) == 0) { - nuls = 0; - for(i = 0; i < 3; i++) { - if(fgetc(input_file) == 0) nuls++; - } - if(nuls == 3) { - foreign("Microsoft BASIC"); - } - } + if(c == 0 && detect_amsb()) { + foreign("Atari Microsoft BASIC", SRET_AMSB); } if(isdigit(c) && (d == 0x20 || isdigit(d))) - foreign("Text file, could be LISTed BASIC (or not)"); + foreign("Text file, could be LISTed BASIC (or not)", SRET_NOT_BASIC); if(isprint(c) && isprint(d)) - foreign("Text file (not BASIC at all!)"); + foreign("Text file (not BASIC at all!)", SRET_NOT_BASIC); - foreign("Unknown file type (not BASIC at all!)"); + foreign("Unknown file type (not BASIC at all!)", SRET_NOT_BASIC); } void check_variables(void) { diff --git a/whichbas.rst b/whichbas.rst index 472b216..350be21 100644 --- a/whichbas.rst +++ b/whichbas.rst @@ -27,6 +27,14 @@ OPTIONS Detection Options ----------------- +**-s** + Script (or silent) mode. Instead of printing a human-readable + name like "Turbo BASIC XL" or "OSS BASIC XE" to stdout, **whichbas** + will print nothing on standard output, but will exit with a status + indicating what it detected. The caller can check the return status + (e.g. 
the **$?** variable in Bourne/POSIX shells, or **ERRORLEVEL** + in MS-DOS or Windows). See **EXIT STATUS**, below. + **-k** Keep going. The default is to stop looking at the program if the BASIC type gets narrowed down to either Turbo BASIC or BASIC XE. @@ -37,7 +45,8 @@ Detection Options NOTES ===== -Atari BASIC programs are detected 100% reliably. +Atari BASIC programs are detected *almost* 100% reliably. See **BUGS**, below, +for the gory details. Turbo BASIC, BASIC XL, and BASIC XE are all supersets of Atari BASIC. If you wrote a program using one of them, but didn't use any of the @@ -55,8 +64,8 @@ adds to those found in BASIC XL. Detection of Turbo vs. BXL/BXE isn't 100% reliable, and probably never will be. There's too much overlap between the sets of extra tokens added by each. Programs that don't use very many of the extra -functions provided by Turbo/BXL/BXE may show up as "Not Atari BASIC; -probably either Turbo or BXL/BXE". +functions provided by Turbo/BXL/BXE may show up as "Either Turbo BASIC XL +or OSS BASIC XE". Atari Microsoft BASIC is detected by checking that the first two bytes of the file are not zero, and that the last 3 are zero. This @@ -70,22 +79,79 @@ ELF binaries, etc) as a convenience, but I wouldn't rely on LIMITATIONS =========== -Currently, **whichbas** doesn't look at the variable name or type -tables. One problem caused by this: If a program uses only Atari BASIC -tokens, but uses variable(s) with _ in the name, it will be identified -as Atari BASIC... even though _ in variable names is illegal in Atari -BASIC and pretty much guarantees the program is Turbo/BXL/BXE. - -Looking at the variable types could also improve detection, since -Turbo and BXL/BXE support extended variable types (procedure labels -for Turbo, string arrays for BXL/BXE). - **whichbas** knows nothing about other BASICs such as Frost BASIC, BASIC/A+, Altirra BASIC... 
+BUGS +==== +It's possible to get a BASIC XL/XE program to misdetect as Atari BASIC +by writing a program that does these things: + + - Dimensions a string array with a number of elements that isn't just a + numeric constant or numeric variable (e.g. *DIM A$(2+2,10)* or + *DIM A$(I*2,10)*). The code that detects a 2D string *DIM* command + can only handle simple cases like *DIM A$(10,10)* or *DIM A$(I,10)*. + It doesn't actually matter if the 2nd argument is a fancy expression, + though. + + - Does *not* actually *use* the string array variable by assigning to + it or reading a value from it. String array accesses in BASIC XL/XE + are reliably detected because they require a semicolon after the + first number, even if there isn't a 2nd number. Example: *? A$(2;)* + prints the 2nd element of the *A$* string array. If it were written + as *? A$(2)*, but *A$* is a string array (not a regular string), + BASIC XL/XE would give an *Error 40* (string type mismatch) at + runtime. + +The good news is, such a program will still work fine in Atari BASIC. +Believe it or not, it's true. + EXIT STATUS =========== -0 for success, 1 for failure. +Without the **-s** option, exit status is 0 for success, non-zero for +failure. + +With the **-s** option, the exit status is: + +**0** + Not used with **-s**. + +**1** or **2** + Error reading file. Error message will be printed on standard error. + +**3** + Atari BASIC detected. + +**4** + Turbo BASIC detected. + +**5** + OSS BASIC XL detected. + +**6** + Non-EXTENDed OSS BASIC XE detected. + +**7** + Turbo BASIC or BASIC XL (undecided which). + +**8** + Turbo BASIC or non-EXTENDed BASIC XE (undecided which). + +**9** + Turbo BASIC, BASIC XL, or non-EXTENDed BASIC XE (undecided which). + +**10** + Unknown Atari BASIC derivative (not Atari BASIC, but not + Turbo/BXL/BXE either). + +**11** + Atari Microsoft BASIC detected. + +**12** + EXTENDed OSS BASIC XE detected. + +**32** + None of the above; not BASIC. .. 
include:: manftr.rst -- cgit v1.2.3