diff options

| field | value | date |
|---|---|---|
| author | B. Watson <urchlay@slackware.uk> | 2024-07-08 22:45:32 -0400 |
| committer | B. Watson <urchlay@slackware.uk> | 2024-07-08 22:45:32 -0400 |
| commit | 6f1133ef4cd268098d0695dbdf9d285748a4108c (patch) | |
| tree | 709d4e48a6a600bc917c21058da683c85ed88e45 | |
| parent | dbb9ac8e29103dc4dce5c9fe830ddb59e5b4e785 (diff) | |
| download | bw-atari8-tools-6f1133ef4cd268098d0695dbdf9d285748a4108c.tar.gz | |

whichbas: add -s (script mode) option.

| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | bas.c | 8 |
| -rw-r--r-- | whichbas.c | 152 |
| -rw-r--r-- | whichbas.rst | 94 |

3 files changed, 202 insertions, 52 deletions
| @@ -309,10 +309,14 @@ void walk_code(unsigned int startlineno, unsigned int endlineno) {  		end = nextpos;  		pos = linepos; -		if(offset < 6) { +		if(offset < 5) { +			/* actually, real Atari BASIC's minimum offset is 6. however, if you use +			   the "--" (line of dashes, command token 0x54) in Turbo BASIC XL, you +			   get an offset of 5, because there's no end-of-line after it. +			   it seems better to accomodate Turbo here. */  			CALL(on_bad_line_length);  			offset = program[linepos + 2]; /* on_bad_line_length fixed it (we hope) */ -			if(offset < 6) +			if(offset < 5)  				die("Fatal: Program is code-protected; unprotect it first.");  		} @@ -17,24 +17,40 @@  int bas_type = 0x0f; /* start out with all enabled */ +#define SRET_ATARI                 3 +#define SRET_TURBO                 4 +#define SRET_BXL                   5 +#define SRET_BXE                   6 +#define SRET_TURBO_OR_BXL          7 +#define SRET_TURBO_OR_BXE          8 +#define SRET_TURBO_OR_BXL_OR_BXE   9 +#define SRET_UKNOWN_DERIV         10 +#define SRET_AMSB                 11 +#define SRET_EXTENDED_BXE         12 +#define SRET_NOT_BASIC            32 + +int script_mode = 0; /* -s flag */ +int script_ret; /* -s mode, exit with one of SRET_*, above, as status */ + +int keep_going = 0; /* -k flag */ +  int comma_count; /* count of regular commas (not string/array) in statement */  unsigned char last_cmd;  unsigned char last_op_tok;  unsigned short last_cmd_pos; -int keep_going = 0; /* -k flag */ -  void print_help(void) { -	printf("Usage: %s [-v] [-k] <inputfile>\n", self); +	printf("Usage: %s [-v] [-k] [-s] <inputfile>\n", self);  }  void parse_args(int argc, char **argv) {  	int opt; -	while( (opt = getopt(argc, argv, "vk")) != -1) { +	while( (opt = getopt(argc, argv, "vks")) != -1) {  		switch(opt) {  			case 'v': verbose = 1; break;  			case 'k': keep_going = verbose = 1; break; +			case 's': script_mode = 1; break;  			default:  				print_help();  				exit(1); @@ -62,28 +78,39 @@ 
void print_result(void) {  	if(bas_type == BT_INVALID) {  		name = "Unknown variant: Not Atari BASIC, Turbo, BXL, or BXE"; +		script_ret = SRET_UKNOWN_DERIV;  	} else if(bas_type & BT_ATARI) {  		name = "Atari BASIC"; +		script_ret = SRET_ATARI;  	} else if(bas_type & BT_TURBO) {  		if(bas_type & BT_BXL) {  			name = "Either Turbo BASIC XL or OSS BASIC XL"; +			script_ret = SRET_TURBO_OR_BXL;  		} else if(bas_type & BT_BXE) {  			name = "Either Turbo BASIC XL or OSS BASIC XE"; +			script_ret = SRET_TURBO_OR_BXE;  		} else { /* bas_type == BT_TURBO */  			name = "Turbo BASIC XL"; +			script_ret = SRET_TURBO;  		}  	} else if(bas_type == BT_BXL || bas_type == (BT_BXL | BT_BXE)) {  		name = "OSS BASIC XL"; +		script_ret = SRET_BXL;  	} else if(bas_type == BT_BXE) {  		name = "OSS BASIC XE"; +		script_ret = SRET_BXE;  	} else {  		/* this one should never happen: */  		name = "Either Turbo BASIC XL, OSS BASIC XL, or OSS BASIC XE"; +		script_ret = SRET_TURBO_OR_BXL_OR_BXE;  	} -	puts(name); - -	exit(bas_type == BT_ATARI ? 0 : bas_type + 8); +	if(script_mode) { +		exit(script_ret); +	} else { +		puts(name); +		exit(0); +	}  }  void remove_type(int type) { @@ -137,7 +164,7 @@ CALLBACK(handle_cmd) {  			}  			/* partial: we really should detect GET #1,A$. this is Turbo-only, but  			   probably nobody ever uses it because it doesn't seem to *work*, -			   at least not in TB 1.5. */ +			   at least not in TB 1.5. A$ always ends up empty with length 0. */  			break;  		case CMD_RESTORE:  		case CMD_TRAP: @@ -171,13 +198,18 @@ CALLBACK(handle_cmd) {        0x4B: RENAME in both Turbo and BXL/XE (take the same args)        0x60: CLS or HITCLR (no args either way)  		This leaves 42 we can check. -		Covered so far: 38 (90%) +		Covered so far: 39 (93%)        TODO: Unknown tokens: -      0x54: ??? in TB (find out what), LVAR in BXL/BXE. -      0x5A: BLOAD or... what? (Jindroush lists it as ?5A?) +      0x5A: BLOAD or... what? 
(Jindroush lists it as BXL_EXTEND for BXL and +		      ?5A? for BXE)        TODO:        0x5B: BRUN or CALL (both take a string, CALL allows "USING" though) -      0x5F: PAINT (req 2 args) or NUM (optional 2 args, probly not appear in program) +		      This isn't really important, as CALL requires a PROCEDURE to +		      exit, and we *do* catch the PROCEDURE token. +      0x5F: PAINT (req 2 args) or NUM (optional 2 args). +		      Again, not important, because it's highly unlikely any BXL/BXE +		      program will contain NUM... because when it executes, it stops the +		      program and goes back to the READY prompt (in auto-numbering mode).  	 */  	switch(tok) {  		case 0x39: /* MOVE <args> or ENDWHILE */ @@ -263,6 +295,17 @@ CALLBACK(handle_cmd) {  				remove_type(BT_TURBO);  			}  			break; +      case 0x54: /* -- in TB, LVAR in BXL/BXE */ +			/* we can tell these apart because TB gives us a next-statement offset of 5 +			   when we use this (normally, the minimum offset is 6, but there's no OP_EOS +			   after this token for some reason). */ +			if(program[pos - 1] == 0x05) { +				bas_type = BT_TURBO; +				print_result(); +			} else { +				remove_type(BT_TURBO); +			} +			break;        case 0x57: /* DUMP (1 optional string arg) or LOCAL (1 *numeric* variable arg) */  			/* BXL/BXE's LOCAL only works on scalars, not arrays or strings. so if there's  			   no arg, or one string arg... */ @@ -539,62 +582,99 @@ CALLBACK(handle_end_stmt) {  	last_cmd = last_op_tok = 0;  } -void foreign(const char *name) { +/* return true if input_file is Atari MS BASIC. +   AMSB files begin with a 3-byte header: 0x00, then 2 byte length +   (LSB/MSB), which is actually 3 bytes less than the full length of +   the file (or, it's the length of the file minus the 3-byte header). +   Also, the files always end with 3 0x00 bytes. +	We check that the header length is 3 bytes less than the file length, +   then check for the 3 0x00's at the end. 
+ */ +int detect_amsb(void) { +	int len, c; + +	if(verbose) fprintf(stderr, "entering detect_amsb()\n"); + +	rewind(input_file); +	c = fgetc(input_file); +	if(c) return 0; +	c = fgetc(input_file); +	if(c == EOF) return 0; +	len = (fgetc(input_file) << 8) | c; + +	if(verbose) fprintf(stderr, "detect_amsb() header len==%d (file size should be %d)\n", len, len + 3); + +	fseek(input_file, 0, SEEK_END); +	c = ftell(input_file); +	if(verbose) fprintf(stderr, "detect_amsb() file size %d\n", c); +	if(len != (c - 3)) { +		if(verbose) fprintf(stderr, "detect_amsb() wrong file size!\n"); +		return 0; +	} + +	if(verbose) fprintf(stderr, "detect_amsb() file size is correct, checking for 3 nulls\n"); +	fseek(input_file, -3, SEEK_END); +	if(fgetc(input_file)) return 0; +	if(fgetc(input_file)) return 0; +	if(fgetc(input_file)) return 0; + +	if(verbose) fprintf(stderr, "detect_amsb() found 3 nulls, return 1\n"); + +	return 1; +} + +void foreign(const char *name, int srval) {  	fclose(input_file); -	puts(name); -	exit(0); /* TODO: pick a better number */ +	if(script_mode) { +		exit(srval); +	} else { +		puts(name); +		exit(0); +	}  }  void detect_foreign(void) { -	int i, nuls, c, d; +	int c, d;  	c = fgetc(input_file);  	d = fgetc(input_file); +	if(c == EOF || d == EOF) +		die("File is too short to be a BASIC program of any kind."); +  	if(c == 0 && d == 0) {  		/* This is why we can't read from stdin. 
*/  		rewind(input_file);  		return;  	} -	if(c == EOF || d == EOF) -		die("File is too short to be a BASIC program of any kind."); -  	if(c == 0xff && d == 0xff) -		foreign("XEX executable (not BASIC at all!)"); +		foreign("XEX executable (not BASIC at all!)", SRET_NOT_BASIC);  	if(c == 0xfe && d == 0xfe) -		foreign("Mac/65 tokenized source (not BASIC at all!)"); +		foreign("Mac/65 tokenized source (not BASIC at all!)", SRET_NOT_BASIC);  	if(c == 0xdd && d == 0x00) -		foreign("EXTENDed OSS BASIC XE"); +		foreign("EXTENDed OSS BASIC XE", SRET_EXTENDED_BXE);  	if(c == 0x7f && d == 'E') {  		c = fgetc(input_file);  		d = fgetc(input_file);  		if(c == 'L' && d == 'F') -			foreign("ELF executable (huh?)"); +			foreign("ELF executable (not BASIC at all!)", SRET_NOT_BASIC);  	} -	if(!(c == 0 && d == 0)) { -		if(fseek(input_file, -3, SEEK_END) == 0) { -			nuls = 0; -			for(i = 0; i < 3; i++) { -				if(fgetc(input_file) == 0) nuls++; -			} -			if(nuls == 3) { -				foreign("Microsoft BASIC"); -			} -		} +	if(c == 0 && detect_amsb()) { +		foreign("Atari Microsoft BASIC", SRET_AMSB);  	}  	if(isdigit(c) && (d == 0x20 || isdigit(d))) -		foreign("Text file, could be LISTed BASIC (or not)"); +		foreign("Text file, could be LISTed BASIC (or not)", SRET_NOT_BASIC);  	if(isprint(c) && isprint(d)) -		foreign("Text file (not BASIC at all!)"); +		foreign("Text file (not BASIC at all!)", SRET_NOT_BASIC); -	foreign("Unknown file type (not BASIC at all!)"); +	foreign("Unknown file type (not BASIC at all!)", SRET_NOT_BASIC);  }  void check_variables(void) { diff --git a/whichbas.rst b/whichbas.rst index 472b216..350be21 100644 --- a/whichbas.rst +++ b/whichbas.rst @@ -27,6 +27,14 @@ OPTIONS  Detection Options  ----------------- +**-s** +  Script (or silent) mode. Instead of printing a human-readable +  name like "Turbo BASIC XL" or "OSS BASIC XE" to stdout, **whichbas** +  will print nothing on standard output, but will exit with a status +  indicating what it detected. 
The caller can check the return status +  (e.g. the **$?** variable in Bourne/POSIX shells, or **ERRORLEVEL** +  in MS-DOS or Windows). See **EXIT STATUS**, below. +  **-k**    Keep going. The default is to stop looking at the program if the    BASIC type gets narrowed down to either Turbo BASIC or BASIC XE. @@ -37,7 +45,8 @@ Detection Options  NOTES  ===== -Atari BASIC programs are detected 100% reliably. +Atari BASIC programs are detected *almost* 100% reliably. See **BUGS**, below, +for the gory details.  Turbo BASIC, BASIC XL, and BASIC XE are all supersets of Atari BASIC.  If you wrote a program using one of them, but didn't use any of the @@ -55,8 +64,8 @@ adds to those found in BASIC XL.  Detection of Turbo vs. BXL/BXE isn't 100% reliable, and probably  never will be. There's too much overlap between the sets of extra  tokens added by each. Programs that don't use very many of the extra -functions provided by Turbo/BXL/BXE may show up as "Not Atari BASIC; -probably either Turbo or BXL/BXE". +functions provided by Turbo/BXL/BXE may show up as "Either Turbo BASIC XL +or OSS BASIC XE".  Atari Microsoft BASIC is detected by checking that the first two  bytes of the file are not zero, and that the last 3 are zero. This @@ -70,22 +79,79 @@ ELF binaries, etc) as a convenience, but I wouldn't rely on  LIMITATIONS  =========== -Currently, **whichbas** doesn't look at the variable name or type -tables. One problem caused by this: If a program uses only Atari BASIC -tokens, but uses variable(s) with _ in the name, it will be identified -as Atari BASIC... even though _ in variable names is illegal in Atari -BASIC and pretty much guarantees the program is Turbo/BXL/BXE. - -Looking at the variable types could also improve detection, since -Turbo and BXL/BXE support extended variable types (procedure labels -for Turbo, string arrays for BXL/BXE). -  **whichbas** knows nothing about other BASICs such as Frost BASIC,  BASIC/A+, Altirra BASIC... 
+BUGS +==== +It's possible to get a BASIC XL/XE program to misdetect as Atari BASIC +by writing a program that does these things: + +  - Dimensions a string array with a number of elements that isn't just a +    numeric constant or numeric variable (e.g. *DIM A$(2+2,10)* or +    *DIM A$(I*2,10)*). The code that detects a 2D string *DIM* command +    can only handle simple cases like *DIM A$(10,10)* or *DIM A$(I,10)*. +    It doesn't actually matter if the 2nd argument is a fancy expression, +    though. + +  - Does *not* actually *use* the string array variable by assigning to +    it or reading a value from it. String array accesses in BASIC XL/XE +    are reliably detected because they require a semicolon after the +    first number, even if there isn't a 2nd number. Example: *? A$(2;)* +    prints the 2nd element of the *A$* string array. If it were written +    as *? A$(2)*, but *A$* is a string array (not a regular string), +    BASIC XL/XE would give an *Error 40* (string type mismatch) at +    runtime. + +The good news is, such a program will still work fine in Atari BASIC. +Believe it or not, it's true. +  EXIT STATUS  =========== -0 for success, 1 for failure. +Without the **-s** option, exit status is 0 for success, non-zero for +failure. + +With the **-s** option, the exit status is: + +**0** +  Not used with **-s**. + +**1** or **2** +  Error reading file. Error message will be printed on standard error. + +**3** +  Atari BASIC detected. + +**4** +  Turbo BASIC detected. + +**5** +  OSS BASIC XL detected. + +**6** +  Non-EXTENDed OSS BASIC XE detected. + +**7** +  Turbo BASIC or BASIC XL (undecided which). + +**8** +  Turbo BASIC or non-EXTENDed BASIC XE (undecided which). + +**9** +  Turbo BASIC, BASIC XL, or non-EXTENDed BASIC XE (undecided which). + +**10** +  Unknown Atari BASIC derivative (not Atari BASIC, but not +  Turbo/BXL/BXE either). + +**11** +  Atari Microsoft BASIC detected. + +**12** +  EXTENDed OSS BASIC XE detected. 
+ +**32** +  None of the above; not BASIC.  .. include:: manftr.rst | 
