#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <time.h>
#include <sys/wait.h>

#include "bas.h"
#include "whichbas.h"

#define BT_INVALID 0
#define BT_ATARI 1
#define BT_TURBO 2
#define BT_BXL 4
#define BT_BXE 8

#define BT_BXL_BXE (BT_BXL | BT_BXE)

int bas_type = 0x0f; /* start out with all enabled */

int script_mode = 0; /* -s flag */
int script_ret; /* -s mode, exit with one of SRET_* from whichbas.h as status */

int keep_going = 0; /* -k flag */

int comma_count; /* count of regular commas (not string/array) in statement */
unsigned char last_cmd;
unsigned char last_op_tok;
unsigned short last_cmd_pos;

void print_help(void) {
	printf("Usage: %s [-v] [-h] [-k] [-s] <inputfile> ...\n", self);
}

int child_errs = 0;

/* return true if the child ran and returned 0 (success) */
int spawn_child(const char **args) {
	pid_t pid, status;
	int wstatus;

	pid = fork();
	if(pid == -1) {
		perror("fork()");
		die("Can't spawn child process");
	} else if(pid) {
		/* we are the parent */
		status = waitpid(pid, &wstatus, 0);
		if(status < 0) {
			perror("waitpid()");
			die("Child process went south");
		}
		if(! (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == 0) ) {
			child_errs++;
			return 0;
		}
	} else {
		/* we are the child */
		if(execv(args[0], (char * const *)args) < 0) {
			perror("exec()");
			exit(1);
		}
	}

	return 1;
}

/* this is not the ideal way to handle multiple files. it forks a new
   process for each one. however, I embedded a bunch of die() calls
   in bas.c, thinking I'd never use some of the functions more than once
   in the same run. what I get for trying to KISS... */
void multiple_files(const char *argv0, char **list) {
	const char *args[5]; /* 5 because eventually we include -k and/or -v */
	int kidstat;

	/* this isn't needed for things to work, but if I write buggy code,
	   it avoids a forkbomb. */
	setenv("WHICHBAS_MULTI_PARANOIA", "1", 1);

	args[0] = argv0;
	args[2] = NULL;

	while(*list) {
		args[1] = *list;
		printf("%s:\t", *list);
		fflush(stdout);
		kidstat = spawn_child(args);
		if(!kidstat) puts("(detection failed)");
		fflush(stdout);
		list++;
	}

	if(child_errs) {
		fprintf(stderr, "%s: exiting with error status because some files failed.\n", self);
		exit(1);
	} else {
		exit(0);
	}
}

void parse_args(int argc, char **argv) {
	int opt;

	while( (opt = getopt(argc, argv, "vksh")) != -1) {
		switch(opt) {
			case 'v': verbose = 1; break;
			case 'k': keep_going = verbose = 1; break;
			case 's': script_mode = 1; break;
			case 'h': print_help(); exit(0);
			default:
				print_help();
				exit(1);
		}
	}

	if(optind >= argc)
		die("No input file given (and stdin not supported).");

	if(optind == argc - 1) { /* got one filename only */
		open_input(argv[optind]);
		if(input_file == stdin)
			die("Reading from standard input not supported.");
	} else { /* got multiple filenames */
		if(keep_going || verbose || script_mode)
			die("-k, -v, -s not supported with multiple filenames (yet?)");
		if(getenv("WHICHBAS_MULTI_PARANOIA"))
			die("BUG: multiple_files() recursion!");
		multiple_files(argv[0], argv + optind);
	}
}

/* don't need this.
void add_type(int type) {
	bas_type |= type;
}
*/

void print_result(void) {
	const char *name;

	if(verbose) fprintf(stderr, "   final bas_type %02x\n", bas_type);

	if(bas_type == BT_INVALID) {
		name = "Unknown variant: Not Atari BASIC, Turbo, BXL, or BXE";
		script_ret = SRET_UKNOWN_DERIV;
	} else if(bas_type & BT_ATARI) {
		name = "Atari BASIC";
		script_ret = SRET_ATARI;
	} else if(bas_type & BT_TURBO) {
		if(bas_type & BT_BXL) {
			name = "Either Turbo BASIC XL or OSS BASIC XL";
			script_ret = SRET_TURBO_OR_BXL;
		} else if(bas_type & BT_BXE) {
			name = "Either Turbo BASIC XL or OSS BASIC XE";
			script_ret = SRET_TURBO_OR_BXE;
		} else { /* bas_type == BT_TURBO */
			name = "Turbo BASIC XL";
			script_ret = SRET_TURBO;
		}
	} else if(bas_type == BT_BXL || bas_type == (BT_BXL | BT_BXE)) {
		name = "OSS BASIC XL";
		script_ret = SRET_BXL;
	} else if(bas_type == BT_BXE) {
		name = "OSS BASIC XE";
		script_ret = SRET_BXE;
	} else {
		/* this one should never happen: */
		name = "Either Turbo BASIC XL, OSS BASIC XL, or OSS BASIC XE";
		script_ret = SRET_TURBO_OR_BXL_OR_BXE;
	}

	if(script_mode) {
		exit(script_ret);
	} else {
		puts(name);
		exit(0);
	}
}

/* return true if a token is numeric constant
   (including TB/BXE/BXL hex) */
int is_numconst_op(unsigned char tok) {
	switch(tok) {
		case OP_NUMCONST:
		case OP_HEXCONST:
			return 1;
		default:
			return 0;
	}
}

/* return true if a token is a function that *returns*
   a numeric value (says nothing about the argument types,
   though!) */
int is_numeric_func(unsigned char tok) {
	switch(tok) {
		case OP_FUNC_USR:
		case OP_FUNC_ASC:
		case OP_FUNC_VAL:
		case OP_FUNC_LEN:
		case OP_FUNC_ADR:
		case OP_FUNC_ATN:
		case OP_FUNC_COS:
		case OP_FUNC_PEEK:
		case OP_FUNC_SIN:
		case OP_FUNC_RND:
		case OP_FUNC_FRE:
		case OP_FUNC_EXP:
		case OP_FUNC_LOG:
		case OP_FUNC_CLOG:
		case OP_FUNC_SQR:
		case OP_FUNC_SGN:
		case OP_FUNC_ABS:
		case OP_FUNC_INT:
		case OP_FUNC_PADDLE:
		case OP_FUNC_STICK:
		case OP_FUNC_PTRIG:
		case OP_FUNC_STRIG:
			return 1;
		default:
			return 0;
	}
}

/* return true if a token is an arithmetic operator */
int is_arith_op(unsigned char tok) {
	switch(tok) {
		case OP_UPLUS:    /* not sure these two... */
		case OP_UMINUS:   /* ...really belong here */
		case OP_NUM_LE:
		case OP_NUM_NE:
		case OP_NUM_GE:
		case OP_NUM_LT:
		case OP_NUM_GT:
		case OP_NUM_EQ:
		case OP_POWER:
		case OP_MULT:
		case OP_PLUS:
		case OP_MINUS:
		case OP_DIVIDE:
		case OP_NOT:
		case OP_OR:
		case OP_AND:
		case OP_NUM_ASSIGN:
		case OP_GRP_LPAR: /* yes, this belongs here, (((A$))) is a syntax error! */
			return 1;
		default:
			return 0;
	}
}

int is_numeric_var(unsigned char tok) {
	int vartype;

	if(tok < 0x80)
		return 0;

	vartype = get_vartype(tok);
	return (vartype == TYPE_SCALAR || vartype == TYPE_ARRAY);
}

/* return true if a token is:
   - a numeric constant (including hex constants),
   - a numeric variable (including arrays),
	- a function that returns a numeric (e.g. ASC(), SIN()).
	for now, only standard Atari BASIC tokens are considered.
	unary minus and plus make sense here, but binary ops don't.
 */
int is_numeric_rval(unsigned char tok) {
	return
		(tok == OP_UMINUS)    ||
		(tok == OP_UPLUS)     ||
		is_numconst_op  (tok) ||
		is_numeric_func (tok) ||
		is_numeric_var  (tok) ;
}

/* return true if a token is:
   - a numeric constant (including hex constants),
   - a numeric variable (including arrays),
   - a math operator (plus, minus, etc),
	- a function that returns a numeric (e.g. ASC(), SIN()).
	for now, only standard Atari BASIC tokens are considered.
 */
int is_numeric_op(unsigned char tok) {
	return
		is_numeric_rval (tok) ||
		is_arith_op     (tok) ;
}

int is_string_var(unsigned char tok) {
	return (tok >= 0x80 && (get_vartype(tok) == TYPE_STRING));
}

int is_string_const(unsigned char tok) {
	return (tok == OP_STRCONST);
}

int is_string_exp_op(unsigned char tok) {
	switch(tok) {
		case OP_STR_ASSIGN:
		case OP_STR_LE:
		case OP_STR_NE:
		case OP_STR_GE:
		case OP_STR_LT:
		case OP_STR_GT:
		case OP_STR_EQ:
		case OP_STR_LPAR: /* the ( in: A$(1)="A" */
			return 1;
		default:
			return 0;
	}
}

int is_string_func(unsigned char tok) {
	switch(tok) {
		case OP_FUNC_STR:
		case OP_FUNC_CHR:
			return 1;
		default:
			return 0;
	}
}

/* return true if a token is:
   - a string constant,
   - a string variable,
   - a function that returns a string.
*/
int is_string_rval(unsigned char tok) {
	return
		is_string_const  (tok) ||
		is_string_func   (tok) ||
		is_string_var    (tok) ;
}

/* return true if a token is:
   - a string constant,
   - a string variable,
   - a string expression operator, like OP_STR_LE,
   - a function that returns a string.
*/
int is_string_op(unsigned char tok) {
	return
		is_string_rval   (tok) ||
		is_string_exp_op (tok) ;
}

/* true if an operator token is a string function in BASIC XL (or XE).
   these tokens are all numeric functions in Turbo, so be sure you
   know what you're doing! */
int is_bxl_string_func(unsigned char tok) {
	switch(tok) {
		case 0x5c: /* BXL HEX$, Turbo DEC */
		case 0x66: /* BXL LEFT$, Turbo %0 */
		case 0x67: /* BXL RIGHT$, Turbo %1 */
		case 0x68: /* BXL MID$, Turbo %2 */
			return 1;
		default:
			return 0;
	}
}

void remove_type(int type) {
	bas_type &= ((~type) & 0x0f);

	if(keep_going) return;

	/* without -k, stop if it gets narrowed down to one of these 4. */
	if(bas_type == BT_ATARI || bas_type == BT_TURBO || bas_type == BT_BXE || bas_type == BT_BXL)
		print_result();
}

void set_type(int type) {
	bas_type = type;
	if(!keep_going) print_result();
}

CALLBACK(handle_cmd) {
	int has_args = 0, has_var_arg = 0, vartype = -1;
	unsigned char nexttok;

	last_cmd = tok;
	last_cmd_pos = pos;
	comma_count = 0;

	if(verbose) fprintf(stderr, "handle_cmd: lineno %d, tok $%02x, bas_type was %02x\n", lineno, tok, bas_type);

	nexttok = program[pos + 1];
	has_args = !(nexttok == OP_EOS || nexttok == OP_EOL);
	if(nexttok >= 0x80) {
		has_var_arg = 1;
		vartype = get_vartype(nexttok);
	}

	/* this switch is for tokens that are the same in Atari/Turbo/BXL/BXE, but with
	   different semantics. non-Atari-BASIC tokens go in the switch below, not
	   this one. */
	switch(tok) {
	   /* TB uses the same token for CLOSE as Atari and BXL/BXE, but it allows
	      it to have no argument (meaning, close all IOCBs). SOUND is the same
		   (no args = silence all POKEY channels). */
		case CMD_CLOSE:
		case CMD_SOUND:
			if(!has_args) {
				set_type(BT_TURBO);
			}
			break;
		case CMD_INPUT:
			/* TB, BXL, BXE all support INPUT "Prompt",VAR with the same tokenized
			   form. Atari BASIC doesn't allow string constants in INPUT args. */
			if(has_args) {
				if(nexttok == OP_STRCONST) {
					int pos_after_string;
					remove_type(BT_ATARI);
					/* TB only: INPUT "Prompt";VAR is also supported (not in BXL/BXE) */
					pos_after_string = pos + 3 + program[pos + 2];
					if(verbose)
						fprintf(stderr,
								"===> INPUT with string prompt at line %d, "
								"pos %04x, pos_after_string %04x (token %02x)\n",
								lineno, pos, pos_after_string, program[pos_after_string]);
					if(program[pos_after_string] == OP_SEMICOLON) {
						set_type(BT_TURBO);
					}
				}
			} else { /* has_args is false, oh shit! */
				fprintf(stderr, "*** INPUT without variable at line %d.\n*** Rev A BASIC bug, program will crash, better fix it!\n", lineno);
				set_type(BT_ATARI);
			}
			break;
		case CMD_GET:
		case CMD_PUT:
		   /* TB uses the same tokens for GET and PUT as Atari/BXL/BXE, but it allows
		      the argument to be a variable without a # in front of it. */
			if(nexttok != OP_HASH) {
				set_type(BT_TURBO);
			}
			/* PARTIAL: we really should detect GET #1,A$. this is Turbo-only, but
			   probably nobody ever uses it because it doesn't seem to *work*,
			   at least not in TB 1.5. A$ always ends up empty with length 0. */
			break;
		case CMD_RESTORE:
		case CMD_TRAP:
			/* TB allows RESTORE #LABEL and TRAP #LABEL */
			if(nexttok == OP_HASH) {
				set_type(BT_TURBO);
			}
			break;
		default: break;
	}

	if(tok <= CMD_ERROR) return; /* legal in BASIC, ignore */
	remove_type(BT_ATARI);
	if(tok >= 0x59) remove_type(BT_BXL);

	if(tok >= 0x65) {
		fprintf(stderr, "handle_cmd: invalid command %02x at line %d\n", tok, lineno);
		keep_going = 0;
		set_type(BT_INVALID);
	}

	/* we have tokens 0x3a to 0x68 in both TB and BXE, or 47 of them.

      Some tokens can't be determined, because they take the
      same argument (or lack of) in both Turbo and BXL/XE. These
      are:

      0x3c: REPEAT or ELSE (no args either way)
      0x46: LOOP or CP (no args either way)
      0x49: LOCK or UNPROTECT (take the same args)
      0x4B: RENAME in both Turbo and BXL/XE (same token, same args)

      4 of them, this leaves 43 we can check.

      Covered so far: 41 (95%).
      However, some of these are marked PARTIAL because they're not detected
      under all circumstances.

      Unchecked tokens:

      0x5B: BRUN or CALL (both take a string, CALL allows "USING" though)
            This isn't really important, as CALL requires a PROCEDURE to
            exist, and we *do* catch the PROCEDURE token.
      0x5F: PAINT (req 2 args) or NUM (optional 2 args).
            Again, not important, because it's highly unlikely any BXL/BXE
            program will contain NUM... because when it executes, it stops the
            program and goes back to the READY prompt (in auto-numbering mode).
    */
	switch(tok) {
		case 0x39: /* MOVE <args> or ENDWHILE */
		case 0x3a: /* -MOVE <args> or TRACEOFF */
		case 0x3d: /* UNTIL <args> or ENDIF */
		case 0x56: /* DEL <args> or FAST */
      case 0x62: /* CIRCLE (3 or 4 num args) or NORMAL (no args) */
			/* COMPLETE */
			if(has_args) {
				remove_type(BT_BXL_BXE);
			} else {
				remove_type(BT_TURBO);
			}
			break;

		case 0x58: /* TRACE (optional + or -), EXTEND (BXE; no args) */
			/* COMPLETE */
			/* In BXL, this looks to be an extra END token, that behaves the same
			   as the regular one, but can't be entered in the editor. Assume
			   no BXL program contains this token. */
			/* In BXE, EXTEND can't actually appear in a program (it's direct
			   mode only). The only way to get EXTEND into a BXE program is
			   to do a direct mode command like:
			   EXTEND:SAVE "D:PROG"
				...which of course puts it at line 32768. But this code will
			   never see that, because we'd already detect EXTENDed BXE
			   based on the first 2 bytes of the file.
			*/
			/* So, if we see this token, it *has* to be Turbo's TRACE, whether
			   or not it has an argument. */
			set_type(BT_TURBO);
			break;

		case 0x59: /* TEXT or PROCEDURE */
			/* COMPLETE */
			/* Turbo: TEXT (1st arg is number),
            BXL: invalid token,
            BXE: PROCEDURE (arg is string const (not var!)) */
			if(nexttok == OP_STRCONST) {
				/* this token doesn't seem to be valid in BXL at all */
				set_type(BT_BXE);
			} else {
				remove_type(BT_BXL_BXE);
			}
			break;

		case 0x3f: /* WEND or LOMEM <args> */
		case 0x40: /* ELSE or DEL <args> */
		case 0x41: /* ENDIF or RPUT <args> */
		case 0x45: /* DO or TAB <args> */
		case 0x47: /* EXIT or ERASE <args> */
		case 0x51: /* ENDPROC or PMMOVE <args> */
			/* COMPLETE */
			if(has_args) {
				remove_type(BT_TURBO);
			} else {
				remove_type(BT_BXL_BXE);
			}
			break;

		case 0x48: /* DIR (optional arg) or PROTECT (req'd arg) */
			/* PARTIAL: without args means TB, but with arg,
				it could be either */
			if(!has_args) {
				remove_type(BT_BXL_BXE);
			}
			break;

		case 0x4a: /* UNLOCK (req'd arg) or DIR (optional arg) */
			/* PARTIAL: without args means BXL/BXE, but with arg,
				it could be either */
			if(!has_args) {
				remove_type(BT_TURBO);
			}
			break;

		case 0x3b: /* *F (optional + or -), TRACE (no arg) */
		case 0x5e: /* *B (optional + or -) or EXIT (no arg) */
			/* PARTIAL: doesn't catch *F or *B by itself with no +/- */
			if(has_args) {
				remove_type(BT_BXL_BXE);
			}
			break;

		case 0x44: /* FILLTO or BGET (check for a # after the token) */
			/* COMPLETE */
			if(nexttok == OP_HASH) {
				remove_type(BT_TURBO);
			} else {
				remove_type(BT_BXL_BXE);
			}
			break;

		case 0x4e: /* TIME$= (1 string arg) or PMCLR (1 num arg) */
			/* PARTIAL: but almost complete. nothing happens if it's
			   TIME$= with a string function (probably rare) or PMCLR
			   with a complex expression. */
			if(nexttok == OP_STRCONST) {
				remove_type(BT_BXL_BXE);
			} else if(has_var_arg && vartype == TYPE_STRING) {
				remove_type(BT_BXL_BXE);
			} else if(nexttok == OP_NUMCONST) {
				remove_type(BT_TURBO);
			} else if(has_var_arg && vartype == TYPE_SCALAR) {
				remove_type(BT_TURBO);
			}
			break;

      case 0x50: /* EXEC (1 arg, *must* be variable) or PMGRAPHICS (1 num arg, may be const) */
			/* PARTIAL: PMGRAPHICS VAR won't be detected. but this usage is rare. */
			/* This check is actually redundant, because EXEC requires Turbo's
			   label type (high bits in var name table both set to 1), which we already
			   detected in check_variables().  */
			if(!has_var_arg) {
				remove_type(BT_TURBO);
			}
			break;

		case 0x54: /* -- in TB, LVAR in BXL/BXE */
			/* COMPLETE */
			/* We can tell these apart because:
				1. TB gives us a next-statement offset of 5 if -- is the first (or
				   actually only) statement on a line. Normally, the minimum offset
				   is 6, but there's no OP_EOL after this token for some reason.
			   2. If -- is the 2nd or or later statement on a line (after a colon)
			      it *does* get a statement terminator, but it's 0x9b (ATASCII EOL,
				   like a REM or DATA gets).
			   Note that it's impossible to put more statements *after* the --,
			   they just get ignored if you type them. This doesn't help us
			   here, but it's interesting anyway.
			   Also, the -- is what you type to enter it into the program, but
			   it get LISTed as a line of 30 dashes.
			   The explanation is a lot longer than the code... */
			if(program[pos - 1] == 0x05 || nexttok == 0x9b) {
				set_type(BT_TURBO);
			} else {
				remove_type(BT_TURBO);
			}
			break;

		case 0x57: /* DUMP (1 optional string arg) or LOCAL (1 *numeric* variable arg) */
			/* BXL/BXE's LOCAL only works on scalars, not arrays or strings. so if there's
			   no arg, or one string arg... */
			/* PARTIAL: almost complete, doesn't handle DUMP func$(arg), which I
				doubt anyone uses anyway. */
			if(!has_args) {
				/* only Turbo allows no arg... */
				remove_type(BT_BXL_BXE);
			} else if(nexttok == OP_STRCONST) {
				/* only Turbo allows a string constant arg... */
				remove_type(BT_BXL_BXE);
			} else if(has_var_arg && vartype == TYPE_STRING) {
				/* only Turbo allows a string variable arg... */
				remove_type(BT_BXL_BXE);
			} else if(has_var_arg && vartype == TYPE_SCALAR) {
				/* only BXL/BXL allows a scalar variable arg */
				remove_type(BT_TURBO);
			}
			break;

		case 0x5a: /* TB: BLOAD; BXL: extension mechanism; BXE: invalid. */
			/* COMPLETE */
			/* This is the token used for the BXL EXTEND.COM added commands,
			   from the Toolkit disk. It's followed by a byte ranging 0x10
			   to 0x15 that specifies which extended command, e.g. 0x5a 0x11 means
			   EXIT, 0x5a 0x12 is PROCEDURE, 0x5a 0x13 is CALL. Although
			   these look like BXE's extra commands, they aren't the same tokens,
			   and BXE will choke on them (RUN causes "Error- 33", LIST causes
			   a lockup). */
			if(nexttok >= 0x10 && nexttok <= 0x15) {
				/* worth mentioning to the user... */
				fprintf(stderr, "Note: program requires EXTEND.COM from BASIC XL Toolkit disk.\n");
				set_type(BT_BXL);
			} else {
				/* it's BLOAD if followed by e.g. OP_STRCONST or a variable */
				set_type(BT_TURBO);
			}
			break;

		case 0x5c: /* GO# (1 arg only) or SORTUP (optional 2nd arg of USING, but no comma) */
		case 0x5d: /* # (1 arg only) or SORTDOWN (optional 2nd arg of USING, but no comma) */
			/* COMPLETE but no longer needed (check_variables() already found the
			   11xxxxxx variables) */
			/* Turbo BASIC labels have the high 2 bits set to 11, which is illegal
			   in Atari/BXL/BXE. */
			if(vartype == 3) {
				remove_type(BT_BXL_BXE);
			} else {
				remove_type(BT_TURBO);
			}
			break;

		case 0x60: /* CLS (optional IOCB with #) or HITCLR (no args) */
			/* PARTIAL: without args, can't tell them apart. */
			/* I doubt CLS #IOCB is actually used in many Turbo BASIC
			   programs, because it's broken (at least in Turbo 1.5).
			   It's supposed to only clear the screen of output that
			   happened after the OPEN #IOCB, but it really clears the
			   whole screen. */
			if(nexttok == OP_HASH) {
				remove_type(BT_BXL_BXE);
			}
			break;

		default: break;
	}
	if(verbose) fprintf(stderr, "   bas_type now %02x\n", bas_type);
}

CALLBACK(handle_op) {
	unsigned char nexttok  = program[pos + 1];
	unsigned char nexttok2 = program[pos + 2];

	if(tok == OP_COMMA) comma_count++;

	if(verbose) fprintf(stderr, "handle_op: lineno %d, tok $%02x, comma_count %d, bas_type was %02x\n", lineno, tok, comma_count, bas_type);

	if(tok == 0x00) {
		/* Turbo allows 256 variables, tokenizes the first 128 normally ($80-$FF).
		   The extra ones above $FF are tokenized as $00, varnum - $80. None of
		   our other BASICs uses $00 as an operator token, so.. */
		set_type(BT_TURBO);
	}

	/* attempt to detect BXL/BXE DIM for 2D string arrays.
	   DIM A$(10,10) is illegal in Atari/Turbo.
	   PARTIAL: this only works if the first dimension is either a
	   constant or a scalar variable (not an array element or an
	   expression). fortunately most programs use constants in DIM.
	 */
	if(tok == OP_DIM_STR_LPAR) {
		int str2d = 0;
		if(nexttok >= 0x80 && nexttok2 == OP_ARR_COMMA) {
			str2d = 1;
		} else if(nexttok == OP_NUMCONST || nexttok == OP_HEXCONST) {
			str2d = (program[pos + 8] == OP_ARR_COMMA);
		}
		if(str2d) {
			if(verbose)
				fprintf(stderr, "===> found 2d string array at line %d\n", lineno);
			remove_type(BT_ATARI | BT_TURBO);
		}
	}

	/* BXL/BXE allows string concatenation in assignment with the comma,
	   A$="FOO","BAR" or A$=C$,D$. */
	if(last_cmd == CMD_LET || last_cmd == CMD_ILET) {
		if(program[last_cmd_pos + 2] == OP_STR_ASSIGN) {
			if(tok == OP_COMMA) {
				if(is_string_rval(nexttok)) {
					remove_type(BT_ATARI | BT_TURBO);
				}
			}
		}
	}

	if(tok == OP_HEXCONST) remove_type(BT_ATARI); /* hex const (turbo *and* bxl/xe) */
	if(tok <= OP_FUNC_STRIG) {
		if(verbose) fprintf(stderr, "   bas_type now %02x\n", bas_type);
		return; /* legal in BASIC, ignore */
	}
	remove_type(BT_ATARI);

	/* only Turbo has op tokens numbered 0x69 and up. */
	if(tok >= 0x69) {
		set_type(BT_TURBO);
	}

	if(tok >= 0x6E) {
		fprintf(stderr, "handle_op: invalid operator %02x at line %d\n", tok, lineno);
		keep_going = 0;
		set_type(BT_INVALID);
	}

	/* There are 25 extra operators in Turbo, and 20 of them are shared with
	   BXL/BXE. Of the 20, 4 of them are undecidable, and the rest are
	   covered here, which means 80% coverage of the shared ops.
	   Undecidables are:
	   0x56 & (logical AND) or % (XOR), both infix numeric ops; can't tell apart
	   0x57 ! (logical OR) in both Turbo and BXL/BXE, can't tell apart
	   0x64 RAND (func, 1 num arg) or TAB (func, 1 num arg), can't tell apart
	   0x65 TRUNC (func, 1 num arg) or PEN (func, 1 num arg), can't tell apart
	 */
	switch(tok) {
		case 0x55: /* DPEEK (function) TB, USING (infix, not a function) in BXL/BXE */
		case 0x58: /* INSTR (function) or & (infix numeric) in BXE. */
		case 0x5b: /* HEX$ (func, takes 1 num arg) or FIND( (pseudo-func, 3 args */
			/* COMPLETE */
			if(nexttok == OP_FUNC_LPAR) {
				remove_type(BT_BXL_BXE);
			} else {
				remove_type(BT_TURBO);
			}
			break;

		case 0x59: /* INKEY$ (0 arg pseudo-func) in TB, string array separator semicolon in BXL/BXE */
			/* PARTIAL: ...but pretty good. we *can't* check nexttok == OP_GRP_RPAR, because
			   VAL(INKEY$) or ASC(INKEY$) are legit Turbo code. */
			if(nexttok == OP_EOS || nexttok == OP_EOL) {
				/* the semicolon can't be the last token on the line (needs at least
				   a right-paren), but INKEY$ can. */
				remove_type(BT_BXL_BXE);
			} else if(pos == last_cmd_pos + 1) {
				/* INKEY$ can be the first operator after the command, e.g if the command
				   is IF. The semicolon cannot. */
				remove_type(BT_BXL_BXE);
			} else if(is_string_exp_op(last_op_tok) || is_string_exp_op(nexttok)) {
				/* A$=INKEY$, IF INKEY$=A$, A$(LEN(A$)+1)=INKEY$, INKEY$<>"A"... */
				remove_type(BT_BXL_BXE);
			} else if(is_numeric_op(last_op_tok) || is_numeric_op(nexttok)) {
				remove_type(BT_TURBO);
			}
			break;

		case 0x5a: /* EXOR (infix num op) or BUMP( (pseudo-function, no OP_FUNC_LPAR) */
		case 0x5d: /* DIV (infix num op) or RANDOM( (pseudo-func, 1 or 2 num args) */
			/* COMPLETE (I think, anyway) */
			if(last_op_tok == OP_GRP_RPAR || last_op_tok == OP_NUMCONST || last_op_tok == OP_HEXCONST || last_op_tok >= 0x80) {
				/* if the last token was a variable or a numeric, or a right paren,
				   this is infix (can't be a function, last token would have to have
				   been a command or a math/etc operator). */
				remove_type(BT_BXL_BXE);
			} else {
				remove_type(BT_TURBO);
			}
			break;

		case 0x5c: /* DEC (function, takes str) in TB, HEX$ (function, takes num) in BXL/BXE */
			/* COMPLETE */
			if(is_string_rval(nexttok2)) {
				remove_type(BT_BXL_BXE);
			} else if(is_numeric_op(nexttok2)) {
				remove_type(BT_TURBO);
			}
			break;

		case 0x5e: /* FRAC (num func, 1 arg) or DPEEK (num func, 1 arg) in BXL...
		              however BXE has an optional 2nd arg. */
			{
				/* PARTIAL: This detects the 2nd arg for simple cases where the
				   1st arg is a constant or a numeric variable, but not if the
				   1st arg is an expression or an array element. */
				int has2 = 0;
				if(nexttok2 == OP_NUMCONST || nexttok2 == OP_HEXCONST) {
					if(program[pos + 9] == OP_ARR_COMMA)
						has2 = 1;
				} else if(nexttok2 >= 0x80 && program[pos + 3] == OP_ARR_COMMA) {
					has2 = (get_vartype(nexttok2) == TYPE_SCALAR);
				}
				if(has2) {
					set_type(BT_BXE);
				}
			}
			break;

		case 0x5f: /* TIME$ in TB, SYS (function) in BXL/BXE */
		case 0x60: /* TIME in TB, VSTICK (function) in BXL/BXE */
		case 0x61: /* MOD (infix op) in TB, HSTICK (function) in BXL/BXE */
		case 0x62: /* EXEC (infix op, with ON) in TB, PMADR (function) in BXL/BXE */
			/* COMPLETE */
			if(nexttok == OP_FUNC_LPAR) {
				remove_type(BT_TURBO);
			} else {
				remove_type(BT_BXL_BXE);
			}
			break;

		case 0x63: /* RND (pseudo-func, no arg) or ERR (func, 1 num arg) */
			/* COMPLETE */
			if(nexttok != OP_FUNC_LPAR) {
				set_type(BT_TURBO);
			}
			break;

		case 0x66: /* %0 in TB, LEFT$( (pseudo-func, takes string) in BXL/BXE */
		case 0x67: /* %1 in TB, RIGHT$( (pseudo-func, takes string) in BXL/BXE */
		case 0x68: /* %2 in TB, MID$( (pseudo-func, takes string) in BXL/BXE */
			/* COMPLETE */
			/* LEFT$/RIGHT$/MID$ do NOT get OP_FUNC_LPAR (the "(" is part of the
			   token name). They're always followed by a string operator... and
			   it works out that none of the tokens for BXL-only string funcs
			   are allowed to follow %0 %1 %2 in Turbo. */
			if(is_string_op(nexttok) || is_bxl_string_func(nexttok)) {
				remove_type(BT_TURBO);
			} else {
				remove_type(BT_BXL_BXE);
			}
			break;

		default:
			break;
	}

	last_op_tok = tok;
	if(verbose) fprintf(stderr, "   bas_type now %02x\n", bas_type);
}

/* we can count commas, because both Turbo and BXE/BXL use the "array" comma
   to separate function arguments, not the "regular" comma. */
CALLBACK(handle_end_stmt) {
	if(verbose) fprintf(stderr, "handle_end_stmt: lineno %d, tok $%02x, last_cmd $%02x, comma_count %d, bas_type was %02x\n", lineno, tok, last_cmd, comma_count, bas_type);
	switch(last_cmd) {
		case 0x38: /* DPOKE (2 args) or WHILE (1 arg) */
			if(comma_count) {
				remove_type(BT_BXL_BXE);
			} else {
				remove_type(BT_TURBO);
			}
			break;

		case 0x3e: /* WHILE (1 arg) or DPOKE (2 or 3 args) */
		case 0x4c: /* DELETE (1 arg) or MOVE (3 or 4 args) */
		case 0x4d: /* PAUSE (1 arg) or MISSILE (3 args) */
      case 0x52: /* FCOLOR (1 arg) or PMWIDTH (2 args) */
      case 0x53: /* *L (optional + or - only) or SET (req 2 num args) */
      case 0x4f: /* PROC (1 arg) or PMCOLOR (3 args) */
			if(comma_count) { /* 1 arg means no commas */
				remove_type(BT_TURBO);
			} else {
				remove_type(BT_BXL_BXE);
			}
			break;

		case 0x42: /* BPUT or RGET */
			/* PARTIAL:
			   Turbo BGET always takes 3 args, BXL/BXE RGET takes 2 or more.
			   We can at least rule out Turbo if there aren't exactly 3 args. */
			if(comma_count != 2) {
				remove_type(BT_TURBO);
			}
			break;

		case 0x43: /* BGET or BPUT */
			/* PARTIAL:
			   Turbo BGET and BPUT always take 3 args. So does BXL BPUT.
				BXE BPUT takes 3 args and an optional 4th. */
			if(comma_count != 2) {
				set_type(BT_BXE);
			}
			break;

      case 0x55: /* RENUM in both (TB req 3 args, BXL up to two) */
			if(comma_count == 2) {
				remove_type(BT_BXL_BXE);
			} else {
				remove_type(BT_TURBO);
			}
			break;

      case 0x61: /* DSOUND (0 or 4 num args) or INVERSE (no args) */
			/* PARTIAL: can't tell no-argument DSOUND from INVERSE. */
			if(comma_count) {
				remove_type(BT_BXL_BXE);
			}
			break;

      case 0x63: /* %PUT (usually seen with optional #; 1 or 2 args) or BLOAD (1 string arg) */
			if(comma_count) {
				/* multiple args */
				remove_type(BT_BXL_BXE);
			} else if(program[last_cmd_pos + 1] == OP_STRCONST) {
				/* one arg, string const. XXX: check var type */
				remove_type(BT_TURBO);
			}
			break;

      case 0x64: /* %GET (usually seen with optional #; 1 or 2 args) or BSAVE (3 args) */
			if(comma_count == 2) {
				remove_type(BT_TURBO);
			} else {
				remove_type(BT_BXL_BXE);
			}
			break;

		default: break;
	}
	if(verbose) fprintf(stderr, "   bas_type now %02x\n", bas_type);

	last_cmd = last_op_tok = 0;
}

/* return true if input_file is Atari MS BASIC.
   AMSB files begin with a 3-byte header: 0x00, then 2 byte length
   (LSB/MSB), which is actually 3 bytes less than the full length of
   the file (or, it's the length of the file minus the 3-byte header).
   Also, the files always end with 3 0x00 bytes.
	We check that the header length is 3 bytes less than the file length,
   then check for the 3 0x00's at the end.
 */
int detect_amsb(void) {
	int len, c;

	if(verbose) fprintf(stderr, "entering detect_amsb()\n");

	rewind(input_file);
	c = fgetc(input_file);
	if(c) return 0;
	c = fgetc(input_file);
	if(c == EOF) return 0;
	len = (fgetc(input_file) << 8) | c;

	if(verbose) fprintf(stderr, "detect_amsb() header len==%d (file size should be %d)\n", len, len + 3);

	fseek(input_file, 0, SEEK_END);
	c = ftell(input_file);
	if(verbose) fprintf(stderr, "detect_amsb() file size %d\n", c);
	if(len != (c - 3)) {
		if(verbose) fprintf(stderr, "detect_amsb() wrong file size!\n");
		return 0;
	}

	if(verbose) fprintf(stderr, "detect_amsb() file size is correct, checking for 3 nulls\n");
	fseek(input_file, -3, SEEK_END);
	if(fgetc(input_file)) return 0;
	if(fgetc(input_file)) return 0;
	if(fgetc(input_file)) return 0;

	if(verbose) fprintf(stderr, "detect_amsb() found 3 nulls, return 1\n");

	return 1;
}

void foreign(const char *name, int srval) {
	if(input_file) fclose(input_file);
	if(script_mode) {
		exit(srval);
	} else {
		puts(name);
		exit(0);
	}
}

void detect_foreign(void) {
	int c, d;

	c = fgetc(input_file);
	d = fgetc(input_file);

	if(c == EOF || d == EOF)
		die("File is too short to be a BASIC program of any kind.");

	if(c == 0 && d == 0) {
		/* This is why we can't read from stdin. */
		rewind(input_file);
		return;
	}

	if(c == 0xfb && d == 0xc2)
		foreign("Compiled Turbo BASIC XL", SRET_COMPILED_TURBO);

	if(c == 0xff && d == 0xff)
		foreign("XEX executable (not BASIC at all!)", SRET_NOT_BASIC);

	if(c == 0xfe && d == 0xfe)
		foreign("Mac/65 tokenized source (not BASIC at all!)", SRET_NOT_BASIC);

	if(c == 0xdd && d == 0x00)
		foreign("EXTENDed OSS BASIC XE", SRET_EXTENDED_BXE);

	if(c == 0x7f && d == 'E') {
		c = fgetc(input_file);
		d = fgetc(input_file);
		if(c == 'L' && d == 'F')
			foreign("ELF executable (not BASIC at all!)", SRET_NOT_BASIC);
	}

	if(c == 0 && detect_amsb()) {
		foreign("Atari Microsoft BASIC", SRET_AMSB);
	}

	if(isdigit(c) && (d == 0x20 || isdigit(d)))
		foreign("Text file, could be LISTed BASIC (or not)", SRET_NOT_BASIC);

	if(isprint(c) && isprint(d))
		foreign("Text file (not BASIC at all!)", SRET_NOT_BASIC);

	foreign("Unknown file type (not BASIC at all!)", SRET_NOT_BASIC);
}

void check_variables(void) {
	int pos;

	if(vntp == vntd) return;

	/* Unlike Atari BASIC, Turbo variables can have _ in the names.
	   So can BASIC XE, though it's not documented in the BASIC XE
	   Reference Manual that I have.
	   BXL can't have _ in variable names. */
	for(pos = vnstart; pos < vvstart; pos++) {
		if((program[pos] & 0x7f) == '_') {
			remove_type(BT_ATARI | BT_BXL);
		}
	}

	/* Also, Turbo line labels (for PROC/EXEC and #/GO#) are variables
	   with a type that's illegal in Atari/BXL/BXE. */
	for(pos = vvstart; pos < codestart; pos += 8) {
		if((program[pos] & 0xc0) == 0xc0) {
			set_type(BT_TURBO);
		}
	}

	/* I was hoping to check for BXL/BXE string arrays here. However,
	   looking at a SAVEd file, they look identical to regular string
	   variables (variable type $80, rest of the VVTP entry all $00).
	   When the program's actually in memory, BXL/BXE sets the
	   variable type byte to $91 for DIMed string array var and
	   $81 (same as Atari/Turbo) for a regular DIMed string var.
	   Unfortunately in the SAVE file, it's always $80. */

	/* Another thing that can't be detected: BXL/BXE's FAST mode changes
	   the program in memory (line number targets become addresses),
	   but programs don't get SAVEd this way: SAVE turns off FAST and
	   restores the program to its original state before writing it
	   to disk. Too bad. */
}

/* BASIC/A+ support is *very* simple. It's similar to BASIC XL (no
   surprise there)... but unlike Turbo, BXL, and BXE, it's *not*
   token-compatible with original Atari BASIC. Rather than add
   their new tokens to the end of the lists, they're mixed in with
   the others. So A+ can't even LOAD or RUN Atari BASIC files.
   Appendix J of the BASIC/A+ manual tells you to LIST in BASIC,
   reboot, ENTER in A+, to "port" your BASIC programs to A+. I
   suppose if you upgraded from A+ to BASIC XL or XE, you'd have
   to do the same thing to use A+ programs in XL/XE.

   While this was probably a PITA for BASIC/A+ users back in the day,
   it makes it *really* easy to detect A+ here. The last line of
   every SAVEed program is the direct-mode command, and contains either
   a SAVE or CSAVE cmd token. Which is the same token in Atari, Turbo,
   BXL, and BXE... but *different* in A+.

   However... I've run into at least one BASIC program in the Holmes
   Archive that was missing its line 32768 (wish I could remember which).
   And it's possible for files to get truncated... so I'll check a few
   other command tokens, to deal with cases like this.
 */
void found_aplus(void) {
	foreign("OSS BASIC/A+", SRET_APLUS);
}

CALLBACK(check_aplus_cmd) {
	int has_args;
	unsigned char nexttok, nexttok2;

	nexttok = program[pos + 1];
	nexttok2 = program[pos + 2];
	has_args = !(nexttok == OP_EOS || nexttok == OP_EOL);
	if(verbose) fprintf(stderr, "check_aplus_cmd: line %d, pos $%04x, tok $%02x, nexttok $%02x, nexttok2 $%02x\n", lineno, pos, tok, nexttok, nexttok2);

	switch(tok) {
		case CMD_POP:      /* A+ READ */
		case CMD_DOS:      /* A+ GET */
		case CMD_DEG:      /* A+ WHILE */
		case CMD_CLR:      /* A+ DIM */
		case 0x46:         /* A+ SOUND <args>, Turbo LOOP, BXL/XE CP */
		case 0x3c:         /* A+ DIR <req'd arg>, Turbo REPEAT, BXL/XE ELSE */
			if(has_args)
				found_aplus();
			break;

		case CMD_SAVE:     /* A+ END */
		case CMD_GRAPHICS: /* A+ STOP */
		case CMD_DIM:      /* A+ ENDWHILE */
		/* case CMD_GET: */     /* A+ RETURN, but Turbo allows 0 args too! */
		case 0x38:         /* A+ DOS, Turbo DPOKE, BXL/XE WHILE */
			if(!has_args)
				found_aplus();
			break;

		case CMD_POSITION: /* A+ ? */
			/* PARTIAL: does nothing if 1st arg is numeric. */
			if(!has_args)
				/* POSITION can't have 0 args. */
				found_aplus();
			else if(is_string_rval(nexttok))
				/* ? "STRING" or ? A$, e.g., can't be POSITION */
				found_aplus();
			else if(nexttok == OP_NUM_LE) /* numeric <= in BASIC, # in A+ */
				found_aplus();
			break;

		case CMD_RUN: /* A+ PRINT */
			/* PARTIAL: only detects PRINT # or PRINT <num> ... */
			/* A+'s # token is BASIC's OP_NUM_LE! */
			if(nexttok == OP_NUM_LE)
				found_aplus();
			else if(is_numconst_op(nexttok) || is_numeric_var(nexttok))
				found_aplus();
			break;

		case CMD_XIO:    /* A+ SAVE */
			/* most programs, this is enough, because they'll end with
				32768 SAVE "D:BLAH" */
			if(is_string_rval(nexttok))
				found_aplus();
			break;

		case CMD_OPEN:   /* A+ ELSE */
		/* case CMD_CLOSE: */  /* A+ DEG */ /* can't check, Turbo allows no args */
		case CMD_STATUS: /* A+ NEW */
		case CMD_POINT:  /* A+ LOAD */
		case 0x42:       /* A+ POSITION, Turbo BPUT, BXL RGET */
		case 0x43:       /* A+ DRAWTO, Turbo BGET, BXL BPUT */
			if(nexttok != OP_HASH) /* # in BASIC, USING in A+ */
				found_aplus();
			break;

		/* case 0x48: */ /* A+ CSAVE, Turbo DIR, BXL/BXE PROTECT */
			/* DIR without arg is OK, so we can't really check this. */
			/* break; */

		case CMD_DRAWTO: /* A+ PUT */
		case CMD_SOUND:  /* A+ RPUT */
		case CMD_LPRINT: /* A+ RGET */
		case CMD_CSAVE:  /* A+ BPUT */
		case CMD_CLOAD:  /* A+ BGET */
		case CMD_ON:     /* A+ STATUS */
		case CMD_NOTE:   /* A+ OPEN */
		case CMD_CONT:   /* A+ CLOSE */
		case CMD_RAD:    /* A+ XIO */
			if(nexttok == OP_NUM_LE) /* numeric <= in BASIC, # in A+ */
				found_aplus();
			break;

		default: break;
	}

	last_cmd = tok;
}

void check_aplus(void) {
	allow_hex_const = 1;

	on_cmd_token = check_aplus_cmd;
	walk_all_code();
}

void check_atari_turbo_oss(void) {
	allow_hex_const = 1;

	on_cmd_token = handle_cmd;
	on_exp_token = handle_op;
	on_end_stmt = handle_end_stmt;

	walk_all_code();
}

int main(int argc, char **argv) {
	set_self(*argv);
	parse_general_args(argc, argv, print_help);
	parse_args(argc, argv);

	detect_foreign();

	readfile();
	parse_header();

	check_variables();
	check_aplus();
	check_atari_turbo_oss();

	print_result(); /* always exits */
	return 0; /* never happens, shuts up gcc's warning though */
}