1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
|
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <time.h>
#include <sys/wait.h>
#include "bas.h"
#include "whichbas.h"
#define BT_INVALID 0
#define BT_ATARI 1
#define BT_TURBO 2
#define BT_BXL 4
#define BT_BXE 8
#define BT_BXL_BXE (BT_BXL | BT_BXE)
int bas_type = 0x0f; /* start out with all enabled */
int script_mode = 0; /* -s flag */
int script_ret; /* -s mode, exit with one of SRET_* from whichbas.h as status */
int keep_going = 0; /* -k flag */
int comma_count; /* count of regular commas (not string/array) in statement */
unsigned char last_cmd;
unsigned char last_op_tok;
unsigned short last_cmd_pos;
void print_help(void) {
printf("Usage: %s [-v] [-h] [-k] [-s] <inputfile> ...\n", self);
}
int child_errs = 0;
/* return true if the child ran and returned 0 (success) */
int spawn_child(const char **args) {
pid_t pid, status;
int wstatus;
pid = fork();
if(pid == -1) {
perror("fork()");
die("Can't spawn child process");
} else if(pid) {
/* we are the parent */
status = waitpid(pid, &wstatus, 0);
if(status < 0) {
perror("waitpid()");
die("Child process went south");
}
if(! (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == 0) ) {
child_errs++;
return 0;
}
} else {
/* we are the child */
if(execv(args[0], (char * const *)args) < 0) {
perror("exec()");
exit(1);
}
}
return 1;
}
/* this is not the ideal way to handle multiple files. it forks a new
process for each one. however, I embedded a bunch of die() calls
in bas.c, thinking I'd never use some of the functions more than once
in the same run. what I get for trying to KISS... */
void multiple_files(const char *argv0, char **list) {
const char *args[5]; /* 5 because eventually we include -k and/or -v */
int kidstat;
/* this isn't needed for things to work, but if I write buggy code,
it avoids a forkbomb. */
setenv("WHICHBAS_MULTI_PARANOIA", "1", 1);
args[0] = argv0;
args[2] = NULL;
while(*list) {
args[1] = *list;
printf("%s:\t", *list);
fflush(stdout);
kidstat = spawn_child(args);
if(!kidstat) puts("(detection failed)");
fflush(stdout);
list++;
}
if(child_errs) {
fprintf(stderr, "%s: exiting with error status because some files failed.\n", self);
exit(1);
} else {
exit(0);
}
}
void parse_args(int argc, char **argv) {
int opt;
while( (opt = getopt(argc, argv, "vksh")) != -1) {
switch(opt) {
case 'v': verbose = 1; break;
case 'k': keep_going = verbose = 1; break;
case 's': script_mode = 1; break;
case 'h': print_help(); exit(0);
default:
print_help();
exit(1);
}
}
if(optind >= argc)
die("No input file given (and stdin not supported).");
if(optind == argc - 1) { /* got one filename only */
open_input(argv[optind]);
if(input_file == stdin)
die("Reading from standard input not supported.");
} else { /* got multiple filenames */
if(keep_going || verbose || script_mode)
die("-k, -v, -s not supported with multiple filenames (yet?)");
if(getenv("WHICHBAS_MULTI_PARANOIA"))
die("BUG: multiple_files() recursion!");
multiple_files(argv[0], argv + optind);
}
}
/* don't need this.
void add_type(int type) {
bas_type |= type;
}
*/
void print_result(void) {
const char *name;
if(verbose) fprintf(stderr, " final bas_type %02x\n", bas_type);
if(bas_type == BT_INVALID) {
name = "Unknown variant: Not Atari BASIC, Turbo, BXL, or BXE";
script_ret = SRET_UKNOWN_DERIV;
} else if(bas_type & BT_ATARI) {
name = "Atari BASIC";
script_ret = SRET_ATARI;
} else if(bas_type & BT_TURBO) {
if(bas_type & BT_BXL) {
name = "Either Turbo BASIC XL or OSS BASIC XL";
script_ret = SRET_TURBO_OR_BXL;
} else if(bas_type & BT_BXE) {
name = "Either Turbo BASIC XL or OSS BASIC XE";
script_ret = SRET_TURBO_OR_BXE;
} else { /* bas_type == BT_TURBO */
name = "Turbo BASIC XL";
script_ret = SRET_TURBO;
}
} else if(bas_type == BT_BXL || bas_type == (BT_BXL | BT_BXE)) {
name = "OSS BASIC XL";
script_ret = SRET_BXL;
} else if(bas_type == BT_BXE) {
name = "OSS BASIC XE";
script_ret = SRET_BXE;
} else {
/* this one should never happen: */
name = "Either Turbo BASIC XL, OSS BASIC XL, or OSS BASIC XE";
script_ret = SRET_TURBO_OR_BXL_OR_BXE;
}
if(script_mode) {
exit(script_ret);
} else {
puts(name);
exit(0);
}
}
/* return true if a token is numeric constant
(including TB/BXE/BXL hex) */
int is_numconst_op(unsigned char tok) {
switch(tok) {
case OP_NUMCONST:
case OP_HEXCONST:
return 1;
default:
return 0;
}
}
/* return true if a token is a function that *returns*
a numeric value (says nothing about the argument types,
though!) */
int is_numeric_func(unsigned char tok) {
switch(tok) {
case OP_FUNC_USR:
case OP_FUNC_ASC:
case OP_FUNC_VAL:
case OP_FUNC_LEN:
case OP_FUNC_ADR:
case OP_FUNC_ATN:
case OP_FUNC_COS:
case OP_FUNC_PEEK:
case OP_FUNC_SIN:
case OP_FUNC_RND:
case OP_FUNC_FRE:
case OP_FUNC_EXP:
case OP_FUNC_LOG:
case OP_FUNC_CLOG:
case OP_FUNC_SQR:
case OP_FUNC_SGN:
case OP_FUNC_ABS:
case OP_FUNC_INT:
case OP_FUNC_PADDLE:
case OP_FUNC_STICK:
case OP_FUNC_PTRIG:
case OP_FUNC_STRIG:
return 1;
default:
return 0;
}
}
/* return true if a token is an arithmetic operator */
int is_arith_op(unsigned char tok) {
switch(tok) {
case OP_UPLUS: /* not sure these two... */
case OP_UMINUS: /* ...really belong here */
case OP_NUM_LE:
case OP_NUM_NE:
case OP_NUM_GE:
case OP_NUM_LT:
case OP_NUM_GT:
case OP_NUM_EQ:
case OP_POWER:
case OP_MULT:
case OP_PLUS:
case OP_MINUS:
case OP_DIVIDE:
case OP_NOT:
case OP_OR:
case OP_AND:
case OP_NUM_ASSIGN:
case OP_GRP_LPAR: /* yes, this belongs here, (((A$))) is a syntax error! */
return 1;
default:
return 0;
}
}
int is_numeric_var(unsigned char tok) {
int vartype;
if(tok < 0x80)
return 0;
vartype = get_vartype(tok);
return (vartype == TYPE_SCALAR || vartype == TYPE_ARRAY);
}
/* return true if a token is:
- a numeric constant (including hex constants),
- a numeric variable (including arrays),
- a function that returns a numeric (e.g. ASC(), SIN()).
for now, only standard Atari BASIC tokens are considered.
unary minus and plus make sense here, but binary ops don't.
*/
int is_numeric_rval(unsigned char tok) {
return
(tok == OP_UMINUS) ||
(tok == OP_UPLUS) ||
is_numconst_op (tok) ||
is_numeric_func (tok) ||
is_numeric_var (tok) ;
}
/* return true if a token is:
- a numeric constant (including hex constants),
- a numeric variable (including arrays),
- a math operator (plus, minus, etc),
- a function that returns a numeric (e.g. ASC(), SIN()).
for now, only standard Atari BASIC tokens are considered.
*/
int is_numeric_op(unsigned char tok) {
return
is_numeric_rval (tok) ||
is_arith_op (tok) ;
}
int is_string_var(unsigned char tok) {
return (tok >= 0x80 && (get_vartype(tok) == TYPE_STRING));
}
int is_string_const(unsigned char tok) {
return (tok == OP_STRCONST);
}
int is_string_exp_op(unsigned char tok) {
switch(tok) {
case OP_STR_ASSIGN:
case OP_STR_LE:
case OP_STR_NE:
case OP_STR_GE:
case OP_STR_LT:
case OP_STR_GT:
case OP_STR_EQ:
case OP_STR_LPAR: /* the ( in: A$(1)="A" */
return 1;
default:
return 0;
}
}
int is_string_func(unsigned char tok) {
switch(tok) {
case OP_FUNC_STR:
case OP_FUNC_CHR:
return 1;
default:
return 0;
}
}
/* return true if a token is:
- a string constant,
- a string variable,
- a function that returns a string.
*/
int is_string_rval(unsigned char tok) {
return
is_string_const (tok) ||
is_string_func (tok) ||
is_string_var (tok) ;
}
/* return true if a token is:
- a string constant,
- a string variable,
- a string expression operator, like OP_STR_LE,
- a function that returns a string.
*/
int is_string_op(unsigned char tok) {
return
is_string_rval (tok) ||
is_string_exp_op (tok) ;
}
/* true if an operator token is a string function in BASIC XL (or XE).
these tokens are all numeric functions in Turbo, so be sure you
know what you're doing! */
int is_bxl_string_func(unsigned char tok) {
switch(tok) {
case 0x5c: /* BXL HEX$, Turbo DEC */
case 0x66: /* BXL LEFT$, Turbo %0 */
case 0x67: /* BXL RIGHT$, Turbo %1 */
case 0x68: /* BXL MID$, Turbo %2 */
return 1;
default:
return 0;
}
}
void remove_type(int type) {
bas_type &= ((~type) & 0x0f);
if(keep_going) return;
/* without -k, stop if it gets narrowed down to one of these 4. */
if(bas_type == BT_ATARI || bas_type == BT_TURBO || bas_type == BT_BXE || bas_type == BT_BXL)
print_result();
}
void set_type(int type) {
bas_type = type;
if(!keep_going) print_result();
}
CALLBACK(handle_cmd) {
int has_args = 0, has_var_arg = 0, vartype = -1;
unsigned char nexttok;
last_cmd = tok;
last_cmd_pos = pos;
comma_count = 0;
if(verbose) fprintf(stderr, "handle_cmd: lineno %d, tok $%02x, bas_type was %02x\n", lineno, tok, bas_type);
nexttok = program[pos + 1];
has_args = !(nexttok == OP_EOS || nexttok == OP_EOL);
if(nexttok >= 0x80) {
has_var_arg = 1;
vartype = get_vartype(nexttok);
}
/* this switch is for tokens that are the same in Atari/Turbo/BXL/BXE, but with
different semantics. non-Atari-BASIC tokens go in the switch below, not
this one. */
switch(tok) {
/* TB uses the same token for CLOSE as Atari and BXL/BXE, but it allows
it to have no argument (meaning, close all IOCBs). SOUND is the same
(no args = silence all POKEY channels). */
case CMD_CLOSE:
case CMD_SOUND:
if(!has_args) {
set_type(BT_TURBO);
}
break;
case CMD_INPUT:
/* TB, BXL, BXE all support INPUT "Prompt",VAR with the same tokenized
form. Atari BASIC doesn't allow string constants in INPUT args. */
if(has_args) {
if(nexttok == OP_STRCONST) {
int pos_after_string;
remove_type(BT_ATARI);
/* TB only: INPUT "Prompt";VAR is also supported (not in BXL/BXE) */
pos_after_string = pos + 3 + program[pos + 2];
if(verbose)
fprintf(stderr,
"===> INPUT with string prompt at line %d, "
"pos %04x, pos_after_string %04x (token %02x)\n",
lineno, pos, pos_after_string, program[pos_after_string]);
if(program[pos_after_string] == OP_SEMICOLON) {
set_type(BT_TURBO);
}
}
} else { /* has_args is false, oh shit! */
fprintf(stderr, "*** INPUT without variable at line %d.\n*** Rev A BASIC bug, program will crash, better fix it!\n", lineno);
set_type(BT_ATARI);
}
break;
case CMD_GET:
case CMD_PUT:
/* TB uses the same tokens for GET and PUT as Atari/BXL/BXE, but it allows
the argument to be a variable without a # in front of it. */
if(nexttok != OP_HASH) {
set_type(BT_TURBO);
}
/* PARTIAL: we really should detect GET #1,A$. this is Turbo-only, but
probably nobody ever uses it because it doesn't seem to *work*,
at least not in TB 1.5. A$ always ends up empty with length 0. */
break;
case CMD_RESTORE:
case CMD_TRAP:
/* TB allows RESTORE #LABEL and TRAP #LABEL */
if(nexttok == OP_HASH) {
set_type(BT_TURBO);
}
break;
default: break;
}
if(tok <= CMD_ERROR) return; /* legal in BASIC, ignore */
remove_type(BT_ATARI);
if(tok >= 0x59) remove_type(BT_BXL);
if(tok >= 0x65) {
fprintf(stderr, "handle_cmd: invalid command %02x at line %d\n", tok, lineno);
keep_going = 0;
set_type(BT_INVALID);
}
/* we have tokens 0x3a to 0x68 in both TB and BXE, or 47 of them.
Some tokens can't be determined, because they take the
same argument (or lack of) in both Turbo and BXL/XE. These
are:
0x3c: REPEAT or ELSE (no args either way)
0x46: LOOP or CP (no args either way)
0x49: LOCK or UNPROTECT (take the same args)
0x4B: RENAME in both Turbo and BXL/XE (same token, same args)
4 of them, this leaves 43 we can check.
Covered so far: 41 (95%).
However, some of these are marked PARTIAL because they're not detected
under all circumstances.
Unchecked tokens:
0x5B: BRUN or CALL (both take a string, CALL allows "USING" though)
This isn't really important, as CALL requires a PROCEDURE to
exist, and we *do* catch the PROCEDURE token.
0x5F: PAINT (req 2 args) or NUM (optional 2 args).
Again, not important, because it's highly unlikely any BXL/BXE
program will contain NUM... because when it executes, it stops the
program and goes back to the READY prompt (in auto-numbering mode).
*/
switch(tok) {
case 0x39: /* MOVE <args> or ENDWHILE */
case 0x3a: /* -MOVE <args> or TRACEOFF */
case 0x3d: /* UNTIL <args> or ENDIF */
case 0x56: /* DEL <args> or FAST */
case 0x62: /* CIRCLE (3 or 4 num args) or NORMAL (no args) */
/* COMPLETE */
if(has_args) {
remove_type(BT_BXL_BXE);
} else {
remove_type(BT_TURBO);
}
break;
case 0x58: /* TRACE (optional + or -), EXTEND (BXE; no args) */
/* COMPLETE */
/* In BXL, this looks to be an extra END token, that behaves the same
as the regular one, but can't be entered in the editor. Assume
no BXL program contains this token. */
/* In BXE, EXTEND can't actually appear in a program (it's direct
mode only). The only way to get EXTEND into a BXE program is
to do a direct mode command like:
EXTEND:SAVE "D:PROG"
...which of course puts it at line 32768. But this code will
never see that, because we'd already detect EXTENDed BXE
based on the first 2 bytes of the file.
*/
/* So, if we see this token, it *has* to be Turbo's TRACE, whether
or not it has an argument. */
set_type(BT_TURBO);
break;
case 0x59: /* TEXT or PROCEDURE */
/* COMPLETE */
/* Turbo: TEXT (1st arg is number),
BXL: invalid token,
BXE: PROCEDURE (arg is string const (not var!)) */
if(nexttok == OP_STRCONST) {
/* this token doesn't seem to be valid in BXL at all */
set_type(BT_BXE);
} else {
remove_type(BT_BXL_BXE);
}
break;
case 0x3f: /* WEND or LOMEM <args> */
case 0x40: /* ELSE or DEL <args> */
case 0x41: /* ENDIF or RPUT <args> */
case 0x45: /* DO or TAB <args> */
case 0x47: /* EXIT or ERASE <args> */
case 0x51: /* ENDPROC or PMMOVE <args> */
/* COMPLETE */
if(has_args) {
remove_type(BT_TURBO);
} else {
remove_type(BT_BXL_BXE);
}
break;
case 0x48: /* DIR (optional arg) or PROTECT (req'd arg) */
/* PARTIAL: without args means TB, but with arg,
it could be either */
if(!has_args) {
remove_type(BT_BXL_BXE);
}
break;
case 0x4a: /* UNLOCK (req'd arg) or DIR (optional arg) */
/* PARTIAL: without args means BXL/BXE, but with arg,
it could be either */
if(!has_args) {
remove_type(BT_TURBO);
}
break;
case 0x3b: /* *F (optional + or -), TRACE (no arg) */
case 0x5e: /* *B (optional + or -) or EXIT (no arg) */
/* PARTIAL: doesn't catch *F or *B by itself with no +/- */
if(has_args) {
remove_type(BT_BXL_BXE);
}
break;
case 0x44: /* FILLTO or BGET (check for a # after the token) */
/* COMPLETE */
if(nexttok == OP_HASH) {
remove_type(BT_TURBO);
} else {
remove_type(BT_BXL_BXE);
}
break;
case 0x4e: /* TIME$= (1 string arg) or PMCLR (1 num arg) */
/* PARTIAL: but almost complete. nothing happens if it's
TIME$= with a string function (probably rare) or PMCLR
with a complex expression. */
if(nexttok == OP_STRCONST) {
remove_type(BT_BXL_BXE);
} else if(has_var_arg && vartype == TYPE_STRING) {
remove_type(BT_BXL_BXE);
} else if(nexttok == OP_NUMCONST) {
remove_type(BT_TURBO);
} else if(has_var_arg && vartype == TYPE_SCALAR) {
remove_type(BT_TURBO);
}
break;
case 0x50: /* EXEC (1 arg, *must* be variable) or PMGRAPHICS (1 num arg, may be const) */
/* PARTIAL: PMGRAPHICS VAR won't be detected. but this usage is rare. */
/* This check is actually redundant, because EXEC requires Turbo's
label type (high bits in var name table both set to 1), which we already
detected in check_variables(). */
if(!has_var_arg) {
remove_type(BT_TURBO);
}
break;
case 0x54: /* -- in TB, LVAR in BXL/BXE */
/* COMPLETE */
/* We can tell these apart because:
1. TB gives us a next-statement offset of 5 if -- is the first (or
actually only) statement on a line. Normally, the minimum offset
is 6, but there's no OP_EOL after this token for some reason.
2. If -- is the 2nd or or later statement on a line (after a colon)
it *does* get a statement terminator, but it's 0x9b (ATASCII EOL,
like a REM or DATA gets).
Note that it's impossible to put more statements *after* the --,
they just get ignored if you type them. This doesn't help us
here, but it's interesting anyway.
Also, the -- is what you type to enter it into the program, but
it get LISTed as a line of 30 dashes.
The explanation is a lot longer than the code... */
if(program[pos - 1] == 0x05 || nexttok == 0x9b) {
set_type(BT_TURBO);
} else {
remove_type(BT_TURBO);
}
break;
case 0x57: /* DUMP (1 optional string arg) or LOCAL (1 *numeric* variable arg) */
/* BXL/BXE's LOCAL only works on scalars, not arrays or strings. so if there's
no arg, or one string arg... */
/* PARTIAL: almost complete, doesn't handle DUMP func$(arg), which I
doubt anyone uses anyway. */
if(!has_args) {
/* only Turbo allows no arg... */
remove_type(BT_BXL_BXE);
} else if(nexttok == OP_STRCONST) {
/* only Turbo allows a string constant arg... */
remove_type(BT_BXL_BXE);
} else if(has_var_arg && vartype == TYPE_STRING) {
/* only Turbo allows a string variable arg... */
remove_type(BT_BXL_BXE);
} else if(has_var_arg && vartype == TYPE_SCALAR) {
/* only BXL/BXL allows a scalar variable arg */
remove_type(BT_TURBO);
}
break;
case 0x5a: /* TB: BLOAD; BXL: extension mechanism; BXE: invalid. */
/* COMPLETE */
/* This is the token used for the BXL EXTEND.COM added commands,
from the Toolkit disk. It's followed by a byte ranging 0x10
to 0x15 that specifies which extended command, e.g. 0x5a 0x11 means
EXIT, 0x5a 0x12 is PROCEDURE, 0x5a 0x13 is CALL. Although
these look like BXE's extra commands, they aren't the same tokens,
and BXE will choke on them (RUN causes "Error- 33", LIST causes
a lockup). */
if(nexttok >= 0x10 && nexttok <= 0x15) {
/* worth mentioning to the user... */
fprintf(stderr, "Note: program requires EXTEND.COM from BASIC XL Toolkit disk.\n");
set_type(BT_BXL);
} else {
/* it's BLOAD if followed by e.g. OP_STRCONST or a variable */
set_type(BT_TURBO);
}
break;
case 0x5c: /* GO# (1 arg only) or SORTUP (optional 2nd arg of USING, but no comma) */
case 0x5d: /* # (1 arg only) or SORTDOWN (optional 2nd arg of USING, but no comma) */
/* COMPLETE but no longer needed (check_variables() already found the
11xxxxxx variables) */
/* Turbo BASIC labels have the high 2 bits set to 11, which is illegal
in Atari/BXL/BXE. */
if(vartype == 3) {
remove_type(BT_BXL_BXE);
} else {
remove_type(BT_TURBO);
}
break;
case 0x60: /* CLS (optional IOCB with #) or HITCLR (no args) */
/* PARTIAL: without args, can't tell them apart. */
/* I doubt CLS #IOCB is actually used in many Turbo BASIC
programs, because it's broken (at least in Turbo 1.5).
It's supposed to only clear the screen of output that
happened after the OPEN #IOCB, but it really clears the
whole screen. */
if(nexttok == OP_HASH) {
remove_type(BT_BXL_BXE);
}
break;
default: break;
}
if(verbose) fprintf(stderr, " bas_type now %02x\n", bas_type);
}
CALLBACK(handle_op) {
unsigned char nexttok = program[pos + 1];
unsigned char nexttok2 = program[pos + 2];
if(tok == OP_COMMA) comma_count++;
if(verbose) fprintf(stderr, "handle_op: lineno %d, tok $%02x, comma_count %d, bas_type was %02x\n", lineno, tok, comma_count, bas_type);
if(tok == 0x00) {
/* Turbo allows 256 variables, tokenizes the first 128 normally ($80-$FF).
The extra ones above $FF are tokenized as $00, varnum - $80. None of
our other BASICs uses $00 as an operator token, so.. */
set_type(BT_TURBO);
}
/* attempt to detect BXL/BXE DIM for 2D string arrays.
DIM A$(10,10) is illegal in Atari/Turbo.
PARTIAL: this only works if the first dimension is either a
constant or a scalar variable (not an array element or an
expression). fortunately most programs use constants in DIM.
*/
if(tok == OP_DIM_STR_LPAR) {
int str2d = 0;
if(nexttok >= 0x80 && nexttok2 == OP_ARR_COMMA) {
str2d = 1;
} else if(nexttok == OP_NUMCONST || nexttok == OP_HEXCONST) {
str2d = (program[pos + 8] == OP_ARR_COMMA);
}
if(str2d) {
if(verbose)
fprintf(stderr, "===> found 2d string array at line %d\n", lineno);
remove_type(BT_ATARI | BT_TURBO);
}
}
/* BXL/BXE allows string concatenation in assignment with the comma,
A$="FOO","BAR" or A$=C$,D$. */
if(last_cmd == CMD_LET || last_cmd == CMD_ILET) {
if(program[last_cmd_pos + 2] == OP_STR_ASSIGN) {
if(tok == OP_COMMA) {
if(is_string_rval(nexttok)) {
remove_type(BT_ATARI | BT_TURBO);
}
}
}
}
if(tok == OP_HEXCONST) remove_type(BT_ATARI); /* hex const (turbo *and* bxl/xe) */
if(tok <= OP_FUNC_STRIG) {
if(verbose) fprintf(stderr, " bas_type now %02x\n", bas_type);
return; /* legal in BASIC, ignore */
}
remove_type(BT_ATARI);
/* only Turbo has op tokens numbered 0x69 and up. */
if(tok >= 0x69) {
set_type(BT_TURBO);
}
if(tok >= 0x6E) {
fprintf(stderr, "handle_op: invalid operator %02x at line %d\n", tok, lineno);
keep_going = 0;
set_type(BT_INVALID);
}
/* There are 25 extra operators in Turbo, and 20 of them are shared with
BXL/BXE. Of the 20, 4 of them are undecidable, and the rest are
covered here, which means 80% coverage of the shared ops.
Undecidables are:
0x56 & (logical AND) or % (XOR), both infix numeric ops; can't tell apart
0x57 ! (logical OR) in both Turbo and BXL/BXE, can't tell apart
0x64 RAND (func, 1 num arg) or TAB (func, 1 num arg), can't tell apart
0x65 TRUNC (func, 1 num arg) or PEN (func, 1 num arg), can't tell apart
*/
switch(tok) {
case 0x55: /* DPEEK (function) TB, USING (infix, not a function) in BXL/BXE */
case 0x58: /* INSTR (function) or & (infix numeric) in BXE. */
case 0x5b: /* HEX$ (func, takes 1 num arg) or FIND( (pseudo-func, 3 args */
/* COMPLETE */
if(nexttok == OP_FUNC_LPAR) {
remove_type(BT_BXL_BXE);
} else {
remove_type(BT_TURBO);
}
break;
case 0x59: /* INKEY$ (0 arg pseudo-func) in TB, string array separator semicolon in BXL/BXE */
/* PARTIAL: ...but pretty good. we *can't* check nexttok == OP_GRP_RPAR, because
VAL(INKEY$) or ASC(INKEY$) are legit Turbo code. */
if(nexttok == OP_EOS || nexttok == OP_EOL) {
/* the semicolon can't be the last token on the line (needs at least
a right-paren), but INKEY$ can. */
remove_type(BT_BXL_BXE);
} else if(pos == last_cmd_pos + 1) {
/* INKEY$ can be the first operator after the command, e.g if the command
is IF. The semicolon cannot. */
remove_type(BT_BXL_BXE);
} else if(is_string_exp_op(last_op_tok) || is_string_exp_op(nexttok)) {
/* A$=INKEY$, IF INKEY$=A$, A$(LEN(A$)+1)=INKEY$, INKEY$<>"A"... */
remove_type(BT_BXL_BXE);
} else if(is_numeric_op(last_op_tok) || is_numeric_op(nexttok)) {
remove_type(BT_TURBO);
}
break;
case 0x5a: /* EXOR (infix num op) or BUMP( (pseudo-function, no OP_FUNC_LPAR) */
case 0x5d: /* DIV (infix num op) or RANDOM( (pseudo-func, 1 or 2 num args) */
/* COMPLETE (I think, anyway) */
if(last_op_tok == OP_GRP_RPAR || last_op_tok == OP_NUMCONST || last_op_tok == OP_HEXCONST || last_op_tok >= 0x80) {
/* if the last token was a variable or a numeric, or a right paren,
this is infix (can't be a function, last token would have to have
been a command or a math/etc operator). */
remove_type(BT_BXL_BXE);
} else {
remove_type(BT_TURBO);
}
break;
case 0x5c: /* DEC (function, takes str) in TB, HEX$ (function, takes num) in BXL/BXE */
/* COMPLETE */
if(is_string_rval(nexttok2)) {
remove_type(BT_BXL_BXE);
} else if(is_numeric_op(nexttok2)) {
remove_type(BT_TURBO);
}
break;
case 0x5e: /* FRAC (num func, 1 arg) or DPEEK (num func, 1 arg) in BXL...
however BXE has an optional 2nd arg. */
{
/* PARTIAL: This detects the 2nd arg for simple cases where the
1st arg is a constant or a numeric variable, but not if the
1st arg is an expression or an array element. */
int has2 = 0;
if(nexttok2 == OP_NUMCONST || nexttok2 == OP_HEXCONST) {
if(program[pos + 9] == OP_ARR_COMMA)
has2 = 1;
} else if(nexttok2 >= 0x80 && program[pos + 3] == OP_ARR_COMMA) {
has2 = (get_vartype(nexttok2) == TYPE_SCALAR);
}
if(has2) {
set_type(BT_BXE);
}
}
break;
case 0x5f: /* TIME$ in TB, SYS (function) in BXL/BXE */
case 0x60: /* TIME in TB, VSTICK (function) in BXL/BXE */
case 0x61: /* MOD (infix op) in TB, HSTICK (function) in BXL/BXE */
case 0x62: /* EXEC (infix op, with ON) in TB, PMADR (function) in BXL/BXE */
/* COMPLETE */
if(nexttok == OP_FUNC_LPAR) {
remove_type(BT_TURBO);
} else {
remove_type(BT_BXL_BXE);
}
break;
case 0x63: /* RND (pseudo-func, no arg) or ERR (func, 1 num arg) */
/* COMPLETE */
if(nexttok != OP_FUNC_LPAR) {
set_type(BT_TURBO);
}
break;
case 0x66: /* %0 in TB, LEFT$( (pseudo-func, takes string) in BXL/BXE */
case 0x67: /* %1 in TB, RIGHT$( (pseudo-func, takes string) in BXL/BXE */
case 0x68: /* %2 in TB, MID$( (pseudo-func, takes string) in BXL/BXE */
/* COMPLETE */
/* LEFT$/RIGHT$/MID$ do NOT get OP_FUNC_LPAR (the "(" is part of the
token name). They're always followed by a string operator... and
it works out that none of the tokens for BXL-only string funcs
are allowed to follow %0 %1 %2 in Turbo. */
if(is_string_op(nexttok) || is_bxl_string_func(nexttok)) {
remove_type(BT_TURBO);
} else {
remove_type(BT_BXL_BXE);
}
break;
default:
break;
}
last_op_tok = tok;
if(verbose) fprintf(stderr, " bas_type now %02x\n", bas_type);
}
/* we can count commas, because both Turbo and BXE/BXL use the "array" comma
to separate function arguments, not the "regular" comma. */
CALLBACK(handle_end_stmt) {
if(verbose) fprintf(stderr, "handle_end_stmt: lineno %d, tok $%02x, last_cmd $%02x, comma_count %d, bas_type was %02x\n", lineno, tok, last_cmd, comma_count, bas_type);
switch(last_cmd) {
case 0x38: /* DPOKE (2 args) or WHILE (1 arg) */
if(comma_count) {
remove_type(BT_BXL_BXE);
} else {
remove_type(BT_TURBO);
}
break;
case 0x3e: /* WHILE (1 arg) or DPOKE (2 or 3 args) */
case 0x4c: /* DELETE (1 arg) or MOVE (3 or 4 args) */
case 0x4d: /* PAUSE (1 arg) or MISSILE (3 args) */
case 0x52: /* FCOLOR (1 arg) or PMWIDTH (2 args) */
case 0x53: /* *L (optional + or - only) or SET (req 2 num args) */
case 0x4f: /* PROC (1 arg) or PMCOLOR (3 args) */
if(comma_count) { /* 1 arg means no commas */
remove_type(BT_TURBO);
} else {
remove_type(BT_BXL_BXE);
}
break;
case 0x42: /* BPUT or RGET */
/* PARTIAL:
Turbo BGET always takes 3 args, BXL/BXE RGET takes 2 or more.
We can at least rule out Turbo if there aren't exactly 3 args. */
if(comma_count != 2) {
remove_type(BT_TURBO);
}
break;
case 0x43: /* BGET or BPUT */
/* PARTIAL:
Turbo BGET and BPUT always take 3 args. So does BXL BPUT.
BXE BPUT takes 3 args and an optional 4th. */
if(comma_count != 2) {
set_type(BT_BXE);
}
break;
case 0x55: /* RENUM in both (TB req 3 args, BXL up to two) */
if(comma_count == 2) {
remove_type(BT_BXL_BXE);
} else {
remove_type(BT_TURBO);
}
break;
case 0x61: /* DSOUND (0 or 4 num args) or INVERSE (no args) */
/* PARTIAL: can't tell no-argument DSOUND from INVERSE. */
if(comma_count) {
remove_type(BT_BXL_BXE);
}
break;
case 0x63: /* %PUT (usually seen with optional #; 1 or 2 args) or BLOAD (1 string arg) */
if(comma_count) {
/* multiple args */
remove_type(BT_BXL_BXE);
} else if(program[last_cmd_pos + 1] == OP_STRCONST) {
/* one arg, string const. XXX: check var type */
remove_type(BT_TURBO);
}
break;
case 0x64: /* %GET (usually seen with optional #; 1 or 2 args) or BSAVE (3 args) */
if(comma_count == 2) {
remove_type(BT_TURBO);
} else {
remove_type(BT_BXL_BXE);
}
break;
default: break;
}
if(verbose) fprintf(stderr, " bas_type now %02x\n", bas_type);
last_cmd = last_op_tok = 0;
}
/* return true if input_file is Atari MS BASIC.
AMSB files begin with a 3-byte header: 0x00, then 2 byte length
(LSB/MSB), which is actually 3 bytes less than the full length of
the file (or, it's the length of the file minus the 3-byte header).
Also, the files always end with 3 0x00 bytes.
We check that the header length is 3 bytes less than the file length,
then check for the 3 0x00's at the end.
*/
int detect_amsb(void) {
int len, c;
if(verbose) fprintf(stderr, "entering detect_amsb()\n");
rewind(input_file);
c = fgetc(input_file);
if(c) return 0;
c = fgetc(input_file);
if(c == EOF) return 0;
len = (fgetc(input_file) << 8) | c;
if(verbose) fprintf(stderr, "detect_amsb() header len==%d (file size should be %d)\n", len, len + 3);
fseek(input_file, 0, SEEK_END);
c = ftell(input_file);
if(verbose) fprintf(stderr, "detect_amsb() file size %d\n", c);
if(len != (c - 3)) {
if(verbose) fprintf(stderr, "detect_amsb() wrong file size!\n");
return 0;
}
if(verbose) fprintf(stderr, "detect_amsb() file size is correct, checking for 3 nulls\n");
fseek(input_file, -3, SEEK_END);
if(fgetc(input_file)) return 0;
if(fgetc(input_file)) return 0;
if(fgetc(input_file)) return 0;
if(verbose) fprintf(stderr, "detect_amsb() found 3 nulls, return 1\n");
return 1;
}
void foreign(const char *name, int srval) {
if(input_file) fclose(input_file);
if(script_mode) {
exit(srval);
} else {
puts(name);
exit(0);
}
}
void detect_foreign(void) {
int c, d;
c = fgetc(input_file);
d = fgetc(input_file);
if(c == EOF || d == EOF)
die("File is too short to be a BASIC program of any kind.");
if(c == 0 && d == 0) {
/* This is why we can't read from stdin. */
rewind(input_file);
return;
}
if(c == 0xfb && d == 0xc2)
foreign("Compiled Turbo BASIC XL", SRET_COMPILED_TURBO);
if(c == 0xff && d == 0xff)
foreign("XEX executable (not BASIC at all!)", SRET_NOT_BASIC);
if(c == 0xfe && d == 0xfe)
foreign("Mac/65 tokenized source (not BASIC at all!)", SRET_NOT_BASIC);
if(c == 0xdd && d == 0x00)
foreign("EXTENDed OSS BASIC XE", SRET_EXTENDED_BXE);
if(c == 0x7f && d == 'E') {
c = fgetc(input_file);
d = fgetc(input_file);
if(c == 'L' && d == 'F')
foreign("ELF executable (not BASIC at all!)", SRET_NOT_BASIC);
}
if(c == 0 && detect_amsb()) {
foreign("Atari Microsoft BASIC", SRET_AMSB);
}
if(isdigit(c) && (d == 0x20 || isdigit(d)))
foreign("Text file, could be LISTed BASIC (or not)", SRET_NOT_BASIC);
if(isprint(c) && isprint(d))
foreign("Text file (not BASIC at all!)", SRET_NOT_BASIC);
foreign("Unknown file type (not BASIC at all!)", SRET_NOT_BASIC);
}
void check_variables(void) {
int pos;
if(vntp == vntd) return;
/* Unlike Atari BASIC, Turbo variables can have _ in the names.
So can BASIC XE, though it's not documented in the BASIC XE
Reference Manual that I have.
BXL can't have _ in variable names. */
for(pos = vnstart; pos < vvstart; pos++) {
if((program[pos] & 0x7f) == '_') {
remove_type(BT_ATARI | BT_BXL);
}
}
/* Also, Turbo line labels (for PROC/EXEC and #/GO#) are variables
with a type that's illegal in Atari/BXL/BXE. */
for(pos = vvstart; pos < codestart; pos += 8) {
if((program[pos] & 0xc0) == 0xc0) {
set_type(BT_TURBO);
}
}
/* I was hoping to check for BXL/BXE string arrays here. However,
looking at a SAVEd file, they look identical to regular string
variables (variable type $80, rest of the VVTP entry all $00).
When the program's actually in memory, BXL/BXE sets the
variable type byte to $91 for DIMed string array var and
$81 (same as Atari/Turbo) for a regular DIMed string var.
Unfortunately in the SAVE file, it's always $80. */
/* Another thing that can't be detected: BXL/BXE's FAST mode changes
the program in memory (line number targets become addresses),
but programs don't get SAVEd this way: SAVE turns off FAST and
restores the program to its original state before writing it
to disk. Too bad. */
}
/* BASIC/A+ support is *very* simple. It's similar to BASIC XL (no
surprise there)... but unlike Turbo, BXL, and BXE, it's *not*
token-compatible with original Atari BASIC. Rather than add
their new tokens to the end of the lists, they're mixed in with
the others. So A+ can't even LOAD or RUN Atari BASIC files.
Appendix J of the BASIC/A+ manual tells you to LIST in BASIC,
reboot, ENTER in A+, to "port" your BASIC programs to A+. I
suppose if you upgraded from A+ to BASIC XL or XE, you'd have
to do the same thing to use A+ programs in XL/XE.
While this was probably a PITA for BASIC/A+ users back in the day,
it makes it *really* easy to detect A+ here. The last line of
every SAVEed program is the direct-mode command, and contains either
a SAVE or CSAVE cmd token. Which is the same token in Atari, Turbo,
BXL, and BXE... but *different* in A+.
However... I've run into at least one BASIC program in the Holmes
Archive that was missing its line 32768 (wish I could remember which).
And it's possible for files to get truncated... so I'll check a few
other command tokens, to deal with cases like this.
*/
void found_aplus(void) {
foreign("OSS BASIC/A+", SRET_APLUS);
}
CALLBACK(check_aplus_cmd) {
int has_args;
unsigned char nexttok, nexttok2;
nexttok = program[pos + 1];
nexttok2 = program[pos + 2];
has_args = !(nexttok == OP_EOS || nexttok == OP_EOL);
if(verbose) fprintf(stderr, "check_aplus_cmd: line %d, pos $%04x, tok $%02x, nexttok $%02x, nexttok2 $%02x\n", lineno, pos, tok, nexttok, nexttok2);
switch(tok) {
case CMD_POP: /* A+ READ */
case CMD_DOS: /* A+ GET */
case CMD_DEG: /* A+ WHILE */
case CMD_CLR: /* A+ DIM */
case 0x46: /* A+ SOUND <args>, Turbo LOOP, BXL/XE CP */
case 0x3c: /* A+ DIR <req'd arg>, Turbo REPEAT, BXL/XE ELSE */
if(has_args)
found_aplus();
break;
case CMD_SAVE: /* A+ END */
case CMD_GRAPHICS: /* A+ STOP */
case CMD_DIM: /* A+ ENDWHILE */
/* case CMD_GET: */ /* A+ RETURN, but Turbo allows 0 args too! */
case 0x38: /* A+ DOS, Turbo DPOKE, BXL/XE WHILE */
if(!has_args)
found_aplus();
break;
case CMD_POSITION: /* A+ ? */
/* PARTIAL: does nothing if 1st arg is numeric. */
if(!has_args)
/* POSITION can't have 0 args. */
found_aplus();
else if(is_string_rval(nexttok))
/* ? "STRING" or ? A$, e.g., can't be POSITION */
found_aplus();
else if(nexttok == OP_NUM_LE) /* numeric <= in BASIC, # in A+ */
found_aplus();
break;
case CMD_RUN: /* A+ PRINT */
/* PARTIAL: only detects PRINT # or PRINT <num> ... */
/* A+'s # token is BASIC's OP_NUM_LE! */
if(nexttok == OP_NUM_LE)
found_aplus();
else if(is_numconst_op(nexttok) || is_numeric_var(nexttok))
found_aplus();
break;
case CMD_XIO: /* A+ SAVE */
/* most programs, this is enough, because they'll end with
32768 SAVE "D:BLAH" */
if(is_string_rval(nexttok))
found_aplus();
break;
case CMD_OPEN: /* A+ ELSE */
/* case CMD_CLOSE: */ /* A+ DEG */ /* can't check, Turbo allows no args */
case CMD_STATUS: /* A+ NEW */
case CMD_POINT: /* A+ LOAD */
case 0x42: /* A+ POSITION, Turbo BPUT, BXL RGET */
case 0x43: /* A+ DRAWTO, Turbo BGET, BXL BPUT */
if(nexttok != OP_HASH) /* # in BASIC, USING in A+ */
found_aplus();
break;
/* case 0x48: */ /* A+ CSAVE, Turbo DIR, BXL/BXE PROTECT */
/* DIR without arg is OK, so we can't really check this. */
/* break; */
case CMD_DRAWTO: /* A+ PUT */
case CMD_SOUND: /* A+ RPUT */
case CMD_LPRINT: /* A+ RGET */
case CMD_CSAVE: /* A+ BPUT */
case CMD_CLOAD: /* A+ BGET */
case CMD_ON: /* A+ STATUS */
case CMD_NOTE: /* A+ OPEN */
case CMD_CONT: /* A+ CLOSE */
case CMD_RAD: /* A+ XIO */
if(nexttok == OP_NUM_LE) /* numeric <= in BASIC, # in A+ */
found_aplus();
break;
default: break;
}
last_cmd = tok;
}
void check_aplus(void) {
allow_hex_const = 1;
on_cmd_token = check_aplus_cmd;
walk_all_code();
}
void check_atari_turbo_oss(void) {
allow_hex_const = 1;
on_cmd_token = handle_cmd;
on_exp_token = handle_op;
on_end_stmt = handle_end_stmt;
walk_all_code();
}
int main(int argc, char **argv) {
set_self(*argv);
parse_general_args(argc, argv, print_help);
parse_args(argc, argv);
detect_foreign();
readfile();
parse_header();
check_variables();
check_aplus();
check_atari_turbo_oss();
print_result(); /* always exits */
return 0; /* never happens, shuts up gcc's warning though */
}
|