aboutsummaryrefslogtreecommitdiff
path: root/bas.c
blob: cf03f38467bf2f52f9e9f0380ffd3722c348c478 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
/* bas.c - API for writing standalone programs that deal with
	tokenized Atari 8-bit BASIC program. */

#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <time.h>

#include "bas.h"

/* caller's parse_args() should set this for the -v option */
int verbose = 0;

/* hex constants work the same way in Turbo BASIC and BASIC XL/XE: they're
   exactly like numeric constants, 6-byte BCD FP, but introduced with
   OP_HEXCONST instead of OP_NUMCONST.
   If allow_hex_const is set, on_exp_token will be called for OP_HEXCONST.
	Otherwise, a warning will be printed, on_exp_token will NOT be called,
   but the constant is skipped the same as a regular numeric, to keep
   in sync with the token stream. */
int allow_hex_const = 0;

/* BASIC/A+ uses the same cmd tokens for REM and DATA that BASIC does,
   but not for the ERROR- token. Unfortunately bas.c needs to know it's
   an A+ program so it can handle this token correctly. */
int aplus_errtok_hack = 0;

/* BASIC XL token 0x5a is followed by a single "subtoken", this skips it. */
int bxl_exttok_hack = 0;

/* the seven 16-bit header words, in file order. parse_header() loads
   them from offsets 0, 2, ... 12 of program[], and update_header()
   stores them back to the same offsets. */
unsigned short lomem;
unsigned short vntp;
unsigned short vntd;
unsigned short vvtp;
unsigned short stmtab;
unsigned short stmcur;
unsigned short starp;
/* values parse_header() derives from the header words above; they're
   used as indexes into program[]. */
unsigned short codestart;
unsigned short code_end;
unsigned short vnstart;
unsigned short vvstart;
/* number of bytes readfile() read into program[]; move_code() adjusts it. */
int filelen;
/* program name used as the prefix of messages; set by set_self(). */
const char *self;
/* the file's contents: filled by readfile(), written out by writefile(). */
unsigned char program[BUFSIZE];
/* set by open_input() and open_output(); readfile() and writefile()
   close them and reset them to NULL. */
FILE *input_file;
FILE *output_file;
/* not referenced by the code in this part of the file; presumably set
   by the caller's argument parsing -- TODO confirm. */
char *output_filename = NULL;

/* report a fatal error and terminate: writes "<self>: <msg>" followed by
   a newline to stderr, then exits with status 1. does not return. */
void die(const char *msg) {
	const int fatal_status = 1;

	fprintf(stderr, "%s: %s\n", self, msg);
	exit(fatal_status);
}

/* handle the arguments every tool shares:
   - no arguments at all: call helpfunc, exit with status 1.
   - first argument is --help: call helpfunc, exit with status 0.
   - first argument is --version: print "<self> <VERSION>", exit 0.
   anything else: return to the caller without doing anything. */
void parse_general_args(int argc, char **argv, void (*helpfunc)()) {
	const char *first;

	if(argc < 2) {
		helpfunc();
		exit(1);
	}

	first = argv[1];

	if(!strcmp(first, "--help")) {
		helpfunc();
		exit(0);
	} else if(!strcmp(first, "--version")) {
		printf("%s %s\n", self, VERSION);
		exit(0);
	}
}

/* read entire file into memory */
void readfile(void) {
	filelen = fread(program, 1, BUFSIZE - 1, input_file);
	if(verbose) fprintf(stderr, "Read %d bytes.\n", filelen);
	if(!feof(input_file))
		fprintf(stderr, "Warning: file is >64KB, way too big for a BASIC program.\n");
	else if(filelen > MAX_PROG_SIZE)
		fprintf(stderr, "Warning: file is %d bytes, suspiciously large for a BASIC program.\n", filelen);
	fclose(input_file);
	input_file = NULL; /* so caller can tell it's closed */
	if(filelen < MIN_PROG_SIZE)
		die("File too short to be a BASIC program (truncated?)\n");
}

/* write the first filelen bytes of program[] to output_file, then close
   output_file and set it to NULL. returns the number of bytes fwrite()
   reported as written; comparing that against filelen is up to the
   caller. */
int writefile(void) {
	int written = fwrite(program, 1, filelen, output_file);

	fclose(output_file);
	output_file = NULL; /* lets the caller see that it's closed */

	if(verbose)
		fprintf(stderr, "Wrote %d bytes.\n", written);

	return written;
}

/* fetch the 16-bit value stored at program[addr] in 6502 byte order:
   low byte at addr, high byte at addr + 1. */
unsigned short getword(int addr) {
	unsigned int lo = program[addr];
	unsigned int hi = program[addr + 1];

	return (hi << 8) | lo;
}

/* store value at program[addr] as a 16-bit quantity in 6502 byte order:
   low byte at addr, high byte at addr + 1. */
void setword(int addr, int value) {
	unsigned char *dest = program + addr;

	dest[0] = value & 0xff;
	dest[1] = value >> 8;
}

/* print the header words and the values derived from them to stderr,
   as three lines of 4-digit hex numbers. */
void dump_header_vars(void) {
	FILE *out = stderr;

	fprintf(out, "LOMEM  $%04x    VNTP $%04x   VNTD $%04x  VVTP $%04x\n",
			lomem, vntp, vntd, vvtp);
	fprintf(out, "STMTAB $%04x  STMCUR $%04x  STARP $%04x\n",
			stmtab, stmcur, starp);
	fprintf(out, "vnstart $%04x, vvstart $%04x, codestart $%04x, code_end $%04x\n",
			vnstart, vvstart, codestart, code_end);
}

/* load the seven header words from the start of program[] into the
   globals lomem ... starp, and derive codestart, vnstart, vvstart and
   code_end from them.
   warns on stderr if filelen is less than code_end, dumps everything if
   verbose is set, then exits via die() if the header doesn't look like
   an Atari BASIC program. */
void parse_header(void) {
	int vntp_offset;

	lomem = getword(0);
	vntp = getword(2);
	vntd = getword(4);
	vvtp = getword(6);
	stmtab = getword(8);
	stmcur = getword(10);
	starp = getword(12);

	codestart = stmtab - TBL_OFFSET - (vntp - 256);
	vnstart = vntp - TBL_OFFSET;
	vvstart = vvtp - TBL_OFFSET;
	/* NOTE(review): unlike codestart, code_end is not corrected for a
	   VNTP other than $100 -- confirm that's intended. */
	code_end = starp - TBL_OFFSET;

	/* 0x0e is the first byte after the 14 header bytes read above. if
	   vnstart lands beyond that, slide vnstart and vvstart back by the
	   excess, which leaves vnstart at 0x0e. */
	if(vnstart > 0x0e) {
		if(verbose)
			fprintf(stderr, "VNTP is $%04x (not $100), adjusting pointers.\n", vntp);
		vntp_offset = vnstart - 0x0e;
		vnstart -= vntp_offset;
		vvstart -= vntp_offset;
	}

	if(filelen < code_end) {
		fprintf(stderr, "Warning: file is truncated: %d bytes, should be %d.\n", filelen, code_end);
	}

	/* dump before validating, so -v shows the values that get rejected. */
	if(verbose) dump_header_vars();

	/* these checks are actually kind of conservative. */
	if(lomem) die("Not an Atari BASIC program (no $0000 signature).");
	if(vntp < 0x100) die("Not an Atari BASIC program (invalid VNTP).");
	if(vvtp < vntd) die("Not an Atari BASIC program (invalid VVTP).");
	if(starp < vvtp) die("Not an Atari BASIC program (invalid STARP).");
}

/* write the header globals back into the first 14 bytes of program[]:
   lomem, vntp, vntd, vvtp, stmtab, stmcur, starp, as consecutive
   16-bit words starting at offset 0. */
void update_header(void) {
	static unsigned short * const header_vars[] = {
		&lomem, &vntp, &vntd, &vvtp, &stmtab, &stmcur, &starp
	};
	const int count = sizeof(header_vars) / sizeof(header_vars[0]);
	int i;

	for(i = 0; i < count; i++)
		setword(2 * i, *header_vars[i]);
}

/* sometimes the variable name table isn't large enough to hold
	the generated variable names. move_code() makes more space,
	by moving the rest of the program (including the variable value
	table) up in memory.
	offset is the number of bytes to move by; everything from vvstart
	to the end of the file is shifted, then vntd, vvtp, stmtab, stmcur,
	starp and filelen are adjusted by the same amount, the header bytes
	are rewritten and the header is parsed again.
	exits via die() if the move would fall outside program[]. */
void move_code(int offset) {
	/* only the bytes from vvstart to the end of the file get moved. */
	int tail_len = filelen - vvstart;

	/* done with integers rather than pointers: comparing a pointer
	   that's outside the array is undefined. */
	if(tail_len < 0 || (vvstart + offset) < 0 || ((filelen + offset) > (BUFSIZE - 1))) {
		die("Attempt to move memory out of range; corrupt header bytes?");
	}

	/* moving filelen bytes here would read past the end of the file
	   data, and could write past the end of program[] even though the
	   check above passed. */
	memmove(program + vvstart + offset, program + vvstart, tail_len);

	vntd += offset;
	vvtp += offset;
	stmtab += offset;
	stmcur += offset;
	starp += offset;
	filelen += offset;
	update_header();
	parse_header();
}

/* if newsize differs from oldsize, call move_code() with the difference
   (newsize - oldsize), announcing the move on stderr first when verbose
   is set. does nothing when the two sizes are equal. */
void adjust_vntable_size(int oldsize, int newsize) {
	int delta = newsize - oldsize;

	if(delta == 0)
		return;

	if(verbose) {
		fprintf(stderr,
				"Need %d bytes for vntable, have %d, moving VVTP by %d to $%04x.\n",
				newsize, oldsize, delta, vvtp + delta);
	}

	move_code(delta);
}

/* return the top two bits of the first byte of a variable's entry in
   the table at vvstart. the entry is selected by the low 7 bits of tok;
   entries are 8 bytes apart. */
unsigned char get_vartype(unsigned char tok) {
	int varnum = tok & 0x7f;
	int entry = vvstart + varnum * 8;

	return program[entry] >> 6;
}

/* return true if the variable name table is OK.
   the table is the bytes of program[] from vnstart up to (not
   including) vvstart. it's considered bad if all its bytes (ignoring
   the last one) have the same value, or if it contains any byte that,
   with bit 7 stripped, isn't 0-9, A-Z or underscore; inverse $ (0xa4)
   and inverse ( (0xa8) are allowed too, and so is a 0 in the very last
   byte. an underscore is accepted, but gets a note printed on stderr.
   a program with no variables (vntp == vntd) is always OK. */
int vntable_ok(void) {
	/* bad has to start at 0: when the first pass below is skipped (a
	   very short table), the second pass increments and returns it. */
	int vp, bad = 0;

	if(vntp == vntd) {
		if(verbose) fprintf(stderr, "No variables.\n");
		return 1;
	}

	/* first pass: bad = 1 if all the bytes in the table have the same
		value, no matter what it is. */
	vp = vnstart + 1;

	/* don't do this check if the table is only one byte long! */
	if(vp < vvstart - 1) {
		bad = 1;
		while(vp < vvstart - 1) {
			if(program[vp] != program[vnstart]) {
				bad = 0;
				break;
			}
			vp++;
		}
		if(bad) return 0;
	}

	/* 2nd pass: bad = 1 if there's any invalid character in the table. */
	vp = vnstart;
	while(vp < vvstart) {
		unsigned char c = program[vp];

		/* allow for (but don't require) dummy byte at VNTD. used to just
		   quit when we hit 0, but 0 might be part of a scrambled table. */
		if(c == 0 && vp == (vvstart - 1)) break;

		vp++;

		/* inverse $ or ( is OK */
		if(c == 0xa4 || c == 0xa8) continue;

		/* numbers and letters are allowed, inverse or normal. */
		c &= 0x7f;
		if(c == '_') {
			fprintf(stderr, "%s: Underscore in variable name; Turbo or BASIC XE?\n", self);
			continue;
		}
		if(c >= 0x30 && c <= 0x39) continue;
		if(c >= 0x41 && c <= 0x5a) continue;

		bad++;
		break;
	}

	return !bad;
}

/* complain about a bad command-line argument: prints
   "<self>: Invalid argument '<arg>'." and a blank line to stderr, then
   exits with status 1. does not return. */
void invalid_args(const char *arg) {
	fprintf(stderr,
			"%s: Invalid argument '%s'.\n\n",
			self,
			arg);
	exit(1);
}

/* fopen() wrapper that never returns NULL: if the file can't be opened
   with the given mode, the reason is reported with perror(name) and the
   process exits with status 1. */
FILE *open_file(const char *name, const char *mode) {
	FILE *stream = fopen(name, mode);

	if(stream == NULL) {
		perror(name);
		exit(1);
	}

	return stream;
}

/* set input_file to a stream opened for binary reading.
   a NULL name or "-" means standard input: it's refused (via die()) if
   it's a terminal, otherwise it's reopened in "rb" mode; failure to
   reopen is reported with perror() and exits with status 1.
   any other name is opened with open_file(). */
void open_input(const char *name) {
	int use_stdin = (name == NULL) || (strcmp(name, "-") == 0);

	if(!use_stdin) {
		input_file = open_file(name, "rb");
		return;
	}

	if(isatty(fileno(stdin)))
		die("Can't read binary data from the terminal.");

	if(!freopen(NULL, "rb", stdin)) {
		perror("stdin");
		exit(1);
	}

	input_file = stdin;
}

/* set output_file to a stream opened for binary writing.
   a NULL name or "-" means standard output: it's refused (via die()) if
   it's a terminal, otherwise it's reopened in "wb" mode; failure to
   reopen is reported with perror() and exits with status 1.
   any other name is opened with open_file(). */
void open_output(const char *name) {
	int use_stdout = (name == NULL) || (strcmp(name, "-") == 0);

	if(!use_stdout) {
		output_file = open_file(name, "wb");
		return;
	}

	if(isatty(fileno(stdout)))
		die("Refusing to write binary data to the terminal.");

	if(!freopen(NULL, "wb", stdout)) {
		perror("stdout");
		exit(1);
	}

	output_file = stdout;
}

/* point self at the part of argv0 that follows its last '/', or at all
   of argv0 if it contains no '/'. no copy is made, so argv0 has to
   stay valid for as long as self is used. */
void set_self(const char *argv0) {
	const char *slash = strrchr(argv0, '/');

	self = slash ? slash + 1 : argv0;
}

/* callbacks. one pointer per event that walk_code() reports; each is
   declared with CALLBACK_PTR (defined outside this file, presumably in
   bas.h). walk_code() only calls the ones that are non-NULL, and passes
   each one the current line number, the current position in program[],
   the byte at that position, and the current end position. */
CALLBACK_PTR(on_start_line);
CALLBACK_PTR(on_bad_line_length);
CALLBACK_PTR(on_end_line);
CALLBACK_PTR(on_start_stmt);
CALLBACK_PTR(on_end_stmt);
CALLBACK_PTR(on_cmd_token);
CALLBACK_PTR(on_text);
CALLBACK_PTR(on_exp_token);
CALLBACK_PTR(on_var_token);
CALLBACK_PTR(on_string_const);
CALLBACK_PTR(on_num_const);
CALLBACK_PTR(on_trailing_garbage);

/* call callback x, if it's set, with the walker's current state. relies
   on locals named lineno, pos and end being in scope at the point of
   use. wrapped in do/while(0) so that "CALL(x);" is exactly one
   statement and can't pair up with an else that follows it. */
#define CALL(x) do { if(x) (*(x))(lineno, pos, program[pos], end); } while(0)

/* walk the tokenized program from codestart to the end of the file,
   calling the on_* callbacks (those that are set) as lines, statements
   and tokens are encountered.
   lines numbered below startlineno are skipped; walking stops at the
   first line numbered above endlineno, or after line 32768.
   every callback gets (lineno, pos, program[pos], end).
   returns early, with a warning, if a line extends past the end of the
   file. exits if a line's length is invalid and on_bad_line_length
   didn't repair it, or if a statement's next-statement offset is 0 or
   doesn't advance. */
void walk_code(unsigned int startlineno, unsigned int endlineno) {
	int linepos, nextpos, offset, soffset, lineno = -1, tmpno, pos, end, tok;

	linepos = codestart;
	while(linepos < filelen) { /* loop over lines */
		tmpno = getword(linepos);
		if(tmpno <= lineno) {
			fprintf(stderr, "Warning: line number %d at offset $%04x is <= previous line number %d.\n",
					tmpno, linepos, lineno);
		}
		lineno = tmpno;
		offset = program[linepos + 2];
		nextpos = linepos + offset;
		if(nextpos > filelen) {
			fprintf(stderr, "Warning: program truncated in the middle of line %d.\n", lineno);
			return;
		}

		end = nextpos;
		pos = linepos;

		if(offset < 5) {
			/* actually, real Atari BASIC's minimum offset is 6. however, if you use
			   the "--" (line of dashes, command token 0x54) in Turbo BASIC XL, you
			   get an offset of 5, because there's no end-of-line after it.
			   it seems better to accommodate Turbo here. */
			CALL(on_bad_line_length);
			offset = program[linepos + 2]; /* on_bad_line_length fixed it (we hope) */
			if(offset < 5)
				die("Fatal: Program is code-protected; unprotect it first.");

			/* the offset changed, so everything derived from the old one
			   is stale. without this, the line would be walked (and
			   skipped over) using the bad length. */
			nextpos = linepos + offset;
			if(nextpos > filelen) {
				fprintf(stderr, "Warning: program truncated in the middle of line %d.\n", lineno);
				return;
			}
			end = nextpos;
		}

		if(lineno < startlineno) {
			linepos = nextpos;
			continue;
		}

		if(lineno > endlineno) break;

		CALL(on_start_line);

		pos = linepos + 3;
		while(pos < nextpos) { /* loop over statements within a line */
			soffset = program[pos];
			if(!soffset) {
				fprintf(stderr, "Fatal: next-statement offset is 0 at line %d, pos %04x\n", lineno, pos);
				exit(1);
			}
			end = linepos + soffset;
			if(end <= pos) {
				/* the token loop below would never run, so pos would never
				   move and this loop would spin forever. */
				fprintf(stderr, "Fatal: next-statement offset %d doesn't advance at line %d, pos %04x\n",
						soffset, lineno, pos);
				exit(1);
			}
			CALL(on_start_stmt);

			while(pos < end) {  /* loop over tokens within a statement */
				pos++;
				CALL(on_cmd_token);

				tok = program[pos];
				if((tok == CMD_REM) || (tok == CMD_DATA) || /* same in A+ */
						(aplus_errtok_hack && tok == 0x53) || /* A+'s ERROR- */
						(!aplus_errtok_hack && tok == CMD_ERROR))
				{
					/* the rest of the statement is text, not tokens */
					pos++;
					CALL(on_text);
					pos = end;
				} else if(bxl_exttok_hack && tok == 0x5a) {
					pos += 2; /* skip subtoken */
				} else {
					pos++;
				}

				while(pos < end) {     /* loop over operators */
					tok = program[pos];
					switch(tok) {
						case OP_NUMCONST:
							CALL(on_exp_token);
							pos++;
							CALL(on_num_const);
							pos += 6; /* skip the constant's 6 bytes */
							break;
						case OP_HEXCONST:
							if(allow_hex_const) {
								CALL(on_exp_token);
							} else {
								fprintf(stderr, "%s: found Turbo/BXL/BXE hex constant at line %d, skipping\n", self, lineno);
							}
							pos++;
							if(allow_hex_const) {
								CALL(on_num_const);
							}
							pos += 6; /* skipped either way, to stay in sync */
							break;
						case OP_STRCONST:
							CALL(on_exp_token);
							pos++;
							CALL(on_string_const);
							pos += program[pos] + 1; /* length byte, then that many bytes */
							break;
						default:
							/* bit 7 set means it's a variable token */
							if(tok & 0x80) {
								CALL(on_var_token);
							} else {
								CALL(on_exp_token);
							}
							pos++;
							break;
					}
				}
				CALL(on_end_stmt);
			}
		}

		CALL(on_end_line);

		linepos = nextpos;
		if(lineno == 32768) break;
	}

	if(endlineno == 32768 && linepos < filelen) {
		if(verbose)
			fprintf(stderr, "%s: Trailing garbage at EOF, %d bytes.\n", self, filelen - linepos);
		CALL(on_trailing_garbage);
	}
}