3 files changed, 45 insertions, 19 deletions
diff --git a/unprotbas.1 b/unprotbas.1
index b8123f1..98e17c4 100644
--- a/unprotbas.1
+++ b/unprotbas.1
@@ -27,7 +27,7 @@ level margin: \\n[rst2man-indent\\n[rst2man-indent-level]]
 .\" new: \\n[rst2man-indent\\n[rst2man-indent-level]]
 .in \\n[rst2man-indent\\n[rst2man-indent-level]]u
 ..
-.TH "UNPROTBAS" 1 "2024-05-28" "0.2.1" "Urchlay's Atari 8-bit Tools"
+.TH "UNPROTBAS" 1 "2024-05-29" "0.2.1" "Urchlay's Atari 8-bit Tools"
 .SH NAME
 unprotbas \- Unprotect or create LIST-protected Atari 8-bit BASIC programs
 .SH SYNOPSIS
@@ -235,11 +235,21 @@ This will fail if there\(aqs already a line 32767.
 .sp
 One more thing \fBunprotbas\fP can do is remove extra data from the end
 of the file. It\(aqs possible for BASIC files to contain extra data that
-occurs after the end of the program. Some programs use this as a way
-to load arbitrary binary data into memory along with the program; for
-other programs, the extra data is truly garbage (e.g. an EOF character
-if the file came from a CP/M system, or padding to a block size if a
-dumb implementation of XMODEM was used to transfer the file).
+occurs after the end of the program. Such data might be:
+.INDENT 0.0
+.IP \(bu 2
+Arbitrary binary data used by the program at runtime, such as
+machine language routines or fonts.
+.IP \(bu 2
+Zero bytes, caused by SAVEing the program with revision B BASIC. Every
+time a program is LOADed, edited (or not) and then SAVEd again, 16
+bytes of extra (garbage) data gets added to the program. To avoid
+this, don\(aqt use revision B (use rev C if possible, A otherwise).
+.IP \(bu 2
+Garbage added by some system previously used to store or transmit
+the file. CP/M systems might add an EOF (^Z) character. Dumb
+file transfer software might pad the file up to its block size.
+.UNINDENT
 .sp
 Normally, such "garbage" doesn\(aqt hurt anything. BASIC ignores it. Or
 it normally does... if you suspect it\(aqs causing a problem, you can
diff --git a/unprotbas.c b/unprotbas.c
index 6edb38f..37013cf 100644
--- a/unprotbas.c
+++ b/unprotbas.c
@@ -37,6 +37,11 @@
 /* tokenized colon (statement separator) */
 #define TOK_COLON 0x14
 
+/* variable types, bits 6-7 of byte 0 of each vvtable entry. */
+#define TYPE_SCALAR 0
+#define TYPE_ARRAY  1
+#define TYPE_STRING 2
+
 /* entire file gets read into memory (for now) */
 unsigned char data[BUFSIZE];
 
@@ -98,7 +103,7 @@ void die(const char *msg) {
 
 /* read entire file into memory */
 int readfile(void) {
-	int got = fread(data, 1, 65535, input_file);
+	int got = fread(data, 1, BUFSIZE - 1, input_file);
 	if(verbose) fprintf(stderr, "read %d bytes\n", got);
 	fclose(input_file);
 	if(got < MIN_PROG_SIZE)
@@ -282,7 +287,7 @@ void breakcode(void) {
 void move_code(int offset) {
 	unsigned char *dest = data + vvstart + offset;
 
-	if(dest < data || (filelen + offset) > 65535) {
+	if(dest < data || ((filelen + offset) > (BUFSIZE - 1))) {
 		die("attempt to move memory out of range; corrupt header bytes?\n");
 	}
 
@@ -394,9 +399,12 @@ int rebuild_vntable(int write) {
 		}
 
 		switch(type) {
-			case 1: varname = arrays++; sigil = 0xa8; break;
-			case 2: varname = strings++; sigil = 0xa4; break;
-			default: varname = scalars++; break;
+			case TYPE_SCALAR: varname = scalars++;               break;
+			case TYPE_ARRAY:  varname = arrays++;  sigil = 0xa8; break;
+			case TYPE_STRING: varname = strings++; sigil = 0xa4; break;
+			default:
+					  fprintf(stderr, "Warning: variable value #%d has unknown type.\n", varnum);
+					  break;
 		}
 
 		if(varname < 26) {
@@ -520,15 +528,15 @@ void check_varname(const unsigned char *name, int line) {
 	/* c now has the last char of the name, make sure it matches the variable type */
 	type = data[vvstart + 8 * (line - 1)] >> 6;
 	/* type: scalar = 0, array = 1, string = 2 */
-	if(type == 0) {
+	if(type == TYPE_SCALAR) {
 		if(c == '$')
 			die_mapfile("type mismatch: numeric variable may not end with $", line);
 		else if(c == '(')
 			die_mapfile("type mismatch: numeric variable may not end with (", line);
-	} else if(type == 1) {
+	} else if(type == TYPE_ARRAY) {
 		if(c != '(')
 			die_mapfile("type mismatch: array variable must end with (", line);
-	} else if(type == 2) {
+	} else if(type == TYPE_STRING) {
 		if(c != '$')
 			die_mapfile("type mismatch: string variable must end with $", line);
 	} else {
diff --git a/unprotbas.rst b/unprotbas.rst
index 1df865f..4228d91 100644
--- a/unprotbas.rst
+++ b/unprotbas.rst
@@ -208,11 +208,19 @@ Bad next-line pointer
 
 One more thing **unprotbas** can do is remove extra data from the end
 of the file. It's possible for BASIC files to contain extra data that
-occurs after the end of the program. Some programs use this as a way
-to load arbitrary binary data into memory along with the program; for
-other programs, the extra data is truly garbage (e.g. an EOF character
-if the file came from a CP/M system, or padding to a block size if a
-dumb implementation of XMODEM was used to transfer the file).
+occurs after the end of the program. Such data might be:
+
+- Arbitrary binary data used by the program at runtime, such as
+  machine language routines or fonts.
+
+- Zero bytes, caused by SAVEing the program with revision B BASIC. Every
+  time a program is LOADed, edited (or not) and then SAVEd again, 16
+  bytes of extra (garbage) data gets added to the program. To avoid
+  this, don't use revision B (use rev C if possible, A otherwise).
+
+- Garbage added by some system previously used to store or transmit
+  the file. CP/M systems might add an EOF (^Z) character. Dumb
+  file transfer software might pad the file up to its block size.
 
 Normally, such "garbage" doesn't hurt anything. BASIC ignores it. Or
 it normally does... if you suspect it's causing a problem, you can