about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- unprotbas.1 22
-rw-r--r-- unprotbas.c 24
-rw-r--r-- unprotbas.rst 18
3 files changed, 45 insertions, 19 deletions
diff --git a/unprotbas.1 b/unprotbas.1
index b8123f1..98e17c4 100644
--- a/unprotbas.1
+++ b/unprotbas.1
@@ -27,7 +27,7 @@ level margin: \\n[rst2man-indent\\n[rst2man-indent-level]]
.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]]
.in \\n[rst2man-indent\\n[rst2man-indent-level]]u
..
-.TH "UNPROTBAS" 1 "2024-05-28" "0.2.1" "Urchlay's Atari 8-bit Tools"
+.TH "UNPROTBAS" 1 "2024-05-29" "0.2.1" "Urchlay's Atari 8-bit Tools"
.SH NAME
unprotbas \- Unprotect or create LIST-protected Atari 8-bit BASIC programs
.SH SYNOPSIS
@@ -235,11 +235,21 @@ This will fail if there\(aqs already a line 32767.
.sp
One more thing \fBunprotbas\fP can do is remove extra data from the end
of the file. It\(aqs possible for BASIC files to contain extra data that
-occurs after the end of the program. Some programs use this as a way
-to load arbitrary binary data into memory along with the program; for
-other programs, the extra data is truly garbage (e.g. an EOF character
-if the file came from a CP/M system, or padding to a block size if a
-dumb implementation of XMODEM was used to transfer the file).
+occurs after the end of the program. Such data might be:
+.INDENT 0.0
+.IP \(bu 2
+Arbitrary binary data used by the program at runtime, such as
+machine language routines, or fonts.
+.IP \(bu 2
+Zero bytes, caused by SAVEing the program with revision B BASIC. Every
+time a program is LOADed, edited (or not) and then SAVEd again, 16
+bytes of extra (garbage) data gets added to the program. To avoid
+this, don\(aqt use revision B (use rev C if possible, A otherwise).
+.IP \(bu 2
+Garbage added by some system previously used to store or transmit
+the file. CP/M systems might add an EOF (^Z) character. Dumb
+file transfer software might pad the file up to its block size.
+.UNINDENT
.sp
Normally, such "garbage" doesn\(aqt hurt anything. BASIC ignores it. Or
it normally does... if you suspect it\(aqs causing a problem, you can
diff --git a/unprotbas.c b/unprotbas.c
index 6edb38f..37013cf 100644
--- a/unprotbas.c
+++ b/unprotbas.c
@@ -37,6 +37,11 @@
/* tokenized colon (statement separator) */
#define TOK_COLON 0x14
+/* variable types, bits 6-7 of byte 0 of each vvtable entry. */
+#define TYPE_SCALAR 0
+#define TYPE_ARRAY 1
+#define TYPE_STRING 2
+
/* entire file gets read into memory (for now) */
unsigned char data[BUFSIZE];
@@ -98,7 +103,7 @@ void die(const char *msg) {
/* read entire file into memory */
int readfile(void) {
- int got = fread(data, 1, 65535, input_file);
+ int got = fread(data, 1, BUFSIZE - 1, input_file);
if(verbose) fprintf(stderr, "read %d bytes\n", got);
fclose(input_file);
if(got < MIN_PROG_SIZE)
@@ -282,7 +287,7 @@ void breakcode(void) {
void move_code(int offset) {
unsigned char *dest = data + vvstart + offset;
- if(dest < data || (filelen + offset) > 65535) {
+ if(dest < data || ((filelen + offset) > (BUFSIZE - 1))) {
die("attempt to move memory out of range; corrupt header bytes?\n");
}
@@ -394,9 +399,12 @@ int rebuild_vntable(int write) {
}
switch(type) {
- case 1: varname = arrays++; sigil = 0xa8; break;
- case 2: varname = strings++; sigil = 0xa4; break;
- default: varname = scalars++; break;
+ case TYPE_SCALAR: varname = scalars++; break;
+ case TYPE_ARRAY: varname = arrays++; sigil = 0xa8; break;
+ case TYPE_STRING: varname = strings++; sigil = 0xa4; break;
+ default:
+ fprintf(stderr, "Warning: variable value #%d has unknown type.\n", varnum);
+ break;
}
if(varname < 26) {
@@ -520,15 +528,15 @@ void check_varname(const unsigned char *name, int line) {
/* c now has the last char of the name, make sure it matches the variable type */
type = data[vvstart + 8 * (line - 1)] >> 6;
/* type: scalar = 0, array = 1, string = 2 */
- if(type == 0) {
+ if(type == TYPE_SCALAR) {
if(c == '$')
die_mapfile("type mismatch: numeric variable may not end with $", line);
else if(c == '(')
die_mapfile("type mismatch: numeric variable may not end with (", line);
- } else if(type == 1) {
+ } else if(type == TYPE_ARRAY) {
if(c != '(')
die_mapfile("type mismatch: array variable must end with (", line);
- } else if(type == 2) {
+ } else if(type == TYPE_STRING) {
if(c != '$')
die_mapfile("type mismatch: string variable must end with $", line);
} else {
diff --git a/unprotbas.rst b/unprotbas.rst
index 1df865f..4228d91 100644
--- a/unprotbas.rst
+++ b/unprotbas.rst
@@ -208,11 +208,19 @@ Bad next-line pointer
One more thing **unprotbas** can do is remove extra data from the end
of the file. It's possible for BASIC files to contain extra data that
-occurs after the end of the program. Some programs use this as a way
-to load arbitrary binary data into memory along with the program; for
-other programs, the extra data is truly garbage (e.g. an EOF character
-if the file came from a CP/M system, or padding to a block size if a
-dumb implementation of XMODEM was used to transfer the file).
+occurs after the end of the program. Such data might be:
+
+- Arbitrary binary data used by the program at runtime, such as
+ machine language routines, or fonts.
+
+- Zero bytes, caused by SAVEing the program with revision B BASIC. Every
+ time a program is LOADed, edited (or not) and then SAVEd again, 16
+ bytes of extra (garbage) data gets added to the program. To avoid
+ this, don't use revision B (use rev C if possible, A otherwise).
+
+- Garbage added by some system previously used to store or transmit
+ the file. CP/M systems might add an EOF (^Z) character. Dumb
+ file transfer software might pad the file up to its block size.
Normally, such "garbage" doesn't hurt anything. BASIC ignores it. Or
it normally does... if you suspect it's causing a problem, you can