aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorB. Watson <urchlay@slackware.uk>2024-06-08 13:12:23 -0400
committerB. Watson <urchlay@slackware.uk>2024-06-08 13:12:23 -0400
commit739758781030f107f260f587a4f9c1e8bdf70f58 (patch)
tree3a18b7bb2f87db35c7fad6124331d351ed713773
parentcb69895c4dbff77fe6e2eb1cf513ee7b2f3911f3 (diff)
downloadbw-atari8-tools-739758781030f107f260f587a4f9c1e8bdf70f58.tar.gz
dumpbas: clean up code, work on doc.
-rw-r--r--bas.h21
-rw-r--r--dumpbas.1157
-rw-r--r--dumpbas.c160
-rw-r--r--dumpbas.rst123
4 files changed, 314 insertions, 147 deletions
diff --git a/bas.h b/bas.h
index 8168aa1..9d117cc 100644
--- a/bas.h
+++ b/bas.h
@@ -30,6 +30,27 @@
/* tokenized colon (statement separator) */
#define TOK_COLON 0x14
+/* BASIC tokens. Not a full set. BASIC uses 2 sets of tokens, one
+ for commands and the other for operators (which is to say, everything
+ *not* a command). */
+#define CMD_GOTO 0x0a
+#define CMD_GO_TO 0x0b
+#define CMD_GOSUB 0x0c
+#define CMD_TRAP 0x0d
+#define CMD_LIST 0x04
+#define CMD_RESTORE 0x23
+#define CMD_REM 0x00
+#define CMD_DATA 0x01
+#define CMD_ERROR 0x37
+#define OP_GOTO 0x17
+#define OP_GOSUB 0x18
+#define OP_THEN 0x1b
+#define OP_COMMA 0x12
+#define OP_EOS 0x14
+#define OP_EOL 0x16
+#define OP_NUMCONST 0x0e
+#define OP_STRCONST 0x0f
+
/* variable types, bits 6-7 of byte 0 of each vvtable entry. */
#define TYPE_SCALAR 0
#define TYPE_ARRAY 1
diff --git a/dumpbas.1 b/dumpbas.1
index 9da3e11..b973793 100644
--- a/dumpbas.1
+++ b/dumpbas.1
@@ -73,67 +73,67 @@ Don\(aqt dump lines before \fBstart\-lineno\fP\&. Default: \fI0\fP\&.
Don\(aqt dump lines after \fBend\-lineno\fP\&. Default: \fI32768\fP\&.
.TP
.B \fB\-l\fP \fIlineno\fP
-Only dump one line. This is exactly equivalent to "\fB\-s\fP \fInum\fP \fB\-e\fP \fInum\fP".
+Only dump one line. This is exactly equivalent to "\fB\-s\fP \fIlineno\fP \fB\-e\fP \fIlineno\fP".
.UNINDENT
.SH FORMATTING
.sp
Every byte in the file is displayed in hex. However, they are grouped by line
and statement, and certain tokens get marker characters to help keep track
-of what they\(aqre for. Strings are displayed in both hex and ASCII. Floating
+of what they\(aqre for. Strings are displayed in quotes, in both hex and ASCII. Floating
point constants are displayed as 6 hex bytes with square brackets around them.
-.sp
-If \fBdumpbas\fP is run on the following program:
+.SS Line Header Markers
.INDENT 0.0
-.INDENT 3.5
-.sp
-.nf
-.ft C
-10 ? "HOW MANY TIMES";:INPUT N
-20 FOR I=1 TO N
-30 ? "HELLO ";:? I;"/";N:NEXT I
-40 REM WAIT FOR KEY
-50 POKE 764,255
-60 ? "PRESS ANY KEY"
-70 IF PEEK(764)=255 THEN 70
-80 POKE 764,255:GOTO 10
-.ft P
-.fi
-.UNINDENT
+.TP
+.B \fB@\fP
+Separates decimal line number from hex file offset.
+.TP
+.B \fB^\fP
+Prefix for line length.
+.TP
+.B \fB(\fP, \fB)\fP
+Surrounds the 2 hex bytes for the line number.
.UNINDENT
-.sp
-\fBNote:\fP The "PRESS ANY KEY" was entered in inverse video.
-.sp
-\&...it produces the following output:
+.SS Statement Markers
.INDENT 0.0
-.INDENT 3.5
-.sp
-.nf
-.ft C
- 10@0021 (0a 00): ^1b
- >17 !28 $0f =0e "H/48 O/4f W/57 /20 M/4d A/41 N/4e Y/59 /20 T/54 I/49 M/4d E/45 S/53" 15 14:
- >1b !02 80 16
- 20@003c (14 00): ^11
- >11 !08 81 2d #0e [40 01 00 00 00 00] 19 80 16
- 30@004d (1e 00): ^1d
- >0f !28 $0f =06 "H/48 E/45 L/4c L/4c O/4f /20" 15 14:
- >19 !28 81 15 $0f =01 "//2f" 15 80 14:
- >1d !09 81 16
- 40@006a (28 00): ^12
- >12 !00 57 41 49 54 20 46 4f 52 20 4b 45 59 9b
- 50@007c (32 00): ^15
- >15 !1f #0e [41 07 64 00 00 00] 12 #0e [41 02 55 00 00 00] 16
- 60@0091 (3c 00): ^15
- >15 !28 $0f =0d "|P/d0 |R/d2 |E/c5 |S/d3 |S/d3 | /a0 |A/c1 |N/ce |Y/d9 | /a0 |K/cb |E/c5 |Y/d9" 16
- 70@00a6 (46 00): ^20
- >20 !07 46 3a #0e [41 07 64 00 00 00] 2c 22 #0e [41 02 55 00 00 00] 1b #0e [40 70 00 00 00 00] 16
- 80@00c6 (50 00): ^1f
- >15 !1f #0e [41 07 64 00 00 00] 12 #0e [41 02 55 00 00 00] 14:
- >1f !0a #0e [40 10 00 00 00 00] 16
-32768@00e5 (00 80): ^0f
- >0f !19 $0f =07 "H/48 :/3a B/42 ./2e B/42 A/41 S/53" 16
-.ft P
-.fi
+.TP
+.B \fB>\fP
+Prefix for next\-statement offset. Every statement begins with this.
+.TP
+.B \fB!\fP
+Prefix for a command token. Every line of BASIC code begins with a
+command.
+.TP
+.B \fB:\fP
+Suffix for the \fI14\fP token; end of statement.
+.TP
+.B \fB#\fP
+Prefix for the \fI0e\fP token, which introduces a BCD floating point constant.
+.TP
+.B \fB[\fP, \fB]\fP
+Surrounds the 6 bytes of a BCD floating point constant.
+.TP
+.B \fB$\fP
+Prefix for the \fI0f\fP token, which introduces a string constant.
+.TP
+.B \fB=\fP
+Prefix for the string\-length byte of a string constant.
.UNINDENT
+.SS String Byte Markers
+.INDENT 0.0
+.TP
+.B \fB"\fP
+A string constant is surrounded by double\-quotes.
+.TP
+.B \fB^\fP
+Prefix for a control character. For instance, \fI03\fP is displayed as \fI^C\fP\&.
+.TP
+.B \fB|\fP
+Prefix for an inverse video character. Example: \fIc1\fP (inverse video \fIA\fP)
+is displayed as \fI|A\fP\&. May be combined with \fI^\fP, for inverse control characters.
+.TP
+.B \fB/\fP
+Separates the printable ASCII representation of a character from its hex byte.
+Example: \fIA/41\fP\&.
.UNINDENT
.SS Line header
.sp
@@ -224,6 +224,61 @@ The actual 6\-byte constant is surrounded with \fI[\fP and \fI]\fP\&.
.IP \(bu 2
The last token is \fI16\fP, which is BASIC\(aqs end\-of\-line token.
.UNINDENT
+.SH EXAMPLE
+.sp
+If \fBdumpbas\fP is run on the following program:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+10 ? "HOW MANY TIMES";:INPUT N
+20 FOR I=1 TO N
+30 ? "HELLO ";:? I;"/";N:NEXT I
+40 REM WAIT FOR KEY
+50 POKE 764,255
+60 ? "PRESS ANY KEY"
+70 IF PEEK(764)=255 THEN 70
+80 POKE 764,255:GOTO 10
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+\fBNote:\fP The "PRESS ANY KEY" was entered in inverse video.
+.sp
+\&...it produces the following output:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+ 10@0021 (0a 00): ^1b
+ >17 !28 $0f =0e "H/48 O/4f W/57 /20 M/4d A/41 N/4e Y/59 /20 T/54 I/49 M/4d E/45 S/53" 15 14:
+ >1b !02 80 16
+ 20@003c (14 00): ^11
+ >11 !08 81 2d #0e [40 01 00 00 00 00] 19 80 16
+ 30@004d (1e 00): ^1d
+ >0f !28 $0f =06 "H/48 E/45 L/4c L/4c O/4f /20" 15 14:
+ >19 !28 81 15 $0f =01 "//2f" 15 80 14:
+ >1d !09 81 16
+ 40@006a (28 00): ^12
+ >12 !00 57 41 49 54 20 46 4f 52 20 4b 45 59 9b
+ 50@007c (32 00): ^15
+ >15 !1f #0e [41 07 64 00 00 00] 12 #0e [41 02 55 00 00 00] 16
+ 60@0091 (3c 00): ^15
+ >15 !28 $0f =0d "|P/d0 |R/d2 |E/c5 |S/d3 |S/d3 | /a0 |A/c1 |N/ce |Y/d9 | /a0 |K/cb |E/c5 |Y/d9" 16
+ 70@00a6 (46 00): ^20
+ >20 !07 46 3a #0e [41 07 64 00 00 00] 2c 22 #0e [41 02 55 00 00 00] 1b #0e [40 70 00 00 00 00] 16
+ 80@00c6 (50 00): ^1f
+ >15 !1f #0e [41 07 64 00 00 00] 12 #0e [41 02 55 00 00 00] 14:
+ >1f !0a #0e [40 10 00 00 00 00] 16
+32768@00e5 (00 80): ^0f
+ >0f !19 $0f =07 "H/48 :/3a B/42 ./2e B/42 A/41 S/53" 16
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
.SH EXIT STATUS
.sp
0 for success, 1 for failure.
diff --git a/dumpbas.c b/dumpbas.c
index b7607ea..2888b52 100644
--- a/dumpbas.c
+++ b/dumpbas.c
@@ -77,10 +77,58 @@ void print_atascii(unsigned char c) {
putchar('/');
}
+/* Dump the text of a REM, DATA, or ERROR statement. These are
+   terminated by $9B, a real EOL, not the BASIC end-of-line token.
+   Since they're strings, each byte is printed in ASCII (via
+   print_atascii()) as well as in hex.
+
+   pos is the offset in program[] of the first text byte.
+
+   Returns the offset just past the $9B. If no $9B is found before
+   filelen (the same limit main() uses for offsets into program[]),
+   stops there and returns filelen, so a truncated or corrupt file
+   can't make us read past the end of the data. */
+int handle_text_stmt(int pos) {
+	unsigned char c;
+
+	while(pos < filelen) {
+		c = program[pos++];
+		print_atascii(c);
+		printf("%02x ", c);
+		if(c == 0x9b) /* the terminator itself is dumped, then we stop */
+			break;
+	}
+
+	return pos;
+}
+
+/* Print the command token found at program[pos], with the '!' marker
+   in front of it. For REM, DATA and ERROR the rest of the statement
+   is text, so it's handed off to handle_text_stmt().
+
+   Returns the offset of the first byte that hasn't been printed yet. */
+int handle_cmd(int pos) {
+	unsigned char cmd = program[pos];
+	int next = pos + 1;
+
+	printf("!%02x ", cmd);
+
+	if(cmd == CMD_REM || cmd == CMD_DATA || cmd == CMD_ERROR)
+		next = handle_text_stmt(next);
+
+	return next;
+}
+
+/* Dump a string constant. program[pos] is the string-constant token
+   (printed with a '$' prefix), program[pos + 1] is the length byte
+   (printed with a '=' prefix), and the string's bytes follow.
+
+   The bytes are printed between double-quotes, each one in ASCII
+   (via print_atascii()) and in hex, separated by spaces. The closing
+   quote is printed after the loop rather than along with the last
+   character, so that a zero-length string still gets one: it comes
+   out as "" instead of a lone opening quote.
+
+   Doesn't return anything; the caller advances past the string by
+   itself, using the length byte. */
+void handle_string(int pos) {
+	int i, len;
+
+	len = program[pos + 1];
+	printf("$%02x =%02x \"", program[pos], len);
+
+	for(i = 0; i < len; i++) {
+		unsigned char c = program[pos + 2 + i];
+		print_atascii(c);
+		printf("%02x", c);
+		if(i < len - 1) /* separator goes between bytes, not after the last */
+			putchar(' ');
+	}
+
+	printf("\" ");
+}
+
+/* Dump a numeric constant. program[pos] is the numeric-constant token,
+   printed with a '#' prefix. The 6 bytes that follow it are printed
+   in hex, separated by spaces, inside square brackets. A trailing
+   space is printed after the closing bracket. */
+void handle_num(int pos) {
+	int n;
+
+	printf("#%02x [", program[pos]);
+
+	for(n = 1; n <= 6; n++) {
+		printf("%02x", program[pos + n]);
+		putchar(n < 6 ? ' ' : ']');
+	}
+
+	putchar(' ');
+}
+
/* sorry, this is horrid, more like assembly than C. */
int main(int argc, char **argv) {
- int pos, nextpos, offset, soffset, lineno, cmd, i, end;
- int numcount = 0, start_string = 0, end_string = 0;
+ int linepos, nextpos, offset, soffset, lineno, pos, end, tok;
set_self(*argv);
parse_general_args(argc, argv, print_help);
@@ -89,71 +137,61 @@ int main(int argc, char **argv) {
readfile();
parse_header();
- pos = codestart;
- while(pos < filelen) {
- lineno = getword(pos);
- offset = program[pos + 2];
- nextpos = pos + offset;
-
- if(lineno >= startlineno) {
- printf("%5d@%04x (%02x %02x): ^%02x ", lineno, pos, program[pos], program[pos + 1], offset);
-
- i = pos + 3;
- while(i < nextpos) {
- soffset = program[i];
- end = pos + soffset;
- while(i < end) {
- printf("\n >%02x ", program[i]); /* offset */
- i++;
- cmd = 1;
- while(i < end) {
- if(cmd) {
- putchar('!');
- } else if(program[i] == 0x0e && program[i - 1] != 0x0f && (i < start_string || i > end_string)) {
- putchar('#');
- numcount = 7;
- } else if(program[i] == 0x0f) {
- putchar('$');
- start_string = i + 2;
- end_string = start_string + program[i + 1];
- }
-
- if(numcount == 6)
- putchar('[');
-
- if(i == start_string)
- putchar('"');
-
- if(i == (start_string - 1))
- putchar('=');
-
- if(i >= start_string && i < end_string)
- print_atascii(program[i]);
-
- printf("%02x", program[i]);
- if(i == (end - 1) && program[i] == 0x14)
- putchar(':');
-
- if(numcount) {
- if(--numcount == 0)
- putchar(']');
- }
-
- i++;
- if(i == end_string)
- putchar('"');
-
- putchar(' ');
- cmd = 0;
+ linepos = codestart;
+ while(linepos < filelen) { /* loop over lines */
+ lineno = getword(linepos);
+ offset = program[linepos + 2];
+ nextpos = linepos + offset;
+
+ if(offset < 6)
+ die("Can't dump a protected program, unprotect it first.");
+
+ if(lineno < startlineno) {
+ linepos = nextpos;
+ continue;
+ }
+
+ /* line header */
+ printf("%5d@%04x (%02x %02x): ^%02x ",
+ lineno, linepos, program[linepos], program[linepos + 1], offset);
+
+ pos = linepos + 3;
+ while(pos < nextpos) { /* loop over statements within a line */
+ soffset = program[pos];
+ end = linepos + soffset;
+
+ while(pos < end) { /* loop over tokens within a statement */
+ printf("\n >%02x ", program[pos++]); /* offset */
+ pos = handle_cmd(pos++); /* 1st token is the command */
+
+ while(pos < end) { /* loop over operators */
+ tok = program[pos];
+ switch(tok) {
+ case OP_NUMCONST:
+ handle_num(pos);
+ pos += 7;
+ break;
+ case OP_STRCONST:
+ handle_string(pos);
+ pos += program[pos + 1] + 2;
+ break;
+ default:
+ printf("%02x", program[pos]);
+ if(pos == (end - 1) && tok == OP_EOS)
+ putchar(':');
+ else
+ putchar(' ');
+ pos++;
+ break;
}
}
}
-
- putchar('\n');
}
+ putchar('\n');
+
if(lineno == endlineno) break;
- pos = nextpos;
+ linepos = nextpos;
}
return 0;
diff --git a/dumpbas.rst b/dumpbas.rst
index 8b116c3..80b5653 100644
--- a/dumpbas.rst
+++ b/dumpbas.rst
@@ -53,52 +53,65 @@ Dump Options
 Don't dump lines after **end-lineno**. Default: *32768*.
**-l** *lineno*
- Only dump one line. This is exactly equivalent to "**-s** *num* **-e** *num*".
+ Only dump one line. This is exactly equivalent to "**-s** *lineno* **-e** *lineno*".
FORMATTING
==========
Every byte in the file is displayed in hex. However, they are grouped by line
and statement, and certain tokens get marker characters to help keep track
-of what they're for. Strings are displayed in both hex and ASCII. Floating
+of what they're for. Strings are displayed in quotes, in both hex and ASCII. Floating
point constants are displayed as 6 hex bytes with square brackets around them.
-If **dumpbas** is run on the following program::
+Line Header Markers
+-------------------
+**@**
+ Separates decimal line number from hex file offset.
- 10 ? "HOW MANY TIMES";:INPUT N
- 20 FOR I=1 TO N
- 30 ? "HELLO ";:? I;"/";N:NEXT I
- 40 REM WAIT FOR KEY
- 50 POKE 764,255
- 60 ? "PRESS ANY KEY"
- 70 IF PEEK(764)=255 THEN 70
- 80 POKE 764,255:GOTO 10
+**^**
+ Prefix for line length.
-**Note:** The "PRESS ANY KEY" was entered in inverse video.
+**(**, **)**
+ Surrounds the 2 hex bytes for the line number.
-...it produces the following output::
+Statement Markers
+-----------------
+**>**
+ Prefix for next-statement offset. Every statement begins with this.
- 10@0021 (0a 00): ^1b
- >17 !28 $0f =0e "H/48 O/4f W/57 /20 M/4d A/41 N/4e Y/59 /20 T/54 I/49 M/4d E/45 S/53" 15 14:
- >1b !02 80 16
- 20@003c (14 00): ^11
- >11 !08 81 2d #0e [40 01 00 00 00 00] 19 80 16
- 30@004d (1e 00): ^1d
- >0f !28 $0f =06 "H/48 E/45 L/4c L/4c O/4f /20" 15 14:
- >19 !28 81 15 $0f =01 "//2f" 15 80 14:
- >1d !09 81 16
- 40@006a (28 00): ^12
- >12 !00 57 41 49 54 20 46 4f 52 20 4b 45 59 9b
- 50@007c (32 00): ^15
- >15 !1f #0e [41 07 64 00 00 00] 12 #0e [41 02 55 00 00 00] 16
- 60@0091 (3c 00): ^15
- >15 !28 $0f =0d "|P/d0 |R/d2 |E/c5 |S/d3 |S/d3 | /a0 |A/c1 |N/ce |Y/d9 | /a0 |K/cb |E/c5 |Y/d9" 16
- 70@00a6 (46 00): ^20
- >20 !07 46 3a #0e [41 07 64 00 00 00] 2c 22 #0e [41 02 55 00 00 00] 1b #0e [40 70 00 00 00 00] 16
- 80@00c6 (50 00): ^1f
- >15 !1f #0e [41 07 64 00 00 00] 12 #0e [41 02 55 00 00 00] 14:
- >1f !0a #0e [40 10 00 00 00 00] 16
- 32768@00e5 (00 80): ^0f
- >0f !19 $0f =07 "H/48 :/3a B/42 ./2e B/42 A/41 S/53" 16
+**!**
+ Prefix for a command token. Every line of BASIC code begins with a
+ command.
+
+**:**
+ Suffix for the *14* token; end of statement.
+
+**#**
+ Prefix for the *0e* token, which introduces a BCD floating point constant.
+
+**[**, **]**
+ Surrounds the 6 bytes of a BCD floating point constant.
+
+**$**
+ Prefix for the *0f* token, which introduces a string constant.
+
+**=**
+ Prefix for the string-length byte of a string constant.
+
+String Byte Markers
+-------------------
+**"**
+ A string constant is surrounded by double-quotes.
+
+**^**
+ Prefix for a control character. For instance, *03* is displayed as *^C*.
+
+**|**
+ Prefix for an inverse video character. Example: *c1* (inverse video *A*)
+ is displayed as *|A*. May be combined with *^*, for inverse control characters.
+
+**/**
+ Separates the printable ASCII representation of a character from its hex byte.
+ Example: *A/41*.
Line header
-----------
@@ -155,6 +168,46 @@ Line 20's first statement has an example of a floating point constant::
- The last token is *16*, which is BASIC's end-of-line token.
+EXAMPLE
+=======
+If **dumpbas** is run on the following program::
+
+ 10 ? "HOW MANY TIMES";:INPUT N
+ 20 FOR I=1 TO N
+ 30 ? "HELLO ";:? I;"/";N:NEXT I
+ 40 REM WAIT FOR KEY
+ 50 POKE 764,255
+ 60 ? "PRESS ANY KEY"
+ 70 IF PEEK(764)=255 THEN 70
+ 80 POKE 764,255:GOTO 10
+
+**Note:** The "PRESS ANY KEY" was entered in inverse video.
+
+...it produces the following output::
+
+ 10@0021 (0a 00): ^1b
+ >17 !28 $0f =0e "H/48 O/4f W/57 /20 M/4d A/41 N/4e Y/59 /20 T/54 I/49 M/4d E/45 S/53" 15 14:
+ >1b !02 80 16
+ 20@003c (14 00): ^11
+ >11 !08 81 2d #0e [40 01 00 00 00 00] 19 80 16
+ 30@004d (1e 00): ^1d
+ >0f !28 $0f =06 "H/48 E/45 L/4c L/4c O/4f /20" 15 14:
+ >19 !28 81 15 $0f =01 "//2f" 15 80 14:
+ >1d !09 81 16
+ 40@006a (28 00): ^12
+ >12 !00 57 41 49 54 20 46 4f 52 20 4b 45 59 9b
+ 50@007c (32 00): ^15
+ >15 !1f #0e [41 07 64 00 00 00] 12 #0e [41 02 55 00 00 00] 16
+ 60@0091 (3c 00): ^15
+ >15 !28 $0f =0d "|P/d0 |R/d2 |E/c5 |S/d3 |S/d3 | /a0 |A/c1 |N/ce |Y/d9 | /a0 |K/cb |E/c5 |Y/d9" 16
+ 70@00a6 (46 00): ^20
+ >20 !07 46 3a #0e [41 07 64 00 00 00] 2c 22 #0e [41 02 55 00 00 00] 1b #0e [40 70 00 00 00 00] 16
+ 80@00c6 (50 00): ^1f
+ >15 !1f #0e [41 07 64 00 00 00] 12 #0e [41 02 55 00 00 00] 14:
+ >1f !0a #0e [40 10 00 00 00 00] 16
+ 32768@00e5 (00 80): ^0f
+ >0f !19 $0f =07 "H/48 :/3a B/42 ./2e B/42 A/41 S/53" 16
+
EXIT STATUS
===========