aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: B. Watson <urchlay@slackware.uk>, 2024-04-25 14:52:52 -0400
committer: B. Watson <urchlay@slackware.uk>, 2024-04-25 14:52:52 -0400
commit: 530b83e2736f1d4afeedd3bf99c8428da2adabf7 (patch)
tree: 0e2ef70beb8110ab08772031d42341e6197421a7
parent: 6ff31c1205b46448918ee4e0f10b38ad4cc6746e (diff)
download: bw-atari8-tools-530b83e2736f1d4afeedd3bf99c8428da2adabf7.tar.gz
xexamine: added.
-rw-r--r-- Makefile 4
-rw-r--r-- xexamine.1 104
-rw-r--r-- xexamine.c 363
-rw-r--r-- xexamine.rst 56
4 files changed, 525 insertions, 2 deletions
diff --git a/Makefile b/Makefile
index 07e84c3..9fb8a1d 100644
--- a/Makefile
+++ b/Makefile
@@ -14,9 +14,9 @@ CC=gcc
CFLAGS=-Wall $(COPT) -ansi -D_GNU_SOURCE -DVERSION=\"$(VERSION)\"
# BINS and SCRIPTS go in $BINDIR, DOCS go in $DOCDIR
-BINS=a8eol xfd2atr atr2xfd blob2c cart2xex fenders xexsplit xexcat atrsize rom2cart unmac65 axe blob2xex
+BINS=a8eol xfd2atr atr2xfd blob2c cart2xex fenders xexsplit xexcat atrsize rom2cart unmac65 axe blob2xex xexamine
SCRIPTS=dasm2atasm a8utf8
-MANS=a8eol.1 xfd2atr.1 atr2xfd.1 blob2c.1 cart2xex.1 fenders.1 xexsplit.1 xexcat.1 atrsize.1 rom2cart.1 unmac65.1 axe.1 dasm2atasm.1 a8utf8.1 blob2xex.1
+MANS=a8eol.1 xfd2atr.1 atr2xfd.1 blob2c.1 cart2xex.1 fenders.1 xexsplit.1 xexcat.1 atrsize.1 rom2cart.1 unmac65.1 axe.1 dasm2atasm.1 a8utf8.1 blob2xex.1 xexamine.1
DOCS=README equates.inc *.dasm
# All the programs share this version number...
diff --git a/xexamine.1 b/xexamine.1
new file mode 100644
index 0000000..0d2cb03
--- /dev/null
+++ b/xexamine.1
@@ -0,0 +1,104 @@
+.\" Man page generated from reStructuredText.
+.
+.
+.nr rst2man-indent-level 0
+.
+.de1 rstReportMargin
+\\$1 \\n[an-margin]
+level \\n[rst2man-indent-level]
+level margin: \\n[rst2man-indent\\n[rst2man-indent-level]]
+-
+\\n[rst2man-indent0]
+\\n[rst2man-indent1]
+\\n[rst2man-indent2]
+..
+.de1 INDENT
+.\" .rstReportMargin pre:
+. RS \\$1
+. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin]
+. nr rst2man-indent-level +1
+.\" .rstReportMargin post:
+..
+.de UNINDENT
+. RE
+.\" indent \\n[an-margin]
+.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]]
+.nr rst2man-indent-level -1
+.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]]
+.in \\n[rst2man-indent\\n[rst2man-indent-level]]u
+..
+.TH "XEXAMINE" 1 "2024-04-25" "0.2.1" "Urchlay's Atari 8-bit Tools"
+.SH NAME
+xexamine \- Show information on Atari 8-bit executables (XEX)
+.\" RST source for xexamine(1) man page. Convert with:
+.
+.\" rst2man.py xexamine.rst > xexamine.1
+.
+.SH SYNOPSIS
+.sp
+xexamine [\fB\-h\fP] | [\fB\-v\fP] \fIxexfile\fP
+.SH DESCRIPTION
+.sp
+\fBxexamine\fP reads an Atari 8\-bit executable (.xex file) and prints
+the following information on each segment in the file:
+.sp
+Segment number (1\-based).
+.sp
+Start and end addresses (in hex).
+.sp
+Length in bytes (in decimal).
+.sp
+CRC32 checksum of the segment.
+.sp
+Segment type:
+.INDENT 0.0
+.INDENT 3.5
+If the segment is a run address (loads at RUNAD) or an init address (loads at INITAD), the
+type is "Run" or "Init", with the actual run or init address.
+.sp
+Otherwise, the percentage of the segment that contains valid 6502 object
+code is printed. This is an estimate based on static analysis and some
+heuristics, and as such, isn\(aqt 100% accurate.
+.UNINDENT
+.UNINDENT
+.SH OPTIONS
+.INDENT 0.0
+.TP
+.B \-h
+Print a short help message and exit.
+.TP
+.B \-v
+Verbose operation.
+.UNINDENT
+.SH EXIT STATUS
+.sp
+Exit status is zero if \fIxexfile\fP is a valid Atari .xex file, non\-zero otherwise.
+.SH COPYRIGHT
+.sp
+WTFPL. See \fI\%http://www.wtfpl.net/txt/copying/\fP for details.
+.SH AUTHOR
+.sp
+B. Watson <\fI\%urchlay@slackware.uk\fP>; Urchlay on irc.libera.chat \fI##atari\fP\&.
+.SH SEE ALSO
+.sp
+\fBa8eol\fP(1),
+\fBa8utf8\fP(1),
+\fBatr2xfd\fP(1),
+\fBatrsize\fP(1),
+\fBaxe\fP(1),
+\fBblob2c\fP(1),
+\fBcart2xex\fP(1),
+\fBdasm2atasm\fP(1),
+\fBfenders\fP(1),
+\fBrom2cart\fP(1),
+\fBunmac65\fP(1),
+\fBxexcat\fP(1),
+\fBxexsplit\fP(1),
+\fBxfd2atr\fP(1).
+.sp
+Any good Atari 8\-bit book: \fIDe Re Atari\fP, \fIThe Atari BASIC Reference
+Manual\fP, the \fIOS Users\(aq Guide\fP, \fIMapping the Atari\fP, etc.
+.\" Generated by docutils manpage writer.
+.
diff --git a/xexamine.c b/xexamine.c
new file mode 100644
index 0000000..a07da6a
--- /dev/null
+++ b/xexamine.c
@@ -0,0 +1,363 @@
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include "xex.h"
+
+#define SELF "xexamine"
+
+/* #define CLASSIFY_DEBUG */
+
+/*
+ show all segments of a xex file, including:
+ has ffff header or not
+ start address, end address, length
+ whether the segment contains code or just data (heuristics)
+ checksum of the segment contents
+*/
+
+/* crc32() and crc32_for_byte() come from public domain code:
+ http://home.thep.lu.se/~bjorn/crc/
+*/
+
/* Build one entry of the CRC32 lookup table: run the byte value r
   through 8 rounds of the reflected polynomial 0xEDB88320, then flip
   the top byte. */
uint32_t crc32_for_byte(uint32_t r) {
	int round = 8;

	while(round-- > 0) {
		uint32_t poly = (r & 1) ? 0 : (uint32_t)0xEDB88320L;
		r = poly ^ (r >> 1);
	}

	return r ^ (uint32_t)0xFF000000L;
}

/* Fold n_bytes bytes at data into the running checksum *crc.
   The result continues from whatever value *crc held on entry, so
   the caller sets it to 0 before the first call for a new checksum.
   The lookup table is built on first use. */
void crc32(const void *data, size_t n_bytes, uint32_t* crc) {
	static uint32_t table[0x100];
	uint8_t *p = (uint8_t *)data;
	size_t k;

	/* table[0] is never 0 once filled in, so it doubles as the
	   "already initialized" flag. */
	if(table[0] == 0) {
		for(k = 0; k < 0x100; k++)
			table[k] = crc32_for_byte(k);
	}

	while(n_bytes--) {
		*crc = table[(*crc ^ *p) & 0xff] ^ (*crc >> 8);
		p++;
	}
}
+
+void usage(int status) {
+ printf("Usage: " SELF " [-v] file.xex\n");
+ if(status) fprintf(stderr, "Try '%s -h' for help\n", SELF);
+ exit(status);
+}
+
/* per-byte classification stored in classify_seg()'s map[] */
enum {
	CL_DATA    = 0, /* not code */
	CL_OPCODE  = 1, /* first byte of an instruction */
	CL_OPERAND = 2  /* operand byte of the preceding opcode */
};
+
+/* 3 tables used by classify_seg() */
+
/* opcode_valid[op] is 1 if byte value op is treated as a real 6502
   opcode, 0 if not. Row = high nybble, column = low nybble. */
int opcode_valid[256] = {
	/*       x0 x1 x2 x3  x4 x5 x6 x7  x8 x9 xa xb  xc xd xe xf */
	/* 0x */ 1,1,0,0, 0,1,1,0, 1,1,1,0, 0,1,1,0,
	/* 1x */ 1,1,0,0, 0,1,1,0, 1,1,0,0, 0,1,1,0,
	/* 2x */ 1,1,0,0, 1,1,1,0, 1,1,1,0, 1,1,1,0,
	/* 3x */ 1,1,0,0, 0,1,1,0, 1,1,0,0, 0,1,1,0,
	/* 4x */ 1,1,0,0, 0,1,1,0, 1,1,1,0, 1,1,1,0,
	/* 5x */ 1,1,0,0, 0,1,1,0, 1,1,0,0, 0,1,1,0,
	/* 6x */ 1,1,0,0, 0,1,1,0, 1,1,1,0, 1,1,1,0,
	/* 7x */ 1,1,0,0, 0,1,1,0, 1,1,0,0, 0,1,1,0,
	/* 8x */ 0,1,0,0, 1,1,1,0, 1,0,1,0, 1,1,1,0,
	/* 9x */ 1,1,0,0, 1,1,1,0, 1,1,1,0, 0,1,0,0,
	/* ax */ 1,1,1,0, 1,1,1,0, 1,1,1,0, 1,1,1,0,
	/* bx */ 1,1,0,0, 1,1,1,0, 1,1,1,0, 1,1,1,0,
	/* cx */ 1,1,0,0, 1,1,1,0, 1,1,1,0, 1,1,1,0,
	/* dx */ 1,1,0,0, 0,1,1,0, 1,1,0,0, 0,1,1,0,
	/* ex */ 1,1,0,0, 1,1,1,0, 1,1,1,0, 1,1,1,0,
	/* fx */ 1,1,0,0, 0,1,1,0, 1,1,0,0, 0,1,1,0
};
+
/* opcode_lengths[op] is the instruction length in bytes (opcode plus
   operands) for byte value op. Entries that opcode_valid[] rejects
   are all 1. Row = high nybble, column = low nybble. */
int opcode_lengths[256] = {
	/*       x0 x1 x2 x3  x4 x5 x6 x7  x8 x9 xa xb  xc xd xe xf */
	/* 0x */ 1,2,1,1, 1,2,2,1, 1,2,1,1, 1,3,3,1,
	/* 1x */ 2,2,1,1, 1,2,2,1, 1,3,1,1, 1,3,3,1,
	/* 2x */ 3,2,1,1, 2,2,2,1, 1,2,1,1, 3,3,3,1,
	/* 3x */ 2,2,1,1, 1,2,2,1, 1,3,1,1, 1,3,3,1,
	/* 4x */ 1,2,1,1, 1,2,2,1, 1,2,1,1, 3,3,3,1,
	/* 5x */ 2,2,1,1, 1,2,2,1, 1,3,1,1, 1,3,3,1,
	/* 6x */ 1,2,1,1, 1,2,2,1, 1,2,1,1, 3,3,3,1,
	/* 7x */ 2,2,1,1, 1,2,2,1, 1,3,1,1, 1,3,3,1,
	/* 8x */ 1,2,1,1, 2,2,2,1, 1,1,1,1, 3,3,3,1,
	/* 9x */ 2,2,1,1, 2,2,2,1, 1,3,1,1, 1,3,1,1,
	/* ax */ 2,2,2,1, 2,2,2,1, 1,2,1,1, 3,3,3,1,
	/* bx */ 2,2,1,1, 2,2,2,1, 1,3,1,1, 3,3,3,1,
	/* cx */ 2,2,1,1, 2,2,2,1, 1,2,1,1, 3,3,3,1,
	/* dx */ 2,2,1,1, 1,2,2,1, 1,3,1,1, 1,3,3,1,
	/* ex */ 2,2,1,1, 2,2,2,1, 1,2,1,1, 3,3,3,1,
	/* fx */ 2,2,1,1, 1,2,2,1, 1,3,1,1, 1,3,3,1
};
+
/* opcode_is_ctlxfr[op] is 1 for instructions after which execution
   doesn't (necessarily) carry on with the next byte: the 8 branches
   ($10 $30 $50 $70 $90 $b0 $d0 $f0), JMP abs ($4c), JMP (ind) ($6c),
   RTS ($60) and RTI ($40). JSR ($20) is deliberately 0.
   Row = high nybble, column = low nybble. */
int opcode_is_ctlxfr[256] = {
	/*       x0 x1 x2 x3  x4 x5 x6 x7  x8 x9 xa xb  xc xd xe xf */
	/* 0x */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	/* 1x */ 1,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	/* 2x */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	/* 3x */ 1,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	/* 4x */ 1,0,0,0, 0,0,0,0, 0,0,0,0, 1,0,0,0,
	/* 5x */ 1,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	/* 6x */ 1,0,0,0, 0,0,0,0, 0,0,0,0, 1,0,0,0,
	/* 7x */ 1,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	/* 8x */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	/* 9x */ 1,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	/* ax */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	/* bx */ 1,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	/* cx */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	/* dx */ 1,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	/* ex */ 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
	/* fx */ 1,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0
};
+
+void find_dlist(const unsigned char *mem, unsigned char *map, int len) {
+ int i, dlstart = -1, dlend = -1;
+
+ for(i = 2; i < len-2; i++) {
+ if(mem[i] == 'p' && mem[i-1] == 'p' && mem[i-2] == 'p') {
+ dlstart = i - 2;
+ }
+
+ if(dlstart > -1 && mem[i] == 'A') {
+ dlend = i;
+ break;
+ }
+ }
+ if(dlstart != -1 && dlend != -1) {
+#ifdef CLASSIFY_DEBUG
+ printf("display list found, offsets %d - %d\n", dlstart, dlend);
+#endif
+ for(i = dlstart; i <= dlend; i++)
+ map[i] = CL_DATA;
+ }
+}
+
+/*
+ classify_seg() returns code percentage estimate.
+ possible strategies:
+ if seg is only 1 byte, it's data (return 0)
+ iterate over segment, semi-disassemble, classify each
+ byte as opcode, operand, or data.
+ instructions that aren't control transfers (aka jmp, jsr, rts, rti,
+ branches) that are immediately followed by non-code, will get marked
+ as data (back to the last transfer instruction).
+ branches that branch back before the start of the segment are data?
+ branch/jmp/jsr whose target is data, are also data?
+ but jmp/jsr outside of the segment can't be assumed data...
+ long runs (>=8) of the same byte value are data.
+*/
+
+int classify_seg(const xex_segment seg) {
+ int i, j, addr, byte, oplen, target, last_cltxfr = 0, changed;
+ int runstart = 0, runbyte = -1, runcount = 0;
+ float f;
+ unsigned char map[65536];
+
+ memset(map, 0, 65535);
+
+ /* pass 1: mark valid opcodes as CL_OPCODE, their operands as
+ CL_OPERAND, and anything else as CL_DATA. */
+ for(i = 0; i < seg.len; ) {
+ byte = seg.object[i];
+ oplen = opcode_lengths[byte];
+ if(opcode_valid[byte]) {
+ map[i] = CL_OPCODE;
+ if(oplen >= 2) map[i + 1] = CL_OPERAND;
+ if(oplen == 3) map[i + 2] = CL_OPERAND;
+ } else {
+ map[i] = CL_DATA;
+ }
+ i += oplen;
+ }
+
+ /* pass 1.5: if there's a display list, it's data */
+ find_dlist(seg.object, map, seg.len);
+
+ /* pass 3: runs of >=3 of the same byte value are data, unless
+ they're ASL A, LSR A, or NOP. */
+ for(i = 0; i < seg.len; i++) {
+ byte = seg.object[i];
+ if(byte == runbyte) {
+ runcount++;
+ } else {
+ if(
+ runcount > 8 ||
+ (runcount >= 3 && !(runbyte == 0x0a || runbyte == 0x4a || runbyte == 0xea))
+ )
+ {
+ #ifdef CLASSIFY_DEBUG
+ printf("run of %d bytes, $%02x, at %d\n", runcount, runbyte, runstart);
+ #endif
+ for(j = runstart; j < i; j++)
+ map[j] = CL_DATA;
+ }
+ runstart = i;
+ runbyte = byte;
+ runcount = 0;
+ }
+ /*
+ printf("got here, i=%d, runbyte=%02x, runstart=%d, runcount=%d\n", i, runbyte, runstart, runcount);
+ */
+ }
+
+ /* pass 4: code that doesn't branch/jump/return and runs into data
+ gets marked as data. */
+ runcount = runstart = 0;
+ do {
+ runcount++;
+ changed = 0;
+ for(i = 0; i < seg.len; i++) {
+ if(map[i] == CL_OPCODE && opcode_is_ctlxfr[seg.object[i]]) {
+ last_cltxfr = i;
+ #ifdef CLASSIFY_DEBUG
+ /*
+ printf("last_cltxfr = %04x, opcode %02x;\n", last_cltxfr, seg.object[i]);
+ */
+ #endif
+ } else if(map[i] == CL_DATA) {
+ #ifdef CLASSIFY_DEBUG
+ /*
+ printf("marking range as data: %04x - %04x\n", last_cltxfr, i);
+ */
+ #endif
+ for(j = last_cltxfr; j < i; j++) {
+ map[j] = CL_DATA;
+ }
+ last_cltxfr = i;
+ }
+ }
+
+ /* pass 4: branch and jmp abs instructions whose target is data,
+ are also data. repeats until nothing is changed. */
+ for(i = 0; i < seg.len; i++) {
+ addr = seg.start_addr + i;
+ byte = seg.object[i];
+ target = -1;
+ if(map[i] == CL_OPCODE) switch(byte) {
+ case 0x4c: /* JMP absolute */
+ case 0x20: /* JSR absolute */
+ target = addr + (seg.object[i + 1] | (seg.object[i + 2] << 8));
+ if((target < addr) || (target > (addr + seg.len)))
+ target = -1; /* jsr/jmp out of segment */
+ else {
+ if(map[target - addr] != CL_OPCODE) {
+ map[i] = map[i + 1] = map[i + 2] = CL_DATA;
+ changed = 1;
+ runstart += 3;
+ }
+ }
+ break;
+
+ case 0x10: /* BPL */
+ case 0x30: /* BMI */
+ case 0x50: /* BVC */
+ case 0x70: /* BVS */
+ case 0x90: /* BCC */
+ case 0xb0: /* BCS */
+ case 0xd0: /* BNE */
+ case 0xf0: /* BEQ */
+ target = addr + i + 2 + ((signed char)seg.object[i + 1]);
+ if((target < addr) || (target > (addr + seg.len))) {
+ /* branch out of segment, assume data */
+ target = -1;
+ map[i] = map[i + 1] = CL_DATA;
+ runstart += 2;
+ changed = 1;
+ } else if(map[target - addr] != CL_OPCODE) {
+ /* branch to data! */
+ map[i] = map[i + 1] = CL_DATA;
+ runstart += 2;
+ changed = 1;
+ }
+ break;
+
+ default:
+ break;
+ }
+ }
+ } while(changed);
+#ifdef CLASSIFY_DEBUG
+ printf("pass 4 ran %d times, changed %d bytes\n", runcount, runstart);
+#endif
+
+ /* last pass: calculate opcode/operand percentage */
+ j = 0;
+ for(i = 0; i < seg.len; i++) {
+ if(map[i] != CL_DATA) j++;
+ }
+
+#ifdef CLASSIFY_DEBUG
+ for(i = 0; i < seg.len; i++) {
+ if(i % 16 == 0) {
+ printf("\n%04x: ", i);
+ }
+ printf("%02x/%d ", seg.object[i], map[i]);
+ }
+ printf("\n");
+#endif
+
+ f = (float)j / (float)seg.len * 100.0;
+ return (int)f;
+}
+
+int main(int argc, char **argv) {
+ FILE *f;
+ xex_segment seg;
+ unsigned char buffer[65536];
+ char *filename;
+ int opt, segcount = 0;
+ uint32_t crc;
+
+ while((opt = getopt(argc, argv, "vh")) != -1) {
+ switch(opt) {
+ case 'v':
+ xex_verbose = 1;
+ break;
+ case 'h':
+ usage(0);
+ break;
+ default:
+ usage(1);
+ break;
+ }
+ }
+
+ if(optind >= argc || argv[optind + 1]) {
+ usage(1);
+ }
+
+ filename = argv[optind];
+ if( !(f = fopen(filename, "rb")) ) {
+ fprintf(stderr, "%s: ", SELF);
+ perror(filename);
+ exit(1);
+ }
+
+ seg.object = buffer;
+
+ while(xex_fread_seg(&seg, f)) {
+ if(!segcount)
+ printf("Seg | Start | End | Bytes | CRC32 | Type\n");
+ crc32(seg.object, seg.len, &crc);
+ printf("%3d | $%04x | $%04x | %5d | %08x | ",
+ ++segcount, seg.start_addr, seg.end_addr, seg.len, crc);
+ if(seg.start_addr == XEX_RUNAD && seg.len > 1)
+ printf("Run $%04x", (seg.object[0] | (seg.object[1] << 8)));
+ else if(seg.start_addr == XEX_INITAD && seg.len > 1)
+ printf("Init $%04x", (seg.object[0] | (seg.object[1] << 8)));
+ else printf("%d%% code", classify_seg(seg));
+
+ putchar('\n');
+ }
+
+ if(!segcount) {
+ fprintf(stderr, SELF ": %s is not an Atari 8-bit executable.\n", filename);
+ return 1;
+ }
+
+ return 0;
+}
diff --git a/xexamine.rst b/xexamine.rst
new file mode 100644
index 0000000..a76ed16
--- /dev/null
+++ b/xexamine.rst
@@ -0,0 +1,56 @@
+.. RST source for xexamine(1) man page. Convert with:
+.. rst2man.py xexamine.rst > xexamine.1
+
+========
+xexamine
+========
+
+-------------------------------------------------
+Show information on Atari 8-bit executables (XEX)
+-------------------------------------------------
+
+.. include:: manhdr.rst
+
+SYNOPSIS
+========
+
+xexamine [**-h**] | [**-v**] *xexfile*
+
+DESCRIPTION
+===========
+
+**xexamine** reads an Atari 8-bit executable (.xex file) and prints
+the following information on each segment in the file:
+
+Segment number (1-based).
+
+Start and end addresses (in hex).
+
+Length in bytes (in decimal).
+
+CRC32 checksum of the segment.
+
+Segment type:
+
+ If the segment is a run address (loads at RUNAD) or an init address (loads at INITAD), the
+ type is "Run" or "Init", with the actual run or init address.
+
+ Otherwise, the percentage of the segment that contains valid 6502 object
+ code is printed. This is an estimate based on static analysis and some
+ heuristics, and as such, isn't 100% accurate.
+
+OPTIONS
+=======
+
+-h
+ Print a short help message and exit.
+
+-v
+ Verbose operation.
+
+EXIT STATUS
+===========
+
+Exit status is zero if *xexfile* is a valid Atari .xex file, non-zero otherwise.
+
+.. include:: manftr.rst