From 530b83e2736f1d4afeedd3bf99c8428da2adabf7 Mon Sep 17 00:00:00 2001 From: "B. Watson" Date: Thu, 25 Apr 2024 14:52:52 -0400 Subject: xexamine: added. --- Makefile | 4 +- xexamine.1 | 104 +++++++++++++++++ xexamine.c | 363 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ xexamine.rst | 56 +++++++++ 4 files changed, 525 insertions(+), 2 deletions(-) create mode 100644 xexamine.1 create mode 100644 xexamine.c create mode 100644 xexamine.rst diff --git a/Makefile b/Makefile index 07e84c3..9fb8a1d 100644 --- a/Makefile +++ b/Makefile @@ -14,9 +14,9 @@ CC=gcc CFLAGS=-Wall $(COPT) -ansi -D_GNU_SOURCE -DVERSION=\"$(VERSION)\" # BINS and SCRIPTS go in $BINDIR, DOCS go in $DOCDIR -BINS=a8eol xfd2atr atr2xfd blob2c cart2xex fenders xexsplit xexcat atrsize rom2cart unmac65 axe blob2xex +BINS=a8eol xfd2atr atr2xfd blob2c cart2xex fenders xexsplit xexcat atrsize rom2cart unmac65 axe blob2xex xexamine SCRIPTS=dasm2atasm a8utf8 -MANS=a8eol.1 xfd2atr.1 atr2xfd.1 blob2c.1 cart2xex.1 fenders.1 xexsplit.1 xexcat.1 atrsize.1 rom2cart.1 unmac65.1 axe.1 dasm2atasm.1 a8utf8.1 blob2xex.1 +MANS=a8eol.1 xfd2atr.1 atr2xfd.1 blob2c.1 cart2xex.1 fenders.1 xexsplit.1 xexcat.1 atrsize.1 rom2cart.1 unmac65.1 axe.1 dasm2atasm.1 a8utf8.1 blob2xex.1 xexamine.1 DOCS=README equates.inc *.dasm # All the programs share this version number... diff --git a/xexamine.1 b/xexamine.1 new file mode 100644 index 0000000..0d2cb03 --- /dev/null +++ b/xexamine.1 @@ -0,0 +1,104 @@ +.\" Man page generated from reStructuredText. +. +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. 
RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.TH "XEXAMINE" 1 "2024-04-25" "0.2.1" "Urchlay's Atari 8-bit Tools" +.SH NAME +xexamine \- Show information on Atari 8-bit executables (XEX) +.\" RST source for xexamine(1) man page. Convert with: +. +.\" rst2man.py xexamine.rst > xexamine.1 +. +.SH SYNOPSIS +.sp +xexamine \fB\-h\fP | [\fB\-v\fP] \fIxexfile\fP +.SH DESCRIPTION +.sp +\fBxexamine\fP reads an Atari 8\-bit executable (.xex file) and prints +the following information on each segment in the file: +.sp +Segment number (1\-based). +.sp +Start and end addresses (in hex). +.sp +Length in bytes (in decimal). +.sp +CRC32 checksum of the segment. +.sp +Segment type: +.INDENT 0.0 +.INDENT 3.5 +If the segment is a run address (loads at RUNAD) or an init address (loads at INITAD), the +type is "Run" or "Init", with the actual run or init address. +.sp +Otherwise, the percentage of the segment that contains valid 6502 object +code is printed. This is an estimate based on static analysis and some +heuristics, and as such, isn\(aqt 100% accurate. +.UNINDENT +.UNINDENT +.SH OPTIONS +.INDENT 0.0 +.TP +.B \-h +Print a short help message and exit. +.TP +.B \-v +Verbose operation. +.UNINDENT +.SH EXIT STATUS +.sp +Exit status is zero if \fIxexfile\fP is a valid Atari .xex file, non\-zero otherwise. +.SH COPYRIGHT +.sp +WTFPL. See \fI\%http://www.wtfpl.net/txt/copying/\fP for details. +.SH AUTHOR +.INDENT 0.0 +.IP B. 3 +Watson <\fI\%urchlay@slackware.uk\fP>; Urchlay on irc.libera.chat \fI##atari\fP\&. +.UNINDENT +.SH SEE ALSO +.sp +\fBa8eol\fP(1), +\fBa8utf8\fP(1), +\fBatr2xfd\fP(1), +\fBatrsize\fP(1), +\fBaxe\fP(1), +\fBblob2c\fP(1), +\fBcart2xex\fP(1), +\fBdasm2atasm\fP(1), +\fBfenders\fP(1), +\fBrom2cart\fP(1), +\fBunmac65\fP(1), +\fBxexcat\fP(1), +\fBxexsplit\fP(1), +\fBxfd2atr\fP(1). 
+.sp
+Any good Atari 8\-bit book: \fIDe Re Atari\fP, \fIThe Atari BASIC Reference
+Manual\fP, the \fIOS Users\(aq Guide\fP, \fIMapping the Atari\fP, etc.
+.\" Generated by docutils manpage writer.
+.
diff --git a/xexamine.c b/xexamine.c
new file mode 100644
index 0000000..a07da6a
--- /dev/null
+++ b/xexamine.c
@@ -0,0 +1,363 @@
+#include
+#include
+#include
+#include
+#include
+#include
+/* NOTE(review): the six system header names above are missing from this
+   copy of the patch (they look like they were stripped along with their
+   angle brackets). The code below uses printf/fprintf/perror, exit,
+   memset, uint32_t/uint8_t and getopt/optind, so presumably stdio.h,
+   stdlib.h, string.h, stdint.h and unistd.h are among them -- confirm
+   against the real commit. */
+#include "xex.h"
+
+#define SELF "xexamine"
+
+/* #define CLASSIFY_DEBUG */ /* define this to make find_dlist() and classify_seg() print their working on stdout */
+
+/*
+	show all segments of a xex file, including:
+	has ffff header or not
+	start address, end address, length
+	whether the segment contains code or just data (heuristics)
+	checksum of the segment contents
+*/
+
+/* crc32() and crc32_for_byte() come from public domain code:
+	http://home.thep.lu.se/~bjorn/crc/
+
+	crc32_for_byte() computes one entry of crc32()'s lookup table: r is
+	put through 8 shift-and-xor steps using the constant 0xEDB88320, and
+	the result is xor'ed with 0xFF000000 before being returned.
+*/
+
+uint32_t crc32_for_byte(uint32_t r) {
+	int j;
+	for(j = 0; j < 8; ++j)
+		r = (r & 1? 0: (uint32_t)0xEDB88320L) ^ r >> 1;
+	return r ^ (uint32_t)0xFF000000L;
+}
+/* crc32(): fold the n_bytes bytes at data into the running checksum *crc.
+   *crc is read as well as written, so the caller has to give it a
+   starting value before the first call (NOTE(review): presumably 0 for a
+   fresh checksum -- confirm against the upstream page). The 256-entry
+   table is static and is filled in on the first call. */
+void crc32(const void *data, size_t n_bytes, uint32_t* crc) {
+	size_t i;
+	static uint32_t table[0x100];
+	if(!*table) /* table[0] is still zero: the table hasn't been built yet */
+		for(i = 0; i < 0x100; ++i)
+			table[i] = crc32_for_byte(i);
+	for(i = 0; i < n_bytes; ++i)
+		*crc = table[(uint8_t)*crc ^ ((uint8_t*)data)[i]] ^ *crc >> 8;
+}
+/* usage(): print the one-line usage message on stdout; if status is
+   non-zero, also print a "try -h" hint on stderr. Calls exit(status),
+   so it does not return. */
+void usage(int status) {
+	printf("Usage: " SELF " [-v] file.xex\n");
+	if(status) fprintf(stderr, "Try '%s -h' for help\n", SELF);
+	exit(status);
+}
+/* classification of one byte of a segment, as stored in the map[] arrays
+   that find_dlist() and classify_seg() work on. */
+#define CL_DATA 0
+#define CL_OPCODE 1
+#define CL_OPERAND 2
+
+/* 3 tables used by classify_seg(). All three are indexed by the opcode
+   byte: one row per high nibble, one column per low nibble. */
+/* opcode_valid[]: non-zero if classify_seg() should accept the byte as
+   an opcode. */
+int opcode_valid[] = {
+/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+	1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, /* 0 */
+	1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, /* 1 */
+	1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, /* 2 */
+	1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, /* 3 */
+	1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, /* 4 */
+	1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, /* 5 */
+	1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, /* 6 */
+	1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, /* 7 */
+	0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, /* 8 */
+	1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, /* 9 */
+	1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, /* a */
+	1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, /* b */
+	1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, /* c */
+	1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, /* d */
+	1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, /* e */
+	1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, /* f */
+};
+/* opcode_lengths[]: how many bytes classify_seg() steps over for the
+   byte (the opcode itself plus its operand bytes); every entry is 1, 2
+   or 3. */
+int opcode_lengths[] = {
+/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+	1, 2, 1, 1, 1, 2, 2, 1, 1, 2, 1, 1, 1, 3, 3, 1,
+	2, 2, 1, 1, 1, 2, 2, 1, 1, 3, 1, 1, 1, 3, 3, 1,
+	3, 2, 1, 1, 2, 2, 2, 1, 1, 2, 1, 1, 3, 3, 3, 1,
+	2, 2, 1, 1, 1, 2, 2, 1, 1, 3, 1, 1, 1, 3, 3, 1,
+	1, 2, 1, 1, 1, 2, 2, 1, 1, 2, 1, 1, 3, 3, 3, 1,
+	2, 2, 1, 1, 1, 2, 2, 1, 1, 3, 1, 1, 1, 3, 3, 1,
+	1, 2, 1, 1, 1, 2, 2, 1, 1, 2, 1, 1, 3, 3, 3, 1,
+	2, 2, 1, 1, 1, 2, 2, 1, 1, 3, 1, 1, 1, 3, 3, 1,
+	1, 2, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 3, 3, 3, 1,
+	2, 2, 1, 1, 2, 2, 2, 1, 1, 3, 1, 1, 1, 3, 1, 1,
+	2, 2, 2, 1, 2, 2, 2, 1, 1, 2, 1, 1, 3, 3, 3, 1,
+	2, 2, 1, 1, 2, 2, 2, 1, 1, 3, 1, 1, 3, 3, 3, 1,
+	2, 2, 1, 1, 2, 2, 2, 1, 1, 2, 1, 1, 3, 3, 3, 1,
+	2, 2, 1, 1, 1, 2, 2, 1, 1, 3, 1, 1, 1, 3, 3, 1,
+	2, 2, 1, 1, 2, 2, 2, 1, 1, 2, 1, 1, 3, 3, 3, 1,
+	2, 2, 1, 1, 1, 2, 2, 1, 1, 3, 1, 1, 1, 3, 3, 1,
+};
+
+/* control transfers: branches, JMP abs, JMP (ind), RTS, RTI.
+	JSR doesn't count!
+	opcode_is_ctlxfr[] is non-zero for exactly those opcodes. */
+int opcode_is_ctlxfr[] = {
+/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
+	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
+	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+/* find_dlist(): look for what the debug message below calls a display
+   list in mem[0 .. len-1], and mark it as data in map[].
+   Offsets 2 .. len-3 are scanned. Three 'p' bytes in a row set the
+   start (if there are several such triples, the last one seen wins);
+   the first 'A' byte met at or after that point sets the end and stops
+   the scan. If both were found, every map[] entry from the first 'p'
+   to the 'A', inclusive, becomes CL_DATA; otherwise map[] is untouched.
+   NOTE(review): 'p' is $70 and 'A' is $41, presumably the ANTIC
+   "8 blank scan lines" and "jump and wait for vertical blank"
+   instructions -- confirm. */
+void find_dlist(const unsigned char *mem, unsigned char *map, int len) {
+	int i, dlstart = -1, dlend = -1;
+
+	for(i = 2; i < len-2; i++) {
+		if(mem[i] == 'p' && mem[i-1] == 'p' && mem[i-2] == 'p') {
+			dlstart = i - 2;
+		}
+
+		if(dlstart > -1 && mem[i] == 'A') {
+			dlend = i;
+			break;
+		}
+	}
+	if(dlstart != -1 && dlend != -1) {
+#ifdef CLASSIFY_DEBUG
+		printf("display list found, offsets %d - %d\n", dlstart, dlend);
+#endif
+		for(i = dlstart; i <= dlend; i++)
+			map[i] = CL_DATA;
+	}
+}
+
+/*
+	classify_seg() returns code percentage estimate.
+	possible strategies:
+	if seg is only 1 byte, it's data (return 0)
+	iterate over segment, semi-disassemble, classify each
+		byte as opcode, operand, or data.
+	instructions that aren't control transfers (aka jmp, jsr, rts, rti,
+		branches) that are immediately followed by non-code, will get marked
+		as data (back to the last transfer instruction).
+	branches that branch back before the start of the segment are data?
+	branch/jmp/jsr whose target is data, are also data?
+		but jmp/jsr outside of the segment can't be assumed data... 
+	long runs (>=8) of the same byte value are data.
+*/
+
+/*
+	classify_seg(): estimate how much of a segment is 6502 code.
+
+	seg.object holds the seg.len bytes of the segment and seg.start_addr
+	is the address its first byte loads at. Every byte is classified as
+	CL_OPCODE, CL_OPERAND or CL_DATA in a local map, by the passes
+	described in the body.
+
+	Returns the percentage (0-100, rounded down) of bytes that are still
+	opcode or operand at the end. Returns 0 for an empty segment, or for
+	one that would not fit in the map.
+*/
+int classify_seg(const xex_segment seg) {
+	int i, j, addr, byte, oplen, target, last_cltxfr, changed;
+	int len = seg.len, start = seg.start_addr;
+	int runstart = 0, runbyte = -1, runcount = 0;
+	int passes = 0, nchanged = 0; /* only reported by CLASSIFY_DEBUG */
+	unsigned char map[65536];
+
+	/* nothing to classify. this also keeps every map[] index below in
+	   range, and avoids dividing by zero at the end. */
+	if(len <= 0 || len > (int)sizeof(map))
+		return 0;
+
+	/* everything is data until pass 1 says otherwise. */
+	memset(map, CL_DATA, sizeof(map));
+
+	/* pass 1: mark valid opcodes as CL_OPCODE, their operands as
+	   CL_OPERAND, and anything else as CL_DATA. an instruction whose
+	   operand would run past the end of the segment is data too, so
+	   that every CL_OPCODE byte has all its operand bytes inside the
+	   segment (pass 4b relies on this when it reads them). */
+	for(i = 0; i < len; ) {
+		byte = seg.object[i];
+		oplen = opcode_lengths[byte];
+		if(opcode_valid[byte] && i + oplen <= len) {
+			map[i] = CL_OPCODE;
+			for(j = 1; j < oplen; j++)
+				map[i + j] = CL_OPERAND;
+		} else {
+			map[i] = CL_DATA;
+		}
+		i += oplen;
+	}
+
+	/* pass 2: if there's a display list, it's data */
+	find_dlist(seg.object, map, len);
+
+	/* pass 3: runs of >=3 of the same byte value are data, unless
+	   they're ASL A, LSR A, or NOP; runs of >=8 are data regardless.
+	   runcount is the real length of the current run. the loop goes
+	   one step past the last byte, where the -1 can't match any byte
+	   value, so that a run reaching the end of the segment is checked
+	   as well. */
+	for(i = 0; i <= len; i++) {
+		byte = (i < len) ? seg.object[i] : -1;
+		if(byte == runbyte) {
+			runcount++;
+			continue;
+		}
+		if(
+				runcount >= 8 ||
+				(runcount >= 3 && !(runbyte == 0x0a || runbyte == 0x4a || runbyte == 0xea))
+		  )
+		{
+			#ifdef CLASSIFY_DEBUG
+			printf("run of %d bytes, $%02x, at %d\n", runcount, runbyte, runstart);
+			#endif
+			for(j = runstart; j < i; j++)
+				map[j] = CL_DATA;
+		}
+		runstart = i;
+		runbyte = byte;
+		runcount = 1;
+	}
+
+	/* pass 4 is repeated until 4b stops finding anything to change. */
+	do {
+		passes++;
+		changed = 0;
+
+		/* pass 4a: code that doesn't branch/jump/return and runs into
+		   data gets marked as data. last_cltxfr is the offset where
+		   the current stretch of straight-line code began: the byte
+		   after the last control transfer instruction (which itself
+		   stays code), or the last data byte seen. it starts from 0
+		   again on every repeat. */
+		last_cltxfr = 0;
+		for(i = 0; i < len; i++) {
+			if(map[i] == CL_OPCODE && opcode_is_ctlxfr[seg.object[i]]) {
+				last_cltxfr = i + opcode_lengths[seg.object[i]];
+			} else if(map[i] == CL_DATA) {
+				for(j = last_cltxfr; j < i; j++) {
+					map[j] = CL_DATA;
+				}
+				last_cltxfr = i;
+			}
+		}
+
+		/* pass 4b: jmp/jsr/branch instructions whose target is inside
+		   the segment but isn't an opcode, are data. */
+		for(i = 0; i < len; i++) {
+			if(map[i] != CL_OPCODE)
+				continue;
+			byte = seg.object[i];
+			addr = start + i; /* address of this instruction */
+			switch(byte) {
+				case 0x4c: /* JMP absolute */
+				case 0x20: /* JSR absolute */
+					/* the operand is the target address itself, low byte first */
+					target = seg.object[i + 1] | (seg.object[i + 2] << 8);
+					if(target < start || target >= start + len)
+						break; /* jsr/jmp out of segment: can't tell, leave it */
+					if(map[target - start] != CL_OPCODE) {
+						map[i] = map[i + 1] = map[i + 2] = CL_DATA;
+						changed = 1;
+						nchanged += 3;
+					}
+					break;
+
+				case 0x10: /* BPL */
+				case 0x30: /* BMI */
+				case 0x50: /* BVC */
+				case 0x70: /* BVS */
+				case 0x90: /* BCC */
+				case 0xb0: /* BCS */
+				case 0xd0: /* BNE */
+				case 0xf0: /* BEQ */
+					/* the signed offset is relative to the next instruction */
+					target = addr + 2 + ((signed char)seg.object[i + 1]);
+					if(
+							target < start || target >= start + len || /* branch out of segment, assume data */
+							map[target - start] != CL_OPCODE           /* branch to data! */
+					  )
+					{
+						map[i] = map[i + 1] = CL_DATA;
+						changed = 1;
+						nchanged += 2;
+					}
+					break;
+
+				default:
+					break;
+			}
+		}
+	} while(changed);
+#ifdef CLASSIFY_DEBUG
+	printf("pass 4 ran %d times, changed %d bytes\n", passes, nchanged);
+#else
+	(void)passes;
+	(void)nchanged;
+#endif
+
+	/* last pass: calculate opcode/operand percentage */
+	j = 0;
+	for(i = 0; i < len; i++) {
+		if(map[i] != CL_DATA) j++;
+	}
+
+#ifdef CLASSIFY_DEBUG
+	for(i = 0; i < len; i++) {
+		if(i % 16 == 0) {
+			printf("\n%04x: ", i);
+		}
+		printf("%02x/%d ", seg.object[i], map[i]);
+	}
+	printf("\n");
+#endif
+
+	/* integer arithmetic, so the result is the exact percentage rounded
+	   down (j is at most 65536, so j * 100 fits in an int). */
+	return j * 100 / len;
+}
+
+/*
+	main(): parse the options, open the file named by the one remaining
+	argument, and print one table row for every segment xex_fread_seg()
+	delivers. Exit status is 0 if at least one segment was read, 1 if
+	the file can't be opened or no segment was read; usage errors exit
+	via usage(1).
+*/
+int main(int argc, char **argv) {
+	FILE *f;
+	xex_segment seg;
+	unsigned char buffer[65536];
+	char *filename;
+	int opt, segcount = 0;
+	uint32_t crc;
+
+	while((opt = getopt(argc, argv, "vh")) != -1) {
+		switch(opt) {
+			case 'v':
+				xex_verbose = 1;
+				break;
+			case 'h':
+				usage(0);
+				break;
+			default:
+				usage(1);
+				break;
+		}
+	}
+
+	/* exactly one non-option argument is required */
+	if(optind >= argc || argv[optind + 1]) {
+		usage(1);
+	}
+
+	filename = argv[optind];
+	if( !(f = fopen(filename, "rb")) ) {
+		fprintf(stderr, "%s: ", SELF);
+		perror(filename);
+		exit(1);
+	}
+
+	/* NOTE(review): presumably xex_fread_seg() stores each segment's
+	   bytes at seg.object and returns zero when there is nothing more
+	   to read -- confirm against xex.h. */
+	seg.object = buffer;
+
+	while(xex_fread_seg(&seg, f)) {
+		if(!segcount)
+			printf("Seg | Start | End | Bytes | CRC32 | Type\n");
+		/* crc32() updates a running value, so it has to start from 0
+		   for every segment to get a per-segment checksum. */
+		crc = 0;
+		crc32(seg.object, seg.len, &crc);
+		printf("%3d | $%04x | $%04x | %5d | %08x | ",
+				++segcount, seg.start_addr, seg.end_addr, seg.len, (unsigned int)crc);
+		if(seg.start_addr == XEX_RUNAD && seg.len > 1)
+			printf("Run $%04x", (seg.object[0] | (seg.object[1] << 8)));
+		else if(seg.start_addr == XEX_INITAD && seg.len > 1)
+			printf("Init $%04x", (seg.object[0] | (seg.object[1] << 8)));
+		else printf("%d%% code", classify_seg(seg));
+
+		putchar('\n');
+	}
+
+	fclose(f);
+
+	if(!segcount) {
+		fprintf(stderr, SELF ": %s is not an Atari 8-bit executable.\n", filename);
+		return 1;
+	}
+
+	return 0;
+}
diff --git a/xexamine.rst b/xexamine.rst
new file mode 100644
index 0000000..a76ed16
--- /dev/null
+++ b/xexamine.rst
@@ -0,0 +1,56 @@
+.. 
RST source for xexamine(1) man page. Convert with: +.. rst2man.py xexamine.rst > xexamine.1 + +======== +xexamine +======== + +------------------------------------------------- +Show information on Atari 8-bit executables (XEX) +------------------------------------------------- + +.. include:: manhdr.rst + +SYNOPSIS +======== + +xexamine **-h** | [**-v**] *xexfile* + +DESCRIPTION +=========== + +**xexamine** reads an Atari 8-bit executable (.xex file) and prints +the following information on each segment in the file: + +Segment number (1-based). + +Start and end addresses (in hex). + +Length in bytes (in decimal). + +CRC32 checksum of the segment. + +Segment type: + + If the segment is a run address (loads at RUNAD) or an init address (loads at INITAD), the + type is "Run" or "Init", with the actual run or init address. + + Otherwise, the percentage of the segment that contains valid 6502 object + code is printed. This is an estimate based on static analysis and some + heuristics, and as such, isn't 100% accurate. + +OPTIONS +======= + +-h + Print a short help message and exit. + +-v + Verbose operation. + +EXIT STATUS +=========== + +Exit status is zero if *xexfile* is a valid Atari .xex file, non-zero otherwise. + +.. include:: manftr.rst -- cgit v1.2.3