diff options
Diffstat (limited to 'xexamine.c')
-rw-r--r-- | xexamine.c | 363 |
1 files changed, 363 insertions, 0 deletions
diff --git a/xexamine.c b/xexamine.c new file mode 100644 index 0000000..a07da6a --- /dev/null +++ b/xexamine.c @@ -0,0 +1,363 @@ +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> + +#include "xex.h" + +#define SELF "xexamine" + +/* #define CLASSIFY_DEBUG */ + +/* + show all segments of a xex file, including: + has ffff header or not + start address, end address, length + whether the segment contains code or just data (heuristics) + checksum of the segment contents +*/ + +/* crc32() and crc32_for_byte() come from public domain code: + http://home.thep.lu.se/~bjorn/crc/ +*/ + +uint32_t crc32_for_byte(uint32_t r) { + int j; + for(j = 0; j < 8; ++j) + r = (r & 1? 0: (uint32_t)0xEDB88320L) ^ r >> 1; + return r ^ (uint32_t)0xFF000000L; +} + +void crc32(const void *data, size_t n_bytes, uint32_t* crc) { + size_t i; + static uint32_t table[0x100]; + if(!*table) + for(i = 0; i < 0x100; ++i) + table[i] = crc32_for_byte(i); + for(i = 0; i < n_bytes; ++i) + *crc = table[(uint8_t)*crc ^ ((uint8_t*)data)[i]] ^ *crc >> 8; +} + +void usage(int status) { + printf("Usage: " SELF " [-v] file.xex\n"); + if(status) fprintf(stderr, "Try '%s -h' for help\n", SELF); + exit(status); +} + +#define CL_DATA 0 +#define CL_OPCODE 1 +#define CL_OPERAND 2 + +/* 3 tables used by classify_seg() */ + +int opcode_valid[] = { +/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, /* 0 */ + 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, /* 1 */ + 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, /* 2 */ + 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, /* 3 */ + 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, /* 4 */ + 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, /* 5 */ + 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, /* 6 */ + 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, /* 7 */ + 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, /* 8 */ + 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, /* 9 */ + 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, /* a */ + 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, /* b */ + 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, /* c */ + 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, /* d */ + 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, /* e */ + 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, /* f */ +}; + +int opcode_lengths[] = { +/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + 1, 2, 1, 1, 1, 2, 2, 1, 1, 2, 1, 1, 1, 3, 3, 1, + 2, 2, 1, 1, 1, 2, 2, 1, 1, 3, 1, 1, 1, 3, 3, 1, + 3, 2, 1, 1, 2, 2, 2, 1, 1, 2, 1, 1, 3, 3, 3, 1, + 2, 2, 1, 1, 1, 2, 2, 1, 1, 3, 1, 1, 1, 3, 3, 1, + 1, 2, 1, 1, 1, 2, 2, 1, 1, 2, 1, 1, 3, 3, 3, 1, + 2, 2, 1, 1, 1, 2, 2, 1, 1, 3, 1, 1, 1, 3, 3, 1, + 1, 2, 1, 1, 1, 2, 2, 1, 1, 2, 1, 1, 3, 3, 3, 1, + 2, 2, 1, 1, 1, 2, 2, 1, 1, 3, 1, 1, 1, 3, 3, 1, + 1, 2, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 3, 3, 3, 1, + 2, 2, 1, 1, 2, 2, 2, 1, 1, 3, 1, 1, 1, 3, 1, 1, + 2, 2, 2, 1, 2, 2, 2, 1, 1, 2, 1, 1, 3, 3, 3, 1, + 2, 2, 1, 1, 2, 2, 2, 1, 1, 3, 1, 1, 3, 3, 3, 1, + 2, 2, 1, 1, 2, 2, 2, 1, 1, 2, 1, 1, 3, 3, 3, 1, + 2, 2, 1, 1, 1, 2, 2, 1, 1, 3, 1, 1, 1, 3, 3, 1, + 2, 2, 1, 1, 2, 2, 2, 1, 1, 2, 1, 1, 3, 3, 3, 1, + 2, 2, 1, 1, 1, 2, 2, 1, 1, 3, 1, 1, 1, 3, 3, 1, +}; + +/* control transfers: branches, JMP abs, JMP (ind), RTS, RTI. + JSR doesn't count! */ +int opcode_is_ctlxfr[] = { +/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +void find_dlist(const unsigned char *mem, unsigned char *map, int len) { + int i, dlstart = -1, dlend = -1; + + for(i = 2; i < len-2; i++) { + if(mem[i] == 'p' && mem[i-1] == 'p' && mem[i-2] == 'p') { + dlstart = i - 2; + } + + if(dlstart > -1 && mem[i] == 'A') { + dlend = i; + break; + } + } + if(dlstart != -1 && dlend != -1) { +#ifdef CLASSIFY_DEBUG + printf("display list found, offsets %d - %d\n", dlstart, dlend); +#endif + for(i = dlstart; i <= dlend; i++) + map[i] = CL_DATA; + } +} + +/* + classify_seg() returns code percentage estimate. + possible strategies: + if seg is only 1 byte, it's data (return 0) + iterate over segment, semi-disassemble, classify each + byte as opcode, operand, or data. + instructions that aren't control transfers (aka jmp, jsr, rts, rti, + branches) that are immediately followed by non-code, will get marked + as data (back to the last transfer instruction). + branches that branch back before the start of the segment are data? + branch/jmp/jsr whose target is data, are also data? + but jmp/jsr outside of the segment can't be assumed data... + long runs (>=8) of the same byte value are data. +*/ + +int classify_seg(const xex_segment seg) { + int i, j, addr, byte, oplen, target, last_cltxfr = 0, changed; + int runstart = 0, runbyte = -1, runcount = 0; + float f; + unsigned char map[65536]; + + memset(map, 0, 65535); + + /* pass 1: mark valid opcodes as CL_OPCODE, their operands as + CL_OPERAND, and anything else as CL_DATA. */ + for(i = 0; i < seg.len; ) { + byte = seg.object[i]; + oplen = opcode_lengths[byte]; + if(opcode_valid[byte]) { + map[i] = CL_OPCODE; + if(oplen >= 2) map[i + 1] = CL_OPERAND; + if(oplen == 3) map[i + 2] = CL_OPERAND; + } else { + map[i] = CL_DATA; + } + i += oplen; + } + + /* pass 1.5: if there's a display list, it's data */ + find_dlist(seg.object, map, seg.len); + + /* pass 3: runs of >=3 of the same byte value are data, unless + they're ASL A, LSR A, or NOP. */ + for(i = 0; i < seg.len; i++) { + byte = seg.object[i]; + if(byte == runbyte) { + runcount++; + } else { + if( + runcount > 8 || + (runcount >= 3 && !(runbyte == 0x0a || runbyte == 0x4a || runbyte == 0xea)) + ) + { + #ifdef CLASSIFY_DEBUG + printf("run of %d bytes, $%02x, at %d\n", runcount, runbyte, runstart); + #endif + for(j = runstart; j < i; j++) + map[j] = CL_DATA; + } + runstart = i; + runbyte = byte; + runcount = 0; + } + /* + printf("got here, i=%d, runbyte=%02x, runstart=%d, runcount=%d\n", i, runbyte, runstart, runcount); + */ + } + + /* pass 4: code that doesn't branch/jump/return and runs into data + gets marked as data. */ + runcount = runstart = 0; + do { + runcount++; + changed = 0; + for(i = 0; i < seg.len; i++) { + if(map[i] == CL_OPCODE && opcode_is_ctlxfr[seg.object[i]]) { + last_cltxfr = i; + #ifdef CLASSIFY_DEBUG + /* + printf("last_cltxfr = %04x, opcode %02x;\n", last_cltxfr, seg.object[i]); + */ + #endif + } else if(map[i] == CL_DATA) { + #ifdef CLASSIFY_DEBUG + /* + printf("marking range as data: %04x - %04x\n", last_cltxfr, i); + */ + #endif + for(j = last_cltxfr; j < i; j++) { + map[j] = CL_DATA; + } + last_cltxfr = i; + } + } + + /* pass 4: branch and jmp abs instructions whose target is data, + are also data. repeats until nothing is changed. */ + for(i = 0; i < seg.len; i++) { + addr = seg.start_addr + i; + byte = seg.object[i]; + target = -1; + if(map[i] == CL_OPCODE) switch(byte) { + case 0x4c: /* JMP absolute */ + case 0x20: /* JSR absolute */ + target = addr + (seg.object[i + 1] | (seg.object[i + 2] << 8)); + if((target < addr) || (target > (addr + seg.len))) + target = -1; /* jsr/jmp out of segment */ + else { + if(map[target - addr] != CL_OPCODE) { + map[i] = map[i + 1] = map[i + 2] = CL_DATA; + changed = 1; + runstart += 3; + } + } + break; + + case 0x10: /* BPL */ + case 0x30: /* BMI */ + case 0x50: /* BVC */ + case 0x70: /* BVS */ + case 0x90: /* BCC */ + case 0xb0: /* BCS */ + case 0xd0: /* BNE */ + case 0xf0: /* BEQ */ + target = addr + i + 2 + ((signed char)seg.object[i + 1]); + if((target < addr) || (target > (addr + seg.len))) { + /* branch out of segment, assume data */ + target = -1; + map[i] = map[i + 1] = CL_DATA; + runstart += 2; + changed = 1; + } else if(map[target - addr] != CL_OPCODE) { + /* branch to data! */ + map[i] = map[i + 1] = CL_DATA; + runstart += 2; + changed = 1; + } + break; + + default: + break; + } + } + } while(changed); +#ifdef CLASSIFY_DEBUG + printf("pass 4 ran %d times, changed %d bytes\n", runcount, runstart); +#endif + + /* last pass: calculate opcode/operand percentage */ + j = 0; + for(i = 0; i < seg.len; i++) { + if(map[i] != CL_DATA) j++; + } + +#ifdef CLASSIFY_DEBUG + for(i = 0; i < seg.len; i++) { + if(i % 16 == 0) { + printf("\n%04x: ", i); + } + printf("%02x/%d ", seg.object[i], map[i]); + } + printf("\n"); +#endif + + f = (float)j / (float)seg.len * 100.0; + return (int)f; +} + +int main(int argc, char **argv) { + FILE *f; + xex_segment seg; + unsigned char buffer[65536]; + char *filename; + int opt, segcount = 0; + uint32_t crc; + + while((opt = getopt(argc, argv, "vh")) != -1) { + switch(opt) { + case 'v': + xex_verbose = 1; + break; + case 'h': + usage(0); + break; + default: + usage(1); + break; + } + } + + if(optind >= argc || argv[optind + 1]) { + usage(1); + } + + filename = argv[optind]; + if( !(f = fopen(filename, "rb")) ) { + fprintf(stderr, "%s: ", SELF); + perror(filename); + exit(1); + } + + seg.object = buffer; + + while(xex_fread_seg(&seg, f)) { + if(!segcount) + printf("Seg | Start | End | Bytes | CRC32 | Type\n"); + crc32(seg.object, seg.len, &crc); + printf("%3d | $%04x | $%04x | %5d | %08x | ", + ++segcount, seg.start_addr, seg.end_addr, seg.len, crc); + if(seg.start_addr == XEX_RUNAD && seg.len > 1) + printf("Run $%04x", (seg.object[0] | (seg.object[1] << 8))); + else if(seg.start_addr == XEX_INITAD && seg.len > 1) + printf("Init $%04x", (seg.object[0] | (seg.object[1] << 8))); + else printf("%d%% code", classify_seg(seg)); + + putchar('\n'); + } + + if(!segcount) { + fprintf(stderr, SELF ": %s is not an Atari 8-bit executable.\n", filename); + return 1; + } + + return 0; +} |