/*
 * unmac65: read a tokenized MAC/65 file (4-byte $FEFE header followed by
 * tokenized lines) and write it out as plain text.
 *
 * Builds either as a hosted command-line tool, or with cc65 for the
 * Atari 8-bit (ATARI8), where options are prompted for interactively
 * when no command-line arguments are given.
 */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifndef VERSION
# define VERSION "???"
#endif

#ifndef TAG
# define TAG ""
#endif

#define SELF "unmac65"
#define BANNER SELF " v" VERSION " by B. Watson (WTFPL)\n\n"

#ifdef __CC65__
# ifdef __ATARI__
#  define ATARI8
# else
#  error "This program only supports Atari 8-bit when built with cc65"
/* Feel free to add support for other systems, if you need to. The main
   differences will be in fix_filename() and atari8_get_opts(). */
# endif
#else
# undef ATARI8
#endif

/* Combine a low and a high byte into a 16-bit value. Both arguments are
   parenthesized so expressions can be passed safely. */
#define lsbmsb(lo, hi) ( (lo) | ((hi) << 8) )

/* use static filename buffers on A8, since we (probably) won't be
   getting them passed with argv. Copies from argv into these buffers go
   through copy_filename(), which checks the length. */
#ifdef ATARI8
char infile[128], outfile[128];
#else
char *infile = NULL, *outfile = NULL;
#endif

FILE *input = NULL, *output = NULL;
int using_stdout = 1;

/* see handle_cli_opts() and/or atari8_get_opts() for these: */
char nl = '\n';
char no_numbers = 0;
char leading_tabs = 0;
char all_tabs = 0;
char omit_dots = 0;
char lcase_opcodes = 0;
char lcase_all = 0;
char dump_tokens = 0;
char add_quote = 0;
unsigned int renum_start, renum_line;
int renum_incr = 0;

typedef enum { CC_NONE, CC_UNPRINT, CC_ALL } chconst_opt_t;
chconst_opt_t chconsts_hex = CC_NONE;

/* options only available in the non-Atari8 ports */
#ifndef ATARI8
char deinverse = 0;
/* Per-line counters. These are int rather than char: a line can hold
   well over 127 such characters, which would overflow a signed char. */
int found_inverse = 0;
int found_unprint = 0;
char inv_underscore = 0;
char inv_ansi = 0;
char unicode = 0;
#endif

/* dumpbuf[] really should be local to parse_one_line(), but cc65
   won't let us make this a local var, it's too big. 1000 bytes is
   plenty (max line length is 256 bytes, we dump them in hex at
   3 chars each, plus 10-12 chars worth of formatting) */
char dumpbuf[1000];

/* number of bytes left to read (initialized from 4-byte m65 header,
   decremented by next_byte()). If this ever reaches 0 while there's
   more input, or if we get EOF while prog_bytes != 0, it's an error. */
unsigned int prog_bytes;

/* Guess what these are for? */
int line_number, old_line_number = -1;

/* Text for each opcode token, indexed by token value 0x00..MAX_OPCODE. */
const char *opcode_tokens[] = {
	"ERROR -", /* 0x00 */
	".IF", ".ELSE", ".ENDIF", ".MACRO", ".ENDM", ".TITLE", " ", ".PAGE",
	".WORD", ".ERROR", ".BYTE", ".SBYTE", ".DBYTE", ".END", ".OPT",
	".TAB", /* 0x10 */
	".INCLUDE", ".DS", ".ORG", ".EQU", "BRA", "TRB", "TSB", ".FLOAT",
	".CBYTE", ";", ".LOCAL", ".SET", "*=", "=", ".=",
	"JSR", /* 0x20 */
	"JMP", "DEC", "INC", "LDX", "LDY", "STX", "STY", "CPX", "CPY", "BIT",
	"BRK", "CLC", "CLD", "CLI", "CLV",
	"DEX", /* 0x30 */
	"DEY", "INX", "INY", "NOP", "PHA", "PHP", "PLA", "PLP", "RTI", "RTS",
	"SEC", "SED", "SEI", "TAX", "TAY",
	"TSX", /* 0x40 */
	"TXA", "TXS", "TYA", "BCC", "BCS", "BEQ", "BMI", "BNE", "BPL", "BVC",
	"BVS", "ORA", "AND", "EOR", "ADC",
	"STA", /* 0x50 */
	"LDA", "CMP", "SBC", "ASL", "ROL", "LSR", "ROR",
	"", /* 0x58 - the null opcode */
	"STZ", "DEA", "INA", "PHX", "PHY", "PLX",
	"PLY" /* 0x5f */
};

/* Special opcodes */
#define MAX_OPCODE 0x5f
#define NO_OPCODE 0x58

/* Text for each operand token, indexed by token value 0x00..MAX_OPERAND.
   NULL entries are prefixes that need special handling; see
   handle_operand(). */
const char *operand_tokens[] = {
	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
	NULL, /* actually "'", special handling tho */ /* 0x0a, 10 decimal */
	"%$", "%", "*", " ", " ",
	"a", /* 0x10 */
	"q", "+", "-",
	"*", /* 0x14, 20 decimal */
	"/", "&", ".DEF", "=", "<=", ">=", "<>", ">", "<",
	"-", /* 0x1e, 30 dec */
	"[",
	"]", /* 0x20 */
	".OR", ".AND", ".NOT", "!", "^", ".REF", "\\",
	NULL, /* 0x28, 40 dec */
	NULL, NULL, NULL, NULL, NULL, NULL,
	".REF",
	".DEF ", /* 0x30 */
	".NOT ",
	" .AND ", /* 0x32, 50 dec */
	" .OR ", " <", " >", ",X)", "),Y", ",Y", ",X", ")",
	",", /* 0x3b, 59, the null operand */
	"\x1b", /* 0x3c, 60, ASCII escape, chr$(27)? */
	",", "#", "A",
	"(", /* 0x40, 64 dec */
	"\"", "$", "Q", "NO", "NO ",
	"OBJ", /* 0x46, 70 dec */
	"ERR", "EJECT", "LIST", "XREF", "MLIST", "CLIST",
	"NUM", /* 0x4d, 77 dec */
	/* "M", */ /* maybe? I think not... */
};

/* Special operands */
#define MAX_OPERAND 0x4d
#define NO_OPERAND 0x3b
#define HEXWORD_PREFIX 5
#define HEXBYTE_PREFIX 6
#define DECWORD_PREFIX 7
#define DECBYTE_PREFIX 8
#define CHAR_CONST_PREFIX 0x0a

/* Functions */

/* Write one byte of a label, lowercased when lcase_all is set. */
void print_label_byte(unsigned char byte, FILE *output) {
	putc((lcase_all ? tolower(byte) : byte), output);
}

#ifdef ATARI8
/* On the Atari itself, string bytes are written through unchanged. */
void print_string_byte(unsigned char byte, FILE *output) {
	putc(byte, output);
}
#else
/* UTF-8 replacement for each of the 128 non-inverse ATASCII codes,
   indexed by the code with its high bit masked off. */
const char *unicode_table[] = {
	"♥", "┣", "┃", "┛", "┫", "┓", "╱", "╲",
	"◢", "▗", "◣", "▝", "▘", "▔", "▁", "▖",
	"♣", "┏", "━", "╋", "⚫ ", "▄", "▎", "┳",
	"┻", "▌", "┗", "␛", "↑", "↓", "←", "→",
	" ", "!", "\"", "#", "$", "%", "&", "'",
	"(", ")", "*", "+", ",", "-", ".", "/",
	"0", "1", "2", "3", "4", "5", "6", "7",
	"8", "9", ":", ";", "<", "=", ">", "?",
	"@", "A", "B", "C", "D", "E", "F", "G",
	"H", "I", "J", "K", "L", "M", "N", "O",
	"P", "Q", "R", "S", "T", "U", "V", "W",
	"X", "Y", "Z", "[", "\\", "]", "^", "_",
	"◆", "a", "b", "c", "d", "e", "f", "g",
	"h", "i", "j", "k", "l", "m", "n", "o",
	"p", "q", "r", "s", "t", "u", "v", "w",
	"x", "y", "z", "♠", "|", "↰", "◀", "▶"
};

/* This is probably more complex than it needs to be. */

/* Emit whatever precedes an inverse-video character: underscore plus
   backspace for inv_underscore, the ANSI reverse-video sequence for
   inv_ansi, nothing otherwise. */
void start_inverse(FILE *output) {
	if(inv_underscore) {
		fputs("_\x08", output);
	} else if(inv_ansi) {
		fputs("\x1b[7m", output);
	}
}

/* Emit whatever follows an inverse-video character (only the ANSI
   mode needs a reset sequence). */
void end_inverse(FILE *output) {
	if(inv_underscore) {
		/* do nothing */;
	} else if(inv_ansi) {
		fputs("\x1b[0m", output);
	}
}

/* Write one byte of a string, comment or character constant, applying
   the inverse-video and Unicode options. Updates the per-line
   found_inverse and found_unprint counters. */
void print_string_byte(unsigned char byte, FILE *output) {
	int inverse;

	inverse = (byte >= 0x80);
	byte &= 0x7f;

	if(inverse && deinverse) {
		/* inverse bit is dropped from the output, but still counted */
		found_inverse++;
		inverse = 0;
	}

	if(inverse)
		start_inverse(output);

	if(unicode) {
		fputs(unicode_table[byte], output);
	} else {
		if(inv_underscore || inv_ansi) {
			putc(byte, output);
		} else {
			/* no special rendering: pass the high bit through */
			putc(inverse ? (byte | 0x80) : byte, output);
			found_inverse += inverse;
		}
		if(byte < 0x20)
			found_unprint++;
	}

	if(inverse)
		end_inverse(output);
}
#endif

/* Comment bytes are string bytes, lowercased first when lcase_all is set. */
#define print_comment_byte(x, y) print_string_byte((lcase_all ? tolower(x) : (x)), (y))

/* Write a byte as $XX, in lowercase hex when lcase_opcodes is set. */
void print_hex_byte(unsigned char byte, FILE *output) {
	fprintf(output, (lcase_opcodes ? "$%02x" : "$%02X"), byte);
}

/* Write a word as $XXXX, in lowercase hex when lcase_opcodes is set. */
void print_hex_word(int word, FILE *output) {
	/* %x expects an unsigned argument */
	fprintf(output, (lcase_opcodes ? "$%04x" : "$%04X"), (unsigned int)word);
}

/* Close the input file, and the output file unless it is stdout. */
void closeall(void) {
	if(output && !using_stdout) {
		/* fclose() flushes buffered data, so a write error can show up here */
		if(fclose(output) == EOF)
			perror(SELF);
		output = NULL;
	}

	if(input) {
		fclose(input);
		input = NULL;
	}
}

/* Close all files and exit with the given status. On the Atari, wait
   for Return first when the status is nonzero. */
void exit_cleanly(int status) {
#ifdef ATARI8
	if(status) {
		puts("Press Return to exit:");
		fflush(stdout);
		fgets(infile, 80, stdin);
	}
#endif
	closeall();
	exit(status);
}

/* Read and return the next byte of the program, decrementing
   prog_bytes. Exits with an error if the header's byte count is
   already exhausted or the input ends early; warns if input remains
   after the last counted byte. */
unsigned char next_byte(void) {
	int c;

	if(prog_bytes == 0) {
		fprintf(stderr, SELF ": corrupt or truncated file?\n");
		exit_cleanly(1);
	}

	--prog_bytes;
	c = getc(input);

	/* EOF is an error even on the final expected byte: the header
	   promised a byte that isn't there. */
	if(c == EOF) {
		fprintf(stderr, SELF ": unexpected EOF\n");
		exit_cleanly(1);
	}

	if(prog_bytes == 0 && getc(input) != EOF) {
		fprintf(stderr, SELF ": ignoring extra junk at EOF\n");
	}

	return (unsigned char)c;
}

/* Read and validate the 4-byte header ($FE $FE, then the program
   length LSB first) and set prog_bytes from it. Exits on a short or
   invalid header. */
void parse_header(void) {
	unsigned char inbuf[4];
	int bytes = (int)fread(inbuf, 1, 4, input);

	if(bytes < 4) {
		if(ferror(input)) {
			perror(infile);
		} else {
			fprintf(stderr, SELF ": file is too short (%d bytes)\n", bytes);
		}
		exit_cleanly(1);
	}

	if(inbuf[0] != 0xfe || inbuf[1] != 0xfe) {
		fprintf(stderr, SELF ": not a mac/65 file (missing $FEFE header)\n");
		exit_cleanly(1);
	}

	prog_bytes = lsbmsb(inbuf[2], inbuf[3]);
	if(!prog_bytes)
		fprintf(stderr, SELF ": file is valid but contains no lines of code\n");

	if(dump_tokens) {
		fprintf(output, ";; Mac/65 header: ");
		for(bytes = 0; bytes < 4; bytes++)
			fprintf(output, "%02X ", inbuf[bytes]);
		fprintf(output, "length %u\n", prog_bytes);
	}
}

/* Write the text for an opcode token, preceded and followed by a
   space or tab according to leading_tabs and all_tabs. The caller must
   ensure byte <= MAX_OPCODE. */
void print_opcode(unsigned char byte) {
	const char *opc = opcode_tokens[byte];
	/* Function scope on purpose: opc may point into buf until the
	   fputs() below. The longest token, ".INCLUDE", needs 9 bytes. */
	char buf[10];

	putc((leading_tabs ? '\t' : ' '), output);

	if(omit_dots && opc[0] == '.' && isalpha((unsigned char)opc[1]))
		++opc;

	if(lcase_opcodes) {
		char *p = buf;

		strcpy(buf, opc);
		while(*p) {
			*p = (char)tolower((unsigned char)*p);
			++p;
		}
		opc = buf;
	}

	fputs(opc, output);
	putc((all_tabs ? '\t' : ' '), output);
}

/* Parser states. The parser is hand-rolled and kind of ugly. */
#define ST_NEED_OPCODE 1
#define ST_IN_LABEL 2
#define ST_IN_OPERAND 3
#define ST_IN_OPSTRING 4
#define ST_IN_COMMENT 5
#define ST_ERROR 6
#define ST_IN_HEXBYTE 7
#define ST_IN_HEXWORD_LSB 8
#define ST_IN_HEXWORD_MSB 9
#define ST_IN_DECBYTE 10
#define ST_IN_DECWORD_LSB 11
#define ST_IN_DECWORD_MSB 12
#define ST_IN_CHAR_CONST 13

/* Handle one operand token and return the parser state to use for the
   next byte. Ordinary tokens are printed from operand_tokens[];
   prefix tokens only select the state that consumes their argument. */
int handle_operand(unsigned char byte) {
	const char *operand = NULL;

	/* bytes above 0x80 introduce a string; the caller takes the
	   length from the low 7 bits */
	if(byte > 0x80)
		return ST_IN_OPSTRING;

	if(byte == NO_OPERAND) {
		putc((all_tabs ? '\t' : ' '), output);
		return ST_IN_COMMENT;
	}

	if(byte > MAX_OPERAND)
		return ST_ERROR;

	operand = operand_tokens[byte];
	if(operand) {
		fputs(operand, output);
		return ST_IN_OPERAND;
	}

	switch(byte) {
		case HEXWORD_PREFIX:
			return ST_IN_HEXWORD_LSB;
		case HEXBYTE_PREFIX:
			return ST_IN_HEXBYTE;
		case DECWORD_PREFIX:
			return ST_IN_DECWORD_LSB;
		case DECBYTE_PREFIX:
			return ST_IN_DECBYTE;
		case CHAR_CONST_PREFIX:
			return ST_IN_CHAR_CONST;
		/* TODO: find out if any other specials exist */
		default:
			fprintf(output, "[$%02X?]", byte);
			return ST_IN_OPERAND;
	}
}

/* Write the configured end-of-line to the output file. */
static void put_eol(void) {
#ifdef CYGWIN_NEWLINE_HACK
	if(nl == '\n')
		putc('\r', output);
#endif
	putc(nl, output);
}

/* Read one tokenized line (line number LSB/MSB, length byte, tokens)
   and write it to the output as text, followed by a hex dump of the
   tokens when dump_tokens is set. Exits on read or write errors. */
void parse_one_line(void) {
	int state = ST_NEED_OPCODE;
	int string_len = 0;
	int line_bytes;
	int lsb = 0;
	unsigned char byte, linenum_lo, linenum_hi, length_byte;
	char hexbuf[20];

#ifndef ATARI8
	found_inverse = found_unprint = 0;
#endif

	linenum_lo = next_byte();
	linenum_hi = next_byte();

	if(renum_incr) {
		line_number = renum_line;
		renum_line += renum_incr;
	} else {
		line_number = lsbmsb(linenum_lo, linenum_hi);
	}

	if(line_number <= old_line_number) {
		fprintf(stderr, SELF ": line #%d <= prev line #%d\n",
				line_number, old_line_number);
	}
	/* remember this line so the check above works for the next one */
	old_line_number = line_number;

	/* the length byte counts itself and the two line number bytes */
	length_byte = next_byte();
	line_bytes = length_byte - 3;
	if(line_bytes < 0) {
		fprintf(stderr, SELF ": line %d has bad length byte $%02X\n",
				line_number, length_byte);
		exit_cleanly(1);
	}

	if(dump_tokens)
		sprintf(dumpbuf, ";; %d (%02X %02X, len %02X):",
				line_number, linenum_lo, linenum_hi, length_byte);

	if(!no_numbers) {
		/* duplicate mac65's weird line number formatting */
		if(line_number < 100)
			fprintf(output, "%02d ", line_number);
		else if(line_number < 10000)
			fprintf(output, "%04d ", line_number);
		else
			fprintf(output, "%06d ", line_number);
	}

	while(line_bytes) {
		byte = next_byte();
		--line_bytes;

		if(dump_tokens) {
			sprintf(hexbuf, " %02X", byte);
			strcat(dumpbuf, hexbuf);
		}

		switch(state) {
			case ST_NEED_OPCODE:
				if(byte > 0x80) {
					string_len = byte & 0x7f;
					state = ST_IN_LABEL;
				} else if(byte == NO_OPCODE) {
					state = ST_IN_COMMENT;
				} else if(byte <= MAX_OPCODE) {
					print_opcode(byte);
					state = ST_IN_OPERAND;
				} else {
					state = ST_ERROR;
				}
				break;

			case ST_IN_LABEL:
				print_label_byte(byte, output);
				if(--string_len == 0)
					state = ST_NEED_OPCODE; /* TODO: 2 labels is error, detect */
				break;

			case ST_IN_OPERAND:
				state = handle_operand(byte);
				if(state == ST_IN_OPSTRING)
					string_len = byte & 0x7f;
				break;

			case ST_IN_OPSTRING:
				print_string_byte(byte, output);
				if(--string_len == 0)
					state = ST_IN_OPERAND;
				break;

			case ST_IN_COMMENT:
				print_comment_byte(byte, output);
				break;

			case ST_ERROR:
				fprintf(output, "<$%02X?>", byte);
				break;

			case ST_IN_HEXBYTE:
				print_hex_byte(byte, output);
				state = ST_IN_OPERAND;
				break;

			case ST_IN_HEXWORD_LSB:
				lsb = byte;
				state = ST_IN_HEXWORD_MSB;
				break;

			case ST_IN_HEXWORD_MSB:
				print_hex_word(lsbmsb(lsb, byte), output);
				state = ST_IN_OPERAND;
				break;

			case ST_IN_DECBYTE:
				fprintf(output, "%d", byte);
				state = ST_IN_OPERAND;
				break;

			case ST_IN_DECWORD_LSB:
				lsb = byte;
				state = ST_IN_DECWORD_MSB;
				break;

			case ST_IN_DECWORD_MSB:
				fprintf(output, "%d", lsbmsb(lsb, byte));
				state = ST_IN_OPERAND;
				break;

			case ST_IN_CHAR_CONST:
				/* NOTE(review): the hex conversion is compiled out on
				   ATARI8 even though atari8_get_opts() prompts for it
				   - confirm whether that is intended. */
#ifndef ATARI8
				if( chconsts_hex == CC_ALL ||
						(chconsts_hex == CC_UNPRINT && (byte < 0x20 || byte > 0x7f)) ) {
					print_hex_byte(byte, output);
					state = ST_IN_OPERAND;
					break;
				}
#endif
				putc('\'', output);
				print_string_byte(byte, output);
				if(add_quote)
					putc('\'', output);
				state = ST_IN_OPERAND;
				break;

			default:
				fprintf(stderr, SELF ": internal error, state %d\n", state);
				state = ST_ERROR;
				break;
		}
	}

#ifndef ATARI8
	if(found_inverse) {
		fprintf(stderr,
				SELF ": line %d contains %d inverse ATASCII characters >= $80\n",
				line_number, found_inverse);
		/* the marker belongs in the listing, which may not be stdout */
		if(deinverse)
			fprintf(output, "; XXX inverse (%d chars)", found_inverse);
	}

	if(found_unprint)
		fprintf(stderr,
				SELF ": line %d contains %d non-printable ATASCII characters <= $1F\n",
				line_number, found_unprint);
#endif

	put_eol();

	if(dump_tokens) {
		fputs(dumpbuf, output);
		put_eol();
	}

	if(ferror(output)) {
		perror(SELF);
		exit_cleanly(1);
	}
}

/* Convert lines until the header's byte count is used up, then close
   the files. */
void parse_lines(void) {
	old_line_number = -1;

	while(prog_bytes)
		parse_one_line();

	closeall();
}

#ifdef ATARI8
/* get rid of trailing newline, make uppercase... */
void cleanstring(char *s) {
	while(*s) {
		*s = toupper((unsigned char)*s);
		if(*s == '\n') {
			*s = 0;
			break;
		}
		++s;
	}
}

/* Copy a filename from argv into one of the fixed-size filename
   buffers (infile/outfile), refusing names that do not fit. */
static void copy_filename(char *dst, const char *src) {
	if(strlen(src) >= sizeof infile) {
		fprintf(stderr, SELF ": filename too long\n");
		exit_cleanly(1);
	}
	strcpy(dst, src);
}

/* add .M65 if no extension entered, add leading D: if no device name.
   src is modified in place. Both buffers are 128 bytes and src comes
   from prompt_for_str() (at most 119 characters), so the extension and
   device prefix always fit. */
void fix_filename(char *src, char *dst, char *ext) {
	if(!strrchr(src, '.'))
		strcat(src, ext);

	dst[0] = '\0';
	if(!strrchr(src, ':'))
		strcat(dst, "D:");
	strcat(dst, src);
}

/* Ask a yes/no question. *opt supplies the default (shown as the
   capital letter) and is left unchanged unless the answer starts with
   Y or N. */
void prompt_for_opt(char *prompt, char *opt) {
	char buffer[10];

	printf("%s [%c/%c]?", prompt, (*opt ? 'Y' : 'y'), (*opt ? 'n' : 'N'));
	fflush(stdout);

	buffer[0] = '\0'; /* so a failed fgets() reads as an empty answer */
	fgets(buffer, 10, stdin);
	cleanstring(buffer);

	if(buffer[0] == 'Y' || buffer[0] == 'y')
		*opt = 1;
	else if(buffer[0] == 'N' || buffer[0] == 'n')
		*opt = 0;
	/* else leave unchanged */
}

/* Ask for a string. buf must hold at least 120 bytes; the answer is
   uppercased and has its newline removed. */
void prompt_for_str(char *prompt, char *buf) {
	fputs(prompt, stdout);
	putc('?', stdout);
	fflush(stdout);

	buf[0] = '\0'; /* so a failed fgets() reads as an empty answer */
	fgets(buf, 120, stdin);
	cleanstring(buf);
}

/* Interactively ask for the input file, output file and options. */
void atari8_get_opts(void) {
	char buffer[128];
	char other = 0;

	/* keep asking until we get a filename or the user really quits;
	   previously an empty name was used if they declined to quit */
	for(;;) {
		prompt_for_str("M65 file or Return to quit", buffer);
		if(buffer[0])
			break;

		other = 0;
		prompt_for_opt("Really quit", &other);
		if(other || feof(stdin))
			exit_cleanly(0);
	}
	other = 0;

	fix_filename(buffer, infile, ".M65");

	prompt_for_str("Output file (Return for E:)", buffer);
	if(!buffer[0]) {
		using_stdout = 1;
	} else {
		using_stdout = 0;
		fix_filename(buffer, outfile, ".TXT");
	}

	prompt_for_opt("Set other options", &other);
	if(other) {
		prompt_for_opt("Omit line numbers", &no_numbers);
		if(!no_numbers) {
			/* default to the current renumbering setting rather than
			   to whatever the previous answer left in 'other' */
			other = (renum_incr != 0);
			prompt_for_opt("Renumber lines", &other);
			if(other) {
				prompt_for_str("Starting line number", buffer);
				renum_start = atoi(buffer);
				prompt_for_str("Line num increment (Return = 10)", buffer);
				renum_incr = atoi(buffer);
				if(renum_incr < 1)
					renum_incr = 10;
			} else {
				renum_incr = 0;
			}
		}

		prompt_for_opt("Omit . (dots) from pseudo-ops", &omit_dots);
		prompt_for_opt("Lowercase everything", &lcase_all);
		if(!lcase_all)
			prompt_for_opt("Lowercase mnemonics, hex", &lcase_opcodes);
		prompt_for_opt("Replace leading spaces w/tabs", &leading_tabs);
		if(!leading_tabs)
			prompt_for_opt("Replace lead+inner spaces w/tabs", &all_tabs);

		other = (chconsts_hex == CC_UNPRINT);
		prompt_for_opt("Unprintable char consts to hex", &other);
		if(other) {
			chconsts_hex = CC_UNPRINT;
		} else {
			other = (chconsts_hex == CC_ALL);
			prompt_for_opt("All char consts to hex", &other);
			if(other)
				chconsts_hex = CC_ALL;
			else
				chconsts_hex = CC_NONE;
		}

		if(chconsts_hex != CC_ALL)
			prompt_for_opt("Close quote ' for char consts", &add_quote);
		prompt_for_opt("Dump tokens in hex", &dump_tokens);
	}

	fflush(stdout);
}
#endif

/* Print the usage message to stderr and exit with status 1. */
void usage(void) {
	fprintf(stderr, "usage: " SELF " [options] inputfile\n\n");
	fprintf(stderr, "options:\n");
#ifndef ATARI8
	fprintf(stderr, " -a Use ATASCII EOLs\n");
	fprintf(stderr, " -c Convert non-printable char constants to hex\n");
	fprintf(stderr, "-cc Convert all char constants to hex\n");
#endif
	fprintf(stderr, " -e nnn[,i] Renumber starting with nnn [increment i]\n");
	fprintf(stderr, " -h Help (this text)\n");
#ifndef ATARI8
	fprintf(stderr, " -i Convert inverse video to normal\n");
#endif
	fprintf(stderr, " -l Lowercase mnemonics, hex constants\n");
	fprintf(stderr, "-la Lowercase all, including comments\n");
	fprintf(stderr, " -n No line numbers in output\n");
	fprintf(stderr, " -o [file] Output to file (default = stdout)\n");
	fprintf(stderr, " -p Omit leading . (period) from pseudo-ops\n");
	fprintf(stderr, " -q Add closing quote (') to character constants\n");
	fprintf(stderr, " -t Replace leading spaces with tabs\n");
	fprintf(stderr, "-ta Replace spaces between all fields with tabs\n");
	fprintf(stderr, " -v Verbose output (dump tokens)\n");
#ifndef ATARI8
	fprintf(stderr, " -m Print inverse video as underlined\n");
	fprintf(stderr, " -r Print inverse video as ANSI reverse video\n");
	fprintf(stderr, " -u Print ATASCII as Unicode/UTF-8\n");
#endif
	exit(1);
}

/* Parse the argument of -e ("nnn" or "nnn,i") into renum_start and
   renum_incr (default increment 10). Calls usage() if the argument
   does not start with a digit or the increment is not positive. */
void get_renum_args(char *arg) {
	renum_incr = 10;

	/* atoi() doesn't detect errors, so: */
	if(!arg || arg[0] > '9' || arg[0] < '0')
		usage();

	renum_start = atoi(arg);

	arg = strchr(arg, ',');
	if(arg)
		renum_incr = atoi(++arg);

	if(renum_incr < 1)
		usage();
}

/* TODO: support a few more -options
   A lot of the fancier options I wanted to add, would require a full
   parser for the grammar. I've avoided this partly because it's more
   work, and partly because I dunno how well yacc/bison would play with
   cc65... */

/* Set the option globals and the input/output filenames from the
   command line. Calls usage() (which exits) on any invalid option, on
   a second input filename, or when no input filename is given. */
void handle_cli_opts(int argc, char **argv) {
#ifdef ATARI8
	infile[0] = '\0';
#endif

	/* "> 0" keeps the loop from running away if argc arrives as 0 */
	while(++argv, --argc > 0) {
		if(argv[0][0] == '-') {
			switch(tolower((unsigned char)argv[0][1])) {
#ifndef ATARI8
				case 'a':
					nl = 0x9b;
					if(argv[0][2]) usage();
					break;

				case 'i':
					/* -i also turns on hex output for unprintable
					   char constants, but must not downgrade an
					   earlier -cc */
					if(chconsts_hex == CC_NONE)
						chconsts_hex = CC_UNPRINT;
					deinverse = 1;
					if(argv[0][2]) usage();
					break;

				case 'm':
					inv_underscore = 1;
					if(argv[0][2]) usage();
					break;

				case 'r':
					inv_ansi = 1;
					if(argv[0][2]) usage();
					break;

				case 'u':
					unicode = 1;
					if(argv[0][2]) usage();
					break;
#endif

				case 'c':
					chconsts_hex = CC_UNPRINT;
					if(argv[0][2] == 'C' || argv[0][2] == 'c')
						chconsts_hex = CC_ALL;
					else if(argv[0][2])
						usage();
					break;

				case 'e':
					if(argv[0][2]) usage();
					if(!argv[1]) usage();
					get_renum_args(argv[1]);
					argv++, argc--;
					break;

				case 'n':
					no_numbers = 1;
					if(argv[0][2]) usage();
					break;

				case 'h':
					usage();
					break;

				case 'l':
					lcase_opcodes = 1;
					if(argv[0][2] == 'A' || argv[0][2] == 'a')
						lcase_all = 1;
					else if(argv[0][2])
						usage();
					break;

				case 'p':
					omit_dots = 1;
					if(argv[0][2]) usage();
					break;

				case 'q':
					add_quote = 1;
					if(argv[0][2]) usage();
					break;

				case 't':
					leading_tabs = 1;
					if(argv[0][2] == 'A' || argv[0][2] == 'a')
						all_tabs = 1;
					else if(argv[0][2])
						usage();
					break;

				case 'v':
					dump_tokens = 1;
					if(argv[0][2]) usage();
					break;

				case 'o':
					if(argv[0][2]) {
						/* filename attached to the option: -ofile */
#ifdef ATARI8
						copy_filename(outfile, &argv[0][2]);
#else
						outfile = &argv[0][2];
#endif
					} else if(argc == 1) {
						usage();
					} else {
						++argv, --argc;
#ifdef ATARI8
						copy_filename(outfile, argv[0]);
#else
						outfile = argv[0];
#endif
					}
					using_stdout = 0;
					break;

				default:
					usage();
					break;
			}
		} else {
#ifdef ATARI8
			if(infile[0])
				usage();
			else
				copy_filename(infile, argv[0]);
#else
			if(infile)
				usage();
			else
				infile = argv[0];
#endif
		}
	}

	/* infile is an array on ATARI8, so test its contents there; the
	   pointer test would always pass */
#ifdef ATARI8
	if(!infile[0])
		usage();
#else
	if(!infile)
		usage();
#endif
}

/* Print the banner, get the options, then convert the input file. On
   the Atari this repeats, prompting for a new file each time. */
int main(int argc, char **argv) {
	fputs(BANNER, stderr);

#ifdef ATARI8
	while(1) {
		if(argc < 2) {
			atari8_get_opts();
		} else {
			handle_cli_opts(argc, argv);
			argc = 1; /* prompt interactively from the next pass on */
		}
#else
		handle_cli_opts(argc, argv);
#endif

		input = fopen(infile, "rb");
		if(!input) {
			perror(infile);
			exit_cleanly(1);
		}

		if(using_stdout) {
			output = stdout;
		} else {
			output = fopen(outfile, "w");
			if(!output) {
				perror(outfile);
				exit_cleanly(1);
			}
		}

		if(renum_incr)
			renum_line = renum_start;

		parse_header();
		parse_lines();
#ifdef ATARI8
	}
#endif

	exit_cleanly(0);
	return 0; /* to shut gcc up... */
}