#include #include #include #include #include "amsbtok.h" /* this should always be defined in , but just in case... */ #ifndef BUFSIZ #define BUFSIZ 4096 #endif /* range for one-byte tokens */ #define MIN_STD_TOK 0x80 /* END */ #define MAX_STD_TOK 0xf8 /* < */ /* range for 2nd byte of two-byte tokens (1st is always 0xff) */ #define MIN_EXT_TOK 0xa3 /* SGN */ #define MAX_EXT_TOK 0xc5 /* STACK */ /* AMSB's tokens for "!", "'", REM. used to introduce comments */ #define TOK_REM 0x98 #define TOK_SQUOTE 0x9a #define TOK_BANG 0x9b /* good old Atari EOL character */ #define EOL 0x9b /* every MS BASIC I ever saw had the same line number limit. it's kind of arbitrary: why not allow the full range, up to 65535? */ #define MAX_LINENO 63999 /* a program bigger than this can't possibly fit into memory, even with cart-based AMSB2 and no DOS loaded. */ #define MAX_PROGLEN 30000 /* there should never be a valid line of BASIC longer than MAX_LINE_LEN bytes, since there would be no way to enter it in the editor. AMSB uses the standard E: device, which limits you to 4 screen lines, or max 120 bytes. there *cannot* be a line longer than MAX_LINE_LEN_HARD, because AMSB would crash when you try to LOAD such a program. */ #define MAX_LINE_LEN 0x80 #define MAX_LINE_LEN_HARD 0xff /* a program whose header has a length less than MIN_PROGLEN can't be a real AMSB program. EMPTY_PROGLEN is what you get if you SAVE when there's no program in memory (right after boot or a NEW). The minimum size for a program that actually contains code seems to be 5 (for 10 PRINT) */ #define MIN_PROGLEN 5 #define EMPTY_PROGLEN 2 /* an EOL address below this has to be an error, since this is the lowest MEMLO can ever be. */ #define MIN_PTR 0x0700 /* an EOL address higher than this has to be an error, since it would overlap the GR.0 display list or the ROMs at $c000 */ #define MAX_PTR 0xbc1f /* SAVE "filename" LOCK does 'encryption' by subtracting every byte from this (except the 3-byte header) */ #define UNLOCK_KEY 0x54 const char *self; char pipe_command[BUFSIZ + 1] = { "a8cat" }; int verbose = 0; /* -v */ int raw_output = 0; /* -a */ int check_only = 0; /* -c */ int startline = 0; /* -r */ int endline = 65536; /* -r */ int unlock_mode = 0; /* -l */ int locked = 0; int need_pclose = 0; int bytes_read = 0; int warnings = 0; int proglen = 0; int linecount = 0; FILE *infile; FILE *outfile; void set_self(const char *argv0) { char *p; self = argv0; p = strrchr(self, '/'); if(p) self = p + 1; } void die_with(const char *msg, int status) { fprintf(stderr, "%s: %s\n", self, msg); exit(status); } #define die(x) die_with(x,1) #define die2(x) die_with(x,2) /* post-processing: print "summary", exit. called by either read_byte() (on 'unexpected EOF'), or main() (on normal exit). */ void finish(int rv, const char *msg) { int progsize; if(msg) fprintf(stderr, "%s: %s\n", self, msg); if(verbose) { fprintf(stderr, "read %d bytes\n", bytes_read); fprintf(stderr, "listed %d lines\n", linecount); } if(!linecount) { fprintf(stderr, "%s: no lines of code in program\n", self); warnings++; } progsize = bytes_read - 3; if(proglen == progsize) { if(verbose) fprintf(stderr, "file size matches proglen\n"); } else { warnings++; fprintf(stderr, "%s: actual program size %d " "doesn't match program size %d in header,\n" " ", self, progsize, proglen); if(proglen > progsize) { fprintf(stderr, "AMSB will give #136 ERROR and fail to LOAD this file\n"); } else { fprintf(stderr, "AMSB will stop LOADing before the end of this file\n"); } } if(fgetc(infile) != EOF) { fprintf(stderr, "%s: trailing garbage at end of file\n", self); warnings++; } if(warnings) { fprintf(stderr, "%s: file has %d warning%s\n", self, warnings, (warnings == 1 ? "" : "s")); rv = 2; } if(need_pclose) { int got = pclose(outfile); if(verbose) fprintf(stderr, "return value from pipe is %d\n", got); if(got != 0) { die("a8cat child process failed, do you have a8cat on your PATH?"); } } if(verbose) fprintf(stderr, "exit status: %d (%s)\n", rv, (rv ? "ERROR" : "OK")); exit(rv); } unsigned char read_byte(void) { int c; c = fgetc(infile); if(c < 0) finish(2, "unexpected EOF, file truncated?"); bytes_read++; return (unsigned char)c; } /* "decrypt" a byte from a "SAVE x LOCK" program. */ unsigned char unlock_byte(unsigned char b) { return ((UNLOCK_KEY - b) & 0xff); } /* the "encryption" is the same (process is reversible) */ #define lock_byte(x) unlock_byte(x) unsigned char read_prog_byte(void) { unsigned char b = read_byte(); return locked ? unlock_byte(b) : b; } void unread_prog_byte(unsigned char b) { if(locked) b = lock_byte(b); ungetc(b, infile); bytes_read--; } int read_word(void) { int w; w = read_byte(); w |= (read_byte() << 8); return w; } int read_prog_word(void) { int w; w = read_prog_byte(); w |= (read_prog_byte() << 8); return w; } void read_header(void) { /* $00 for plain, $01 for SAVE with LOCK */ locked = read_byte(); if(locked > 1) die2("not an AMSB file: first byte not $00 or $01"); if(verbose && locked) { fprintf(stderr, "program is locked, decrypting\n"); } proglen = read_word(); if(verbose) fprintf(stderr, "proglen == %d (%04x)\n", proglen, proglen); if(proglen > MAX_PROGLEN) { fprintf(stderr, "%s: not an AMSB file: too big (%d bytes), won't fit in Atari memory\n", self, proglen); exit(2); } if(proglen == EMPTY_PROGLEN) { fprintf(stderr, "%s: program length is 2, no code in file (SAVE after NEW)\n", self); warnings++; } else { if(proglen < MIN_PROGLEN) { fprintf(stderr, "%s: not an AMSB file: program size too small (%d). Atari BASIC file?\n", self, proglen); exit(2); } } } void unknown_token(unsigned char byte, int ext) { fprintf(outfile, "", (ext ? "$ff ": ""), byte); } /* meat and potatoes. does the actual detokenizing. gets called once per line of code. returns false when it hits the last line, or true if there are more lines. */ int next_line(void) { static int last_lineno = -1; static int last_ptr = -1; int ptr, lineno, was_ff, in_string, in_comment, offset, len; int printing; unsigned char byte; offset = bytes_read; /* pointer to last token on the line, offset by whatever MEMLO happened to be when the file was SAVEd. 0 means this is the last line. */ ptr = read_prog_word(); if(!ptr) { if(verbose) fprintf(stderr, "end of program\n"); return 0; } lineno = read_prog_word(); if(verbose) fprintf(stderr, "found line %d, offset %d, end-of-line %d\n", lineno, offset, ptr); printing = (lineno >= startline) && (lineno <= endline); if(ptr < MIN_PTR) { fprintf(stderr, "%s: line %d: EOL address $%04x too low (<$%04x)\n", self, lineno, ptr, MIN_PTR); warnings++; } else if(ptr >= MAX_PTR) { fprintf(stderr, "%s: line %d: EOL address $%04x too high (>$%04x)\n", self, lineno, ptr, MAX_PTR); warnings++; } if(last_ptr != -1) { if(ptr <= last_ptr) { fprintf(stderr, "%s: line %d: EOL address $%04x <= previous $%04x\n", self, lineno, ptr, last_ptr); warnings++; } } if(lineno <= last_lineno) { fprintf(stderr, "%s: line number out of order (%d <= %d)\n", self, lineno, last_lineno); warnings++; } if(lineno > MAX_LINENO) { fprintf(stderr, "%s: line number out range (%d > %d)\n", self, lineno, MAX_LINENO); warnings++; } last_lineno = lineno; /* note that AMSB always puts a space after the line number in LIST */ if(printing) fprintf(outfile, "%d ", lineno); was_ff = 0; in_string = 0; in_comment = 0; /* walk and print the tokens. when we hit a null byte, we're done. */ while(1) { byte = read_prog_byte(); if(in_string) { if(byte == 0x00) { /* null byte ends both the string and the line of code. don't print a closing quote because AMSB doesn't. */ break; } else if(byte == '|') { /* pipe is how AMSB stores the closing quote. end the string but not the line of code, and print a " character. */ in_string = 0; if(printing) putc('"', outfile); } else { /* normal string character. */ if(printing) { putc(byte, outfile); /* one " character embedded in a string gets printed as "" */ if(byte == '"') putc(byte, outfile); } } } else if(in_comment) { /* null byte ends both the comment and the line of code. */ if(byte == 0x00) break; if(printing) putc(byte, outfile); } else if(byte == ':') { /* don't print the colon if the next token is a ! or ' for a comment */ unsigned char next = read_prog_byte(); if( !(next == TOK_SQUOTE || next == TOK_BANG) ) if(printing) putc(byte, outfile); unread_prog_byte(next); } else if(byte == '"') { /* strings start but *don't end* with a double-quote */ in_string = 1; if(printing) putc(byte, outfile); } else if(was_ff) { /* previous token was $ff, so this is a function token */ if(byte >= MIN_EXT_TOK && byte <= MAX_EXT_TOK) { if(printing) fputs(ext_tokens[byte - MIN_EXT_TOK], outfile); } else { if(printing) unknown_token(byte, 1); warnings++; } was_ff = 0; } else if(byte == 0xff) { /* next token will be a function token */ was_ff = 1; } else if(byte >= MIN_STD_TOK && byte <= MAX_STD_TOK) { /* statement token */ if(printing) fputs(std_tokens[byte - MIN_STD_TOK], outfile); if(byte == TOK_SQUOTE || byte == TOK_BANG || byte == TOK_REM) in_comment = 1; } else if(byte >= 0x80) { /* invalid token */ if(printing) unknown_token(byte, 0); warnings++; } else { /* null byte means the line of code is done */ if(!byte) break; if(byte < 0x20) { /* ATASCII graphics outside of a string */ fprintf(stderr, "%s: line %d has character %d outside of a string, " "maybe Atari BASIC?\n", self, lineno, byte); warnings++; } if(printing) putc(byte, outfile); } } len = bytes_read - offset; if(verbose) { fprintf(stderr, " line %d length: %d\n", lineno, len); } if(len > MAX_LINE_LEN) { int hard = len > MAX_LINE_LEN_HARD; fprintf(stderr, "%s: line %d is %s long (length %d > %d)\n", self, lineno, hard ? "impossibly" : "supiciously", len, hard ? MAX_LINE_LEN_HARD : MAX_LINE_LEN); } if(last_ptr != -1) { int plen = ptr - last_ptr; if(len != plen) { fprintf(stderr, "%s: line %d: EOL address doesn't match actual line length %d\n", self, lineno, len); warnings++; } } last_ptr = ptr; if(printing) putc(EOL, outfile); return 1; } /* when this gets called, input and output are open, read_header() has already run. "locking" and "unlocking" are the same transform, so this function does both. note that *no* checking of the program code is done here, so there's no need to finish() afterwards. */ void unlock_program(void) { int c; fprintf(stderr, "%s: program is %slocked, output will be %slocked\n", self, locked ? "" : "un", locked ? "un" : ""); /* 3-byte header: 0 for unlocked, 1 for locked */ fputc(!locked, outfile); /* LSB of program length (not encrypted) */ fputc(proglen & 0xff, outfile); /* MSB */ fputc((proglen >> 8) & 0xff, outfile); /* rest of file, including trailing nulls, is transformed */ while( (c = fgetc(infile)) >= 0) fputc(unlock_byte(c & 0xff), outfile); fclose(outfile); exit(0); } void print_help(void) { printf("%s v" VERSION " - detokenize Atari Microsoft BASIC files\n", self); puts("By B. Watson , released under the WTFPL"); printf("Usage: %s [[-l] | [-a] [-v] [-h] [-i] [-u] [-t] [-m] [-s] [-r *start,end*]] [file]\n", self); puts(" -a: raw ATASCII output"); puts(" -c: check only (no listing)"); puts(" -l: lock or unlock program"); puts(" -v: verbose"); puts(" -r: only list lines numbered from *start* to *end*"); puts(" --help, -h: print this help and exit"); puts(" --version: print version number and exit"); puts(" -i -u -t -m -s: passed to a8cat (try 'a8cat -h')"); puts("file must be a tokenized (SAVEd) AMSB file. if not given, reads from stdin."); } void version(void) { printf("%s " VERSION "\n", self); } void get_line_range(const char *arg) { int val = 0, comma = 0; const char *p = arg; while(*p) { if(*p >= '0' && *p <= '9') { val *= 10; val += *p - '0'; if(val > MAX_LINENO) { fprintf(stderr, "invalid line number for -r (range is 0-%d)\n", MAX_LINENO); exit(1); } } else if(*p == ',' || *p == '-') { if(comma) die("invalid argument for -r (too many commas)"); comma++; startline = val; val = 0; } else { if(comma) die("invalid argument for -r (only digits and comma allowed)"); } p++; } if(comma) endline = val ? val : MAX_LINENO; else startline = endline = val; if(endline < startline) die("invalid argument for -r (start > end)"); } void parse_args(int argc, char **argv) { char tmp[10]; int opt; if(argc >= 2) { if(strcmp(argv[1], "--help") == 0) { print_help(); exit(0); } else if(strcmp(argv[1], "--version") == 0) { version(); exit(0); } } while( (opt = getopt(argc, argv, "lr:cvaiutmsh")) != -1) { switch(opt) { case 'l': unlock_mode = 1; break; case 'c': check_only = 1; break; case 'a': raw_output = 1; break; case 'v': verbose = 1; break; case 'h': print_help(); exit(0); case 'r': get_line_range(optarg); break; case 'i': case 'u': case 't': case 'm': case 's': if(strlen(pipe_command) > (BUFSIZ - 10)) die("too many a8cat options"); sprintf(tmp, " -%c", opt); strcat(pipe_command, tmp); break; default: print_help(); exit(1); } } if(optind >= argc) { if(isatty(fileno(stdin))) { fprintf(stderr, "%s: can't read binary data from a terminal\n", self); print_help(); exit(1); } freopen(NULL, "rb", stdin); infile = stdin; } else { infile = fopen(argv[optind], "rb"); if(!infile) { fprintf(stderr, "%s: ", self); perror(argv[optind]); exit(1); } } } void open_output() { if(check_only) { outfile = freopen("/dev/null", "wb", stdout); if(!outfile) { fprintf(stderr, "%s: ", self); perror("/dev/null"); exit(1); } if(verbose) fprintf(stderr, "using /dev/null for output (check_only)\n"); } else if(raw_output || unlock_mode) { if(isatty(fileno(stdout))) { fprintf(stderr, "%s: refusing to write %s to a terminal\n", self, (unlock_mode ? "tokenized BASIC" : "raw ATASCII")); exit(1); } outfile = stdout; if(verbose) fprintf(stderr, "using stdout for output\n"); } else { if(verbose) fprintf(stderr, "using pipe for output: %s\n", pipe_command); outfile = popen(pipe_command, "w"); if(!outfile) { perror(pipe_command); exit(1); } need_pclose = 1; } } int main(int argc, char **argv) { set_self(argv[0]); parse_args(argc, argv); open_output(); read_header(); if(unlock_mode) { unlock_program(); exit(0); /* don't need finish() here, no parsing done */ } while(next_line()) linecount++; finish(0, NULL); return 0; /* never executes; shuts up gcc warning */ }