From e2ba8458a5cfdfacfaf103e7ba97d610afa6c970 Mon Sep 17 00:00:00 2001 From: "B. Watson" Date: Mon, 29 Aug 2022 16:11:13 -0400 Subject: initial commit --- a8eol.c | 567 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 567 insertions(+) create mode 100644 a8eol.c (limited to 'a8eol.c') diff --git a/a8eol.c b/a8eol.c new file mode 100644 index 0000000..6d6b779 --- /dev/null +++ b/a8eol.c @@ -0,0 +1,567 @@ +#include +#include +#include +#include +#include + +#ifndef VERSION +#define VERSION "???" +#endif + +#define SELF "a8eol" + +#define BANNER \ + "a8eol v"VERSION" by B. Watson (WTFPL)\n" + +#define USAGE \ + BANNER \ + "Converts between Atari 8-bit and UNIX / DOS / Mac Classic text file format\n\n" \ + "Usage: a8eol -[admu8ncpsxih] [infile] [outfile]\n\n" \ + "File type options:\n" \ + " -a Input is UNIX, DOS, or MacOS < 10 text; convert to Atari (EOL=$9B)\n" \ + " -d Input is Atari text; convert to DOS (EOL=$0A,$0D)\n" \ + " -m Input is Atari text; convert to MacOS < 10 (EOL=$0D)\n" \ + " -u Input is Atari text; convert to UNIX (EOL=$0A)\n" \ + "With none of the above: input type is auto-detected; output type\n" \ + "is UNIX if input is Atari, or Atari if input is UNIX/DOS/Mac\n\n" \ + "Translation options:\n" \ + " -n Translate EOL characters only; pass anything else as-is\n" \ + " -c Replace non-printing characters with ^x or {x} (turns on -8, too)\n" \ + " -p Replace non-printing characters with '.'\n" \ + " -s Remove non-printing characters\n" \ + " -x Replace non-printing characters with \\x[hex]\n" \ + "With none of the above: EOL, tab, and backspace characters are\n" \ + "translated; everything else is passed through as-is.\n\n" \ + "Other options:\n" \ + " -8 8-bit ASCII/ATASCII mode: Do not strip bit 7 (inverse video).\n" \ + " -i 'In-place' conversion. Original file renamed to infile~\n" \ + " -q Quiet operation. Error messages will still be printed.\n" \ + " -v Verbose operation. Prints extra info about what a8eol is doing.\n" \ + " -h Print this help message\n\n" \ + "Leave infile blank or use '-' to read from standard input.\n" \ + "Leave outfile blank or use '-' to write to standard output.\n" + +#define OPTIONS "admu8ncpsxiqvh" + +#define FT_AUTO 0 +#define FT_ATARI 1 +#define FT_UNIX 2 +#define FT_DOS 3 /* input_type never gets set to this! */ +#define FT_MAC9 4 /* input_type never gets set to this! */ + +#define TT_NONE 0 +#define TT_CARET 1 +#define TT_DOT 2 +#define TT_HEX 3 +#define TT_STRIP 4 +#define TT_TABS 5 + +int input_type = FT_AUTO; /* FT_UNIX works for UNIX/DOS/Mac */ +int output_type = FT_AUTO; /* DOS/Mac need to be FT_DOS or FT_MAC9 */ +int trans_type = TT_TABS; +int keep_bit_7 = 0; +int in_place = 0; +int verbose = 1; +/* TODO: track bytes/lines read/written, print if verbose > 1 */ + +static int inverse = 0; +static char buf[50]; +static char *dot = "."; +static char *inv = "{inv}"; +static char *norm = "{norm}"; +static char *empty = ""; +static char *crlf = "\r\n"; +static char *cr = "\r"; +static char *lf = "\n"; +static char eol[2] = { 0x9b, '\0' }; + +/* FIXME: ata2asc() and asc2ata() are crap code. */ + +char *ata2asc(int c) { + char *modifier = empty; + static char result[50]; + int affects_inv = 1; + char c7 = c & 0x7f; + + if(c == 0x9b) { + switch(output_type) { + case FT_DOS: + return crlf; + + case FT_MAC9: + return cr; + + case FT_UNIX: + default: + return lf; + } + } + + if(!keep_bit_7) + c &= 0x7f; + + if(trans_type != TT_CARET && (c == '|' || (c >= 32 && c <= 122))) { + buf[0] = c; + buf[1] = '\0'; + return buf; + } + + if(trans_type == TT_DOT) { + return dot; + } else if(trans_type == TT_STRIP) { + return empty; + } else if(trans_type == TT_HEX) { + sprintf(buf, "\\x%02X", c); + return buf; + } else if(trans_type == TT_TABS) { + if(c == 127) { + buf[0] = '\t'; + buf[1] = '\0'; + return buf; + } else if(c == 126) { + buf[0] = '\b'; + buf[1] = '\0'; + return buf; + } else { + buf[0] = c; + buf[1] = '\0'; + return buf; + } + } + + if(c7 == '|' || (c7 >= 32 && c7 <= 122 && c7 != 96)) { + buf[0] = c7; + buf[1] = '\0'; + } else if(c7 == 0) { + sprintf(buf, "{ctrl-,}"); + } else if(c == 27) { + sprintf(buf, "{esc}"); + affects_inv = 0; + } else if(c == 28) { + sprintf(buf, "{up}"); + affects_inv = 0; + } else if(c == 29) { + sprintf(buf, "{down}"); + affects_inv = 0; + } else if(c == 30) { + sprintf(buf, "{left}"); + affects_inv = 0; + } else if(c == 31) { + sprintf(buf, "{right}"); + affects_inv = 0; + } else if(c7 == 96) { + sprintf(buf, "{ctrl-.}"); + } else if(c7 == 123) { + sprintf(buf, "{ctrl-;}"); + } else if(c == 125) { + sprintf(buf, "{clear}"); + affects_inv = 0; + } else if(c == 126) { + sprintf(buf, "{bksp}"); + affects_inv = 0; + } else if(c == 127) { + sprintf(buf, "{tab}"); + affects_inv = 0; + } else if(c == 156) { + sprintf(buf, "{del-line}"); + affects_inv = 0; + } else if(c == 157) { + sprintf(buf, "{ins-line}"); + affects_inv = 0; + } else if(c == 158) { + sprintf(buf, "{clr-tab}"); + affects_inv = 0; + } else if(c == 159) { + sprintf(buf, "{set-tab}"); + affects_inv = 0; + } else if(c == 253) { + sprintf(buf, "{bell}"); + affects_inv = 0; + } else if(c == 254) { + sprintf(buf, "{del-char}"); + affects_inv = 0; + } else if(c == 255) { + sprintf(buf, "{ins-char}"); + affects_inv = 0; + } else if(c7 < 32) { + sprintf(buf, "{ctrl-%c}", c7+64); + } + + if(affects_inv) { + if(c >= 128) { + if(!inverse) + modifier = inv; + + inverse = 1; + } else { + if(inverse) + modifier = norm; + + inverse = 0; + } + } + + + sprintf(result, "%s%s", modifier, buf); + return result; +} + +char *asc2ata(int c) { + if(c == '\n') { + return eol; + } + + if(!keep_bit_7) + c &= 0x7f; + + buf[0] = buf[1] = '\0'; + + if(trans_type == TT_NONE || c == '|' || (c >= 32 && c <= 122)) { + buf[0] = c; + return buf; + } + + if(trans_type == TT_DOT) { + return dot; + } else if(trans_type == TT_STRIP) { + return empty; + } else if(trans_type == TT_HEX) { + sprintf(buf, "\\x%02X", c); + return buf; + } + + /* TT_CARET and TT_TABS both translate tabs */ + if(c == '\t') { + buf[0] = 127; + return buf; + } else if(c == '\b') { + buf[0] = 126; + return buf; + } + + if(trans_type == TT_TABS) { + buf[0] = c; + return buf; + } + + /* handle TT_CARET */ + buf[0] = '^'; + buf[1] = '?'; + buf[2] = '\0'; + + if(c < 32) { + buf[1] = c + 64; + return buf; + } + + return buf; +} + +int main(int argc, char **argv) { + int c; + char *rename_to = NULL; + char *infile = NULL, *outfile = NULL; + FILE *in = NULL, *out = NULL; + int last = -1; + + /*** Parse args */ + while( (c = getopt(argc, argv, OPTIONS)) != -1 ) { + switch(c) { + case 'a': + input_type = FT_UNIX; + output_type = FT_ATARI; + break; + + case 'd': + input_type = FT_ATARI; + output_type = FT_DOS; + break; + + case 'm': + input_type = FT_ATARI; + output_type = FT_MAC9; + break; + + case 'u': + input_type = FT_ATARI; + output_type = FT_UNIX; + break; + + case '8': + keep_bit_7 = 1; + break; + + case 'n': + trans_type = TT_NONE; + break; + + case 'c': + trans_type = TT_CARET; + keep_bit_7 = 1; + break; + + case 'p': + trans_type = TT_DOT; + break; + + case 's': + trans_type = TT_STRIP; + break; + + case 'x': + trans_type = TT_HEX; + break; + + case 'i': + in_place = 1; + break; + + case 'q': + verbose = 0; + break; + + case 'v': + verbose++; + break; + + case 'h': + default: + printf(USAGE); + exit(1); + } + } + + /*** Get input filename, open input if not stdin */ + if(optind < argc) { + infile = argv[optind]; + if(strcmp(infile, "-") == 0) { + in = stdin; + } else if( !(in = fopen(infile, "rb")) ) { + fprintf(stderr, SELF ": (fatal) %s: %s\n", infile, strerror(errno)); + exit(1); + } + optind++; + } else { + in = stdin; + infile = "-"; + } + + if(in_place) { + /*** Setup in-place editing */ + int len; + + if(in == stdin) { + fprintf(stderr, + SELF ": (fatal) Can't do in-place edit of standard input. " + "Run '" SELF " -h' for help.\n"); + exit(1); + } + + /* Build backup filename */ + len = strlen(infile); + rename_to = (char *)malloc(len + 2); + if(!rename_to) { + fprintf(stderr, SELF ": (fatal) Out of memory\n"); + fclose(in); + exit(1); + } + + snprintf(rename_to, len + 2, "%s~", infile); + unlink(rename_to); + + /* Rename (link) input (it's already open, no problem) */ + if(link(infile, rename_to)) { + fprintf(stderr, SELF ": (fatal) can't create %s: %s\n", + rename_to, strerror(errno)); + fclose(in); + exit(1); + } + + if(verbose) + fprintf(stderr, SELF ": backed up '%s' as '%s'\n", infile, rename_to); + + unlink(infile); + + outfile = infile; + infile = rename_to; + } else if(optind < argc) { + /*** Get output filename */ + outfile = argv[optind]; + if(strcmp(outfile, "-") == 0) + out = stdout; + } else { + /*** No output filename, will write to stdout */ + out = stdout; + outfile = "-"; + } + + /*** Open output file, if not stdout */ + /* FIXME: if we *are* reading from stdin or writing to stdout on + DOS or Windows, how do we set binary mode on stdin/stdout? */ + if( out != stdout && !(out = fopen(outfile, "wb")) ) { + fprintf(stderr, SELF ": (fatal) %s: %s\n", outfile, strerror(errno)); + fclose(in); + exit(1); + } + + /*** Try not to confuse the newbie users, if we're reading from their + console (they may be expecting a help message) */ + if(verbose && in == stdin && isatty(fileno(in))) + fprintf(stderr, + SELF ": reading from standard input (run '" + SELF " -h' for help)...\n"); + + if(verbose > 1) { + /*** If requested, show the user what's about to happen */ + if(in_place) + fprintf(stderr, SELF ": Using in-place editing mode.\n"); + + fprintf(stderr, SELF ": Input file: '%s', type ", infile); + switch(input_type) { + case FT_AUTO: + fprintf(stderr, "will be auto-detected.\n"); + break; + + case FT_ATARI: + fprintf(stderr, "set to Atari.\n"); + break; + + case FT_UNIX: + fprintf(stderr, "set to UNIX/DOS/Mac\n"); + break; + } + + fprintf(stderr, SELF ": Output file: '%s', type ", outfile); + switch(output_type) { + case FT_AUTO: + fprintf(stderr, "will be auto-detected.\n"); + break; + + case FT_ATARI: + fprintf(stderr, "set to Atari.\n"); + break; + + case FT_UNIX: + fprintf(stderr, "set to UNIX.\n"); + break; + + case FT_DOS: + fprintf(stderr, "set to DOS.\n"); + break; + + case FT_MAC9: + fprintf(stderr, "set to Mac Classic.\n"); + break; + } + + fprintf(stderr, SELF ": Non-printable characters "); + switch(trans_type) { + case TT_NONE: + fprintf(stderr, "(incl. tabs/backspaces) will be passed as-is.\n"); + break; + + case TT_TABS: + fprintf(stderr, "will be passed as-is (tabs/backspaces will be translated).\n"); + break; + + case TT_CARET: + fprintf(stderr, "will be printed as ^x or {x}.\n"); + break; + + case TT_DOT: + fprintf(stderr, "will be printed as dots.\n"); + break; + + case TT_HEX: + fprintf(stderr, "will be printed as hex escapes.\n"); + break; + + case TT_STRIP: + fprintf(stderr, "will be stripped.\n"); + break; + } + + fprintf(stderr, SELF ": Bit 7 (inverse video) will be %s.\n", + (keep_bit_7 ? "passed as-is" : "stripped")); + } + + /*** Read input, process, write; lather, rinse, repeat */ + while(!feof(in)) { + int rew = 0; + c = getc(in); + if(c < 0) break; + + switch(input_type) { + /* Auto-detection works by reading the input until we find + an Atari EOL or a UNIX/DOS/Mac \n or \r, then rewinding + the stream. Will fail if reading from a pipe. */ + case FT_AUTO: + if(c == 0x9b) { + input_type = FT_ATARI; + output_type = FT_UNIX; + if(verbose) + fprintf(stderr, SELF ": input looks like an Atari file\n"); + rew++; + } else if(c == '\n' || c == '\r') { + input_type = FT_UNIX; + output_type = FT_ATARI; + if(verbose) + fprintf(stderr, SELF ": input looks like a UNIX/DOS/Mac file\n"); + rew++; + } + + /* rewind if possible */ + if(rew) { + if(fseek(in, 0L, SEEK_SET)) { + fprintf(stderr, + SELF ": (fatal) Can't seek in input: %s\n" + "Try again without type auto-detection.\n", + strerror(errno)); + fclose(in); + fclose(out); + exit(1); + } + continue; + } + break; + + case FT_ATARI: + fputs(ata2asc(c), out); + continue; + break; + + case FT_UNIX: + if(last == '\r' && c != '\n') { + /* Must be a Mac Classic text file... */ + putc(0x9b, out); + } else if(c == '\r') { + /* Swallow CR's */ + last = c; + continue; + } + + last = c; + fputs(asc2ata(c), out); + break; + } + } + + /* If the last CR was swallowed, spit it back out */ + if(input_type == FT_UNIX && last == '\r') + putc(0x9b, out); + + /*** All done, clean up. */ + fclose(in); + fclose(out); + + if(rename_to) + free(rename_to); + + if(input_type == FT_AUTO) { + fprintf(stderr, + SELF ": (fatal) Input didn't contain any EOL/CR/LF characters!\n"); + exit(1); + } + + return 0; +} -- cgit v1.2.3