diff options
-rw-r--r-- | a8cat.1 | 36 | ||||
-rw-r--r-- | a8cat.c | 63 | ||||
-rw-r--r-- | a8cat.rst | 34 | ||||
-rw-r--r-- | atables.c | 131 | ||||
-rw-r--r-- | atables.h | 1 | ||||
-rw-r--r-- | mkatables.pl | 45 |
6 files changed, 295 insertions, 15 deletions
@@ -53,7 +53,13 @@ a8cat atari.txt > converted.txt .UNINDENT .UNINDENT .sp -The output is plain UTF\-8 Unicode, without BOM. +The output is plain UTF\-8 Unicode, without BOM. It will display correctly +in modern terminals that support Unicode and UTF\-8. If you get lots of +"empty rectangle" characters, it means your font lacks the glyphs for +the codepoints; try using the Deja Vu Sans and/or Symbola fonts. +.sp +It\(aqs even possible to edit the converted text and turn it back into +ATASCII, if you\(aqre very careful. .sp Inverse video (characters codes above \fB$80\fP) are translated using the ANSI/VT\-100 reverse video escape sequences. Exception: \fB$9B\fP @@ -74,12 +80,34 @@ option cannot be converted back to ATASCII with the \fB\-r\fP option. .B \-t Text mode. Normally, everything but EOL (\fB$9B\fP) is converted to a Unicode graphics character. In text mode, ATASCII tabs, backspace, -and bells are translated to the ASCII versions. +and bells are translated to the ASCII versions. Output created with this +option cannot be converted back to ATASCII with the \fB\-r\fP option. .TP .B \-r Reverse conversion: Input is UTF\-8, output is ATASCII (or XL ICS, with \fB\-i\fP). -Beware that printing ATASCII to a terminal may look funny, and may even confuse -the terminal. Redirecting to a file is safe. +Beware that printing ATASCII to a terminal may look funny, and may +even confuse the terminal. Redirecting to a file is safe; piping +to a pager usually is. Only the UTF\-8 codepoints that correspond to +ATASCII characters will be converted. If the \fB\-i\fP option was used to +create the input, it must be used with \fB\-r\fP also. +.TP +.B \-m +Magazine listing mode. Rather than Unicode graphics characters, +ATASCII characters are printed in symbolic form, e.g. \fI{clear}\fP or +\fI{ctrl\-A}\fP\&. The result is similar to type\-in listings in magazines +like Antic, Analog, or Compute! Output created with this option +cannot be converted back to ATASCII with the \fB\-r\fP option. +.TP +.B \-s +Strip the inverse video bit (bit 7) from all characters except the +EOL (\fB$9B\fP). Output created with this option can be converted +back to ATASCII with the \fB\-r\fP option, but of course there won\(aqt +be any inverse\-video characters in the result. +.UNINDENT +.INDENT 0.0 +.TP +.B \fB\-h\fP, \fB\-\-help\fP +Show built\-in help and exit. .UNINDENT .SH COPYRIGHT .sp @@ -15,6 +15,7 @@ const char *inverse_on = "\x1b[7m"; const char *inverse_off = "\x1b[0m"; int underline = 0, reverse = 0, textmode = 0, ics = 0; +int magazine = 0, stripinv = 0; void print_help(void) { printf("Usage: a8cat [-i] [-u] [file ...]\n"); @@ -106,9 +107,11 @@ int a8cat(const char *file) { continue; } + if(stripinv) c &= 0x7f; + if(textmode) { switch(c) { - case 0x09: /* Atari TAB is same as ASCII */ + case 0x7f: /* tab */ putchar('\t'); continue; case 0xfd: /* bell */ @@ -127,16 +130,43 @@ int a8cat(const char *file) { if(c & 0x80) { if(!inv) { inv = 1; - inverse(1); + if(magazine) + fputs("{inv}", stdout); + else + inverse(1); } } else { if(inv) { inv = 0; - inverse(0); + if(magazine) + fputs("{norm}", stdout); + else + inverse(0); } } } + if(magazine) { + /* special cases: control codes with bit 7 set can't go + in the table since it's only got 128 entries. */ + switch(c) { + case 0x9c: + fputs("{del-line}", stdout); continue; + case 0x9d: + fputs("{ins-line}", stdout); continue; + case 0x9e: + fputs("{clr-tab}", stdout); continue; + case 0x9f: + fputs("{set-tab}", stdout); continue; + case 0xfd: + fputs("{bell}", stdout); continue; + case 0xfe: + fputs("{del-char}", stdout); continue; + case 0xff: + fputs("{ins-char}", stdout); continue; + } + } + fputs(table[c & 0x7f], stdout); if(underline && (c & 0x80)) { @@ -147,7 +177,12 @@ int a8cat(const char *file) { /* gotta turn off inverse, so if there's another file after this one, it doesn't start out being printed in inverse. */ - if(inv && !underline) inverse(0); + if(inv && !underline) { + if(magazine) + fputs("{norm}", stdout); + else + inverse(0); + } fclose(input); return 0; @@ -156,20 +191,34 @@ int a8cat(const char *file) { int main(int argc, char **argv) { int opt, result = 0; - while( (opt = getopt(argc, argv, "ihurt")) != -1) { + if(argc > 1 && strcmp(argv[1], "--help") == 0) { + print_help(); + exit(0); + } + + while( (opt = getopt(argc, argv, "ihurtms")) != -1) { switch(opt) { case 'i': table = ics2utf; ics = 1; break; case 'h': print_help(); exit(0); break; case 'u': underline = 1; break; case 'r': reverse = 1; break; case 't': textmode = 1; break; + case 'm': table = ata2mag; magazine = 1; break; + case 's': stripinv = 1; break; default: print_help(); exit(1); break; } } if(reverse) { - if(underline || textmode) { - fprintf(stderr, "-t and -u options don't make sense with -r.\n"); + if(underline || textmode || stripinv || magazine) { + fprintf(stderr, "-t, -u, -m, -s options don't make sense with -r.\n"); + exit(1); + } + } + + if(magazine) { + if(ics || stripinv || underline) { + fprintf(stderr, "-i, -s, -u options don't make sense with -m.\n"); exit(1); } } @@ -26,7 +26,13 @@ goes to standard output; to write to a file, use a command like:: a8cat atari.txt > converted.txt -The output is plain UTF-8 Unicode, without BOM. +The output is plain UTF-8 Unicode, without BOM. It will display correctly +in modern terminals that support Unicode and UTF-8. If you get lots of +"empty rectangle" characters, it means your font lacks the glyphs for +the codepoints; try using the Deja Vu Sans and/or Symbola fonts. + +It's even possible to edit the converted text and turn it back into +ATASCII, if you're very careful. Inverse video (characters codes above **$80**) are translated using the ANSI/VT-100 reverse video escape sequences. Exception: **$9B** @@ -48,11 +54,31 @@ OPTIONS -t Text mode. Normally, everything but EOL (**$9B**) is converted to a Unicode graphics character. In text mode, ATASCII tabs, backspace, - and bells are translated to the ASCII versions. + and bells are translated to the ASCII versions. Output created with this + option cannot be converted back to ATASCII with the **-r** option. -r Reverse conversion: Input is UTF-8, output is ATASCII (or XL ICS, with **-i**). - Beware that printing ATASCII to a terminal may look funny, and may even confuse - the terminal. Redirecting to a file is safe. + Beware that printing ATASCII to a terminal may look funny, and may + even confuse the terminal. Redirecting to a file is safe; piping + to a pager usually is. Only the UTF-8 codepoints that correspond to + ATASCII characters will be converted. If the **-i** option was used to + create the input, it must be used with **-r** also. + +-m + Magazine listing mode. Rather than Unicode graphics characters, + ATASCII characters are printed in symbolic form, e.g. *{clear}* or + *{ctrl-A}*. The result is similar to type-in listings in magazines + like Antic, Analog, or Compute! Output created with this option + cannot be converted back to ATASCII with the **-r** option. + +-s + Strip the inverse video bit (bit 7) from all characters except the + EOL (**$9B**). Output created with this option can be converted + back to ATASCII with the **-r** option, but of course there won't + be any inverse-video characters in the result. + +**-h**, **--help** + Show built-in help and exit. .. include:: manftr.rst @@ -263,3 +263,134 @@ const char *ics2utf[] = { "▶", /* 127 $7f [del] */ }; +const char *ata2mag[] = { + "{ctrl-,}", /* 0 $00 ^@ */ + "{ctrl-A}", /* 1 $01 ^A */ + "{ctrl-B}", /* 2 $02 ^B */ + "{ctrl-C}", /* 3 $03 ^C */ + "{ctrl-D}", /* 4 $04 ^D */ + "{ctrl-E}", /* 5 $05 ^E */ + "{ctrl-F}", /* 6 $06 ^F */ + "{ctrl-G}", /* 7 $07 ^G */ + "{ctrl-H}", /* 8 $08 ^H */ + "{ctrl-I}", /* 9 $09 ^I */ + "{ctrl-J}", /* 10 $0a ^J */ + "{ctrl-K}", /* 11 $0b ^K */ + "{ctrl-L}", /* 12 $0c ^L */ + "{ctrl-M}", /* 13 $0d ^M */ + "{ctrl-N}", /* 14 $0e ^N */ + "{ctrl-O}", /* 15 $0f ^O */ + "{ctrl-P}", /* 16 $10 ^P */ + "{ctrl-Q}", /* 17 $11 ^Q */ + "{ctrl-R}", /* 18 $12 ^R */ + "{ctrl-S}", /* 19 $13 ^S */ + "{ctrl-T}", /* 20 $14 ^T */ + "{ctrl-U}", /* 21 $15 ^U */ + "{ctrl-V}", /* 22 $16 ^V */ + "{ctrl-W}", /* 23 $17 ^W */ + "{ctrl-X}", /* 24 $18 ^X */ + "{ctrl-Y}", /* 25 $19 ^Y */ + "{ctrl-Z}", /* 26 $1a ^Z */ + "{esc}", /* 27 $1b ^[ */ + "{up}", /* 28 $1c ^\ */ + "{down}", /* 29 $1d ^] */ + "{left}", /* 30 $1e ^^ */ + "{right}", /* 31 $1f ^_ */ + " ", /* 32 $20 */ + "!", /* 33 $21 ! */ + "\"", /* 34 $22 " */ + "#", /* 35 $23 # */ + "$", /* 36 $24 $ */ + "%", /* 37 $25 % */ + "&", /* 38 $26 & */ + "'", /* 39 $27 ' */ + "(", /* 40 $28 ( */ + ")", /* 41 $29 ) */ + "*", /* 42 $2a * */ + "+", /* 43 $2b + */ + ",", /* 44 $2c , */ + "-", /* 45 $2d - */ + ".", /* 46 $2e . */ + "/", /* 47 $2f / */ + "0", /* 48 $30 0 */ + "1", /* 49 $31 1 */ + "2", /* 50 $32 2 */ + "3", /* 51 $33 3 */ + "4", /* 52 $34 4 */ + "5", /* 53 $35 5 */ + "6", /* 54 $36 6 */ + "7", /* 55 $37 7 */ + "8", /* 56 $38 8 */ + "9", /* 57 $39 9 */ + ":", /* 58 $3a : */ + ";", /* 59 $3b ; */ + "<", /* 60 $3c < */ + "=", /* 61 $3d = */ + ">", /* 62 $3e > */ + "?", /* 63 $3f ? */ + "@", /* 64 $40 @ */ + "A", /* 65 $41 A */ + "B", /* 66 $42 B */ + "C", /* 67 $43 C */ + "D", /* 68 $44 D */ + "E", /* 69 $45 E */ + "F", /* 70 $46 F */ + "G", /* 71 $47 G */ + "H", /* 72 $48 H */ + "I", /* 73 $49 I */ + "J", /* 74 $4a J */ + "K", /* 75 $4b K */ + "L", /* 76 $4c L */ + "M", /* 77 $4d M */ + "N", /* 78 $4e N */ + "O", /* 79 $4f O */ + "P", /* 80 $50 P */ + "Q", /* 81 $51 Q */ + "R", /* 82 $52 R */ + "S", /* 83 $53 S */ + "T", /* 84 $54 T */ + "U", /* 85 $55 U */ + "V", /* 86 $56 V */ + "W", /* 87 $57 W */ + "X", /* 88 $58 X */ + "Y", /* 89 $59 Y */ + "Z", /* 90 $5a Z */ + "[", /* 91 $5b [ */ + "\\", /* 92 $5c \ */ + "]", /* 93 $5d ] */ + "^", /* 94 $5e ^ */ + "_", /* 95 $5f _ */ + "{ctrl-.}", /* 96 $60 ` */ + "a", /* 97 $61 a */ + "b", /* 98 $62 b */ + "c", /* 99 $63 c */ + "d", /* 100 $64 d */ + "e", /* 101 $65 e */ + "f", /* 102 $66 f */ + "g", /* 103 $67 g */ + "h", /* 104 $68 h */ + "i", /* 105 $69 i */ + "j", /* 106 $6a j */ + "k", /* 107 $6b k */ + "l", /* 108 $6c l */ + "m", /* 109 $6d m */ + "n", /* 110 $6e n */ + "o", /* 111 $6f o */ + "p", /* 112 $70 p */ + "q", /* 113 $71 q */ + "r", /* 114 $72 r */ + "s", /* 115 $73 s */ + "t", /* 116 $74 t */ + "u", /* 117 $75 u */ + "v", /* 118 $76 v */ + "w", /* 119 $77 w */ + "x", /* 120 $78 x */ + "y", /* 121 $79 y */ + "z", /* 122 $7a z */ + "{ctrl-;}", /* 123 $7b { */ + "|", /* 124 $7c | */ + "{clear}", /* 125 $7d } */ + "{bksp}", /* 126 $7e ~ */ + "{tab}", /* 127 $7f [del] */ +}; + @@ -1,2 +1,3 @@ extern const char *ata2utf[]; extern const char *ics2utf[]; +extern const char *ata2mag[]; diff --git a/mkatables.pl b/mkatables.pl index 1eb3a08..f1a0865 100644 --- a/mkatables.pl +++ b/mkatables.pl @@ -84,6 +84,50 @@ 127 => "▶", ); +%magazine = ( + 0 => "{ctrl-,}", + 1 => "{ctrl-A}", + 2 => "{ctrl-B}", + 3 => "{ctrl-C}", + 4 => "{ctrl-D}", + 5 => "{ctrl-E}", + 6 => "{ctrl-F}", + 7 => "{ctrl-G}", + 8 => "{ctrl-H}", + 9 => "{ctrl-I}", + 10 => "{ctrl-J}", + 11 => "{ctrl-K}", + 12 => "{ctrl-L}", + 13 => "{ctrl-M}", + 14 => "{ctrl-N}", + 15 => "{ctrl-O}", + 16 => "{ctrl-P}", + 17 => "{ctrl-Q}", + 18 => "{ctrl-R}", + 19 => "{ctrl-S}", + 20 => "{ctrl-T}", + 21 => "{ctrl-U}", + 22 => "{ctrl-V}", + 23 => "{ctrl-W}", + 24 => "{ctrl-X}", + 25 => "{ctrl-Y}", + 26 => "{ctrl-Z}", + 27 => "{esc}", + 28 => "{up}", + 29 => "{down}", + 30 => "{left}", + 31 => "{right}", + 34 => "\\\"", + 92 => "\\\\", + 96 => "{ctrl-.}", + 123 => "{ctrl-;}", + 125 => "{clear}", + 126 => "{bksp}", + 127 => "{tab}", + # the remaining control characters (with high bit set) + # are special-cased in a8cat.c +); + sub getcharname { my $c = shift; if($c == 127) { @@ -114,3 +158,4 @@ EOF mktable("ata2utf", \%atascii); mktable("ics2utf", \%xl); +mktable("ata2mag", \%magazine); |