From 017a503f34a0f8ce2245f1b5dd78894f65326a90 Mon Sep 17 00:00:00 2001 From: "B. Watson" Date: Sat, 29 Jun 2024 04:49:12 -0400 Subject: a8cat: added (will eventually replace both a8utf8 and a8eol). --- Makefile | 13 ++- a8cat.1 | 124 ++++++++++++++++++++++++++++ a8cat.c | 187 +++++++++++++++++++++++++++++++++++++++++ a8cat.rst | 58 +++++++++++++ atables.c | 265 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ atables.h | 2 + mkatables.pl | 116 ++++++++++++++++++++++++++ wtable.c | 140 +++++++++++++++++++++++++++++++ wtable.h | 2 + 9 files changed, 905 insertions(+), 2 deletions(-) create mode 100644 a8cat.1 create mode 100644 a8cat.c create mode 100644 a8cat.rst create mode 100644 atables.c create mode 100644 atables.h create mode 100644 mkatables.pl create mode 100644 wtable.c create mode 100644 wtable.h diff --git a/Makefile b/Makefile index 6b54cb9..4afaa47 100644 --- a/Makefile +++ b/Makefile @@ -16,9 +16,9 @@ CC=gcc CFLAGS=-Wall $(COPT) -ansi -D_GNU_SOURCE -DVERSION=\"$(VERSION)\" # BINS and SCRIPTS go in $BINDIR, DOCS go in $DOCDIR -BINS=a8eol atr2xfd atrsize axe blob2c blob2xex cart2xex cxrefbas dumpbas fenders protbas renumbas rom2cart unmac65 unprotbas vxrefbas xex1to2 xexamine xexcat xexsplit xfd2atr listbas +BINS=a8eol atr2xfd atrsize axe blob2c blob2xex cart2xex cxrefbas dumpbas fenders protbas renumbas rom2cart unmac65 unprotbas vxrefbas xex1to2 xexamine xexcat xexsplit xfd2atr listbas a8cat SCRIPTS=dasm2atasm a8utf8 -MANS=a8eol.1 xfd2atr.1 atr2xfd.1 blob2c.1 cart2xex.1 fenders.1 xexsplit.1 xexcat.1 atrsize.1 rom2cart.1 unmac65.1 axe.1 dasm2atasm.1 a8utf8.1 blob2xex.1 xexamine.1 xex1to2.1 unprotbas.1 protbas.1 renumbas.1 dumpbas.1 vxrefbas.1 cxrefbas.1 listbas.1 +MANS=a8eol.1 xfd2atr.1 atr2xfd.1 blob2c.1 cart2xex.1 fenders.1 xexsplit.1 xexcat.1 atrsize.1 rom2cart.1 unmac65.1 axe.1 dasm2atasm.1 a8utf8.1 blob2xex.1 xexamine.1 xex1to2.1 unprotbas.1 protbas.1 renumbas.1 dumpbas.1 vxrefbas.1 cxrefbas.1 listbas.1 a8cat.1 MAN5S=xex.5 MAN7S=atascii.7 DOCS=README.txt equates.inc *.dasm LICENSE ksiders/atr.txt @@ -66,6 +66,12 @@ listbas: listbas.c bas.o bcdfp.o tokens.o bas.o: bas.c bas.h +wtable.o: wtable.c wtable.h + +atables.o: atables.c atables.h + +a8cat: a8cat.c atables.o wtable.o + subdirs: for dir in $(SUBDIRS); do make -C $$dir COPT="$(COPT)"; done @@ -75,6 +81,9 @@ xfd2atr: xfd2atr.c atr2xfd: atr2xfd.c +atables.c: mkatables.pl + perl mkatables.pl > atables.c + # note to cross-compiler users: If you're building the *.bin targets, # blob2c needs to be executable on the build host. It'd also be nice # to build a blob2c for the target platform... Probably you can do diff --git a/a8cat.1 b/a8cat.1 new file mode 100644 index 0000000..d815a9c --- /dev/null +++ b/a8cat.1 @@ -0,0 +1,124 @@ +.\" Man page generated from reStructuredText. +. +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.TH "A8CAT" 1 "2024-06-29" "0.2.1" "Urchlay's Atari 8-bit Tools" +.SH NAME +a8cat \- Convert Atari 8-bit text to UTF-8 encoded Unicode. +.SH SYNOPSIS +.sp +\fIa8cat\fP [\fB\-r\fP] [\fB\-i\fP] [\fB\-u\fP] [\fB\-t\fP] [\fIinfile\fP] [\fIinfile ...\fP] +.SH DESCRIPTION +.sp +Convert Atari 8\-bit ATASCII or XL ICS (International Character +Set) text to UTF\-8 encoded Unicode. Control graphics characters are +replaced with their nearest Unicode equivalents (mostly from the Box +Drawing block, or from the Basic Latin block with \fB\-i\fP option). +.sp +If no \fIinfile\fPs are given, input is read from standard input. Output always +goes to standard output; to write to a file, use a command like: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +a8cat atari.txt > converted.txt +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +The output is plain UTF\-8 Unicode, without BOM. +.sp +Inverse video (characters codes above \fB$80\fP) are translated using +the ANSI/VT\-100 reverse video escape sequences. Exception: \fB$9B\fP +(Atari EOL) is translated to \fB\en\fP (newline). +.SH OPTIONS +.INDENT 0.0 +.TP +.B \-i +Input uses Atari XL/XE International Character Set encoding, rather than +ATASCII graphics. +.TP +.B \-u +Use "underlining" for inverse video. Each inverse character is followed by +a backspace, then a \fI_\fP character. When viewed in a pager such as \fBless\fP(1), +this causes the characters to appear underlined. Output created with this +option cannot be converted back to ATASCII with the \fB\-r\fP option. +.TP +.B \-t +Text mode. Normally, everything but EOL (\fB$9B\fP) is converted to a +Unicode graphics character. In text mode, ATASCII tabs, backspace, +and bells are translated to the ASCII versions. +.TP +.B \-r +Reverse conversion: Input is UTF\-8, output is ATASCII (or XL ICS, with \fB\-i\fP). +Beware that printing ATASCII to a terminal may look funny, and may even confuse +the terminal. Redirecting to a file is safe. +.UNINDENT +.SH COPYRIGHT +.sp +WTFPL. See \fI\%http://www.wtfpl.net/txt/copying/\fP for details. +.SH AUTHOR +.INDENT 0.0 +.IP B. 3 +Watson <\fI\%urchlay@slackware.uk\fP>; Urchlay on irc.libera.chat \fI##atari\fP\&. +.UNINDENT +.SH SEE ALSO +.sp +\fBa8eol\fP(1), +\fBa8utf8\fP(1), +\fBatr2xfd\fP(1), +\fBatrsize\fP(1), +\fBaxe\fP(1), +\fBblob2c\fP(1), +\fBblob2xex\fP(1), +\fBcart2xex\fP(1), +\fBcxrefbas\fP(1), +\fBdasm2atasm\fP(1), +\fBdumpbas\fP(1), +\fBf2toxex\fP(1), +\fBfenders\fP(1), +\fBlistbas\fP(1), +\fBprotbas\fP(1), +\fBrenumbas\fP(1), +\fBrom2cart\fP(1), +\fBunmac65\fP(1), +\fBunprotbas\fP(1), +\fBvxrefbas\fP(1), +\fBxexamine\fP(1), +\fBxexcat\fP(1), +\fBxexsplit\fP(1), +\fBxfd2atr\fP(1), +\fBxex\fP(5), +\fBatascii\fP(7). +.sp +Any good Atari 8\-bit book: \fIDe Re Atari\fP, \fIThe Atari BASIC Reference +Manual\fP, the \fIOS Users\(aq Guide\fP, \fIMapping the Atari\fP, etc. +.\" Generated by docutils manpage writer. +. diff --git a/a8cat.c b/a8cat.c new file mode 100644 index 0000000..752e96f --- /dev/null +++ b/a8cat.c @@ -0,0 +1,187 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "atables.h" +#include "wtable.h" + +const char **table = ata2utf; + +const char *inverse_on = "\x1b[7m"; +const char *inverse_off = "\x1b[0m"; + +int underline = 0, reverse = 0, textmode = 0, ics = 0; + +void print_help(void) { + printf("Usage: a8cat [-i] [-u] [file ...]\n"); +} + +FILE *open_input(const char *file) { + FILE *input; + + if(file[0] == '-' && file[1] == 0) { + if(freopen(NULL, "rb", stdin)) { + input = stdin; + } else { + perror("(standard input)"); + return NULL; + } + } else if(!(input = fopen(file, "rb"))) { + perror(file); + return NULL; + } + + return input; +} + +int handle_escape_seq(int inv, FILE *input) { + int count, c; + char buf[5] = { 0x1b, 0, 0, 0, 0 }; + + for(count = 1; count < 4; count++) { + c = fgetwc(input); + if(c == WEOF) break; + buf[count] = c; /* FIXME: might be a wide char! */ + } + + if(strcmp(inverse_on, buf) == 0) { + return 0x80; + } else if(strcmp(inverse_off, buf) == 0) { + return 0; + } else { + fputs(buf, stdout); + return inv; + } +} + +int a8revcat(const char *file) { + FILE *input; + int c, d, inv = 0; + + if( !(input = open_input(file)) ) + return 1; + + setlocale(LC_CTYPE, "en_US.UTF-8"); + while( (c = fgetwc(input)) != WEOF ) { + if(c == 0x1b) { + inv = handle_escape_seq(inv, input); + } else if(c == '\n') { + putchar(0x9b); + } else if(c < 0x80) { + putchar(c | inv); + } else { + d = wchar2atascii(c, ics); + if(d == -1) { + fprintf(stderr, "warning: unrecognized Unicode character %04x\n", c); + } else { + putchar(d | inv); + } + } + } + + return 0; +} + +/* XXX: hard-coded ANSI/vt100 escape sequences. would be + better but more complex to use terminfo to support any ol' + terminal... */ +void inverse(int onoff) { + fputs((onoff ? inverse_on : inverse_off ), stdout); +} + +int a8cat(const char *file) { + FILE *input; + int c, inv = 0; + + if( !(input = open_input(file)) ) + return 1; + + while( (c = fgetc(input)) != EOF ) { + if(c == 0x9b) { + putchar('\n'); + continue; + } + + if(textmode) { + switch(c) { + case 0x09: /* Atari TAB is same as ASCII */ + putchar('\t'); + continue; + case 0xfd: /* bell */ + putchar('\a'); + continue; + case 0x7e: /* backspace */ + putchar('\b'); + continue; + default: break; + } + } + + if(!underline) { + /* strings of inverse chars only get one "inverse on" ANSI + sequence, and one "inverse off" afterwards. */ + if(c & 0x80) { + if(!inv) { + inv = 1; + inverse(1); + } + } else { + if(inv) { + inv = 0; + inverse(0); + } + } + } + + fputs(table[c & 0x7f], stdout); + + if(underline && (c & 0x80)) { + putchar('\b'); + putchar('_'); + } + } + + /* gotta turn off inverse, so if there's another file after this one, + it doesn't start out being printed in inverse. */ + if(inv && !underline) inverse(0); + + fclose(input); + return 0; +} + +int main(int argc, char **argv) { + int opt, result = 0; + + while( (opt = getopt(argc, argv, "ihurt")) != -1) { + switch(opt) { + case 'i': table = ics2utf; ics = 1; break; + case 'h': print_help(); exit(0); break; + case 'u': underline = 1; break; + case 'r': reverse = 1; break; + case 't': textmode = 1; break; + default: print_help(); exit(1); break; + } + } + + if(reverse) { + if(underline || textmode) { + fprintf(stderr, "-t and -u options don't make sense with -r.\n"); + exit(1); + } + } + + if(optind >= argc) { + result = (reverse ? a8revcat("-") : a8cat("-")); + } else { + while(optind < argc) { + result += (reverse ? a8revcat(argv[optind]) : a8cat(argv[optind])); + optind++; + } + } + + exit(result); +} diff --git a/a8cat.rst b/a8cat.rst new file mode 100644 index 0000000..7557c01 --- /dev/null +++ b/a8cat.rst @@ -0,0 +1,58 @@ +===== +a8cat +===== + +-------------------------------------------------- +Convert Atari 8-bit text to UTF-8 encoded Unicode. +-------------------------------------------------- + +.. include:: manhdr.rst + +SYNOPSIS +======== + +*a8cat* [**-r**] [**-i**] [**-u**] [**-t**] [*infile*] [*infile ...*] + +DESCRIPTION +=========== + +Convert Atari 8-bit ATASCII or XL ICS (International Character +Set) text to UTF-8 encoded Unicode. Control graphics characters are +replaced with their nearest Unicode equivalents (mostly from the Box +Drawing block, or from the Basic Latin block with **-i** option). + +If no *infile*\s are given, input is read from standard input. Output always +goes to standard output; to write to a file, use a command like:: + + a8cat atari.txt > converted.txt + +The output is plain UTF-8 Unicode, without BOM. + +Inverse video (characters codes above **$80**) are translated using +the ANSI/VT-100 reverse video escape sequences. Exception: **$9B** +(Atari EOL) is translated to **\\n** (newline). + +OPTIONS +======= + +-i + Input uses Atari XL/XE International Character Set encoding, rather than + ATASCII graphics. + +-u + Use "underlining" for inverse video. Each inverse character is followed by + a backspace, then a *_* character. When viewed in a pager such as **less**\(1), + this causes the characters to appear underlined. Output created with this + option cannot be converted back to ATASCII with the **-r** option. + +-t + Text mode. Normally, everything but EOL (**$9B**) is converted to a + Unicode graphics character. In text mode, ATASCII tabs, backspace, + and bells are translated to the ASCII versions. + +-r + Reverse conversion: Input is UTF-8, output is ATASCII (or XL ICS, with **-i**). + Beware that printing ATASCII to a terminal may look funny, and may even confuse + the terminal. Redirecting to a file is safe. + +.. include:: manftr.rst diff --git a/atables.c b/atables.c new file mode 100644 index 0000000..ea6eedc --- /dev/null +++ b/atables.c @@ -0,0 +1,265 @@ +/* ATASCII to UTF-8 tables. Generated by mkatables.pl. + Do not edit this file; edit mkatables.pl instead. */ + +const char *ata2utf[] = { + "♥", /* 0 $00 ^@ */ + "┣", /* 1 $01 ^A */ + "┃", /* 2 $02 ^B */ + "┛", /* 3 $03 ^C */ + "┫", /* 4 $04 ^D */ + "┓", /* 5 $05 ^E */ + "╱", /* 6 $06 ^F */ + "╲", /* 7 $07 ^G */ + "◢", /* 8 $08 ^H */ + "▗", /* 9 $09 ^I */ + "◣", /* 10 $0a ^J */ + "▝", /* 11 $0b ^K */ + "▘", /* 12 $0c ^L */ + "▔", /* 13 $0d ^M */ + "▁", /* 14 $0e ^N */ + "▖", /* 15 $0f ^O */ + "♣", /* 16 $10 ^P */ + "┏", /* 17 $11 ^Q */ + "━", /* 18 $12 ^R */ + "╋", /* 19 $13 ^S */ + "●", /* 20 $14 ^T */ + "▄", /* 21 $15 ^U */ + "▎", /* 22 $16 ^V */ + "┳", /* 23 $17 ^W */ + "┻", /* 24 $18 ^X */ + "▌", /* 25 $19 ^Y */ + "┗", /* 26 $1a ^Z */ + "␛", /* 27 $1b ^[ */ + "↑", /* 28 $1c ^\ */ + "↓", /* 29 $1d ^] */ + "←", /* 30 $1e ^^ */ + "→", /* 31 $1f ^_ */ + " ", /* 32 $20 */ + "!", /* 33 $21 ! */ + "\"", /* 34 $22 " */ + "#", /* 35 $23 # */ + "$", /* 36 $24 $ */ + "%", /* 37 $25 % */ + "&", /* 38 $26 & */ + "'", /* 39 $27 ' */ + "(", /* 40 $28 ( */ + ")", /* 41 $29 ) */ + "*", /* 42 $2a * */ + "+", /* 43 $2b + */ + ",", /* 44 $2c , */ + "-", /* 45 $2d - */ + ".", /* 46 $2e . */ + "/", /* 47 $2f / */ + "0", /* 48 $30 0 */ + "1", /* 49 $31 1 */ + "2", /* 50 $32 2 */ + "3", /* 51 $33 3 */ + "4", /* 52 $34 4 */ + "5", /* 53 $35 5 */ + "6", /* 54 $36 6 */ + "7", /* 55 $37 7 */ + "8", /* 56 $38 8 */ + "9", /* 57 $39 9 */ + ":", /* 58 $3a : */ + ";", /* 59 $3b ; */ + "<", /* 60 $3c < */ + "=", /* 61 $3d = */ + ">", /* 62 $3e > */ + "?", /* 63 $3f ? */ + "@", /* 64 $40 @ */ + "A", /* 65 $41 A */ + "B", /* 66 $42 B */ + "C", /* 67 $43 C */ + "D", /* 68 $44 D */ + "E", /* 69 $45 E */ + "F", /* 70 $46 F */ + "G", /* 71 $47 G */ + "H", /* 72 $48 H */ + "I", /* 73 $49 I */ + "J", /* 74 $4a J */ + "K", /* 75 $4b K */ + "L", /* 76 $4c L */ + "M", /* 77 $4d M */ + "N", /* 78 $4e N */ + "O", /* 79 $4f O */ + "P", /* 80 $50 P */ + "Q", /* 81 $51 Q */ + "R", /* 82 $52 R */ + "S", /* 83 $53 S */ + "T", /* 84 $54 T */ + "U", /* 85 $55 U */ + "V", /* 86 $56 V */ + "W", /* 87 $57 W */ + "X", /* 88 $58 X */ + "Y", /* 89 $59 Y */ + "Z", /* 90 $5a Z */ + "[", /* 91 $5b [ */ + "\\", /* 92 $5c \ */ + "]", /* 93 $5d ] */ + "^", /* 94 $5e ^ */ + "_", /* 95 $5f _ */ + "◆", /* 96 $60 ` */ + "a", /* 97 $61 a */ + "b", /* 98 $62 b */ + "c", /* 99 $63 c */ + "d", /* 100 $64 d */ + "e", /* 101 $65 e */ + "f", /* 102 $66 f */ + "g", /* 103 $67 g */ + "h", /* 104 $68 h */ + "i", /* 105 $69 i */ + "j", /* 106 $6a j */ + "k", /* 107 $6b k */ + "l", /* 108 $6c l */ + "m", /* 109 $6d m */ + "n", /* 110 $6e n */ + "o", /* 111 $6f o */ + "p", /* 112 $70 p */ + "q", /* 113 $71 q */ + "r", /* 114 $72 r */ + "s", /* 115 $73 s */ + "t", /* 116 $74 t */ + "u", /* 117 $75 u */ + "v", /* 118 $76 v */ + "w", /* 119 $77 w */ + "x", /* 120 $78 x */ + "y", /* 121 $79 y */ + "z", /* 122 $7a z */ + "♠", /* 123 $7b { */ + "|", /* 124 $7c | */ + "↰", /* 125 $7d } */ + "◀", /* 126 $7e ~ */ + "▶", /* 127 $7f [del] */ +}; + +const char *ics2utf[] = { + "á", /* 0 $00 ^@ */ + "ù", /* 1 $01 ^A */ + "Ñ", /* 2 $02 ^B */ + "É", /* 3 $03 ^C */ + "ç", /* 4 $04 ^D */ + "ô", /* 5 $05 ^E */ + "ò", /* 6 $06 ^F */ + "ì", /* 7 $07 ^G */ + "£", /* 8 $08 ^H */ + "ï", /* 9 $09 ^I */ + "ü", /* 10 $0a ^J */ + "ä", /* 11 $0b ^K */ + "Ö", /* 12 $0c ^L */ + "ú", /* 13 $0d ^M */ + "ó", /* 14 $0e ^N */ + "ö", /* 15 $0f ^O */ + "Ü", /* 16 $10 ^P */ + "â", /* 17 $11 ^Q */ + "û", /* 18 $12 ^R */ + "î", /* 19 $13 ^S */ + "é", /* 20 $14 ^T */ + "è", /* 21 $15 ^U */ + "ñ", /* 22 $16 ^V */ + "ê", /* 23 $17 ^W */ + "ȧ", /* 24 $18 ^X */ + "à", /* 25 $19 ^Y */ + "Ȧ", /* 26 $1a ^Z */ + "␛", /* 27 $1b ^[ */ + "↑", /* 28 $1c ^\ */ + "↓", /* 29 $1d ^] */ + "←", /* 30 $1e ^^ */ + "→", /* 31 $1f ^_ */ + " ", /* 32 $20 */ + "!", /* 33 $21 ! */ + "\"", /* 34 $22 " */ + "#", /* 35 $23 # */ + "$", /* 36 $24 $ */ + "%", /* 37 $25 % */ + "&", /* 38 $26 & */ + "'", /* 39 $27 ' */ + "(", /* 40 $28 ( */ + ")", /* 41 $29 ) */ + "*", /* 42 $2a * */ + "+", /* 43 $2b + */ + ",", /* 44 $2c , */ + "-", /* 45 $2d - */ + ".", /* 46 $2e . */ + "/", /* 47 $2f / */ + "0", /* 48 $30 0 */ + "1", /* 49 $31 1 */ + "2", /* 50 $32 2 */ + "3", /* 51 $33 3 */ + "4", /* 52 $34 4 */ + "5", /* 53 $35 5 */ + "6", /* 54 $36 6 */ + "7", /* 55 $37 7 */ + "8", /* 56 $38 8 */ + "9", /* 57 $39 9 */ + ":", /* 58 $3a : */ + ";", /* 59 $3b ; */ + "<", /* 60 $3c < */ + "=", /* 61 $3d = */ + ">", /* 62 $3e > */ + "?", /* 63 $3f ? */ + "@", /* 64 $40 @ */ + "A", /* 65 $41 A */ + "B", /* 66 $42 B */ + "C", /* 67 $43 C */ + "D", /* 68 $44 D */ + "E", /* 69 $45 E */ + "F", /* 70 $46 F */ + "G", /* 71 $47 G */ + "H", /* 72 $48 H */ + "I", /* 73 $49 I */ + "J", /* 74 $4a J */ + "K", /* 75 $4b K */ + "L", /* 76 $4c L */ + "M", /* 77 $4d M */ + "N", /* 78 $4e N */ + "O", /* 79 $4f O */ + "P", /* 80 $50 P */ + "Q", /* 81 $51 Q */ + "R", /* 82 $52 R */ + "S", /* 83 $53 S */ + "T", /* 84 $54 T */ + "U", /* 85 $55 U */ + "V", /* 86 $56 V */ + "W", /* 87 $57 W */ + "X", /* 88 $58 X */ + "Y", /* 89 $59 Y */ + "Z", /* 90 $5a Z */ + "[", /* 91 $5b [ */ + "\\", /* 92 $5c \ */ + "]", /* 93 $5d ] */ + "^", /* 94 $5e ^ */ + "_", /* 95 $5f _ */ + "¡", /* 96 $60 ` */ + "a", /* 97 $61 a */ + "b", /* 98 $62 b */ + "c", /* 99 $63 c */ + "d", /* 100 $64 d */ + "e", /* 101 $65 e */ + "f", /* 102 $66 f */ + "g", /* 103 $67 g */ + "h", /* 104 $68 h */ + "i", /* 105 $69 i */ + "j", /* 106 $6a j */ + "k", /* 107 $6b k */ + "l", /* 108 $6c l */ + "m", /* 109 $6d m */ + "n", /* 110 $6e n */ + "o", /* 111 $6f o */ + "p", /* 112 $70 p */ + "q", /* 113 $71 q */ + "r", /* 114 $72 r */ + "s", /* 115 $73 s */ + "t", /* 116 $74 t */ + "u", /* 117 $75 u */ + "v", /* 118 $76 v */ + "w", /* 119 $77 w */ + "x", /* 120 $78 x */ + "y", /* 121 $79 y */ + "z", /* 122 $7a z */ + "Ä", /* 123 $7b { */ + "|", /* 124 $7c | */ + "↰", /* 125 $7d } */ + "◀", /* 126 $7e ~ */ + "▶", /* 127 $7f [del] */ +}; + diff --git a/atables.h b/atables.h new file mode 100644 index 0000000..56e6c34 --- /dev/null +++ b/atables.h @@ -0,0 +1,2 @@ +extern const char *ata2utf[]; +extern const char *ics2utf[]; diff --git a/mkatables.pl b/mkatables.pl new file mode 100644 index 0000000..1eb3a08 --- /dev/null +++ b/mkatables.pl @@ -0,0 +1,116 @@ +#!/usr/bin/perl -w + +%atascii = ( + 0 => "♥", + 1 => "┣", + 2 => "┃", + 3 => "┛", + 4 => "┫", + 5 => "┓", + 6 => "╱", + 7 => "╲", + 8 => "◢", + 9 => "▗", + 10 => "◣", + 11 => "▝", + 12 => "▘", + 13 => "▔", + 14 => "▁", + 15 => "▖", + 16 => "♣", + 17 => "┏", + 18 => "━", + 19 => "╋", + 20 => "●", + 21 => "▄", + 22 => "▎", + 23 => "┳", + 24 => "┻", + 25 => "▌", + 26 => "┗", + 27 => "␛", + 28 => "↑", + 29 => "↓", + 30 => "←", + 31 => "→", + 34 => "\\\"", + 92 => "\\\\", + 96 => "◆", + 123 => "♠", + 125 => "↰", + 126 => "◀", + 127 => "▶", +); + +%xl = ( + 0 => "á", + 1 => "ù", + 2 => "Ñ", + 3 => "É", + 4 => "ç", + 5 => "ô", + 6 => "ò", + 7 => "ì", + 8 => "£", + 9 => "ï", + 10 => "ü", + 11 => "ä", + 12 => "Ö", + 13 => "ú", + 14 => "ó", + 15 => "ö", + 16 => "Ü", + 17 => "â", + 18 => "û", + 19 => "î", + 20 => "é", + 21 => "è", + 22 => "ñ", + 23 => "ê", + 24 => "ȧ", + 25 => "à", + 26 => "Ȧ", + 27 => "␛", + 28 => "↑", + 29 => "↓", + 30 => "←", + 31 => "→", + 34 => "\\\"", + 92 => "\\\\", + 96 => "¡", + 123 => "Ä", + 125 => "↰", + 126 => "◀", + 127 => "▶", +); + +sub getcharname { + my $c = shift; + if($c == 127) { + return "[del]"; + } elsif($c < 32) { + return "^" . chr($c + 64); + } else { + return chr($c); + } +} + +sub mktable { + my ($name, $hash) = @_; + + print "const char *$name\[\] = {\n"; + for (0..127) { + my $cmt = sprintf("/* %3d \$%02x %5s */", $_, $_, getcharname($_)); + print "\t\"" . ($hash->{$_} || chr($_)), "\", $cmt\n"; + } + print "};\n\n"; +} + +print < +#include +#include "wtable.h" + +/* +#define WSEARCH_DEBUG +*/ + +wint_t wchar2ata[][2] = { + /* Unicode, ATASCII */ + { 0x2190, 0x1e }, + { 0x2191, 0x1c }, + { 0x2192, 0x1f }, + { 0x2193, 0x1d }, + { 0x21b0, 0x7d }, + { 0x241b, 0x1b }, + { 0x2501, 0x12 }, + { 0x2503, 0x02 }, + { 0x250f, 0x11 }, + { 0x2513, 0x05 }, + { 0x2517, 0x1a }, + { 0x251b, 0x03 }, + { 0x2523, 0x01 }, + { 0x252b, 0x04 }, + { 0x2533, 0x17 }, + { 0x253b, 0x18 }, + { 0x254b, 0x13 }, + { 0x2571, 0x06 }, + { 0x2572, 0x07 }, + { 0x2581, 0x0e }, + { 0x2584, 0x15 }, + { 0x258c, 0x19 }, + { 0x258e, 0x16 }, + { 0x2594, 0x0d }, + { 0x2596, 0x0f }, + { 0x2597, 0x09 }, + { 0x2598, 0x0c }, + { 0x259d, 0x0b }, + { 0x25b6, 0x7f }, + { 0x25c0, 0x7e }, + { 0x25c6, 0x60 }, + { 0x25cf, 0x14 }, + { 0x25e2, 0x08 }, + { 0x25e3, 0x0a }, + { 0x2660, 0x7b }, + { 0x2663, 0x10 }, + { 0x2665, 0x00 }, +}; + +wint_t wchar2ics[][2] = { + /* Unicode, ATASCII */ + { 0x00a1, 0x60 }, + { 0x00a3, 0x08 }, + { 0x00c4, 0x7b }, + { 0x00c9, 0x03 }, + { 0x00d1, 0x02 }, + { 0x00d6, 0x0c }, + { 0x00dc, 0x10 }, + { 0x00e0, 0x19 }, + { 0x00e1, 0x00 }, + { 0x00e2, 0x11 }, + { 0x00e4, 0x0b }, + { 0x00e7, 0x04 }, + { 0x00e8, 0x15 }, + { 0x00e9, 0x14 }, + { 0x00ea, 0x17 }, + { 0x00ec, 0x07 }, + { 0x00ee, 0x13 }, + { 0x00ef, 0x09 }, + { 0x00f1, 0x16 }, + { 0x00f2, 0x06 }, + { 0x00f3, 0x0e }, + { 0x00f4, 0x05 }, + { 0x00f6, 0x0f }, + { 0x00f9, 0x01 }, + { 0x00fa, 0x0d }, + { 0x00fb, 0x12 }, + { 0x00fc, 0x0a }, + { 0x0226, 0x1a }, + { 0x0227, 0x18 }, + { 0x2190, 0x1e }, + { 0x2191, 0x1c }, + { 0x2192, 0x1f }, + { 0x2193, 0x1d }, + { 0x21b0, 0x7d }, + { 0x241b, 0x1b }, + { 0x25b6, 0x7f }, + { 0x25c0, 0x7e }, +}; + +static int tblsize = sizeof(wchar2ata) / sizeof(wchar2ata[0]); + +static wint_t wsearch(wint_t table[][2], wint_t target, int start, int end) { + wint_t *elem; + int center; + +#ifdef WSEARCH_DEBUG + fprintf(stderr, "wsearch(0x%04x, %d, %d)\n", target, start, end); +#endif + + if(start == end) { + if(table[start][0] == target) + return table[start][1]; + else + return -1; + } else { + center = (start + end) / 2; + elem = table[center]; + +#ifdef WSEARCH_DEBUG + fprintf(stderr, "elem = 0x%04x, 0x%02x\n", elem[0], elem[1]); +#endif + + if(elem[0] == target) + return elem[1]; + else if(elem[0] > target) + return wsearch(table, target, start, center); + else + return wsearch(table, target, center + 1, end); + } +} + +int wchar2atascii(wint_t wc, int ics) { + return wsearch((ics ? wchar2ics : wchar2ata), wc, 0, tblsize - 1); +} + +#ifdef WSEARCH_DEBUG +int main(int argc, char **argv) { + printf("%02x\n", wchar2atascii(0x2190, 0)); + printf("%02x\n", wchar2atascii(0x2571, 0)); + printf("%02x\n", wchar2atascii(0x25c6, 0)); + printf("%02x\n", wchar2atascii(0x2665, 0)); + printf("%02x\n", wchar2atascii(0x2510, 0)); + return 0; +} +#endif diff --git a/wtable.h b/wtable.h new file mode 100644 index 0000000..11c5fa2 --- /dev/null +++ b/wtable.h @@ -0,0 +1,2 @@ +extern wint_t wchar2ata[][2]; +extern int wchar2atascii(wint_t wc, int ics); -- cgit v1.2.3