aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile13
-rw-r--r--a8cat.1124
-rw-r--r--a8cat.c187
-rw-r--r--a8cat.rst58
-rw-r--r--atables.c265
-rw-r--r--atables.h2
-rw-r--r--mkatables.pl116
-rw-r--r--wtable.c140
-rw-r--r--wtable.h2
9 files changed, 905 insertions, 2 deletions
diff --git a/Makefile b/Makefile
index 6b54cb9..4afaa47 100644
--- a/Makefile
+++ b/Makefile
@@ -16,9 +16,9 @@ CC=gcc
CFLAGS=-Wall $(COPT) -ansi -D_GNU_SOURCE -DVERSION=\"$(VERSION)\"
# BINS and SCRIPTS go in $BINDIR, DOCS go in $DOCDIR
-BINS=a8eol atr2xfd atrsize axe blob2c blob2xex cart2xex cxrefbas dumpbas fenders protbas renumbas rom2cart unmac65 unprotbas vxrefbas xex1to2 xexamine xexcat xexsplit xfd2atr listbas
+BINS=a8eol atr2xfd atrsize axe blob2c blob2xex cart2xex cxrefbas dumpbas fenders protbas renumbas rom2cart unmac65 unprotbas vxrefbas xex1to2 xexamine xexcat xexsplit xfd2atr listbas a8cat
SCRIPTS=dasm2atasm a8utf8
-MANS=a8eol.1 xfd2atr.1 atr2xfd.1 blob2c.1 cart2xex.1 fenders.1 xexsplit.1 xexcat.1 atrsize.1 rom2cart.1 unmac65.1 axe.1 dasm2atasm.1 a8utf8.1 blob2xex.1 xexamine.1 xex1to2.1 unprotbas.1 protbas.1 renumbas.1 dumpbas.1 vxrefbas.1 cxrefbas.1 listbas.1
+MANS=a8eol.1 xfd2atr.1 atr2xfd.1 blob2c.1 cart2xex.1 fenders.1 xexsplit.1 xexcat.1 atrsize.1 rom2cart.1 unmac65.1 axe.1 dasm2atasm.1 a8utf8.1 blob2xex.1 xexamine.1 xex1to2.1 unprotbas.1 protbas.1 renumbas.1 dumpbas.1 vxrefbas.1 cxrefbas.1 listbas.1 a8cat.1
MAN5S=xex.5
MAN7S=atascii.7
DOCS=README.txt equates.inc *.dasm LICENSE ksiders/atr.txt
@@ -66,6 +66,12 @@ listbas: listbas.c bas.o bcdfp.o tokens.o
bas.o: bas.c bas.h
+wtable.o: wtable.c wtable.h
+
+atables.o: atables.c atables.h
+
+a8cat: a8cat.c atables.o wtable.o
+
subdirs:
for dir in $(SUBDIRS); do make -C $$dir COPT="$(COPT)"; done
@@ -75,6 +81,9 @@ xfd2atr: xfd2atr.c
atr2xfd: atr2xfd.c
+atables.c: mkatables.pl
+ perl mkatables.pl > atables.c
+
# note to cross-compiler users: If you're building the *.bin targets,
# blob2c needs to be executable on the build host. It'd also be nice
# to build a blob2c for the target platform... Probably you can do
diff --git a/a8cat.1 b/a8cat.1
new file mode 100644
index 0000000..d815a9c
--- /dev/null
+++ b/a8cat.1
@@ -0,0 +1,124 @@
+.\" Man page generated from reStructuredText.
+.
+.
+.nr rst2man-indent-level 0
+.
+.de1 rstReportMargin
+\\$1 \\n[an-margin]
+level \\n[rst2man-indent-level]
+level margin: \\n[rst2man-indent\\n[rst2man-indent-level]]
+-
+\\n[rst2man-indent0]
+\\n[rst2man-indent1]
+\\n[rst2man-indent2]
+..
+.de1 INDENT
+.\" .rstReportMargin pre:
+. RS \\$1
+. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin]
+. nr rst2man-indent-level +1
+.\" .rstReportMargin post:
+..
+.de UNINDENT
+. RE
+.\" indent \\n[an-margin]
+.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]]
+.nr rst2man-indent-level -1
+.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]]
+.in \\n[rst2man-indent\\n[rst2man-indent-level]]u
+..
+.TH "A8CAT" 1 "2024-06-29" "0.2.1" "Urchlay's Atari 8-bit Tools"
+.SH NAME
+a8cat \- Convert Atari 8-bit text to UTF-8 encoded Unicode.
+.SH SYNOPSIS
+.sp
+\fIa8cat\fP [\fB\-r\fP] [\fB\-i\fP] [\fB\-u\fP] [\fB\-t\fP] [\fIinfile\fP] [\fIinfile ...\fP]
+.SH DESCRIPTION
+.sp
+Convert Atari 8\-bit ATASCII or XL ICS (International Character
+Set) text to UTF\-8 encoded Unicode. Control graphics characters are
+replaced with their nearest Unicode equivalents (mostly from the Box
+Drawing block, or from the Basic Latin block with \fB\-i\fP option).
+.sp
+If no \fIinfile\fPs are given, input is read from standard input. Output always
+goes to standard output; to write to a file, use a command like:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+a8cat atari.txt > converted.txt
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+The output is plain UTF\-8 Unicode, without BOM.
+.sp
+Inverse video characters (character codes \fB$80\fP and above) are
+translated using the ANSI/VT\-100 reverse video escape sequences.
+Exception: \fB$9B\fP (Atari EOL) is translated to \fB\en\fP (newline).
+.SH OPTIONS
+.INDENT 0.0
+.TP
+.B \-i
+Input uses Atari XL/XE International Character Set encoding, rather than
+ATASCII graphics.
+.TP
+.B \-u
+Use "underlining" for inverse video. Each inverse character is followed by
+a backspace, then a \fI_\fP character. When viewed in a pager such as \fBless\fP(1),
+this causes the characters to appear underlined. Output created with this
+option cannot be converted back to ATASCII with the \fB\-r\fP option.
+.TP
+.B \-t
+Text mode. Normally, everything but EOL (\fB$9B\fP) is converted to a
+Unicode graphics character. In text mode, ATASCII tabs, backspace,
+and bells are translated to the ASCII versions.
+.TP
+.B \-r
+Reverse conversion: Input is UTF\-8, output is ATASCII (or XL ICS, with \fB\-i\fP).
+Beware that printing ATASCII to a terminal may look funny, and may even confuse
+the terminal. Redirecting to a file is safe.
+.UNINDENT
+.SH COPYRIGHT
+.sp
+WTFPL. See \fI\%http://www.wtfpl.net/txt/copying/\fP for details.
+.SH AUTHOR
+.INDENT 0.0
+.IP B. 3
+Watson <\fI\%urchlay@slackware.uk\fP>; Urchlay on irc.libera.chat \fI##atari\fP\&.
+.UNINDENT
+.SH SEE ALSO
+.sp
+\fBa8eol\fP(1),
+\fBa8utf8\fP(1),
+\fBatr2xfd\fP(1),
+\fBatrsize\fP(1),
+\fBaxe\fP(1),
+\fBblob2c\fP(1),
+\fBblob2xex\fP(1),
+\fBcart2xex\fP(1),
+\fBcxrefbas\fP(1),
+\fBdasm2atasm\fP(1),
+\fBdumpbas\fP(1),
+\fBf2toxex\fP(1),
+\fBfenders\fP(1),
+\fBlistbas\fP(1),
+\fBprotbas\fP(1),
+\fBrenumbas\fP(1),
+\fBrom2cart\fP(1),
+\fBunmac65\fP(1),
+\fBunprotbas\fP(1),
+\fBvxrefbas\fP(1),
+\fBxexamine\fP(1),
+\fBxexcat\fP(1),
+\fBxexsplit\fP(1),
+\fBxfd2atr\fP(1),
+\fBxex\fP(5),
+\fBatascii\fP(7).
+.sp
+Any good Atari 8\-bit book: \fIDe Re Atari\fP, \fIThe Atari BASIC Reference
+Manual\fP, the \fIOS Users\(aq Guide\fP, \fIMapping the Atari\fP, etc.
+.\" Generated by docutils manpage writer.
+.
diff --git a/a8cat.c b/a8cat.c
new file mode 100644
index 0000000..752e96f
--- /dev/null
+++ b/a8cat.c
@@ -0,0 +1,187 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <locale.h>
+#include <wchar.h>
+#include <errno.h>
+
+#include "atables.h"
+#include "wtable.h"
+
+/* Active translation table, indexed by the low 7 bits of each input
+   byte. Starts out as the ATASCII graphics table; main() switches it
+   to ics2utf when -i is given. */
+const char **table = ata2utf;
+
+/* Escape sequences written around runs of inverse video characters,
+   and recognized again by handle_escape_seq() when reversing. */
+const char *inverse_on = "\x1b[7m";
+const char *inverse_off = "\x1b[0m";
+
+/* Option flags, set by main() from the command line. */
+int underline = 0; /* -u */
+int reverse = 0;   /* -r */
+int textmode = 0;  /* -t */
+int ics = 0;       /* -i */
+
+/* Print a one-line usage summary to standard output. The option list
+   matches what main() passes to getopt() (plus -h, which prints this). */
+void print_help(void) {
+	printf("Usage: a8cat [-r] [-i] [-u] [-t] [file ...]\n");
+}
+
+/* Open an input stream for reading in binary mode.
+   A file name of exactly "-" means standard input, which is reopened
+   in binary mode instead of being opened by name.
+   Returns the stream, or NULL after printing a message with perror(). */
+FILE *open_input(const char *file) {
+	FILE *fp;
+
+	if(strcmp(file, "-") != 0) {
+		fp = fopen(file, "rb");
+		if(!fp)
+			perror(file);
+		return fp;
+	}
+
+	if(!freopen(NULL, "rb", stdin)) {
+		perror("(standard input)");
+		return NULL;
+	}
+
+	return stdin;
+}
+
+/* Called by a8revcat() after it has read an ESC (0x1b). Reads up to 3
+   more characters from input and compares the result with the two
+   sequences a8cat itself emits (inverse_on and inverse_off).
+   inv is the current inverse mask (0 or 0x80).
+   Returns 0x80 after inverse_on and 0 after inverse_off. For anything
+   else, the characters collected so far are written to stdout and inv
+   is returned unchanged. */
+int handle_escape_seq(int inv, FILE *input) {
+	int count;
+	wint_t c;
+	char buf[5] = { 0x1b, 0, 0, 0, 0 };
+
+	for(count = 1; count < 4; count++) {
+		c = fgetwc(input);
+		if(c == WEOF) break;
+		if(c > 0x7f) {
+			/* neither sequence contains a non-ASCII character, so this
+			   one belongs to the text. Push it back for the caller to
+			   translate, rather than truncating it into a char. */
+			ungetwc(c, input);
+			break;
+		}
+		buf[count] = (char)c;
+	}
+
+	if(strcmp(inverse_on, buf) == 0) {
+		return 0x80;
+	} else if(strcmp(inverse_off, buf) == 0) {
+		return 0;
+	} else {
+		fputs(buf, stdout);
+		return inv;
+	}
+}
+
+/* Reverse conversion (-r): read UTF-8 text from file ("-" means
+   standard input) and write ATASCII, or XL ICS when the ics flag is
+   set, to standard output. Code points with no equivalent are skipped
+   with a warning on stderr.
+   Returns 0 on success, 1 if the input can't be opened or reading
+   stops before end of file. */
+int a8revcat(const char *file) {
+	FILE *input;
+	wint_t c;
+	int d, inv = 0, status = 0;
+
+	if( !(input = open_input(file)) )
+		return 1;
+
+	/* fgetwc() decodes according to LC_CTYPE, so say so if the UTF-8
+	   locale isn't available instead of failing silently later. */
+	if(!setlocale(LC_CTYPE, "en_US.UTF-8"))
+		fprintf(stderr, "warning: can't set locale en_US.UTF-8\n");
+
+	while( (c = fgetwc(input)) != WEOF ) {
+		if(c == 0x1b) {
+			inv = handle_escape_seq(inv, input);
+		} else if(c == '\n') {
+			putchar(0x9b);
+		} else if(c < 0x80) {
+			putchar((int)c | inv);
+		} else {
+			d = wchar2atascii(c, ics);
+			if(d == -1) {
+				fprintf(stderr, "warning: unrecognized Unicode character %04x\n", (unsigned int)c);
+			} else {
+				putchar(d | inv);
+			}
+		}
+	}
+
+	/* WEOF means either end of file or an error (read failure, invalid
+	   input sequence). Only a real end of file sets the EOF flag. */
+	if(!feof(input)) {
+		perror(file);
+		status = 1;
+	}
+
+	fclose(input);
+	return status;
+}
+
+/* Write the escape sequence that switches inverse video on (onoff
+   non-zero) or off (onoff zero) to stdout.
+   XXX: the ANSI/vt100 sequences are hard-coded. Using terminfo would
+   support any ol' terminal, but would be more complex. */
+void inverse(int onoff) {
+	if(onoff)
+		fputs(inverse_on, stdout);
+	else
+		fputs(inverse_off, stdout);
+}
+
+/* Forward conversion: copy file ("-" means standard input) to standard
+   output, translating each byte through the active table.
+   Returns 1 if the input can't be opened, otherwise 0. */
+int a8cat(const char *file) {
+	FILE *input;
+	int c, is_inv, inv = 0;
+
+	input = open_input(file);
+	if(!input)
+		return 1;
+
+	for(c = fgetc(input); c != EOF; c = fgetc(input)) {
+		/* Atari EOL always becomes a newline */
+		if(c == 0x9b) {
+			putchar('\n');
+			continue;
+		}
+
+		/* -t: pass these three through as their ASCII equivalents
+		   rather than as graphics characters */
+		if(textmode) {
+			if(c == 0x09) {        /* Atari TAB is same as ASCII */
+				putchar('\t');
+				continue;
+			} else if(c == 0xfd) { /* bell */
+				putchar('\a');
+				continue;
+			} else if(c == 0x7e) { /* backspace */
+				putchar('\b');
+				continue;
+			}
+		}
+
+		is_inv = (c & 0x80) != 0;
+
+		/* an escape sequence is only written when the inverse state
+		   changes, so a run of inverse characters gets one "inverse
+		   on" before it and one "inverse off" after it. */
+		if(!underline && is_inv != inv) {
+			inv = is_inv;
+			inverse(inv);
+		}
+
+		fputs(table[c & 0x7f], stdout);
+
+		/* -u: mark inverse characters by overstriking an underscore */
+		if(underline && is_inv) {
+			putchar('\b');
+			putchar('_');
+		}
+	}
+
+	/* leave inverse off, so a file printed after this one doesn't
+	   start out in inverse. */
+	if(inv && !underline)
+		inverse(0);
+
+	fclose(input);
+	return 0;
+}
+
+/* Parse options, then convert each named file in order, or standard
+   input if none are named. The exit status is the number of files
+   that failed (0 means success), capped at 255. */
+int main(int argc, char **argv) {
+	int opt, result = 0;
+
+	while( (opt = getopt(argc, argv, "ihurt")) != -1) {
+		switch(opt) {
+			case 'i': table = ics2utf; ics = 1; break;
+			case 'h': print_help(); exit(0); break;
+			case 'u': underline = 1; break;
+			case 'r': reverse = 1; break;
+			case 't': textmode = 1; break;
+			default: print_help(); exit(1); break;
+		}
+	}
+
+	if(reverse && (underline || textmode)) {
+		fprintf(stderr, "-t and -u options don't make sense with -r.\n");
+		exit(1);
+	}
+
+	if(optind >= argc) {
+		result = (reverse ? a8revcat("-") : a8cat("-"));
+	} else {
+		while(optind < argc) {
+			result += (reverse ? a8revcat(argv[optind]) : a8cat(argv[optind]));
+			optind++;
+		}
+	}
+
+	/* only the low 8 bits of the status reach the parent process, so
+	   without a cap 256 failed files would be reported as success. */
+	if(result > 255)
+		result = 255;
+
+	exit(result);
+}
diff --git a/a8cat.rst b/a8cat.rst
new file mode 100644
index 0000000..7557c01
--- /dev/null
+++ b/a8cat.rst
@@ -0,0 +1,58 @@
+=====
+a8cat
+=====
+
+--------------------------------------------------
+Convert Atari 8-bit text to UTF-8 encoded Unicode.
+--------------------------------------------------
+
+.. include:: manhdr.rst
+
+SYNOPSIS
+========
+
+*a8cat* [**-r**] [**-i**] [**-u**] [**-t**] [*infile*] [*infile ...*]
+
+DESCRIPTION
+===========
+
+Convert Atari 8-bit ATASCII or XL ICS (International Character
+Set) text to UTF-8 encoded Unicode. Control graphics characters are
+replaced with their nearest Unicode equivalents (mostly from the Box
+Drawing block, or from the Basic Latin block with **-i** option).
+
+If no *infile*\s are given, input is read from standard input. Output always
+goes to standard output; to write to a file, use a command like::
+
+ a8cat atari.txt > converted.txt
+
+The output is plain UTF-8 Unicode, without BOM.
+
+Inverse video characters (character codes **$80** and above) are
+translated using the ANSI/VT-100 reverse video escape sequences.
+Exception: **$9B** (Atari EOL) is translated to **\\n** (newline).
+
+OPTIONS
+=======
+
+-i
+ Input uses Atari XL/XE International Character Set encoding, rather than
+ ATASCII graphics.
+
+-u
+ Use "underlining" for inverse video. Each inverse character is followed by
+ a backspace, then a *_* character. When viewed in a pager such as **less**\(1),
+ this causes the characters to appear underlined. Output created with this
+ option cannot be converted back to ATASCII with the **-r** option.
+
+-t
+ Text mode. Normally, everything but EOL (**$9B**) is converted to a
+ Unicode graphics character. In text mode, ATASCII tabs, backspace,
+ and bells are translated to the ASCII versions.
+
+-r
+ Reverse conversion: Input is UTF-8, output is ATASCII (or XL ICS, with **-i**).
+ Beware that printing ATASCII to a terminal may look funny, and may even confuse
+ the terminal. Redirecting to a file is safe.
+
+.. include:: manftr.rst
diff --git a/atables.c b/atables.c
new file mode 100644
index 0000000..ea6eedc
--- /dev/null
+++ b/atables.c
@@ -0,0 +1,265 @@
+/* ATASCII to UTF-8 tables. Generated by mkatables.pl.
+ Do not edit this file; edit mkatables.pl instead. */
+
+const char *ata2utf[] = {
+ "♥", /* 0 $00 ^@ */
+ "┣", /* 1 $01 ^A */
+ "┃", /* 2 $02 ^B */
+ "┛", /* 3 $03 ^C */
+ "┫", /* 4 $04 ^D */
+ "┓", /* 5 $05 ^E */
+ "╱", /* 6 $06 ^F */
+ "╲", /* 7 $07 ^G */
+ "◢", /* 8 $08 ^H */
+ "▗", /* 9 $09 ^I */
+ "◣", /* 10 $0a ^J */
+ "▝", /* 11 $0b ^K */
+ "▘", /* 12 $0c ^L */
+ "▔", /* 13 $0d ^M */
+ "▁", /* 14 $0e ^N */
+ "▖", /* 15 $0f ^O */
+ "♣", /* 16 $10 ^P */
+ "┏", /* 17 $11 ^Q */
+ "━", /* 18 $12 ^R */
+ "╋", /* 19 $13 ^S */
+ "●", /* 20 $14 ^T */
+ "▄", /* 21 $15 ^U */
+ "▎", /* 22 $16 ^V */
+ "┳", /* 23 $17 ^W */
+ "┻", /* 24 $18 ^X */
+ "▌", /* 25 $19 ^Y */
+ "┗", /* 26 $1a ^Z */
+ "␛", /* 27 $1b ^[ */
+ "↑", /* 28 $1c ^\ */
+ "↓", /* 29 $1d ^] */
+ "←", /* 30 $1e ^^ */
+ "→", /* 31 $1f ^_ */
+ " ", /* 32 $20 */
+ "!", /* 33 $21 ! */
+ "\"", /* 34 $22 " */
+ "#", /* 35 $23 # */
+ "$", /* 36 $24 $ */
+ "%", /* 37 $25 % */
+ "&", /* 38 $26 & */
+ "'", /* 39 $27 ' */
+ "(", /* 40 $28 ( */
+ ")", /* 41 $29 ) */
+ "*", /* 42 $2a * */
+ "+", /* 43 $2b + */
+ ",", /* 44 $2c , */
+ "-", /* 45 $2d - */
+ ".", /* 46 $2e . */
+ "/", /* 47 $2f / */
+ "0", /* 48 $30 0 */
+ "1", /* 49 $31 1 */
+ "2", /* 50 $32 2 */
+ "3", /* 51 $33 3 */
+ "4", /* 52 $34 4 */
+ "5", /* 53 $35 5 */
+ "6", /* 54 $36 6 */
+ "7", /* 55 $37 7 */
+ "8", /* 56 $38 8 */
+ "9", /* 57 $39 9 */
+ ":", /* 58 $3a : */
+ ";", /* 59 $3b ; */
+ "<", /* 60 $3c < */
+ "=", /* 61 $3d = */
+ ">", /* 62 $3e > */
+ "?", /* 63 $3f ? */
+ "@", /* 64 $40 @ */
+ "A", /* 65 $41 A */
+ "B", /* 66 $42 B */
+ "C", /* 67 $43 C */
+ "D", /* 68 $44 D */
+ "E", /* 69 $45 E */
+ "F", /* 70 $46 F */
+ "G", /* 71 $47 G */
+ "H", /* 72 $48 H */
+ "I", /* 73 $49 I */
+ "J", /* 74 $4a J */
+ "K", /* 75 $4b K */
+ "L", /* 76 $4c L */
+ "M", /* 77 $4d M */
+ "N", /* 78 $4e N */
+ "O", /* 79 $4f O */
+ "P", /* 80 $50 P */
+ "Q", /* 81 $51 Q */
+ "R", /* 82 $52 R */
+ "S", /* 83 $53 S */
+ "T", /* 84 $54 T */
+ "U", /* 85 $55 U */
+ "V", /* 86 $56 V */
+ "W", /* 87 $57 W */
+ "X", /* 88 $58 X */
+ "Y", /* 89 $59 Y */
+ "Z", /* 90 $5a Z */
+ "[", /* 91 $5b [ */
+ "\\", /* 92 $5c \ */
+ "]", /* 93 $5d ] */
+ "^", /* 94 $5e ^ */
+ "_", /* 95 $5f _ */
+ "◆", /* 96 $60 ` */
+ "a", /* 97 $61 a */
+ "b", /* 98 $62 b */
+ "c", /* 99 $63 c */
+ "d", /* 100 $64 d */
+ "e", /* 101 $65 e */
+ "f", /* 102 $66 f */
+ "g", /* 103 $67 g */
+ "h", /* 104 $68 h */
+ "i", /* 105 $69 i */
+ "j", /* 106 $6a j */
+ "k", /* 107 $6b k */
+ "l", /* 108 $6c l */
+ "m", /* 109 $6d m */
+ "n", /* 110 $6e n */
+ "o", /* 111 $6f o */
+ "p", /* 112 $70 p */
+ "q", /* 113 $71 q */
+ "r", /* 114 $72 r */
+ "s", /* 115 $73 s */
+ "t", /* 116 $74 t */
+ "u", /* 117 $75 u */
+ "v", /* 118 $76 v */
+ "w", /* 119 $77 w */
+ "x", /* 120 $78 x */
+ "y", /* 121 $79 y */
+ "z", /* 122 $7a z */
+ "♠", /* 123 $7b { */
+ "|", /* 124 $7c | */
+ "↰", /* 125 $7d } */
+ "◀", /* 126 $7e ~ */
+ "▶", /* 127 $7f [del] */
+};
+
+const char *ics2utf[] = {
+ "á", /* 0 $00 ^@ */
+ "ù", /* 1 $01 ^A */
+ "Ñ", /* 2 $02 ^B */
+ "É", /* 3 $03 ^C */
+ "ç", /* 4 $04 ^D */
+ "ô", /* 5 $05 ^E */
+ "ò", /* 6 $06 ^F */
+ "ì", /* 7 $07 ^G */
+ "£", /* 8 $08 ^H */
+ "ï", /* 9 $09 ^I */
+ "ü", /* 10 $0a ^J */
+ "ä", /* 11 $0b ^K */
+ "Ö", /* 12 $0c ^L */
+ "ú", /* 13 $0d ^M */
+ "ó", /* 14 $0e ^N */
+ "ö", /* 15 $0f ^O */
+ "Ü", /* 16 $10 ^P */
+ "â", /* 17 $11 ^Q */
+ "û", /* 18 $12 ^R */
+ "î", /* 19 $13 ^S */
+ "é", /* 20 $14 ^T */
+ "è", /* 21 $15 ^U */
+ "ñ", /* 22 $16 ^V */
+ "ê", /* 23 $17 ^W */
+ "ȧ", /* 24 $18 ^X */
+ "à", /* 25 $19 ^Y */
+ "Ȧ", /* 26 $1a ^Z */
+ "␛", /* 27 $1b ^[ */
+ "↑", /* 28 $1c ^\ */
+ "↓", /* 29 $1d ^] */
+ "←", /* 30 $1e ^^ */
+ "→", /* 31 $1f ^_ */
+ " ", /* 32 $20 */
+ "!", /* 33 $21 ! */
+ "\"", /* 34 $22 " */
+ "#", /* 35 $23 # */
+ "$", /* 36 $24 $ */
+ "%", /* 37 $25 % */
+ "&", /* 38 $26 & */
+ "'", /* 39 $27 ' */
+ "(", /* 40 $28 ( */
+ ")", /* 41 $29 ) */
+ "*", /* 42 $2a * */
+ "+", /* 43 $2b + */
+ ",", /* 44 $2c , */
+ "-", /* 45 $2d - */
+ ".", /* 46 $2e . */
+ "/", /* 47 $2f / */
+ "0", /* 48 $30 0 */
+ "1", /* 49 $31 1 */
+ "2", /* 50 $32 2 */
+ "3", /* 51 $33 3 */
+ "4", /* 52 $34 4 */
+ "5", /* 53 $35 5 */
+ "6", /* 54 $36 6 */
+ "7", /* 55 $37 7 */
+ "8", /* 56 $38 8 */
+ "9", /* 57 $39 9 */
+ ":", /* 58 $3a : */
+ ";", /* 59 $3b ; */
+ "<", /* 60 $3c < */
+ "=", /* 61 $3d = */
+ ">", /* 62 $3e > */
+ "?", /* 63 $3f ? */
+ "@", /* 64 $40 @ */
+ "A", /* 65 $41 A */
+ "B", /* 66 $42 B */
+ "C", /* 67 $43 C */
+ "D", /* 68 $44 D */
+ "E", /* 69 $45 E */
+ "F", /* 70 $46 F */
+ "G", /* 71 $47 G */
+ "H", /* 72 $48 H */
+ "I", /* 73 $49 I */
+ "J", /* 74 $4a J */
+ "K", /* 75 $4b K */
+ "L", /* 76 $4c L */
+ "M", /* 77 $4d M */
+ "N", /* 78 $4e N */
+ "O", /* 79 $4f O */
+ "P", /* 80 $50 P */
+ "Q", /* 81 $51 Q */
+ "R", /* 82 $52 R */
+ "S", /* 83 $53 S */
+ "T", /* 84 $54 T */
+ "U", /* 85 $55 U */
+ "V", /* 86 $56 V */
+ "W", /* 87 $57 W */
+ "X", /* 88 $58 X */
+ "Y", /* 89 $59 Y */
+ "Z", /* 90 $5a Z */
+ "[", /* 91 $5b [ */
+ "\\", /* 92 $5c \ */
+ "]", /* 93 $5d ] */
+ "^", /* 94 $5e ^ */
+ "_", /* 95 $5f _ */
+ "¡", /* 96 $60 ` */
+ "a", /* 97 $61 a */
+ "b", /* 98 $62 b */
+ "c", /* 99 $63 c */
+ "d", /* 100 $64 d */
+ "e", /* 101 $65 e */
+ "f", /* 102 $66 f */
+ "g", /* 103 $67 g */
+ "h", /* 104 $68 h */
+ "i", /* 105 $69 i */
+ "j", /* 106 $6a j */
+ "k", /* 107 $6b k */
+ "l", /* 108 $6c l */
+ "m", /* 109 $6d m */
+ "n", /* 110 $6e n */
+ "o", /* 111 $6f o */
+ "p", /* 112 $70 p */
+ "q", /* 113 $71 q */
+ "r", /* 114 $72 r */
+ "s", /* 115 $73 s */
+ "t", /* 116 $74 t */
+ "u", /* 117 $75 u */
+ "v", /* 118 $76 v */
+ "w", /* 119 $77 w */
+ "x", /* 120 $78 x */
+ "y", /* 121 $79 y */
+ "z", /* 122 $7a z */
+ "Ä", /* 123 $7b { */
+ "|", /* 124 $7c | */
+ "↰", /* 125 $7d } */
+ "◀", /* 126 $7e ~ */
+ "▶", /* 127 $7f [del] */
+};
+
diff --git a/atables.h b/atables.h
new file mode 100644
index 0000000..56e6c34
--- /dev/null
+++ b/atables.h
@@ -0,0 +1,2 @@
+extern const char *ata2utf[];
+extern const char *ics2utf[];
diff --git a/mkatables.pl b/mkatables.pl
new file mode 100644
index 0000000..1eb3a08
--- /dev/null
+++ b/mkatables.pl
@@ -0,0 +1,116 @@
+#!/usr/bin/perl -w
+
+%atascii = ( # ATASCII code => UTF-8 string; codes not listed fall back to chr(code) in mktable()
+ 0 => "♥",
+ 1 => "┣",
+ 2 => "┃",
+ 3 => "┛",
+ 4 => "┫",
+ 5 => "┓",
+ 6 => "╱",
+ 7 => "╲",
+ 8 => "◢",
+ 9 => "▗",
+ 10 => "◣",
+ 11 => "▝",
+ 12 => "▘",
+ 13 => "▔",
+ 14 => "▁",
+ 15 => "▖",
+ 16 => "♣",
+ 17 => "┏",
+ 18 => "━",
+ 19 => "╋",
+ 20 => "●",
+ 21 => "▄",
+ 22 => "▎",
+ 23 => "┳",
+ 24 => "┻",
+ 25 => "▌",
+ 26 => "┗",
+ 27 => "␛",
+ 28 => "↑",
+ 29 => "↓",
+ 30 => "←",
+ 31 => "→",
+ 34 => "\\\"", # double quote, pre-escaped because mktable() prints it inside a C string literal
+ 92 => "\\\\", # backslash, pre-escaped for the same reason
+ 96 => "◆",
+ 123 => "♠",
+ 125 => "↰",
+ 126 => "◀",
+ 127 => "▶",
+);
+
+%xl = ( # same layout as %atascii, for the table printed as ics2utf
+ 0 => "á",
+ 1 => "ù",
+ 2 => "Ñ",
+ 3 => "É",
+ 4 => "ç",
+ 5 => "ô",
+ 6 => "ò",
+ 7 => "ì",
+ 8 => "£",
+ 9 => "ï",
+ 10 => "ü",
+ 11 => "ä",
+ 12 => "Ö",
+ 13 => "ú",
+ 14 => "ó",
+ 15 => "ö",
+ 16 => "Ü",
+ 17 => "â",
+ 18 => "û",
+ 19 => "î",
+ 20 => "é",
+ 21 => "è",
+ 22 => "ñ",
+ 23 => "ê",
+ 24 => "ȧ", # NOTE(review): this is U+0227 (a with dot above); possibly meant U+00E5 (a with ring) -- confirm against the XL character set
+ 25 => "à",
+ 26 => "Ȧ", # NOTE(review): this is U+0226 (A with dot above); possibly meant U+00C5 (A with ring) -- confirm
+ 27 => "␛",
+ 28 => "↑",
+ 29 => "↓",
+ 30 => "←",
+ 31 => "→",
+ 34 => "\\\"", # double quote, pre-escaped because mktable() prints it inside a C string literal
+ 92 => "\\\\", # backslash, pre-escaped for the same reason
+ 96 => "¡",
+ 123 => "Ä",
+ 125 => "↰",
+ 126 => "◀",
+ 127 => "▶",
+);
+
+# Printable name for character code $code, used in the generated
+# comments: "[del]" for 127, caret notation (^@, ^A, ...) for codes
+# below 32, otherwise the character itself.
+sub getcharname {
+	my ($code) = @_;
+
+	return "[del]" if $code == 127;
+	return "^" . chr($code + 64) if $code < 32;
+	return chr($code);
+}
+
+# Print a C array definition called $name with 128 string entries, one
+# per character code. Each entry comes from the hash referenced by
+# $hash, or is the character itself when the hash has nothing for that
+# code, and is followed by a comment giving the code in decimal and
+# hex plus the character's name.
+sub mktable {
+	my ($name, $hash) = @_;
+	my @rows;
+
+	for my $code (0..127) {
+		my $str = $hash->{$code} || chr($code);
+		my $cmt = sprintf("/* %3d \$%02x %5s */", $code, $code, getcharname($code));
+		push @rows, "\t\"" . $str . "\", $cmt\n";
+	}
+
+	print "const char *$name\[\] = {\n", @rows, "};\n\n";
+}
+
+print <<EOF; # header comment that goes at the top of the generated C source
+/* ATASCII to UTF-8 tables. Generated by mkatables.pl.
+ Do not edit this file; edit mkatables.pl instead. */
+
+EOF
+
+mktable("ata2utf", \%atascii); # ATASCII graphics table
+mktable("ics2utf", \%xl); # international character set table
diff --git a/wtable.c b/wtable.c
new file mode 100644
index 0000000..3c008b3
--- /dev/null
+++ b/wtable.c
@@ -0,0 +1,140 @@
+/* ref:
+https://stackoverflow.com/questions/21737906/how-to-read-write-utf8-text-files-in-c
+*/
+
+#include <stdio.h>
+#include <wchar.h>
+#include "wtable.h"
+
+/*
+#define WSEARCH_DEBUG
+*/
+
+wint_t wchar2ata[][2] = {
+ /* { Unicode code point, ATASCII code }. Rows must stay in ascending code point order: wsearch() does a binary search on the first column. */
+ { 0x2190, 0x1e },
+ { 0x2191, 0x1c },
+ { 0x2192, 0x1f },
+ { 0x2193, 0x1d },
+ { 0x21b0, 0x7d },
+ { 0x241b, 0x1b },
+ { 0x2501, 0x12 },
+ { 0x2503, 0x02 },
+ { 0x250f, 0x11 },
+ { 0x2513, 0x05 },
+ { 0x2517, 0x1a },
+ { 0x251b, 0x03 },
+ { 0x2523, 0x01 },
+ { 0x252b, 0x04 },
+ { 0x2533, 0x17 },
+ { 0x253b, 0x18 },
+ { 0x254b, 0x13 },
+ { 0x2571, 0x06 },
+ { 0x2572, 0x07 },
+ { 0x2581, 0x0e },
+ { 0x2584, 0x15 },
+ { 0x258c, 0x19 },
+ { 0x258e, 0x16 },
+ { 0x2594, 0x0d },
+ { 0x2596, 0x0f },
+ { 0x2597, 0x09 },
+ { 0x2598, 0x0c },
+ { 0x259d, 0x0b },
+ { 0x25b6, 0x7f },
+ { 0x25c0, 0x7e },
+ { 0x25c6, 0x60 },
+ { 0x25cf, 0x14 },
+ { 0x25e2, 0x08 },
+ { 0x25e3, 0x0a },
+ { 0x2660, 0x7b },
+ { 0x2663, 0x10 },
+ { 0x2665, 0x00 },
+};
+
+wint_t wchar2ics[][2] = {
+ /* { Unicode code point, XL ICS code }, searched when wchar2atascii() is called with ics non-zero. Rows must stay in ascending code point order for the binary search. */
+ { 0x00a1, 0x60 },
+ { 0x00a3, 0x08 },
+ { 0x00c4, 0x7b },
+ { 0x00c9, 0x03 },
+ { 0x00d1, 0x02 },
+ { 0x00d6, 0x0c },
+ { 0x00dc, 0x10 },
+ { 0x00e0, 0x19 },
+ { 0x00e1, 0x00 },
+ { 0x00e2, 0x11 },
+ { 0x00e4, 0x0b },
+ { 0x00e7, 0x04 },
+ { 0x00e8, 0x15 },
+ { 0x00e9, 0x14 },
+ { 0x00ea, 0x17 },
+ { 0x00ec, 0x07 },
+ { 0x00ee, 0x13 },
+ { 0x00ef, 0x09 },
+ { 0x00f1, 0x16 },
+ { 0x00f2, 0x06 },
+ { 0x00f3, 0x0e },
+ { 0x00f4, 0x05 },
+ { 0x00f6, 0x0f },
+ { 0x00f9, 0x01 },
+ { 0x00fa, 0x0d },
+ { 0x00fb, 0x12 },
+ { 0x00fc, 0x0a },
+ { 0x0226, 0x1a }, /* NOTE(review): U+0226 is A with dot above; possibly meant U+00C5 (A with ring) -- confirm. Changing it means re-sorting this table. */
+ { 0x0227, 0x18 }, /* NOTE(review): U+0227 is a with dot above; possibly meant U+00E5 (a with ring) -- confirm */
+ { 0x2190, 0x1e },
+ { 0x2191, 0x1c },
+ { 0x2192, 0x1f },
+ { 0x2193, 0x1d },
+ { 0x21b0, 0x7d },
+ { 0x241b, 0x1b },
+ { 0x25b6, 0x7f },
+ { 0x25c0, 0x7e },
+};
+
+/* Row counts are taken per table, so the lookup stays correct if the
+   two tables ever stop being the same length. */
+static const int ata_count = sizeof(wchar2ata) / sizeof(wchar2ata[0]);
+static const int ics_count = sizeof(wchar2ics) / sizeof(wchar2ics[0]);
+
+/* Binary search of rows start..end (inclusive) of table for a row
+   whose first column equals target. The rows must be sorted in
+   ascending order of the first column.
+   Returns the second column of the matching row, or -1 if there is no
+   match (including when the range is empty). */
+static int wsearch(wint_t table[][2], wint_t target, int start, int end) {
+	int center;
+
+	while(start <= end) {
+#ifdef WSEARCH_DEBUG
+		fprintf(stderr, "wsearch(0x%04x, %d, %d)\n", (unsigned int)target, start, end);
+#endif
+
+		center = start + (end - start) / 2;
+
+#ifdef WSEARCH_DEBUG
+		fprintf(stderr, "elem = 0x%04x, 0x%02x\n", (unsigned int)table[center][0], (unsigned int)table[center][1]);
+#endif
+
+		if(table[center][0] == target)
+			return (int)table[center][1];
+		else if(table[center][0] > target)
+			end = center - 1;
+		else
+			start = center + 1;
+	}
+
+	return -1;
+}
+
+/* Look up the ATASCII code (ics zero) or XL ICS code (ics non-zero)
+   for the Unicode code point wc. Returns the code, or -1 if wc isn't
+   in the selected table. */
+int wchar2atascii(wint_t wc, int ics) {
+	if(ics)
+		return wsearch(wchar2ics, wc, 0, ics_count - 1);
+
+	return wsearch(wchar2ata, wc, 0, ata_count - 1);
+}
+
+#ifdef WSEARCH_DEBUG
+/* Debug-only self test: print the ATASCII lookup result, in hex, for a
+   few code points. The last one (0x2510) is not in wchar2ata. */
+int main(int argc, char **argv) {
+	static const wint_t samples[] = { 0x2190, 0x2571, 0x25c6, 0x2665, 0x2510 };
+	int i;
+
+	for(i = 0; i < (int)(sizeof(samples) / sizeof(samples[0])); i++)
+		printf("%02x\n", wchar2atascii(samples[i], 0));
+
+	return 0;
+}
+#endif
diff --git a/wtable.h b/wtable.h
new file mode 100644
index 0000000..11c5fa2
--- /dev/null
+++ b/wtable.h
@@ -0,0 +1,2 @@
+#ifndef WTABLE_H
+#define WTABLE_H
+
+/* wint_t is declared in <wchar.h>; include it here so this header
+   doesn't depend on the including file having done so first. */
+#include <wchar.h>
+
+/* Unicode-to-ATASCII lookup table, defined in wtable.c. */
+extern wint_t wchar2ata[][2];
+
+/* Look up the ATASCII code (ics zero) or XL ICS code (ics non-zero)
+   for the Unicode code point wc. Returns the code, or -1 if wc has no
+   entry in the selected table. */
+extern int wchar2atascii(wint_t wc, int ics);
+
+#endif /* WTABLE_H */