9 files changed, 905 insertions, 2 deletions
diff --git a/Makefile b/Makefile
index 6b54cb9..4afaa47 100644
--- a/Makefile
+++ b/Makefile
@@ -16,9 +16,9 @@ CC=gcc
 CFLAGS=-Wall $(COPT) -ansi -D_GNU_SOURCE -DVERSION=\"$(VERSION)\"
 
 # BINS and SCRIPTS go in $BINDIR, DOCS go in $DOCDIR
-BINS=a8eol atr2xfd atrsize axe blob2c blob2xex cart2xex cxrefbas dumpbas fenders protbas renumbas rom2cart unmac65 unprotbas vxrefbas xex1to2 xexamine xexcat xexsplit xfd2atr listbas
+BINS=a8eol atr2xfd atrsize axe blob2c blob2xex cart2xex cxrefbas dumpbas fenders protbas renumbas rom2cart unmac65 unprotbas vxrefbas xex1to2 xexamine xexcat xexsplit xfd2atr listbas a8cat
 SCRIPTS=dasm2atasm a8utf8
-MANS=a8eol.1 xfd2atr.1 atr2xfd.1 blob2c.1 cart2xex.1 fenders.1 xexsplit.1 xexcat.1 atrsize.1 rom2cart.1 unmac65.1 axe.1 dasm2atasm.1 a8utf8.1 blob2xex.1 xexamine.1 xex1to2.1 unprotbas.1 protbas.1 renumbas.1 dumpbas.1 vxrefbas.1 cxrefbas.1 listbas.1
+MANS=a8eol.1 xfd2atr.1 atr2xfd.1 blob2c.1 cart2xex.1 fenders.1 xexsplit.1 xexcat.1 atrsize.1 rom2cart.1 unmac65.1 axe.1 dasm2atasm.1 a8utf8.1 blob2xex.1 xexamine.1 xex1to2.1 unprotbas.1 protbas.1 renumbas.1 dumpbas.1 vxrefbas.1 cxrefbas.1 listbas.1 a8cat.1
 MAN5S=xex.5
 MAN7S=atascii.7
 DOCS=README.txt equates.inc *.dasm LICENSE ksiders/atr.txt
@@ -66,6 +66,12 @@ listbas: listbas.c bas.o bcdfp.o tokens.o
 
 bas.o: bas.c bas.h
 
+wtable.o: wtable.c wtable.h
+
+atables.o: atables.c atables.h
+
+a8cat: a8cat.c atables.o wtable.o
+
 subdirs:
 	for dir in $(SUBDIRS); do make -C $$dir COPT="$(COPT)"; done
 
@@ -75,6 +81,9 @@ xfd2atr: xfd2atr.c
 
 atr2xfd: atr2xfd.c
 
+atables.c: mkatables.pl
+	perl mkatables.pl > atables.c
+
 # note to cross-compiler users: If you're building the *.bin targets,
 # blob2c needs to be executable on the build host. It'd also be nice
 # to build a blob2c for the target platform... Probably you can do
diff --git a/a8cat.1 b/a8cat.1
new file mode 100644
index 0000000..d815a9c
--- /dev/null
+++ b/a8cat.1
@@ -0,0 +1,124 @@
+.\" Man page generated from reStructuredText.
+.
+.
+.nr rst2man-indent-level 0
+.
+.de1 rstReportMargin
+\\$1 \\n[an-margin]
+level \\n[rst2man-indent-level]
+level margin: \\n[rst2man-indent\\n[rst2man-indent-level]]
+-
+\\n[rst2man-indent0]
+\\n[rst2man-indent1]
+\\n[rst2man-indent2]
+..
+.de1 INDENT
+.\" .rstReportMargin pre:
+. RS \\$1
+. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin]
+. nr rst2man-indent-level +1
+.\" .rstReportMargin post:
+..
+.de UNINDENT
+. RE
+.\" indent \\n[an-margin]
+.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]]
+.nr rst2man-indent-level -1
+.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]]
+.in \\n[rst2man-indent\\n[rst2man-indent-level]]u
+..
+.TH "A8CAT" 1 "2024-06-29" "0.2.1" "Urchlay's Atari 8-bit Tools"
+.SH NAME
+a8cat \- Convert Atari 8-bit text to UTF-8 encoded Unicode.
+.SH SYNOPSIS
+.sp
+\fIa8cat\fP [\fB\-r\fP] [\fB\-i\fP] [\fB\-u\fP] [\fB\-t\fP] [\fIinfile\fP] [\fIinfile ...\fP]
+.SH DESCRIPTION
+.sp
+Convert Atari 8\-bit ATASCII or XL ICS (International Character
+Set) text to UTF\-8 encoded Unicode. Control graphics characters are
+replaced with their nearest Unicode equivalents (mostly from the Box
+Drawing block, or from the Basic Latin block with \fB\-i\fP option).
+.sp
+If no \fIinfile\fPs are given, input is read from standard input. Output always
+goes to standard output; to write to a file, use a command like:
+.INDENT 0.0
+.INDENT 3.5
+.sp
+.nf
+.ft C
+a8cat atari.txt > converted.txt
+.ft P
+.fi
+.UNINDENT
+.UNINDENT
+.sp
+The output is plain UTF\-8 Unicode, without BOM.
+.sp
+Inverse video characters (character codes \fB$80\fP and above) are translated
+using the ANSI/VT\-100 reverse video escape sequences. Exception: \fB$9B\fP
+(Atari EOL) is translated to \fB\en\fP (newline).
+.SH OPTIONS
+.INDENT 0.0
+.TP
+.B  \-i
+Input uses Atari XL/XE International Character Set encoding, rather than
+ATASCII graphics.
+.TP
+.B  \-u
+Use "underlining" for inverse video. Each inverse character is followed by
+a backspace, then a \fI_\fP character. When viewed in a pager such as \fBless\fP(1),
+this causes the characters to appear underlined. Output created with this
+option cannot be converted back to ATASCII with the \fB\-r\fP option.
+.TP
+.B  \-t
+Text mode. Normally, everything but EOL (\fB$9B\fP) is converted to a
+Unicode graphics character. In text mode, ATASCII tabs, backspaces,
+and bells are translated to their ASCII equivalents.
+.TP
+.B  \-r
+Reverse conversion: Input is UTF\-8, output is ATASCII (or XL ICS, with \fB\-i\fP).
+Beware that printing ATASCII to a terminal may look funny, and may even confuse
+the terminal. Redirecting to a file is safe.
+.UNINDENT
+.SH COPYRIGHT
+.sp
+WTFPL. See \fI\%http://www.wtfpl.net/txt/copying/\fP for details.
+.SH AUTHOR
+.INDENT 0.0
+.IP B. 3
+Watson <\fI\%urchlay@slackware.uk\fP>; Urchlay on irc.libera.chat \fI##atari\fP\&.
+.UNINDENT
+.SH SEE ALSO
+.sp
+\fBa8eol\fP(1),
+\fBa8utf8\fP(1),
+\fBatr2xfd\fP(1),
+\fBatrsize\fP(1),
+\fBaxe\fP(1),
+\fBblob2c\fP(1),
+\fBblob2xex\fP(1),
+\fBcart2xex\fP(1),
+\fBcxrefbas\fP(1),
+\fBdasm2atasm\fP(1),
+\fBdumpbas\fP(1),
+\fBf2toxex\fP(1),
+\fBfenders\fP(1),
+\fBlistbas\fP(1),
+\fBprotbas\fP(1),
+\fBrenumbas\fP(1),
+\fBrom2cart\fP(1),
+\fBunmac65\fP(1),
+\fBunprotbas\fP(1),
+\fBvxrefbas\fP(1),
+\fBxexamine\fP(1),
+\fBxexcat\fP(1),
+\fBxexsplit\fP(1),
+\fBxfd2atr\fP(1),
+\fBxex\fP(5),
+\fBatascii\fP(7).
+.sp
+Any good Atari 8\-bit book: \fIDe Re Atari\fP, \fIThe Atari BASIC  Reference
+Manual\fP,  the  \fIOS Users\(aq Guide\fP, \fIMapping the Atari\fP, etc.
+.\" Generated by docutils manpage writer.
+.
diff --git a/a8cat.c b/a8cat.c
new file mode 100644
index 0000000..752e96f
--- /dev/null
+++ b/a8cat.c
@@ -0,0 +1,187 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <locale.h>
+#include <wchar.h>
+#include <errno.h>
+
+#include "atables.h"
+#include "wtable.h"
+
+const char **table = ata2utf; /* conversion table used by a8cat(); main() switches it to ics2utf for -i */
+
+const char *inverse_on  = "\x1b[7m"; /* written by inverse(1); recognized by handle_escape_seq() */
+const char *inverse_off = "\x1b[0m"; /* written by inverse(0); recognized by handle_escape_seq() */
+
+int underline = 0, reverse = 0, textmode = 0, ics = 0; /* set by the -u, -r, -t and -i options in main() */
+
+void print_help(void) { /* Print the one-line usage summary to standard output. */
+	printf("Usage: a8cat [-r] [-i] [-u] [-t] [file ...]\n"); /* -r and -t were missing although main() accepts them */
+}
+
+FILE *open_input(const char *file) { /* Open file for binary reading; "-" means standard input. Returns NULL, after perror(), on failure. */
+	FILE *stream;
+	int want_stdin = (file[0] == '-' && file[1] == 0);
+
+	if(want_stdin) {
+		/* stdin is re-opened in "rb" mode instead of being used as-is */
+		stream = (freopen(NULL, "rb", stdin) ? stdin : NULL);
+	} else {
+		stream = fopen(file, "rb");
+	}
+
+	if(!stream) {
+		perror(want_stdin ? "(standard input)" : file);
+		return NULL;
+	}
+	return stream;
+}
+
+int handle_escape_seq(int inv, FILE *input) { /* Called after ESC was read from input. Returns the new inverse mask: 0x80, 0, or inv unchanged. */
+	int count, c;
+	char buf[5] = { 0x1b, 0, 0, 0, 0 }; /* ESC plus up to 3 more characters; the last element stays NUL */
+
+	for(count = 1; count < 4; count++) {
+		if( (c = fgetwc(input)) == WEOF ) break;
+		if(c > 0x7f) { ungetwc(c, input); break; } /* non-ASCII can't be part of our sequences and must not be truncated to a char: push it back for the caller */
+		buf[count] = c;
+	}
+
+	if(strcmp(inverse_on, buf) == 0) {
+		return 0x80;
+	} else if(strcmp(inverse_off, buf) == 0) {
+		return 0;
+	} else {
+		fputs(buf, stdout); /* not one of our two sequences: copy what was read to the output */
+		return inv;
+	}
+}
+
+int a8revcat(const char *file) { /* -r mode: read UTF-8 text from file ("-" = stdin), write ATASCII/ICS bytes to stdout. Returns 0, or 1 if the input can't be opened. */
+	FILE *input;
+	int c, d, inv = 0; /* inv is OR'ed into each output byte; handle_escape_seq() sets it to 0x80 or 0 */
+
+	if( !(input = open_input(file)) )
+		return 1;
+
+	setlocale(LC_CTYPE, "en_US.UTF-8"); /* selects the multibyte encoding fgetwc() decodes; result is not checked */
+	while( (c = fgetwc(input)) != WEOF ) {
+		if(c == 0x1b) {
+			inv = handle_escape_seq(inv, input);
+		} else if(c == '\n') {
+			putchar(0x9b); /* newline becomes Atari EOL, without the inverse bit */
+		} else if(c < 0x80) {
+			putchar(c | inv);
+		} else {
+			d = wchar2atascii(c, ics);
+			if(d == -1) {
+				fprintf(stderr, "warning: unrecognized Unicode character %04x\n", c);
+			} else {
+				putchar(d | inv);
+			}
+		}
+	}
+	fclose(input); /* previously leaked: a8cat() closes its input, this function did not */
+	return 0;
+}
+
+/* Write the "inverse on" (onoff non-zero) or "inverse off" (onoff zero)
+   escape sequence to stdout. XXX: both are hard-coded ANSI/vt100
+   sequences; terminfo would support any terminal, but is more complex. */
+void inverse(int onoff) {
+	if(onoff) fputs(inverse_on, stdout); else fputs(inverse_off, stdout);
+}
+
+int a8cat(const char *file) { /* Forward conversion of one input: ATASCII/ICS bytes in, UTF-8 on stdout. Returns 0, or 1 if the input can't be opened. */
+	FILE *input;
+	int c, inv = 0; /* inv: non-zero while an "inverse on" sequence is in effect */
+
+	if( !(input = open_input(file)) )
+		return 1;
+
+	while( (c = fgetc(input)) != EOF ) {
+		if(c == 0x9b) { /* Atari EOL */
+			putchar('\n');
+			continue;
+		}
+
+		if(textmode) {
+			switch(c) {
+				case 0x7f: /* tab: ATASCII uses $7F; $09 is a graphics character, not a tab */
+					putchar('\t');
+					continue;
+				case 0xfd: /* bell */
+					putchar('\a');
+					continue;
+				case 0x7e: /* backspace */
+					putchar('\b');
+					continue;
+				default: break;
+			}
+		}
+
+		if(!underline) {
+			/* strings of inverse chars only get one "inverse on" ANSI
+			   sequence, and one "inverse off" afterwards. */
+			if(c & 0x80) {
+				if(!inv) {
+					inv = 1;
+					inverse(1);
+				}
+			} else {
+				if(inv) {
+					inv = 0;
+					inverse(0);
+				}
+			}
+		}
+
+		fputs(table[c & 0x7f], stdout); /* only the low 7 bits index the table; bit 7 is handled as inverse video */
+
+		if(underline && (c & 0x80)) {
+			putchar('\b');
+			putchar('_');
+		}
+	}
+
+	/* gotta turn off inverse, so if there's another file after this one,
+	   it doesn't start out being printed in inverse. */
+	if(inv && !underline) inverse(0);
+
+	fclose(input);
+	return 0;
+}
+
+int main(int argc, char **argv) { /* Parse options, then convert stdin or each named file; the exit status is the sum of the converters' return values. */
+	int (*convert)(const char *);
+	int flag, failures = 0;
+
+	while( (flag = getopt(argc, argv, "ihurt")) != -1) {
+		if(flag == 'i') {
+			table = ics2utf;
+			ics = 1;
+		} else if(flag == 'u') {
+			underline = 1;
+		} else if(flag == 'r') {
+			reverse = 1;
+		} else if(flag == 't') {
+			textmode = 1;
+		} else { /* -h, or an option getopt() did not recognize */
+			print_help();
+			exit(flag == 'h' ? 0 : 1);
+		}
+	}
+
+	if(reverse && (underline || textmode)) {
+		fprintf(stderr, "-t and -u options don't make sense with -r.\n");
+		exit(1);
+	}
+
+	convert = (reverse ? a8revcat : a8cat);
+	if(optind >= argc)
+		failures = convert("-"); /* no file operands: read standard input */
+	for(; optind < argc; optind++)
+		failures += convert(argv[optind]);
+	exit(failures);
+}
diff --git a/a8cat.rst b/a8cat.rst
new file mode 100644
index 0000000..7557c01
--- /dev/null
+++ b/a8cat.rst
@@ -0,0 +1,58 @@
+=====
+a8cat
+=====
+
+--------------------------------------------------
+Convert Atari 8-bit text to UTF-8 encoded Unicode.
+--------------------------------------------------
+
+.. include:: manhdr.rst
+
+SYNOPSIS
+========
+
+*a8cat* [**-r**] [**-i**] [**-u**] [**-t**] [*infile*] [*infile ...*]
+
+DESCRIPTION
+===========
+
+Convert Atari 8-bit ATASCII or XL ICS (International Character
+Set) text to UTF-8 encoded Unicode. Control graphics characters are
+replaced with their nearest Unicode equivalents (mostly from the Box
+Drawing block, or from the Basic Latin block with **-i** option).
+
+If no *infile*\s are given, input is read from standard input. Output always
+goes to standard output; to write to a file, use a command like::
+
+  a8cat atari.txt > converted.txt
+
+The output is plain UTF-8 Unicode, without BOM.
+
+Inverse video characters (character codes **$80** and above) are translated
+using the ANSI/VT-100 reverse video escape sequences. Exception: **$9B**
+(Atari EOL) is translated to **\\n** (newline).
+
+OPTIONS
+=======
+
+-i
+  Input uses Atari XL/XE International Character Set encoding, rather than
+  ATASCII graphics.
+
+-u
+  Use "underlining" for inverse video. Each inverse character is followed by
+  a backspace, then a *_* character. When viewed in a pager such as **less**\(1),
+  this causes the characters to appear underlined. Output created with this
+  option cannot be converted back to ATASCII with the **-r** option.
+
+-t
+  Text mode. Normally, everything but EOL (**$9B**) is converted to a
+  Unicode graphics character. In text mode, ATASCII tabs, backspaces,
+  and bells are translated to their ASCII equivalents.
+
+-r
+  Reverse conversion: Input is UTF-8, output is ATASCII (or XL ICS, with **-i**).
+  Beware that printing ATASCII to a terminal may look funny, and may even confuse
+  the terminal. Redirecting to a file is safe.
+
+.. include:: manftr.rst
diff --git a/atables.c b/atables.c
new file mode 100644
index 0000000..ea6eedc
--- /dev/null
+++ b/atables.c
@@ -0,0 +1,265 @@
+/* ATASCII to UTF-8 tables. Generated by mkatables.pl.
+   Do not edit this file; edit mkatables.pl instead. */
+
+const char *ata2utf[] = {
+	"♥",  /*   0 $00    ^@ */
+	"┣",  /*   1 $01    ^A */
+	"┃",  /*   2 $02    ^B */
+	"┛",  /*   3 $03    ^C */
+	"┫",  /*   4 $04    ^D */
+	"┓",  /*   5 $05    ^E */
+	"╱",  /*   6 $06    ^F */
+	"╲",  /*   7 $07    ^G */
+	"◢",  /*   8 $08    ^H */
+	"▗",  /*   9 $09    ^I */
+	"◣",  /*  10 $0a    ^J */
+	"▝",  /*  11 $0b    ^K */
+	"▘",  /*  12 $0c    ^L */
+	"▔",  /*  13 $0d    ^M */
+	"▁",  /*  14 $0e    ^N */
+	"▖",  /*  15 $0f    ^O */
+	"♣",  /*  16 $10    ^P */
+	"┏",  /*  17 $11    ^Q */
+	"━",  /*  18 $12    ^R */
+	"╋",  /*  19 $13    ^S */
+	"●",  /*  20 $14    ^T */
+	"▄",  /*  21 $15    ^U */
+	"▎",  /*  22 $16    ^V */
+	"┳",  /*  23 $17    ^W */
+	"┻",  /*  24 $18    ^X */
+	"▌",  /*  25 $19    ^Y */
+	"┗",  /*  26 $1a    ^Z */
+	"␛",  /*  27 $1b    ^[ */
+	"↑",  /*  28 $1c    ^\ */
+	"↓",  /*  29 $1d    ^] */
+	"←",  /*  30 $1e    ^^ */
+	"→",  /*  31 $1f    ^_ */
+	" ",  /*  32 $20       */
+	"!",  /*  33 $21     ! */
+	"\"",  /*  34 $22     " */
+	"#",  /*  35 $23     # */
+	"$",  /*  36 $24     $ */
+	"%",  /*  37 $25     % */
+	"&",  /*  38 $26     & */
+	"'",  /*  39 $27     ' */
+	"(",  /*  40 $28     ( */
+	")",  /*  41 $29     ) */
+	"*",  /*  42 $2a     * */
+	"+",  /*  43 $2b     + */
+	",",  /*  44 $2c     , */
+	"-",  /*  45 $2d     - */
+	".",  /*  46 $2e     . */
+	"/",  /*  47 $2f     / */
+	"0",  /*  48 $30     0 */
+	"1",  /*  49 $31     1 */
+	"2",  /*  50 $32     2 */
+	"3",  /*  51 $33     3 */
+	"4",  /*  52 $34     4 */
+	"5",  /*  53 $35     5 */
+	"6",  /*  54 $36     6 */
+	"7",  /*  55 $37     7 */
+	"8",  /*  56 $38     8 */
+	"9",  /*  57 $39     9 */
+	":",  /*  58 $3a     : */
+	";",  /*  59 $3b     ; */
+	"<",  /*  60 $3c     < */
+	"=",  /*  61 $3d     = */
+	">",  /*  62 $3e     > */
+	"?",  /*  63 $3f     ? */
+	"@",  /*  64 $40     @ */
+	"A",  /*  65 $41     A */
+	"B",  /*  66 $42     B */
+	"C",  /*  67 $43     C */
+	"D",  /*  68 $44     D */
+	"E",  /*  69 $45     E */
+	"F",  /*  70 $46     F */
+	"G",  /*  71 $47     G */
+	"H",  /*  72 $48     H */
+	"I",  /*  73 $49     I */
+	"J",  /*  74 $4a     J */
+	"K",  /*  75 $4b     K */
+	"L",  /*  76 $4c     L */
+	"M",  /*  77 $4d     M */
+	"N",  /*  78 $4e     N */
+	"O",  /*  79 $4f     O */
+	"P",  /*  80 $50     P */
+	"Q",  /*  81 $51     Q */
+	"R",  /*  82 $52     R */
+	"S",  /*  83 $53     S */
+	"T",  /*  84 $54     T */
+	"U",  /*  85 $55     U */
+	"V",  /*  86 $56     V */
+	"W",  /*  87 $57     W */
+	"X",  /*  88 $58     X */
+	"Y",  /*  89 $59     Y */
+	"Z",  /*  90 $5a     Z */
+	"[",  /*  91 $5b     [ */
+	"\\",  /*  92 $5c     \ */
+	"]",  /*  93 $5d     ] */
+	"^",  /*  94 $5e     ^ */
+	"_",  /*  95 $5f     _ */
+	"◆",  /*  96 $60     ` */
+	"a",  /*  97 $61     a */
+	"b",  /*  98 $62     b */
+	"c",  /*  99 $63     c */
+	"d",  /* 100 $64     d */
+	"e",  /* 101 $65     e */
+	"f",  /* 102 $66     f */
+	"g",  /* 103 $67     g */
+	"h",  /* 104 $68     h */
+	"i",  /* 105 $69     i */
+	"j",  /* 106 $6a     j */
+	"k",  /* 107 $6b     k */
+	"l",  /* 108 $6c     l */
+	"m",  /* 109 $6d     m */
+	"n",  /* 110 $6e     n */
+	"o",  /* 111 $6f     o */
+	"p",  /* 112 $70     p */
+	"q",  /* 113 $71     q */
+	"r",  /* 114 $72     r */
+	"s",  /* 115 $73     s */
+	"t",  /* 116 $74     t */
+	"u",  /* 117 $75     u */
+	"v",  /* 118 $76     v */
+	"w",  /* 119 $77     w */
+	"x",  /* 120 $78     x */
+	"y",  /* 121 $79     y */
+	"z",  /* 122 $7a     z */
+	"♠",  /* 123 $7b     { */
+	"|",  /* 124 $7c     | */
+	"↰",  /* 125 $7d     } */
+	"◀",  /* 126 $7e     ~ */
+	"▶",  /* 127 $7f [del] */
+};
+
+const char *ics2utf[] = {
+	"á",  /*   0 $00    ^@ */
+	"ù",  /*   1 $01    ^A */
+	"Ñ",  /*   2 $02    ^B */
+	"É",  /*   3 $03    ^C */
+	"ç",  /*   4 $04    ^D */
+	"ô",  /*   5 $05    ^E */
+	"ò",  /*   6 $06    ^F */
+	"ì",  /*   7 $07    ^G */
+	"£",  /*   8 $08    ^H */
+	"ï",  /*   9 $09    ^I */
+	"ü",  /*  10 $0a    ^J */
+	"ä",  /*  11 $0b    ^K */
+	"Ö",  /*  12 $0c    ^L */
+	"ú",  /*  13 $0d    ^M */
+	"ó",  /*  14 $0e    ^N */
+	"ö",  /*  15 $0f    ^O */
+	"Ü",  /*  16 $10    ^P */
+	"â",  /*  17 $11    ^Q */
+	"û",  /*  18 $12    ^R */
+	"î",  /*  19 $13    ^S */
+	"é",  /*  20 $14    ^T */
+	"è",  /*  21 $15    ^U */
+	"ñ",  /*  22 $16    ^V */
+	"ê",  /*  23 $17    ^W */
+	"ȧ",  /*  24 $18    ^X */
+	"à",  /*  25 $19    ^Y */
+	"Ȧ",  /*  26 $1a    ^Z */
+	"␛",  /*  27 $1b    ^[ */
+	"↑",  /*  28 $1c    ^\ */
+	"↓",  /*  29 $1d    ^] */
+	"←",  /*  30 $1e    ^^ */
+	"→",  /*  31 $1f    ^_ */
+	" ",  /*  32 $20       */
+	"!",  /*  33 $21     ! */
+	"\"",  /*  34 $22     " */
+	"#",  /*  35 $23     # */
+	"$",  /*  36 $24     $ */
+	"%",  /*  37 $25     % */
+	"&",  /*  38 $26     & */
+	"'",  /*  39 $27     ' */
+	"(",  /*  40 $28     ( */
+	")",  /*  41 $29     ) */
+	"*",  /*  42 $2a     * */
+	"+",  /*  43 $2b     + */
+	",",  /*  44 $2c     , */
+	"-",  /*  45 $2d     - */
+	".",  /*  46 $2e     . */
+	"/",  /*  47 $2f     / */
+	"0",  /*  48 $30     0 */
+	"1",  /*  49 $31     1 */
+	"2",  /*  50 $32     2 */
+	"3",  /*  51 $33     3 */
+	"4",  /*  52 $34     4 */
+	"5",  /*  53 $35     5 */
+	"6",  /*  54 $36     6 */
+	"7",  /*  55 $37     7 */
+	"8",  /*  56 $38     8 */
+	"9",  /*  57 $39     9 */
+	":",  /*  58 $3a     : */
+	";",  /*  59 $3b     ; */
+	"<",  /*  60 $3c     < */
+	"=",  /*  61 $3d     = */
+	">",  /*  62 $3e     > */
+	"?",  /*  63 $3f     ? */
+	"@",  /*  64 $40     @ */
+	"A",  /*  65 $41     A */
+	"B",  /*  66 $42     B */
+	"C",  /*  67 $43     C */
+	"D",  /*  68 $44     D */
+	"E",  /*  69 $45     E */
+	"F",  /*  70 $46     F */
+	"G",  /*  71 $47     G */
+	"H",  /*  72 $48     H */
+	"I",  /*  73 $49     I */
+	"J",  /*  74 $4a     J */
+	"K",  /*  75 $4b     K */
+	"L",  /*  76 $4c     L */
+	"M",  /*  77 $4d     M */
+	"N",  /*  78 $4e     N */
+	"O",  /*  79 $4f     O */
+	"P",  /*  80 $50     P */
+	"Q",  /*  81 $51     Q */
+	"R",  /*  82 $52     R */
+	"S",  /*  83 $53     S */
+	"T",  /*  84 $54     T */
+	"U",  /*  85 $55     U */
+	"V",  /*  86 $56     V */
+	"W",  /*  87 $57     W */
+	"X",  /*  88 $58     X */
+	"Y",  /*  89 $59     Y */
+	"Z",  /*  90 $5a     Z */
+	"[",  /*  91 $5b     [ */
+	"\\",  /*  92 $5c     \ */
+	"]",  /*  93 $5d     ] */
+	"^",  /*  94 $5e     ^ */
+	"_",  /*  95 $5f     _ */
+	"¡",  /*  96 $60     ` */
+	"a",  /*  97 $61     a */
+	"b",  /*  98 $62     b */
+	"c",  /*  99 $63     c */
+	"d",  /* 100 $64     d */
+	"e",  /* 101 $65     e */
+	"f",  /* 102 $66     f */
+	"g",  /* 103 $67     g */
+	"h",  /* 104 $68     h */
+	"i",  /* 105 $69     i */
+	"j",  /* 106 $6a     j */
+	"k",  /* 107 $6b     k */
+	"l",  /* 108 $6c     l */
+	"m",  /* 109 $6d     m */
+	"n",  /* 110 $6e     n */
+	"o",  /* 111 $6f     o */
+	"p",  /* 112 $70     p */
+	"q",  /* 113 $71     q */
+	"r",  /* 114 $72     r */
+	"s",  /* 115 $73     s */
+	"t",  /* 116 $74     t */
+	"u",  /* 117 $75     u */
+	"v",  /* 118 $76     v */
+	"w",  /* 119 $77     w */
+	"x",  /* 120 $78     x */
+	"y",  /* 121 $79     y */
+	"z",  /* 122 $7a     z */
+	"Ä",  /* 123 $7b     { */
+	"|",  /* 124 $7c     | */
+	"↰",  /* 125 $7d     } */
+	"◀",  /* 126 $7e     ~ */
+	"▶",  /* 127 $7f [del] */
+};
+
diff --git a/atables.h b/atables.h
new file mode 100644
index 0000000..56e6c34
--- /dev/null
+++ b/atables.h
@@ -0,0 +1,2 @@
+extern const char *ata2utf[]; /* UTF-8 string for each ATASCII code 0-127; defined in atables.c */
+extern const char *ics2utf[]; /* same layout, for the XL international character set (-i) */
diff --git a/mkatables.pl b/mkatables.pl
new file mode 100644
index 0000000..1eb3a08
--- /dev/null
+++ b/mkatables.pl
@@ -0,0 +1,116 @@
+#!/usr/bin/perl -w
+
+%atascii = (
+	0 => "♥",
+	1 => "┣",
+	2 => "┃",
+	3 => "┛",
+	4 => "┫",
+	5 => "┓",
+	6 => "╱",
+	7 => "╲",
+	8 => "◢",
+	9 => "▗",
+	10 => "◣",
+	11 => "▝",
+	12 => "▘",
+	13 => "▔",
+	14 => "▁",
+	15 => "▖",
+	16 => "♣",
+	17 => "┏",
+	18 => "━",
+	19 => "╋",
+	20 => "●",
+	21 => "▄",
+	22 => "▎",
+	23 => "┳",
+	24 => "┻",
+	25 => "▌",
+	26 => "┗",
+	27 => "␛",
+	28 => "↑",
+	29 => "↓",
+	30 => "←",
+	31 => "→",
+	34 => "\\\"",
+	92 => "\\\\",
+	96 => "◆",
+	123 => "♠",
+	125 => "↰",
+	126 => "◀",
+	127 => "▶",
+);
+
+%xl = (
+	0 => "á",
+	1 => "ù",
+	2 => "Ñ",
+	3 => "É",
+	4 => "ç",
+	5 => "ô",
+	6 => "ò",
+	7 => "ì",
+	8 => "£",
+	9 => "ï",
+	10 => "ü",
+	11 => "ä",
+	12 => "Ö",
+	13 => "ú",
+	14 => "ó",
+	15 => "ö",
+	16 => "Ü",
+	17 => "â",
+	18 => "û",
+	19 => "î",
+	20 => "é",
+	21 => "è",
+	22 => "ñ",
+	23 => "ê",
+	24 => "ȧ",
+	25 => "à",
+	26 => "Ȧ",
+	27 => "␛",
+	28 => "↑",
+	29 => "↓",
+	30 => "←",
+	31 => "→",
+	34 => "\\\"",
+	92 => "\\\\",
+	96 => "¡",
+	123 => "Ä",
+	125 => "↰",
+	126 => "◀",
+	127 => "▶",
+);
+
+sub getcharname { # label for character code $c, used in the comment column of the generated tables
+	my $c = shift;
+	if($c == 127) {
+		return "[del]";
+	} elsif($c < 32) {
+		return "^" . chr($c + 64); # caret notation: 0 -> ^@, 1 -> ^A, ...
+	} else {
+		return chr($c); # 32..126 label themselves
+	}
+}
+
+sub mktable { # print one C string table named $name, with 128 entries, to stdout
+	my ($name, $hash) = @_; # $hash: reference to a hash of character code => replacement string
+
+	print "const char *$name\[\] = {\n";
+	for (0..127) {
+		my $cmt = sprintf("/* %3d \$%02x %5s */", $_, $_, getcharname($_));
+		print "\t\"" . ($hash->{$_} || chr($_)), "\",  $cmt\n"; # codes absent from the hash are emitted as themselves
+	}
+	print "};\n\n";
+}
+
+print <<EOF;
+/* ATASCII to UTF-8 tables. Generated by mkatables.pl.
+   Do not edit this file; edit mkatables.pl instead. */
+
+EOF
+
+mktable("ata2utf", \%atascii);
+mktable("ics2utf", \%xl);
diff --git a/wtable.c b/wtable.c
new file mode 100644
index 0000000..3c008b3
--- /dev/null
+++ b/wtable.c
@@ -0,0 +1,140 @@
+/* ref:
+https://stackoverflow.com/questions/21737906/how-to-read-write-utf8-text-files-in-c
+*/
+
+#include <stdio.h>
+#include <wchar.h>
+#include "wtable.h"
+
+/*
+#define WSEARCH_DEBUG
+*/
+
+wint_t wchar2ata[][2] = {
+	/* { Unicode code point, ATASCII code }. Keep rows sorted by code point: wsearch() is a binary search. */
+	{ 0x2190, 0x1e },
+	{ 0x2191, 0x1c },
+	{ 0x2192, 0x1f },
+	{ 0x2193, 0x1d },
+	{ 0x21b0, 0x7d },
+	{ 0x241b, 0x1b },
+	{ 0x2501, 0x12 },
+	{ 0x2503, 0x02 },
+	{ 0x250f, 0x11 },
+	{ 0x2513, 0x05 },
+	{ 0x2517, 0x1a },
+	{ 0x251b, 0x03 },
+	{ 0x2523, 0x01 },
+	{ 0x252b, 0x04 },
+	{ 0x2533, 0x17 },
+	{ 0x253b, 0x18 },
+	{ 0x254b, 0x13 },
+	{ 0x2571, 0x06 },
+	{ 0x2572, 0x07 },
+	{ 0x2581, 0x0e },
+	{ 0x2584, 0x15 },
+	{ 0x258c, 0x19 },
+	{ 0x258e, 0x16 },
+	{ 0x2594, 0x0d },
+	{ 0x2596, 0x0f },
+	{ 0x2597, 0x09 },
+	{ 0x2598, 0x0c },
+	{ 0x259d, 0x0b },
+	{ 0x25b6, 0x7f },
+	{ 0x25c0, 0x7e },
+	{ 0x25c6, 0x60 },
+	{ 0x25cf, 0x14 },
+	{ 0x25e2, 0x08 },
+	{ 0x25e3, 0x0a },
+	{ 0x2660, 0x7b },
+	{ 0x2663, 0x10 },
+	{ 0x2665, 0x00 },
+};
+
+wint_t wchar2ics[][2] = {
+	/* { Unicode code point, XL ICS code }, used when ics is set. Keep rows sorted by code point: wsearch() is a binary search. */
+	{ 0x00a1, 0x60 },
+	{ 0x00a3, 0x08 },
+	{ 0x00c4, 0x7b },
+	{ 0x00c9, 0x03 },
+	{ 0x00d1, 0x02 },
+	{ 0x00d6, 0x0c },
+	{ 0x00dc, 0x10 },
+	{ 0x00e0, 0x19 },
+	{ 0x00e1, 0x00 },
+	{ 0x00e2, 0x11 },
+	{ 0x00e4, 0x0b },
+	{ 0x00e7, 0x04 },
+	{ 0x00e8, 0x15 },
+	{ 0x00e9, 0x14 },
+	{ 0x00ea, 0x17 },
+	{ 0x00ec, 0x07 },
+	{ 0x00ee, 0x13 },
+	{ 0x00ef, 0x09 },
+	{ 0x00f1, 0x16 },
+	{ 0x00f2, 0x06 },
+	{ 0x00f3, 0x0e },
+	{ 0x00f4, 0x05 },
+	{ 0x00f6, 0x0f },
+	{ 0x00f9, 0x01 },
+	{ 0x00fa, 0x0d },
+	{ 0x00fb, 0x12 },
+	{ 0x00fc, 0x0a },
+	{ 0x0226, 0x1a }, /* NOTE(review): U+0226 is A WITH DOT ABOVE; the Atari set is usually shown with A WITH RING ABOVE (U+00C5) - confirm, together with ics2utf */
+	{ 0x0227, 0x18 }, /* NOTE(review): likewise U+0227 (a with dot above) vs. U+00E5 (a with ring above) - confirm */
+	{ 0x2190, 0x1e },
+	{ 0x2191, 0x1c },
+	{ 0x2192, 0x1f },
+	{ 0x2193, 0x1d },
+	{ 0x21b0, 0x7d },
+	{ 0x241b, 0x1b },
+	{ 0x25b6, 0x7f },
+	{ 0x25c0, 0x7e },
+};
+
+static int tblsize = sizeof(wchar2ata) / sizeof(wchar2ata[0]); /* number of rows in wchar2ata */
+
+static int wsearch(wint_t table[][2], wint_t target, int start, int end) { /* Binary search of rows start..end (inclusive, sorted on column 0) for target. Returns column 1, or -1 if absent; int rather than wint_t, which may be unsigned and unable to hold -1. */
+	wint_t *elem;
+	int center;
+
+#ifdef WSEARCH_DEBUG
+	fprintf(stderr, "wsearch(0x%04x, %d, %d)\n", target, start, end);
+#endif
+
+	if(start == end) { /* a single candidate row is left */
+		if(table[start][0] == target)
+			return table[start][1];
+		else
+			return -1;
+	} else {
+		center = (start + end) / 2;
+		elem = table[center];
+
+#ifdef WSEARCH_DEBUG
+		fprintf(stderr, "elem = 0x%04x, 0x%02x\n", elem[0], elem[1]);
+#endif
+
+		if(elem[0] == target)
+			return elem[1];
+		else if(elem[0] > target)
+			return wsearch(table, target, start, center); /* center < end here, so the range still shrinks */
+		else
+			return wsearch(table, target, center + 1, end);
+	}
+}
+
+int wchar2atascii(wint_t wc, int ics) { /* Map Unicode code point wc to its ATASCII (ics == 0) or XL ICS (ics != 0) code; -1 if the selected table has no entry. */
+	return ics ? wsearch(wchar2ics, wc, 0, (int)(sizeof(wchar2ics) / sizeof(wchar2ics[0])) - 1) : wsearch(wchar2ata, wc, 0, tblsize - 1); /* each table is searched with its own row count; tblsize only describes wchar2ata */
+}
+
+#ifdef WSEARCH_DEBUG
+int main(int argc, char **argv) { /* stand-alone lookup check, compiled only when WSEARCH_DEBUG is defined */
+	printf("%02x\n", wchar2atascii(0x2190, 0)); /* first row of wchar2ata */
+	printf("%02x\n", wchar2atascii(0x2571, 0));
+	printf("%02x\n", wchar2atascii(0x25c6, 0));
+	printf("%02x\n", wchar2atascii(0x2665, 0)); /* last row of wchar2ata */
+	printf("%02x\n", wchar2atascii(0x2510, 0)); /* not in the table: exercises the miss path */
+	return 0;
+}
+#endif
diff --git a/wtable.h b/wtable.h
new file mode 100644
index 0000000..11c5fa2
--- /dev/null
+++ b/wtable.h
@@ -0,0 +1,2 @@
+extern wint_t wchar2ata[][2]; /* { Unicode code point, ATASCII code } rows, defined in wtable.c; include <wchar.h> first for wint_t */
+extern int wchar2atascii(wint_t wc, int ics); /* ATASCII (ics == 0) or XL ICS (ics != 0) code for wc; callers test for -1 on a miss */