From e79bf42c2fa43f131f79d0891694e59434f820e9 Mon Sep 17 00:00:00 2001 From: "B. Watson" Date: Sat, 29 Jun 2024 05:58:21 -0400 Subject: a8cat: handle ASCII tab/backspace/bell in -r mode. --- a8cat.1 | 23 +++++++++++++++-------- a8cat.c | 6 ++++++ a8cat.rst | 27 +++++++++++++++++---------- 3 files changed, 38 insertions(+), 18 deletions(-) diff --git a/a8cat.1 b/a8cat.1 index b6d85a4..42f2017 100644 --- a/a8cat.1 +++ b/a8cat.1 @@ -29,7 +29,7 @@ level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] .. .TH "A8CAT" 1 "2024-06-29" "0.2.1" "Urchlay's Atari 8-bit Tools" .SH NAME -a8cat \- Convert Atari 8-bit text to UTF-8 encoded Unicode. +a8cat \- Convert Atari 8-bit text to UTF-8 encoded Unicode (and back). .SH SYNOPSIS .sp \fIa8cat\fP [\fB\-r\fP] [\fB\-i\fP] [\fB\-u\fP] [\fB\-t\fP] [\fIinfile\fP] [\fIinfile ...\fP] @@ -53,13 +53,17 @@ a8cat atari.txt > converted.txt .UNINDENT .UNINDENT .sp -The output is plain UTF\-8 Unicode, without BOM. It will display correctly -in modern terminals that support Unicode and UTF\-8. If you get lots of -"empty rectangle" characters, it means your font lacks the glyphs for -the codepoints; try using the Deja Vu Sans and/or Symbola fonts. +The output is UTF\-8 Unicode, without BOM, but possibly with +ANSI/VT\-100 control sequences. It will display correctly in modern +terminals that support Unicode and UTF\-8. If you get lots of "empty +rectangle" characters, it means your font lacks the glyphs for the +codepoints; try using the Deja Vu Sans and/or Symbola fonts. +.sp +If you want to pipe the output to a pager, \fBless \-MR\fP is +recommended. It will display the inverse characters correctly. .sp It\(aqs even possible to edit the converted text and turn it back into -ATASCII, if you\(aqre very careful. +ATASCII, if you\(aqre very careful. See the \fB\-r\fP option, below. .sp Inverse video (characters codes above \fB$80\fP) are translated using the ANSI/VT\-100 reverse video escape sequences. Exception: \fB$9B\fP @@ -80,8 +84,7 @@ option cannot be converted back to ATASCII with the \fB\-r\fP option. .B \-t Text mode. Normally, everything but EOL (\fB$9B\fP) is converted to a Unicode graphics character. In text mode, ATASCII tabs, backspace, -and bells are translated to the ASCII versions. Output created with this -option cannot be converted back to ATASCII with the \fB\-r\fP option. +and bells are translated to the ASCII versions. .TP .B \-r Reverse conversion: Input is UTF\-8, output is ATASCII (or XL ICS, with \fB\-i\fP). @@ -106,6 +109,10 @@ be any inverse\-video characters in the result. .UNINDENT .INDENT 0.0 .TP +.B \fB\-\-\fP +End of options; the rest of the arguments are filenames. Use this if you\(aqre +trying to work with files whose names begin with \fI\-\fP\&. +.TP .B \fB\-h\fP, \fB\-\-help\fP Show built\-in help and exit. .UNINDENT diff --git a/a8cat.c b/a8cat.c index 6395a26..35d8535 100644 --- a/a8cat.c +++ b/a8cat.c @@ -72,6 +72,12 @@ int a8revcat(const char *file) { inv = handle_escape_seq(inv, input); } else if(c == '\n') { putchar(0x9b); + } else if(c == '\t') { + putchar(0x7f); + } else if(c == '\b') { + putchar(0x7e); + } else if(c == '\a') { + putchar(0xfd); } else if(c < 0x80) { putchar(c | inv); } else { diff --git a/a8cat.rst b/a8cat.rst index 68a4297..7b91fa9 100644 --- a/a8cat.rst +++ b/a8cat.rst @@ -2,9 +2,9 @@ a8cat ===== --------------------------------------------------- -Convert Atari 8-bit text to UTF-8 encoded Unicode. --------------------------------------------------- +------------------------------------------------------------- +Convert Atari 8-bit text to UTF-8 encoded Unicode (and back). +------------------------------------------------------------- .. include:: manhdr.rst @@ -26,13 +26,17 @@ goes to standard output; to write to a file, use a command like:: a8cat atari.txt > converted.txt -The output is plain UTF-8 Unicode, without BOM. It will display correctly -in modern terminals that support Unicode and UTF-8. If you get lots of -"empty rectangle" characters, it means your font lacks the glyphs for -the codepoints; try using the Deja Vu Sans and/or Symbola fonts. +The output is UTF-8 Unicode, without BOM, but possibly with +ANSI/VT-100 control sequences. It will display correctly in modern +terminals that support Unicode and UTF-8. If you get lots of "empty +rectangle" characters, it means your font lacks the glyphs for the +codepoints; try using the Deja Vu Sans and/or Symbola fonts. + +If you want to pipe the output to a pager, **less -MR** is +recommended. It will display the inverse characters correctly. It's even possible to edit the converted text and turn it back into -ATASCII, if you're very careful. +ATASCII, if you're very careful. See the **-r** option, below. Inverse video (characters codes above **$80**) are translated using the ANSI/VT-100 reverse video escape sequences. Exception: **$9B** @@ -54,8 +58,7 @@ OPTIONS -t Text mode. Normally, everything but EOL (**$9B**) is converted to a Unicode graphics character. In text mode, ATASCII tabs, backspace, - and bells are translated to the ASCII versions. Output created with this - option cannot be converted back to ATASCII with the **-r** option. + and bells are translated to the ASCII versions. -r Reverse conversion: Input is UTF-8, output is ATASCII (or XL ICS, with **-i**). @@ -78,6 +81,10 @@ OPTIONS back to ATASCII with the **-r** option, but of course there won't be any inverse-video characters in the result. +**--** + End of options; the rest of the arguments are filenames. Use this if you're + trying to work with files whose names begin with *-*. + **-h**, **--help** Show built-in help and exit. -- cgit v1.2.3