aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author B. Watson <urchlay@slackware.uk> 2025-11-18 01:36:46 -0500
committer B. Watson <urchlay@slackware.uk> 2025-11-18 01:36:46 -0500
commit 8f14d6dd1293e0f1e2d3f65b596f0fa6f162a383 (patch)
tree 2aea8ca7b98f02da42a4013fefce74e433f4f3a7
parent 409ec782e1a74cbd86b6e51b49c0785021be0fdc (diff)
download unalf-8f14d6dd1293e0f1e2d3f65b596f0fa6f162a383.tar.gz
Better text file detection for -aa; convert tabs with -a or -aa.
-rw-r--r-- src/extract.c 2
-rw-r--r-- src/io.c 35
-rw-r--r-- src/unalf.1 20
-rw-r--r-- src/unalf.h 1
-rw-r--r-- src/unalf.rst 18
-rw-r--r-- src/usage.c 3
6 files changed, 50 insertions, 29 deletions
diff --git a/src/extract.c b/src/extract.c
index 1b9cf72..21d2bb4 100644
--- a/src/extract.c
+++ b/src/extract.c
@@ -8,6 +8,7 @@
#include "addrs.h"
int bad_checksum, bad_checksum_count = 0;
+int new_file = 0;
char *out_filename;
void dpoke(int addr, u16 value) {
@@ -159,6 +160,7 @@ void extract_alf(void) {
}
bad_checksum = 0;
+ new_file = 1;
uncrunch_file();
if(bad_checksum) bad_checksum_count++;
diff --git a/src/io.c b/src/io.c
index 2323700..1896df9 100644
--- a/src/io.c
+++ b/src/io.c
@@ -5,6 +5,7 @@
#include "addrs.h"
static int headers_read = 0;
+static int convert_eols = 0;
static void die_arc(void) {
fprintf(stderr, "%s: this is an ARC file, not ALF\n", self);
@@ -104,27 +105,41 @@ void readblock(void) {
dpoke(buf_len_l, bytes);
}
-int is_text_file(char *fn) {
- if(globmatch("*.txt", fn)) return 1;
- if(globmatch("*.doc", fn)) return 1;
- if(globmatch("*.lst", fn)) return 1;
- return 0;
+static int is_printable(u8 c) {
+ return (c == 0x9b || (c >= ' ' && c <= 124));
+}
+
+static int is_text_file(u8 *buf) {
+ return is_printable(buf[0]) && is_printable(buf[1]);
}
-/* mirror of readblock() */
+/* mirror of readblock(), plus EOL and tab conversion if needed. With -a,
+ a file is considered text if its first 2 bytes are printable ATASCII
+ (EOL counts as printable). With -aa, all files are converted. */
void writeblock(void) {
int i, bytes, len, bufadr;
u8 *buf;
- extern char *out_filename;
bufadr = dpeek(buf_adr_l);
buf = mem + bufadr;
len = dpeek(buf_len_l);
- if(opts.txtconv) {
- if(opts.txtconv > 1 || is_text_file(out_filename))
- for(i = 0; i < len; i++)
+ if(new_file) {
+ if(opts.txtconv > 1) {
+ convert_eols = 1;
+ } else if(opts.txtconv == 1 && len > 1) {
+ convert_eols = is_text_file(buf);
+ } else {
+ convert_eols = 0;
+ }
+ }
+ new_file = 0;
+
+ if(convert_eols) {
+ for(i = 0; i < len; i++) {
if(buf[i] == 0x9b) buf[i] = '\n';
+ if(buf[i] == 0x7f) buf[i] = '\t';
+ }
}
// fprintf(stderr, "writeblock, bufadr = $%04x, len = $%04x\n", bufadr, len);
diff --git a/src/unalf.1 b/src/unalf.1
index 5d25618..34d1571 100644
--- a/src/unalf.1
+++ b/src/unalf.1
@@ -27,7 +27,7 @@ level margin: \\n[rst2man-indent\\n[rst2man-indent-level]]
.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]]
.in \\n[rst2man-indent\\n[rst2man-indent-level]]u
..
-.TH "UNALF" 1 "2025-11-17" "0.1.0" "Urchlay's Atari 8-bit Tools"
+.TH "UNALF" 1 "2025-11-18" "0.1.0" "Urchlay's Atari 8-bit Tools"
.SH NAME
unalf \- extract Atari 8-bit ALF archives
.\" RST source for unalf(1) man page. Convert with:
@@ -56,22 +56,24 @@ below for details.
.INDENT 0.0
.TP
.B \-a
-Convert text file line endings from Atari EOL to UNIX newline. Text
-files are detected by name: \fB*.txt\fP, \fB*.doc\fP, \fB*.lst\fP files
-are considered text.
+Convert text file line endings and tabs from ATASCII to ASCII. Text
+files are detected by looking at the first 2 bytes of the extracted
+file. If both are printable ASCII (or ATASCII EOL, aka \fB$9B\fP), the
+file is considered text.
.sp
-Note that \fIonly\fP line\-endings are converted. Other ATASCII characters
-are left alone. If you need anything more in\-depth, use \fBa8cat\fP(1).
+Note that \fIonly\fP line\-endings and tabs are converted. Other ATASCII
+characters are left alone. If you need anything more in\-depth, use
+\fBa8cat\fP(1).
.UNINDENT
-.\" convert EOLs in text files.
+.\" convert EOLs and tabs in text files.
.
.INDENT 0.0
.TP
-.BI \-a\fB a
+.B \fB\-aa\fP
Convert line endings in all extracted files. This will corrupt any
executables or non\-text data files, so use with caution.
.UNINDENT
-.\" convert EOLs in ALL files.
+.\" convert EOLs and tabs in ALL files.
.
.INDENT 0.0
.TP
diff --git a/src/unalf.h b/src/unalf.h
index 422183a..12654ec 100644
--- a/src/unalf.h
+++ b/src/unalf.h
@@ -45,6 +45,7 @@ unsigned int getquad(int offs);
/* extract.c */
extern int bad_checksum_count;
+extern int new_file;
void extract_alf(void);
void dpoke(int addr, u16 value);
u16 dpeek(int addr);
diff --git a/src/unalf.rst b/src/unalf.rst
index e23debd..4c3a658 100644
--- a/src/unalf.rst
+++ b/src/unalf.rst
@@ -44,20 +44,22 @@ OPTIONS
=======
-a
- Convert text file line endings from Atari EOL to UNIX newline. Text
- files are detected by name: **\*.txt**, **\*.doc**, **\*.lst** files
- are considered text.
+ Convert text file line endings and tabs from ATASCII to ASCII. Text
+ files are detected by looking at the first 2 bytes of the extracted
+ file. If both are printable ASCII (or ATASCII EOL, aka **$9B**), the
+ file is considered text.
- Note that *only* line-endings are converted. Other ATASCII characters
- are left alone. If you need anything more in-depth, use **a8cat**\(1).
+ Note that *only* line-endings and tabs are converted. Other ATASCII
+ characters are left alone. If you need anything more in-depth, use
+ **a8cat**\(1).
-.. convert EOLs in text files.
+.. convert EOLs and tabs in text files.
--aa
+**-aa**
Convert line endings in all extracted files. This will corrupt any
executables or non-text data files, so use with caution.
-.. convert EOLs in ALL files.
+.. convert EOLs and tabs in ALL files.
-d output-dir
Write extracted files to this directory, which will be created if it
diff --git a/src/usage.c b/src/usage.c
index 554aa01..260a7c6 100644
--- a/src/usage.c
+++ b/src/usage.c
@@ -1,6 +1,5 @@
const char *usage_msg[] = {
- " -a: convert EOLs in text files.",
- " -aa: convert EOLs in ALL files.",
+ " -a: convert EOLs and tabs in text files.",
" -d output-dir: set output directory (created if needed).",
" -e: extract files (redundant; this is the default action).",
" -h, --help: show this help message.",