From 664a630518f8f92a8b262e973790bbdb0dfc33ad Mon Sep 17 00:00:00 2001 From: "B. Watson" Date: Wed, 26 Nov 2025 05:44:24 -0500 Subject: alf: Bounds checking on input and output sizes. Document unalf bug with files >=15MB. Use atarified filename in 'Crunching' message. --- TODO.txt | 7 +++++++ src/alf.1 | 14 ++++++++------ src/alf.c | 18 ++++++++++++++---- src/alf.rst | 14 ++++++++------ src/unalf.1 | 8 ++++++++ src/unalf.rst | 10 ++++++++++ 6 files changed, 55 insertions(+), 16 deletions(-) diff --git a/TODO.txt b/TODO.txt index d78f79c..e34e1d5 100644 --- a/TODO.txt +++ b/TODO.txt @@ -2,4 +2,11 @@ - clean up alf, try to make it faster. +- figure out why unalf craps out when extracting files of 15MB + but works fine for 14MB. when this happens, there's no error, + but the extracted file is like 7KB (and has the correct data + for the first 7KB of the file). this could be a bug in either + alf or unalf. I'll have to try it with LZ/DZ in an emulator + (with 'turbo speed' on, or else it'll take days). + - write a formal spec of the alf compression stream format? diff --git a/src/alf.1 b/src/alf.1 index c51cd8c..a2185bb 100644 --- a/src/alf.1 +++ b/src/alf.1 @@ -94,12 +94,14 @@ Note that \fBalf\fP is a complete reverse\-engineered rewrite in C, \fInot\fP a port of the original 6502 code as \fBunalf\fP is. It\(aqs still being tested, and may still contain bugs. .sp -Performance is \fIhorrible\fP, O(n^2) or worse. This shouldn\(aqt be a real -problem on modern multi\-GHz CPU, especially since most Atari 8\-bit -files are small (usually under 64KB). Compressing a 1.3MB text file -takes 5 seconds on the author\(aqs (rather modest) Intel i7 workstation. -A 50KB file takes 0.2 seconds, which is more typical of the files -you\(aqd actually use this with. +Performance is \fIhorrible\fP\&. This shouldn\(aqt be a real problem on modern +multi\-GHz CPU, especially since most Atari 8\-bit files are small +(usually under 64KB). 
Interestingly, it\(aqs not O(n^2), it scales +linearly, O(n): Compressing a 1.3MB text file takes 5 seconds on the +author\(aqs (rather modest) Intel i7 workstation, and a file 10x as large +takes approximately 10x as long (50 seconds). A 50KB file takes 0.2 +seconds, which is more typical of the files you\(aqd actually use this +with. .sp The date/time stamps stored in the archive are the \fBmtime\fPs of the files (which is the same time \fBls\fP(1) shows, by default), and diff --git a/src/alf.c b/src/alf.c index 12aafad..c3b210d 100644 --- a/src/alf.c +++ b/src/alf.c @@ -26,7 +26,7 @@ u8 input_buf[MAX_INPUT_SIZE]; u8 output_buf[MAX_INPUT_SIZE]; u8 byte_tokens[256]; -int input_len, output_len, out_bitpos; +unsigned int input_len, output_len, out_bitpos; int opt_append = 0; int opt_overwrite = 0; @@ -145,6 +145,7 @@ void create_header(void) { char hdr_filename[13]; atarify_filename(hdr_filename); + printf("Crunching %s\n", hdr_filename); output_buf[0] = 0x1a; output_buf[1] = 0x0f; @@ -169,12 +170,19 @@ void open_input(const char *filename) { } } +void inc_output_len(void) { + if(++output_len == MAX_INPUT_SIZE) { + fprintf(stderr, "%s: fatal: compressed file would be >16MB.\n", self); + exit(1); + } +} + void append_bit(int bit) { output_buf[output_len] |= (bit << (7 - out_bitpos)); out_bitpos++; if(out_bitpos == 8) { out_bitpos = 0; - output_len++; + inc_output_len(); } } @@ -244,18 +252,20 @@ void crunch(void) { } store_token(TOK_END); - if(out_bitpos) output_len++; + if(out_bitpos) inc_output_len(); update_header(); } void crunch_file(const char *filename) { init_table(); - printf("Crunching %s\n", filename); open_input(filename); /* read in entire input, couldn't do it this way on the Atari */ input_len = fread(input_buf, 1, MAX_INPUT_SIZE - 1, in_file); + if(!feof(in_file)) { + fprintf(stderr, "%s: warning: this file is too large; only compressing the first 16MB.\n", self); + } output_len = 0; fstat(fileno(in_file), &in_file_stat); /* for timestamp */ 
fclose(in_file); diff --git a/src/alf.rst b/src/alf.rst index 3e998b3..63c88b2 100644 --- a/src/alf.rst +++ b/src/alf.rst @@ -81,12 +81,14 @@ Note that **alf** is a complete reverse-engineered rewrite in C, *not* a port of the original 6502 code as **unalf** is. It's still being tested, and may still contain bugs. -Performance is *horrible*, O(n^2) or worse. This shouldn't be a real -problem on modern multi-GHz CPU, especially since most Atari 8-bit -files are small (usually under 64KB). Compressing a 1.3MB text file -takes 5 seconds on the author's (rather modest) Intel i7 workstation. -A 50KB file takes 0.2 seconds, which is more typical of the files -you'd actually use this with. +Performance is *horrible*. This shouldn't be a real problem on modern +multi-GHz CPU, especially since most Atari 8-bit files are small +(usually under 64KB). Interestingly, it's not O(n^2), it scales +linearly, O(n): Compressing a 1.3MB text file takes 5 seconds on the +author's (rather modest) Intel i7 workstation, and a file 10x as large +takes approximately 10x as long (50 seconds). A 50KB file takes 0.2 +seconds, which is more typical of the files you'd actually use this +with. The date/time stamps stored in the archive are the **mtime**\s of the files (which is the same time **ls**\(1) shows, by default), and diff --git a/src/unalf.1 b/src/unalf.1 index 59709ee..1708a4b 100644 --- a/src/unalf.1 +++ b/src/unalf.1 @@ -356,6 +356,14 @@ old disk images as \fBUNALF.COM\fP, and \fBLZ.COM\fP is sometimes called \fBALF.COM\fP or \fBALFER.COM\fP\&. I\(aqve used the original names partly out of respect for the original author, and partly to avoid confusion between my \fBunalf\fP and the Atari one. +.SH BUGS +.sp +A minor one: \fBunalf\fP can\(aqt correctly extract files larger than about +15MB. This could be a problem in \fBalf\fP (though the archives look +OK to me), or it could be a bug I introduced when porting \fBunalf\fP, +or it could be a bug in the original \fBDZ.COM\fP\&. 
Files this large are +a pathological case for \fIALF\fP, anyway, since nobody ever used them on +the Atari 8\-bit. .SH COPYRIGHT .sp The original AlfCrunch (\fBDZ.COM\fP and \fBLZ.COM\fP) for the Atari diff --git a/src/unalf.rst b/src/unalf.rst index 774f978..bee3046 100644 --- a/src/unalf.rst +++ b/src/unalf.rst @@ -312,6 +312,16 @@ old disk images as **UNALF.COM**, and **LZ.COM** is sometimes called out of respect for the original author, and partly to avoid confusion between my **unalf** and the Atari one. +BUGS +==== + +A minor one: **unalf** can't correctly extract files larger than about +15MB. This could be a problem in **alf** (though the archives look +OK to me), or it could be a bug I introduced when porting **unalf**, +or it could be a bug in the original **DZ.COM**. Files this large are +a pathological case for *ALF*, anyway, since nobody ever used them on +the Atari 8-bit. + COPYRIGHT ========= -- cgit v1.2.3