aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: B. Watson <urchlay@slackware.uk> 2025-11-30 17:17:46 -0500
committer: B. Watson <urchlay@slackware.uk> 2025-11-30 17:17:46 -0500
commit: 2b21c11d156e5c76c550b61a3b3a33a89d8578f9 (patch)
tree: df06d5bf78486717b34949971131b6d16044e4d3
parent: be8cd823dbd9b27889421bb1dc70217d39752663 (diff)
download: alftools-2b21c11d156e5c76c550b61a3b3a33a89d8578f9.tar.gz
alf: isolate crunch algo in its own file.
-rw-r--r-- src/Makefile 8
-rw-r--r-- src/alf.c 173
-rw-r--r-- src/crunch.c 164
-rw-r--r-- src/crunch.h 7
4 files changed, 179 insertions, 173 deletions
diff --git a/src/Makefile b/src/Makefile
index 823e57c..7933168 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -45,7 +45,7 @@ MANS=alf.1 alfsum.1 unalf.1
UNALF_OBJS=unalf.o io.o listalf.o extract.o f65.o glob.o opts.o usage.o self.o asmcode.o sanity.o
ALFSUM_OBJS=alfsum.o self.o
-ALF_OBJS=alf.o self.o alfusage.o sanity.o
+ALF_OBJS=alf.o self.o alfusage.o sanity.o crunch.o
.PHONY: all clean install crosswin windows windows-upload realclean
@@ -65,7 +65,7 @@ alf: $(ALF_OBJS)
usage.o: usage.c
-alfusage.o: usage.c
+alfusage.o: alfusage.c
f65.o: ../f65/f65.c ../f65/f65.h
$(CC) $(CFLAGS) -c -o f65.o ../f65/f65.c
@@ -90,7 +90,9 @@ extract.o: extract.c addrs.h unalf.h ../f65/f65.h
sanity.o: sanity.c sanity.h u816.h
-alf.o: alf.c sanity.h self.h u816.h
+crunch.o: crunch.c crunch.h self.h u816.h # every header crunch.c #includes
+
+alf.o: alf.c sanity.h self.h u816.h crunch.h
ver.rst:
echo '.. |version| replace:: $(VERSION)' > ver.rst
diff --git a/src/alf.c b/src/alf.c
index f3c4e0c..c748cee 100644
--- a/src/alf.c
+++ b/src/alf.c
@@ -10,20 +10,7 @@
#include "u816.h"
#include "sanity.h"
#include "self.h"
-
-#define INITIAL_BITS 9
-#define MAX_BITS 12
-#define MAX_TOKENS (1 << MAX_BITS)
-#define TOK_RESET 256
-#define TOK_END 257
-#define INIT_TOKEN 258 /* 256 = reset dict, 257 = token_bits++ */
-
-#define MAX_INPUT_SIZE (1 << 24)
-
-u8 input_buf[MAX_INPUT_SIZE];
-u8 output_buf[MAX_INPUT_SIZE];
-u8 byte_tokens[256];
-unsigned int input_len, output_len, out_bitpos;
+#include "crunch.h"
int opt_append = 0;
int opt_overwrite = 0;
@@ -34,37 +21,10 @@ int opt_txtconv = 0;
int opt_verbose = 0;
struct stat in_file_stat;
-
-typedef struct s_token {
- u8 *start;
- u16 length;
-} token_t;
-
-token_t tokentab[MAX_TOKENS];
+long hdr_compsize_pos;
FILE *out_file, *in_file;
const char *out_filename, *in_filename;
-int token_bits;
-int max_token;
-int curr_token;
-long hdr_compsize_pos;
-int in_pos;
-
-void init_table(void) {
- int i;
-
- memset(tokentab, 0, sizeof(tokentab));
-
- token_bits = INITIAL_BITS;
- max_token = 1 << INITIAL_BITS;
- curr_token = INIT_TOKEN;
-
- for(i = 0; i < 256; i++) {
- byte_tokens[i] = (u8)i;
- tokentab[i].start = &byte_tokens[i];
- tokentab[i].length = 1;
- }
-}
void store_quad(int pos, unsigned long data) {
int i;
@@ -185,132 +145,6 @@ void open_input(const char *filename) {
}
}
-void inc_output_len(void) {
- if(++output_len == MAX_INPUT_SIZE) {
- fprintf(stderr, "%s: fatal: compressed file would be >16MB.\n", self);
- exit(1);
- }
-}
-
-void append_bit(int bit) {
- output_buf[output_len] |= (bit << (7 - out_bitpos));
- out_bitpos++;
- if(out_bitpos == 8) {
- out_bitpos = 0;
- inc_output_len();
- }
-}
-
-void store_token(int tok) {
- int mask;
-
- for(mask = 1 << (token_bits - 1); mask; mask >>= 1) {
- append_bit(tok & mask ? 1 : 0);
- }
-}
-
-/* match_token() is a brute-force search, which is why alf is so slow.
- I'll do something smarter at some point.
- search backwards, the tokens are stored with longer ones later
- in the list. */
-int match_token(int pos) {
- int i, len, maxlen;
- token_t *t;
- u8 *p, *q;
-
- maxlen = input_len - pos;
-
- for(i = curr_token - 1; i >= INIT_TOKEN; i--) {
- t = &tokentab[i];
-
- /* don't search past the end of the input */
- if(t->length > maxlen) continue;
-
- /* if the first char doesn't match, don't bother with memcmp.
- this is a 5x speedup (!) */
- if(input_buf[pos] != *(t->start)) continue;
-
- /* this is where alf spends most of its time.
- using memcmp is noticeably slower than the code below. */
- /*
- if(memcmp(&input_buf[pos], t->start, t->length) == 0)
- return i;
- */
-
- /* inline memcmp replacement of sorts. I don't think it's really
- faster than memcmp(), it only seems that way because there's
- no function call overhead. ~20% speedup.
- making it search backwards gives a further ~25% speedup.
- */
- len = t->length;
- p = &input_buf[pos] + len - 1;
- q = t->start + len - 1;
- while(len) {
- if(*p != *q) break;
- p--; q--;
- len--;
- }
- if(!len) return i;
- }
-
- /* hard-coded single character tokens map to their values, no need
- to search. */
- return input_buf[pos];
-}
-
-void make_token(int start, int end) {
- int i;
- /* if the token table is full, reset it. basically start over like
- we would with a new file. */
- if(curr_token == max_token) {
- if(token_bits == MAX_BITS) {
- store_token(TOK_RESET); /* stored at the *old* token size! */
- token_bits = INITIAL_BITS;
- init_table();
- return; /* since we're starting over, *don't* make a token */
- } else {
- token_bits++;
- }
- max_token = 1 << token_bits;
- }
- tokentab[curr_token].start = &input_buf[start];
- tokentab[curr_token].length = end - start + 1;
- curr_token++;
-
- if(opt_verbose) {
- printf("%d -> %d (%d) = ", start, end, end - start + 1);
- for(i = start; i < end; i++)
- putchar(input_buf[i]);
- putchar('\n');
- }
-}
-
-void crunch(void) {
- int new_pos;
- in_pos = 0;
- int token;
-
- out_bitpos = 0;
-
- /* 0-byte input files don't get a TOK_RESET */
- if(input_len)
- store_token(TOK_RESET);
-
- while(in_pos < input_len) {
- if(opt_verbose) printf("in_pos==%d\n", in_pos);
- token = match_token(in_pos);
- store_token(token);
- new_pos = in_pos + tokentab[token].length;
- if(new_pos < input_len)
- make_token(in_pos, new_pos);
- in_pos = new_pos;
- }
-
- store_token(TOK_END);
- if(out_bitpos) inc_output_len();
- update_header();
-}
-
void make_backup(void) {
char bak[PATH_MAX + 2];
strncpy(bak, out_filename, PATH_MAX);
@@ -330,8 +164,6 @@ void convert_eols(void) {
}
void crunch_file(const char *filename) {
- init_table();
-
open_input(filename);
/* read in entire input, couldn't do it this way on the Atari */
@@ -354,6 +186,7 @@ void crunch_file(const char *filename) {
/* crunches the entire input to memory! */
crunch();
+ update_header();
/* don't open the output file until crunch() has succeeded once.
this avoids leaving 0-byte turds */
diff --git a/src/crunch.c b/src/crunch.c
new file mode 100644
index 0000000..0cc8092
--- /dev/null
+++ b/src/crunch.c
@@ -0,0 +1,164 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "u816.h"
+#include "crunch.h"
+#include "self.h"
+
+#define INITIAL_BITS 9 /* token width in bits after a table reset */
+#define MAX_BITS 12 /* widest token; the table is reset instead of growing past this */
+#define MAX_TOKENS (1 << MAX_BITS)
+#define TOK_RESET 256 /* emitted at the start of non-empty input and when the table is reset */
+#define TOK_END 257 /* emitted once, after the last data token */
+#define INIT_TOKEN 258 /* first table slot available for new tokens */
+
+u8 input_buf[MAX_INPUT_SIZE];
+u8 output_buf[MAX_INPUT_SIZE];
+unsigned int input_len, output_len, out_bitpos; /* out_bitpos: bits already used in output_buf[output_len] */
+
+u8 byte_tokens[256]; /* backing bytes for the single-byte tokens 0-255 */
+typedef struct s_token {
+ u8 *start; /* first byte of the token's data */
+ u16 length; /* number of bytes */
+} token_t;
+
+token_t tokentab[MAX_TOKENS];
+
+int token_bits; /* current token width in bits */
+int max_token; /* 1 << token_bits */
+int curr_token; /* next free index in tokentab */
+int in_pos; /* current read offset in input_buf */
+
+void init_table(void) { /* reset the token table: INITIAL_BITS-wide codes, single-byte tokens only */
+ int i;
+
+ memset(tokentab, 0, sizeof(tokentab));
+
+ token_bits = INITIAL_BITS;
+ max_token = 1 << INITIAL_BITS;
+ curr_token = INIT_TOKEN; /* first free slot; 256 and 257 are TOK_RESET and TOK_END */
+
+ for(i = 0; i < 256; i++) { /* tokens 0-255 each stand for the byte with the same value */
+ byte_tokens[i] = (u8)i;
+ tokentab[i].start = &byte_tokens[i];
+ tokentab[i].length = 1;
+ }
+}
+
+void inc_output_len(void) { /* advance output_len by one byte; exits if it reaches MAX_INPUT_SIZE */
+ if(++output_len == MAX_INPUT_SIZE) {
+ fprintf(stderr, "%s: fatal: compressed file would be >16MB.\n", self);
+ exit(1);
+ }
+}
+
+void append_bit(int bit) { /* OR one bit into output_buf, filling each byte from bit 7 down to bit 0 */
+ output_buf[output_len] |= (bit << (7 - out_bitpos)); /* NOTE(review): |= only sets bits, so this relies on the byte being 0 beforehand -- confirm the caller clears output_buf */
+ out_bitpos++;
+ if(out_bitpos == 8) { /* byte is full, move on to the next one */
+ out_bitpos = 0;
+ inc_output_len();
+ }
+}
+
+void store_token(int tok) { /* append tok to the output as token_bits bits, highest bit first */
+ int mask;
+
+ for(mask = 1 << (token_bits - 1); mask; mask >>= 1) {
+ append_bit(tok & mask ? 1 : 0);
+ }
+}
+
+/* match_token(): return the token to emit for the input at pos.
+ it's a brute-force scan of tokentab, which is why alf is so slow
+ (TODO: do something smarter). the scan runs backwards from the newest
+ token, because longer tokens are stored later in the list. */
+int match_token(int pos) {
+ int i, len, maxlen;
+ token_t *t;
+ u8 *p, *q;
+
+ maxlen = input_len - pos; /* bytes of input left, starting at pos */
+
+ for(i = curr_token - 1; i >= INIT_TOKEN; i--) {
+ t = &tokentab[i];
+
+ /* a token longer than the remaining input can't match */
+ if(t->length > maxlen) continue;
+
+ /* if the first char doesn't match, don't bother with memcmp.
+ this is a 5x speedup (!) */
+ if(input_buf[pos] != *(t->start)) continue;
+
+ /* this is where alf spends most of its time.
+ using memcmp is noticeably slower than the code below. */
+ /*
+ if(memcmp(&input_buf[pos], t->start, t->length) == 0)
+ return i;
+ */
+
+ /* inline memcmp replacement of sorts. I don't think it's really
+ faster than memcmp(), it only seems that way because there's
+ no function call overhead. ~20% speedup.
+ making it search backwards gives a further ~25% speedup.
+ */
+ len = t->length;
+ p = &input_buf[pos] + len - 1; /* last input byte of the candidate match */
+ q = t->start + len - 1; /* last byte of the token */
+ while(len) {
+ if(*p != *q) break;
+ p--; q--;
+ len--;
+ }
+ if(!len) return i; /* all bytes compared equal */
+ }
+
+ /* hard-coded single character tokens map to their values, no need
+ to search. */
+ return input_buf[pos];
+}
+
+void make_token(int start, int end) { /* add input_buf[start..end], end inclusive, as a new token */
+ /* when all codes of the current width are used up: widen by one bit, or,
+ at MAX_BITS, reset the table. basically start over like we would with a new file. */
+ if(curr_token == max_token) {
+ if(token_bits == MAX_BITS) {
+ store_token(TOK_RESET); /* stored at the *old* token size! */
+ token_bits = INITIAL_BITS;
+ init_table();
+ return; /* since we're starting over, *don't* make a token */
+ } else {
+ token_bits++;
+ }
+ max_token = 1 << token_bits;
+ }
+ tokentab[curr_token].start = &input_buf[start]; /* points into input_buf, the bytes are not copied */
+ tokentab[curr_token].length = end - start + 1;
+ curr_token++;
+}
+
+void crunch(void) { /* compress input_buf[0..input_len) into output_buf as a bitstream of tokens */
+ int new_pos;
+ int token;
+
+ init_table();
+ out_bitpos = 0; /* NOTE(review): output_len is not reset here -- presumably the caller does it; confirm */
+ in_pos = 0;
+
+ /* 0-byte input files don't get a TOK_RESET */
+ if(input_len)
+ store_token(TOK_RESET);
+
+ while(in_pos < input_len) {
+ token = match_token(in_pos);
+ store_token(token);
+ new_pos = in_pos + tokentab[token].length; /* first input byte after the match */
+ if(new_pos < input_len)
+ make_token(in_pos, new_pos); /* new token = this match plus the next input byte */
+ in_pos = new_pos;
+ }
+
+ store_token(TOK_END);
+ if(out_bitpos) inc_output_len(); /* count the final, partially filled byte */
+}
+
diff --git a/src/crunch.h b/src/crunch.h
new file mode 100644
index 0000000..4db2f1a
--- /dev/null
+++ b/src/crunch.h
@@ -0,0 +1,7 @@
+#define MAX_INPUT_SIZE (1 << 24) /* 16MB; size of both buffers below */
+
+extern u8 input_buf[MAX_INPUT_SIZE]; /* NOTE(review): u8 is not declared in this header; presumably u816.h must be included first */
+extern u8 output_buf[MAX_INPUT_SIZE];
+extern unsigned int input_len, output_len, out_bitpos;
+
+void crunch(void); /* defined in crunch.c */