aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorB. Watson <urchlay@slackware.uk>2025-12-08 17:07:31 -0500
committerB. Watson <urchlay@slackware.uk>2025-12-08 17:07:31 -0500
commit818fcfb56c658017765eafb2078116392d13aa76 (patch)
tree55c835a5abe95110430353c328092b074001c0d0
parent729c0305423de3bf1199139e3b09e2093b4ada18 (diff)
downloadalftools-818fcfb56c658017765eafb2078116392d13aa76.tar.gz
crunch.c: clean up a little.
-rw-r--r--src/crunch.c38
1 files changed, 20 insertions, 18 deletions
diff --git a/src/crunch.c b/src/crunch.c
index 3ab8eff..b450ae0 100644
--- a/src/crunch.c
+++ b/src/crunch.c
@@ -36,7 +36,7 @@ int token_bits;
int max_token;
int curr_token = INIT_TOKEN;
int in_pos;
-int new_pos;
+// int new_pos;
void init_table(void) {
int i;
@@ -153,19 +153,19 @@ token_t *get_kid(token_t *t, u8 chr) {
return 0;
}
-token_t *match_token(int pos) {
+token_t *match_token(void) {
token_t *t, *p;
- t = &tokens[input_buf[pos]];
- new_pos = pos + 1;
- if(new_pos == input_len)
+ t = &tokens[input_buf[in_pos]];
+ in_pos++;
+ if(in_pos == input_len)
return t;
p = t;
- while((p = get_kid(t, input_buf[new_pos]))) {
+ while((p = get_kid(t, input_buf[in_pos]))) {
t = p;
- new_pos++;
- if(new_pos == input_len) break;
+ in_pos++;
+ if(in_pos == input_len) break;
}
return t;
}
@@ -232,7 +232,7 @@ void crunch(void) {
init_table();
out_bitpos = 0;
- in_pos = new_pos = 0;
+ in_pos = 0;
/* 0-byte input files don't get a TOK_RESET */
if(!input_len) {
@@ -244,11 +244,10 @@ void crunch(void) {
store_token(TOK_RESET);
while(in_pos < input_len) {
- tok = match_token(in_pos);
+ tok = match_token();
store_token(tok->number);
- if(new_pos < input_len)
- make_token(tok, input_buf[new_pos]);
- in_pos = new_pos;
+ if(in_pos < input_len)
+ make_token(tok, input_buf[in_pos]);
}
store_token(TOK_END);
@@ -263,7 +262,7 @@ void crunch(void) {
/* ********************************************************************
-The tokens are stored in a tree with 256 roots (root_tokens[256]).
+The tokens are stored in a tree with 256 roots (tokens[0 to 256]).
There's one root per character code (0-255). This serves as the
hard-coded initial dictionary (each token number at this level maps to
its character code).
@@ -276,7 +275,7 @@ the siblings list (the first of its child nodes).
When creating a token, you pass make_token() a pointer to an existing
token. For example if your new token is for the string "ab", you pass
-make_token() the address of root_tokens['a']. The new token will be
+make_token() the address of tokens['a']. The new token will be
added to the 'a' token's kids as the sibling of the last kid in the
list (if there are already kids) or the first kid if there are none.
@@ -297,7 +296,7 @@ An example: the input begins with "abcdaZ".
Partial token structure:
-root_token[['a'] = token #97, no siblings
+token[['a'] = token #97, no siblings
\- kids: token #130, b; token #<whatever>, Z
\- kids: token #131, c
@@ -311,7 +310,10 @@ the tree, that's the token for "abc").
A new token is created for every character in the file that doesn't
match one of the existing tokens.
-Walking the tree is fast, but it could be made even faster by sorting
-the kids at each level, and doing a binary search instead of linear.
+Walking the tree is fast, but it might be made even faster by sorting
+the kids at each level, and doing a binary search instead of linear...
+or not: most nodes that have kids only have 2 or 3 (avgkidcount around
+2.5 for a text file). The overhead of sorting might make performance
+worse.
********************************************************************* */