diff options
| author | B. Watson <urchlay@slackware.uk> | 2025-12-08 17:07:31 -0500 |
|---|---|---|
| committer | B. Watson <urchlay@slackware.uk> | 2025-12-08 17:07:31 -0500 |
| commit | 818fcfb56c658017765eafb2078116392d13aa76 (patch) | |
| tree | 55c835a5abe95110430353c328092b074001c0d0 | |
| parent | 729c0305423de3bf1199139e3b09e2093b4ada18 (diff) | |
| download | alftools-818fcfb56c658017765eafb2078116392d13aa76.tar.gz | |
crunch.c: clean up a little.
| -rw-r--r-- | src/crunch.c | 38 |
1 files changed, 20 insertions, 18 deletions
diff --git a/src/crunch.c b/src/crunch.c index 3ab8eff..b450ae0 100644 --- a/src/crunch.c +++ b/src/crunch.c @@ -36,7 +36,7 @@ int token_bits; int max_token; int curr_token = INIT_TOKEN; int in_pos; -int new_pos; +// int new_pos; void init_table(void) { int i; @@ -153,19 +153,19 @@ token_t *get_kid(token_t *t, u8 chr) { return 0; } -token_t *match_token(int pos) { +token_t *match_token(void) { token_t *t, *p; - t = &tokens[input_buf[pos]]; - new_pos = pos + 1; - if(new_pos == input_len) + t = &tokens[input_buf[in_pos]]; + in_pos++; + if(in_pos == input_len) return t; p = t; - while((p = get_kid(t, input_buf[new_pos]))) { + while((p = get_kid(t, input_buf[in_pos]))) { t = p; - new_pos++; - if(new_pos == input_len) break; + in_pos++; + if(in_pos == input_len) break; } return t; } @@ -232,7 +232,7 @@ void crunch(void) { init_table(); out_bitpos = 0; - in_pos = new_pos = 0; + in_pos = 0; /* 0-byte input files don't get a TOK_RESET */ if(!input_len) { @@ -244,11 +244,10 @@ void crunch(void) { store_token(TOK_RESET); while(in_pos < input_len) { - tok = match_token(in_pos); + tok = match_token(); store_token(tok->number); - if(new_pos < input_len) - make_token(tok, input_buf[new_pos]); - in_pos = new_pos; + if(in_pos < input_len) + make_token(tok, input_buf[in_pos]); } store_token(TOK_END); @@ -263,7 +262,7 @@ void crunch(void) { /* ******************************************************************** -The tokens are stored in a tree with 256 roots (root_tokens[256]). +The tokens are stored in a tree with 256 roots (tokens[0 to 256]). There's one root per character code (0-255). This serves as the hard-coded initial dictionary (each token number at this level maps to its character code). @@ -276,7 +275,7 @@ the siblings list (the first of its child nodes). When creating a token, you pass make_token() a pointer to an existing token. For example if your new token is for the string "ab", you pass -make_token() the address of root_tokens['a']. The new token will be +make_token() the address of tokens['a']. The new token will be added to the 'a' token's kids as the sibling of the last kid in the list (if there are already kids) or the first kid if there are none. @@ -297,7 +296,7 @@ An example: the input begins with "abcdaZ". Partial token structure: -root_token[['a'] = token #97, no siblings +token[['a'] = token #97, no siblings \- kids: token #130, b; token #<whatever>, Z \- kids: token #131, c @@ -311,7 +310,10 @@ the tree, that's the token for "abc"). A new token is created for every character in the file that doesn't match one of the existing tokens. -Walking the tree is fast, but it could be made even faster by sorting -the kids at each level, and doing a binary search instead of linear. +Walking the tree is fast, but it might be made even faster by sorting +the kids at each level, and doing a binary search instead of linear... +or not: most nodes that have kids only have 2 or 3 (avgkidcount around +2.5 for a text file). The overhead of sorting might make performance +worse. ********************************************************************* */ |
