aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author B. Watson <urchlay@slackware.uk> 2025-11-27 03:59:12 -0500
committer B. Watson <urchlay@slackware.uk> 2025-11-27 03:59:12 -0500
commit 8556c491c9580ea7a2c558687d5f216574432a02 (patch)
tree f447632c3d4a7bc2b5850bc92d2b9e978c2336bd
parent 5778614b922ee7db0897d6ca272046dee6e3df1d (diff)
download unalf-8556c491c9580ea7a2c558687d5f216574432a02.tar.gz
Tweak match_token().
-rw-r--r-- src/alf.c 22
1 files changed, 15 insertions, 7 deletions
diff --git a/src/alf.c b/src/alf.c
index db5c654..61e3499 100644
--- a/src/alf.c
+++ b/src/alf.c
@@ -194,15 +194,24 @@ void store_token(int tok) {
}
}
-/* search backwards, the tokens are stored with longer ones later
+/* match_token() is a brute-force search, which is why alf is so slow.
+ I'll do something smarter at some point.
+ search backwards, the tokens are stored with longer ones later
in the list. */
int match_token(int pos) {
- int i;
+ int i, maxlen;
+
+ maxlen = input_len - pos;
for(i = curr_token - 1; i >= INIT_TOKEN; i--) {
+ /* don't search past the end of the input */
+ if(tokentab[i].length > maxlen) continue;
+
/* if the first char doesn't match, don't bother with memcmp.
this is a 5x speedup (!) */
if(input_buf[pos] != *(tokentab[i].start)) continue;
+
+ /* memcmp is where alf spends most of its time. */
if(memcmp(&input_buf[pos], tokentab[i].start, tokentab[i].length) == 0)
return i;
}
@@ -213,16 +222,15 @@ int match_token(int pos) {
}
void make_token(int start, int end) {
+ /* if the token table is full, reset it. basically start over like
+ we would with a new file. */
if(curr_token == max_token) {
- // printf("%d: curr_token %d == max_token, ", in_pos, curr_token);
if(token_bits == MAX_BITS) {
- // printf("token_bits %d == MAX_BITS, reset\n", token_bits);
store_token(TOK_RESET); /* stored at the *old* token size! */
- token_bits = 9;
+ token_bits = INITIAL_BITS;
init_table();
- return;
+ return; /* since we're starting over, *don't* make a token */
} else {
- // printf("token_bits %d < MAX_BITS, inc\n", token_bits);
token_bits++;
}
max_token = 1 << token_bits;