1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
|
/* textcomp.c - compress strings of text to 6 bits per character.
Loosely based on the Z-machine's ZSCII compression.
Example: "Taipan" (7 bytes, including null terminator) encodes as
0xb8 0x12 0x50 0x04 0xe0 0x00 (6 bytes).
Longer strings approach 75% compression ratio.
No encoded string can be over 256 bytes long, as the decompressor
can't currently handle it.
The alphabet contains only upper/lowercase letters, space, newline,
and some punctuation. In particular, numbers are not supported.
alphabet:
0 = end
1-26 = a-z
27-52 = A-Z
53 = space
54 = !
55 = %
56 = ,
57 = .
58 = ?
59 = :
60 = '
61 = (
62 = )
63 = newline
All the strings used by taipan.c are listed in the __END__ section
of messages.pl. The perl script calls this program (textcomp) once
per string, and outputs C source consisting of the encoded versions.
Each string in the __END__ section is preceded by a name, and the
generated C source uses these names with M_ prefixed.
taipan.c calls the function print_msg(const char *) to decode and
print an encoded message. The decoding step slows down printing a bit,
but it's not really noticeable. cputc() is used for printing, so it
respects the reverse video setting (set by rvs_on() and rvs_off()).
When a newline is printed, the decoder always prints a carriage
return first. Any \r sequences listed in messages.pl are discarded
before encoding is done.
Actually, no prompts ever use capital X or Z. These should be used for
dictionary lookups. Maybe X is followed by a 3-bit dict ID, for the 8
most commonly repeated phrases (one of which will of course be "Taipan"),
and Z is a 5- or 6-bit ID for 32 or 64 less common phrases. So far this
isn't implemented because the decompressor isn't reentrant (yet).
*/
#include <stdio.h>
#include <stdlib.h>
/* Encoded output buffer. appendbit() ORs bits into it, so it relies on the
   zero-initialization that file-scope objects get. */
unsigned char out[1024];
/* Number of bits written to out[] so far; also the index of the next bit. */
int bitcount = 0;
/* Map one input character to its 6-bit alphabet code.
 *
 *   a-z          -> 1..26
 *   A-Z          -> 27..52
 *   " !%,.?:'()" -> 53..62 (in that order)
 *   newline      -> 63
 *   '\r'         -> 0 (note: the same value as the end-of-string code)
 *
 * Any other character is reported on stderr and the program exits with
 * status 1.
 */
int getcode(int c) {
    /* Punctuation in code order; the entry at index i has code 53 + i. */
    static const char punct[] = {
        ' ', '!', '%', ',', '.', '?', ':', '\'', '(', ')', '\n'
    };
    int i;

    if(c >= 'a' && c <= 'z') {
        return 1 + (c - 'a');
    }

    if(c >= 'A' && c <= 'Z') {
        return 27 + (c - 'A');
    }

    if(c == '\r') {
        return 0;
    }

    for(i = 0; i < (int)(sizeof(punct) / sizeof(punct[0])); i++) {
        if(c == punct[i]) {
            return 53 + i;
        }
    }

    fprintf(stderr, "unhandled ASCII code %d\n", c);
    exit(1);

    return 0; /* not reached; keeps gcc -Wall quiet */
}
/* Append a single bit to the output buffer.
 *
 * b is expected to be 0 or 1. Bits are packed most-significant-bit first:
 * bit number `bitcount` lands in byte bitcount / 8, at bit position
 * 7 - (bitcount % 8). The bit is ORed in, so out[] must start out zeroed.
 *
 * If the buffer is already full, an error is printed to stderr and the
 * program exits with status 1 instead of writing past the end of out[].
 * A trace line is written to stderr for every bit appended.
 */
void appendbit(unsigned char b) {
    int pos = bitcount / 8;
    int bitpos = 7 - (bitcount % 8);
    unsigned char val;

    /* Refuse to write beyond the end of the output buffer. */
    if(pos >= (int)sizeof(out)) {
        fprintf(stderr, "output buffer overflow (max %d bytes)\n", (int)sizeof(out));
        exit(1);
    }

    val = (unsigned char)(b << bitpos);
    out[pos] |= val;
    fprintf(stderr, "%d: appending bit %d at pos %d, bitpos %d, value $%02x\n", bitcount, b, pos, bitpos, val);
    bitcount++;
}
/* Append the low 6 bits of `code` to the output, most significant bit
 * (value 0x20) first, one appendbit() call per bit. */
void appendcode(int code) {
    int shift;

    for(shift = 5; shift >= 0; shift--) {
        int mask = 1 << shift;

        appendbit((code & mask) ? 1 : 0);
    }
}
int main(int argc, char **argv) {
int c, code, count = 1; /* 1 to account for null terminator */
while((c = getchar()) != EOF) {
code = getcode(c);
fprintf(stderr, "c == %d, code == %d\n", c, code);
appendcode(code);
count++;
}
appendcode(0);
code = 0;
for(c = 0; c < ((bitcount + 7) / 8); c++) {
printf("0x%02x ", out[c]);
code++;
}
if(code > 256) {
fprintf(stderr, "input too long\n");
exit(1);
}
fprintf(stderr, "%d bytes in (added null), %d bytes out, ratio %.2f\n",
count, code, (float)(code)/(float)count);
return 0;
}
|