diff options
author | B. Watson <yalhcru@gmail.com> | 2016-02-25 16:51:09 -0500 |
---|---|---|
committer | B. Watson <yalhcru@gmail.com> | 2016-02-25 16:51:09 -0500 |
commit | 9dc7267a351b79ac5b855e812520362f28922341 (patch) | |
tree | b40e8e2144358752b990f1c94091fda0880e3eb7 /textdecomp.s | |
parent | c9c027bb8d620eb3f5066440067327b7d932a7e1 (diff) | |
download | taipan-9dc7267a351b79ac5b855e812520362f28922341.tar.gz |
dictionary text compression, 7666 bytes free
Diffstat (limited to 'textdecomp.s')
-rw-r--r-- | textdecomp.s | 133 |
1 file changed, 132 insertions, 1 deletion
diff --git a/textdecomp.s b/textdecomp.s
index a19d460..da0206a 100644
--- a/textdecomp.s
+++ b/textdecomp.s
@@ -12,6 +12,82 @@
     bitcount = FR0+1 ; counts 8..1, current bit in inbyte
     inbyte = FR0+2
     ysave = FR0+3
+    dict_escape = FR0+4     ; nonzero after a Z (code 52): the next 6-bit code is a dictionary ID
+
+    .rodata
+; one or two letter words are not worth listing here. 3 is only good
+; if it's used pretty often.
+; entry 0 is a dummy! The encoder gets confused by "Z\0". This may get fixed.
+; dictionary size cannot exceed 255 bytes.
+; the quoted stuff in comments is read by messages.pl, it needs to be exact.
+dict00:                     ; dummy entry 0: no bytes of its own, same address as dict01
+dict01: .byte $98, $9d, $73, $54, $53, $80 ; "Li Yuen", 4 occurrences
+dict02: .byte $7c, $c1, $05, $4b, $57, $12, $3d, $42, $05, $48, $00 ; "Elder Brother", 3
+dict03: .byte $64, $f5, $40 ; "you", 30
+dict04: .byte $d7, $c1, $4d, $00 ; " 'em", 8
+dict05: .byte $cc, $f5, $40 ; "You", 16
+dict06: .byte $d4, $80, $56, $14, $00 ; " have", 11
+dict07: .byte $d5, $32, $01, $30, $c0, $00 ; " shall", 6
+dict08: .byte $fb, $5c, $49, $50, $8d, $40 ; ") With ", 2
+dict09: .byte $05, $21, $cf, $00 ; "argo", 6
+dict10: .byte $4c, $82, $50, $00 ; "ship", 10
+dict11: .byte $d5, $70, $52, $14, $83, $d5, $4c, $50, $00 ; " warehouse", 4
+dict12: .byte $d5, $42, $05, $00 ; " the" 17
+dict13: .byte $d4, $f1, $80 ; " of", 14
+dict14: .byte $5c, $93, $0c, $00 ; "will", 8
+dict15: .byte $d4, $21, $45, $3b, $50, $00 ; " been ", 6
+dict16: .byte $d5, $43, $f5, $00 ; " to ", 12
+dict17: .byte $20, $14, $f5, $00 ; "has ", 7
+dict18: .byte $18, $f4, $b5, $00 ; "for ", 7
+dict19: .byte $25, $3d, $40 ; "is ", 9
+dict20: .byte $04, $e1, $00 ; "and", 10
+dict21: .byte $d4, $30, $53, $20, $00 ; " cash", 8
+dict22: .byte $04, $41, $09, $50, $93, $ce, $04, $cd, $40 ; "additional ", 3
+dict23: .byte $b8, $12, $50, $04, $e0, $00 ; "Taipan", 3 (but really many more!)
+dict24: .byte $d4, $f3, $8c, $67, $50 ; " only ", 3
+dict25: .byte $d4, $25, $47, $1c, $54, $93, $00 ; " buggers", 3
+dict26: .byte $5c, $95, $08, $d4, $00 ; "with ", 4
+;dict27: .byte $78, $fd, $40 ; "Do ", 4
+
+dict_offsets:               ; offset of each entry from dict00, indexed by dictionary ID
+    .byte dict00 - dict00
+    .byte dict01 - dict00
+    .byte dict02 - dict00
+    .byte dict03 - dict00
+    .byte dict04 - dict00
+    .byte dict05 - dict00
+    .byte dict06 - dict00
+    .byte dict07 - dict00
+    .byte dict08 - dict00
+    .byte dict09 - dict00
+    .byte dict10 - dict00
+    .byte dict11 - dict00
+    .byte dict12 - dict00
+    .byte dict13 - dict00
+    .byte dict14 - dict00
+    .byte dict15 - dict00
+    .byte dict16 - dict00
+    .byte dict17 - dict00
+    .byte dict18 - dict00
+    .byte dict19 - dict00
+    .byte dict20 - dict00
+    .byte dict21 - dict00
+    .byte dict22 - dict00
+    .byte dict23 - dict00
+    .byte dict24 - dict00
+    .byte dict25 - dict00
+    .byte dict26 - dict00
+    ;.byte dict27 - dict00
+
+; rough estimate of how many bytes are saved by the dictionary
+; stuff: the dictionary + extra decoder stuff costs 221 bytes (vs.
+; the original textdecode.s without dictionary).
+; each dictionary entry saves (length - 2) * (occurrences - 1) bytes.
+; with only dict00 - dict23, we'll save around 173 bytes.
+; actually it works out to 179 bytes, but the estimate was close.
+
+    dictsize = * - dict00   ; spans the dictionary entries and dict_offsets
+    .out .sprintf("dictionary plus dict_offsets is %d bytes", dictsize)
 
     .rodata
 table: ; outbyte values 53..63
@@ -24,11 +100,12 @@ table: ; outbyte values 53..63
     .code
     .endif
 
-; extern void __fastcall__ print_msg(char *msg);
+; extern void __fastcall__ print_msg(const char *msg);
 _print_msg:
     sta srcptr
     stx srcptr+1
     lda #0
+    sta dict_escape         ; start each message with no dictionary escape pending
     sta outbyte
     ldy #$ff ; since we increment it first thing...
 
@@ -54,6 +131,13 @@ _print_msg:
     rts ; 0 = end of message
 
 @notend:
+    ldx dict_escape ; was last character a Z?
+    beq @normalchar         ; no: decode this code as an ordinary character
+
+    jsr dict_lookup         ; yes: outbyte is a dictionary ID, print that entry
+    jmp @noprint            ; this code itself has nothing to hand to _cputc
+
+@normalchar:
     cmp #27
     bcs @notlower
     adc #'a'-1 ; 1-26 are a-z
@@ -61,6 +145,11 @@ _print_msg:
 
 @notlower:
     cmp #52
+    bne @notdict            ; any code other than 52 takes the pre-existing path
+    inc dict_escape ; Z means next 6 bits are dictionary ID
+    bne @noprint            ; dict_escape went 0 -> 1, so this is always taken
+
+@notdict:                   ; bne leaves flags alone: carry is still from cmp #52
     bcs @notupper
     adc #38 ; 27-52 are A-Z
     bne @printit
@@ -74,6 +163,7 @@ _print_msg:
     sty ysave ; _cputc trashes Y
     jsr _cputc
     ldy ysave
+@noprint:                   ; codes that print nothing rejoin here, skipping _cputc
     lda #0
     sta outbyte
     ldx #6
@@ -81,6 +171,47 @@ _print_msg:
     beq @nextbyte
     bne @bitloop
 
+dict_lookup:
+    ; dictionary lookup time. save our state on the stack
+    tya                     ; Y goes on first, so it comes off last
+    pha
+    lda inbyte
+    pha
+    lda srcptr
+    pha
+    lda srcptr+1
+    pha
+    lda bitcount
+    pha
+
+    ; recursive call
+    ldx outbyte             ; X = dictionary ID
+    lda dict_offsets,x      ; A = entry's offset from dict00 (ID is not range-checked)
+    clc
+    adc #<dict00            ; A = low byte of the entry's address
+    sta dict_escape ; temp usage
+    lda #>dict00
+    adc #0                  ; high byte, plus any carry out of the low-byte add
+    tax                     ; _print_msg stores A to srcptr and X to srcptr+1
+    lda dict_escape
+    jsr _print_msg          ; re-initialises srcptr, dict_escape, outbyte and Y
+
+    ; restore old state
+    lda #0
+    sta dict_escape         ; escape consumed: the next code is an ordinary one
+    pla                     ; pops mirror the pushes above, in reverse order
+    sta bitcount
+    pla
+    sta srcptr+1
+    pla
+    sta srcptr
+    pla
+    sta inbyte
+    pla
+    tay
+    rts
+
     decodersize = * - _print_msg
     .out .sprintf("print_msg() is %d bytes", decodersize + tablesize)
+    .out .sprintf("total textdecomp is %d bytes", decodersize + tablesize + dictsize) |