From c9c027bb8d620eb3f5066440067327b7d932a7e1 Mon Sep 17 00:00:00 2001 From: "B. Watson" Date: Thu, 25 Feb 2016 07:19:40 -0500 Subject: textdecomp dictionary, not ready for prime time --- textdecomp.s.dict.dontuseyet | 177 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 177 insertions(+) create mode 100644 textdecomp.s.dict.dontuseyet diff --git a/textdecomp.s.dict.dontuseyet b/textdecomp.s.dict.dontuseyet new file mode 100644 index 0000000..ed2feba --- /dev/null +++ b/textdecomp.s.dict.dontuseyet @@ -0,0 +1,177 @@ + +; text decompressor for taipan. +; text is packed 6 bits per character. see textcomp.c +; for details. + + .include "atari.inc" + .export _print_msg + .import _cputc + + srcptr = FR1 + outbyte = FR0 ; decoded 6-bit byte + bitcount = FR0+1 ; counts 8..1, current bit in inbyte + inbyte = FR0+2 + ysave = FR0+3 + dict_escape = FR0+4 + + .code ; this really should be rodata, but bank 2 has no space (yet) +; one or two letter words are not worth listing here. 3 is only good +; if it's used pretty often. +; entry 0 is a dummy! The encoder gets confused by "Z\0". This may get fixed. 
+dict00:
+dict01: .byte $98, $9d, $73, $54, $53, $80 ; "Li Yuen", 4 occurrences
+dict02: .byte $7c, $c1, $05, $4b, $57, $12, $3d, $42, $05, $48, $00 ; "Elder Brother", 3
+dict03: .byte $64, $f5, $40 ; "you", 30
+dict04: .byte $d7, $c1, $4d, $00 ; " 'em", 8
+dict05: .byte $cc, $f5, $40 ; "You", 16
+dict06: .byte $d4, $80, $56, $14, $00 ; " have", 11
+dict07: .byte $d5, $32, $01, $30, $c0, $00 ; " shall", 6
+dict08: .byte $fb, $5c, $49, $50, $8d, $40 ; ") With ", 2
+dict09: .byte $05, $21, $cf, $00 ; "argo", 6
+dict10: .byte $4c, $82, $50, $00 ; "ship", 10
+dict11: .byte $d5, $70, $52, $14, $83, $d5, $4c, $50, $00 ; " warehouse", 4
+dict12: .byte $d5, $42, $05, $00 ; " the", 17
+dict13: .byte $d4, $f1, $80 ; " of", 14
+dict14: .byte $5c, $93, $0c, $00 ; "will", 8
+dict15: .byte $d4, $21, $45, $3b, $50, $00 ; " been ", 6
+dict16: .byte $d5, $43, $f5, $00 ; " to ", 12
+dict17: .byte $20, $14, $f5, $00 ; "has ", 7
+dict18: .byte $18, $f4, $b5, $00 ; "for ", 7
+dict19: .byte $25, $3d, $40 ; "is ", 9
+dict20: .byte $04, $e1, $00 ; "and", 10
+dict21: .byte $d4, $30, $53, $20, $00 ; " cash", 8
+dict22: .byte $04, $41, $09, $50, $93, $ce, $04, $cd, $40 ; "additional ", 3
+dict23: .byte $b8, $12, $50, $04, $e0, $00 ; "Taipan", 3 (but really many more!)
+
+; Pointer tables, indexed by dictionary ID. The address of entry N is split
+; into two parallel byte tables: dict_lo holds the low byte ('<' operator)
+; and dict_hi the high byte ('>' operator). _print_msg loads dict_lo,x into A
+; and dict_hi,x into X before calling itself on the entry.
+; dict00 is a dummy that shares dict01's address (see the note above the
+; dictionary); it exists only so that real IDs start at 1.
+; Both tables must list the entries in the same order and stay the same length.
+dict_lo: .byte <dict00, <dict01, <dict02, <dict03, <dict04, <dict05, <dict06, <dict07, <dict08, <dict09, <dict10, <dict11, <dict12, <dict13, <dict14, <dict15, <dict16, <dict17, <dict18, <dict19, <dict20, <dict21, <dict22, <dict23
+dict_hi: .byte >dict00, >dict01, >dict02, >dict03, >dict04, >dict05, >dict06, >dict07, >dict08, >dict09, >dict10, >dict11, >dict12, >dict13, >dict14, >dict15, >dict16, >dict17, >dict18, >dict19, >dict20, >dict21, >dict22, >dict23
+
+; rough calculation of how many bytes are saved by the dictionary
+; stuff: the dictionary + extra decoder stuff costs 221 bytes.
+; each dictionary entry saves (length - 2) * (occurrences - 1) bytes.
+; with only dict00 - dict21, we'll save around 147 bytes.
+
+; dictsize is measured from dict00 up to this point, so it counts everything
+; emitted between dict00 and here (the packed entries and the pointer data
+; that follows them).
+    dictsize = * - dict00
+    .out .sprintf("dictionary is %d bytes", dictsize)
+
+    .rodata
+; Characters for the 6-bit codes that are not letters. The decoder subtracts
+; 53 from the code and uses the result (0..10) as an index into this table.
+table: ; outbyte values 53..63
+    .byte ' ', '!', '%', ',', '.', '?', ':', 39, 40, 41, $9b
+    tablesize = * - table
+
+; The decoder is placed in the HIGHCODE segment when CART_TARGET is defined,
+; and in the ordinary code segment otherwise.
+    .ifdef CART_TARGET
+    .segment "HIGHCODE"
+    .else
+    .code
+    .endif
+
+; extern void __fastcall__ print_msg(const char *msg);
+;
+; Decode a message packed 6 bits per character and print it one character
+; at a time through _cputc.
+;
+; In:  A = low byte, X = high byte of the message address (they are stored
+;      straight into srcptr / srcptr+1).
+; Out: returns when a 6-bit code of 0 is decoded.
+;
+; Code values, as handled below:
+;    0      end of message
+;    1..26  'a'..'z'
+;   27..51  upper-case letters ('A' = 27)
+;   52      escape: the NEXT 6-bit code is a dictionary ID, and the routine
+;           calls itself on that dictionary entry
+;   53..63  looked up in 'table'
+;
+; Uses the zero-page variables srcptr, outbyte, bitcount, inbyte, ysave and
+; dict_escape. Y is the only index into the message and srcptr is never
+; advanced, so Y wraps after 256 packed bytes.
+; NOTE(review): the dictionary ID is used as a table index without a range
+; check; the encoder is presumably responsible for emitting only valid IDs.
+_print_msg:
+    sta srcptr
+    stx srcptr+1
+    lda #0
+    sta dict_escape ; not in "dictionary ID follows" mode
+    sta outbyte ; bit accumulator starts empty
+    ldy #$ff ; since we increment it first thing...
+
+    ldx #6 ; counts 6..1, current bit in outbyte
+@nextbyte:
+    ; fetch the next packed byte and reset the per-byte bit counter
+    iny
+    lda #8
+    sta bitcount
+    lda (srcptr),y
+    sta inbyte
+@bitloop:
+    ; shift the top bit of inbyte into the bottom of outbyte
+    asl inbyte
+    rol outbyte
+    dex
+    beq @decode ; got 6 bits
+    ; note: when we branch to @decode the bit just consumed has NOT been
+    ; counted in bitcount yet; @noprint does that dec on the way back in.
+    dec bitcount
+    bne @bitloop
+    beq @nextbyte ; always taken: bitcount just reached 0
+
+@decode:
+    lda outbyte
+    bne @notend
+    rts ; 0 = end of message
+
+@notend:
+    ; A still holds the decoded code here (ldx does not touch A)
+    ldx dict_escape ; was last character a Z?
+    beq @normalchar
+
+    ; dictionary lookup time. save our state on the stack
+    ; TODO: see if this code's smaller using ZP instead of stack.
+    ; it'll only be reentrant once, but that's enough.
+    ; pushed in this order: Y, inbyte, srcptr, srcptr+1, bitcount
+    ; (outbyte and X are not saved; @noprint reloads both)
+    tya
+    pha
+    lda inbyte
+    pha
+    lda srcptr
+    pha
+    lda srcptr+1
+    pha
+    lda bitcount
+    pha
+
+    ; recursive call
+    ; outbyte is the dictionary ID; load the entry's address into A (from
+    ; dict_lo) and X (from dict_hi), the same registers the entry point
+    ; stores to srcptr / srcptr+1. The pha/pla pair only parks the dict_lo
+    ; byte while A is reused to fetch the dict_hi byte.
+    ldx outbyte
+    lda dict_lo,x
+    pha
+    lda dict_hi,x
+    tax
+    pla
+    jsr _print_msg
+
+    ; restore old state
+    ; (popped in the reverse of the push order above)
+    lda #0
+    sta dict_escape ; the dictionary ID has been consumed
+    pla
+    sta bitcount
+    pla
+    sta srcptr+1
+    pla
+    sta srcptr
+    pla
+    sta inbyte
+    pla
+    tay
+    jmp @noprint
+
+@normalchar:
+    cmp #27
+    bcs @notlower
+    ; carry is clear here (bcs not taken), so adc adds exactly 'a'-1
+    adc #'a'-1 ; 1-26 are a-z
+    bne @printit ; always taken: result is never 0
+
+@notlower:
+    cmp #52
+    bne @notdict
+    ; dict_escape is 0 on this path (we came through beq @normalchar),
+    ; so inc makes it 1 and the bne below is always taken
+    inc dict_escape ; Z means next 6 bits are dictionary ID
+    bne @noprint
+
+@notdict:
+    ; flags are still those of cmp #52: carry set means code >= 53
+    bcs @notupper
+    ; carry clear here, so 27 becomes 27+38 = 65. Code 52 never reaches
+    ; this adc; it was diverted to the escape handling above.
+    adc #38 ; 27-52 are A-Z
+    bne @printit ; always taken: result is never 0
+
+@notupper:
+    ; carry is set here (arrived via bcs), so sbc subtracts exactly 53
+    sbc #53 ; 53-63 are table lookups
+    tax
+    lda table,x
+
+@printit:
+    sty ysave ; _cputc trashes Y
+    jsr _cputc
+    ldy ysave
+@noprint:
+    ; start a fresh 6-bit code, then count the input bit that completed
+    ; the previous one (see the note in @bitloop)
+    lda #0
+    sta outbyte
+    ldx #6
+    dec bitcount
+    beq @nextbyte ; current input byte used up
+    bne @bitloop ; always taken otherwise
+
+; size of the decoder code, from _print_msg to here
+    decodersize = * - _print_msg
+
+    .out .sprintf("print_msg() is %d bytes", decodersize + tablesize)
+    .out .sprintf("total textdecomp is %d bytes", decodersize + tablesize + dictsize)
-- cgit v1.2.3