From c9c027bb8d620eb3f5066440067327b7d932a7e1 Mon Sep 17 00:00:00 2001
From: "B. Watson" <yalhcru@gmail.com>
Date: Thu, 25 Feb 2016 07:19:40 -0500
Subject: textdecomp dictionary, not ready for prime time

---
 textdecomp.s.dict.dontuseyet | 177 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 177 insertions(+)
 create mode 100644 textdecomp.s.dict.dontuseyet
diff --git a/textdecomp.s.dict.dontuseyet b/textdecomp.s.dict.dontuseyet
new file mode 100644
index 0000000..ed2feba
--- /dev/null
+++ b/textdecomp.s.dict.dontuseyet
@@ -0,0 +1,177 @@
+
+; text decompressor for taipan.
+; text is packed 6 bits per character. see textcomp.c
+; for details.
+
+ .include "atari.inc"
+ .export _print_msg
+ .import _cputc
+
+ srcptr = FR1 ; 2-byte pointer to the packed message being decoded
+ outbyte = FR0 ; decoded 6-bit byte
+ bitcount = FR0+1 ; counts 8..1, current bit in inbyte
+ inbyte = FR0+2 ; current input byte, shifted left one bit per step
+ ysave = FR0+3 ; holds Y across the _cputc call
+ dict_escape = FR0+4 ; nonzero when the previous code was the Z escape
+
+ .code ; this really should be rodata, but bank 2 has no space (yet)
+; one or two letter words are not worth listing here. 3 is only good
+; if it's used pretty often.
+; entry 0 is a dummy! The encoder gets confused by "Z\0" (a 0 code also ends a message). This may get fixed.
+dict00: ; no data of its own: same address as dict01. Every entry below is packed 6-bit codes ending in a 0 code.
+dict01: .byte $98, $9d, $73, $54, $53, $80 ; "Li Yuen", 4 occurrences
+dict02: .byte $7c, $c1, $05, $4b, $57, $12, $3d, $42, $05, $48, $00 ; "Elder Brother", 3
+dict03: .byte $64, $f5, $40 ; "you", 30
+dict04: .byte $d7, $c1, $4d, $00 ; " 'em", 8
+dict05: .byte $cc, $f5, $40 ; "You", 16
+dict06: .byte $d4, $80, $56, $14, $00 ; " have", 11
+dict07: .byte $d5, $32, $01, $30, $c0, $00 ; " shall", 6
+dict08: .byte $fb, $5c, $49, $50, $8d, $40 ; ") With ", 2
+dict09: .byte $05, $21, $cf, $00 ; "argo", 6
+dict10: .byte $4c, $82, $50, $00 ; "ship", 10
+dict11: .byte $d5, $70, $52, $14, $83, $d5, $4c, $50, $00 ; " warehouse", 4
+dict12: .byte $d5, $42, $05, $00 ; " the", 17
+dict13: .byte $d4, $f1, $80 ; " of", 14
+dict14: .byte $5c, $93, $0c, $00 ; "will", 8
+dict15: .byte $d4, $21, $45, $3b, $50, $00 ; " been ", 6
+dict16: .byte $d5, $43, $f5, $00 ; " to ", 12
+dict17: .byte $20, $14, $f5, $00 ; "has ", 7
+dict18: .byte $18, $f4, $b5, $00 ; "for ", 7
+dict19: .byte $25, $3d, $40 ; "is ", 9
+dict20: .byte $04, $e1, $00 ; "and", 10
+dict21: .byte $d4, $30, $53, $20, $00 ; " cash", 8
+dict22: .byte $04, $41, $09, $50, $93, $ce, $04, $cd, $40 ; "additional ", 3
+dict23: .byte $b8, $12, $50, $04, $e0, $00 ; "Taipan", 3 (but really many more!)
+
+dict_lo: .byte <dict00, <dict01, <dict02, <dict03, <dict04, <dict05, <dict06, <dict07, <dict08, <dict09, <dict10, <dict11, <dict12, <dict13, <dict14, <dict15, <dict16, <dict17, <dict18, <dict19, <dict20, <dict21, <dict22, <dict23 ; low address bytes, indexed by dictionary ID
+dict_hi: .byte >dict00, >dict01, >dict02, >dict03, >dict04, >dict05, >dict06, >dict07, >dict08, >dict09, >dict10, >dict11, >dict12, >dict13, >dict14, >dict15, >dict16, >dict17, >dict18, >dict19, >dict20, >dict21, >dict22, >dict23 ; high address bytes, same order as dict_lo
+
+; rough calculation of how many bytes are saved by the dictionary
+; stuff: the dictionary + extra decoder stuff costs 221 bytes.
+; each dictionary entry saves (length - 2) * (occurrences - 1) bytes.
+; with only dict00 - dict21, we'll save around 147 bytes. NOTE(review): dict22/dict23 exist now, figures may be stale.
+
+ dictsize = * - dict00 ; covers the packed entries and the dict_lo/dict_hi tables
+ .out .sprintf("dictionary is %d bytes", dictsize)
+
+ .rodata
+table: ; outbyte values 53..63 (the decoder indexes this with outbyte - 53)
+ .byte ' ', '!', '%', ',', '.', '?', ':', 39, 40, 41, $9b ; 39/40/41 = apostrophe, open paren, close paren. $9b = Atari end-of-line
+ tablesize = * - table ; number of table entries, in bytes
+
+ .ifdef CART_TARGET
+  .segment "HIGHCODE" ; CART_TARGET builds place the decoder in the HIGHCODE segment
+ .else
+  .code
+ .endif
+
+; extern void __fastcall__ print_msg(const char *msg); -- unpacks the 6-bit codes at msg and prints each character with _cputc.
+_print_msg:
+ sta srcptr ; pointer arrives in A (low byte) and X (high byte)
+ stx srcptr+1
+ lda #0
+ sta dict_escape ; no dictionary escape pending
+ sta outbyte ; start with an empty 6-bit accumulator
+ ldy #$ff ; since we increment it first thing...
+
+ ldx #6 ; counts 6..1, current bit in outbyte
+@nextbyte:
+ iny ; Y is the only read index (srcptr is never advanced), so a message spans at most 256 packed bytes
+ lda #8
+ sta bitcount
+ lda (srcptr),y
+ sta inbyte
+@bitloop:
+ asl inbyte ; top bit of inbyte goes to carry
+ rol outbyte ; carry goes into the bottom bit of outbyte
+ dex
+ beq @decode ; got 6 bits (bitcount for this bit is decremented later, at @noprint)
+ dec bitcount
+ bne @bitloop ; bits remain in this input byte
+ beq @nextbyte ; always taken: input byte is used up
+
+@decode:
+ lda outbyte
+ bne @notend
+ rts ; 0 = end of message
+
+@notend:
+ ldx dict_escape ; was last character a Z?
+ beq @normalchar ; no: A still holds outbyte
+
+ ; dictionary lookup time. save our state on the stack
+ ; TODO: see if this code's smaller using ZP instead of stack.
+ ; it'll only be reentrant once, but that's enough.
+ tya ; push order: Y, inbyte, srcptr, srcptr+1, bitcount
+ pha
+ lda inbyte
+ pha
+ lda srcptr
+ pha
+ lda srcptr+1
+ pha
+ lda bitcount
+ pha
+
+ ; recursive call
+ ldx outbyte ; this code is the dictionary ID. NOTE(review): it is not range-checked before indexing
+ lda dict_lo,x
+ pha ; park the low byte while X is reloaded
+ lda dict_hi,x
+ tax ; X = high byte of the entry address
+ pla ; A = low byte of the entry address
+ jsr _print_msg ; prints the dictionary entry, returns at its 0 code
+
+ ; restore old state
+ lda #0
+ sta dict_escape ; the escape has been handled
+ pla ; pops mirror the pushes above, in reverse order
+ sta bitcount
+ pla
+ sta srcptr+1
+ pla
+ sta srcptr
+ pla
+ sta inbyte
+ pla
+ tay
+ jmp @noprint
+
+@normalchar:
+ cmp #27
+ bcs @notlower
+ adc #'a'-1 ; 1-26 are a-z (carry is clear here, so the add is exact)
+ bne @printit ; always taken: result is never 0
+
+@notlower:
+ cmp #52
+ bne @notdict
+ inc dict_escape ; Z means next 6 bits are dictionary ID
+ bne @noprint ; always taken: dict_escape went from 0 to 1
+
+@notdict:
+ bcs @notupper ; carry is still from cmp #52: set means the code is above 52
+ adc #38 ; 27-52 are A-Z (52 itself never reaches here: it is the escape)
+ bne @printit ; always taken: result is never 0
+
+@notupper:
+ sbc #53 ; 53-63 are table lookups (carry is set here, so the subtract is exact)
+ tax
+ lda table,x
+
+@printit:
+ sty ysave ; _cputc trashes Y
+ jsr _cputc
+ ldy ysave
+@noprint:
+ lda #0
+ sta outbyte ; clear the accumulator for the next code
+ ldx #6
+ dec bitcount ; account for the bit that completed this code
+ beq @nextbyte
+ bne @bitloop ; always taken
+
+ decodersize = * - _print_msg ; bytes of code from _print_msg up to this point
+
+ .out .sprintf("print_msg() is %d bytes", decodersize + tablesize)
+ .out .sprintf("total textdecomp is %d bytes", decodersize + tablesize + dictsize) ; dictsize is defined right after the dict_hi table
-- 
cgit v1.2.3