aboutsummaryrefslogtreecommitdiff
path: root/textdecomp.s
diff options
context:
space:
mode:
authorB. Watson <yalhcru@gmail.com>2016-02-26 16:26:32 -0500
committerB. Watson <yalhcru@gmail.com>2016-02-26 16:26:32 -0500
commitb37ac0ede97639931bd540fe34848eb8bf52764b (patch)
treefe1127c9d0eb22329a7d96b06f8f884109cb9650 /textdecomp.s
parentf3b7f8c68e6fe58aad1d093b4efc4eb665ff2788 (diff)
downloadtaipan-b37ac0ede97639931bd540fe34848eb8bf52764b.tar.gz
more dict entries, better comments, fix cart docs
Diffstat (limited to 'textdecomp.s')
-rw-r--r--textdecomp.s147
1 files changed, 102 insertions, 45 deletions
diff --git a/textdecomp.s b/textdecomp.s
index 617ba2f..0a0c684 100644
--- a/textdecomp.s
+++ b/textdecomp.s
@@ -1,25 +1,64 @@
-; text decompressor for taipan.
-; text is packed 6 bits per character. see textcomp.c
-; for details.
+; Text decompressor for Taipan.
+
+; extern void __fastcall__ print_msg(const char *msg);
+
+; Text is packed into one snac per character.
+
+; A snac is 6 bits, somewhere between a nybble and a byte. It could
+; also stand for "Six Numeral ASCII-like Code" :)
+
+; See textcomp.c for details of encoded format.
.include "atari.inc"
.export _print_msg
.import _cputc
srcptr = FR1
- outbyte = FR0 ; decoded 6-bit byte
+ outsnac = FR0 ; decoded snac (6-bit byte)
bitcount = FR0+1 ; counts 8..1, current bit in inbyte
- inbyte = FR0+2
+ inbyte = FR0+2 ; current input byte
ysave = FR0+3
- dict_escape = FR0+4
+ dict_escape = FR0+4 ; true if last character was a Z
.rodata
-; one or two letter words are not worth listing here. 3 is only good
-; if it's used pretty often.
-; entry 0 is a dummy! The encoder gets confused by "Z\0". This may get fixed.
-; dictionary size cannot exceed 255 bytes.
-; the quoted stuff in comments is read by messages.pl, it needs to be exact.
+
+; The dictionary itself. Each entry is a snac-encoded string. One or two
+; letter words are not worth listing here: they encode to 2 bytes each,
+; plus the dictionary escape code is 2 bytes (snacs actually) per use. 3
+; is only good if it's used pretty often.
+
+; In messages.c, dict01 to dict26 will show up as Za thru Zz, and dict27
+; and up are ZA, ZB, etc. In theory, a dict entry could reference another
+; dict entry (the decoder can handle it), but in practice it's not real
+; useful to do.
+
+; Entry 0 is a dummy! The encoder gets confused by "Z\0". This may
+; get fixed.
+
+; There can be up to 63 entries in the dictionary (64, counting the
+; dummy entry 0), since a 6-bit snac is used as the index.
+
+; Dictionary size cannot exceed 255 bytes. Actually the last entry
+; can extend past 255 bytes, so long as it *starts* within 255 bytes
+; of dict00. Break this rule and you get a range error when you build.
+
+; The quoted stuff in comments is read by messages.pl, it needs to be
+; the exact un-encoded form of the snac string. Anything after the quotes
+; (e.g. number of occurrences) is ignored. The order here isn't important,
+; messages.pl will apply them in order by length (longest first).
+
+; To get the bytes to use for a particular message:
+; echo -n "message here" | ./textcomp 2>/dev/null|perl -ple 's/0x/\$/g; s/ /, /g'
+
+; TODO: no way to use \n in these (which affects dict33), fix.
+
+; TODO: if a message used in the game is exactly the same as a dict entry,
+; figure out a way for the game to use the dict entry in-place, instead
+; of a string consisting only of a dictionary lookup. Perl script can
+; generate an asm file that gets included here? _M_taipan = dict23, and
+; in messages.c it's an extern.
+
dict00:
dict01: .byte $98, $9d, $73, $54, $53, $80 ; "Li Yuen", 4 occurrences
dict02: .byte $7c, $c1, $05, $4b, $57, $12, $3d, $42, $05, $48, $00 ; "Elder Brother", 3
@@ -52,6 +91,14 @@ dict28: .byte $d5, $70, $4e, $50, $00 ; " want"
dict29: .byte $5c, $f4, $94, $20, $93, $85, $4d, $30, $00 ; "worthiness"
dict30: .byte $d4, $d5, $43, $20, $00 ; " much"
dict31: .byte $10, $91, $86, $15, $21, $4e, $0c, $50, $00 ; "difference"
+dict32: .byte $74, $f3, $50, $48, $11, $0f, $48, $00 ; "Comprador"
+dict33: .byte $f1, $3d, $6c, $15, $03, $d2, $50, $00 ; "'s Report"
+dict34: .byte $6d, $91, $78, $d5, $71, $7c, $30, $cd, $40 ; "Aye, we'll "
+dict35: .byte $08, $f0, $52, $10, $00 ; "board"
+dict36: .byte $40, $94, $81, $50, $50, $00 ; "pirate"
+dict37: .byte $d4, $e3, $c0 ; " no"
+dict38: .byte $d5, $72, $53, $20, $00 ; " wish"
+dict39: .byte $10, $50, $94, $00 ; "debt"
dict_offsets:
.byte dict00 - dict00
@@ -86,6 +133,14 @@ dict_offsets:
.byte dict29 - dict00
.byte dict30 - dict00
.byte dict31 - dict00
+ .byte dict32 - dict00
+ .byte dict33 - dict00
+ .byte dict34 - dict00
+ .byte dict35 - dict00
+ .byte dict36 - dict00
+ .byte dict37 - dict00
+ .byte dict38 - dict00
+ .byte dict39 - dict00
; rough estimate of how many bytes are saved by the dictionary
; stuff: the dictionary + extra decoder stuff costs 221 bytes (vs.
@@ -94,13 +149,13 @@ dict_offsets:
; with only dict00 - dict23, we'll save around 173 bytes.
; actually it works out to 179 bytes, but the estimate was close.
; we've reached the point of diminishing returns: dict00 - dict31 only
-; saves 200 bytes.
+; saves 199 bytes.
dictsize = * - dict00
.out .sprintf("dictionary plus dict_offsets is %d bytes", dictsize)
.rodata
-table: ; outbyte values 53..63
+table: ; outsnac values 53..63
.byte ' ', '!', '%', ',', '.', '?', ':', 39, 40, 41, $9b
tablesize = * - table
@@ -110,58 +165,57 @@ table: ; outbyte values 53..63
.code
.endif
-; extern void __fastcall__ print_msg(const char *msg);
_print_msg:
sta srcptr
stx srcptr+1
lda #0
sta dict_escape
- sta outbyte
+ sta outsnac
ldy #$ff ; since we increment it first thing...
- ldx #6 ; counts 6..1, current bit in outbyte
+ ldx #6 ; counts 6..1, current bit in outsnac
@nextbyte:
iny
lda #8
- sta bitcount
+ sta bitcount ; counts 8..1, current bit in inbyte
lda (srcptr),y
sta inbyte
@bitloop:
- asl inbyte
- rol outbyte
+ asl inbyte ; get next bit from inbyte...
+ rol outsnac ; ...into outsnac
dex
- beq @decode ; got 6 bits
- dec bitcount
- bne @bitloop
- beq @nextbyte
+ beq @decode ; got 6 bits, decode into ascii
+ dec bitcount ; more bits in this byte?
+ bne @bitloop ; get rest of bits in this byte...
+ beq @nextbyte ; ...else next byte
@decode:
- lda outbyte
- bne @notend
- rts ; 0 = end of message
+ lda outsnac
+ bne @notend ; are we done?
+ rts ; 0 = end of message
@notend:
- ldx dict_escape ; was last character a Z?
+ ldx dict_escape ; was previous character a Z?
beq @normalchar
- jsr dict_lookup
- jmp @noprint
+ jsr dict_lookup ; if so, do a dictionary lookup...
+ jmp @noprint ; ...and pick back up at next byte
-@normalchar:
+@normalchar: ; else it's a normal character
cmp #27
bcs @notlower
- adc #'a'-1 ; 1-26 are a-z
+ adc #'a'-1 ; 1-26 are a-z
bne @printit
@notlower:
cmp #52
bne @notdict
inc dict_escape ; Z means next 6 bits are dictionary ID
- bne @noprint
+ bne @noprint ; don't actually print the Z
@notdict:
bcs @notupper
- adc #38 ; 27-52 are A-Z
+ adc #38 ; 27-51 are A-Y
bne @printit
@notupper:
@@ -175,14 +229,17 @@ _print_msg:
ldy ysave
@noprint:
lda #0
- sta outbyte
+ sta outsnac
ldx #6
dec bitcount
beq @nextbyte
bne @bitloop
dict_lookup:
- ; dictionary lookup time. save our state on the stack
+ ; dictionary lookup time. save our state on the stack. note that
+ ; using the stack means dict entries could potentially contain
+ ; dictionary escapes. each level would eat 7 bytes of stack, so be
+ ; careful (the current dictionary doesn't do this at all)
tya
pha
lda inbyte
@@ -194,17 +251,16 @@ dict_lookup:
lda bitcount
pha
- ; recursive call
- ldx outbyte
- lda dict_offsets,x
+ ldx outsnac ; get the start address of the dictionary entry into AX
+ lda dict_offsets,x ; this is why the dictionary can't be >255 bytes total
clc
- adc #<dict00
- sta dict_escape ; temp usage
+ adc #<dict00 ; calculate low byte from base + offset
+ sta dict_escape ; temp usage, will be overwritten after _print_msg
lda #>dict00
- adc #0
- tax
- lda dict_escape
- jsr _print_msg
+ adc #0 ; calculate hi byte
+ tax ; hi byte in X
+ lda dict_escape ; lo byte in A
+ jsr _print_msg ; recursive call, print the dictionary entry
; restore old state
lda #0
@@ -219,7 +275,8 @@ dict_lookup:
sta inbyte
pla
tay
- rts
+
+ rts ; print rest of original message
decodersize = * - _print_msg