diff options
author | B. Watson <yalhcru@gmail.com> | 2016-02-25 16:51:09 -0500 |
---|---|---|
committer | B. Watson <yalhcru@gmail.com> | 2016-02-25 16:51:09 -0500 |
commit | 9dc7267a351b79ac5b855e812520362f28922341 (patch) | |
tree | b40e8e2144358752b990f1c94091fda0880e3eb7 | |
parent | c9c027bb8d620eb3f5066440067327b7d932a7e1 (diff) | |
download | taipan-9dc7267a351b79ac5b855e812520362f28922341.tar.gz |
dictionary text compression, 7666 bytes free
-rw-r--r-- | messages.pl | 37 | ||||
-rw-r--r-- | taipan.c | 14 | ||||
-rw-r--r-- | textdecomp.s | 133 | ||||
-rw-r--r-- | textdecomp.s.beforedict | 86 | ||||
-rw-r--r-- | textdecomp.s.dict.dontuseyet | 177 |
5 files changed, 256 insertions, 191 deletions
diff --git a/messages.pl b/messages.pl index 65ade0f..d225d17 100644 --- a/messages.pl +++ b/messages.pl @@ -4,6 +4,21 @@ # messages are listed at the end of this file after __END__ marker. # output of this script should be redirected to messages.c. +# make dictionary from textdecomp.s comments +open my $t, "<textdecomp.s" or die $!; +while(<$t>) { + next unless /^dict(\d\d):.*;\s*"([^"]*)"/; + my $value = 'Z' . chr($1 + 96); + my $key = quotemeta $2; + $dictionary{$key} = $value; +} +close $t; + +#for(sort keys %dictionary) { +# warn "'$_' => $dictionary{$_}\n"; +#} +#exit 0; + print "// do not edit, contents are generated by messages.pl\n\n"; while(<DATA>) { @@ -17,9 +32,17 @@ while(<DATA>) { s/"//g; s/\\r//g; s/\\n/\n/g; - # warn "msg: $_\n"; + #warn "msg: $_\n"; $total_in += (1 + length); + + my $dict_used = 0; + for my $dk (keys %dictionary) { + if(s/$dk/$dictionary{$dk}/g) { + $dict_used = 1; + } + } + open my $out, ">msg.out" or die $!; print $out $_; close $out; @@ -34,7 +57,7 @@ while(<DATA>) { $total_out += @got; } die "failed to compress $orig\n" unless $readbytes; - print "\n};\n\n"; + print "\n};" . ($dict_used ? " // dictionary used" : "") . "\n\n"; } print "// messages: $msgcount\n"; @@ -149,8 +172,8 @@ me_to_go_to "me to go to:\r\n" already_here "\r\n\nYou're already here" hostile_ship " hostile ship" approaching " approaching" -fleet_drove_off "'s fleet drove them off!" -s_pirates "'s pirates" +fleet_drove_off "Li Yuen's fleet drove them off!" +s_pirates "Li Yuen's pirates" they_let_us_be "Good joss!! They let us be!!\r\n" ships_of_fleet " ships of Li Yuen's pirate\r\nfleet" captured_some_booty "We captured some booty.\r\nIt's worth " @@ -163,7 +186,7 @@ were_going_down "We're going down" storm_we_made_it " We made it!!\r\n\n" blown_off_course "We've been blown off course\r\nto " arriving_at "Arriving at " -asks " asks " +asks "Li Yuen asks " in_donation " in donation\r\nto the temple of Tin Hau, the Sea\r\nGoddess. Will you pay? " not_enough_cash "you do not have enough cash!!\r\n\n" make_up_difference "to make up\r\nthe difference for you? " @@ -184,7 +207,7 @@ to_repay_him "to repay\r\nhim? " to_borrow "to \r\nborrow? " wont_loan "\r\n\nHe won't loan you so much" bodyguards_killed " of your bodyguards have been killed\r\nby cutthroats and you have been robbed\r\nof all of your cash" -the_price_of "!! The price of " +the_price_of "Taipan!! The price of " nl_has_spc "\n has " risen "risen" dropped "dropped" @@ -215,6 +238,6 @@ wu_warn_1 "reminds you of the\r\nConfucian ideal of personal worthiness,\r\nand wu_warn_2 "He is reminded of a fabled barbarian\r\nwho came to a bad end, after not caring\r\nfor his obligations.\r\n\nHe hopes no such fate awaits you, his\r\nfriend" siezed_opium "The local authorities have seized your\r\nOpium cargo and have also fined you\r\n" whouse_theft "Messenger reports large theft\r\nfrom warehouse" -has_sent_lieutenant " has sent a Lieutenant,\r\nTaipan. He says his admiral wishes\r\nto see you in Hong Kong, posthaste!\r\n" +has_sent_lieutenant "Li Yuen has sent a Lieutenant,\r\nTaipan. He says his admiral wishes\r\nto see you in Hong Kong, posthaste!\r\n" beaten_robbed "You've been beaten up and\r\nrobbed of " in_cash " in cash" @@ -295,7 +295,7 @@ void cprint_taipan_bangbang(void); void cprint_taipan_period(void); void cprint_taipan_prompt(void); void cprint_elder_brother_wu(void); -void cprint_li_yuen(void); +// void cprint_li_yuen(void); void cprint_firm_colon(void); char get_ship_status(void); @@ -533,10 +533,12 @@ void cprint_elder_brother_wu(void) { /* This one only saves space when Li Yuen occurs at the start of a string, not in the middle */ +/* void cprint_li_yuen(void) { // cputs("Li Yuen"); print_msg(M_li_yuen); } +*/ void cprint_Do_you_want(void) { // cputs("Do you want "); @@ -2345,7 +2347,7 @@ void quit(void) at_sea(); captains_report(); - cprint_li_yuen(); + // cprint_li_yuen(); // cputs("'s fleet drove them off!"); print_msg(M_fleet_drove_off); @@ -2355,7 +2357,7 @@ void quit(void) if (((result == 0) && (randi()%(4 + (8 * li))) == 0) || (result == 2)) { clear_msg_window(); - cprint_li_yuen(); + // cprint_li_yuen(); // cputs("'s pirates"); print_msg(M_s_pirates); cprint_taipan_bangbang(); @@ -2583,7 +2585,7 @@ void li_yuen_extortion(void) { if(!amount) return; /* asking for 0 is dumb */ compradores_report(); - cprint_li_yuen(); + // cprint_li_yuen(); // cputs(" asks "); print_msg(M_asks); cprintfancy(amount); @@ -2859,7 +2861,7 @@ void good_prices(void) { unsigned char i = randi()%4; compradores_report(); - cprint_taipan(); + // cprint_taipan(); // cputs("!! The price of "); print_msg(M_the_price_of); // cputs(item[i]); @@ -3536,7 +3538,7 @@ int main(void) { if((port != 1) && (li == 0) && (!one_chance_in(4))) { compradores_report(); - cprint_li_yuen(); + // cprint_li_yuen(); /* cputs(" has sent a Lieutenant,\n" "Taipan. He says his admiral wishes\n" diff --git a/textdecomp.s b/textdecomp.s index a19d460..da0206a 100644 --- a/textdecomp.s +++ b/textdecomp.s @@ -12,6 +12,82 @@ bitcount = FR0+1 ; counts 8..1, current bit in inbyte inbyte = FR0+2 ysave = FR0+3 + dict_escape = FR0+4 + + .rodata +; one or two letter words are not worth listing here. 3 is only good +; if it's used pretty often. +; entry 0 is a dummy! The encoder gets confused by "Z\0". This may get fixed. +; dictionary size cannot exceed 255 bytes. +; the quoted stuff in comments is read by messages.pl, it needs to be exact. +dict00: +dict01: .byte $98, $9d, $73, $54, $53, $80 ; "Li Yuen", 4 occurrences +dict02: .byte $7c, $c1, $05, $4b, $57, $12, $3d, $42, $05, $48, $00 ; "Elder Brother", 3 +dict03: .byte $64, $f5, $40 ; "you", 30 +dict04: .byte $d7, $c1, $4d, $00 ; " 'em", 8 +dict05: .byte $cc, $f5, $40 ; "You", 16 +dict06: .byte $d4, $80, $56, $14, $00 ; " have", 11 +dict07: .byte $d5, $32, $01, $30, $c0, $00 ; " shall", 6 +dict08: .byte $fb, $5c, $49, $50, $8d, $40 ; ") With ", 2 +dict09: .byte $05, $21, $cf, $00 ; "argo", 6 +dict10: .byte $4c, $82, $50, $00 ; "ship", 10 +dict11: .byte $d5, $70, $52, $14, $83, $d5, $4c, $50, $00 ; " warehouse", 4 +dict12: .byte $d5, $42, $05, $00 ; " the" 17 +dict13: .byte $d4, $f1, $80 ; " of", 14 +dict14: .byte $5c, $93, $0c, $00 ; "will", 8 +dict15: .byte $d4, $21, $45, $3b, $50, $00 ; " been ", 6 +dict16: .byte $d5, $43, $f5, $00 ; " to ", 12 +dict17: .byte $20, $14, $f5, $00 ; "has ", 7 +dict18: .byte $18, $f4, $b5, $00 ; "for ", 7 +dict19: .byte $25, $3d, $40 ; "is ", 9 +dict20: .byte $04, $e1, $00 ; "and", 10 +dict21: .byte $d4, $30, $53, $20, $00 ; " cash", 8 +dict22: .byte $04, $41, $09, $50, $93, $ce, $04, $cd, $40 ; "additional ", 3 +dict23: .byte $b8, $12, $50, $04, $e0, $00 ; "Taipan", 3 (but really many more!) +dict24: .byte $d4, $f3, $8c, $67, $50 ; " only ", 3 +dict25: .byte $d4, $25, $47, $1c, $54, $93, $00 ; " buggers", 3 +dict26: .byte $5c, $95, $08, $d4, $00 ; "with ", 4 +;dict27: .byte $78, $fd, $40 ; "Do ", 4 + +dict_offsets: + .byte dict00 - dict00 + .byte dict01 - dict00 + .byte dict02 - dict00 + .byte dict03 - dict00 + .byte dict04 - dict00 + .byte dict05 - dict00 + .byte dict06 - dict00 + .byte dict07 - dict00 + .byte dict08 - dict00 + .byte dict09 - dict00 + .byte dict10 - dict00 + .byte dict11 - dict00 + .byte dict12 - dict00 + .byte dict13 - dict00 + .byte dict14 - dict00 + .byte dict15 - dict00 + .byte dict16 - dict00 + .byte dict17 - dict00 + .byte dict18 - dict00 + .byte dict19 - dict00 + .byte dict20 - dict00 + .byte dict21 - dict00 + .byte dict22 - dict00 + .byte dict23 - dict00 + .byte dict24 - dict00 + .byte dict25 - dict00 + .byte dict26 - dict00 + ;.byte dict27 - dict00 + +; rough estimate of how many bytes are saved by the dictionary +; stuff: the dictionary + extra decoder stuff costs 221 bytes (vs. +; the original textdecode.s without dictionary). +; each dictionary entry saves (length - 2) * (occurrences - 1) bytes. +; with only dict00 - dict23, we'll save around 173 bytes. +; actually it works out to 179 bytes, but the estimate was close. + + dictsize = * - dict00 + .out .sprintf("dictionary plus dict_offsets is %d bytes", dictsize) .rodata table: ; outbyte values 53..63 @@ -24,11 +100,12 @@ table: ; outbyte values 53..63 .code .endif -; extern void __fastcall__ print_msg(char *msg); +; extern void __fastcall__ print_msg(const char *msg); _print_msg: sta srcptr stx srcptr+1 lda #0 + sta dict_escape sta outbyte ldy #$ff ; since we increment it first thing... @@ -54,6 +131,13 @@ _print_msg: rts ; 0 = end of message @notend: + ldx dict_escape ; was last character a Z? + beq @normalchar + + jsr dict_lookup + jmp @noprint + +@normalchar: cmp #27 bcs @notlower adc #'a'-1 ; 1-26 are a-z @@ -61,6 +145,11 @@ _print_msg: @notlower: cmp #52 + bne @notdict + inc dict_escape ; Z means next 6 bits are dictionary ID + bne @noprint + +@notdict: bcs @notupper adc #38 ; 27-52 are A-Z bne @printit @@ -74,6 +163,7 @@ _print_msg: sty ysave ; _cputc trashes Y jsr _cputc ldy ysave +@noprint: lda #0 sta outbyte ldx #6 @@ -81,6 +171,47 @@ _print_msg: beq @nextbyte bne @bitloop +dict_lookup: + ; dictionary lookup time. save our state on the stack + tya + pha + lda inbyte + pha + lda srcptr + pha + lda srcptr+1 + pha + lda bitcount + pha + + ; recursive call + ldx outbyte + lda dict_offsets,x + clc + adc #<dict00 + sta dict_escape ; temp usage + lda #>dict00 + adc #0 + tax + lda dict_escape + jsr _print_msg + + ; restore old state + lda #0 + sta dict_escape + pla + sta bitcount + pla + sta srcptr+1 + pla + sta srcptr + pla + sta inbyte + pla + tay + rts + decodersize = * - _print_msg .out .sprintf("print_msg() is %d bytes", decodersize + tablesize) + .out .sprintf("total textdecomp is %d bytes", decodersize + tablesize + dictsize) diff --git a/textdecomp.s.beforedict b/textdecomp.s.beforedict new file mode 100644 index 0000000..a19d460 --- /dev/null +++ b/textdecomp.s.beforedict @@ -0,0 +1,86 @@ + +; text decompressor for taipan. +; text is packed 6 bits per character. see textcomp.c +; for details. + + .include "atari.inc" + .export _print_msg + .import _cputc + + srcptr = FR1 + outbyte = FR0 ; decoded 6-bit byte + bitcount = FR0+1 ; counts 8..1, current bit in inbyte + inbyte = FR0+2 + ysave = FR0+3 + + .rodata +table: ; outbyte values 53..63 + .byte ' ', '!', '%', ',', '.', '?', ':', 39, 40, 41, $9b + tablesize = * - table + + .ifdef CART_TARGET + .segment "HIGHCODE" + .else + .code + .endif + +; extern void __fastcall__ print_msg(char *msg); +_print_msg: + sta srcptr + stx srcptr+1 + lda #0 + sta outbyte + ldy #$ff ; since we increment it first thing... + + ldx #6 ; counts 6..1, current bit in outbyte +@nextbyte: + iny + lda #8 + sta bitcount + lda (srcptr),y + sta inbyte +@bitloop: + asl inbyte + rol outbyte + dex + beq @decode ; got 6 bits + dec bitcount + bne @bitloop + beq @nextbyte + +@decode: + lda outbyte + bne @notend + rts ; 0 = end of message + +@notend: + cmp #27 + bcs @notlower + adc #'a'-1 ; 1-26 are a-z + bne @printit + +@notlower: + cmp #52 + bcs @notupper + adc #38 ; 27-52 are A-Z + bne @printit + +@notupper: + sbc #53 ; 53-63 are table lookups + tax + lda table,x + +@printit: + sty ysave ; _cputc trashes Y + jsr _cputc + ldy ysave + lda #0 + sta outbyte + ldx #6 + dec bitcount + beq @nextbyte + bne @bitloop + + decodersize = * - _print_msg + + .out .sprintf("print_msg() is %d bytes", decodersize + tablesize) diff --git a/textdecomp.s.dict.dontuseyet b/textdecomp.s.dict.dontuseyet deleted file mode 100644 index ed2feba..0000000 --- a/textdecomp.s.dict.dontuseyet +++ /dev/null @@ -1,177 +0,0 @@ - -; text decompressor for taipan. -; text is packed 6 bits per character. see textcomp.c -; for details. - - .include "atari.inc" - .export _print_msg - .import _cputc - - srcptr = FR1 - outbyte = FR0 ; decoded 6-bit byte - bitcount = FR0+1 ; counts 8..1, current bit in inbyte - inbyte = FR0+2 - ysave = FR0+3 - dict_escape = FR0+4 - - .code ; this really should be rodata, but bank 2 has no space (yet) -; one or two letter words are not worth listing here. 3 is only good -; if it's used pretty often. -; entry 0 is a dummy! The encoder gets confused by "Z\0". This may get fixed. -dict00: -dict01: .byte $98, $9d, $73, $54, $53, $80 ; "Li Yuen", 4 occurrences -dict02: .byte $7c, $c1, $05, $4b, $57, $12, $3d, $42, $05, $48, $00 ; "Elder Brother", 3 -dict03: .byte $64, $f5, $40 ; "you", 30 -dict04: .byte $d7, $c1, $4d, $00 ; " 'em", 8 -dict05: .byte $cc, $f5, $40 ; "You", 16 -dict06: .byte $d4, $80, $56, $14, $00 ; " have", 11 -dict07: .byte $d5, $32, $01, $30, $c0, $00 ; " shall", 6 -dict08: .byte $fb, $5c, $49, $50, $8d, $40 ; ") With ", 2 -dict09: .byte $05, $21, $cf, $00 ; "argo", 6 -dict10: .byte $4c, $82, $50, $00 ; "ship", 10 -dict11: .byte $d5, $70, $52, $14, $83, $d5, $4c, $50, $00 ; " warehouse", 4 -dict12: .byte $d5, $42, $05, $00 ; " the" 17 -dict13: .byte $d4, $f1, $80 ; " of", 14 -dict14: .byte $5c, $93, $0c, $00 ; "will", 8 -dict15: .byte $d4, $21, $45, $3b, $50, $00 ; " been ", 6 -dict16: .byte $d5, $43, $f5, $00 ; " to ", 12 -dict17: .byte $20, $14, $f5, $00 ; "has ", 7 -dict18: .byte $18, $f4, $b5, $00 ; "for ", 7 -dict19: .byte $25, $3d, $40 ; "is ", 9 -dict20: .byte $04, $e1, $00 ; "and", 10 -dict21: .byte $d4, $30, $53, $20, $00 ; " cash", 8 -dict22: .byte $04, $41, $09, $50, $93, $ce, $04, $cd, $40 ; "additional ", 3 -dict23: .byte $b8, $12, $50, $04, $e0, $00 ; "Taipan", 3 (but really many more!) - -dict_lo: .byte <dict00, <dict01, <dict02, <dict03, <dict04, <dict05, <dict06, <dict07, <dict08, <dict09, <dict10, <dict11, <dict12, <dict13, <dict14, <dict15, <dict16, <dict17, <dict18, <dict19, <dict20, <dict21, <dict22, <dict23 -dict_hi: .byte >dict00, >dict01, >dict02, >dict03, >dict04, >dict05, >dict06, >dict07, >dict08, >dict09, >dict10, >dict11, >dict12, >dict13, >dict14, >dict15, >dict16, >dict17, >dict18, >dict19, >dict20, >dict21, >dict22, >dict23 - -; rough calculation of how many bytes are saved by the dictionary -; stuff: the dictionary + extra decoder stuff costs 221 bytes. -; each dictionary entry saves (length - 2) * (occurrences - 1) bytes. -; with only dict00 - dict21, we'll save around 147 bytes. - - dictsize = * - dict00 - .out .sprintf("dictionary is %d bytes", dictsize) - - .rodata -table: ; outbyte values 53..63 - .byte ' ', '!', '%', ',', '.', '?', ':', 39, 40, 41, $9b - tablesize = * - table - - .ifdef CART_TARGET - .segment "HIGHCODE" - .else - .code - .endif - -; extern void __fastcall__ print_msg(const char *msg); -_print_msg: - sta srcptr - stx srcptr+1 - lda #0 - sta dict_escape - sta outbyte - ldy #$ff ; since we increment it first thing... - - ldx #6 ; counts 6..1, current bit in outbyte -@nextbyte: - iny - lda #8 - sta bitcount - lda (srcptr),y - sta inbyte -@bitloop: - asl inbyte - rol outbyte - dex - beq @decode ; got 6 bits - dec bitcount - bne @bitloop - beq @nextbyte - -@decode: - lda outbyte - bne @notend - rts ; 0 = end of message - -@notend: - ldx dict_escape ; was last character a Z? - beq @normalchar - - ; dictionary lookup time. save our state on the stack - ; TODO: see if this code's smaller using ZP instead of stack. - ; it'll only be reentrant once, but that's enough. - tya - pha - lda inbyte - pha - lda srcptr - pha - lda srcptr+1 - pha - lda bitcount - pha - - ; recursive call - ldx outbyte - lda dict_lo,x - pha - lda dict_hi,x - tax - pla - jsr _print_msg - - ; restore old state - lda #0 - sta dict_escape - pla - sta bitcount - pla - sta srcptr+1 - pla - sta srcptr - pla - sta inbyte - pla - tay - jmp @noprint - -@normalchar: - cmp #27 - bcs @notlower - adc #'a'-1 ; 1-26 are a-z - bne @printit - -@notlower: - cmp #52 - bne @notdict - inc dict_escape ; Z means next 6 bits are dictionary ID - bne @noprint - -@notdict: - bcs @notupper - adc #38 ; 27-52 are A-Z - bne @printit - -@notupper: - sbc #53 ; 53-63 are table lookups - tax - lda table,x - -@printit: - sty ysave ; _cputc trashes Y - jsr _cputc - ldy ysave -@noprint: - lda #0 - sta outbyte - ldx #6 - dec bitcount - beq @nextbyte - bne @bitloop - - decodersize = * - _print_msg - - .out .sprintf("print_msg() is %d bytes", decodersize + tablesize) - .out .sprintf("total textdecomp is %d bytes", decodersize + tablesize + dictsize) |