aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--bigint48.h31
-rw-r--r--bigint48.s516
-rw-r--r--bignum.h26
-rw-r--r--div.c119
4 files changed, 686 insertions, 6 deletions
diff --git a/bigint48.h b/bigint48.h
new file mode 100644
index 0000000..92b119b
--- /dev/null
+++ b/bigint48.h
@@ -0,0 +1,31 @@
+/* bignum implementation for little-endian (LSB-first) 6-byte integers.
+ Only constants and the bignum/bugnump data types need
+ be defined here. See bignum.h for API. */
+
+#define bignum(x) char x[6]
+#define bignump char *
+
+/****** constants ******/
+/* zero */
+#define BIG_0 { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }
+
+/* bank interest rate is 0.5%, or .005, or 1/200 */
+#define BIG_200 { 0xc8, 0x00, 0x00, 0x00, 0x00, 0x00 }
+
+/* bignum 100, used for score calculations in final_stats() */
+#define BIG_100 { 0x64, 0x00, 0x00, 0x00, 0x00, 0x00 }
+
+/* one thousand, one million, one billion, one trillion
+ used by cprintfancy_big() */
+
+#define BIG_1K { 0xe8, 0x03, 0x00, 0x00, 0x00, 0x00 }
+
+#define BIG_1M { 0x40, 0x42, 0x0f, 0x00, 0x00, 0x00 }
+
+#define BIG_1B { 0x00, 0xca, 0x9a, 0x3b, 0x00, 0x00 }
+
+#define BIG_1T { 0x00, 0x10, 0xa5, 0xd4, 0xe8, 0x00 }
+
+/* max value for a ulong, 2**32-1 */
+#define BIG_MAX_ULONG { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }
+
diff --git a/bigint48.s b/bigint48.s
new file mode 100644
index 0000000..1971af2
--- /dev/null
+++ b/bigint48.s
@@ -0,0 +1,516 @@
+
+; 48-bit integer bignum implementation for taipan.
+; the Atari 800/XL/XE build doesn't use this, it's only for
+; the 5200 (though the A8 build can be tested with it).
+
+; .include "atari.inc" ; don't include atari.inc on the 5200, I'll get confused.
+ FR0 = $d4 ; this is the only label we need from atari.inc
+
+ .export _big_copy, _ulong_to_big, _big_cmp, _big_to_ulong, _big_add, _big_sub, _big_negate
+ .export _big_div, _big_mul
+ .import popax, pushax
+ .importzp ptr1, ptr2, ptr3, tmp1
+
+ ; used by _big_div
+ numerator = FR0
+ denominator = FR0+6
+ result = FR0+12
+ quotient = FR0+18
+ halfnum = FR0+24
+ newdenom = FR0+30
+ ;sign = FR0+36
+
+ ; used by _big_mul
+ multiplicand = FR0
+ multiplier = FR0+6
+ ;result = FR0+12 ; same as above
+
+ start = *
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; extern char __cdecl__ big_mul(bignump dest, bignump multiplicand, bignump multiplier);
+; basically, init result to 0, shift the multiplicand right, add the multiplier
+; to the result if a 1 shifted out. Then shift the multiplier left, repeat.
+; 1010 x 0101 (10 times 5):
+; result = 0;
+; 0101 >> 1, shifted out a 1, so result += 1010, then shift the 1010 left to get 10100.
+; 010 >> 1, shifted out a 0, so result stays same, then shift the 10100 left to get 101000
+; 01 >> 1, shifted out a 1, so result += 101000, then shift the 101000 left to get 1010000
+; 0 >> 1, shifted out a 0, result stays same.
+; result ends up 1010 + 101000 or 50.
+; the only PITA here is extending out to 48 bits. No overflow detection,
+; hey, C doesn't detect int overflows either so why should I worry?
+_big_mul:
+ jsr popax
+ sta ptr1
+ stx ptr1+1
+ jsr popax
+ sta ptr2
+ stx ptr2+1
+
+ ldy #5
+@p1lp:
+ lda (ptr1),y
+ sta multiplier,y
+ lda (ptr2),y
+ sta multiplicand,y
+ lda #0
+ sta result,y
+ dey
+ bpl @p1lp
+
+ ldy #48 ; bit count
+@shiftlp:
+ ; for each bit, shift the multiplicand right...
+ lsr multiplicand+5
+ ror multiplicand+4
+ ror multiplicand+3
+ ror multiplicand+2
+ ror multiplicand+1
+ ror multiplicand
+ bcc @dont_add ; if we shifted out a 0, don't add multiplier
+
+ ; if we shifted out a 1, add the multiplier.
+ clc
+ lda multiplier
+ adc result
+ sta result
+ lda multiplier+1
+ adc result+1
+ sta result+1
+ lda multiplier+2
+ adc result+2
+ sta result+2
+ lda multiplier+3
+ adc result+3
+ sta result+3
+ lda multiplier+4
+ adc result+4
+ sta result+4
+ lda multiplier+5
+ adc result+5
+ sta result+5
+
+ ; unconditionally shift the multiplier left
+@dont_add:
+ asl multiplier
+ rol multiplier+1
+ rol multiplier+2
+ rol multiplier+3
+ rol multiplier+4
+ rol multiplier+5
+
+ ; are we done?
+ dey
+ bne @shiftlp ; no, do next bit
+
+ jmp mul_div_done ; yes, store result.
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; extern char __cdecl__ big_div(bignump dest, bignump dividend, bignump divisor);
+; dividend and divisor AKA numerator and denominator.
+; this eats a ton of zero page, but we can afford it.
+
+; wrote this in C and translated to asm by hand. The C is very asm-like,
+; and would make your professor cry (or curse). The resulting asm is
+; a bloated mess, but still smaller than if I'd compiled the C code
+; with cc65. There's lots of room for improvement.
+
+;;int divide(int num, int denom) {
+;; int result, quotient, newdenom, halfnum;
+
+_big_div:
+ jsr popax ; get dividend (denominator)
+ sta ptr1
+ stx ptr1+1
+ jsr popax ; get divisor (numerator)
+ sta ptr2
+ stx ptr2+1
+ ; leave dest argument on the stack.
+
+;; result = 0;
+ ldy #5 ; copy values to ZP
+@bdcp:
+ lda (ptr1),y
+ sta denominator,y
+ lda (ptr2),y
+ sta numerator,y
+ lda #0
+ sta result,y
+ dey
+ bpl @bdcp
+ ; at this point we are done with ptr1 and ptr2, so we don't
+ ; have to save them before calling our other functions.
+
+ ; don't need this.
+;; ; deal with signs.
+;; ;lda #0 ; A already 0 from loop above
+;; sta sign
+;; ldx #denominator
+;; jsr fixsign
+;; ldx #numerator
+;; jsr fixsign
+
+;;outerloop:
+;; newdenom = denom;
+@outerloop:
+ ldx #5
+@copy_and_zero:
+ lda denominator,x
+ sta newdenom,x
+ lda #0
+ sta quotient,x
+ dex
+ bpl @copy_and_zero
+
+;; quotient = 1;
+ inc quotient ; other bytes were zeroed in loop above.
+
+;; if(newdenom < num)
+;; goto checkequal;
+ lda #newdenom
+ ldx #0
+ jsr pushax
+ lda #numerator
+ ldx #0
+ jsr _big_cmp ; Z flag for equal, N=1 less, N=0 greater
+;; if(newdenom < num)
+;; goto innerprep;
+ bmi @innerprep
+;; if(newdenom == num)
+;; goto innerprep;
+ beq @innerprep
+
+;; quotient = 0;
+;; num = 0;
+;; goto addquot;
+ lda #0
+ sta quotient
+ sta numerator
+ sta numerator+1
+ sta numerator+2
+ sta numerator+3
+ sta numerator+4
+ sta numerator+5
+ beq @addquot
+
+;;innerprep:
+;; halfnum = num >> 1;
+@innerprep:
+ lda numerator+5
+ lsr
+ sta halfnum+5
+ lda numerator+4
+ ror
+ sta halfnum+4
+ lda numerator+3
+ ror
+ sta halfnum+3
+ lda numerator+2
+ ror
+ sta halfnum+2
+ lda numerator+1
+ ror
+ sta halfnum+1
+ lda numerator
+ ror
+ sta halfnum
+
+;;innerloop:
+;; if(newdenom > halfnum)
+;; goto innerdone;
+@innerloop:
+ lda #newdenom
+ ldx #0
+ jsr pushax
+ lda #halfnum
+ ldx #0
+ jsr _big_cmp
+ beq @notgt
+ bmi @notgt
+ bpl @innerdone
+@notgt:
+
+;; newdenom <<= 1;
+ asl newdenom
+ rol newdenom+1
+ rol newdenom+2
+ rol newdenom+3
+ rol newdenom+4
+ rol newdenom+5
+
+;; quotient <<= 1;
+ asl quotient
+ rol quotient+1
+ rol quotient+2
+ rol quotient+3
+ rol quotient+4
+ rol quotient+5
+
+;; goto innerloop;
+ jmp @innerloop
+
+;;innerdone:
+;; num -= newdenom;
+@innerdone:
+ lda #numerator
+ ldx #0
+ jsr pushax
+ lda #numerator
+ ldx #0
+ jsr pushax
+ lda #newdenom
+ ldx #0
+ jsr pushax
+ jsr _big_sub
+
+;;addquot:
+;; result += quotient;
+@addquot:
+ lda #result
+ ldx #0
+ jsr pushax
+ lda #result
+ ldx #0
+ jsr pushax
+ lda #quotient
+ ldx #0
+ jsr pushax
+ jsr _big_add
+
+;; if(num) goto outerloop;
+ lda numerator
+ ora numerator+1
+ ora numerator+2
+ ora numerator+3
+ ora numerator+4
+ ora numerator+5
+ beq mul_div_done
+ jmp @outerloop ; too far for relative branch
+
+;; return result;
+mul_div_done:
+; destination is still on the stack.
+ jsr popax
+ sta ptr1
+ stx ptr1+1
+ ldy #5
+@doneloop:
+ lda result,y
+ sta (ptr1),y
+ dey
+ bpl @doneloop
+
+ rts
+
+;;}
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; extern void __fastcall__ big_copy(bignump dest, bignump src);
+_big_copy:
+ sta ptr1
+ stx ptr1+1
+ jsr popax
+ sta ptr2
+ stx ptr2+1
+ ldy #5
+@bclp:
+ lda (ptr1),y
+ sta (ptr2),y
+ dey
+ bpl @bclp
+ rts
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; extern void __fastcall__ ulong_to_big(const unsigned long l, bignump b);
+_ulong_to_big:
+ ; ptr1 = b
+ sta ptr1
+ stx ptr1+1
+
+ ; zero out top 16 bits of b
+ ldy #4
+ lda #0
+ sta (ptr1),y
+ iny
+ sta (ptr1),y
+
+ ; copy bottom 16 bits of l to bottom 16 bits of b
+ jsr popax
+ ldy #0 ; popax eats the Y reg
+ sta (ptr1),y
+ iny
+ txa ; sadly there's no stx (zp),y on the 6502
+ sta (ptr1),y
+
+ ; copy top 16 bits of l to middle 16 bits of b
+ jsr popax
+ ldy #2
+ sta (ptr1),y
+ iny
+ txa
+ sta (ptr1),y
+ rts
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; TODO: consolidate with big_copy(), they're almost identical.
+; extern char __fastcall__ big_to_ulong(bignump b, unsigned long *l);
+_big_to_ulong:
+ sta ptr2
+ stx ptr2+1
+ jsr popax
+ sta ptr1
+ stx ptr1+1
+ ldy #3
+@bclp:
+ lda (ptr1),y
+ sta (ptr2),y
+ dey
+ bpl @bclp
+ rts
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; extern signed char __fastcall__ big_cmp(bignump a, bignump b);
+_big_cmp:
+ sta ptr2 ; ptr2 = b;
+ stx ptr2+1
+ jsr popax
+ sta ptr1 ; ptr1 = a;
+ stx ptr1+1
+
+ ; since this is a signed compare, we'll check the sign bits first.
+ ; if a is negative and b isn't, or vice versa, we shortcut the rest
+ ; of the compare.
+ ldy #5 ; point at the MSB
+ lda (ptr1),y ; get MSB of a
+ bpl @a_pos
+ lda (ptr2),y ; a is negative, is b?
+ bmi @cmplp ; yep, do a normal compare
+ bpl @return_neg ; no, return negative result
+
+@a_pos:
+ lda (ptr2),y ; a is positive, is b?
+ bmi @return_pos ; no, return positive result
+ ; yes, fall thru and do a normal compare
+
+ ; Y is still 5 no matter how we got here.
+@cmplp:
+ sec
+ lda (ptr1),y
+ sbc (ptr2),y ; A = a[Y] - b[Y];
+ bne @unequal ; if non-zero subtraction result, go check carry
+ dey ; else got zero, keep comparing
+ bpl @cmplp ; more bytes to check?
+ bmi @return_a ; no. note: A still zero here
+@unequal:
+ bcs @return_pos
+@return_neg:
+ lda #$ff ; negative result, return -1
+ .byte $2c ; skip next 2 bytes
+@return_pos:
+ lda #$01 ; positive result, return 1 (actually $0101)
+@return_a:
+ tax ; sign-extend, the optimizer gets wonky about char vs int returns
+ rts
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; extern void __fastcall__ big_negate(bignump b);
+; 2's complement negation: invert all the bits, then add 1
+; TODO: this should be smaller.
+_big_negate:
+ sta ptr1
+ stx ptr1+1
+
+ ldy #0
+@invloop:
+ lda (ptr1),y
+ eor #$ff
+ sta (ptr1),y
+ iny
+ cpy #6
+ bne @invloop
+
+ ldy #0
+ sec
+ php
+@addloop:
+ plp
+ lda (ptr1),y
+ adc #0
+ php
+ sta (ptr1),y
+ iny
+ cpy #6
+ bne @addloop
+
+ plp
+ rts
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; extern char __cdecl__ big_add(bignump dest, bignump addend1, bignump addend2);
+; extern char __cdecl__ big_sub(bignump dest, bignump minuend, bignump subtrahend);
+; addition and subtraction are almost identical, so the code here does both.
+; TODO: this is not as compact as it could be.
+_big_sub:
+ lda #$80
+ .byte $2c
+_big_add:
+ lda #0
+ sta tmp1 ; tmp1 is negative if subtracting, positive if adding
+
+ jsr popax ; addend2 or subtrahend
+ sta ptr2
+ stx ptr2+1
+ jsr popax ; addend1 or minuend
+ sta ptr1
+ stx ptr1+1
+ jsr popax ; dest
+ sta ptr3
+ stx ptr3+1
+
+ ldy #0
+
+ bit tmp1
+ bmi @setc ; set carry if subtracting...
+ clc ; otherwise clear it
+ .byte $24 ; bit ZP, skip only 1 byte
+@setc:
+ sec
+
+ php ; we have to keep the C flag on the stack since the cpy below trashes it
+@addlp:
+ plp
+ lda (ptr1),y
+ bit tmp1
+ bmi @sub
+ adc (ptr2),y
+ .byte $2c
+
+@sub:
+ sbc (ptr2),y
+
+@store:
+ sta (ptr3),y
+ php
+ iny
+ cpy #6
+ bne @addlp
+ plp
+ rts
+
+ .out .sprintf("bigint48 code is %d bytes", *-start)
+
+; was going to be a helper routine for big_mul and big_div, but
+; taipan doesn't need signed multiply/divide.
+;; fixsign:
+;; lda 0+5,x
+;; bpl @pos
+;; lda #$80
+;; eor sign
+;; sta sign
+;; txa
+;; ldx #0
+;; jsr _big_negate
+;; @pos:
+;; rts
+
diff --git a/bignum.h b/bignum.h
index 535ea65..06fbd8b 100644
--- a/bignum.h
+++ b/bignum.h
@@ -19,6 +19,15 @@
to use the constants:
bignum(foo) = BIG_0;
which looks a little weird I admit.
+
+ Notice that our bignum type is signed, even though we
+ only have functions to convert to/from unsigned long.
+ Only final_stats() and cprintfancy_big() ever deal with
+ negative bignums. Actually, big_div() and big_mul() are
+ unsigned, as they will never be called with negative
+ args. The only signed behaviour we care about here:
+ - big_sub() needs to be able to give a negative result.
+ - big_cmp() needs to be a signed compare.
*/
/* list all our implementations here */
@@ -65,11 +74,11 @@ BEWARE: unlike perl's <=>, the return value is *not* guaranteed to
Do not depend on any particular positive or negative return
value from this:
if(big_cmp(a, b) == -1) // WRONG!
- if(big_cmp(a, b) < 0) // Right. */
+ if(big_cmp(a, b) < 0) // Right. */
extern signed char __fastcall__ big_cmp(bignump a, bignump b);
/* basic math functions. conceptually they return a boolean for
- success, but only division has error checking.
+ success, but currently there is no error checking.
all can be read as: dest = arg2 OP arg3;
modulus isn't implemented as taipan doesn't use it for the bank.
These are __cdecl__ *not* __fastcall__ !!
@@ -85,12 +94,17 @@ extern void __fastcall__ big_negate(bignump b);
/* returns true if the bank is maxed out. We do this by checking the exponent
byte, so the "max" is tied to the bignum implementation, which is why its
prototype is here rather than bank.h. For Atari floats, it's 1.0e+14, or
- 100 trillion.
+ 100 trillion. For bigint48 it would be something like, bit 46 is nonzero,
+ 140.7 trillion.
- If you deposit 1 in the bank at the start of the game and never deposit
- more, the interest will max it out in 1915 (661 turns of play).
- */
+ With floats, if you deposit 1 in the bank at the start of the game and never deposit
+ more, the interest would max it out in 1915 (661 turns of play).
+
+ ended up not implementing this, I don't think anyone will ever have the patience
+ to play long enough to overflow an A8 float (9.0e+97) or a 48-bit int (2**48-1,
+ or 281.5 trillion).
extern char __fastcall__ bank_maxed_out(bignump b);
+ */
/* print a bignum, Taipan-style. Code for this lives in taipan.c actually. */
extern void cprintfancy_big(bignump b);
diff --git a/div.c b/div.c
new file mode 100644
index 0000000..9ee232d
--- /dev/null
+++ b/div.c
@@ -0,0 +1,119 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+/*
+example: 10 / 3
+
+num denom quotient result
+10 3 1 0
+10 6 2 2
+4 3 1 3
+*/
+
+/*
+ // working, let's simplify it.
+int divide(int num, int denom) {
+ int result = 0, quotient = 1, newdenom;
+ fprintf(stderr, "\tdivide(%d, %d)\n", num, denom);
+ do {
+ fprintf(stderr, "outer loop\n");
+ newdenom = denom;
+ quotient = 1;
+ if(newdenom > num) {
+ fprintf(stderr, "%d > %d: quotient = 0;\n", newdenom, num);
+ quotient = 0;
+ num = 0;
+ // return result;
+ } else if(newdenom == num) {
+ fprintf(stderr, "%d == %d: quotient = 1;\n", newdenom, num);
+ quotient = 1;
+ num = 0;
+ // return ++result;
+ } else {
+ while(newdenom <= num) {
+ fprintf(stderr, "inner loop %d %d %d\n", num, newdenom, quotient);
+ newdenom <<= 1;
+ quotient <<= 1;
+ }
+ newdenom >>= 1;
+ num -= newdenom;
+ quotient >>= 1;
+ }
+ result += quotient;
+ fprintf(stderr, "num==%d, newdenom==%d, quotient==%d, result==%d\n", num, newdenom, quotient, result);
+ } while(num);
+ return result;
+}
+*/
+
+int divide(int num, int denom) {
+ int result, quotient, newdenom, halfnum;
+
+ result = 0;
+outerloop:
+ newdenom = denom;
+ quotient = 1;
+
+ if(newdenom < num)
+ goto innerprep;
+ if(newdenom == num)
+ goto innerprep;
+
+ quotient = 0;
+ num = 0;
+ goto addquot;
+
+innerprep:
+ halfnum = num >> 1;
+
+innerloop:
+ if(newdenom > halfnum)
+ goto innerdone;
+ newdenom <<= 1;
+ quotient <<= 1;
+ goto innerloop;
+
+innerdone:
+ num -= newdenom;
+
+addquot:
+ result += quotient;
+ if(num) goto outerloop;
+
+ return result;
+}
+
+/*
+ // working, but recursive, doesn't lend itself well to
+ // rewriting in asm.
+int divide(int num, int denom) {
+ int quotient = 1, olddenom = denom;
+ fprintf(stderr, "\tdivide(%d, %d)\n", num, denom);
+ if(denom > num) return 0;
+ if(denom == num) return 1;
+ while(denom <= num) {
+ denom <<= 1;
+ quotient <<= 1;
+ fprintf(stderr, "num==%d, denom==%d, quotient==%d\n", num, denom, quotient);
+ }
+ num -= (denom >> 1);
+ return ((quotient >> 1) + divide(num, olddenom));
+}
+*/
+
+int main(int argc, char **argv) {
+ int num = 100, denom = 10, result1, result2;
+
+ if(argc > 1) num = atoi(argv[1]);
+ if(argc > 2) denom = atoi(argv[2]);
+ result1 = num / denom;
+ result2 = divide(num, denom);
+ printf("%d / %d == %d\n", num, denom, result1);
+ printf("divide(%d, %d) == %d\n", num, denom, result2);
+ if(result1 == result2) {
+ puts("OK");
+ return 0;
+ }
+ puts("FAIL");
+ return 1;
+}