From cf513e685ecfab5acfc5cf3ea0b9e67370915da8 Mon Sep 17 00:00:00 2001
From: "B. Watson" <yalhcru@gmail.com>
Date: Mon, 18 Jan 2016 14:28:59 -0500
Subject: fix big_negate, allow lowercase z in firm name, tighten up bigfloat.s

---
 bigfloat.s | 101 ++++++++++++++++++++++++++++++++++++++++---------------------
 1 file changed, 66 insertions(+), 35 deletions(-)

(limited to 'bigfloat.s')
diff --git a/bigfloat.s b/bigfloat.s
index 9cfae63..1ee2dc9 100644
--- a/bigfloat.s
+++ b/bigfloat.s
@@ -1,21 +1,23 @@
 
 
- .importzp ptr3, ptr4, sreg
+ .importzp ptr3, sreg
  .import popeax, popax, pushax, _memcmp
  .export _ulong_to_big, _big_to_ulong, _big_add, _big_sub, _big_mul, _big_div
  .export _bank_maxed_out, _big_cmp, _big_copy, _big_negate
 
  .include "atari.inc"
 
-;IFP = $d9aa
-
  fptemp = $a0 ; for now
  trampoline = $c0
 
+ ; cc65's atari.inc is missing this FP entry point:
  NORMALIZE = $dc00
 
+ ; atari.inc also has a typo, PLD1P for FLD1P
+ FLD1P = PLD1P
+
  .rodata
-BIG_64K:
+BIG_64K: ; 65536 (2**16) in float format.
  .byte $42, $06, $55, $36, $00, $00
 
 ;BIG_ULONG_MAX:
@@ -23,7 +25,18 @@ BIG_64K:
 
  .code
 
-; TODO: replace these *_to_* with OS calls
+; It seems like fr0_to_fptemp and friends should be using the OS
+; FLD* and FST* routines, doesn't it? But look:
+
+;fr0_to_fptemp:
+; lda #<fptemp
+; sta FLPTR
+; lda #>fptemp
+; sta FLPTR+1
+; jmp FST0P
+
+; ...that's 11 bytes of code. The fr0_to_fptemp loop below saves 1 byte,
+; plus it runs faster (doesn't use FLPTR, no JMP).
 
 fr0_to_fptemp:
  ldx #5
@@ -52,36 +65,35 @@ fptemp_to_fr1:
  bpl @l
  rts
 
-fr0_to_ptr3:
- ldy #5
-@l:
- lda FR0,y
- sta (ptr3),y
- dey
- bpl @l
- rts
-
-ptr4_to_fr1:
- ldy #5
-@l:
- lda (ptr4),y
- sta FR1,y
- dey
- bpl @l
- rts
+;fr0_to_ptr3:
+; ldy #5
+;@l:
+; lda FR0,y
+; sta (ptr3),y
+; dey
+; bpl @l
+; rts
+
+;ptr4_to_fr1:
+; ldy #5
+;@l:
+; lda (ptr4),y
+; sta FR1,y
+; dey
+; bpl @l
+; rts
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; void __fastcall__ big_negate(bignump dest, bignump src);
+; void __fastcall__ big_negate(bignump b);
+; This doesn't call the ROM or use FR0/FR1, it just inverts the sign
+; bit in-place.
 _big_negate:
  sta ptr3
  stx ptr3+1
- jsr popax
- sta ptr4
- stx ptr4+1
  ldy #0
  lda (ptr3),y
  eor #$80
- sta (ptr4),y
+ sta (ptr3),y
  rts
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -134,7 +146,7 @@ _big_binary_op:
  jsr popax
  sta FLPTR
  stx FLPTR+1
- jsr PLD1P
+ jsr FLD1P
 
  ; get 1st operand (a), load into FR0
  jsr popax
@@ -194,6 +206,9 @@ _big_div:
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; void __fastcall__ ulong_to_big(const unsigned long l, bignum *b);
+; This works by splitting the 32-bit l into two 16-bit ints and
+; converting them separately using the OS, then multiplying the high
+; result by 2^16 and adding the low result.
 _ulong_to_big:
  sta ptr3
  stx ptr3+1 ; save b (destination)
@@ -211,16 +226,32 @@ _ulong_to_big:
  stx FR0+1
  jsr IFP    ; convert to fp
 
- lda #<BIG_64K  ; high value needs to be multiplied by 65536
- sta ptr4
+ ; high value needs to be multiplied by 65536
+
+ lda #<BIG_64K
+ sta FLPTR
  lda #>BIG_64K
- sta ptr4+1
- jsr ptr4_to_fr1
+ sta FLPTR+1
+ jsr FLD1P
 
- jsr FMUL       ; multiply...
+ ; old version:
+; lda #<BIG_64K
+; sta ptr4
+; lda #>BIG_64K
+; sta ptr4+1
+; jsr ptr4_to_fr1
+
+ jsr FMUL          ; multiply...
  jsr fptemp_to_fr1 ; grab low value
  jsr FADD          ; add to total
- jmp fr0_to_ptr3   ; store it in b and we're done.
+
+ ; store it in b and we're done.
+ ;jmp fr0_to_ptr3 ; used to do this, use OS instead:
+ lda ptr3
+ sta FLPTR
+ lda ptr3+1
+ sta FLPTR+1
+ jmp FST0P
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; char __fastcall__ big_to_ulong(bignump b, unsigned long *l);
@@ -235,7 +266,7 @@ _big_to_ulong:
  sta sreg
  stx FLPTR+1
  stx sreg+1
- jsr FLD0P ; there's a typo in atari.inc, should be FLD1P
+ jsr FLD0P
 
  ldx #<BIG_64K ; FR1 = 65536
  ldy #>BIG_64K
-- 
cgit v1.2.3