From cf513e685ecfab5acfc5cf3ea0b9e67370915da8 Mon Sep 17 00:00:00 2001 From: "B. Watson" Date: Mon, 18 Jan 2016 14:28:59 -0500 Subject: fix big_negate, allow lowercase z in firm name, tighten up bigfloat.s --- bigfloat.s | 101 ++++++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 66 insertions(+), 35 deletions(-) (limited to 'bigfloat.s') diff --git a/bigfloat.s b/bigfloat.s index 9cfae63..1ee2dc9 100644 --- a/bigfloat.s +++ b/bigfloat.s @@ -1,21 +1,23 @@ - .importzp ptr3, ptr4, sreg + .importzp ptr3, sreg .import popeax, popax, pushax, _memcmp .export _ulong_to_big, _big_to_ulong, _big_add, _big_sub, _big_mul, _big_div .export _bank_maxed_out, _big_cmp, _big_copy, _big_negate .include "atari.inc" -;IFP = $d9aa - fptemp = $a0 ; for now trampoline = $c0 + ; cc65's atari.inc is missing this FP entry point: NORMALIZE = $dc00 + ; atari.inc also has a typo, PLD1P for FLD1P + FLD1P = PLD1P + .rodata -BIG_64K: +BIG_64K: ; 65536 (2**16) in float format. .byte $42, $06, $55, $36, $00, $00 ;BIG_ULONG_MAX: @@ -23,7 +25,18 @@ BIG_64K: .code -; TODO: replace these *_to_* with OS calls +; It seems like fr0_to_fptemp and friends should be using the OS +; FLD* and FST* routines, doesn't it? But look: + +;fr0_to_fptemp: +; lda #<fptemp +; sta FLPTR +; lda #>fptemp +; sta FLPTR+1 +; jmp FST0P + +; ...that's 11 bytes of code. The fr0_to_fptemp saves 1 byte of code, +; plus it runs faster (doesn't use FLPTR, no JMP). 
fr0_to_fptemp: ldx #5 @@ -52,36 +65,35 @@ fptemp_to_fr1: bpl @l rts -fr0_to_ptr3: - ldy #5 -@l: - lda FR0,y - sta (ptr3),y - dey - bpl @l - rts - -ptr4_to_fr1: - ldy #5 -@l: - lda (ptr4),y - sta FR1,y - dey - bpl @l - rts +;fr0_to_ptr3: +; ldy #5 +;@l: +; lda FR0,y +; sta (ptr3),y +; dey +; bpl @l +; rts + +;ptr4_to_fr1: +; ldy #5 +;@l: +; lda (ptr4),y +; sta FR1,y +; dey +; bpl @l +; rts ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -; void __fastcall__ big_negate(bignump dest, bignump src); +; void __fastcall__ big_negate(bignump b); +; This doesn't call the ROM or use FR0/FR1, it just inverts the sign +; bit in-place. _big_negate: sta ptr3 stx ptr3+1 - jsr popax - sta ptr4 - stx ptr4+1 ldy #0 lda (ptr3),y eor #$80 - sta (ptr4),y + sta (ptr3),y rts ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -134,7 +146,7 @@ _big_binary_op: jsr popax sta FLPTR stx FLPTR+1 - jsr PLD1P + jsr FLD1P ; get 1st operand (a), load into FR0 jsr popax @@ -194,6 +206,9 @@ _big_div: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; void __fastcall__ ulong_to_big(const unsigned long l, bignum *b); +; This works by splitting the 32-bit l into two 16-bit ints and +; converting them separately using the OS, then multiplying the high +; result by 2^16 and adding the low result. _ulong_to_big: sta ptr3 stx ptr3+1 ; save b (destination) @@ -211,16 +226,32 @@ _ulong_to_big: stx FR0+1 jsr IFP ; convert to fp - lda #BIG_64K - sta ptr4+1 - jsr ptr4_to_fr1 + sta FLPTR+1 + jsr FLD1P - jsr FMUL ; multiply... + ; old version: +; lda #BIG_64K +; sta ptr4+1 +; jsr ptr4_to_fr1 + + jsr FMUL ; multiply... jsr fptemp_to_fr1 ; grab low value jsr FADD ; add to total - jmp fr0_to_ptr3 ; store it in b and we're done. + + ; store it in b and we're done. 
+ ;jmp fr0_to_ptr3 ; used to do this, use OS instead: + lda ptr3 + sta FLPTR + lda ptr3+1 + sta FLPTR+1 + jmp FST0P ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; char __fastcall__ big_to_ulong(bignump b, unsigned long *l); @@ -235,7 +266,7 @@ _big_to_ulong: sta sreg stx FLPTR+1 stx sreg+1 - jsr FLD0P ; there's a typo in atari.inc, should be FLD1P + jsr FLD0P ldx #BIG_64K -- cgit v1.2.3