1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
|
.importzp ptr3, sreg
.import popeax, popax, pushax, _memcmp
.export _ulong_to_big, _big_to_ulong, _big_add, _big_sub, _big_mul, _big_div
.export _bank_maxed_out, _big_cmp, _big_copy, _big_negate
.include "atari.inc"
; 6-byte zero-page scratch buffer used to stash one float between ROM calls
; (filled/read by fr0_to_fptemp, fptemp_to_fr0 and fptemp_to_fr1).
fptemp = $a0 ; for now
; 3 zero-page bytes that _big_binary_op fills with "JMP <routine>" so that it
; can JSR to a routine whose address is only known at run time.
trampoline = $c0
; cc65's atari.inc is missing this FP entry point:
NORMALIZE = $dc00
; atari.inc also has a typo, PLD1P for FLD1P
FLD1P = PLD1P
.rodata
BIG_64K: ; 65536 (2**16) in float format: exponent byte $42, BCD digits 06 55 36.
.byte $42, $06, $55, $36, $00, $00
;BIG_ULONG_MAX:
;.byte $44, $42, $94, $96, $72, $95
.ifdef CART_TARGET
.segment "HIGHCODE"
.else
.code
.endif
; It seems like fr0_to_fptemp and friends should be using the OS
; FLD* and FST* routines, doesn't it? But look:
;fr0_to_fptemp:
; lda #<fptemp
; sta FLPTR
; lda #>fptemp
; sta FLPTR+1
; jmp FST0P
; ...that's 11 bytes of code. The fr0_to_fptemp saves 1 byte of code,
; plus it runs faster (doesn't use FLPTR, no JMP).
fr0_to_fptemp:
        ; Copy the 6-byte float in FR0 to the fptemp scratch buffer.
        ; Bytes are copied from the last (index 5) down to the first.
        ldx     #5
@copy:
        lda     FR0,x
        sta     fptemp,x
        dex
        bpl     @copy           ; loop ends when X goes negative (past byte 0)
        rts
fptemp_to_fr0:
        ; Copy the 6-byte float stashed in fptemp back into FR0.
        ; Bytes are copied from the last (index 5) down to the first.
        ldx     #5
@copy:
        lda     fptemp,x
        sta     FR0,x
        dex
        bpl     @copy           ; loop ends when X goes negative (past byte 0)
        rts
fptemp_to_fr1:
        ; Copy the 6-byte float stashed in fptemp into FR1.
        ; Bytes are copied from the last (index 5) down to the first.
        ldx     #5
@copy:
        lda     fptemp,x
        sta     FR1,x
        dex
        bpl     @copy           ; loop ends when X goes negative (past byte 0)
        rts
;fr0_to_ptr3:
; ldy #5
;@l:
; lda FR0,y
; sta (ptr3),y
; dey
; bpl @l
; rts
;ptr4_to_fr1:
; ldy #5
;@l:
; lda (ptr4),y
; sta FR1,y
; dey
; bpl @l
; rts
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; void __fastcall__ big_negate(bignump b);
; This doesn't call the ROM or use FR0/FR1, it just inverts the sign
; bit (bit 7 of the first byte) in-place.
; A value whose first byte is $00 (zero) is left untouched: flipping its
; sign bit would produce $80, a "negative zero" whose first byte is no
; longer 0, which breaks code that tests the first byte for zero.
; In:  A/X = b (lo/hi)
; Clobbers: A, Y, ptr3
_big_negate:
        sta     ptr3
        stx     ptr3+1
        ldy     #0
        lda     (ptr3),y        ; sign + exponent byte
        beq     @done           ; zero stays zero
        eor     #$80            ; flip the sign bit
        sta     (ptr3),y
@done:
        rts
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; truncate FR0 to integer (no rounding: 2.8 -> 2, 0.5 -> 0)
; Any value with no integer digits becomes a true zero (all six bytes
; cleared by ZFR0), including values whose exponent is exactly $3f. Those
; used to keep their exponent byte with an all-zero mantissa.
; Clobbers A and X (plus whatever ZFR0 clobbers on the zero path).
trunc_fr0:
        lda     FR0
        and     #$7f            ; strip sign bit (we only care about exponent magnitude)
        sec
        sbc     #$3f            ; A now holds # of base-100 digits in integer part
        bcc     @zero           ; borrow: exponent below $3f, no integer digits
        bne     @ok             ; at least one integer digit
@zero:
        jmp     ZFR0            ; no integer part: zero out FR0 and exit
@ok:
        cmp     #5              ; fewer than 5 integer digits?
        bcs     @done           ; no, all 5 mantissa bytes are integer digits already
        tax                     ; X = index of first mantissa byte after the decimal point
        lda     #0
@zloop:
        sta     FR0+1,x         ; clear fractional mantissa bytes
        inx
        cpx     #5
        bne     @zloop
@done:
        rts
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; void __fastcall__ big_copy(bignump dest, bignump src)
; Copies *src to *dest by loading it into FR0 and storing it back out
; with the OS load/store routines.
; In: A/X = src (lo/hi); dest is on the C stack.
_big_copy:
        stx     FLPTR+1
        sta     FLPTR           ; FLPTR = src
        jsr     FLD0P           ; FR0 = *src
        jsr     popax           ; A/X = dest
        stx     FLPTR+1
        sta     FLPTR           ; FLPTR = dest
        jmp     FST0P           ; *dest = FR0 (tail call)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; void __fastcall__ big_binary_op(bignump dest, bignump a, bignump b, unsigned int jsraddr);
; Shared worker behind big_add/big_sub/big_mul/big_div.
; In: A/X = address of the FP routine to call; b, a and dest are popped
;     from the C stack in that order.
; Loads *a into FR0 and *b into FR1, calls the routine, stores FR0 to *dest.
_big_binary_op:
        ; build "JMP jsraddr" in the zero-page trampoline
        stx     trampoline+2
        sta     trampoline+1
        lda     #$4c            ; JMP absolute opcode
        sta     trampoline
        ; FR1 = *b (2nd operand, top of the C stack)
        jsr     popax
        stx     FLPTR+1
        sta     FLPTR
        jsr     FLD1P
        ; FR0 = *a (1st operand)
        jsr     popax
        stx     FLPTR+1
        sta     FLPTR
        jsr     FLD0P
        ; run the FP routine; its RTS returns here
        jsr     trampoline
; jsr NORMALIZE
; .byte $02
        ; *dest = FR0
        jsr     popax
        stx     FLPTR+1
        sta     FLPTR
        jmp     FST0P
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; void __cdecl__ big_add(bignump dest, bignump a, bignump b);
; Hands the ROM's FADD entry point to _big_binary_op, which pops the
; three stacked arguments and does the work.
_big_add:
        ldx     #>FADD
        lda     #<FADD
        jmp     _big_binary_op
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; void __cdecl__ big_sub(bignump dest, bignump a, bignump b);
; Hands the ROM's FSUB entry point (FR0 = FR0 - FR1) to _big_binary_op,
; which pops the three stacked arguments and does the work.
_big_sub:
        ldx     #>FSUB
        lda     #<FSUB
        jmp     _big_binary_op
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; void __cdecl__ big_mul(bignump dest, bignump a, bignump b);
; Hands the ROM's FMUL entry point to _big_binary_op, which pops the
; three stacked arguments and does the work.
_big_mul:
        ldx     #>FMUL
        lda     #<FMUL
        jmp     _big_binary_op
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; void __cdecl__ big_div(bignump dest, bignump a, bignump b);
; Hands the ROM's FDIV entry point (FR0 = FR0 / FR1) to _big_binary_op,
; which pops the three stacked arguments and does the work.
_big_div:
        ldx     #>FDIV
        lda     #<FDIV
        jmp     _big_binary_op
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; void __fastcall__ big_trunc(bignump b);
; C-callable wrapper for trunc_fr0: loads *b into FR0, truncates it to an
; integer and stores it back to *b.
; In: A/X = b (lo/hi)
; The entry label (and its export) were missing, which left this code
; unreachable after the preceding jmp.
        .export _big_trunc
_big_trunc:
        sta     FLPTR
        stx     FLPTR+1         ; FLPTR = b
        jsr     FLD0P           ; FR0 = *b
        jsr     trunc_fr0       ; FR0 = INT(FR0)
        jmp     FST0P           ; *b = FR0 (tail call, FLPTR still points at b)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; void __fastcall__ ulong_to_big(const unsigned long l, bignum *b);
; The OS integer-to-float routine (IFP) converts a 16-bit value held in
; FR0/FR0+1, so l is split into two 16-bit halves that are converted
; separately and recombined as  high * 65536 + low.
; In: A/X = b (lo/hi); l is on the C stack.
_ulong_to_big:
        stx     ptr3+1
        sta     ptr3            ; ptr3 = b (destination)
        jsr     popeax          ; A/X = low 16 bits of l, sreg = high 16 bits
        stx     FR0+1
        sta     FR0
        jsr     IFP             ; FR0 = (float)low
        jsr     fr0_to_fptemp   ; stash the low part
        ldx     sreg+1
        lda     sreg
        stx     FR0+1
        sta     FR0
        jsr     IFP             ; FR0 = (float)high
        ; scale the high part: FR0 = high * 65536
        ; (FLD1R takes the address in X/Y, so no FLPTR setup is needed)
        ldy     #>BIG_64K
        ldx     #<BIG_64K
        jsr     FLD1R           ; FR1 = 65536
        jsr     FMUL
        ; add the low part back in
        jsr     fptemp_to_fr1   ; FR1 = (float)low
        jsr     FADD            ; FR0 = high * 65536 + low
        ; *b = FR0, stored with the OS routine
        ldx     ptr3+1
        lda     ptr3
        stx     FLPTR+1
        sta     FLPTR
        jmp     FST0P
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; char __fastcall__ big_to_ulong(bignump b, unsigned long *l);
;
; Converts *b to a 32-bit unsigned integer in two 16-bit steps:
;   high word = INT(b / 65536)
;   low word  = INT(b) - high * 65536
; Returns 1 on success, or 0 (with *l untouched) if the first float-to-int
; conversion reports a range error.
; In: A/X = l (lo/hi); b is on the C stack.
; This works, but it's not small, fast, or elegant...
_big_to_ulong:
        stx     ptr3+1
        sta     ptr3            ; ptr3 = l (destination)
        jsr     popax           ; A/X = b
        sta     sreg
        stx     sreg+1          ; remember b so it can be reloaded later
        sta     FLPTR
        stx     FLPTR+1
        jsr     FLD0P           ; FR0 = *b
        ldy     #>BIG_64K
        ldx     #<BIG_64K
        jsr     FLD1R           ; FR1 = 65536
        jsr     FDIV            ; FR0 = FR0 / FR1
        jsr     trunc_fr0       ; FR0 = INT(FR0)
        jsr     fr0_to_fptemp   ; keep the float form for the second step
        jsr     FPI             ; FR0/FR0+1 = integer form
        bcs     @range_err      ; OS supposed to return with C set if range error
        ; high 16 bits go to bytes 2 and 3 of *l
        ldy     #3
        lda     FR0+1
        sta     (ptr3),y
        dey
        lda     FR0
        sta     (ptr3),y
        jsr     fptemp_to_fr0   ; FR0 = int((*b)/65536) again
        ldy     #>BIG_64K
        ldx     #<BIG_64K
        jsr     FLD1R           ; FR1 = 65536
        jsr     FMUL            ; FR0 = int((*b)/65536)*65536
        jsr     FMOVE           ; FR1 = FR0
        ldy     sreg+1
        ldx     sreg
        jsr     FLD0R           ; FR0 = original *b
        jsr     trunc_fr0       ; without this the result would be rounded (not desired)
        jsr     FSUB            ; FR0 = FR0 - FR1
        jsr     FPI
        ; low 16 bits go to bytes 0 and 1 of *l
        ldy     #1
        lda     FR0+1
        sta     (ptr3),y
        dey
        lda     FR0
        sta     (ptr3),y
        ; success. return 1 to caller.
        lda     #1
        ldx     #0
        rts
@range_err:
        ; failed, return 0 to caller
        ldx     #0
        txa
        rts
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; char __fastcall__ bank_maxed_out(bignump b);
; Returns 1 if the exponent byte of *b (sign bit masked off) is >= $46,
; otherwise 0. With the excess-$40, base-100 exponent used by this format
; ($42 encodes 65536 in BIG_64K) that means |*b| >= 100^6.
; In: A/X = b (lo/hi).  Out: A = 0 or 1, X = 0.
_bank_maxed_out:
        sta     FLPTR
        stx     FLPTR+1
        jsr     FLD0P           ; FR0 = *b
        jsr     NORMALIZE       ; just in case
        lda     FR0             ; get sign + exponent byte
        ldx     #0              ; high byte of the return value
        and     #$7f            ; remove sign bit (should never be negative anyway!)
                                ; (this was EOR #$7f, which inverted the exponent
                                ; bits and so reported zero and tiny values as maxed out)
        cmp     #$46
        bcc     @false          ; exponent below the limit
        lda     #1
        rts
@false:
        txa                     ; A = 0
        rts
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; signed char __fastcall__ big_cmp(bignump a, bignump b)
;
; Computes *a - *b with the ROM and returns the first byte of the result
; (sign bit + exponent) in both A and X: negative if a < b, zero if they
; are equal, positive if a > b.
; In: A/X = b (lo/hi); a is on the C stack.
; This could be a wrapper for _big_binary_op, but the stack would have
; to be rearranged first.
_big_cmp:
        stx     FLPTR+1
        sta     FLPTR
        jsr     FLD1P           ; FR1 = *b
        jsr     popax           ; A/X = a
        stx     FLPTR+1
        sta     FLPTR
        jsr     FLD0P           ; FR0 = *a
        ; subtract; only the sign (or zero-ness) of the result matters
        jsr     FSUB            ; FR0 = FR0 - FR1
        ldx     FR0             ; first byte carries the sign bit and is 0 for a zero result
        txa                     ; same byte in A and X, so the 16-bit value keeps the sign
        rts
|