aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author B. Watson <urchlay@slackware.uk> 2022-11-05 14:38:53 -0400
committer B. Watson <urchlay@slackware.uk> 2022-11-05 14:39:04 -0400
commit 34ffde01e321b9a433cc6ecc32a6b8a7adebce7c (patch)
tree db7b309b2d50539165aa98447deada1ffe7121f7
parent 7746f34350e46f6d508aac3f99d47d8f232c42f2 (diff)
download dla-asm-34ffde01e321b9a433cc6ecc32a6b8a7adebce7c.tar.gz
Save another ~45 sec runtime, version 0.1.1
-rw-r--r-- dla.s 55
1 file changed, 33 insertions, 22 deletions
diff --git a/dla.s b/dla.s
index 4a30921..48a6031 100644
--- a/dla.s
+++ b/dla.s
@@ -33,11 +33,7 @@ pixptr: .res 2 ; used by plotsetup and friends
pixmask: .res 1 ; ditto.
cursor_x: .res 1 ; cursor x/y are args to plot/unplot/locate
cursor_y: .res 1
-
-min_x: .res 1 ; limits: if the particle gets outside this box,
-max_x: .res 1 ; delete it and spawn a new one.
-min_y: .res 1
-max_y: .res 1
+pixptr2: .res 2 ; used by drunkwalk
circlesize: .res 1 ; 0 to 3
@@ -441,7 +437,7 @@ isloop:
;;; - set pixmask to the mask for cursor_x.
;;; - set Y reg to the byte offset for cursor_x.
;;; - returns with cursor_x in X reg, pixmask in A reg too.
-;;; Called by plot, unplot, and drunkwalk (a lot!)
+;;; Called by plot, unplot, and locate.
plotsetup:
ldx cursor_y
lda lineaddrs_l,x
@@ -508,7 +504,13 @@ oob:
;;; possible (we're not there yet).
drunkwalk:
; X holds the X coord the whole time, only needs to be loaded on entry.
- ldx part_x ;3
+ ; preload pixptr, too.
+ ldx part_x
+ ldy part_y ;3
+ lda lineaddrs_l,y ; 5
+ sta pixptr ; 3
+ lda lineaddrs_h,y ; 5
+ sta pixptr+1 ; 3
; using bit/bmi/bvc saves 5.25 cycles on average, compared to
; immediate cmp and bne.
; 4 code paths: up=15, down=18, left=19, right=17, avg=17.25.
@@ -516,7 +518,7 @@ drunkwalk:
; are "branch always".
; all the "cmp #0" here get their operands modified by set_limits.
dwloop:
- ldy part_y
+ ldy part_y ; 3
bit RANDOM ;4 ; use top 2 bits (probably more random, definitely faster)
bmi lr ;2/3
bvc down ;2/3
@@ -537,15 +539,25 @@ lr:
selfmod_xmin = * + 1
cpx #0 ; 2
beq oob ; 2
- bne checkneigh ;3
+ ldy xoffsets-1,x ; moved left, check left X neighbor only.
+ lda xmasks-1,x ; right X neighbor definitely empty, because
+ and (pixptr),y ; we just moved out of that cell.
+ bne stick
+ beq check_y ; 3 ; still have to check Y (up/down) neighbors.
right:
inx ;3 ; N=0 V=0 right
selfmod_xmax = * + 1
cpx #0 ; 2
beq oob ; 2
+ ldy xoffsets+1,x ; as above, moved right, check right neighbor only.
+ lda xmasks+1,x
+ and (pixptr),y
+ bne stick
+ beq check_y ; 3
checkneigh:
; check neighbors. used to be a subroutine, inlined it.
+ ; we only get here when the pixel has moved up or down (not left/right).
; also inlined plotsetup here.
sty part_y ; 3
lda lineaddrs_l,y ; 5
@@ -574,34 +586,33 @@ slow_x:
lda xmasks+1,x
and (pixptr),y
bne stick
+
check_y:
+ ; this happens no matter what direction the pixel moved.
; (0,-1)
- ; subtract 32 (one line) from the pointer. slightly faster
+ ; subtract 32 (one line) from the pointer. one cycle faster
; than reloading from lineaddrs_l/h table.
lda pixptr ; 3
sec ; 2
sbc #$20 ; 2
- sta pixptr ; 3
- bcs pp1ok ; 3|2
- dec pixptr+1 ; 0|5
- ; =13|17 (avg closer to 13)
-pp1ok:
+ sta pixptr2 ; 3
+ lda pixptr+1 ; 3
+ sbc #0 ; 2
+ sta pixptr2+1 ; 3
;ldx part_x ; X already has this from before
ldy xoffsets,x
lda xmasks,x
sta pixmask
- and (pixptr),y
+ and (pixptr2),y
bne stick
; (0,1)
tya
ora #$40 ; add 64, AKA 2 screen lines
tay
- lda (pixptr),y
+ lda (pixptr2),y
and pixmask
- beq dwloop ; ...or fall through to stick.
- ; note that if we add much more code to drunkwalk, the beq will
- ; have to become a jmp, which takes 3 extra cycles. so any code
- ; added above had better save more than 3 cycles!
+ bne stick
+ jmp dwloop ; too far for a branch
stick: ; we always get here with Z flag clear
stx part_x ; only update part_x at exit.
@@ -761,7 +772,7 @@ ci_done:
; banner and saveprompt must start with a clear-screen code.
banner:
.byte $7d, "Diffusion Limited Aggregate",$9b
- .byte "Urchlay's ASM version 0.1.0",$9b,$9b
+ .byte "Urchlay's ASM version 0.1.1",$9b,$9b
.byte "Particle count range: 1 to 65535",$9b
.byte "How many particles [",$0