From 34ffde01e321b9a433cc6ecc32a6b8a7adebce7c Mon Sep 17 00:00:00 2001 From: "B. Watson" Date: Sat, 5 Nov 2022 14:38:53 -0400 Subject: Save another ~45 sec runtime, version 0.1.1 --- dla.s | 55 +++++++++++++++++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 22 deletions(-) diff --git a/dla.s b/dla.s index 4a30921..48a6031 100644 --- a/dla.s +++ b/dla.s @@ -33,11 +33,7 @@ pixptr: .res 2 ; used by plotsetup and friends pixmask: .res 1 ; ditto. cursor_x: .res 1 ; cursor x/y are args to plot/unplot/locate cursor_y: .res 1 - -min_x: .res 1 ; limits: if the particle gets outside this box, -max_x: .res 1 ; delete it and spawn a new one. -min_y: .res 1 -max_y: .res 1 +pixptr2: .res 2 ; used by drunkwalk circlesize: .res 1 ; 0 to 3 @@ -441,7 +437,7 @@ isloop: ;;; - set pixmask to the mask for cursor_x. ;;; - set Y reg to the byte offset for cursor_x. ;;; - returns with cursor_x in X reg, pixmask in A reg too. -;;; Called by plot, unplot, and drunkwalk (a lot!) +;;; Called by plot, unplot, and locate. plotsetup: ldx cursor_y lda lineaddrs_l,x @@ -508,7 +504,13 @@ oob: ;;; possible (we're not there yet). drunkwalk: ; X holds the X coord the whole time, only needs to be loaded on entry. - ldx part_x ;3 + ; preload pixptr, too. + ldx part_x + ldy part_y ;3 + lda lineaddrs_l,y ; 5 + sta pixptr ; 3 + lda lineaddrs_h,y ; 5 + sta pixptr+1 ; 3 ; using bit/bmi/bvc saves 5.25 cycles on average, compared to ; immediate cmp and bne. ; 4 code paths: up=15, down=18, left=19, right=17, avg=17.25. @@ -516,7 +518,7 @@ drunkwalk: ; are "branch always". ; all the "cmp #0" here get their operands modified by set_limits. dwloop: - ldy part_y + ldy part_y ; 3 bit RANDOM ;4 ; use top 2 bits (probably more random, definitely faster) bmi lr ;2/3 bvc down ;2/3 @@ -537,15 +539,25 @@ lr: selfmod_xmin = * + 1 cpx #0 ; 2 beq oob ; 2 - bne checkneigh ;3 + ldy xoffsets-1,x ; moved left, check left X neighbor only. + lda xmasks-1,x ; right X neighbor definitely empty, because + and (pixptr),y ; we just moved out of that cell. + bne stick + beq check_y ; 3 ; still have to check Y (up/down) neighbors. right: inx ;3 ; N=0 V=0 right selfmod_xmax = * + 1 cpx #0 ; 2 beq oob ; 2 + ldy xoffsets+1,x ; as above, moved right, check right neighbor only. + lda xmasks+1,x + and (pixptr),y + bne stick + beq check_y ; 3 checkneigh: ; check neighbors. used to be a subroutine, inlined it. + ; we only get here when the pixel has moved up or down (not left/right). ; also inlined plotsetup here. sty part_y ; 3 lda lineaddrs_l,y ; 5 @@ -574,34 +586,33 @@ slow_x: lda xmasks+1,x and (pixptr),y bne stick + check_y: + ; this happens no matter what direction the pixel moved. ; (0,-1) - ; subtract 32 (one line) from the pointer. slightly faster + ; subtract 32 (one line) from the pointer. one cycle faster ; than reloading from lineaddrs_l/h table. lda pixptr ; 3 sec ; 2 sbc #$20 ; 2 - sta pixptr ; 3 - bcs pp1ok ; 3|2 - dec pixptr+1 ; 0|5 - ; =13|17 (avg closer to 13) -pp1ok: + sta pixptr2 ; 3 + lda pixptr+1 ; 3 + sbc #0 ; 2 + sta pixptr2+1 ; 3 ;ldx part_x ; X already has this from before ldy xoffsets,x lda xmasks,x sta pixmask - and (pixptr),y + and (pixptr2),y bne stick ; (0,1) tya ora #$40 ; add 64, AKA 2 screen lines tay - lda (pixptr),y + lda (pixptr2),y and pixmask - beq dwloop ; ...or fall through to stick. - ; note that if we add much more code to drunkwalk, the beq will - ; have to become a jmp, which takes 3 extra cycles. so any code - ; added above had better save more than 3 cycles! + bne stick + jmp dwloop ; too far for a branch stick: ; we always get here with Z flag clear stx part_x ; only update part_x at exit. @@ -761,7 +772,7 @@ ci_done: ; banner and saveprompt must start with a clear-screen code. banner: .byte $7d, "Diffusion Limited Aggregate",$9b - .byte "Urchlay's ASM version 0.1.0",$9b,$9b + .byte "Urchlay's ASM version 0.1.1",$9b,$9b .byte "Particle count range: 1 to 65535",$9b .byte "How many particles [",$0 -- cgit v1.2.3