From 1cf1a42b0eaf807e69608979772d09dbfe00b9fe Mon Sep 17 00:00:00 2001
From: "B. Watson"
Date: Tue, 1 Nov 2022 01:27:07 -0400
Subject: fastmasks, ~15% speedup

---
 Makefile |  2 +-
 dla.s    | 41 ++++++++++++++++++++++++++++++-----------
 2 files changed, 31 insertions(+), 12 deletions(-)

diff --git a/Makefile b/Makefile
index fb969a8..3109cd9 100644
--- a/Makefile
+++ b/Makefile
@@ -14,7 +14,7 @@ clean:
 	rm -f dla.xex dlatbl.s dla.list dla.labels *.o
 
 test: all
-	atari800 -turbo dla.xex
+	atari800 dla.xex
 
 %.xex: %.s
 	$(CL65) $(CL65FLAGS) -t none -o $@ $<
diff --git a/dla.s b/dla.s
index 7904af7..3c61064 100644
--- a/dla.s
+++ b/dla.s
@@ -495,6 +495,8 @@ unplot:
 ;	sta part_y
 ;	rts
 
+oob:
+	rts
 ;;; Subroutine: drunkwalk
 ;;; Walk the point around randomly until it either is
 ;;; adjacent to a set pixel or goes out of bounds.
@@ -547,33 +549,46 @@ dontplot:
 
 	; check neighbors. used to be a subroutine, inlined it.
 	; also inlined plotsetup here.
-	; (-1,0)
-	dec cursor_x
 	ldx cursor_y
 	lda lineaddrs_l,x
 	sta pixptr
 	lda lineaddrs_h,x
 	sta pixptr+1
+
+	; 3/4 of the time, we can use a faster code path, check
+	; (-1,0) and (1,0) at the same time. this happens only when
+	; both pixels lie within the same byte.
+	ldx cursor_x
+	lda fastmasks,x
+	beq slow_x
+	ldy xoffsets,x
+	and (pixptr),y
+	bne stick
+	beq check_y
+slow_x:
+	; (-1,0)
 	ldx cursor_x
+	dex
 	ldy xoffsets,x
 	lda xmasks,x
 	and (pixptr),y
 	bne stick
 	; (1,0)
 	inx
-	stx cursor_x
 	inx
 	ldy xoffsets,x
 	lda xmasks,x
 	and (pixptr),y
 	bne stick
+check_y:
 	; (0,-1)
-	dec cursor_y
-	ldx cursor_y
-	lda lineaddrs_l,x
-	sta pixptr
-	lda lineaddrs_h,x
-	sta pixptr+1
+	ldx cursor_y ;3
+	dex ;2
+	lda lineaddrs_l,x ;5
+	sta pixptr ;3
+	lda lineaddrs_h,x ;5
+	sta pixptr+1 ;3
+	;=21
 	ldx cursor_x
 	ldy xoffsets,x
 	lda xmasks,x
@@ -582,7 +597,7 @@ dontplot:
 	bne stick
 	; (0,1)
 	tya
-	ora #$40 ; add 64
+	ora #$40 ; add 64, AKA 2 screen lines
 	tay
 	lda (pixptr),y
 	and pixmask
@@ -590,7 +605,6 @@ dontplot:
 	jmp drunkwalk ; too far for a branch
 
 stick:
-oob:
 	rts
 
 ;;; Subroutine: drawseed
@@ -800,6 +814,11 @@ xmasks:
 	.byte $80,$40,$20,$10,$08,$04,$02,$01
 	.endrep
 
+fastmasks:
+	.repeat 32
+	.byte $00,$a0,$50,$28,$14,$0a,$05,$00
+	.endrep
+
 ;;; display list
 ; ANTIC opcodes
 blank8 = $70
-- 
cgit v1.2.3