diff options
-rw-r--r-- | dla.s | 27 |
1 files changed, 11 insertions, 16 deletions
@@ -507,23 +507,25 @@ oob: ;;; This is the innermost loop, so it should be as optimized as ;;; possible (we're not there yet). drunkwalk: - ; using bit/bmi/bvc saves 6.25 cycles on average, compared to + ; using bit/bmi/bvc saves 4.25 cycles on average, compared to ; immediate cmp and bne. - ; 4 code paths: up=14, down=15, left=15, right=13, avg=14.25 + ; 4 code paths: up=16, down=17, left=17, right=15, avg=16.25. + ; note that part_x and part_y are *never* zero; all the bne's here + ; are "branch always". bit RANDOM ;4 ; use top 2 bits (probably more random, definitely faster) bmi lr ;2/3 bvc down ;2/3 - dec part_y ;3 ; N=1 V=1 up + dec part_y ;5 ; N=0 V=1 up bne checkbounds ;3 down: - inc part_y ;3 ; N=1 V=0 down + inc part_y ;5 ; N=0 V=0 down bne checkbounds ;3 lr: bvc right ;2/3 - dec part_x ;3 ; N=0 V=1 left + dec part_x ;5 ; N=1 V=1 left bne checkbounds ;3 right: - inc part_x ;3 + inc part_x ;5 ; N=1 V=0 right checkbounds: ; all the "cmp #0" here get their operands modified by set_limits. @@ -578,14 +580,8 @@ slow_x: bne stick check_y: ; (0,-1) -.if 0 - ldx part_y ;3 - lda lineaddrs_l-1,x ;5 - sta pixptr ;3 - lda lineaddrs_h-1,x ;5 - sta pixptr+1 ;3 - ;=19 -.else + ; subtract 32 (one line) from the pointer. slightly faster + ; than reloading from lineaddrs_l/h table. lda pixptr ; 3 sec ; 2 sbc #$20 ; 2 @@ -594,8 +590,7 @@ check_y: dec pixptr+1 ; 0|5 ; =13|17 (avg closer to 13) pp1ok: -.endif - ldx part_x + ;ldx part_x ; X already has this from before ldy xoffsets,x lda xmasks,x sta pixmask |