| author | B. Watson <urchlay@slackware.uk> | 2022-11-04 01:28:19 -0400 | 
|---|---|---|
| committer | B. Watson <urchlay@slackware.uk> | 2022-11-04 01:28:19 -0400 | 
| commit | 2727e3d64d05f052f92f429e3c398147843cf033 (patch) | |
| tree | 0686707912d2e6c45f169eff329d9f58e3069b15 | |
| parent | c8d7d6039963ca84d74a42233a1524d1a98836f7 (diff) | |
| download | dla-asm-2727e3d64d05f052f92f429e3c398147843cf033.tar.gz | |
Fix cycle-counting comments.
| -rw-r--r-- | dla.s | 27 | 
1 file changed, 11 insertions, 16 deletions
```diff
@@ -507,23 +507,25 @@ oob:
 ;;; This is the innermost loop, so it should be as optimized as
 ;;; possible (we're not there yet).
 drunkwalk:
- ; using bit/bmi/bvc saves 6.25 cycles on average, compared to
+ ; using bit/bmi/bvc saves 4.25 cycles on average, compared to
  ; immediate cmp and bne.
- ; 4 code paths: up=14, down=15, left=15, right=13, avg=14.25
+ ; 4 code paths: up=16, down=17, left=17, right=15, avg=16.25.
+ ; note that part_x and part_y are *never* zero; all the bne's here
+ ; are "branch always".
  bit RANDOM ;4 ; use top 2 bits (probably more random, definitely faster)
  bmi lr     ;2/3
  bvc down   ;2/3
- dec part_y ;3 ; N=1 V=1 up
+ dec part_y ;5 ; N=1 V=1 up
  bne checkbounds ;3
 down:
- inc part_y ;3 ; N=1 V=0 down
+ inc part_y ;5 ; N=1 V=0 down
  bne checkbounds ;3
 lr:
  bvc right ;2/3
- dec part_x ;3 ; N=0 V=1 left
+ dec part_x ;5 ; N=0 V=1 left
  bne checkbounds ;3
 right:
- inc part_x ;3
+ inc part_x ;5
 checkbounds:
  ; all the "cmp #0" here get their operands modified by set_limits.
@@ -578,14 +580,8 @@ slow_x:
  bne stick
 check_y:
  ; (0,-1)
-.if 0
- ldx part_y        ;3
- lda lineaddrs_l-1,x ;5
- sta pixptr          ;3
- lda lineaddrs_h-1,x ;5
- sta pixptr+1        ;3
-                     ;=19
-.else
+ ; subtract 32 (one line) from the pointer. slightly faster
+ ; than reloading from lineaddrs_l/h table.
  lda pixptr   ; 3
  sec          ; 2
  sbc #$20     ; 2
@@ -594,8 +590,7 @@ check_y:
  dec pixptr+1 ; 0|5
               ; =13|17 (avg closer to 13)
 pp1ok:
-.endif
- ldx part_x
+ ;ldx part_x ; X already has this from before
  ldy xoffsets,x
  lda xmasks,x
  sta pixmask
```
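
The corrected per-path totals in the drunkwalk comments can be re-derived from standard NMOS 6502 timings: BIT with an absolute operand takes 4 cycles, a branch costs 2 untaken and 3 taken (assuming no page crossing), and INC/DEC on a zero-page operand takes 5 cycles, which is what the new ";5" annotations imply for part_x/part_y. A minimal sketch of that arithmetic, under those assumptions:

```python
# Re-derive the per-path cycle counts quoted in the corrected drunkwalk
# comments. Assumptions: RANDOM is an absolute address (BIT abs = 4),
# part_x/part_y are zero page (INC/DEC zp = 5), branches cost 2 untaken /
# 3 taken, no branch crosses a page boundary, and every bne is taken
# ("branch always", since part_x/part_y are never zero).
BIT_ABS = 4
BR_TAKEN, BR_NOT_TAKEN = 3, 2
INC_DEC_ZP = 5
BNE_TAKEN = 3

paths = {
    "up":    BIT_ABS + BR_NOT_TAKEN + BR_NOT_TAKEN + INC_DEC_ZP + BNE_TAKEN,  # 16
    "down":  BIT_ABS + BR_NOT_TAKEN + BR_TAKEN     + INC_DEC_ZP + BNE_TAKEN,  # 17
    "left":  BIT_ABS + BR_TAKEN     + BR_NOT_TAKEN + INC_DEC_ZP + BNE_TAKEN,  # 17
    "right": BIT_ABS + BR_TAKEN     + BR_TAKEN     + INC_DEC_ZP,              # 15, falls into checkbounds
}

print(paths)                             # {'up': 16, 'down': 17, 'left': 17, 'right': 15}
print(sum(paths.values()) / len(paths))  # 16.25, matching the updated comment
```

The "saves 4.25 cycles on average" figure compares against the cmp/bne version mentioned in the comment, which isn't shown in this diff, so that number can't be checked from the hunk alone.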
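Likewise, the check_y change replaces the lineaddrs_l/h table lookup with plain 16-bit pointer arithmetic: per the new comment the bitmap is 32 ($20) bytes per line, so "up one pixel row" is just pixptr minus 32, and the high byte only needs adjusting when the low-byte subtraction borrows (the "0|5" on dec pixptr+1). The store and branch between the two hunks aren't shown here, so the 13|17 totals can't be fully verified from the diff; a rough sketch of just the pointer logic:

```python
# Sketch of the check_y pointer math: one bitmap line is 32 ($20) bytes,
# so moving up a row subtracts 32 from the 16-bit pointer. The high byte
# is decremented only on borrow, which is why dec pixptr+1 is annotated
# 0|5 cycles.
def up_one_line(pixptr):
    lo, hi = pixptr & 0xFF, pixptr >> 8
    lo -= 0x20
    if lo < 0:        # borrow from the low byte
        lo += 0x100
        hi -= 1       # the conditional dec pixptr+1
    return (hi << 8) | lo

assert up_one_line(0x2040) == 0x2020
assert up_one_line(0x2010) == 0x1FF0   # borrow case
```

Since the low byte shrinks by 32 each step, the borrow (and the extra 5-cycle dec) only occurs about one step in eight (256/32), which is why the comment calls the average "closer to 13".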
