aboutsummaryrefslogtreecommitdiff
path: root/arrayutils.s
diff options
context:
space:
mode:
authorB. Watson <yalhcru@gmail.com>2021-04-04 03:35:22 -0400
committerB. Watson <yalhcru@gmail.com>2021-04-04 03:35:22 -0400
commit5ef86baf365b9c7e80fdc1df1283937698db37a5 (patch)
treec259f2642e4ba6b5c9feed211eedc79063a237b9 /arrayutils.s
parent30a7443be9ca61eeee3f2a2128b12598f2f3fc70 (diff)
downloadtaipan-5ef86baf365b9c7e80fdc1df1283937698db37a5.tar.gz
Inline some array code, 8128 bytes free
Diffstat (limited to 'arrayutils.s')
-rw-r--r--arrayutils.s60
1 files changed, 60 insertions, 0 deletions
diff --git a/arrayutils.s b/arrayutils.s
new file mode 100644
index 0000000..188ace4
--- /dev/null
+++ b/arrayutils.s
@@ -0,0 +1,60 @@
+; extern void clear_ships_on_screen(void);
+
+; optimized bzero() replacement.
+; the real bzero() in cc65-2.19 is 129 bytes long.
+; it's optimized for speed (has an unrolled loop) and shares
+; code with memset(). we can do it in a lot less code here,
+; especially since we only need to clear exactly 20 bytes
+; located at a known absolute address.
+
+; in C, we could write: for(i=0; i<len; i++) { arr[i] = 0; }
+; ...which takes up around 64 bytes of code.
+; instead, this: clear_ships_on_screen();
+; ...takes 3 bytes for the function call, plus 11 bytes for the
+; function, or under 1/4 the size of the for loop, or around 1/10
+; the size of bzero() plus its function call.
+
+ .import _ships_on_screen
+ .export _have_ships_on_screen, _clear_ships_on_screen
+
+ .include "atari.inc"
+
+ .proc _clear_ships_on_screen
+SHIPS_BYTES = 20 ; number of bytes zeroed, starting at _ships_on_screen
+ ; on exit: A = 0 and X = 0. Y is never touched.
+ lda #0
+ ldx #SHIPS_BYTES
+@zero:
+ ; X counts SHIPS_BYTES down to 1; the -1 bias on the address makes
+ ; the stores land on offsets SHIPS_BYTES-1 down to 0.
+ sta _ships_on_screen-1,x
+ dex
+ bne @zero
+ rts
+ .endproc
+
+; extern char have_ships_on_screen(void);
+
+; optimized (both size and speed) replacement for:
+
+; if ((ships_on_screen[0] == 0) && (ships_on_screen[1] == 0) &&
+; (ships_on_screen[2] == 0) && (ships_on_screen[3] == 0) &&
+; (ships_on_screen[4] == 0) && (ships_on_screen[5] == 0) &&
+; (ships_on_screen[6] == 0) && (ships_on_screen[7] == 0) &&
+; (ships_on_screen[8] == 0) && (ships_on_screen[9] == 0))
+
+; ...which compiles to ~100 bytes of code. a for loop would be
+; around 64 bytes (like the clearing loop above). this is 3 bytes
+; for the function call plus 11 bytes for the function, plus a
+; couple more bytes for the ! in front of the function call (since
+; the result is opposite what the original code did).
+; I could save the 3 bytes by inlining this as asm() in the C code,
+; but it would be more fragile than making a separate function.
+
+ .proc _have_ships_on_screen
+SHIPS_BYTES = 20 ; sizeof(ships_on_screen)
+ ; result: A and X are both zero only when every byte scanned is zero;
+ ; otherwise both are non-zero.
+ ldx #SHIPS_BYTES
+@scan:
+ ; X counts SHIPS_BYTES down to 1; the -1 bias on the address makes
+ ; the loads read offsets SHIPS_BYTES-1 down to 0.
+ lda _ships_on_screen-1,x
+ bne @found ; non-zero byte: leave now, A != 0 and X != 0
+ dex
+ bne @scan
+ ; fell out of the loop: the last byte loaded was zero (A = 0) and
+ ; the counter ran out (X = 0)
+@found:
+ rts
+ .endproc