diff options
author | B. Watson <yalhcru@gmail.com> | 2021-04-04 03:35:22 -0400 |
---|---|---|
committer | B. Watson <yalhcru@gmail.com> | 2021-04-04 03:35:22 -0400 |
commit | 5ef86baf365b9c7e80fdc1df1283937698db37a5 (patch) | |
tree | c259f2642e4ba6b5c9feed211eedc79063a237b9 /arrayutils.s | |
parent | 30a7443be9ca61eeee3f2a2128b12598f2f3fc70 (diff) | |
download | taipan-5ef86baf365b9c7e80fdc1df1283937698db37a5.tar.gz |
Inline some array code, 8128 bytes free
Diffstat (limited to 'arrayutils.s')
-rw-r--r-- | arrayutils.s | 60 |
1 files changed, 60 insertions, 0 deletions
; diff --git a/arrayutils.s b/arrayutils.s new file mode 100644 index 0000000..188ace4 --- /dev/null +++ b/arrayutils.s @@ -0,0 +1,60 @@

        .import _ships_on_screen
        .export _have_ships_on_screen, _clear_ships_on_screen

        .include "atari.inc"

;-----------------------------------------------------------------------
; extern void clear_ships_on_screen(void);
;
; size-optimized stand-in for bzero() on ships_on_screen.
;
; the real bzero() in cc65-2.19 is 129 bytes long: it is tuned for
; speed (unrolled loop) and shares code with memset(). here only
; exactly 20 bytes at a known absolute address ever need clearing,
; so far less code is enough.
;
; the C equivalent would be: for(i=0; i<len; i++) { arr[i] = 0; }
; ...which takes up around 64 bytes of code. calling
; clear_ships_on_screen() instead costs 3 bytes at the call site plus
; the routine itself, which is 11 bytes as written below
; (2+2+3+1+2+1, with absolute,x addressing for the store). that is
; much smaller than either the for loop or bzero() plus its call.
;
; in:    nothing
; out:   ships_on_screen[0..19] (bytes) = 0; A = 0, X = 0 on return
; clobb: A, X, flags
;-----------------------------------------------------------------------
        .proc _clear_ships_on_screen
SHIPS_BYTES = 20                        ; number of bytes to clear

        lda #0                          ; fill value
        ldx #SHIPS_BYTES                ; X runs SHIPS_BYTES..1
@zero:
        ; base is biased by -1 so X = n addresses byte n-1; this lets
        ; the loop end on the Z flag from dex, with no cpx needed.
        sta _ships_on_screen-1,x
        dex
        bne @zero
        rts
        .endproc

;-----------------------------------------------------------------------
; extern char have_ships_on_screen(void);
;
; optimized (both size and speed) replacement for:
;
;   if ((ships_on_screen[0] == 0) && (ships_on_screen[1] == 0) &&
;       (ships_on_screen[2] == 0) && (ships_on_screen[3] == 0) &&
;       (ships_on_screen[4] == 0) && (ships_on_screen[5] == 0) &&
;       (ships_on_screen[6] == 0) && (ships_on_screen[7] == 0) &&
;       (ships_on_screen[8] == 0) && (ships_on_screen[9] == 0))
;
; ...which compiles to ~100 bytes of code. a for loop would be
; around 64 bytes (like the clearing loop for the same array). the
; routine costs 3 bytes for the function call plus 11 bytes for the
; function, plus a couple more bytes for the ! in front of the call:
; the result is the opposite of the original test (non-zero means
; some byte of the array is non-zero).
;
; inlining this as asm() in the C code would save the 3 call bytes,
; but would be more fragile than a separate function.
;-----------------------------------------------------------------------
; char have_ships_on_screen(void)
;
; scans the bytes of ships_on_screen from the last one down to the
; first and stops at the first non-zero byte it meets.
;
; in:    nothing
; out:   A and X both zero     -> every byte is zero
;        A and X both non-zero -> A = the non-zero byte that stopped
;                                 the scan, X = its offset + 1
; clobb: A, X, flags

        .proc _have_ships_on_screen
SHIPS_BYTES = 20                        ; sizeof(ships_on_screen)

        ldx #SHIPS_BYTES                ; X runs SHIPS_BYTES..1
@scan:
        ; base is biased by -1 so X = n addresses byte n-1.
        lda _ships_on_screen-1,x
        bne @exit                       ; non-zero byte: A != 0, X != 0
        dex
        bne @scan
        ; fell out of the loop: last byte loaded was 0 and X reached 0,
        ; so A and X are both zero here.
@exit:
        rts
        .endproc