1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
|
;; void txbuf_append_chr(char c) {
;; tx_buf[txbuflen++] = c;
;; }
; The C version compiles to 45 bytes. The assembly routine (_txbuf_append_chr,
; further down in this file) is 30 bytes (~33% smaller).
; Base address of the transmit buffer. The routines in this file store
; bytes at tx_buf + _txbuflen.
tx_buf = $3a00 ; MUST agree with src/rxtxbuf.h!
; _txbuflen is read and written as two bytes (_txbuflen and _txbuflen+1),
; i.e. a 16-bit length with the low byte first.
; _txbuf_send is defined elsewhere; _txbuf_send_str jumps to it.
.import _txbuflen, _txbuf_send
; Entry points offered to other modules.
.export _txbuf_append_chr, _txbuf_append_str
.export _txbuf_append_spc, _txbuf_append_01
.export _txbuf_init, _txbuf_set_str, _txbuf_send_str
; Zero-page scratch: sreg/sreg+1 and the two bytes after it (sreg+2/sreg+3)
; are used as indirect pointers by the routines in this file.
.importzp sreg ; avoid ptr1 & friends, callers may use
;; void txbuf_init(void) {
;;     txbuflen = tx_buf[0] = 0;
;; }
;
; Empty the transmit buffer: zero its first byte and reset the 16-bit
; length counter.
;
; In:     nothing
; Out:    Y = 0
; Clobb:  Y and the N/Z flags only. A and X are left untouched, so a
;         caller holding a value in A/X need not save it around the call.
; Size:   per the original author's note, no smaller than the compiled C;
;         the benefit is the register contract above.
_txbuf_init:
        ldy     #$00
        sty     tx_buf                  ; tx_buf[0] = 0
        sty     _txbuflen               ; length, low byte
        sty     _txbuflen+1             ; length, high byte
        rts
; Three entry points that share one body. Each ends up with a byte in A
; and appends it to the transmit buffer at tx_buf + _txbuflen, then
; increments the 16-bit _txbuflen:
;   _txbuf_append_01   appends the byte $01
;   _txbuf_append_spc  appends a space
;   _txbuf_append_chr  appends the byte passed in A
; On return Y == 0 and both A and X hold the byte that was appended.
; sreg/sreg+1 are overwritten with the address that was written to.
; No check is made against the size of the buffer.
_txbuf_append_01:
lda #$01
.byte $2c ; opcode of BIT abs: the 2-byte "lda #' '" that follows is consumed as its operand, so A keeps $01
_txbuf_append_spc:
lda #' '
_txbuf_append_chr:
tax ; park the character in X while A does the address arithmetic
lda #<tx_buf
clc
adc _txbuflen
sta sreg
lda #>tx_buf
adc _txbuflen+1 ; carry out of the low-byte add is included here
sta sreg+1 ; sreg now points at tx_buf + _txbuflen
ldy #0
txa ; bring the character back into A
sta (sreg),y ; tx_buf[txbuflen] = c
inc _txbuflen ; txbuflen++, low byte first
bne ret ; low byte did not wrap to 0: high byte is unchanged
inc _txbuflen+1
; 'ret' is an ordinary (not cheap-local) label, so it is visible to
; branches from elsewhere in this file.
ret:
rts ; always returns with Y == 0
;; void txbuf_set_str(const char *str) {
;;     txbuf_init();
;;     txbuf_append_str(str);
;; }
;
; Replace the buffer contents with the NUL-terminated string whose
; address is in A (low byte) / X (high byte).
_txbuf_set_str:
        jsr     _txbuf_init             ; only uses Y, so the pointer in A/X survives
        ; no rts here: execution continues straight into _txbuf_append_str

;; void txbuf_append_str(const char *str) {
;;     while(*str) {
;;         txbuf_append_chr(*str++);
;;     }
;; }
;
; Append the NUL-terminated string at A/X to the transmit buffer, one
; byte at a time via _txbuf_append_chr. The terminator is not appended.
;
; In:       A = string address low byte, X = string address high byte
; Scratch:  sreg+2/sreg+3 hold the walking pointer; _txbuf_append_chr
;           itself overwrites sreg/sreg+1, A, X and Y.
;           NOTE(review): sreg+2/sreg+3 are the two zero-page bytes after
;           sreg -- presumably cc65's regsave; confirm nothing live is
;           kept there by callers.
; Size:     the C version compiles to 52 bytes; this is 22 bytes,
;           ~57% smaller.
_txbuf_append_str:
        stx     sreg+3                  ; pointer, high byte
        sta     sreg+2                  ; pointer, low byte
        ldy     #$00                    ; Y stays 0: _txbuf_append_chr returns with Y == 0
@next_char:
        lda     (sreg+2),y              ; A = *str
        beq     @done                   ; NUL terminator: finished
        jsr     _txbuf_append_chr
        inc     sreg+2                  ; str++ (16-bit)
        bne     @next_char
        inc     sreg+3
        bne     @next_char              ; effectively 'branch always'; only falls
                                        ; through if the pointer wraps past $FFFF
@done:
        rts
;; void txbuf_send_str(const char *str) {
;;     txbuf_set_str(str);
;;     txbuf_send();
;; }
;
; Load the transmit buffer with the NUL-terminated string at A (low
; byte) / X (high byte), then hand control to _txbuf_send.
_txbuf_send_str:
        jsr     _txbuf_set_str
        jmp     _txbuf_send             ; tail call: _txbuf_send's rts returns to our caller
|