; src/txbuf.s

;; void txbuf_append_chr(char c) {
;; 	tx_buf[txbuflen++] = c;
;; }

; compiles to 45 bytes. _txbuf_append_chr (further below) is 30 bytes (~33% smaller)

 ; Base address of tx_buf; used below both as a direct store target and
 ; as the base for the computed append address.
 tx_buf = $3a00 ; MUST agree with src/rxtxbuf.h!

 ; Defined elsewhere: _txbuflen is accessed below as a 16-bit value
 ; (_txbuflen = low byte, _txbuflen+1 = high byte); _txbuf_send is the
 ; routine _txbuf_send_str jumps to.
 .import _txbuflen, _txbuf_send
 ; Entry points provided by this file.
 .export _txbuf_append_chr, _txbuf_append_str
 .export _txbuf_append_spc, _txbuf_append_01
 .export _txbuf_init, _txbuf_set_str, _txbuf_send_str
 ; Zero-page scratch: sreg/sreg+1 hold the destination address in
 ; _txbuf_append_chr, sreg+2/sreg+3 hold the string pointer in
 ; _txbuf_append_str.
 ; NOTE(review): sreg+2/sreg+3 are the bytes following sreg; confirm they
 ; are free for this use.
 .importzp sreg ; avoid ptr1 & friends, callers may use

;; void txbuf_init(void) {
;; 	txbuflen = tx_buf[0] = 0;
;; }
; Reset the buffer: zero the 16-bit length and the first byte of tx_buf.
; Hand-written not for size (it matches the compiled code byte for byte
; in length) but because Y is the only register it uses: A and X reach
; the caller untouched, so nothing has to be saved around the call.
; Returns with Y == 0.
_txbuf_init:
 ldy #$00
 sty _txbuflen   ; length, low byte
 sty _txbuflen+1 ; length, high byte
 sty tx_buf      ; tx_buf[0]
 rts

; Three entry points sharing one body that appends a single byte to tx_buf:
;   _txbuf_append_01  - appends $01
;   _txbuf_append_spc - appends ' '
;   _txbuf_append_chr - appends the byte passed in A
; The byte is written to tx_buf + _txbuflen, then the 16-bit _txbuflen is
; incremented. No bounds check is made on _txbuflen.
; On return A and X hold the appended byte and Y == 0; sreg and sreg+1
; are overwritten with the address that was written.
_txbuf_append_01:
 lda #$01
 ; $2c is the opcode of BIT absolute: the CPU consumes the following
 ; two-byte lda as BIT's operand, so A is still $01 on reaching
 ; _txbuf_append_chr. BIT alters the flags, but tax overwrites them.
 .byte $2c ; BIT abs, skip next instruction
_txbuf_append_spc:
 lda #' '
_txbuf_append_chr:
 tax ; park the byte in X while A does the address arithmetic
 lda #<tx_buf
 clc
 adc _txbuflen
 sta sreg ; sreg = low byte of tx_buf + _txbuflen
 lda #>tx_buf
 adc _txbuflen+1 ; carry from the low-byte add propagates into the high byte
 sta sreg+1 ; sreg+1 = high byte of the destination address
 ldy #0
 txa ; recover the byte to append
 sta (sreg),y
 inc _txbuflen
 bne ret ; low byte did not wrap, so the high byte is unchanged
 inc _txbuflen+1
; shared exit: _txbuf_append_str also branches here at end of string
ret:
 rts ; always returns with Y == 0

;; void txbuf_set_str(const char *str) {
;; 	txbuf_init();
;; 	txbuf_append_str(str);
;; }
; In: A = low byte, X = high byte of the string address.
; Resets the buffer via _txbuf_init, then appends the string by running
; straight on into _txbuf_append_str.
_txbuf_set_str:
 jsr _txbuf_init ; remember, this doesn't touch A or X!
 ; fall through to _txbuf_append_str

;; void txbuf_append_str(const char *str) {
;; 	while(*str) {
;; 		txbuf_append_chr(*str++);
;; 	}
;; }

; compiles to 52 bytes.
; this routine is 22 bytes, ~57% smaller.
; In: A = low byte, X = high byte of the string address.
; The pointer is kept in sreg+2/sreg+3 and advanced one byte at a time;
; every non-zero byte is passed to _txbuf_append_chr. The terminating
; zero byte itself is not appended.
; NOTE(review): sreg+2/sreg+3 lie beyond the sreg/sreg+1 pair that
; _txbuf_append_chr overwrites; confirm those zero-page bytes are free.
_txbuf_append_str:
 sta sreg+2
 stx sreg+3
 ldy #0
@loop:
 ; Y is 0 here: set above, and _txbuf_append_chr returns with Y == 0
 lda (sreg+2),y
 beq ret ; zero byte = end of string; leave via _txbuf_append_chr's rts
 jsr _txbuf_append_chr
 inc sreg+2
 bne @loop
 inc sreg+3 ; low byte wrapped to 0, so bump the high byte
 bne @loop ; falls through only if the high byte wraps to 0 as well
 rts ; safety net, the above is really a 'branch always'

;; void txbuf_send_str(const char *str) {
;; 	txbuf_set_str(str);
;; 	txbuf_send();
;; }
; In: A = low byte, X = high byte of the string address; both are handed
; to _txbuf_set_str unchanged, since nothing here touches them first.
_txbuf_send_str:
 jsr _txbuf_set_str
 jmp _txbuf_send ; tail call: jmp takes the place of jsr followed by rts