From 0ce40f8d4e3ded5b6f80a810d33eae532d84c634 Mon Sep 17 00:00:00 2001
From: "B. Watson" <urchlay@slackware.uk>
Date: Mon, 24 Oct 2022 03:35:32 -0400
Subject: initial commit (v0.0.3)

---
 Makefile    |  17 ++
 README.txt  | 104 ++++++++++++
 dla.s       | 534 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 io.s        | 203 +++++++++++++++++++++++
 mkdlatbl.pl |  58 +++++++
 xex.inc     | 166 +++++++++++++++++++
 6 files changed, 1082 insertions(+)
 create mode 100644 Makefile
 create mode 100644 README.txt
 create mode 100644 dla.s
 create mode 100644 io.s
 create mode 100644 mkdlatbl.pl
 create mode 100644 xex.inc

diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..7af560a
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,17 @@
+CL65 = cl65
+CL65FLAGS =
+PERL = perl
+
+all: dla.xex
+.PHONY: all clean test
+dla.xex: dla.s io.s dlatbl.s xex.inc
+	$(CL65) $(CL65FLAGS) -l dla.list -Ln dla.labels -t none -o dla.xex dla.s
+# dlatbl.s is generated by mkdlatbl.pl; "make clean" removes it.
+dlatbl.s: mkdlatbl.pl
+	$(PERL) mkdlatbl.pl > dlatbl.s
+
+clean:
+	rm -f dla.xex dlatbl.s dla.list dla.labels *.o
+
+test: all
+	atari800 -turbo dla.xex
diff --git a/README.txt b/README.txt
new file mode 100644
index 0000000..98fa09e
--- /dev/null
+++ b/README.txt
@@ -0,0 +1,104 @@
+Diffusion Limited Aggregation (DLA) for Atari 8-bit
+===================================================
+
+Diffusion-limited aggregation (DLA) is the process whereby particles
+undergoing a random walk due to Brownian motion cluster together to
+form aggregates of such particles.
+
+For a good description of DLA, see:
+https://en.wikipedia.org/wiki/Diffusion_limited_aggregation
+
+This Atari 8-bit implementation is written in 6502 assembly, using the
+ca65 assembler from the cc65 suite: https://cc65.github.io/
+
+Building
+--------
+
+You need a Unix/GNU like system (which might even be modern
+Windows), with GNU (or possibly BSD) make, Perl 5, and the CC65 tools
+installed. Provided you have all that, simply type "make" to assemble
+the source.
+
+If you have trouble building on Linux, ask me for help. If you have
+trouble on other OSes, ask someone who actually knows about your OS
+(not me, I don't do Windows or Mac).
+
+Running
+-------
+
+The executable is called "dla.xex", and is a standard Atari binary
+load file. It can be run in the same way you run any other .xex files,
+e.g. in an emulator or with an SIO2PC cable on real hardware.
+
+At startup, you're asked "How many particles?". The more particles you
+enter here, the longer it will take to generate the image. The default
+(if you just press Return) is 1000, which takes approximately half
+an hour.
+
+After you enter the number of particles, the screen will clear and
+go solid black, while the image is generated. The ANTIC chip's DMA is
+disabled, to speed things up. However, you can "peek" at the progress
+of the generator by holding down the Start key. This will show the
+work in progress, but it will slow things down noticeably.
+
+Notes
+-----
+
+The algorithm works like this:
+
+1. Each particle starts on the edge of a circle whose center is the
+center of the screen. The circle's radius depends on the number of
+particles that have been rendered so far: radius is 15 for particles 1
+to 100, 30 for particles 101 to 300, 45 for particles 301 to 600, and
+75 for particles 601 and up.
+
+2. Walk the particle around randomly. For each step, pick a random one
+of the 4 cardinal directions (no diagonals).
+
+3. If the particle goes "out of bounds" (see below), respawn it and
+try again (without incrementing the particle counter).
+
+4. If the particle is ever adjacent to a set pixel, it gets stuck
+there, the particle counter is incremented, and we go back to step 1.
+
+When the particle counter reaches the max (the number the user
+entered), the process is complete, and DMA is enabled so you can see
+the result. TODO: at some point, there will be a way to save the image
+and/or generate a new image. For now, all you can do is look.
+
+It should be possible to optimize this further. The Atari will never
+be a speed demon, but I'd be happy to get execution time for 1000
+particles down to 10 or 15 minutes.
+
+It might be nice to include several built-in seeds, besides the single
+dot in the middle of the screen. Possibilities: line, plus, 4 dots in a
+square pattern...
+
+There might be a quick way to limit the particles' movement outside
+the initial circle's radius. Right now, it's limited to a square area;
+width and height are the diameter of the circle plus 10 pixels. The
+corners of this square waste a lot of time; it'd be better to come
+up with a way to do an octagon (the square with the corners cut off),
+which shouldn't slow down the inner loop too much.
+
+Tech stuff: rather than calculate points on a circle in asm code,
+the tables of points for the 4 circle sizes are pre-calculated by a
+perl script and included in the executable verbatim. The tables bloat
+the code some (2KB), but the speed boost is well worth it. Also, the
+graphics mode used is "graphics 8", but in ANTIC narrow playfield
+mode, so the X resolution is 256... meaning I don't need two bytes
+for the X cursor position (which saves a good bit of time). The code
+that plots pixels doesn't use CIO to do so (it writes directly to the
+screen memory), which also saves time. There's no floating point math
+here: if there were, the asm version wouldn't be all that much faster
+than the BASIC one...
+
+Author
+------
+
+The original version of this was in Atari BASIC, by ChrisTOS. It can
+be found at https://github.com/ctzio/DLA/
+
+This assembly version is by B. Watson (urchlay@slackware.com, Urchlay
+on libera.chat IRC). The code is licensed under the WTFPL: do WTF you
+want with it.
diff --git a/dla.s b/dla.s
new file mode 100644
index 0000000..58db8e8
--- /dev/null
+++ b/dla.s
@@ -0,0 +1,534 @@
+; Diffusion Limited Aggregation
+; B. Watson's asm rewrite of ChrisTOS's Atari 8-bit version.
+; Original lives here: https://github.com/ctzio/DLA/
+
+; This version uses ANTIC narrow playfield mode, since the original
+; uses fewer than 256 columns of a GR.8 screen. This gives a slight
+; speed boost for 2 reasons: less DMA from the ANTIC chip, and we get
+; to use 1 byte for the X coordinate.
+
+ .include "atari.inc"
+ .include "xex.inc"
+
+ loadaddr = $2000 ; both the init segment and the main program are assembled here
+ screen = $4000 ; must be on a x000 (4K) boundary
+ screen2 = screen + $1000 ; rest of screen RAM after 4K boundary
+ linelen = $20 ; aka 32 bytes, antic F (GR.8) in narrow mode.
+ maxlines = $C0 ; 192 lines of display
+ screenbytes = maxlines * linelen ; $1800 bytes, i.e. exactly 24 pages
+ dl_len = 202 ; remember to update this if you modify the display list!
+
+ DMA_ON = $21 ; value stored in SDMCTL to turn the (narrow playfield) display on
+ DEFAULTPART = 1000 ; particle count used when the user just presses Return
+ screenptr = SAVMSC ; 2 bytes
+ maxparticles = $80 ; 2 bytes
+ addtmp = $82 ; 2 bytes, scratch for init's multiply-by-10 (same address as pixptr)
+ pixptr = $82 ; 2 bytes, screen line pointer set up by plotsetup
+ pixmask = $84 ; bit mask selecting one pixel within a screen byte
+ cursor_x = $85 ; cursor x/y are args to plot/unplot/locate
+ cursor_y = $86
+ min_x = $87 ; limits: if the particle gets outside this box,
+ max_x = $88 ; delete it and spawn a new one.
+ min_y = $89
+ max_y = $8a
+ circlesize = $8b ; 0 to 3
+ part_x = $8c ; x/y coords of current particle
+ part_y = $8d
+ particles = $8e ; 2 bytes
+ spawn_x = $90 ; 2 bytes
+ spawn_y = $92 ; 2 bytes
+
+ dlist = screen - dl_len ; display list ends exactly where screen memory starts
+
+ ; start of init segment. gets overwritten by the main program...
+ ; and since the rest of the xex isn't loaded yet, can't call
+ ; subroutines from it!
+ xex_org loadaddr
+ .include "io.s" ; printchrx and getchrx
+msg:
+ .byte "Diffusion Limited Aggregate",$9b
+ .byte "Urchlay's ASM version 0.0.3",$9b,$9b
+ .byte "How many particles [",.sprintf("%d", DEFAULTPART),"]? ",$0
+init:
+ ; init segment entry: print the banner, read a count into maxparticles.
+ lda #<DEFAULTPART ; start with the default (kept if the user just hits return)
+ sta maxparticles
+ lda #>DEFAULTPART
+ sta maxparticles+1
+
+ ; print banner and prompt.
+ ldx #0
+pmloop:
+ lda msg,x
+ beq pmdone
+ jsr printchrx
+ inx
+ bne pmloop
+pmdone:
+
+ ; read up to 5 digits into LBUFF. for now, no editing.
+ ldx #0
+readloop:
+ jsr getchrx
+ cmp #$9b ; is it Return?
+ beq readdone ; if so, done reading.
+ cmp #$30 ; is it a digit?
+ bcc readloop ; if not, ignore it.
+ cmp #$3a
+ bcs readloop
+ sta LBUFF,x
+ jsr printchrx
+ inx
+ cpx #5
+ bne readloop
+readdone:
+ lda #0
+ sta LBUFF,x ; zero-terminate, whether we got here via Return or the 5th digit
+ cpx #0      ; no digits at all?
+ beq usedefault
+
+ ; convert the digits to 16-bit binary (no range check: > 65535 wraps)
+ lda #0
+ sta maxparticles
+ sta maxparticles+1
+ ldx #0
+digloop:
+ lda LBUFF,x
+ beq digitsdone ; hit zero terminator
+ ldy #$0a
+ lda #0
+ sta addtmp
+ sta addtmp+1
+mul10loop: ; addtmp = maxparticles * 10, by adding it 10 times
+ clc
+ lda addtmp
+ adc maxparticles
+ sta addtmp
+ lda addtmp+1
+ adc maxparticles+1
+ sta addtmp+1
+ dey
+ bne mul10loop
+ lda LBUFF,x
+ and #$0f ; ASCII digit => 0..9
+ clc
+ adc addtmp
+ sta maxparticles ; maxparticles = addtmp + digit
+ lda addtmp+1
+ adc #0
+ sta maxparticles+1
+ inx
+ bne digloop
+
+digitsdone:
+usedefault:
+ rts
+
+ xex_init init
+;;;;; end of init segment
+
+ xex_org loadaddr
+main: ;;; start of main()
+ jsr initscreen ; clear the bitmap and install our display list
+ ; this stuff isn't working, commented out for now:
+ ; wait for shadow regs to get updated...
+ ;lda RTCLOK+2
+;wl:
+ ;cmp RTCLOK+2
+ ;beq wl
+ ;lda #1 ; ...turn off shadow reg updates (tiny speed boost)
+ ;sta CRITIC
+ lda #0
+ sta particles
+ sta particles+1
+ sta RTCLOK ; zero the 3-byte clock; it is copied to FR0 at main_done
+ sta RTCLOK+1
+ sta RTCLOK+2
+ sta circlesize ; start with the smallest circle
+ jsr set_limits
+
+ lda #<points_x ; spawn_x/spawn_y point at the first circle's 256-entry tables
+ sta spawn_x
+ lda #>points_x
+ sta spawn_x+1
+ lda #<points_y
+ sta spawn_y
+ lda #>points_y
+ sta spawn_y+1
+
+ ; initial point in center
+ lda #$7f ; x = 127
+ sta cursor_x
+ lda #$5f ; y = 95
+ sta cursor_y
+ jsr plot
+
+ ; spawn a new particle
+next_particle:
+ jsr spawn
+ ;lda #0
+ ;sta CONSOL ; click when spawning a particle
+
+ jsr drunkwalk     ; walk it around
+ beq next_particle ; if it went out of bounds, try again
+
+ ; particle stuck to an existing pixel, draw it
+ lda part_x
+ sta cursor_x
+ lda part_y
+ sta cursor_y
+ jsr plot
+
+ inc particles ; 16-bit increment
+ bne ph_ok
+ inc particles+1
+ph_ok:
+
+ ; increase circlesize at appropriate particle counts
+ ; if(particles == 100 || particles == 300 || particles == 600) goto next_size;
+ lda particles   ; A = low byte
+ ldx particles+1 ; X = high byte
+ bne not_100     ; high byte nonzero, so it can't be 100
+ cmp #100
+ beq next_size
+not_100:
+ cpx #>300
+ bne not_300
+ cmp #<300
+ beq next_size
+not_300:
+ cpx #>600
+ bne checkmaxparts
+ cmp #<600
+ beq next_size
+ bne checkmaxparts ; always taken
+
+next_size:
+ inc circlesize
+ jsr set_limits
+ inc spawn_x+1 ; step both pointers 256 bytes forward, to the next circle's points
+ inc spawn_y+1
+
+checkmaxparts:
+ ; if(particles != maxparticles) goto next_particle;
+ lda particles
+ cmp maxparticles
+ bne next_particle
+ lda particles+1
+ cmp maxparticles+1
+ bne next_particle
+
+main_done:
+ lda #0
+ sta CRITIC
+ sta COLOR2
+ sta ATRACT
+ lda #DMA_ON ; turn the display back on so the result is visible
+ sta SDMCTL
+ lda RTCLOK ; copy the elapsed clock value into FR0..FR0+2
+ sta FR0    ; (nothing in this file reads it back; presumably for inspection)
+ lda RTCLOK+1
+ sta FR0+1
+ lda RTCLOK+2
+ sta FR0+2
+hang: jmp hang
+; TODO: code to save image goes here.
+;;; End of main()
+
+;;; Subroutine: set_limits
+;;; Load min_x/max_x/min_y/max_y from the tables, indexed by circlesize.
+set_limits:
+ ldx circlesize
+ lda xmin,x ; horizontal limits first...
+ sta min_x
+ lda xmax,x
+ sta max_x
+ lda ymin,x ; ...then vertical
+ sta min_y
+ lda ymax,x
+ sta max_y
+ rts
+
+;;; Subroutine: initscreen
+;;; clear screen memory and point ANTIC to our display list.
+;;; no arguments. trashes all registers.
+initscreen:
+ jsr set_screenptr ; screenptr = start of screen memory
+
+ ldx #>screenbytes ; clear this many pages
+ lda #0
+ tay ; Y = 0: byte index within the current page
+isloop:
+ sta (screenptr),y
+ iny
+ bne isloop
+ inc screenptr+1 ; page done, move to the next one
+ dex
+ bne isloop
+
+ lda #DMA_ON   ; set ANTIC narrow playfield mode
+ sta SDMCTL
+
+ lda #<dlist ; use our display list
+ sta SDLSTL
+ lda #>dlist
+ sta SDLSTH
+; fall through to set_screenptr: the clear loop advanced screenptr, so reset it
+
+;;; Subroutine: set_screenptr
+;;; Set screenptr to the start of screen memory.
+;;; Trashes A, preserves X and Y. initscreen also falls through into here.
+set_screenptr:
+ lda #<screen ; low byte
+ sta screenptr
+ lda #>screen ; high byte
+ sta screenptr+1
+ rts
+
+;;; Subroutine: plotsetup
+;;; - set pixptr to point to screen memory at cursor_y.
+;;; - set pixmask to the mask for cursor_x.
+;;; - set Y reg to the byte offset for cursor_x.
+;;; Called by plot, unplot, and locate. On exit A = pixmask, X = cursor_x.
+plotsetup:
+ ; used to:
+ ;lda cursor_y
+ ;sta pixptr
+ ;lda #0
+ ;sta pixptr+1
+ ;ldx #5 ; multiply 16-bit pixptr by 32, by left-shifting 5 times.
+;pshiftloop:
+ ;asl pixptr
+ ;rol pixptr+1
+ ;dex
+ ;bne pshiftloop
+ ;clc ; add screenptr to calculated value
+ ;lda pixptr
+ ;adc screenptr
+ ;sta pixptr
+ ;lda pixptr+1
+ ;adc screenptr+1
+ ;sta pixptr+1
+
+ ; now, use a table, which makes this run ~3.5x as fast!
+ ldx cursor_y ; cursor_y indexes the tables directly; no range check here
+ lda lineaddrs_l,x
+ sta pixptr
+ lda lineaddrs_h,x
+ sta pixptr+1
+
+ ; used to:
+ ;lda cursor_x
+ ;and #$07 ; keep low 3 bits...
+ ;tax
+ ;lda masks,x ; get the mask
+ ;sta pixmask ; ...and save it
+ ;lda cursor_x ; top 5 bits are byte offset, shift 'em down
+ ;lsr
+ ;lsr
+ ;lsr
+ ;tay ; put byte offset in Y
+
+ ; now, use tables, which shaves another ~8% off runtime:
+ ldx cursor_x
+ ldy xoffsets,x ; Y = byte offset within the line
+ lda xmasks,x   ; A = bit mask within that byte
+ sta pixmask
+
+ rts
+
+;;; Subroutine: plot
+;;; Set the pixel at (cursor_x, cursor_y). Trashes A, X and Y.
+plot:
+ jsr plotsetup  ; pixptr => line, Y = byte offset, pixmask = bit
+ lda pixmask
+ ora (pixptr),y ; merge our bit into the existing screen byte
+ sta (pixptr),y
+ rts
+
+;;; Subroutine: unplot
+;;; Clear the pixel at (cursor_x, cursor_y). Trashes A, X and Y.
+;;; Side effect: pixmask is left holding the inverted mask.
+unplot:
+ jsr plotsetup
+ lda pixmask
+ eor #$ff       ; invert: 0 at our pixel, 1 everywhere else
+ sta pixmask
+ and (pixptr),y ; A still holds the inverted mask
+ sta (pixptr),y
+ rts
+
+;;; Subroutine: locate
+;;; Test the pixel at (cursor_x, cursor_y).
+;;; Returns Z=0 (A = pixmask) if it is set, Z=1 (A = 0) if it is clear.
+;;; Leaves pixptr, pixmask and Y set up for that pixel. Trashes X.
+locate:
+ jsr plotsetup
+ lda pixmask
+ and (pixptr),y
+ rts
+
+masks: .byte $80,$40,$20,$10,$08,$04,$02,$01 ; only referenced by commented-out code in plotsetup
+
+;;; Subroutine: spawn
+;;; Pick a random point on the edge of a circle; store it in part_x/part_y.
+spawn:
+ ldy RANDOM      ; random index 0..255 into the current point tables
+ lda (spawn_x),y
+ sta part_x
+ lda (spawn_y),y ; same index, so x and y belong to the same point
+ sta part_y
+ rts
+
+;;; Subroutine: drunkwalk
+;;; Walk the point around randomly until it either is
+;;; adjacent to a set pixel or goes out of bounds.
+;;; Return with Z=1 if out of bounds, Z=0 if it hit a pixel.
+;;; This and check_neighbors are the innermost loop, so they
+;;; should be as optimized as possible (we're not there yet).
+drunkwalk:
+ lda RANDOM ; pick a random direction, up/down/left/right
+ and #$C0   ; use top 2 bits (hopefully more random than bottom 2).
+ cmp #$C0
+ beq up
+ cmp #$80
+ beq down
+ cmp #$40
+ beq left
+ ; right
+ inc part_x
+ bne checkbounds ; used as "branch always": only falls through if part_x wraps to 0
+up:
+ dec part_y
+ bne checkbounds ; likewise, only falls through if part_y reaches 0
+down:
+ inc part_y
+ bne checkbounds ; likewise
+left:
+ dec part_x
+
+checkbounds:
+ lda part_x
+ cmp min_x
+ beq oob ; returns with Z=1
+ cmp max_x
+ beq oob
+ lda part_y
+ cmp min_y
+ beq oob
+ cmp max_y
+ beq oob
+
+ lda part_x
+ sta cursor_x
+ lda part_y
+ sta cursor_y
+ ldx #0 ; X = value for SDMCTL: 0 unless the check below passes
+ lda CONSOL
+ cmp #6 ; README: holding Start lets you peek at the work in progress
+ bne dontplot
+ jsr plot   ; draw the moving particle...
+ jsr unplot ; ...and erase it again right away
+ ldx #DMA_ON
+dontplot:
+ stx SDMCTL ; shadow register: the CRITIC code in main is commented out
+ ;stx DMACTL
+ jsr check_neighbors
+ bne stick ; Z=0: a neighbor pixel is set, return with Z=0
+ beq drunkwalk ; always taken: no neighbor, take another step
+
+stick:
+oob:
+ rts
+
+;;; Subroutine: check_neighbors
+;;; return with Z=0 if any of the 4 neighbor pixels (l/r/u/d) of
+;;; (cursor_x, cursor_y) is set, else Z=1. cursor_x/y are not restored.
+check_neighbors:
+ ; (-1,0)
+ dec cursor_x
+ jsr locate
+ bne stick ; "stick" is the rts at the end of drunkwalk
+ ; (1,0)
+ inc cursor_x
+ inc cursor_x
+ jsr locate
+ bne stick
+ ; (0,-1)
+ dec cursor_x
+ dec cursor_y
+ jsr locate
+ bne stick
+ ; (0,1)
+ ; used to:
+ ;inc cursor_y
+ ;inc cursor_y
+ ;jsr locate
+ ; this avoids recalculating the pointer:
+ tya ; Y = byte offset left by locate; pixptr still points at line y-1
+ ora #$40 ; add 64
+ tay ; 64 bytes = 2 lines of linelen bytes further down, i.e. line y+1
+ lda (pixptr),y
+ and pixmask ; same column, so the mask from the last locate still applies
+ rts
+
+;;;;; end of executable code
+
+ ; dlatbl.s is generated by perl script, mkdlatbl.pl
+ .include "dlatbl.s"
+
+ ; table of addresses, for each line on the screen. bloats the
+ ; code by 384 bytes (2 * maxlines), but compared to calculating the
+ ; address, is 3.5x as fast! must cover every line: cursor_y reaches ~179.
+lineaddrs_l:
+ laddr .set screen
+ .repeat maxlines
+  .byte <laddr
+  laddr .set laddr + linelen
+ .endrep
+
+lineaddrs_h:
+ laddr .set screen
+ .repeat maxlines
+  .byte >laddr
+  laddr .set laddr + linelen
+ .endrep
+
+ ; tables to replace X coord => mask-and-offset calculations.
+ ; both are indexed by the full 8-bit X coordinate (256 entries each).
+
+xoffsets:
+ ; byte offset within the line: 8 pixels per byte.
+ .repeat 256, xcoord
+  .byte xcoord / 8
+ .endrep
+
+xmasks:
+ ; bit within that byte: the leftmost pixel is the high bit.
+ .repeat 256, xcoord
+  .byte $80 >> (xcoord & 7)
+ .endrep
+
+;;; display list
+ ; ANTIC opcodes
+ blank8 = $70 ; 8 blank scan lines
+ gr8    = $0f ; one line of ANTIC mode F (GR.8)
+ lms    = $40 ; modifier: load memory scan address from the following word
+ jvb    = $41 ; jump to the following address and wait for vertical blank
+ xex_org dlist
+ .byte blank8, blank8, blank8
+ .byte gr8 | lms
+ .word screen
+ .repeat 127 ; with the LMS line above: 128 lines * 32 bytes = 4K
+  .byte gr8
+ .endrep
+ .byte gr8 | lms ; second LMS for the screen RAM past the 4K boundary
+ .word screen2
+ .repeat maxlines - 129 ; the rest (1 + 127 + 1 lines are already emitted)
+  .byte gr8
+ .endrep
+ .byte jvb
+ .word dlist
+ .assert * = screen, error, "dl_len does not match the display list length"
+
+ xex_run loadaddr
diff --git a/io.s b/io.s
new file mode 100644
index 0000000..98dc219
--- /dev/null
+++ b/io.s
@@ -0,0 +1,203 @@
+ ; 20220930 bkw, aka Urchlay on libera IRC, aka urchlay@slackware.uk:
+
+ ; Example code for calling CIO through the back door, like BASIC does.
+ ; Provided under the terms of the WTFPL: Do WTF you want to with this.
+
+ ; Verbose documentation here. To skip to the actual code, search for
+ ; three ; characters.
+
+ ; There's a lot of old code that calls the OS ROM's print-character
+ ; and read-character addresses directly. These were never published
+ ; by Atari as part of their API... and in fact they changed between
+ ; the 400/800 and XL/XE, which is a major reason why certain software
+ ; is "OS B only" or "XL only". People coming from other platforms such
+ ; as the C=64 or Apple II were used to their ROMs having fixed
+ ; addresses to JSR to, for print-character and read-character, and
+ ; so they used $F6A4 to print and $F6E2 to read... which were
+ ; never guaranteed by Atari not to change. And they did change,
+ ; in the XL OS.
+
+ ; The pedantically correct way to print a character is to set up
+ ; a 1-byte buffer for IOCB #0, and call CIOV with ICCOM set to
+ ; $09 (aka 'put record'). Even Atari decided this was too much
+ ; work, so they also provided a handy "put-one-byte" vector in
+ ; EDITRV, which gets copied to ICPTL/H when the OS opens the E:
+ ; device... BASIC uses this to print characters, and you can, too. It
+ ; works on any revision of the Atari OS, because it's part of the
+ ; OS specification: if it *didn't* work on some OS version, neither
+ ; would Atari BASIC, which would count as a show-stopper!
+
+ ; Atari didn't provide a similar slot in the IOCB for the
+ ; get-one-byte vector... and generally, if you're interested in
+ ; reading input one character at a time, you don't want IOCB #0 (E:)
+ ; anyway. You want the K: device (which returns immediately after
+ ; each keypress, rather than waiting for a whole line of input). The
+ ; correct way to read from the keyboard is to open an IOCB (other
+ ; than #0) to the K: device, set up that IOCB, including a 1-byte
+ ; buffer, and call CIOV with ICCOM set to $05 (aka get-record). But
+ ; it turns out that the K: device has a get-one-byte routine that (a)
+ ; can be found in a published location (KEYBDV table) that doesn't
+ ; change with ROM revision, and (b) works without even having an IOCB
+ ; open for K:.
+
+ ; The vectors are stored as "address minus one", because they're
+ ; intended to be called via the RTS instruction (probably Atari did
+ ; this because the JSR instruction doesn't have an indexed mode like
+ ; JMP does). Read on, to see how to call them. The calling sequence
+ ; isn't as convenient as the illegal entry points (or the Commodore's
+ ; Kernal, which does publish print-accumulator and get-1-byte
+ ; vectors), but it's a lot less code than the 'proper' IOCB setup
+ ; would be. And if you copy/paste from this file, you just call these
+ ; subroutines in your code (as convenient as the Commodore).
+
+ ; You are welcome to copy the code in this file into your own
+ ; project. It's unencumbered: I release it under the WTFPL. I would
+ ; just say it's public domain, but I have been told by people who
+ ; ought to know that some countries don't actually recognize public
+ ; domain in their law. WTFPL explicitly says you can do whatever you
+ ; want with this.
+
+ ; Examples:
+
+ ; You could make your own "memo pad mode" with this:
+ ;
+ ; main:
+ ;  jsr getchr
+ ;  jsr printchr
+ ;  jmp main
+
+ ; Print a null-terminated string, up to 256 bytes long:
+ ;
+ ;  ldx #0
+ ; msgloop:
+ ;  lda message,x
+ ;  beq msgdone
+ ;  jsr printchrx
+ ;  inx
+ ;  bne msgloop
+ ; msgdone:
+ ;  rts ; or whatever other code goes here...
+ ;
+ ; message: .byte "Hello, World!",$9b,$00
+
+ ; Environment:
+
+ ; The code depends on a few symbols (equates) being defined. How you
+ ; do this depends on the assembler you're using.
+
+ ; .include "atari.inc"  ; for ca65
+ ; .include "sysequ.m65" ; for atasm
+ ; include atari8.h      ; dasm, if it actually had this file :(
+
+ ; If your assembler doesn't have a file of Atari symbols, use:
+ ; ICPTL   = $0346
+ ; ICPTH   = $0347
+ ; KEYBDV  = $E420
+ ; EDITRV  = $E400 ; only if you change getchr to use this.
+ ; ...of course your assembler might want EQU or .EQU instead of = signs.
+
+ ; .org <wherever> ; your assembler may want org without the dot, or *=
+
+ ;;; Start of actual code.
+
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ; Subroutine: printchr
+ ;
+ ; Print ATASCII character in A, without preserving registers.
+ ; Assumes IOCB #0 is opened on the E: device, which is how the
+ ; Atari boots up. Uses "call-by-RTS" (weird looking but standard).
+ ;
+ ; Note that this will work even if the E: handler has been replaced,
+ ; e.g. with COL80 or COL64 or such.
+ ;
+ ; Hint: if you want to print graphics instead of actual cursor controls
+ ; or insert/delete/clear/etc, print an Escape character ($1B) before each,
+ ; or set DSPFLG ($2FE) to a non-zero value.
+ ;
+printchr:
+ tay        ; save A (character to print) in Y.
+ lda ICPTH  ; set up stack, so it looks like a JSR to the
+ pha        ;   put-one-byte address for E:,
+ lda ICPTL  ;   which the OS has conveniently stashed
+ pha        ;   in IOCB #0 (high byte pushed first, then low byte).
+ tya        ; restore A (put-one-byte argument).
+ rts        ; "return" to put-one-byte, which will return to printchr's caller.
+
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ; Subroutine: getchr
+ ;
+ ; Read ATASCII character from keyboard, return in A, without
+ ; preserving registers.
+ ;
+ ; Uses the published and immutable KEYBDV address in the ROM, meaning
+ ; it (a) doesn't require an IOCB open to the K: device, and (b)
+ ; it will not use any replacement K: handler that might be loaded
+ ; (however, unlike E:, replacing the OS K: device is so rare that
+ ; I've never heard of it being done).
+ ;
+ ; Hint: This is a "blocking" function call: it waits until a key is
+ ; pressed. If you want to poll (only read input when it's available),
+ ; check CH ($02FC): if it's $FF, no key is pressed.
+ ;
+ ; Note: if you really do want to read from the E: device, change
+ ; the two KEYBDV's below to EDITRV. E: will read an entire line,
+ ; including editing (backspace, insert/delete, cursor moves, etc)
+ ; the first time it's called, and return only the first character
+ ; read. Further calls will return the rest of the characters, one at
+ ; a time, with $9B (EOL) as the last one.
+ ;
+getchr:
+ lda KEYBDV+5 ; set up stack, so it looks like a JSR to the
+ pha          ;   get-one-byte address for K:,
+ lda KEYBDV+4 ;   which the OS ROM keeps in the
+ pha          ;   KEYBDV table ($E420); high byte first, then low byte.
+ rts          ; "return" to get-one-byte, which will return to getchr's caller.
+
+ ; These next two are 'wrappers' for the above, which preserve
+ ; the X register. Very convenient for use in a loop. If you don't
+ ; need these, don't copy them into your code. If you do need them,
+ ; remember that they call printchr and getchr, so you have to copy
+ ; those also.
+
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ; Subroutine: printchrx
+ ;
+ ; Print ATASCII character in A, preserving X register.
+ ;
+ ; Preserves X register (but nothing else), so it can be called from
+ ; within a loop that uses X for a counter, without having to worry
+ ; about it.
+ ;
+ ; On exit, A holds a copy of the X register, if you can think of
+ ; a use for that.
+ ;
+ ; Calls printchr.
+ ;
+printchrx:
+ tay          ; save A (character to print).
+ txa          ; save X,
+ pha          ;   on stack.
+ tya          ; restore A.
+ jsr printchr ; print the character.
+ pla          ; restore X,
+ tax          ;   from stack.
+ rts          ; this is a regular RTS (returns to printchrx's caller).
+
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ; Subroutine: getchrx
+ ;
+ ; Read ATASCII char from keyboard, return in A, preserving X register.
+ ; Actually, the return value here is also in Y, if you can think of a
+ ; use for that.
+ ;
+ ; Calls getchr.
+ ;
+getchrx:
+ txa          ; save X,
+ pha          ;   on stack.
+ jsr getchr   ; get the character.
+ tay          ; save A (our return value).
+ pla          ; restore X,
+ tax          ;   from stack.
+ tya          ; restore return value to A.
+ rts          ; regular RTS.
diff --git a/mkdlatbl.pl b/mkdlatbl.pl
new file mode 100644
index 0000000..a941d38
--- /dev/null
+++ b/mkdlatbl.pl
@@ -0,0 +1,58 @@
+#!/usr/bin/perl -w
+
+# mkdlatbl.pl - write dlatbl.s to stdout: for each of the 4 spawn
+# circles, the walk-area limits and 256 points on the circle's edge.
+#
+# Usage: perl mkdlatbl.pl > dlatbl.s
+# (the Makefile does this automatically)
+
+use POSIX 'round';
+
+# Screen center and circle geometry.
+my $PI      = 3.14159265358979;
+my $CENTERX = 127;
+my $CENTERY = 95;
+my $MARGIN  = 10;               # limits box extends this far past the circle
+my @RADII   = (15, 30, 45, 75); # one spawn circle per size step
+
+# bdeg means "binary degrees", 1/256 of a circle. Convert to radians.
+sub bdeg2rad {
+	my ($bdeg) = @_;
+	return $bdeg / 128 * $PI;
+}
+
+# Print one assembler table: a label line, then one .byte per value.
+sub emit_table {
+	my ($label, @values) = @_;
+	print "${label}:\n";
+	print " .byte $_\n" for @values;
+}
+
+my (@xmin, @xmax, @ymin, @ymax, @points_x, @points_y);
+
+for my $r (@RADII) {
+	# Limits box: a square centered on the screen center.
+	my $half = $r + $MARGIN;
+	push @xmin, $CENTERX - $half;
+	push @xmax, $CENTERX + $half;
+	push @ymin, $CENTERY - $half;
+	push @ymax, $CENTERY + $half;
+
+	# One point per binary degree, appended to the shared point lists,
+	# so each circle occupies its own run of 256 entries.
+	for my $bdeg (0 .. 255) {
+		my $rad = bdeg2rad($bdeg);
+		push @points_x, round($CENTERX + $r * cos($rad));
+		push @points_y, round($CENTERY + $r * sin($rad));
+	}
+}
+
+# The four limit tables hold one entry per circle.
+emit_table('xmin', @xmin);
+emit_table('xmax', @xmax);
+emit_table('ymin', @ymin);
+emit_table('ymax', @ymax);
+
+# The point tables hold 256 entries per circle, smallest circle first.
+emit_table('points_x', @points_x);
+emit_table('points_y', @points_y);
diff --git a/xex.inc b/xex.inc
new file mode 100644
index 0000000..184b2f9
--- /dev/null
+++ b/xex.inc
@@ -0,0 +1,166 @@
+; xex.inc - easy way to generate an atari 8-bit executable with ca65,
+; without dealing with ca65's linker scripts and segments.
+; see xex.rst (or xex.html) for full documentation.
+
+ .macro xex_failtarget target
+  .if .defined(target) ; refuse to assemble if this target symbol is defined
+   .fatal "must assemble with '-t none'"
+  .endif
+ .endmacro
+
+ xex_failtarget __APPLE2__
+ xex_failtarget __APPLE2ENH__
+ xex_failtarget __ATARI2600__
+ xex_failtarget __ATARI5200__
+ xex_failtarget __ATARI__
+ xex_failtarget __ATARIXL__
+ xex_failtarget __ATMOS__
+ xex_failtarget __BBC__
+ xex_failtarget __C128__
+ xex_failtarget __C16__
+ xex_failtarget __C64__
+ xex_failtarget __CBM__
+ xex_failtarget __CBM510__
+ xex_failtarget __CBM610__
+ xex_failtarget __CX16__
+ xex_failtarget __GEOS__
+ xex_failtarget __GEOS_APPLE__
+ xex_failtarget __GEOS_CBM__
+ xex_failtarget __LUNIX__
+ xex_failtarget __LYNX__
+ xex_failtarget __NES__
+ xex_failtarget __OSIC1P__
+ xex_failtarget __PET__
+ xex_failtarget __PLUS4__
+ xex_failtarget __SIM6502__
+ xex_failtarget __SIM65C02__
+ xex_failtarget __SUPERVISION__
+ xex_failtarget __VIC20__
+
+ .ifndef RUNAD ; pull in atari.inc only if its symbols aren't defined yet
+  .include "atari.inc"
+ .endif
+
+ .ifndef xex_verbose ; caller may define this before including xex.inc
+  xex_verbose=1
+ .endif
+
+ .ifndef xex_warnings ; caller may define this before including xex.inc
+  xex_warnings=1
+ .endif
+
+ xex_api_called .set 0 ; set to 1 by xex_org just before it invokes xexstart
+ xex_segcount .set 1   ; number of the next segment to be started
+ xex_old_org .set -1   ; start address of the open segment, -1 when none is open
+
+ .macro xex_vprint arg
+  .if xex_verbose <> 0 ; informational output only when verbose is on
+   .out .concat("xex.inc: ",arg)
+  .endif
+ .endmacro
+
+ .macro xex_warn arg
+  .if xex_warnings ; warnings have their own switch, independent of xex_verbose
+   .warning .concat("xex.inc: ",arg)
+  .endif
+ .endmacro
+
+ .macro xexstart startaddr, endaddr
+  .if xex_api_called = 0 ; only xex_org sets this flag
+   .fatal "xex.inc: don't call xexstart directly, use xex_org."
+  .endif
+  xex_api_called .set 0
+  .if xex_old_org > -1 ; a segment is still open: close it first
+   xex_endseg
+  .endif
+  .org 0 ; can be anything really...
+  .ifndef xex_ffff_emitted ; only the first segment gets the $FFFF signature
+   .byte $ff,$ff
+   xex_ffff_emitted=1
+   xex_vprint .sprintf("starting segment %d at $%04x (with ffff header)", xex_segcount, startaddr)
+  .else
+   xex_vprint .sprintf("starting segment %d at $%04x", xex_segcount, startaddr)
+  .endif
+  .word startaddr ; segment header: first address...
+  .word endaddr-1 ; ...and last address (xex_org passes the xex_endaddr_N label)
+  .org startaddr
+  ; we don't need a label here really, but define it so it shows up in
+  ; the VICE label file created by -Ln.
+  .ident(.sprintf("xex_startaddr_%d", xex_segcount)):
+  xex_segcount .set xex_segcount + 1
+ .endmacro
+
+ .macro xex_org startaddr,limit
+  xex_api_called .set 1 ; lets xexstart know it was invoked through the API
+  xexstart startaddr,.ident(.sprintf("xex_endaddr_%d", xex_segcount))
+  xex_old_org .set startaddr ; marks the segment as open
+  .ifblank limit
+   xex_limit .set $10000 ; impossibly high
+  .else
+   xex_limit .set limit ; checked by the .assert in xex_endseg
+  .endif
+ .endmacro
+
+ .macro xex_endseg
+  .local endaddr
+  endaddr = * - 1 ; address of the last byte emitted so far
+  .if xex_old_org < 0 ; no segment is open
+   xex_warn "xex_endseg called when not in a segment; harmless but redundant."
+   .exitmacro
+  .endif
+  .if endaddr < xex_old_org ; nothing was emitted since xex_org
+   .fatal .sprintf("cannot create an empty segment (start $%04x, end $%04x)", xex_old_org, endaddr)
+  .endif
+  ; this label is what xexstart's ".word endaddr-1" refers to.
+  .ident(.sprintf("xex_endaddr_%d", xex_segcount-1)):
+  xex_vprint .sprintf("  ending segment %d at $%04x, length $%04x", xex_segcount-1, endaddr, endaddr-xex_old_org+1)
+  xex_old_org .set -1 ; mark the segment as closed
+  .assert .not (endaddr >= xex_limit), error, .sprintf("xex.inc: segment %d exceeds user-requested limit $%04x, by $%04x bytes", xex_segcount-1, xex_limit, endaddr - xex_limit + 1)
+  xex_limit .set $10000
+ .endmacro
+
+ .macro xex_run runaddr
+  xex_org RUNAD ; a 2-byte segment that stores runaddr at RUNAD
+  .word runaddr
+  xex_endseg
+  xex_vprint .sprintf("         run address: $%04x", runaddr)
+  .ifndef xex_run_addr ; first xex_run in this file
+   xex_run_addr .set runaddr
+  .else
+   xex_warn .sprintf("multiple run addresses (previous was $%04x)", xex_run_addr)
+   xex_run_addr .set runaddr ; remember the latest one
+  .endif
+ .endmacro
+
+ .macro xex_init initaddr
+  xex_org INITAD ; a 2-byte segment that stores initaddr at INITAD
+  .word initaddr
+  xex_endseg
+  xex_vprint .sprintf("        init address: $%04x", initaddr)
+ .endmacro
+
+ .macro xex_incbin addr, filename, offset, length
+ .local o
+ .ifblank offset ; offset is optional and defaults to 0
+  o = 0
+ .else
+  o = offset
+ .endif
+  xex_org addr ; the included bytes become a segment of their own
+  .ifblank length ; length is optional: without it, .incbin gets no length argument
+   .incbin filename, o
+  .else
+   .incbin filename, o, length
+  .endif
+  xex_endseg
+ .endmacro
+
+ ;;; THIS DOESN'T WORK!
+ .if 0
+ .macro xex_include addr, filename
+  xex_org addr
+  .out .sprintf("before include %s: %04x", filename, *)
+  .include filename
+  .out .sprintf("after include: %04x", *)
+  xex_endseg
+ .endmacro
+ .endif
-- 
cgit v1.2.3