aboutsummaryrefslogtreecommitdiff
path: root/dasm2atasm
blob: b7ebe66bd3359d2b6248223efb19a1e9b563ba1a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
#!/usr/bin/perl -w

=head1 NAME

dasm2atasm - converts 6502 assembly in DASM syntax to ATASM (or MAC/65) format.

=head1 SYNOPSIS

	dasm2atasm mycode.asm

Writes output to I<mycode.m65>

	dasm2atasm stuff.asm other.m65

Reads from I<stuff.asm>, writes to I<other.m65>

=head1 DESCRIPTION

B<dasm2atasm> tries its best to convert DASM's syntax into something
that B<ATASM> can use. Since B<ATASM>'s syntax is 99% compatible with
that of B<MAC/65>, B<dasm2atasm> can be used for that as well.

=head1 CAVEATS

There are a few B<DASM> directives that aren't quite supported by
B<ATASM>.

=over 4

=item echo

In B<DASM> syntax, I<echo> can interpolate values, like so:

	echo $100-*, " bytes of zero page left"

B<ATASM>'s closest equivalent to I<echo> is I<.warn>, but it doesn't
allow multiple arguments or interpolation. For now, B<dasm2atasm> just
comments out the line with the I<echo> directive.

=item seg and seg.u

B<ATASM> just plain doesn't support segments. These directives will
just be commented out. This I<shouldn't> have any effect on the
object code.

=item sta.w, sty.w, stx.w 

B<ATASM> doesn't provide a way to force word addressing, when the operand
of a store instruction will allow zero page addressing to be used. You'll
run into this a lot in Atari 2600 code, or any other 6502 code that has to
maintain sync with an external piece of hardware (using word addressing
causes the 6502 to use an extra CPU cycle, which is the only way to cause
a 1-cycle delay).

For now, we're just converting any I<st?.w> instructions to the appropriate
I<.byte> directives, like so:

	;;;;; dasm2atasm: was `sta.w COLUPF', using .byte to generate opcode
	.byte $8d, COLUPF, $00

This works fine if I<COLUPF> is a zero-page label. It's possible, though
unlikely, that you'll run across code where the programmer has used I<sta.w>
with a label that would already cause absolute word addressing to be used,
in which case the extra I<$00> will break our code (literally: I<$00> is
the I<BRK> instruction!)

This isn't likely to be fixed by I<dasm2atasm>. The correct fix will be to
support I<sta.w> and friends in B<ATASM> itself, which may happen in the
future.

=item . (dot)

B<DASM> allows the use of I<.> or I<*> to represent the current program counter
in expressions. B<ATASM> only allows I<*>, and unless I want to include a
full expression-parser in B<dasm2atasm>, I can't reliably translate this.

For now, you'll have to fix this yourself. Future versions will at least
make an attempt, but this one doesn't.

=back

=head1 AUTHOR

B. Watson I<< <urchlay@urchlay.com> >>. Comments & constructive criticism
welcome, or just a friendly `hello'. Spam will be redirected to /dev/null
and so will the spammer's entire domain.

=cut

sub usage {
	print <<EOF;
Usage: $0 -[aclmr] infile.asm [outfile.m65]

EOF
	exit 1;
}

sub get_mac_sub {
	my $rex = shift;
	my $code = "sub { s/($rex)/\\U\$1/gio };";
	#warn "code is $code";
	return eval "$code";
}

sub unhex {
	# makes a proper $xx, $xx, $xx list of bytes
	# from a list of hex digits, spaces optional.
	my $bytes = shift;
	my $ret   = "";

	$bytes =~ s/\s//g;

	#warn "unhex: bytes is $bytes";

	for($bytes =~ /(..)/g) {
		#warn "unhex: found $_";
		$ret .= "\$$_, ";
	}

	chop $ret;
	chop $ret;

	return $ret;
}

sub fix_include {
	my $inc = shift;
	my $old = $inc;
	$inc =~ s/\.(\w+)("?)$/.m65$2/;

	if($recursive) {
		system("$cmd $old $inc");
	} else {
		warn "Don't forget to convert included file `$old' to .m65 format!\n";
	}
	return $inc;
}

sub do_subs {
	# Do the dirty work of the substitutions. Only reason we have this
	# as a subroutine of its own is for profiling purposes (and we do
	# spend a *lot* of time here!)
	my $line = shift;

	for($line) {
		s/^(\@?\w+):/$1/;      # no colons after labels, in atasm
		s/%/~/g;               # binary constant
		s/!=/<>/g;             # inequality

		s/^(\s+)\.?echo(.*)/;;;;;$1.warn$2/i     &&
			do { warn "$in, line $.:\n\t`.warn' not fully compatible with dasm's `echo', commented out\n" }
												 && next;

		# This is supposed to change e.g. `bpl .label' to `bpl @label'
		s/^(\s+)([a-z]{3})(\s+)\.(\w+)/$1$2$3\@$4/i
													&& next;


		s/{(\d)}/%$1/g;        # macro arg (gotta do this *after* bin. constants!)

# atasm doesn't support shifts, emulate with multiply/divide
		s/\s*<<\s*(\d+)/"*" . 2**$1/eg;
		s/\s*>>\s*(\d+)/"\/" . 2**$1/eg;

# atasm chokes sometimes when there's whitespace around an operator
#  unfortunately, a construct like `bne *-1' can't afford to lose the
#  space before the *... why, oh why, does * have to be both multiply and
#  program counter? *sigh*

#		s/\s*([-!|\/+*&])\s*/$1/g;

# ARGH. Why does dasm allow `byte #1, #2, #3'... and why do people *use* it?!
  s/^(\s+)\.?byte(\s+)/$1.byte$2/i && do { s/#//g } && next;
  s/^(\s+)\.?word(\s+)/$1.word$2/i && do { s/#//g } && next;
  s/^(\s+)\.?dc\.w(\s+)/$1.word$2/i     && do { s/#//g } && next;
  s/^(\s+)\.?dc(?:\.b)?(\s+)/$1.byte$2/i     && do { s/#//g } && next;

# 20070529 bkw: turn ".DS foo" into ".DC foo 0"
  s/^(\s+)\.?ds(\s+)(\S+)/$1.dc $3 0 /i     && do { s/#//g } && next;

# I really want to add `hex' to atasm. 'til then though, fake with .byte
		s/^(\s+)\.?hex\s+(.*)/$1 . '.byte ' .
			unhex($2)/ie                && next;

		s/^(\s+)\.?subroutine(.*)/$1.local$2/i && next;
		s/^(\s+)\.?include(\s+)(.*)/$1 . '.include' . $2 . fix_include($3)/gie
												 && next;
		s/^(\s+)\.?equ\b/$1=/i       && next;
		s/^(\s+)\.?repeat\b/$1.rept/i       && next;
		s/^(\s+)\.?repend\b/$1.endr/i       && next;
		s/^(\s+)\.?endm\b/$1.endm/i         && next;
		s/^(\s+)\.?org(\s+)([^,]*).*$/$1*=$2$3/i             && next;
		s/^(\s+)\.?incbin\b/$1\.incbin/i    && next;
		s/^(\s+)\.?err(.*)/$1.error$2/i     && next; # TODO: see if atasm allows `.error' with no message.
		s/^(\s+)\.?ifconst\s+(.*)/$1.if .def $2/i
														&& next; # TODO: test this!
		s/^(\s+)\.?else/$1.else/i           && next;
		s/^(\s+)\.?endif/$1.endif/i         && next;
		s/^(\s+)\.?if\s+(.*)/$1.if $2/i     && next;

		# stuff that doesn't work:
		s/^(\s+)(\.?seg(\..)?\s.*)/;;;;; dasm2atasm: `seg' not supported by atasm\n;;;;;$1$2/i
												 && next;
		s/^(\s+)(\.?processor\s.*)/;;;;; dasm2atasm: `processor' not supported by atasm\n;;;;;$1$2/i
												 && next;

		s/^(\s+)sta\.w(\s+)(.*)/;;;;; dasm2atasm: was `sta.w $3', using .byte to generate opcode\n$1.byte \$8d, <$3, >$3/i
												 && next;

		s/^(\s+)stx\.w(\s+)(.*)/;;;;; dasm2atasm: was `stx.w $3', using .byte to generate opcode\n$1.byte \$8e, <$3, >$3/i
												 && next;

		s/^(\s+)sta\.w(\s+)(.*)/;;;;; dasm2atasm: was `sty.w $3', using .byte to generate opcode\n$1.byte \$8c, <$3, >$3/i
												 && next;

		# atasm lacks `align', so make up for it with a macro
		if(s/(\s)\.?align(\s+)(.*)/$1ALIGN$2$3/i) {
			if(!$align_defined) { # only gotta define it if not already defined.
				for($align_macro) {
					$_ =~ s/^/($linenum += 10) . " "/gme if $linenum;
					$_ =~ s/\n/\x9b/g if $a8eol;
				}

				print OUT $align_macro; # no, I wouldn't use these globals in a CS class assignment.
				$align_defined++;
			}
			next;
		}

		# macros. This is by far the biggest pain in the ass yet.
		s/(\s)\.?mac\b/$1.macro/i;
		if(/(\s)\.macro(\s+)(\w+)/) {
			$mac_regex .= "|\\b$3\\b";
			$mac_sub = get_mac_sub($mac_regex);
		}

		if(ref $mac_sub) { # if we've found at least one macro so far...
			&$mac_sub;      # CAPITALIZE everything matching a macro name
		}  # note: this code assumes macros are *always* defined before they're
         # used. atasm requires this, but does dasm?

	}
	return $line;
}

## main() ##

$ca65 = 0;
$a8eol = 0;
$linenum = 0;
$recursive = 0;

$cmd = $0;

while($ARGV[0] =~ /^-/i) {
	my $opt = shift;
	$cmd .= " $opt";

	if($opt eq "-c") {
		$ca65++;
	} elsif($opt eq "-a") {
		$a8eol++;
	} elsif($opt eq "-l") {
		$linenum = 1000;
	} elsif($opt eq "-m") {
		$a8eol++;
		$linenum = 1000;
	} elsif($opt eq "-r") {
		$recursive++;
	} elsif($opt eq "--") {
		last;
	} else {
		warn "Unknown option '$opt'\n";
		usage;
	}
}

if($ca65 && ($linenum || $a8eol)) {
	die "Can't use line numbers and/or Atari EOLs with ca65 output\n";
}

$align_macro = <<EOF;
;;;;;; ALIGN macro defined by dasm2atasm
 .macro ALIGN
 *= [[*/%1]+1] * %1
 .endm
EOF

$align_defined = 0; # we only need to emit the macro definition once.

$in  = shift || usage;
$out = shift;

($out = $in) =~ s/(\.\w+)?$/.m65/ unless $out;

die "$0: can't use $in for both input and output\n" if $out eq $in;

open IN,  "<$in"  or die      "Can't read $in: $!\n";
open OUT, ">$out" or die "Can't write to $out: $!\n";

$hdr = <<EOF;
;;; Converted from DASM syntax with command:
;   $cmd $in $out

EOF

for($hdr) {
	$_ =~ s/^/($linenum += 10) . " "/gme if $linenum;
	$_ =~ s/\n/\x9b/g if $a8eol;
}

print OUT $hdr;

if($ca65) {
	print OUT <<EOF;
;;; ca65 features enabled by dasm2atasm
;   To build with ca65:
;     ca65 -o foo.o -t none foo.asm
;     ld65 -o foo.bin -t none foo.o
.FEATURE pc_assignment
.FEATURE labels_without_colons

EOF
}

$mac_regex = "!THIS_ISNT_SUPPOSED_TO_MATCH";
$mac_sub   = ""; # this will be the code ref we call to match $mac_regex

while(<IN>) {
	chomp;
	s/\r//; # you might not want this on dos/win, not sure if it matters.
	$label = "";

	if(/^(\w+)\s*=\s*\1/i) {
		print OUT ";;;;; dasm2atasm: labels are case-insensitive in atasm\n";
		$line = ";;;;; $_ ; This assignment is an error in atasm";
		next;
	}

# do this before we split out the label:
	s/^\./\@/;             # local label (dot in dasm, @ in atasm)

	if(s/^([^:;\s]*):?(\s+)/$2/) {
		$label = $1;
	}

	($line, $comment) = split /;/, $_, 2;
	next unless $line;
	
	$line = do_subs($line);

} continue {
	if($linenum) {
		print OUT "$linenum ";
		$linenum += 10;
	}

	print OUT $label if $label;
	print OUT $line if $line;
	print OUT ";$comment" if $comment;
	print OUT ($a8eol ? "\x9b" : "\n");
}