aboutsummaryrefslogtreecommitdiff
path: root/dasm2atasm
blob: 016722f59bd051cb6e67d58db69a74bbb7f1e91f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
#!/usr/bin/perl -w

sub usage {
	print <<EOF;
Usage: $0 -[aclmr] infile.asm [outfile.m65]
See man page for details.
EOF
	exit 1;
}

sub get_mac_sub {
	my $rex = shift;
	my $code = "sub { s/($rex)/\\U\$1/gio };";
	#warn "code is $code";
	return eval "$code";
}

sub unhex {
	# makes a proper $xx, $xx, $xx list of bytes
	# from a list of hex digits, spaces optional.
	my $bytes = shift;
	my $ret   = "";

	$bytes =~ s/\s//g;

	#warn "unhex: bytes is $bytes";

	for($bytes =~ /(..)/g) {
		#warn "unhex: found $_";
		$ret .= "\$$_, ";
	}

	chop $ret;
	chop $ret;

	return $ret;
}

sub fix_include {
	my $inc = shift;
	my $old = $inc;
	$inc =~ s/\.(\w+)("?)$/.m65$2/;

	if($recursive) {
		system("$cmd $old $inc");
	} else {
		warn "Don't forget to convert included file `$old' to .m65 format!\n";
	}
	return $inc;
}

sub do_subs {
	# Do the dirty work of the substitutions. Only reason we have this
	# as a subroutine of its own is for profiling purposes (and we do
	# spend a *lot* of time here!)
	my $line = shift;

	for($line) {
		s/^(\@?\w+):/$1/;      # no colons after labels, in atasm
		s/%/~/g;               # binary constant
		s/!=/<>/g;             # inequality

		s/^(\s+)\.?echo(.*)/;;;;;$1.warn$2/i     &&
			do { warn "$in, line $.:\n\t`.warn' not fully compatible with dasm's `echo', commented out\n" }
												 && next;

		# This is supposed to change e.g. `bpl .label' to `bpl @label'
		s/^(\s+)([a-z]{3})(\s+)\.(\w+)/$1$2$3\@$4/i
													&& next;


		s/{(\d)}/%$1/g;        # macro arg (gotta do this *after* bin. constants!)

# atasm doesn't support shifts, emulate with multiply/divide
		s/\s*<<\s*(\d+)/"*" . 2**$1/eg;
		s/\s*>>\s*(\d+)/"\/" . 2**$1/eg;

# atasm chokes sometimes when there's whitespace around an operator
#  unfortunately, a construct like `bne *-1' can't afford to lose the
#  space before the *... why, oh why, does * have to be both multiply and
#  program counter? *sigh*

#		s/\s*([-!|\/+*&])\s*/$1/g;

# ARGH. Why does dasm allow `byte #1, #2, #3'... and why do people *use* it?!
  s/^(\s+)\.?byte(\s+)/$1.byte$2/i && do { s/#//g } && next;
  s/^(\s+)\.?word(\s+)/$1.word$2/i && do { s/#//g } && next;
  s/^(\s+)\.?dc\.w(\s+)/$1.word$2/i     && do { s/#//g } && next;
  s/^(\s+)\.?dc(?:\.b)?(\s+)/$1.byte$2/i     && do { s/#//g } && next;

# 20070529 bkw: turn ".DS foo" into ".DC foo 0"
  s/^(\s+)\.?ds(\s+)(\S+)/$1.dc $3 0 /i     && do { s/#//g } && next;

# I really want to add `hex' to atasm. 'til then though, fake with .byte
		s/^(\s+)\.?hex\s+(.*)/$1 . '.byte ' .
			unhex($2)/ie                && next;

		s/^(\s+)\.?subroutine(.*)/$1.local$2/i && next;
		s/^(\s+)\.?include(\s+)(.*)/$1 . '.include' . $2 . fix_include($3)/gie
												 && next;
		s/^(\s+)\.?equ\b/$1=/i       && next;
		s/^(\s+)\.?repeat\b/$1.rept/i       && next;
		s/^(\s+)\.?repend\b/$1.endr/i       && next;
		s/^(\s+)\.?endm\b/$1.endm/i         && next;
		s/^(\s+)\.?org(\s+)([^,]*).*$/$1*=$2$3/i             && next;
		s/^(\s+)\.?incbin\b/$1\.incbin/i    && next;
		s/^(\s+)\.?err(.*)/$1.error$2/i     && next; # TODO: see if atasm allows `.error' with no message.
		s/^(\s+)\.?ifconst\s+(.*)/$1.if .def $2/i
														&& next; # TODO: test this!
		s/^(\s+)\.?else/$1.else/i           && next;
		s/^(\s+)\.?endif/$1.endif/i         && next;
		s/^(\s+)\.?if\s+(.*)/$1.if $2/i     && next;

		# stuff that doesn't work:
		s/^(\s+)(\.?seg(\..)?\s.*)/;;;;; dasm2atasm: `seg' not supported by atasm\n;;;;;$1$2/i
												 && next;
		s/^(\s+)(\.?processor\s.*)/;;;;; dasm2atasm: `processor' not supported by atasm\n;;;;;$1$2/i
												 && next;

		s/^(\s+)sta\.w(\s+)(.*)/;;;;; dasm2atasm: was `sta.w $3', using .byte to generate opcode\n$1.byte \$8d, <$3, >$3/i
												 && next;

		s/^(\s+)stx\.w(\s+)(.*)/;;;;; dasm2atasm: was `stx.w $3', using .byte to generate opcode\n$1.byte \$8e, <$3, >$3/i
												 && next;

		s/^(\s+)sta\.w(\s+)(.*)/;;;;; dasm2atasm: was `sty.w $3', using .byte to generate opcode\n$1.byte \$8c, <$3, >$3/i
												 && next;

		# atasm lacks `align', so make up for it with a macro
		if(s/(\s)\.?align(\s+)(.*)/$1ALIGN$2$3/i) {
			if(!$align_defined) { # only gotta define it if not already defined.
				for($align_macro) {
					$_ =~ s/^/($linenum += 10) . " "/gme if $linenum;
					$_ =~ s/\n/\x9b/g if $a8eol;
				}

				print OUT $align_macro; # no, I wouldn't use these globals in a CS class assignment.
				$align_defined++;
			}
			next;
		}

		# macros. This is by far the biggest pain in the ass yet.
		s/(\s)\.?mac\b/$1.macro/i;
		if(/(\s)\.macro(\s+)(\w+)/) {
			$mac_regex .= "|\\b$3\\b";
			$mac_sub = get_mac_sub($mac_regex);
		}

		if(ref $mac_sub) { # if we've found at least one macro so far...
			&$mac_sub;      # CAPITALIZE everything matching a macro name
		}  # note: this code assumes macros are *always* defined before they're
         # used. atasm requires this, but does dasm?

	}
	return $line;
}

## main() ##

$ca65 = 0;
$a8eol = 0;
$linenum = 0;
$recursive = 0;

$cmd = $0;

while($ARGV[0] =~ /^-/i) {
	my $opt = shift;
	$cmd .= " $opt";

	if($opt eq "-c") {
		$ca65++;
	} elsif($opt eq "-a") {
		$a8eol++;
	} elsif($opt eq "-l") {
		$linenum = 1000;
	} elsif($opt eq "-m") {
		$a8eol++;
		$linenum = 1000;
	} elsif($opt eq "-r") {
		$recursive++;
	} elsif($opt eq "--") {
		last;
	} else {
		warn "Unknown option '$opt'\n";
		usage;
	}
}

if($ca65 && ($linenum || $a8eol)) {
	die "Can't use line numbers and/or Atari EOLs with ca65 output\n";
}

$align_macro = <<EOF;
;;;;;; ALIGN macro defined by dasm2atasm
 .macro ALIGN
 *= [[*/%1]+1] * %1
 .endm
EOF

$align_defined = 0; # we only need to emit the macro definition once.

$in  = shift || usage;
$out = shift;

($out = $in) =~ s/(\.\w+)?$/.m65/ unless $out;

die "$0: can't use $in for both input and output\n" if $out eq $in;

open IN,  "<$in"  or die      "Can't read $in: $!\n";
open OUT, ">$out" or die "Can't write to $out: $!\n";

$hdr = <<EOF;
;;; Converted from DASM syntax with command:
;   $cmd $in $out

EOF

for($hdr) {
	$_ =~ s/^/($linenum += 10) . " "/gme if $linenum;
	$_ =~ s/\n/\x9b/g if $a8eol;
}

print OUT $hdr;

if($ca65) {
	print OUT <<EOF;
;;; ca65 features enabled by dasm2atasm
;   To build with ca65:
;     ca65 -o foo.o -t none foo.asm
;     ld65 -o foo.bin -t none foo.o
.FEATURE pc_assignment
.FEATURE labels_without_colons

EOF
}

$mac_regex = "!THIS_ISNT_SUPPOSED_TO_MATCH";
$mac_sub   = ""; # this will be the code ref we call to match $mac_regex

while(<IN>) {
	chomp;
	s/\r//; # you might not want this on dos/win, not sure if it matters.
	$label = "";

	if(/^(\w+)\s*=\s*\1/i) {
		print OUT ";;;;; dasm2atasm: labels are case-insensitive in atasm\n";
		$line = ";;;;; $_ ; This assignment is an error in atasm";
		next;
	}

# do this before we split out the label:
	s/^\./\@/;             # local label (dot in dasm, @ in atasm)

	if(s/^([^:;\s]*):?(\s+)/$2/) {
		$label = $1;
	}

	($line, $comment) = split /;/, $_, 2;
	next unless $line;
	
	$line = do_subs($line);

} continue {
	if($linenum) {
		print OUT "$linenum ";
		$linenum += 10;
	}

	print OUT $label if $label;
	print OUT $line if $line;
	print OUT ";$comment" if $comment;
	print OUT ($a8eol ? "\x9b" : "\n");
}