diff options
Diffstat (limited to 'dasm2atasm')
-rwxr-xr-x | dasm2atasm | 362 |
1 files changed, 362 insertions, 0 deletions
diff --git a/dasm2atasm b/dasm2atasm new file mode 100755 index 0000000..b7ebe66 --- /dev/null +++ b/dasm2atasm @@ -0,0 +1,362 @@ +#!/usr/bin/perl -w + +=head1 NAME + +dasm2atasm - converts 6502 assembly in DASM syntax to ATASM (or MAC/65) format. + +=head1 SYNOPSIS + + dasm2atasm mycode.asm + +Writes output to I<mycode.m65> + + dasm2atasm stuff.asm other.m65 + +Reads from I<stuff.asm>, writes to I<other.m65> + +=head1 DESCRIPTION + +B<dasm2atasm> tries its best to convert DASM's syntax into something +that B<ATASM> can use. Since B<ATASM>'s syntax is 99% compatible with +that of B<MAC/65>, B<dasm2atasm> can be used for that as well. + +=head1 CAVEATS + +There are a few B<DASM> directives that aren't quite supported by +B<ATASM>. + +=over 4 + +=item echo + +In B<DASM> syntax, I<echo> can interpolate values, like so: + + echo $100-*, " bytes of zero page left" + +B<ATASM>'s closest equivalent to I<echo> is I<.warn>, but it doesn't +allow multiple arguments or interpolation. For now, B<dasm2atasm> just +comments out the line with the I<echo> directive. + +=item seg and seg.u + +B<ATASM> just plain doesn't support segments. These directives will +just be commented out. This I<shouldn't> have any effect on the +object code. + +=item sta.w, sty.w, stx.w + +B<ATASM> doesn't provide a way to force word addressing, when the operand +of a store instruction will allow zero page addressing to be used. You'll +run into this a lot in Atari 2600 code, or any other 6502 code that has to +maintain sync with an external piece of hardware (using word addressing +causes the 6502 to use an extra CPU cycle, which is the only way to cause +a 1-cycle delay). + +For now, we're just converting any I<st?.w> instructions to the appropriate +I<.byte> directives, like so: + + ;;;;; dasm2atasm: was `sta.w COLUPF', using .byte to generate opcode + .byte $8d, COLUPF, $00 + +This works fine if I<COLUPF> is a zero-page label. It's possible, though +unlikely, that you'll run across code where the programmer has used I<sta.w> +with a label that would already cause absolute word addressing to be used, +in which case the extra I<$00> will break our code (literally: I<$00> is +the I<BRK> instruction!) + +This isn't likely to be fixed by I<dasm2atasm>. The correct fix will be to +support I<sta.w> and friends in B<ATASM> itself, which may happen in the +future. + +=item . (dot) + +B<DASM> allows the use of I<.> or I<*> to represent the current program counter +in expressions. B<ATASM> only allows I<*>, and unless I want to include a +full expression-parser in B<dasm2atasm>, I can't reliably translate this. + +For now, you'll have to fix this yourself. Future versions will at least +make an attempt, but this one doesn't. + +=back + +=head1 AUTHOR + +B. Watson I<< <urchlay@urchlay.com> >>. Comments & constructive criticism +welcome, or just a friendly `hello'. Spam will be redirected to /dev/null +and so will the spammer's entire domain. + +=cut + +sub usage { + print <<EOF; +Usage: $0 -[aclmr] infile.asm [outfile.m65] + +EOF + exit 1; +} + +sub get_mac_sub { + my $rex = shift; + my $code = "sub { s/($rex)/\\U\$1/gio };"; + #warn "code is $code"; + return eval "$code"; +} + +sub unhex { + # makes a proper $xx, $xx, $xx list of bytes + # from a list of hex digits, spaces optional. + my $bytes = shift; + my $ret = ""; + + $bytes =~ s/\s//g; + + #warn "unhex: bytes is $bytes"; + + for($bytes =~ /(..)/g) { + #warn "unhex: found $_"; + $ret .= "\$$_, "; + } + + chop $ret; + chop $ret; + + return $ret; +} + +sub fix_include { + my $inc = shift; + my $old = $inc; + $inc =~ s/\.(\w+)("?)$/.m65$2/; + + if($recursive) { + system("$cmd $old $inc"); + } else { + warn "Don't forget to convert included file `$old' to .m65 format!\n"; + } + return $inc; +} + +sub do_subs { + # Do the dirty work of the substitutions. Only reason we have this + # as a subroutine of its own is for profiling purposes (and we do + # spend a *lot* of time here!) + my $line = shift; + + for($line) { + s/^(\@?\w+):/$1/; # no colons after labels, in atasm + s/%/~/g; # binary constant + s/!=/<>/g; # inequality + + s/^(\s+)\.?echo(.*)/;;;;;$1.warn$2/i && + do { warn "$in, line $.:\n\t`.warn' not fully compatible with dasm's `echo', commented out\n" } + && next; + + # This is supposed to change e.g. `bpl .label' to `bpl @label' + s/^(\s+)([a-z]{3})(\s+)\.(\w+)/$1$2$3\@$4/i + && next; + + + s/{(\d)}/%$1/g; # macro arg (gotta do this *after* bin. constants!) + +# atasm doesn't support shifts, emulate with multiply/divide + s/\s*<<\s*(\d+)/"*" . 2**$1/eg; + s/\s*>>\s*(\d+)/"\/" . 2**$1/eg; + +# atasm chokes sometimes when there's whitespace around an operator +# unfortunately, a construct like `bne *-1' can't afford to lose the +# space before the *... why, oh why, does * have to be both multiply and +# program counter? *sigh* + +# s/\s*([-!|\/+*&])\s*/$1/g; + +# ARGH. Why does dasm allow `byte #1, #2, #3'... and why do people *use* it?! + s/^(\s+)\.?byte(\s+)/$1.byte$2/i && do { s/#//g } && next; + s/^(\s+)\.?word(\s+)/$1.word$2/i && do { s/#//g } && next; + s/^(\s+)\.?dc\.w(\s+)/$1.word$2/i && do { s/#//g } && next; + s/^(\s+)\.?dc(?:\.b)?(\s+)/$1.byte$2/i && do { s/#//g } && next; + +# 20070529 bkw: turn ".DS foo" into ".DC foo 0" + s/^(\s+)\.?ds(\s+)(\S+)/$1.dc $3 0 /i && do { s/#//g } && next; + +# I really want to add `hex' to atasm. 'til then though, fake with .byte + s/^(\s+)\.?hex\s+(.*)/$1 . '.byte ' . + unhex($2)/ie && next; + + s/^(\s+)\.?subroutine(.*)/$1.local$2/i && next; + s/^(\s+)\.?include(\s+)(.*)/$1 . '.include' . $2 . fix_include($3)/gie + && next; + s/^(\s+)\.?equ\b/$1=/i && next; + s/^(\s+)\.?repeat\b/$1.rept/i && next; + s/^(\s+)\.?repend\b/$1.endr/i && next; + s/^(\s+)\.?endm\b/$1.endm/i && next; + s/^(\s+)\.?org(\s+)([^,]*).*$/$1*=$2$3/i && next; + s/^(\s+)\.?incbin\b/$1\.incbin/i && next; + s/^(\s+)\.?err(.*)/$1.error$2/i && next; # TODO: see if atasm allows `.error' with no message. + s/^(\s+)\.?ifconst\s+(.*)/$1.if .def $2/i + && next; # TODO: test this! + s/^(\s+)\.?else/$1.else/i && next; + s/^(\s+)\.?endif/$1.endif/i && next; + s/^(\s+)\.?if\s+(.*)/$1.if $2/i && next; + + # stuff that doesn't work: + s/^(\s+)(\.?seg(\..)?\s.*)/;;;;; dasm2atasm: `seg' not supported by atasm\n;;;;;$1$2/i + && next; + s/^(\s+)(\.?processor\s.*)/;;;;; dasm2atasm: `processor' not supported by atasm\n;;;;;$1$2/i + && next; + + s/^(\s+)sta\.w(\s+)(.*)/;;;;; dasm2atasm: was `sta.w $3', using .byte to generate opcode\n$1.byte \$8d, <$3, >$3/i + && next; + + s/^(\s+)stx\.w(\s+)(.*)/;;;;; dasm2atasm: was `stx.w $3', using .byte to generate opcode\n$1.byte \$8e, <$3, >$3/i + && next; + + s/^(\s+)sta\.w(\s+)(.*)/;;;;; dasm2atasm: was `sty.w $3', using .byte to generate opcode\n$1.byte \$8c, <$3, >$3/i + && next; + + # atasm lacks `align', so make up for it with a macro + if(s/(\s)\.?align(\s+)(.*)/$1ALIGN$2$3/i) { + if(!$align_defined) { # only gotta define it if not already defined. + for($align_macro) { + $_ =~ s/^/($linenum += 10) . " "/gme if $linenum; + $_ =~ s/\n/\x9b/g if $a8eol; + } + + print OUT $align_macro; # no, I wouldn't use these globals in a CS class assignment. + $align_defined++; + } + next; + } + + # macros. This is by far the biggest pain in the ass yet. + s/(\s)\.?mac\b/$1.macro/i; + if(/(\s)\.macro(\s+)(\w+)/) { + $mac_regex .= "|\\b$3\\b"; + $mac_sub = get_mac_sub($mac_regex); + } + + if(ref $mac_sub) { # if we've found at least one macro so far... + &$mac_sub; # CAPITALIZE everything matching a macro name + } # note: this code assumes macros are *always* defined before they're + # used. atasm requires this, but does dasm? + + } + return $line; +} + +## main() ## + +$ca65 = 0; +$a8eol = 0; +$linenum = 0; +$recursive = 0; + +$cmd = $0; + +while($ARGV[0] =~ /^-/i) { + my $opt = shift; + $cmd .= " $opt"; + + if($opt eq "-c") { + $ca65++; + } elsif($opt eq "-a") { + $a8eol++; + } elsif($opt eq "-l") { + $linenum = 1000; + } elsif($opt eq "-m") { + $a8eol++; + $linenum = 1000; + } elsif($opt eq "-r") { + $recursive++; + } elsif($opt eq "--") { + last; + } else { + warn "Unknown option '$opt'\n"; + usage; + } +} + +if($ca65 && ($linenum || $a8eol)) { + die "Can't use line numbers and/or Atari EOLs with ca65 output\n"; +} + +$align_macro = <<EOF; +;;;;;; ALIGN macro defined by dasm2atasm + .macro ALIGN + *= [[*/%1]+1] * %1 + .endm +EOF + +$align_defined = 0; # we only need to emit the macro definition once. + +$in = shift || usage; +$out = shift; + +($out = $in) =~ s/(\.\w+)?$/.m65/ unless $out; + +die "$0: can't use $in for both input and output\n" if $out eq $in; + +open IN, "<$in" or die "Can't read $in: $!\n"; +open OUT, ">$out" or die "Can't write to $out: $!\n"; + +$hdr = <<EOF; +;;; Converted from DASM syntax with command: +; $cmd $in $out + +EOF + +for($hdr) { + $_ =~ s/^/($linenum += 10) . " "/gme if $linenum; + $_ =~ s/\n/\x9b/g if $a8eol; +} + +print OUT $hdr; + +if($ca65) { + print OUT <<EOF; +;;; ca65 features enabled by dasm2atasm +; To build with ca65: +; ca65 -o foo.o -t none foo.asm +; ld65 -o foo.bin -t none foo.o +.FEATURE pc_assignment +.FEATURE labels_without_colons + +EOF +} + +$mac_regex = "!THIS_ISNT_SUPPOSED_TO_MATCH"; +$mac_sub = ""; # this will be the code ref we call to match $mac_regex + +while(<IN>) { + chomp; + s/\r//; # you might not want this on dos/win, not sure if it matters. + $label = ""; + + if(/^(\w+)\s*=\s*\1/i) { + print OUT ";;;;; dasm2atasm: labels are case-insensitive in atasm\n"; + $line = ";;;;; $_ ; This assignment is an error in atasm"; + next; + } + +# do this before we split out the label: + s/^\./\@/; # local label (dot in dasm, @ in atasm) + + if(s/^([^:;\s]*):?(\s+)/$2/) { + $label = $1; + } + + ($line, $comment) = split /;/, $_, 2; + next unless $line; + + $line = do_subs($line); + +} continue { + if($linenum) { + print OUT "$linenum "; + $linenum += 10; + } + + print OUT $label if $label; + print OUT $line if $line; + print OUT ";$comment" if $comment; + print OUT ($a8eol ? "\x9b" : "\n"); +} |