#!/usr/bin/perl -w

sub usage {
	print <<EOF;
Usage: $0 -[aclmr] infile.asm [outfile.m65]
See man page for details.
EOF
	exit 1;
}

sub get_mac_sub {
	my $rex = shift;
	my $code = "sub { s/($rex)/\\U\$1/gio };";
	#warn "code is $code";
	return eval "$code";
}

sub unhex {
	# makes a proper $xx, $xx, $xx list of bytes
	# from a list of hex digits, spaces optional.
	my $bytes = shift;
	my $ret   = "";

	$bytes =~ s/\s//g;

	#warn "unhex: bytes is $bytes";

	for($bytes =~ /(..)/g) {
		#warn "unhex: found $_";
		$ret .= "\$$_, ";
	}

	chop $ret;
	chop $ret;

	return $ret;
}

sub fix_include {
	my $inc = shift;
	my $old = $inc;
	$inc =~ s/\.(\w+)("?)$/.m65$2/;

	if($recursive) {
		system("$cmd $old $inc");
	} else {
		warn "Don't forget to convert included file `$old' to .m65 format!\n";
	}
	return $inc;
}

sub do_subs {
	# Do the dirty work of the substitutions. Only reason we have this
	# as a subroutine of its own is for profiling purposes (and we do
	# spend a *lot* of time here!)
	my $line = shift;

	for($line) {
		s/^(\@?\w+):/$1/;      # no colons after labels, in atasm
		s/%/~/g;               # binary constant
		s/!=/<>/g;             # inequality

		s/^(\s+)\.?echo(.*)/;;;;;$1.warn$2/i     &&
			do { warn "$in, line $.:\n\t`.warn' not fully compatible with dasm's `echo', commented out\n" }
												 && next;

		# This is supposed to change e.g. `bpl .label' to `bpl @label'
		s/^(\s+)([a-z]{3})(\s+)\.(\w+)/$1$2$3\@$4/i
													&& next;


		s/{(\d)}/%$1/g;        # macro arg (gotta do this *after* bin. constants!)

# atasm doesn't support shifts, emulate with multiply/divide
		s/\s*<<\s*(\d+)/"*" . 2**$1/eg;
		s/\s*>>\s*(\d+)/"\/" . 2**$1/eg;

# atasm chokes sometimes when there's whitespace around an operator
#  unfortunately, a construct like `bne *-1' can't afford to lose the
#  space before the *... why, oh why, does * have to be both multiply and
#  program counter? *sigh*

#		s/\s*([-!|\/+*&])\s*/$1/g;

# ARGH. Why does dasm allow `byte #1, #2, #3'... and why do people *use* it?!
  s/^(\s+)\.?byte(\s+)/$1.byte$2/i && do { s/#//g } && next;
  s/^(\s+)\.?word(\s+)/$1.word$2/i && do { s/#//g } && next;
  s/^(\s+)\.?dc\.w(\s+)/$1.word$2/i     && do { s/#//g } && next;
  s/^(\s+)\.?dc(?:\.b)?(\s+)/$1.byte$2/i     && do { s/#//g } && next;

# 20070529 bkw: turn ".DS foo" into ".DC foo 0"
  s/^(\s+)\.?ds(\s+)(\S+)/$1.dc $3 0 /i     && do { s/#//g } && next;

# I really want to add `hex' to atasm. 'til then though, fake with .byte
		s/^(\s+)\.?hex\s+(.*)/$1 . '.byte ' .
			unhex($2)/ie                && next;

		s/^(\s+)\.?subroutine(.*)/$1.local$2/i && next;
		s/^(\s+)\.?include(\s+)(.*)/$1 . '.include' . $2 . fix_include($3)/gie
												 && next;
		s/^(\s+)\.?equ\b/$1=/i       && next;
		s/^(\s+)\.?repeat\b/$1.rept/i       && next;
		s/^(\s+)\.?repend\b/$1.endr/i       && next;
		s/^(\s+)\.?endm\b/$1.endm/i         && next;
		s/^(\s+)\.?org(\s+)([^,]*).*$/$1*=$2$3/i             && next;
		s/^(\s+)\.?incbin\b/$1\.incbin/i    && next;
		s/^(\s+)\.?err(.*)/$1.error$2/i     && next; # TODO: see if atasm allows `.error' with no message.
		s/^(\s+)\.?ifconst\s+(.*)/$1.if .def $2/i
														&& next; # TODO: test this!
		s/^(\s+)\.?else/$1.else/i           && next;
		s/^(\s+)\.?endif/$1.endif/i         && next;
		s/^(\s+)\.?if\s+(.*)/$1.if $2/i     && next;

		# stuff that doesn't work:
		s/^(\s+)(\.?seg(\..)?\s.*)/;;;;; dasm2atasm: `seg' not supported by atasm\n;;;;;$1$2/i
												 && next;
		s/^(\s+)(\.?processor\s.*)/;;;;; dasm2atasm: `processor' not supported by atasm\n;;;;;$1$2/i
												 && next;

		s/^(\s+)sta\.w(\s+)(.*)/;;;;; dasm2atasm: was `sta.w $3', using .byte to generate opcode\n$1.byte \$8d, <$3, >$3/i
												 && next;

		s/^(\s+)stx\.w(\s+)(.*)/;;;;; dasm2atasm: was `stx.w $3', using .byte to generate opcode\n$1.byte \$8e, <$3, >$3/i
												 && next;

		s/^(\s+)sta\.w(\s+)(.*)/;;;;; dasm2atasm: was `sty.w $3', using .byte to generate opcode\n$1.byte \$8c, <$3, >$3/i
												 && next;

		# atasm lacks `align', so make up for it with a macro
		if(s/(\s)\.?align(\s+)(.*)/$1ALIGN$2$3/i) {
			if(!$align_defined) { # only gotta define it if not already defined.
				for($align_macro) {
					$_ =~ s/^/($linenum += 10) . " "/gme if $linenum;
					$_ =~ s/\n/\x9b/g if $a8eol;
				}

				print OUT $align_macro; # no, I wouldn't use these globals in a CS class assignment.
				$align_defined++;
			}
			next;
		}

		# macros. This is by far the biggest pain in the ass yet.
		s/(\s)\.?mac\b/$1.macro/i;
		if(/(\s)\.macro(\s+)(\w+)/) {
			$mac_regex .= "|\\b$3\\b";
			$mac_sub = get_mac_sub($mac_regex);
		}

		if(ref $mac_sub) { # if we've found at least one macro so far...
			&$mac_sub;      # CAPITALIZE everything matching a macro name
		}  # note: this code assumes macros are *always* defined before they're
         # used. atasm requires this, but does dasm?

	}
	return $line;
}

## main() ##

$ca65 = 0;
$a8eol = 0;
$linenum = 0;
$recursive = 0;

$cmd = $0;

while($ARGV[0] =~ /^-/i) {
	my $opt = shift;
	$cmd .= " $opt";

	if($opt eq "-c") {
		$ca65++;
	} elsif($opt eq "-a") {
		$a8eol++;
	} elsif($opt eq "-l") {
		$linenum = 1000;
	} elsif($opt eq "-m") {
		$a8eol++;
		$linenum = 1000;
	} elsif($opt eq "-r") {
		$recursive++;
	} elsif($opt eq "--") {
		last;
	} else {
		warn "Unknown option '$opt'\n";
		usage;
	}
}

if($ca65 && ($linenum || $a8eol)) {
	die "Can't use line numbers and/or Atari EOLs with ca65 output\n";
}

$align_macro = <<EOF;
;;;;;; ALIGN macro defined by dasm2atasm
 .macro ALIGN
 *= [[*/%1]+1] * %1
 .endm
EOF

$align_defined = 0; # we only need to emit the macro definition once.

$in  = shift || usage;
$out = shift;

($out = $in) =~ s/(\.\w+)?$/.m65/ unless $out;

die "$0: can't use $in for both input and output\n" if $out eq $in;

open IN,  "<$in"  or die      "Can't read $in: $!\n";
open OUT, ">$out" or die "Can't write to $out: $!\n";

$hdr = <<EOF;
;;; Converted from DASM syntax with command:
;   $cmd $in $out

EOF

for($hdr) {
	$_ =~ s/^/($linenum += 10) . " "/gme if $linenum;
	$_ =~ s/\n/\x9b/g if $a8eol;
}

print OUT $hdr;

if($ca65) {
	print OUT <<EOF;
;;; ca65 features enabled by dasm2atasm
;   To build with ca65:
;     ca65 -o foo.o -t none foo.asm
;     ld65 -o foo.bin -t none foo.o
.FEATURE pc_assignment
.FEATURE labels_without_colons

EOF
}

$mac_regex = "!THIS_ISNT_SUPPOSED_TO_MATCH";
$mac_sub   = ""; # this will be the code ref we call to match $mac_regex

while(<IN>) {
	chomp;
	s/\r//; # you might not want this on dos/win, not sure if it matters.
	$label = "";

	if(/^(\w+)\s*=\s*\1/i) {
		print OUT ";;;;; dasm2atasm: labels are case-insensitive in atasm\n";
		$line = ";;;;; $_ ; This assignment is an error in atasm";
		next;
	}

# do this before we split out the label:
	s/^\./\@/;             # local label (dot in dasm, @ in atasm)

	if(s/^([^:;\s]*):?(\s+)/$2/) {
		$label = $1;
	}

	($line, $comment) = split /;/, $_, 2;
	next unless $line;
	
	$line = do_subs($line);

} continue {
	if($linenum) {
		print OUT "$linenum ";
		$linenum += 10;
	}

	print OUT $label if $label;
	print OUT $line if $line;
	print OUT ";$comment" if $comment;
	print OUT ($a8eol ? "\x9b" : "\n");
}