diff options
| -rw-r--r-- | unprotbas.1 | 60 | ||||
| -rw-r--r-- | unprotbas.c | 187 | ||||
| -rw-r--r-- | unprotbas.rst | 57 | 
3 files changed, 292 insertions, 12 deletions
diff --git a/unprotbas.1 b/unprotbas.1 index 2b389ac..992bd28 100644 --- a/unprotbas.1 +++ b/unprotbas.1 @@ -27,12 +27,12 @@ level margin: \\n[rst2man-indent\\n[rst2man-indent-level]]  .\" new: \\n[rst2man-indent\\n[rst2man-indent-level]]  .in \\n[rst2man-indent\\n[rst2man-indent-level]]u  .. -.TH "UNPROTBAS" 1 "2024-05-19" "0.2.1" "Urchlay's Atari 8-bit Tools" +.TH "UNPROTBAS" 1 "2024-05-22" "0.2.1" "Urchlay's Atari 8-bit Tools"  .SH NAME  unprotbas \- Unprotect LIST-protected Atari 8-bit BASIC programs  .SH SYNOPSIS  .sp -unprotbas [\fB\-v\fP] [\fB\-f\fP] [\fB\-n\fP] [\fB\-g\fP] [\fB\-c\fP] \fBinput\-file\fP \fBoutput\-file\fP +unprotbas [\fB\-v\fP] [\fB\-f\fP] [\fB\-n\fP] [\fB\-g\fP] [\fB\-c\fP] [\fB\-r\fP | \fB\-w\fP] \fBinput\-file\fP \fBoutput\-file\fP  .SH DESCRIPTION  .sp  \fBunprotbas\fP modifies a LIST\-protected Atari 8\-bit BASIC program, @@ -73,6 +73,17 @@ it\(aqs left as\-is, in case it\(aqs actually data used by the program.  Check only. Does a dry run. Loads the program, unprotects it in  memory, but doesn\(aqt write the result anywhere. In this mode, there  is no \fBoutput\-file\fP\&. +.TP +.B \fB\-w\fP +Write the variable names to \fBvarnames.txt\fP, one per line. +This can be edited, and later used with \fB\-r\fP to set the variable names +to something sensible rather than A, B, C, etc. For an unprotected +program, you can use \fB\-n\fP to write the existing names rather than +generating new ones. See \fBVARIABLE NAMES\fP, below. +.TP +.B \fB\-r\fP +Read variable names from \fBvarnames.txt\fP, and use them instead of +generating the names. See \fBVARIABLE NAMES\fP, below.  .UNINDENT  .SH EXIT STATUS  .INDENT 0.0 @@ -193,6 +204,51 @@ it normally does... if you suspect it\(aqs causing a problem, you can  remove it with the \fB\-g\fP option. If removing the "garbage" causes the  program to fail to run, it wasn\(aqt garbage! \fBunprotbas\fP doesn\(aqt  remove extra data by default, to be on the safe side. 
+.SH VARIABLE NAMES +.sp +If variable name scrambling was used, the original variable names no +longer exist. \fBunprotbas\fP will generate them, according to these rules: +.INDENT 0.0 +.INDENT 3.5 +The first 26 numeric variables will be called \fIA\fP through \fIZ\fP\&. Further +numeric variables will be \fIA1\fP through \fIA9\fP, \fIB1\fP through \fIB9\fP, etc. +.sp +The first 26 string variables will be \fIA$\fP to \fIZ$\fP, then \fIA1$\fP to +\fIA9$\fP, \fIB1$\fP to \fIB9$\fP, etc. +.sp +The first 26 array variables will be \fIA(\fP to \fIZ(\fP, then \fIA1(\fP to +\fIA9(\fP, \fIB1(\fP to \fIB9(\fP, etc. +.UNINDENT +.UNINDENT +.sp +To properly reverse\-engineer the protected program, it\(aqs necessary to assign +meaningful variable names. \fBunprotbas\fP isn\(aqt smart enough to do this for you, +but it can semi\-automate the process. +.sp +First, run \fBunprotbas\fP with the \fB\-w\fP option. This will create a +file called \fBvarnames.txt\fP, containing the generated variable names. +These are in order, one line per variable name, ending with \fI$\fP for strings +and \fI(\fP for arrays. +.sp +Load the unprotected program on the Atari and LIST it (or use \fBchkbas\fP to get a +listing), and edit \fBvarnames.txt\fP in a text editor. +.sp +As you figure out what each variable\(aqs purpose is, change its name +in the text file. Don\(aqt delete any lines from the file, and don\(aqt +add any. Don\(aqt get rid of the \fI$\fP or \fI(\fP at the end of any line. You +may enter the names in lowercase (\fBunprotbas\fP will convert them to +uppercase). Remember to follow the rules for BASIC variable names: +only letters and numbers allowed, and the first character must be +a letter. No duplicates of the same type are allowed (you can have +\fIFOO\fP and \fIFOO$\fP, but not two numerics called \fIFOO\fP). +.sp +When you\(aqre finished, re\-run \fBunprotbas\fP, this time with the \fB\-r\fP +option. 
The unprotected program will use your variable names, rather +than generating new ones. +.sp +This process can also be used for regular unprotected programs. Use +\fB\-n \-w\fP the first time, to save the existing variable names to +\fBvarnames.txt\fP rather than generating new ones.  .SH COPYRIGHT  .sp  WTFPL. See \fI\%http://www.wtfpl.net/txt/copying/\fP for details. diff --git a/unprotbas.c b/unprotbas.c index 9964d39..5ad483b 100644 --- a/unprotbas.c +++ b/unprotbas.c @@ -2,6 +2,7 @@  #include <unistd.h>  #include <stdlib.h>  #include <string.h> +#include <ctype.h>  /* attempt to fix a "list-protected" Atari 8-bit BASIC program.  	we don't fully detokenize, so this won't fix truly corrupted @@ -22,9 +23,16 @@  	can't process. */  #define MIN_PROG_SIZE 21 +#define MAP_FILE "varnames.txt" +  /* entire file gets read into memory (for now) */  unsigned char data[65536]; +/* for the -r option */ +unsigned char varnames[65536]; +unsigned char *varmap[128]; +int varmap_count; +  /* BASIC 14-byte header values */  unsigned short lomem;  unsigned short vntp; @@ -51,6 +59,8 @@ int keepgarbage = 1;  int checkonly = 0;  int was_protected = 0;  int verbose = 0; +int readmap = 0; +int writemap = 0;  /* file handles */  FILE *input_file = NULL; @@ -306,9 +316,8 @@ int rebuild_vntable(int write) {  		/* fprintf(stderr, "%04x: %04x, %d\n", vv, data[vv], type); */  		if(varnum != data[vv+1]) { -			fprintf(stderr, "Warning: variable value is corrupt!\n"); +			fprintf(stderr, "Warning: variable #%d value is corrupt!\n", varnum);  		} -		varnum++;  		switch(type) {  			case 1: varname = arrays++; sigil = 0xa8; break; @@ -338,6 +347,7 @@ int rebuild_vntable(int write) {  		}  		vv += 8; +		varnum++;  	}  	/* there's supposed to be a null byte at the end of the table, unless @@ -381,13 +391,160 @@ int fixvars(void) {  	return 1;  } +void write_var_map(void) { +	FILE *f; +	int vp, count = 0; + +	if(verbose) fprintf(stderr, "Writing variable names to " MAP_FILE "\n"); +	f = 
fopen(MAP_FILE, "w"); +	if(!f) { +		perror(MAP_FILE); +		die("Can't create map file for -w option."); +	} + +	for(vp = vnstart; (vp < vntd) && (data[vp] != 0); vp++) { +		unsigned char c = data[vp]; +		if(c < 0x80) { +			fputc(c, f); +		} else { +			fputc(c & 0x7f, f); +			fputc('\n', f); +			count++; +		} +	} + +	fclose(f); + +	if(verbose) fprintf(stderr, "Wrote %d variable names to " MAP_FILE "\n", count); +} + +void die_mapfile(char *msg, int num) { +	fprintf(stderr, MAP_FILE ": line %d: %s.\n", num, msg); +	exit(1); +} + +void check_varname(const unsigned char *name, int line) { +	int len = strlen((char *)name); +	int i; +	unsigned char c = 0, type; + +	/* fprintf(stderr, "check_varname(\"%s\", %d)\n", name, line); */ + +	if(len < 1) die_mapfile("blank variable name", line); +	if(len > 128) die_mapfile("variable name >128 characters", line); +	if(name[0] < 'A' || name[0] > 'Z') +		die_mapfile("invalid variable name (first character must be a letter)", line); + +	for(i = 1; i < len; i++) { +		c = name[i]; +		if(c >= 'A' && c <= 'Z') continue; +		if(c >= '0' && c <= '9') continue; +		if(i == (len - 1) && ((c == '$') || (c == '('))) continue; +		die_mapfile("invalid character in variable name", line); +	} + +	if(c == 0) c = name[0]; + +	/* c now has the last char of the name, make sure it matches the variable type */ +	type = data[vvstart + 8 * (line - 1)] >> 6; +	/* type: scalar = 0, array = 1, string = 2 */ +	if(type == 0) { +		if(c == '$') +			die_mapfile("type mismatch: numeric variable may not end with $", line); +		else if(c == '(') +			die_mapfile("type mismatch: numeric variable may not end with (", line); +	} else if(type == 1) { +		if(c != '(') +			die_mapfile("type mismatch: array variable must end with (", line); +	} else if(type == 2) { +		if(c != '$') +			die_mapfile("type mismatch: string variable must end with $", line); +	} else { +		fprintf(stderr, "Warning: variable value table is corrupt (invalid type)\n"); +	} + +	/* check for dups */ +	for(i 
= 0; i < line - 1; i++) { +		if(strcmp((char *)name, (char *)varmap[i]) == 0) +			die_mapfile("duplicate variable name", line); +	} +} + +void read_var_map(void) { +	FILE *f; +	unsigned char *p = varnames, *curname = varnames; +	int count = 0, vvcount = (codestart - vvstart) / 8; + +	if(verbose) fprintf(stderr, "Reading variable names from " MAP_FILE "\n"); +	f = fopen(MAP_FILE, "r"); +	if(!f) { +		perror(MAP_FILE); +		die("Can't read map file for -r option."); +	} + +	while(!feof(f)) { +		*p = toupper(fgetc(f)); /* allow lowercase */ + +		if(*p == ' ' || *p == '\t' || *p == '\r') +			continue; /* ignore whitespace */ + +		if(*p == '\n') { +			*p = '\0'; +			varmap[count++] = curname; +			check_varname(curname, count); +			curname = p + 1; +		} +		p++; +	} + +	if(verbose) fprintf(stderr, "Read %d variable names from " MAP_FILE "\n", count); + +	if(vvcount > count) { +		fprintf(stderr, MAP_FILE ": not enough variables (have %d, need %d).\n", count, vvcount); +		exit(1); +	} else if(count > vvcount) { +		fprintf(stderr, MAP_FILE ": too many variables (have %d, need %d).\n", count, vvcount); +		exit(1); +	} +	#if 0 +	for(count = 0; varmap[count] != NULL; count++) { +		fprintf(stderr, "\t%02d %s\n", count, varmap[count]); +	} +	#endif + +	varmap_count = count; +} + +void apply_var_map(void) { +	unsigned char new_vntable[65536]; +	int i, newp = 0; +	unsigned char *v; + +	for(i = 0; i < varmap_count; i++) { +		v = varmap[i]; +		while(*v) { +			new_vntable[newp++] = *v; +			v++; +		} +		new_vntable[newp - 1] |= 0x80; +	} + +	if(varmap_count < 128) new_vntable[newp++] = '\0'; + +	i = vvstart - vnstart; +	adjust_vntable_size(i, newp); +	memmove(data + vnstart, new_vntable, newp); +} +  void print_help(void) { -	fprintf(stderr, "Usage: %s [-v] [-f] [-n] [-g] <inputfile> <outputfile>\n", self); +	fprintf(stderr, "Usage: %s [-v] [-f] [-n] [-g] [-c] [-r|-w] <inputfile> <outputfile>\n", self);  	fprintf(stderr, "-v: verbose\n");  	fprintf(stderr, "-f: force variable name table 
rebuild\n");  	fprintf(stderr, "-n: do not rebuild variable name table, even if it's invalid\n");  	fprintf(stderr, "-g: remove trailing garbage, if present\n");  	fprintf(stderr, "-c: check only; no output file\n"); +	fprintf(stderr, "-w: write variable names to varnames.txt\n"); +	fprintf(stderr, "-r: read variable names from varnames.txt\n");  	fprintf(stderr, "Use - as a filename to read from stdin and/or write to stdout\n");  } @@ -451,6 +608,8 @@ void parse_args(int argc, char **argv) {  				case 'n': keepvars++; break;  				case 'g': keepgarbage = 0; break;  				case 'c': checkonly = 1; break; +				case 'r': readmap = 1; break; +				case 'w': writemap = 1; break;  				case 0:  							 if(!input_file)  								 open_input(NULL); @@ -474,6 +633,8 @@ void parse_args(int argc, char **argv) {  	if(!input_file) die("no input file given (use - for stdin)");  	if(!checkonly && !output_file) die("no output file given (use - for stdout)");  	if(keepvars && forcevars) die("-f and -n are mutually exclusive"); +	if(readmap && writemap) die("-r and -w are mutually exclusive"); +	if(readmap && keepvars) die("-r and -n are mutually exclusive (maybe you want -w?)");  }  int main(int argc, char **argv) { @@ -484,12 +645,18 @@ int main(int argc, char **argv) {  	if(lomem) die("This doesn't look like an Atari BASIC program (no $0000 signature)"); -	if(!keepvars) { -		if(fixvars()) { -			was_protected = 1; -			if(verbose) fprintf(stderr, "Variable names replaced\n"); -		} else { -			if(verbose) fprintf(stderr, "Variable names were already OK\n"); +	if(readmap) { +		was_protected = !vntable_ok(); +		read_var_map(); +		apply_var_map(); +	} else { +		if(!keepvars) { +			if(fixvars()) { +				was_protected = 1; +				if(verbose) fprintf(stderr, "Variable names replaced\n"); +			} else { +				if(verbose) fprintf(stderr, "Variable names were already OK\n"); +			}  		}  	} @@ -519,5 +686,7 @@ int main(int argc, char **argv) {  	fclose(output_file);  	if(verbose) fprintf(stderr, "wrote %d 
bytes\n", got); +	if(writemap) write_var_map(); +  	return 0;  } diff --git a/unprotbas.rst b/unprotbas.rst index 28ccd8b..752acd7 100644 --- a/unprotbas.rst +++ b/unprotbas.rst @@ -11,7 +11,7 @@ Unprotect LIST-protected Atari 8-bit BASIC programs  SYNOPSIS  ======== -unprotbas [**-v**] [**-f**] [**-n**] [**-g**] [**-c**] **input-file** **output-file** +unprotbas [**-v**] [**-f**] [**-n**] [**-g**] [**-c**] [**-r** | **-w**] **input-file** **output-file**  DESCRIPTION  =========== @@ -56,6 +56,17 @@ will be treated as options.    memory, but doesn't write the result anywhere. In this mode, there    is no **output-file**. +**-w** +  Write the variable names to **varnames.txt**, one per line. +  This can be edited, and later used with **-r** to set the variable names +  to something sensible rather than A, B, C, etc. For an unprotected +  program, you can use **-n** to write the existing names rather than +  generating new ones. See **VARIABLE NAMES**, below. + +**-r** +  Read variable names from **varnames.txt**, and use them instead of +  generating the names. See **VARIABLE NAMES**, below. +  EXIT STATUS  =========== @@ -167,4 +178,48 @@ remove it with the **-g** option. If removing the "garbage" causes the  program to fail to run, it wasn't garbage! **unprotbas** doesn't  remove extra data by default, to be on the safe side. +VARIABLE NAMES +============== + +If variable name scrambling was used, the original variable names no +longer exist. **unprotbas** will generate them, according to these rules: + +  The first 26 numeric variables will be called *A* through *Z*. Further +  numeric variables will be *A1* through *A9*, *B1* through *B9*, etc. + +  The first 26 string variables will be *A$* to *Z$*, then *A1$* to +  *A9$*, *B1$* to *B9$*, etc. + +  The first 26 array variables will be *A(* to *Z(*, then *A1(* to +  *A9(*, *B1(* to *B9(*, etc. + +To properly reverse-engineer the protected program, it's necessary to assign +meaningful variable names. 
**unprotbas** isn't smart enough to do this for you, +but it can semi-automate the process. + +First, run **unprotbas** with the **-w** option. This will create a +file called **varnames.txt**, containing the generated variable names. +These are in order, one line per variable name, ending with *$* for strings +and *(* for arrays. + +Load the unprotected program on the Atari and LIST it (or use **chkbas** to get a +listing), and edit **varnames.txt** in a text editor. + +As you figure out what each variable's purpose is, change its name +in the text file. Don't delete any lines from the file, and don't +add any. Don't get rid of the *$* or *(* at the end of any line. You +may enter the names in lowercase (**unprotbas** will convert them to +uppercase). Remember to follow the rules for BASIC variable names: +only letters and numbers allowed, and the first character must be +a letter. No duplicates of the same type are allowed (you can have +*FOO* and *FOO$*, but not two numerics called *FOO*). + +When you're finished, re-run **unprotbas**, this time with the **-r** +option. The unprotected program will use your variable names, rather +than generating new ones. + +This process can also be used for regular unprotected programs. Use +**-n -w** the first time, to save the existing variable names to +**varnames.txt** rather than generating new ones. +  .. include:: manftr.rst  | 
