diff options
| author | B. Watson <urchlay@slackware.uk> | 2024-12-15 16:43:06 -0500 | 
|---|---|---|
| committer | B. Watson <urchlay@slackware.uk> | 2024-12-15 16:43:06 -0500 | 
| commit | 1bb4f5bec2b188a0c29033b81da33c148444c61e (patch) | |
| tree | 40b88db5e3e23ccedb19ad72093574b9cc6aafed | |
| parent | 49d5ffdd4abe4e0425f60cf1f40aec5d157bfe50 (diff) | |
| download | uxd-1bb4f5bec2b188a0c29033b81da33c148444c61e.tar.gz | |
add -i (info) option.
| -rw-r--r-- | uxd.1 | 5 |
| -rw-r--r-- | uxd.c | 33 |
| -rw-r--r-- | uxd.rst | 5 |
3 files changed, 42 insertions, 1 deletions
```diff
@@ -76,6 +76,11 @@ option.
 .B  \-h\fP,\fB  \-\-help
 Print built\-in usage message and exit.
 .TP
+.B  \-i
+After dumping, print information about the input: number of bytes,
+characters, ASCII (one\-byte) characters, multi\-byte characters, and
+bad sequences.
+.TP
 .BI \-l \ length
 Stop dumping after \fIlength\fP bytes (not characters). If the limit is
 reached in the middle of a multibyte character, the entire character
@@ -95,6 +95,7 @@ char * const special_symbols[] = {
 };
 
 /* options */
+int print_info = 0; /* -i */
 int bold = 0; /* -b */
 int hilite_multi = 0; /* -r */
 int mono = 0; /* -m */
@@ -105,6 +106,13 @@ long limit; /* -l */
 const char *hex_byte_fmt = "%02x";   /* -u */
 const char *hex_word_fmt = "%04x: "; /* "  */
 
+/* stats for -i option */
+long byte_count = 0;
+long ascii_count = 0;
+long multi_count = 0;
+long bad_count = 0;
+long char_count = 0;
+
 void usage(void) {
 	printf("uxd (Utf-8 heX Dump) v" VERSION " by B. Watson. WTFPL.\n");
 	printf("Usage: %s [<file>]\n", self);
@@ -174,8 +182,10 @@ void parse_args(int argc, char **argv) {
 			version();
 	}
 
-	while((opt = my_getopt(argc, argv, "c:nbl:rmo:S:s:uhv")) != -1) {
+	while((opt = my_getopt(argc, argv, "ic:nbl:rmo:S:s:uhv")) != -1) {
 		switch(opt) {
+			case 'i':
+				print_info = 1; break;
 			case 'c':
 				mono = 0; parse_colors(optarg); break;
 			case 'n':
@@ -395,6 +405,8 @@ int dump_utf8_char(void) {
 	if(c == EOF)
 		return 0;
 
+	byte_count++;
+
 	bytes[0] = (unsigned char)c;
 
 	if(filepos == 0) {
@@ -404,6 +416,7 @@ int dump_utf8_char(void) {
 	}
 
 	if(c < 0x7f) {
+		ascii_count++;
 		cont_count = 0;
 		if(c <= ' ' || c == 0x7f)
 			special = 1;
@@ -429,6 +442,8 @@ int dump_utf8_char(void) {
 			break;
 		}
 
+		byte_count++;
+
 		cb = cont_bytes[i] = (unsigned char)c;
 		if((cb & 0xc0) != 0x80) {
 			/* Expected 10xxxxxx, got something else */
@@ -443,6 +458,14 @@ int dump_utf8_char(void) {
 		bad = 1;
 
 	if(bad) {
+		bad_count++;
+	} else {
+		char_count++;
+		if(cont_count)
+			multi_count++;
+	}
+
+	if(bad) {
 		fg = BAD_FG;
 		bg = BAD_BG;
 		/* replacement character � is U+FFFD */
@@ -538,6 +561,14 @@ void dump_file(void) {
 	/* handle the last line, if the file size not divisible by 16. */
 	if(dump_column)
 		print_line();
+
+	if(print_info) {
+		printf("Bytes: %ld\n", byte_count);
+		printf("Characters: %ld\n", char_count);
+		printf("  ASCII: %ld\n", ascii_count);
+		printf("  Multibyte: %ld\n", multi_count);
+		printf("Bad sequences: %ld\n", bad_count);
+	}
 }
 
 int main(int argc, char **argv) {
@@ -65,6 +65,11 @@ by itself.
 -h, --help
   Print built-in usage message and exit.
 
+-i
+  After dumping, print information about the input: number of bytes,
+  characters, ASCII (one-byte) characters, multi-byte characters, and
+  bad sequences.
+
 -l length
   Stop dumping after *length* bytes (not characters). If the limit is
   reached in the middle of a multibyte character, the entire character
```
