diff options
-rw-r--r-- | uxd.1 | 5 | ||||
-rw-r--r-- | uxd.c | 33 | ||||
-rw-r--r-- | uxd.rst | 5 |
3 files changed, 42 insertions, 1 deletion
@@ -76,6 +76,11 @@ option. .B \-h\fP,\fB \-\-help Print built\-in usage message and exit. .TP +.B \-i +After dumping, print information about the input: number of bytes, +characters, ASCII (one\-byte) characters, multi\-byte characters, and +bad sequences. +.TP .BI \-l \ length Stop dumping after \fIlength\fP bytes (not characters). If the limit is reached in the middle of a multibyte character, the entire character @@ -95,6 +95,7 @@ char * const special_symbols[] = { }; /* options */ +int print_info = 0; /* -i */ int bold = 0; /* -b */ int hilite_multi = 0; /* -r */ int mono = 0; /* -m */ @@ -105,6 +106,13 @@ long limit; /* -l */ const char *hex_byte_fmt = "%02x"; /* -u */ const char *hex_word_fmt = "%04x: "; /* " */ +/* stats for -i option */ +long byte_count = 0; +long ascii_count = 0; +long multi_count = 0; +long bad_count = 0; +long char_count = 0; + void usage(void) { printf("uxd (Utf-8 heX Dump) v" VERSION " by B. Watson. WTFPL.\n"); printf("Usage: %s [<file>]\n", self); @@ -174,8 +182,10 @@ void parse_args(int argc, char **argv) { version(); } - while((opt = my_getopt(argc, argv, "c:nbl:rmo:S:s:uhv")) != -1) { + while((opt = my_getopt(argc, argv, "ic:nbl:rmo:S:s:uhv")) != -1) { switch(opt) { + case 'i': + print_info = 1; break; case 'c': mono = 0; parse_colors(optarg); break; case 'n': @@ -395,6 +405,8 @@ int dump_utf8_char(void) { if(c == EOF) return 0; + byte_count++; + bytes[0] = (unsigned char)c; if(filepos == 0) { @@ -404,6 +416,7 @@ int dump_utf8_char(void) { } if(c < 0x7f) { + ascii_count++; cont_count = 0; if(c <= ' ' || c == 0x7f) special = 1; @@ -429,6 +442,8 @@ int dump_utf8_char(void) { break; } + byte_count++; + cb = cont_bytes[i] = (unsigned char)c; if((cb & 0xc0) != 0x80) { /* Expected 10xxxxxx, got something else */ @@ -443,6 +458,14 @@ int dump_utf8_char(void) { bad = 1; if(bad) { + bad_count++; + } else { + char_count++; + if(cont_count) + multi_count++; + } + + if(bad) { fg = BAD_FG; bg = BAD_BG; /* replacement character � is U+FFFD */ @@ 
-538,6 +561,14 @@ void dump_file(void) { /* handle the last line, if the file size not divisible by 16. */ if(dump_column) print_line(); + + if(print_info) { + printf("Bytes: %ld\n", byte_count); + printf("Characters: %ld\n", char_count); + printf(" ASCII: %ld\n", ascii_count); + printf(" Multibyte: %ld\n", multi_count); + printf("Bad sequences: %ld\n", bad_count); + } } int main(int argc, char **argv) { @@ -65,6 +65,11 @@ by itself. -h, --help Print built-in usage message and exit. +-i + After dumping, print information about the input: number of bytes, + characters, ASCII (one-byte) characters, multi-byte characters, and + bad sequences. + -l length Stop dumping after *length* bytes (not characters). If the limit is reached in the middle of a multibyte character, the entire character |