aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- uxd.1 5
-rw-r--r-- uxd.c 33
-rw-r--r-- uxd.rst 5
3 files changed, 42 insertions, 1 deletion
diff --git a/uxd.1 b/uxd.1
index 493002f..9107d3b 100644
--- a/uxd.1
+++ b/uxd.1
@@ -76,6 +76,11 @@ option.
.B \-h\fP,\fB \-\-help
Print built\-in usage message and exit.
.TP
+.B \-i
+After dumping, print information about the input: number of bytes,
+characters, ASCII (one\-byte) characters, multi\-byte characters, and
+bad sequences.
+.TP
.BI \-l \ length
Stop dumping after \fIlength\fP bytes (not characters). If the limit is
reached in the middle of a multibyte character, the entire character
diff --git a/uxd.c b/uxd.c
index aebc224..93368a1 100644
--- a/uxd.c
+++ b/uxd.c
@@ -95,6 +95,7 @@ char * const special_symbols[] = {
};
/* options */
+int print_info = 0; /* -i */
int bold = 0; /* -b */
int hilite_multi = 0; /* -r */
int mono = 0; /* -m */
@@ -105,6 +106,13 @@ long limit; /* -l */
const char *hex_byte_fmt = "%02x"; /* -u */
const char *hex_word_fmt = "%04x: "; /* " */
+/* stats for -i option */
+long byte_count = 0;
+long ascii_count = 0;
+long multi_count = 0;
+long bad_count = 0;
+long char_count = 0;
+
void usage(void) {
printf("uxd (Utf-8 heX Dump) v" VERSION " by B. Watson. WTFPL.\n");
printf("Usage: %s [<file>]\n", self);
@@ -174,8 +182,10 @@ void parse_args(int argc, char **argv) {
version();
}
- while((opt = my_getopt(argc, argv, "c:nbl:rmo:S:s:uhv")) != -1) {
+ while((opt = my_getopt(argc, argv, "ic:nbl:rmo:S:s:uhv")) != -1) {
switch(opt) {
+ case 'i':
+ print_info = 1; break;
case 'c':
mono = 0; parse_colors(optarg); break;
case 'n':
@@ -395,6 +405,8 @@ int dump_utf8_char(void) {
if(c == EOF)
return 0;
+ byte_count++;
+
bytes[0] = (unsigned char)c;
if(filepos == 0) {
@@ -404,6 +416,7 @@ int dump_utf8_char(void) {
}
if(c < 0x7f) {
+ ascii_count++;
cont_count = 0;
if(c <= ' ' || c == 0x7f)
special = 1;
@@ -429,6 +442,8 @@ int dump_utf8_char(void) {
break;
}
+ byte_count++;
+
cb = cont_bytes[i] = (unsigned char)c;
if((cb & 0xc0) != 0x80) {
/* Expected 10xxxxxx, got something else */
@@ -443,6 +458,14 @@ int dump_utf8_char(void) {
bad = 1;
if(bad) {
+ bad_count++;
+ } else {
+ char_count++;
+ if(cont_count)
+ multi_count++;
+ }
+
+ if(bad) {
fg = BAD_FG;
bg = BAD_BG;
/* replacement character � is U+FFFD */
@@ -538,6 +561,14 @@ void dump_file(void) {
/* handle the last line, if the file size not divisible by 16. */
if(dump_column)
print_line();
+
+ if(print_info) {
+ printf("Bytes: %ld\n", byte_count);
+ printf("Characters: %ld\n", char_count);
+ printf(" ASCII: %ld\n", ascii_count);
+ printf(" Multibyte: %ld\n", multi_count);
+ printf("Bad sequences: %ld\n", bad_count);
+ }
}
int main(int argc, char **argv) {
diff --git a/uxd.rst b/uxd.rst
index 2acd120..0290f20 100644
--- a/uxd.rst
+++ b/uxd.rst
@@ -65,6 +65,11 @@ by itself.
-h, --help
Print built-in usage message and exit.
+-i
+ After dumping, print information about the input: number of bytes,
+ characters, ASCII (one-byte) characters, multi-byte characters, and
+ bad sequences.
+
-l length
Stop dumping after *length* bytes (not characters). If the limit is
reached in the middle of a multibyte character, the entire character