diff options
Diffstat (limited to 'uxd.c')
-rw-r--r-- | uxd.c | 75 |
1 files changed, 59 insertions, 16 deletions
@@ -29,10 +29,10 @@ error. If we get a sequence-starter, but the sequence doesn't have the correct number of continuation bytes (e.g. 110xxxxx followed by anything that isn't 10xxxxxx), that's an error too. -BOM: if the file contains ef bb bf (aka U+FEFF), it should be colorized +BOM: if the file contains ef bb bf (aka U+FEFF), it will be colorized as a special (non-printable). If the file begins with ff fe, it's UTF-16 (little endian). If it's -fe ff, it's UTF-16 big-endian. Probably we should detect these and +fe ff, it's UTF-16 big-endian. We detect these and print a warning on stderr. */ @@ -54,7 +54,6 @@ print a warning on stderr. #define BAD_FG BLACK #define BAD_BG RED -// const int normal_colors[] = { GREEN, PURPLE, CYAN }; const int normal_colors[] = { GREEN, YELLOW }; int cur_normal_color = 0; int dump_color; @@ -100,9 +99,15 @@ void open_input(const int argc, const char *argv1) { } } +/* Unicode control character printable equivalents. For 0, use + the "empty set" symbol. It's a lot more readable than the "nul" + symbol, ␀. Escape, tab, newline, space are what urxvt uses in + its "keycap picture" mode. The rest of these are hard to read at + normal font sizes, but it's still better than using a dot for + everything like xxd does. */ char * const special_symbols[] = { - "␀", "␁", "␂", "␃", "␄", "␅", "␆", "␇", "␈", "⇥", "↵", "␋", "␌", "␍", "␎", "␏", - "␐", "␑", "␒", "␓", "␔", "␕", "␖", "␗", "␘", "␙", "␚", "␛", "␜", "␝", "␞", "␟", + "∅", "␁", "␂", "␃", "␄", "␅", "␆", "␇", "␈", "⇥", "↵", "␋", "␌", "␍", "␎", "␏", + "␐", "␑", "␒", "␓", "␔", "␕", "␖", "␗", "␘", "␙", "␚", "⎋", "␜", "␝", "␞", "␟", "␣", }; @@ -112,6 +117,8 @@ char *get_special(unsigned char c) { return "?"; /* should never happen */ } +/* set name to use for error messages. this must be called before + open_input(). 
*/ void set_self(const char *argv0) { self = strrchr(argv0, '/'); @@ -121,6 +128,21 @@ void set_self(const char *argv0) { self = argv0; } +void print_line(void) { + int spacing = MAX_DUMP_COLS - dump_column; + + printf("%s", left_buf); + + /* line up the rightmost field (human-readable) */ + while(spacing--) printf(" "); + + printf(" %s\n", right_buf); + + /* clear the buffers, start a new line */ + left_buf[0] = right_buf[0] = '\0'; + dump_column = 0; +} + void next_normal_color() { cur_normal_color++; cur_normal_color %= (sizeof(normal_colors) / sizeof(int)); @@ -139,14 +161,6 @@ void append_color(char *buf, int fgcolor, int bgcolor) { strcat(buf, tmpbuf); } -void print_line(void) { - int spacing = MAX_DUMP_COLS - dump_column; - printf("%s", left_buf); - while(spacing--) printf(" "); - printf(" %s\n", right_buf); - left_buf[0] = right_buf[0] = '\0'; -} - void append_color_off(char *buf) { strcat(buf, "\x1b[0m"); } @@ -169,20 +183,38 @@ void append_left(unsigned char byte, int fgcolor, int bgcolor) { if(dump_column == 7) strcat(left_buf, " "); dump_column++; - if(dump_column == MAX_DUMP_COLS) { + if(dump_column == MAX_DUMP_COLS) print_line(); - dump_column = 0; - } filepos++; } +void check_utf16(int byte0, int byte1) { + char *endian; + + if(byte0 == 0xff && byte1 == 0xfe) { + endian = "little"; + } else if(byte0 == 0xfe && byte1 == 0xff) { + endian = "big"; + } else { + return; + } + + fprintf(stderr, "%s: input looks like UTF-16, %s-endian\n", self, endian); +} + +int is_bom(unsigned char *b) { + return (b[0] == 0xef && b[1] == 0xbb && b[2] == 0xbf); +} + +/* return value: false = EOF, true = more data to read */ int dump_utf8_char(void) { unsigned char bytes[] = { 0, 0, 0, 0, 0 }; unsigned char *cont_bytes = bytes + 1; char *printable; int bad = 0, special = 0; int c, cont_count, i, fg, bg; + static int byte0; c = fgetc(input); if(c == EOF) @@ -190,6 +222,12 @@ int dump_utf8_char(void) { bytes[0] = (unsigned char)c; + if(filepos == 0) { + byte0 = c; + } else 
if(filepos == 1) { + check_utf16(byte0, c); + } + if(c < 0x7f) { cont_count = 0; if(c <= ' ' || c == 0x7f) @@ -236,6 +274,10 @@ int dump_utf8_char(void) { fg = SPECIAL; bg = 0; printable = get_special(bytes[0]); + } else if(cont_count == 2 && is_bom(bytes)) { + fg = SPECIAL; + bg = 0; + printable = "B"; } else { fg = normal_colors[cur_normal_color]; bg = 0; @@ -258,6 +300,7 @@ void dump_file(void) { while(dump_utf8_char()) ; + /* handle the last line, if the file size is not divisible by 16. */ if(dump_column) print_line(); } |