diff options
-rw-r--r-- | usage.c | 1 | ||||
-rw-r--r-- | uxd.1 | 11 | ||||
-rw-r--r-- | uxd.c | 119 | ||||
-rw-r--r-- | uxd.rst | 7 |
4 files changed, 115 insertions, 23 deletions
@@ -2,6 +2,7 @@ char *usage_opts[] = { " -1: don't alternate colors for normal characters.", " -b: bold color output.", " -c nnnn: colors (2 to 4 digits, 0 to 7).", + " -d data: dump this data instead of a file.", " -h, --help: print this help message.", " -i: print number of bytes/chars/ascii/multibyte/bad sequences.", " -l length: stop dumping after <length> bytes (not characters).", @@ -27,7 +27,7 @@ level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] .\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] .in \\n[rst2man-indent\\n[rst2man-indent-level]]u .. -.TH "UXD" 1 "2024-12-16" "0.1.0" "Urchlay's Utilities" +.TH "UXD" 1 "2024-12-17" "0.1.0" "Urchlay's Utilities" .SH NAME uxd \- UTF-8 hex dumper .SH SYNOPSIS @@ -90,6 +90,15 @@ option. . .INDENT 0.0 .TP +.BI \-d \ data +Dump this data, instead of reading from a file or stdin. If \fIdata\fP +contains spaces or shell metacharacters, make sure you remember to +quote it. Only one \fB\-d\fP option can be given. +.UNINDENT +.\" dump this data instead of a file. +. +.INDENT 0.0 +.TP .B \-h\fP,\fB \-\-help Print built\-in usage message and exit. .UNINDENT @@ -115,7 +115,7 @@ char * const special_symbols[] = { /* options */ int alternate_colors = 1; /* -1 */ -int print_info = 0; /* -i */ +int print_info_opt = 0; /* -i */ int bold = 0; /* -b */ int hilite_multi = 0; /* -r */ int mono = 0; /* -m */ @@ -125,6 +125,8 @@ int seek_offset_zero = 0; /* -S */ long limit; /* -l */ const char *hex_byte_fmt = "%02x"; /* -u */ const char *hex_word_fmt = "%04x: "; /* " */ +char *dump_data_arg = NULL; /* -d */ +long dump_data_idx = 0; /* -d */ /* stats for -i option */ long byte_count = 0; @@ -221,12 +223,18 @@ void parse_args(int argc, char **argv) { version(); } - while((opt = my_getopt(argc, argv, "1ic:nbl:rmo:S:s:uhv")) != -1) { + while((opt = my_getopt(argc, argv, "d:1ic:nbl:rmo:S:s:uhv")) != -1) { switch(opt) { + case 'd': + if(dump_data_arg) { + fprintf(stderr, "%s: multiple -d options not supported.\n", self); + exit(1); + } + dump_data_arg = optarg; break; case '1': alternate_colors = 0; break; case 'i': - print_info = 1; break; + print_info_opt = 1; break; case 'c': mono = 0; parse_colors(optarg); break; case 'n': @@ -263,11 +271,18 @@ void parse_args(int argc, char **argv) { } } - /* filename (if present) must come after all -options, and - there can only be one filename. */ - if(optind < (argc - 1)) usage(); + if(dump_data_arg) { + if(optind != argc) { + fprintf(stderr, "%s: cannot give a filename when -d is used.\n", self); + exit(1); + } + } else { + /* filename (if present) must come after all -options, and + there can only be one filename. */ + if(optind < (argc - 1)) usage(); - open_input(argv[optind]); + open_input(argv[optind]); + } } /* read options from the environment and the command line, create a @@ -487,6 +502,29 @@ int is_out_of_range(int count, unsigned char *b) { return 1; } +int get_next_byte(void) { + int c; + + if(dump_data_arg) { + /* have to cast this to unsigned char and back to int, + to emulate fgetc() */ + c = (unsigned char)dump_data_arg[dump_data_idx++]; + if(!c) c = EOF; + } else { + c = fgetc(input); + } + + return c; +} + +void push_back_byte(int c) { + if(dump_data_arg) { + if(dump_data_idx) dump_data_idx--; + } else { + ungetc(c, input); + } +} + /* This is the 'workhorse', called for each character in the file. Return value: false = EOF, true = more data to read */ int dump_utf8_char(void) { @@ -497,7 +535,7 @@ int dump_utf8_char(void) { int c, cont_count, i; static int byte0; - c = fgetc(input); + c = get_next_byte(); if(c == EOF) return 0; @@ -532,7 +570,7 @@ int dump_utf8_char(void) { /* read and validate the continuation bytes, if any */ for(i = 0; i < cont_count; i++) { int cb; - c = fgetc(input); + c = get_next_byte(); if(c == EOF) { /* EOF in mid-sequence. Don't return 0 here, since we still @@ -550,7 +588,7 @@ int dump_utf8_char(void) { /* Expected 10xxxxxx, got something else */ cont_count = i; bad = 1; - ungetc(cb, input); + push_back_byte(cb); break; } } @@ -646,15 +684,21 @@ void seek_input(void) { fake_seek(); filepos = seekpos; } else { + fprintf(stderr, "%s: are you trying to seek backwards in stdin?\n", self); perror(self); exit(1); } } -void dump_file(void) { - if(seekpos) seek_input(); - if(seek_offset_zero) filepos = 0; +void print_info(void) { + printf("Bytes: %ld\n", byte_count); + printf("Characters: %ld\n", char_count); + printf(" ASCII: %ld\n", ascii_count); + printf(" Multibyte: %ld\n", multi_count); + printf("Bad sequences: %ld\n", bad_count); +} +void dump_loop(void) { while(dump_utf8_char()) if(limit && (filepos >= limit)) break; @@ -662,19 +706,50 @@ void dump_file(void) { if(dump_column) print_line(); - if(print_info) { - printf("Bytes: %ld\n", byte_count); - printf("Characters: %ld\n", char_count); - printf(" ASCII: %ld\n", ascii_count); - printf(" Multibyte: %ld\n", multi_count); - printf("Bad sequences: %ld\n", bad_count); - } +} + +void dump_file(void) { + if(seekpos) seek_input(); + if(seek_offset_zero) filepos = 0; + + dump_loop(); + + fclose(input); +} + +void dump_data(void) { + int datalen; + + datalen = strlen(dump_data_arg); + + if(seekpos >= datalen) + return; + + if(seekpos < 0) + dump_data_idx = datalen + seekpos; + else if(seekpos) + dump_data_idx = seekpos; + + if(seek_offset_zero) + filepos = 0; + else + filepos = dump_data_idx; + + dump_loop(); } int main(int argc, char **argv) { set_self(argv[0]); + parse_options(argc, argv); - dump_file(); - fclose(input); + + if(dump_data_arg) + dump_data(); + else + dump_file(); + + if(print_info_opt) + print_info(); + return 0; } @@ -73,6 +73,13 @@ by itself. .. colors (2 to 4 digits, 0 to 7). +-d data + Dump this data, instead of reading from a file or stdin. If *data* + contains spaces or shell metacharacters, make sure you remember to + quote it. Only one **-d** option can be given. + +.. dump this data instead of a file. + -h, --help Print built-in usage message and exit. |