aboutsummaryrefslogtreecommitdiff
path: root/uxd.c
diff options
context:
space:
mode:
author B. Watson <urchlay@slackware.uk> 2024-12-18 07:05:01 -0500
committer B. Watson <urchlay@slackware.uk> 2024-12-18 07:05:01 -0500
commit d0b8532b703ef515b89eb8f34c0402262f3d3f7e (patch)
tree 58ad6115dbf5ce685823aaf082558418be32c995 /uxd.c
parent c6ed5c95a56e55a2bb33c9dca819ddf377f05575 (diff)
download uxd-d0b8532b703ef515b89eb8f34c0402262f3d3f7e.tar.gz
add -j/-p/-w options.
Diffstat (limited to 'uxd.c')
-rw-r--r-- uxd.c 34
1 files changed, 25 insertions, 9 deletions
diff --git a/uxd.c b/uxd.c
index e839330..b30b22b 100644
--- a/uxd.c
+++ b/uxd.c
@@ -150,6 +150,9 @@ char *dump_data_arg = NULL; /* -d */
long dump_data_idx = 0; /* -d */
int term_utf8 = 0; /* -t, -T */
int restore_term = 0; /* -T only */
+int java_mode = 0; /* -j */
+int wtf8_mode = 0; /* -w */
+int permissive = 0; /* -p */
/* stats for -i option */
long byte_count = 0;
@@ -265,8 +268,14 @@ void parse_args(int argc, char **argv) {
version();
}
- while((opt = my_getopt(argc, argv, "tTd:1ic:nbl:rmo:S:s:uhv")) != -1) {
+ while((opt = my_getopt(argc, argv, "jwptTd:1ic:nbl:rmo:S:s:uhv")) != -1) {
switch(opt) {
+ case 'j':
+ java_mode = 1; break;
+ case 'w':
+ wtf8_mode = 1; break;
+ case 'p':
+ permissive = 1; break;
case 't':
term_utf8 = restore_term = 1; break;
case 'T':
@@ -551,6 +560,10 @@ int is_overlong(int cont_count, unsigned char *b) {
if(!cont_count)
return 0;
+ /* java mode (MUTF-8) allows exactly one overlong: */
+ if(java_mode && cont_count == 1 && b[0] == 0xc0 && b[1] == 0x80)
+ return 0;
+
/* 2 byte seqs, if the first byte is 0xc0 or 0xc1, it's overlong. */
if(cont_count == 1 && b[0] <= 0xc1)
return 1;
@@ -673,15 +686,18 @@ int dump_utf8_char(void) {
}
}
- /* don't check bad sequences for out-of-range or surrogate */
- if(!bad) {
- if(is_out_of_range(cont_count, bytes) || is_surrogate(cont_count, bytes))
- bad = 1;
- }
-
+ if(!permissive) {
+ /* don't check bad sequences for out-of-range or surrogate */
+ if(!bad) {
+ if(is_out_of_range(cont_count, bytes))
+ bad = 1;
+ else if((!wtf8_mode) && is_surrogate(cont_count, bytes))
+ bad = 1;
+ }
- if(is_overlong(cont_count, bytes))
- overlong = 1;
+ if(is_overlong(cont_count, bytes))
+ overlong = 1;
+ }
if(bad || overlong) {
bad_count++;