aboutsummaryrefslogtreecommitdiff
path: root/unifmt.pl
blob: 39f3c12e11c220ce838905f1d276ec80ab1fbacf (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
#!/usr/bin/perl

# to read the docs outside of irssi: perldoc /path/to/unifmt.pl
# in irssi, "/script load unifmt.pl", then "/unifmt_help"

=encoding utf8

=pod

=head1 NAME

unifmt.pl - unicode text formatting for irssi

=head1 SYNOPSIS

in shell: cp unifmt.pl ~/.irssi/scripts/

in irssi: /script load unifmt.pl

=head1 DESCRIPTION

unifmt.pl adds keystrokes to irssi that allow you to type double-width
ASCII (Unicode FF00 block) characters, blackletter characters, and
use Unicode combining characters to make your text appear underlined
(single or double line), struck out, or "slashed" out.

Rather than executing the script as an irssi slash-command, the modes are
controlled via keystrokes. This allows the formatted text to be mixed
with normal or differently-formatted text on the same line of input.

There are 2 classes of formatting: "transforms", which convert plain
Latin alphabetics to other characters (such as fraktur or italic), and
"combines", which use plain Latin alphabetics followed by a combining
character (such as an underline).

You can mix combines and transforms, e.g. underlined fraktur, but only
one combine and one transform can be enabled at a time (so you can't
do e.g. both underline and strikethrough, or both subscript and bold
serif).

=head1 KEYSTROKES

All the formatting controls must be preceded by a prefix character,
which defaults to B<^F> (control-F).

=over 4

=head2 Transforms

=item B<^F w>

Enables wide formatting. Each character you type is replaced by its
double-width equivalent from the Unicode B<U+FF01-U+FF5E> range, if
it has one. Only characters B<U+0021-U+007E> (AKA the printable ASCII
charset) have double-width equivalents. As a special case, the space
(B<U+0020>) character is replaced with B<U+3000>, B<IDEOGRAPHIC SPACE>, which
is a double-width space. All other characters are treated normally.

	Example: Ｔｈｉｓ　ｉｓ　ｗｉｄｅ　ｔｅｘｔ

=item B<^F b>

Bold sans serif.

	Example: 𝗧𝗵𝗶𝘀 𝗶𝘀 𝗯𝗼𝗹𝗱 𝘀𝗮𝗻𝘀

=item B<^F i>

Italic sans serif

	Example: 𝘛𝘩𝘪𝘴 𝘪𝘴 𝘪𝘵𝘢𝘭𝘪𝘤 𝘴𝘢𝘯𝘴

=item B<^F j>

Bold italic sans serif

	Example: 𝙏𝙝𝙞𝙨 𝙞𝙨 𝙗𝙤𝙡𝙙 𝙞𝙩𝙖𝙡𝙞𝙘 𝙨𝙖𝙣𝙨

=item B<^F B>

Bold serif.

	Example: 𝐓𝐡𝐢𝐬 𝐢𝐬 𝐛𝐨𝐥𝐝 𝐬𝐞𝐫𝐢𝐟

=item B<^F I>

Italic serif

	Example: 𝑇ℎ𝑖𝑠 𝑖𝑠 𝑖𝑡𝑎𝑙𝑖𝑐 𝑠𝑒𝑟𝑖𝑓

=item B<^F J>

Bold italic serif

	Example: 𝑻𝒉𝒊𝒔 𝒊𝒔 𝒃𝒐𝒍𝒅 𝒊𝒕𝒂𝒍𝒊𝒄 𝒔𝒆𝒓𝒊𝒇

=item B<^F 2>

Double-struck

	Example: 𝕋𝕙𝕚𝕤 𝕚𝕤 𝕕𝕠𝕦𝕓𝕝𝕖-𝕤𝕥𝕣𝕦𝕔𝕜

=item B<^F c>

Cursive

	Example: 𝒯𝒽𝒾𝓈 𝒾𝓈 𝒸𝓊𝓇𝓈𝒾𝓋ℯ

=item B<^F C>

Bold cursive

	Example: 𝓣𝓱𝓲𝓼 𝓲𝓼 𝓫𝓸𝓵𝓭 𝓬𝓾𝓻𝓼𝓲𝓿𝓮

=item B<^F k>

Fraktur (aka blackletter)

	Example: 𝔗𝔥𝔦𝔰 𝔦𝔰 𝔣𝔯𝔞𝔨𝔱𝔲𝔯

I am aware that the Mathematical Fraktur symbols were never intended
for use as text, but this script is for fun, not for standards compliance.

=item B<^F K>

Bold fraktur (aka blackletter)

	Example: 𝕿𝖍𝖎𝖘 𝖎𝖘 𝖇𝖔𝖑𝖉 𝖋𝖗𝖆𝖐𝖙𝖚𝖗

=item B<^F ^>

Superscript. This isn't perfect: a few superscripted letters only
exist in Unicode as either uppercase or lowercase, so e.g. all
lowercase B<i> characters will display as superscripted capital B<I>.

	Example: ᵀʰᶦˢ ᶦˢ ˢᵘᵖᵉʳˢᶜʳᶦᵖᵗ

=head2 Combines

=item B<^F _>

Enables underlining. Each character you type is followed by the Unicode
combining character B<U+0332>, B<COMBINING LOW LINE>. Example: U̲n̲d̲e̲r̲l̲i̲n̲e̲d̲

=item B<^F =>

Enables double underlining. Each character you type is followed by the Unicode
combining character B<U+0333>, B<COMBINING DOUBLE LOW LINE>. Example: U̳n̳d̳e̳r̳l̳i̳n̳e̳d̳

=item B<^F ->

Enables strikethrough. Each character you type is followed by the Unicode
combining character B<U+0336>, B<COMBINING LONG STROKE OVERLAY>. Example: S̶t̶r̶i̶k̶e̶t̶h̶r̶o̶u̶g̶h̶

=item B<^F />

Enables slashthrough. Each character you type is followed by the Unicode
combining character B<U+0338>, B<COMBINING LONG SOLIDUS OVERLAY>. Example: S̸l̸a̸s̸h̸o̸u̸t̸

=item B<^F ^F>

Acts like a single B<^F> was pressed. Does not disable formatting. If
you have a regular irssi keybinding for B<^F>, it will be acted on.
Otherwise, a B<^F> will be inserted into the input buffer.

=item B<^F F>

Disables all the formatting modes. Actually, B<^F> followed by any character
not listed above will do the same thing, but I promise not to change
the B<^F F> combo in any future versions of this script.

=back

=head1 SETTINGS

=over 4

=item B<unifmt_keys>

String, the 6 keystrokes used to enable the formatting modes. This
defaults to B<fw_=-/>. The order is: Prefix, Wide, Underline,
Double-Underline, Strikethrough, Slashthrough. Note: only the prefix
and the combines are currently remappable; transforms are not, so the
Wide position must be present (the string must be exactly 6 characters
long) but its value is ignored. The prefix key is used as a control
key, but when you set it, use a regular alphabetic (e.g. don't say
B<^X>, just say B<x> or B<X>).

=item B<unifmt_spaces>

Boolean, whether or not to apply combines to spaces (default: false).

=back

=head1 NOTES

For any of this to work, you'll have to enable UTF-8 in irssi, and use
a UTF-8 capable terminal. This applies to everyone else too: if you're
sending UTF-8 encoded Unicode to them, their client (and terminal if
it's a terminal client) will have to know how to display it. If not,
they'll see garbage in place of what you intended.

This script was developed with urxvt (AKA rxvt-unicode). It should work
with any terminal that fully supports UTF-8 and Unicode... but you want
a terminal that supports looking up glyphs from a list of fonts, like
urxvt does. Otherwise, you might have a hard time finding a single font
that has all the glyphs you'll need.

The editing keys (arrows, home/end, pgup/pgdn, and any alt-? combos)
might act strangely while the formatting modes are enabled, depending
on your terminal and TERM environment variable. Backspace and ^U
(kill line) should still work OK. This will probably be fixed in the
future. Tab-completing nicks doesn't work either, but fixing that will
be a huge PITA (or maybe impossible).

None of the formatting modes persist past the end of the current line of
input. Pressing Enter always clears all the modes. This is to minimize
annoyance, as there's no visual indicator of which mode(s) you're in.

Before you use this script on a public channel, you'd better make sure
the channel doesn't have rules against using fancy Unicode. You may
annoy the other users, and/or find yourself banned.

tmux doesn't seem to be capable of actually displaying the wide +
combining character combinations. They render as plain wide. If you
copy/paste them to another window (a terminal not running tmux for
instance), they show up correctly. So tmux "knows" the formatting is
there, but doesn't display it.

The underline, strike, slashout combinations don't work with screen,
and probably never will. I'd love to be proven wrong, so let me know if
you get it working there. I at least can see the wide characters with
"screen -U".

=head1 AUTHOR

Urchlay <urchlay@slackware.uk>

=head1 LICENSE

WTFPL: Do WTF you want with this.

=head1 SEE ALSO

irssi(1), urxvt(1), unicode(7), utf-8(7)

=cut

use utf8;
use feature 'unicode_strings';

# Script version and descriptive metadata. $IRSSI{name} is reused further
# down as $SELF. NOTE(review): presumably irssi's script loader also reads
# $VERSION and %IRSSI -- confirm against the irssi scripting docs.
our $VERSION = "0.2";
our %IRSSI = (
	authors     => 'Urchlay',
	contact     => 'Urchlay on FreeNode',
	name        => 'unifmt',
	description => 'Fancy Unicode text formatting',
	license     => 'WTFPL',
	url         => 'https://slackware.uk/~urchlay/repos/misc-scripts',
);

use warnings;
use strict;

# 20200827 bkw: adding gui_input_get_pos to the list of imports causes
# this script to fail to autoload when irssi starts (but it'll load OK
# if manually loaded after startup). I only use it for debugging anyway.
use Irssi qw{
	command            command_bind      parse_special     signal_register
	signal_add_first   signal_add_last   signal_continue   signal_emit
	signal_stop        settings_set_str  settings_get_str  settings_add_str
	settings_get_bool  settings_add_bool
	};

# Short script name: passed as the first argument to settings_add_* and
# used as the prefix of the messages this script prints.
our $SELF = $IRSSI{name};
# Default value of the unifmt_keys setting. init_keys() splits it into
# six single characters: prefix, wide, underline, double underline,
# strikethrough, slash-through.
our $default_keys = "fw_=-/";

##sub xf_wide {
##	my $key = shift;
##	if($key == 0x20) {
##		$key = 0x3000;
##	} elsif($key >= 0x21 && $key <= 0x7e) {
##		$key += 0xfee0;
##	}
##	return $key;
##}

# Dispatch table: human-readable transform name => the xf_* sub that
# implements it. handle_keypress() looks names up here after resolving
# a keystroke through %transform_keys.
our %transforms = (
	'Wide'              => \&xf_wide,
	'Double-struck'     => \&xf_doublestrike,
	'Superscript'       => \&xf_superscript,

	'Bold Sans'         => \&xf_boldsans,
	'Italic Sans'       => \&xf_italsans,
	'Bold Italic Sans'  => \&xf_bolditalsans,

	'Bold Serif'        => \&xf_boldserif,
	'Italic Serif'      => \&xf_italserif,
	'Bold Italic Serif' => \&xf_bolditalserif,

	'Cursive'           => \&xf_cursive,
	'Bold Cursive'      => \&xf_boldcursive,

	'Fraktur'           => \&xf_fraktur,
	'Bold Fraktur'      => \&xf_boldfraktur,
);

# Keystroke (typed after the prefix key) => transform name, where the
# name is a key of %transforms. Lowercase letters select the sans
# variants, uppercase the serif ones.
our %transform_keys = (
	'w' => 'Wide',
	'2' => 'Double-struck',
	'^' => 'Superscript',

	'b' => 'Bold Sans',
	'i' => 'Italic Sans',
	'j' => 'Bold Italic Sans',

	'B' => 'Bold Serif',
	'I' => 'Italic Serif',
	'J' => 'Bold Italic Serif',

	'c' => 'Cursive',
	'C' => 'Bold Cursive',

	'k' => 'Fraktur',
	'K' => 'Bold Fraktur',
);


# Holds a reference to one of the xf_* subs, or undef if no
# transform is active. Set and cleared by handle_keypress().
our $transform;

# These 2 are controlled by setting unifmt_keys (see init_keys()):
# $prefix_key is the numeric control code of the prefix key, and
# %combining_map maps a keystroke character to the codepoint of the
# combining character it enables.
our $prefix_key;
our %combining_map;

# Which of combining_map is active (its codepoint value), or 0 for none
our $combining_char = 0;

# True if the last keypress was the prefix key (^F by default)
our $was_prefix = 0;

# 2 if the last keypress was escape, 1 if the last 2 were escape and [,
# 0 otherwise.
our $was_escape = 0;


# There's no way to enable debugging without editing the script.
our $DEBUG = 0;

# Only used for debugging: counts keypresses seen by handle_keypress().
our $count = 0;

# Debugging aid only: prints the cursor position followed by a hex dump
# of the current input line (irssi's $L special variable), one
# codepoint/byte per element. The element under the cursor is prefixed
# with "*"; if the cursor sits past the last element, a trailing "*" is
# printed instead.
# NOTE(review): assumes gui_input_get_pos() and length($buf) count in the
# same units (characters) -- confirm when UTF-8 input is in the buffer.
sub dump_buf {
	my $buf = parse_special('$L', 0, 0);
	my $len = length($buf);
	my $pos = Irssi::gui_input_get_pos();
	my $out = "pos==" . $pos . "  ";
	for my $i (0 .. $len - 1) {
		# Compare against the cursor position. The old test ($i == $len)
		# could never be true inside the loop, so no marker was ever shown.
		my $star = ($i == $pos ? "*" : "");
		# Pass the marker as an argument rather than interpolating it
		# into the format string.
		$out .= sprintf("%s%02x ", $star, ord(substr($buf, $i, 1)));
	}
	$out .= "*" if $pos >= $len;
	print $out;
}

####
# transforms take numeric Unicode codepoint arg, and return
# a numeric Unicode codepoint.

# Transform for 'Wide' mode.
#   arg:     numeric Unicode codepoint
#   returns: U+3000 (wide space) for a plain space; the fullwidth form
#            (codepoint + 0xFEE0, i.e. U+FF01..U+FF5E) for printable
#            ASCII 0x21..0x7E; any other codepoint unchanged.
sub xf_wide {
	my ($codepoint) = @_;

	# wide space, maybe better to avoid this?
	return 0x3000 if $codepoint == 0x20;

	return $codepoint + 0xfee0
		if $codepoint >= 0x21 && $codepoint <= 0x7e;

	return $codepoint;
}

# Shared helper for the alphabet-table transforms.
#   args:    numeric codepoint, then a 52-character string holding the
#            replacements for A-Z followed by those for a-z
#   returns: the codepoint of the replacement character for ASCII
#            letters, or the input codepoint unchanged for anything else.
sub xf_alpha_map {
	my ($k, $map) = @_;

	my $index;
	if($k >= ord('A') && $k <= ord('Z')) {
		$index = $k - ord('A');
	} elsif($k >= ord('a') && $k <= ord('z')) {
		# lowercase replacements start after the 26 uppercase ones
		$index = $k - ord('a') + 26;
	}

	return $k unless defined $index;
	return ord(substr($map, $index, 1));
}

# Transform for 'Bold Serif' mode: maps an ASCII letter codepoint to its
# bold serif counterpart via xf_alpha_map; other codepoints pass through.
sub xf_boldserif {
	my ($codepoint) = @_;
	my $alphabet = "𝐀𝐁𝐂𝐃𝐄𝐅𝐆𝐇𝐈𝐉𝐊𝐋𝐌𝐍𝐎𝐏𝐐𝐑𝐒𝐓𝐔𝐕𝐖𝐗𝐘𝐙𝐚𝐛𝐜𝐝𝐞𝐟𝐠𝐡𝐢𝐣𝐤𝐥𝐦𝐧𝐨𝐩𝐪𝐫𝐬𝐭𝐮𝐯𝐰𝐱𝐲𝐳";
	return xf_alpha_map($codepoint, $alphabet);
}

# Transform for 'Italic Serif' mode: maps an ASCII letter codepoint to its
# italic serif counterpart via xf_alpha_map; other codepoints pass through.
sub xf_italserif {
	my ($codepoint) = @_;
	my $alphabet = "𝐴𝐵𝐶𝐷𝐸𝐹𝐺𝐻𝐼𝐽𝐾𝐿𝑀𝑁𝑂𝑃𝑄𝑅𝑆𝑇𝑈𝑉𝑊𝑋𝑌𝑍𝑎𝑏𝑐𝑑𝑒𝑓𝑔ℎ𝑖𝑗𝑘𝑙𝑚𝑛𝑜𝑝𝑞𝑟𝑠𝑡𝑢𝑣𝑤𝑥𝑦𝑧";
	return xf_alpha_map($codepoint, $alphabet);
}

# Transform for 'Bold Italic Serif' mode: maps an ASCII letter codepoint
# to its bold italic serif counterpart via xf_alpha_map; other codepoints
# pass through.
sub xf_bolditalserif {
	my ($codepoint) = @_;
	my $alphabet = "𝑨𝑩𝑪𝑫𝑬𝑭𝑮𝑯𝑰𝑱𝑲𝑳𝑴𝑵𝑶𝑷𝑸𝑹𝑺𝑻𝑼𝑽𝑾𝑿𝒀𝒁𝒂𝒃𝒄𝒅𝒆𝒇𝒈𝒉𝒊𝒋𝒌𝒍𝒎𝒏𝒐𝒑𝒒𝒓𝒔𝒕𝒖𝒗𝒘𝒙𝒚𝒛";
	return xf_alpha_map($codepoint, $alphabet);
}

# Transform for 'Bold Sans' mode: maps an ASCII letter codepoint to its
# bold sans-serif counterpart via xf_alpha_map; other codepoints pass
# through.
sub xf_boldsans {
	my ($codepoint) = @_;
	my $alphabet = "𝗔𝗕𝗖𝗗𝗘𝗙𝗚𝗛𝗜𝗝𝗞𝗟𝗠𝗡𝗢𝗣𝗤𝗥𝗦𝗧𝗨𝗩𝗪𝗫𝗬𝗭𝗮𝗯𝗰𝗱𝗲𝗳𝗴𝗵𝗶𝗷𝗸𝗹𝗺𝗻𝗼𝗽𝗾𝗿𝘀𝘁𝘂𝘃𝘄𝘅𝘆𝘇";
	return xf_alpha_map($codepoint, $alphabet);
}

# Transform for 'Italic Sans' mode: maps an ASCII letter codepoint to its
# italic sans-serif counterpart via xf_alpha_map; other codepoints pass
# through.
sub xf_italsans {
	my ($codepoint) = @_;
	my $alphabet = "𝘈𝘉𝘊𝘋𝘌𝘍𝘎𝘏𝘐𝘑𝘒𝘓𝘔𝘕𝘖𝘗𝘘𝘙𝘚𝘛𝘜𝘝𝘞𝘟𝘠𝘡𝘢𝘣𝘤𝘥𝘦𝘧𝘨𝘩𝘪𝘫𝘬𝘭𝘮𝘯𝘰𝘱𝘲𝘳𝘴𝘵𝘶𝘷𝘸𝘹𝘺𝘻";
	return xf_alpha_map($codepoint, $alphabet);
}

# Transform for 'Bold Italic Sans' mode: maps an ASCII letter codepoint to
# its bold italic sans-serif counterpart via xf_alpha_map; other
# codepoints pass through.
sub xf_bolditalsans {
	my ($codepoint) = @_;
	my $alphabet = "𝘼𝘽𝘾𝘿𝙀𝙁𝙂𝙃𝙄𝙅𝙆𝙇𝙈𝙉𝙊𝙋𝙌𝙍𝙎𝙏𝙐𝙑𝙒𝙓𝙔𝙕𝙖𝙗𝙘𝙙𝙚𝙛𝙜𝙝𝙞𝙟𝙠𝙡𝙢𝙣𝙤𝙥𝙦𝙧𝙨𝙩𝙪𝙫𝙬𝙭𝙮𝙯";
	return xf_alpha_map($codepoint, $alphabet);
}

# Transform for 'Double-struck' mode: maps an ASCII letter codepoint to
# its double-struck counterpart via xf_alpha_map; other codepoints pass
# through.
sub xf_doublestrike {
	my ($codepoint) = @_;
	my $alphabet = "𝔸𝔹ℂ𝔻𝔼𝔽𝔾ℍ𝕀𝕁𝕂𝕃𝕄ℕ𝕆ℙℚℝ𝕊𝕋𝕌𝕍𝕎𝕏𝕐ℤ𝕒𝕓𝕔𝕕𝕖𝕗𝕘𝕙𝕚𝕛𝕜𝕝𝕞𝕟𝕠𝕡𝕢𝕣𝕤𝕥𝕦𝕧𝕨𝕩𝕪𝕫";
	return xf_alpha_map($codepoint, $alphabet);
}

# Transform for 'Cursive' mode: maps an ASCII letter codepoint to its
# script counterpart via xf_alpha_map; other codepoints pass through.
sub xf_cursive {
	my ($codepoint) = @_;
	my $alphabet = "𝒜ℬ𝒞𝒟ℰℱ𝒢ℋℐ𝒥𝒦ℒℳ𝒩𝒪𝒫𝒬ℛ𝒮𝒯𝒰𝒱𝒲𝒳𝒴𝒵𝒶𝒷𝒸𝒹ℯ𝒻ℊ𝒽𝒾𝒿𝓀𝓁𝓂𝓃ℴ𝓅𝓆𝓇𝓈𝓉𝓊𝓋𝓌𝓍𝓎𝓏";
	return xf_alpha_map($codepoint, $alphabet);
}

# Transform for 'Bold Cursive' mode: maps an ASCII letter codepoint to its
# bold script counterpart via xf_alpha_map; other codepoints pass through.
sub xf_boldcursive {
	my ($codepoint) = @_;
	my $alphabet = "𝓐𝓑𝓒𝓓𝓔𝓕𝓖𝓗𝓘𝓙𝓚𝓛𝓜𝓝𝓞𝓟𝓠𝓡𝓢𝓣𝓤𝓥𝓦𝓧𝓨𝓩𝓪𝓫𝓬𝓭𝓮𝓯𝓰𝓱𝓲𝓳𝓴𝓵𝓶𝓷𝓸𝓹𝓺𝓻𝓼𝓽𝓾𝓿𝔀𝔁𝔂𝔃";
	return xf_alpha_map($codepoint, $alphabet);
}

# Transform for 'Fraktur' mode: maps an ASCII letter codepoint to its
# fraktur counterpart via xf_alpha_map; other codepoints pass through.
sub xf_fraktur {
	my ($codepoint) = @_;
	my $alphabet = "𝔄𝔅ℭ𝔇𝔈𝔉𝔊ℌℑ𝔍𝔎𝔏𝔐𝔑𝔒𝔓𝔔ℜ𝔖𝔗𝔘𝔙𝔚𝔛𝔜ℨ𝔞𝔟𝔠𝔡𝔢𝔣𝔤𝔥𝔦𝔧𝔨𝔩𝔪𝔫𝔬𝔭𝔮𝔯𝔰𝔱𝔲𝔳𝔴𝔵𝔶𝔷";
	return xf_alpha_map($codepoint, $alphabet);
}

# Transform for 'Bold Fraktur' mode: maps an ASCII letter codepoint to its
# bold fraktur counterpart via xf_alpha_map; other codepoints pass through.
sub xf_boldfraktur {
	my ($codepoint) = @_;
	my $alphabet = "𝕬𝕭𝕮𝕯𝕰𝕱𝕲𝕳𝕴𝕵𝕶𝕷𝕸𝕹𝕺𝕻𝕼𝕽𝕾𝕿𝖀𝖁𝖂𝖃𝖄𝖅𝖆𝖇𝖈𝖉𝖊𝖋𝖌𝖍𝖎𝖏𝖐𝖑𝖒𝖓𝖔𝖕𝖖𝖗𝖘𝖙𝖚𝖛𝖜𝖝𝖞𝖟";
	return xf_alpha_map($codepoint, $alphabet);
}

# Transform for 'Superscript' mode: maps an ASCII letter codepoint to a
# superscript-style character via xf_alpha_map; other codepoints pass
# through. Several table entries are shared between the uppercase and
# lowercase halves (e.g. the same glyph is used for C and c).
sub xf_superscript {
	my ($codepoint) = @_;
	my $alphabet = "ᴬᴮᶜᴰᴱᶠᴳᴴᴵᴶᴷᴸᴹᴺᴼᴾᑫᴿˢᵀᵁⱽᵂˣʸᶻᵃᵇᶜᵈᵉᶠᵍʰᶦʲᵏˡᵐⁿᵒᵖᑫʳˢᵗᵘᵛʷˣʸᶻ";
	return xf_alpha_map($codepoint, $alphabet);
}
####


# Converts a key character to the code of its control-key combination:
# the character is uppercased and 0x40 is subtracted from its codepoint
# (so 'f' or 'F' gives 0x06). The result is not range-checked here;
# callers must reject values outside 0..0x1f.
sub get_ctrl_key {
	my ($letter) = @_;
	my $upper = uc $letter;
	return ord($upper) - 0x40;
}

# Formats a key name for display: wraps it in single quotes, with a \002
# ("toggle bold") on each side of the name.
sub fmt_key {
	my ($label) = @_;
	my $bold_toggle = "\002";
	return join '', "'", $bold_toggle, $label, $bold_toggle, "'";
}

# (Re)builds the key configuration from the unifmt_keys setting.
# Called once at load time and again on every 'setup changed' signal.
#
# - If the setting is not exactly 6 characters long, it is reset to
#   $default_keys.
# - The first character becomes the prefix key (as a control code); if it
#   does not map into 0..0x1f, the default prefix is used instead.
# - Characters 3..6 become the keys of %combining_map. Character 2 (the
#   "wide" slot) is not used.
# Finally prints the active bindings and the list of transform keys.
sub init_keys {
	our ($default_keys, $prefix_key, %combining_map);

	my $setting = settings_get_str('unifmt_keys');
	unless(length($setting) == 6) {
		print "$SELF: Invalid unifmt_keys, should be 6 keystrokes, defaulting to '$default_keys'";
		$setting = $default_keys;
		settings_set_str('unifmt_keys', $setting);
	}

	my ($prefix, undef, $under, $double, $strike, $slash) = split "", $setting;

	my $ctrl_code = get_ctrl_key($prefix);
	if($ctrl_code < 0 || $ctrl_code > 0x1f) {
		my $fallback = uc substr($default_keys, 0, 1);
		print "$SELF: Invalid prefix key, defaulting to " . fmt_key("^" . $fallback);
		$ctrl_code = get_ctrl_key($fallback);
	}
	$prefix_key = $ctrl_code;

	# If the same key is configured twice, the later entry wins.
	%combining_map = (
		$under  => 0x332,  # underline
		$double => 0x333,  # double underline
		$strike => 0x336,  # strikethrough
		$slash  => 0x338,  # slash-through
	);

	my @bindings = (
		"prefix " .    fmt_key("^" . chr($prefix_key + 0x40)),
		"underline " . fmt_key($under),
		"double " .    fmt_key($double),
		"strike " .    fmt_key($strike),
		"slash " .     fmt_key($slash),
	);
	print "$SELF: " . join(", ", @bindings);

	# One line per transform, ordered by transform name.
	my @by_name = sort { $transform_keys{$a} cmp $transform_keys{$b} } keys %transform_keys;
	for my $keystroke (@by_name) {
		print join(": ", $transform_keys{$keystroke}, $keystroke);
	}
}

# Handler for irssi's "gui key pressed" signal; receives the numeric code
# of the key as its only argument.
#
# Acts as a small state machine over $was_escape, $was_prefix, $transform
# and $combining_char:
#   - keys belonging to an escape sequence, control keys and the combining
#     characters themselves are handed on unchanged;
#   - the prefix key followed by another key selects or clears a
#     formatting mode and is not inserted into the input;
#   - any other key is run through the active transform (if any), handed
#     on, and then followed by the active combining character (if any).
# The order of the checks below matters; do not reorder them casually.
sub handle_keypress {
	my $key = shift;

	if($DEBUG) { printf $count++ . ": got key 0x%x", $key; dump_buf(); }

	# hackish way to let most escape codes through unmodified. assumes
	# (incorrectly) that all escape codes are either Esc-[-(something),
	# 3 bytes... or else Esc-(something that isn't [), 2 bytes. This
	# happens to let urxvt's arrow keys and alt-numbers through, at least.
	if($was_escape) {
		if($was_escape == 2 && $key != ord('[')) {
			# Esc-(something): this key ends the 2-byte sequence
			$was_escape = 0;
		} else {
			# Esc-[ seen (2 -> 1), or final byte of Esc-[-x (1 -> 0)
			$was_escape--;
		}
		signal_continue($key);
		return;
	}

	# don't try to combine with combining chars!
	# (the signal_emit at the bottom of this sub sends the combining
	# character as a "gui key pressed" signal, which presumably re-enters
	# this handler; this check passes it straight through)
	for(values our %combining_map) {
		# warn "$key $_";
		if($key == $_) {
			signal_continue($key);
			return;
		}
	}

	# ctrl-space is mapped to the null character. make it dump the
	# current input buffer contents in hex, if debugging is active.
	if($DEBUG && $key == 0) {
		dump_buf();
		signal_stop();
		return;
	}

	# ^F pressed once: set flag, but don't insert into buffer.
	# Pressed twice = unset flag, insert into buffer.
	if($key == $prefix_key) {
		if($was_prefix) {
			$was_prefix = 0;
			signal_continue($key);
		} else {
			$was_prefix = 1;
			signal_stop();
		}
		return;
	}

	# enter/return, turn off formatting
	if($key == 0x0d || $key == 0x0a) {
		$combining_char = $was_prefix = 0;
		undef $transform;
		signal_continue($key);
		return;
	}

	# backspace/delete and control characters are acted on normally, except
	# that escape has to set a flag
	# (note: this runs before the $was_prefix check below, so a control
	# key typed right after the prefix leaves $was_prefix set)
	if($key == 0x7f || $key < 0x20) {
		if($key == 0x1b) {
			$was_escape = 2;
		}
		signal_continue($key);
		return;
	}

	# last key pressed was ^F, act on it, but don't insert into the buffer
	if($was_prefix) {
		# warn "prefix key pressed before " . $key;
		my $t = $transform_keys{chr($key)};
		if(defined($t)) {
			# warn "transform key $key";
			# a transform key replaces the active transform and leaves
			# the active combine alone
			$transform = $transforms{$t};
		} else {
			# a combine key replaces the active combine and leaves the
			# active transform alone; any other key yields 0 here
			$combining_char = $combining_map{chr($key)} || 0;
			# unrecognized keys also turn off transform modes
			undef $transform unless $combining_char;
		}

		$was_prefix = 0;
		signal_stop();
		return;
	}

	# ordinary key: apply the active transform, if any
	if(defined $transform) {
		$key = $transform->($key);
	}

	signal_continue($key);

	# if it was a space and we're not formatting spaces, we're done
	# (0x3000 is what xf_wide turns a space into)
	if(($key == 0x20 || $key == 0x3000) && !settings_get_bool('unifmt_spaces')) {
		return;
	}

	# follow the key with the active combining character, sent as a
	# keypress of its own
	if($combining_char) {
		if($DEBUG) { print "combining($key, $combining_char)"; }
		signal_emit('gui key pressed', $combining_char);
	}
}

# Handler for the /unifmt_help command: renders this script's POD with
# pod2text, run through irssi's /exec command.
sub unifmt_help {
	# /exec hands its command line to a shell, so the script path must be
	# quoted: wrap it in single quotes and escape any embedded single
	# quote as '\''. Without this, a path containing spaces or shell
	# metacharacters would be split or interpreted by the shell.
	(my $path = __FILE__) =~ s/'/'\\''/g;
	command("/exec - pod2text '$path'");
}

### main()
# Register the settings (with their defaults) before init_keys() runs,
# since init_keys() reads unifmt_keys via settings_get_str.
settings_add_str($SELF, 'unifmt_keys', $default_keys);
settings_add_bool($SELF, 'unifmt_spaces', 0);
init_keys();

# Rebuild the key configuration whenever irssi reports a settings change.
signal_add_last('setup changed', \&init_keys);

# Declare the argument type of "gui key pressed" (a single integer), then
# hook handle_keypress onto it with signal_add_first.
# NOTE(review): presumably "first" is needed so this handler sees each key
# before irssi's own key handling -- confirm against the irssi signal docs.
signal_register({ "gui key pressed", [ "integer" ] });
signal_add_first("gui key pressed", \&handle_keypress);

# /unifmt_help displays this script's POD.
command_bind("unifmt_help", \&unifmt_help);

print "$SELF.pl loaded, /unifmt_help for help"