1 files changed, 256 insertions, 39 deletions
diff --git a/unifmt.pl b/unifmt.pl
index 7280739..39f3c12 100644
--- a/unifmt.pl
+++ b/unifmt.pl
@@ -20,24 +20,33 @@ in irssi: /script load unifmt.pl
 =head1 DESCRIPTION
 
 unifmt.pl adds keystrokes to irssi that allow you to type double-width
-ASCII (Unicode FF00 block) characters, and use Unicode combining
-characters to make your text appear underlined (single or double line),
-struck out, or "slashed" out.
+ASCII (Unicode FF00 block) characters, blackletter characters, and
+use Unicode combining characters to make your text appear underlined
+(single or double line), struck out, or "slashed" out.
 
 Rather than executing the script as an irssi slash-command, the modes are
 controlled via keystrokes. This allows the formatted text to be mixed
-with normal text on the same line of input. Only one of the underline,
-strikeout, slashout modes can be enabled at the same time, but they can
-be combined with wide mode.
+with normal or differently-formatted text on the same line of input.
+
+There are 2 classes of formatting: "transforms", which convert plain
+Latin alphabetics to other characters (such as fraktur or italic), and
+"combines", which use plain Latin alphabetics followed by a combining
+character (such as an underline).
+
+You can mix combines and transforms, e.g. underlined fraktur, but only
+one combine and one transform can be enabled at a time (so you can't
+do e.g. both underline and strikethrough, or both superscript and bold
+serif).
 
 =head1 KEYSTROKES
 
 All the formatting controls must be preceded by a prefix character,
-which defaults B<^F> (control-F). The default keystrokes are shown
-here; see SETTINGS, below, to change them.
+which defaults to B<^F> (control-F).
 
 =over 4
 
+=head2 Transforms
+
 =item B<^F w>
 
 Enables wide formatting. Each character you type is replaced by its
@@ -49,8 +58,86 @@ is a double-width space. All other characters are treated normally.
 
 	Example: Ｔｈｉｓ　ｉｓ　ｗｉｄｅ　ｔｅｘｔ
 
-Wide formatting can be combined with one (at a time) of the other
-formatting options.
+=item B<^F b>
+
+Bold sans serif.
+
+	Example: 𝗧𝗵𝗶𝘀 𝗶𝘀 𝗯𝗼𝗹𝗱 𝘀𝗮𝗻𝘀
+
+=item B<^F i>
+
+Italic sans serif
+
+	Example: 𝘛𝘩𝘪𝘴 𝘪𝘴 𝘪𝘵𝘢𝘭𝘪𝘤 𝘴𝘢𝘯𝘴
+
+=item B<^F j>
+
+Bold italic sans serif
+
+	Example: 𝙏𝙝𝙞𝙨 𝙞𝙨 𝙗𝙤𝙡𝙙 𝙞𝙩𝙖𝙡𝙞𝙘 𝙨𝙖𝙣𝙨
+
+=item B<^F B>
+
+Bold serif.
+
+	Example: 𝐓𝐡𝐢𝐬 𝐢𝐬 𝐛𝐨𝐥𝐝 𝐬𝐞𝐫𝐢𝐟
+
+=item B<^F I>
+
+Italic serif
+
+	Example: 𝑇ℎ𝑖𝑠 𝑖𝑠 𝑖𝑡𝑎𝑙𝑖𝑐 𝑠𝑒𝑟𝑖𝑓
+
+=item B<^F J>
+
+Bold italic serif
+
+	Example: 𝑻𝒉𝒊𝒔 𝒊𝒔 𝒃𝒐𝒍𝒅 𝒊𝒕𝒂𝒍𝒊𝒄 𝒔𝒆𝒓𝒊𝒇
+
+=item B<^F 2>
+
+Double-struck
+
+	Example: 𝕋𝕙𝕚𝕤 𝕚𝕤 𝕕𝕠𝕦𝕓𝕝𝕖-𝕤𝕥𝕣𝕦𝕔𝕜
+
+=item B<^F c>
+
+Cursive
+
+	Example: 𝒯𝒽𝒾𝓈 𝒾𝓈 𝒸𝓊𝓇𝓈𝒾𝓋ℯ
+
+=item B<^F C>
+
+Bold cursive
+
+	Example: 𝓣𝓱𝓲𝓼 𝓲𝓼 𝓫𝓸𝓵𝓭 𝓬𝓾𝓻𝓼𝓲𝓿𝓮
+
+=item B<^F k>
+
+Fraktur (aka blackletter)
+
+	Example: 𝔗𝔥𝔦𝔰 𝔦𝔰 𝔣𝔯𝔞𝔨𝔱𝔲𝔯
+
+I am aware that the Mathematical Fraktur symbols were never intended
+for use as text, but this script is for fun, not for standards compliance.
+
+=item B<^F K>
+
+Bold fraktur (aka blackletter). For comparison, regular fraktur:
+
+	Regular: 𝔗𝔥𝔦𝔰 𝔦𝔰 𝔣𝔯𝔞𝔨𝔱𝔲𝔯
+
+	Example: 𝕿𝖍𝖎𝖘 𝖎𝖘 𝖇𝖔𝖑𝖉 𝖋𝖗𝖆𝖐𝖙𝖚𝖗
+
+=item B<^F ^>
+
+Superscript. This isn't perfect: a few superscripted letters only
+exist in Unicode as either uppercase or lowercase, so e.g. all
+lowercase B<i> characters will display as superscripted capital B<I>.
+
+	Example: ᵀʰᶦˢ ᶦˢ ˢᵘᵖᵉʳˢᶜʳᶦᵖᵗ
+
+=head2 Combines
 
 =item B<^F _>
 
@@ -92,16 +179,16 @@ the B<^F F> combo in any future versions of this script.
 
 =item B<unifmt_keys>
 
-String, the 6 keystrokes used to enable the formatting modes. This
-defaults to B<fw_=-/>. The order is: Prefix, Wide, Underline,
-Double-Underline, Strikethrough, Slashthrough. The prefix key is used as
-a control key, but when you set it, use a regular alphabetic (e.g. don't
-say B<^X>, just say B<x> or B<X>).
+String, the 6 keystrokes used to enable the formatting modes. This
+defaults to B<fw_=-/>. The order is: Prefix, Wide, Underline,
+Double-Underline, Strikethrough, Slashthrough. Only the prefix and the
+combines are currently remappable; transforms (including Wide) are
+not. The prefix key is used as a control key, but when you set it, use
+a regular alphabetic (e.g. don't say B<^X>, just say B<x> or B<X>).
 
 =item B<unifmt_spaces>
 
-Boolean, whether or not to apply formatting to spaces (default: false).
-Does not affect wide mode (spaces will always be double-wide).
+Boolean, whether or not to apply combines to spaces (default: false).
 
 =back
 
@@ -159,7 +246,10 @@ irssi(1), urxvt(1), unicode(7), utf-8(7)
 
 =cut
 
-our $VERSION = "0.1";
+use utf8;
+use feature 'unicode_strings';
+
+our $VERSION = "0.2";
 our %IRSSI = (
 	authors     => 'Urchlay',
 	contact     => 'Urchlay on FreeNode',
@@ -185,14 +275,57 @@ use Irssi qw{
 our $SELF = $IRSSI{name};
 our $default_keys = "fw_=-/";
 
-# These 3 are controlled by setting unifmt_keys:
+##sub xf_wide {
+##	my $key = shift;
+##	if($key == 0x20) {
+##		$key = 0x3000;
+##	} elsif($key >= 0x21 && $key <= 0x7e) {
+##		$key += 0xfee0;
+##	}
+##	return $key;
+##}
+
+our %transforms = ( # transform display name => reference to the xf_* sub implementing it
+		'Wide' => \&xf_wide,
+		'Bold Serif' => \&xf_boldserif,
+		'Italic Serif' => \&xf_italserif,
+		'Bold Italic Serif' => \&xf_bolditalserif,
+		'Bold Sans' => \&xf_boldsans,
+		'Italic Sans' => \&xf_italsans,
+		'Bold Italic Sans' => \&xf_bolditalsans,
+		'Double-struck' => \&xf_doublestrike,
+		'Cursive' => \&xf_cursive,
+		'Bold Cursive' => \&xf_boldcursive,
+		'Fraktur' => \&xf_fraktur,
+		'Bold Fraktur' => \&xf_boldfraktur,
+		'Superscript' => \&xf_superscript,
+);
+
+our %transform_keys = ( # key typed after the prefix key => name to look up in %transforms
+		w => 'Wide',
+		B => 'Bold Serif',
+		I => 'Italic Serif',
+		J => 'Bold Italic Serif',
+		b => 'Bold Sans',
+		i => 'Italic Sans',
+		j => 'Bold Italic Sans',
+		2 => 'Double-struck',
+		c => 'Cursive',
+		C => 'Bold Cursive',
+		k => 'Fraktur',
+		K => 'Bold Fraktur',
+		'^' => 'Superscript',
+);
+
+
+# Holds a reference to one of the xf_* subs, or undef if no
+# transform is active.
+our $transform;
+
+# These 2 are controlled by setting unifmt_keys:
 our $prefix_key;
-our $wide_key;
 our %combining_map;
 
-# Toggled with ^F w
-our $widemode = 0;
-
 # Which of combining_map is active, or 0 for none
 our $combining_char = 0;
 
@@ -203,6 +336,7 @@ our $was_prefix = 0;
 # 0 otherwise.
 our $was_escape = 0;
 
+
 # There's no way to enable debugging without editing the script.
 our $DEBUG = 0;
 
@@ -222,6 +356,89 @@ sub dump_buf {
 	print $out;
 }
 
+####
+# transforms take numeric Unicode codepoint arg, and return
+# a numeric Unicode codepoint.
+
+sub xf_wide { # codepoint in, codepoint out: printable ASCII -> fullwidth form
+	my $key = shift;
+
+	if($key == 0x20) {
+		$key = 0x3000; # wide space (U+3000), maybe better to avoid this?
+	} elsif($key >= 0x21 && $key <= 0x7e) {
+		# unicode 0xff01 to 0xff5e are wide versions of ASCII 0x21 to 0x7e
+		$key += 0xfee0;
+	}
+	# else pass it through as-is
+
+	return $key;
+}
+
+sub xf_alpha_map { # map an ASCII letter codepoint through a 52-character lookup string
+	my $k = shift; # numeric codepoint to transform
+	my $map = shift; # replacements: A-Z at offsets 0-25, a-z at offsets 26-51
+
+	## warn "k was $k";
+	if($k >= 65 && $k <= 90) { # A-Z
+		$k = ord(substr($map, $k - 65)); # substr gives the tail; ord() reads only its first char
+	} elsif($k >= 97 && $k <= 122) { # a-z
+		$k = ord(substr($map, $k - 97 + 26)); # lowercase entries start at offset 26
+	}
+	## warn "k now $k";
+
+	return $k; # anything that is not A-Z/a-z comes back unchanged
+}
+
+sub xf_boldserif { # codepoint in $_[0]; letters -> bold serif, A-Z then a-z in the map
+	return xf_alpha_map($_[0], "𝐀𝐁𝐂𝐃𝐄𝐅𝐆𝐇𝐈𝐉𝐊𝐋𝐌𝐍𝐎𝐏𝐐𝐑𝐒𝐓𝐔𝐕𝐖𝐗𝐘𝐙𝐚𝐛𝐜𝐝𝐞𝐟𝐠𝐡𝐢𝐣𝐤𝐥𝐦𝐧𝐨𝐩𝐪𝐫𝐬𝐭𝐮𝐯𝐰𝐱𝐲𝐳");
+}
+
+sub xf_italserif { # letters -> italic serif; the 'h' entry is U+210E, outside the run of the other letters
+	return xf_alpha_map($_[0], "𝐴𝐵𝐶𝐷𝐸𝐹𝐺𝐻𝐼𝐽𝐾𝐿𝑀𝑁𝑂𝑃𝑄𝑅𝑆𝑇𝑈𝑉𝑊𝑋𝑌𝑍𝑎𝑏𝑐𝑑𝑒𝑓𝑔ℎ𝑖𝑗𝑘𝑙𝑚𝑛𝑜𝑝𝑞𝑟𝑠𝑡𝑢𝑣𝑤𝑥𝑦𝑧");
+}
+
+sub xf_bolditalserif { # codepoint in $_[0]; letters -> bold italic serif
+	return xf_alpha_map($_[0], "𝑨𝑩𝑪𝑫𝑬𝑭𝑮𝑯𝑰𝑱𝑲𝑳𝑴𝑵𝑶𝑷𝑸𝑹𝑺𝑻𝑼𝑽𝑾𝑿𝒀𝒁𝒂𝒃𝒄𝒅𝒆𝒇𝒈𝒉𝒊𝒋𝒌𝒍𝒎𝒏𝒐𝒑𝒒𝒓𝒔𝒕𝒖𝒗𝒘𝒙𝒚𝒛");
+}
+
+sub xf_boldsans { # codepoint in $_[0]; letters -> bold sans serif
+	return xf_alpha_map($_[0], "𝗔𝗕𝗖𝗗𝗘𝗙𝗚𝗛𝗜𝗝𝗞𝗟𝗠𝗡𝗢𝗣𝗤𝗥𝗦𝗧𝗨𝗩𝗪𝗫𝗬𝗭𝗮𝗯𝗰𝗱𝗲𝗳𝗴𝗵𝗶𝗷𝗸𝗹𝗺𝗻𝗼𝗽𝗾𝗿𝘀𝘁𝘂𝘃𝘄𝘅𝘆𝘇");
+}
+
+sub xf_italsans { # codepoint in $_[0]; letters -> italic sans serif
+	return xf_alpha_map($_[0], "𝘈𝘉𝘊𝘋𝘌𝘍𝘎𝘏𝘐𝘑𝘒𝘓𝘔𝘕𝘖𝘗𝘘𝘙𝘚𝘛𝘜𝘝𝘞𝘟𝘠𝘡𝘢𝘣𝘤𝘥𝘦𝘧𝘨𝘩𝘪𝘫𝘬𝘭𝘮𝘯𝘰𝘱𝘲𝘳𝘴𝘵𝘶𝘷𝘸𝘹𝘺𝘻");
+}
+
+sub xf_bolditalsans { # codepoint in $_[0]; letters -> bold italic sans serif
+	return xf_alpha_map($_[0], "𝘼𝘽𝘾𝘿𝙀𝙁𝙂𝙃𝙄𝙅𝙆𝙇𝙈𝙉𝙊𝙋𝙌𝙍𝙎𝙏𝙐𝙑𝙒𝙓𝙔𝙕𝙖𝙗𝙘𝙙𝙚𝙛𝙜𝙝𝙞𝙟𝙠𝙡𝙢𝙣𝙤𝙥𝙦𝙧𝙨𝙩𝙪𝙫𝙬𝙭𝙮𝙯");
+}
+
+sub xf_doublestrike { # letters -> double-struck; C H N P Q R Z entries are the U+21xx letterlike forms
+	return xf_alpha_map($_[0], "𝔸𝔹ℂ𝔻𝔼𝔽𝔾ℍ𝕀𝕁𝕂𝕃𝕄ℕ𝕆ℙℚℝ𝕊𝕋𝕌𝕍𝕎𝕏𝕐ℤ𝕒𝕓𝕔𝕕𝕖𝕗𝕘𝕙𝕚𝕛𝕜𝕝𝕞𝕟𝕠𝕡𝕢𝕣𝕤𝕥𝕦𝕧𝕨𝕩𝕪𝕫");
+}
+
+sub xf_cursive { # letters -> script; B E F H I L M R e g o entries are the U+21xx letterlike forms
+	return xf_alpha_map($_[0], "𝒜ℬ𝒞𝒟ℰℱ𝒢ℋℐ𝒥𝒦ℒℳ𝒩𝒪𝒫𝒬ℛ𝒮𝒯𝒰𝒱𝒲𝒳𝒴𝒵𝒶𝒷𝒸𝒹ℯ𝒻ℊ𝒽𝒾𝒿𝓀𝓁𝓂𝓃ℴ𝓅𝓆𝓇𝓈𝓉𝓊𝓋𝓌𝓍𝓎𝓏");
+}
+
+sub xf_boldcursive { # codepoint in $_[0]; letters -> bold script
+	return xf_alpha_map($_[0], "𝓐𝓑𝓒𝓓𝓔𝓕𝓖𝓗𝓘𝓙𝓚𝓛𝓜𝓝𝓞𝓟𝓠𝓡𝓢𝓣𝓤𝓥𝓦𝓧𝓨𝓩𝓪𝓫𝓬𝓭𝓮𝓯𝓰𝓱𝓲𝓳𝓴𝓵𝓶𝓷𝓸𝓹𝓺𝓻𝓼𝓽𝓾𝓿𝔀𝔁𝔂𝔃");
+}
+
+sub xf_fraktur { # letters -> fraktur; C H I R Z entries are the U+21xx letterlike forms
+	return xf_alpha_map($_[0], "𝔄𝔅ℭ𝔇𝔈𝔉𝔊ℌℑ𝔍𝔎𝔏𝔐𝔑𝔒𝔓𝔔ℜ𝔖𝔗𝔘𝔙𝔚𝔛𝔜ℨ𝔞𝔟𝔠𝔡𝔢𝔣𝔤𝔥𝔦𝔧𝔨𝔩𝔪𝔫𝔬𝔭𝔮𝔯𝔰𝔱𝔲𝔳𝔴𝔵𝔶𝔷");
+}
+
+sub xf_boldfraktur { # codepoint in $_[0]; letters -> bold fraktur
+	return xf_alpha_map($_[0], "𝕬𝕭𝕮𝕯𝕰𝕱𝕲𝕳𝕴𝕵𝕶𝕷𝕸𝕹𝕺𝕻𝕼𝕽𝕾𝕿𝖀𝖁𝖂𝖃𝖄𝖅𝖆𝖇𝖈𝖉𝖊𝖋𝖌𝖍𝖎𝖏𝖐𝖑𝖒𝖓𝖔𝖕𝖖𝖗𝖘𝖙𝖚𝖛𝖜𝖝𝖞𝖟");
+}
+
+sub xf_superscript { # letters -> superscript look-alikes; C F Q S X Y Z share one glyph for both cases
+	return xf_alpha_map($_[0], "ᴬᴮᶜᴰᴱᶠᴳᴴᴵᴶᴷᴸᴹᴺᴼᴾᑫᴿˢᵀᵁⱽᵂˣʸᶻᵃᵇᶜᵈᵉᶠᵍʰᶦʲᵏˡᵐⁿᵒᵖᑫʳˢᵗᵘᵛʷˣʸᶻ");
+}
+####
+
+
 sub get_ctrl_key {
 	return ord(uc($_[0])) - 0x40;
 }
@@ -250,7 +467,6 @@ sub init_keys {
 	}
 
 	our $prefix_key = $p;
-	our $wide_key = $w;
 
 	our %combining_map = (
 		$u => 0x332,  # underline
@@ -261,11 +477,14 @@ sub init_keys {
 
 	print "$SELF: " .
 		"prefix " .    fmt_key("^" . chr($prefix_key + 0x40)) . ", " .
-		"wide " .      fmt_key($wide_key) . ", " .
 		"underline " . fmt_key($u) . ", " .
 		"double " .    fmt_key($d) . ", " .
 		"strike " .    fmt_key($s) . ", " .
 		"slash " .     fmt_key($l);
+
+	for(sort { $transform_keys{$a} cmp $transform_keys{$b} } keys %transform_keys) {
+		print $transform_keys{$_} . ": " . $_;
+	}
 }
 
 sub handle_keypress {
@@ -289,7 +508,7 @@ sub handle_keypress {
 
 	# don't try to combine with combining chars!
 	for(values our %combining_map) {
-		#warn "$key $_";
+		# warn "$key $_";
 		if($key == $_) {
 			signal_continue($key);
 			return;
@@ -319,7 +538,8 @@ sub handle_keypress {
 
 	# enter/return, turn off formatting
 	if($key == 0x0d || $key == 0x0a) {
-		$widemode = $combining_char = $was_prefix = 0;
+		$combining_char = $was_prefix = 0;
+		undef $transform;
 		signal_continue($key);
 		return;
 	}
@@ -336,12 +556,15 @@ sub handle_keypress {
 
 	# last key pressed was ^F, act on it, but don't insert into the buffer
 	if($was_prefix) {
-		if($key == ord($wide_key)) {
-			$widemode = 1;
+		# warn "prefix key pressed before " . $key;
+		my $t = $transform_keys{chr($key)};
+		if(defined($t)) {
+			# warn "transform key $key";
+			$transform = $transforms{$t};
 		} else {
 			$combining_char = $combining_map{chr($key)} || 0;
-			# unrecognized keys also turn off wide mode
-			$widemode = 0 unless $combining_char;
+			# unrecognized keys also turn off transform modes
+			undef $transform unless $combining_char;
 		}
 
 		$was_prefix = 0;
@@ -349,14 +572,8 @@ sub handle_keypress {
 		return;
 	}
 
-	# unicode 0x0f01 to 0x0fee are wide versions of ASCII
-	if($widemode) {
-		if($key == 0x20) {
-			$key = 0x3000;
-		} elsif($key >= 0x21 && $key <= 0x7e) {
-			$key += 0xfee0;
-		}
-		# else pass it through as-is
+	if(defined $transform) {
+		$key = $transform->($key);
 	}
 
 	signal_continue($key);