From 987089d80d236364a1af5f4eef43477240ba8abd Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Fri, 3 Jan 2025 09:46:13 -0800 Subject: [PATCH] input: legacy encoding falls back to mapping of logical key for ctrlseq Fixes #4518 If our UTF8 encoding is not recognized, we fall back to the ASCII mapping of the logical key for the control sequence. This allows Cyrillic control characters to work. I also verified that non-Cyrillic (US) and alternate layouts (Dvorak) still work as expected. --- src/input/KeyEncoder.zig | 70 +++++++++++++++++++++++++++++++--------- 1 file changed, 55 insertions(+), 15 deletions(-) diff --git a/src/input/KeyEncoder.zig b/src/input/KeyEncoder.zig index 734885097f..32e1be635f 100644 --- a/src/input/KeyEncoder.zig +++ b/src/input/KeyEncoder.zig @@ -282,7 +282,12 @@ fn legacy( // If we match a control sequence, we output that directly. For // ctrlSeq we have to use all mods because we want it to only // match ctrl+. - if (ctrlSeq(self.event.utf8, self.event.unshifted_codepoint, all_mods)) |char| { + if (ctrlSeq( + self.event.key, + self.event.utf8, + self.event.unshifted_codepoint, + all_mods, + )) |char| { // C0 sequences support alt-as-esc prefixing. if (binding_mods.alt) { if (buf.len < 2) return error.OutOfMemory; @@ -538,19 +543,44 @@ fn pcStyleFunctionKey( /// into a C0 byte. There are many cases for this and you should read /// the source code to understand them. fn ctrlSeq( + logical_key: key.Key, utf8: []const u8, unshifted_codepoint: u21, mods: key.Mods, ) ?u8 { + const ctrl_only = comptime (key.Mods{ .ctrl = true }).int(); + // If ctrl is not pressed then we never do anything. if (!mods.ctrl) return null; - // If we don't have exactly one byte in our utf8 sequence, then - // we don't do anything, since all our ctrl keys are based on ASCII. 
- if (utf8.len != 1) return null; - const char, const unset_mods = unset_mods: { - var char = utf8[0]; + var char: u8 = char: { + // If we have exactly one UTF8 byte, we assume that is the + // character we want to convert to a C0 byte. + if (utf8.len == 1) break :char utf8[0]; + + // If we have a logical key that maps to a single byte + // printable character, we use that. History to explain this: + // this was added to support cyrillic keyboard layouts such + // as Russian and Mongolian. These layouts have a `c` key that + // maps to U+0441 (cyrillic small letter "c") but every + // terminal I've tested encodes this as ctrl+c. + if (logical_key.codepoint()) |cp| { + if (std.math.cast(u8, cp)) |byte| { + // For this specific case, we only map to the key if + // we have exactly ctrl pressed. This is because shift + // would modify the key and we don't know how to do that + // properly here (don't have the layout). And we want + // to encode shift as CSIu. + if (mods.int() != ctrl_only) return null; + break :char byte; + } + } + + // Otherwise we don't have a character to convert that + // we can reliably map to a C0 byte. + return null; + }; var unset_mods = mods; // Remove alt from our modifiers because it does not impact whether @@ -596,7 +626,6 @@ fn ctrlSeq( }; // After unsetting, we only continue if we have ONLY control set. - const ctrl_only = comptime (key.Mods{ .ctrl = true }).int(); if (unset_mods.int() != ctrl_only) return null; // From Kitty's key encoding logic. 
I tried to discern the exact @@ -2132,36 +2161,47 @@ test "legacy: hu layout ctrl+ő sends proper codepoint" { const actual = try enc.legacy(&buf); try testing.expectEqualStrings("[337;5u", actual[1..]); } + test "ctrlseq: normal ctrl c" { - const seq = ctrlSeq("c", 'c', .{ .ctrl = true }); + const seq = ctrlSeq(.invalid, "c", 'c', .{ .ctrl = true }); try testing.expectEqual(@as(u8, 0x03), seq.?); } test "ctrlseq: normal ctrl c, right control" { - const seq = ctrlSeq("c", 'c', .{ .ctrl = true, .sides = .{ .ctrl = .right } }); + const seq = ctrlSeq(.invalid, "c", 'c', .{ .ctrl = true, .sides = .{ .ctrl = .right } }); try testing.expectEqual(@as(u8, 0x03), seq.?); } test "ctrlseq: alt should be allowed" { - const seq = ctrlSeq("c", 'c', .{ .alt = true, .ctrl = true }); + const seq = ctrlSeq(.invalid, "c", 'c', .{ .alt = true, .ctrl = true }); try testing.expectEqual(@as(u8, 0x03), seq.?); } test "ctrlseq: no ctrl does nothing" { - try testing.expect(ctrlSeq("c", 'c', .{}) == null); + try testing.expect(ctrlSeq(.invalid, "c", 'c', .{}) == null); } test "ctrlseq: shifted non-character" { - const seq = ctrlSeq("_", '-', .{ .ctrl = true, .shift = true }); + const seq = ctrlSeq(.invalid, "_", '-', .{ .ctrl = true, .shift = true }); try testing.expectEqual(@as(u8, 0x1F), seq.?); } test "ctrlseq: caps ascii letter" { - const seq = ctrlSeq("C", 'c', .{ .ctrl = true, .caps_lock = true }); + const seq = ctrlSeq(.invalid, "C", 'c', .{ .ctrl = true, .caps_lock = true }); try testing.expectEqual(@as(u8, 0x03), seq.?); } test "ctrlseq: shift does not generate ctrl seq" { - try testing.expect(ctrlSeq("C", 'c', .{ .shift = true }) == null); - try testing.expect(ctrlSeq("C", 'c', .{ .shift = true, .ctrl = true }) == null); + try testing.expect(ctrlSeq(.invalid, "C", 'c', .{ .shift = true }) == null); + try testing.expect(ctrlSeq(.invalid, "C", 'c', .{ .shift = true, .ctrl = true }) == null); +} + +test "ctrlseq: russian ctrl c" { + const seq = ctrlSeq(.c, "с", 0x0441, .{ .ctrl = true 
}); + try testing.expectEqual(@as(u8, 0x03), seq.?); +} + +test "ctrlseq: russian shifted ctrl c" { + const seq = ctrlSeq(.c, "с", 0x0441, .{ .ctrl = true, .shift = true }); + try testing.expect(seq == null); }