From 49a7e6c96379d7434a338a848f7e78de894c676f Mon Sep 17 00:00:00 2001 From: Bastiaan Marinus van de Weerd Date: Wed, 21 Aug 2024 19:34:38 -0400 Subject: [PATCH 1/2] Properly segment Unicode graphemes. --- scripts.js | 57 ++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 51 insertions(+), 6 deletions(-) diff --git a/scripts.js b/scripts.js index 3dbffd9..19118fa 100644 --- a/scripts.js +++ b/scripts.js @@ -35,24 +35,69 @@ window.addEventListener('DOMContentLoaded', function() { function renderText() { // Return a space as typing indicator if text is empty. var text = decodeURIComponent(location.hash.split('#')[1] || ' '); - var fontSize = Math.min(150 / text.length, 30); clearChars(); - text.split(/.*?/u).forEach(function(chr) { + var textWidth = null; + var forEachSegment = null; + if (window.Intl && window.Intl.Segmenter) { + var segmenter = new Intl.Segmenter(); + var segments = Array.from(segmenter.segment(text)); + forEachSegment = function forEachGraphemeSegment(f) { + segments.forEach(function(seg) { + f.call(this, seg.segment, seg.index); + }); + }; + + textWidth = 0; + forEachSegment(function(seg) { + // Unicode.org specifies these properties as follows [1]: + // - `Emoji`: "characters that are emoji" + // - `Emoji_Presentation`: "characters that have emoji + // presentation by default" + // Take for example '☺' (U+263A): this is a "legacy" + // emoji that is not _presented_ as an emoji by default (but + // rather as a monospace / monochrome pictograph). As such, + // it does have property `Emoji` but not `Emoji_Presentation`. + // In order to present such "legacy" emojis as emojis, they + // must be followed by U+FE0F (variation selector 16). + // Contrast that with '😃' (U+1F603), which *is* presented + // as an emoji by default, and as such has _both_ properties. + // (All browsers that support `Intl.Segmenter` also support + // these Unicode property class escapes.) 
+ // [1]: https://unicode.org/reports/tr51/#Emoji_Properties + if (seg.match(/\p{Emoji}\uFE0F|\p{Emoji_Presentation}/u)) { + textWidth += 1.65; // Roughly measured. + } else { + textWidth += 1; + } + }); + } + + // Backward compatibility + else { + textWidth = text.length; + forEachSegment = function forEachCharSegment(f) { + text.split(/.*?/u).forEach(f); + }; + } + + var fontSize = Math.min(150 / textWidth, 30); + + forEachSegment(function(str) { var charbox = charboxTemplate.content.cloneNode(true); var charElem = charbox.querySelector('.char'); charElem.style.fontSize = fontSize + 'vw'; - if (chr !== ' ') { - charElem.textContent = chr; + if (str !== ' ') { + charElem.textContent = str; } else { charElem.innerHTML = ' '; } - if (chr.match(/[0-9]/i)) { + if (str.match(/[0-9]/i)) { charElem.className = 'number'; - } else if (!chr.match(/\p{L}/iu)) { + } else if (!str.match(/\p{L}/iu)) { charElem.className = 'symbol'; } From c8134ca2b34c6b51608561daac88bbd5b1427be1 Mon Sep 17 00:00:00 2001 From: Dan Bader Date: Tue, 19 Nov 2024 11:05:01 +0100 Subject: [PATCH 2/2] Formatting --- scripts.js | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/scripts.js b/scripts.js index 19118fa..4f64694 100644 --- a/scripts.js +++ b/scripts.js @@ -41,6 +41,7 @@ window.addEventListener('DOMContentLoaded', function() { var textWidth = null; var forEachSegment = null; if (window.Intl && window.Intl.Segmenter) { + // Emoji-friendly path -- needs Intl.Segmenter support var segmenter = new Intl.Segmenter(); var segments = Array.from(segmenter.segment(text)); forEachSegment = function forEachGraphemeSegment(f) { @@ -72,10 +73,8 @@ window.addEventListener('DOMContentLoaded', function() { textWidth += 1; } }); - } - - // Backward compatibility - else { + } else { + // Backward compatibility -- no Intl.Segmenter support textWidth = text.length; forEachSegment = function forEachCharSegment(f) { text.split(/.*?/u).forEach(f);