From 9aec58207cfc71dd4d6bd1272b0f90eb323c0dc3 Mon Sep 17 00:00:00 2001 From: Martin Robinson Date: Thu, 23 May 2024 08:25:47 +0200 Subject: [PATCH] fonts: Fix rendering of emoji clusters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Emoji clusters, such ('🏳️‍🌈') do not render properly in Servo. This is because xi-unicode is inserting a linebreak opportunity between components of the cluster (see xi-editor/xi-editor#1322). This change adds a workaround for this issue. `xi-unicode` is fast, but supports an older version of the Unicode standard than libraries like `icu4x`. In addition, `icu4x` does not supoprt non-contiguous segmentation which Servo currently depends on. Finally, the currently released version of `icu4x` has the same issue (unicode-org/icu4x#4146). --- components/gfx/text/text_run.rs | 39 +++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/components/gfx/text/text_run.rs b/components/gfx/text/text_run.rs index d4bec79564a4f..d5e2015e700e9 100644 --- a/components/gfx/text/text_run.rs +++ b/components/gfx/text/text_run.rs @@ -13,6 +13,7 @@ use range::Range; use serde::{Deserialize, Serialize}; use style::str::char_is_whitespace; use unicode_bidi as bidi; +use unicode_properties::UnicodeEmoji; use webrender_api::FontInstanceKey; use xi_unicode::LineBreakLeafIter; @@ -233,6 +234,7 @@ impl<'a> TextRun { }); }; + let mut in_zwj_emoji_sequence = false; while !finished { let (idx, _is_hard_break) = breaker.next(text); if idx == text.len() { @@ -246,10 +248,43 @@ impl<'a> TextRun { slice.end = idx; let word = &text[slice.clone()]; + let mut rev_char_indices = word.char_indices().rev().peekable(); + let last_character = rev_char_indices.peek(); + + if !finished { + // This is a workaround for a bug in xi-unicode. It inserts a linebreak opportunity + // between two emojis and ZWJ character. This signals a possible emoji cluster and so it + // should all be shaped together. If this is detected, merge this segment with the + // previous one. + // + // TODO: Either fix the bug in xi-unicode or switch to something like icu4x, but that + // does not support iterative addition of text to the linebreaker. We'd have to break + // the entire inline formatting context at once. + let ends_with_emoji = matches!( + last_character, + Some((_, character)) if character.is_emoji_char_or_emoji_component() + ); + let next_character = &text[slice.end..text.len()].char_indices().next(); + if ends_with_emoji && matches!(next_character, Some((_, '\u{200d}'))) { + in_zwj_emoji_sequence = true; + continue; + } + if in_zwj_emoji_sequence { + in_zwj_emoji_sequence = false; + continue; + } + // This is another bug in xi-unicode. It inserts a line break opportunity between emojis and + // emoji components that follow them. For instance, between an emoji and a skin tone modifier. + if ends_with_emoji && + matches!(next_character, Some((_, character)) if character.is_emoji_component()) + { + continue; + } + } + // Split off any trailing whitespace into a separate glyph run. let mut whitespace = slice.end..slice.end; - let mut rev_char_indices = word.char_indices().rev().peekable(); - let ends_with_newline = rev_char_indices.peek().map_or(false, |&(_, c)| c == '\n'); + let ends_with_newline = matches!(last_character, Some((_, '\n'))); if let Some((i, _)) = rev_char_indices .take_while(|&(_, c)| char_is_whitespace(c)) .last()