From a96cf37ce94044ee8a3204971ba0570d0ac55db6 Mon Sep 17 00:00:00 2001
From: HAMANO Tsukasa <hamano@osstech.co.jp>
Date: Thu, 17 Oct 2024 14:15:42 +0900
Subject: [PATCH 1/2] add cellwidths option #6289

---
 config/src/config.rs                |  4 ++++
 config/src/terminal.rs              |  1 +
 term/src/config.rs                  |  1 +
 term/src/terminalstate/performer.rs |  4 ++--
 termwiz/src/cell.rs                 | 33 +++++++++++++++++++++++------
 termwiz/src/surface/line/line.rs    |  2 +-
 wezterm-gui/src/main.rs             |  1 +
 7 files changed, 36 insertions(+), 10 deletions(-)
diff --git a/config/src/config.rs b/config/src/config.rs
index 113cdbfba4c..4bd0d2a6465 100644
--- a/config/src/config.rs
+++ b/config/src/config.rs
@@ -38,6 +38,7 @@ use std::path::{Path, PathBuf};
 use std::sync::atomic::Ordering;
 use std::time::Duration;
 use termwiz::hyperlink;
+use termwiz::cell::CellWidth;
 use termwiz::surface::CursorShape;
 use wezterm_bidi::ParagraphDirectionHint;
 use wezterm_config_derive::ConfigMeta;
@@ -813,6 +814,9 @@ pub struct Config {
     #[dynamic(default)]
     pub treat_east_asian_ambiguous_width_as_wide: bool,
 
+    #[dynamic(default)]
+    pub cellwidths: Option<Vec<CellWidth>>,
+
     #[dynamic(default = "default_true")]
     pub allow_download_protocols: bool,
 
diff --git a/config/src/terminal.rs b/config/src/terminal.rs
index 305b4851484..dedf4c07edf 100644
--- a/config/src/terminal.rs
+++ b/config/src/terminal.rs
@@ -105,6 +105,7 @@ impl wezterm_term::TerminalConfiguration for TermConfig {
         UnicodeVersion {
             version: config.unicode_version,
             ambiguous_are_wide: config.treat_east_asian_ambiguous_width_as_wide,
+            cellwidths: config.cellwidths.clone()
         }
     }
 
diff --git a/term/src/config.rs b/term/src/config.rs
index b09fb43ccdd..54857473964 100644
--- a/term/src/config.rs
+++ b/term/src/config.rs
@@ -191,6 +191,7 @@ pub trait TerminalConfiguration: Downcast + std::fmt::Debug + Send + Sync {
         UnicodeVersion {
             version: 9,
             ambiguous_are_wide: false,
+            cellwidths: None,
         }
     }
 
diff --git a/term/src/terminalstate/performer.rs b/term/src/terminalstate/performer.rs
index 817b137baf6..43ebc367753 100644
--- a/term/src/terminalstate/performer.rs
+++ b/term/src/terminalstate/performer.rs
@@ -132,7 +132,7 @@ impl<'a> Performer<'a> {
         for g in Graphemes::new(text) {
             let g = self.remap_grapheme(g);
 
-            let print_width = grapheme_column_width(g, Some(self.unicode_version));
+            let print_width = grapheme_column_width(g, Some(self.unicode_version.clone()));
             if print_width == 0 {
                 // We got a zero-width grapheme.
                 // We used to force them into a cell to guarantee that we
@@ -816,7 +816,7 @@ impl<'a> Performer<'a> {
                     self.unicode_version.version = n;
                 }
                 ITermProprietary::UnicodeVersion(ITermUnicodeVersionOp::Push(label)) => {
-                    let vers = self.unicode_version;
+                    let vers = self.unicode_version.clone();
                     self.unicode_version_stack
                         .push(UnicodeVersionStackEntry { vers, label });
                 }
diff --git a/termwiz/src/cell.rs b/termwiz/src/cell.rs
index 0af9908ee38..08f124d7dbd 100644
--- a/termwiz/src/cell.rs
+++ b/termwiz/src/cell.rs
@@ -881,10 +881,18 @@ impl Cell {
     }
 }
 
-#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+#[derive(Clone, Debug, Eq, PartialEq, FromDynamic, ToDynamic)]
+pub struct CellWidth {
+    pub first: u32,
+    pub last: u32,
+    pub width: u8,
+}
+
+#[derive(Clone, Debug, Eq, PartialEq)]
 pub struct UnicodeVersion {
     pub version: u8,
     pub ambiguous_are_wide: bool,
+    pub cellwidths: Option<Vec<CellWidth>>,
 }
 
 impl UnicodeVersion {
@@ -892,6 +900,7 @@ impl UnicodeVersion {
         Self {
             version,
             ambiguous_are_wide: false,
+            cellwidths: None,
         }
     }
 
@@ -911,6 +920,18 @@ impl UnicodeVersion {
         }
     }
 
+    #[inline]
+    fn wcwidth(&self, c: char) -> usize {
+        if let Some(ref cellwidths) = self.cellwidths {
+            for cellwidth in cellwidths {
+                if cellwidth.first <= c as u32 && c as u32 <= cellwidth.last {
+                    return cellwidth.width.into()
+                }
+            }
+        }
+        self.width(WCWIDTH_TABLE.classify(c))
+    }
+
     #[inline]
     pub fn idx(&self) -> usize {
         (if self.version > 9 { 2 } else { 0 }) | (if self.ambiguous_are_wide { 1 } else { 0 })
@@ -920,6 +941,7 @@ impl UnicodeVersion {
 pub const LATEST_UNICODE_VERSION: UnicodeVersion = UnicodeVersion {
     version: 14,
     ambiguous_are_wide: false,
+    cellwidths: None,
 };
 
 /// Returns the number of cells visually occupied by a sequence
@@ -928,7 +950,7 @@ pub const LATEST_UNICODE_VERSION: UnicodeVersion = UnicodeVersion {
 /// and sums up the length.
 pub fn unicode_column_width(s: &str, version: Option<UnicodeVersion>) -> usize {
     Graphemes::new(s)
-        .map(|g| grapheme_column_width(g, version))
+        .map(|g| grapheme_column_width(g, version.clone()))
         .sum()
 }
 
@@ -977,13 +999,11 @@ pub fn grapheme_column_width(s: &str, version: Option<UnicodeVersion>) -> usize
     // cannot be a sequence with a variation selector, so we don't
     // need to requested `Presentation` for it.
     if s.len() == 1 {
-        let c = WCWIDTH_TABLE.classify(s.as_bytes()[0] as char);
-        return version.width(c);
+        return version.wcwidth(s.as_bytes()[0] as char);
     }
 
     // Slow path: `s.chars()` will dominate and pull up the minimum
     // runtime to ~20ns
-
     if version.version >= 14 {
         // Lookup the grapheme to see if the presentation of
         // the grapheme forces the width. We can bypass
@@ -999,8 +1019,7 @@ pub fn grapheme_column_width(s: &str, version: Option<UnicodeVersion>) -> usize
     // Otherwise, classify and sum up
     let mut width = 0;
     for c in s.chars() {
-        let c = WCWIDTH_TABLE.classify(c);
-        width += version.width(c);
+        width += version.wcwidth(c);
     }
 
     width.min(2)
diff --git a/termwiz/src/surface/line/line.rs b/termwiz/src/surface/line/line.rs
index a6f3c1a0cfc..fc8cc75809c 100644
--- a/termwiz/src/surface/line/line.rs
+++ b/termwiz/src/surface/line/line.rs
@@ -138,7 +138,7 @@ impl Line {
         let mut cells = Vec::new();
 
         for sub in Graphemes::new(s) {
-            let cell = Cell::new_grapheme(sub, attrs.clone(), unicode_version);
+            let cell = Cell::new_grapheme(sub, attrs.clone(), unicode_version.clone());
             let width = cell.width();
             cells.push(cell);
             for _ in 1..width {
diff --git a/wezterm-gui/src/main.rs b/wezterm-gui/src/main.rs
index 4d2829aa4c6..bbe24e2fa89 100644
--- a/wezterm-gui/src/main.rs
+++ b/wezterm-gui/src/main.rs
@@ -886,6 +886,7 @@ pub fn run_ls_fonts(config: config::ConfigHandle, cmd: &LsFontsCommand) -> anyho
     let unicode_version = UnicodeVersion {
         version: config.unicode_version,
         ambiguous_are_wide: config.treat_east_asian_ambiguous_width_as_wide,
+        cellwidths: config.cellwidths.clone(),
     };
 
     let text = match (&cmd.text, &cmd.codepoints) {

From 38aa08b0080c5ad7d9794b3fe413c407d2975958 Mon Sep 17 00:00:00 2001
From: HAMANO Tsukasa <hamano@osstech.co.jp>
Date: Sat, 19 Oct 2024 00:00:58 +0900
Subject: [PATCH 2/2] #6289 more efficient version, converting from a list of
 codepoints to a hashmap

---
 config/src/terminal.rs  |  3 ++-
 termwiz/src/cell.rs     | 23 ++++++++++++++++++-----
 wezterm-gui/src/main.rs |  3 ++-
 3 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/config/src/terminal.rs b/config/src/terminal.rs
index dedf4c07edf..9d96c41f141 100644
--- a/config/src/terminal.rs
+++ b/config/src/terminal.rs
@@ -3,6 +3,7 @@
 use crate::{configuration, ConfigHandle, NewlineCanon};
 use std::sync::Mutex;
 use termwiz::cell::UnicodeVersion;
+use termwiz::cell::setcellwidths;
 use wezterm_term::color::ColorPalette;
 use wezterm_term::config::BidiMode;
 
@@ -105,7 +106,7 @@ impl wezterm_term::TerminalConfiguration for TermConfig {
         UnicodeVersion {
             version: config.unicode_version,
             ambiguous_are_wide: config.treat_east_asian_ambiguous_width_as_wide,
-            cellwidths: config.cellwidths.clone()
+            cellwidths: setcellwidths(config.cellwidths.clone()),
         }
     }
 
diff --git a/termwiz/src/cell.rs b/termwiz/src/cell.rs
index 08f124d7dbd..09d6bbc6fec 100644
--- a/termwiz/src/cell.rs
+++ b/termwiz/src/cell.rs
@@ -12,6 +12,7 @@ use std::hash::{Hash, Hasher};
 use std::mem;
 use std::sync::Arc;
 use wezterm_dynamic::{FromDynamic, ToDynamic};
+use std::collections::HashMap;
 
 #[cfg_attr(feature = "use_serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
@@ -888,11 +889,25 @@ pub struct CellWidth {
     pub width: u8,
 }
 
+// NOTE(review): callers clone `UnicodeVersion` for every grapheme, which deep-copies
+// this map each time; consider sharing it (e.g. behind the already-imported `Arc`).
+/// Expands the inclusive code point ranges from the config into a per-code-point
+/// width table (`None` when unset). For overlapping ranges the later entry wins.
+pub fn setcellwidths(cellwidths: Option<Vec<CellWidth>>) -> Option<HashMap<u32, u8>> {
+    let mut map = HashMap::new();
+    for cellwidth in cellwidths? {
+        // `..=` because `last + 1` overflows at `u32::MAX`; no lookup key exceeds `char::MAX`.
+        let last = cellwidth.last.min(char::MAX as u32);
+        map.extend((cellwidth.first..=last).map(|cp| (cp, cellwidth.width)));
+    }
+    Some(map)
+}
+
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub struct UnicodeVersion {
     pub version: u8,
     pub ambiguous_are_wide: bool,
-    pub cellwidths: Option<Vec<CellWidth>>,
+    pub cellwidths: Option<HashMap<u32, u8>>,
 }
 
 impl UnicodeVersion {
@@ -923,10 +938,8 @@ impl UnicodeVersion {
     #[inline]
     fn wcwidth(&self, c: char) -> usize {
         if let Some(ref cellwidths) = self.cellwidths {
-            for cellwidth in cellwidths {
-                if cellwidth.first <= c as u32 && c as u32 <= cellwidth.last {
-                    return cellwidth.width.into()
-                }
+            if let Some(width) = cellwidths.get(&(c as u32)) {
+                return (*width).into()
             }
         }
         self.width(WCWIDTH_TABLE.classify(c))
diff --git a/wezterm-gui/src/main.rs b/wezterm-gui/src/main.rs
index bbe24e2fa89..cdc0e6a2181 100644
--- a/wezterm-gui/src/main.rs
+++ b/wezterm-gui/src/main.rs
@@ -33,6 +33,7 @@ use wezterm_font::FontConfiguration;
 use wezterm_gui_subcommands::*;
 use wezterm_mux_server_impl::update_mux_domains;
 use wezterm_toast_notification::*;
+use termwiz::cell::setcellwidths;
 
 mod colorease;
 mod commands;
@@ -886,7 +887,7 @@ pub fn run_ls_fonts(config: config::ConfigHandle, cmd: &LsFontsCommand) -> anyho
     let unicode_version = UnicodeVersion {
         version: config.unicode_version,
         ambiguous_are_wide: config.treat_east_asian_ambiguous_width_as_wide,
-        cellwidths: config.cellwidths.clone(),
+        cellwidths: setcellwidths(config.cellwidths.clone()),
     };
 
     let text = match (&cmd.text, &cmd.codepoints) {