From a96cf37ce94044ee8a3204971ba0570d0ac55db6 Mon Sep 17 00:00:00 2001 From: HAMANO Tsukasa Date: Thu, 17 Oct 2024 14:15:42 +0900 Subject: [PATCH 1/2] add cellwidths option #6289 --- config/src/config.rs | 4 ++++ config/src/terminal.rs | 1 + term/src/config.rs | 1 + term/src/terminalstate/performer.rs | 4 ++-- termwiz/src/cell.rs | 33 +++++++++++++++++++++++------ termwiz/src/surface/line/line.rs | 2 +- wezterm-gui/src/main.rs | 1 + 7 files changed, 36 insertions(+), 10 deletions(-) diff --git a/config/src/config.rs b/config/src/config.rs index 113cdbfba4c..4bd0d2a6465 100644 --- a/config/src/config.rs +++ b/config/src/config.rs @@ -38,6 +38,7 @@ use std::path::{Path, PathBuf}; use std::sync::atomic::Ordering; use std::time::Duration; use termwiz::hyperlink; +use termwiz::cell::CellWidth; use termwiz::surface::CursorShape; use wezterm_bidi::ParagraphDirectionHint; use wezterm_config_derive::ConfigMeta; @@ -813,6 +814,9 @@ pub struct Config { #[dynamic(default)] pub treat_east_asian_ambiguous_width_as_wide: bool, + #[dynamic(default)] + pub cellwidths: Option<Vec<CellWidth>>, + #[dynamic(default = "default_true")] pub allow_download_protocols: bool, diff --git a/config/src/terminal.rs b/config/src/terminal.rs index 305b4851484..dedf4c07edf 100644 --- a/config/src/terminal.rs +++ b/config/src/terminal.rs @@ -105,6 +105,7 @@ impl wezterm_term::TerminalConfiguration for TermConfig { UnicodeVersion { version: config.unicode_version, ambiguous_are_wide: config.treat_east_asian_ambiguous_width_as_wide, + cellwidths: config.cellwidths.clone() } } diff --git a/term/src/config.rs b/term/src/config.rs index b09fb43ccdd..54857473964 100644 --- a/term/src/config.rs +++ b/term/src/config.rs @@ -191,6 +191,7 @@ pub trait TerminalConfiguration: Downcast + std::fmt::Debug + Send + Sync { UnicodeVersion { version: 9, ambiguous_are_wide: false, + cellwidths: None, } } diff --git a/term/src/terminalstate/performer.rs b/term/src/terminalstate/performer.rs index 817b137baf6..43ebc367753 100644 ---
a/term/src/terminalstate/performer.rs +++ b/term/src/terminalstate/performer.rs @@ -132,7 +132,7 @@ impl<'a> Performer<'a> { for g in Graphemes::new(text) { let g = self.remap_grapheme(g); - let print_width = grapheme_column_width(g, Some(self.unicode_version)); + let print_width = grapheme_column_width(g, Some(self.unicode_version.clone())); if print_width == 0 { // We got a zero-width grapheme. // We used to force them into a cell to guarantee that we @@ -816,7 +816,7 @@ impl<'a> Performer<'a> { self.unicode_version.version = n; } ITermProprietary::UnicodeVersion(ITermUnicodeVersionOp::Push(label)) => { - let vers = self.unicode_version; + let vers = self.unicode_version.clone(); self.unicode_version_stack .push(UnicodeVersionStackEntry { vers, label }); } diff --git a/termwiz/src/cell.rs b/termwiz/src/cell.rs index 0af9908ee38..08f124d7dbd 100644 --- a/termwiz/src/cell.rs +++ b/termwiz/src/cell.rs @@ -881,10 +881,18 @@ impl Cell { } } -#[derive(Clone, Copy, Debug, Eq, PartialEq)] +#[derive(Clone, Debug, Eq, PartialEq, FromDynamic, ToDynamic)] +pub struct CellWidth { + pub first: u32, + pub last: u32, + pub width: u8, +} + +#[derive(Clone, Debug, Eq, PartialEq)] pub struct UnicodeVersion { pub version: u8, pub ambiguous_are_wide: bool, + pub cellwidths: Option<Vec<CellWidth>>, } impl UnicodeVersion { @@ -892,6 +900,7 @@ impl UnicodeVersion { Self { version, ambiguous_are_wide: false, + cellwidths: None, } } @@ -911,6 +920,18 @@ impl UnicodeVersion { } } + #[inline] + fn wcwidth(&self, c: char) -> usize { + if let Some(ref cellwidths) = self.cellwidths { + for cellwidth in cellwidths { + if cellwidth.first <= c as u32 && c as u32 <= cellwidth.last { + return cellwidth.width.into() + } + } + } + self.width(WCWIDTH_TABLE.classify(c)) + } + #[inline] pub fn idx(&self) -> usize { (if self.version > 9 { 2 } else { 0 }) | (if self.ambiguous_are_wide { 1 } else { 0 }) @@ -920,6 +941,7 @@ impl UnicodeVersion { pub const LATEST_UNICODE_VERSION: UnicodeVersion = UnicodeVersion { version:
14, ambiguous_are_wide: false, + cellwidths: None, }; /// Returns the number of cells visually occupied by a sequence @@ -928,7 +950,7 @@ pub const LATEST_UNICODE_VERSION: UnicodeVersion = UnicodeVersion { /// and sums up the length. pub fn unicode_column_width(s: &str, version: Option<UnicodeVersion>) -> usize { Graphemes::new(s) - .map(|g| grapheme_column_width(g, version)) + .map(|g| grapheme_column_width(g, version.clone())) .sum() } @@ -977,13 +999,11 @@ pub fn grapheme_column_width(s: &str, version: Option<UnicodeVersion>) -> usize // cannot be a sequence with a variation selector, so we don't // need to requested `Presentation` for it. if s.len() == 1 { - let c = WCWIDTH_TABLE.classify(s.as_bytes()[0] as char); - return version.width(c); + return version.wcwidth(s.as_bytes()[0] as char); } // Slow path: `s.chars()` will dominate and pull up the minimum // runtime to ~20ns - if version.version >= 14 { // Lookup the grapheme to see if the presentation of // the grapheme forces the width. We can bypass @@ -999,8 +1019,7 @@ pub fn grapheme_column_width(s: &str, version: Option<UnicodeVersion>) -> usize // Otherwise, classify and sum up let mut width = 0; for c in s.chars() { - let c = WCWIDTH_TABLE.classify(c); - width += version.width(c); + width += version.wcwidth(c); } width.min(2) diff --git a/termwiz/src/surface/line/line.rs b/termwiz/src/surface/line/line.rs index a6f3c1a0cfc..fc8cc75809c 100644 --- a/termwiz/src/surface/line/line.rs +++ b/termwiz/src/surface/line/line.rs @@ -138,7 +138,7 @@ impl Line { let mut cells = Vec::new(); for sub in Graphemes::new(s) { - let cell = Cell::new_grapheme(sub, attrs.clone(), unicode_version); + let cell = Cell::new_grapheme(sub, attrs.clone(), unicode_version.clone()); let width = cell.width(); cells.push(cell); for _ in 1..width { diff --git a/wezterm-gui/src/main.rs b/wezterm-gui/src/main.rs index 4d2829aa4c6..bbe24e2fa89 100644 --- a/wezterm-gui/src/main.rs +++ b/wezterm-gui/src/main.rs @@ -886,6 +886,7 @@ pub fn run_ls_fonts(config: config::ConfigHandle, cmd:
&LsFontsCommand) -> anyho let unicode_version = UnicodeVersion { version: config.unicode_version, ambiguous_are_wide: config.treat_east_asian_ambiguous_width_as_wide, + cellwidths: config.cellwidths.clone(), }; let text = match (&cmd.text, &cmd.codepoints) { From 38aa08b0080c5ad7d9794b3fe413c407d2975958 Mon Sep 17 00:00:00 2001 From: HAMANO Tsukasa Date: Sat, 19 Oct 2024 00:00:58 +0900 Subject: [PATCH 2/2] #6289 more efficient version, converting from a list of codepoints to a hashmap --- config/src/terminal.rs | 3 ++- termwiz/src/cell.rs | 23 ++++++++++++++++++----- wezterm-gui/src/main.rs | 3 ++- 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/config/src/terminal.rs b/config/src/terminal.rs index dedf4c07edf..9d96c41f141 100644 --- a/config/src/terminal.rs +++ b/config/src/terminal.rs @@ -3,6 +3,7 @@ use crate::{configuration, ConfigHandle, NewlineCanon}; use std::sync::Mutex; use termwiz::cell::UnicodeVersion; +use termwiz::cell::setcellwidths; use wezterm_term::color::ColorPalette; use wezterm_term::config::BidiMode; @@ -105,7 +106,7 @@ impl wezterm_term::TerminalConfiguration for TermConfig { UnicodeVersion { version: config.unicode_version, ambiguous_are_wide: config.treat_east_asian_ambiguous_width_as_wide, - cellwidths: config.cellwidths.clone() + cellwidths: setcellwidths(config.cellwidths.clone()), } } diff --git a/termwiz/src/cell.rs b/termwiz/src/cell.rs index 08f124d7dbd..09d6bbc6fec 100644 --- a/termwiz/src/cell.rs +++ b/termwiz/src/cell.rs @@ -12,6 +12,7 @@ use std::hash::{Hash, Hasher}; use std::mem; use std::sync::Arc; use wezterm_dynamic::{FromDynamic, ToDynamic}; +use std::collections::HashMap; #[cfg_attr(feature = "use_serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)] @@ -888,11 +889,25 @@ pub struct CellWidth { pub width: u8, } +pub fn setcellwidths(cellwidths: Option<Vec<CellWidth>>) -> Option<HashMap<u32, u8>> { + if let Some(ref cellwidths) = cellwidths { + let mut map: HashMap<u32, u8> = HashMap::new(); + for cellwidth in
cellwidths { + for i in cellwidth.first..cellwidth.last+1 { + map.insert(i, cellwidth.width); + } + } + return Some(map); + } else { + return None; + } +} + #[derive(Clone, Debug, Eq, PartialEq)] pub struct UnicodeVersion { pub version: u8, pub ambiguous_are_wide: bool, - pub cellwidths: Option<Vec<CellWidth>>, + pub cellwidths: Option<HashMap<u32, u8>>, } impl UnicodeVersion { @@ -923,10 +938,8 @@ impl UnicodeVersion { #[inline] fn wcwidth(&self, c: char) -> usize { if let Some(ref cellwidths) = self.cellwidths { - for cellwidth in cellwidths { - if cellwidth.first <= c as u32 && c as u32 <= cellwidth.last { - return cellwidth.width.into() - } + if let Some(width) = cellwidths.get(&(c as u32)) { + return (*width).into() } } self.width(WCWIDTH_TABLE.classify(c)) diff --git a/wezterm-gui/src/main.rs b/wezterm-gui/src/main.rs index bbe24e2fa89..cdc0e6a2181 100644 --- a/wezterm-gui/src/main.rs +++ b/wezterm-gui/src/main.rs @@ -33,6 +33,7 @@ use wezterm_font::FontConfiguration; use wezterm_gui_subcommands::*; use wezterm_mux_server_impl::update_mux_domains; use wezterm_toast_notification::*; +use termwiz::cell::setcellwidths; mod colorease; mod commands; @@ -886,7 +887,7 @@ pub fn run_ls_fonts(config: config::ConfigHandle, cmd: &LsFontsCommand) -> anyho let unicode_version = UnicodeVersion { version: config.unicode_version, ambiguous_are_wide: config.treat_east_asian_ambiguous_width_as_wide, - cellwidths: setcellwidths(config.cellwidths.clone()), + cellwidths: setcellwidths(config.cellwidths.clone()), }; let text = match (&cmd.text, &cmd.codepoints) {