diff --git a/Cargo.lock b/Cargo.lock index 88b838f..64cba63 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "anstream" @@ -116,7 +116,7 @@ checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" [[package]] name = "ccs" -version = "0.9.0" +version = "0.9.1" dependencies = [ "anstream 0.2.6", "anyhow", @@ -126,6 +126,7 @@ dependencies = [ "owo-colors", "serde", "serde_json", + "str_indices", "toml", ] @@ -433,6 +434,12 @@ dependencies = [ "serde", ] +[[package]] +name = "str_indices" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d08889ec5408683408db66ad89e0e1f93dff55c73a4ccc71c427d5b277ee47e6" + [[package]] name = "strsim" version = "0.11.1" diff --git a/Cargo.toml b/Cargo.toml index e620d9f..360911a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "ccs" -version = "0.9.0" +version = "0.9.1" edition = "2021" license = "0BSD" repository = "https://github.com/museun/ccs" @@ -14,4 +14,5 @@ indoc = "2.0.5" owo-colors = { version = "3.5.0", features = [ "supports-colors" ] } serde = { version = "1.0.213", features = [ "derive" ] } serde_json = "1.0.132" +str_indices = "0.4.4" toml = "0.7.8" diff --git a/src/parse/span.rs b/src/parse/span.rs index 2d67db9..dc9e827 100644 --- a/src/parse/span.rs +++ b/src/parse/span.rs @@ -22,9 +22,13 @@ impl Span { if matches!(render_options.render, RenderStyle::Full) { use owo_colors::OwoColorize as _; self.relocate().try_for_each(|(start, end, text)| { + let start = floor_char_boundary(text, start); + let end = ceil_char_boundary(text, end); + let head = &text[..start]; let mid = &text[start..end]; let tail = &text[end..]; + writeln!( out, " {head}{mid}{tail}", @@ -68,14 +72,56 @@ impl Span { left_pad = span.text.len() - s.len(); } + let start = span.highlight_start.saturating_sub(left_pad + 1); + 
let end = span.highlight_end.saturating_sub(left_pad + 1);
+
+                // `highlight_start` / `highlight_end` are character columns, but every
+                // consumer slices `text` with them, so they have to become byte offsets:
+                // char index -> byte index is `to_byte_idx` (`from_byte_idx` is the
+                // opposite direction and misplaces the highlight on any line that
+                // contains multi-byte characters). The conversion is done against the
+                // same trimmed slice that is returned below.
+                // NOTE(review): assumes the `left_pad` bytes that were trimmed are ASCII
+                // whitespace, so their byte count equals their char count -- confirm.
+                let start = str_indices::chars::to_byte_idx(&span.text[left_pad..], start);
+                let end = str_indices::chars::to_byte_idx(&span.text[left_pad..], end);
+
                 // error messages are 1 indexed
                 break Some((
-                    span.highlight_start.saturating_sub(left_pad + 1),
-                    span.highlight_end.saturating_sub(left_pad + 1),
+                    start,
+                    end,
                     // TODO use unicode-segmentation here
+                    // what does this mean? how would segmentation be applicable here?
                     &span.text[left_pad..],
                 ));
             }
         })
     }
 }
+
+/// Returns the closest char boundary in `str` that is not above `index`.
+///
+/// An `index` at or past the end of `str` yields `str.len()`.
+///
+/// NOTE this mirrors the standard library's `str::floor_char_boundary`
+/// TODO it's currently unstable in std, but this copy is fine for what we need
+fn floor_char_boundary(str: &str, index: usize) -> usize {
+    if index >= str.len() {
+        return str.len();
+    }
+
+    // a UTF-8 sequence is at most 4 bytes long, so only `index - 3..=index`
+    // has to be scanned (backwards) to find the start of the current char
+    let lower_bound = index.saturating_sub(3);
+    let offset = str.as_bytes()[lower_bound..=index]
+        .iter()
+        .rposition(|&b| is_utf8_char_boundary(b))
+        .expect("valid UTF-8 has a char boundary in every 4 byte window");
+
+    lower_bound + offset
+}
+
+/// Returns the closest char boundary in `str` that is not below `index`.
+///
+/// An `index` past the end of `str` yields `str.len()`.
+///
+/// NOTE this mirrors the standard library's `str::ceil_char_boundary`
+/// TODO it's currently unstable in std, but this copy is fine for what we need
+fn ceil_char_boundary(str: &str, index: usize) -> usize {
+    if index > str.len() {
+        return str.len();
+    }
+
+    // scan forward at most 4 bytes; if no boundary byte is found in that
+    // window, the end of the window (capped at `str.len()`) is the answer
+    let upper_bound = Ord::min(index + 4, str.len());
+    str.as_bytes()[index..upper_bound]
+        .iter()
+        .position(|&b| is_utf8_char_boundary(b))
+        .map_or(upper_bound, |pos| pos + index)
+}
+
+// NOTE impl detail of `u8::is_utf8_char_boundary` used by `floor_char_boundary` and `ceil_char_boundary`
+/// Returns `true` when `byte` is not a UTF-8 continuation byte (`0b10xx_xxxx`).
+const fn is_utf8_char_boundary(byte: u8) -> bool {
+    // This is bit magic equivalent to: b < 128 || b >= 192
+    (byte as i8) >= -0x40
+}
diff --git a/src/parse/text.rs b/src/parse/text.rs
index 957e79f..82e5a12 100644
--- a/src/parse/text.rs
+++ b/src/parse/text.rs
@@ -1,6 +1,6 @@
 #[derive(Debug, serde::Deserialize)]
 pub struct Text {
-    pub highlight_start: usize,
-    pub highlight_end: usize,
+    pub highlight_start: usize, // 1-based character column into `text`, not a byte offset (rustc JSON diagnostics -- TODO confirm)
+    pub highlight_end: usize, // 1-based character column into `text` where the highlight ends (TODO confirm inclusive/exclusive)
     pub text: String,
 }