Skip to content

Commit

Permalink
fix: count positions as utf-16 code units
Browse files Browse the repository at this point in the history
resolves #22
  • Loading branch information
tekumara committed Dec 26, 2023
1 parent b7b3bd3 commit de52345
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 10 deletions.
1 change: 0 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion crates/typos-lsp/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ serde = { version = "1.0", features = ["derive"] }
ignore = "0.4.20"
matchit = "0.7.1"
shellexpand = "3.1.0"
unicode-segmentation = "1.10.1"
regex = "1.10.2"
once_cell = "1.19.0"

Expand Down
10 changes: 7 additions & 3 deletions crates/typos-lsp/src/lsp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,8 @@ impl LanguageServer for Backend<'static, 'static> {

Ok(InitializeResult {
capabilities: ServerCapabilities {
// only UTF-16 positions are supported for now; UTF-16 is also the LSP default when no encoding is specified
position_encoding: Some(PositionEncodingKind::UTF16),
text_document_sync: Some(TextDocumentSyncCapability::Kind(
// TODO: should we support incremental?
TextDocumentSyncKind::FULL,
Expand Down Expand Up @@ -459,9 +461,11 @@ impl AccumulatePosition {
.unwrap_or(0);

let before_typo = String::from_utf8_lossy(&buffer[line_start..byte_offset]);
let line_pos =
unicode_segmentation::UnicodeSegmentation::graphemes(before_typo.as_ref(), true)
.count();

// count UTF-16 code units as per
// https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#textDocuments
// UTF-16 is the only position encoding we support for now
let line_pos = before_typo.chars().map(char::len_utf16).sum();

self.line_num = line_num;
self.line_pos = line_pos;
Expand Down
17 changes: 12 additions & 5 deletions crates/typos-lsp/tests/integration_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ async fn test_initialize_e2e() {
"codeActionKinds": ["quickfix"],
"workDoneProgress": false
},
"positionEncoding": "utf-16",
"textDocumentSync": 1,
"workspace": {
"workspaceFolders": { "changeNotifications": true, "supported": true }
Expand Down Expand Up @@ -218,17 +219,23 @@ async fn test_custom_config_file() {
}

#[test_log::test(tokio::test)]
async fn test_unicode_diagnostics() {
let did_open = &did_open("¿Qué hace él?");

async fn test_position_with_unicode_text() {
let mut server = TestServer::new();
let _ = server.request(&initialize()).await;

// start position counts UTF-16 code units, not graphemes: a grapheme made of multiple code points advances the position by more than one
// ¿ and é are two-byte code points in UTF-8, but each is a single UTF-16 code unit
let unicode_text = &did_open("¿Qué hace él?");
similar_asserts::assert_eq!(
server.request(&did_open).await,
server.request(&unicode_text).await,
publish_diagnostics(&[diag("`hace` should be `have`", 0, 5, 9)])
);

// ẽ has two code points U+0065 U+0303 (latin small letter e, combining tilde), so it counts as two UTF-16 code units
let unicode_text = &did_open("ẽ hace");
similar_asserts::assert_eq!(
server.request(&unicode_text).await,
publish_diagnostics(&[diag("`hace` should be `have`", 0, 3, 7)])
);
}

fn initialize() -> String {
Expand Down

0 comments on commit de52345

Please sign in to comment.