From b89adf07d498b0efb1237bb9d95afb8fc3566a9e Mon Sep 17 00:00:00 2001
From: Nathan Vegdahl
Date: Wed, 29 Jan 2025 17:22:13 +0100
Subject: [PATCH 1/3] Fix #19071: ensure `completion_item_hash` serializes items uniquely

Previously, different completion items could potentially produce
colliding hashes, not because of the hash function itself but because
of how the items were serialized into byte streams for hashing. See
#19071 for details. The chances of that happening were low, if it was
possible at all. Nevertheless, this commit ensures that it definitely
can't happen.

A handful of techniques are used in the fix, but they all boil down to
"make sure the byte stream could be re-parsed": if the original item
can be recreated by parsing its serialized bytes, then by construction
no two different items can be serialized to identical byte streams.
---
 crates/rust-analyzer/src/lib.rs | 74 ++++++++++++++++++++++-----------
 1 file changed, 50 insertions(+), 24 deletions(-)

diff --git a/crates/rust-analyzer/src/lib.rs b/crates/rust-analyzer/src/lib.rs
index ccffa7a671e6..98ba8ab3af02 100644
--- a/crates/rust-analyzer/src/lib.rs
+++ b/crates/rust-analyzer/src/lib.rs
@@ -79,32 +79,34 @@ fn completion_item_hash(item: &CompletionItem, is_ref_completion: bool) -> [u8;
             u8::from(relevance.requires_import),
             u8::from(relevance.is_private_editable),
         ]);
-        if let Some(type_match) = &relevance.type_match {
-            let label = match type_match {
-                CompletionRelevanceTypeMatch::CouldUnify => "could_unify",
-                CompletionRelevanceTypeMatch::Exact => "exact",
-            };
-            hasher.update(label);
+
+        match relevance.type_match {
+            None => hasher.update([0u8]),
+            Some(CompletionRelevanceTypeMatch::CouldUnify) => hasher.update([1u8]),
+            Some(CompletionRelevanceTypeMatch::Exact) => hasher.update([2u8]),
         }
+
+        hasher.update([u8::from(relevance.trait_.is_some())]);
         if let Some(trait_) = &relevance.trait_ {
             hasher.update([u8::from(trait_.is_op_method), u8::from(trait_.notable_trait)]);
         }
-        if let Some(postfix_match) = &relevance.postfix_match {
-            let label = match postfix_match {
-                CompletionRelevancePostfixMatch::NonExact => "non_exact",
-                CompletionRelevancePostfixMatch::Exact => "exact",
-            };
-            hasher.update(label);
+
+        match relevance.postfix_match {
+            None => hasher.update([0u8]),
+            Some(CompletionRelevancePostfixMatch::NonExact) => hasher.update([1u8]),
+            Some(CompletionRelevancePostfixMatch::Exact) => hasher.update([2u8]),
         }
+
+        hasher.update([u8::from(relevance.function.is_some())]);
         if let Some(function) = &relevance.function {
             hasher.update([u8::from(function.has_params), u8::from(function.has_self_param)]);
-            let label = match function.return_type {
-                CompletionRelevanceReturnType::Other => "other",
-                CompletionRelevanceReturnType::DirectConstructor => "direct_constructor",
-                CompletionRelevanceReturnType::Constructor => "constructor",
-                CompletionRelevanceReturnType::Builder => "builder",
+            let discriminant: u8 = match function.return_type {
+                CompletionRelevanceReturnType::Other => 0,
+                CompletionRelevanceReturnType::DirectConstructor => 1,
+                CompletionRelevanceReturnType::Constructor => 2,
+                CompletionRelevanceReturnType::Builder => 3,
             };
-            hasher.update(label);
+            hasher.update([discriminant]);
         }
     }
 
@@ -115,35 +117,59 @@ fn completion_item_hash(item: &CompletionItem, is_ref_completion: bool) -> [u8;
         u8::from(item.deprecated),
         u8::from(item.trigger_call_info),
     ]);
+
+    hasher.update(item.label.primary.len().to_le_bytes());
     hasher.update(&item.label.primary);
+
+    hasher.update([u8::from(item.label.detail_left.is_some())]);
     if let Some(label_detail) = &item.label.detail_left {
+        hasher.update(label_detail.len().to_le_bytes());
         hasher.update(label_detail);
     }
+
+    hasher.update([u8::from(item.label.detail_right.is_some())]);
     if let Some(label_detail) = &item.label.detail_right {
+        hasher.update(label_detail.len().to_le_bytes());
         hasher.update(label_detail);
     }
+
     // NB: do not hash edits or source range, as those may change between the time the client sends the resolve request
     // and the time it receives it: some editors do allow changing the buffer between that, leading to ranges being different.
     //
     // Documentation hashing is skipped too, as it's a large blob to process,
     // while not really making completion properties more unique as they are already.
-    hasher.update(item.kind.tag());
+
+    let kind_tag = item.kind.tag();
+    hasher.update(kind_tag.len().to_le_bytes());
+    hasher.update(kind_tag);
+
+    hasher.update(item.lookup.len().to_le_bytes());
     hasher.update(&item.lookup);
+
+    hasher.update([u8::from(item.detail.is_some())]);
     if let Some(detail) = &item.detail {
+        hasher.update(detail.len().to_le_bytes());
         hasher.update(detail);
     }
+
     hash_completion_relevance(&mut hasher, &item.relevance);
+
+    hasher.update([u8::from(item.ref_match.is_some())]);
     if let Some((ref_mode, text_size)) = &item.ref_match {
-        let prefix = match ref_mode {
-            CompletionItemRefMode::Reference(Mutability::Shared) => "&",
-            CompletionItemRefMode::Reference(Mutability::Mut) => "&mut ",
-            CompletionItemRefMode::Dereference => "*",
+        let descriminant = match ref_mode {
+            CompletionItemRefMode::Reference(Mutability::Shared) => 0u8,
+            CompletionItemRefMode::Reference(Mutability::Mut) => 1u8,
+            CompletionItemRefMode::Dereference => 2u8,
         };
-        hasher.update(prefix);
+        hasher.update([descriminant]);
         hasher.update(u32::from(*text_size).to_le_bytes());
     }
+
+    hasher.update(item.import_to_add.len().to_le_bytes());
     for import_path in &item.import_to_add {
+        hasher.update(import_path.len().to_le_bytes());
         hasher.update(import_path);
     }
+
     hasher.finalize()
 }

From 87fb27930a8273a58e5a84f05da0ff6bf184e309 Mon Sep 17 00:00:00 2001
From: Nathan Vegdahl
Date: Wed, 29 Jan 2025 17:47:38 +0100
Subject: [PATCH 2/3] Fix typo

---
 crates/rust-analyzer/src/lib.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/crates/rust-analyzer/src/lib.rs b/crates/rust-analyzer/src/lib.rs
index 98ba8ab3af02..0b459c10cee7 100644
--- a/crates/rust-analyzer/src/lib.rs
+++ b/crates/rust-analyzer/src/lib.rs
@@ -156,12 +156,12 @@ fn completion_item_hash(item: &CompletionItem, is_ref_completion: bool) -> [u8;
 
     hasher.update([u8::from(item.ref_match.is_some())]);
     if let Some((ref_mode, text_size)) = &item.ref_match {
-        let descriminant = match ref_mode {
+        let discriminant = match ref_mode {
             CompletionItemRefMode::Reference(Mutability::Shared) => 0u8,
             CompletionItemRefMode::Reference(Mutability::Mut) => 1u8,
             CompletionItemRefMode::Dereference => 2u8,
         };
-        hasher.update([descriminant]);
+        hasher.update([discriminant]);
         hasher.update(u32::from(*text_size).to_le_bytes());
     }
 

From 88d66a8590cc6c3505c73cd9b52fc516e6a7dd7e Mon Sep 17 00:00:00 2001
From: Nathan Vegdahl
Date: Wed, 29 Jan 2025 20:21:57 +0100
Subject: [PATCH 3/3] Use `to_ne_bytes` instead of `to_le_bytes`

---
 crates/rust-analyzer/src/lib.rs | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/crates/rust-analyzer/src/lib.rs b/crates/rust-analyzer/src/lib.rs
index 0b459c10cee7..c2ef03ecae8d 100644
--- a/crates/rust-analyzer/src/lib.rs
+++ b/crates/rust-analyzer/src/lib.rs
@@ -118,18 +118,18 @@ fn completion_item_hash(item: &CompletionItem, is_ref_completion: bool) -> [u8;
         u8::from(item.trigger_call_info),
     ]);
 
-    hasher.update(item.label.primary.len().to_le_bytes());
+    hasher.update(item.label.primary.len().to_ne_bytes());
     hasher.update(&item.label.primary);
 
     hasher.update([u8::from(item.label.detail_left.is_some())]);
     if let Some(label_detail) = &item.label.detail_left {
-        hasher.update(label_detail.len().to_le_bytes());
+        hasher.update(label_detail.len().to_ne_bytes());
         hasher.update(label_detail);
     }
 
     hasher.update([u8::from(item.label.detail_right.is_some())]);
     if let Some(label_detail) = &item.label.detail_right {
-        hasher.update(label_detail.len().to_le_bytes());
+        hasher.update(label_detail.len().to_ne_bytes());
         hasher.update(label_detail);
     }
 
@@ -140,15 +140,15 @@ fn completion_item_hash(item: &CompletionItem, is_ref_completion: bool) -> [u8;
     // while not really making completion properties more unique as they are already.
 
     let kind_tag = item.kind.tag();
-    hasher.update(kind_tag.len().to_le_bytes());
+    hasher.update(kind_tag.len().to_ne_bytes());
     hasher.update(kind_tag);
 
-    hasher.update(item.lookup.len().to_le_bytes());
+    hasher.update(item.lookup.len().to_ne_bytes());
     hasher.update(&item.lookup);
 
     hasher.update([u8::from(item.detail.is_some())]);
     if let Some(detail) = &item.detail {
-        hasher.update(detail.len().to_le_bytes());
+        hasher.update(detail.len().to_ne_bytes());
         hasher.update(detail);
     }
 
@@ -162,12 +162,12 @@ fn completion_item_hash(item: &CompletionItem, is_ref_completion: bool) -> [u8;
             CompletionItemRefMode::Dereference => 2u8,
         };
         hasher.update([discriminant]);
-        hasher.update(u32::from(*text_size).to_le_bytes());
+        hasher.update(u32::from(*text_size).to_ne_bytes());
     }
 
-    hasher.update(item.import_to_add.len().to_le_bytes());
+    hasher.update(item.import_to_add.len().to_ne_bytes());
     for import_path in &item.import_to_add {
-        hasher.update(import_path.len().to_le_bytes());
+        hasher.update(import_path.len().to_ne_bytes());
         hasher.update(import_path);
     }
 
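
To illustrate the serialization idea behind patch 1, the following is a minimal standalone sketch. It is not the rust-analyzer code; the function names (serialize_naive, serialize_prefixed) and fields are hypothetical. It shows how naive concatenation of fields lets two different items produce identical byte streams, and how prefixing each variable-length field with its length and each optional field with a presence byte makes the stream unambiguously re-parseable, which rules out such collisions.

// Minimal sketch only; the field names and helpers here are hypothetical,
// not rust-analyzer's actual API.

fn serialize_naive(primary: &str, detail: Option<&str>) -> Vec<u8> {
    // Naive scheme: concatenate the raw bytes of each field.
    let mut out = Vec::new();
    out.extend_from_slice(primary.as_bytes());
    if let Some(d) = detail {
        out.extend_from_slice(d.as_bytes());
    }
    out
}

fn serialize_prefixed(primary: &str, detail: Option<&str>) -> Vec<u8> {
    // Re-parseable scheme: length-prefix every variable-length field and
    // write a presence byte for every optional field, mirroring what the
    // patch does with `len().to_ne_bytes()` and `u8::from(x.is_some())`.
    let mut out = Vec::new();
    out.extend_from_slice(&primary.len().to_ne_bytes());
    out.extend_from_slice(primary.as_bytes());
    out.push(u8::from(detail.is_some()));
    if let Some(d) = detail {
        out.extend_from_slice(&d.len().to_ne_bytes());
        out.extend_from_slice(d.as_bytes());
    }
    out
}

fn main() {
    // Two distinct items collide under the naive scheme...
    assert_eq!(serialize_naive("foo", Some("bar")), serialize_naive("foobar", None));
    // ...but not under the length-prefixed, presence-flagged scheme.
    assert_ne!(serialize_prefixed("foo", Some("bar")), serialize_prefixed("foobar", None));
}

The same reasoning applies to the enum fields in the patch: hashing a fixed one-byte discriminant instead of a human-readable label keeps every field's encoding self-delimiting.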