From b89adf07d498b0efb1237bb9d95afb8fc3566a9e Mon Sep 17 00:00:00 2001
From: Nathan Vegdahl
Date: Wed, 29 Jan 2025 17:22:13 +0100
Subject: [PATCH 1/3] Fix #19071: ensure `completion_item_hash` serializes items uniquely

Previously, different completion items could potentially produce
colliding hashes, not because of the hash function itself but because
of how the items were serialized into byte streams for hashing. See
#19071 for details. The chances of that happening were low, if it was
possible at all. Nevertheless, this commit ensures that it definitely
can't happen.

A handful of techniques are used in the fix, but they all boil down to
"make sure the byte stream could be re-parsed": if the original item
can be recreated by parsing its serialized bytes, then by construction
no two different items can be serialized to identical byte streams.
---
 crates/rust-analyzer/src/lib.rs | 74 ++++++++++++++++++++++-----------
 1 file changed, 50 insertions(+), 24 deletions(-)

diff --git a/crates/rust-analyzer/src/lib.rs b/crates/rust-analyzer/src/lib.rs
index ccffa7a671e6..98ba8ab3af02 100644
--- a/crates/rust-analyzer/src/lib.rs
+++ b/crates/rust-analyzer/src/lib.rs
@@ -79,32 +79,34 @@ fn completion_item_hash(item: &CompletionItem, is_ref_completion: bool) -> [u8;
             u8::from(relevance.requires_import),
             u8::from(relevance.is_private_editable),
         ]);
-        if let Some(type_match) = &relevance.type_match {
-            let label = match type_match {
-                CompletionRelevanceTypeMatch::CouldUnify => "could_unify",
-                CompletionRelevanceTypeMatch::Exact => "exact",
-            };
-            hasher.update(label);
+
+        match relevance.type_match {
+            None => hasher.update([0u8]),
+            Some(CompletionRelevanceTypeMatch::CouldUnify) => hasher.update([1u8]),
+            Some(CompletionRelevanceTypeMatch::Exact) => hasher.update([2u8]),
         }
+
+        hasher.update([u8::from(relevance.trait_.is_some())]);
         if let Some(trait_) = &relevance.trait_ {
             hasher.update([u8::from(trait_.is_op_method), u8::from(trait_.notable_trait)]);
         }
-        if let Some(postfix_match) = &relevance.postfix_match {
-            let label = match postfix_match {
-                CompletionRelevancePostfixMatch::NonExact => "non_exact",
-                CompletionRelevancePostfixMatch::Exact => "exact",
-            };
-            hasher.update(label);
+
+        match relevance.postfix_match {
+            None => hasher.update([0u8]),
+            Some(CompletionRelevancePostfixMatch::NonExact) => hasher.update([1u8]),
+            Some(CompletionRelevancePostfixMatch::Exact) => hasher.update([2u8]),
         }
+
+        hasher.update([u8::from(relevance.function.is_some())]);
         if let Some(function) = &relevance.function {
             hasher.update([u8::from(function.has_params), u8::from(function.has_self_param)]);
-            let label = match function.return_type {
-                CompletionRelevanceReturnType::Other => "other",
-                CompletionRelevanceReturnType::DirectConstructor => "direct_constructor",
-                CompletionRelevanceReturnType::Constructor => "constructor",
-                CompletionRelevanceReturnType::Builder => "builder",
+            let discriminant: u8 = match function.return_type {
+                CompletionRelevanceReturnType::Other => 0,
+                CompletionRelevanceReturnType::DirectConstructor => 1,
+                CompletionRelevanceReturnType::Constructor => 2,
+                CompletionRelevanceReturnType::Builder => 3,
             };
-            hasher.update(label);
+            hasher.update([discriminant]);
         }
     }
 
@@ -115,35 +117,59 @@ fn completion_item_hash(item: &CompletionItem, is_ref_completion: bool) -> [u8;
         u8::from(item.deprecated),
         u8::from(item.trigger_call_info),
     ]);
+
+    hasher.update(item.label.primary.len().to_le_bytes());
     hasher.update(&item.label.primary);
+
+    hasher.update([u8::from(item.label.detail_left.is_some())]);
     if let Some(label_detail) = &item.label.detail_left {
+        hasher.update(label_detail.len().to_le_bytes());
         hasher.update(label_detail);
     }
+
+    hasher.update([u8::from(item.label.detail_right.is_some())]);
     if let Some(label_detail) = &item.label.detail_right {
+        hasher.update(label_detail.len().to_le_bytes());
         hasher.update(label_detail);
     }
+
     // NB: do not hash edits or source range, as those may change between the time the client sends the resolve request
     // and the time it receives it: some editors do allow changing the buffer between that, leading to ranges being different.
     //
     // Documentation hashing is skipped too, as it's a large blob to process,
     // while not really making completion properties more unique as they are already.
-    hasher.update(item.kind.tag());
+
+    let kind_tag = item.kind.tag();
+    hasher.update(kind_tag.len().to_le_bytes());
+    hasher.update(kind_tag);
+
+    hasher.update(item.lookup.len().to_le_bytes());
     hasher.update(&item.lookup);
+
+    hasher.update([u8::from(item.detail.is_some())]);
     if let Some(detail) = &item.detail {
+        hasher.update(detail.len().to_le_bytes());
         hasher.update(detail);
     }
+
     hash_completion_relevance(&mut hasher, &item.relevance);
+
+    hasher.update([u8::from(item.ref_match.is_some())]);
     if let Some((ref_mode, text_size)) = &item.ref_match {
-        let prefix = match ref_mode {
-            CompletionItemRefMode::Reference(Mutability::Shared) => "&",
-            CompletionItemRefMode::Reference(Mutability::Mut) => "&mut ",
-            CompletionItemRefMode::Dereference => "*",
+        let descriminant = match ref_mode {
+            CompletionItemRefMode::Reference(Mutability::Shared) => 0u8,
+            CompletionItemRefMode::Reference(Mutability::Mut) => 1u8,
+            CompletionItemRefMode::Dereference => 2u8,
         };
-        hasher.update(prefix);
+        hasher.update([descriminant]);
         hasher.update(u32::from(*text_size).to_le_bytes());
     }
+
+    hasher.update(item.import_to_add.len().to_le_bytes());
     for import_path in &item.import_to_add {
+        hasher.update(import_path.len().to_le_bytes());
         hasher.update(import_path);
     }
+
     hasher.finalize()
 }

From 87fb27930a8273a58e5a84f05da0ff6bf184e309 Mon Sep 17 00:00:00 2001
From: Nathan Vegdahl
Date: Wed, 29 Jan 2025 17:47:38 +0100
Subject: [PATCH 2/3] Fix typo

---
 crates/rust-analyzer/src/lib.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/crates/rust-analyzer/src/lib.rs b/crates/rust-analyzer/src/lib.rs
index 98ba8ab3af02..0b459c10cee7 100644
--- a/crates/rust-analyzer/src/lib.rs
+++ b/crates/rust-analyzer/src/lib.rs
@@ -156,12 +156,12 @@ fn completion_item_hash(item: &CompletionItem, is_ref_completion: bool) -> [u8;
 
     hasher.update([u8::from(item.ref_match.is_some())]);
     if let Some((ref_mode, text_size)) = &item.ref_match {
-        let descriminant = match ref_mode {
+        let discriminant = match ref_mode {
             CompletionItemRefMode::Reference(Mutability::Shared) => 0u8,
             CompletionItemRefMode::Reference(Mutability::Mut) => 1u8,
             CompletionItemRefMode::Dereference => 2u8,
         };
-        hasher.update([descriminant]);
+        hasher.update([discriminant]);
         hasher.update(u32::from(*text_size).to_le_bytes());
     }
 

From 88d66a8590cc6c3505c73cd9b52fc516e6a7dd7e Mon Sep 17 00:00:00 2001
From: Nathan Vegdahl
Date: Wed, 29 Jan 2025 20:21:57 +0100
Subject: [PATCH 3/3] Use `to_ne_bytes` instead of `to_le_bytes`

---
 crates/rust-analyzer/src/lib.rs | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/crates/rust-analyzer/src/lib.rs b/crates/rust-analyzer/src/lib.rs
index 0b459c10cee7..c2ef03ecae8d 100644
--- a/crates/rust-analyzer/src/lib.rs
+++ b/crates/rust-analyzer/src/lib.rs
@@ -118,18 +118,18 @@ fn completion_item_hash(item: &CompletionItem, is_ref_completion: bool) -> [u8;
         u8::from(item.trigger_call_info),
     ]);
 
-    hasher.update(item.label.primary.len().to_le_bytes());
+    hasher.update(item.label.primary.len().to_ne_bytes());
     hasher.update(&item.label.primary);
 
     hasher.update([u8::from(item.label.detail_left.is_some())]);
     if let Some(label_detail) = &item.label.detail_left {
-        hasher.update(label_detail.len().to_le_bytes());
+        hasher.update(label_detail.len().to_ne_bytes());
         hasher.update(label_detail);
     }
 
     hasher.update([u8::from(item.label.detail_right.is_some())]);
     if let Some(label_detail) = &item.label.detail_right {
-        hasher.update(label_detail.len().to_le_bytes());
+        hasher.update(label_detail.len().to_ne_bytes());
         hasher.update(label_detail);
     }
 
@@ -140,15 +140,15 @@ fn completion_item_hash(item: &CompletionItem, is_ref_completion: bool) -> [u8;
     // while not really making completion properties more unique as they are already.
 
     let kind_tag = item.kind.tag();
-    hasher.update(kind_tag.len().to_le_bytes());
+    hasher.update(kind_tag.len().to_ne_bytes());
     hasher.update(kind_tag);
 
-    hasher.update(item.lookup.len().to_le_bytes());
+    hasher.update(item.lookup.len().to_ne_bytes());
     hasher.update(&item.lookup);
 
     hasher.update([u8::from(item.detail.is_some())]);
     if let Some(detail) = &item.detail {
-        hasher.update(detail.len().to_le_bytes());
+        hasher.update(detail.len().to_ne_bytes());
         hasher.update(detail);
     }
 
@@ -162,12 +162,12 @@ fn completion_item_hash(item: &CompletionItem, is_ref_completion: bool) -> [u8;
             CompletionItemRefMode::Dereference => 2u8,
         };
         hasher.update([discriminant]);
-        hasher.update(u32::from(*text_size).to_le_bytes());
+        hasher.update(u32::from(*text_size).to_ne_bytes());
     }
 
-    hasher.update(item.import_to_add.len().to_le_bytes());
+    hasher.update(item.import_to_add.len().to_ne_bytes());
     for import_path in &item.import_to_add {
-        hasher.update(import_path.len().to_le_bytes());
+        hasher.update(import_path.len().to_ne_bytes());
         hasher.update(import_path);
     }
 
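
To illustrate the serialization idea behind patch 1, the following is a minimal standalone sketch. It is not the rust-analyzer code; the function names (serialize_naive, serialize_prefixed) and fields are hypothetical. It shows how naive concatenation of fields lets two different items produce identical byte streams, and how prefixing each variable-length field with its length and each optional field with a presence byte makes the stream unambiguously re-parseable, which rules out such collisions.

// Minimal sketch only; the field names and helpers here are hypothetical,
// not rust-analyzer's actual API.

fn serialize_naive(primary: &str, detail: Option<&str>) -> Vec<u8> {
    // Naive scheme: concatenate the raw bytes of each field.
    let mut out = Vec::new();
    out.extend_from_slice(primary.as_bytes());
    if let Some(d) = detail {
        out.extend_from_slice(d.as_bytes());
    }
    out
}

fn serialize_prefixed(primary: &str, detail: Option<&str>) -> Vec<u8> {
    // Re-parseable scheme: length-prefix every variable-length field and
    // write a presence byte for every optional field, mirroring what the
    // patch does with `len().to_ne_bytes()` and `u8::from(x.is_some())`.
    let mut out = Vec::new();
    out.extend_from_slice(&primary.len().to_ne_bytes());
    out.extend_from_slice(primary.as_bytes());
    out.push(u8::from(detail.is_some()));
    if let Some(d) = detail {
        out.extend_from_slice(&d.len().to_ne_bytes());
        out.extend_from_slice(d.as_bytes());
    }
    out
}

fn main() {
    // Two distinct items collide under the naive scheme...
    assert_eq!(serialize_naive("foo", Some("bar")), serialize_naive("foobar", None));
    // ...but not under the length-prefixed, presence-flagged scheme.
    assert_ne!(serialize_prefixed("foo", Some("bar")), serialize_prefixed("foobar", None));
}

The same reasoning applies to the enum fields in the patch: hashing a fixed one-byte discriminant instead of a human-readable label keeps every field's encoding self-delimiting.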