From 25c2ad15e596714df1f5ac6756aadfdc6adf1051 Mon Sep 17 00:00:00 2001
From: Sanket Kedia <sanket@trychroma.com>
Date: Tue, 22 Apr 2025 13:05:53 -0700
Subject: [PATCH 1/5] [ENH] Turn on spann by default

---
 rust/frontend/sample_configs/distributed.yaml | 2 +-
 rust/frontend/sample_configs/tilt_config.yaml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/rust/frontend/sample_configs/distributed.yaml b/rust/frontend/sample_configs/distributed.yaml
index ee42bffe86f..c1a3826ed39 100644
--- a/rust/frontend/sample_configs/distributed.yaml
+++ b/rust/frontend/sample_configs/distributed.yaml
@@ -46,4 +46,4 @@ scorecard:
       - "collection_id:*"
     score: 100
 enable_span_indexing: true
-default_knn_index: "hnsw"
+default_knn_index: "spann"
diff --git a/rust/frontend/sample_configs/tilt_config.yaml b/rust/frontend/sample_configs/tilt_config.yaml
index 520bfd87bf9..9530ecfd521 100644
--- a/rust/frontend/sample_configs/tilt_config.yaml
+++ b/rust/frontend/sample_configs/tilt_config.yaml
@@ -54,4 +54,4 @@ scorecard:
 circuit_breaker:
   requests: 1000
 enable_span_indexing: true
-default_knn_index: "hnsw"
+default_knn_index: "spann"

From 914d366278929717f0c1f9223a9791acbba7f9ae Mon Sep 17 00:00:00 2001
From: Sanket Kedia <sanket@trychroma.com>
Date: Thu, 24 Apr 2025 20:31:36 -0700
Subject: [PATCH 2/5] Take lock before doing hnsw.open()

---
 rust/index/src/spann/types.rs | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/rust/index/src/spann/types.rs b/rust/index/src/spann/types.rs
index 3b9f753fd91..9349e1f56f2 100644
--- a/rust/index/src/spann/types.rs
+++ b/rust/index/src/spann/types.rs
@@ -2022,6 +2022,11 @@ impl<'me> SpannIndexReader<'me> {
         dimensionality: usize,
         ef_search: usize,
     ) -> Result<HnswIndexRef, SpannIndexReaderError> {
+        // We take a lock here to serialize concurrent opens of the same index.
+        // Otherwise, we could end up with a corrupted index since the filesystem
+        // operations are not guaranteed to be atomic.
+        // The lock is a partitioned mutex to allow for higher concurrency across collections.
+        let _guard = hnsw_provider.write_mutex.lock(id).await;
         match hnsw_provider.get(id, cache_key).await {
             Some(index) => Ok(index),
             None => {

From 49ad1f5b90e66e59665b742a0385609b5f78fd84 Mon Sep 17 00:00:00 2001
From: Sanket Kedia <sanket@trychroma.com>
Date: Thu, 24 Apr 2025 20:34:49 -0700
Subject: [PATCH 3/5] fix rust test

---
 rust/frontend/src/impls/service_based_frontend.rs | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/rust/frontend/src/impls/service_based_frontend.rs b/rust/frontend/src/impls/service_based_frontend.rs
index 4d79e258b20..5c269164fa9 100644
--- a/rust/frontend/src/impls/service_based_frontend.rs
+++ b/rust/frontend/src/impls/service_based_frontend.rs
@@ -1438,9 +1438,13 @@ mod tests {
         assert!(segments.iter().any(
             |s| s.r#type == SegmentType::BlockfileMetadata && s.scope == SegmentScope::METADATA
         ));
-        assert!(segments
-            .iter()
-            .any(|s| s.r#type == SegmentType::HnswDistributed && s.scope == SegmentScope::VECTOR));
+        assert!(
+            segments.iter().any(
+                |s| s.r#type == SegmentType::HnswDistributed && s.scope == SegmentScope::VECTOR
+            ) || segments
+                .iter()
+                .any(|s| s.r#type == SegmentType::Spann && s.scope == SegmentScope::VECTOR)
+        );
         assert!(segments
             .iter()
             .any(|s| s.r#type == SegmentType::BlockfileRecord && s.scope == SegmentScope::RECORD));

From d05268fa2113d7dfb389290804e031b64d906a1d Mon Sep 17 00:00:00 2001
From: Sanket Kedia <sanket@trychroma.com>
Date: Thu, 24 Apr 2025 22:42:15 -0700
Subject: [PATCH 4/5] fix test

---
 chromadb/test/property/invariants.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/chromadb/test/property/invariants.py b/chromadb/test/property/invariants.py
index c48c5abd832..761ad3707ea 100644
--- a/chromadb/test/property/invariants.py
+++ b/chromadb/test/property/invariants.py
@@ -246,16 +246,19 @@ def fd_not_exceeding_threadpool_size(threadpool_size: int) -> None:
     )
 
 def get_space(collection: Collection):
+    # TODO: this is a hack to get the space.
+    # We should update the tests to stop passing space via metadata and instead use the
+    # collection's configuration_json.
+    space = None
     if "hnsw:space" in collection.metadata:
-        return collection.metadata["hnsw:space"]
+        space = collection.metadata["hnsw:space"]
     if collection._model.configuration_json is None:
-        return None
+        return space
     if 'spann' in collection._model.configuration_json and collection._model.configuration_json.get('spann') is not None and 'space' in collection._model.configuration_json.get('spann'):
-        return collection._model.configuration_json.get('spann').get('space')
+        space = collection._model.configuration_json.get('spann').get('space')
     elif 'hnsw' in collection._model.configuration_json and collection._model.configuration_json.get('hnsw') is not None and 'space' in collection._model.configuration_json.get('hnsw'):
-        return collection._model.configuration_json.get('hnsw').get('space')
-    else:
-        return None
+        space = collection._model.configuration_json.get('hnsw').get('space')
+    return space
 
 def ann_accuracy(
     collection: Collection,

From 7d79e57409e732c72e49dee68220543af0985d13 Mon Sep 17 00:00:00 2001
From: Sanket Kedia <sanket@trychroma.com>
Date: Thu, 24 Apr 2025 23:11:23 -0700
Subject: [PATCH 5/5] Modify get_space

---
 chromadb/test/property/invariants.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/chromadb/test/property/invariants.py b/chromadb/test/property/invariants.py
index 761ad3707ea..58b780c80d1 100644
--- a/chromadb/test/property/invariants.py
+++ b/chromadb/test/property/invariants.py
@@ -257,7 +257,8 @@ def get_space(collection: Collection):
     if 'spann' in collection._model.configuration_json and collection._model.configuration_json.get('spann') is not None and 'space' in collection._model.configuration_json.get('spann'):
         space = collection._model.configuration_json.get('spann').get('space')
     elif 'hnsw' in collection._model.configuration_json and collection._model.configuration_json.get('hnsw') is not None and 'space' in collection._model.configuration_json.get('hnsw'):
-        space = collection._model.configuration_json.get('hnsw').get('space')
+        if space is None:
+            space = collection._model.configuration_json.get('hnsw').get('space')
     return space
 
 def ann_accuracy(