diff --git a/_includes/code/howto/configure-rq/rq-compression-v4.py b/_includes/code/howto/configure-rq/rq-compression-v4.py
index d69628f15..6c1a388b6 100644
--- a/_includes/code/howto/configure-rq/rq-compression-v4.py
+++ b/_includes/code/howto/configure-rq/rq-compression-v4.py
@@ -101,6 +101,10 @@
quantizer=Configure.VectorIndex.Quantizer.rq(
bits=8, # Optional: Number of bits
rescore_limit=20, # Optional: Number of candidates to fetch before rescoring
+ cache=True, # Optional: Enable caching for flat index (enabled by default for HNSW)
+ ),
+ vector_index_config=Configure.VectorIndex.flat(
+ vector_cache_max_objects=100000, # Optional: Maximum number of objects in the memory cache
),
# highlight-end
),
diff --git a/_includes/code/howto/manage-data.collections.py b/_includes/code/howto/manage-data.collections.py
index a111d133f..51ede8c31 100644
--- a/_includes/code/howto/manage-data.collections.py
+++ b/_includes/code/howto/manage-data.collections.py
@@ -202,7 +202,7 @@
vector_index_config=Configure.VectorIndex.hnsw(
ef_construction=300,
distance_metric=VectorDistances.COSINE,
- filter_strategy=VectorFilterStrategy.SWEEPING, # or ACORN (Available from Weaviate v1.27.0)
+ filter_strategy=VectorFilterStrategy.ACORN,
),
# highlight-end
),
@@ -212,7 +212,7 @@
# Test
collection = client.collections.use("Article")
config = collection.config.get()
-assert config.vector_config["default"].vector_index_config.filter_strategy == "sweeping"
+assert config.vector_config["default"].vector_index_config.filter_strategy == "acorn"
assert isinstance(
config.vector_config["default"].vector_index_config, _VectorIndexConfigHNSW
)
diff --git a/_includes/code/howto/manage-data.import.py b/_includes/code/howto/manage-data.import.py
index c5d26657b..4f4c85354 100644
--- a/_includes/code/howto/manage-data.import.py
+++ b/_includes/code/howto/manage-data.import.py
@@ -558,48 +558,48 @@ def add_object(obj) -> None:
client.collections.delete("NewCollection")
-# # ==================================================
-# # ===== Server-side (automatic) batch import =====
-# # ==================================================
-
-# # Re-create the collection
-# client.collections.delete("MyCollection")
-# client.collections.create(
-# "MyCollection",
-# vector_config=Configure.Vectors.self_provided()
-# )
-
-# # START ServerSideBatchImportExample
-# data_rows = [
-# {"title": f"Object {i+1}"} for i in range(5)
-# ]
-
-# collection = client.collections.get("MyCollection")
-
-# # highlight-start
-# # Use `automatic` for server-side batching. The client will send data
-# # in chunks and the server will dynamically manage the import process.
-# with collection.batch.automatic() as batch:
-# for data_row in data_rows:
-# batch.add_object(
-# properties=data_row,
-# )
-# # highlight-end
-# if batch.number_errors > 10:
-# print("Batch import stopped due to excessive errors.")
-# break
-
-# failed_objects = collection.batch.failed_objects
-# if failed_objects:
-# print(f"Number of failed imports: {len(failed_objects)}")
-# print(f"First failed object: {failed_objects[0]}")
-# # END ServerSideBatchImportExample
-
-# result = collection.aggregate.over_all(total_count=True)
-# assert result.total_count == 5
-
-# # Clean up
-# client.collections.delete(collection.name)
+# ==================================================
+# ===== Server-side (automatic) batch import =====
+# ==================================================
+
+# Re-create the collection
+client.collections.delete("MyCollection")
+client.collections.create(
+ "MyCollection",
+ vector_config=Configure.Vectors.self_provided()
+)
+
+# START ServerSideBatchImportExample
+data_rows = [
+ {"title": f"Object {i+1}"} for i in range(5)
+]
+
+collection = client.collections.get("MyCollection")
+
+# highlight-start
+# Use `experimental` for server-side batching. The client will send data
+# in batches at a rate specified by the server.
+with collection.batch.experimental() as batch:
+ for data_row in data_rows:
+ batch.add_object(
+ properties=data_row,
+ )
+# highlight-end
+        if batch.number_errors > 10:
+            print("Batch import stopped due to excessive errors.")
+            break
+
+failed_objects = collection.batch.failed_objects
+if failed_objects:
+ print(f"Number of failed imports: {len(failed_objects)}")
+ print(f"First failed object: {failed_objects[0]}")
+# END ServerSideBatchImportExample
+
+result = collection.aggregate.over_all(total_count=True)
+assert result.total_count == 5
+
+# Clean up
+client.collections.delete(collection.name)
client.close()
diff --git a/_includes/compression-by-default.mdx b/_includes/compression-by-default.mdx
index 88c373ccb..15cce7ffd 100644
--- a/_includes/compression-by-default.mdx
+++ b/_includes/compression-by-default.mdx
@@ -1,5 +1,5 @@
:::info Compression by Default
-Starting with `v1.33`, Weaviate enables **8-bit [RQ quantization](/weaviate/configuration/compression/rq-compression) by default** when creating new collections to ensure efficient resource utilization and faster performance. This behavior can be changed through the [`DEFAULT_QUANTIZATION`](/deploy/configuration/env-vars#DEFAULT_QUANTIZATION) environment variable. Note that once enabled, quantization can't be disabled for a collection.
+Starting with `v1.33`, Weaviate enables **8-bit [RQ quantization](/weaviate/configuration/compression/rq-compression) by default** when creating new collections to ensure efficient resource utilization and faster performance. This behavior can be changed through the [`DEFAULT_QUANTIZATION`](/deploy/configuration/env-vars#DEFAULT_QUANTIZATION) environment variable. Note that once enabled, quantization can't be disabled for a collection. Default quantization only applies to the HNSW vector index type.
:::
diff --git a/_includes/configuration/bq-compression-parameters.mdx b/_includes/configuration/bq-compression-parameters.mdx
index 05b8eff2d..9552fb9b1 100644
--- a/_includes/configuration/bq-compression-parameters.mdx
+++ b/_includes/configuration/bq-compression-parameters.mdx
@@ -2,5 +2,5 @@
| :---------------------- | :------ | :------ | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `bq` : `enabled` | boolean | `false` | Enable BQ. Weaviate uses binary quantization (BQ) compression when `true`.
The Python client does not use the `enabled` parameter. To enable BQ with the v4 client, set a `quantizer` in the collection definition. |
| `bq` : `rescoreLimit` | integer | -1 | The minimum number of candidates to fetch before rescoring. |
-| `bq` : `cache` | boolean | `false` | Whether to use the vector cache.
(only when using the `flat` vector index type) |
+| `bq` : `cache` | boolean | `false` | Whether to cache the vectors in memory.
(only when using the `flat` vector index type) |
| `vectorCacheMaxObjects` | integer | `1e12` | Maximum number of objects in the memory cache. By default, this limit is set to one trillion (`1e12`) objects when a new collection is created. For sizing recommendations, see [Vector cache considerations](/weaviate/concepts/vector-index#vector-cache-considerations). |
diff --git a/_includes/configuration/rq-compression-parameters.mdx b/_includes/configuration/rq-compression-parameters.mdx
index e8ce8d668..2e51380fa 100644
--- a/_includes/configuration/rq-compression-parameters.mdx
+++ b/_includes/configuration/rq-compression-parameters.mdx
@@ -1,4 +1,6 @@
-| Parameter | Type | Default | Details |
-| :------------------- | :------ | :------ | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `rq`: `bits` | integer | `8` | The number of bits used to quantize each data point. Value can be `8` or `1`.
Learn more about [8-bit](/weaviate/concepts/vector-quantization#8-bit-rq) and [1-bit](/weaviate/concepts/vector-quantization#1-bit-rq) RQ. |
-| `rq`: `rescoreLimit` | integer | `-1` | The minimum number of candidates to fetch before rescoring. |
+| Parameter | Type | Default | Details |
+| :---------------------- | :------ | :------ | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `rq`: `bits` | integer | `8` | The number of bits used to quantize each data point. Value can be `8` or `1`.
Learn more about [8-bit](/weaviate/concepts/vector-quantization#8-bit-rq) and [1-bit](/weaviate/concepts/vector-quantization#1-bit-rq) RQ. |
+| `rq`: `rescoreLimit` | integer | `-1` | The minimum number of candidates to fetch before rescoring. |
+| `rq`: `cache` | boolean | `false` | Whether to cache the vectors in memory.
(only when using the `flat` vector index type) |
+| `vectorCacheMaxObjects` | integer | `1e12` | Maximum number of objects in the memory cache. By default, this limit is set to one trillion (`1e12`) objects when a new collection is created. For sizing recommendations, see [Vector cache considerations](/weaviate/concepts/vector-index#vector-cache-considerations). |
diff --git a/_includes/named-vector-compress.mdx b/_includes/named-vector-compress.mdx
index 6757c864d..62cda2c99 100644
--- a/_includes/named-vector-compress.mdx
+++ b/_includes/named-vector-compress.mdx
@@ -1,4 +1 @@
-:::info Added in `v1.24`
-:::
-
Collections can have multiple [named vectors](/weaviate/config-refs/collections#named-vectors). The vectors in a collection can have their own configurations, and compression must be enabled independently for each vector. Every vector is independent and can use [PQ](/weaviate/configuration/compression/pq-compression), [BQ](/weaviate/configuration/compression/bq-compression), [RQ](/weaviate/configuration/compression/rq-compression), [SQ](/weaviate/configuration/compression/sq-compression), or no compression.
diff --git a/docs/deploy/configuration/env-vars/index.md b/docs/deploy/configuration/env-vars/index.md
index e9675e234..8c55dbc81 100644
--- a/docs/deploy/configuration/env-vars/index.md
+++ b/docs/deploy/configuration/env-vars/index.md
@@ -32,7 +32,7 @@ import APITable from '@site/src/components/APITable';
| --- | --- | --- | --- |
| `ASYNC_INDEXING` | If set, Weaviate creates vector indexes asynchronously to the object creation process. This can be useful for importing large amounts of data. (default: `false`) | `boolean` | `false` |
| `AUTOSCHEMA_ENABLED` | Whether to infer the schema where necessary with the autoschema (default: `true`) | `boolean` | `true` |
-| `DEFAULT_QUANTIZATION` | Default quantization technique - can be overridden by the quantization method specified in the collection definition. Available values: `rq-8`, `rq-1`, `pq`, `bq`, `sq` and `none`. Default: `rq-8`.
Added in `v1.33` | `string` | `rq-8` |
+| `DEFAULT_QUANTIZATION` | Default quantization technique - can be overridden by the quantization method specified in the collection definition. Available values: `rq-8`, `rq-1`, `pq`, `bq`, `sq` and `none`. Default: `rq-8`.
Note: Currently only applies to the HNSW vector index type.
Added in `v1.33` | `string` | `rq-8` |
| `DEFAULT_VECTORIZER_MODULE` | Default vectorizer module - can be overridden by the vectorizer in the collection definition. | `string` | `text2vec-contextionary` |
| `API_BASED_MODULES_DISABLED` | Weaviate automatically enables the usage of all [API based modules](../../../weaviate/model-providers/index.md#api-based). Set this variable to `true` in order to limit access and only allow specific modules through the [`ENABLE_MODULES`](#ENABLE_MODULES) variable. Default: `false`
Added in `v1.33` | `boolean` | `true` |
| `DISABLE_LAZY_LOAD_SHARDS` | New in v1.23. When `false`, enable lazy shard loading to improve mean time to recovery in multi-tenant deployments. | `string` | `false` |
@@ -75,7 +75,7 @@ import APITable from '@site/src/components/APITable';
| `QUERY_MAXIMUM_RESULTS` | Sets the maximum total number of objects that can be retrieved. | `string - number` | `10000` |
| `QUERY_SLOW_LOG_ENABLED` | Log slow queries for debugging. Requires a restart to update.
(New in 1.24.16, 1.25.3) | `boolean` | `False` |
| `QUERY_SLOW_LOG_THRESHOLD` | Set a threshold time for slow query logging. Requires a restart to update.
(New in 1.24.16, 1.25.3) | `string` | `2s`
Values are times: `3h`, `2s`, `100ms` |
-| `REINDEX_SET_TO_ROARINGSET_AT_STARTUP` | Allow Weaviate to perform a one-off re-indexing to [use Roaring Bitmaps](/weaviate/concepts/filtering.md#migration-to-indexFilterable).
Available in versions `1.18` and higher. | `boolean` | `true` |
+| `REINDEX_SET_TO_ROARINGSET_AT_STARTUP` | Allow Weaviate to perform a one-off re-indexing to use Roaring Bitmaps.
Available in versions `1.18` and higher. | `boolean` | `true` |
| `TOKENIZER_CONCURRENCY_COUNT` | Limit the combined number of GSE and Kagome tokenizers running at the same time. Default: `GOMAXPROCS` | `string - number` | `NUMBER_OF_CPU_CORES` |
| `TOMBSTONE_DELETION_CONCURRENCY` | The maximum number of cores to use for tombstone deletion. Set this to limit the number of cores used for cleanup. Default: Half of the available cores. (New in `v1.24.0`) | `string - int` | `4` |
| `TOMBSTONE_DELETION_MAX_PER_CYCLE` | Maximum number of tombstones to delete per cleanup cycle. Set this to limit cleanup cycles, as they are resource-intensive. As an example, set a maximum of 10000000 (10M) for a cluster with 300 million-object shards. Default: none | `string - int` (New in `v1.24.15` / `v1.25.2`) | `10000000` |
diff --git a/docs/deploy/configuration/monitoring.md b/docs/deploy/configuration/monitoring.md
index 82e7c2f5f..110459235 100644
--- a/docs/deploy/configuration/monitoring.md
+++ b/docs/deploy/configuration/monitoring.md
@@ -17,7 +17,7 @@ and more.
### Enable within Weaviate
To tell Weaviate to collect metrics and expose them in a Prometheus-compatible
-format, all that's required is to set the following environment variable:
+format, all that's required is to set the following [environment variable](./env-vars/index.md#PROMETHEUS_MONITORING_ENABLED):
```sh
PROMETHEUS_MONITORING_ENABLED=true
@@ -42,103 +42,439 @@ dashboards](https://github.com/weaviate/weaviate-examples/tree/main/monitoring-p
You can start up a full-setup including monitoring and dashboards with a single
command. In this setup the following components are used:
-* Docker Compose is used to provide a fully-configured setup that can be
+- Docker Compose is used to provide a fully-configured setup that can be
started with a single command.
-* Weaviate is configured to expose Prometheus metrics as outlined in the
+- Weaviate is configured to expose Prometheus metrics as outlined in the
section above.
-* A Prometheus instance is started with the setup and configured to scrape
+- A Prometheus instance is started with the setup and configured to scrape
metrics from Weaviate every 15s.
-* A Grafana instance is started with the setup and configured to use the
+- A Grafana instance is started with the setup and configured to use the
Prometheus instance as a metrics provider. Additionally, it runs a dashboard
provider that contains a few sample dashboards.
### Multi-tenancy
-When using multi-tenancy, we suggest setting the `PROMETHEUS_MONITORING_GROUP` [environment variable](/deploy/configuration/env-vars/index.md) as `true` so that data across all tenants are grouped together for monitoring.
+When using multi-tenancy, we suggest setting the `PROMETHEUS_MONITORING_GROUP` [environment variable](./env-vars/index.md#PROMETHEUS_MONITORING_GROUP) as `true` so that data across all tenants are grouped together for monitoring.
## Obtainable Metrics
-The list of metrics that are obtainable through Weaviate's metric system is
-constantly being expanded. The complete list is in the [`prometheus.go`](https://github.com/weaviate/weaviate/blob/main/usecases/monitoring/prometheus.go) source code file.
+:::info Versioning & breaking changes
+
+Be aware that metrics do not follow the semantic versioning guidelines of other Weaviate features. Weaviate's main APIs are stable and breaking changes are extremely rare. Metrics, however, have shorter feature lifecycles. It can sometimes be necessary to introduce an incompatible change or entirely remove a metric, for example, because the cost of observing a specific metric in production has grown too high. As a result, it is possible that a Weaviate minor release contains a breaking change for the Monitoring system. If so, it will be clearly highlighted in the [release notes](https://github.com/weaviate/weaviate/releases).
+
+:::
+
+The list of metrics that are obtainable through Weaviate's metric system is constantly being expanded. The complete list of metrics can be found in the source code files:
+- [`usecases/monitoring/prometheus.go`](https://github.com/weaviate/weaviate/blob/main/usecases/monitoring/prometheus.go)
+- [`usecases/replica/metrics.go`](https://github.com/weaviate/weaviate/blob/main/usecases/replica/metrics.go)
+- [`adapters/repos/db/metrics.go`](https://github.com/weaviate/weaviate/blob/main/adapters/repos/db/metrics.go)
+- [`adapters/repos/db/lsmkv/metrics.go`](https://github.com/weaviate/weaviate/blob/main/adapters/repos/db/lsmkv/metrics.go)
+- [`adapters/repos/db/lsmkv/memtable_metrics.go`](https://github.com/weaviate/weaviate/blob/main/adapters/repos/db/lsmkv/memtable_metrics.go)
+
+This page describes metrics and their uses. Typically metrics are quite granular, as they can always be aggregated later on. For example if the granularity is "shard", you could aggregate all "shard" metrics of the same "class" (collection) to obtain a class metrics, or aggregate all metrics to obtain the metric for the entire Weaviate instance.
+
+### General & build information
+
+| Metric | Description | Labels | Type |
+| ------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------- | ------- |
+| `weaviate_build_info` | Provides general information about the build (What version is currently running? How long has this version been running, etc) | `version`, `revision`, `branch`, `goVersion` | `Gauge` |
+| `weaviate_runtime_config_hash` | Hash value of the currently active runtime configuration, useful for tracking when new configurations take effect | `sha256` | `Gauge` |
+| `weaviate_runtime_config_last_load_success` | Indicates whether the last loading attempt was successful (`1` for success, `0` for failure) | None | `Gauge` |
+| `weaviate_schema_collections` | Shows the total number of collections at any given point | `nodeID` | `Gauge` |
+| `weaviate_schema_shards` | Shows the total number of shards at any given point | `nodeID`, `status` (HOT, COLD, WARM, FROZEN) | `Gauge` |
+
+### Object and query operations
+
+#### Batch operations
+
+| Metric | Description | Labels | Type |
+| ------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------- | ----------- |
+| `batch_durations_ms` | Duration of a single batch operation in ms. The `operation` label further defines what operation as part of the batch (e.g. object, inverted, vector) is being used. Granularity is a shard of a class. | `operation`, `class_name`, `shard_name` | `Histogram` |
+| `batch_delete_durations_ms` | Duration of a batch delete in ms. The `operation` label further defines what operation as part of the batch delete is being measured. Granularity is a shard of a class | `operation`, `class_name`, `shard_name` | `Summary` |
+| `batch_size_bytes` | Size of a raw batch request batch in bytes | `api` | `Summary` |
+| `batch_size_objects` | Number of objects in a batch | None | `Summary` |
+| `batch_size_tenants` | Number of unique tenants referenced in a batch | None | `Summary` |
+| `batch_objects_processed_total` | Number of objects processed in a batch | `class_name`, `shard_name` | `Counter` |
+| `batch_objects_processed_bytes` | Number of bytes processed in a batch | `class_name`, `shard_name` | `Counter` |
+
+#### Object operations
+
+| Metric | Description | Labels | Type |
+| ---------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------- | --------- |
+| `object_count` | Numbers of objects present. Granularity is a shard of a class | `class_name`, `shard_name` | `Gauge` |
+| `objects_durations_ms` | Duration of an individual object operation, such as `put`, `delete`, etc. as indicated by the `operation` label, also as part of a batch. The `step` label adds additional precision to each `operation`. Granularity is a shard of a class. | `operation`, `step`, `class_name`, `shard_name` | `Summary` |
+
+#### Query operations
+
+| Metric | Description | Labels | Type |
+| -------------------------------------- | ------------------------------------------------------------------------------------------------------------ | ------------------------------------------- | ----------- |
+| `concurrent_queries_count` | Number of concurrently running query operations | `class_name`, `query_type` | `Gauge` |
+| `queries_durations_ms` | Duration of queries in milliseconds | `class_name`, `query_type` | `Histogram` |
+| `queries_filtered_vector_durations_ms` | Duration of queries in milliseconds | `class_name`, `shard_name`, `operation` | `Summary` |
+| `concurrent_goroutines` | Number of concurrently running goroutines | `class_name`, `query_type` | `Gauge` |
+| `requests_total` | Metric that tracks all user requests to determine if it was successful or failed | `status`, `class_name`, `api`, `query_type` | `Gauge` |
+| `query_dimensions_total` | The vector dimensions used by any read-query that involves vectors | `query_type`, `operation`, `class_name` | `Counter` |
+| `query_dimensions_combined_total` | The vector dimensions used by any read-query that involves vectors, aggregated across all classes and shards | None | `Counter` |
+
+### Vector index
+
+#### General vector index
+
+| Metric | Description | Labels | Type |
+| ------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------- | --------- |
+| `vector_index_size` | The total capacity of the vector index. Typically larger than the number of vectors imported as it grows proactively. | `class_name`, `shard_name` | `Gauge` |
+| `vector_index_operations` | Total number of mutating operations on the vector index. The operation itself is defined by the `operation` label. | `operation`, `class_name`, `shard_name` | `Gauge` |
+| `vector_index_durations_ms` | Duration of regular vector index operation, such as insert or delete. The operation itself is defined through the `operation` label. The `step` label adds more granularity to each operation. | `operation`, `step`, `class_name`, `shard_name` | `Summary` |
+| `vector_index_maintenance_durations_ms` | Duration of a sync or async vector index maintenance operation. The operation itself is defined through the `operation` label. | `operation`, `class_name`, `shard_name` | `Summary` |
+| `vector_index_tombstones` | Number of currently active tombstones in the vector index. Will go up on each incoming delete and go down after a completed repair operation. | `class_name`, `shard_name` | `Gauge` |
+| `vector_index_tombstone_cleanup_threads` | Number of currently active threads for repairing/cleaning up the vector index after deletes have occurred. | `class_name`, `shard_name` | `Gauge` |
+| `vector_index_tombstone_cleaned` | Total number of deleted and removed vectors after repair operations. | `class_name`, `shard_name` | `Counter` |
+| `vector_index_tombstone_unexpected_total` | Total number of unexpected tombstones that were found, for example because a vector was not found for an existing id in the index | `class_name`, `shard_name`, `operation` | `Counter` |
+| `vector_index_tombstone_cycle_start_timestamp_seconds` | Unix epoch timestamp of the start of the current tombstone cleanup cycle | `class_name`, `shard_name` | `Gauge` |
+| `vector_index_tombstone_cycle_end_timestamp_seconds` | Unix epoch timestamp of the end of the last tombstone cleanup cycle. A negative value indicates that the cycle is still running | `class_name`, `shard_name` | `Gauge` |
+| `vector_index_tombstone_cycle_progress` | A ratio (percentage) of the progress of the current tombstone cleanup cycle. 0 indicates the very beginning, 1 is a complete cycle. | `class_name`, `shard_name` | `Gauge` |
+| `vector_dimensions_sum` | Total dimensions in a shard | `class_name`, `shard_name` | `Gauge` |
+| `vector_segments_sum` | Total segments in a shard if quantization enabled | `class_name`, `shard_name` | `Gauge` |
+
+#### Vector index (IVF-specific)
+
+| Metric | Description | Labels | Type |
+| ------------------------------------------------- | ----------------------------------------------------------------------------------- | --------------------------------------- | ----------- |
+| `vector_index_postings` | The size of the vector index postings. Typically much lower than number of vectors. | `class_name`, `shard_name` | `Gauge` |
+| `vector_index_posting_size_vectors` | The size of individual vectors in each posting list | `class_name`, `shard_name` | `Histogram` |
+| `vector_index_pending_background_operations` | Number of background operations yet to be processed | `operation`, `class_name`, `shard_name` | `Gauge` |
+| `vector_index_background_operations_durations_ms` | Duration of typical vector index background operations (split, merge, reassign) | `operation`, `class_name`, `shard_name` | `Summary` |
+| `vector_index_store_operations_durations_ms` | Duration of store operations (put, append, get) | `operation`, `class_name`, `shard_name` | `Summary` |
+
+#### Async index queue
+
+| Metric | Description | Labels | Type |
+| ---------------------------------------- | ----------------------------------------------------------- | ------------------------------------------- | ----------- |
+| `queue_size` | Number of records in the queue | `class_name`, `shard_name` | `Gauge` |
+| `queue_disk_usage` | Disk usage of the queue | `class_name`, `shard_name` | `Gauge` |
+| `queue_paused` | Whether the queue is paused | `class_name`, `shard_name` | `Gauge` |
+| `queue_count` | Number of queues | `class_name`, `shard_name` | `Gauge` |
+| `queue_partition_processing_duration_ms` | Duration in ms of a single partition processing | `class_name`, `shard_name` | `Histogram` |
+| `vector_index_queue_insert_count` | Number of insert operations added to the vector index queue | `class_name`, `shard_name`, `target_vector` | `Counter` |
+| `vector_index_queue_delete_count` | Number of delete operations added to the vector index queue | `class_name`, `shard_name`, `target_vector` | `Counter` |
+
+#### Tombstone management
+
+| Metric | Description | Labels | Type |
+| ---------------------------------- | -------------------------------------------------------- | -------------------------- | --------- |
+| `tombstone_find_local_entrypoint` | Total number of tombstone delete local entrypoint calls | `class_name`, `shard_name` | `Counter` |
+| `tombstone_find_global_entrypoint` | Total number of tombstone delete global entrypoint calls | `class_name`, `shard_name` | `Counter` |
+| `tombstone_reassign_neighbors` | Total number of tombstone reassign neighbor calls | `class_name`, `shard_name` | `Counter` |
+| `tombstone_delete_list_size` | Delete list size of tombstones | `class_name`, `shard_name` | `Gauge` |
+
+### LSM store
+
+The following sections provide detailed metrics for LSM (Log-Structured Merge-tree) bucket operations and replication functionality.
+
+#### General LSM store
+
+| Metric | Description | Labels | Type |
+| ------------------------------------------ | --------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------- | --------- |
+| `lsm_active_segments` | Number of currently present segments per shard. Granularity is shard of a class. Grouped by `strategy`. | `strategy`, `class_name`, `shard_name`, `path` | `Gauge` |
+| `lsm_objects_bucket_segment_count` | Number of segments per shard in the objects bucket | `strategy`, `class_name`, `shard_name`, `path` | `Gauge` |
+| `lsm_compressed_vecs_bucket_segment_count` | Number of segments per shard in the vectors_compressed bucket | `strategy`, `class_name`, `shard_name`, `path` | `Gauge` |
+| `lsm_segment_count` | Number of segments by level | `strategy`, `class_name`, `shard_name`, `path`, `level` | `Gauge` |
+| `lsm_segment_objects` | Number of entries per LSM segment by level. Granularity is shard of a class. Grouped by `strategy` and `level`. | `strategy`, `class_name`, `shard_name`, `path`, `level` | `Gauge` |
+| `lsm_segment_size` | Size of LSM segment by level and unit | `strategy`, `class_name`, `shard_name`, `path`, `level`, `unit` | `Gauge` |
+| `lsm_segment_unloaded` | Number of unloaded segments | `strategy`, `class_name`, `shard_name`, `path` | `Gauge` |
+| `lsm_memtable_size` | Size of memtable by path | `strategy`, `class_name`, `shard_name`, `path` | `Gauge` |
+| `lsm_memtable_durations_ms` | Time in ms for a bucket operation to complete | `strategy`, `class_name`, `shard_name`, `path`, `operation` | `Summary` |
+| `lsm_bitmap_buffers_usage` | Number of bitmap buffers used by size | `size`, `operation` | `Counter` |
+
+#### LSM bucket operations
+
+These metrics track read and write operations on LSM buckets, providing detailed visibility into database performance.
+
+| Metric | Description | Labels | Type |
+| --------------------------------------------- | ----------------------------------------------------------- | ---------------------------------------------------------------------------------- | ----------- |
+| `lsm_bucket_read_operation_count` | Total number of LSM bucket read operations requested | `operation` (get), `component` (active_memtable, flushing_memtable, segment_group) | `Counter` |
+| `lsm_bucket_read_operation_ongoing` | Number of LSM bucket read operations currently in progress | `operation` (get), `component` (active_memtable, flushing_memtable, segment_group) | `Gauge` |
+| `lsm_bucket_read_operation_failure_count` | Number of failed LSM bucket read operations | `operation` (get), `component` (active_memtable, flushing_memtable, segment_group) | `Counter` |
+| `lsm_bucket_read_operation_duration_seconds` | Duration of LSM bucket read operations in seconds | `operation` (get), `component` (active_memtable, flushing_memtable, segment_group) | `Histogram` |
+| `lsm_bucket_write_operation_count` | Total number of LSM bucket write operations requested | `operation` (put, delete) | `Counter` |
+| `lsm_bucket_write_operation_ongoing` | Number of LSM bucket write operations currently in progress | `operation` (put, delete) | `Gauge` |
+| `lsm_bucket_write_operation_failure_count` | Number of failed LSM bucket write operations | `operation` (put, delete) | `Counter` |
+| `lsm_bucket_write_operation_duration_seconds` | Duration of LSM bucket write operations in seconds | `operation` (put, delete) | `Histogram` |
+
+#### LSM bucket lifecycle
+
+These metrics track the initialization and shutdown of LSM buckets.
+
+| Metric | Description | Labels | Type |
+| -------------------------------------- | ---------------------------------------------------------- | ---------- | ----------- |
+| `lsm_bucket_init_count` | Total number of LSM bucket initializations requested | `strategy` | `Counter` |
+| `lsm_bucket_init_in_progress` | Number of LSM bucket initializations currently in progress | `strategy` | `Gauge` |
+| `lsm_bucket_init_failure_count` | Number of failed LSM bucket initializations | `strategy` | `Counter` |
+| `lsm_bucket_init_duration_seconds` | Duration of LSM bucket initialization in seconds | `strategy` | `Histogram` |
+| `lsm_bucket_shutdown_count` | Total number of LSM bucket shutdowns requested | `strategy` | `Counter` |
+| `lsm_bucket_shutdown_in_progress` | Number of LSM bucket shutdowns currently in progress | `strategy` | `Gauge` |
+| `lsm_bucket_shutdown_duration_seconds` | Duration of LSM bucket shutdown in seconds | `strategy` | `Histogram` |
+| `lsm_bucket_shutdown_failure_count` | Number of failed LSM bucket shutdowns | `strategy` | `Counter` |
+
+#### LSM bucket cursors
+
+These metrics track cursor usage patterns in LSM buckets.
+
+| Metric | Description | Labels | Type |
+| ------------------------------------ | --------------------------------------------------- | ---------- | ----------- |
+| `lsm_bucket_opened_cursors` | Number of opened LSM bucket cursors | `strategy` | `Counter` |
+| `lsm_bucket_open_cursors` | Number of currently open LSM bucket cursors | `strategy` | `Gauge` |
+| `lsm_bucket_cursor_duration_seconds` | Duration of LSM bucket cursor operations in seconds | `strategy` | `Histogram` |
+
+#### LSM segment metrics
+
+These metrics provide visibility into LSM segment storage and size distribution.
+
+| Metric | Description | Labels | Type |
+| ------------------------------- | ------------------------------------ | ---------- | ----------- |
+| `lsm_bucket_segment_total` | Total number of LSM bucket segments | `strategy` | `Gauge` |
+| `lsm_bucket_segment_size_bytes` | Size of LSM bucket segments in bytes | `strategy` | `Histogram` |
+
+#### LSM compaction
+
+These metrics track compaction operations that merge and optimize LSM segments.
+
+| Metric | Description | Labels | Type |
+| ---------------------------------------- | ---------------------------------------------------------------------------------------- | ---------- | ----------- |
+| `lsm_bucket_compaction_count` | Total number of LSM bucket compactions requested | `strategy` | `Counter` |
+| `lsm_bucket_compaction_in_progress` | Number of LSM bucket compactions currently in progress | `strategy` | `Gauge` |
+| `lsm_bucket_compaction_failure_count` | Number of failed LSM bucket compactions | `strategy` | `Counter` |
+| `lsm_bucket_compaction_noop_count` | Number of times the periodic LSM bucket compaction task ran but found nothing to compact | `strategy` | `Counter` |
+| `lsm_bucket_compaction_duration_seconds` | Duration of LSM bucket compaction in seconds | `strategy` | `Histogram` |
+
+#### LSM memtable operations
+
+These metrics track memtable flush operations that persist in-memory data to disk.
+
+| Metric | Description | Labels | Type |
+| ------------------------------------- | ----------------------------------------------- | ---------- | ----------- |
+| `lsm_memtable_flush_total` | Total number of LSM memtable flushes | `strategy` | `Counter` |
+| `lsm_memtable_flush_in_progress` | Number of LSM memtable flushes in progress | `strategy` | `Gauge` |
+| `lsm_memtable_flush_failures_total` | Total number of failed LSM memtable flushes | `strategy` | `Counter` |
+| `lsm_memtable_flush_duration_seconds` | Duration of LSM memtable flush in seconds | `strategy` | `Histogram` |
+| `lsm_memtable_flush_size_bytes` | Size of LSM memtable at flushing time, in bytes | `strategy` | `Histogram` |
+
+#### LSM WAL recovery
+
+These metrics track Write-Ahead Log (WAL) recovery operations during startup.
+
+| Metric | Description | Labels | Type |
+| ------------------------------------------ | --------------------------------------------------------- | ---------- | ----------- |
+| `lsm_bucket_wal_recovery_count` | Total number of LSM bucket WAL recoveries requested | `strategy` | `Counter` |
+| `lsm_bucket_wal_recovery_in_progress` | Number of LSM bucket WAL recoveries currently in progress | `strategy` | `Gauge` |
+| `lsm_bucket_wal_recovery_failure_count` | Number of failed LSM bucket WAL recoveries | `strategy` | `Counter` |
+| `lsm_bucket_wal_recovery_duration_seconds` | Duration of LSM bucket WAL recovery in seconds | `strategy` | `Histogram` |
+
+### Schema & cluster consensus
+
+#### Schema & RAFT consensus
+
+| Metric | Description | Labels | Type |
+| --------------------------------- | ------------------------------------------------------------- | ------ | --------- |
+| `schema_writes_seconds` | Duration of schema writes (which always involve the leader) | `type` | `Summary` |
+| `schema_reads_local_seconds` | Duration of local schema reads that do not involve the leader | `type` | `Summary` |
+| `schema_reads_leader_seconds` | Duration of schema reads that are passed to the leader | `type` | `Summary` |
+| `schema_wait_for_version_seconds` | Duration of waiting for a schema version to be reached | `type` | `Summary` |
+
+#### Schema transactions (deprecated)
+
+| Metric | Description | Labels | Type |
+| ---------------------------- | --------------------------------------------------------------------------------------- | --------------------- | --------- |
+| `schema_tx_opened_total` | Total number of opened schema transactions | `ownership` | `Counter` |
+| `schema_tx_closed_total` | Total number of closed schema transactions. A close must be either successful or failed | `ownership`, `status` | `Counter` |
+| `schema_tx_duration_seconds` | Mean duration of a tx by status | `ownership`, `status` | `Summary` |
+
+#### RAFT metrics (internal)
+
+| Metric | Description | Labels | Type |
+| ------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------ | ------------------------- | --------- |
+| `weaviate_internal_counter_raft_apply` | Number of transactions in the configured interval | None | `Counter` |
+| `weaviate_internal_counter_raft_state_candidate` | Number of times the raft server initiated an election | None | `Counter` |
+| `weaviate_internal_counter_raft_state_follower` | Number of times in the configured interval that the raft server became a follower | None | `Summary` |
+| `weaviate_internal_counter_raft_state_leader` | Number of times the raft server became a leader | None | `Counter` |
+| `weaviate_internal_counter_raft_transition_heartbeat_timeout` | Number of times that the node transitioned to `candidate` state after not receiving a heartbeat message from the last known leader | None | `Counter` |
+| `weaviate_internal_gauge_raft_commitNumLogs` | Number of logs processed for application to the finite state machine in a single batch | None | `Gauge` |
+| `weaviate_internal_gauge_raft_leader_dispatchNumLogs` | Number of logs committed to disk in the most recent batch | None | `Gauge` |
+| `weaviate_internal_gauge_raft_leader_oldestLogAge` | The number of milliseconds since the oldest log in the leader's log store was written | None | `Gauge` |
+| `weaviate_internal_gauge_raft_peers` | The number of peers in the raft cluster configuration | None | `Gauge` |
+| `weaviate_internal_sample_raft_boltdb_logBatchSize` | Measures the total size in bytes of logs being written to the db in a single batch | `quantile=0.5, 0.9, 0.99` | `Summary` |
+| `weaviate_internal_sample_raft_boltdb_logSize` | Measures the size of logs being written to the db | `quantile=0.5, 0.9, 0.99` | `Summary` |
+| `weaviate_internal_sample_raft_boltdb_logsPerBatch` | Measures the number of logs being written per batch to the db | `quantile=0.5, 0.9, 0.99` | `Summary` |
+| `weaviate_internal_sample_raft_boltdb_writeCapacity` | Theoretical write capacity in terms of the number of logs that can be written per second | `quantile=0.5, 0.9, 0.99` | `Summary` |
+| `weaviate_internal_sample_raft_thread_fsm_saturation` | An approximate measurement of the proportion of time the Raft FSM goroutine is busy and unavailable to accept new work | `quantile=0.5, 0.9, 0.99` | `Summary` |
+| `weaviate_internal_sample_raft_thread_main_saturation` | An approximate measurement of the proportion of time the main Raft goroutine is busy and unavailable to accept new work (percentage) | `quantile=0.5, 0.9, 0.99` | `Summary` |
+| `weaviate_internal_timer_raft_boltdb_getLog` | Measures the amount of time spent reading logs from the db (in ms) | `quantile=0.5, 0.9, 0.99` | `Summary` |
+| `weaviate_internal_timer_raft_boltdb_storeLogs` | Time required to record any outstanding logs since the last request to append entries for the given node | `quantile=0.5, 0.9, 0.99` | `Summary` |
+| `weaviate_internal_timer_raft_commitTime` | Time required to commit a new entry to the raft log on the leader node | `quantile=0.5, 0.9, 0.99` | `Summary` |
+| `weaviate_internal_timer_raft_fsm_apply` | Number of logs committed by the finite state machine since the last interval | `quantile=0.5, 0.9, 0.99` | `Summary` |
+| `weaviate_internal_timer_raft_fsm_enqueue` | Time required to queue up a batch of logs for the finite state machine to apply | `quantile=0.5, 0.9, 0.99` | `Summary` |
+| `weaviate_internal_timer_raft_leader_dispatchLog` | Time required for the leader node to write a log entry to disk | `quantile=0.5, 0.9, 0.99` | `Summary` |
+
+#### Memberlist (internal)
+
+| Metric | Description | Labels | Type |
+| ------------------------------------------------------ | -------------------------------------------------------------------- | ------------------------- | --------- |
+| `weaviate_internal_sample_memberlist_queue_broadcasts` | Shows the number of messages in the broadcast queue of Memberlist | `quantile=0.5, 0.9, 0.99` | `Summary` |
+| `weaviate_internal_timer_memberlist_gossip`            | Shows the latency distribution of each gossip made in Memberlist     | `quantile=0.5, 0.9, 0.99` | `Summary` |
+
+### System resources
+
+#### File I/O & memory
+
+| Metric | Description | Labels | Type |
+| ---------------------------- | ------------------------------------- | ----------------------- | --------- |
+| `file_io_writes_total_bytes` | Total number of bytes written to disk | `operation`, `strategy` | `Summary` |
+| `file_io_reads_total_bytes` | Total number of bytes read from disk | `operation` | `Summary` |
+| `mmap_operations_total` | Total number of mmap operations | `operation`, `strategy` | `Counter` |
+| `mmap_proc_maps` | Number of entries in /proc/self/maps | None | `Gauge` |
+
+#### Async operations
+
+| Metric | Description | Labels | Type |
+| -------------------------- | ------------------------------------------------------------------------------------------------------------ | ----------------------------------------------- | ------- |
+| `async_operations_running` | Number of currently running async operations. The operation itself is defined through the `operation` label. | `operation`, `class_name`, `shard_name`, `path` | `Gauge` |
-This page describes some noteworthy metrics and their uses.
+#### Checksum
+
+| Metric | Description | Labels | Type |
+| -------------------------------------- | ----------------------------------------------- | ------ | --------- |
+| `checksum_validation_duration_seconds` | Duration of checksum validation | None | `Summary` |
+| `checksum_bytes_read` | Number of bytes read during checksum validation | None | `Summary` |
+
+#### Startup
+
+| Metric | Description | Labels | Type |
+| --------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------- | --------- |
+| `startup_progress` | A ratio (percentage) of startup progress for a particular component in a shard | `operation`, `class_name`, `shard_name` | `Gauge` |
+| `startup_durations_ms` | Duration of individual startup operations in ms. The operation itself is defined through the `operation` label. | `operation`, `class_name`, `shard_name` | `Summary` |
+| `startup_diskio_throughput` | Disk I/O throughput in bytes/s at startup operations, such as reading back the HNSW index or recovering LSM segments. The operation itself is defined by the `operation` label. | `operation`, `class_name`, `shard_name` | `Summary` |
+
+#### Backup & restore
+
+| Metric | Description | Labels | Type |
+| --------------------------------- | --------------------------------------------------------- | ---------------------------- | --------- |
+| `backup_restore_ms` | Duration of a backup restore | `backend_name`, `class_name` | `Summary` |
+| `backup_restore_class_ms`         | Duration of restoring a class                             | `class_name`                 | `Summary` |
+| `backup_restore_init_ms` | Startup phase of a backup restore | `backend_name`, `class_name` | `Summary` |
+| `backup_restore_from_backend_ms` | File transfer stage of a backup restore | `backend_name`, `class_name` | `Summary` |
+| `backup_store_to_backend_ms` | File transfer stage of a backup store | `backend_name`, `class_name` | `Summary` |
+| `bucket_pause_durations_ms` | Bucket pause durations | `bucket_dir` | `Summary` |
+| `backup_restore_data_transferred` | Total number of bytes transferred during a backup restore | `backend_name`, `class_name` | `Counter` |
+| `backup_store_data_transferred` | Total number of bytes transferred during a backup store | `backend_name`, `class_name` | `Counter` |
+
+#### Shard management
+
+| Metric | Description | Labels | Type |
+| ----------------------------------------------------- | -------------------------------------------- | ------------------------------------------------------- | ----------- |
+| `shards_loaded` | Number of shards loaded | None | `Gauge` |
+| `shards_unloaded` | Number of shards not loaded | None | `Gauge` |
+| `shards_loading` | Number of shards in process of loading | None | `Gauge` |
+| `shards_unloading` | Number of shards in process of unloading | None | `Gauge` |
+| `weaviate_index_shards_total` | Total number of shards per index status | `status` (READONLY, INDEXING, LOADING, READY, SHUTDOWN) | `Gauge` |
+| `weaviate_index_shard_status_update_duration_seconds` | Time taken to update shard status in seconds | `status` (READONLY, INDEXING, LOADING, READY, SHUTDOWN) | `Histogram` |
+
+### Modules & extensions
+
+#### Vectorization (Text2Vec)
+
+| Metric | Description | Labels | Type |
+| ---------------------------------- | ---------------------------------------------------------------- | ------------------------- | ----------- |
+| `t2v_concurrent_batches` | Number of batches currently running | `vectorizer` | `Gauge` |
+| `t2v_batch_queue_duration_seconds` | Time a batch spends in specific portions of the queue            | `vectorizer`, `operation` | `Histogram` |
+| `t2v_request_duration_seconds` | Duration of an individual request to the vectorizer | `vectorizer` | `Histogram` |
+| `t2v_tokens_in_batch` | Number of tokens in a user-defined batch | `vectorizer` | `Histogram` |
+| `t2v_tokens_in_request` | Number of tokens in an individual request sent to the vectorizer | `vectorizer` | `Histogram` |
+| `t2v_rate_limit_stats` | Rate limit stats for the vectorizer | `vectorizer`, `stat` | `Gauge` |
+| `t2v_repeat_stats` | Why batch scheduling is repeated | `vectorizer`, `stat` | `Gauge` |
+| `t2v_requests_per_batch` | Number of requests required to process an entire (user) batch | `vectorizer` | `Histogram` |
+
+#### Tokenizer
+
+| Metric | Description | Labels | Type |
+| --------------------------------------- | ------------------------------------------------ | ----------- | ----------- |
+| `tokenizer_duration_seconds` | Duration of a tokenizer operation | `tokenizer` | `Histogram` |
+| `tokenizer_requests_total` | Number of tokenizer requests | `tokenizer` | `Counter` |
+| `tokenizer_initialize_duration_seconds` | Duration of a tokenizer initialization operation | `tokenizer` | `Histogram` |
+| `token_count_total` | Number of tokens processed | `tokenizer` | `Counter` |
+| `token_count_per_request` | Number of tokens processed per request | `tokenizer` | `Histogram` |
+
+#### Module & external API
+
+| Metric | Description | Labels | Type |
+| ------------------------------------------ | -------------------------------------------------------------- | ----------------------------------------- | ----------- |
+| `weaviate_module_requests_total` | Number of module requests to external APIs | `op`, `api` | `Counter` |
+| `weaviate_module_request_duration_seconds` | Duration of an individual request to a module external API | `op`, `api` | `Histogram` |
+| `weaviate_module_requests_per_batch` | Number of items in a batch | `op`, `api` | `Histogram` |
+| `weaviate_module_request_size_bytes` | Size (in bytes) of the request sent to an external API | `op`, `api` | `Histogram` |
+| `weaviate_module_response_size_bytes` | Size (in bytes) of the response received from an external API | `op`, `api` | `Histogram` |
+| `weaviate_vectorizer_request_tokens` | Number of tokens in the request sent to an external vectorizer | `inout`, `api` | `Histogram` |
+| `weaviate_module_request_single_count` | Number of single-item external API requests | `op`, `api` | `Counter` |
+| `weaviate_module_request_batch_count` | Number of batched module requests | `op`, `api` | `Counter` |
+| `weaviate_module_error_total` | Number of module errors | `op`, `module`, `endpoint`, `status_code` | `Counter` |
+| `weaviate_module_call_error_total` | Number of module errors (related to external calls) | `module`, `endpoint`, `status_code` | `Counter` |
+| `weaviate_module_response_status_total` | Number of API response statuses | `op`, `endpoint`, `status` | `Counter` |
+| `weaviate_module_batch_error_total` | Number of batch errors | `operation`, `class_name` | `Counter` |
+
+#### Usage Tracking
+
+| Metric | Description | Labels | Type |
+| ---------------------------------------------------- | -------------------------------------------- | ------------------------------------------------------ | ----------- |
+| `weaviate_usage_{gcs\|s3}_operations_total` | Total number of operations for module labels | `operation` (collect/upload), `status` (success/error) | `Counter` |
+| `weaviate_usage_{gcs\|s3}_operation_latency_seconds` | Latency of usage operations in seconds | `operation` (collect/upload) | `Histogram` |
+| `weaviate_usage_{gcs\|s3}_resource_count` | Number of resources tracked by module | `resource_type` (collections/shards/backups) | `Gauge` |
+| `weaviate_usage_{gcs\|s3}_uploaded_file_size_bytes` | Size of the uploaded usage file in bytes | None | `Gauge` |
-Typically metrics are quite granular, as they can always be aggregated later
-on. For example if the granularity is "shard", you could aggregate all "shard"
-metrics of the same "class" to obtain a class metrics, or aggregate all metrics
-to obtain the metric for the entire Weaviate instance.
+---
-| Metric | Description | Labels | Type |
-|---|---|---|---|
-| `async_operations_running` | Number of currently running async operations. The operation itself is defined through the `operation` label. | `operation`, `class_name`, `shard_name`, `path` | `Gauge` |
-| `batch_delete_durations_ms` | Duration of a batch delete in ms. The `operation` label further defines what operation as part of the batch delete is being measured. Granularity is a shard of a class | `class_name`, `shard_name` | `Histogram` |
-| `batch_durations_ms` | Duration of a single batch operation in ms. The `operation` label further defines what operation as part of the batch (e.g. object, inverted, vector) is being used. Granularity is a shard of a class. | `operation`, `class_name`, `shard_name` | `Histogram` |
-| `index_queue_delete_duration_ms` | Duration of deleting one or more vectors from the index queue and the underlying index. | `class_name`, `shard_name`, `target_vector` | `Summary` |
-| `index_queue_paused` | Whether the index queue is paused. | `class_name`, `shard_name`, `target_vector` | `Gauge` |
-| `index_queue_preload_count` | Number of vectors preloaded to the index queue. | `class_name`, `shard_name`, `target_vector` | `Gauge` |
-| `index_queue_preload_duration_ms` | Duration of preloading un-indexed vectors to the index queue. | `class_name`, `shard_name`, `target_vector` | `Summary` |
-| `index_queue_push_duration_ms` | Duration of pushing one or more vectors to the index queue. | `class_name`, `shard_name`, `target_vector` | `Summary` |
-| `index_queue_search_duration_ms` | Duration of searching for vectors in the index queue and the underlying index. | `class_name`, `shard_name`, `target_vector` | `Summary` |
-| `index_queue_size` | Number of vectors in the index queue. | `class_name`, `shard_name`, `target_vector` | `Gauge` |
-| `index_queue_stale_count` | Number of times the index queue has been marked as stale. | `class_name`, `shard_name`, `target_vector` | `Counter` |
-| `index_queue_vectors_dequeued` | Number of vectors sent to the workers per tick. | `class_name`, `shard_name`, `target_vector` | `Gauge` |
-| `index_queue_wait_duration_ms` | Duration of waiting for the workers to finish. | `class_name`, `shard_name`, `target_vector` | `Summary` |
-| `lsm_active_segments` | Number of currently present segments per shard. Granularity is shard of a class. Grouped by `strategy`. | `strategy`, `class_name`, `shard_name`, `path` | `Gauge` |
-| `lsm_bloom_filter_duration_ms` | Duration of a bloom filter operation per shard in ms. Granularity is shard of a class. Grouped by `strategy`. | `operation`, `strategy`, `class_name`, `shard_name` | `Histogram` |
-| `lsm_segment_count` | Number of segments by level | `strategy`, `class_name`, `shard_name`, `path`, `level` | `Gauge` |
-| `lsm_segment_objects` | Number of entries per LSM segment by level. Granularity is shard of a class. Grouped by `strategy` and `level`. | `operation`, `strategy`, `class_name`, `shard_name`, `path`, `level` | `Gauge` |
-| `lsm_segment_size` | Size of LSM segment by level and unit. | `strategy`, `class_name`, `shard_name`, `path`, `level`, `unit` | `Gauge` |
-| `object_count` | Numbers of objects present. Granularity is a shard of a class | `class_name`, `shard_name` | `Gauge` |
-| `objects_durations_ms` | Duration of an individual object operation, such as `put`, `delete`, etc. as indicated by the `operation` label, also as part of a batch. The `step` label adds additional precisions to each `operation`. Granularity is a shard of a class. | `class_name`, `shard_name` | `Histogram` |
-| `requests_total` | Metric that tracks all user requests to determine if it was successful or failed. | `api`, `query_type`, `class_name` | `Gauge` |
-| `startup_diskio_throughput` | Disk I/O throughput in bytes/s at startup operations, such as reading back the HNSW index or recovering LSM segments. The operation itself is defined by the `operation` label. | `operation`, `step`, `class_name`, `shard_name` | `Histogram` |
-| `startup_durations_ms` | Duration of individual startup operations in ms. The operation itself is defined through the `operation` label. | `operation`, `class_name`, `shard_name` | `Histogram` |
-| `vector_index_durations_ms` | Duration of regular vector index operation, such as insert or delete. The operation itself is defined through the `operation` label. The `step` label adds more granularity to each operation. | `operation`, `step`, `class_name`, `shard_name` | `Histogram` |
-| `vector_index_maintenance_durations_ms` | Duration of a sync or async vector index maintenance operation. The operation itself is defined through the `operation` label. | `opeartion`, `class_name`, `shard_name` | `Histogram` |
-| `vector_index_operations` | Total number of mutating operations on the vector index. The operation itself is defined by the `operation` label. | `operation`, `class_name`, `shard_name` | `Gauge` |
-| `vector_index_size` | The total capacity of the vector index. Typically larger than the number of vectors imported as it grows proactively. | `class_name`, `shard_name` | `Gauge` |
-| `vector_index_tombstone_cleaned` | Total number of deleted and removed vectors after repair operations. | `class_name`, `shard_name` | `Counter` |
-| `vector_index_tombstone_cleanup_threads` | Number of currently active threads for repairing/cleaning up the vector index after deletes have occurred. | `class_name`, `shard_name` | `Gauge` |
-| `vector_index_tombstones` | Number of currently active tombstones in the vector index. Will go up on each incoming delete and go down after a completed repair operation. | `class_name`, `shard_name` | `Gauge` |
-| `weaviate_build_info` | Provides general information about the build (What version is currently running? How long has this version been running, etc) | `version`, `revision`, `branch`, `goVersion` | `Gauge` |
-| `weaviate_runtime_config_hash` | Hash value of the currently active runtime configuration, useful for tracking when new configurations take effect. | `sha256` | `Gauge` |
-| `weaviate_runtime_config_last_load_success` | Indicates whether the last loading attempt was successful (`1` for success, `0` for failure). | | `Gauge` |
-| `weaviate_schema_collections` | Shows the total number of collections at any given point. | `nodeID` | `Gauge` |
-| `weaviate_schema_shards` | Shows the total number of shards at any given point. | `nodeID`, `status(HOT, COLD, WARM, FROZEN)` | `Gauge` |
-| `weaviate_internal_sample_memberlist_queue_broadcasts` | Shows the number of messages in the broadcast queue of Memberlist. | `quantile=0.5, 0.9, 0.99` | `Summary` |
-| `weaviate_internal_timer_memberlist_gossip` | Shows the latency distribution of the each gossip made in Memberlist. | `quantile=0.5, 0.9, 0.99` | `Summary` |
-| `weaviate_internal_counter_raft_apply` | Number of transactions in the configured interval. | NA | `counter` |
-| `weaviate_internal_counter_raft_state_candidate` | Number of times the raft server initiated an election. | NA | `counter` |
-| `weaviate_internal_counter_raft_state_follower` | Number of times in the configured interval that the raft server became a follower. | NA | `summary` |
-| `weaviate_internal_counter_raft_state_leader` | Number of times the raft server became a leader. | NA | `counter` |
-| `weaviate_internal_counter_raft_transition_heartbeat_timeout` | Number of times that the node transitioned to `candidate` state after not receiving a heartbeat message from the last known leader. | NA | `Counter` |
-| `weaviate_internal_gauge_raft_commitNumLogs` | Number of logs processed for application to the finite state machine in a single batch. | NA | `gauge` |
-| `weaviate_internal_gauge_raft_leader_dispatchNumLogs` | Number of logs committed to disk in the most recent batch. | NA | `gauge` |
-| `weaviate_internal_gauge_raft_leader_oldestLogAge` | The number of milliseconds since the oldest log in the leader's log store was written. This can be important for replication health where write rate is high and the snapshot is large as followers may be unable to recover from a restart if restoring takes longer than the minimum value for the current leader. Compare this with `raft_fsm_lastRestoreDuration` and `aft_rpc_installSnapshot` to monitor. In normal usage this gauge value will grow linearly over time until a snapshot completes on the leader and the log is truncated. | NA | `gauge` |
-| `weaviate_internal_gauge_raft_peers` | The number of peers in the raft cluster configuration. | NA | `gauge` |
-| `weaviate_internal_sample_raft_boltdb_logBatchSize` | Measures the total size in bytes of logs being written to the db in a single batch. | `quantile=0.5, 0.9, 0.99` | `Summary` |
-| `weaviate_internal_sample_raft_boltdb_logSize` | Measures the size of logs being written to the db. | `quantile=0.5, 0.9, 0.99` | `Summary` |
-| `weaviate_internal_sample_raft_boltdb_logsPerBatch` | Measures the number of logs being written per batch to the db. | `quantile=0.5, 0.9, 0.99` | `Summary` |
-| `weaviate_internal_sample_raft_boltdb_writeCapacity` | Theoretical write capacity in terms of the number of logs that can be written per second. Each sample outputs what the capacity would be if future batched log write operations were similar to this one. This similarity encompasses 4 things: batch size, byte size, disk performance and boltdb performance. While none of these will be static and its highly likely individual samples of this metric will vary, aggregating this metric over a larger time window should provide a decent picture into how this BoltDB store can perform | `quantile=0.5, 0.9, 0.99` | `Summary` |
-| `weaviate_internal_sample_raft_thread_fsm_saturation` | An approximate measurement of the proportion of time the Raft FSM goroutine is busy and unavailable to accept new work. | `quantile=0.5, 0.9, 0.99` | `Summary` |
-| `weaviate_internal_sample_raft_thread_main_saturation` | An approximate measurement of the proportion of time the main Raft goroutine is busy and unavailable to accept new work (percentage). | `quantile=0.5, 0.9, 0.99` | `Summary` |
-| `weaviate_internal_timer_raft_boltdb_getLog` | Measures the amount of time spent reading logs from the db (in ms). | `quantile=0.5, 0.9, 0.99` | `Summary` |
-| `weaviate_internal_timer_raft_boltdb_storeLogs` | Time required to record any outstanding logs since the last request to append entries for the given node. | `quantile=0.5, 0.9, 0.99` | `Summary` |
-| `weaviate_internal_timer_raft_commitTime` | Time required to commit a new entry to the raft log on the leader node. | `quantile=0.5, 0.9, 0.99` | `summary` |
-| `weaviate_internal_timer_raft_fsm_apply` | Number of logs committed by the finite state machine since the last interval. | `quantile=0.5, 0.9, 0.99` | `summary` |
-| `weaviate_internal_timer_raft_fsm_enqueue` | Time required to queue up a batch of logs for the finite state machine to apply. | `quantile=0.5, 0.9, 0.99` | `summary` |
-| `weaviate_internal_timer_raft_leader_dispatchLog` | Time required for the leader node to write a log entry to disk. | `quantile=0.5, 0.9, 0.99` | `Summary` |
-| `weaviate_usage_{gcs\|s3}_operations_total` | Total number of operations for module labels | `operation`: collect/upload, `status`: success/error | `Counter` |
-| `weaviate_usage_{gcs\|s3}_operation_latency_seconds` | Latency of usage operations in seconds | `operation`: collect/upload | `Histogram` |
-| `weaviate_usage_{gcs\|s3}_resource_count` | Number of resources tracked by module | `resource_type`: collections/shards/backups | `Gauge` |
-| `weaviate_usage_{gcs\|s3}_uploaded_file_size_bytes` | Size of the uploaded usage file in bytes | NA | `Gauge` |
-
-
-Extending Weaviate with new metrics is very easy. To suggest a new metric, see the [contributor guide](/contributor-guide).
-
-### Versioning
-
-Be aware that metrics do not follow the semantic versioning guidelines of other Weaviate features. Weaviate's main APIs are stable and breaking changes are extremely rare. Metrics, however, have shorter feature lifecycles. It can sometimes be necessary to introduce an incompatible change or entirely remove a metric, for example, because the cost of observing a specific metric in production has grown too high. As a result, it is possible that a Weaviate minor release contains a breaking change for the Monitoring system. If so, it will be clearly highlighted in the release notes.
+### Replication
+
+#### Async replication
+
+These metrics track asynchronous replication operations for maintaining data consistency across replicas.
+
+| Metric | Description | Labels | Type |
+| -------------------------------------------------------- | ------------------------------------------------------------------------ | ------------------------------------- | ----------- |
+| `async_replication_goroutines_running` | Number of currently running async replication goroutines | `type` (hashbeater, hashbeat_trigger) | `Gauge` |
+| `async_replication_hashtree_init_count` | Count of async replication hashtree initializations | None | `Counter` |
+| `async_replication_hashtree_init_running` | Number of currently running hashtree initializations | None | `Gauge` |
+| `async_replication_hashtree_init_failure_count` | Count of async replication hashtree initialization failures | None | `Counter` |
+| `async_replication_hashtree_init_duration_seconds` | Duration of hashtree initialization in seconds | None | `Histogram` |
+| `async_replication_iteration_count` | Count of async replication comparison iterations | None | `Counter` |
+| `async_replication_iteration_failure_count` | Count of async replication iteration failures | None | `Counter` |
+| `async_replication_iteration_duration_seconds` | Duration of async replication comparison iterations in seconds | None | `Histogram` |
+| `async_replication_hashtree_diff_duration_seconds` | Duration of async replication hashtree diff computation in seconds | None | `Histogram` |
+| `async_replication_object_digests_diff_duration_seconds` | Duration of async replication object digests diff computation in seconds | None | `Histogram` |
+| `async_replication_propagation_count` | Count of async replication propagation executions | None | `Counter` |
+| `async_replication_propagation_failure_count` | Count of async replication propagation failures | None | `Counter` |
+| `async_replication_propagation_object_count` | Count of objects propagated by async replication | None | `Counter` |
+| `async_replication_propagation_duration_seconds` | Duration of async replication propagation in seconds | None | `Histogram` |
+
+#### Replication coordinator
+
+These metrics track the replication coordinator's read and write operations across replicas.
+
+| Metric | Description | Labels | Type |
+| ------------------------------------------------- | ------------------------------------------------------------------------------ | ------ | ----------- |
+| `replication_coordinator_writes_succeed_all` | Count of requests succeeding a write to all replicas | None | `Counter` |
+| `replication_coordinator_writes_succeed_some` | Count of requests succeeding a write to some replicas > CL but less than all | None | `Counter` |
+| `replication_coordinator_writes_failed` | Count of requests failing due to consistency level | None | `Counter` |
+| `replication_coordinator_reads_succeed_all` | Count of requests succeeding a read from CL replicas | None | `Counter` |
+| `replication_coordinator_reads_succeed_some` | Count of requests succeeding a read from some replicas < CL but more than zero | None | `Counter` |
+| `replication_coordinator_reads_failed` | Count of requests failing due to read from replicas | None | `Counter` |
+| `replication_read_repair_count` | Count of read repairs started | None | `Counter` |
+| `replication_read_repair_failure` | Count of read repairs failed | None | `Counter` |
+| `replication_coordinator_writes_duration_seconds` | Duration in seconds of write operations to replicas | None | `Histogram` |
+| `replication_coordinator_reads_duration_seconds` | Duration in seconds of read operations from replicas | None | `Histogram` |
+| `replication_read_repair_duration_seconds` | Duration in seconds of read repair operations | None | `Histogram` |
+
+---
+
+
## Sample Dashboards
@@ -150,25 +486,25 @@ your uses perfectly:
| Dashboard | Purpose | Preview |
| ----------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------ |
-| [Cluster Workload in Kubernetes](https://github.com/weaviate/weaviate/blob/main/tools/dev/grafana/dashboards/kubernetes.json) | Visualize cluster workload, usage and activity in Kubernetes |  |
-| [Importing Data Into Weaviate](https://github.com/weaviate/weaviate/blob/master/tools/dev/grafana/dashboards/importing.json) | Visualize speed of import operations (including its components, such as object store, inverted index, and vector index) |  |
-| [Object Operations](https://github.com/weaviate/weaviate/blob/master/tools/dev/grafana/dashboards/objects.json) | Visualize speed of whole object operations, such as GET, PUT, etc. |  |
-| [Vector Index](https://github.com/weaviate/weaviate/blob/master/tools/dev/grafana/dashboards/vectorindex.json) | Visualize the current state, as well as operations on the HNSW vector index |  |
-| [LSM Stores](https://github.com/weaviate/weaviate/blob/master/tools/dev/grafana/dashboards/lsm.json) | Get insights into the internals (including segments) of the various LSM stores within Weaviate |  |
-| [Startup](https://github.com/weaviate/weaviate/blob/master/tools/dev/grafana/dashboards/startup.json) | Visualize the startup process, including recovery operations |  |
-| [Usage](https://github.com/weaviate/weaviate/blob/master/tools/dev/grafana/dashboards/usage.json) | Obtain usage metrics, such as number of objects imported, etc. |  |
-| [Aysnc index queue](https://github.com/weaviate/weaviate/blob/main/tools/dev/grafana/dashboards/index_queue.json) | Observe index queue activity |  |
+| [Cluster Workload in Kubernetes](https://github.com/weaviate/weaviate/blob/main/tools/dev/grafana/dashboards/kubernetes.json) | Visualize cluster workload, usage and activity in Kubernetes |  |
+| [Importing Data Into Weaviate](https://github.com/weaviate/weaviate/blob/master/tools/dev/grafana/dashboards/importing.json) | Visualize speed of import operations (including its components, such as object store, inverted index, and vector index) |  |
+| [Object Operations](https://github.com/weaviate/weaviate/blob/master/tools/dev/grafana/dashboards/objects.json) | Visualize speed of whole object operations, such as GET, PUT, etc. |  |
+| [Vector Index](https://github.com/weaviate/weaviate/blob/master/tools/dev/grafana/dashboards/vectorindex.json) | Visualize the current state, as well as operations on the HNSW vector index |  |
+| [LSM Stores](https://github.com/weaviate/weaviate/blob/master/tools/dev/grafana/dashboards/lsm.json) | Get insights into the internals (including segments) of the various LSM stores within Weaviate |  |
+| [Startup](https://github.com/weaviate/weaviate/blob/master/tools/dev/grafana/dashboards/startup.json) | Visualize the startup process, including recovery operations |  |
+| [Usage](https://github.com/weaviate/weaviate/blob/master/tools/dev/grafana/dashboards/usage.json) | Obtain usage metrics, such as number of objects imported, etc. |  |
+| [Async index queue](https://github.com/weaviate/weaviate/blob/main/tools/dev/grafana/dashboards/index_queue.json) | Observe index queue activity |  |
## `nodes` API Endpoint
To get collection details programmatically, use the [`nodes`](/deploy/configuration/nodes.md) REST endpoint.
-import APIOutputs from '/_includes/rest/node-endpoint-info.mdx';
+import APIOutputs from '/\_includes/rest/node-endpoint-info.mdx';
## Questions and feedback
-import DocsFeedback from '/_includes/docs-feedback.mdx';
+import DocsFeedback from '/\_includes/docs-feedback.mdx';
diff --git a/docs/weaviate/concepts/data-import.mdx b/docs/weaviate/concepts/data-import.mdx
index 5f2b9f154..28a88b7c7 100644
--- a/docs/weaviate/concepts/data-import.mdx
+++ b/docs/weaviate/concepts/data-import.mdx
@@ -29,7 +29,7 @@ For **code examples**, check out the [How-to: Batch import](../manage-objects/im
:::caution Preview
-Server-side batching was added in **`v1.33`** as a **preview** and is **not yet supported in our client libraries**.
+Server-side batching was added in **`v1.34`** as a **preview**.
This means that the feature is still under development and may change in future releases, including potential breaking changes.
**We do not recommend using this feature in production environments at this time.**
diff --git a/docs/weaviate/concepts/filtering.md b/docs/weaviate/concepts/filtering.md
index 899852ab1..0a369348c 100644
--- a/docs/weaviate/concepts/filtering.md
+++ b/docs/weaviate/concepts/filtering.md
@@ -10,7 +10,7 @@ Weaviate provides powerful filtered vector search capabilities, allowing you to
Filtered vector search in Weaviate is based on the concept of pre-filtering. This means that the filter is constructed before the vector search is performed. Unlike some pre-filtering implementations, Weaviate's pre-filtering does not require a brute-force vector search and is highly efficient.
-Starting in `v1.27`, Weaviate introduces its implementation of the [`ACORN`](#acorn) filter strategy. This filtering method significantly improves performance for large datasets, especially when the filter has low correlation with the query vector.
+Starting in `v1.34`, Weaviate uses the [`ACORN`](#acorn) filter strategy as the default. This filtering method significantly improves performance for large datasets, especially when the filter has low correlation with the query vector.
## Post-Filtering vs Pre-Filtering
@@ -34,14 +34,11 @@ In the section about Storage, [we have described in detail which parts make up a
## Filter strategy
-As of `v1.27`, Weaviate supports two filter strategies: `sweeping` and `acorn` specifically for the HNSW index type.
+Weaviate supports two filter strategies: `sweeping` and `acorn` specifically for the HNSW index type.
### ACORN
-:::info Added in `1.27`
-:::
-
-Weaviate `1.27` adds the a new filtering algorithm that is based on the [`ACORN`](https://arxiv.org/html/2403.04871v1) paper. We refer to this as `ACORN`, but the actual implementation in Weaviate is a custom implementation that is inspired by the paper. (References to `ACORN` in this document refer to the Weaviate implementation.)
+The Weaviate filtering algorithm `ACORN` is based on the paper [ACORN: Performant and Predicate-Agnostic Search Over Vector Embeddings and Structured Data](https://arxiv.org/html/2403.04871v1). We refer to this as `ACORN`, but the actual implementation in Weaviate is a custom implementation that is inspired by the paper. (References to `ACORN` in this document refer to the Weaviate implementation.)
The `ACORN` algorithm is designed to speed up filtered searches with the [HNSW index](./indexing/vector-index.md#hierarchical-navigable-small-world-hnsw-index) by the following:
@@ -53,20 +50,17 @@ The `ACORN` algorithm is especially useful when the filter has low correlation w
Our internal testing indicates that for lowly correlated, restrictive filters, the `ACORN` algorithm can be significantly faster, especially for large datasets. If this has been a bottleneck for your use case, we recommend enabling the `ACORN` algorithm.
-As of `v1.27`, the `ACORN` algorithm can be enabled by setting the `filterStrategy` field for the relevant HNSW vector index [in the collection configuration](../manage-collections/vector-config.mdx#set-vector-index-parameters).
-
### Sweeping
-The existing and current default filter strategy in Weaviate is referred to as `sweeping`. This strategy is based on the concept of "sweeping" through the HNSW graph.
+The `sweeping` strategy is based on the concept of "sweeping" through the HNSW graph.
The algorithm starts at the root node and traverses the graph, evaluating the distance to the query vector at each node, while keeping the "allow list" of the filter as context. If the filter is not met, the node is skipped and the traversal continues. This process is repeated until the desired number of results is reached.
-## `indexFilterable` {#indexFilterable}
+The `sweeping` algorithm can be enabled by setting the `filterStrategy` field for the relevant HNSW vector index [in the collection configuration](../manage-collections/vector-config.mdx#set-vector-index-parameters).
-:::info Added in `1.18`
-:::
+## `indexFilterable` {#indexFilterable}
-Weaviate `v1.18.0` adds the `indexFilterable` index that speeds up match-based filtering through use of Roaring Bitmaps. Roaring Bitmaps employ various strategies to add efficiencies, whereby it divides data into chunks and applies an appropriate storage strategy to each one. This enables high data compression and set operations speeds, resulting in faster filtering speeds for Weaviate.
+The `indexFilterable` index speeds up match-based filtering through use of Roaring Bitmaps. Roaring Bitmaps employ various strategies to add efficiencies, whereby it divides data into chunks and applies an appropriate storage strategy to each one. This enables high data compression and set operations speeds, resulting in faster filtering speeds for Weaviate.
If you are dealing with a large dataset, this will likely improve your filtering performance significantly and we therefore encourage you to migrate and re-index.
@@ -74,16 +68,7 @@ In addition, our team maintains our underlying Roaring Bitmap library to address
#### `indexFilterable` for `text` properties
-:::info Added in `1.19`
-:::
-
-A roaring bitmap index for `text` properties is available from `1.19` and up, and it is implemented using two separate (`filterable` & `searchable`) indexes, which replaces the existing single index. You can configure the new `indexFilterable` and `indexSearchable` parameters to determine whether to create the roaring set index and the BM25-suitable Map index, respectively. (Both are enabled by default.)
-
-#### Migration to `indexFilterable` {#migration-to-indexFilterable}
-
-If you are using Weaviate version `< 1.18.0`, you can take advantage of roaring bitmaps by migrating to `1.18.0` or higher, and going through a one-time process to create the new index. Once your Weaviate instance creates the Roaring Bitmap index, it will operate in the background to speed up your work.
-
-This behavior is set through the REINDEX_SET_TO_ROARINGSET_AT_STARTUP [environment variable](/deploy/configuration/env-vars/index.md). If you do not wish for reindexing to occur, you can set this to `false` prior to upgrading.
+A roaring bitmap index for `text` properties is implemented using two separate (`filterable` & `searchable`) indexes, which replaces the existing single index. You can configure the new `indexFilterable` and `indexSearchable` parameters to determine whether to create the roaring set index and the BM25-suitable Map index, respectively. (Both are enabled by default.)
:::info Read more
To learn more about Weaviate's roaring bitmaps implementation, see the [in-line documentation](https://pkg.go.dev/github.com/weaviate/weaviate/adapters/repos/db/lsmkv/roaringset).
@@ -91,10 +76,7 @@ To learn more about Weaviate's roaring bitmaps implementation, see the [in-line
## `indexRangeFilters`
-:::info Added in `1.26`
-:::
-
-Weaviate `1.26` introduces the `indexRangeFilters` index, which is a range-based index for filtering by numerical ranges. This index is available for `int`, `number`, or `date` properties. The index is not available for arrays of these data types.
+The `indexRangeFilters` index is a range-based index for filtering by numerical ranges. This index is available for `int`, `number`, or `date` properties. The index is not available for arrays of these data types.
Internally, rangeable indexes are implemented as roaring bitmap slices. This data structure limits the index to values that can be stored as 64 bit integers.
@@ -114,7 +96,7 @@ The graphic below shows filters of varying levels of restrictiveness. From left
-Version `v1.8.0` introduces the ability to automatically switch to a flat (brute-force) vector search when a filter becomes too restrictive. This scenario only applies to combined vector and scalar searches. For a detailed explanation of why HNSW requires switching to a flat search on certain filters, see this article at [medium](https://medium.com/data-science/effects-of-filtered-hnsw-searches-on-recall-and-latency-434becf8041c). In short, if a filter is very restrictive (i.e. a small percentage of the dataset is matched), an HNSW traversal becomes exhaustive. In other words, the more restrictive the filter becomes, the closer the performance of HNSW is to a brute-force search on the entire dataset. However, with such a restrictive filter, we have already narrowed down the dataset to a small fraction. So if the performance is close to brute-force anyway, it is much more efficient to only search on the matching subset as opposed to the entire dataset.
+Weaviate offers an option to automatically switch to a flat (brute-force) vector search when a filter becomes too restrictive. This scenario only applies to combined vector and scalar searches. For a detailed explanation of why HNSW requires switching to a flat search on certain filters, see this article at [medium](https://medium.com/data-science/effects-of-filtered-hnsw-searches-on-recall-and-latency-434becf8041c). In short, if a filter is very restrictive (i.e. a small percentage of the dataset is matched), an HNSW traversal becomes exhaustive. In other words, the more restrictive the filter becomes, the closer the performance of HNSW is to a brute-force search on the entire dataset. However, with such a restrictive filter, we have already narrowed down the dataset to a small fraction. So if the performance is close to brute-force anyway, it is much more efficient to only search on the matching subset as opposed to the entire dataset.
The following graphic shows filters with varying restrictiveness. From left (0%) to right (100%), the filters become more restrictive. The **cut-off is configured at ~15% of the dataset** size. This means the right side of the dotted line uses a brute-force search.
@@ -128,10 +110,6 @@ The cutoff value can be configured as [part of the `vectorIndexConfig` settings
-:::note Performance improvements from roaring bitmaps
-From `v1.18.0` onwards, Weaviate implements 'Roaring bitmaps' for the inverted index which decreased filtering times, especially for large allow lists. In terms of the above graphs, the *blue* areas will be reduced the most, especially towards the left of the figures.
-:::
-
## Further resources
-:::info Related pages
-- [References: GraphQL API](../api/graphql/index.md)
-:::
+
+- [References: GraphQL API - Filters](../api/graphql/filters.md)
## Questions and feedback
diff --git a/docs/weaviate/concepts/vector-quantization.md b/docs/weaviate/concepts/vector-quantization.md
index 6e9ba661f..581e91f9d 100644
--- a/docs/weaviate/concepts/vector-quantization.md
+++ b/docs/weaviate/concepts/vector-quantization.md
@@ -118,25 +118,24 @@ When SQ is enabled, Weaviate boosts recall by over-fetching compressed results.
## Rotational quantization
+**Rotational quantization (RQ)** provides significant compression while maintaining high recall. Unlike SQ, RQ requires no training phase and can be enabled immediately at index creation. RQ is available in two variants: **8-bit** and **1-bit**.
+
+### 8-bit RQ
+
:::info Added in `v1.32`
-**8-bit Rotational quantization (RQ)** was added in **`v1.32`**.
+**8-bit Rotational quantization (RQ)** for the **HNSW vector index** was added in **`v1.32`**.
:::
:::caution Preview
-**1-bit Rotational quantization (RQ)** was added in **`v1.33`** as a **preview**.
-
+**8-bit Rotational quantization (RQ)** for the **flat vector index** was added in **`v1.34`** as a **preview**.
This means that the feature is still under development and may change in future releases, including potential breaking changes.
**We do not recommend using this feature in production environments at this time.**
:::
-**Rotational quantization (RQ)** is a quantization technique that provides significant compression while maintaining high recall in internal testing. Unlike SQ, RQ requires no training phase and can be enabled immediately at index creation. RQ is available in two variants: **8-bit RQ** and **1-bit RQ**.
-
-### 8-bit RQ
-
8-bit RQ provides 4x compression while maintaining 98-99% recall in internal testing. The method works as follows:
1. **Fast pseudorandom rotation**: The input vector is transformed using a fast rotation based on the Walsh Hadamard Transform. This rotation takes approximately 7-10 microseconds for a 1536-dimensional vector. The output dimension is rounded up to the nearest multiple of 64.
@@ -145,6 +144,20 @@ This means that the feature is still under development and may change in future
### 1-bit RQ
+:::info Added in `v1.33`
+
+**1-bit Rotational quantization (RQ)** for the **HNSW vector index** was added in **`v1.33`**.
+
+:::
+
+:::caution Preview
+
+**1-bit Rotational quantization (RQ)** for the **flat vector index** was added in **`v1.34`** as a **preview**.
+This means that the feature is still under development and may change in future releases, including potential breaking changes.
+**We do not recommend using this feature in production environments at this time.**
+
+:::
+
1-bit RQ is an asymmetric quantization method that provides close to 32x compression as dimensionality increases. **1-bit RQ serves as a more robust and accurate alternative to BQ** with only a slight performance trade-off (approximately 10% decrease in throughput in internal testing compared to BQ). While more performant than PQ in terms of encoding time and distance calculations, 1-bit RQ typically offers slightly lower recall than well-tuned PQ.
The method works as follows:
@@ -203,7 +216,7 @@ You might be also interested in our blog post [HNSW+PQ - Exploring ANN algorithm
### With a flat index
-[BQ](#binary-quantization) can use a [flat index](./indexing/inverted-index.md). A flat index search reads from disk, compression reduces the amount of data Weaviate has to read so searches are faster.
+[RQ](#rotational-quantization) and [BQ](#binary-quantization) can be applied to a [flat index](./indexing/inverted-index.md). As a flat index search is a brute-force method, compression reduces the amount of data Weaviate has to read and increases speed.
## Rescoring
diff --git a/docs/weaviate/config-refs/indexing/vector-index.mdx b/docs/weaviate/config-refs/indexing/vector-index.mdx
index 3165c06ee..62334e560 100644
--- a/docs/weaviate/config-refs/indexing/vector-index.mdx
+++ b/docs/weaviate/config-refs/indexing/vector-index.mdx
@@ -4,7 +4,7 @@ description: Reference for vector index types and parameters in Weaviate.
---
**[Vector indexes](../../concepts/indexing/vector-index.md)** facilitate efficient, vector-first data storage and retrieval.
-There are three supported vector index types:
+There are three supported vector index types:
- **[HNSW index](#hnsw-index)**
- **[Flat index](#flat-index)**
@@ -42,24 +42,24 @@ HNSW indexes are scalable and super fast at query time, but HNSW algorithms are
Some HNSW parameters are mutable, but others cannot be modified after you create your collection.
-| Parameter | Type | Description | Default | Mutable |
-| :----------------------- | :------ | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :--------- | :------ |
-| `cleanupIntervalSeconds` | integer | Cleanup frequency. This value does not normally need to be adjusted. A higher value means cleanup runs less frequently, but it does more in a single batch. A lower value means cleanup is more frequent, but it may be less efficient on each run. | 300 | Yes |
-| `distance` | string | Distance metric. The metric that measures the distance between two arbitrary vectors. For available distance metrics, see [supported distance metrics](/weaviate/config-refs/distances.md). | `cosine` | No |
-| `ef` | integer | Balance search speed and recall. `ef` is the size of the dynamic list that the HNSW uses during search. Search is more accurate when `ef` is higher, but it is also slower. `ef` values greater than 512 show diminishing improvements in recall.
Dynamic `ef`. Weaviate automatically adjusts the `ef` value and creates a dynamic `ef` list when `ef` is set to -1. For more details, see [dynamic ef](../../concepts/indexing/vector-index.md#dynamic-ef). | -1 | Yes |
-| `efConstruction` | integer | Balance index search speed and build speed. A high `efConstruction` value means you can lower your `ef` settings, but importing is slower.
`efConstruction` must be greater than 0. | 128 | No |
-| `maxConnections` | integer | Maximum number of connections per element. `maxConnections` is the connection limit per layer for layers above the zero layer. The zero layer can have (2 \* `maxConnections`) connections.
`maxConnections` must be greater than 0. | 32 | No |
-| `dynamicEfMin` | integer | Lower bound for [dynamic `ef`](../../concepts/indexing/vector-index.md#dynamic-ef). Protects against a creating search list that is too short.
This setting is only used when `ef` is -1. | 100 | Yes |
-| `dynamicEfMax` | integer | Upper bound for [dynamic `ef`](../../concepts/indexing/vector-index.md#dynamic-ef). Protects against creating a search list that is too long.
If `dynamicEfMax` is higher than the limit, `dynamicEfMax` does not have any effect. In this case, `ef` is the limit.
This setting is only used when `ef` is -1. | 500 | Yes |
-| `dynamicEfFactor` | integer | Multiplier for [dynamic `ef`](../../concepts/indexing/vector-index.md#dynamic-ef). Sets the potential length of the search list.
This setting is only used when `ef` is -1. | 8 | Yes |
-| `filterStrategy` | string | Added in `v1.27.0`. The filter strategy to use for filtering the search results. The filter strategy can be set to `sweeping` or `acorn`.
- `sweeping`: The default filter strategy.
- `acorn`: Uses Weaviate's ACORN implementation. [Read more](../../concepts/filtering.md#filter-strategy) | `sweeping` | Yes |
-| `flatSearchCutoff` | integer | Optional. Threshold for the [flat-search cutoff](/weaviate/concepts/filtering.md#flat-search-cutoff). To force a vector index search, set `"flatSearchCutoff": 0`. | 40000 | Yes |
-| `skip` | boolean | When true, do not index the collection.
Weaviate decouples vector creation and vector storage. If you skip vector indexing, but a vectorizer is configured (or a vector is provided manually), Weaviate logs a warning each import.
To skip indexing and vector generation, set `"vectorizer": "none"` when you set `"skip": true`.
See [When to skip indexing](../../concepts/indexing/vector-index.md#when-to-skip-indexing). | `false` | No |
-| `vectorCacheMaxObjects` | integer | Maximum number of objects in the memory cache. By default, this limit is set to one trillion (`1e12`) objects when a new collection is created. For sizing recommendations, see [Vector cache considerations](../../concepts/indexing/vector-index.md#vector-cache-considerations). | `1e12` | Yes |
-| `rq` | object | Enable and configure [rotational quantization (RQ)](/weaviate/concepts/indexing/vector-index.md) compression.
For RQ configuration details, see [RQ configuration parameters](#pq-parameters). | -- | Yes |
-| `pq` | object | Enable and configure [product quantization (PQ)](/weaviate/concepts/indexing/vector-index.md) compression.
PQ assumes some data has already been loaded. You should have 10,000 to 100,000 vectors per shard loaded before you enable PQ.
For PQ configuration details, see [PQ configuration parameters](#pq-parameters). | -- | Yes |
-| `bq` | object | Enable and configure [binery quantization (BQ)](/weaviate/concepts/indexing/vector-index.md) compression.
For BQ configuration details, see [BQ configuration parameters](#bq-parameters). | -- | Yes |
-| `sq` | object | Enable and configure [product quantization (SQ)](/weaviate/concepts/indexing/vector-index.md) compression.
For SQ configuration details, see [SQ configuration parameters](#sq-parameters). | -- | Yes |
+| Parameter | Type | Description | Default | Mutable |
+| :----------------------- | :------ | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------- | :------ |
+| `cleanupIntervalSeconds` | integer | Cleanup frequency. This value does not normally need to be adjusted. A higher value means cleanup runs less frequently, but it does more in a single batch. A lower value means cleanup is more frequent, but it may be less efficient on each run. | 300 | Yes |
+| `distance` | string | Distance metric. The metric that measures the distance between two arbitrary vectors. For available distance metrics, see [supported distance metrics](/weaviate/config-refs/distances.md). | `cosine` | No |
+| `ef` | integer | Balance search speed and recall. `ef` is the size of the dynamic list that the HNSW uses during search. Search is more accurate when `ef` is higher, but it is also slower. `ef` values greater than 512 show diminishing improvements in recall.
Dynamic `ef`. Weaviate automatically adjusts the `ef` value and creates a dynamic `ef` list when `ef` is set to -1. For more details, see [dynamic ef](../../concepts/indexing/vector-index.md#dynamic-ef). | -1 | Yes |
+| `efConstruction` | integer | Balance index search speed and build speed. A high `efConstruction` value means you can lower your `ef` settings, but importing is slower.
`efConstruction` must be greater than 0. | 128 | No |
+| `maxConnections` | integer | Maximum number of connections per element. `maxConnections` is the connection limit per layer for layers above the zero layer. The zero layer can have (2 \* `maxConnections`) connections.
`maxConnections` must be greater than 0. | 32 | No |
+| `dynamicEfMin` | integer | Lower bound for [dynamic `ef`](../../concepts/indexing/vector-index.md#dynamic-ef). Protects against a creating search list that is too short.
This setting is only used when `ef` is -1. | 100 | Yes |
+| `dynamicEfMax` | integer | Upper bound for [dynamic `ef`](../../concepts/indexing/vector-index.md#dynamic-ef). Protects against creating a search list that is too long.
If `dynamicEfMax` is higher than the limit, `dynamicEfMax` does not have any effect. In this case, `ef` is the limit.
This setting is only used when `ef` is -1. | 500 | Yes |
+| `dynamicEfFactor` | integer | Multiplier for [dynamic `ef`](../../concepts/indexing/vector-index.md#dynamic-ef). Sets the potential length of the search list.
This setting is only used when `ef` is -1. | 8 | Yes |
+| `filterStrategy` | string | The filter strategy to use for filtering the search results. The filter strategy can be set to [`acorn`](../../concepts/filtering.md#acorn) (default as of `v1.34`) or [`sweeping`](../../concepts/filtering.md#sweeping). | `acorn` | Yes |
+| `flatSearchCutoff` | integer | Optional. Threshold for the [flat-search cutoff](/weaviate/concepts/filtering.md#flat-search-cutoff). To force a vector index search, set `"flatSearchCutoff": 0`. | 40000 | Yes |
+| `skip` | boolean | When true, do not index the collection.
Weaviate decouples vector creation and vector storage. If you skip vector indexing, but a vectorizer is configured (or a vector is provided manually), Weaviate logs a warning each import.
To skip indexing and vector generation, set `"vectorizer": "none"` when you set `"skip": true`.
See [When to skip indexing](../../concepts/indexing/vector-index.md#when-to-skip-indexing). | `false` | No |
+| `vectorCacheMaxObjects` | integer | Maximum number of objects in the memory cache. By default, this limit is set to one trillion (`1e12`) objects when a new collection is created. For sizing recommendations, see [Vector cache considerations](../../concepts/indexing/vector-index.md#vector-cache-considerations). | `1e12` | Yes |
+| `rq` | object | Enable and configure [rotational quantization (RQ)](/weaviate/concepts/indexing/vector-index.md) compression.
For RQ configuration details, see [RQ configuration parameters](#rq-parameters). | -- | Yes |
+| `pq` | object | Enable and configure [product quantization (PQ)](/weaviate/concepts/indexing/vector-index.md) compression.
PQ assumes some data has already been loaded. You should have 10,000 to 100,000 vectors per shard loaded before you enable PQ.
For PQ configuration details, see [PQ configuration parameters](#pq-parameters). | -- | Yes |
+| `bq` | object | Enable and configure [binary quantization (BQ)](/weaviate/concepts/indexing/vector-index.md) compression.
For BQ configuration details, see [BQ configuration parameters](#bq-parameters). | -- | Yes |
+| `sq` | object | Enable and configure [scalar quantization (SQ)](/weaviate/concepts/indexing/vector-index.md) compression.
For SQ configuration details, see [SQ configuration parameters](#sq-parameters). | -- | Yes |
### Database parameters for HNSW
@@ -71,7 +71,7 @@ Note that some database-level parameters are available to configure HNSW indexin
Preferably, the `PERSISTENCE_HNSW_MAX_LOG_SIZE` should set to a value close to the size of the HNSW graph.
-- [`DEFAULT_QUANTIZATION`](/deploy/configuration/env-vars/index.md#DEFAULT_QUANTIZATION) is a database-level parameter that defines which quantization technique will be used by default when creating new collections.
+- [`DEFAULT_QUANTIZATION`](/deploy/configuration/env-vars/index.md#DEFAULT_QUANTIZATION) is a database-level parameter that defines which quantization technique will be used by default when creating new collections.
### Tombstone cleanup parameters
diff --git a/docs/weaviate/configuration/compression/rq-compression.md b/docs/weaviate/configuration/compression/rq-compression.md
index 122fe0762..f8eb81179 100644
--- a/docs/weaviate/configuration/compression/rq-compression.md
+++ b/docs/weaviate/configuration/compression/rq-compression.md
@@ -21,17 +21,19 @@ import CompressionByDefault from '/\_includes/compression-by-default.mdx';
- **8-bit RQ**: Up to 4x compression while retaining almost perfect recall (98-99% on most datasets). **Recommended** for most use cases.
- **1-bit RQ**: Close to 32x compression as dimensionality increases with moderate recall across various datasets.
-:::note HNSW only
+## 8-bit RQ
-RQ is currently not supported for the flat index type.
+:::info Added in `v1.32`
-:::
+**8-bit Rotational quantization (RQ)** for the **HNSW vector index** was added in **`v1.32`**.
-## 8-bit RQ
+:::
-:::info Added in `v1.32`
+:::caution Preview
-**8-bit Rotational quantization (RQ)** was added in **`v1.32`**.
+**8-bit Rotational quantization (RQ)** for the **flat vector index** was added in **`v1.34`** as a **preview**.
+This means that the feature is still under development and may change in future releases, including potential breaking changes.
+**We do not recommend using this feature in production environments at this time.**
:::
@@ -117,10 +119,15 @@ RQ can also be enabled for an existing collection by updating the collection def
## 1-bit RQ
-:::caution Preview
+:::info Added in `v1.32`
+
+**1-bit Rotational quantization (RQ)** for the **HNSW vector index** was added in **`v1.33`**.
-**1-bit Rotational quantization (RQ)** was added in **`v1.33`** as a **preview**.
+:::
+
+:::caution Preview
+**1-bit Rotational quantization (RQ)** for the **flat vector index** was added in **`v1.34`** as a **preview**.
This means that the feature is still under development and may change in future releases, including potential breaking changes.
**We do not recommend using this feature in production environments at this time.**
diff --git a/docs/weaviate/manage-collections/vector-config.mdx b/docs/weaviate/manage-collections/vector-config.mdx
index a446085f4..0f6e15a02 100644
--- a/docs/weaviate/manage-collections/vector-config.mdx
+++ b/docs/weaviate/manage-collections/vector-config.mdx
@@ -310,10 +310,6 @@ The vector index type can be set for each collection at creation time, between `
Set vector index parameters such as [compression](../configuration/compression/index.md) and [filter strategy](../concepts/filtering.md#filter-strategy) through collection configuration. Some parameters can be [updated later](collection-operations.mdx#update-a-collection-definition) after collection creation.
-:::info Filter strategy parameter
-Was added in `v1.27`
-:::
-
+Server-side batching was added in **`v1.34`** as a **preview**.
+This means that the feature is still under development and may change in future releases, including potential breaking changes.
+**We do not recommend using this feature in production environments at this time.**
:::
-
-
-```python
-# Python support coming soon
-```
-
+
diff --git a/docs/weaviate/model-providers/_includes/integration_contextualai_rag.png b/docs/weaviate/model-providers/_includes/integration_contextualai_rag.png
new file mode 100644
index 000000000..b2c5dac19
Binary files /dev/null and b/docs/weaviate/model-providers/_includes/integration_contextualai_rag.png differ
diff --git a/docs/weaviate/model-providers/_includes/integration_contextualai_rag_grouped.png b/docs/weaviate/model-providers/_includes/integration_contextualai_rag_grouped.png
new file mode 100644
index 000000000..79c641e85
Binary files /dev/null and b/docs/weaviate/model-providers/_includes/integration_contextualai_rag_grouped.png differ
diff --git a/docs/weaviate/model-providers/_includes/integration_contextualai_rag_single.png b/docs/weaviate/model-providers/_includes/integration_contextualai_rag_single.png
new file mode 100644
index 000000000..ced03cd1b
Binary files /dev/null and b/docs/weaviate/model-providers/_includes/integration_contextualai_rag_single.png differ
diff --git a/docs/weaviate/model-providers/_includes/integration_contextualai_reranker.png b/docs/weaviate/model-providers/_includes/integration_contextualai_reranker.png
new file mode 100644
index 000000000..1fb79d1ae
Binary files /dev/null and b/docs/weaviate/model-providers/_includes/integration_contextualai_reranker.png differ
diff --git a/docs/weaviate/model-providers/_includes/provider.connect.py b/docs/weaviate/model-providers/_includes/provider.connect.py
index c6a03fc55..484c469e6 100644
--- a/docs/weaviate/model-providers/_includes/provider.connect.py
+++ b/docs/weaviate/model-providers/_includes/provider.connect.py
@@ -25,6 +25,10 @@
# Recommended: save sensitive data as environment variables
cohere_key = os.getenv("COHERE_APIKEY")
# END CohereInstantiation
+# START ContextualAIInstantiation
+# Recommended: save sensitive data as environment variables
+contextual_key = os.getenv("CONTEXTUAL_API_KEY")
+# END ContextualAIInstantiation
# START DatabricksInstantiation
# Recommended: save sensitive data as environment variables
databricks_token = os.getenv("DATABRICKS_TOKEN")
@@ -100,6 +104,9 @@
# START CohereInstantiation
"X-Cohere-Api-Key": cohere_key,
# END CohereInstantiation
+# START ContextualAIInstantiation
+ "X-ContextualAI-Api-Key": contextual_key,
+# END ContextualAIInstantiation
# START DatabricksInstantiation
"X-Databricks-Token": databricks_token,
# END DatabricksInstantiation
diff --git a/docs/weaviate/model-providers/_includes/provider.connect.ts b/docs/weaviate/model-providers/_includes/provider.connect.ts
index 959656e8c..dab23b19a 100644
--- a/docs/weaviate/model-providers/_includes/provider.connect.ts
+++ b/docs/weaviate/model-providers/_includes/provider.connect.ts
@@ -16,6 +16,9 @@ const aws_secret_key = process.env.AWS_SECRET_KEY || ''; // Replace with your A
// START CohereInstantiation
const cohereApiKey = process.env.COHERE_APIKEY || ''; // Replace with your inference API key
// END CohereInstantiation
+// START ContextualAIInstantiation
+const contextualApiKey = process.env.CONTEXTUAL_API_KEY || ''; // Replace with your inference API key
+// END ContextualAIInstantiation
// START DatabricksInstantiation
const databricksToken = process.env.DATABRICKS_TOKEN || ''; // Replace with your inference API key
// END DatabricksInstantiation
@@ -78,6 +81,9 @@ const client = await weaviate.connectToWeaviateCloud(
// START CohereInstantiation
'X-Cohere-Api-Key': cohereApiKey,
// END CohereInstantiation
+ // START ContextualAIInstantiation
+ 'X-ContextualAI-Api-Key': contextualApiKey,
+ // END ContextualAIInstantiation
// START DatabricksInstantiation
'X-Databricks-Token': databricksToken,
// END DatabricksInstantiation
diff --git a/docs/weaviate/model-providers/_includes/provider.generative.py b/docs/weaviate/model-providers/_includes/provider.generative.py
index 427684b47..250b35e47 100644
--- a/docs/weaviate/model-providers/_includes/provider.generative.py
+++ b/docs/weaviate/model-providers/_includes/provider.generative.py
@@ -421,6 +421,92 @@ def import_data():
# clean up
client.collections.delete("DemoCollection")
+# START BasicGenerativeContextualAI
+from weaviate.classes.config import Configure
+
+client.collections.create(
+ "DemoCollection",
+ # highlight-start
+ generative_config=Configure.Generative.contextualai()
+ # highlight-end
+ # Additional parameters not shown
+)
+# END BasicGenerativeContextualAI
+
+# clean up
+client.collections.delete("DemoCollection")
+
+# START GenerativeContextualAICustomModel
+from weaviate.classes.config import Configure
+
+client.collections.create(
+ "DemoCollection",
+ # highlight-start
+ generative_config=Configure.Generative.contextualai(
+ model="v2"
+ )
+ # highlight-end
+ # Additional parameters not shown
+)
+# END GenerativeContextualAICustomModel
+
+# clean up
+client.collections.delete("DemoCollection")
+
+
+# START FullGenerativeContextualAI
+from weaviate.classes.config import Configure
+
+client.collections.create(
+ "DemoCollection",
+ # highlight-start
+ generative_config=Configure.Generative.contextualai(
+ # # These parameters are optional
+ # model="v2",
+ # temperature=0.7,
+ # max_tokens=1024,
+ # top_p=0.9,
+        # system_prompt="You are a helpful assistant",
+ # avoid_commentary=True,
+ # knowledge=["Custom knowledge override", "Additional context"],
+ )
+ # highlight-end
+ # Additional parameters not shown
+)
+# END FullGenerativeContextualAI
+
+# clean up
+client.collections.delete("DemoCollection")
+import_data()
+
+# START RuntimeModelSelectionContextualAI
+from weaviate.classes.config import Configure
+from weaviate.classes.generate import GenerativeConfig
+
+collection = client.collections.use("DemoCollection")
+response = collection.generate.near_text(
+ query="A holiday film",
+ limit=2,
+ grouped_task="Write a tweet promoting these two movies",
+ # highlight-start
+ generative_provider=GenerativeConfig.contextualai(
+ # # These parameters are optional
+ # model="v2",
+ # temperature=0.7,
+ # max_tokens=1024,
+ # top_p=0.9,
+        # system_prompt="You are a helpful assistant",
+ # avoid_commentary=True,
+ # knowledge=["Custom knowledge override", "Additional context"],
+ ),
+ # Additional parameters not shown
+ # highlight-end
+)
+# END RuntimeModelSelectionContextualAI
+
+# clean up
+client.collections.delete("DemoCollection")
+
# START BasicGenerativeDatabricks
from weaviate.classes.config import Configure
diff --git a/docs/weaviate/model-providers/_includes/provider.generative.ts b/docs/weaviate/model-providers/_includes/provider.generative.ts
index 90a216fb0..144746d83 100644
--- a/docs/weaviate/model-providers/_includes/provider.generative.ts
+++ b/docs/weaviate/model-providers/_includes/provider.generative.ts
@@ -5,10 +5,10 @@ import assert from 'assert';
// ================================
import weaviate from 'weaviate-client';
-// START RuntimeModelSelectionAnthropic // START WorkingWithImagesAnthropic // START WorkingWithImagesAWS // START WorkingWithImagesGoogle // START WorkingWithImagesOpenAI // START RuntimeModelSelectionAnyscale // START RuntimeModelSelectionMistral // START RuntimeModelSelectionOpenAI // START RuntimeModelSelectionAWS // START RuntimeModelSelectionCohere // START RuntimeModelSelectionDatabricks // START RuntimeModelSelectionFriendliAI // START RuntimeModelSelectionGoogle // START RuntimeModelSelectionNVIDIA // START RuntimeModelSelectionKubeAI // START RuntimeModelSelectionAzureOpenAI // START RuntimeModelSelectionOllama // START RuntimeModelSelectionxAI
+// START RuntimeModelSelectionAnthropic // START WorkingWithImagesAnthropic // START WorkingWithImagesAWS // START WorkingWithImagesGoogle // START WorkingWithImagesOpenAI // START RuntimeModelSelectionAnyscale // START RuntimeModelSelectionMistral // START RuntimeModelSelectionOpenAI // START RuntimeModelSelectionAWS // START RuntimeModelSelectionCohere // START RuntimeModelSelectionDatabricks // START RuntimeModelSelectionFriendliAI // START RuntimeModelSelectionGoogle // START RuntimeModelSelectionNVIDIA // START RuntimeModelSelectionKubeAI // START RuntimeModelSelectionAzureOpenAI // START RuntimeModelSelectionOllama // START RuntimeModelSelectionxAI
import { generativeParameters } from 'weaviate-client';
-// END RuntimeModelSelectionAnthropic // END WorkingWithImagesAnthropic // END WorkingWithImagesAWS // END WorkingWithImagesGoogle // END WorkingWithImagesOpenAI // END RuntimeModelSelectionAnyscale // END RuntimeModelSelectionMistral // END RuntimeModelSelectionOpenAI // END RuntimeModelSelectionAWS // END RuntimeModelSelectionCohere // END RuntimeModelSelectionDatabricks // END RuntimeModelSelectionFriendliAI // END RuntimeModelSelectionGoogle // END RuntimeModelSelectionNVIDIA // END RuntimeModelSelectionKubeAI // END RuntimeModelSelectionAzureOpenAI // END RuntimeModelSelectionOllama // END RuntimeModelSelectionxAI
+// END RuntimeModelSelectionAnthropic // END WorkingWithImagesAnthropic // END WorkingWithImagesAWS // END WorkingWithImagesGoogle // END WorkingWithImagesOpenAI // END RuntimeModelSelectionAnyscale // END RuntimeModelSelectionMistral // END RuntimeModelSelectionOpenAI // END RuntimeModelSelectionAWS // END RuntimeModelSelectionCohere // END RuntimeModelSelectionDatabricks // END RuntimeModelSelectionFriendliAI // END RuntimeModelSelectionGoogle // END RuntimeModelSelectionNVIDIA // END RuntimeModelSelectionKubeAI // END RuntimeModelSelectionAzureOpenAI // END RuntimeModelSelectionOllama // END RuntimeModelSelectionxAI
// START WorkingWithImagesAnthropic // START WorkingWithImagesAWS // START WorkingWithImagesGoogle // START WorkingWithImagesOpenAI
function arrayBufferToBase64(buffer: ArrayBuffer): string {
@@ -306,7 +306,7 @@ console.log("Grouped task result:", response.generative?.text)
})();
// Clean up
- await client.collections.delete('DemoCollection');
+await client.collections.delete('DemoCollection');
// START BasicGenerativeCohere
await client.collections.create({
@@ -377,6 +377,78 @@ response = await myCollection.generate.nearText("A holiday film", {
)
// END RuntimeModelSelectionCohere
+// Clean up
+await client.collections.delete('DemoCollection');
+
+// START BasicGenerativeContextualAI
+await client.collections.create({
+ name: 'DemoCollection',
+ // highlight-start
+ generative: weaviate.configure.generative.contextualai(),
+ // highlight-end
+ // Additional parameters not shown
+});
+// END BasicGenerativeContextualAI
+
+// Clean up
+await client.collections.delete('DemoCollection');
+
+// START GenerativeContextualAICustomModel
+await client.collections.create({
+ name: 'DemoCollection',
+ // highlight-start
+ generative: weaviate.configure.generative.contextualai({
+ model: 'v2'
+ }),
+ // highlight-end
+ // Additional parameters not shown
+});
+// END GenerativeContextualAICustomModel
+
+// Clean up
+await client.collections.delete('DemoCollection');
+
+
+// START FullGenerativeContextualAI
+await client.collections.create({
+ name: 'DemoCollection',
+ // highlight-start
+ generative: weaviate.configure.generative.contextualai({
+ // These parameters are optional
+ model: 'v2',
+ // temperature: 0.7,
+ // maxTokens: 1024,
+ // topP: 0.9,
+ // systemPrompt: 'You are a helpful assistant',
+ // avoidCommentary: true,
+ // knowledge: ['Custom knowledge override', 'Additional context'],
+ }),
+ // highlight-end
+ // Additional parameters not shown
+});
+// END FullGenerativeContextualAI
+
+// START RuntimeModelSelectionContextualAI
+response = await myCollection.generate.nearText("A holiday film", {
+ // highlight-start
+ groupedTask: "Write a tweet promoting these two movies",
+ config: generativeParameters.contextualai({
+ // These parameters are optional
+ // model: 'v2',
+ // temperature: 0.7,
+ // maxTokens: 1024,
+ // topP: 0.9,
+ // systemPrompt: 'You are a helpful assistant',
+ // avoidCommentary: true,
+ // knowledge: ['Custom knowledge override', 'Additional context'],
+ }),
+ // highlight-end
+}, {
+ limit: 2,
+}
+ // Additional parameters not shown
+)
+// END RuntimeModelSelectionContextualAI
// Clean up
await client.collections.delete('DemoCollection');
@@ -1168,4 +1240,4 @@ for (const obj of groupedTaskResults.objects) {
client.close();
}
-void main()
\ No newline at end of file
+void main()
diff --git a/docs/weaviate/model-providers/_includes/provider.reranker.py b/docs/weaviate/model-providers/_includes/provider.reranker.py
index e8faa923b..0a3d50198 100644
--- a/docs/weaviate/model-providers/_includes/provider.reranker.py
+++ b/docs/weaviate/model-providers/_includes/provider.reranker.py
@@ -60,6 +60,40 @@
# Clean up
client.collections.delete("DemoCollection")
+# START RerankerContextualAIBasic
+from weaviate.classes.config import Configure
+
+client.collections.create(
+ "DemoCollection",
+ # highlight-start
+ reranker_config=Configure.Reranker.contextualai()
+ # highlight-end
+ # Additional parameters not shown
+)
+# END RerankerContextualAIBasic
+
+# Clean up
+client.collections.delete("DemoCollection")
+
+# START RerankerContextualAICustomModel
+from weaviate.classes.config import Configure
+
+client.collections.create(
+ "DemoCollection",
+ # highlight-start
+ reranker_config=Configure.Reranker.contextualai(
+ model="ctxl-rerank-v2-instruct-multilingual",
+ instruction="Prioritize internal sales documents over market analysis reports. More recent documents should be weighted higher.",
+ top_n=5
+ )
+ # highlight-end
+ # Additional parameters not shown
+)
+# END RerankerContextualAICustomModel
+
+# Clean up
+client.collections.delete("DemoCollection")
+
# START RerankerJinaAIBasic
from weaviate.classes.config import Configure
diff --git a/docs/weaviate/model-providers/_includes/provider.reranker.ts b/docs/weaviate/model-providers/_includes/provider.reranker.ts
index 0a2cc29b0..3a9c4781e 100644
--- a/docs/weaviate/model-providers/_includes/provider.reranker.ts
+++ b/docs/weaviate/model-providers/_includes/provider.reranker.ts
@@ -41,6 +41,28 @@ await client.collections.create({
});
// END RerankerCohereCustomModel
+// START RerankerContextualAIBasic
+await client.collections.create({
+ name: 'DemoCollection',
+ // highlight-start
+ reranker: weaviate.configure.reranker.contextualai(),
+ // highlight-end
+});
+// END RerankerContextualAIBasic
+
+// START RerankerContextualAICustomModel
+await client.collections.create({
+ name: 'DemoCollection',
+ // highlight-start
+ reranker: weaviate.configure.reranker.contextualai({
+ model: 'ctxl-rerank-v2-instruct-multilingual',
+ instruction: 'Prioritize internal sales documents over market analysis reports. More recent documents should be weighted higher.',
+ topN: 5,
+ }),
+ // highlight-end
+});
+// END RerankerContextualAICustomModel
+
// START RerankerJinaAIBasic
await client.collections.create({
name: 'DemoCollection',
diff --git a/docs/weaviate/model-providers/contextualai/_category_.json b/docs/weaviate/model-providers/contextualai/_category_.json
new file mode 100644
index 000000000..c00052585
--- /dev/null
+++ b/docs/weaviate/model-providers/contextualai/_category_.json
@@ -0,0 +1,4 @@
+{
+ "label": "Contextual AI",
+ "position": 223
+}
diff --git a/docs/weaviate/model-providers/contextualai/generative.md b/docs/weaviate/model-providers/contextualai/generative.md
new file mode 100644
index 000000000..8f28afaf6
--- /dev/null
+++ b/docs/weaviate/model-providers/contextualai/generative.md
@@ -0,0 +1,304 @@
+---
+title: Generative AI
+description: Contextual AI Generative Model Provider
+sidebar_position: 50
+image: og/docs/integrations/provider_integrations_contextualai.jpg
+# tags: ['model providers', 'contextualai', 'generative', 'rag']
+---
+
+# Contextual AI Generative AI with Weaviate
+
+:::info Added in `v1.34.0`
+:::
+
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+import FilteredTextBlock from '@site/src/components/Documentation/FilteredTextBlock';
+import PyConnect from '!!raw-loader!../_includes/provider.connect.py';
+import TSConnect from '!!raw-loader!../_includes/provider.connect.ts';
+import PyCode from '!!raw-loader!../_includes/provider.generative.py';
+import TSCode from '!!raw-loader!../_includes/provider.generative.ts';
+
+Weaviate's integration with Contextual AI's APIs allows you to access their models' capabilities directly from Weaviate.
+
+[Configure a Weaviate collection](#configure-collection) to use a generative AI model with Contextual AI. Weaviate will perform retrieval augmented generation (RAG) using the specified model and your Contextual AI API key.
+
+More specifically, Weaviate will perform a search, retrieve the most relevant objects, and then pass them to the Contextual AI generative model to generate outputs.
+
+
+
+## Requirements
+
+### Weaviate configuration
+
+Your Weaviate instance must be configured with the Contextual AI generative AI integration (`generative-contextualai`) module.
+
+
+ For Weaviate Cloud (WCD) users
+
+This integration is enabled by default on Weaviate Cloud (WCD) serverless instances.
+
+
+
+
+ For self-hosted users
+
+- Check the [cluster metadata](/deploy/configuration/meta.md) to verify if the module is enabled.
+- Follow the [how-to configure modules](../../configuration/modules.md) guide to enable the module in Weaviate.
+
+
+
+### API credentials
+
+You must provide a valid Contextual AI API key to Weaviate for this integration. Go to [Contextual AI](https://contextual.ai/) to sign up and obtain an API key.
+
+Provide the API key to Weaviate using one of the following methods:
+
+- Set the `CONTEXTUAL_API_KEY` environment variable that is available to Weaviate.
+- Provide the API key at runtime, as shown in the examples below.
+
+
+
+
+
+
+
+
+
+
+
+
+
+## Configure collection
+
+import MutableGenerativeConfig from '/_includes/mutable-generative-config.md';
+
+
+
+[Configure a Weaviate index](../../manage-collections/generative-reranker-models.mdx#specify-a-generative-model-integration) as follows to use a Contextual AI generative AI model:
+
+
+
+
+
+
+
+
+
+
+
+
+### Select a model
+
+You can specify one of the [available models](#available-models) for Weaviate to use, as shown in the following configuration example:
+
+
+
+
+
+
+
+
+
+
+
+
+You can [specify](#generative-parameters) one of the [available models](#available-models) for Weaviate to use. The [default model](#available-models) is used if no model is specified.
+
+### Generative parameters
+
+Configure the following generative parameters to customize the model behavior.
+
+
+
+
+
+
+
+
+
+
+
+
+For further details on model parameters, see the [Contextual AI API documentation](https://docs.contextual.ai/api-reference/generate/generate).
+
+If a parameter is not specified, Weaviate uses the server-side default for that parameter. They are:
+
+- model = `"v2"`
+- temperature = `0.0`
+- topP = `0.9`
+- maxNewTokens = `1024`
+- systemPrompt = `""`
+- avoidCommentary = `false`
+- knowledge = `nil`
+
+## Select a model at runtime
+
+Aside from setting the default model provider when creating the collection, you can also override it at query time.
+
+
+
+
+
+
+
+
+
+
+## Header parameters
+
+You can provide the API key as well as some optional parameters at runtime through additional headers in the request. The following headers are available:
+
+- `X-ContextualAI-Api-Key`: The Contextual AI API key.
+
+Any additional headers provided at runtime will override the existing Weaviate configuration.
+
+Provide the headers as shown in the [API credentials examples](#api-credentials) above.
+
+## Retrieval augmented generation
+
+After configuring the generative AI integration, perform RAG operations, either with the [single prompt](#single-prompt) or [grouped task](#grouped-task) method.
+
+### Single prompt
+
+
+
+To generate text for each object in the search results, use the single prompt method.
+
+The example below generates outputs for each of the `n` search results, where `n` is specified by the `limit` parameter.
+
+When creating a single prompt query, use braces `{}` to interpolate the object properties you want Weaviate to pass on to the language model. For example, to pass on the object's `title` property, include `{title}` in the query.
+
+
+
+
+
+
+
+
+
+
+
+
+
+### Grouped task
+
+
+
+To generate one text for the entire set of search results, use the grouped task method.
+
+In other words, when you have `n` search results, the generative model generates one output for the entire group.
+
+
+
+
+
+
+
+
+
+
+
+
+
+## References
+
+### Available models
+
+Currently, the following Contextual AI generative AI models are available for use with Weaviate:
+
+- `v1`
+- `v2` (default)
+
+## Further resources
+
+### Other integrations
+
+- [Contextual AI reranker models + Weaviate](./reranker.md).
+
+### Code examples
+
+Once the integrations are configured at the collection, the data management and search operations in Weaviate work identically to any other collection. See the following model-agnostic examples:
+
+- The [How-to: Manage collections](../../manage-collections/index.mdx) and [How-to: Manage objects](../../manage-objects/index.mdx) guides show how to perform data operations (i.e. create, read, update, delete collections and objects within them).
+- The [How-to: Query & Search](../../search/index.mdx) guides show how to perform search operations (i.e. vector, keyword, hybrid) as well as retrieval augmented generation.
+
+### References
+
+- Contextual AI [Generate API documentation](https://docs.contextual.ai/api-reference/generate/generate)
+
+## Questions and feedback
+
+import DocsFeedback from '/_includes/docs-feedback.mdx';
+
+
diff --git a/docs/weaviate/model-providers/contextualai/index.md b/docs/weaviate/model-providers/contextualai/index.md
new file mode 100644
index 000000000..a2da982c4
--- /dev/null
+++ b/docs/weaviate/model-providers/contextualai/index.md
@@ -0,0 +1,55 @@
+---
+title: Contextual AI + Weaviate
+sidebar_position: 10
+image: og/docs/integrations/provider_integrations_contextualai.jpg
+# tags: ['model providers', 'contextualai']
+---
+
+
+
+Contextual AI offers a wide range of models for natural language processing and generation. Weaviate seamlessly integrates with Contextual AI's APIs, allowing users to leverage Contextual AI's models directly from the Weaviate Database.
+
+These integrations empower developers to build sophisticated AI-driven applications with ease.
+
+## Integrations with Contextual AI
+
+### Generative AI models for RAG
+
+
+
+Contextual AI's generative AI models can generate human-like text based on given prompts and contexts.
+
+[Weaviate's generative AI integration](./generative.md) enables users to perform retrieval augmented generation (RAG) directly from the Weaviate Database. This combines Weaviate's efficient storage and fast retrieval capabilities with Contextual AI's generative AI models to generate personalized and context-aware responses.
+
+[Contextual AI generative AI integration page](./generative.md)
+
+### Reranker models
+
+
+
+Contextual AI's reranker models are designed to improve the relevance and ranking of search results.
+
+[The Weaviate reranker integration](./reranker.md) allows users to easily refine their search results by leveraging Contextual AI's reranker models.
+
+[Contextual AI reranker integration page](./reranker.md)
+
+## Summary
+
+These integrations enable developers to leverage Contextual AI's powerful models directly within Weaviate.
+
+In turn, they simplify the process of building AI-driven applications to speed up your development process, so that you can focus on creating innovative solutions.
+
+## Get started
+
+You must provide a valid Contextual AI API key to Weaviate for these integrations. Go to [Contextual AI](https://contextual.ai/) to sign up and obtain an API key.
+
+Then, go to the relevant integration page to learn how to configure Weaviate with the Contextual AI models and start using them in your applications.
+
+- [Generative AI](./generative.md)
+- [Reranker](./reranker.md)
+
+## Questions and feedback
+
+import DocsFeedback from '/_includes/docs-feedback.mdx';
+
+
diff --git a/docs/weaviate/model-providers/contextualai/reranker.md b/docs/weaviate/model-providers/contextualai/reranker.md
new file mode 100644
index 000000000..e18be3c00
--- /dev/null
+++ b/docs/weaviate/model-providers/contextualai/reranker.md
@@ -0,0 +1,209 @@
+---
+title: Reranker
+description: Contextual AI Reranker Model Provider
+sidebar_position: 70
+image: og/docs/integrations/provider_integrations_contextualai.jpg
+# tags: ['model providers', 'contextualai', 'reranking']
+---
+
+# Contextual AI Reranker Models with Weaviate
+
+:::info Added in `v1.34.0`
+:::
+
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+import FilteredTextBlock from '@site/src/components/Documentation/FilteredTextBlock';
+import PyConnect from '!!raw-loader!../_includes/provider.connect.py';
+import TSConnect from '!!raw-loader!../_includes/provider.connect.ts';
+import PyCode from '!!raw-loader!../_includes/provider.reranker.py';
+import TSCode from '!!raw-loader!../_includes/provider.reranker.ts';
+
+Weaviate's integration with Contextual AI's APIs allows you to access their models' capabilities directly from Weaviate.
+
+[Configure a Weaviate collection](#configure-the-reranker) to use a Contextual AI reranker model, and Weaviate will use the specified model and your Contextual AI API key to rerank search results.
+
+This two-step process involves Weaviate first performing a search and then reranking the results using the specified model.
+
+
+
+## Requirements
+
+### Weaviate configuration
+
+Your Weaviate instance must be configured with the Contextual AI reranker integration (`reranker-contextualai`) module.
+
+
+ For Weaviate Cloud (WCD) users
+
+This integration is enabled by default on Weaviate Cloud (WCD) serverless instances.
+
+
+
+
+ For self-hosted users
+
+- Check the [cluster metadata](/deploy/configuration/meta.md) to verify if the module is enabled.
+- Follow the [how-to configure modules](../../configuration/modules.md) guide to enable the module in Weaviate.
+
+
+
+### API credentials
+
+You must provide a valid Contextual AI API key to Weaviate for this integration. Go to [Contextual AI](https://contextual.ai/) to sign up and obtain an API key.
+
+Provide the API key to Weaviate using one of the following methods:
+
+- Set the `CONTEXTUAL_API_KEY` environment variable that is available to Weaviate.
+- Provide the API key at runtime, as shown in the examples below.
+
+
+
+
+
+
+
+
+
+
+
+
+
+## Configure the reranker
+
+import MutableRerankerConfig from '/_includes/mutable-reranker-config.md';
+
+
+
+Configure a Weaviate collection to use a Contextual AI reranker model as follows:
+
+
+
+
+
+
+
+
+
+
+
+
+### Reranker parameters
+
+Configure the reranker behavior, including the model to use, through the following parameters:
+
+
+
+
+
+
+
+
+
+
+
+
+The [default model](#available-models) is used if no model is specified.
+
+For further details on model parameters, see the [Contextual AI API documentation](https://docs.contextual.ai/api-reference/rerank/rerank).
+
+## Header parameters
+
+You can provide the API key as well as some optional parameters at runtime through additional headers in the request. The following headers are available:
+
+- `X-ContextualAI-Api-Key`: The Contextual AI API key.
+
+Any additional headers provided at runtime will override the existing Weaviate configuration.
+
+Provide the headers as shown in the [API credentials examples](#api-credentials) above.
+
+## Reranking query
+
+Once the reranker is configured, Weaviate performs [reranking operations](../../search/rerank.md) using the specified Contextual AI model.
+
+More specifically, Weaviate performs an initial search, then reranks the results using the specified model.
+
+Any search in Weaviate can be combined with a reranker to perform reranking operations.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+## Available models
+
+- `ctxl-rerank-v1-instruct`
+- `ctxl-rerank-v2-instruct-multilingual-mini`
+- `ctxl-rerank-v2-instruct-multilingual` (default)
+
+## Further resources
+
+### Other integrations
+
+- [Contextual AI generative models + Weaviate](./generative.md).
+
+### Code examples
+
+Once the integrations are configured at the collection, the data management and search operations in Weaviate work identically to any other collection. See the following model-agnostic examples:
+
+- The [How-to: Manage collections](../../manage-collections/index.mdx) and [How-to: Manage objects](../../manage-objects/index.mdx) guides show how to perform data operations (i.e. create, read, update, delete collections and objects within them).
+- The [How-to: Query & Search](../../search/index.mdx) guides show how to perform search operations (i.e. vector, keyword, hybrid) as well as retrieval augmented generation.
+
+### References
+
+- Contextual AI [Rerank API documentation](https://docs.contextual.ai/api-reference/rerank/rerank)
+
+## Questions and feedback
+
+import DocsFeedback from '/_includes/docs-feedback.mdx';
+
+
diff --git a/docs/weaviate/model-providers/databricks/_category_.json b/docs/weaviate/model-providers/databricks/_category_.json
index 6d57dd6c8..62833ebda 100644
--- a/docs/weaviate/model-providers/databricks/_category_.json
+++ b/docs/weaviate/model-providers/databricks/_category_.json
@@ -1,4 +1,4 @@
{
"label": "Databricks",
- "position": 221
+ "position": 225
}
diff --git a/docs/weaviate/model-providers/friendliai/_category_.json b/docs/weaviate/model-providers/friendliai/_category_.json
index f7611a6c0..c00e1815e 100644
--- a/docs/weaviate/model-providers/friendliai/_category_.json
+++ b/docs/weaviate/model-providers/friendliai/_category_.json
@@ -1,4 +1,4 @@
{
"label": "FriendliAI",
- "position": 221
+ "position": 226
}
diff --git a/docs/weaviate/model-providers/google/_category_.json b/docs/weaviate/model-providers/google/_category_.json
index 1b646bcca..0a810b7dd 100644
--- a/docs/weaviate/model-providers/google/_category_.json
+++ b/docs/weaviate/model-providers/google/_category_.json
@@ -1,4 +1,4 @@
{
"label": "Google",
- "position": 225
+ "position": 230
}
diff --git a/docs/weaviate/model-providers/huggingface/_category_.json b/docs/weaviate/model-providers/huggingface/_category_.json
index 72d5fb090..451f860c8 100644
--- a/docs/weaviate/model-providers/huggingface/_category_.json
+++ b/docs/weaviate/model-providers/huggingface/_category_.json
@@ -1,4 +1,4 @@
{
"label": "Hugging Face (API)",
- "position": 230
+ "position": 235
}
diff --git a/docs/weaviate/model-providers/index.md b/docs/weaviate/model-providers/index.md
index acd404d1c..147879565 100644
--- a/docs/weaviate/model-providers/index.md
+++ b/docs/weaviate/model-providers/index.md
@@ -22,6 +22,7 @@ This enables an enhanced developer experience, such as the ability to:
| [Anyscale](./anyscale/index.md) | - | [Text](./anyscale/generative.md) | - |
| [AWS](./aws/index.md) | [Text](./aws/embeddings.md) | [Text](./aws/generative.md) |
| [Cohere](./cohere/index.md) | [Text](./cohere/embeddings.md), [Multimodal](./cohere/embeddings-multimodal.md) | [Text](./cohere/generative.md) | [Reranker](./cohere/reranker.md) |
+| [Contextual AI](./contextualai/index.md) | - | [Text](./contextualai/generative.md) | [Reranker](./contextualai/reranker.md) |
| [Databricks](./databricks/index.md) | [Text](./databricks/embeddings.md) | [Text](./databricks/generative.md) | - |
| [FriendliAI](./friendliai/index.md) | - | [Text](./friendliai/generative.md) | - |
| [Google](./google/index.md) | [Text](./google/embeddings.md), [Multimodal](./google/embeddings-multimodal.md) | [Text](./google/generative.md) | - |
@@ -38,7 +39,7 @@ This enables an enhanced developed experience, such as the ability to:
#### Enable all API-based modules
-All API-based model integrations are available by default starting with Weaviate `v1.33`. For older versions, you can enable them all by setting the [`ENABLE_API_BASED_MODULES` environment variable](/deploy/configuration/env-vars#ENABLE_API_BASED_MODULES) to `true`.
+All API-based model integrations are available by default starting with Weaviate `v1.33`. For older versions, you can enable them all by setting the [`ENABLE_API_BASED_MODULES` environment variable](/deploy/configuration/env-vars#ENABLE_API_BASED_MODULES) to `true`.
### Locally hosted
@@ -53,7 +54,7 @@ All API-based model integrations are available by default starting with Weaviate
import AcademyAdmonition from '@site/src/components/AcademyAdmonition';
-
diff --git a/docs/weaviate/starter-guides/managing-resources/compression.mdx b/docs/weaviate/starter-guides/managing-resources/compression.mdx
index f6ef15779..cae0d3cc1 100644
--- a/docs/weaviate/starter-guides/managing-resources/compression.mdx
+++ b/docs/weaviate/starter-guides/managing-resources/compression.mdx
@@ -41,7 +41,7 @@ This table shows the compression algorithms that are available for each index ty
| :--------------- | :--------- | :--------- | :------------ |
| PQ | Yes | No | Yes |
| SQ | Yes | No | Yes |
-| RQ | Yes | No | Yes |
+| RQ | Yes | Yes | Yes |
| BQ | Yes | Yes | Yes |
The [dynamic index](/weaviate/config-refs/indexing/vector-index.mdx#dynamic-index) is new in v1.25. This type of index is a [flat index](/weaviate/config-refs/indexing/vector-index.mdx#flat-index) until a collection reaches a threshold size. When the collection grows larger than the threshold size, the default is 10,000 objects, the collection is automatically reindexed and converted to an HNSW index.
@@ -130,12 +130,10 @@ Most applications benefit from compression. The cost savings are significant. In
- For most users with HNSW indexes who want the best combination of simplicity, performance, and recall, **consider 8-bit RQ compression**. RQ provides 4x compression with 98-99% recall and requires no configuration or training. It's ideal for standard use cases with embeddings from providers like OpenAI.
-- If you have a small collection that uses a flat index, consider a BQ index. The BQ index is 32 times smaller and much faster than the uncompressed equivalent.
+- If you have a small collection that uses a flat index, consider RQ compression. The flat index with RQ enabled is smaller and much faster than the uncompressed equivalent.
- If you have a very large data set or specialized search needs, consider PQ compression. PQ compression is very configurable, but it requires more expertise to tune well than SQ, RQ, or BQ.
-For collections that are small, but that are expected to grow, consider a dynamic index. In addition to setting the dynamic index type, configure the collection to use BQ compression while the index is flat and RQ compression when the collection grows large enough to move from a flat index to an HNSW index.
-
## Further resources
To enable compression, follow the steps on these pages:
diff --git a/docs/weaviate/tutorials/_includes/bulk-import.py b/docs/weaviate/tutorials/_includes/bulk-import.py
index 5dd68049c..f9dfc9345 100644
--- a/docs/weaviate/tutorials/_includes/bulk-import.py
+++ b/docs/weaviate/tutorials/_includes/bulk-import.py
@@ -53,37 +53,37 @@
)
# END CreateCollection
-# # START ServerSideBatch
-# # Server-side batching (automatic mode) - Recommended approach
-# # The server manages the import flow automatically
-# with collection.batch.automatic() as batch:
-# # Import data
-# for data_row in data_rows:
-# batch.add_object(
-# properties=data_row,
-# )
-
-# # Optional: Stop if too many errors
-# if batch.number_errors > 10:
-# print("Batch import stopped due to excessive errors.")
-# break
-
-# # Check for failed objects
-# failed_objects = collection.batch.failed_objects
-# if failed_objects:
-# print(f"Number of failed imports: {len(failed_objects)}")
-# print(f"First failed object: {failed_objects[0]}")
-# else:
-# print("All objects imported successfully!")
-
-# # Verify server-side batch import
-# result = collection.aggregate.over_all(total_count=True)
-# assert len(failed_objects) == 0, f"Server-side batch had {len(failed_objects)} failures"
-# assert (
-# result.total_count == expected_count
-# ), f"Expected {expected_count} objects, got {result.total_count}"
-# print(f"✓ Server-side batch: {result.total_count} objects imported successfully")
-# # END ServerSideBatch
+# START ServerSideBatch
+# Server-side batching (automatic mode)
+# The server manages the import flow automatically
+with collection.batch.experimental() as batch:
+ # Import data
+ for data_row in data_rows:
+ batch.add_object(
+ properties=data_row,
+ )
+
+ # Optional: Stop if too many errors
+ if batch.number_errors > 10:
+ print("Batch import stopped due to excessive errors.")
+ break
+
+# Check for failed objects
+failed_objects = collection.batch.failed_objects
+if failed_objects:
+ print(f"Number of failed imports: {len(failed_objects)}")
+ print(f"First failed object: {failed_objects[0]}")
+else:
+ print("All objects imported successfully!")
+
+# Verify server-side batch import
+result = collection.aggregate.over_all(total_count=True)
+assert len(failed_objects) == 0, f"Server-side batch had {len(failed_objects)} failures"
+assert (
+ result.total_count == expected_count
+), f"Expected {expected_count} objects, got {result.total_count}"
+print(f"✓ Server-side batch: {result.total_count} objects imported successfully")
+# END ServerSideBatch
# Alternative approach - Client-side batching
# Clean and recreate collection for demo
diff --git a/docs/weaviate/tutorials/import.mdx b/docs/weaviate/tutorials/import.mdx
index 48c59b215..6e84c6889 100644
--- a/docs/weaviate/tutorials/import.mdx
+++ b/docs/weaviate/tutorials/import.mdx
@@ -94,11 +94,12 @@ Load the data from the JSON file.
:::caution Preview
-Server-side batching was added in **`v1.33`** as a **preview** and is **not yet supported in our client libraries**.
+Server-side batching was added in **`v1.34`** as a **preview**.
+This means that the feature is still under development and may change in future releases, including potential breaking changes.
+**We do not recommend using this feature in production environments at this time.**
:::
-
### Option B: Client-side batching
diff --git a/pyproject.toml b/pyproject.toml
index 52e7cf08d..c61fa2ee3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,6 +17,9 @@ dependencies = [
"python-dotenv>=1.1.1",
"tqdm>=4.67.1",
"weaviate-agents>=0.12.0",
- "weaviate-client>=4.16.6",
+ "weaviate-client",
"weaviate-demo-datasets>=0.7.0",
]
+
+[tool.uv.sources]
+weaviate-client = { git = "https://github.com/weaviate/weaviate-python-client.git", rev = "dev/1.34" }
diff --git a/sidebars.js b/sidebars.js
index 563d05758..9f570e1e5 100644
--- a/sidebars.js
+++ b/sidebars.js
@@ -165,6 +165,19 @@ const sidebars = {
"weaviate/model-providers/cohere/reranker",
],
},
+ {
+ type: "category",
+ label: "Contextual AI",
+ className: "sidebar-item",
+ link: {
+ type: "doc",
+ id: "weaviate/model-providers/contextualai/index",
+ },
+ items: [
+ "weaviate/model-providers/contextualai/generative",
+ "weaviate/model-providers/contextualai/reranker",
+ ],
+ },
{
type: "category",
label: "Databricks",
diff --git a/static/og/docs/integrations/provider_integrations_contextualai.jpg b/static/og/docs/integrations/provider_integrations_contextualai.jpg
new file mode 100644
index 000000000..62c4ab44e
Binary files /dev/null and b/static/og/docs/integrations/provider_integrations_contextualai.jpg differ
diff --git a/tests/docker-compose-anon-2.yml b/tests/docker-compose-anon-2.yml
index 004ba518f..abaecb154 100644
--- a/tests/docker-compose-anon-2.yml
+++ b/tests/docker-compose-anon-2.yml
@@ -8,7 +8,7 @@ services:
- '8080'
- --scheme
- http
- image: cr.weaviate.io/semitechnologies/weaviate:1.33.0
+ image: cr.weaviate.io/semitechnologies/weaviate:1.34.0-rc.1
ports:
- 8090:8080
- 50061:50051
diff --git a/tests/docker-compose-anon-bind.yml b/tests/docker-compose-anon-bind.yml
index c8024466d..140ceeb3f 100644
--- a/tests/docker-compose-anon-bind.yml
+++ b/tests/docker-compose-anon-bind.yml
@@ -8,7 +8,7 @@ services:
- '8080'
- --scheme
- http
- image: cr.weaviate.io/semitechnologies/weaviate:1.33.0
+ image: cr.weaviate.io/semitechnologies/weaviate:1.34.0-rc.1
ports:
- 8380:8080
- 50351:50051
diff --git a/tests/docker-compose-anon-clip.yml b/tests/docker-compose-anon-clip.yml
index 7ffb63147..77819b478 100644
--- a/tests/docker-compose-anon-clip.yml
+++ b/tests/docker-compose-anon-clip.yml
@@ -8,7 +8,7 @@ services:
- '8080'
- --scheme
- http
- image: cr.weaviate.io/semitechnologies/weaviate:1.33.0
+ image: cr.weaviate.io/semitechnologies/weaviate:1.34.0-rc.1
ports:
- 8280:8080
- 50251:50051
diff --git a/tests/docker-compose-anon-offload.yml b/tests/docker-compose-anon-offload.yml
index e7f0d4969..6744d495a 100644
--- a/tests/docker-compose-anon-offload.yml
+++ b/tests/docker-compose-anon-offload.yml
@@ -8,7 +8,7 @@ services:
- '8080'
- --scheme
- http
- image: cr.weaviate.io/semitechnologies/weaviate:1.33.0
+ image: cr.weaviate.io/semitechnologies/weaviate:1.34.0-rc.1
ports:
- 8080:8080
- 50051:50051
diff --git a/tests/docker-compose-anon.yml b/tests/docker-compose-anon.yml
index 1908e555f..653d79160 100644
--- a/tests/docker-compose-anon.yml
+++ b/tests/docker-compose-anon.yml
@@ -8,7 +8,7 @@ services:
- '8080'
- --scheme
- http
- image: cr.weaviate.io/semitechnologies/weaviate:1.33.0
+ image: cr.weaviate.io/semitechnologies/weaviate:1.34.0-rc.1
ports:
- 8080:8080
- 50051:50051
diff --git a/tests/docker-compose-rbac.yml b/tests/docker-compose-rbac.yml
index 5d6845574..257b719d2 100644
--- a/tests/docker-compose-rbac.yml
+++ b/tests/docker-compose-rbac.yml
@@ -7,7 +7,7 @@ services:
- '8080'
- --scheme
- http
- image: cr.weaviate.io/semitechnologies/weaviate:1.33.0
+ image: cr.weaviate.io/semitechnologies/weaviate:1.34.0-rc.1
ports:
- 8580:8080
- 50551:50051
@@ -43,7 +43,7 @@ services:
KEYCLOAK_ADMIN: admin
KEYCLOAK_ADMIN_PASSWORD: admin
KC_HTTP_ENABLED: 'true'
- KC_HTTP_PORT: '8081'
+ KC_HTTP_PORT: '8081'
KC_HOSTNAME_STRICT: 'false'
KC_HOSTNAME_STRICT_HTTPS: 'false'
# Use localhost so it works from both inside and outside Docker
@@ -61,4 +61,4 @@ services:
# interval: 10s
volumes:
- keycloak_data:
\ No newline at end of file
+ keycloak_data:
diff --git a/tests/docker-compose-three-nodes.yml b/tests/docker-compose-three-nodes.yml
index b16f19380..53250f443 100644
--- a/tests/docker-compose-three-nodes.yml
+++ b/tests/docker-compose-three-nodes.yml
@@ -8,7 +8,7 @@ services:
- '8080'
- --scheme
- http
- image: cr.weaviate.io/semitechnologies/weaviate:1.33.0
+ image: cr.weaviate.io/semitechnologies/weaviate:1.34.0-rc.1
restart: on-failure:0
ports:
- "8180:8080"
@@ -35,7 +35,7 @@ services:
- '8080'
- --scheme
- http
- image: cr.weaviate.io/semitechnologies/weaviate:1.33.0
+ image: cr.weaviate.io/semitechnologies/weaviate:1.34.0-rc.1
restart: on-failure:0
ports:
- "8181:8080"
@@ -63,7 +63,7 @@ services:
- '8080'
- --scheme
- http
- image: cr.weaviate.io/semitechnologies/weaviate:1.33.0
+ image: cr.weaviate.io/semitechnologies/weaviate:1.34.0-rc.1
restart: on-failure:0
ports:
- "8182:8080"
diff --git a/tests/docker-compose.yml b/tests/docker-compose.yml
index a6d0f9ebe..714f22284 100644
--- a/tests/docker-compose.yml
+++ b/tests/docker-compose.yml
@@ -8,7 +8,7 @@ services:
- '8080'
- --scheme
- http
- image: cr.weaviate.io/semitechnologies/weaviate:1.33.0
+ image: cr.weaviate.io/semitechnologies/weaviate:1.34.0-rc.1
ports:
- 8099:8080
- 50052:50051
diff --git a/uv.lock b/uv.lock
index 3924f4eda..7586642dc 100644
--- a/uv.lock
+++ b/uv.lock
@@ -660,7 +660,7 @@ requires-dist = [
{ name = "python-dotenv", specifier = ">=1.1.1" },
{ name = "tqdm", specifier = ">=4.67.1" },
{ name = "weaviate-agents", specifier = ">=0.12.0" },
- { name = "weaviate-client", specifier = ">=4.16.6" },
+ { name = "weaviate-client", git = "https://github.com/weaviate/weaviate-python-client.git?rev=dev%2F1.34" },
{ name = "weaviate-demo-datasets", specifier = ">=0.7.0" },
]
@@ -919,19 +919,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/34/80/de3eb55eb581815342d097214bed4c59e806b05f1b3110df03b2280d6dfd/grpcio-1.74.0-cp313-cp313-win_amd64.whl", hash = "sha256:fd3c71aeee838299c5887230b8a1822795325ddfea635edd82954c1eaa831e24", size = 4489214 },
]
-[[package]]
-name = "grpcio-health-checking"
-version = "1.74.0"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
- { name = "grpcio" },
- { name = "protobuf" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/82/69/87ad1cd826a9f06567bccb66f0c3228a0758ea47a1d35eef15db36b2f492/grpcio_health_checking-1.74.0.tar.gz", hash = "sha256:d6749451d4cef543c3f6260ae9a86c84b9ab02a92421cecae73a632e7fe920bf", size = 16770 }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/7e/62/2fd797be7514855a0b28f946f57b9a134bba25a9873c269fa730d8fd49d8/grpcio_health_checking-1.74.0-py3-none-any.whl", hash = "sha256:9a6e7bcdf16395105425753d6e3fe31f57cb6ab3f232282254b50f02cc63d9c7", size = 18921 },
-]
-
[[package]]
name = "h11"
version = "0.16.0"
@@ -3138,21 +3125,17 @@ wheels = [
[[package]]
name = "weaviate-client"
-version = "4.16.6"
-source = { registry = "https://pypi.org/simple" }
+version = "4.17.1.dev18+g191416eb2"
+source = { git = "https://github.com/weaviate/weaviate-python-client.git?rev=dev%2F1.34#191416eb2d048f91acbd4e8322c4efda326937f8" }
dependencies = [
{ name = "authlib" },
{ name = "deprecation" },
{ name = "grpcio" },
- { name = "grpcio-health-checking" },
{ name = "httpx" },
+ { name = "protobuf" },
{ name = "pydantic" },
{ name = "validators" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/8f/1e/b44262cd9edff939f7a6e40b6134d737a28bcdb0445cbdf2af9544953658/weaviate_client-4.16.6.tar.gz", hash = "sha256:79064bd976b0ec6bee09507f74481711bcbc861bcc097ca37db22bcf948771e6", size = 779904 }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/5c/d2/7cf098b1d14dd237a81b84012f0c4cdea355d2312b10410148384fa8b39a/weaviate_client-4.16.6-py3-none-any.whl", hash = "sha256:8eafcac785876bc731b7dedd7272a93b530fc5ed807ab54b6d74f9493a014dec", size = 597469 },
-]
[[package]]
name = "weaviate-demo-datasets"
diff --git a/versions-config.json b/versions-config.json
index f67e50191..794058daa 100644
--- a/versions-config.json
+++ b/versions-config.json
@@ -1,7 +1,7 @@
{
"COMMENT1": "These values are used for yarn local yarn builds",
"COMMENT2": "Build time values are set in _build_scripts/update-config-versions.js",
- "weaviate_version": "1.33.0",
+ "weaviate_version": "1.34.0",
"helm_version": "17.3.3",
"weaviate_cli_version": "3.2.2",
"weaviate_agents_version": "1.0.1",