diff --git a/docs/components/vectordbs/config.mdx b/docs/components/vectordbs/config.mdx
index 5169781e92..dec4db1e85 100644
--- a/docs/components/vectordbs/config.mdx
+++ b/docs/components/vectordbs/config.mdx
@@ -6,7 +6,7 @@ Config in mem0 is a dictionary that specifies the settings for your vector datab
 
 The config is defined as a Python dictionary with two main keys:
 - `vector_store`: Specifies the vector database provider and its configuration
-  - `provider`: The name of the vector database (e.g., "chroma", "pgvector", "qdrant", "milvus","azure_ai_search")
+  - `provider`: The name of the vector database (e.g., "chroma", "pgvector", "qdrant", "milvus", "azure_ai_search", "couchbase")
   - `config`: A nested dictionary containing provider-specific settings
 
 ## How to Use Config
diff --git a/docs/components/vectordbs/dbs/couchbase.mdx b/docs/components/vectordbs/dbs/couchbase.mdx
new file mode 100644
index 0000000000..84d033637d
--- /dev/null
+++ b/docs/components/vectordbs/dbs/couchbase.mdx
@@ -0,0 +1,50 @@
+[Couchbase](https://www.couchbase.com/) is a distributed NoSQL database platform that delivers high performance,
+scalability, and availability for critical applications.
+It also offers AI coding assistance for developers and AI services for building applications
+that include RAG-powered agents, real-time analytics, and cloud-to-edge vector search.
+
+### Usage
+
+```python
+import os
+from mem0 import Memory
+
+os.environ["OPENAI_API_KEY"] = "sk-xx"
+
+config = {
+    "vector_store": {
+        "provider": "couchbase",
+        "config": {
+            "connection_str": "couchbase://localhost",
+            "username": "Administrator",
+            "password": "password",
+            "bucket_name": "mem0",
+            "scope_name": "_default",
+            "collection_name": "_default",
+            "embedding_model_dims": 1536,
+            "index_name": "_default_index",
+        }
+    }
+}
+
+m = Memory.from_config(config)
+m.add("Likes to play cricket on weekends", user_id="alice", metadata={"category": "hobbies"})
+```
+
+### Config
+
+Let's see the available parameters for the `couchbase` config:
+
+| Parameter | Description | Default Value |
+| --- | --- | --- |
+| `connection_str` | The connection string for the Couchbase server | `None` |
+| `username` | The username for the Couchbase server | `None` |
+| `password` | The password for the Couchbase server | `None` |
+| `bucket_name` | The name of the bucket to store the vectors | `mem0` |
+| `scope_name` | The name of the scope to store the vectors | `_default` |
+| `collection_name` | The name of the collection to store the vectors | `_default` |
+| `embedding_model_dims` | Dimensions of the embedding model | `1536` |
+| `index_name` | The name of the index to create for the vectors | `_default_index` |
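+
+Once memories are stored, you can query them through the standard `Memory` API; the Couchbase
+store is used transparently underneath:
+
+```python
+related = m.search("What does Alice like to do on weekends?", user_id="alice")
+all_memories = m.get_all(user_id="alice")
+```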
diff --git a/docs/components/vectordbs/overview.mdx b/docs/components/vectordbs/overview.mdx
index 5364507a7e..fb5845bea8 100644
--- a/docs/components/vectordbs/overview.mdx
+++ b/docs/components/vectordbs/overview.mdx
@@ -15,6 +15,7 @@ See the list of supported vector databases below.
 
 
+
 
 ## Usage
@@ -33,4 +34,3 @@
 for example 768, you may encounter the error below:
 `ValueError: shapes (0,1536) and (768,) not aligned: 1536 (dim 1) != 768 (dim 0)`
 you can add `"embedding_model_dims": 768,` to the config of the vector_store to overcome this issue.
-
diff --git a/docs/mint.json b/docs/mint.json
index 6cfbefb920..70a205c9cf 100644
--- a/docs/mint.json
+++ b/docs/mint.json
@@ -113,7 +113,8 @@
               "components/vectordbs/dbs/pgvector",
               "components/vectordbs/dbs/milvus",
               "components/vectordbs/dbs/azure_ai_search",
-              "components/vectordbs/dbs/redis"
+              "components/vectordbs/dbs/redis",
+              "components/vectordbs/dbs/couchbase"
             ]
           }
         ]
diff --git a/mem0/configs/vector_stores/couchbase.py b/mem0/configs/vector_stores/couchbase.py
new file mode 100644
index 0000000000..9a4bc4e8ed
--- /dev/null
+++ b/mem0/configs/vector_stores/couchbase.py
@@ -0,0 +1,28 @@
+from typing import Any, Dict, Optional
+
+from pydantic import BaseModel, Field, model_validator
+
+
+class CouchbaseConfig(BaseModel):
+    connection_str: str = Field(..., description="Connection string for Couchbase server")
+    username: str = Field(..., description="Username for Couchbase authentication")
+    password: str = Field(..., description="Password for Couchbase authentication")
+    bucket_name: str = Field(..., description="Name of the Couchbase bucket")
+    scope_name: Optional[str] = Field("_default", description="Name of the scope")
+    collection_name: Optional[str] = Field("_default", description="Name of the collection")
+    index_name: Optional[str] = Field(None, description="Name of the search index")
+    embedding_model_dims: Optional[int] = Field(1536, description="Dimensions of the embedding model")
+
+    @model_validator(mode="before")
+    @classmethod
+    def validate_extra_fields(cls, values: Dict[str, Any]) -> Dict[str, Any]:
+        allowed_fields = set(cls.model_fields.keys())
+        input_fields = set(values.keys())
+        extra_fields = input_fields - allowed_fields
+        if extra_fields:
+            raise ValueError(
+                f"Extra fields not allowed: {', '.join(extra_fields)}. "
+                f"Please input only the following fields: {', '.join(allowed_fields)}"
+            )
+        return values
diff --git a/mem0/utils/factory.py b/mem0/utils/factory.py
index bdff8fe234..759ab9b483 100644
--- a/mem0/utils/factory.py
+++ b/mem0/utils/factory.py
@@ -66,6 +66,7 @@ class VectorStoreFactory:
         "milvus": "mem0.vector_stores.milvus.MilvusDB",
         "azure_ai_search": "mem0.vector_stores.azure_ai_search.AzureAISearch",
         "redis": "mem0.vector_stores.redis.RedisDB",
+        "couchbase": "mem0.vector_stores.couchbase.Couchbase",
     }
 
     @classmethod
diff --git a/mem0/vector_stores/configs.py b/mem0/vector_stores/configs.py
index 75768d9661..75e5388348 100644
--- a/mem0/vector_stores/configs.py
+++ b/mem0/vector_stores/configs.py
@@ -17,6 +17,7 @@ class VectorStoreConfig(BaseModel):
         "milvus": "MilvusDBConfig",
         "azure_ai_search": "AzureAISearchConfig",
         "redis": "RedisDBConfig",
+        "couchbase": "CouchbaseConfig",
     }
 
     @model_validator(mode="after")
diff --git a/mem0/vector_stores/couchbase.py b/mem0/vector_stores/couchbase.py
new file mode 100644
index 0000000000..93452d07cb
--- /dev/null
+++ b/mem0/vector_stores/couchbase.py
@@ -0,0 +1,321 @@
+import json
+import logging
+import time
+from typing import Dict, Optional
+
+import couchbase.search as search
+from couchbase.auth import PasswordAuthenticator
+from couchbase.cluster import Cluster, ClusterOptions
+from couchbase.exceptions import DocumentNotFoundException
+from couchbase.management.search import SearchIndex
+from couchbase.options import SearchOptions
+from couchbase.vector_search import VectorQuery, VectorSearch
+from pydantic import BaseModel
+
+from mem0.vector_stores.base import VectorStoreBase
+
+logger = logging.getLogger(__name__)
+
+
+class OutputData(BaseModel):
+    id: Optional[str]
+    score: Optional[float]
+    payload: Optional[Dict]
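+
+# NOTE: OutputData mirrors the result shape mem0's memory layer reads back
+# from a vector store: `id` is the document key, `score` the vector search
+# score, and `payload` the stored metadata (text, user_id, and so on).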
+
+
+class Couchbase(VectorStoreBase):
+    def __init__(
+        self,
+        embedding_model_dims: int,
+        connection_str: str,
+        username: str,
+        password: str,
+        bucket_name: str,
+        scope_name: str = "_default",
+        collection_name: str = "_default",
+        index_name: str | None = None,
+        embedding_key: str = "embedding",
+    ):
+        """
+        Initialize the Couchbase vector store.
+
+        Args:
+            embedding_model_dims (int): Dimensions of the embedding model.
+            connection_str (str): Connection string for the Couchbase server.
+            username (str): Username for Couchbase authentication.
+            password (str): Password for Couchbase authentication.
+            bucket_name (str): Name of the Couchbase bucket.
+            scope_name (str, optional): Name of the scope. Defaults to "_default".
+            collection_name (str, optional): Name of the collection. Defaults to "_default".
+            index_name (str, optional): Name of the search index. Defaults to "<collection_name>_index".
+            embedding_key (str, optional): Document field that holds the vector. Defaults to "embedding".
+        """
+        self.cluster = Cluster(connection_str, ClusterOptions(PasswordAuthenticator(username, password)))
+        self.bucket = self.cluster.bucket(bucket_name)
+        self.scope = self.bucket.scope(scope_name)
+        self.collection = self.scope.collection(collection_name)
+        self.embedding_model_dims = embedding_model_dims
+        self.collection_name = collection_name
+        self.index_name = index_name if index_name else f"{collection_name}_index"
+        self.embedding_key = embedding_key
+
+    def create_search_index(
+        self, collection_name: str, search_index_name: str, vector_size: int, distance: str = "dot_product"
+    ):
+        index_definition = {
+            "type": "fulltext-index",
+            "name": search_index_name,
+            "sourceType": "couchbase",
+            "sourceName": self.bucket.name,
+            "planParams": {"maxPartitionsPerPIndex": 1024, "indexPartitions": 1},
+            "params": {
+                "doc_config": {
+                    "docid_prefix_delim": "",
+                    "docid_regexp": "",
+                    "mode": "scope.collection.type_field",
+                    "type_field": "type",
+                },
+                "mapping": {
+                    "analysis": {},
+                    "default_analyzer": "standard",
+                    "default_datetime_parser": "dateTimeOptional",
+                    "default_field": "_all",
+                    "default_mapping": {"dynamic": True, "enabled": False},
+                    "default_type": "_default",
+                    "docvalues_dynamic": False,
+                    "index_dynamic": True,
+                    "store_dynamic": True,
+                    "type_field": "_type",
+                    "types": {
+                        f"{self.scope.name}.{collection_name}": {
+                            "dynamic": False,
+                            "enabled": True,
+                            "properties": {
+                                self.embedding_key: {
+                                    "dynamic": False,
+                                    "enabled": True,
+                                    "fields": [
+                                        {
+                                            "dims": vector_size,
+                                            "index": True,
+                                            "name": self.embedding_key,
+                                            "similarity": distance,
+                                            "type": "vector",
+                                            "vector_index_optimized_for": "recall",
+                                        }
+                                    ],
+                                },
+                                "metadata": {"dynamic": True, "enabled": True},
+                                "payload": {
+                                    "dynamic": False,
+                                    "enabled": True,
+                                    "fields": [
+                                        {
+                                            "include_in_all": True,
+                                            "index": True,
+                                            "name": "text",
+                                            "store": True,
+                                            "type": "text",
+                                        }
+                                    ],
+                                },
+                            },
+                        }
+                    },
+                },
+                "store": {"indexType": "scorch", "segmentVersion": 16},
+            },
+            "sourceParams": {},
+        }
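+
+        # The mapping above indexes two things per document: the vector field
+        # (whose `dims` must match the embedding model) with the requested
+        # similarity metric, and `payload.text` as stored, searchable text,
+        # while `metadata` stays dynamic so arbitrary keys are indexed as-is.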
+
+        scope_index_manager = self.scope.search_indexes()
+        search_index_def = SearchIndex.from_json(json.dumps(index_definition))
+        max_attempts = 10
+        attempt = 0
+        while attempt < max_attempts:
+            try:
+                scope_index_manager.upsert_index(search_index_def)
+                break
+            except Exception as e:
+                logger.warning(f"Attempt {attempt + 1}/{max_attempts}: Error creating search index: {e}")
+                time.sleep(3)
+                attempt += 1
+
+        if attempt == max_attempts:
+            raise RuntimeError(f"Error creating search index after {max_attempts} attempts.")
+
+        logger.info(f"Search index {search_index_name} created successfully.")
+
+    def create_col(self, name: str, vector_size: int, distance: str) -> bool:
+        try:
+            create_collection_query = f"CREATE COLLECTION {self.bucket.name}.{self.scope.name}.{name}"
+            self.cluster.query(create_collection_query).execute()
+            logger.info(f"Collection {name} created successfully in scope {self.scope.name}.")
+
+            create_index_query = f"CREATE PRIMARY INDEX ON {self.bucket.name}.{self.scope.name}.{name}"
+            self.cluster.query(create_index_query).execute()
+
+            # Create a vector search index for the new collection
+            self.create_search_index(name, f"{name}_index", vector_size, distance)
+
+            return True
+        except Exception as e:
+            logger.error(f"Error creating collection: {e}")
+            return False
+
+    def insert(self, vectors: list, payloads: list | None = None, ids: list | None = None):
+        """
+        Insert vectors into the Couchbase collection.
+
+        Args:
+            vectors (list): List of vectors to insert.
+            payloads (list, optional): List of payloads corresponding to vectors. Defaults to None.
+            ids (list, optional): List of IDs corresponding to vectors. Defaults to None.
+        """
+        logger.info(f"Inserting {len(vectors)} vectors into collection {self.collection_name}")
+        docs = {}
+        for idx, vector in enumerate(vectors):
+            doc_id = ids[idx] if ids else f"vector_{idx}"
+            document = {
+                self.embedding_key: vector,
+                "payload": payloads[idx] if payloads else {},
+            }
+            docs[doc_id] = document
+        self.collection.upsert_multi(docs)
+
+    def search(self, query: list, limit: int = 5, filters: dict | None = None) -> list:
+        """
+        Search for similar vectors.
+
+        Args:
+            query (list): Query vector.
+            limit (int, optional): Number of results to return. Defaults to 5.
+            filters (dict, optional): Filters to apply to the search. Defaults to None.
+
+        Returns:
+            list: Search results.
+        """
+        logger.info(f"Searching for similar vectors in collection {self.collection_name}")
+        search_req = search.SearchRequest.create(
+            VectorSearch.from_vector_query(
+                VectorQuery(
+                    self.embedding_key,
+                    query,
+                    limit,
+                )
+            )
+        )
+        search_iter = self.scope.search(
+            self.index_name,
+            search_req,
+            SearchOptions(
+                limit=limit,
+                fields=["*"],
+                raw=filters,
+            ),
+        )
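+        # NOTE: `filters` is forwarded verbatim as raw FTS request parameters
+        # via `raw=`; it is not translated from mem0-style key/value filters.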
+ """ + try: + doc = self.collection.get(doc_id).content_as[dict] + return doc + except DocumentNotFoundException: + logger.warning(f"Vector with ID {doc_id} not found") + return None + + def list(self, filters: dict | None = None, limit: int = 100) -> list: + """ + List all vectors in the collection. + + Args: + filters (dict, optional): Filters to apply to the list. Defaults to None. + limit (int, optional): Number of vectors to return. Defaults to 100. + + Returns: + list: List of vectors. + """ + logger.info(f"Listing vectors in collection {self.collection.name}") + query = f"SELECT id, {self.embedding_key}, payload FROM {self.bucket.name}.{self.scope.name}.{self.collection.name} WHERE 1 = 1" + results = [] + if filters: + for filter in filters: + query += f" AND {filter['field']} = {filter['value']}" + + query += f" LIMIT {limit}" + + search_result = self.cluster.query(query) + + for row in search_result.rows(): + doc_id = row.id + doc = self.collection.get(doc_id).content_as[dict] + results.append({"id": doc_id, **doc}) + + return results + + def list_cols(self): + all_scopes = self.bucket.collections().get_all_scopes() + + for current_scope in all_scopes: + if(current_scope.name == self.scope.name): + all_collections = current_scope.collections + return all_collections + return super().list_cols() + + def delete_col(self, name): + try: + self.cluster.query(f"DROP COLLECTION {self.bucket.name}.{self.scope.name}.{name}") + return True + except Exception as e: + logger.error(f"Error deleting collection: {e}") + return super().delete_col() + + def col_info(self, name): + return self.scope.collection(name) \ No newline at end of file