Update readme, logging, remove seed since import works for that, small fixes and cleanups
ai16z · Jul 21, 2023 · fd50b63 · fd50b63
1 parent edd1102
commit fd50b63
Show file tree

Hide file tree

Showing 5 changed files with 80 additions and 90 deletions.
diff --git a/README.md b/README.md
@@ -52,11 +52,13 @@ create_memory("conversation", "I can't do that, Dave.", debug=True)
 ```python
 from agentmemory import (
     create_memory,
+    create_unique_memory,
     get_memories,
     search_memory,
     get_memory,
     update_memory,
     delete_memory,
+    delete_similar_memories,
     count_memories,
     wipe_category,
     wipe_all_memories
@@ -124,7 +126,23 @@ update_memory("conversation", 1, "Okay, I will open the podbay doors.")
 delete_memory("conversation", 1)
 ```
 
-# Documentation
+### Delete Similar Memories
+
+#### `delete_similar_memories(category, content, similarity_threshold=0.95)`
+
+Searches for memories that are similar to the given content and removes them.
+
+##### Parameters
+
+- `category` (str): The category of the collection.
+- `content` (str): The content to search for.
+- `similarity_threshold` (float, optional): The threshold for determining similarity. Defaults to 0.95.
+
+##### Returns
+
+- `bool`: True if at least one similar memory was found and removed, False otherwise.
+
+# API Reference
 
 ## Create a Memory
 
@@ -151,6 +169,23 @@ embedding (array): Embedding of the document. Defaults to None. Use if you alrea
 >>> create_memory(category='sample_category', text='sample_text', id='sample_id', metadata={'sample_key': 'sample_value'})
 ```
 
+### Create Unique Memory
+
+#### `create_unique_memory(category, content, metadata={}, similarity=0.95)`
+
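+Create a new memory and record whether it is unique. If no very similar memory exists, the new memory's "unique" metadata field is set to "True". If a similar memory is found, the new memory is still created, but its "unique" metadata field is set to "False" and it is linked to the existing memory through the "related_to" and "related_document" metadata fields.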
+
+##### Parameters
+
+- `category` (str): The category of the collection.
+- `content` (str): The text of the memory.
+- `metadata` (dict, optional): Metadata for the memory.
+- `similarity` (float, optional): The threshold for determining similarity.
+
+##### Returns
+
+None
+
 ## Search Memory
 
 #### `search_memory(category, search_text, n_results=5, min_distance=None, max_distance=None, filter_metadata=None, contains_text=None, include_embeddings=True)`
@@ -387,7 +422,6 @@ Delete all memories across all categories.
 >>> wipe_all_memories()
 ```
 
-
 # Memory Management with ChromaDB
 
 This document provides a guide to using the memory management functions provided in the module.
@@ -457,19 +491,8 @@ The `import_file_to_memory` function imports memories from a JSON file into the
 >>> import_file_to_memory(path="/path/to/input.json")
 ```
 
-````
-
-In the above Markdown, you may replace "ChromaDB" with the actual name of the module if it's different. You can include this in your `README.md` file to give your users a guide on how to use these functions.
-
-# Publishing
-
-```bash
-bash publish.sh --version=<version> --username=<pypi_username> --password=<pypi_password>
-```
-
 # Contributions Welcome
 
 If you like this library and want to contribute in any way, please feel free to submit a PR and I will review it. Please note that the goal here is simplicity and accessibility, using common language and few dependencies.
 
 <img src="resources/youcreatethefuture.jpg">
-````
diff --git a/agentmemory/client.py b/agentmemory/client.py
@@ -1,7 +1,10 @@
 import chromadb
 
-persist_directory = "./memory"
-client = chromadb.PersistentClient(persist_directory)
+from agentmemory.helpers import debug_log
+
+storage_path = "./memory"
+client = chromadb.PersistentClient(storage_path)
+
 
 def check_client_initialized():
     """
@@ -11,7 +14,8 @@ def check_client_initialized():
         >>> check_client_initialized()
     """
     if get_chroma_client() is None:
-        set_chroma_client(chromadb.PersistentClient(persist_directory))
+        set_chroma_client(chromadb.PersistentClient(storage_path))
+
 
 def get_chroma_client():
     """
@@ -25,18 +29,18 @@ def get_chroma_client():
         <chromadb.client.Client object at 0x7f7b9c2f0d00>
     """
     global client
-    global persist_directory
+    global storage_path
     if client is None:
-        client = chromadb.PersistentClient(path=persist_directory)
+        client = chromadb.PersistentClient(path=storage_path)
     return client
 
 
-def set_chroma_client(storage_path):
+def set_chroma_client(data_storage_path=storage_path):
     """
     Set the chromadb client.
 
     Args:
-        persist_directory (string): The path to the new directory.
+        data_storage_path (string): The path to the new directory.
 
     Returns:
         None
@@ -45,7 +49,7 @@ def set_chroma_client(storage_path):
         >>> set_chroma_client(new_client)
     """
     global client
-    global persist_directory
-    persist_directory = storage_path
-    client = chromadb.PersistentClient(persist_directory)
-
+    global storage_path
+    storage_path = data_storage_path
+    client = chromadb.PersistentClient(storage_path)
+    debug_log("Set chroma client", {"storage_path": storage_path}, "system")
diff --git a/agentmemory/helpers.py b/agentmemory/helpers.py
@@ -17,11 +17,11 @@
     "info": "blue",
     "prompt": "cyan",
     "success": "green",
-    "debug": "magenta",
     "critical": "red",
     "system": "magenta",
 }
 
+
 def strip_embeddings(value):
     if isinstance(value, dict):
         value = value.copy()
@@ -37,6 +37,7 @@ def strip_embeddings(value):
             value[i] = strip_embeddings(value[i])
     return value
 
+
 def debug_log(
     content,
     input_dict=None,
@@ -50,7 +51,7 @@ def debug_log(
         return
 
     color = type_colors.get(type, color)
-    
+
     if input_dict is not None:
         # traverse the dict and find any value called "embedding"
         # set "embedding" value to [] to avoid printing it

diff --git a/agentmemory/main.py b/agentmemory/main.py
@@ -66,8 +66,8 @@ def create_unique_memory(category, content, metadata={}, similarity=0.95):
     Creates a new memory if there aren't any that are very similar to it
 
     Parameters:
-    - content (str): The content of the knowledge.
-    - metadata (dict, optional): Additional metadata for the knowledge.
+    - content (str): The content of the memory.
+    - metadata (dict, optional): Additional metadata for the memory.
         Defaults to empty dictionary.
     - similarity (float, optional): The threshold for determining similarity.
         Defaults to DEFAULT_SIMILARY_THRESHOLD.
@@ -88,14 +88,13 @@ def create_unique_memory(category, content, metadata={}, similarity=0.95):
 
     if len(memories) == 0:
         metadata["unique"] = "True"
-        # Create a new knowledge item
         create_memory(category, content, metadata=metadata)
         return
 
     metadata["unique"] = "False"
     metadata["related_to"] = memories[0]["id"]
     metadata["related_document"] = memories[0]["document"]
-    create_memory("knowledge", content, metadata=metadata)
+    create_memory(category, content, metadata=metadata)
 
 
 def search_memory(
@@ -385,22 +384,34 @@ def delete_similar_memories(category, content, similarity_threshold=0.95):
     - content (str): The content to search for.
     - similarity_threshold (float, optional): The threshold for determining similarity. Defaults to DEFAULT_SIMILARY_THRESHOLD.
 
-    Returns: bool - True if the knowledge item is found and removed, False otherwise.
+    Returns: bool - True if the memory item is found and removed, False otherwise.
     """
 
     memories = search_memory(category, content)
+    memories_to_delete = []
+
+    # find similar memories
     if len(memories) > 0:
-        goal = memories[0]
-        goal_similarity = 1.0 - goal["distance"]
-        if goal_similarity > similarity_threshold:
-            goal_id = goal["id"]
-            delete_memory(category, goal_id)
-            return True
+        for memory in memories:
+            goal_similarity = 1.0 - memory["distance"]
+            if goal_similarity > similarity_threshold:
+                memories_to_delete.append(memory["id"])
+            else:
+                # responses are sorted by similarity, so ignore the rest
+                break
+
+    if len(memories_to_delete) > 0:
+        debug_log(
+            f"Deleting similar memories to {content} in category {category}",
+            memories_to_delete,
+        )
+        for memory in memories_to_delete:
+            delete_memory(category, memory)
     debug_log(
         f"WARNING: Tried to delete similar memories to {content} in category {category} but none were found",
         type="warning",
     )
-    return False
+    return len(memories_to_delete) > 0
 
 
 def memory_exists(category, id, includes_metadata=None):
@@ -500,10 +511,10 @@ def wipe_all_memories():
 
     check_client_initialized()  # client is lazy loaded, so make sure it is initialized
     client = get_chroma_client()
-    collections = get_chroma_client().list_collections()
+    collections = client.list_collections()
 
     # Iterate over all collections
     for collection in collections:
-        get_chroma_client().delete_collection(collection.name)
+        client.delete_collection(collection.name)
 
-    debug_log("Wiped all memories")
+    debug_log("Wiped all memories", type="system")
diff --git a/agentmemory/persistence.py b/agentmemory/persistence.py
@@ -115,52 +115,3 @@ def import_file_to_memory(path="./memory.json", replace=True):
 
     # Import the data into the database
     import_json_to_memory(data, replace)
-
-
-import json
-import time
-from agentmemory import create_memory
-
-
-def seed(seed_input):
-    """
-    Seed the memory bank from a JSON object or file
-
-    Parameters:
-    - data (dict): the JSON object to seed from
-
-    Returns:
-    - None
-    """
-    if seed_input is False or seed_input is None:
-        return
-
-    def seed_from_file(filename="./seeds.json"):
-        with open(filename, "r") as f:
-            seed_from_json(json.load(f))
-
-    def seed_from_json(data):
-        timestamps = [time.time() - (10 * i) for i in range(len(data))]
-        for i, entry in enumerate(data):
-            timestamp = timestamps[i]
-            entry["metadata"]["created_at"] = str(timestamp)
-            create_memory(entry["collection"], entry["message"], entry["metadata"])
-
-    # if seed is a dictionary, use it as the seed data
-    if isinstance(seed_input, dict):
-        seed_from_json(seed_input)
-
-    elif isinstance(seed_input, str) and seed_input.endswith(".json"):
-        seed_from_file(seed_input)
-
-    elif seed_input is True:
-        seed_from_file()
-    # if seed is a string, try parsing it as a json file
-    elif seed_input is not None:
-        try:
-            # parse string to dict
-            seed_data = json.loads(seed_input)
-            seed_from_json(seed_data)
-        except:
-            print("Invalid seed data. Must be a JSON file or a JSON string.")
-            return