diff --git a/quantization/AutomaticQuantizationWithVoyageAI.ipynb b/quantization/AutomaticQuantizationWithVoyageAI.ipynb
new file mode 100644
index 0000000..088924f
--- /dev/null
+++ b/quantization/AutomaticQuantizationWithVoyageAI.ipynb
@@ -0,0 +1,2422 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Atlas Vector Search - Vector Quantization - Automatic Quantization with Voyage AI"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This notebook is a companion to the [Automatic Quantization with Voyage AI](https://www.mongodb.com/docs/atlas/atlas-vector-search/tutorials/auto-quantize-with-voyage-ai/) tutorial. Refer to the page for set-up instructions and detailed explanations.\n",
+ "\n",
+ "This tutorial details techniques needed to design, deploy, and manage advanced AI workloads at scale, ensuring optimal performance and cost efficiency.\n",
+ "\n",
+ "\n",
+ "
\n",
+ ""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "collapsed": true,
+ "executionInfo": {
+ "elapsed": 19538,
+ "status": "ok",
+ "timestamp": 1756743355864,
+ "user": {
+ "displayName": "Javier Armendariz",
+ "userId": "14823381200026660254"
+ },
+ "user_tz": 360
+ },
+ "id": "MlVgdTfI4mVF",
+ "outputId": "48eefd32-5470-44fe-db52-f3a799248c94"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/491.5 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[90m╺\u001b[0m\u001b[90m━\u001b[0m \u001b[32m471.0/491.5 kB\u001b[0m \u001b[31m15.8 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m491.5/491.5 kB\u001b[0m \u001b[31m8.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h"
+ ]
+ }
+ ],
+ "source": [
+ "%pip install --quiet datasets==3.6.0 gcsfs==2025.3.0 fsspec==2025.3.0"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "collapsed": true,
+ "executionInfo": {
+ "elapsed": 16093,
+ "status": "ok",
+ "timestamp": 1756743375636,
+ "user": {
+ "displayName": "Javier Armendariz",
+ "userId": "14823381200026660254"
+ },
+ "user_tz": 360
+ },
+ "id": "a_rQlPqIwJoe",
+ "outputId": "c8baf8bc-c1ac-40bb-99c3-7cd82dc477e0"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m12.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m313.6/313.6 kB\u001b[0m \u001b[31m16.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h"
+ ]
+ }
+ ],
+ "source": [
+ "%pip install --quiet pymongo voyageai pandas==2.2.2 matplotlib"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "collapsed": true,
+ "executionInfo": {
+ "elapsed": 19250,
+ "status": "ok",
+ "timestamp": 1756744205537,
+ "user": {
+ "displayName": "Javier Armendariz",
+ "userId": "14823381200026660254"
+ },
+ "user_tz": 360
+ },
+ "id": "v9WNOdlUESGQ",
+ "outputId": "096a4f12-42b1-4af7-b0f9-c0eaf0cbeaaf"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Enter your Voyage API Key: ··········\n",
+ "Enter your MongoDB URI: ··········\n"
+ ]
+ }
+ ],
+ "source": [
+ "import getpass\n",
+ "import os\n",
+ "import voyageai\n",
+ "\n",
+ "# Function to securely get and set environment variables\n",
+ "def set_env_securely(var_name, prompt):\n",
+ "    \"\"\"Prompt for a secret without echoing it and store it in os.environ.\n",
+ "\n",
+ "    Args:\n",
+ "        var_name: Name of the environment variable to set.\n",
+ "        prompt: Text passed to getpass.getpass when asking for the value.\n",
+ "\n",
+ "    Raises:\n",
+ "        ValueError: If the entered value is empty or only whitespace.\n",
+ "    \"\"\"\n",
+ "    # Pasted secrets often carry stray spaces or newlines; strip them so the\n",
+ "    # stored value is exactly the credential.\n",
+ "    value = getpass.getpass(prompt).strip()\n",
+ "    if not value:\n",
+ "        # Fail at the prompt instead of storing an unusable empty credential.\n",
+ "        raise ValueError(f\"No value provided for {var_name}.\")\n",
+ "    os.environ[var_name] = value\n",
+ "\n",
+ "# Environment Variables\n",
+ "# Ask for both secrets; the helper stores each one in os.environ.\n",
+ "set_env_securely(\"VOYAGE_API_KEY\", \"Enter your Voyage API Key: \")\n",
+ "set_env_securely(\"MONGO_URI\", \"Enter your MongoDB URI: \")\n",
+ "\n",
+ "# Keep the connection string in a module-level name for the cells below.\n",
+ "MONGO_URI = os.getenv(\"MONGO_URI\")\n",
+ "if MONGO_URI is None or MONGO_URI == \"\":\n",
+ "    raise ValueError(\"MONGO_URI not set in environment variables.\")\n",
+ "\n",
+ "# Voyage Client\n",
+ "# NOTE(review): constructed without arguments; presumably it picks up\n",
+ "# VOYAGE_API_KEY from the environment - confirm against the voyageai docs.\n",
+ "voyage_client = voyageai.Client()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {
+ "executionInfo": {
+ "elapsed": 48,
+ "status": "ok",
+ "timestamp": 1756744211284,
+ "user": {
+ "displayName": "Javier Armendariz",
+ "userId": "14823381200026660254"
+ },
+ "user_tz": 360
+ },
+ "id": "VdidySCdPv1b"
+ },
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "from datasets import load_dataset\n",
+ "from bson.binary import Binary, BinaryVectorDtype\n",
+ "import pymongo\n",
+ "\n",
+ "# Connect to Cluster\n",
+ "def get_mongo_client(uri):\n",
+ "    \"\"\"Open a MongoDB client for ``uri`` and verify it with a ping.\n",
+ "\n",
+ "    Returns the client when the ``ping`` admin command reports ``ok == 1.0``;\n",
+ "    otherwise prints a failure message and returns None.\n",
+ "    \"\"\"\n",
+ "    client = pymongo.MongoClient(uri)\n",
+ "    ping_reply = client.admin.command(\"ping\")\n",
+ "    if ping_reply.get(\"ok\") != 1.0:\n",
+ "        print(\"Failed to connect to MongoDB.\")\n",
+ "        return None\n",
+ "    print(\"Connected to MongoDB successfully.\")\n",
+ "    return client\n",
+ "\n",
+ "# Generate BSON Vector\n",
+ "def generate_bson_vector(array, data_type):\n",
+ "    \"\"\"Convert an embedding to a BSON binary vector.\n",
+ "\n",
+ "    Args:\n",
+ "        array: Either text holding a list literal of numbers\n",
+ "            (for example '[0.1, 0.2]') or an already-parsed sequence of numbers.\n",
+ "        data_type: Value accepted by ``BinaryVectorDtype(...)`` selecting the\n",
+ "            element type of the vector.\n",
+ "\n",
+ "    Returns:\n",
+ "        The object produced by ``Binary.from_vector``.\n",
+ "\n",
+ "    Raises:\n",
+ "        ValueError / SyntaxError: If the text is not a plain Python literal.\n",
+ "    \"\"\"\n",
+ "    import ast\n",
+ "\n",
+ "    # The embeddings come from a downloaded dataset, i.e. external input.\n",
+ "    # literal_eval only accepts literals, so a malformed or malicious row\n",
+ "    # cannot execute code the way eval() would.\n",
+ "    if isinstance(array, (str, bytes)):\n",
+ "        array = ast.literal_eval(array)\n",
+ "    values = [float(val) for val in array]\n",
+ "    return Binary.from_vector(values, BinaryVectorDtype(data_type))\n",
+ "\n",
+ "# Load Datasets\n",
+ "def load_and_prepare_data(dataset_name, amount):\n",
+ "    \"\"\"Stream the first ``amount`` rows of a dataset's train split into a DataFrame.\"\"\"\n",
+ "    train_stream = load_dataset(dataset_name, streaming=True, split=\"train\")\n",
+ "    first_rows = train_stream.take(amount)\n",
+ "    return pd.DataFrame(first_rows)\n",
+ "\n",
+ "# Insert datasets into MongoDB Collection\n",
+ "def insert_dataframe_into_collection(df, collection):\n",
+ "    \"\"\"Insert each row of ``df`` into ``collection`` and print how many were written.\"\"\"\n",
+ "    records = df.to_dict(orient=\"records\")\n",
+ "    collection.insert_many(records)\n",
+ "    row_count = len(df)\n",
+ "    print(f\"Inserted {row_count} records into '{collection.name}' collection.\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "collapsed": true,
+ "executionInfo": {
+ "elapsed": 34182,
+ "status": "ok",
+ "timestamp": 1756744424113,
+ "user": {
+ "displayName": "Javier Armendariz",
+ "userId": "14823381200026660254"
+ },
+ "user_tz": 360
+ },
+ "id": "WG--GrYFPzV9",
+ "outputId": "766e33fe-48a9-41c8-a823-9f45b5453a70"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Connected to MongoDB successfully.\n",
+ "Collection 'wikipedia-22-12-en' created successfully.\n",
+ "Inserted 2500 records into 'wikipedia-22-12-en' collection.\n",
+ "Collection 'wikipedia-22-12-en-annotation' created successfully.\n",
+ "Inserted 2500 records into 'wikipedia-22-12-en-annotation' collection.\n"
+ ]
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "from bson.binary import Binary, BinaryVectorDtype\n",
+ "from pymongo.errors import CollectionInvalid\n",
+ "\n",
+ "# Load the first 2500 rows of each dataset\n",
+ "wikipedia_data_df = load_and_prepare_data(\"MongoDB/wikipedia-22-12-en-voyage-embed\", amount=2500)\n",
+ "wikipedia_annotation_data_df = load_and_prepare_data(\"MongoDB/wikipedia-22-12-en-annotation\", amount=2500)\n",
+ "wikipedia_annotation_data_df = wikipedia_annotation_data_df.drop(columns=[\"_id\"])\n",
+ "\n",
+ "# Convert embeddings to BSON format\n",
+ "wikipedia_data_df[\"embedding\"] = wikipedia_data_df[\"embedding\"].map(\n",
+ "    lambda raw_embedding: generate_bson_vector(raw_embedding, BinaryVectorDtype.FLOAT32)\n",
+ ")\n",
+ "\n",
+ "# MongoDB Setup\n",
+ "mongo_client = get_mongo_client(MONGO_URI)\n",
+ "DB_NAME = \"testing_datasets\"\n",
+ "db = mongo_client[DB_NAME]\n",
+ "\n",
+ "# Target collection name -> DataFrame holding its documents\n",
+ "collections = {\n",
+ "    \"wikipedia-22-12-en\": wikipedia_data_df,\n",
+ "    \"wikipedia-22-12-en-annotation\": wikipedia_annotation_data_df,\n",
+ "}\n",
+ "\n",
+ "# Create Collections and Insert Data\n",
+ "for collection_name, df in collections.items():\n",
+ "    if collection_name in db.list_collection_names():\n",
+ "        print(f\"Collection '{collection_name}' already exists.\")\n",
+ "    else:\n",
+ "        try:\n",
+ "            db.create_collection(collection_name)\n",
+ "        except CollectionInvalid:\n",
+ "            print(f\"Error creating collection '{collection_name}'.\")\n",
+ "        else:\n",
+ "            print(f\"Collection '{collection_name}' created successfully.\")\n",
+ "\n",
+ "    # Clear collection and insert fresh data\n",
+ "    collection = db[collection_name]\n",
+ "    collection.delete_many({})\n",
+ "    insert_dataframe_into_collection(df, collection)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {
+ "executionInfo": {
+ "elapsed": 49,
+ "status": "ok",
+ "timestamp": 1756744459328,
+ "user": {
+ "displayName": "Javier Armendariz",
+ "userId": "14823381200026660254"
+ },
+ "user_tz": 360
+ },
+ "id": "SXeaGqmI1TsC"
+ },
+ "outputs": [],
+ "source": [
+ "import time\n",
+ "from pymongo.operations import SearchIndexModel\n",
+ "\n",
+ "def setup_vector_search_index(\n",
+ "    collection,\n",
+ "    index_definition,\n",
+ "    index_name=\"vector_index\",\n",
+ "    timeout_seconds=600,\n",
+ "    poll_interval_seconds=5,\n",
+ "):\n",
+ "    \"\"\"Create a vector search index and block until it is queryable.\n",
+ "\n",
+ "    Args:\n",
+ "        collection: Collection on which to create the search index.\n",
+ "        index_definition: Definition passed to ``SearchIndexModel``.\n",
+ "        index_name: Name for the new index.\n",
+ "        timeout_seconds: Maximum time to wait for the index to report\n",
+ "            ``queryable``. Pass None to wait indefinitely.\n",
+ "        poll_interval_seconds: Delay between readiness checks.\n",
+ "\n",
+ "    Returns:\n",
+ "        The value returned by ``create_search_index`` once the index is\n",
+ "        queryable, or None if creation failed or the wait timed out (the\n",
+ "        reason is printed).\n",
+ "    \"\"\"\n",
+ "    new_vector_search_index_model = SearchIndexModel(\n",
+ "        definition=index_definition, name=index_name, type=\"vectorSearch\"\n",
+ "    )\n",
+ "\n",
+ "    # Create the new index\n",
+ "    try:\n",
+ "        result = collection.create_search_index(model=new_vector_search_index_model)\n",
+ "        print(f\"Creating index '{index_name}'...\")\n",
+ "\n",
+ "        # Wait for initial sync to complete\n",
+ "        print(\"Polling to check if the index is ready. This may take around a minute.\")\n",
+ "        deadline = None if timeout_seconds is None else time.monotonic() + timeout_seconds\n",
+ "        while True:\n",
+ "            indices = list(collection.list_search_indexes(result))\n",
+ "            if indices and indices[0].get(\"queryable\") is True:\n",
+ "                break\n",
+ "            # Bound the wait so an index that never becomes queryable cannot\n",
+ "            # hang the notebook forever.\n",
+ "            if deadline is not None and time.monotonic() >= deadline:\n",
+ "                raise TimeoutError(\n",
+ "                    f\"index was not queryable after {timeout_seconds} seconds\"\n",
+ "                )\n",
+ "            time.sleep(poll_interval_seconds)\n",
+ "        print(f\"Index '{index_name}' is ready for querying.\")\n",
+ "        return result\n",
+ "\n",
+ "    except Exception as e:\n",
+ "        print(f\"Error creating new vector search index '{index_name}': {e!s}\")\n",
+ "        return None"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {
+ "executionInfo": {
+ "elapsed": 50,
+ "status": "ok",
+ "timestamp": 1756744468384,
+ "user": {
+ "displayName": "Javier Armendariz",
+ "userId": "14823381200026660254"
+ },
+ "user_tz": 360
+ },
+ "id": "y0vqX0Ed1dgc"
+ },
+ "outputs": [],
+ "source": [
+ "def _vector_index_definition(quantization=None):\n",
+ "    \"\"\"Build the index definition for the 1024-dimension cosine ``embedding`` field.\n",
+ "\n",
+ "    ``quantization`` is included in the field only when given (\"scalar\" or\n",
+ "    \"binary\" below); leaving it out yields the unquantized definition.\n",
+ "    \"\"\"\n",
+ "    field = {\"type\": \"vector\", \"path\": \"embedding\"}\n",
+ "    if quantization is not None:\n",
+ "        field[\"quantization\"] = quantization\n",
+ "    field[\"numDimensions\"] = 1024\n",
+ "    field[\"similarity\"] = \"cosine\"\n",
+ "    return {\"fields\": [field]}\n",
+ "\n",
+ "\n",
+ "# The three definitions differ only in the quantization setting.\n",
+ "vector_index_definition_scalar_quantized = _vector_index_definition(\"scalar\")\n",
+ "vector_index_definition_binary_quantized = _vector_index_definition(\"binary\")\n",
+ "vector_index_definition_float32_ann = _vector_index_definition()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {
+ "executionInfo": {
+ "elapsed": 47,
+ "status": "ok",
+ "timestamp": 1756744474089,
+ "user": {
+ "displayName": "Javier Armendariz",
+ "userId": "14823381200026660254"
+ },
+ "user_tz": 360
+ },
+ "id": "jlQHLYxA1mNW"
+ },
+ "outputs": [],
+ "source": [
+ "import pymongo\n",
+ "# Names given to the three vector search indexes\n",
+ "vector_search_float32_ann_index_name = \"vector_index_float32_ann\"\n",
+ "vector_search_scalar_quantized_index_name = \"vector_index_scalar_quantized\"\n",
+ "vector_search_binary_quantized_index_name = \"vector_index_binary_quantized\"\n",
+ "\n",
+ "# Handles to the database and its two collections\n",
+ "db = mongo_client[DB_NAME]\n",
+ "wiki_annotation_data_collection = db[\"wikipedia-22-12-en-annotation\"]\n",
+ "wiki_data_collection = db[\"wikipedia-22-12-en\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 196
+ },
+ "collapsed": true,
+ "executionInfo": {
+ "elapsed": 87075,
+ "status": "ok",
+ "timestamp": 1756744563624,
+ "user": {
+ "displayName": "Javier Armendariz",
+ "userId": "14823381200026660254"
+ },
+ "user_tz": 360
+ },
+ "id": "oKw8HI3H1r7w",
+ "outputId": "8e447700-a341-411b-815e-f97e6f61a920"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Creating index 'vector_index_scalar_quantized'...\n",
+ "Polling to check if the index is ready. This may take around a minute.\n",
+ "Index 'vector_index_scalar_quantized' is ready for querying.\n",
+ "Creating index 'vector_index_binary_quantized'...\n",
+ "Polling to check if the index is ready. This may take around a minute.\n",
+ "Index 'vector_index_binary_quantized' is ready for querying.\n",
+ "Creating index 'vector_index_float32_ann'...\n",
+ "Polling to check if the index is ready. This may take around a minute.\n",
+ "Index 'vector_index_float32_ann' is ready for querying.\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "string"
+ },
+ "text/plain": [
+ "'vector_index_float32_ann'"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from pymongo.operations import SearchIndexModel\n",
+ "\n",
+ "# Build the scalar-quantized, binary-quantized and unquantized indexes in turn;\n",
+ "# each call returns only after its polling loop finishes or an error is printed.\n",
+ "setup_vector_search_index(\n",
+ "    collection=wiki_data_collection,\n",
+ "    index_definition=vector_index_definition_scalar_quantized,\n",
+ "    index_name=vector_search_scalar_quantized_index_name,\n",
+ ")\n",
+ "setup_vector_search_index(\n",
+ "    collection=wiki_data_collection,\n",
+ "    index_definition=vector_index_definition_binary_quantized,\n",
+ "    index_name=vector_search_binary_quantized_index_name,\n",
+ ")\n",
+ "setup_vector_search_index(\n",
+ "    collection=wiki_data_collection,\n",
+ "    index_definition=vector_index_definition_float32_ann,\n",
+ "    index_name=vector_search_float32_ann_index_name,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {
+ "executionInfo": {
+ "elapsed": 47,
+ "status": "ok",
+ "timestamp": 1756744594454,
+ "user": {
+ "displayName": "Javier Armendariz",
+ "userId": "14823381200026660254"
+ },
+ "user_tz": 360
+ },
+ "id": "G-YgBPNOKwF-"
+ },
+ "outputs": [],
+ "source": [
+ "def get_embedding(text, task_prefix=\"document\"):\n",
+ "    \"\"\"Return the Voyage AI ``voyage-3-large`` embedding for ``text``.\n",
+ "\n",
+ "    ``task_prefix`` is forwarded as the ``input_type`` of the embed call.\n",
+ "    Blank or whitespace-only text is not sent: a notice is printed and an\n",
+ "    empty list is returned instead.\n",
+ "    \"\"\"\n",
+ "    if text.strip():\n",
+ "        response = voyage_client.embed([text], model=\"voyage-3-large\", input_type=task_prefix)\n",
+ "        return response.embeddings[0]\n",
+ "    print(\"Empty text provided for embedding.\")\n",
+ "    return []\n",
+ "\n",
+ "def custom_vector_search(\n",
+ "    user_query,\n",
+ "    collection,\n",
+ "    embedding_path,\n",
+ "    vector_search_index_name=\"vector_index\",\n",
+ "    top_k=5,\n",
+ "    num_candidates=25,\n",
+ "    use_full_precision=False,\n",
+ "):\n",
+ "    \"\"\"Run a ``$vectorSearch`` query and report its results and latency.\n",
+ "\n",
+ "    The query text is embedded with ``get_embedding`` (input type \"query\").\n",
+ "    With ``use_full_precision`` false the stage carries ``numCandidates``\n",
+ "    (approximate search); otherwise it carries ``exact: True`` and\n",
+ "    ``num_candidates`` is not sent.\n",
+ "\n",
+ "    Returns:\n",
+ "        A dict with \"results\" (the projected documents) and\n",
+ "        \"execution_time_ms\" (``millisElapsed`` read from the explain output).\n",
+ "        On failure the dict additionally has an \"error\" message, \"results\"\n",
+ "        is empty and \"execution_time_ms\" is None.\n",
+ "    \"\"\"\n",
+ "\n",
+ "    def _failure(message):\n",
+ "        # Shared shape of every unsuccessful return value.\n",
+ "        return {\"error\": message, \"execution_time_ms\": None, \"results\": []}\n",
+ "\n",
+ "    # Generate embedding for the user query\n",
+ "    query_embedding = get_embedding(user_query, task_prefix=\"query\")\n",
+ "    if not query_embedding:\n",
+ "        return _failure(\"Invalid query or embedding generation failed.\")\n",
+ "\n",
+ "    # ANN search is bounded by numCandidates; ENN search is requested with exact=True\n",
+ "    if use_full_precision:\n",
+ "        precision_option = {\"exact\": True}\n",
+ "    else:\n",
+ "        precision_option = {\"numCandidates\": num_candidates}\n",
+ "\n",
+ "    # Aggregate pipeline: the vector search stage, then a projection of the\n",
+ "    # desired fields plus the vector search score\n",
+ "    pipeline = [\n",
+ "        {\n",
+ "            \"$vectorSearch\": {\n",
+ "                \"index\": vector_search_index_name,\n",
+ "                \"queryVector\": query_embedding,\n",
+ "                \"path\": embedding_path,\n",
+ "                \"limit\": top_k,\n",
+ "                **precision_option,\n",
+ "            }\n",
+ "        },\n",
+ "        {\n",
+ "            \"$project\": {\n",
+ "                \"_id\": 0,\n",
+ "                \"title\": 1,\n",
+ "                \"text\": 1,\n",
+ "                \"wiki_id\": 1,\n",
+ "                \"url\": 1,\n",
+ "                \"score\": {\"$meta\": \"vectorSearchScore\"},\n",
+ "            }\n",
+ "        },\n",
+ "    ]\n",
+ "\n",
+ "    try:\n",
+ "        # Run the pipeline under explain first to read the reported latency\n",
+ "        explain_command = {\"aggregate\": collection.name, \"pipeline\": pipeline, \"cursor\": {}}\n",
+ "        explain_result = collection.database.command(\n",
+ "            \"explain\", explain_command, verbosity=\"executionStats\"\n",
+ "        )\n",
+ "        query_stats = explain_result[\"stages\"][0][\"$vectorSearch\"][\"explain\"][\"query\"][\"stats\"]\n",
+ "        execution_time_ms = query_stats[\"context\"][\"millisElapsed\"]\n",
+ "\n",
+ "        # Then execute the actual aggregate query\n",
+ "        results = list(collection.aggregate(pipeline))\n",
+ "    except Exception as e:\n",
+ "        print(f\"Error during vector search: {e}\")\n",
+ "        return _failure(str(e))\n",
+ "\n",
+ "    return {\"results\": results, \"execution_time_ms\": execution_time_ms}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 175
+ },
+ "executionInfo": {
+ "elapsed": 1420,
+ "status": "ok",
+ "timestamp": 1756744627479,
+ "user": {
+ "displayName": "Javier Armendariz",
+ "userId": "14823381200026660254"
+ },
+ "user_tz": 360
+ },
+ "id": "8qxNXCRdbJKD",
+ "outputId": "81d84363-b33f-4485-d16b-5025fc1a7be6"
+ },
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "summary": "{\n \"name\": \"results_df\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"precision\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"_scalar_\",\n \"Float32_ENN\",\n \"_float32_ann\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"top_k\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 5,\n \"max\": 5,\n \"num_unique_values\": 1,\n \"samples\": [\n 5\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"num_candidates\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 25,\n \"max\": 25,\n \"num_unique_values\": 1,\n \"samples\": [\n 25\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"latency_ms\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 91.0287034707709,\n \"min\": 1.72466,\n \"max\": 205.778084,\n \"num_unique_values\": 4,\n \"samples\": [\n 205.778084\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"results\",\n \"properties\": {\n \"dtype\": \"object\",\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}",
+ "type": "dataframe",
+ "variable_name": "results_df"
+ },
+ "text/html": [
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " precision | \n",
+ " top_k | \n",
+ " num_candidates | \n",
+ " latency_ms | \n",
+ " results | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " _float32_ann | \n",
+ " 5 | \n",
+ " 25 | \n",
+ " 86.283681 | \n",
+ " {'title': 'Facebook', 'text': 'Data is read fr... | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " _scalar_ | \n",
+ " 5 | \n",
+ " 25 | \n",
+ " 205.778084 | \n",
+ " {'title': 'Facebook', 'text': 'Data is read fr... | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " _binary_ | \n",
+ " 5 | \n",
+ " 25 | \n",
+ " 26.408544 | \n",
+ " {'title': 'Facebook', 'text': 'Data is read fr... | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " Float32_ENN | \n",
+ " 5 | \n",
+ " 25 | \n",
+ " 1.724660 | \n",
+ " {'title': 'Facebook', 'text': 'Data is read fr... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "text/plain": [
+ " precision top_k num_candidates latency_ms \\\n",
+ "0 _float32_ann 5 25 86.283681 \n",
+ "1 _scalar_ 5 25 205.778084 \n",
+ "2 _binary_ 5 25 26.408544 \n",
+ "3 Float32_ENN 5 25 1.724660 \n",
+ "\n",
+ " results \n",
+ "0 {'title': 'Facebook', 'text': 'Data is read fr... \n",
+ "1 {'title': 'Facebook', 'text': 'Data is read fr... \n",
+ "2 {'title': 'Facebook', 'text': 'Data is read fr... \n",
+ "3 {'title': 'Facebook', 'text': 'Data is read fr... "
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "vector_search_indicies = [\n",
+ "    vector_search_float32_ann_index_name,\n",
+ "    vector_search_scalar_quantized_index_name,\n",
+ "    vector_search_binary_quantized_index_name,\n",
+ "]\n",
+ "\n",
+ "# Random query\n",
+ "user_query = \"How do I increase my productivity for maximum output\"\n",
+ "test_top_k = 5\n",
+ "test_num_candidates = 25\n",
+ "\n",
+ "\n",
+ "def _summarize_search(precision, search_output):\n",
+ "    \"\"\"Build one comparison row from a ``custom_vector_search`` return value.\n",
+ "\n",
+ "    \"results\" holds only the first hit, or None when the search returned\n",
+ "    nothing (for example after an error), so a failed search shows up as an\n",
+ "    empty row instead of raising IndexError.\n",
+ "    \"\"\"\n",
+ "    hits = search_output[\"results\"]\n",
+ "    return {\n",
+ "        \"precision\": precision,\n",
+ "        \"top_k\": test_top_k,\n",
+ "        \"num_candidates\": test_num_candidates,\n",
+ "        \"latency_ms\": search_output[\"execution_time_ms\"],\n",
+ "        \"results\": hits[0] if hits else None,  # Just taking the first result\n",
+ "    }\n",
+ "\n",
+ "\n",
+ "# Result is a list of dictionaries with the following headings:\n",
+ "# precision, top_k, num_candidates, latency_ms, results\n",
+ "results = []\n",
+ "\n",
+ "for vector_search_index in vector_search_indicies:\n",
+ "    # Conduct an approximate (ANN) vector search against each index in turn\n",
+ "    vector_search_results = custom_vector_search(\n",
+ "        user_query,\n",
+ "        wiki_data_collection,\n",
+ "        embedding_path=\"embedding\",\n",
+ "        vector_search_index_name=vector_search_index,\n",
+ "        top_k=test_top_k,\n",
+ "        num_candidates=test_num_candidates,\n",
+ "        use_full_precision=False,\n",
+ "    )\n",
+ "    # Derive the precision label from the index name\n",
+ "    precision = vector_search_index.split(\"vector_index\")[1]\n",
+ "    precision = precision.replace(\"quantized\", \"\").capitalize()\n",
+ "\n",
+ "    results.append(_summarize_search(precision, vector_search_results))\n",
+ "\n",
+ "# Conduct a vector search operation using full precision (exact search)\n",
+ "precision = \"Float32_ENN\"\n",
+ "vector_search_results = custom_vector_search(\n",
+ "    user_query,\n",
+ "    wiki_data_collection,\n",
+ "    embedding_path=\"embedding\",\n",
+ "    vector_search_index_name=\"vector_index_scalar_quantized\",\n",
+ "    top_k=test_top_k,\n",
+ "    num_candidates=test_num_candidates,\n",
+ "    use_full_precision=True,\n",
+ ")\n",
+ "\n",
+ "results.append(_summarize_search(precision, vector_search_results))\n",
+ "\n",
+ "# Convert the results to a pandas DataFrame with the headings:\n",
+ "# precision, top_k, num_candidates, latency_ms, results\n",
+ "results_df = pd.DataFrame(results)\n",
+ "results_df.columns = [\"precision\", \"top_k\", \"num_candidates\", \"latency_ms\", \"results\"]\n",
+ "\n",
+ "# To display the results:\n",
+ "results_df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {
+ "executionInfo": {
+ "elapsed": 47,
+ "status": "ok",
+ "timestamp": 1756744668014,
+ "user": {
+ "displayName": "Javier Armendariz",
+ "userId": "14823381200026660254"
+ },
+ "user_tz": 360
+ },
+ "id": "uYUCb8RHg9Xf"
+ },
+ "outputs": [],
+ "source": [
+ "from datetime import timedelta\n",
+ "\n",
+ "def format_time(ms):\n",
+ "    \"\"\"Convert milliseconds to a human-readable format.\n",
+ "\n",
+ "    Durations under one second are shown in milliseconds with three decimals\n",
+ "    (\"12.346ms\"). Longer durations are rounded to the nearest millisecond and\n",
+ "    shown as seconds (\"1.500s\") or minutes and seconds (\"2m 5.250s\").\n",
+ "    \"\"\"\n",
+ "    # The round() guard sends values that would print as \"1000.000ms\" to the\n",
+ "    # seconds branch instead.\n",
+ "    if round(ms, 3) < 1000:\n",
+ "        return f\"{ms:.3f}ms\"\n",
+ "\n",
+ "    # Work in whole milliseconds so rounding carries into seconds and minutes\n",
+ "    # (formatting the float remainder could print e.g. \"1.1000s\"), and split\n",
+ "    # the total with divmod so durations of a day or more are not truncated.\n",
+ "    total_ms = int(round(ms))\n",
+ "    minutes, remainder_ms = divmod(total_ms, 60_000)\n",
+ "    seconds, milliseconds = divmod(remainder_ms, 1000)\n",
+ "\n",
+ "    # Format based on duration\n",
+ "    if minutes > 0:\n",
+ "        return f\"{minutes}m {seconds}.{milliseconds:03d}s\"\n",
+ "    return f\"{seconds}.{milliseconds:03d}s\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {
+ "executionInfo": {
+ "elapsed": 48,
+ "status": "ok",
+ "timestamp": 1756744674155,
+ "user": {
+ "displayName": "Javier Armendariz",
+ "userId": "14823381200026660254"
+ },
+ "user_tz": 360
+ },
+ "id": "QlKQOMnWhCuN"
+ },
+ "outputs": [],
+ "source": [
+ "def measure_latency_with_varying_topk(\n",
+ "    user_query,\n",
+ "    collection,\n",
+ "    vector_search_index_name=\"vector_index_scalar_quantized\",\n",
+ "    use_full_precision=False,\n",
+ "    top_k_values=[5, 10, 100],\n",
+ "    num_candidates_values=[25, 50, 100, 200, 500, 1000, 2000, 5000, 10000],\n",
+ "):\n",
+ "    \"\"\"Measure search latency for every usable (top_k, num_candidates) pair.\n",
+ "\n",
+ "    Pairs where ``num_candidates < top_k`` are skipped. Each remaining pair is\n",
+ "    run through ``custom_vector_search`` on the ``embedding`` path, its\n",
+ "    latency is printed, and a row is recorded.\n",
+ "\n",
+ "    Returns:\n",
+ "        A list of dicts with keys \"precision\", \"top_k\", \"num_candidates\"\n",
+ "        and \"latency_ms\".\n",
+ "    \"\"\"\n",
+ "    results_data = []\n",
+ "\n",
+ "    # Lazily enumerate the combinations worth testing, in nested-loop order\n",
+ "    usable_pairs = (\n",
+ "        (top_k, num_candidates)\n",
+ "        for top_k in top_k_values\n",
+ "        for num_candidates in num_candidates_values\n",
+ "        if not num_candidates < top_k\n",
+ "    )\n",
+ "\n",
+ "    for top_k, num_candidates in usable_pairs:\n",
+ "        # Label taken from the index name; exact searches are always\n",
+ "        # labelled \"_float32_ENN\" regardless of the index used\n",
+ "        index_label = vector_search_index_name.split(\"vector_index\")[1]\n",
+ "        index_label = index_label.replace(\"quantized\", \"\").capitalize()\n",
+ "        precision_name = \"_float32_ENN\" if use_full_precision else index_label\n",
+ "\n",
+ "        # Perform the vector search\n",
+ "        search_output = custom_vector_search(\n",
+ "            user_query=user_query,\n",
+ "            collection=collection,\n",
+ "            embedding_path=\"embedding\",\n",
+ "            vector_search_index_name=vector_search_index_name,\n",
+ "            top_k=top_k,\n",
+ "            num_candidates=num_candidates,\n",
+ "            use_full_precision=use_full_precision,\n",
+ "        )\n",
+ "        latency_ms = search_output[\"execution_time_ms\"]\n",
+ "\n",
+ "        # Record this combination\n",
+ "        row = {\n",
+ "            \"precision\": precision_name,\n",
+ "            \"top_k\": top_k,\n",
+ "            \"num_candidates\": num_candidates,\n",
+ "            \"latency_ms\": latency_ms,\n",
+ "        }\n",
+ "        results_data.append(row)\n",
+ "\n",
+ "        print(f\"Top-K: {top_k}, NumCandidates: {num_candidates}, \"\n",
+ "              f\"Latency: {latency_ms} ms, Precision: {precision_name}\")\n",
+ "\n",
+ "    return results_data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {
+ "executionInfo": {
+ "elapsed": 47,
+ "status": "ok",
+ "timestamp": 1756744680284,
+ "user": {
+ "displayName": "Javier Armendariz",
+ "userId": "14823381200026660254"
+ },
+ "user_tz": 360
+ },
+ "id": "-V0BT0uRiJ_4"
+ },
+ "outputs": [],
+ "source": [
+ "# Define vector search indices: the list first, then one name per entry\n",
+ "vector_search_indices = [\n",
+ "    \"vector_index_float32_ann\",\n",
+ "    \"vector_index_scalar_quantized\",\n",
+ "    \"vector_index_binary_quantized\",\n",
+ "]\n",
+ "\n",
+ "(\n",
+ "    vector_search_float32_ann_index_name,\n",
+ "    vector_search_scalar_quantized_index_name,\n",
+ "    vector_search_binary_quantized_index_name,\n",
+ ") = vector_search_indices"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "collapsed": true,
+ "executionInfo": {
+ "elapsed": 24303,
+ "status": "ok",
+ "timestamp": 1756744711035,
+ "user": {
+ "displayName": "Javier Armendariz",
+ "userId": "14823381200026660254"
+ },
+ "user_tz": 360
+ },
+ "id": "Uc73DGUnR2ey",
+ "outputId": "7466e740-ed71-4c6e-b1ae-69425701e449"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Top-K: 5, NumCandidates: 25, Latency: 2.34802 ms, Precision: _float32_ann\n",
+ "Top-K: 5, NumCandidates: 50, Latency: 2.69349 ms, Precision: _float32_ann\n",
+ "Top-K: 5, NumCandidates: 100, Latency: 3.085918 ms, Precision: _float32_ann\n",
+ "Top-K: 5, NumCandidates: 200, Latency: 3.796262 ms, Precision: _float32_ann\n",
+ "Top-K: 5, NumCandidates: 500, Latency: 5.063448 ms, Precision: _float32_ann\n",
+ "Top-K: 5, NumCandidates: 1000, Latency: 3.763206 ms, Precision: _float32_ann\n",
+ "Top-K: 5, NumCandidates: 2000, Latency: 4.357494 ms, Precision: _float32_ann\n",
+ "Top-K: 5, NumCandidates: 5000, Latency: 5.895361 ms, Precision: _float32_ann\n",
+ "Top-K: 5, NumCandidates: 10000, Latency: 6.080381 ms, Precision: _float32_ann\n",
+ "Top-K: 10, NumCandidates: 25, Latency: 1.736514 ms, Precision: _float32_ann\n",
+ "Top-K: 10, NumCandidates: 50, Latency: 2.267651 ms, Precision: _float32_ann\n",
+ "Top-K: 10, NumCandidates: 100, Latency: 2.410042 ms, Precision: _float32_ann\n",
+ "Top-K: 10, NumCandidates: 200, Latency: 3.321675 ms, Precision: _float32_ann\n",
+ "Top-K: 10, NumCandidates: 500, Latency: 4.584557 ms, Precision: _float32_ann\n",
+ "Top-K: 10, NumCandidates: 1000, Latency: 2.963686 ms, Precision: _float32_ann\n",
+ "Top-K: 10, NumCandidates: 2000, Latency: 3.311705 ms, Precision: _float32_ann\n",
+ "Top-K: 10, NumCandidates: 5000, Latency: 3.798438 ms, Precision: _float32_ann\n",
+ "Top-K: 10, NumCandidates: 10000, Latency: 3.305494 ms, Precision: _float32_ann\n",
+ "Top-K: 50, NumCandidates: 50, Latency: 2.085725 ms, Precision: _float32_ann\n",
+ "Top-K: 50, NumCandidates: 100, Latency: 11.986639 ms, Precision: _float32_ann\n",
+ "Top-K: 50, NumCandidates: 200, Latency: 3.460139 ms, Precision: _float32_ann\n",
+ "Top-K: 50, NumCandidates: 500, Latency: 4.313759 ms, Precision: _float32_ann\n",
+ "Top-K: 50, NumCandidates: 1000, Latency: 2.559779 ms, Precision: _float32_ann\n",
+ "Top-K: 50, NumCandidates: 2000, Latency: 2.857659 ms, Precision: _float32_ann\n",
+ "Top-K: 50, NumCandidates: 5000, Latency: 3.102541 ms, Precision: _float32_ann\n",
+ "Top-K: 50, NumCandidates: 10000, Latency: 3.148161 ms, Precision: _float32_ann\n",
+ "Top-K: 100, NumCandidates: 100, Latency: 2.475068 ms, Precision: _float32_ann\n",
+ "Top-K: 100, NumCandidates: 200, Latency: 3.011821 ms, Precision: _float32_ann\n",
+ "Top-K: 100, NumCandidates: 500, Latency: 4.320916 ms, Precision: _float32_ann\n",
+ "Top-K: 100, NumCandidates: 1000, Latency: 2.698083 ms, Precision: _float32_ann\n",
+ "Top-K: 100, NumCandidates: 2000, Latency: 2.56315 ms, Precision: _float32_ann\n",
+ "Top-K: 100, NumCandidates: 5000, Latency: 2.816961 ms, Precision: _float32_ann\n",
+ "Top-K: 100, NumCandidates: 10000, Latency: 2.66185 ms, Precision: _float32_ann\n",
+ "Top-K: 5, NumCandidates: 25, Latency: 3.40843 ms, Precision: _scalar_\n",
+ "Top-K: 5, NumCandidates: 50, Latency: 1.983106 ms, Precision: _scalar_\n",
+ "Top-K: 5, NumCandidates: 100, Latency: 2.243061 ms, Precision: _scalar_\n",
+ "Top-K: 5, NumCandidates: 200, Latency: 2.622319 ms, Precision: _scalar_\n",
+ "Top-K: 5, NumCandidates: 500, Latency: 3.773975 ms, Precision: _scalar_\n",
+ "Top-K: 5, NumCandidates: 1000, Latency: 2.988514 ms, Precision: _scalar_\n",
+ "Top-K: 5, NumCandidates: 2000, Latency: 4.051868 ms, Precision: _scalar_\n",
+ "Top-K: 5, NumCandidates: 5000, Latency: 3.148933 ms, Precision: _scalar_\n",
+ "Top-K: 5, NumCandidates: 10000, Latency: 3.377691 ms, Precision: _scalar_\n",
+ "Top-K: 10, NumCandidates: 25, Latency: 1.233653 ms, Precision: _scalar_\n",
+ "Top-K: 10, NumCandidates: 50, Latency: 1.544515 ms, Precision: _scalar_\n",
+ "Top-K: 10, NumCandidates: 100, Latency: 2.064514 ms, Precision: _scalar_\n",
+ "Top-K: 10, NumCandidates: 200, Latency: 2.793519 ms, Precision: _scalar_\n",
+ "Top-K: 10, NumCandidates: 500, Latency: 3.586925 ms, Precision: _scalar_\n",
+ "Top-K: 10, NumCandidates: 1000, Latency: 2.483578 ms, Precision: _scalar_\n",
+ "Top-K: 10, NumCandidates: 2000, Latency: 2.824229 ms, Precision: _scalar_\n",
+ "Top-K: 10, NumCandidates: 5000, Latency: 3.014467 ms, Precision: _scalar_\n",
+ "Top-K: 10, NumCandidates: 10000, Latency: 2.996548 ms, Precision: _scalar_\n",
+ "Top-K: 50, NumCandidates: 50, Latency: 1.651804 ms, Precision: _scalar_\n",
+ "Top-K: 50, NumCandidates: 100, Latency: 2.876801 ms, Precision: _scalar_\n",
+ "Top-K: 50, NumCandidates: 200, Latency: 2.583221 ms, Precision: _scalar_\n",
+ "Top-K: 50, NumCandidates: 500, Latency: 3.614238 ms, Precision: _scalar_\n",
+ "Top-K: 50, NumCandidates: 1000, Latency: 2.50203 ms, Precision: _scalar_\n",
+ "Top-K: 50, NumCandidates: 2000, Latency: 2.935384 ms, Precision: _scalar_\n",
+ "Top-K: 50, NumCandidates: 5000, Latency: 2.987948 ms, Precision: _scalar_\n",
+ "Top-K: 50, NumCandidates: 10000, Latency: 2.935468 ms, Precision: _scalar_\n",
+ "Top-K: 100, NumCandidates: 100, Latency: 1.895514 ms, Precision: _scalar_\n",
+ "Top-K: 100, NumCandidates: 200, Latency: 2.475355 ms, Precision: _scalar_\n",
+ "Top-K: 100, NumCandidates: 500, Latency: 3.728216 ms, Precision: _scalar_\n",
+ "Top-K: 100, NumCandidates: 1000, Latency: 2.455966 ms, Precision: _scalar_\n",
+ "Top-K: 100, NumCandidates: 2000, Latency: 2.946825 ms, Precision: _scalar_\n",
+ "Top-K: 100, NumCandidates: 5000, Latency: 9.271127 ms, Precision: _scalar_\n",
+ "Top-K: 100, NumCandidates: 10000, Latency: 2.896009 ms, Precision: _scalar_\n",
+ "Top-K: 5, NumCandidates: 25, Latency: 12.365069 ms, Precision: _binary_\n",
+ "Top-K: 5, NumCandidates: 50, Latency: 2.676547 ms, Precision: _binary_\n",
+ "Top-K: 5, NumCandidates: 100, Latency: 3.762128 ms, Precision: _binary_\n",
+ "Top-K: 5, NumCandidates: 200, Latency: 5.00018 ms, Precision: _binary_\n",
+ "Top-K: 5, NumCandidates: 500, Latency: 6.218719 ms, Precision: _binary_\n",
+ "Top-K: 5, NumCandidates: 1000, Latency: 5.543958 ms, Precision: _binary_\n",
+ "Top-K: 5, NumCandidates: 2000, Latency: 5.802571 ms, Precision: _binary_\n",
+ "Top-K: 5, NumCandidates: 5000, Latency: 5.624381 ms, Precision: _binary_\n",
+ "Top-K: 5, NumCandidates: 10000, Latency: 5.560114 ms, Precision: _binary_\n",
+ "Top-K: 10, NumCandidates: 25, Latency: 1.92962 ms, Precision: _binary_\n",
+ "Top-K: 10, NumCandidates: 50, Latency: 2.656911 ms, Precision: _binary_\n",
+ "Top-K: 10, NumCandidates: 100, Latency: 3.528849 ms, Precision: _binary_\n",
+ "Top-K: 10, NumCandidates: 200, Latency: 4.915123 ms, Precision: _binary_\n",
+ "Top-K: 10, NumCandidates: 500, Latency: 6.151708 ms, Precision: _binary_\n",
+ "Top-K: 10, NumCandidates: 1000, Latency: 5.041075 ms, Precision: _binary_\n",
+ "Top-K: 10, NumCandidates: 2000, Latency: 6.587714 ms, Precision: _binary_\n",
+ "Top-K: 10, NumCandidates: 5000, Latency: 5.426732 ms, Precision: _binary_\n",
+ "Top-K: 10, NumCandidates: 10000, Latency: 5.507867 ms, Precision: _binary_\n",
+ "Top-K: 50, NumCandidates: 50, Latency: 2.694392 ms, Precision: _binary_\n",
+ "Top-K: 50, NumCandidates: 100, Latency: 3.527617 ms, Precision: _binary_\n",
+ "Top-K: 50, NumCandidates: 200, Latency: 4.888442 ms, Precision: _binary_\n",
+ "Top-K: 50, NumCandidates: 500, Latency: 6.23655 ms, Precision: _binary_\n",
+ "Top-K: 50, NumCandidates: 1000, Latency: 5.018419 ms, Precision: _binary_\n",
+ "Top-K: 50, NumCandidates: 2000, Latency: 5.361668 ms, Precision: _binary_\n",
+ "Top-K: 50, NumCandidates: 5000, Latency: 5.568478 ms, Precision: _binary_\n",
+ "Top-K: 50, NumCandidates: 10000, Latency: 5.492152 ms, Precision: _binary_\n",
+ "Top-K: 100, NumCandidates: 100, Latency: 4.903153 ms, Precision: _binary_\n",
+ "Top-K: 100, NumCandidates: 200, Latency: 5.057359 ms, Precision: _binary_\n",
+ "Top-K: 100, NumCandidates: 500, Latency: 6.108579 ms, Precision: _binary_\n",
+ "Top-K: 100, NumCandidates: 1000, Latency: 5.815404 ms, Precision: _binary_\n",
+ "Top-K: 100, NumCandidates: 2000, Latency: 5.177239 ms, Precision: _binary_\n",
+ "Top-K: 100, NumCandidates: 5000, Latency: 6.662274 ms, Precision: _binary_\n",
+ "Top-K: 100, NumCandidates: 10000, Latency: 5.385733 ms, Precision: _binary_\n",
+ "Top-K: 5, NumCandidates: 25, Latency: 0.128725 ms, Precision: _float32_ENN\n",
+ "Top-K: 5, NumCandidates: 50, Latency: 0.092744 ms, Precision: _float32_ENN\n",
+ "Top-K: 5, NumCandidates: 100, Latency: 0.092424 ms, Precision: _float32_ENN\n",
+ "Top-K: 5, NumCandidates: 200, Latency: 0.08805 ms, Precision: _float32_ENN\n",
+ "Top-K: 5, NumCandidates: 500, Latency: 0.087515 ms, Precision: _float32_ENN\n",
+ "Top-K: 5, NumCandidates: 1000, Latency: 0.090264 ms, Precision: _float32_ENN\n",
+ "Top-K: 5, NumCandidates: 2000, Latency: 0.084766 ms, Precision: _float32_ENN\n",
+ "Top-K: 5, NumCandidates: 5000, Latency: 0.140481 ms, Precision: _float32_ENN\n",
+ "Top-K: 5, NumCandidates: 10000, Latency: 0.084446 ms, Precision: _float32_ENN\n",
+ "Top-K: 10, NumCandidates: 25, Latency: 0.080338 ms, Precision: _float32_ENN\n",
+ "Top-K: 10, NumCandidates: 50, Latency: 0.08325 ms, Precision: _float32_ENN\n",
+ "Top-K: 10, NumCandidates: 100, Latency: 0.095388 ms, Precision: _float32_ENN\n",
+ "Top-K: 10, NumCandidates: 200, Latency: 0.087154 ms, Precision: _float32_ENN\n",
+ "Top-K: 10, NumCandidates: 500, Latency: 0.084021 ms, Precision: _float32_ENN\n",
+ "Top-K: 10, NumCandidates: 1000, Latency: 0.077539 ms, Precision: _float32_ENN\n",
+ "Top-K: 10, NumCandidates: 2000, Latency: 0.076152 ms, Precision: _float32_ENN\n",
+ "Top-K: 10, NumCandidates: 5000, Latency: 0.083395 ms, Precision: _float32_ENN\n",
+ "Top-K: 10, NumCandidates: 10000, Latency: 0.08718 ms, Precision: _float32_ENN\n",
+ "Top-K: 50, NumCandidates: 50, Latency: 0.0806 ms, Precision: _float32_ENN\n",
+ "Top-K: 50, NumCandidates: 100, Latency: 0.080024 ms, Precision: _float32_ENN\n",
+ "Top-K: 50, NumCandidates: 200, Latency: 0.077087 ms, Precision: _float32_ENN\n",
+ "Top-K: 50, NumCandidates: 500, Latency: 0.078435 ms, Precision: _float32_ENN\n",
+ "Top-K: 50, NumCandidates: 1000, Latency: 0.08603 ms, Precision: _float32_ENN\n",
+ "Top-K: 50, NumCandidates: 2000, Latency: 0.089214 ms, Precision: _float32_ENN\n",
+ "Top-K: 50, NumCandidates: 5000, Latency: 0.077529 ms, Precision: _float32_ENN\n",
+ "Top-K: 50, NumCandidates: 10000, Latency: 0.145385 ms, Precision: _float32_ENN\n",
+ "Top-K: 100, NumCandidates: 100, Latency: 0.0853 ms, Precision: _float32_ENN\n",
+ "Top-K: 100, NumCandidates: 200, Latency: 0.090102 ms, Precision: _float32_ENN\n",
+ "Top-K: 100, NumCandidates: 500, Latency: 0.088583 ms, Precision: _float32_ENN\n",
+ "Top-K: 100, NumCandidates: 1000, Latency: 0.085062 ms, Precision: _float32_ENN\n",
+ "Top-K: 100, NumCandidates: 2000, Latency: 0.079546 ms, Precision: _float32_ENN\n",
+ "Top-K: 100, NumCandidates: 5000, Latency: 0.122299 ms, Precision: _float32_ENN\n",
+ "Top-K: 100, NumCandidates: 10000, Latency: 0.081493 ms, Precision: _float32_ENN\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Run the latency measurements: each index in vector_search_indices with\n",
+ "# use_full_precision=False, then one extra pass with use_full_precision=True.\n",
+ "user_query = \"How do I increase my productivity for maximum output\"\n",
+ "top_k_values = [5, 10, 50, 100]\n",
+ "num_candidates_values = [25, 50, 100, 200, 500, 1000, 2000, 5000, 10000]\n",
+ "\n",
+ "# Reset on every run so re-executing this cell does not accumulate duplicates.\n",
+ "latency_results = []\n",
+ "\n",
+ "for vector_search_index in vector_search_indices:\n",
+ "    latency_results.append(\n",
+ "        measure_latency_with_varying_topk(\n",
+ "            user_query,\n",
+ "            wiki_data_collection,\n",
+ "            vector_search_index_name=vector_search_index,\n",
+ "            use_full_precision=False,\n",
+ "            top_k_values=top_k_values,\n",
+ "            num_candidates_values=num_candidates_values,\n",
+ "        )\n",
+ "    )\n",
+ "\n",
+ "# Conduct vector search operation using full precision\n",
+ "latency_results.append(\n",
+ "    measure_latency_with_varying_topk(\n",
+ "        user_query,\n",
+ "        wiki_data_collection,\n",
+ "        vector_search_index_name=\"vector_index_scalar_quantized\",\n",
+ "        use_full_precision=True,\n",
+ "        top_k_values=top_k_values,\n",
+ "        num_candidates_values=num_candidates_values,\n",
+ "    )\n",
+ ")\n",
+ "\n",
+ "# Combine all results into a single DataFrame with one row per measurement.\n",
+ "# latency_results holds one list of measurement dicts per run, so it must be\n",
+ "# flattened first; passing the nested list directly to pd.DataFrame would give\n",
+ "# one row per run with whole dicts as cell values.\n",
+ "all_latency_results = pd.DataFrame(\n",
+ "    [measurement for run_results in latency_results for measurement in run_results]\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 1000
+ },
+ "collapsed": true,
+ "executionInfo": {
+ "elapsed": 1625,
+ "status": "ok",
+ "timestamp": 1756744745265,
+ "user": {
+ "displayName": "Javier Armendariz",
+ "userId": "14823381200026660254"
+ },
+ "user_tz": 360
+ },
+ "id": "2yddp_qBTy1X",
+ "outputId": "f8a5021a-0538-4694-9084-de9bb4e26b56"
+ },
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "# Legend text for each value of a measurement's 'precision' field\n",
+ "precision_label_map = {\n",
+ "    \"_scalar_\": \"scalar\",\n",
+ "    \"_binary_\": \"binary\",\n",
+ "    \"_float32_ann\": \"float32_ann\",\n",
+ "    \"_float32_ENN\": \"float32_ENN\",\n",
+ "}\n",
+ "\n",
+ "# Line color for each value of a measurement's 'precision' field\n",
+ "precision_color_map = {\n",
+ "    \"_scalar_\": \"orange\",\n",
+ "    \"_binary_\": \"red\",\n",
+ "    \"_float32_ann\": \"blue\",\n",
+ "    \"_float32_ENN\": \"purple\",\n",
+ "}\n",
+ "\n",
+ "# latency_results is expected to be a list of lists, one inner list per precision type.\n",
+ "# Every inner list holds dicts with the keys 'precision', 'top_k', 'num_candidates', 'latency_ms'.\n",
+ "\n",
+ "# Gather every measurement to discover which top_k values were benchmarked\n",
+ "all_measurements = [m for precision_list in latency_results for m in precision_list]\n",
+ "unique_topk = sorted({m[\"top_k\"] for m in all_measurements})\n",
+ "\n",
+ "# Draw one figure per top_k value, with one line per precision type\n",
+ "for k in unique_topk:\n",
+ "    fig, ax = plt.subplots(figsize=(10, 6))\n",
+ "\n",
+ "    for measurements in latency_results:\n",
+ "        # Keep only the points of this precision type that belong to the current top_k\n",
+ "        points = [m for m in measurements if m[\"top_k\"] == k]\n",
+ "        if points:\n",
+ "            # The inner list is assumed to hold a single precision type,\n",
+ "            # so the first point decides the legend label and the color\n",
+ "            precision = points[0][\"precision\"]\n",
+ "\n",
+ "            # x axis: num_candidates, y axis: latency in milliseconds\n",
+ "            ax.plot(\n",
+ "                [m[\"num_candidates\"] for m in points],\n",
+ "                [m[\"latency_ms\"] for m in points],\n",
+ "                marker=\"o\",\n",
+ "                color=precision_color_map.get(precision, \"blue\"),\n",
+ "                label=precision_label_map.get(precision, precision),\n",
+ "            )\n",
+ "\n",
+ "    # Axis labels and a title that names the top_k value\n",
+ "    ax.set_xlabel(\"Number of Candidates\")\n",
+ "    ax.set_ylabel(\"Latency (ms)\")\n",
+ "    ax.set_title(f\"Search Latency vs Num Candidates for Top-K = {k}\")\n",
+ "\n",
+ "    # Legend and grid, then render the figure\n",
+ "    ax.legend()\n",
+ "    ax.grid(True)\n",
+ "    plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {
+ "executionInfo": {
+ "elapsed": 48,
+ "status": "ok",
+ "timestamp": 1756744779009,
+ "user": {
+ "displayName": "Javier Armendariz",
+ "userId": "14823381200026660254"
+ },
+ "user_tz": 360
+ },
+ "id": "-yRDptxNXtUQ"
+ },
+ "outputs": [],
+ "source": [
+ "def measure_representational_capacity_retention_against_float_enn(\n",
+ "    ground_truth_collection,\n",
+ "    collection,\n",
+ "    quantized_index_name,  # Index used for the quantized search and, with use_full_precision=True, for the baseline.\n",
+ "    top_k_values,  # List/array of top-k values to test.\n",
+ "    num_candidates_values,  # List/array of num_candidates values to test.\n",
+ "    num_queries_to_test=1,\n",
+ "):\n",
+ "    \"\"\"Measure how much of the full-precision result set a quantized search retains.\n",
+ "\n",
+ "    For every question-type query in the loaded ground truth documents, and for\n",
+ "    every (top_k, num_candidates) pair where num_candidates is not smaller than\n",
+ "    top_k, the same index is searched twice through custom_vector_search: once\n",
+ "    with use_full_precision=True (the baseline) and once with\n",
+ "    use_full_precision=False. Retention is the fraction of distinct baseline\n",
+ "    wiki_ids that also appear in the quantized results, or 0 when the baseline\n",
+ "    returned no ids. Each combination and the final averages are printed.\n",
+ "\n",
+ "    Args:\n",
+ "        ground_truth_collection: Collection whose documents provide a 'wiki_id'\n",
+ "            and a 'queries' mapping of query type to a list of queries.\n",
+ "        collection: Collection passed to custom_vector_search.\n",
+ "        quantized_index_name: Name of the vector search index. The text after\n",
+ "            'vector_index' (with 'quantized' removed) is reported as the\n",
+ "            precision name.\n",
+ "        top_k_values: top_k values to evaluate.\n",
+ "        num_candidates_values: num_candidates values to evaluate.\n",
+ "        num_queries_to_test: Limit applied when loading ground truth documents.\n",
+ "\n",
+ "    Returns:\n",
+ "        A dict with the keys 'per_query_retention'\n",
+ "        (query -> top_k -> num_candidates -> details), 'precision_name',\n",
+ "        'top_k_values', 'num_candidates_values' and 'average_retention'\n",
+ "        (top_k -> num_candidates -> mean retention).\n",
+ "\n",
+ "    Raises:\n",
+ "        IndexError: If quantized_index_name does not contain 'vector_index'.\n",
+ "    \"\"\"\n",
+ "\n",
+ "    def _search_wiki_ids(query, top_k, num_candidates, use_full_precision):\n",
+ "        # Run one search on the index and return the distinct wiki_ids it produced.\n",
+ "        search_output = custom_vector_search(\n",
+ "            user_query=query,\n",
+ "            collection=collection,\n",
+ "            embedding_path=\"embedding\",\n",
+ "            vector_search_index_name=quantized_index_name,\n",
+ "            top_k=top_k,\n",
+ "            num_candidates=num_candidates,\n",
+ "            use_full_precision=use_full_precision,\n",
+ "        )\n",
+ "        return {hit[\"wiki_id\"] for hit in search_output[\"results\"]}\n",
+ "\n",
+ "    # overall_retention[top_k][num_candidates] = [retention values across all queries]\n",
+ "    overall_retention = {\n",
+ "        top_k: {\n",
+ "            num_candidates: []\n",
+ "            for num_candidates in num_candidates_values\n",
+ "            if not num_candidates < top_k\n",
+ "        }\n",
+ "        for top_k in top_k_values\n",
+ "    }\n",
+ "\n",
+ "    # The precision name is whatever follows \"vector_index\" in the index name.\n",
+ "    precision_name = (\n",
+ "        quantized_index_name.split(\"vector_index\")[1]\n",
+ "        .replace(\"quantized\", \"\")\n",
+ "        .capitalize()\n",
+ "    )\n",
+ "\n",
+ "    per_query_retention = {}\n",
+ "    retention_results = {\n",
+ "        \"per_query_retention\": per_query_retention,\n",
+ "        \"precision_name\": precision_name,\n",
+ "        \"top_k_values\": top_k_values,\n",
+ "        \"num_candidates_values\": num_candidates_values,\n",
+ "    }\n",
+ "\n",
+ "    # Load ground truth annotations\n",
+ "    ground_truth_annotations = list(\n",
+ "        ground_truth_collection.find().limit(num_queries_to_test)\n",
+ "    )\n",
+ "    print(f\"Loaded {len(ground_truth_annotations)} ground truth annotations\")\n",
+ "\n",
+ "    for annotation in ground_truth_annotations:\n",
+ "        ground_truth_wiki_id = annotation[\"wiki_id\"]\n",
+ "\n",
+ "        for query_type, queries in annotation[\"queries\"].items():\n",
+ "            # Only question-type queries are evaluated.\n",
+ "            if query_type.lower() not in [\"question\", \"questions\"]:\n",
+ "                continue\n",
+ "\n",
+ "            for query in queries:\n",
+ "                query_entry = per_query_retention.setdefault(query, {})\n",
+ "\n",
+ "                for top_k in top_k_values:\n",
+ "                    top_k_entry = query_entry.setdefault(top_k, {})\n",
+ "\n",
+ "                    for num_candidates in num_candidates_values:\n",
+ "                        # Skip combinations with fewer candidates than requested results.\n",
+ "                        if num_candidates < top_k:\n",
+ "                            continue\n",
+ "\n",
+ "                        # Baseline first (full precision), then the quantized search.\n",
+ "                        baseline_ids = _search_wiki_ids(\n",
+ "                            query, top_k, num_candidates, True\n",
+ "                        )\n",
+ "                        quantized_ids = _search_wiki_ids(\n",
+ "                            query, top_k, num_candidates, False\n",
+ "                        )\n",
+ "\n",
+ "                        # Share of baseline ids that the quantized search also returned.\n",
+ "                        retention = (\n",
+ "                            len(baseline_ids & quantized_ids) / len(baseline_ids)\n",
+ "                            if baseline_ids\n",
+ "                            else 0\n",
+ "                        )\n",
+ "\n",
+ "                        baseline_sorted = sorted(baseline_ids)\n",
+ "                        quantized_sorted = sorted(quantized_ids)\n",
+ "\n",
+ "                        # Record the outcome for this query and feed the running totals.\n",
+ "                        top_k_entry[num_candidates] = {\n",
+ "                            \"ground_truth_wiki_id\": ground_truth_wiki_id,\n",
+ "                            \"baseline_ids\": baseline_sorted,\n",
+ "                            \"quantized_ids\": quantized_sorted,\n",
+ "                            \"retention\": retention,\n",
+ "                        }\n",
+ "                        overall_retention[top_k][num_candidates].append(retention)\n",
+ "\n",
+ "                        print(\n",
+ "                            f\"Query: '{query}' | top_k: {top_k}, num_candidates: {num_candidates}\"\n",
+ "                        )\n",
+ "                        print(f\" Ground Truth wiki_id: {ground_truth_wiki_id}\")\n",
+ "                        print(f\" Baseline IDs (Float32): {baseline_sorted}\")\n",
+ "                        print(\n",
+ "                            f\" Quantized IDs: {precision_name}: {quantized_sorted}\"\n",
+ "                        )\n",
+ "                        print(f\" Retention: {retention:.4f}\\n\")\n",
+ "\n",
+ "    # Average the collected retention values for every combination.\n",
+ "    avg_overall_retention = {}\n",
+ "    for top_k, by_candidates in overall_retention.items():\n",
+ "        averages = avg_overall_retention[top_k] = {}\n",
+ "        for num_candidates, retentions in by_candidates.items():\n",
+ "            avg = sum(retentions) / len(retentions) if retentions else 0\n",
+ "            averages[num_candidates] = avg\n",
+ "            print(\n",
+ "                f\"Overall Average Retention for top_k {top_k}, num_candidates {num_candidates}: {avg:.4f}\"\n",
+ "            )\n",
+ "\n",
+ "    retention_results[\"average_retention\"] = avg_overall_retention\n",
+ "    return retention_results"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "collapsed": true,
+ "executionInfo": {
+ "elapsed": 27232,
+ "status": "ok",
+ "timestamp": 1756744812157,
+ "user": {
+ "displayName": "Javier Armendariz",
+ "userId": "14823381200026660254"
+ },
+ "user_tz": 360
+ },
+ "id": "URwqBqgGZGIq",
+ "outputId": "f0c15f19-243b-42a2-ba40-ca44b743606f"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Loaded 1 ground truth annotations\n",
+ "Query: 'What happened in 2022?' | top_k: 5, num_candidates: 25\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [70149799]\n",
+ " Quantized IDs: _float32_ann: [70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 5, num_candidates: 50\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [70149799]\n",
+ " Quantized IDs: _float32_ann: [70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 5, num_candidates: 100\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [70149799]\n",
+ " Quantized IDs: _float32_ann: [70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 5, num_candidates: 200\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [70149799]\n",
+ " Quantized IDs: _float32_ann: [70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 5, num_candidates: 500\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [70149799]\n",
+ " Quantized IDs: _float32_ann: [70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 5, num_candidates: 1000\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [70149799]\n",
+ " Quantized IDs: _float32_ann: [70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 5, num_candidates: 5000\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [70149799]\n",
+ " Quantized IDs: _float32_ann: [70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 10, num_candidates: 25\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [70149799]\n",
+ " Quantized IDs: _float32_ann: [70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 10, num_candidates: 50\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [70149799]\n",
+ " Quantized IDs: _float32_ann: [70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 10, num_candidates: 100\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [70149799]\n",
+ " Quantized IDs: _float32_ann: [70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 10, num_candidates: 200\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [70149799]\n",
+ " Quantized IDs: _float32_ann: [70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 10, num_candidates: 500\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [70149799]\n",
+ " Quantized IDs: _float32_ann: [70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 10, num_candidates: 1000\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [70149799]\n",
+ " Quantized IDs: _float32_ann: [70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 10, num_candidates: 5000\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [70149799]\n",
+ " Quantized IDs: _float32_ann: [70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 50, num_candidates: 50\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [25391, 3524766, 17742072, 70149799]\n",
+ " Quantized IDs: _float32_ann: [25391, 3434750, 3524766, 17742072, 70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 50, num_candidates: 100\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [25391, 3524766, 17742072, 70149799]\n",
+ " Quantized IDs: _float32_ann: [25391, 3434750, 3524766, 17742072, 70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 50, num_candidates: 200\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [25391, 3524766, 17742072, 70149799]\n",
+ " Quantized IDs: _float32_ann: [25391, 3524766, 17742072, 70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 50, num_candidates: 500\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [25391, 3524766, 17742072, 70149799]\n",
+ " Quantized IDs: _float32_ann: [25391, 3524766, 17742072, 70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 50, num_candidates: 1000\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [25391, 3524766, 17742072, 70149799]\n",
+ " Quantized IDs: _float32_ann: [25391, 3524766, 17742072, 70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 50, num_candidates: 5000\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [25391, 3524766, 17742072, 70149799]\n",
+ " Quantized IDs: _float32_ann: [25391, 3524766, 17742072, 70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 100, num_candidates: 100\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [25391, 31750, 3434750, 3524766, 17742072, 31591547, 42085878, 70149799]\n",
+ " Quantized IDs: _float32_ann: [25391, 31750, 3434750, 3524766, 17742072, 31591547, 42085878, 70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 100, num_candidates: 200\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [25391, 31750, 3434750, 3524766, 17742072, 31591547, 42085878, 70149799]\n",
+ " Quantized IDs: _float32_ann: [25391, 31750, 3434750, 3524766, 17742072, 31591547, 42085878, 70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 100, num_candidates: 500\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [25391, 31750, 3434750, 3524766, 17742072, 31591547, 42085878, 70149799]\n",
+ " Quantized IDs: _float32_ann: [25391, 31750, 3434750, 3524766, 17742072, 31591547, 42085878, 70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 100, num_candidates: 1000\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [25391, 31750, 3434750, 3524766, 17742072, 31591547, 42085878, 70149799]\n",
+ " Quantized IDs: _float32_ann: [25391, 31750, 3434750, 3524766, 17742072, 31591547, 42085878, 70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 100, num_candidates: 5000\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [25391, 31750, 3434750, 3524766, 17742072, 31591547, 42085878, 70149799]\n",
+ " Quantized IDs: _float32_ann: [25391, 31750, 3434750, 3524766, 17742072, 31591547, 42085878, 70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Overall Average Retention for top_k 5, num_candidates 25: 1.0000\n",
+ "Overall Average Retention for top_k 5, num_candidates 50: 1.0000\n",
+ "Overall Average Retention for top_k 5, num_candidates 100: 1.0000\n",
+ "Overall Average Retention for top_k 5, num_candidates 200: 1.0000\n",
+ "Overall Average Retention for top_k 5, num_candidates 500: 1.0000\n",
+ "Overall Average Retention for top_k 5, num_candidates 1000: 1.0000\n",
+ "Overall Average Retention for top_k 5, num_candidates 5000: 1.0000\n",
+ "Overall Average Retention for top_k 10, num_candidates 25: 1.0000\n",
+ "Overall Average Retention for top_k 10, num_candidates 50: 1.0000\n",
+ "Overall Average Retention for top_k 10, num_candidates 100: 1.0000\n",
+ "Overall Average Retention for top_k 10, num_candidates 200: 1.0000\n",
+ "Overall Average Retention for top_k 10, num_candidates 500: 1.0000\n",
+ "Overall Average Retention for top_k 10, num_candidates 1000: 1.0000\n",
+ "Overall Average Retention for top_k 10, num_candidates 5000: 1.0000\n",
+ "Overall Average Retention for top_k 50, num_candidates 50: 1.0000\n",
+ "Overall Average Retention for top_k 50, num_candidates 100: 1.0000\n",
+ "Overall Average Retention for top_k 50, num_candidates 200: 1.0000\n",
+ "Overall Average Retention for top_k 50, num_candidates 500: 1.0000\n",
+ "Overall Average Retention for top_k 50, num_candidates 1000: 1.0000\n",
+ "Overall Average Retention for top_k 50, num_candidates 5000: 1.0000\n",
+ "Overall Average Retention for top_k 100, num_candidates 100: 1.0000\n",
+ "Overall Average Retention for top_k 100, num_candidates 200: 1.0000\n",
+ "Overall Average Retention for top_k 100, num_candidates 500: 1.0000\n",
+ "Overall Average Retention for top_k 100, num_candidates 1000: 1.0000\n",
+ "Overall Average Retention for top_k 100, num_candidates 5000: 1.0000\n",
+ "Loaded 1 ground truth annotations\n",
+ "Query: 'What happened in 2022?' | top_k: 5, num_candidates: 25\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [70149799]\n",
+ " Quantized IDs: _scalar_: [70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 5, num_candidates: 50\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [70149799]\n",
+ " Quantized IDs: _scalar_: [70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 5, num_candidates: 100\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [70149799]\n",
+ " Quantized IDs: _scalar_: [70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 5, num_candidates: 200\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [70149799]\n",
+ " Quantized IDs: _scalar_: [70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 5, num_candidates: 500\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [70149799]\n",
+ " Quantized IDs: _scalar_: [70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 5, num_candidates: 1000\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [70149799]\n",
+ " Quantized IDs: _scalar_: [70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 5, num_candidates: 5000\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [70149799]\n",
+ " Quantized IDs: _scalar_: [70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 10, num_candidates: 25\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [70149799]\n",
+ " Quantized IDs: _scalar_: [70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 10, num_candidates: 50\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [70149799]\n",
+ " Quantized IDs: _scalar_: [70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 10, num_candidates: 100\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [70149799]\n",
+ " Quantized IDs: _scalar_: [70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 10, num_candidates: 200\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [70149799]\n",
+ " Quantized IDs: _scalar_: [70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 10, num_candidates: 500\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [70149799]\n",
+ " Quantized IDs: _scalar_: [70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 10, num_candidates: 1000\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [70149799]\n",
+ " Quantized IDs: _scalar_: [70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 10, num_candidates: 5000\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [70149799]\n",
+ " Quantized IDs: _scalar_: [70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 50, num_candidates: 50\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [25391, 3524766, 17742072, 70149799]\n",
+ " Quantized IDs: _scalar_: [25391, 3524766, 17742072, 70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 50, num_candidates: 100\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [25391, 3524766, 17742072, 70149799]\n",
+ " Quantized IDs: _scalar_: [25391, 3524766, 17742072, 70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 50, num_candidates: 200\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [25391, 3524766, 17742072, 70149799]\n",
+ " Quantized IDs: _scalar_: [25391, 3524766, 17742072, 70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 50, num_candidates: 500\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [25391, 3524766, 17742072, 70149799]\n",
+ " Quantized IDs: _scalar_: [25391, 3524766, 17742072, 70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 50, num_candidates: 1000\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [25391, 3524766, 17742072, 70149799]\n",
+ " Quantized IDs: _scalar_: [25391, 3524766, 17742072, 70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 50, num_candidates: 5000\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [25391, 3524766, 17742072, 70149799]\n",
+ " Quantized IDs: _scalar_: [25391, 3524766, 17742072, 70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 100, num_candidates: 100\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [25391, 31750, 3434750, 3524766, 17742072, 31591547, 42085878, 70149799]\n",
+ " Quantized IDs: _scalar_: [25391, 31750, 3434750, 3524766, 17742072, 31591547, 42085878, 70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 100, num_candidates: 200\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [25391, 31750, 3434750, 3524766, 17742072, 31591547, 42085878, 70149799]\n",
+ " Quantized IDs: _scalar_: [25391, 31750, 3434750, 3524766, 17742072, 31591547, 42085878, 70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 100, num_candidates: 500\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [25391, 31750, 3434750, 3524766, 17742072, 31591547, 42085878, 70149799]\n",
+ " Quantized IDs: _scalar_: [25391, 31750, 3434750, 3524766, 17742072, 31591547, 42085878, 70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 100, num_candidates: 1000\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [25391, 31750, 3434750, 3524766, 17742072, 31591547, 42085878, 70149799]\n",
+ " Quantized IDs: _scalar_: [25391, 31750, 3434750, 3524766, 17742072, 31591547, 42085878, 70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 100, num_candidates: 5000\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [25391, 31750, 3434750, 3524766, 17742072, 31591547, 42085878, 70149799]\n",
+ " Quantized IDs: _scalar_: [25391, 31750, 3434750, 3524766, 17742072, 31591547, 42085878, 70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Overall Average Retention for top_k 5, num_candidates 25: 1.0000\n",
+ "Overall Average Retention for top_k 5, num_candidates 50: 1.0000\n",
+ "Overall Average Retention for top_k 5, num_candidates 100: 1.0000\n",
+ "Overall Average Retention for top_k 5, num_candidates 200: 1.0000\n",
+ "Overall Average Retention for top_k 5, num_candidates 500: 1.0000\n",
+ "Overall Average Retention for top_k 5, num_candidates 1000: 1.0000\n",
+ "Overall Average Retention for top_k 5, num_candidates 5000: 1.0000\n",
+ "Overall Average Retention for top_k 10, num_candidates 25: 1.0000\n",
+ "Overall Average Retention for top_k 10, num_candidates 50: 1.0000\n",
+ "Overall Average Retention for top_k 10, num_candidates 100: 1.0000\n",
+ "Overall Average Retention for top_k 10, num_candidates 200: 1.0000\n",
+ "Overall Average Retention for top_k 10, num_candidates 500: 1.0000\n",
+ "Overall Average Retention for top_k 10, num_candidates 1000: 1.0000\n",
+ "Overall Average Retention for top_k 10, num_candidates 5000: 1.0000\n",
+ "Overall Average Retention for top_k 50, num_candidates 50: 1.0000\n",
+ "Overall Average Retention for top_k 50, num_candidates 100: 1.0000\n",
+ "Overall Average Retention for top_k 50, num_candidates 200: 1.0000\n",
+ "Overall Average Retention for top_k 50, num_candidates 500: 1.0000\n",
+ "Overall Average Retention for top_k 50, num_candidates 1000: 1.0000\n",
+ "Overall Average Retention for top_k 50, num_candidates 5000: 1.0000\n",
+ "Overall Average Retention for top_k 100, num_candidates 100: 1.0000\n",
+ "Overall Average Retention for top_k 100, num_candidates 200: 1.0000\n",
+ "Overall Average Retention for top_k 100, num_candidates 500: 1.0000\n",
+ "Overall Average Retention for top_k 100, num_candidates 1000: 1.0000\n",
+ "Overall Average Retention for top_k 100, num_candidates 5000: 1.0000\n",
+ "Loaded 1 ground truth annotations\n",
+ "Query: 'What happened in 2022?' | top_k: 5, num_candidates: 25\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [70149799]\n",
+ " Quantized IDs: _binary_: [25391, 70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 5, num_candidates: 50\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [70149799]\n",
+ " Quantized IDs: _binary_: [70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 5, num_candidates: 100\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [70149799]\n",
+ " Quantized IDs: _binary_: [70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 5, num_candidates: 200\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [70149799]\n",
+ " Quantized IDs: _binary_: [70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 5, num_candidates: 500\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [70149799]\n",
+ " Quantized IDs: _binary_: [70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 5, num_candidates: 1000\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [70149799]\n",
+ " Quantized IDs: _binary_: [70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 5, num_candidates: 5000\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [70149799]\n",
+ " Quantized IDs: _binary_: [70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 10, num_candidates: 25\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [70149799]\n",
+ " Quantized IDs: _binary_: [25391, 70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 10, num_candidates: 50\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [70149799]\n",
+ " Quantized IDs: _binary_: [25391, 70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 10, num_candidates: 100\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [70149799]\n",
+ " Quantized IDs: _binary_: [25391, 70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 10, num_candidates: 200\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [70149799]\n",
+ " Quantized IDs: _binary_: [25391, 70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 10, num_candidates: 500\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [70149799]\n",
+ " Quantized IDs: _binary_: [25391, 70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 10, num_candidates: 1000\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [70149799]\n",
+ " Quantized IDs: _binary_: [25391, 70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 10, num_candidates: 5000\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [70149799]\n",
+ " Quantized IDs: _binary_: [70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 50, num_candidates: 50\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [25391, 3524766, 17742072, 70149799]\n",
+ " Quantized IDs: _binary_: [25391, 31750, 3434750, 3524766, 12153654, 31591547, 42085878, 70149799]\n",
+ " Retention: 0.7500\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 50, num_candidates: 100\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [25391, 3524766, 17742072, 70149799]\n",
+ " Quantized IDs: _binary_: [25391, 3434750, 3524766, 17742072, 70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 50, num_candidates: 200\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [25391, 3524766, 17742072, 70149799]\n",
+ " Quantized IDs: _binary_: [25391, 3434750, 3524766, 17742072, 70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 50, num_candidates: 500\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [25391, 3434750, 3524766, 17742072, 70149799]\n",
+ " Quantized IDs: _binary_: [25391, 3434750, 3524766, 17742072, 70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 50, num_candidates: 1000\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [25391, 3524766, 17742072, 70149799]\n",
+ " Quantized IDs: _binary_: [25391, 3524766, 17742072, 70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 50, num_candidates: 5000\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [25391, 3524766, 17742072, 70149799]\n",
+ " Quantized IDs: _binary_: [25391, 3524766, 17742072, 70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 100, num_candidates: 100\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [25391, 31750, 3434750, 3524766, 17742072, 31591547, 42085878, 70149799]\n",
+ " Quantized IDs: _binary_: [25391, 31750, 3434750, 3524766, 12153654, 17742072, 31591547, 42085878, 70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 100, num_candidates: 200\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [25391, 31750, 3434750, 3524766, 17742072, 31591547, 42085878, 70149799]\n",
+ " Quantized IDs: _binary_: [25391, 31750, 3434750, 3524766, 17742072, 31591547, 42085878, 70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 100, num_candidates: 500\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [25391, 31750, 3434750, 3524766, 17742072, 31591547, 42085878, 70149799]\n",
+ " Quantized IDs: _binary_: [25391, 31750, 3434750, 3524766, 17742072, 31591547, 42085878, 70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 100, num_candidates: 1000\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [25391, 31750, 3434750, 3524766, 17742072, 31591547, 42085878, 70149799]\n",
+ " Quantized IDs: _binary_: [25391, 31750, 3434750, 3524766, 17742072, 31591547, 42085878, 70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Query: 'What happened in 2022?' | top_k: 100, num_candidates: 5000\n",
+ " Ground Truth wiki_id: 69407798\n",
+ " Baseline IDs (Float32): [25391, 31750, 3434750, 3524766, 17742072, 31591547, 42085878, 70149799]\n",
+ " Quantized IDs: _binary_: [25391, 31750, 3434750, 3524766, 17742072, 31591547, 42085878, 70149799]\n",
+ " Retention: 1.0000\n",
+ "\n",
+ "Overall Average Retention for top_k 5, num_candidates 25: 1.0000\n",
+ "Overall Average Retention for top_k 5, num_candidates 50: 1.0000\n",
+ "Overall Average Retention for top_k 5, num_candidates 100: 1.0000\n",
+ "Overall Average Retention for top_k 5, num_candidates 200: 1.0000\n",
+ "Overall Average Retention for top_k 5, num_candidates 500: 1.0000\n",
+ "Overall Average Retention for top_k 5, num_candidates 1000: 1.0000\n",
+ "Overall Average Retention for top_k 5, num_candidates 5000: 1.0000\n",
+ "Overall Average Retention for top_k 10, num_candidates 25: 1.0000\n",
+ "Overall Average Retention for top_k 10, num_candidates 50: 1.0000\n",
+ "Overall Average Retention for top_k 10, num_candidates 100: 1.0000\n",
+ "Overall Average Retention for top_k 10, num_candidates 200: 1.0000\n",
+ "Overall Average Retention for top_k 10, num_candidates 500: 1.0000\n",
+ "Overall Average Retention for top_k 10, num_candidates 1000: 1.0000\n",
+ "Overall Average Retention for top_k 10, num_candidates 5000: 1.0000\n",
+ "Overall Average Retention for top_k 50, num_candidates 50: 0.7500\n",
+ "Overall Average Retention for top_k 50, num_candidates 100: 1.0000\n",
+ "Overall Average Retention for top_k 50, num_candidates 200: 1.0000\n",
+ "Overall Average Retention for top_k 50, num_candidates 500: 1.0000\n",
+ "Overall Average Retention for top_k 50, num_candidates 1000: 1.0000\n",
+ "Overall Average Retention for top_k 50, num_candidates 5000: 1.0000\n",
+ "Overall Average Retention for top_k 100, num_candidates 100: 1.0000\n",
+ "Overall Average Retention for top_k 100, num_candidates 200: 1.0000\n",
+ "Overall Average Retention for top_k 100, num_candidates 500: 1.0000\n",
+ "Overall Average Retention for top_k 100, num_candidates 1000: 1.0000\n",
+ "Overall Average Retention for top_k 100, num_candidates 5000: 1.0000\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Connect to the database that holds the benchmark data\n",
+ "DB_NAME = \"testing_datasets\"\n",
+ "db = mongo_client[DB_NAME]\n",
+ "\n",
+ "# Collections: the Wikipedia data and its ground-truth annotations\n",
+ "wiki_data_collection = db[\"wikipedia-22-12-en\"]\n",
+ "ground_truth_collection = db[\"wikipedia-22-12-en-annotation\"]\n",
+ "\n",
+ "# Parameter grid handed to the measurement function for every index\n",
+ "num_queries_to_test = 1\n",
+ "top_k_values = [5, 10, 50, 100]\n",
+ "num_candidates_values = [25, 50, 100, 200, 500, 1000, 5000]\n",
+ "\n",
+ "# One result per vector search index, in the order of vector_search_indices;\n",
+ "# the plotting cell below reads this list.\n",
+ "overall_recall_results = []\n",
+ "for vector_search_index in vector_search_indices:\n",
+ "    index_retention = measure_representational_capacity_retention_against_float_enn(\n",
+ "        collection=wiki_data_collection,\n",
+ "        ground_truth_collection=ground_truth_collection,\n",
+ "        quantized_index_name=vector_search_index,\n",
+ "        num_queries_to_test=num_queries_to_test,\n",
+ "        top_k_values=top_k_values,\n",
+ "        num_candidates_values=num_candidates_values,\n",
+ "    )\n",
+ "    overall_recall_results.append(index_retention)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 1000
+ },
+ "executionInfo": {
+ "elapsed": 735,
+ "status": "ok",
+ "timestamp": 1756744825032,
+ "user": {
+ "displayName": "Javier Armendariz",
+ "userId": "14823381200026660254"
+ },
+ "user_tz": 360
+ },
+ "id": "QBVrxbA0aQEg",
+ "outputId": "8b92d723-ddf1-433f-f2f4-10bf1b83c5e5"
+ },
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Detailed Average Retention Results:\n",
+ "\n",
+ "_float32_ann Embedding:\n",
+ "\n",
+ "Top-K: 5\n",
+ " NumCandidates: 25, Retention: 1.0000\n",
+ " NumCandidates: 50, Retention: 1.0000\n",
+ " NumCandidates: 100, Retention: 1.0000\n",
+ " NumCandidates: 200, Retention: 1.0000\n",
+ " NumCandidates: 500, Retention: 1.0000\n",
+ " NumCandidates: 1000, Retention: 1.0000\n",
+ " NumCandidates: 5000, Retention: 1.0000\n",
+ "\n",
+ "Top-K: 10\n",
+ " NumCandidates: 25, Retention: 1.0000\n",
+ " NumCandidates: 50, Retention: 1.0000\n",
+ " NumCandidates: 100, Retention: 1.0000\n",
+ " NumCandidates: 200, Retention: 1.0000\n",
+ " NumCandidates: 500, Retention: 1.0000\n",
+ " NumCandidates: 1000, Retention: 1.0000\n",
+ " NumCandidates: 5000, Retention: 1.0000\n",
+ "\n",
+ "Top-K: 50\n",
+ " NumCandidates: 50, Retention: 1.0000\n",
+ " NumCandidates: 100, Retention: 1.0000\n",
+ " NumCandidates: 200, Retention: 1.0000\n",
+ " NumCandidates: 500, Retention: 1.0000\n",
+ " NumCandidates: 1000, Retention: 1.0000\n",
+ " NumCandidates: 5000, Retention: 1.0000\n",
+ "\n",
+ "Top-K: 100\n",
+ " NumCandidates: 100, Retention: 1.0000\n",
+ " NumCandidates: 200, Retention: 1.0000\n",
+ " NumCandidates: 500, Retention: 1.0000\n",
+ " NumCandidates: 1000, Retention: 1.0000\n",
+ " NumCandidates: 5000, Retention: 1.0000\n",
+ "\n",
+ "_scalar_ Embedding:\n",
+ "\n",
+ "Top-K: 5\n",
+ " NumCandidates: 25, Retention: 1.0000\n",
+ " NumCandidates: 50, Retention: 1.0000\n",
+ " NumCandidates: 100, Retention: 1.0000\n",
+ " NumCandidates: 200, Retention: 1.0000\n",
+ " NumCandidates: 500, Retention: 1.0000\n",
+ " NumCandidates: 1000, Retention: 1.0000\n",
+ " NumCandidates: 5000, Retention: 1.0000\n",
+ "\n",
+ "Top-K: 10\n",
+ " NumCandidates: 25, Retention: 1.0000\n",
+ " NumCandidates: 50, Retention: 1.0000\n",
+ " NumCandidates: 100, Retention: 1.0000\n",
+ " NumCandidates: 200, Retention: 1.0000\n",
+ " NumCandidates: 500, Retention: 1.0000\n",
+ " NumCandidates: 1000, Retention: 1.0000\n",
+ " NumCandidates: 5000, Retention: 1.0000\n",
+ "\n",
+ "Top-K: 50\n",
+ " NumCandidates: 50, Retention: 1.0000\n",
+ " NumCandidates: 100, Retention: 1.0000\n",
+ " NumCandidates: 200, Retention: 1.0000\n",
+ " NumCandidates: 500, Retention: 1.0000\n",
+ " NumCandidates: 1000, Retention: 1.0000\n",
+ " NumCandidates: 5000, Retention: 1.0000\n",
+ "\n",
+ "Top-K: 100\n",
+ " NumCandidates: 100, Retention: 1.0000\n",
+ " NumCandidates: 200, Retention: 1.0000\n",
+ " NumCandidates: 500, Retention: 1.0000\n",
+ " NumCandidates: 1000, Retention: 1.0000\n",
+ " NumCandidates: 5000, Retention: 1.0000\n",
+ "\n",
+ "_binary_ Embedding:\n",
+ "\n",
+ "Top-K: 5\n",
+ " NumCandidates: 25, Retention: 1.0000\n",
+ " NumCandidates: 50, Retention: 1.0000\n",
+ " NumCandidates: 100, Retention: 1.0000\n",
+ " NumCandidates: 200, Retention: 1.0000\n",
+ " NumCandidates: 500, Retention: 1.0000\n",
+ " NumCandidates: 1000, Retention: 1.0000\n",
+ " NumCandidates: 5000, Retention: 1.0000\n",
+ "\n",
+ "Top-K: 10\n",
+ " NumCandidates: 25, Retention: 1.0000\n",
+ " NumCandidates: 50, Retention: 1.0000\n",
+ " NumCandidates: 100, Retention: 1.0000\n",
+ " NumCandidates: 200, Retention: 1.0000\n",
+ " NumCandidates: 500, Retention: 1.0000\n",
+ " NumCandidates: 1000, Retention: 1.0000\n",
+ " NumCandidates: 5000, Retention: 1.0000\n",
+ "\n",
+ "Top-K: 50\n",
+ " NumCandidates: 50, Retention: 0.7500\n",
+ " NumCandidates: 100, Retention: 1.0000\n",
+ " NumCandidates: 200, Retention: 1.0000\n",
+ " NumCandidates: 500, Retention: 1.0000\n",
+ " NumCandidates: 1000, Retention: 1.0000\n",
+ " NumCandidates: 5000, Retention: 1.0000\n",
+ "\n",
+ "Top-K: 100\n",
+ " NumCandidates: 100, Retention: 1.0000\n",
+ " NumCandidates: 200, Retention: 1.0000\n",
+ " NumCandidates: 500, Retention: 1.0000\n",
+ " NumCandidates: 1000, Retention: 1.0000\n",
+ " NumCandidates: 5000, Retention: 1.0000\n"
+ ]
+ }
+ ],
+ "source": [
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "# Line color for each precision name. The stored results of this notebook report\n",
+ "# the float baseline as \"_float32_ann\", so that key must be present; without it the\n",
+ "# baseline silently falls back to the default color. \"_float32_\" is kept as an alias.\n",
+ "precision_colors = {\n",
+ "    \"_float32_ann\": \"green\",\n",
+ "    \"_float32_\": \"green\",\n",
+ "    \"_scalar_\": \"orange\",\n",
+ "    \"_binary_\": \"red\",\n",
+ "}\n",
+ "\n",
+ "# overall_recall_results is the list built in the previous cell: one dictionary per\n",
+ "# call to measure_representational_capacity_retention_against_float_enn.\n",
+ "# Each dictionary is expected to contain:\n",
+ "#   - 'precision_name': the precision type (e.g., '_scalar_')\n",
+ "#   - 'average_retention': a dict mapping each top_k to a dict mapping num_candidates\n",
+ "#     to the average retention, e.g.,\n",
+ "#     average_retention[top_k][num_candidates] = retention_value\n",
+ "\n",
+ "if overall_recall_results:\n",
+ "    # Collect top_k values from every result (not just the first one) so that no\n",
+ "    # configuration is dropped if the results cover different top_k sets.\n",
+ "    unique_topk = sorted(\n",
+ "        {k for result in overall_recall_results for k in result[\"average_retention\"]}\n",
+ "    )\n",
+ "\n",
+ "    # One figure per top_k: retention vs. number of candidates, one line per precision\n",
+ "    for k in unique_topk:\n",
+ "        fig, ax = plt.subplots(figsize=(10, 6))\n",
+ "        for result in overall_recall_results:\n",
+ "            retention_by_candidates = result[\"average_retention\"].get(k)\n",
+ "            if not retention_by_candidates:\n",
+ "                # This result has no measurements for the current top_k\n",
+ "                continue\n",
+ "\n",
+ "            precision_name = result.get(\"precision_name\", \"unknown\")\n",
+ "            color = precision_colors.get(precision_name, \"blue\")\n",
+ "            candidate_values = sorted(retention_by_candidates)\n",
+ "            retention_values = [retention_by_candidates[nc] for nc in candidate_values]\n",
+ "\n",
+ "            ax.plot(\n",
+ "                candidate_values,\n",
+ "                retention_values,\n",
+ "                marker=\"o\",\n",
+ "                label=precision_name.strip(\"_\"),\n",
+ "                color=color,\n",
+ "            )\n",
+ "\n",
+ "        ax.set_xlabel(\"Number of Candidates\")\n",
+ "        ax.set_ylabel(\"Retention Score\")\n",
+ "        ax.set_title(f\"Retention vs Number of Candidates for Top-K = {k}\")\n",
+ "        ax.legend()\n",
+ "        ax.grid(True)\n",
+ "        plt.show()\n",
+ "\n",
+ "    # Print detailed average retention results\n",
+ "    print(\"\\nDetailed Average Retention Results:\")\n",
+ "    for result in overall_recall_results:\n",
+ "        precision_name = result.get(\"precision_name\", \"unknown\")\n",
+ "        print(f\"\\n{precision_name} Embedding:\")\n",
+ "        for k in sorted(result[\"average_retention\"].keys()):\n",
+ "            print(f\"\\nTop-K: {k}\")\n",
+ "            for nc in sorted(result[\"average_retention\"][k].keys()):\n",
+ "                ret = result[\"average_retention\"][k][nc]\n",
+ "                print(f\" NumCandidates: {nc}, Retention: {ret:.4f}\")"
+ ]
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "provenance": [
+ {
+ "file_id": "19v0-KnkMAf7gFvvW-prQUnTlJVrri96V",
+ "timestamp": 1756752603095
+ }
+ ]
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}