From f984d62c11888d6c07d1a5a6318cd69cf62961c0 Mon Sep 17 00:00:00 2001 From: Tyler Hutcherson Date: Tue, 25 Mar 2025 08:48:38 -0400 Subject: [PATCH 01/11] very much wip --- docs/user_guide/data_validation.ipynb | 1102 ++++++++++++++++++ docs/validation.md | 228 ++++ redisvl/index/index.py | 37 +- redisvl/index/storage.py | 322 ++--- redisvl/schema/__init__.py | 34 +- redisvl/schema/fields.py | 4 +- redisvl/schema/schema.py | 62 +- redisvl/schema/type_utils.py | 63 + redisvl/schema/validation.py | 290 +++++ tests/integration/test_async_search_index.py | 2 +- tests/integration/test_flow_async.py | 2 +- tests/integration/test_search_index.py | 2 +- tests/unit/conftest.py | 183 +++ tests/unit/test_edge_cases.py | 451 +++++++ tests/unit/test_fields.py | 57 + tests/unit/test_storage.py | 560 ++++++++- tests/unit/test_validation.py | 515 ++++++++ 17 files changed, 3658 insertions(+), 256 deletions(-) create mode 100644 docs/user_guide/data_validation.ipynb create mode 100644 docs/validation.md create mode 100644 redisvl/schema/type_utils.py create mode 100644 redisvl/schema/validation.py create mode 100644 tests/unit/conftest.py create mode 100644 tests/unit/test_edge_cases.py create mode 100644 tests/unit/test_validation.py diff --git a/docs/user_guide/data_validation.ipynb b/docs/user_guide/data_validation.ipynb new file mode 100644 index 00000000..366f47a4 --- /dev/null +++ b/docs/user_guide/data_validation.ipynb @@ -0,0 +1,1102 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Getting Started with RedisVL\n", + "`redisvl` is a versatile Python library with an integrated CLI, designed to enhance AI applications using Redis. This guide will walk you through the following steps:\n", + "\n", + "1. Defining an `IndexSchema`\n", + "2. Preparing a sample dataset\n", + "3. Creating a `SearchIndex` object\n", + "4. Testing `rvl` CLI functionality\n", + "5. Loading the sample data\n", + "6. Building `VectorQuery` objects and executing searches\n", + "7. Updating a `SearchIndex` object\n", + "\n", + "...and more!\n", + "\n", + "Prerequisites:\n", + "- Ensure `redisvl` is installed in your Python environment.\n", + "- Have a running instance of [Redis Stack](https://redis.io/docs/install/install-stack/) or [Redis Cloud](https://redis.io/cloud).\n", + "\n", + "_____" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define an `IndexSchema`\n", + "\n", + "The `IndexSchema` maintains crucial **index configuration** and **field definitions** to\n", + "enable search with Redis. For ease of use, the schema can be constructed from a\n", + "python dictionary or yaml file.\n", + "\n", + "### Example Schema Creation\n", + "Consider a dataset with user information, including `job`, `age`, `credit_score`,\n", + "and a 3-dimensional `user_embedding` vector.\n", + "\n", + "You must also decide on a Redis index name and key prefix to use for this\n", + "dataset. 
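(For orientation: RedisVL composes each Redis key from the index `prefix`, the `key_separator`, and a document ID. A minimal sketch of the resulting key shape — the ULID shown is purely illustrative:)

```python
# Hypothetical sketch of how RedisVL composes keys; the ULID is illustrative.
prefix = "user_simple_docs"
key_separator = ":"
doc_id = "01JQ4Y9V0NK7QBYKMCP47MT3DE"  # random ULID generated at load time

key = f"{prefix}{key_separator}{doc_id}"
print(key)  # user_simple_docs:01JQ4Y9V0NK7QBYKMCP47MT3DE
```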
Below are example schema definitions in both YAML and Dict format.\n", + "\n", + "**YAML Definition:**\n", + "\n", + "```yaml\n", + "version: '0.1.0'\n", + "\n", + "index:\n", + " name: user_simple\n", + " prefix: user_simple_docs\n", + "\n", + "fields:\n", + " - name: user\n", + " type: tag\n", + " - name: credit_score\n", + " type: tag\n", + " - name: job\n", + " type: text\n", + " - name: age\n", + " type: numeric\n", + " - name: user_embedding\n", + " type: vector\n", + " attrs:\n", + " algorithm: flat\n", + " dims: 3\n", + " distance_metric: cosine\n", + " datatype: float32\n", + "```\n", + "> Store this in a local file, such as `schema.yaml`, for RedisVL usage." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Python Dictionary:**" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "schema = {\n", + " \"index\": {\n", + " \"name\": \"user_simple\",\n", + " \"prefix\": \"user_simple_docs\",\n", + " \"storage_type\": \"json\"\n", + " },\n", + " \"fields\": [\n", + " {\"name\": \"user\", \"type\": \"tag\"},\n", + " {\"name\": \"credit_score\", \"type\": \"tag\"},\n", + " {\"name\": \"job\", \"type\": \"text\"},\n", + " {\"name\": \"age\", \"type\": \"numeric\"},\n", + " {\"name\": \"location\", \"type\": \"geo\"},\n", + " {\n", + " \"name\": \"user_embedding\",\n", + " \"type\": \"vector\",\n", + " \"attrs\": {\n", + " \"dims\": 3,\n", + " \"distance_metric\": \"cosine\",\n", + " \"algorithm\": \"flat\",\n", + " \"datatype\": \"float32\"\n", + " }\n", + " }\n", + " ]\n", + "}" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Sample Dataset Preparation\n", + "\n", + "Below, create a mock dataset with `user`, `job`, `age`, `credit_score`, and\n", + "`user_embedding` fields. The `user_embedding` vectors are synthetic examples\n", + "for demonstration purposes.\n", + "\n", + "For more information on creating real-world embeddings, refer to this\n", + "[article](https://mlops.community/vector-similarity-search-from-basics-to-production/)." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "\n", + "data = [\n", + " {\n", + " 'user': 'john',\n", + " 'age': 1,\n", + " 'job': 'engineer',\n", + " 'credit_score': 'high',\n", + " 'location': '37.540760,-77.433929',\n", + " 'user_embedding': np.array([0.1, 0.1, 0.5], dtype=np.float32).tobytes()\n", + " },\n", + " {\n", + " 'user': 'mary',\n", + " 'age': 2,\n", + " 'job': 'doctor',\n", + " 'credit_score': 'low',\n", + " 'location': '37.540760,-77.433929',\n", + " 'user_embedding': np.array([0.1, 0.1, 0.5], dtype=np.float32).tobytes()\n", + " },\n", + " {\n", + " 'user': 'joe',\n", + " 'age': 3,\n", + " 'job': 'dentist',\n", + " 'credit_score': 'medium',\n", + " 'location': '37.540760,-77.433929',\n", + " 'user_embedding': np.array([0.9, 0.9, 0.1], dtype=np.float32).tobytes()\n", + " }\n", + "]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + ">As seen above, the sample `user_embedding` vectors are converted into bytes. Using the `NumPy`, this is fairly trivial." 
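As a quick sanity check, the byte string round-trips back to floats with `np.frombuffer`. Note that the bytes representation suits hash storage; since this schema uses JSON storage, a plain list of floats is expected instead (a point the validation error later in this notebook makes explicit). A minimal sketch:

```python
import numpy as np

# Serialize a vector to raw float32 bytes (suitable for hash storage)
vec_bytes = np.array([0.1, 0.1, 0.5], dtype=np.float32).tobytes()

# Decode it back to verify the round trip; dtype must match the schema's datatype
decoded = np.frombuffer(vec_bytes, dtype=np.float32)
print(decoded.tolist())  # [0.10000000149011612, 0.10000000149011612, 0.5]
```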
+ ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create a `SearchIndex`\n", + "\n", + "With the schema and sample dataset ready, instantiate a `SearchIndex`:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from redisvl.index import SearchIndex\n", + "\n", + "index = SearchIndex.from_dict(schema)\n", + "# or use .from_yaml('schema_file.yaml')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we also need to facilitate a Redis connection. There are a few ways to do this:\n", + "\n", + "- Create & manage your own client connection (recommended)\n", + "- Provide a Redis URL and let RedisVL connect on your behalf (by default, it will connect to \"redis://localhost:6379\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Bring your own Redis connection instance\n", + "\n", + "This is ideal in scenarios where you have custom settings on the connection instance or if your application will share a connection pool:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from redis import Redis\n", + "\n", + "client = Redis.from_url(\"redis://localhost:6379\")\n", + "index = SearchIndex.from_dict(schema, redis_client=client)\n", + "\n", + "# alternatively, provide an async Redis client object to enable async index operations\n", + "# from redis.asyncio import Redis\n", + "# from redisvl.index import AsyncSearchIndex\n", + "# client = Redis.from_url(\"redis://localhost:6379\")\n", + "# index = AsyncSearchIndex.from_dict(schema, redis_client=client)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Let the index manage the connection instance\n", + "\n", + "This is ideal for simple cases:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "index = SearchIndex.from_dict(schema, redis_url=\"redis://localhost:6379\", validate_on_load=True)\n", + "\n", + "# If you don't specify a client or Redis URL, the index will attempt to\n", + "# connect to Redis at the default address (\"redis://localhost:6379\")." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create the underlying index\n", + "\n", + "Now that we are connected to Redis, we need to run the create command." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "16:42:16 redisvl.index.index INFO Index already exists, overwriting.\n" + ] + } + ], + "source": [ + "index.create(overwrite=True, drop=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + ">Note that at this point, the index has no entries. Data loading follows." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Inspect with the `rvl` CLI\n", + "Use the `rvl` CLI to inspect the created index and its fields:" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[32m16:36:30\u001b[0m \u001b[34m[RedisVL]\u001b[0m \u001b[1;30mINFO\u001b[0m Indices:\n", + "\u001b[32m16:36:30\u001b[0m \u001b[34m[RedisVL]\u001b[0m \u001b[1;30mINFO\u001b[0m 1. 
user_simple\n" + ] + } + ], + "source": [ + "!rvl index listall" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "Index Information:\n", + "╭──────────────┬────────────────┬──────────────────────┬─────────────────┬────────────╮\n", + "│ Index Name │ Storage Type │ Prefixes │ Index Options │ Indexing │\n", + "├──────────────┼────────────────┼──────────────────────┼─────────────────┼────────────┤\n", + "│ user_simple │ JSON │ ['user_simple_docs'] │ [] │ 0 │\n", + "╰──────────────┴────────────────┴──────────────────────┴─────────────────┴────────────╯\n", + "Index Fields:\n", + "╭──────────────────┬────────────────┬─────────┬────────────────┬────────────────┬────────────────┬────────────────┬────────────────┬────────────────┬─────────────────┬────────────────╮\n", + "│ Name │ Attribute │ Type │ Field Option │ Option Value │ Field Option │ Option Value │ Field Option │ Option Value │ Field Option │ Option Value │\n", + "├──────────────────┼────────────────┼─────────┼────────────────┼────────────────┼────────────────┼────────────────┼────────────────┼────────────────┼─────────────────┼────────────────┤\n", + "│ $.user │ user │ TAG │ SEPARATOR │ , │ │ │ │ │ │ │\n", + "│ $.credit_score │ credit_score │ TAG │ SEPARATOR │ , │ │ │ │ │ │ │\n", + "│ $.job │ job │ TEXT │ WEIGHT │ 1 │ │ │ │ │ │ │\n", + "│ $.age │ age │ NUMERIC │ │ │ │ │ │ │ │ │\n", + "│ $.location │ location │ GEO │ │ │ │ │ │ │ │ │\n", + "│ $.user_embedding │ user_embedding │ VECTOR │ algorithm │ FLAT │ data_type │ FLOAT32 │ dim │ 3 │ distance_metric │ COSINE │\n", + "╰──────────────────┴────────────────┴─────────┴────────────────┴────────────────┴────────────────┴────────────────┴────────────────┴────────────────┴─────────────────┴────────────────╯\n" + ] + } + ], + "source": [ + "!rvl index info -i user_simple" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load Data to `SearchIndex`\n", + "\n", + "Load the sample dataset to Redis:" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "16:42:27 redisvl.index.index ERROR Error while loading data to Redis\n", + "Traceback (most recent call last):\n", + " File \"/Users/tyler.hutcherson/Documents/AppliedAI/redis-vl-python/redisvl/index/storage.py\", line 201, in _preprocess_and_validate_objects\n", + " processed_obj = self.validate(processed_obj)\n", + " File \"/Users/tyler.hutcherson/Documents/AppliedAI/redis-vl-python/redisvl/index/storage.py\", line 158, in validate\n", + " return validate_object(self.index_schema, obj)\n", + " File \"/Users/tyler.hutcherson/Documents/AppliedAI/redis-vl-python/redisvl/schema/validation.py\", line 254, in validate_object\n", + " validated = model_class.model_validate(flat_obj)\n", + " File \"/Users/tyler.hutcherson/Library/Caches/pypoetry/virtualenvs/redisvl-VnTEShF2-py3.13/lib/python3.13/site-packages/pydantic/main.py\", line 627, in model_validate\n", + " return cls.__pydantic_validator__.validate_python(\n", + " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^\n", + " obj, strict=strict, from_attributes=from_attributes, context=context\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " )\n", + " ^\n", + "pydantic_core._pydantic_core.ValidationError: 1 validation error for user_simple__PydanticModel\n", + "user_embedding\n", + " Input should be a valid list 
[type=list_type, input_value=b'\\xcd\\xcc\\xcc=\\xcd\\xcc\\xcc=\\x00\\x00\\x00?', input_type=bytes]\n", + " For further information visit https://errors.pydantic.dev/2.10/v/list_type\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/Users/tyler.hutcherson/Documents/AppliedAI/redis-vl-python/redisvl/index/index.py\", line 600, in load\n", + " return self._storage.write(\n", + " ~~~~~~~~~~~~~~~~~~~^\n", + " self._redis_client, # type: ignore\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " ...<6 lines>...\n", + " validate=self._validate_on_load,\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " )\n", + " ^\n", + " File \"/Users/tyler.hutcherson/Documents/AppliedAI/redis-vl-python/redisvl/index/storage.py\", line 260, in write\n", + " prepared_objects = self._preprocess_and_validate_objects(\n", + " objects,\n", + " ...<3 lines>...\n", + " validate=validate\n", + " )\n", + " File \"/Users/tyler.hutcherson/Documents/AppliedAI/redis-vl-python/redisvl/index/storage.py\", line 212, in _preprocess_and_validate_objects\n", + " raise ValueError(f\"Validation failed for object {object_id}: {str(e)}\")\n", + "ValueError: Validation failed for object at index 0: 1 validation error for user_simple__PydanticModel\n", + "user_embedding\n", + " Input should be a valid list [type=list_type, input_value=b'\\xcd\\xcc\\xcc=\\xcd\\xcc\\xcc=\\x00\\x00\\x00?', input_type=bytes]\n", + " For further information visit https://errors.pydantic.dev/2.10/v/list_type\n" + ] + }, + { + "ename": "ValueError", + "evalue": "Validation failed for object at index 0: 1 validation error for user_simple__PydanticModel\nuser_embedding\n Input should be a valid list [type=list_type, input_value=b'\\xcd\\xcc\\xcc=\\xcd\\xcc\\xcc=\\x00\\x00\\x00?', input_type=bytes]\n For further information visit https://errors.pydantic.dev/2.10/v/list_type", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValidationError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m~/Documents/AppliedAI/redis-vl-python/redisvl/index/storage.py:201\u001b[0m, in \u001b[0;36mBaseStorage._preprocess_and_validate_objects\u001b[0;34m(self, objects, id_field, keys, preprocess, validate)\u001b[0m\n\u001b[1;32m 200\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m validate:\n\u001b[0;32m--> 201\u001b[0m processed_obj \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalidate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mprocessed_obj\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 203\u001b[0m \u001b[38;5;66;03m# Store valid object with its key for writing\u001b[39;00m\n", + "File \u001b[0;32m~/Documents/AppliedAI/redis-vl-python/redisvl/index/storage.py:158\u001b[0m, in \u001b[0;36mBaseStorage.validate\u001b[0;34m(self, obj)\u001b[0m\n\u001b[1;32m 157\u001b[0m \u001b[38;5;66;03m# Pass directly to validation function and let any errors propagate\u001b[39;00m\n\u001b[0;32m--> 158\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mvalidate_object\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mindex_schema\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/Documents/AppliedAI/redis-vl-python/redisvl/schema/validation.py:254\u001b[0m, in 
\u001b[0;36mvalidate_object\u001b[0;34m(schema, obj)\u001b[0m\n\u001b[1;32m 253\u001b[0m \u001b[38;5;66;03m# Validate against model\u001b[39;00m\n\u001b[0;32m--> 254\u001b[0m validated \u001b[38;5;241m=\u001b[39m \u001b[43mmodel_class\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel_validate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mflat_obj\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 255\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m validated\u001b[38;5;241m.\u001b[39mmodel_dump(exclude_none\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n", + "File \u001b[0;32m~/Library/Caches/pypoetry/virtualenvs/redisvl-VnTEShF2-py3.13/lib/python3.13/site-packages/pydantic/main.py:627\u001b[0m, in \u001b[0;36mBaseModel.model_validate\u001b[0;34m(cls, obj, strict, from_attributes, context)\u001b[0m\n\u001b[1;32m 626\u001b[0m __tracebackhide__ \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[0;32m--> 627\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m__pydantic_validator__\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalidate_python\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 628\u001b[0m \u001b[43m \u001b[49m\u001b[43mobj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstrict\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstrict\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfrom_attributes\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfrom_attributes\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcontext\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcontext\u001b[49m\n\u001b[1;32m 629\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[0;31mValidationError\u001b[0m: 1 validation error for user_simple__PydanticModel\nuser_embedding\n Input should be a valid list [type=list_type, input_value=b'\\xcd\\xcc\\xcc=\\xcd\\xcc\\xcc=\\x00\\x00\\x00?', input_type=bytes]\n For further information visit https://errors.pydantic.dev/2.10/v/list_type", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[16], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m keys \u001b[38;5;241m=\u001b[39m \u001b[43mindex\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28mprint\u001b[39m(keys)\n", + "File \u001b[0;32m~/Documents/AppliedAI/redis-vl-python/redisvl/index/index.py:600\u001b[0m, in \u001b[0;36mSearchIndex.load\u001b[0;34m(self, data, id_field, keys, ttl, preprocess, batch_size)\u001b[0m\n\u001b[1;32m 551\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Load objects to the Redis database. 
Returns the list of keys loaded\u001b[39;00m\n\u001b[1;32m 552\u001b[0m \u001b[38;5;124;03mto Redis.\u001b[39;00m\n\u001b[1;32m 553\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 597\u001b[0m \u001b[38;5;124;03m keys = index.load(data, preprocess=add_field)\u001b[39;00m\n\u001b[1;32m 598\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 599\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 600\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_storage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwrite\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 601\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_redis_client\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# type: ignore\u001b[39;49;00m\n\u001b[1;32m 602\u001b[0m \u001b[43m \u001b[49m\u001b[43mobjects\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 603\u001b[0m \u001b[43m \u001b[49m\u001b[43mid_field\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mid_field\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 604\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkeys\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 605\u001b[0m \u001b[43m \u001b[49m\u001b[43mttl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mttl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 606\u001b[0m \u001b[43m \u001b[49m\u001b[43mpreprocess\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpreprocess\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 607\u001b[0m \u001b[43m \u001b[49m\u001b[43mbatch_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbatch_size\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 608\u001b[0m \u001b[43m \u001b[49m\u001b[43mvalidate\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_validate_on_load\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 609\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 610\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m:\n\u001b[1;32m 611\u001b[0m logger\u001b[38;5;241m.\u001b[39mexception(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mError while loading data to Redis\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "File \u001b[0;32m~/Documents/AppliedAI/redis-vl-python/redisvl/index/storage.py:260\u001b[0m, in \u001b[0;36mBaseStorage.write\u001b[0;34m(self, redis_client, objects, id_field, keys, ttl, preprocess, batch_size, validate)\u001b[0m\n\u001b[1;32m 257\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m []\n\u001b[1;32m 259\u001b[0m \u001b[38;5;66;03m# Pass 1: Preprocess and validate all objects\u001b[39;00m\n\u001b[0;32m--> 260\u001b[0m prepared_objects \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_preprocess_and_validate_objects\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 261\u001b[0m \u001b[43m \u001b[49m\u001b[43mobjects\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[1;32m 262\u001b[0m \u001b[43m \u001b[49m\u001b[43mid_field\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mid_field\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 263\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkeys\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 264\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mpreprocess\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpreprocess\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 265\u001b[0m \u001b[43m \u001b[49m\u001b[43mvalidate\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mvalidate\u001b[49m\n\u001b[1;32m 266\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 268\u001b[0m \u001b[38;5;66;03m# Pass 2: Write all valid objects in batches\u001b[39;00m\n\u001b[1;32m 269\u001b[0m added_keys \u001b[38;5;241m=\u001b[39m []\n", + "File \u001b[0;32m~/Documents/AppliedAI/redis-vl-python/redisvl/index/storage.py:212\u001b[0m, in \u001b[0;36mBaseStorage._preprocess_and_validate_objects\u001b[0;34m(self, objects, id_field, keys, preprocess, validate)\u001b[0m\n\u001b[1;32m 209\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m id_field \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(obj, \u001b[38;5;28mdict\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m id_field \u001b[38;5;129;01min\u001b[39;00m obj:\n\u001b[1;32m 210\u001b[0m object_id \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mwith \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mid_field\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m=\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mobj[id_field]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m--> 212\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mValidation failed for object \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mobject_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mstr\u001b[39m(e)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 214\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m prepared_objects\n", + "\u001b[0;31mValueError\u001b[0m: Validation failed for object at index 0: 1 validation error for user_simple__PydanticModel\nuser_embedding\n Input should be a valid list [type=list_type, input_value=b'\\xcd\\xcc\\xcc=\\xcd\\xcc\\xcc=\\x00\\x00\\x00?', input_type=bytes]\n For further information visit https://errors.pydantic.dev/2.10/v/list_type" + ] + } + ], + "source": [ + "keys = index.load(data)\n", + "\n", + "print(keys)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "IndexInfo(name='user_simple', prefix='user_simple_docs', key_separator=':', storage_type=)" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "index.schema.index" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['user_simple_docs:01JQ4Y9V0NK7QBYKMCP47MT3DE']" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "index.load(data=[\n", + " {\n", + " 'user': 'john',\n", + " 'age': 1,\n", + " 'job': 'engineer',\n", + " 'credit_score': 'high',\n", + " 'location': 1,\n", + " 'user_embedding': [\n", + " ]\n", + " }\n", + "])" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'index_name': 'user_simple',\n", + " 'index_options': [],\n", + " 'index_definition': ['key_type',\n", + " 'JSON',\n", + " 'prefixes',\n", + " ['user_simple_docs'],\n", + " 'default_score',\n", + " '1',\n", + " 'indexes_all',\n", + " 
'false'],\n", + " 'attributes': [['identifier',\n", + " '$.user',\n", + " 'attribute',\n", + " 'user',\n", + " 'type',\n", + " 'TAG',\n", + " 'SEPARATOR',\n", + " ','],\n", + " ['identifier',\n", + " '$.credit_score',\n", + " 'attribute',\n", + " 'credit_score',\n", + " 'type',\n", + " 'TAG',\n", + " 'SEPARATOR',\n", + " ','],\n", + " ['identifier', '$.job', 'attribute', 'job', 'type', 'TEXT', 'WEIGHT', '1'],\n", + " ['identifier', '$.age', 'attribute', 'age', 'type', 'NUMERIC'],\n", + " ['identifier', '$.location', 'attribute', 'location', 'type', 'GEO'],\n", + " ['identifier',\n", + " '$.user_embedding',\n", + " 'attribute',\n", + " 'user_embedding',\n", + " 'type',\n", + " 'VECTOR',\n", + " 'algorithm',\n", + " 'FLAT',\n", + " 'data_type',\n", + " 'FLOAT32',\n", + " 'dim',\n", + " 3,\n", + " 'distance_metric',\n", + " 'COSINE']],\n", + " 'num_docs': 2,\n", + " 'max_doc_id': 2,\n", + " 'num_terms': 2,\n", + " 'num_records': 12,\n", + " 'inverted_sz_mb': '4.61578369140625e-4',\n", + " 'vector_index_sz_mb': '0.028045654296875',\n", + " 'total_inverted_index_blocks': 5,\n", + " 'offset_vectors_sz_mb': '3.814697265625e-6',\n", + " 'doc_table_size_mb': '2.117156982421875e-4',\n", + " 'sortable_values_size_mb': '0',\n", + " 'key_table_size_mb': '8.296966552734375e-5',\n", + " 'tag_overhead_sz_mb': '5.53131103515625e-5',\n", + " 'text_overhead_sz_mb': '6.67572021484375e-5',\n", + " 'total_index_memory_sz_mb': '9.565353393554688e-4',\n", + " 'geoshapes_sz_mb': '0',\n", + " 'records_per_doc_avg': '6',\n", + " 'bytes_per_record_avg': '40.33333206176758',\n", + " 'offsets_per_term_avg': '0.3333333432674408',\n", + " 'offset_bits_per_record_avg': '8',\n", + " 'hash_indexing_failures': 4,\n", + " 'total_indexing_time': '0.3160000145435333',\n", + " 'indexing': 0,\n", + " 'percent_indexed': '1',\n", + " 'number_of_uses': 2,\n", + " 'cleaning': 0,\n", + " 'gc_stats': ['bytes_collected',\n", + " '0',\n", + " 'total_ms_run',\n", + " '0',\n", + " 'total_cycles',\n", + " '0',\n", + " 'average_cycle_time_ms',\n", + " 'nan',\n", + " 'last_run_time_ms',\n", + " '0',\n", + " 'gc_numeric_trees_missed',\n", + " '0',\n", + " 'gc_blocks_denied',\n", + " '0'],\n", + " 'cursor_stats': ['global_idle',\n", + " 0,\n", + " 'global_total',\n", + " 0,\n", + " 'index_capacity',\n", + " 128,\n", + " 'index_total',\n", + " 0],\n", + " 'dialect_stats': ['dialect_1',\n", + " 0,\n", + " 'dialect_2',\n", + " 0,\n", + " 'dialect_3',\n", + " 0,\n", + " 'dialect_4',\n", + " 0],\n", + " 'Index Errors': ['indexing failures',\n", + " 4,\n", + " 'last indexing error',\n", + " 'Empty array for vector field on JSON document',\n", + " 'last indexing error key',\n", + " 'user_simple_docs:01JQ4Y9V0NK7QBYKMCP47MT3DE'],\n", + " 'field statistics': [['identifier',\n", + " '$.user',\n", + " 'attribute',\n", + " 'user',\n", + " 'Index Errors',\n", + " ['indexing failures',\n", + " 0,\n", + " 'last indexing error',\n", + " 'N/A',\n", + " 'last indexing error key',\n", + " 'N/A']],\n", + " ['identifier',\n", + " '$.credit_score',\n", + " 'attribute',\n", + " 'credit_score',\n", + " 'Index Errors',\n", + " ['indexing failures',\n", + " 0,\n", + " 'last indexing error',\n", + " 'N/A',\n", + " 'last indexing error key',\n", + " 'N/A']],\n", + " ['identifier',\n", + " '$.job',\n", + " 'attribute',\n", + " 'job',\n", + " 'Index Errors',\n", + " ['indexing failures',\n", + " 0,\n", + " 'last indexing error',\n", + " 'N/A',\n", + " 'last indexing error key',\n", + " 'N/A']],\n", + " ['identifier',\n", + " '$.age',\n", + " 'attribute',\n", + " 'age',\n", 
+ " 'Index Errors',\n", + " ['indexing failures',\n", + " 0,\n", + " 'last indexing error',\n", + " 'N/A',\n", + " 'last indexing error key',\n", + " 'N/A']],\n", + " ['identifier',\n", + " '$.location',\n", + " 'attribute',\n", + " 'location',\n", + " 'Index Errors',\n", + " ['indexing failures',\n", + " 0,\n", + " 'last indexing error',\n", + " 'N/A',\n", + " 'last indexing error key',\n", + " 'N/A']],\n", + " ['identifier',\n", + " '$.user_embedding',\n", + " 'attribute',\n", + " 'user_embedding',\n", + " 'Index Errors',\n", + " ['indexing failures',\n", + " 4,\n", + " 'last indexing error',\n", + " 'Empty array for vector field on JSON document',\n", + " 'last indexing error key',\n", + " 'user_simple_docs:01JQ4Y9V0NK7QBYKMCP47MT3DE']]]}" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "index.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + ">By default, `load` will create a unique Redis key as a combination of the index key `prefix` and a random ULID. You can also customize the key by providing direct keys or pointing to a specified `id_field` on load." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Upsert the index with new data\n", + "Upsert data by using the `load` method again:" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['user_simple_docs:01JM2NWJGYMJ0QTR5YB4MB0BX9']\n" + ] + } + ], + "source": [ + "# Add more data\n", + "new_data = [{\n", + " 'user': 'tyler',\n", + " 'age': 9,\n", + " 'job': 'engineer',\n", + " 'credit_score': 'high',\n", + " 'user_embedding': np.array([0.1, 0.3, 0.5], dtype=np.float32).tobytes()\n", + "}]\n", + "keys = index.load(new_data)\n", + "\n", + "print(keys)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Creating `VectorQuery` Objects\n", + "\n", + "Next we will create a vector query object for our newly populated index. This example will use a simple vector to demonstrate how vector similarity works. Vectors in production will likely be much larger than 3 floats and often require Machine Learning models (i.e. Huggingface sentence transformers) or an embeddings API (Cohere, OpenAI). `redisvl` provides a set of [Vectorizers](https://docs.redisvl.com/en/latest/user_guide/vectorizers_04.html#openai) to assist in vector creation." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "from redisvl.query import VectorQuery\n", + "from jupyterutils import result_print\n", + "\n", + "query = VectorQuery(\n", + " vector=[0.1, 0.1, 0.5],\n", + " vector_field_name=\"user_embedding\",\n", + " return_fields=[\"user\", \"age\", \"job\", \"credit_score\", \"vector_distance\"],\n", + " num_results=3\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Executing queries\n", + "With our `VectorQuery` object defined above, we can execute the query over the `SearchIndex` using the `query` method." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "*=>[KNN 3 @user_embedding $vector AS vector_distance] RETURN 6 user age job credit_score vector_distance vector_distance SORTBY vector_distance ASC DIALECT 2 LIMIT 0 3\n" + ] + }, + { + "data": { + "text/html": [ + "table>vector_distanceuseragejobcredit_score0john1engineerhigh0mary2doctorlow0.0566299557686tyler9engineerhigh" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "results = index.query(query)\n", + "result_print(results)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Using an Asynchronous Redis Client\n", + "\n", + "The `AsyncSearchIndex` class along with an async Redis python client allows for queries, index creation, and data loading to be done asynchronously. This is the\n", + "recommended route for working with `redisvl` in production-like settings." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'index': {'name': 'user_simple', 'prefix': 'user_simple_docs'},\n", + " 'fields': [{'name': 'user', 'type': 'tag'},\n", + " {'name': 'credit_score', 'type': 'tag'},\n", + " {'name': 'job', 'type': 'text'},\n", + " {'name': 'age', 'type': 'numeric'},\n", + " {'name': 'user_embedding',\n", + " 'type': 'vector',\n", + " 'attrs': {'dims': 3,\n", + " 'distance_metric': 'cosine',\n", + " 'algorithm': 'flat',\n", + " 'datatype': 'float32'}}]}" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "schema" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from redisvl.index import AsyncSearchIndex\n", + "from redis.asyncio import Redis\n", + "\n", + "client = Redis.from_url(\"redis://localhost:6379\")\n", + "\n", + "index = AsyncSearchIndex.from_dict(schema, redis_client=client)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
<table><tr><th>vector_distance</th><th>user</th><th>age</th><th>job</th><th>credit_score</th></tr><tr><td>0</td><td>john</td><td>1</td><td>engineer</td><td>high</td></tr><tr><td>0</td><td>mary</td><td>2</td><td>doctor</td><td>low</td></tr><tr><td>0.0566299557686</td><td>tyler</td><td>9</td><td>engineer</td><td>high</td></tr></table>
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# execute the vector query async\n", + "results = await index.query(query)\n", + "result_print(results)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Updating a schema\n", + "In some scenarios, it makes sense to update the index schema. With Redis and `redisvl`, this is easy because Redis can keep the underlying data in place while you change or make updates to the index configuration." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "So for our scenario, let's imagine we want to reindex this data in 2 ways:\n", + "- by using a `Tag` type for `job` field instead of `Text`\n", + "- by using an `hnsw` vector index for the `user_embedding` field instead of a `flat` vector index" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "# Modify this schema to have what we want\n", + "\n", + "index.schema.remove_field(\"job\")\n", + "index.schema.remove_field(\"user_embedding\")\n", + "index.schema.add_fields([\n", + " {\"name\": \"job\", \"type\": \"tag\"},\n", + " {\n", + " \"name\": \"user_embedding\",\n", + " \"type\": \"vector\",\n", + " \"attrs\": {\n", + " \"dims\": 3,\n", + " \"distance_metric\": \"cosine\",\n", + " \"algorithm\": \"hnsw\",\n", + " \"datatype\": \"float32\"\n", + " }\n", + " }\n", + "])" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "11:28:32 redisvl.index.index INFO Index already exists, overwriting.\n" + ] + } + ], + "source": [ + "# Run the index update but keep underlying data in place\n", + "await index.create(overwrite=True, drop=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
<table><tr><th>vector_distance</th><th>user</th><th>age</th><th>job</th><th>credit_score</th></tr><tr><td>0</td><td>mary</td><td>2</td><td>doctor</td><td>low</td></tr><tr><td>0</td><td>john</td><td>1</td><td>engineer</td><td>high</td></tr><tr><td>0.0566299557686</td><td>tyler</td><td>9</td><td>engineer</td><td>high</td></tr></table>
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Execute the vector query async\n", + "results = await index.query(query)\n", + "result_print(results)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Check Index Stats\n", + "Use the `rvl` CLI to check the stats for the index:" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Statistics:\n", + "╭─────────────────────────────┬─────────────╮\n", + "│ Stat Key │ Value │\n", + "├─────────────────────────────┼─────────────┤\n", + "│ num_docs │ 4 │\n", + "│ num_terms │ 4 │\n", + "│ max_doc_id │ 4 │\n", + "│ num_records │ 22 │\n", + "│ percent_indexed │ 1 │\n", + "│ hash_indexing_failures │ 0 │\n", + "│ number_of_uses │ 2 │\n", + "│ bytes_per_record_avg │ 47.8 │\n", + "│ doc_table_size_mb │ 0.000423431 │\n", + "│ inverted_sz_mb │ 0.000911713 │\n", + "│ key_table_size_mb │ 0.000165939 │\n", + "│ offset_bits_per_record_avg │ nan │\n", + "│ offset_vectors_sz_mb │ 0 │\n", + "│ offsets_per_term_avg │ 0 │\n", + "│ records_per_doc_avg │ 5 │\n", + "│ sortable_values_size_mb │ 0 │\n", + "│ total_indexing_time │ 0.239 │\n", + "│ total_inverted_index_blocks │ 11 │\n", + "│ vector_index_sz_mb │ 0.235603 │\n", + "╰─────────────────────────────┴─────────────╯\n" + ] + } + ], + "source": [ + "!rvl stats -i user_simple" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Cleanup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Below we will clean up after our work. First, you can flush all data from Redis associated with the index by\n", + "using the `.clear()` method. This will leave the secondary index in place for future insertions or updates.\n", + "\n", + "But if you want to clean up everything, including the index, just use `.delete()`\n", + "which will by default remove the index AND the underlying data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "4" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Clear all data from Redis associated with the index\n", + "await index.clear()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Butm the index is still in place\n", + "await index.exists()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "# Remove / delete the index in its entirety\n", + "await index.delete()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.2" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/validation.md b/docs/validation.md new file mode 100644 index 00000000..204a009f --- /dev/null +++ b/docs/validation.md @@ -0,0 +1,228 @@ +# RedisVL Validation System + +The RedisVL validation system ensures that data written to Redis indexes conforms to the defined schema. It uses dynamic Pydantic model generation to validate objects before they are stored. + +## Key Features + +- **Schema-Based Validation**: Validates objects against your index schema definition +- **Dynamic Model Generation**: Creates Pydantic models on the fly based on your schema +- **Type Checking**: Ensures fields contain appropriate data types +- **Field-Specific Validation**: + - Text and Tag fields must be strings + - Numeric fields must be integers or floats + - Geo fields must be properly formatted latitude/longitude strings + - Vector fields must have the correct dimensions and data types +- **JSON Path Support**: Validates fields extracted from nested JSON structures +- **Fail-Fast Approach**: Stops processing at the first validation error +- **Performance Optimized**: Caches models for repeated validation + +## Usage + +### Basic Validation + +```python +from redisvl.schema.validation import validate_object + +# Assuming you have a schema defined +validated_data = validate_object(schema, data) +``` + +### Storage Integration + +The validation is automatically integrated with the storage classes: + +```python +from redisvl.index.storage import BaseStorage + +# Create storage with schema +storage = BaseStorage(schema=schema, client=redis_client) + +# Write data - validation happens automatically +storage.write_one(data) + +# Or validate explicitly +validated = storage.validate_object(data) +``` + +## Field Type Validation + +The validation system supports all Redis field types: + +### Text Fields + +Text fields are validated to ensure they contain string values: + +```python +# Valid +{"title": "Hello World"} + +# Invalid +{"title": 123} # Not a string +``` + +### Tag Fields + +Tag fields are validated to ensure they contain string values: + +```python +# Valid +{"category": "electronics"} + +# Invalid +{"category": 123} # Not a string +``` + +### Numeric Fields + +Numeric fields must contain integers or floats: + +```python +# Valid +{"price": 
19.99} +{"quantity": 5} + +# Invalid +{"price": "19.99"} # String, not a number +``` + +### Geo Fields + +Geo fields must contain properly formatted latitude/longitude strings: + +```python +# Valid +{"location": "37.7749,-122.4194"} # San Francisco +{"location": "40.7128,-74.0060"} # New York + +# Invalid +{"location": "invalid"} # Not in lat,lon format +{"location": "91.0,0.0"} # Latitude out of range (-90 to 90) +{"location": "0.0,181.0"} # Longitude out of range (-180 to 180) +``` + +### Vector Fields + +Vector fields must contain arrays with the correct dimensions and data types: + +```python +# Valid +{"embedding": [0.1, 0.2, 0.3, 0.4]} # 4-dimensional float vector +{"embedding": b'\x00\x01\x02\x03'} # Raw bytes (dimensions not checked) + +# Invalid +{"embedding": [0.1, 0.2, 0.3]} # Wrong dimensions +{"embedding": "not a vector"} # Wrong type +{"embedding": [0.1, "text", 0.3]} # Mixed types +``` + +For integer vectors, the values must be within the appropriate range: + +- **INT8**: -128 to 127 +- **INT16**: -32,768 to 32,767 + +```python +# Valid INT8 vector +{"int_vector": [1, 2, 3]} + +# Invalid INT8 vector +{"int_vector": [1000, 2000, 3000]} # Values out of range +``` + +## Nested JSON Validation + +The validation system supports extracting and validating fields from nested JSON structures: + +```python +# Schema with JSON paths +fields = { + "id": Field(name="id", type=FieldTypes.TAG), + "title": Field(name="title", type=FieldTypes.TEXT, path="$.content.title"), + "rating": Field(name="rating", type=FieldTypes.NUMERIC, path="$.metadata.rating") +} + +# Nested JSON data +data = { + "id": "doc1", + "content": { + "title": "Hello World" + }, + "metadata": { + "rating": 4.5 + } +} + +# Validation extracts fields using JSON paths +validated = validate_object(schema, data) +# Result: {"id": "doc1", "title": "Hello World", "rating": 4.5} +``` + +## Error Handling + +The validation system uses a fail-fast approach, raising a `ValueError` when validation fails: + +```python +try: + validated = validate_object(schema, data) +except ValueError as e: + print(f"Validation error: {e}") + # Handle the error +``` + +The error message includes information about the field that failed validation. + +## Optional Fields + +All fields are considered optional during validation. If a field is missing, it will be excluded from the validated result: + +```python +# Schema with multiple fields +fields = { + "id": Field(name="id", type=FieldTypes.TAG), + "title": Field(name="title", type=FieldTypes.TEXT), + "rating": Field(name="rating", type=FieldTypes.NUMERIC) +} + +# Data with missing fields +data = { + "id": "doc1", + "title": "Hello World" + # rating is missing +} + +# Validation succeeds with partial data +validated = validate_object(schema, data) +# Result: {"id": "doc1", "title": "Hello World"} +``` + +## Performance Considerations + +The validation system is optimized for performance: + +- **Model Caching**: Pydantic models are cached by schema name to avoid regeneration +- **Lazy Validation**: Fields are validated only when needed +- **Fail-Fast Approach**: Processing stops at the first validation error + +For large datasets, validation can be a significant part of the processing time. If you need to write many objects with the same structure, consider validating a sample first to ensure correctness. + +## Limitations + +- **JSON Path**: The current implementation only supports simple dot notation paths (e.g., `$.field.subfield`). Array indexing is not supported. 
+- **Vector Bytes**: When vectors are provided as bytes, the dimensions cannot be validated. +- **Custom Validators**: The current implementation does not support custom user-defined validators. + +## Best Practices + +1. **Define Clear Schemas**: Be explicit about field types and constraints +2. **Pre-validate Critical Data**: For large datasets, validate a sample before processing everything +3. **Handle Validation Errors**: Implement proper error handling for validation failures +4. **Use JSON Paths Carefully**: Test nested JSON extraction to ensure paths are correctly defined +5. **Consider Optional Fields**: Decide which fields are truly required for your application + +## Integration with Storage Classes + +The validation system is fully integrated with the storage classes: + +- **BaseStorage**: For hash-based storage, validates each field individually +- **JsonStorage**: For JSON storage, extracts and validates fields from nested structures + +Each storage class automatically validates data before writing to Redis, ensuring data integrity. \ No newline at end of file diff --git a/redisvl/index/index.py b/redisvl/index/index.py index c4e5de62..0cf9b172 100644 --- a/redisvl/index/index.py +++ b/redisvl/index/index.py @@ -130,8 +130,7 @@ def __init__(*args, **kwargs): def _storage(self) -> BaseStorage: """The storage type for the index schema.""" return self._STORAGE_MAP[self.schema.index.storage_type]( - prefix=self.schema.index.prefix, - key_separator=self.schema.index.key_separator, + index_schema=self.schema ) @property @@ -263,6 +262,7 @@ def __init__( redis_client: Optional[redis.Redis] = None, redis_url: Optional[str] = None, connection_kwargs: Optional[Dict[str, Any]] = None, + validate_on_load: bool = False, **kwargs, ): """Initialize the RedisVL search index with a schema, Redis client @@ -277,6 +277,8 @@ def __init__( connect to. connection_kwargs (Dict[str, Any], optional): Redis client connection args. + validate_on_load (bool, optional): Whether to validate data against schema + when loading. Defaults to False. """ if "connection_args" in kwargs: connection_kwargs = kwargs.pop("connection_args") @@ -285,7 +287,7 @@ def __init__( raise ValueError("Must provide a valid IndexSchema object") self.schema = schema - + self._validate_on_load = validate_on_load self._lib_name: Optional[str] = kwargs.pop("lib_name", None) # Store connection parameters @@ -593,7 +595,7 @@ def load( Raises: ValueError: If the length of provided keys does not match the length - of objects. + of objects or if validation fails when validate_on_load is enabled. .. code-block:: python @@ -623,6 +625,7 @@ def add_field(d): ttl=ttl, preprocess=preprocess, batch_size=batch_size, + validate=self._validate_on_load, ) except: logger.exception("Error while loading data to Redis") @@ -934,6 +937,7 @@ def __init__( redis_url: Optional[str] = None, redis_client: Optional[aredis.Redis] = None, connection_kwargs: Optional[Dict[str, Any]] = None, + validate_on_load: bool = False, **kwargs, ): """Initialize the RedisVL async search index with a schema. @@ -946,6 +950,8 @@ def __init__( instantiated redis client. connection_kwargs (Optional[Dict[str, Any]]): Redis client connection args. + validate_on_load (bool, optional): Whether to validate data against schema + when loading. Defaults to False. 
""" if "redis_kwargs" in kwargs: connection_kwargs = kwargs.pop("redis_kwargs") @@ -955,7 +961,7 @@ def __init__( raise ValueError("Must provide a valid IndexSchema object") self.schema = schema - + self._validate_on_load = validate_on_load self._lib_name: Optional[str] = kwargs.pop("lib_name", None) # Store connection parameters @@ -1203,6 +1209,7 @@ async def expire_keys( else: return await client.expire(keys, ttl) + @deprecated_argument("concurrency", "Use batch_size instead.") async def load( self, data: Iterable[Any], @@ -1211,9 +1218,10 @@ async def load( ttl: Optional[int] = None, preprocess: Optional[Callable] = None, concurrency: Optional[int] = None, + batch_size: Optional[int] = None, ) -> List[str]: - """Asynchronously load objects to Redis with concurrency control. - Returns the list of keys loaded to Redis. + """Asynchronously load objects to Redis. Returns the list of keys loaded + to Redis. RedisVL automatically handles constructing the object keys, batching, optional preprocessing steps, and setting optional expiration @@ -1228,18 +1236,18 @@ async def load( Must match the length of objects if provided. Defaults to None. ttl (Optional[int], optional): Time-to-live in seconds for each key. Defaults to None. - preprocess (Optional[Callable], optional): An async function to + preprocess (Optional[Callable], optional): A function to preprocess objects before storage. Defaults to None. - concurrency (Optional[int], optional): The maximum number of - concurrent write operations. Defaults to class's default - concurrency level. + batch_size (Optional[int], optional): Number of objects to write in + a single Redis pipeline execution. Defaults to class's + default batch size. Returns: List[str]: List of keys loaded to Redis. Raises: ValueError: If the length of provided keys does not match the - length of objects. + length of objects or if validation fails when validate_on_load is enabled. .. 
code-block:: python @@ -1255,7 +1263,7 @@ async def load( keys = await index.load(data, keys=["rvl:foo", "rvl:bar"]) # load data with preprocessing step - async def add_field(d): + def add_field(d): d["new_field"] = 123 return d keys = await index.load(data, preprocess=add_field) @@ -1270,7 +1278,8 @@ async def add_field(d): keys=keys, ttl=ttl, preprocess=preprocess, - concurrency=concurrency, + batch_size=batch_size, + validate=self._validate_on_load, ) except: logger.exception("Error while loading data to Redis") diff --git a/redisvl/index/storage.py b/redisvl/index/storage.py index 2be386c0..f90e45b4 100644 --- a/redisvl/index/storage.py +++ b/redisvl/index/storage.py @@ -1,14 +1,18 @@ -import asyncio -from typing import Any, Callable, Dict, Iterable, List, Optional +from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple -from pydantic import BaseModel +from pydantic import BaseModel, ValidationError from redis import Redis from redis.asyncio import Redis as AsyncRedis from redis.commands.search.indexDefinition import IndexType from redisvl.redis.utils import convert_bytes +from redisvl.schema import IndexSchema +from redisvl.schema.validation import validate_object +from redisvl.utils.log import get_logger from redisvl.utils.utils import create_ulid +logger = get_logger(__name__) + class BaseStorage(BaseModel): """ @@ -20,14 +24,10 @@ class BaseStorage(BaseModel): type: IndexType """Type of index used in storage""" - prefix: str - """Prefix for Redis keys""" - key_separator: str - """Separator between prefix and key value""" + index_schema: IndexSchema + """Index schema definition""" default_batch_size: int = 200 """Default size for batch operations""" - default_write_concurrency: int = 20 - """Default concurrency for async ops""" @staticmethod def _key(id: str, prefix: str, key_separator: str) -> str: @@ -72,7 +72,9 @@ def _create_key(self, obj: Dict[str, Any], id_field: Optional[str] = None) -> st raise ValueError(f"Key field {id_field} not found in record {obj}") return self._key( - key_value, prefix=self.prefix, key_separator=self.key_separator + key_value, + prefix=self.index_schema.index.prefix, + key_separator=self.index_schema.index.key_separator, ) @staticmethod @@ -92,35 +94,6 @@ def _preprocess(obj: Any, preprocess: Optional[Callable] = None) -> Dict[str, An obj = preprocess(obj) return obj - @staticmethod - async def _apreprocess( - obj: Any, preprocess: Optional[Callable] = None - ) -> Dict[str, Any]: - """Asynchronously apply a preprocessing function to the object if - provided. - - Args: - preprocess (Optional[Callable], optional): Async function to - process the object. - obj (Any): Object to preprocess. - - Returns: - Dict[str, Any]: Processed object as a dictionary. - """ - # optionally async preprocess object - if preprocess: - obj = await preprocess(obj) - return obj - - def _validate(self, obj: Dict[str, Any]): - """Validate the object before writing to Redis. This method should be - implemented by subclasses. - - Args: - obj (Dict[str, Any]): The object to validate. - """ - raise NotImplementedError - @staticmethod def _set(client: Redis, key: str, obj: Dict[str, Any]): """Synchronously set the value in Redis for the given key. @@ -169,6 +142,84 @@ async def _aget(client: AsyncRedis, key: str) -> Dict[str, Any]: """ raise NotImplementedError + def validate(self, obj: Dict[str, Any]) -> Dict[str, Any]: + """ + Validate an object against the schema using Pydantic-based validation. 
+ + Args: + obj: The object to validate + + Returns: + Validated object with any type coercions applied + + Raises: + ValueError: If validation fails + """ + # Pass directly to validation function and let any errors propagate + return validate_object(self.index_schema, obj) + + def _preprocess_and_validate_objects( + self, + objects: List[Any], + id_field: Optional[str] = None, + keys: Optional[Iterable[str]] = None, + preprocess: Optional[Callable] = None, + validate: bool = False, + ) -> List[Tuple[str, Dict[str, Any]]]: + """ + Preprocess and validate a list of objects with fail-fast approach. + + Args: + objects: List of objects to preprocess and validate + id_field: Field to use as the key + keys: Optional iterable of keys + preprocess: Optional preprocessing function + validate: Whether to validate against schema + + Returns: + List of tuples (key, processed_obj) for valid objects + + Raises: + ValueError: If any validation fails with object context + """ + prepared_objects = [] + keys_iterator = iter(keys) if keys else None + + for i, obj in enumerate(objects): + try: + # Generate key + key = ( + next(keys_iterator) + if keys_iterator + else self._create_key(obj, id_field) + ) + + # Preprocess + processed_obj = self._preprocess(obj, preprocess) + + # Basic type validation + if not isinstance(processed_obj, dict): + raise ValueError( + f"Object must be a dictionary, got {type(processed_obj).__name__}" + ) + + # Schema validation if enabled + if validate: + processed_obj = self.validate(processed_obj) + + # Store valid object with its key for writing + prepared_objects.append((key, processed_obj)) + + except Exception as e: + # Enhance error message with object context + object_id = f"at index {i}" + if id_field and isinstance(obj, dict) and id_field in obj: + object_id = f"with {id_field}={obj[id_field]}" + + raise ValueError(f"Validation failed for object {object_id}: {str(e)}") + + return prepared_objects + def write( self, redis_client: Redis, @@ -178,6 +229,7 @@ def write( ttl: Optional[int] = None, preprocess: Optional[Callable] = None, batch_size: Optional[int] = None, + validate: bool = False, ) -> List[str]: """Write a batch of objects to Redis as hash entries. This method returns a list of Redis keys written to the database. @@ -195,44 +247,52 @@ def write( objects before storage. Defaults to None. batch_size (Optional[int], optional): Number of objects to write in a single Redis pipeline execution. + validate (bool, optional): Whether to validate objects against schema. + Defaults to False. Raises: ValueError: If the length of provided keys does not match the - length of objects. + length of objects, or if validation fails. 
""" if keys and len(keys) != len(objects): # type: ignore raise ValueError("Length of keys does not match the length of objects") if batch_size is None: - # Use default or calculate based on the input data batch_size = self.default_batch_size - keys_iterator = iter(keys) if keys else None - added_keys: List[str] = [] - - if objects: - with redis_client.pipeline(transaction=False) as pipe: - for i, obj in enumerate(objects, start=1): - # Construct key, validate, and write - key = ( - next(keys_iterator) - if keys_iterator - else self._create_key(obj, id_field) - ) - obj = self._preprocess(obj, preprocess) - self._validate(obj) - self._set(pipe, key, obj) - # Set TTL if provided - if ttl: - pipe.expire(key, ttl) - # Execute mini batch - if i % batch_size == 0: - pipe.execute() - added_keys.append(key) - # Clean up batches if needed - if i % batch_size != 0: + if not objects: + return [] + + # Pass 1: Preprocess and validate all objects + prepared_objects = self._preprocess_and_validate_objects( + objects, + id_field=id_field, + keys=keys, + preprocess=preprocess, + validate=validate, + ) + + # Pass 2: Write all valid objects in batches + added_keys = [] + + with redis_client.pipeline(transaction=False) as pipe: + for i, (key, obj) in enumerate(prepared_objects, start=1): + self._set(pipe, key, obj) + + # Set TTL if provided + if ttl: + pipe.expire(key, ttl) + + added_keys.append(key) + + # Execute in batches + if i % batch_size == 0: pipe.execute() + # Execute any remaining commands + if len(prepared_objects) % batch_size != 0: + pipe.execute() + return added_keys async def awrite( @@ -242,12 +302,12 @@ async def awrite( id_field: Optional[str] = None, keys: Optional[Iterable[str]] = None, ttl: Optional[int] = None, + batch_size: Optional[int] = None, preprocess: Optional[Callable] = None, - concurrency: Optional[int] = None, + validate: bool = False, ) -> List[str]: - """Asynchronously write objects to Redis as hash entries with - concurrency control. The method returns a list of keys written to the - database. + """Asynchronously write objects to Redis as hash entries using pipeline batching. + The method returns a list of keys written to the database. Args: redis_client (AsyncRedis): An asynchronous Redis client used @@ -259,47 +319,60 @@ async def awrite( Must match the length of objects if provided. ttl (Optional[int], optional): Time-to-live in seconds for each key. Defaults to None. + batch_size (Optional[int], optional): Number of objects to write + in a single Redis pipeline execution. preprocess (Optional[Callable], optional): An async function to preprocess objects before storage. Defaults to None. - concurrency (Optional[int], optional): The maximum number of - concurrent write operations. Defaults to class's default - concurrency level. + validate (bool, optional): Whether to validate objects against schema. + Defaults to False. Returns: List[str]: List of Redis keys loaded to the databases. Raises: ValueError: If the length of provided keys does not match the - length of objects. + length of objects, or if validation fails. 
""" if keys and len(keys) != len(objects): # type: ignore raise ValueError("Length of keys does not match the length of objects") - if not concurrency: - concurrency = self.default_write_concurrency + if batch_size is None: + batch_size = self.default_batch_size - semaphore = asyncio.Semaphore(concurrency) - keys_iterator = iter(keys) if keys else None + if not objects: + return [] + + # Pass 1: Preprocess and validate all objects + prepared_objects = self._preprocess_and_validate_objects( + objects, + id_field=id_field, + keys=keys, + preprocess=preprocess, + validate=validate, + ) + + # Pass 2: Write all valid objects in batches using pipeline + added_keys = [] + + async with redis_client.pipeline(transaction=False) as pipe: + for i, (key, obj) in enumerate(prepared_objects, start=1): + await self._aset(pipe, key, obj) - async def _load(obj: Dict[str, Any], key: Optional[str] = None) -> str: - async with semaphore: - if key is None: - key = self._create_key(obj, id_field) - obj = await self._apreprocess(obj, preprocess) - self._validate(obj) - await self._aset(redis_client, key, obj) + # Set TTL if provided if ttl: - await redis_client.expire(key, ttl) - return key + await pipe.expire(key, ttl) - if keys_iterator: - tasks = [ - asyncio.create_task(_load(obj, next(keys_iterator))) for obj in objects - ] - else: - tasks = [asyncio.create_task(_load(obj)) for obj in objects] + added_keys.append(key) + + # Execute in batches + if i % batch_size == 0: + await pipe.execute() - return await asyncio.gather(*tasks) + # Execute any remaining commands + if len(prepared_objects) % batch_size != 0: + await pipe.execute() + + return added_keys def get( self, redis_client: Redis, keys: Iterable[str], batch_size: Optional[int] = None @@ -325,9 +398,7 @@ def get( return [] if batch_size is None: - batch_size = ( - self.default_batch_size - ) # Use default or calculate based on the input data + batch_size = self.default_batch_size # Use a pipeline to batch the retrieval with redis_client.pipeline(transaction=False) as pipe: @@ -345,39 +416,42 @@ async def aget( self, redis_client: AsyncRedis, keys: Iterable[str], - concurrency: Optional[int] = None, + batch_size: Optional[int] = None, ) -> List[Dict[str, Any]]: - """Asynchronously retrieve objects from Redis by keys, with concurrency - control. + """Asynchronously retrieve objects from Redis by keys. Args: redis_client (AsyncRedis): Asynchronous Redis client. keys (Iterable[str]): Keys to retrieve from Redis. - concurrency (Optional[int], optional): The number of concurrent - requests to make. + batch_size (Optional[int], optional): Number of objects to write + in a single Redis pipeline execution. Defaults to class's + default batch size. Returns: Dict[str, Any]: Dictionary with keys and their corresponding objects. 
""" + results: List = [] + if not isinstance(keys, Iterable): # type: ignore raise TypeError("Keys must be an iterable of strings") if len(keys) == 0: # type: ignore return [] - if not concurrency: - concurrency = self.default_write_concurrency - - semaphore = asyncio.Semaphore(concurrency) + if batch_size is None: + batch_size = self.default_batch_size - async def _get(key: str) -> Dict[str, Any]: - async with semaphore: - result = await self._aget(redis_client, key) - return result + # Use a pipeline to batch the retrieval + async with redis_client.pipeline(transaction=False) as pipe: + for i, key in enumerate(keys, start=1): + await self._aget(pipe, key) + if i % batch_size == 0: + results.extend(await pipe.execute()) + if i % batch_size != 0: + results.extend(await pipe.execute()) - tasks = [asyncio.create_task(_get(key)) for key in keys] - results = await asyncio.gather(*tasks) + # Process results return convert_bytes(results) @@ -392,19 +466,6 @@ class HashStorage(BaseStorage): type: IndexType = IndexType.HASH """Hash data type for the index""" - def _validate(self, obj: Dict[str, Any]): - """Validate that the given object is a dictionary, suitable for storage - as a Redis hash. - - Args: - obj (Dict[str, Any]): The object to validate. - - Raises: - TypeError: If the object is not a dictionary. - """ - if not isinstance(obj, dict): - raise TypeError("Object must be a dictionary.") - @staticmethod def _set(client: Redis, key: str, obj: Dict[str, Any]): """Synchronously set a hash value in Redis for the given key. @@ -465,19 +526,6 @@ class JsonStorage(BaseStorage): type: IndexType = IndexType.JSON """JSON data type for the index""" - def _validate(self, obj: Dict[str, Any]): - """Validate that the given object is a dictionary, suitable for JSON - serialization. - - Args: - obj (Dict[str, Any]): The object to validate. - - Raises: - TypeError: If the object is not a dictionary. - """ - if not isinstance(obj, dict): - raise TypeError("Object must be a dictionary.") - @staticmethod def _set(client: Redis, key: str, obj: Dict[str, Any]): """Synchronously set a JSON obj in Redis for the given key. 
diff --git a/redisvl/schema/__init__.py b/redisvl/schema/__init__.py index 24f6b821..c835ccd5 100644 --- a/redisvl/schema/__init__.py +++ b/redisvl/schema/__init__.py @@ -1,3 +1,35 @@ +from redisvl.schema.fields import ( + BaseField, + FieldTypes, + FlatVectorField, + GeoField, + HNSWVectorField, + NumericField, + TagField, + TextField, + VectorDataType, + VectorDistanceMetric, + VectorIndexAlgorithm, +) from redisvl.schema.schema import IndexInfo, IndexSchema, StorageType -__all__ = ["StorageType", "IndexSchema", "IndexInfo"] +# Expose validation functionality +from redisvl.schema.validation import validate_object + +__all__ = [ + "IndexSchema", + "IndexInfo", + "StorageType", + "FieldTypes", + "VectorDistanceMetric", + "VectorDataType", + "VectorIndexAlgorithm", + "BaseField", + "TextField", + "TagField", + "NumericField", + "GeoField", + "FlatVectorField", + "HNSWVectorField", + "validate_object", +] diff --git a/redisvl/schema/fields.py b/redisvl/schema/fields.py index 17714480..b77188d7 100644 --- a/redisvl/schema/fields.py +++ b/redisvl/schema/fields.py @@ -164,12 +164,14 @@ class BaseField(BaseModel): """Specified field attributes""" def _handle_names(self) -> Tuple[str, Optional[str]]: + """Helper to handle field naming with path support""" if self.path: return self.path, self.name return self.name, None def as_redis_field(self) -> RedisField: - raise NotImplementedError + """Convert schema field to Redis Field object""" + raise NotImplementedError("Must be implemented by field subclasses") class TextField(BaseField): diff --git a/redisvl/schema/schema.py b/redisvl/schema/schema.py index 33dfd9c7..90617d18 100644 --- a/redisvl/schema/schema.py +++ b/redisvl/schema/schema.py @@ -8,6 +8,7 @@ from redis.commands.search.field import Field as RedisField from redisvl.schema.fields import BaseField, FieldFactory +from redisvl.schema.type_utils import TypeInferrer from redisvl.utils.log import get_logger from redisvl.utils.utils import model_to_dict @@ -455,64 +456,3 @@ def to_yaml(self, file_path: str, overwrite: bool = True) -> None: with open(fp, "w") as f: yaml_data = self.to_dict() yaml.dump(yaml_data, f, sort_keys=False) - - -class TypeInferrer: - """Infers the type of a field based on its value.""" - - GEO_PATTERN = re.compile( - r"^\s*[-+]?([1-8]?\d(\.\d+)?|90(\.0+)?),\s*[-+]?(180(\.0+)?|((1[0-7]\d)|([1-9]?\d))(\.\d+)?)\s*$" - ) - - TYPE_METHOD_MAP = { - "numeric": "_is_numeric", - "geo": "_is_geographic", - "tag": "_is_tag", - "text": "_is_text", - } - - @classmethod - def infer(cls, value: Any) -> str: - """Infers the field type for a given value. - - Args: - value: The value to infer the type of. - - Returns: - The inferred field type as a string. - - Raises: - ValueError: If the type cannot be inferred. 
- """ - for type_name, method_name in cls.TYPE_METHOD_MAP.items(): - if getattr(cls, method_name)(value): - return type_name - raise ValueError(f"Unable to infer type for value: {value}") - - @classmethod - def _is_numeric(cls, value: Any) -> bool: - """Check if the value is numeric.""" - if not isinstance(value, (int, float, str)): - return False - try: - float(value) - return True - except (ValueError, TypeError): - return False - - @classmethod - def _is_tag(cls, value: Any) -> bool: - """Check if the value is a tag.""" - return isinstance(value, (list, set, tuple)) and all( - isinstance(v, str) for v in value - ) - - @classmethod - def _is_text(cls, value: Any) -> bool: - """Check if the value is text.""" - return isinstance(value, str) - - @classmethod - def _is_geographic(cls, value: Any) -> bool: - """Check if the value is a geographic coordinate.""" - return isinstance(value, str) and cls.GEO_PATTERN.match(value) is not None diff --git a/redisvl/schema/type_utils.py b/redisvl/schema/type_utils.py new file mode 100644 index 00000000..83329961 --- /dev/null +++ b/redisvl/schema/type_utils.py @@ -0,0 +1,63 @@ +import re +from typing import Any + + +class TypeInferrer: + """Infers the type of a field based on its value.""" + + GEO_PATTERN = re.compile( + r"^\s*[-+]?([1-8]?\d(\.\d+)?|90(\.0+)?),\s*[-+]?(180(\.0+)?|((1[0-7]\d)|([1-9]?\d))(\.\d+)?)\s*$" + ) + + TYPE_METHOD_MAP = { + "numeric": "_is_numeric", + "geo": "_is_geographic", + "tag": "_is_tag", + "text": "_is_text", + } + + @classmethod + def infer(cls, value: Any) -> str: + """Infers the field type for a given value. + + Args: + value: The value to infer the type of. + + Returns: + The inferred field type as a string. + + Raises: + ValueError: If the type cannot be inferred. + """ + for type_name, method_name in cls.TYPE_METHOD_MAP.items(): + if getattr(cls, method_name)(value): + return type_name + raise ValueError(f"Unable to infer type for value: {value}") + + @classmethod + def _is_numeric(cls, value: Any) -> bool: + """Check if the value is numeric.""" + if not isinstance(value, (int, float, str)): + return False + try: + float(value) + return True + except (ValueError, TypeError): + return False + + @classmethod + def _is_tag(cls, value: Any) -> bool: + """Check if the value is a tag.""" + return isinstance(value, (list, set, tuple)) and all( + isinstance(v, str) for v in value + ) + + @classmethod + def _is_text(cls, value: Any) -> bool: + """Check if the value is text.""" + return isinstance(value, str) + + @classmethod + def _is_geographic(cls, value: Any) -> bool: + """Check if the value is a geographic coordinate.""" + return isinstance(value, str) and cls.GEO_PATTERN.match(value) is not None diff --git a/redisvl/schema/validation.py b/redisvl/schema/validation.py new file mode 100644 index 00000000..51fcf445 --- /dev/null +++ b/redisvl/schema/validation.py @@ -0,0 +1,290 @@ +""" +RedisVL Schema Validation Module + +This module provides utilities for validating data against RedisVL schemas +using dynamically generated Pydantic models. 
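+
+Example (illustrative sketch; assumes ``schema`` is an ``IndexSchema`` whose
+fields include a numeric ``age``):
+
+.. code-block:: python
+
+    from redisvl.schema.validation import validate_object
+
+    validate_object(schema, {"age": 42})     # returns {"age": 42}
+    validate_object(schema, {"age": "n/a"})  # raises a pydantic ValidationError
+                                             # (a ValueError subclass)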
+""" + +import json +import re +import warnings +from typing import Any, Dict, List, Optional, Type, Union + +from pydantic import BaseModel, Field, ValidationError, create_model, field_validator + +from redisvl.schema import IndexSchema +from redisvl.schema.fields import BaseField, FieldTypes, VectorDataType +from redisvl.schema.schema import StorageType +from redisvl.schema.type_utils import TypeInferrer +from redisvl.utils.log import get_logger + +logger = get_logger(__name__) + + +class SchemaModelGenerator: + """ + Generates and caches Pydantic models based on Redis schema definitions. + + This class handles the conversion of RedisVL IndexSchema objects into + Pydantic models with appropriate field types and validators. + """ + + _model_cache: Dict[str, Type[BaseModel]] = {} + + @classmethod + def get_model_for_schema(cls, schema: IndexSchema) -> Type[BaseModel]: + """ + Get or create a Pydantic model for a schema. + + Args: + schema: The IndexSchema to convert to a Pydantic model + + Returns: + A Pydantic model class that can validate data against the schema + """ + # Use schema identifier as cache key + cache_key = schema.index.name + + if cache_key not in cls._model_cache: + cls._model_cache[cache_key] = cls._create_model(schema) + + return cls._model_cache[cache_key] + + @classmethod + def _map_field_to_pydantic_type( + cls, field: BaseField, storage_type: StorageType + ) -> Type: + """ + Map Redis field types to appropriate Pydantic types. + + Args: + field: The Redis field definition + storage_type: The storage type (HASH or JSON) + + Returns: + The Pydantic field type + + Raises: + ValueError: If the field type is not supported + """ + if field.type == FieldTypes.TEXT: + return str + elif field.type == FieldTypes.TAG: + return str + elif field.type == FieldTypes.NUMERIC: + return Union[int, float] + elif field.type == FieldTypes.GEO: + return str + elif field.type == FieldTypes.VECTOR: + # For JSON storage, vectors are always lists + if storage_type == StorageType.JSON: + return List[Union[int, float]] + else: + return bytes + + # If we get here, the field type is not supported + raise ValueError(f"Unsupported field type: {field.type}") + + @classmethod + def _create_model(cls, schema: IndexSchema) -> Type[BaseModel]: + """ + Create a Pydantic model from schema definition. 
+
+        Args:
+            schema: The IndexSchema to convert
+
+        Returns:
+            A Pydantic model class with appropriate fields and validators
+        """
+        field_definitions = {}
+        validators = {}
+
+        # Get storage type from schema
+        storage_type = schema.index.storage_type
+
+        # Create field definitions dictionary for create_model
+        for field_name, field in schema.fields.items():
+            field_type = cls._map_field_to_pydantic_type(field, storage_type)
+
+            # Create field definition (all fields are optional in the model)
+            # this handles the cases where objects have missing fields (supported behavior)
+            field_definitions[field_name] = (
+                Optional[field_type],  # Make fields optional
+                Field(
+                    default=None,
+                    json_schema_extra={
+                        "field_type": field.type,
+                    },
+                ),
+            )
+
+            # Add field-specific validator info to our validator registry
+            if field.type == FieldTypes.GEO:
+                validators[field_name] = {"type": "geo"}
+
+            elif field.type == FieldTypes.VECTOR:
+                validators[field_name] = {
+                    "type": "vector",
+                    "dims": field.attrs.dims,
+                    "datatype": field.attrs.datatype,
+                    "storage_type": storage_type,
+                }
+
+        # Build the pydantic validators up front: pydantic only registers
+        # validators supplied at class-creation time (via __validators__);
+        # attaching them afterwards with setattr would silently do nothing.
+        model_validators = {}
+        for field_name, validator_info in validators.items():
+            if validator_info["type"] == "geo":
+                # Add geo validator
+                model_validators[f"validate_{field_name}"] = cls._create_geo_validator(
+                    field_name
+                )
+
+            elif validator_info["type"] == "vector":
+                # Add vector validator
+                model_validators[f"validate_{field_name}"] = cls._create_vector_validator(
+                    field_name,
+                    validator_info["dims"],
+                    validator_info["datatype"],
+                    validator_info["storage_type"],
+                )
+
+        # Create the model class with field definitions and validators
+        model_name = f"{schema.index.name}__PydanticModel"
+        return create_model(model_name, __validators__=model_validators, **field_definitions)
+
+    @staticmethod
+    def _create_geo_validator(field_name: str):
+        """
+        Create a validator for geo fields.
+
+        Args:
+            field_name: Name of the field to validate
+
+        Returns:
+            A validator function that can be attached to a Pydantic model
+        """
+
+        # Create the validator function
+        def validate_geo_field(cls, value):
+            # Skip validation for None values
+            if value is not None:
+                # Validate against pattern
+                if not re.match(TypeInferrer.GEO_PATTERN.pattern, value):
+                    raise ValueError(
+                        f"Geo field '{field_name}' value '{value}' is not a valid 'lat,lon' format"
+                    )
+            return value
+
+        # Add the field_validator decorator
+        return field_validator(field_name, mode="after")(validate_geo_field)
+
+    @staticmethod
+    def _create_vector_validator(
+        field_name: str, dims: int, datatype: VectorDataType, storage_type: StorageType
+    ):
+        """
+        Create a validator for vector fields.
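+
+        Example of the resulting behavior (illustrative sketch; assumes a model
+        built from a schema with a 3-dim INT8 vector field named ``vec``):
+
+        .. code-block:: python
+
+            model_cls.model_validate({"vec": [1, 2, 3]})  # ok
+            model_cls.model_validate({"vec": [1, 2]})     # raises: wrong dimensions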
+
+        Args:
+            field_name: Name of the field to validate
+            dims: Expected dimensions of the vector
+            datatype: Expected datatype of the vector elements
+            storage_type: Type of storage (HASH or JSON)
+
+        Returns:
+            A validator function that can be attached to a Pydantic model
+        """
+
+        # Create the validator function. It runs in "before" mode so the raw
+        # input type (list vs. bytes vs. anything else) can be inspected prior
+        # to pydantic's own type coercion.
+        def validate_vector_field(cls, value):
+            # Skip validation for None values
+            if value is not None:
+
+                # Handle list representation
+                if isinstance(value, list):
+
+                    # Validate dimensions
+                    if len(value) != dims:
+                        raise ValueError(
+                            f"Vector field '{field_name}' must have {dims} dimensions, got {len(value)}"
+                        )
+
+                    # Normalize the datatype name (handles both enum members
+                    # and plain strings)
+                    datatype_str = getattr(datatype, "value", str(datatype)).upper()
+
+                    # Integer-based datatypes
+                    if datatype_str in ("INT8", "UINT8"):
+                        # Check type
+                        if not all(isinstance(v, int) for v in value):
+                            raise ValueError(
+                                f"Vector field '{field_name}' must contain only integer values for {datatype_str}"
+                            )
+                        # Check the representable 8-bit range
+                        min_val, max_val = (-128, 127) if datatype_str == "INT8" else (0, 255)
+                        if not all(min_val <= v <= max_val for v in value):
+                            raise ValueError(
+                                f"Vector field '{field_name}' {datatype_str} values must be between {min_val} and {max_val}"
+                            )
+
+                    # Float-based datatypes must contain only numbers
+                    elif not all(isinstance(v, (int, float)) for v in value):
+                        raise ValueError(
+                            f"Vector field '{field_name}' must contain only numeric values for {datatype_str}"
+                        )
+
+                # Raw bytes (HASH storage) are passed through as-is; any other
+                # type is rejected
+                elif not isinstance(value, bytes):
+                    raise ValueError(
+                        f"Vector field '{field_name}' must be a list or bytes, got {type(value).__name__}"
+                    )
+
+            return value
+
+        # Add the field_validator decorator (mirrors _create_geo_validator)
+        return field_validator(field_name, mode="before")(validate_vector_field)
+
+
+def extract_from_json_path(obj: Dict[str, Any], path: str) -> Any:
+    """
+    Extract a value from a nested JSON object using a path.
+
+    Args:
+        obj: The object to extract values from
+        path: JSONPath-style path (e.g., $.field.subfield)
+
+    Returns:
+        The extracted value or None if not found
+    """
+    # Handle JSONPath syntax (e.g., $.field.subfield)
+    if path.startswith("$."):
+        path_parts = path[2:].split(".")
+    else:
+        path_parts = path.split(".")
+
+    current = obj
+    for part in path_parts:
+        if isinstance(current, dict) and part in current:
+            current = current[part]
+        else:
+            return None
+
+    return current
+
+
+def validate_object(schema: IndexSchema, obj: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Validate an object against a schema.
+
+    Args:
+        schema: The IndexSchema to validate against
+        obj: The object to validate
+
+    Returns:
+        Validated object with any type coercions applied
+
+    Raises:
+        ValueError: If validation fails with enhanced error message
+    """
+    # Get Pydantic model for this schema
+    model_class = SchemaModelGenerator.get_model_for_schema(schema)
+
+    # Prepare object for validation
+    # Handle nested JSON if needed
+    if schema.index.storage_type == StorageType.JSON:
+        # Extract values from nested paths
+        flat_obj = {}
+        for field_name, field in schema.fields.items():
+            if field.path:
+                value = extract_from_json_path(obj, field.path)
+                if value is not None:
+                    flat_obj[field_name] = value
+            elif field_name in obj:
+                flat_obj[field_name] = obj[field_name]
+    else:
+        flat_obj = obj
+
+    # Validate against model
+    validated = model_class.model_validate(flat_obj)
+    return validated.model_dump(exclude_none=True)
diff --git a/tests/integration/test_async_search_index.py b/tests/integration/test_async_search_index.py
index edc6c01a..ea122d5d 100644
--- a/tests/integration/test_async_search_index.py
+++ b/tests/integration/test_async_search_index.py
@@ -284,7 +284,7 @@ async def preprocess(record):
     async def bad_preprocess(record):
         return 1
 
-    with pytest.raises(TypeError):
+    with pytest.raises(ValueError):
         await async_index.load(data, id_field="id", preprocess=bad_preprocess)
 
 
diff --git a/tests/integration/test_flow_async.py b/tests/integration/test_flow_async.py
index a368f677..c727fd28 100644
--- a/tests/integration/test_flow_async.py
+++ b/tests/integration/test_flow_async.py
@@ -52,7 +52,7 @@ async def test_simple(async_client, schema, sample_data):
    await index.create(overwrite=True, drop=True)
 
     # Prepare and load the data based on storage type
-    async def hash_preprocess(item: dict) -> dict:
+    def 
hash_preprocess(item: dict) -> dict: return { **item, "user_embedding": array_to_buffer(item["user_embedding"], "float32"), diff --git a/tests/integration/test_search_index.py b/tests/integration/test_search_index.py index 800f6a06..02b6d5e4 100644 --- a/tests/integration/test_search_index.py +++ b/tests/integration/test_search_index.py @@ -268,7 +268,7 @@ def preprocess(record): def bad_preprocess(record): return 1 - with pytest.raises(TypeError): + with pytest.raises(ValueError): index.load(data, id_field="id", preprocess=bad_preprocess) diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py new file mode 100644 index 00000000..91a558f2 --- /dev/null +++ b/tests/unit/conftest.py @@ -0,0 +1,183 @@ +""" +Common test fixtures and utilities for RedisVL validation tests. +""" + +from typing import Any, Dict + +import pytest + +from redisvl.schema import IndexSchema +from redisvl.schema.fields import VectorDataType, VectorDistanceMetric + + +@pytest.fixture +def comprehensive_schema(): + """Create a comprehensive schema with all field types for testing.""" + return IndexSchema.from_dict( + { + "index": { + "name": "test-index", + "prefix": "test", + "key_separator": ":", + "storage_type": "hash", + }, + "fields": [ + {"name": "id", "type": "tag"}, + {"name": "title", "type": "text"}, + {"name": "rating", "type": "numeric"}, + {"name": "location", "type": "geo"}, + { + "name": "embedding", + "type": "vector", + "attrs": { + "algorithm": "flat", + "dims": 4, + "datatype": "float32", + "distance_metric": "cosine", + }, + }, + { + "name": "int_vector", + "type": "vector", + "attrs": { + "algorithm": "flat", + "dims": 3, + "datatype": "int8", + "distance_metric": "l2", + }, + }, + { + "name": "hnsw_vector", + "type": "vector", + "attrs": { + "algorithm": "hnsw", + "dims": 3, + "distance_metric": "cosine", + "datatype": "float32", + "m": 16, + "ef_construction": 200, + "ef_runtime": 10, + "epsilon": 0.01, + }, + }, + ], + } + ) + + +@pytest.fixture +def json_schema(): + """Create a schema with JSON storage and path fields.""" + return IndexSchema.from_dict( + { + "index": { + "name": "test-json-index", + "prefix": "test", + "key_separator": ":", + "storage_type": "json", + }, + "fields": [ + {"name": "id", "type": "tag", "path": "$.id"}, + {"name": "user", "type": "tag", "path": "$.metadata.user"}, + {"name": "title", "type": "text", "path": "$.content.title"}, + {"name": "rating", "type": "numeric", "path": "$.metadata.rating"}, + { + "name": "embedding", + "type": "vector", + "path": "$.content.embedding", + "attrs": { + "algorithm": "flat", + "dims": 4, + "datatype": "float32", + "distance_metric": "cosine", + }, + }, + ], + } + ) + + +@pytest.fixture +def valid_data(): + """Sample valid data for testing validation.""" + return { + "id": "doc1", + "title": "Test Document", + "rating": 4.5, + "location": "37.7749,-122.4194", + "embedding": [0.1, 0.2, 0.3, 0.4], + "int_vector": [1, 2, 3], + "hnsw_vector": [0.1, 0.2, 0.3], + } + + +@pytest.fixture +def valid_nested_data(): + """Sample valid nested data for testing JSON path validation.""" + return { + "id": "doc1", + "metadata": {"user": "user123", "rating": 4.5}, + "content": {"title": "Test Document", "embedding": [0.1, 0.2, 0.3, 0.4]}, + } + + +@pytest.fixture +def invalid_data_cases(): + """ + Test cases for invalid data. 
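+
+    Intended to be consumed in a loop against ``comprehensive_schema``
+    (illustrative sketch):
+
+        for case in invalid_data_cases:
+            with pytest.raises(ValueError) as exc:
+                validate_object(comprehensive_schema, {case["field"]: case["value"]})
+            assert case["error_text"] in str(exc.value)
+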
+ Each case contains: + - field: name of the field + - value: invalid value to test + - error_text: text that should appear in error message + """ + return [ + # Text field errors + {"field": "title", "value": 123, "error_text": "must be a string"}, + # Numeric field errors + {"field": "rating", "value": "high", "error_text": "must be a number"}, + {"field": "rating", "value": "123.45", "error_text": "must be a number"}, + # Tag field errors + {"field": "id", "value": 123, "error_text": "must be a string"}, + # Geo field errors + { + "field": "location", + "value": "invalid_geo", + "error_text": "not a valid 'lat,lon' format", + }, + { + "field": "location", + "value": "1000,-1000", + "error_text": "not a valid 'lat,lon' format", + }, + # Vector field errors - float32 + {"field": "embedding", "value": [0.1, 0.2, 0.3], "error_text": "dimensions"}, + { + "field": "embedding", + "value": [0.1, "string", 0.3, 0.4], + "error_text": "numeric values", + }, + { + "field": "embedding", + "value": "not_a_vector", + "error_text": "must be a list or bytes", + }, + # Vector field errors - int8 + { + "field": "int_vector", + "value": [0.1, 0.2, 0.3], + "error_text": "integer values", + }, + {"field": "int_vector", "value": [1, 2], "error_text": "dimensions"}, + { + "field": "int_vector", + "value": [1000, 2000, 3000], + "error_text": "INT8 values must be between", + }, + # HNSW Vector field errors + {"field": "hnsw_vector", "value": [0.1, 0.2], "error_text": "dimensions"}, + { + "field": "hnsw_vector", + "value": ["a", "b", "c"], + "error_text": "numeric values", + }, + ] diff --git a/tests/unit/test_edge_cases.py b/tests/unit/test_edge_cases.py new file mode 100644 index 00000000..3646cc1f --- /dev/null +++ b/tests/unit/test_edge_cases.py @@ -0,0 +1,451 @@ +""" +Tests for edge cases in the RedisVL validation module. + +This module tests edge cases in the validation system that might not be +covered in the main test files, including: +1. Performance and caching behavior +2. Handling of unusual data types +3. Extreme values +4. 
Boundary conditions +""" + +import time +from typing import Any, Dict, List + +import pytest + +from redisvl.index.storage import BaseStorage +from redisvl.schema.fields import Field, FieldTypes, VectorDataType +from redisvl.schema.index import Index, IndexSchema +from redisvl.schema.validation import SchemaModelGenerator, validate_object + + +class TestSchemaModelCaching: + """Tests for model caching behavior.""" + + def test_caching_improves_performance(self): + """Test that caching improves model generation performance.""" + # Create a complex schema + fields = { + f"field_{i}": Field(name=f"field_{i}", type=FieldTypes.TEXT) + for i in range(50) # 50 fields should be enough to measure performance + } + + schema = IndexSchema( + index=Index(name="performance_test", prefix="doc"), fields=fields + ) + + # First generation (not cached) + start_time = time.time() + model1 = SchemaModelGenerator.get_model_for_schema(schema) + first_time = time.time() - start_time + + # Second generation (should be cached) + start_time = time.time() + model2 = SchemaModelGenerator.get_model_for_schema(schema) + second_time = time.time() - start_time + + # Verify second generation is faster + assert second_time < first_time + + # Should be much faster (usually at least 10x) + assert second_time < (first_time * 0.5) + + # Verify same model instance + assert model1 is model2 + + def test_different_schemas_get_different_models(self): + """Test that different schemas get different model instances.""" + # Create two different schemas + schema1 = IndexSchema( + index=Index(name="test1", prefix="doc1"), + fields={"field1": Field(name="field1", type=FieldTypes.TEXT)}, + ) + + schema2 = IndexSchema( + index=Index(name="test2", prefix="doc2"), + fields={"field1": Field(name="field1", type=FieldTypes.TEXT)}, + ) + + # Get models + model1 = SchemaModelGenerator.get_model_for_schema(schema1) + model2 = SchemaModelGenerator.get_model_for_schema(schema2) + + # Verify different model instances + assert model1 is not model2 + assert model1.__name__ != model2.__name__ + + +class TestUnusualDataTypes: + """Tests for handling unusual data types during validation.""" + + @pytest.fixture + def basic_schema(self): + """Create a basic schema for testing.""" + return IndexSchema( + index=Index(name="test", prefix="doc"), + fields={ + "text_field": Field(name="text_field", type=FieldTypes.TEXT), + "tag_field": Field(name="tag_field", type=FieldTypes.TAG), + "num_field": Field(name="num_field", type=FieldTypes.NUMERIC), + }, + ) + + def test_none_values(self, basic_schema): + """Test handling of None values.""" + # Data with None values + data = {"text_field": None, "tag_field": None, "num_field": None} + + # Validate + result = validate_object(basic_schema, data) + + # None values should be excluded + assert len(result) == 0 + + def test_empty_string_values(self, basic_schema): + """Test handling of empty strings.""" + # Data with empty strings + data = {"text_field": "", "tag_field": "", "num_field": 0} + + # Validate + result = validate_object(basic_schema, data) + + # Empty strings are valid for text and tag + assert result["text_field"] == "" + assert result["tag_field"] == "" + assert result["num_field"] == 0 + + def test_boolean_values(self, basic_schema): + """Test handling of boolean values.""" + # Data with booleans + data = {"text_field": True, "tag_field": False, "num_field": True} + + # Booleans aren't valid for text or tag + with pytest.raises(ValueError) as exc_info: + validate_object(basic_schema, data) + + assert 
"text_field" in str(exc_info.value) + + # Create new schema with only numeric + num_schema = IndexSchema( + index=Index(name="test", prefix="doc"), + fields={"num_field": Field(name="num_field", type=FieldTypes.NUMERIC)}, + ) + + # Validate with only the numeric field + result = validate_object(num_schema, {"num_field": True}) + + # Python converts True to 1, False to 0 + assert result["num_field"] == 1 + + def test_list_for_text(self, basic_schema): + """Test handling lists for text fields.""" + # Data with list for text + data = {"text_field": ["item1", "item2"]} + + # Lists aren't valid for text + with pytest.raises(ValueError) as exc_info: + validate_object(basic_schema, data) + + assert "text_field" in str(exc_info.value) + + +class TestVectorEdgeCases: + """Tests for edge cases with vector fields.""" + + @pytest.fixture + def vector_schema(self): + """Create a schema with vector fields for testing.""" + return IndexSchema( + index=Index(name="test_vectors", prefix="vec"), + fields={ + "float_vec": Field( + name="float_vec", + type=FieldTypes.VECTOR, + attrs={"dims": 3, "datatype": VectorDataType.FLOAT32}, + ), + "int_vec": Field( + name="int_vec", + type=FieldTypes.VECTOR, + attrs={"dims": 3, "datatype": VectorDataType.INT8}, + ), + }, + ) + + def test_large_vectors(self, vector_schema): + """Test validation of very large vectors.""" + # Create a large vector (1000 dimensions) + large_schema = IndexSchema( + index=Index(name="large_vec", prefix="vec"), + fields={ + "large_vec": Field( + name="large_vec", + type=FieldTypes.VECTOR, + attrs={"dims": 1000, "datatype": VectorDataType.FLOAT32}, + ) + }, + ) + + # Valid large vector + large_vector = {"large_vec": [0.1] * 1000} + result = validate_object(large_schema, large_vector) + assert len(result["large_vec"]) == 1000 + + # Invalid dimensions + invalid_dims = {"large_vec": [0.1] * 999} + with pytest.raises(ValueError) as exc_info: + validate_object(large_schema, invalid_dims) + assert "dimensions" in str(exc_info.value) + + def test_mixed_vector_types(self, vector_schema): + """Test validation of vectors with mixed element types.""" + # Float vector with mixed types + mixed_float = {"float_vec": [1, 2.5, "3"]} + with pytest.raises(ValueError) as exc_info: + validate_object(vector_schema, mixed_float) + assert "float_vec" in str(exc_info.value) + + # Int vector with mixed types + mixed_int = {"int_vec": [1, 2.5, 3]} + with pytest.raises(ValueError) as exc_info: + validate_object(vector_schema, mixed_int) + assert "int_vec" in str(exc_info.value) + + def test_empty_vector(self, vector_schema): + """Test validation of empty vectors.""" + # Empty float vector + empty_vec = {"float_vec": []} + with pytest.raises(ValueError) as exc_info: + validate_object(vector_schema, empty_vec) + assert "float_vec" in str(exc_info.value) + assert "dimensions" in str(exc_info.value) + + def test_vector_int_range(self, vector_schema): + """Test validation of integer vectors with values outside allowed range.""" + # INT8 vector with values outside range + out_of_range = {"int_vec": [100, 200, 300]} # Valid int, but outside INT8 range + with pytest.raises(ValueError) as exc_info: + validate_object(vector_schema, out_of_range) + assert "int_vec" in str(exc_info.value) + assert "must be between" in str(exc_info.value) + + # INT8 vector with valid range + valid_range = {"int_vec": [-128, 0, 127]} + result = validate_object(vector_schema, valid_range) + assert result["int_vec"] == [-128, 0, 127] + + +class TestGeoEdgeCases: + """Tests for edge cases with geo 
fields.""" + + @pytest.fixture + def geo_schema(self): + """Create a schema with geo fields for testing.""" + return IndexSchema( + index=Index(name="test_geo", prefix="geo"), + fields={"location": Field(name="location", type=FieldTypes.GEO)}, + ) + + def test_geo_boundary_values(self, geo_schema): + """Test validation of geo fields with boundary values.""" + # Valid boundary values + valid_boundaries = [ + {"location": "90,180"}, # Max lat, max lon + {"location": "-90,-180"}, # Min lat, min lon + {"location": "0,0"}, # Zero point + {"location": "90,0"}, # North pole + {"location": "-90,0"}, # South pole + ] + + for data in valid_boundaries: + result = validate_object(geo_schema, data) + assert result["location"] == data["location"] + + def test_geo_invalid_boundary_values(self, geo_schema): + """Test validation of geo fields with invalid boundary values.""" + # Invalid boundary values + invalid_boundaries = [ + {"location": "91,0"}, # Lat > 90 + {"location": "-91,0"}, # Lat < -90 + {"location": "0,181"}, # Lon > 180 + {"location": "0,-181"}, # Lon < -180 + {"location": "90.1,0"}, # Lat > 90 (decimal) + {"location": "0,180.1"}, # Lon > 180 (decimal) + ] + + for data in invalid_boundaries: + with pytest.raises(ValueError) as exc_info: + validate_object(geo_schema, data) + assert "location" in str(exc_info.value) + assert "not a valid" in str(exc_info.value) + + def test_geo_formats(self, geo_schema): + """Test validation of geo fields with different formats.""" + # Various valid formats + valid_formats = [ + {"location": "37.7749,-122.4194"}, # Decimal degrees + {"location": "-37.7749,122.4194"}, # Negative latitude + {"location": "37.7749,122.4194"}, # Positive longitude + {"location": "0.0000,0.0000"}, # Zeros with decimal + {"location": "37,-122"}, # Integer degrees + ] + + for data in valid_formats: + result = validate_object(geo_schema, data) + assert result["location"] == data["location"] + + # Invalid formats + invalid_formats = [ + {"location": "37.7749"}, # Missing longitude + {"location": "37.7749,"}, # Missing longitude value + {"location": ",122.4194"}, # Missing latitude value + {"location": "37.7749:122.4194"}, # Wrong separator + {"location": "37.7749, 122.4194"}, # Space after separator + {"location": "North,South"}, # Non-numeric values + ] + + for data in invalid_formats: + with pytest.raises(ValueError) as exc_info: + validate_object(geo_schema, data) + assert "location" in str(exc_info.value) + + +class TestNestedJsonEdgeCases: + """Tests for edge cases with nested JSON.""" + + @pytest.fixture + def nested_schema(self): + """Create a schema with JSON paths for testing.""" + fields = { + "id": Field(name="id", type=FieldTypes.TAG), + "title": Field(name="title", type=FieldTypes.TEXT, path="$.content.title"), + "rating": Field( + name="rating", type=FieldTypes.NUMERIC, path="$.metadata.rating" + ), + "deeply_nested": Field( + name="deeply_nested", + type=FieldTypes.TEXT, + path="$.level1.level2.level3.level4.value", + ), + } + + return IndexSchema( + index=Index(name="test_nested", prefix="nested"), fields=fields + ) + + def test_very_deeply_nested_json(self, nested_schema): + """Test validation with very deeply nested JSON.""" + # Create a deeply nested structure + deeply_nested = { + "id": "doc1", + "level1": { + "level2": {"level3": {"level4": {"value": "deeply nested value"}}} + }, + } + + # Validate + result = validate_object(nested_schema, deeply_nested) + assert result["id"] == "doc1" + assert result["deeply_nested"] == "deeply nested value" + + def 
test_partial_path_missing(self, nested_schema): + """Test validation when part of a JSON path is missing.""" + # Create object with partial path missing + partial_missing = { + "id": "doc1", + "level1": { + "level2": { + # level3 missing + } + }, + } + + # Validate - should ignore missing path + result = validate_object(nested_schema, partial_missing) + assert result["id"] == "doc1" + assert "deeply_nested" not in result + + def test_nested_arrays(self): + """Test validation with nested arrays in JSON.""" + # Create schema with path to array element + array_schema = IndexSchema( + index=Index(name="test_arrays", prefix="arr"), + fields={ + "id": Field(name="id", type=FieldTypes.TAG), + "first_item": Field( + name="first_item", type=FieldTypes.TEXT, path="$.items[0]" + ), + "nested_item": Field( + name="nested_item", + type=FieldTypes.TEXT, + path="$.nested.items[1].name", + ), + }, + ) + + # Note: JSONPath with array indexing is not supported currently + # This test documents this limitation + + # Create data with arrays + array_data = { + "id": "arr1", + "items": ["first", "second", "third"], + "nested": {"items": [{"name": "item1"}, {"name": "item2"}]}, + } + + # Validate - array paths won't be found + result = validate_object(array_schema, array_data) + assert result["id"] == "arr1" + assert "first_item" not in result + assert "nested_item" not in result + + +class TestValidationIntegrationEdgeCases: + """Tests for integration edge cases between storage and validation.""" + + @pytest.fixture + def storage_with_schema(self): + """Create a storage instance with schema for testing.""" + schema = IndexSchema( + index=Index(name="test_storage", prefix="doc"), + fields={ + "id": Field(name="id", type=FieldTypes.TAG), + "vec": Field( + name="vec", + type=FieldTypes.VECTOR, + attrs={"dims": 3, "datatype": VectorDataType.FLOAT32}, + ), + }, + ) + + return BaseStorage(schema=schema, client=None) + + def test_validation_with_bytes_no_client(self, storage_with_schema): + """Test validation with bytes when no Redis client is available.""" + # No Redis client was provided, so hset won't be called + # This just tests that validation works with bytes + + # Valid data with bytes + data = {"id": "doc1", "vec": b"\x00\x01\x02"} # 3 bytes + + # Validate - should work even without client + validated = storage_with_schema.validate_object(data) + assert validated["id"] == "doc1" + assert validated["vec"] == b"\x00\x01\x02" + + def test_unexpected_field_is_ignored(self, storage_with_schema): + """Test that unexpected fields are ignored during validation.""" + # Data with extra field + data = { + "id": "doc1", + "vec": [0.1, 0.2, 0.3], + "extra": "This field is not in the schema", + } + + # Validate + validated = storage_with_schema.validate_object(data) + + # Extra field should be ignored + assert validated["id"] == "doc1" + assert validated["vec"] == [0.1, 0.2, 0.3] + assert "extra" not in validated diff --git a/tests/unit/test_fields.py b/tests/unit/test_fields.py index f420afff..3376a67c 100644 --- a/tests/unit/test_fields.py +++ b/tests/unit/test_fields.py @@ -1,3 +1,5 @@ +from typing import Any, Optional, Tuple + import pytest from redis.commands.search.field import GeoField as RedisGeoField from redis.commands.search.field import NumericField as RedisNumericField @@ -217,3 +219,58 @@ def test_create_unknown_field_type(): with pytest.raises(ValueError) as excinfo: FieldFactory.create_field("unknown", "example_field") assert "Unknown field type: unknown" in str(excinfo.value) + + +# Add validation tests 
for each field type +@pytest.mark.parametrize( + "field_class,valid_value,invalid_value,error_msg", + [ + (TextField, "sample text", 123, "expects a string"), + (NumericField, 123.45, "123.45", "looks like a number"), + (TagField, ["tag1", "tag2"], ["tag1", 123], "must be a string"), + (GeoField, "37.7749,-122.4194", "invalid-geo", "not a valid 'lat,lon' format"), + # Add vector field test cases + ], +) +def test_field_validation(field_class, valid_value, invalid_value, error_msg): + """Test validation logic for each field type""" + # Create field instance + field = field_class(name="test_field") + + # Test valid value + is_valid, error = field.validate(valid_value) + assert is_valid, f"Field should accept valid value: {valid_value}" + assert error is None, "No error message should be returned for valid value" + + # Test invalid value + is_valid, error = field.validate(invalid_value) + assert not is_valid, f"Field should reject invalid value: {invalid_value}" + assert ( + error_msg in error + ), f"Error message should contain '{error_msg}', got: {error}" + + +def test_vector_field_validation(): + """Test validation for vector fields specifically""" + # Create vector fields with specific dimensions + flat_field = create_flat_vector_field(dims=3) + hnsw_field = create_hnsw_vector_field(dims=3) + + # Valid vector + valid_vector = [0.1, 0.2, 0.3] + + # Test valid cases + assert flat_field.validate(valid_vector)[0], "Should accept valid vector" + assert hnsw_field.validate(valid_vector)[0], "Should accept valid vector" + + # Test wrong dimensions + wrong_dims = [0.1, 0.2] # Only 2 dimensions + is_valid, error = flat_field.validate(wrong_dims) + assert not is_valid, "Should reject vector with wrong dimensions" + assert "expects 3 dimensions" in error + + # Test wrong type + wrong_type = ["a", "b", "c"] # Strings instead of numbers + is_valid, error = hnsw_field.validate(wrong_type) + assert not is_valid, "Should reject vector with non-numeric elements" + assert "must be a number" in error diff --git a/tests/unit/test_storage.py b/tests/unit/test_storage.py index 21637dd5..539a1b76 100644 --- a/tests/unit/test_storage.py +++ b/tests/unit/test_storage.py @@ -1,32 +1,105 @@ +""" +Tests for RedisVL storage classes with focus on validation integration. + +This module tests how the storage classes integrate with the validation system: +1. How validation is used in storage operations +2. Preprocessing and validation flow +3. 
Error handling in write operations +""" + +from typing import Any, Dict +from unittest.mock import MagicMock, Mock, patch + import pytest -from redisvl.index.storage import BaseStorage, HashStorage, JsonStorage +from redisvl.index.storage import HashStorage, JsonStorage +from redisvl.schema import IndexInfo, IndexSchema +from redisvl.schema.fields import ( + FlatVectorField, + FlatVectorFieldAttributes, + GeoField, + HNSWVectorField, + HNSWVectorFieldAttributes, + NumericField, + TagField, + TextField, + VectorDataType, + VectorDistanceMetric, +) +from redisvl.schema.validation import validate_object + + +@pytest.fixture +def sample_schema(): + """Create a comprehensive schema for testing with all field types""" + return IndexSchema.from_dict( + { + "index": { + "name": "test-index", + "prefix": "test", + "key_separator": ":", + "storage_type": "hash", + }, + "fields": [ + # Standard fields + {"type": "text", "name": "text_field"}, + {"type": "numeric", "name": "num_field"}, + {"type": "tag", "name": "tag_field"}, + {"type": "geo", "name": "geo_field"}, + # Vector fields + { + "type": "vector", + "name": "flat_vector", + "attrs": { + "algorithm": "flat", + "dims": 3, + "distance_metric": "cosine", + "data_type": "float32", + }, + }, + { + "type": "vector", + "name": "hnsw_vector", + "attrs": { + "algorithm": "hnsw", + "dims": 3, + "distance_metric": "cosine", + "data_type": "float32", + "m": 16, + "ef_construction": 200, + "ef_runtime": 10, + "epsilon": 0.01, + }, + }, + ], + } + ) @pytest.fixture(params=[JsonStorage, HashStorage]) -def storage_instance(request): +def storage_instance(request, sample_schema): StorageClass = request.param - instance = StorageClass(prefix="test", key_separator=":") + instance = StorageClass(index_schema=sample_schema) return instance def test_key_formatting(storage_instance): key = "1234" generated_key = storage_instance._key(key, "", "") - assert generated_key == key, "The generated key does not match the expected format." + assert generated_key == key generated_key = storage_instance._key(key, "", ":") - assert generated_key == key, "The generated key does not match the expected format." + assert generated_key == key generated_key = storage_instance._key(key, "test", ":") - assert ( - generated_key == f"test:{key}" - ), "The generated key does not match the expected format." + assert generated_key == f"test:{key}" def test_create_key(storage_instance): id_field = "id" obj = {id_field: "1234"} expected_key = ( - f"{storage_instance.prefix}{storage_instance.key_separator}{obj[id_field]}" + f"{storage_instance.index_schema.index.prefix}" + f"{storage_instance.index_schema.index.key_separator}" + f"{obj[id_field]}" ) generated_key = storage_instance._create_key(obj, id_field) assert ( @@ -34,47 +107,456 @@ def test_create_key(storage_instance): ), "The generated key does not match the expected format." 
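+
+
+def test_preprocess_rejects_non_dict(storage_instance):
+    """Illustrative sketch: non-dict inputs are rejected during preprocessing
+    with a ValueError (replacing the TypeError from the removed _validate).
+    """
+    with pytest.raises(ValueError):
+        storage_instance._preprocess_and_validate_objects(["not a dict"])
+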
-def test_validate_success(storage_instance): - data = {"foo": "bar"} - try: - storage_instance._validate(data) - except Exception as e: - pytest.fail(f"_validate should not raise an exception here, but raised {e}") - - -def test_validate_failure(storage_instance): - data = "Some invalid data type" - with pytest.raises(TypeError): - storage_instance._validate(data) - data = 12345 - with pytest.raises(TypeError): - storage_instance._validate(data) - - def test_preprocess(storage_instance): data = {"key": "value"} - preprocessed_data = storage_instance._preprocess(preprocess=None, obj=data) + preprocessed_data = storage_instance._preprocess(data, preprocess=None) assert preprocessed_data == data def fn(d): d["foo"] = "bar" return d - preprocessed_data = storage_instance._preprocess(fn, data) + preprocessed_data = storage_instance._preprocess(data, fn) assert "foo" in preprocessed_data assert preprocessed_data["foo"] == "bar" -@pytest.mark.asyncio -async def test_preprocess(storage_instance): - data = {"key": "value"} - preprocessed_data = await storage_instance._apreprocess(preprocess=None, obj=data) - assert preprocessed_data == data +def test_preprocess_and_validate_objects(storage_instance): + """Test combined preprocessing and validation""" + objects = [ + {"num_field": 123, "text_field": "valid text"}, # Valid + {"num_field": "123", "text_field": "valid text"}, # Invalid numeric field + ] - async def fn(d): - d["foo"] = "bar" - return d + def preprocess(obj): + obj["processed"] = True + return obj - preprocessed_data = await storage_instance._apreprocess(data, fn) - assert "foo" in preprocessed_data - assert preprocessed_data["foo"] == "bar" + # When validate=True, should raise ValueError for invalid object + with pytest.raises(ValueError) as exc_info: + storage_instance._preprocess_and_validate_objects( + objects, preprocess=preprocess, validate=True + ) + + # Error message should mention the issue + assert "Validation failed" in str(exc_info.value) + assert "must be a number" in str(exc_info.value) + + # When validate=False, should process both objects without errors + prepared_objects = storage_instance._preprocess_and_validate_objects( + objects, preprocess=preprocess, validate=False + ) + + assert len(prepared_objects) == 2 + # Preprocessing should have worked for both objects + assert all(obj[1].get("processed") for obj in prepared_objects) + + +def test_validate_object(storage_instance): + """Test validation of individual objects""" + + # Valid data should be returned unchanged (except for any type coercion) + valid_data = { + "text_field": "some text", + "num_field": 123.45, + "tag_field": "tag1,tag2,tag3", + "geo_field": "37.7749,-122.4194", + "flat_vector": [0.1, 0.2, 0.3], + "hnsw_vector": [0.4, 0.5, 0.6], + } + + validated = storage_instance.validate(valid_data) + assert validated is not None + assert validated["num_field"] == valid_data["num_field"] + assert validated["text_field"] == valid_data["text_field"] + + # Invalid text field + invalid_text = valid_data.copy() + invalid_text["text_field"] = 123 + with pytest.raises(ValueError) as exc_info: + storage_instance.validate(invalid_text) + assert "text_field" in str(exc_info.value) + + # Invalid numeric field (string that looks like number) + invalid_numeric = valid_data.copy() + invalid_numeric["num_field"] = "123.45" + with pytest.raises(ValueError) as exc_info: + storage_instance.validate(invalid_numeric) + assert "num_field" in str(exc_info.value) + + # Invalid geo field + invalid_geo = valid_data.copy() + 
invalid_geo["geo_field"] = "invalid-geo-format" + with pytest.raises(ValueError) as exc_info: + storage_instance.validate(invalid_geo) + assert "geo_field" in str(exc_info.value) + + # Invalid vector field (wrong dimensions) + invalid_vector_dims = valid_data.copy() + invalid_vector_dims["flat_vector"] = [0.1, 0.2] + with pytest.raises(ValueError) as exc_info: + storage_instance.validate(invalid_vector_dims) + assert "flat_vector" in str(exc_info.value) + assert "dimensions" in str(exc_info.value) + + # Invalid vector field (non-numeric values) + invalid_vector_values = valid_data.copy() + invalid_vector_values["hnsw_vector"] = ["a", "b", "c"] + with pytest.raises(ValueError) as exc_info: + storage_instance.validate(invalid_vector_values) + assert "hnsw_vector" in str(exc_info.value) + assert "numeric values" in str(exc_info.value) + + +def test_partial_object_validation(storage_instance): + """Test validation of partial objects (missing fields)""" + + # Object with only some fields + partial_data = { + "text_field": "valid text", + # Missing num_field, tag_field, etc. + } + + # Should validate successfully since fields are optional + validated = storage_instance.validate(partial_data) + assert validated is not None + assert "text_field" in validated + assert "num_field" not in validated + + # Explicitly setting a field to None should result in it being excluded + null_field_data = {"text_field": "valid text", "num_field": None} + + validated = storage_instance.validate(null_field_data) + assert "num_field" not in validated + + +def test_write_with_validation(storage_instance, mocker): + """Test the write method with validation enabled""" + # Mock the _set method to avoid actual Redis calls + mocker.patch.object(storage_instance, "_set") + + # Mock pipeline execution + mock_pipe = mocker.MagicMock() + mock_pipe.execute = mocker.MagicMock() + + # Mock Redis client + mock_client = mocker.MagicMock() + mock_client.pipeline.return_value.__enter__.return_value = mock_pipe + + # Valid and invalid objects + objects = [ + {"text_field": "valid", "num_field": 123}, # Valid + {"text_field": 456, "num_field": 789}, # Invalid text field + ] + + # With validation enabled, should raise error on first invalid object + with pytest.raises(ValueError) as exc_info: + storage_instance.write(mock_client, objects, validate=True) + + assert "Validation failed" in str(exc_info.value) + assert "text_field" in str(exc_info.value) + + # With validation disabled, should process all objects + keys = storage_instance.write(mock_client, objects, validate=False) + + assert len(keys) == 2 + assert storage_instance._set.call_count == 2 + + +class TestBaseStorageValidation: + """Tests for validation in BaseStorage class.""" + + def test_validate_object(self, comprehensive_schema, valid_data): + """Test the validate_object method.""" + # Create storage + storage = BaseStorage(schema=comprehensive_schema) + + # Validate object + validated = storage.validate_object(valid_data) + + # Verify object was validated + assert validated is not None + assert "id" in validated + assert "title" in validated + + def test_validate_object_with_invalid_data(self, comprehensive_schema, valid_data): + """Test validation with invalid data.""" + # Create storage + storage = BaseStorage(schema=comprehensive_schema) + + # Create invalid data + invalid_data = valid_data.copy() + invalid_data["rating"] = "not a number" + + # Validation should fail + with pytest.raises(ValueError) as exc_info: + storage.validate_object(invalid_data) + + # Error 
message should mention validation failure + assert "Validation failed" in str(exc_info.value) + + def test_preprocess_and_validate_objects_success( + self, comprehensive_schema, valid_data + ): + """Test _preprocess_and_validate_objects with valid data.""" + # Create storage + storage = BaseStorage(schema=comprehensive_schema) + + # Process objects + objects = [valid_data] + validated_objects = storage._preprocess_and_validate_objects(objects) + + # Verify objects were validated + assert len(validated_objects) == 1 + assert "id" in validated_objects[0] + assert "title" in validated_objects[0] + + def test_preprocess_and_validate_objects_fail( + self, comprehensive_schema, valid_data + ): + """Test _preprocess_and_validate_objects with invalid data.""" + # Create storage + storage = BaseStorage(schema=comprehensive_schema) + + # Create mix of valid and invalid data + invalid_data = valid_data.copy() + invalid_data["rating"] = "not a number" + + # Process should fail fast on first invalid object + with pytest.raises(ValueError) as exc_info: + storage._preprocess_and_validate_objects([invalid_data, valid_data]) + + # Error message should mention validation failure + assert "Validation failed" in str(exc_info.value) + + def test_write_one_validation(self, comprehensive_schema, valid_data): + """Test that write_one validates objects.""" + # Create storage with mocked redis client + client_mock = Mock() + storage = BaseStorage(schema=comprehensive_schema, client=client_mock) + + # Mock hset to avoid actual Redis call + client_mock.hset = Mock() + + # Call write_one + storage.write_one(valid_data) + + # Verify hset was called + client_mock.hset.assert_called_once() + + def test_write_one_validation_fail(self, comprehensive_schema, valid_data): + """Test that write_one fails on invalid data.""" + # Create storage with mocked redis client + client_mock = Mock() + storage = BaseStorage(schema=comprehensive_schema, client=client_mock) + + # Create invalid data + invalid_data = valid_data.copy() + invalid_data["rating"] = "not a number" + + # Call write_one with invalid data + with pytest.raises(ValueError) as exc_info: + storage.write_one(invalid_data) + + # Verify error and that hset was not called + assert "Validation failed" in str(exc_info.value) + client_mock.hset.assert_not_called() + + def test_write_many_validation(self, comprehensive_schema, valid_data): + """Test that write_many validates all objects.""" + # Create storage with mocked redis client + client_mock = Mock() + storage = BaseStorage(schema=comprehensive_schema, client=client_mock) + + # Mock pipeline to avoid actual Redis call + pipeline_mock = Mock() + client_mock.pipeline.return_value.__enter__.return_value = pipeline_mock + + # Call write_many with multiple valid objects + storage.write_many([valid_data, valid_data.copy()]) + + # Verify pipeline executed + pipeline_mock.execute.assert_called_once() + + def test_write_many_validation_fail(self, comprehensive_schema, valid_data): + """Test that write_many fails on invalid data.""" + # Create storage with mocked redis client + client_mock = Mock() + storage = BaseStorage(schema=comprehensive_schema, client=client_mock) + + # Mock pipeline to avoid actual Redis call + pipeline_mock = Mock() + client_mock.pipeline.return_value.__enter__.return_value = pipeline_mock + + # Create invalid data + invalid_data = valid_data.copy() + invalid_data["rating"] = "not a number" + + # Call write_many with invalid data + with pytest.raises(ValueError) as exc_info: + 
storage.write_many([valid_data, invalid_data]) + + # Verify error and that execute was not called + assert "Validation failed" in str(exc_info.value) + pipeline_mock.execute.assert_not_called() + + +class TestJsonStorageValidation: + """Tests for validation in JsonStorage class.""" + + def test_validate_json_document(self, json_schema, valid_nested_data): + """Test validating a JSON document.""" + # Create JSON storage + storage = JsonStorage(schema=json_schema) + + # Validate object + validated = storage.validate_object(valid_nested_data) + + # Verify object was validated and flattened + assert validated is not None + assert "id" in validated + assert "user" in validated + assert "title" in validated + assert "rating" in validated + + def test_validate_json_missing_paths(self, json_schema): + """Test validating JSON with missing paths.""" + # Create JSON storage + storage = JsonStorage(schema=json_schema) + + # Create object with missing paths + partial_nested = { + "id": "doc1", + "metadata": { + "user": "user123" + # missing rating + }, + "content": { + "title": "Test Document" + # missing embedding + }, + } + + # Validate object + validated = storage.validate_object(partial_nested) + + # Verify validation succeeds with missing fields + assert validated is not None + assert "id" in validated + assert "user" in validated + assert "title" in validated + + # Missing fields should be absent + assert "rating" not in validated + assert "embedding" not in validated + + def test_validate_json_invalid_path(self, json_schema, valid_nested_data): + """Test validating JSON with invalid path values.""" + # Create JSON storage + storage = JsonStorage(schema=json_schema) + + # Create object with invalid data + invalid_nested = valid_nested_data.copy() + invalid_nested["metadata"]["rating"] = "not a number" + + # Validation should fail + with pytest.raises(ValueError) as exc_info: + storage.validate_object(invalid_nested) + + # Error message should mention validation failure + assert "Validation failed" in str(exc_info.value) + assert "rating" in str(exc_info.value) + + def test_write_json_document(self, json_schema, valid_nested_data): + """Test writing a JSON document.""" + # Create storage with mocked redis client + client_mock = Mock() + storage = JsonStorage(schema=json_schema, client=client_mock) + + # Mock json.set to avoid actual Redis call + client_mock.json.set = Mock() + + # Call write_one + storage.write_one(valid_nested_data) + + # Verify json.set was called + client_mock.json.set.assert_called_once() + + def test_write_json_validation_fail(self, json_schema, valid_nested_data): + """Test that write fails on invalid JSON.""" + # Create storage with mocked redis client + client_mock = Mock() + storage = JsonStorage(schema=json_schema, client=client_mock) + + # Create invalid data + invalid_nested = valid_nested_data.copy() + invalid_nested["metadata"]["rating"] = "not a number" + + # Call write_one with invalid data + with pytest.raises(ValueError) as exc_info: + storage.write_one(invalid_nested) + + # Verify error and that json.set was not called + assert "Validation failed" in str(exc_info.value) + client_mock.json.set.assert_not_called() + + +@patch("redisvl.schema.validation.validate_object") +class TestValidationIntegration: + """Tests for integration between storage and validation.""" + + def test_validate_object_is_called( + self, mock_validate, comprehensive_schema, valid_data + ): + """Test that validate_object is called from BaseStorage.""" + # Create storage + storage = 
BaseStorage(schema=comprehensive_schema) + + # Set up mock to return the input data + mock_validate.return_value = valid_data + + # Call validate_object + storage.validate_object(valid_data) + + # Verify mock was called with correct args + mock_validate.assert_called_once_with(comprehensive_schema, valid_data) + + def test_preprocess_calls_validate_for_each_object( + self, mock_validate, comprehensive_schema, valid_data + ): + """Test that _preprocess_and_validate_objects calls validate for each object.""" + # Create storage + storage = BaseStorage(schema=comprehensive_schema) + + # Set up mock to return the input data + mock_validate.return_value = valid_data + + # Call _preprocess_and_validate_objects with multiple objects + objects = [valid_data, valid_data.copy(), valid_data.copy()] + storage._preprocess_and_validate_objects(objects) + + # Verify mock was called for each object + assert mock_validate.call_count == len(objects) + + def test_preprocess_stops_on_first_validation_error( + self, mock_validate, comprehensive_schema, valid_data + ): + """Test that processing stops on first validation error.""" + # Create storage + storage = BaseStorage(schema=comprehensive_schema) + + # Set up mock to raise error on second call + mock_validate.side_effect = [ + valid_data, + ValueError("Validation failed for 2nd object"), + valid_data, + ] + + # Call _preprocess_and_validate_objects + objects = [valid_data, valid_data.copy(), valid_data.copy()] + with pytest.raises(ValueError) as exc_info: + storage._preprocess_and_validate_objects(objects) + + # Verify error and that mock was called twice + assert "Validation failed for 2nd object" in str(exc_info.value) + assert mock_validate.call_count == 2 diff --git a/tests/unit/test_validation.py b/tests/unit/test_validation.py new file mode 100644 index 00000000..6431303c --- /dev/null +++ b/tests/unit/test_validation.py @@ -0,0 +1,515 @@ +""" +Tests for the RedisVL schema validation module. + +This module tests the core validation functionality: +1. Model generation from schemas +2. Field-specific validators +3. JSON path extraction +4. 
Validation of various field types +""" + +import re +from typing import Any, Dict, List + +import pytest + +from redisvl.schema import IndexSchema +from redisvl.schema.fields import FieldTypes, VectorDataType +from redisvl.schema.type_utils import TypeInferrer +from redisvl.schema.validation import ( + SchemaModelGenerator, + extract_from_json_path, + validate_object, +) + + +@pytest.fixture +def sample_schema(): + """Create a sample schema with different field types for testing.""" + schema_dict = { + "index": { + "name": "test-index", + "prefix": "test", + "key_separator": ":", + "storage_type": "hash", + }, + "fields": [ + {"name": "id", "type": "tag"}, + {"name": "title", "type": "text"}, + {"name": "rating", "type": "numeric"}, + {"name": "location", "type": "geo"}, + { + "name": "embedding", + "type": "vector", + "attrs": { + "algorithm": "flat", + "dims": 4, + "datatype": "float32", + "distance_metric": "cosine", + }, + }, + ], + } + return IndexSchema.from_dict(schema_dict) + + +@pytest.fixture +def sample_json_schema(): + """Create a sample schema with JSON storage and path fields.""" + schema_dict = { + "index": { + "name": "test-json-index", + "prefix": "test", + "key_separator": ":", + "storage_type": "json", + }, + "fields": [ + {"name": "id", "type": "tag", "path": "$.id"}, + {"name": "user", "type": "tag", "path": "$.metadata.user"}, + {"name": "title", "type": "text", "path": "$.content.title"}, + {"name": "rating", "type": "numeric", "path": "$.metadata.rating"}, + { + "name": "embedding", + "type": "vector", + "path": "$.content.embedding", + "attrs": { + "algorithm": "flat", + "dims": 4, + "datatype": "float32", + "distance_metric": "cosine", + }, + }, + ], + } + return IndexSchema.from_dict(schema_dict) + + +@pytest.fixture +def valid_data(): + """Sample valid data for testing validation.""" + return { + "id": "doc1", + "title": "Test Document", + "rating": 4.5, + "location": "37.7749,-122.4194", + "embedding": [0.1, 0.2, 0.3, 0.4], + } + + +@pytest.fixture +def valid_nested_data(): + """Sample valid nested data for testing JSON path validation.""" + return { + "id": "doc1", + "metadata": {"user": "user123", "rating": 4.5}, + "content": {"title": "Test Document", "embedding": [0.1, 0.2, 0.3, 0.4]}, + } + + +class TestSchemaModelGenerator: + """Tests for the SchemaModelGenerator class.""" + + def test_get_model_for_schema(self, sample_schema): + """Test generating a model from a schema.""" + # Get model for schema + model_class = SchemaModelGenerator.get_model_for_schema(sample_schema) + + # Verify model name matches the index name + assert model_class.__name__ == "test-index__PydanticModel" + + # Verify model has expected fields + for field_name in sample_schema.field_names: + assert field_name in model_class.model_fields + + def test_model_caching(self, sample_schema): + """Test that models are cached and reused.""" + # Get model twice + model1 = SchemaModelGenerator.get_model_for_schema(sample_schema) + model2 = SchemaModelGenerator.get_model_for_schema(sample_schema) + + # Verify same instance + assert model1 is model2 + + def test_type_mapping(self, sample_schema): + """Test mapping Redis field types to Pydantic types.""" + for field_name, field in sample_schema.fields.items(): + field_type = SchemaModelGenerator._map_field_to_pydantic_type(field) + + # Verify each field type maps to expected Python type + if field.type == FieldTypes.TEXT: + assert field_type == str + elif field.type == FieldTypes.TAG: + assert field_type == str + elif field.type == 
FieldTypes.NUMERIC: + assert field_type.__origin__ == type(Union) # Check it's a Union + elif field.type == FieldTypes.VECTOR: + assert field_type.__origin__ == type(Union) # Check it's a Union + + def test_unsupported_field_type(self): + """Test that an error is raised for unsupported field types.""" + + # Create a dummy field with unsupported type + class DummyField: + type = "unsupported_type" + + # Mapping should raise ValueError + with pytest.raises(ValueError) as exc_info: + SchemaModelGenerator._map_field_to_pydantic_type(DummyField()) + + assert "Unsupported field type" in str(exc_info.value) + + +class TestFieldValidators: + """Tests for field-specific validators.""" + + def test_text_field_validation(self, sample_schema, valid_data): + """Test validation of text fields.""" + model_class = SchemaModelGenerator.get_model_for_schema(sample_schema) + + # Valid text field + valid = valid_data.copy() + validated = model_class.model_validate(valid) + assert validated.title == "Test Document" + + # Invalid text field (number) + invalid = valid_data.copy() + invalid["title"] = 123 + with pytest.raises(ValueError) as exc_info: + model_class.model_validate(invalid) + assert "title" in str(exc_info.value) + assert "must be a string" in str(exc_info.value) + + def test_tag_field_validation(self, sample_schema, valid_data): + """Test validation of tag fields.""" + model_class = SchemaModelGenerator.get_model_for_schema(sample_schema) + + # Valid tag field + valid = valid_data.copy() + validated = model_class.model_validate(valid) + assert validated.id == "doc1" + + # Invalid tag field (number) + invalid = valid_data.copy() + invalid["id"] = 123 + with pytest.raises(ValueError) as exc_info: + model_class.model_validate(invalid) + assert "id" in str(exc_info.value) + assert "must be a string" in str(exc_info.value) + + def test_numeric_field_validation(self, sample_schema, valid_data): + """Test validation of numeric fields.""" + model_class = SchemaModelGenerator.get_model_for_schema(sample_schema) + + # Valid numeric field (integer) + valid_int = valid_data.copy() + valid_int["rating"] = 5 + validated = model_class.model_validate(valid_int) + assert validated.rating == 5 + + # Valid numeric field (float) + valid_float = valid_data.copy() + valid_float["rating"] = 4.5 + validated = model_class.model_validate(valid_float) + assert validated.rating == 4.5 + + # Invalid numeric field (string) + invalid = valid_data.copy() + invalid["rating"] = "high" + with pytest.raises(ValueError) as exc_info: + model_class.model_validate(invalid) + assert "rating" in str(exc_info.value) + assert "must be a number" in str(exc_info.value) + + # Invalid numeric field (string that looks like number) + invalid_num_str = valid_data.copy() + invalid_num_str["rating"] = "4.5" + with pytest.raises(ValueError) as exc_info: + model_class.model_validate(invalid_num_str) + assert "rating" in str(exc_info.value) + assert "must be a number" in str(exc_info.value) + + def test_geo_field_validation(self, sample_schema, valid_data): + """Test validation of geo fields.""" + model_class = SchemaModelGenerator.get_model_for_schema(sample_schema) + + # Valid geo format + valid_geo = valid_data.copy() + valid_geo["location"] = "37.7749,-122.4194" + validated = model_class.model_validate(valid_geo) + assert validated.location == "37.7749,-122.4194" + + # Invalid geo format (not matching lat,lon pattern) + invalid_geo = valid_data.copy() + invalid_geo["location"] = "invalid_geo" + with pytest.raises(ValueError) as exc_info: + 
model_class.model_validate(invalid_geo) + assert "location" in str(exc_info.value) + assert "not a valid 'lat,lon' format" in str(exc_info.value) + + # Verify the geo pattern actually works with valid formats + valid_formats = [ + "0,0", + "90,-180", + "-90,180", + "37.7749,-122.4194", + "37.7749,122.4194", + "-37.7749,-122.4194", + ] + for format in valid_formats: + assert re.match(TypeInferrer.GEO_PATTERN.pattern, format) + + # Verify invalid formats fail the pattern + invalid_formats = [ + "invalid", + "37.7749", + "37.7749,", + ",122.4194", + "91,0", # Latitude > 90 + "-91,0", # Latitude < -90 + "0,181", # Longitude > 180 + "0,-181", # Longitude < -180 + ] + for format in invalid_formats: + assert not re.match(TypeInferrer.GEO_PATTERN.pattern, format) + + def test_vector_field_validation_float(self, sample_schema, valid_data): + """Test validation of float vector fields.""" + model_class = SchemaModelGenerator.get_model_for_schema(sample_schema) + + # Valid vector + valid_vector = valid_data.copy() + valid_vector["embedding"] = [0.1, 0.2, 0.3, 0.4] + validated = model_class.model_validate(valid_vector) + assert validated.embedding == [0.1, 0.2, 0.3, 0.4] + + # Valid vector as bytes + valid_bytes = valid_data.copy() + valid_bytes["embedding"] = b"\x00\x01\x02\x03" + validated = model_class.model_validate(valid_bytes) + assert validated.embedding == b"\x00\x01\x02\x03" + + # Invalid vector type (string) + invalid_type = valid_data.copy() + invalid_type["embedding"] = "not a vector" + with pytest.raises(ValueError) as exc_info: + model_class.model_validate(invalid_type) + assert "embedding" in str(exc_info.value) + + # Invalid dimensions + invalid_dims = valid_data.copy() + invalid_dims["embedding"] = [0.1, 0.2, 0.3] # 3 dimensions instead of 4 + with pytest.raises(ValueError) as exc_info: + model_class.model_validate(invalid_dims) + assert "embedding" in str(exc_info.value) + assert "dimensions" in str(exc_info.value) + + # Invalid vector values + invalid_values = valid_data.copy() + invalid_values["embedding"] = [0.1, "string", 0.3, 0.4] + with pytest.raises(ValueError) as exc_info: + model_class.model_validate(invalid_values) + assert "embedding" in str(exc_info.value) + + def test_vector_field_validation_int(self, sample_schema, valid_data): + """Test validation of integer vector fields.""" + model_class = SchemaModelGenerator.get_model_for_schema(sample_schema) + + # Valid integer vector + valid_vector = valid_data.copy() + valid_vector["int_vector"] = [1, 2, 3] + validated = model_class.model_validate(valid_vector) + assert validated.int_vector == [1, 2, 3] + + # Invalid: float values in int vector + invalid_floats = valid_data.copy() + invalid_floats["int_vector"] = [0.1, 0.2, 0.3] + with pytest.raises(ValueError) as exc_info: + model_class.model_validate(invalid_floats) + assert "int_vector" in str(exc_info.value) + assert "integer values" in str(exc_info.value) + + # Invalid: values outside INT8 range + invalid_range = valid_data.copy() + invalid_range["int_vector"] = [1000, 2000, 3000] # Outside INT8 range + with pytest.raises(ValueError) as exc_info: + model_class.model_validate(invalid_range) + assert "int_vector" in str(exc_info.value) + assert "must be between" in str(exc_info.value) + + +class TestJsonPathValidation: + """Tests for JSON path-based validation.""" + + def test_extract_from_json_path(self, valid_nested_data): + """Test extracting values using JSON paths.""" + # Test simple path + assert extract_from_json_path(valid_nested_data, "$.id") == "doc1" + + # Test 
nested path + assert extract_from_json_path(valid_nested_data, "$.metadata.user") == "user123" + assert extract_from_json_path(valid_nested_data, "$.metadata.rating") == 4.5 + assert ( + extract_from_json_path(valid_nested_data, "$.content.title") + == "Test Document" + ) + assert extract_from_json_path(valid_nested_data, "$.content.embedding") == [ + 0.1, + 0.2, + 0.3, + 0.4, + ] + + # Test non-existent path + assert extract_from_json_path(valid_nested_data, "$.nonexistent") is None + assert ( + extract_from_json_path(valid_nested_data, "$.metadata.nonexistent") is None + ) + + # Test path with alternate formats + assert extract_from_json_path(valid_nested_data, "metadata.user") == "user123" + + def test_validate_nested_json(self, sample_json_schema, valid_nested_data): + """Test validating a nested JSON object.""" + # Validate nested object + validated = validate_object(sample_json_schema, valid_nested_data) + + # Verify validation succeeds and flattens the structure + assert validated is not None + assert "id" in validated + assert "user" in validated + assert "title" in validated + assert "rating" in validated + assert "embedding" in validated + + # Verify values were extracted correctly + assert validated["id"] == "doc1" + assert validated["user"] == "user123" + assert validated["title"] == "Test Document" + assert validated["rating"] == 4.5 + assert validated["embedding"] == [0.1, 0.2, 0.3, 0.4] + + def test_validate_nested_json_missing_paths(self, sample_json_schema): + """Test validating a nested JSON with missing paths.""" + # Nested object with missing paths + partial_nested = { + "id": "doc1", + "metadata": { + "user": "user123" + # missing rating + }, + "content": { + "title": "Test Document" + # missing embedding + }, + } + + # Validate object + validated = validate_object(sample_json_schema, partial_nested) + + # Verify validation succeeds with partial data + assert validated is not None + assert "id" in validated + assert "user" in validated + assert "title" in validated + assert "rating" not in validated + assert "embedding" not in validated + + +class TestObjectValidation: + """Tests for complete object validation.""" + + def test_validate_valid_object(self, sample_schema, valid_data): + """Test validating a valid object.""" + # Validate object + validated = validate_object(sample_schema, valid_data) + + # Verify no exceptions and data is returned + assert validated is not None + + # Verify all fields are present + for field_name in sample_schema.field_names: + if field_name in valid_data: + assert field_name in validated + + def test_validate_missing_optional_fields(self, sample_schema): + """Test validating an object with missing optional fields.""" + # Object with only some fields + partial_data = {"id": "doc1", "title": "Test Document"} + + # Validate object + validated = validate_object(sample_schema, partial_data) + + # Verify validation passes with partial data + assert validated is not None + assert "id" in validated + assert "title" in validated + assert "rating" not in validated + assert "location" not in validated + assert "embedding" not in validated + + def test_explicit_none_fields_are_excluded(self, sample_schema): + """Test that fields explicitly set to None are excluded from output.""" + # Object with some fields set to None + data_with_none = { + "id": "doc1", + "title": "Test Document", + "rating": None, + "location": None, + } + + # Validate object + validated = validate_object(sample_schema, data_with_none) + + # Verify None fields are excluded + 
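
To make the `validate_object` behavior these tests pin down concrete, here is a minimal sketch. It is illustrative only: `sample_schema` and `sample_json_schema` refer to the fixtures defined earlier in this file, and the expected outputs are inferred from the assertions above rather than from running the code.

```python
from redisvl.schema.validation import validate_object

# JSON schemas: values are pulled from their JSON paths and flattened
nested = {
    "id": "doc1",
    "metadata": {"user": "user123", "rating": 4.5},
    "content": {"title": "Test Document", "embedding": [0.1, 0.2, 0.3, 0.4]},
}
flat = validate_object(sample_json_schema, nested)
# -> {"id": "doc1", "user": "user123", "title": "Test Document",
#     "rating": 4.5, "embedding": [0.1, 0.2, 0.3, 0.4]}

# Fields explicitly set to None are dropped from the validated output
validate_object(sample_schema, {"id": "doc1", "rating": None})
# -> {"id": "doc1"}
```
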
assert validated is not None + assert "id" in validated + assert "title" in validated + assert "rating" not in validated + assert "location" not in validated + + def test_validate_with_multiple_invalid_fields(self, sample_schema, valid_data): + """Test validation with multiple invalid fields.""" + # Create object with multiple invalid fields + invalid_data = valid_data.copy() + invalid_data["title"] = 123 + invalid_data["rating"] = "not a number" + invalid_data["location"] = "invalid" + + # Validation should fail with the first error encountered + with pytest.raises(ValueError) as exc_info: + validate_object(sample_schema, invalid_data) + + # Error message should mention validation failure + assert "Validation failed" in str(exc_info.value) + + @pytest.mark.parametrize( + "case", + [ + {"field": "title", "value": 123, "error_text": "must be a string"}, + {"field": "rating", "value": "high", "error_text": "must be a number"}, + { + "field": "location", + "value": "invalid_geo", + "error_text": "not a valid 'lat,lon' format", + }, + { + "field": "embedding", + "value": [0.1, 0.2, 0.3], + "error_text": "dimensions", + }, + ], + ) + def test_validate_invalid_field_parametrized(self, sample_schema, valid_data, case): + """Parametrized test for validating invalid fields.""" + # Create invalid data according to test case + invalid_data = valid_data.copy() + invalid_data[case["field"]] = case["value"] + + # Validate and check error + with pytest.raises(ValueError) as exc_info: + validate_object(sample_schema, invalid_data) + + # Error should mention the field and specific issue + error_message = str(exc_info.value) + assert case["field"] in error_message + assert case["error_text"] in error_message From fa8041a662cad82d5ad8b16860d2c0f3bcc96d88 Mon Sep 17 00:00:00 2001 From: Tyler Hutcherson Date: Tue, 25 Mar 2025 22:16:58 -0400 Subject: [PATCH 02/11] dynamic pydantic model validation on load --- redisvl/exceptions.py | 34 +- redisvl/index/index.py | 52 +- redisvl/index/storage.py | 22 +- redisvl/schema/validation.py | 216 ++++---- tests/unit/conftest.py | 183 ------- tests/unit/test_storage.py | 647 +++++------------------- tests/unit/test_validation.py | 901 ++++++++++++++++++++-------------- 7 files changed, 824 insertions(+), 1231 deletions(-) delete mode 100644 tests/unit/conftest.py diff --git a/redisvl/exceptions.py b/redisvl/exceptions.py index e645e3e2..f8917c3d 100644 --- a/redisvl/exceptions.py +++ b/redisvl/exceptions.py @@ -1,10 +1,32 @@ -class RedisVLException(Exception): - """Base RedisVL exception""" +""" +RedisVL Exception Classes +This module defines all custom exceptions used throughout the RedisVL library. 
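
As a quick illustration of the hierarchy this module introduces (a sketch based only on the class definitions that follow, not additional API): all errors share `RedisVLError` as a base, and `SchemaValidationError.__init__` folds the failing object's position into the message.

```python
from redisvl.exceptions import RedisVLError, SchemaValidationError

try:
    raise SchemaValidationError("title must be a string", index=1)
except RedisVLError as e:
    # A broad handler still catches the subclass; __init__ has already
    # prefixed the message with the object position:
    print(e)  # Validation failed for object at index 1: title must be a string
```
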
+""" -class RedisModuleVersionError(RedisVLException): - """Invalid module versions installed""" +class RedisVLError(Exception): + """Base exception for all RedisVL errors.""" -class RedisSearchError(RedisVLException): - """Error while performing a search or aggregate request""" + pass + + +class RedisModuleVersionError(RedisVLError): + """Error raised when required Redis modules are missing or have incompatible versions.""" + + pass + + +class RedisSearchError(RedisVLError): + """Error raised for Redis Search specific operations.""" + + pass + + +class SchemaValidationError(RedisVLError): + """Error when validating data against a schema.""" + + def __init__(self, message, index=None): + if index is not None: + message = f"Validation failed for object at index {index}: {message}" + super().__init__(message) diff --git a/redisvl/index/index.py b/redisvl/index/index.py index 0cf9b172..806b4ba5 100644 --- a/redisvl/index/index.py +++ b/redisvl/index/index.py @@ -32,7 +32,12 @@ from redis.commands.helpers import get_protocol_version # type: ignore from redis.commands.search.indexDefinition import IndexDefinition -from redisvl.exceptions import RedisModuleVersionError, RedisSearchError +from redisvl.exceptions import ( + RedisModuleVersionError, + RedisSearchError, + RedisVLError, + SchemaValidationError, +) from redisvl.index.storage import BaseStorage, HashStorage, JsonStorage from redisvl.query import BaseQuery, CountQuery, FilterQuery from redisvl.query.filter import FilterExpression @@ -594,27 +599,8 @@ def load( List[str]: List of keys loaded to Redis. Raises: - ValueError: If the length of provided keys does not match the length - of objects or if validation fails when validate_on_load is enabled. - - .. code-block:: python - - data = [{"test": "foo"}, {"test": "bar"}] - - # simple case - keys = index.load(data) - - # set 360 second ttl policy on data - keys = index.load(data, ttl=360) - - # load data with predefined keys - keys = index.load(data, keys=["rvl:foo", "rvl:bar"]) - - # load data with preprocessing step - def add_field(d): - d["new_field"] = 123 - return d - keys = index.load(data, preprocess=add_field) + SchemaValidationError: If validation fails when validate_on_load is enabled. + RedisVLError: If there's an error loading data to Redis. """ try: return self._storage.write( @@ -627,9 +613,14 @@ def add_field(d): batch_size=batch_size, validate=self._validate_on_load, ) - except: - logger.exception("Error while loading data to Redis") + except SchemaValidationError: + # Pass through validation errors directly + logger.exception("Schema validation error while loading data") raise + except Exception as e: + # Wrap other errors as general RedisVL errors + logger.exception("Error while loading data to Redis") + raise RedisVLError(f"Failed to load data: {str(e)}") from e def fetch(self, id: str) -> Optional[Dict[str, Any]]: """Fetch an object from Redis by id. @@ -1246,8 +1237,8 @@ async def load( List[str]: List of keys loaded to Redis. Raises: - ValueError: If the length of provided keys does not match the - length of objects or if validation fails when validate_on_load is enabled. + SchemaValidationError: If validation fails when validate_on_load is enabled. + RedisVLError: If there's an error loading data to Redis. .. 
code-block:: python @@ -1281,9 +1272,14 @@ def add_field(d): batch_size=batch_size, validate=self._validate_on_load, ) - except: - logger.exception("Error while loading data to Redis") + except SchemaValidationError: + # Pass through validation errors directly + logger.exception("Schema validation error while loading data") raise + except Exception as e: + # Wrap other errors as general RedisVL errors + logger.exception("Error while loading data to Redis") + raise RedisVLError(f"Failed to load data: {str(e)}") from e async def fetch(self, id: str) -> Optional[Dict[str, Any]]: """Asynchronously etch an object from Redis by id. The id is typically diff --git a/redisvl/index/storage.py b/redisvl/index/storage.py index f90e45b4..f0af1e5b 100644 --- a/redisvl/index/storage.py +++ b/redisvl/index/storage.py @@ -5,6 +5,7 @@ from redis.asyncio import Redis as AsyncRedis from redis.commands.search.indexDefinition import IndexType +from redisvl.exceptions import SchemaValidationError from redisvl.redis.utils import convert_bytes from redisvl.schema import IndexSchema from redisvl.schema.validation import validate_object @@ -180,7 +181,8 @@ def _preprocess_and_validate_objects( List of tuples (key, processed_obj) for valid objects Raises: - ValueError: If any validation fails with object context + SchemaValidationError: If validation fails, with context about which object failed + ValueError: If any other processing errors occur """ prepared_objects = [] keys_iterator = iter(keys) if keys else None @@ -197,12 +199,6 @@ def _preprocess_and_validate_objects( # Preprocess processed_obj = self._preprocess(obj, preprocess) - # Basic type validation - if not isinstance(processed_obj, dict): - raise ValueError( - f"Object must be a dictionary, got {type(processed_obj).__name__}" - ) - # Schema validation if enabled if validate: processed_obj = self.validate(processed_obj) @@ -210,13 +206,15 @@ def _preprocess_and_validate_objects( # Store valid object with its key for writing prepared_objects.append((key, processed_obj)) + except ValidationError as e: + # Convert Pydantic ValidationError to SchemaValidationError with index context + raise SchemaValidationError(str(e), index=i) from e except Exception as e: - # Enhance error message with object context + # Capture other exceptions with context object_id = f"at index {i}" - if id_field and isinstance(obj, dict) and id_field in obj: - object_id = f"with {id_field}={obj[id_field]}" - - raise ValueError(f"Validation failed for object {object_id}: {str(e)}") + raise ValueError( + f"Error processing object {object_id}: {str(e)}" + ) from e return prepared_objects diff --git a/redisvl/schema/validation.py b/redisvl/schema/validation.py index 51fcf445..b102166c 100644 --- a/redisvl/schema/validation.py +++ b/redisvl/schema/validation.py @@ -10,7 +10,7 @@ import warnings from typing import Any, Dict, List, Optional, Type, Union -from pydantic import BaseModel, Field, ValidationError, create_model, field_validator +from pydantic import BaseModel, Field, field_validator from redisvl.schema import IndexSchema from redisvl.schema.fields import BaseField, FieldTypes, VectorDataType @@ -78,7 +78,10 @@ def _map_field_to_pydantic_type( elif field.type == FieldTypes.VECTOR: # For JSON storage, vectors are always lists if storage_type == StorageType.JSON: - return List[Union[int, float]] + # For int data types, vectors must be ints, otherwise floats + if field.attrs.datatype in (VectorDataType.INT8, VectorDataType.UINT8): + return List[int] + return List[float] else: return 
bytes @@ -88,7 +91,7 @@ def _map_field_to_pydantic_type( @classmethod def _create_model(cls, schema: IndexSchema) -> Type[BaseModel]: """ - Create a Pydantic model from schema definition. + Create a Pydantic model from schema definition using type() approach. Args: schema: The IndexSchema to convert @@ -96,134 +99,111 @@ def _create_model(cls, schema: IndexSchema) -> Type[BaseModel]: Returns: A Pydantic model class with appropriate fields and validators """ - field_definitions = {} - validators = {} - # Get storage type from schema storage_type = schema.index.storage_type - # Create field definitions dictionary for create_model + # Create annotations dictionary for the dynamic model + annotations = {} + class_dict = {} + + # Build annotations and field metadata for field_name, field in schema.fields.items(): field_type = cls._map_field_to_pydantic_type(field, storage_type) - # Create field definition (all fields are optional in the model) - # this handles the cases where objects have missing fields (supported behavior) - field_definitions[field_name] = ( - Optional[field_type], # Make fields optional - Field( - default=None, - json_schema_extra={ - "field_type": field.type, - }, - ), - ) - - # Add field-specific validator info to our validator registry - if field.type == FieldTypes.GEO: - validators[field_name] = {"type": "geo"} - - elif field.type == FieldTypes.VECTOR: - validators[field_name] = { - "type": "vector", - "dims": field.attrs.dims, - "datatype": field.attrs.datatype, - "storage_type": storage_type, - } - - # First create the model class with field definitions - model_name = f"{schema.index.name}__PydanticModel" - model_class = create_model(model_name, **field_definitions) - - # Then add validators to the model class - for field_name, validator_info in validators.items(): - if validator_info["type"] == "geo": - # Add geo validator - validator = cls._create_geo_validator(field_name) - setattr(model_class, f"validate_{field_name}", validator) - - elif validator_info["type"] == "vector": - # Add vector validator - validator = cls._create_vector_validator( - field_name, - validator_info["dims"], - validator_info["datatype"], - validator_info["storage_type"], - ) - setattr(model_class, f"validate_{field_name}", validator) - - return model_class - - @staticmethod - def _create_geo_validator(field_name: str): - """ - Create a validator for geo fields. + # Make all fields optional in the model + annotations[field_name] = Optional[field_type] - Args: - field_name: Name of the field to validate - - Returns: - A validator function that can be attached to a Pydantic model - """ + # Add default=None to make fields truly optional (can be missing from input) + class_dict[field_name] = Field(default=None) - # Create the validator function - def validate_geo_field(cls, value): - # Skip validation for None values - if value is not None: - # Validate against pattern - if not re.match(TypeInferrer.GEO_PATTERN.pattern, value): - raise ValueError( - f"Geo field '{field_name}' value '{value}' is not a valid 'lat,lon' format" - ) - return value - - # Add the field_validator decorator - return field_validator(field_name, mode="after")(validate_geo_field) - - @staticmethod - def _create_vector_validator( - field_name: str, dims: int, datatype: VectorDataType, storage_type: StorageType - ): - """ - Create a validator for vector fields. 
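
The `make_geo_validator` closure above, and the numeric and vector factories that follow, all rely on the same pattern: capture the field name in a closure, decorate the inner function with `field_validator`, and drop the result into the namespace handed to `type()`. A self-contained sketch of that pattern, with an invented field name and rule purely for illustration:

```python
from typing import Optional

from pydantic import BaseModel, Field, field_validator


def make_range_validator(fname: str):
    # The closure captures the field name so each dynamically generated
    # field gets its own validator, mirroring _create_model above.
    @field_validator(fname, mode="after")
    def _validate(cls, value):
        if value is not None and not (0 <= value <= 5):
            raise ValueError(f"Field '{fname}' must be between 0 and 5")
        return value

    return _validate


class_dict = {
    "__annotations__": {"rating": Optional[float]},
    "rating": Field(default=None),
    "validate_rating": make_range_validator("rating"),
    "model_config": {"extra": "allow"},
}

RatingModel = type("RatingModel", (BaseModel,), class_dict)

RatingModel.model_validate({"rating": 4.5})  # ok
RatingModel.model_validate({})               # ok: field is optional
# RatingModel.model_validate({"rating": 9})  # raises ValidationError
```
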
- - Args: - field_name: Name of the field to validate - dims: Expected dimensions of the vector - datatype: Expected datatype of the vector elements - storage_type: Type of storage (HASH or JSON) - - Returns: - A validator function that can be attached to a Pydantic model - """ - - # Create the validator function - def validate_vector_field(cls, value): - # Skip validation for None values - if value is not None: - - # Handle list representation - if isinstance(value, list): + # Register validators for GEO fields + if field.type == FieldTypes.GEO: - # Validate dimensions - if len(value) != dims: - raise ValueError( - f"Vector field '{field_name}' must have {dims} dimensions, got {len(value)}" - ) + def make_geo_validator(fname: str): + @field_validator(fname, mode="after") + def _validate_geo(cls, value): + # Skip validation for None values + if value is not None: + # Validate against pattern + if not TypeInferrer._is_geographic(value): + raise ValueError( + f"Geo field '{fname}' value '{value}' is not a valid 'lat,lon' format" + ) + return value + + return _validate_geo + + class_dict[f"validate_{field_name}"] = make_geo_validator(field_name) + + # Register validators for NUMERIC fields + elif field.type == FieldTypes.NUMERIC: + + def make_numeric_validator(fname: str): + # mode='before' so it catches bools before parsing + @field_validator(fname, mode="before") + def _disallow_bool(cls, value): + if isinstance(value, bool): + raise ValueError(f"Field '{fname}' cannot be boolean.") + return value + + return _disallow_bool + + class_dict[f"validate_{field_name}"] = make_numeric_validator( + field_name + ) - # Validate data types - datatype_str = str(datatype).upper() + # Register validators for VECTOR fields + elif field.type == FieldTypes.VECTOR: - # Integer-based datatypes - if datatype_str in ("INT8", "UINT8"): - # Check type - if not all(isinstance(v, int) for v in value): - raise ValueError( - f"Vector field '{field_name}' must contain only integer values for {datatype_str}" - ) + def make_vector_validator( + fname: str, dims: int, datatype: VectorDataType + ): + @field_validator(fname, mode="after") + def _validate_vector(cls, value): + # Skip validation for None values + if value is not None: + # Handle list representation + if isinstance(value, list): + # Validate dimensions + if len(value) != dims: + raise ValueError( + f"Vector field '{fname}' must have {dims} dimensions, got {len(value)}" + ) + # Validate data types + datatype_str = str(datatype).upper() + # Integer-based datatypes + if datatype_str in ("INT8", "UINT8"): + # Check range for INT8 + if datatype_str == "INT8": + if any(v < -128 or v > 127 for v in value): + raise ValueError( + f"Vector field '{fname}' contains values outside the INT8 range (-128 to 127)" + ) + # Check range for UINT8 + elif datatype_str == "UINT8": + if any(v < 0 or v > 255 for v in value): + raise ValueError( + f"Vector field '{fname}' contains values outside the UINT8 range (0 to 255)" + ) + return value + + return _validate_vector + + class_dict[f"validate_{field_name}"] = make_vector_validator( + field_name, field.attrs.dims, field.attrs.datatype + ) - return value + # Create class dictionary with annotations and field metadata + class_dict.update( + **{ + "__annotations__": annotations, + "model_config": {"arbitrary_types_allowed": True, "extra": "allow"}, + } + ) - return validate_vector_field + # Create the model class using type() + model_name = f"{schema.index.name}__PydanticModel" + return type(model_name, (BaseModel,), class_dict) def 
extract_from_json_path(obj: Dict[str, Any], path: str) -> Any: diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py deleted file mode 100644 index 91a558f2..00000000 --- a/tests/unit/conftest.py +++ /dev/null @@ -1,183 +0,0 @@ -""" -Common test fixtures and utilities for RedisVL validation tests. -""" - -from typing import Any, Dict - -import pytest - -from redisvl.schema import IndexSchema -from redisvl.schema.fields import VectorDataType, VectorDistanceMetric - - -@pytest.fixture -def comprehensive_schema(): - """Create a comprehensive schema with all field types for testing.""" - return IndexSchema.from_dict( - { - "index": { - "name": "test-index", - "prefix": "test", - "key_separator": ":", - "storage_type": "hash", - }, - "fields": [ - {"name": "id", "type": "tag"}, - {"name": "title", "type": "text"}, - {"name": "rating", "type": "numeric"}, - {"name": "location", "type": "geo"}, - { - "name": "embedding", - "type": "vector", - "attrs": { - "algorithm": "flat", - "dims": 4, - "datatype": "float32", - "distance_metric": "cosine", - }, - }, - { - "name": "int_vector", - "type": "vector", - "attrs": { - "algorithm": "flat", - "dims": 3, - "datatype": "int8", - "distance_metric": "l2", - }, - }, - { - "name": "hnsw_vector", - "type": "vector", - "attrs": { - "algorithm": "hnsw", - "dims": 3, - "distance_metric": "cosine", - "datatype": "float32", - "m": 16, - "ef_construction": 200, - "ef_runtime": 10, - "epsilon": 0.01, - }, - }, - ], - } - ) - - -@pytest.fixture -def json_schema(): - """Create a schema with JSON storage and path fields.""" - return IndexSchema.from_dict( - { - "index": { - "name": "test-json-index", - "prefix": "test", - "key_separator": ":", - "storage_type": "json", - }, - "fields": [ - {"name": "id", "type": "tag", "path": "$.id"}, - {"name": "user", "type": "tag", "path": "$.metadata.user"}, - {"name": "title", "type": "text", "path": "$.content.title"}, - {"name": "rating", "type": "numeric", "path": "$.metadata.rating"}, - { - "name": "embedding", - "type": "vector", - "path": "$.content.embedding", - "attrs": { - "algorithm": "flat", - "dims": 4, - "datatype": "float32", - "distance_metric": "cosine", - }, - }, - ], - } - ) - - -@pytest.fixture -def valid_data(): - """Sample valid data for testing validation.""" - return { - "id": "doc1", - "title": "Test Document", - "rating": 4.5, - "location": "37.7749,-122.4194", - "embedding": [0.1, 0.2, 0.3, 0.4], - "int_vector": [1, 2, 3], - "hnsw_vector": [0.1, 0.2, 0.3], - } - - -@pytest.fixture -def valid_nested_data(): - """Sample valid nested data for testing JSON path validation.""" - return { - "id": "doc1", - "metadata": {"user": "user123", "rating": 4.5}, - "content": {"title": "Test Document", "embedding": [0.1, 0.2, 0.3, 0.4]}, - } - - -@pytest.fixture -def invalid_data_cases(): - """ - Test cases for invalid data. 
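
Stepping back to `extract_from_json_path`, whose signature appears just above: based on the behavior the unit tests earlier in this patch pin down, it accepts both `$.`-prefixed and bare dotted paths and returns `None` for anything missing. A short usage sketch:

```python
from redisvl.schema.validation import extract_from_json_path

doc = {"metadata": {"user": "user123", "rating": 4.5}}

extract_from_json_path(doc, "$.metadata.user")   # "user123"
extract_from_json_path(doc, "metadata.rating")   # 4.5 (bare paths also work)
extract_from_json_path(doc, "$.missing.path")    # None rather than raising
```
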
- Each case contains: - - field: name of the field - - value: invalid value to test - - error_text: text that should appear in error message - """ - return [ - # Text field errors - {"field": "title", "value": 123, "error_text": "must be a string"}, - # Numeric field errors - {"field": "rating", "value": "high", "error_text": "must be a number"}, - {"field": "rating", "value": "123.45", "error_text": "must be a number"}, - # Tag field errors - {"field": "id", "value": 123, "error_text": "must be a string"}, - # Geo field errors - { - "field": "location", - "value": "invalid_geo", - "error_text": "not a valid 'lat,lon' format", - }, - { - "field": "location", - "value": "1000,-1000", - "error_text": "not a valid 'lat,lon' format", - }, - # Vector field errors - float32 - {"field": "embedding", "value": [0.1, 0.2, 0.3], "error_text": "dimensions"}, - { - "field": "embedding", - "value": [0.1, "string", 0.3, 0.4], - "error_text": "numeric values", - }, - { - "field": "embedding", - "value": "not_a_vector", - "error_text": "must be a list or bytes", - }, - # Vector field errors - int8 - { - "field": "int_vector", - "value": [0.1, 0.2, 0.3], - "error_text": "integer values", - }, - {"field": "int_vector", "value": [1, 2], "error_text": "dimensions"}, - { - "field": "int_vector", - "value": [1000, 2000, 3000], - "error_text": "INT8 values must be between", - }, - # HNSW Vector field errors - {"field": "hnsw_vector", "value": [0.1, 0.2], "error_text": "dimensions"}, - { - "field": "hnsw_vector", - "value": ["a", "b", "c"], - "error_text": "numeric values", - }, - ] diff --git a/tests/unit/test_storage.py b/tests/unit/test_storage.py index 539a1b76..4a34d340 100644 --- a/tests/unit/test_storage.py +++ b/tests/unit/test_storage.py @@ -1,562 +1,165 @@ -""" -Tests for RedisVL storage classes with focus on validation integration. - -This module tests how the storage classes integrate with the validation system: -1. How validation is used in storage operations -2. Preprocessing and validation flow -3. 
Error handling in write operations -""" - -from typing import Any, Dict -from unittest.mock import MagicMock, Mock, patch - import pytest +from pydantic import ValidationError -from redisvl.index.storage import HashStorage, JsonStorage -from redisvl.schema import IndexInfo, IndexSchema -from redisvl.schema.fields import ( - FlatVectorField, - FlatVectorFieldAttributes, - GeoField, - HNSWVectorField, - HNSWVectorFieldAttributes, - NumericField, - TagField, - TextField, - VectorDataType, - VectorDistanceMetric, -) -from redisvl.schema.validation import validate_object +from redisvl.exceptions import SchemaValidationError +from redisvl.index.storage import BaseStorage, HashStorage, JsonStorage +from redisvl.schema import IndexSchema @pytest.fixture -def sample_schema(): - """Create a comprehensive schema for testing with all field types""" - return IndexSchema.from_dict( - { - "index": { - "name": "test-index", - "prefix": "test", - "key_separator": ":", - "storage_type": "hash", +def sample_hash_schema(): + """Create a sample schema with HASH storage for testing.""" + schema_dict = { + "index": { + "name": "test-hash-index", + "prefix": "test", + "key_separator": ":", + "storage_type": "hash", + }, + "fields": [ + {"name": "test_id", "type": "tag"}, + {"name": "title", "type": "text"}, + {"name": "rating", "type": "numeric"}, + {"name": "location", "type": "geo"}, + { + "name": "embedding", + "type": "vector", + "attrs": { + "algorithm": "flat", + "dims": 4, + "datatype": "float32", + "distance_metric": "cosine", + }, }, - "fields": [ - # Standard fields - {"type": "text", "name": "text_field"}, - {"type": "numeric", "name": "num_field"}, - {"type": "tag", "name": "tag_field"}, - {"type": "geo", "name": "geo_field"}, - # Vector fields - { - "type": "vector", - "name": "flat_vector", - "attrs": { - "algorithm": "flat", - "dims": 3, - "distance_metric": "cosine", - "data_type": "float32", - }, + { + "name": "int_vector", + "type": "vector", + "attrs": { + "algorithm": "flat", + "dims": 3, + "datatype": "int8", + "distance_metric": "l2", }, - { - "type": "vector", - "name": "hnsw_vector", - "attrs": { - "algorithm": "hnsw", - "dims": 3, - "distance_metric": "cosine", - "data_type": "float32", - "m": 16, - "ef_construction": 200, - "ef_runtime": 10, - "epsilon": 0.01, - }, + }, + ], + } + return IndexSchema.from_dict(schema_dict) + + +@pytest.fixture +def sample_json_schema(): + """Create a sample schema with JSON storage for testing.""" + schema_dict = { + "index": { + "name": "test-json-index", + "prefix": "test", + "key_separator": ":", + "storage_type": "json", + }, + "fields": [ + {"name": "test_id", "type": "tag", "path": "$.test_id"}, + {"name": "user", "type": "tag", "path": "$.metadata.user"}, + {"name": "title", "type": "text", "path": "$.content.title"}, + {"name": "rating", "type": "numeric", "path": "$.metadata.rating"}, + { + "name": "embedding", + "type": "vector", + "path": "$.content.embedding", + "attrs": { + "algorithm": "flat", + "dims": 4, + "datatype": "float32", + "distance_metric": "cosine", }, - ], - } - ) + }, + { + "name": "int_vector", + "type": "vector", + "path": "$.content.int_vector", + "attrs": { + "algorithm": "flat", + "dims": 3, + "datatype": "int8", + "distance_metric": "l2", + }, + }, + ], + } + return IndexSchema.from_dict(schema_dict) @pytest.fixture(params=[JsonStorage, HashStorage]) -def storage_instance(request, sample_schema): +def storage_instance(request, sample_hash_schema, sample_json_schema): StorageClass = request.param - instance = 
StorageClass(index_schema=sample_schema)
-    return instance
+    if StorageClass is JsonStorage:
+        return StorageClass(index_schema=sample_json_schema)
+    return StorageClass(index_schema=sample_hash_schema)
 
 
 def test_key_formatting(storage_instance):
     key = "1234"
     generated_key = storage_instance._key(key, "", "")
-    assert generated_key == key
+    assert generated_key == key, "The generated key does not match the expected format."
     generated_key = storage_instance._key(key, "", ":")
-    assert generated_key == key
+    assert generated_key == key, "The generated key does not match the expected format."
     generated_key = storage_instance._key(key, "test", ":")
-    assert generated_key == f"test:{key}"
+    assert (
+        generated_key == f"test:{key}"
+    ), "The generated key does not match the expected format."
 
 
 def test_create_key(storage_instance):
     id_field = "id"
     obj = {id_field: "1234"}
-    expected_key = (
-        f"{storage_instance.index_schema.index.prefix}"
-        f"{storage_instance.index_schema.index.key_separator}"
-        f"{obj[id_field]}"
-    )
+    expected_key = f"{storage_instance.index_schema.index.prefix}{storage_instance.index_schema.index.key_separator}{obj[id_field]}"
     generated_key = storage_instance._create_key(obj, id_field)
     assert (
         generated_key == expected_key
     ), "The generated key does not match the expected format."
 
 
+def test_validate_success(storage_instance):
+    try:
+        storage_instance.validate(
+            {"test_id": "1234", "rating": 5, "user": "john", "title": "engineer"}
+        )
+    except Exception as e:
+        pytest.fail(f"validate should not raise an exception here, but raised {e}")
+
+
+def test_validate_failure(storage_instance):
+    data = {"title": 5}
+    with pytest.raises(ValidationError):
+        storage_instance.validate(data)
+    data = {"user": True}
+    with pytest.raises(ValidationError):
+        storage_instance.validate(data)
+
+
+def test_validate_preprocess_and_validate_failure(storage_instance):
+    data = {"title": 5}
+    storage_instance._preprocess_and_validate_objects(
+        objects=[data], validate=False
+    )
+    with pytest.raises(SchemaValidationError):
+        storage_instance._preprocess_and_validate_objects(objects=[data], validate=True)
+    data = {"user": True}
+    storage_instance._preprocess_and_validate_objects(
+        objects=[data], validate=False
+    )
+    with pytest.raises(SchemaValidationError):
+        storage_instance._preprocess_and_validate_objects(objects=[data], validate=True)
+
+
 def test_preprocess(storage_instance):
     data = {"key": "value"}
-    preprocessed_data = storage_instance._preprocess(data, preprocess=None)
+    preprocessed_data = storage_instance._preprocess(obj=data, preprocess=None)
     assert preprocessed_data == data
 
     def fn(d):
         d["foo"] = "bar"
         return d
 
-    preprocessed_data = storage_instance._preprocess(data, fn)
+    preprocessed_data = storage_instance._preprocess(obj=data, preprocess=fn)
     assert "foo" in preprocessed_data
     assert preprocessed_data["foo"] == "bar"
-
-
-def test_preprocess_and_validate_objects(storage_instance):
-    """Test combined preprocessing and validation"""
-    objects = [
-        {"num_field": 123, "text_field": "valid text"},  # Valid
-        {"num_field": "123", "text_field": "valid text"},  # Invalid numeric field
-    ]
-
-    def preprocess(obj):
-        obj["processed"] = True
-        return obj
-
-    # When validate=True, should raise ValueError for invalid object
-    with pytest.raises(ValueError) as exc_info:
-        storage_instance._preprocess_and_validate_objects(
-            objects, preprocess=preprocess, validate=True
-        )
-
-    # Error message should mention the issue
-    assert "Validation failed" in str(exc_info.value)
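
The flow these new tests pin down, sketched end to end (a hypothetical snippet, where `storage_instance` stands for any storage object built from the fixtures above):

```python
from redisvl.exceptions import SchemaValidationError

bad = {"title": 5}  # numeric value in a text field

# With validation disabled, the object passes straight through:
storage_instance._preprocess_and_validate_objects(objects=[bad], validate=False)

# With validation enabled, the write path fails fast, and the error
# message carries the offending object's position:
try:
    storage_instance._preprocess_and_validate_objects(objects=[bad], validate=True)
except SchemaValidationError as e:
    print(e)  # e.g. "Validation failed for object at index 0: ..."
```
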
assert "must be a number" in str(exc_info.value) - - # When validate=False, should process both objects without errors - prepared_objects = storage_instance._preprocess_and_validate_objects( - objects, preprocess=preprocess, validate=False - ) - - assert len(prepared_objects) == 2 - # Preprocessing should have worked for both objects - assert all(obj[1].get("processed") for obj in prepared_objects) - - -def test_validate_object(storage_instance): - """Test validation of individual objects""" - - # Valid data should be returned unchanged (except for any type coercion) - valid_data = { - "text_field": "some text", - "num_field": 123.45, - "tag_field": "tag1,tag2,tag3", - "geo_field": "37.7749,-122.4194", - "flat_vector": [0.1, 0.2, 0.3], - "hnsw_vector": [0.4, 0.5, 0.6], - } - - validated = storage_instance.validate(valid_data) - assert validated is not None - assert validated["num_field"] == valid_data["num_field"] - assert validated["text_field"] == valid_data["text_field"] - - # Invalid text field - invalid_text = valid_data.copy() - invalid_text["text_field"] = 123 - with pytest.raises(ValueError) as exc_info: - storage_instance.validate(invalid_text) - assert "text_field" in str(exc_info.value) - - # Invalid numeric field (string that looks like number) - invalid_numeric = valid_data.copy() - invalid_numeric["num_field"] = "123.45" - with pytest.raises(ValueError) as exc_info: - storage_instance.validate(invalid_numeric) - assert "num_field" in str(exc_info.value) - - # Invalid geo field - invalid_geo = valid_data.copy() - invalid_geo["geo_field"] = "invalid-geo-format" - with pytest.raises(ValueError) as exc_info: - storage_instance.validate(invalid_geo) - assert "geo_field" in str(exc_info.value) - - # Invalid vector field (wrong dimensions) - invalid_vector_dims = valid_data.copy() - invalid_vector_dims["flat_vector"] = [0.1, 0.2] - with pytest.raises(ValueError) as exc_info: - storage_instance.validate(invalid_vector_dims) - assert "flat_vector" in str(exc_info.value) - assert "dimensions" in str(exc_info.value) - - # Invalid vector field (non-numeric values) - invalid_vector_values = valid_data.copy() - invalid_vector_values["hnsw_vector"] = ["a", "b", "c"] - with pytest.raises(ValueError) as exc_info: - storage_instance.validate(invalid_vector_values) - assert "hnsw_vector" in str(exc_info.value) - assert "numeric values" in str(exc_info.value) - - -def test_partial_object_validation(storage_instance): - """Test validation of partial objects (missing fields)""" - - # Object with only some fields - partial_data = { - "text_field": "valid text", - # Missing num_field, tag_field, etc. 
- } - - # Should validate successfully since fields are optional - validated = storage_instance.validate(partial_data) - assert validated is not None - assert "text_field" in validated - assert "num_field" not in validated - - # Explicitly setting a field to None should result in it being excluded - null_field_data = {"text_field": "valid text", "num_field": None} - - validated = storage_instance.validate(null_field_data) - assert "num_field" not in validated - - -def test_write_with_validation(storage_instance, mocker): - """Test the write method with validation enabled""" - # Mock the _set method to avoid actual Redis calls - mocker.patch.object(storage_instance, "_set") - - # Mock pipeline execution - mock_pipe = mocker.MagicMock() - mock_pipe.execute = mocker.MagicMock() - - # Mock Redis client - mock_client = mocker.MagicMock() - mock_client.pipeline.return_value.__enter__.return_value = mock_pipe - - # Valid and invalid objects - objects = [ - {"text_field": "valid", "num_field": 123}, # Valid - {"text_field": 456, "num_field": 789}, # Invalid text field - ] - - # With validation enabled, should raise error on first invalid object - with pytest.raises(ValueError) as exc_info: - storage_instance.write(mock_client, objects, validate=True) - - assert "Validation failed" in str(exc_info.value) - assert "text_field" in str(exc_info.value) - - # With validation disabled, should process all objects - keys = storage_instance.write(mock_client, objects, validate=False) - - assert len(keys) == 2 - assert storage_instance._set.call_count == 2 - - -class TestBaseStorageValidation: - """Tests for validation in BaseStorage class.""" - - def test_validate_object(self, comprehensive_schema, valid_data): - """Test the validate_object method.""" - # Create storage - storage = BaseStorage(schema=comprehensive_schema) - - # Validate object - validated = storage.validate_object(valid_data) - - # Verify object was validated - assert validated is not None - assert "id" in validated - assert "title" in validated - - def test_validate_object_with_invalid_data(self, comprehensive_schema, valid_data): - """Test validation with invalid data.""" - # Create storage - storage = BaseStorage(schema=comprehensive_schema) - - # Create invalid data - invalid_data = valid_data.copy() - invalid_data["rating"] = "not a number" - - # Validation should fail - with pytest.raises(ValueError) as exc_info: - storage.validate_object(invalid_data) - - # Error message should mention validation failure - assert "Validation failed" in str(exc_info.value) - - def test_preprocess_and_validate_objects_success( - self, comprehensive_schema, valid_data - ): - """Test _preprocess_and_validate_objects with valid data.""" - # Create storage - storage = BaseStorage(schema=comprehensive_schema) - - # Process objects - objects = [valid_data] - validated_objects = storage._preprocess_and_validate_objects(objects) - - # Verify objects were validated - assert len(validated_objects) == 1 - assert "id" in validated_objects[0] - assert "title" in validated_objects[0] - - def test_preprocess_and_validate_objects_fail( - self, comprehensive_schema, valid_data - ): - """Test _preprocess_and_validate_objects with invalid data.""" - # Create storage - storage = BaseStorage(schema=comprehensive_schema) - - # Create mix of valid and invalid data - invalid_data = valid_data.copy() - invalid_data["rating"] = "not a number" - - # Process should fail fast on first invalid object - with pytest.raises(ValueError) as exc_info: - 
storage._preprocess_and_validate_objects([invalid_data, valid_data]) - - # Error message should mention validation failure - assert "Validation failed" in str(exc_info.value) - - def test_write_one_validation(self, comprehensive_schema, valid_data): - """Test that write_one validates objects.""" - # Create storage with mocked redis client - client_mock = Mock() - storage = BaseStorage(schema=comprehensive_schema, client=client_mock) - - # Mock hset to avoid actual Redis call - client_mock.hset = Mock() - - # Call write_one - storage.write_one(valid_data) - - # Verify hset was called - client_mock.hset.assert_called_once() - - def test_write_one_validation_fail(self, comprehensive_schema, valid_data): - """Test that write_one fails on invalid data.""" - # Create storage with mocked redis client - client_mock = Mock() - storage = BaseStorage(schema=comprehensive_schema, client=client_mock) - - # Create invalid data - invalid_data = valid_data.copy() - invalid_data["rating"] = "not a number" - - # Call write_one with invalid data - with pytest.raises(ValueError) as exc_info: - storage.write_one(invalid_data) - - # Verify error and that hset was not called - assert "Validation failed" in str(exc_info.value) - client_mock.hset.assert_not_called() - - def test_write_many_validation(self, comprehensive_schema, valid_data): - """Test that write_many validates all objects.""" - # Create storage with mocked redis client - client_mock = Mock() - storage = BaseStorage(schema=comprehensive_schema, client=client_mock) - - # Mock pipeline to avoid actual Redis call - pipeline_mock = Mock() - client_mock.pipeline.return_value.__enter__.return_value = pipeline_mock - - # Call write_many with multiple valid objects - storage.write_many([valid_data, valid_data.copy()]) - - # Verify pipeline executed - pipeline_mock.execute.assert_called_once() - - def test_write_many_validation_fail(self, comprehensive_schema, valid_data): - """Test that write_many fails on invalid data.""" - # Create storage with mocked redis client - client_mock = Mock() - storage = BaseStorage(schema=comprehensive_schema, client=client_mock) - - # Mock pipeline to avoid actual Redis call - pipeline_mock = Mock() - client_mock.pipeline.return_value.__enter__.return_value = pipeline_mock - - # Create invalid data - invalid_data = valid_data.copy() - invalid_data["rating"] = "not a number" - - # Call write_many with invalid data - with pytest.raises(ValueError) as exc_info: - storage.write_many([valid_data, invalid_data]) - - # Verify error and that execute was not called - assert "Validation failed" in str(exc_info.value) - pipeline_mock.execute.assert_not_called() - - -class TestJsonStorageValidation: - """Tests for validation in JsonStorage class.""" - - def test_validate_json_document(self, json_schema, valid_nested_data): - """Test validating a JSON document.""" - # Create JSON storage - storage = JsonStorage(schema=json_schema) - - # Validate object - validated = storage.validate_object(valid_nested_data) - - # Verify object was validated and flattened - assert validated is not None - assert "id" in validated - assert "user" in validated - assert "title" in validated - assert "rating" in validated - - def test_validate_json_missing_paths(self, json_schema): - """Test validating JSON with missing paths.""" - # Create JSON storage - storage = JsonStorage(schema=json_schema) - - # Create object with missing paths - partial_nested = { - "id": "doc1", - "metadata": { - "user": "user123" - # missing rating - }, - "content": { - "title": 
"Test Document" - # missing embedding - }, - } - - # Validate object - validated = storage.validate_object(partial_nested) - - # Verify validation succeeds with missing fields - assert validated is not None - assert "id" in validated - assert "user" in validated - assert "title" in validated - - # Missing fields should be absent - assert "rating" not in validated - assert "embedding" not in validated - - def test_validate_json_invalid_path(self, json_schema, valid_nested_data): - """Test validating JSON with invalid path values.""" - # Create JSON storage - storage = JsonStorage(schema=json_schema) - - # Create object with invalid data - invalid_nested = valid_nested_data.copy() - invalid_nested["metadata"]["rating"] = "not a number" - - # Validation should fail - with pytest.raises(ValueError) as exc_info: - storage.validate_object(invalid_nested) - - # Error message should mention validation failure - assert "Validation failed" in str(exc_info.value) - assert "rating" in str(exc_info.value) - - def test_write_json_document(self, json_schema, valid_nested_data): - """Test writing a JSON document.""" - # Create storage with mocked redis client - client_mock = Mock() - storage = JsonStorage(schema=json_schema, client=client_mock) - - # Mock json.set to avoid actual Redis call - client_mock.json.set = Mock() - - # Call write_one - storage.write_one(valid_nested_data) - - # Verify json.set was called - client_mock.json.set.assert_called_once() - - def test_write_json_validation_fail(self, json_schema, valid_nested_data): - """Test that write fails on invalid JSON.""" - # Create storage with mocked redis client - client_mock = Mock() - storage = JsonStorage(schema=json_schema, client=client_mock) - - # Create invalid data - invalid_nested = valid_nested_data.copy() - invalid_nested["metadata"]["rating"] = "not a number" - - # Call write_one with invalid data - with pytest.raises(ValueError) as exc_info: - storage.write_one(invalid_nested) - - # Verify error and that json.set was not called - assert "Validation failed" in str(exc_info.value) - client_mock.json.set.assert_not_called() - - -@patch("redisvl.schema.validation.validate_object") -class TestValidationIntegration: - """Tests for integration between storage and validation.""" - - def test_validate_object_is_called( - self, mock_validate, comprehensive_schema, valid_data - ): - """Test that validate_object is called from BaseStorage.""" - # Create storage - storage = BaseStorage(schema=comprehensive_schema) - - # Set up mock to return the input data - mock_validate.return_value = valid_data - - # Call validate_object - storage.validate_object(valid_data) - - # Verify mock was called with correct args - mock_validate.assert_called_once_with(comprehensive_schema, valid_data) - - def test_preprocess_calls_validate_for_each_object( - self, mock_validate, comprehensive_schema, valid_data - ): - """Test that _preprocess_and_validate_objects calls validate for each object.""" - # Create storage - storage = BaseStorage(schema=comprehensive_schema) - - # Set up mock to return the input data - mock_validate.return_value = valid_data - - # Call _preprocess_and_validate_objects with multiple objects - objects = [valid_data, valid_data.copy(), valid_data.copy()] - storage._preprocess_and_validate_objects(objects) - - # Verify mock was called for each object - assert mock_validate.call_count == len(objects) - - def test_preprocess_stops_on_first_validation_error( - self, mock_validate, comprehensive_schema, valid_data - ): - """Test that processing 
stops on first validation error.""" - # Create storage - storage = BaseStorage(schema=comprehensive_schema) - - # Set up mock to raise error on second call - mock_validate.side_effect = [ - valid_data, - ValueError("Validation failed for 2nd object"), - valid_data, - ] - - # Call _preprocess_and_validate_objects - objects = [valid_data, valid_data.copy(), valid_data.copy()] - with pytest.raises(ValueError) as exc_info: - storage._preprocess_and_validate_objects(objects) - - # Verify error and that mock was called twice - assert "Validation failed for 2nd object" in str(exc_info.value) - assert mock_validate.call_count == 2 diff --git a/tests/unit/test_validation.py b/tests/unit/test_validation.py index 6431303c..ac67e810 100644 --- a/tests/unit/test_validation.py +++ b/tests/unit/test_validation.py @@ -9,12 +9,13 @@ """ import re -from typing import Any, Dict, List +from typing import Any, Dict, List, Optional, Tuple, Union import pytest from redisvl.schema import IndexSchema from redisvl.schema.fields import FieldTypes, VectorDataType +from redisvl.schema.schema import StorageType from redisvl.schema.type_utils import TypeInferrer from redisvl.schema.validation import ( SchemaModelGenerator, @@ -22,19 +23,21 @@ validate_object, ) +# -------------------- FIXTURES -------------------- + @pytest.fixture -def sample_schema(): - """Create a sample schema with different field types for testing.""" +def sample_hash_schema(): + """Create a sample schema with HASH storage for testing.""" schema_dict = { "index": { - "name": "test-index", + "name": "test-hash-index", "prefix": "test", "key_separator": ":", "storage_type": "hash", }, "fields": [ - {"name": "id", "type": "tag"}, + {"name": "test_id", "type": "tag"}, {"name": "title", "type": "text"}, {"name": "rating", "type": "numeric"}, {"name": "location", "type": "geo"}, @@ -48,6 +51,16 @@ def sample_schema(): "distance_metric": "cosine", }, }, + { + "name": "int_vector", + "type": "vector", + "attrs": { + "algorithm": "flat", + "dims": 3, + "datatype": "int8", + "distance_metric": "l2", + }, + }, ], } return IndexSchema.from_dict(schema_dict) @@ -55,7 +68,7 @@ def sample_schema(): @pytest.fixture def sample_json_schema(): - """Create a sample schema with JSON storage and path fields.""" + """Create a sample schema with JSON storage for testing.""" schema_dict = { "index": { "name": "test-json-index", @@ -64,7 +77,7 @@ def sample_json_schema(): "storage_type": "json", }, "fields": [ - {"name": "id", "type": "tag", "path": "$.id"}, + {"name": "test_id", "type": "tag", "path": "$.test_id"}, {"name": "user", "type": "tag", "path": "$.metadata.user"}, {"name": "title", "type": "text", "path": "$.content.title"}, {"name": "rating", "type": "numeric", "path": "$.metadata.rating"}, @@ -79,71 +92,178 @@ def sample_json_schema(): "distance_metric": "cosine", }, }, + { + "name": "int_vector", + "type": "vector", + "path": "$.content.int_vector", + "attrs": { + "algorithm": "flat", + "dims": 3, + "datatype": "int8", + "distance_metric": "l2", + }, + }, ], } return IndexSchema.from_dict(schema_dict) @pytest.fixture -def valid_data(): - """Sample valid data for testing validation.""" +def valid_hash_data(): + """Sample valid data for testing HASH storage validation.""" return { - "id": "doc1", + "test_id": "doc1", "title": "Test Document", "rating": 4.5, "location": "37.7749,-122.4194", - "embedding": [0.1, 0.2, 0.3, 0.4], + "embedding": b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # Bytes for HASH + "int_vector": b"\x01\x02\x03", # 
Bytes for HASH } @pytest.fixture -def valid_nested_data(): - """Sample valid nested data for testing JSON path validation.""" +def valid_json_data(): + """Sample valid data for testing JSON storage validation.""" return { - "id": "doc1", + "test_id": "doc1", "metadata": {"user": "user123", "rating": 4.5}, - "content": {"title": "Test Document", "embedding": [0.1, 0.2, 0.3, 0.4]}, + "content": { + "title": "Test Document", + "embedding": [0.1, 0.2, 0.3, 0.4], # List for JSON + "int_vector": [1, 2, 3], # List for JSON + }, } +# -------------------- TEST HELPERS -------------------- + + +def validate_field( + schema: IndexSchema, + field_name: str, + value: Any, + should_pass: bool, + error_text: Optional[str] = None, +) -> Tuple[bool, Optional[str]]: + """ + Helper function to validate a field value against a schema. + + Args: + schema: The schema to validate against + field_name: The name of the field to validate + value: The value to validate + should_pass: Whether validation should pass + error_text: Expected error text if validation should fail + + Returns: + Tuple of (validation_success, error_message) + """ + # Get model for schema + model_class = SchemaModelGenerator.get_model_for_schema(schema) + + # Create test data with minimal viable fields + test_data = {field_name: value} + + # Try to validate + try: + validated = model_class.model_validate(test_data) + + # If we got here, validation passed + success = True + error_msg = None + + except Exception as e: + # Validation failed + success = False + error_msg = str(e) + + # Check that the result matches the expectation, surfacing any error message + assert ( + success == should_pass + ), f"Validation {'passed' if success else 'failed'} but expected {'pass' if should_pass else 'fail'}: {error_msg}" + + # Check error text if specified and validation failed + if not success and error_text and error_msg: + assert ( + error_text in error_msg + ), f"Error '{error_msg}' does not contain expected text '{error_text}'" + + return success, error_msg + + +# -------------------- CATEGORY 1: BASIC UNIT TESTS -------------------- + + class TestSchemaModelGenerator: """Tests for the SchemaModelGenerator class.""" - def test_get_model_for_schema(self, sample_schema): + @pytest.mark.parametrize("schema_type", ["hash", "json"]) + def test_get_model_for_schema( + self, schema_type, sample_hash_schema, sample_json_schema + ): """Test generating a model from a schema.""" + # Select schema based on type + schema = sample_hash_schema if schema_type == "hash" else sample_json_schema + # Get model for schema - model_class = SchemaModelGenerator.get_model_for_schema(sample_schema) + model_class = SchemaModelGenerator.get_model_for_schema(schema) # Verify model name matches the index name - assert model_class.__name__ == "test-index__PydanticModel" + assert model_class.__name__ == f"{schema.index.name}__PydanticModel" # Verify model has expected fields - for field_name in sample_schema.field_names: + for field_name in schema.field_names: assert field_name in model_class.model_fields - def test_model_caching(self, sample_schema): + def test_model_caching(self, sample_hash_schema): """Test that models are cached and reused.""" # Get model twice - model1 = SchemaModelGenerator.get_model_for_schema(sample_schema) - model2 = SchemaModelGenerator.get_model_for_schema(sample_schema) + model1 = SchemaModelGenerator.get_model_for_schema(sample_hash_schema) + model2 = 
SchemaModelGenerator.get_model_for_schema(sample_hash_schema) # Verify same instance assert model1 is model2 - def test_type_mapping(self, sample_schema): + @pytest.mark.parametrize( + "field_type,storage_type,expected_type", + [ + (FieldTypes.TEXT, StorageType.HASH, str), + (FieldTypes.TAG, StorageType.HASH, str), + (FieldTypes.NUMERIC, StorageType.HASH, Union[int, float]), + (FieldTypes.GEO, StorageType.HASH, str), + (FieldTypes.VECTOR, StorageType.HASH, bytes), + (FieldTypes.TEXT, StorageType.JSON, str), + (FieldTypes.TAG, StorageType.JSON, str), + (FieldTypes.NUMERIC, StorageType.JSON, Union[int, float]), + (FieldTypes.GEO, StorageType.JSON, str), + (FieldTypes.VECTOR, StorageType.JSON, List[float]), + ], + ) + def test_type_mapping(self, field_type, storage_type, expected_type): """Test mapping Redis field types to Pydantic types.""" - for field_name, field in sample_schema.fields.items(): - field_type = SchemaModelGenerator._map_field_to_pydantic_type(field) - - # Verify each field type maps to expected Python type - if field.type == FieldTypes.TEXT: - assert field_type == str - elif field.type == FieldTypes.TAG: - assert field_type == str - elif field.type == FieldTypes.NUMERIC: - assert field_type.__origin__ == type(Union) # Check it's a Union - elif field.type == FieldTypes.VECTOR: - assert field_type.__origin__ == type(Union) # Check it's a Union + + # Create a basic field of the specified type + class SimpleField: + def __init__(self, ftype): + self.type = ftype + # Add attrs for vector fields + if ftype == FieldTypes.VECTOR: + + class Attrs: + dims = 4 + datatype = VectorDataType.FLOAT32 + + self.attrs = Attrs() + + field = SimpleField(field_type) + field_type_result = SchemaModelGenerator._map_field_to_pydantic_type( + field, storage_type + ) + + assert field_type_result == expected_type def test_unsupported_field_type(self): """Test that an error is raised for unsupported field types.""" @@ -154,362 +274,419 @@ class DummyField: # Mapping should raise ValueError with pytest.raises(ValueError) as exc_info: - SchemaModelGenerator._map_field_to_pydantic_type(DummyField()) + SchemaModelGenerator._map_field_to_pydantic_type( + DummyField(), StorageType.HASH + ) assert "Unsupported field type" in str(exc_info.value) -class TestFieldValidators: - """Tests for field-specific validators.""" - - def test_text_field_validation(self, sample_schema, valid_data): - """Test validation of text fields.""" - model_class = SchemaModelGenerator.get_model_for_schema(sample_schema) - - # Valid text field - valid = valid_data.copy() - validated = model_class.model_validate(valid) - assert validated.title == "Test Document" +class TestJsonPathExtraction: + """Tests for JSON path extraction functionality.""" - # Invalid text field (number) - invalid = valid_data.copy() - invalid["title"] = 123 - with pytest.raises(ValueError) as exc_info: - model_class.model_validate(invalid) - assert "title" in str(exc_info.value) - assert "must be a string" in str(exc_info.value) - - def test_tag_field_validation(self, sample_schema, valid_data): - """Test validation of tag fields.""" - model_class = SchemaModelGenerator.get_model_for_schema(sample_schema) - - # Valid tag field - valid = valid_data.copy() - validated = model_class.model_validate(valid) - assert validated.id == "doc1" - - # Invalid tag field (number) - invalid = valid_data.copy() - invalid["id"] = 123 - with pytest.raises(ValueError) as exc_info: - model_class.model_validate(invalid) - assert "id" in str(exc_info.value) - assert "must be a string" 
in str(exc_info.value) - - def test_numeric_field_validation(self, sample_schema, valid_data): - """Test validation of numeric fields.""" - model_class = SchemaModelGenerator.get_model_for_schema(sample_schema) - - # Valid numeric field (integer) - valid_int = valid_data.copy() - valid_int["rating"] = 5 - validated = model_class.model_validate(valid_int) - assert validated.rating == 5 - - # Valid numeric field (float) - valid_float = valid_data.copy() - valid_float["rating"] = 4.5 - validated = model_class.model_validate(valid_float) - assert validated.rating == 4.5 - - # Invalid numeric field (string) - invalid = valid_data.copy() - invalid["rating"] = "high" - with pytest.raises(ValueError) as exc_info: - model_class.model_validate(invalid) - assert "rating" in str(exc_info.value) - assert "must be a number" in str(exc_info.value) - - # Invalid numeric field (string that looks like number) - invalid_num_str = valid_data.copy() - invalid_num_str["rating"] = "4.5" - with pytest.raises(ValueError) as exc_info: - model_class.model_validate(invalid_num_str) - assert "rating" in str(exc_info.value) - assert "must be a number" in str(exc_info.value) - - def test_geo_field_validation(self, sample_schema, valid_data): - """Test validation of geo fields.""" - model_class = SchemaModelGenerator.get_model_for_schema(sample_schema) - - # Valid geo format - valid_geo = valid_data.copy() - valid_geo["location"] = "37.7749,-122.4194" - validated = model_class.model_validate(valid_geo) - assert validated.location == "37.7749,-122.4194" - - # Invalid geo format (not matching lat,lon pattern) - invalid_geo = valid_data.copy() - invalid_geo["location"] = "invalid_geo" - with pytest.raises(ValueError) as exc_info: - model_class.model_validate(invalid_geo) - assert "location" in str(exc_info.value) - assert "not a valid 'lat,lon' format" in str(exc_info.value) - - # Verify the geo pattern actually works with valid formats - valid_formats = [ - "0,0", - "90,-180", - "-90,180", - "37.7749,-122.4194", - "37.7749,122.4194", - "-37.7749,-122.4194", - ] - for format in valid_formats: - assert re.match(TypeInferrer.GEO_PATTERN.pattern, format) - - # Verify invalid formats fail the pattern - invalid_formats = [ - "invalid", - "37.7749", - "37.7749,", - ",122.4194", - "91,0", # Latitude > 90 - "-91,0", # Latitude < -90 - "0,181", # Longitude > 180 - "0,-181", # Longitude < -180 - ] - for format in invalid_formats: - assert not re.match(TypeInferrer.GEO_PATTERN.pattern, format) - - def test_vector_field_validation_float(self, sample_schema, valid_data): - """Test validation of float vector fields.""" - model_class = SchemaModelGenerator.get_model_for_schema(sample_schema) - - # Valid vector - valid_vector = valid_data.copy() - valid_vector["embedding"] = [0.1, 0.2, 0.3, 0.4] - validated = model_class.model_validate(valid_vector) - assert validated.embedding == [0.1, 0.2, 0.3, 0.4] - - # Valid vector as bytes - valid_bytes = valid_data.copy() - valid_bytes["embedding"] = b"\x00\x01\x02\x03" - validated = model_class.model_validate(valid_bytes) - assert validated.embedding == b"\x00\x01\x02\x03" - - # Invalid vector type (string) - invalid_type = valid_data.copy() - invalid_type["embedding"] = "not a vector" - with pytest.raises(ValueError) as exc_info: - model_class.model_validate(invalid_type) - assert "embedding" in str(exc_info.value) - - # Invalid dimensions - invalid_dims = valid_data.copy() - invalid_dims["embedding"] = [0.1, 0.2, 0.3] # 3 dimensions instead of 4 - with pytest.raises(ValueError) as exc_info: - 
model_class.model_validate(invalid_dims) - assert "embedding" in str(exc_info.value) - assert "dimensions" in str(exc_info.value) + @pytest.mark.parametrize( + "path,expected_value", + [ + ("$.test_id", "doc1"), + ("$.metadata.user", "user123"), + ("$.metadata.rating", 4.5), + ("$.content.title", "Test Document"), + ("$.content.embedding", [0.1, 0.2, 0.3, 0.4]), + ("metadata.user", "user123"), # alternate format + ("$.nonexistent", None), # nonexistent path + ("$.metadata.nonexistent", None), # nonexistent nested path + ], + ) + def test_extract_from_json_path(self, valid_json_data, path, expected_value): + """Test extracting values using JSON paths.""" + assert extract_from_json_path(valid_json_data, path) == expected_value - # Invalid vector values - invalid_values = valid_data.copy() - invalid_values["embedding"] = [0.1, "string", 0.3, 0.4] - with pytest.raises(ValueError) as exc_info: - model_class.model_validate(invalid_values) - assert "embedding" in str(exc_info.value) - - def test_vector_field_validation_int(self, sample_schema, valid_data): - """Test validation of integer vector fields.""" - model_class = SchemaModelGenerator.get_model_for_schema(sample_schema) - - # Valid integer vector - valid_vector = valid_data.copy() - valid_vector["int_vector"] = [1, 2, 3] - validated = model_class.model_validate(valid_vector) - assert validated.int_vector == [1, 2, 3] - - # Invalid: float values in int vector - invalid_floats = valid_data.copy() - invalid_floats["int_vector"] = [0.1, 0.2, 0.3] - with pytest.raises(ValueError) as exc_info: - model_class.model_validate(invalid_floats) - assert "int_vector" in str(exc_info.value) - assert "integer values" in str(exc_info.value) - # Invalid: values outside INT8 range - invalid_range = valid_data.copy() - invalid_range["int_vector"] = [1000, 2000, 3000] # Outside INT8 range - with pytest.raises(ValueError) as exc_info: - model_class.model_validate(invalid_range) - assert "int_vector" in str(exc_info.value) - assert "must be between" in str(exc_info.value) +# # -------------------- CATEGORY 2: PARAMETRIZED VALIDATOR TESTS -------------------- -class TestJsonPathValidation: - """Tests for JSON path-based validation.""" +class TestBasicFieldValidation: + """Tests for validating non-vector field types.""" - def test_extract_from_json_path(self, valid_nested_data): - """Test extracting values using JSON paths.""" - # Test simple path - assert extract_from_json_path(valid_nested_data, "$.id") == "doc1" + @pytest.mark.parametrize( + "field_type,field_name,valid_values,invalid_values", + [ + # TEXT fields + ( + "text", + "title", + [("Test Document", None), ("123", None), ("", None)], + [(123, "string"), (True, "string"), ([], "string")], + ), + # TAG fields + ( + "tag", + "test_id", + [("doc1", None), ("123", None), ("abc,def", None), ("", None)], + [ + (123, "string"), + (True, "string"), + ([], "string"), + ([1, 2, 3], "string"), + ], + ), + # NUMERIC fields + ( + "numeric", + "rating", + [(5, None), (4.5, None), (0, None), (-1.5, None), ("5.3", None)], + [("high", "number"), (True, "boolean"), ([], "number")], + ), + # GEO fields + ( + "geo", + "location", + [ + ("0,0", None), + ("90,-180", None), + ("-90,180", None), + ("37.7749,-122.4194", None), + ], + [ + ("invalid_geo", "lat,lon"), + ("37.7749", "lat,lon"), + ("37.7749,", "lat,lon"), + (",122.4194", "lat,lon"), + ("91,0", "lat,lon"), # Latitude > 90 + ("-91,0", "lat,lon"), # Latitude < -90 + ("0,181", "lat,lon"), # Longitude > 180 + ("0,-181", "lat,lon"), # Longitude < -180 + (123, "string"), + 
(True, "string"), + ], + ), + ], + ) + def test_basic_field_validation( + self, sample_hash_schema, field_type, field_name, valid_values, invalid_values + ): + """ + Test validation of basic field types (text, tag, numeric, geo). + + This test consolidates previously separate tests for different field types. + """ + # Test valid values + for value, _ in valid_values: + validate_field(sample_hash_schema, field_name, value, True) + + # For GEO fields, also verify pattern + if field_type == "geo" and isinstance(value, str): + assert re.match(TypeInferrer.GEO_PATTERN.pattern, value) + + # Test invalid values + for value, error_text in invalid_values: + validate_field(sample_hash_schema, field_name, value, False, error_text) + + # For GEO fields, also verify pattern failure + if field_type == "geo" and isinstance(value, str): + assert not re.match(TypeInferrer.GEO_PATTERN.pattern, value) - # Test nested path - assert extract_from_json_path(valid_nested_data, "$.metadata.user") == "user123" - assert extract_from_json_path(valid_nested_data, "$.metadata.rating") == 4.5 - assert ( - extract_from_json_path(valid_nested_data, "$.content.title") - == "Test Document" + @pytest.mark.parametrize( + "test_case", + [ + # Valid cases for HASH storage (bytes) + { + "storage": StorageType.HASH, + "field_name": "embedding", + "value": b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", + "valid": True, + "error_text": None, + "description": "Valid bytes for HASH storage", + }, + { + "storage": StorageType.HASH, + "field_name": "int_vector", + "value": b"\x01\x02\x03", + "valid": True, + "error_text": None, + "description": "Valid bytes for HASH storage (int vector)", + }, + # Invalid cases for HASH storage (trying to use lists) + { + "storage": StorageType.HASH, + "field_name": "embedding", + "value": [0.1, 0.2, 0.3, 0.4], + "valid": False, + "error_text": "bytes", + "description": "List not valid for HASH storage", + }, + # Valid cases for JSON storage (lists) + { + "storage": StorageType.JSON, + "field_name": "embedding", + "value": [0.1, 0.2, 0.3, 0.4], + "valid": True, + "error_text": None, + "description": "Valid list for JSON storage", + }, + { + "storage": StorageType.JSON, + "field_name": "int_vector", + "value": [1, 2, 3], + "valid": True, + "error_text": None, + "description": "Valid int list for JSON storage", + }, + # Invalid cases for JSON storage (trying to use bytes) + { + "storage": StorageType.JSON, + "field_name": "embedding", + "value": b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", + "valid": False, + "error_text": "list", + "description": "Bytes not valid for JSON storage", + }, + # Dimension validation + { + "storage": StorageType.JSON, + "field_name": "embedding", + "value": [0.1, 0.2, 0.3], # Should be 4 dimensions + "valid": False, + "error_text": "dimensions", + "description": "Wrong dimensions for vector", + }, + # Type validation for int vectors + { + "storage": StorageType.JSON, + "field_name": "int_vector", + "value": [0.1, 0.2, 0.3], # Should be integers + "valid": False, + "error_text": "integer", + "description": "Float values in int vector", + }, + ], + ) + def test_vector_field_validation( + self, sample_hash_schema, sample_json_schema, test_case + ): + """Test validation of vector fields with storage-specific requirements.""" + # Select the appropriate schema based on storage type + schema = ( + sample_hash_schema + if test_case["storage"] == StorageType.HASH + else sample_json_schema ) - assert 
extract_from_json_path(valid_nested_data, "$.content.embedding") == [ - 0.1, - 0.2, - 0.3, - 0.4, - ] - - # Test non-existent path - assert extract_from_json_path(valid_nested_data, "$.nonexistent") is None - assert ( - extract_from_json_path(valid_nested_data, "$.metadata.nonexistent") is None + + # Validate the field + validate_field( + schema, + test_case["field_name"], + test_case["value"], + test_case["valid"], + test_case["error_text"], ) - # Test path with alternate formats - assert extract_from_json_path(valid_nested_data, "metadata.user") == "user123" - def test_validate_nested_json(self, sample_json_schema, valid_nested_data): - """Test validating a nested JSON object.""" - # Validate nested object - validated = validate_object(sample_json_schema, valid_nested_data) +class TestNestedJsonValidation: + """Tests for JSON path-based validation with nested structures.""" - # Verify validation succeeds and flattens the structure - assert validated is not None - assert "id" in validated - assert "user" in validated - assert "title" in validated - assert "rating" in validated - assert "embedding" in validated - - # Verify values were extracted correctly - assert validated["id"] == "doc1" - assert validated["user"] == "user123" - assert validated["title"] == "Test Document" - assert validated["rating"] == 4.5 - assert validated["embedding"] == [0.1, 0.2, 0.3, 0.4] - - def test_validate_nested_json_missing_paths(self, sample_json_schema): - """Test validating a nested JSON with missing paths.""" - # Nested object with missing paths - partial_nested = { - "id": "doc1", - "metadata": { - "user": "user123" - # missing rating + @pytest.mark.parametrize( + "test_case", + [ + # Complete valid data + { + "data": { + "test_id": "doc1", + "metadata": {"user": "user123", "rating": 4.5}, + "content": { + "title": "Test Document", + "embedding": [0.1, 0.2, 0.3, 0.4], + "int_vector": [1, 2, 3], + }, + }, + "expected_fields": [ + "test_id", + "user", + "title", + "rating", + "embedding", + "int_vector", + ], + "missing_fields": [], }, - "content": { - "title": "Test Document" - # missing embedding + # Partial data - missing some fields + { + "data": { + "test_id": "doc1", + "metadata": {"user": "user123"}, + "content": {"title": "Test Document"}, + }, + "expected_fields": ["test_id", "user", "title"], + "missing_fields": ["rating", "embedding", "int_vector"], }, - } - - # Validate object - validated = validate_object(sample_json_schema, partial_nested) - - # Verify validation succeeds with partial data - assert validated is not None - assert "id" in validated - assert "user" in validated - assert "title" in validated - assert "rating" not in validated - assert "embedding" not in validated - - -class TestObjectValidation: - """Tests for complete object validation.""" - - def test_validate_valid_object(self, sample_schema, valid_data): - """Test validating a valid object.""" + # Minimal data + { + "data": {"test_id": "doc1"}, + "expected_fields": ["test_id"], + "missing_fields": [ + "user", + "title", + "rating", + "embedding", + "int_vector", + ], + }, + ], + ) + def test_nested_json_validation(self, sample_json_schema, test_case): + """Test validating nested JSON with various data structures.""" # Validate object - validated = validate_object(sample_schema, valid_data) + validated = validate_object(sample_json_schema, test_case["data"]) - # Verify no exceptions and data is returned - assert validated is not None + # Verify expected fields are present + for field in test_case["expected_fields"]: + assert 
field in validated - # Verify all fields are present - for field_name in sample_schema.field_names: - if field_name in valid_data: - assert field_name in validated + # Verify missing fields are not present + for field in test_case["missing_fields"]: + assert field not in validated - def test_validate_missing_optional_fields(self, sample_schema): - """Test validating an object with missing optional fields.""" - # Object with only some fields - partial_data = {"id": "doc1", "title": "Test Document"} - # Validate object - validated = validate_object(sample_schema, partial_data) +class TestEndToEndValidation: + """End-to-end tests for complete object validation against schema.""" - # Verify validation passes with partial data - assert validated is not None - assert "id" in validated - assert "title" in validated - assert "rating" not in validated - assert "location" not in validated - assert "embedding" not in validated - - def test_explicit_none_fields_are_excluded(self, sample_schema): - """Test that fields explicitly set to None are excluded from output.""" - # Object with some fields set to None - data_with_none = { - "id": "doc1", + @pytest.mark.parametrize( + "schema_type,data,expected_result", + [ + # Valid HASH data + ( + "hash", + { + "test_id": "doc1", + "title": "Test Document", + "rating": 4.5, + "location": "37.7749,-122.4194", + "embedding": b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", + "int_vector": b"\x01\x02\x03", + }, + { + "success": True, + "fields": [ + "test_id", + "title", + "rating", + "location", + "embedding", + "int_vector", + ], + }, + ), + # Partial HASH data + ( + "hash", + {"test_id": "doc1", "title": "Test Document"}, + {"success": True, "fields": ["test_id", "title"]}, + ), + # Valid JSON data + ( + "json", + { + "test_id": "doc1", + "metadata": {"user": "user123", "rating": 4.5}, + "content": { + "title": "Test Document", + "embedding": [0.1, 0.2, 0.3, 0.4], + "int_vector": [1, 2, 3], + }, + }, + { + "success": True, + "fields": [ + "test_id", + "user", + "rating", + "title", + "embedding", + "int_vector", + ], + }, + ), + # Invalid HASH data - wrong vector type + ( + "hash", + { + "test_id": "doc1", + "embedding": [0.1, 0.2, 0.3, 0.4], # Should be bytes for HASH + }, + {"success": False, "error_field": "embedding"}, + ), + # Invalid JSON data - wrong vector type + ( + "json", + { + "test_id": "doc1", + "content": { + "embedding": b"\x00\x00\x00\x00" # Should be list for JSON + }, + }, + {"success": False, "error_field": "embedding"}, + ), + ], + ) + def test_end_to_end_validation( + self, sample_hash_schema, sample_json_schema, schema_type, data, expected_result + ): + """Test validating complete objects with various data scenarios.""" + # Select schema based on type + schema = sample_hash_schema if schema_type == "hash" else sample_json_schema + + if expected_result["success"]: + # Validation should succeed + validated = validate_object(schema, data) + + # Verify expected fields are present + for field in expected_result["fields"]: + assert field in validated + else: + # Validation should fail + with pytest.raises(ValueError) as exc_info: + validate_object(schema, data) + + # Error should mention the field + assert expected_result["error_field"] in str(exc_info.value) + + +# -------------------- ADDITIONAL TESTS -------------------- + + +class TestEdgeCases: + """Tests for edge cases and boundary conditions.""" + + def test_empty_object_validation(self, sample_hash_schema, sample_json_schema): + """Test validating an empty object.""" 
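+        # A sketch of why the asserts below should hold (an assumption to verify,
+        # per the TODO: the generated pydantic model marks every schema field as
+        # Optional with a None default, and validate_object() returns
+        # model_validate(obj).model_dump(exclude_none=True)):
+        #
+        #   model = SchemaModelGenerator.get_model_for_schema(sample_hash_schema)
+        #   assert model.model_validate({}).model_dump(exclude_none=True) == {}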
+ # Empty object should validate for both storage types (all fields are optional) + # TODO confirm if this is indeed true + assert validate_object(sample_hash_schema, {}) == {} + assert validate_object(sample_json_schema, {}) == {} + + def test_additional_fields(self, sample_hash_schema, valid_hash_data): + """Test that additional fields not in schema are NOT ignored.""" + # Add extra field not in schema + data_with_extra = valid_hash_data.copy() + data_with_extra["extra_field"] = "some value" + + # Validation should succeed and preserve the extra field + validated = validate_object(sample_hash_schema, data_with_extra) + assert "extra_field" in validated + + def test_explicit_none_fields_excluded(self, sample_hash_schema): + """Test that fields explicitly set to None are excluded.""" + # Data with explicit None values + data = { + "test_id": "doc1", "title": "Test Document", "rating": None, "location": None, } - # Validate object - validated = validate_object(sample_schema, data_with_none) + # Validate and check fields + validated = validate_object(sample_hash_schema, data) + assert "test_id" in validated assert "title" in validated assert "rating" not in validated assert "location" not in validated - - def test_validate_with_multiple_invalid_fields(self, sample_schema, valid_data): - """Test validation with multiple invalid fields.""" - # Create object with multiple invalid fields - invalid_data = valid_data.copy() - invalid_data["title"] = 123 - invalid_data["rating"] = "not a number" - invalid_data["location"] = "invalid" - - # Validation should fail with the first error encountered - with pytest.raises(ValueError) as exc_info: - validate_object(sample_schema, invalid_data) - - # Error message should mention validation failure - assert "Validation failed" in str(exc_info.value) - - @pytest.mark.parametrize( - "case", - [ - {"field": "title", "value": 123, "error_text": "must be a string"}, - {"field": "rating", "value": "high", "error_text": "must be a number"}, - { - "field": "location", - "value": "invalid_geo", - "error_text": "not a valid 'lat,lon' format", - }, - { - "field": "embedding", - "value": [0.1, 0.2, 0.3], - "error_text": "dimensions", - }, - ], - ) - def test_validate_invalid_field_parametrized(self, sample_schema, valid_data, case): - """Parametrized test for validating invalid fields.""" - # Create invalid data according to test case - invalid_data = valid_data.copy() - invalid_data[case["field"]] = case["value"] - - # Validate and check error - with pytest.raises(ValueError) as exc_info: - validate_object(sample_schema, invalid_data) - - # Error should mention the field and specific issue - error_message = str(exc_info.value) - assert case["field"] in error_message - assert case["error_text"] in error_message From fdd70a047ae13c60a5ce4627cfd9f541d35d16ea Mon Sep 17 00:00:00 2001 From: Tyler Hutcherson Date: Wed, 26 Mar 2025 11:05:53 -0400 Subject: [PATCH 03/11] update tests, docs, and formatting/linting --- docs/user_guide/01_getting_started.ipynb | 236 ++-- docs/user_guide/data_validation.ipynb | 1102 ------------------ redisvl/index/index.py | 32 +- redisvl/index/storage.py | 10 +- redisvl/schema/validation.py | 19 +- tests/integration/test_async_search_index.py | 10 +- tests/integration/test_search_index.py | 6 +- tests/unit/test_edge_cases.py | 451 ------- tests/unit/test_fields.py | 55 - tests/unit/test_storage.py | 24 +- 10 files changed, 185 insertions(+), 1760
deletions(-) delete mode 100644 docs/user_guide/data_validation.ipynb delete mode 100644 tests/unit/test_edge_cases.py diff --git a/docs/user_guide/01_getting_started.ipynb b/docs/user_guide/01_getting_started.ipynb index 6130f589..7ab3a234 100644 --- a/docs/user_guide/01_getting_started.ipynb +++ b/docs/user_guide/01_getting_started.ipynb @@ -81,7 +81,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -126,7 +126,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -173,29 +173,7 @@ "source": [ "## Create a `SearchIndex`\n", "\n", - "With the schema and sample dataset ready, instantiate a `SearchIndex`:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "from redisvl.index import SearchIndex\n", - "\n", - "index = SearchIndex.from_dict(schema)\n", - "# or use .from_yaml('schema_file.yaml')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now we also need to facilitate a Redis connection. There are a few ways to do this:\n", - "\n", - "- Create & manage your own client connection (recommended)\n", - "- Provide a Redis URL and let RedisVL connect on your behalf (by default, it will connect to \"redis://localhost:6379\")" + "With the schema and sample dataset ready, create a `SearchIndex`." ] }, { @@ -209,31 +187,15 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 11, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ + "from redisvl.index import SearchIndex\n", "from redis import Redis\n", "\n", "client = Redis.from_url(\"redis://localhost:6379\")\n", - "index = SearchIndex.from_dict(schema, redis_client=client)\n", - "\n", - "# alternatively, provide an async Redis client object to enable async index operations\n", - "# from redis.asyncio import Redis\n", - "# from redisvl.index import AsyncSearchIndex\n", - "# client = Redis.from_url(\"redis://localhost:6379\")\n", - "# index = AsyncSearchIndex.from_dict(schema, redis_client=client)\n" + "index = SearchIndex.from_dict(schema, redis_client=client, validate_on_load=True)" ] }, { @@ -262,24 +224,24 @@ } ], "source": [ - "index = SearchIndex.from_dict(schema, redis_url=\"redis://localhost:6379\")\n", + "index = SearchIndex.from_dict(schema, redis_url=\"redis://localhost:6379\", validate_on_load=True)\n", "\n", "# If you don't specify a client or Redis URL, the index will attempt to\n", - "# connect to Redis at the default address (\"redis://localhost:6379\")." + "# connect to Redis at the default address \"redis://localhost:6379\"." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Create the underlying index\n", + "### Create the index\n", "\n", "Now that we are connected to Redis, we need to run the create command." ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -303,15 +265,15 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "\u001b[32m11:50:15\u001b[0m \u001b[34m[RedisVL]\u001b[0m \u001b[1;30mINFO\u001b[0m Indices:\n", - "\u001b[32m11:50:15\u001b[0m \u001b[34m[RedisVL]\u001b[0m \u001b[1;30mINFO\u001b[0m 1. 
user_simple\n" + "\u001b[32m10:59:25\u001b[0m \u001b[34m[RedisVL]\u001b[0m \u001b[1;30mINFO\u001b[0m Indices:\n", + "\u001b[32m10:59:25\u001b[0m \u001b[34m[RedisVL]\u001b[0m \u001b[1;30mINFO\u001b[0m 1. user_simple\n" ] } ], @@ -321,7 +283,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -359,19 +321,22 @@ "source": [ "## Load Data to `SearchIndex`\n", "\n", - "Load the sample dataset to Redis:" + "Load the sample dataset to Redis.\n", + "\n", + "### Validate data entries on load\n", + "RedisVL uses pydantic validation under the hood to ensure loaded data is valid and confirms to your schema. This setting is optional and can be configured in the `SearchIndex` class." ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "['user_simple_docs:01JM2NWFWNH0BNA640MT5DS8BD', 'user_simple_docs:01JM2NWFWNF4S2V4E4HYG25CVA', 'user_simple_docs:01JM2NWFWNBFXJJ4PV9F4KMJSE']\n" + "['user_simple_docs:01JQ9FEZ4GAAYT9W7BWAF7CV18', 'user_simple_docs:01JQ9FEZ4JCE5FD1D5QY6BAJ0J', 'user_simple_docs:01JQ9FEZ4KF9AZYBKMYNMYBZ5A']\n" ] } ], @@ -388,6 +353,96 @@ ">By default, `load` will create a unique Redis key as a combination of the index key `prefix` and a random ULID. You can also customize the key by providing direct keys or pointing to a specified `id_field` on load." ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Load invalid data\n", + "This will raise a `SchemaValidationError` if `validate_on_load` is set to true in the `SearchIndex` class." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "11:00:03 redisvl.index.index ERROR Schema validation error while loading data\n", + "Traceback (most recent call last):\n", + " File \"/Users/tyler.hutcherson/Documents/AppliedAI/redis-vl-python/redisvl/index/storage.py\", line 204, in _preprocess_and_validate_objects\n", + " processed_obj = self._validate(processed_obj)\n", + " File \"/Users/tyler.hutcherson/Documents/AppliedAI/redis-vl-python/redisvl/index/storage.py\", line 160, in _validate\n", + " return validate_object(self.index_schema, obj)\n", + " File \"/Users/tyler.hutcherson/Documents/AppliedAI/redis-vl-python/redisvl/schema/validation.py\", line 274, in validate_object\n", + " validated = model_class.model_validate(flat_obj)\n", + " File \"/Users/tyler.hutcherson/Library/Caches/pypoetry/virtualenvs/redisvl-VnTEShF2-py3.13/lib/python3.13/site-packages/pydantic/main.py\", line 627, in model_validate\n", + " return cls.__pydantic_validator__.validate_python(\n", + " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^\n", + " obj, strict=strict, from_attributes=from_attributes, context=context\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " )\n", + " ^\n", + "pydantic_core._pydantic_core.ValidationError: 1 validation error for user_simple__PydanticModel\n", + "user_embedding\n", + " Input should be a valid bytes [type=bytes_type, input_value=True, input_type=bool]\n", + " For further information visit https://errors.pydantic.dev/2.10/v/bytes_type\n", + "\n", + "The above exception was the direct cause of the following exception:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/Users/tyler.hutcherson/Documents/AppliedAI/redis-vl-python/redisvl/index/index.py\", line 586, in load\n", + " return 
self._storage.write(\n", + " ~~~~~~~~~~~~~~~~~~~^\n", + " self._redis_client, # type: ignore\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " ...<6 lines>...\n", + " validate=self._validate_on_load,\n", + " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", + " )\n", + " ^\n", + " File \"/Users/tyler.hutcherson/Documents/AppliedAI/redis-vl-python/redisvl/index/storage.py\", line 265, in write\n", + " prepared_objects = self._preprocess_and_validate_objects(\n", + " list(objects), # Convert Iterable to List\n", + " ...<3 lines>...\n", + " validate=validate,\n", + " )\n", + " File \"/Users/tyler.hutcherson/Documents/AppliedAI/redis-vl-python/redisvl/index/storage.py\", line 211, in _preprocess_and_validate_objects\n", + " raise SchemaValidationError(str(e), index=i) from e\n", + "redisvl.exceptions.SchemaValidationError: Validation failed for object at index 0: 1 validation error for user_simple__PydanticModel\n", + "user_embedding\n", + " Input should be a valid bytes [type=bytes_type, input_value=True, input_type=bool]\n", + " For further information visit https://errors.pydantic.dev/2.10/v/bytes_type\n" + ] + }, + { + "ename": "SchemaValidationError", + "evalue": "Validation failed for object at index 0: 1 validation error for user_simple__PydanticModel\nuser_embedding\n Input should be a valid bytes [type=bytes_type, input_value=True, input_type=bool]\n For further information visit https://errors.pydantic.dev/2.10/v/bytes_type", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValidationError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m~/Documents/AppliedAI/redis-vl-python/redisvl/index/storage.py:204\u001b[0m, in \u001b[0;36mBaseStorage._preprocess_and_validate_objects\u001b[0;34m(self, objects, id_field, keys, preprocess, validate)\u001b[0m\n\u001b[1;32m 203\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m validate:\n\u001b[0;32m--> 204\u001b[0m processed_obj \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_validate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mprocessed_obj\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 206\u001b[0m \u001b[38;5;66;03m# Store valid object with its key for writing\u001b[39;00m\n", + "File \u001b[0;32m~/Documents/AppliedAI/redis-vl-python/redisvl/index/storage.py:160\u001b[0m, in \u001b[0;36mBaseStorage._validate\u001b[0;34m(self, obj)\u001b[0m\n\u001b[1;32m 159\u001b[0m \u001b[38;5;66;03m# Pass directly to validation function and let any errors propagate\u001b[39;00m\n\u001b[0;32m--> 160\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mvalidate_object\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mindex_schema\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/Documents/AppliedAI/redis-vl-python/redisvl/schema/validation.py:274\u001b[0m, in \u001b[0;36mvalidate_object\u001b[0;34m(schema, obj)\u001b[0m\n\u001b[1;32m 273\u001b[0m \u001b[38;5;66;03m# Validate against model\u001b[39;00m\n\u001b[0;32m--> 274\u001b[0m validated \u001b[38;5;241m=\u001b[39m \u001b[43mmodel_class\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel_validate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mflat_obj\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 275\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m 
validated\u001b[38;5;241m.\u001b[39mmodel_dump(exclude_none\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n", + "File \u001b[0;32m~/Library/Caches/pypoetry/virtualenvs/redisvl-VnTEShF2-py3.13/lib/python3.13/site-packages/pydantic/main.py:627\u001b[0m, in \u001b[0;36mBaseModel.model_validate\u001b[0;34m(cls, obj, strict, from_attributes, context)\u001b[0m\n\u001b[1;32m 626\u001b[0m __tracebackhide__ \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[0;32m--> 627\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m__pydantic_validator__\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalidate_python\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 628\u001b[0m \u001b[43m \u001b[49m\u001b[43mobj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstrict\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstrict\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfrom_attributes\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfrom_attributes\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcontext\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcontext\u001b[49m\n\u001b[1;32m 629\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[0;31mValidationError\u001b[0m: 1 validation error for user_simple__PydanticModel\nuser_embedding\n Input should be a valid bytes [type=bytes_type, input_value=True, input_type=bool]\n For further information visit https://errors.pydantic.dev/2.10/v/bytes_type", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[0;31mSchemaValidationError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[16], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m keys \u001b[38;5;241m=\u001b[39m \u001b[43mindex\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload\u001b[49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[43m{\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43muser_embedding\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m}\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/Documents/AppliedAI/redis-vl-python/redisvl/index/index.py:586\u001b[0m, in \u001b[0;36mSearchIndex.load\u001b[0;34m(self, data, id_field, keys, ttl, preprocess, batch_size)\u001b[0m\n\u001b[1;32m 556\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Load objects to the Redis database. 
Returns the list of keys loaded\u001b[39;00m\n\u001b[1;32m 557\u001b[0m \u001b[38;5;124;03mto Redis.\u001b[39;00m\n\u001b[1;32m 558\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 583\u001b[0m \u001b[38;5;124;03m RedisVLError: If there's an error loading data to Redis.\u001b[39;00m\n\u001b[1;32m 584\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 585\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 586\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_storage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwrite\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 587\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_redis_client\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# type: ignore\u001b[39;49;00m\n\u001b[1;32m 588\u001b[0m \u001b[43m \u001b[49m\u001b[43mobjects\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 589\u001b[0m \u001b[43m \u001b[49m\u001b[43mid_field\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mid_field\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 590\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkeys\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 591\u001b[0m \u001b[43m \u001b[49m\u001b[43mttl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mttl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 592\u001b[0m \u001b[43m \u001b[49m\u001b[43mpreprocess\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpreprocess\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 593\u001b[0m \u001b[43m \u001b[49m\u001b[43mbatch_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbatch_size\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 594\u001b[0m \u001b[43m \u001b[49m\u001b[43mvalidate\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_validate_on_load\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 595\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 596\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m SchemaValidationError:\n\u001b[1;32m 597\u001b[0m \u001b[38;5;66;03m# Pass through validation errors directly\u001b[39;00m\n\u001b[1;32m 598\u001b[0m logger\u001b[38;5;241m.\u001b[39mexception(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSchema validation error while loading data\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "File \u001b[0;32m~/Documents/AppliedAI/redis-vl-python/redisvl/index/storage.py:265\u001b[0m, in \u001b[0;36mBaseStorage.write\u001b[0;34m(self, redis_client, objects, id_field, keys, ttl, preprocess, batch_size, validate)\u001b[0m\n\u001b[1;32m 262\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m []\n\u001b[1;32m 264\u001b[0m \u001b[38;5;66;03m# Pass 1: Preprocess and validate all objects\u001b[39;00m\n\u001b[0;32m--> 265\u001b[0m prepared_objects \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_preprocess_and_validate_objects\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 266\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mlist\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mobjects\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Convert Iterable to List\u001b[39;49;00m\n\u001b[1;32m 267\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mid_field\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mid_field\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 268\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkeys\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 269\u001b[0m \u001b[43m \u001b[49m\u001b[43mpreprocess\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpreprocess\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 270\u001b[0m \u001b[43m \u001b[49m\u001b[43mvalidate\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mvalidate\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 271\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 273\u001b[0m \u001b[38;5;66;03m# Pass 2: Write all valid objects in batches\u001b[39;00m\n\u001b[1;32m 274\u001b[0m added_keys \u001b[38;5;241m=\u001b[39m []\n", + "File \u001b[0;32m~/Documents/AppliedAI/redis-vl-python/redisvl/index/storage.py:211\u001b[0m, in \u001b[0;36mBaseStorage._preprocess_and_validate_objects\u001b[0;34m(self, objects, id_field, keys, preprocess, validate)\u001b[0m\n\u001b[1;32m 207\u001b[0m prepared_objects\u001b[38;5;241m.\u001b[39mappend((key, processed_obj))\n\u001b[1;32m 209\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ValidationError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 210\u001b[0m \u001b[38;5;66;03m# Convert Pydantic ValidationError to SchemaValidationError with index context\u001b[39;00m\n\u001b[0;32m--> 211\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m SchemaValidationError(\u001b[38;5;28mstr\u001b[39m(e), index\u001b[38;5;241m=\u001b[39mi) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01me\u001b[39;00m\n\u001b[1;32m 212\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 213\u001b[0m \u001b[38;5;66;03m# Capture other exceptions with context\u001b[39;00m\n\u001b[1;32m 214\u001b[0m object_id \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mat index \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mi\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n", + "\u001b[0;31mSchemaValidationError\u001b[0m: Validation failed for object at index 0: 1 validation error for user_simple__PydanticModel\nuser_embedding\n Input should be a valid bytes [type=bytes_type, input_value=True, input_type=bool]\n For further information visit https://errors.pydantic.dev/2.10/v/bytes_type" + ] + } + ], + "source": [ + "keys = index.load([{\"user_embedding\": True}])" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -398,14 +453,14 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "['user_simple_docs:01JM2NWJGYMJ0QTR5YB4MB0BX9']\n" + "['user_simple_docs:01JQ9FHCB1B64GXF6WPK127VZ6']\n" ] } ], @@ -435,7 +490,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -460,20 +515,13 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 19, "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "*=>[KNN 3 @user_embedding $vector AS vector_distance] RETURN 6 user age job credit_score vector_distance vector_distance SORTBY vector_distance ASC DIALECT 2 LIMIT 0 3\n" - ] - }, { "data": { "text/html": [ - 
"table>vector_distanceuseragejobcredit_score0john1engineerhigh0mary2doctorlow0.0566299557686tyler9engineerhigh" + "
vector_distanceuseragejobcredit_score
0john1engineerhigh
0mary2doctorlow
0.0566299557686tyler9engineerhigh
" ], "text/plain": [ "" @@ -500,7 +548,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -519,7 +567,7 @@ " 'datatype': 'float32'}}]}" ] }, - "execution_count": 13, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -530,32 +578,20 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 21, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "from redisvl.index import AsyncSearchIndex\n", "from redis.asyncio import Redis\n", "\n", "client = Redis.from_url(\"redis://localhost:6379\")\n", - "\n", "index = AsyncSearchIndex.from_dict(schema, redis_client=client)" ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -596,7 +632,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 23, "metadata": {}, "outputs": [], "source": [ @@ -621,14 +657,14 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "11:28:32 redisvl.index.index INFO Index already exists, overwriting.\n" + "11:01:30 redisvl.index.index INFO Index already exists, overwriting.\n" ] } ], @@ -639,13 +675,13 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
<table><tr><th>vector_distance</th><th>user</th><th>age</th><th>job</th><th>credit_score</th></tr>
<tr><td>0</td><td>mary</td><td>2</td><td>doctor</td><td>low</td></tr>
<tr><td>0</td><td>john</td><td>1</td><td>engineer</td><td>high</td></tr>
<tr><td>0.0566299557686</td><td>tyler</td><td>9</td><td>engineer</td><td>high</td></tr></table>
" + "
<table><tr><th>vector_distance</th><th>user</th><th>age</th><th>job</th><th>credit_score</th></tr>
<tr><td>0</td><td>john</td><td>1</td><td>engineer</td><td>high</td></tr>
<tr><td>0</td><td>mary</td><td>2</td><td>doctor</td><td>low</td></tr>
<tr><td>0.0566299557686</td><td>tyler</td><td>9</td><td>engineer</td><td>high</td></tr></table>
" ], "text/plain": [ "" @@ -671,7 +707,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 26, "metadata": {}, "outputs": [ { @@ -684,9 +720,9 @@ "│ Stat Key │ Value │\n", "├─────────────────────────────┼─────────────┤\n", "│ num_docs │ 4 │\n", - "│ num_terms │ 4 │\n", + "│ num_terms │ 0 │\n", "│ max_doc_id │ 4 │\n", - "│ num_records │ 22 │\n", + "│ num_records │ 20 │\n", "│ percent_indexed │ 1 │\n", "│ hash_indexing_failures │ 0 │\n", "│ number_of_uses │ 2 │\n", @@ -699,9 +735,9 @@ "│ offsets_per_term_avg │ 0 │\n", "│ records_per_doc_avg │ 5 │\n", "│ sortable_values_size_mb │ 0 │\n", - "│ total_indexing_time │ 0.239 │\n", + "│ total_indexing_time │ 6.529 │\n", "│ total_inverted_index_blocks │ 11 │\n", - "│ vector_index_sz_mb │ 0.235603 │\n", + "│ vector_index_sz_mb │ 0.235947 │\n", "╰─────────────────────────────┴─────────────╯\n" ] } @@ -730,7 +766,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 27, "metadata": {}, "outputs": [ { @@ -739,7 +775,7 @@ "4" ] }, - "execution_count": 21, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -751,7 +787,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 28, "metadata": {}, "outputs": [ { @@ -760,7 +796,7 @@ "True" ] }, - "execution_count": 22, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -772,7 +808,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 29, "metadata": {}, "outputs": [], "source": [ diff --git a/docs/user_guide/data_validation.ipynb b/docs/user_guide/data_validation.ipynb deleted file mode 100644 index 366f47a4..00000000 --- a/docs/user_guide/data_validation.ipynb +++ /dev/null @@ -1,1102 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Getting Started with RedisVL\n", - "`redisvl` is a versatile Python library with an integrated CLI, designed to enhance AI applications using Redis. This guide will walk you through the following steps:\n", - "\n", - "1. Defining an `IndexSchema`\n", - "2. Preparing a sample dataset\n", - "3. Creating a `SearchIndex` object\n", - "4. Testing `rvl` CLI functionality\n", - "5. Loading the sample data\n", - "6. Building `VectorQuery` objects and executing searches\n", - "7. Updating a `SearchIndex` object\n", - "\n", - "...and more!\n", - "\n", - "Prerequisites:\n", - "- Ensure `redisvl` is installed in your Python environment.\n", - "- Have a running instance of [Redis Stack](https://redis.io/docs/install/install-stack/) or [Redis Cloud](https://redis.io/cloud).\n", - "\n", - "_____" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Define an `IndexSchema`\n", - "\n", - "The `IndexSchema` maintains crucial **index configuration** and **field definitions** to\n", - "enable search with Redis. For ease of use, the schema can be constructed from a\n", - "python dictionary or yaml file.\n", - "\n", - "### Example Schema Creation\n", - "Consider a dataset with user information, including `job`, `age`, `credit_score`,\n", - "and a 3-dimensional `user_embedding` vector.\n", - "\n", - "You must also decide on a Redis index name and key prefix to use for this\n", - "dataset. 
Below are example schema definitions in both YAML and Dict format.\n", - "\n", - "**YAML Definition:**\n", - "\n", - "```yaml\n", - "version: '0.1.0'\n", - "\n", - "index:\n", - " name: user_simple\n", - " prefix: user_simple_docs\n", - "\n", - "fields:\n", - " - name: user\n", - " type: tag\n", - " - name: credit_score\n", - " type: tag\n", - " - name: job\n", - " type: text\n", - " - name: age\n", - " type: numeric\n", - " - name: user_embedding\n", - " type: vector\n", - " attrs:\n", - " algorithm: flat\n", - " dims: 3\n", - " distance_metric: cosine\n", - " datatype: float32\n", - "```\n", - "> Store this in a local file, such as `schema.yaml`, for RedisVL usage." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Python Dictionary:**" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "schema = {\n", - " \"index\": {\n", - " \"name\": \"user_simple\",\n", - " \"prefix\": \"user_simple_docs\",\n", - " \"storage_type\": \"json\"\n", - " },\n", - " \"fields\": [\n", - " {\"name\": \"user\", \"type\": \"tag\"},\n", - " {\"name\": \"credit_score\", \"type\": \"tag\"},\n", - " {\"name\": \"job\", \"type\": \"text\"},\n", - " {\"name\": \"age\", \"type\": \"numeric\"},\n", - " {\"name\": \"location\", \"type\": \"geo\"},\n", - " {\n", - " \"name\": \"user_embedding\",\n", - " \"type\": \"vector\",\n", - " \"attrs\": {\n", - " \"dims\": 3,\n", - " \"distance_metric\": \"cosine\",\n", - " \"algorithm\": \"flat\",\n", - " \"datatype\": \"float32\"\n", - " }\n", - " }\n", - " ]\n", - "}" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Sample Dataset Preparation\n", - "\n", - "Below, create a mock dataset with `user`, `job`, `age`, `credit_score`, and\n", - "`user_embedding` fields. The `user_embedding` vectors are synthetic examples\n", - "for demonstration purposes.\n", - "\n", - "For more information on creating real-world embeddings, refer to this\n", - "[article](https://mlops.community/vector-similarity-search-from-basics-to-production/)." - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "\n", - "data = [\n", - " {\n", - " 'user': 'john',\n", - " 'age': 1,\n", - " 'job': 'engineer',\n", - " 'credit_score': 'high',\n", - " 'location': '37.540760,-77.433929',\n", - " 'user_embedding': np.array([0.1, 0.1, 0.5], dtype=np.float32).tobytes()\n", - " },\n", - " {\n", - " 'user': 'mary',\n", - " 'age': 2,\n", - " 'job': 'doctor',\n", - " 'credit_score': 'low',\n", - " 'location': '37.540760,-77.433929',\n", - " 'user_embedding': np.array([0.1, 0.1, 0.5], dtype=np.float32).tobytes()\n", - " },\n", - " {\n", - " 'user': 'joe',\n", - " 'age': 3,\n", - " 'job': 'dentist',\n", - " 'credit_score': 'medium',\n", - " 'location': '37.540760,-77.433929',\n", - " 'user_embedding': np.array([0.9, 0.9, 0.1], dtype=np.float32).tobytes()\n", - " }\n", - "]" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - ">As seen above, the sample `user_embedding` vectors are converted into bytes. Using the `NumPy`, this is fairly trivial." 
- ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Create a `SearchIndex`\n", - "\n", - "With the schema and sample dataset ready, instantiate a `SearchIndex`:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "from redisvl.index import SearchIndex\n", - "\n", - "index = SearchIndex.from_dict(schema)\n", - "# or use .from_yaml('schema_file.yaml')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now we also need to facilitate a Redis connection. There are a few ways to do this:\n", - "\n", - "- Create & manage your own client connection (recommended)\n", - "- Provide a Redis URL and let RedisVL connect on your behalf (by default, it will connect to \"redis://localhost:6379\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Bring your own Redis connection instance\n", - "\n", - "This is ideal in scenarios where you have custom settings on the connection instance or if your application will share a connection pool:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "from redis import Redis\n", - "\n", - "client = Redis.from_url(\"redis://localhost:6379\")\n", - "index = SearchIndex.from_dict(schema, redis_client=client)\n", - "\n", - "# alternatively, provide an async Redis client object to enable async index operations\n", - "# from redis.asyncio import Redis\n", - "# from redisvl.index import AsyncSearchIndex\n", - "# client = Redis.from_url(\"redis://localhost:6379\")\n", - "# index = AsyncSearchIndex.from_dict(schema, redis_client=client)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Let the index manage the connection instance\n", - "\n", - "This is ideal for simple cases:" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "index = SearchIndex.from_dict(schema, redis_url=\"redis://localhost:6379\", validate_on_load=True)\n", - "\n", - "# If you don't specify a client or Redis URL, the index will attempt to\n", - "# connect to Redis at the default address (\"redis://localhost:6379\")." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create the underlying index\n", - "\n", - "Now that we are connected to Redis, we need to run the create command." - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "16:42:16 redisvl.index.index INFO Index already exists, overwriting.\n" - ] - } - ], - "source": [ - "index.create(overwrite=True, drop=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - ">Note that at this point, the index has no entries. Data loading follows." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Inspect with the `rvl` CLI\n", - "Use the `rvl` CLI to inspect the created index and its fields:" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[32m16:36:30\u001b[0m \u001b[34m[RedisVL]\u001b[0m \u001b[1;30mINFO\u001b[0m Indices:\n", - "\u001b[32m16:36:30\u001b[0m \u001b[34m[RedisVL]\u001b[0m \u001b[1;30mINFO\u001b[0m 1. 
user_simple\n" - ] - } - ], - "source": [ - "!rvl index listall" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n", - "Index Information:\n", - "╭──────────────┬────────────────┬──────────────────────┬─────────────────┬────────────╮\n", - "│ Index Name │ Storage Type │ Prefixes │ Index Options │ Indexing │\n", - "├──────────────┼────────────────┼──────────────────────┼─────────────────┼────────────┤\n", - "│ user_simple │ JSON │ ['user_simple_docs'] │ [] │ 0 │\n", - "╰──────────────┴────────────────┴──────────────────────┴─────────────────┴────────────╯\n", - "Index Fields:\n", - "╭──────────────────┬────────────────┬─────────┬────────────────┬────────────────┬────────────────┬────────────────┬────────────────┬────────────────┬─────────────────┬────────────────╮\n", - "│ Name │ Attribute │ Type │ Field Option │ Option Value │ Field Option │ Option Value │ Field Option │ Option Value │ Field Option │ Option Value │\n", - "├──────────────────┼────────────────┼─────────┼────────────────┼────────────────┼────────────────┼────────────────┼────────────────┼────────────────┼─────────────────┼────────────────┤\n", - "│ $.user │ user │ TAG │ SEPARATOR │ , │ │ │ │ │ │ │\n", - "│ $.credit_score │ credit_score │ TAG │ SEPARATOR │ , │ │ │ │ │ │ │\n", - "│ $.job │ job │ TEXT │ WEIGHT │ 1 │ │ │ │ │ │ │\n", - "│ $.age │ age │ NUMERIC │ │ │ │ │ │ │ │ │\n", - "│ $.location │ location │ GEO │ │ │ │ │ │ │ │ │\n", - "│ $.user_embedding │ user_embedding │ VECTOR │ algorithm │ FLAT │ data_type │ FLOAT32 │ dim │ 3 │ distance_metric │ COSINE │\n", - "╰──────────────────┴────────────────┴─────────┴────────────────┴────────────────┴────────────────┴────────────────┴────────────────┴────────────────┴─────────────────┴────────────────╯\n" - ] - } - ], - "source": [ - "!rvl index info -i user_simple" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Load Data to `SearchIndex`\n", - "\n", - "Load the sample dataset to Redis:" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "16:42:27 redisvl.index.index ERROR Error while loading data to Redis\n", - "Traceback (most recent call last):\n", - " File \"/Users/tyler.hutcherson/Documents/AppliedAI/redis-vl-python/redisvl/index/storage.py\", line 201, in _preprocess_and_validate_objects\n", - " processed_obj = self.validate(processed_obj)\n", - " File \"/Users/tyler.hutcherson/Documents/AppliedAI/redis-vl-python/redisvl/index/storage.py\", line 158, in validate\n", - " return validate_object(self.index_schema, obj)\n", - " File \"/Users/tyler.hutcherson/Documents/AppliedAI/redis-vl-python/redisvl/schema/validation.py\", line 254, in validate_object\n", - " validated = model_class.model_validate(flat_obj)\n", - " File \"/Users/tyler.hutcherson/Library/Caches/pypoetry/virtualenvs/redisvl-VnTEShF2-py3.13/lib/python3.13/site-packages/pydantic/main.py\", line 627, in model_validate\n", - " return cls.__pydantic_validator__.validate_python(\n", - " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^\n", - " obj, strict=strict, from_attributes=from_attributes, context=context\n", - " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - " )\n", - " ^\n", - "pydantic_core._pydantic_core.ValidationError: 1 validation error for user_simple__PydanticModel\n", - "user_embedding\n", - " Input should be a valid list 
[type=list_type, input_value=b'\\xcd\\xcc\\xcc=\\xcd\\xcc\\xcc=\\x00\\x00\\x00?', input_type=bytes]\n", - " For further information visit https://errors.pydantic.dev/2.10/v/list_type\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/Users/tyler.hutcherson/Documents/AppliedAI/redis-vl-python/redisvl/index/index.py\", line 600, in load\n", - " return self._storage.write(\n", - " ~~~~~~~~~~~~~~~~~~~^\n", - " self._redis_client, # type: ignore\n", - " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - " ...<6 lines>...\n", - " validate=self._validate_on_load,\n", - " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - " )\n", - " ^\n", - " File \"/Users/tyler.hutcherson/Documents/AppliedAI/redis-vl-python/redisvl/index/storage.py\", line 260, in write\n", - " prepared_objects = self._preprocess_and_validate_objects(\n", - " objects,\n", - " ...<3 lines>...\n", - " validate=validate\n", - " )\n", - " File \"/Users/tyler.hutcherson/Documents/AppliedAI/redis-vl-python/redisvl/index/storage.py\", line 212, in _preprocess_and_validate_objects\n", - " raise ValueError(f\"Validation failed for object {object_id}: {str(e)}\")\n", - "ValueError: Validation failed for object at index 0: 1 validation error for user_simple__PydanticModel\n", - "user_embedding\n", - " Input should be a valid list [type=list_type, input_value=b'\\xcd\\xcc\\xcc=\\xcd\\xcc\\xcc=\\x00\\x00\\x00?', input_type=bytes]\n", - " For further information visit https://errors.pydantic.dev/2.10/v/list_type\n" - ] - }, - { - "ename": "ValueError", - "evalue": "Validation failed for object at index 0: 1 validation error for user_simple__PydanticModel\nuser_embedding\n Input should be a valid list [type=list_type, input_value=b'\\xcd\\xcc\\xcc=\\xcd\\xcc\\xcc=\\x00\\x00\\x00?', input_type=bytes]\n For further information visit https://errors.pydantic.dev/2.10/v/list_type", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValidationError\u001b[0m Traceback (most recent call last)", - "File \u001b[0;32m~/Documents/AppliedAI/redis-vl-python/redisvl/index/storage.py:201\u001b[0m, in \u001b[0;36mBaseStorage._preprocess_and_validate_objects\u001b[0;34m(self, objects, id_field, keys, preprocess, validate)\u001b[0m\n\u001b[1;32m 200\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m validate:\n\u001b[0;32m--> 201\u001b[0m processed_obj \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalidate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mprocessed_obj\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 203\u001b[0m \u001b[38;5;66;03m# Store valid object with its key for writing\u001b[39;00m\n", - "File \u001b[0;32m~/Documents/AppliedAI/redis-vl-python/redisvl/index/storage.py:158\u001b[0m, in \u001b[0;36mBaseStorage.validate\u001b[0;34m(self, obj)\u001b[0m\n\u001b[1;32m 157\u001b[0m \u001b[38;5;66;03m# Pass directly to validation function and let any errors propagate\u001b[39;00m\n\u001b[0;32m--> 158\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mvalidate_object\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mindex_schema\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/Documents/AppliedAI/redis-vl-python/redisvl/schema/validation.py:254\u001b[0m, in 
\u001b[0;36mvalidate_object\u001b[0;34m(schema, obj)\u001b[0m\n\u001b[1;32m 253\u001b[0m \u001b[38;5;66;03m# Validate against model\u001b[39;00m\n\u001b[0;32m--> 254\u001b[0m validated \u001b[38;5;241m=\u001b[39m \u001b[43mmodel_class\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel_validate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mflat_obj\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 255\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m validated\u001b[38;5;241m.\u001b[39mmodel_dump(exclude_none\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n", - "File \u001b[0;32m~/Library/Caches/pypoetry/virtualenvs/redisvl-VnTEShF2-py3.13/lib/python3.13/site-packages/pydantic/main.py:627\u001b[0m, in \u001b[0;36mBaseModel.model_validate\u001b[0;34m(cls, obj, strict, from_attributes, context)\u001b[0m\n\u001b[1;32m 626\u001b[0m __tracebackhide__ \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[0;32m--> 627\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m__pydantic_validator__\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalidate_python\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 628\u001b[0m \u001b[43m \u001b[49m\u001b[43mobj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstrict\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstrict\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfrom_attributes\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfrom_attributes\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcontext\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcontext\u001b[49m\n\u001b[1;32m 629\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[0;31mValidationError\u001b[0m: 1 validation error for user_simple__PydanticModel\nuser_embedding\n Input should be a valid list [type=list_type, input_value=b'\\xcd\\xcc\\xcc=\\xcd\\xcc\\xcc=\\x00\\x00\\x00?', input_type=bytes]\n For further information visit https://errors.pydantic.dev/2.10/v/list_type", - "\nDuring handling of the above exception, another exception occurred:\n", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[16], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m keys \u001b[38;5;241m=\u001b[39m \u001b[43mindex\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28mprint\u001b[39m(keys)\n", - "File \u001b[0;32m~/Documents/AppliedAI/redis-vl-python/redisvl/index/index.py:600\u001b[0m, in \u001b[0;36mSearchIndex.load\u001b[0;34m(self, data, id_field, keys, ttl, preprocess, batch_size)\u001b[0m\n\u001b[1;32m 551\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Load objects to the Redis database. 
Returns the list of keys loaded\u001b[39;00m\n\u001b[1;32m 552\u001b[0m \u001b[38;5;124;03mto Redis.\u001b[39;00m\n\u001b[1;32m 553\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 597\u001b[0m \u001b[38;5;124;03m keys = index.load(data, preprocess=add_field)\u001b[39;00m\n\u001b[1;32m 598\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 599\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 600\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_storage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwrite\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 601\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_redis_client\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# type: ignore\u001b[39;49;00m\n\u001b[1;32m 602\u001b[0m \u001b[43m \u001b[49m\u001b[43mobjects\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 603\u001b[0m \u001b[43m \u001b[49m\u001b[43mid_field\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mid_field\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 604\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkeys\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 605\u001b[0m \u001b[43m \u001b[49m\u001b[43mttl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mttl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 606\u001b[0m \u001b[43m \u001b[49m\u001b[43mpreprocess\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpreprocess\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 607\u001b[0m \u001b[43m \u001b[49m\u001b[43mbatch_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbatch_size\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 608\u001b[0m \u001b[43m \u001b[49m\u001b[43mvalidate\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_validate_on_load\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 609\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 610\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m:\n\u001b[1;32m 611\u001b[0m logger\u001b[38;5;241m.\u001b[39mexception(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mError while loading data to Redis\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", - "File \u001b[0;32m~/Documents/AppliedAI/redis-vl-python/redisvl/index/storage.py:260\u001b[0m, in \u001b[0;36mBaseStorage.write\u001b[0;34m(self, redis_client, objects, id_field, keys, ttl, preprocess, batch_size, validate)\u001b[0m\n\u001b[1;32m 257\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m []\n\u001b[1;32m 259\u001b[0m \u001b[38;5;66;03m# Pass 1: Preprocess and validate all objects\u001b[39;00m\n\u001b[0;32m--> 260\u001b[0m prepared_objects \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_preprocess_and_validate_objects\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 261\u001b[0m \u001b[43m \u001b[49m\u001b[43mobjects\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[1;32m 262\u001b[0m \u001b[43m \u001b[49m\u001b[43mid_field\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mid_field\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 263\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkeys\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 264\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mpreprocess\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpreprocess\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 265\u001b[0m \u001b[43m \u001b[49m\u001b[43mvalidate\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mvalidate\u001b[49m\n\u001b[1;32m 266\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 268\u001b[0m \u001b[38;5;66;03m# Pass 2: Write all valid objects in batches\u001b[39;00m\n\u001b[1;32m 269\u001b[0m added_keys \u001b[38;5;241m=\u001b[39m []\n", - "File \u001b[0;32m~/Documents/AppliedAI/redis-vl-python/redisvl/index/storage.py:212\u001b[0m, in \u001b[0;36mBaseStorage._preprocess_and_validate_objects\u001b[0;34m(self, objects, id_field, keys, preprocess, validate)\u001b[0m\n\u001b[1;32m 209\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m id_field \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(obj, \u001b[38;5;28mdict\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m id_field \u001b[38;5;129;01min\u001b[39;00m obj:\n\u001b[1;32m 210\u001b[0m object_id \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mwith \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mid_field\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m=\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mobj[id_field]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m--> 212\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mValidation failed for object \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mobject_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mstr\u001b[39m(e)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 214\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m prepared_objects\n", - "\u001b[0;31mValueError\u001b[0m: Validation failed for object at index 0: 1 validation error for user_simple__PydanticModel\nuser_embedding\n Input should be a valid list [type=list_type, input_value=b'\\xcd\\xcc\\xcc=\\xcd\\xcc\\xcc=\\x00\\x00\\x00?', input_type=bytes]\n For further information visit https://errors.pydantic.dev/2.10/v/list_type" - ] - } - ], - "source": [ - "keys = index.load(data)\n", - "\n", - "print(keys)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "IndexInfo(name='user_simple', prefix='user_simple_docs', key_separator=':', storage_type=)" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "index.schema.index" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['user_simple_docs:01JQ4Y9V0NK7QBYKMCP47MT3DE']" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "index.load(data=[\n", - " {\n", - " 'user': 'john',\n", - " 'age': 1,\n", - " 'job': 'engineer',\n", - " 'credit_score': 'high',\n", - " 'location': 1,\n", - " 'user_embedding': [\n", - " ]\n", - " }\n", - "])" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'index_name': 'user_simple',\n", - " 'index_options': [],\n", - " 'index_definition': ['key_type',\n", - " 'JSON',\n", - " 'prefixes',\n", - " ['user_simple_docs'],\n", - " 'default_score',\n", - " '1',\n", - " 'indexes_all',\n", - " 
'false'],\n", - " 'attributes': [['identifier',\n", - " '$.user',\n", - " 'attribute',\n", - " 'user',\n", - " 'type',\n", - " 'TAG',\n", - " 'SEPARATOR',\n", - " ','],\n", - " ['identifier',\n", - " '$.credit_score',\n", - " 'attribute',\n", - " 'credit_score',\n", - " 'type',\n", - " 'TAG',\n", - " 'SEPARATOR',\n", - " ','],\n", - " ['identifier', '$.job', 'attribute', 'job', 'type', 'TEXT', 'WEIGHT', '1'],\n", - " ['identifier', '$.age', 'attribute', 'age', 'type', 'NUMERIC'],\n", - " ['identifier', '$.location', 'attribute', 'location', 'type', 'GEO'],\n", - " ['identifier',\n", - " '$.user_embedding',\n", - " 'attribute',\n", - " 'user_embedding',\n", - " 'type',\n", - " 'VECTOR',\n", - " 'algorithm',\n", - " 'FLAT',\n", - " 'data_type',\n", - " 'FLOAT32',\n", - " 'dim',\n", - " 3,\n", - " 'distance_metric',\n", - " 'COSINE']],\n", - " 'num_docs': 2,\n", - " 'max_doc_id': 2,\n", - " 'num_terms': 2,\n", - " 'num_records': 12,\n", - " 'inverted_sz_mb': '4.61578369140625e-4',\n", - " 'vector_index_sz_mb': '0.028045654296875',\n", - " 'total_inverted_index_blocks': 5,\n", - " 'offset_vectors_sz_mb': '3.814697265625e-6',\n", - " 'doc_table_size_mb': '2.117156982421875e-4',\n", - " 'sortable_values_size_mb': '0',\n", - " 'key_table_size_mb': '8.296966552734375e-5',\n", - " 'tag_overhead_sz_mb': '5.53131103515625e-5',\n", - " 'text_overhead_sz_mb': '6.67572021484375e-5',\n", - " 'total_index_memory_sz_mb': '9.565353393554688e-4',\n", - " 'geoshapes_sz_mb': '0',\n", - " 'records_per_doc_avg': '6',\n", - " 'bytes_per_record_avg': '40.33333206176758',\n", - " 'offsets_per_term_avg': '0.3333333432674408',\n", - " 'offset_bits_per_record_avg': '8',\n", - " 'hash_indexing_failures': 4,\n", - " 'total_indexing_time': '0.3160000145435333',\n", - " 'indexing': 0,\n", - " 'percent_indexed': '1',\n", - " 'number_of_uses': 2,\n", - " 'cleaning': 0,\n", - " 'gc_stats': ['bytes_collected',\n", - " '0',\n", - " 'total_ms_run',\n", - " '0',\n", - " 'total_cycles',\n", - " '0',\n", - " 'average_cycle_time_ms',\n", - " 'nan',\n", - " 'last_run_time_ms',\n", - " '0',\n", - " 'gc_numeric_trees_missed',\n", - " '0',\n", - " 'gc_blocks_denied',\n", - " '0'],\n", - " 'cursor_stats': ['global_idle',\n", - " 0,\n", - " 'global_total',\n", - " 0,\n", - " 'index_capacity',\n", - " 128,\n", - " 'index_total',\n", - " 0],\n", - " 'dialect_stats': ['dialect_1',\n", - " 0,\n", - " 'dialect_2',\n", - " 0,\n", - " 'dialect_3',\n", - " 0,\n", - " 'dialect_4',\n", - " 0],\n", - " 'Index Errors': ['indexing failures',\n", - " 4,\n", - " 'last indexing error',\n", - " 'Empty array for vector field on JSON document',\n", - " 'last indexing error key',\n", - " 'user_simple_docs:01JQ4Y9V0NK7QBYKMCP47MT3DE'],\n", - " 'field statistics': [['identifier',\n", - " '$.user',\n", - " 'attribute',\n", - " 'user',\n", - " 'Index Errors',\n", - " ['indexing failures',\n", - " 0,\n", - " 'last indexing error',\n", - " 'N/A',\n", - " 'last indexing error key',\n", - " 'N/A']],\n", - " ['identifier',\n", - " '$.credit_score',\n", - " 'attribute',\n", - " 'credit_score',\n", - " 'Index Errors',\n", - " ['indexing failures',\n", - " 0,\n", - " 'last indexing error',\n", - " 'N/A',\n", - " 'last indexing error key',\n", - " 'N/A']],\n", - " ['identifier',\n", - " '$.job',\n", - " 'attribute',\n", - " 'job',\n", - " 'Index Errors',\n", - " ['indexing failures',\n", - " 0,\n", - " 'last indexing error',\n", - " 'N/A',\n", - " 'last indexing error key',\n", - " 'N/A']],\n", - " ['identifier',\n", - " '$.age',\n", - " 'attribute',\n", - " 'age',\n", 
- " 'Index Errors',\n", - " ['indexing failures',\n", - " 0,\n", - " 'last indexing error',\n", - " 'N/A',\n", - " 'last indexing error key',\n", - " 'N/A']],\n", - " ['identifier',\n", - " '$.location',\n", - " 'attribute',\n", - " 'location',\n", - " 'Index Errors',\n", - " ['indexing failures',\n", - " 0,\n", - " 'last indexing error',\n", - " 'N/A',\n", - " 'last indexing error key',\n", - " 'N/A']],\n", - " ['identifier',\n", - " '$.user_embedding',\n", - " 'attribute',\n", - " 'user_embedding',\n", - " 'Index Errors',\n", - " ['indexing failures',\n", - " 4,\n", - " 'last indexing error',\n", - " 'Empty array for vector field on JSON document',\n", - " 'last indexing error key',\n", - " 'user_simple_docs:01JQ4Y9V0NK7QBYKMCP47MT3DE']]]}" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "index.info()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - ">By default, `load` will create a unique Redis key as a combination of the index key `prefix` and a random ULID. You can also customize the key by providing direct keys or pointing to a specified `id_field` on load." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Upsert the index with new data\n", - "Upsert data by using the `load` method again:" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['user_simple_docs:01JM2NWJGYMJ0QTR5YB4MB0BX9']\n" - ] - } - ], - "source": [ - "# Add more data\n", - "new_data = [{\n", - " 'user': 'tyler',\n", - " 'age': 9,\n", - " 'job': 'engineer',\n", - " 'credit_score': 'high',\n", - " 'user_embedding': np.array([0.1, 0.3, 0.5], dtype=np.float32).tobytes()\n", - "}]\n", - "keys = index.load(new_data)\n", - "\n", - "print(keys)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Creating `VectorQuery` Objects\n", - "\n", - "Next we will create a vector query object for our newly populated index. This example will use a simple vector to demonstrate how vector similarity works. Vectors in production will likely be much larger than 3 floats and often require Machine Learning models (i.e. Huggingface sentence transformers) or an embeddings API (Cohere, OpenAI). `redisvl` provides a set of [Vectorizers](https://docs.redisvl.com/en/latest/user_guide/vectorizers_04.html#openai) to assist in vector creation." - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "from redisvl.query import VectorQuery\n", - "from jupyterutils import result_print\n", - "\n", - "query = VectorQuery(\n", - " vector=[0.1, 0.1, 0.5],\n", - " vector_field_name=\"user_embedding\",\n", - " return_fields=[\"user\", \"age\", \"job\", \"credit_score\", \"vector_distance\"],\n", - " num_results=3\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Executing queries\n", - "With our `VectorQuery` object defined above, we can execute the query over the `SearchIndex` using the `query` method." 
- ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "*=>[KNN 3 @user_embedding $vector AS vector_distance] RETURN 6 user age job credit_score vector_distance vector_distance SORTBY vector_distance ASC DIALECT 2 LIMIT 0 3\n" - ] - }, - { - "data": { - "text/html": [ - "table>vector_distanceuseragejobcredit_score0john1engineerhigh0mary2doctorlow0.0566299557686tyler9engineerhigh" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "results = index.query(query)\n", - "result_print(results)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Using an Asynchronous Redis Client\n", - "\n", - "The `AsyncSearchIndex` class along with an async Redis python client allows for queries, index creation, and data loading to be done asynchronously. This is the\n", - "recommended route for working with `redisvl` in production-like settings." - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'index': {'name': 'user_simple', 'prefix': 'user_simple_docs'},\n", - " 'fields': [{'name': 'user', 'type': 'tag'},\n", - " {'name': 'credit_score', 'type': 'tag'},\n", - " {'name': 'job', 'type': 'text'},\n", - " {'name': 'age', 'type': 'numeric'},\n", - " {'name': 'user_embedding',\n", - " 'type': 'vector',\n", - " 'attrs': {'dims': 3,\n", - " 'distance_metric': 'cosine',\n", - " 'algorithm': 'flat',\n", - " 'datatype': 'float32'}}]}" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "schema" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from redisvl.index import AsyncSearchIndex\n", - "from redis.asyncio import Redis\n", - "\n", - "client = Redis.from_url(\"redis://localhost:6379\")\n", - "\n", - "index = AsyncSearchIndex.from_dict(schema, redis_client=client)" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
<table><tr><th>vector_distance</th><th>user</th><th>age</th><th>job</th><th>credit_score</th></tr>
<tr><td>0</td><td>john</td><td>1</td><td>engineer</td><td>high</td></tr>
<tr><td>0</td><td>mary</td><td>2</td><td>doctor</td><td>low</td></tr>
<tr><td>0.0566299557686</td><td>tyler</td><td>9</td><td>engineer</td><td>high</td></tr></table>
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# execute the vector query async\n", - "results = await index.query(query)\n", - "result_print(results)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Updating a schema\n", - "In some scenarios, it makes sense to update the index schema. With Redis and `redisvl`, this is easy because Redis can keep the underlying data in place while you change or make updates to the index configuration." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "So for our scenario, let's imagine we want to reindex this data in 2 ways:\n", - "- by using a `Tag` type for `job` field instead of `Text`\n", - "- by using an `hnsw` vector index for the `user_embedding` field instead of a `flat` vector index" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ - "# Modify this schema to have what we want\n", - "\n", - "index.schema.remove_field(\"job\")\n", - "index.schema.remove_field(\"user_embedding\")\n", - "index.schema.add_fields([\n", - " {\"name\": \"job\", \"type\": \"tag\"},\n", - " {\n", - " \"name\": \"user_embedding\",\n", - " \"type\": \"vector\",\n", - " \"attrs\": {\n", - " \"dims\": 3,\n", - " \"distance_metric\": \"cosine\",\n", - " \"algorithm\": \"hnsw\",\n", - " \"datatype\": \"float32\"\n", - " }\n", - " }\n", - "])" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "11:28:32 redisvl.index.index INFO Index already exists, overwriting.\n" - ] - } - ], - "source": [ - "# Run the index update but keep underlying data in place\n", - "await index.create(overwrite=True, drop=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
<table><tr><th>vector_distance</th><th>user</th><th>age</th><th>job</th><th>credit_score</th></tr>
<tr><td>0</td><td>mary</td><td>2</td><td>doctor</td><td>low</td></tr>
<tr><td>0</td><td>john</td><td>1</td><td>engineer</td><td>high</td></tr>
<tr><td>0.0566299557686</td><td>tyler</td><td>9</td><td>engineer</td><td>high</td></tr></table>
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# Execute the vector query async\n", - "results = await index.query(query)\n", - "result_print(results)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Check Index Stats\n", - "Use the `rvl` CLI to check the stats for the index:" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Statistics:\n", - "╭─────────────────────────────┬─────────────╮\n", - "│ Stat Key │ Value │\n", - "├─────────────────────────────┼─────────────┤\n", - "│ num_docs │ 4 │\n", - "│ num_terms │ 4 │\n", - "│ max_doc_id │ 4 │\n", - "│ num_records │ 22 │\n", - "│ percent_indexed │ 1 │\n", - "│ hash_indexing_failures │ 0 │\n", - "│ number_of_uses │ 2 │\n", - "│ bytes_per_record_avg │ 47.8 │\n", - "│ doc_table_size_mb │ 0.000423431 │\n", - "│ inverted_sz_mb │ 0.000911713 │\n", - "│ key_table_size_mb │ 0.000165939 │\n", - "│ offset_bits_per_record_avg │ nan │\n", - "│ offset_vectors_sz_mb │ 0 │\n", - "│ offsets_per_term_avg │ 0 │\n", - "│ records_per_doc_avg │ 5 │\n", - "│ sortable_values_size_mb │ 0 │\n", - "│ total_indexing_time │ 0.239 │\n", - "│ total_inverted_index_blocks │ 11 │\n", - "│ vector_index_sz_mb │ 0.235603 │\n", - "╰─────────────────────────────┴─────────────╯\n" - ] - } - ], - "source": [ - "!rvl stats -i user_simple" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Cleanup" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Below we will clean up after our work. First, you can flush all data from Redis associated with the index by\n", - "using the `.clear()` method. This will leave the secondary index in place for future insertions or updates.\n", - "\n", - "But if you want to clean up everything, including the index, just use `.delete()`\n", - "which will by default remove the index AND the underlying data." 
- ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "4" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Clear all data from Redis associated with the index\n", - "await index.clear()" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Butm the index is still in place\n", - "await index.exists()" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [], - "source": [ - "# Remove / delete the index in its entirety\n", - "await index.delete()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "env", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.13.2" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/redisvl/index/index.py b/redisvl/index/index.py index 806b4ba5..4ec473a4 100644 --- a/redisvl/index/index.py +++ b/redisvl/index/index.py @@ -175,7 +175,7 @@ def from_yaml(cls, schema_path: str, **kwargs): from redisvl.index import SearchIndex - index = SearchIndex.from_yaml("schemas/schema.yaml") + index = SearchIndex.from_yaml("schemas/schema.yaml", redis_url="redis://localhost:6379") """ schema = IndexSchema.from_yaml(schema_path) return cls(schema=schema, **kwargs) @@ -203,7 +203,7 @@ def from_dict(cls, schema_dict: Dict[str, Any], **kwargs): "fields": [ {"name": "doc-id", "type": "tag"} ] - }) + }, redis_url="redis://localhost:6379") """ schema = IndexSchema.from_dict(schema_dict) @@ -247,10 +247,14 @@ class SearchIndex(BaseSearchIndex): from redisvl.index import SearchIndex # initialize the index object with schema from file - index = SearchIndex.from_yaml("schemas/schema.yaml", redis_url="redis://localhost:6379") + index = SearchIndex.from_yaml( + "schemas/schema.yaml", + redis_url="redis://localhost:6379", + validate_on_load=True + ) # create the index - index.create(overwrite=True) + index.create(overwrite=True, drop=False) # data is an iterable of dictionaries index.load(data) @@ -407,11 +411,6 @@ def connect(self, redis_url: Optional[str] = None, **kwargs): ValueError: If the Redis URL is not provided nor accessible through the `REDIS_URL` environment variable. ModuleNotFoundError: If required Redis modules are not installed. - - .. code-block:: python - - index.connect(redis_url="redis://localhost:6379") - """ self.__redis_client = RedisConnectionFactory.get_redis_connection( redis_url=redis_url, **kwargs @@ -431,16 +430,6 @@ def set_client(self, redis_client: redis.Redis, **kwargs): Raises: TypeError: If the provided client is not valid. - - .. 
code-block:: python - - import redis - from redisvl.index import SearchIndex - - client = redis.Redis.from_url("redis://localhost:6379") - index = SearchIndex.from_yaml("schemas/schema.yaml") - index.set_client(client) - """ RedisConnectionFactory.validate_sync_redis(redis_client) self.__redis_client = redis_client @@ -906,11 +895,12 @@ class AsyncSearchIndex(BaseSearchIndex): # initialize the index object with schema from file index = AsyncSearchIndex.from_yaml( "schemas/schema.yaml", - redis_url="redis://localhost:6379" + redis_url="redis://localhost:6379", + validate_on_load=True ) # create the index - await index.create(overwrite=True) + await index.create(overwrite=True, drop=False) # data is an iterable of dictionaries await index.load(data) diff --git a/redisvl/index/storage.py b/redisvl/index/storage.py index f0af1e5b..792b6bc4 100644 --- a/redisvl/index/storage.py +++ b/redisvl/index/storage.py @@ -143,7 +143,7 @@ async def _aget(client: AsyncRedis, key: str) -> Dict[str, Any]: """ raise NotImplementedError - def validate(self, obj: Dict[str, Any]) -> Dict[str, Any]: + def _validate(self, obj: Dict[str, Any]) -> Dict[str, Any]: """ Validate an object against the schema using Pydantic-based validation. @@ -161,7 +161,7 @@ def validate(self, obj: Dict[str, Any]) -> Dict[str, Any]: def _preprocess_and_validate_objects( self, - objects: List[Any], + objects: Iterable[Any], id_field: Optional[str] = None, keys: Optional[Iterable[str]] = None, preprocess: Optional[Callable] = None, @@ -201,7 +201,7 @@ def _preprocess_and_validate_objects( # Schema validation if enabled if validate: - processed_obj = self.validate(processed_obj) + processed_obj = self._validate(processed_obj) # Store valid object with its key for writing prepared_objects.append((key, processed_obj)) @@ -263,7 +263,7 @@ def write( # Pass 1: Preprocess and validate all objects prepared_objects = self._preprocess_and_validate_objects( - objects, + list(objects), # Convert Iterable to List id_field=id_field, keys=keys, preprocess=preprocess, @@ -342,7 +342,7 @@ async def awrite( # Pass 1: Preprocess and validate all objects prepared_objects = self._preprocess_and_validate_objects( - objects, + list(objects), # Convert Iterable to List id_field=id_field, keys=keys, preprocess=preprocess, diff --git a/redisvl/schema/validation.py b/redisvl/schema/validation.py index b102166c..c4ddd3e7 100644 --- a/redisvl/schema/validation.py +++ b/redisvl/schema/validation.py @@ -8,7 +8,7 @@ import json import re import warnings -from typing import Any, Dict, List, Optional, Type, Union +from typing import Any, Dict, List, Optional, Type, Union, cast from pydantic import BaseModel, Field, field_validator @@ -53,7 +53,7 @@ def get_model_for_schema(cls, schema: IndexSchema) -> Type[BaseModel]: @classmethod def _map_field_to_pydantic_type( cls, field: BaseField, storage_type: StorageType - ) -> Type: + ) -> Type[Any]: """ Map Redis field types to appropriate Pydantic types. 
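# --- Illustrative sketch (not part of this patch) ----------------------------
# The validation.py hunks around here build a Pydantic model dynamically from
# an IndexSchema so that objects can be checked before being written to Redis.
# Below is a minimal standalone version of that idea using pydantic.create_model
# directly; the toy field map and model name are hypothetical illustrations,
# not the generated user_simple__PydanticModel.
from typing import Optional, Union

from pydantic import ValidationError, create_model

# Map a toy schema (field name -> Redis field type) onto Python annotations,
# mirroring _map_field_to_pydantic_type: text/tag/geo -> str, numeric -> int|float.
toy_fields = {"job": "text", "age": "numeric", "location": "geo"}
type_map = {"text": str, "tag": str, "numeric": Union[int, float], "geo": str}

# Every field is optional with default None, since loaded objects may be partial.
ToyModel = create_model(
    "ToyValidationModel",
    **{name: (Optional[type_map[ftype]], None) for name, ftype in toy_fields.items()},
)

try:
    ToyModel.model_validate({"job": "engineer", "age": "not-a-number"})
except ValidationError as e:
    print(e)  # pydantic reports that 'age' is not a valid integer or number
# ------------------------------------------------------------------------------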
@@ -72,14 +72,17 @@ def _map_field_to_pydantic_type( elif field.type == FieldTypes.TAG: return str elif field.type == FieldTypes.NUMERIC: - return Union[int, float] + return Union[int, float] # type: ignore elif field.type == FieldTypes.GEO: return str elif field.type == FieldTypes.VECTOR: # For JSON storage, vectors are always lists if storage_type == StorageType.JSON: # For int data types, vectors must be ints, otherwise floats - if field.attrs.datatype in (VectorDataType.INT8, VectorDataType.UINT8): + if field.attrs.datatype in ( # type: ignore + VectorDataType.INT8, + VectorDataType.UINT8, + ): return List[int] return List[float] else: @@ -103,8 +106,8 @@ def _create_model(cls, schema: IndexSchema) -> Type[BaseModel]: storage_type = schema.index.storage_type # Create annotations dictionary for the dynamic model - annotations = {} - class_dict = {} + annotations: Dict[str, Any] = {} + class_dict: Dict[str, Any] = {} # Build annotations and field metadata for field_name, field in schema.fields.items(): @@ -154,6 +157,8 @@ def _disallow_bool(cls, value): # Register validators for VECTOR fields elif field.type == FieldTypes.VECTOR: + dims = field.attrs.dims # type: ignore + datatype = field.attrs.datatype # type: ignore def make_vector_validator( fname: str, dims: int, datatype: VectorDataType @@ -190,7 +195,7 @@ def _validate_vector(cls, value): return _validate_vector class_dict[f"validate_{field_name}"] = make_vector_validator( - field_name, field.attrs.dims, field.attrs.datatype + field_name, dims, datatype ) # Create class dictionary with annotations and field metadata diff --git a/tests/integration/test_async_search_index.py b/tests/integration/test_async_search_index.py index ea122d5d..32a2e3d3 100644 --- a/tests/integration/test_async_search_index.py +++ b/tests/integration/test_async_search_index.py @@ -5,7 +5,7 @@ from redis import Redis as SyncRedis from redis.asyncio import Redis as AsyncRedis -from redisvl.exceptions import RedisModuleVersionError, RedisSearchError +from redisvl.exceptions import RedisModuleVersionError, RedisSearchError, RedisVLError from redisvl.index import AsyncSearchIndex from redisvl.query import VectorQuery from redisvl.query.query import FilterQuery @@ -269,7 +269,7 @@ async def test_search_index_load_preprocess(async_index): await async_index.create(overwrite=True, drop=True) data = [{"id": "1", "test": "foo"}] - async def preprocess(record): + def preprocess(record): record["test"] = "bar" return record @@ -281,10 +281,10 @@ async def preprocess(record): == "bar" ) - async def bad_preprocess(record): + def bad_preprocess(record): return 1 - with pytest.raises(ValueError): + with pytest.raises(RedisVLError): await async_index.load(data, id_field="id", preprocess=bad_preprocess) @@ -300,7 +300,7 @@ async def test_no_id_field(async_index): bad_data = [{"wrong_key": "1", "value": "test"}] # catch missing / invalid id_field - with pytest.raises(ValueError): + with pytest.raises(RedisVLError): await async_index.load(bad_data, id_field="key") diff --git a/tests/integration/test_search_index.py b/tests/integration/test_search_index.py index 02b6d5e4..875449be 100644 --- a/tests/integration/test_search_index.py +++ b/tests/integration/test_search_index.py @@ -4,7 +4,7 @@ import pytest from redis import Redis -from redisvl.exceptions import RedisModuleVersionError, RedisSearchError +from redisvl.exceptions import RedisModuleVersionError, RedisSearchError, RedisVLError from redisvl.index import SearchIndex from redisvl.query import VectorQuery from 
redisvl.query.query import FilterQuery @@ -268,7 +268,7 @@ def preprocess(record): def bad_preprocess(record): return 1 - with pytest.raises(ValueError): + with pytest.raises(RedisVLError): index.load(data, id_field="id", preprocess=bad_preprocess) @@ -277,7 +277,7 @@ def test_no_id_field(index): bad_data = [{"wrong_key": "1", "value": "test"}] # catch missing / invalid id_field - with pytest.raises(ValueError): + with pytest.raises(RedisVLError): index.load(bad_data, id_field="key") diff --git a/tests/unit/test_edge_cases.py b/tests/unit/test_edge_cases.py deleted file mode 100644 index 3646cc1f..00000000 --- a/tests/unit/test_edge_cases.py +++ /dev/null @@ -1,451 +0,0 @@ -""" -Tests for edge cases in the RedisVL validation module. - -This module tests edge cases in the validation system that might not be -covered in the main test files, including: -1. Performance and caching behavior -2. Handling of unusual data types -3. Extreme values -4. Boundary conditions -""" - -import time -from typing import Any, Dict, List - -import pytest - -from redisvl.index.storage import BaseStorage -from redisvl.schema.fields import Field, FieldTypes, VectorDataType -from redisvl.schema.index import Index, IndexSchema -from redisvl.schema.validation import SchemaModelGenerator, validate_object - - -class TestSchemaModelCaching: - """Tests for model caching behavior.""" - - def test_caching_improves_performance(self): - """Test that caching improves model generation performance.""" - # Create a complex schema - fields = { - f"field_{i}": Field(name=f"field_{i}", type=FieldTypes.TEXT) - for i in range(50) # 50 fields should be enough to measure performance - } - - schema = IndexSchema( - index=Index(name="performance_test", prefix="doc"), fields=fields - ) - - # First generation (not cached) - start_time = time.time() - model1 = SchemaModelGenerator.get_model_for_schema(schema) - first_time = time.time() - start_time - - # Second generation (should be cached) - start_time = time.time() - model2 = SchemaModelGenerator.get_model_for_schema(schema) - second_time = time.time() - start_time - - # Verify second generation is faster - assert second_time < first_time - - # Should be much faster (usually at least 10x) - assert second_time < (first_time * 0.5) - - # Verify same model instance - assert model1 is model2 - - def test_different_schemas_get_different_models(self): - """Test that different schemas get different model instances.""" - # Create two different schemas - schema1 = IndexSchema( - index=Index(name="test1", prefix="doc1"), - fields={"field1": Field(name="field1", type=FieldTypes.TEXT)}, - ) - - schema2 = IndexSchema( - index=Index(name="test2", prefix="doc2"), - fields={"field1": Field(name="field1", type=FieldTypes.TEXT)}, - ) - - # Get models - model1 = SchemaModelGenerator.get_model_for_schema(schema1) - model2 = SchemaModelGenerator.get_model_for_schema(schema2) - - # Verify different model instances - assert model1 is not model2 - assert model1.__name__ != model2.__name__ - - -class TestUnusualDataTypes: - """Tests for handling unusual data types during validation.""" - - @pytest.fixture - def basic_schema(self): - """Create a basic schema for testing.""" - return IndexSchema( - index=Index(name="test", prefix="doc"), - fields={ - "text_field": Field(name="text_field", type=FieldTypes.TEXT), - "tag_field": Field(name="tag_field", type=FieldTypes.TAG), - "num_field": Field(name="num_field", type=FieldTypes.NUMERIC), - }, - ) - - def test_none_values(self, basic_schema): - """Test handling of 
None values."""
-        # Data with None values
-        data = {"text_field": None, "tag_field": None, "num_field": None}
-
-        # Validate
-        result = validate_object(basic_schema, data)
-
-        # None values should be excluded
-        assert len(result) == 0
-
-    def test_empty_string_values(self, basic_schema):
-        """Test handling of empty strings."""
-        # Data with empty strings
-        data = {"text_field": "", "tag_field": "", "num_field": 0}
-
-        # Validate
-        result = validate_object(basic_schema, data)
-
-        # Empty strings are valid for text and tag
-        assert result["text_field"] == ""
-        assert result["tag_field"] == ""
-        assert result["num_field"] == 0
-
-    def test_boolean_values(self, basic_schema):
-        """Test handling of boolean values."""
-        # Data with booleans
-        data = {"text_field": True, "tag_field": False, "num_field": True}
-
-        # Booleans aren't valid for text or tag
-        with pytest.raises(ValueError) as exc_info:
-            validate_object(basic_schema, data)
-
-        assert "text_field" in str(exc_info.value)
-
-        # Create new schema with only numeric
-        num_schema = IndexSchema(
-            index=Index(name="test", prefix="doc"),
-            fields={"num_field": Field(name="num_field", type=FieldTypes.NUMERIC)},
-        )
-
-        # Validate with only the numeric field
-        result = validate_object(num_schema, {"num_field": True})
-
-        # Python converts True to 1, False to 0
-        assert result["num_field"] == 1
-
-    def test_list_for_text(self, basic_schema):
-        """Test handling lists for text fields."""
-        # Data with list for text
-        data = {"text_field": ["item1", "item2"]}
-
-        # Lists aren't valid for text
-        with pytest.raises(ValueError) as exc_info:
-            validate_object(basic_schema, data)
-
-        assert "text_field" in str(exc_info.value)
-
-
-class TestVectorEdgeCases:
-    """Tests for edge cases with vector fields."""
-
-    @pytest.fixture
-    def vector_schema(self):
-        """Create a schema with vector fields for testing."""
-        return IndexSchema(
-            index=Index(name="test_vectors", prefix="vec"),
-            fields={
-                "float_vec": Field(
-                    name="float_vec",
-                    type=FieldTypes.VECTOR,
-                    attrs={"dims": 3, "datatype": VectorDataType.FLOAT32},
-                ),
-                "int_vec": Field(
-                    name="int_vec",
-                    type=FieldTypes.VECTOR,
-                    attrs={"dims": 3, "datatype": VectorDataType.INT8},
-                ),
-            },
-        )
-
-    def test_large_vectors(self, vector_schema):
-        """Test validation of very large vectors."""
-        # Create a large vector (1000 dimensions)
-        large_schema = IndexSchema(
-            index=Index(name="large_vec", prefix="vec"),
-            fields={
-                "large_vec": Field(
-                    name="large_vec",
-                    type=FieldTypes.VECTOR,
-                    attrs={"dims": 1000, "datatype": VectorDataType.FLOAT32},
-                )
-            },
-        )
-
-        # Valid large vector
-        large_vector = {"large_vec": [0.1] * 1000}
-        result = validate_object(large_schema, large_vector)
-        assert len(result["large_vec"]) == 1000
-
-        # Invalid dimensions
-        invalid_dims = {"large_vec": [0.1] * 999}
-        with pytest.raises(ValueError) as exc_info:
-            validate_object(large_schema, invalid_dims)
-        assert "dimensions" in str(exc_info.value)
-
-    def test_mixed_vector_types(self, vector_schema):
-        """Test validation of vectors with mixed element types."""
-        # Float vector with mixed types
-        mixed_float = {"float_vec": [1, 2.5, "3"]}
-        with pytest.raises(ValueError) as exc_info:
-            validate_object(vector_schema, mixed_float)
-        assert "float_vec" in str(exc_info.value)
-
-        # Int vector with mixed types
-        mixed_int = {"int_vec": [1, 2.5, 3]}
-        with pytest.raises(ValueError) as exc_info:
-            validate_object(vector_schema, mixed_int)
-        assert "int_vec" in str(exc_info.value)
-
-    def test_empty_vector(self, vector_schema):
-        """Test validation of empty vectors."""
-        # Empty float vector
-        empty_vec = {"float_vec": []}
-        with pytest.raises(ValueError) as exc_info:
-            validate_object(vector_schema, empty_vec)
-        assert "float_vec" in str(exc_info.value)
-        assert "dimensions" in str(exc_info.value)
-
-    def test_vector_int_range(self, vector_schema):
-        """Test validation of integer vectors with values outside allowed range."""
-        # INT8 vector with values outside range
-        out_of_range = {"int_vec": [100, 200, 300]}  # Valid int, but outside INT8 range
-        with pytest.raises(ValueError) as exc_info:
-            validate_object(vector_schema, out_of_range)
-        assert "int_vec" in str(exc_info.value)
-        assert "must be between" in str(exc_info.value)
-
-        # INT8 vector with valid range
-        valid_range = {"int_vec": [-128, 0, 127]}
-        result = validate_object(vector_schema, valid_range)
-        assert result["int_vec"] == [-128, 0, 127]
-
-
-class TestGeoEdgeCases:
-    """Tests for edge cases with geo fields."""
-
-    @pytest.fixture
-    def geo_schema(self):
-        """Create a schema with geo fields for testing."""
-        return IndexSchema(
-            index=Index(name="test_geo", prefix="geo"),
-            fields={"location": Field(name="location", type=FieldTypes.GEO)},
-        )
-
-    def test_geo_boundary_values(self, geo_schema):
-        """Test validation of geo fields with boundary values."""
-        # Valid boundary values
-        valid_boundaries = [
-            {"location": "90,180"},  # Max lat, max lon
-            {"location": "-90,-180"},  # Min lat, min lon
-            {"location": "0,0"},  # Zero point
-            {"location": "90,0"},  # North pole
-            {"location": "-90,0"},  # South pole
-        ]
-
-        for data in valid_boundaries:
-            result = validate_object(geo_schema, data)
-            assert result["location"] == data["location"]
-
-    def test_geo_invalid_boundary_values(self, geo_schema):
-        """Test validation of geo fields with invalid boundary values."""
-        # Invalid boundary values
-        invalid_boundaries = [
-            {"location": "91,0"},  # Lat > 90
-            {"location": "-91,0"},  # Lat < -90
-            {"location": "0,181"},  # Lon > 180
-            {"location": "0,-181"},  # Lon < -180
-            {"location": "90.1,0"},  # Lat > 90 (decimal)
-            {"location": "0,180.1"},  # Lon > 180 (decimal)
-        ]
-
-        for data in invalid_boundaries:
-            with pytest.raises(ValueError) as exc_info:
-                validate_object(geo_schema, data)
-            assert "location" in str(exc_info.value)
-            assert "not a valid" in str(exc_info.value)
-
-    def test_geo_formats(self, geo_schema):
-        """Test validation of geo fields with different formats."""
-        # Various valid formats
-        valid_formats = [
-            {"location": "37.7749,-122.4194"},  # Decimal degrees
-            {"location": "-37.7749,122.4194"},  # Negative latitude
-            {"location": "37.7749,122.4194"},  # Positive longitude
-            {"location": "0.0000,0.0000"},  # Zeros with decimal
-            {"location": "37,-122"},  # Integer degrees
-        ]
-
-        for data in valid_formats:
-            result = validate_object(geo_schema, data)
-            assert result["location"] == data["location"]
-
-        # Invalid formats
-        invalid_formats = [
-            {"location": "37.7749"},  # Missing longitude
-            {"location": "37.7749,"},  # Missing longitude value
-            {"location": ",122.4194"},  # Missing latitude value
-            {"location": "37.7749:122.4194"},  # Wrong separator
-            {"location": "37.7749, 122.4194"},  # Space after separator
-            {"location": "North,South"},  # Non-numeric values
-        ]
-
-        for data in invalid_formats:
-            with pytest.raises(ValueError) as exc_info:
-                validate_object(geo_schema, data)
-            assert "location" in str(exc_info.value)
-
-
-class TestNestedJsonEdgeCases:
-    """Tests for edge cases with nested JSON."""
-
-    @pytest.fixture
-    def nested_schema(self):
-        """Create a schema with JSON paths for testing."""
-        fields = {
-            "id": Field(name="id", type=FieldTypes.TAG),
-            "title": Field(name="title", type=FieldTypes.TEXT, path="$.content.title"),
-            "rating": Field(
-                name="rating", type=FieldTypes.NUMERIC, path="$.metadata.rating"
-            ),
-            "deeply_nested": Field(
-                name="deeply_nested",
-                type=FieldTypes.TEXT,
-                path="$.level1.level2.level3.level4.value",
-            ),
-        }
-
-        return IndexSchema(
-            index=Index(name="test_nested", prefix="nested"), fields=fields
-        )
-
-    def test_very_deeply_nested_json(self, nested_schema):
-        """Test validation with very deeply nested JSON."""
-        # Create a deeply nested structure
-        deeply_nested = {
-            "id": "doc1",
-            "level1": {
-                "level2": {"level3": {"level4": {"value": "deeply nested value"}}}
-            },
-        }
-
-        # Validate
-        result = validate_object(nested_schema, deeply_nested)
-        assert result["id"] == "doc1"
-        assert result["deeply_nested"] == "deeply nested value"
-
-    def test_partial_path_missing(self, nested_schema):
-        """Test validation when part of a JSON path is missing."""
-        # Create object with partial path missing
-        partial_missing = {
-            "id": "doc1",
-            "level1": {
-                "level2": {
-                    # level3 missing
-                }
-            },
-        }
-
-        # Validate - should ignore missing path
-        result = validate_object(nested_schema, partial_missing)
-        assert result["id"] == "doc1"
-        assert "deeply_nested" not in result
-
-    def test_nested_arrays(self):
-        """Test validation with nested arrays in JSON."""
-        # Create schema with path to array element
-        array_schema = IndexSchema(
-            index=Index(name="test_arrays", prefix="arr"),
-            fields={
-                "id": Field(name="id", type=FieldTypes.TAG),
-                "first_item": Field(
-                    name="first_item", type=FieldTypes.TEXT, path="$.items[0]"
-                ),
-                "nested_item": Field(
-                    name="nested_item",
-                    type=FieldTypes.TEXT,
-                    path="$.nested.items[1].name",
-                ),
-            },
-        )
-
-        # Note: JSONPath with array indexing is not supported currently
-        # This test documents this limitation
-
-        # Create data with arrays
-        array_data = {
-            "id": "arr1",
-            "items": ["first", "second", "third"],
-            "nested": {"items": [{"name": "item1"}, {"name": "item2"}]},
-        }
-
-        # Validate - array paths won't be found
-        result = validate_object(array_schema, array_data)
-        assert result["id"] == "arr1"
-        assert "first_item" not in result
-        assert "nested_item" not in result
-
-
-class TestValidationIntegrationEdgeCases:
-    """Tests for integration edge cases between storage and validation."""
-
-    @pytest.fixture
-    def storage_with_schema(self):
-        """Create a storage instance with schema for testing."""
-        schema = IndexSchema(
-            index=Index(name="test_storage", prefix="doc"),
-            fields={
-                "id": Field(name="id", type=FieldTypes.TAG),
-                "vec": Field(
-                    name="vec",
-                    type=FieldTypes.VECTOR,
-                    attrs={"dims": 3, "datatype": VectorDataType.FLOAT32},
-                ),
-            },
-        )
-
-        return BaseStorage(schema=schema, client=None)
-
-    def test_validation_with_bytes_no_client(self, storage_with_schema):
-        """Test validation with bytes when no Redis client is available."""
-        # No Redis client was provided, so hset won't be called
-        # This just tests that validation works with bytes
-
-        # Valid data with bytes
-        data = {"id": "doc1", "vec": b"\x00\x01\x02"}  # 3 bytes
-
-        # Validate - should work even without client
-        validated = storage_with_schema.validate_object(data)
-        assert validated["id"] == "doc1"
-        assert validated["vec"] == b"\x00\x01\x02"
-
-    def test_unexpected_field_is_ignored(self, storage_with_schema):
-        """Test that unexpected fields are ignored during validation."""
-        # Data with extra field
-        data = {
-            "id": "doc1",
-            "vec": [0.1, 0.2, 0.3],
-            "extra": "This field is not in the schema",
-        }
-
-        # Validate
-        validated = storage_with_schema.validate_object(data)
-
-        # Extra field should be ignored
-        assert validated["id"] == "doc1"
-        assert validated["vec"] == [0.1, 0.2, 0.3]
-        assert "extra" not in validated
diff --git a/tests/unit/test_fields.py b/tests/unit/test_fields.py
index 3376a67c..0c0d504e 100644
--- a/tests/unit/test_fields.py
+++ b/tests/unit/test_fields.py
@@ -219,58 +219,3 @@ def test_create_unknown_field_type():
     with pytest.raises(ValueError) as excinfo:
         FieldFactory.create_field("unknown", "example_field")
     assert "Unknown field type: unknown" in str(excinfo.value)
-
-
-# Add validation tests for each field type
-@pytest.mark.parametrize(
-    "field_class,valid_value,invalid_value,error_msg",
-    [
-        (TextField, "sample text", 123, "expects a string"),
-        (NumericField, 123.45, "123.45", "looks like a number"),
-        (TagField, ["tag1", "tag2"], ["tag1", 123], "must be a string"),
-        (GeoField, "37.7749,-122.4194", "invalid-geo", "not a valid 'lat,lon' format"),
-        # Add vector field test cases
-    ],
-)
-def test_field_validation(field_class, valid_value, invalid_value, error_msg):
-    """Test validation logic for each field type"""
-    # Create field instance
-    field = field_class(name="test_field")
-
-    # Test valid value
-    is_valid, error = field.validate(valid_value)
-    assert is_valid, f"Field should accept valid value: {valid_value}"
-    assert error is None, "No error message should be returned for valid value"
-
-    # Test invalid value
-    is_valid, error = field.validate(invalid_value)
-    assert not is_valid, f"Field should reject invalid value: {invalid_value}"
-    assert (
-        error_msg in error
-    ), f"Error message should contain '{error_msg}', got: {error}"
-
-
-def test_vector_field_validation():
-    """Test validation for vector fields specifically"""
-    # Create vector fields with specific dimensions
-    flat_field = create_flat_vector_field(dims=3)
-    hnsw_field = create_hnsw_vector_field(dims=3)
-
-    # Valid vector
-    valid_vector = [0.1, 0.2, 0.3]
-
-    # Test valid cases
-    assert flat_field.validate(valid_vector)[0], "Should accept valid vector"
-    assert hnsw_field.validate(valid_vector)[0], "Should accept valid vector"
-
-    # Test wrong dimensions
-    wrong_dims = [0.1, 0.2]  # Only 2 dimensions
-    is_valid, error = flat_field.validate(wrong_dims)
-    assert not is_valid, "Should reject vector with wrong dimensions"
-    assert "expects 3 dimensions" in error
-
-    # Test wrong type
-    wrong_type = ["a", "b", "c"]  # Strings instead of numbers
-    is_valid, error = hnsw_field.validate(wrong_type)
-    assert not is_valid, "Should reject vector with non-numeric elements"
-    assert "must be a number" in error
diff --git a/tests/unit/test_storage.py b/tests/unit/test_storage.py
index 4a34d340..11f51e73 100644
--- a/tests/unit/test_storage.py
+++ b/tests/unit/test_storage.py
@@ -19,6 +19,7 @@ def sample_hash_schema():
         "fields": [
             {"name": "test_id", "type": "tag"},
             {"name": "title", "type": "text"},
+            {"name": "user", "type": "tag"},
             {"name": "rating", "type": "numeric"},
             {"name": "location", "type": "geo"},
             {
@@ -57,14 +58,14 @@ def sample_json_schema():
             "storage_type": "json",
         },
         "fields": [
-            {"name": "test_id", "type": "tag", "path": "$.test_id"},
-            {"name": "user", "type": "tag", "path": "$.metadata.user"},
-            {"name": "title", "type": "text", "path": "$.content.title"},
-            {"name": "rating", "type": "numeric", "path": "$.metadata.rating"},
+            {"name": "test_id", "type": "tag"},
+            {"name": "user", "type": "tag"},
+            {"name": "title", "type": "text"},
+            {"name": "rating", "type": "numeric"},
+            {"name": "location", "type": "geo"},
             {
                 "name": "embedding",
                 "type": "vector",
-                "path": "$.content.embedding",
                 "attrs": {
                     "algorithm": "flat",
                     "dims": 4,
@@ -75,7 +76,6 @@ def sample_json_schema():
             {
                 "name": "int_vector",
                 "type": "vector",
-                "path": "$.content.int_vector",
                 "attrs": {
                     "algorithm": "flat",
                     "dims": 3,
@@ -120,7 +120,7 @@ def test_create_key(storage_instance):
 def test_validate_success(storage_instance):
     try:
-        storage_instance.validate(
+        storage_instance._validate(
             {"test_id": "1234", "rating": 5, "user": "john", "title": "engineer"}
         )
     except Exception as e:
@@ -130,10 +130,11 @@ def test_validate_failure(storage_instance):
     data = {"title": 5}
     with pytest.raises(ValidationError):
-        storage_instance.validate(data)
-    data = {"user": True}
+        storage_instance._validate(data)
+
+    data = {"user": [1]}
     with pytest.raises(ValidationError):
-        storage_instance.validate(data)
+        storage_instance._validate(data)
 
 def test_validate_preprocess_and_validate_failure(storage_instance):
@@ -143,7 +144,8 @@
     )
     with pytest.raises(SchemaValidationError):
         storage_instance._preprocess_and_validate_objects(objects=[data], validate=True)
-    data = {"user": True}
+
+    data = {"user": [1]}
     data == storage_instance._preprocess_and_validate_objects(
         objects=[data], validate=False
     )

From 7f56857a2efd4e0bdc9452f7a84b642c510cc8b3 Mon Sep 17 00:00:00 2001
From: Tyler Hutcherson
Date: Wed, 26 Mar 2025 11:07:26 -0400
Subject: [PATCH 04/11] Remove validation docs page

---
 docs/validation.md | 228 ---------------------------------------------
 1 file changed, 228 deletions(-)
 delete mode 100644 docs/validation.md

diff --git a/docs/validation.md b/docs/validation.md
deleted file mode 100644
index 204a009f..00000000
--- a/docs/validation.md
+++ /dev/null
@@ -1,228 +0,0 @@
-# RedisVL Validation System
-
-The RedisVL validation system ensures that data written to Redis indexes conforms to the defined schema. It uses dynamic Pydantic model generation to validate objects before they are stored.
-
-## Key Features
-
-- **Schema-Based Validation**: Validates objects against your index schema definition
-- **Dynamic Model Generation**: Creates Pydantic models on the fly based on your schema
-- **Type Checking**: Ensures fields contain appropriate data types
-- **Field-Specific Validation**:
-  - Text and Tag fields must be strings
-  - Numeric fields must be integers or floats
-  - Geo fields must be properly formatted latitude/longitude strings
-  - Vector fields must have the correct dimensions and data types
-- **JSON Path Support**: Validates fields extracted from nested JSON structures
-- **Fail-Fast Approach**: Stops processing at the first validation error
-- **Performance Optimized**: Caches models for repeated validation
-
-## Usage
-
-### Basic Validation
-
-```python
-from redisvl.schema.validation import validate_object
-
-# Assuming you have a schema defined
-validated_data = validate_object(schema, data)
-```
-
-### Storage Integration
-
-The validation is automatically integrated with the storage classes:
-
-```python
-from redisvl.index.storage import BaseStorage
-
-# Create storage with schema
-storage = BaseStorage(schema=schema, client=redis_client)
-
-# Write data - validation happens automatically
-storage.write_one(data)
-
-# Or validate explicitly
-validated = storage.validate_object(data)
-```
-
-## Field Type Validation
-
-The validation system supports all Redis field types:
-
-### Text Fields
-
-Text fields are validated to ensure they contain string values:
-
-```python
-# Valid
-{"title": "Hello World"}
-
-# Invalid
-{"title": 123}  # Not a string
-```
-
-### Tag Fields
-
-Tag fields are validated to ensure they contain string values:
-
-```python
-# Valid
-{"category": "electronics"}
-
-# Invalid
-{"category": 123}  # Not a string
-```
-
-### Numeric Fields
-
-Numeric fields must contain integers or floats:
-
-```python
-# Valid
-{"price": 19.99}
-{"quantity": 5}
-
-# Invalid
-{"price": "19.99"}  # String, not a number
-```
-
-### Geo Fields
-
-Geo fields must contain properly formatted latitude/longitude strings:
-
-```python
-# Valid
-{"location": "37.7749,-122.4194"}  # San Francisco
-{"location": "40.7128,-74.0060"}  # New York
-
-# Invalid
-{"location": "invalid"}  # Not in lat,lon format
-{"location": "91.0,0.0"}  # Latitude out of range (-90 to 90)
-{"location": "0.0,181.0"}  # Longitude out of range (-180 to 180)
-```
-
-### Vector Fields
-
-Vector fields must contain arrays with the correct dimensions and data types:
-
-```python
-# Valid
-{"embedding": [0.1, 0.2, 0.3, 0.4]}  # 4-dimensional float vector
-{"embedding": b'\x00\x01\x02\x03'}  # Raw bytes (dimensions not checked)
-
-# Invalid
-{"embedding": [0.1, 0.2, 0.3]}  # Wrong dimensions
-{"embedding": "not a vector"}  # Wrong type
-{"embedding": [0.1, "text", 0.3]}  # Mixed types
-```
-
-For integer vectors, the values must be within the appropriate range:
-
-- **INT8**: -128 to 127
-- **INT16**: -32,768 to 32,767
-
-```python
-# Valid INT8 vector
-{"int_vector": [1, 2, 3]}
-
-# Invalid INT8 vector
-{"int_vector": [1000, 2000, 3000]}  # Values out of range
-```
-
-## Nested JSON Validation
-
-The validation system supports extracting and validating fields from nested JSON structures:
-
-```python
-# Schema with JSON paths
-fields = {
-    "id": Field(name="id", type=FieldTypes.TAG),
-    "title": Field(name="title", type=FieldTypes.TEXT, path="$.content.title"),
-    "rating": Field(name="rating", type=FieldTypes.NUMERIC, path="$.metadata.rating")
-}
-
-# Nested JSON data
-data = {
-    "id": "doc1",
-    "content": {
-        "title": "Hello World"
-    },
-    "metadata": {
-        "rating": 4.5
-    }
-}
-
-# Validation extracts fields using JSON paths
-validated = validate_object(schema, data)
-# Result: {"id": "doc1", "title": "Hello World", "rating": 4.5}
-```
-
-## Error Handling
-
-The validation system uses a fail-fast approach, raising a `ValueError` when validation fails:
-
-```python
-try:
-    validated = validate_object(schema, data)
-except ValueError as e:
-    print(f"Validation error: {e}")
-    # Handle the error
-```
-
-The error message includes information about the field that failed validation.
-
-## Optional Fields
-
-All fields are considered optional during validation. If a field is missing, it will be excluded from the validated result:
-
-```python
-# Schema with multiple fields
-fields = {
-    "id": Field(name="id", type=FieldTypes.TAG),
-    "title": Field(name="title", type=FieldTypes.TEXT),
-    "rating": Field(name="rating", type=FieldTypes.NUMERIC)
-}
-
-# Data with missing fields
-data = {
-    "id": "doc1",
-    "title": "Hello World"
-    # rating is missing
-}
-
-# Validation succeeds with partial data
-validated = validate_object(schema, data)
-# Result: {"id": "doc1", "title": "Hello World"}
-```
-
-## Performance Considerations
-
-The validation system is optimized for performance:
-
-- **Model Caching**: Pydantic models are cached by schema name to avoid regeneration
-- **Lazy Validation**: Fields are validated only when needed
-- **Fail-Fast Approach**: Processing stops at the first validation error
-
-For large datasets, validation can be a significant part of the processing time. If you need to write many objects with the same structure, consider validating a sample first to ensure correctness.
-
-## Limitations
-
-- **JSON Path**: The current implementation only supports simple dot notation paths (e.g., `$.field.subfield`). Array indexing is not supported.
-- **Vector Bytes**: When vectors are provided as bytes, the dimensions cannot be validated.
-- **Custom Validators**: The current implementation does not support custom user-defined validators.
-
-## Best Practices
-
-1. **Define Clear Schemas**: Be explicit about field types and constraints
-2. **Pre-validate Critical Data**: For large datasets, validate a sample before processing everything
-3. **Handle Validation Errors**: Implement proper error handling for validation failures
-4. **Use JSON Paths Carefully**: Test nested JSON extraction to ensure paths are correctly defined
-5. **Consider Optional Fields**: Decide which fields are truly required for your application
-
-## Integration with Storage Classes
-
-The validation system is fully integrated with the storage classes:
-
-- **BaseStorage**: For hash-based storage, validates each field individually
-- **JsonStorage**: For JSON storage, extracts and validates fields from nested structures
-
-Each storage class automatically validates data before writing to Redis, ensuring data integrity.
\ No newline at end of file From 1d14f5f2e62c71ff13097433af57464b9253e0de Mon Sep 17 00:00:00 2001 From: Tyler Hutcherson Date: Wed, 26 Mar 2025 11:24:22 -0400 Subject: [PATCH 05/11] skip cell in notebook testing --- docs/user_guide/01_getting_started.ipynb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/user_guide/01_getting_started.ipynb b/docs/user_guide/01_getting_started.ipynb index 7ab3a234..bf097415 100644 --- a/docs/user_guide/01_getting_started.ipynb +++ b/docs/user_guide/01_getting_started.ipynb @@ -440,6 +440,8 @@ } ], "source": [ + "# NBVAL_SKIP\n", + "\n", "keys = index.load([{\"user_embedding\": True}])" ] }, From f2f5010524f39ed9f72793e0c12dc72e25aaacfa Mon Sep 17 00:00:00 2001 From: Tyler Hutcherson Date: Thu, 27 Mar 2025 15:25:45 -0400 Subject: [PATCH 06/11] update json path parser --- poetry.lock | 562 ++++++++++++++++++++++++++++++++++- pyproject.toml | 2 + redisvl/schema/validation.py | 41 +-- 3 files changed, 570 insertions(+), 35 deletions(-) diff --git a/poetry.lock b/poetry.lock index 85c682e7..ed0e1652 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.0.1 and should not be changed by hand. [[package]] name = "accessible-pygments" @@ -6,6 +6,8 @@ version = "0.0.5" description = "A collection of accessible pygments styles" optional = false python-versions = ">=3.9" +groups = ["docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "accessible_pygments-0.0.5-py3-none-any.whl", hash = "sha256:88ae3211e68a1d0b011504b2ffc1691feafce124b845bd072ab6f9f66f34d4b7"}, {file = "accessible_pygments-0.0.5.tar.gz", hash = "sha256:40918d3e6a2b619ad424cb91e556bd3bd8865443d9f22f1dcdf79e33c8046872"}, @@ -24,6 +26,8 @@ version = "2.4.6" description = "Happy Eyeballs for asyncio" optional = true python-versions = ">=3.9" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"voyageai\"" files = [ {file = "aiohappyeyeballs-2.4.6-py3-none-any.whl", hash = "sha256:147ec992cf873d74f5062644332c539fcd42956dc69453fe5204195e560517e1"}, {file = "aiohappyeyeballs-2.4.6.tar.gz", hash = "sha256:9b05052f9042985d32ecbe4b59a77ae19c006a78f1344d7fdad69d28ded3d0b0"}, @@ -35,6 +39,8 @@ version = "3.11.13" description = "Async http client/server framework (asyncio)" optional = true python-versions = ">=3.9" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"voyageai\"" files = [ {file = "aiohttp-3.11.13-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a4fe27dbbeec445e6e1291e61d61eb212ee9fed6e47998b27de71d70d3e8777d"}, {file = "aiohttp-3.11.13-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9e64ca2dbea28807f8484c13f684a2f761e69ba2640ec49dacd342763cc265ef"}, @@ -138,6 +144,8 @@ version = "1.2.1" description = "asyncio rate limiter, a leaky bucket implementation" optional = true python-versions = "<4.0,>=3.8" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"voyageai\"" files = [ {file = "aiolimiter-1.2.1-py3-none-any.whl", hash = "sha256:d3f249e9059a20badcb56b61601a83556133655c11d1eb3dd3e04ff069e5f3c7"}, {file = "aiolimiter-1.2.1.tar.gz", hash = "sha256:e02a37ea1a855d9e832252a105420ad4d15011505512a1a1d814647451b5cca9"}, @@ -149,6 +157,8 @@ version = "1.3.2" description = "aiosignal: a list of registered asynchronous callbacks" optional = true 
python-versions = ">=3.9" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"voyageai\"" files = [ {file = "aiosignal-1.3.2-py2.py3-none-any.whl", hash = "sha256:45cde58e409a301715980c2b01d0c28bdde3770d8290b5eb2173759d9acb31a5"}, {file = "aiosignal-1.3.2.tar.gz", hash = "sha256:a8c255c66fafb1e499c9351d0bf32ff2d8a0321595ebac3b93713656d2436f54"}, @@ -163,6 +173,8 @@ version = "0.7.16" description = "A light, configurable Sphinx theme" optional = false python-versions = ">=3.9" +groups = ["docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "alabaster-0.7.16-py3-none-any.whl", hash = "sha256:b46733c07dce03ae4e150330b975c75737fa60f0a7c591b6c8bf4928a28e2c92"}, {file = "alabaster-0.7.16.tar.gz", hash = "sha256:75a8b99c28a5dad50dd7f8ccdd447a121ddb3892da9e53d1ca5cca3106d58d65"}, @@ -174,6 +186,8 @@ version = "0.7.0" description = "Reusable constraint types to use with typing.Annotated" optional = false python-versions = ">=3.8" +groups = ["main"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"}, {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"}, @@ -185,6 +199,8 @@ version = "4.8.0" description = "High level compatibility layer for multiple asynchronous event loop implementations" optional = true python-versions = ">=3.9" +groups = ["main"] +markers = "(extra == \"openai\" or extra == \"cohere\" or extra == \"mistralai\") and (python_version <= \"3.11\" or python_version >= \"3.12\")" files = [ {file = "anyio-4.8.0-py3-none-any.whl", hash = "sha256:b5011f270ab5eb0abf13385f851315585cc37ef330dd88e27ec3d34d651fd47a"}, {file = "anyio-4.8.0.tar.gz", hash = "sha256:1d9fe889df5212298c0c0723fa20479d1b94883a2df44bd3897aa91083316f7a"}, @@ -207,6 +223,8 @@ version = "0.1.4" description = "Disable App Nap on macOS >= 10.9" optional = false python-versions = ">=3.6" +groups = ["dev", "docs"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and platform_system == \"Darwin\"" files = [ {file = "appnope-0.1.4-py2.py3-none-any.whl", hash = "sha256:502575ee11cd7a28c0205f379b525beefebab9d161b7c964670864014ed7213c"}, {file = "appnope-0.1.4.tar.gz", hash = "sha256:1de3860566df9caf38f01f86f65e0e13e379af54f9e4bee1e66b48f2efffd1ee"}, @@ -218,6 +236,8 @@ version = "3.3.8" description = "An abstract syntax tree for Python with inference support." 
optional = false python-versions = ">=3.9.0" +groups = ["dev"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "astroid-3.3.8-py3-none-any.whl", hash = "sha256:187ccc0c248bfbba564826c26f070494f7bc964fd286b6d9fff4420e55de828c"}, {file = "astroid-3.3.8.tar.gz", hash = "sha256:a88c7994f914a4ea8572fac479459f4955eeccc877be3f2d959a33273b0cf40b"}, @@ -232,6 +252,8 @@ version = "3.0.0" description = "Annotate AST trees with source code positions" optional = false python-versions = ">=3.8" +groups = ["dev", "docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "asttokens-3.0.0-py3-none-any.whl", hash = "sha256:e3078351a059199dd5138cb1c706e6430c05eff2ff136af5eb4790f9d28932e2"}, {file = "asttokens-3.0.0.tar.gz", hash = "sha256:0dcd8baa8d62b0c1d118b399b2ddba3c4aff271d0d7a9e0d4c1681c79035bbc7"}, @@ -247,6 +269,8 @@ version = "5.0.1" description = "Timeout context manager for asyncio programs" optional = false python-versions = ">=3.8" +groups = ["main"] +markers = "python_full_version < \"3.11.3\"" files = [ {file = "async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c"}, {file = "async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3"}, @@ -258,10 +282,12 @@ version = "25.1.0" description = "Classes Without Boilerplate" optional = false python-versions = ">=3.8" +groups = ["main", "dev", "docs"] files = [ {file = "attrs-25.1.0-py3-none-any.whl", hash = "sha256:c75a69e28a550a7e93789579c22aa26b0f5b83b75dc4e08fe092980051e1090a"}, {file = "attrs-25.1.0.tar.gz", hash = "sha256:1c97078a80c814273a76b2a298a932eb681c87415c11dee0a6921de7f1b02c3e"}, ] +markers = {main = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"voyageai\"", dev = "python_version <= \"3.11\" or python_version >= \"3.12\"", docs = "python_version <= \"3.11\" or python_version >= \"3.12\""} [package.extras] benchmark = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins", "pytest-xdist[psutil]"] @@ -277,6 +303,8 @@ version = "2.17.0" description = "Internationalization utilities" optional = false python-versions = ">=3.8" +groups = ["docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "babel-2.17.0-py3-none-any.whl", hash = "sha256:4d0b53093fdfb4b21c92b5213dba5a1b23885afa8383709427046b21c366e5f2"}, {file = "babel-2.17.0.tar.gz", hash = "sha256:0c54cffb19f690cdcc52a3b50bcbf71e07a808d1c80d549f2459b9d2cf0afb9d"}, @@ -291,10 +319,12 @@ version = "4.13.3" description = "Screen-scraping library" optional = false python-versions = ">=3.7.0" +groups = ["main", "docs"] files = [ {file = "beautifulsoup4-4.13.3-py3-none-any.whl", hash = "sha256:99045d7d3f08f91f0d656bc9b7efbae189426cd913d830294a15eefa0ea4df16"}, {file = "beautifulsoup4-4.13.3.tar.gz", hash = "sha256:1bd32405dacc920b42b83ba01644747ed77456a65760e285fbc47633ceddaf8b"}, ] +markers = {main = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"ranx\" and python_version >= \"3.10\"", docs = "python_version <= \"3.11\" or python_version >= \"3.12\""} [package.dependencies] soupsieve = ">1.2" @@ -313,6 +343,8 @@ version = "25.1.0" description = "The uncompromising code formatter." 
optional = false python-versions = ">=3.9" +groups = ["dev"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "black-25.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:759e7ec1e050a15f89b770cefbf91ebee8917aac5c20483bc2d80a6c3a04df32"}, {file = "black-25.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0e519ecf93120f34243e6b0054db49c00a35f84f195d5bce7e9f5cfc578fc2da"}, @@ -359,6 +391,8 @@ version = "6.2.0" description = "An easy safelist-based HTML-sanitizing tool." optional = false python-versions = ">=3.9" +groups = ["docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "bleach-6.2.0-py3-none-any.whl", hash = "sha256:117d9c6097a7c3d22fd578fcd8d35ff1e125df6736f554da4e432fdd63f31e5e"}, {file = "bleach-6.2.0.tar.gz", hash = "sha256:123e894118b8a599fd80d3ec1a6d4cc7ce4e5882b1317a7e1ba69b56e95f991f"}, @@ -377,6 +411,8 @@ version = "1.36.0" description = "The AWS SDK for Python" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"bedrock\"" files = [ {file = "boto3-1.36.0-py3-none-any.whl", hash = "sha256:d0ca7a58ce25701a52232cc8df9d87854824f1f2964b929305722ebc7959d5a9"}, {file = "boto3-1.36.0.tar.gz", hash = "sha256:159898f51c2997a12541c0e02d6e5a8fe2993ddb307b9478fd9a339f98b57e00"}, @@ -396,6 +432,8 @@ version = "1.36.26" description = "Low-level, data-driven core of boto 3." optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"bedrock\"" files = [ {file = "botocore-1.36.26-py3-none-any.whl", hash = "sha256:4e3f19913887a58502e71ef8d696fe7eaa54de7813ff73390cd5883f837dfa6e"}, {file = "botocore-1.36.26.tar.gz", hash = "sha256:4a63bcef7ecf6146fd3a61dc4f9b33b7473b49bdaf1770e9aaca6eee0c9eab62"}, @@ -418,6 +456,8 @@ version = "5.5.2" description = "Extensible memoizing collections and decorators" optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"vertexai\"" files = [ {file = "cachetools-5.5.2-py3-none-any.whl", hash = "sha256:d26a22bcc62eb95c3beabd9f1ee5e820d3d2704fe2967cbe350e20c8ffcd3f0a"}, {file = "cachetools-5.5.2.tar.gz", hash = "sha256:1a661caa9175d26759571b2e19580f9d6393969e5dfca11fdb1f947a23e640d4"}, @@ -429,6 +469,8 @@ version = "1.0.0" description = "RFC 7049 - Concise Binary Object Representation" optional = true python-versions = "*" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"ranx\" and python_version >= \"3.10\"" files = [ {file = "cbor-1.0.0.tar.gz", hash = "sha256:13225a262ddf5615cbd9fd55a76a0d53069d18b07d2e9f19c39e6acb8609bbb6"}, ] @@ -439,6 +481,8 @@ version = "5.6.5" description = "CBOR (de)serializer with extensive tag support" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"ranx\" and python_version >= \"3.10\"" files = [ {file = "cbor2-5.6.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e16c4a87fc999b4926f5c8f6c696b0d251b4745bc40f6c5aee51d69b30b15ca2"}, {file = "cbor2-5.6.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:87026fc838370d69f23ed8572939bd71cea2b3f6c8f8bb8283f573374b4d7f33"}, @@ -497,10 +541,12 @@ version = "2025.1.31" description = "Python package for providing Mozilla's CA Bundle." 
optional = false python-versions = ">=3.6" +groups = ["main", "dev", "docs"] files = [ {file = "certifi-2025.1.31-py3-none-any.whl", hash = "sha256:ca78db4565a652026a4db2bcdf68f2fb589ea80d0be70e03929ed730746b84fe"}, {file = "certifi-2025.1.31.tar.gz", hash = "sha256:3d5da6925056f6f18f119200434a4780a94263f10d1c21d032a6f6b2baa20651"}, ] +markers = {main = "(python_version <= \"3.11\" or python_version >= \"3.12\") and (extra == \"openai\" or extra == \"cohere\" or extra == \"mistralai\" or extra == \"sentence-transformers\" or extra == \"vertexai\" or extra == \"voyageai\" or extra == \"ranx\") and (extra == \"openai\" or extra == \"cohere\" or extra == \"mistralai\" or extra == \"sentence-transformers\" or extra == \"vertexai\" or extra == \"voyageai\" or python_version >= \"3.10\")", dev = "python_version <= \"3.11\" or python_version >= \"3.12\"", docs = "python_version <= \"3.11\" or python_version >= \"3.12\""} [[package]] name = "cffi" @@ -508,6 +554,7 @@ version = "1.17.1" description = "Foreign Function Interface for Python calling C code." optional = false python-versions = ">=3.8" +groups = ["dev", "docs"] files = [ {file = "cffi-1.17.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14"}, {file = "cffi-1.17.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8f2cdc858323644ab277e9bb925ad72ae0e67f69e804f4898c070998d50b1a67"}, @@ -577,6 +624,7 @@ files = [ {file = "cffi-1.17.1-cp39-cp39-win_amd64.whl", hash = "sha256:d016c76bdd850f3c626af19b0542c9677ba156e4ee4fccfdd7848803533ef662"}, {file = "cffi-1.17.1.tar.gz", hash = "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824"}, ] +markers = {dev = "(implementation_name == \"pypy\" or platform_python_implementation != \"PyPy\") and (python_version <= \"3.11\" or python_version >= \"3.12\")", docs = "(python_version <= \"3.11\" or python_version >= \"3.12\") and implementation_name == \"pypy\""} [package.dependencies] pycparser = "*" @@ -587,6 +635,8 @@ version = "3.4.0" description = "Validate configuration and produce human readable error messages." optional = false python-versions = ">=3.8" +groups = ["dev"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9"}, {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"}, @@ -598,6 +648,7 @@ version = "3.4.1" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 
optional = false python-versions = ">=3.7" +groups = ["main", "dev", "docs"] files = [ {file = "charset_normalizer-3.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:91b36a978b5ae0ee86c394f5a54d6ef44db1de0815eb43de826d41d21e4af3de"}, {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7461baadb4dc00fd9e0acbe254e3d7d2112e7f92ced2adc96e54ef6501c5f176"}, @@ -692,6 +743,7 @@ files = [ {file = "charset_normalizer-3.4.1-py3-none-any.whl", hash = "sha256:d98b1668f06378c6dbefec3b92299716b931cd4e6061f3c875a71ced1780ab85"}, {file = "charset_normalizer-3.4.1.tar.gz", hash = "sha256:44251f18cd68a75b56585dd00dae26183e102cd5e0f9f1466e6df5da2ed64ea3"}, ] +markers = {main = "(python_version <= \"3.11\" or python_version >= \"3.12\") and (extra == \"sentence-transformers\" or extra == \"cohere\" or extra == \"vertexai\" or extra == \"voyageai\" or extra == \"ranx\") and (extra == \"sentence-transformers\" or extra == \"cohere\" or extra == \"vertexai\" or extra == \"voyageai\" or python_version >= \"3.10\")", dev = "python_version <= \"3.11\" or python_version >= \"3.12\"", docs = "python_version <= \"3.11\" or python_version >= \"3.12\""} [[package]] name = "click" @@ -699,6 +751,8 @@ version = "8.1.8" description = "Composable command line interface toolkit" optional = false python-versions = ">=3.7" +groups = ["dev", "docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"}, {file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"}, @@ -713,6 +767,8 @@ version = "5.13.12" description = "" optional = true python-versions = "<4.0,>=3.9" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"cohere\"" files = [ {file = "cohere-5.13.12-py3-none-any.whl", hash = "sha256:2a043591a3e5280b47716a6b311e4c7f58e799364113a9cb81b50cd4f6c95f7e"}, {file = "cohere-5.13.12.tar.gz", hash = "sha256:97bb9ac107e580780b941acbabd3aa5e71960e6835398292c46aaa8a0a4cab88"}, @@ -735,10 +791,12 @@ version = "0.4.6" description = "Cross-platform colored terminal text." 
optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +groups = ["main", "dev", "docs"] files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +markers = {main = "(python_version <= \"3.11\" or python_version >= \"3.12\") and (extra == \"sentence-transformers\" or extra == \"cohere\" or extra == \"openai\" or extra == \"ranx\") and platform_system == \"Windows\" and (extra == \"sentence-transformers\" or extra == \"cohere\" or extra == \"openai\" or python_version >= \"3.10\")", dev = "(platform_system == \"Windows\" or sys_platform == \"win32\") and (python_version <= \"3.11\" or python_version >= \"3.12\")", docs = "(platform_system == \"Windows\" or sys_platform == \"win32\") and (python_version <= \"3.11\" or python_version >= \"3.12\")"} [[package]] name = "coloredlogs" @@ -746,6 +804,8 @@ version = "15.0.1" description = "Colored terminal output for Python's logging module" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +groups = ["main"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "coloredlogs-15.0.1-py2.py3-none-any.whl", hash = "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934"}, {file = "coloredlogs-15.0.1.tar.gz", hash = "sha256:7c991aa71a4577af2f82600d8f8f3a89f936baeaf9b50a9c197da014e5bf16b0"}, @@ -763,6 +823,8 @@ version = "0.2.2" description = "Jupyter Python Comm implementation, for usage in ipykernel, xeus-python etc." optional = false python-versions = ">=3.8" +groups = ["dev", "docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "comm-0.2.2-py3-none-any.whl", hash = "sha256:e6fb86cb70ff661ee8c9c14e7d36d6de3b4066f1441be4063df9c5009f0a64d3"}, {file = "comm-0.2.2.tar.gz", hash = "sha256:3fd7a84065306e07bea1773df6eb8282de51ba82f77c72f9c85716ab11fe980e"}, @@ -780,6 +842,8 @@ version = "1.3.1" description = "Python library for calculating contours of 2D quadrilateral grids" optional = true python-versions = ">=3.10" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"ranx\" and python_version >= \"3.10\"" files = [ {file = "contourpy-1.3.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a045f341a77b77e1c5de31e74e966537bba9f3c4099b35bf4c2e3939dd54cdab"}, {file = "contourpy-1.3.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:500360b77259914f7805af7462e41f9cb7ca92ad38e9f94d6c8641b089338124"}, @@ -853,6 +917,8 @@ version = "7.6.12" description = "Code coverage measurement for Python" optional = false python-versions = ">=3.9" +groups = ["dev"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "coverage-7.6.12-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:704c8c8c6ce6569286ae9622e534b4f5b9759b6f2cd643f1c1a61f666d534fe8"}, {file = "coverage-7.6.12-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ad7525bf0241e5502168ae9c643a2f6c219fa0a283001cee4cf23a9b7da75879"}, @@ -928,6 +994,8 @@ version = "2.9.1" description = "Thin Python bindings to de/compression algorithms in Rust" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"ranx\" and python_version >= \"3.10\"" files = [ {file = 
"cramjam-2.9.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:8e82464d1e00fbbb12958999b8471ba5e9f3d9711954505a0a7b378762332e6f"}, {file = "cramjam-2.9.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6d2df8a6511cc08ef1fccd2e0c65e2ebc9f57574ec8376052a76851af5398810"}, @@ -1030,6 +1098,8 @@ version = "44.0.1" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." optional = false python-versions = "!=3.9.0,!=3.9.1,>=3.7" +groups = ["dev"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "cryptography-44.0.1-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:bf688f615c29bfe9dfc44312ca470989279f0e94bb9f631f85e3459af8efc009"}, {file = "cryptography-44.0.1-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dd7c7e2d71d908dc0f8d2027e1604102140d84b155e658c20e8ad1304317691f"}, @@ -1083,6 +1153,8 @@ version = "0.12.1" description = "Composable style cycles" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"ranx\" and python_version >= \"3.10\"" files = [ {file = "cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30"}, {file = "cycler-0.12.1.tar.gz", hash = "sha256:88bb128f02ba341da8ef447245a9e138fae777f6a23943da4540077d3601eb1c"}, @@ -1098,6 +1170,8 @@ version = "1.8.12" description = "An implementation of the Debug Adapter Protocol for Python" optional = false python-versions = ">=3.8" +groups = ["dev", "docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "debugpy-1.8.12-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:a2ba7ffe58efeae5b8fad1165357edfe01464f9aef25e814e891ec690e7dd82a"}, {file = "debugpy-1.8.12-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cbbd4149c4fc5e7d508ece083e78c17442ee13b0e69bfa6bd63003e486770f45"}, @@ -1133,6 +1207,8 @@ version = "5.2.1" description = "Decorators for Humans" optional = false python-versions = ">=3.8" +groups = ["dev", "docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "decorator-5.2.1-py3-none-any.whl", hash = "sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a"}, {file = "decorator-5.2.1.tar.gz", hash = "sha256:65f266143752f734b0a7cc83c46f4618af75b8c5911b00ccb61d0ac9b6da0360"}, @@ -1144,6 +1220,8 @@ version = "0.7.1" description = "XML bomb protection for Python stdlib modules" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +groups = ["docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61"}, {file = "defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69"}, @@ -1155,6 +1233,8 @@ version = "0.3.9" description = "serialize all of Python" optional = false python-versions = ">=3.8" +groups = ["dev"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "dill-0.3.9-py3-none-any.whl", hash = "sha256:468dff3b89520b474c0397703366b7b95eebe6303f108adf9b19da1f702be87a"}, {file = "dill-0.3.9.tar.gz", hash = "sha256:81aa267dddf68cbfe8029c42ca9ec6a4ab3b22371d1c450abc54422577b4512c"}, @@ -1170,6 +1250,8 @@ version = "0.3.9" 
description = "Distribution utilities" optional = false python-versions = "*" +groups = ["dev"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "distlib-0.3.9-py2.py3-none-any.whl", hash = "sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87"}, {file = "distlib-0.3.9.tar.gz", hash = "sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403"}, @@ -1181,6 +1263,8 @@ version = "1.9.0" description = "Distro - an OS platform information API" optional = true python-versions = ">=3.6" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"openai\"" files = [ {file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"}, {file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"}, @@ -1192,6 +1276,8 @@ version = "7.1.0" description = "A Python library for the Docker Engine API." optional = false python-versions = ">=3.8" +groups = ["dev"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "docker-7.1.0-py3-none-any.whl", hash = "sha256:c96b93b7f0a746f9e77d325bcfb87422a3d8bd4f03136ae8a85b37f1898d5fc0"}, {file = "docker-7.1.0.tar.gz", hash = "sha256:ad8c70e6e3f8926cb8a92619b832b4ea5299e2831c14284663184e200546fa6c"}, @@ -1214,6 +1300,8 @@ version = "0.16" description = "Parse Python docstrings in reST, Google and Numpydoc format" optional = true python-versions = ">=3.6,<4.0" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"vertexai\"" files = [ {file = "docstring_parser-0.16-py3-none-any.whl", hash = "sha256:bf0a1387354d3691d102edef7ec124f219ef639982d096e26e3b60aeffa90637"}, {file = "docstring_parser-0.16.tar.gz", hash = "sha256:538beabd0af1e2db0146b6bd3caa526c35a34d61af9fd2887f3a8a27a739aa6e"}, @@ -1225,6 +1313,8 @@ version = "0.21.2" description = "Docutils -- Python Documentation Utilities" optional = false python-versions = ">=3.9" +groups = ["docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "docutils-0.21.2-py3-none-any.whl", hash = "sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2"}, {file = "docutils-0.21.2.tar.gz", hash = "sha256:3a6b18732edf182daa3cd12775bbb338cf5691468f91eeeb109deff6ebfa986f"}, @@ -1236,6 +1326,8 @@ version = "0.2.2" description = "Like `typing._eval_type`, but lets older Python versions use newer typing features." 
optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"mistralai\"" files = [ {file = "eval_type_backport-0.2.2-py3-none-any.whl", hash = "sha256:cb6ad7c393517f476f96d456d0412ea80f0a8cf96f6892834cd9340149111b0a"}, {file = "eval_type_backport-0.2.2.tar.gz", hash = "sha256:f0576b4cf01ebb5bd358d02314d31846af5e07678387486e2c798af0e7d849c1"}, @@ -1250,10 +1342,12 @@ version = "1.2.2" description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" +groups = ["main", "dev", "docs"] files = [ {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, ] +markers = {main = "(extra == \"openai\" or extra == \"cohere\" or extra == \"mistralai\") and python_version < \"3.11\"", dev = "python_version < \"3.11\"", docs = "python_version < \"3.11\""} [package.extras] test = ["pytest (>=6)"] @@ -1264,6 +1358,8 @@ version = "2.1.1" description = "execnet: rapid multi-Python deployment" optional = false python-versions = ">=3.8" +groups = ["dev"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "execnet-2.1.1-py3-none-any.whl", hash = "sha256:26dee51f1b80cebd6d0ca8e74dd8745419761d3bef34163928cbebbdc4749fdc"}, {file = "execnet-2.1.1.tar.gz", hash = "sha256:5189b52c6121c24feae288166ab41b32549c7e2348652736540b9e6e7d4e72e3"}, @@ -1278,6 +1374,8 @@ version = "2.2.0" description = "Get the currently executing AST node of a frame, and other information" optional = false python-versions = ">=3.8" +groups = ["dev", "docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "executing-2.2.0-py2.py3-none-any.whl", hash = "sha256:11387150cad388d62750327a53d3339fad4888b39a6fe233c3afbb54ecffd3aa"}, {file = "executing-2.2.0.tar.gz", hash = "sha256:5d108c028108fe2551d1a7b2e8b713341e2cb4fc0aa7dcf966fa4327a5226755"}, @@ -1292,6 +1390,8 @@ version = "1.10.0" description = "Fast read/write of AVRO files" optional = true python-versions = ">=3.9" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"cohere\"" files = [ {file = "fastavro-1.10.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:1a9fe0672d2caf0fe54e3be659b13de3cad25a267f2073d6f4b9f8862acc31eb"}, {file = "fastavro-1.10.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:86dd0410770e0c99363788f0584523709d85e57bb457372ec5c285a482c17fe6"}, @@ -1338,6 +1438,8 @@ version = "2.21.1" description = "Fastest Python implementation of JSON schema" optional = false python-versions = "*" +groups = ["dev", "docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "fastjsonschema-2.21.1-py3-none-any.whl", hash = "sha256:c9e5b7e908310918cf494a434eeb31384dd84a98b57a30bcb1f535015b554667"}, {file = "fastjsonschema-2.21.1.tar.gz", hash = "sha256:794d4f0a58f848961ba16af7b9c85a3e88cd360df008c59aac6fc5ae9323b5d4"}, @@ -1352,6 +1454,8 @@ version = "2024.11.0" description = "Python support for Parquet file format" optional = true python-versions = ">=3.9" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"ranx\" and python_version >= \"3.10\"" files = [ {file = "fastparquet-2024.11.0-cp310-cp310-macosx_10_9_universal2.whl", 
hash = "sha256:60ccf587410f0979105e17036df61bb60e1c2b81880dc91895cdb4ee65b71e7f"}, {file = "fastparquet-2024.11.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a5ad5fc14b0567e700bea3cd528a0bd45a6f9371370b49de8889fb3d10a6574a"}, @@ -1412,10 +1516,12 @@ version = "3.17.0" description = "A platform independent file lock." optional = false python-versions = ">=3.9" +groups = ["main", "dev"] files = [ {file = "filelock-3.17.0-py3-none-any.whl", hash = "sha256:533dc2f7ba78dc2f0f531fc6c4940addf7b70a481e269a5a3b93be94ffbe8338"}, {file = "filelock-3.17.0.tar.gz", hash = "sha256:ee4e77401ef576ebb38cd7f13b9b28893194acc20a8e68e18730ba9c0e54660e"}, ] +markers = {main = "(extra == \"sentence-transformers\" or extra == \"cohere\") and (python_version <= \"3.11\" or python_version >= \"3.12\")", dev = "python_version <= \"3.11\" or python_version >= \"3.12\""} [package.extras] docs = ["furo (>=2024.8.6)", "sphinx (>=8.1.3)", "sphinx-autodoc-typehints (>=3)"] @@ -1428,6 +1534,8 @@ version = "4.56.0" description = "Tools to manipulate font files" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"ranx\" and python_version >= \"3.10\"" files = [ {file = "fonttools-4.56.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:331954d002dbf5e704c7f3756028e21db07097c19722569983ba4d74df014000"}, {file = "fonttools-4.56.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8d1613abd5af2f93c05867b3a3759a56e8bf97eb79b1da76b2bc10892f96ff16"}, @@ -1501,6 +1609,8 @@ version = "1.5.0" description = "A list-like structure which implements collections.abc.MutableSequence" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"voyageai\"" files = [ {file = "frozenlist-1.5.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5b6a66c18b5b9dd261ca98dffcb826a525334b2f29e7caa54e182255c5f6a65a"}, {file = "frozenlist-1.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d1b3eb7b05ea246510b43a7e53ed1653e55c2121019a97e60cad7efb881a97bb"}, @@ -1602,6 +1712,8 @@ version = "2025.2.0" description = "File-system specification" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and (extra == \"sentence-transformers\" or extra == \"cohere\" or extra == \"ranx\") and (extra == \"sentence-transformers\" or extra == \"cohere\" or python_version >= \"3.10\")" files = [ {file = "fsspec-2025.2.0-py3-none-any.whl", hash = "sha256:9de2ad9ce1f85e1931858535bc882543171d197001a0a5eb2ddc04f1781ab95b"}, {file = "fsspec-2025.2.0.tar.gz", hash = "sha256:1c24b16eaa0a1798afa0337aa0db9b256718ab2a89c425371f5628d22c3b6afd"}, @@ -1641,6 +1753,8 @@ version = "2.24.1" description = "Google API client core library" optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"vertexai\"" files = [ {file = "google_api_core-2.24.1-py3-none-any.whl", hash = "sha256:bc78d608f5a5bf853b80bd70a795f703294de656c096c0968320830a4bc280f1"}, {file = "google_api_core-2.24.1.tar.gz", hash = "sha256:f8b36f5456ab0dd99a1b693a40a31d1e7757beea380ad1b38faaf8941eae9d8a"}, @@ -1676,6 +1790,8 @@ version = "2.38.0" description = "Google Authentication Library" optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"vertexai\"" files = 
[ {file = "google_auth-2.38.0-py2.py3-none-any.whl", hash = "sha256:e7dae6694313f434a2727bf2906f27ad259bae090d7aa896590d86feec3d9d4a"}, {file = "google_auth-2.38.0.tar.gz", hash = "sha256:8285113607d3b80a3f1543b75962447ba8a09fe85783432a784fdeef6ac094c4"}, @@ -1700,6 +1816,8 @@ version = "1.82.0" description = "Vertex AI API client library" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"vertexai\"" files = [ {file = "google_cloud_aiplatform-1.82.0-py2.py3-none-any.whl", hash = "sha256:13368a961b2bfa8f46ccd10371bb19bd5f946d8f29c411726061ed1a140ce890"}, {file = "google_cloud_aiplatform-1.82.0.tar.gz", hash = "sha256:b7ea7379249cc1821aa46300a16e4b15aa64aa22665e2536b2bcb7e473d7438e"}, @@ -1751,6 +1869,8 @@ version = "3.30.0" description = "Google BigQuery API client library" optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"vertexai\"" files = [ {file = "google_cloud_bigquery-3.30.0-py2.py3-none-any.whl", hash = "sha256:f4d28d846a727f20569c9b2d2f4fa703242daadcb2ec4240905aa485ba461877"}, {file = "google_cloud_bigquery-3.30.0.tar.gz", hash = "sha256:7e27fbafc8ed33cc200fe05af12ecd74d279fe3da6692585a3cef7aee90575b6"}, @@ -1782,6 +1902,8 @@ version = "2.4.2" description = "Google Cloud API client core library" optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"vertexai\"" files = [ {file = "google_cloud_core-2.4.2-py2.py3-none-any.whl", hash = "sha256:7459c3e83de7cb8b9ecfec9babc910efb4314030c56dd798eaad12c426f7d180"}, {file = "google_cloud_core-2.4.2.tar.gz", hash = "sha256:a4fcb0e2fcfd4bfe963837fad6d10943754fd79c1a50097d68540b6eb3d67f35"}, @@ -1800,6 +1922,8 @@ version = "1.14.1" description = "Google Cloud Resource Manager API client library" optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"vertexai\"" files = [ {file = "google_cloud_resource_manager-1.14.1-py2.py3-none-any.whl", hash = "sha256:68340599f85ebf07a6e18487e460ea07cc15e132068f6b188786d01c2cf25518"}, {file = "google_cloud_resource_manager-1.14.1.tar.gz", hash = "sha256:41e9e546aaa03d5160cdfa2341dbe81ef7596706c300a89b94c429f1f3411f87"}, @@ -1821,6 +1945,8 @@ version = "2.19.0" description = "Google Cloud Storage API client library" optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"vertexai\"" files = [ {file = "google_cloud_storage-2.19.0-py2.py3-none-any.whl", hash = "sha256:aeb971b5c29cf8ab98445082cbfe7b161a1f48ed275822f59ed3f1524ea54fba"}, {file = "google_cloud_storage-2.19.0.tar.gz", hash = "sha256:cd05e9e7191ba6cb68934d8eb76054d9be4562aa89dbc4236feee4d7d51342b2"}, @@ -1844,6 +1970,8 @@ version = "1.6.0" description = "A python wrapper of the C library 'Google CRC32C'" optional = true python-versions = ">=3.9" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"vertexai\"" files = [ {file = "google_crc32c-1.6.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:5bcc90b34df28a4b38653c36bb5ada35671ad105c99cfe915fb5bed7ad6924aa"}, {file = "google_crc32c-1.6.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:d9e9913f7bd69e093b81da4535ce27af842e7bf371cde42d1ae9e9bd382dc0e9"}, @@ -1883,6 +2011,8 @@ version 
= "2.7.2" description = "Utilities for Google Media Downloads and Resumable Uploads" optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"vertexai\"" files = [ {file = "google_resumable_media-2.7.2-py2.py3-none-any.whl", hash = "sha256:3ce7551e9fe6d99e9a126101d2536612bb73486721951e9562fee0f90c6ababa"}, {file = "google_resumable_media-2.7.2.tar.gz", hash = "sha256:5280aed4629f2b60b847b0d42f9857fd4935c11af266744df33d8074cae92fe0"}, @@ -1901,6 +2031,8 @@ version = "1.68.0" description = "Common protobufs used in Google APIs" optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"vertexai\"" files = [ {file = "googleapis_common_protos-1.68.0-py2.py3-none-any.whl", hash = "sha256:aaf179b2f81df26dfadac95def3b16a95064c76a5f45f07e4c68a21bb371c4ac"}, {file = "googleapis_common_protos-1.68.0.tar.gz", hash = "sha256:95d38161f4f9af0d9423eed8fb7b64ffd2568c3464eb542ff02c5bfa1953ab3c"}, @@ -1919,6 +2051,8 @@ version = "3.1.1" description = "Lightweight in-process concurrent programming" optional = false python-versions = ">=3.7" +groups = ["docs"] +markers = "(platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\") and (python_version <= \"3.11\" or python_version >= \"3.12\")" files = [ {file = "greenlet-3.1.1-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:0bbae94a29c9e5c7e4a2b7f0aae5c17e8e90acbfd3bf6270eeba60c39fce3563"}, {file = "greenlet-3.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fde093fb93f35ca72a556cf72c92ea3ebfda3d79fc35bb19fbe685853869a83"}, @@ -2005,6 +2139,8 @@ version = "0.14.0" description = "IAM API client library" optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"vertexai\"" files = [ {file = "grpc_google_iam_v1-0.14.0-py2.py3-none-any.whl", hash = "sha256:fb4a084b30099ba3ab07d61d620a0d4429570b13ff53bd37bac75235f98b7da4"}, {file = "grpc_google_iam_v1-0.14.0.tar.gz", hash = "sha256:c66e07aa642e39bb37950f9e7f491f70dad150ac9801263b42b2814307c2df99"}, @@ -2021,6 +2157,8 @@ version = "1.70.0" description = "HTTP/2-based RPC framework" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"vertexai\"" files = [ {file = "grpcio-1.70.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:95469d1977429f45fe7df441f586521361e235982a0b39e33841549143ae2851"}, {file = "grpcio-1.70.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:ed9718f17fbdb472e33b869c77a16d0b55e166b100ec57b016dc7de9c8d236bf"}, @@ -2088,6 +2226,8 @@ version = "1.70.0" description = "Status proto mapping for gRPC" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"vertexai\" and (python_version <= \"3.11\" or python_version >= \"3.12\")" files = [ {file = "grpcio_status-1.70.0-py3-none-any.whl", hash = "sha256:fc5a2ae2b9b1c1969cc49f3262676e6854aa2398ec69cb5bd6c47cd501904a85"}, {file = "grpcio_status-1.70.0.tar.gz", hash = "sha256:0e7b42816512433b18b9d764285ff029bde059e9d41f8fe10a60631bd8348101"}, @@ -2104,6 +2244,8 @@ version = "0.14.0" description = "A pure-Python, bring-your-own-I/O implementation of 
HTTP/1.1" optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "(extra == \"openai\" or extra == \"cohere\" or extra == \"mistralai\") and (python_version <= \"3.11\" or python_version >= \"3.12\")" files = [ {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, @@ -2115,6 +2257,8 @@ version = "1.0.7" description = "A minimal low-level HTTP client." optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "(extra == \"openai\" or extra == \"cohere\" or extra == \"mistralai\") and (python_version <= \"3.11\" or python_version >= \"3.12\")" files = [ {file = "httpcore-1.0.7-py3-none-any.whl", hash = "sha256:a3fff8f43dc260d5bd363d9f9cf1830fa3a458b332856f34282de498ed420edd"}, {file = "httpcore-1.0.7.tar.gz", hash = "sha256:8551cb62a169ec7162ac7be8d4817d561f60e08eaa485234898414bb5a8a0b4c"}, @@ -2136,6 +2280,8 @@ version = "0.28.1" description = "The next generation HTTP client." optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "(extra == \"openai\" or extra == \"cohere\" or extra == \"mistralai\") and (python_version <= \"3.11\" or python_version >= \"3.12\")" files = [ {file = "httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad"}, {file = "httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc"}, @@ -2160,6 +2306,8 @@ version = "0.4.0" description = "Consume Server-Sent Event (SSE) messages with HTTPX." optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"cohere\"" files = [ {file = "httpx-sse-0.4.0.tar.gz", hash = "sha256:1e81a3a3070ce322add1d3529ed42eb5f70817f45ed6ec915ab753f961139721"}, {file = "httpx_sse-0.4.0-py3-none-any.whl", hash = "sha256:f329af6eae57eaa2bdfd962b42524764af68075ea87370a2de920af5341e318f"}, @@ -2171,6 +2319,8 @@ version = "0.29.1" description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" optional = true python-versions = ">=3.8.0" +groups = ["main"] +markers = "(extra == \"sentence-transformers\" or extra == \"cohere\") and (python_version <= \"3.11\" or python_version >= \"3.12\")" files = [ {file = "huggingface_hub-0.29.1-py3-none-any.whl", hash = "sha256:352f69caf16566c7b6de84b54a822f6238e17ddd8ae3da4f8f2272aea5b198d5"}, {file = "huggingface_hub-0.29.1.tar.gz", hash = "sha256:9524eae42077b8ff4fc459ceb7a514eca1c1232b775276b009709fe2a084f250"}, @@ -2205,6 +2355,8 @@ version = "10.0" description = "Human friendly output for text interfaces using Python" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +groups = ["main"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "humanfriendly-10.0-py2.py3-none-any.whl", hash = "sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477"}, {file = "humanfriendly-10.0.tar.gz", hash = "sha256:6b0b831ce8f15f7300721aa49829fc4e83921a9a301cc7f606be6686a2288ddc"}, @@ -2219,6 +2371,8 @@ version = "2.6.8" description = "File identification library for Python" optional = false python-versions = ">=3.9" +groups = ["dev"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "identify-2.6.8-py2.py3-none-any.whl", hash = 
"sha256:83657f0f766a3c8d0eaea16d4ef42494b39b34629a4b3192a9d020d349b3e255"}, {file = "identify-2.6.8.tar.gz", hash = "sha256:61491417ea2c0c5c670484fd8abbb34de34cdae1e5f39a73ee65e48e4bb663fc"}, @@ -2233,10 +2387,12 @@ version = "3.10" description = "Internationalized Domain Names in Applications (IDNA)" optional = false python-versions = ">=3.6" +groups = ["main", "dev", "docs"] files = [ {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"}, {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"}, ] +markers = {main = "(python_version <= \"3.11\" or python_version >= \"3.12\") and (extra == \"openai\" or extra == \"cohere\" or extra == \"mistralai\" or extra == \"sentence-transformers\" or extra == \"vertexai\" or extra == \"voyageai\" or extra == \"ranx\") and (extra == \"openai\" or extra == \"cohere\" or extra == \"mistralai\" or extra == \"sentence-transformers\" or extra == \"vertexai\" or extra == \"voyageai\" or python_version >= \"3.10\")", dev = "python_version <= \"3.11\" or python_version >= \"3.12\"", docs = "python_version <= \"3.11\" or python_version >= \"3.12\""} [package.extras] all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"] @@ -2247,6 +2403,8 @@ version = "3.3.0" description = "Iterative JSON parser with standard Python iterator interfaces" optional = true python-versions = "*" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"ranx\" and python_version >= \"3.10\"" files = [ {file = "ijson-3.3.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7f7a5250599c366369fbf3bc4e176f5daa28eb6bc7d6130d02462ed335361675"}, {file = "ijson-3.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f87a7e52f79059f9c58f6886c262061065eb6f7554a587be7ed3aa63e6b71b34"}, @@ -2350,6 +2508,8 @@ version = "1.4.1" description = "Getting image size from png/jpeg/jpeg2000/gif file" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +groups = ["docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "imagesize-1.4.1-py2.py3-none-any.whl", hash = "sha256:0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b"}, {file = "imagesize-1.4.1.tar.gz", hash = "sha256:69150444affb9cb0d5cc5a92b3676f0b2fb7cd9ae39e947a5e11a36b4497cd4a"}, @@ -2361,10 +2521,12 @@ version = "8.6.1" description = "Read metadata from Python packages" optional = false python-versions = ">=3.9" +groups = ["dev", "docs"] files = [ {file = "importlib_metadata-8.6.1-py3-none-any.whl", hash = "sha256:02a89390c1e15fdfdc0d7c6b25cb3e62650d0494005c97d6f148bf5b9787525e"}, {file = "importlib_metadata-8.6.1.tar.gz", hash = "sha256:310b41d755445d74569f993ccfc22838295d9fe005425094fad953d7f15c8580"}, ] +markers = {dev = "python_version < \"3.10\"", docs = "python_version <= \"3.11\" or python_version >= \"3.12\""} [package.dependencies] zipp = ">=3.20" @@ -2384,6 +2546,8 @@ version = "2.0.0" description = "brain-dead simple config-ini parsing" optional = false python-versions = ">=3.7" +groups = ["dev"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, @@ -2395,6 +2559,8 @@ version = "2.5.3" 
description = "inscriptis - HTML to text converter." optional = true python-versions = "<4.0,>=3.9" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"ranx\" and python_version >= \"3.10\"" files = [ {file = "inscriptis-2.5.3-py3-none-any.whl", hash = "sha256:25962cf5a60b1a8f33e7bfbbea08a29af82299702339b9b90c538653a5c7aa38"}, {file = "inscriptis-2.5.3.tar.gz", hash = "sha256:256043caa13e4995c71fafdeadec4ac42b57f3914cb41023ecbee8bc27ca1cc0"}, @@ -2413,6 +2579,8 @@ version = "6.29.5" description = "IPython Kernel for Jupyter" optional = false python-versions = ">=3.8" +groups = ["dev", "docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "ipykernel-6.29.5-py3-none-any.whl", hash = "sha256:afdb66ba5aa354b09b91379bac28ae4afebbb30e8b39510c9690afb7a10421b5"}, {file = "ipykernel-6.29.5.tar.gz", hash = "sha256:f093a22c4a40f8828f8e330a9c297cb93dcab13bd9678ded6de8e5cf81c56215"}, @@ -2446,6 +2614,8 @@ version = "8.18.1" description = "IPython: Productive Interactive Computing" optional = false python-versions = ">=3.9" +groups = ["dev", "docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "ipython-8.18.1-py3-none-any.whl", hash = "sha256:e8267419d72d81955ec1177f8a29aaa90ac80ad647499201119e2f05e99aa397"}, {file = "ipython-8.18.1.tar.gz", hash = "sha256:ca6f079bb33457c66e233e4580ebfc4128855b4cf6370dddd73842a9563e8a27"}, @@ -2483,6 +2653,8 @@ version = "0.5.9" description = "provides a common interface to many IR ad-hoc ranking benchmarks, training datasets, etc." optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"ranx\" and python_version >= \"3.10\"" files = [ {file = "ir_datasets-0.5.9-py3-none-any.whl", hash = "sha256:07c9bed07f31031f1da1bc02afc7a1077b1179a3af402d061f83bf6fb833b90a"}, {file = "ir_datasets-0.5.9.tar.gz", hash = "sha256:35c90980fbd0f4ea8fe22a1ab16d2bb6be3dc373cbd6dfab1d905f176a70e5ac"}, @@ -2510,6 +2682,8 @@ version = "5.13.2" description = "A Python utility / library to sort Python imports." optional = false python-versions = ">=3.8.0" +groups = ["dev"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "isort-5.13.2-py3-none-any.whl", hash = "sha256:8ca5e72a8d85860d5a3fa69b8745237f2939afe12dbf656afbcb47fe72d947a6"}, {file = "isort-5.13.2.tar.gz", hash = "sha256:48fdfcb9face5d58a4f6dde2e72a1fb8dcaf8ab26f95ab49fab84c2ddefb0109"}, @@ -2524,6 +2698,8 @@ version = "0.19.2" description = "An autocompletion tool for Python that can be used for text editors." optional = false python-versions = ">=3.6" +groups = ["dev", "docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "jedi-0.19.2-py2.py3-none-any.whl", hash = "sha256:a8ef22bde8490f57fe5c7681a3c83cb58874daf72b4784de3cce5b6ef6edb5b9"}, {file = "jedi-0.19.2.tar.gz", hash = "sha256:4770dc3de41bde3966b02eb84fbcf557fb33cce26ad23da12c742fb50ecb11f0"}, @@ -2543,6 +2719,8 @@ version = "3.1.5" description = "A very fast and expressive template engine." 
optional = false python-versions = ">=3.7" +groups = ["main", "docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "jinja2-3.1.5-py3-none-any.whl", hash = "sha256:aba0f4dc9ed8013c424088f68a5c226f7d6097ed89b246d7749c2ec4175c6adb"}, {file = "jinja2-3.1.5.tar.gz", hash = "sha256:8fefff8dc3034e27bb80d67c671eb8a9bc424c0ef4c0826edbff304cceff43bb"}, @@ -2560,6 +2738,8 @@ version = "0.8.2" description = "Fast iterable JSON parser." optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"openai\"" files = [ {file = "jiter-0.8.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:ca8577f6a413abe29b079bc30f907894d7eb07a865c4df69475e868d73e71c7b"}, {file = "jiter-0.8.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b25bd626bde7fb51534190c7e3cb97cee89ee76b76d7585580e22f34f5e3f393"}, @@ -2645,6 +2825,8 @@ version = "1.0.1" description = "JSON Matching Expressions" optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"bedrock\"" files = [ {file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"}, {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, @@ -2656,17 +2838,38 @@ version = "1.4.2" description = "Lightweight pipelining with Python functions" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"sentence-transformers\"" files = [ {file = "joblib-1.4.2-py3-none-any.whl", hash = "sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6"}, {file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"}, ] +[[package]] +name = "jsonpath-ng" +version = "1.7.0" +description = "A final implementation of JSONPath for Python that aims to be standard compliant, including arithmetic and binary comparison operators and providing clear AST for metaprogramming." 
+optional = false +python-versions = "*" +groups = ["main"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" +files = [ + {file = "jsonpath-ng-1.7.0.tar.gz", hash = "sha256:f6f5f7fd4e5ff79c785f1573b394043b39849fb2bb47bcead935d12b00beab3c"}, + {file = "jsonpath_ng-1.7.0-py2-none-any.whl", hash = "sha256:898c93fc173f0c336784a3fa63d7434297544b7198124a68f9a3ef9597b0ae6e"}, + {file = "jsonpath_ng-1.7.0-py3-none-any.whl", hash = "sha256:f3d7f9e848cba1b6da28c55b1c26ff915dc9e0b1ba7e752a53d6da8d5cbd00b6"}, +] + +[package.dependencies] +ply = "*" + [[package]] name = "jsonpath-python" version = "1.0.6" description = "A more powerful JSONPath implementation in modern python" optional = true python-versions = ">=3.6" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"mistralai\"" files = [ {file = "jsonpath-python-1.0.6.tar.gz", hash = "sha256:dd5be4a72d8a2995c3f583cf82bf3cd1a9544cfdabf2d22595b67aff07349666"}, {file = "jsonpath_python-1.0.6-py3-none-any.whl", hash = "sha256:1e3b78df579f5efc23565293612decee04214609208a2335884b3ee3f786b575"}, @@ -2678,6 +2881,8 @@ version = "4.23.0" description = "An implementation of JSON Schema validation for Python" optional = false python-versions = ">=3.8" +groups = ["dev", "docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "jsonschema-4.23.0-py3-none-any.whl", hash = "sha256:fbadb6f8b144a8f8cf9f0b89ba94501d143e50411a1278633f56a7acf7fd5566"}, {file = "jsonschema-4.23.0.tar.gz", hash = "sha256:d71497fef26351a33265337fa77ffeb82423f3ea21283cd9467bb03999266bc4"}, @@ -2699,6 +2904,8 @@ version = "2024.10.1" description = "The JSON Schema meta-schemas and vocabularies, exposed as a Registry" optional = false python-versions = ">=3.9" +groups = ["dev", "docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "jsonschema_specifications-2024.10.1-py3-none-any.whl", hash = "sha256:a09a0680616357d9a0ecf05c12ad234479f549239d0f5b55f3deea67475da9bf"}, {file = "jsonschema_specifications-2024.10.1.tar.gz", hash = "sha256:0f38b83639958ce1152d02a7f062902c41c8fd20d558b0c34344292d417ae272"}, @@ -2713,6 +2920,8 @@ version = "1.0.1" description = "A defined interface for working with a cache of jupyter notebooks." optional = false python-versions = ">=3.9" +groups = ["docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "jupyter_cache-1.0.1-py3-none-any.whl", hash = "sha256:9c3cafd825ba7da8b5830485343091143dff903e4d8c69db9349b728b140abf6"}, {file = "jupyter_cache-1.0.1.tar.gz", hash = "sha256:16e808eb19e3fb67a223db906e131ea6e01f03aa27f49a7214ce6a5fec186fb9"}, @@ -2740,6 +2949,8 @@ version = "8.6.3" description = "Jupyter protocol implementation and client libraries" optional = false python-versions = ">=3.8" +groups = ["dev", "docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "jupyter_client-8.6.3-py3-none-any.whl", hash = "sha256:e8a19cc986cc45905ac3362915f410f3af85424b4c0905e94fa5f2cb08e8f23f"}, {file = "jupyter_client-8.6.3.tar.gz", hash = "sha256:35b3a0947c4a6e9d589eb97d7d4cd5e90f910ee73101611f01283732bd6d9419"}, @@ -2763,6 +2974,8 @@ version = "5.7.2" description = "Jupyter core package. A base package on which Jupyter projects rely." 
optional = false python-versions = ">=3.8" +groups = ["dev", "docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "jupyter_core-5.7.2-py3-none-any.whl", hash = "sha256:4f7315d2f6b4bcf2e3e7cb6e46772eba760ae459cd1f59d29eb57b0a01bd7409"}, {file = "jupyter_core-5.7.2.tar.gz", hash = "sha256:aa5f8d32bbf6b431ac830496da7392035d6f61b4f54872f15c4bd2a9c3f536d9"}, @@ -2783,6 +2996,8 @@ version = "0.3.0" description = "Pygments theme using JupyterLab CSS variables" optional = false python-versions = ">=3.8" +groups = ["docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "jupyterlab_pygments-0.3.0-py3-none-any.whl", hash = "sha256:841a89020971da1d8693f1a99997aefc5dc424bb1b251fd6322462a1b8842780"}, {file = "jupyterlab_pygments-0.3.0.tar.gz", hash = "sha256:721aca4d9029252b11cfa9d185e5b5af4d54772bb8072f9b7036f4170054d35d"}, @@ -2794,6 +3009,8 @@ version = "1.4.8" description = "A fast implementation of the Cassowary constraint solver" optional = true python-versions = ">=3.10" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"ranx\" and python_version >= \"3.10\"" files = [ {file = "kiwisolver-1.4.8-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:88c6f252f6816a73b1f8c904f7bbe02fd67c09a69f7cb8a0eecdbf5ce78e63db"}, {file = "kiwisolver-1.4.8-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c72941acb7b67138f35b879bbe85be0f6c6a70cab78fe3ef6db9c024d9223e5b"}, @@ -2883,6 +3100,8 @@ version = "0.44.0" description = "lightweight wrapper around basic LLVM functionality" optional = true python-versions = ">=3.10" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"ranx\" and python_version >= \"3.10\"" files = [ {file = "llvmlite-0.44.0-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:9fbadbfba8422123bab5535b293da1cf72f9f478a65645ecd73e781f962ca614"}, {file = "llvmlite-0.44.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cccf8eb28f24840f2689fb1a45f9c0f7e582dd24e088dcf96e424834af11f791"}, @@ -2913,6 +3132,8 @@ version = "5.3.1" description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." optional = true python-versions = ">=3.6" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"ranx\" and python_version >= \"3.10\"" files = [ {file = "lxml-5.3.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a4058f16cee694577f7e4dd410263cd0ef75644b43802a689c2b3c2a7e69453b"}, {file = "lxml-5.3.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:364de8f57d6eda0c16dcfb999af902da31396949efa0e583e12675d09709881b"}, @@ -3067,6 +3288,8 @@ version = "4.4.3" description = "LZ4 Bindings for Python" optional = true python-versions = ">=3.9" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"ranx\" and python_version >= \"3.10\"" files = [ {file = "lz4-4.4.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1ebf23ffd36b32b980f720a81990fcfdeadacafe7498fbeff7a8e058259d4e58"}, {file = "lz4-4.4.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8fe3caea61427057a9e3697c69b2403510fdccfca4483520d02b98ffae74531e"}, @@ -3112,10 +3335,12 @@ version = "3.0.0" description = "Python port of markdown-it. Markdown parsing, done right!" 
optional = false python-versions = ">=3.8" +groups = ["main", "docs"] files = [ {file = "markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"}, {file = "markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"}, ] +markers = {main = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"ranx\" and python_version >= \"3.10\"", docs = "python_version <= \"3.11\" or python_version >= \"3.12\""} [package.dependencies] mdurl = ">=0.1,<1.0" @@ -3136,6 +3361,8 @@ version = "3.0.2" description = "Safely add untrusted strings to HTML/XML markup." optional = false python-versions = ">=3.9" +groups = ["main", "docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "MarkupSafe-3.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7e94c425039cde14257288fd61dcfb01963e658efbc0ff54f5306b06054700f8"}, {file = "MarkupSafe-3.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9e2d922824181480953426608b81967de705c3cef4d1af983af849d7bd619158"}, @@ -3206,6 +3433,8 @@ version = "3.10.1" description = "Python plotting package" optional = true python-versions = ">=3.10" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"ranx\" and python_version >= \"3.10\"" files = [ {file = "matplotlib-3.10.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:ff2ae14910be903f4a24afdbb6d7d3a6c44da210fc7d42790b87aeac92238a16"}, {file = "matplotlib-3.10.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0721a3fd3d5756ed593220a8b86808a36c5031fce489adb5b31ee6dbb47dd5b2"}, @@ -3263,6 +3492,8 @@ version = "0.1.7" description = "Inline Matplotlib backend for Jupyter" optional = false python-versions = ">=3.8" +groups = ["dev", "docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "matplotlib_inline-0.1.7-py3-none-any.whl", hash = "sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca"}, {file = "matplotlib_inline-0.1.7.tar.gz", hash = "sha256:8423b23ec666be3d16e16b60bdd8ac4e86e840ebd1dd11a30b9f117f2fa0ab90"}, @@ -3277,6 +3508,8 @@ version = "0.7.0" description = "McCabe checker, plugin for flake8" optional = false python-versions = ">=3.6" +groups = ["dev"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"}, {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, @@ -3288,6 +3521,8 @@ version = "0.4.2" description = "Collection of plugins for markdown-it-py" optional = false python-versions = ">=3.8" +groups = ["docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "mdit_py_plugins-0.4.2-py3-none-any.whl", hash = "sha256:0c673c3f889399a33b95e88d2f0d111b4447bdfea7f237dab2d488f459835636"}, {file = "mdit_py_plugins-0.4.2.tar.gz", hash = "sha256:5f2cd1fdb606ddf152d37ec30e46101a60512bc0e5fa1a7002c36647b09e26b5"}, @@ -3307,10 +3542,12 @@ version = "0.1.2" description = "Markdown URL utilities" optional = false python-versions = ">=3.7" +groups = ["main", "docs"] files = [ {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"}, {file = "mdurl-0.1.2.tar.gz", hash = 
"sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, ] +markers = {main = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"ranx\" and python_version >= \"3.10\"", docs = "python_version <= \"3.11\" or python_version >= \"3.12\""} [[package]] name = "mistralai" @@ -3318,6 +3555,8 @@ version = "1.5.0" description = "Python Client SDK for the Mistral AI API." optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"mistralai\"" files = [ {file = "mistralai-1.5.0-py3-none-any.whl", hash = "sha256:9372537719f87bd6f9feef4747d0bf1f4fbe971f8c02945ca4b4bf3c94571c97"}, {file = "mistralai-1.5.0.tar.gz", hash = "sha256:fd94bc93bc25aad9c6dd8005b1a0bc4ba1250c6b3fbf855a49936989cc6e5c0d"}, @@ -3340,6 +3579,8 @@ version = "3.1.2" description = "A sane and fast Markdown parser with useful plugins and renderers" optional = false python-versions = ">=3.8" +groups = ["docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "mistune-3.1.2-py3-none-any.whl", hash = "sha256:4b47731332315cdca99e0ded46fc0004001c1299ff773dfb48fbe1fd226de319"}, {file = "mistune-3.1.2.tar.gz", hash = "sha256:733bf018ba007e8b5f2d3a9eb624034f6ee26c4ea769a98ec533ee111d504dff"}, @@ -3354,6 +3595,8 @@ version = "0.4.1" description = "" optional = false python-versions = ">=3.9" +groups = ["main"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "ml_dtypes-0.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:1fe8b5b5e70cd67211db94b05cfd58dace592f24489b038dc6f9fe347d2e07d5"}, {file = "ml_dtypes-0.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c09a6d11d8475c2a9fd2bc0695628aec105f97cab3b3a3fb7c9660348ff7d24"}, @@ -3377,8 +3620,8 @@ files = [ [package.dependencies] numpy = [ {version = ">1.20", markers = "python_version < \"3.10\""}, - {version = ">=1.23.3", markers = "python_version >= \"3.11\" and python_version < \"3.12\""}, {version = ">=1.21.2", markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, + {version = ">=1.23.3", markers = "python_version >= \"3.11\" and python_version < \"3.12\""}, {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, ] @@ -3391,6 +3634,8 @@ version = "1.3.0" description = "Python library for arbitrary-precision floating-point arithmetic" optional = true python-versions = "*" +groups = ["main"] +markers = "extra == \"sentence-transformers\" and (python_version <= \"3.11\" or python_version >= \"3.12\")" files = [ {file = "mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"}, {file = "mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f"}, @@ -3408,6 +3653,8 @@ version = "6.1.0" description = "multidict implementation" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"voyageai\"" files = [ {file = "multidict-6.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3380252550e372e8511d49481bd836264c009adb826b23fefcc5dd3c69692f60"}, {file = "multidict-6.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:99f826cbf970077383d7de805c0681799491cb939c25450b9b5b3ced03ca99f1"}, @@ -3512,6 +3759,8 @@ version = "1.9.0" description = "Optional static typing for Python" optional = false python-versions = ">=3.8" +groups = ["dev"] 
+markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "mypy-1.9.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f8a67616990062232ee4c3952f41c779afac41405806042a8126fe96e098419f"}, {file = "mypy-1.9.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d357423fa57a489e8c47b7c85dfb96698caba13d66e086b412298a1a0ea3b0ed"}, @@ -3559,10 +3808,12 @@ version = "1.0.0" description = "Type system extensions for programs checked with the mypy type checker." optional = false python-versions = ">=3.5" +groups = ["main", "dev"] files = [ {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, ] +markers = {main = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"mistralai\"", dev = "python_version <= \"3.11\" or python_version >= \"3.12\""} [[package]] name = "myst-nb" @@ -3570,6 +3821,8 @@ version = "1.2.0" description = "A Jupyter Notebook Sphinx reader built on top of the MyST markdown parser." optional = false python-versions = ">=3.9" +groups = ["docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "myst_nb-1.2.0-py3-none-any.whl", hash = "sha256:0e09909877848c0cf45e1aecee97481512efa29a0c4caa37870a03bba11c56c1"}, {file = "myst_nb-1.2.0.tar.gz", hash = "sha256:af459ec753b341952182b45b0a80b4776cebf80c9ee6aaca2a3f4027b440c9de"}, @@ -3598,6 +3851,8 @@ version = "3.0.1" description = "An extended [CommonMark](https://spec.commonmark.org/) compliant parser," optional = false python-versions = ">=3.8" +groups = ["docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "myst_parser-3.0.1-py3-none-any.whl", hash = "sha256:6457aaa33a5d474aca678b8ead9b3dc298e89c68e67012e73146ea6fd54babf1"}, {file = "myst_parser-3.0.1.tar.gz", hash = "sha256:88f0cb406cb363b077d176b51c476f62d60604d68a8dcdf4832e080441301a87"}, @@ -3624,6 +3879,8 @@ version = "0.10.2" description = "A client library for executing notebooks. Formerly nbconvert's ExecutePreprocessor." optional = false python-versions = ">=3.9.0" +groups = ["docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "nbclient-0.10.2-py3-none-any.whl", hash = "sha256:4ffee11e788b4a27fabeb7955547e4318a5298f34342a4bfd01f2e1faaeadc3d"}, {file = "nbclient-0.10.2.tar.gz", hash = "sha256:90b7fc6b810630db87a6d0c2250b1f0ab4cf4d3c27a299b0cde78a4ed3fd9193"}, @@ -3646,6 +3903,8 @@ version = "7.16.6" description = "Converting Jupyter Notebooks (.ipynb files) to other formats. Output formats include asciidoc, html, latex, markdown, pdf, py, rst, script. nbconvert can be used both as a Python library (`import nbconvert`) or as a command line tool (invoked as `jupyter nbconvert ...`)." 
optional = false python-versions = ">=3.8" +groups = ["docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "nbconvert-7.16.6-py3-none-any.whl", hash = "sha256:1375a7b67e0c2883678c48e506dc320febb57685e5ee67faa51b18a90f3a712b"}, {file = "nbconvert-7.16.6.tar.gz", hash = "sha256:576a7e37c6480da7b8465eefa66c17844243816ce1ccc372633c6b71c3c0f582"}, @@ -3683,6 +3942,8 @@ version = "5.10.4" description = "The Jupyter Notebook format" optional = false python-versions = ">=3.8" +groups = ["dev", "docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "nbformat-5.10.4-py3-none-any.whl", hash = "sha256:3b48d6c8fbca4b299bf3982ea7db1af21580e4fec269ad087b9e81588891200b"}, {file = "nbformat-5.10.4.tar.gz", hash = "sha256:322168b14f937a5d11362988ecac2a4952d3d8e3a2cbeb2319584631226d5b3a"}, @@ -3704,6 +3965,8 @@ version = "0.9.6" description = "Jupyter Notebook Tools for Sphinx" optional = false python-versions = ">=3.6" +groups = ["docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "nbsphinx-0.9.6-py3-none-any.whl", hash = "sha256:336b0b557945a7678ec7449b16449f854bc852a435bb53b8a72e6b5dc740d992"}, {file = "nbsphinx-0.9.6.tar.gz", hash = "sha256:c2b28a2d702f1159a95b843831798e86e60a17fc647b9bff9ba1585355de54e3"}, @@ -3723,6 +3986,8 @@ version = "0.11.0" description = "A py.test plugin to validate Jupyter notebooks" optional = false python-versions = ">=3.7, <4" +groups = ["dev"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "nbval-0.11.0-py2.py3-none-any.whl", hash = "sha256:307aecc866c9a1e8a13bb5bbb008a702bacfda2394dff6fe504a3108a58042a0"}, {file = "nbval-0.11.0.tar.gz", hash = "sha256:77c95797607b0a968babd2597ee3494102d25c3ad37435debbdac0e46e379094"}, @@ -3741,6 +4006,8 @@ version = "1.6.0" description = "Patch asyncio to allow nested event loops" optional = false python-versions = ">=3.5" +groups = ["dev", "docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "nest_asyncio-1.6.0-py3-none-any.whl", hash = "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c"}, {file = "nest_asyncio-1.6.0.tar.gz", hash = "sha256:6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe"}, @@ -3752,6 +4019,8 @@ version = "3.2.1" description = "Python package for creating and manipulating graphs and networks" optional = true python-versions = ">=3.9" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"sentence-transformers\"" files = [ {file = "networkx-3.2.1-py3-none-any.whl", hash = "sha256:f18c69adc97877c42332c170849c96cefa91881c99a7cb3e95b7c659ebdc1ec2"}, {file = "networkx-3.2.1.tar.gz", hash = "sha256:9f1bb5cf3409bf324e0a722c20bdb4c20ee39bf1c30ce8ae499c8502b0b5e0c6"}, @@ -3770,6 +4039,8 @@ version = "1.9.1" description = "Node.js virtual environment builder" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +groups = ["dev"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9"}, {file = "nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f"}, @@ -3781,6 +4052,8 @@ version = "0.61.0" description = "compiling Python code using LLVM" optional = true python-versions = ">=3.10" +groups = 
["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"ranx\" and python_version >= \"3.10\"" files = [ {file = "numba-0.61.0-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:9cab9783a700fa428b1a54d65295122bc03b3de1d01fb819a6b9dbbddfdb8c43"}, {file = "numba-0.61.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:46c5ae094fb3706f5adf9021bfb7fc11e44818d61afee695cdee4eadfed45e98"}, @@ -3815,6 +4088,8 @@ version = "1.26.4" description = "Fundamental package for array computing in Python" optional = false python-versions = ">=3.9" +groups = ["main"] +markers = "python_version <= \"3.11\"" files = [ {file = "numpy-1.26.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0"}, {file = "numpy-1.26.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a"}, @@ -3860,6 +4135,8 @@ version = "2.1.3" description = "Fundamental package for array computing in Python" optional = false python-versions = ">=3.10" +groups = ["main"] +markers = "python_version >= \"3.12\"" files = [ {file = "numpy-2.1.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c894b4305373b9c5576d7a12b473702afdf48ce5369c074ba304cc5ad8730dff"}, {file = "numpy-2.1.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b47fbb433d3260adcd51eb54f92a2ffbc90a4595f8970ee00e064c644ac788f5"}, @@ -3924,6 +4201,8 @@ version = "12.4.5.8" description = "CUBLAS native runtime libraries" optional = true python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"sentence-transformers\" and (python_version <= \"3.11\" or python_version >= \"3.12\")" files = [ {file = "nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_aarch64.whl", hash = "sha256:0f8aa1706812e00b9f19dfe0cdb3999b092ccb8ca168c0db5b8ea712456fd9b3"}, {file = "nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl", hash = "sha256:2fc8da60df463fdefa81e323eef2e36489e1c94335b5358bcb38360adf75ac9b"}, @@ -3936,6 +4215,8 @@ version = "12.4.127" description = "CUDA profiling tools runtime libs." 
optional = true python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"sentence-transformers\" and (python_version <= \"3.11\" or python_version >= \"3.12\")" files = [ {file = "nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:79279b35cf6f91da114182a5ce1864997fd52294a87a16179ce275773799458a"}, {file = "nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:9dec60f5ac126f7bb551c055072b69d85392b13311fcc1bcda2202d172df30fb"}, @@ -3948,6 +4229,8 @@ version = "12.4.127" description = "NVRTC native runtime libraries" optional = true python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"sentence-transformers\" and (python_version <= \"3.11\" or python_version >= \"3.12\")" files = [ {file = "nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:0eedf14185e04b76aa05b1fea04133e59f465b6f960c0cbf4e37c3cb6b0ea198"}, {file = "nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a178759ebb095827bd30ef56598ec182b85547f1508941a3d560eb7ea1fbf338"}, @@ -3960,6 +4243,8 @@ version = "12.4.127" description = "CUDA Runtime native Libraries" optional = true python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"sentence-transformers\" and (python_version <= \"3.11\" or python_version >= \"3.12\")" files = [ {file = "nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:961fe0e2e716a2a1d967aab7caee97512f71767f852f67432d572e36cb3a11f3"}, {file = "nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:64403288fa2136ee8e467cdc9c9427e0434110899d07c779f25b5c068934faa5"}, @@ -3972,6 +4257,8 @@ version = "9.1.0.70" description = "cuDNN runtime libraries" optional = true python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"sentence-transformers\" and (python_version <= \"3.11\" or python_version >= \"3.12\")" files = [ {file = "nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl", hash = "sha256:165764f44ef8c61fcdfdfdbe769d687e06374059fbb388b6c89ecb0e28793a6f"}, {file = "nvidia_cudnn_cu12-9.1.0.70-py3-none-win_amd64.whl", hash = "sha256:6278562929433d68365a07a4a1546c237ba2849852c0d4b2262a486e805b977a"}, @@ -3986,6 +4273,8 @@ version = "11.2.1.3" description = "CUFFT native runtime libraries" optional = true python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"sentence-transformers\" and (python_version <= \"3.11\" or python_version >= \"3.12\")" files = [ {file = "nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_aarch64.whl", hash = "sha256:5dad8008fc7f92f5ddfa2101430917ce2ffacd86824914c82e28990ad7f00399"}, {file = "nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f083fc24912aa410be21fa16d157fed2055dab1cc4b6934a0e03cba69eb242b9"}, @@ -4001,6 +4290,8 @@ version = "10.3.5.147" description = "CURAND native runtime libraries" optional = true python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"sentence-transformers\" and (python_version <= \"3.11\" or python_version >= \"3.12\")" files = [ {file = 
"nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_aarch64.whl", hash = "sha256:1f173f09e3e3c76ab084aba0de819c49e56614feae5c12f69883f4ae9bb5fad9"}, {file = "nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a88f583d4e0bb643c49743469964103aa59f7f708d862c3ddb0fc07f851e3b8b"}, @@ -4013,6 +4304,8 @@ version = "11.6.1.9" description = "CUDA solver native runtime libraries" optional = true python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"sentence-transformers\" and (python_version <= \"3.11\" or python_version >= \"3.12\")" files = [ {file = "nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_aarch64.whl", hash = "sha256:d338f155f174f90724bbde3758b7ac375a70ce8e706d70b018dd3375545fc84e"}, {file = "nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl", hash = "sha256:19e33fa442bcfd085b3086c4ebf7e8debc07cfe01e11513cc6d332fd918ac260"}, @@ -4030,6 +4323,8 @@ version = "12.3.1.170" description = "CUSPARSE native runtime libraries" optional = true python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"sentence-transformers\" and (python_version <= \"3.11\" or python_version >= \"3.12\")" files = [ {file = "nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_aarch64.whl", hash = "sha256:9d32f62896231ebe0480efd8a7f702e143c98cfaa0e8a76df3386c1ba2b54df3"}, {file = "nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl", hash = "sha256:ea4f11a2904e2a8dc4b1833cc1b5181cde564edd0d5cd33e3c168eff2d1863f1"}, @@ -4045,6 +4340,8 @@ version = "0.6.2" description = "NVIDIA cuSPARSELt" optional = true python-versions = "*" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"sentence-transformers\" and (python_version <= \"3.11\" or python_version >= \"3.12\")" files = [ {file = "nvidia_cusparselt_cu12-0.6.2-py3-none-manylinux2014_aarch64.whl", hash = "sha256:067a7f6d03ea0d4841c85f0c6f1991c5dda98211f6302cb83a4ab234ee95bef8"}, {file = "nvidia_cusparselt_cu12-0.6.2-py3-none-manylinux2014_x86_64.whl", hash = "sha256:df2c24502fd76ebafe7457dbc4716b2fec071aabaed4fb7691a201cde03704d9"}, @@ -4057,6 +4354,8 @@ version = "2.21.5" description = "NVIDIA Collective Communication Library (NCCL) Runtime" optional = true python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"sentence-transformers\" and (python_version <= \"3.11\" or python_version >= \"3.12\")" files = [ {file = "nvidia_nccl_cu12-2.21.5-py3-none-manylinux2014_x86_64.whl", hash = "sha256:8579076d30a8c24988834445f8d633c697d42397e92ffc3f63fa26766d25e0a0"}, ] @@ -4067,7 +4366,10 @@ version = "12.4.127" description = "Nvidia JIT LTO Library" optional = true python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"sentence-transformers\" and (python_version <= \"3.11\" or python_version >= \"3.12\")" files = [ + {file = "nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:4abe7fef64914ccfa909bc2ba39739670ecc9e820c83ccc7a6ed414122599b83"}, {file = "nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:06b3b9b25bf3f8af351d664978ca26a16d2c5127dbd53c0497e28d1fb9611d57"}, {file = "nvidia_nvjitlink_cu12-12.4.127-py3-none-win_amd64.whl", hash = 
"sha256:fd9020c501d27d135f983c6d3e244b197a7ccad769e34df53a42e276b0e25fa1"}, ] @@ -4078,6 +4380,8 @@ version = "12.4.127" description = "NVIDIA Tools Extension" optional = true python-versions = ">=3" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"sentence-transformers\" and (python_version <= \"3.11\" or python_version >= \"3.12\")" files = [ {file = "nvidia_nvtx_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7959ad635db13edf4fc65c06a6e9f9e55fc2f92596db928d169c0bb031e88ef3"}, {file = "nvidia_nvtx_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:781e950d9b9f60d8241ccea575b32f5105a5baf4c2351cab5256a24869f12a1a"}, @@ -4090,6 +4394,8 @@ version = "1.65.1" description = "The official Python library for the openai API" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"openai\"" files = [ {file = "openai-1.65.1-py3-none-any.whl", hash = "sha256:396652a6452dd42791b3ad8a3aab09b1feb7c1c4550a672586fb300760a8e204"}, {file = "openai-1.65.1.tar.gz", hash = "sha256:9d9370a20d2b8c3ce319fd2194c2eef5eab59effbcc5b04ff480977edc530fba"}, @@ -4115,6 +4421,8 @@ version = "3.10.15" description = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"ranx\" and python_version >= \"3.10\"" files = [ {file = "orjson-3.10.15-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:552c883d03ad185f720d0c09583ebde257e41b9521b74ff40e08b7dec4559c04"}, {file = "orjson-3.10.15-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:616e3e8d438d02e4854f70bfdc03a6bcdb697358dbaa6bcd19cbe24d24ece1f8"}, @@ -4203,10 +4511,12 @@ version = "24.2" description = "Core utilities for Python packages" optional = false python-versions = ">=3.8" +groups = ["main", "dev", "docs"] files = [ {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, ] +markers = {main = "(python_version <= \"3.11\" or python_version >= \"3.12\") and (extra == \"sentence-transformers\" or extra == \"cohere\" or extra == \"vertexai\" or extra == \"ranx\") and (extra == \"sentence-transformers\" or extra == \"cohere\" or extra == \"vertexai\" or python_version >= \"3.10\")", dev = "python_version <= \"3.11\" or python_version >= \"3.12\"", docs = "python_version <= \"3.11\" or python_version >= \"3.12\""} [[package]] name = "pandas" @@ -4214,6 +4524,8 @@ version = "2.2.3" description = "Powerful data structures for data analysis, time series, and statistics" optional = true python-versions = ">=3.9" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"ranx\" and python_version >= \"3.10\"" files = [ {file = "pandas-2.2.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1948ddde24197a0f7add2bdc4ca83bf2b1ef84a1bc8ccffd95eda17fd836ecb5"}, {file = "pandas-2.2.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:381175499d3802cde0eabbaf6324cce0c4f5d52ca6f8c377c29ad442f50f6348"}, @@ -4300,6 +4612,8 @@ version = "1.5.1" description = "Utilities for writing pandoc filters in python" optional = false python-versions 
= ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +groups = ["docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "pandocfilters-1.5.1-py2.py3-none-any.whl", hash = "sha256:93be382804a9cdb0a7267585f157e5d1731bbe5545a85b268d6f5fe6232de2bc"}, {file = "pandocfilters-1.5.1.tar.gz", hash = "sha256:002b4a555ee4ebc03f8b66307e287fa492e4a77b4ea14d3f934328297bb4939e"}, @@ -4311,6 +4625,8 @@ version = "0.8.4" description = "A Python Parser" optional = false python-versions = ">=3.6" +groups = ["dev", "docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "parso-0.8.4-py2.py3-none-any.whl", hash = "sha256:a418670a20291dacd2dddc80c377c5c3791378ee1e8d12bffc35420643d43f18"}, {file = "parso-0.8.4.tar.gz", hash = "sha256:eb3a7b58240fb99099a345571deecc0f9540ea5f4dd2fe14c2a99d6b281ab92d"}, @@ -4326,6 +4642,8 @@ version = "0.12.1" description = "Utility library for gitignore style pattern matching of file paths." optional = false python-versions = ">=3.8" +groups = ["dev"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"}, {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, @@ -4337,6 +4655,8 @@ version = "4.9.0" description = "Pexpect allows easy control of interactive console applications." optional = false python-versions = "*" +groups = ["dev", "docs"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and sys_platform != \"win32\"" files = [ {file = "pexpect-4.9.0-py2.py3-none-any.whl", hash = "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523"}, {file = "pexpect-4.9.0.tar.gz", hash = "sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f"}, @@ -4351,6 +4671,8 @@ version = "11.1.0" description = "Python Imaging Library (Fork)" optional = true python-versions = ">=3.9" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and (extra == \"sentence-transformers\" or extra == \"ranx\") and (extra == \"sentence-transformers\" or python_version >= \"3.10\")" files = [ {file = "pillow-11.1.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:e1abe69aca89514737465752b4bcaf8016de61b3be1397a8fc260ba33321b3a8"}, {file = "pillow-11.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c640e5a06869c75994624551f45e5506e4256562ead981cce820d5ab39ae2192"}, @@ -4439,6 +4761,8 @@ version = "4.3.6" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." 
optional = false python-versions = ">=3.8" +groups = ["dev", "docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "platformdirs-4.3.6-py3-none-any.whl", hash = "sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb"}, {file = "platformdirs-4.3.6.tar.gz", hash = "sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907"}, @@ -4455,6 +4779,8 @@ version = "1.5.0" description = "plugin and hook calling mechanisms for python" optional = false python-versions = ">=3.8" +groups = ["dev"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, @@ -4464,12 +4790,27 @@ files = [ dev = ["pre-commit", "tox"] testing = ["pytest", "pytest-benchmark"] +[[package]] +name = "ply" +version = "3.11" +description = "Python Lex & Yacc" +optional = false +python-versions = "*" +groups = ["main"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" +files = [ + {file = "ply-3.11-py2.py3-none-any.whl", hash = "sha256:096f9b8350b65ebd2fd1346b12452efe5b9607f7482813ffca50c22722a807ce"}, + {file = "ply-3.11.tar.gz", hash = "sha256:00c7c1aaa88358b9c765b6d3000c6eec0ba42abca5351b095321aef446081da3"}, +] + [[package]] name = "pre-commit" version = "4.1.0" description = "A framework for managing and maintaining multi-language pre-commit hooks." optional = false python-versions = ">=3.9" +groups = ["dev"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "pre_commit-4.1.0-py2.py3-none-any.whl", hash = "sha256:d29e7cb346295bcc1cc75fc3e92e343495e3ea0196c9ec6ba53f49f10ab6ae7b"}, {file = "pre_commit-4.1.0.tar.gz", hash = "sha256:ae3f018575a588e30dfddfab9a05448bfbd6b73d78709617b5a2b853549716d4"}, @@ -4488,6 +4829,8 @@ version = "3.0.50" description = "Library for building powerful interactive command lines in Python" optional = false python-versions = ">=3.8.0" +groups = ["dev", "docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "prompt_toolkit-3.0.50-py3-none-any.whl", hash = "sha256:9b6427eb19e479d98acff65196a307c555eb567989e6d88ebbb1b509d9779198"}, {file = "prompt_toolkit-3.0.50.tar.gz", hash = "sha256:544748f3860a2623ca5cd6d2795e7a14f3d0e1c3c9728359013f79877fc89bab"}, @@ -4502,6 +4845,8 @@ version = "0.3.0" description = "Accelerated property cache" optional = true python-versions = ">=3.9" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"voyageai\"" files = [ {file = "propcache-0.3.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:efa44f64c37cc30c9f05932c740a8b40ce359f51882c70883cc95feac842da4d"}, {file = "propcache-0.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2383a17385d9800b6eb5855c2f05ee550f803878f344f58b6e194de08b96352c"}, @@ -4609,6 +4954,8 @@ version = "1.26.0" description = "Beautiful, Pythonic protocol buffers" optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"vertexai\"" files = [ {file = "proto_plus-1.26.0-py3-none-any.whl", hash = "sha256:bf2dfaa3da281fc3187d12d224c707cb57214fb2c22ba854eb0c105a3fb2d4d7"}, {file = "proto_plus-1.26.0.tar.gz", hash = 
"sha256:6e93d5f5ca267b54300880fff156b6a3386b3fa3f43b1da62e680fc0c586ef22"}, @@ -4626,6 +4973,8 @@ version = "5.29.3" description = "" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"vertexai\"" files = [ {file = "protobuf-5.29.3-cp310-abi3-win32.whl", hash = "sha256:3ea51771449e1035f26069c4c7fd51fba990d07bc55ba80701c78f886bf9c888"}, {file = "protobuf-5.29.3-cp310-abi3-win_amd64.whl", hash = "sha256:a4fa6f80816a9a0678429e84973f2f98cbc218cca434abe8db2ad0bffc98503a"}, @@ -4646,6 +4995,8 @@ version = "7.0.0" description = "Cross-platform lib for process and system monitoring in Python. NOTE: the syntax of this script MUST be kept compatible with Python 2.7." optional = false python-versions = ">=3.6" +groups = ["dev", "docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "psutil-7.0.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:101d71dc322e3cffd7cea0650b09b3d08b8e7c4109dd6809fe452dfd00e58b25"}, {file = "psutil-7.0.0-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:39db632f6bb862eeccf56660871433e111b6ea58f2caea825571951d4b6aa3da"}, @@ -4669,6 +5020,8 @@ version = "0.7.0" description = "Run a subprocess in a pseudo terminal" optional = false python-versions = "*" +groups = ["dev", "docs"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and sys_platform != \"win32\"" files = [ {file = "ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35"}, {file = "ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220"}, @@ -4680,6 +5033,8 @@ version = "0.2.3" description = "Safely evaluate AST nodes without side effects" optional = false python-versions = "*" +groups = ["dev", "docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "pure_eval-0.2.3-py3-none-any.whl", hash = "sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0"}, {file = "pure_eval-0.2.3.tar.gz", hash = "sha256:5f4e983f40564c576c7c8635ae88db5956bb2229d7e9237d03b3c0b0190eaf42"}, @@ -4694,6 +5049,8 @@ version = "0.6.1" description = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"vertexai\"" files = [ {file = "pyasn1-0.6.1-py3-none-any.whl", hash = "sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629"}, {file = "pyasn1-0.6.1.tar.gz", hash = "sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034"}, @@ -4705,6 +5062,8 @@ version = "0.4.1" description = "A collection of ASN.1-based protocols modules" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"vertexai\"" files = [ {file = "pyasn1_modules-0.4.1-py3-none-any.whl", hash = "sha256:49bfa96b45a292b711e986f222502c1c9a5e1f4e568fc30e2574a6c7d07838fd"}, {file = "pyasn1_modules-0.4.1.tar.gz", hash = "sha256:c28e2dbf9c06ad61c71a075c7e0f9fd0f1b0bb2d2ad4377f240d33ac2ab60a7c"}, @@ -4719,10 +5078,12 @@ version = "2.22" description = "C parser in Python" optional = false python-versions = ">=3.8" +groups = ["dev", "docs"] files = [ {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"}, {file = 
"pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"}, ] +markers = {dev = "(implementation_name == \"pypy\" or platform_python_implementation != \"PyPy\") and (python_version <= \"3.11\" or python_version >= \"3.12\")", docs = "(python_version <= \"3.11\" or python_version >= \"3.12\") and implementation_name == \"pypy\""} [[package]] name = "pydantic" @@ -4730,6 +5091,8 @@ version = "2.10.6" description = "Data validation using Python type hints" optional = false python-versions = ">=3.8" +groups = ["main"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "pydantic-2.10.6-py3-none-any.whl", hash = "sha256:427d664bf0b8a2b34ff5dd0f5a18df00591adcee7198fbd71981054cef37b584"}, {file = "pydantic-2.10.6.tar.gz", hash = "sha256:ca5daa827cce33de7a42be142548b0096bf05a7e7b365aebfa5f8eeec7128236"}, @@ -4750,6 +5113,8 @@ version = "2.27.2" description = "Core functionality for Pydantic validation and serialization" optional = false python-versions = ">=3.8" +groups = ["main"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "pydantic_core-2.27.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2d367ca20b2f14095a8f4fa1210f5a7b78b8a20009ecced6b12818f455b1e9fa"}, {file = "pydantic_core-2.27.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:491a2b73db93fab69731eaee494f320faa4e093dbed776be1a829c2eb222c34c"}, @@ -4862,6 +5227,8 @@ version = "0.15.4" description = "Bootstrap-based Sphinx theme from the PyData community" optional = false python-versions = ">=3.9" +groups = ["docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "pydata_sphinx_theme-0.15.4-py3-none-any.whl", hash = "sha256:2136ad0e9500d0949f96167e63f3e298620040aea8f9c74621959eda5d4cf8e6"}, {file = "pydata_sphinx_theme-0.15.4.tar.gz", hash = "sha256:7762ec0ac59df3acecf49fd2f889e1b4565dbce8b88b2e29ee06fdd90645a06d"}, @@ -4890,10 +5257,12 @@ version = "2.19.1" description = "Pygments is a syntax highlighting package written in Python." 
optional = false python-versions = ">=3.8" +groups = ["main", "dev", "docs"] files = [ {file = "pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c"}, {file = "pygments-2.19.1.tar.gz", hash = "sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f"}, ] +markers = {main = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"ranx\" and python_version >= \"3.10\"", dev = "python_version <= \"3.11\" or python_version >= \"3.12\"", docs = "python_version <= \"3.11\" or python_version >= \"3.12\""} [package.extras] windows-terminal = ["colorama (>=0.4.6)"] @@ -4904,6 +5273,8 @@ version = "3.3.4" description = "python code static checker" optional = false python-versions = ">=3.9.0" +groups = ["dev"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "pylint-3.3.4-py3-none-any.whl", hash = "sha256:289e6a1eb27b453b08436478391a48cd53bb0efb824873f949e709350f3de018"}, {file = "pylint-3.3.4.tar.gz", hash = "sha256:74ae7a38b177e69a9b525d0794bd8183820bfa7eb68cc1bee6e8ed22a42be4ce"}, @@ -4934,6 +5305,8 @@ version = "3.2.1" description = "pyparsing module - Classes and methods to define and execute parsing grammars" optional = true python-versions = ">=3.9" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"ranx\" and python_version >= \"3.10\"" files = [ {file = "pyparsing-3.2.1-py3-none-any.whl", hash = "sha256:506ff4f4386c4cec0590ec19e6302d3aedb992fdc02c761e90416f158dacf8e1"}, {file = "pyparsing-3.2.1.tar.gz", hash = "sha256:61980854fd66de3a90028d679a954d5f2623e83144b5afe5ee86f43d762e5f0a"}, @@ -4948,6 +5321,8 @@ version = "3.5.4" description = "A python implementation of GNU readline." 
optional = false python-versions = ">=3.8" +groups = ["main"] +markers = "sys_platform == \"win32\" and (python_version <= \"3.11\" or python_version >= \"3.12\")" files = [ {file = "pyreadline3-3.5.4-py3-none-any.whl", hash = "sha256:eaf8e6cc3c49bcccf145fc6067ba8643d1df34d604a1ec0eccbf7a18e6d3fae6"}, {file = "pyreadline3-3.5.4.tar.gz", hash = "sha256:8d57d53039a1c75adba8e50dd3d992b28143480816187ea5efbd5c78e6c885b7"}, @@ -4962,6 +5337,8 @@ version = "8.3.4" description = "pytest: simple powerful testing with Python" optional = false python-versions = ">=3.8" +groups = ["dev"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "pytest-8.3.4-py3-none-any.whl", hash = "sha256:50e16d954148559c9a74109af1eaf0c945ba2d8f30f0a3d3335edde19788b6f6"}, {file = "pytest-8.3.4.tar.gz", hash = "sha256:965370d062bce11e73868e0335abac31b4d3de0e82f4007408d242b4f8610761"}, @@ -4984,6 +5361,8 @@ version = "0.23.8" description = "Pytest support for asyncio" optional = false python-versions = ">=3.8" +groups = ["dev"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "pytest_asyncio-0.23.8-py3-none-any.whl", hash = "sha256:50265d892689a5faefb84df80819d1ecef566eb3549cf915dfb33569359d1ce2"}, {file = "pytest_asyncio-0.23.8.tar.gz", hash = "sha256:759b10b33a6dc61cce40a8bd5205e302978bbbcc00e279a8b61d9a6a3c82e4d3"}, @@ -5002,6 +5381,8 @@ version = "3.6.1" description = "pytest xdist plugin for distributed testing, most importantly across multiple CPUs" optional = false python-versions = ">=3.8" +groups = ["dev"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "pytest_xdist-3.6.1-py3-none-any.whl", hash = "sha256:9ed4adfb68a016610848639bb7e02c9352d5d9f03d04809919e2dafc3be4cca7"}, {file = "pytest_xdist-3.6.1.tar.gz", hash = "sha256:ead156a4db231eec769737f57668ef58a2084a34b2e55c4a8fa20d861107300d"}, @@ -5023,10 +5404,12 @@ version = "2.9.0.post0" description = "Extensions to the standard Python datetime module" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +groups = ["main", "dev", "docs"] files = [ {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, ] +markers = {main = "(python_version <= \"3.11\" or python_version >= \"3.12\") and (extra == \"vertexai\" or extra == \"mistralai\" or extra == \"bedrock\" or extra == \"ranx\") and (extra == \"vertexai\" or extra == \"mistralai\" or extra == \"bedrock\" or python_version >= \"3.10\")", dev = "python_version <= \"3.11\" or python_version >= \"3.12\"", docs = "python_version <= \"3.11\" or python_version >= \"3.12\""} [package.dependencies] six = ">=1.5" @@ -5037,6 +5420,8 @@ version = "1.0.1" description = "Read key-value pairs from a .env file and set them as environment variables" optional = false python-versions = ">=3.8" +groups = ["dev"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca"}, {file = "python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a"}, @@ -5051,6 +5436,8 @@ version = "3.0.0" description = "Universally unique lexicographically sortable identifier" optional = false python-versions = ">=3.9" 
+groups = ["main"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "python_ulid-3.0.0-py3-none-any.whl", hash = "sha256:e4c4942ff50dbd79167ad01ac725ec58f924b4018025ce22c858bfcff99a5e31"}, {file = "python_ulid-3.0.0.tar.gz", hash = "sha256:e50296a47dc8209d28629a22fc81ca26c00982c78934bd7766377ba37ea49a9f"}, @@ -5065,6 +5452,8 @@ version = "2025.1" description = "World timezone definitions, modern and historical" optional = true python-versions = "*" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"ranx\" and python_version >= \"3.10\"" files = [ {file = "pytz-2025.1-py2.py3-none-any.whl", hash = "sha256:89dd22dca55b46eac6eda23b2d72721bf1bdfef212645d81513ef5d03038de57"}, {file = "pytz-2025.1.tar.gz", hash = "sha256:c2db42be2a2518b28e65f9207c4d05e6ff547d1efa4086469ef855e4ab70178e"}, @@ -5076,6 +5465,7 @@ version = "308" description = "Python for Window Extensions" optional = false python-versions = "*" +groups = ["dev", "docs"] files = [ {file = "pywin32-308-cp310-cp310-win32.whl", hash = "sha256:796ff4426437896550d2981b9c2ac0ffd75238ad9ea2d3bfa67a1abd546d262e"}, {file = "pywin32-308-cp310-cp310-win_amd64.whl", hash = "sha256:4fc888c59b3c0bef905ce7eb7e2106a07712015ea1c8234b703a088d46110e8e"}, @@ -5096,6 +5486,7 @@ files = [ {file = "pywin32-308-cp39-cp39-win32.whl", hash = "sha256:7873ca4dc60ab3287919881a7d4f88baee4a6e639aa6962de25a98ba6b193341"}, {file = "pywin32-308-cp39-cp39-win_amd64.whl", hash = "sha256:71b3322d949b4cc20776436a9c9ba0eeedcbc9c650daa536df63f0ff111bb920"}, ] +markers = {dev = "(python_version <= \"3.11\" or python_version >= \"3.12\") and sys_platform == \"win32\"", docs = "sys_platform == \"win32\" and platform_python_implementation != \"PyPy\" and (python_version <= \"3.11\" or python_version >= \"3.12\")"} [[package]] name = "pyyaml" @@ -5103,6 +5494,8 @@ version = "6.0.2" description = "YAML parser and emitter for Python" optional = false python-versions = ">=3.8" +groups = ["main", "dev", "docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"}, {file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"}, @@ -5165,6 +5558,8 @@ version = "26.2.1" description = "Python bindings for 0MQ" optional = false python-versions = ">=3.7" +groups = ["dev", "docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "pyzmq-26.2.1-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:f39d1227e8256d19899d953e6e19ed2ccb689102e6d85e024da5acf410f301eb"}, {file = "pyzmq-26.2.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a23948554c692df95daed595fdd3b76b420a4939d7a8a28d6d7dea9711878641"}, @@ -5286,6 +5681,8 @@ version = "0.3.20" description = "ranx: A Blazing-Fast Python Library for Ranking Evaluation, Comparison, and Fusion" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"ranx\" and python_version >= \"3.10\"" files = [ {file = "ranx-0.3.20-py3-none-any.whl", hash = "sha256:e056e4d5981b0328b045868cc7064fc57a545f36009fbe9bb602295ec33335de"}, {file = "ranx-0.3.20.tar.gz", hash = "sha256:8afc6f2042c40645e5d1fd80c35ed75a885e18bd2db7e95cc7ec32a0b41e59ea"}, @@ -5312,6 +5709,8 @@ version = "5.2.1" description = 
"Python client for Redis database and key-value store" optional = false python-versions = ">=3.8" +groups = ["main"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "redis-5.2.1-py3-none-any.whl", hash = "sha256:ee7e1056b9aea0f04c6c2ed59452947f34c4940ee025f5dd83e6a6418b6989e4"}, {file = "redis-5.2.1.tar.gz", hash = "sha256:16f2e22dff21d5125e8481515e386711a34cbec50f0e44413dd7d9c060a54e0f"}, @@ -5330,6 +5729,8 @@ version = "0.36.2" description = "JSON Referencing + Python" optional = false python-versions = ">=3.9" +groups = ["dev", "docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "referencing-0.36.2-py3-none-any.whl", hash = "sha256:e8699adbbf8b5c7de96d8ffa0eb5c158b3beafce084968e2ea8bb08c6794dcd0"}, {file = "referencing-0.36.2.tar.gz", hash = "sha256:df2e89862cd09deabbdba16944cc3f10feb6b3e6f18e902f7cc25609a34775aa"}, @@ -5346,6 +5747,8 @@ version = "2024.11.6" description = "Alternative regular expression module, to replace re." optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"sentence-transformers\"" files = [ {file = "regex-2024.11.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ff590880083d60acc0433f9c3f713c51f7ac6ebb9adf889c79a261ecf541aa91"}, {file = "regex-2024.11.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:658f90550f38270639e83ce492f27d2c8d2cd63805c65a13a14d36ca126753f0"}, @@ -5449,10 +5852,12 @@ version = "2.32.3" description = "Python HTTP for Humans." optional = false python-versions = ">=3.8" +groups = ["main", "dev", "docs"] files = [ {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, ] +markers = {main = "(python_version <= \"3.11\" or python_version >= \"3.12\") and (extra == \"sentence-transformers\" or extra == \"cohere\" or extra == \"vertexai\" or extra == \"voyageai\" or extra == \"ranx\") and (extra == \"sentence-transformers\" or extra == \"cohere\" or extra == \"vertexai\" or extra == \"voyageai\" or python_version >= \"3.10\")", dev = "python_version <= \"3.11\" or python_version >= \"3.12\"", docs = "python_version <= \"3.11\" or python_version >= \"3.12\""} [package.dependencies] certifi = ">=2017.4.17" @@ -5470,6 +5875,8 @@ version = "13.9.4" description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" optional = true python-versions = ">=3.8.0" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"ranx\" and python_version >= \"3.10\"" files = [ {file = "rich-13.9.4-py3-none-any.whl", hash = "sha256:6049d5e6ec054bf2779ab3358186963bac2ea89175919d699e378b99738c2a90"}, {file = "rich-13.9.4.tar.gz", hash = "sha256:439594978a49a09530cff7ebc4b5c7103ef57baf48d5ea3184f21d9a2befa098"}, @@ -5489,6 +5896,8 @@ version = "0.23.1" description = "Python bindings to Rust's persistent data structures (rpds)" optional = false python-versions = ">=3.9" +groups = ["dev", "docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "rpds_py-0.23.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2a54027554ce9b129fc3d633c92fa33b30de9f08bc61b32c053dc9b537266fed"}, {file = "rpds_py-0.23.1-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:b5ef909a37e9738d146519657a1aab4584018746a18f71c692f2f22168ece40c"}, @@ -5601,6 +6010,8 @@ version = "4.9" description = "Pure-Python RSA implementation" optional = true python-versions = ">=3.6,<4" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"vertexai\"" files = [ {file = "rsa-4.9-py3-none-any.whl", hash = "sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7"}, {file = "rsa-4.9.tar.gz", hash = "sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21"}, @@ -5615,6 +6026,8 @@ version = "0.11.3" description = "An Amazon S3 Transfer Manager" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"bedrock\"" files = [ {file = "s3transfer-0.11.3-py3-none-any.whl", hash = "sha256:ca855bdeb885174b5ffa95b9913622459d4ad8e331fc98eb01e6d5eb6a30655d"}, {file = "s3transfer-0.11.3.tar.gz", hash = "sha256:edae4977e3a122445660c7c114bba949f9d191bae3b34a096f18a1c8c354527a"}, @@ -5632,6 +6045,8 @@ version = "0.5.3" description = "" optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"sentence-transformers\"" files = [ {file = "safetensors-0.5.3-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:bd20eb133db8ed15b40110b7c00c6df51655a2998132193de2f75f72d99c7073"}, {file = "safetensors-0.5.3-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:21d01c14ff6c415c485616b8b0bf961c46b3b343ca59110d38d744e577f9cce7"}, @@ -5669,6 +6084,8 @@ version = "1.6.1" description = "A set of python modules for machine learning and data mining" optional = true python-versions = ">=3.9" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"sentence-transformers\"" files = [ {file = "scikit_learn-1.6.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d056391530ccd1e501056160e3c9673b4da4805eb67eb2bdf4e983e1f9c9204e"}, {file = "scikit_learn-1.6.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:0c8d036eb937dbb568c6242fa598d551d88fb4399c0344d95c001980ec1c7d36"}, @@ -5723,6 +6140,8 @@ version = "1.13.1" description = "Fundamental algorithms for scientific computing in Python" optional = true python-versions = ">=3.9" +groups = ["main"] +markers = "extra == \"sentence-transformers\" and python_version < \"3.10\"" files = [ {file = "scipy-1.13.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:20335853b85e9a49ff7572ab453794298bcf0354d8068c5f6775a0eabf350aca"}, {file = "scipy-1.13.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:d605e9c23906d1994f55ace80e0125c587f96c020037ea6aa98d01b4bd2e222f"}, @@ -5765,6 +6184,8 @@ version = "1.15.2" description = "Fundamental algorithms for scientific computing in Python" optional = true python-versions = ">=3.10" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and (extra == \"sentence-transformers\" or extra == \"ranx\") and (python_version >= \"3.10\" or extra == \"sentence-transformers\")" files = [ {file = "scipy-1.15.2-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:a2ec871edaa863e8213ea5df811cd600734f6400b4af272e1c011e69401218e9"}, {file = "scipy-1.15.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:6f223753c6ea76983af380787611ae1291e3ceb23917393079dcc746ba60cfb5"}, @@ -5828,6 +6249,8 @@ version = "0.13.2" description = "Statistical data visualization" optional = true python-versions = ">=3.8" +groups = 
["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"ranx\" and python_version >= \"3.10\"" files = [ {file = "seaborn-0.13.2-py3-none-any.whl", hash = "sha256:636f8336facf092165e27924f223d3c62ca560b1f2bb5dff7ab7fad265361987"}, {file = "seaborn-0.13.2.tar.gz", hash = "sha256:93e60a40988f4d65e9f4885df477e2fdaff6b73a9ded434c1ab356dd57eefff7"}, @@ -5849,6 +6272,8 @@ version = "3.4.1" description = "State-of-the-Art Text Embeddings" optional = true python-versions = ">=3.9" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"sentence-transformers\"" files = [ {file = "sentence_transformers-3.4.1-py3-none-any.whl", hash = "sha256:e026dc6d56801fd83f74ad29a30263f401b4b522165c19386d8bc10dcca805da"}, {file = "sentence_transformers-3.4.1.tar.gz", hash = "sha256:68daa57504ff548340e54ff117bd86c1d2f784b21e0fb2689cf3272b8937b24b"}, @@ -5876,6 +6301,8 @@ version = "75.8.2" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = true python-versions = ">=3.9" +groups = ["main"] +markers = "extra == \"sentence-transformers\" and python_version >= \"3.12\"" files = [ {file = "setuptools-75.8.2-py3-none-any.whl", hash = "sha256:558e47c15f1811c1fa7adbd0096669bf76c1d3f433f58324df69f3f5ecac4e8f"}, {file = "setuptools-75.8.2.tar.gz", hash = "sha256:4880473a969e5f23f2a2be3646b2dfd84af9028716d398e46192f84bc36900d2"}, @@ -5896,6 +6323,8 @@ version = "2.0.7" description = "Manipulation and analysis of geometric objects" optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"vertexai\"" files = [ {file = "shapely-2.0.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:33fb10e50b16113714ae40adccf7670379e9ccf5b7a41d0002046ba2b8f0f691"}, {file = "shapely-2.0.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f44eda8bd7a4bccb0f281264b34bf3518d8c4c9a8ffe69a1a05dabf6e8461147"}, @@ -5954,10 +6383,12 @@ version = "1.17.0" description = "Python 2 and 3 compatibility utilities" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +groups = ["main", "dev", "docs"] files = [ {file = "six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"}, {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"}, ] +markers = {main = "(python_version <= \"3.11\" or python_version >= \"3.12\") and (extra == \"vertexai\" or extra == \"mistralai\" or extra == \"bedrock\" or extra == \"ranx\") and (extra == \"vertexai\" or extra == \"mistralai\" or extra == \"bedrock\" or python_version >= \"3.10\")", dev = "python_version <= \"3.11\" or python_version >= \"3.12\"", docs = "python_version <= \"3.11\" or python_version >= \"3.12\""} [[package]] name = "sniffio" @@ -5965,6 +6396,8 @@ version = "1.3.1" description = "Sniff out which async library your code is running under" optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "(extra == \"openai\" or extra == \"cohere\" or extra == \"mistralai\") and (python_version <= \"3.11\" or python_version >= \"3.12\")" files = [ {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"}, {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, @@ -5976,6 +6409,8 @@ version = "2.2.0" description = "This package 
provides 29 stemmers for 28 languages generated from Snowball algorithms." optional = false python-versions = "*" +groups = ["docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "snowballstemmer-2.2.0-py2.py3-none-any.whl", hash = "sha256:c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a"}, {file = "snowballstemmer-2.2.0.tar.gz", hash = "sha256:09b16deb8547d3412ad7b590689584cd0fe25ec8db3be37788be3810cbf19cb1"}, @@ -5987,10 +6422,12 @@ version = "2.6" description = "A modern CSS selector implementation for Beautiful Soup." optional = false python-versions = ">=3.8" +groups = ["main", "docs"] files = [ {file = "soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9"}, {file = "soupsieve-2.6.tar.gz", hash = "sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb"}, ] +markers = {main = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"ranx\" and python_version >= \"3.10\"", docs = "python_version <= \"3.11\" or python_version >= \"3.12\""} [[package]] name = "sphinx" @@ -5998,6 +6435,8 @@ version = "7.4.7" description = "Python documentation generator" optional = false python-versions = ">=3.9" +groups = ["docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "sphinx-7.4.7-py3-none-any.whl", hash = "sha256:c2419e2135d11f1951cd994d6eb18a1835bd8fdd8429f9ca375dc1f3281bd239"}, {file = "sphinx-7.4.7.tar.gz", hash = "sha256:242f92a7ea7e6c5b406fdc2615413890ba9f699114a9c09192d7dfead2ee9cfe"}, @@ -6034,6 +6473,8 @@ version = "0.5.2" description = "Add a copy button to each of your code cells." optional = false python-versions = ">=3.7" +groups = ["docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "sphinx-copybutton-0.5.2.tar.gz", hash = "sha256:4cf17c82fb9646d1bc9ca92ac280813a3b605d8c421225fd9913154103ee1fbd"}, {file = "sphinx_copybutton-0.5.2-py3-none-any.whl", hash = "sha256:fb543fd386d917746c9a2c50360c7905b605726b9355cd26e9974857afeae06e"}, @@ -6052,6 +6493,8 @@ version = "0.5.0" description = "A sphinx extension for designing beautiful, view size responsive web components." 
optional = false python-versions = ">=3.8" +groups = ["docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "sphinx_design-0.5.0-py3-none-any.whl", hash = "sha256:1af1267b4cea2eedd6724614f19dcc88fe2e15aff65d06b2f6252cee9c4f4c1e"}, {file = "sphinx_design-0.5.0.tar.gz", hash = "sha256:e8e513acea6f92d15c6de3b34e954458f245b8e761b45b63950f65373352ab00"}, @@ -6075,6 +6518,8 @@ version = "1.0.1" description = "Sphinx Extension adding support for custom favicons" optional = false python-versions = ">=3.7" +groups = ["docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "sphinx-favicon-1.0.1.tar.gz", hash = "sha256:df796de32125609c1b4a8964db74270ebf4502089c27cd53f542354dc0b57e8e"}, {file = "sphinx_favicon-1.0.1-py3-none-any.whl", hash = "sha256:7c93d6b634cb4c9687ceab67a8526f05d3b02679df94e273e51a43282e6b034c"}, @@ -6094,6 +6539,8 @@ version = "2.0.0" description = "sphinxcontrib-applehelp is a Sphinx extension which outputs Apple help books" optional = false python-versions = ">=3.9" +groups = ["docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "sphinxcontrib_applehelp-2.0.0-py3-none-any.whl", hash = "sha256:4cd3f0ec4ac5dd9c17ec65e9ab272c9b867ea77425228e68ecf08d6b28ddbdb5"}, {file = "sphinxcontrib_applehelp-2.0.0.tar.gz", hash = "sha256:2f29ef331735ce958efa4734873f084941970894c6090408b079c61b2e1c06d1"}, @@ -6110,6 +6557,8 @@ version = "2.0.0" description = "sphinxcontrib-devhelp is a sphinx extension which outputs Devhelp documents" optional = false python-versions = ">=3.9" +groups = ["docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "sphinxcontrib_devhelp-2.0.0-py3-none-any.whl", hash = "sha256:aefb8b83854e4b0998877524d1029fd3e6879210422ee3780459e28a1f03a8a2"}, {file = "sphinxcontrib_devhelp-2.0.0.tar.gz", hash = "sha256:411f5d96d445d1d73bb5d52133377b4248ec79db5c793ce7dbe59e074b4dd1ad"}, @@ -6126,6 +6575,8 @@ version = "2.1.0" description = "sphinxcontrib-htmlhelp is a sphinx extension which renders HTML help files" optional = false python-versions = ">=3.9" +groups = ["docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "sphinxcontrib_htmlhelp-2.1.0-py3-none-any.whl", hash = "sha256:166759820b47002d22914d64a075ce08f4c46818e17cfc9470a9786b759b19f8"}, {file = "sphinxcontrib_htmlhelp-2.1.0.tar.gz", hash = "sha256:c9e2916ace8aad64cc13a0d233ee22317f2b9025b9cf3295249fa985cc7082e9"}, @@ -6142,6 +6593,8 @@ version = "1.0.1" description = "A sphinx extension which renders display math in HTML via JavaScript" optional = false python-versions = ">=3.5" +groups = ["docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "sphinxcontrib-jsmath-1.0.1.tar.gz", hash = "sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8"}, {file = "sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl", hash = "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178"}, @@ -6156,6 +6609,8 @@ version = "2.0.0" description = "sphinxcontrib-qthelp is a sphinx extension which outputs QtHelp documents" optional = false python-versions = ">=3.9" +groups = ["docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "sphinxcontrib_qthelp-2.0.0-py3-none-any.whl", hash = "sha256:b18a828cdba941ccd6ee8445dbe72ffa3ef8cbe7505d8cd1fa0d42d3f2d5f3eb"}, {file = "sphinxcontrib_qthelp-2.0.0.tar.gz", hash = 
"sha256:4fe7d0ac8fc171045be623aba3e2a8f613f8682731f9153bb2e40ece16b9bbab"}, @@ -6172,6 +6627,8 @@ version = "2.0.0" description = "sphinxcontrib-serializinghtml is a sphinx extension which outputs \"serialized\" HTML files (json and pickle)" optional = false python-versions = ">=3.9" +groups = ["docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "sphinxcontrib_serializinghtml-2.0.0-py3-none-any.whl", hash = "sha256:6e2cb0eef194e10c27ec0023bfeb25badbbb5868244cf5bc5bdc04e4464bf331"}, {file = "sphinxcontrib_serializinghtml-2.0.0.tar.gz", hash = "sha256:e9d912827f872c029017a53f0ef2180b327c3f7fd23c87229f7a8e8b70031d4d"}, @@ -6188,6 +6645,8 @@ version = "2.0.38" description = "Database Abstraction Library" optional = false python-versions = ">=3.7" +groups = ["docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "SQLAlchemy-2.0.38-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5e1d9e429028ce04f187a9f522818386c8b076723cdbe9345708384f49ebcec6"}, {file = "SQLAlchemy-2.0.38-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b87a90f14c68c925817423b0424381f0e16d80fc9a1a1046ef202ab25b19a444"}, @@ -6221,27 +6680,16 @@ files = [ {file = "SQLAlchemy-2.0.38-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5dba1cdb8f319084f5b00d41207b2079822aa8d6a4667c0f369fce85e34b0c86"}, {file = "SQLAlchemy-2.0.38-cp313-cp313-win32.whl", hash = "sha256:eae27ad7580529a427cfdd52c87abb2dfb15ce2b7a3e0fc29fbb63e2ed6f8120"}, {file = "SQLAlchemy-2.0.38-cp313-cp313-win_amd64.whl", hash = "sha256:b335a7c958bc945e10c522c069cd6e5804f4ff20f9a744dd38e748eb602cbbda"}, - {file = "SQLAlchemy-2.0.38-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:40310db77a55512a18827488e592965d3dec6a3f1e3d8af3f8243134029daca3"}, {file = "SQLAlchemy-2.0.38-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3d3043375dd5bbcb2282894cbb12e6c559654c67b5fffb462fda815a55bf93f7"}, - {file = "SQLAlchemy-2.0.38-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70065dfabf023b155a9c2a18f573e47e6ca709b9e8619b2e04c54d5bcf193178"}, {file = "SQLAlchemy-2.0.38-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:c058b84c3b24812c859300f3b5abf300daa34df20d4d4f42e9652a4d1c48c8a4"}, - {file = "SQLAlchemy-2.0.38-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:0398361acebb42975deb747a824b5188817d32b5c8f8aba767d51ad0cc7bb08d"}, {file = "SQLAlchemy-2.0.38-cp37-cp37m-win32.whl", hash = "sha256:a2bc4e49e8329f3283d99840c136ff2cd1a29e49b5624a46a290f04dff48e079"}, {file = "SQLAlchemy-2.0.38-cp37-cp37m-win_amd64.whl", hash = "sha256:9cd136184dd5f58892f24001cdce986f5d7e96059d004118d5410671579834a4"}, - {file = "SQLAlchemy-2.0.38-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:665255e7aae5f38237b3a6eae49d2358d83a59f39ac21036413fab5d1e810578"}, - {file = "SQLAlchemy-2.0.38-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:92f99f2623ff16bd4aaf786ccde759c1f676d39c7bf2855eb0b540e1ac4530c8"}, {file = "SQLAlchemy-2.0.38-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa498d1392216fae47eaf10c593e06c34476ced9549657fca713d0d1ba5f7248"}, - {file = "SQLAlchemy-2.0.38-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9afbc3909d0274d6ac8ec891e30210563b2c8bdd52ebbda14146354e7a69373"}, {file = "SQLAlchemy-2.0.38-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:57dd41ba32430cbcc812041d4de8d2ca4651aeefad2626921ae2a23deb8cd6ff"}, - {file = "SQLAlchemy-2.0.38-cp38-cp38-musllinux_1_2_x86_64.whl", 
hash = "sha256:3e35d5565b35b66905b79ca4ae85840a8d40d31e0b3e2990f2e7692071b179ca"}, {file = "SQLAlchemy-2.0.38-cp38-cp38-win32.whl", hash = "sha256:f0d3de936b192980209d7b5149e3c98977c3810d401482d05fb6d668d53c1c63"}, {file = "SQLAlchemy-2.0.38-cp38-cp38-win_amd64.whl", hash = "sha256:3868acb639c136d98107c9096303d2d8e5da2880f7706f9f8c06a7f961961149"}, - {file = "SQLAlchemy-2.0.38-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:07258341402a718f166618470cde0c34e4cec85a39767dce4e24f61ba5e667ea"}, - {file = "SQLAlchemy-2.0.38-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0a826f21848632add58bef4f755a33d45105d25656a0c849f2dc2df1c71f6f50"}, {file = "SQLAlchemy-2.0.38-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:386b7d136919bb66ced64d2228b92d66140de5fefb3c7df6bd79069a269a7b06"}, - {file = "SQLAlchemy-2.0.38-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f2951dc4b4f990a4b394d6b382accb33141d4d3bd3ef4e2b27287135d6bdd68"}, {file = "SQLAlchemy-2.0.38-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:8bf312ed8ac096d674c6aa9131b249093c1b37c35db6a967daa4c84746bc1bc9"}, - {file = "SQLAlchemy-2.0.38-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:6db316d6e340f862ec059dc12e395d71f39746a20503b124edc255973977b728"}, {file = "SQLAlchemy-2.0.38-cp39-cp39-win32.whl", hash = "sha256:c09a6ea87658695e527104cf857c70f79f14e9484605e205217aae0ec27b45fc"}, {file = "SQLAlchemy-2.0.38-cp39-cp39-win_amd64.whl", hash = "sha256:12f5c9ed53334c3ce719155424dc5407aaa4f6cadeb09c5b627e06abb93933a1"}, {file = "SQLAlchemy-2.0.38-py3-none-any.whl", hash = "sha256:63178c675d4c80def39f1febd625a6333f44c0ba269edd8a468b156394b27753"}, @@ -6283,6 +6731,8 @@ version = "0.6.3" description = "Extract data from python stack frames and tracebacks for informative displays" optional = false python-versions = "*" +groups = ["dev", "docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "stack_data-0.6.3-py3-none-any.whl", hash = "sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695"}, {file = "stack_data-0.6.3.tar.gz", hash = "sha256:836a778de4fec4dcd1dcd89ed8abff8a221f58308462e1c4aa2a3cf30148f0b9"}, @@ -6302,6 +6752,8 @@ version = "1.13.1" description = "Computer algebra system (CAS) in Python" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"sentence-transformers\" and (python_version <= \"3.11\" or python_version >= \"3.12\")" files = [ {file = "sympy-1.13.1-py3-none-any.whl", hash = "sha256:db36cdc64bf61b9b24578b6f7bab1ecdd2452cf008f34faa33776680c26d66f8"}, {file = "sympy-1.13.1.tar.gz", hash = "sha256:9cebf7e04ff162015ce31c9c6c9144daa34a93bd082f54fd8f12deca4f47515f"}, @@ -6319,6 +6771,8 @@ version = "0.9.0" description = "Pretty-print tabular data" optional = false python-versions = ">=3.7" +groups = ["main", "docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f"}, {file = "tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c"}, @@ -6333,6 +6787,8 @@ version = "9.0.0" description = "Retry code until it succeeds" optional = false python-versions = ">=3.8" +groups = ["main"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "tenacity-9.0.0-py3-none-any.whl", hash = "sha256:93de0c98785b27fcf659856aa9f54bfbd399e29969b0621bc7f762bd441b4539"}, {file 
= "tenacity-9.0.0.tar.gz", hash = "sha256:807f37ca97d62aa361264d497b0e31e92b8027044942bfa756160d908320d73b"}, @@ -6348,6 +6804,8 @@ version = "4.9.1" description = "Python library for throwaway instances of anything that can run in a Docker container" optional = false python-versions = "<4.0,>=3.9" +groups = ["dev"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "testcontainers-4.9.1-py3-none-any.whl", hash = "sha256:315fb94b42a383872df530aa45319745278ef0cc18b9cfcdc231a75d14afa5a0"}, {file = "testcontainers-4.9.1.tar.gz", hash = "sha256:37fe9a222549ddb788463935965b16f91809e9a8d654f437d6a59eac9b77f76f"}, @@ -6401,6 +6859,8 @@ version = "3.5.0" description = "threadpoolctl" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"sentence-transformers\"" files = [ {file = "threadpoolctl-3.5.0-py3-none-any.whl", hash = "sha256:56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467"}, {file = "threadpoolctl-3.5.0.tar.gz", hash = "sha256:082433502dd922bf738de0d8bcc4fdcbf0979ff44c42bd40f5af8a282f6fa107"}, @@ -6412,6 +6872,8 @@ version = "1.4.0" description = "A tiny CSS parser" optional = false python-versions = ">=3.8" +groups = ["docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "tinycss2-1.4.0-py3-none-any.whl", hash = "sha256:3a49cf47b7675da0b15d0c6e1df8df4ebd96e9394bb905a5775adb0d884c5289"}, {file = "tinycss2-1.4.0.tar.gz", hash = "sha256:10c0972f6fc0fbee87c3edb76549357415e94548c1ae10ebccdea16fb404a9b7"}, @@ -6430,6 +6892,8 @@ version = "0.21.0" description = "" optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "(extra == \"sentence-transformers\" or extra == \"cohere\") and (python_version <= \"3.11\" or python_version >= \"3.12\")" files = [ {file = "tokenizers-0.21.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:3c4c93eae637e7d2aaae3d376f06085164e1660f89304c0ab2b1d08a406636b2"}, {file = "tokenizers-0.21.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:f53ea537c925422a2e0e92a24cce96f6bc5046bbef24a1652a5edc8ba975f62e"}, @@ -6462,6 +6926,8 @@ version = "2.2.1" description = "A lil' TOML parser" optional = false python-versions = ">=3.8" +groups = ["dev", "docs"] +markers = "python_version < \"3.11\"" files = [ {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"}, {file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"}, @@ -6503,6 +6969,8 @@ version = "0.13.2" description = "Style preserving TOML library" optional = false python-versions = ">=3.8" +groups = ["dev"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "tomlkit-0.13.2-py3-none-any.whl", hash = "sha256:7a974427f6e119197f670fbbbeae7bef749a6c14e793db934baefc1b5f03efde"}, {file = "tomlkit-0.13.2.tar.gz", hash = "sha256:fff5fe59a87295b278abd31bec92c15d9bc4a06885ab12bcea52c71119392e79"}, @@ -6514,6 +6982,8 @@ version = "2.6.0" description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" optional = true python-versions = ">=3.9.0" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"sentence-transformers\"" files = [ {file = "torch-2.6.0-cp310-cp310-manylinux1_x86_64.whl", hash = 
"sha256:6860df13d9911ac158f4c44031609700e1eba07916fff62e21e6ffa0a9e01961"}, {file = "torch-2.6.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:c4f103a49830ce4c7561ef4434cc7926e5a5fe4e5eb100c19ab36ea1e2b634ab"}, @@ -6570,6 +7040,8 @@ version = "6.4.2" description = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed." optional = false python-versions = ">=3.8" +groups = ["dev", "docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "tornado-6.4.2-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:e828cce1123e9e44ae2a50a9de3055497ab1d0aeb440c5ac23064d9e44880da1"}, {file = "tornado-6.4.2-cp38-abi3-macosx_10_9_x86_64.whl", hash = "sha256:072ce12ada169c5b00b7d92a99ba089447ccc993ea2143c9ede887e0937aa803"}, @@ -6590,6 +7062,8 @@ version = "4.67.1" description = "Fast, Extensible Progress Meter" optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and (extra == \"sentence-transformers\" or extra == \"cohere\" or extra == \"openai\" or extra == \"ranx\") and (extra == \"sentence-transformers\" or extra == \"cohere\" or extra == \"openai\" or python_version >= \"3.10\")" files = [ {file = "tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2"}, {file = "tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2"}, @@ -6611,6 +7085,8 @@ version = "5.14.3" description = "Traitlets Python configuration system" optional = false python-versions = ">=3.8" +groups = ["dev", "docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f"}, {file = "traitlets-5.14.3.tar.gz", hash = "sha256:9ed0579d3502c94b4b3732ac120375cda96f923114522847de4b3bb98b96b6b7"}, @@ -6626,6 +7102,8 @@ version = "4.49.0" description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow" optional = true python-versions = ">=3.9.0" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"sentence-transformers\"" files = [ {file = "transformers-4.49.0-py3-none-any.whl", hash = "sha256:6b4fded1c5fee04d384b1014495b4235a2b53c87503d7d592423c06128cbbe03"}, {file = "transformers-4.49.0.tar.gz", hash = "sha256:7e40e640b5b8dc3f48743f5f5adbdce3660c82baafbd3afdfc04143cdbd2089e"}, @@ -6695,6 +7173,8 @@ version = "2.6" description = "Support tools for TREC CAR participants. 
Also see trec-car.cs.unh.edu" optional = true python-versions = ">=3.6" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"ranx\" and python_version >= \"3.10\"" files = [ {file = "trec-car-tools-2.6.tar.gz", hash = "sha256:2fce2de120224fd569b151d5bed358a4ed334e643889b9e3dfe3e5a3d15d21c8"}, {file = "trec_car_tools-2.6-py3-none-any.whl", hash = "sha256:e6f0373259e1c234222da7270ab54ca7af7a6f8d0dd32b13e158c1659d3991cf"}, @@ -6710,6 +7190,8 @@ version = "3.2.0" description = "A language and compiler for custom Deep Learning operations" optional = true python-versions = "*" +groups = ["main"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and extra == \"sentence-transformers\" and (python_version <= \"3.11\" or python_version >= \"3.12\")" files = [ {file = "triton-3.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3e54983cd51875855da7c68ec05c05cf8bb08df361b1d5b69e05e40b0c9bd62"}, {file = "triton-3.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8009a1fb093ee8546495e96731336a33fb8856a38e45bb4ab6affd6dbc3ba220"}, @@ -6729,6 +7211,8 @@ version = "1.16.0.20241221" description = "Typing stubs for cffi" optional = false python-versions = ">=3.8" +groups = ["dev"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "types_cffi-1.16.0.20241221-py3-none-any.whl", hash = "sha256:e5b76b4211d7a9185f6ab8d06a106d56c7eb80af7cdb8bfcb4186ade10fb112f"}, {file = "types_cffi-1.16.0.20241221.tar.gz", hash = "sha256:1c96649618f4b6145f58231acb976e0b448be6b847f7ab733dabe62dfbff6591"}, @@ -6743,6 +7227,8 @@ version = "24.1.0.20240722" description = "Typing stubs for pyOpenSSL" optional = false python-versions = ">=3.8" +groups = ["dev"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "types-pyOpenSSL-24.1.0.20240722.tar.gz", hash = "sha256:47913b4678a01d879f503a12044468221ed8576263c1540dcb0484ca21b08c39"}, {file = "types_pyOpenSSL-24.1.0.20240722-py3-none-any.whl", hash = "sha256:6a7a5d2ec042537934cfb4c9d4deb0e16c4c6250b09358df1f083682fe6fda54"}, @@ -6758,6 +7244,8 @@ version = "6.0.12.20241230" description = "Typing stubs for PyYAML" optional = false python-versions = ">=3.8" +groups = ["dev"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "types_PyYAML-6.0.12.20241230-py3-none-any.whl", hash = "sha256:fa4d32565219b68e6dee5f67534c722e53c00d1cfc09c435ef04d7353e1e96e6"}, {file = "types_pyyaml-6.0.12.20241230.tar.gz", hash = "sha256:7f07622dbd34bb9c8b264fe860a17e0efcad00d50b5f27e93984909d9363498c"}, @@ -6769,6 +7257,8 @@ version = "4.6.0.20241004" description = "Typing stubs for redis" optional = false python-versions = ">=3.8" +groups = ["dev"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "types-redis-4.6.0.20241004.tar.gz", hash = "sha256:5f17d2b3f9091ab75384153bfa276619ffa1cf6a38da60e10d5e6749cc5b902e"}, {file = "types_redis-4.6.0.20241004-py3-none-any.whl", hash = "sha256:ef5da68cb827e5f606c8f9c0b49eeee4c2669d6d97122f301d3a55dc6a63f6ed"}, @@ -6784,6 +7274,8 @@ version = "2.31.0.6" description = "Typing stubs for requests" optional = true python-versions = ">=3.7" +groups = ["main"] +markers = "python_version < \"3.10\" and extra == \"cohere\"" files = [ {file = "types-requests-2.31.0.6.tar.gz", hash = "sha256:cd74ce3b53c461f1228a9b783929ac73a666658f223e28ed29753771477b3bd0"}, {file = 
"types_requests-2.31.0.6-py3-none-any.whl", hash = "sha256:a2db9cb228a81da8348b49ad6db3f5519452dd20a9c1e1a868c83c5fe88fd1a9"}, @@ -6798,6 +7290,8 @@ version = "2.32.0.20241016" description = "Typing stubs for requests" optional = true python-versions = ">=3.8" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"cohere\" and python_version >= \"3.10\"" files = [ {file = "types-requests-2.32.0.20241016.tar.gz", hash = "sha256:0d9cad2f27515d0e3e3da7134a1b6f28fb97129d86b867f24d9c726452634d95"}, {file = "types_requests-2.32.0.20241016-py3-none-any.whl", hash = "sha256:4195d62d6d3e043a4eaaf08ff8a62184584d2e8684e9d2aa178c7915a7da3747"}, @@ -6812,6 +7306,8 @@ version = "75.8.0.20250225" description = "Typing stubs for setuptools" optional = false python-versions = ">=3.9" +groups = ["dev"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "types_setuptools-75.8.0.20250225-py3-none-any.whl", hash = "sha256:94c86b439cc60bcc68c1cda3fd2c301f007f8f9502f4fbb54c66cb5ce9b875af"}, {file = "types_setuptools-75.8.0.20250225.tar.gz", hash = "sha256:6038f7e983d55792a5f90d8fdbf5d4c186026214a16bb65dd6ae83c624ae9636"}, @@ -6823,6 +7319,8 @@ version = "0.9.0.20241207" description = "Typing stubs for tabulate" optional = false python-versions = ">=3.8" +groups = ["dev"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "types_tabulate-0.9.0.20241207-py3-none-any.whl", hash = "sha256:b8dad1343c2a8ba5861c5441370c3e35908edd234ff036d4298708a1d4cf8a85"}, {file = "types_tabulate-0.9.0.20241207.tar.gz", hash = "sha256:ac1ac174750c0a385dfd248edc6279fa328aaf4ea317915ab879a2ec47833230"}, @@ -6834,6 +7332,8 @@ version = "1.26.25.14" description = "Typing stubs for urllib3" optional = true python-versions = "*" +groups = ["main"] +markers = "python_version < \"3.10\" and extra == \"cohere\"" files = [ {file = "types-urllib3-1.26.25.14.tar.gz", hash = "sha256:229b7f577c951b8c1b92c1bc2b2fdb0b49847bd2af6d1cc2a2e3dd340f3bda8f"}, {file = "types_urllib3-1.26.25.14-py3-none-any.whl", hash = "sha256:9683bbb7fb72e32bfe9d2be6e04875fbe1b3eeec3cbb4ea231435aa7fd6b4f0e"}, @@ -6845,6 +7345,8 @@ version = "4.12.2" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" +groups = ["main", "dev", "docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, @@ -6856,6 +7358,8 @@ version = "0.9.0" description = "Runtime inspection utilities for typing module." 
optional = true python-versions = "*" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"mistralai\"" files = [ {file = "typing_inspect-0.9.0-py3-none-any.whl", hash = "sha256:9ee6fc59062311ef8547596ab6b955e1b8aa46242d854bfc78f4f6b0eff35f9f"}, {file = "typing_inspect-0.9.0.tar.gz", hash = "sha256:b23fc42ff6f6ef6954e4852c1fb512cdd18dbea03134f91f856a95ccc9461f78"}, @@ -6871,6 +7375,8 @@ version = "2025.1" description = "Provider of IANA time zone data" optional = true python-versions = ">=2" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"ranx\" and python_version >= \"3.10\"" files = [ {file = "tzdata-2025.1-py2.py3-none-any.whl", hash = "sha256:7e127113816800496f027041c570f50bcd464a020098a3b6b199517772303639"}, {file = "tzdata-2025.1.tar.gz", hash = "sha256:24894909e88cdb28bd1636c6887801df64cb485bd593f2fd83ef29075a81d694"}, @@ -6882,6 +7388,8 @@ version = "0.2.3" description = "Pure Python decompression module for .Z files compressed using Unix compress utility" optional = true python-versions = ">=3.9" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"ranx\" and python_version >= \"3.10\"" files = [ {file = "unlzw3-0.2.3-py3-none-any.whl", hash = "sha256:7760fb4f3afa1225623944c061991d89a061f7fb78665dbc4cddfdb562bb4a8b"}, {file = "unlzw3-0.2.3.tar.gz", hash = "sha256:ede5d928c792fff9da406f20334f9739693327f448f383ae1df1774627197bbb"}, @@ -6897,10 +7405,12 @@ version = "1.26.20" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" +groups = ["main", "dev", "docs"] files = [ {file = "urllib3-1.26.20-py2.py3-none-any.whl", hash = "sha256:0ed14ccfbf1c30a9072c7ca157e4319b70d65f623e91e7b32fadb2853431016e"}, {file = "urllib3-1.26.20.tar.gz", hash = "sha256:40c2dc0c681e47eb8f90e7e27bf6ff7df2e677421fd46756da1161c39ca70d32"}, ] +markers = {main = "extra == \"sentence-transformers\" and python_version < \"3.10\" or extra == \"cohere\" and python_version < \"3.10\" or extra == \"vertexai\" and python_version < \"3.10\" or extra == \"voyageai\" and python_version < \"3.10\" or extra == \"bedrock\" and python_version < \"3.10\"", dev = "python_version < \"3.10\"", docs = "python_version < \"3.10\""} [package.extras] brotli = ["brotli (==1.0.9)", "brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] @@ -6913,10 +7423,12 @@ version = "2.3.0" description = "HTTP library with thread-safe connection pooling, file post, and more." 
optional = false python-versions = ">=3.9" +groups = ["main", "dev", "docs"] files = [ {file = "urllib3-2.3.0-py3-none-any.whl", hash = "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df"}, {file = "urllib3-2.3.0.tar.gz", hash = "sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d"}, ] +markers = {main = "(python_version <= \"3.11\" or python_version >= \"3.12\") and (extra == \"sentence-transformers\" or extra == \"cohere\" or extra == \"vertexai\" or extra == \"voyageai\" or extra == \"ranx\" or extra == \"bedrock\") and python_version >= \"3.10\"", dev = "python_version <= \"3.11\" and python_version >= \"3.10\" or python_version >= \"3.12\"", docs = "python_version <= \"3.11\" and python_version >= \"3.10\" or python_version >= \"3.12\""} [package.extras] brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] @@ -6930,6 +7442,8 @@ version = "20.29.2" description = "Virtual Python Environment builder" optional = false python-versions = ">=3.8" +groups = ["dev"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "virtualenv-20.29.2-py3-none-any.whl", hash = "sha256:febddfc3d1ea571bdb1dc0f98d7b45d24def7428214d4fb73cc486c9568cce6a"}, {file = "virtualenv-20.29.2.tar.gz", hash = "sha256:fdaabebf6d03b5ba83ae0a02cfe96f48a716f4fae556461d180825866f75b728"}, @@ -6950,6 +7464,8 @@ version = "0.2.4" description = "" optional = true python-versions = "<4.0.0,>=3.7.1" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"voyageai\"" files = [ {file = "voyageai-0.2.4-py3-none-any.whl", hash = "sha256:e3070e5c78dec89adae43231334b4637aa88933dad99b1c33d3219fdfc94dfa4"}, {file = "voyageai-0.2.4.tar.gz", hash = "sha256:b9911d8629e8a4e363291c133482fead49a3536afdf1e735f3ab3aaccd8d250d"}, @@ -6968,6 +7484,8 @@ version = "0.2.5" description = "Python library to work with ARC and WARC files" optional = true python-versions = "*" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"ranx\" and python_version >= \"3.10\"" files = [ {file = "warc3_wet-0.2.5-py3-none-any.whl", hash = "sha256:5a9a525383fb1af159734baa75f349a7c4ec7bccd1b938681b5748515d2bf624"}, {file = "warc3_wet-0.2.5.tar.gz", hash = "sha256:15e50402dabaa1e95307f1e2a6169cfd5f137b70761d9f0b16a10aa6de227970"}, @@ -6979,6 +7497,8 @@ version = "0.2.5" description = "Python library to work with ARC and WARC files, with fixes for ClueWeb09" optional = true python-versions = "*" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"ranx\" and python_version >= \"3.10\"" files = [ {file = "warc3-wet-clueweb09-0.2.5.tar.gz", hash = "sha256:3054bfc07da525d5967df8ca3175f78fa3f78514c82643f8c81fbca96300b836"}, ] @@ -6989,6 +7509,8 @@ version = "0.2.13" description = "Measures the displayed width of unicode strings in a terminal" optional = false python-versions = "*" +groups = ["dev", "docs"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859"}, {file = "wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5"}, @@ -7000,6 +7522,8 @@ version = "0.5.1" description = "Character encoding aliases for legacy web content" optional = false python-versions = "*" +groups = ["docs"] +markers = "python_version <= \"3.11\" or python_version >= 
\"3.12\"" files = [ {file = "webencodings-0.5.1-py2.py3-none-any.whl", hash = "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78"}, {file = "webencodings-0.5.1.tar.gz", hash = "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923"}, @@ -7011,6 +7535,8 @@ version = "1.17.2" description = "Module for decorators, wrappers and monkey patching." optional = false python-versions = ">=3.8" +groups = ["dev"] +markers = "python_version <= \"3.11\" or python_version >= \"3.12\"" files = [ {file = "wrapt-1.17.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3d57c572081fed831ad2d26fd430d565b76aa277ed1d30ff4d40670b1c0dd984"}, {file = "wrapt-1.17.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b5e251054542ae57ac7f3fba5d10bfff615b6c2fb09abeb37d2f1463f841ae22"}, @@ -7099,6 +7625,8 @@ version = "1.18.3" description = "Yet another URL library" optional = true python-versions = ">=3.9" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"voyageai\"" files = [ {file = "yarl-1.18.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7df647e8edd71f000a5208fe6ff8c382a1de8edfbccdbbfe649d263de07d8c34"}, {file = "yarl-1.18.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c69697d3adff5aa4f874b19c0e4ed65180ceed6318ec856ebc423aa5850d84f7"}, @@ -7195,10 +7723,12 @@ version = "3.21.0" description = "Backport of pathlib-compatible object wrapper for zip files" optional = false python-versions = ">=3.9" +groups = ["dev", "docs"] files = [ {file = "zipp-3.21.0-py3-none-any.whl", hash = "sha256:ac1bbe05fd2991f160ebce24ffbac5f6d11d83dc90891255885223d42b3cd931"}, {file = "zipp-3.21.0.tar.gz", hash = "sha256:2c9958f6430a2040341a52eb608ed6dd93ef4392e02ffe219417c1b28b5dd1f4"}, ] +markers = {dev = "python_version < \"3.10\"", docs = "python_version <= \"3.11\" or python_version >= \"3.12\""} [package.extras] check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)"] @@ -7214,6 +7744,8 @@ version = "0.1.9" description = "Low-level interface to the zlib library that enables capturing the decoding state" optional = true python-versions = ">=3.6" +groups = ["main"] +markers = "(python_version <= \"3.11\" or python_version >= \"3.12\") and extra == \"ranx\" and python_version >= \"3.10\"" files = [ {file = "zlib_state-0.1.9-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:97f45d0f80e9d7070229ecb36112eea6a17dc40053449a9c613ef837d9cb66b4"}, {file = "zlib_state-0.1.9-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3564eaa130f2533b87b82d0e622cfb5c25acec123e7bfe38d39db9ce6349cb52"}, @@ -7256,6 +7788,6 @@ vertexai = ["google-cloud-aiplatform", "protobuf"] voyageai = ["voyageai"] [metadata] -lock-version = "2.0" +lock-version = "2.1" python-versions = ">=3.9,<3.14" -content-hash = "8be5a998fb20c7b99f19af0112aa1c2c7e981f802c26e7a3bce08eeb61dfb741" +content-hash = "3bcbaaf402487a181810db22556d0207a555d5683984cc3af14a803974c8900e" diff --git a/pyproject.toml b/pyproject.toml index 2a897726..c5eae39a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,6 +33,8 @@ tenacity = ">=8.2.2" tabulate = "^0.9.0" ml-dtypes = "^0.4.0" python-ulid = "^3.0.0" +jsonpath-ng = "^1.5.0" + openai = { version = "^1.13.0", optional = true } sentence-transformers = { version = "^3.4.0", optional = true } scipy = [ diff --git a/redisvl/schema/validation.py b/redisvl/schema/validation.py index c4ddd3e7..dce0095d 100644 --- a/redisvl/schema/validation.py +++ 
b/redisvl/schema/validation.py @@ -5,11 +5,9 @@ using dynamically generated Pydantic models. """ -import json -import re -import warnings -from typing import Any, Dict, List, Optional, Type, Union, cast +from typing import Any, Dict, List, Optional, Type, Union +from jsonpath_ng import parse as jsonpath_parse from pydantic import BaseModel, Field, field_validator from redisvl.schema import IndexSchema @@ -213,29 +211,32 @@ def _validate_vector(cls, value): def extract_from_json_path(obj: Dict[str, Any], path: str) -> Any: """ - Extract a value from a nested JSON object using a path. + Extract a value from a nested JSON object using a JSONPath expression. Args: obj: The object to extract values from - path: JSONPath-style path (e.g., $.field.subfield) + path: JSONPath expression (e.g., $.field.subfield, $.[*].name) Returns: The extracted value or None if not found - """ - # Handle JSONPath syntax (e.g., $.field.subfield) - if path.startswith("$."): - path_parts = path[2:].split(".") - else: - path_parts = path.split(".") - current = obj - for part in path_parts: - if isinstance(current, dict) and part in current: - current = current[part] - else: - return None - - return current + Notes: + This function uses the jsonpath-ng library for proper JSONPath parsing + and supports standard JSONPath syntax such as wildcards and array indexing + (filter expressions require the extended parser in jsonpath_ng.ext). + """ + # If path doesn't start with $, add it as per JSONPath spec + if not path.startswith("$"): + path = f"$.{path}" + + # Parse and find the JSONPath expression + jsonpath_expr = jsonpath_parse(path) + matches = jsonpath_expr.find(obj) + + # Return the first match value, or None if no matches + if matches: + return matches[0].value + return None def validate_object(schema: IndexSchema, obj: Dict[str, Any]) -> Dict[str, Any]: From 4827f61d65d6ad3e5b65ec9918836d888f026e0a Mon Sep 17 00:00:00 2001 From: Tyler Hutcherson Date: Thu, 27 Mar 2025 15:36:19 -0400 Subject: [PATCH 07/11] hash the schema as the client side cache key --- redisvl/schema/validation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/redisvl/schema/validation.py index dce0095d..629b193b 100644 --- a/redisvl/schema/validation.py +++ b/redisvl/schema/validation.py @@ -5,6 +5,7 @@ using dynamically generated Pydantic models.
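As context for the `extract_from_json_path` rewrite shown above: a minimal, self-contained sketch of the behavior the new jsonpath-ng-based implementation is expected to have. The sample document and paths below are invented for illustration and do not come from the repo or its tests.

```python
from jsonpath_ng import parse as jsonpath_parse

doc = {"user": {"name": "john", "scores": {"math": 90, "bio": 85}}}

# Plain dotted access, equivalent to what the old split-on-"." logic handled
assert jsonpath_parse("$.user.name").find(doc)[0].value == "john"

# Wildcards, which the old implementation could not resolve
values = sorted(match.value for match in jsonpath_parse("$.user.scores.*").find(doc))
assert values == [85, 90]

# No match -> find() returns an empty list, which the helper maps to None
assert jsonpath_parse("$.user.missing").find(doc) == []
```

Because the helper prepends `$.` when the path does not start with `$`, callers can keep passing bare dotted paths like `user.name` and still get full JSONPath semantics.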
""" +import json from typing import Any, Dict, List, Optional, Type, Union from jsonpath_ng import parse as jsonpath_parse @@ -41,7 +42,7 @@ def get_model_for_schema(cls, schema: IndexSchema) -> Type[BaseModel]: A Pydantic model class that can validate data against the schema """ # Use schema identifier as cache key - cache_key = schema.index.name + cache_key = str(hash(json.dumps(schema.to_dict(), sort_keys=True).encode())) if cache_key not in cls._model_cache: cls._model_cache[cache_key] = cls._create_model(schema) From 51e6fc1216f84fd6d64433513ff82714fb7e3226 Mon Sep 17 00:00:00 2001 From: Tyler Hutcherson Date: Fri, 28 Mar 2025 17:16:39 -0400 Subject: [PATCH 08/11] use hf access tokens --- .github/workflows/test.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f96fca62..15c05f43 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -133,6 +133,11 @@ jobs: with: credentials_json: ${{ secrets.GOOGLE_CREDENTIALS }} + - name: Set HuggingFace token + run: | + mkdir -p ~/.huggingface + echo '{"token":"${{ secrets.HF_TOKEN }}"}' > ~/.huggingface/token + - name: Run tests if: matrix.connection == 'plain' && matrix.redis-version == 'latest' env: @@ -149,6 +154,7 @@ jobs: OPENAI_API_VERSION: ${{ secrets.OPENAI_API_VERSION }} AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + HF_TOKEN: ${{ secrets.HF_TOKEN }} run: | make test-all @@ -173,6 +179,7 @@ jobs: OPENAI_API_VERSION: ${{ secrets.OPENAI_API_VERSION }} AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + HF_TOKEN: ${{ secrets.HF_TOKEN }} run: | docker run -d --name redis -p 6379:6379 redis/redis-stack-server:latest make test-notebooks From 0379db5733d9965b7a757bfdbf47b03c0e613da0 Mon Sep 17 00:00:00 2001 From: Tyler Hutcherson Date: Fri, 28 Mar 2025 18:20:12 -0400 Subject: [PATCH 09/11] make extension classes accept vectorizer kwargs --- redisvl/extensions/llmcache/semantic.py | 13 ++++++------- redisvl/extensions/router/semantic.py | 13 ++++++++++--- .../extensions/session_manager/semantic_session.py | 9 ++++++--- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/redisvl/extensions/llmcache/semantic.py b/redisvl/extensions/llmcache/semantic.py index 2b70ae09..6e15c43b 100644 --- a/redisvl/extensions/llmcache/semantic.py +++ b/redisvl/extensions/llmcache/semantic.py @@ -95,12 +95,8 @@ def __init__( } # Use the index name as the key prefix by default - if "prefix" in kwargs: - prefix = kwargs["prefix"] - else: - prefix = name - - dtype = kwargs.get("dtype") + prefix = kwargs.pop("prefix", name) + dtype = kwargs.pop("dtype", None) # Validate a provided vectorizer or set the default if vectorizer: @@ -111,7 +107,10 @@ def __init__( f"Provided dtype {dtype} does not match vectorizer dtype {vectorizer.dtype}" ) else: - vectorizer_kwargs = {"dtype": dtype} if dtype else {} + vectorizer_kwargs = kwargs + + if dtype: + vectorizer_kwargs.update(**{"dtype": dtype}) vectorizer = HFTextVectorizer( model="sentence-transformers/all-mpnet-base-v2", diff --git a/redisvl/extensions/router/semantic.py b/redisvl/extensions/router/semantic.py index c06789e1..be83b447 100644 --- a/redisvl/extensions/router/semantic.py +++ b/redisvl/extensions/router/semantic.py @@ -72,7 +72,7 @@ def __init__( connection_kwargs (Dict[str, Any]): The connection arguments for the redis client. Defaults to empty {}. 
""" - dtype = kwargs.get("dtype") + dtype = kwargs.pop("dtype", None) # Validate a provided vectorizer or set the default if vectorizer: @@ -83,8 +83,15 @@ def __init__( f"Provided dtype {dtype} does not match vectorizer dtype {vectorizer.dtype}" ) else: - vectorizer_kwargs = {"dtype": dtype} if dtype else {} - vectorizer = HFTextVectorizer(**vectorizer_kwargs) + vectorizer_kwargs = kwargs + + if dtype: + vectorizer_kwargs.update(**{"dtype": dtype}) + + vectorizer = HFTextVectorizer( + model="sentence-transformers/all-mpnet-base-v2", + **vectorizer_kwargs, + ) if routing_config is None: routing_config = RoutingConfig() diff --git a/redisvl/extensions/session_manager/semantic_session.py b/redisvl/extensions/session_manager/semantic_session.py index 1aa15315..9497d06c 100644 --- a/redisvl/extensions/session_manager/semantic_session.py +++ b/redisvl/extensions/session_manager/semantic_session.py @@ -71,7 +71,7 @@ def __init__( super().__init__(name, session_tag) prefix = prefix or name - dtype = kwargs.get("dtype") + dtype = kwargs.pop("dtype", None) # Validate a provided vectorizer or set the default if vectorizer: @@ -82,10 +82,13 @@ def __init__( f"Provided dtype {dtype} does not match vectorizer dtype {vectorizer.dtype}" ) else: - vectorizer_kwargs = {"dtype": dtype} if dtype else {} + vectorizer_kwargs = kwargs + + if dtype: + vectorizer_kwargs.update(**{"dtype": dtype}) vectorizer = HFTextVectorizer( - model="sentence-transformers/msmarco-distilbert-cos-v5", + model="sentence-transformers/all-mpnet-base-v2", **vectorizer_kwargs, ) From b5f378031e3200cbf0f5c31eb6cd750f36cc596d Mon Sep 17 00:00:00 2001 From: Tyler Hutcherson Date: Fri, 28 Mar 2025 18:22:34 -0400 Subject: [PATCH 10/11] clean up tests a bit --- tests/{unit => integration}/test_cross_encoder_reranker.py | 0 tests/unit/test_utils.py | 3 --- tests/unit/test_validation.py | 2 +- 3 files changed, 1 insertion(+), 4 deletions(-) rename tests/{unit => integration}/test_cross_encoder_reranker.py (100%) diff --git a/tests/unit/test_cross_encoder_reranker.py b/tests/integration/test_cross_encoder_reranker.py similarity index 100% rename from tests/unit/test_cross_encoder_reranker.py rename to tests/integration/test_cross_encoder_reranker.py diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 83300d0c..af0cc192 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -1,6 +1,3 @@ -import importlib -import io -import logging import re import sys from functools import wraps diff --git a/tests/unit/test_validation.py b/tests/unit/test_validation.py index ac67e810..68933938 100644 --- a/tests/unit/test_validation.py +++ b/tests/unit/test_validation.py @@ -9,7 +9,7 @@ """ import re -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Any, List, Optional, Tuple, Union import pytest From e406d76dc1fe7439765a2510b80206ec50d5700a Mon Sep 17 00:00:00 2001 From: Tyler Hutcherson Date: Fri, 28 Mar 2025 18:37:16 -0400 Subject: [PATCH 11/11] start centralizing the use of fixtures for hugging face models --- tests/conftest.py | 10 ++++++++++ tests/integration/test_threshold_optimizer.py | 12 ++++++++---- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 24da05e5..f0c3a435 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,6 +5,7 @@ from testcontainers.compose import DockerCompose from redisvl.redis.connection import RedisConnectionFactory +from redisvl.utils.vectorize import HFTextVectorizer @pytest.fixture(autouse=True) @@ 
-68,6 +69,15 @@ def client(redis_url): yield conn +@pytest.fixture(scope="session", autouse=True) +def hf_vectorizer(): + return HFTextVectorizer( + model="sentence-transformers/all-mpnet-base-v2", + token=os.getenv("HF_TOKEN"), + cache_folder=os.getenv("SENTENCE_TRANSFORMERS_HOME"), + ) + + @pytest.fixture def sample_datetimes(): return { diff --git a/tests/integration/test_threshold_optimizer.py b/tests/integration/test_threshold_optimizer.py index 44871901..b510b038 100644 --- a/tests/integration/test_threshold_optimizer.py +++ b/tests/integration/test_threshold_optimizer.py @@ -35,10 +35,11 @@ def routes(): @pytest.fixture -def semantic_router(client, routes): +def semantic_router(client, routes, hf_vectorizer): router = SemanticRouter( name="test-router", routes=routes, + vectorizer=hf_vectorizer, routing_config=RoutingConfig(max_k=2), redis_client=client, overwrite=False, @@ -86,7 +87,7 @@ def test_data_optimization(): def test_routes_different_distance_thresholds_optimizer_default( - semantic_router, routes, redis_url, test_data_optimization + semantic_router, routes, redis_url, test_data_optimization, hf_vectorizer ): redis_version = semantic_router._index.client.info()["redis_version"] if not compare_versions(redis_version, "7.0.0"): @@ -101,6 +102,7 @@ def test_routes_different_distance_thresholds_optimizer_default( router = SemanticRouter( name="test_routes_different_distance_optimizer", routes=routes, + vectorizer=hf_vectorizer, redis_url=redis_url, overwrite=True, ) @@ -119,7 +121,7 @@ def test_routes_different_distance_thresholds_optimizer_default( def test_routes_different_distance_thresholds_optimizer_precision( - semantic_router, routes, redis_url, test_data_optimization + semantic_router, routes, redis_url, test_data_optimization, hf_vectorizer ): redis_version = semantic_router._index.client.info()["redis_version"] @@ -135,6 +137,7 @@ def test_routes_different_distance_thresholds_optimizer_precision( router = SemanticRouter( name="test_routes_different_distance_optimizer", routes=routes, + vectorizer=hf_vectorizer, redis_url=redis_url, overwrite=True, ) @@ -155,7 +158,7 @@ def test_routes_different_distance_thresholds_optimizer_precision( def test_routes_different_distance_thresholds_optimizer_recall( - semantic_router, routes, redis_url, test_data_optimization + semantic_router, routes, redis_url, test_data_optimization, hf_vectorizer ): redis_version = semantic_router._index.client.info()["redis_version"] if not compare_versions(redis_version, "7.0.0"): @@ -170,6 +173,7 @@ def test_routes_different_distance_thresholds_optimizer_recall( router = SemanticRouter( name="test_routes_different_distance_optimizer", routes=routes, + vectorizer=hf_vectorizer, redis_url=redis_url, overwrite=True, )
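
[Reviewer note] With the session-scoped `hf_vectorizer` fixture introduced in this patch, tests share a single HFTextVectorizer instance instead of re-loading the sentence-transformers model per test, which pairs with the HF_TOKEN wiring added earlier in the series. A minimal sketch of how a test consumes the fixture, assuming `embed` returns a plain list of floats as it does in current RedisVL; the test module, name, and assertions are illustrative, not part of the patch:

```python
# Hypothetical test module, for illustration only.
import numpy as np


def test_hf_vectorizer_embedding_shape(hf_vectorizer):
    # pytest injects the session-scoped fixture from tests/conftest.py, so the
    # model is downloaded and loaded at most once per test session.
    embedding = hf_vectorizer.embed("hello world")

    # sentence-transformers/all-mpnet-base-v2 emits 768-dimensional vectors
    assert len(embedding) == 768
    assert not np.isnan(np.array(embedding, dtype=np.float32)).any()
```

Session scope trades test isolation for speed here; since the vectorizer holds no per-test state, sharing one instance across the run is safe.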