diff --git a/.github/workflows/ci_templates.yaml b/.github/workflows/ci_templates.yaml index 8163a32d3..72ebb786a 100644 --- a/.github/workflows/ci_templates.yaml +++ b/.github/workflows/ci_templates.yaml @@ -4,7 +4,6 @@ on: pull_request: branches: - main - - '[0-9]+.[0-9]+' paths: # Paths that may affect code quality concurrency: diff --git a/CHANGELOG.md b/CHANGELOG.md index e9e789c07..3aa69d126 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 **Before you create a Pull Request, remember to update the Changelog with your changes.** -## Changes Since Last Release +## Changes Since Last Release #### Changed defaults / behaviours diff --git a/superduper/cli/main.py b/superduper/cli/main.py index 46515445d..45397ddb5 100644 --- a/superduper/cli/main.py +++ b/superduper/cli/main.py @@ -1,6 +1,5 @@ import json import os -import subprocess from superduper import CFG, Component, logging, superduper from superduper.components.template import Template @@ -103,9 +102,20 @@ def bootstrap( db = superduper(data_backend) existing = db.show('template') + + if template.startswith('http'): + import subprocess + + logging.info('Downloading remote template...') + subprocess.run(['curl', '-O', '-k', template]) + template = template.split('/')[-1] + if destination is not None: - root = os.path.dirname(os.path.dirname(__file__)) - template_directory = os.path.join(root, f'templates/{template}') + if os.path.exists(template): + template_directory = template + else: + root = os.path.dirname(os.path.dirname(__file__)) + template_directory = os.path.join(root, f'templates/{template}') print(template_directory) import shutil @@ -114,8 +124,12 @@ def bootstrap( if template in existing: logging.warn(f'Template {template} already exists') + logging.info(f'Applying template: {template} from inbuilt') - tem = getattr(inbuilt, template) + if os.path.exists(template): + tem = Template.read(template) + else: + tem = getattr(inbuilt, template) if tem.requirements and pip_install: with open('/tmp/requirements.txt', 'w') as f: f.write('\n'.join(tem.requirements)) diff --git a/templates/simple_rag/VERSION b/templates/simple_rag/VERSION index 8ff5d387d..1d0ba9ea1 100644 --- a/templates/simple_rag/VERSION +++ b/templates/simple_rag/VERSION @@ -1 +1 @@ -0.5.0.dev +0.4.0 diff --git a/templates/simple_rag/blobs/a03a6ac18d448d0e3cd7b62ebcd7f19d777b5a59 b/templates/simple_rag/blobs/a03a6ac18d448d0e3cd7b62ebcd7f19d777b5a59 new file mode 100644 index 000000000..bac1551de Binary files /dev/null and b/templates/simple_rag/blobs/a03a6ac18d448d0e3cd7b62ebcd7f19d777b5a59 differ diff --git a/templates/simple_rag/blobs/de5a10b374e634d964148beb865f73c5b82e53a6 b/templates/simple_rag/blobs/de5a10b374e634d964148beb865f73c5b82e53a6 new file mode 100644 index 000000000..a24d75dfb Binary files /dev/null and b/templates/simple_rag/blobs/de5a10b374e634d964148beb865f73c5b82e53a6 differ diff --git a/templates/simple_rag/build.ipynb b/templates/simple_rag/build.ipynb index d1331d229..05c3e5c38 100644 --- a/templates/simple_rag/build.ipynb +++ b/templates/simple_rag/build.ipynb @@ -42,7 +42,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "3ef70f6d-a189-460a-8864-241a689624e2", "metadata": { "editable": true, @@ -66,7 +66,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "cb029a5e-fedf-4f07-8a31-d220cfbfbb3d", "metadata": { "editable": true, @@ -75,7 +75,22 @@ }, "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-Nov-11 11:49:18.44| INFO | superduper.misc.plugins:13 | Loading plugin: mongodb\n", + "2024-Nov-11 11:49:18.49| INFO | superduper.base.datalayer:76 | Building Data Layer\n", + "2024-Nov-11 11:49:18.49| INFO | superduper.base.build:184 | Configuration: \n", + " +---------------+--------------+\n", + "| Configuration | Value |\n", + "+---------------+--------------+\n", + "| Data Backend | mongomock:// |\n", + "+---------------+--------------+\n" + ] + } + ], "source": [ "from superduper import superduper, CFG\n", "\n", @@ -88,7 +103,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "4e7902bd", "metadata": { "editable": true, @@ -114,7 +129,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "1ef8dd07-1b47-4dce-84dd-a081d1f5ee9d", "metadata": {}, "outputs": [], @@ -136,7 +151,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "c5965fdf", "metadata": {}, "outputs": [], @@ -170,7 +185,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "2d20eaa0-a416-4483-938e-23f79845739a", "metadata": {}, "outputs": [], @@ -198,7 +213,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "93d21872-d4dc-40dc-abab-fb07ba102ea3", "metadata": {}, "outputs": [], @@ -217,7 +232,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "31900eec-b516-4bef-939e-2e8f46252b12", "metadata": {}, "outputs": [], @@ -265,7 +280,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "a9b1f538-65ca-499e-b6d0-2dd733f81723", "metadata": {}, "outputs": [], @@ -276,7 +291,8 @@ "from superduper_openai import OpenAIEmbedding\n", "\n", "openai_embedding = OpenAIEmbedding(\n", - " identifier='text-embedding-ada-002',\n", + " identifier='text-embedding',\n", + " model='text-embedding-ada-002',\n", " datatype=sqlvector(shape=(1536,)),\n", " client_kwargs={'base_url': BASE_URL, 'api_key': API_KEY},\n", ")" @@ -292,7 +308,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "4663fa4b-c2ec-427d-bf8b-b8b109cc2ccf", "metadata": {}, "outputs": [], @@ -315,7 +331,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "509c3505-54c5-4e68-84ec-3df8bea0fd74", "metadata": {}, "outputs": [], @@ -335,7 +351,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "f98e5ff4", "metadata": {}, "outputs": [], @@ -360,7 +376,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "44baeb09-6f35-4cf2-b814-46283a59f7e9", "metadata": {}, "outputs": [], @@ -387,7 +403,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "2d3a0d3a-da1c-41ec-b16c-f281c46ad794", "metadata": {}, "outputs": [], @@ -398,7 +414,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "id": "2a82ea22-9694-4c65-b72f-c89ae49d1ab2", "metadata": {}, "outputs": [], @@ -417,7 +433,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "id": "e6787c78-4b14-4a72-818b-450408a74331", "metadata": {}, "outputs": [], @@ -436,7 +452,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "id": "e7c16557-af76-4e70-83d9-2984e19a9554", "metadata": {}, "outputs": [], @@ -463,10 +479,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "id": "2e850c03-33c6-4c88-95d3-d14146a6a0af", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-Nov-11 11:49:19.42| WARNING | superduper.base.document:479 | Leaf listener:chunker already exists\n", + "2024-Nov-11 11:49:19.42| WARNING | superduper.base.document:479 | Leaf model:chunker already exists\n", + "2024-Nov-11 11:49:19.43| WARNING | superduper.base.document:479 | Leaf datatype:dill already exists\n", + "2024-Nov-11 11:49:19.43| WARNING | superduper.base.document:479 | Leaf var-table-name-select-var-id-field-x already exists\n" + ] + } + ], "source": [ "from superduper import Template, Table, Schema\n", "from superduper.components.dataset import RemoteData\n", @@ -526,10 +553,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "id": "8924ba0d-7c01-4d6c-87fb-245531db7506", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-Nov-11 11:49:19.43| WARNING | superduper.base.document:479 | Leaf str already exists\n" + ] + } + ], "source": [ "template.export('.')" ] diff --git a/templates/simple_rag/component.json b/templates/simple_rag/component.json index 01c2650a6..c70a90ba7 100644 --- a/templates/simple_rag/component.json +++ b/templates/simple_rag/component.json @@ -20,11 +20,11 @@ "method": "dill", "encodable": "artifact" }, - "a395902bca4bf34255e97a6a057b0edffc3500b2": { + "de5a10b374e634d964148beb865f73c5b82e53a6": { "_path": "superduper.components.datatype.Artifact", "datatype": "?datatype:dill", "uri": null, - "blob": "&:blob:a395902bca4bf34255e97a6a057b0edffc3500b2" + "blob": "&:blob:de5a10b374e634d964148beb865f73c5b82e53a6" }, "dataset:superduper-docs": { "_path": "superduper.components.dataset.RemoteData", @@ -32,7 +32,7 @@ "plugins": null, "cache": true, "status": null, - "getter": "?a395902bca4bf34255e97a6a057b0edffc3500b2" + "getter": "?de5a10b374e634d964148beb865f73c5b82e53a6" }, "table:sample_simple_rag": { "_path": "superduper.components.table.Table", @@ -58,14 +58,14 @@ "method": "dill", "encodable": "artifact" }, - "727d3bb560939e1211f9cac189d56e07e9622eeb": { + "a03a6ac18d448d0e3cd7b62ebcd7f19d777b5a59": { "_path": "superduper.components.datatype.Artifact", "datatype": "?datatype:dill", "uri": null, - "blob": "&:blob:727d3bb560939e1211f9cac189d56e07e9622eeb" + "blob": "&:blob:a03a6ac18d448d0e3cd7b62ebcd7f19d777b5a59" }, "model:chunker": { - "_object": "?727d3bb560939e1211f9cac189d56e07e9622eeb", + "_object": "?a03a6ac18d448d0e3cd7b62ebcd7f19d777b5a59", "upstream": null, "plugins": null, "cache": true, @@ -108,7 +108,7 @@ 1536 ] }, - "model:": { + "model:text-embedding": { "_path": "superduper_openai.model.OpenAIEmbedding", "upstream": null, "plugins": null, @@ -155,7 +155,7 @@ "status": null, "cdc_table": "chunker__?(listener:chunker.uuid)", "key": "chunker__?(listener:chunker.uuid)", - "model": "?model:", + "model": "?model:text-embedding", "predict_kwargs": {}, "select": "?outputs-chunker-?(listener:chunker.uuid)-select-id-source-outputs-chunker-?(listener:chunker.uuid)", "flatten": false @@ -293,6 +293,7 @@ "default": null } }, + "schema": null, "blobs": null, "files": null, "requirements": null, @@ -304,4 +305,4 @@ } }, "_files": {} -} +} \ No newline at end of file