Skip to content

Commit

Permalink
Fix pdf template for latest datatype refactorings
Browse files: browse the repository at this point in the history
  • Loading branch information
blythed committed Dec 4, 2024
1 parent 8eb39bc commit dbc912e
Show file tree
Hide file tree
Showing 18 changed files with 251 additions and 878 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

**Before you create a Pull Request, remember to update the Changelog with your changes.**

## Changes Since Last Release
## Changes Since Last Release

#### Changed defaults / behaviours

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

from sentence_transformers import SentenceTransformer as _SentenceTransformer
from superduper.backends.query_dataset import QueryDataset
from superduper.base.enums import DBType
from superduper.components.component import ensure_initialized
from superduper.components.model import Model, Signature, _DeviceManaged

Expand Down Expand Up @@ -125,4 +124,3 @@ def _pre_create(self, db):
"""
if self.datatype is not None:
return

Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
994 changes: 147 additions & 847 deletions templates/pdf_rag/build.ipynb

Large diffs are not rendered by default.

131 changes: 103 additions & 28 deletions templates/pdf_rag/component.json
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,10 @@
"serve": false,
"trainer": null,
"deploy": false,
"object": "&:blob:568534984ca7688b45c9a5caaa73a5756e1a78a6",
"object": "&:blob:fb3f160973320c9b45cb2892875aa4f2fcd33466",
"method": null
},
"sample-pdf-rag-find": {
"var-table-name-find": {
"_path": "superduper_<var:databackend>.query.parse_query",
"documents": [],
"query": "<var:table_name>.find()"
Expand All @@ -81,7 +81,7 @@
"key": "file",
"model": "?model:split_image",
"predict_kwargs": {},
"select": "?sample-pdf-rag-find",
"select": "?var-table-name-find",
"flatten": true
},
"json": {
Expand All @@ -106,10 +106,10 @@
"serve": false,
"trainer": null,
"deploy": false,
"object": "&:blob:1087871ffeeb5420a415b53f5e137701fd991584",
"object": "&:blob:bf6c73746c460c85c2d1cc3b419288ce434df064",
"method": null
},
"sample-pdf-rag-select": {
"var-table-name-select": {
"_path": "superduper_<var:databackend>.query.parse_query",
"documents": [],
"query": "<var:table_name>.select()"
Expand All @@ -124,7 +124,7 @@
"key": "file",
"model": "?model:chunk",
"predict_kwargs": {},
"select": "?sample-pdf-rag-select",
"select": "?var-table-name-select",
"flatten": true
},
"datatype:vector[1536]": {
Expand All @@ -137,7 +137,7 @@
"shape": [
1536
],
"dtype": "float32"
"dtype": "float64"
},
"model:text-embedding-ada-002": {
"_path": "superduper_openai.model.OpenAIEmbedding",
Expand All @@ -147,7 +147,7 @@
"build_variables": null,
"build_template": null,
"signature": "singleton",
"datatype": null,
"datatype": "?datatype:vector[1536]",
"output_schema": null,
"model_update_kwargs": {},
"predict_kwargs": {},
Expand All @@ -165,6 +165,18 @@
"client_kwargs": {},
"batch_size": 100
},
"datatype:vector[384]": {
"_path": "superduper.components.datatype.Vector",
"upstream": null,
"plugins": null,
"cache": true,
"build_variables": null,
"build_template": null,
"shape": [
384
],
"dtype": "float64"
},
"model:sentence-transformers-embedding": {
"_path": "superduper_sentence_transformers.model.SentenceTransformer",
"preferred_devices": [
Expand All @@ -179,7 +191,7 @@
"build_variables": null,
"build_template": null,
"signature": "singleton",
"datatype": null,
"datatype": "?datatype:vector[384]",
"output_schema": null,
"model_update_kwargs": {},
"predict_kwargs": {
Expand All @@ -194,7 +206,7 @@
"deploy": false,
"model": "BAAI/bge-small-en",
"preprocess": null,
"postprocess": "&:blob:c190af19f02c96ce9f7c38409748ccfc98758827"
"postprocess": "&:blob:29de8234e88613b66ede6d1dda606b6b67c4d8c8"
},
"model:embedding": {
"_path": "superduper.components.model.ModelRouter",
Expand All @@ -204,7 +216,7 @@
"build_variables": null,
"build_template": null,
"signature": "singleton",
"datatype": "?datatype:vector[1536]",
"datatype": null,
"output_schema": null,
"model_update_kwargs": {},
"predict_kwargs": {},
Expand All @@ -219,7 +231,7 @@
"openai": "?model:text-embedding-ada-002",
"sentence_transformers": "?model:sentence-transformers-embedding"
},
"model": "openai"
"model": "<var:embedding_model>"
},
"outputs-chunk-?(listener:chunk.uuid)-select": {
"_path": "superduper_<var:databackend>.query.parse_query",
Expand All @@ -243,7 +255,7 @@
},
"vector_index:vector-index": {
"_path": "superduper.components.vector_index.VectorIndex",
"upstream": null,
"upstream": [],
"plugins": null,
"cache": true,
"build_variables": null,
Expand All @@ -261,7 +273,7 @@
"build_variables": null,
"build_template": null,
"signature": "singleton",
"datatype": "str",
"datatype": null,
"output_schema": null,
"model_update_kwargs": {},
"predict_kwargs": {},
Expand All @@ -280,15 +292,73 @@
"batch_size": 1,
"prompt": ""
},
"model:llm-anthropic": {
"_path": "superduper_anthropic.model.AnthropicCompletions",
"upstream": null,
"plugins": null,
"cache": true,
"build_variables": null,
"build_template": null,
"signature": "*args,**kwargs",
"datatype": null,
"output_schema": null,
"model_update_kwargs": {},
"predict_kwargs": {
"max_tokens": 1024,
"temperature": 0.8
},
"compute_kwargs": {},
"validation": null,
"metric_values": {},
"num_workers": 0,
"serve": false,
"trainer": null,
"deploy": false,
"model": "claude-2.1",
"max_batch_size": 8,
"client_kwargs": {},
"prompt": ""
},
"model:llm-vllm": {
"_path": "superduper_vllm.model.VllmCompletion",
"upstream": null,
"plugins": null,
"cache": true,
"build_variables": null,
"build_template": null,
"signature": "*args,**kwargs",
"datatype": null,
"output_schema": null,
"model_update_kwargs": {},
"predict_kwargs": {
"max_tokens": 1024,
"temperature": 0.8
},
"compute_kwargs": {
"num_gpus": 1
},
"validation": null,
"metric_values": {},
"num_workers": 0,
"serve": false,
"trainer": null,
"deploy": false,
"vllm_params": {
"model": "TheBloke/Mistral-7B-Instruct-v0.2-AWQ",
"gpu_memory_utilization": 0.7,
"max_model_len": 1024,
"quantization": "awq"
}
},
"model:llm": {
"_path": "superduper.components.model.ModelRouter",
"upstream": null,
"plugins": null,
"cache": true,
"build_variables": null,
"build_template": null,
"signature": "singleton",
"datatype": "str",
"signature": "*args,**kwargs",
"datatype": null,
"output_schema": null,
"model_update_kwargs": {},
"predict_kwargs": {},
Expand All @@ -300,9 +370,11 @@
"trainer": null,
"deploy": false,
"models": {
"openai": "?model:llm-openai"
"openai": "?model:llm-openai",
"anthropic": "?model:llm-anthropic",
"vllm": "?model:llm-vllm"
},
"model": "openai"
"model": "<var:llm_model>"
},
"plugin:plugin-utils_py": {
"_path": "superduper.components.plugin.Plugin",
Expand Down Expand Up @@ -339,8 +411,11 @@
"split_image_key": "<var:output_prefix>split_image__?(listener:split_image.uuid)"
},
"model:rag": {
"_object": "&:blob:58d51953d88d26b2a08024f471d6896b12063698",
"upstream": null,
"_object": "&:blob:cd225eb265434f4caa753cfd4c1b708d67dd4d7f",
"upstream": [
"?vector_index:vector-index",
"?vector_index:vector-index"
],
"plugins": null,
"cache": true,
"build_variables": null,
Expand All @@ -358,9 +433,9 @@
"trainer": null,
"deploy": false,
"llm_model": "?model:llm",
"vector_index_name": "vector-index",
"prompt_template": "<var:prompt_template>",
"processor": "?model:processor"
"processor": "?model:processor",
"vector_index": "?vector_index:vector-index"
},
"pdf-rag": {
"_path": "superduper.components.application.Application",
Expand Down Expand Up @@ -429,11 +504,11 @@
"_builds": {
"str": {
"_path": "superduper.components.schema.FieldType",
"uuid": "0ba486f949584e5c"
"uuid": "5f1ed2f02fc64e10"
},
"datatype:file": {
"_path": "superduper.components.datatype.FileType",
"uuid": "121dd11d21464f2b",
"uuid": "cb66f7db104b402b",
"upstream": null,
"plugins": null,
"cache": true,
Expand All @@ -442,7 +517,7 @@
},
"schema:sample_pdf_rag/schema": {
"_path": "superduper.components.schema.Schema",
"uuid": "a19e6f4b12914eb0",
"uuid": "d6afe58fc2cd4aaf",
"upstream": null,
"plugins": null,
"cache": true,
Expand All @@ -456,17 +531,17 @@
},
"dataset:sample_pdfs": {
"_path": "superduper.components.dataset.RemoteData",
"uuid": "a49980907c504046",
"uuid": "874a14a7699c4b2e",
"upstream": null,
"plugins": null,
"cache": true,
"build_variables": null,
"build_template": null,
"getter": "&:blob:a306e0ed6eecb5fd002ef781f9b24f8d73cc44cc"
"getter": "&:blob:0c27f6fbe3b4538b68f0ddf6a9d2a0f8be7d1684"
},
"table:sample_pdf_rag": {
"_path": "superduper.components.table.Table",
"uuid": "a517dec411304504",
"uuid": "c802c284fb7541b0",
"upstream": null,
"plugins": null,
"cache": true,
Expand Down

0 comments on commit dbc912e

Please sign in to comment.