From 14cf5baef5b71c646f882ef3e7522e1a5364acde Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Sun, 20 Jul 2025 12:06:52 +0200 Subject: [PATCH 1/5] LangChain: Add example using MCP --- .github/workflows/ml-langchain.yml | 16 +++++- .../machine-learning/llm-langchain/.gitignore | 3 +- .../machine-learning/llm-langchain/README.md | 42 +++++++++++++-- .../llm-langchain/agent_with_mcp.py | 54 +++++++++++++++++++ topic/machine-learning/llm-langchain/init.sql | 25 +++++++++ .../llm-langchain/requirements.txt | 2 + topic/machine-learning/llm-langchain/test.py | 8 +++ 7 files changed, 143 insertions(+), 7 deletions(-) create mode 100644 topic/machine-learning/llm-langchain/agent_with_mcp.py create mode 100644 topic/machine-learning/llm-langchain/init.sql diff --git a/.github/workflows/ml-langchain.yml b/.github/workflows/ml-langchain.yml index 40c32b26..3d7cec01 100644 --- a/.github/workflows/ml-langchain.yml +++ b/.github/workflows/ml-langchain.yml @@ -38,7 +38,12 @@ jobs: matrix: os: [ 'ubuntu-latest' ] python-version: [ '3.10', '3.11', '3.12', '3.13' ] - cratedb-version: [ 'nightly' ] + cratedb-version: [ + 'nightly', + ] + cratedb-mcp-version: [ + 'pr-50', + ] services: cratedb: @@ -48,6 +53,15 @@ jobs: - 5432:5432 env: CRATE_HEAP_SIZE: 4g + cratedb-mcp: + image: ghcr.io/crate/cratedb-mcp:${{ matrix.cratedb-mcp-version }} + ports: + - 8000:8000 + env: + CRATEDB_MCP_TRANSPORT: streamable-http + CRATEDB_MCP_HOST: 0.0.0.0 + CRATEDB_MCP_PORT: 8000 + CRATEDB_CLUSTER_URL: http://crate:crate@cratedb:4200/ env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} diff --git a/topic/machine-learning/llm-langchain/.gitignore b/topic/machine-learning/llm-langchain/.gitignore index 8d55c741..00d815fb 100644 --- a/topic/machine-learning/llm-langchain/.gitignore +++ b/topic/machine-learning/llm-langchain/.gitignore @@ -1,2 +1,3 @@ -*.sql .env +*.sql +!init.sql diff --git a/topic/machine-learning/llm-langchain/README.md b/topic/machine-learning/llm-langchain/README.md index 
479a5f9f..0dfe40e0 100644 --- a/topic/machine-learning/llm-langchain/README.md +++ b/topic/machine-learning/llm-langchain/README.md @@ -7,13 +7,18 @@ by language models. It provides a complete set of powerful and flexible components for building context-aware, reasoning applications. -Please refer to the [LangChain documentation] for further information. +[LangGraph] is a low-level orchestration framework for building, managing, +and deploying long-running, stateful agents. + +Please refer to the [LangChain documentation] and the [Building Ambient +Agents with LangGraph] academy material for further information. Common end-to-end use cases are: - Analyzing structured data - Chatbots and friends - Document question answering +- Text-to-SQL (talk to your data) LangChain provides standard, extendable interfaces and external integrations for the following modules, listed from least to most complex: @@ -79,17 +84,21 @@ and [CrateDB]. augmented generation (RAG) pipeline. To implement RAG we use the Python client driver for CrateDB and vector store support in LangChain. - - `cratedb_rag_customer_support_vertexai.ipynb` [![Open on GitHub](https://img.shields.io/badge/Open%20on-GitHub-lightgray?logo=GitHub)](cratedb_rag_customer_support_vertexai.ipynb)[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/crate/cratedb-examples/blob/main/topic/machine-learning/llm-langchain/cratedb_rag_customer_support_vertexai.ipynb) +- `cratedb_rag_customer_support_vertexai.ipynb` [![Open on GitHub](https://img.shields.io/badge/Open%20on-GitHub-lightgray?logo=GitHub)](cratedb_rag_customer_support_vertexai.ipynb)[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/crate/cratedb-examples/blob/main/topic/machine-learning/llm-langchain/cratedb_rag_customer_support_vertexai.ipynb) This example illustrates the RAG implementation of a customer support scenario. 
It is based on the previous notebook, and it illustrates how to use Vertex AI platform on Google Cloud for RAG pipeline. - +- `agent_with_mcp.py` + + This example illustrates how to use LangGraph and the `langchain-mcp-adapters` + package to implement an LLM agent that is connecting to the CrateDB MCP server. + The demo program performs Text-to-SQL on timeseries data stored in a CrateDB table. ## Install -In order to properly set up a sandbox environment to explore the example notebooks +To properly set up a sandbox environment to explore the example notebooks and programs, it is advised to create a Python virtualenv, and install the dependencies into it. In this way, it is easy to wipe your virtualenv and start from scratch anytime. @@ -126,6 +135,26 @@ a cloud-based development environment is up and running. As soon as your project easily move to a different cluster tier or scale horizontally. +### MCP + +Spin up the [CrateDB MCP server], connecting it to CrateDB on localhost. +```bash +export CRATEDB_CLUSTER_URL=http://crate:crate@localhost:4200/ +export CRATEDB_MCP_TRANSPORT=streamable-http +uvx cratedb-mcp serve +``` + +Run the code using OpenAI API: +```bash +export OPENAI_API_KEY= +python agent_with_mcp.py +``` +Expected output: +```text +Query was: What is the average value for sensor 1? +Answer was: The average value for sensor 1 is approximately 17.03. If you need more details or a different calculation, let me know! +``` + ## Testing Run all tests. @@ -139,7 +168,7 @@ pytest -k document_loader pytest -k "notebook and loader" ``` -In order to force a regeneration of the Jupyter Notebook, use the +To force a regeneration of the Jupyter Notebook, use the `--nb-force-regen` option. 
```shell pytest -k document_loader --nb-force-regen @@ -147,14 +176,17 @@ pytest -k document_loader --nb-force-regen [Agents]: https://python.langchain.com/docs/modules/agents/ +[Building Ambient Agents with LangGraph]: https://academy.langchain.com/courses/ambient-agents/ [Callbacks]: https://python.langchain.com/docs/modules/callbacks/ [Chains]: https://python.langchain.com/docs/modules/chains/ [CrateDB]: https://github.com/crate/crate [CrateDB Cloud]: https://console.cratedb.cloud +[CrateDB MCP server]: https://cratedb.com/docs/guide/integrate/mcp/cratedb-mcp.html [`FLOAT_VECTOR`]: https://crate.io/docs/crate/reference/en/master/general/ddl/data-types.html#float-vector [`KNN_MATCH`]: https://crate.io/docs/crate/reference/en/master/general/builtins/scalar-functions.html#scalar-knn-match [LangChain]: https://www.langchain.com/ [LangChain documentation]: https://python.langchain.com/ +[LangGraph]: https://langchain-ai.github.io/langgraph/ [Memory]: https://python.langchain.com/docs/modules/memory/ [Model I/O]: https://python.langchain.com/docs/modules/model_io/ [Retrieval]: https://python.langchain.com/docs/modules/data_connection/ diff --git a/topic/machine-learning/llm-langchain/agent_with_mcp.py b/topic/machine-learning/llm-langchain/agent_with_mcp.py new file mode 100644 index 00000000..0aaf85d5 --- /dev/null +++ b/topic/machine-learning/llm-langchain/agent_with_mcp.py @@ -0,0 +1,54 @@ +""" +Exercise LangChain/LangGraph with the CrateDB MCP server. + +## Synopsis + +# Install prerequisites. +pip install -U -r requirements.txt + +# Start database. +docker run --rm -it --publish=4200:4200 crate/crate:nightly + +# Start MCP server. +export CRATEDB_MCP_TRANSPORT=streamable-http +export CRATEDB_MCP_HOST=0.0.0.0 +export CRATEDB_MCP_PORT=8000 +export CRATEDB_CLUSTER_URL=http://crate:crate@localhost:4200/ +docker run --rm -it --network=host --publish=8000:8000 ghcr.io/crate/cratedb-mcp:pr-50 + +# Run program. 
+export OPENAI_API_KEY= +python agent_with_mcp.py +""" +import asyncio + +from langchain_mcp_adapters.client import MultiServerMCPClient +from langgraph.prebuilt import create_react_agent + + +async def amain(): + client = MultiServerMCPClient( + { + "cratedb": { + "transport": "streamable_http", + "url": "http://localhost:8000/mcp/" + }, + } + ) + tools = await client.get_tools() + agent = create_react_agent("openai:gpt-4.1", tools) + + QUERY_STR = "What is the average value for sensor 1?" + response = await agent.ainvoke({"messages": QUERY_STR}) + answer = response["messages"][-1].content + + print("Query was:", QUERY_STR) + print("Answer was:", answer) + + +def main(): + asyncio.run(amain()) + + +if __name__ == "__main__": + main() diff --git a/topic/machine-learning/llm-langchain/init.sql b/topic/machine-learning/llm-langchain/init.sql new file mode 100644 index 00000000..811b16dd --- /dev/null +++ b/topic/machine-learning/llm-langchain/init.sql @@ -0,0 +1,25 @@ +DROP TABLE IF EXISTS time_series_data; + +CREATE TABLE IF NOT EXISTS time_series_data ( + timestamp TIMESTAMP, + value DOUBLE, + location STRING, + sensor_id INT +); + +INSERT INTO time_series_data (timestamp, value, location, sensor_id) +VALUES + ('2023-09-14T00:00:00', 10.5, 'Sensor A', 1), + ('2023-09-14T01:00:00', 15.2, 'Sensor A', 1), + ('2023-09-14T02:00:00', 18.9, 'Sensor A', 1), + ('2023-09-14T03:00:00', 12.7, 'Sensor B', 2), + ('2023-09-14T04:00:00', 17.3, 'Sensor B', 2), + ('2023-09-14T05:00:00', 20.1, 'Sensor B', 2), + ('2023-09-14T06:00:00', 22.5, 'Sensor A', 1), + ('2023-09-14T07:00:00', 18.3, 'Sensor A', 1), + ('2023-09-14T08:00:00', 16.8, 'Sensor A', 1), + ('2023-09-14T09:00:00', 14.6, 'Sensor B', 2), + ('2023-09-14T10:00:00', 13.2, 'Sensor B', 2), + ('2023-09-14T11:00:00', 11.7, 'Sensor B', 2); + +REFRESH TABLE time_series_data; diff --git a/topic/machine-learning/llm-langchain/requirements.txt b/topic/machine-learning/llm-langchain/requirements.txt index b69d2ff0..c7b31eb8 100644 --- 
a/topic/machine-learning/llm-langchain/requirements.txt +++ b/topic/machine-learning/llm-langchain/requirements.txt @@ -1,9 +1,11 @@ crash google-cloud-aiplatform<2 langchain-cratedb<0.1.2 +langchain-mcp-adapters<0.2 langchain-google-vertexai<3 langchain-openai<0.4 langchain-text-splitters<0.4 +langgraph<0.6 pueblo[cli,nlp]>=0.0.10 pypdf<6 python-dotenv<2 diff --git a/topic/machine-learning/llm-langchain/test.py b/topic/machine-learning/llm-langchain/test.py index 651ba50b..215368ec 100644 --- a/topic/machine-learning/llm-langchain/test.py +++ b/topic/machine-learning/llm-langchain/test.py @@ -29,6 +29,14 @@ def reset_database(cratedb): time.sleep(0.01) +@pytest.fixture(scope="function", autouse=True) +def init_database(cratedb): + """ + Initialize database. + """ + cratedb.run_sql((HERE / "init.sql").read_text()) + + def pytest_generate_tests(metafunc): """ Generate pytest test case per Jupyter Notebook. From 046d8f7770c421bbd59ec3d2f83745186aadedc6 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Mon, 21 Jul 2025 20:39:26 +0200 Subject: [PATCH 2/5] LangChain: Use GA OCI image (head) for `cratedb-mcp` --- .github/workflows/ml-langchain.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ml-langchain.yml b/.github/workflows/ml-langchain.yml index 3d7cec01..7e152506 100644 --- a/.github/workflows/ml-langchain.yml +++ b/.github/workflows/ml-langchain.yml @@ -42,7 +42,7 @@ jobs: 'nightly', ] cratedb-mcp-version: [ - 'pr-50', + 'main', ] services: From 45f070475eacb8da2645a9aac729f5482e8859b8 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Mon, 21 Jul 2025 20:40:46 +0200 Subject: [PATCH 3/5] LangChain: Add missing provisioning notes using `init.sql` --- topic/machine-learning/llm-langchain/README.md | 5 +++++ topic/machine-learning/llm-langchain/agent_with_mcp.py | 3 +++ 2 files changed, 8 insertions(+) diff --git a/topic/machine-learning/llm-langchain/README.md b/topic/machine-learning/llm-langchain/README.md index 
0dfe40e0..2519611a 100644 --- a/topic/machine-learning/llm-langchain/README.md +++ b/topic/machine-learning/llm-langchain/README.md @@ -137,6 +137,11 @@ easily move to a different cluster tier or scale horizontally. ### MCP +Provision the database: +```bash +crash < init.sql +``` + Spin up the [CrateDB MCP server], connecting it to CrateDB on localhost. ```bash export CRATEDB_CLUSTER_URL=http://crate:crate@localhost:4200/ diff --git a/topic/machine-learning/llm-langchain/agent_with_mcp.py b/topic/machine-learning/llm-langchain/agent_with_mcp.py index 0aaf85d5..05133274 100644 --- a/topic/machine-learning/llm-langchain/agent_with_mcp.py +++ b/topic/machine-learning/llm-langchain/agent_with_mcp.py @@ -9,6 +9,9 @@ # Start database. docker run --rm -it --publish=4200:4200 crate/crate:nightly +# Provision database. +crash < init.sql + # Start MCP server. export CRATEDB_MCP_TRANSPORT=streamable-http export CRATEDB_MCP_HOST=0.0.0.0 From 5e0a338d4bb224bd48e9322c1be5f7bee1839e06 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Mon, 21 Jul 2025 20:42:50 +0200 Subject: [PATCH 4/5] LangChain: Use general CrateDB usage instructions from `cratedb-about` --- topic/machine-learning/llm-langchain/agent_with_mcp.py | 7 ++++++- topic/machine-learning/llm-langchain/requirements.txt | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/topic/machine-learning/llm-langchain/agent_with_mcp.py b/topic/machine-learning/llm-langchain/agent_with_mcp.py index 05133274..b3a52ce5 100644 --- a/topic/machine-learning/llm-langchain/agent_with_mcp.py +++ b/topic/machine-learning/llm-langchain/agent_with_mcp.py @@ -25,6 +25,7 @@ """ import asyncio +from cratedb_about.instruction import GeneralInstructions from langchain_mcp_adapters.client import MultiServerMCPClient from langgraph.prebuilt import create_react_agent @@ -39,7 +40,11 @@ async def amain(): } ) tools = await client.get_tools() - agent = create_react_agent("openai:gpt-4.1", tools) + agent = create_react_agent( 
model="openai:gpt-4.1", + tools=tools, + prompt=GeneralInstructions().render(), + ) QUERY_STR = "What is the average value for sensor 1?" response = await agent.ainvoke({"messages": QUERY_STR}) diff --git a/topic/machine-learning/llm-langchain/requirements.txt b/topic/machine-learning/llm-langchain/requirements.txt index c7b31eb8..4cd40961 100644 --- a/topic/machine-learning/llm-langchain/requirements.txt +++ b/topic/machine-learning/llm-langchain/requirements.txt @@ -1,4 +1,5 @@ crash +cratedb-about==0.0.6 google-cloud-aiplatform<2 langchain-cratedb<0.1.2 langchain-mcp-adapters<0.2 From 57c2235b86e91c156d7584904d3d2e7fd738c042 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Mon, 21 Jul 2025 20:48:30 +0200 Subject: [PATCH 5/5] LangChain: Address suggestions by CodeRabbit --- .github/workflows/ml-langchain.yml | 1 - topic/machine-learning/llm-langchain/README.md | 4 ++-- topic/machine-learning/llm-langchain/agent_with_mcp.py | 3 ++- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ml-langchain.yml b/.github/workflows/ml-langchain.yml index 7e152506..139ffbe1 100644 --- a/.github/workflows/ml-langchain.yml +++ b/.github/workflows/ml-langchain.yml @@ -2,7 +2,6 @@ name: LangChain on: pull_request: - branches: ~ paths: - '.github/workflows/ml-langchain.yml' - 'topic/machine-learning/llm-langchain/**' diff --git a/topic/machine-learning/llm-langchain/README.md b/topic/machine-learning/llm-langchain/README.md index 2519611a..d885dc9e 100644 --- a/topic/machine-learning/llm-langchain/README.md +++ b/topic/machine-learning/llm-langchain/README.md @@ -90,7 +90,7 @@ and [CrateDB]. It is based on the previous notebook, and it illustrates how to use Vertex AI platform on Google Cloud for RAG pipeline. 
-- `agent_with_mcp.py` +- `agent_with_mcp.py` [![Open on GitHub](https://img.shields.io/badge/Open%20on-GitHub-lightgray?logo=GitHub)](agent_with_mcp.py) This example illustrates how to use LangGraph and the `langchain-mcp-adapters` package to implement an LLM agent that is connecting to the CrateDB MCP server. @@ -173,7 +173,7 @@ pytest -k document_loader pytest -k "notebook and loader" ``` -To force a regeneration of the Jupyter Notebook, use the +To force regeneration of Jupyter notebooks, use the `--nb-force-regen` option. ```shell pytest -k document_loader --nb-force-regen diff --git a/topic/machine-learning/llm-langchain/agent_with_mcp.py b/topic/machine-learning/llm-langchain/agent_with_mcp.py index b3a52ce5..0756160f 100644 --- a/topic/machine-learning/llm-langchain/agent_with_mcp.py +++ b/topic/machine-learning/llm-langchain/agent_with_mcp.py @@ -24,6 +24,7 @@ python agent_with_mcp.py """ import asyncio +import os from cratedb_about.instruction import GeneralInstructions from langchain_mcp_adapters.client import MultiServerMCPClient @@ -46,7 +47,7 @@ async def amain(): prompt=GeneralInstructions().render(), ) - QUERY_STR = "What is the average value for sensor 1?" + QUERY_STR = os.getenv("DEMO_QUERY", "What is the average value for sensor 1?") response = await agent.ainvoke({"messages": QUERY_STR}) answer = response["messages"][-1].content