diff --git a/.github/workflows/ml-langchain.yml b/.github/workflows/ml-langchain.yml index 40c32b26..139ffbe1 100644 --- a/.github/workflows/ml-langchain.yml +++ b/.github/workflows/ml-langchain.yml @@ -2,7 +2,6 @@ name: LangChain on: pull_request: - branches: ~ paths: - '.github/workflows/ml-langchain.yml' - 'topic/machine-learning/llm-langchain/**' @@ -38,7 +37,12 @@ jobs: matrix: os: [ 'ubuntu-latest' ] python-version: [ '3.10', '3.11', '3.12', '3.13' ] - cratedb-version: [ 'nightly' ] + cratedb-version: [ + 'nightly', + ] + cratedb-mcp-version: [ + 'main', + ] services: cratedb: @@ -48,6 +52,15 @@ jobs: - 5432:5432 env: CRATE_HEAP_SIZE: 4g + cratedb-mcp: + image: ghcr.io/crate/cratedb-mcp:${{ matrix.cratedb-mcp-version }} + ports: + - 8000:8000 + env: + CRATEDB_MCP_TRANSPORT: streamable-http + CRATEDB_MCP_HOST: 0.0.0.0 + CRATEDB_MCP_PORT: 8000 + CRATEDB_CLUSTER_URL: http://crate:crate@cratedb:4200/ env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} diff --git a/topic/machine-learning/llm-langchain/.gitignore b/topic/machine-learning/llm-langchain/.gitignore index 8d55c741..00d815fb 100644 --- a/topic/machine-learning/llm-langchain/.gitignore +++ b/topic/machine-learning/llm-langchain/.gitignore @@ -1,2 +1,3 @@ -*.sql .env +*.sql +!init.sql diff --git a/topic/machine-learning/llm-langchain/README.md b/topic/machine-learning/llm-langchain/README.md index 479a5f9f..d885dc9e 100644 --- a/topic/machine-learning/llm-langchain/README.md +++ b/topic/machine-learning/llm-langchain/README.md @@ -7,13 +7,18 @@ by language models. It provides a complete set of powerful and flexible components for building context-aware, reasoning applications. -Please refer to the [LangChain documentation] for further information. +[LangGraph] is a low-level orchestration framework for building, managing, +and deploying long-running, stateful agents. + +Please refer to the [LangChain documentation] and the [Building Ambient +Agents with LangGraph] academy material for further information. 
Common end-to-end use cases are: - Analyzing structured data - Chatbots and friends - Document question answering +- Text-to-SQL (talk to your data) LangChain provides standard, extendable interfaces and external integrations for the following modules, listed from least to most complex: @@ -79,17 +84,21 @@ and [CrateDB]. augmented generation (RAG) pipeline. To implement RAG we use the Python client driver for CrateDB and vector store support in LangChain. - - `cratedb_rag_customer_support_vertexai.ipynb` [![Open on GitHub](https://img.shields.io/badge/Open%20on-GitHub-lightgray?logo=GitHub)](cratedb_rag_customer_support_vertexai.ipynb)[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/crate/cratedb-examples/blob/main/topic/machine-learning/llm-langchain/cratedb_rag_customer_support_vertexai.ipynb) +- `cratedb_rag_customer_support_vertexai.ipynb` [![Open on GitHub](https://img.shields.io/badge/Open%20on-GitHub-lightgray?logo=GitHub)](cratedb_rag_customer_support_vertexai.ipynb)[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/crate/cratedb-examples/blob/main/topic/machine-learning/llm-langchain/cratedb_rag_customer_support_vertexai.ipynb) This example illustrates the RAG implementation of a customer support scenario. It is based on the previous notebook, and it illustrates how to use Vertex AI platform on Google Cloud for RAG pipeline. - +- `agent_with_mcp.py` [![Open on GitHub](https://img.shields.io/badge/Open%20on-GitHub-lightgray?logo=GitHub)](agent_with_mcp.py) + + This example illustrates how to use LangGraph and the `langchain-mcp-adapters` + package to implement an LLM agent that is connecting to the CrateDB MCP server. + The demo program performs Text-to-SQL on timeseries data stored in a CrateDB table. 
## Install -In order to properly set up a sandbox environment to explore the example notebooks +To properly set up a sandbox environment to explore the example notebooks and programs, it is advised to create a Python virtualenv, and install the dependencies into it. In this way, it is easy to wipe your virtualenv and start from scratch anytime. @@ -126,6 +135,31 @@ a cloud-based development environment is up and running. As soon as your project easily move to a different cluster tier or scale horizontally. +### MCP + +Provision database. +```bash +crash < init.sql +``` + +Spin up the [CrateDB MCP server], connecting it to CrateDB on localhost. +```bash +export CRATEDB_CLUSTER_URL=http://crate:crate@localhost:4200/ +export CRATEDB_MCP_TRANSPORT=streamable-http +uvx cratedb-mcp serve +``` + +Run the code using the OpenAI API: +```bash +export OPENAI_API_KEY= +python agent_with_mcp.py +``` +Expected output: +```text +Query was: What is the average value for sensor 1? +Answer was: The average value for sensor 1 is approximately 17.03. If you need more details or a different calculation, let me know! +``` + ## Testing Run all tests. @@ -139,7 +173,7 @@ pytest -k document_loader pytest -k "notebook and loader" ``` -In order to force a regeneration of the Jupyter Notebook, use the +To force regeneration of Jupyter notebooks, use the `--nb-force-regen` option. 
```shell pytest -k document_loader --nb-force-regen @@ -147,14 +181,17 @@ pytest -k document_loader --nb-force-regen [Agents]: https://python.langchain.com/docs/modules/agents/ +[Building Ambient Agents with LangGraph]: https://academy.langchain.com/courses/ambient-agents/ [Callbacks]: https://python.langchain.com/docs/modules/callbacks/ [Chains]: https://python.langchain.com/docs/modules/chains/ [CrateDB]: https://github.com/crate/crate [CrateDB Cloud]: https://console.cratedb.cloud +[CrateDB MCP server]: https://cratedb.com/docs/guide/integrate/mcp/cratedb-mcp.html [`FLOAT_VECTOR`]: https://crate.io/docs/crate/reference/en/master/general/ddl/data-types.html#float-vector [`KNN_MATCH`]: https://crate.io/docs/crate/reference/en/master/general/builtins/scalar-functions.html#scalar-knn-match [LangChain]: https://www.langchain.com/ [LangChain documentation]: https://python.langchain.com/ +[LangGraph]: https://langchain-ai.github.io/langgraph/ [Memory]: https://python.langchain.com/docs/modules/memory/ [Model I/O]: https://python.langchain.com/docs/modules/model_io/ [Retrieval]: https://python.langchain.com/docs/modules/data_connection/ diff --git a/topic/machine-learning/llm-langchain/agent_with_mcp.py b/topic/machine-learning/llm-langchain/agent_with_mcp.py new file mode 100644 index 00000000..0756160f --- /dev/null +++ b/topic/machine-learning/llm-langchain/agent_with_mcp.py @@ -0,0 +1,63 @@ +""" +Exercise LangChain/LangGraph with the CrateDB MCP server. + +## Synopsis + +# Install prerequisites. +pip install -U -r requirements.txt + +# Start database. +docker run --rm -it --publish=4200:4200 crate/crate:nightly + +# Provision database. +crash < init.sql + +# Start MCP server. +export CRATEDB_MCP_TRANSPORT=streamable-http +export CRATEDB_MCP_HOST=0.0.0.0 +export CRATEDB_MCP_PORT=8000 +export CRATEDB_CLUSTER_URL=http://crate:crate@localhost:4200/ +docker run --rm -it --network=host --env CRATEDB_MCP_TRANSPORT --env CRATEDB_MCP_HOST --env CRATEDB_MCP_PORT --env CRATEDB_CLUSTER_URL ghcr.io/crate/cratedb-mcp:main + +# Run program. 
+export OPENAI_API_KEY= +python agent_with_mcp.py +""" +import asyncio +import os + +from cratedb_about.instruction import GeneralInstructions +from langchain_mcp_adapters.client import MultiServerMCPClient +from langgraph.prebuilt import create_react_agent + + +async def amain(): + client = MultiServerMCPClient( + { + "cratedb": { + "transport": "streamable_http", + "url": "http://localhost:8000/mcp/" + }, + } + ) + tools = await client.get_tools() + agent = create_react_agent( + model="openai:gpt-4.1", + tools=tools, + prompt=GeneralInstructions().render(), + ) + + QUERY_STR = os.getenv("DEMO_QUERY", "What is the average value for sensor 1?") + response = await agent.ainvoke({"messages": QUERY_STR}) + answer = response["messages"][-1].content + + print("Query was:", QUERY_STR) + print("Answer was:", answer) + + +def main(): + asyncio.run(amain()) + + +if __name__ == "__main__": + main() diff --git a/topic/machine-learning/llm-langchain/init.sql b/topic/machine-learning/llm-langchain/init.sql new file mode 100644 index 00000000..811b16dd --- /dev/null +++ b/topic/machine-learning/llm-langchain/init.sql @@ -0,0 +1,25 @@ +DROP TABLE IF EXISTS time_series_data; + +CREATE TABLE IF NOT EXISTS time_series_data ( + timestamp TIMESTAMP, + value DOUBLE, + location STRING, + sensor_id INT +); + +INSERT INTO time_series_data (timestamp, value, location, sensor_id) +VALUES + ('2023-09-14T00:00:00', 10.5, 'Sensor A', 1), + ('2023-09-14T01:00:00', 15.2, 'Sensor A', 1), + ('2023-09-14T02:00:00', 18.9, 'Sensor A', 1), + ('2023-09-14T03:00:00', 12.7, 'Sensor B', 2), + ('2023-09-14T04:00:00', 17.3, 'Sensor B', 2), + ('2023-09-14T05:00:00', 20.1, 'Sensor B', 2), + ('2023-09-14T06:00:00', 22.5, 'Sensor A', 1), + ('2023-09-14T07:00:00', 18.3, 'Sensor A', 1), + ('2023-09-14T08:00:00', 16.8, 'Sensor A', 1), + ('2023-09-14T09:00:00', 14.6, 'Sensor B', 2), + ('2023-09-14T10:00:00', 13.2, 'Sensor B', 2), + ('2023-09-14T11:00:00', 11.7, 'Sensor B', 2); + +REFRESH TABLE time_series_data; 
diff --git a/topic/machine-learning/llm-langchain/requirements.txt b/topic/machine-learning/llm-langchain/requirements.txt index b69d2ff0..4cd40961 100644 --- a/topic/machine-learning/llm-langchain/requirements.txt +++ b/topic/machine-learning/llm-langchain/requirements.txt @@ -1,9 +1,12 @@ crash +cratedb-about==0.0.6 google-cloud-aiplatform<2 langchain-cratedb<0.1.2 +langchain-mcp-adapters<0.2 langchain-google-vertexai<3 langchain-openai<0.4 langchain-text-splitters<0.4 +langgraph<0.6 pueblo[cli,nlp]>=0.0.10 pypdf<6 python-dotenv<2 diff --git a/topic/machine-learning/llm-langchain/test.py b/topic/machine-learning/llm-langchain/test.py index 651ba50b..215368ec 100644 --- a/topic/machine-learning/llm-langchain/test.py +++ b/topic/machine-learning/llm-langchain/test.py @@ -29,6 +29,14 @@ def reset_database(cratedb): time.sleep(0.01) +@pytest.fixture(scope="function", autouse=True) +def init_database(cratedb): + """ + Initialize database. + """ + cratedb.run_sql((HERE / "init.sql").read_text()) + + def pytest_generate_tests(metafunc): """ Generate pytest test case per Jupyter Notebook.