From db60467e79636c5138d8f425e81db584fbc32265 Mon Sep 17 00:00:00 2001
From: bastienchassagnol <bastien_chassagnol@laposte.net>
Date: Tue, 10 Dec 2024 13:52:35 +0100
Subject: [PATCH 1/9] add the tools `tl` modules to API agent __init__.py

---
 biochatter/api_agent/__init__.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/biochatter/api_agent/__init__.py b/biochatter/api_agent/__init__.py
index 21f0fca9..2fae423f 100644
--- a/biochatter/api_agent/__init__.py
+++ b/biochatter/api_agent/__init__.py
@@ -12,6 +12,7 @@
     BlastQueryParameters,
 )
 from .oncokb import OncoKBFetcher, OncoKBInterpreter, OncoKBQueryBuilder
+from .scanpy_tl import ScanpyTlQueryBuilder, ScanpyTlQueryFetcher, ScanpyTlQueryInterpreter
 
 __all__ = [
     "BaseFetcher",
@@ -28,4 +29,7 @@
     "BioToolsInterpreter",
     "BioToolsQueryBuilder",
     "APIAgent",
+    "ScanpyTlQueryBuilder", 
+    "ScanpyTlQueryFetcher", 
+    "ScanpyTlQueryInterpreter",
 ]

From 6abb3a930b9cce6a68ef561e528d57640d7252b8 Mon Sep 17 00:00:00 2001
From: bastienchassagnol <bastien_chassagnol@laposte.net>
Date: Tue, 10 Dec 2024 14:02:11 +0100
Subject: [PATCH 2/9] add scanpy_tl module with general description

---
 biochatter/api_agent/scanpy_tl.py | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 biochatter/api_agent/scanpy_tl.py

diff --git a/biochatter/api_agent/scanpy_tl.py b/biochatter/api_agent/scanpy_tl.py
new file mode 100644
index 00000000..b3edef65
--- /dev/null
+++ b/biochatter/api_agent/scanpy_tl.py
@@ -0,0 +1,2 @@
+# -*- coding: utf-8 -*-
+"""Module for interacting with the `scanpy` API for data transformation tools (`tl`)."""

From 9044563a60c4e41cecbb8904db4e60c438a5586c Mon Sep 17 00:00:00 2001
From: bastienchassagnol <bastien_chassagnol@laposte.net>
Date: Wed, 11 Dec 2024 09:56:11 +0100
Subject: [PATCH 3/9] Relax the pymilvus dependency in pyproject.toml from the
 pinned version 2.2.8 to greater than or equal to 2.2.8. The grpcio 1.53.0
 external dependency of pymilvus 2.2.8 is not compatible with Windows 11 and
 Python 3.12.3, whether it is installed from the wheel or from source.
 Running pytest does not yield any errors, only deprecation warnings

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 0230c6cf..7ba7f27f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -35,7 +35,7 @@ python = ">=3.10,<3.13"
 langchain = "^0.2.5"
 openai = "^1.1.0"
 pymupdf = "^1.22.3"
-pymilvus = "2.2.8"
+pymilvus = ">=2.2.8"
 nltk = "^3.8.1"
 redis = "^4.5.5"
 retry = "^0.9.2"

From a46df7a753d7bd74eae8dc7a952b39bc3bdb9012 Mon Sep 17 00:00:00 2001
From: mengerj <jonatan.menger@gmx.de>
Date: Wed, 11 Dec 2024 11:16:12 +0100
Subject: [PATCH 4/9] api agent for scanpy tl using the
 generate_pydantic_classes function. Currently scanpy is imported when
 ScanpyTLQueryBuilder.parameterise_query is called.

---
 biochatter/api_agent/scanpy_tl.py | 144 ++++++++++++++++++++++++++++++
 1 file changed, 144 insertions(+)
 create mode 100644 biochatter/api_agent/scanpy_tl.py

diff --git a/biochatter/api_agent/scanpy_tl.py b/biochatter/api_agent/scanpy_tl.py
new file mode 100644
index 00000000..1cc175ca
--- /dev/null
+++ b/biochatter/api_agent/scanpy_tl.py
@@ -0,0 +1,144 @@
+"""Module for interacting with the `scanpy` API for data transformation tools (`tl`)."""
+from collections.abc import Callable
+from typing import TYPE_CHECKING
+
+import requests
+from langchain.chains.openai_functions import create_structured_output_runnable
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.pydantic_v1 import BaseModel, Field
+from langchain_openai import ChatOpenAI
+from langchain_core.output_parsers import PydanticToolsParser
+if TYPE_CHECKING:
+    from biochatter.llm_connect import Conversation
+
+from .abc import BaseFetcher, BaseInterpreter, BaseQueryBuilder
+from .generate_pydantic_classes_from_module import generate_pydantic_classes
+
+SCANPY_QUERY_PROMPT = """
+You are a world class algorithm for creating queries in structured formats. Your task is to use the scanpy python package
+to provide the user with the appropriate function call to answer their question. You focus on the scanpy.tl module, which has 
+the following overview:
+Any transformation of the data matrix that is not *preprocessing*. In contrast to a *preprocessing* function, a *tool* usually adds an easily interpretable annotation to the data matrix, which can then be visualized with a corresponding plotting function.
+
+### Embeddings
+
+```{eval-rst}
+.. autosummary::
+   :nosignatures:
+   :toctree: ../generated/
+
+   pp.pca
+   tl.tsne
+   tl.umap
+   tl.draw_graph
+   tl.diffmap
+```
+
+Compute densities on embeddings.
+
+```{eval-rst}
+.. autosummary::
+   :nosignatures:
+   :toctree: ../generated/
+
+   tl.embedding_density
+```
+
+### Clustering and trajectory inference
+
+```{eval-rst}
+.. autosummary::
+   :nosignatures:
+   :toctree: ../generated/
+
+   tl.leiden
+   tl.louvain
+   tl.dendrogram
+   tl.dpt
+   tl.paga
+```
+
+### Data integration
+
+```{eval-rst}
+.. autosummary::
+   :nosignatures:
+   :toctree: ../generated/
+
+   tl.ingest
+```
+
+### Marker genes
+
+```{eval-rst}
+.. autosummary::
+   :nosignatures:
+   :toctree: ../generated/
+
+   tl.rank_genes_groups
+   tl.filter_rank_genes_groups
+   tl.marker_gene_overlap
+```
+
+### Gene scores, Cell cycle
+
+```{eval-rst}
+.. autosummary::
+   :nosignatures:
+   :toctree: ../generated/
+
+   tl.score_genes
+   tl.score_genes_cell_cycle
+```
+
+### Simulations
+
+```{eval-rst}
+.. autosummary::
+   :nosignatures:
+   :toctree: ../generated/
+
+   tl.sim
+
+```
+"""
+class ScanpyTLQueryBuilder(BaseQueryBuilder):
+    """A class for building an ScanpyTLQuery object."""
+    def parameterise_query(
+        self,
+        question: str,
+        conversation: "Conversation",
+    ) -> ScanpyTLQueryParameters:
+        """Generate an ScanpyTLQuery object.
+
+        Generate a ScanpyTLQuery object based on the given question, prompt,
+        and BioChatter conversation. Uses a Pydantic model to define the API
+        fields. Uses LangChain's `.bind_tools` method to let the LLM parameterise
+        the function call, based on the functions available in the scanpy.tl module.
+
+        Args:
+        ----
+            question (str): The question to be answered.
+
+            conversation: The conversation object used for parameterising the
+                BioToolsQuery.
+
+        Returns:
+        -------
+            BioToolsQueryParameters: the parameterised query object (Pydantic
+                model)
+
+        """
+        import scanpy as sc
+        module = sc.tl
+        generated_classes = generate_pydantic_classes(module)
+        llm = conversation.chat
+        llm_with_tools = llm.bind_tools(generated_classes)
+        query = [
+	        ("system", "You're an expert data scientist"), 
+            ("human", {question}),
+        ]
+        chain = llm_with_tools | PydanticToolsParser(tools=generated_classes)
+        result = chain.invoke(query)
+        return result
\ No newline at end of file

From d4f318425c12c8571635dc846ce256aea0f85876 Mon Sep 17 00:00:00 2001
From: mengerj <jonatan.menger@gmx.de>
Date: Wed, 11 Dec 2024 11:17:51 +0100
Subject: [PATCH 5/9] generic function to generate pydantic classes for functions
 in a module. Only includes functions whose names don't start with "_"

---
 .../generate_pydantic_classes_from_module.py  | 78 +++++++++++++++++++
 1 file changed, 78 insertions(+)
 create mode 100644 biochatter/api_agent/generate_pydantic_classes_from_module.py

diff --git a/biochatter/api_agent/generate_pydantic_classes_from_module.py b/biochatter/api_agent/generate_pydantic_classes_from_module.py
new file mode 100644
index 00000000..25bf74ea
--- /dev/null
+++ b/biochatter/api_agent/generate_pydantic_classes_from_module.py
@@ -0,0 +1,78 @@
+import inspect
+from typing import Any, Dict, Optional, Type
+from types import ModuleType
+from docstring_parser import parse
+from langchain_core.pydantic_v1 import BaseModel, Field, create_model
+from . import generate_pydantic_classes_from_modules
+
+def generate_pydantic_classes(module: ModuleType) -> list[Type[BaseModel]]:
+    """
+    Generate Pydantic classes for each callable (function/method) in a given module.
+    
+    Extracts parameters from docstrings using docstring-parser. Each generated class
+    has fields corresponding to the parameters of the function. If a parameter name 
+    conflicts with BaseModel attributes, it is aliased.
+    
+    Parameters
+    ----------
+    module : ModuleType
+        The Python module from which to extract functions and generate models.
+        
+    Returns
+    -------
+    list[Type[BaseModel]]
+        A list of Pydantic model classes, one for each public function in the module.
+    """
+    base_attributes = set(dir(BaseModel))
+    classes_list = []
+
+    # Iterate over all callables in the module
+    for name, func in inspect.getmembers(module, inspect.isfunction):
+        # skip if method starts with _
+        if name.startswith("_"):
+            continue
+        doc = inspect.getdoc(func)
+        if not doc:
+            # If no docstring, still create a model with no fields
+            TLParametersModel = create_model(f"{name}")
+            classes_list.append(TLParametersModel)
+            continue
+
+        parsed_doc = parse(doc)
+        
+        # Collect parameter descriptions
+        param_info = {}
+        for p in parsed_doc.params:
+            if p.arg_name not in param_info:
+                param_info[p.arg_name] = p.description or "No description available."
+
+        # Prepare fields for create_model
+        fields = {}
+        alias_map = {}
+
+        for param_name, param_desc in param_info.items():
+            field_kwargs = {"default": None, "description": param_desc}
+            field_name = param_name
+
+            # Alias if conflicts with BaseModel attributes
+            if param_name in base_attributes:
+                aliased_name = param_name + "_param"
+                field_kwargs["alias"] = param_name
+                alias_map[aliased_name] = param_name
+                field_name = aliased_name
+
+            # Without type info, default to Optional[str]
+            fields[field_name] = (Optional[str], Field(**field_kwargs))
+
+        # Dynamically create the model for this function
+        TLParametersModel = create_model(name, **fields)
+        classes_list.append(TLParametersModel)
+
+    return classes_list
+
+
+# Example usage:
+#import scanpy as sc
+#generated_classes = generate_pydantic_classes(sc.tl)
+#for func in generated_classes:  
+#    print(func.schema())
\ No newline at end of file

From 7b4df80d1300ee8c5988ecf7f13d724e7e9e113b Mon Sep 17 00:00:00 2001
From: mengerj <jonatan.menger@gmx.de>
Date: Wed, 11 Dec 2024 13:31:40 +0100
Subject: [PATCH 6/9] work in progress on ScanpyTLQueryBuilder and its unit tests

---
 biochatter/api_agent/__init__.py              |  2 +
 .../generate_pydantic_classes_from_module.py  |  1 -
 biochatter/api_agent/scanpy_tl.py             | 14 +++-
 pyproject.toml                                |  2 +
 test/test_api_agent.py                        | 79 +++++++++++++++++++
 5 files changed, 95 insertions(+), 3 deletions(-)

diff --git a/biochatter/api_agent/__init__.py b/biochatter/api_agent/__init__.py
index 21f0fca9..fc1a3fa4 100644
--- a/biochatter/api_agent/__init__.py
+++ b/biochatter/api_agent/__init__.py
@@ -12,6 +12,7 @@
     BlastQueryParameters,
 )
 from .oncokb import OncoKBFetcher, OncoKBInterpreter, OncoKBQueryBuilder
+from .scanpy_tl import ScanpyTLQueryBuilder
 
 __all__ = [
     "BaseFetcher",
@@ -28,4 +29,5 @@
     "BioToolsInterpreter",
     "BioToolsQueryBuilder",
     "APIAgent",
+    "ScanpyTLQueryBuilder",
 ]
diff --git a/biochatter/api_agent/generate_pydantic_classes_from_module.py b/biochatter/api_agent/generate_pydantic_classes_from_module.py
index 25bf74ea..5a60f959 100644
--- a/biochatter/api_agent/generate_pydantic_classes_from_module.py
+++ b/biochatter/api_agent/generate_pydantic_classes_from_module.py
@@ -3,7 +3,6 @@
 from types import ModuleType
 from docstring_parser import parse
 from langchain_core.pydantic_v1 import BaseModel, Field, create_model
-from . import generate_pydantic_classes_from_modules
 
 def generate_pydantic_classes(module: ModuleType) -> list[Type[BaseModel]]:
     """
diff --git a/biochatter/api_agent/scanpy_tl.py b/biochatter/api_agent/scanpy_tl.py
index 1cc175ca..fdd21c3c 100644
--- a/biochatter/api_agent/scanpy_tl.py
+++ b/biochatter/api_agent/scanpy_tl.py
@@ -105,11 +105,19 @@
 """
 class ScanpyTLQueryBuilder(BaseQueryBuilder):
     """A class for building an ScanpyTLQuery object."""
+    
+    def create_runnable(
+        self,
+        query_parameters: BaseModel,
+        conversation: "Conversation",
+    ):
+        pass
+    
     def parameterise_query(
         self,
         question: str,
         conversation: "Conversation",
-    ) -> ScanpyTLQueryParameters:
+    ):
         """Generate an ScanpyTLQuery object.
 
         Generate a ScanpyTLQuery object based on the given question, prompt,
@@ -141,4 +149,6 @@ def parameterise_query(
         ]
         chain = llm_with_tools | PydanticToolsParser(tools=generated_classes)
         result = chain.invoke(query)
-        return result
\ No newline at end of file
+        return result
+    
+
diff --git a/pyproject.toml b/pyproject.toml
index 0230c6cf..1a2645e7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -52,6 +52,7 @@ rouge_score = "0.1.2"
 evaluate = "^0.4.1"
 pillow = ">=10.2,<11.0"
 pdf2image = "^1.16.0"
+scanpy = { version = "^1.11.0", optional = true }
 langchain-community = "^0.2.5"
 langgraph = "^0.1.5"
 langchain-openai = "^0.1.14"
@@ -62,6 +63,7 @@ colorcet = "^3.1.0"
 
 langchain-anthropic = "^0.1.22"
 anthropic = "^0.33.0"
+docstring-parser = "^0.16.0"
 [tool.poetry.extras]
 streamlit = ["streamlit"]
 podcast = ["gTTS"]
diff --git a/test/test_api_agent.py b/test/test_api_agent.py
index 6c082658..3ea38fa1 100644
--- a/test/test_api_agent.py
+++ b/test/test_api_agent.py
@@ -28,6 +28,7 @@
     OncoKBQueryBuilder,
     OncoKBQueryParameters,
 )
+from biochatter.api_agent.scanpy_tl import ScanpyTLQueryBuilder
 from biochatter.llm_connect import Conversation, GptConversation
 
 
@@ -422,3 +423,81 @@ def test_summarise_results(mock_prompt, mock_conversation, mock_chain):
         mock_chain.invoke.assert_called_once_with(
             {"input": {expected_summary_prompt}},
         )
+
+class TestScanpyTLQueryBuilder:
+    @pytest.fixture()
+    def mock_generate_pydantic_classes(self):
+        with patch("biochatter.api_agent.generate_pydantic_classes_from_module.generate_pydantic_classes") as mock:
+            # Return a fake dictionary of generated classes
+            mock.return_value = {"leiden": MagicMock()}
+            yield mock
+
+    @pytest.fixture()
+    def mock_pydantic_tools_parser(self):
+        with patch("langchain_core.output_parsers.PydanticToolsParser") as mock_parser_cls:
+            mock_parser_instance = MagicMock()
+            mock_parser_cls.return_value = mock_parser_instance
+            yield mock_parser_cls, mock_parser_instance
+
+    def test_parameterise_query(
+        self,
+        mock_generate_pydantic_classes,
+        mock_pydantic_tools_parser
+    ):
+        # Arrange
+        query_builder = ScanpyTLQueryBuilder()
+        mock_conversation = MagicMock()
+        mock_llm = MagicMock()
+        mock_conversation.chat = mock_llm
+
+        # Mock the LLM with tools
+        mock_llm_with_tools = MagicMock()
+        mock_llm.bind_tools.return_value = mock_llm_with_tools
+
+        # When we do llm_with_tools | PydanticToolsParser(...) it should return a mock chain
+        mock_parser_cls, mock_parser_instance = mock_pydantic_tools_parser
+        mock_chain = MagicMock()
+        # The '|' operator (pipe) can be emulated by setting return value on __or__
+        mock_llm_with_tools.__or__.return_value = mock_chain
+
+        # The chain.invoke(...) result
+        mock_result = MagicMock()
+        mock_chain.invoke.return_value = mock_result
+
+        question = "Find the best parameters for leiden clustering."
+
+        # Act
+        result = query_builder.parameterise_query(question, mock_conversation)
+
+        # Assert
+        # Check that generate_pydantic_classes was called with scanpy.tl
+        args, kwargs = mock_generate_pydantic_classes.call_args
+        assert "scanpy.tl" in str(args[0])  # or more robust checks depending on your imports
+
+        # Check that bind_tools was called on the llm
+        mock_llm.bind_tools.assert_called_once()
+
+        # The query should have been passed to chain.invoke
+        # query is built as:
+        # query = [
+        #   ("system", "You're an expert data scientist"), 
+        #   ("human", {question}),
+        # ]
+        mock_chain.invoke.assert_called_once_with([
+            ("system", "You're an expert data scientist"),
+            ("human", {question}),
+        ])
+
+        # Ensure the returned result is the mock_result
+        assert result == mock_result
+
+
+
+
+
+class TestScanpyPlFetcher:
+    pass
+
+
+class TestScanpyPlInterpreter:
+    pass
\ No newline at end of file

From f10839a0b0cedb10b10c4a14bc11eefbe08c70c0 Mon Sep 17 00:00:00 2001
From: bastienchassagnol <bastien_chassagnol@laposte.net>
Date: Wed, 11 Dec 2024 15:14:14 +0100
Subject: [PATCH 7/9] add to the benchmark a call to scanpy.pp.pca to carry out
 a PCA with a given number of components (n_comps)

---
 benchmark/data/benchmark_api_calling_data.yaml | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/benchmark/data/benchmark_api_calling_data.yaml b/benchmark/data/benchmark_api_calling_data.yaml
index 85340a81..d9a02d37 100644
--- a/benchmark/data/benchmark_api_calling_data.yaml
+++ b/benchmark/data/benchmark_api_calling_data.yaml
@@ -73,3 +73,10 @@ api_calling:
     expected:
       parts_of_query:
         ["https://bio.tools/api/t/", "\\?topic=", "[mM]etabolomics"]
+  - case: scanpy:pp:pca
+    input:
+      prompt:
+        fuzzy_search: "Make a PCA keeping n_comps."
+    expected:
+      parts_of_query:
+        ["scanpy.pp.pca\\(", "n_comps=", "\\)"]

From 7e742dff27985b6070e1254423ce60606ceea586 Mon Sep 17 00:00:00 2001
From: Lera <vd.dragan21@gmail.com>
Date: Wed, 11 Dec 2024 18:16:59 +0100
Subject: [PATCH 8/9] Added mock test for ScanpyTLQueryBuilder (without module
 specification)

---
 biochatter/api_agent/scanpy_tl.py | 11 +++--
 test/test_api_agent.py            | 80 ++++++++++---------------------
 2 files changed, 33 insertions(+), 58 deletions(-)

diff --git a/biochatter/api_agent/scanpy_tl.py b/biochatter/api_agent/scanpy_tl.py
index eb1ebad7..18e6d07e 100644
--- a/biochatter/api_agent/scanpy_tl.py
+++ b/biochatter/api_agent/scanpy_tl.py
@@ -117,6 +117,8 @@ def parameterise_query(
         self,
         question: str,
         conversation: "Conversation",
+        generated_classes=None, # Allow external injection of classes
+        module=None,
     ):
         """Generate an ScanpyTLQuery object.
 
@@ -138,14 +140,15 @@ def parameterise_query(
                 model)
 
         """
-        import scanpy as sc
-        module = sc.tl
-        generated_classes = generate_pydantic_classes(module)
+            # Generate classes if not provided
+        if generated_classes is None:
+            generated_classes = generate_pydantic_classes(module)
+            
         llm = conversation.chat
         llm_with_tools = llm.bind_tools(generated_classes)
         query = [
 	        ("system", "You're an expert data scientist"), 
-            ("human", {question}),
+            ("human", question),
         ]
         chain = llm_with_tools | PydanticToolsParser(tools=generated_classes)
         result = chain.invoke(query)
diff --git a/test/test_api_agent.py b/test/test_api_agent.py
index 3ea38fa1..7c303775 100644
--- a/test/test_api_agent.py
+++ b/test/test_api_agent.py
@@ -425,79 +425,51 @@ def test_summarise_results(mock_prompt, mock_conversation, mock_chain):
         )
 
 class TestScanpyTLQueryBuilder:
-    @pytest.fixture()
-    def mock_generate_pydantic_classes(self):
-        with patch("biochatter.api_agent.generate_pydantic_classes_from_module.generate_pydantic_classes") as mock:
-            # Return a fake dictionary of generated classes
-            mock.return_value = {"leiden": MagicMock()}
-            yield mock
-
-    @pytest.fixture()
-    def mock_pydantic_tools_parser(self):
-        with patch("langchain_core.output_parsers.PydanticToolsParser") as mock_parser_cls:
-            mock_parser_instance = MagicMock()
-            mock_parser_cls.return_value = mock_parser_instance
-            yield mock_parser_cls, mock_parser_instance
-
-    def test_parameterise_query(
-        self,
-        mock_generate_pydantic_classes,
-        mock_pydantic_tools_parser
-    ):
+    @patch("biochatter.llm_connect.GptConversation")
+    def test_parameterise_query(self, mock_conversation):
         # Arrange
-        query_builder = ScanpyTLQueryBuilder()
-        mock_conversation = MagicMock()
+        question = "I want to mark mitochondrial genes of my adata object"
+
+        # Mock the list of Pydantic classes as a list of Mock objects
+        class MockTool1(BaseModel):
+            param1: str
+
+        class MockTool2(BaseModel):
+            param2: int
+
+        mock_generated_classes = [MockTool1, MockTool2]
+
+        # Mock the conversation object and LLM
+        mock_conversation_instance = mock_conversation.return_value
         mock_llm = MagicMock()
-        mock_conversation.chat = mock_llm
+        mock_conversation_instance.chat = mock_llm
 
         # Mock the LLM with tools
         mock_llm_with_tools = MagicMock()
         mock_llm.bind_tools.return_value = mock_llm_with_tools
 
-        # When we do llm_with_tools | PydanticToolsParser(...) it should return a mock chain
-        mock_parser_cls, mock_parser_instance = mock_pydantic_tools_parser
+        # Mock the chain and its invoke method
         mock_chain = MagicMock()
-        # The '|' operator (pipe) can be emulated by setting return value on __or__
         mock_llm_with_tools.__or__.return_value = mock_chain
-
-        # The chain.invoke(...) result
-        mock_result = MagicMock()
+        mock_result = {"parameters": {"key_added": "mt_genes"}}
         mock_chain.invoke.return_value = mock_result
 
-        question = "Find the best parameters for leiden clustering."
-
         # Act
-        result = query_builder.parameterise_query(question, mock_conversation)
+        builder = ScanpyTLQueryBuilder()
+        result = builder.parameterise_query(
+            question, 
+            mock_conversation_instance, 
+            generated_classes=mock_generated_classes
+        )
 
         # Assert
-        # Check that generate_pydantic_classes was called with scanpy.tl
-        args, kwargs = mock_generate_pydantic_classes.call_args
-        assert "scanpy.tl" in str(args[0])  # or more robust checks depending on your imports
-
-        # Check that bind_tools was called on the llm
-        mock_llm.bind_tools.assert_called_once()
-
-        # The query should have been passed to chain.invoke
-        # query is built as:
-        # query = [
-        #   ("system", "You're an expert data scientist"), 
-        #   ("human", {question}),
-        # ]
+        mock_llm.bind_tools.assert_called_once_with(mock_generated_classes)
         mock_chain.invoke.assert_called_once_with([
             ("system", "You're an expert data scientist"),
-            ("human", {question}),
+            ("human", question),
         ])
-
-        # Ensure the returned result is the mock_result
         assert result == mock_result
 
 
 
 
-
-class TestScanpyPlFetcher:
-    pass
-
-
-class TestScanpyPlInterpreter:
-    pass
\ No newline at end of file

From a1c20f9b280be0efa9ec3ee9bfaf398d1b39d873 Mon Sep 17 00:00:00 2001
From: bastienchassagnol <bastien_chassagnol@laposte.net>
Date: Wed, 11 Dec 2024 18:38:24 +0100
Subject: [PATCH 9/9] remove unused imports in the scanpy_tl module

---
 biochatter/api_agent/scanpy_tl.py | 30 ++++++++++++++----------------
 1 file changed, 14 insertions(+), 16 deletions(-)

diff --git a/biochatter/api_agent/scanpy_tl.py b/biochatter/api_agent/scanpy_tl.py
index eb1ebad7..637735a0 100644
--- a/biochatter/api_agent/scanpy_tl.py
+++ b/biochatter/api_agent/scanpy_tl.py
@@ -1,20 +1,16 @@
-"""Module for interacting with the `scanpy` API for data transformation tools (`tl`)."""
-from collections.abc import Callable
+"""Module for interacting with the `scanpy` API for data tools (`tl`)."""
+
 from typing import TYPE_CHECKING
 
-import requests
-from langchain.chains.openai_functions import create_structured_output_runnable
-from langchain_core.output_parsers import StrOutputParser
-from langchain_core.prompts import ChatPromptTemplate
-from langchain_core.pydantic_v1 import BaseModel, Field
-from langchain_openai import ChatOpenAI
 from langchain_core.output_parsers import PydanticToolsParser
-if TYPE_CHECKING:
-    from biochatter.llm_connect import Conversation
+from langchain_core.pydantic_v1 import BaseModel
 
-from .abc import BaseFetcher, BaseInterpreter, BaseQueryBuilder
+from .abc import BaseQueryBuilder
 from .generate_pydantic_classes_from_module import generate_pydantic_classes
 
+if TYPE_CHECKING:
+    from biochatter.llm_connect import Conversation
+
 SCANPY_QUERY_PROMPT = """
 You are a world class algorithm for creating queries in structured formats. Your task is to use the scanpy python package
 to provide the user with the appropriate function call to answer their question. You focus on the scanpy.tl module, which has 
@@ -103,16 +99,18 @@
 
 ```
 """
+
+
 class ScanpyTLQueryBuilder(BaseQueryBuilder):
     """A class for building an ScanpyTLQuery object."""
-    
+
     def create_runnable(
         self,
         query_parameters: BaseModel,
         conversation: "Conversation",
     ):
         pass
-    
+
     def parameterise_query(
         self,
         question: str,
@@ -139,14 +137,14 @@ def parameterise_query(
 
         """
         import scanpy as sc
+
         module = sc.tl
         generated_classes = generate_pydantic_classes(module)
         llm = conversation.chat
         llm_with_tools = llm.bind_tools(generated_classes)
         query = [
-	        ("system", "You're an expert data scientist"), 
+            ("system", "You're an expert data scientist"),
             ("human", {question}),
         ]
         chain = llm_with_tools | PydanticToolsParser(tools=generated_classes)
-        result = chain.invoke(query)
-        return result
+        return chain.invoke(query)