workfloworchestrator · pboers1988 · Oct 6, 2025 · Sep 29, 2025 · Sep 30, 2025 · Sep 30, 2025
diff --git a/.bumpversion.cfg b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 4.5.0a7
+current_version = 4.5.0a8
 commit = False
 tag = False
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(rc(?P<build>\d+))?

diff --git a/.github/workflows/run-unit-tests.yml b/.github/workflows/run-unit-tests.yml
@@ -65,6 +65,7 @@ jobs:
           POSTGRES_PASSWORD: nwa
           POSTGRES_HOST: postgres
           ENVIRONMENT: TESTING
+          SEARCH_ENABLED: true
 
       - name: "Upload coverage to Codecov"
         uses: codecov/codecov-action@v3

diff --git a/orchestrator/__init__.py b/orchestrator/__init__.py
@@ -13,7 +13,7 @@
 
 """This is the orchestrator workflow engine."""
 
-__version__ = "4.5.0a7"
+__version__ = "4.5.0a8"
 
 
 from structlog import get_logger
@@ -25,18 +25,9 @@
 from orchestrator.llm_settings import llm_settings
 from orchestrator.settings import app_settings
 
-if llm_settings.LLM_ENABLED:
-    try:
-        from importlib import import_module
+if llm_settings.SEARCH_ENABLED or llm_settings.AGENT_ENABLED:
 
-        import_module("pydantic_ai")
-        from orchestrator.agentic_app import AgenticOrchestratorCore as OrchestratorCore
-
-    except ImportError:
-        logger.error(
-            "Unable to import 'pydantic_ai' module, please install the orchestrator with llm dependencies. `pip install orchestrator-core[llm]",
-        )
-        exit(1)
+    from orchestrator.agentic_app import LLMOrchestratorCore as OrchestratorCore
 else:
     from orchestrator.app import OrchestratorCore  # type: ignore[assignment]
 

diff --git a/orchestrator/agentic_app.py b/orchestrator/agentic_app.py
@@ -1,8 +1,8 @@
 #!/usr/bin/env python3
 """The main application module.
 
-This module contains the main `AgenticOrchestratorCore` class for the `FastAPI` backend and
-provides the ability to run the CLI.
+This module contains the main `LLMOrchestratorCore` class for the `FastAPI` backend and
+provides the ability to run the CLI with LLM features (search and/or agent).
 """
 # Copyright 2019-2025 SURF
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -16,68 +16,84 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Any
+from typing import TYPE_CHECKING, Any
 
 import typer
-from pydantic_ai.models.openai import OpenAIModel
-from pydantic_ai.toolsets import FunctionToolset
 from structlog import get_logger
 
 from orchestrator.app import OrchestratorCore
 from orchestrator.cli.main import app as cli_app
 from orchestrator.llm_settings import LLMSettings, llm_settings
 
+if TYPE_CHECKING:
+    from pydantic_ai.models.openai import OpenAIModel
+    from pydantic_ai.toolsets import FunctionToolset
+
 logger = get_logger(__name__)
 
 
-class AgenticOrchestratorCore(OrchestratorCore):
+class LLMOrchestratorCore(OrchestratorCore):
     def __init__(
         self,
         *args: Any,
-        llm_model: OpenAIModel | str = "gpt-4o-mini",
         llm_settings: LLMSettings = llm_settings,
-        agent_tools: list[FunctionToolset] | None = None,
+        agent_model: "OpenAIModel | str | None" = None,
+        agent_tools: "list[FunctionToolset] | None" = None,
         **kwargs: Any,
     ) -> None:
-        """Initialize the `AgenticOrchestratorCore` class.
+        """Initialize the `LLMOrchestratorCore` class.
 
-        This class takes the same arguments as the `OrchestratorCore` class.
+        This class extends `OrchestratorCore` with LLM features (search and agent).
+        It runs the search migration and mounts the agent endpoint based on feature flags.
 
         Args:
             *args: All the normal arguments passed to the `OrchestratorCore` class.
-            llm_model: An OpenAI model class or string, not limited to OpenAI models (gpt-4o-mini etc)
             llm_settings: A class of settings for the LLM
+            agent_model: Override the agent model (defaults to llm_settings.AGENT_MODEL)
             agent_tools: A list of tools that can be used by the agent
             **kwargs: Additional arguments passed to the `OrchestratorCore` class.
 
         Returns:
             None
         """
-        self.llm_model = llm_model
-        self.agent_tools = agent_tools
         self.llm_settings = llm_settings
+        self.agent_model = agent_model or llm_settings.AGENT_MODEL
+        self.agent_tools = agent_tools
 
         super().__init__(*args, **kwargs)
 
-        logger.info("Mounting the agent")
-        self.register_llm_integration()
-
-    def register_llm_integration(self) -> None:
-        """Register the Agent endpoint.
-
-        This helper includes the agent router on the application with auth dependencies.
-
-        Returns:
-            None
-
-        """
-        from fastapi import Depends
-
-        from orchestrator.search.agent import build_agent_router
-        from orchestrator.security import authorize
-
-        agent_router = build_agent_router(self.llm_model, self.agent_tools)
-        self.include_router(agent_router, prefix="/agent", dependencies=[Depends(authorize)])
+        # Run search migration if search or agent is enabled
+        if self.llm_settings.SEARCH_ENABLED or self.llm_settings.AGENT_ENABLED:
+            logger.info("Running search migration")
+            try:
+                from orchestrator.db import db
+                from orchestrator.search.llm_migration import run_migration
+
+                with db.engine.begin() as connection:
+                    run_migration(connection)
+            except ImportError as e:
+                logger.error(
+                    "Unable to run search migration. Please install search dependencies: "
+                    "`pip install orchestrator-core[search]`",
+                    error=str(e),
+                )
+                raise
+
+        # Mount agent endpoint if agent is enabled
+        if self.llm_settings.AGENT_ENABLED:
+            logger.info("Initializing agent features", model=self.agent_model)
+            try:
+                from orchestrator.search.agent import build_agent_router
+
+                agent_app = build_agent_router(self.agent_model, self.agent_tools)
+                self.mount("/agent", agent_app)
+            except ImportError as e:
+                logger.error(
+                    "Unable to initialize agent features. Please install agent dependencies: "
+                    "`pip install orchestrator-core[agent]`",
+                    error=str(e),
+                )
+                raise
 
 
 main_typer_app = typer.Typer()

diff --git a/orchestrator/api/api_v1/api.py b/orchestrator/api/api_v1/api.py
@@ -89,7 +89,7 @@
     ws.router, prefix="/ws", tags=["Core", "Events"]
 )  # Auth on the websocket is handled in the Websocket Manager
 
-if llm_settings.LLM_ENABLED:
+if llm_settings.SEARCH_ENABLED:
     from orchestrator.api.api_v1.endpoints import search
 
     api_router.include_router(

diff --git a/orchestrator/cli/main.py b/orchestrator/cli/main.py
@@ -25,10 +25,21 @@
 app.add_typer(database.app, name="db", help="Interact with the application database")
 app.add_typer(generate.app, name="generate", help="Generate products, workflows and other artifacts")
 
-if llm_settings.LLM_ENABLED:
-    from orchestrator.cli import search
+if llm_settings.SEARCH_ENABLED:
+    from orchestrator.cli.search import index_llm, resize_embedding, search_explore, speedtest
 
-    search.register_commands(app)
+    app.add_typer(index_llm.app, name="index", help="(Re-)Index the search table.")
+    app.add_typer(search_explore.app, name="search", help="Try out different search types.")
+    app.add_typer(
+        resize_embedding.app,
+        name="embedding",
+        help="Resize the vector dimension of the embedding column in the search table.",
+    )
+    app.add_typer(
+        speedtest.app,
+        name="speedtest",
+        help="Search performance testing and analysis.",
+    )
 
 
 if __name__ == "__main__":

diff --git a/orchestrator/llm_settings.py b/orchestrator/llm_settings.py
@@ -18,7 +18,10 @@
 
 
 class LLMSettings(BaseSettings):
-    LLM_ENABLED: bool = False  # Default to false
+    # Feature flags for LLM functionality
+    SEARCH_ENABLED: bool = False  # Enable search/indexing with embeddings
+    AGENT_ENABLED: bool = False  # Enable agentic functionality
+
     # Pydantic-ai Agent settings
     AGENT_MODEL: str = "gpt-4o-mini"  # See pydantic-ai docs for supported models.
     AGENT_MODEL_VERSION: str = "2025-01-01-preview"
@@ -30,11 +33,11 @@ class LLMSettings(BaseSettings):
         0.1, description="Safety margin as a percentage (e.g., 0.1 for 10%) for token budgeting.", ge=0, le=1
     )
 
-    # The following settings are only needed for local models.
+    # The following settings are only needed for local models or when the system imposes constraints (e.g. a maximum batch size).
     # By default, they are set conservative assuming a small model like All-MiniLM-L6-V2.
     OPENAI_BASE_URL: str | None = None
     EMBEDDING_FALLBACK_MAX_TOKENS: int | None = 512
-    EMBEDDING_MAX_BATCH_SIZE: int | None = 32
+    EMBEDDING_MAX_BATCH_SIZE: int | None = None
 
     # General LiteLLM settings
     LLM_MAX_RETRIES: int = 3

diff --git a/...rations/versions/schema/2025-08-12_52b37b5b2714_search_index_model_for_llm_integration.py b/...rations/versions/schema/2025-08-12_52b37b5b2714_search_index_model_for_llm_integration.py
diff --git a/orchestrator/search/docs/running_local_text_embedding_inference.md b/orchestrator/search/docs/running_local_text_embedding_inference.md
@@ -18,6 +18,7 @@ Point your backend to the local endpoint and declare the new vector size:
 ```env
 OPENAI_BASE_URL=http://localhost:8080/v1
 EMBEDDING_DIMENSION=384
+EMBEDDING_MAX_BATCH_SIZE=32 # Not required when using OpenAI embeddings
 ```
 
 Depending on the model, you might want to change the `EMBEDDING_FALLBACK_MAX_TOKENS` and `EMBEDDING_MAX_BATCH_SIZE` settings, which are set conservatively and according to the requirements of the setup used in this example.

diff --git a/orchestrator/search/indexing/indexer.py b/orchestrator/search/indexing/indexer.py
@@ -226,9 +226,7 @@ def _generate_upsert_batches(
         safe_margin = int(max_ctx * llm_settings.EMBEDDING_SAFE_MARGIN_PERCENT)
         token_budget = max(1, max_ctx - safe_margin)
 
-        max_batch_size = None
-        if llm_settings.OPENAI_BASE_URL:  # We are using a local model
-            max_batch_size = llm_settings.EMBEDDING_MAX_BATCH_SIZE
+        max_batch_size = llm_settings.EMBEDDING_MAX_BATCH_SIZE
 
         for entity_id, field in fields_to_upsert:
             if field.value_type.is_embeddable(field.value):