From b2c45f731c4ec9e48917bc0e317df6f7a5a3993d Mon Sep 17 00:00:00 2001 From: Nathan Evans Date: Thu, 20 Nov 2025 13:30:56 -0800 Subject: [PATCH 1/5] Add init prompting for models --- packages/graphrag/graphrag/api/index.py | 4 --- packages/graphrag/graphrag/cli/index.py | 6 ---- packages/graphrag/graphrag/cli/initialize.py | 11 ++++++-- packages/graphrag/graphrag/cli/main.py | 28 +++++++++---------- .../graphrag/graphrag/config/init_content.py | 4 +-- 5 files changed, 24 insertions(+), 29 deletions(-) diff --git a/packages/graphrag/graphrag/api/index.py b/packages/graphrag/graphrag/api/index.py index c15ea2e786..cc9d0956d5 100644 --- a/packages/graphrag/graphrag/api/index.py +++ b/packages/graphrag/graphrag/api/index.py @@ -30,7 +30,6 @@ async def build_index( config: GraphRagConfig, method: IndexingMethod | str = IndexingMethod.Standard, is_update_run: bool = False, - memory_profile: bool = False, callbacks: list[WorkflowCallbacks] | None = None, additional_context: dict[str, Any] | None = None, verbose: bool = False, @@ -67,9 +66,6 @@ async def build_index( outputs: list[PipelineRunResult] = [] - if memory_profile: - logger.warning("New pipeline does not yet support memory profiling.") - logger.info("Initializing indexing pipeline...") # todo: this could propagate out to the cli for better clarity, but will be a breaking api change method = _get_method(method, is_update_run) diff --git a/packages/graphrag/graphrag/cli/index.py b/packages/graphrag/graphrag/cli/index.py index d686a38f13..60b724ea98 100644 --- a/packages/graphrag/graphrag/cli/index.py +++ b/packages/graphrag/graphrag/cli/index.py @@ -43,7 +43,6 @@ def index_cli( root_dir: Path, method: IndexingMethod, verbose: bool, - memprofile: bool, cache: bool, dry_run: bool, skip_validation: bool, @@ -55,7 +54,6 @@ def index_cli( method=method, is_update_run=False, verbose=verbose, - memprofile=memprofile, cache=cache, dry_run=dry_run, skip_validation=skip_validation, @@ -66,7 +64,6 @@ def update_cli( root_dir: 
Path, method: IndexingMethod, verbose: bool, - memprofile: bool, cache: bool, skip_validation: bool, ): @@ -80,7 +77,6 @@ def update_cli( method=method, is_update_run=True, verbose=verbose, - memprofile=memprofile, cache=cache, dry_run=False, skip_validation=skip_validation, @@ -92,7 +88,6 @@ def _run_index( method, is_update_run, verbose, - memprofile, cache, dry_run, skip_validation, @@ -129,7 +124,6 @@ def _run_index( config=config, method=method, is_update_run=is_update_run, - memory_profile=memprofile, callbacks=[ConsoleWorkflowCallbacks(verbose=verbose)], verbose=verbose, ) diff --git a/packages/graphrag/graphrag/cli/initialize.py b/packages/graphrag/graphrag/cli/initialize.py index 8dbaf30f93..d29866e5c4 100644 --- a/packages/graphrag/graphrag/cli/initialize.py +++ b/packages/graphrag/graphrag/cli/initialize.py @@ -35,7 +35,9 @@ logger = logging.getLogger(__name__) -def initialize_project_at(path: Path, force: bool) -> None: +def initialize_project_at( + path: Path, force: bool, model: str, embedding_model: str +) -> None: """ Initialize the project at the given path. 
@@ -64,8 +66,11 @@ def initialize_project_at(path: Path, force: bool) -> None: root / (graphrag_config_defaults.input.storage.base_dir or "input") ).resolve() input_path.mkdir(parents=True, exist_ok=True) - - settings_yaml.write_text(INIT_YAML, encoding="utf-8", errors="strict") + # using replace with custom tokens instead of format here because we have a placeholder for GRAPHRAG_API_KEY that is used later for .env overlay + formatted = INIT_YAML.replace("<DEFAULT_CHAT_MODEL>", model).replace( + "<DEFAULT_EMBEDDING_MODEL>", embedding_model + ) + settings_yaml.write_text(formatted, encoding="utf-8", errors="strict") dotenv = root / ".env" if not dotenv.exists() or force: diff --git a/packages/graphrag/graphrag/cli/main.py b/packages/graphrag/graphrag/cli/main.py index 89768e736d..805f31318c 100644 --- a/packages/graphrag/graphrag/cli/main.py +++ b/packages/graphrag/graphrag/cli/main.py @@ -10,7 +10,11 @@ import typer -from graphrag.config.defaults import graphrag_config_defaults +from graphrag.config.defaults import ( + DEFAULT_CHAT_MODEL, + DEFAULT_EMBEDDING_MODEL, + graphrag_config_defaults, +) from graphrag.config.enums import IndexingMethod, SearchMethod from graphrag.prompt_tune.defaults import LIMIT, MAX_TOKEN_COUNT, N_SUBSET_MAX, K from graphrag.prompt_tune.types import DocSelectionType @@ -112,9 +116,17 @@ def _initialize_cli( ), ) -> None: """Generate a default configuration file.""" + model = typer.prompt( + "Specify the default chat model to use", default=DEFAULT_CHAT_MODEL + ) + embedding_model = typer.prompt( + "Specify the default embedding model to use", default=DEFAULT_EMBEDDING_MODEL + ) from graphrag.cli.initialize import initialize_project_at - initialize_project_at(path=root, force=force) + initialize_project_at( + path=root, force=force, model=model, embedding_model=embedding_model + ) @app.command("index") @@ -143,11 +155,6 @@ def _index_cli( "-v", help="Run the indexing pipeline with verbose logging", ), - memprofile: bool = typer.Option( - False, - "--memprofile", - help="Run the
indexing pipeline with memory profiling", - ), dry_run: bool = typer.Option( False, "--dry-run", @@ -173,7 +180,6 @@ def _index_cli( index_cli( root_dir=root, verbose=verbose, - memprofile=memprofile, cache=cache, dry_run=dry_run, skip_validation=skip_validation, @@ -207,11 +213,6 @@ def _update_cli( "-v", help="Run the indexing pipeline with verbose logging.", ), - memprofile: bool = typer.Option( - False, - "--memprofile", - help="Run the indexing pipeline with memory profiling.", - ), cache: bool = typer.Option( True, "--cache/--no-cache", @@ -233,7 +234,6 @@ def _update_cli( update_cli( root_dir=root, verbose=verbose, - memprofile=memprofile, cache=cache, skip_validation=skip_validation, method=method, diff --git a/packages/graphrag/graphrag/config/init_content.py b/packages/graphrag/graphrag/config/init_content.py index 1cbccf74df..4d58c62e97 100644 --- a/packages/graphrag/graphrag/config/init_content.py +++ b/packages/graphrag/graphrag/config/init_content.py @@ -23,7 +23,7 @@ model_provider: {defs.DEFAULT_MODEL_PROVIDER} auth_type: {defs.DEFAULT_CHAT_MODEL_AUTH_TYPE.value} # or azure_managed_identity api_key: ${{GRAPHRAG_API_KEY}} # set this in the generated .env file, or remove if managed identity - model: {defs.DEFAULT_CHAT_MODEL} + model: <DEFAULT_CHAT_MODEL> # api_base: https://<instance>.openai.azure.com # api_version: 2024-05-01-preview model_supports_json: true # recommended if this is available for your model.
@@ -37,7 +37,7 @@ model_provider: {defs.DEFAULT_MODEL_PROVIDER} auth_type: {defs.DEFAULT_EMBEDDING_MODEL_AUTH_TYPE.value} api_key: ${{GRAPHRAG_API_KEY}} - model: {defs.DEFAULT_EMBEDDING_MODEL} + model: <DEFAULT_EMBEDDING_MODEL> # api_base: https://<instance>.openai.azure.com # api_version: 2024-05-01-preview concurrent_requests: {language_model_defaults.concurrent_requests} From ad0caf1d122a422ccf980716d0c93f1e366998f8 Mon Sep 17 00:00:00 2001 From: Nathan Evans Date: Thu, 20 Nov 2025 13:33:10 -0800 Subject: [PATCH 2/5] Remove hard-coded model config validation --- packages/graphrag/graphrag/config/errors.py | 9 --------- .../config/models/graph_rag_config.py | 20 ------------------- 2 files changed, 29 deletions(-) diff --git a/packages/graphrag/graphrag/config/errors.py b/packages/graphrag/graphrag/config/errors.py index 32a20b838c..6dbe25ad40 100644 --- a/packages/graphrag/graphrag/config/errors.py +++ b/packages/graphrag/graphrag/config/errors.py @@ -33,15 +33,6 @@ def __init__(self, llm_type: str) -> None: super().__init__(msg) -class LanguageModelConfigMissingError(ValueError): - """Missing model configuration error.""" - - def __init__(self, key: str = "") -> None: - """Init method definition.""" - msg = f'A {key} model configuration is required. Please rerun `graphrag init` and set models["{key}"] in settings.yaml.'
- super().__init__(msg) - - class ConflictingSettingsError(ValueError): """Missing model configuration error.""" diff --git a/packages/graphrag/graphrag/config/models/graph_rag_config.py b/packages/graphrag/graphrag/config/models/graph_rag_config.py index 15d02eaf3a..61e21f3a85 100644 --- a/packages/graphrag/graphrag/config/models/graph_rag_config.py +++ b/packages/graphrag/graphrag/config/models/graph_rag_config.py @@ -11,7 +11,6 @@ import graphrag.config.defaults as defs from graphrag.config.defaults import graphrag_config_defaults from graphrag.config.enums import VectorStoreType -from graphrag.config.errors import LanguageModelConfigMissingError from graphrag.config.models.basic_search_config import BasicSearchConfig from graphrag.config.models.cache_config import CacheConfig from graphrag.config.models.chunking_config import ChunkingConfig @@ -58,24 +57,6 @@ def __str__(self): default=graphrag_config_defaults.models, ) - def _validate_models(self) -> None: - """Validate the models configuration. - - Ensure both a default chat model and default embedding model - have been defined. Other models may also be defined but - defaults are required for the time being as places of the - code fallback to default model configs instead - of specifying a specific model. - - TODO: Don't fallback to default models elsewhere in the code. - Forcing code to specify a model to use and allowing for any - names for model configurations. 
- """ - if defs.DEFAULT_CHAT_MODEL_ID not in self.models: - raise LanguageModelConfigMissingError(defs.DEFAULT_CHAT_MODEL_ID) - if defs.DEFAULT_EMBEDDING_MODEL_ID not in self.models: - raise LanguageModelConfigMissingError(defs.DEFAULT_EMBEDDING_MODEL_ID) - def _validate_retry_services(self) -> None: """Validate the retry services configuration.""" retry_factory = RetryFactory() @@ -329,7 +310,6 @@ def get_language_model_config(self, model_id: str) -> LanguageModelConfig: @model_validator(mode="after") def _validate_model(self): """Validate the model configuration.""" - self._validate_models() self._validate_input_pattern() self._validate_input_base_dir() self._validate_reporting_base_dir() From 7ba648594d53bf7f2fcb95cd9fa411ca014c3fd4 Mon Sep 17 00:00:00 2001 From: Nathan Evans Date: Thu, 20 Nov 2025 13:44:13 -0800 Subject: [PATCH 3/5] Switch to typer option prompt for full CLI use with models --- packages/graphrag/graphrag/cli/main.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/packages/graphrag/graphrag/cli/main.py b/packages/graphrag/graphrag/cli/main.py index 805f31318c..e62de47554 100644 --- a/packages/graphrag/graphrag/cli/main.py +++ b/packages/graphrag/graphrag/cli/main.py @@ -108,6 +108,18 @@ def _initialize_cli( resolve_path=True, autocompletion=ROOT_AUTOCOMPLETE, ), + model: str = typer.Option( + DEFAULT_CHAT_MODEL, + "--model", + "-m", + prompt="Specify the default chat model to use", + ), + embedding_model: str = typer.Option( + DEFAULT_EMBEDDING_MODEL, + "--embedding", + "-e", + prompt="Specify the default embedding model to use", + ), force: bool = typer.Option( False, "--force", @@ -116,12 +128,6 @@ def _initialize_cli( ), ) -> None: """Generate a default configuration file.""" - model = typer.prompt( - "Specify the default chat model to use", default=DEFAULT_CHAT_MODEL - ) - embedding_model = typer.prompt( - "Specify the default embedding model to use", default=DEFAULT_EMBEDDING_MODEL - ) from 
graphrag.cli.initialize import initialize_project_at initialize_project_at( From f6b77dd31ce01f6aeb1b62a87713924c006251fe Mon Sep 17 00:00:00 2001 From: Nathan Evans Date: Thu, 20 Nov 2025 13:50:04 -0800 Subject: [PATCH 4/5] Update getting started for init model input --- docs/get_started.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/get_started.md b/docs/get_started.md index 9a6f496383..a173611744 100644 --- a/docs/get_started.md +++ b/docs/get_started.md @@ -47,6 +47,8 @@ To initialize your workspace, first run the `graphrag init` command. graphrag init ``` +When prompted, specify the default chat and embedding models you would like to use in your config. + This will create two files, `.env` and `settings.yaml`, and a directory `input`, in the current directory. - `input` Location of text files to process with `graphrag`. From dcd2b4b80013baf241a6bac3a48cccd1ec19fc9e Mon Sep 17 00:00:00 2001 From: Nathan Evans Date: Thu, 20 Nov 2025 17:55:14 -0800 Subject: [PATCH 5/5] Bump request timeout and overall smoke test timeout --- packages/graphrag/graphrag/config/defaults.py | 2 +- tests/fixtures/min-csv/config.json | 2 +- tests/fixtures/text/config.json | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/graphrag/graphrag/config/defaults.py b/packages/graphrag/graphrag/config/defaults.py index 88449a6050..29ed5a3bea 100644 --- a/packages/graphrag/graphrag/config/defaults.py +++ b/packages/graphrag/graphrag/config/defaults.py @@ -274,7 +274,7 @@ class LanguageModelDefaults: n: int = 1 frequency_penalty: float = 0.0 presence_penalty: float = 0.0 - request_timeout: float = 180.0 + request_timeout: float = 600.0 api_base: None = None api_version: None = None deployment_name: None = None diff --git a/tests/fixtures/min-csv/config.json b/tests/fixtures/min-csv/config.json index c28508be8b..88d38f1e61 100644 --- a/tests/fixtures/min-csv/config.json +++ b/tests/fixtures/min-csv/config.json @@ -51,7 +51,7 @@ "period", "size" ], - 
"max_runtime": 1200, + "max_runtime": 2000, "expected_artifacts": ["community_reports.parquet"] }, "create_final_text_units": { diff --git a/tests/fixtures/text/config.json b/tests/fixtures/text/config.json index 792b91c48c..6248a08044 100644 --- a/tests/fixtures/text/config.json +++ b/tests/fixtures/text/config.json @@ -50,7 +50,7 @@ "period", "size" ], - "max_runtime": 1200, + "max_runtime": 2000, "expected_artifacts": ["community_reports.parquet"] }, "create_final_text_units": {