-
Notifications
You must be signed in to change notification settings - Fork 2.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
1ad30c7
commit 33c73c6
Showing
2 changed files
with
178 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,177 @@ | ||
--- | ||
title: Configuration Template | ||
navtitle: Configuration Template | ||
layout: page | ||
tags: [post] | ||
date: 2023-01-04 | ||
--- | ||
|
||
The following template can be used and stored as a `.env` file in the directory where you are pointing | |
the `--root` parameter on your Indexing Pipeline execution. | ||
|
||
For details about how to run the Indexing Pipeline, refer to the [Index CLI](../index/2-cli) documentation. | ||
|
||
## .env File Template | ||
|
||
Required variables are uncommented. All the optional configuration can be turned on or off as needed. | ||
|
||
### Minimal Configuration | ||
|
||
```bash | ||
# Base LLM Settings | ||
GRAPHRAG_API_KEY=None | ||
GRAPHRAG_API_BASE=None # For Azure OpenAI Users | ||
GRAPHRAG_API_VERSION=None # For Azure OpenAI Users | ||
|
||
# Text Generation Settings | ||
GRAPHRAG_LLM_TYPE=azure_openai_chat # or openai_chat | ||
GRAPHRAG_LLM_DEPLOYMENT_NAME="gpt-4-turbo-preview" | ||
GRAPHRAG_LLM_MODEL_SUPPORTS_JSON=True | ||
|
||
# Text Embedding Settings | ||
GRAPHRAG_EMBEDDING_TYPE=azure_openai_embedding # or openai_embedding | |
GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME="text-embedding-3-small" | |
|
||
# Data Mapping Settings | ||
GRAPHRAG_INPUT_TYPE="text" | ||
|
||
``` | ||
|
||
### Full Configuration | ||
|
||
```bash | ||
|
||
# Required LLM Config | ||
|
||
# Input Data Configuration | ||
GRAPHRAG_INPUT_TYPE=text | ||
|
||
# Plaintext Input Data Configuration | ||
# GRAPHRAG_INPUT_FILE_PATTERN=.*\.txt | ||
|
||
# CSV Input Data Configuration | ||
GRAPHRAG_INPUT_TYPE="csv" | ||
GRAPHRAG_INPUT_FILE_PATTERN=".*\.csv$" | ||
GRAPHRAG_INPUT_SOURCE_COLUMN=source | ||
# GRAPHRAG_INPUT_TIMESTAMP_COLUMN=None | ||
# GRAPHRAG_INPUT_TIMESTAMP_FORMAT=None | ||
# GRAPHRAG_INPUT_TEXT_COLUMN="text" | ||
# GRAPHRAG_INPUT_ATTRIBUTE_COLUMNS=id | ||
# GRAPHRAG_INPUT_TITLE_COLUMN="title" | ||
# GRAPHRAG_INPUT_STORAGE_TYPE="file" | ||
# GRAPHRAG_INPUT_CONNECTION_STRING=None | ||
# GRAPHRAG_INPUT_CONTAINER_NAME=None | ||
# GRAPHRAG_INPUT_BASE_DIR=None | ||
|
||
# Base LLM Settings | ||
GRAPHRAG_API_KEY=None | ||
GRAPHRAG_API_BASE=None # For Azure OpenAI Users | ||
GRAPHRAG_API_VERSION=None # For Azure OpenAI Users | ||
# GRAPHRAG_API_ORGANIZATION=None | ||
# GRAPHRAG_API_PROXY=None | ||
|
||
# Text Generation Settings | ||
# GRAPHRAG_LLM_TYPE=openai_chat | ||
GRAPHRAG_LLM_API_KEY=None # If GRAPHRAG_API_KEY is not set | ||
GRAPHRAG_LLM_API_BASE=None # For Azure OpenAI Users and if GRAPHRAG_API_BASE is not set | ||
GRAPHRAG_LLM_API_VERSION=None # For Azure OpenAI Users and if GRAPHRAG_API_VERSION is not set | ||
GRAPHRAG_LLM_MODEL_SUPPORTS_JSON=True # Suggested by default | ||
# GRAPHRAG_LLM_API_ORGANIZATION=None | ||
# GRAPHRAG_LLM_API_PROXY=None | ||
# GRAPHRAG_LLM_DEPLOYMENT_NAME=None | ||
# GRAPHRAG_LLM_MODEL=gpt-4-turbo-preview | ||
# GRAPHRAG_LLM_MAX_TOKENS=4000 | ||
# GRAPHRAG_LLM_REQUEST_TIMEOUT=180 | ||
# GRAPHRAG_LLM_THREAD_COUNT=50 | ||
# GRAPHRAG_LLM_THREAD_STAGGER=0.3 | ||
# GRAPHRAG_LLM_CONCURRENT_REQUESTS=25 | ||
# GRAPHRAG_LLM_TPM=0 | ||
# GRAPHRAG_LLM_RPM=0 | ||
# GRAPHRAG_LLM_MAX_RETRIES=10 | ||
# GRAPHRAG_LLM_MAX_RETRY_WAIT=10 | ||
# GRAPHRAG_LLM_SLEEP_ON_RATE_LIMIT_RECOMMENDATION=True | ||
|
||
# Text Embedding Settings | ||
# GRAPHRAG_EMBEDDING_TYPE=openai_embedding | ||
GRAPHRAG_EMBEDDING_API_KEY=None # If GRAPHRAG_API_KEY is not set | ||
GRAPHRAG_EMBEDDING_API_BASE=None # For Azure OpenAI Users and if GRAPHRAG_API_BASE is not set | ||
GRAPHRAG_EMBEDDING_API_VERSION=None # For Azure OpenAI Users and if GRAPHRAG_API_VERSION is not set | ||
# GRAPHRAG_EMBEDDING_API_ORGANIZATION=None | ||
# GRAPHRAG_EMBEDDING_API_PROXY=None | ||
# GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME=None | ||
# GRAPHRAG_EMBEDDING_MODEL=text-embedding-3-small | ||
# GRAPHRAG_EMBEDDING_BATCH_SIZE=16 | ||
# GRAPHRAG_EMBEDDING_BATCH_MAX_TOKENS=8191 | ||
# GRAPHRAG_EMBEDDING_TARGET=required | ||
# GRAPHRAG_EMBEDDING_SKIP=None | ||
# GRAPHRAG_EMBEDDING_THREAD_COUNT=None | ||
# GRAPHRAG_EMBEDDING_THREAD_STAGGER=50 | ||
# GRAPHRAG_EMBEDDING_CONCURRENT_REQUESTS=25 | ||
# GRAPHRAG_EMBEDDING_TPM=0 | ||
# GRAPHRAG_EMBEDDING_RPM=0 | ||
# GRAPHRAG_EMBEDDING_MAX_RETRIES=10 | ||
# GRAPHRAG_EMBEDDING_MAX_RETRY_WAIT=10 | ||
# GRAPHRAG_EMBEDDING_SLEEP_ON_RATE_LIMIT_RECOMMENDATION=True | ||
|
||
# Data Mapping Settings | ||
# GRAPHRAG_INPUT_ENCODING=utf-8 | ||
|
||
# Data Chunking | ||
# GRAPHRAG_CHUNK_SIZE=300 | ||
# GRAPHRAG_CHUNK_OVERLAP=100 | ||
# GRAPHRAG_CHUNK_BY_COLUMNS=id | ||
|
||
# Prompting Overrides | ||
# GRAPHRAG_ENTITY_EXTRACTION_PROMPT_FILE=None | ||
# GRAPHRAG_ENTITY_EXTRACTION_MAX_GLEANINGS=0 | ||
# GRAPHRAG_ENTITY_EXTRACTION_ENTITY_TYPES=organization,person,event,geo | ||
# GRAPHRAG_SUMMARIZE_DESCRIPTIONS_PROMPT_FILE=None | ||
# GRAPHRAG_SUMMARIZE_DESCRIPTIONS_MAX_LENGTH=500 | ||
# GRAPHRAG_CLAIM_EXTRACTION_DESCRIPTION="Any claims or facts that could be relevant to threat analysis." | ||
# GRAPHRAG_CLAIM_EXTRACTION_PROMPT_FILE=None | ||
# GRAPHRAG_CLAIM_EXTRACTION_MAX_GLEANINGS=0 | ||
# GRAPHRAG_COMMUNITY_REPORT_PROMPT_FILE=None | ||
# GRAPHRAG_COMMUNITY_REPORT_MAX_LENGTH=1500 | ||
|
||
# Storage | ||
# GRAPHRAG_STORAGE_TYPE=file | ||
# GRAPHRAG_STORAGE_CONNECTION_STRING=None | ||
# GRAPHRAG_STORAGE_CONTAINER_NAME=None | ||
# GRAPHRAG_STORAGE_BASE_DIR=None | ||
|
||
# Cache | ||
# GRAPHRAG_CACHE_TYPE=file | ||
# GRAPHRAG_CACHE_CONNECTION_STRING=None | ||
# GRAPHRAG_CACHE_CONTAINER_NAME=None | ||
# GRAPHRAG_CACHE_BASE_DIR=None | ||
|
||
# Reporting | ||
# GRAPHRAG_REPORTING_TYPE=file | ||
# GRAPHRAG_REPORTING_CONNECTION_STRING=None | ||
# GRAPHRAG_REPORTING_CONTAINER_NAME=None | ||
# GRAPHRAG_REPORTING_BASE_DIR=None | ||
|
||
# Node2Vec Parameters | ||
# GRAPHRAG_NODE2VEC_ENABLED=False | ||
# GRAPHRAG_NODE2VEC_NUM_WALKS=10 | ||
# GRAPHRAG_NODE2VEC_WALK_LENGTH=40 | ||
# GRAPHRAG_NODE2VEC_WINDOW_SIZE=2 | ||
# GRAPHRAG_NODE2VEC_ITERATIONS=3 | ||
# GRAPHRAG_NODE2VEC_RANDOM_SEED=597832 | ||
|
||
# Data Snapshotting | ||
# GRAPHRAG_SNAPSHOT_GRAPHML=False | ||
# GRAPHRAG_SNAPSHOT_RAW_ENTITIES=False | ||
# GRAPHRAG_SNAPSHOT_TOP_LEVEL_NODES=False | ||
|
||
# Miscellaneous Settings | ||
# GRAPHRAG_ASYNC_MODE=asyncio | ||
# GRAPHRAG_ENCODING_MODEL=cl100k_base | ||
# GRAPHRAG_MAX_CLUSTER_SIZE=10 | ||
# GRAPHRAG_ENTITY_RESOLUTION_ENABLED=False | ||
# GRAPHRAG_SKIP_WORKFLOWS=None | ||
# GRAPHRAG_UMAP_ENABLED=False | ||
|
||
|
||
|
||
``` |