Skip to content

Commit

Permalink
.env templates
Browse files Browse the repository at this point in the history
  • Loading branch information
AlonsoGuevara committed Apr 4, 2024
1 parent 1ad30c7 commit 33c73c6
Show file tree
Hide file tree
Showing 2 changed files with 178 additions and 0 deletions.
1 change: 1 addition & 0 deletions docsite/_includes/page.njk
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ title: GraphRAG
<li>{{link_to("/posts/config/env_vars", "Using Env Vars")}}</li>
<li>{{link_to("/posts/config/json_yaml", "Using JSON or YAML")}}</li>
<li>{{link_to("/posts/config/custom", "Fully Custom")}}</li>
<li>{{link_to("/posts/config/template", "Template")}}</li>
</ul>
</li>
</ul>
Expand Down
177 changes: 177 additions & 0 deletions docsite/posts/config/template.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
---
title: Configuration Template
navtitle: Configuration Template
layout: page
tags: [post]
date: 2023-01-04
---

The following template can be stored as a `.env` file in the directory that you pass as
the `--root` parameter when running the Indexing Pipeline.

For details about how to run the Indexing Pipeline, refer to the [Index CLI](../index/2-cli) documentation.

## .env File Template

Required variables are uncommented. Optional settings are commented out; uncomment and adjust them as needed.

### Minimal Configuration

```bash
# Base LLM Settings
GRAPHRAG_API_KEY=None
GRAPHRAG_API_BASE=None # For Azure OpenAI Users
GRAPHRAG_API_VERSION=None # For Azure OpenAI Users

# Text Generation Settings
GRAPHRAG_LLM_TYPE=azure_openai_chat # or openai_chat
GRAPHRAG_LLM_DEPLOYMENT_NAME="gpt-4-turbo-preview"
GRAPHRAG_LLM_MODEL_SUPPORTS_JSON=True

# Text Embedding Settings
GRAPHRAG_EMBEDDING_TYPE=azure_openai_embedding # or openai_embedding
GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME="text-embedding-3-small"

# Data Mapping Settings
GRAPHRAG_INPUT_TYPE="text"

```

### Full Configuration

```bash

# Required LLM Config

# Input Data Configuration
GRAPHRAG_INPUT_TYPE=text

# Plaintext Input Data Configuration
# GRAPHRAG_INPUT_FILE_PATTERN=.*\.txt

# CSV Input Data Configuration (uncomment to ingest CSV files instead of plain text)
# GRAPHRAG_INPUT_TYPE="csv"
# GRAPHRAG_INPUT_FILE_PATTERN=".*\.csv$"
# GRAPHRAG_INPUT_SOURCE_COLUMN=source
# GRAPHRAG_INPUT_TIMESTAMP_COLUMN=None
# GRAPHRAG_INPUT_TIMESTAMP_FORMAT=None
# GRAPHRAG_INPUT_TEXT_COLUMN="text"
# GRAPHRAG_INPUT_ATTRIBUTE_COLUMNS=id
# GRAPHRAG_INPUT_TITLE_COLUMN="title"
# GRAPHRAG_INPUT_STORAGE_TYPE="file"
# GRAPHRAG_INPUT_CONNECTION_STRING=None
# GRAPHRAG_INPUT_CONTAINER_NAME=None
# GRAPHRAG_INPUT_BASE_DIR=None

# Base LLM Settings
GRAPHRAG_API_KEY=None
GRAPHRAG_API_BASE=None # For Azure OpenAI Users
GRAPHRAG_API_VERSION=None # For Azure OpenAI Users
# GRAPHRAG_API_ORGANIZATION=None
# GRAPHRAG_API_PROXY=None

# Text Generation Settings
# GRAPHRAG_LLM_TYPE=openai_chat
GRAPHRAG_LLM_API_KEY=None # If GRAPHRAG_API_KEY is not set
GRAPHRAG_LLM_API_BASE=None # For Azure OpenAI Users and if GRAPHRAG_API_BASE is not set
GRAPHRAG_LLM_API_VERSION=None # For Azure OpenAI Users and if GRAPHRAG_API_VERSION is not set
GRAPHRAG_LLM_MODEL_SUPPORTS_JSON=True # Suggested by default
# GRAPHRAG_LLM_API_ORGANIZATION=None
# GRAPHRAG_LLM_API_PROXY=None
# GRAPHRAG_LLM_DEPLOYMENT_NAME=None
# GRAPHRAG_LLM_MODEL=gpt-4-turbo-preview
# GRAPHRAG_LLM_MAX_TOKENS=4000
# GRAPHRAG_LLM_REQUEST_TIMEOUT=180
# GRAPHRAG_LLM_THREAD_COUNT=50
# GRAPHRAG_LLM_THREAD_STAGGER=0.3
# GRAPHRAG_LLM_CONCURRENT_REQUESTS=25
# GRAPHRAG_LLM_TPM=0
# GRAPHRAG_LLM_RPM=0
# GRAPHRAG_LLM_MAX_RETRIES=10
# GRAPHRAG_LLM_MAX_RETRY_WAIT=10
# GRAPHRAG_LLM_SLEEP_ON_RATE_LIMIT_RECOMMENDATION=True

# Text Embedding Settings
# GRAPHRAG_EMBEDDING_TYPE=openai_embedding
GRAPHRAG_EMBEDDING_API_KEY=None # If GRAPHRAG_API_KEY is not set
GRAPHRAG_EMBEDDING_API_BASE=None # For Azure OpenAI Users and if GRAPHRAG_API_BASE is not set
GRAPHRAG_EMBEDDING_API_VERSION=None # For Azure OpenAI Users and if GRAPHRAG_API_VERSION is not set
# GRAPHRAG_EMBEDDING_API_ORGANIZATION=None
# GRAPHRAG_EMBEDDING_API_PROXY=None
# GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME=None
# GRAPHRAG_EMBEDDING_MODEL=text-embedding-3-small
# GRAPHRAG_EMBEDDING_BATCH_SIZE=16
# GRAPHRAG_EMBEDDING_BATCH_MAX_TOKENS=8191
# GRAPHRAG_EMBEDDING_TARGET=required
# GRAPHRAG_EMBEDDING_SKIP=None
# GRAPHRAG_EMBEDDING_THREAD_COUNT=None
# GRAPHRAG_EMBEDDING_THREAD_STAGGER=50
# GRAPHRAG_EMBEDDING_CONCURRENT_REQUESTS=25
# GRAPHRAG_EMBEDDING_TPM=0
# GRAPHRAG_EMBEDDING_RPM=0
# GRAPHRAG_EMBEDDING_MAX_RETRIES=10
# GRAPHRAG_EMBEDDING_MAX_RETRY_WAIT=10
# GRAPHRAG_EMBEDDING_SLEEP_ON_RATE_LIMIT_RECOMMENDATION=True

# Data Mapping Settings
# GRAPHRAG_INPUT_ENCODING=utf-8

# Data Chunking
# GRAPHRAG_CHUNK_SIZE=300
# GRAPHRAG_CHUNK_OVERLAP=100
# GRAPHRAG_CHUNK_BY_COLUMNS=id

# Prompting Overrides
# GRAPHRAG_ENTITY_EXTRACTION_PROMPT_FILE=None
# GRAPHRAG_ENTITY_EXTRACTION_MAX_GLEANINGS=0
# GRAPHRAG_ENTITY_EXTRACTION_ENTITY_TYPES=organization,person,event,geo
# GRAPHRAG_SUMMARIZE_DESCRIPTIONS_PROMPT_FILE=None
# GRAPHRAG_SUMMARIZE_DESCRIPTIONS_MAX_LENGTH=500
# GRAPHRAG_CLAIM_EXTRACTION_DESCRIPTION="Any claims or facts that could be relevant to threat analysis."
# GRAPHRAG_CLAIM_EXTRACTION_PROMPT_FILE=None
# GRAPHRAG_CLAIM_EXTRACTION_MAX_GLEANINGS=0
# GRAPHRAG_COMMUNITY_REPORT_PROMPT_FILE=None
# GRAPHRAG_COMMUNITY_REPORT_MAX_LENGTH=1500

# Storage
# GRAPHRAG_STORAGE_TYPE=file
# GRAPHRAG_STORAGE_CONNECTION_STRING=None
# GRAPHRAG_STORAGE_CONTAINER_NAME=None
# GRAPHRAG_STORAGE_BASE_DIR=None

# Cache
# GRAPHRAG_CACHE_TYPE=file
# GRAPHRAG_CACHE_CONNECTION_STRING=None
# GRAPHRAG_CACHE_CONTAINER_NAME=None
# GRAPHRAG_CACHE_BASE_DIR=None

# Reporting
# GRAPHRAG_REPORTING_TYPE=file
# GRAPHRAG_REPORTING_CONNECTION_STRING=None
# GRAPHRAG_REPORTING_CONTAINER_NAME=None
# GRAPHRAG_REPORTING_BASE_DIR=None

# Node2Vec Parameters
# GRAPHRAG_NODE2VEC_ENABLED=False
# GRAPHRAG_NODE2VEC_NUM_WALKS=10
# GRAPHRAG_NODE2VEC_WALK_LENGTH=40
# GRAPHRAG_NODE2VEC_WINDOW_SIZE=2
# GRAPHRAG_NODE2VEC_ITERATIONS=3
# GRAPHRAG_NODE2VEC_RANDOM_SEED=597832

# Data Snapshotting
# GRAPHRAG_SNAPSHOT_GRAPHML=False
# GRAPHRAG_SNAPSHOT_RAW_ENTITIES=False
# GRAPHRAG_SNAPSHOT_TOP_LEVEL_NODES=False

# Miscellaneous Settings
# GRAPHRAG_ASYNC_MODE=asyncio
# GRAPHRAG_ENCODING_MODEL=cl100k_base
# GRAPHRAG_MAX_CLUSTER_SIZE=10
# GRAPHRAG_ENTITY_RESOLUTION_ENABLED=False
# GRAPHRAG_SKIP_WORKFLOWS=None
# GRAPHRAG_UMAP_ENABLED=False



```

0 comments on commit 33c73c6

Please sign in to comment.