Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion packages/graphrag-cache/graphrag_cache/json_cache.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License

"""A module containing 'JsonPipelineCache' model."""
"""A module containing 'JsonCache' model."""

import json
from typing import Any
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License

"""A module containing the WorkflowCallbacks registry."""
"""A module containing 'WorkflowCallbacksManager' model."""

from graphrag.callbacks.workflow_callbacks import WorkflowCallbacks
from graphrag.index.typing.pipeline_run_result import PipelineRunResult
Expand Down
2 changes: 1 addition & 1 deletion packages/graphrag/graphrag/index/input/csv.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License

"""A module containing load method definition."""
"""A module containing 'CSVFileReader' model."""

import logging
from io import BytesIO
Expand Down
2 changes: 1 addition & 1 deletion packages/graphrag/graphrag/index/input/factory.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License

"""A module containing create_input method definition."""
"""A module containing 'InputReaderFactory' model."""

import logging

Expand Down
2 changes: 1 addition & 1 deletion packages/graphrag/graphrag/index/input/input_reader.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License

"""A module containing 'PipelineCache' model."""
"""A module containing 'InputReader' model."""

from __future__ import annotations

Expand Down
2 changes: 1 addition & 1 deletion packages/graphrag/graphrag/index/input/json.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License

"""A module containing load method definition."""
"""A module containing 'JSONFileReader' model."""

import json
import logging
Expand Down
2 changes: 1 addition & 1 deletion packages/graphrag/graphrag/index/input/text.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License

"""A module containing load method definition."""
"""A module containing 'TextFileReader' model."""

import logging
from pathlib import Path
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License

"""A module containing _get_num_total, chunk, run_strategy and load_strategy methods definitions."""
"""A module containing chunk_text method definitions."""

from typing import Any, cast

Expand Down Expand Up @@ -54,7 +54,7 @@ def chunk_text(
strategy: sentence
```
"""
strategy_exec = load_strategy(strategy)
strategy_exec = _load_strategy(strategy)

num_total = _get_num_total(input, column)
tick = progress_ticker(callbacks.progress, num_total)
Expand All @@ -67,7 +67,7 @@ def chunk_text(
input.apply(
cast(
"Any",
lambda x: run_strategy(
lambda x: _run_strategy(
strategy_exec,
x[column],
config,
Expand All @@ -79,7 +79,7 @@ def chunk_text(
)


def run_strategy(
def _run_strategy(
strategy_exec: ChunkStrategy,
input: ChunkInput,
config: ChunkingConfig,
Expand Down Expand Up @@ -111,7 +111,7 @@ def run_strategy(
return results


def load_strategy(strategy: ChunkStrategyType) -> ChunkStrategy:
def _load_strategy(strategy: ChunkStrategyType) -> ChunkStrategy:
"""Load strategy method definition."""
match strategy:
case ChunkStrategyType.tokens:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License

"""A module containing chunk strategies."""
"""A module containing run_tokens and run_sentences methods."""

from collections.abc import Iterable

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License

"""A module containing cluster_graph, apply_clustering methods definition."""
"""A module containing cluster_graph method definition."""

import logging

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License

"""A module containing create_graph definition."""
"""A module containing compute_degree method definition."""

import networkx as nx
import pandas as pd
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License

"""A module containing compute_edge_combined_degree methods definition."""
"""A module containing compute_edge_combined_degree method definition."""

from typing import cast

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License

"""A module containing embed_text, load_strategy and create_row_from_embedding_data methods definition."""
"""A module containing embed_text method definition."""

import logging

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License

"""A module containing run method definition."""
"""A module containing 'TextEmbeddingResult' model and run_embed_text method definition."""

import asyncio
import logging
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License

"""A module containing entity_extract methods."""
"""A module containing extract_graph method."""

import logging

Expand Down Expand Up @@ -35,7 +35,7 @@ async def run_strategy(row):
nonlocal num_started
text = row[text_column]
id = row[id_column]
result = await run_extract_graph(
result = await _run_extract_graph(
text=text,
source_id=id,
entity_types=entity_types,
Expand Down Expand Up @@ -68,7 +68,7 @@ async def run_strategy(row):
return (entities, relationships)


async def run_extract_graph(
async def _run_extract_graph(
text: str,
source_id: str,
entity_types: list[str],
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License

"""A module containing create_graph definition."""
"""A module containing graph_to_dataframes method definition."""

import networkx as nx
import pandas as pd
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License
"""A module containing the build_mixed_context method definition."""

"""A module containing build_mixed_context method definition."""

import pandas as pd

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License

"""A module containing create_community_reports and load_strategy methods definition."""
"""A module containing summarize_communities method definition."""

import logging
from collections.abc import Callable
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License

"""A module containing 'GraphExtractionResult' and 'GraphExtractor' models."""
"""A module containing 'SummarizationResult' and 'SummarizeExtractor' models."""

import json
from dataclasses import dataclass
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License

"""A module containing the 'Tokenizer', 'TextSplitter', 'NoopTextSplitter' and 'TokenTextSplitter' models."""
"""A module containing 'TokenChunkerOptions', 'TextSplitter', 'NoopTextSplitter', 'TokenTextSplitter', 'split_single_text_on_tokens', and 'split_multiple_texts_on_tokens'."""

import logging
from abc import ABC, abstractmethod
Expand Down
8 changes: 1 addition & 7 deletions packages/graphrag/graphrag/index/utils/derive_from_rows.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License

"""Apply a generic transform function to each row in a table."""
"""A module containing derive_from_rows, derive_from_rows_asyncio_threads, and derive_from_rows_asyncio methods."""

import asyncio
import inspect
Expand Down Expand Up @@ -55,9 +55,6 @@ async def derive_from_rows(
raise ValueError(msg)


"""A module containing the derive_from_rows_async method."""


async def derive_from_rows_asyncio_threads(
input: pd.DataFrame,
transform: Callable[[pd.Series], Awaitable[ItemType]],
Expand Down Expand Up @@ -88,9 +85,6 @@ async def execute_task(task: Coroutine) -> ItemType | None:
)


"""A module containing the derive_from_rows_async method."""


async def derive_from_rows_asyncio(
input: pd.DataFrame,
transform: Callable[[pd.Series], Awaitable[ItemType]],
Expand Down
2 changes: 1 addition & 1 deletion tests/mock_provider.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright (c) 2025 Microsoft Corporation.
# Licensed under the MIT License

"""A module containing mock model provider definitions."""
"""A module containing 'MockChatLLM' and 'MockEmbeddingLLM' models."""

from collections.abc import AsyncGenerator, Generator
from typing import Any
Expand Down
22 changes: 11 additions & 11 deletions tests/unit/indexing/operations/chunk_text/test_chunk_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@
from graphrag.config.enums import ChunkStrategyType
from graphrag.index.operations.chunk_text.chunk_text import (
_get_num_total,
_load_strategy,
_run_strategy,
chunk_text,
load_strategy,
run_strategy,
)
from graphrag.index.operations.chunk_text.typing import (
TextChunk,
Expand All @@ -36,15 +36,15 @@ def test_get_num_total_array():
def test_load_strategy_tokens():
strategy_type = ChunkStrategyType.tokens

strategy_loaded = load_strategy(strategy_type)
strategy_loaded = _load_strategy(strategy_type)

assert strategy_loaded.__name__ == "run_tokens"


def test_load_strategy_sentence():
strategy_type = ChunkStrategyType.sentence

strategy_loaded = load_strategy(strategy_type)
strategy_loaded = _load_strategy(strategy_type)

assert strategy_loaded.__name__ == "run_sentences"

Expand All @@ -55,7 +55,7 @@ def test_load_strategy_none():
with pytest.raises(
ValueError, match="Unknown strategy: <enum 'ChunkStrategyType'>"
):
load_strategy(strategy_type) # type: ignore
_load_strategy(strategy_type) # type: ignore


def test_run_strategy_str():
Expand All @@ -71,7 +71,7 @@ def test_run_strategy_str():
)
]

runned = run_strategy(strategy_mocked, input, config, tick)
runned = _run_strategy(strategy_mocked, input, config, tick)
assert runned == ["text test for run strategy"]


Expand All @@ -93,7 +93,7 @@ def test_run_strategy_arr_str():
"use for strategy",
]

runned = run_strategy(strategy_mocked, input, config, tick)
runned = _run_strategy(strategy_mocked, input, config, tick)
assert runned == expected


Expand Down Expand Up @@ -123,7 +123,7 @@ def test_run_strategy_arr_tuple():
),
]

runned = run_strategy(strategy_mocked, input, config, tick)
runned = _run_strategy(strategy_mocked, input, config, tick)
assert runned == expected


Expand Down Expand Up @@ -153,12 +153,12 @@ def test_run_strategy_arr_tuple_same_doc():
),
]

runned = run_strategy(strategy_mocked, input, config, tick)
runned = _run_strategy(strategy_mocked, input, config, tick)
assert runned == expected


@mock.patch("graphrag.index.operations.chunk_text.chunk_text.load_strategy")
@mock.patch("graphrag.index.operations.chunk_text.chunk_text.run_strategy")
@mock.patch("graphrag.index.operations.chunk_text.chunk_text._load_strategy")
@mock.patch("graphrag.index.operations.chunk_text.chunk_text._run_strategy")
@mock.patch("graphrag.index.operations.chunk_text.chunk_text.progress_ticker")
def test_chunk_text(mock_progress_ticker, mock_run_strategy, mock_load_strategy):
input_data = pd.DataFrame({"name": ["The Shining"]})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Licensed under the MIT License
import unittest

from graphrag.index.operations.extract_graph.extract_graph import run_extract_graph
from graphrag.index.operations.extract_graph.extract_graph import _run_extract_graph
from graphrag.prompts.index.extract_graph import GRAPH_EXTRACTION_PROMPT

from tests.unit.indexing.verbs.helpers.mock_llm import create_mock_llm
Expand All @@ -22,7 +22,7 @@

class TestRunChain(unittest.IsolatedAsyncioTestCase):
async def test_run_extract_graph_single_document_correct_entities_returned(self):
entities_df, _ = await run_extract_graph(
entities_df, _ = await _run_extract_graph(
text="test_text",
source_id="1",
entity_types=["person"],
Expand All @@ -39,7 +39,7 @@ async def test_run_extract_graph_single_document_correct_entities_returned(self)
)

async def test_run_extract_graph_single_document_correct_edges_returned(self):
_, relationships_df = await run_extract_graph(
_, relationships_df = await _run_extract_graph(
text="test_text",
source_id="1",
entity_types=["person"],
Expand All @@ -61,7 +61,7 @@ async def test_run_extract_graph_single_document_correct_edges_returned(self):
}

async def test_run_extract_graph_single_document_source_ids_mapped(self):
entities_df, relationships_df = await run_extract_graph(
entities_df, relationships_df = await _run_extract_graph(
text="test_text",
source_id="1",
entity_types=["person"],
Expand Down