Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .semversioner/next-release/patch-20260211211707376370.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"type": "patch",
"description": "Move document ID, human_readable_id, and raw_data initialization from create_final_documents into load_input_documents and load_update_documents."
}
5 changes: 2 additions & 3 deletions docs/examples_notebooks/api_overview.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,10 @@
"from pathlib import Path\n",
"from pprint import pprint\n",
"\n",
"import graphrag.api as api\n",
"import pandas as pd\n",
"from graphrag.config.load_config import load_config\n",
"from graphrag.index.typing.pipeline_run_result import PipelineRunResult\n",
"\n",
"import graphrag.api as api"
"from graphrag.index.typing.pipeline_run_result import PipelineRunResult"
]
},
{
Expand Down
5 changes: 2 additions & 3 deletions docs/examples_notebooks/input_documents.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,10 @@
"from pathlib import Path\n",
"from pprint import pprint\n",
"\n",
"import graphrag.api as api\n",
"import pandas as pd\n",
"from graphrag.config.load_config import load_config\n",
"from graphrag.index.typing.pipeline_run_result import PipelineRunResult\n",
"\n",
"import graphrag.api as api"
"from graphrag.index.typing.pipeline_run_result import PipelineRunResult"
]
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,4 @@ def create_final_documents(
copy=False,
).reset_index(drop=True)

rejoined["id"] = rejoined["id"].astype(str)
rejoined["human_readable_id"] = rejoined.index

if "raw_data" not in rejoined.columns:
rejoined["raw_data"] = pd.Series(dtype="object")

return rejoined.loc[:, DOCUMENTS_FINAL_COLUMNS]
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,8 @@ async def run_workflow(

async def load_input_documents(input_reader: InputReader) -> pd.DataFrame:
    """Load and parse input documents into a standard format.

    Reads every file exposed by ``input_reader`` into a DataFrame, then
    initializes the bookkeeping columns here so later workflow steps do not
    have to:

    * ``human_readable_id`` is set from the frame's index.
    * ``raw_data`` is added as an empty object-dtype column when the reader
      did not supply one.

    Args:
        input_reader: Reader whose awaitable ``read_files()`` result is
            passed to the ``pd.DataFrame`` constructor.

    Returns:
        The loaded documents with ``human_readable_id`` and ``raw_data``
        columns present.
    """
    # NOTE: the stale early `return pd.DataFrame(...)` was removed; it made
    # the column initialization below unreachable.
    output = pd.DataFrame(await input_reader.read_files())
    output["human_readable_id"] = output.index
    if "raw_data" not in output.columns:
        # Empty Series: the column exists but carries no values.
        output["raw_data"] = pd.Series(dtype="object")
    return output
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ async def load_update_documents(
) -> pd.DataFrame:
"""Load and parse update-only input documents into a standard format."""
input_documents = pd.DataFrame(await input_reader.read_files())
input_documents["human_readable_id"] = input_documents.index
if "raw_data" not in input_documents.columns:
input_documents["raw_data"] = pd.Series(dtype="object")
# previous table provider has the output of the previous run
# we'll use this to diff the input from the prior
delta_documents = await get_delta_docs(input_documents, previous_table_provider)
Expand Down
8 changes: 4 additions & 4 deletions tests/verbs/test_create_community_reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@


from graphrag.data_model.schemas import COMMUNITY_REPORTS_FINAL_COLUMNS
from graphrag.index.workflows.create_community_reports import (
run_workflow,
)

from graphrag.index.operations.summarize_communities.community_reports_extractor import (
CommunityReportResponse,
FindingModel,
)
from graphrag.index.workflows.create_community_reports import (
run_workflow,
)

from tests.unit.config.utils import get_default_graphrag_config

from .util import (
Expand Down
3 changes: 1 addition & 2 deletions unified-search-app/app/app_logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import logging
from typing import TYPE_CHECKING

import graphrag.api as api
import streamlit as st
from knowledge_loader.data_sources.loader import (
create_datasource,
Expand All @@ -17,8 +18,6 @@
from state.session_variables import SessionVariables
from ui.search import display_search_result

import graphrag.api as api

if TYPE_CHECKING:
import pandas as pd

Expand Down