Skip to content

Commit

Permalink
Fix too-long-context issue. (#4735)
Browse files Browse the repository at this point in the history
### What problem does this PR solve?

#4728

### Type of change

- [x] Bug Fix (non-breaking change that fixes an issue)
  • Loading branch information
KevinHuSh authored Feb 6, 2025
1 parent a3a7043 commit 2a07eb6
Show file tree
Hide file tree
Showing 4 changed files with 6 additions and 3 deletions.
2 changes: 2 additions & 0 deletions graphrag/general/community_reports_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ def __call__(self, graph: nx.Graph, callback: Callable | None = None):
weight = ents["weight"]
ents = ents["nodes"]
ent_df = pd.DataFrame(self._get_entity_(ents)).dropna()#[{"entity": n, **graph.nodes[n]} for n in ents])
if ent_df.empty:
continue
ent_df["entity"] = ent_df["entity_name"]
del ent_df["entity_name"]
rela_df = pd.DataFrame(self._get_relation_(list(ent_df["entity"]), list(ent_df["entity"]), 10000))
Expand Down
3 changes: 2 additions & 1 deletion graphrag/general/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ def __call__(
with ThreadPoolExecutor(max_workers=max_workers) as exe:
threads = []
for i, (cid, ck) in enumerate(chunks):
ck = truncate(ck, int(self._llm.max_length*0.8))
threads.append(
exe.submit(self._process_single_content, (cid, ck)))

Expand Down Expand Up @@ -241,5 +242,5 @@ def _handle_entity_relation_summary(
)
use_prompt = prompt_template.format(**context_base)
logging.info(f"Trigger summary: {entity_or_relation_name}")
summary = self._chat(use_prompt, [{"role": "assistant", "content": "Output: "}], {"temperature": 0.8})
summary = self._chat(use_prompt, [{"role": "user", "content": "Output: "}], {"temperature": 0.8})
return summary
2 changes: 1 addition & 1 deletion graphrag/general/graph_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ def _process_single_content(self,
token_count += num_tokens_from_string(hint_prompt + response)

results = response or ""
history = [{"role": "system", "content": hint_prompt}, {"role": "assistant", "content": response}]
history = [{"role": "system", "content": hint_prompt}, {"role": "user", "content": response}]

# Repeat to ensure we maximize entity count
for i in range(self._max_gleanings):
Expand Down
2 changes: 1 addition & 1 deletion graphrag/light/graph_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def _process_single_content(self, chunk_key_dp: tuple[str, str]):
).format(**self._context_base, input_text=content)

try:
gen_conf = {"temperature": 0.3}
gen_conf = {"temperature": 0.8}
final_result = self._chat(hint_prompt, [{"role": "user", "content": "Output:"}], gen_conf)
token_count += num_tokens_from_string(hint_prompt + final_result)
history = pack_user_ass_to_openai_messages(hint_prompt, final_result)
Expand Down

0 comments on commit 2a07eb6

Please sign in to comment.