Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Latest with win fixes and corsi #13

Merged
merged 7 commits into from
Mar 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/ci-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ jobs:


- name: Test all
shell: bash
run: |
./regenerateAndTest.sh -free # Only test the parts not requiring any API keys...

Expand Down
4,858 changes: 4,858 additions & 0 deletions corpus/papers/test/PrimerOnCElegans.pdf.json

Large diffs are not rendered by default.

11 changes: 8 additions & 3 deletions openworm_ai/graphrag/GraphRAG_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@

import glob
import sys
import json

from modelspec.utils import load_json
# from modelspec.utils import load_json

STORE_DIR = "store"
SOURCE_DOCUMENT = "source document"
Expand All @@ -24,7 +25,10 @@
documents = []
for json_file in json_inputs:
print("Adding %s" % json_file)
doc_model = load_json(json_file)

with open(json_file, encoding="utf-8") as f:
doc_model = json.load(f)

for title in doc_model:
print(" Processing document: %s" % title)
doc_contents = doc_model[title]
Expand Down Expand Up @@ -98,7 +102,8 @@ def process_query(response):
"What is the main function of cell AVBR?",
"Give me 3 facts about the coelomocyte system in C. elegens",
"Give me 3 facts about the control of motor programs in c. elegans by monoamines",
"The NeuroPAL transgene is cool. Give me some examples of fluorophores in it.",
"The NeuroPAL transgene is amazing. Give me some examples of fluorophores in it.",
"When was the first metazoan genome sequenced? Answer only with the year.",
]

for query in queries:
Expand Down
4 changes: 2 additions & 2 deletions openworm_ai/parser/DocumentModels.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ class Document(Base):

def to_markdown(self, file_name):
fn = Path(file_name)
with open(fn, "w") as f:
with open(fn, "w", encoding="utf-8") as f:
f.write(f"# {self.title}\n")
f.write(f"\n_Generated from: {self.source}_\n")
for section in self.sections:
Expand All @@ -81,7 +81,7 @@ def to_markdown(self, file_name):

def to_plaintext(self, file_name):
fn = Path(file_name)
with open(fn, "w") as f:
with open(fn, "w", encoding="utf-8") as f:
f.write(f"{self.title}\n")
f.write(f"\nGenerated from: {self.source}\n")
for section in self.sections:
Expand Down
4 changes: 4 additions & 0 deletions openworm_ai/parser/ParseLlamaIndexJson.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,10 @@ def convert_to_json(paper_ref, paper_info, output_dir):
"corpus/papers/test/Randi2023_Llamaparse_Accurate.pdf.json",
"https://www.nature.com/articles/s41586-023-06683-4",
],
"Corsi_et_al_2015": [
"corpus/papers/test/PrimerOnCElegans.pdf.json",
"https://academic.oup.com/genetics/article/200/2/387/5936175",
],
}

# Loop through papers and process markdown sections
Expand Down
20 changes: 14 additions & 6 deletions openworm_ai/parser/ParseWormAtlas.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,15 @@ def __init__(self, title, info):
self.title = title
ref = title.replace(" ", "_")

self.markdown = open(Path("%s/%s.md" % (MARKDOWN_DIR, ref)), "w")
self.markdown = open(
Path("%s/%s.md" % (MARKDOWN_DIR, ref)), "w", encoding="utf-8"
)

self.markdown.write("# %s\n\n" % title)

self.plaintext = open(Path("%s/%s.txt" % (PLAINTEXT_DIR, ref)), "w")
self.plaintext = open(
Path("%s/%s.txt" % (PLAINTEXT_DIR, ref)), "w", encoding="utf-8"
)

self.plaintext.write("%s\n\n" % title)

Expand Down Expand Up @@ -226,10 +230,10 @@ def read_all_cell_info_file():
ref = "BasicCellInfo"
title = "Basic information on C. elegans neurons from WormAtlas"

markdown = open(Path("%s/%s.md" % (MARKDOWN_DIR, ref)), "w")
markdown = open(Path("%s/%s.md" % (MARKDOWN_DIR, ref)), "w", encoding="utf-8")
markdown.write("# %s\n\n" % title)

plaintext = open(Path("%s/%s.txt" % (PLAINTEXT_DIR, ref)), "w")
plaintext = open(Path("%s/%s.txt" % (PLAINTEXT_DIR, ref)), "w", encoding="utf-8")
plaintext.write("%s\n\n" % title)

doc_model = Document(
Expand All @@ -242,7 +246,9 @@ def read_all_cell_info_file():
doc_model.sections.append(current_section)

with open(
Path(CORPUS_LOCATION + "/wormatlas/%s/all_cell_info.csv" % ref), newline="\n"
Path(CORPUS_LOCATION + "/wormatlas/%s/all_cell_info.csv" % ref),
newline="\n",
encoding="utf-8",
) as csvfile:
reader = csv.reader(csvfile, delimiter=",", quotechar='"')

Expand Down Expand Up @@ -318,7 +324,9 @@ def read_all_cell_info_file():

# print(openworm_ai.__file__)
print(os.getcwd())
with open(Path("processed/markdown/wormatlas/README.md"), "w") as readme:
with open(
Path("processed/markdown/wormatlas/README.md"), "w", encoding="utf-8"
) as readme:
readme.write("""
## WormAtlas Handbooks

Expand Down
3 changes: 0 additions & 3 deletions openworm_ai/quiz/QuizMaster.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@ def save_quiz(num_questions, num_answers, llm_ver, temperature=0):
print(f"Selected LLM: {llm_ver}")

if "-ask" in sys.argv:

# quiz_json = "openworm_ai/quiz/samples/GPT4o_50questions.json"
# quiz_json = "openworm_ai/quiz/samples/GPT4o_10questions.json"

Expand Down Expand Up @@ -136,7 +135,5 @@ def save_quiz(num_questions, num_answers, llm_ver, temperature=0):
# TODO: turn this into a method that returns a dictionary of "stats" listing the LLM and its correct/incorrect answers;
# this can then be used to plot a comparison of a variety of LLMs on general knowledge
else:

print(f"Debug: Using LLM {llm_ver} for saving quiz")
save_quiz(100, 4, llm_ver, temperature=0.2)

4 changes: 2 additions & 2 deletions openworm_ai/quiz/quiz_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def load_llms():
def load_questions_from_json(filename):
"""Loads a structured quiz JSON file and extracts questions and answers."""
try:
with open(filename, "r") as f:
with open(filename, "r", encoding="utf-8") as f:
data = json.load(f)

if "questions" not in data or not isinstance(data["questions"], list):
Expand All @@ -73,7 +73,6 @@ def load_questions_from_json(filename):
questions.append(
{"question": q["question"], "answers": formatted_answers}
)

if len(questions) == 0:
raise ValueError("Error: No valid questions found in the JSON file.")

Expand Down Expand Up @@ -213,6 +212,7 @@ def save_results_to_json(

def main():
"""Main execution function."""

questions = load_questions_from_json(SOURCE_QUESTIONS_FILE)

if not questions:
Expand Down
4 changes: 0 additions & 4 deletions openworm_ai/utils/llms.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@
LLM_OLLAMA_LLAMA32,
LLM_OLLAMA_MISTRAL,
LLM_OLLAMA_TINYLLAMA,

LLM_OLLAMA_PHI4,
LLM_OLLAMA_GEMMA2,
LLM_OLLAMA_DEEPSEEK,
Expand Down Expand Up @@ -181,7 +180,6 @@ def get_llm(llm_ver, temperature):

llm = OllamaLLM(model="tinyllama")


elif llm_ver == LLM_OLLAMA_PHI4:
from langchain_ollama.llms import OllamaLLM

Expand Down Expand Up @@ -224,7 +222,6 @@ def get_llm(llm_ver, temperature):
print("Debug: Using Falcon2")
return OllamaLLM(model="falcon2:latest", temperature=temperature)


return llm


Expand Down Expand Up @@ -349,7 +346,6 @@ def get_llm_from_argv(argv):
if "-o-t" in argv:
llm_ver = LLM_OLLAMA_TINYLLAMA


print(f"Debug: get_llm_from_argv selected {llm_ver}")

return llm_ver
Expand Down
Loading