Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update examples/FinanceBench #356

Merged
merged 4 commits into from
Sep 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 25 additions & 25 deletions examples/FinanceBench/Makefile
Original file line number Diff line number Diff line change
@@ -1,36 +1,36 @@
agent-solve:
@poetry run python htp_oodar_agent.py ${id}
dana-solve:
@poetry run python dana.py ${id}

agent-solve-w-knowledge:
@poetry run python htp_oodar_agent.py ${id} --knowledge
dana-solve-w-knowledge:
@poetry run python dana.py ${id} --knowledge

agent-solve-w-prog-store:
@poetry run python htp_oodar_agent.py ${id} --prog-store
dana-solve-w-prog-store:
@poetry run python dana.py ${id} --prog-store

agent-solve-w-knowledge-and-prog-store:
@poetry run python htp_oodar_agent.py ${id} --knowledge --prog-store
dana-solve-w-knowledge-and-prog-store:
@poetry run python dana.py ${id} --knowledge --prog-store

agent-solve-w-llama3:
@poetry run python htp_oodar_agent.py ${id} --llama3
dana-solve-w-llama3:
@poetry run python dana.py ${id} --llama3

agent-solve-w-knowledge-w-llama3:
@poetry run python htp_oodar_agent.py ${id} --knowledge --llama3
dana-solve-w-knowledge-w-llama3:
@poetry run python dana.py ${id} --knowledge --llama3

agent-solve-w-prog-store-w-llama3:
@poetry run python htp_oodar_agent.py ${id} --prog-store --llama3
dana-solve-w-prog-store-w-llama3:
@poetry run python dana.py ${id} --prog-store --llama3

agent-solve-w-knowledge-and-prog-store-w-llama3:
@poetry run python htp_oodar_agent.py ${id} --knowledge --prog-store --llama3
dana-solve-w-knowledge-and-prog-store-w-llama3:
@poetry run python dana.py ${id} --knowledge --prog-store --llama3

agent-solve-all-combos:
@poetry run python htp_oodar_agent.py ${id}
@poetry run python htp_oodar_agent.py ${id} --knowledge
@poetry run python htp_oodar_agent.py ${id} --prog-store
@poetry run python htp_oodar_agent.py ${id} --knowledge --prog-store
@poetry run python htp_oodar_agent.py ${id} --llama3
@poetry run python htp_oodar_agent.py ${id} --knowledge --llama3
@poetry run python htp_oodar_agent.py ${id} --prog-store --llama3
@poetry run python htp_oodar_agent.py ${id} --knowledge --prog-store --llama3
dana-solve-all-combos:
@poetry run python dana.py ${id}
@poetry run python dana.py ${id} --knowledge
@poetry run python dana.py ${id} --prog-store
@poetry run python dana.py ${id} --knowledge --prog-store
@poetry run python dana.py ${id} --llama3
@poetry run python dana.py ${id} --knowledge --llama3
@poetry run python dana.py ${id} --prog-store --llama3
@poetry run python dana.py ${id} --knowledge --prog-store --llama3


langchain-react-solve:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,64 +51,64 @@ def get_or_create_adaptations(doc_name: DocName) -> dict[str, str]:
return {EXPERT_HTP_COMPANY_KEY: (doc := Doc(name=doc_name)).company, EXPERT_HTP_PERIOD_KEY: doc.period}


@enable_batch_qa_and_eval(output_name='HTP-OODAR')
@log_qa_and_update_output_file(output_name='HTP-OODAR')
@enable_batch_qa_and_eval(output_name='DANA')
@log_qa_and_update_output_file(output_name='DANA')
def solve(fb_id: FbId) -> Answer:
return get_or_create_agent(doc_name=DOC_NAMES_BY_FB_ID[fb_id]).solve(
problem=QS_BY_FB_ID[fb_id],
adaptations_from_known_programs=get_or_create_adaptations(doc_name=DOC_NAMES_BY_FB_ID[fb_id]))


@enable_batch_qa_and_eval(output_name='HTP-OODAR-wKnowledge')
@log_qa_and_update_output_file(output_name='HTP-OODAR-wKnowledge')
@enable_batch_qa_and_eval(output_name='DANA-wKnowledge')
@log_qa_and_update_output_file(output_name='DANA-wKnowledge')
def solve_with_knowledge(fb_id: FbId) -> Answer:
return get_or_create_agent(doc_name=DOC_NAMES_BY_FB_ID[fb_id], expert_knowledge=True).solve(
problem=QS_BY_FB_ID[fb_id],
adaptations_from_known_programs=get_or_create_adaptations(doc_name=DOC_NAMES_BY_FB_ID[fb_id]))


@enable_batch_qa_and_eval(output_name='HTP-OODAR-wProgSpace')
@log_qa_and_update_output_file(output_name='HTP-OODAR-wProgSpace')
@enable_batch_qa_and_eval(output_name='DANA-wProgStore')
@log_qa_and_update_output_file(output_name='DANA-wProgStore')
def solve_with_program_store(fb_id: FbId) -> Answer:
return get_or_create_agent(doc_name=DOC_NAMES_BY_FB_ID[fb_id], expert_programs=True).solve(
problem=QS_BY_FB_ID[fb_id],
adaptations_from_known_programs=get_or_create_adaptations(doc_name=DOC_NAMES_BY_FB_ID[fb_id]))


@enable_batch_qa_and_eval(output_name='HTP-OODAR-wKnowledge-wProgSpace')
@log_qa_and_update_output_file(output_name='HTP-OODAR-wKnowledge-wProgSpace')
@enable_batch_qa_and_eval(output_name='DANA-wKnowledge-wProgStore')
@log_qa_and_update_output_file(output_name='DANA-wKnowledge-wProgStore')
def solve_with_knowledge_and_program_store(fb_id: FbId) -> Answer:
return get_or_create_agent(DOC_NAMES_BY_FB_ID[fb_id], expert_knowledge=True, expert_programs=True).solve(
problem=QS_BY_FB_ID[fb_id],
adaptations_from_known_programs=get_or_create_adaptations(doc_name=DOC_NAMES_BY_FB_ID[fb_id]))


@enable_batch_qa_and_eval(output_name='HTP-OODAR-wLlama3')
@log_qa_and_update_output_file(output_name='HTP-OODAR-wLlama3')
@enable_batch_qa_and_eval(output_name='DANA-wLlama3')
@log_qa_and_update_output_file(output_name='DANA-wLlama3')
def solve_with_llama3(fb_id: FbId) -> Answer:
return get_or_create_agent(doc_name=DOC_NAMES_BY_FB_ID[fb_id], use_llama3=True).solve(
problem=QS_BY_FB_ID[fb_id],
adaptations_from_known_programs=get_or_create_adaptations(doc_name=DOC_NAMES_BY_FB_ID[fb_id]))


@enable_batch_qa_and_eval(output_name='HTP-OODAR-wKnowledge-wLlama3')
@log_qa_and_update_output_file(output_name='HTP-OODAR-wKnowledge-wLlama3')
@enable_batch_qa_and_eval(output_name='DANA-wKnowledge-wLlama3')
@log_qa_and_update_output_file(output_name='DANA-wKnowledge-wLlama3')
def solve_with_knowledge_with_llama3(fb_id: FbId) -> Answer:
return get_or_create_agent(doc_name=DOC_NAMES_BY_FB_ID[fb_id], expert_knowledge=True, use_llama3=True).solve(
problem=QS_BY_FB_ID[fb_id],
adaptations_from_known_programs=get_or_create_adaptations(doc_name=DOC_NAMES_BY_FB_ID[fb_id]))


@enable_batch_qa_and_eval(output_name='HTP-OODAR-wProgSpace-wLlama3')
@log_qa_and_update_output_file(output_name='HTP-OODAR-wProgSpace-wLlama3')
@enable_batch_qa_and_eval(output_name='DANA-wProgStore-wLlama3')
@log_qa_and_update_output_file(output_name='DANA-wProgStore-wLlama3')
def solve_with_program_store_with_llama3(fb_id: FbId) -> Answer:
return get_or_create_agent(doc_name=DOC_NAMES_BY_FB_ID[fb_id], expert_programs=True, use_llama3=True).solve(
problem=QS_BY_FB_ID[fb_id],
adaptations_from_known_programs=get_or_create_adaptations(doc_name=DOC_NAMES_BY_FB_ID[fb_id]))


@enable_batch_qa_and_eval(output_name='HTP-OODAR-wKnowledge-wProgSpace-wLlama3')
@log_qa_and_update_output_file(output_name='HTP-OODAR-wKnowledge-wProgSpace-wLlama3')
@enable_batch_qa_and_eval(output_name='DANA-wKnowledge-wProgStore-wLlama3')
@log_qa_and_update_output_file(output_name='DANA-wKnowledge-wProgStore-wLlama3')
def solve_with_knowledge_and_program_store_with_llama3(fb_id: FbId) -> Answer:
return get_or_create_agent(DOC_NAMES_BY_FB_ID[fb_id], expert_knowledge=True, expert_programs=True, use_llama3=True).solve( # noqa: E501
problem=QS_BY_FB_ID[fb_id],
Expand Down
2 changes: 2 additions & 0 deletions examples/FinanceBench/ground-truths.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ financebench_id_04672:
8.738, 8.738 billion, 8738 million,
8.7, 8.7 billion or 8700 million

evaluator-unreliable: true


financebench_id_00499:
sector: Industrials
Expand Down
68 changes: 34 additions & 34 deletions examples/FinanceBench/make.bat
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@
:: =======
SET TARGET=%1

IF "%TARGET%"=="agent-solve" GOTO agent-solve
IF "%TARGET%"=="agent-solve-w-prog-store" GOTO agent-solve-w-prog-store
IF "%TARGET%"=="agent-solve-w-knowledge" GOTO agent-solve-w-knowledge
IF "%TARGET%"=="agent-solve-w-knowledge-and-prog-store" GOTO agent-solve-w-knowledge-and-prog-store
IF "%TARGET%"=="agent-solve-w-llama3" GOTO agent-solve-w-llama3
IF "%TARGET%"=="agent-solve-w-prog-store-w-llama3" GOTO agent-solve-w-prog-store-w-llama3
IF "%TARGET%"=="agent-solve-w-knowledge-w-llama3" GOTO agent-solve-w-knowledge-w-llama3
IF "%TARGET%"=="agent-solve-w-knowledge-and-prog-store-w-llama3" GOTO agent-solve-w-knowledge-and-prog-store-w-llama3
IF "%TARGET%"=="agent-solve-all-combos" GOTO agent-solve-all-combos
IF "%TARGET%"=="dana-solve" GOTO dana-solve
IF "%TARGET%"=="dana-solve-w-prog-store" GOTO dana-solve-w-prog-store
IF "%TARGET%"=="dana-solve-w-knowledge" GOTO dana-solve-w-knowledge
IF "%TARGET%"=="dana-solve-w-knowledge-and-prog-store" GOTO dana-solve-w-knowledge-and-prog-store
IF "%TARGET%"=="dana-solve-w-llama3" GOTO dana-solve-w-llama3
IF "%TARGET%"=="dana-solve-w-prog-store-w-llama3" GOTO dana-solve-w-prog-store-w-llama3
IF "%TARGET%"=="dana-solve-w-knowledge-w-llama3" GOTO dana-solve-w-knowledge-w-llama3
IF "%TARGET%"=="dana-solve-w-knowledge-and-prog-store-w-llama3" GOTO dana-solve-w-knowledge-and-prog-store-w-llama3
IF "%TARGET%"=="dana-solve-all-combos" GOTO dana-solve-all-combos

IF "%TARGET%"=="langchain-react-solve" GOTO langchain-react-solve
IF "%TARGET%"=="openai-assist" GOTO openai-assist
Expand All @@ -31,47 +31,47 @@ IF "%TARGET%"=="streamlit-run" GOTO streamlit-run

:: BATCH INFERENCING
:: =================
:agent-solve
poetry run python htp_oodar_agent.py %2
:dana-solve
poetry run python dana.py %2
GOTO end

:agent-solve-w-knowledge
poetry run python htp_oodar_agent.py %2 --knowledge
:dana-solve-w-knowledge
poetry run python dana.py %2 --knowledge
GOTO end

:agent-solve-w-prog-store
poetry run python htp_oodar_agent.py %2 --prog-store
:dana-solve-w-prog-store
poetry run python dana.py %2 --prog-store
GOTO end

:agent-solve-w-knowledge-and-prog-store
poetry run python htp_oodar_agent.py %2 --knowledge --prog-store
:dana-solve-w-knowledge-and-prog-store
poetry run python dana.py %2 --knowledge --prog-store
GOTO end

:agent-solve-w-llama3
poetry run python htp_oodar_agent.py %2 --llama3
:dana-solve-w-llama3
poetry run python dana.py %2 --llama3
GOTO end

:agent-solve-w-knowledge-w-llama3
poetry run python htp_oodar_agent.py %2 --knowledge --llama3
:dana-solve-w-knowledge-w-llama3
poetry run python dana.py %2 --knowledge --llama3
GOTO end

:agent-solve-w-prog-store-w-llama3
poetry run python htp_oodar_agent.py %2 --prog-store --llama3
:dana-solve-w-prog-store-w-llama3
poetry run python dana.py %2 --prog-store --llama3
GOTO end

:agent-solve-w-knowledge-and-prog-store-w-llama3
poetry run python htp_oodar_agent.py %2 --knowledge --prog-store --llama3
:dana-solve-w-knowledge-and-prog-store-w-llama3
poetry run python dana.py %2 --knowledge --prog-store --llama3
GOTO end

:agent-solve-all-combos
poetry run python htp_oodar_agent.py %2
poetry run python htp_oodar_agent.py %2 --knowledge
poetry run python htp_oodar_agent.py %2 --prog-space
poetry run python htp_oodar_agent.py %2 --knowledge --prog-space
poetry run python htp_oodar_agent.py %2 --llama3
poetry run python htp_oodar_agent.py %2 --knowledge --llama3
poetry run python htp_oodar_agent.py %2 --prog-space --llama3
poetry run python htp_oodar_agent.py %2 --knowledge --prog-space --llama3
:dana-solve-all-combos
:: Run dana.py on %2 once for each combination of the --knowledge,
:: --prog-store and --llama3 switches (8 runs in total).
:: The program-store switch is spelled --prog-store, the same spelling the
:: dana-solve-w-prog-store* targets in this script pass to dana.py.
poetry run python dana.py %2
poetry run python dana.py %2 --knowledge
poetry run python dana.py %2 --prog-store
poetry run python dana.py %2 --knowledge --prog-store
poetry run python dana.py %2 --llama3
poetry run python dana.py %2 --knowledge --llama3
poetry run python dana.py %2 --prog-store --llama3
poetry run python dana.py %2 --knowledge --prog-store --llama3
GOTO end


Expand Down
2 changes: 1 addition & 1 deletion examples/FinanceBench/streamlit-main.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import streamlit as st

from data_and_knowledge import DocName, Doc, DOC_NAMES, ExpertPlanId as TaskId, EXPERT_PROGRAMS
from htp_oodar_agent import get_or_create_agent, get_or_create_adaptations
from dana import get_or_create_agent, get_or_create_adaptations
from rag import get_or_create_file_resource


Expand Down