Merge branch 'main' of https://GitHub.com/Aitomatic/OpenSSA into docs

aitomatic · Oct 7, 2024 · f53c8dc
2 parents 99576a7 + eb85028
commit f53c8dc
Show file tree

Hide file tree

Showing 12 changed files with 90 additions and 92 deletions.
diff --git a/.vscode/extensions.json b/.vscode/extensions.json
@@ -5,6 +5,8 @@
 
     "charliermarsh.ruff",  // Ruff
 
+    "davidanson.vscode-markdownlint",  // MarkdownLint
+
     "soulcode.vscode-unwanted-recommendations"  // Unwanted Recommendations
   ],
 

diff --git a/README.md b/README.md
@@ -2,43 +2,38 @@
 
 # OpenSSA: Neurosymbolic Agentic AI for Industrial Problem-Solving
 
-**Why OpenSSA?**
 OpenSSA is an open-source neurosymbolic agentic AI framework
 designed to solve complex, high-stakes problems in industries like semiconductor, manufacturing and finance,
 where consistency, accuracy and deterministic outcomes are essential.
 
-At the core of OpenSSA is the **Domain-Aware Neurosymbolic Agent (DANA)** architecture,
-advancing AI from basic pattern-matching and information retrieval to true problem-solving.
-It overcomes the limitations of traditional LLMs and RAG in high-precision, multi-step problem-solving
-by combining **Hierarchical Task Plans (HTPs)** to structure complex programs and the **Observe-Orient-Decide-Act Reasoning (OODAR)** paradigm to execute such programs.
+At the core of OpenSSA is the [__Domain-Aware Neurosymbolic Agent (DANA)__](https://arxiv.org/abs/2410.02823) architecture,
+advancing generative AI from basic pattern matching and information retrieval to industrial-grade problem solving.
 By integrating domain-specific knowledge with neural and symbolic planning and reasoning,
-OpenSSA consistently delivers accurate solutions for complex industrial challenges.
+such as __Hierarchical Task Planning (HTP)__ for structuring programs
+and __Observe-Orient-Decide-Act Reasoning (OODAR)__ for executing such programs,
+OpenSSA DANA agents consistently deliver accurate solutions, often using much smaller models.
 
 ## Key Benefits of OpenSSA
 
-- **Consistent Results**: Delivers repeatable, high-precision outcomes for complex tasks.
-- **Advanced Problem-Solving**: Combines HTPs and OODAR for multi-step planning and reasoning.
-- **Scalable Expertise**: Leverages domain knowledge to scale AI without heavy data requirements.
-- **Resource Efficiency**: Uses smaller, resource-efficient models, minimizing computational costs.
-- **Extensible and Developer-Friendly**: Supports diverse LLM backends and is fully customizable for industry-specific needs.
+- __Consistent and Accurate Results__ for complex industrial problems
+- __Scalable Expertise__ through AI agents incorporating deep domain knowledge from human experts
+- __Economical and Efficient Computation__ thanks to usage of small models
+- __Full Ownership__ of intellectual property when used with open-source models such as Llama
 
 ## Getting Started
 
-- Install with __`pip install openssa`__
-_(supports Python 3.12 and 3.13)_
+- Install with __`pip install openssa`__ _(Python 3.12 and 3.13)_
+  - For bleeding-edge capabilities: __`pip install https://github.com/aitomatic/openssa/archive/main.zip`__
 
-- For the latest capabilities:
-__`pip install https://github.com/aitomatic/openssa/archive/main.zip`__.
-
-- Explore the `examples/` directory and developer guides and tutorials on our [documentation site](https://aitomatic.github.io/openssa).
+- Explore the `examples/` directory and developer guides and tutorials on our [documentation site](https://aitomatic.github.io/openssa)
 
 ## [API Documentation](https://aitomatic.github.io/openssa/modules)
 
 ## Contributing
 
 We welcome contributions from the community!
 
-- Join the discussion on our [Community Forum](https://github.com/aitomatic/openssa/discussions)
-- Submit pull requests for bug fixes, enhancements, or new features
+- Join discussions on our [Community Forum](https://github.com/aitomatic/openssa/discussions)
+- Submit pull requests for bug fixes, enhancements and new features
 
 For detailed guidelines, refer to our [Contribution Guide](CONTRIBUTING.md).
diff --git a/examples/FinanceBench/Makefile b/examples/FinanceBench/Makefile
@@ -10,27 +10,27 @@ dana-solve-w-prog-store:
 dana-solve-w-knowledge-and-prog-store:
 	@poetry run python dana.py ${id} --knowledge --prog-store
 
-dana-solve-w-llama3:
-	@poetry run python dana.py ${id} --llama3
+dana-solve-w-llama:
+	@poetry run python dana.py ${id} --llama
 
-dana-solve-w-knowledge-w-llama3:
-	@poetry run python dana.py ${id} --knowledge --llama3
+dana-solve-w-knowledge-w-llama:
+	@poetry run python dana.py ${id} --knowledge --llama
 
-dana-solve-w-prog-store-w-llama3:
-	@poetry run python dana.py ${id} --prog-store --llama3
+dana-solve-w-prog-store-w-llama:
+	@poetry run python dana.py ${id} --prog-store --llama
 
-dana-solve-w-knowledge-and-prog-store-w-llama3:
-	@poetry run python dana.py ${id} --knowledge --prog-store --llama3
+dana-solve-w-knowledge-and-prog-store-w-llama:
+	@poetry run python dana.py ${id} --knowledge --prog-store --llama
 
 dana-solve-all-combos:
 	@poetry run python dana.py ${id}
 	@poetry run python dana.py ${id} --knowledge
 	@poetry run python dana.py ${id} --prog-store
 	@poetry run python dana.py ${id} --knowledge --prog-store
-	@poetry run python dana.py ${id} --llama3
-	@poetry run python dana.py ${id} --knowledge --llama3
-	@poetry run python dana.py ${id} --prog-store --llama3
-	@poetry run python dana.py ${id} --knowledge --prog-store --llama3
+	@poetry run python dana.py ${id} --llama
+	@poetry run python dana.py ${id} --knowledge --llama
+	@poetry run python dana.py ${id} --prog-store --llama
+	@poetry run python dana.py ${id} --knowledge --prog-store --llama
 
 
 langchain-react-solve:

diff --git a/examples/FinanceBench/dana.py b/examples/FinanceBench/dana.py
@@ -12,13 +12,13 @@
 
 
 @cache
-def get_main_lm(use_llama3: bool = False):
-    return (HuggingFaceLM if use_llama3 else OpenAILM).from_defaults()
+def get_main_lm(use_llama: bool = False):
+    return (HuggingFaceLM if use_llama else OpenAILM).from_defaults()
 
 
 @cache
-def get_or_create_expert_program_store(use_llama3: bool = False) -> ProgramStore:
-    program_store = ProgramStore(lm=get_main_lm(use_llama3=use_llama3))
+def get_or_create_expert_program_store(use_llama: bool = False) -> ProgramStore:
+    program_store = ProgramStore(lm=get_main_lm(use_llama=use_llama))
 
     for program_name, htp_dict in EXPERT_PROGRAMS.items():
         htp = HTP.from_dict(htp_dict)
@@ -30,16 +30,16 @@ def get_or_create_expert_program_store(use_llama3: bool = False) -> ProgramStore
 @cache
 def get_or_create_agent(doc_name: DocName, expert_knowledge: bool = False, expert_programs: bool = False,
                         max_depth=3, max_subtasks_per_decomp=6,
-                        use_llama3: bool = False,
+                        use_llama: bool = False,
                         llama_index_openai_lm_name: str = LMConfig.OPENAI_DEFAULT_MODEL) -> DANA:
     # pylint: disable=too-many-arguments
     return DANA(knowledge={EXPERT_KNOWLEDGE} if expert_knowledge else None,
 
-                program_store=(get_or_create_expert_program_store(use_llama3=use_llama3)
+                program_store=(get_or_create_expert_program_store(use_llama=use_llama)
                                if expert_programs
                                else ProgramStore()),
 
-                programmer=HTPlanner(lm=get_main_lm(use_llama3=use_llama3),
+                programmer=HTPlanner(lm=get_main_lm(use_llama=use_llama),
                                      max_depth=max_depth, max_subtasks_per_decomp=max_subtasks_per_decomp),
 
                 resources={FileResource(path=Doc(name=doc_name).dir_path,
@@ -83,34 +83,34 @@ def solve_with_knowledge_and_program_store(fb_id: FbId) -> Answer:
         adaptations_from_known_programs=get_or_create_adaptations(doc_name=DOC_NAMES_BY_FB_ID[fb_id]))
 
 
-@enable_batch_qa_and_eval(output_name='DANA-wLlama3')
-@log_qa_and_update_output_file(output_name='DANA-wLlama3')
-def solve_with_llama3(fb_id: FbId) -> Answer:
-    return get_or_create_agent(doc_name=DOC_NAMES_BY_FB_ID[fb_id], use_llama3=True).solve(
+@enable_batch_qa_and_eval(output_name='DANA-wLlama')
+@log_qa_and_update_output_file(output_name='DANA-wLlama')
+def solve_with_llama(fb_id: FbId) -> Answer:
+    return get_or_create_agent(doc_name=DOC_NAMES_BY_FB_ID[fb_id], use_llama=True).solve(
         problem=QS_BY_FB_ID[fb_id],
         adaptations_from_known_programs=get_or_create_adaptations(doc_name=DOC_NAMES_BY_FB_ID[fb_id]))
 
 
-@enable_batch_qa_and_eval(output_name='DANA-wKnowledge-wLlama3')
-@log_qa_and_update_output_file(output_name='DANA-wKnowledge-wLlama3')
-def solve_with_knowledge_with_llama3(fb_id: FbId) -> Answer:
-    return get_or_create_agent(doc_name=DOC_NAMES_BY_FB_ID[fb_id], expert_knowledge=True, use_llama3=True).solve(
+@enable_batch_qa_and_eval(output_name='DANA-wKnowledge-wLlama')
+@log_qa_and_update_output_file(output_name='DANA-wKnowledge-wLlama')
+def solve_with_knowledge_with_llama(fb_id: FbId) -> Answer:
+    return get_or_create_agent(doc_name=DOC_NAMES_BY_FB_ID[fb_id], expert_knowledge=True, use_llama=True).solve(
         problem=QS_BY_FB_ID[fb_id],
         adaptations_from_known_programs=get_or_create_adaptations(doc_name=DOC_NAMES_BY_FB_ID[fb_id]))
 
 
-@enable_batch_qa_and_eval(output_name='DANA-wProgStore-wLlama3')
-@log_qa_and_update_output_file(output_name='DANA-wProgStore-wLlama3')
-def solve_with_program_store_with_llama3(fb_id: FbId) -> Answer:
-    return get_or_create_agent(doc_name=DOC_NAMES_BY_FB_ID[fb_id], expert_programs=True, use_llama3=True).solve(
+@enable_batch_qa_and_eval(output_name='DANA-wProgStore-wLlama')
+@log_qa_and_update_output_file(output_name='DANA-wProgStore-wLlama')
+def solve_with_program_store_with_llama(fb_id: FbId) -> Answer:
+    return get_or_create_agent(doc_name=DOC_NAMES_BY_FB_ID[fb_id], expert_programs=True, use_llama=True).solve(
         problem=QS_BY_FB_ID[fb_id],
         adaptations_from_known_programs=get_or_create_adaptations(doc_name=DOC_NAMES_BY_FB_ID[fb_id]))
 
 
-@enable_batch_qa_and_eval(output_name='DANA-wKnowledge-wProgStore-wLlama3')
-@log_qa_and_update_output_file(output_name='DANA-wKnowledge-wProgStore-wLlama3')
-def solve_with_knowledge_and_program_store_with_llama3(fb_id: FbId) -> Answer:
-    return get_or_create_agent(DOC_NAMES_BY_FB_ID[fb_id], expert_knowledge=True, expert_programs=True, use_llama3=True).solve(  # noqa: E501
+@enable_batch_qa_and_eval(output_name='DANA-wKnowledge-wProgStore-wLlama')
+@log_qa_and_update_output_file(output_name='DANA-wKnowledge-wProgStore-wLlama')
+def solve_with_knowledge_and_program_store_with_llama(fb_id: FbId) -> Answer:
+    return get_or_create_agent(DOC_NAMES_BY_FB_ID[fb_id], expert_knowledge=True, expert_programs=True, use_llama=True).solve(  # noqa: E501
         problem=QS_BY_FB_ID[fb_id],
         adaptations_from_known_programs=get_or_create_adaptations(doc_name=DOC_NAMES_BY_FB_ID[fb_id]))
 
@@ -121,10 +121,10 @@ def solve_with_knowledge_and_program_store_with_llama3(fb_id: FbId) -> Answer:
     arg_parser.add_argument('--from-id', action='store_true')
     arg_parser.add_argument('--knowledge', action='store_true')
     arg_parser.add_argument('--prog-store', action='store_true')
-    arg_parser.add_argument('--llama3', action='store_true')
+    arg_parser.add_argument('--llama', action='store_true')
     args = arg_parser.parse_args()
 
-    match (args.knowledge, args.prog_store, args.llama3):
+    match (args.knowledge, args.prog_store, args.llama):
         case (False, False, False):
             solve_func: QAFunc = solve
 
@@ -138,16 +138,16 @@ def solve_with_knowledge_and_program_store_with_llama3(fb_id: FbId) -> Answer:
             solve_func: QAFunc = solve_with_knowledge_and_program_store
 
         case (False, False, True):
-            solve_func: QAFunc = solve_with_llama3
+            solve_func: QAFunc = solve_with_llama
 
         case (True, False, True):
-            solve_func: QAFunc = solve_with_knowledge_with_llama3
+            solve_func: QAFunc = solve_with_knowledge_with_llama
 
         case (False, True, True):
-            solve_func: QAFunc = solve_with_program_store_with_llama3
+            solve_func: QAFunc = solve_with_program_store_with_llama
 
         case (True, True, True):
-            solve_func: QAFunc = solve_with_knowledge_and_program_store_with_llama3
+            solve_func: QAFunc = solve_with_knowledge_and_program_store_with_llama
 
     if not (fb_id := args.fb_id).startswith(FB_ID_COL_NAME):
         fb_id: FbId = f'{FB_ID_COL_NAME}_{fb_id}'

diff --git a/examples/FinanceBench/data_and_knowledge.py b/examples/FinanceBench/data_and_knowledge.py
@@ -160,9 +160,9 @@ class Category(StrEnum):
 QS_BY_FB_ID: dict[FbId, Question] = META_DF.question.to_dict()
 
 
-LOCAL_CACHE_DIR_PATH: Path = Path(__file__).parent / '.data'
-LOCAL_CACHE_DOCS_DIR_PATH: Path = LOCAL_CACHE_DIR_PATH / 'docs'
-OUTPUT_FILE_PATH: Path = LOCAL_CACHE_DIR_PATH / 'output.csv'
+DATA_LOCAL_DIR_PATH: Path = Path(__file__).parent / '.data'
+DOCS_DATA_LOCAL_DIR_PATH: Path = DATA_LOCAL_DIR_PATH / 'docs'
+OUTPUT_FILE_PATH: Path = DATA_LOCAL_DIR_PATH / 'output.csv'
 
 
 GROUND_TRUTHS_FILE_PATH = Path(__file__).parent / 'ground-truths.yml'
@@ -179,7 +179,9 @@ class Category(StrEnum):
 CAT_DISTRIB: Counter[Category] = Counter(ground_truth['category'] for ground_truth in GROUND_TRUTHS.values())
 
 
-EXPERT_KNOWLEDGE_FILE_PATH: Path = Path(__file__).parent / 'expert-knowledge.txt'
+EXPERTISE_DIR_PATH: Path = Path(__file__).parent / 'expertise'
+
+EXPERT_KNOWLEDGE_FILE_PATH: Path = EXPERTISE_DIR_PATH / 'expert-knowledge.txt'
 with open(file=EXPERT_KNOWLEDGE_FILE_PATH,
           buffering=-1,
           encoding='utf-8',
@@ -189,8 +191,7 @@ class Category(StrEnum):
           opener=None) as f:
     EXPERT_KNOWLEDGE: str = f.read()
 
-
-EXPERT_PROGRAMS_FILE_PATH: Path = Path(__file__).parent / 'expert-programs.yml'
+EXPERT_PROGRAMS_FILE_PATH: Path = EXPERTISE_DIR_PATH / 'expert-programs.yml'
 with open(file=EXPERT_PROGRAMS_FILE_PATH,
           buffering=-1,
           encoding='utf-8',
@@ -251,7 +252,7 @@ def request(self) -> requests.Response:
 
     @cached_property
     def dir_path(self) -> Path:
-        dir_path: Path = LOCAL_CACHE_DOCS_DIR_PATH / self.name
+        dir_path: Path = DOCS_DATA_LOCAL_DIR_PATH / self.name
 
         if not (file_path := dir_path / f'{self.name}.pdf').is_file():
             dir_path.mkdir(parents=True, exist_ok=True)

diff --git a/examples/FinanceBench/eval.py b/examples/FinanceBench/eval.py
@@ -17,7 +17,7 @@
 # pylint: disable=wrong-import-order
 from data_and_knowledge import (FbId, Question, Answer, Category, GroundTruth,
                                 FB_ID_COL_NAME, GROUND_TRUTHS, N_CASES, CAT_DISTRIB,
-                                LOCAL_CACHE_DIR_PATH, OUTPUT_FILE_PATH, get_or_create_output_df)
+                                DATA_LOCAL_DIR_PATH, OUTPUT_FILE_PATH, get_or_create_output_df)
 from log import switch_log_file
 
 if TYPE_CHECKING:
@@ -200,7 +200,7 @@ def eval_accuracy_and_consistency_wrt_ground_truths(output_name: str, output_fil
     n_yes_scores_by_fb_id: defaultdict = defaultdict(int)
     incorrect_answer_fb_ids: dict[FbId, str] = {}
 
-    for output_df in (read_csv(LOCAL_CACHE_DIR_PATH / output_file_name, index_col=FB_ID_COL_NAME)
+    for output_df in (read_csv(DATA_LOCAL_DIR_PATH / output_file_name, index_col=FB_ID_COL_NAME)
                       for output_file_name in output_file_names):
 
         for fb_id, correctness in output_df[correctness_col_name].items():

diff --git a/examples/FinanceBench/expert-knowledge.txt b/examples/FinanceBench/expertise/expert-knowledge.txt
rename from examples/FinanceBench/expert-knowledge.txt
rename to examples/FinanceBench/expertise/expert-knowledge.txt
diff --git a/examples/FinanceBench/expert-programs.yml b/examples/FinanceBench/expertise/expert-programs.yml
rename from examples/FinanceBench/expert-programs.yml
rename to examples/FinanceBench/expertise/expert-programs.yml
diff --git a/examples/FinanceBench/export-multi-runs.py b/examples/FinanceBench/export-multi-runs.py
@@ -2,7 +2,7 @@
 
 from pandas import DataFrame, read_csv
 
-from data_and_knowledge import FB_ID_COL_NAME, LOCAL_CACHE_DIR_PATH
+from data_and_knowledge import FB_ID_COL_NAME, DATA_LOCAL_DIR_PATH
 
 
 EXPORT_FILE_NAME: str = 'export-multi-runs.csv'
@@ -14,12 +14,12 @@
 args = arg_parser.parse_args()
 
 
-for i, df in enumerate(read_csv(LOCAL_CACHE_DIR_PATH / output_file_name, index_col=FB_ID_COL_NAME)
+for i, df in enumerate(read_csv(DATA_LOCAL_DIR_PATH / output_file_name, index_col=FB_ID_COL_NAME)
                        for output_file_name in args.output_file_names):
     if not i:
         export_df: DataFrame = df[['question']]
 
     export_df.loc[:, f'answer {i + 1}'] = df[args.output_name]  # pylint: disable=possibly-used-before-assignment
 
 
-export_df.to_csv(LOCAL_CACHE_DIR_PATH / EXPORT_FILE_NAME, index=True)
+export_df.to_csv(DATA_LOCAL_DIR_PATH / EXPORT_FILE_NAME, index=True)
diff --git a/examples/FinanceBench/make.bat b/examples/FinanceBench/make.bat
@@ -9,10 +9,10 @@ IF "%TARGET%"=="dana-solve" GOTO dana-solve
 IF "%TARGET%"=="dana-solve-w-prog-store" GOTO dana-solve-w-prog-store
 IF "%TARGET%"=="dana-solve-w-knowledge" GOTO dana-solve-w-knowledge
 IF "%TARGET%"=="dana-solve-w-knowledge-and-prog-store" GOTO dana-solve-w-knowledge-and-prog-store
-IF "%TARGET%"=="dana-solve-w-llama3" GOTO dana-solve-w-llama3
-IF "%TARGET%"=="dana-solve-w-prog-store-w-llama3" GOTO dana-solve-w-prog-store-w-llama3
-IF "%TARGET%"=="dana-solve-w-knowledge-w-llama3" GOTO dana-solve-w-knowledge-w-llama3
-IF "%TARGET%"=="dana-solve-w-knowledge-and-prog-store-w-llama3" GOTO dana-solve-w-knowledge-and-prog-store-w-llama3
+IF "%TARGET%"=="dana-solve-w-llama" GOTO dana-solve-w-llama
+IF "%TARGET%"=="dana-solve-w-prog-store-w-llama" GOTO dana-solve-w-prog-store-w-llama
+IF "%TARGET%"=="dana-solve-w-knowledge-w-llama" GOTO dana-solve-w-knowledge-w-llama
+IF "%TARGET%"=="dana-solve-w-knowledge-and-prog-store-w-llama" GOTO dana-solve-w-knowledge-and-prog-store-w-llama
 IF "%TARGET%"=="dana-solve-all-combos" GOTO dana-solve-all-combos
 
 IF "%TARGET%"=="langchain-react-solve" GOTO langchain-react-solve
@@ -47,31 +47,31 @@ IF "%TARGET%"=="streamlit-run" GOTO streamlit-run
   poetry run python dana.py %2 --knowledge --prog-store
   GOTO end
 
-:dana-solve-w-llama3
-  poetry run python dana.py %2 --llama3
+:dana-solve-w-llama
+  poetry run python dana.py %2 --llama
   GOTO end
 
-:dana-solve-w-knowledge-w-llama3
-  poetry run python dana.py %2 --knowledge --llama3
+:dana-solve-w-knowledge-w-llama
+  poetry run python dana.py %2 --knowledge --llama
   GOTO end
 
-:dana-solve-w-prog-store-w-llama3
-  poetry run python dana.py %2 --prog-store --llama3
+:dana-solve-w-prog-store-w-llama
+  poetry run python dana.py %2 --prog-store --llama
   GOTO end
 
-:dana-solve-w-knowledge-and-prog-store-w-llama3
-  poetry run python dana.py %2 --knowledge --prog-store --llama3
+:dana-solve-w-knowledge-and-prog-store-w-llama
+  poetry run python dana.py %2 --knowledge --prog-store --llama
   GOTO end
 
 :dana-solve-all-combos
   poetry run python dana.py %2
   poetry run python dana.py %2 --knowledge
   poetry run python dana.py %2 --prog-space
   poetry run python dana.py %2 --knowledge --prog-space
-  poetry run python dana.py %2 --llama3
-  poetry run python dana.py %2 --knowledge --llama3
-  poetry run python dana.py %2 --prog-space --llama3
-  poetry run python dana.py %2 --knowledge --prog-space --llama3
+  poetry run python dana.py %2 --llama
+  poetry run python dana.py %2 --knowledge --llama
+  poetry run python dana.py %2 --prog-space --llama
+  poetry run python dana.py %2 --knowledge --prog-space --llama
   GOTO end
 
 

diff --git a/examples/financial-research/dataproc.py b/examples/financial-research/dataproc.py
@@ -2,11 +2,11 @@
 from pathlib import Path
 
 
-LOCAL_CACHE_DIR_PATH: Path = Path(__file__).parent / '.data'
+DATA_LOCAL_DIR_PATH: Path = Path(__file__).parent / '.data'
 
 
 @cache
 def get_or_create_cached_dir_path(company: str) -> str:
-    dir_path: Path = LOCAL_CACHE_DIR_PATH / company
+    dir_path: Path = DATA_LOCAL_DIR_PATH / company
     dir_path.mkdir(parents=True, exist_ok=True)
     return str(dir_path)
diff --git a/.vscode/extensions.json b/.vscode/extensions.json
@@ -5,6 +5,8 @@
         "charliermarsh.ruff",  // Ruff
+        "davidanson.vscode-markdownlint",  // MarkdownLint
         "soulcode.vscode-unwanted-recommendations"  // Unwanted Recommendations
       ],
@@ Expand Down @@