diff --git a/Makefile b/Makefile
index d02d7bd61..19e244e47 100644
--- a/Makefile
+++ b/Makefile
@@ -41,10 +41,10 @@ install:
 		--with=dev --with=docs --with=lint --with=test
 
 install-editable:
-	@python3 -m pip install -e ".[contrib]" --upgrade
+	@python3 -m pip install -e ".[contrib, langchain]" --upgrade
 
 install-editable-mac-sys:
-	@python3 -m pip install -e ".[contrib]" --upgrade --user --break-system-packages
+	@python3 -m pip install -e ".[contrib, langchain]" --upgrade --user --break-system-packages
 
 
 # LINTING
diff --git a/examples/FinanceBench/Makefile b/examples/FinanceBench/Makefile
index 339e0e715..66a41df49 100644
--- a/examples/FinanceBench/Makefile
+++ b/examples/FinanceBench/Makefile
@@ -33,6 +33,9 @@ agent-solve-all-combos:
 	@poetry run python htp_oodar_agent.py ${id} --knowledge --prog-store --llama3
 
 
+langchain-react-solve:
+	@poetry run python langchain_react.py ${id}
+
 openai-assist:
 	@poetry run python openai_assist.py ${id}
 
diff --git a/examples/FinanceBench/eval.py b/examples/FinanceBench/eval.py
index 575943f4b..77f491f4f 100644
--- a/examples/FinanceBench/eval.py
+++ b/examples/FinanceBench/eval.py
@@ -17,7 +17,7 @@
 # pylint: disable=wrong-import-order
 from data_and_knowledge import (FbId, Question, Answer, Category, GroundTruth,
                                 FB_ID_COL_NAME, GROUND_TRUTHS, N_CASES, CAT_DISTRIB,
-                                OUTPUT_FILE_PATH, get_or_create_output_df)
+                                LOCAL_CACHE_DIR_PATH, OUTPUT_FILE_PATH, get_or_create_output_df)
 from log import switch_log_file
 
 if TYPE_CHECKING:
@@ -191,6 +191,86 @@ def compare_eval(output_name: str, baseline_output_name: str = 'RAG-Default'):
                          ['doc_name', 'category', baseline_output_name, output_name]]
 
 
+def eval_accuracy_and_consistency_wrt_ground_truths(output_name: str, output_file_names: list[str]):
+    # pylint: disable=too-many-locals
+
+    n_output_files: int = len(output_file_names)
+    correctness_col_name: str = f'{output_name}---CORRECTNESS'
+
+    n_yes_scores_by_fb_id: defaultdict = defaultdict(int)
+    incorrect_answer_fb_ids: dict[FbId, str] = {}
+
+    for output_df in (read_csv(LOCAL_CACHE_DIR_PATH / output_file_name, index_col=FB_ID_COL_NAME)
+                      for output_file_name in output_file_names):
+
+        for fb_id, correctness in output_df[correctness_col_name].items():
+            ground_truth: GroundTruth = GROUND_TRUTHS[fb_id]
+
+            if notna(correctness) and correctness:
+                n_yes_scores_by_fb_id[fb_id] += 1
+
+            else:
+                incorrect_answer_fb_ids[fb_id]: str = ('expert answer inadequate'
+                                                       if ground_truth.get('answer-inadequate')
+                                                       else ('evaluator unreliable'
+                                                             if ground_truth.get('evaluator-unreliable')
+                                                             else ''))
+
+    cumu_avg_accuracy_scores_by_category: defaultdict = defaultdict(int)
+    cumu_consistency_scores_by_category: defaultdict = defaultdict(float)
+
+    for fb_id, ground_truth in GROUND_TRUTHS.items():
+        cumu_avg_accuracy_scores_by_category[cat := ground_truth['category']] += (a := n_yes_scores_by_fb_id[fb_id] / n_output_files)
+        cumu_consistency_scores_by_category[cat] += 2 * abs(a - 0.5)
+
+    print(f'TOTAL CORRECT: {(n := sum(cumu_avg_accuracy_scores_by_category.values()))} / {N_CASES} = {n / N_CASES:.1%}')
+
+    pprint({category: (f'{(n := cumu_avg_accuracy_scores_by_category[category])} / {n_for_category} '
+                       f'= {n / n_for_category:.1%}')
+            for category, n_for_category in CAT_DISTRIB.items()})
+
+    pprint({
+        'EASY': (f'{(e := sum(cumu_avg_accuracy_scores_by_category[easy_cat]
+                              for easy_cat in (Category.RETRIEVE, Category.COMPARE, Category.CALC_CHANGE)))} / '
+                 f'{(se := sum(CAT_DISTRIB[easy_cat]
+                               for easy_cat in (Category.RETRIEVE, Category.COMPARE, Category.CALC_CHANGE)))} '
+                 f'= {e / se:.1%}'),
+
+        'HARD': (f'{(h := sum(cumu_avg_accuracy_scores_by_category[hard_cat]
+                              for hard_cat in (Category.CALC_COMPLEX, Category.CALC_AND_JUDGE,
+                                               Category.EXPLAIN_FACTORS, Category.OTHER_ADVANCED)))} / '
+                 f'{(sh := sum(CAT_DISTRIB[hard_cat]
+                               for hard_cat in (Category.CALC_COMPLEX, Category.CALC_AND_JUDGE,
+                                                Category.EXPLAIN_FACTORS, Category.OTHER_ADVANCED)))} '
+                 f'= {h / sh:.1%}')
+    })
+
+    print(f'\nTOTAL CONSISTENT: {(n := sum(cumu_consistency_scores_by_category.values()))} / {N_CASES} = {n / N_CASES:.1%}')
+
+    pprint({category: (f'{(n := cumu_consistency_scores_by_category[category])} / {n_for_category} '
+                       f'= {n / n_for_category:.1%}')
+            for category, n_for_category in CAT_DISTRIB.items()})
+
+    pprint({
+        'EASY': (f'{(e := sum(cumu_consistency_scores_by_category[easy_cat]
+                              for easy_cat in (Category.RETRIEVE, Category.COMPARE, Category.CALC_CHANGE)))} / '
+                 f'{(se := sum(CAT_DISTRIB[easy_cat]
+                               for easy_cat in (Category.RETRIEVE, Category.COMPARE, Category.CALC_CHANGE)))} '
+                 f'= {e / se:.1%}'),
+
+        'HARD': (f'{(h := sum(cumu_consistency_scores_by_category[hard_cat]
+                              for hard_cat in (Category.CALC_COMPLEX, Category.CALC_AND_JUDGE,
+                                               Category.EXPLAIN_FACTORS, Category.OTHER_ADVANCED)))} / '
+                 f'{(sh := sum(CAT_DISTRIB[hard_cat]
+                               for hard_cat in (Category.CALC_COMPLEX, Category.CALC_AND_JUDGE,
+                                                Category.EXPLAIN_FACTORS, Category.OTHER_ADVANCED)))} '
+                 f'= {h / sh:.1%}')
+    })
+
+    print('\nINCORRECT:')
+    pprint(incorrect_answer_fb_ids)
+
+
 if __name__ == '__main__':
     arg_parser = argparse.ArgumentParser()
 
diff --git a/examples/FinanceBench/export-multi-runs.py b/examples/FinanceBench/export-multi-runs.py
new file mode 100644
index 000000000..d48aa7a1b
--- /dev/null
+++ b/examples/FinanceBench/export-multi-runs.py
@@ -0,0 +1,25 @@
+from argparse import ArgumentParser
+
+from pandas import DataFrame, read_csv
+
+from data_and_knowledge import FB_ID_COL_NAME, LOCAL_CACHE_DIR_PATH
+
+
+EXPORT_FILE_NAME: str = 'export-multi-runs.csv'
+
+
+arg_parser = ArgumentParser()
+arg_parser.add_argument('output_name')
+arg_parser.add_argument('output_file_names', nargs='+')
+args = arg_parser.parse_args()
+
+
+for i, df in enumerate(read_csv(LOCAL_CACHE_DIR_PATH / output_file_name, index_col=FB_ID_COL_NAME)
+                       for output_file_name in args.output_file_names):
+    if not i:
+        export_df: DataFrame = df[['question']].copy()
+
+    export_df.loc[:, f'answer {i + 1}'] = df[args.output_name]  # pylint: disable=possibly-used-before-assignment
+
+
+export_df.to_csv(LOCAL_CACHE_DIR_PATH / EXPORT_FILE_NAME, index=True)
diff --git a/examples/FinanceBench/ground-truths.yml b/examples/FinanceBench/ground-truths.yml
index a9b417862..17ae0078d 100644
--- a/examples/FinanceBench/ground-truths.yml
+++ b/examples/FinanceBench/ground-truths.yml
@@ -545,7 +545,7 @@ financebench_id_01319:
   evaluator-unreliable: true
 
 
-financebench_id_00540:  # TODO: retrieve COGS
+financebench_id_00540:
   sector: Utilities
 
   company: AES Corporation
@@ -769,7 +769,9 @@ financebench_id_01935:
 
   category: 0-RETRIEVE
   correctness: >-
-    the answer mentions supplemental indentures related to debt
+    the answer mentions indenture(s)
+
+  evaluator-unreliable: true
 
 
 financebench_id_00799:
@@ -945,6 +947,8 @@ financebench_id_01928:
     2018 million, 2.018 billion,
     2000 million or 2 billion
 
+  evaluator-unreliable: true
+
 
 financebench_id_01930:
   sector: Materials
@@ -1094,9 +1098,9 @@ financebench_id_01198:
   category: 0-RETRIEVE
   correctness: |-
     the answer mentions at least 2 of the following:
-    - EPYC server processors;
-    - Gaming; and
-    - inclusion of Xilinx
+    - "Data Center" and/or "EPYC";
+    - "Gaming" and/or "semi-custom"; and
+    - "Embedded" and/or "Xilinx"
 
   evaluator-unreliable: true
 
@@ -1228,6 +1232,8 @@ financebench_id_00476:
     the answer concludes that there are no debt securities traded,
     or, alternatively, that no such debt securities are explicitly reported
 
+  evaluator-unreliable: true
+
 
 financebench_id_01028:
   sector: Financials
@@ -1338,7 +1344,10 @@ financebench_id_01351:
 
   category: 2-CALC-CHANGE
   correctness: >-
-    the answer says Effective Tax Rate changed from 24.6% to 21.6%, and/or that it decreased by 3 pencentage points
+    the answer says Effective Tax Rate changed from 24.6% to 21.6%,
+    and/or that it decreased by 3 percentage points or 3%
+
+  evaluator-unreliable: true
 
 
 financebench_id_01964:
@@ -1472,8 +1481,7 @@ financebench_id_00070:
     data? If working capital is not a useful or relevant metric for this company,
     then please state that and explain why.
 
-  answer: Yes. American Water Works had postivie working capital of $ 124Mn by FY
-    2022.
+  answer: No, American Water Works had negative working capital of -$1561M in FY 2022.
   justification: 'Accounts receivable+Income tax receivable+Unbilled revenues+Materials
     and supplies+other-Accounts payable-Accrued liabilities-Accrued taxes
 
@@ -1484,7 +1492,9 @@ financebench_id_00070:
   category: 3-CALC-COMPLEX
   correctness: >-
     the answer contains a calculated (Net) Working Capital metric value in dollars
-  answer-inadequate: true
+    that is NEGATIVE and equivalent to or approximately equal to
+    minus/negative 1561, minus/negative 1561 million, minus/negative 1.561 billion,
+    minus/negative 1600, minus/negative 1600 million or minus/negative 1.6 billion
 
   evaluator-unreliable: true
 
@@ -1583,7 +1593,7 @@ financebench_id_00685:
   category: 4-CALC-AND-JUDGE
   correctness: >-
     the answer contains calculated Gross Margin
-    percentage values for 2022 and 2023 that are within 2 percentage points of each other,
+    percentage values for 2022 and 2023 that are within 2 percentage points (or 2%) of each other,
     or, alternatively, calculated decimal values that are within 0.02 of each other
   answer-inadequate: true
 
@@ -1909,7 +1919,7 @@ financebench_id_01091:
   evaluator-unreliable: true
 
 
-financebench_id_00678:  # tricky: Gross Income is implicit, with missing label
+financebench_id_00678:  # note: Gross Income is implicit, with missing label
   sector: Industrials
 
   company: Boeing
@@ -2266,7 +2276,7 @@ financebench_id_01346:
   category: 2-CALC-CHANGE
   correctness: >-
     the answer says that Effective Tax Rate changed from approximately 20% to approximately 23%,
-    and/or that it increased by approximately 3 percentage points
+    and/or that it increased by approximately 3 percentage points or 3%
 
   evaluator-unreliable: true
 
@@ -2777,7 +2787,7 @@ financebench_id_00711:
   evaluator-unreliable: true
 
 
-financebench_id_00651:
+financebench_id_00651:  # TODO: retrieve growth rates
   sector: Health Care
 
   company: Johnson & Johnson
@@ -2826,6 +2836,8 @@ financebench_id_01484:
   correctness: >-
     the answer mentions US sales increased and international sales decreased
 
+  evaluator-unreliable: true
+
 
 financebench_id_01488:
   sector: Health Care
@@ -3191,7 +3203,7 @@ financebench_id_03718:
   evaluator-unreliable: true
 
 
-financebench_id_04171:  # TODO: retrieve Accounts Payable
+financebench_id_04171:
   sector: Consumer Discretionary
 
   company: MGM Resorts
@@ -3218,6 +3230,8 @@ financebench_id_04171:  # TODO: retrieve Accounts Payable
     303, 303 million, 0.303 billion,
     300, 300 million or 0.3 billion
 
+  evaluator-unreliable: true
+
 
 financebench_id_03849:
   sector: Consumer Discretionary
@@ -3481,7 +3495,7 @@ financebench_id_04458:
     (if the answer is a single number, assume that it is that calculated EBITDA Margin metric value)
 
 
-financebench_id_03282:  # TODO: retrieve Total Current Liabilities
+financebench_id_03282:
   sector: Communication Services
 
   company: Netflix
@@ -3508,6 +3522,8 @@ financebench_id_03282:  # TODO: retrieve Total Current Liabilities
     5466, 5466 million, 5.466 billion,
     5500, 5500 million or 5.5 billion
 
+  evaluator-unreliable: true
+
 
 financebench_id_04302:
   sector: Consumer Discretionary
@@ -4005,7 +4021,9 @@ financebench_id_01476:
 
   category: 2-CALC-CHANGE
   correctness: >-
-    the answer mentions growth guidance raised from 8% to 9%, and/or growth guidance raised by 1 percentage point
+    the answer mentions growth guidance raised from 8% to 9%, and/or growth guidance raised by 1 percentage point or 1%
+
+  evaluator-unreliable: true
 
 
 financebench_id_00302:
@@ -4080,7 +4098,7 @@ financebench_id_02416:  # note: Therachon is mentioned on separate following pag
 
   category: 0-RETRIEVE
   correctness: >-
-    the answer mentions Arena and Trillium
+    the answer mentions Trillium and Array
 
 
 financebench_id_00283:
diff --git a/examples/FinanceBench/langchain_react.py b/examples/FinanceBench/langchain_react.py
new file mode 100644
index 000000000..4d8ccee1b
--- /dev/null
+++ b/examples/FinanceBench/langchain_react.py
@@ -0,0 +1,75 @@
+from __future__ import annotations
+
+from argparse import ArgumentParser
+from functools import cache
+from typing import TYPE_CHECKING
+
+from langchain import hub
+from langchain.agents.agent import AgentExecutor
+from langchain.agents.react.agent import create_react_agent
+from langchain_community.document_loaders import PyPDFLoader
+from langchain_community.tools.vectorstore.tool import VectorStoreQATool
+from langchain_community.vectorstores.faiss import FAISS
+from langchain_openai.embeddings.base import OpenAIEmbeddings
+from langchain_openai.chat_models.base import ChatOpenAI
+from langchain_text_splitters.character import RecursiveCharacterTextSplitter
+
+from data_and_knowledge import DocName, FbId, Answer, Doc, FB_ID_COL_NAME, DOC_NAMES_BY_FB_ID, QS_BY_FB_ID
+from util import enable_batch_qa_and_eval, log_qa_and_update_output_file
+
+from openssa.core.util.lm.config import LMConfig
+
+if TYPE_CHECKING:
+    from langchain_core.documents.base import Document
+    from langchain_core.embeddings.embeddings import Embeddings
+    from langchain_core.language_models.chat_models import BaseChatModel
+    from langchain_core.tools import BaseTool
+    from langchain_core.vectorstores.base import VectorStore
+
+
+EMBED_MODEL: Embeddings = OpenAIEmbeddings(model='text-embedding-3-large', dimensions=3072, chunk_size=2048)
+LLM: BaseChatModel = ChatOpenAI(model_name='gpt-4o', temperature=0, seed=LMConfig.DEFAULT_SEED, n=1, max_tokens=2048)
+
+REACT_PROMPT_TEMPLATE: str = hub.pull('hwchase17/react')
+
+
+@cache
+def get_or_create_react_agent_executor(doc_name: DocName):
+    doc: Doc = Doc(name=doc_name)
+
+    tools: list[BaseTool] = [
+        VectorStoreQATool(
+            name=doc_name,
+            description=f'{doc.type} SEC Filing by {doc.company} for financial period {doc.period}',
+            vectorstore=FAISS.from_documents(
+                documents=(PyPDFLoader(file_path=doc.file_path)
+                           .load_and_split(text_splitter=RecursiveCharacterTextSplitter())),
+                embedding=EMBED_MODEL),
+            llm=LLM)
+    ]
+
+    return AgentExecutor(agent=create_react_agent(llm=LLM, tools=tools, prompt=REACT_PROMPT_TEMPLATE),
+                         tools=tools,
+                         return_intermediate_steps=True,
+                         max_iterations=15,
+                         max_execution_time=None,
+                         early_stopping_method='force',  # TODO: 'generate'
+                         handle_parsing_errors=True,
+                         trim_intermediate_steps=-1)
+
+
+@enable_batch_qa_and_eval(output_name='LangChain-ReAct')
+@log_qa_and_update_output_file(output_name='LangChain-ReAct')
+def solve(fb_id: FbId) -> Answer:
+    return (get_or_create_react_agent_executor(doc_name=DOC_NAMES_BY_FB_ID[fb_id])
+            .invoke({'input': QS_BY_FB_ID[fb_id]})['output'])
+
+
+if __name__ == '__main__':
+    arg_parser = ArgumentParser()
+    arg_parser.add_argument('fb_id')
+    args = arg_parser.parse_args()
+
+    solve(fb_id
+          if (fb_id := args.fb_id).startswith(FB_ID_COL_NAME)
+          else f'{FB_ID_COL_NAME}_{fb_id}')
diff --git a/examples/FinanceBench/make.bat b/examples/FinanceBench/make.bat
index 309bc8304..67f51a9bb 100644
--- a/examples/FinanceBench/make.bat
+++ b/examples/FinanceBench/make.bat
@@ -15,6 +15,7 @@ IF "%TARGET%"=="agent-solve-w-knowledge-w-llama3" GOTO agent-solve-w-knowledge-w
 IF "%TARGET%"=="agent-solve-w-knowledge-and-prog-store-w-llama3" GOTO agent-solve-w-knowledge-and-prog-store-w-llama3
 IF "%TARGET%"=="agent-solve-all-combos" GOTO agent-solve-all-combos
 
+IF "%TARGET%"=="langchain-react-solve" GOTO langchain-react-solve
 IF "%TARGET%"=="openai-assist" GOTO openai-assist
 
 IF "%TARGET%"=="rag-default-answer" GOTO rag-default-answer
@@ -74,6 +75,10 @@ IF "%TARGET%"=="streamlit-run" GOTO streamlit-run
   GOTO end
 
 
+:langchain-react-solve
+  poetry run python langchain_react.py %2
+  GOTO end
+
 :openai-assist
   poetry run python openai_assist.py %2
   GOTO end
diff --git a/make.bat b/make.bat
index 9bc169a38..28555adb2 100644
--- a/make.bat
+++ b/make.bat
@@ -56,7 +56,7 @@ IF "%TARGET%"=="launch-solver" GOTO launch-solver
   GOTO end
 
 :install-editable
-  python3 -m pip install -e ".[contrib]" --upgrade --user
+  python3 -m pip install -e ".[contrib, langchain]" --upgrade --user
   GOTO end
 
 
diff --git a/pyproject.toml b/pyproject.toml
index fccca245e..d5d92a4a9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -99,6 +99,9 @@ streamlit = {version = ">=1.38", optional = true}
 streamlit-extras = {version = ">=0.4", optional = true}
 streamlit-mic-recorder = {version = ">=0.0.8", optional = true}
 
+langchainhub = {version = ">=0.1", optional = true}
+faiss-cpu = {version = ">=1.8", optional = true}
+
 [tool.poetry.extras]
 contrib = [
   "streamlit",
@@ -106,6 +109,11 @@ contrib = [
   "streamlit-mic-recorder",
 ]
 
+langchain = [
+  "langchainhub",
+  "faiss-cpu",
+]
+
 
 [build-system]
 build-backend = "poetry.core.masonry.api"
@@ -128,6 +136,7 @@ disable = [
   "missing-class-docstring",
   "missing-function-docstring",
   "missing-module-docstring",
+  "no-name-in-module",
   "raw-checker-failed",
   "redefined-outer-name",
   "relative-beyond-top-level",