Commit a1693f1

accept separate matrix_str from HMI for dataset config

j2whiting committed Apr 3, 2024
1 parent d16fb2a commit a1693f1
Showing 4 changed files with 5 additions and 4 deletions.
1 change: 1 addition & 0 deletions core/entities.py

@@ -12,6 +12,7 @@ class ConfigureModel(BaseModel):
 class ConfigureModelDataset(BaseModel):
     datasets: List[str]
     amr: Dict  # expects AMR in JSON format
+    matrix_str: str


 class ModelCardModel(BaseModel):
2 changes: 1 addition & 1 deletion core/openai/prompts/dataset_config.py

@@ -50,7 +50,7 @@
 Only populate values from the datasets. If a value is not present in the datasets, use the default value from the model configuration, or null.
 The datasets may be in the form of a matrix where the index and column names are useful for mapping the values of initials and parameters within the dataset to the model.
 Your goal is to map the dataset to the model and provide the results of the mapping. If you cannot map the dataset to the model, use the string "null" as a placeholder.
-Use the following model configuration as a reference: ---MODEL CONFIGURATION START---{amr}---MODEL CONFIGURATION END---. ---DATASETS START---{datasets}---DATASETS END--. Ensure that the output follows the below model configuration and is JSON serializable.
+Use the following model configuration as a reference: ---MODEL CONFIGURATION START---{amr}---MODEL CONFIGURATION END---\n --- MODEL MAPPING START --- {matrix_str} ---MODEL MAPPING END ---\n ---DATASETS START---{datasets}---DATASETS END--. Ensure that the output follows the below model configuration and is JSON serializable.
 Once you have enough information from loading the dataset in your scratchpad, populate the missing values in the configuration as your final answer. Parameters should follow the format:\n
 values: [
 {{"id": "str", "value": float, "type": "parameter" or "initial"}},
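The updated template can be exercised with plain `str.format`. The sketch below uses a shortened stand-in for `DATASET_PROMPT` (the real template in `dataset_config.py` carries much more instruction text) to show how the new `{matrix_str}` placeholder is filled alongside the existing `{amr}` and `{datasets}` slots; all values passed in are hypothetical.

```python
# Shortened stand-in for DATASET_PROMPT from core/openai/prompts/dataset_config.py;
# only the delimiter markers are reproduced here.
DATASET_PROMPT = (
    "---MODEL CONFIGURATION START---{amr}---MODEL CONFIGURATION END---\n"
    "--- MODEL MAPPING START --- {matrix_str} ---MODEL MAPPING END ---\n"
    "---DATASETS START---{datasets}---DATASETS END--"
)

prompt = DATASET_PROMPT.format(
    amr='{"model": "SIR"}',          # AMR JSON, serialized upstream (hypothetical)
    matrix_str="S,I,R",              # hypothetical matrix string from the HMI
    datasets="..dataset_1 start..\n t,S,I,R \n...dataset_1 end...\n",
)
print("MODEL MAPPING START" in prompt)  # → True
```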
4 changes: 2 additions & 2 deletions core/openai/tool_utils.py

@@ -144,12 +144,12 @@ def react_config_from_dataset(amr: str, dataset_path: str) -> str:
     return react_manager.run(query)


-def config_from_dataset(amr: str, datasets: List[str]) -> str:
+def config_from_dataset(amr: str, model_mapping: str, datasets: List[str]) -> str:
     dataset_text = ""
     for idx in range(len(datasets)):
         dataset_text += f"..dataset_{idx + 1} start..\n {datasets[idx]} \n...dataset_{idx + 1} end...\n"

-    prompt = DATASET_PROMPT.format(amr=amr, datasets=dataset_text)
+    prompt = DATASET_PROMPT.format(amr=amr, matrix_str=model_mapping, datasets=dataset_text)
     client = OpenAI()
     output = client.chat.completions.create(
         model="gpt-4-0125-preview",
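The delimiter loop that `config_from_dataset` runs before the prompt is formatted can be isolated and tested on its own. This is a minimal sketch of that loop (the surrounding OpenAI call is omitted); the helper name `build_dataset_text` is introduced here for illustration only.

```python
from typing import List


def build_dataset_text(datasets: List[str]) -> str:
    # Mirrors the per-dataset delimiter loop in config_from_dataset
    # (core/openai/tool_utils.py); datasets are 1-indexed in the markers.
    dataset_text = ""
    for idx in range(len(datasets)):
        dataset_text += f"..dataset_{idx + 1} start..\n {datasets[idx]} \n...dataset_{idx + 1} end...\n"
    return dataset_text


text = build_dataset_text(["t,S\n0,99", "t,I\n0,1"])
print(text.count("start.."))  # one start marker per dataset → 2
```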
2 changes: 1 addition & 1 deletion tasks/dataset_configure.py

@@ -22,7 +22,7 @@ def main():
     amr = json.dumps(input_model.amr, separators=(",", ":"))

     taskrunner.log("Sending request to OpenAI API")
-    response = config_from_dataset(datasets=input_model.datasets, amr=amr)
+    response = config_from_dataset(datasets=input_model.datasets, model_mapping=input_model.matrix_str, amr=amr)
     taskrunner.log("Received response from OpenAI API")

     taskrunner.write_output_with_timeout({"response": response})
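With the new field, the task input arriving from the HMI carries `matrix_str` alongside `datasets` and `amr`. A minimal sketch of such a payload follows; the field names match `ConfigureModelDataset` in `core/entities.py`, while the values are hypothetical.

```python
import json

# Hypothetical task input; field names taken from ConfigureModelDataset.
raw = json.dumps({
    "datasets": ["t,S,I,R\n0,99,1,0"],
    "amr": {"header": {"name": "SIR"}},       # AMR in JSON format
    "matrix_str": "S,I,R",                    # the separate matrix string from the HMI
})

payload = json.loads(raw)
print(sorted(payload))  # → ['amr', 'datasets', 'matrix_str']
```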
