stream dataset strings directly into configure from dataset
j2whiting committed Feb 28, 2024 (parent 131679e, commit cc2d6e3)
Showing 4 changed files with 28 additions and 5 deletions.
core/entities.py (2 changes: 1 addition & 1 deletion)
@@ -10,7 +10,7 @@ class ConfigureModel(BaseModel):
 
 
 class ConfigureModelDataset(BaseModel):
-    url: str
+    datasets: str
     amr: Dict # expects AMR in JSON format
 
 
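With this rename, the model carries the raw dataset text inline instead of a URL, and payloads that still send a url field fail validation because datasets is now the required field. A minimal sketch under those assumptions (the core.entities import path is inferred from the file location; field values are illustrative):

from pydantic import ValidationError
from core.entities import ConfigureModelDataset  # import path assumed

# New shape: the dataset contents travel inline as a string.
cfg = ConfigureModelDataset(datasets="parameter,value\nbeta,0.25", amr={})

# Old shape: "url" is no longer a field, and the required "datasets" is missing.
try:
    ConfigureModelDataset(url="https://example.org/data.csv", amr={})
except ValidationError:
    pass  # pre-change payloads are rejected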
core/openai/prompts/dataset_config.py (14 changes: 12 additions & 2 deletions)
@@ -1,8 +1,18 @@
-DATASET_PROMPT = """
+REACT_DATASET_PROMPT = """
 You are a helpful agent that will accept a representation of a mathematical model and a dataset which contains parameter values which must be mapped into the mathematical model. You have access to tools which can help you fetch and manipulate datasets.
 The mathematical models may be represented in petri nets, regnets, stockflow models, or other model formats. The dataset may be in the form of a matrix where the index and column names are useful for mapping the values, or parameters, within the dataset to the model.
 Your goal is to map the dataset to the model and provide the results of the mapping. If you cannot map the dataset to the model, use the string "null" as a placeholder.
 Use the following model configuration as a reference: ---MODEL CONFIGURATION START---{amr}---MODEL CONFIGURATION END---. --PATH TO DATASET START---{dataset_path}---PATH TO DATASET END--. Ensure that the output follows the below model configuration and is JSON serializable.
-Once you have enough information from loading the dataset in your scratchpad, populate the missing values in the configuration as your final answer. Only write the parameter section of the configuration, not the entire configuration. Pay close attention to which parameter corresponds to which linkage::
+Once you have enough information from loading the dataset in your scratchpad, populate the missing values in the configuration as your final answer. Only write the parameter section of the configuration, not the entire configuration. Pay close attention to which parameter corresponds to which linkage:
 {{
 """
+
+DATASET_PROMPT = """
+You are a helpful agent that will accept a representation of a mathematical model and a dataset which contains parameter values which must be mapped into the mathematical model. The mathematical models may be represented in petri nets, regnets, stockflow models, or other model formats. The dataset may be in the form of a matrix where the index and column names are useful for mapping the values, or parameters, within the dataset to the model.
+Your goal is to map the dataset to the model and provide the results of the mapping. If you cannot map the dataset to the model, use the string "null" as a placeholder.
+Use the following model configuration as a reference: ---MODEL CONFIGURATION START---{amr}---MODEL CONFIGURATION END---. --DATASETS START---{dataset_path}---DATASETS END--. Ensure that the output follows the below model configuration and is JSON serializable.
+Once you have enough information from loading the dataset in your scratchpad, populate the missing values in the configuration as your final answer. Only write the parameter section of the configuration, not the entire configuration. Pay close attention to which parameter corresponds to which linkage. Do not generate markdown.:
+{{
+"""

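The doubled braces at the end of each template escape str.format, so the rendered prompt ends with a literal opening brace for the model to complete, and the new DATASET_PROMPT still uses dataset_path as the format keyword even though the delimiters now read DATASETS. A small sketch (import path assumed; argument values illustrative):

from core.openai.prompts.dataset_config import DATASET_PROMPT  # import path assumed

prompt = DATASET_PROMPT.format(
    amr='{"parameters": [{"id": "beta", "value": null}]}',  # illustrative serialized AMR
    dataset_path="parameter,value\nbeta,0.25",              # raw dataset text, despite the name
)
assert prompt.rstrip().endswith("{")  # the escaped {{ survives formatting as a single brace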
core/openai/tool_utils.py (15 changes: 14 additions & 1 deletion)
@@ -116,12 +116,25 @@ def embedding_chain(text: str) -> List:
     output = client.embeddings.create(model="text-embedding-ada-002", input=text)
     return output.data[0].embedding
 
-def config_from_dataset(amr: str, dataset_path: str) -> str:
+def react_config_from_dataset(amr: str, dataset_path: str) -> str:
     agent = OpenAIAgent(DatasetConfig)
     react_manager = ReActManager(agent, executor=AgentExecutor(toolset=DatasetConfig))
     query = DATASET_PROMPT.format(amr=amr, dataset_path=dataset_path)
     return react_manager.run(query)
 
+def config_from_dataset(amr: str, datasets: str) -> str:
+    prompt = DATASET_PROMPT.format(amr=amr, dataset_path=datasets)
+    client = OpenAI()
+    output = client.chat.completions.create(
+        model="gpt-4-0125-preview",
+        top_p=0,
+        max_tokens=4000,
+        messages=[
+            {"role": "user", "content": prompt},
+        ],
+    )
+    return json.loads(output.choices[0].message.content)
+
 def compare_models(model_cards: List[str]) -> str:
     prompt = MODEL_METADATA_COMPARE_PROMPT.format(model_cards="--------".join(model_cards))
     client = OpenAI()
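Unlike react_config_from_dataset, the new config_from_dataset makes a single chat-completion call and returns the parsed JSON (despite the -> str annotation), so it assumes json is imported at module level and relies on the model replying with bare JSON, which the prompt's "Do not generate markdown" instruction is meant to enforce. A hedged usage sketch (import path assumed; requires OPENAI_API_KEY in the environment):

import json
from core.openai.tool_utils import config_from_dataset  # import path assumed

try:
    params = config_from_dataset(
        amr='{"parameters": [{"id": "beta", "value": null}]}',  # serialized AMR, illustrative
        datasets="parameter,value\nbeta,0.25",                  # raw dataset text, illustrative
    )
    print(params)  # e.g. {"parameters": [{"id": "beta", "value": 0.25}]}
except json.JSONDecodeError:
    # Raised inside config_from_dataset if the model wraps its reply in markdown fences.
    pass

Setting top_p=0 effectively makes decoding greedy, which helps keep the returned JSON stable across runs.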
tasks/dataset_configure.py (2 changes: 1 addition & 1 deletion)
@@ -22,7 +22,7 @@ def main():
 
     taskrunner.log("Sending request to OpenAI API")
     response = config_from_dataset(
-        dataset_path=input_model.url, amr=amr
+        datasets=input_model.datasets, amr=amr
     )
     taskrunner.log("Received response from OpenAI API")
 
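Taken together, the task now streams the dataset text straight from the validated input into the prompt. A minimal end-to-end sketch (the taskrunner plumbing around main() is omitted, and serializing the AMR dict with json.dumps is an assumption, since that step is not visible in this diff):

import json
from core.entities import ConfigureModelDataset          # import paths assumed
from core.openai.tool_utils import config_from_dataset

input_model = ConfigureModelDataset(
    datasets="parameter,value\nbeta,0.25",                # inline dataset text, illustrative
    amr={"parameters": [{"id": "beta", "value": None}]},  # illustrative AMR dict
)
amr = json.dumps(input_model.amr)                         # assumed serialization step
response = config_from_dataset(datasets=input_model.datasets, amr=amr)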
