diff --git a/core/entities.py b/core/entities.py index 78326fb..22513ab 100644 --- a/core/entities.py +++ b/core/entities.py @@ -12,6 +12,7 @@ class ConfigureModel(BaseModel): class ConfigureModelDataset(BaseModel): datasets: List[str] amr: Dict # expects AMR in JSON format + matrix_str: str class ModelCardModel(BaseModel): diff --git a/core/openai/prompts/dataset_config.py b/core/openai/prompts/dataset_config.py index 7fe5eeb..df7bcd6 100644 --- a/core/openai/prompts/dataset_config.py +++ b/core/openai/prompts/dataset_config.py @@ -50,7 +50,7 @@ Only populate values from the datasets. If a value is not present in the datasets, use the default value from the model configuration, or null. The datasets may be in the form of a matrix where the index and column names are useful for mapping the values of initials and parameters within the dataset to the model. Your goal is to map the dataset to the model and provide the results of the mapping. If you cannot map the dataset to the model, use the string "null" as a placeholder. - Use the following model configuration as a reference: ---MODEL CONFIGURATION START---{amr}---MODEL CONFIGURATION END---. ---DATASETS START---{datasets}---DATASETS END--. Ensure that the output follows the below model configuration and is JSON serializable. + Use the following model configuration as a reference: ---MODEL CONFIGURATION START---{amr}---MODEL CONFIGURATION END---\n ---MODEL MAPPING START---{matrix_str}---MODEL MAPPING END---\n ---DATASETS START---{datasets}---DATASETS END---. Ensure that the output follows the below model configuration and is JSON serializable. Once you have enough information from loading the dataset in your scratchpad, populate the missing values in the configuration as your final answer. 
Parameters should follow the format:\n values: [ {{"id": "str", "value": float, "type": "parameter" or "initial"}}, diff --git a/core/openai/tool_utils.py b/core/openai/tool_utils.py index ddcd8a9..439fa7c 100644 --- a/core/openai/tool_utils.py +++ b/core/openai/tool_utils.py @@ -144,12 +144,12 @@ def react_config_from_dataset(amr: str, dataset_path: str) -> str: return react_manager.run(query) -def config_from_dataset(amr: str, datasets: List[str]) -> str: +def config_from_dataset(amr: str, datasets: List[str], model_mapping: str = "") -> str: dataset_text = "" for idx in range(len(datasets)): dataset_text += f"..dataset_{idx + 1} start..\n {datasets[idx]} \n...dataset_{idx + 1} end...\n" - prompt = DATASET_PROMPT.format(amr=amr, datasets=dataset_text) + prompt = DATASET_PROMPT.format(amr=amr, matrix_str=model_mapping, datasets=dataset_text) client = OpenAI() output = client.chat.completions.create( model="gpt-4-0125-preview", diff --git a/tasks/dataset_configure.py b/tasks/dataset_configure.py index ca9bbd7..095df79 100644 --- a/tasks/dataset_configure.py +++ b/tasks/dataset_configure.py @@ -22,7 +22,7 @@ def main(): amr = json.dumps(input_model.amr, separators=(",", ":")) taskrunner.log("Sending request to OpenAI API") - response = config_from_dataset(datasets=input_model.datasets, amr=amr) + response = config_from_dataset(datasets=input_model.datasets, model_mapping=input_model.matrix_str, amr=amr) taskrunner.log("Received response from OpenAI API") taskrunner.write_output_with_timeout({"response": response})