DARPA-ASKEM · dgauldie · Aug 20, 2024 · Aug 14, 2024 · Aug 14, 2024 · Aug 14, 2024
diff --git a/.editorconfig b/.editorconfig
@@ -24,3 +24,12 @@ end_of_line = crlf
 
 [*.gradle]
 indent_size = 4
+
+[*.json]
+indent_size = 2
+indent_style = space
+
+[*.py]
+indent_size = 4
+indent_style = space
+
diff --git a/Dockerfile b/Dockerfile
@@ -1,4 +1,4 @@
-FROM python:3.10-slim
+FROM python:3.11-slim
 
 WORKDIR /usr/src/app
 
@@ -10,4 +10,4 @@ RUN pip install --no-cache-dir -r requirements.txt
 
 COPY . .
 
-CMD ["uvicorn", "api.run:app", "--host", "0.0.0.0", "--port", "8000"]
+CMD ["uvicorn", "api.run:app", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/gollm/openai/prompts/configuration.json b/gollm/openai/prompts/configuration.json
@@ -0,0 +1,274 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$defs": {
+    "modelConfiguration": {
+      "type": "object",
+      "properties": {
+        "description": {
+          "type": [
+            "string",
+            "null"
+          ]
+        },
+        "inferredParameterList": {
+          "type": [
+            "array",
+            "null"
+          ],
+          "items": {
+            "type": "object",
+            "properties": {
+              "distribution": {
+                "type": "object",
+                "properties": {
+                  "parameters": {
+                    "type": "object",
+                    "properties": {
+                      "value": {
+                        "type": [
+                          "number",
+                          "null"
+                        ]
+                      },
+                      "minimum": {
+                        "type": [
+                          "number",
+                          "null"
+                        ]
+                      },
+                      "maximum": {
+                        "type": [
+                          "number",
+                          "null"
+                        ]
+                      }
+                    },
+                    "required": [
+                      "value",
+                      "minimum",
+                      "maximum"
+                    ],
+                    "additionalProperties": false
+                  },
+                  "type": {
+                    "type": "string"
+                  }
+                },
+                "required": [
+                  "parameters",
+                  "type"
+                ],
+                "additionalProperties": false
+              },
+              "referenceId": {
+                "type": "string"
+              },
+              "source": {
+                "type": "string"
+              },
+              "type": {
+                "type": "string",
+                "enum": [
+                  "initial",
+                  "parameter",
+                  "observable",
+                  "inferred"
+                ]
+              }
+            },
+            "required": [
+              "distribution",
+              "referenceId",
+              "source",
+              "type"
+            ],
+            "additionalProperties": false
+          }
+        },
+        "initialSemanticList": {
+          "type": "array",
+          "items": {
+            "type": "object",
+            "properties": {
+              "expression": {
+                "type": "string"
+              },
+              "expressionMathml": {
+                "type": "string"
+              },
+              "source": {
+                "type": "string"
+              },
+              "target": {
+                "type": "string"
+              },
+              "type": {
+                "type": "string",
+                "enum": [
+                  "initial",
+                  "parameter",
+                  "observable",
+                  "inferred"
+                ]
+              }
+            },
+            "required": [
+              "expression",
+              "expressionMathml",
+              "source",
+              "target",
+              "type"
+            ],
+            "additionalProperties": false
+          }
+        },
+        "modelId": {
+          "type": "string"
+        },
+        "name": {
+          "type": "string"
+        },
+        "observableSemanticList": {
+          "type": "array",
+          "items": {
+            "type": "object",
+            "properties": {
+              "expression": {
+                "type": "string"
+              },
+              "expressionMathml": {
+                "type": "string"
+              },
+              "referenceId": {
+                "type": "string"
+              },
+              "source": {
+                "type": "string"
+              },
+              "states": {
+                "type": "array",
+                "items": {
+                  "type": "string"
+                }
+              },
+              "type": {
+                "type": "string",
+                "enum": [
+                  "initial",
+                  "parameter",
+                  "observable",
+                  "inferred"
+                ]
+              }
+            },
+            "required": [
+              "expression",
+              "expressionMathml",
+              "referenceId",
+              "source",
+              "states",
+              "type"
+            ],
+            "additionalProperties": false
+          }
+        },
+        "parameterSemanticList": {
+          "type": "array",
+          "items": {
+            "type": "object",
+            "properties": {
+              "distribution": {
+                "type": "object",
+                "properties": {
+                  "parameters": {
+                    "type": "object",
+                    "properties": {
+                      "value": {
+                        "type": [
+                          "number",
+                          "null"
+                        ]
+                      },
+                      "minimum": {
+                        "type": [
+                          "number",
+                          "null"
+                        ]
+                      },
+                      "maximum": {
+                        "type": [
+                          "number",
+                          "null"
+                        ]
+                      }
+                    },
+                    "required": [
+                      "value",
+                      "minimum",
+                      "maximum"
+                    ],
+                    "additionalProperties": false
+                  },
+                  "type": {
+                    "type": "string"
+                  }
+                },
+                "required": [
+                  "parameters",
+                  "type"
+                ],
+                "additionalProperties": false
+              },
+              "referenceId": {
+                "type": "string"
+              },
+              "source": {
+                "type": "string"
+              },
+              "type": {
+                "type": "string",
+                "enum": [
+                  "initial",
+                  "parameter",
+                  "observable",
+                  "inferred"
+                ]
+              }
+            },
+            "required": [
+              "distribution",
+              "referenceId",
+              "source",
+              "type"
+            ],
+            "additionalProperties": false
+          }
+        }
+      },
+      "required": [
+        "description",
+        "inferredParameterList",
+        "initialSemanticList",
+        "modelId",
+        "name",
+        "observableSemanticList",
+        "parameterSemanticList"
+      ],
+      "additionalProperties": false
+    }
+  },
+  "type": "object",
+  "properties": {
+    "conditions": {
+      "type": "array",
+      "items": {
+        "$ref": "#/$defs/modelConfiguration"
+      },
+      "additionalProperties": false
+    }
+  },
+  "required": [
+    "conditions"
+  ],
+  "additionalProperties": false
+}
diff --git a/gollm/openai/prompts/petrinet_config.py b/gollm/openai/prompts/petrinet_config.py
@@ -1,39 +1,42 @@
 PETRINET_PROMPT = """
-      You are a helpful agent designed to find initial parameters for a given petri net model file and a given research paper describing the mathematical model.
-      Use the following petri net json file as a reference: {petrinet}.
-	  Do not respond in full sentences, only populate the JSON output with conditions and parameters.
-      Assume that parameter fields with missing values may have multiple different sets values discussed in the user provided text for different conditions.\n
-      Return the different sets of initial parameters for the petri net model file like so:
-	  {{"conditions": {{"condition_1": "description of condition_1", "condition_2": "description of condition_2", ...}},
-	  "parameters": [{{
-		"id": "beta",
-		"name": "β",
-		"value": {{'condition_1': 0.1, 'condition_2': 0.2}},
-		"description": "infection rate",
-		"units": {{ "expression": "1/(person*day)", "expression_mathml": "<apply><divide/><cn>1</cn><apply><times/><ci>person</ci><ci>day</ci></apply></apply>" }}
-		"distribution": {{
-			"type": "Uniform1",
-			"parameters": {{
-				"minimum": 2.6e-7,
-				"maximum": 2.8e-7
-			}}
-		}},
-      ....
-      }}
-	  ],
-      }}
-      If a condition is not mentioned in the following text body, then the value should be set to the string "null". Be sure to use consistent naming conventions for the conditions.
-      Instead of 'condition_1' and 'condition_2'.. use names that are descriptive of the conditions.
-
-	  For each parameter, following below instructions, fill the value for `name`, `description`, `units`, and `distribution` fields if not provided in the petrinet file.
-	  - For the name, extract from the user provided text or derive it from the id whenever possible. Here are some examples of id to name mappings: {{ id: "beta", name: "β" }}, {{ id: "gamma", name: "γ" }}, {{ id: "S0", name: "S₀" }}, {{ id: "I0", name: "R₀" }}. If a name can't be generated, omit the field.
-	  - For the description, provide long-form description of the parameter. If the description can not be found, set it to an empty string "",
-	  - For units, provide both "units.expression" (a unicode string) and "units.expression_mathml" (MathML format). Ensure both units are valid and represent the same unit. If the unit is not found or not valid, omit the units field.
-	  - For the distribution, if present, provide 'distribution.type' and 'distribution.parameters' using 'name' and 'parameters' from the following probability distribution ontology JSON. If a valid distribution is not found, omit the distribution field. Make sure following JSON content is only used for the distribution field and must not affect other fields. --START PROBABILITY DISTRIBUTION ONTOLOGY JSON-- {pb} --END PROBABILITY DISTRIBUTION ONTOLOGY JSON--
-
-  	  Only use parameters found in the reference petrinet file provided above.
-
-      Ensure that the output follows the above petri net format and can be serialized as a JSON. Specifically populate parameters and initials. Use the following
-      text body to answer the user's query: --START USER PROVIDED TEXT-- {research_paper}--END USER PROVIDED TEXT--\n\n Answer:
-         {{
+You are a helpful agent designed to find model configurations for a given Petri net model, one for each of the conditions described in a research paper.
+
+Use the following Petri net JSON file as a reference:
+
+---START PETRI NET MODEL JSON---
+{petrinet}
+---END PETRI NET MODEL JSON---
+
+Use the following user-provided text as the research paper to answer the query:
+
+---START USER-PROVIDED TEXT---
+{research_paper}
+---END USER-PROVIDED TEXT---
+
+Assume that the user-provided text describes multiple conditions to which the model can be applied. Create a model configuration for each condition.
+Be sure to extract parameter values and initial values from the user-provided text, and do not use the default values from the Petri net model.
+Be sure to use consistent naming conventions for the conditions. Instead of 'condition_1' and 'condition_2', use descriptive names.
+
+For each condition, create a model configuration JSON object that satisfies the JSON schema specified in the response format. To do this, follow the instructions below:
+1.	Create a value for `name` and `description` from the user-provided text.
+2.	For the description, provide a long-form description of the condition. If the description cannot be created from the user-provided text, set it to an empty string.
+3.	`modelId` should reference the id of the Petri net model.
+4.	For each initial specified in the Petri net model ODE semantics, create an initial semantic object. Do not create new initial semantic objects if they are not included in the original Petri net model. You should set initial semantic object fields using the following rules:
+    a.	`target` should reference the id of the initial variable from the Petri net model ODE semantics.
+    b.	`source` should reference the title or file name of the research paper.
+    c.	`type` should be set to "initial".
+    d.	`expression` should be written in LaTeX format.
+    e.	`expressionMathml` should be written in MathML format.
+    f.	For `expression` and `expressionMathml`, ensure both are valid and represent the same expression.
+5.	For each parameter specified in the Petri net model ODE semantics, create a parameter semantic object. Do not create new parameter semantic objects if they are not included in the original Petri net model. You should set parameter semantic object fields using the following rules:
+    a.	`referenceId` should reference the id of the parameter.
+    b.	`source` should reference the title or file name of the research paper.
+    c.	`type` should be set to "parameter".
+    d.	Be sure to extract parameter values from the user-provided text, and do not use the default values from the Petri net model.
+        -	If the extracted parameter value is a single constant value, set the distribution parameter `value` to the constant value and set the distribution `type` to "Constant".
+        -	If the extracted parameter value is a distribution with a maximum and minimum value, set the distribution `type` to "Uniform" and populate the distribution parameter `minimum` and `maximum` fields.
+
+Do not respond in full sentences; only create a JSON object that satisfies the JSON schema specified in the response format.
+
+Answer:
 """