From eb004d8d41a007be170bfcfdf8aabee779fdf279 Mon Sep 17 00:00:00 2001
From: Pip Liggins <philippa.liggins@dtc.ox.ac.uk>
Date: Wed, 17 May 2023 15:39:20 +0100
Subject: [PATCH 1/6] Add fieldOption, skipFieldPattern options

---
 adtl/__init__.py        | 15 +++++++++++++++
 schemas/dev.schema.json |  4 ++++
 2 files changed, 19 insertions(+)

diff --git a/adtl/__init__.py b/adtl/__init__.py
index c59ace1..a18219d 100644
--- a/adtl/__init__.py
+++ b/adtl/__init__.py
@@ -63,7 +63,19 @@ def get_value_unhashed(row: StrDict, rule: Rule, ctx: Context = None) -> Any:
         rule, list
     ):  # not a container, is constant
         return rule
+    # Check whether field is present if it's allowed to be passed over
+    if "fieldOption" in rule:
+        try:
+            row[rule["fieldOption"]]
+            row["field"] = row.pop("fieldOption")
+        except KeyError:
+            return None
     if "field" in rule:
+        if ctx and ctx.get("skip_pattern").match(rule["field"]):
+            try:
+                row[rule["field"]]
+            except KeyError:
+                return None
         # do not parse field if condition is not met
         if "if" in rule and not parse_if(row, rule["if"]):
             return None
@@ -485,6 +497,9 @@ def ctx(self, attribute: str):
             "defaultDateFormat": self.header.get(
                 "defaultDateFormat", DEFAULT_DATE_FORMAT
             ),
+            "skip_pattern": re.compile(self.header.get("skipFieldPattern"))
+            if self.header.get("skipFieldPattern")
+            else False,
         }
 
     def validate_spec(self):
diff --git a/schemas/dev.schema.json b/schemas/dev.schema.json
index ccbc6a0..4f424e7 100644
--- a/schemas/dev.schema.json
+++ b/schemas/dev.schema.json
@@ -27,6 +27,10 @@
               "type": "string",
               "description": "This is only used with combinedType, specifies a regular expression matching multiple fields"
             },
+            "fieldOption": {
+              "type": "string",
+              "description": "Corresponding field name in source file, can be skipped if not present in data"
+            },
             "sensitive": {
               "type": "boolean",
               "description": "Indicates to the parser whether the field is sensitive. Usually a sensitive field is hashed or encrypted before storing in the database.",

From 5d6d9f079b6c972a634c08c19d31cc9c50559f7e Mon Sep 17 00:00:00 2001
From: Pip Liggins <philippa.liggins@dtc.ox.ac.uk>
Date: Thu, 18 May 2023 17:05:37 +0100
Subject: [PATCH 2/6] Works and passes tests (more needed) but slooow

---
 adtl/__init__.py                     | 59 ++++++++++++++++++----------
 schemas/dev.schema.json              |  9 +++--
 tests/__snapshots__/test_parser.ambr | 16 ++++++++
 tests/parsers/skip_field.json        | 30 ++++++++++++++
 tests/schemas/epoch-data.schema.json |  8 +++-
 tests/sources/skip_field_absent.csv  |  3 ++
 tests/sources/skip_field_present.csv |  3 ++
 tests/test_parser.py                 | 39 +++++++++++++++++-
 8 files changed, 141 insertions(+), 26 deletions(-)
 create mode 100644 tests/parsers/skip_field.json
 create mode 100644 tests/sources/skip_field_absent.csv
 create mode 100644 tests/sources/skip_field_present.csv

diff --git a/adtl/__init__.py b/adtl/__init__.py
index a18219d..f0a2c54 100644
--- a/adtl/__init__.py
+++ b/adtl/__init__.py
@@ -11,7 +11,7 @@
 from datetime import datetime
 from pathlib import Path
 from functools import lru_cache
-from typing import Any, Dict, Iterable, List, Optional, Union
+from typing import Any, Dict, Iterable, List, Optional, Union, Callable
 
 import pint
 import tomli
@@ -64,18 +64,10 @@ def get_value_unhashed(row: StrDict, rule: Rule, ctx: Context = None) -> Any:
     ):  # not a container, is constant
         return rule
     # Check whether field is present if it's allowed to be passed over
-    if "fieldOption" in rule:
-        try:
-            row[rule["fieldOption"]]
-            row["field"] = row.pop("fieldOption")
-        except KeyError:
-            return None
     if "field" in rule:
-        if ctx and ctx.get("skip_pattern").match(rule["field"]):
-            try:
-                row[rule["field"]]
-            except KeyError:
-                return None
+        # do not check for condition if field is missing
+        if skip_field(row, rule, ctx):
+            return None
         # do not parse field if condition is not met
         if "if" in rule and not parse_if(row, rule["if"]):
             return None
@@ -153,19 +145,33 @@ def matching_fields(fields: List[str], pattern: str) -> List[str]:
     return [f for f in fields if compiled_pattern.match(f)]
 
 
-def parse_if(row: StrDict, rule: StrDict) -> bool:
+def parse_if(row: StrDict, rule: StrDict, ctx: Callable = None, can_skip=False) -> bool:
     "Parse conditional statements and return a boolean"
 
     n_keys = len(rule.keys())
-    assert n_keys == 1
+    # assert n_keys == 1
+    assert n_keys == 1 or n_keys == 2
+    if n_keys == 2:
+        assert "can_skip" in rule
+        can_skip = True
     key = next(iter(rule.keys()))
     if key == "not" and isinstance(rule[key], dict):
-        return not parse_if(row, rule[key])
+        return not parse_if(row, rule[key], ctx, can_skip)
     elif key == "any" and isinstance(rule[key], list):
-        return any(parse_if(row, r) for r in rule[key])
+        return any(parse_if(row, r, ctx, can_skip) for r in rule[key])
     elif key == "all" and isinstance(rule[key], list):
-        return all(parse_if(row, r) for r in rule[key])
-    attr_value = row[key]
+        return all(parse_if(row, r, ctx, can_skip) for r in rule[key])
+    try:
+        attr_value = row[key]
+    except KeyError as e:
+        if can_skip == True:
+            return False
+        elif ctx:
+            if skip_field(row, {"field": key}, ctx(key)):
+                return False
+        else:
+            raise e
+
     if isinstance(rule[key], dict):
         cmp = next(iter(rule[key]))
         value = rule[key][cmp]
@@ -562,8 +568,16 @@ def default_if(self, table: str, rule: StrDict):
 
         if "combinedType" not in rule[option]:
             field = rule[option]["field"]
-            if "values" in rule[option]:
+            if "values" in rule[option] and "can_skip" in rule[option]:
+                if_rule = {
+                    "any": [
+                        {field: v, "can_skip": True} for v in rule[option]["values"]
+                    ]
+                }
+            elif "values" in rule[option]:
                 if_rule = {"any": [{field: v} for v in rule[option]["values"]]}
+            elif "can_skip" in rule[option]:
+                if_rule = {field: {"!=": ""}, "can_skip": True}
             else:
                 if_rule = {field: {"!=": ""}}
         else:
@@ -582,6 +596,11 @@ def default_if(self, table: str, rule: StrDict):
             )
             if_rule = {"any": sum(map(condition, rules), [])}
 
+            for ir in if_rule["any"]:
+                for r in rules:
+                    if str(list(ir.keys())[0]) in r.values() and "can_skip" in r.keys():
+                        ir["can_skip"] = True
+
         rule["if"] = if_rule
         return rule
 
@@ -603,7 +622,7 @@ def update_table(self, table: str, row: StrDict):
             for match in self.spec[table]:
                 if "if" not in match:
                     match = self.default_if(table, match)
-                if parse_if(row, match["if"]):
+                if parse_if(row, match["if"], self.ctx):
                     self.data[table].append(
                         remove_null_keys(
                             {
diff --git a/schemas/dev.schema.json b/schemas/dev.schema.json
index 4f424e7..371a519 100644
--- a/schemas/dev.schema.json
+++ b/schemas/dev.schema.json
@@ -27,10 +27,6 @@
               "type": "string",
               "description": "This is only used with combinedType, specifies a regular expression matching multiple fields"
             },
-            "fieldOption": {
-              "type": "string",
-              "description": "Corresponding field name in source file, can be skipped if not present in data"
-            },
             "sensitive": {
               "type": "boolean",
               "description": "Indicates to the parser whether the field is sensitive. Usually a sensitive field is hashed or encrypted before storing in the database.",
@@ -88,6 +84,11 @@
                   ]
                 }
               }
+            },
+            "can_skip": {
+              "type": "boolean",
+              "description": "Indicates to the parser whether the field can be skipped without throwing an error if missing in the data.",
+              "default": false
             }
           }
         }
diff --git a/tests/__snapshots__/test_parser.ambr b/tests/__snapshots__/test_parser.ambr
index 69bcf4b..033157e 100644
--- a/tests/__snapshots__/test_parser.ambr
+++ b/tests/__snapshots__/test_parser.ambr
@@ -23,6 +23,22 @@
   
   '''
 # ---
+# name: test_skip_field_pattern_absent
+  '''
+  adtl_valid,adtl_error,cough,epoch,followup_cough,id,text
+  False,data.epoch must be date,1,11/01/1999,,1,Lorem ipsum
+  False,data.epoch must be date,0,19/12/2022,,2,example
+  
+  '''
+# ---
+# name: test_skip_field_pattern_present
+  '''
+  adtl_valid,adtl_error,cough,epoch,followup_cough,id,text
+  False,data.epoch must be date,1,11/01/1999,0,1,Lorem ipsum
+  False,data.epoch must be date,0,19/12/2022,1,2,example
+  
+  '''
+# ---
 # name: test_validation
   '''
   adtl_valid,adtl_error,admission_date,country_iso3,dataset_id,enrolment_date,ethnicity,sex_at_birth,subject_id
diff --git a/tests/parsers/skip_field.json b/tests/parsers/skip_field.json
new file mode 100644
index 0000000..725c8fe
--- /dev/null
+++ b/tests/parsers/skip_field.json
@@ -0,0 +1,30 @@
+{
+  "adtl": {
+    "name": "allow-skip-field-pattern",
+    "description": "Tests skipping missing fields",
+    "skipFieldPattern": "flw.*",
+    "tables": {
+      "table": {
+        "kind": "oneToOne",
+        "schema": "../schemas/epoch-data.schema.json"
+      }
+    }
+  },
+  "table": {
+    "id": {
+      "field": "Entry_ID"
+    },
+    "epoch": {
+      "field": "Epoch"
+    },
+    "text": {
+      "field": "Text"
+    },
+    "cough": {
+      "field": "cough"
+    },
+    "followup_cough": {
+      "field": "flw_cough"
+    }
+  }
+}
\ No newline at end of file
diff --git a/tests/schemas/epoch-data.schema.json b/tests/schemas/epoch-data.schema.json
index 5ba856e..abdc5f7 100644
--- a/tests/schemas/epoch-data.schema.json
+++ b/tests/schemas/epoch-data.schema.json
@@ -21,6 +21,12 @@
     },
     "text": {
       "description": "Text field"
+    },
+    "cough": {
+      "description": "Standard cough field"
+    },
+    "followup_cough": {
+      "description": "Follow-up cough field"
     }
   }
-}
+}
\ No newline at end of file
diff --git a/tests/sources/skip_field_absent.csv b/tests/sources/skip_field_absent.csv
new file mode 100644
index 0000000..9962f55
--- /dev/null
+++ b/tests/sources/skip_field_absent.csv
@@ -0,0 +1,3 @@
+Entry_ID,Epoch,Text,cough
+1,11/01/1999,Lorem ipsum,1
+2,19/12/2022,example,0
diff --git a/tests/sources/skip_field_present.csv b/tests/sources/skip_field_present.csv
new file mode 100644
index 0000000..515c984
--- /dev/null
+++ b/tests/sources/skip_field_present.csv
@@ -0,0 +1,3 @@
+Entry_ID,Epoch,Text,cough,flw_cough
+1,11/01/1999,Lorem ipsum,1,0
+2,19/12/2022,example,0,1
diff --git a/tests/test_parser.py b/tests/test_parser.py
index 1ff7621..a8a358c 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -279,6 +279,23 @@
     },
 ]
 
+RULE_FIELD_OPTION = {
+    "field": "aidshiv_mhyn",
+    "values": {"1": True, "0": False},
+    "can_skip": True,
+}
+
+# OBSERVATION_RULE_FIELD_OPTION = {
+#     "name": "bleeding",
+#     "phase": "admission",
+#     "date": "2023-05-18",
+#     "is_present": {
+#         "field": "bleed_ceterm_v2",
+#         "values": {"1": True, "0": False},
+#         "can_skip": True,
+#     },
+# }
+
 
 @pytest.mark.parametrize(
     "row_rule,expected",
@@ -350,6 +367,9 @@
             unordered(["Lopinavir/Ritonvir", "Interferon alpha"]),
         ),
         (({"first": "", "second": ""}, RULE_COMBINED_FIRST_NON_NULL), None),
+        (({"aidshiv": "1"}, RULE_FIELD_OPTION), None),
+        (({"aidshiv_mhyn": "1"}, RULE_FIELD_OPTION), True),
+        (({"aidshiv_mhyn": "2"}, RULE_FIELD_OPTION), None),
     ],
 )
 def test_get_value(row_rule, expected):
@@ -405,7 +425,6 @@ def test_one_to_many():
     assert actual_one_many_output_csv == ONE_MANY_OUTPUT
 
 
-# HERE
 def test_one_to_many_correct_if_behaviour():
     actual_row = list(
         parser.Parser(TEST_PARSERS_PATH / "oneToMany-missingIf.toml")
@@ -781,3 +800,21 @@ def test_apply_in_observations_table():
     )
 
     assert apply_observations_output == APPLY_OBSERVATIONS_OUTPUT
+
+
+def test_skip_field_pattern_present(snapshot):
+    transformed_csv_data = (
+        parser.Parser(TEST_PARSERS_PATH / "skip_field.json")
+        .parse(TEST_SOURCES_PATH / "skip_field_present.csv")
+        .write_csv("table")
+    )
+    assert transformed_csv_data == snapshot
+
+
+def test_skip_field_pattern_absent(snapshot):
+    transformed_csv_data = (
+        parser.Parser(TEST_PARSERS_PATH / "skip_field.json")
+        .parse(TEST_SOURCES_PATH / "skip_field_absent.csv")
+        .write_csv("table")
+    )
+    assert transformed_csv_data == snapshot

From 0c0b1426592b011feaea715b2dca71adaa63c733 Mon Sep 17 00:00:00 2001
From: Pip Liggins <philippa.liggins@dtc.ox.ac.uk>
Date: Fri, 19 May 2023 13:51:40 +0100
Subject: [PATCH 3/6] tidied up

---
 adtl/__init__.py | 62 ++++++++++++++++++++++++++++++++++--------------
 1 file changed, 44 insertions(+), 18 deletions(-)

diff --git a/adtl/__init__.py b/adtl/__init__.py
index f0a2c54..5807e6d 100644
--- a/adtl/__init__.py
+++ b/adtl/__init__.py
@@ -419,6 +419,24 @@ def read_definition(file: Path) -> Dict[str, Any]:
         raise ValueError(f"Unsupported file format: {file}")
 
 
+def skip_field(row, rule, ctx: Context = None):
+    "Returns True if the field is missing and allowed to be skipped"
+    # made no difference
+    if "can_skip" in rule:
+        if rule["can_skip"]:
+            if rule["field"] not in row:
+                return True
+            else:
+                return False
+    if ctx and ctx.get("skip_pattern"):
+        if ctx.get("skip_pattern").match(rule["field"]):
+            if rule["field"] not in row:
+                return True
+            else:
+                return False
+    return False
+
+
 class Parser:
     def __init__(self, spec: Union[str, Path, StrDict], include_defs: List[str] = []):
         "Loads specification from spec in format (default json)"
@@ -568,14 +586,12 @@ def default_if(self, table: str, rule: StrDict):
 
         if "combinedType" not in rule[option]:
             field = rule[option]["field"]
-            if "values" in rule[option] and "can_skip" in rule[option]:
-                if_rule = {
-                    "any": [
-                        {field: v, "can_skip": True} for v in rule[option]["values"]
-                    ]
-                }
-            elif "values" in rule[option]:
-                if_rule = {"any": [{field: v} for v in rule[option]["values"]]}
+            if "values" in rule[option]:
+                values = rule[option]["values"]
+                if "can_skip" in rule[option]:
+                    if_rule = {"any": [{field: v, "can_skip": True} for v in values]}
+                else:
+                    if_rule = {"any": [{field: v} for v in values]}
             elif "can_skip" in rule[option]:
                 if_rule = {field: {"!=": ""}, "can_skip": True}
             else:
@@ -589,17 +605,27 @@ def default_if(self, table: str, rule: StrDict):
                 "list",
             ], f"Invalid combinedType: {rule[option]['combinedType']}"
             rules = rule[option]["fields"]
-            condition = (
-                lambda rule: [{rule["field"]: v} for v in rule["values"]]
-                if "values" in rule
-                else [{rule["field"]: {"!=": ""}}]
-            )
-            if_rule = {"any": sum(map(condition, rules), [])}
 
-            for ir in if_rule["any"]:
-                for r in rules:
-                    if str(list(ir.keys())[0]) in r.values() and "can_skip" in r.keys():
-                        ir["can_skip"] = True
+            def create_if_rule(rule):  # better, but not faster
+                field = rule["field"]
+                values = rule.get("values", [])
+                can_skip = rule.get("can_skip", False)
+
+                if_condition = {}
+
+                if values and can_skip:
+                    if_condition = [{field: v, "can_skip": True} for v in values]
+                elif values:
+                    if_condition = [{field: v} for v in values]
+                elif can_skip:
+                    if_condition[field] = {"!=": ""}
+                    if_condition["can_skip"] = True
+                else:
+                    if_condition[field] = {"!=": ""}
+
+                return if_condition
+
+            if_rule = {"any": sum(map(create_if_rule, rules), [])}
 
         rule["if"] = if_rule
         return rule

From c69b7b2fccd742a0410695a6d1cbe687988baa79 Mon Sep 17 00:00:00 2001
From: Pip Liggins <philippa.liggins@dtc.ox.ac.uk>
Date: Fri, 19 May 2023 15:04:45 +0100
Subject: [PATCH 4/6] simplify skip_field

---
 adtl/__init__.py | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/adtl/__init__.py b/adtl/__init__.py
index 5807e6d..ac9566e 100644
--- a/adtl/__init__.py
+++ b/adtl/__init__.py
@@ -422,18 +422,10 @@ def read_definition(file: Path) -> Dict[str, Any]:
 def skip_field(row, rule, ctx: Context = None):
     "Returns True if the field is missing and allowed to be skipped"
     # made no difference
-    if "can_skip" in rule:
-        if rule["can_skip"]:
-            if rule["field"] not in row:
-                return True
-            else:
-                return False
-    if ctx and ctx.get("skip_pattern"):
-        if ctx.get("skip_pattern").match(rule["field"]):
-            if rule["field"] not in row:
-                return True
-            else:
-                return False
+    if rule.get("can_skip"):
+        return rule["field"] not in row
+    if ctx and ctx.get("skip_pattern") and ctx.get("skip_pattern").match(rule["field"]):
+        return rule["field"] not in row
     return False
 
 

From 835c69dd04dfcaa303dc6142e0ba109e45e6b073 Mon Sep 17 00:00:00 2001
From: Pip Liggins <philippa.liggins@dtc.ox.ac.uk>
Date: Fri, 19 May 2023 15:59:34 +0100
Subject: [PATCH 5/6] Add documentation, edit schema

---
 adtl/__init__.py        |  7 ++++---
 docs/specification.md   | 44 +++++++++++++++++++++++++++++++++++++++++
 schemas/dev.schema.json |  5 ++---
 3 files changed, 50 insertions(+), 6 deletions(-)

diff --git a/adtl/__init__.py b/adtl/__init__.py
index ac9566e..3973242 100644
--- a/adtl/__init__.py
+++ b/adtl/__init__.py
@@ -145,7 +145,9 @@ def matching_fields(fields: List[str], pattern: str) -> List[str]:
     return [f for f in fields if compiled_pattern.match(f)]
 
 
-def parse_if(row: StrDict, rule: StrDict, ctx: Callable = None, can_skip=False) -> bool:
+def parse_if(
+    row: StrDict, rule: StrDict, ctx: Callable[[str], dict] = None, can_skip=False
+) -> bool:
     "Parse conditional statements and return a boolean"
 
     n_keys = len(rule.keys())
@@ -419,9 +421,8 @@ def read_definition(file: Path) -> Dict[str, Any]:
         raise ValueError(f"Unsupported file format: {file}")
 
 
-def skip_field(row, rule, ctx: Context = None):
+def skip_field(row: StrDict, rule: StrDict, ctx: Context = None):
     "Returns True if the field is missing and allowed to be skipped"
-    # made no difference
     if rule.get("can_skip"):
         return rule["field"] not in row
     if ctx and ctx.get("skip_pattern") and ctx.get("skip_pattern").match(rule["field"]):
diff --git a/docs/specification.md b/docs/specification.md
index 4256b38..26e6656 100644
--- a/docs/specification.md
+++ b/docs/specification.md
@@ -45,6 +45,8 @@ These metadata fields are defined under a header key `adtl`.
 * **defs**: Definitions that can be referred to elsewhere in the schema
 * **include-def** (list): List of additional TOML or JSON files to import as
   definitions
+* **skipFieldPattern**: Regex string matching field names which may be skipped
+  if not present in a datafile, following the same syntax as the `fieldPattern` key.
 * **defaultDateFormat**: Default source date format, applied to all fields
   with either "date_" / "_date" in the field name or that have format date
   set in the JSON schema
@@ -297,6 +299,48 @@ fields =  [
 
 If *excludeWhen* is not set, no exclusions take place and all values are returned as-is.
 
+### Skippable fields
+
+In some cases, a study will be associated with multiple data files, all of which have been
+filled in to varying degrees. For example, one study site may not provide any follow-up data.
+
+Rather than writing a new parser for every data file with minor differences, parsers can be made
+robust to a certain amount of missing data by tagging applicable fields with `can_skip = true`,
+for example:
+
+```ini
+[[observation]]
+  name = "cough"
+  phase = "admission"
+  date = { field = "admit_date" }
+  is_present = { field = "cough_ceoccur_v2", description = "Cough", ref = "Y/N/NK", "can_skip" = true }
+```
+
+In this case, if adtl does not find `cough_ceoccur_v2` in the data it will skip over the field
+and continue, rather than throwing an error.
+
+If there are lots of fields missing all with similar field names, for example if followup data
+has been omitted and all the followup fields are labelled with a `flw` prefix e.g., `flw_cough`,
+`flw2_fatigue`, this can be specified at the top of the file:
+
+```ini
+[adtl]
+  name = "isaric-core"
+  description = "isaric-core"
+  skipFieldPattern = "flw.*"
+
+[table.sex_at_birth]
+  combinedType = "firstNonNull"
+  excludeWhen = "none"
+  fields = [
+    { field = "sex", values = { 1 = "male", 2 = "female" } },
+    { field = "flw_sex_at_birth", values = { 1 = "male", 2 = "female", 3 = "non_binary" } },
+    { field = "flw2_sex_at_birth", values = { 1 = "male", 2 = "female", 3 = "non_binary" } },
+  ]
+```
+
+Notice that in this case `can_skip` does not need to be added to the fields with a `flw` prefix.
+
 ### Data transformations (apply)
 
 Arbitrary functions can be applied to source fields. adtl ships with a library
diff --git a/schemas/dev.schema.json b/schemas/dev.schema.json
index 371a519..c794ca8 100644
--- a/schemas/dev.schema.json
+++ b/schemas/dev.schema.json
@@ -86,9 +86,8 @@
               }
             },
             "can_skip": {
-              "type": "boolean",
-              "description": "Indicates to the parser whether the field can be skipped without throwing an error if missing in the data.",
-              "default": false
+              "const": true,
+              "description": "Indicates to the parser whether the field can be skipped without throwing an error if missing in the data."
             }
           }
         }

From c36d03a756f7fa8523937795404008e382800bcd Mon Sep 17 00:00:00 2001
From: Pip Liggins <philippa.liggins@dtc.ox.ac.uk>
Date: Mon, 22 May 2023 12:49:50 +0100
Subject: [PATCH 6/6] Add tests

---
 adtl/__init__.py                              |   5 +-
 tests/__snapshots__/test_parser.ambr          |  12 +-
 tests/parsers/oneToMany-missingIf.toml        |  13 +++
 tests/parsers/skip_field.json                 |   4 +
 .../schemas/observation_defaultif.schema.json |   4 +-
 tests/sources/oneToManyIf.csv                 |   4 +-
 tests/sources/skip_field_present.csv          |   6 +-
 tests/test_parser.py                          | 107 +++++++++++++++---
 8 files changed, 126 insertions(+), 29 deletions(-)

diff --git a/adtl/__init__.py b/adtl/__init__.py
index 3973242..c1609b4 100644
--- a/adtl/__init__.py
+++ b/adtl/__init__.py
@@ -151,7 +151,6 @@ def parse_if(
     "Parse conditional statements and return a boolean"
 
     n_keys = len(rule.keys())
-    # assert n_keys == 1
     assert n_keys == 1 or n_keys == 2
     if n_keys == 2:
         assert "can_skip" in rule
@@ -166,7 +165,7 @@ def parse_if(
     try:
         attr_value = row[key]
     except KeyError as e:
-        if can_skip == True:
+        if can_skip is True:
             return False
         elif ctx:
             if skip_field(row, {"field": key}, ctx(key)):
@@ -599,7 +598,7 @@ def default_if(self, table: str, rule: StrDict):
             ], f"Invalid combinedType: {rule[option]['combinedType']}"
             rules = rule[option]["fields"]
 
-            def create_if_rule(rule):  # better, but not faster
+            def create_if_rule(rule):
                 field = rule["field"]
                 values = rule.get("values", [])
                 can_skip = rule.get("can_skip", False)
diff --git a/tests/__snapshots__/test_parser.ambr b/tests/__snapshots__/test_parser.ambr
index 033157e..0ca1170 100644
--- a/tests/__snapshots__/test_parser.ambr
+++ b/tests/__snapshots__/test_parser.ambr
@@ -25,17 +25,17 @@
 # ---
 # name: test_skip_field_pattern_absent
   '''
-  adtl_valid,adtl_error,cough,epoch,followup_cough,id,text
-  False,data.epoch must be date,1,11/01/1999,,1,Lorem ipsum
-  False,data.epoch must be date,0,19/12/2022,,2,example
+  adtl_valid,adtl_error,cough,epoch,followup_cough,headache,id,text
+  False,data.epoch must be date,1,11/01/1999,,,1,Lorem ipsum
+  False,data.epoch must be date,0,19/12/2022,,,2,example
   
   '''
 # ---
 # name: test_skip_field_pattern_present
   '''
-  adtl_valid,adtl_error,cough,epoch,followup_cough,id,text
-  False,data.epoch must be date,1,11/01/1999,0,1,Lorem ipsum
-  False,data.epoch must be date,0,19/12/2022,1,2,example
+  adtl_valid,adtl_error,cough,epoch,followup_cough,headache,id,text
+  False,data.epoch must be date,1,11/01/1999,0,3,1,Lorem ipsum
+  False,data.epoch must be date,0,19/12/2022,1,0,2,example
   
   '''
 # ---
diff --git a/tests/parsers/oneToMany-missingIf.toml b/tests/parsers/oneToMany-missingIf.toml
index 75fa1b7..8c4bb09 100644
--- a/tests/parsers/oneToMany-missingIf.toml
+++ b/tests/parsers/oneToMany-missingIf.toml
@@ -1,6 +1,7 @@
 [adtl]
   name = "sampleOneToMany - missingIf"
   description = "One to Many example where if statements are removed"
+  skipFieldPattern = "flw3.*"
 
   [adtl.tables.observation]
     kind = "oneToMany"
@@ -47,3 +48,15 @@
   is_present = { field = "flw2_fever_{n}", values = { 0 = false, 1 = true } }
   # if.any = [ { "flw2_fever_{n}" = 1 }, { "flw2_fever_{n}" = 0 } ]
   for.n.range = [1, 2]
+
+[[observation]]
+  name = "fatigue_malaise"
+  phase = "followup"
+  date = { field = "dt" }
+  is_present = { field = "flw3_fatigue", description = "Fatigue", values = { 1 = true, 0 = false } }
+
+[[observation]]
+  name = "severe_dehydration"
+  phase = "admission"
+  date = { field = "dt" }
+  is_present = { field = "dehydration_vsorres", description = "Severe dehydration:", ref = "Y/N/NK", "can_skip" = true }
diff --git a/tests/parsers/skip_field.json b/tests/parsers/skip_field.json
index 725c8fe..97079cc 100644
--- a/tests/parsers/skip_field.json
+++ b/tests/parsers/skip_field.json
@@ -25,6 +25,10 @@
     },
     "followup_cough": {
       "field": "flw_cough"
+    },
+    "headache": {
+      "field": "headache",
+      "can_skip": true
     }
   }
 }
\ No newline at end of file
diff --git a/tests/schemas/observation_defaultif.schema.json b/tests/schemas/observation_defaultif.schema.json
index 073b584..525f42c 100644
--- a/tests/schemas/observation_defaultif.schema.json
+++ b/tests/schemas/observation_defaultif.schema.json
@@ -58,7 +58,9 @@
                 "headache",
                 "oxygen_saturation",
                 "pao2_sample_type",
-                "history_of_fever"
+                "history_of_fever",
+                "fatigue_malaise",
+                "severe_dehydration"
             ],
             "description": "Observation name"
         }
diff --git a/tests/sources/oneToManyIf.csv b/tests/sources/oneToManyIf.csv
index 58a58df..fa5bd1e 100644
--- a/tests/sources/oneToManyIf.csv
+++ b/tests/sources/oneToManyIf.csv
@@ -1,2 +1,2 @@
-dt,dt_1,dt_2,headache_v2,oxy_vsorres,cough_ceoccur_v2,dry_cough_ceoccur_v2,wet_cough_ceoccur_v2,pao2_lbspec,flw2_fever_1,flw2_fever_2
-2022-02-05,2022-02-06,2022-02-07,2,87,3,1,2,3,1,0
+dt,dt_1,dt_2,headache_v2,oxy_vsorres,cough_ceoccur_v2,dry_cough_ceoccur_v2,wet_cough_ceoccur_v2,pao2_lbspec,flw2_fever_1,flw2_fever_2,flw3_fatigue,dehydration_vsorres
+2022-02-05,2022-02-06,2022-02-07,2,87,3,1,2,3,1,0,1,2
diff --git a/tests/sources/skip_field_present.csv b/tests/sources/skip_field_present.csv
index 515c984..3deab0a 100644
--- a/tests/sources/skip_field_present.csv
+++ b/tests/sources/skip_field_present.csv
@@ -1,3 +1,3 @@
-Entry_ID,Epoch,Text,cough,flw_cough
-1,11/01/1999,Lorem ipsum,1,0
-2,19/12/2022,example,0,1
+Entry_ID,Epoch,Text,cough,flw_cough,headache
+1,11/01/1999,Lorem ipsum,1,0,3
+2,19/12/2022,example,0,1,0
diff --git a/tests/test_parser.py b/tests/test_parser.py
index a8a358c..0ba633a 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -148,6 +148,20 @@
         "is_present": False,
         "adtl_valid": True,
     },
+    {
+        "date": "2022-02-05",
+        "name": "fatigue_malaise",
+        "phase": "followup",
+        "is_present": True,
+        "adtl_valid": True,
+    },
+    {
+        "date": "2022-02-05",
+        "name": "severe_dehydration",
+        "phase": "admission",
+        "is_present": False,
+        "adtl_valid": True,
+    },
 ]
 
 ONE_MANY_IF_MISSINGDATA_OUTPUT = [
@@ -279,22 +293,54 @@
     },
 ]
 
-RULE_FIELD_OPTION = {
+RULE_FIELD_OPTION_SKIP = {
     "field": "aidshiv_mhyn",
     "values": {"1": True, "0": False},
     "can_skip": True,
 }
 
-# OBSERVATION_RULE_FIELD_OPTION = {
-#     "name": "bleeding",
-#     "phase": "admission",
-#     "date": "2023-05-18",
-#     "is_present": {
-#         "field": "bleed_ceterm_v2",
-#         "values": {"1": True, "0": False},
-#         "can_skip": True,
-#     },
-# }
+OBSERVATION_RULE_FIELD_OPTION_SKIP = {
+    "name": "bleeding",
+    "phase": "admission",
+    "date": "2023-05-18",
+    "is_present": {
+        "field": "bleed_ceterm_v2",
+        "values": {"1": True, "0": False},
+        "can_skip": True,
+    },
+}
+OBSERVATION_RULE_FIELD_OPTION_VALUE = {
+    "name": "temperature_celsius",
+    "phase": "admission",
+    "date": "2023-05-22",
+    "value": {
+        "field": "temp_vsorres",
+        "source_unit": {"field": "temp_vsorresu", "values": {"1": "°C", "2": "°F"}},
+    },
+}
+
+OBSERVATION_RULE_FIELD_OPTION_COMB = {
+    "name": "cough",
+    "phase": "admission",
+    "date": "2023-05-22",
+    "is_present": {
+        "combinedType": "any",
+        "excludeWhen": "none",
+        "fields": [
+            {"field": "cough_ceoccur_v2", "values": {"1": "true", "0": "false"}},
+            {
+                "field": "coughsput_ceoccur_v2",
+                "values": {"1": "true", "0": "false"},
+                "can_skip": "true",
+            },
+            {
+                "field": "coughhb_ceoccur_v2",
+                "values": {"1": "true", "0": "false"},
+                "can_skip": "true",
+            },
+        ],
+    },
+}
 
 
 @pytest.mark.parametrize(
@@ -367,9 +413,9 @@
             unordered(["Lopinavir/Ritonvir", "Interferon alpha"]),
         ),
         (({"first": "", "second": ""}, RULE_COMBINED_FIRST_NON_NULL), None),
-        (({"aidshiv": "1"}, RULE_FIELD_OPTION), None),
-        (({"aidshiv_mhyn": "1"}, RULE_FIELD_OPTION), True),
-        (({"aidshiv_mhyn": "2"}, RULE_FIELD_OPTION), None),
+        (({"aidshiv": "1"}, RULE_FIELD_OPTION_SKIP), None),
+        (({"aidshiv_mhyn": "1"}, RULE_FIELD_OPTION_SKIP), True),
+        (({"aidshiv_mhyn": "2"}, RULE_FIELD_OPTION_SKIP), None),
     ],
 )
 def test_get_value(row_rule, expected):
@@ -425,6 +471,39 @@ def test_one_to_many():
     assert actual_one_many_output_csv == ONE_MANY_OUTPUT
 
 
+@pytest.mark.parametrize(
+    "rule,expected",
+    [
+        (
+            OBSERVATION_RULE_FIELD_OPTION_SKIP,
+            {
+                "any": [
+                    {"bleed_ceterm_v2": "1", "can_skip": True},
+                    {"bleed_ceterm_v2": "0", "can_skip": True},
+                ]
+            },
+        ),
+        (OBSERVATION_RULE_FIELD_OPTION_VALUE, {"temp_vsorres": {"!=": ""}}),
+        (
+            OBSERVATION_RULE_FIELD_OPTION_COMB,
+            {
+                "any": [
+                    {"cough_ceoccur_v2": "1"},
+                    {"cough_ceoccur_v2": "0"},
+                    {"coughsput_ceoccur_v2": "1", "can_skip": True},
+                    {"coughsput_ceoccur_v2": "0", "can_skip": True},
+                    {"coughhb_ceoccur_v2": "1", "can_skip": True},
+                    {"coughhb_ceoccur_v2": "0", "can_skip": True},
+                ]
+            },
+        ),
+    ],
+)
+def test_default_if_rule_is_correct(rule, expected):
+    psr = parser.Parser(TEST_PARSERS_PATH / "oneToMany-missingIf.toml")
+    assert psr.default_if("observation", rule)["if"] == expected
+
+
 def test_one_to_many_correct_if_behaviour():
     actual_row = list(
         parser.Parser(TEST_PARSERS_PATH / "oneToMany-missingIf.toml")