From a09232c1e234e78ef7646eb0a67c16c62ed5c608 Mon Sep 17 00:00:00 2001
From: Phil Snyder <subfivemile@gmail.com>
Date: Tue, 3 Jan 2023 15:02:37 -0800
Subject: [PATCH 1/4] Refactor upload-and-deploy github workflow

---
 .github/workflows/upload-and-deploy.yaml | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/upload-and-deploy.yaml b/.github/workflows/upload-and-deploy.yaml
index a971a20..b215459 100755
--- a/.github/workflows/upload-and-deploy.yaml
+++ b/.github/workflows/upload-and-deploy.yaml
@@ -43,7 +43,7 @@ jobs:
         run: echo "NAMESPACE=$GITHUB_REF_NAME" >> $GITHUB_ENV
 
       - name: Copy files to templates bucket
-        run: python src/scripts/manage_artifacts/artifacts.py --upload --ref $GITHUB_REF_NAME
+        run: python src/scripts/manage_artifacts/artifacts.py --upload --ref $NAMESPACE
 
   pytest-docker:
     name: Build and push testing docker image to ECR
@@ -127,8 +127,6 @@ jobs:
     name: Deploy branch using sceptre
     runs-on: ubuntu-latest
     needs: upload-files
-    env:
-      NAMESPACE: $GITHUB_REF_NAME
     if: github.ref_name != 'main' && github.ref_type != 'tag'
     environment: develop
 
@@ -144,7 +142,7 @@ jobs:
         run: mkdir -p templates/remote/
 
       - name: Deploy sceptre stacks
-        run: pipenv run sceptre --var "namespace=${{ env.NAMESPACE }}" launch develop --yes
+        run: pipenv run sceptre --var "namespace=$GITHUB_REF_NAME" launch develop --yes
 
   integration-test:
     name: Deploys test events from the workflow

From 2a5c39e9549c18bf3bb5b1ab53cf4f5bc38f4a47 Mon Sep 17 00:00:00 2001
From: Phil Snyder <subfivemile@gmail.com>
Date: Tue, 3 Jan 2023 15:03:20 -0800
Subject: [PATCH 2/4] Ignore specific non-severe Android validation errors

---
 src/glue/jobs/s3_to_json_s3.py | 62 ++++++++++++++++++++++++++++++++--
 1 file changed, 59 insertions(+), 3 deletions(-)

diff --git a/src/glue/jobs/s3_to_json_s3.py b/src/glue/jobs/s3_to_json_s3.py
index 16fa5fd..4e72e9d 100644
--- a/src/glue/jobs/s3_to_json_s3.py
+++ b/src/glue/jobs/s3_to_json_s3.py
@@ -271,8 +271,8 @@ def validate_data(s3_obj, archive_map, json_schemas, dataset_mapping):
             * assessmentRevision (str)
             * appId (str)
             * recordId (str)
-            * errors (dict): mapping file names to their validation errors.
-                See `validate_against_schema` for format.
+            * errors (dict): mapping file names (str) to their
+                validation errors (list[str]).
     """
     logger = logging.getLogger(__name__)
     assessment_id = s3_obj["Metadata"]["assessmentid"]
@@ -338,6 +338,57 @@ def validate_against_schema(data, schema):
     all_errors = [e.message for e in validator.iter_errors(data)]
     return all_errors
 
+def is_expected_validation_error(validation_result, client_info):
+    """
+    In the first year of MTB there were a number of issues with Android
+    data not conforming to the JSON Schema. These aren't very severe
+    inconsistincies (mostly missing or superfluous properties), but they
+    prevent almost all Android data from being processed. This function
+    checks whether the validation errors fall into this non-severe category.
+    See ETL-312 for more details.
+
+    Args:
+        validation_result (dict): A dictionary containing keys
+            * assessmentId (str)
+            * assessmentRevision (str)
+            * appId (str)
+            * recordId (str)
+            * errors (dict): mapping file names (str) to their
+                validation errors (list[str]).
+        client_info (str): A JSON blob containing the client info from the
+            relevant S3 object's metadata.
+
+    Returns:
+        bool: Whether the validation errors match expected, non-severe errors.
+    """
+    if not validation_result["errors"]:
+        return False
+    else:
+        if "Android" not in client_info:
+            return False
+        if "metadata.json" in validation_result["errors"]:
+            metadata_errors = validation_result["errors"]["metadata.json"]
+            if not (len(metadata_errors) == 2 and
+                    "'appName' is a required property" in metadata_errors and
+                    "'files' is a required property" in metadata_errors):
+                return False
+        if "taskData.json" in validation_result["errors"]:
+            taskdata_errors = validation_result["errors"]["taskData.json"]
+            if not taskdata_errors == [
+                    "Additional properties are not allowed ('type' was unexpected)"]:
+                return False
+        if "weather.json" in validation_result["errors"]:
+            weather_errors = validation_result["errors"]["weather.json"]
+            if not weather_errors == [
+                    "'type' is a required property"]:
+                return False
+        if "motion.json" in validation_result["errors"]:
+            motion_errors = validation_result["errors"]["motion.json"]
+            if not motion_errors == [
+                    "'type' is a required property"]:
+                return False
+    return True
+
 def get_dataset_identifier(json_schema, schema_mapping, dataset_mapping, file_metadata):
     """
     Get a dataset identifier for a file.
@@ -615,7 +666,12 @@ def main():
                 json_schemas=json_schemas,
                 dataset_mapping=dataset_mapping
         )
-        if len(validation_result["errors"]) > 0:
+        expected_validation_error = is_expected_validation_error(
+                validation_result=validation_result,
+                client_info=s3_obj["Metadata"]["clientinfo"])
+        if validation_result["errors"] and expected_validation_error:
+            validation_result["errors"] = {}
+        if validation_result["errors"]:
             for file_name in validation_result["errors"]:
                 # limit 10 errors reported per file to avoid redundandant errors
                 validation_result["errors"][file_name] = \

From fb09f1254daaca6e8f28471d274564c58991239c Mon Sep 17 00:00:00 2001
From: Phil Snyder <subfivemile@gmail.com>
Date: Tue, 3 Jan 2023 15:59:37 -0800
Subject: [PATCH 3/4] Minor refactor and add test for
 is_expected_validation_error

---
 src/glue/jobs/s3_to_json_s3.py | 47 ++++++++++++------------
 tests/test_s3_to_json_s3.py    | 65 ++++++++++++++++++++++++++++++++++
 2 files changed, 89 insertions(+), 23 deletions(-)

diff --git a/src/glue/jobs/s3_to_json_s3.py b/src/glue/jobs/s3_to_json_s3.py
index 4e72e9d..4ec3c9e 100644
--- a/src/glue/jobs/s3_to_json_s3.py
+++ b/src/glue/jobs/s3_to_json_s3.py
@@ -363,30 +363,31 @@ def is_expected_validation_error(validation_result, client_info):
     """
     if not validation_result["errors"]:
         return False
-    else:
-        if "Android" not in client_info:
+    if validation_result["appId"] != "mobile-toolbox":
+        return False
+    if "Android" not in client_info:
+        return False
+    if "metadata.json" in validation_result["errors"]:
+        metadata_errors = validation_result["errors"]["metadata.json"]
+        if not (len(metadata_errors) == 2 and
+                "'appName' is a required property" in metadata_errors and
+                "'files' is a required property" in metadata_errors):
+            return False
+    if "taskData.json" in validation_result["errors"]:
+        taskdata_errors = validation_result["errors"]["taskData.json"]
+        if not taskdata_errors == [
+                "Additional properties are not allowed ('type' was unexpected)"]:
+            return False
+    if "weather.json" in validation_result["errors"]:
+        weather_errors = validation_result["errors"]["weather.json"]
+        if not weather_errors == [
+                "'type' is a required property"]:
+            return False
+    if "motion.json" in validation_result["errors"]:
+        motion_errors = validation_result["errors"]["motion.json"]
+        if not motion_errors == [
+                "'type' is a required property"]:
             return False
-        if "metadata.json" in validation_result["errors"]:
-            metadata_errors = validation_result["errors"]["metadata.json"]
-            if not (len(metadata_errors) == 2 and
-                    "'appName' is a required property" in metadata_errors and
-                    "'files' is a required property" in metadata_errors):
-                return False
-        if "taskData.json" in validation_result["errors"]:
-            taskdata_errors = validation_result["errors"]["taskData.json"]
-            if not taskdata_errors == [
-                    "Additional properties are not allowed ('type' was unexpected)"]:
-                return False
-        if "weather.json" in validation_result["errors"]:
-            weather_errors = validation_result["errors"]["weather.json"]
-            if not weather_errors == [
-                    "'type' is a required property"]:
-                return False
-        if "motion.json" in validation_result["errors"]:
-            motion_errors = validation_result["errors"]["motion.json"]
-            if not motion_errors == [
-                    "'type' is a required property"]:
-                return False
     return True
 
 def get_dataset_identifier(json_schema, schema_mapping, dataset_mapping, file_metadata):
diff --git a/tests/test_s3_to_json_s3.py b/tests/test_s3_to_json_s3.py
index 6c1a35e..490a17a 100644
--- a/tests/test_s3_to_json_s3.py
+++ b/tests/test_s3_to_json_s3.py
@@ -533,6 +533,71 @@ def test_validate_data(self, s3_obj, archive_map, dataset_mapping):
             assert key in validation_result
         assert isinstance(validation_result["errors"], dict)
 
+    def test_is_expected_validation_error(self):
+        validation_result = {
+                "assessmentId": "flanker",
+                "assessmentRevision": "5",
+                "appId": "mobile-toolbox",
+                "recordId": "123456789",
+                "errors": {}
+        }
+        client_info = "{osName:'Android'}"
+        # test not validation_result["errors"]
+        assert not s3_to_json_s3.is_expected_validation_error(
+                validation_result=validation_result,
+                client_info=client_info)
+        # test "Android" not in client_info:
+        assert not s3_to_json_s3.is_expected_validation_error(
+                validation_result=validation_result,
+                client_info="{osName:'iOS'}")
+        # test validation_result["appId"] != "mobile-toolbox":
+        assert not s3_to_json_s3.is_expected_validation_error(
+            validation_result = {
+                    "assessmentId": "flanker",
+                    "assessmentRevision": "5",
+                    "appId": "example-app",
+                    "recordId": "123456789",
+                    "errors": {}
+            },
+            client_info=client_info)
+        # test metadata.json
+        validation_result["errors"] = {
+                "metadata.json": [
+                    "'appName' is a required property",
+                    "'files' is a required property"
+                ]
+        }
+        assert s3_to_json_s3.is_expected_validation_error(
+                validation_result=validation_result,
+                client_info=client_info)
+        # test taskData.json
+        validation_result["errors"] = {
+                "taskData.json": [
+                    "Additional properties are not allowed ('type' was unexpected)"
+                ]
+        }
+        assert s3_to_json_s3.is_expected_validation_error(
+                validation_result=validation_result,
+                client_info=client_info)
+        # test weather.json
+        validation_result["errors"] = {
+                "weather.json": [
+                    "'type' is a required property"
+                ]
+        }
+        assert s3_to_json_s3.is_expected_validation_error(
+                validation_result=validation_result,
+                client_info=client_info)
+        # test motion.json
+        validation_result["errors"] = {
+                "motion.json": [
+                    "'type' is a required property"
+                ]
+        }
+        assert s3_to_json_s3.is_expected_validation_error(
+                validation_result=validation_result,
+                client_info=client_info)
+
     def test_write_metadata_file_to_json_dataset(self, s3_obj, namespace, monkeypatch):
         monkeypatch.setattr("boto3.client", lambda x : MockAWSClient())
         workflow_run_properties = {

From 50ab23f1a25f2825631f3780d02e5f11426c7be1 Mon Sep 17 00:00:00 2001
From: Phil Snyder <subfivemile@gmail.com>
Date: Wed, 4 Jan 2023 12:31:46 -0800
Subject: [PATCH 4/4] Minor syntax edit

---
 src/glue/jobs/s3_to_json_s3.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/glue/jobs/s3_to_json_s3.py b/src/glue/jobs/s3_to_json_s3.py
index 4ec3c9e..426b839 100644
--- a/src/glue/jobs/s3_to_json_s3.py
+++ b/src/glue/jobs/s3_to_json_s3.py
@@ -375,17 +375,17 @@ def is_expected_validation_error(validation_result, client_info):
             return False
     if "taskData.json" in validation_result["errors"]:
         taskdata_errors = validation_result["errors"]["taskData.json"]
-        if not taskdata_errors == [
+        if taskdata_errors != [
                 "Additional properties are not allowed ('type' was unexpected)"]:
             return False
     if "weather.json" in validation_result["errors"]:
         weather_errors = validation_result["errors"]["weather.json"]
-        if not weather_errors == [
+        if weather_errors != [
                 "'type' is a required property"]:
             return False
     if "motion.json" in validation_result["errors"]:
         motion_errors = validation_result["errors"]["motion.json"]
-        if not motion_errors == [
+        if motion_errors != [
                 "'type' is a required property"]:
             return False
     return True