Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ETL-312] Ignore specific non-severe Android validation errors #112

Merged
merged 4 commits into from
Jan 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions .github/workflows/upload-and-deploy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ jobs:
run: echo "NAMESPACE=$GITHUB_REF_NAME" >> $GITHUB_ENV

- name: Copy files to templates bucket
run: python src/scripts/manage_artifacts/artifacts.py --upload --ref $GITHUB_REF_NAME
run: python src/scripts/manage_artifacts/artifacts.py --upload --ref $NAMESPACE

pytest-docker:
name: Build and push testing docker image to ECR
Expand Down Expand Up @@ -127,8 +127,6 @@ jobs:
name: Deploy branch using sceptre
runs-on: ubuntu-latest
needs: upload-files
env:
NAMESPACE: $GITHUB_REF_NAME
if: github.ref_name != 'main' && github.ref_type != 'tag'
environment: develop

Expand All @@ -144,7 +142,7 @@ jobs:
run: mkdir -p templates/remote/

- name: Deploy sceptre stacks
run: pipenv run sceptre --var "namespace=${{ env.NAMESPACE }}" launch develop --yes
run: pipenv run sceptre --var "namespace=$GITHUB_REF_NAME" launch develop --yes

integration-test:
name: Deploys test events from the workflow
Expand Down
63 changes: 60 additions & 3 deletions src/glue/jobs/s3_to_json_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,8 +271,8 @@ def validate_data(s3_obj, archive_map, json_schemas, dataset_mapping):
* assessmentRevision (str)
* appId (str)
* recordId (str)
* errors (dict): mapping file names to their validation errors.
See `validate_against_schema` for format.
* errors (dict): mapping file names (str) to their
validation errors (list[str]).
"""
logger = logging.getLogger(__name__)
assessment_id = s3_obj["Metadata"]["assessmentid"]
Expand Down Expand Up @@ -338,6 +338,58 @@ def validate_against_schema(data, schema):
all_errors = [e.message for e in validator.iter_errors(data)]
return all_errors

def is_expected_validation_error(validation_result, client_info):
    """
    Check whether validation errors match known, non-severe Android issues.

    During the first year of MTB, Android data frequently failed JSON Schema
    validation for benign reasons (mostly missing or superfluous properties),
    which blocked nearly all Android data from being processed. This function
    reports whether every validation error in ``validation_result`` falls
    into that known, non-severe category. See ETL-312 for details.

    Args:
        validation_result (dict): A dictionary containing keys
            * assessmentId (str)
            * assessmentRevision (str)
            * appId (str)
            * recordId (str)
            * errors (dict): mapping file names (str) to their
              validation errors (list[str]).
        client_info (str): A JSON blob containing the client info from the
            relevant S3 object's metadata.

    Returns:
        bool: Whether the validation errors match expected, non-severe errors.
    """
    errors = validation_result["errors"]
    # A record with no errors is not an "expected error" record at all.
    if not errors:
        return False
    # Only mobile-toolbox Android data exhibits these known issues.
    if validation_result["appId"] != "mobile-toolbox":
        return False
    if "Android" not in client_info:
        return False

    # metadata.json must have exactly these two errors (any order).
    metadata_errors = errors.get("metadata.json")
    if metadata_errors is not None:
        required = {
            "'appName' is a required property",
            "'files' is a required property",
        }
        if len(metadata_errors) != 2 or not required.issubset(metadata_errors):
            return False

    # These files must match their single known error exactly.
    known_exact_errors = {
        "taskData.json": [
            "Additional properties are not allowed ('type' was unexpected)"],
        "weather.json": ["'type' is a required property"],
        "motion.json": ["'type' is a required property"],
    }
    for file_name, expected in known_exact_errors.items():
        if file_name in errors and errors[file_name] != expected:
            return False
    return True

def get_dataset_identifier(json_schema, schema_mapping, dataset_mapping, file_metadata):
"""
Get a dataset identifier for a file.
Expand Down Expand Up @@ -615,7 +667,12 @@ def main():
json_schemas=json_schemas,
dataset_mapping=dataset_mapping
)
if len(validation_result["errors"]) > 0:
expected_validation_error = is_expected_validation_error(
validation_result=validation_result,
client_info=s3_obj["Metadata"]["clientinfo"])
if validation_result["errors"] and expected_validation_error:
validation_result["errors"] = {}
if validation_result["errors"]:
for file_name in validation_result["errors"]:
# limit 10 errors reported per file to avoid redundant errors
validation_result["errors"][file_name] = \
Expand Down
65 changes: 65 additions & 0 deletions tests/test_s3_to_json_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,71 @@ def test_validate_data(self, s3_obj, archive_map, dataset_mapping):
assert key in validation_result
assert isinstance(validation_result["errors"], dict)

def test_is_expected_validation_error(self):
    """
    Exercise each guard clause and each per-file error check of
    `is_expected_validation_error` in isolation.

    Note: the negative cases for `appId` and `client_info` must carry a
    set of otherwise-expected errors; with an empty `errors` dict the
    function returns False at its first check and those branches would
    never actually be exercised.
    """
    validation_result = {
        "assessmentId": "flanker",
        "assessmentRevision": "5",
        "appId": "mobile-toolbox",
        "recordId": "123456789",
        "errors": {}
    }
    client_info = "{osName:'Android'}"
    # Known non-severe errors, used to isolate the guard-clause checks below.
    expected_metadata_errors = {
        "metadata.json": [
            "'appName' is a required property",
            "'files' is a required property"
        ]
    }
    # test not validation_result["errors"]
    assert not s3_to_json_s3.is_expected_validation_error(
        validation_result=validation_result,
        client_info=client_info)
    # test "Android" not in client_info (errors are otherwise expected)
    assert not s3_to_json_s3.is_expected_validation_error(
        validation_result={
            **validation_result,
            "errors": expected_metadata_errors
        },
        client_info="{osName:'iOS'}")
    # test validation_result["appId"] != "mobile-toolbox"
    # (errors are otherwise expected)
    assert not s3_to_json_s3.is_expected_validation_error(
        validation_result={
            **validation_result,
            "appId": "example-app",
            "errors": expected_metadata_errors
        },
        client_info=client_info)
    # test metadata.json
    validation_result["errors"] = expected_metadata_errors
    assert s3_to_json_s3.is_expected_validation_error(
        validation_result=validation_result,
        client_info=client_info)
    # test taskData.json
    validation_result["errors"] = {
        "taskData.json": [
            "Additional properties are not allowed ('type' was unexpected)"
        ]
    }
    assert s3_to_json_s3.is_expected_validation_error(
        validation_result=validation_result,
        client_info=client_info)
    # test weather.json
    validation_result["errors"] = {
        "weather.json": [
            "'type' is a required property"
        ]
    }
    assert s3_to_json_s3.is_expected_validation_error(
        validation_result=validation_result,
        client_info=client_info)
    # test motion.json
    validation_result["errors"] = {
        "motion.json": [
            "'type' is a required property"
        ]
    }
    assert s3_to_json_s3.is_expected_validation_error(
        validation_result=validation_result,
        client_info=client_info)
    # test an unexpected (severe) error is not ignored
    validation_result["errors"] = {
        "motion.json": [
            "'items' is a required property"
        ]
    }
    assert not s3_to_json_s3.is_expected_validation_error(
        validation_result=validation_result,
        client_info=client_info)

def test_write_metadata_file_to_json_dataset(self, s3_obj, namespace, monkeypatch):
monkeypatch.setattr("boto3.client", lambda x : MockAWSClient())
workflow_run_properties = {
Expand Down