diff --git a/test/service/data/verbatim/anvil/pfb_entities.json b/test/service/data/verbatim/anvil/pfb_entities.json index 29d14d974e..8cb9a00eda 100644 --- a/test/service/data/verbatim/anvil/pfb_entities.json +++ b/test/service/data/verbatim/anvil/pfb_entities.json @@ -88,6 +88,13 @@ "ontology_reference": "", "properties": [], "values": {} + }, + { + "links": [], + "name": "non_schema_orphan_table", + "ontology_reference": "", + "properties": [], + "values": {} } ] }, @@ -132,6 +139,16 @@ }, "relations": [] }, + { + "id": "28ed0f3a-157b-417b-a05a-48f57f9d3a34", + "name": "non_schema_orphan_table", + "object": { + "datarepo_row_id": "28ed0f3a-157b-417b-a05a-48f57f9d3a34", + "non_schema_column": "eggs", + "version": "2022-06-01T00:00:00.000000Z" + }, + "relations": [] + }, { "id": "826dea02-e274-4ffe-aabc-eb3db63ad068", "name": "anvil_biosample", @@ -279,6 +296,26 @@ }, "relations": [] }, + { + "id": "9687b86d-a2ae-a083-b910-a16bcbef1ba4", + "name": "non_schema_orphan_table", + "object": { + "datarepo_row_id": "9687b86d-a2ae-a083-b910-a16bcbef1ba4", + "non_schema_column": "spam", + "version": "2022-06-01T00:00:00.000000Z" + }, + "relations": [] + }, + { + "id": "9db5952c-c454-49d9-8a62-5abb026701c0", + "name": "non_schema_orphan_table", + "object": { + "datarepo_row_id": "9db5952c-c454-49d9-8a62-5abb026701c0", + "non_schema_column": "baked beans", + "version": "2022-06-01T00:00:00.000000Z" + }, + "relations": [] + }, { "id": "15b76f9c-6b46-433f-851d-34e89f1b9ba6", "name": "anvil_file", diff --git a/test/service/data/verbatim/anvil/pfb_schema.json b/test/service/data/verbatim/anvil/pfb_schema.json index 07aee95c55..9bdd6fcf66 100644 --- a/test/service/data/verbatim/anvil/pfb_schema.json +++ b/test/service/data/verbatim/anvil/pfb_schema.json @@ -1158,6 +1158,27 @@ ], "name": "anvil_variantcallingactivity", "type": "record" + }, + { + "fields": [ + { + "name": "datarepo_row_id", + "namespace": "non_schema_orphan_table", + "type": "string" + }, + { + "name": "non_schema_column", + "namespace": "non_schema_orphan_table", + "type": "string" + }, + { + "name": "version", + "namespace": "non_schema_orphan_table", + "type": "string" + } + ], + "name": "non_schema_orphan_table", + "type": "record" } ] }, diff --git a/test/service/test_manifest.py b/test/service/test_manifest.py index dac94047db..6a53214ce2 100644 --- a/test/service/test_manifest.py +++ b/test/service/test_manifest.py @@ -2122,10 +2122,37 @@ def hash_entities(entities: dict[EntityReference, JSON]) -> dict[str, JSON]: return all_entities_by_hash.values(), linked_entities_by_hash.values() def test_verbatim_pfb_manifest(self): - response = self._get_manifest(ManifestFormat.verbatim_pfb, filters={}) - self.assertEqual(200, response.status_code) with open(self._data_path('service') / 'verbatim/anvil/pfb_schema.json') as f: expected_schema = json.load(f) with open(self._data_path('service') / 'verbatim/anvil/pfb_entities.json') as f: expected_entities = json.load(f) - self._assert_pfb(expected_schema, expected_entities, response) + + def test(expected_schema, expected_entities, filters): + response = self._get_manifest(ManifestFormat.verbatim_pfb, filters) + self.assertEqual(200, response.status_code) + self._assert_pfb(expected_schema, expected_entities, response) + + with self.subTest(orphans=True): + test(expected_schema, expected_entities, filters={ + 'datasets.dataset_id': {'is': ['52ee7665-7033-63f2-a8d9-ce8e32666739']} + }) + + with self.subTest(orphans=False): + # Dynamically edit out references to the orphaned entities that are + # only expected when filtering by dataset ID + schemas = one( + field['type'] + for field in expected_schema['fields'] + if field['name'] == 'object' + ) + # The first AVRO record is the PFB schema, or 'metadata entity' in PFB terms. + metadata_entity = expected_entities[0]['object']['nodes'] + for part in [ + schemas, + metadata_entity, + expected_entities + ]: + filtered = [e for e in part if e['name'] != 'non_schema_orphan_table'] + assert len(filtered) < len(part), 'Expected to filter orphan references' + part[:] = filtered + test(expected_schema, expected_entities, filters={})