From f653520a73f018fcf253b0429e5d3fd7f312acb0 Mon Sep 17 00:00:00 2001 From: Thomas Chopitea Date: Thu, 27 Jun 2024 14:44:00 +0000 Subject: [PATCH 01/20] Move from ID to UUID, make ID optional --- core/database_arango.py | 4 +- core/schemas/dfiq.py | 50 +++++--- core/web/apiv2/dfiq.py | 13 +- tests/apiv2/dfiq.py | 112 +++++++++++++----- tests/dfiq_test_data/DFIQ_Scenario_no_id.yaml | 13 ++ tests/dfiq_test_data/F1005.yaml | 4 +- tests/dfiq_test_data/Q1020.10.yaml | 5 +- .../Q1020.10_no_indicators.yaml | 7 +- tests/dfiq_test_data/Q1020.10_no_parent.yaml | 61 ++++++++++ tests/dfiq_test_data/Q1020.yaml | 4 +- tests/dfiq_test_data/Q1020_no_parents.yaml | 11 ++ tests/dfiq_test_data/Q1020_uuid_parent.yaml | 12 ++ .../Q1020_uuid_scenario_parent.yaml | 12 ++ tests/dfiq_test_data/S1003.yaml | 4 +- tests/schemas/dfiq.py | 25 +++- 15 files changed, 272 insertions(+), 65 deletions(-) create mode 100644 tests/dfiq_test_data/DFIQ_Scenario_no_id.yaml create mode 100644 tests/dfiq_test_data/Q1020.10_no_parent.yaml create mode 100644 tests/dfiq_test_data/Q1020_no_parents.yaml create mode 100644 tests/dfiq_test_data/Q1020_uuid_parent.yaml create mode 100644 tests/dfiq_test_data/Q1020_uuid_scenario_parent.yaml diff --git a/core/database_arango.py b/core/database_arango.py index e00765606..9f44d62b9 100644 --- a/core/database_arango.py +++ b/core/database_arango.py @@ -115,9 +115,7 @@ def connect( self.db.collection("indicators").add_persistent_index( fields=["name", "type"], unique=True ) - self.db.collection("dfiq").add_persistent_index( - fields=["dfiq_id", "type"], unique=True - ) + self.db.collection("dfiq").add_persistent_index(fields=["uuid"], unique=True) def clear(self, truncate=True): if not self.db: diff --git a/core/schemas/dfiq.py b/core/schemas/dfiq.py index 00078628d..532aa60b7 100644 --- a/core/schemas/dfiq.py +++ b/core/schemas/dfiq.py @@ -102,7 +102,8 @@ class DFIQBase(YetiModel, database_arango.ArangoYetiConnector): _root_type: Literal["dfiq"] = "dfiq" name: str = Field(min_length=1) - dfiq_id: str = Field(min_length=1) + uuid: str # = Field(default_factory=lambda: str(uuid.uuid4())) + dfiq_id: str | None = None dfiq_version: str = Field(min_length=1) dfiq_tags: list[str] | None = None contributors: list[str] | None = None @@ -142,7 +143,7 @@ def parse_yaml(cls, yaml_string: str) -> dict[str, Any]: if "id" not in yaml_data: raise ValueError(f"Invalid DIFQ YAML (missing 'id' attribute): {yaml_data}") - if not re.match("^\d+\.\d+\.\d+$", str(yaml_data.get("dfiq_version", ""))): + if not re.match(r"^\d+\.\d+\.\d+$", str(yaml_data.get("dfiq_version", ""))): raise ValueError(f"Invalid DFIQ version: {yaml_data['dfiq_version']}") return yaml_data @@ -156,27 +157,31 @@ def to_yaml(self) -> str: dump = self.model_dump( exclude={"created", "modified", "id", "root_type", "dfiq_yaml"} ) - dump.pop("internal") dump["type"] = dump["type"].removeprefix("DFIQType.") dump["display_name"] = dump.pop("name") dump["tags"] = dump.pop("dfiq_tags") dump["id"] = dump.pop("dfiq_id") + dump["uuid"] = dump.pop("uuid") if dump["contributors"] is None: dump.pop("contributors") return yaml.dump(dump) def update_parents(self) -> None: intended_parent_ids = None - if hasattr(self, "parent_ids"): + if getattr(self, "parent_ids", []): intended_parent_ids = self.parent_ids - elif self.type == DFIQType.approach: - intended_parent_ids = [self.dfiq_id.split(".")[0]] + elif self.type == DFIQType.approach and self.parent_id: + intended_parent_ids = [self.parent_id] else: return - intended_parents = [ - DFIQBase.find(dfiq_id=parent_id) for parent_id in intended_parent_ids - ] + intended_parents = [] + for parent_id in intended_parent_ids: + parent = DFIQBase.find(dfiq_id=parent_id) + if not parent: + parent = DFIQBase.find(uuid=parent_id) + intended_parents.append(parent) + if not all(intended_parents): raise ValueError( f"Missing parent(s) {intended_parent_ids} for {self.dfiq_id}" @@ -190,7 +195,9 @@ def update_parents(self) -> None: continue if rel.target != self.extended_id: continue - if vertices[rel.source].dfiq_id not in intended_parent_ids: + if ( + vertices[rel.source].dfiq_id and vertices[rel.source].uuid + ) not in intended_parent_ids: rel.delete() for parent in intended_parents: @@ -209,19 +216,20 @@ def from_yaml(cls: Type["DFIQScenario"], yaml_string: str) -> "DFIQScenario": if yaml_data["type"] != "scenario": raise ValueError(f"Invalid type for DFIQ scenario: {yaml_data['type']}") # use re.match to check that DFIQ Ids for scenarios start with S[0-1]\d+ - if not re.match(r"^S[0-1]\d+$", yaml_data["id"] or ""): + if yaml_data.get("id") and not re.match(r"^S[0-1]\d+$", yaml_data["id"] or ""): raise ValueError( f"Invalid DFIQ ID for scenario: {yaml_data['id']}. Must be in the format S[0-1]\d+" ) return cls( name=yaml_data["display_name"], description=yaml_data["description"], + uuid=yaml_data["uuid"], dfiq_id=yaml_data["id"], dfiq_version=yaml_data["dfiq_version"], dfiq_tags=yaml_data.get("tags"), contributors=yaml_data.get("contributors"), dfiq_yaml=yaml_string, - internal=yaml_data["id"][1] == "0", + internal=yaml_data.get("internal", True), ) @@ -237,7 +245,7 @@ def from_yaml(cls: Type["DFIQFacet"], yaml_string: str) -> "DFIQFacet": yaml_data = cls.parse_yaml(yaml_string) if yaml_data["type"] != "facet": raise ValueError(f"Invalid type for DFIQ facet: {yaml_data['type']}") - if not re.match(r"^F[0-1]\d+$", yaml_data["id"] or ""): + if yaml_data.get("id") and not re.match(r"^F[0-1]\d+$", yaml_data["id"] or ""): raise ValueError( f"Invalid DFIQ ID for facet: {yaml_data['id']}. Must be in the format F[0-1]\d+" ) @@ -245,13 +253,14 @@ def from_yaml(cls: Type["DFIQFacet"], yaml_string: str) -> "DFIQFacet": return cls( name=yaml_data["display_name"], description=yaml_data.get("description"), + uuid=yaml_data["uuid"], dfiq_id=yaml_data["id"], dfiq_version=yaml_data["dfiq_version"], dfiq_tags=yaml_data.get("tags"), contributors=yaml_data.get("contributors"), parent_ids=yaml_data["parent_ids"], dfiq_yaml=yaml_string, - internal=yaml_data["id"][1] == "0", + internal=yaml_data.get("internal", True), ) @@ -267,7 +276,7 @@ def from_yaml(cls: Type["DFIQQuestion"], yaml_string: str) -> "DFIQQuestion": yaml_data = cls.parse_yaml(yaml_string) if yaml_data["type"] != "question": raise ValueError(f"Invalid type for DFIQ question: {yaml_data['type']}") - if not re.match(r"^Q[0-1]\d+$", yaml_data["id"] or ""): + if yaml_data.get("id") and not re.match(r"^Q[0-1]\d+$", yaml_data["id"] or ""): raise ValueError( f"Invalid DFIQ ID for question: {yaml_data['id']}. Must be in the format Q[0-1]\d+" ) @@ -275,13 +284,14 @@ def from_yaml(cls: Type["DFIQQuestion"], yaml_string: str) -> "DFIQQuestion": return cls( name=yaml_data["display_name"], description=yaml_data.get("description"), + uuid=yaml_data["uuid"], dfiq_id=yaml_data["id"], dfiq_version=yaml_data["dfiq_version"], dfiq_tags=yaml_data.get("tags"), contributors=yaml_data.get("contributors"), parent_ids=yaml_data["parent_ids"], dfiq_yaml=yaml_string, - internal=yaml_data["id"][1] == "0", + internal=yaml_data.get("internal", True), ) @@ -336,13 +346,14 @@ class DFIQApproach(DFIQBase): description: DFIQApproachDescription view: DFIQApproachView type: Literal[DFIQType.approach] = DFIQType.approach + parent_id: str | None = None @classmethod def from_yaml(cls: Type["DFIQApproach"], yaml_string: str) -> "DFIQApproach": yaml_data = cls.parse_yaml(yaml_string) if yaml_data["type"] != "approach": raise ValueError(f"Invalid type for DFIQ approach: {yaml_data['type']}") - if not re.match(r"^Q[0-1]\d+\.\d+$", yaml_data["id"]): + if yaml_data.get("id") and not re.match(r"^Q[0-1]\d+\.\d+$", yaml_data["id"]): raise ValueError( f"Invalid DFIQ ID for approach: {yaml_data['id']}. Must be in the format Q[0-1]\d+.\d+" ) @@ -355,17 +366,18 @@ def from_yaml(cls: Type["DFIQApproach"], yaml_string: str) -> "DFIQApproach": f"Invalid DFIQ view for approach (has to be an object): {yaml_data['view']}" ) - internal = bool(re.match(r"^Q[0-1]\d+\.0\d+$", yaml_data["id"])) return cls( name=yaml_data["display_name"], description=DFIQApproachDescription(**yaml_data["description"]), view=DFIQApproachView(**yaml_data["view"]), + uuid=yaml_data["uuid"], dfiq_id=yaml_data["id"], dfiq_version=yaml_data["dfiq_version"], dfiq_tags=yaml_data.get("tags"), + parent_id=yaml_data.get("parent_id"), contributors=yaml_data.get("contributors"), dfiq_yaml=yaml_string, - internal=internal, + internal=yaml_data.get("internal", True), ) diff --git a/core/web/apiv2/dfiq.py b/core/web/apiv2/dfiq.py index 444ec47f5..d69fddbaf 100644 --- a/core/web/apiv2/dfiq.py +++ b/core/web/apiv2/dfiq.py @@ -77,13 +77,20 @@ async def new_from_yaml(request: NewDFIQRequest) -> dfiq.DFIQTypes: except ValueError as error: raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(error)) - # Ensure there is not an object with the same ID: - if dfiq.DFIQBase.find(dfiq_id=new.dfiq_id): + # Ensure there is not an object with the same ID or UUID + + if new.dfiq_id and dfiq.DFIQBase.find(dfiq_id=new.dfiq_id): raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, detail=f"DFIQ with id {new.dfiq_id} already exists", ) + if dfiq.DFIQBase.find(uuid=new.uuid): + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"DFIQ with uuid {new.uuid} already exists", + ) + new = new.save() try: @@ -109,7 +116,7 @@ async def validate_dfiq_yaml(request: DFIQValidateRequest) -> DFIQValidateRespon except KeyError as error: return DFIQValidateResponse(valid=False, error=f"Invalid DFIQ type: {error}") - if request.check_id and dfiq.DFIQBase.find(dfiq_id=obj.dfiq_id): + if request.check_id and obj.dfiq_id and dfiq.DFIQBase.find(dfiq_id=obj.dfiq_id): return DFIQValidateResponse( valid=False, error=f"DFIQ with id {obj.dfiq_id} already exists" ) diff --git a/tests/apiv2/dfiq.py b/tests/apiv2/dfiq.py index 57078bb45..b0c34c5c6 100644 --- a/tests/apiv2/dfiq.py +++ b/tests/apiv2/dfiq.py @@ -40,7 +40,7 @@ def test_new_dfiq_scenario(self) -> None: self.assertIsNotNone(data["created"]) self.assertEqual(data["name"], "scenario1") self.assertEqual(data["dfiq_id"], "S1003") - self.assertEqual(data["dfiq_version"], "1.0.0") + self.assertEqual(data["dfiq_version"], "1.1.0") self.assertEqual(data["description"], "Long description 1\n") self.assertEqual(data["type"], dfiq.DFIQType.scenario) self.assertEqual(data["dfiq_tags"], ["Tag1", "Tag2", "Tag3"]) @@ -49,7 +49,8 @@ def test_new_dfiq_facet(self) -> None: scenario = dfiq.DFIQScenario( name="mock_scenario", dfiq_id="S1003", - dfiq_version="1.0.0", + uuid="fake_scenario_uuid", + dfiq_version="1.1.0", description="desc", dfiq_yaml="mock", ).save() @@ -70,7 +71,7 @@ def test_new_dfiq_facet(self) -> None: self.assertIsNotNone(data["created"]) self.assertEqual(data["name"], "facet1") self.assertEqual(data["dfiq_id"], "F1005") - self.assertEqual(data["dfiq_version"], "1.0.0") + self.assertEqual(data["dfiq_version"], "1.1.0") self.assertEqual(data["description"], "Long description of facet1\n") self.assertEqual(data["type"], dfiq.DFIQType.facet) self.assertEqual(data["dfiq_tags"], ["Web Browser"]) @@ -87,7 +88,8 @@ def test_new_dfiq_question(self) -> None: facet = dfiq.DFIQFacet( name="mock_facet", dfiq_id="F1005", - dfiq_version="1.0.0", + uuid="fake_facet_uuid", + dfiq_version="1.1.0", description="desc", parent_ids=["S1003"], dfiq_yaml="mock", @@ -109,7 +111,7 @@ def test_new_dfiq_question(self) -> None: self.assertIsNotNone(data["created"]) self.assertEqual(data["name"], "What is a question?") self.assertEqual(data["dfiq_id"], "Q1020") - self.assertEqual(data["dfiq_version"], "1.0.0") + self.assertEqual(data["dfiq_version"], "1.1.0") self.assertEqual(data["description"], None) self.assertEqual(data["type"], dfiq.DFIQType.question) self.assertEqual(data["dfiq_tags"], ["Web Browser"]) @@ -126,7 +128,8 @@ def test_new_dfiq_approach(self) -> None: question = dfiq.DFIQQuestion( name="mock_question", dfiq_id="Q1020", - dfiq_version="1.0.0", + uuid="fake_question_uuid", + dfiq_version="1.1.0", description="desc", parent_ids=["F1005"], dfiq_yaml="mock", @@ -148,7 +151,7 @@ def test_new_dfiq_approach(self) -> None: self.assertIsNotNone(data["created"]) self.assertEqual(data["name"], "Approach1") self.assertEqual(data["dfiq_id"], "Q1020.10") - self.assertEqual(data["dfiq_version"], "1.0.0") + self.assertEqual(data["dfiq_version"], "1.1.0") self.assertEqual(data["description"]["summary"], "Description for approach") self.assertEqual(data["type"], dfiq.DFIQType.approach) self.assertEqual(data["dfiq_tags"], ["Lots", "Of", "Tags"]) @@ -164,7 +167,8 @@ def test_dfiq_patch_updates_parents(self) -> None: scenario1 = dfiq.DFIQScenario( name="mock_scenario", dfiq_id="S1003", - dfiq_version="1.0.0", + uuid="fake_scenario_uuid1", + dfiq_version="1.1.0", description="desc", dfiq_yaml="mock", ).save() @@ -172,7 +176,8 @@ def test_dfiq_patch_updates_parents(self) -> None: scenario2 = dfiq.DFIQScenario( name="mock_scenario2", dfiq_id="S1222", - dfiq_version="1.0.0", + uuid="fake_scenario_uuid2", + dfiq_version="1.1.0", description="desc", dfiq_yaml="mock", ).save() @@ -180,7 +185,8 @@ def test_dfiq_patch_updates_parents(self) -> None: facet = dfiq.DFIQFacet( name="mock_facet", dfiq_id="F1005", - dfiq_version="1.0.0", + uuid="fake_facet_uuid", + dfiq_version="1.1.0", description="desc", parent_ids=["S1003"], dfiq_yaml="mock", @@ -215,7 +221,8 @@ def test_dfiq_patch_approach_updates_parents(self) -> None: dfiq.DFIQScenario( name="mock_scenario", dfiq_id="S1003", - dfiq_version="1.0.0", + uuid="fake_scenario_uuid", + dfiq_version="1.1.0", description="desc", dfiq_yaml="mock", ).save() @@ -223,25 +230,28 @@ def test_dfiq_patch_approach_updates_parents(self) -> None: dfiq.DFIQFacet( name="mock_facet", dfiq_id="F1005", - dfiq_version="1.0.0", + uuid="fake_facet_uuid", + dfiq_version="1.1.0", description="desc", - parent_ids=["S1003"], + parent_ids=["fake_scenario_uuid"], dfiq_yaml="mock", ).save() question1 = dfiq.DFIQQuestion( name="mock_question", dfiq_id="Q1020", - dfiq_version="1.0.0", + uuid="bd46ce6e-c933-46e5-960c-36945aaef401", + dfiq_version="1.1.0", description="desc", - parent_ids=["F1005"], + parent_ids=["fake_facet_uuid"], dfiq_yaml="mock", ).save() question2 = dfiq.DFIQQuestion( name="mock_question2", + uuid="fake_question_uuid_2", dfiq_id="Q1022", - dfiq_version="1.0.0", + dfiq_version="1.1.0", description="desc", parent_ids=["F1005"], dfiq_yaml="mock", @@ -259,7 +269,7 @@ def test_dfiq_patch_approach_updates_parents(self) -> None: self.assertEqual(edges[0][0].description, "Uses DFIQ approach") self.assertEqual(total, 1) - approach.dfiq_id = "Q1022.10" + approach.parent_id = "fake_question_uuid_2" response = client.patch( f"/api/v2/dfiq/{approach.id}", json={ @@ -270,12 +280,14 @@ def test_dfiq_patch_approach_updates_parents(self) -> None: ) data = response.json() self.assertEqual(response.status_code, 200, data) - self.assertEqual(data["dfiq_id"], "Q1022.10") + self.assertEqual(data["dfiq_id"], "Q1020.10") self.assertEqual(data["id"], approach.id) vertices, edges, total = approach.neighbors() self.assertEqual(len(vertices), 1) self.assertEqual(vertices[f"dfiq/{question2.id}"].dfiq_id, "Q1022") + self.assertEqual(vertices[f"dfiq/{question2.id}"].uuid, "fake_question_uuid_2") + self.assertEqual(edges[0][0].type, "approach") self.assertEqual(edges[0][0].description, "Uses DFIQ approach") self.assertEqual(total, 1) @@ -283,16 +295,16 @@ def test_dfiq_patch_approach_updates_parents(self) -> None: def test_dfiq_patch_approach_updates_indicators(self) -> None: dfiq.DFIQScenario( name="mock_scenario", - dfiq_id="S1003", - dfiq_version="1.0.0", + uuid="fake_scenario_uuid", + dfiq_version="1.1.0", description="desc", dfiq_yaml="mock", ).save() dfiq.DFIQFacet( name="mock_facet", - dfiq_id="F1005", - dfiq_version="1.0.0", + uuid="fake_facet_uuid", + dfiq_version="1.1.0", description="desc", parent_ids=["S1003"], dfiq_yaml="mock", @@ -300,8 +312,8 @@ def test_dfiq_patch_approach_updates_indicators(self) -> None: dfiq.DFIQQuestion( name="mock_question", - dfiq_id="Q1020", - dfiq_version="1.0.0", + uuid="bd46ce6e-c933-46e5-960c-36945aaef401", + dfiq_version="1.1.0", description="desc", parent_ids=["F1005"], dfiq_yaml="mock", @@ -350,16 +362,16 @@ def test_dfiq_patch_approach_updates_indicators(self) -> None: def test_dfiq_post_approach(self): dfiq.DFIQScenario( name="mock_scenario", - dfiq_id="S1003", - dfiq_version="1.0.0", + uuid="fake_scenario_uuid", + dfiq_version="1.1.0", description="desc", dfiq_yaml="mock", ).save() dfiq.DFIQFacet( name="mock_facet", - dfiq_id="F1005", - dfiq_version="1.0.0", + uuid="fake_facet_uuid", + dfiq_version="1.1.0", description="desc", parent_ids=["S1003"], dfiq_yaml="mock", @@ -367,8 +379,8 @@ def test_dfiq_post_approach(self): dfiq.DFIQQuestion( name="mock_question", - dfiq_id="Q1020", - dfiq_version="1.0.0", + uuid="bd46ce6e-c933-46e5-960c-36945aaef401", + dfiq_version="1.1.0", description="desc", parent_ids=["F1005"], dfiq_yaml="mock", @@ -393,6 +405,7 @@ def test_dfiq_post_approach(self): self.assertEqual(len(vertices), 1) approach.delete() + # Repeat the action, updating indicators response = client.post( "/api/v2/dfiq/from_yaml", json={ @@ -436,7 +449,12 @@ def test_wrong_parent_approach(self) -> None: ) data = response.json() self.assertEqual(response.status_code, 400, data) - self.assertEqual(data, {"detail": "Missing parent(s) ['Q1020'] for Q1020.10"}) + self.assertEqual( + data, + { + "detail": "Missing parent(s) ['bd46ce6e-c933-46e5-960c-36945aaef401'] for Q1020.10" + }, + ) def test_valid_dfiq_yaml(self) -> None: with open("tests/dfiq_test_data/S1003.yaml", "r") as f: @@ -499,6 +517,38 @@ def test_valid_dfiq_yaml(self) -> None: self.assertEqual(response.status_code, 200, data) self.assertEqual(data["valid"], True) + def test_standalone_question_creation(self): + with open("tests/dfiq_test_data/Q1020_no_parents.yaml", "r") as f: + yaml_string = f.read() + + response = client.post( + "/api/v2/dfiq/from_yaml", + json={ + "dfiq_yaml": yaml_string, + "dfiq_type": dfiq.DFIQType.question, + }, + ) + data = response.json() + self.assertEqual(response.status_code, 200, data) + self.assertIsNotNone(data["id"]) + self.assertEquals(data["parent_ids"], []) + + def test_standalone_approach_creation(self): + with open("tests/dfiq_test_data/Q1020.10_no_parent.yaml", "r") as f: + yaml_string = f.read() + + response = client.post( + "/api/v2/dfiq/from_yaml", + json={ + "dfiq_yaml": yaml_string, + "dfiq_type": dfiq.DFIQType.approach, + }, + ) + data = response.json() + self.assertEqual(response.status_code, 200, data) + self.assertIsNotNone(data["id"]) + self.assertIsNone(data["parent_id"]) + def test_upload_dfiq_archive(self): zip_archive = open("tests/dfiq_test_data/dfiq_test_data.zip", "rb") response = client.post( diff --git a/tests/dfiq_test_data/DFIQ_Scenario_no_id.yaml b/tests/dfiq_test_data/DFIQ_Scenario_no_id.yaml new file mode 100644 index 000000000..9b3cb8ef4 --- /dev/null +++ b/tests/dfiq_test_data/DFIQ_Scenario_no_id.yaml @@ -0,0 +1,13 @@ +--- +display_name: scenario1 +type: scenario +description: > + Long description 1 +id: +uuid: 2ee16263-56f8-49a5-9b33-d1a2dd8b829c +internal: false +dfiq_version: 1.1.0 +tags: + - Tag1 + - Tag2 + - Tag3 diff --git a/tests/dfiq_test_data/F1005.yaml b/tests/dfiq_test_data/F1005.yaml index 4ea3a0f71..ee5de55f6 100644 --- a/tests/dfiq_test_data/F1005.yaml +++ b/tests/dfiq_test_data/F1005.yaml @@ -4,7 +4,9 @@ type: facet description: > Long description of facet1 id: F1005 -dfiq_version: 1.0.0 +uuid: b2bab31f-1670-4297-8cb1-685747a13468 +internal: false +dfiq_version: 1.1.0 tags: - Web Browser parent_ids: diff --git a/tests/dfiq_test_data/Q1020.10.yaml b/tests/dfiq_test_data/Q1020.10.yaml index 530022870..1553108a7 100644 --- a/tests/dfiq_test_data/Q1020.10.yaml +++ b/tests/dfiq_test_data/Q1020.10.yaml @@ -2,7 +2,10 @@ display_name: Approach1 type: approach id: Q1020.10 -dfiq_version: 1.0.0 +uuid: 292500f7-9d54-40ca-8254-34821e9b5c4e +parent_id: bd46ce6e-c933-46e5-960c-36945aaef401 +internal: false +dfiq_version: 1.1.0 tags: - Lots - Of diff --git a/tests/dfiq_test_data/Q1020.10_no_indicators.yaml b/tests/dfiq_test_data/Q1020.10_no_indicators.yaml index 293f05efe..edd37f8be 100644 --- a/tests/dfiq_test_data/Q1020.10_no_indicators.yaml +++ b/tests/dfiq_test_data/Q1020.10_no_indicators.yaml @@ -2,13 +2,16 @@ display_name: Approach1 type: approach id: Q1020.10 -dfiq_version: 1.0.0 +uuid: fcbdb313-424a-436e-a877-130aeba3f134 +parent_id: bd46ce6e-c933-46e5-960c-36945aaef401 +internal: false +dfiq_version: 1.1.0 tags: - Lots - Of - Tags description: - summary: Description for approach + summary: Descripion for approach details: > Details for approach references: diff --git a/tests/dfiq_test_data/Q1020.10_no_parent.yaml b/tests/dfiq_test_data/Q1020.10_no_parent.yaml new file mode 100644 index 000000000..66f3dbcf0 --- /dev/null +++ b/tests/dfiq_test_data/Q1020.10_no_parent.yaml @@ -0,0 +1,61 @@ +--- +display_name: Approach1 +type: approach +id: Q1020.10 +uuid: 292500f7-9d54-40ca-8254-34821e9b5c4e +parent_id: +internal: false +dfiq_version: 1.1.0 +tags: + - Lots + - Of + - Tags +description: + summary: Description for approach + details: > + Details for approach + references: + - "ref1" + - "ref2" + references_internal: null +view: + data: + - type: artifact + value: RandomArtifact + - type: description + value: Random description + notes: + covered: + - Covered1 + - Covered2 + - Covered3 + not_covered: + - Not covered1 + - Not covered2 + processors: + - name: processor1 + options: + - type: parsers + value: parser1option + analysis: + - name: OpenSearch + steps: + - description: random parser description + type: opensearch-query + value: data_type:("fs:stat") + - name: Python Notebook + steps: + - description: random step description + type: pandas + value: query('data_type in ("fs:stat")') + - name: processor2 + options: + - type: format + value: jsonl + analysis: + - name: analysis1 + steps: + - description: &filter-desc-processor2 > + something else + type: opensearch-query + value: data_type:"chrome:history:page_visited") diff --git a/tests/dfiq_test_data/Q1020.yaml b/tests/dfiq_test_data/Q1020.yaml index b92ded8b9..5878cc4f4 100644 --- a/tests/dfiq_test_data/Q1020.yaml +++ b/tests/dfiq_test_data/Q1020.yaml @@ -3,7 +3,9 @@ display_name: What is a question? type: question description: id: Q1020 -dfiq_version: 1.0.0 +uuid: bd46ce6e-c933-46e5-960c-36945aaef401 +internal: false +dfiq_version: 1.1.0 tags: - Web Browser parent_ids: diff --git a/tests/dfiq_test_data/Q1020_no_parents.yaml b/tests/dfiq_test_data/Q1020_no_parents.yaml new file mode 100644 index 000000000..df8c2d5ca --- /dev/null +++ b/tests/dfiq_test_data/Q1020_no_parents.yaml @@ -0,0 +1,11 @@ +--- +display_name: What is a question? +type: question +description: +id: Q1020 +uuid: bd46ce6e-c933-46e5-960c-36945aaef401 +internal: false +dfiq_version: 1.1.0 +tags: + - Web Browser +parent_ids: [] diff --git a/tests/dfiq_test_data/Q1020_uuid_parent.yaml b/tests/dfiq_test_data/Q1020_uuid_parent.yaml new file mode 100644 index 000000000..23a576f74 --- /dev/null +++ b/tests/dfiq_test_data/Q1020_uuid_parent.yaml @@ -0,0 +1,12 @@ +--- +display_name: What is a question? +type: question +description: +id: Q1020 +uuid: bd46ce6e-c933-46e5-960c-36945aaef401 +internal: false +dfiq_version: 1.1.0 +tags: + - Web Browser +parent_ids: + - b2bab31f-1670-4297-8cb1-685747a13468 diff --git a/tests/dfiq_test_data/Q1020_uuid_scenario_parent.yaml b/tests/dfiq_test_data/Q1020_uuid_scenario_parent.yaml new file mode 100644 index 000000000..baf0371bf --- /dev/null +++ b/tests/dfiq_test_data/Q1020_uuid_scenario_parent.yaml @@ -0,0 +1,12 @@ +--- +display_name: What is a question? +type: question +description: +id: Q1020 +uuid: bd46ce6e-c933-46e5-960c-36945aaef401 +internal: false +dfiq_version: 1.1.0 +tags: + - Web Browser +parent_ids: + - S1003 diff --git a/tests/dfiq_test_data/S1003.yaml b/tests/dfiq_test_data/S1003.yaml index 6cac4c2b3..155690ecd 100644 --- a/tests/dfiq_test_data/S1003.yaml +++ b/tests/dfiq_test_data/S1003.yaml @@ -4,7 +4,9 @@ type: scenario description: > Long description 1 id: S1003 -dfiq_version: 1.0.0 +uuid: 2ee16263-56f8-49a5-9b33-d1a2dd8b829c +internal: false +dfiq_version: 1.1.0 tags: - Tag1 - Tag2 diff --git a/tests/schemas/dfiq.py b/tests/schemas/dfiq.py index dce5a0fba..736c0a341 100644 --- a/tests/schemas/dfiq.py +++ b/tests/schemas/dfiq.py @@ -28,7 +28,22 @@ def test_dfiq_scenario(self) -> None: self.assertIsNotNone(result.id) self.assertIsNotNone(result.created) self.assertEqual(result.name, "scenario1") - self.assertEqual(result.dfiq_version, "1.0.0") + self.assertEqual(result.dfiq_version, "1.1.0") + self.assertEqual(str(result.uuid), "2ee16263-56f8-49a5-9b33-d1a2dd8b829c") + self.assertEqual(result.description, "Long description 1\n") + self.assertEqual(result.type, DFIQType.scenario) + self.assertEqual(result.dfiq_tags, ["Tag1", "Tag2", "Tag3"]) + + def test_dfiq_scenario_no_id(self) -> None: + with open("tests/dfiq_test_data/DFIQ_Scenario_no_id.yaml", "r") as f: + yaml_string = f.read() + + result = DFIQScenario.from_yaml(yaml_string).save() + self.assertIsNotNone(result.id) + self.assertIsNotNone(result.created) + self.assertEqual(result.name, "scenario1") + self.assertEqual(result.dfiq_version, "1.1.0") + self.assertEqual(str(result.uuid), "2ee16263-56f8-49a5-9b33-d1a2dd8b829c") self.assertEqual(result.description, "Long description 1\n") self.assertEqual(result.type, DFIQType.scenario) self.assertEqual(result.dfiq_tags, ["Tag1", "Tag2", "Tag3"]) @@ -44,7 +59,8 @@ def test_dfiq_facet(self) -> None: self.assertEqual(result.name, "facet1") self.assertEqual(result.description, "Long description of facet1\n") self.assertEqual(result.dfiq_id, "F1005") - self.assertEqual(result.dfiq_version, "1.0.0") + self.assertEqual(result.dfiq_version, "1.1.0") + self.assertEqual(str(result.uuid), "b2bab31f-1670-4297-8cb1-685747a13468") self.assertEqual(result.dfiq_tags, ["Web Browser"]) self.assertEqual(result.parent_ids, ["S1003"]) self.assertEqual(result.type, DFIQType.facet) @@ -59,8 +75,9 @@ def test_dfiq_question(self) -> None: self.assertIsNotNone(result.created) self.assertEqual(result.name, "What is a question?") self.assertEqual(result.description, None) + self.assertEqual(str(result.uuid), "bd46ce6e-c933-46e5-960c-36945aaef401") self.assertEqual(result.dfiq_id, "Q1020") - self.assertEqual(result.dfiq_version, "1.0.0") + self.assertEqual(result.dfiq_version, "1.1.0") self.assertEqual(result.dfiq_tags, ["Web Browser"]) self.assertEqual(result.parent_ids, ["F1005"]) self.assertEqual(result.type, DFIQType.question) @@ -72,6 +89,8 @@ def test_dfiq_approach(self) -> None: result = DFIQApproach.from_yaml(yaml_string).save() self.assertIsNotNone(result.id) + self.assertEquals(result.uuid, "292500f7-9d54-40ca-8254-34821e9b5c4e") + self.assertEquals(result.parent_id, "bd46ce6e-c933-46e5-960c-36945aaef401") self.assertIsNotNone(result.created) self.assertEqual(result.name, "Approach1") self.assertEqual(result.description.summary, "Description for approach") From c69b6b545176087ad3b7189c0bdc65743ddf3a5a Mon Sep 17 00:00:00 2001 From: Thomas Chopitea Date: Thu, 27 Jun 2024 15:10:52 +0000 Subject: [PATCH 02/20] Remove approach summaries --- core/schemas/dfiq.py | 3 +-- tests/dfiq_test_data/Q1020.10.yaml | 1 - .../Q1020.10_no_indicators.yaml | 1 - tests/dfiq_test_data/Q1020.10_no_parent.yaml | 1 - tests/dfiq_test_data/dfiq_test_data.zip | Bin 1697 -> 1586 bytes tests/schemas/dfiq.py | 5 ++--- 6 files changed, 3 insertions(+), 8 deletions(-) diff --git a/core/schemas/dfiq.py b/core/schemas/dfiq.py index 532aa60b7..a4ebcb9c1 100644 --- a/core/schemas/dfiq.py +++ b/core/schemas/dfiq.py @@ -323,8 +323,7 @@ class DFIQProcessors(BaseModel): class DFIQApproachDescription(BaseModel): - summary: str = Field(min_length=1) - details: str = Field(min_length=1) + details: str = "" references: list[str] = [] references_internal: list[str] | None = None diff --git a/tests/dfiq_test_data/Q1020.10.yaml b/tests/dfiq_test_data/Q1020.10.yaml index 1553108a7..c3612db80 100644 --- a/tests/dfiq_test_data/Q1020.10.yaml +++ b/tests/dfiq_test_data/Q1020.10.yaml @@ -11,7 +11,6 @@ tags: - Of - Tags description: - summary: Description for approach details: > Details for approach references: diff --git a/tests/dfiq_test_data/Q1020.10_no_indicators.yaml b/tests/dfiq_test_data/Q1020.10_no_indicators.yaml index edd37f8be..3d0b53f70 100644 --- a/tests/dfiq_test_data/Q1020.10_no_indicators.yaml +++ b/tests/dfiq_test_data/Q1020.10_no_indicators.yaml @@ -11,7 +11,6 @@ tags: - Of - Tags description: - summary: Descripion for approach details: > Details for approach references: diff --git a/tests/dfiq_test_data/Q1020.10_no_parent.yaml b/tests/dfiq_test_data/Q1020.10_no_parent.yaml index 66f3dbcf0..8a401fa38 100644 --- a/tests/dfiq_test_data/Q1020.10_no_parent.yaml +++ b/tests/dfiq_test_data/Q1020.10_no_parent.yaml @@ -11,7 +11,6 @@ tags: - Of - Tags description: - summary: Description for approach details: > Details for approach references: diff --git a/tests/dfiq_test_data/dfiq_test_data.zip b/tests/dfiq_test_data/dfiq_test_data.zip index e70e59183c6724de8c6acee48d77aaad5b892843..bbe302982b842de565ded14504c3cedbf9fa27b1 100644 GIT binary patch literal 1586 zcmWIWW@Zs#U|`^2SW$S|a&WfsPlq!yRNrzDmn>bn^l7?|o+ zCg$dZhHx@4FRrgmV``{PE3M#WU}OPmVE_}peH-}>8SuE2*SFRygj#J#Nk~XYO7HP` z6f#YL(~s-b{l!6A8}Ht|JolUgb70io%a4q0ZzL`{P}@>D?T2yQ)eF;i1$W$O@_HMz z>9DdwoY%Y$`bQ^yTUh$cCaSt&#*>%vYu-jnvs*{JFfnD%_;AYHCbQB!Y{il}yQlA1 zC}uI)>t}!0u2p}4D<{Q5FTp)dJ2oj)`s}@ z&o&U)`&rvfX4h_ZC69c&kf_rB(CzW%PYwy}VV(VTM#!Jvd~Cwg6kON7@;1ufH}8Ir z=Oh&^(b8U-BHd__4>wA$omg~E$-E@z^v%8y%?H`j=+#OR_u;u$wg~@7>wEDMGPW=R&#s@1s1aA5U2v%YRg~R5nUinsHbD ztQTuOe4i2RrS9Atss1`dmHWA4!{(eSxBZ^6v%VF+<>p)+{qAYbpIbW)>@v_%pO$pa z>%n7|l;{ULSnpLgcs~s{wUZX-O0Rwp(OsJ2dE$oazRXlHn-r<@vz)%3o?GeK5uSK( zy0w4bnKe%*==^qgn`EZpJp5pc~g8z(m%$h#!>yi1#{`?ZZmi)l?oa}M0Tj{U#3+uNF zuq{nI`tx04{j%fH#!tfgZouwJqx=E}6yT~QN_|HQs}!uff#($fu3ZvJOPO){2E!a3Ez1icu0Mt~$4ct*gH zUQBxo`I-%QT+8Pl66fHO-5m0Y*UHdM&v5Nhvz8kHg}=?5a^AnTt$i+|a`DqL+q7Q= z2Ohe%E%Mm0W{q0$zNWWN^Uv{imaYksVxJ)L;ok4=_R`#%u#LVRlXVIouUdO_-{-mP zi?SlSs5wTdUdG2Sy8PfPz?0ReC)mn24-txf~nuP$B^r= zfdI?z>Z7#|ZsO%ypPEWM_4&NT*gKcJNNKK@m&qwGwdUS^OoC(4*R!+F|LNo_?V34T zq-o#PZs!gw)0MGaWfkm4+)r}q>k4@k{7}hukC>gQ}hwK5~j7)OOxQZtUpl=u$fcctXNh64br?6s$6jo>j6|(V|Z;NfKmYg8&H!KvJITf7#=`QWW*Vd8qdhaU&k~aIo5F*&&mc0SQa3x2Zr-paPbZR DL54ne literal 1697 zcmWIWW@h1H00EN~p%GvPl;CHOVMs~KEQ~KnEiQ>qNi0d!4-MgDVBVZ^Emdl}Pg-dO zHv=OJNHLfQ02(9$t>7C{aWq0d-F~p<#g=6LFNv9bZ7&KWKfIh(plt4oRBLh7{1A^XJ8{*wR z+dyF7XKgo`UAx(pJo4>AqDuQix39OJbx2?j>+G)hrpzn)3Y zR`FaERD3;QzI2Y~!Q|_f-L7+8_q{)I>||fb$_Lrg-d&EJ=9aoN{z?GH}*xns<2qbFDvr1pGsU#Qn-3vd$G=mxYEHJ#W$X3YIyDT8 zem7)T|F$jL^7ip9bM-aL?ws2BLHc$2wN_Efl_9}lO!DpuyIHw#P1@i^&U=hL5LE#+)iJNbHIOue9qpuC=woY1?+U82=r zJ{}f*sK0mp(@S0RzFfW)|Nl@G$DWfrCr3QVU6$DKdv~ir2DA3La{k-ackka?xo4x; z->+WrH)cJ1rj_phEqvCV)g{XBjU*2|@hPqP`}*)U)%fgUhO8O?PA_`As4OV->a4v} z-v_m`pYorzb$L+Pe|FRa67`VXya<@kyRavANCF|0*iAdF`3@QIIF!#nB<}H4VPlG8 zQbPJG3CW}bLQ67p9QN9VbiA!~;t05MYwq-zpNgEvrL9*fKK>(8Jogn}P3Ha2}}(18TxVw!w@UhYgTqN}Tbi fNe0>Y-wLv$BC=ixmj307GCqxLyGO0^U?m diff --git a/tests/schemas/dfiq.py b/tests/schemas/dfiq.py index 736c0a341..646d44fa7 100644 --- a/tests/schemas/dfiq.py +++ b/tests/schemas/dfiq.py @@ -89,11 +89,10 @@ def test_dfiq_approach(self) -> None: result = DFIQApproach.from_yaml(yaml_string).save() self.assertIsNotNone(result.id) - self.assertEquals(result.uuid, "292500f7-9d54-40ca-8254-34821e9b5c4e") - self.assertEquals(result.parent_id, "bd46ce6e-c933-46e5-960c-36945aaef401") + self.assertEqual(result.uuid, "292500f7-9d54-40ca-8254-34821e9b5c4e") + self.assertEqual(result.parent_id, "bd46ce6e-c933-46e5-960c-36945aaef401") self.assertIsNotNone(result.created) self.assertEqual(result.name, "Approach1") - self.assertEqual(result.description.summary, "Description for approach") self.assertEqual(result.description.details, "Details for approach\n") self.assertEqual(result.description.references, ["ref1", "ref2"]) From 4a40f1eb4466afd6ea7eab3937819853b4cc90d2 Mon Sep 17 00:00:00 2001 From: Thomas Chopitea Date: Thu, 27 Jun 2024 15:11:08 +0000 Subject: [PATCH 03/20] Change deprecated test function --- tests/apiv2/dfiq.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/apiv2/dfiq.py b/tests/apiv2/dfiq.py index b0c34c5c6..da2f388c3 100644 --- a/tests/apiv2/dfiq.py +++ b/tests/apiv2/dfiq.py @@ -531,7 +531,7 @@ def test_standalone_question_creation(self): data = response.json() self.assertEqual(response.status_code, 200, data) self.assertIsNotNone(data["id"]) - self.assertEquals(data["parent_ids"], []) + self.assertEqual(data["parent_ids"], []) def test_standalone_approach_creation(self): with open("tests/dfiq_test_data/Q1020.10_no_parent.yaml", "r") as f: From 73decd3bd3c247f29dd1dff79b79601c534f23df Mon Sep 17 00:00:00 2001 From: Thomas Chopitea Date: Fri, 16 Aug 2024 09:05:23 +0000 Subject: [PATCH 04/20] New Config endpoint for DFIQ to compute data dynamically --- core/web/apiv2/dfiq.py | 26 ++++++++++++++++++++++++++ tests/apiv2/dfiq.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/core/web/apiv2/dfiq.py b/core/web/apiv2/dfiq.py index 742515d53..c3d484903 100644 --- a/core/web/apiv2/dfiq.py +++ b/core/web/apiv2/dfiq.py @@ -56,10 +56,36 @@ class DFIQSearchResponse(BaseModel): total: int +class DFIQConfigResponse(BaseModel): + model_config = ConfigDict(extra="forbid") + + approach_data_sources: list[str] + approach_analysis_step_types: list[str] + + # API endpoints router = APIRouter() +@router.get("/config") +async def config() -> DFIQConfigResponse: + all_approaches = dfiq.DFIQApproach.list() + + data_sources = set() + analysis_step_types = set() + + for approach in all_approaches: + data_sources.update({data.type for data in approach.view.data}) + for processor in approach.view.processors: + for analysis in processor.analysis: + analysis_step_types.update({step.type for step in analysis.steps}) + + return DFIQConfigResponse( + approach_data_sources=sorted(list(data_sources)), + approach_analysis_step_types=sorted(list(analysis_step_types)), + ) + + @router.post("/from_archive") async def from_archive(archive: UploadFile) -> dict[str, int]: """Uncompresses a ZIP archive and processes the DFIQ content inside it.""" diff --git a/tests/apiv2/dfiq.py b/tests/apiv2/dfiq.py index 65dc3da1f..ebea8a800 100644 --- a/tests/apiv2/dfiq.py +++ b/tests/apiv2/dfiq.py @@ -25,6 +25,38 @@ def setUp(self) -> None: ).json() client.headers = {"Authorization": "Bearer " + token_data["access_token"]} + def test_config(self) -> None: + dfiq.DFIQQuestion( + name="mock_question", + dfiq_id="Q1020", + uuid="bd46ce6e-c933-46e5-960c-36945aaef401", + dfiq_version="1.1.0", + description="desc", + parent_ids=["F1005"], + dfiq_yaml="mock", + ).save() + + with open("tests/dfiq_test_data/Q1020.10.yaml", "r") as f: + yaml_string = f.read() + + response = client.post( + "/api/v2/dfiq/from_yaml", + json={ + "dfiq_yaml": yaml_string, + "dfiq_type": dfiq.DFIQType.approach, + }, + ) + self.assertEqual(response.status_code, 200, response.json()) + + response = client.get("/api/v2/dfiq/config") + data = response.json() + + self.assertEqual(response.status_code, 200, data) + self.assertEqual(data["approach_data_sources"], ["artifact", "description"]) + self.assertEqual( + data["approach_analysis_step_types"], ["opensearch-query", "pandas"] + ) + def test_new_dfiq_scenario(self) -> None: with open("tests/dfiq_test_data/S1003.yaml", "r") as f: yaml_string = f.read() From de35d3c1f91d89dab05dd0adfd4154bef816b070 Mon Sep 17 00:00:00 2001 From: Thomas Chopitea Date: Fri, 16 Aug 2024 09:05:33 +0000 Subject: [PATCH 05/20] update feed to tomchop's 1.1 for while changes are merged --- plugins/feeds/public/dfiq.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/feeds/public/dfiq.py b/plugins/feeds/public/dfiq.py index 61c5c7a00..95caddae1 100644 --- a/plugins/feeds/public/dfiq.py +++ b/plugins/feeds/public/dfiq.py @@ -19,7 +19,7 @@ class DFIQFeed(task.FeedTask): def run(self): response = self._make_request( - "https://github.com/google/dfiq/archive/refs/heads/main.zip" + "https://github.com/tomchop/dfiq/archive/refs/heads/dfiq1.1.zip" ) if not response: logging.info("No response: skipping DFIQ update") From 7937dbf859860128ba7706bfd1c317b319bef667 Mon Sep 17 00:00:00 2001 From: Thomas Chopitea Date: Fri, 16 Aug 2024 11:34:29 +0000 Subject: [PATCH 06/20] Fix tests --- tests/apiv2/dfiq.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/apiv2/dfiq.py b/tests/apiv2/dfiq.py index ebea8a800..898f87dd1 100644 --- a/tests/apiv2/dfiq.py +++ b/tests/apiv2/dfiq.py @@ -162,7 +162,7 @@ def test_new_dfiq_approach(self) -> None: question = dfiq.DFIQQuestion( name="mock_question", dfiq_id="Q1020", - uuid="fake_question_uuid", + uuid="bd46ce6e-c933-46e5-960c-36945aaef401", dfiq_version="1.1.0", description="desc", parent_ids=["F1005"], @@ -186,7 +186,7 @@ def test_new_dfiq_approach(self) -> None: self.assertEqual(data["name"], "Approach1") self.assertEqual(data["dfiq_id"], "Q1020.10") self.assertEqual(data["dfiq_version"], "1.1.0") - self.assertEqual(data["description"]["summary"], "Description for approach") + self.assertEqual(data["description"]["details"], "Details for approach\n") self.assertEqual(data["type"], dfiq.DFIQType.approach) self.assertEqual(data["dfiq_tags"], ["Lots", "Of", "Tags"]) @@ -596,6 +596,7 @@ def test_upload_dfiq_archive(self): def test_to_archive(self): dfiq.DFIQScenario( name="public_scenario", + uuid="test_scenario_uuid", dfiq_id="S1003", dfiq_version="1.0.0", description="desc", @@ -605,6 +606,7 @@ def test_to_archive(self): dfiq.DFIQScenario( name="private_scenario", + uuid="test_private_scenario_uuid", dfiq_id="S0003", dfiq_version="1.0.0", description="desc", @@ -614,6 +616,7 @@ def test_to_archive(self): dfiq.DFIQQuestion( name="mock_question", + uuid="test_question_uuid", dfiq_id="Q1020", dfiq_version="1.0.0", description="desc", From 505fd8e6d52231c0f14acb360b0bf6c0dc207bbb Mon Sep 17 00:00:00 2001 From: Thomas Chopitea Date: Fri, 16 Aug 2024 11:41:18 +0000 Subject: [PATCH 07/20] Add in DFIQ feed to move back to the main ref once 1.1 has been merged --- plugins/feeds/public/dfiq.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/plugins/feeds/public/dfiq.py b/plugins/feeds/public/dfiq.py index 95caddae1..0c614ebb2 100644 --- a/plugins/feeds/public/dfiq.py +++ b/plugins/feeds/public/dfiq.py @@ -18,6 +18,8 @@ class DFIQFeed(task.FeedTask): } def run(self): + # move back to "https://github.com/google/dfiq/archive/refs/heads/main.zip" + # once the changes have been merged. response = self._make_request( "https://github.com/tomchop/dfiq/archive/refs/heads/dfiq1.1.zip" ) From 2ef88ec4076f6ca2db311100559cad564fbb0e86 Mon Sep 17 00:00:00 2001 From: Thomas Chopitea Date: Fri, 16 Aug 2024 15:35:28 +0000 Subject: [PATCH 08/20] Rebuild indexes upon server restart --- core/database_arango.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/core/database_arango.py b/core/database_arango.py index 37de574b9..ee51dbe98 100644 --- a/core/database_arango.py +++ b/core/database_arango.py @@ -105,6 +105,15 @@ def connect( ], }, ) + + for collection_data in self.db.collections(): + if collection_data["system"]: + continue + collection = self.db.collection(collection_data["name"]) + for index in collection.indexes(): + if index["type"] == "persistent": + collection.delete_index(index["id"]) + self.db.collection("observables").add_persistent_index( fields=["value", "type"], unique=True ) @@ -115,7 +124,9 @@ def connect( self.db.collection("indicators").add_persistent_index( fields=["name", "type"], unique=True ) - self.db.collection("dfiq").add_persistent_index(fields=["uuid"], unique=True) + self.db.collection("dfiq").add_persistent_index( + fields=["uuid"], unique=True, sparse=True + ) def clear(self, truncate=True): if not self.db: From 9d28c8a193652f2a4dd6b0625f898e8e8bc6ff10 Mon Sep 17 00:00:00 2001 From: Thomas Chopitea Date: Fri, 16 Aug 2024 15:35:41 +0000 Subject: [PATCH 09/20] Mechanism to gracefully upgrade DFIQ schemas --- core/schemas/dfiq.py | 61 +++++++++++++++++++++++++++++++++--- plugins/feeds/public/dfiq.py | 7 +++++ 2 files changed, 63 insertions(+), 5 deletions(-) diff --git a/core/schemas/dfiq.py b/core/schemas/dfiq.py index a4ebcb9c1..8c0e6527e 100644 --- a/core/schemas/dfiq.py +++ b/core/schemas/dfiq.py @@ -2,10 +2,12 @@ import logging import os import re +import uuid from enum import Enum from typing import Annotated, Any, ClassVar, Literal, Type, Union import yaml +from packaging.version import Version from pydantic import BaseModel, Field, computed_field from core import database_arango @@ -13,6 +15,35 @@ from core.schemas import indicator from core.schemas.model import YetiModel +LATEST_SUPPORTED_DFIQ_VERSION = "1.1.0" + + +def upgrade_dfiq_schema(existing, new=None): + if Version("1.0.0") <= Version(existing.dfiq_version) < Version("1.1.0"): + if new: + existing.uuid = new.uuid + existing.internal = new.internal + if existing.type == DFIQType.approach: + existing.parent_id = new.parent_id + existing.description.details = new.description.details + else: + dfiq_yaml = yaml.safe_load(existing.dfiq_yaml) + existing.uuid = str(uuid.uuid4()) + if existing.type == DFIQType.approach: + question_id, approach_id = existing.dfiq_id.split(".") + existing.internal = approach_id[0] == "0" + summary = dfiq_yaml["description"]["summary"] + details = dfiq_yaml["description"]["details"] + existing.description.details = f"{summary}\n\n{details}" + existing.parent_id = question_id + else: + existing.internal = existing.dfiq_id[1] == "0" + + existing.dfiq_version = "1.1.0" + + existing.dfiq_yaml = existing.to_yaml() + return existing.save() + def read_from_data_directory(directory: str, overwrite: bool = False) -> int: """Read DFIQ files from a directory and add them to the database. @@ -35,11 +66,31 @@ def read_from_data_directory(directory: str, overwrite: bool = False) -> int: try: dfiq_object = DFIQBase.from_yaml(f.read()) if not overwrite: - db_dfiq = DFIQBase.find(dfiq_id=dfiq_object.dfiq_id) + if dfiq_object.uuid: + db_dfiq = DFIQBase.find(uuid=dfiq_object.uuid) + if not db_dfiq and dfiq_object.dfiq_id: + db_dfiq = DFIQBase.find(dfiq_id=dfiq_object.dfiq_id) if db_dfiq: - logging.info( - "DFIQ %s already exists, skipping", dfiq_object.dfiq_id - ) + incoming_v = Version(dfiq_object.dfiq_version) + if incoming_v > Version(LATEST_SUPPORTED_DFIQ_VERSION): + logging.warning( + "DFIQ %s has unsupported version %s, skipping", + dfiq_object.dfiq_id, + dfiq_object.dfiq_version, + ) + continue + db_v = Version(db_dfiq.dfiq_version) + if incoming_v <= db_v: + logging.info( + "DFIQ %s already exists, skipping", + dfiq_object.dfiq_id, + ) + else: + logging.info( + "DFIQ %s already exists, but version is newer, updating", + dfiq_object.dfiq_id, + ) + upgrade_dfiq_schema(db_dfiq, new=dfiq_object) continue dfiq_object = dfiq_object.save() total_added += 1 @@ -102,7 +153,7 @@ class DFIQBase(YetiModel, database_arango.ArangoYetiConnector): _root_type: Literal["dfiq"] = "dfiq" name: str = Field(min_length=1) - uuid: str # = Field(default_factory=lambda: str(uuid.uuid4())) + uuid: str | None = None dfiq_id: str | None = None dfiq_version: str = Field(min_length=1) dfiq_tags: list[str] | None = None diff --git a/plugins/feeds/public/dfiq.py b/plugins/feeds/public/dfiq.py index 0c614ebb2..ca21bf780 100644 --- a/plugins/feeds/public/dfiq.py +++ b/plugins/feeds/public/dfiq.py @@ -17,6 +17,10 @@ class DFIQFeed(task.FeedTask): "description": "DFIQ feed", } + def upgrade_existing_dfiq_schema(self): + for dfiq_object in dfiq.DFIQBase.list(): + dfiq.upgrade_dfiq_schema(dfiq_object) + def run(self): # move back to "https://github.com/google/dfiq/archive/refs/heads/main.zip" # once the changes have been merged. @@ -38,5 +42,8 @@ def run(self): logging.info("Processing extra directory %s", directory) dfiq.read_from_data_directory(directory) + logging.info("Upgrading schema of remaining DFIQ objects") + self.upgrade_existing_dfiq_schema() + taskmanager.TaskManager.register_task(DFIQFeed) From 7de8b80d2cfc2b217370ced3cc6935e60d6f9ec0 Mon Sep 17 00:00:00 2001 From: Thomas Chopitea Date: Wed, 21 Aug 2024 11:57:45 +0000 Subject: [PATCH 10/20] update test files --- tests/dfiq_test_data/DFIQ_Scenario_no_id.yaml | 3 +- tests/dfiq_test_data/F1005.yaml | 3 +- tests/dfiq_test_data/Q1020.10.yaml | 60 ------------------- .../Q1020.10_no_indicators.yaml | 33 ---------- tests/dfiq_test_data/Q1020.10_no_parent.yaml | 60 ------------------- tests/dfiq_test_data/Q1020.yaml | 26 +++++++- tests/dfiq_test_data/Q1020_no_indicators.yaml | 12 ++++ tests/dfiq_test_data/Q1020_no_parents.yaml | 3 +- tests/dfiq_test_data/Q1020_uuid_parent.yaml | 3 +- .../Q1020_uuid_scenario_parent.yaml | 3 +- tests/dfiq_test_data/S1003.yaml | 3 +- 11 files changed, 42 insertions(+), 167 deletions(-) delete mode 100644 tests/dfiq_test_data/Q1020.10.yaml delete mode 100644 tests/dfiq_test_data/Q1020.10_no_indicators.yaml delete mode 100644 tests/dfiq_test_data/Q1020.10_no_parent.yaml create mode 100644 tests/dfiq_test_data/Q1020_no_indicators.yaml diff --git a/tests/dfiq_test_data/DFIQ_Scenario_no_id.yaml b/tests/dfiq_test_data/DFIQ_Scenario_no_id.yaml index 9b3cb8ef4..21158b71c 100644 --- a/tests/dfiq_test_data/DFIQ_Scenario_no_id.yaml +++ b/tests/dfiq_test_data/DFIQ_Scenario_no_id.yaml @@ -1,11 +1,10 @@ --- -display_name: scenario1 +name: scenario1 type: scenario description: > Long description 1 id: uuid: 2ee16263-56f8-49a5-9b33-d1a2dd8b829c -internal: false dfiq_version: 1.1.0 tags: - Tag1 diff --git a/tests/dfiq_test_data/F1005.yaml b/tests/dfiq_test_data/F1005.yaml index ee5de55f6..2d07ef3de 100644 --- a/tests/dfiq_test_data/F1005.yaml +++ b/tests/dfiq_test_data/F1005.yaml @@ -1,11 +1,10 @@ --- -display_name: facet1 +name: facet1 type: facet description: > Long description of facet1 id: F1005 uuid: b2bab31f-1670-4297-8cb1-685747a13468 -internal: false dfiq_version: 1.1.0 tags: - Web Browser diff --git a/tests/dfiq_test_data/Q1020.10.yaml b/tests/dfiq_test_data/Q1020.10.yaml deleted file mode 100644 index c3612db80..000000000 --- a/tests/dfiq_test_data/Q1020.10.yaml +++ /dev/null @@ -1,60 +0,0 @@ ---- -display_name: Approach1 -type: approach -id: Q1020.10 -uuid: 292500f7-9d54-40ca-8254-34821e9b5c4e -parent_id: bd46ce6e-c933-46e5-960c-36945aaef401 -internal: false -dfiq_version: 1.1.0 -tags: - - Lots - - Of - - Tags -description: - details: > - Details for approach - references: - - "ref1" - - "ref2" - references_internal: null -view: - data: - - type: artifact - value: RandomArtifact - - type: description - value: Random description - notes: - covered: - - Covered1 - - Covered2 - - Covered3 - not_covered: - - Not covered1 - - Not covered2 - processors: - - name: processor1 - options: - - type: parsers - value: parser1option - analysis: - - name: OpenSearch - steps: - - description: random parser description - type: opensearch-query - value: data_type:("fs:stat") - - name: Python Notebook - steps: - - description: random step description - type: pandas - value: query('data_type in ("fs:stat")') - - name: processor2 - options: - - type: format - value: jsonl - analysis: - - name: analysis1 - steps: - - description: &filter-desc-processor2 > - something else - type: opensearch-query - value: data_type:"chrome:history:page_visited") diff --git a/tests/dfiq_test_data/Q1020.10_no_indicators.yaml b/tests/dfiq_test_data/Q1020.10_no_indicators.yaml deleted file mode 100644 index 3d0b53f70..000000000 --- a/tests/dfiq_test_data/Q1020.10_no_indicators.yaml +++ /dev/null @@ -1,33 +0,0 @@ ---- -display_name: Approach1 -type: approach -id: Q1020.10 -uuid: fcbdb313-424a-436e-a877-130aeba3f134 -parent_id: bd46ce6e-c933-46e5-960c-36945aaef401 -internal: false -dfiq_version: 1.1.0 -tags: - - Lots - - Of - - Tags -description: - details: > - Details for approach - references: - - "ref1" - - "ref2" - references_internal: null -view: - data: - - type: artifact - value: RandomArtifact - - type: description - value: Random description - notes: - covered: - - Covered1 - - Covered2 - - Covered3 - not_covered: - - Not covered1 - - Not covered2 diff --git a/tests/dfiq_test_data/Q1020.10_no_parent.yaml b/tests/dfiq_test_data/Q1020.10_no_parent.yaml deleted file mode 100644 index 8a401fa38..000000000 --- a/tests/dfiq_test_data/Q1020.10_no_parent.yaml +++ /dev/null @@ -1,60 +0,0 @@ ---- -display_name: Approach1 -type: approach -id: Q1020.10 -uuid: 292500f7-9d54-40ca-8254-34821e9b5c4e -parent_id: -internal: false -dfiq_version: 1.1.0 -tags: - - Lots - - Of - - Tags -description: - details: > - Details for approach - references: - - "ref1" - - "ref2" - references_internal: null -view: - data: - - type: artifact - value: RandomArtifact - - type: description - value: Random description - notes: - covered: - - Covered1 - - Covered2 - - Covered3 - not_covered: - - Not covered1 - - Not covered2 - processors: - - name: processor1 - options: - - type: parsers - value: parser1option - analysis: - - name: OpenSearch - steps: - - description: random parser description - type: opensearch-query - value: data_type:("fs:stat") - - name: Python Notebook - steps: - - description: random step description - type: pandas - value: query('data_type in ("fs:stat")') - - name: processor2 - options: - - type: format - value: jsonl - analysis: - - name: analysis1 - steps: - - description: &filter-desc-processor2 > - something else - type: opensearch-query - value: data_type:"chrome:history:page_visited") diff --git a/tests/dfiq_test_data/Q1020.yaml b/tests/dfiq_test_data/Q1020.yaml index 5878cc4f4..baf457a18 100644 --- a/tests/dfiq_test_data/Q1020.yaml +++ b/tests/dfiq_test_data/Q1020.yaml @@ -1,12 +1,34 @@ --- -display_name: What is a question? +name: What is a question? + type: question description: id: Q1020 uuid: bd46ce6e-c933-46e5-960c-36945aaef401 -internal: false dfiq_version: 1.1.0 tags: - Web Browser parent_ids: - F1005 +approaches: + - name: Approach 1 + description: blah + notes: + references: [] + tags: [] + steps: + - name: step1 + description: step1 description + stage: collection + type: ForensicArtifact + value: NTFSUSNJournal + - name: Run a query + description: null + stage: analysis + type: opensearch-query + value: data_type:"fs:ntfs:usn_change" RANDOM_QUERY + - name: Run another query + description: null + stage: analysis + type: opensearch-query-second + value: data_type:"fs:ntfs:usn_change" ANOTHER_QUERY diff --git a/tests/dfiq_test_data/Q1020_no_indicators.yaml b/tests/dfiq_test_data/Q1020_no_indicators.yaml new file mode 100644 index 000000000..475f183a2 --- /dev/null +++ b/tests/dfiq_test_data/Q1020_no_indicators.yaml @@ -0,0 +1,12 @@ +--- +approaches: [] +name: What is a question? +type: question +description: desc +id: Q1020 +uuid: bd46ce6e-c933-46e5-960c-36945aaef401 +dfiq_version: 1.1.0 +tags: + - Web Browser +parent_ids: + - F1005 diff --git a/tests/dfiq_test_data/Q1020_no_parents.yaml b/tests/dfiq_test_data/Q1020_no_parents.yaml index df8c2d5ca..74e39dbf2 100644 --- a/tests/dfiq_test_data/Q1020_no_parents.yaml +++ b/tests/dfiq_test_data/Q1020_no_parents.yaml @@ -1,10 +1,9 @@ --- -display_name: What is a question? +name: What is a question? type: question description: id: Q1020 uuid: bd46ce6e-c933-46e5-960c-36945aaef401 -internal: false dfiq_version: 1.1.0 tags: - Web Browser diff --git a/tests/dfiq_test_data/Q1020_uuid_parent.yaml b/tests/dfiq_test_data/Q1020_uuid_parent.yaml index 23a576f74..69dcb1bcb 100644 --- a/tests/dfiq_test_data/Q1020_uuid_parent.yaml +++ b/tests/dfiq_test_data/Q1020_uuid_parent.yaml @@ -1,10 +1,9 @@ --- -display_name: What is a question? +name: What is a question? type: question description: id: Q1020 uuid: bd46ce6e-c933-46e5-960c-36945aaef401 -internal: false dfiq_version: 1.1.0 tags: - Web Browser diff --git a/tests/dfiq_test_data/Q1020_uuid_scenario_parent.yaml b/tests/dfiq_test_data/Q1020_uuid_scenario_parent.yaml index baf0371bf..79ef2b4e0 100644 --- a/tests/dfiq_test_data/Q1020_uuid_scenario_parent.yaml +++ b/tests/dfiq_test_data/Q1020_uuid_scenario_parent.yaml @@ -1,10 +1,9 @@ --- -display_name: What is a question? +name: What is a question? type: question description: id: Q1020 uuid: bd46ce6e-c933-46e5-960c-36945aaef401 -internal: false dfiq_version: 1.1.0 tags: - Web Browser diff --git a/tests/dfiq_test_data/S1003.yaml b/tests/dfiq_test_data/S1003.yaml index 155690ecd..845123319 100644 --- a/tests/dfiq_test_data/S1003.yaml +++ b/tests/dfiq_test_data/S1003.yaml @@ -1,11 +1,10 @@ --- -display_name: scenario1 +name: scenario1 type: scenario description: > Long description 1 id: S1003 uuid: 2ee16263-56f8-49a5-9b33-d1a2dd8b829c -internal: false dfiq_version: 1.1.0 tags: - Tag1 From 78ea252579e05d71d3a95ad8fe93b9a043d1bb18 Mon Sep 17 00:00:00 2001 From: Thomas Chopitea Date: Wed, 21 Aug 2024 11:58:38 +0000 Subject: [PATCH 11/20] Update DFIQ schema and API to account for new spec --- core/schemas/dfiq.py | 280 +++++++++++++++-------------------------- core/web/apiv2/dfiq.py | 74 ++++++++--- 2 files changed, 155 insertions(+), 199 deletions(-) diff --git a/core/schemas/dfiq.py b/core/schemas/dfiq.py index 8c0e6527e..de713e132 100644 --- a/core/schemas/dfiq.py +++ b/core/schemas/dfiq.py @@ -1,8 +1,7 @@ import datetime +import glob import logging -import os import re -import uuid from enum import Enum from typing import Annotated, Any, ClassVar, Literal, Type, Union @@ -18,31 +17,20 @@ LATEST_SUPPORTED_DFIQ_VERSION = "1.1.0" -def upgrade_dfiq_schema(existing, new=None): - if Version("1.0.0") <= Version(existing.dfiq_version) < Version("1.1.0"): - if new: - existing.uuid = new.uuid - existing.internal = new.internal - if existing.type == DFIQType.approach: - existing.parent_id = new.parent_id - existing.description.details = new.description.details - else: - dfiq_yaml = yaml.safe_load(existing.dfiq_yaml) - existing.uuid = str(uuid.uuid4()) - if existing.type == DFIQType.approach: - question_id, approach_id = existing.dfiq_id.split(".") - existing.internal = approach_id[0] == "0" - summary = dfiq_yaml["description"]["summary"] - details = dfiq_yaml["description"]["details"] - existing.description.details = f"{summary}\n\n{details}" - existing.parent_id = question_id - else: - existing.internal = existing.dfiq_id[1] == "0" +def long_text_representer(dumper, data): + if "1. " in data or "\n" in data: + return dumper.represent_scalar("tag:yaml.org,2002:str", data, style=">") + else: + return dumper.represent_scalar("tag:yaml.org,2002:str", data) + + +def custom_null_representer(dumper, data): + # Represent 'None' as an empty string + return dumper.represent_scalar("tag:yaml.org,2002:null", "") - existing.dfiq_version = "1.1.0" - existing.dfiq_yaml = existing.to_yaml() - return existing.save() +yaml.add_representer(str, long_text_representer) +yaml.add_representer(type(None), custom_null_representer) def read_from_data_directory(directory: str, overwrite: bool = False) -> int: @@ -54,97 +42,90 @@ def read_from_data_directory(directory: str, overwrite: bool = False) -> int: """ dfiq_kb = {} total_added = 0 - for root, _, files in os.walk(directory): - for file in files: - if not file.endswith(".yaml"): - continue - if "spec" in file or "template" in file: - # Don't process DIFQ specification files - continue - logging.debug("Processing %s/%s", root, file) - with open(os.path.join(root, file), "r") as f: - try: - dfiq_object = DFIQBase.from_yaml(f.read()) - if not overwrite: - if dfiq_object.uuid: - db_dfiq = DFIQBase.find(uuid=dfiq_object.uuid) - if not db_dfiq and dfiq_object.dfiq_id: - db_dfiq = DFIQBase.find(dfiq_id=dfiq_object.dfiq_id) - if db_dfiq: - incoming_v = Version(dfiq_object.dfiq_version) - if incoming_v > Version(LATEST_SUPPORTED_DFIQ_VERSION): - logging.warning( - "DFIQ %s has unsupported version %s, skipping", - dfiq_object.dfiq_id, - dfiq_object.dfiq_version, - ) - continue - db_v = Version(db_dfiq.dfiq_version) - if incoming_v <= db_v: - logging.info( - "DFIQ %s already exists, skipping", - dfiq_object.dfiq_id, - ) - else: - logging.info( - "DFIQ %s already exists, but version is newer, updating", - dfiq_object.dfiq_id, - ) - upgrade_dfiq_schema(db_dfiq, new=dfiq_object) + for file in glob.glob(directory): + if not file.endswith(".yaml"): + continue + logging.debug("Processing %s", file) + with open(file, "r") as f: + try: + dfiq_object = DFIQBase.from_yaml(f.read()) + if not overwrite: + if dfiq_object.uuid: + db_dfiq = DFIQBase.find(uuid=dfiq_object.uuid) + if not db_dfiq and dfiq_object.dfiq_id: + db_dfiq = DFIQBase.find(dfiq_id=dfiq_object.dfiq_id) + if db_dfiq: + incoming_v = Version(dfiq_object.dfiq_version) + if incoming_v > Version(LATEST_SUPPORTED_DFIQ_VERSION): + logging.warning( + "DFIQ %s has unsupported version %s, skipping", + dfiq_object.dfiq_id, + dfiq_object.dfiq_version, + ) continue - dfiq_object = dfiq_object.save() - total_added += 1 - except (ValueError, KeyError) as e: - logging.warning("Error processing %s: %s", file, e) - continue - - dfiq_kb[dfiq_object.dfiq_id] = dfiq_object + db_v = Version(db_dfiq.dfiq_version) + if incoming_v <= db_v: + logging.info( + "DFIQ %s already exists, skipping", + dfiq_object.dfiq_id, + ) + continue + dfiq_object = dfiq_object.save() + total_added += 1 + except (ValueError, KeyError) as e: + logging.warning("Error processing %s: %s", file, e) + raise e + + dfiq_kb[dfiq_object.dfiq_id] = dfiq_object for dfiq_id, dfiq_object in dfiq_kb.items(): dfiq_object.update_parents() - if dfiq_object.type == DFIQType.approach: + if dfiq_object.type == DFIQType.question: extract_indicators(dfiq_object) return total_added -def extract_indicators(approach) -> None: - for processor in approach.view.processors: - for analysis in processor.analysis: - for step in analysis.steps: - if step.type == "manual": +def extract_indicators(question: "DFIQQuestion") -> None: + for approach in question.approaches: + for step in approach.steps: + if step.type == "manual": + continue + + if step.type in ("ForensicArtifact", "artifact"): + artifact = indicator.ForensicArtifact.find(name=step.value) + if not artifact: + logging.warning( + "Missing artifact %s in %s", step.value, question.dfiq_id + ) continue + question.link_to(artifact, "artifact", "Uses artifact") + continue + elif "query" in step.type: query = indicator.Query.find(pattern=step.value) if not query: query = indicator.Query( - name=f"{step.description} ({step.type})", + name=f"{step.name} ({step.type})", + description=step.description or "", pattern=step.value, - relevant_tags=approach.dfiq_tags or [], + relevant_tags=[t.lower() for t in approach.tags] or [], query_type=step.type, location=step.type, diamond=indicator.DiamondModel.victim, ).save() - approach.link_to(query, "query", "Uses query") + question.link_to(query, "query", "Uses query") - for data in approach.view.data: - if data.type in ("ForensicArtifact", "artifact"): - artifact = indicator.ForensicArtifact.find(name=data.value) - if not artifact: + else: logging.warning( - "Missing artifact %s in %s", data.value, approach.dfiq_id + "Unknown step type %s in %s", step.type, question.dfiq_id ) - continue - approach.link_to(artifact, "artifact", "Uses artifact") - else: - logging.warning("Unknown data type %s in %s", data.type, approach.dfiq_id) class DFIQType(str, Enum): scenario = "scenario" facet = "facet" question = "question" - approach = "approach" class DFIQBase(YetiModel, database_arango.ArangoYetiConnector): @@ -159,7 +140,6 @@ class DFIQBase(YetiModel, database_arango.ArangoYetiConnector): dfiq_tags: list[str] | None = None contributors: list[str] | None = None dfiq_yaml: str = Field(min_length=1) - internal: bool = False created: datetime.datetime = Field(default_factory=now) modified: datetime.datetime = Field(default_factory=now) @@ -204,25 +184,30 @@ def from_yaml(cls, yaml_string: str) -> "DFIQBase": yaml_data = yaml.safe_load(yaml_string) return TYPE_MAPPING[yaml_data["type"]].from_yaml(yaml_string) - def to_yaml(self) -> str: + def to_yaml(self, sort_keys=False) -> str: dump = self.model_dump( exclude={"created", "modified", "id", "root_type", "dfiq_yaml"} ) dump["type"] = dump["type"].removeprefix("DFIQType.") - dump["display_name"] = dump.pop("name") - dump["tags"] = dump.pop("dfiq_tags") + dump["name"] = dump.pop("name") dump["id"] = dump.pop("dfiq_id") dump["uuid"] = dump.pop("uuid") + dump["description"] = dump.pop("description") + dump["tags"] = dump.pop("dfiq_tags") if dump["contributors"] is None: dump.pop("contributors") - return yaml.dump(dump) + return yaml.dump( + dump, + default_flow_style=False, + sort_keys=sort_keys, + explicit_start=True, + indent=2, + ) def update_parents(self) -> None: intended_parent_ids = None if getattr(self, "parent_ids", []): intended_parent_ids = self.parent_ids - elif self.type == DFIQType.approach and self.parent_id: - intended_parent_ids = [self.parent_id] else: return @@ -272,7 +257,7 @@ def from_yaml(cls: Type["DFIQScenario"], yaml_string: str) -> "DFIQScenario": f"Invalid DFIQ ID for scenario: {yaml_data['id']}. Must be in the format S[0-1]\d+" ) return cls( - name=yaml_data["display_name"], + name=yaml_data["name"], description=yaml_data["description"], uuid=yaml_data["uuid"], dfiq_id=yaml_data["id"], @@ -280,7 +265,6 @@ def from_yaml(cls: Type["DFIQScenario"], yaml_string: str) -> "DFIQScenario": dfiq_tags=yaml_data.get("tags"), contributors=yaml_data.get("contributors"), dfiq_yaml=yaml_string, - internal=yaml_data.get("internal", True), ) @@ -302,7 +286,7 @@ def from_yaml(cls: Type["DFIQFacet"], yaml_string: str) -> "DFIQFacet": ) return cls( - name=yaml_data["display_name"], + name=yaml_data["name"], description=yaml_data.get("description"), uuid=yaml_data["uuid"], dfiq_id=yaml_data["id"], @@ -311,7 +295,6 @@ def from_yaml(cls: Type["DFIQFacet"], yaml_string: str) -> "DFIQFacet": contributors=yaml_data.get("contributors"), parent_ids=yaml_data["parent_ids"], dfiq_yaml=yaml_string, - internal=yaml_data.get("internal", True), ) @@ -321,6 +304,7 @@ class DFIQQuestion(DFIQBase): description: str | None parent_ids: list[str] type: Literal[DFIQType.question] = DFIQType.question + approaches: list["DFIQApproach"] = [] @classmethod def from_yaml(cls: Type["DFIQQuestion"], yaml_string: str) -> "DFIQQuestion": @@ -333,7 +317,7 @@ def from_yaml(cls: Type["DFIQQuestion"], yaml_string: str) -> "DFIQQuestion": ) return cls( - name=yaml_data["display_name"], + name=yaml_data["name"], description=yaml_data.get("description"), uuid=yaml_data["uuid"], dfiq_id=yaml_data["id"], @@ -342,108 +326,44 @@ def from_yaml(cls: Type["DFIQQuestion"], yaml_string: str) -> "DFIQQuestion": contributors=yaml_data.get("contributors"), parent_ids=yaml_data["parent_ids"], dfiq_yaml=yaml_string, - internal=yaml_data.get("internal", True), + approaches=[ + DFIQApproach(**approach) for approach in yaml_data.get("approaches", []) + ], ) -class DFIQData(BaseModel): - type: str = Field(min_length=1) - value: str = Field(min_length=1) - - -class DFIQProcessorOption(BaseModel): - type: str = Field(min_length=1) - value: str = Field(min_length=1) - - -class DFIQAnalysisStep(BaseModel): - description: str = Field(min_length=1) +class DFIQApproachStep(BaseModel): + name: str = Field(min_length=1) + description: str | None + stage: str = Field(min_length=1) type: str = Field(min_length=1) value: str = Field(min_length=1) -class DFIQAnalysis(BaseModel): - name: str = Field(min_length=1) - steps: list[DFIQAnalysisStep] = [] - - -class DFIQProcessors(BaseModel): - name: str = Field(min_length=1) - options: list[DFIQProcessorOption] = [] - analysis: list[DFIQAnalysis] = [] - - -class DFIQApproachDescription(BaseModel): - details: str = "" - references: list[str] = [] - references_internal: list[str] | None = None - - class DFIQApproachNotes(BaseModel): covered: list[str] = [] not_covered: list[str] = [] -class DFIQApproachView(BaseModel): - data: list[DFIQData] = [] - notes: DFIQApproachNotes - processors: list[DFIQProcessors] = [] - - -class DFIQApproach(DFIQBase): - _type_filter: ClassVar[str] = DFIQType.approach - - description: DFIQApproachDescription - view: DFIQApproachView - type: Literal[DFIQType.approach] = DFIQType.approach - parent_id: str | None = None - - @classmethod - def from_yaml(cls: Type["DFIQApproach"], yaml_string: str) -> "DFIQApproach": - yaml_data = cls.parse_yaml(yaml_string) - if yaml_data["type"] != "approach": - raise ValueError(f"Invalid type for DFIQ approach: {yaml_data['type']}") - if yaml_data.get("id") and not re.match(r"^Q[0-1]\d+\.\d+$", yaml_data["id"]): - raise ValueError( - f"Invalid DFIQ ID for approach: {yaml_data['id']}. Must be in the format Q[0-1]\d+.\d+" - ) - if not isinstance(yaml_data["description"], dict): - raise ValueError( - f"Invalid DFIQ description for approach (has to be an object): {yaml_data['description']}" - ) - if not isinstance(yaml_data["view"], dict): - raise ValueError( - f"Invalid DFIQ view for approach (has to be an object): {yaml_data['view']}" - ) - - return cls( - name=yaml_data["display_name"], - description=DFIQApproachDescription(**yaml_data["description"]), - view=DFIQApproachView(**yaml_data["view"]), - uuid=yaml_data["uuid"], - dfiq_id=yaml_data["id"], - dfiq_version=yaml_data["dfiq_version"], - dfiq_tags=yaml_data.get("tags"), - parent_id=yaml_data.get("parent_id"), - contributors=yaml_data.get("contributors"), - dfiq_yaml=yaml_string, - internal=yaml_data.get("internal", True), - ) +class DFIQApproach(BaseModel): + name: str = Field(min_length=1) + description: str + tags: list[str] = [] + references: list[str] = [] + notes: DFIQApproachNotes | None = None + steps: list[DFIQApproachStep] = [] TYPE_MAPPING = { "scenario": DFIQScenario, "facet": DFIQFacet, "question": DFIQQuestion, - "approach": DFIQApproach, "dfiq": DFIQBase, } DFIQTypes = Annotated[ - Union[DFIQScenario, DFIQFacet, DFIQQuestion, DFIQApproach], + Union[DFIQScenario, DFIQFacet, DFIQQuestion], Field(discriminator="type"), ] -DFIQClasses = ( - Type[DFIQScenario] | Type[DFIQFacet] | Type[DFIQQuestion] | Type[DFIQApproach] -) +DFIQClasses = Type[DFIQScenario] | Type[DFIQFacet] | Type[DFIQQuestion] diff --git a/core/web/apiv2/dfiq.py b/core/web/apiv2/dfiq.py index c3d484903..f79cea0f5 100644 --- a/core/web/apiv2/dfiq.py +++ b/core/web/apiv2/dfiq.py @@ -1,3 +1,4 @@ +import os import tempfile from io import BytesIO from zipfile import ZipFile @@ -59,8 +60,8 @@ class DFIQSearchResponse(BaseModel): class DFIQConfigResponse(BaseModel): model_config = ConfigDict(extra="forbid") - approach_data_sources: list[str] - approach_analysis_step_types: list[str] + stage_types: list[str] + step_types: list[str] # API endpoints @@ -69,20 +70,20 @@ class DFIQConfigResponse(BaseModel): @router.get("/config") async def config() -> DFIQConfigResponse: - all_approaches = dfiq.DFIQApproach.list() + all_questions = dfiq.DFIQQuestion.list() - data_sources = set() - analysis_step_types = set() + stage_types = set() + step_types = set() - for approach in all_approaches: - data_sources.update({data.type for data in approach.view.data}) - for processor in approach.view.processors: - for analysis in processor.analysis: - analysis_step_types.update({step.type for step in analysis.steps}) + for question in all_questions: + for approach in question.approaches: + for step in approach.steps: + stage_types.add(step.stage) + step_types.add(step.type) return DFIQConfigResponse( - approach_data_sources=sorted(list(data_sources)), - approach_analysis_step_types=sorted(list(analysis_step_types)), + stage_types=sorted(list(stage_types)), + step_types=sorted(list(step_types)), ) @@ -92,7 +93,7 @@ async def from_archive(archive: UploadFile) -> dict[str, int]: tempdir = tempfile.TemporaryDirectory() contents = await archive.read() ZipFile(BytesIO(contents)).extractall(path=tempdir.name) - total_added = dfiq.read_from_data_directory(tempdir.name) + total_added = dfiq.read_from_data_directory(f"{tempdir.name}/*/*.yaml") return {"total_added": total_added} @@ -149,17 +150,52 @@ async def to_archive(request: DFIQSearchRequest) -> FileResponse: ) tempdir = tempfile.TemporaryDirectory() + public_objs = [] + internal_objs = [] for obj in dfiq_objects: - with open(f"{tempdir.name}/{obj.dfiq_id}.yaml", "w") as f: + if obj.dfiq_tags and "internal" in obj.dfiq_tags: + internal_objs.append(obj) + else: + if obj.type == dfiq.DFIQType.question: + public_version = obj.model_copy() + internal_approaches = False + for approach in obj.approaches: + if "internal" in approach.tags: + internal_approaches = True + break + if internal_approaches: + public_version.approaches = [ + a for a in obj.approaches if "internal" not in a.tags + ] + public_objs.append(public_version) + internal_objs.append(obj) + else: + public_objs.append(obj) + else: + public_objs.append(obj) + + for dir_name in ["public", "internal"]: + os.makedirs(f"{tempdir.name}/{dir_name}") + + for obj in public_objs: + with open(f"{tempdir.name}/public/{obj.dfiq_id}.yaml", "w") as f: + f.write(obj.to_yaml()) + + for obj in internal_objs: + with open(f"{tempdir.name}/internal/{obj.dfiq_id}.yaml", "w") as f: f.write(obj.to_yaml()) with tempfile.NamedTemporaryFile(delete=False) as archive: with ZipFile(archive, "w") as zipf: - for obj in dfiq_objects: - subdir = "internal" if obj.internal else "public" + for obj in public_objs: zipf.write( - f"{tempdir.name}/{obj.dfiq_id}.yaml", - f"{subdir}/{obj.type}/{obj.dfiq_id}.yaml", + f"{tempdir.name}/public/{obj.dfiq_id}.yaml", + f"public/{obj.type}/{obj.dfiq_id}.yaml", + ) + for obj in internal_objs: + zipf.write( + f"{tempdir.name}/internal/{obj.dfiq_id}.yaml", + f"internal/{obj.type}/{obj.dfiq_id}.yaml", ) return FileResponse(archive.name, media_type="application/zip", filename="dfiq.zip") @@ -206,7 +242,7 @@ async def patch(request: PatchDFIQRequest, dfiq_id) -> dfiq.DFIQTypes: new = updated_dfiq.save() new.update_parents() - if request.update_indicators and new.type == dfiq.DFIQType.approach: + if request.update_indicators and new.type == dfiq.DFIQType.question: dfiq.extract_indicators(new) return new From db8db186dfb60360ff3910723012b8e319b5d472 Mon Sep 17 00:00:00 2001 From: Thomas Chopitea Date: Wed, 21 Aug 2024 11:58:43 +0000 Subject: [PATCH 12/20] Update feed --- plugins/feeds/public/dfiq.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/plugins/feeds/public/dfiq.py b/plugins/feeds/public/dfiq.py index ca21bf780..468f08542 100644 --- a/plugins/feeds/public/dfiq.py +++ b/plugins/feeds/public/dfiq.py @@ -1,4 +1,5 @@ import logging +import os import tempfile from datetime import timedelta from io import BytesIO @@ -33,7 +34,10 @@ def run(self): tempdir = tempfile.TemporaryDirectory() ZipFile(BytesIO(response.content)).extractall(path=tempdir.name) - dfiq.read_from_data_directory(tempdir.name) + dfiq.read_from_data_directory( + os.path.join(tempdir.name, "*", "dfiq", "data", "*", "*.yaml"), + overwrite=True, + ) extra_dirs = yeti_config.get("dfiq", "extra_dirs") if not extra_dirs: From 747ca2fca33c630dc1ffcc445f9049cb54a1e977 Mon Sep 17 00:00:00 2001 From: Thomas Chopitea Date: Wed, 21 Aug 2024 11:58:53 +0000 Subject: [PATCH 13/20] Update tests --- tests/apiv2/dfiq.py | 274 +++++------------------- tests/dfiq_test_data/dfiq_test_data.zip | Bin 1586 -> 908 bytes tests/feeds.py | 142 ++++++------ tests/schemas/dfiq.py | 65 +----- 4 files changed, 134 insertions(+), 347 deletions(-) diff --git a/tests/apiv2/dfiq.py b/tests/apiv2/dfiq.py index 898f87dd1..e63657389 100644 --- a/tests/apiv2/dfiq.py +++ b/tests/apiv2/dfiq.py @@ -26,35 +26,37 @@ def setUp(self) -> None: client.headers = {"Authorization": "Bearer " + token_data["access_token"]} def test_config(self) -> None: - dfiq.DFIQQuestion( - name="mock_question", - dfiq_id="Q1020", - uuid="bd46ce6e-c933-46e5-960c-36945aaef401", + dfiq.DFIQFacet( + name="mock_facet", + dfiq_id="F1005", + uuid="fake_facet_uuid", dfiq_version="1.1.0", description="desc", - parent_ids=["F1005"], + parent_ids=[], dfiq_yaml="mock", ).save() - with open("tests/dfiq_test_data/Q1020.10.yaml", "r") as f: + with open("tests/dfiq_test_data/Q1020.yaml", "r") as f: yaml_string = f.read() response = client.post( "/api/v2/dfiq/from_yaml", json={ "dfiq_yaml": yaml_string, - "dfiq_type": dfiq.DFIQType.approach, + "dfiq_type": dfiq.DFIQType.question, }, ) - self.assertEqual(response.status_code, 200, response.json()) + data = response.json() + self.assertEqual(response.status_code, 200, data) response = client.get("/api/v2/dfiq/config") data = response.json() self.assertEqual(response.status_code, 200, data) - self.assertEqual(data["approach_data_sources"], ["artifact", "description"]) + self.assertEqual(data["stage_types"], ["analysis", "collection"]) self.assertEqual( - data["approach_analysis_step_types"], ["opensearch-query", "pandas"] + data["step_types"], + ["ForensicArtifact", "opensearch-query", "opensearch-query-second"], ) def test_new_dfiq_scenario(self) -> None: @@ -251,85 +253,11 @@ def test_dfiq_patch_updates_parents(self) -> None: self.assertEqual(edges[0][0].description, "Uses DFIQ facet") self.assertEqual(total, 1) - def test_dfiq_patch_approach_updates_parents(self) -> None: - dfiq.DFIQScenario( - name="mock_scenario", - dfiq_id="S1003", - uuid="fake_scenario_uuid", - dfiq_version="1.1.0", - description="desc", - dfiq_yaml="mock", - ).save() - - dfiq.DFIQFacet( - name="mock_facet", - dfiq_id="F1005", - uuid="fake_facet_uuid", - dfiq_version="1.1.0", - description="desc", - parent_ids=["fake_scenario_uuid"], - dfiq_yaml="mock", - ).save() - - question1 = dfiq.DFIQQuestion( - name="mock_question", - dfiq_id="Q1020", - uuid="bd46ce6e-c933-46e5-960c-36945aaef401", - dfiq_version="1.1.0", - description="desc", - parent_ids=["fake_facet_uuid"], - dfiq_yaml="mock", - ).save() - - question2 = dfiq.DFIQQuestion( - name="mock_question2", - uuid="fake_question_uuid_2", - dfiq_id="Q1022", - dfiq_version="1.1.0", - description="desc", - parent_ids=["F1005"], - dfiq_yaml="mock", - ).save() - - with open("tests/dfiq_test_data/Q1020.10.yaml", "r") as f: - yaml_string = f.read() - approach = dfiq.DFIQApproach.from_yaml(yaml_string).save() - approach.update_parents() - - vertices, edges, total = approach.neighbors() - self.assertEqual(len(vertices), 1) - self.assertEqual(vertices[f"dfiq/{question1.id}"].dfiq_id, "Q1020") - self.assertEqual(edges[0][0].type, "approach") - self.assertEqual(edges[0][0].description, "Uses DFIQ approach") - self.assertEqual(total, 1) - - approach.parent_id = "fake_question_uuid_2" - response = client.patch( - f"/api/v2/dfiq/{approach.id}", - json={ - "dfiq_yaml": approach.to_yaml(), - "dfiq_type": approach.type, - "update_indicators": False, - }, - ) - data = response.json() - self.assertEqual(response.status_code, 200, data) - self.assertEqual(data["dfiq_id"], "Q1020.10") - self.assertEqual(data["id"], approach.id) - - vertices, edges, total = approach.neighbors() - self.assertEqual(len(vertices), 1) - self.assertEqual(vertices[f"dfiq/{question2.id}"].dfiq_id, "Q1022") - self.assertEqual(vertices[f"dfiq/{question2.id}"].uuid, "fake_question_uuid_2") - - self.assertEqual(edges[0][0].type, "approach") - self.assertEqual(edges[0][0].description, "Uses DFIQ approach") - self.assertEqual(total, 1) - - def test_dfiq_patch_approach_updates_indicators(self) -> None: + def test_dfiq_patch_question_updates_indicators(self) -> None: dfiq.DFIQScenario( name="mock_scenario", uuid="fake_scenario_uuid", + dfiq_id="S1003", dfiq_version="1.1.0", description="desc", dfiq_yaml="mock", @@ -339,121 +267,61 @@ def test_dfiq_patch_approach_updates_indicators(self) -> None: name="mock_facet", uuid="fake_facet_uuid", dfiq_version="1.1.0", + dfiq_id="F1005", description="desc", parent_ids=["S1003"], dfiq_yaml="mock", ).save() dfiq.DFIQQuestion( - name="mock_question", + name="What is a question?", uuid="bd46ce6e-c933-46e5-960c-36945aaef401", dfiq_version="1.1.0", description="desc", parent_ids=["F1005"], dfiq_yaml="mock", + approaches=[], ).save() - with open("tests/dfiq_test_data/Q1020.10_no_indicators.yaml", "r") as f: + with open("tests/dfiq_test_data/Q1020_no_indicators.yaml", "r") as f: yaml_string = f.read() - approach = dfiq.DFIQApproach.from_yaml(yaml_string).save() - approach.update_parents() + question = dfiq.DFIQQuestion.from_yaml(yaml_string).save() + question.update_parents() - vertices, edges, total = approach.neighbors() + vertices, edges, total = question.neighbors() self.assertEqual(len(vertices), 1) self.assertEqual(total, 1) - with open("tests/dfiq_test_data/Q1020.10.yaml", "r") as f: + with open("tests/dfiq_test_data/Q1020.yaml", "r") as f: yaml_string = f.read() response = client.patch( - f"/api/v2/dfiq/{approach.id}", + f"/api/v2/dfiq/{question.id}", json={ "dfiq_yaml": yaml_string, - "dfiq_type": approach.type, + "dfiq_type": question.type, "update_indicators": False, }, ) data = response.json() self.assertEqual(response.status_code, 200, data) - vertices, edges, total = approach.neighbors() + vertices, edges, total = question.neighbors() self.assertEqual(len(vertices), 1) self.assertEqual(total, 1) response = client.patch( - f"/api/v2/dfiq/{approach.id}", - json={ - "dfiq_yaml": yaml_string, - "dfiq_type": approach.type, - "update_indicators": True, - }, - ) - data = response.json() - self.assertEqual(response.status_code, 200, data) - vertices, edges, total = approach.neighbors() - self.assertEqual(len(vertices), 4) - self.assertEqual(total, 4) - - def test_dfiq_post_approach(self): - dfiq.DFIQScenario( - name="mock_scenario", - uuid="fake_scenario_uuid", - dfiq_version="1.1.0", - description="desc", - dfiq_yaml="mock", - ).save() - - dfiq.DFIQFacet( - name="mock_facet", - uuid="fake_facet_uuid", - dfiq_version="1.1.0", - description="desc", - parent_ids=["S1003"], - dfiq_yaml="mock", - ).save() - - dfiq.DFIQQuestion( - name="mock_question", - uuid="bd46ce6e-c933-46e5-960c-36945aaef401", - dfiq_version="1.1.0", - description="desc", - parent_ids=["F1005"], - dfiq_yaml="mock", - ).save() - - with open("tests/dfiq_test_data/Q1020.10.yaml", "r") as f: - yaml_string = f.read() - - response = client.post( - "/api/v2/dfiq/from_yaml", - json={ - "dfiq_yaml": yaml_string, - "dfiq_type": dfiq.DFIQType.approach, - "update_indicators": False, - }, - ) - data = response.json() - self.assertEqual(response.status_code, 200, data) - - approach = dfiq.DFIQApproach.get(id=data["id"]) - vertices, edges, total = approach.neighbors() - self.assertEqual(len(vertices), 1) - approach.delete() - - # Repeat the action, updating indicators - response = client.post( - "/api/v2/dfiq/from_yaml", + f"/api/v2/dfiq/{question.id}", json={ "dfiq_yaml": yaml_string, - "dfiq_type": dfiq.DFIQType.approach, + "dfiq_type": question.type, "update_indicators": True, }, ) data = response.json() self.assertEqual(response.status_code, 200, data) - - approach = dfiq.DFIQApproach.get(id=data["id"]) - vertices, edges, total = approach.neighbors() - self.assertEqual(len(vertices), 4) + vertices, edges, total = question.neighbors() + self.assertEqual(len(vertices), 3) + self.assertEqual(total, 3) def test_wrong_parent(self) -> None: with open("tests/dfiq_test_data/F1005.yaml", "r") as f: @@ -470,26 +338,6 @@ def test_wrong_parent(self) -> None: self.assertEqual(response.status_code, 400, data) self.assertEqual(data, {"detail": "Missing parent(s) ['S1003'] for F1005"}) - def test_wrong_parent_approach(self) -> None: - with open("tests/dfiq_test_data/Q1020.10.yaml", "r") as f: - yaml_string = f.read() - - response = client.post( - "/api/v2/dfiq/from_yaml", - json={ - "dfiq_yaml": yaml_string, - "dfiq_type": dfiq.DFIQType.approach, - }, - ) - data = response.json() - self.assertEqual(response.status_code, 400, data) - self.assertEqual( - data, - { - "detail": "Missing parent(s) ['bd46ce6e-c933-46e5-960c-36945aaef401'] for Q1020.10" - }, - ) - def test_valid_dfiq_yaml(self) -> None: with open("tests/dfiq_test_data/S1003.yaml", "r") as f: yaml_string = f.read() @@ -536,21 +384,6 @@ def test_valid_dfiq_yaml(self) -> None: self.assertEqual(response.status_code, 200, data) self.assertEqual(data["valid"], True) - with open("tests/dfiq_test_data/Q1020.10.yaml", "r") as f: - yaml_string = f.read() - - response = client.post( - "/api/v2/dfiq/validate", - json={ - "dfiq_yaml": yaml_string, - "dfiq_type": dfiq.DFIQType.approach, - "check_id": True, - }, - ) - data = response.json() - self.assertEqual(response.status_code, 200, data) - self.assertEqual(data["valid"], True) - def test_standalone_question_creation(self): with open("tests/dfiq_test_data/Q1020_no_parents.yaml", "r") as f: yaml_string = f.read() @@ -567,22 +400,6 @@ def test_standalone_question_creation(self): self.assertIsNotNone(data["id"]) self.assertEqual(data["parent_ids"], []) - def test_standalone_approach_creation(self): - with open("tests/dfiq_test_data/Q1020.10_no_parent.yaml", "r") as f: - yaml_string = f.read() - - response = client.post( - "/api/v2/dfiq/from_yaml", - json={ - "dfiq_yaml": yaml_string, - "dfiq_type": dfiq.DFIQType.approach, - }, - ) - data = response.json() - self.assertEqual(response.status_code, 200, data) - self.assertIsNotNone(data["id"]) - self.assertIsNone(data["parent_id"]) - def test_upload_dfiq_archive(self): zip_archive = open("tests/dfiq_test_data/dfiq_test_data.zip", "rb") response = client.post( @@ -591,7 +408,7 @@ def test_upload_dfiq_archive(self): ) data = response.json() self.assertEqual(response.status_code, 200, data) - self.assertEqual(data, {"total_added": 4}) + self.assertEqual(data, {"total_added": 3}) def test_to_archive(self): dfiq.DFIQScenario( @@ -601,27 +418,38 @@ def test_to_archive(self): dfiq_version="1.0.0", description="desc", dfiq_yaml="mock", - internal=False, ).save() dfiq.DFIQScenario( name="private_scenario", uuid="test_private_scenario_uuid", dfiq_id="S0003", + dfiq_tags=["internal"], dfiq_version="1.0.0", description="desc", dfiq_yaml="mock", - internal=True, ).save() dfiq.DFIQQuestion( - name="mock_question", + name="semi_private_question", uuid="test_question_uuid", dfiq_id="Q1020", dfiq_version="1.0.0", description="desc", parent_ids=["F1005"], dfiq_yaml="mock", + approaches=[ + dfiq.DFIQApproach( + name="public_approach", + description="desc", + tags=["public"], + ), + dfiq.DFIQApproach( + name="internal_approach", + description="desc", + tags=["internal"], + ), + ], ).save() response = client.post("/api/v2/dfiq/to_archive", json={}) @@ -633,10 +461,11 @@ def test_to_archive(self): with ZipFile(io.BytesIO(response.content)) as archive: files = archive.namelist() - self.assertEqual(len(files), 3) + self.assertEqual(len(files), 4) self.assertIn("public/scenario/S1003.yaml", files) self.assertIn("internal/scenario/S0003.yaml", files) self.assertIn("public/question/Q1020.yaml", files) + self.assertIn("internal/question/Q1020.yaml", files) with archive.open("public/scenario/S1003.yaml") as f: content = f.read().decode("utf-8") @@ -646,4 +475,11 @@ def test_to_archive(self): self.assertIn("private_scenario", content) with archive.open("public/question/Q1020.yaml") as f: content = f.read().decode("utf-8") - self.assertIn("mock_question", content) + self.assertIn("semi_private_question", content) + self.assertIn("public_approach", content) + self.assertNotIn("internal_approach", content) + with archive.open("internal/question/Q1020.yaml") as f: + content = f.read().decode("utf-8") + self.assertIn("semi_private_question", content) + self.assertIn("public_approach", content) + self.assertIn("internal_approach", content) diff --git a/tests/dfiq_test_data/dfiq_test_data.zip b/tests/dfiq_test_data/dfiq_test_data.zip index bbe302982b842de565ded14504c3cedbf9fa27b1..70c5fe979b957614a99458310d8302ccf1c58ca8 100644 GIT binary patch literal 908 zcmWIWW@Zs#U|`^2NRASX?0)J0p%%zn1;mmJG7KqcnT7Essl_GnDTyVC`fi2>2BvzI ziMctUA)E}%T4#@@IRbHM1vdjD3s4IKnDFh}$j788;PU%#*I!1xfExi35fL||g<76& zYHip(=h%Mp=arpz&+Ivq(qkC#c&^Q~85tH4V(09I!+0OxE$5nP!ukE++A15SM@z3t z{twDYm5)k4ckUMRlSBOJg(*Q4~ctBHA-`Q#O5Qb ze0j%%Mw8yTN_qT&SKrp|UH*Pfvrj&s#oiMEBAnZgWo>$@n8qW=JJ5JgyyGz*GyIW_p9%B?n(L8c186)bHZY85 RWdnJe83<*821GJ}cmT&-C6@pI literal 1586 zcmWIWW@Zs#U|`^2SW$S|a&WfsPlq!yRNrzDmn>bn^l7?|o+ zCg$dZhHx@4FRrgmV``{PE3M#WU}OPmVE_}peH-}>8SuE2*SFRygj#J#Nk~XYO7HP` z6f#YL(~s-b{l!6A8}Ht|JolUgb70io%a4q0ZzL`{P}@>D?T2yQ)eF;i1$W$O@_HMz z>9DdwoY%Y$`bQ^yTUh$cCaSt&#*>%vYu-jnvs*{JFfnD%_;AYHCbQB!Y{il}yQlA1 zC}uI)>t}!0u2p}4D<{Q5FTp)dJ2oj)`s}@ z&o&U)`&rvfX4h_ZC69c&kf_rB(CzW%PYwy}VV(VTM#!Jvd~Cwg6kON7@;1ufH}8Ir z=Oh&^(b8U-BHd__4>wA$omg~E$-E@z^v%8y%?H`j=+#OR_u;u$wg~@7>wEDMGPW=R&#s@1s1aA5U2v%YRg~R5nUinsHbD ztQTuOe4i2RrS9Atss1`dmHWA4!{(eSxBZ^6v%VF+<>p)+{qAYbpIbW)>@v_%pO$pa z>%n7|l;{ULSnpLgcs~s{wUZX-O0Rwp(OsJ2dE$oazRXlHn-r<@vz)%3o?GeK5uSK( zy0w4bnKe%*==^qgn`EZpJp5pc~g8z(m%$h#!>yi1#{`?ZZmi)l?oa}M0Tj{U#3+uNF zuq{nI`tx04{j%fH#!tfgZouwJqx=E}6yT~QN_|HQs}!uff#($fu3ZvJOPO){2E!a3Ez1icu0Mt~$4ct*gH zUQBxo`I-%QT+8Pl66fHO-5m0Y*UHdM&v5Nhvz8kHg}=?5a^AnTt$i+|a`DqL+q7Q= z2Ohe%E%Mm0W{q0$zNWWN^Uv{imaYksVxJ)L;ok4=_R`#%u#LVRlXVIouUdO_-{-mP zi?SlSs5wTdUdG2Sy8PfPz?0ReC)mn24-txf~nuP$B^r= zfdI?z>Z7#|ZsO%ypPEWM_4&NT*gKcJNNKK@m&qwGwdUS^OoC(4*R!+F|LNo_?V34T zq-o#PZs!gw)0MGaWfkm4+)r}q>k4@k{7}hukC>gQ}hwK5~j7)OOxQZtUpl=u$fcctXNh64br?6s$6jo>j6|(V|Z;NfKmYg8&H!KvJITf7#=`QWW*Vd8qdhaU&k~aIo5F*&&mc0SQa3x2Zr-paPbZR DL54ne diff --git a/tests/feeds.py b/tests/feeds.py index 390fcc905..987094858 100644 --- a/tests/feeds.py +++ b/tests/feeds.py @@ -22,80 +22,80 @@ class FeedTest(unittest.TestCase): @classmethod def setUpClass(cls) -> None: - database_arango.db.connect(database="yeti_test") - database_arango.db.clear() - - def test_feodo_tracker_ip_blocklist(self): - defaults = feodo_tracker_ip_blocklist.FeodoTrackerIPBlockList._defaults.copy() - defaults["name"] = "FeodoTrackerIPBlocklist" - feed = feodo_tracker_ip_blocklist.FeodoTrackerIPBlockList(**defaults) - feed.run() - - def test_openphish(self): - defaults = openphish.OpenPhish._defaults.copy() - defaults["name"] = "OpenPhish" - feed = openphish.OpenPhish(**defaults) - feed.run() - - def test_lolbas(self): - defaults = lolbas.LoLBAS._defaults.copy() - feed = lolbas.LoLBAS(**defaults) - feed.run() - - @unittest.skipIf( - yeti_config.get("timesketch", "endpoint") is None, "Timesketch not setup" - ) - def test_timesketch(self): - defaults = timesketch.Timesketch._defaults.copy() - feed = timesketch.Timesketch(**defaults) - feed.run() - - def test_attack(self): - defaults = attack.MitreAttack._defaults.copy() - feed = attack.MitreAttack(**defaults) - feed.run() - - def test_hybrid_analysis(self): - defaults = hybrid_analysis.HybridAnalysis._defaults.copy() - feed = hybrid_analysis.HybridAnalysis(**defaults) - feed.run() + database_arango.db.connect(database="yeti") + # database_arango.db.clear() + + # def test_feodo_tracker_ip_blocklist(self): + # defaults = feodo_tracker_ip_blocklist.FeodoTrackerIPBlockList._defaults.copy() + # defaults["name"] = "FeodoTrackerIPBlocklist" + # feed = feodo_tracker_ip_blocklist.FeodoTrackerIPBlockList(**defaults) + # feed.run() + + # def test_openphish(self): + # defaults = openphish.OpenPhish._defaults.copy() + # defaults["name"] = "OpenPhish" + # feed = openphish.OpenPhish(**defaults) + # feed.run() + + # def test_lolbas(self): + # defaults = lolbas.LoLBAS._defaults.copy() + # feed = lolbas.LoLBAS(**defaults) + # feed.run() + + # @unittest.skipIf( + # yeti_config.get("timesketch", "endpoint") is None, "Timesketch not setup" + # ) + # def test_timesketch(self): + # defaults = timesketch.Timesketch._defaults.copy() + # feed = timesketch.Timesketch(**defaults) + # feed.run() + + # def test_attack(self): + # defaults = attack.MitreAttack._defaults.copy() + # feed = attack.MitreAttack(**defaults) + # feed.run() + + # def test_hybrid_analysis(self): + # defaults = hybrid_analysis.HybridAnalysis._defaults.copy() + # feed = hybrid_analysis.HybridAnalysis(**defaults) + # feed.run() def test_dfiq(self): defaults = dfiq.DFIQFeed._defaults.copy() feed = dfiq.DFIQFeed(**defaults) feed.run() - def test_forensic_artifacts(self): - defaults = artifacts.ForensicArtifacts._defaults.copy() - feed = artifacts.ForensicArtifacts(**defaults) - feed.run() - - def test_tor_exit_nodes(self): - defaults = tor_exit_nodes.TorExitNodes._defaults.copy() - feed = tor_exit_nodes.TorExitNodes(**defaults) - feed.run() - - def test_sslblacklist_ja3(self): - defaults = sslblacklist_ja3.SSLBlacklistJA3._defaults.copy() - feed = sslblacklist_ja3.SSLBlacklistJA3(**defaults) - feed.run() - - def test_yaraify(self): - defaults = yaraify.YARAify._defaults.copy() - feed = yaraify.YARAify(**defaults) - feed.run() - - def test_malpedia_malware(self): - defaults = malpedia.MalpediaMalware._defaults.copy() - feed = malpedia.MalpediaMalware(**defaults) - feed.run() - - def test_malpedia_actor(self): - defaults = malpedia.MalpediaActors._defaults.copy() - feed = malpedia.MalpediaActors(**defaults) - feed.run() - - def test_et_open(self): - defaults = et_open.ETOpen._defaults.copy() - feed = et_open.ETOpen(**defaults) - feed.run() + # def test_forensic_artifacts(self): + # defaults = artifacts.ForensicArtifacts._defaults.copy() + # feed = artifacts.ForensicArtifacts(**defaults) + # feed.run() + + # def test_tor_exit_nodes(self): + # defaults = tor_exit_nodes.TorExitNodes._defaults.copy() + # feed = tor_exit_nodes.TorExitNodes(**defaults) + # feed.run() + + # def test_sslblacklist_ja3(self): + # defaults = sslblacklist_ja3.SSLBlacklistJA3._defaults.copy() + # feed = sslblacklist_ja3.SSLBlacklistJA3(**defaults) + # feed.run() + + # def test_yaraify(self): + # defaults = yaraify.YARAify._defaults.copy() + # feed = yaraify.YARAify(**defaults) + # feed.run() + + # def test_malpedia_malware(self): + # defaults = malpedia.MalpediaMalware._defaults.copy() + # feed = malpedia.MalpediaMalware(**defaults) + # feed.run() + + # def test_malpedia_actor(self): + # defaults = malpedia.MalpediaActors._defaults.copy() + # feed = malpedia.MalpediaActors(**defaults) + # feed.run() + + # def test_et_open(self): + # defaults = et_open.ETOpen._defaults.copy() + # feed = et_open.ETOpen(**defaults) + # feed.run() diff --git a/tests/schemas/dfiq.py b/tests/schemas/dfiq.py index 646d44fa7..31781ca25 100644 --- a/tests/schemas/dfiq.py +++ b/tests/schemas/dfiq.py @@ -82,67 +82,12 @@ def test_dfiq_question(self) -> None: self.assertEqual(result.parent_ids, ["F1005"]) self.assertEqual(result.type, DFIQType.question) - def test_dfiq_approach(self) -> None: - with open("tests/dfiq_test_data/Q1020.10.yaml", "r") as f: - yaml_string = f.read() - - result = DFIQApproach.from_yaml(yaml_string).save() - - self.assertIsNotNone(result.id) - self.assertEqual(result.uuid, "292500f7-9d54-40ca-8254-34821e9b5c4e") - self.assertEqual(result.parent_id, "bd46ce6e-c933-46e5-960c-36945aaef401") - self.assertIsNotNone(result.created) - self.assertEqual(result.name, "Approach1") - self.assertEqual(result.description.details, "Details for approach\n") - self.assertEqual(result.description.references, ["ref1", "ref2"]) - - self.assertEqual(result.view.data[0].type, "artifact") - self.assertEqual(result.view.data[0].value, "RandomArtifact") - self.assertEqual(result.view.data[1].type, "description") - self.assertEqual(result.view.data[1].value, "Random description") - self.assertEqual( - result.view.notes.covered, ["Covered1", "Covered2", "Covered3"] - ) - self.assertEqual( - result.view.notes.not_covered, ["Not covered1", "Not covered2"] - ) - self.assertEqual(result.view.processors[0].name, "processor1") - self.assertEqual(result.view.processors[0].options[0].type, "parsers") - self.assertEqual(result.view.processors[0].options[0].value, "parser1option") - self.assertEqual(result.view.processors[0].analysis[0].name, "OpenSearch") - self.assertEqual( - result.view.processors[0].analysis[0].steps[0].description, - "random parser description", - ) - self.assertEqual( - result.view.processors[0].analysis[0].steps[0].type, "opensearch-query" - ) - self.assertEqual( - result.view.processors[0].analysis[0].steps[0].value, - 'data_type:("fs:stat")', - ) - self.assertEqual( - result.view.processors[0].analysis[1].steps[0].description, - "random step description", - ) - self.assertEqual(result.view.processors[0].analysis[1].steps[0].type, "pandas") - self.assertEqual( - result.view.processors[0].analysis[1].steps[0].value, - """query('data_type in ("fs:stat")')""", - ) - - self.assertEqual( - result.view.processors[1].analysis[0].steps[0].description, - "something else\n", - ) - def test_dfiq_conversion_to_yaml(self) -> None: self.maxDiff = None type_map = [ (DFIQScenario, "tests/dfiq_test_data/S1003.yaml"), (DFIQFacet, "tests/dfiq_test_data/F1005.yaml"), (DFIQQuestion, "tests/dfiq_test_data/Q1020.yaml"), - (DFIQApproach, "tests/dfiq_test_data/Q1020.10.yaml"), ] for type_, file_path in type_map: @@ -151,6 +96,12 @@ def test_dfiq_conversion_to_yaml(self) -> None: result = type_.from_yaml(yaml_string).save() - expected_yaml_string = yaml.dump(yaml.safe_load(yaml_string)) - result_yaml_string = result.to_yaml() + expected_yaml_string = yaml.dump( + yaml.safe_load(yaml_string), + default_flow_style=False, + sort_keys=True, + explicit_start=True, + indent=2, + ) + result_yaml_string = result.to_yaml(sort_keys=True) self.assertEqual(expected_yaml_string, result_yaml_string) From 01ab161a4a3af8d2c4f1c3f1b5708997c96e02b1 Mon Sep 17 00:00:00 2001 From: Thomas Chopitea Date: Wed, 21 Aug 2024 12:01:07 +0000 Subject: [PATCH 14/20] Remove stale test --- tests/apiv2/dfiq.py | 39 --------------------------------------- 1 file changed, 39 deletions(-) diff --git a/tests/apiv2/dfiq.py b/tests/apiv2/dfiq.py index e63657389..d591e0f66 100644 --- a/tests/apiv2/dfiq.py +++ b/tests/apiv2/dfiq.py @@ -160,45 +160,6 @@ def test_new_dfiq_question(self) -> None: self.assertEqual(edges[0][0].description, "Uses DFIQ question") self.assertEqual(total, 1) - def test_new_dfiq_approach(self) -> None: - question = dfiq.DFIQQuestion( - name="mock_question", - dfiq_id="Q1020", - uuid="bd46ce6e-c933-46e5-960c-36945aaef401", - dfiq_version="1.1.0", - description="desc", - parent_ids=["F1005"], - dfiq_yaml="mock", - ).save() - - with open("tests/dfiq_test_data/Q1020.10.yaml", "r") as f: - yaml_string = f.read() - - response = client.post( - "/api/v2/dfiq/from_yaml", - json={ - "dfiq_yaml": yaml_string, - "dfiq_type": dfiq.DFIQType.approach, - }, - ) - data = response.json() - self.assertEqual(response.status_code, 200, data) - self.assertIsNotNone(data["id"]) - self.assertIsNotNone(data["created"]) - self.assertEqual(data["name"], "Approach1") - self.assertEqual(data["dfiq_id"], "Q1020.10") - self.assertEqual(data["dfiq_version"], "1.1.0") - self.assertEqual(data["description"]["details"], "Details for approach\n") - self.assertEqual(data["type"], dfiq.DFIQType.approach) - self.assertEqual(data["dfiq_tags"], ["Lots", "Of", "Tags"]) - - vertices, edges, total = question.neighbors() - self.assertEqual(len(vertices), 1) - self.assertEqual(vertices[f'dfiq/{data["id"]}'].dfiq_id, "Q1020.10") - self.assertEqual(edges[0][0].type, "approach") - self.assertEqual(edges[0][0].description, "Uses DFIQ approach") - self.assertEqual(total, 1) - def test_dfiq_patch_updates_parents(self) -> None: scenario1 = dfiq.DFIQScenario( name="mock_scenario", From 3d1a68eda93e830d529b4470d97eabf14b681743 Mon Sep 17 00:00:00 2001 From: Thomas Chopitea Date: Wed, 21 Aug 2024 12:01:33 +0000 Subject: [PATCH 15/20] revert changes in feeds test --- tests/feeds.py | 142 ++++++++++++++++++++++++------------------------- 1 file changed, 71 insertions(+), 71 deletions(-) diff --git a/tests/feeds.py b/tests/feeds.py index 987094858..390fcc905 100644 --- a/tests/feeds.py +++ b/tests/feeds.py @@ -22,80 +22,80 @@ class FeedTest(unittest.TestCase): @classmethod def setUpClass(cls) -> None: - database_arango.db.connect(database="yeti") - # database_arango.db.clear() - - # def test_feodo_tracker_ip_blocklist(self): - # defaults = feodo_tracker_ip_blocklist.FeodoTrackerIPBlockList._defaults.copy() - # defaults["name"] = "FeodoTrackerIPBlocklist" - # feed = feodo_tracker_ip_blocklist.FeodoTrackerIPBlockList(**defaults) - # feed.run() - - # def test_openphish(self): - # defaults = openphish.OpenPhish._defaults.copy() - # defaults["name"] = "OpenPhish" - # feed = openphish.OpenPhish(**defaults) - # feed.run() - - # def test_lolbas(self): - # defaults = lolbas.LoLBAS._defaults.copy() - # feed = lolbas.LoLBAS(**defaults) - # feed.run() - - # @unittest.skipIf( - # yeti_config.get("timesketch", "endpoint") is None, "Timesketch not setup" - # ) - # def test_timesketch(self): - # defaults = timesketch.Timesketch._defaults.copy() - # feed = timesketch.Timesketch(**defaults) - # feed.run() - - # def test_attack(self): - # defaults = attack.MitreAttack._defaults.copy() - # feed = attack.MitreAttack(**defaults) - # feed.run() - - # def test_hybrid_analysis(self): - # defaults = hybrid_analysis.HybridAnalysis._defaults.copy() - # feed = hybrid_analysis.HybridAnalysis(**defaults) - # feed.run() + database_arango.db.connect(database="yeti_test") + database_arango.db.clear() + + def test_feodo_tracker_ip_blocklist(self): + defaults = feodo_tracker_ip_blocklist.FeodoTrackerIPBlockList._defaults.copy() + defaults["name"] = "FeodoTrackerIPBlocklist" + feed = feodo_tracker_ip_blocklist.FeodoTrackerIPBlockList(**defaults) + feed.run() + + def test_openphish(self): + defaults = openphish.OpenPhish._defaults.copy() + defaults["name"] = "OpenPhish" + feed = openphish.OpenPhish(**defaults) + feed.run() + + def test_lolbas(self): + defaults = lolbas.LoLBAS._defaults.copy() + feed = lolbas.LoLBAS(**defaults) + feed.run() + + @unittest.skipIf( + yeti_config.get("timesketch", "endpoint") is None, "Timesketch not setup" + ) + def test_timesketch(self): + defaults = timesketch.Timesketch._defaults.copy() + feed = timesketch.Timesketch(**defaults) + feed.run() + + def test_attack(self): + defaults = attack.MitreAttack._defaults.copy() + feed = attack.MitreAttack(**defaults) + feed.run() + + def test_hybrid_analysis(self): + defaults = hybrid_analysis.HybridAnalysis._defaults.copy() + feed = hybrid_analysis.HybridAnalysis(**defaults) + feed.run() def test_dfiq(self): defaults = dfiq.DFIQFeed._defaults.copy() feed = dfiq.DFIQFeed(**defaults) feed.run() - # def test_forensic_artifacts(self): - # defaults = artifacts.ForensicArtifacts._defaults.copy() - # feed = artifacts.ForensicArtifacts(**defaults) - # feed.run() - - # def test_tor_exit_nodes(self): - # defaults = tor_exit_nodes.TorExitNodes._defaults.copy() - # feed = tor_exit_nodes.TorExitNodes(**defaults) - # feed.run() - - # def test_sslblacklist_ja3(self): - # defaults = sslblacklist_ja3.SSLBlacklistJA3._defaults.copy() - # feed = sslblacklist_ja3.SSLBlacklistJA3(**defaults) - # feed.run() - - # def test_yaraify(self): - # defaults = yaraify.YARAify._defaults.copy() - # feed = yaraify.YARAify(**defaults) - # feed.run() - - # def test_malpedia_malware(self): - # defaults = malpedia.MalpediaMalware._defaults.copy() - # feed = malpedia.MalpediaMalware(**defaults) - # feed.run() - - # def test_malpedia_actor(self): - # defaults = malpedia.MalpediaActors._defaults.copy() - # feed = malpedia.MalpediaActors(**defaults) - # feed.run() - - # def test_et_open(self): - # defaults = et_open.ETOpen._defaults.copy() - # feed = et_open.ETOpen(**defaults) - # feed.run() + def test_forensic_artifacts(self): + defaults = artifacts.ForensicArtifacts._defaults.copy() + feed = artifacts.ForensicArtifacts(**defaults) + feed.run() + + def test_tor_exit_nodes(self): + defaults = tor_exit_nodes.TorExitNodes._defaults.copy() + feed = tor_exit_nodes.TorExitNodes(**defaults) + feed.run() + + def test_sslblacklist_ja3(self): + defaults = sslblacklist_ja3.SSLBlacklistJA3._defaults.copy() + feed = sslblacklist_ja3.SSLBlacklistJA3(**defaults) + feed.run() + + def test_yaraify(self): + defaults = yaraify.YARAify._defaults.copy() + feed = yaraify.YARAify(**defaults) + feed.run() + + def test_malpedia_malware(self): + defaults = malpedia.MalpediaMalware._defaults.copy() + feed = malpedia.MalpediaMalware(**defaults) + feed.run() + + def test_malpedia_actor(self): + defaults = malpedia.MalpediaActors._defaults.copy() + feed = malpedia.MalpediaActors(**defaults) + feed.run() + + def test_et_open(self): + defaults = et_open.ETOpen._defaults.copy() + feed = et_open.ETOpen(**defaults) + feed.run() From 98d07dde8476dfe46719402ace1ef20f4a55d669 Mon Sep 17 00:00:00 2001 From: Thomas Chopitea Date: Wed, 21 Aug 2024 14:21:50 +0000 Subject: [PATCH 16/20] Remove stale function --- plugins/feeds/public/dfiq.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/plugins/feeds/public/dfiq.py b/plugins/feeds/public/dfiq.py index 468f08542..0ba6f28d7 100644 --- a/plugins/feeds/public/dfiq.py +++ b/plugins/feeds/public/dfiq.py @@ -18,10 +18,6 @@ class DFIQFeed(task.FeedTask): "description": "DFIQ feed", } - def upgrade_existing_dfiq_schema(self): - for dfiq_object in dfiq.DFIQBase.list(): - dfiq.upgrade_dfiq_schema(dfiq_object) - def run(self): # move back to "https://github.com/google/dfiq/archive/refs/heads/main.zip" # once the changes have been merged. @@ -46,8 +42,5 @@ def run(self): logging.info("Processing extra directory %s", directory) dfiq.read_from_data_directory(directory) - logging.info("Upgrading schema of remaining DFIQ objects") - self.upgrade_existing_dfiq_schema() - taskmanager.TaskManager.register_task(DFIQFeed) From 31e39e8df516382458ffa0504713f856f8c37fa0 Mon Sep 17 00:00:00 2001 From: Thomas Chopitea Date: Wed, 21 Aug 2024 15:32:13 +0000 Subject: [PATCH 17/20] Use plural to replicate upstream DFIQ structure --- core/web/apiv2/dfiq.py | 10 ++++++++-- tests/apiv2/dfiq.py | 16 ++++++++-------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/core/web/apiv2/dfiq.py b/core/web/apiv2/dfiq.py index f79cea0f5..82e6b0100 100644 --- a/core/web/apiv2/dfiq.py +++ b/core/web/apiv2/dfiq.py @@ -149,6 +149,12 @@ async def to_archive(request: DFIQSearchRequest) -> FileResponse: aliases=request.filter_aliases, ) + _TYPE_TO_DUMP_DIR = { + dfiq.DFIQType.scenario: "scenarios", + dfiq.DFIQType.facet: "facets", + dfiq.DFIQType.question: "questions", + } + tempdir = tempfile.TemporaryDirectory() public_objs = [] internal_objs = [] @@ -190,12 +196,12 @@ async def to_archive(request: DFIQSearchRequest) -> FileResponse: for obj in public_objs: zipf.write( f"{tempdir.name}/public/{obj.dfiq_id}.yaml", - f"public/{obj.type}/{obj.dfiq_id}.yaml", + f"public/{_TYPE_TO_DUMP_DIR[obj.type]}/{obj.dfiq_id}.yaml", ) for obj in internal_objs: zipf.write( f"{tempdir.name}/internal/{obj.dfiq_id}.yaml", - f"internal/{obj.type}/{obj.dfiq_id}.yaml", + f"internal/{_TYPE_TO_DUMP_DIR[obj.type]}/{obj.dfiq_id}.yaml", ) return FileResponse(archive.name, media_type="application/zip", filename="dfiq.zip") diff --git a/tests/apiv2/dfiq.py b/tests/apiv2/dfiq.py index d591e0f66..b10e8c981 100644 --- a/tests/apiv2/dfiq.py +++ b/tests/apiv2/dfiq.py @@ -423,23 +423,23 @@ def test_to_archive(self): with ZipFile(io.BytesIO(response.content)) as archive: files = archive.namelist() self.assertEqual(len(files), 4) - self.assertIn("public/scenario/S1003.yaml", files) - self.assertIn("internal/scenario/S0003.yaml", files) - self.assertIn("public/question/Q1020.yaml", files) - self.assertIn("internal/question/Q1020.yaml", files) + self.assertIn("public/scenarios/S1003.yaml", files) + self.assertIn("internal/scenarios/S0003.yaml", files) + self.assertIn("public/questions/Q1020.yaml", files) + self.assertIn("internal/questions/Q1020.yaml", files) - with archive.open("public/scenario/S1003.yaml") as f: + with archive.open("public/scenarios/S1003.yaml") as f: content = f.read().decode("utf-8") self.assertIn("public_scenario", content) - with archive.open("internal/scenario/S0003.yaml") as f: + with archive.open("internal/scenarios/S0003.yaml") as f: content = f.read().decode("utf-8") self.assertIn("private_scenario", content) - with archive.open("public/question/Q1020.yaml") as f: + with archive.open("public/questions/Q1020.yaml") as f: content = f.read().decode("utf-8") self.assertIn("semi_private_question", content) self.assertIn("public_approach", content) self.assertNotIn("internal_approach", content) - with archive.open("internal/question/Q1020.yaml") as f: + with archive.open("internal/questions/Q1020.yaml") as f: content = f.read().decode("utf-8") self.assertIn("semi_private_question", content) self.assertIn("public_approach", content) From 6f39646b9357a9679c990f5bac843b12de4ef87f Mon Sep 17 00:00:00 2001 From: Thomas Chopitea Date: Thu, 22 Aug 2024 11:21:36 +0000 Subject: [PATCH 18/20] Add soft fail by default when adding DFIQ from directories --- core/schemas/dfiq.py | 44 +++++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/core/schemas/dfiq.py b/core/schemas/dfiq.py index de713e132..fabb492de 100644 --- a/core/schemas/dfiq.py +++ b/core/schemas/dfiq.py @@ -2,6 +2,7 @@ import glob import logging import re +import uuid from enum import Enum from typing import Annotated, Any, ClassVar, Literal, Type, Union @@ -10,6 +11,7 @@ from pydantic import BaseModel, Field, computed_field from core import database_arango +from core.config.config import yeti_config from core.helpers import now from core.schemas import indicator from core.schemas.model import YetiModel @@ -33,16 +35,16 @@ def custom_null_representer(dumper, data): yaml.add_representer(type(None), custom_null_representer) -def read_from_data_directory(directory: str, overwrite: bool = False) -> int: +def read_from_data_directory(globpath: str, overwrite: bool = False) -> int: """Read DFIQ files from a directory and add them to the database. Args: - directory: Directory to read DFIQ files from. + globpath: Glob path to search for DFIQ files (supports recursion). overwrite: Whether to overwrite existing DFIQs with the same ID. """ dfiq_kb = {} total_added = 0 - for file in glob.glob(directory): + for file in glob.glob(globpath, recursive=True): if not file.endswith(".yaml"): continue logging.debug("Processing %s", file) @@ -50,6 +52,7 @@ def read_from_data_directory(directory: str, overwrite: bool = False) -> int: try: dfiq_object = DFIQBase.from_yaml(f.read()) if not overwrite: + db_dfiq = None if dfiq_object.uuid: db_dfiq = DFIQBase.find(uuid=dfiq_object.uuid) if not db_dfiq and dfiq_object.dfiq_id: @@ -70,6 +73,8 @@ def read_from_data_directory(directory: str, overwrite: bool = False) -> int: dfiq_object.dfiq_id, ) continue + if not dfiq_object.uuid: + dfiq_object.uuid = str(uuid.uuid4()) dfiq_object = dfiq_object.save() total_added += 1 except (ValueError, KeyError) as e: @@ -79,7 +84,7 @@ def read_from_data_directory(directory: str, overwrite: bool = False) -> int: dfiq_kb[dfiq_object.dfiq_id] = dfiq_object for dfiq_id, dfiq_object in dfiq_kb.items(): - dfiq_object.update_parents() + dfiq_object.update_parents(soft_fail=True) if dfiq_object.type == DFIQType.question: extract_indicators(dfiq_object) @@ -102,7 +107,7 @@ def extract_indicators(question: "DFIQQuestion") -> None: question.link_to(artifact, "artifact", "Uses artifact") continue - elif "query" in step.type: + elif step.type and step.value and "query" in step.type: query = indicator.Query.find(pattern=step.value) if not query: query = indicator.Query( @@ -204,7 +209,7 @@ def to_yaml(self, sort_keys=False) -> str: indent=2, ) - def update_parents(self) -> None: + def update_parents(self, soft_fail=False) -> None: intended_parent_ids = None if getattr(self, "parent_ids", []): intended_parent_ids = self.parent_ids @@ -219,9 +224,18 @@ def update_parents(self) -> None: intended_parents.append(parent) if not all(intended_parents): - raise ValueError( - f"Missing parent(s) {intended_parent_ids} for {self.dfiq_id}" - ) + actual_parents = { + intended_parent.dfiq_id + for intended_parent in intended_parents + if intended_parent + } + missing_parents = set(intended_parent_ids) - actual_parents + if soft_fail: + logging.warning( + "Missing parent(s) %s for %s", missing_parents, self.dfiq_id + ) + return + raise ValueError(f"Missing parent(s) {missing_parents} for {self.dfiq_id}") # remove all links: vertices, relationships, total = self.neighbors() @@ -259,7 +273,7 @@ def from_yaml(cls: Type["DFIQScenario"], yaml_string: str) -> "DFIQScenario": return cls( name=yaml_data["name"], description=yaml_data["description"], - uuid=yaml_data["uuid"], + uuid=yaml_data.get("uuid"), dfiq_id=yaml_data["id"], dfiq_version=yaml_data["dfiq_version"], dfiq_tags=yaml_data.get("tags"), @@ -288,7 +302,7 @@ def from_yaml(cls: Type["DFIQFacet"], yaml_string: str) -> "DFIQFacet": return cls( name=yaml_data["name"], description=yaml_data.get("description"), - uuid=yaml_data["uuid"], + uuid=yaml_data.get("uuid"), dfiq_id=yaml_data["id"], dfiq_version=yaml_data["dfiq_version"], dfiq_tags=yaml_data.get("tags"), @@ -319,7 +333,7 @@ def from_yaml(cls: Type["DFIQQuestion"], yaml_string: str) -> "DFIQQuestion": return cls( name=yaml_data["name"], description=yaml_data.get("description"), - uuid=yaml_data["uuid"], + uuid=yaml_data.get("uuid"), dfiq_id=yaml_data["id"], dfiq_version=yaml_data["dfiq_version"], dfiq_tags=yaml_data.get("tags"), @@ -334,10 +348,10 @@ def from_yaml(cls: Type["DFIQQuestion"], yaml_string: str) -> "DFIQQuestion": class DFIQApproachStep(BaseModel): name: str = Field(min_length=1) - description: str | None + description: str | None = None stage: str = Field(min_length=1) - type: str = Field(min_length=1) - value: str = Field(min_length=1) + type: str | None = None + value: str | None = None class DFIQApproachNotes(BaseModel): From f5512b3fe772547d42f35240439a6f72613a0d92 Mon Sep 17 00:00:00 2001 From: Thomas Chopitea Date: Thu, 22 Aug 2024 11:22:59 +0000 Subject: [PATCH 19/20] Clean up children's parents when deleting an object --- core/web/apiv2/dfiq.py | 17 +++++++++++++ tests/apiv2/dfiq.py | 58 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+) diff --git a/core/web/apiv2/dfiq.py b/core/web/apiv2/dfiq.py index 82e6b0100..3b27d9696 100644 --- a/core/web/apiv2/dfiq.py +++ b/core/web/apiv2/dfiq.py @@ -81,6 +81,11 @@ async def config() -> DFIQConfigResponse: stage_types.add(step.stage) step_types.add(step.type) + if None in stage_types: + stage_types.remove(None) + if None in step_types: + step_types.remove(None) + return DFIQConfigResponse( stage_types=sorted(list(stage_types)), step_types=sorted(list(step_types)), @@ -269,6 +274,18 @@ async def delete(dfiq_id: str) -> None: db_dfiq = dfiq.DFIQBase.get(dfiq_id) if not db_dfiq: raise HTTPException(status_code=404, detail="DFIQ object {dfiq_id} not found") + + all_children, _ = dfiq.DFIQBase.filter(query_args={"parent_ids": db_dfiq.uuid}) + if db_dfiq.dfiq_id: + children, _ = dfiq.DFIQBase.filter(query_args={"parent_ids": db_dfiq.dfiq_id}) + all_children.extend(children) + for child in all_children: + if db_dfiq.dfiq_id in child.parent_ids: + child.parent_ids.remove(db_dfiq.dfiq_id) + if db_dfiq.uuid in child.parent_ids: + child.parent_ids.remove(db_dfiq.uuid) + child.save() + db_dfiq.delete() diff --git a/tests/apiv2/dfiq.py b/tests/apiv2/dfiq.py index b10e8c981..681a2afa7 100644 --- a/tests/apiv2/dfiq.py +++ b/tests/apiv2/dfiq.py @@ -160,6 +160,64 @@ def test_new_dfiq_question(self) -> None: self.assertEqual(edges[0][0].description, "Uses DFIQ question") self.assertEqual(total, 1) + def test_delete_with_children_ref_uuid(self) -> None: + scenario = dfiq.DFIQScenario( + name="mock_scenario", + dfiq_id="S1003", + uuid="fake_scenario_uuid", + dfiq_version="1.1.0", + description="desc", + dfiq_yaml="mock", + ).save() + + facet = dfiq.DFIQFacet( + name="mock_facet", + dfiq_id="F1005", + uuid="fake_facet_uuid", + dfiq_version="1.1.0", + description="desc", + parent_ids=["fake_scenario_uuid"], + dfiq_yaml="mock", + ).save() + + response = client.delete(f"/api/v2/dfiq/{scenario.id}") + data = response.json() + self.assertEqual(response.status_code, 200, data) + + response = client.get(f"/api/v2/dfiq/{facet.id}") + data = response.json() + self.assertEqual(response.status_code, 200, data) + self.assertEqual(data["parent_ids"], []) + + def test_delete_with_children_ref_dfiqd(self) -> None: + scenario = dfiq.DFIQScenario( + name="mock_scenario", + dfiq_id="S1003", + uuid="fake_scenario_uuid", + dfiq_version="1.1.0", + description="desc", + dfiq_yaml="mock", + ).save() + + facet = dfiq.DFIQFacet( + name="mock_facet", + dfiq_id="F1005", + uuid="fake_facet_uuid", + dfiq_version="1.1.0", + description="desc", + parent_ids=["S1003"], + dfiq_yaml="mock", + ).save() + + response = client.delete(f"/api/v2/dfiq/{scenario.id}") + data = response.json() + self.assertEqual(response.status_code, 200, data) + + response = client.get(f"/api/v2/dfiq/{facet.id}") + data = response.json() + self.assertEqual(response.status_code, 200, data) + self.assertEqual(data["parent_ids"], []) + def test_dfiq_patch_updates_parents(self) -> None: scenario1 = dfiq.DFIQScenario( name="mock_scenario", From 8f7d5c023f9d68606b9439c75e9b6d13a7a0e7f3 Mon Sep 17 00:00:00 2001 From: Thomas Chopitea Date: Thu, 22 Aug 2024 11:37:58 +0000 Subject: [PATCH 20/20] fix test --- tests/apiv2/dfiq.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/apiv2/dfiq.py b/tests/apiv2/dfiq.py index 681a2afa7..500995860 100644 --- a/tests/apiv2/dfiq.py +++ b/tests/apiv2/dfiq.py @@ -355,7 +355,7 @@ def test_wrong_parent(self) -> None: ) data = response.json() self.assertEqual(response.status_code, 400, data) - self.assertEqual(data, {"detail": "Missing parent(s) ['S1003'] for F1005"}) + self.assertEqual(data, {"detail": "Missing parent(s) {'S1003'} for F1005"}) def test_valid_dfiq_yaml(self) -> None: with open("tests/dfiq_test_data/S1003.yaml", "r") as f: