diff --git a/ingestion/ingestion_utils.py b/ingestion/ingestion_utils.py index d2fbbee..64388e8 100644 --- a/ingestion/ingestion_utils.py +++ b/ingestion/ingestion_utils.py @@ -277,10 +277,10 @@ def __iter__(self): def get_subject_areas(): """ - Returns a list of subject areas from the subject_areas_template.yaml file + Returns a list of top level subject areas from the top_level_subject_areas_template.yaml file """ subject_areas_filepath = os.path.join( - os.path.dirname(__file__), "tags", "subject_areas_template.yaml" + os.path.dirname(__file__), "tags", "top_level_subject_areas_template.yaml" ) with open(subject_areas_filepath, "r") as file: subject_areas_yaml = yaml.safe_load(file) diff --git a/ingestion/tags/generate_tags_file.py b/ingestion/tags/generate_tags_file.py index 6fecf21..f6a25f5 100644 --- a/ingestion/tags/generate_tags_file.py +++ b/ingestion/tags/generate_tags_file.py @@ -16,18 +16,20 @@ dir = Path(__file__).parent +input_yamls = ["top_level_subject_areas_template.yaml", "subject_areas_template.yaml"] output = [] -with open(dir / "subject_areas_template.yaml") as file: - for tag in yaml.safe_load(file): - name = tag["name"] - properties = TagPropertiesClass(name=name, description=tag["description"]) - urn = f"urn:li:tag:{name}" - output.append( - MetadataChangeEventClass( - proposedSnapshot=TagSnapshotClass(urn=urn, aspects=[properties]) - ).to_obj() - ) +for input_yaml in input_yamls: + with open(dir / input_yaml) as file: + for tag in yaml.safe_load(file): + name = tag["name"] + properties = TagPropertiesClass(name=name, description=tag.get("description")) + urn = f"urn:li:tag:{name}" + output.append( + MetadataChangeEventClass( + proposedSnapshot=TagSnapshotClass(urn=urn, aspects=[properties]) + ).to_obj() + ) with open(dir / "subject_areas.json", "w") as file: json.dump(output, file, indent=2) diff --git a/ingestion/tags/subject_areas.json b/ingestion/tags/subject_areas.json index 7ed94e2..fc57d0f 100644 --- a/ingestion/tags/subject_areas.json +++ 
b/ingestion/tags/subject_areas.json @@ -103,5 +103,134 @@ ] } } + }, + { + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { + "urn": "urn:li:tag:Property", + "aspects": [ + { + "com.linkedin.pegasus2avro.tag.TagProperties": { + "name": "Property", + "description": "MoJ property and estates" + } + } + ] + } + } + }, + { + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { + "urn": "urn:li:tag:Finance", + "aspects": [ + { + "com.linkedin.pegasus2avro.tag.TagProperties": { + "name": "Finance", + "description": "MoJ finance and procurement" + } + } + ] + } + } + }, + { + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { + "urn": "urn:li:tag:Employees", + "aspects": [ + { + "com.linkedin.pegasus2avro.tag.TagProperties": { + "name": "Employees", + "description": "SOP, HR, and staff" + } + } + ] + } + } + }, + { + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { + "urn": "urn:li:tag:Reoffending", + "aspects": [ + { + "com.linkedin.pegasus2avro.tag.TagProperties": { + "name": "Reoffending" + } + } + ] + } + } + }, + { + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { + "urn": "urn:li:tag:Criminal courts", + "aspects": [ + { + "com.linkedin.pegasus2avro.tag.TagProperties": { + "name": "Criminal courts" + } + } + ] + } + } + }, + { + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { + "urn": "urn:li:tag:Civil courts", + "aspects": [ + { + "com.linkedin.pegasus2avro.tag.TagProperties": { + "name": "Civil courts" + } + } + ] + } + } + }, + { + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { + "urn": "urn:li:tag:Prisons", + "aspects": [ + { + "com.linkedin.pegasus2avro.tag.TagProperties": { + "name": "Prisons" + } + } + ] + } + } + }, + { + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { + "urn": 
"urn:li:tag:Probation", + "aspects": [ + { + "com.linkedin.pegasus2avro.tag.TagProperties": { + "name": "Probation" + } + } + ] + } + } + }, + { + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { + "urn": "urn:li:tag:Electronic monitoring", + "aspects": [ + { + "com.linkedin.pegasus2avro.tag.TagProperties": { + "name": "Electronic monitoring" + } + } + ] + } + } } ] \ No newline at end of file diff --git a/ingestion/tags/subject_areas_template.yaml b/ingestion/tags/subject_areas_template.yaml index e6d5d21..a1c0e2e 100644 --- a/ingestion/tags/subject_areas_template.yaml +++ b/ingestion/tags/subject_areas_template.yaml @@ -1,14 +1,12 @@ -- name: "Corporate operations" - description: "Employees, property and finance" -- name: "Crime and policing" - description: "Law enforcement, criminal offences and reoffending" -- name: "Courts and tribunals" - description: "HM Courts and Tribunal Service, legal cases, sentencing" -- name: "Legal aid" - description: "Legal aid and the Civil Legal Advice service" -- name: "Office of the Public Guardian" - description: "Lasting power of attorney and supervision orders" -- name: "Prisons and probation" - description: "HM Prison and Probation Service, electronic monitoring, rehabilitation, offence risk" -- name: "Reference data" - description: "Data to define or standardise other data" +- name: Property + description: MoJ property and estates +- name: Finance + description: MoJ finance and procurement +- name: Employees + description: SOP, HR, and staff +- name: Reoffending +- name: Criminal courts +- name: Civil courts +- name: Prisons +- name: Probation +- name: Electronic monitoring diff --git a/ingestion/tags/top_level_subject_areas_template.yaml b/ingestion/tags/top_level_subject_areas_template.yaml new file mode 100644 index 0000000..e6d5d21 --- /dev/null +++ b/ingestion/tags/top_level_subject_areas_template.yaml @@ -0,0 +1,14 @@ +- name: "Corporate operations" + description: "Employees, 
property and finance" +- name: "Crime and policing" + description: "Law enforcement, criminal offences and reoffending" +- name: "Courts and tribunals" + description: "HM Courts and Tribunal Service, legal cases, sentencing" +- name: "Legal aid" + description: "Legal aid and the Civil Legal Advice service" +- name: "Office of the Public Guardian" + description: "Lasting power of attorney and supervision orders" +- name: "Prisons and probation" + description: "HM Prison and Probation Service, electronic monitoring, rehabilitation, offence risk" +- name: "Reference data" + description: "Data to define or standardise other data"