diff --git a/docs/GWDM/2.0.form.json b/docs/GWDM/2.0.form.json index 94c5e9b..7b1eb5c 100644 --- a/docs/GWDM/2.0.form.json +++ b/docs/GWDM/2.0.form.json @@ -319,7 +319,7 @@ "required": false, "title": "Geographic Coverage", "description": "The geographical area covered by the dataset. It is recommended that links are to entries in a well-maintained gazetteer such as https://www.geonames.org/ or https://what3words.com/daring.lion.race.", - "guidance": "- The geographical area covered by the dataset.\\n- Please provide a valid location.\\n- For locations in the UK, this location should conform to [ONS standards](https://geoportal.statistics.gov.uk/datasets/ons::index-of-place-names-in-great-britain-november-2021/about).\\n- For locations in other countries we use [ISO 3166-1 & ISO 3166-2](https://github.com/HDRUK/reference-codes).", + "guidance": "- The geographical area covered by the dataset.\\n- Please provide a valid location.\\n- For locations in the UK, this location should conform to [ONS standards](https://geoportal.statistics.gov.uk/datasets/208d9884575647c29f0dd5a1184e711a/about).\\n- For locations in other countries we use [ISO 3166-1 & ISO 3166-2](https://github.com/HDRUK/reference-codes).", "examples": [ "https://www.geonames.org/2635167/united-kingdom-of-great-britain-and-northern-ireland.html" ], @@ -912,69 +912,195 @@ }, { "required": false, - "title": "Derivations", - "description": "Indicate if derived datasets or predefined extracts are available and the type of derivation available. Notes. 
Single or multiple dimensions can be provided as a derived extract alongside the dataset", + "title": "Persistent identifier of a dataset", + "description": null, "guidance": "", - "examples": [ - "Data will be minimised as appropriate relative to the data access application" - ], + "examples": null, "is_list": false, "is_optional": true, "types": { - "title": "CommaSeparatedValues", - "pattern": "([^,]+)", + "maxLength": 150, + "minLength": 2, + "title": "OneHundredFiftyCharacters", "type": "string" }, - "location": "linkage.datasetLinkage.isDerivedFrom" + "location": "linkage.datasetLinkage.derivedFrom.pid" }, { "required": false, - "title": "Is PartOf", - "description": "If the dataset is part of a group or family", + "title": "Title of a dataset", + "description": null, "guidance": "", - "examples": [ - "UKCRC Tissue Directory and Coordination Centre" - ], + "examples": null, "is_list": false, "is_optional": true, "types": { - "title": "CommaSeparatedValues", - "pattern": "([^,]+)", + "maxLength": 150, + "minLength": 2, + "title": "OneHundredFiftyCharacters", "type": "string" }, - "location": "linkage.datasetLinkage.isPartOf" + "location": "linkage.datasetLinkage.derivedFrom.title" }, { "required": false, - "title": "Is MemberOf", - "description": "Dataset is a member of XXX(?)", + "title": "Url of a dataset", + "description": null, "guidance": "", "examples": null, "is_list": false, "is_optional": true, "types": { - "title": "CommaSeparatedValues", - "pattern": "([^,]+)", + "title": "Url", + "format": "uri", + "minLength": 1, "type": "string" }, - "location": "linkage.datasetLinkage.isMemberOf" + "location": "linkage.datasetLinkage.derivedFrom.url" }, { "required": false, - "title": "Linked Datasets", - "description": "Links to other datasets.", + "title": "Persistent identifier of a dataset", + "description": null, "guidance": "", - "examples": [ - "Yes. 
To any SAIL dataset & reference data.,ALL" - ], + "examples": null, "is_list": false, "is_optional": true, "types": { - "title": "CommaSeparatedValues", - "pattern": "([^,]+)", + "maxLength": 150, + "minLength": 2, + "title": "OneHundredFiftyCharacters", + "type": "string" + }, + "location": "linkage.datasetLinkage.isPartOf.pid" + }, + { + "required": false, + "title": "Title of a dataset", + "description": null, + "guidance": "", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "maxLength": 150, + "minLength": 2, + "title": "OneHundredFiftyCharacters", + "type": "string" + }, + "location": "linkage.datasetLinkage.isPartOf.title" + }, + { + "required": false, + "title": "Url of a dataset", + "description": null, + "guidance": "", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "Url", + "format": "uri", + "minLength": 1, + "type": "string" + }, + "location": "linkage.datasetLinkage.isPartOf.url" + }, + { + "required": false, + "title": "Persistent identifier of a dataset", + "description": null, + "guidance": "", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "maxLength": 150, + "minLength": 2, + "title": "OneHundredFiftyCharacters", + "type": "string" + }, + "location": "linkage.datasetLinkage.linkableDatasets.pid" + }, + { + "required": false, + "title": "Title of a dataset", + "description": null, + "guidance": "", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "maxLength": 150, + "minLength": 2, + "title": "OneHundredFiftyCharacters", + "type": "string" + }, + "location": "linkage.datasetLinkage.linkableDatasets.title" + }, + { + "required": false, + "title": "Url of a dataset", + "description": null, + "guidance": "", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "Url", + "format": "uri", + "minLength": 1, + "type": "string" + }, + "location": "linkage.datasetLinkage.linkableDatasets.url" + }, 
+ { + "required": false, + "title": "Persistent identifier of a dataset", + "description": null, + "guidance": "", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "maxLength": 150, + "minLength": 2, + "title": "OneHundredFiftyCharacters", + "type": "string" + }, + "location": "linkage.datasetLinkage.similarToDatasets.pid" + }, + { + "required": false, + "title": "Title of a dataset", + "description": null, + "guidance": "", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "maxLength": 150, + "minLength": 2, + "title": "OneHundredFiftyCharacters", + "type": "string" + }, + "location": "linkage.datasetLinkage.similarToDatasets.title" + }, + { + "required": false, + "title": "Url of a dataset", + "description": null, + "guidance": "", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "Url", + "format": "uri", + "minLength": 1, "type": "string" }, - "location": "linkage.datasetLinkage.linkedDatasets" + "location": "linkage.datasetLinkage.similarToDatasets.url" }, { "required": false, diff --git a/docs/GWDM/2.0.md b/docs/GWDM/2.0.md index 7cf72e3..e9e8a8d 100644 --- a/docs/GWDM/2.0.md +++ b/docs/GWDM/2.0.md @@ -310,9 +310,9 @@ This information includes attributes for geographical and temporal coverage, coh The geographical area covered by the dataset. It is recommended that links are to entries in a well-maintained gazetteer such as https://www.geonames.org/ or https://what3words.com/daring.lion.race. 
-| title | guidance | is_list | required | type | -|:--------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:----------------------------------------------------------------------------------------------------------| -| Geographic Coverage | - The geographical area covered by the dataset.
- Please provide a valid location.
- For locations in the UK, this location should conform to [ONS standards](https://geoportal.statistics.gov.uk/datasets/ons::index-of-place-names-in-great-britain-november-2021/about).
- For locations in other countries we use [ISO 3166-1 & ISO 3166-2](https://github.com/HDRUK/reference-codes). | False | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | +| title | guidance | is_list | required | type | +|:--------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Geographic Coverage | - The geographical area covered by the dataset.
- Please provide a valid location.
- For locations in the UK, this location should conform to [ONS standards](https://geoportal.statistics.gov.uk/datasets/208d9884575647c29f0dd5a1184e711a/about).
- For locations in other countries we use [ISO 3166-1 & ISO 3166-2](https://github.com/HDRUK/reference-codes). | False | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | Examples: @@ -821,54 +821,174 @@ Dataset Linkage copied over from -#### isDerivedFrom +#### derivedFrom -Indicate if derived datasets or predefined extracts are available and the type of derivation available. Notes. Single or multiple dimensions can be provided as a derived extract alongside the dataset +If applicable, please provide DOIs or links to datasets from which data in this dataset has been derived or calculated from. -| title | guidance | is_list | required | type | -|:------------|:-----------|:----------|:-----------|:----------------------------------------------------------------------------------------------------------| -| Derivations | | False | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | -Examples: - * Data will be minimised as appropriate relative to the data access application + + + +##### pid + +None + +| title | guidance | is_list | required | type | +|:-----------------------------------|:-----------|:----------|:-----------|:--------------------------------------------------------------------------------------------| +| Persistent identifier of a dataset | | False | False | ["OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]", 'null'] | + + + + +##### title + +None + +| title | guidance | is_list | required | type | +|:-------------------|:-----------|:----------|:-----------|:--------------------------------------------------------------------------------------------| +| Title of a dataset | | False | False | ["OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]", 'null'] | + + + + +##### url + +None + +| title | guidance | is_list | required | type | 
+|:-----------------|:-----------|:----------|:-----------|:----------------------------------------------------------------------------------------------------| +| Url of a dataset | | False | False | ["Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + #### isPartOf -If the dataset is part of a group or family +This relationship indicates that the dataset is a component or subset of a broader collection of related datasets. For example, clinical trial data for a specific drug may be part of a larger database of pharmaceutical research data. Complete only if the dataset is part of a group or family of datasets i.e. Hospital Episode Statistics has several constituents. If your dataset is not part of a group, please enter “NOT APPLICABLE” **Example**: Hospital Episodes Statistics datasets (A&E, APC, OP, AC MSDS). -| title | guidance | is_list | required | type | -|:----------|:-----------|:----------|:-----------|:----------------------------------------------------------------------------------------------------------| -| Is PartOf | | False | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + Examples: - * UKCRC Tissue Directory and Coordination Centre + * Hospital Episodes Statistics datasets (A&E, APC, OP, AC MSDS) -#### isMemberOf +##### pid -Dataset is a member of XXX(?) 
+None -| title | guidance | is_list | required | type | -|:------------|:-----------|:----------|:-----------|:----------------------------------------------------------------------------------------------------------| -| Is MemberOf | | False | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | +| title | guidance | is_list | required | type | +|:-----------------------------------|:-----------|:----------|:-----------|:--------------------------------------------------------------------------------------------| +| Persistent identifier of a dataset | | False | False | ["OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]", 'null'] | -#### linkedDatasets +##### title -Links to other datasets. +None -| title | guidance | is_list | required | type | -|:----------------|:-----------|:----------|:-----------|:----------------------------------------------------------------------------------------------------------| -| Linked Datasets | | False | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | +| title | guidance | is_list | required | type | +|:-------------------|:-----------|:----------|:-----------|:--------------------------------------------------------------------------------------------| +| Title of a dataset | | False | False | ["OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]", 'null'] | + + + + +##### url + +None + +| title | guidance | is_list | required | type | +|:-----------------|:-----------|:----------|:-----------|:----------------------------------------------------------------------------------------------------| +| Url of a dataset | | False | False | ["Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### linkableDatasets + +If applicable, please provide the DOI of other datasets that have 
previously been linked to this dataset and their availability. If no DOI is available, please provide the title of the datasets that can be linked. + + + + + + +##### pid + +None + +| title | guidance | is_list | required | type | +|:-----------------------------------|:-----------|:----------|:-----------|:--------------------------------------------------------------------------------------------| +| Persistent identifier of a dataset | | False | False | ["OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]", 'null'] | + + + + +##### title + +None + +| title | guidance | is_list | required | type | +|:-------------------|:-----------|:----------|:-----------|:--------------------------------------------------------------------------------------------| +| Title of a dataset | | False | False | ["OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]", 'null'] | + + + + +##### url + +None + +| title | guidance | is_list | required | type | +|:-----------------|:-----------|:----------|:-----------|:----------------------------------------------------------------------------------------------------| +| Url of a dataset | | False | False | ["Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### similarToDatasets + +Datasets that are similar to each other in some way, collect similar patients, regional equivalent etc. 
+ + + + + + +##### pid + +None + +| title | guidance | is_list | required | type | +|:-----------------------------------|:-----------|:----------|:-----------|:--------------------------------------------------------------------------------------------| +| Persistent identifier of a dataset | | False | False | ["OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]", 'null'] | + + + + +##### title + +None + +| title | guidance | is_list | required | type | +|:-------------------|:-----------|:----------|:-----------|:--------------------------------------------------------------------------------------------| +| Title of a dataset | | False | False | ["OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]", 'null'] | + + + + +##### url + +None + +| title | guidance | is_list | required | type | +|:-----------------|:-----------|:----------|:-----------|:----------------------------------------------------------------------------------------------------| +| Url of a dataset | | False | False | ["Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | -Examples: - * Yes. To any SAIL dataset & reference data.,ALL ### investigations diff --git a/docs/GWDM/2.0.structure.json b/docs/GWDM/2.0.structure.json index b88942e..d55213b 100644 --- a/docs/GWDM/2.0.structure.json +++ b/docs/GWDM/2.0.structure.json @@ -1121,70 +1121,246 @@ "is_optional": true, "subItems": [ { - "name": "isDerivedFrom", + "name": "derivedFrom", "required": false, - "title": "Derivations", - "description": "Indicate if derived datasets or predefined extracts are available and the type of derivation available. Notes. 
Single or multiple dimensions can be provided as a derived extract alongside the dataset", + "title": "Derived from", + "description": "If applicable, please provide DOIs or links to datasets from which data in this dataset has been derived or calculated from.", "guidance": "", - "examples": [ - "Data will be minimised as appropriate relative to the data access application" - ], + "examples": null, "type": [ - "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", - "null" + "DatasetDescriptor" ], - "is_list": false, + "is_list": true, "is_optional": true, - "subItems": [] + "subItems": [ + { + "name": "pid", + "required": false, + "title": "Persistent identifier of a dataset", + "description": null, + "guidance": "", + "examples": null, + "type": [ + "OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "title", + "required": false, + "title": "Title of a dataset", + "description": null, + "guidance": "", + "examples": null, + "type": [ + "OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "url", + "required": false, + "title": "Url of a dataset", + "description": null, + "guidance": "", + "examples": null, + "type": [ + "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + } + ] }, { "name": "isPartOf", "required": false, - "title": "Is PartOf", - "description": "If the dataset is part of a group or family", + "title": "Is part of", + "description": "This relationship indicates that the dataset is a component or subset of a broader collection of related datasets. 
For example, clinical trial data for a specific drug may be part of a larger database of pharmaceutical research data. Complete only if the dataset is part of a group or family of datasets i.e. Hospital Episode Statistics has several constituents. If your dataset is not part of a group, please enter \u201cNOT APPLICABLE\u201d Example: Hospital Episodes Statistics datasets (A&E, APC, OP, AC MSDS).", "guidance": "", "examples": [ - "UKCRC Tissue Directory and Coordination Centre" + "Hospital Episodes Statistics datasets (A&E, APC, OP, AC MSDS)" ], "type": [ - "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", - "null" + "DatasetDescriptor" ], - "is_list": false, + "is_list": true, "is_optional": true, - "subItems": [] + "subItems": [ + { + "name": "pid", + "required": false, + "title": "Persistent identifier of a dataset", + "description": null, + "guidance": "", + "examples": null, + "type": [ + "OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "title", + "required": false, + "title": "Title of a dataset", + "description": null, + "guidance": "", + "examples": null, + "type": [ + "OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "url", + "required": false, + "title": "Url of a dataset", + "description": null, + "guidance": "", + "examples": null, + "type": [ + "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + } + ] }, { - "name": "isMemberOf", + "name": "linkableDatasets", "required": false, - "title": "Is MemberOf", - "description": "Dataset is a member of XXX(?)", + "title": "Linked datasets", + "description": "If applicable, please provide the DOI of other 
datasets that have previously been linked to this dataset and their availability. If no DOI is available, please provide the title of the datasets that can be linked.", "guidance": "", "examples": null, "type": [ - "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", - "null" + "DatasetDescriptor" ], - "is_list": false, + "is_list": true, "is_optional": true, - "subItems": [] + "subItems": [ + { + "name": "pid", + "required": false, + "title": "Persistent identifier of a dataset", + "description": null, + "guidance": "", + "examples": null, + "type": [ + "OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "title", + "required": false, + "title": "Title of a dataset", + "description": null, + "guidance": "", + "examples": null, + "type": [ + "OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "url", + "required": false, + "title": "Url of a dataset", + "description": null, + "guidance": "", + "examples": null, + "type": [ + "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + } + ] }, { - "name": "linkedDatasets", + "name": "similarToDatasets", "required": false, - "title": "Linked Datasets", - "description": "Links to other datasets.", + "title": "Similar to datasets", + "description": "Datasets that are similar to each other in some way, collect similar patients, regional equivalent etc.", "guidance": "", - "examples": [ - "Yes. 
To any SAIL dataset & reference data.,ALL" - ], + "examples": null, "type": [ - "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", - "null" + "DatasetDescriptor" ], - "is_list": false, + "is_list": true, "is_optional": true, - "subItems": [] + "subItems": [ + { + "name": "pid", + "required": false, + "title": "Persistent identifier of a dataset", + "description": null, + "guidance": "", + "examples": null, + "type": [ + "OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "title", + "required": false, + "title": "Title of a dataset", + "description": null, + "guidance": "", + "examples": null, + "type": [ + "OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "url", + "required": false, + "title": "Url of a dataset", + "description": null, + "guidance": "", + "examples": null, + "type": [ + "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + } + ] } ] }, diff --git a/hdr_schemata/models/GWDM/2.0/schema.json b/hdr_schemata/models/GWDM/2.0/schema.json index b9ae36a..602c7b4 100644 --- a/hdr_schemata/models/GWDM/2.0/schema.json +++ b/hdr_schemata/models/GWDM/2.0/schema.json @@ -502,69 +502,117 @@ "title": "DataValue", "type": "object" }, + "DatasetDescriptor": { + "properties": { + "pid": { + "anyOf": [ + { + "$ref": "#/$defs/OneHundredFiftyCharacters" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Persistent identifier of a dataset" + }, + "title": { + "anyOf": [ + { + "$ref": "#/$defs/OneHundredFiftyCharacters" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Title of a dataset" + }, + "url": { + "anyOf": [ + { + "$ref": 
"#/$defs/Url" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Url of a dataset" + } + }, + "title": "DatasetDescriptor", + "type": "object" + }, "DatasetLinkage": { "additionalProperties": false, "properties": { - "isDerivedFrom": { + "derivedFrom": { "anyOf": [ { - "$ref": "#/$defs/CommaSeparatedValues" + "items": { + "$ref": "#/$defs/DatasetDescriptor" + }, + "type": "array" }, { "type": "null" } ], "default": null, - "description": "Indicate if derived datasets or predefined extracts are available and the type of derivation available. Notes. Single or multiple dimensions can be provided as a derived extract alongside the dataset", - "examples": [ - "Data will be minimised as appropriate relative to the data access application" - ], - "title": "Derivations" + "description": "If applicable, please provide DOIs or links to datasets from which data in this dataset has been derived or calculated from.", + "title": "Derived from" }, "isPartOf": { "anyOf": [ { - "$ref": "#/$defs/CommaSeparatedValues" + "items": { + "$ref": "#/$defs/DatasetDescriptor" + }, + "type": "array" }, { "type": "null" } ], "default": null, - "description": "If the dataset is part of a group or family", + "description": "This relationship indicates that the dataset is a component or subset of a broader collection of related datasets. For example, clinical trial data for a specific drug may be part of a larger database of pharmaceutical research data. Complete only if the dataset is part of a group or family of datasets i.e. Hospital Episode Statistics has several constituents. 
If your dataset is not part of a group, please enter \u201cNOT APPLICABLE\u201d **Example**: Hospital Episodes Statistics datasets (A&E, APC, OP, AC MSDS).", "examples": [ - "UKCRC Tissue Directory and Coordination Centre" + "Hospital Episodes Statistics datasets (A&E, APC, OP, AC MSDS)" ], - "title": "Is PartOf" + "title": "Is part of" }, - "isMemberOf": { + "linkableDatasets": { "anyOf": [ { - "$ref": "#/$defs/CommaSeparatedValues" + "items": { + "$ref": "#/$defs/DatasetDescriptor" + }, + "type": "array" }, { "type": "null" } ], "default": null, - "description": "Dataset is a member of XXX(?)", - "title": "Is MemberOf" + "description": "If applicable, please provide the DOI of other datasets that have previously been linked to this dataset and their availability. If no DOI is available, please provide the title of the datasets that can be linked.", + "title": "Linked datasets" }, - "linkedDatasets": { + "similarToDatasets": { "anyOf": [ { - "$ref": "#/$defs/CommaSeparatedValues" + "items": { + "$ref": "#/$defs/DatasetDescriptor" + }, + "type": "array" }, { "type": "null" } ], "default": null, - "description": "Links to other datasets.", - "examples": [ - "Yes. 
To any SAIL dataset & reference data.,ALL" - ], - "title": "Linked Datasets" + "description": "Datasets that are similar to each other in some way, collect similar patients, regional equivalent etc.", + "title": "Similar to datasets" } }, "title": "DatasetLinkage", @@ -1110,6 +1158,12 @@ "title": "Omics", "type": "object" }, + "OneHundredFiftyCharacters": { + "maxLength": 150, + "minLength": 2, + "title": "OneHundredFiftyCharacters", + "type": "string" + }, "Organisation": { "properties": { "name": { diff --git a/hdr_schemata/models/GWDM/v2_0/DatasetDescriptor.py b/hdr_schemata/models/GWDM/v2_0/DatasetDescriptor.py new file mode 100644 index 0000000..7de1aea --- /dev/null +++ b/hdr_schemata/models/GWDM/v2_0/DatasetDescriptor.py @@ -0,0 +1,13 @@ +from typing import Optional +from pydantic import BaseModel, Field, constr +from hdr_schemata.definitions.HDRUK import * + +from .annotations import annotations + +an = annotations.datasetDescriptor + +class DatasetDescriptor(BaseModel): + pid: Optional[OneHundredFiftyCharacters] = Field(None, **an.pid.__dict__) + title: Optional[OneHundredFiftyCharacters] = Field(None, **an.title.__dict__) + url: Optional[Url] = Field(None, **an.url.__dict__) + diff --git a/hdr_schemata/models/GWDM/v2_0/DatasetLinkage.py b/hdr_schemata/models/GWDM/v2_0/DatasetLinkage.py index 147dd0f..cedff1e 100644 --- a/hdr_schemata/models/GWDM/v2_0/DatasetLinkage.py +++ b/hdr_schemata/models/GWDM/v2_0/DatasetLinkage.py @@ -2,6 +2,8 @@ from pydantic import BaseModel, Field from hdr_schemata.definitions.HDRUK import * +from .DatasetDescriptor import DatasetDescriptor + from .annotations import annotations an = annotations.linkage @@ -11,14 +13,18 @@ class DatasetLinkage(BaseModel): class Config: extra = "forbid" - isDerivedFrom: Optional[CommaSeparatedValues] = Field( - None, **an.isDerivedFrom.__dict__ + derivedFrom: Optional[List[DatasetDescriptor]] = Field( + None, **an.derivedFrom.__dict__ ) - isPartOf: Optional[CommaSeparatedValues] = Field(None, 
**an.isPartOf.__dict__) + isPartOf: Optional[List[DatasetDescriptor]] = Field( + None, **an.isPartOf.__dict__ + ) - isMemberOf: Optional[CommaSeparatedValues] = Field(None, **an.isMemberOf.__dict__) + linkableDatasets: Optional[List[DatasetDescriptor]] = Field( + None, **an.linkableDatasets.__dict__ + ) - linkedDatasets: Optional[CommaSeparatedValues] = Field( - None, **an.linkedDatasets.__dict__ + similarToDatasets: Optional[List[DatasetDescriptor]] = Field( + None, **an.similarToDatasets.__dict__ ) diff --git a/hdr_schemata/models/GWDM/v2_0/annotations/config.yaml b/hdr_schemata/models/GWDM/v2_0/annotations/config.yaml index 1164e81..fc33c13 100644 --- a/hdr_schemata/models/GWDM/v2_0/annotations/config.yaml +++ b/hdr_schemata/models/GWDM/v2_0/annotations/config.yaml @@ -386,27 +386,23 @@ linkage: - "https://digital.nhs.uk/services/data-access-request-service-dars/register-of-approved-data-releases" title: "Investigations" - isDerivedFrom: - description: "Indicate if derived datasets or predefined extracts are available and the type of derivation available. Notes. Single or multiple dimensions can be provided as a derived extract alongside the dataset" - examples: - - "Data will be minimised as appropriate relative to the data access application" - title: Derivations - + derivedFrom: + title: "Derived from" + description: "If applicable, please provide DOIs or links to datasets from which data in this dataset has been derived or calculated from." + isPartOf: - description: "If the dataset is part of a group or family" - examples: - - "UKCRC Tissue Directory and Coordination Centre" - title: "Is PartOf" - - isMemberOf: - description: "Dataset is a member of XXX(?)" - title: "Is MemberOf" - - linkedDatasets: - description: "Links to other datasets." - examples: - - "Yes. 
To any SAIL dataset & reference data.,ALL" - title: "Linked Datasets" + title: "Is part of" + description: "This relationship indicates that the dataset is a component or subset of a broader collection of related datasets. For example, clinical trial data for a specific drug may be part of a larger database of pharmaceutical research data. Complete only if the dataset is part of a group or family of datasets e.g. Hospital Episode Statistics has several constituents. If your dataset is not part of a group, please enter “NOT APPLICABLE”. **Example**: Hospital Episode Statistics datasets (A&E, APC, OP, AC MSDS)." + examples: + - "Hospital Episode Statistics datasets (A&E, APC, OP, AC MSDS)" + + similarToDatasets: + title: "Similar to datasets" + description: "Datasets that are similar to this dataset in some way, e.g. they cover similar patients or are regional equivalents." + + linkableDatasets: + title: "Linked datasets" + description: "If applicable, please provide the DOI of other datasets that have previously been linked to this dataset and their availability. If no DOI is available, please provide the title of the datasets that can be linked." syntheticDataWebLink: description: Links to locations of information and or raw downloads of synthetic data associated with this dataset @@ -420,6 +416,14 @@ linkage: description: DOIs for publications which use the dataset for analysis. title: Publication using the dataset +datasetDescriptor: + pid: + title: "Persistent identifier of a dataset" + title: + title: "Title of a dataset" + url: + title: "Url of a dataset" + structuralMetadata: description: Descriptions of all tables and data elements that can be included in the dataset title: Structural Metadata