diff --git a/rocrate/metadata.py b/rocrate/metadata.py new file mode 100644 index 0000000..f61e7a3 --- /dev/null +++ b/rocrate/metadata.py @@ -0,0 +1,108 @@ +# Copyright 2019-2022 The University of Manchester, UK +# Copyright 2020-2022 Vlaams Instituut voor Biotechnologie (VIB), BE +# Copyright 2020-2022 Barcelona Supercomputing Center (BSC), ES +# Copyright 2020-2022 Center for Advanced Studies, Research and Development in Sardinia (CRS4), IT +# Copyright 2022 École Polytechnique Fédérale de Lausanne, CH +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import warnings + +from .model.metadata import Metadata, LegacyMetadata + + +def read_metadata(metadata_path): + """\ + Read an RO-Crate metadata file. + + Return a tuple of two elements: the context; a dictionary that maps entity + ids to the entities themselves. + """ + with open(metadata_path) as f: + metadata = json.load(f) + try: + context = metadata['@context'] + graph = metadata['@graph'] + except KeyError: + raise ValueError(f"{metadata_path} must have a @context and a @graph") + return context, {_["@id"]: _ for _ in graph} + + +def _check_descriptor(descriptor, entities): + if descriptor["@type"] != "CreativeWork": + raise ValueError('metadata descriptor must be of type "CreativeWork"') + try: + root = entities[descriptor["about"]["@id"]] + except (KeyError, TypeError): + raise ValueError("metadata descriptor does not reference the root entity") + if ("Dataset" not in root["@type"] if isinstance(root["@type"], list) else root["@type"] != "Dataset"): + raise ValueError('root entity must have "Dataset" among its types') + return descriptor["@id"], root["@id"] + + +def find_root_entity_id(entities): + """\ + Find metadata file descriptor and root data entity. + + Expects as input a dictionary that maps JSON entity IDs to the entities + themselves (like the second element returned by read_metadata). + + Return a tuple of the corresponding identifiers (descriptor, root). + If the entities are not found, raise KeyError. If they are found, + but they don't satisfy the required constraints, raise ValueError. + + In the general case, the metadata file descriptor id can be an + absolute URI whose last path segment is "ro-crate-metadata.json[ld]". + Since there can be more than one such id in the crate, we need to + choose among the corresponding (descriptor, root) entity pairs. First, we + exclude those that don't satisfy other constraints, such as the + descriptor entity being of type CreativeWork, etc.; if this doesn't + leave us with a single pair, we try to pick one with a + heuristic. Suppose we are left with the (m1, r1) and (m2, r2) pairs: + if r1 is the actual root of this crate, then m2 and r2 are regular + files in it, and as such they must appear in r1's hasPart; r2, + however, is not required to have a hasPart property listing other + files. Thus, we look for a pair whose root entity "contains" all + descriptor entities from other pairs. If there is no such pair, or there + is more than one, we just return an arbitrary pair. + + """ + descriptor = entities.get(Metadata.BASENAME, entities.get(LegacyMetadata.BASENAME)) + if descriptor: + return _check_descriptor(descriptor, entities) + candidates = [] + for id_, e in entities.items(): + basename = id_.rsplit("/", 1)[-1] + if basename == Metadata.BASENAME or basename == LegacyMetadata.BASENAME: + try: + candidates.append(_check_descriptor(e, entities)) + except ValueError: + pass + if not candidates: + raise KeyError("Metadata file descriptor not found") + elif len(candidates) == 1: + return candidates[0] + else: + warnings.warn("Multiple metadata file descriptors, will pick one with a heuristic") + descriptor_ids = set(_[0] for _ in candidates) + for m_id, r_id in candidates: + try: + root = entities[r_id] + part_ids = set(_["@id"] for _ in root["hasPart"]) + except KeyError: + continue + if part_ids >= descriptor_ids - {m_id}: + # if True for more than one candidate, this pick is arbitrary + return m_id, r_id + return candidates[0] # fall back to arbitrary pick diff --git a/rocrate/model/contextentity.py b/rocrate/model/contextentity.py index 7e4a659..cce9306 100644 --- a/rocrate/model/contextentity.py +++ b/rocrate/model/contextentity.py @@ -18,52 +18,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .. import vocabs -from ..utils import as_list, is_url - +from ..utils import is_url from .entity import Entity -""" -A property class that can be used during class declaration -to make getter/setter properties. - -The class name under construction is assumed to be a valid class name -in schema.org as referenced from the RO-Crate JSON-LD context, -and likewise the class properties defined using this -are assumed to be valid schema.org properties. - -The setters handle any Entity by picking up their @id instead -of nesting their objects. - -Likewise the getter will construct the typed Entity subclass -instead of returning only the identifiers. - -The name of the property is provided by the class under construction, -which will call our __set_name__. - -The singular getter will always return the first value set (or None), -while the plural versions of the getter return a generator that yields all -values. - -So for instance: - - class Dataset(Entity): - author = ContextEntity(Person) - - dataset = Dataset() - -will have both dataset.author that return Person instance, -and dataset.authors, which return generator of Person instances. - -The corresponding plural setter supports any iterable (e.g. list): - - person1 = Person("#person1", metadata) - person2 = Person("#person2", metadata) - dataset.creators = [person1, person2] -""" - - def add_hash(id_): if id_ is None or "#" in id_ or is_url(id_): return id_ @@ -77,75 +35,3 @@ def __init__(self, crate, identifier=None, properties=None): def format_id(self, identifier): return add_hash(identifier) - - def getmany(self, instance): - for json in as_list(instance.get(self.property)): - # TODO: Support more advanced dispatching - yield self.entity_constructor(json["@id"], instance._metadata) - - # def setmany(self, instance, values): - # json = [] - # for value in values: - # # TODO: Check it has compatible @type? - # if value._metadata != instance._metadata: - # # Oh no, it might have different base URIs, - # # will need to be added to @graph, reference - # # other objects we don't have etc. - # # TODO: Support setting entities from other RO-Crates - # raise ValueError( - # "Adding entity from other RO-Crate not (yet) supported" - # ) - # json.append({"@id": value.id}) - # instance[self.property] = flatten(json) - - # def __get__(self, instance, owner=None): - # if instance is None: - # return self - # result = None - # for val in self.getmany(instance): - # if result is not None: - # warnings.warn( - # "More than one value in %s.%s, returning first" % - # (self.owner, self.property) - # ) - # break - # result = val - # return result - - # def __set__(self, instance, value): - # # TODO: Check if arrays are permitted - # self.setmany(instance, as_list(value)) - - def __delete__(self, instance): - # TODO: Check if permitted to delete? - instance[self.property] = [] # known property, empty in JSON - - def __set_name__(self, owner, name): - if not owner.__doc__: - _set_class_doc(owner) - self.owner = owner - self.property = name - uri = vocabs.term_to_uri(name) - doc = vocabs.schema_doc(uri) - self.__doc__ = "Single contextual entity %s\n%s" % (uri, doc) - # Register plural _s variant - # TODO: Register plural _s variants - setattr(owner, name+"s", property( - self.getmany, # self.setmany, - doc="Multiple contextual entities %s\n%s" % (uri, doc)) - ) - # TODO: Register _ids variants? - - -def _set_class_doc(Class): - """ - Set class documentation from schema.org definitions - """ - # set the class documentation - try: - # FIXME: avoid this hack here! - uri = vocabs.term_to_uri(Class.__name__) - doc = vocabs.schema_doc(uri) - Class.__doc__ = "Entity %s\n%s" % (uri, doc) - except KeyError: - pass # Non-matching class name, ignore diff --git a/rocrate/model/entity.py b/rocrate/model/entity.py index 6c33a66..ede44d2 100644 --- a/rocrate/model/entity.py +++ b/rocrate/model/entity.py @@ -131,10 +131,6 @@ def __eq__(self, other): def type(self): return self._jsonld['@type'] - # @property - # def types(self)-> List[str]: - # return tuple(as_list(self.get("@type", "Thing"))) - @property def datePublished(self): d = self.get('datePublished') diff --git a/rocrate/rocrate.py b/rocrate/rocrate.py index 9abcc9f..264e894 100644 --- a/rocrate/rocrate.py +++ b/rocrate/rocrate.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python - # Copyright 2019-2022 The University of Manchester, UK # Copyright 2020-2022 Vlaams Instituut voor Biotechnologie (VIB), BE # Copyright 2020-2022 Barcelona Supercomputing Center (BSC), ES @@ -19,13 +17,11 @@ # limitations under the License. import errno -import json import uuid import zipfile import atexit import shutil import tempfile -import warnings from collections import OrderedDict from pathlib import Path @@ -49,23 +45,7 @@ from .model.testsuite import TestSuite from .utils import is_url, subclasses, get_norm_value, walk - - -def read_metadata(metadata_path): - """\ - Read an RO-Crate metadata file. - - Return a tuple of two elements: the context; a dictionary that maps entity - ids to the entities themselves. - """ - with open(metadata_path) as f: - metadata = json.load(f) - try: - context = metadata['@context'] - graph = metadata['@graph'] - except KeyError: - raise ValueError(f"{metadata_path} must have a @context and a @graph") - return context, {_["@id"]: _ for _ in graph} +from .metadata import read_metadata, find_root_entity_id def pick_type(json_entity, type_map, fallback=None): @@ -144,71 +124,8 @@ def __read(self, source, gen_preview=False): self.__read_contextual_entities(entities) return source - def __check_metadata(self, metadata, entities): - if metadata["@type"] != "CreativeWork": - raise ValueError('metadata descriptor must be of type "CreativeWork"') - try: - root = entities[metadata["about"]["@id"]] - except (KeyError, TypeError): - raise ValueError("metadata descriptor does not reference the root entity") - if ("Dataset" not in root["@type"] if isinstance(root["@type"], list) else root["@type"] != "Dataset"): - raise ValueError('root entity must have "Dataset" among its types') - return metadata["@id"], root["@id"] - - def find_root_entity_id(self, entities): - """\ - Find metadata file descriptor and root data entity. - - Return a tuple of the corresponding identifiers (metadata, root). - If the entities are not found, raise KeyError. If they are found, - but they don't satisfy the required constraints, raise ValueError. - - In the general case, the metadata file descriptor id can be an - absolute URI whose last path segment is "ro-crate-metadata.json[ld]". - Since there can be more than one such id in the crate, we need to - choose among the corresponding (metadata, root) entity pairs. First, we - exclude those that don't satisfy other constraints, such as the - metadata entity being of type CreativeWork, etc.; if this doesn't - leave us with a single pair, we try to pick one with a - heuristic. Suppose we are left with the (m1, r1) and (m2, r2) pairs: - if r1 is the actual root of this crate, then m2 and r2 are regular - files in it, and as such they must appear in r1's hasPart; r2, - however, is not required to have a hasPart property listing other - files. Thus, we look for a pair whose root entity "contains" all - metadata entities from other pairs. If there is no such pair, or there - is more than one, we just return an arbitrary pair. - """ - metadata = entities.get(Metadata.BASENAME, entities.get(LegacyMetadata.BASENAME)) - if metadata: - return self.__check_metadata(metadata, entities) - candidates = [] - for id_, e in entities.items(): - basename = id_.rsplit("/", 1)[-1] - if basename == Metadata.BASENAME or basename == LegacyMetadata.BASENAME: - try: - candidates.append(self.__check_metadata(e, entities)) - except ValueError: - pass - if not candidates: - raise KeyError("Metadata file descriptor not found") - elif len(candidates) == 1: - return candidates[0] - else: - warnings.warn("Multiple metadata file descriptors, will pick one with a heuristic") - metadata_ids = set(_[0] for _ in candidates) - for m_id, r_id in candidates: - try: - root = entities[r_id] - part_ids = set(_["@id"] for _ in root["hasPart"]) - except KeyError: - continue - if part_ids >= metadata_ids - {m_id}: - # if True for more than one candidate, this pick is arbitrary - return m_id, r_id - return candidates[0] # fall back to arbitrary pick - def __read_data_entities(self, entities, source, gen_preview): - metadata_id, root_id = self.find_root_entity_id(entities) + metadata_id, root_id = find_root_entity_id(entities) MetadataClass = metadata_class(metadata_id) metadata_properties = entities.pop(metadata_id) self.add(MetadataClass(self, metadata_id, properties=metadata_properties)) @@ -473,10 +390,6 @@ def delete(self, *entities): pass self.__entity_map.pop(e.canonical_id(), None) - # TODO - # def fetch_all(self): - # fetch all files defined in the crate - def _copy_unlisted(self, top, base_path): for root, dirs, files in walk(top, exclude=self.exclude): root = Path(root) @@ -608,3 +521,16 @@ def __validate_suite(self, suite): if suite is None: raise ValueError("suite not found") return suite + + +def make_workflow_rocrate(workflow_path, wf_type, include_files=[], + fetch_remote=False, cwl=None, diagram=None): + wf_crate = ROCrate() + workflow_path = Path(workflow_path) + wf_crate.add_workflow( + workflow_path, workflow_path.name, fetch_remote=fetch_remote, + main=True, lang=wf_type, gen_cwl=(cwl is None) + ) + for file_entry in include_files: + wf_crate.add_file(file_entry) + return wf_crate diff --git a/rocrate/rocrate_api.py b/rocrate/rocrate_api.py deleted file mode 100644 index baf3dfc..0000000 --- a/rocrate/rocrate_api.py +++ /dev/null @@ -1,79 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2019-2022 The University of Manchester, UK -# Copyright 2020-2022 Vlaams Instituut voor Biotechnologie (VIB), BE -# Copyright 2020-2022 Barcelona Supercomputing Center (BSC), ES -# Copyright 2020-2022 Center for Advanced Studies, Research and Development in Sardinia (CRS4), IT -# Copyright 2022 École Polytechnique Fédérale de Lausanne, CH -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from pathlib import Path - -import rocrate.rocrate as roc - - -def make_workflow_rocrate(workflow_path, wf_type, include_files=[], - fetch_remote=False, cwl=None, diagram=None): - - # Properties - # missing? - # input - # output - # programmingLanguage - # url - # version - # sdPublisher - current set to the person that provided the metadata, - # decision to change to the Workflow Hub itself - Done - # publisher - where it came came from, e.g. Galaxy, github, or WF Hub - # if uploaded - Done - # producer - to describe the Project or Team - Done - # creator - the creators / authors - Done - # maintainer - new recommended property to describe the uploader + - # additional people with manage rights - Done - # funder - example of cordis reference - # https://cordis.europa.eu/project/id/730976 - # https://schema.org/FundingScheme linked to funder - # Examples at the bottom of https://schema.org/Grant - funding looks - # ideal but not currently legal - # Is needed to fulfill the OpenAire “Funding Reference” property - # datePublished - becomes an optional property, and we use the date a - # DOI was minted (this property is needed for dataCite) - Done - # creativeWorkStatus - Maturity level, to be added to BioSchemas - Done - # Identifier - can be DOI if this function is enabled in WorkflowHub - Done - - # returns a complete ROCrate object corresponding to a Workflow template - # file - # wf_type: Galaxy, CWL, Nextflow, ... - # cwl: CWL/CWL-Abstract representation of the workflow. - # diagram: an image/graphical workflow representation. - # If a CWL/CWLAbstract file is provided, this is generated using cwltool - - wf_crate = roc.ROCrate() - workflow_path = Path(workflow_path) - wf_file = wf_crate.add_workflow( - workflow_path, workflow_path.name, fetch_remote=fetch_remote, - main=True, lang=wf_type, gen_cwl=(cwl is None) - ) - - # if the source is a remote URL then add https://schema.org/codeRepository - # property to it this can be checked by checking if the source is a URL - # instead of a local path - if 'url' in wf_file.properties(): - wf_file['codeRepository'] = wf_file['url'] - - # add extra files - for file_entry in include_files: - wf_crate.add_file(file_entry) - - return wf_crate diff --git a/rocrate/utils.py b/rocrate/utils.py index 010b99a..1107a47 100644 --- a/rocrate/utils.py +++ b/rocrate/utils.py @@ -18,33 +18,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import collections import os from datetime import datetime, timezone from urllib.parse import urlsplit -def first(iterable): - for e in iterable: - return e - return None - - -def flatten(single_or_multiple): - if len(single_or_multiple) == 1: - return single_or_multiple[0] - return single_or_multiple # might be empty! - - -def as_list(list_or_other): - if list_or_other is None: - return [] - if (isinstance(list_or_other, collections.Sequence) - and not isinstance(list_or_other, str)): # FIXME: bytes? - return list_or_other - return [list_or_other] - - def is_url(string): parts = urlsplit(string) if os.name == "nt" and len(parts.scheme) == 1: diff --git a/test/test_metadata.py b/test/test_metadata.py new file mode 100644 index 0000000..7f2173a --- /dev/null +++ b/test/test_metadata.py @@ -0,0 +1,182 @@ +# Copyright 2019-2022 The University of Manchester, UK +# Copyright 2020-2022 Vlaams Instituut voor Biotechnologie (VIB), BE +# Copyright 2020-2022 Barcelona Supercomputing Center (BSC), ES +# Copyright 2020-2022 Center for Advanced Studies, Research and Development in Sardinia (CRS4), IT +# Copyright 2022 École Polytechnique Fédérale de Lausanne, CH +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest +from copy import deepcopy + +from rocrate.metadata import find_root_entity_id + + +@pytest.mark.parametrize("root,basename", [ + ("", "ro-crate-metadata.json"), + ("", "ro-crate-metadata.jsonld"), + ("https://example.org/crate/", "ro-crate-metadata.json"), + ("https://example.org/crate/", "ro-crate-metadata.jsonld"), + ("", "bad-name.json"), +]) +def test_find_root(root, basename): + metadata_id = root + basename + root_id = root or "./" + entities = {_["@id"]: _ for _ in [ + { + "@id": metadata_id, + "@type": "CreativeWork", + "about": {"@id": root_id}, + "conformsTo": [ + {"@id": "https://w3id.org/ro/crate/1.1"}, + {"@id": "https://example.org/fancy-ro-crate/1.0"}, + ] + }, + { + "@id": root_id, + "@type": "Dataset", + }, + ]} + if basename not in {"ro-crate-metadata.json", "ro-crate-metadata.jsonld"}: + with pytest.raises(KeyError): + find_root_entity_id(entities) + else: + assert find_root_entity_id(entities) == (metadata_id, root_id) + + +def test_find_root_bad_entities(): + orig_entities = { + "ro-crate-metadata.json": { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": {"@id": "./"}, + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.1"}, + }, + "./": { + "@id": "./", + "@type": "Dataset", + }, + } + # missing "about" + entities = deepcopy(orig_entities) + del entities["ro-crate-metadata.json"]["about"] + with pytest.raises(ValueError, match="does not reference"): + find_root_entity_id(entities) + # "about" does not reference the root entity + entities = deepcopy(orig_entities) + for about in "http://example.org", {"@id": "http://example.org"}: + entities["ro-crate-metadata.json"]["about"] = about + with pytest.raises(ValueError, match="does not reference"): + find_root_entity_id(entities) + # metadata type is not CreativeWork + entities = deepcopy(orig_entities) + entities["ro-crate-metadata.json"]["@type"] = "Thing" + with pytest.raises(ValueError, match="must be of type"): + find_root_entity_id(entities) + # root type is not Dataset + entities = deepcopy(orig_entities) + entities["./"]["@type"] = "Thing" + with pytest.raises(ValueError, match="must have"): + find_root_entity_id(entities) + + +@pytest.mark.filterwarnings("ignore") +def test_find_root_multiple_entries(): + orig_entities = { + "http://example.org/ro-crate-metadata.json": { + "@id": "http://example.org/ro-crate-metadata.json", + "@type": "CreativeWork", + "about": {"@id": "http://example.org/"}, + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.1"}, + }, + "http://example.org/": { + "@id": "http://example.org/", + "@type": "Dataset", + "hasPart": [ + {"@id": "http://example.com/"}, + {"@id": "http://example.com/ro-crate-metadata.json"} + ] + }, + "http://example.com/ro-crate-metadata.json": { + "@id": "http://example.com/ro-crate-metadata.json", + "@type": "CreativeWork", + "about": {"@id": "http://example.com/"}, + "conformsTo": {"@id": "https://w3id.com/ro/crate/1.1"}, + }, + "http://example.com/": { + "@id": "http://example.com/", + "@type": "Dataset", + }, + } + + def check_finds_org(entities): + m_id, r_id = find_root_entity_id(entities) + assert m_id == "http://example.org/ro-crate-metadata.json" + assert r_id == "http://example.org/" + + def check_picks_one(entities): + m_id, r_id = find_root_entity_id(entities) + assert m_id in [f"http://example.{_}/ro-crate-metadata.json" for _ in ("org", "com")] + assert r_id in [f"http://example.{_}/" for _ in ("org", "com")] + + check_finds_org(orig_entities) + # no root candidate contains the other one + mod_entities = deepcopy(orig_entities) + del mod_entities["http://example.org/"]["hasPart"] + check_picks_one(mod_entities) + # each root candidate contains the other one + mod_entities = deepcopy(orig_entities) + mod_entities["http://example.com/"]["hasPart"] = [ + {"@id": "http://example.org/"}, + {"@id": "http://example.org/ro-crate-metadata.json"} + ] + check_picks_one(mod_entities) + # "about" does not reference the root entity + mod_entities = deepcopy(orig_entities) + for about in "http://google.com", {"@id": "http://google.com"}: + mod_entities["http://example.com/ro-crate-metadata.json"]["about"] = about + check_finds_org(mod_entities) + # metadata type is not CreativeWork + mod_entities = deepcopy(orig_entities) + mod_entities["http://example.com/ro-crate-metadata.json"]["@type"] = "Thing" + check_finds_org(mod_entities) + # root type is not Dataset + mod_entities = deepcopy(orig_entities) + mod_entities["http://example.com/"]["@type"] = "Thing" + check_finds_org(mod_entities) + + +def test_find_root_multiple_types(): + entities = {_["@id"]: _ for _ in [ + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": {"@id": "./"}, + "conformsTo": {"@id": "https://w3id.org/ro/crate/1.1"}, + }, + { + "@id": "./", + "@type": ["Dataset", "RepositoryCollection"], + }, + ]} + m_id, r_id = find_root_entity_id(entities) + assert m_id == "ro-crate-metadata.json" + assert r_id == "./" + # "Dataset" not included + del entities["./"]["@type"][0] + with pytest.raises(ValueError): + find_root_entity_id(entities) + # Check we're not trying to be too clever + entities["./"]["@type"] = "NotADataset" + with pytest.raises(ValueError): + find_root_entity_id(entities) diff --git a/test/test_read.py b/test/test_read.py index 6497a76..85aeb03 100644 --- a/test/test_read.py +++ b/test/test_read.py @@ -21,7 +21,6 @@ import shutil import uuid import zipfile -from copy import deepcopy from pathlib import Path from rocrate.rocrate import ROCrate @@ -453,176 +452,3 @@ def test_multi_type_context_entity(tmpdir): entity = crate.dereference(id_) assert entity in crate.contextual_entities assert set(entity.type) == set(type_) - - -@pytest.mark.parametrize("root,basename", [ - ("", "ro-crate-metadata.json"), - ("", "ro-crate-metadata.jsonld"), - ("https://example.org/crate/", "ro-crate-metadata.json"), - ("https://example.org/crate/", "ro-crate-metadata.jsonld"), - ("", "bad-name.json"), -]) -def test_find_root(tmpdir, root, basename): - metadata_id = root + basename - root_id = root or "./" - metadata = { - "@context": "https://w3id.org/ro/crate/1.1/context", - "@graph": [ - { - "@id": metadata_id, - "@type": "CreativeWork", - "about": {"@id": root_id}, - "conformsTo": [ - {"@id": "https://w3id.org/ro/crate/1.1"}, - {"@id": "https://example.org/fancy-ro-crate/1.0"}, - ] - }, - { - "@id": root_id, - "@type": "Dataset", - }, - ] - } - crate_dir = tmpdir / "test_find_root" - crate_dir.mkdir() - # fixed filename, we only want the metadata entry to change - with open(crate_dir / "ro-crate-metadata.json", "wt") as f: - json.dump(metadata, f, indent=4) - if basename not in {"ro-crate-metadata.json", "ro-crate-metadata.jsonld"}: - with pytest.raises(KeyError): - ROCrate(crate_dir) - else: - crate = ROCrate(crate_dir) - assert crate.metadata.id == metadata_id - assert crate.root_dataset.id == root_id - - -def test_find_root_bad_entities(): - orig_entities = { - "ro-crate-metadata.json": { - "@id": "ro-crate-metadata.json", - "@type": "CreativeWork", - "about": {"@id": "./"}, - "conformsTo": {"@id": "https://w3id.org/ro/crate/1.1"}, - }, - "./": { - "@id": "./", - "@type": "Dataset", - }, - } - crate = ROCrate() - # missing "about" - entities = deepcopy(orig_entities) - del entities["ro-crate-metadata.json"]["about"] - with pytest.raises(ValueError, match="does not reference"): - crate.find_root_entity_id(entities) - # "about" does not reference the root entity - entities = deepcopy(orig_entities) - for about in "http://example.org", {"@id": "http://example.org"}: - entities["ro-crate-metadata.json"]["about"] = about - with pytest.raises(ValueError, match="does not reference"): - crate.find_root_entity_id(entities) - # metadata type is not CreativeWork - entities = deepcopy(orig_entities) - entities["ro-crate-metadata.json"]["@type"] = "Thing" - with pytest.raises(ValueError, match="must be of type"): - crate.find_root_entity_id(entities) - # root type is not Dataset - entities = deepcopy(orig_entities) - entities["./"]["@type"] = "Thing" - with pytest.raises(ValueError, match="must have"): - crate.find_root_entity_id(entities) - - -@pytest.mark.filterwarnings("ignore") -def test_find_root_multiple_entries(): - orig_entities = { - "http://example.org/ro-crate-metadata.json": { - "@id": "http://example.org/ro-crate-metadata.json", - "@type": "CreativeWork", - "about": {"@id": "http://example.org/"}, - "conformsTo": {"@id": "https://w3id.org/ro/crate/1.1"}, - }, - "http://example.org/": { - "@id": "http://example.org/", - "@type": "Dataset", - "hasPart": [ - {"@id": "http://example.com/"}, - {"@id": "http://example.com/ro-crate-metadata.json"} - ] - }, - "http://example.com/ro-crate-metadata.json": { - "@id": "http://example.com/ro-crate-metadata.json", - "@type": "CreativeWork", - "about": {"@id": "http://example.com/"}, - "conformsTo": {"@id": "https://w3id.com/ro/crate/1.1"}, - }, - "http://example.com/": { - "@id": "http://example.com/", - "@type": "Dataset", - }, - } - crate = ROCrate() - - def check_finds_org(entities): - m_id, r_id = crate.find_root_entity_id(entities) - assert m_id == "http://example.org/ro-crate-metadata.json" - assert r_id == "http://example.org/" - - def check_picks_one(entities): - m_id, r_id = crate.find_root_entity_id(entities) - assert m_id in [f"http://example.{_}/ro-crate-metadata.json" for _ in ("org", "com")] - assert r_id in [f"http://example.{_}/" for _ in ("org", "com")] - - check_finds_org(orig_entities) - # no root candidate contains the other one - mod_entities = deepcopy(orig_entities) - del mod_entities["http://example.org/"]["hasPart"] - check_picks_one(mod_entities) - # each root candidate contains the other one - mod_entities = deepcopy(orig_entities) - mod_entities["http://example.com/"]["hasPart"] = [ - {"@id": "http://example.org/"}, - {"@id": "http://example.org/ro-crate-metadata.json"} - ] - check_picks_one(mod_entities) - # "about" does not reference the root entity - mod_entities = deepcopy(orig_entities) - for about in "http://google.com", {"@id": "http://google.com"}: - mod_entities["http://example.com/ro-crate-metadata.json"]["about"] = about - check_finds_org(mod_entities) - # metadata type is not CreativeWork - mod_entities = deepcopy(orig_entities) - mod_entities["http://example.com/ro-crate-metadata.json"]["@type"] = "Thing" - check_finds_org(mod_entities) - # root type is not Dataset - mod_entities = deepcopy(orig_entities) - mod_entities["http://example.com/"]["@type"] = "Thing" - check_finds_org(mod_entities) - - -def test_find_root_multiple_types(): - entities = {_["@id"]: _ for _ in [ - { - "@id": "ro-crate-metadata.json", - "@type": "CreativeWork", - "about": {"@id": "./"}, - "conformsTo": {"@id": "https://w3id.org/ro/crate/1.1"}, - }, - { - "@id": "./", - "@type": ["Dataset", "RepositoryCollection"], - }, - ]} - crate = ROCrate() - m_id, r_id = crate.find_root_entity_id(entities) - assert m_id == "ro-crate-metadata.json" - assert r_id == "./" - # "Dataset" not included - del entities["./"]["@type"][0] - with pytest.raises(ValueError): - crate.find_root_entity_id(entities) - # Check we're not trying to be too clever - entities["./"]["@type"] = "NotADataset" - with pytest.raises(ValueError): - crate.find_root_entity_id(entities) diff --git a/test/test_api.py b/test/test_workflow_ro_crate.py similarity index 94% rename from test/test_api.py rename to test/test_workflow_ro_crate.py index a202f07..a48143f 100644 --- a/test/test_api.py +++ b/test/test_workflow_ro_crate.py @@ -16,8 +16,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from rocrate import rocrate_api as roc_api -from rocrate.rocrate import ROCrate +from rocrate.rocrate import ROCrate, make_workflow_rocrate from rocrate.model.computerlanguage import CWL_DEFAULT_VERSION, GALAXY_DEFAULT_VERSION WF_CRATE = "https://w3id.org/workflowhub/workflow-ro-crate" @@ -26,7 +25,7 @@ def test_galaxy_wf_crate(test_data_dir, tmpdir, helpers): wf_id = 'test_galaxy_wf.ga' wf_path = test_data_dir / wf_id - wf_crate = roc_api.make_workflow_rocrate(wf_path, wf_type='Galaxy') + wf_crate = make_workflow_rocrate(wf_path, wf_type='Galaxy') assert isinstance(wf_crate, ROCrate) wf = wf_crate.dereference(wf_id) @@ -62,7 +61,7 @@ def test_galaxy_wf_crate(test_data_dir, tmpdir, helpers): def test_cwl_wf_crate(test_data_dir, tmpdir, helpers): wf_id = 'sample_cwl_wf.cwl' wf_path = test_data_dir / wf_id - wf_crate = roc_api.make_workflow_rocrate(wf_path, wf_type='CWL') + wf_crate = make_workflow_rocrate(wf_path, wf_type='CWL') assert isinstance(wf_crate, ROCrate) wf = wf_crate.dereference(wf_id) @@ -91,7 +90,7 @@ def test_create_wf_include(test_data_dir, tmpdir, helpers): extra_file1 = test_data_dir / 'test_file_galaxy.txt' extra_file2 = test_data_dir / 'test_file_galaxy2.txt' files_list = [extra_file1, extra_file2] - wf_crate = roc_api.make_workflow_rocrate( + wf_crate = make_workflow_rocrate( wf_path, wf_type='Galaxy', include_files=files_list ) assert isinstance(wf_crate, ROCrate)