From 8e0f727b235a140ae0f9a480d45afeccb30f5b07 Mon Sep 17 00:00:00 2001 From: giangbui Date: Mon, 11 Jan 2021 17:56:11 -0600 Subject: [PATCH 1/9] fix(expand): only get immediate childs --- indexd/drs/blueprint.py | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/indexd/drs/blueprint.py b/indexd/drs/blueprint.py index 7a73ccd8..b1e2477a 100644 --- a/indexd/drs/blueprint.py +++ b/indexd/drs/blueprint.py @@ -202,15 +202,20 @@ def bundle_to_drs(record, expand=False, is_content=False): "contents": [], } - if expand: - contents = ( - record["contents"] - if "contents" in record - else record["bundle_data"] - if "bundle_data" in record - else [] - ) - drs_object["contents"] = contents + contents = ( + record["contents"] + if "contents" in record + else record["bundle_data"] + if "bundle_data" in record + else [] + ) + + if not expand and isinstance(contents, list): + for content in contents: + if isinstance(content, dict): + content.pop("contents", None) + + drs_object["contents"] = contents if not is_content: # Show these only if its the leading bundle @@ -256,7 +261,12 @@ def parse_checksums(record, drs_object): for k in record["hashes"]: ret_checksum.append({"checksum": record["hashes"][k], "type": k}) elif "checksum" in record: - checksums = json.loads(record["checksum"]) + try: + checksums = json.loads(record["checksum"]) + except json.decoder.JSONDecodeError: + # TODO: is it expected that the record["checksum"] is json format? 
+ # it seems that it is string + checksums = [checksums] for checksum in checksums: ret_checksum.append( {"checksum": checksum["checksum"], "type": checksum["type"]} From fd851b8605c86611b21316df0540f331aca87a87 Mon Sep 17 00:00:00 2001 From: giangbui Date: Mon, 11 Jan 2021 19:01:46 -0600 Subject: [PATCH 2/9] fix(typo): fix typos --- indexd/drs/blueprint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/indexd/drs/blueprint.py b/indexd/drs/blueprint.py index b1e2477a..2adf421d 100644 --- a/indexd/drs/blueprint.py +++ b/indexd/drs/blueprint.py @@ -266,7 +266,7 @@ def parse_checksums(record, drs_object): except json.decoder.JSONDecodeError: # TODO: is it expected that the record["checksum"] is json format? # it seems that it is string - checksums = [checksums] + checksums = [record["checksum"]] for checksum in checksums: ret_checksum.append( {"checksum": checksum["checksum"], "type": checksum["type"]} From a22f8b4a3e9e32a0f8308539d416d5b0ac39ef77 Mon Sep 17 00:00:00 2001 From: giangbui Date: Mon, 11 Jan 2021 19:08:42 -0600 Subject: [PATCH 3/9] fixup --- indexd/drs/blueprint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/indexd/drs/blueprint.py b/indexd/drs/blueprint.py index 2adf421d..f66ca122 100644 --- a/indexd/drs/blueprint.py +++ b/indexd/drs/blueprint.py @@ -266,7 +266,7 @@ def parse_checksums(record, drs_object): except json.decoder.JSONDecodeError: # TODO: is it expected that the record["checksum"] is json format? 
# it seems that it is string - checksums = [record["checksum"]] + checksums = [{"checksum": record["checksum"], "type": "md5"}] for checksum in checksums: ret_checksum.append( {"checksum": checksum["checksum"], "type": checksum["type"]} From 1ea16c38d0ecef0954a876707ddd3776f891ed83 Mon Sep 17 00:00:00 2001 From: giangbui Date: Tue, 12 Jan 2021 10:24:52 -0600 Subject: [PATCH 4/9] fix(ga4gh): fix ga4gh endpoint --- indexd/drs/blueprint.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/indexd/drs/blueprint.py b/indexd/drs/blueprint.py index f66ca122..bdf6c47c 100644 --- a/indexd/drs/blueprint.py +++ b/indexd/drs/blueprint.py @@ -147,13 +147,17 @@ def indexd_to_drs(record, expand=False, list_drs=False): if "description" in record: drs_object["description"] = record["description"] - - if expand == True and "bundle_data" in record: + if "bundle_data" in record: bundle_data = record["bundle_data"] for bundle in bundle_data: - drs_object["contents"].append( - bundle_to_drs(bundle, expand=expand, is_content=True) - ) + if expand: + drs_object["contents"].append( + bundle_to_drs(bundle, expand=True, is_content=True) + ) + else: + drs_object["contents"].append( + bundle_to_drs(bundle, expand=False, is_content=True) + ) # access_methods mapping if "urls" in record: @@ -233,16 +237,18 @@ def bundle_to_drs(record, expand=False, is_content=False): created_time = ( record["created_date"] if "created_date" in record - else record["created_time"] + else record.get("created_time") ) updated_time = ( record["updated_date"] if "updated_date" in record - else record["updated_time"] + else record.get("updated_time") ) - drs_object["created_time"] = created_time - drs_object["updated_time"] = updated_time + if created_time: + drs_object["created_time"] = created_time + if updated_time: + drs_object["updated_time"] = updated_time drs_object["size"] = record["size"] drs_object["aliases"] = aliases drs_object["description"] = description From 
d92cdd702eee10219e64776dd30c255ecec03844 Mon Sep 17 00:00:00 2001 From: giangbui Date: Tue, 12 Jan 2021 10:57:28 -0600 Subject: [PATCH 5/9] chore(code): refactor --- indexd/drs/blueprint.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/indexd/drs/blueprint.py b/indexd/drs/blueprint.py index bdf6c47c..7bd762f2 100644 --- a/indexd/drs/blueprint.py +++ b/indexd/drs/blueprint.py @@ -147,17 +147,13 @@ def indexd_to_drs(record, expand=False, list_drs=False): if "description" in record: drs_object["description"] = record["description"] - if "bundle_data" in record: + + if expand == True and "bundle_data" in record: bundle_data = record["bundle_data"] for bundle in bundle_data: - if expand: - drs_object["contents"].append( - bundle_to_drs(bundle, expand=True, is_content=True) - ) - else: - drs_object["contents"].append( - bundle_to_drs(bundle, expand=False, is_content=True) - ) + drs_object["contents"].append( + bundle_to_drs(bundle, expand=expand, is_content=True) + ) # access_methods mapping if "urls" in record: From 401505065f4793af1f89f5f54bdbd9f3dd2282aa Mon Sep 17 00:00:00 2001 From: giangbui Date: Tue, 12 Jan 2021 14:47:38 -0600 Subject: [PATCH 6/9] chore(refactor): refactor --- indexd/drs/blueprint.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/indexd/drs/blueprint.py b/indexd/drs/blueprint.py index 7bd762f2..dc2294f3 100644 --- a/indexd/drs/blueprint.py +++ b/indexd/drs/blueprint.py @@ -148,12 +148,11 @@ def indexd_to_drs(record, expand=False, list_drs=False): if "description" in record: drs_object["description"] = record["description"] - if expand == True and "bundle_data" in record: - bundle_data = record["bundle_data"] - for bundle in bundle_data: - drs_object["contents"].append( - bundle_to_drs(bundle, expand=expand, is_content=True) - ) + for bundle in record.get("bundle_data", []): + bundle_object = bundle_to_drs(bundle, expand=expand, is_content=True) + if not expand: + 
bundle_object.pop("contents", None) + drs_object["contents"].append(bundle_object) # access_methods mapping if "urls" in record: @@ -266,8 +265,7 @@ def parse_checksums(record, drs_object): try: checksums = json.loads(record["checksum"]) except json.decoder.JSONDecodeError: - # TODO: is it expected that the record["checksum"] is json format? - # it seems that it is string + # TODO: Remove the code after fixing the record["checksum"] format checksums = [{"checksum": record["checksum"], "type": "md5"}] for checksum in checksums: ret_checksum.append( From cda9202bc983e07bdafd47cee92e2511e38dd2dc Mon Sep 17 00:00:00 2001 From: giangbui Date: Tue, 12 Jan 2021 15:20:51 -0600 Subject: [PATCH 7/9] fix(unittest): fix unittest --- tests/test_bundles.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/tests/test_bundles.py b/tests/test_bundles.py index cb42a6cd..49b164e6 100644 --- a/tests/test_bundles.py +++ b/tests/test_bundles.py @@ -202,18 +202,14 @@ def test_bundle_post_checksum_with_incorrect_schema(client, user): "name": "test_bundle", "bundles": did_list, "bundle_id": bundle_id, - "checksums": [ - {"type": "md42", "checksum": "a"}, - ], + "checksums": [{"type": "md42", "checksum": "a"},], } res = client.post("/bundle/", json=data, headers=user) assert res.status_code == 400 # checksum value doesn't match checksum type data = { - "checksums": [ - {"type": "md5", "checksum": "a"}, - ], + "checksums": [{"type": "md5", "checksum": "a"},], } res = client.post("/bundle/", json=data, headers=user) assert res.status_code == 400 @@ -644,7 +640,7 @@ def test_get_drs_expand_contents_default(client, user): rec2 = res2.json contents = rec2["contents"] - assert len(contents) == 0 + assert len(contents) == 3 def test_get_drs_expand_contents_false(client, user): @@ -657,7 +653,7 @@ def test_get_drs_expand_contents_false(client, user): assert res2.status_code == 200 rec2 = res2.json - contents = rec2["contents"] + contents = 
rec2["contents"][0].get("contents", []) assert len(contents) == 0 From 152ddbf84ee3624c3bc220ed86e64d25aac080ab Mon Sep 17 00:00:00 2001 From: giangbui Date: Tue, 12 Jan 2021 15:28:29 -0600 Subject: [PATCH 8/9] fixup --- tests/test_bundles.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/test_bundles.py b/tests/test_bundles.py index 49b164e6..111a8545 100644 --- a/tests/test_bundles.py +++ b/tests/test_bundles.py @@ -202,14 +202,18 @@ def test_bundle_post_checksum_with_incorrect_schema(client, user): "name": "test_bundle", "bundles": did_list, "bundle_id": bundle_id, - "checksums": [{"type": "md42", "checksum": "a"},], + "checksums": [ + {"type": "md42", "checksum": "a"}, + ], } res = client.post("/bundle/", json=data, headers=user) assert res.status_code == 400 # checksum value doesn't match checksum type data = { - "checksums": [{"type": "md5", "checksum": "a"},], + "checksums": [ + {"type": "md5", "checksum": "a"}, + ], } res = client.post("/bundle/", json=data, headers=user) assert res.status_code == 400 From ea89c78a3a19552fce280597ea4a1ab4c68e733c Mon Sep 17 00:00:00 2001 From: giangbui Date: Thu, 14 Jan 2021 09:08:28 -0600 Subject: [PATCH 9/9] chore(code): update docstring --- indexd/drs/blueprint.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/indexd/drs/blueprint.py b/indexd/drs/blueprint.py index dc2294f3..2134452c 100644 --- a/indexd/drs/blueprint.py +++ b/indexd/drs/blueprint.py @@ -20,7 +20,7 @@ def get_drs_object(object_id): ret = blueprint.index_driver.get_with_nonstrict_prefix(object_id) - data = indexd_to_drs(ret, expand=expand, list_drs=False) + data = indexd_to_drs(ret, expand=expand) return flask.jsonify(data), 200 @@ -84,9 +84,14 @@ def get_signed_url(object_id, access_id): return res, 200 -def indexd_to_drs(record, expand=False, list_drs=False): +def indexd_to_drs(record, expand=False): + """ + Convert record to ga4gh-compliant format - bearer_token = 
flask.request.headers.get("AUTHORIZATION") + Args: + record(dict): json object record + expand(bool): show contents of the descendants + """ did = ( record["id"] @@ -179,6 +184,8 @@ def indexd_to_drs(record, expand=False, list_drs=False): def bundle_to_drs(record, expand=False, is_content=False): """ + record(dict): json object record + expand(bool): show contents of the descendants is_content: is an expanded content in a bundle """