From f156c3295ffffb1880ef035675a60a0baa1d5266 Mon Sep 17 00:00:00 2001 From: Nichollette Date: Fri, 8 Nov 2024 17:15:48 -0500 Subject: [PATCH 01/30] working GET endpoint add, POST is setup --- src/config.py | 1 + src/controller/smartapi.py | 24 ++++++++++++++++++++++-- src/handlers/api.py | 29 +++++++++++++++++++++++++++++ 3 files changed, 52 insertions(+), 2 deletions(-) diff --git a/src/config.py b/src/config.py index 7f6faa48..c7b6c703 100644 --- a/src/config.py +++ b/src/config.py @@ -96,6 +96,7 @@ (r"/api/metakg/consolidated/?", "handlers.api.MetaKGQueryHandler", {"biothing_type": "metakg_consolidated"}), (r"/api/metakg/consolidated/fields/?", "biothings.web.handlers.MetadataFieldHandler", {"biothing_type": "metakg_consolidated"}), (r"/api/metakg/paths/?", "handlers.api.MetaKGPathFinderHandler", {"biothing_type": "metakgpathfinder"}), + (r"/api/metakg/parse/?", "handlers.api.MetaKGParserHandler"), ] # biothings web tester will read this diff --git a/src/controller/smartapi.py b/src/controller/smartapi.py index c5e1bf05..4f095945 100644 --- a/src/controller/smartapi.py +++ b/src/controller/smartapi.py @@ -369,8 +369,28 @@ def is_trapi(self): """return True if a TRAPI""" return self.has_tags("trapi", "translator") - def get_metakg(self, include_trapi=True): - raw_metadata = decoder.to_dict(decoder.decompress(self._doc._raw)) + # def get_metakg(self, include_trapi=True): + # raw_metadata = decoder.to_dict(decoder.decompress(self._doc._raw)) + # mkg_parser = MetaKGParser() + # extra_data = {"id": self._id, "url": self.url} + # self.metakg_errors = None # reset metakg_errors + # if self.is_trapi: + # metakg = mkg_parser.get_TRAPI_metadatas(raw_metadata, extra_data) if include_trapi else [] + # else: + # metakg = mkg_parser.get_non_TRAPI_metadatas(raw_metadata, extra_data) + # if mkg_parser.metakg_errors: + # # hold metakg_errors for later use + # self.metakg_errors = mkg_parser.metakg_errors + # return metakg + + def get_metakg(self, include_trapi=True, metadata_url=False): + if metadata_url: + data_id = decoder.get_id(self.url) # get ID + doc = self.get(data_id)# get smartapi data + self._doc = doc._doc + raw_metadata = decoder.to_dict(decoder.decompress(doc._doc._raw)) + else: + raw_metadata = decoder.to_dict(decoder.decompress(self._doc._raw)) mkg_parser = MetaKGParser() extra_data = {"id": self._id, "url": self.url} self.metakg_errors = None # reset metakg_errors diff --git a/src/handlers/api.py b/src/handlers/api.py index cc0a89ad..5146b8b5 100644 --- a/src/handlers/api.py +++ b/src/handlers/api.py @@ -686,3 +686,32 @@ async def get(self, *args, **kwargs): } await asyncio.sleep(0.01) self.finish(res) + +class MetaKGParserHandler(QueryHandler): + name="metakgparser" + kwargs = { + "GET": { + "url": { + "type": str, + "required": True, + "max": 1000, + "description": "URL of the SmartAPI metadata to parse" + }, + }, + "POST": { + "type": dict, + "required": True, + "description": "Metadata content of the SmartAPI in JSON format" + }, + } + + + async def get(self, *args, **kwargs): + if self.request.method == "GET": + smartapi = SmartAPI(self.args.url) + content=smartapi.get_metakg(metadata_url=self.args.url) + self.finish(f"{content}") + elif self.request.method == "POST": + print(f"\n\n[INFO] HERE") + # pass + self.finish(f"HERE") \ No newline at end of file From 05dd22be3eb55cb14fa6d49b3f25f878ecc5f357 Mon Sep 17 00:00:00 2001 From: Nichollette Date: Wed, 20 Nov 2024 16:35:02 -0500 Subject: [PATCH 02/30] adding working mkg parser handler --- src/handlers/api.py | 89 
++++++++++++++++++++++++++++++++++++++------- 1 file changed, 75 insertions(+), 14 deletions(-) diff --git a/src/handlers/api.py b/src/handlers/api.py index 5146b8b5..672b3f47 100644 --- a/src/handlers/api.py +++ b/src/handlers/api.py @@ -22,9 +22,11 @@ from utils.metakg.cytoscape_formatter import CytoscapeDataFormatter from utils.metakg.biolink_helpers import get_expanded_values from utils.notification import SlackNewAPIMessage, SlackNewTranslatorAPIMessage +from utils.metakg.parser import MetaKGParser logger = logging.getLogger("smartAPI") +from tornado.web import RequestHandler def github_authenticated(func): """ @@ -687,7 +689,7 @@ async def get(self, *args, **kwargs): await asyncio.sleep(0.01) self.finish(res) -class MetaKGParserHandler(QueryHandler): +class MetaKGParserHandler(BaseHandler): #RequestHandler/BaseAPIHandler name="metakgparser" kwargs = { "GET": { @@ -698,20 +700,79 @@ class MetaKGParserHandler(QueryHandler): "description": "URL of the SmartAPI metadata to parse" }, }, - "POST": { - "type": dict, - "required": True, - "description": "Metadata content of the SmartAPI in JSON format" - }, + "POST": { } } - async def get(self, *args, **kwargs): if self.request.method == "GET": - smartapi = SmartAPI(self.args.url) - content=smartapi.get_metakg(metadata_url=self.args.url) - self.finish(f"{content}") - elif self.request.method == "POST": - print(f"\n\n[INFO] HERE") - # pass - self.finish(f"HERE") \ No newline at end of file + if not self.get_argument("url", None): # Check if the 'url' argument is present + self.set_status(400) + self.write({"error": "Missing 'url' argument"}) + return + # smartapi = API(url=self.get_argument("url")) + # metakg_doc = smartapi.get_metakg() + # call parser // pass URL to parser + parser = MetaKGParser() + url = self.get_argument("url") + + trapi_data = parser.get_TRAPI_metadatas(data=None, url=url) + nontrapi_data = parser.get_non_TRAPI_metadatas(data=None, url=url) + combined_data = trapi_data + nontrapi_data + + # Transform the combined data to include an 'api' key and organize it into 'hits' + hits = [] + for edge in combined_data: + print(f"\n{json.dumps(edge, indent=4)}\n") + transformed_edge = { + "_id": edge['api'].get("_id"), # Include an ID if available + "_score": 1, # Placeholder for scoring logic + "api": { + "name": edge['api'].get("name"), # Replace with actual API name key + "smartapi": { + "id": edge['api']['smartapi'].get("id") # Replace with actual SmartAPI ID key + } + }, + "subject": edge.get("subject"), + "subject_prefix": edge.get("subject_prefix"), + "predicate": edge.get("predicate"), + "object": edge.get("object"), + "object_prefix": edge.get("object_prefix"), + } + hits.append(transformed_edge) + + # Create final response format + response = { + "took": 1, # Placeholder for actual timing logic + "total": len(hits), + "max_score": 1, # Placeholder for scoring logic + "hits": hits + } + + # Write response + self.set_header("Content-Type", "application/json") + self.write(json.dumps(response)) + + async def post(self, *args, **kwargs): + try: + # Read the raw request body + body = self.request.body + # Parse the JSON content + data = json.loads(body) + parser = MetaKGParser() + trapi_data = parser.get_TRAPI_metadatas(data=data) + nontrapi_data = parser.get_non_TRAPI_metadatas(data=data) + combined_data = trapi_data + nontrapi_data + # self.write(json.dumps(combined_data)) + # Clean up the metakg_doc to remove the 'api' key + cleaned_metakg_doc = [] + for edge in combined_data: + if 'api' in edge: + edge.pop('api') # Remove 
the 'api' key + cleaned_metakg_doc.append(edge) + + # Return the cleaned metakg document + self.set_header("Content-Type", "application/json") + self.write(json.dumps(cleaned_metakg_doc)) # make dict + except json.JSONDecodeError: + self.set_status(400) + self.write({"error": "Invalid JSON format"}) \ No newline at end of file From f52acb6ee870d4814b62b3ea4808b026c5ccfab6 Mon Sep 17 00:00:00 2001 From: Nichollette Date: Wed, 20 Nov 2024 16:36:17 -0500 Subject: [PATCH 03/30] added url parameter for parser methods --- src/utils/metakg/parser.py | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/src/utils/metakg/parser.py b/src/utils/metakg/parser.py index 8abc48df..2ce43469 100644 --- a/src/utils/metakg/parser.py +++ b/src/utils/metakg/parser.py @@ -13,17 +13,28 @@ class MetaKGParser: get_url_timeout = 60 metakg_errors = None - def get_non_TRAPI_metadatas(self, data, extra_data=None): - parser = API(data) + def get_non_TRAPI_metadatas(self, data=None, extra_data=None, url=None): # *** TEST THIS FOR BREAK POINTS *** + if data: + parser = API(smartapi_doc=data) + elif url: + parser = API(url=url) + else: + return [] # **** ERROR HANDLE THIS **** mkg = self.extract_metakgedges(parser.metadata["operations"], extra_data=extra_data) no_nodes = len({x["subject"] for x in mkg} | {x["object"] for x in mkg}) no_edges = len({x["predicate"] for x in mkg}) logger.info("Done [%s nodes, %s edges]", no_nodes, no_edges) return mkg - def get_TRAPI_metadatas(self, data, extra_data=None): + def get_TRAPI_metadatas(self, data=None, extra_data=None, url=None): ops = [] - metadata_list = self.get_TRAPI_with_metakg_endpoint(data) + if data: + metadata_list = self.get_TRAPI_with_metakg_endpoint(data=data) + elif url: + metadata_list = self.get_TRAPI_with_metakg_endpoint(url=url) + else: + return [] # **** ERROR HANDLE THIS **** + count_metadata_list = len(metadata_list) self.metakg_errors = {} for i, metadata in enumerate(metadata_list): @@ -34,7 +45,13 @@ def get_TRAPI_metadatas(self, data, extra_data=None): return self.extract_metakgedges(ops, extra_data=extra_data) - def get_TRAPI_with_metakg_endpoint(self, data): + def get_TRAPI_with_metakg_endpoint(self, data=None, url=None): # TEST THIS FOR BREAK POINTS + # Use the URL if provided, otherwise fall back to the 'data' argument + if data: + parser = API(data) + elif url: + parser = API(url=url) + metadatas = [] parser = API(data) metadata = parser.metadata From 51b02b55b89ddb62e0a2d302fe757d415a78661d Mon Sep 17 00:00:00 2001 From: Nichollette Date: Tue, 3 Dec 2024 11:59:03 -0500 Subject: [PATCH 04/30] filter for get and post output in parse --- src/handlers/api.py | 180 ++++++++++++++++++++++++++++---------------- 1 file changed, 115 insertions(+), 65 deletions(-) diff --git a/src/handlers/api.py b/src/handlers/api.py index 672b3f47..309ae132 100644 --- a/src/handlers/api.py +++ b/src/handlers/api.py @@ -1,7 +1,6 @@ import asyncio import json import logging -from typing import List, Union import os import bmt from biothings.utils import serializer @@ -26,8 +25,6 @@ logger = logging.getLogger("smartAPI") -from tornado.web import RequestHandler - def github_authenticated(func): """ RegistryHandler Decorator @@ -497,7 +494,7 @@ def process_apis(self, apis): api_dict = apis["api"] filtered_api= self.get_filtered_api(api_dict) apis["api"] = filtered_api - + def write(self, chunk): """ Overwrite the biothings query handler to ... 
@@ -524,7 +521,7 @@ def write(self, chunk): self.set_header("Content-Disposition", 'attachment; filename="smartapi_metakg.graphml"') return super(BaseAPIHandler, self).write(chunk) - + if self.format == "html": # setup template template_path = os.path.abspath(os.path.join(os.path.dirname( __file__ ), '..', 'templates')) @@ -682,75 +679,124 @@ async def get(self, *args, **kwargs): raw_query_output = self.setup_pathfinder_rawquery(expanded_fields) self.write(raw_query_output) return - res = { - "total": len(paths_with_edges), + res = { + "total": len(paths_with_edges), "paths": paths_with_edges, } await asyncio.sleep(0.01) self.finish(res) -class MetaKGParserHandler(BaseHandler): #RequestHandler/BaseAPIHandler +class MetaKGParserHandler(BaseHandler): name="metakgparser" kwargs = { "GET": { "url": { "type": str, - "required": True, + "required": True, "max": 1000, "description": "URL of the SmartAPI metadata to parse" }, + "api_details": {"type": bool, "default": 0 }, + "bte": {"type": bool, "default": 0}, + }, + "POST": { + "api_details": {"type": bool, "default": 0 }, + "bte": {"type": bool, "default": 0 }, }, - "POST": { } } + def initialize(self, *args, **kwargs): + super().initialize(*args, **kwargs) + # change the default query pipeline from self.biothings.pipeline + self.pipeline = MetaKGQueryPipeline(ns=self.biothings) + + def get_filtered_api(self, api_dict): + """Extract and return filtered API information.""" + api_info = api_dict["api"] + # Default structure to preserve top-level keys + filtered_dict = { + "subject": api_dict.get("subject"), + "object": api_dict.get("object"), + "predicate": api_dict.get("predicate"), + "subject_prefix": api_dict.get("subject_prefix"), + "object_prefix": api_dict.get("object_prefix"), + } + # case: bte=1, api_details=0 + if self.args.bte == "1" and self.args.api_details == "0": + filtered_api = { + **({"name": api_info["name"]} if "name" in api_info else {}), + **( + {"smartapi": {"id": api_info["smartapi"]["id"]}} + if "smartapi" in api_info and "id" in api_info["smartapi"] + else {} + ), + "bte": api_info.get("bte", {}), + } + # case: bte=0, api_details=1 + elif self.args.bte == "0" and self.args.api_details == "1": + api_info.pop("bte", None) + filtered_api = api_info + # case: api_details=1, bte=1 + elif self.args.bte == "1" and self.args.api_details == "1": + filtered_api = api_info + # case: bte=0, api_details=0 + else: + filtered_api = { + **({"name": api_info["name"]} if "name" in api_info else {}), + **( + {"smartapi": {"id": api_info["smartapi"]["id"]}} + if "smartapi" in api_info and "id" in api_info["smartapi"] + else {} + ), + } + # Add the filtered 'api' key to the preserved top-level structure + filtered_dict["api"] = filtered_api + + return filtered_dict + + def process_apis(self, apis): + """Process each API dict based on provided args.""" + if isinstance(apis, list): + for i, api_dict in enumerate(apis): + filtered_api = self.get_filtered_api(api_dict) + apis[i] = filtered_api + elif isinstance(apis, dict): + if "bte" in apis: + # Update dict for new format + apis["api"]["bte"] = apis.pop("bte") + api_dict = apis["api"] + filtered_api = self.get_filtered_api(api_dict) + apis["api"] = filtered_api + return apis + async def get(self, *args, **kwargs): - if self.request.method == "GET": - if not self.get_argument("url", None): # Check if the 'url' argument is present - self.set_status(400) - self.write({"error": "Missing 'url' argument"}) - return - # smartapi = API(url=self.get_argument("url")) - # metakg_doc = 
smartapi.get_metakg() - # call parser // pass URL to parser - parser = MetaKGParser() - url = self.get_argument("url") - - trapi_data = parser.get_TRAPI_metadatas(data=None, url=url) - nontrapi_data = parser.get_non_TRAPI_metadatas(data=None, url=url) - combined_data = trapi_data + nontrapi_data + if not self.get_argument("url", None): + self.set_status(400) + self.write({"error": "Missing 'url' argument"}) + return - # Transform the combined data to include an 'api' key and organize it into 'hits' - hits = [] - for edge in combined_data: - print(f"\n{json.dumps(edge, indent=4)}\n") - transformed_edge = { - "_id": edge['api'].get("_id"), # Include an ID if available - "_score": 1, # Placeholder for scoring logic - "api": { - "name": edge['api'].get("name"), # Replace with actual API name key - "smartapi": { - "id": edge['api']['smartapi'].get("id") # Replace with actual SmartAPI ID key - } - }, - "subject": edge.get("subject"), - "subject_prefix": edge.get("subject_prefix"), - "predicate": edge.get("predicate"), - "object": edge.get("object"), - "object_prefix": edge.get("object_prefix"), - } - hits.append(transformed_edge) + parser = MetaKGParser() + url = self.get_argument("url") + self.args.api_details = self.get_argument("api_details", False) + self.args.bte = self.get_argument("bte", False) - # Create final response format - response = { - "took": 1, # Placeholder for actual timing logic - "total": len(hits), - "max_score": 1, # Placeholder for scoring logic - "hits": hits - } + trapi_data = parser.get_TRAPI_metadatas(data=None, url=url) + nontrapi_data = parser.get_non_TRAPI_metadatas(data=None, url=url) + combined_data = trapi_data + nontrapi_data - # Write response - self.set_header("Content-Type", "application/json") - self.write(json.dumps(response)) + for i, api_dict in enumerate(combined_data): + filtered_api = self.get_filtered_api(api_dict) + combined_data[i] = filtered_api + + response = { + "took": 1, + "total": len(combined_data), + "max_score": 1, + "hits": combined_data, + } + + self.set_header("Content-Type", "application/json") + self.write(json.dumps(response)) async def post(self, *args, **kwargs): try: @@ -762,17 +808,21 @@ async def post(self, *args, **kwargs): trapi_data = parser.get_TRAPI_metadatas(data=data) nontrapi_data = parser.get_non_TRAPI_metadatas(data=data) combined_data = trapi_data + nontrapi_data - # self.write(json.dumps(combined_data)) - # Clean up the metakg_doc to remove the 'api' key - cleaned_metakg_doc = [] - for edge in combined_data: - if 'api' in edge: - edge.pop('api') # Remove the 'api' key - cleaned_metakg_doc.append(edge) - - # Return the cleaned metakg document + + for i, api_dict in enumerate(combined_data): + filtered_api = self.get_filtered_api(api_dict) + combined_data[i] = filtered_api + + response = { + "took": 1, + "total": len(combined_data), + "max_score": 1, + "hits": combined_data, + } + self.set_header("Content-Type", "application/json") - self.write(json.dumps(cleaned_metakg_doc)) # make dict + self.write(json.dumps(response) + except json.JSONDecodeError: self.set_status(400) - self.write({"error": "Invalid JSON format"}) \ No newline at end of file + self.write({"error": "Invalid JSON format"}) From 8ee011db89e98488ea11297a7940225de37660ae Mon Sep 17 00:00:00 2001 From: Nichollette Date: Wed, 4 Dec 2024 10:37:29 -0500 Subject: [PATCH 05/30] updated error handling for parser --- src/utils/metakg/parser.py | 47 +++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git 
a/src/utils/metakg/parser.py b/src/utils/metakg/parser.py index 2ce43469..d7815bf1 100644 --- a/src/utils/metakg/parser.py +++ b/src/utils/metakg/parser.py @@ -13,13 +13,17 @@ class MetaKGParser: get_url_timeout = 60 metakg_errors = None - def get_non_TRAPI_metadatas(self, data=None, extra_data=None, url=None): # *** TEST THIS FOR BREAK POINTS *** + def get_non_TRAPI_metadatas(self, data=None, extra_data=None, url=None): + # Error Handling + if not data and not url: + raise ValueError("Either data or url must be provided.") if data: parser = API(smartapi_doc=data) elif url: parser = API(url=url) - else: - return [] # **** ERROR HANDLE THIS **** + else: + raise ValueError("Error getting metadata from provided data or url.") + mkg = self.extract_metakgedges(parser.metadata["operations"], extra_data=extra_data) no_nodes = len({x["subject"] for x in mkg} | {x["object"] for x in mkg}) no_edges = len({x["predicate"] for x in mkg}) @@ -28,12 +32,14 @@ def get_non_TRAPI_metadatas(self, data=None, extra_data=None, url=None): # *** T def get_TRAPI_metadatas(self, data=None, extra_data=None, url=None): ops = [] + if not data and not url: + raise ValueError("Either data or url must be provided.") if data: metadata_list = self.get_TRAPI_with_metakg_endpoint(data=data) elif url: metadata_list = self.get_TRAPI_with_metakg_endpoint(url=url) else: - return [] # **** ERROR HANDLE THIS **** + raise ValueError("Error getting metadata from provided data or url.") count_metadata_list = len(metadata_list) self.metakg_errors = {} @@ -45,21 +51,24 @@ def get_TRAPI_metadatas(self, data=None, extra_data=None, url=None): return self.extract_metakgedges(ops, extra_data=extra_data) - def get_TRAPI_with_metakg_endpoint(self, data=None, url=None): # TEST THIS FOR BREAK POINTS - # Use the URL if provided, otherwise fall back to the 'data' argument - if data: - parser = API(data) - elif url: - parser = API(url=url) - - metadatas = [] - parser = API(data) - metadata = parser.metadata - _paths = metadata.get("paths", {}) - _team = metadata.get("x-translator", {}).get("team") - if "/meta_knowledge_graph" in _paths and "/query" in _paths and _team: - metadatas.append(metadata) - return metadatas + def get_TRAPI_with_metakg_endpoint(self, data=None, url=None): + if not data and not url: + raise ValueError("Either data or url must be provided.") + try: + # Initialize API with either data or URL + parser = API(smartapi_doc=data) if data else API(url=url) + metadata = parser.metadata + _paths = metadata.get("paths", {}) + _team = metadata.get("x-translator", {}).get("team") + + # Check for required TRAPI paths + if "/meta_knowledge_graph" in _paths and "/query" in _paths and _team: + print("TRAPI metadata found.") + return [metadata] + else: + return [] + except Exception as e: + raise ValueError(f"Error getting TRAPI metadata: {e}") def construct_query_url(self, server_url): if server_url.endswith("/"): From d43b730ba94f2ca6abdaa815540cf3be905e618b Mon Sep 17 00:00:00 2001 From: Nichollette Date: Wed, 4 Dec 2024 15:32:10 -0500 Subject: [PATCH 06/30] added get_metakg method --- src/controller/smartapi.py | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/src/controller/smartapi.py b/src/controller/smartapi.py index 4f095945..08325c7f 100644 --- a/src/controller/smartapi.py +++ b/src/controller/smartapi.py @@ -369,24 +369,10 @@ def is_trapi(self): """return True if a TRAPI""" return self.has_tags("trapi", "translator") - # def get_metakg(self, include_trapi=True): - # raw_metadata = 
decoder.to_dict(decoder.decompress(self._doc._raw)) - # mkg_parser = MetaKGParser() - # extra_data = {"id": self._id, "url": self.url} - # self.metakg_errors = None # reset metakg_errors - # if self.is_trapi: - # metakg = mkg_parser.get_TRAPI_metadatas(raw_metadata, extra_data) if include_trapi else [] - # else: - # metakg = mkg_parser.get_non_TRAPI_metadatas(raw_metadata, extra_data) - # if mkg_parser.metakg_errors: - # # hold metakg_errors for later use - # self.metakg_errors = mkg_parser.metakg_errors - # return metakg - def get_metakg(self, include_trapi=True, metadata_url=False): if metadata_url: - data_id = decoder.get_id(self.url) # get ID - doc = self.get(data_id)# get smartapi data + data_id = decoder.get_id(self.url) + doc = self.get(data_id) self._doc = doc._doc raw_metadata = decoder.to_dict(decoder.decompress(doc._doc._raw)) else: From 3ce64f8046811c8b992da91038a178dfeca32ff9 Mon Sep 17 00:00:00 2001 From: Nichollette Date: Wed, 4 Dec 2024 16:26:34 -0500 Subject: [PATCH 07/30] added missing ) --- src/handlers/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/handlers/api.py b/src/handlers/api.py index 309ae132..88695178 100644 --- a/src/handlers/api.py +++ b/src/handlers/api.py @@ -821,7 +821,7 @@ async def post(self, *args, **kwargs): } self.set_header("Content-Type", "application/json") - self.write(json.dumps(response) + self.write(json.dumps(response)) except json.JSONDecodeError: self.set_status(400) From f4de5e9f78029bfc9f61dd72647c30be6fc6d4cf Mon Sep 17 00:00:00 2001 From: Nichollette Date: Thu, 5 Dec 2024 12:34:28 -0500 Subject: [PATCH 08/30] added tests and clean metakg parse endpoint --- src/handlers/api.py | 19 +++-- .../_utils/metakg/integration/parser/parse.py | 80 +++++++++++++++++++ 2 files changed, 94 insertions(+), 5 deletions(-) create mode 100644 src/tests/_utils/metakg/integration/parser/parse.py diff --git a/src/handlers/api.py b/src/handlers/api.py index 88695178..fdbfc6e4 100644 --- a/src/handlers/api.py +++ b/src/handlers/api.py @@ -687,7 +687,6 @@ async def get(self, *args, **kwargs): self.finish(res) class MetaKGParserHandler(BaseHandler): - name="metakgparser" kwargs = { "GET": { "url": { @@ -713,6 +712,7 @@ def initialize(self, *args, **kwargs): def get_filtered_api(self, api_dict): """Extract and return filtered API information.""" api_info = api_dict["api"] + # Default structure to preserve top-level keys filtered_dict = { "subject": api_dict.get("subject"), @@ -721,6 +721,7 @@ def get_filtered_api(self, api_dict): "subject_prefix": api_dict.get("subject_prefix"), "object_prefix": api_dict.get("object_prefix"), } + # case: bte=1, api_details=0 if self.args.bte == "1" and self.args.api_details == "0": filtered_api = { @@ -732,13 +733,16 @@ def get_filtered_api(self, api_dict): ), "bte": api_info.get("bte", {}), } + # case: bte=0, api_details=1 elif self.args.bte == "0" and self.args.api_details == "1": api_info.pop("bte", None) filtered_api = api_info + # case: api_details=1, bte=1 elif self.args.bte == "1" and self.args.api_details == "1": filtered_api = api_info + # case: bte=0, api_details=0 else: filtered_api = { @@ -752,6 +756,10 @@ def get_filtered_api(self, api_dict): # Add the filtered 'api' key to the preserved top-level structure filtered_dict["api"] = filtered_api + # Remove 'bte' from 'api' if it exists + if "bte" in filtered_dict["api"]: + filtered_dict['bte'] = filtered_dict["api"].pop("bte", None) + return filtered_dict def process_apis(self, apis): @@ -796,7 +804,7 @@ async def get(self, *args, **kwargs): 
} self.set_header("Content-Type", "application/json") - self.write(json.dumps(response)) + self.write(response) async def post(self, *args, **kwargs): try: @@ -805,6 +813,8 @@ async def post(self, *args, **kwargs): # Parse the JSON content data = json.loads(body) parser = MetaKGParser() + self.args.api_details = self.get_argument("api_details", "0") + self.args.bte = self.get_argument("bte", "0") trapi_data = parser.get_TRAPI_metadatas(data=data) nontrapi_data = parser.get_non_TRAPI_metadatas(data=data) combined_data = trapi_data + nontrapi_data @@ -821,8 +831,7 @@ async def post(self, *args, **kwargs): } self.set_header("Content-Type", "application/json") - self.write(json.dumps(response)) + self.write(response) except json.JSONDecodeError: - self.set_status(400) - self.write({"error": "Invalid JSON format"}) + raise ValueError("Invalid JSON content in request body.") diff --git a/src/tests/_utils/metakg/integration/parser/parse.py b/src/tests/_utils/metakg/integration/parser/parse.py new file mode 100644 index 00000000..f628f430 --- /dev/null +++ b/src/tests/_utils/metakg/integration/parser/parse.py @@ -0,0 +1,80 @@ +import unittest +import requests +import json + + +class TestAPI(unittest.TestCase): + URL_EXAMPLE = "https://raw.githubusercontent.com/NCATS-Tangerine/translator-api-registry/master/mygene.info/openapi_full.yml" + + def setUp(self): + self.headers = {"Content-Type": "application/json"} + with open('/Users/nacosta/Documents/smartAPI/WORKING_BRANCH/add-metakg-endpoint/smartAPI/src/metadata_content.json', 'r') as file: + self.data = json.load(file) + + # POST Tests + def test_post_metakg_parse_api_details_1_bte_1(self): + url = "http://localhost:8000/api/metakg/parse?api_details=1&bte=1" + response = requests.post(url, headers=self.headers, json=self.data) + json_response = response.json() + self.assertEqual(response.status_code, 200) + self.assertIn('api', json_response['hits'][0].keys()) + self.assertIn('bte', json_response['hits'][0].keys()) + + def test_post_metakg_parse_api_details_0_bte_1(self): + url = "http://localhost:8000/api/metakg/parse?api_details=0&bte=1" + response = requests.post(url, headers=self.headers, json=self.data) + json_response = response.json() + self.assertEqual(response.status_code, 200) + self.assertIn('bte', json_response['hits'][0].keys()) + + def test_post_metakg_parse_api_details_1_bte_0(self): + url = "http://localhost:8000/api/metakg/parse?api_details=1&bte=0" + response = requests.post(url, headers=self.headers, json=self.data) + json_response = response.json() + self.assertEqual(response.status_code, 200) + self.assertIn('api', json_response['hits'][0].keys()) + self.assertNotIn('bte', json_response['hits'][0].keys()) + + def test_post_metakg_parse_api_details_0_bte_0(self): + url = "http://localhost:8000/api/metakg/parse?api_details=0&bte=0" + response = requests.post(url, headers=self.headers, json=self.data) + json_response = response.json() + self.assertEqual(response.status_code, 200) + self.assertNotIn('bte', json_response['hits'][0].keys()) + self.assertIn('subject', json_response['hits'][0].keys()) + + # GET Tests + def test_get_metakg_parse_api_details_1_bte_1(self): + url = f"http://localhost:8000/api/metakg/parse?url={self.URL_EXAMPLE}&api_details=1&bte=1" + response = requests.get(url) + json_response = response.json() + self.assertEqual(response.status_code, 200) + self.assertIn('api', json_response['hits'][0].keys()) + self.assertIn('bte', json_response['hits'][0].keys()) + + def 
test_get_metakg_parse_api_details_0_bte_1(self): + url = f"http://localhost:8000/api/metakg/parse?url={self.URL_EXAMPLE}&api_details=0&bte=1" + response = requests.get(url) + json_response = response.json() + self.assertEqual(response.status_code, 200) + self.assertIn('bte', json_response['hits'][0].keys()) + + def test_get_metakg_parse_api_details_1_bte_0(self): + url = f"http://localhost:8000/api/metakg/parse?url={self.URL_EXAMPLE}&api_details=1&bte=0" + response = requests.get(url) + json_response = response.json() + self.assertEqual(response.status_code, 200) + self.assertIn('api', json_response['hits'][0].keys()) + self.assertNotIn('bte', json_response['hits'][0].keys()) + + def test_get_metakg_parse_api_details_0_bte_0(self): + url = f"http://localhost:8000/api/metakg/parse?url={self.URL_EXAMPLE}&api_details=0&bte=0" + response = requests.get(url) + json_response = response.json() + self.assertEqual(response.status_code, 200) + self.assertNotIn('bte', json_response['hits'][0].keys()) + self.assertIn('subject', json_response['hits'][0].keys()) + + +if __name__ == "__main__": + unittest.main() From 49cff338958d4480f0ad975639177ed849c879d9 Mon Sep 17 00:00:00 2001 From: Nichollette Date: Thu, 5 Dec 2024 12:57:08 -0500 Subject: [PATCH 09/30] added timeout --- .../_utils/metakg/integration/parser/parse.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/tests/_utils/metakg/integration/parser/parse.py b/src/tests/_utils/metakg/integration/parser/parse.py index f628f430..303226fc 100644 --- a/src/tests/_utils/metakg/integration/parser/parse.py +++ b/src/tests/_utils/metakg/integration/parser/parse.py @@ -14,7 +14,7 @@ def setUp(self): # POST Tests def test_post_metakg_parse_api_details_1_bte_1(self): url = "http://localhost:8000/api/metakg/parse?api_details=1&bte=1" - response = requests.post(url, headers=self.headers, json=self.data) + response = requests.post(url, headers=self.headers, json=self.data, timeout=self.TIMEOUT) json_response = response.json() self.assertEqual(response.status_code, 200) self.assertIn('api', json_response['hits'][0].keys()) @@ -22,14 +22,14 @@ def test_post_metakg_parse_api_details_1_bte_1(self): def test_post_metakg_parse_api_details_0_bte_1(self): url = "http://localhost:8000/api/metakg/parse?api_details=0&bte=1" - response = requests.post(url, headers=self.headers, json=self.data) + response = requests.post(url, headers=self.headers, json=self.data, timeout=self.TIMEOUT) json_response = response.json() self.assertEqual(response.status_code, 200) self.assertIn('bte', json_response['hits'][0].keys()) def test_post_metakg_parse_api_details_1_bte_0(self): url = "http://localhost:8000/api/metakg/parse?api_details=1&bte=0" - response = requests.post(url, headers=self.headers, json=self.data) + response = requests.post(url, headers=self.headers, json=self.data, timeout=self.TIMEOUT) json_response = response.json() self.assertEqual(response.status_code, 200) self.assertIn('api', json_response['hits'][0].keys()) @@ -37,7 +37,7 @@ def test_post_metakg_parse_api_details_1_bte_0(self): def test_post_metakg_parse_api_details_0_bte_0(self): url = "http://localhost:8000/api/metakg/parse?api_details=0&bte=0" - response = requests.post(url, headers=self.headers, json=self.data) + response = requests.post(url, headers=self.headers, json=self.data, timeout=self.TIMEOUT) json_response = response.json() self.assertEqual(response.status_code, 200) self.assertNotIn('bte', json_response['hits'][0].keys()) @@ -46,7 +46,7 @@ def 
test_post_metakg_parse_api_details_0_bte_0(self): # GET Tests def test_get_metakg_parse_api_details_1_bte_1(self): url = f"http://localhost:8000/api/metakg/parse?url={self.URL_EXAMPLE}&api_details=1&bte=1" - response = requests.get(url) + response = requests.get(url, timeout=self.TIMEOUT) json_response = response.json() self.assertEqual(response.status_code, 200) self.assertIn('api', json_response['hits'][0].keys()) @@ -54,14 +54,14 @@ def test_get_metakg_parse_api_details_1_bte_1(self): def test_get_metakg_parse_api_details_0_bte_1(self): url = f"http://localhost:8000/api/metakg/parse?url={self.URL_EXAMPLE}&api_details=0&bte=1" - response = requests.get(url) + response = requests.get(url, timeout=self.TIMEOUT) json_response = response.json() self.assertEqual(response.status_code, 200) self.assertIn('bte', json_response['hits'][0].keys()) def test_get_metakg_parse_api_details_1_bte_0(self): url = f"http://localhost:8000/api/metakg/parse?url={self.URL_EXAMPLE}&api_details=1&bte=0" - response = requests.get(url) + response = requests.get(url, timeout=self.TIMEOUT) json_response = response.json() self.assertEqual(response.status_code, 200) self.assertIn('api', json_response['hits'][0].keys()) @@ -69,7 +69,7 @@ def test_get_metakg_parse_api_details_1_bte_0(self): def test_get_metakg_parse_api_details_0_bte_0(self): url = f"http://localhost:8000/api/metakg/parse?url={self.URL_EXAMPLE}&api_details=0&bte=0" - response = requests.get(url) + response = requests.get(url, timeout=self.TIMEOUT) json_response = response.json() self.assertEqual(response.status_code, 200) self.assertNotIn('bte', json_response['hits'][0].keys()) From eb798c5ee8d88cadd3bb9327b184c7829666b077 Mon Sep 17 00:00:00 2001 From: Nichollette Date: Thu, 5 Dec 2024 12:57:29 -0500 Subject: [PATCH 10/30] added timeout --- src/tests/_utils/metakg/integration/parser/parse.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/tests/_utils/metakg/integration/parser/parse.py b/src/tests/_utils/metakg/integration/parser/parse.py index 303226fc..3c3405da 100644 --- a/src/tests/_utils/metakg/integration/parser/parse.py +++ b/src/tests/_utils/metakg/integration/parser/parse.py @@ -5,6 +5,7 @@ class TestAPI(unittest.TestCase): URL_EXAMPLE = "https://raw.githubusercontent.com/NCATS-Tangerine/translator-api-registry/master/mygene.info/openapi_full.yml" + TIMEOUT = 10 # Timeout in seconds def setUp(self): self.headers = {"Content-Type": "application/json"} From 663d572b13a21382a38cae897e426b6e92b4b73e Mon Sep 17 00:00:00 2001 From: Nichollette Date: Fri, 31 Jan 2025 11:28:03 -0500 Subject: [PATCH 11/30] error handle updates --- src/utils/metakg/parser.py | 44 ++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/src/utils/metakg/parser.py b/src/utils/metakg/parser.py index d7815bf1..df4abcf2 100644 --- a/src/utils/metakg/parser.py +++ b/src/utils/metakg/parser.py @@ -1,6 +1,8 @@ import json import logging from copy import copy +from tornado.web import HTTPError +from utils.downloader import DownloadError import requests @@ -16,13 +18,14 @@ class MetaKGParser: def get_non_TRAPI_metadatas(self, data=None, extra_data=None, url=None): # Error Handling if not data and not url: - raise ValueError("Either data or url must be provided.") + raise HTTPError(400, reason="Either data or url value is expected for this request, please provide data or a url.") + # raise ValueError("Either data or url must be provided.") if data: parser = API(smartapi_doc=data) elif url: parser = API(url=url) else: - raise 
ValueError("Error getting metadata from provided data or url.") + raise HTTPError(404, "Error getting metadata from provided data or url, for more info please reference: ") mkg = self.extract_metakgedges(parser.metadata["operations"], extra_data=extra_data) no_nodes = len({x["subject"] for x in mkg} | {x["object"] for x in mkg}) @@ -33,13 +36,16 @@ def get_non_TRAPI_metadatas(self, data=None, extra_data=None, url=None): def get_TRAPI_metadatas(self, data=None, extra_data=None, url=None): ops = [] if not data and not url: - raise ValueError("Either data or url must be provided.") + raise HTTPError(400, reason="Either data or url value is expected for this request, please provide data or a url.") if data: metadata_list = self.get_TRAPI_with_metakg_endpoint(data=data) elif url: metadata_list = self.get_TRAPI_with_metakg_endpoint(url=url) else: - raise ValueError("Error getting metadata from provided data or url.") + raise HTTPError(404, "Error getting metadata from provided data or url, for more info please reference: ") + + if isinstance(metadata_list, Exception): + return metadata_list count_metadata_list = len(metadata_list) self.metakg_errors = {} @@ -53,22 +59,24 @@ def get_TRAPI_metadatas(self, data=None, extra_data=None, url=None): def get_TRAPI_with_metakg_endpoint(self, data=None, url=None): if not data and not url: - raise ValueError("Either data or url must be provided.") + raise HTTPError(400, reason="Either data or url value is expected for this request, please provide data or a url.") + # Initialize API with either data or URL + parser = API(smartapi_doc=data) if data else API(url=url) try: - # Initialize API with either data or URL - parser = API(smartapi_doc=data) if data else API(url=url) metadata = parser.metadata - _paths = metadata.get("paths", {}) - _team = metadata.get("x-translator", {}).get("team") - - # Check for required TRAPI paths - if "/meta_knowledge_graph" in _paths and "/query" in _paths and _team: - print("TRAPI metadata found.") - return [metadata] - else: - return [] - except Exception as e: - raise ValueError(f"Error getting TRAPI metadata: {e}") + except DownloadError as dl_err: + raise HTTPError(400, reason="Unable to download response data with given input. Please look at your given input for any errors.") + _paths = metadata.get("paths", {}) + _team = metadata.get("x-translator", {}).get("team") + + # Check for required TRAPI paths + if "/meta_knowledge_graph" in _paths and "/query" in _paths and _team: + print("TRAPI metadata found.") + return [metadata] + else: + return [] + # except Exception as value_error: # Specify Error + # return value_error def construct_query_url(self, server_url): if server_url.endswith("/"): From 1028d5b230b75fca31b6872b03b360a2a4007f42 Mon Sep 17 00:00:00 2001 From: Nichollette Date: Fri, 31 Jan 2025 11:28:36 -0500 Subject: [PATCH 12/30] error handling update --- src/handlers/api.py | 64 +++++++++++++++++++++++++++------------------ 1 file changed, 39 insertions(+), 25 deletions(-) diff --git a/src/handlers/api.py b/src/handlers/api.py index fdbfc6e4..04dc0aaf 100644 --- a/src/handlers/api.py +++ b/src/handlers/api.py @@ -687,6 +687,25 @@ async def get(self, *args, **kwargs): self.finish(res) class MetaKGParserHandler(BaseHandler): + """ + Handles parsing of SmartAPI metadata from a given URL or request body. + + This handler processes SmartAPI metadata and returns structured, + cleaned results based on the specified query parameters. + + Supported HTTP methods: + - **GET**: Parses metadata from a provided URL. 
+ - **POST**: Parses metadata from the request body. + + Query Parameters: + - `url` (str, required): The URL of the SmartAPI metadata to parse. + Maximum length: 1000 characters. + - `api_details` (bool, optional, default: `False`): + Whether to return detailed API information. + - `bte` (bool, optional, default: `False`): + Whether to include BTE (BioThings Explorer) specific metadata. + """ + kwargs = { "GET": { "url": { @@ -713,17 +732,18 @@ def get_filtered_api(self, api_dict): """Extract and return filtered API information.""" api_info = api_dict["api"] + # Convert arguments to integers for consistency + bte = int(self.args.bte) + api_details = int(self.args.api_details) + # Default structure to preserve top-level keys filtered_dict = { - "subject": api_dict.get("subject"), - "object": api_dict.get("object"), - "predicate": api_dict.get("predicate"), - "subject_prefix": api_dict.get("subject_prefix"), - "object_prefix": api_dict.get("object_prefix"), - } + key: api_dict.get(key) + for key in ["subject", "object", "predicate", "subject_prefix", "object_prefix"] + } - # case: bte=1, api_details=0 - if self.args.bte == "1" and self.args.api_details == "0": + # Determine filtered API structure based on `bte` and `api_details` + if bte == 1 and api_details == 0: filtered_api = { **({"name": api_info["name"]} if "name" in api_info else {}), **( @@ -733,18 +753,12 @@ def get_filtered_api(self, api_dict): ), "bte": api_info.get("bte", {}), } - - # case: bte=0, api_details=1 - elif self.args.bte == "0" and self.args.api_details == "1": - api_info.pop("bte", None) - filtered_api = api_info - - # case: api_details=1, bte=1 - elif self.args.bte == "1" and self.args.api_details == "1": - filtered_api = api_info - - # case: bte=0, api_details=0 - else: + elif api_details == 1: + # Covers both (bte=0, api_details=1) and (bte=1, api_details=1) + filtered_api = api_info.copy() + if bte == 0: + filtered_api.pop("bte", None) + else: # bte == 0 and api_details == 0 filtered_api = { **({"name": api_info["name"]} if "name" in api_info else {}), **( @@ -753,15 +767,17 @@ def get_filtered_api(self, api_dict): else {} ), } + # Add the filtered 'api' key to the preserved top-level structure filtered_dict["api"] = filtered_api - # Remove 'bte' from 'api' if it exists + # Remove 'bte' from 'api' and move it to the top level if "bte" in filtered_dict["api"]: - filtered_dict['bte'] = filtered_dict["api"].pop("bte", None) + filtered_dict["bte"] = filtered_dict["api"].pop("bte") return filtered_dict + def process_apis(self, apis): """Process each API dict based on provided args.""" if isinstance(apis, list): @@ -779,9 +795,7 @@ def process_apis(self, apis): async def get(self, *args, **kwargs): if not self.get_argument("url", None): - self.set_status(400) - self.write({"error": "Missing 'url' argument"}) - return + raise HTTPError(400, reason="A url value is expected for the request, please provide a url.") parser = MetaKGParser() url = self.get_argument("url") From 4bd1ad4d0c56f34c7958544801c106d823b6226e Mon Sep 17 00:00:00 2001 From: Nichollette Date: Wed, 5 Feb 2025 13:44:57 -0500 Subject: [PATCH 13/30] error handling update for parse POST --- src/handlers/api.py | 110 +++++++++++++++++++++---------------- src/utils/metakg/parser.py | 7 +-- 2 files changed, 67 insertions(+), 50 deletions(-) diff --git a/src/handlers/api.py b/src/handlers/api.py index 04dc0aaf..f3ae4f6a 100644 --- a/src/handlers/api.py +++ b/src/handlers/api.py @@ -688,22 +688,22 @@ async def get(self, *args, **kwargs): class 
MetaKGParserHandler(BaseHandler): """ - Handles parsing of SmartAPI metadata from a given URL or request body. - - This handler processes SmartAPI metadata and returns structured, - cleaned results based on the specified query parameters. - - Supported HTTP methods: - - **GET**: Parses metadata from a provided URL. - - **POST**: Parses metadata from the request body. - - Query Parameters: - - `url` (str, required): The URL of the SmartAPI metadata to parse. - Maximum length: 1000 characters. - - `api_details` (bool, optional, default: `False`): - Whether to return detailed API information. - - `bte` (bool, optional, default: `False`): - Whether to include BTE (BioThings Explorer) specific metadata. + Handles parsing of SmartAPI metadata from a given URL or request body. + + This handler processes SmartAPI metadata and returns structured, + cleaned results based on the specified query parameters. + + Supported HTTP methods: + - **GET**: Parses metadata from a provided URL. + - **POST**: Parses metadata from the request body. + + Query Parameters: + - `url` (str, required): The URL of the SmartAPI metadata to parse. + Maximum length: 1000 characters. + - `api_details` (bool, optional, default: `False`): + Whether to return detailed API information. + - `bte` (bool, optional, default: `False`): + Whether to include BTE (BioThings Explorer) specific metadata. """ kwargs = { @@ -731,10 +731,8 @@ def initialize(self, *args, **kwargs): def get_filtered_api(self, api_dict): """Extract and return filtered API information.""" api_info = api_dict["api"] - - # Convert arguments to integers for consistency - bte = int(self.args.bte) - api_details = int(self.args.api_details) + bte = self.args.bte + api_details = self.args.api_details # Default structure to preserve top-level keys filtered_dict = { @@ -797,15 +795,20 @@ async def get(self, *args, **kwargs): if not self.get_argument("url", None): raise HTTPError(400, reason="A url value is expected for the request, please provide a url.") + # Set initial args parser = MetaKGParser() url = self.get_argument("url") - self.args.api_details = self.get_argument("api_details", False) - self.args.bte = self.get_argument("bte", False) + self.args.api_details = int(self.get_argument("api_details", 0)) + self.args.bte = int(self.get_argument("bte", 0)) + # Get data trapi_data = parser.get_TRAPI_metadatas(data=None, url=url) nontrapi_data = parser.get_non_TRAPI_metadatas(data=None, url=url) combined_data = trapi_data + nontrapi_data + if not combined_data: + raise HTTPError(404, reason="Metadata not found.") + for i, api_dict in enumerate(combined_data): filtered_api = self.get_filtered_api(api_dict) combined_data[i] = filtered_api @@ -821,31 +824,46 @@ async def get(self, *args, **kwargs): self.write(response) async def post(self, *args, **kwargs): + if not self.request.body: + raise HTTPError(400, reason="Request body cannot be empty.") + + # Attempt to parse JSON body try: - # Read the raw request body - body = self.request.body - # Parse the JSON content - data = json.loads(body) - parser = MetaKGParser() - self.args.api_details = self.get_argument("api_details", "0") - self.args.bte = self.get_argument("bte", "0") - trapi_data = parser.get_TRAPI_metadatas(data=data) - nontrapi_data = parser.get_non_TRAPI_metadatas(data=data) - combined_data = trapi_data + nontrapi_data - - for i, api_dict in enumerate(combined_data): - filtered_api = self.get_filtered_api(api_dict) - combined_data[i] = filtered_api + data = json.loads(self.request.body) + except 
json.JSONDecodeError: + raise HTTPError(400, reason="Invalid JSON content in request body.") - response = { - "took": 1, - "total": len(combined_data), - "max_score": 1, - "hits": combined_data, - } + # Ensure the parsed data is a dictionary + if not isinstance(data, dict): + raise HTTPError(400, reason="Invalid JSON format. Expected a JSON object.") - self.set_header("Content-Type", "application/json") - self.write(response) + parser = MetaKGParser() + + try: + self.args.api_details = int(self.get_argument("api_details", 0)) + self.args.bte = int(self.get_argument("bte", 0)) + except ValueError: + raise HTTPError(400, reason="Invalid query parameter value. 'api_details' and 'bte' must be integers.") + + # Process metadata + trapi_data = parser.get_TRAPI_metadatas(data=data) + nontrapi_data = parser.get_non_TRAPI_metadatas(data=data) + combined_data = trapi_data + nontrapi_data - except json.JSONDecodeError: - raise ValueError("Invalid JSON content in request body.") + if not combined_data: + raise HTTPError(404, reason="Metadata not found.") + + # Apply filtering + for i, api_dict in enumerate(combined_data): + filtered_api = self.get_filtered_api(api_dict) + combined_data[i] = filtered_api + + response = { + "took": 1, + "total": len(combined_data), + "max_score": 1, + "hits": combined_data, + } + + self.set_header("Content-Type", "application/json") + self.write(response) \ No newline at end of file diff --git a/src/utils/metakg/parser.py b/src/utils/metakg/parser.py index df4abcf2..9f2fe48d 100644 --- a/src/utils/metakg/parser.py +++ b/src/utils/metakg/parser.py @@ -16,7 +16,6 @@ class MetaKGParser: metakg_errors = None def get_non_TRAPI_metadatas(self, data=None, extra_data=None, url=None): - # Error Handling if not data and not url: raise HTTPError(400, reason="Either data or url value is expected for this request, please provide data or a url.") # raise ValueError("Either data or url must be provided.") @@ -25,7 +24,7 @@ def get_non_TRAPI_metadatas(self, data=None, extra_data=None, url=None): elif url: parser = API(url=url) else: - raise HTTPError(404, "Error getting metadata from provided data or url, for more info please reference: ") + raise HTTPError(404, "No metadata available from provided data or url.") mkg = self.extract_metakgedges(parser.metadata["operations"], extra_data=extra_data) no_nodes = len({x["subject"] for x in mkg} | {x["object"] for x in mkg}) @@ -42,7 +41,7 @@ def get_TRAPI_metadatas(self, data=None, extra_data=None, url=None): elif url: metadata_list = self.get_TRAPI_with_metakg_endpoint(url=url) else: - raise HTTPError(404, "Error getting metadata from provided data or url, for more info please reference: ") + raise HTTPError(404, "No metadata available from provided data or url.") if isinstance(metadata_list, Exception): return metadata_list @@ -65,7 +64,7 @@ def get_TRAPI_with_metakg_endpoint(self, data=None, url=None): try: metadata = parser.metadata except DownloadError as dl_err: - raise HTTPError(400, reason="Unable to download response data with given input. 
Please look at your given input for any errors.") + raise HTTPError(400, reason="Error fetching data from given input.") _paths = metadata.get("paths", {}) _team = metadata.get("x-translator", {}).get("team") From 3c3c001015a3c43378fa74c62e15f8fcb2dd19f6 Mon Sep 17 00:00:00 2001 From: Nichollette Date: Wed, 12 Feb 2025 12:26:38 -0500 Subject: [PATCH 14/30] flake8 clean up --- src/handlers/api.py | 60 +++++++------- .../_utils/metakg/integration/parser/parse.py | 81 ------------------- src/utils/metakg/parser.py | 2 +- 3 files changed, 34 insertions(+), 109 deletions(-) delete mode 100644 src/tests/_utils/metakg/integration/parser/parse.py diff --git a/src/handlers/api.py b/src/handlers/api.py index f3ae4f6a..36b80359 100644 --- a/src/handlers/api.py +++ b/src/handlers/api.py @@ -688,21 +688,21 @@ async def get(self, *args, **kwargs): class MetaKGParserHandler(BaseHandler): """ - Handles parsing of SmartAPI metadata from a given URL or request body. + Handles parsing of SmartAPI metadata from a given URL or request body. - This handler processes SmartAPI metadata and returns structured, - cleaned results based on the specified query parameters. + This handler processes SmartAPI metadata and returns structured, + cleaned results based on the specified query parameters. Supported HTTP methods: - **GET**: Parses metadata from a provided URL. - **POST**: Parses metadata from the request body. Query Parameters: - - `url` (str, required): The URL of the SmartAPI metadata to parse. + - `url` (str, required): The URL of the SmartAPI metadata to parse. Maximum length: 1000 characters. - - `api_details` (bool, optional, default: `False`): + - `api_details` (bool, optional, default: `False`): Whether to return detailed API information. - - `bte` (bool, optional, default: `False`): + - `bte` (bool, optional, default: `False`): Whether to include BTE (BioThings Explorer) specific metadata. """ @@ -775,7 +775,6 @@ def get_filtered_api(self, api_dict): return filtered_dict - def process_apis(self, apis): """Process each API dict based on provided args.""" if isinstance(apis, list): @@ -798,20 +797,25 @@ async def get(self, *args, **kwargs): # Set initial args parser = MetaKGParser() url = self.get_argument("url") - self.args.api_details = int(self.get_argument("api_details", 0)) - self.args.bte = int(self.get_argument("bte", 0)) + try: + self.args.api_details = int(self.get_argument("api_details", 0)) + except ValueError: + raise HTTPError(400, reason=f"Unexcepted value for api_details, {self.get_argument('api_details')}. Please enter integer, 0 or 1.") + try: + self.args.bte = int(self.get_argument("bte", 0)) + except ValueError: + raise HTTPError(400, reason=f"Unexcepted value for bte, {self.get_argument('bte')}. 
Please enter integer, 0 or 1.") # Get data trapi_data = parser.get_TRAPI_metadatas(data=None, url=url) nontrapi_data = parser.get_non_TRAPI_metadatas(data=None, url=url) combined_data = trapi_data + nontrapi_data - if not combined_data: - raise HTTPError(404, reason="Metadata not found.") - - for i, api_dict in enumerate(combined_data): - filtered_api = self.get_filtered_api(api_dict) - combined_data[i] = filtered_api + # Apply filtering -- if data found + if combined_data: + for i, api_dict in enumerate(combined_data): + filtered_api = self.get_filtered_api(api_dict) + combined_data[i] = filtered_api response = { "took": 1, @@ -831,32 +835,34 @@ async def post(self, *args, **kwargs): try: data = json.loads(self.request.body) except json.JSONDecodeError: - raise HTTPError(400, reason="Invalid JSON content in request body.") + raise HTTPError(400, reason=f"Unexcepted value for api_details, {self.get_argument('api_details')}. Please enter integer, 0 or 1.") # Ensure the parsed data is a dictionary if not isinstance(data, dict): - raise HTTPError(400, reason="Invalid JSON format. Expected a JSON object.") + raise HTTPError(400, reason=f"Unexcepted value for bte, {self.get_argument('bte')}. Please enter integer, 0 or 1.") parser = MetaKGParser() - + try: self.args.api_details = int(self.get_argument("api_details", 0)) - self.args.bte = int(self.get_argument("bte", 0)) except ValueError: raise HTTPError(400, reason="Invalid query parameter value. 'api_details' and 'bte' must be integers.") + try: + self.args.bte = int(self.get_argument("bte", 0)) + except ValueError: + raise HTTPError(400, reason=f"Unexcepted value for bte, {self.get_argument('bte')}. Please enter integer, 0 or 1.") + # Process metadata trapi_data = parser.get_TRAPI_metadatas(data=data) nontrapi_data = parser.get_non_TRAPI_metadatas(data=data) combined_data = trapi_data + nontrapi_data - if not combined_data: - raise HTTPError(404, reason="Metadata not found.") - - # Apply filtering - for i, api_dict in enumerate(combined_data): - filtered_api = self.get_filtered_api(api_dict) - combined_data[i] = filtered_api + # Apply filtering -- if data found + if combined_data: + for i, api_dict in enumerate(combined_data): + filtered_api = self.get_filtered_api(api_dict) + combined_data[i] = filtered_api response = { "took": 1, @@ -866,4 +872,4 @@ async def post(self, *args, **kwargs): } self.set_header("Content-Type", "application/json") - self.write(response) \ No newline at end of file + self.write(response) diff --git a/src/tests/_utils/metakg/integration/parser/parse.py b/src/tests/_utils/metakg/integration/parser/parse.py deleted file mode 100644 index 3c3405da..00000000 --- a/src/tests/_utils/metakg/integration/parser/parse.py +++ /dev/null @@ -1,81 +0,0 @@ -import unittest -import requests -import json - - -class TestAPI(unittest.TestCase): - URL_EXAMPLE = "https://raw.githubusercontent.com/NCATS-Tangerine/translator-api-registry/master/mygene.info/openapi_full.yml" - TIMEOUT = 10 # Timeout in seconds - - def setUp(self): - self.headers = {"Content-Type": "application/json"} - with open('/Users/nacosta/Documents/smartAPI/WORKING_BRANCH/add-metakg-endpoint/smartAPI/src/metadata_content.json', 'r') as file: - self.data = json.load(file) - - # POST Tests - def test_post_metakg_parse_api_details_1_bte_1(self): - url = "http://localhost:8000/api/metakg/parse?api_details=1&bte=1" - response = requests.post(url, headers=self.headers, json=self.data, timeout=self.TIMEOUT) - json_response = response.json() - 
self.assertEqual(response.status_code, 200) - self.assertIn('api', json_response['hits'][0].keys()) - self.assertIn('bte', json_response['hits'][0].keys()) - - def test_post_metakg_parse_api_details_0_bte_1(self): - url = "http://localhost:8000/api/metakg/parse?api_details=0&bte=1" - response = requests.post(url, headers=self.headers, json=self.data, timeout=self.TIMEOUT) - json_response = response.json() - self.assertEqual(response.status_code, 200) - self.assertIn('bte', json_response['hits'][0].keys()) - - def test_post_metakg_parse_api_details_1_bte_0(self): - url = "http://localhost:8000/api/metakg/parse?api_details=1&bte=0" - response = requests.post(url, headers=self.headers, json=self.data, timeout=self.TIMEOUT) - json_response = response.json() - self.assertEqual(response.status_code, 200) - self.assertIn('api', json_response['hits'][0].keys()) - self.assertNotIn('bte', json_response['hits'][0].keys()) - - def test_post_metakg_parse_api_details_0_bte_0(self): - url = "http://localhost:8000/api/metakg/parse?api_details=0&bte=0" - response = requests.post(url, headers=self.headers, json=self.data, timeout=self.TIMEOUT) - json_response = response.json() - self.assertEqual(response.status_code, 200) - self.assertNotIn('bte', json_response['hits'][0].keys()) - self.assertIn('subject', json_response['hits'][0].keys()) - - # GET Tests - def test_get_metakg_parse_api_details_1_bte_1(self): - url = f"http://localhost:8000/api/metakg/parse?url={self.URL_EXAMPLE}&api_details=1&bte=1" - response = requests.get(url, timeout=self.TIMEOUT) - json_response = response.json() - self.assertEqual(response.status_code, 200) - self.assertIn('api', json_response['hits'][0].keys()) - self.assertIn('bte', json_response['hits'][0].keys()) - - def test_get_metakg_parse_api_details_0_bte_1(self): - url = f"http://localhost:8000/api/metakg/parse?url={self.URL_EXAMPLE}&api_details=0&bte=1" - response = requests.get(url, timeout=self.TIMEOUT) - json_response = response.json() - self.assertEqual(response.status_code, 200) - self.assertIn('bte', json_response['hits'][0].keys()) - - def test_get_metakg_parse_api_details_1_bte_0(self): - url = f"http://localhost:8000/api/metakg/parse?url={self.URL_EXAMPLE}&api_details=1&bte=0" - response = requests.get(url, timeout=self.TIMEOUT) - json_response = response.json() - self.assertEqual(response.status_code, 200) - self.assertIn('api', json_response['hits'][0].keys()) - self.assertNotIn('bte', json_response['hits'][0].keys()) - - def test_get_metakg_parse_api_details_0_bte_0(self): - url = f"http://localhost:8000/api/metakg/parse?url={self.URL_EXAMPLE}&api_details=0&bte=0" - response = requests.get(url, timeout=self.TIMEOUT) - json_response = response.json() - self.assertEqual(response.status_code, 200) - self.assertNotIn('bte', json_response['hits'][0].keys()) - self.assertIn('subject', json_response['hits'][0].keys()) - - -if __name__ == "__main__": - unittest.main() diff --git a/src/utils/metakg/parser.py b/src/utils/metakg/parser.py index 9f2fe48d..d3e01e6f 100644 --- a/src/utils/metakg/parser.py +++ b/src/utils/metakg/parser.py @@ -63,7 +63,7 @@ def get_TRAPI_with_metakg_endpoint(self, data=None, url=None): parser = API(smartapi_doc=data) if data else API(url=url) try: metadata = parser.metadata - except DownloadError as dl_err: + except DownloadError: raise HTTPError(400, reason="Error fetching data from given input.") _paths = metadata.get("paths", {}) _team = metadata.get("x-translator", {}).get("team") From 260d7d05afe50f46af2e0830fe726259149c70e4 Mon Sep 17 
00:00:00 2001 From: Nichollette Date: Thu, 20 Feb 2025 18:13:55 -0500 Subject: [PATCH 15/30] errors raised for unique instances with clear error message --- src/handlers/api.py | 36 +++++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/src/handlers/api.py b/src/handlers/api.py index 36b80359..fad6d97e 100644 --- a/src/handlers/api.py +++ b/src/handlers/api.py @@ -1,3 +1,4 @@ +from builtins import ValueError, isinstance import asyncio import json import logging @@ -22,6 +23,7 @@ from utils.metakg.biolink_helpers import get_expanded_values from utils.notification import SlackNewAPIMessage, SlackNewTranslatorAPIMessage from utils.metakg.parser import MetaKGParser +from utils.metakg.metakg_errors import MetadataRetrievalError logger = logging.getLogger("smartAPI") @@ -806,9 +808,21 @@ async def get(self, *args, **kwargs): except ValueError: raise HTTPError(400, reason=f"Unexcepted value for bte, {self.get_argument('bte')}. Please enter integer, 0 or 1.") - # Get data - trapi_data = parser.get_TRAPI_metadatas(data=None, url=url) - nontrapi_data = parser.get_non_TRAPI_metadatas(data=None, url=url) + try: + trapi_data = parser.get_TRAPI_metadatas(data=None, url=url) + except MetadataRetrievalError as retrieve_err: + raise HTTPError(retrieve_err.status_code, reason=retrieve_err.message) + except DownloadError: + raise HTTPError(400, reason="There was an error downloading the data from the given input.") + + # Get non-TRAPI metadata + try: + nontrapi_data = parser.get_non_TRAPI_metadatas(data=None, url=url) + except MetadataRetrievalError as retrieve_err: + raise HTTPError(retrieve_err.status_code, reason=retrieve_err.message) + except DownloadError: + raise HTTPError(400, reason="There was an error downloading the data from the given input.") + combined_data = trapi_data + nontrapi_data # Apply filtering -- if data found @@ -854,8 +868,20 @@ async def post(self, *args, **kwargs): raise HTTPError(400, reason=f"Unexcepted value for bte, {self.get_argument('bte')}. 
Please enter integer, 0 or 1.")

         # Process metadata
-        trapi_data = parser.get_TRAPI_metadatas(data=data)
-        nontrapi_data = parser.get_non_TRAPI_metadatas(data=data)
+        try:
+            trapi_data = parser.get_TRAPI_metadatas(data=data)
+        except MetadataRetrievalError as retrieve_err:
+            raise HTTPError(retrieve_err.status_code, reason=retrieve_err.message)
+        except DownloadError:
+            raise HTTPError(400, reason="There was an error downloading the data from the given input.")
+
+        try:
+            nontrapi_data = parser.get_non_TRAPI_metadatas(data=data)
+        except MetadataRetrievalError as retrieve_err:
+            raise HTTPError(retrieve_err.status_code, reason=retrieve_err.message)
+        except DownloadError:
+            raise HTTPError(400, reason="There was an error downloading the data from the given input.")
+
         combined_data = trapi_data + nontrapi_data

         # Apply filtering -- if data found
From 2d6987cda64fc52fddee0af2f5418b791d149d92 Mon Sep 17 00:00:00 2001
From: Nichollette
Date: Thu, 20 Feb 2025 18:15:05 -0500
Subject: [PATCH 16/30] adding unique MetadataRetrievalError class for identifying metadata errors and passing to handler

---
 src/utils/metakg/metakg_errors.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)
 create mode 100644 src/utils/metakg/metakg_errors.py

diff --git a/src/utils/metakg/metakg_errors.py b/src/utils/metakg/metakg_errors.py
new file mode 100644
index 00000000..91db45c5
--- /dev/null
+++ b/src/utils/metakg/metakg_errors.py
@@ -0,0 +1,16 @@
+class MetadataRetrievalError(Exception):
+    """Custom exception for metadata retrieval failures."""
+
+    def __init__(self, status_code, message):
+        self.status_code = status_code
+        self.message = message
+        super().__init__(f"MetadataRetrievalError {status_code}: {message}")
+
+    def to_dict(self):
+        """Return error details in JSON-like dictionary format."""
+        return {
+            "code": self.status_code,
+            "success": False,
+            "error": "Metadata Retrieval Error",
+            "details": str(self)
+        }
\ No newline at end of file
From bad1284d9f0ce70bbcb3802e5a2b1eae0e6fe0a4 Mon Sep 17 00:00:00 2001
From: Nichollette
Date: Mon, 24 Feb 2025 14:08:58 -0500
Subject: [PATCH 17/30] mkg parser and handler clean up error code

---
 src/handlers/api.py | 7 ++---
 src/utils/metakg/parser.py | 55 ++++++++++++++++++++++++--------------
 2 files changed, 39 insertions(+), 23 deletions(-)

diff --git a/src/handlers/api.py b/src/handlers/api.py
index fad6d97e..862fd340 100644
--- a/src/handlers/api.py
+++ b/src/handlers/api.py
@@ -799,14 +799,15 @@ async def get(self, *args, **kwargs):
         # Set initial args
         parser = MetaKGParser()
         url = self.get_argument("url")
+
         try:
             self.args.api_details = int(self.get_argument("api_details", 0))
         except ValueError:
-            raise HTTPError(400, reason=f"Unexcepted value for api_details, {self.get_argument('api_details')}. Please enter integer, 0 or 1.")
+            raise HTTPError(400, reason=f"Value, {self.get_argument('api_details')}, not accepted for api_details. Please enter integer, 0 or 1.")
         try:
             self.args.bte = int(self.get_argument("bte", 0))
         except ValueError:
-            raise HTTPError(400, reason=f"Unexcepted value for bte, {self.get_argument('bte')}. Please enter integer, 0 or 1.")
+            raise HTTPError(400, reason=f"Value,, {self.get_argument('bte')}, not accepted for bte. 
Please enter integer, 0 or 1.") try: trapi_data = parser.get_TRAPI_metadatas(data=None, url=url) @@ -814,7 +815,7 @@ async def get(self, *args, **kwargs): raise HTTPError(retrieve_err.status_code, reason=retrieve_err.message) except DownloadError: raise HTTPError(400, reason="There was an error downloading the data from the given input.") - + # Get non-TRAPI metadata try: nontrapi_data = parser.get_non_TRAPI_metadatas(data=None, url=url) diff --git a/src/utils/metakg/parser.py b/src/utils/metakg/parser.py index d3e01e6f..afce0f2c 100644 --- a/src/utils/metakg/parser.py +++ b/src/utils/metakg/parser.py @@ -3,7 +3,7 @@ from copy import copy from tornado.web import HTTPError from utils.downloader import DownloadError - +from utils.metakg.metakg_errors import MetadataRetrievalError import requests from .api import API @@ -16,15 +16,19 @@ class MetaKGParser: metakg_errors = None def get_non_TRAPI_metadatas(self, data=None, extra_data=None, url=None): + """ + Extract MetaKG edges from a SmartAPI document provided as `data` or fetched from a `url`. + Raises an error if no valid input is given. + """ if not data and not url: - raise HTTPError(400, reason="Either data or url value is expected for this request, please provide data or a url.") - # raise ValueError("Either data or url must be provided.") + raise MetadataRetrievalError(400, "Either data or url value is expected for this request, please provide data or a url.") + if data: parser = API(smartapi_doc=data) elif url: parser = API(url=url) else: - raise HTTPError(404, "No metadata available from provided data or url.") + raise MetadataRetrievalError(404, "No metadata available from provided data or url.") mkg = self.extract_metakgedges(parser.metadata["operations"], extra_data=extra_data) no_nodes = len({x["subject"] for x in mkg} | {x["object"] for x in mkg}) @@ -33,38 +37,51 @@ def get_non_TRAPI_metadatas(self, data=None, extra_data=None, url=None): return mkg def get_TRAPI_metadatas(self, data=None, extra_data=None, url=None): - ops = [] + """ + Extract and process TRAPI metadata from a SmartAPI document or URL. + Returns MetaKG edges or propagates errors. 
+ """ if not data and not url: - raise HTTPError(400, reason="Either data or url value is expected for this request, please provide data or a url.") - if data: - metadata_list = self.get_TRAPI_with_metakg_endpoint(data=data) - elif url: - metadata_list = self.get_TRAPI_with_metakg_endpoint(url=url) - else: - raise HTTPError(404, "No metadata available from provided data or url.") + raise MetadataRetrievalError(400, "Either data or url value is expected for this request, please provide data or a url.") - if isinstance(metadata_list, Exception): - return metadata_list + try: + if data: + metadata_list = self.get_TRAPI_with_metakg_endpoint(data=data) + else: + metadata_list = self.get_TRAPI_with_metakg_endpoint(url=url) + except MetadataRetrievalError: + raise MetadataRetrievalError(404, "No metadata available from provided data or url.") count_metadata_list = len(metadata_list) self.metakg_errors = {} + ops = [] + for i, metadata in enumerate(metadata_list): ops.extend(self.get_ops_from_metakg_endpoint(metadata, f"[{i + 1}/{count_metadata_list}]")) + if self.metakg_errors: - cnt_metakg_errors = sum([len(x) for x in self.metakg_errors.values()]) + cnt_metakg_errors = sum(len(x) for x in self.metakg_errors.values()) logger.error(f"Found {cnt_metakg_errors} TRAPI metakg errors:\n {json.dumps(self.metakg_errors, indent=2)}") return self.extract_metakgedges(ops, extra_data=extra_data) def get_TRAPI_with_metakg_endpoint(self, data=None, url=None): + """ + Retrieve TRAPI metadata from a SmartAPI document or URL. + Returns metadata if TRAPI endpoints are found, else an empty list. + """ if not data and not url: - raise HTTPError(400, reason="Either data or url value is expected for this request, please provide data or a url.") + raise MetadataRetrievalError(400, "Either data or url value is expected for this request, please provide data or a url.") + # Initialize API with either data or URL parser = API(smartapi_doc=data) if data else API(url=url) + + # Download the metadata try: metadata = parser.metadata - except DownloadError: - raise HTTPError(400, reason="Error fetching data from given input.") + except DownloadError as dl_err: + raise dl_err + _paths = metadata.get("paths", {}) _team = metadata.get("x-translator", {}).get("team") @@ -74,8 +91,6 @@ def get_TRAPI_with_metakg_endpoint(self, data=None, url=None): return [metadata] else: return [] - # except Exception as value_error: # Specify Error - # return value_error def construct_query_url(self, server_url): if server_url.endswith("/"): From 95f8f84e2af2f66f600a1cdca055c906697b70e5 Mon Sep 17 00:00:00 2001 From: Nichollette Date: Thu, 13 Mar 2025 18:31:02 -0400 Subject: [PATCH 18/30] added Mixin function for improved code --- src/handlers/api.py | 163 ++++++++++++++++++------------------- src/utils/metakg/parser.py | 13 ++- 2 files changed, 83 insertions(+), 93 deletions(-) diff --git a/src/handlers/api.py b/src/handlers/api.py index 862fd340..c9986d61 100644 --- a/src/handlers/api.py +++ b/src/handlers/api.py @@ -24,6 +24,7 @@ from utils.notification import SlackNewAPIMessage, SlackNewTranslatorAPIMessage from utils.metakg.parser import MetaKGParser from utils.metakg.metakg_errors import MetadataRetrievalError +from utils.decoder import to_dict logger = logging.getLogger("smartAPI") @@ -44,7 +45,6 @@ def _(self, *args, **kwargs): class BaseHandler(BioThingsAuthnMixin, BaseAPIHandler): pass - class AuthHandler(BaseHandler): def set_cache_header(self, cache_value): # disabel cache for auth-related handlers @@ -382,7 +382,57 @@ def post(self): 
else: raise HTTPError(400, reason="Missing required form field: id") +class MetaKGHandlerMixin: + """ + Mixin to provide reusable logic for filtering API information. + """ + def get_filtered_api(self, api_dict): + """Extract and return filtered API information.""" + api_info = api_dict.get("api", api_dict) # Handle both formats + bte = getattr(self.args, "bte", 0) + api_details = getattr(self.args, "api_details", 0) + + # Default structure to preserve top-level keys + filtered_dict = { + key: api_dict.get(key) + for key in ["subject", "object", "predicate", "subject_prefix", "object_prefix"] + if key in api_dict + } + + # Determine filtered API structure based on `bte` and `api_details` + if bte == 1 and api_details == 0: + filtered_api = { + **({"name": api_info.get("name")} if "name" in api_info else {}), + **( + {"smartapi": {"id": api_info.get("smartapi", {}).get("id", None)}} + if "smartapi" in api_info + else {"smartapi": {"id": None}} + ), + "bte": api_info.get("bte", {}), + } + elif api_details == 1: + # Covers both (bte=0, api_details=1) and (bte=1, api_details=1) + filtered_api = api_info.copy() + if bte == 0: + filtered_api.pop("bte", None) + else: # bte == 0 and api_details == 0 + filtered_api = { + **({"name": api_info.get("name")} if "name" in api_info else {}), + **( + {"smartapi": {"id": api_info.get("smartapi", {}).get("id", None)}} + if "smartapi" in api_info + else {"smartapi": {"id": None}} + ), + } + + # Add the filtered 'api' key to the preserved top-level structure + filtered_dict["api"] = filtered_api + + # Remove 'bte' from 'api' and move it to the top level + if "bte" in filtered_dict["api"]: + filtered_dict["bte"] = filtered_dict["api"].pop("bte") + return filtered_dict class MetaKGQueryHandler(QueryHandler): """ Support metakg queries with biolink model's semantic descendants @@ -462,27 +512,6 @@ async def get(self, *args, **kwargs): await super().get(*args, **kwargs) - def get_filtered_api(self, api_dict): - """Extract and return filtered API information.""" - api_info = api_dict - if not self.args.bte and not self.args.api_details: # no bte and no api details - filtered_api= { - **({"name": api_info["name"]} if "name" in api_info else {}), - **({"smartapi": {"id": api_info["smartapi"]["id"]}} if "smartapi" in api_info and "id" in api_info["smartapi"] else {}) - } - elif self.args.bte and not self.args.api_details : # bte and no api details - filtered_api= { - **({"name": api_info["name"]} if "name" in api_info else {}), - **({"smartapi": {"id": api_info["smartapi"]["id"]}} if "smartapi" in api_info and "id" in api_info["smartapi"] else {}), - 'bte': api_info.get('bte', {}) - } - elif not self.args.bte and self.args.api_details: # no bte and api details - api_info.pop('bte', None) - filtered_api = api_info - else: - filtered_api = api_info - return filtered_api - def process_apis(self, apis): """Process each API dict based on provided args.""" if isinstance(apis, list): @@ -730,53 +759,6 @@ def initialize(self, *args, **kwargs): # change the default query pipeline from self.biothings.pipeline self.pipeline = MetaKGQueryPipeline(ns=self.biothings) - def get_filtered_api(self, api_dict): - """Extract and return filtered API information.""" - api_info = api_dict["api"] - bte = self.args.bte - api_details = self.args.api_details - - # Default structure to preserve top-level keys - filtered_dict = { - key: api_dict.get(key) - for key in ["subject", "object", "predicate", "subject_prefix", "object_prefix"] - } - - # Determine filtered API structure based on `bte` 
and `api_details` - if bte == 1 and api_details == 0: - filtered_api = { - **({"name": api_info["name"]} if "name" in api_info else {}), - **( - {"smartapi": {"id": api_info["smartapi"]["id"]}} - if "smartapi" in api_info and "id" in api_info["smartapi"] - else {} - ), - "bte": api_info.get("bte", {}), - } - elif api_details == 1: - # Covers both (bte=0, api_details=1) and (bte=1, api_details=1) - filtered_api = api_info.copy() - if bte == 0: - filtered_api.pop("bte", None) - else: # bte == 0 and api_details == 0 - filtered_api = { - **({"name": api_info["name"]} if "name" in api_info else {}), - **( - {"smartapi": {"id": api_info["smartapi"]["id"]}} - if "smartapi" in api_info and "id" in api_info["smartapi"] - else {} - ), - } - - # Add the filtered 'api' key to the preserved top-level structure - filtered_dict["api"] = filtered_api - - # Remove 'bte' from 'api' and move it to the top level - if "bte" in filtered_dict["api"]: - filtered_dict["bte"] = filtered_dict["api"].pop("bte") - - return filtered_dict - def process_apis(self, apis): """Process each API dict based on provided args.""" if isinstance(apis, list): @@ -831,37 +813,49 @@ async def get(self, *args, **kwargs): for i, api_dict in enumerate(combined_data): filtered_api = self.get_filtered_api(api_dict) combined_data[i] = filtered_api + # parser does not pick up this information, so we add it here + if self.args.api_details == 1: + for data_dict in combined_data: + if "metadata" in data_dict["api"]["smartapi"] and data_dict["api"]["smartapi"]["metadata"] is None: + data_dict["api"]["smartapi"]["metadata"] = self.args.url response = { - "took": 1, "total": len(combined_data), - "max_score": 1, "hits": combined_data, } - self.set_header("Content-Type", "application/json") - self.write(response) + self.finish(response) async def post(self, *args, **kwargs): if not self.request.body: raise HTTPError(400, reason="Request body cannot be empty.") + content_type = self.request.headers.get("Content-Type", "") + data_body = self.request.body - # Attempt to parse JSON body - try: - data = json.loads(self.request.body) - except json.JSONDecodeError: - raise HTTPError(400, reason=f"Unexcepted value for api_details, {self.get_argument('api_details')}. Please enter integer, 0 or 1.") - - # Ensure the parsed data is a dictionary + if content_type == "application/json": + try: + data = to_dict(data_body, ctype="application/json") + except ValueError: + raise HTTPError(400, reason="Invalid data. Please provide a valid JSON object.") + except TypeError: + raise HTTPError(400, reason="Invalid data type. Please provide a valid type.") + if content_type == "application/x-yaml": + try: + data = to_dict(data_body) + except ValueError: + raise HTTPError(400, reason="Invalid input data. Please provide a valid YAML object.") + except TypeError: + raise HTTPError(400, reason="Invalid type data. Please provide a valid type.") + # # Ensure the parsed data is a dictionary if not isinstance(data, dict): - raise HTTPError(400, reason=f"Unexcepted value for bte, {self.get_argument('bte')}. Please enter integer, 0 or 1.") + raise ValueError("Invalid input data. Please provide a valid JSON/YAML object.") parser = MetaKGParser() try: self.args.api_details = int(self.get_argument("api_details", 0)) except ValueError: - raise HTTPError(400, reason="Invalid query parameter value. 'api_details' and 'bte' must be integers.") + raise HTTPError(400, reason=f"Unexcepted value for api_details, {self.get_argument('api_details')}. 
Please enter integer, 0 or 1.") try: self.args.bte = int(self.get_argument("bte", 0)) @@ -892,11 +886,8 @@ async def post(self, *args, **kwargs): combined_data[i] = filtered_api response = { - "took": 1, "total": len(combined_data), - "max_score": 1, "hits": combined_data, } - self.set_header("Content-Type", "application/json") - self.write(response) + self.finish(response) diff --git a/src/utils/metakg/parser.py b/src/utils/metakg/parser.py index afce0f2c..8763687a 100644 --- a/src/utils/metakg/parser.py +++ b/src/utils/metakg/parser.py @@ -1,7 +1,6 @@ import json import logging from copy import copy -from tornado.web import HTTPError from utils.downloader import DownloadError from utils.metakg.metakg_errors import MetadataRetrievalError import requests @@ -18,7 +17,7 @@ class MetaKGParser: def get_non_TRAPI_metadatas(self, data=None, extra_data=None, url=None): """ Extract MetaKG edges from a SmartAPI document provided as `data` or fetched from a `url`. - Raises an error if no valid input is given. + Raises an error if no valid input is given, or if parser fails to parse the document. """ if not data and not url: raise MetadataRetrievalError(400, "Either data or url value is expected for this request, please provide data or a url.") @@ -38,8 +37,8 @@ def get_non_TRAPI_metadatas(self, data=None, extra_data=None, url=None): def get_TRAPI_metadatas(self, data=None, extra_data=None, url=None): """ - Extract and process TRAPI metadata from a SmartAPI document or URL. - Returns MetaKG edges or propagates errors. + Extract and process TRAPI metadata from a SmartAPI document or URL. + Returns MetaKG edges or propagates errors. """ if not data and not url: raise MetadataRetrievalError(400, "Either data or url value is expected for this request, please provide data or a url.") @@ -66,9 +65,9 @@ def get_TRAPI_metadatas(self, data=None, extra_data=None, url=None): return self.extract_metakgedges(ops, extra_data=extra_data) def get_TRAPI_with_metakg_endpoint(self, data=None, url=None): - """ - Retrieve TRAPI metadata from a SmartAPI document or URL. - Returns metadata if TRAPI endpoints are found, else an empty list. + """ + Retrieve TRAPI metadata from a SmartAPI document or URL. + Returns metadata if TRAPI endpoints are found, else an empty list. """ if not data and not url: raise MetadataRetrievalError(400, "Either data or url value is expected for this request, please provide data or a url.") From 3819a835df825b83100488df83ddcb6d4dcadef6 Mon Sep 17 00:00:00 2001 From: Nichollette Date: Mon, 17 Mar 2025 18:55:17 -0400 Subject: [PATCH 19/30] error handling cleanup: --- src/handlers/api.py | 101 ++++++++++++++++++++------------------------ 1 file changed, 46 insertions(+), 55 deletions(-) diff --git a/src/handlers/api.py b/src/handlers/api.py index c9986d61..335f6924 100644 --- a/src/handlers/api.py +++ b/src/handlers/api.py @@ -433,7 +433,7 @@ def get_filtered_api(self, api_dict): filtered_dict["bte"] = filtered_dict["api"].pop("bte") return filtered_dict -class MetaKGQueryHandler(QueryHandler): +class MetaKGQueryHandler(MetaKGHandlerMixin,QueryHandler): """ Support metakg queries with biolink model's semantic descendants @@ -717,7 +717,7 @@ async def get(self, *args, **kwargs): await asyncio.sleep(0.01) self.finish(res) -class MetaKGParserHandler(BaseHandler): +class MetaKGParserHandler(MetaKGHandlerMixin, BaseHandler): """ Handles parsing of SmartAPI metadata from a given URL or request body. 
@@ -774,22 +774,20 @@ def process_apis(self, apis): apis["api"] = filtered_api return apis + async def get(self, *args, **kwargs): + url = self.get_argument("url", None) if not self.get_argument("url", None): raise HTTPError(400, reason="A url value is expected for the request, please provide a url.") - # Set initial args + # Set initial args and handle potential errors in query parameters parser = MetaKGParser() - url = self.get_argument("url") try: self.args.api_details = int(self.get_argument("api_details", 0)) - except ValueError: - raise HTTPError(400, reason=f"Value, {self.get_argument('api_details')}, not accepted for api_details. Please enter integer, 0 or 1.") - try: self.args.bte = int(self.get_argument("bte", 0)) - except ValueError: - raise HTTPError(400, reason=f"Value,, {self.get_argument('bte')}, not accepted for bte. Please enter integer, 0 or 1.") + except ValueError as err: + raise HTTPError(400, reason=f"Invalid value for parameter: {str(err)}. Please enter integer, 0 or 1.") try: trapi_data = parser.get_TRAPI_metadatas(data=None, url=url) @@ -806,18 +804,17 @@ async def get(self, *args, **kwargs): except DownloadError: raise HTTPError(400, reason="There was an error downloading the data from the given input.") - combined_data = trapi_data + nontrapi_data - # Apply filtering -- if data found + combined_data = trapi_data + nontrapi_data if combined_data: for i, api_dict in enumerate(combined_data): - filtered_api = self.get_filtered_api(api_dict) - combined_data[i] = filtered_api - # parser does not pick up this information, so we add it here - if self.args.api_details == 1: - for data_dict in combined_data: - if "metadata" in data_dict["api"]["smartapi"] and data_dict["api"]["smartapi"]["metadata"] is None: - data_dict["api"]["smartapi"]["metadata"] = self.args.url + combined_data[i] = self.get_filtered_api(api_dict) + + # Add url to metadata if api_details is set to 1 + if self.args.api_details == 1: + for data_dict in combined_data: + if "metadata" in data_dict["api"]["smartapi"] and data_dict["api"]["smartapi"]["metadata"] is None: + data_dict["api"]["smartapi"]["metadata"] = url response = { "total": len(combined_data), @@ -829,62 +826,56 @@ async def get(self, *args, **kwargs): async def post(self, *args, **kwargs): if not self.request.body: raise HTTPError(400, reason="Request body cannot be empty.") - content_type = self.request.headers.get("Content-Type", "") - data_body = self.request.body - if content_type == "application/json": - try: - data = to_dict(data_body, ctype="application/json") - except ValueError: - raise HTTPError(400, reason="Invalid data. Please provide a valid JSON object.") - except TypeError: - raise HTTPError(400, reason="Invalid data type. Please provide a valid type.") - if content_type == "application/x-yaml": - try: - data = to_dict(data_body) - except ValueError: - raise HTTPError(400, reason="Invalid input data. Please provide a valid YAML object.") - except TypeError: - raise HTTPError(400, reason="Invalid type data. Please provide a valid type.") - # # Ensure the parsed data is a dictionary - if not isinstance(data, dict): - raise ValueError("Invalid input data. 
Please provide a valid JSON/YAML object.") - - parser = MetaKGParser() + content_type = self.request.headers.get("Content-Type", "").lower() + raw_body = self.request.body + # Try to parse the request body based on content type try: - self.args.api_details = int(self.get_argument("api_details", 0)) - except ValueError: - raise HTTPError(400, reason=f"Unexcepted value for api_details, {self.get_argument('api_details')}. Please enter integer, 0 or 1.") + if content_type == "application/json": + data = to_dict(raw_body, ctype="application/json") + elif content_type == "application/x-yaml": + data = to_dict(raw_body, ctype="application/x-yaml") + else: + # Default to YAML parsing if the content type is unknown or not specified + data = to_dict(raw_body) + except ValueError as val_err: + if 'mapping values are not allowed here' in str(val_err): + raise HTTPError(400, reason="Formatting issue, please consider using --data-binary to maintain YAML format.") + else: + raise HTTPError(400, reason="Invalid value, please provide a valid YAML object.") + except TypeError: + raise HTTPError(400, reason="Invalid type, provide valid type metadata.") + + # Ensure the parsed data is a dictionary + if not isinstance(data, dict): + raise ValueError("Invalid input data type. Please provide a valid JSON/YAML object.") + # Extract query parameters (assuming these need to be parsed from the request) try: + self.args.api_details = int(self.get_argument("api_details", 0)) self.args.bte = int(self.get_argument("bte", 0)) - except ValueError: - raise HTTPError(400, reason=f"Unexcepted value for bte, {self.get_argument('bte')}. Please enter integer, 0 or 1.") + except ValueError as err: + raise HTTPError(400, reason=f"Invalid query parameter: {str(err)}") - # Process metadata + # Process the parsed metadata + parser = MetaKGParser() try: trapi_data = parser.get_TRAPI_metadatas(data=data) - except MetadataRetrievalError as retrieve_err: - raise HTTPError(retrieve_err.status_code, reason=retrieve_err.message) - except DownloadError: - raise HTTPError(400, reason="There was an error downloading the data from the given input.") - - try: nontrapi_data = parser.get_non_TRAPI_metadatas(data=data) except MetadataRetrievalError as retrieve_err: raise HTTPError(retrieve_err.status_code, reason=retrieve_err.message) except DownloadError: - raise HTTPError(400, reason="There was an error downloading the data from the given input.") + raise HTTPError(400, reason="Error downloading the data from the provided input.") combined_data = trapi_data + nontrapi_data - # Apply filtering -- if data found + # Apply filtering to the combined data if combined_data: for i, api_dict in enumerate(combined_data): - filtered_api = self.get_filtered_api(api_dict) - combined_data[i] = filtered_api + combined_data[i] = self.get_filtered_api(api_dict) + # Send the response back to the client response = { "total": len(combined_data), "hits": combined_data, From 4b6d2614b26bca359e3b7e819da33f4aea778bee Mon Sep 17 00:00:00 2001 From: Nichollette Date: Mon, 17 Mar 2025 18:57:37 -0400 Subject: [PATCH 20/30] code cleanup --- src/handlers/api.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/handlers/api.py b/src/handlers/api.py index 335f6924..1b324849 100644 --- a/src/handlers/api.py +++ b/src/handlers/api.py @@ -382,6 +382,7 @@ def post(self): else: raise HTTPError(400, reason="Missing required form field: id") + class MetaKGHandlerMixin: """ Mixin to provide reusable logic for filtering API information. 
@@ -433,6 +434,8 @@ def get_filtered_api(self, api_dict): filtered_dict["bte"] = filtered_dict["api"].pop("bte") return filtered_dict + + class MetaKGQueryHandler(MetaKGHandlerMixin,QueryHandler): """ Support metakg queries with biolink model's semantic descendants @@ -717,6 +720,7 @@ async def get(self, *args, **kwargs): await asyncio.sleep(0.01) self.finish(res) + class MetaKGParserHandler(MetaKGHandlerMixin, BaseHandler): """ Handles parsing of SmartAPI metadata from a given URL or request body. From 4c8d92eb18c7284acb0b62bf05761be79fc23428 Mon Sep 17 00:00:00 2001 From: Nichollette Date: Tue, 18 Mar 2025 10:22:20 -0400 Subject: [PATCH 21/30] code cleanup with flake8 --- src/handlers/api.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/handlers/api.py b/src/handlers/api.py index 1b324849..c321c928 100644 --- a/src/handlers/api.py +++ b/src/handlers/api.py @@ -778,7 +778,6 @@ def process_apis(self, apis): apis["api"] = filtered_api return apis - async def get(self, *args, **kwargs): url = self.get_argument("url", None) if not self.get_argument("url", None): From 220e4a3497a5c9a3627bbc8a6219849ca27d4751 Mon Sep 17 00:00:00 2001 From: Nichollette Date: Tue, 18 Mar 2025 10:58:42 -0400 Subject: [PATCH 22/30] set ui key to none when empty value --- src/handlers/api.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/handlers/api.py b/src/handlers/api.py index c321c928..5ec3672d 100644 --- a/src/handlers/api.py +++ b/src/handlers/api.py @@ -416,6 +416,9 @@ def get_filtered_api(self, api_dict): filtered_api = api_info.copy() if bte == 0: filtered_api.pop("bte", None) + # Check if the "ui" key exists and ends with "None" + if filtered_api['smartapi'].get("ui", "").endswith("/None"): + filtered_api["smartapi"]["ui"] = None else: # bte == 0 and api_details == 0 filtered_api = { **({"name": api_info.get("name")} if "name" in api_info else {}), @@ -433,6 +436,7 @@ def get_filtered_api(self, api_dict): if "bte" in filtered_dict["api"]: filtered_dict["bte"] = filtered_dict["api"].pop("bte") + return filtered_dict From dec2502a78ec185ba6e9adb3e0f8441022574227 Mon Sep 17 00:00:00 2001 From: Nichollette Date: Thu, 27 Mar 2025 13:12:58 -0400 Subject: [PATCH 23/30] exchanged basehandler for queryhandler in metakgparserhandler, removed old testing code --- src/controller/smartapi.py | 10 +--- src/handlers/api.py | 114 ++++++++++++++++++++++++++----------- 2 files changed, 83 insertions(+), 41 deletions(-) diff --git a/src/controller/smartapi.py b/src/controller/smartapi.py index 08325c7f..c5e1bf05 100644 --- a/src/controller/smartapi.py +++ b/src/controller/smartapi.py @@ -369,14 +369,8 @@ def is_trapi(self): """return True if a TRAPI""" return self.has_tags("trapi", "translator") - def get_metakg(self, include_trapi=True, metadata_url=False): - if metadata_url: - data_id = decoder.get_id(self.url) - doc = self.get(data_id) - self._doc = doc._doc - raw_metadata = decoder.to_dict(decoder.decompress(doc._doc._raw)) - else: - raw_metadata = decoder.to_dict(decoder.decompress(self._doc._raw)) + def get_metakg(self, include_trapi=True): + raw_metadata = decoder.to_dict(decoder.decompress(self._doc._raw)) mkg_parser = MetaKGParser() extra_data = {"id": self._id, "url": self.url} self.metakg_errors = None # reset metakg_errors diff --git a/src/handlers/api.py b/src/handlers/api.py index 5ec3672d..5ade874d 100644 --- a/src/handlers/api.py +++ b/src/handlers/api.py @@ -1,4 +1,3 @@ -from builtins import ValueError, isinstance import asyncio import json import logging @@ -390,8 +389,10 @@ class 
MetaKGHandlerMixin: def get_filtered_api(self, api_dict): """Extract and return filtered API information.""" api_info = api_dict.get("api", api_dict) # Handle both formats - bte = getattr(self.args, "bte", 0) - api_details = getattr(self.args, "api_details", 0) + + # Default to False if not present + bte = getattr(self.args, "bte", False) + api_details = getattr(self.args, "api_details", False) # Default structure to preserve top-level keys filtered_dict = { @@ -401,7 +402,8 @@ def get_filtered_api(self, api_dict): } # Determine filtered API structure based on `bte` and `api_details` - if bte == 1 and api_details == 0: + if bte and not api_details: + # When bte is True and api_details is False, include only minimal API info filtered_api = { **({"name": api_info.get("name")} if "name" in api_info else {}), **( @@ -411,15 +413,17 @@ def get_filtered_api(self, api_dict): ), "bte": api_info.get("bte", {}), } - elif api_details == 1: - # Covers both (bte=0, api_details=1) and (bte=1, api_details=1) + elif api_details: + # When api_details is True, include more detailed information filtered_api = api_info.copy() - if bte == 0: + if not bte: filtered_api.pop("bte", None) - # Check if the "ui" key exists and ends with "None" - if filtered_api['smartapi'].get("ui", "").endswith("/None"): + + # Handle case where "ui" key exists and ends with "None" + if filtered_api.get('smartapi', {}).get("ui", "").endswith("/None"): filtered_api["smartapi"]["ui"] = None - else: # bte == 0 and api_details == 0 + else: + # Default: No bte and no api_details - just minimal API info filtered_api = { **({"name": api_info.get("name")} if "name" in api_info else {}), **( @@ -436,11 +440,55 @@ def get_filtered_api(self, api_dict): if "bte" in filtered_dict["api"]: filtered_dict["bte"] = filtered_dict["api"].pop("bte") - return filtered_dict - - -class MetaKGQueryHandler(MetaKGHandlerMixin,QueryHandler): + # # Default structure to preserve top-level keys + # filtered_dict = { + # key: api_dict.get(key) + # for key in ["subject", "object", "predicate", "subject_prefix", "object_prefix"] + # if key in api_dict + # } + + # # Determine filtered API structure based on `bte` and `api_details` + # if bte == 1 and api_details == 0: + # filtered_api = { + # **({"name": api_info.get("name")} if "name" in api_info else {}), + # **( + # {"smartapi": {"id": api_info.get("smartapi", {}).get("id", None)}} + # if "smartapi" in api_info + # else {"smartapi": {"id": None}} + # ), + # "bte": api_info.get("bte", {}), + # } + # elif api_details == 1: + # # Covers both (bte=0, api_details=1) and (bte=1, api_details=1) + # filtered_api = api_info.copy() + # if bte == 0: + # filtered_api.pop("bte", None) + # # Check if the "ui" key exists and ends with "None" + # if filtered_api['smartapi'].get("ui", "").endswith("/None"): + # filtered_api["smartapi"]["ui"] = None + # else: # bte == 0 and api_details == 0 + # filtered_api = { + # **({"name": api_info.get("name")} if "name" in api_info else {}), + # **( + # {"smartapi": {"id": api_info.get("smartapi", {}).get("id", None)}} + # if "smartapi" in api_info + # else {"smartapi": {"id": None}} + # ), + # } + + # # Add the filtered 'api' key to the preserved top-level structure + # filtered_dict["api"] = filtered_api + + # # Remove 'bte' from 'api' and move it to the top level + # if "bte" in filtered_dict["api"]: + # filtered_dict["bte"] = filtered_dict["api"].pop("bte") + + + # return filtered_dict + + +class MetaKGQueryHandler(QueryHandler, MetaKGHandlerMixin): """ Support metakg queries with biolink 
model's semantic descendants @@ -725,7 +773,7 @@ async def get(self, *args, **kwargs): self.finish(res) -class MetaKGParserHandler(MetaKGHandlerMixin, BaseHandler): +class MetaKGParserHandler(QueryHandler, MetaKGHandlerMixin): """ Handles parsing of SmartAPI metadata from a given URL or request body. @@ -753,12 +801,12 @@ class MetaKGParserHandler(MetaKGHandlerMixin, BaseHandler): "max": 1000, "description": "URL of the SmartAPI metadata to parse" }, - "api_details": {"type": bool, "default": 0 }, - "bte": {"type": bool, "default": 0}, + "api_details": {"type": bool, "default": False}, + "bte": {"type": bool, "default": False}, }, "POST": { - "api_details": {"type": bool, "default": 0 }, - "bte": {"type": bool, "default": 0 }, + "api_details": {"type": bool, "default": False}, + "bte": {"type": bool, "default": False}, }, } @@ -783,18 +831,18 @@ def process_apis(self, apis): return apis async def get(self, *args, **kwargs): - url = self.get_argument("url", None) - if not self.get_argument("url", None): + url = self.args.url + if not url: raise HTTPError(400, reason="A url value is expected for the request, please provide a url.") # Set initial args and handle potential errors in query parameters parser = MetaKGParser() - try: - self.args.api_details = int(self.get_argument("api_details", 0)) - self.args.bte = int(self.get_argument("bte", 0)) - except ValueError as err: - raise HTTPError(400, reason=f"Invalid value for parameter: {str(err)}. Please enter integer, 0 or 1.") + # try: + # self.args.api_details = int(self.get_argument("api_details", 0)) + # self.args.bte = int(self.get_argument("bte", 0)) + # except ValueError as err: + # raise HTTPError(400, reason=f"Invalid value for parameter: {str(err)}. Please enter integer, 0 or 1.") try: trapi_data = parser.get_TRAPI_metadatas(data=None, url=url) @@ -831,11 +879,11 @@ async def get(self, *args, **kwargs): self.finish(response) async def post(self, *args, **kwargs): - if not self.request.body: + raw_body = self.request.body + if not raw_body: raise HTTPError(400, reason="Request body cannot be empty.") content_type = self.request.headers.get("Content-Type", "").lower() - raw_body = self.request.body # Try to parse the request body based on content type try: @@ -859,11 +907,11 @@ async def post(self, *args, **kwargs): raise ValueError("Invalid input data type. Please provide a valid JSON/YAML object.") # Extract query parameters (assuming these need to be parsed from the request) - try: - self.args.api_details = int(self.get_argument("api_details", 0)) - self.args.bte = int(self.get_argument("bte", 0)) - except ValueError as err: - raise HTTPError(400, reason=f"Invalid query parameter: {str(err)}") + # try: + # self.args.api_details = int(self.get_argument("api_details", 0)) + # self.args.bte = int(self.get_argument("bte", 0)) + # except ValueError as err: + # raise HTTPError(400, reason=f"Invalid query parameter: {str(err)}") # Process the parsed metadata parser = MetaKGParser() From 56af963bbbea91ce3925fd495ac8bc9c708a7ac7 Mon Sep 17 00:00:00 2001 From: Nichollette Date: Thu, 27 Mar 2025 13:22:36 -0400 Subject: [PATCH 24/30] code cleanup, whitespaces, etc. 
--- src/handlers/api.py | 58 ++++++--------------------------------------- 1 file changed, 7 insertions(+), 51 deletions(-) diff --git a/src/handlers/api.py b/src/handlers/api.py index 5ade874d..6e69f8fa 100644 --- a/src/handlers/api.py +++ b/src/handlers/api.py @@ -27,6 +27,7 @@ logger = logging.getLogger("smartAPI") + def github_authenticated(func): """ RegistryHandler Decorator @@ -44,6 +45,7 @@ def _(self, *args, **kwargs): class BaseHandler(BioThingsAuthnMixin, BaseAPIHandler): pass + class AuthHandler(BaseHandler): def set_cache_header(self, cache_value): # disabel cache for auth-related handlers @@ -418,7 +420,7 @@ def get_filtered_api(self, api_dict): filtered_api = api_info.copy() if not bte: filtered_api.pop("bte", None) - + # Handle case where "ui" key exists and ends with "None" if filtered_api.get('smartapi', {}).get("ui", "").endswith("/None"): filtered_api["smartapi"]["ui"] = None @@ -441,51 +443,6 @@ def get_filtered_api(self, api_dict): filtered_dict["bte"] = filtered_dict["api"].pop("bte") return filtered_dict - # # Default structure to preserve top-level keys - # filtered_dict = { - # key: api_dict.get(key) - # for key in ["subject", "object", "predicate", "subject_prefix", "object_prefix"] - # if key in api_dict - # } - - # # Determine filtered API structure based on `bte` and `api_details` - # if bte == 1 and api_details == 0: - # filtered_api = { - # **({"name": api_info.get("name")} if "name" in api_info else {}), - # **( - # {"smartapi": {"id": api_info.get("smartapi", {}).get("id", None)}} - # if "smartapi" in api_info - # else {"smartapi": {"id": None}} - # ), - # "bte": api_info.get("bte", {}), - # } - # elif api_details == 1: - # # Covers both (bte=0, api_details=1) and (bte=1, api_details=1) - # filtered_api = api_info.copy() - # if bte == 0: - # filtered_api.pop("bte", None) - # # Check if the "ui" key exists and ends with "None" - # if filtered_api['smartapi'].get("ui", "").endswith("/None"): - # filtered_api["smartapi"]["ui"] = None - # else: # bte == 0 and api_details == 0 - # filtered_api = { - # **({"name": api_info.get("name")} if "name" in api_info else {}), - # **( - # {"smartapi": {"id": api_info.get("smartapi", {}).get("id", None)}} - # if "smartapi" in api_info - # else {"smartapi": {"id": None}} - # ), - # } - - # # Add the filtered 'api' key to the preserved top-level structure - # filtered_dict["api"] = filtered_api - - # # Remove 'bte' from 'api' and move it to the top level - # if "bte" in filtered_dict["api"]: - # filtered_dict["bte"] = filtered_dict["api"].pop("bte") - - - # return filtered_dict class MetaKGQueryHandler(QueryHandler, MetaKGHandlerMixin): @@ -564,7 +521,6 @@ async def get(self, *args, **kwargs): value_list = get_expanded_values(value_list, self.biolink_model_toolkit) if expanded_fields[field] else value_list setattr(self.args, field, value_list) - await super().get(*args, **kwargs) def process_apis(self, apis): @@ -576,9 +532,9 @@ def process_apis(self, apis): elif isinstance(apis, dict): if 'bte' in apis: # update dict for new format - apis['api']['bte']=apis.pop('bte') + apis['api']['bte'] = apis.pop('bte') api_dict = apis["api"] - filtered_api= self.get_filtered_api(api_dict) + filtered_api = self.get_filtered_api(api_dict) apis["api"] = filtered_api def write(self, chunk): @@ -610,7 +566,7 @@ def write(self, chunk): if self.format == "html": # setup template - template_path = os.path.abspath(os.path.join(os.path.dirname( __file__ ), '..', 'templates')) + template_path = 
os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'templates')) loader = Loader(template_path) template = loader.load("cytoscape.html") # initial counts @@ -627,7 +583,7 @@ def write(self, chunk): graph_data = serializer.to_json(cdf.get_data()) # generate global template variable with graph data result = template.generate( - data= graph_data, + data=graph_data, response=serializer.to_json(chunk), shown=shown, available=available, From 8ea30ae3363f6506bc1fa4ec45b3589937e9cb59 Mon Sep 17 00:00:00 2001 From: Nichollette Date: Thu, 27 Mar 2025 15:34:49 -0400 Subject: [PATCH 25/30] cleaned up excess code --- src/handlers/api.py | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/src/handlers/api.py b/src/handlers/api.py index 6e69f8fa..5519c9ce 100644 --- a/src/handlers/api.py +++ b/src/handlers/api.py @@ -393,8 +393,8 @@ def get_filtered_api(self, api_dict): api_info = api_dict.get("api", api_dict) # Handle both formats # Default to False if not present - bte = getattr(self.args, "bte", False) - api_details = getattr(self.args, "api_details", False) + bte = self.args.bte # getattr(self.args, "bte", False) + api_details = self.args.api_details # getattr(self.args, "api_details", False) # Default structure to preserve top-level keys filtered_dict = { @@ -729,7 +729,7 @@ async def get(self, *args, **kwargs): self.finish(res) -class MetaKGParserHandler(QueryHandler, MetaKGHandlerMixin): +class MetaKGParserHandler(BaseHandler, MetaKGHandlerMixin): """ Handles parsing of SmartAPI metadata from a given URL or request body. @@ -794,12 +794,6 @@ async def get(self, *args, **kwargs): # Set initial args and handle potential errors in query parameters parser = MetaKGParser() - # try: - # self.args.api_details = int(self.get_argument("api_details", 0)) - # self.args.bte = int(self.get_argument("bte", 0)) - # except ValueError as err: - # raise HTTPError(400, reason=f"Invalid value for parameter: {str(err)}. Please enter integer, 0 or 1.") - try: trapi_data = parser.get_TRAPI_metadatas(data=None, url=url) except MetadataRetrievalError as retrieve_err: @@ -862,13 +856,6 @@ async def post(self, *args, **kwargs): if not isinstance(data, dict): raise ValueError("Invalid input data type. 
Please provide a valid JSON/YAML object.") - # Extract query parameters (assuming these need to be parsed from the request) - # try: - # self.args.api_details = int(self.get_argument("api_details", 0)) - # self.args.bte = int(self.get_argument("bte", 0)) - # except ValueError as err: - # raise HTTPError(400, reason=f"Invalid query parameter: {str(err)}") - # Process the parsed metadata parser = MetaKGParser() try: From 6316bf556e953a991f60b0169cb821a49222ef28 Mon Sep 17 00:00:00 2001 From: Nichollette Date: Tue, 8 Apr 2025 11:21:42 -0400 Subject: [PATCH 26/30] removed old code --- src/handlers/api.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/handlers/api.py b/src/handlers/api.py index 5519c9ce..3a91316d 100644 --- a/src/handlers/api.py +++ b/src/handlers/api.py @@ -393,8 +393,8 @@ def get_filtered_api(self, api_dict): api_info = api_dict.get("api", api_dict) # Handle both formats # Default to False if not present - bte = self.args.bte # getattr(self.args, "bte", False) - api_details = self.args.api_details # getattr(self.args, "api_details", False) + bte = self.args.bte + api_details = self.args.api_details # Default structure to preserve top-level keys filtered_dict = { From 745e16aba8be17c2b222dea32960121ecb512eb3 Mon Sep 17 00:00:00 2001 From: Nichollette Date: Tue, 15 Apr 2025 15:52:57 -0400 Subject: [PATCH 27/30] removed not needed code --- src/handlers/api.py | 2 +- src/utils/metakg/parser.py | 7 +------ 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/src/handlers/api.py b/src/handlers/api.py index 3a91316d..c6209bf6 100644 --- a/src/handlers/api.py +++ b/src/handlers/api.py @@ -816,7 +816,7 @@ async def get(self, *args, **kwargs): combined_data[i] = self.get_filtered_api(api_dict) # Add url to metadata if api_details is set to 1 - if self.args.api_details == 1: + if self.args.api_details: for data_dict in combined_data: if "metadata" in data_dict["api"]["smartapi"] and data_dict["api"]["smartapi"]["metadata"] is None: data_dict["api"]["smartapi"]["metadata"] = url diff --git a/src/utils/metakg/parser.py b/src/utils/metakg/parser.py index 8763687a..e5e14956 100644 --- a/src/utils/metakg/parser.py +++ b/src/utils/metakg/parser.py @@ -1,7 +1,6 @@ import json import logging from copy import copy -from utils.downloader import DownloadError from utils.metakg.metakg_errors import MetadataRetrievalError import requests @@ -76,11 +75,7 @@ def get_TRAPI_with_metakg_endpoint(self, data=None, url=None): parser = API(smartapi_doc=data) if data else API(url=url) # Download the metadata - try: - metadata = parser.metadata - except DownloadError as dl_err: - raise dl_err - + metadata = parser.metadata _paths = metadata.get("paths", {}) _team = metadata.get("x-translator", {}).get("team") From bfbd6f06ffcdd37f8ad9df07637339af6f189af9 Mon Sep 17 00:00:00 2001 From: Nichollette Date: Wed, 16 Apr 2025 10:11:26 -0400 Subject: [PATCH 28/30] removed print statement --- src/utils/metakg/parser.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/utils/metakg/parser.py b/src/utils/metakg/parser.py index e5e14956..b05f6298 100644 --- a/src/utils/metakg/parser.py +++ b/src/utils/metakg/parser.py @@ -81,7 +81,6 @@ def get_TRAPI_with_metakg_endpoint(self, data=None, url=None): # Check for required TRAPI paths if "/meta_knowledge_graph" in _paths and "/query" in _paths and _team: - print("TRAPI metadata found.") return [metadata] else: return [] From 8f06ce71fa9fe6b9845a657d488609c7c2b3a009 Mon Sep 17 00:00:00 2001 From: Chunlei Wu Date: Fri, 2 May 2025 15:31:47 
-0700 Subject: [PATCH 29/30] style: :art: minor coding style fixes --- src/handlers/api.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/handlers/api.py b/src/handlers/api.py index c6209bf6..b896814a 100644 --- a/src/handlers/api.py +++ b/src/handlers/api.py @@ -2,6 +2,7 @@ import json import logging import os + import bmt from biothings.utils import serializer from biothings.web.auth.authn import BioThingsAuthnMixin @@ -9,21 +10,21 @@ from biothings.web.handlers.query import BiothingHandler, capture_exceptions from biothings.web.settings.default import QUERY_KWARGS from tornado.httpclient import AsyncHTTPClient -from tornado.web import Finish, HTTPError from tornado.template import Loader +from tornado.web import Finish, HTTPError from controller import SmartAPI from controller.exceptions import ControllerError, NotFoundError from pipeline import MetaKGQueryPipeline +from utils.decoder import to_dict from utils.downloader import DownloadError, download_async +from utils.metakg.biolink_helpers import get_expanded_values +from utils.metakg.cytoscape_formatter import CytoscapeDataFormatter from utils.metakg.export import edges2graphml +from utils.metakg.metakg_errors import MetadataRetrievalError +from utils.metakg.parser import MetaKGParser from utils.metakg.path_finder import MetaKGPathFinder -from utils.metakg.cytoscape_formatter import CytoscapeDataFormatter -from utils.metakg.biolink_helpers import get_expanded_values from utils.notification import SlackNewAPIMessage, SlackNewTranslatorAPIMessage -from utils.metakg.parser import MetaKGParser -from utils.metakg.metakg_errors import MetadataRetrievalError -from utils.decoder import to_dict logger = logging.getLogger("smartAPI") @@ -627,7 +628,7 @@ class MetaKGPathFinderHandler(QueryHandler): "max": 6, "default": [], "enum": ["subject", "object", "predicate", "node", "edge", "all"] - } + } }, } @@ -722,9 +723,9 @@ async def get(self, *args, **kwargs): self.write(raw_query_output) return res = { - "total": len(paths_with_edges), - "paths": paths_with_edges, - } + "total": len(paths_with_edges), + "paths": paths_with_edges, + } await asyncio.sleep(0.01) self.finish(res) From 4bcd99ba291437635ac1f9493fe40c198287d1c4 Mon Sep 17 00:00:00 2001 From: Chunlei Wu Date: Mon, 5 May 2025 08:50:38 -0700 Subject: [PATCH 30/30] refactor: :recycle: simplify and refactor metakg parsing logics --- src/handlers/api.py | 130 ++++++++++++------------------ src/pipeline.py | 15 ++-- src/utils/metakg/metakg_errors.py | 16 ---- src/utils/metakg/parser.py | 65 ++++++++------- 4 files changed, 88 insertions(+), 138 deletions(-) delete mode 100644 src/utils/metakg/metakg_errors.py diff --git a/src/handlers/api.py b/src/handlers/api.py index b896814a..e075103c 100644 --- a/src/handlers/api.py +++ b/src/handlers/api.py @@ -16,12 +16,10 @@ from controller import SmartAPI from controller.exceptions import ControllerError, NotFoundError from pipeline import MetaKGQueryPipeline -from utils.decoder import to_dict from utils.downloader import DownloadError, download_async from utils.metakg.biolink_helpers import get_expanded_values from utils.metakg.cytoscape_formatter import CytoscapeDataFormatter from utils.metakg.export import edges2graphml -from utils.metakg.metakg_errors import MetadataRetrievalError from utils.metakg.parser import MetaKGParser from utils.metakg.path_finder import MetaKGPathFinder from utils.notification import SlackNewAPIMessage, SlackNewTranslatorAPIMessage @@ -751,6 +749,10 @@ class 
MetaKGParserHandler(BaseHandler, MetaKGHandlerMixin): """ kwargs = { + "*": { + "api_details": {"type": bool, "default": False}, + "bte": {"type": bool, "default": False}, + }, "GET": { "url": { "type": str, @@ -758,12 +760,6 @@ class MetaKGParserHandler(BaseHandler, MetaKGHandlerMixin): "max": 1000, "description": "URL of the SmartAPI metadata to parse" }, - "api_details": {"type": bool, "default": False}, - "bte": {"type": bool, "default": False}, - }, - "POST": { - "api_details": {"type": bool, "default": False}, - "bte": {"type": bool, "default": False}, }, } @@ -789,95 +785,69 @@ def process_apis(self, apis): async def get(self, *args, **kwargs): url = self.args.url - if not url: - raise HTTPError(400, reason="A url value is expected for the request, please provide a url.") - - # Set initial args and handle potential errors in query parameters parser = MetaKGParser() try: - trapi_data = parser.get_TRAPI_metadatas(data=None, url=url) - except MetadataRetrievalError as retrieve_err: - raise HTTPError(retrieve_err.status_code, reason=retrieve_err.message) - except DownloadError: - raise HTTPError(400, reason="There was an error downloading the data from the given input.") - - # Get non-TRAPI metadata - try: - nontrapi_data = parser.get_non_TRAPI_metadatas(data=None, url=url) - except MetadataRetrievalError as retrieve_err: - raise HTTPError(retrieve_err.status_code, reason=retrieve_err.message) + parsed_metakg = parser.get_metakg(url=url) except DownloadError: - raise HTTPError(400, reason="There was an error downloading the data from the given input.") + self.write_error(400, reason="There was an error downloading the data from the given url.") + except (ValueError, TypeError) as err: + self.write_error( + status_code=400, + reason="The data retrived from the given url is not a valid JSON or YAML object.", + message=str(err) + ) # Apply filtering -- if data found - combined_data = trapi_data + nontrapi_data - if combined_data: - for i, api_dict in enumerate(combined_data): - combined_data[i] = self.get_filtered_api(api_dict) + if parsed_metakg: + for i, api_dict in enumerate(parsed_metakg): + parsed_metakg[i] = self.get_filtered_api(api_dict) # Add url to metadata if api_details is set to 1 if self.args.api_details: - for data_dict in combined_data: + for data_dict in parsed_metakg: if "metadata" in data_dict["api"]["smartapi"] and data_dict["api"]["smartapi"]["metadata"] is None: data_dict["api"]["smartapi"]["metadata"] = url response = { - "total": len(combined_data), - "hits": combined_data, + "total": len(parsed_metakg), + "hits": parsed_metakg, } self.finish(response) async def post(self, *args, **kwargs): - raw_body = self.request.body - if not raw_body: - raise HTTPError(400, reason="Request body cannot be empty.") - content_type = self.request.headers.get("Content-Type", "").lower() + if content_type in ["application/json", "application/x-yaml"]: + # if content type is set properly, it should have alrady been parsed + metadata_from_body = self.args_json or self.args_yaml + elif self.request.body: + # if request body is provided but no proper content type is set + # we will parse it as YAML anyway + metadata_from_body = self._parse_yaml() + else: + metadata_from_body = None + + if metadata_from_body: + # Process the parsed metadata + parser = MetaKGParser() + parsed_metakg = parser.get_metakg(metadata_from_body) + + # Apply filtering to the combined data + if parsed_metakg: + for i, api_dict in enumerate(parsed_metakg): + parsed_metakg[i] = self.get_filtered_api(api_dict) + + # 
Send the response back to the client + response = { + "total": len(parsed_metakg), + "hits": parsed_metakg, + } - # Try to parse the request body based on content type - try: - if content_type == "application/json": - data = to_dict(raw_body, ctype="application/json") - elif content_type == "application/x-yaml": - data = to_dict(raw_body, ctype="application/x-yaml") - else: - # Default to YAML parsing if the content type is unknown or not specified - data = to_dict(raw_body) - except ValueError as val_err: - if 'mapping values are not allowed here' in str(val_err): - raise HTTPError(400, reason="Formatting issue, please consider using --data-binary to maintain YAML format.") - else: - raise HTTPError(400, reason="Invalid value, please provide a valid YAML object.") - except TypeError: - raise HTTPError(400, reason="Invalid type, provide valid type metadata.") - - # Ensure the parsed data is a dictionary - if not isinstance(data, dict): - raise ValueError("Invalid input data type. Please provide a valid JSON/YAML object.") - - # Process the parsed metadata - parser = MetaKGParser() - try: - trapi_data = parser.get_TRAPI_metadatas(data=data) - nontrapi_data = parser.get_non_TRAPI_metadatas(data=data) - except MetadataRetrievalError as retrieve_err: - raise HTTPError(retrieve_err.status_code, reason=retrieve_err.message) - except DownloadError: - raise HTTPError(400, reason="Error downloading the data from the provided input.") - - combined_data = trapi_data + nontrapi_data - - # Apply filtering to the combined data - if combined_data: - for i, api_dict in enumerate(combined_data): - combined_data[i] = self.get_filtered_api(api_dict) - - # Send the response back to the client - response = { - "total": len(combined_data), - "hits": combined_data, - } - - self.finish(response) + self.finish(response) + else: + self.write_error( + status_code=400, + reason="Request body cannot be empty.", + message="Please provide a valid JSON/YAML object in the request body." 
+ ) diff --git a/src/pipeline.py b/src/pipeline.py index 748a6be8..d1122574 100644 --- a/src/pipeline.py +++ b/src/pipeline.py @@ -2,14 +2,10 @@ from enum import Enum from typing import Dict, OrderedDict -from biothings.web.query import ( - AsyncESQueryBackend, - AsyncESQueryPipeline, - ESQueryBuilder, - ESResultFormatter, -) -from controller.base import OpenAPI, Swagger +from biothings.web.query import AsyncESQueryBackend, AsyncESQueryPipeline, ESQueryBuilder, ESResultFormatter from elasticsearch_dsl import Q, Search + +from controller.base import OpenAPI, Swagger from utils import decoder @@ -219,8 +215,8 @@ def apply_extras(self, search, options): apply extra filters """ # if not options._source: - # by default exclude api.bte or bte field, but can be included by specifying in the fields parameter - # options._source = ["-api.bte", "-bte"] + # by default exclude api.bte or bte field, but can be included by specifying in the fields parameter + # options._source = ["-api.bte", "-bte"] search = super().apply_extras(search, options) # apply extra filters from query parameters @@ -262,6 +258,7 @@ def adjust_index(self, original_index: str, query: str, **options: Dict) -> str: query_index = self.indices.get("metakg", None) return query_index + class MetaKGQueryPipeline(AsyncESQueryPipeline): def __init__(self, *args, **kwargs): # ns is an instance of BiothingsNamespace diff --git a/src/utils/metakg/metakg_errors.py b/src/utils/metakg/metakg_errors.py deleted file mode 100644 index 91db45c5..00000000 --- a/src/utils/metakg/metakg_errors.py +++ /dev/null @@ -1,16 +0,0 @@ -class MetadataRetrievalError(Exception): - """Custom exception for metadata retrieval failures.""" - - def __init__(self, status_code, message): - self.status_code = status_code - self.message = message - super().__init__(f"MetadataRetrievalError {status_code}: {message}") - - def to_dict(self): - """Return error details in JSON-like dictionary format.""" - return { - "code": self.status_code, - "success": False, - "error": "Metadata Retrieval Error", - "details": str(self) - } \ No newline at end of file diff --git a/src/utils/metakg/parser.py b/src/utils/metakg/parser.py index b05f6298..7dce2f7b 100644 --- a/src/utils/metakg/parser.py +++ b/src/utils/metakg/parser.py @@ -1,7 +1,8 @@ import json import logging from copy import copy -from utils.metakg.metakg_errors import MetadataRetrievalError +from typing import Dict, List, Optional, Union + import requests from .api import API @@ -13,46 +14,49 @@ class MetaKGParser: get_url_timeout = 60 metakg_errors = None - def get_non_TRAPI_metadatas(self, data=None, extra_data=None, url=None): + def get_metakg(self, + data: Optional[Union[Dict, API]] = None, + extra_data: Optional[Dict] = None, + url: Optional[str] = None) -> List[Dict]: """ - Extract MetaKG edges from a SmartAPI document provided as `data` or fetched from a `url`. - Raises an error if no valid input is given, or if parser fails to parse the document. + Extract and process metadata from a SmartAPI document or URL. + Returns MetaKG edges or propagates errors. 
""" if not data and not url: - raise MetadataRetrievalError(400, "Either data or url value is expected for this request, please provide data or a url.") + raise ValueError("Either data or url value is expected for this request, please provide data or a url.") + # if both data and url are provided, prefer data if data: - parser = API(smartapi_doc=data) + _api = data if isinstance(data, API) else API(data) elif url: - parser = API(url=url) + _api = API(url=url) + + if _api.is_trapi: + return self.get_TRAPI_metadatas(data=_api, extra_data=extra_data) else: - raise MetadataRetrievalError(404, "No metadata available from provided data or url.") + return self.get_non_TRAPI_metadatas(data=_api, extra_data=extra_data) - mkg = self.extract_metakgedges(parser.metadata["operations"], extra_data=extra_data) + def get_non_TRAPI_metadatas(self, data: Union[Dict, API], extra_data: Optional[Dict] = None) -> List[Dict]: + """ + Extract MetaKG edges from a SmartAPI document provided as `data` or fetched from a `url`. + Raises an error if no valid input is given, or if parser fails to parse the document. + """ + _api = data if isinstance(data, API) else API(data) + mkg = self.extract_metakgedges(_api.metadata["operations"], extra_data=extra_data) no_nodes = len({x["subject"] for x in mkg} | {x["object"] for x in mkg}) no_edges = len({x["predicate"] for x in mkg}) logger.info("Done [%s nodes, %s edges]", no_nodes, no_edges) return mkg - def get_TRAPI_metadatas(self, data=None, extra_data=None, url=None): + def get_TRAPI_metadatas(self, data: Union[Dict, API], extra_data: Optional[Dict] = None) -> List[Dict]: """ Extract and process TRAPI metadata from a SmartAPI document or URL. Returns MetaKG edges or propagates errors. """ - if not data and not url: - raise MetadataRetrievalError(400, "Either data or url value is expected for this request, please provide data or a url.") - - try: - if data: - metadata_list = self.get_TRAPI_with_metakg_endpoint(data=data) - else: - metadata_list = self.get_TRAPI_with_metakg_endpoint(url=url) - except MetadataRetrievalError: - raise MetadataRetrievalError(404, "No metadata available from provided data or url.") - + ops = [] + metadata_list = self.get_TRAPI_with_metakg_endpoint(data) count_metadata_list = len(metadata_list) self.metakg_errors = {} - ops = [] for i, metadata in enumerate(metadata_list): ops.extend(self.get_ops_from_metakg_endpoint(metadata, f"[{i + 1}/{count_metadata_list}]")) @@ -63,27 +67,22 @@ def get_TRAPI_metadatas(self, data=None, extra_data=None, url=None): return self.extract_metakgedges(ops, extra_data=extra_data) - def get_TRAPI_with_metakg_endpoint(self, data=None, url=None): + def get_TRAPI_with_metakg_endpoint(self, data: Union[Dict, API]): """ Retrieve TRAPI metadata from a SmartAPI document or URL. Returns metadata if TRAPI endpoints are found, else an empty list. 
""" - if not data and not url: - raise MetadataRetrievalError(400, "Either data or url value is expected for this request, please provide data or a url.") + metadatas = [] + _api = data if isinstance(data, API) else API(data) - # Initialize API with either data or URL - parser = API(smartapi_doc=data) if data else API(url=url) - - # Download the metadata - metadata = parser.metadata + metadata = _api.metadata _paths = metadata.get("paths", {}) _team = metadata.get("x-translator", {}).get("team") # Check for required TRAPI paths if "/meta_knowledge_graph" in _paths and "/query" in _paths and _team: - return [metadata] - else: - return [] + metadatas.append(metadata) + return metadatas def construct_query_url(self, server_url): if server_url.endswith("/"):