diff --git a/sdrf_pipelines/sdrf/sdrf_schema.py b/sdrf_pipelines/sdrf/sdrf_schema.py index 4ac8be85..2c3cf8bf 100644 --- a/sdrf_pipelines/sdrf/sdrf_schema.py +++ b/sdrf_pipelines/sdrf/sdrf_schema.py @@ -56,7 +56,12 @@ def ontology_term_parser(cell_value: str = None): else: for name in values: value_terms = name.split("=") - term[value_terms[0].strip().upper()] = value_terms[1].strip().lower() + if len(value_terms) == 2: + term[value_terms[0].strip().upper()] = value_terms[1].strip().lower() + else: + raise ValueError( + f"Invalid term: {name} after splitting by '=', please check the prefix (e.g. AC, NT, " f"TA..)" + ) return term @@ -109,8 +114,8 @@ def default_message(self): @staticmethod def validate_ontology_terms(cell_value, labels): """ - Check if a cell value is in a list of labels or list of strings - :param cell_value: string line in cell + Check if a cell value is in a list of labels or list of strings + :param cell_value: line in a cell :param labels: list of labels :return: """ @@ -124,7 +129,7 @@ def validate(self, series: pd.Series) -> pd.Series: """ Validate if the term is present in the provided ontology. 
This method looks in the provided ontology _ontology_name - :param series: return the series that do not match the criteria + :param series: return series that do not match the criteria :return: """ terms = [ontology_term_parser(x) for x in series.unique()] @@ -165,7 +170,7 @@ def __new__(cls, ordered: bool = False, min_columns: int = 0) -> Any: def validate(self, panda_sdrf: sdrf = None) -> typing.List[LogicError]: errors = [] - # Check minimum number of columns + # Check the minimum number of columns if check_minimum_columns(panda_sdrf, self._min_columns): error_message = ( "The number of columns in the SDRF ({}) is smaller than the number of mandatory fields ({})".format( @@ -179,7 +184,7 @@ def validate(self, panda_sdrf: sdrf = None) -> typing.List[LogicError]: if error_mandatory is not None: errors.append(error_mandatory) - # Check the columns order + # Check the column order error_columns_order = self.validate_columns_order(panda_sdrf) if error_columns_order is not None: errors.extend(error_columns_order) diff --git a/sdrf_pipelines/zooma/ols.py b/sdrf_pipelines/zooma/ols.py index ec2197d6..c06084ab 100644 --- a/sdrf_pipelines/zooma/ols.py +++ b/sdrf_pipelines/zooma/ols.py @@ -16,7 +16,7 @@ import requests -OLS = "https://www.ebi.ac.uk/ols" +OLS = "https://www.ebi.ac.uk/ols4" __all__ = ["OlsClient"] @@ -45,7 +45,7 @@ def _concat_str_or_list(input_str): def _dparse(iri): """ Double url encode the IRI, which is required - @:param iri IRI in the OLS + @:param iri: IRI of the term in the OLS """ return urllib.parse.quote_plus(urllib.parse.quote_plus(iri)) @@ -70,7 +70,7 @@ def __init__(self, ols_base=None, ontology=None, field_list=None, query_fields=N def besthit(self, name, **kwargs): """ - select first element of the /search API response + select the first element of the /search API response """ search_resp = self.search(name, **kwargs) if search_resp: @@ -82,8 +82,8 @@ def get_term(self, ontology, iri): """ Gets the data for a given term Args: - ontology: The name of the 
ontology - iri: The IRI of a term + ontology: The name of the ontology + iri: The IRI of a term """ url = self.ontology_term.format(ontology=ontology, iri=_dparse(iri)) @@ -117,7 +117,7 @@ def search( """ Searches the OLS with the given term - @:param query_fields: By default the search is performed over term labels, + @:param query_fields: By default, the search is performed over term labels, synonyms, descriptions, identifiers and annotation properties. This option allows to specify the fields to query, the defaults are `{label, synonym, description, short_form, obo_id, annotations, logical_description, iri}` @@ -196,7 +196,7 @@ def select(self, name, ontology=None, field_list=None): """Select terms, Tuned specifically to support applications such as autocomplete. - .. seealso:: https://www.ebi.ac.uk/ols/docs/api#_select + .. seealso:: https://www.ebi.ac.uk/ols4/docs/api#_select """ params = {"q": name} if ontology: