diff --git a/.docker/docker_config.json b/.docker/docker_config.json index 6b0a8de35..4b5957ccb 100644 --- a/.docker/docker_config.json +++ b/.docker/docker_config.json @@ -1,5 +1,6 @@ { "debug": true, + "page_limit": 5, "default_db": "test_server", "base_url": "http://gh_actions_host:3213", "implementation": { diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8a8c298c5..7c62a743b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -135,7 +135,7 @@ jobs: services: mongo: - image: mongo:6 + image: mongo:7 ports: - 27017:27017 postgres: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d55744b2f..7e09132cd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,7 +3,7 @@ default_language_version: repos: - repo: https://github.com/ambv/black - rev: 23.7.0 + rev: 23.9.1 hooks: - id: black name: Blacken @@ -24,7 +24,7 @@ repos: args: [--markdown-linebreak-ext=md] - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: 'v0.0.286' + rev: 'v0.0.291' hooks: - id: ruff args: [--fix] diff --git a/CHANGELOG.md b/CHANGELOG.md index b3e46d5bf..b8a54481f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,16 @@ # Changelog +## [v0.25.3](https://github.com/Materials-Consortia/optimade-python-tools/tree/v0.25.3) (2023-09-29) + +[Full Changelog](https://github.com/Materials-Consortia/optimade-python-tools/compare/v0.25.2...v0.25.3) + +**Merged pull requests:** + +- Add mp-api and emmet pins to avoid premature pydantic v2 upgrade [\#1795](https://github.com/Materials-Consortia/optimade-python-tools/pull/1795) ([ml-evs](https://github.com/ml-evs)) +- Debugging `links->next` issues [\#1794](https://github.com/Materials-Consortia/optimade-python-tools/pull/1794) ([ml-evs](https://github.com/ml-evs)) +- Add option for custom landing pages [\#1788](https://github.com/Materials-Consortia/optimade-python-tools/pull/1788) ([ml-evs](https://github.com/ml-evs)) +- Add config option `mongo_count_timeout` to 
skip the global count per request [\#1757](https://github.com/Materials-Consortia/optimade-python-tools/pull/1757) ([ml-evs](https://github.com/ml-evs)) + ## [v0.25.2](https://github.com/Materials-Consortia/optimade-python-tools/tree/v0.25.2) (2023-09-12) [Full Changelog](https://github.com/Materials-Consortia/optimade-python-tools/compare/v0.25.1...v0.25.2) diff --git a/docs/static/default_config.json b/docs/static/default_config.json index fe84f4f1d..ba510dc4f 100644 --- a/docs/static/default_config.json +++ b/docs/static/default_config.json @@ -12,7 +12,7 @@ "base_url": null, "implementation": { "name": "OPTIMADE Python Tools", - "version": "0.25.2", + "version": "0.25.3", "source_url": "https://github.com/Materials-Consortia/optimade-python-tools", "maintainer": {"email": "dev@optimade.org"} }, diff --git a/openapi/index_openapi.json b/openapi/index_openapi.json index 3517374b7..fd2b34fd3 100644 --- a/openapi/index_openapi.json +++ b/openapi/index_openapi.json @@ -2,7 +2,7 @@ "openapi": "3.1.0", "info": { "title": "OPTIMADE API - Index meta-database", - "description": "The [Open Databases Integration for Materials Design (OPTIMADE) consortium](https://www.optimade.org/) aims to make materials databases interoperational by developing a common REST API.\nThis is the \"special\" index meta-database.\n\nThis specification is generated using [`optimade-python-tools`](https://github.com/Materials-Consortia/optimade-python-tools/tree/v0.25.2) v0.25.2.", + "description": "The [Open Databases Integration for Materials Design (OPTIMADE) consortium](https://www.optimade.org/) aims to make materials databases interoperational by developing a common REST API.\nThis is the \"special\" index meta-database.\n\nThis specification is generated using [`optimade-python-tools`](https://github.com/Materials-Consortia/optimade-python-tools/tree/v0.25.3) v0.25.3.", "version": "1.1.0" }, "paths": { diff --git a/openapi/openapi.json b/openapi/openapi.json index 376a81909..985bc5e77 
100644 --- a/openapi/openapi.json +++ b/openapi/openapi.json @@ -2,7 +2,7 @@ "openapi": "3.1.0", "info": { "title": "OPTIMADE API", - "description": "The [Open Databases Integration for Materials Design (OPTIMADE) consortium](https://www.optimade.org/) aims to make materials databases interoperational by developing a common REST API.\n\nThis specification is generated using [`optimade-python-tools`](https://github.com/Materials-Consortia/optimade-python-tools/tree/v0.25.2) v0.25.2.", + "description": "The [Open Databases Integration for Materials Design (OPTIMADE) consortium](https://www.optimade.org/) aims to make materials databases interoperational by developing a common REST API.\n\nThis specification is generated using [`optimade-python-tools`](https://github.com/Materials-Consortia/optimade-python-tools/tree/v0.25.3) v0.25.3.", "version": "1.1.0" }, "paths": { diff --git a/optimade/__init__.py b/optimade/__init__.py index d1775177d..f9e0f3d47 100644 --- a/optimade/__init__.py +++ b/optimade/__init__.py @@ -1,2 +1,2 @@ -__version__ = "0.25.2" +__version__ = "0.25.3" __api_version__ = "1.1.0" diff --git a/optimade/server/config.py b/optimade/server/config.py index bd090bb95..78a33fee2 100644 --- a/optimade/server/config.py +++ b/optimade/server/config.py @@ -157,6 +157,12 @@ class ServerConfig(BaseSettings): None, description="Host settings to pass through to the `Elasticsearch` class." ) + mongo_count_timeout: int = Field( + 5, + description="""Number of seconds to allow MongoDB to perform a full database count before falling back to `null`. 
+This operation can require a full COLLSCAN for empty queries which can be prohibitively slow if the database does not fit into the active set, hence a timeout can drastically speed-up response times.""", + ) + mongo_database: str = Field( "optimade", description="Mongo database for collection data" ) @@ -273,6 +279,11 @@ class ServerConfig(BaseSettings): ), ) + custom_landing_page: Optional[Union[str, Path]] = Field( + None, + description="The location of a custom landing page (Jinja template) to use for the API.", + ) + index_schema_url: Optional[Union[str, AnyHttpUrl]] = Field( f"https://schemas.optimade.org/openapi/v{__api_version__}/optimade_index.json", description=( diff --git a/optimade/server/entry_collections/entry_collections.py b/optimade/server/entry_collections/entry_collections.py index 6153216ad..d59bb8713 100644 --- a/optimade/server/entry_collections/entry_collections.py +++ b/optimade/server/entry_collections/entry_collections.py @@ -2,7 +2,7 @@ import re import warnings from abc import ABC, abstractmethod -from typing import Any, Dict, Iterable, List, Set, Tuple, Type, Union +from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Type, Union from lark import Transformer @@ -126,7 +126,7 @@ def insert(self, data: List[EntryResource]) -> None: """ @abstractmethod - def count(self, **kwargs: Any) -> int: + def count(self, **kwargs: Any) -> Union[int, None]: """Returns the number of entries matching the query specified by the keyword arguments. @@ -137,7 +137,7 @@ def count(self, **kwargs: Any) -> int: def find( self, params: Union[EntryListingQueryParams, SingleEntryQueryParams] - ) -> Tuple[Union[None, Dict, List[Dict]], int, bool, Set[str], Set[str],]: + ) -> Tuple[Union[None, Dict, List[Dict]], Optional[int], bool, Set[str], Set[str],]: """ Fetches results and indicates if more data is available. 
@@ -203,7 +203,11 @@ def find( if single_entry: results = results[0] # type: ignore[assignment] - if CONFIG.validate_api_response and data_returned > 1: + if ( + CONFIG.validate_api_response + and data_returned is not None + and data_returned > 1 + ): raise NotFound( detail=f"Instead of a single entry, {data_returned} entries were found", ) @@ -221,7 +225,7 @@ def find( @abstractmethod def _run_db_query( self, criteria: Dict[str, Any], single_entry: bool = False - ) -> Tuple[List[Dict[str, Any]], int, bool]: + ) -> Tuple[List[Dict[str, Any]], Optional[int], bool]: """Run the query on the backend and collect the results. Arguments: diff --git a/optimade/server/entry_collections/mongo.py b/optimade/server/entry_collections/mongo.py index 03a63bcce..b7bd20f9b 100644 --- a/optimade/server/entry_collections/mongo.py +++ b/optimade/server/entry_collections/mongo.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Tuple, Type, Union +from typing import Any, Dict, List, Optional, Tuple, Type, Union from optimade.filtertransformers.mongo import MongoTransformer from optimade.models import EntryResource @@ -10,6 +10,7 @@ if CONFIG.database_backend.value == "mongodb": from pymongo import MongoClient, version_tuple + from pymongo.errors import ExecutionTimeout if version_tuple[0] < 4: LOGGER.warning( @@ -67,9 +68,9 @@ def __len__(self) -> int: """Returns the total number of entries in the collection.""" return self.collection.estimated_document_count() - def count(self, **kwargs: Any) -> int: + def count(self, **kwargs: Any) -> Union[int, None]: """Returns the number of entries matching the query specified - by the keyword arguments. + by the keyword arguments, or `None` if the count timed out. Parameters: **kwargs: Query parameters as keyword arguments. 
The keys @@ -80,9 +81,15 @@ def count(self, **kwargs: Any) -> int: for k in list(kwargs.keys()): if k not in ("filter", "skip", "limit", "hint", "maxTimeMS"): del kwargs[k] - if "filter" not in kwargs: # "filter" is needed for count_documents() - kwargs["filter"] = {} - return self.collection.count_documents(**kwargs) + if "filter" not in kwargs: + return self.collection.estimated_document_count() + else: + if "maxTimeMS" not in kwargs: + kwargs["maxTimeMS"] = 1000 * CONFIG.mongo_count_timeout + try: + return self.collection.count_documents(**kwargs) + except ExecutionTimeout: + return None def insert(self, data: List[EntryResource]) -> None: """Add the given entries to the underlying database. @@ -136,7 +143,7 @@ def handle_query_params( def _run_db_query( self, criteria: Dict[str, Any], single_entry: bool = False - ) -> Tuple[List[Dict[str, Any]], int, bool]: + ) -> Tuple[List[Dict[str, Any]], Optional[int], bool]: """Run the query on the backend and collect the results. Arguments: @@ -163,7 +170,12 @@ def _run_db_query( criteria_nolimit.pop("limit", None) skip = criteria_nolimit.pop("skip", 0) data_returned = self.count(**criteria_nolimit) - more_data_available = nresults_now + skip < data_returned + # Only correct most of the time: if the total number of remaining results is exactly the page limit + # then this will incorrectly say there is more_data_available + if data_returned is None: + more_data_available = nresults_now == criteria.get("limit", 0) + else: + more_data_available = nresults_now + skip < data_returned else: # SingleEntryQueryParams, e.g., /structures/{entry_id} data_returned = nresults_now diff --git a/optimade/server/routers/landing.py b/optimade/server/routers/landing.py index 151fa85b2..3b51f2a1c 100644 --- a/optimade/server/routers/landing.py +++ b/optimade/server/routers/landing.py @@ -30,9 +30,11 @@ def render_landing_page(url: str) -> HTMLResponse: major_version = __api_version__.split(".")[0] versioned_url = 
f"{get_base_url(url)}/v{major_version}/" - template_dir = Path(__file__).parent.joinpath("static").resolve() - - html = (template_dir / "landing_page.html").read_text() + if CONFIG.custom_landing_page: + html = Path(CONFIG.custom_landing_page).resolve().read_text() + else: + template_dir = Path(__file__).parent.joinpath("static").resolve() + html = (template_dir / "landing_page.html").read_text() # Build a dictionary that maps the old Jinja keys to the new simplified replacements replacements = { diff --git a/optimade/server/routers/utils.py b/optimade/server/routers/utils.py index 72e92fcf7..cecf50658 100644 --- a/optimade/server/routers/utils.py +++ b/optimade/server/routers/utils.py @@ -55,7 +55,7 @@ class JSONAPIResponse(JSONResponse): def meta_values( url: Union[urllib.parse.ParseResult, urllib.parse.SplitResult, StarletteURL, str], - data_returned: int, + data_returned: Optional[int], data_available: int, more_data_available: bool, schema: Optional[str] = None, diff --git a/optimade/validator/validator.py b/optimade/validator/validator.py index 27bd29232..82f4b583a 100644 --- a/optimade/validator/validator.py +++ b/optimade/validator/validator.py @@ -1317,6 +1317,10 @@ def _test_page_limit( next_link = response_json["links"]["next"] if isinstance(next_link, dict): next_link = next_link["href"] + if not next_link: + raise ResponseError( + "Endpoint suggested more data was available but provided no valid links->next link." + ) except KeyError: raise ResponseError( "Endpoint suggested more data was available but provided no valid links->next link." 
diff --git a/pyproject.toml b/pyproject.toml index 8fe13c214..1cbfffc3a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -81,7 +81,14 @@ http_client = [ ase = ["ase~=3.22"] cif = ["numpy>=1.20"] -pymatgen = ["pymatgen>=2022"] +# we don't support pydantic v2 yet, but pymatgen has an (unnecessary IMO) tight coupling to mp-api which now enforces it +# as "true" users of pydantic, if we can't update our stuff to v2 (including all the fieldinfo hacks) then we will +# probably just have to hard pin or remove pymatgen support +pymatgen = [ + "pymatgen>=2022", + "mp-api<=0.36", + "emmet-core<=0.68" +] jarvis = ["jarvis-tools>=2023.1.8"] client = ["optimade[cif]"] diff --git a/requirements-client.txt b/requirements-client.txt index 93b8452b7..2c0b7f4f4 100644 --- a/requirements-client.txt +++ b/requirements-client.txt @@ -1,5 +1,8 @@ aiida-core==2.4.0 ase==3.22.1 -jarvis-tools==2023.8.10 +emmet_core==0.68.0 +jarvis-tools==2023.9.20 +jarvis-tools==2023.9.20 +mp-api==0.36.1 numpy>=1.20 pymatgen==2023.9.10 diff --git a/requirements-dev.txt b/requirements-dev.txt index 5de285d8f..33423d46c 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -9,5 +9,5 @@ pre-commit==3.4.0 pylint==2.17.5 pytest==7.4.2 pytest-cov==4.1.0 -ruff==0.0.287 +ruff==0.0.291 types-all==1.0.0 diff --git a/requirements-docs.txt b/requirements-docs.txt index 34d338613..e8af19873 100644 --- a/requirements-docs.txt +++ b/requirements-docs.txt @@ -1,5 +1,5 @@ mike==1.1.2 -mkdocs==1.5.2 +mkdocs==1.5.3 mkdocs-awesome-pages-plugin==2.9.2 -mkdocs-material==9.2.8 +mkdocs-material==9.4.2 mkdocstrings[python-legacy]==0.23.0 diff --git a/requirements-http-client.txt b/requirements-http-client.txt index a53f8ddc8..9e731df04 100644 --- a/requirements-http-client.txt +++ b/requirements-http-client.txt @@ -1,3 +1,3 @@ click==8.1.7 -httpx==0.24.1 -rich==13.5.2 +httpx==0.25.0 +rich==13.5.3 diff --git a/tests/adapters/structures/utils.py b/tests/adapters/structures/utils.py index 232b80495..b11df3182 100644 --- 
a/tests/adapters/structures/utils.py +++ b/tests/adapters/structures/utils.py @@ -9,6 +9,6 @@ def get_min_ver(dependency: str) -> str: for line in setup_file.readlines(): min_ver = re.findall(rf'"{dependency}((=|!|<|>|~)=|>|<)(.+)"', line) if min_ver: - return min_ver[0][2].split(";")[0].split(",")[0] + return min_ver[0][2].split(";")[0].split(",")[0].strip('"') else: raise RuntimeError(f"Cannot find {dependency} dependency in pyproject.toml")