Skip to content

Commit

Permalink
Merge pull request #971 from rkingsbury/np20
Browse files Browse the repository at this point in the history
pre-commit updates and Numpy 2.0 support
  • Loading branch information
rkingsbury authored Jun 20, 2024
2 parents 94f5bcf + c450ad5 commit 84f864e
Show file tree
Hide file tree
Showing 43 changed files with 55 additions and 86 deletions.
8 changes: 4 additions & 4 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,26 +10,26 @@ ci:

repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.0.280
rev: v0.4.10
hooks:
- id: ruff
args: [--fix, --show-fixes, --ignore, D, --extend-select, D411]

- repo: https://github.com/psf/black
rev: 23.7.0
rev: 24.4.2
hooks:
- id: black

- repo: https://github.com/codespell-project/codespell
rev: v2.2.5
rev: v2.3.0
hooks:
- id: codespell
stages: [commit, commit-msg]
exclude_types: [html]
additional_dependencies: [tomli] # needed to read pyproject.toml below py3.11

- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
rev: v4.6.0
hooks:
- id: check-case-conflict
- id: check-symlinks
Expand Down
2 changes: 1 addition & 1 deletion docs/getting_started/simple_builder.md
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ One advantage of using the generator approach is it is less memory intensive tha

`process_item` just has to do the parallelizable work on each item. Since the item is whatever comes out of `get_items`, you know exactly what it should be. It may be a single document, a list of documents, a mapping, a set, etc.

Our simple process item just has to multiply one field by `self.mulitplier`:
Our simple process item just has to multiply one field by `self.multiplier`:

``` python

Expand Down
14 changes: 8 additions & 6 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,17 +34,17 @@ dependencies = [
"pydash>=4.1.0",
"jsonschema>=3.1.1",
"tqdm>=4.19.6",
"pandas>=2.1.4",
"jsonlines>=4.0.0",
"aioitertools>=0.5.1",
"numpy>=1.17.3",
"fastapi>=0.42.0",
"pyzmq>=24.0.1",
"dnspython>=1.16.0",
"sshtunnel>=0.1.5",
"msgpack>=0.5.6",
"orjson>=3.9.0",
"boto3>=1.20.41",
"python-dateutil>=2.8.2",
"uvicorn>=0.18.3",
]

[project.urls]
Expand All @@ -63,7 +63,7 @@ montydb = ["montydb>=2.3.12"]
mongogrant = ["mongogrant>=0.3.1"]
notebook_runner = ["IPython>=8.11", "nbformat>=5.0", "regex>=2020.6"]
azure = ["azure-storage-blob>=12.16.0", "azure-identity>=1.12.0"]
open_data = ["pandas>=2.1.4", "jsonlines>=4.0.0"]
api = ["fastapi>=0.42.0","uvicorn>=0.18.3"]
testing = [
"pytest",
"pytest-cov",
Expand Down Expand Up @@ -102,6 +102,9 @@ line-length = 120

[tool.ruff]
line-length = 120
src = ["src"]

[tool.ruff.lint]
select = [
"B", # flake8-bugbear
"C4", # flake8-comprehensions
Expand Down Expand Up @@ -132,6 +135,7 @@ select = [
"UP", # pyupgrade
"W", # pycodestyle warning
"YTT", # flake8-2020
"NPY201", # numpy 2.0
]
ignore = [
"B023", # Function definition does not bind loop variable
Expand All @@ -155,10 +159,8 @@ ignore = [
]
pydocstyle.convention = "google"
isort.split-on-trailing-comma = false
src = ["src"]


[tool.ruff.per-file-ignores]
[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["F401"]
"tasks.py" = ["D"]
"tests/*" = ["D"]
Expand Down
1 change: 1 addition & 0 deletions src/maggma/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Primary Maggma module."""

from importlib.metadata import PackageNotFoundError, version

try:
Expand Down
1 change: 0 additions & 1 deletion src/maggma/api/API.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,6 @@ def app(self):
@app.head("/heartbeat", include_in_schema=False)
def heartbeat():
"""API Heartbeat for Load Balancing."""

return {
"status": "OK",
"time": datetime.utcnow(),
Expand Down
2 changes: 1 addition & 1 deletion src/maggma/api/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
""" Simple API Interface for Maggma. """
"""Simple API Interface for Maggma."""
2 changes: 0 additions & 2 deletions src/maggma/api/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@


class Meta(BaseModel):

"""
Meta information for the MAPI Response.
"""
Expand Down Expand Up @@ -76,7 +75,6 @@ def default_meta(cls, v, values):


class S3URLDoc(BaseModel):

"""
S3 pre-signed URL data returned by the S3 URL resource.
"""
Expand Down
8 changes: 3 additions & 5 deletions src/maggma/api/query_operator/dynamic.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def query(**kwargs) -> STORE_PARAMS:
self.query = query # type: ignore

def query(self):
"Stub query function for abstract class."
"""Stub query function for abstract class."""

@abstractmethod
def field_to_operator(self, name: str, field: FieldInfo) -> list[tuple[str, Any, Query, Callable[..., dict]]]:
Expand Down Expand Up @@ -105,7 +105,7 @@ def as_dict(self) -> dict:


class NumericQuery(DynamicQueryOperator):
"Query Operator to enable searching on numeric fields."
"""Query Operator to enable searching on numeric fields."""

def field_to_operator(self, name: str, field: FieldInfo) -> list[tuple[str, Any, Query, Callable[..., dict]]]:
"""
Expand All @@ -115,7 +115,6 @@ def field_to_operator(self, name: str, field: FieldInfo) -> list[tuple[str, Any,
Query object,
and callable to convert it into a query dict.
"""

ops = []
field_type = field.annotation

Expand Down Expand Up @@ -190,7 +189,7 @@ def field_to_operator(self, name: str, field: FieldInfo) -> list[tuple[str, Any,


class StringQueryOperator(DynamicQueryOperator):
"Query Operator to enable searching on string fields."
"""Query Operator to enable searching on string fields."""

def field_to_operator(self, name: str, field: FieldInfo) -> list[tuple[str, Any, Query, Callable[..., dict]]]:
"""
Expand All @@ -200,7 +199,6 @@ def field_to_operator(self, name: str, field: FieldInfo) -> list[tuple[str, Any,
Query object,
and callable to convert it into a query dict.
"""

ops = []
field_type: type = field.annotation

Expand Down
4 changes: 1 addition & 3 deletions src/maggma/api/query_operator/pagination.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ def __init__(self, default_limit: int = 100, max_limit: int = 1000):
default_limit: the default number of documents to return
max_limit: max number of documents to return.
"""

self.default_limit = default_limit
self.max_limit = max_limit

Expand All @@ -39,7 +38,6 @@ def query(
"""
Pagination parameters for the API Endpoint.
"""

if _page is not None:
if _per_page > max_limit:
raise HTTPException(
Expand Down Expand Up @@ -78,7 +76,7 @@ def query(
self.query = query # type: ignore

def query(self):
"Stub query function for abstract class."
"""Stub query function for abstract class."""

def meta(self) -> dict:
"""
Expand Down
5 changes: 1 addition & 4 deletions src/maggma/api/query_operator/sparse_fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ def __init__(self, model: type[BaseModel], default_fields: Optional[list[str]] =
model: PyDantic Model that represents the underlying data source
default_fields: default fields to return in the API response if no fields are explicitly requested.
"""

self.model = model

model_name = self.model.__name__ # type: ignore
Expand All @@ -34,7 +33,6 @@ def query(
"""
Sparse field selection parameters for the API Endpoint.
"""

properties = _fields.split(",") if isinstance(_fields, str) else self.default_fields
if _all_fields:
properties = model_fields
Expand All @@ -44,7 +42,7 @@ def query(
self.query = query # type: ignore

def query(self):
"Stub query function for abstract class."
"""Stub query function for abstract class."""

def meta(self) -> dict:
"""
Expand All @@ -56,7 +54,6 @@ def as_dict(self) -> dict:
"""
Special as_dict implemented to convert pydantic models into strings.
"""

d = super().as_dict() # Ensures sub-classes serialize correctly
d["model"] = f"{self.model.__module__}.{self.model.__name__}" # type: ignore
return d
Expand Down
2 changes: 1 addition & 1 deletion src/maggma/api/query_operator/submission.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,4 @@ def query(
self.query = query

def query(self):
"Stub query function for abstract class."
"""Stub query function for abstract class."""
1 change: 0 additions & 1 deletion src/maggma/api/resource/aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@ def prepare_endpoint(self):
Internal method to prepare the endpoint by setting up default handlers
for routes.
"""

self.build_dynamic_model_search()

def build_dynamic_model_search(self):
Expand Down
2 changes: 0 additions & 2 deletions src/maggma/api/resource/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ def redirect_unslashed():
Redirects a URL without a trailing forward slash to the
resource URL with the trailing forward slash.
"""

url = self.router.url_path_for("/")
return RedirectResponse(url=url, status_code=301)

Expand All @@ -71,7 +70,6 @@ def as_dict(self) -> dict:
"""
Special as_dict implemented to convert pydantic models into strings.
"""

d = super().as_dict() # Ensures sub-classes serialize correctly
d["model"] = f"{self.model.__module__}.{self.model.__name__}"
return d
Expand Down
1 change: 0 additions & 1 deletion src/maggma/api/resource/post_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,6 @@ def prepare_endpoint(self):
Internal method to prepare the endpoint by setting up default handlers
for routes.
"""

self.build_dynamic_model_search()

def build_dynamic_model_search(self):
Expand Down
1 change: 0 additions & 1 deletion src/maggma/api/resource/read_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,6 @@ def prepare_endpoint(self):
Internal method to prepare the endpoint by setting up default handlers
for routes.
"""

if self.enable_get_by_key:
self.build_get_by_key()

Expand Down
1 change: 0 additions & 1 deletion src/maggma/api/resource/s3_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ def prepare_endpoint(self):
Internal method to prepare the endpoint by setting up default handlers
for routes.
"""

self.build_get_by_key()

def build_get_by_key(self):
Expand Down
2 changes: 0 additions & 2 deletions src/maggma/api/resource/submission.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ def __init__(
post_sub_path: POST sub-URL path for the resource.
patch_sub_path: PATCH sub-URL path for the resource.
"""

if isinstance(state_enum, Enum) and default_state not in [entry.value for entry in state_enum]: # type: ignore
raise RuntimeError("If data is stateful a state enum and valid default value must be provided")

Expand Down Expand Up @@ -118,7 +117,6 @@ def prepare_endpoint(self):
Internal method to prepare the endpoint by setting up default handlers
for routes.
"""

if self.enable_default_search:
self.build_search_data()

Expand Down
2 changes: 1 addition & 1 deletion src/maggma/api/resource/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def generate_query_pipeline(query: dict, store: Store):
pipeline.append(sort_dict)

pipeline.append({"$project": projection_dict})
pipeline.append({"$skip": query["skip"] if "skip" in query else 0})
pipeline.append({"$skip": query.get("skip", 0)})

if query.get("limit", False):
pipeline.append({"$limit": query["limit"]})
Expand Down
2 changes: 0 additions & 2 deletions src/maggma/api/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@ def attach_signature(function: Callable, defaults: dict, annotations: dict):
defaults: dictionary of parameters -> default values
annotations: dictionary of type annotations for the parameters
"""

required_params = [
inspect.Parameter(
param,
Expand Down Expand Up @@ -106,7 +105,6 @@ def api_sanitize(
allow_dict_msonable (bool): Whether to allow dictionaries in place of MSONable quantities.
Defaults to False
"""

models = [
model for model in get_flat_models_from_model(pydantic_model) if issubclass(model, BaseModel)
] # type: list[BaseModel]
Expand Down
3 changes: 1 addition & 2 deletions src/maggma/builders/group_builder.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Many-to-Many GroupBuilder.
"""

import traceback
from abc import ABCMeta, abstractmethod
from collections.abc import Iterable, Iterator
Expand Down Expand Up @@ -183,7 +184,6 @@ def get_ids_to_process(self) -> Iterable:
"""
Gets the IDs that need to be processed.
"""

query = self.query or {}

distinct_from_target = list(self.target.distinct(self._target_keys_field, criteria=query))
Expand Down Expand Up @@ -217,7 +217,6 @@ def get_groups_from_keys(self, keys) -> set[tuple]:
"""
Get the groups by grouping_keys for these documents.
"""

grouping_keys = self.grouping_keys

groups: set[tuple] = set()
Expand Down
8 changes: 3 additions & 5 deletions src/maggma/builders/map_builder.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
One-to-One Map Builder and a simple CopyBuilder implementation.
"""

import traceback
from abc import ABCMeta, abstractmethod
from collections.abc import Iterator
Expand Down Expand Up @@ -102,7 +103,6 @@ def get_items(self):
Generic get items for Map Builder designed to perform
incremental building.
"""

self.logger.info(f"Starting {self.__class__.__name__} Builder")

self.ensure_indexes()
Expand All @@ -126,20 +126,18 @@ def get_items(self):
self.total = len(keys)
for chunked_keys in grouper(keys, self.chunk_size):
chunked_keys = list(chunked_keys)
for doc in list(
yield from list(
self.source.query(
criteria={self.source.key: {"$in": chunked_keys}},
properties=projection,
)
):
yield doc
)

def process_item(self, item: dict):
"""
Generic process items to process a dictionary using
a map function.
"""

self.logger.debug(f"Processing: {item[self.source.key]}")

time_start = time()
Expand Down
Loading

0 comments on commit 84f864e

Please sign in to comment.