Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix table interface + add test #256

Merged
merged 4 commits into from
Oct 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 84 additions & 27 deletions caveclient/tools/table_manager.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import datetime
import logging
import re
import warnings
from itertools import chain
from typing import Optional

import attrs
from cachetools import TTLCache, cached, keys
Expand Down Expand Up @@ -653,16 +655,44 @@ def __attrs_post_init__(self):
class TableQueryKwargs(BaseQueryKwargs):
def query(
self,
select_columns=None,
offset=None,
limit=None,
split_positions=False,
materialization_version=None,
timestamp=None,
metadata=True,
desired_resolution=None,
get_counts=False,
select_columns: Optional[list] = None,
offset: Optional[int] = None,
limit: Optional[int] = None,
split_positions: bool = False,
materialization_version: Optional[int] = None,
timestamp: Optional[datetime.datetime] = None,
metadata: bool = True,
desired_resolution: Optional[list] = None,
get_counts: bool = False,
):
"""Set data return options for the specified query

Parameters
----------
select_columns : list, optional
List of columns to be returned, by default None
offset : int, optional
Sets how many rows to skip before starting to return results, by default None
limit : int, optional
Sets the total number of results returned, by default None
split_positions : bool, optional
If True, leaves position data in separate columns by component, by default False
materialization_version : int, optional
Specifies a non-default materialization version, by default None
timestamp : datetime.datetime, optional
Sets the timestamp to look up root ids at, by default None
metadata : bool, optional
Whether to return table and query metadata under the `.attrs` property, by default True
desired_resolution : list, optional
A three element vector setting the return resolution of position information, by default None
get_counts : bool, optional
If True, only return the number of rows that match the query, by default False

Returns
-------
pd.DataFrame
Query data
"""
if self._reference_table is None:
qry_table = self._base_table
return client.materialize.query_table(
Expand Down Expand Up @@ -704,18 +734,45 @@ def query(
metadata=metadata,
desired_resolution=desired_resolution,
allow_missing_lookups=False,
**self.filter_kwargs_mat,
)

def live_query(
self,
timestamp,
offset=None,
limit=None,
split_positions=False,
metadata=True,
desired_resolution=None,
allow_missing_lookups=False,
timestamp: datetime.datetime,
offset: Optional[int] = None,
limit: Optional[int] = None,
split_positions: bool = False,
metadata: bool = True,
desired_resolution: Optional[list] = None,
allow_missing_lookups: bool = False,
):
"""Set data return options for the specified live query

Parameters
----------
timestamp : datetime.datetime
Timestamp at which to evaluate the live query
offset : int, optional
Sets how many rows to skip before starting to return results, by default None
limit : int, optional
Sets the total number of results returned, by default None
split_positions : bool, optional
If True, leaves position data in separate columns by component, by default False
metadata : bool, optional
Whether to return table and query metadata under the `.attrs` property, by default True
desired_resolution : list, optional
A three element vector setting the return resolution of position information, by default None
allow_missing_lookups : bool, optional
If True, will return values even if the database is still ingesting new information, by default False.
IMPORTANT: If set to True, the database could return different answers to the same query at the same timestamp.
Do not set to True if writing code you intend to give consistent answers every time.

Returns
-------
pd.DataFrame
Query data
"""

logger.warning(
"The `client.materialize.tables` interface is experimental and might experience breaking changes before the feature is stabilized."
)
Expand Down Expand Up @@ -754,14 +811,14 @@ def live_query(
class ViewQueryKwargs(BaseQueryKwargs):
def query(
self,
select_columns=None,
offset=None,
limit=None,
split_positions=False,
materialization_version=None,
metadata=True,
desired_resolution=None,
get_counts=False,
select_columns: Optional[list] = None,
offset: Optional[int] = None,
limit: Optional[int] = None,
split_positions: bool = False,
materialization_version: Optional[int] = None,
metadata: bool = True,
desired_resolution: Optional[list] = None,
get_counts: bool = False,
):
"""Query views through the table interface

Expand Down Expand Up @@ -824,8 +881,8 @@ def make_query_filter(table_name, meta, client):
table_name, class_vals, bases=(make_kwargs_mixin(client),)
)
QueryFilter.__doc__ = desc
setattr(QueryFilter, "query", QueryFilter().query)
setattr(QueryFilter, "live_query", QueryFilter().live_query)
setattr(QueryFilter, "get_all", QueryFilter().query)
setattr(QueryFilter, "get_all_live", QueryFilter().live_query)

fields = [
x.name
Expand Down Expand Up @@ -872,7 +929,7 @@ def make_query_filter_view(view_name, meta, schema, client):
)
ViewQueryFilter.__doc__ = desc

setattr(ViewQueryFilter, "query", ViewQueryFilter().query)
setattr(ViewQueryFilter, "get_all", ViewQueryFilter().query)

fields = [
x.name
Expand Down
8 changes: 5 additions & 3 deletions docs/tutorials/materialization.md
Original file line number Diff line number Diff line change
Expand Up @@ -342,11 +342,13 @@ nuc_df = client.materialize.tables.nucleus_detection_v0(
).query()
```

If you are not using any filters, you can omit the parenthesis and use the `query`
or `live_query` function directly. The first example could be rewritten as:
If you are not using any filters, you can omit the parentheses and use the `get_all`
or `get_all_live` functions directly, which act similarly to the `query` and `live_query` functions
respectively.
The first example could be rewritten as:

```python
nuc_df = client.materialize.tables.nucleus_detection_v0.query()
nuc_df = client.materialize.tables.nucleus_detection_v0.get_all()
```

If you want to list all available fields, you can use the `.fields` attribute.
Expand Down
31 changes: 31 additions & 0 deletions tests/test_materialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,37 @@ def test_matclient_v3_tableinterface(self, myclient, mocker):
vqry = myclient.materialize.views.single_neurons(pt_root_id=[123, 456])
assert 123 in vqry.filter_kwargs_mat.get("filter_in_dict").get("pt_root_id")

qry_url = materialization_endpoints_v3["join_query"].format_map(
endpoint_mapping
)
query_d = {
"return_pyarrow": True,
"arrow_format": True,
"split_positions": True,
}
query_string = urlencode(query_d)
qry_url = qry_url + "?" + query_string
correct_query_data = {
"filter_in_dict": {"nucleus_detection_v0": {"pt_root_id": [123, 456]}},
"filter_equal_dict": {"allen_column_mtypes_v2": {"target_id": 271700}},
"suffix_map": {
"allen_column_mtypes_v2": "_ref",
"nucleus_detection_v0": "",
},
"tables": [
["allen_column_mtypes_v2", "target_id"],
["nucleus_detection_v0", "id"],
],
}
responses.add(
responses.POST,
qry_url,
body=serialize_dataframe(pd.DataFrame()),
content_type="data.arrow",
match=[json_params_matcher(correct_query_data)],
)
qry.query(metadata=False)

@responses.activate
def test_matclient(self, myclient, mocker):
endpoint_mapping = self.default_mapping
Expand Down
Loading