Skip to content

Commit

Permalink
Improve support for Transfer filter params
Browse files Browse the repository at this point in the history
This started from the fact that `task_list` was missing first-class
support for `filter`. Users would have to pass it via `query_params`
previously.

However, after clearing up some confusion about how to write a filter
string, it seems that we could provide a better interface for passing
these filters. Rather than an elaborate new ``TransferFilterDict``
object -- a briefly considered approach -- this adds support for
passing a dict of strings or string lists. Lists get comma-separated,
strings get passed unmodified.

The resulting semantics allow for usages like

    tc.task_list(filter={"task_id": [...]})
    tc.operation_ls(filter={"type": "file"})

which are relatively intuitive to read and are handled correctly by
the SDK for the user.

A new test for task_list exercises the various filter behaviors (and,
NB, relies a little on cpython 3.6+ having order-preserving dicts).
  • Loading branch information
sirosen committed Oct 8, 2021
1 parent 07774cc commit 24373e5
Show file tree
Hide file tree
Showing 4 changed files with 185 additions and 8 deletions.
15 changes: 15 additions & 0 deletions changelog.d/20211008_144403_sirosen_better_transfer_filters.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
..
.. A new scriv changelog fragment
..
.. Add one or more items to the list below describing the change in clear, concise terms.
..
.. Leave the ":pr:`...`" text alone. When you open a pull request, GitHub Actions will
.. automatically replace it in a new commit. (You can squash this commit later
.. if you like.)
..
* Add ``filter`` as a supported parameter to ``TransferClient.task_list`` (:pr:`NUMBER`)
* The ``filter`` parameter to ``TransferClient.task_list`` and
``TransferClient.operation_ls`` can now be passed as a ``Dict[str, str | List[str]]``.
Documentation on the ``TransferClient`` explains how this will be formatted,
and is linked from the param docs for ``filter`` on each method (:pr:`NUMBER`)
91 changes: 83 additions & 8 deletions src/globus_sdk/services/transfer/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,25 @@

log = logging.getLogger(__name__)

TransferFilterDict = Dict[str, Union[str, List[str]]]


def _datelike_to_str(x: DateLike) -> str:
return x if isinstance(x, str) else x.isoformat(timespec="seconds")


def _format_filter_value(x: Union[str, List[str]]) -> str:
if isinstance(x, str):
return x
return ",".join(x)


def _format_filter(x: Union[str, TransferFilterDict]) -> str:
if isinstance(x, str):
return x
return "/".join(f"{k}:{_format_filter_value(v)}" for k, v in x.items())


def _get_page_size(paged_result: IterableTransferResponse) -> int:
return len(paged_result["DATA"])

Expand All @@ -41,6 +55,25 @@ class TransferClient(client.BaseClient):
:type authorizer: :class:`GlobusAuthorizer\
<globus_sdk.authorizers.base.GlobusAuthorizer>`
.. _transfer_filter_formatting:
**Filter Formatting**
Several methods of ``TransferClient`` take a ``filter`` parameter which can be a
string or dict. When the filter given is a string, it is not modified. When it is a
dict, it is formatted according to the following rules.
- each (key, value) pair in the dict is a clause in the resulting filter string
- clauses are each formatted to ``key:value``
- when the value is a list, it is comma-separated, as in ``key:value1,value2``
- clauses are separated with slashes, as in ``key1:value1/key2:value2``
The corresponding external API documentation describes, in detail, the supported
filter clauses for each method which uses the ``filter`` parameter.
Generally, speaking, filter clauses documented as ``string list`` can be passed as
lists to a filter dict, while string, date, and numeric filters should be passed as
strings.
**Paginated Calls**
Methods which support pagination can be called as paginated or unpaginated methods.
Expand Down Expand Up @@ -965,7 +998,7 @@ def operation_ls(
show_hidden: Optional[bool] = None,
orderby: Optional[Union[str, List[str]]] = None,
# note: filter is a soft keyword in python, so using this name is okay
filter: Optional[str] = None,
filter: Union[str, TransferFilterDict, None] = None,
query_params: Optional[Dict[str, Any]] = None,
) -> IterableTransferResponse:
"""
Expand All @@ -982,9 +1015,11 @@ def operation_ls(
either a field name or a field name followed by a space and 'ASC' or 'DESC'
for ascending or descending.
:type orderby: str, optional
:param filter: Only return file documents that match these filter clauses. For
the filter syntax, see the **External Documentation** linked below.
:type filter: str, optional
:param filter: Only return file documents which match these filter clauses. For
the filter syntax, see the **External Documentation** linked below. If a
dict is supplied as the filter, it is formatted as a set of filter clauses.
See :ref:`filter formatting <transfer_filter_formatting>` for details.
:type filter: str or dict, optional
:param query_params: Additional passthrough query parameters
:type query_params: dict, optional
Expand All @@ -1005,6 +1040,17 @@ def operation_ls(
>>> orderby=["type", "name"]
>>> ):
>>> print(entry["name DESC"], entry["type"])
List filtering to files modified before January 1, 2021. Note the use of an
empty "start date" for the filter:
>>> tc = globus_sdk.TransferClient(...)
>>> for entry in tc.operation_ls(
>>> ep_id,
>>> path="/~/project1/",
>>> filter={"last_modified": ["", "2021-01-01"]},
>>> ):
>>> print(entry["name"], entry["type"])
"""
if query_params is None:
query_params = {}
Expand All @@ -1018,7 +1064,7 @@ def operation_ls(
else:
query_params["orderby"] = ",".join(orderby)
if filter is not None:
query_params["filter"] = filter
query_params["filter"] = _format_filter(filter)

log.info(f"TransferClient.operation_ls({endpoint_id}, {query_params})")
return IterableTransferResponse(
Expand Down Expand Up @@ -1250,6 +1296,7 @@ def task_list(
*,
limit: Optional[int] = None,
offset: Optional[int] = None,
filter: Union[str, TransferFilterDict, None] = None,
query_params: Optional[Dict[str, Any]] = None,
) -> IterableTransferResponse:
"""
Expand All @@ -1261,6 +1308,11 @@ def task_list(
:type limit: int, optional
:param offset: offset used in paging
:type offset: int, optional
:param filter: Only return task documents which match these filter clauses. For
the filter syntax, see the **External Documentation** linked below. If a
dict is supplied as the filter, it is formatted as a set of filter clauses.
See :ref:`filter formatting <transfer_filter_formatting>` for details.
:type filter: str or dict, optional
:param query_params: Additional passthrough query parameters
:type query_params: dict, optional
Expand All @@ -1270,9 +1322,30 @@ def task_list(
>>> tc = TransferClient(...)
>>> for task in tc.task_list(limit=10):
>>> print("Task({}): {} -> {}".format(
>>> task["task_id"], task["source_endpoint"],
>>> task["destination_endpoint"]))
>>> print(
>>> "Task({}): {} -> {}".format(
>>> task["task_id"],
>>> task["source_endpoint"],
>>> task["destination_endpoint"]
>>> )
>>> )
Fetch 3 *specific* tasks using a ``task_id`` filter:
>>> tc = TransferClient(...)
>>> task_ids = [
>>> "acb4b581-b3f3-403a-a42a-9da97aaa9961",
>>> "39447a3c-e002-401a-b95c-f48b69b4c60a",
>>> "02330d3a-987b-4abb-97ed-6a22f8fa365e",
>>> ]
>>> for task in tc.task_list(filter={"task_id": task_ids}):
>>> print(
>>> "Task({}): {} -> {}".format(
>>> task["task_id"],
>>> task["source_endpoint"],
>>> task["destination_endpoint"]
>>> )
>>> )
"""
log.info("TransferClient.task_list(...)")
if query_params is None:
Expand All @@ -1281,6 +1354,8 @@ def task_list(
query_params["limit"] = limit
if offset is not None:
query_params["offset"] = offset
if filter is not None:
query_params["filter"] = _format_filter(filter)
return IterableTransferResponse(
self.get("task_list", query_params=query_params)
)
Expand Down
63 changes: 63 additions & 0 deletions tests/functional/transfer/fixture_data/task_list.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
{
"DATA": [
{
"bytes_checksummed": 0,
"bytes_transferred": 4,
"canceled_by_admin": null,
"canceled_by_admin_message": null,
"command": "API 0.10",
"completion_time": "2021-09-02T18:04:49+00:00",
"deadline": "2021-09-03T18:04:47+00:00",
"delete_destination_extra": false,
"destination_endpoint": "go#ep2",
"destination_endpoint_display_name": "Globus Tutorial Endpoint 2",
"destination_endpoint_id": "ddb59af0-6d04-11e5-ba46-22000b92c6ec",
"directories": 0,
"effective_bytes_per_second": 3,
"encrypt_data": false,
"fail_on_quota_errors": false,
"fatal_error": null,
"faults": 0,
"files": 1,
"files_skipped": 0,
"files_transferred": 1,
"filter_rules": null,
"history_deleted": false,
"is_ok": null,
"is_paused": false,
"label": null,
"nice_status": null,
"nice_status_details": null,
"nice_status_expires_in": null,
"nice_status_short_description": null,
"owner_id": "5276fa05-eedf-46c5-919f-ad2d0160d1a9",
"preserve_timestamp": false,
"recursive_symlinks": "ignore",
"request_time": "2021-09-02T18:04:47+00:00",
"skip_source_errors": false,
"source_endpoint": "go#ep1",
"source_endpoint_display_name": "Globus Tutorial Endpoint 1",
"source_endpoint_id": "ddb59aef-6d04-11e5-ba46-22000b92c6ec",
"status": "SUCCEEDED",
"subtasks_canceled": 0,
"subtasks_expired": 0,
"subtasks_failed": 0,
"subtasks_pending": 0,
"subtasks_retrying": 0,
"subtasks_skipped_errors": 0,
"subtasks_succeeded": 2,
"subtasks_total": 2,
"symlinks": 0,
"sync_level": null,
"task_id": "42277910-0c18-11ec-ba76-138ac5bdb19f",
"type": "TRANSFER",
"username": "u_XrtivK6z9w2MZwr65os6nZX0wv",
"verify_checksum": true
}
],
"DATA_TYPE": "task_list",
"length": 1,
"limit": 1000,
"offset": 0,
"total": 1
}
24 changes: 24 additions & 0 deletions tests/functional/transfer/test_simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,3 +208,27 @@ def test_autoactivation(client):
req = get_last_request()
parsed_qs = urllib.parse.parse_qs(urllib.parse.urlparse(req.url).query)
assert parsed_qs == {"if_expires_in": ["300"]}


@pytest.mark.parametrize(
"client_kwargs, qs",
[
({}, {}),
({"query_params": {"foo": "bar"}}, {"foo": "bar"}),
({"filter": "foo"}, {"filter": "foo"}),
({"limit": 10, "offset": 100}, {"limit": "10", "offset": "100"}),
({"limit": 10, "query_params": {"limit": 100}}, {"limit": "10"}),
({"filter": "foo:bar:baz"}, {"filter": "foo:bar:baz"}),
({"filter": {"foo": "bar", "bar": "baz"}}, {"filter": "foo:bar/bar:baz"}),
({"filter": {"foo": ["bar", "baz"]}}, {"filter": "foo:bar,baz"}),
],
)
def test_task_list(client, client_kwargs, qs):
register_api_route_fixture_file("transfer", "/task_list", "task_list.json")
client.task_list(**client_kwargs)

req = get_last_request()
parsed_qs = urllib.parse.parse_qs(urllib.parse.urlparse(req.url).query)
# parsed_qs will have each value as a list (because query-params are a multidict)
# so transform the test data to match before comparison
assert parsed_qs == {k: [v] for k, v in qs.items()}

0 comments on commit 24373e5

Please sign in to comment.