Skip to content

Commit 2b1e110

Browse files
authored
feat: add dataset.create_items_public_url and key_value_store.create_keys_public_url (#453)
When storage resources (Datasets or Key-Value Stores) are set to Restricted, accessing or sharing their data externally becomes difficult due to limited permissions. This PR introduces functionality to generate signed URLs that allow controlled external access to these resources without adding a token to the request. It adds methods that generate signed URLs for Dataset items and Key-Value Store keys: 1. **Datasets** `dataset(:datasetId).create_items_public_url(options, expires_in_secs)` → Returns a signed URL like: `/v2/datasets/:datasetId/items?signature=xxx` 2. **Key-Value Stores** `key_value_store(:storeId).create_keys_public_url(options, expires_in_secs)` → Returns a signed URL like: `/v2/key-value-stores/:storeId/keys?signature=xxx` 🕒 Expiration: The `expires_in_secs` parameter defines how long (in seconds) the signature is valid. - If provided, the URL will expire after the specified time. - If omitted, the URL will never expire. Note: The signature is included only if the token has WRITE access to the storage. Otherwise, an unsigned URL is returned. P.S. We're not yet exposing `urlSigningSecretKey` for datasets; it will be released after [PR](apify/apify-core#22173) is merged. [More context here](https://www.notion.so/apify/Signed-Dataset-Items-KV-store-record-URLs-224f39950a2280158a6bd82bc2e2ebb5?source=copy_link) Same PR in JS: apify/apify-client-js#720
1 parent 3920ac2 commit 2b1e110

File tree

7 files changed

+457
-42
lines changed

7 files changed

+457
-42
lines changed

src/apify_client/clients/resource_clients/dataset.py

Lines changed: 130 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,16 @@
44
import warnings
55
from contextlib import asynccontextmanager, contextmanager
66
from typing import TYPE_CHECKING, Any
7+
from urllib.parse import urlencode, urlparse, urlunparse
8+
9+
from apify_shared.utils import create_storage_content_signature
710

811
from apify_client._types import ListPage
9-
from apify_client._utils import catch_not_found_or_throw, filter_out_none_values_recursively, pluck_data
12+
from apify_client._utils import (
13+
catch_not_found_or_throw,
14+
filter_out_none_values_recursively,
15+
pluck_data,
16+
)
1017
from apify_client.clients.base import ResourceClient, ResourceClientAsync
1118
from apify_client.errors import ApifyApiError
1219

@@ -558,6 +565,67 @@ def get_statistics(self) -> dict | None:
558565

559566
return None
560567

568+
def create_items_public_url(
    self,
    *,
    offset: int | None = None,
    limit: int | None = None,
    clean: bool | None = None,
    desc: bool | None = None,
    fields: list[str] | None = None,
    omit: list[str] | None = None,
    unwind: list[str] | None = None,
    skip_empty: bool | None = None,
    skip_hidden: bool | None = None,
    flatten: list[str] | None = None,
    view: str | None = None,
    expires_in_secs: int | None = None,
) -> str:
    """Generate a URL that can be used to access dataset items.

    The dataset record is fetched with `self.get()`. If that record contains
    a `urlSigningSecretKey`, a signature built from the dataset ID and that key
    is added as the `signature` query parameter; otherwise the URL is returned
    without a signature.

    Args:
        offset: Sent as the `offset` query parameter when not None.
        limit: Sent as the `limit` query parameter when not None.
        clean: Sent as the `clean` query parameter (0/1) when not None.
        desc: Sent as the `desc` query parameter (0/1) when not None.
        fields: Sent as a comma-separated `fields` query parameter when not None.
        omit: Sent as a comma-separated `omit` query parameter when not None.
        unwind: Sent as a comma-separated `unwind` query parameter when not None.
        skip_empty: Sent as the `skipEmpty` query parameter (0/1) when not None.
        skip_hidden: Sent as the `skipHidden` query parameter (0/1) when not None.
        flatten: Sent as a comma-separated `flatten` query parameter when not None.
        view: Sent as the `view` query parameter when not None.
        expires_in_secs: Validity of the signature in seconds, counted from the time
            the URL is generated. If not provided, the URL will not expire.

    Returns:
        The public dataset items URL.
    """
    dataset = self.get()

    request_params = self._params(
        offset=offset,
        limit=limit,
        desc=desc,
        clean=clean,
        fields=fields,
        omit=omit,
        unwind=unwind,
        skipEmpty=skip_empty,
        skipHidden=skip_hidden,
        flatten=flatten,
        view=view,
    )

    if dataset and 'urlSigningSecretKey' in dataset:
        request_params['signature'] = create_storage_content_signature(
            resource_id=dataset['id'],
            url_signing_secret_key=dataset['urlSigningSecretKey'],
            # The signing helper takes milliseconds; the public option is in seconds.
            expires_in_millis=expires_in_secs * 1000 if expires_in_secs is not None else None,
        )

    # `urlencode` stringifies values with `str()`, which would emit "True" and
    # "['a', 'b']". Serialize booleans as 0/1 and lists as comma-separated strings
    # instead. NOTE(review): this is assumed to match how the client's regular
    # HTTP path serializes the same options - confirm against the HTTP client.
    query_params = {}
    for key, value in request_params.items():
        if value is None:
            continue
        if isinstance(value, bool):
            query_params[key] = int(value)
        elif isinstance(value, (list, tuple)):
            query_params[key] = ','.join(str(item) for item in value)
        else:
            query_params[key] = value

    items_public_url = urlparse(self._url('items'))
    if query_params:
        items_public_url = items_public_url._replace(query=urlencode(query_params))

    return urlunparse(items_public_url)
628+
561629

562630
class DatasetClientAsync(ResourceClientAsync):
563631
"""Async sub-client for manipulating a single dataset."""
@@ -1003,3 +1071,64 @@ async def get_statistics(self) -> dict | None:
10031071
catch_not_found_or_throw(exc)
10041072

10051073
return None
1074+
1075+
async def create_items_public_url(
    self,
    *,
    offset: int | None = None,
    limit: int | None = None,
    clean: bool | None = None,
    desc: bool | None = None,
    fields: list[str] | None = None,
    omit: list[str] | None = None,
    unwind: list[str] | None = None,
    skip_empty: bool | None = None,
    skip_hidden: bool | None = None,
    flatten: list[str] | None = None,
    view: str | None = None,
    expires_in_secs: int | None = None,
) -> str:
    """Generate a URL that can be used to access dataset items.

    The dataset record is fetched with `await self.get()`. If that record contains
    a `urlSigningSecretKey`, a signature built from the dataset ID and that key
    is added as the `signature` query parameter; otherwise the URL is returned
    without a signature.

    Args:
        offset: Sent as the `offset` query parameter when not None.
        limit: Sent as the `limit` query parameter when not None.
        clean: Sent as the `clean` query parameter (0/1) when not None.
        desc: Sent as the `desc` query parameter (0/1) when not None.
        fields: Sent as a comma-separated `fields` query parameter when not None.
        omit: Sent as a comma-separated `omit` query parameter when not None.
        unwind: Sent as a comma-separated `unwind` query parameter when not None.
        skip_empty: Sent as the `skipEmpty` query parameter (0/1) when not None.
        skip_hidden: Sent as the `skipHidden` query parameter (0/1) when not None.
        flatten: Sent as a comma-separated `flatten` query parameter when not None.
        view: Sent as the `view` query parameter when not None.
        expires_in_secs: Validity of the signature in seconds, counted from the time
            the URL is generated. If not provided, the URL will not expire.

    Returns:
        The public dataset items URL.
    """
    dataset = await self.get()

    request_params = self._params(
        offset=offset,
        limit=limit,
        desc=desc,
        clean=clean,
        fields=fields,
        omit=omit,
        unwind=unwind,
        skipEmpty=skip_empty,
        skipHidden=skip_hidden,
        flatten=flatten,
        view=view,
    )

    if dataset and 'urlSigningSecretKey' in dataset:
        request_params['signature'] = create_storage_content_signature(
            resource_id=dataset['id'],
            url_signing_secret_key=dataset['urlSigningSecretKey'],
            # The signing helper takes milliseconds; the public option is in seconds.
            expires_in_millis=expires_in_secs * 1000 if expires_in_secs is not None else None,
        )

    # `urlencode` stringifies values with `str()`, which would emit "True" and
    # "['a', 'b']". Serialize booleans as 0/1 and lists as comma-separated strings
    # instead. NOTE(review): this is assumed to match how the client's regular
    # HTTP path serializes the same options - confirm against the HTTP client.
    query_params = {}
    for key, value in request_params.items():
        if value is None:
            continue
        if isinstance(value, bool):
            query_params[key] = int(value)
        elif isinstance(value, (list, tuple)):
            query_params[key] = ','.join(str(item) for item in value)
        else:
            query_params[key] = value

    items_public_url = urlparse(self._url('items'))
    if query_params:
        items_public_url = items_public_url._replace(query=urlencode(query_params))

    return urlunparse(items_public_url)

src/apify_client/clients/resource_clients/key_value_store.py

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
from contextlib import asynccontextmanager, contextmanager
55
from http import HTTPStatus
66
from typing import TYPE_CHECKING, Any
7+
from urllib.parse import urlencode, urlparse, urlunparse
8+
9+
from apify_shared.utils import create_storage_content_signature
710

811
from apify_client._utils import (
912
catch_not_found_or_throw,
@@ -264,6 +267,54 @@ def delete_record(self, key: str) -> None:
264267
timeout_secs=_SMALL_TIMEOUT,
265268
)
266269

270+
def create_keys_public_url(
    self,
    *,
    limit: int | None = None,
    exclusive_start_key: str | None = None,
    collection: str | None = None,
    prefix: str | None = None,
    expires_in_secs: int | None = None,
) -> str:
    """Generate a URL that can be used to access key-value store keys.

    The store record is fetched with `self.get()`. If that record contains
    a `urlSigningSecretKey`, a signature built from the store ID and that key
    is added as the `signature` query parameter; otherwise the URL is returned
    without a signature.

    Args:
        limit: Sent as the `limit` query parameter when not None.
        exclusive_start_key: Sent as the `exclusiveStartKey` query parameter when not None.
        collection: Sent as the `collection` query parameter when not None.
        prefix: Sent as the `prefix` query parameter when not None.
        expires_in_secs: Validity of the signature in seconds, counted from the time
            the URL is generated. If not provided, the URL will not expire.

    Returns:
        The public key-value store keys URL.
    """
    store = self.get()

    request_params = self._params(
        limit=limit,
        # Query parameters use camelCase names, not the Python argument names.
        exclusiveStartKey=exclusive_start_key,
        collection=collection,
        prefix=prefix,
    )

    if store and 'urlSigningSecretKey' in store:
        request_params['signature'] = create_storage_content_signature(
            resource_id=store['id'],
            url_signing_secret_key=store['urlSigningSecretKey'],
            # The signing helper takes milliseconds; the public option is in seconds.
            expires_in_millis=expires_in_secs * 1000 if expires_in_secs is not None else None,
        )

    keys_public_url = urlparse(self._url('keys'))

    # Options left as None are omitted from the query string entirely.
    filtered_params = {k: v for k, v in request_params.items() if v is not None}
    if filtered_params:
        keys_public_url = keys_public_url._replace(query=urlencode(filtered_params))

    return urlunparse(keys_public_url)
317+
267318

268319
class KeyValueStoreClientAsync(ResourceClientAsync):
269320
"""Async sub-client for manipulating a single key-value store."""
@@ -503,3 +554,52 @@ async def delete_record(self, key: str) -> None:
503554
params=self._params(),
504555
timeout_secs=_SMALL_TIMEOUT,
505556
)
557+
558+
async def create_keys_public_url(
    self,
    *,
    limit: int | None = None,
    exclusive_start_key: str | None = None,
    collection: str | None = None,
    prefix: str | None = None,
    expires_in_secs: int | None = None,
) -> str:
    """Generate a URL that can be used to access key-value store keys.

    The store record is fetched with `await self.get()`. If that record contains
    a `urlSigningSecretKey`, a signature built from the store ID and that key
    is added as the `signature` query parameter; otherwise the URL is returned
    without a signature.

    Args:
        limit: Sent as the `limit` query parameter when not None.
        exclusive_start_key: Sent as the `exclusiveStartKey` query parameter when not None.
        collection: Sent as the `collection` query parameter when not None.
        prefix: Sent as the `prefix` query parameter when not None.
        expires_in_secs: Validity of the signature in seconds, counted from the time
            the URL is generated. If not provided, the URL will not expire.

    Returns:
        The public key-value store keys URL.
    """
    store = await self.get()

    request_params = self._params(
        limit=limit,
        # Query parameters use camelCase names, not the Python argument names.
        exclusiveStartKey=exclusive_start_key,
        collection=collection,
        prefix=prefix,
    )

    if store and 'urlSigningSecretKey' in store:
        request_params['signature'] = create_storage_content_signature(
            resource_id=store['id'],
            url_signing_secret_key=store['urlSigningSecretKey'],
            # The signing helper takes milliseconds; the public option is in seconds.
            expires_in_millis=expires_in_secs * 1000 if expires_in_secs is not None else None,
        )

    # Parsed once here; the original computed this twice and discarded the first result.
    keys_public_url = urlparse(self._url('keys'))

    # Options left as None are omitted from the query string entirely.
    filtered_params = {k: v for k, v in request_params.items() if v is not None}
    if filtered_params:
        keys_public_url = keys_public_url._replace(query=urlencode(filtered_params))

    return urlunparse(keys_public_url)
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
import secrets
2+
import string
3+
4+
5+
def random_string(length: int = 10) -> str:
    """Return a string of `length` ASCII letters, each drawn with `secrets.choice`."""
    alphabet = string.ascii_letters
    picked = [secrets.choice(alphabet) for _ in range(length)]
    return ''.join(picked)
7+
8+
9+
def random_resource_name(resource: str) -> str:
    """Build a test resource name: a fixed prefix, `resource`, and a 5-letter random suffix."""
    suffix = random_string(5)
    return f'python-client-test-{resource}-{suffix}'

tests/integration/test_dataset.py

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
from __future__ import annotations
2+
3+
from typing import TYPE_CHECKING
4+
5+
import impit
6+
7+
from integration.integration_test_utils import random_resource_name
8+
9+
if TYPE_CHECKING:
10+
from apify_client import ApifyClient, ApifyClientAsync
11+
12+
13+
class TestDatasetSync:
    """Integration tests for `create_items_public_url` on the sync dataset client."""

    def test_dataset_should_create_public_items_expiring_url_with_params(self, apify_client: ApifyClient) -> None:
        """An expiring URL carries the signature and the passed options, and is fetchable."""
        created_dataset = apify_client.datasets().get_or_create(name=random_resource_name('dataset'))

        dataset = apify_client.dataset(created_dataset['id'])
        try:
            items_public_url = dataset.create_items_public_url(
                expires_in_secs=2000,
                limit=10,
                offset=0,
            )

            assert 'signature=' in items_public_url
            assert 'limit=10' in items_public_url
            assert 'offset=0' in items_public_url

            impit_client = impit.Client()
            response = impit_client.get(items_public_url, timeout=5)
            assert response.status_code == 200
        finally:
            # Always remove the dataset so a failing assertion does not leak it.
            dataset.delete()

        assert apify_client.dataset(created_dataset['id']).get() is None

    def test_dataset_should_create_public_items_non_expiring_url(self, apify_client: ApifyClient) -> None:
        """A URL created without `expires_in_secs` is still signed and fetchable."""
        created_dataset = apify_client.datasets().get_or_create(name=random_resource_name('dataset'))

        dataset = apify_client.dataset(created_dataset['id'])
        try:
            items_public_url = dataset.create_items_public_url()

            assert 'signature=' in items_public_url

            impit_client = impit.Client()
            response = impit_client.get(items_public_url, timeout=5)
            assert response.status_code == 200
        finally:
            # Always remove the dataset so a failing assertion does not leak it.
            dataset.delete()

        assert apify_client.dataset(created_dataset['id']).get() is None
49+
50+
51+
class TestDatasetAsync:
    """Integration tests for `create_items_public_url` on the async dataset client."""

    async def test_dataset_should_create_public_items_expiring_url_with_params(
        self, apify_client_async: ApifyClientAsync
    ) -> None:
        """An expiring URL carries the signature and the passed options, and is fetchable."""
        created_dataset = await apify_client_async.datasets().get_or_create(name=random_resource_name('dataset'))

        dataset = apify_client_async.dataset(created_dataset['id'])
        try:
            items_public_url = await dataset.create_items_public_url(
                expires_in_secs=2000,
                limit=10,
                offset=0,
            )

            assert 'signature=' in items_public_url
            assert 'limit=10' in items_public_url
            assert 'offset=0' in items_public_url

            impit_async_client = impit.AsyncClient()
            response = await impit_async_client.get(items_public_url, timeout=5)
            assert response.status_code == 200
        finally:
            # Always remove the dataset so a failing assertion does not leak it.
            await dataset.delete()

        assert await apify_client_async.dataset(created_dataset['id']).get() is None

    async def test_dataset_should_create_public_items_non_expiring_url(
        self, apify_client_async: ApifyClientAsync
    ) -> None:
        """A URL created without `expires_in_secs` is still signed and fetchable."""
        created_dataset = await apify_client_async.datasets().get_or_create(name=random_resource_name('dataset'))

        dataset = apify_client_async.dataset(created_dataset['id'])
        try:
            items_public_url = await dataset.create_items_public_url()

            assert 'signature=' in items_public_url

            impit_async_client = impit.AsyncClient()
            response = await impit_async_client.get(items_public_url, timeout=5)
            assert response.status_code == 200
        finally:
            # Always remove the dataset so a failing assertion does not leak it.
            await dataset.delete()

        assert await apify_client_async.dataset(created_dataset['id']).get() is None

0 commit comments

Comments
 (0)