Skip to content

Commit a865461

Browse files
authored
feat: add signing of public URL (#407)
This PR is part of [Issue #19363](apify/apify-core#19363), which updates `KeyValueStore.getPublicUrl(recordKey)` to generate signed links using HMAC. **This PR**: - creates a signature and appends it to the public URL of the record in the KV store. P.S. Before merging, we need to wait for the Crawlee release so that we can update the Crawlee version. P.P.S. It's hard to test the changes, since the master branch of the SDK and the master branch of Crawlee are out of sync. Crawlee has some breaking changes in version 6.0 which are not yet addressed in the master branch of the SDK. The previous PR that adds storageObject to Crawlee Python is [here](apify/crawlee-python#993). The same PR in SDK JS is [here](apify/apify-sdk-js#358).
1 parent 5f49275 commit a865461

File tree

5 files changed

+104
-12
lines changed

5 files changed

+104
-12
lines changed

src/apify/_crypto.py

+38
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
from __future__ import annotations
22

33
import base64
4+
import hashlib
5+
import hmac
6+
import string
47
from typing import Any
58

69
from cryptography.exceptions import InvalidTag as InvalidTagException
@@ -153,3 +156,38 @@ def decrypt_input_secrets(private_key: rsa.RSAPrivateKey, input_data: Any) -> An
153156
)
154157

155158
return input_data
159+
160+
161+
# Base62 alphabet: digits first, then lowercase letters, then uppercase letters.
CHARSET = string.digits + string.ascii_letters


def encode_base62(num: int) -> str:
    """Encode the given non-negative integer to base62.

    Digits are taken from `CHARSET`, most significant digit first.

    Args:
        num: Non-negative integer to encode.

    Returns:
        The base62 representation of `num`; `0` is encoded as the first character of `CHARSET`.

    Raises:
        ValueError: If `num` is negative. Such input previously produced an empty string silently.
    """
    if num < 0:
        raise ValueError('Cannot encode a negative number to base62.')

    if num == 0:
        return CHARSET[0]

    base = len(CHARSET)

    # Collect digits least-significant first, then reverse once at the end
    # instead of rebuilding the string on every iteration.
    digits: list[str] = []
    while num > 0:
        num, remainder = divmod(num, base)
        digits.append(CHARSET[remainder])

    return ''.join(reversed(digits))
174+
175+
176+
@ignore_docs
def create_hmac_signature(secret_key: str, message: str) -> str:
    """Sign a message with HMAC-SHA256 and return a shortened Base62 signature.

    The hexadecimal digest is cut to its first 30 characters, interpreted as an integer,
    and then Base62 encoded, which keeps the resulting signature short.

    Args:
        secret_key: Secret key used for signing; it is UTF-8 encoded before use.
        message: Message to be signed; it is UTF-8 encoded before use.

    Returns:
        Base62 encoded signature.
    """
    mac = hmac.new(
        key=secret_key.encode('utf-8'),
        msg=message.encode('utf-8'),
        digestmod=hashlib.sha256,
    )

    # Only the leading 30 hex characters of the digest take part in the signature.
    truncated_hex = mac.hexdigest()[:30]

    return encode_base62(int(truncated_hex, 16))

src/apify/apify_storage_client/_key_value_store_client.py

+17-2
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,13 @@
44
from typing import TYPE_CHECKING, Any
55

66
from typing_extensions import override
7+
from yarl import URL
78

89
from crawlee.storage_clients._base import KeyValueStoreClient as BaseKeyValueStoreClient
910
from crawlee.storage_clients.models import KeyValueStoreListKeysPage, KeyValueStoreMetadata, KeyValueStoreRecord
1011

12+
from apify._crypto import create_hmac_signature
13+
1114
if TYPE_CHECKING:
1215
from collections.abc import AsyncIterator
1316
from contextlib import AbstractAsyncContextManager
@@ -89,6 +92,18 @@ async def get_public_url(self, key: str) -> str:
8992
Args:
9093
key: The key for which the URL should be generated.
9194
"""
92-
public_api_url = self._api_public_base_url
95+
if self._client.resource_id is None:
96+
raise ValueError('resource_id cannot be None when generating a public URL')
97+
98+
public_url = (
99+
URL(self._api_public_base_url) / 'v2' / 'key-value-stores' / self._client.resource_id / 'records' / key
100+
)
101+
102+
key_value_store = await self.get()
103+
104+
if key_value_store is not None and isinstance(key_value_store.model_extra, dict):
105+
url_signing_secret_key = key_value_store.model_extra.get('urlSigningSecretKey')
106+
if url_signing_secret_key:
107+
public_url = public_url.with_query(signature=create_hmac_signature(url_signing_secret_key, key))
93108

94-
return f'{public_api_url}/v2/key-value-stores/{self._client.resource_id}/records/{key}'
109+
return str(public_url)

tests/integration/test_actor_key_value_store.py

+15-6
Original file line numberDiff line numberDiff line change
@@ -201,19 +201,28 @@ async def test_generate_public_url_for_kvs_record(
201201
run_actor: RunActorFunction,
202202
) -> None:
203203
async def main() -> None:
204-
from typing import cast
205-
206-
from apify.apify_storage_client._key_value_store_client import KeyValueStoreClient
204+
from apify._crypto import create_hmac_signature
207205

208206
async with Actor:
209207
public_api_url = Actor.config.api_public_base_url
210208
default_store_id = Actor.config.default_key_value_store_id
209+
record_key = 'public-record-key'
211210

212211
store = await Actor.open_key_value_store()
213-
record_url = await cast(KeyValueStoreClient, store._resource_client).get_public_url('dummy')
214-
print(record_url)
215212

216-
assert record_url == f'{public_api_url}/v2/key-value-stores/{default_store_id}/records/dummy'
213+
assert isinstance(store.storage_object.model_extra, dict)
214+
url_signing_secret_key = store.storage_object.model_extra.get('urlSigningSecretKey')
215+
assert url_signing_secret_key is not None
216+
217+
await store.set_value(record_key, {'exposedData': 'test'}, 'application/json')
218+
219+
record_url = await store.get_public_url(record_key)
220+
221+
signature = create_hmac_signature(url_signing_secret_key, record_key)
222+
assert (
223+
record_url
224+
== f'{public_api_url}/v2/key-value-stores/{default_store_id}/records/{record_key}?signature={signature}'
225+
)
217226

218227
actor = await make_actor(label='kvs-get-public-url', main_func=main)
219228
run_result = await run_actor(actor)

tests/unit/test_crypto.py

+31-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,15 @@
44

55
import pytest
66

7-
from apify._crypto import _load_public_key, crypto_random_object_id, load_private_key, private_decrypt, public_encrypt
7+
from apify._crypto import (
8+
_load_public_key,
9+
create_hmac_signature,
10+
crypto_random_object_id,
11+
encode_base62,
12+
load_private_key,
13+
private_decrypt,
14+
public_encrypt,
15+
)
816

917
# NOTE: Uses the same keys as in:
1018
# https://github.com/apify/apify-shared-js/blob/master/test/crypto.test.ts
@@ -105,3 +113,25 @@ def test_crypto_random_object_id_length_and_charset() -> None:
105113
long_random_object_id = crypto_random_object_id(1000)
106114
for char in long_random_object_id:
107115
assert char in 'abcdefghijklmnopqrstuvwxyzABCEDFGHIJKLMNOPQRSTUVWXYZ0123456789'
116+
117+
118+
@pytest.mark.parametrize(
    ('test_input', 'expected'),
    [
        (0, '0'),
        (10, 'a'),
        (999999999, '15FTGf'),
    ],
)
def test_encode_base62(test_input: int, expected: str) -> None:
    """Check that each sample integer is encoded to its expected base62 string."""
    encoded = encode_base62(test_input)
    assert encoded == expected
121+
122+
123+
# Compatibility check against the JavaScript version of the same method:
# https://github.com/apify/apify-shared-js/blob/master/packages/utilities/src/hmac.ts
def test_create_valid_hmac_signature() -> None:
    """Check that a fixed key/message pair produces the pinned signature."""
    # The secret key and the message are the same as the ones used in the JS tests.
    expected_signature = 'pcVagAsudj8dFqdlg7mG'
    actual_signature = create_hmac_signature('hmac-secret-key', 'hmac-message-to-be-authenticated')
    assert actual_signature == expected_signature
130+
131+
132+
def test_create_same_hmac() -> None:
    """Check that signing the same key/message pair twice gives the same pinned signature."""
    # The secret key and the message are the same as the ones used in the JS tests.
    expected_signature = 'FYMcmTIm3idXqleF1Sw5'
    for _ in range(2):
        assert create_hmac_signature('hmac-same-secret-key', 'hmac-same-message-to-be-authenticated') == expected_signature

uv.lock

+3-3
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)