Skip to content

Commit

Permalink
feat: allowing human-readable date in presigns() expiration
Browse files Browse the repository at this point in the history
  • Loading branch information
trojblue committed Sep 30, 2024
1 parent ccfcbc8 commit 1381b9a
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 14 deletions.
4 changes: 2 additions & 2 deletions noteooks/test_general.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -534,7 +534,7 @@
{
"data": {
"text/plain": [
"'https://bucket-external.s3.amazonaws.com/dataset/dataset_qft/moody_qft_danbooru.json?AWSAccessKeyId=AKIAVVWUPRZ2AC6O2S3I&Signature=0z3LgRw9wcdnSqcCmqX7VaqAU7Q%3D&Expires=1727805561'"
"'https://bucket-external.s3.amazonaws.com/dataset/dataset_qft/moody_qft_danbooru.json?AWSAccessKeyId=AKIAVVWUPRZ2AC6O2S3I&Signature=sXf%2FvVsHcWcBK8aA%2FBW5i4dseEw%3D&Expires=1759172427'"
]
},
"execution_count": 1,
Expand All @@ -548,7 +548,7 @@
"\n",
"uri = \"s3://bucket-external/dataset/dataset_qft/moody_qft_danbooru.json\"\n",
"\n",
"signed = ub.presigns(uri)\n",
"signed = ub.presigns(uri, expiration=\"1y\")\n",
"signed"
]
}
Expand Down
50 changes: 47 additions & 3 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "unibox"
version = "0.4.11"
version = "0.4.12"
description = "Unibox provides unified interface for common file operations."
authors = ["yada <trojblue@gmail.com>"]
license = "MIT"
Expand All @@ -20,6 +20,7 @@ pyarrow = ">=10.0.1"
boto3 = "^1.28.76"
requests = "^2.31.0"
orjson = "^3.9.10"
humanfriendly = "^10.0"

[tool.poetry.extras]
full = ["ipython"]
Expand Down
8 changes: 5 additions & 3 deletions unibox/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations


from doctest import debug
from pathlib import Path
from typing import Union, List, Dict, Any
Expand Down Expand Up @@ -140,11 +141,12 @@ def peeks(data: Any, n=3, console_print=False) -> Dict[str, Any]:
return peeker.peeks(data)


def presigns(s3_uri: str, timeout: int = 86400) -> str:
def presigns(s3_uri: str, expiration: Union[int, str] = "1d") -> str:
"""
Generate a presigned URL from a given S3 URI.
:param s3_uri: S3 URI (e.g., 's3://bucket-name/object-key')
:param timeout: Time in seconds for the presigned URL to remain valid (default: 1 day)
:param expiration: How long the presigned URL remains valid (default: 1 day).
Accepts either an integer (seconds) or a human-readable timespan string like "1h", "1d", "1w", "1y".
:return: Presigned URL as string. If error, returns None.
"""
return s3_client.generate_presigned_uri(s3_uri, timeout)
return s3_client.generate_presigned_uri(s3_uri, expiration=expiration)
29 changes: 24 additions & 5 deletions unibox/utils/s3_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,12 @@
from tqdm.auto import tqdm
import pandas as pd

import logging
import humanfriendly
from botocore.exceptions import ClientError
from typing import Union


def parse_s3_url(url: str):
parsed_url = urlparse(url)
if parsed_url.scheme != "s3":
Expand Down Expand Up @@ -114,19 +120,32 @@ def traverse(self, s3_uri, include_extensions=None, exclude_extensions=None,

return all_entries

def generate_presigned_uri(self, s3_uri: str, timeout: int = 86400) -> str:

def generate_presigned_uri(self, s3_uri: str, expiration: Union[int, str] = "1d") -> str:
"""
Generate a presigned URL from a given S3 URI.
:param s3_uri: S3 URI (e.g., 's3://bucket-name/object-key')
:param timeout: Time in seconds for the presigned URL to remain valid (default: 1 day)
:param expiration: How long the presigned URL remains valid (default: 1 day).
Accepts either an integer (seconds) or a human-readable timespan string like "1h", "1d", "1w", "1y".
:return: Presigned URL as string. If error, returns None.
"""
bucket, key = parse_s3_url(s3_uri)

# Convert human-readable time to seconds if needed
if isinstance(expiration, str):
try:
expiration = int(humanfriendly.parse_timespan(expiration))
except Exception as e:
logging.error(f"Invalid time format: {expiration}. Error: {e}")
return None

try:
response = self.s3.generate_presigned_url('get_object',
Params={'Bucket': bucket, 'Key': key},
ExpiresIn=timeout)
response = self.s3.generate_presigned_url(
'get_object',
Params={'Bucket': bucket, 'Key': key},
ExpiresIn=expiration
)
return response
except ClientError as e:
logging.error(f"Failed to generate presigned URL for {s3_uri}: {e}")
Expand Down

0 comments on commit 1381b9a

Please sign in to comment.