diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 287e62b..275c487 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -5,7 +5,7 @@ For local development, clone this repository and then run ```sh source ./venv/bin/ ctivate && python setup.py develop --user # OR -source ./venv/bin/activate && python3 -m pip install . && REDIVIS_API_ENDPOINT = https://localhost:8443/api/v1 python3 - W ignore +source ./venv/bin/activate && python3 -m pip install . && REDIVIS_API_ENDPOINT=https://localhost:8443/api/v1 python3 - W ignore ``` You can then run the tests, e.g.: diff --git a/src/redivis/_version.py b/src/redivis/_version.py index fe051f1..137e5fb 100644 --- a/src/redivis/_version.py +++ b/src/redivis/_version.py @@ -1 +1 @@ -__version__ = "0.15.18" +__version__ = "0.15.19" diff --git a/src/redivis/classes/Upload.py b/src/redivis/classes/Upload.py index b080095..a966548 100644 --- a/src/redivis/classes/Upload.py +++ b/src/redivis/classes/Upload.py @@ -15,6 +15,8 @@ from ..common.api_request import make_request, make_paginated_request from ..common.retryable_upload import perform_resumable_upload, perform_standard_upload +MAX_SIMPLE_UPLOAD_SIZE = 1e7 + class Upload(Base): def __init__( @@ -59,8 +61,11 @@ def create( temp_upload_id = None if data and ( - (hasattr(data, "read") and os.stat(data.name).st_size > 1e7) - or (hasattr(data, "__len__") and len(data) > 1e7) + ( + hasattr(data, "read") + and os.stat(data.name).st_size > MAX_SIMPLE_UPLOAD_SIZE + ) + or (hasattr(data, "__len__") and len(data) > MAX_SIMPLE_UPLOAD_SIZE) ): did_reopen_file = False pbar_bytes = None @@ -78,7 +83,7 @@ def create( path=f"{self.table.uri}/tempUploads", payload={ "tempUploads": [ - {"size": size, "name": self.name, "resumable": size > 1e7} + {"size": size, "name": self.name, "resumable": True} ] }, ) diff --git a/src/redivis/common/retryable_upload.py b/src/redivis/common/retryable_upload.py index 586b127..3eae615 100644 --- a/src/redivis/common/retryable_upload.py +++ b/src/redivis/common/retryable_upload.py @@ -6,6 +6,7 @@ import logging from tqdm.utils import CallbackIOWrapper from urllib.parse import quote as quote_uri + from .auth import get_auth_token @@ -120,7 +121,6 @@ def retry_partial_upload(*, retry_count=0, file_size, resumable_url, headers): return file_size elif res.status_code == 308: range_header = res.headers["Range"] if "Range" in res.headers else None - if range_header: match = re.match(r"bytes=0-(\d+)", range_header) if match.group(0) and not math.isnan(int(match.group(1))): @@ -137,7 +137,7 @@ def retry_partial_upload(*, retry_count=0, file_size, resumable_url, headers): raise e time.sleep(retry_count / 10) - retry_partial_upload( + return retry_partial_upload( retry_count=retry_count + 1, file_size=file_size, resumable_url=resumable_url, diff --git a/tests/test_upload.py b/tests/test_upload.py index 7adee3e..8b1539d 100644 --- a/tests/test_upload.py +++ b/tests/test_upload.py @@ -18,6 +18,18 @@ def test_linebreaks_in_cell(): ) +def test_upload_large_file(): + dataset = util.create_test_dataset() + util.clear_test_data() + table = util.get_table().create( + description="Some info", upload_merge_strategy="replace" + ) + file_name = "concept_relationship.csv" + with open(f"tests/data/{file_name}", "rb") as f: + data = f.read() + table.upload(name=file_name).create(data=data, wait_for_finish=True) + + def test_upload_remove_on_failure(): dataset = util.create_test_dataset() util.clear_test_data()