Skip to content

Commit cac700e

Browse files
authored
Merge pull request #274 from MerginMaps/changes_limits
Upload changes limits
2 parents 3d00631 + 7440f3f commit cac700e

File tree

9 files changed

+196
-28
lines changed

9 files changed

+196
-28
lines changed

.github/workflows/autotests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ jobs:
1919

2020
- uses: actions/setup-python@v2
2121
with:
22-
python-version: '3.x'
22+
python-version: '3.8'
2323

2424
- name: Install python package dependencies
2525
run: |

mergin/client.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1554,6 +1554,8 @@ def sync_project_generator(self, project_directory):
15541554

15551555
def sync_project(self, project_directory):
15561556
"""
1557+
Syncs project by pulling server changes and pushing local changes. There is intorduced retry mechanism
1558+
for handling server conflicts (when server has changes that we do not have yet or somebody else is syncing).
15571559
See description of _sync_project_generator().
15581560
15591561
:param project_directory: Project's directory

mergin/client_push.py

Lines changed: 29 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,16 @@
2222
import time
2323
from typing import List, Tuple, Optional, ByteString
2424

25-
from .local_changes import FileChange, LocalProjectChanges
26-
27-
from .common import UPLOAD_CHUNK_ATTEMPT_WAIT, UPLOAD_CHUNK_ATTEMPTS, UPLOAD_CHUNK_SIZE, ClientError, ErrorCode
25+
from .local_changes import ChangesValidationError, FileChange, LocalProjectChanges
26+
27+
from .common import (
28+
MAX_UPLOAD_VERSIONED_SIZE,
29+
UPLOAD_CHUNK_ATTEMPT_WAIT,
30+
UPLOAD_CHUNK_ATTEMPTS,
31+
UPLOAD_CHUNK_SIZE,
32+
MAX_UPLOAD_MEDIA_SIZE,
33+
ClientError,
34+
)
2835
from .merginproject import MerginProject
2936
from .editor import filter_changes
3037
from .utils import get_data_checksum
@@ -176,7 +183,7 @@ def start(self, items: List[UploadQueueItem]):
176183

177184
def update_chunks_from_items(self):
178185
"""
179-
Update chunks in LocalChanges from the upload queue items.
186+
Update chunks in LocalProjectChanges from the upload queue items.
180187
Used just before finalizing the transaction to set the server_chunk_id in v2 API.
181188
"""
182189
self.changes.update_chunk_ids([(item.chunk_id, item.server_chunk_id) for item in self.upload_queue_items])
@@ -301,28 +308,23 @@ def push_project_async(mc, directory) -> Optional[UploadJob]:
301308
mp.log.info(f"--- push {project_path} - nothing to do")
302309
return
303310

304-
mp.log.debug("push changes:\n" + pprint.pformat(changes))
311+
mp.log.debug("push changes:\n" + pprint.pformat(asdict(changes)))
305312
tmp_dir = tempfile.TemporaryDirectory(prefix="python-api-client-")
306313

307314
# If there are any versioned files (aka .gpkg) that are not updated through a diff,
308315
# we need to make a temporary copy somewhere to be sure that we are uploading full content.
309316
# That's because if there are pending transactions, checkpointing or switching from WAL mode
310317
# won't work, and we would end up with some changes left in -wal file which do not get
311318
# uploaded. The temporary copy using geodiff uses sqlite backup API and should copy everything.
312-
for f in changes["updated"]:
313-
if mp.is_versioned_file(f["path"]) and "diff" not in f:
319+
for f in changes.updated:
320+
if mp.is_versioned_file(f.path) and not f.diff:
314321
mp.copy_versioned_file_for_upload(f, tmp_dir.name)
315322

316-
for f in changes["added"]:
317-
if mp.is_versioned_file(f["path"]):
323+
for f in changes.added:
324+
if mp.is_versioned_file(f.path):
318325
mp.copy_versioned_file_for_upload(f, tmp_dir.name)
319326

320-
local_changes = LocalProjectChanges(
321-
added=[FileChange(**change) for change in changes["added"]],
322-
updated=[FileChange(**change) for change in changes["updated"]],
323-
removed=[FileChange(**change) for change in changes["removed"]],
324-
)
325-
job = create_upload_job(mc, mp, local_changes, tmp_dir)
327+
job = create_upload_job(mc, mp, changes, tmp_dir)
326328
return job
327329

328330

@@ -477,12 +479,22 @@ def remove_diff_files(job: UploadJob) -> None:
477479
os.remove(diff_file)
478480

479481

480-
def get_push_changes_batch(mc, mp: MerginProject) -> Tuple[dict, int]:
482+
def get_push_changes_batch(mc, mp: MerginProject) -> Tuple[LocalProjectChanges, int]:
481483
"""
482484
Get changes that need to be pushed to the server.
483485
"""
484486
changes = mp.get_push_changes()
485487
project_role = mp.project_role()
486488
changes = filter_changes(mc, project_role, changes)
487489

488-
return changes, sum(len(v) for v in changes.values())
490+
try:
491+
local_changes = LocalProjectChanges(
492+
added=[FileChange(**change) for change in changes["added"]],
493+
updated=[FileChange(**change) for change in changes["updated"]],
494+
removed=[FileChange(**change) for change in changes["removed"]],
495+
)
496+
except ChangesValidationError as e:
497+
raise ClientError(
498+
f"Some files exceeded maximum upload size. Files: {', '.join([c.path for c in e.invalid_changes])}. Maximum size for media files is {MAX_UPLOAD_MEDIA_SIZE / (1024**3)} GB and for geopackage files {MAX_UPLOAD_VERSIONED_SIZE / (1024**3)} GB."
499+
)
500+
return local_changes, sum(len(v) for v in changes.values())

mergin/common.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,12 @@
2424
# seconds to wait between sync callback calls
2525
SYNC_CALLBACK_WAIT = 0.01
2626

27+
# maximum size of media file able to upload in one push (in bytes)
28+
MAX_UPLOAD_MEDIA_SIZE = 10 * (1024**3)
29+
30+
# maximum size of GPKG file able to upload in one push (in bytes)
31+
MAX_UPLOAD_VERSIONED_SIZE = 5 * (1024**3)
32+
2733
# default URL for submitting logs
2834
MERGIN_DEFAULT_LOGS_URL = "https://g4pfq226j0.execute-api.eu-west-1.amazonaws.com/mergin_client_log_submit"
2935

mergin/editor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from itertools import filterfalse
22
from typing import Callable, Dict, List
33

4-
from .utils import is_mergin_config, is_qgis_file, is_versioned_file
4+
from .utils import is_qgis_file
55

66
EDITOR_ROLE_NAME = "editor"
77

mergin/local_changes.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,18 @@
22
from datetime import datetime
33
from typing import Optional, List, Tuple
44

5+
from .utils import is_versioned_file
6+
from .common import MAX_UPLOAD_MEDIA_SIZE, MAX_UPLOAD_VERSIONED_SIZE
7+
8+
MAX_UPLOAD_CHANGES = 100
9+
10+
11+
# The custom exception
12+
class ChangesValidationError(Exception):
13+
def __init__(self, message, invalid_changes=[]):
14+
super().__init__(message)
15+
self.invalid_changes = invalid_changes if invalid_changes is not None else []
16+
517

618
@dataclass
719
class FileDiffChange:
@@ -68,6 +80,29 @@ class LocalProjectChanges:
6880
updated: List[FileChange] = field(default_factory=list)
6981
removed: List[FileChange] = field(default_factory=list)
7082

83+
def __post_init__(self):
84+
"""
85+
Enforce a limit of changes combined from `added` and `updated`.
86+
"""
87+
upload_changes = self.get_upload_changes()
88+
total_changes = len(upload_changes)
89+
oversize_changes = []
90+
for change in upload_changes:
91+
if not is_versioned_file(change.path) and change.size > MAX_UPLOAD_MEDIA_SIZE:
92+
oversize_changes.append(change)
93+
elif not change.diff and change.size > MAX_UPLOAD_VERSIONED_SIZE:
94+
oversize_changes.append(change)
95+
if oversize_changes:
96+
error = ChangesValidationError("Some files exceed the maximum upload size", oversize_changes)
97+
raise error
98+
99+
if total_changes > MAX_UPLOAD_CHANGES:
100+
# Calculate how many changes to keep from `added` and `updated`
101+
added_limit = min(len(self.added), MAX_UPLOAD_CHANGES)
102+
updated_limit = MAX_UPLOAD_CHANGES - added_limit
103+
self.added = self.added[:added_limit]
104+
self.updated = self.updated[:updated_limit]
105+
71106
def to_server_payload(self) -> dict:
72107
return {
73108
"added": [change.to_server_data() for change in self.added],

mergin/merginproject.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
conflicted_copy_file_name,
2222
edit_conflict_file_name,
2323
)
24-
24+
from .local_changes import FileChange
2525

2626
this_dir = os.path.dirname(os.path.realpath(__file__))
2727

@@ -470,20 +470,20 @@ def get_push_changes(self):
470470
changes["updated"] = [f for f in changes["updated"] if f not in not_updated]
471471
return changes
472472

473-
def copy_versioned_file_for_upload(self, f, tmp_dir):
473+
def copy_versioned_file_for_upload(self, f: FileChange, tmp_dir: str) -> str:
474474
"""
475475
Make a temporary copy of the versioned file using geodiff, to make sure that we have full
476476
content in a single file (nothing left in WAL journal)
477477
"""
478-
path = f["path"]
478+
path = f.path
479479
self.log.info("Making a temporary copy (full upload): " + path)
480480
tmp_file = os.path.join(tmp_dir, path)
481481
os.makedirs(os.path.dirname(tmp_file), exist_ok=True)
482482
self.geodiff.make_copy_sqlite(self.fpath(path), tmp_file)
483-
f["size"] = os.path.getsize(tmp_file)
484-
f["checksum"] = generate_checksum(tmp_file)
485-
f["chunks"] = [str(uuid.uuid4()) for i in range(math.ceil(f["size"] / UPLOAD_CHUNK_SIZE))]
486-
f["upload_file"] = tmp_file
483+
f.size = os.path.getsize(tmp_file)
484+
f.checksum = generate_checksum(tmp_file)
485+
f.chunks = [str(uuid.uuid4()) for i in range(math.ceil(f.size / UPLOAD_CHUNK_SIZE))]
486+
f.upload_file = tmp_file
487487
return tmp_file
488488

489489
def get_list_of_push_changes(self, push_changes):

mergin/test/test_client.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2328,8 +2328,10 @@ def test_clean_diff_files(mc):
23282328
shutil.copy(mp.fpath("inserted_1_A.gpkg"), mp.fpath(f_updated))
23292329
mc.push_project(project_dir)
23302330

2331-
diff_files = glob.glob("*-diff-*", root_dir=os.path.split(mp.fpath_meta("inserted_1_A.gpkg"))[0])
2331+
directory = os.path.split(mp.fpath_meta("inserted_1_A.gpkg"))[0]
2332+
diff_files = [f for f in os.listdir(directory) if "-diff-" in f]
23322333

2334+
# Assert that no matching files are found
23332335
assert diff_files == []
23342336

23352337

@@ -3214,3 +3216,22 @@ def test_client_project_sync_retry(mc):
32143216
with pytest.raises(ClientError):
32153217
mc.sync_project(project_dir)
32163218
assert mock_push_project_async.call_count == 2
3219+
3220+
3221+
def test_push_file_limits(mc):
3222+
test_project = "test_push_file_limits"
3223+
project = API_USER + "/" + test_project
3224+
project_dir = os.path.join(TMP_DIR, test_project)
3225+
cleanup(mc, project, [project_dir])
3226+
mc.create_project(test_project)
3227+
mc.download_project(project, project_dir)
3228+
shutil.copy(os.path.join(TEST_DATA_DIR, "base.gpkg"), project_dir)
3229+
# setting to some minimal value to mock limit hit
3230+
with patch("mergin.local_changes.MAX_UPLOAD_VERSIONED_SIZE", 1):
3231+
with pytest.raises(ClientError, match=f"Some files exceeded maximum upload size. Files: base.gpkg."):
3232+
mc.push_project(project_dir)
3233+
3234+
shutil.copy(os.path.join(TEST_DATA_DIR, "test.txt"), project_dir)
3235+
with patch("mergin.local_changes.MAX_UPLOAD_MEDIA_SIZE", 1):
3236+
with pytest.raises(ClientError, match=f"Some files exceeded maximum upload size. Files: test.txt."):
3237+
mc.push_project(project_dir)

mergin/test/test_local_changes.py

Lines changed: 93 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
from datetime import datetime
2+
import pytest
3+
from unittest.mock import patch
24

3-
from ..local_changes import FileChange, LocalProjectChanges
5+
from ..local_changes import ChangesValidationError, FileChange, LocalProjectChanges, MAX_UPLOAD_CHANGES
46

57

68
def test_local_changes_from_dict():
@@ -118,3 +120,93 @@ def test_local_changes_get_upload_changes():
118120
assert len(upload_changes) == 2 # Only added and updated should be included
119121
assert upload_changes[0].path == "file1.txt" # First change is from added
120122
assert upload_changes[1].path == "file2.txt" # Second change is from updated
123+
124+
125+
def test_local_changes_post_init_validation_media():
126+
"""Test the get_media_upload_file method of LocalProjectChanges."""
127+
# Define constants
128+
SIZE_LIMIT_MB = 5
129+
SIZE_LIMIT_BYTES = SIZE_LIMIT_MB * 1024 * 1024
130+
SMALL_FILE_SIZE = 1024
131+
LARGE_FILE_SIZE = 15 * 1024 * 1024
132+
133+
# Create sample LocalChange instances
134+
added = [
135+
FileChange(path="file1.txt", checksum="abc123", size=SMALL_FILE_SIZE, mtime=datetime.now()),
136+
FileChange(path="file2.jpg", checksum="xyz789", size=LARGE_FILE_SIZE, mtime=datetime.now()), # Over limit
137+
]
138+
updated = [
139+
FileChange(path="file3.mp4", checksum="lmn456", size=5 * 1024 * 1024, mtime=datetime.now()),
140+
FileChange(path="file4.gpkg", checksum="opq123", size=SMALL_FILE_SIZE, mtime=datetime.now()),
141+
]
142+
143+
# Initialize LocalProjectChanges
144+
with patch("mergin.local_changes.MAX_UPLOAD_MEDIA_SIZE", SIZE_LIMIT_BYTES):
145+
with pytest.raises(ChangesValidationError, match="Some files exceed") as err:
146+
LocalProjectChanges(added=added, updated=updated)
147+
print(err.value.invalid_changes)
148+
assert len(err.value.invalid_changes) == 1
149+
assert "file2.jpg" == err.value.invalid_changes[0].path
150+
assert err.value.invalid_changes[0].size == LARGE_FILE_SIZE
151+
152+
153+
def test_local_changes_post_init_validation_gpgkg():
154+
"""Test the get_gpgk_upload_file method of LocalProjectChanges."""
155+
# Define constants
156+
SIZE_LIMIT_MB = 10
157+
SIZE_LIMIT_BYTES = SIZE_LIMIT_MB * 1024 * 1024
158+
SMALL_FILE_SIZE = 1024
159+
LARGE_FILE_SIZE = 15 * 1024 * 1024
160+
161+
# Create sample LocalChange instances
162+
added = [
163+
FileChange(path="file1.gpkg", checksum="abc123", size=SMALL_FILE_SIZE, mtime=datetime.now()),
164+
FileChange(
165+
path="file2.gpkg", checksum="xyz789", size=LARGE_FILE_SIZE, mtime=datetime.now(), diff=None
166+
), # Over limit
167+
]
168+
updated = [
169+
FileChange(
170+
path="file3.gpkg",
171+
checksum="lmn456",
172+
size=SIZE_LIMIT_BYTES + 1,
173+
mtime=datetime.now(),
174+
diff={"path": "file3-diff.gpkg", "checksum": "diff123", "size": 1024, "mtime": datetime.now()},
175+
),
176+
FileChange(path="file4.txt", checksum="opq123", size=SMALL_FILE_SIZE, mtime=datetime.now()),
177+
]
178+
179+
# Initialize LocalProjectChanges
180+
with patch("mergin.local_changes.MAX_UPLOAD_VERSIONED_SIZE", SIZE_LIMIT_BYTES):
181+
with pytest.raises(ChangesValidationError) as err:
182+
LocalProjectChanges(added=added, updated=updated)
183+
assert len(err.value.invalid_changes) == 1
184+
assert "file2.gpkg" == err.value.invalid_changes[0].path
185+
assert err.value.invalid_changes[0].size == LARGE_FILE_SIZE
186+
187+
188+
def test_local_changes_post_init():
189+
"""Test the __post_init__ method of LocalProjectChanges."""
190+
# Define constants
191+
ADDED_COUNT = 80
192+
UPDATED_COUNT = 21
193+
SMALL_FILE_SIZE = 1024
194+
LARGE_FILE_SIZE = 2048
195+
196+
# Create more than MAX_UPLOAD_CHANGES changes
197+
added = [
198+
FileChange(path=f"file{i}.txt", checksum="abc123", size=SMALL_FILE_SIZE, mtime=datetime.now())
199+
for i in range(ADDED_COUNT)
200+
]
201+
updated = [
202+
FileChange(path=f"file{i}.txt", checksum="xyz789", size=LARGE_FILE_SIZE, mtime=datetime.now())
203+
for i in range(UPDATED_COUNT)
204+
]
205+
206+
# Initialize LocalProjectChanges
207+
local_changes = LocalProjectChanges(added=added, updated=updated)
208+
209+
# Assertions
210+
assert len(local_changes.added) == ADDED_COUNT # All added changes are included
211+
assert len(local_changes.updated) == MAX_UPLOAD_CHANGES - ADDED_COUNT # Only enough updated changes are included
212+
assert len(local_changes.added) + len(local_changes.updated) == MAX_UPLOAD_CHANGES # Total is limited

0 commit comments

Comments
 (0)