Skip to content

Commit

Permalink
fix(listing): actually pick the smallest one to update
Browse files Browse the repository at this point in the history
  • Loading branch information
shcheklein committed Dec 21, 2024
1 parent 8391d4e commit 4f66965
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 3 deletions.
6 changes: 5 additions & 1 deletion src/datachain/lib/dc.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,11 @@ def parse_uri(
if listings:
if update:
# choosing the smallest possible one to minimize update time
listing = sorted(listings, key=lambda ls: len(ls.name))[0]
listing = sorted(listings, key=lambda ls: len(ls.name), reverse=True)[0]
if listing.name != ds_name:
# better to create a separate new listing rather than updating
# potentially a very big one that we don't need now
listing = None
else:
# no need to update, choosing the most recent one
listing = sorted(listings, key=lambda ls: ls.created_at)[-1]
Expand Down
8 changes: 6 additions & 2 deletions tests/func/test_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -840,13 +840,17 @@ def test_listing_stats(cloud_test_catalog):

catalog.enlist_source(f"{src_uri}/dogs/", update=True)
stats = listing_stats(src_uri, catalog)
assert stats.num_objects == 7
assert stats.size == 36

stats = listing_stats(f"{src_uri}/dogs/", catalog)
assert stats.num_objects == 4
assert stats.size == 15

catalog.enlist_source(f"{src_uri}/dogs/")
stats = listing_stats(src_uri, catalog)
assert stats.num_objects == 4
assert stats.size == 15
assert stats.num_objects == 7
assert stats.size == 36


@pytest.mark.parametrize("cloud_type", ["s3", "azure", "gs"], indirect=True)
Expand Down

0 comments on commit 4f66965

Please sign in to comment.