Skip to content

Commit

Permalink
STAR-843: Update dtests for ULID based generation ID
Browse files (browse the repository at this point in the history)
(cherry picked from commit fd2ebac)
(cherry picked from commit b85b0f0)
(cherry picked from commit 2e1b38a)
  • Loading branch information
jacek-lewandowski committed Oct 18, 2022
1 parent c71c480 commit 31b2994
Showing 1 changed file with 25 additions and 30 deletions.
55 changes: 25 additions & 30 deletions scrub_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,23 +171,25 @@ def standalonescrub(self, table, *indexes, acceptable_errors=None):
self.launch_standalone_scrub(KEYSPACE, '{}.{}'.format(table, index))
return self.get_sstables(table, indexes)

# NOTE(review): this span comes from a rendered diff whose +/- markers and
# indentation were lost, so two method definitions are interleaved here.
# Presumably increment_generation_by (the re.sub line) is the removed helper
# and get_latest_generation (the latest_gen lines) is its replacement --
# confirm against the commit itself.
def increment_generation_by(self, sstable, generation_increment):
def get_latest_generation(self, sstables):
"""
Set the generation number for an sstable file name
Get the latest generation ID of the provided sstables
"""
# increment_generation_by: returns the sstable name with generation_increment
# added to each digit that is directly followed by '-'.
return re.sub('(\d(?!\d))\-', lambda x: str(int(x.group(1)) + generation_increment) + '-', sstable)
# get_latest_generation: sstables is iterated with .items(); each value is
# iterated in turn and every item is passed to parse.search as the text to
# search. The result stays None when sstables has no entries.
latest_gen = None
for table_or_index, table_sstables in list(sstables.items()):
# Largest 'generation' field among this entry's names; max() raises
# ValueError if table_sstables is empty.
# NOTE(review): values are compared exactly as parse returns them --
# presumably strings, i.e. lexicographic order ('9' > '10'); confirm that
# this ordering is intended for every generation ID format in use.
gen = max(parse.search('{}-{generation}-{}.{}', s).named['generation'] for s in table_sstables)
# Fold the per-entry maximum into the running overall maximum.
latest_gen = gen if latest_gen is None else max([gen, latest_gen])
return latest_gen

# NOTE(review): this span comes from a rendered diff whose +/- markers and
# indentation were lost, so two method definitions are interleaved here.
# Presumably increase_sstable_generations (the increment_by / logger.debug
# lines) is the removed helper and get_earliest_generation (the earliest_gen
# lines) is its replacement -- confirm against the commit itself.
def increase_sstable_generations(self, sstables):
def get_earliest_generation(self, sstables):
"""
After finding the number of existing sstables, increase all of the
generations by that amount.
Get the earliest generation ID of the provided sstables
"""
# get_earliest_generation: the result stays None when sstables has no entries.
earliest_gen = None
for table_or_index, table_sstables in list(sstables.items()):
# increase_sstable_generations: counts the distinct 'increment_by' fields
# parsed from this entry's names, then stores back into sstables a list of
# the names rewritten by self.increment_generation_by with that count.
increment_by = len(set(parse.search('{}-{increment_by}-{suffix}.{file_extention}', s).named['increment_by'] for s in table_sstables))
sstables[table_or_index] = [self.increment_generation_by(s, increment_by) for s in table_sstables]

logger.debug('sstables after increment {}'.format(str(sstables)))

# get_earliest_generation: smallest 'generation' field among this entry's
# names; min() raises ValueError if table_sstables is empty.
# NOTE(review): values are compared exactly as parse returns them --
# presumably strings, i.e. lexicographic order ('10' < '9'); confirm that
# this ordering is intended for every generation ID format in use.
gen = min(parse.search('{}-{generation}-{}.{}', s).named['generation'] for s in table_sstables)
# Fold the per-entry minimum into the running overall minimum.
earliest_gen = gen if earliest_gen is None else min([gen, earliest_gen])
return earliest_gen

@since('2.2')
class TestScrubIndexes(TestHelper):
Expand Down Expand Up @@ -240,16 +242,15 @@ def test_scrub_static_table(self):
initial_sstables = self.flush('users', 'gender_idx', 'state_idx', 'birth_year_idx')
scrubbed_sstables = self.scrub('users', 'gender_idx', 'state_idx', 'birth_year_idx')

self.increase_sstable_generations(initial_sstables)
assert initial_sstables == scrubbed_sstables
assert self.get_latest_generation(initial_sstables) < self.get_earliest_generation(scrubbed_sstables)

users = self.query_users(session)
assert initial_users == users

# Scrub and check sstables and data again
initial_sstables = scrubbed_sstables
scrubbed_sstables = self.scrub('users', 'gender_idx', 'state_idx', 'birth_year_idx')
self.increase_sstable_generations(initial_sstables)
assert initial_sstables == scrubbed_sstables
assert self.get_latest_generation(initial_sstables) < self.get_earliest_generation(scrubbed_sstables)

users = self.query_users(session)
assert initial_users == users
Expand Down Expand Up @@ -281,8 +282,7 @@ def test_standalone_scrub(self):
cluster.stop()

scrubbed_sstables = self.standalonescrub('users', 'gender_idx', 'state_idx', 'birth_year_idx')
self.increase_sstable_generations(initial_sstables)
assert initial_sstables == scrubbed_sstables
assert self.get_latest_generation(initial_sstables) < self.get_earliest_generation(scrubbed_sstables)

cluster.start()
session = self.patient_cql_connection(node1)
Expand Down Expand Up @@ -315,16 +315,14 @@ def test_scrub_collections_table(self):
initial_sstables = self.flush('users', 'user_uuids_idx')
scrubbed_sstables = self.scrub('users', 'user_uuids_idx')

self.increase_sstable_generations(initial_sstables)
assert initial_sstables == scrubbed_sstables
assert self.get_latest_generation(initial_sstables) < self.get_earliest_generation(scrubbed_sstables)

users = list(session.execute(("SELECT * from users where uuids contains {some_uuid}").format(some_uuid=_id)))
assert initial_users == users

initial_sstables = scrubbed_sstables
scrubbed_sstables = self.scrub('users', 'user_uuids_idx')

self.increase_sstable_generations(initial_sstables)
assert initial_sstables == scrubbed_sstables
assert self.get_latest_generation(initial_sstables) < self.get_earliest_generation(scrubbed_sstables)

users = list(session.execute(("SELECT * from users where uuids contains {some_uuid}").format(some_uuid=_id)))

Expand Down Expand Up @@ -377,16 +375,15 @@ def test_nodetool_scrub(self):
initial_sstables = self.flush('users')
scrubbed_sstables = self.scrub('users')

self.increase_sstable_generations(initial_sstables)
assert initial_sstables == scrubbed_sstables
assert self.get_latest_generation(initial_sstables) < self.get_earliest_generation(scrubbed_sstables)

users = self.query_users(session)
assert initial_users == users

# Scrub and check sstables and data again
initial_sstables = scrubbed_sstables
scrubbed_sstables = self.scrub('users')
self.increase_sstable_generations(initial_sstables)
assert initial_sstables == scrubbed_sstables
assert self.get_latest_generation(initial_sstables) < self.get_earliest_generation(scrubbed_sstables)

users = self.query_users(session)
assert initial_users == users
Expand Down Expand Up @@ -418,8 +415,7 @@ def test_standalone_scrub(self):
cluster.stop()

scrubbed_sstables = self.standalonescrub('users')
self.increase_sstable_generations(initial_sstables)
assert initial_sstables == scrubbed_sstables
assert self.get_latest_generation(initial_sstables) < self.get_earliest_generation(scrubbed_sstables)

cluster.start()
session = self.patient_cql_connection(node1)
Expand Down Expand Up @@ -447,8 +443,7 @@ def test_standalone_scrub_essential_files_only(self):
self.delete_non_essential_sstable_files('users')

scrubbed_sstables = self.standalonescrub(table='users', acceptable_errors=["WARN.*Could not recreate or deserialize existing bloom filter, continuing with a pass-through bloom filter but this will significantly impact reads performance"])
self.increase_sstable_generations(initial_sstables)
assert initial_sstables == scrubbed_sstables
assert self.get_latest_generation(initial_sstables) < self.get_earliest_generation(scrubbed_sstables)

cluster.start()
session = self.patient_cql_connection(node1)
Expand Down

0 comments on commit 31b2994

Please sign in to comment.