Skip to content

Commit

Permalink
Add "retry_on_existing_dir" backup flag (#95)
Browse files Browse the repository at this point in the history
  • Loading branch information
myrrc authored Dec 6, 2023
1 parent 1e4b4aa commit b13db95
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 11 deletions.
45 changes: 34 additions & 11 deletions ch_backup/logic/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Clickhouse backup logic for tables
"""
import os
import shutil
from collections import deque
from dataclasses import dataclass
from functools import partial
Expand Down Expand Up @@ -322,20 +323,42 @@ def _backup_table(
)
return

# Sometimes while ALTER FREEZing a table we may get an error that a directory for
# part already exists. This clearly is a clickhouse error but to mitigate it on backup
# side we can just clear this directory as it doesn't contain actual data, just hardlinks
retry_on_existing_dir: int = context.config.get("retry_on_existing_dir", 0)
dir_exists_prefix: str = "Code: 84. DB::Exception: Directory "

# Freeze only MergeTree tables
if not schema_only and is_merge_tree(table.engine):
try:
context.ch_ctl.freeze_table(backup_name, table)
except ClickhouseError:
if context.ch_ctl.does_table_exist(table.database, table.name):
raise
for i in range(retry_on_existing_dir + 1):
try:
context.ch_ctl.freeze_table(backup_name, table)
break
except ClickhouseError as e:
if not context.ch_ctl.does_table_exist(table.database, table.name):
logging.warning(
'Table "{}"."{}" was removed by a user during backup',
table.database,
table.name,
)
return

if retry_on_existing_dir > 0 and i == retry_on_existing_dir:
raise ClickhouseError(
f"All {retry_on_existing_dir} retries failed"
) from e
if retry_on_existing_dir > 0 and str(e).startswith(
dir_exists_prefix
):
existing_dir = str(e)[len(dir_exists_prefix) :].split(
" ", maxsplit=1
)[0]
shutil.rmtree(existing_dir)
logging.debug(f"Removed existing dir {existing_dir}, retrying")
continue

logging.warning(
'Table "{}"."{}" was removed by a user during backup',
table.database,
table.name,
)
return
raise e

# Check if table metadata was updated
new_mtime = self._get_mtime(table.metadata_path)
Expand Down
40 changes: 40 additions & 0 deletions tests/integration/features/single_table.feature
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,43 @@ Feature: Backup of single database table
tables:
- test_db_01.test_table_01
"""

Scenario: Create backup with existing paths in backup directory
Given clickhouse01 has test clickhouse data test1
When we execute query on clickhouse01
"""
ALTER TABLE test_db_01.test_table_01 FREEZE
"""
When we try to execute command on clickhouse01
"""
find /var/lib/clickhouse/shadow/1/store/*/*/* -print | head -n 1 > part_dir
"""
When we execute query on clickhouse01
"""
# Strange but you can't UNFREEZE without specifying name
ALTER TABLE test_db_01.test_table_01 UNFREEZE with name '1'
"""
When we try to execute command on clickhouse01
"""
part_dir=$(cat part_dir)
mkdir -p $part_dir
echo "test" > $part_dir/doo
chown -R clickhouse:clickhouse $part_dir
"""
When we can't create clickhouse01 clickhouse backup with exception
"""
tables:
- test_db_01.test_table_01
name: '1'
"""
When we update ch-backup configuration on clickhouse01
"""
backup:
retry_on_existing_dir: 1
"""
When we create clickhouse01 clickhouse backup
"""
tables:
- test_db_01.test_table_01
name: '1'
"""
9 changes: 9 additions & 0 deletions tests/integration/steps/ch_backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""
import json

import pytest
from behave import given, then, when
from hamcrest import (
any_of,
Expand All @@ -18,6 +19,7 @@


@given("ch-backup configuration on {node:w}")
@when("we update ch-backup configuration on {node:w}")
def step_update_ch_backup_config(context, node):
conf = get_step_data(context)
BackupManager(context, node).update_config(conf)
Expand Down Expand Up @@ -45,6 +47,13 @@ def step_cannot_create_backup(context, node):
)


@when("we can't create {node:w} clickhouse backup with exception")
def step_cannot_create_backup_exc(context, node):
options = get_step_data(context)
with pytest.raises(Exception):
BackupManager(context, node).backup(**options)


@given("metadata of {node:w} backup #{backup_id:d} was adjusted with")
@when("metadata of {node:w} backup #{backup_id:d} was adjusted with")
@given('metadata of {node:w} backup "{backup_id}" was adjusted with')
Expand Down

0 comments on commit b13db95

Please sign in to comment.