Skip to content

Commit

Permalink
cfr: Add support to export systables to a tarfile
Browse files Browse the repository at this point in the history
  • Loading branch information
seut committed May 31, 2024
1 parent 68d179b commit ecea4c9
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 9 deletions.
4 changes: 2 additions & 2 deletions cratedb_toolkit/cfr/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ def cli(ctx: click.Context, cratedb_sqlalchemy_url: str, verbose: bool, debug: b
def sys_export(ctx: click.Context, target: str):
cratedb_sqlalchemy_url = ctx.meta["cratedb_sqlalchemy_url"]
try:
stc = SystemTableExporter(dburi=cratedb_sqlalchemy_url, target=path_from_url(target))
path = stc.save()
stc = SystemTableExporter(dburi=cratedb_sqlalchemy_url)
path = stc.save(path_from_url(target))

Check warning on line 50 in cratedb_toolkit/cfr/cli.py

View check run for this annotation

Codecov / codecov/patch

cratedb_toolkit/cfr/cli.py#L49-L50

Added lines #L49 - L50 were not covered by tests
jd({"path": str(path)})
except Exception as ex:
error_logger(ctx)(ex)
Expand Down
35 changes: 30 additions & 5 deletions cratedb_toolkit/cfr/systable.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@

import datetime as dt
import logging
import os
import tarfile
import tempfile
import typing as t
from pathlib import Path

Expand Down Expand Up @@ -92,14 +95,12 @@ class SystemTableExporter:
Export schema and data from CrateDB system tables.
"""

def __init__(self, dburi: str, target: t.Union[Path], data_format: DataFormat = "jsonl"):
def __init__(self, dburi: str, data_format: DataFormat = "jsonl"):
self.dburi = dburi
self.target = target
self.data_format = data_format
self.adapter = DatabaseAdapter(dburi=self.dburi)
self.info = InfoContainer(adapter=self.adapter)
self.inspector = SystemTableInspector(dburi=self.dburi)
self.target.mkdir(exist_ok=True, parents=True)

def read_table(self, tablename: str) -> pl.DataFrame:
sql = f'SELECT * FROM "{SystemTableKnowledge.SYS_SCHEMA}"."{tablename}"' # noqa: S608
Expand All @@ -122,9 +123,27 @@ def dump_table(self, frame: pl.DataFrame, file: t.Union[t.TextIO, None] = None):
else:
raise NotImplementedError(f"Output format not implemented: {self.data_format}")

def save(self) -> Path:
def save(self, target: Path) -> Path:
    """
    Export system tables either into a directory or into a gzip-compressed tarball.

    When the file name of `target` ends with `.tgz` or `.tar.gz`, the export
    is written to a temporary staging directory first, then packed into an
    archive file at `target`. Otherwise, `target` is treated as a directory,
    created if needed, and the export is written into it directly.

    :param target: Output directory, or path of the archive file to create.
    :return: `target` when an archive was created, otherwise the path
             returned by `export()`.
    """
    # `str.endswith` accepts a tuple of suffixes, so one call covers both spellings.
    if not target.name.endswith((".tgz", ".tar.gz")):
        target.mkdir(exist_ok=True, parents=True)
        return self.export(target)

    # Mirror the directory branch: make sure the archive's parent folder exists,
    # and fail early on an unwritable location before running the export.
    target.parent.mkdir(exist_ok=True, parents=True)

    # The context manager removes the staging directory even when the export
    # or the archive creation raises, so no temporary data is left behind.
    with tempfile.TemporaryDirectory() as temp_dir:
        staging_folder = Path(temp_dir)
        self.export(staging_folder)
        self.make_tarfile(staging_folder, target)

    logger.info(f"Created archive file {target}")
    return target

Check warning on line 142 in cratedb_toolkit/cfr/systable.py

View check run for this annotation

Codecov / codecov/patch

cratedb_toolkit/cfr/systable.py#L137-L142

Added lines #L137 - L142 were not covered by tests

def export(self, target_folder: Path) -> Path:
timestamp = dt.datetime.now().strftime("%Y-%m-%dT%H-%M-%S")
path = self.target / self.info.cluster_name / timestamp / "sys"
path = target_folder / self.info.cluster_name / timestamp / "sys"

Check warning on line 146 in cratedb_toolkit/cfr/systable.py

View check run for this annotation

Codecov / codecov/patch

cratedb_toolkit/cfr/systable.py#L146

Added line #L146 was not covered by tests
logger.info(f"Exporting system tables to: {path}")
system_tables = self.inspector.table_names()
path_schema = path / ExportSettings.SCHEMA_PATH
Expand Down Expand Up @@ -166,6 +185,12 @@ def save(self) -> Path:
logger.info(f"Successfully exported {table_count} system tables")
return path

@staticmethod
def make_tarfile(source_folder: Path, target_file_path: Path) -> Path:
with tarfile.open(target_file_path, "x:gz") as tar:
tar.add(source_folder.absolute(), arcname=os.path.basename(source_folder))
return target_file_path

Check warning on line 192 in cratedb_toolkit/cfr/systable.py

View check run for this annotation

Codecov / codecov/patch

cratedb_toolkit/cfr/systable.py#L190-L192

Added lines #L190 - L192 were not covered by tests


class SystemTableImporter:
"""
Expand Down
44 changes: 42 additions & 2 deletions tests/cfr/test_cli.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import json
import os.path
import re
import shutil
import sys
import tarfile

import tests

Expand Down Expand Up @@ -43,10 +45,48 @@ def test_cfr_cli_export(cratedb, tmp_path, caplog):
assert filenames(path) == ["data", "schema"]

schema_files = filenames(path / "schema")
data_files = filenames(path / "schema")
data_files = filenames(path / "data")

assert len(schema_files) >= 19
assert len(data_files) >= 19
assert len(data_files) >= 10


def test_cfr_cli_export_to_archive_file(cratedb, tmp_path, caplog):
    """
    Verify that `ctk cfr sys-export some-file.tgz` writes a tarball.

    The command is invoked with an archive file name as target. The test then
    checks the log output, the reported path, and the number of archive
    members related to schema and data files.
    """

    target = str(tmp_path / "cluster-data.tgz")

    # Run the export subcommand against the test database.
    environment = {"CRATEDB_SQLALCHEMY_URL": cratedb.database.dburi, "CFR_TARGET": str(tmp_path)}
    runner = CliRunner(env=environment)
    result = runner.invoke(cli, args=f"--debug sys-export {target}", catch_exceptions=False)
    assert result.exit_code == 0

    # The exporter must have logged both the start and the success message.
    assert "Exporting system tables to" in caplog.text
    assert re.search(r"Successfully exported \d+ system tables", caplog.text), "Log message missing"

    # The command reports the archive path as JSON on stdout.
    response = json.loads(result.output)
    path = Path(response["path"])
    assert "cluster-data.tgz" in path.name

    # Classify archive members: "data" takes precedence over "schema".
    with tarfile.open(path, "r") as tar:
        member_names = tar.getnames()
    data_files = [name for name in member_names if "data" in name]
    schema_files = [name for name in member_names if "data" not in name and "schema" in name]

    assert len(schema_files) >= 19
    assert len(data_files) >= 10


def test_cfr_cli_import(cratedb, tmp_path, caplog):
Expand Down

0 comments on commit ecea4c9

Please sign in to comment.