Skip to content

Commit

Permalink
Tests: Add full-roundtrip/integration test for the "snapshot" strategy
Browse files Browse the repository at this point in the history
  • Loading branch information
amotl committed Jul 9, 2023
1 parent 9338700 commit 25d2dcf
Show file tree
Hide file tree
Showing 5 changed files with 123 additions and 9 deletions.
13 changes: 13 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,19 @@ or on a supported object storage backend. CrateDB is able to use buckets on S3-c
storage backends, or on Azure blob storage, using the `CREATE REPOSITORY ... TYPE =
s3|azure|fs` SQL statement.

```sql
CREATE REPOSITORY
export_cold
TYPE
s3
WITH (
protocol = 'https',
endpoint = 's3-store.example.org:443',
access_key = '<USERNAME>',
secret_key = '<PASSWORD>',
bucket = 'cratedb-cold-storage'
);
```
```shell
cratedb-retention create-policy --strategy=snapshot \
--table-schema=doc --table-name=sensor_readings \
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,8 @@ test = [
"pytest<8",
"pytest-cov<5",
"pytest-mock<4",
"testcontainers",
"testcontainers<4",
"testcontainers-minio==0.0.1rc1",
]
[project.urls]
changelog = "https://github.com/crate-workbench/cratedb-retention/blob/main/CHANGES.rst"
Expand Down
10 changes: 10 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from cratedb_retention.util.common import setup_logging
from cratedb_retention.util.database import DatabaseAdapter, run_sql
from tests.testcontainers.cratedb import CrateDBContainer
from tests.testcontainers.minio import ExtendedMinioContainer

# Use different schemas both for storing the retention policy table, and
# the test data, so that they do not accidentally touch the default `doc`
Expand Down Expand Up @@ -67,6 +68,15 @@ def cratedb():
db.finalize()


@pytest.fixture(scope="session")
def minio():
    """
    Session-scoped fixture handing a running MinIO container to the tests.

    The container object is used as a context manager, so leaving the
    `with` block on fixture teardown triggers its exit handling.
    """
    container = ExtendedMinioContainer()
    with container as service:
        yield service


@pytest.fixture()
def database(cratedb, settings):
"""
Expand Down
55 changes: 47 additions & 8 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Distributed under the terms of the AGPLv3 license, see LICENSE.
import pytest
from click.testing import CliRunner
from sqlalchemy.exc import OperationalError
from sqlalchemy.exc import OperationalError, ProgrammingError

from cratedb_retention.cli import cli
from tests.conftest import TESTDRIVE_DATA_SCHEMA
Expand Down Expand Up @@ -260,22 +260,61 @@ def test_run_reallocate(store, database, raw_metrics, raw_metrics_reallocate_pol
assert database.count_records(raw_metrics) == 6


def test_run_snapshot(caplog, store, database, sensor_readings, sensor_readings_snapshot_policy, minio):
    """
    CLI test: Invoke `cratedb-retention run --strategy=snapshot`.

    Full roundtrip against a MinIO service: create a bucket, register it in
    CrateDB as the `export_cold` snapshot repository, run the retention job
    through the CLI, then verify that the table has been emptied and that the
    bucket contains the snapshot objects.

    `caplog` is not inspected by this test; it stays in the signature so the
    set of requested fixtures is unchanged.
    """

    bucket_name = "cratedb-cold-storage"

    # Acquire runtime information from MinIO container. In order to let CrateDB talk to
    # MinIO, we need its Docker-internal IP address (172.17.0.x), not the exposed one.
    s3_config = minio.get_config()
    s3_endpoint = minio.get_real_host_address()

    # Prepare a bucket in the S3 storage.
    minio.get_client().make_bucket(bucket_name)

    # TODO: DROP REPOSITORY IF EXISTS
    # Until then, emulate it: an unknown repository is fine, anything else is not.
    try:
        database.run_sql("DROP REPOSITORY export_cold")
    except ProgrammingError as ex:
        if "RepositoryUnknownException" not in str(ex):
            raise

    # TODO: CREATE REPOSITORY IF NOT EXISTS
    sql = f"""
        CREATE REPOSITORY
            export_cold
        TYPE
            s3
        WITH (
            protocol = 'http',
            endpoint = '{s3_endpoint}',
            access_key = '{s3_config["access_key"]}',
            secret_key = '{s3_config["secret_key"]}',
            bucket = '{bucket_name}'
        );
    """
    database.run_sql(sql)

    # Check number of records in database.
    assert database.count_records(sensor_readings) == 9

    # Invoke data retention through CLI interface.
    database_url = store.database.dburi
    runner = CliRunner()
    runner.invoke(
        cli,
        args=f'run --cutoff-day=2024-12-31 --strategy=snapshot "{database_url}"',
        catch_exceptions=False,
    )

    # Check number of records in database.
    assert database.count_records(sensor_readings) == 0

    # Verify that the S3 bucket has been populated correctly, and that the snapshot has the right shape.
    object_names = minio.list_object_names(bucket_name=bucket_name)
    assert "index-1" in object_names
    assert "index.latest" in object_names
    assert "indices/" in object_names
    assert len(object_names) == 7
51 changes: 51 additions & 0 deletions tests/testcontainers/minio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

from testcontainers.minio import MinioContainer


class ExtendedMinioContainer(MinioContainer):
    """
    A Testcontainer for MinIO with a few additions.

    - Use the `latest` OCI image, unless the caller selects a different one.
    - Provide convenience methods for getting the Docker-internal endpoint address.
    - Provide a convenience method for listing the object names in a bucket.
    """

    def __init__(self, *args, **kwargs):
        """
        Set up the container, defaulting to the most recent MinIO image.

        All arguments are forwarded unchanged to `MinioContainer`.
        """
        # Use most recent stable release of MinIO.
        image = "quay.io/minio/minio:latest"

        # Only inject the default when no positional arguments are given.
        # NOTE(review): this assumes the upstream constructor takes `image` as
        # its first positional parameter; in that case a positional image plus
        # an injected `image` keyword would raise "multiple values for argument".
        if not args:
            kwargs.setdefault("image", image)

        super().__init__(*args, **kwargs)

    def get_real_host_ip(self):
        """
        To let containers talk to each other, explicitly provide the real IP address
        of the container. In corresponding jargon, it appears to be the "bridge IP".
        """
        return self.get_docker_client().bridge_ip(self._container.id)

    def get_real_host_address(self):
        """
        Provide Docker-internal full endpoint address `<host>:<port>` of the service.
        For example, `172.17.0.4:9000`.
        """
        return f"{self.get_real_host_ip()}:{self.port_to_expose}"

    def list_object_names(self, bucket_name: str):
        """
        Return list of object names within given bucket.

        The names are taken from the `object_name` attribute of each item
        returned by the client's `list_objects` call, in the order returned.
        """
        objects = self.get_client().list_objects(bucket_name=bucket_name)
        return [obj.object_name for obj in objects]

0 comments on commit 25d2dcf

Please sign in to comment.