diff --git a/.gitignore b/.gitignore index 2f739edb..34da1317 100644 --- a/.gitignore +++ b/.gitignore @@ -176,3 +176,15 @@ cython_debug/ #.idea/ .vscode + +# pyenv version +.python-version + +# vim backup files +*~ +repo_time_tester.py +reset.sh +seed_data_to_ds.py +docker_test/scripts/license.json +.claude +agent-os diff --git a/README.rst b/README.rst index ca72b6fc..3def93e6 100644 --- a/README.rst +++ b/README.rst @@ -1,5 +1,8 @@ .. _readme: +**THIS FORK OF ELASTIC/CURATOR REPRESENTS A WORK-IN-PROGRESS AND SHOULD NOT BE CONSIDERED "RUNNABLE". IT IS STILL IN DEVELOPMENT.** + +**HERE THERE BE TYGERS.** Curator ======= diff --git a/THAW_TRACKING_FLOW.md b/THAW_TRACKING_FLOW.md new file mode 100644 index 00000000..0e9a2c8c --- /dev/null +++ b/THAW_TRACKING_FLOW.md @@ -0,0 +1,276 @@ +# Thaw Request Tracking Flow Diagram + +## Complete Lifecycle of a Thaw Request + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ USER INITIATES THAW │ +│ curator_cli deepfreeze thaw --start-date 2025-01-01 --end-date 2025-01-31 │ +└────────────────────────────────┬──────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────────────┐ +│ PHASE 1: THAW INITIALIZATION (thaw.py - do_action) │ +│ │ +│ 1. Generate UUID for request_id │ +│ 2. Find repositories by date range │ +│ 3. 
For each repository: │ +│ - List S3 objects (list_objects) │ +│ - Call S3 restore_object() with Days=7, Tier=Standard │ +│ │ +│ Storage in Elasticsearch (deepfreeze-status index): │ +│ { │ +│ "_id": "uuid-1234-5678", │ +│ "doctype": "thaw_request", │ +│ "request_id": "uuid-1234-5678", │ +│ "repos": ["deepfreeze-000001", "deepfreeze-000002"], │ +│ "status": "in_progress", │ +│ "created_at": "2025-01-15T10:00:00Z", │ +│ "start_date": "2025-01-01T00:00:00Z", │ +│ "end_date": "2025-01-31T23:59:59Z" │ +│ } │ +└────────────────────────────────┬──────────────────────────────────────────────┘ + │ + ┌────────────┴────────────┐ + │ │ + ▼ ▼ + ┌──────────────────────┐ ┌──────────────────────┐ + │ SYNC MODE │ │ ASYNC MODE │ + │ (--sync flag) │ │ (default) │ + │ │ │ │ + │ Wait for restoration │ │ Return immediately │ + │ and mounting │ │ with request_id │ + └──────────┬───────────┘ └──────────┬───────────┘ + │ │ + └──────────────┬───────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────────────┐ +│ PHASE 2: S3 GLACIER RESTORE IN PROGRESS │ +│ │ +│ AWS S3 is restoring objects in the background │ +│ - Time depends on retrieval tier (Standard: hours, Expedited: minutes) │ +│ - Objects remain in GLACIER storage class (StorageClass doesn't change) │ +│ - S3 adds "Restore" header to object metadata │ +│ │ +│ Current S3 Restore header state for each object: │ +│ ongoing-request="true" │ +└────────────────────────────────┬──────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────────────┐ +│ PHASE 3: USER CHECKS STATUS (or automatic check in sync mode) │ +│ │ +│ curator_cli deepfreeze thaw --check-status uuid-1234-5678 │ +│ OR (in sync mode, happens automatically) │ +│ │ +│ Action: thaw.py - do_check_status() │ +│ 1. Retrieve thaw request from ES │ +│ 2. Get repositories from request │ +│ 3. 
For EACH repository: │ +│ ┌──────────────────────────────────────────────────────┐ │ +│ │ check_restore_status(s3, bucket, base_path) │ │ +│ │ Parallel check using ThreadPoolExecutor (15 workers)│ │ +│ │ For EACH object in base_path: │ │ +│ │ - Call s3.head_object(bucket, key) │ │ +│ │ - Extract Restore header from metadata │ │ +│ │ - Parse ongoing-request value: │ │ +│ │ * "true" → in_progress │ │ +│ │ * "false" → restored (complete) │ │ +│ │ │ │ +│ │ Returns: { │ │ +│ │ "total": 150, │ │ +│ │ "restored": 75, # ongoing-request="false" │ │ +│ │ "in_progress": 50, # ongoing-request="true" │ │ +│ │ "not_restored": 25,# No Restore header │ │ +│ │ "complete": false │ │ +│ │ } │ │ +│ └──────────────────────────────────────────────────────┘ │ +│ │ +│ 4. If status["complete"] == true for all repos: │ +│ a. Mount each repository in Elasticsearch │ +│ b. Update repository date ranges │ +│ c. Find and mount indices within date range │ +│ d. Add indices back to data streams if applicable │ +│ e. update_thaw_request(status="completed") │ +└────────────────────────────────┬──────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────────────┐ +│ PHASE 4: RESTORATION COMPLETE │ +│ │ +│ Thaw request status updated in ES: │ +│ { │ +│ "_id": "uuid-1234-5678", │ +│ "doctype": "thaw_request", │ +│ "request_id": "uuid-1234-5678", │ +│ "repos": ["deepfreeze-000001", "deepfreeze-000002"], │ +│ "status": "completed", ◄─── CHANGED │ +│ "created_at": "2025-01-15T10:00:00Z", │ +│ "start_date": "2025-01-01T00:00:00Z", │ +│ "end_date": "2025-01-31T23:59:59Z" │ +│ } │ +│ │ +│ Repository state in Elasticsearch: │ +│ { │ +│ "name": "deepfreeze-000001", │ +│ "thaw_state": "thawed", │ +│ "is_mounted": true, │ +│ "expires_at": "2025-01-22T10:00:00Z", ◄─── restore_days = 7 days │ +│ "bucket": "my-bucket", │ +│ "base_path": "curator-snapshots" │ +│ } │ +│ │ +│ S3 Restore header state for restored objects: │ +│ ongoing-request="false", 
expiry-date="Wed, 22 Jan 2025 10:00:00 GMT" │ +└────────────────────────────────┬──────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────────────┐ +│ PHASE 5: DATA AVAILABLE & IN USE (7 days) │ +│ │ +│ - Indices are mounted and searchable │ +│ - Temporary restored copies in Standard tier are available │ +│ - Will auto-expire on 2025-01-22T10:00:00Z │ +└────────────────────────────────┬──────────────────────────────────────────────┘ + │ + ┌──────────┴──────────┐ + │ │ + ▼ ▼ + ┌──────────────────────┐ ┌──────────────────────┐ + │ USER REFREEZE │ │ AUTOMATIC CLEANUP │ + │ (manual) │ │ (scheduled) │ + │ │ │ │ + │ curator_cli │ │ cleanup.py │ + │ deepfreeze │ │ detects expires_at │ + │ refreeze \ │ │ <= now │ + │ uuid-1234-5678 │ │ │ + └──────────┬───────────┘ └──────────┬───────────┘ + │ │ + └────────────┬────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────────────┐ +│ PHASE 6: CLEANUP │ +│ │ +│ For each repository in the thaw request: │ +│ 1. Delete all mounted indices from ES │ +│ 2. Unmount the repository from ES │ +│ 3. Delete per-repo thawed ILM policy │ +│ 4. Reset repository state to frozen │ +│ 5. 
NOTE: S3 objects NOT deleted, they revert to Glacier automatically │ +│ │ +│ Thaw request marked as "refrozen" in ES: │ +│ { │ +│ "_id": "uuid-1234-5678", │ +│ "doctype": "thaw_request", │ +│ "request_id": "uuid-1234-5678", │ +│ "repos": ["deepfreeze-000001", "deepfreeze-000002"], │ +│ "status": "refrozen", ◄─── CHANGED │ +│ "created_at": "2025-01-15T10:00:00Z", │ +│ "start_date": "2025-01-01T00:00:00Z", │ +│ "end_date": "2025-01-31T23:59:59Z" │ +│ } │ +│ │ +│ Repository state reverted: │ +│ { │ +│ "name": "deepfreeze-000001", │ +│ "thaw_state": "frozen", ◄─── CHANGED │ +│ "is_mounted": false, │ +│ "expires_at": null, │ +│ "bucket": "my-bucket", │ +│ "base_path": "curator-snapshots" │ +│ } │ +│ │ +│ S3 state (automatic, no action by Curator): │ +│ - Temporary restored copy expires automatically │ +│ - Objects revert to GLACIER storage class │ +│ - Restore header removed from metadata │ +└────────────────────────────────┬──────────────────────────────────────────────┘ + │ + ▼ + REQUEST LIFECYCLE COMPLETE + (Can be viewed with --include-completed) +``` + +--- + +## Key Technical Details + +### The "Restore" Header from S3 + +This is the ONLY reliable way to track Glacier restore status: + +``` +DURING RESTORATION: +HEAD /object.json +Response headers: + Restore: ongoing-request="true" + StorageClass: GLACIER + +AFTER RESTORATION COMPLETE: +HEAD /object.json +Response headers: + Restore: ongoing-request="false", expiry-date="Wed, 22 Jan 2025 10:00:00 GMT" + StorageClass: GLACIER ◄─── Still GLACIER! 
+ +AFTER EXPIRATION: +HEAD /object.json +Response headers: + (Restore header removed) + StorageClass: GLACIER +``` + +### Why Storage Class Doesn't Change + +- Objects in GLACIER remain in GLACIER storage class even after restoration +- This is by design - AWS tracks restoration separately via the Restore header +- Once the restore expires, objects silently revert to cold storage +- No explicit "refreeze" action needed on S3 side + +### Parallel Status Checking + +The check_restore_status() function uses ThreadPoolExecutor to check multiple objects concurrently: + +```python +with ThreadPoolExecutor(max_workers=min(15, len(glacier_objects))) as executor: + # Submit all head_object checks concurrently + # Collate results as they complete + # Much faster than sequential checking (e.g., 1000 objects in seconds vs minutes) +``` + +--- + +## Status Transitions + +``` + ┌─────────────────────────────────────────┐ + │ INITIAL: in_progress │ + │ (all thaw requests start here) │ + └──────────────────────────────────────────┘ + │ + ┌───────────────┴────────────────┐ + │ │ + ▼ ▼ + ┌─────────────────────┐ ┌─────────────────────┐ + │ Restoration complete│ │ S3 operations fail │ + │ Indices mounted │ │ or timeout │ + └──────────┬──────────┘ └──────────┬──────────┘ + │ │ + ▼ ▼ + ┌─────────────────────┐ ┌─────────────────────┐ + │ SUCCESSFUL: │ │ FAILED: │ + │ completed │ │ failed │ + └──────────┬──────────┘ └─────────────────────┘ + │ + (User calls refreeze OR + cleanup detects expiration) + │ + ▼ + ┌─────────────────────┐ + │ DONE: │ + │ refrozen │ + │ (old requests) │ + └─────────────────────┘ +``` + diff --git a/THAW_TRACKING_IMPLEMENTATION.md b/THAW_TRACKING_IMPLEMENTATION.md new file mode 100644 index 00000000..b01ece4d --- /dev/null +++ b/THAW_TRACKING_IMPLEMENTATION.md @@ -0,0 +1,633 @@ +# Thaw Request Tracking Implementation in Curator Deepfreeze + +## Overview +This document details how thaw requests are currently tracked and managed in the deepfreeze functionality of 
Elasticsearch Curator, focusing on metadata storage, S3/Glacier API interactions, and status tracking mechanisms. + +--- + +## 1. THAW REQUEST METADATA STORAGE + +### Location: `curator/actions/deepfreeze/utilities.py` + +#### save_thaw_request() - Lines 1040-1091 +Stores a thaw request to the status index for later querying and status checking: + +```python +def save_thaw_request( + client: Elasticsearch, + request_id: str, + repos: list[Repository], + status: str, + start_date: datetime = None, + end_date: datetime = None, +) -> None: + """Save a thaw request to the status index for later querying.""" + loggit = logging.getLogger("curator.actions.deepfreeze") + loggit.debug("Saving thaw request %s", request_id) + + request_doc = { + "doctype": "thaw_request", + "request_id": request_id, + "repos": [repo.name for repo in repos], + "status": status, + "created_at": datetime.now(timezone.utc).isoformat(), + } + + # Add date range if provided + if start_date: + request_doc["start_date"] = start_date.isoformat() + if end_date: + request_doc["end_date"] = end_date.isoformat() + + try: + client.index(index=STATUS_INDEX, id=request_id, body=request_doc) + loggit.info("Thaw request %s saved successfully", request_id) + except Exception as e: + loggit.error("Failed to save thaw request %s: %s", request_id, e) + raise ActionError(f"Failed to save thaw request {request_id}: {e}") +``` + +**Stored Fields:** +- `doctype`: "thaw_request" +- `request_id`: UUID for unique identification +- `repos`: List of repository names being thawed +- `status`: One of: "in_progress", "completed", "failed", "refrozen" +- `created_at`: ISO 8601 timestamp of when request was created +- `start_date`: ISO 8601 start of date range (optional) +- `end_date`: ISO 8601 end of date range (optional) + +#### get_thaw_request() - Lines 1094-1119 +Retrieves a specific thaw request by ID: + +```python +def get_thaw_request(client: Elasticsearch, request_id: str) -> dict: + """Retrieve a thaw request from the 
status index by ID.""" + loggit = logging.getLogger("curator.actions.deepfreeze") + loggit.debug("Retrieving thaw request %s", request_id) + + try: + response = client.get(index=STATUS_INDEX, id=request_id) + return response["_source"] + except NotFoundError: + loggit.error("Thaw request %s not found", request_id) + raise ActionError(f"Thaw request {request_id} not found") + except Exception as e: + loggit.error("Failed to retrieve thaw request %s: %s", request_id, e) + raise ActionError(f"Failed to retrieve thaw request {request_id}: {e}") +``` + +#### list_thaw_requests() - Lines 1122-1149 +Retrieves all thaw requests: + +```python +def list_thaw_requests(client: Elasticsearch) -> list[dict]: + """List all thaw requests from the status index.""" + loggit = logging.getLogger("curator.actions.deepfreeze") + loggit.debug("Listing all thaw requests") + + query = {"query": {"term": {"doctype": "thaw_request"}}, "size": 10000} + + try: + response = client.search(index=STATUS_INDEX, body=query) + requests = response["hits"]["hits"] + loggit.debug("Found %d thaw requests", len(requests)) + return [{"id": req["_id"], **req["_source"]} for req in requests] + except NotFoundError: + loggit.warning("Status index not found") + return [] + except Exception as e: + loggit.error("Failed to list thaw requests: %s", e) + raise ActionError(f"Failed to list thaw requests: {e}") +``` + +#### update_thaw_request() - Lines 1152-1185 +Updates a thaw request in the status index: + +```python +def update_thaw_request( + client: Elasticsearch, request_id: str, status: str = None, **fields +) -> None: + """Update a thaw request in the status index.""" + loggit = logging.getLogger("curator.actions.deepfreeze") + loggit.debug("Updating thaw request %s", request_id) + + update_doc = {} + if status: + update_doc["status"] = status + update_doc.update(fields) + + try: + client.update(index=STATUS_INDEX, id=request_id, doc=update_doc) + loggit.info("Thaw request %s updated successfully", 
request_id) + except Exception as e: + loggit.error("Failed to update thaw request %s: %s", request_id, e) + raise ActionError(f"Failed to update thaw request {request_id}: {e}") +``` + +--- + +## 2. THAW REQUEST STATUS LIFECYCLE + +### Location: `curator/actions/deepfreeze/constants.py` + +```python +# Thaw request status lifecycle +THAW_STATUS_IN_PROGRESS = "in_progress" # Thaw operation is actively running +THAW_STATUS_COMPLETED = "completed" # Thaw completed, data available and mounted +THAW_STATUS_FAILED = "failed" # Thaw operation failed +THAW_STATUS_REFROZEN = "refrozen" # Thaw was completed but has been refrozen (cleaned up) + +THAW_REQUEST_STATUSES = [ + THAW_STATUS_IN_PROGRESS, + THAW_STATUS_COMPLETED, + THAW_STATUS_FAILED, + THAW_STATUS_REFROZEN, +] +``` + +### Status Flow: +1. **in_progress** → Initial state when thaw request is created +2. **completed** → Set after all repositories have been mounted and indices are restored +3. **refrozen** → Set when user explicitly refreezes or cleanup runs +4. **failed** → Set if thaw operation fails + +--- + +## 3. AWS S3/GLACIER API - RESTORE REQUEST TRACKING + +### Location: `curator/s3client.py` - AwsS3Client.thaw() + +Lines 279-388: The thaw method initiates restore requests to S3/Glacier: + +```python +def thaw( + self, + bucket_name: str, + base_path: str, + object_keys: list[dict], + restore_days: int = 7, + retrieval_tier: str = "Standard", +) -> None: + """ + Restores objects from Glacier storage class back to an instant access tier. 
+ """ + self.loggit.info( + "Starting thaw operation - bucket: %s, base_path: %s, objects: %d, restore_days: %d, tier: %s", + bucket_name, + base_path, + len(object_keys), + restore_days, + retrieval_tier + ) + + restored_count = 0 + skipped_count = 0 + error_count = 0 + + for idx, obj in enumerate(object_keys, 1): + key = obj.get("Key") if isinstance(obj, dict) else obj + + if not key.startswith(base_path): + skipped_count += 1 + continue + + # Get storage class from object metadata + if isinstance(obj, dict) and "StorageClass" in obj: + storage_class = obj.get("StorageClass", "") + else: + try: + response = self.client.head_object(Bucket=bucket_name, Key=key) + storage_class = response.get("StorageClass", "") + except Exception as e: + error_count += 1 + # ... error handling + continue + + try: + if storage_class in ["GLACIER", "DEEP_ARCHIVE", "GLACIER_IR"]: + self.loggit.debug( + "Restoring object %d/%d: %s from %s", + idx, + len(object_keys), + key, + storage_class + ) + # Initiate S3 restore request + self.client.restore_object( + Bucket=bucket_name, + Key=key, + RestoreRequest={ + "Days": restore_days, + "GlacierJobParameters": {"Tier": retrieval_tier}, + }, + ) + restored_count += 1 + except Exception as e: + error_count += 1 + # ... error handling + + self.loggit.info( + "Thaw operation completed - restored: %d, skipped: %d, errors: %d (total: %d)", + restored_count, + skipped_count, + error_count, + len(object_keys) + ) +``` + +**S3 API Call Details:** +- Uses `client.restore_object()` boto3 call +- Specifies `Days` parameter (restore_days, e.g., 7 days) +- Specifies `GlacierJobParameters.Tier` (Standard/Expedited/Bulk) +- Returns immediately - does NOT wait for restore to complete +- The restore request is tracked by AWS, not directly in Curator + +--- + +## 4. 
CHECKING IF RESTORE IS IN PROGRESS vs COMPLETED + +### Location: `curator/actions/deepfreeze/utilities.py` - check_restore_status() + +Lines 852-991: Uses S3 head_object() to check the Restore metadata header: + +```python +def check_restore_status(s3: S3Client, bucket: str, base_path: str) -> dict: + """ + Check the restoration status of objects in an S3 bucket. + + Uses head_object to check the Restore metadata field, which is the only way + to determine if a Glacier object has been restored (storage class remains GLACIER + even after restoration). + + This function uses parallel processing to check multiple objects concurrently, + significantly improving performance when checking large numbers of objects. + """ + loggit = logging.getLogger("curator.actions.deepfreeze") + loggit.debug("Checking restore status for s3://%s/%s", bucket, base_path) + + # ... code to normalize path and list objects ... + + # Helper function to check a single Glacier object's restore status + def check_single_object(key: str) -> tuple: + """Check restore status for a single object. 
Returns (status, key).""" + try: + metadata = s3.head_object(bucket, key) + restore_header = metadata.get("Restore") + + if restore_header: + # Restore header exists - parse it to check status + # Format: 'ongoing-request="true"' or 'ongoing-request="false", expiry-date="..."' + if 'ongoing-request="true"' in restore_header: + loggit.debug("Object %s: restoration in progress", key) + return ("in_progress", key) + else: + # ongoing-request="false" means restoration is complete + loggit.debug("Object %s: restored (expiry in header)", key) + return ("restored", key) + else: + # No Restore header means object is in Glacier and not being restored + loggit.debug("Object %s: in Glacier, not restored", key) + return ("not_restored", key) + + except Exception as e: + loggit.warning("Failed to check restore status for %s: %s", key, e) + return ("not_restored", key) + + # Check Glacier objects in parallel + restored_count = instant_access_count + in_progress_count = 0 + not_restored_count = 0 + + max_workers = min(15, len(glacier_objects)) + + loggit.debug( + "Checking %d Glacier objects using %d workers", + len(glacier_objects), + max_workers, + ) + + with ThreadPoolExecutor(max_workers=max_workers) as executor: + future_to_key = { + executor.submit(check_single_object, key): key for key in glacier_objects + } + + for future in as_completed(future_to_key): + status_result, key = future.result() + + if status_result == "restored": + restored_count += 1 + elif status_result == "in_progress": + in_progress_count += 1 + else: + not_restored_count += 1 + + status = { + "total": total_count, + "restored": restored_count, + "in_progress": in_progress_count, + "not_restored": not_restored_count, + "complete": (restored_count == total_count) if total_count > 0 else False, + } + + loggit.debug("Restore status: %s", status) + return status +``` + +### Key Points about Restore Status Checking: + +1. 
**Restore Header Format (from AWS S3 API):** + - `ongoing-request="true"` → Restore is still in progress + - `ongoing-request="false", expiry-date="ISO8601"` → Restore complete, expires at date + +2. **No Restore Header:** + - Object is still in Glacier storage + - restore_object() call either wasn't made or failed + +3. **Parallel Processing:** + - Uses ThreadPoolExecutor with up to 15 concurrent workers + - Boto3 S3 client is thread-safe + - Significantly improves performance for large object counts + +4. **Storage Class Behavior:** + - Objects remain GLACIER storage class even after restore + - The Restore header is the authoritative indicator + +--- + +## 5. THAW REQUEST STATUS MONITORING IN THAW ACTION + +### Location: `curator/actions/deepfreeze/thaw.py` + +#### Thaw Operation Flow (do_action method): + +**Lines 1164-1442: Create mode with async/sync support** + +Lines 1262-1273: Save thaw request for sync mode: +```python +# Save thaw request for status tracking (will be marked completed when done) +save_thaw_request( + self.client, + self.request_id, + thawed_repos, + "in_progress", + self.start_date, + self.end_date, +) +``` + +Lines 1425-1436: Save thaw request for async mode: +```python +# Save thaw request for later querying +save_thaw_request( + self.client, + self.request_id, + thawed_repos, + "in_progress", + self.start_date, + self.end_date, +) +``` + +#### do_check_status() - Checking and Mounting (Lines 341-519) + +**Step 1: Check restoration status using check_restore_status()** +```python +status = check_restore_status(self.s3, repo.bucket, repo.base_path) +# status dict contains: total, restored, in_progress, not_restored, complete +``` + +**Step 2: Mount repository when complete** +```python +if status["complete"]: + self.loggit.info("Restoration complete for %s, mounting...", repo.name) + mount_repo(self.client, repo) + self._update_repo_dates(repo) + mounted_count += 1 + newly_mounted_repos.append(repo) +else: + all_complete = False +``` + 
+**Step 3: Mount indices in date range** +```python +should_mount_indices = ( + all_complete + and start_date_str + and end_date_str + and any(repo.is_mounted for repo in repos) +) + +if should_mount_indices: + # Find and mount indices within the date range + mount_result = find_and_mount_indices_in_date_range( + self.client, mounted_repos, start_date, end_date + ) +``` + +**Step 4: Update thaw request status** +```python +if all_complete: + update_thaw_request(self.client, self.check_status, status="completed") +``` + +--- + +## 6. EXPIRATION/TIMEOUT LOGIC + +### Location: `curator/actions/deepfreeze/thaw.py` - _thaw_repository() + +Lines 200-210: Repository expiration timestamp is set: +```python +from datetime import timedelta, timezone + +expires_at = datetime.now(timezone.utc) + timedelta(days=self.duration) +repo.start_thawing(expires_at) +repo.persist(self.client) +``` + +### Location: `curator/actions/deepfreeze/cleanup.py` - _detect_and_mark_expired_repos() + +Lines 158-287: Automatic expiration detection: + +```python +def _detect_and_mark_expired_repos(self) -> int: + """ + Detect repositories whose S3 restore has expired and mark them as expired. + + Checks repositories in two ways: + 1. Thawed repos with expires_at timestamp that has passed + 2. Mounted repos (regardless of state) by checking S3 restore status directly + """ + now = datetime.now(timezone.utc) + expired_count = 0 + checked_repos = set() + + # METHOD 1: Check thawed repos with expires_at timestamp + for repo in thawed_repos: + if repo.name in checked_repos: + continue + + if repo.expires_at: + expires_at = repo.expires_at + if expires_at.tzinfo is None: + expires_at = expires_at.replace(tzinfo=timezone.utc) + + if expires_at <= now: + # Mark as expired + ... 
+``` + +### Repository State Machine: + +``` +ACTIVE (never thawed) + ↓ +FROZEN (in cold storage, not accessible) + ↓ +THAWING (S3 restore in progress, waiting for retrieval) + ↓ +THAWED (S3 restore complete, mounted and in use) + ↓ +EXPIRED (S3 restore expired, reverted to Glacier, ready for cleanup) +``` + +--- + +## 7. ONGOING-REQUEST HEADER HANDLING + +### Location: `curator/actions/deepfreeze/utilities.py` - check_restore_status() + +Lines 928-943: Parsing the Ongoing-request header: + +```python +if restore_header: + # Restore header exists - parse it to check status + # Format: 'ongoing-request="true"' or 'ongoing-request="false", expiry-date="..."' + if 'ongoing-request="true"' in restore_header: + loggit.debug("Object %s: restoration in progress", key) + return ("in_progress", key) + else: + # ongoing-request="false" means restoration is complete + loggit.debug("Object %s: restored (expiry in header)", key) + return ("restored", key) +else: + # No Restore header means object is in Glacier and not being restored + loggit.debug("Object %s: in Glacier, not restored", key) + return ("not_restored", key) +``` + +### AWS S3 Restore Header Format: +When you call restore_object() on a Glacier object, S3 returns a Restore header in subsequent head_object() calls: + +**Format (as returned by AWS S3):** +``` +ongoing-request="true" +``` +OR +``` +ongoing-request="false", expiry-date="Wed, 19 Jan 2025 19:00:00 GMT" +``` + +**Parsing Logic:** +- If header contains `ongoing-request="true"` → Restoration is in progress +- If header contains `ongoing-request="false"` → Restoration is complete, will expire at expiry-date +- If no Restore header → Object is still in Glacier, restore hasn't been requested + +--- + +## 8. 
THAW REQUEST STATUS REFREEZING + +### Location: `curator/actions/deepfreeze/refreeze.py` + +Lines 100-101: Getting completed thaw requests: +```python +def _get_open_thaw_requests(self) -> list: + """Get all completed thaw requests that are eligible for refreezing.""" + all_requests = list_thaw_requests(self.client) + return [req for req in all_requests if req.get("status") == "completed"] +``` + +Lines 313-399: Refreezing a single request: +```python +def _refreeze_single_request(self, request_id: str) -> dict: + """Refreeze a single thaw request.""" + self.loggit.info("Refreezing thaw request %s", request_id) + + # Get the thaw request + request = get_thaw_request(self.client, request_id) + + # Get repositories from request + repo_names = request.get("repos", []) + repos = get_repositories_by_names(self.client, repo_names) + + # For each repository: + # 1. Delete all mounted indices + # 2. Unmount the repository + # 3. Delete the per-repository thawed ILM policy + # 4. Reset repository state to frozen + # 5. Mark thaw request as "refrozen" +``` + +--- + +## 9. THAW REQUEST FILTERING AND DISPLAY + +### Location: `curator/actions/deepfreeze/thaw.py` - do_list_requests() + +Lines 815-944: Listing thaw requests with filtering: + +```python +def do_list_requests(self) -> None: + """ + List thaw requests in a formatted table. + + By default, excludes completed and refrozen requests. Use include_completed=True to show all. 
+ """ + all_requests = list_thaw_requests(self.client) + + # Filter completed and refrozen requests unless explicitly included + if not self.include_completed: + requests = [ + req + for req in all_requests + if req.get("status") not in ("completed", "refrozen") + ] +``` + +### Location: `curator/actions/deepfreeze/status.py` - do_repositories() + +Lines 369-380: Tracking active thaw requests: + +```python +# Get active thaw requests to track which repos are being thawed +active_thaw_requests = [] +repos_being_thawed = set() +try: + all_thaw_requests = list_thaw_requests(self.client) + active_thaw_requests = [req for req in all_thaw_requests if req.get("status") == "in_progress"] + for req in active_thaw_requests: + repos_being_thawed.update(req.get("repos", [])) +except Exception as e: + self.loggit.warning("Could not retrieve thaw requests: %s", e) +``` + +--- + +## SUMMARY TABLE + +| Aspect | Implementation | +|--------|-----------------| +| **Metadata Storage** | Elasticsearch status index (deepfreeze-status) | +| **Request ID** | UUID for each thaw request | +| **Status Tracking** | in_progress → completed → refrozen | +| **Date Range** | ISO 8601 start_date/end_date stored in request | +| **Repositories** | List of repo names in request | +| **S3 Restore Initiated** | boto3 restore_object() call with Days and Tier params | +| **Progress Check** | S3 head_object() → Restore header parsing | +| **Ongoing Status** | ongoing-request="true" or "false" in Restore header | +| **Expiry Date** | expiry-date in Restore header + expires_at in repo metadata | +| **Timeout Logic** | Manual cleanup + automatic expiration detection | +| **Repository States** | ACTIVE → FROZEN → THAWING → THAWED → EXPIRED | +| **Parallel Checking** | ThreadPoolExecutor with 15 workers for status checks | +| **Filtering** | Active (in_progress) vs Completed/Refrozen requests | + diff --git a/THAW_TRACKING_QUICK_REFERENCE.md b/THAW_TRACKING_QUICK_REFERENCE.md new file mode 100644 index 
00000000..d1c51490 --- /dev/null +++ b/THAW_TRACKING_QUICK_REFERENCE.md @@ -0,0 +1,342 @@ +# Thaw Request Tracking - Quick Reference + +## File Locations + +| Component | File | Key Functions | +|-----------|------|---| +| **Request Management** | `curator/actions/deepfreeze/utilities.py` | `save_thaw_request()`, `get_thaw_request()`, `list_thaw_requests()`, `update_thaw_request()` | +| **Restore Status Check** | `curator/actions/deepfreeze/utilities.py` | `check_restore_status()` | +| **Thaw Action** | `curator/actions/deepfreeze/thaw.py` | `Thaw.do_action()`, `Thaw.do_check_status()`, `Thaw._thaw_repository()` | +| **S3 Client** | `curator/s3client.py` | `AwsS3Client.thaw()`, `AwsS3Client.head_object()` | +| **Status Constants** | `curator/actions/deepfreeze/constants.py` | Status and state definitions | +| **Cleanup** | `curator/actions/deepfreeze/cleanup.py` | Expiration detection and cleanup | +| **Refreeze** | `curator/actions/deepfreeze/refreeze.py` | Manual refreeze operations | + +--- + +## Data Model + +### Elasticsearch Document (deepfreeze-status index) + +```python +{ + "_id": "uuid-string", # Request ID + "doctype": "thaw_request", + "request_id": "uuid-string", + "repos": ["repo-name-1", "repo-name-2"], # Repository names + "status": "in_progress", # in_progress|completed|failed|refrozen + "created_at": "2025-01-15T10:00:00Z", + "start_date": "2025-01-01T00:00:00Z", # Optional: date range filter + "end_date": "2025-01-31T23:59:59Z" # Optional: date range filter +} +``` + +--- + +## Key Functions Reference + +### save_thaw_request() +**Location:** `utilities.py` line 1040 + +Creates a new thaw request document in Elasticsearch. + +```python +save_thaw_request( + client, # Elasticsearch client + request_id, # UUID + repos, # List[Repository] + status, # "in_progress", "completed", etc. 
+ start_date, # datetime (optional) + end_date # datetime (optional) +) +``` + +### get_thaw_request() +**Location:** `utilities.py` line 1094 + +Retrieves a thaw request by ID. + +```python +request = get_thaw_request(client, request_id) +# Returns: dict with request data +``` + +### list_thaw_requests() +**Location:** `utilities.py` line 1122 + +Lists all thaw requests (up to 10,000). + +```python +requests = list_thaw_requests(client) +# Returns: list of dicts, each with "id" + source fields +``` + +### update_thaw_request() +**Location:** `utilities.py` line 1152 + +Updates thaw request status. + +```python +update_thaw_request(client, request_id, status="completed") +``` + +### check_restore_status() +**Location:** `utilities.py` line 852 + +Checks S3 Glacier restore status for all objects in a path. + +```python +status = check_restore_status(s3, bucket, base_path) +# Returns: { +# "total": 150, +# "restored": 75, # Restore header: ongoing-request="false" +# "in_progress": 50, # Restore header: ongoing-request="true" +# "not_restored": 25, # No Restore header +# "complete": False +# } +``` + +**Performance:** Uses ThreadPoolExecutor with 15 concurrent workers for parallel head_object checks. + +--- + +## S3/Glacier Status Tracking + +### The "Restore" Header + +This is what AWS S3 returns in head_object() response: + +| State | Restore Header | StorageClass | +|-------|---|---| +| Not restored | (absent) | GLACIER | +| Restoring | `ongoing-request="true"` | GLACIER | +| Restored | `ongoing-request="false", expiry-date="..."` | GLACIER | +| Expired | (absent) | GLACIER | + +**Key Point:** StorageClass stays GLACIER throughout - only Restore header changes. 
+ +### boto3 S3 API Calls + +#### Initiate Restore +```python +# From s3client.py line 351 +s3.restore_object( + Bucket=bucket_name, + Key=key, + RestoreRequest={ + "Days": restore_days, # e.g., 7 + "GlacierJobParameters": {"Tier": retrieval_tier} # Standard/Expedited/Bulk + } +) +``` + +#### Check Status +```python +# From utilities.py line 927 +metadata = s3.head_object(Bucket=bucket, Key=key) +restore_header = metadata.get("Restore") +# Parse ongoing-request="true" or "false" +``` + +--- + +## Status Lifecycle + +``` +Created (in_progress) + ↓ +S3 restores objects in background + ↓ +All objects restored + ↓ +User checks status / Auto-check in sync mode + ↓ +Repositories mounted, indices mounted (completed) + ↓ +Data available for 7+ days + ↓ +User calls refreeze OR cleanup detects expiration + ↓ +Repositories unmounted, state reset to frozen (refrozen) + ↓ +Done +``` + +--- + +## Important Constants + +From `curator/actions/deepfreeze/constants.py`: + +```python +# Thaw request statuses +THAW_STATUS_IN_PROGRESS = "in_progress" +THAW_STATUS_COMPLETED = "completed" +THAW_STATUS_FAILED = "failed" +THAW_STATUS_REFROZEN = "refrozen" + +# Repository thaw states +THAW_STATE_ACTIVE = "active" # Never thawed +THAW_STATE_FROZEN = "frozen" # In cold storage +THAW_STATE_THAWING = "thawing" # Restore in progress +THAW_STATE_THAWED = "thawed" # Restore complete, mounted +THAW_STATE_EXPIRED = "expired" # Restore expired, ready for cleanup +``` + +--- + +## Expiration & Timeout Logic + +### Repository Expiration Timestamp +**Set at:** `thaw.py` line 203 + +```python +from datetime import timedelta, timezone + +expires_at = datetime.now(timezone.utc) + timedelta(days=self.duration) +repo.start_thawing(expires_at) +repo.persist(client) +``` + +### Automatic Expiration Detection +**In:** `cleanup.py` line 158 + +```python +now = datetime.now(timezone.utc) +if repo.expires_at and repo.expires_at <= now: + # Mark as expired +``` + +### AWS S3 Expiration +- S3 automatically removes 
temporary restored copy after expiry-date +- Objects silently revert to Glacier +- No action needed from Curator + +--- + +## Thaw Request Filtering + +### In Status Displays + +**Active only (default):** +```python +requests = [req for req in all_requests if req.get("status") == "in_progress"] +``` + +**Non-completed (includes in_progress, failed):** +```python +requests = [req for req in all_requests if req.get("status") not in ("completed", "refrozen")] +``` + +**All requests (with --include-completed flag):** +```python +requests = all_requests +``` + +--- + +## Common Operations + +### Create a Thaw Request +``` +curator_cli deepfreeze thaw --start-date 2025-01-01 --end-date 2025-01-31 --sync +``` + +### Check Status of Specific Request +``` +curator_cli deepfreeze thaw --check-status <request-id> +``` + +### Check Status of All Requests +``` +curator_cli deepfreeze thaw --check-status "" +``` + +### List Active Requests +``` +curator_cli deepfreeze thaw --list-requests +``` + +### List All Requests (including completed) +``` +curator_cli deepfreeze thaw --list-requests --include-completed +``` + +### Manually Refreeze +``` +curator_cli deepfreeze refreeze --thaw-request-id <request-id> +``` + +### Refreeze All Completed Requests +``` +curator_cli deepfreeze refreeze +``` + +--- + +## Error Handling + +### Common Errors in check_restore_status() + +| Scenario | Handling | +|----------|----------| +| head_object() fails | Count as "not_restored", log warning, continue | +| Restore header parse fails | Default to not restored | +| No objects found | Return complete=True (all 0 objects restored) | +| ThreadPoolExecutor exception | Log error, skip object, continue | + +### Common Errors in save_thaw_request() + +| Scenario | Handling | +|----------|----------| +| Status index doesn't exist | Raises ActionError | +| ES index() fails | Raises ActionError, logs error | +| Invalid status value | Stored as-is, not validated | + +--- + +## Performance Considerations + +### Parallel Object 
Checking +- Uses ThreadPoolExecutor with max 15 workers +- Each worker calls head_object() for one object +- Significantly faster than sequential checking +- Example: 1000 objects checked in ~60 seconds vs ~1000 seconds sequentially + +### Query Performance +- All queries use direct ID lookup where possible +- List queries limited to 10,000 results +- Status checks are independent (no inter-repository dependencies) + +--- + +## Testing Thaw Tracking + +### Check a Specific Request +```python +from curator.actions.deepfreeze.utilities import get_thaw_request + +request = get_thaw_request(es_client, request_id) +print(f"Status: {request['status']}") +print(f"Repos: {request['repos']}") +``` + +### List All Requests +```python +from curator.actions.deepfreeze.utilities import list_thaw_requests + +requests = list_thaw_requests(es_client) +for req in requests: + print(f"{req['id']}: {req['status']}") +``` + +### Check Restore Status +```python +from curator.actions.deepfreeze.utilities import check_restore_status +from curator.s3client import s3_client_factory + +s3 = s3_client_factory("aws") +status = check_restore_status(s3, bucket, base_path) +print(f"Restored: {status['restored']}/{status['total']}") +``` + diff --git a/curator/actions/__init__.py b/curator/actions/__init__.py index a8c365f1..2a90901c 100644 --- a/curator/actions/__init__.py +++ b/curator/actions/__init__.py @@ -1,10 +1,12 @@ """Use __init__ to make these not need to be nested under lowercase.Capital""" + from curator.actions.alias import Alias from curator.actions.allocation import Allocation from curator.actions.close import Close from curator.actions.cluster_routing import ClusterRouting from curator.actions.cold2frozen import Cold2Frozen from curator.actions.create_index import CreateIndex +from curator.actions.deepfreeze import Cleanup, Deepfreeze, Refreeze, RepairMetadata, Rotate, Setup, Status, Thaw from curator.actions.delete_indices import DeleteIndices from curator.actions.forcemerge 
import ForceMerge from curator.actions.index_settings import IndexSettings @@ -13,24 +15,32 @@ from curator.actions.replicas import Replicas from curator.actions.rollover import Rollover from curator.actions.shrink import Shrink -from curator.actions.snapshot import Snapshot, DeleteSnapshots, Restore +from curator.actions.snapshot import DeleteSnapshots, Restore, Snapshot CLASS_MAP = { - 'alias' : Alias, - 'allocation' : Allocation, - 'close' : Close, - 'cluster_routing' : ClusterRouting, - 'cold2frozen': Cold2Frozen, - 'create_index' : CreateIndex, - 'delete_indices' : DeleteIndices, - 'delete_snapshots' : DeleteSnapshots, - 'forcemerge' : ForceMerge, - 'index_settings' : IndexSettings, - 'open' : Open, - 'reindex' : Reindex, - 'replicas' : Replicas, - 'restore' : Restore, - 'rollover' : Rollover, - 'snapshot' : Snapshot, - 'shrink' : Shrink, + "alias": Alias, + "allocation": Allocation, + "cleanup": Cleanup, + "close": Close, + "cluster_routing": ClusterRouting, + "cold2frozen": Cold2Frozen, + "create_index": CreateIndex, + "deepfreeze": Deepfreeze, + "delete_indices": DeleteIndices, + "delete_snapshots": DeleteSnapshots, + "forcemerge": ForceMerge, + "index_settings": IndexSettings, + "open": Open, + "refreeze": Refreeze, + "reindex": Reindex, + "repair_metadata": RepairMetadata, + "replicas": Replicas, + "restore": Restore, + "rollover": Rollover, + "shrink": Shrink, + "snapshot": Snapshot, + "setup": Setup, + "rotate": Rotate, + "status": Status, + "thaw": Thaw, } diff --git a/curator/actions/deepfreeze/README.md b/curator/actions/deepfreeze/README.md new file mode 100644 index 00000000..626fa5c8 --- /dev/null +++ b/curator/actions/deepfreeze/README.md @@ -0,0 +1,525 @@ +# Deepfreeze Module + +**Intelligent lifecycle management for Elasticsearch snapshot repositories with AWS Glacier integration** + +## Overview + +Deepfreeze is a snapshot repository lifecycle management system that works alongside Elasticsearch ILM (Index Lifecycle Management) to enable 
cost-effective long-term data retention. While ILM manages index lifecycles and creates searchable snapshots, deepfreeze manages the snapshot repositories themselves—allowing you to preserve snapshots even after indices are deleted, and to archive entire repositories to AWS S3 Glacier for minimal storage costs. + +### Key Features + +- **Repository Lifecycle Management**: Automated rotation and retirement of snapshot repositories +- **Snapshot Preservation**: Keep snapshots after indices are deleted, not the default ILM behavior +- **Glacier Archival**: Automatically move old repository storage to low-cost Glacier tiers +- **On-Demand Restoration**: Retrieve frozen repositories from Glacier when historical data is needed +- **ILM Integration**: Works seamlessly with ILM policies and searchable snapshots +- **Automated Cleanup**: Automatic unmounting and cleanup of restored repositories after use +- **Cost Optimization**: Reduce long-term storage costs by up to 95% + +### The Problem Deepfreeze Solves + +By default, when ILM policies delete indices in the delete phase, they can also delete the backing snapshots. This means your historical data is gone forever. Deepfreeze solves this by: + +1. **Preserving snapshots** after index deletion +2. **Rotating repositories** to prevent unlimited growth +3. **Archiving to Glacier** when all indices from a repository have been deleted +4. **Restoring on-demand** when you need to access historical data +5. 
**Managing cleanup** to prevent resource waste + +### How It Works + +**Normal ILM Flow** (without deepfreeze): +``` +Hot Data → Searchable Snapshot → Delete Phase → Index AND Snapshot deleted +``` + +**ILM + Deepfreeze Flow**: +``` +Data → Searchable Snapshot → Delete Phase → Index deleted, Snapshot preserved + ↓ + Repository archived to Glacier + ↓ + Restore on-demand when needed +``` + +## Architecture + +### Components + +- **Status Index** (`deepfreeze-status`): Central metadata store tracking repositories, configuration, and thaw requests +- **S3 Repositories**: Elasticsearch snapshot repositories backed by S3 buckets +- **Repository States**: `active`, `frozen`, `thawing`, `thawed`, `expired` +- **Versioned ILM Policies**: Policies that reference specific repositories for searchable snapshots + +### Repository Lifecycle + +``` +┌──────────────┐ +│ Setup │ Creates first repository and status tracking +└──────┬───────┘ + │ + ▼ +┌──────────────┐ +│ Active │ Current repository receiving new snapshots from ILM +└──────┬───────┘ + │ + ▼ Rotate (scheduled - creates new active repository) +┌──────────────┐ +│ Deep Frozen │ Old repository archived to Glacier, unmounted +└──────┬───────┘ (all indices deleted, snapshots preserved in cold storage) + │ + ▼ Thaw (on-demand when historical data needed) +┌──────────────┐ +│ Thawing │ Glacier restore in progress +└──────┬───────┘ + │ + ▼ Automatic when restore completes +┌──────────────┐ +│ Thawed │ Repository mounted, snapshots accessible for queries +└──────┬───────┘ + │ + ▼ Cleanup (automatic after expiration) or Refreeze (manual) +┌──────────────┐ +│ Deep Frozen │ Unmounted, back to Glacier cold storage +└──────────────┘ +``` + +## Quick Start + +### Initial Setup + +```bash +# 1. 
Initialize deepfreeze (one-time) +curator_cli deepfreeze setup \ + --repo-name-prefix deepfreeze \ + --bucket-name-prefix my-snapshots \ + --base-path-prefix snapshots \ + --rotate-by bucket \ + --style oneup \ + --storage-class intelligent_tiering + +# 2. Verify configuration +curator_cli deepfreeze status +``` + +### Daily Operations + +```bash +# Monitor repository state +curator_cli deepfreeze status --show-repos + +# Rotate to new repository (typically monthly via cron) +# This creates a new active repository and archives old ones to Glacier +curator_cli deepfreeze rotate --keep 6 + +# Check for expired thawed repositories +curator_cli deepfreeze cleanup +``` + +### Data Recovery + +```bash +# Restore repositories containing data from a date range +curator_cli deepfreeze thaw --start-date 2024-01-01 --end-date 2024-01-31 --duration 7 + +# Check thaw progress +curator_cli deepfreeze thaw --check-status + +# Manually refreeze when finished analyzing data +curator_cli deepfreeze refreeze --thaw-request-id +``` + +## Actions + +### Core Lifecycle Actions + +#### [Setup](docs/setup.md) +**Purpose**: Initialize the deepfreeze environment +**When**: Once, before any other operations +**What it does**: Creates first S3 bucket and repository, status index, and optional sample ILM policy + +#### [Rotate](docs/rotate.md) +**Purpose**: Create new active repository and retire old ones +**When**: On schedule (weekly/monthly) or when size thresholds are met +**What it does**: +- Creates new repository (becomes the active one) +- Versions ILM policies to point to new repository +- Updates index templates to use new versioned policies +- Unmounts old repositories beyond retention limit +- Archives unmounted repositories to Glacier + +#### [Status](docs/status.md) +**Purpose**: Monitor system state and health +**When**: Anytime (read-only) +**What it does**: Displays repositories, thawed state, buckets, ILM policies, and configuration + +### Data Access Actions + +#### 
[Thaw](docs/thaw.md) +**Purpose**: Restore frozen repositories from Glacier +**When**: On-demand when historical data access is needed +**What it does**: +- Identifies repositories containing snapshots in the requested date range +- Initiates AWS Glacier restore process +- Mounts repositories in Elasticsearch when restore completes +- Optionally mounts indices from snapshots for querying + +**Modes**: +- **Create**: Start new thaw request for a date range +- **Check Status**: Monitor restore progress and mount when complete +- **List**: Display all active thaw requests + +#### [Refreeze](docs/refreeze.md) +**Purpose**: Manually unmount thawed repositories before automatic expiration +**When**: When finished accessing historical data +**What it does**: +- Deletes mounted indices from thawed snapshots +- Unmounts repositories +- Updates metadata to frozen state +- Note: AWS restored objects remain until expiration time + +### Maintenance Actions + +#### [Cleanup](docs/cleanup.md) +**Purpose**: Automatic maintenance of expired thawed repositories +**When**: After every rotation (automatic) or on schedule (recommended daily) +**What it does**: +- Detects expired repositories via timestamp and S3 restore status checks +- Unmounts expired repositories +- Deletes indices that only exist in expired repositories +- Cleans up completed/failed thaw requests +- Removes orphaned ILM policies + +#### [Repair Metadata](docs/repair_metadata.md) +**Purpose**: Diagnostic tool to fix metadata discrepancies +**When**: After system issues, manual S3 changes, or suspected metadata desync +**What it does**: +- Scans actual S3 storage classes for all repositories +- Compares with metadata in deepfreeze-status index +- Corrects discrepancies automatically +- Verifies and updates thaw request states + +## Common Workflows + +### Typical Monthly Workflow + +```bash +# Day 1 of month - Rotate to new repository +curator_cli deepfreeze rotate --keep 6 + +# Rotation automatically: +# - Creates 
new active repository +# - Versions ILM policies to point to new repo +# - Updates index templates +# - Unmounts repositories older than --keep limit +# - Archives unmounted repositories to Glacier +# - Runs cleanup for expired thaws + +# Monitor throughout month +curator_cli deepfreeze status +``` + +### Data Recovery Workflow + +```bash +# 1. User needs to analyze data from Q1 2024 +curator_cli deepfreeze thaw \ + --start-date 2024-01-01 \ + --end-date 2024-03-31 \ + --duration 7 \ + --tier Standard + +# Returns: Thaw request ID: abc-123-def + +# 2. Wait for Glacier restore to complete (check periodically) +curator_cli deepfreeze thaw --check-status abc-123-def + +# 3. Once complete, snapshots are accessible +# Mount indices from snapshots as needed for querying + +# 4. When analysis is finished (before 7 days) +curator_cli deepfreeze refreeze --thaw-request-id abc-123-def + +# OR let automatic cleanup handle it after 7 days +``` + +### Troubleshooting Workflow + +```bash +# 1. Check overall status +curator_cli deepfreeze status + +# 2. Check if metadata is synchronized with S3 +curator_cli deepfreeze repair-metadata --dry-run + +# 3. Apply fixes if needed +curator_cli deepfreeze repair-metadata + +# 4. Verify resolution +curator_cli deepfreeze status --show-repos +``` + +## Configuration + +### Initial Setup Options + +Key decisions during setup that affect ongoing operations: + +**Repository Organization** (`--rotate-by`): +- `bucket`: New S3 bucket per rotation (cleaner separation, more buckets to manage) +- `path`: Same bucket, different paths per rotation (fewer buckets, shared lifecycle rules) + +**Naming Strategy** (`--style`): +- `oneup`: Sequential numbering (000001, 000002, ...) +- `date`: Monthly date stamps (2025.01, 2025.02, ...) 
+ +**Storage Class** (`--storage-class`): +- `intelligent_tiering`: Automatic cost optimization (recommended) +- `standard`: Standard S3 storage +- `standard_ia`: Infrequent Access +- `onezone_ia`: Single AZ Infrequent Access +- `glacier_ir`: Glacier Instant Retrieval + +### Retention Policy + +The `--keep` parameter in rotate controls how many repositories remain mounted and active: +- Default: 6 repositories +- Repositories beyond this limit are unmounted and pushed to Glacier +- Consider: Data access patterns, compliance requirements, cost constraints + +## Prerequisites + +### Required Infrastructure + +1. **Elasticsearch 8.x** + - Healthy cluster with snapshot capability + - For ES 8.x+: S3 support built-in + +2. **AWS Account** + - Valid credentials with S3 and Glacier permissions + - IAM policy allowing bucket creation, object operations, restore operations + +3. **Curator Installation** + - Curator 8.x with deepfreeze module + - Python 3.12.7+ + +### Required Permissions + +**AWS IAM Permissions**: +```json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "s3:CreateBucket", + "s3:PutObject", + "s3:GetObject", + "s3:DeleteObject", + "s3:ListBucket", + "s3:PutLifecycleConfiguration", + "s3:RestoreObject", + "s3:GetObjectAttributes" + ], + "Resource": [ + "arn:aws:s3:::your-bucket-prefix-*", + "arn:aws:s3:::your-bucket-prefix-*/*" + ] + } + ] +} +``` + +**Elasticsearch Permissions**: +- Snapshot management (create, delete repositories) +- ILM policy management (create, update, delete policies) +- Index template management (update templates) +- Index management (create, delete searchable snapshots) +- Read/write to `deepfreeze-status` index + +## Cost Optimization + +Costs are estimated as of this writing; please confirm them in your own account. 
+ +### Storage Costs + +- **S3 Standard**: ~$0.023/GB/month (active repositories with searchable snapshots) +- **S3 Intelligent-Tiering**: Automatic optimization based on access patterns +- **S3 Glacier Instant Retrieval**: ~$0.004/GB/month (frozen repositories) +- **S3 Glacier Flexible Retrieval**: ~$0.0036/GB/month (deeper archive) + +### Restore Costs (when thawing) + +- **Standard Retrieval**: $0.03/GB + $0.01/1000 requests (3-5 hours) +- **Expedited Retrieval**: $0.10/GB + $0.03/1000 requests (1-5 minutes) +- **Bulk Retrieval**: $0.0025/GB + $0.025/1000 requests (5-12 hours) + +### Best Practices + +1. **Choose appropriate retention** (`--keep`): Balance between access needs and storage costs +2. **Use Bulk retrieval tier** for non-urgent historical analysis +3. **Minimize thaw duration**: Only request access for the time period needed +4. **Refreeze proactively**: Don't wait for automatic expiration if analysis is complete +5. **Regular rotation**: Prevents any single repository from growing too large +6. **Monitor with status**: Regular checks prevent unexpected costs + +## Scheduling Recommendations + +### Automated Schedule (cron) + +```bash +# Rotate monthly (1st of month at 1 AM) +0 1 1 * * curator_cli deepfreeze rotate --keep 6 + +# Cleanup daily (3 AM) +0 3 * * * curator_cli deepfreeze cleanup + +# Status report weekly (Mondays at 9 AM) +0 9 * * 1 curator_cli deepfreeze status --show-repos +``` + +### On-Demand Operations + +- **Thaw**: User-initiated when historical data access is required +- **Refreeze**: User-initiated when analysis is complete +- **Repair Metadata**: As needed for troubleshooting or after manual interventions + +## Monitoring and Alerting + +### Key Metrics to Monitor + +1. **Repository Count**: Ensure rotation is happening on schedule +2. **Active Repository**: Should always be exactly one +3. **Thawed Repository Count**: Detect stuck or forgotten thaws +4. **Expired Repository Count**: Should be 0 after cleanup runs +5. 
**Failed Thaw Requests**: Indicate AWS or configuration issues +6. **Status Index Size**: Should grow slowly and predictably + +### Health Checks + +```bash +# Check for expired repositories (should be none) +curator_cli deepfreeze status --show-thawed | grep expired + +# Verify metadata consistency with S3 +curator_cli deepfreeze repair-metadata --dry-run + +# List active thaw requests +curator_cli deepfreeze thaw --list-requests + +# Check ILM policy versioning +curator_cli deepfreeze status --show-ilm +``` + +## Troubleshooting + +### Common Issues + +**Problem**: Rotation fails with "No ILM policies found" +**Solution**: Create at least one ILM policy with searchable_snapshot action before rotating + +**Problem**: Thaw request stuck in "thawing" state +**Solution**: Check AWS Glacier restore status in S3 console, run `repair-metadata` to sync state + +**Problem**: Status shows incorrect thaw_state for repositories +**Solution**: Run `repair-metadata` to scan S3 storage classes and correct metadata + +**Problem**: Cleanup not removing expired repositories +**Solution**: Ensure cleanup runs regularly via cron, verify Elasticsearch permissions + +**Problem**: High AWS costs after thaw operation +**Solution**: Use `refreeze` to unmount when finished, reduce `--duration` on future thaw requests + +### Debug Mode + +All actions support `--dry-run` mode for safe testing: + +```bash +# Test rotation without making changes +curator_cli --dry-run deepfreeze rotate --keep 6 + +# Test cleanup +curator_cli --dry-run deepfreeze cleanup + +# Test metadata repair +curator_cli deepfreeze repair-metadata --dry-run +``` + +## Integration with ILM + +Deepfreeze is designed to work alongside Elasticsearch ILM policies. 
Here's how they work together: + +### ILM Policy Example + +```json +{ + "policy": { + "phases": { + "hot": { + "actions": { + "rollover": { + "max_size": "50GB", + "max_age": "7d" + } + } + }, + "frozen": { + "min_age": "30d", + "actions": { + "searchable_snapshot": { + "snapshot_repository": "deepfreeze-000001" + } + } + }, + "delete": { + "min_age": "365d", + "actions": { + "delete": { + "delete_searchable_snapshot": false + } + } + } + } + } +} +``` + +The `delete_searchable_snapshot` setting is critical, and enables retention of the snapshot after index deletion. + +### What Happens + +1. **ILM** creates searchable snapshots in the current deepfreeze repository +2. **ILM** deletes indices after 365 days +3. **Deepfreeze** preserves the snapshots after index deletion +4. **Deepfreeze rotate** creates new repository monthly, versions ILM policies +5. **Deepfreeze rotate** archives old repositories to Glacier after all indices are deleted +6. **Deepfreeze thaw** restores repositories from Glacier when historical data is needed + +## Detailed Documentation + +Each action has comprehensive documentation covering prerequisites, effects, options, examples, and troubleshooting: + +- **[Setup Documentation](docs/setup.md)** - Initial configuration and first repository creation +- **[Rotate Documentation](docs/rotate.md)** - Repository rotation and lifecycle management +- **[Status Documentation](docs/status.md)** - Monitoring and visibility into system state +- **[Thaw Documentation](docs/thaw.md)** - Glacier restore and data access +- **[Refreeze Documentation](docs/refreeze.md)** - Manual repository unmounting +- **[Cleanup Documentation](docs/cleanup.md)** - Automatic maintenance and expiration handling +- **[Repair Metadata Documentation](docs/repair_metadata.md)** - Metadata consistency and troubleshooting + +## Support and Development + +### Author + +Deepfreeze was written by Bret Wortman (bret.wortman@elastic.co) and is built on the foundation of Curator, which 
is the work of Aaron Mildenstein and many others. + +### Contributing + +This is part of the Elasticsearch Curator project. For bugs, feature requests, or contributions, see the main Curator repository. + +### Version + +Current version: 8.0.21 (work in progress) + +Part of Elasticsearch Curator 8.x diff --git a/curator/actions/deepfreeze/__init__.py b/curator/actions/deepfreeze/__init__.py new file mode 100644 index 00000000..f3fba363 --- /dev/null +++ b/curator/actions/deepfreeze/__init__.py @@ -0,0 +1,66 @@ +""" +Deepfreeze actions module + +This module re-exports from the deepfreeze-core package. +The canonical implementation lives in the 'deepfreeze_core' package. +""" + +# Re-export everything from the deepfreeze-core package +from deepfreeze_core import ( + # Constants + PROVIDERS, + SETTINGS_ID, + STATUS_INDEX, + # Exceptions + ActionException, + DeepfreezeException, + MissingIndexError, + MissingSettingsError, + # Actions + Cleanup, + Refreeze, + RepairMetadata, + Rotate, + Setup, + Status, + Thaw, + # Helpers + Deepfreeze, + Repository, + Settings, + # Utilities + check_restore_status, + create_repo, + decode_date, + ensure_settings_index, + find_repos_by_date_range, + get_all_indices_in_repo, + get_all_repos, + get_matching_repo_names, + get_matching_repos, + get_next_suffix, + get_repositories_by_names, + get_settings, + get_thaw_request, + get_timestamp_range, + list_thaw_requests, + mount_repo, + push_to_glacier, + save_settings, + save_thaw_request, + unmount_repo, + update_repository_date_range, +) + +CLASS_MAP = { + "cleanup": Cleanup, + "deepfreeze": Deepfreeze, + "refreeze": Refreeze, + "repair_metadata": RepairMetadata, + "repository": Repository, + "settings": Settings, + "setup": Setup, + "rotate": Rotate, + "status": Status, + "thaw": Thaw, +} diff --git a/curator/actions/deepfreeze/docs/cleanup.md b/curator/actions/deepfreeze/docs/cleanup.md new file mode 100644 index 00000000..12e84062 --- /dev/null +++ 
b/curator/actions/deepfreeze/docs/cleanup.md @@ -0,0 +1,765 @@ +# Cleanup Action + +## Purpose + +The Cleanup action is an automatic maintenance process that detects and processes expired thawed repositories, cleaning up resources that are no longer needed. It runs as part of the rotation workflow and can also be run independently on a schedule. + +Cleanup handles: +1. **Expired Repository Detection**: Identifies repositories whose AWS Glacier restore has expired +2. **Repository Unmounting**: Removes expired repositories from Elasticsearch +3. **Index Deletion**: Removes searchable snapshots that only exist in expired repositories +4. **Thaw Request Management**: Cleans up old completed, failed, and refrozen requests +5. **ILM Policy Cleanup**: Removes orphaned thawed ILM policies + +**Key Concept**: When you thaw data from Glacier, AWS provides temporary access for a duration (e.g., 7 days). After this expires, Cleanup detects the expiration and unmounts repositories automatically, freeing Elasticsearch resources. + +## Prerequisites + +### System Requirements + +1. **Deepfreeze Initialized** + - Setup action must have been run successfully + - `deepfreeze-status` index must exist + +2. **Elasticsearch Permissions** + - `snapshot.delete_repository` - Unmount repositories + - `indices.delete` - Delete indices + - `ilm.delete_policy` - Delete ILM policies + - Read/write access to `deepfreeze-status` index + +3. **AWS Credentials** (for S3 status checks) + - `s3:GetObjectAttributes` - Check restore status + - `s3:ListBucket` - List objects in repositories + +### When to Run + +**Automatically**: +- After every rotation (built-in) +- Part of scheduled rotation workflow + +**Manually**: +- On-demand maintenance +- After manual thaw operations +- Testing/verification + +**Scheduled** (Recommended): +- Daily cron job: `0 3 * * * curator_cli deepfreeze cleanup` +- Catches expirations between rotations + +## Effects + +### Detection Phase + +#### 1. 
Timestamp-Based Detection + +For repositories in `thawed` state: +- Compares current time to `expires_at` timestamp +- Marks as `expired` if current time ≥ `expires_at` +- Updates repository state in `deepfreeze-status` index + +#### 2. S3-Based Detection + +For mounted repositories (any state): +- Queries S3 for actual object restore status +- Checks: restored, in progress, not restored +- Marks as `expired` if all objects are NOT restored +- Handles edge cases where timestamp is missing or stale + +**Why Both Methods?** +- Timestamp: Fast, efficient for normal cases +- S3 Check: Catches anomalies (clock skew, manual S3 operations, missing timestamps) + +### Cleanup Phase + +For each repository in `expired` state: + +#### 1. Verify Mount Status + +- Queries Elasticsearch to confirm actual mount status +- Handles state desync (in-memory flag vs actual cluster state) +- Safety check to prevent errors + +#### 2. Unmount Repository + +If mounted: +- Unregisters from Elasticsearch: `DELETE /_snapshot/{repo_name}` +- Logs success +- Continues cleanup even if unmount fails (handles already-unmounted cases) + +#### 3. Reset Repository State + +- State transition: `expired` → `frozen` +- Clears `is_mounted` flag +- Clears `expires_at` timestamp +- Persists to `deepfreeze-status` index + +#### 4. Identify Indices to Delete + +**Safety Logic**: +- Scans snapshots in expired repositories +- Checks if index exists in Elasticsearch +- Checks if index has snapshots in OTHER repositories +- **Only deletes if**: Index exists ONLY in expired repositories + +**Index Naming Patterns**: +- Original names (e.g., `.ds-df-test-2024.01.01-000001`) +- Partial prefix (e.g., `partial-.ds-df-test-2024.01.01-000001`) +- Restored prefix (e.g., `restored-.ds-df-test-2024.01.01-000001`) + +#### 5. 
Delete Indices + +For each identified index: +- Validates index still exists (double-check) +- Gets index health for audit trail +- Deletes index: `DELETE /{index_name}` +- Logs success or failure +- Continues with remaining indices even if one fails + +### Maintenance Phase + +#### 6. Clean Up Old Thaw Requests + +**Retention Policies** (configurable in settings): +- Completed requests: Default 7 days +- Failed requests: Default 7 days +- Refrozen requests: Default 35 days + +**Cleanup Logic**: +- Calculates age from `created_at` timestamp +- Deletes requests older than retention period +- Also deletes stale `in_progress` requests where all repos are no longer thawed + +#### 7. Clean Up Orphaned Thawed ILM Policies + +**Detection**: +- Finds all policies ending with `-thawed` +- Filters to policies matching deepfreeze prefix +- Checks if policy has any indices or datastreams assigned + +**Deletion**: +- Deletes policies with zero usage +- Keeps policies still in use (will clean up later) + +## Options + +Cleanup has no user-configurable options. Behavior is controlled by: + +1. **Repository State**: Only processes `expired` repositories +2. **Retention Settings**: Stored in `deepfreeze-status` index (from setup) +3. 
**Safety Checks**: Built-in (cannot delete indices with snapshots elsewhere) + +### Internal Configuration + +These settings are stored in `deepfreeze-status` index (configured during setup): + +- `thaw_request_retention_days_completed`: Default 7 +- `thaw_request_retention_days_failed`: Default 7 +- `thaw_request_retention_days_refrozen`: Default 35 + +## Usage Examples + +### Manual Cleanup + +```bash +# Run cleanup manually +curator_cli deepfreeze cleanup + +# Logs show: +# - Expired repositories detected +# - Repositories unmounted +# - Indices deleted +# - Thaw requests cleaned up +# - ILM policies removed +``` + +### Dry Run + +```bash +# Preview what cleanup would do +curator_cli deepfreeze cleanup --dry-run + +# Output shows: +# - Repositories that would be marked expired +# - Repositories that would be unmounted +# - Indices that would be deleted +# - Thaw requests that would be removed +# - No actual changes made +``` + +### Scheduled Cleanup (Cron) + +```bash +# /etc/cron.d/deepfreeze-cleanup +# Run cleanup daily at 3 AM +0 3 * * * curator_cli deepfreeze cleanup >> /var/log/deepfreeze-cleanup.log 2>&1 +``` + +### Cleanup After Manual Thaw + +```bash +# After finishing with thawed data +curator_cli deepfreeze refreeze --thaw-request-id + +# Then run cleanup to process any other expirations +curator_cli deepfreeze cleanup +``` + +### Verify Cleanup Results + +```bash +# Check for expired repos (should be none after cleanup) +curator_cli deepfreeze status --show-repos --porcelain | grep "expired" + +# Check for old thaw requests (should respect retention) +curator_cli deepfreeze thaw --list-requests --include-completed +``` + +## Detection Logic + +### Expired Repository Detection + +#### Method 1: Timestamp Comparison + +``` +For repository in state 'thawed': + if expires_at exists: + if current_time >= expires_at: + → Mark as 'expired' +``` + +**Advantages**: +- Fast (no S3 API calls) +- Reliable when timestamps are accurate + +**Limitations**: +- 
Requires accurate timestamps +- Doesn't catch manual S3 operations + +#### Method 2: S3 Restore Status Check + +``` +For repository that is mounted: + Check S3 restore status for all objects + Count: restored, in_progress, not_restored + + if not_restored > 0 AND restored == 0 AND in_progress == 0: + → Mark as 'expired' +``` + +**Advantages**: +- Ground truth from AWS +- Catches edge cases + +**Limitations**: +- Slower (S3 API calls) +- Requires S3 permissions + +**Why Both?** +- Timestamp check is fast primary method +- S3 check catches anomalies +- Together they provide robust detection + +### Thaw Request Cleanup Logic + +``` +For each thaw request: + age = current_time - created_at + + if status == 'completed' AND age > retention_completed: + → Delete request + + if status == 'failed' AND age > retention_failed: + → Delete request + + if status == 'refrozen' AND age > retention_refrozen: + → Delete request + + if status == 'in_progress': + Check all repos in request + if ALL repos are NOT in (thawing, thawed) state: + → Delete request (stale) +``` + +### Index Deletion Safety + +``` +For each index in expired repositories: + if index does NOT exist in Elasticsearch: + → Skip (already gone) + + other_repos = all repos EXCEPT expired repos + + has_snapshots_elsewhere = False + for repo in other_repos: + if index exists in repo snapshots: + has_snapshots_elsewhere = True + break + + if has_snapshots_elsewhere: + → Skip (has backups elsewhere) + else: + → DELETE (only exists in expired repos) +``` + +**Safety Guarantee**: Indices are only deleted if they have no snapshots in any other repository. + +## Error Handling + +### Common Issues + +#### 1. No Expired Repositories + +**Message**: `No expired repositories found to clean up` + +**Cause**: All thawed repositories still within their duration + +**Action**: This is normal - cleanup has nothing to do + +#### 2. 
Repository Unmount Failed (Already Unmounted) + +**Warning**: `Repository deepfreeze-000010 marked as mounted but not found in Elasticsearch` + +**Cause**: State desync - in-memory flag says mounted, but ES doesn't have it + +**Effect**: Non-critical - cleanup continues and corrects the state + +**Action**: No user action needed (automatic correction) + +#### 3. Index Deletion Failed (Already Deleted) + +**Error**: `Failed to delete index partial-my-index: index_not_found_exception` + +**Cause**: Index was already deleted (race condition or manual deletion) + +**Effect**: Non-critical - cleanup continues + +**Action**: No user action needed + +#### 4. ILM Policy Deletion Failed (Still in Use) + +**Warning**: `Failed to check/delete ILM policy my-policy-thawed: ...` + +**Cause**: Policy has indices assigned (shouldn't happen after index deletion) + +**Effect**: Non-critical - policy will be cleaned up in next run + +**Action**: +```bash +# Verify policy usage +curl -X GET 'http://localhost:9200/_ilm/policy/my-policy-thawed' + +# If truly orphaned, delete manually +curl -X DELETE 'http://localhost:9200/_ilm/policy/my-policy-thawed' +``` + +#### 5. S3 Restore Status Check Failed + +**Error**: `Failed to check S3 restore status for repository: Access Denied` + +**Cause**: AWS credentials lack `s3:GetObjectAttributes` permission + +**Effect**: Repository not marked as expired (timestamp-based detection still works) + +**Solutions**: +```json +{ + "Effect": "Allow", + "Action": [ + "s3:GetObjectAttributes", + "s3:GetObject", + "s3:ListBucket" + ], + "Resource": [ + "arn:aws:s3:::your-bucket-prefix*/*" + ] +} +``` + +## Best Practices + +### Scheduling + +1. **Daily Cleanup** (Recommended) + ```bash + # Cron: 3 AM daily + 0 3 * * * curator_cli deepfreeze cleanup + ``` + - Catches expirations promptly + - Low overhead (fast operation) + - Keeps cluster clean + +2. 
**After Rotation** (Automatic) + - Rotation automatically calls cleanup + - No additional configuration needed + +3. **Ad-Hoc Cleanup** + - Run after manual operations + - Testing/verification + - Immediate cleanup needed + +### Monitoring + +1. **Log Review** + ```bash + # Check cleanup logs + grep "cleanup" /var/log/curator.log | tail -20 + ``` + - Look for expired repo detection + - Verify indices deleted + - Check for errors + +2. **Status Verification** + ```bash + # After cleanup, check for expired repos (should be none) + curator_cli deepfreeze status --show-repos | grep "expired" + ``` + +3. **Metrics Tracking** + - Count expired repos per day + - Index deletion counts + - Thaw request cleanup counts + +### Retention Tuning + +Default retention periods are conservative. Adjust based on your needs: + +**Short Retention** (7 days for completed/failed, 35 for refrozen): +- Keeps index smaller +- Faster queries +- Less historical audit trail + +**Long Retention** (30+ days): +- Better audit trail +- Easier troubleshooting +- Larger index + +**Consider**: +- Compliance requirements (audit logs) +- Troubleshooting needs (request history) +- Index size vs query performance + +## Cleanup Lifecycle + +### Complete Workflow + +``` +1. Cleanup action triggered (manual, cron, or post-rotation) + ↓ +2. DETECTION PHASE + ↓ + a. Get all repositories matching prefix + ↓ + b. Filter to thawed repositories + ↓ + c. For each thawed repo: + - Compare current time to expires_at + - If expired, mark as 'expired' state + ↓ + d. Get all mounted repositories + ↓ + e. For each mounted repo: + - Query S3 restore status + - If all objects not restored, mark as 'expired' + ↓ +3. CLEANUP PHASE + ↓ + a. Get all repositories in 'expired' state + ↓ + b. For each expired repository: + ↓ + i. Verify actual mount status from Elasticsearch + ↓ + ii. If mounted: Unmount repository + ↓ + iii. Reset state: expired → frozen + ↓ + iv. Clear mount flags and timestamps + ↓ + v. 
Persist state to deepfreeze-status + ↓ + c. Identify indices to delete + ↓ + i. Get all indices from expired repo snapshots + ↓ + ii. Check each index exists in Elasticsearch + ↓ + iii. Check if index has snapshots in other repos + ↓ + iv. Flag for deletion if ONLY in expired repos + ↓ + d. Delete flagged indices + ↓ + i. For each index: + - Validate still exists + - Get health for audit + - Delete index + - Log result + ↓ +4. MAINTENANCE PHASE + ↓ + a. Get all thaw requests + ↓ + b. For each request: + - Calculate age + - Check status and retention policy + - Delete if beyond retention + ↓ + c. Get all thawed ILM policies + ↓ + d. For each policy: + - Check if any indices use it + - Delete if orphaned (zero usage) + ↓ +5. Report results +``` + +## State Transitions + +### Repository States + +``` +thawed (expires_at in future) + ↓ + [time passes, expires_at reached] + ↓ +thawed (expires_at in past) + ↓ + [cleanup detection phase] + ↓ +expired + ↓ + [cleanup unmount phase] + ↓ +frozen +``` + +### Thaw Request States + +``` +in_progress (recent) + ↓ + [user completes work] + ↓ +completed + ↓ + [retention period: 7 days] + ↓ +deleted by cleanup + +OR + +in_progress + ↓ + [user refreezes] + ↓ +refrozen + ↓ + [retention period: 35 days] + ↓ +deleted by cleanup + +OR + +in_progress + ↓ + [all repos cleaned up] + ↓ +stale in_progress + ↓ + [cleanup detects stale state] + ↓ +deleted by cleanup +``` + +### Index Lifecycle + +``` +Index mounted from thawed repository + ↓ + [expires_at reached] + ↓ +Repository marked expired + ↓ + [cleanup runs] + ↓ +Check: Index in other repositories? 
+ ↓ + Yes → Index kept (safe) + ↓ + No → Index deleted (no other backups) +``` + +## Comparison: Cleanup vs Refreeze + +| Aspect | Cleanup | Refreeze | +|--------|---------|----------| +| **Trigger** | Automatic (scheduled) | Manual (user-initiated) | +| **Purpose** | Expired repository maintenance | Early unmount ("I'm done") | +| **Timing** | After `expires_at` reached | Any time while thawed | +| **Detection** | Timestamp + S3 status | User knows they're done | +| **Safety** | Checks index snapshots in other repos | Assumes user verified | +| **Scope** | All expired repositories | Specific thaw request(s) | +| **Request Status** | N/A (processes expired state) | `in_progress` → `refrozen` | + +### When Each Runs + +**Cleanup**: +- Daily cron job +- After rotation +- Manual trigger for maintenance + +**Refreeze**: +- User completes analysis early +- Testing/development workflows +- Want to free resources before expiration + +## Related Actions + +- **Thaw**: Creates thaw requests (cleanup processes their expiration) +- **Refreeze**: Manual unmount (cleanup handles automatic unmount) +- **Rotate**: Calls cleanup automatically +- **Status**: View repository states (cleanup transitions states) + +## Performance Considerations + +### Operation Speed + +Cleanup is typically fast: +- Detection phase: 1-5 seconds (timestamp checks) +- S3 checks: 1-2 seconds per mounted repo +- Repository unmount: < 1 second per repo +- Index deletion: 1-5 seconds per index +- Thaw request cleanup: < 1 second +- ILM policy cleanup: < 1 second per policy + +**Typical Total Time**: 10-60 seconds + +### Resource Impact + +- **CPU**: Low (simple comparisons and API calls) +- **Memory**: Low (processes one repository at a time) +- **Network**: Moderate (S3 API calls for status checks) +- **Cluster Load**: Low-medium (index deletions may spike briefly) + +### Optimization + +1. **Timestamp-First Strategy**: Fast path for most cases +2. 
**S3 Checks Optional**: Only for mounted repos (catches edge cases) +3. **Sequential Processing**: Avoids overwhelming cluster +4. **Graceful Error Handling**: Continues on failures + +### Scheduling Considerations + +- **Time**: Off-peak hours (e.g., 3 AM) +- **Frequency**: Daily is sufficient for most cases +- **Conflict Avoidance**: Don't run during heavy ingestion or rotation + +## Security Considerations + +- **IAM Permissions**: Requires S3 read permissions (status checks) +- **Elasticsearch Permissions**: Requires delete permissions (repos, indices, policies) +- **Audit Trail**: All deletions logged +- **Safety Checks**: Won't delete indices with snapshots elsewhere +- **State Validation**: Verifies mount status before unmounting + +## Retention Configuration + +### Default Retention Periods + +``` +Completed requests: 7 days +Failed requests: 7 days +Refrozen requests: 35 days +In-progress (stale): Immediate (if all repos cleaned up) +``` + +### Why Different Retention? + +- **Completed/Failed** (7 days): Short-term audit trail, not needed long-term +- **Refrozen** (35 days): Longer retention for cost tracking and analysis patterns +- **Stale In-Progress**: Immediate cleanup (indicates orphaned request) + +### Tuning Retention + +To modify retention, you would need to: +1. Update settings in `deepfreeze-status` index +2. Or modify default values in setup code +3. 
Future enhancement: CLI flags for retention configuration + +## Index Deletion Safety + +### Multi-Repository Safety + +Cleanup uses a conservative approach to index deletion: + +``` +Example Scenario: + +Repositories: +- deepfreeze-000005 (expired) +- deepfreeze-000006 (active) +- deepfreeze-000007 (active) + +Index: logs-2024-12-15-000001 + +Snapshots: +- deepfreeze-000005: snapshot-001 contains logs-2024-12-15-000001 +- deepfreeze-000006: snapshot-045 contains logs-2024-12-15-000001 + +Cleanup Decision: +→ DO NOT DELETE logs-2024-12-15-000001 + (has snapshot in deepfreeze-000006) + +Index: logs-2024-11-20-000012 + +Snapshots: +- deepfreeze-000005: snapshot-023 contains logs-2024-11-20-000012 +- (no other repositories have this index) + +Cleanup Decision: +→ DELETE logs-2024-11-20-000012 + (only exists in expired repository) +``` + +**Safety Guarantee**: If an index has ANY snapshot in ANY non-expired repository, it is never deleted by cleanup. + +### What This Means + +- **Over-rotation**: Indices may be snapshotted multiple times across repositories +- **Safety**: Cleanup will never delete data that has backups elsewhere +- **Storage**: May keep indices longer than strictly necessary (erring on safety) +- **Manual Override**: Can manually delete indices if you know they're truly orphaned + +## Dry Run Example + +```bash +curator_cli deepfreeze cleanup --dry-run + +# Output: + +DRY-RUN: Checking for thawed repositories that have passed expiration time + +DRY-RUN: Would mark 2 repositories as expired: + - deepfreeze-000008 (expired 2 days ago at 2025-01-13T09:00:00Z) + - deepfreeze-000009 (expired 6 hours ago at 2025-01-14T21:00:00Z) + +DRY-RUN: Found 2 expired repositories to clean up + +Would process 2 repositories: + - deepfreeze-000008 (state: thawed, mounted: True) + Would unmount and reset to frozen + Would delete 23 mounted indices + - deepfreeze-000009 (state: thawed, mounted: True) + Would unmount and reset to frozen + Would delete 31 mounted indices + 
+DRY-RUN: Would delete 54 indices whose snapshots are only in cleaned up repositories: + - partial-.ds-logs-2024-11-15-000001 + - partial-.ds-logs-2024-11-16-000002 + [... 52 more indices ...] + +DRY-RUN: Would delete 3 old thaw requests: + - a1b2c3d4-e5f6-7890-abcd-ef1234567890 (completed request older than 7 days) + - b2c3d4e5-f6a7-8901-bcde-f12345678901 (refrozen request older than 35 days) + - c3d4e5f6-a7b8-9012-cdef-123456789012 (in-progress request with no active repos) +``` diff --git a/curator/actions/deepfreeze/docs/refreeze.md b/curator/actions/deepfreeze/docs/refreeze.md new file mode 100644 index 00000000..acd21897 --- /dev/null +++ b/curator/actions/deepfreeze/docs/refreeze.md @@ -0,0 +1,565 @@ +# Refreeze Action + +## Purpose + +The Refreeze action is a user-initiated operation to unmount thawed repositories and return them to frozen state when you're finished accessing the data. This allows you to stop incurring AWS Standard storage costs before the automatic expiration period ends. + +**Key Concept**: When you thaw data from Glacier, AWS creates temporary restored copies that exist for a specified duration (e.g., 7 days). Refreeze unmounts the repositories and cleans up searchable snapshots, but the AWS restore duration cannot be canceled early - objects will remain in Standard storage until their expiration time. However, refreezing immediately stops Elasticsearch resource usage and prevents further queries against the data. + +Refreeze vs Cleanup: +- **Refreeze**: Manual, user-initiated, "I'm done with this thaw now" +- **Cleanup**: Automatic, scheduled, processes expired thaws based on `expires_at` timestamp + +## Prerequisites + +### System Requirements + +1. **Deepfreeze Initialized** + - Setup action must have been run successfully + - `deepfreeze-status` index must exist with valid configuration + +2. **Active Thaw Request** + - At least one thaw request in `in_progress` status + - OR specific thaw request ID you want to refreeze + +3. 
**Elasticsearch Permissions** + - `snapshot.delete_repository` - Unmount repositories + - `indices.delete` - Delete mounted indices + - `ilm.delete_policy` - Remove thawed ILM policies + +### Data Considerations + +- **Mounted Searchable Snapshots**: Will be deleted +- **Queries in Progress**: Will fail when indices are deleted +- **Data Loss**: No data is lost - snapshots remain in S3, just unmounted +- **Re-access**: Can thaw again later with new thaw request + +## Effects + +### What Refreeze Does + +For each repository in the thaw request: + +#### 1. Delete Mounted Indices + +Searches for and deletes all indices mounted from the repository, including all naming variations: +- Original names (e.g., `.ds-df-test-2024.01.01-000001`) +- Partial prefix (e.g., `partial-.ds-df-test-2024.01.01-000001`) +- Restored prefix (e.g., `restored-.ds-df-test-2024.01.01-000001`) + +**Important**: Queries against these indices will fail immediately after deletion. + +#### 2. Unmount Repository from Elasticsearch + +- Calls `DELETE /_snapshot/{repo_name}` +- Removes repository from Elasticsearch cluster +- Repository metadata remains in `deepfreeze-status` index + +#### 3. Delete Per-Repository Thawed ILM Policy + +- Deletes the `{repo_name}-thawed` ILM policy +- Removes policy from any indices still using it first +- Example: Deletes `deepfreeze-000010-thawed` when unmounting `deepfreeze-000010` + +#### 4. Reset Repository State + +- Updates repository document in `deepfreeze-status` index +- State transitions: `thawed` → `frozen` (or `thawing` → `frozen`) +- Clears `is_mounted` flag +- Clears `expires_at` timestamp +- Persists state change + +#### 5. 
Mark Thaw Request as Refrozen + +- Updates thaw request document in `deepfreeze-status` index +- Status transitions: `in_progress` → `refrozen` +- Cleanup action will remove old refrozen requests based on retention settings (default: 35 days) + +### What Refreeze Does NOT Do + +- **Does NOT cancel AWS Glacier restore**: Objects remain in Standard storage until `expires_at` time +- **Does NOT delete snapshots**: Snapshot data remains in S3 +- **Does NOT delete S3 objects**: All data is preserved +- **Does NOT affect other thaw requests**: Only processes specified request(s) +- **Does NOT revert storage class**: AWS handles automatic reversion after duration expires + +### AWS Glacier Restore Duration + +**Critical Understanding**: +``` +Thaw initiated: 2025-01-15 09:00 UTC +Duration: 7 days +Expires at: 2025-01-22 09:00 UTC + +User refreezes: 2025-01-16 10:00 UTC (1 day later) + +Result: +- ✅ Elasticsearch repositories unmounted immediately +- ✅ Searchable snapshots deleted immediately +- ✅ ILM policies removed immediately +- ❌ AWS objects remain in Standard storage until 2025-01-22 09:00 +- ❌ You pay Standard storage costs until 2025-01-22 09:00 + +Savings: +- Elasticsearch compute (no longer processing queries) +- Elasticsearch storage (indices deleted) +- BUT: Still pay AWS Standard storage for remaining duration +``` + +To minimize costs, plan your thaw duration carefully rather than relying on early refreeze. 


## Options

### Thaw Request Selection

#### `--thaw-request-id <request-id>`
- **Type**: String (UUID)
- **Required**: No (if omitted, prompts to refreeze all open requests)
- **Description**: Specific thaw request ID to refreeze
- **Example**: `--thaw-request-id a1b2c3d4-e5f6-7890-abcd-ef1234567890`
- **Use Case**: Refreeze specific request when you're done with that dataset

#### Bulk Mode (No `--thaw-request-id`)
- **Behavior**: Finds all thaw requests with status `in_progress`
- **Confirmation**: Prompts user to confirm (lists all requests that will be affected)
- **Use Case**: Clean up all active thaws at once
- **Safety**: Requires interactive confirmation (skipped in `--porcelain` mode)

### Output Format

#### `--porcelain`
- **Type**: Boolean flag
- **Default**: `False`
- **Description**: Machine-readable tab-separated output
- **Confirmation**: Skips interactive confirmation in bulk mode
- **Output Format**:
  - Unmounted: `UNMOUNTED\t{repo_name}`
  - Failed: `FAILED\t{repo_name}`
  - Summary: `SUMMARY\t{unmounted_count}\t{failed_count}\t{deleted_indices}\t{deleted_policies}\t{request_count}`

## Usage Examples

### Refreeze Specific Thaw Request

```bash
# After finishing your analysis
curator_cli deepfreeze refreeze \
  --thaw-request-id a1b2c3d4-e5f6-7890-abcd-ef1234567890

# Output:
# Refreeze completed for thaw request 'a1b2c3d4-...' 
+# Unmounted 3 repositories +# Deleted 47 indices +# Deleted 3 ILM policies +``` + +### Refreeze All Open Requests (Interactive) + +```bash +# Refreeze all active thaws +curator_cli deepfreeze refreeze + +# Output: +# WARNING: This will refreeze 2 open thaw request(s) +# +# • a1b2c3d4-e5f6-7890-abcd-ef1234567890 +# Created: 2025-01-15T09:30:00.000Z +# Date Range: 2025-01-01T00:00:00Z to 2025-01-07T23:59:59Z +# Repositories: 3 +# +# • b2c3d4e5-f6a7-8901-bcde-f12345678901 +# Created: 2025-01-16T14:00:00.000Z +# Date Range: 2025-01-10T00:00:00Z to 2025-01-15T23:59:59Z +# Repositories: 2 +# +# Do you want to proceed with refreezing all these requests? [y/N]: y +# +# Refreeze completed for 2 thaw requests +# Unmounted 5 repositories +# Deleted 89 indices +# Deleted 5 ILM policies +``` + +### Scripted Refreeze (Non-Interactive) + +```bash +# Refreeze all without confirmation prompt +curator_cli deepfreeze refreeze --porcelain + +# Output (tab-separated): +# UNMOUNTED deepfreeze-000010 +# UNMOUNTED deepfreeze-000011 +# UNMOUNTED deepfreeze-000012 +# SUMMARY 3 0 47 3 1 +``` + +### Refreeze in Scheduled Job + +```bash +#!/bin/bash +# Cron job to auto-refreeze completed analysis jobs + +# Get completed analysis flag from your workflow +if [ -f /var/run/analysis-complete.flag ]; then + # Extract thaw request ID from flag file + THAW_ID=$(cat /var/run/analysis-complete.flag) + + # Refreeze + curator_cli deepfreeze refreeze --thaw-request-id "$THAW_ID" --porcelain + + # Clean up flag + rm /var/run/analysis-complete.flag +fi +``` + +### Dry Run (Preview Changes) + +```bash +# See what would be refrozen without making changes +curator_cli deepfreeze refreeze \ + --thaw-request-id a1b2c3d4-e5f6-7890-abcd-ef1234567890 \ + --dry-run + +# Output: +# DRY-RUN: Would refreeze thaw request 'a1b2c3d4-...' 
+# +# Would process 3 repositories: +# +# - deepfreeze-000010 (state: thawed, mounted: True) +# Would unmount and reset to frozen +# Would delete 15 mounted indices +# Would delete ILM policy deepfreeze-000010-thawed +# +# - deepfreeze-000011 (state: thawed, mounted: True) +# Would unmount and reset to frozen +# Would delete 18 mounted indices +# Would delete ILM policy deepfreeze-000011-thawed +# +# - deepfreeze-000012 (state: thawed, mounted: True) +# Would unmount and reset to frozen +# Would delete 14 mounted indices +# Would delete ILM policy deepfreeze-000012-thawed +# +# DRY-RUN: Would mark thaw request 'a1b2c3d4-...' as completed +``` + +## Error Handling + +### Common Errors and Solutions + +#### 1. Thaw Request Not Found + +**Error**: `Could not find thaw request 'abc123'` + +**Causes**: +- Invalid request ID +- Request was already cleaned up (retention period expired) +- Typo in request ID + +**Solutions**: +- List all thaw requests: + ```bash + curator_cli deepfreeze thaw --list-requests --include-completed + ``` +- Check status: + ```bash + curator_cli deepfreeze status --show-thawed + ``` + +#### 2. No Open Thaw Requests + +**Message**: `No open thaw requests found to refreeze` + +**Cause**: All thaw requests are in `completed` or `refrozen` status + +**Solutions**: +- Check status to see if already refrozen: + ```bash + curator_cli deepfreeze thaw --list-requests --include-completed + ``` +- No action needed if this is expected + +#### 3. Repository Unmount Failed + +**Error**: `Failed to unmount repository deepfreeze-000010: repository_missing_exception` + +**Causes**: +- Repository was already unmounted +- Repository was manually deleted + +**Effect**: Non-critical - refreeze continues and marks repository as unmounted anyway + +**Action**: No user action required + +#### 4. 
Index Deletion Failed + +**Error**: `Failed to delete index partial-my-index: index_not_found_exception` + +**Causes**: +- Index was already deleted +- Index name pattern didn't match + +**Effect**: Non-critical - refreeze continues + +**Action**: Verify with: +```bash +curl -X GET 'http://localhost:9200/_cat/indices/partial-*,restored-*?v' +``` + +#### 5. ILM Policy Deletion Failed + +**Error**: `Failed to delete ILM policy: policy is in use` + +**Cause**: Some indices still reference the policy (shouldn't happen - policy is removed from indices first) + +**Solutions**: +- Manually remove policy from indices: + ```bash + curl -X POST 'http://localhost:9200/my-index/_ilm/remove' + ``` +- Retry refreeze + +#### 6. Bulk Refreeze User Cancellation + +**Message**: `Operation cancelled by user` + +**Cause**: User typed 'n' or pressed Ctrl+C at confirmation prompt + +**Action**: Intentional cancellation - no cleanup needed + +## Best Practices + +### When to Refreeze + +1. **After Completing Analysis** + - Queries finished + - Reports generated + - Data exported if needed + +2. **When Thaw Was Overestimated** + - Thawed 7 days but only needed 2 + - Reduce Elasticsearch resource usage + - Free up cluster capacity + +3. **Before Maintenance Windows** + - Clean up before cluster upgrades + - Reduce indices to manage during maintenance + +4. **Cost Optimization** + - While you can't avoid AWS Standard storage costs until expiration + - You DO save on Elasticsearch compute and storage immediately + +### Before Refreezing + +1. **Verify No Active Users** + ```bash + # Check for active queries on thawed indices + curl -X GET 'http://localhost:9200/_tasks?detailed=true&actions=*search' + ``` + +2. **Export Critical Data** + - If analysis results needed later, export first + - Thawed data can be re-accessed, but requires new thaw request + +3. **Document Findings** + - Record what you learned from the data + - Save query patterns for future thaws + +4. 
**Check Thaw Request ID** + ```bash + # List active thaws + curator_cli deepfreeze thaw --list-requests + ``` + +### After Refreezing + +1. **Verify Repository State** + ```bash + # Check repositories are frozen + curator_cli deepfreeze status --show-repos + ``` + +2. **Verify Indices Deleted** + ```bash + # Should return empty + curl -X GET 'http://localhost:9200/_cat/indices/partial-*,restored-*?v' + ``` + +3. **Monitor Cleanup** + - Cleanup action will remove old refrozen requests based on retention (default: 35 days) + - View in status until removed + +4. **Plan Future Thaws** + - Note date ranges you actually used + - Adjust duration for next thaw based on actual usage + +## Refreeze Lifecycle + +### Complete Workflow + +``` +1. User completes analysis on thawed data + ↓ +2. User runs refreeze command + ↓ +3. For each repository in thaw request: + ↓ + a. Find all mounted indices (original, partial-, restored- prefixes) + ↓ + b. Delete all mounted indices + ↓ + c. Unmount repository from Elasticsearch + ↓ + d. Delete {repo_name}-thawed ILM policy + ↓ + e. Update repository state: thawed → frozen + ↓ + f. Persist state to deepfreeze-status index + ↓ +4. Mark thaw request status: in_progress → refrozen + ↓ +5. Report unmounted repos, deleted indices, deleted policies + ↓ +6. (Later) Cleanup action removes old refrozen requests after retention period +``` + +## State Transitions + +### Repository States + +``` +frozen → thawing → thawed → frozen + ↑ ↓ + └──────────┘ + (refreeze) +``` + +### Thaw Request States + +``` +in_progress → completed → (deleted by cleanup after retention) + ↓ +refrozen → (deleted by cleanup after retention) +``` + +### Index Lifecycle + +``` +1. Snapshot exists in frozen repository (inaccessible) + ↓ +2. Repository thawed and mounted + ↓ +3. Index mounted as searchable snapshot (queryable) + ↓ +4. Refreeze deletes index (NOT snapshot) + ↓ +5. 
Snapshot remains in S3 (can mount again with new thaw) +``` + +## Comparison: Refreeze vs Cleanup + +| Aspect | Refreeze | Cleanup | +|--------|----------|---------| +| **Trigger** | Manual, user-initiated | Automatic, scheduled | +| **Use Case** | "I'm done now" | "Time's up" (expiration) | +| **Timing** | Any time while thawed | After `expires_at` timestamp | +| **Cost Impact** | Saves ES resources, NOT AWS storage | Saves ES resources, AWS reversion automatic | +| **Request Status** | `in_progress` → `refrozen` | Marks `expired` → unmounts → `frozen` | +| **Selection** | Specific request ID or all | All expired repositories | +| **User Intent** | "Done early" | "Automatic maintenance" | + +### When to Use Each + +**Use Refreeze When**: +- Analysis completed before expiration +- Want to free up Elasticsearch resources immediately +- Testing/development workflows (quick iteration) +- Cost-conscious (save ES costs, even if AWS storage continues) + +**Use Cleanup When**: +- Expiration time reached +- Automated operations (cron jobs) +- Hands-off maintenance +- Multiple thaws expiring at different times + +## Related Actions + +- **Thaw**: Create thaw requests (required before refreeze) +- **Cleanup**: Automatic expiration handling +- **Status**: View thawed repositories and active requests +- **Rotate**: Repository rotation (doesn't affect thawed repos) + +## Performance Considerations + +### Operation Speed + +Refreeze is typically fast: +- Repository unmount: < 1 second per repo +- Index deletion: 1-5 seconds per index (depends on cluster size) +- ILM policy deletion: < 1 second per policy +- State updates: < 1 second + +**Typical Total Time**: 10-60 seconds for most thaw requests + +### Bulk Operations + +When refreezing multiple requests: +- Processed sequentially +- Total time: (number of repos) × (seconds per repo) +- Example: 10 repos × 5 seconds = ~50 seconds + +### Resource Impact + +- **Minimal CPU**: Simple delete operations +- **Minimal Memory**: Small 
state updates +- **Network**: Elasticsearch API calls only (no S3 operations) +- **Cluster Load**: Low - safe to run during normal operations + +## Security Considerations + +- **Confirmation Required**: Bulk mode prompts for confirmation (safety) +- **Permissions**: Requires delete permissions (indices, repositories, ILM) +- **Audit Trail**: All operations logged +- **No Data Loss**: Snapshots remain in S3 (only mounted copies deleted) +- **Reversible**: Can thaw again with new request + +## Cost Implications + +### Elasticsearch Savings (Immediate) + +✅ **Saves**: +- Compute: No longer processing queries +- Storage: Mounted indices deleted +- Memory: Index metadata removed + +### AWS Savings (Delayed) + +❌ **Does NOT Save**: +- Standard storage costs until `expires_at` +- Objects remain restored for full duration + +💡 **Key Insight**: Refreeze is primarily an Elasticsearch resource optimization, not an AWS cost optimization. To minimize AWS costs, choose your thaw `--duration` carefully upfront. + +### Example Cost Analysis + +``` +Scenario: +- Thawed: 1TB data, 7-day duration, Standard tier +- Used for: 2 days +- Refroze: Day 3 + +AWS Costs: +- Retrieval: $10 (paid at thaw time) +- Storage Days 1-3: $0.30 (while using) +- Storage Days 4-7: $0.40 (still paying after refreeze!) 
+- Total AWS: $10.70 + +Elasticsearch Savings (Days 4-7): +- Freed: 1TB searchable snapshot storage +- Freed: Compute for queries +- Freed: Memory for index metadata + +Lesson: +- Plan duration realistically (3 days would've saved $0.40) +- Refreeze still valuable for ES resource management +``` diff --git a/curator/actions/deepfreeze/docs/repair_metadata.md b/curator/actions/deepfreeze/docs/repair_metadata.md new file mode 100644 index 00000000..5a8ae79d --- /dev/null +++ b/curator/actions/deepfreeze/docs/repair_metadata.md @@ -0,0 +1,1100 @@ +# Repair Metadata Action + +## Purpose + +The Repair Metadata action is a diagnostic and maintenance tool that detects and fixes discrepancies between metadata stored in Elasticsearch and the actual S3 storage state. It ensures that both repository metadata and thaw request metadata accurately reflect the current state in S3. + +Repair Metadata handles: +1. **Repository Metadata Verification**: Scans all repositories and compares metadata with actual S3 storage class +2. **Thaw Request Metadata Verification**: Scans all in_progress thaw requests and compares with actual S3 restore status +3. **Discrepancy Detection**: Identifies metadata that doesn't match S3 reality +4. **Automatic Correction**: Updates metadata to match actual S3 state +5. **Comprehensive Reporting**: Provides detailed reports of all discrepancies and fixes + +**Key Concept**: Repository metadata can become desynchronized from S3 storage state due to bugs, failed operations, or manual S3 modifications. This action provides a way to detect and correct these inconsistencies automatically. + +## Background: Why This Action Exists + +### The Metadata Desync Bug + +Prior to version 8.0.21 (commit d015e32), the `rotate` action had a bug where it would successfully push repositories to GLACIER storage but fail to update the `thaw_state` metadata field from `'active'` to `'frozen'`. 
This caused a state desynchronization where: + +- **S3 Reality**: Repository objects stored in GLACIER +- **Elasticsearch Metadata**: `thaw_state='active'` (incorrect) + +**Impact of Desync**: +- Status displays show incorrect repository states +- Tests skip due to "no frozen repositories" when many actually exist +- Thaw operations may behave unexpectedly +- Cost tracking and auditing becomes inaccurate + +### The Fix + +The bug was fixed in `rotate.py` by calling `repository.reset_to_frozen()` after `push_to_glacier()`, which properly sets: +- `thaw_state = 'frozen'` +- `is_mounted = False` +- `is_thawed = False` +- Clears `thawed_at` and `expires_at` timestamps + +However, repositories that were rotated before the fix still have incorrect metadata. The Repair Metadata action provides a way to correct these historical discrepancies. + +### The Stale Thaw Request Problem + +When thaw requests are created but never checked, their status remains `in_progress` indefinitely, even after the S3 restore has completed or expired. 
This causes: + +**The Problem**: +- Thaw request created on 2025-11-03 with `status='in_progress'` +- S3 restore completed after a few hours → objects available +- After 7 days (default restore duration) → S3 automatically expires the restore +- Metadata still shows `status='in_progress'` (incorrect/stale) +- No way to distinguish between: + - Actively working thaw (AWS still restoring) + - Completed but unchecked thaw (ready to mount) + - Expired and ignored thaw (no longer available) + +**Impact of Stale Thaw Requests**: +- Clutter in thaw request list +- Cannot determine actual system state +- Resources may be wasted (repositories thawed but not mounted) +- Confusion about which thaws are still active +- Old in_progress requests accumulate over time + +**The Solution**: +Repair Metadata now checks S3 Restore headers to determine the actual state: +- `ongoing-request="true"` → Truly in progress, keep as `in_progress` +- `ongoing-request="false"` → Restore complete, mark as `completed` +- **No Restore header** → Restore expired, mark as `refrozen` + +This allows distinguishing between actively working thaws and stale metadata. + +## Prerequisites + +### System Requirements + +1. **Deepfreeze Initialized** + - Setup action must have been run successfully + - `deepfreeze-status` index must exist + - At least one repository must exist + +2. **Elasticsearch Permissions** + - Read access to `deepfreeze-status` index + - Write access to `deepfreeze-status` index (to update metadata) + +3. 
**AWS Credentials** (for S3 storage checks) + - `s3:ListBucket` - List objects in repositories + - `s3:GetObjectAttributes` - Check storage class of objects (optional but recommended) + +### When to Run + +**After Upgrading**: +- After upgrading from versions prior to 8.0.21 +- To fix repositories rotated before the bug fix + +**Periodic Verification**: +- Monthly verification as part of maintenance +- After manual S3 operations (storage class changes) +- After recovering from cluster failures + +**Troubleshooting**: +- When status displays show unexpected states +- When tests report "no frozen repositories" but you know data exists +- When investigating cost anomalies or storage issues + +**Preventive**: +- Before major operations (large thaws, migrations) +- As part of pre-upgrade validation + +## Effects + +### Scan Phase + +#### 1. Query All Repositories + +- Queries `deepfreeze-status` index for all repository documents +- Sorts by `start` date (ascending) for consistent ordering +- Retrieves: name, bucket, base_path, thaw_state, is_mounted + +**Query**: +```json +{ + "query": {"term": {"doctype": "repository"}}, + "size": 1000, + "sort": [{"start": "asc"}] +} +``` + +#### 2. Check S3 Storage Class + +For each repository: +- Lists objects in S3 bucket at repository base_path +- Examines `StorageClass` attribute for each object +- Counts objects by storage class: + - GLACIER, DEEP_ARCHIVE, GLACIER_IR → "glacier" + - STANDARD, REDUCED_REDUNDANCY, etc. → "standard" + - Default (no StorageClass) → "standard" + +**Sampling Optimization**: +- Uses `MaxKeys=100` for faster checks +- Sufficient to determine repository storage state +- Full scan not needed (repositories are homogeneous) + +#### 3. 
Determine Repository State + +Based on object counts: + +| Glacier Objects | Standard Objects | Total | Determination | +|----------------|------------------|-------|---------------| +| 0 | 0 | 0 | EMPTY (no objects) | +| N | 0 | N | GLACIER (all in glacier) | +| 0 | N | N | STANDARD (all in standard) | +| N | M | N+M | MIXED (some of each) | + +#### 4. Compare with Metadata + +For each repository: +``` +expected_frozen = (metadata.thaw_state == 'frozen') +actually_frozen = (s3_state == 'GLACIER') + +if expected_frozen != actually_frozen: + → DISCREPANCY FOUND +``` + +### Thaw Request Scan Phase + +After repository checking, the action scans all thaw requests: + +#### 1. Query In-Progress Thaw Requests + +- Queries `deepfreeze-status` index for thaw request documents +- Filters to only `status='in_progress'` requests +- Completed, failed, and refrozen requests are skipped (terminal states) + +**Query**: +```python +all_thaw_requests = list_thaw_requests(client) +in_progress_requests = [req for req in all_thaw_requests if req.get('status') == 'in_progress'] +``` + +#### 2. Check S3 Restore Status for Each Request + +For each in_progress thaw request: +- Get all repositories listed in the request +- For each repository, call `check_restore_status()` to check S3 Restore headers +- Aggregate results to determine overall request state + +**Restore Status Checking**: +```python +for repo in request_repos: + status = check_restore_status(s3, repo.bucket, repo.base_path) + # status contains: total, restored, in_progress, not_restored, complete +``` + +#### 3. 
Determine Actual State + +Based on S3 Restore headers across all repos in the request: + +| S3 Restore Status | Actual State | Meaning | +|------------------|--------------|---------| +| All objects have no Restore header | **EXPIRED** | Restore window passed, objects back in Glacier | +| All objects have `ongoing-request="false"` | **COMPLETED** | Restore done, ready to mount | +| Any object has `ongoing-request="true"` | **IN_PROGRESS** | AWS still working on restore | +| Mixed states | **MIXED** | Some repos done, some not (keep as in_progress) | +| Unable to check | **ERROR** | S3 access issues or missing repos | + +#### 4. Identify Stale Metadata + +Compare metadata state with actual state: + +```python +if actual_state == 'EXPIRED' and metadata_state == 'in_progress': + → STALE: should be 'refrozen' + +if actual_state == 'COMPLETED' and metadata_state == 'in_progress': + → STALE: should be 'completed' + +if actual_state == 'IN_PROGRESS' and metadata_state == 'in_progress': + → CORRECT: keep as 'in_progress' +``` + +### Repository Repair Phase + +For each repository discrepancy (when NOT in dry-run mode): + +#### 1. Fetch Repository Object + +```python +repos = get_repositories_by_names(client, [repo_name]) +repo = repos[0] +``` + +**Why Fresh Fetch?**: +- Ensures we have latest Elasticsearch state +- Avoids race conditions with concurrent operations +- Gets the full Repository object with all methods + +#### 2. Update State Based on S3 + +**If S3 is GLACIER**: +```python +repo.reset_to_frozen() +# Sets: +# thaw_state = 'frozen' +# is_thawed = False +# is_mounted = False +# thawed_at = None +# expires_at = None +``` + +**If S3 is STANDARD**: +```python +# Only update if currently marked as frozen +if metadata_state == 'frozen': + repo.thaw_state = 'active' + repo.is_thawed = False +``` + +**Why Different Logic?**: +- GLACIER → frozen: Clear, unambiguous state +- STANDARD → active: Could be active or thawed, we choose active (safe default) + +#### 3. 
Persist to Elasticsearch + +```python +repo.persist(client) +``` + +- Updates document in `deepfreeze-status` index +- Uses repository name as document ID +- Atomic update operation + +### Thaw Request Repair Phase + +For each stale thaw request (when NOT in dry-run mode): + +#### 1. Update Request Status + +**If actual state is EXPIRED**: +```python +update_thaw_request(client, request_id, status='refrozen') +``` + +- Marks request as `refrozen` since restore window has passed +- S3 objects have reverted to Glacier storage +- Cleanup action can later delete this old request + +**If actual state is COMPLETED**: +```python +update_thaw_request(client, request_id, status='completed') +``` + +- Marks request as `completed` since restore is done +- **Important**: This does NOT mount repositories +- User must run `curator_cli deepfreeze thaw --check-status <request_id>` to mount +- A warning is logged indicating mounting is still needed + +**Why Not Mount Automatically?**: +- Mounting is a complex operation involving: + - Mounting repositories in Elasticsearch + - Finding and mounting indices within date range + - Adding indices back to data streams + - Creating per-repo ILM policies +- Repair Metadata focuses on metadata correctness +- Use the dedicated thaw --check-status command for full mounting workflow + +#### 2. 
Track Results + +```python +# Count successes and failures +thaw_fixed_count += 1 # Successfully updated +thaw_failed_count += 1 # Failed to update (exception) +``` + +### Reporting Phase + +#### Dry-Run Mode + +**Rich Output** (default): +- Repository summary statistics (total, correct, discrepancies, errors) +- Thaw request summary statistics (total in_progress, correct, stale, errors) +- Table showing all repository discrepancies with: + - Repository name + - Current metadata state + - Actual S3 storage class + - Mount status +- Table showing all stale thaw requests with: + - Request ID (shortened) + - Repositories (first 3, with count if more) + - Current metadata state + - Actual S3 state + - What it should be + - Created date +- Warning message: "DRY-RUN: No changes made" + +**Porcelain Output** (`--porcelain`): +``` +TOTAL_REPOS=58 +CORRECT=10 +DISCREPANCIES=48 +ERRORS=0 +TOTAL_THAW_REQUESTS=7 +CORRECT_THAW_REQUESTS=0 +STALE_THAW_REQUESTS=7 +THAW_REQUEST_ERRORS=0 +THAW_REQUESTS_TO_FIX: + a1b2c3d4...: metadata=in_progress, actual=EXPIRED, should_be=refrozen + e5f6g7h8...: metadata=in_progress, actual=EXPIRED, should_be=refrozen +``` + +#### Live Mode + +**Rich Output**: +- Same as dry-run, plus: +- Repository repair results section showing: + - Number fixed + - Number failed (if any) +- Thaw request repair results section showing: + - Number fixed + - Number failed (if any) + +**Porcelain Output**: +``` +[Repository stats as above...] +[Thaw request stats as above...] 
+FIXED=48 +FAILED=0 +THAW_FIXED=7 +THAW_FAILED=0 +``` + + +## Options + +### `--porcelain` +- **Type**: Boolean flag +- **Default**: `False` +- **Description**: Machine-readable tab-separated output +- **Use Case**: Scripting, automation, monitoring systems +- **Effect**: Disables rich formatting, outputs key=value pairs + +### Dry-Run Mode + +While not a direct option, repair-metadata respects the global `--dry-run` flag: + +```bash +curator_cli --dry-run deepfreeze repair-metadata +``` + +In dry-run mode: +- No repository metadata is modified +- No documents are updated in Elasticsearch +- All discrepancies are detected and reported +- Safe to run at any time + +## Usage Examples + +### Basic Verification (Dry-Run) + +```bash +# See what would be fixed without making changes +curator_cli --dry-run deepfreeze repair-metadata + +# Output: +# Metadata Repair Report (DRY-RUN) +# +# Total repositories scanned: 58 +# Repositories with correct metadata: 10 +# Repositories with discrepancies: 48 +# +# Discrepancies Found: +# ┏━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┓ +# ┃ Repository ┃ Metadata State ┃ Actual S3 Storage ┃ Mounted ┃ +# ┡━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━┩ +# │ deepfreeze-000004 │ active │ GLACIER │ No │ +# │ deepfreeze-000005 │ active │ GLACIER │ No │ +# ... +# +# DRY-RUN: No changes made. Run without --dry-run to apply fixes. +``` + +### Fix All Discrepancies + +```bash +# Actually repair the metadata +curator_cli deepfreeze repair-metadata + +# Output: +# Metadata Repair Report (LIVE) +# +# Total repositories scanned: 58 +# Repositories with correct metadata: 10 +# Repositories with discrepancies: 48 +# +# [Table showing discrepancies...] 
+# +# Results: +# Fixed: 48 +``` + +### Detect and Fix Stale Thaw Requests + +```bash +# Scenario: You have 7 thaw requests created on 2025-11-03 +# They were never checked and are now expired + +# Step 1: Check for stale thaw requests +curator_cli --dry-run deepfreeze repair-metadata + +# Output: +# Metadata Repair Report (DRY-RUN) +# +# REPOSITORIES: +# Total scanned: 58 +# Correct metadata: 58 +# Discrepancies: 0 +# +# THAW REQUESTS: +# Total in_progress: 7 +# Correct metadata: 0 +# Stale metadata: 7 +# +# Stale Thaw Requests Found: +# ┏━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┓ +# ┃ Request ID ┃ Repositories ┃ Metadata State ┃ Actual State┃ Should Be ┃ Created ┃ +# ┡━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━┩ +# │ a1b2c3d4...│ deepfreeze-001│ in_progress │ EXPIRED │ refrozen │ 2025-11-03│ +# │ e5f6g7h8...│ deepfreeze-002│ in_progress │ EXPIRED │ refrozen │ 2025-11-03│ +# │ i9j0k1l2...│ deepfreeze-003│ in_progress │ EXPIRED │ refrozen │ 2025-11-03│ +# ... +# +# DRY-RUN: No changes made. Run without --dry-run to apply fixes. + +# Step 2: Fix the stale requests +curator_cli deepfreeze repair-metadata + +# Output: +# [Same tables as above...] +# +# Thaw Request Repair Results: +# Fixed: 7 + +# Now all 7 requests are marked as 'refrozen' instead of 'in_progress' +``` + +### Scripted Verification + +```bash +# Machine-readable output for monitoring +curator_cli --dry-run deepfreeze repair-metadata --porcelain + +# Output: +# TOTAL_REPOS=58 +# CORRECT=10 +# DISCREPANCIES=48 +# ERRORS=0 +# REPOS_TO_FIX: +# deepfreeze-000004: metadata=active, actual=GLACIER +# deepfreeze-000005: metadata=active, actual=GLACIER +# ... 
+ +# Parse in scripts: +if curator_cli --dry-run deepfreeze repair-metadata --porcelain | grep -q "DISCREPANCIES=0"; then + echo "✓ All metadata correct" +else + echo "⚠ Metadata discrepancies found" + curator_cli deepfreeze repair-metadata +fi +``` + +### Monthly Verification (Cron) + +```bash +# /etc/cron.d/deepfreeze-verify-metadata +# Verify metadata monthly, auto-fix if needed +0 2 1 * * curator_cli deepfreeze repair-metadata >> /var/log/deepfreeze-repair.log 2>&1 +``` + +### Pre-Upgrade Verification + +```bash +# Before upgrading curator, verify metadata is correct +echo "Checking repository metadata before upgrade..." +curator_cli --dry-run deepfreeze repair-metadata + +# Fix any issues +curator_cli deepfreeze repair-metadata + +# Verify fix +curator_cli deepfreeze status --show-repos | grep "State" +``` + +### Post-Migration Verification + +```bash +# After migrating data or recovering from backup +curator_cli --dry-run deepfreeze repair-metadata + +# If discrepancies found, investigate before fixing +# (May indicate incomplete migration) + +# Fix only if migration was successful +curator_cli deepfreeze repair-metadata +``` + +## Detection Logic + +### S3 Storage Class Detection + +```python +def _check_repo_storage_class(bucket, base_path): + glacier_count = 0 + standard_count = 0 + total_count = 0 + + # List objects (sampled for performance) + for obj in s3.list_objects(Bucket=bucket, Prefix=base_path, MaxKeys=100): + total_count += 1 + storage_class = obj.get('StorageClass', 'STANDARD') + + if storage_class in ['GLACIER', 'DEEP_ARCHIVE', 'GLACIER_IR']: + glacier_count += 1 + else: + standard_count += 1 + + # Determine state + if total_count == 0: + return 'EMPTY' + elif glacier_count == total_count: + return 'GLACIER' + elif glacier_count > 0: + return 'MIXED' + else: + return 'STANDARD' +``` + +### Discrepancy Detection + +```python +for repo in all_repos: + # Get metadata state + metadata_thaw_state = repo['thaw_state'] + + # Check S3 storage + 
actual_storage = check_repo_storage_class(repo['bucket'], repo['base_path']) + + # Compare + expected_frozen = (metadata_thaw_state == 'frozen') + actually_frozen = (actual_storage == 'GLACIER') + + if expected_frozen != actually_frozen: + discrepancies.append({ + 'name': repo['name'], + 'metadata_state': metadata_thaw_state, + 'actual_storage': actual_storage, + 'mounted': repo['is_mounted'] + }) +``` + +### Repair Logic + +```python +for discrepancy in discrepancies: + repo = get_repository(discrepancy['name']) + actual_storage = discrepancy['actual_storage'] + + if actual_storage == 'GLACIER': + # S3 is frozen, metadata should be too + repo.reset_to_frozen() + repo.persist(client) + + elif actual_storage == 'STANDARD': + # S3 is not frozen + if discrepancy['metadata_state'] == 'frozen': + # But metadata says frozen - fix it + repo.thaw_state = 'active' + repo.is_thawed = False + repo.persist(client) + + else: + # MIXED or EMPTY - skip (edge case) + log_warning(f"Skipping {repo.name} with {actual_storage} storage") +``` + +## Error Handling + +### Common Issues + +#### 1. S3 Access Denied + +**Error**: `Failed to check S3 storage for bucket/path: Access Denied` + +**Cause**: AWS credentials lack S3 read permissions + +**Effect**: Repository skipped, counted as error (not fixed) + +**Solutions**: +```json +{ + "Effect": "Allow", + "Action": [ + "s3:ListBucket", + "s3:GetObjectAttributes", + "s3:GetObject" + ], + "Resource": [ + "arn:aws:s3:::deepfreeze*", + "arn:aws:s3:::deepfreeze*/*" + ] +} +``` + +#### 2. Repository Not Found + +**Error**: `Repository deepfreeze-000042 not found` + +**Cause**: Repository exists in status index but has no ES document (data corruption) + +**Effect**: Counted as failed, repair continues + +**Action**: +- Investigate status index integrity +- May need to recreate repository metadata manually +- Check for partial cleanup or migration issues + +#### 3. 
No Bucket/Base Path Info + +**Log**: `Skipping deepfreeze-000001 - no bucket/base_path info` + +**Cause**: Very old repository created before these fields were tracked + +**Effect**: Skipped (cannot verify without S3 location) + +**Action**: +- Manually update repository document with bucket/base_path +- Or accept that old repos cannot be verified + +#### 4. Empty Repository + +**Log**: `deepfreeze-000015: metadata=frozen, s3=EMPTY` + +**Cause**: Repository has no objects in S3 (possibly deleted manually) + +**Effect**: Shown as correct (EMPTY treated as special case) + +**Action**: +- Investigate why repository is empty +- May indicate data loss or incomplete cleanup + +#### 5. Mixed Storage Class + +**Log**: `Skipping deepfreeze-000023 with MIXED storage` + +**Cause**: Some objects in GLACIER, some in STANDARD (transition in progress) + +**Effect**: Skipped (ambiguous state, cannot determine correct metadata) + +**Action**: +- Wait for S3 lifecycle transition to complete +- Or manually investigate and fix +- Re-run repair after transition completes + +## Best Practices + +### When to Run + +1. **After Upgrade** (Required) + ```bash + # Immediately after upgrading to version with metadata fix + curator_cli --dry-run deepfreeze repair-metadata + curator_cli deepfreeze repair-metadata + ``` + +2. **Monthly Verification** (Recommended) + ```bash + # Cron: First day of month at 2 AM + 0 2 1 * * curator_cli deepfreeze repair-metadata + ``` + +3. **Before Major Operations** (Best Practice) + ```bash + # Before large thaw operations + curator_cli --dry-run deepfreeze repair-metadata + + # Before migrations or cluster changes + curator_cli deepfreeze repair-metadata + ``` + +4. **After Manual S3 Operations** + ```bash + # If you manually changed storage classes + curator_cli deepfreeze repair-metadata + ``` + +### Verification Workflow + +```bash +# 1. Check current status +curator_cli deepfreeze status --show-repos + +# 2. 
Run dry-run to see what would change +curator_cli --dry-run deepfreeze repair-metadata + +# 3. Review discrepancies +# - Are they expected? (post-upgrade = yes) +# - Are they unexpected? (investigate before fixing) + +# 4. Fix if appropriate +curator_cli deepfreeze repair-metadata + +# 5. Verify fix +curator_cli deepfreeze status --show-repos +curator_cli --dry-run deepfreeze repair-metadata # Should show 0 discrepancies +``` + +### Monitoring + +1. **Log Analysis** + ```bash + # Check repair logs + grep "repair_metadata" /var/log/curator.log + + # Look for patterns + grep "Fixed:" /var/log/deepfreeze-repair.log + ``` + +2. **Metrics Collection** + ```bash + # Extract metrics from porcelain output + curator_cli --dry-run deepfreeze repair-metadata --porcelain | \ + grep "DISCREPANCIES=" | \ + awk -F= '{print $2}' + ``` + +3. **Alerting** + ```bash + # Alert if discrepancies found + DISCREPANCIES=$(curator_cli --dry-run deepfreeze repair-metadata --porcelain | grep "DISCREPANCIES=" | cut -d= -f2) + + if [ "$DISCREPANCIES" -gt 0 ]; then + echo "WARNING: $DISCREPANCIES repositories have metadata discrepancies" + # Send alert + fi + ``` + +### Safety Considerations + +1. **Always Dry-Run First** + - See what would change before applying + - Verify changes match expectations + - Catch unexpected discrepancies + +2. **Investigate Unexpected Discrepancies** + - Post-upgrade discrepancies are expected + - Sudden discrepancies may indicate: + - Bug recurrence + - Manual S3 operations + - Data corruption + - Cluster issues + +3. **Backup Before Large Repairs** + ```bash + # Snapshot status index before fixing many repos + curl -X PUT "localhost:9200/_snapshot/backup/deepfreeze-status-pre-repair?wait_for_completion=true" \ + -H 'Content-Type: application/json' -d' + { + "indices": "deepfreeze-status", + "include_global_state": false + }' + ``` + +4. 
**Verify After Repair** + ```bash + # Re-run dry-run to confirm 0 discrepancies + curator_cli --dry-run deepfreeze repair-metadata | grep "discrepancies: 0" + ``` + +## Comparison: Repair Metadata vs Status + +| Aspect | Repair Metadata | Status | +|--------|-----------------|--------| +| **Purpose** | Fix metadata discrepancies | View current state | +| **S3 Checks** | Yes (checks storage class) | No (shows metadata only) | +| **Modifies Data** | Yes (updates metadata) | No (read-only) | +| **When to Run** | After upgrades, monthly | Anytime, frequently | +| **Speed** | Slower (S3 API calls) | Fast (ES queries only) | +| **Output** | Discrepancies and fixes | Current state | + +**Use Together**: +```bash +# Before repair: See what metadata says +curator_cli deepfreeze status --show-repos + +# Run repair +curator_cli deepfreeze repair-metadata + +# After repair: Verify new metadata +curator_cli deepfreeze status --show-repos +``` + +## Performance Considerations + +### Operation Speed + +- **Query Phase**: 1-2 seconds (Elasticsearch query) +- **S3 Check Phase**: 1-2 seconds per repository (with sampling) +- **Repair Phase**: 0.5-1 second per repository (ES updates) + +**Typical Total Time**: +- 50 repositories: 60-120 seconds (1-2 minutes) +- 100 repositories: 120-240 seconds (2-4 minutes) + +### Resource Impact + +- **CPU**: Low (simple comparisons) +- **Memory**: Low (processes one repository at a time) +- **Network**: Moderate (S3 API calls, Elasticsearch queries) +- **S3 API Calls**: 1 per repository (ListObjects) +- **ES Operations**: 1 query + N updates (where N = discrepancies) + +### Optimization + +1. **Sampling Strategy** + - Only checks first 100 objects per repository + - Sufficient to determine storage state + - Avoids scanning large repositories fully + +2. **Sequential Processing** + - One repository at a time + - Avoids overwhelming S3 API + - Prevents rate limiting + +3. 
**Conditional Updates** + - Only updates repositories with discrepancies + - Skips repositories with correct metadata + +### Scheduling Considerations + +- **Time**: Off-peak hours (reduces S3 API cost) +- **Frequency**: Monthly is sufficient (not time-critical) +- **Conflict Avoidance**: Don't run during rotation or major thaw operations + +## Related Actions + +- **Setup**: Creates initial repository metadata (repair fixes it later) +- **Rotate**: Transitions repositories (previously had bug causing desync) +- **Status**: Shows repository states (repair ensures states are accurate) +- **Thaw**: Depends on accurate frozen/active states +- **Cleanup**: Processes expired repositories (needs accurate states) + +## State Transitions + +### Correct State Transitions + +``` +S3: STANDARD → GLACIER (via rotate) +Metadata: active → frozen (via rotate) +✓ Synchronized + +S3: GLACIER → STANDARD (via AWS restore) +Metadata: frozen → thawed (via thaw action) +✓ Synchronized +``` + +### Bug: Desynchronized State + +``` +S3: STANDARD → GLACIER (via rotate) +Metadata: active → active (BUG: forgot to update) +✗ Desynchronized + +Repair Metadata: +S3: GLACIER (detected) +Metadata: active → frozen (corrected) +✓ Re-synchronized +``` + +## Dry Run Example + +```bash +curator_cli --dry-run deepfreeze repair-metadata + +# Output: + +Metadata Repair Report (DRY-RUN) + +Total repositories scanned: 58 +Repositories with correct metadata: 10 +Repositories with discrepancies: 48 + +Discrepancies Found: +┏━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┓ +┃ Repository ┃ Metadata State ┃ Actual S3 Storage ┃ Mounted ┃ +┡━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━┩ +│ deepfreeze-000004 │ active │ GLACIER │ No │ +│ deepfreeze-000005 │ active │ GLACIER │ No │ +│ deepfreeze-000006 │ active │ GLACIER │ No │ +│ deepfreeze-000007 │ active │ GLACIER │ No │ +│ deepfreeze-000008 │ active │ GLACIER │ No │ +│ deepfreeze-000009 │ active │ GLACIER │ No │ +│ 
deepfreeze-000010 │ active │ GLACIER │ No │ +│ deepfreeze-000011 │ active │ GLACIER │ No │ +│ deepfreeze-000012 │ active │ GLACIER │ No │ +│ deepfreeze-000013 │ active │ GLACIER │ No │ +│ deepfreeze-000014 │ active │ GLACIER │ No │ +│ deepfreeze-000015 │ active │ GLACIER │ No │ +│ deepfreeze-000016 │ active │ GLACIER │ No │ +│ deepfreeze-000017 │ active │ GLACIER │ No │ +│ deepfreeze-000018 │ active │ GLACIER │ No │ +│ deepfreeze-000019 │ active │ GLACIER │ No │ +│ deepfreeze-000020 │ active │ GLACIER │ No │ +│ deepfreeze-000021 │ active │ GLACIER │ No │ +│ deepfreeze-000022 │ active │ GLACIER │ No │ +│ deepfreeze-000023 │ active │ GLACIER │ No │ +│ deepfreeze-000024 │ active │ GLACIER │ No │ +│ deepfreeze-000025 │ active │ GLACIER │ No │ +│ deepfreeze-000026 │ active │ GLACIER │ No │ +│ deepfreeze-000027 │ active │ GLACIER │ No │ +│ deepfreeze-000028 │ active │ GLACIER │ No │ +│ deepfreeze-000029 │ active │ GLACIER │ No │ +│ deepfreeze-000030 │ active │ GLACIER │ No │ +│ deepfreeze-000031 │ active │ GLACIER │ No │ +│ deepfreeze-000032 │ active │ GLACIER │ No │ +│ deepfreeze-000033 │ active │ GLACIER │ No │ +│ deepfreeze-000034 │ active │ GLACIER │ No │ +│ deepfreeze-000035 │ active │ GLACIER │ No │ +│ deepfreeze-000036 │ active │ GLACIER │ No │ +│ deepfreeze-000037 │ active │ GLACIER │ No │ +│ deepfreeze-000038 │ active │ GLACIER │ No │ +│ deepfreeze-000039 │ active │ GLACIER │ No │ +│ deepfreeze-000040 │ active │ GLACIER │ No │ +│ deepfreeze-000041 │ active │ GLACIER │ No │ +│ deepfreeze-000042 │ active │ GLACIER │ No │ +│ deepfreeze-000043 │ active │ GLACIER │ No │ +│ deepfreeze-000044 │ active │ GLACIER │ No │ +│ deepfreeze-000045 │ active │ GLACIER │ No │ +│ deepfreeze-000046 │ active │ GLACIER │ No │ +│ deepfreeze-000047 │ active │ GLACIER │ No │ +│ deepfreeze-000048 │ active │ GLACIER │ No │ +│ deepfreeze-000049 │ active │ GLACIER │ No │ +│ deepfreeze-000050 │ active │ GLACIER │ No │ +│ deepfreeze-000051 │ active │ GLACIER │ No │ 
+└───────────────────┴────────────────┴───────────────────┴─────────┘ + +DRY-RUN: No changes made. Run without --dry-run to apply fixes. +``` + +## Live Run Example + +```bash +curator_cli deepfreeze repair-metadata + +# Output: + +Metadata Repair Report (LIVE) + +Total repositories scanned: 58 +Repositories with correct metadata: 10 +Repositories with discrepancies: 48 + +Discrepancies Found: +[Table showing all 48 discrepancies...] + +Results: + Fixed: 48 + +# All 48 repositories now have correct metadata +``` + +## Troubleshooting + +### "No repositories found in status index" + +**Cause**: Deepfreeze not initialized, or status index deleted + +**Solution**: +```bash +# Verify status index exists +curl -X GET "localhost:9200/deepfreeze-status" + +# If missing, run setup +curator_cli deepfreeze setup +``` + +### Many discrepancies after upgrade + +**Expected**: This is normal if upgrading from version with bug + +**Action**: Run repair to fix them all + +```bash +curator_cli deepfreeze repair-metadata +``` + +### Discrepancies appear after repair + +**Unexpected**: Indicates ongoing issue + +**Action**: +1. Check for concurrent rotate operations +2. Verify bug fix is applied +3. Check for manual S3 operations +4. 
Review recent changes to rotate code + +### S3 permission errors + +**Error**: Access Denied when checking storage class + +**Solution**: Add required IAM permissions + +```json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "s3:ListBucket", + "s3:GetObjectAttributes", + "s3:GetObject" + ], + "Resource": [ + "arn:aws:s3:::deepfreeze*", + "arn:aws:s3:::deepfreeze*/*" + ] + } + ] +} +``` + +## Security Considerations + +- **IAM Permissions**: Requires S3 read permissions +- **Elasticsearch Permissions**: Requires write access to status index +- **Audit Trail**: All repairs logged +- **No Data Modification**: Only updates metadata, doesn't touch S3 objects +- **Safe Operation**: Can be run repeatedly without harm +- **Dry-Run Available**: Preview changes before applying + +## Future Enhancements + +Potential improvements for future versions: + +1. **Automatic Repair** + - Option to auto-repair in status/cleanup actions + - Periodic background verification + +2. **Extended Validation** + - Verify mounted status matches ES repository registry + - Check snapshot metadata consistency + +3. **Batch Operations** + - Concurrent S3 checks (with rate limiting) + - Faster processing of large deployments + +4. **Reporting** + - Export discrepancies to JSON/CSV + - Historical tracking of repairs + +5. **Integration** + - Prometheus metrics + - Alerting on persistent discrepancies diff --git a/curator/actions/deepfreeze/docs/rotate.md b/curator/actions/deepfreeze/docs/rotate.md new file mode 100644 index 00000000..9727ea2c --- /dev/null +++ b/curator/actions/deepfreeze/docs/rotate.md @@ -0,0 +1,882 @@ +# Rotate Action + +## Purpose + +The Rotate action creates a new repository and retires old ones, implementing the core lifecycle management strategy of deepfreeze. Rotation prevents any single repository from growing indefinitely, enables better cost management, and allows old data to be archived to colder storage tiers. 
+ +Rotation is typically run on a schedule (weekly, monthly, or when size thresholds are met) and orchestrates several critical operations: +1. Creates a new S3 bucket or base path +2. Registers a new Elasticsearch snapshot repository +3. Creates versioned ILM policies pointing to the new repository +4. Updates index templates to use the new policies +5. Unmounts and archives old repositories beyond the retention limit +6. Cleans up expired thawed repositories + +## Prerequisites + +### System Requirements + +1. **Deepfreeze Initialized** + - Setup action must have been run successfully + - At least one repository must exist + - `deepfreeze-status` index must exist with valid configuration + +2. **ILM Policies Exist** + - At least one ILM policy must reference the current repository + - Policies must use `searchable_snapshot` action + - **Critical**: Rotation REQUIRES existing ILM policies to create versioned copies + - If no policies exist, rotation fails immediately + +3. **AWS Credentials** + - Valid AWS credentials with S3 permissions + - Same permissions as Setup action + +4. **IAM Permissions** + - All Setup permissions (bucket/repository creation) + - Plus: `s3:PutLifecycleConfiguration` for bucket lifecycle policies + +5. **Elasticsearch Permissions** + - `snapshot.create_repository` - Create new repository + - `snapshot.delete_repository` - Unmount old repositories + - `ilm.put_policy` - Create versioned ILM policies + - `ilm.delete_policy` - Clean up old policies + - `template.update` - Update index templates + +### Planning Considerations + +1. **Rotation Frequency** + - Monthly: Common for most use cases + - Weekly: High-volume clusters (>100GB/day) + - Size-based: When repository exceeds threshold (e.g., 5TB) + +2. **Retention Policy** (`--keep` parameter) + - How many repositories to keep mounted + - Older repositories beyond this limit are unmounted and frozen + - Default: 6 repositories + +3. 
**Naming Strategy** (from Setup) + - `style=oneup`: Sequential (000001, 000002, ...) + - `style=date`: Monthly (2025.01, 2025.02, ...) + +4. **Repository Organization** (from Setup) + - `rotate_by=bucket`: New bucket per rotation + - `rotate_by=path`: Same bucket, different paths + +## Effects + +### Immediate Effects + +#### 1. Create New Repository + +**Bucket Creation** (if `rotate_by=bucket`): +- New S3 bucket: `{bucket_name_prefix}-{suffix}` +- Example: `deepfreeze-000007` + +**Repository Registration**: +- New repository: `{repo_name_prefix}-{suffix}` +- Example: `deepfreeze-000007` +- Registered in Elasticsearch +- Added to `deepfreeze-status` index + +#### 2. Create Versioned ILM Policies + +For each ILM policy that references the old repository: + +**Policy Analysis**: +- Finds all policies with `searchable_snapshot` actions +- Filters to policies referencing the current repository +- Strips old suffix (if exists) to get base policy name + +**Versioned Policy Creation**: +- Creates new policy: `{base_policy_name}-{suffix}` +- Example: `my-policy` → `my-policy-000007` +- Identical phases/actions except `snapshot_repository` updated +- New repository: `deepfreeze-000007` + +**Validation Check**: +- Warns if `delete_searchable_snapshot=true` +- This setting can delete snapshots when indices transition to delete phase + +#### 3. Update Index Templates + +**Composable Templates**: +- Scans all composable index templates +- Updates `index.lifecycle.name` setting in template +- Maps old policy name → new versioned policy name +- Example: `my-policy` → `my-policy-000007` + +**Legacy Templates**: +- Same process for legacy index templates +- Ensures backward compatibility + +**Effect**: +- **New indices** will use new policies (and thus new repository) +- **Existing indices** keep old policies (continue using old repository) + +#### 4. 
Update Repository Date Ranges + +For all repositories (mounted and unmounted): +- Scans snapshots in each repository +- Extracts index names and patterns +- Infers start and end dates from index names +- Updates `start` and `end` timestamps in `deepfreeze-status` index + +**Purpose**: Enables thaw action to find repositories by date range + +#### 5. Unmount Old Repositories + +**Selection**: +- Sorts repositories by suffix (descending) +- Keeps first `keep` repositories mounted +- Unmounts remaining repositories + +**For Each Unmounted Repository**: +- Skips if repository is `thawed` or `thawing` (safety check) +- Unregisters from Elasticsearch: `DELETE /_snapshot/{repo_name}` +- Pushes objects to Glacier (if not already in Glacier storage class) +- Updates repository state to `frozen` in `deepfreeze-status` index +- Cleans up associated ILM policies (see Policy Cleanup below) + +#### 6. Clean Up ILM Policies for Unmounted Repositories + +For each unmounted repository: +- Extracts suffix from repository name +- Finds all ILM policies with matching suffix +- For each policy: + - Checks if safe to delete (not used by indices/datastreams/templates) + - Deletes if safe + - Skips if still in use + +#### 7. 
Run Cleanup Action
+
+After rotation completes, automatically runs Cleanup action to:
+- Detect expired thawed repositories
+- Unmount expired repositories
+- Delete indices from expired repositories
+- Clean up old thaw requests
+- Clean up orphaned thawed ILM policies
+
+### Ongoing Effects
+
+**New Snapshots**:
+- All new ILM-managed snapshots go to the new repository
+- Old repositories receive no new snapshots (frozen in time)
+
+**Index Lifecycle**:
+- New indices follow new policies
+- Existing indices follow old policies (eventual transition to old repos)
+
+**Repository Growth**:
+- New repository starts empty, grows over time
+- Old repositories remain static in size
+
+## Options
+
+### Required Options
+
+None - all options have defaults, but some scenarios require parameters.
+
+### Retention Configuration
+
+#### `--keep <count>`
+- **Type**: Integer
+- **Default**: `6`
+- **Description**: Number of repositories to keep mounted (active)
+- **Range**: Typically `3` to `12`
+- **Calculation**: Repositories beyond this count are unmounted and frozen
+- **Example**: `--keep 4` keeps the 4 most recent repositories, unmounts others
+
+**Planning Guide**:
+```
+Monthly rotation, keep=6:
+- Keeps last 6 months mounted
+- Older than 6 months: frozen (Glacier)
+
+Weekly rotation, keep=12:
+- Keeps last 12 weeks mounted (3 months)
+- Older than 3 months: frozen
+
+Daily rotation, keep=30:
+- Keeps last 30 days mounted
+- Older than 30 days: frozen
+```
+
+### Date-Based Rotation (Optional)
+
+#### `--year <year>` and `--month <month>`
+- **Type**: Integer
+- **Required**: Only when `style=date` (configured in Setup)
+- **Default**: None (ignored when `style=oneup`)
+- **Description**: Override year and month for suffix
+- **Example**: `--year 2025 --month 2` creates repository with suffix `2025.02`
+
+**Use Cases**:
+- Manual rotation for specific month
+- Catch-up rotation after downtime
+- Testing rotation for future months
+
+**Normal Usage** (oneup style):
+```bash
+# No 
year/month needed - suffix auto-increments
+curator_cli deepfreeze rotate --keep 6
+```
+
+**Date-based Usage**:
+```bash
+# Explicit month specification
+curator_cli deepfreeze rotate --year 2025 --month 2 --keep 6
+```
+
+## Usage Examples
+
+### Basic Monthly Rotation
+
+```bash
+# Rotate with default retention (keep 6)
+curator_cli deepfreeze rotate
+
+# Creates:
+# - New repository: deepfreeze-000007
+# - Versioned policies: my-policy-000007, etc.
+# - Updates templates
+# - Unmounts repositories older than position 6
+```
+
+### Custom Retention
+
+```bash
+# Keep only last 3 months mounted
+curator_cli deepfreeze rotate --keep 3
+
+# More aggressive rotation:
+# - Unmounts more repositories
+# - Frees Elasticsearch resources
+# - More data in cold storage
+```
+
+### High Retention
+
+```bash
+# Keep 12 repositories mounted (1 year for monthly rotation)
+curator_cli deepfreeze rotate --keep 12
+
+# Conservative approach:
+# - More data readily accessible
+# - Higher Elasticsearch resource usage
+# - Less data in cold storage
+```
+
+### Date-Based Rotation
+
+```bash
+# Rotate for specific month (requires style=date in Setup)
+curator_cli deepfreeze rotate --year 2025 --month 3 --keep 6
+
+# Creates:
+# - Repository: deepfreeze-2025.03
+# - Policies: my-policy-2025.03
+```
+
+### Dry Run
+
+```bash
+# Preview rotation without making changes
+curator_cli deepfreeze rotate --dry-run
+
+# Output shows:
+# - New repository that would be created
+# - ILM policies that would be versioned
+# - Templates that would be updated
+# - Repositories that would be unmounted
+# - Policies that would be cleaned up
+```
+
+### Scheduled Rotation (Cron)
+
+```bash
+# /etc/cron.d/deepfreeze-rotate
+# Run rotation on first day of each month at 2 AM
+# (note: /etc/cron.d entries require a user field after the time spec)
+0 2 1 * * root curator_cli deepfreeze rotate --keep 6 >> /var/log/deepfreeze-rotate.log 2>&1
+```
+
+### Size-Based Rotation (Scripted)
+
+```bash
+#!/bin/bash
+# Rotate when current repository exceeds 5TB
+
+REPO_NAME="deepfreeze-000006" 
# Current repo +SIZE=$(aws s3 ls --summarize --recursive s3://deepfreeze/snapshots-000006/ | grep "Total Size" | awk '{print $3}') +SIZE_TB=$((SIZE / 1024 / 1024 / 1024 / 1024)) + +if [ "$SIZE_TB" -gt 5 ]; then + echo "Repository size ${SIZE_TB}TB exceeds threshold, rotating..." + curator_cli deepfreeze rotate --keep 6 +else + echo "Repository size ${SIZE_TB}TB within limits, no rotation needed" +fi +``` + +## Error Handling + +### Common Errors and Solutions + +#### 1. No ILM Policies Found + +**Error**: `No ILM policies found that reference repository deepfreeze-000006. Rotation requires existing ILM policies to create versioned copies.` + +**Cause**: No ILM policies use the current repository + +**Solutions**: + +**Option 1**: Create ILM policy manually +```bash +curl -X PUT "http://localhost:9200/_ilm/policy/my-policy" -H 'Content-Type: application/json' -d' +{ + "policy": { + "phases": { + "hot": { + "actions": { + "rollover": { + "max_size": "50GB", + "max_age": "7d" + } + } + }, + "frozen": { + "min_age": "30d", + "actions": { + "searchable_snapshot": { + "snapshot_repository": "deepfreeze-000006" + } + } + }, + "delete": { + "min_age": "365d", + "actions": { + "delete": { + "delete_searchable_snapshot": false + } + } + } + } + } +}' +``` + +**Option 2**: Re-run setup with sample policy +```bash +# This creates a sample policy +curator_cli deepfreeze setup --create-sample-ilm-policy +``` + +**Option 3**: Update existing policy to reference deepfreeze repo +```bash +# If you have policies that use different repositories, +# update them to use deepfreeze repository before rotating +``` + +#### 2. 
Repository Already Exists + +**Error**: `Repository deepfreeze-000007 already exists` + +**Causes**: +- Previous rotation failed partway through +- Manual repository creation conflict +- Clock/suffix issue with date-based rotation + +**Solutions**: +```bash +# Check existing repositories +curator_cli deepfreeze status --show-repos + +# If repository is stale/incomplete, delete it +curl -X DELETE 'http://localhost:9200/_snapshot/deepfreeze-000007' + +# Retry rotation +curator_cli deepfreeze rotate --keep 6 +``` + +#### 3. S3 Bucket Already Exists (bucket rotation) + +**Error**: `Failed to create bucket: BucketAlreadyExists` + +**Causes**: +- Previous rotation failed +- Bucket name conflict (global namespace) + +**Solutions**: +```bash +# Check if bucket exists +aws s3 ls s3://deepfreeze-000007 + +# If empty or stale, delete it +aws s3 rb s3://deepfreeze-000007 --force + +# Retry rotation +curator_cli deepfreeze rotate --keep 6 +``` + +#### 4. Template Update Failed + +**Error**: `Could not update template my-template: ...` + +**Cause**: Template doesn't exist or has syntax issues + +**Effect**: Non-critical - rotation continues, but template won't use new policy + +**Solutions**: +```bash +# Manually update template after rotation +curl -X PUT "http://localhost:9200/_index_template/my-template" -H 'Content-Type: application/json' -d' +{ + "index_patterns": ["my-index-*"], + "template": { + "settings": { + "index.lifecycle.name": "my-policy-000007" + } + } +}' +``` + +#### 5. Unable to Unmount Repository (Thawed) + +**Warning**: `Skipping thawed repo deepfreeze-000004` + +**Cause**: Repository is currently thawed (actively being accessed) + +**Effect**: Repository NOT unmounted (safety feature) + +**Action**: This is intentional - thawed repositories are protected +- Wait for thaw to complete/expire +- Or manually refreeze: + ```bash + curator_cli deepfreeze refreeze --thaw-request-id + ``` +- Then re-run rotation + +#### 6. 
Policy Deletion Skipped (Still in Use)
+
+**Warning**: `Skipping policy my-policy-000003 (still in use by indices/datastreams/templates)`
+
+**Cause**: Old versioned policy still has indices assigned
+
+**Effect**: Non-critical - policy remains until indices are deleted
+
+**Action**: Normal behavior - old policies cleaned up when indices eventually age out
+
+## Best Practices
+
+### Rotation Frequency
+
+#### Monthly Rotation (Most Common)
+- **Use Case**: Standard log retention (30-90 days hot, older in cold)
+- **Keep Setting**: `--keep 6` (6 months mounted)
+- **Schedule**: `0 2 1 * *` (2 AM on 1st of month)
+
+#### Weekly Rotation
+- **Use Case**: High-volume logging (100GB+/day)
+- **Keep Setting**: `--keep 12` (12 weeks ≈ 3 months)
+- **Schedule**: `0 2 * * 0` (2 AM on Sundays)
+
+#### Size-Based Rotation
+- **Use Case**: Variable ingestion rates
+- **Threshold**: Typically 5TB or 10TB per repository
+- **Schedule**: Daily check + conditional rotation
+
+### Retention Planning (`--keep`)
+
+**Formula**: `keep = (days of hot data) / (rotation frequency in days)`
+
+**Examples**:
+```
+Monthly rotation, want 6 months hot:
+keep = 180 / 30 = 6
+
+Weekly rotation, want 3 months hot:
+keep = 90 / 7 ≈ 12-13
+
+Daily rotation, want 30 days hot:
+keep = 30 / 1 = 30
+```
+
+**Considerations**:
+- **More Mounted Repos** (higher `keep`):
+  - ✅ Faster queries (no thaw needed)
+  - ✅ Better for frequent access patterns
+  - ❌ Higher Elasticsearch resource usage
+  - ❌ More storage costs
+
+- **Fewer Mounted Repos** (lower `keep`):
+  - ✅ Lower Elasticsearch resource usage
+  - ✅ Lower storage costs
+  - ❌ Requires thaw for older data
+  - ❌ Slower access to historical data
+
+### Before Rotation
+
+1. **Verify ILM Policies Exist**
+   ```bash
+   curator_cli deepfreeze status --show-ilm
+   ```
+
+2. 
**Check Current Repository Usage** + ```bash + # Size of current repository + aws s3 ls --summarize --recursive s3://my-bucket/snapshots-000006/ + + # Snapshot count + curl -X GET 'http://localhost:9200/_snapshot/deepfreeze-000006/_all' | jq '.snapshots | length' + ``` + +3. **Review Retention Strategy** + - How many repos currently mounted? + - Is `--keep` setting appropriate? + - Any thawed repos that should be refrozen first? + +4. **Check for Thawed Repositories** + ```bash + curator_cli deepfreeze status --show-thawed + ``` + - Thawed repos are NOT unmounted during rotation (safety) + - Consider refreezing before rotating + +5. **Dry Run** + ```bash + curator_cli deepfreeze rotate --dry-run --keep 6 + ``` + +### During Rotation + +1. **Monitor Progress** + - Rotation logs extensively + - Watch for errors in ILM policy creation + - Verify template updates succeed + +2. **Expect Brief Impact** + - ILM policy creation: < 1 second per policy + - Template updates: < 1 second per template + - Repository unmounting: < 1 second per repo + - Total typical time: 10-30 seconds + +### After Rotation + +1. **Verify New Repository** + ```bash + curator_cli deepfreeze status --show-repos + # Should show new repository as active (marked with *) + ``` + +2. **Check Versioned Policies Created** + ```bash + curator_cli deepfreeze status --show-ilm + # Should show policies with new suffix + ``` + +3. **Verify Templates Updated** + ```bash + # List index templates + curl -X GET 'http://localhost:9200/_index_template' + + # Check specific template + curl -X GET 'http://localhost:9200/_index_template/my-template' | jq '.index_templates[0].index_template.template.settings."index.lifecycle.name"' + # Should show: "my-policy-000007" (new suffix) + ``` + +4. **Monitor New Snapshots** + ```bash + # Wait for next ILM snapshot action + # Verify it goes to new repository + curl -X GET 'http://localhost:9200/_snapshot/deepfreeze-000007/_all' + ``` + +5. 
**Verify Old Repos Unmounted** + ```bash + curl -X GET 'http://localhost:9200/_snapshot/_all' | jq 'keys' + # Should only show last 'keep' repositories + ``` + +6. **Check Cleanup Occurred** + - Cleanup action runs automatically after rotation + - Check logs for expired thaw cleanup + - Verify orphaned policies removed + +## Rotation Lifecycle + +### Complete Workflow + +``` +1. User runs rotate command + ↓ +2. Validate ILM policies exist (fail-fast if none) + ↓ +3. Generate new suffix (oneup: increment, date: from --year/--month) + ↓ +4. Create new S3 bucket (if rotate_by=bucket) + ↓ +5. Register new Elasticsearch repository + ↓ +6. Find all ILM policies referencing current repository + ↓ +7. For each policy: + a. Strip old suffix (if exists) + b. Create versioned policy with new suffix + c. Update snapshot_repository to new repo + ↓ +8. Update all index templates: + a. Scan composable templates + b. Update ILM policy references + c. Scan legacy templates + d. Update ILM policy references + ↓ +9. Update date ranges for all repositories + ↓ +10. Determine repositories to unmount (beyond --keep) + ↓ +11. For each old repository to unmount: + a. Skip if thawed (safety) + b. Unmount from Elasticsearch + c. Push objects to Glacier (if not already) + d. Update state to frozen + e. Clean up associated ILM policies + ↓ +12. Save updated settings (last_suffix) + ↓ +13. Run Cleanup action + ↓ +14. 
Report success +``` + +## State Transitions + +### Repository States During Rotation + +``` +Active Repository (deepfreeze-000006): +- Remains mounted +- No longer receives new snapshots (after rotation) +- Eventually unmounted in future rotation (when beyond --keep) + +New Repository (deepfreeze-000007): +- Created during rotation +- Becomes active (receives new snapshots) +- Marked with * in status output + +Old Repositories (deepfreeze-000001 to 000005): +- Status depends on --keep value +- If within keep limit: remain mounted +- If beyond keep limit: unmounted → frozen +``` + +### ILM Policy Lifecycle + +``` +Original Policy: my-policy + ↓ +Rotation 1: my-policy-000001 created + ↓ +Rotation 2: my-policy-000002 created + ↓ +... (keep using old policies) + ↓ +When indices using my-policy-000001 deleted: + ↓ +Cleanup removes my-policy-000001 (safe to delete) +``` + +## Versioned ILM Policies + +### Why Versioning? + +**Problem**: Modifying existing policies affects all indices using them +- Existing indices would suddenly switch repositories +- Could break ongoing snapshots +- Creates race conditions + +**Solution**: Create NEW versioned policies for each rotation +- Existing indices keep old policies → old repositories +- New indices get new policies → new repository +- Clean separation, no conflicts + +### Policy Versioning Example + +**Before Rotation** (Repository: deepfreeze-000006): +```json +{ + "my-logs-policy": { + "policy": { + "phases": { + "frozen": { + "actions": { + "searchable_snapshot": { + "snapshot_repository": "deepfreeze-000006" + } + } + } + } + } + } +} +``` + +**After Rotation** (Repository: deepfreeze-000007): +```json +{ + "my-logs-policy-000007": { + "policy": { + "phases": { + "frozen": { + "actions": { + "searchable_snapshot": { + "snapshot_repository": "deepfreeze-000007" + } + } + } + } + } + } +} +``` + +**Index Template Updated**: +```json +{ + "my-template": { + "index_patterns": ["logs-*"], + "template": { + "settings": { + 
"index.lifecycle.name": "my-logs-policy-000007" + } + } + } +} +``` + +**Result**: +- New indices (logs-2025.02.01-000001) → `my-logs-policy-000007` → `deepfreeze-000007` +- Old indices (logs-2025.01.15-000023) → `my-logs-policy-000006` → `deepfreeze-000006` + +## Policy Cleanup + +### When Policies Are Deleted + +During rotation, when unmounting old repositories, associated policies are cleaned up: + +1. **Extract Suffix**: From repository name (e.g., `deepfreeze-000003` → `000003`) +2. **Find Matching Policies**: All policies ending with `-000003` +3. **Safety Check**: For each policy: + - Check if used by indices + - Check if used by datastreams + - Check if referenced in templates +4. **Delete if Safe**: Only delete when no usage found +5. **Skip if In Use**: Keep policy until indices age out + +**Example**: +``` +Repository unmounted: deepfreeze-000003 + +Policies found: my-logs-policy-000003, security-policy-000003 + +Check my-logs-policy-000003: +- 0 indices using it (all aged out) +- 0 datastreams +- 0 templates +→ DELETE my-logs-policy-000003 ✅ + +Check security-policy-000003: +- 12 indices still using it +→ SKIP security-policy-000003 ⏭️ + (will be cleaned up when indices deleted) +``` + +## Related Actions + +- **Setup**: Initialize deepfreeze (required first) +- **Status**: View repositories, ILM policies, configuration +- **Cleanup**: Automatically run after rotation +- **Thaw**: Access data from frozen repositories +- **Refreeze**: Unmount thawed repositories + +## Performance Considerations + +### Rotation Speed + +Typical rotation times: +- New repository creation: < 5 seconds +- ILM policy versioning: < 1 second per policy +- Template updates: < 1 second per template +- Repository unmounting: < 1 second per repo +- Cleanup action: 5-30 seconds + +**Total Time**: Usually 30-60 seconds for typical deployments + +### Resource Impact + +- **Minimal CPU**: Simple API operations +- **Minimal Memory**: Small state updates +- **Network**: Elasticsearch API 
calls, S3 bucket creation +- **Cluster Load**: Very low - safe to run during business hours + +### Scheduling Recommendations + +- **Time**: Off-peak hours (e.g., 2 AM) +- **Frequency**: Aligned with data lifecycle (usually monthly) +- **Avoid**: During heavy ingestion periods or maintenance windows + +## Security Considerations + +- **IAM Permissions**: Use least privilege (only required S3/ES permissions) +- **Audit Trail**: All operations logged +- **Policy Safety**: Checks prevent deletion of in-use policies +- **Thaw Protection**: Thawed repositories not unmounted automatically +- **Reversible**: Can re-mount repositories if needed (via thaw) + +## Cost Implications + +### S3 Costs + +**New Repository**: +- Standard storage (while mounted): ~$0.023/GB/month +- Minimal cost initially (empty) + +**Old Repositories**: +- Unmounted and objects pushed to Glacier +- Glacier storage: ~$0.004/GB/month (80% cheaper) +- Transition from Standard → Glacier saves costs + +### Elasticsearch Costs + +**Mounted Repositories**: +- Count controlled by `--keep` parameter +- More mounted = more metadata overhead +- Less mounted = lower resource usage + +**Optimization**: +- Lower `--keep` = lower Elasticsearch costs +- Higher `--keep` = faster access (no thaw needed) + +### Example Cost Analysis + +``` +Monthly rotation, keep=6, 1TB/month: + +Month 1: 1TB in Standard ($23) +Month 2: 2TB in Standard ($46) +... +Month 6: 6TB in Standard ($138) +Month 7: Rotation! + - 6TB in Standard ($138) - keep=6 + - 1TB → Glacier ($4) +Month 8: + - 6TB in Standard ($138) + - 2TB in Glacier ($8) +... 
+Steady state (after initial fill):
+  - 6TB in Standard ($138)
+  - Remaining in Glacier ($4/TB/month)
+
+Annual savings vs all-Standard (steady state, 12TB total):
+  - All in Standard: 12TB × $23/TB/month × 12 months = $3,312/year
+  - With rotation (keep=6): 6TB Standard ($1,656/year) + 6TB Glacier ($288/year) = $1,944/year
+  - Savings: $1,368/year (~41% reduction)
+```
diff --git a/curator/actions/deepfreeze/docs/setup.md b/curator/actions/deepfreeze/docs/setup.md
new file mode 100644
index 00000000..f11fe560
--- /dev/null
+++ b/curator/actions/deepfreeze/docs/setup.md
@@ -0,0 +1,496 @@
+# Setup Action
+
+## Purpose
+
+The Setup action initializes the deepfreeze environment by creating the first repository and S3 bucket for long-term cold storage of Elasticsearch snapshots. This is a one-time initialization step that must be performed before any other deepfreeze operations.
+
+Setup creates:
+- An S3 bucket for storing snapshots
+- An Elasticsearch snapshot repository pointing to that bucket
+- A status index (`deepfreeze-status`) to track repository and thaw request metadata
+- (Optional) A sample ILM policy demonstrating integration with searchable snapshots
+
+## Prerequisites
+
+### Required Before Running Setup
+
+1. **Elasticsearch Cluster**
+   - Running Elasticsearch 7.x or 8.x
+   - Cluster must be healthy and accessible
+   - For ES 7.x: `repository-s3` plugin must be installed on all nodes
+   - For ES 8.x+: S3 repository support is built-in
+
+2. **AWS Credentials**
+   - Valid AWS credentials with S3 permissions
+   - Credentials configured in one of:
+     - Environment variables (`AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`)
+     - Elasticsearch keystore (for cluster-wide credentials)
+     - AWS credentials file (`~/.aws/credentials`)
+
+3. 
**IAM Permissions** + - `s3:CreateBucket` - Create new S3 buckets + - `s3:PutObject` - Write snapshot data + - `s3:GetObject` - Read snapshot data + - `s3:DeleteObject` - Clean up old snapshots + - `s3:ListBucket` - List bucket contents + - `s3:PutBucketAcl` - Set bucket ACL (if using canned ACLs) + +4. **Clean Environment** + - No existing repositories with the configured prefix + - No existing `deepfreeze-status` index + - No existing S3 buckets with the configured name + +### Precondition Checks + +Setup performs comprehensive validation before making any changes: + +- **Status Index**: Verifies `deepfreeze-status` index does not already exist +- **Repository Prefix**: Checks no repositories match the configured prefix +- **S3 Bucket**: Confirms bucket name is available (not already in use) +- **S3 Plugin** (ES 7.x only): Validates `repository-s3` plugin is installed +- **Cluster Health**: Ensures cluster is accessible and responsive + +If any precondition fails, Setup displays detailed error messages with solutions and exits without making changes. + +## Effects + +### What Setup Creates + +1. **S3 Bucket** + - Name: `{bucket_name_prefix}-{suffix}` (if `rotate_by=bucket`) + - OR: `{bucket_name_prefix}` (if `rotate_by=path`) + - Region: Determined by AWS credentials/configuration + - Storage class: As configured (default: `intelligent_tiering`) + - ACL: As configured (default: `private`) + +2. **Elasticsearch Snapshot Repository** + - Name: `{repo_name_prefix}-{suffix}` + - Type: `s3` + - Settings: + - `bucket`: The created S3 bucket name + - `base_path`: `{base_path_prefix}-{suffix}` (or just `{base_path_prefix}` if `rotate_by=bucket`) + - `canned_acl`: As configured + - `storage_class`: As configured + +3. **Status Index** (`deepfreeze-status`) + - Stores configuration settings + - Tracks repository metadata (mount status, thaw state, date ranges) + - Records thaw request history + - Schema is created automatically + +4. 
**Configuration Document** + - Saved in `deepfreeze-status` index + - Contains all deepfreeze settings: + - Repository and bucket naming patterns + - Rotation strategy (`bucket` or `path`) + - Naming style (`oneup` or date-based) + - Storage class and ACL settings + - Last-used suffix for rotation + +5. **Sample ILM Policy** (Optional) + - Name: `{ilm_policy_name}` (default: `deepfreeze-sample-policy`) + - Demonstrates integration with searchable snapshots + - Phases: + - **Hot**: Rollover at 45GB or 7 days + - **Frozen**: Convert to searchable snapshot after 14 days + - **Delete**: Delete index after 365 days (preserves snapshot) + +### State Changes + +- Elasticsearch cluster gains a new snapshot repository +- AWS S3 account gains a new bucket +- Deepfreeze system transitions from uninitialized to operational + +### What Setup Does NOT Do + +- Does not create any snapshots +- Does not modify existing repositories or indices +- Does not configure ILM policies on indices (except optional sample) +- Does not modify Elasticsearch cluster settings + +## Options + +### Required Configuration + +These settings must be provided (either have sensible defaults or are required): + +#### `repo_name_prefix` +- **Type**: String +- **Default**: `deepfreeze` +- **Description**: Prefix for repository names. 
Repositories are named `{prefix}-{suffix}` +- **Example**: `repo_name_prefix="myapp"` creates repositories like `myapp-000001`, `myapp-000002` + +#### `bucket_name_prefix` +- **Type**: String +- **Default**: `deepfreeze` +- **Description**: Prefix for S3 bucket names (or full bucket name if `rotate_by=path`) +- **Example**: `bucket_name_prefix="mycompany-es-cold"` creates buckets like `mycompany-es-cold-000001` +- **Important**: Bucket names must be globally unique across all AWS accounts + +#### `base_path_prefix` +- **Type**: String +- **Default**: `snapshots` +- **Description**: Path within the S3 bucket where snapshots are stored +- **Example**: `base_path_prefix="elasticsearch/backups"` stores snapshots under `s3://bucket/elasticsearch/backups-000001/` + +### Storage Configuration + +#### `storage_class` +- **Type**: String +- **Default**: `intelligent_tiering` +- **Options**: + - `standard` - S3 Standard (frequent access) + - `standard_ia` - Infrequent Access + - `intelligent_tiering` - Automatic tiering + - `glacier_instant_retrieval` - Instant retrieval from Glacier + - `glacier_flexible_retrieval` - Minutes-hours retrieval (not recommended for searchable snapshots) + - `glacier_deep_archive` - Hours retrieval (not recommended for searchable snapshots) +- **Description**: AWS S3 storage class for snapshot objects +- **Recommendation**: Use `intelligent_tiering` for automatic cost optimization, or `glacier_instant_retrieval` for long-term cold storage with instant access capability + +#### `canned_acl` +- **Type**: String +- **Default**: `private` +- **Options**: `private`, `public-read`, `public-read-write`, `authenticated-read`, `bucket-owner-read`, `bucket-owner-full-control` +- **Description**: AWS S3 canned ACL applied to the bucket +- **Security**: Use `private` unless you have specific requirements +- **Reference**: [AWS S3 Canned ACL Documentation](https://docs.aws.amazon.com/AmazonS3/latest/userguide/acl-overview.html#canned-acl) + +### Rotation 
Strategy + +#### `rotate_by` +- **Type**: String +- **Default**: `path` +- **Options**: `bucket`, `path` +- **Description**: Determines how repositories are isolated when rotating + - `bucket`: Each rotation creates a new S3 bucket + - `path`: All rotations use the same bucket with different base paths +- **Use Cases**: + - `bucket`: Better for compliance/auditing (each period is completely isolated) + - `path`: More cost-effective (single bucket, easier management) +- **Example**: + - `rotate_by=bucket`: `s3://myapp-000001/snapshots`, `s3://myapp-000002/snapshots` + - `rotate_by=path`: `s3://myapp/snapshots-000001`, `s3://myapp/snapshots-000002` + +#### `style` +- **Type**: String +- **Default**: `oneup` +- **Options**: `oneup`, `date` +- **Description**: Naming convention for repository suffixes + - `oneup`: Sequential numbering (000001, 000002, ...) + - `date`: Date-based (YYYY.MM format) +- **Use Cases**: + - `oneup`: Simple, no dependency on current date + - `date`: Clear temporal organization (requires `--year` and `--month` flags) + +#### `year` and `month` +- **Type**: Integer +- **Default**: None (not used with `style=oneup`) +- **Description**: Override year/month for date-based suffixes +- **Example**: `--year 2025 --month 1` creates suffix `2025.01` +- **Required**: Only when `style=date` + +### Cloud Provider + +#### `provider` +- **Type**: String +- **Default**: `aws` +- **Options**: Currently only `aws` is supported +- **Description**: Cloud provider for object storage +- **Future**: May support Azure, GCP in future releases + +### Optional Features + +#### `create_sample_ilm_policy` +- **Type**: Boolean +- **Default**: `False` +- **Description**: Create a sample ILM policy demonstrating deepfreeze integration +- **Use Case**: Educational/demonstration purposes, or as a starting template +- **Warning**: This is an example policy; review and customize for production use + +#### `ilm_policy_name` +- **Type**: String +- **Default**: 
`deepfreeze-sample-policy` +- **Description**: Name for the sample ILM policy +- **Only Used**: When `create_sample_ilm_policy=True` + +### Output Format + +#### `porcelain` +- **Type**: Boolean +- **Default**: `False` +- **Description**: Output machine-readable tab-separated values instead of rich formatted text +- **Use Case**: Scripting, automation, CI/CD pipelines +- **Output Format**: + - Success: `SUCCESS\t{repo_name}\t{bucket_name}\t{base_path}` + - Error: `ERROR\t{error_type}\t{error_message}` + +## Usage Examples + +### Basic Setup (Defaults) + +```bash +curator_cli deepfreeze setup +``` + +Creates: +- Repository: `deepfreeze-000001` +- Bucket: `deepfreeze` (with path rotation) +- Base path: `snapshots-000001` + +### Custom Naming + +```bash +curator_cli deepfreeze setup \ + --repo-name-prefix myapp \ + --bucket-name-prefix mycompany-es-cold \ + --base-path-prefix backups +``` + +Creates: +- Repository: `myapp-000001` +- Bucket: `mycompany-es-cold` (with path rotation) +- Base path: `backups-000001` + +### Bucket-Based Rotation + +```bash +curator_cli deepfreeze setup \ + --rotate-by bucket +``` + +Creates: +- Repository: `deepfreeze-000001` +- Bucket: `deepfreeze-000001` (new bucket per rotation) +- Base path: `snapshots` (static) + +### Date-Based Rotation + +```bash +curator_cli deepfreeze setup \ + --style date \ + --year 2025 \ + --month 1 +``` + +Creates: +- Repository: `deepfreeze-2025.01` +- Bucket: `deepfreeze` +- Base path: `snapshots-2025.01` + +### With Sample ILM Policy + +```bash +curator_cli deepfreeze setup \ + --create-sample-ilm-policy \ + --ilm-policy-name my-tiering-policy +``` + +Creates everything plus ILM policy `my-tiering-policy` + +### Custom Storage Class + +```bash +curator_cli deepfreeze setup \ + --storage-class glacier_instant_retrieval +``` + +Optimizes for long-term cold storage with instant retrieval capability + +### Scripting/Automation + +```bash +curator_cli deepfreeze setup --porcelain > setup_result.txt +if grep -q 
"^SUCCESS" setup_result.txt; then + echo "Setup completed successfully" +else + echo "Setup failed:" + cat setup_result.txt + exit 1 +fi +``` + +## Error Handling + +### Common Errors and Solutions + +#### 1. Status Index Already Exists + +**Error**: `Status index deepfreeze-status already exists` + +**Cause**: Deepfreeze has already been initialized + +**Solutions**: +- If this is intentional, delete the existing setup: + ```bash + curator_cli --host localhost DELETE index --name deepfreeze-status + ``` +- If you want to keep the existing setup, use `rotate` instead of `setup` + +#### 2. Repository Prefix Exists + +**Error**: `Found N existing repositories matching prefix deepfreeze` + +**Cause**: Repositories with the configured prefix already exist + +**Solutions**: +- Choose a different `repo_name_prefix` +- Delete existing repositories (⚠️ WARNING: Ensure you have backups!) + ```bash + curator_cli deepfreeze cleanup + ``` + +#### 3. S3 Bucket Already Exists + +**Error**: `S3 bucket deepfreeze-000001 already exists` + +**Cause**: Bucket name is already in use (either by you or globally by another AWS account) + +**Solutions**: +- Choose a different `bucket_name_prefix` +- Delete the existing bucket (⚠️ WARNING: This deletes all data!) + ```bash + aws s3 rb s3://deepfreeze-000001 --force + ``` +- If the bucket is owned by another account, you must use a different name + +#### 4. S3 Repository Plugin Not Installed (ES 7.x) + +**Error**: `Elasticsearch S3 repository plugin is not installed` + +**Cause**: ES 7.x requires the `repository-s3` plugin + +**Solution**: +```bash +# On each Elasticsearch node: +bin/elasticsearch-plugin install repository-s3 +# Then restart all nodes +``` + +#### 5. 
AWS Credentials Not Found + +**Error**: `Failed to create bucket: The security token included in the request is invalid` + +**Cause**: AWS credentials are missing or invalid + +**Solutions**: +- Set environment variables: + ```bash + export AWS_ACCESS_KEY_ID=your_access_key + export AWS_SECRET_ACCESS_KEY=your_secret_key + ``` +- Configure Elasticsearch keystore: + ```bash + bin/elasticsearch-keystore add s3.client.default.access_key + bin/elasticsearch-keystore add s3.client.default.secret_key + ``` + +#### 6. Insufficient IAM Permissions + +**Error**: `Failed to create repository: Access Denied` + +**Cause**: AWS credentials lack required S3 permissions + +**Solution**: Ensure IAM policy includes: +```json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "s3:CreateBucket", + "s3:ListBucket", + "s3:GetObject", + "s3:PutObject", + "s3:DeleteObject" + ], + "Resource": [ + "arn:aws:s3:::deepfreeze*", + "arn:aws:s3:::deepfreeze*/*" + ] + } + ] +} +``` + +## Best Practices + +### Before Setup + +1. **Plan Your Naming Convention**: Choose prefixes that are: + - Descriptive (e.g., `myapp-prod-es-cold`) + - Compliant with AWS naming rules (lowercase, hyphens only) + - Globally unique for bucket names + +2. **Choose Storage Class Carefully**: + - Use `intelligent_tiering` for automatic optimization + - Use `glacier_instant_retrieval` for long-term cold storage + - Avoid `glacier_flexible_retrieval` or `glacier_deep_archive` for searchable snapshots + +3. **Decide Rotation Strategy**: + - `rotate_by=bucket`: Better isolation, compliance, auditing + - `rotate_by=path`: More cost-effective, simpler management + +4. **Test AWS Credentials**: + ```bash + aws s3 ls # Verify credentials work + ``` + +### After Setup + +1. **Verify Repository**: + ```bash + curator_cli deepfreeze status --show-config --show-repos + ``` + +2. 
**Configure ILM Policies**: Update your existing ILM policies to use the new repository: + ```json + { + "policy": { + "phases": { + "frozen": { + "actions": { + "searchable_snapshot": { + "snapshot_repository": "deepfreeze-000001" + } + } + }, + "delete": { + "actions": { + "delete": { + "delete_searchable_snapshot": false + } + } + } + } + } + } + ``` + +3. **Set Up Rotation Schedule**: Plan when to run `rotate` action (typically monthly or when repository reaches size limits) + +4. **Document Your Configuration**: Save your setup parameters for disaster recovery: + ```bash + curator_cli deepfreeze status --show-config --porcelain > deepfreeze-config.txt + ``` + +## Related Actions + +- **Rotate**: Create new repositories and retire old ones +- **Status**: View current configuration and repository state +- **Thaw**: Restore data from cold storage for access + +## Security Considerations + +- **Bucket ACLs**: Use `private` unless you have specific requirements +- **IAM Policies**: Follow principle of least privilege +- **Encryption**: Enable S3 bucket encryption at rest +- **Credentials**: Store in Elasticsearch keystore, not in plain text +- **Network**: Use VPC endpoints for S3 to avoid internet traffic + +## Performance Considerations + +- **Storage Class**: `intelligent_tiering` has no retrieval fees for frequent access +- **Bucket vs Path**: Path rotation is faster (no bucket creation overhead) +- **Region**: Ensure S3 bucket and ES cluster are in the same AWS region to minimize latency and transfer costs diff --git a/curator/actions/deepfreeze/docs/status.md b/curator/actions/deepfreeze/docs/status.md new file mode 100644 index 00000000..a4996feb --- /dev/null +++ b/curator/actions/deepfreeze/docs/status.md @@ -0,0 +1,594 @@ +# Status Action + +## Purpose + +The Status action provides comprehensive visibility into the current state of the deepfreeze system. 
It displays repositories, thawed repositories, S3 buckets, ILM policies, and configuration settings in an organized, easy-to-read format. + +Status is a read-only action that makes no changes to the system. It's your primary tool for: +- Monitoring repository states and health +- Tracking thawed repositories and their expiration +- Verifying ILM policy configurations +- Auditing system configuration +- Troubleshooting issues + +## Prerequisites + +### System Requirements + +1. **Deepfreeze Initialized** + - Setup action must have been run successfully + - `deepfreeze-status` index must exist + +2. **Elasticsearch Access** + - Read access to `deepfreeze-status` index + - Read access to `_snapshot` API + - Read access to `_ilm` API + +### No Prerequisites for Data + +- Works even if no repositories exist +- Works even if no thaw requests exist +- Always shows configuration (if deepfreeze is initialized) + +## Effects + +### What Status Does + +**Read Operations Only**: +- Queries `deepfreeze-status` index for configuration and repository metadata +- Queries Elasticsearch snapshot API for repository information +- Queries Elasticsearch ILM API for policy information +- Queries cluster API for cluster name + +**Display Operations**: +- Formats data into rich tables (default) or tab-separated values (`--porcelain`) +- Filters sections based on flags (or shows all if no flags) +- Applies `--limit` to restrict number of items shown + +**No State Changes**: +- Does NOT modify any data +- Does NOT create or delete resources +- Does NOT affect performance (lightweight read operations) +- Safe to run at any time, as frequently as needed + +## Options + +### Section Filters + +By default (no flags), Status shows **all sections**. 
Use these flags to show specific sections only: + +#### `--show-repos` +- **Type**: Boolean flag +- **Description**: Show repositories section +- **Displays**: + - Repository name (current active marked with *) + - Thaw state (frozen, thawing, thawed, expired) + - Mount status (yes/no) + - Snapshot count (if mounted) + - Date range (start and end) + +#### `--show-thawed` +- **Type**: Boolean flag +- **Description**: Show only thawed and thawing repositories +- **Displays**: + - Same columns as `--show-repos` + - Plus: Expiration timestamp + - Filters to only repos in `thawing`, `thawed`, or `expired` states + +#### `--show-buckets` +- **Type**: Boolean flag +- **Description**: Show S3 buckets section +- **Displays**: + - Provider (aws, etc.) + - Bucket name (current active marked with *) + - Base path within bucket + +#### `--show-ilm` +- **Type**: Boolean flag +- **Description**: Show ILM policies section +- **Displays**: + - Policy name + - Repository it references (current active marked with *) + - Number of indices using policy + - Number of datastreams using policy + +#### `--show-config` +- **Type**: Boolean flag +- **Description**: Show configuration section +- **Displays**: + - Repo name prefix + - Bucket name prefix + - Base path prefix + - Canned ACL + - Storage class + - Provider + - Rotation strategy (bucket/path) + - Naming style (oneup/date) + - Last used suffix + - Cluster name + +### Display Options + +#### `--limit <n>` +- **Type**: Integer +- **Default**: None (show all) +- **Description**: Limit number of items shown in repositories and buckets sections +- **Behavior**: Shows last N items (most recent) +- **Example**: `--limit 5` shows only the 5 most recent repositories +- **Use Case**: Large deployments with many repositories + +#### `--porcelain` +- **Type**: Boolean flag +- **Default**: `False` +- **Description**: Machine-readable tab-separated output +- **Use Case**: Scripting, automation, parsing +- **Effect**: Disables rich formatting,
outputs raw tab-delimited data + +## Usage Examples + +### Show Everything (Default) + +```bash +curator_cli deepfreeze status + +# Displays all sections: +# - Thawed Repositories (if any) +# - Repositories +# - Buckets +# - ILM Policies +# - Configuration +``` + +### Show Only Repositories + +```bash +curator_cli deepfreeze status --show-repos + +# Output (example): +# ┏━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓ +# ┃ Repository ┃ State ┃ Mounted ┃ Snapshots ┃ Start ┃ End ┃ +# ┡━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩ +# │ deepfreeze-000001 │ frozen │ no │ -- │ 2024-01-01... │ 2024-01-31... │ +# │ deepfreeze-000002 │ frozen │ no │ -- │ 2024-02-01... │ 2024-02-28... │ +# │ deepfreeze-000003 │ frozen │ yes │ 127 │ 2024-03-01... │ 2024-03-31... │ +# │ deepfreeze-000004 │ frozen │ yes │ 145 │ 2024-04-01... │ 2024-04-30... │ +# │ deepfreeze-000005 │ frozen │ yes │ 198 │ 2024-05-01... │ 2024-05-31... │ +# │ deepfreeze-000006* │ frozen │ yes │ 52 │ 2025-01-01... │ 2025-01-15... 
│ +# └────────────────────┴────────┴──────────┴───────────┴───────────────────┴───────────────────┘ +# +# * = current active repository +``` + +### Show Only Thawed Repositories + +```bash +curator_cli deepfreeze status --show-thawed + +# Shows only repositories currently being accessed (thawing or thawed) +# Includes expiration timestamp + +# If no thawed repos, section is not displayed +``` + +### Show Only Configuration + +```bash +curator_cli deepfreeze status --show-config + +# Output (example): +# ┏━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +# ┃ Setting ┃ Value ┃ +# ┡━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +# │ Repo Prefix │ deepfreeze │ +# │ Bucket Prefix │ deepfreeze │ +# │ Base Path Prefix │ snapshots │ +# │ Canned ACL │ private │ +# │ Storage Class │ intelligent_tiering │ +# │ Provider │ aws │ +# │ Rotate By │ path │ +# │ Style │ oneup │ +# │ Last Suffix │ 000006 │ +# │ Cluster Name │ my-production-cluster │ +# └────────────────────┴─────────────────────────────┘ +``` + +### Show Multiple Sections + +```bash +# Show repos and config only +curator_cli deepfreeze status --show-repos --show-config + +# Show ILM policies and buckets +curator_cli deepfreeze status --show-ilm --show-buckets +``` + +### Limit Output + +```bash +# Show only last 5 repositories +curator_cli deepfreeze status --show-repos --limit 5 + +# Useful for clusters with many repositories + +# Output: +# Repositories (showing last 5 of 24) +# [table with 5 most recent repos] +``` + +### Scripting with Porcelain Mode + +```bash +# Get configuration values +curator_cli deepfreeze status --show-config --porcelain + +# Output (tab-separated): +# Repo Prefix deepfreeze +# Bucket Prefix deepfreeze +# Base Path Prefix snapshots +# Canned ACL private +# Storage Class intelligent_tiering +# Provider aws +# Rotate By path +# Style oneup +# Last Suffix 000006 +# Cluster Name my-production-cluster + +# Parse in script: +REPO_PREFIX=$(curator_cli deepfreeze status --show-config 
--porcelain | awk -F'\t' '/^Repo Prefix/ {print $2}') +echo "Repository prefix: $REPO_PREFIX" +``` + +### Monitor Thawed Repositories + +```bash +#!/bin/bash +# Check for thawed repositories and alert if expiring soon + +curator_cli deepfreeze status --show-thawed --porcelain | while IFS=$'\t' read -r name state mounted count expires start end; do + # Skip header + if [ "$name" = "Repository" ]; then + continue + fi + + # Parse expiration time + if [ "$expires" != "N/A" ]; then + expire_epoch=$(date -j -f "%Y-%m-%dT%H:%M:%S" "${expires%.*}" "+%s" 2>/dev/null) + now_epoch=$(date "+%s") + hours_left=$(( (expire_epoch - now_epoch) / 3600 )) + + if [ "$hours_left" -lt 12 ]; then + echo "WARNING: Repository $name expires in $hours_left hours!" + fi + fi +done +``` + +### Audit ILM Policy Usage + +```bash +# Find ILM policies with no indices +curator_cli deepfreeze status --show-ilm --porcelain | awk -F'\t' '$3 == "0" && $4 == "0" {print "Unused policy: " $1}' +``` + +## Display Sections + +### Thawed Repositories Section + +**When Shown**: Automatically when any repositories are in `thawing`, `thawed`, or `expired` states + +**Columns**: +- **Repository**: Repository name +- **State**: Current thaw state (`thawing`, `thawed`, `expired`) +- **Mounted**: Whether repository is mounted in Elasticsearch (`yes`/`no`) +- **Snapshots**: Number of snapshots (if mounted, else `--`) +- **Expires**: When the AWS Glacier restore expires (ISO 8601 timestamp or `N/A`) +- **Start**: Start of date range covered by repository +- **End**: End of date range covered by repository + +**Purpose**: Quick view of actively thawed data and expiration tracking + +### Repositories Section + +**Columns**: +- **Repository**: Repository name (active repository marked with `*`) +- **State**: Thaw state (`frozen`, `thawing`, `thawed`, `expired`) +- **Mounted**: Whether repository is mounted (`yes`/`no`) +- **Snapshots**: Number of snapshots (if mounted, else `--`) +- **Start**: Start of date range +- 
**End**: End of date range + +**Sorting**: By repository name (typically chronological due to suffix) + +**Active Indicator**: Current active repository (from `last_suffix`) marked with `*` + +### Buckets Section + +**Columns**: +- **Provider**: Cloud provider (`aws`, etc.) +- **Bucket**: S3 bucket name (current active marked with `*`) +- **Base_path**: Path within bucket where snapshots are stored + +**Unique Entries**: Shows unique bucket/base_path combinations + +**Active Indicator**: Current active bucket/path marked with `*` + +### ILM Policies Section + +**Columns**: +- **Policy**: ILM policy name +- **Repository**: Repository referenced in `searchable_snapshot` action (current active marked with `*`) +- **Indices**: Number of indices currently using this policy +- **Datastreams**: Number of datastreams using this policy + +**Filtering**: Only shows policies that: +- Have a `searchable_snapshot` action +- Reference a repository matching the deepfreeze prefix + +**Active Indicator**: Policies referencing current active repository marked with `*` + +### Configuration Section + +**Settings Displayed**: +- **Repo Prefix**: Repository naming prefix +- **Bucket Prefix**: S3 bucket naming prefix +- **Base Path Prefix**: S3 path prefix +- **Canned ACL**: S3 bucket ACL setting +- **Storage Class**: S3 storage class +- **Provider**: Cloud provider +- **Rotate By**: Rotation strategy (`bucket` or `path`) +- **Style**: Suffix style (`oneup` or `date`) +- **Last Suffix**: Most recently used suffix +- **Cluster Name**: Elasticsearch cluster name + +## Interpreting Status Output + +### Repository States + +#### `frozen` +- **Meaning**: Repository exists but snapshots are in Glacier storage +- **Accessible**: No (requires thaw) +- **Mounted**: Typically `no` (unless recently unmounted) +- **Action**: Run `thaw` to restore access + +#### `thawing` +- **Meaning**: AWS Glacier restore in progress +- **Accessible**: Not yet (waiting for restore) +- **Mounted**: `no` (not mounted 
until restore complete) +- **Action**: Wait for restore or check status with `thaw --check-status` + +#### `thawed` +- **Meaning**: Restored from Glacier, accessible +- **Accessible**: Yes +- **Mounted**: `yes` +- **Expires**: Shows when restore expires +- **Action**: Query data, or refreeze when done + +#### `expired` +- **Meaning**: Restore expired, should be cleaned up +- **Accessible**: No (AWS reverted to Glacier) +- **Mounted**: May be `yes` (cleanup will unmount) +- **Action**: Cleanup action will handle automatically + +### Mount Status + +#### `yes` +- Repository is registered in Elasticsearch +- Snapshots are queryable +- Can mount searchable snapshots +- Date range should be populated + +#### `no` +- Repository is not registered +- Snapshots are not accessible +- Requires thaw (if frozen) or mount (if thawed) + +### Snapshot Count + +#### Number (e.g., `127`) +- Repository is mounted +- Contains this many snapshots +- Snapshots are available for restore/mount + +#### `--` +- Repository is not mounted +- Cannot determine snapshot count +- Need to mount first (or thaw if frozen) + +### Date Ranges + +#### ISO 8601 Timestamps +- Shows start and end dates for data in repository +- Populated by scanning snapshot index names +- Used by thaw action to find repositories by date + +#### `N/A` +- Date range not yet determined +- Repository may be empty +- Or dates not parsed from snapshot names + +## Use Cases + +### Daily Monitoring + +```bash +# Quick health check +curator_cli deepfreeze status --show-thawed --show-repos --limit 3 + +# Shows: +# - Any thawed repos and their expiration +# - Last 3 repositories (current state) +``` + +### Pre-Rotation Audit + +```bash +# Before rotation, check configuration +curator_cli deepfreeze status --show-config --show-ilm + +# Verify: +# - Last suffix (to predict next) +# - ILM policies exist and reference current repo +``` + +### Troubleshooting Thaw Issues + +```bash +# Check repository state +curator_cli deepfreeze 
status --show-repos --porcelain | grep "thawing\|thawed" + +# If no output, no thaw in progress +# If output shows state, check mount status +``` + +### Capacity Planning + +```bash +# Count total snapshots across all mounted repos +curator_cli deepfreeze status --show-repos --porcelain | awk -F'\t' 'BEGIN {sum=0} $4 ~ /^[0-9]+$/ {sum+=$4} END {print "Total snapshots: " sum}' +``` + +### Audit Unused ILM Policies + +```bash +# Find policies with no usage +curator_cli deepfreeze status --show-ilm --porcelain | awk -F'\t' '$3 == "0" && $4 == "0" && NR > 1 {print $1}' > unused_policies.txt +``` + +## Error Handling + +### Common Issues + +#### 1. No Output / Empty Sections + +**Cause**: Section has no data + +**Examples**: +- No thawed repos → Thawed Repositories section not shown +- No ILM policies referencing deepfreeze repos → ILM section empty + +**Action**: This is normal - status only shows what exists + +#### 2. Repository Shows Mounted but Snapshot Count is `--` + +**Cause**: Repository mount status desync + +**Solutions**: +```bash +# Check actual mount status +curl -X GET 'http://localhost:9200/_snapshot/_all' + +# If repo not in list, state is stale +# Run status again (it may auto-correct on read) +``` + +#### 3. Date Ranges Show `N/A` + +**Cause**: Repository date range not yet scanned + +**Solutions**: +- Run rotation (updates all date ranges) +- Or wait for next rotation +- Date ranges are not critical for most operations + +#### 4. "Settings not found" Error + +**Error**: Status fails with settings error + +**Cause**: Deepfreeze not initialized + +**Solution**: +```bash +curator_cli deepfreeze setup +``` + +## Best Practices + +### Regular Monitoring + +1. **Daily Quick Check** + ```bash + curator_cli deepfreeze status --show-thawed + ``` + - Track active thaws + - Monitor expirations + +2. **Weekly Full Status** + ```bash + curator_cli deepfreeze status > weekly_status.txt + ``` + - Full audit + - Compare week-to-week changes + +3. 
**Pre/Post Operation Verification** + - Before rotation: Check configuration + - After rotation: Verify new repo created + - Before thaw: Check available repos/dates + - After refreeze: Verify unmounted + +### Automation + +1. **Status Dashboard** + ```bash + # Cron job: Every hour + curator_cli deepfreeze status --porcelain > /var/www/html/deepfreeze_status.txt + ``` + - Parse in dashboard/monitoring tool + - Graph repository counts over time + - Alert on approaching expirations + +2. **Alerting on Expiration** + ```bash + # Check for repos expiring in < 24 hours + # Send alert if found + ``` + +3. **Capacity Metrics** + ```bash + # Track snapshot growth + # Predict when to rotate + ``` + +## Related Actions + +- **Setup**: Initialize deepfreeze (required first) +- **Rotate**: Creates new repositories (status shows them) +- **Thaw**: Changes repository states (status tracks them) +- **Refreeze**: Unmounts repositories (status reflects changes) +- **Cleanup**: Cleans up expired repos (status shows results) + +## Performance Considerations + +### Lightweight Operation + +- **Read-only**: No writes or modifications +- **Fast**: Typically < 1 second +- **Safe**: Run as frequently as needed +- **No Impact**: Does not affect cluster performance + +### Limits for Large Deployments + +- Use `--limit` for clusters with 50+ repositories +- Reduces output size +- Speeds up display rendering +- Doesn't affect data fetching (already fast) + +## Security Considerations + +- **Read Permissions Only**: No destructive operations possible +- **Information Disclosure**: Shows repository and bucket names (ensure logs are secured) +- **Cluster Info**: Displays cluster name (informational only) + +## Output Formats + +### Rich Format (Default) + +- **Visual**: Tables with borders, colors, formatting +- **Human-Readable**: Designed for terminal viewing +- **Interactive**: Clear section headers and spacing + +### Porcelain Format (`--porcelain`) + +- **Machine-Readable**: Tab-separated 
values +- **Parseable**: Easy to process with `awk`, `cut`, scripting +- **Stable**: Column order won't change (safe for automation) +- **No Formatting**: No colors, borders, or decoration diff --git a/curator/actions/deepfreeze/docs/thaw.md b/curator/actions/deepfreeze/docs/thaw.md new file mode 100644 index 00000000..62ce738c --- /dev/null +++ b/curator/actions/deepfreeze/docs/thaw.md @@ -0,0 +1,637 @@ +# Thaw Action + +## Purpose + +The Thaw action restores frozen repositories from AWS Glacier storage back to instant-access tiers, making their snapshot data available for querying and analysis. It handles the AWS Glacier restore process, repository mounting in Elasticsearch, and automatic index mounting from the restored snapshots. + +Thaw supports three operational modes: +1. **Create Mode**: Initiate new thaw requests for a date range +2. **Check Status Mode**: Monitor and mount repositories when restoration completes +3. **List Mode**: Display all active thaw requests + +## Prerequisites + +### System Requirements + +1. **Deepfreeze Initialized** + - Setup action must have been run successfully + - `deepfreeze-status` index must exist with valid configuration + +2. **Frozen Repositories** + - At least one repository in `frozen` state + - Repository objects in S3 Glacier storage class + +3. **AWS Credentials** + - Valid AWS credentials with Glacier restore permissions + - Credentials accessible to the Curator process + +4. **IAM Permissions** + - `s3:RestoreObject` - Initiate Glacier restore + - `s3:GetObject` - Check restore status + - `s3:ListBucket` - List objects in repository paths + - `s3:GetObjectAttributes` - Query object restore status + +5. 
**Elasticsearch Permissions** + - `snapshot.create` - Mount repositories + - `indices.create` - Mount indices from snapshots + +### Data Requirements + +For **Create Mode**: +- Know the date range of data you need +- Understand the duration you'll need access (affects AWS costs) +- Consider retrieval tier based on urgency (Standard, Expedited, Bulk) + +For **Check Status Mode**: +- Have a thaw request ID from a previous create operation + +## Effects + +### Create Mode Effects + +#### What Happens Immediately + +1. **Repository Identification** + - Searches `deepfreeze-status` index for repositories with date ranges overlapping the requested dates + - Filters to only repositories in `frozen` state (not already thawed or thawing) + +2. **Glacier Restore Initiation** + - For each repository: + - Lists all S3 objects in the repository path + - Submits restore requests for each object + - AWS begins retrieving objects from Glacier + +3. **Repository State Update** + - Repositories marked as `thawing` in `deepfreeze-status` index + - `expires_at` timestamp set based on duration parameter + - `thaw_state` transitions: `frozen` → `thawing` + +4. **Thaw Request Creation** + - Creates a tracking document in `deepfreeze-status` index + - Records: + - Unique request ID (UUID) + - List of repositories being thawed + - Date range requested + - Status (`in_progress`) + - Creation timestamp + +5. **Response** + - **Async Mode** (default): Returns immediately with request ID + - **Sync Mode** (`--sync`): Waits for restore completion, then mounts repositories and indices + +#### What Happens Over Time (Async Mode) + +1. **AWS Glacier Restore** (hours to days depending on tier) + - Objects transition from Glacier to Standard storage + - Temporary copies created (original Glacier object remains) + - Duration controlled by `--duration` parameter + +2. 
**Check Status Process** (when you run `--check-status `) + - Queries S3 for restore status of all objects + - When all objects restored: + - Mounts repositories in Elasticsearch + - Updates date ranges by scanning snapshots + - Mounts indices from snapshots + - Marks request as `completed` + +### Check Status Mode Effects + +1. **Status Query** + - Retrieves thaw request from `deepfreeze-status` index + - Gets associated repository objects + +2. **S3 Restore Check** + - For each repository not yet mounted: + - Queries S3 for object restore status + - Counts: total objects, restored, in progress, not restored + +3. **Repository Mounting** (when restore complete) + - Registers repository in Elasticsearch + - Repository becomes available for snapshot operations + - Updates `is_mounted` flag and state to `thawed` + +4. **Date Range Update** + - Scans mounted repository snapshots + - Extracts index names and date patterns + - Updates repository `start` and `end` timestamps + +5. **Index Mounting** + - Identifies indices within requested date range + - Mounts as searchable snapshots + - Adds to data streams if applicable + - Creates per-repository thawed ILM policy + +6. 
**Request Status Update** + - Marks request as `completed` when all repositories mounted + - Updates timestamps + +### List Mode Effects + +- Queries `deepfreeze-status` index for thaw request documents +- Displays in tabular format +- **No state changes** - read-only operation + +## Options + +### Create Mode Options + +#### Date Range (Required) + +##### `--start-date <datetime>` +- **Type**: ISO 8601 datetime string +- **Required**: Yes (for create mode) +- **Format**: `YYYY-MM-DDTHH:MM:SSZ` +- **Description**: Start of the date range to thaw +- **Example**: `2025-01-01T00:00:00Z` +- **Important**: Must be before or equal to `--end-date` + +##### `--end-date <datetime>` +- **Type**: ISO 8601 datetime string +- **Required**: Yes (for create mode) +- **Format**: `YYYY-MM-DDTHH:MM:SSZ` +- **Description**: End of the date range to thaw +- **Example**: `2025-01-31T23:59:59Z` + +#### Restore Configuration + +##### `--duration <days>` +- **Type**: Integer +- **Default**: `7` +- **Range**: `1` to `90` (AWS S3 limit) +- **Description**: Number of days to keep objects restored from Glacier +- **AWS Billing**: You pay for Standard storage for this duration +- **After Duration**: Objects automatically revert to Glacier (no manual cleanup needed) +- **Example**: `--duration 3` keeps data accessible for 3 days + +##### `--retrieval-tier <tier>` +- **Type**: String +- **Default**: `Standard` +- **Options**: + - `Expedited` - 1-5 minutes (most expensive, limited capacity) + - `Standard` - 3-5 hours (moderate cost) + - `Bulk` - 5-12 hours (lowest cost, best for large datasets) +- **Description**: AWS Glacier restore speed and cost tier +- **Cost Comparison** (approximate, varies by region): + - Expedited: $30/TB retrieval + $0.03/GB prorated storage + - Standard: $10/TB retrieval + $0.01/GB prorated storage + - Bulk: $2.50/TB retrieval + $0.0025/GB prorated storage +- **Recommendation**: Use `Standard` for most cases, `Bulk` for cost-sensitive large restores + +#### Execution Mode + +##### `--sync` +- 
**Type**: Boolean flag +- **Default**: `False` (async mode) +- **Description**: Wait for Glacier restore to complete, then mount everything before returning +- **Use Cases**: + - Interactive sessions where you need immediate access + - CI/CD pipelines that need to wait for data +- **Drawbacks**: Command blocks for hours (3-12 hours typically) +- **Async Alternative**: Return immediately with request ID, use `--check-status` later + +#### Output Format + +##### `--porcelain` +- **Type**: Boolean flag +- **Default**: `False` +- **Description**: Machine-readable tab-separated output +- **Use Case**: Scripting, automation +- **Output Format**: + - Success: `REQUEST\t{request_id}\t{status}\t{created_at}\t{start_date}\t{end_date}` + - Per repo: `REPO\t{name}\t{bucket}\t{path}\t{state}\t{mounted}\t{progress}` + +### Check Status Mode Options + +##### `--check-status <request_id>` +- **Type**: String (UUID) or empty string +- **Description**: Check status of a specific thaw request or all requests +- **Examples**: + - `--check-status abc123-def456` - Check specific request + - `--check-status ""` - Check all in-progress requests +- **Behavior**: Checks S3 status, mounts repositories/indices when ready, displays current state + +### List Mode Options + +##### `--list-requests` +- **Type**: Boolean flag +- **Description**: List all thaw requests +- **Default Behavior**: Shows only active requests (excludes completed and refrozen) + +##### `--include-completed` +- **Type**: Boolean flag +- **Default**: `False` +- **Description**: Include completed and refrozen requests in list +- **Use Case**: Auditing, historical tracking + +## Usage Examples + +### Basic Thaw (Async) + +```bash +# Initiate thaw for January 2025 data +curator_cli deepfreeze thaw \ + --start-date 2025-01-01T00:00:00Z \ + --end-date 2025-01-31T23:59:59Z + +# Output: +# Thaw Request Initiated +# Request ID: a1b2c3d4-e5f6-7890-abcd-ef1234567890 +# ... 
+# Check status with: +# curator_cli deepfreeze thaw --check-status a1b2c3d4-e5f6-7890-abcd-ef1234567890 +``` + +### Thaw with Custom Duration + +```bash +# Thaw for only 1 day (minimize costs) +curator_cli deepfreeze thaw \ + --start-date 2025-01-15T00:00:00Z \ + --end-date 2025-01-15T23:59:59Z \ + --duration 1 +``` + +### Urgent Thaw (Expedited) + +```bash +# Fast restore (1-5 minutes, higher cost) +curator_cli deepfreeze thaw \ + --start-date 2025-01-20T00:00:00Z \ + --end-date 2025-01-22T23:59:59Z \ + --retrieval-tier Expedited +``` + +### Cost-Effective Large Thaw (Bulk) + +```bash +# Restore large dataset over 5-12 hours at lowest cost +curator_cli deepfreeze thaw \ + --start-date 2024-12-01T00:00:00Z \ + --end-date 2024-12-31T23:59:59Z \ + --retrieval-tier Bulk \ + --duration 3 +``` + +### Synchronous Thaw (Wait for Completion) + +```bash +# Block until data is fully accessible +curator_cli deepfreeze thaw \ + --start-date 2025-01-10T00:00:00Z \ + --end-date 2025-01-12T23:59:59Z \ + --sync + +# Process continues automatically through all phases: +# Phase 1: Finding Repositories +# Phase 2: Initiating Glacier Restore +# Phase 3: Waiting for Glacier Restoration (3-5 hours typically) +# Phase 4: Mounting Repositories +# Phase 5: Updating Repository Metadata +# Phase 6: Mounting Indices +``` + +### Check Thaw Status + +```bash +# Check specific request +curator_cli deepfreeze thaw \ + --check-status a1b2c3d4-e5f6-7890-abcd-ef1234567890 + +# Output shows: +# - Restore progress (e.g., "125/500 objects restored") +# - Repository mount status +# - When ready: automatically mounts repos and indices +``` + +### Check All Active Thaws + +```bash +# Check and mount all in-progress requests +curator_cli deepfreeze thaw --check-status "" + +# Useful for scheduled cron jobs to poll all pending thaws +``` + +### List Thaw Requests + +```bash +# Show active requests only +curator_cli deepfreeze thaw --list-requests + +# Show all requests (including completed) +curator_cli 
deepfreeze thaw --list-requests --include-completed +``` + +### Scripting Example + +```bash +#!/bin/bash +# Script to thaw data and wait for completion + +# Initiate thaw +REQUEST_ID=$(curator_cli deepfreeze thaw \ + --start-date 2025-01-01T00:00:00Z \ + --end-date 2025-01-07T23:59:59Z \ + --porcelain | awk -F'\t' '/^REQUEST/ {print $2; exit}') + +echo "Thaw request created: $REQUEST_ID" + +# Poll until complete +while true; do + STATUS=$(curator_cli deepfreeze thaw --check-status "$REQUEST_ID" --porcelain \ + | awk -F'\t' '/^REQUEST/ {print $3}') + + echo "Current status: $STATUS" + + if [ "$STATUS" = "completed" ]; then + echo "Thaw complete!" + break + fi + + sleep 300 # Check every 5 minutes +done +``` + +## Error Handling + +### Common Errors and Solutions + +#### 1. No Repositories Found + +**Error**: `No repositories found for date range` + +**Causes**: +- Date range doesn't overlap with any repository's data range +- All matching repositories are already thawed + +**Solutions**: +- Check available repositories: + ```bash + curator_cli deepfreeze status --show-repos + ``` +- Verify date ranges in repository metadata +- Use broader date range +- Check if repositories are already thawed: + ```bash + curator_cli deepfreeze status --show-thawed + ``` + +#### 2. Glacier Restore Permission Denied + +**Error**: `Failed to thaw repository: Access Denied (S3)` + +**Cause**: AWS credentials lack `s3:RestoreObject` permission + +**Solution**: Update IAM policy: +```json +{ + "Effect": "Allow", + "Action": [ + "s3:RestoreObject", + "s3:GetObject", + "s3:ListBucket", + "s3:GetObjectAttributes" + ], + "Resource": [ + "arn:aws:s3:::your-bucket-prefix*/*" + ] +} +``` + +#### 3. 
Expedited Retrieval Capacity Exceeded
+
+**Error**: `Failed to restore: InsufficientCapacityException`
+
+**Cause**: Expedited tier has limited capacity and may be unavailable
+
+**Solutions**:
+- Use `Standard` tier instead:
+  ```bash
+  --retrieval-tier Standard
+  ```
+- Purchase provisioned capacity (AWS feature)
+- Retry Expedited request later
+
+#### 4. Invalid Date Format
+
+**Error**: `Invalid start_date: ... Expected ISO 8601 format`
+
+**Cause**: Date not in ISO 8601 format
+
+**Solutions**:
+- Use correct format: `YYYY-MM-DDTHH:MM:SSZ`
+- Include timezone (use `Z` for UTC)
+- Examples:
+  - ✅ `2025-01-15T00:00:00Z`
+  - ❌ `2025-01-15` (missing time and timezone)
+  - ❌ `01/15/2025` (wrong format)
+
+#### 5. Repository Mount Failure
+
+**Error**: `Failed to mount repository: repository already exists`
+
+**Cause**: Repository name conflicts with existing repository
+
+**Solutions**:
+- Delete conflicting repository if it's stale:
+  ```bash
+  curl -X DELETE 'http://localhost:9200/_snapshot/conflicting-repo'
+  ```
+- Check repository status:
+  ```bash
+  curator_cli deepfreeze status --show-repos
+  ```
+
+#### 6. Index Mount Failure
+
+**Error**: `Failed to mount index: searchable snapshot already exists`
+
+**Cause**: Index with same name already exists in cluster
+
+**Solutions**:
+- Delete existing searchable snapshot if it's stale:
+  ```bash
+  curl -X DELETE 'http://localhost:9200/partial-my-index-name'
+  ```
+- Check mounted indices:
+  ```bash
+  curl -X GET 'http://localhost:9200/_cat/indices/partial-*'
+  ```
+
+## Best Practices
+
+### Before Thawing
+
+1. **Plan Your Date Range Carefully**
+   - Thaw only the data you need (minimizes AWS costs)
+   - Consider query patterns (daily, weekly, monthly analysis)
+   - Account for timezone differences
+
+2. **Choose Appropriate Duration**
+   - Minimum: 1 day
+   - Typical: 3-7 days for analysis projects
+   - Maximum: 90 days (AWS limit)
+   - Remember: You pay for Standard storage during this period
+
+3. 
**Select Right Retrieval Tier**
+   - **Expedited**: Emergency access, incident investigation
+   - **Standard**: Regular analysis, reports (most common)
+   - **Bulk**: Large-scale data mining, cost-sensitive operations
+
+4. **Estimate Costs**
+   - Use AWS pricing calculator
+   - Factor in: retrieval fees + prorated storage for duration
+   - Example (us-east-1, 1TB, 7 days Standard):
+     - Retrieval: ~$10
+     - Storage: ~$5.37 (7/30 × $23/TB/month)
+     - Total: ~$15.37
+
+### During Thaw
+
+1. **Async Mode Recommended**
+   - Don't use `--sync` for production workflows
+   - Set up monitoring instead:
+     ```bash
+     # Cron job every 15 minutes
+     */15 * * * * curator_cli deepfreeze thaw --check-status ""
+     ```
+
+2. **Monitor Progress**
+   - Use `--check-status` periodically
+   - Check CloudWatch for S3 metrics
+   - Review Elasticsearch logs for mount errors
+
+3. **Handle Long-Running Operations**
+   - Standard tier: 3-5 hours typical
+   - Bulk tier: 5-12 hours typical
+   - Plan queries accordingly
+
+### After Thaw
+
+1. **Verify Data Accessibility**
+   ```bash
+   # Check repositories mounted
+   curator_cli deepfreeze status --show-thawed
+
+   # Query data
+   curl -X GET 'http://localhost:9200/my-index-*/_search?size=0'
+   ```
+
+2. **Use Refreeze When Done**
+   - Don't wait for expiration if finished early
+   - Saves AWS costs:
+     ```bash
+     curator_cli deepfreeze refreeze --thaw-request-id <request-id>
+     ```
+
+3. **Monitor Duration Expiration**
+   - Objects auto-revert to Glacier after duration
+   - Indices will fail to query after expiration
+   - Use `status` command to track expiry times
+
+## Thaw Lifecycle
+
+### Complete Workflow
+
+```
+1. User Initiates Thaw
+   ↓
+2. Curator finds matching frozen repositories
+   ↓
+3. Curator submits Glacier restore for all objects
+   ↓
+4. Repository state: frozen → thawing
+   ↓
+5. AWS Glacier begins restore (hours)
+   ↓
+6. User runs --check-status periodically
+   ↓
+7. When complete, Curator mounts repositories
+   ↓
+8. Curator updates date ranges
+   ↓
+9. 
Curator mounts indices as searchable snapshots + ↓ +10. Repository state: thawing → thawed + ↓ +11. Data is queryable + ↓ +12. User analyzes data + ↓ +13. User runs refreeze OR waits for expiration + ↓ +14. Repository unmounted + ↓ +15. Repository state: thawed → frozen + ↓ +16. Objects revert to Glacier (automatic) +``` + +## Performance Considerations + +### Factors Affecting Speed + +1. **Retrieval Tier** + - Expedited: 1-5 minutes + - Standard: 3-5 hours + - Bulk: 5-12 hours + +2. **Data Volume** + - More objects = longer restore + - Parallel restore of multiple objects + - S3 throttling may occur for very large restores + +3. **Network Bandwidth** + - Repository mounting requires metadata transfer + - Index mounting pulls snapshot data + - Ensure adequate bandwidth between ES and S3 + +### Optimization Tips + +1. **Use Bulk Tier for Large Datasets** + - Better throughput for >1TB + - Significantly cheaper + - Plan ahead (5-12 hour window) + +2. **Thaw Repositories Incrementally** + - Don't thaw entire year at once + - Thaw week or month at a time + - Reduces S3 API load and costs + +3. **Check Status Efficiently** + - Use cron jobs, not continuous polling + - 15-30 minute intervals for Standard tier + - 1-2 hour intervals for Bulk tier + +## Related Actions + +- **Setup**: Initialize deepfreeze (required first) +- **Rotate**: Create new repositories (affects available date ranges) +- **Refreeze**: Manually unmount thawed repositories before expiration +- **Cleanup**: Automatic expiration handling (runs on schedule) +- **Status**: View repository states and thaw progress + +## AWS Costs + +### Understanding Glacier Restore Pricing + +Glacier restore has two cost components: + +1. **Retrieval Fee** (one-time, per object) + - Based on retrieval tier and data volume + - Standard: ~$10/TB + - Bulk: ~$2.50/TB + - Expedited: ~$30/TB + +2. 
**Prorated Standard Storage** (duration-based) + - Objects temporarily in Standard tier for `duration` days + - ~$0.023/GB/month in us-east-1 + - Prorated: `(duration / 30) × monthly_rate × size` + - Example: 7 days, 100GB: `(7/30) × $0.023 × 100 = $0.54` + +### Cost Optimization + +1. **Minimize Duration**: Use shortest duration that meets your needs +2. **Use Bulk Tier**: 75% cheaper retrieval than Standard +3. **Thaw Selectively**: Only restore repositories you'll actually query +4. **Refreeze Early**: Don't pay for unused days diff --git a/curator/actions/reindex.py b/curator/actions/reindex.py index d1a34679..6c336976 100644 --- a/curator/actions/reindex.py +++ b/curator/actions/reindex.py @@ -6,7 +6,7 @@ # pylint: disable=broad-except, R0902,R0912,R0913,R0914,R0915 from es_client.builder import Builder -from es_client.helpers.utils import ensure_list, verify_url_schema +from es_client.utils import ensure_list, verify_url_schema from es_client.exceptions import ConfigurationError from curator.exceptions import CuratorException, FailedExecution, NoIndices diff --git a/curator/actions/snapshot.py b/curator/actions/snapshot.py index b44ec0af..50414610 100644 --- a/curator/actions/snapshot.py +++ b/curator/actions/snapshot.py @@ -2,7 +2,7 @@ import logging import re -from es_client.helpers.utils import ensure_list +from es_client.utils import ensure_list from curator.helpers.date_ops import parse_datemath, parse_date_pattern from curator.helpers.getters import get_indices from curator.helpers.testers import ( diff --git a/curator/classdef.py b/curator/classdef.py index 0708ec04..e75bfaf8 100644 --- a/curator/classdef.py +++ b/curator/classdef.py @@ -2,8 +2,8 @@ import logging from es_client.exceptions import FailedValidation -from es_client.helpers.schemacheck import password_filter -from es_client.helpers.utils import get_yaml +from es_client.schemacheck import password_filter +from es_client.utils import get_yaml from curator import IndexList, SnapshotList from 
curator.debug import debug from curator.actions import CLASS_MAP diff --git a/curator/cli.py b/curator/cli.py index 42b31b24..3b593263 100644 --- a/curator/cli.py +++ b/curator/cli.py @@ -4,7 +4,7 @@ import logging import click from es_client.defaults import OPTION_DEFAULTS -from es_client.helpers.config import ( +from es_client.config import ( cli_opts, context_settings, generate_configdict, @@ -12,8 +12,8 @@ get_config, options_from_dict, ) -from es_client.helpers.logging import configure_logging -from es_client.helpers.utils import option_wrapper, prune_nones +from es_client.logging import configure_logging +from es_client.utils import option_wrapper, prune_nones from curator.exceptions import ClientException from curator.classdef import ActionsFile from curator.defaults.settings import ( diff --git a/curator/cli_singletons/__init__.py b/curator/cli_singletons/__init__.py index 567f1229..93aef249 100644 --- a/curator/cli_singletons/__init__.py +++ b/curator/cli_singletons/__init__.py @@ -1,7 +1,9 @@ """Use __init__ to make these not need to be nested under lowercase.Capital""" + from curator.cli_singletons.alias import alias from curator.cli_singletons.allocation import allocation from curator.cli_singletons.close import close +from curator.cli_singletons.deepfreeze import deepfreeze, rotate, setup, status from curator.cli_singletons.delete import delete_indices, delete_snapshots from curator.cli_singletons.forcemerge import forcemerge from curator.cli_singletons.open_indices import open_indices diff --git a/curator/cli_singletons/deepfreeze.py b/curator/cli_singletons/deepfreeze.py new file mode 100644 index 00000000..4327aee8 --- /dev/null +++ b/curator/cli_singletons/deepfreeze.py @@ -0,0 +1,676 @@ +"""Deepfreeze Singleton""" + +import logging +from datetime import datetime + +import click + +from curator.cli_singletons.object_class import CLIAction + +today = datetime.today() + + +@click.group() +def deepfreeze(): + """ + Deepfreeze command group + """ + + 
+@deepfreeze.command() +@click.option( + "-y", + "--year", + type=int, + default=today.year, + show_default=True, + help="Year for the new repo. Only used if style=date.", +) +@click.option( + "-m", + "--month", + type=int, + default=today.month, + show_default=True, + help="Month for the new repo. Only used if style=date.", +) +@click.option( + "-r", + "--repo_name_prefix", + type=str, + default="deepfreeze", + show_default=True, + help="prefix for naming rotating repositories", +) +@click.option( + "-b", + "--bucket_name_prefix", + type=str, + default="deepfreeze", + show_default=True, + help="prefix for naming buckets", +) +@click.option( + "-d", + "--base_path_prefix", + type=str, + default="snapshots", + show_default=True, + help="base path in the bucket to use for searchable snapshots", +) +@click.option( + "-a", + "--canned_acl", + type=click.Choice( + [ + "private", + "public-read", + "public-read-write", + "authenticated-read", + "log-delivery-write", + "bucket-owner-read", + "bucket-owner-full-control", + ] + ), + default="private", + show_default=True, + help="Canned ACL as defined by AWS", +) +@click.option( + "-s", + "--storage_class", + type=click.Choice( + [ + "standard", + "reduced_redundancy", + "standard_ia", + "intelligent_tiering", + "onezone_ia", + ] + ), + default="standard", + show_default=True, + help="What storage class to use, as defined by AWS", +) +@click.option( + "-o", + "--provider", + type=click.Choice( + [ + "aws", + # "gcp", + # "azure", + ] + ), + default="aws", + help="What provider to use (AWS only for now)", +) +@click.option( + "-t", + "--rotate_by", + type=click.Choice( + [ + # "bucket", + "path", + ] + ), + default="path", + help="Rotate by path. This is the only option available for now", + # help="Rotate by bucket or path within a bucket?", +) +@click.option( + "-n", + "--style", + type=click.Choice( + [ + # "date", + "oneup", + ] + ), + default="oneup", + help="How to number (suffix) the rotating repositories. 
Oneup is the only option available for now.", + # help="How to number (suffix) the rotating repositories", +) +@click.option( + "-i", + "--ilm_policy_name", + type=str, + required=True, + help="Name of the ILM policy to create/modify. If the policy exists, it will be " + "updated to use the deepfreeze repository. If not, a new policy will be created " + "with tiering: 7d hot, 30d cold, 365d frozen, then delete.", +) +@click.option( + "-x", + "--index_template_name", + type=str, + required=True, + help="Name of the index template to attach the ILM policy to. " + "The template will be updated to use the specified ILM policy.", +) +@click.option( + "-p", + "--porcelain", + is_flag=True, + default=False, + help="Machine-readable output (tab-separated values, no formatting)", +) +@click.pass_context +def setup( + ctx, + year, + month, + repo_name_prefix, + bucket_name_prefix, + base_path_prefix, + canned_acl, + storage_class, + provider, + rotate_by, + style, + ilm_policy_name, + index_template_name, + porcelain, +): + """ + Set up a cluster for deepfreeze and save the configuration for all future actions. + + Setup can be tuned by setting the following options to override defaults. Note that + --year and --month are only used if style=date. If style=oneup, then year and month + are ignored. + + Depending on the S3 provider chosen, some options might not be available, or option + values may vary. 
+ + \b + ILM Policy Configuration (--ilm_policy_name, REQUIRED): + - If the policy exists: Updates it to use the deepfreeze repository + - If not: Creates a new policy with tiering strategy: + * Hot: 7 days (with rollover at 45GB or 7d) + * Cold: 30 days + * Frozen: 365 days (searchable snapshot to deepfreeze repo) + * Delete: after frozen phase (delete_searchable_snapshot=false) + + \b + Index Template Configuration (--index_template_name, REQUIRED): + - The template will be updated to use the specified ILM policy + - Ensures new indices will automatically use the deepfreeze ILM policy + """ + logging.debug("setup") + + manual_options = { + "year": year, + "month": month, + "repo_name_prefix": repo_name_prefix, + "bucket_name_prefix": bucket_name_prefix, + "base_path_prefix": base_path_prefix, + "canned_acl": canned_acl, + "storage_class": storage_class, + "provider": provider, + "rotate_by": rotate_by, + "style": style, + "ilm_policy_name": ilm_policy_name, + "index_template_name": index_template_name, + "porcelain": porcelain, + } + + action = CLIAction( + ctx.info_name, + ctx.obj["configdict"], + manual_options, + [], + True, + ) + action.do_singleton_action(dry_run=ctx.obj["dry_run"]) + + +@deepfreeze.command() +@click.option( + "-y", + "--year", + type=int, + default=today.year, + help="Year for the new repo (default is today)", +) +@click.option( + "-m", + "--month", + type=int, + default=today.month, + help="Month for the new repo (default is today)", +) +@click.option( + "-k", + "--keep", + type=int, + default=6, + help="How many repositories should remain mounted?", +) +@click.option( + "-p", + "--porcelain", + is_flag=True, + default=False, + help="Machine-readable output (no formatting)", +) +@click.pass_context +def rotate( + ctx, + year, + month, + keep, + porcelain, +): + """ + Deepfreeze rotation (add a new repo and age oldest off) + """ + manual_options = { + "year": year, + "month": month, + "keep": keep, + "porcelain": porcelain, + } + action = 
CLIAction( + ctx.info_name, + ctx.obj["configdict"], + manual_options, + [], + True, + ) + action.do_singleton_action(dry_run=ctx.obj["dry_run"]) + + +@deepfreeze.command() +@click.option( + "-l", + "--limit", + type=int, + default=None, + help="Limit display to the last N repositories (default: show all)", +) +@click.option( + "-r", + "--repos", + is_flag=True, + default=False, + help="Show repositories section only", +) +@click.option( + "-t", + "--thawed", + is_flag=True, + default=False, + help="Show thawed repositories section only", +) +@click.option( + "-b", + "--buckets", + is_flag=True, + default=False, + help="Show buckets section only", +) +@click.option( + "-i", + "--ilm", + is_flag=True, + default=False, + help="Show ILM policies section only", +) +@click.option( + "-c", + "--config", + is_flag=True, + default=False, + help="Show configuration section only", +) +@click.option( + "-p", + "--porcelain", + is_flag=True, + default=False, + help="Output plain text without formatting (suitable for scripting)", +) +@click.pass_context +def status( + ctx, + limit, + repos, + thawed, + buckets, + ilm, + config, + porcelain, +): + """ + Show the status of deepfreeze + + By default, all sections are displayed. Use section flags (-r, -t, -b, -i, -c) to show specific sections only. + Multiple section flags can be combined. 
+ """ + manual_options = { + "limit": limit, + "show_repos": repos, + "show_thawed": thawed, + "show_buckets": buckets, + "show_ilm": ilm, + "show_config": config, + "porcelain": porcelain, + } + action = CLIAction( + ctx.info_name, + ctx.obj["configdict"], + manual_options, + [], + True, + ) + action.do_singleton_action(dry_run=ctx.obj["dry_run"]) + + +@deepfreeze.command() +@click.option( + "-f", + "--refrozen-retention-days", + type=int, + default=None, + help="Override retention period for refrozen thaw requests (default: from config, typically 35 days)", +) +@click.option( + "-p", + "--porcelain", + is_flag=True, + default=False, + help="Machine-readable output (no formatting)", +) +@click.pass_context +def cleanup( + ctx, + refrozen_retention_days, + porcelain, +): + """ + Clean up expired thawed repositories + """ + manual_options = { + "refrozen_retention_days": refrozen_retention_days, + "porcelain": porcelain, + } + action = CLIAction( + ctx.info_name, + ctx.obj["configdict"], + manual_options, + [], + True, + ) + action.do_singleton_action(dry_run=ctx.obj["dry_run"]) + + +@deepfreeze.command() +@click.option( + "-t", + "--thaw-request-id", + "thaw_request_id", + type=str, + default=None, + help="The ID of the thaw request to refreeze (optional - if not provided, all open requests)", +) +@click.option( + "-p", + "--porcelain", + is_flag=True, + default=False, + help="Machine-readable output (tab-separated values, no formatting)", +) +@click.pass_context +def refreeze( + ctx, + thaw_request_id, + porcelain, +): + """ + Unmount repositories from thaw request(s) and reset them to frozen state. + + This is a user-initiated operation to signal "I'm done with this thaw." + It unmounts all repositories associated with the thaw request(s) and resets + their state back to frozen, even if the S3 restore hasn't expired yet. + + \b + Two modes of operation: + 1. Specific request: Provide -t to refreeze one request + 2. 
All open requests: Omit -t to refreeze all open requests (requires confirmation) + + \b + Examples: + + # Refreeze a specific thaw request + + curator_cli deepfreeze refreeze -t + + # Refreeze all open thaw requests (with confirmation) + + curator_cli deepfreeze refreeze + """ + manual_options = { + "thaw_request_id": thaw_request_id, + "porcelain": porcelain, + } + action = CLIAction( + ctx.info_name, + ctx.obj["configdict"], + manual_options, + [], + True, + ) + action.do_singleton_action(dry_run=ctx.obj["dry_run"]) + + +@deepfreeze.command() +@click.option( + "-s", + "--start-date", + type=str, + default=None, + help="Start of date range in ISO 8601 format (e.g., 2025-01-15T00:00:00Z)", +) +@click.option( + "-e", + "--end-date", + type=str, + default=None, + help="End of date range in ISO 8601 format (e.g., 2025-01-31T23:59:59Z)", +) +@click.option( + "--sync/--async", + "sync", + default=False, + show_default=True, + help="Wait for restore and mount (sync) or return immediately (async)", +) +@click.option( + "-d", + "--duration", + type=int, + default=30, + show_default=True, + help="Number of days to keep objects restored from Glacier", +) +@click.option( + "-t", + "--retrieval-tier", + type=click.Choice(["Standard", "Expedited", "Bulk"]), + default="Standard", + show_default=True, + help="AWS Glacier retrieval tier", +) +@click.option( + "-k", + "--check-status", + "check_status", + type=str, + is_flag=False, + flag_value="", # Empty string when used without a value + default=None, + help="Check status of thaw request(s). 
Provide ID for specific request, or no value to check all", +) +@click.option( + "-l", + "--list", + "list_requests", + is_flag=True, + default=False, + help="List all active thaw requests", +) +@click.option( + "-c", + "--include-completed", + "include_completed", + is_flag=True, + default=False, + help="Include completed requests when listing (default: exclude completed)", +) +@click.option( + "-p", + "--porcelain", + is_flag=True, + default=False, + help="Machine-readable output (tab-separated values, no formatting)", +) +@click.pass_context +def thaw( + ctx, + start_date, + end_date, + sync, + duration, + retrieval_tier, + check_status, + list_requests, + include_completed, + porcelain, +): + """ + Thaw repositories from Glacier storage for a specified date range, + or check status of existing thaw requests. + + \b + Four modes of operation: + 1. Create new thaw: Requires --start-date and --end-date + 2. Check specific request: Use --check-status (mounts if ready) + 3. Check all requests: Use --check-status (without value, mounts if ready) + 4. 
List requests: Use --list (shows summary table) + + \b + Examples: + + # Create new thaw request (async) + + curator_cli deepfreeze thaw -s 2025-01-01T00:00:00Z -e 2025-01-15T23:59:59Z --async + + # Create new thaw request (sync - waits for completion) + + curator_cli deepfreeze thaw -s 2025-01-01T00:00:00Z -e 2025-01-15T23:59:59Z --sync + + # Check status of a specific request and mount if ready + + curator_cli deepfreeze thaw --check-status + curator_cli deepfreeze thaw -k + + # Check status of ALL thaw requests and mount if ready + + curator_cli deepfreeze thaw --check-status + curator_cli deepfreeze thaw -k + + # List active thaw requests (excludes completed by default) + + curator_cli deepfreeze thaw --list + curator_cli deepfreeze thaw -l + + # List all thaw requests (including completed) + + curator_cli deepfreeze thaw --list --include-completed + curator_cli deepfreeze thaw -l -c + """ + # Validate mutual exclusivity + # Note: check_status can be None (not provided), "" (flag without value), or a string ID + modes_active = sum( + [bool(start_date or end_date), check_status is not None, bool(list_requests)] + ) + + if modes_active == 0: + click.echo( + "Error: Must specify one of: --start-date/--end-date (-s/-e), --check-status (-k), or --list (-l)" + ) + ctx.exit(1) + + if modes_active > 1: + click.echo( + "Error: Cannot use --start-date/--end-date with --check-status (-k) or --list (-l)" + ) + ctx.exit(1) + + # Validate that create mode has both start and end dates + if (start_date or end_date) and not (start_date and end_date): + click.echo( + "Error: Both --start-date and --end-date are required for creating a new thaw request" + ) + ctx.exit(1) + + manual_options = { + "start_date": start_date, + "end_date": end_date, + "sync": sync, + "duration": duration, + "retrieval_tier": retrieval_tier, + "check_status": check_status, + "list_requests": list_requests, + "include_completed": include_completed, + "porcelain": porcelain, + } + action = CLIAction( + 
ctx.info_name, + ctx.obj["configdict"], + manual_options, + [], + True, + ) + action.do_singleton_action(dry_run=ctx.obj["dry_run"]) + + +@deepfreeze.command() +@click.option( + "-p", + "--porcelain", + is_flag=True, + default=False, + help="Output plain text without formatting (suitable for scripting)", +) +@click.pass_context +def repair_metadata(ctx, porcelain): + """ + Repair repository metadata to match actual S3 storage state + + Scans all repositories and checks if their metadata (thaw_state) matches + the actual S3 storage class. Repositories stored in GLACIER should have + thaw_state='frozen', but sometimes metadata can get out of sync. + + This command will: + - Scan all repositories in the status index + - Check actual S3 storage class for each repository + - Update thaw_state='frozen' for repositories actually in GLACIER + - Report on all changes made + + Use --dry-run to see what would be changed without making modifications. + """ + manual_options = { + "porcelain": porcelain, + } + # Normalize action name: Click converts underscores to dashes, + # but CLASS_MAP and options use underscores + action_name = ctx.info_name.replace('-', '_') + action = CLIAction( + action_name, + ctx.obj["configdict"], + manual_options, + [], + True, + ) + action.do_singleton_action(dry_run=ctx.obj["dry_run"]) diff --git a/curator/cli_singletons/object_class.py b/curator/cli_singletons/object_class.py index cb854250..b2606a28 100644 --- a/curator/cli_singletons/object_class.py +++ b/curator/cli_singletons/object_class.py @@ -4,30 +4,39 @@ import typing as t import logging import sys -from voluptuous import Schema + from es_client.builder import Builder from es_client.exceptions import FailedValidation from es_client.helpers.schemacheck import SchemaCheck from es_client.helpers.utils import prune_nones +from voluptuous import Schema + from curator import IndexList, SnapshotList from curator.debug import debug from curator.actions import ( Alias, Allocation, + Cleanup, 
Close, ClusterRouting, CreateIndex, DeleteIndices, + DeleteSnapshots, ForceMerge, IndexSettings, Open, + Refreeze, Reindex, + RepairMetadata, Replicas, + Restore, Rollover, + Rotate, + Setup, Shrink, Snapshot, - DeleteSnapshots, - Restore, + Status, + Thaw, ) from curator.defaults.settings import VERSION_MAX, VERSION_MIN, snapshot_actions from curator.exceptions import ConfigurationError, NoIndices, NoSnapshots @@ -38,29 +47,36 @@ logger = logging.getLogger(__name__) CLASS_MAP = { - 'alias': Alias, - 'allocation': Allocation, - 'close': Close, - 'cluster_routing': ClusterRouting, - 'create_index': CreateIndex, - 'delete_indices': DeleteIndices, - 'delete_snapshots': DeleteSnapshots, - 'forcemerge': ForceMerge, - 'index_settings': IndexSettings, - 'open': Open, - 'reindex': Reindex, - 'replicas': Replicas, - 'restore': Restore, - 'rollover': Rollover, - 'shrink': Shrink, - 'snapshot': Snapshot, + "alias": Alias, + "allocation": Allocation, + "cleanup": Cleanup, + "close": Close, + "cluster_routing": ClusterRouting, + "create_index": CreateIndex, + "delete_indices": DeleteIndices, + "delete_snapshots": DeleteSnapshots, + "forcemerge": ForceMerge, + "index_settings": IndexSettings, + "open": Open, + "refreeze": Refreeze, + "reindex": Reindex, + "repair_metadata": RepairMetadata, + "replicas": Replicas, + "restore": Restore, + "rollover": Rollover, + "shrink": Shrink, + "snapshot": Snapshot, + "rotate": Rotate, + "setup": Setup, + "status": Status, + "thaw": Thaw, } EXCLUDED_OPTIONS = [ - 'ignore_empty_list', - 'timeout_override', - 'continue_if_exception', - 'disable_action', + "ignore_empty_list", + "timeout_override", + "continue_if_exception", + "disable_action", ] @@ -109,30 +125,30 @@ def __init__( self.include_system = self.options.pop('include_system', False) # Extract allow_ilm_indices so it can be handled separately. 
- if 'allow_ilm_indices' in self.options: - self.allow_ilm = self.options.pop('allow_ilm_indices') + if "allow_ilm_indices" in self.options: + self.allow_ilm = self.options.pop("allow_ilm_indices") else: self.allow_ilm = False if action == 'alias': debug.lv5('ACTION = ALIAS') self.alias = { - 'name': option_dict['name'], - 'extra_settings': option_dict['extra_settings'], - 'wini': ( - kwargs['warn_if_no_indices'] - if 'warn_if_no_indices' in kwargs + "name": option_dict["name"], + "extra_settings": option_dict["extra_settings"], + "wini": ( + kwargs["warn_if_no_indices"] + if "warn_if_no_indices" in kwargs else False ), } - for k in ['add', 'remove']: + for k in ["add", "remove"]: if k in kwargs: self.alias[k] = {} - self.check_filters(kwargs[k], loc='alias singleton', key=k) - self.alias[k]['filters'] = self.filters + self.check_filters(kwargs[k], loc="alias singleton", key=k) + self.alias[k]["filters"] = self.filters if self.allow_ilm: - self.alias[k]['filters'].append({'filtertype': 'ilm'}) + self.alias[k]["filters"].append({"filtertype": "ilm"}) # No filters for these actions - elif action in ['cluster_routing', 'create_index', 'rollover']: + elif action in ["cleanup", "cluster_routing", "create_index", "refreeze", "repair_metadata", "rollover", "setup", "rotate", "status", "thaw"]: self.action_kwargs = {} if action == 'rollover': debug.lv5('rollover option_dict = %s', option_dict) @@ -146,7 +162,7 @@ def __init__( # pylint: disable=broad-except except Exception as exc: raise ConfigurationError( - f'Unable to connect to Elasticsearch as configured: {exc}' + f"Unable to connect to Elasticsearch as configured: {exc}" ) from exc # If we're here, we'll see the output from GET http(s)://hostname.tld:PORT debug.lv5('Connection result: %s', builder.client.info()) @@ -168,24 +184,23 @@ def check_options(self, option_dict): debug.lv5('Validating provided options: %s', option_dict) # Kludgy work-around to needing 'repository' in options for these actions # but only to 
pass the schema check. It's removed again below. - if self.action in ['delete_snapshots', 'restore']: - option_dict['repository'] = self.repository + if self.action in ["delete_snapshots", "restore"]: + option_dict["repository"] = self.repository _ = SchemaCheck( prune_nones(option_dict), options.get_schema(self.action), - 'options', + "options", f'{self.action} singleton action "options"', ).result() self.options = self.prune_excluded(_) - # Remove this after the schema check, as the action class won't need - # it as an arg - if self.action in ['delete_snapshots', 'restore']: - del self.options['repository'] + # Remove this after the schema check, as the action class won't need it as an arg + if self.action in ["delete_snapshots", "restore"]: + del self.options["repository"] except FailedValidation as exc: logger.critical('Unable to parse options: %s', exc) sys.exit(1) - def check_filters(self, filter_dict, loc='singleton', key='filters'): + def check_filters(self, filter_dict, loc="singleton", key="filters"): """Validate provided filters""" try: debug.lv5('Validating provided filters: %s', filter_dict) @@ -210,10 +225,10 @@ def do_filters(self): ]: self.filters.append({'filtertype': 'ilm', 'exclude': True}) try: - self.list_object.iterate_filters({'filters': self.filters}) + self.list_object.iterate_filters({"filters": self.filters}) self.list_object.empty_list_check() except (NoIndices, NoSnapshots) as exc: - otype = 'index' if isinstance(exc, NoIndices) else 'snapshot' + otype = "index" if isinstance(exc, NoIndices) else "snapshot" if self.ignore: logger.info('Singleton action not performed: empty %s list', otype) sys.exit(0) @@ -237,13 +252,13 @@ def get_list_object(self) -> t.Union[IndexList, SnapshotList]: def get_alias_obj(self): """Get the Alias object""" action_obj = Alias( - name=self.alias['name'], extra_settings=self.alias['extra_settings'] + name=self.alias["name"], extra_settings=self.alias["extra_settings"] ) - for k in ['remove', 'add']: + for k in 
["remove", "add"]: if k in self.alias: msg = ( f"{'Add' if k == 'add' else 'Remov'}ing matching indices " - f"{'to' if k == 'add' else 'from'} alias \"{self.alias['name']}\"" + f'{"to" if k == "add" else "from"} alias "{self.alias["name"]}"' ) debug.lv4(msg) self.alias[k]['ilo'] = IndexList( @@ -255,17 +270,23 @@ def get_alias_obj(self): {'filters': self.alias[k]['filters']} ) fltr = getattr(action_obj, k) - fltr(self.alias[k]['ilo'], warn_if_no_indices=self.alias['wini']) + fltr(self.alias[k]["ilo"], warn_if_no_indices=self.alias["wini"]) return action_obj def do_singleton_action(self, dry_run=False): """Execute the (ostensibly) completely ready to run action""" debug.lv3('Doing the singleton "%s" action here.', self.action) try: - if self.action == 'alias': + if self.action == "alias": action_obj = self.get_alias_obj() - elif self.action in ['cluster_routing', 'create_index', 'rollover']: + elif self.action in ["cluster_routing", "create_index", "rollover"]: + action_obj = self.action_class(self.client, **self.options) + elif self.action in ["cleanup", "refreeze", "repair_metadata", "setup", "rotate", "status", "thaw"]: + logger.debug( + f"Declaring Deepfreeze action object with options: {self.options}" + ) action_obj = self.action_class(self.client, **self.options) + logger.debug("Deepfreeze action object declared") else: self.get_list_object() self.do_filters() diff --git a/curator/defaults/option_defaults.py b/curator/defaults/option_defaults.py index 83b96231..1dddb49b 100644 --- a/curator/defaults/option_defaults.py +++ b/curator/defaults/option_defaults.py @@ -1,5 +1,7 @@ """Action Option Schema definitions""" +from datetime import datetime + from voluptuous import All, Any, Boolean, Coerce, Optional, Range, Required # pylint: disable=E1120 @@ -39,10 +41,10 @@ def conditions(): Coerce(int), Optional('max_size'): Any(str)}} """ return { - Optional('conditions'): { - Optional('max_age'): Any(str), - Optional('max_docs'): Coerce(int), - Optional('max_size'): 
Any(str), + Optional("conditions"): { + Optional("max_age"): Any(str), + Optional("max_docs"): Coerce(int), + Optional("max_size"): Any(str), } } @@ -64,7 +66,7 @@ def count(): """ :returns: {Required('count'): All(Coerce(int), Range(min=0, max=10))} """ - return {Required('count'): All(Coerce(int), Range(min=0, max=10))} + return {Required("count"): All(Coerce(int), Range(min=0, max=10))} def delay(): @@ -209,7 +211,7 @@ def include_global_state(action): Any(bool, All(Any(str), Boolean()))} """ default = False - if action == 'snapshot': + if action == "snapshot": default = True return { Optional('include_global_state', default=default): Any( # type: ignore @@ -268,7 +270,7 @@ def index_settings(): """ :returns: {Required('index_settings'): {'index': dict}} """ - return {Required('index_settings'): {'index': dict}} + return {Required("index_settings"): {"index": dict}} def indices(): @@ -282,7 +284,7 @@ def key(): """ :returns: {Required('key'): Any(str)} """ - return {Required('key'): Any(str)} + return {Required("key"): Any(str)} def max_num_segments(): @@ -291,7 +293,7 @@ def max_num_segments(): {Required('max_num_segments'): All(Coerce(int), Range(min=1, max=32768))} """ - return {Required('max_num_segments'): All(Coerce(int), Range(min=1, max=32768))} + return {Required("max_num_segments"): All(Coerce(int), Range(min=1, max=32768))} # pylint: disable=unused-argument @@ -463,7 +465,7 @@ def remote_filters(): # validate_actions() method in utils.py return { Optional( - 'remote_filters', + "remote_filters", default=[ { 'filtertype': 'pattern', @@ -480,21 +482,21 @@ def rename_pattern(): """ :returns: {Optional('rename_pattern'): Any(str)} """ - return {Optional('rename_pattern'): Any(str)} + return {Optional("rename_pattern"): Any(str)} def rename_replacement(): """ :returns: {Optional('rename_replacement'): Any(str)} """ - return {Optional('rename_replacement'): Any(str)} + return {Optional("rename_replacement"): Any(str)} def repository(): """ :returns: 
{Required('repository'): Any(str)} """ - return {Required('repository'): Any(str)} + return {Required("repository"): Any(str)} def request_body(): @@ -503,34 +505,34 @@ def request_body(): See code for more details. """ return { - Required('request_body'): { - Optional('conflicts'): Any('proceed', 'abort'), - Optional('max_docs'): Coerce(int), - Required('source'): { - Required('index'): Any(Any(str), list), - Optional('query'): dict, - Optional('remote'): { - Optional('host'): Any(str), - Optional('username'): Any(str), - Optional('password'): Any(str), - Optional('socket_timeout'): Any(str), - Optional('connect_timeout'): Any(str), - Optional('headers'): Any(str), + Required("request_body"): { + Optional("conflicts"): Any("proceed", "abort"), + Optional("max_docs"): Coerce(int), + Required("source"): { + Required("index"): Any(Any(str), list), + Optional("query"): dict, + Optional("remote"): { + Optional("host"): Any(str), + Optional("username"): Any(str), + Optional("password"): Any(str), + Optional("socket_timeout"): Any(str), + Optional("connect_timeout"): Any(str), + Optional("headers"): Any(str), }, Optional('size'): Coerce(int), Optional('_source'): Any(bool, Boolean()), # type: ignore }, - Required('dest'): { - Required('index'): Any(str), - Optional('version_type'): Any( - 'internal', 'external', 'external_gt', 'external_gte' + Required("dest"): { + Required("index"): Any(str), + Optional("version_type"): Any( + "internal", "external", "external_gt", "external_gte" ), - Optional('op_type'): Any(str), - Optional('pipeline'): Any(str), + Optional("op_type"): Any(str), + Optional("pipeline"): Any(str), }, - Optional('script'): { - Optional('source'): Any(str), - Optional('lang'): Any('painless', 'expression', 'mustache', 'java'), + Optional("script"): { + Optional("source"): Any(str), + Optional("lang"): Any("painless", "expression", "mustache", "java"), }, } } @@ -568,14 +570,14 @@ def routing_type(): """ :returns: {Required('routing_type'): 
Any('allocation', 'rebalance')} """ - return {Required('routing_type'): Any('allocation', 'rebalance')} + return {Required("routing_type"): Any("allocation", "rebalance")} def cluster_routing_setting(): """ :returns: {Required('setting'): Any('enable')} """ - return {Required('setting'): Any('enable')} + return {Required("setting"): Any("enable")} def cluster_routing_value(): @@ -585,7 +587,7 @@ def cluster_routing_value(): Any('all', 'primaries', 'none', 'new_primaries', 'replicas')} """ return { - Required('value'): Any('all', 'primaries', 'none', 'new_primaries', 'replicas') + Required("value"): Any("all", "primaries", "none", "new_primaries", "replicas") } @@ -600,7 +602,7 @@ def shrink_node(): """ :returns: {Required('shrink_node'): Any(str)} """ - return {Required('shrink_node'): Any(str)} + return {Required("shrink_node"): Any(str)} def shrink_prefix(): @@ -664,11 +666,11 @@ def timeout_override(action): ``delete_snapshots`` = ``300`` """ - if action in ['forcemerge', 'restore', 'snapshot']: + if action in ["forcemerge", "restore", "snapshot"]: defval = 21600 - elif action == 'close': + elif action == "close": defval = 180 - elif action == 'delete_snapshots': + elif action == "delete_snapshots": defval = 300 else: defval = None @@ -691,7 +693,7 @@ def wait_for_active_shards(action): ``shrink`` actions. 
""" defval = 0 - if action in ['reindex', 'shrink']: + if action in ["reindex", "shrink"]: defval = 1 return { Optional('wait_for_active_shards', default=defval): Any( # type: ignore @@ -710,7 +712,7 @@ def wait_for_completion(action): """ # if action in ['cold2frozen', 'reindex', 'restore', 'snapshot']: defval = True - if action in ['allocation', 'cluster_routing', 'replicas']: + if action in ["allocation", "cluster_routing", "replicas"]: defval = False return { Optional('wait_for_completion', default=defval): Any( # type: ignore @@ -746,7 +748,7 @@ def wait_interval(action): maxval = 30 # if action in ['allocation', 'cluster_routing', 'replicas']: defval = 3 - if action in ['restore', 'snapshot', 'reindex', 'shrink']: + if action in ["restore", "snapshot", "reindex", "shrink"]: defval = 9 return { Optional('wait_interval', default=defval): Any( # type: ignore @@ -766,3 +768,268 @@ def warn_if_no_indices(): bool, All(Any(str), Boolean()) # type: ignore ) } + + +def ilm_policy_name(): + """ + Name of the ILM policy to create or modify for deepfreeze operations. + If the policy exists, it will be updated to use the deepfreeze repository. + If it does not exist, a new policy will be created with a reasonable tiering strategy. + """ + return {Required("ilm_policy_name"): Any(str)} + + +def index_template_name(): + """ + Name of the index template to attach the ILM policy to. + The template will be updated to use the ILM policy. 
+ """ + return {Required("index_template_name"): Any(str)} + + +def year(): + """ + Year for deepfreeze operations + """ + return {Optional("year", default=datetime.today().year): Coerce(int)} + + +def month(): + """ + Month for deepfreeze operations + """ + return { + Optional("month", default=datetime.today().month): All( + Coerce(int), Range(min=1, max=12) + ) + } + + +def repo_name_prefix(): + """ + Repository name prefix for deepfreeze + """ + return {Optional("repo_name_prefix", default="deepfreeze"): Any(str)} + + +def bucket_name_prefix(): + """ + Bucket name prefix for deepfreeze + """ + return {Optional("bucket_name_prefix", default="deepfreeze"): Any(str)} + + +def base_path_prefix(): + """ + Base path prefix for deepfreeze snapshots + """ + return {Optional("base_path_prefix", default="snapshots"): Any(str)} + + +def canned_acl(): + """ + Canned ACL for S3 objects + """ + return { + Optional("canned_acl", default="private"): Any( + "private", + "public-read", + "public-read-write", + "authenticated-read", + "log-delivery-write", + "bucket-owner-read", + "bucket-owner-full-control", + ) + } + + +def storage_class(): + """ + Storage class for S3 objects + """ + return { + Optional("storage_class", default="intelligent_tiering"): Any( + "standard", + "reduced_redundancy", + "standard_ia", + "intelligent_tiering", + "onezone_ia", + "GLACIER", # Also support uppercase for backwards compatibility + ) + } + + +def provider(): + """ + Cloud provider for deepfreeze + """ + return {Optional("provider", default="aws"): Any("aws")} + + +def rotate_by(): + """ + Rotation strategy for deepfreeze + """ + return {Optional("rotate_by", default="path"): Any("path", "bucket")} + + +def style(): + """ + Naming style for deepfreeze repositories + """ + return { + Optional("style", default="oneup"): Any("oneup", "date", "monthly", "weekly") + } + + +def keep(): + """ + Number of repositories to keep mounted + """ + return {Optional("keep", default=6): All(Coerce(int), 
Range(min=1, max=100))} + + +def refrozen_retention_days(): + """ + Retention period in days for refrozen thaw requests (used by cleanup command) + """ + return { + Optional("refrozen_retention_days", default=None): Any( + None, All(Coerce(int), Range(min=0, max=365)) + ) + } + + +def start_date(): + """ + Start date for thaw operation (ISO 8601 format) + """ + return {Optional("start_date", default=None): Any(None, str)} + + +def end_date(): + """ + End date for thaw operation (ISO 8601 format) + """ + return {Optional("end_date", default=None): Any(None, str)} + + +def sync(): + """ + Sync mode for thaw - wait for restore and mount (True) or return immediately (False) + """ + return {Optional("sync", default=False): Any(bool, All(Any(str), Boolean()))} + + +def duration(): + """ + Number of days to keep objects restored from Glacier + """ + return {Optional("duration", default=7): All(Coerce(int), Range(min=1, max=30))} + + +def retrieval_tier(): + """ + AWS Glacier retrieval tier for thaw operation + """ + return { + Optional("retrieval_tier", default="Standard"): Any( + "Standard", "Expedited", "Bulk" + ) + } + + +def check_status(): + """ + Thaw request ID to check status + """ + return {Optional("check_status", default=None): Any(None, str)} + + +def list_requests(): + """ + Flag to list all thaw requests + """ + return { + Optional("list_requests", default=False): Any(bool, All(Any(str), Boolean())) + } + + +def limit(): + """ + Number of most recent repositories to display in status + """ + return { + Optional("limit", default=None): Any( + None, All(Coerce(int), Range(min=1, max=10000)) + ) + } + + +def show_repos(): + """ + Show repositories section in status output + """ + return {Optional("show_repos", default=False): Any(bool, All(Any(str), Boolean()))} + + +def show_thawed(): + """ + Show thawed repositories section in status output + """ + return {Optional("show_thawed", default=False): Any(bool, All(Any(str), Boolean()))} + + +def show_buckets(): + 
""" + Show buckets section in status output + """ + return { + Optional("show_buckets", default=False): Any(bool, All(Any(str), Boolean())) + } + + +def show_ilm(): + """ + Show ILM policies section in status output + """ + return {Optional("show_ilm", default=False): Any(bool, All(Any(str), Boolean()))} + + +def show_config(): + """ + Show configuration section in status output + """ + return {Optional("show_config", default=False): Any(bool, All(Any(str), Boolean()))} + + +def porcelain(): + """ + Output plain text without formatting (suitable for scripting) + """ + return {Optional("porcelain", default=False): Any(bool, All(Any(str), Boolean()))} + + +def repo_id(): + """ + Repository name/ID to refreeze (if not provided, all thawed repos will be refrozen) + """ + return {Optional("repo_id", default=None): Any(None, str)} + + +def thaw_request_id(): + """ + Thaw request ID to refreeze (if not provided, all open thaw requests will be refrozen) + """ + return {Optional("thaw_request_id", default=None): Any(None, str)} + + +def include_completed(): + """ + Include completed requests when listing thaw requests (default: exclude completed) + """ + return { + Optional("include_completed", default=False): Any( + bool, All(Any(str), Boolean()) + ) + } diff --git a/curator/helpers/testers.py b/curator/helpers/testers.py index f785d020..9237b7a5 100644 --- a/curator/helpers/testers.py +++ b/curator/helpers/testers.py @@ -4,8 +4,8 @@ from voluptuous import Schema from elasticsearch8 import Elasticsearch from elasticsearch8.exceptions import NotFoundError, AuthenticationException -from es_client.helpers.schemacheck import SchemaCheck -from es_client.helpers.utils import prune_nones +from es_client.schemacheck import SchemaCheck +from es_client.utils import prune_nones from curator.debug import debug, begin_end from curator.helpers.getters import get_repository, get_write_index from curator.exceptions import ( diff --git a/curator/helpers/utils.py b/curator/helpers/utils.py 
index 9ceeccb6..4a99ef55 100644 --- a/curator/helpers/utils.py +++ b/curator/helpers/utils.py @@ -5,7 +5,7 @@ import re import logging -from es_client.helpers.utils import ensure_list +from es_client.utils import ensure_list from curator.debug import debug, begin_end from curator.exceptions import FailedExecution diff --git a/curator/indexlist.py b/curator/indexlist.py index e35b858d..2a960704 100644 --- a/curator/indexlist.py +++ b/curator/indexlist.py @@ -5,8 +5,8 @@ import itertools import logging from elasticsearch8.exceptions import NotFoundError, TransportError -from es_client.helpers.schemacheck import SchemaCheck -from es_client.helpers.utils import ensure_list +from es_client.schemacheck import SchemaCheck +from es_client.utils import ensure_list from curator.debug import debug, begin_end from curator.defaults import settings from curator.exceptions import ( diff --git a/curator/repomgrcli.py b/curator/repomgrcli.py index 145d15e9..2d9da64c 100644 --- a/curator/repomgrcli.py +++ b/curator/repomgrcli.py @@ -7,14 +7,14 @@ from elasticsearch8 import ApiError, NotFoundError from es_client.defaults import LOGGING_SETTINGS, SHOW_OPTION from es_client.builder import Builder -from es_client.helpers.config import ( +from es_client.config import ( cli_opts, context_settings, generate_configdict, get_config, ) -from es_client.helpers.logging import configure_logging -from es_client.helpers.utils import option_wrapper +from es_client.logging import configure_logging +from es_client.utils import option_wrapper from curator.debug import debug from curator.defaults.settings import ( CLICK_DEBUG, diff --git a/curator/s3client.py b/curator/s3client.py new file mode 100644 index 00000000..ea445bd6 --- /dev/null +++ b/curator/s3client.py @@ -0,0 +1,20 @@ +""" +s3client.py + +Re-exports S3 client from deepfreeze-core package. +The canonical implementation lives in the 'deepfreeze_core' package. + +This module is kept for backward compatibility with existing curator code. 
+""" + +from deepfreeze_core import ( + AwsS3Client, + S3Client, + s3_client_factory, +) + +__all__ = [ + "AwsS3Client", + "S3Client", + "s3_client_factory", +] diff --git a/curator/singletons.py b/curator/singletons.py index eb0053e3..124cd215 100644 --- a/curator/singletons.py +++ b/curator/singletons.py @@ -1,7 +1,11 @@ """CLI module for curator_cli""" +import warnings import click from es_client.defaults import SHOW_EVERYTHING + +# Suppress urllib3 InsecureRequestWarning when verify_certs is disabled +warnings.filterwarnings('ignore', message='Unverified HTTPS request') from es_client.helpers.config import ( cli_opts, context_settings, @@ -23,6 +27,7 @@ alias, allocation, close, + deepfreeze, delete_indices, delete_snapshots, forcemerge, @@ -103,6 +108,7 @@ def curator_cli( curator_cli.add_command(close) curator_cli.add_command(delete_indices) curator_cli.add_command(delete_snapshots) +curator_cli.add_command(deepfreeze) curator_cli.add_command(forcemerge) curator_cli.add_command(open_indices) curator_cli.add_command(replicas) diff --git a/curator/snapshotlist.py b/curator/snapshotlist.py index 13d13ad9..578d12c7 100644 --- a/curator/snapshotlist.py +++ b/curator/snapshotlist.py @@ -2,7 +2,7 @@ import re import logging -from es_client.helpers.schemacheck import SchemaCheck +from es_client.schemacheck import SchemaCheck from curator.debug import debug, begin_end from curator.exceptions import ( ConfigurationError, diff --git a/curator/validators/actions.py b/curator/validators/actions.py index e952fd85..921ac70a 100644 --- a/curator/validators/actions.py +++ b/curator/validators/actions.py @@ -1,7 +1,7 @@ """Validate root ``actions`` and individual ``action`` Schemas""" from voluptuous import Any, In, Schema, Optional, Required -from es_client.helpers.schemacheck import SchemaCheck +from es_client.schemacheck import SchemaCheck from curator.defaults import settings diff --git a/curator/validators/filter_functions.py b/curator/validators/filter_functions.py index 
8b656e1b..56982e74 100644 --- a/curator/validators/filter_functions.py +++ b/curator/validators/filter_functions.py @@ -2,8 +2,8 @@ import logging from voluptuous import Any, In, Required, Schema -from es_client.helpers.schemacheck import SchemaCheck -from es_client.helpers.utils import prune_nones +from es_client.schemacheck import SchemaCheck +from es_client.utils import prune_nones from curator.debug import debug from curator.defaults import settings, filtertypes from curator.exceptions import ConfigurationError diff --git a/curator/validators/options.py b/curator/validators/options.py index 3f83ded7..1359591d 100644 --- a/curator/validators/options.py +++ b/curator/validators/options.py @@ -1,6 +1,7 @@ """Set up voluptuous Schema defaults for various actions""" from voluptuous import Schema + from curator.defaults import option_defaults @@ -18,12 +19,12 @@ def action_specific(action): :rtype: list """ options = { - 'alias': [ + "alias": [ option_defaults.name(action), option_defaults.warn_if_no_indices(), option_defaults.extra_settings(), ], - 'allocation': [ + "allocation": [ option_defaults.search_pattern(), option_defaults.key(), option_defaults.value(), @@ -32,12 +33,12 @@ def action_specific(action): option_defaults.wait_interval(action), option_defaults.max_wait(action), ], - 'close': [ + "close": [ option_defaults.search_pattern(), option_defaults.delete_aliases(), option_defaults.skip_flush(), ], - 'cluster_routing': [ + "cluster_routing": [ option_defaults.routing_type(), option_defaults.cluster_routing_setting(), option_defaults.cluster_routing_value(), @@ -45,40 +46,90 @@ def action_specific(action): option_defaults.wait_interval(action), option_defaults.max_wait(action), ], - 'cold2frozen': [ + "cold2frozen": [ option_defaults.search_pattern(), option_defaults.c2f_index_settings(), option_defaults.c2f_ignore_index_settings(), - option_defaults.wait_for_completion('cold2frozen'), + option_defaults.wait_for_completion("cold2frozen"), ], - 
'create_index': [ + "create_index": [ option_defaults.name(action), option_defaults.ignore_existing(), option_defaults.extra_settings(), ], + 'setup': [ + option_defaults.year(), + option_defaults.month(), + option_defaults.repo_name_prefix(), + option_defaults.bucket_name_prefix(), + option_defaults.base_path_prefix(), + option_defaults.canned_acl(), + option_defaults.storage_class(), + option_defaults.provider(), + option_defaults.rotate_by(), + option_defaults.style(), + option_defaults.ilm_policy_name(), + option_defaults.index_template_name(), + option_defaults.porcelain(), + ], + 'rotate': [ + option_defaults.keep(), + option_defaults.year(), + option_defaults.month(), + ], + 'cleanup': [ + option_defaults.refrozen_retention_days(), + ], + 'status': [ + option_defaults.limit(), + option_defaults.show_repos(), + option_defaults.show_thawed(), + option_defaults.show_buckets(), + option_defaults.show_ilm(), + option_defaults.show_config(), + option_defaults.porcelain(), + ], + 'thaw': [ + option_defaults.start_date(), + option_defaults.end_date(), + option_defaults.sync(), + option_defaults.duration(), + option_defaults.retrieval_tier(), + option_defaults.check_status(), + option_defaults.list_requests(), + option_defaults.include_completed(), + option_defaults.porcelain(), + ], + 'refreeze': [ + option_defaults.thaw_request_id(), + option_defaults.porcelain(), + ], + 'repair_metadata': [ + option_defaults.porcelain(), + ], 'delete_indices': [ option_defaults.search_pattern(), ], - 'delete_snapshots': [ + "delete_snapshots": [ option_defaults.repository(), option_defaults.retry_interval(), option_defaults.retry_count(), ], - 'forcemerge': [ + "forcemerge": [ option_defaults.search_pattern(), option_defaults.delay(), option_defaults.max_num_segments(), ], - 'index_settings': [ + "index_settings": [ option_defaults.search_pattern(), option_defaults.index_settings(), option_defaults.ignore_unavailable(), option_defaults.preserve_existing(), ], - 'open': [ + "open": 
[ option_defaults.search_pattern(), ], - 'reindex': [ + "reindex": [ option_defaults.request_body(), option_defaults.refresh(), option_defaults.requests_per_second(), @@ -95,21 +146,21 @@ def action_specific(action): option_defaults.migration_prefix(), option_defaults.migration_suffix(), ], - 'replicas': [ + "replicas": [ option_defaults.search_pattern(), option_defaults.count(), option_defaults.wait_for_completion(action), option_defaults.wait_interval(action), option_defaults.max_wait(action), ], - 'rollover': [ + "rollover": [ option_defaults.name(action), option_defaults.new_index(), option_defaults.conditions(), option_defaults.extra_settings(), option_defaults.wait_for_active_shards(action), ], - 'restore': [ + "restore": [ option_defaults.repository(), option_defaults.name(action), option_defaults.indices(), @@ -125,7 +176,7 @@ def action_specific(action): option_defaults.max_wait(action), option_defaults.skip_repo_fs_check(), ], - 'snapshot': [ + "snapshot": [ option_defaults.search_pattern(), option_defaults.repository(), option_defaults.name(action), @@ -137,7 +188,7 @@ def action_specific(action): option_defaults.max_wait(action), option_defaults.skip_repo_fs_check(), ], - 'shrink': [ + "shrink": [ option_defaults.search_pattern(), option_defaults.shrink_node(), option_defaults.node_filters(), diff --git a/docker_test/scripts/add_s3_credentials.sh b/docker_test/scripts/add_s3_credentials.sh new file mode 100755 index 00000000..78bcc92d --- /dev/null +++ b/docker_test/scripts/add_s3_credentials.sh @@ -0,0 +1,37 @@ +#!/bin/bash + +# Prompt for S3 credentials (silent input for security) +read -sp "Enter S3 Access Key: " ACCESS_KEY +echo +read -sp "Enter S3 Secret Key: " SECRET_KEY +echo +read -p "Enter Elasticsearch version: " VERSION +echo + +# Get a list of running Elasticsearch container IDs +CONTAINERS=$(docker ps --filter "ancestor=curator_estest:${VERSION}" --format "{{.ID}}") + +if [ -z "$CONTAINERS" ]; then + echo "No running Elasticsearch 
containers found."
+    exit 1
+fi
+
+# Loop through each container and set the credentials
+for CONTAINER in $CONTAINERS; do
+    echo "Setting credentials in container $CONTAINER..."
+    echo "$ACCESS_KEY" | docker exec -i "$CONTAINER" bin/elasticsearch-keystore add s3.client.default.access_key --stdin
+    echo "$SECRET_KEY" | docker exec -i "$CONTAINER" bin/elasticsearch-keystore add s3.client.default.secret_key --stdin
+    docker restart "$CONTAINER"
+    echo "Restarted container $CONTAINER."
+done
+
+echo "S3 credentials have been set in all Elasticsearch containers."
+
+echo "Adding enterprise license"
+if [[ -f license.json ]]; then
+    curl -X PUT "http://localhost:9200/_license" \
+        -H "Content-Type: application/json" \
+        -d @license.json
+else
+    curl -X POST "http://localhost:9200/_license/start_trial?acknowledge=true"
+fi
diff --git a/fix_repo_dates.py b/fix_repo_dates.py
new file mode 100644
index 00000000..f6df991d
--- /dev/null
+++ b/fix_repo_dates.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python3
+"""Fix incorrect date ranges for specific repositories"""
+
+import urllib3
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+
+from elasticsearch8 import Elasticsearch
+
+# Connect to Elasticsearch (adjust if needed)
+client = Elasticsearch(
+    ["https://192.168.10.81:9200"],
+    verify_certs=False
+)
+
+STATUS_INDEX = "deepfreeze-status"
+
+# Repositories to fix (set start=None, end=None to clear bad dates)
+repos_to_fix = {
+    "deepfreeze-000093": {"start": None, "end": None},
+}
+
+for repo_name, new_dates in repos_to_fix.items():
+    print(f"\nFixing {repo_name}...")
+
+    # Find the repo document
+    query = {"query": {"term": {"name.keyword": repo_name}}}
+    try:
+        response = client.search(index=STATUS_INDEX, body=query)
+
+        if response["hits"]["total"]["value"] == 0:
+            print(f"  Repository {repo_name} not found in status index")
+            continue
+
+        doc_id = response["hits"]["hits"][0]["_id"]
+        current_doc = 
response["hits"]["hits"][0]["_source"] + + print(f" Current dates: {current_doc.get('start')} to {current_doc.get('end')}") + + # Update with new dates + update_body = {"doc": new_dates} + client.update(index=STATUS_INDEX, id=doc_id, body=update_body) + + print(f" Updated to: {new_dates['start']} to {new_dates['end']}") + + except Exception as e: + print(f" Error: {e}") + +print("\nDone!") diff --git a/pyproject.toml b/pyproject.toml index c2858162..2ae82e8a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,10 @@ keywords = [ 'index-expiry' ] dependencies = [ - "es_client==8.19.5" + "boto3", + "deepfreeze-core @ git+https://github.com/elastic/deepfreeze.git@v1.0.0#subdirectory=packages/deepfreeze-core", + "es_client==8.19.5", + "rich" ] [project.optional-dependencies] @@ -37,6 +40,8 @@ test = [ "requests", "pytest >=7.2.1", "pytest-cov", + "pytest-xdist", + "filelock", ] doc = ["sphinx", "sphinx_rtd_theme"] diff --git a/pytest.ini b/pytest.ini index 04934b6f..23633d22 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,2 +1,6 @@ [pytest] log_format = %(asctime)s %(levelname)-9s %(name)22s %(funcName)22s:%(lineno)-4d %(message)s +markers = + integration: integration tests that require external services + deepfreeze: deepfreeze-specific integration tests + slow: slow-running tests that may take hours to complete diff --git a/ruff.toml b/ruff.toml new file mode 100644 index 00000000..5b14ed6c --- /dev/null +++ b/ruff.toml @@ -0,0 +1,77 @@ +# Exclude a variety of commonly ignored directories. +exclude = [ + ".bzr", + ".direnv", + ".eggs", + ".git", + ".git-rewrite", + ".hg", + ".ipynb_checkpoints", + ".mypy_cache", + ".nox", + ".pants.d", + ".pyenv", + ".pytest_cache", + ".pytype", + ".ruff_cache", + ".svn", + ".tox", + ".venv", + ".vscode", + "__pypackages__", + "_build", + "buck-out", + "build", + "dist", + "node_modules", + "site-packages", + "venv", +] + +# Same as Black. 
+line-length = 88 +indent-width = 4 + +# Assume Python 3.9 +target-version = "py39" + +[lint] +# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default. +# Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or +# McCabe complexity (`C901`) by default. +select = ["E4", "E7", "E9", "F"] +ignore = [] + +# Allow fix for all enabled rules (when `--fix`) is provided. +fixable = ["ALL"] +unfixable = [] + +# Allow unused variables when underscore-prefixed. +dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" + +[format] +# Like Black, use double quotes for strings. +quote-style = "double" + +# Like Black, indent with spaces, rather than tabs. +indent-style = "space" + +# Like Black, respect magic trailing commas. +skip-magic-trailing-comma = false + +# Like Black, automatically detect the appropriate line ending. +line-ending = "auto" + +# Enable auto-formatting of code examples in docstrings. Markdown, +# reStructuredText code/literal blocks and doctests are all supported. +# +# This is currently disabled by default, but it is planned for this +# to be opt-out in the future. +docstring-code-format = false + +# Set the line length limit used when formatting code snippets in +# docstrings. +# +# This only has an effect when the `docstring-code-format` setting is +# enabled. 
+docstring-code-line-length = "dynamic" \ No newline at end of file diff --git a/run_singleton.py b/run_singleton.py index d8e99de3..06397471 100755 --- a/run_singleton.py +++ b/run_singleton.py @@ -17,8 +17,13 @@ Be sure to substitute your unicode variant for en_US.utf8 """ +import warnings import sys import click + +# Suppress urllib3 InsecureRequestWarning when verify_certs is disabled +warnings.filterwarnings('ignore', message='Unverified HTTPS request') + from curator.singletons import curator_cli if __name__ == '__main__': diff --git a/seed_data_to_ds.py b/seed_data_to_ds.py new file mode 100755 index 00000000..37a75e23 --- /dev/null +++ b/seed_data_to_ds.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 + +import time +from datetime import datetime + +from elasticsearch import Elasticsearch, NotFoundError + +# Configuration +ES_HOST = "https://es-test.bwortman.us" # Change if needed +DATASTREAM_NAME = "test_datastream" +ES_USERNAME = "bret" +ES_PASSWORD = "2xqT2IO1OQ%tfMHP" + +# Initialize Elasticsearch client with authentication +es = Elasticsearch(ES_HOST, basic_auth=(ES_USERNAME, ES_PASSWORD)) + + +def create_index_template(es, alias_name): + """Creates an index template with a rollover alias.""" + template_body = { + "index_patterns": [f"{alias_name}-*"], + "settings": {"number_of_shards": 1, "number_of_replicas": 1}, + "aliases": {alias_name: {"is_write_index": True}}, + } + es.indices.put_template(name=alias_name, body=template_body) + + +def create_initial_index(es, alias_name): + """Creates the initial index for rollover if it doesn't exist.""" + first_index = f"{alias_name}-000001" + try: + if not es.indices.exists(index=first_index): + es.indices.create( + index=first_index, + body={"aliases": {alias_name: {"is_write_index": True}}}, + ) + except NotFoundError: + print(f"Index {first_index} not found, creating a new one.") + es.indices.create( + index=first_index, body={"aliases": {alias_name: {"is_write_index": True}}} + ) + + +# Ensure the index 
template and initial index exist +create_index_template(es, DATASTREAM_NAME) +create_initial_index(es, DATASTREAM_NAME) + +while True: + document = { + "timestamp": datetime.utcnow().isoformat(), + "message": "Hello, Elasticsearch!", + } + + es.index(index=DATASTREAM_NAME, document=document) + # print(f"Indexed document: {document}") + + # Perform rollover if conditions are met + try: + es.indices.rollover( + alias=DATASTREAM_NAME, body={"conditions": {"max_docs": 1000}} + ) + except NotFoundError: + print("Rollover failed: Alias not found. Ensure the initial index is created.") + + time.sleep(1) diff --git a/tests/integration/DEEPFREEZE_HOW_IT_WORKS.md b/tests/integration/DEEPFREEZE_HOW_IT_WORKS.md new file mode 100644 index 00000000..737f6970 --- /dev/null +++ b/tests/integration/DEEPFREEZE_HOW_IT_WORKS.md @@ -0,0 +1,552 @@ +# How Deepfreeze Works: A Complete Guide + +## Overview + +Deepfreeze is a system for archiving Elasticsearch data to AWS S3 Glacier using Elasticsearch's native **searchable snapshots** feature integrated with **Index Lifecycle Management (ILM)**. + +## Core Concept + +**Deepfreeze does NOT manage snapshots directly.** Instead, it manages: +1. **Elasticsearch snapshot repositories** (S3-backed) +2. **ILM policies** that control when indices become searchable snapshots +3. **Repository rotation** to move old snapshots to Glacier Deep Archive + +The actual snapshot creation and mounting is handled by **Elasticsearch ILM**. + +--- + +## The Complete Workflow + +### Phase 1: Initial Setup (`deepfreeze setup`) + +**What happens:** +1. Creates an S3 bucket (e.g., `my-bucket`) +2. Creates an Elasticsearch snapshot repository pointing to that bucket (e.g., `deepfreeze-000001`) +3. Saves configuration to a status index (`.deepfreeze-status-idx`) + +**Result:** +- You now have a repository that ILM policies can reference for searchable snapshots +- NO snapshots exist yet +- NO indices are frozen yet + +**Key Point:** Setup is a one-time operation. 
It creates the **first repository**. + +--- + +### Phase 2: ILM Manages Data (`elasticsearch` handles this) + +**User creates ILM policies** that reference the deepfreeze repository: + +```json +{ + "policy": { + "phases": { + "frozen": { + "min_age": "30m", + "actions": { + "searchable_snapshot": { + "snapshot_repository": "backups", + "force_merge_index": true + } + } + }, + "delete": { + "min_age": "60m", + "actions": { + "delete": { + "delete_searchable_snapshot": false + } + } + }, + "cold": { + "min_age": "7m", + "actions": { + "allocate": { + "number_of_replicas": 0, + "include": {}, + "exclude": {}, + "require": {} + }, + "searchable_snapshot": { + "snapshot_repository": "backups", + "force_merge_index": true + }, + "set_priority": { + "priority": 0 + } + } + }, + "hot": { + "min_age": "0ms", + "actions": { + "forcemerge": { + "max_num_segments": 1 + }, + "rollover": { + "max_age": "3m", + "max_primary_shard_size": "40gb" + }, + "set_priority": { + "priority": 100 + }, + "shrink": { + "number_of_shards": 1, + "allow_write_after_shrink": false + } + } + } + } + } +} +``` + +**What Elasticsearch does automatically:** +1. **Hot phase**: Index is writable, stored on local disk with fast SSD access +2. **Rollover**: When index hits max_age/max_size, new index is created +3. **Cold phase**: Index transitions to cold tier (still on disk, but can be on slower/cheaper storage) + - Index remains fully searchable + - Data is on disk but may be moved to less expensive nodes + - The index name changes: `my-index-000001` → `restored-my-index-000001` +4. **Frozen phase**: Elasticsearch: + - Creates a snapshot in `deepfreeze-000001` repository + - Deletes the local index + - Mounts the snapshot as a **searchable snapshot** (read-only, backed by S3) + - The index name changes: `restored-my-index-000001` → `partial-restored-my-index-000001` +5. 
**Delete phase**: Elasticsearch: + - Deletes the mounted searchable snapshot index + - KEEPS the snapshot in S3 (because `delete_searchable_snapshot: false`) + +**Key Point:** Deepfreeze does NOT trigger snapshots. ILM does this automatically based on index age. + +--- + +### Phase 3: Repository Rotation (`deepfreeze rotate`) + +**Rotation happens periodically** (e.g., monthly, or on-demand) to: +1. Create a **new repository** (e.g., `deepfreeze-000002`) +2. Create a new, versioned ILM policy which uses the **new repository** for future snapshots +3. Unmount old repositories and push them to Glacier Deep Archive +4. Clean up old ILM policy versions + +**Step-by-step what happens:** + +#### 3.1: Create New Repository +```python +# Creates: deepfreeze-000002 +# With either: +# - New S3 bucket: my-bucket-000002 (if rotate_by=bucket) +# - New S3 path: my-bucket/snapshots-000002 (if rotate_by=path) +``` + +#### 3.2: Version ILM Policies + +**CRITICAL**: Deepfreeze does NOT modify existing policies. It creates **versioned copies**: + +``` +Old policy: my-ilm-policy-000001 → references deepfreeze-000001 +New policy: my-ilm-policy-000002 → references deepfreeze-000002 +``` + +This ensures: +- Old indices keep their old policies and can still access old snapshots +- New indices use new policies with the new repository +- No disruption to existing data +- Index template updates to point to latest versioned ILM policy + +#### 3.3: Update Index Templates + +All index templates are updated to use the new versioned policies: + +```yaml +# Before rotation: +template: logs-* + settings: + index.lifecycle.name: my-ilm-policy-000001 + +# After rotation: +template: logs-* + settings: + index.lifecycle.name: my-ilm-policy-000002 +``` + +**Result**: New indices created from this template will use the new policy. 
+ +#### 3.4: Update Repository Date Ranges + +For each **mounted** repository, deepfreeze scans the searchable snapshot indices to determine: +- `earliest`: Timestamp of oldest document across all mounted indices +- `latest`: Timestamp of newest document across all mounted indices + +These are stored in the status index for tracking. + +#### 3.5: Unmount Old Repositories + +Based on the `keep` parameter (default: 6), deepfreeze: +1. Sorts repositories by version (newest first) +2. Keeps the first N repositories mounted +3. Unmounts older repositories: + - Deletes all searchable snapshot indices from that repo (e.g., `partial-my-index-*`) + - Deletes the Elasticsearch repository definition + - Marks the repository as "unmounted" in the status index + - The underlying S3 bucket/path still contains the snapshots + +#### 3.6: Push to Glacier Deep Archive + +For each unmounted repository: +```python +# Changes S3 storage class from Intelligent-Tiering to Glacier Deep Archive +push_to_glacier(s3_client, repository) +``` + +This reduces storage costs dramatically (S3 → Glacier Deep Archive = ~95% cost reduction). + +#### 3.7: Cleanup Old ILM Policies + +For each unmounted repository, deepfreeze: +1. Finds all ILM policies with the same version suffix (e.g., `-000001`) +2. Checks if they're still in use by any: + - Indices + - Data streams + - Index templates +3. Deletes policies that are no longer in use + +**Example**: +- Repository `deepfreeze-000001` is unmounted +- Policy `my-ilm-policy-000001` exists +- No indices use this policy +- No templates reference this policy +- → Policy is deleted + +--- + +## Storage Lifecycle Summary + +``` +1. Hot Index (local disk - hot tier): + - Writable + - Fast queries (SSD) + - Stored on ES hot tier data nodes + - Cost: High (fast SSD storage) + +2. Cold Index (local disk - cold tier): + - Read-only + - Good query performance + - Stored on ES cold tier data nodes (cheaper disks) + - Cost: Medium (standard disk storage) + +3. 
Frozen Index (searchable snapshot, S3): + - Read-only + - Slower queries (S3 latency) + - Stored in S3 (Intelligent-Tiering) + - Repository is "mounted" + - Cost: Low (S3) + +4. Archived Snapshot (Glacier Deep Archive): + - Not queryable + - Repository is "unmounted" + - Stored in Glacier Deep Archive + - Cost: Very low (~$1/TB/month) + - Retrieval time: 12-48 hours (if needed) +``` + +--- + +## Key Data Structures + +### 1. Status Index (`.deepfreeze-status-idx`) + +Stores two types of documents: + +**Settings Document** (`_id: deepfreeze-settings`): +```json +{ + "repo_name_prefix": "deepfreeze", + "bucket_name_prefix": "my-bucket", + "base_path_prefix": "snapshots", + "storage_class": "intelligent_tiering", + "rotate_by": "path", + "last_suffix": "000003", + "provider": "aws", + "style": "oneup" +} +``` + +**Repository Documents** (`_id: {repo_name}`): +```json +{ + "name": "deepfreeze-000002", + "bucket": "my-bucket", + "base_path": "/snapshots-000002", + "earliest": 1704067200000, // Unix timestamp + "latest": 1735689600000, // Unix timestamp + "is_thawed": false, + "is_mounted": true, + "indices": [ + "partial-logs-2024.01.01-000001", + "partial-logs-2024.01.02-000001" + ] +} +``` + +### 2. Repository Naming + +**Format**: `{prefix}-{suffix}` + +**Two styles:** +- **oneup** (default): `deepfreeze-000001`, `deepfreeze-000002`, etc. +- **date**: `deepfreeze-2024.01`, `deepfreeze-2024.02`, etc. + +### 3. ILM Policy Versioning + +**Pattern**: `{base_name}-{suffix}` + +Example progression: +``` +Setup: my-policy (created by user) +Rotate 1: my-policy-000001 (created by deepfreeze) +Rotate 2: my-policy-000002 (created by deepfreeze) +Rotate 3: my-policy-000003 (created by deepfreeze) +``` + +The original `my-policy` can be deleted after first rotation. + +--- + +## Critical Configuration Points + +### 1. 
ILM Delete Action + +**MUST set** `delete_searchable_snapshot: false`: + +```json +{ + "delete": { + "actions": { + "delete": { + "delete_searchable_snapshot": false // ← CRITICAL! + } + } + } +} +``` + +Without this, Elasticsearch will delete snapshots when indices are deleted, defeating the entire purpose of deepfreeze. + +### 2. Rotation Frequency + +Rotation should happen **BEFORE** repositories get too large: + +**Recommended**: Rotate every 30-90 days depending on: +- Snapshot size +- Number of searchable snapshot indices +- S3 transfer costs for Glacier transitions +- Only push to Glacier after the value of the data has decreased to the point that it's unlikely to be queried any longer. + +**Why**: Once a repository is pushed to Glacier, you cannot query those snapshots without restoring them first (12-48 hour delay). + +### 3. Keep Parameter + +**Default**: `keep=6` + +Keeps the 6 most recent repositories mounted (queryable). Older repositories are unmounted and pushed to Glacier. + +**Tuning**: +- **Higher keep**: More data queryable, higher S3 costs +- **Lower keep**: Less data queryable, lower costs, more in Glacier + +--- + +## Testing Workflow + +### Manual Testing Steps: + +1. **Setup** (once): + ```bash + curator_cli deepfreeze setup \ + --bucket-name my-test-bucket \ + --repo-name deepfreeze + ``` + +2. 
**Create ILM Policy** (once): + ```bash + curl -X PUT "localhost:9200/_ilm/policy/logs-policy" \ + -H 'Content-Type: application/json' \ + -d '{ + "policy": { + "phases": { + "frozen": { + "min_age": "30m", + "actions": { + "searchable_snapshot": { + "snapshot_repository": "backups", + "force_merge_index": true + } + } + }, + "delete": { + "min_age": "60m", + "actions": { + "delete": { + "delete_searchable_snapshot": false + } + } + }, + "cold": { + "min_age": "7m", + "actions": { + "allocate": { + "number_of_replicas": 0, + "include": {}, + "exclude": {}, + "require": {} + }, + "searchable_snapshot": { + "snapshot_repository": "backups", + "force_merge_index": true + }, + "set_priority": { + "priority": 0 + } + } + }, + "hot": { + "min_age": "0ms", + "actions": { + "forcemerge": { + "max_num_segments": 1 + }, + "rollover": { + "max_age": "3m", + "max_primary_shard_size": "40gb" + }, + "set_priority": { + "priority": 100 + }, + "shrink": { + "number_of_shards": 1, + "allow_write_after_shrink": false + } + } + } + } + } +}' + ``` + +3. **Create Index Template** (once): + ```bash + curl -X PUT "localhost:9200/_index_template/logs-template" \ + -H 'Content-Type: application/json' \ + -d '{ + "index_patterns": ["logs-*"], + "template": { + "settings": { + "index.lifecycle.name": "logs-policy", + "index.lifecycle.rollover_alias": "logs" + } + } + }' + ``` + +4. **Create Initial Index** (once): + ```bash + curl -X PUT "localhost:9200/logs-2024.01.01-000001" \ + -H 'Content-Type: application/json' \ + -d '{ + "aliases": { + "logs": {"is_write_index": true} + } + }' + ``` + +5. **Index Data** (ongoing): + ```bash + curl -X POST "localhost:9200/logs/_doc" \ + -H 'Content-Type: application/json' \ + -d '{"message": "test log", "timestamp": "2024-01-01T00:00:00Z"}' + ``` + +6. 
**Wait for ILM** (automatic):
+   - After 3 minutes: Index rolls over
+   - After 7 minutes from creation: Index moves to cold phase
+   - After 30 minutes from creation: Index becomes frozen (searchable snapshot)
+   - After 60 minutes from creation: Index is deleted (snapshot remains)
+
+   (These intervals match the minute-scale test policy defined in step 2 above;
+   real-world policies would use days.)
+
+7. **Rotate** (periodic):
+   ```bash
+   curator_cli deepfreeze rotate --keep 6
+   ```
+
+---
+
+## Common Misconceptions
+
+### ❌ "Deepfreeze creates snapshots"
+**NO.** Elasticsearch ILM creates snapshots when indices reach the frozen phase.
+
+### ❌ "Rotate command snapshots data"
+**NO.** Rotate creates a new repository, updates policies, and unmounts old repos. ILM handles snapshots.
+
+### ❌ "I need to run rotate after every snapshot"
+**NO.** Rotate is periodic (monthly/quarterly). ILM creates snapshots automatically whenever indices age into frozen phase.
+
+### ❌ "Unmounted repos are deleted"
+**NO.** Unmounted repos have their snapshots preserved in S3, just moved to Glacier Deep Archive for cheaper storage.
+
+### ❌ "Old ILM policies are modified"
+**NO.** Old policies are left unchanged. New versioned policies are created.
+
+---
+
+## Integration Test Requirements
+
+Given the above, integration tests should verify:
+
+1. **Setup**:
+   - Creates repository
+   - Creates status index
+   - Saves settings
+
+2. **ILM Integration** (NOT deepfreeze responsibility):
+   - Indices transition to frozen phase
+   - Snapshots are created
+   - Searchable snapshots are mounted
+
+3. **Rotate**:
+   - Creates new repository
+   - Creates versioned ILM policies
+   - Updates templates
+   - Updates repository date ranges
+   - Unmounts old repositories
+   - Pushes to Glacier
+   - Cleans up old policies
+
+4. **Status**:
+   - Reports current repositories
+   - Shows mounted vs unmounted
+   - Shows date ranges
+
+5. 
**Cleanup**: + - Removes thawed repositories after expiration + +--- + +## Timing Considerations for Tests + +**Real-world timing:** +- Rollover: 7 days +- Move to Cold: 7 days after creation +- Move to Frozen: 30 days after creation +- Delete: 365 days after creation +- Rotate: Monthly (30 days) + +**Test timing options:** +1. **Mock ILM**: Don't wait for real ILM, manually create searchable snapshots +2. **Fast ILM**: Set phases to seconds (hot=7s, cold=7s, frozen=30s, delete=45s) +3. **Hybrid**: Use fast ILM for lifecycle tests, mocks for rotate tests + +**Recommended for testing:** +- Use environment variable to control interval scaling +- All timing expressed as multiples of a base interval +- Default interval=1s for CI/CD, interval=60s for validation + diff --git a/tests/integration/DEEPFREEZE_THAW_TESTS.md b/tests/integration/DEEPFREEZE_THAW_TESTS.md new file mode 100644 index 00000000..86a42e45 --- /dev/null +++ b/tests/integration/DEEPFREEZE_THAW_TESTS.md @@ -0,0 +1,386 @@ +# Deepfreeze Thaw Integration Tests + +This document describes the integration tests for deepfreeze thaw operations. Note that these +tests don't work as yet, and given the long time each takes to run, I'm not sure they actually +have any value. I'm retaining them in case but honestly do all integration testing manually. + +## Overview + +The thaw integration tests (`test_deepfreeze_thaw.py`) verify the complete lifecycle of thawing repositories from Glacier storage, including: + +1. Creating thaw requests with specific date ranges +2. Monitoring restore progress using porcelain output +3. Verifying indices are mounted correctly after restoration +4. Verifying data can be searched in mounted indices +5. Running cleanup operations +6. Verifying repositories are unmounted after cleanup + +## Test Modes + +These tests support two modes of operation: + +### Fast Mode (Development/CI) + +Fast mode uses mocked operations to complete quickly, suitable for CI/CD pipelines. 
Again, +the tests aren't reliable yet. + +```bash +DEEPFREEZE_FAST_MODE=1 pytest tests/integration/test_deepfreeze_thaw.py -v +``` + +**Duration**: ~5-10 minutes per test +**Use case**: Local development, CI/CD, quick verification + +**What's mocked:** +- Glacier restore operations (instant completion) +- S3 object restoration progress +- Time-based expiration (accelerated) + +### Full Test Mode (Production Validation) + +Full test mode runs against real AWS Glacier, taking up to 6 hours for complete restoration. + +```bash +DEEPFREEZE_FULL_TEST=1 pytest tests/integration/test_deepfreeze_thaw.py -v +``` + +**Duration**: Up to 6 hours per test (depending on AWS Glacier restore tier) +**Use case**: Pre-release validation, production readiness testing + +**Requirements:** +- Valid AWS credentials configured +- S3 bucket access +- Glacier restore permissions +- Elasticsearch instance with snapshot repository support + +## Test Suite + +### Test Cases + +#### 1. `test_thaw_single_repository` + +Tests thawing a single repository containing data for a specific date range. + +**What it tests:** +- Creating test indices with timestamped data +- Snapshotting indices to a repository +- Pushing repository to Glacier +- Creating a thaw request for a specific date range +- Monitoring restore progress using porcelain output +- Verifying correct indices are mounted +- Verifying data is searchable +- Refreezing the repository + +**Date Range:** January 2024 (single month) +**Expected Result:** 1 repository thawed and mounted + +#### 2. `test_thaw_multiple_repositories` + +Tests thawing multiple repositories spanning a date range. 
+ +**What it tests:** +- Creating multiple repositories via rotation +- Creating test data across multiple time periods +- Pushing all repositories to Glacier +- Creating a thaw request spanning multiple repositories +- Verifying all relevant repositories are restored +- Verifying repositories outside the date range are NOT thawed +- Searching data across multiple thawed repositories + +**Date Range:** January-February 2024 (two months) +**Expected Result:** 2 repositories thawed, 1 repository remains frozen + +#### 3. `test_thaw_with_porcelain_output_parsing` + +Tests the porcelain output format and parsing logic. + +**What it tests:** +- Porcelain output format from thaw commands +- Parsing REQUEST and REPO lines +- Checking restore completion status +- Monitoring repository mount status +- Progress tracking (0/100, Complete, etc.) + +**Output Format:** +``` +REQUEST {request_id} {status} {created_at} {start_date} {end_date} +REPO {name} {bucket} {path} {state} {mounted} {progress} +``` + +#### 4. `test_cleanup_removes_expired_repositories` + +Tests automatic cleanup of expired thaw requests. + +**What it tests:** +- Creating a thaw request with short duration +- Manually expiring the request +- Running cleanup operation +- Verifying repositories are unmounted +- Verifying thaw state is reset to frozen +- Verifying thaw request is marked as completed + +**Duration:** 1 day (manually expired for testing) + +## Running the Tests + +### Prerequisites + +1. **Curator Configuration File** + + The tests use the configuration from `~/.curator/curator.yml` by default. 
+ + Create the configuration file if it doesn't exist: + ```bash + mkdir -p ~/.curator + cat > ~/.curator/curator.yml < ~/.curator/curator.yml <1 hour) - useful for CI +export DEEPFREEZE_SKIP_LONG_TESTS=1 + +# Fast mode: Use simulated operations for development +export DEEPFREEZE_FAST_MODE=1 + +# Elasticsearch server (for unit tests) +export TEST_ES_SERVER=http://localhost:9200 +``` + +### curator.yml Example + +```yaml +elasticsearch: + client: + hosts: http://localhost:9200 + request_timeout: 300 + other_settings: + skip_version_test: True + +logging: + loglevel: INFO + logfile: /var/log/curator/curator.log + logformat: default +``` + +## Running Tests + +### Quick Development Testing (Fast Mode) + +Use fast mode for rapid development iteration with simulated Glacier operations: + +```bash +# Run all tests in fast mode +DEEPFREEZE_FAST_MODE=1 pytest tests/integration/test_deepfreeze_integration.py -v + +# Run specific test +DEEPFREEZE_FAST_MODE=1 pytest tests/integration/test_deepfreeze_integration.py::TestDeepfreezeIntegration::test_operations_on_already_thawed_data -v -s +``` + +### Standard Integration Testing + +Run short tests that complete in under 1 hour: + +```bash +# Skip long-running tests +DEEPFREEZE_SKIP_LONG_TESTS=1 pytest tests/integration/test_deepfreeze_integration.py -v +``` + +### Full Integration Testing + +**WARNING: These tests take 6-30+ hours and use real AWS Glacier** + +```bash +# Run all tests including long-running ones +pytest tests/integration/test_deepfreeze_integration.py -v + +# Run only the 24-hour cleanup test +pytest tests/integration/test_deepfreeze_integration.py::TestDeepfreezeIntegration::test_one_day_duration_with_cleanup -v -s + +# Run only 6-hour thaw tests +pytest tests/integration/test_deepfreeze_integration.py -v -m "not slow" + +# Run only the 30-hour test +pytest tests/integration/test_deepfreeze_integration.py -v -m slow +``` + +### Individual Test Execution + +```bash +# Operations on already-thawed data (~15-30 
min) +pytest tests/integration/test_deepfreeze_integration.py::TestDeepfreezeIntegration::test_operations_on_already_thawed_data -v -s + +# Full lifecycle test (~6 hours) +pytest tests/integration/test_deepfreeze_integration.py::TestDeepfreezeIntegration::test_new_thaw_request_full_lifecycle -v -s + +# Thaw + refreeze test (~6.5 hours) +pytest tests/integration/test_deepfreeze_integration.py::TestDeepfreezeIntegration::test_thaw_complete_then_refreeze -v -s + +# Multiple concurrent requests (~6.5 hours) +pytest tests/integration/test_deepfreeze_integration.py::TestDeepfreezeIntegration::test_multiple_concurrent_thaw_requests -v -s + +# Cleanup with mixed states (~30 min) +pytest tests/integration/test_deepfreeze_integration.py::TestDeepfreezeIntegration::test_cleanup_mixed_expiration_states -v -s + +# 24-hour cleanup test (~30 hours) - REQUIRES REAL TIME +pytest tests/integration/test_deepfreeze_integration.py::TestDeepfreezeIntegration::test_one_day_duration_with_cleanup -v -s +``` + +## Test Execution Tips + +### 1. Monitor Long-Running Tests + +Long tests log progress regularly. Use `-s` flag to see output in real-time: + +```bash +pytest tests/integration/test_deepfreeze_integration.py::TestDeepfreezeIntegration::test_thaw_complete_then_refreeze -v -s +``` + +Example output: +``` +================================================================================ +TEST: Thaw Complete Then Refreeze +================================================================================ +Setting up test environment... +Environment ready: bucket=deepfreeze-integration-abc123xyz, repo=df-test-repo-000001 +Creating index test-logs-20240101-000 with 100 docs from 2024-01-01 to 2024-01-31 +... +--- Waiting for thaw to complete (up to 6.0 hours) --- +Check #1 at 0.0 minutes elapsed +Progress: 0/1 repositories mounted +Repo df-test-repo-000001: 125/543 objects restored +Sleeping for 15 minutes... +Check #2 at 15.0 minutes elapsed +... +``` + +### 2. 
Background Execution + +For very long tests, run in background with output redirection: + +```bash +# Run test in background +nohup pytest tests/integration/test_deepfreeze_integration.py::TestDeepfreezeIntegration::test_one_day_duration_with_cleanup -v -s > deepfreeze_test.log 2>&1 & + +# Monitor progress +tail -f deepfreeze_test.log +``` + +### 3. CI/CD Integration + +Example GitHub Actions configuration: + +```yaml +name: Deepfreeze Integration Tests + +on: + # Run on PR for fast tests only + pull_request: + paths: + - 'curator/actions/deepfreeze/**' + + # Full test suite on schedule (weekly) + schedule: + - cron: '0 2 * * 0' # Sunday 2am + +jobs: + fast-tests: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + + - name: Install dependencies + run: pip install -e .[test] + + - name: Run fast tests + env: + DEEPFREEZE_FAST_MODE: 1 + run: pytest tests/integration/test_deepfreeze_integration.py -v + + long-tests: + runs-on: ubuntu-latest + if: github.event_name == 'schedule' + timeout-minutes: 450 # 7.5 hours + steps: + - uses: actions/checkout@v3 + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v2 + with: + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + aws-region: us-east-1 + + - name: Set up Elasticsearch + uses: elastic/elastic-github-actions/elasticsearch@master + with: + stack-version: 8.11.0 + + - name: Run 6-hour tests + run: pytest tests/integration/test_deepfreeze_integration.py -v -m "not slow" +``` + +## Test Coverage + +### Data Validation Tests + +Each test includes comprehensive validation: + +- **Index Searchability**: Verify indices are searchable and return expected results +- **Document Count Verification**: Ensure document counts match before/after operations +- **Timestamp Range Validation**: Verify @timestamp values fall within expected 
ranges +- **Repository State Verification**: Check mounted status and thaw_state +- **Request Status Tracking**: Monitor thaw request lifecycle states + +### Error Scenarios + +Tests handle and verify various error conditions: + +- Timeout scenarios for long-running operations +- Repository conflicts with concurrent operations +- Cleanup behavior with expired vs active data +- Refreeze behavior with already-frozen repositories +- Status checks on non-existent or completed requests + +## Troubleshooting + +### Common Issues + +#### 1. AWS Credentials Not Found + +**Error**: `NoCredentialsError: Unable to locate credentials` + +**Solution**: Configure AWS credentials: +```bash +# Option 1: Configure AWS CLI +aws configure + +# Option 2: Set environment variables +export AWS_ACCESS_KEY_ID=your_key_id +export AWS_SECRET_ACCESS_KEY=your_secret_key +export AWS_DEFAULT_REGION=us-east-1 +``` + +#### 2. Elasticsearch Connection Failed + +**Error**: `ConnectionError: Unable to connect to Elasticsearch` + +**Solution**: Verify Elasticsearch is running and accessible: +```bash +# Check Elasticsearch status +curl http://localhost:9200 + +# Verify curator.yml configuration +cat ~/.curator/curator.yml + +# Test with custom config +CURATOR_CONFIG=/path/to/custom.yml pytest tests/integration/test_deepfreeze_integration.py -v +``` + +#### 3. S3 Bucket Permissions + +**Error**: `AccessDenied: Access Denied` + +**Solution**: Ensure AWS user has required S3 permissions: +```json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "s3:CreateBucket", + "s3:DeleteBucket", + "s3:ListBucket", + "s3:PutObject", + "s3:GetObject", + "s3:DeleteObject", + "s3:RestoreObject", + "s3:GetObjectAttributes" + ], + "Resource": [ + "arn:aws:s3:::deepfreeze-*", + "arn:aws:s3:::deepfreeze-*/*" + ] + } + ] +} +``` + +#### 4. 
Test Timeout + +**Error**: `AssertionError: Thaw did not complete within 6 hours` + +**Cause**: AWS Glacier restore taking longer than expected + +**Solution**: +- Check AWS Glacier retrieval tier (Standard = 3-5 hours, Expedited = 1-5 minutes) +- Verify S3 bucket is in expected region +- Check AWS Service Health Dashboard +- Consider using FAST_MODE for development + +#### 5. Repository Already Exists + +**Error**: `RepositoryException: [df-test-repo-000001] repository already exists` + +**Solution**: Clean up from previous failed test: +```bash +# Delete test repositories +curl -X DELETE "localhost:9200/_snapshot/df-test-repo-*" + +# Delete status index +curl -X DELETE "localhost:9200/.deepfreeze-status" + +# Clean up S3 buckets +aws s3 rb s3://deepfreeze-test-* --force +``` + +### Debug Mode + +Enable detailed logging for troubleshooting: + +```bash +# Run with debug logging +pytest tests/integration/test_deepfreeze_integration.py::TestDeepfreezeIntegration::test_thaw_complete_then_refreeze -v -s --log-cli-level=DEBUG + +# Capture all logs to file +pytest tests/integration/test_deepfreeze_integration.py -v -s --log-file=test_debug.log --log-file-level=DEBUG +``` + +## AWS Cost Considerations + +### Estimated Costs (as of 2024) + +Running the full test suite incurs AWS charges: + +| Resource | Usage | Estimated Cost | +|----------|-------|----------------| +| S3 Storage | ~10 GB for 1-2 days | $0.23 | +| Glacier Storage | ~10 GB for 1-2 days | $0.04 | +| Glacier Retrievals (Standard) | 3-4 retrievals × 10 GB | $0.40 | +| S3 Requests | ~1000 PUT/GET requests | $0.01 | +| **Total per full test run** | | **~$0.70** | + +**Cost Reduction Tips**: +1. Use FAST_MODE for development (no AWS costs) +2. Run long tests only when necessary +3. Use smaller test data sets +4. Clean up resources promptly after tests +5. 
Consider using AWS Free Tier if available
+
+### Resource Cleanup
+
+Tests automatically clean up resources in `tearDown()`, but if tests fail:
+
+```bash
+# List S3 buckets
+aws s3 ls | grep deepfreeze
+
+# Delete test buckets
+aws s3 rb s3://deepfreeze-integration-abc123 --force
+
+# List Elasticsearch repositories
+curl "localhost:9200/_snapshot/_all"
+
+# Delete repositories
+curl -X DELETE "localhost:9200/_snapshot/df-test-repo-*"
+```
+
+## Contributing
+
+When adding new tests:
+
+1. **Follow naming convention**: `test_<feature>_<scenario>`
+2. **Add comprehensive logging**: Use `self.logger.info()` for progress
+3. **Include docstring**: Describe purpose, steps, duration
+4. **Clean up resources**: Implement proper `tearDown()`
+5. **Add to this README**: Document runtime and requirements
+6. **Test both modes**: Verify works in both FAST_MODE and real mode
+
+### Test Template
+
+```python
+def test_new_feature(self):
+    """
+    Brief description of what this test validates.
+
+    Steps:
+    1. Setup phase
+    2. Action phase
+    3. Verification phase
+
+    Duration: ~X hours
+    """
+    self.logger.info("\n" + "="*80)
+    self.logger.info("TEST: New Feature")
+    self.logger.info("="*80)
+
+    # Setup
+    bucket_name, repo_name = self._setup_test_environment()
+
+    # Test logic here
+    # ...
+
+    # Verification
+    self.logger.info("\n--- Verifying results ---")
+    # assertions here
+
+    self.logger.info("\n✓ Test completed successfully")
+```
+
+## Additional Resources
+
+- [Curator Documentation](https://www.elastic.co/guide/en/elasticsearch/client/curator/current/index.html)
+- [AWS Glacier Documentation](https://docs.aws.amazon.com/amazonglacier/latest/dev/introduction.html)
+- [Elasticsearch Snapshot and Restore](https://www.elastic.co/guide/en/elasticsearch/reference/current/snapshot-restore.html)
+
+## Support
+
+For issues or questions:
+1. Check existing test output and logs
+2. Review this README thoroughly
+3. Check AWS CloudWatch logs
+4. 
File an issue with: test name, logs, environment details diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py index 4d438ea0..d2275980 100644 --- a/tests/integration/__init__.py +++ b/tests/integration/__init__.py @@ -1,6 +1,7 @@ """Test setup""" -# pylint: disable=C0115, C0116 +# pylint: disable=missing-function-docstring, missing-class-docstring +import json import logging import os import random @@ -9,17 +10,19 @@ import sys import tempfile import time -import json -import warnings -from datetime import timedelta, datetime, date, timezone -from subprocess import Popen, PIPE +from datetime import date, datetime, timedelta, timezone +from subprocess import PIPE, Popen from unittest import SkipTest, TestCase + +from click import testing as clicktest from elasticsearch8 import Elasticsearch from elasticsearch8.exceptions import ConnectionError as ESConnectionError -from elasticsearch8.exceptions import ElasticsearchWarning, NotFoundError -from click import testing as clicktest -from es_client.helpers.utils import get_version + +from curator.actions.deepfreeze import SETTINGS_ID, STATUS_INDEX, Settings +from curator.actions.deepfreeze.rotate import Rotate +from curator.actions.deepfreeze.setup import Setup from curator.cli import cli +from curator.s3client import s3_client_factory from . 
import testvars @@ -28,17 +31,20 @@ client = None DATEMAP = { - 'months': '%Y.%m', - 'weeks': '%Y.%W', - 'days': '%Y.%m.%d', - 'hours': '%Y.%m.%d.%H', + "months": "%Y.%m", + "weeks": "%Y.%W", + "days": "%Y.%m.%d", + "hours": "%Y.%m.%d.%H", } -HOST = os.environ.get('TEST_ES_SERVER', 'http://127.0.0.1:9200') +HOST = os.environ.get("TEST_ES_SERVER", "http://127.0.0.1:9200") + +INTERVAL = 5 + def random_directory(): - dirname = ''.join( + dirname = "".join( random.choice(string.ascii_uppercase + string.digits) for _ in range(8) ) directory = tempfile.mkdtemp(suffix=dirname) @@ -47,6 +53,12 @@ def random_directory(): return directory +def random_suffix(): + return "".join( + random.choice(string.ascii_uppercase + string.digits) for _ in range(8) + ).lower() + + def get_client(): # pylint: disable=global-statement, invalid-name global client @@ -60,7 +72,7 @@ def get_client(): time.sleep(0.1) try: # pylint: disable=E1123 - client.cluster.health(wait_for_status='yellow') + client.cluster.health(wait_for_status="yellow") return client except ESConnectionError: continue @@ -80,13 +92,18 @@ def __getattr__(self, att_name): class CuratorTestCase(TestCase): def setUp(self): super(CuratorTestCase, self).setUp() - self.logger = logging.getLogger('CuratorTestCase.setUp') + self.logger = logging.getLogger("CuratorTestCase.setUp") self.client = get_client() + # ? This would be better in a one-time setup, but repeatedly aplying it won't + # ? hurt anything. + self.client.cluster.put_settings( + body={"persistent": {"indices.lifecycle.poll_interval": "1m"}} + ) args = {} - args['HOST'] = HOST - args['time_unit'] = 'days' - args['prefix'] = 'logstash-' + args["HOST"] = HOST + args["time_unit"] = "days" + args["prefix"] = "logstash-" self.args = args # dirname = ''.join(random.choice(string.ascii_uppercase + string.digits) # for _ in range(8)) @@ -97,28 +114,28 @@ def setUp(self): # on the target machine. 
# self.args['location'] = random_directory() nodesinfo = self.client.nodes.info() - nodename = list(nodesinfo['nodes'].keys())[0] - if 'repo' in nodesinfo['nodes'][nodename]['settings']['path']: + nodename = list(nodesinfo["nodes"].keys())[0] + if "repo" in nodesinfo["nodes"][nodename]["settings"]["path"]: if isinstance( - nodesinfo['nodes'][nodename]['settings']['path']['repo'], list + nodesinfo["nodes"][nodename]["settings"]["path"]["repo"], list ): - self.args['location'] = nodesinfo['nodes'][nodename]['settings'][ - 'path' - ]['repo'][0] + self.args["location"] = nodesinfo["nodes"][nodename]["settings"][ + "path" + ]["repo"][0] else: - self.args['location'] = nodesinfo['nodes'][nodename]['settings'][ - 'path' - ]['repo'] + self.args["location"] = nodesinfo["nodes"][nodename]["settings"][ + "path" + ]["repo"] else: # Use a random directory if repo is not specified, but log it - self.logger.warning('path.repo is not configured!') - self.args['location'] = random_directory() - self.args['configdir'] = random_directory() - self.args['configfile'] = os.path.join(self.args['configdir'], 'curator.yml') - self.args['actionfile'] = os.path.join(self.args['configdir'], 'actions.yml') - self.args['repository'] = 'test_repository' + self.logger.warning("path.repo is not configured!") + self.args["location"] = random_directory() + self.args["configdir"] = random_directory() + self.args["configfile"] = os.path.join(self.args["configdir"], "curator.yml") + self.args["actionfile"] = os.path.join(self.args["configdir"], "actions.yml") + self.args["repository"] = "test_repository" # if not os.path.exists(self.args['location']): # os.makedirs(self.args['location']) - self.logger.debug('setUp completed...') + self.logger.debug("setUp completed...") self.runner = clicktest.CliRunner() self.runner_args = [ '--config', @@ -133,27 +150,19 @@ def get_version(self): return get_version(self.client) def tearDown(self): - self.logger = logging.getLogger('CuratorTestCase.tearDown') - 
self.logger.debug('tearDown initiated...') + self.logger = logging.getLogger("CuratorTestCase.tearDown") + self.logger.debug("tearDown initiated...") # re-enable shard allocation for next tests enable_allocation = json.loads('{"cluster.routing.allocation.enable":null}') self.client.cluster.put_settings(transient=enable_allocation) self.delete_repositories() # 8.0 removes our ability to purge with wildcards... - # ElasticsearchWarning: this request accesses system indices: [.tasks], - # but in a future major version, direct access to system indices will be - # prevented by default - warnings.filterwarnings("ignore", category=ElasticsearchWarning) indices = list( - self.client.indices.get(index="*", expand_wildcards='open,closed').keys() + self.client.indices.get(index="*", expand_wildcards="open,closed").keys() ) if len(indices) > 0: - # ElasticsearchWarning: this request accesses system indices: [.tasks], - # but in a future major version, direct access to system indices will be - # prevented by default - warnings.filterwarnings("ignore", category=ElasticsearchWarning) - self.client.indices.delete(index=','.join(indices)) - for path_arg in ['location', 'configdir']: + self.client.indices.delete(index=",".join(indices)) + for path_arg in ["location", "configdir"]: if os.path.exists(self.args[path_arg]): shutil.rmtree(self.args[path_arg]) @@ -162,13 +171,13 @@ def parse_args(self): def create_indices(self, count, unit=None, ilm_policy=None): now = datetime.now(timezone.utc) - unit = unit if unit else self.args['time_unit'] + unit = unit if unit else self.args["time_unit"] fmt = DATEMAP[unit] - if not unit == 'months': + if not unit == "months": step = timedelta(**{unit: 1}) for _ in range(count): self.create_index( - self.args['prefix'] + now.strftime(fmt), + self.args["prefix"] + now.strftime(fmt), wait_for_yellow=False, ilm_policy=ilm_policy, ) @@ -177,7 +186,7 @@ def create_indices(self, count, unit=None, ilm_policy=None): now = date.today() d = date(now.year, 
now.month, 1) self.create_index( - self.args['prefix'] + now.strftime(fmt), + self.args["prefix"] + now.strftime(fmt), wait_for_yellow=False, ilm_policy=ilm_policy, ) @@ -188,16 +197,16 @@ def create_indices(self, count, unit=None, ilm_policy=None): else: d = date(d.year, d.month - 1, 1) self.create_index( - self.args['prefix'] + datetime(d.year, d.month, 1).strftime(fmt), + self.args["prefix"] + datetime(d.year, d.month, 1).strftime(fmt), wait_for_yellow=False, ilm_policy=ilm_policy, ) # pylint: disable=E1123 - self.client.cluster.health(wait_for_status='yellow') + self.client.cluster.health(wait_for_status="yellow") def wfy(self): # pylint: disable=E1123 - self.client.cluster.health(wait_for_status='yellow') + self.client.cluster.health(wait_for_status="yellow") def create_index( self, @@ -207,13 +216,9 @@ def create_index( ilm_policy=None, wait_for_active_shards=1, ): - request_body = {'index': {'number_of_shards': shards, 'number_of_replicas': 0}} + request_body = {"index": {"number_of_shards": shards, "number_of_replicas": 0}} if ilm_policy is not None: - request_body['index']['lifecycle'] = {'name': ilm_policy} - # ElasticsearchWarning: index name [.shouldbehidden] starts with a dot '.', - # in the next major version, index names starting with a dot are reserved - # for hidden indices and system indices - warnings.filterwarnings("ignore", category=ElasticsearchWarning) + request_body["index"]["lifecycle"] = {"name": ilm_policy} self.client.indices.create( index=name, settings=request_body, @@ -224,7 +229,7 @@ def create_index( def add_docs(self, idx): for i in ["1", "2", "3"]: - self.client.create(index=idx, id=i, document={"doc" + i: 'TEST DOCUMENT'}) + self.client.create(index=idx, id=i, document={"doc" + i: "TEST DOCUMENT"}) # This should force each doc to be in its own segment. 
# pylint: disable=E1123 self.client.indices.flush(index=idx, force=True) @@ -233,7 +238,7 @@ def add_docs(self, idx): def create_snapshot(self, name, csv_indices): self.create_repository() self.client.snapshot.create( - repository=self.args['repository'], + repository=self.args["repository"], snapshot=name, ignore_unavailable=False, include_global_state=True, @@ -243,53 +248,48 @@ def create_snapshot(self, name, csv_indices): ) def delete_snapshot(self, name): - try: - self.client.snapshot.delete( - repository=self.args['repository'], snapshot=name - ) - except NotFoundError: - pass + self.client.snapshot.delete(repository=self.args["repository"], snapshot=name) def create_repository(self): - request_body = {'type': 'fs', 'settings': {'location': self.args['location']}} + request_body = {"type": "fs", "settings": {"location": self.args["location"]}} self.client.snapshot.create_repository( - name=self.args['repository'], body=request_body + name=self.args["repository"], body=request_body ) + def create_named_repository(self, repo_name): + request_body = {"type": "fs", "settings": {"location": self.args["location"]}} + self.client.snapshot.create_repository(name=repo_name, body=request_body) + def delete_repositories(self): - result = [] - try: - result = self.client.snapshot.get_repository(name='*') - except NotFoundError: - pass + result = self.client.snapshot.get_repository(name="*") for repo in result: try: - cleanup = self.client.snapshot.get(repository=repo, snapshot='*') + cleanup = self.client.snapshot.get(repository=repo, snapshot="*") # pylint: disable=broad-except except Exception: - cleanup = {'snapshots': []} - for listitem in cleanup['snapshots']: - self.delete_snapshot(listitem['snapshot']) + cleanup = {"snapshots": []} + for listitem in cleanup["snapshots"]: + self.delete_snapshot(listitem["snapshot"]) self.client.snapshot.delete_repository(name=repo) def close_index(self, name): self.client.indices.close(index=name) def write_config(self, fname, 
data): - with open(fname, 'w', encoding='utf-8') as fhandle: + with open(fname, "w", encoding="utf-8") as fhandle: fhandle.write(data) def get_runner_args(self): - self.write_config(self.args['configfile'], testvars.client_config.format(HOST)) - runner = os.path.join(os.getcwd(), 'run_singleton.py') + self.write_config(self.args["configfile"], testvars.client_config.format(HOST)) + runner = os.path.join(os.getcwd(), "run_singleton.py") return [sys.executable, runner] - def run_subprocess(self, args, logname='subprocess'): + def run_subprocess(self, args, logname="subprocess"): local_logger = logging.getLogger(logname) p = Popen(args, stderr=PIPE, stdout=PIPE) stdout, stderr = p.communicate() - local_logger.debug('STDOUT = %s', stdout.decode('utf-8')) - local_logger.debug('STDERR = %s', stderr.decode('utf-8')) + local_logger.debug("STDOUT = %s", stdout.decode("utf-8")) + local_logger.debug("STDERR = %s", stderr.decode("utf-8")) return p.returncode def invoke_runner(self, dry_run=False): @@ -312,7 +312,118 @@ def invoke_runner_alt(self, **kwargs): myargs = [] if kwargs: for key, value in kwargs.items(): - myargs.append(f'--{key}') + myargs.append(f"--{key}") myargs.append(value) - myargs.append(self.args['actionfile']) + myargs.append(self.args["actionfile"]) self.result = self.runner.invoke(cli, myargs) + + +class DeepfreezeTestCase(CuratorTestCase): + # TODO: Augment setup, tearDown methods to remove buckets + # TODO: Add helper methods from deepfreeze_helpers so they're part of the test case + + def setUp(self): + self.bucket_name = "" + return super().setUp() + + def tearDown(self): + s3 = s3_client_factory(self.provider) + buckets = s3.list_buckets(testvars.df_bucket_name) + for bucket in buckets: + # if bucket['Name'].startswith(testvars.df_bucket_name): + s3.delete_bucket(bucket_name=bucket) + return super().tearDown() + + def do_setup( + self, do_action=True, rotate_by: str = None, create_ilm_policy: bool = False + ) -> Setup: + s3 = 
s3_client_factory(self.provider) + + # Clean up any existing settings + try: + self.client.indices.delete(index=STATUS_INDEX) + except Exception: + pass + try: + self.client.snapshot.delete_repository( + name=f"{testvars.df_repo_name}-000001" + ) + except Exception: + pass + try: + self.client.snapshot.delete_repository(name=f"{testvars.df_repo_name}*") + except Exception: + pass + try: + s3 = s3_client_factory(self.provider) + s3.delete_bucket(self.bucket_name) + except Exception: + pass + # Clean up any existing ILM policy + try: + self.client.ilm.delete_lifecycle(name=testvars.df_ilm_policy) + except Exception: + pass + + if rotate_by: + testvars.df_rotate_by = rotate_by + + setup = Setup( + client, + bucket_name_prefix=self.bucket_name, + repo_name_prefix=testvars.df_repo_name, + base_path_prefix=testvars.df_base_path, + storage_class=testvars.df_storage_class, + rotate_by=testvars.df_rotate_by, + style=testvars.df_style, + create_sample_ilm_policy=create_ilm_policy, + ilm_policy_name=testvars.df_ilm_policy, + ) + if do_action: + setup.do_action() + time.sleep(INTERVAL) + return setup + + def do_rotate( + self, iterations: int = 1, keep: int = None, populate_index=False + ) -> Rotate: + rotate = None + for _ in range(iterations): + if keep: + rotate = Rotate( + client=self.client, + keep=keep, + ) + else: + rotate = Rotate( + client=self.client, + ) + rotate.do_action() + if populate_index: + # Alter this so it creates an index which the ILM policy will rotate + idx = f"{testvars.df_test_index}-{random_suffix()}" + self._populate_index(index=idx) + self.client.indices.put_settings( + index=idx, + body={"index": {"lifecycle": {"name": testvars.df_ilm_policy}}}, + ) + time.sleep(INTERVAL) + return rotate + + def _populate_index(self, index: str, doc_count: int = 1000) -> None: + # Sleep for a seocond every 100 docs to spread out the timestamps a bit + for i in range(doc_count): + if i % 100 == 0 and i != 0: + time.sleep(1) + for _ in range(doc_count): + 
self.client.index(index=index, body={"foo": "bar"}) + + def delete_ilm_policy(self, name): + try: + self.client.ilm.delete_lifecycle(name=name) + finally: + pass + + def get_settings(self): + doc = self.client.get(index=STATUS_INDEX, id=SETTINGS_ID) + return Settings(**doc["_source"]) diff --git a/tests/integration/run_thaw_tests.sh b/tests/integration/run_thaw_tests.sh new file mode 100644 index 00000000..953dde3e --- /dev/null +++ b/tests/integration/run_thaw_tests.sh @@ -0,0 +1,119 @@ +#!/bin/bash +# Script to run deepfreeze thaw integration tests +# Usage: ./run_thaw_tests.sh [fast|full] [test_name] + +set -e + +# Colors for output +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +RED='\033[0;31m' +NC='\033[0m' # No Color + +# Get the script directory +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +PROJECT_ROOT="$SCRIPT_DIR/../.." + +# Default values +MODE="${1:-fast}" +TEST_NAME="${2:-}" + +# Print usage +usage() { + echo "Usage: $0 [fast|full] [test_name]" + echo "" + echo "Modes:" + echo " fast - Run tests with mocked operations (5-10 minutes)" + echo " full - Run tests against real AWS Glacier (up to 6 hours)" + echo "" + echo "Examples:" + echo " $0 fast # Run all tests in fast mode" + echo " $0 fast test_thaw_single_repository # Run specific test in fast mode" + echo " $0 full # Run all tests against real Glacier" + echo "" + exit 1 +} + +# Check if help is requested +if [ "$1" = "-h" ] || [ "$1" = "--help" ]; then + usage +fi + +# Validate mode +if [ "$MODE" != "fast" ] && [ "$MODE" != "full" ]; then + echo -e "${RED}Error: Invalid mode '$MODE'. Must be 'fast' or 'full'${NC}" + usage +fi + +# Check for curator configuration file +CURATOR_CONFIG="${CURATOR_CONFIG:-$HOME/.curator/curator.yml}" +echo -e "${YELLOW}Checking for curator configuration...${NC}" +if [ ! 
-f "$CURATOR_CONFIG" ]; then + echo -e "${RED}Error: Configuration file not found: $CURATOR_CONFIG${NC}" + echo "Create ~/.curator/curator.yml or set CURATOR_CONFIG environment variable" + exit 1 +fi +echo -e "${GREEN}✓ Configuration file found: $CURATOR_CONFIG${NC}" + +# Extract Elasticsearch host from config and check connection +echo -e "${YELLOW}Checking Elasticsearch connection from config...${NC}" +# Try to extract the host from the YAML config (simple grep approach) +ES_HOST=$(grep -A 5 "^elasticsearch:" "$CURATOR_CONFIG" | grep "hosts:" | sed 's/.*hosts: *//;s/[][]//g;s/,.*//;s/ //g' | head -1) +if [ -z "$ES_HOST" ]; then + echo -e "${YELLOW}Warning: Could not extract Elasticsearch host from config${NC}" + ES_HOST="http://127.0.0.1:9200" +fi + +if ! curl -s "$ES_HOST" > /dev/null 2>&1; then + echo -e "${RED}Error: Cannot connect to Elasticsearch at $ES_HOST${NC}" + echo "Check your configuration file: $CURATOR_CONFIG" + exit 1 +fi +echo -e "${GREEN}✓ Elasticsearch is running at $ES_HOST${NC}" + +# Check AWS credentials for full mode +if [ "$MODE" = "full" ]; then + echo -e "${YELLOW}Checking AWS credentials...${NC}" + if [ -z "$AWS_ACCESS_KEY_ID" ] || [ -z "$AWS_SECRET_ACCESS_KEY" ]; then + echo -e "${RED}Error: AWS credentials not found${NC}" + echo "For full test mode, set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY" + exit 1 + fi + echo -e "${GREEN}✓ AWS credentials found${NC}" + + echo -e "${YELLOW}WARNING: Full test mode will take up to 6 hours to complete!${NC}" + echo -e "${YELLOW}Press Ctrl+C within 5 seconds to cancel...${NC}" + sleep 5 +fi + +# Set environment variables based on mode +if [ "$MODE" = "fast" ]; then + export DEEPFREEZE_FAST_MODE=1 + echo -e "${GREEN}Running in FAST mode (mocked operations)${NC}" +else + export DEEPFREEZE_FULL_TEST=1 + echo -e "${YELLOW}Running in FULL TEST mode (real AWS Glacier)${NC}" +fi + +# Build test command +TEST_FILE="$SCRIPT_DIR/test_deepfreeze_thaw.py" +if [ -n "$TEST_NAME" ]; then + 
TEST_PATH="$TEST_FILE::TestDeepfreezeThaw::$TEST_NAME" + echo -e "${GREEN}Running test: $TEST_NAME${NC}" +else + TEST_PATH="$TEST_FILE" + echo -e "${GREEN}Running all thaw tests${NC}" +fi + +# Run tests +echo -e "${YELLOW}Starting tests...${NC}" +cd "$PROJECT_ROOT" + +# Run pytest with verbose output +if pytest "$TEST_PATH" -v -s --tb=short; then + echo -e "${GREEN}✓ All tests passed!${NC}" + exit 0 +else + echo -e "${RED}✗ Some tests failed${NC}" + exit 1 +fi diff --git a/tests/integration/test_deepfreeze_integration.py b/tests/integration/test_deepfreeze_integration.py new file mode 100644 index 00000000..860d7d60 --- /dev/null +++ b/tests/integration/test_deepfreeze_integration.py @@ -0,0 +1,1438 @@ +""" +Comprehensive Integration Tests for Deepfreeze Thaw, Refreeze, and Cleanup + +✅ SAFE FOR PRODUCTION ✅ +These tests work with EXISTING deepfreeze repositories - no new data is created! + +- test_operations_on_already_thawed_data: Read-only operations on existing thaw requests +- test_new_thaw_request_full_lifecycle: Creates new thaw requests for existing repos +- test_thaw_complete_then_refreeze: Thaws and refreezes existing repos +- test_multiple_concurrent_thaw_requests: Multiple thaw requests on existing repos +- test_one_day_duration_with_cleanup: 24-hour test with existing repos +- test_cleanup_mixed_expiration_states: Cleanup testing with existing repos + +NO indices, repositories, or S3 buckets are created or deleted. + +These tests validate the complete lifecycle of deepfreeze operations against real +AWS S3/Glacier storage. They are designed to run against the cluster configured in +~/.curator/curator.yml. 
+ +IMPORTANT: These are LONG-RUNNING tests: +- Full thaw operations can take up to 6 hours (AWS Glacier Standard tier) +- The 1-day duration cleanup test requires 24+ hours to complete +- Set DEEPFREEZE_SKIP_LONG_TESTS=1 to skip these tests in CI +- Set DEEPFREEZE_FAST_MODE=1 to use simulated/mocked operations for development + +Test Requirements: +- Existing deepfreeze setup (run `deepfreeze setup` first) +- Valid AWS credentials configured (for S3/Glacier access) +- Elasticsearch cluster accessible via ~/.curator/curator.yml +- S3 bucket permissions for create/delete operations +- Sufficient time for long-running tests (6-30 hours) + +Configuration: +- CURATOR_CONFIG: Path to curator config file (default: ~/.curator/curator.yml) +- DEEPFREEZE_SKIP_LONG_TESTS: Skip tests requiring >1 hour (default: 0) +- DEEPFREEZE_FAST_MODE: Use mocked operations for fast testing (default: 0) + +Example Usage: + # Run ONLY the safe read-only test + pytest tests/integration/test_deepfreeze_integration.py::TestDeepfreezeIntegration::test_operations_on_already_thawed_data -v + + # Run all tests (WARNING: Can take 30+ hours and creates test data) + pytest tests/integration/test_deepfreeze_integration.py -v + + # Skip long-running tests + DEEPFREEZE_SKIP_LONG_TESTS=1 pytest tests/integration/test_deepfreeze_integration.py -v + + # Fast mode for development + DEEPFREEZE_FAST_MODE=1 pytest tests/integration/test_deepfreeze_integration.py -v +""" + +# pylint: disable=missing-function-docstring, missing-class-docstring, line-too-long + +import logging +import os +import time +import warnings +import yaml +from datetime import datetime, timedelta, timezone +from typing import Dict, List, Optional, Tuple + +import pytest +from es_client.builder import Builder + +from curator.actions.deepfreeze import STATUS_INDEX, Cleanup, Refreeze, Thaw +from curator.actions.deepfreeze.utilities import ( + get_repositories_by_names, + get_settings, + get_thaw_request, + list_thaw_requests, + 
push_to_glacier, +) +from curator.defaults.settings import VERSION_MAX, VERSION_MIN, default_config_file +from curator.s3client import s3_client_factory + +from . import DeepfreezeTestCase, random_suffix, testvars +from .test_isolation import RepositoryLock, get_available_unlocked_repositories, cleanup_expired_locks + +# Configuration +CONFIG_FILE = os.environ.get("CURATOR_CONFIG", default_config_file()) +SKIP_LONG_TESTS = os.environ.get("DEEPFREEZE_SKIP_LONG_TESTS", "0") == "1" +FAST_MODE = os.environ.get("DEEPFREEZE_FAST_MODE", "0") == "1" + +# Test intervals +INTERVAL = 1 # Base sleep interval in seconds +CHECK_INTERVAL_MINUTES = 15 # How often to check thaw status (real mode) +THAW_TIMEOUT_HOURS = 6 # Maximum time to wait for Glacier restore + +# Pytest markers +pytestmark = [ + pytest.mark.integration, + pytest.mark.deepfreeze, +] + + +class TestDeepfreezeIntegration(DeepfreezeTestCase): + """ + Comprehensive integration tests for deepfreeze thaw, refreeze, and cleanup operations. + + These tests validate the complete lifecycle against real AWS infrastructure. + + Supports parallel test execution with pytest-xdist using repository locking. 
+ """ + + @classmethod + def setUpClass(cls): + """Clean up expired locks before starting test suite""" + # Load configuration to get client + if not os.path.exists(CONFIG_FILE): + return + + try: + with open(CONFIG_FILE, 'r') as f: + config = yaml.safe_load(f) + builder = Builder( + configdict=config, + version_max=VERSION_MAX, + version_min=VERSION_MIN, + ) + builder.connect() + + # Clean up any expired locks from previous test runs + cleanup_expired_locks(builder.client) + + except Exception as e: + # Not critical - tests will handle lock conflicts + pass + + def setUp(self): + """Set up test environment with cluster from curator.yml""" + # Load configuration from curator.yml + if not os.path.exists(CONFIG_FILE): + pytest.skip(f"Configuration file not found: {CONFIG_FILE}") + + # Get configuration dictionary + try: + with open(CONFIG_FILE, 'r') as f: + config = yaml.safe_load(f) + # Builder expects full config with 'elasticsearch' key, not just elasticsearch section + configdict = config + except Exception as e: + pytest.skip(f"Failed to load configuration from {CONFIG_FILE}: {e}") + + # Build client using configuration + try: + builder = Builder( + configdict=configdict, + version_max=VERSION_MAX, + version_min=VERSION_MIN, + ) + builder.connect() + self.client = builder.client + except Exception as e: + pytest.skip(f"Failed to connect to Elasticsearch: {e}") + + # Initialize logger + self.logger = logging.getLogger("TestDeepfreezeIntegration") + self.logger.setLevel(logging.INFO) + + # Set provider and suppress warnings + self.provider = "aws" + warnings.filterwarnings( + "ignore", category=DeprecationWarning, module="botocore.auth" + ) + + # Initialize tracking variables + self.bucket_name = f"{testvars.df_bucket_name}-integration-{random_suffix()}" + self.created_indices = [] + self.thaw_request_ids = [] + self.repository_locks = [] # Track locks for cleanup + + self.logger.info("=" * 80) + self.logger.info(f"Starting test: {self._testMethodName}") + 
self.logger.info(f"Bucket: {self.bucket_name}") + self.logger.info(f"Fast mode: {FAST_MODE}") + self.logger.info("=" * 80) + + def tearDown(self): + """ + Release repository locks and perform minimal cleanup. + + Tests do NOT create: + - Indices + - Repositories + - S3 buckets + - Status index entries (except thaw requests, which should persist) + + Thaw requests created during tests are intentionally left in place + for verification and future testing. + """ + # Release all repository locks + for lock in self.repository_locks: + try: + lock.release() + except Exception as e: + self.logger.warning(f"Error releasing lock: {e}") + + self.logger.info("Test complete - released locks, no other cleanup needed") + + # ======================================================================================== + # Helper Methods + # ======================================================================================== + + def _acquire_repository_lock(self, repo_name: str, timeout: int = 30) -> bool: + """ + Acquire lock on a repository for exclusive use by this test. + + Used for parallel test execution to prevent conflicts. + + :param repo_name: Name of repository to lock + :param timeout: Maximum time to wait for lock (seconds) + :return: True if lock acquired, False otherwise + """ + test_id = f"{self._testMethodName}_{self.bucket_name}" + lock = RepositoryLock(self.client, repo_name, test_id) + + if lock.acquire(timeout=timeout): + self.repository_locks.append(lock) + return True + + return False + + def _verify_index_searchable( + self, index_name: str, expected_doc_count: Optional[int] = None + ) -> Dict: + """ + Verify that an index exists and is searchable. 
+ + :param index_name: Name of the index to verify + :type index_name: str + :param expected_doc_count: Optional expected document count + :type expected_doc_count: Optional[int] + :return: Search results dictionary + :rtype: Dict + """ + self.logger.info(f"Verifying index {index_name} is searchable...") + + # Check if index exists + if not self.client.indices.exists(index=index_name): + raise AssertionError(f"Index {index_name} does not exist") + + # Try to search + try: + result = self.client.search( + index=index_name, + body={"query": {"match_all": {}}, "size": 0} + ) + + doc_count = result["hits"]["total"]["value"] + self.logger.info(f"Index {index_name} is searchable with {doc_count} documents") + + if expected_doc_count is not None: + assert doc_count == expected_doc_count, \ + f"Expected {expected_doc_count} docs, found {doc_count}" + + return result + + except Exception as e: + raise AssertionError(f"Failed to search index {index_name}: {e}") + + def _verify_timestamp_range( + self, index_name: str, start_date: datetime, end_date: datetime + ) -> Dict: + """ + Verify all documents in an index have timestamps within expected range. 
+ + :param index_name: Name of the index to verify + :type index_name: str + :param start_date: Expected start of timestamp range + :type start_date: datetime + :param end_date: Expected end of timestamp range + :type end_date: datetime + :return: Statistics dictionary with min/max timestamps and count + :rtype: Dict + """ + self.logger.info(f"Verifying timestamp range for {index_name}...") + + # Get aggregation statistics + result = self.client.search( + index=index_name, + body={ + "size": 0, + "aggs": { + "min_time": {"min": {"field": "@timestamp"}}, + "max_time": {"max": {"field": "@timestamp"}}, + }, + }, + ) + + min_ts = result["aggregations"]["min_time"]["value_as_string"] + max_ts = result["aggregations"]["max_time"]["value_as_string"] + count = result["hits"]["total"]["value"] + + min_dt = datetime.fromisoformat(min_ts.replace("Z", "+00:00")) + max_dt = datetime.fromisoformat(max_ts.replace("Z", "+00:00")) + + self.logger.info( + f"Timestamp range: {min_dt} to {max_dt} ({count} documents)" + ) + + # Verify range + assert min_dt >= start_date, \ + f"Min timestamp {min_dt} is before expected start {start_date}" + assert max_dt <= end_date, \ + f"Max timestamp {max_dt} is after expected end {end_date}" + + return { + "min_timestamp": min_dt, + "max_timestamp": max_dt, + "count": count, + } + + def _get_document_count(self, index_name: str) -> int: + """ + Get the total document count in an index. + + :param index_name: Name of the index + :type index_name: str + :return: Document count (0 if index doesn't exist) + :rtype: int + """ + try: + result = self.client.count(index=index_name) + return result["count"] + except Exception: + return 0 + + def _wait_for_thaw_with_checks( + self, + thaw_request_id: str, + timeout_hours: int = 6, + check_interval_minutes: int = 15, + ) -> bool: + """ + Wait for a thaw request to complete, polling at specified intervals. 
+ + :param thaw_request_id: The thaw request ID to monitor + :type thaw_request_id: str + :param timeout_hours: Maximum hours to wait + :type timeout_hours: int + :param check_interval_minutes: Minutes between status checks + :type check_interval_minutes: int + :return: True if completed, False if timeout + :rtype: bool + """ + start_time = time.time() + timeout_seconds = timeout_hours * 3600 + check_interval_seconds = check_interval_minutes * 60 + + self.logger.info( + f"Waiting for thaw request {thaw_request_id} to complete " + f"(timeout: {timeout_hours}h, check interval: {check_interval_minutes}m)" + ) + + check_count = 0 + + while (time.time() - start_time) < timeout_seconds: + check_count += 1 + elapsed_minutes = (time.time() - start_time) / 60 + + self.logger.info( + f"Check #{check_count} at {elapsed_minutes:.1f} minutes elapsed" + ) + + # In fast mode, simulate immediate completion + if FAST_MODE: + self.logger.info("FAST_MODE: Simulating thaw completion") + request = get_thaw_request(self.client, thaw_request_id) + repo_names = request.get("repos", []) + repos = get_repositories_by_names(self.client, repo_names) + + # Mount all repositories + for repo in repos: + if not repo.is_mounted: + repo.is_mounted = True + repo.thaw_state = "active" + repo.persist(self.client) + + # Try to re-register repository, but ignore InvalidObjectState errors + # since in FAST_MODE the S3 objects may still be in GLACIER + try: + self.client.snapshot.create_repository( + name=repo.name, + body={ + "type": "s3", + "settings": { + "bucket": repo.bucket, + "base_path": repo.base_path, + }, + }, + ) + except Exception as e: + # In FAST_MODE, ignore errors from objects being in GLACIER + if "InvalidObjectState" in str(e) or "not valid for the object's storage class" in str(e): + self.logger.info( + f"FAST_MODE: Skipping repository registration for {repo.name} " + f"(objects still in GLACIER, which is expected)" + ) + else: + # Re-raise unexpected errors + raise + + 
self.logger.info("FAST_MODE: Thaw marked as complete") + return True + + # Real mode: Check actual status + try: + request = get_thaw_request(self.client, thaw_request_id) + repo_names = request.get("repos", []) + repos = get_repositories_by_names(self.client, repo_names) + + # Check if all repos are mounted + if all(repo.is_mounted for repo in repos): + self.logger.info( + f"Thaw request {thaw_request_id} completed! " + f"All {len(repos)} repositories are mounted." + ) + return True + + # Log progress + mounted_count = sum(1 for repo in repos if repo.is_mounted) + self.logger.info( + f"Progress: {mounted_count}/{len(repos)} repositories mounted" + ) + + # Check S3 restore status for unmounted repos + s3 = s3_client_factory(self.provider) + for repo in repos: + if not repo.is_mounted: + from curator.actions.deepfreeze.utilities import check_restore_status + status = check_restore_status(s3, repo.bucket, repo.base_path) + self.logger.info( + f"Repo {repo.name}: {status['restored']}/{status['total']} " + f"objects restored" + ) + + except Exception as e: + self.logger.warning(f"Error checking thaw status: {e}") + + # Sleep until next check + self.logger.info(f"Sleeping for {check_interval_minutes} minutes...") + time.sleep(check_interval_seconds) + + self.logger.error(f"Timeout waiting for thaw request {thaw_request_id}") + return False + + def _verify_repo_state( + self, + repo_names: List[str], + expected_mounted: bool, + expected_thaw_state: str, + ) -> Dict: + """ + Verify repositories have expected state. 
+ + :param repo_names: List of repository names to check + :type repo_names: List[str] + :param expected_mounted: Expected mounted status + :type expected_mounted: bool + :param expected_thaw_state: Expected thaw state + :type expected_thaw_state: str + :return: Dictionary with verification results + :rtype: Dict + """ + self.logger.info( + f"Verifying {len(repo_names)} repos: " + f"mounted={expected_mounted}, thaw_state={expected_thaw_state}" + ) + + repos = get_repositories_by_names(self.client, repo_names) + results = {"verified": [], "failed": []} + + for repo in repos: + if repo.is_mounted == expected_mounted and repo.thaw_state == expected_thaw_state: + results["verified"].append(repo.name) + self.logger.info(f"✓ {repo.name}: mounted={repo.is_mounted}, state={repo.thaw_state}") + else: + results["failed"].append({ + "name": repo.name, + "mounted": repo.is_mounted, + "thaw_state": repo.thaw_state, + }) + self.logger.error( + f"✗ {repo.name}: mounted={repo.is_mounted} (expected {expected_mounted}), " + f"state={repo.thaw_state} (expected {expected_thaw_state})" + ) + + assert len(results["failed"]) == 0, \ + f"Repository state verification failed for: {results['failed']}" + + return results + + def _get_available_repositories(self) -> List[Dict]: + """ + Get list of available deepfreeze repositories from the status index. + + Does NOT create or delete any resources. 
+ + :return: List of repository dictionaries with name, start, end dates + :rtype: List[Dict] + """ + try: + # Query status index for repositories + query = { + "query": {"term": {"doctype": "repository"}}, + "size": 1000, + "sort": [{"start": "asc"}] + } + + response = self.client.search(index=STATUS_INDEX, body=query) + repos = [] + + for hit in response["hits"]["hits"]: + source = hit["_source"] + repos.append({ + "name": source["name"], + "start": datetime.fromisoformat(source["start"]) if isinstance(source["start"], str) else source["start"], + "end": datetime.fromisoformat(source["end"]) if isinstance(source["end"], str) else source["end"], + "bucket": source.get("bucket"), + "is_mounted": source.get("is_mounted", False), + "thaw_state": source.get("thaw_state", "unknown"), + }) + + self.logger.info(f"Found {len(repos)} existing repositories") + return repos + + except Exception as e: + self.logger.warning(f"Failed to get repositories: {e}") + return [] + + def _get_repos_not_in_active_requests(self) -> List[Dict]: + """ + Get repositories that are NOT currently in any active thaw requests or test locks. + + This helps avoid conflicts when creating new thaw requests, and enables + parallel test execution by filtering out repositories locked by other tests. 
+ + :return: List of available frozen repository dictionaries + :rtype: List[Dict] + """ + all_repos = self._get_available_repositories() + + # Get all active thaw requests + all_requests = list_thaw_requests(self.client) + active_requests = [r for r in all_requests if r.get("status") not in ["refrozen", "failed"]] + + # Collect all repos currently in active requests + repos_in_use = set() + for request in active_requests: + request_detail = get_thaw_request(self.client, request["id"]) + repos_in_use.update(request_detail.get("repos", [])) + + # Filter to frozen repos not in use + available_repos = [ + r for r in all_repos + if r['thaw_state'] == 'frozen' and r['name'] not in repos_in_use + ] + + # Further filter to exclude locked repositories (for parallel test execution) + unlocked_repos = get_available_unlocked_repositories( + self.client, + available_repos, + count=len(available_repos), # Get all available + ) + + self.logger.info( + f"Found {len(unlocked_repos)} frozen repositories not in active thaw requests " + f"(out of {len(all_repos)} total, {len(repos_in_use)} in requests, " + f"{len(available_repos) - len(unlocked_repos)} locked by tests)" + ) + + return unlocked_repos + + def _create_test_indices_with_dates( + self, + repo_name: str, + date_ranges: List[Tuple[datetime, datetime]], + docs_per_index: int = 100, + ) -> List[str]: + """ + Create test indices with specific date ranges and snapshot them. 
+ + :param repo_name: Repository to snapshot to + :type repo_name: str + :param date_ranges: List of (start_date, end_date) tuples + :type date_ranges: List[Tuple[datetime, datetime]] + :param docs_per_index: Number of documents per index + :type docs_per_index: int + :return: List of created index names + :rtype: List[str] + """ + created_indices = [] + + for i, (start_date, end_date) in enumerate(date_ranges): + index_name = f"test-logs-{start_date.strftime('%Y%m%d')}-{i:03d}" + self.logger.info( + f"Creating index {index_name} with {docs_per_index} docs " + f"from {start_date} to {end_date}" + ) + + # Create index + self.create_index(index_name) + + # Add documents with timestamps + time_delta = (end_date - start_date) / docs_per_index + for j in range(docs_per_index): + doc_time = start_date + (time_delta * j) + self.client.index( + index=index_name, + document={ + "@timestamp": doc_time.isoformat(), + "message": f"Test document {j} for index {index_name}", + "test_id": f"{index_name}-{j}", + "doc_number": j, + }, + ) + + # Refresh and snapshot + self.client.indices.refresh(index=index_name) + + snapshot_name = f"snap-{index_name}" + self.client.snapshot.create( + repository=repo_name, + snapshot=snapshot_name, + body={ + "indices": index_name, + "include_global_state": False, + "partial": False, + }, + wait_for_completion=True, + ) + + created_indices.append(index_name) + self.created_indices.append(index_name) + time.sleep(INTERVAL) + + self.logger.info(f"Created {len(created_indices)} indices") + return created_indices + + def _push_repo_to_glacier(self, repo_name: str): + """ + Push a repository to Glacier storage. 
+ + :param repo_name: Repository name to push + :type repo_name: str + """ + self.logger.info(f"Pushing repository {repo_name} to Glacier...") + + repos = get_repositories_by_names(self.client, [repo_name]) + if not repos: + raise ValueError(f"Repository {repo_name} not found") + + repo = repos[0] + + if FAST_MODE: + # Fast mode: Just mark as unmounted + repo.is_mounted = False + repo.thaw_state = "frozen" + repo.persist(self.client) + self.client.snapshot.delete_repository(name=repo_name) + self.logger.info(f"FAST_MODE: Marked {repo_name} as frozen") + else: + # Real mode: Actually push to Glacier + s3 = s3_client_factory(self.provider) + push_to_glacier(s3, repo) + repo.is_mounted = False + repo.thaw_state = "frozen" + repo.persist(self.client) + self.client.snapshot.delete_repository(name=repo_name) + self.logger.info(f"Pushed {repo_name} to Glacier") + + # ======================================================================================== + # Test Methods + # ======================================================================================== + + def test_operations_on_already_thawed_data(self): + """ + Test operations against already-thawed repositories without initiating new thaws. + + This test validates that we can: + - List existing thaw requests + - Check status of existing thawed data + - Verify data integrity after status checks + - Refreeze already-thawed data + - Verify cleanup behavior on non-expired data + + Prerequisites: Requires existing thawed data in the cluster. + If no thawed data exists, the test will skip. 
+ + Duration: ~5-15 minutes + """ + self.logger.info("\n" + "="*80) + self.logger.info("TEST: Operations on Already-Thawed Data") + self.logger.info("="*80) + + # Check for existing thaw requests + self.logger.info("Looking for existing thaw requests...") + all_requests = list_thaw_requests(self.client) + + # Filter for non-completed requests (in_progress, completed, or thawed status) + requests = [r for r in all_requests if r.get("status") not in ["failed", "refrozen"]] + + if not requests: + pytest.skip("No active thaw requests found. This test requires pre-existing thawed data.") + + self.logger.info(f"Found {len(requests)} thaw request(s)") + + # Use the first active thaw request + thaw_request = requests[0] + thaw_request_id = thaw_request["id"] + self.logger.info(f"Using thaw request: {thaw_request_id}") + self.logger.info(f" Status: {thaw_request.get('status', 'unknown')}") + self.logger.info(f" Start date: {thaw_request.get('start_date', 'unknown')}") + self.logger.info(f" End date: {thaw_request.get('end_date', 'unknown')}") + + # Get the thaw request details to find repositories + thaw_request_obj = get_thaw_request(self.client, thaw_request_id) + if not thaw_request_obj: + pytest.skip(f"Could not retrieve thaw request {thaw_request_id}") + + repo_names = thaw_request_obj.get("repos", []) + if not repo_names: + pytest.skip(f"Thaw request {thaw_request_id} has no repositories") + + self.logger.info(f"Thaw request has {len(repo_names)} repository/repositories: {repo_names}") + + # NOW TEST OPERATIONS ON ALREADY-THAWED DATA + + # 1. Run thaw --check-status on existing thawed data + self.logger.info("\n--- Testing check-status on already-thawed data ---") + thaw_check = Thaw( + self.client, + check_status=thaw_request_id, + porcelain=False, + ) + thaw_check.do_action() + + # 2. 
Run thaw --list to see existing requests + self.logger.info("\n--- Testing list on already-thawed data ---") + thaw_list = Thaw( + self.client, + list_requests=True, + include_completed=False, + porcelain=False, + ) + thaw_list.do_action() + + requests_after = list_thaw_requests(self.client) + assert any(r["id"] == thaw_request_id for r in requests_after), \ + "Thaw request not found in list" + + # 3. Get repository details and verify they're mounted + self.logger.info("\n--- Verifying repositories are mounted ---") + repos = get_repositories_by_names(self.client, repo_names) + for repo in repos: + self.logger.info(f"Repository {repo.name}:") + self.logger.info(f" Mounted: {repo.is_mounted}") + self.logger.info(f" Thaw state: {repo.thaw_state}") + # Note: We don't assert mounted status here because the test description + # says we work with "already-thawed" data, which might be in various states + + # 4. Run multiple status checks to ensure they don't break anything + self.logger.info("\n--- Running multiple status checks ---") + for i in range(3): + self.logger.info(f"Status check iteration {i+1}/3") + thaw_check = Thaw( + self.client, + check_status=thaw_request_id, + porcelain=False, + ) + thaw_check.do_action() + time.sleep(INTERVAL) + + # 5. 
Test refreeze on the thaw request + self.logger.info("\n--- Testing refreeze operation ---") + refreeze = Refreeze( + self.client, + thaw_request_id=thaw_request_id, + porcelain=False, + ) + refreeze.do_action() + + time.sleep(INTERVAL * 2) + + # Verify repositories unmounted after refreeze + self.logger.info("Verifying repositories unmounted after refreeze...") + repos_after = get_repositories_by_names(self.client, repo_names) + for repo in repos_after: + self.logger.info(f"Repository {repo.name}: mounted={repo.is_mounted}, state={repo.thaw_state}") + assert not repo.is_mounted, f"Repository {repo.name} should be unmounted after refreeze" + assert repo.thaw_state == "frozen", f"Repository {repo.name} should be frozen, got {repo.thaw_state}" + + # Verify thaw request status changed to refrozen + request_after = get_thaw_request(self.client, thaw_request_id) + assert request_after is not None, "Thaw request should still exist after refreeze" + assert request_after.get("status") == "refrozen", \ + f"Expected status 'refrozen', got {request_after.get('status')}" + + # 6. Verify cleanup doesn't remove refrozen requests + self.logger.info("\n--- Testing cleanup doesn't remove refrozen data ---") + cleanup = Cleanup(self.client) + cleanup.do_action() + + # Verify request still exists (cleanup retains refrozen requests per retention policy) + request_after_cleanup = get_thaw_request(self.client, thaw_request_id) + assert request_after_cleanup is not None, \ + "Refrozen thaw request should still exist after cleanup (within retention period)" + + self.logger.info("\n✓ Test completed successfully") + + @pytest.mark.skipif(SKIP_LONG_TESTS, reason="Requires up to 6 hours") + def test_new_thaw_request_full_lifecycle(self): + """ + Test complete thaw request lifecycle from creation to mounting. 
+ + This test validates: + - Creating thaw requests spanning existing repositories + - Monitoring restore progress + - Verifying all repositories mount correctly + + Prerequisites: Requires existing deepfreeze repositories with frozen data. + + Duration: Up to 6 hours (AWS Glacier Standard tier) + """ + self.logger.info("\n" + "="*80) + self.logger.info("TEST: New Thaw Request Full Lifecycle") + self.logger.info("="*80) + + # Get existing repositories + repos = self._get_available_repositories() + + if len(repos) < 2: + pytest.skip(f"Need at least 2 repositories for this test, found {len(repos)}") + + # Find repositories that are frozen (not currently thawed) + frozen_repos = [r for r in repos if r['thaw_state'] in ['frozen', 'active']] + + if len(frozen_repos) < 2: + pytest.skip(f"Need at least 2 frozen repositories, found {len(frozen_repos)}") + + # Pick first 3 frozen repositories (or all if less than 3) + test_repos = frozen_repos[:min(3, len(frozen_repos))] + + # Determine date range spanning all selected repos + start_date = min(r['start'] for r in test_repos) + end_date = max(r['end'] for r in test_repos) + + self.logger.info(f"Testing with {len(test_repos)} repositories:") + for r in test_repos: + self.logger.info(f" {r['name']}: {r['start']} to {r['end']}") + self.logger.info(f"Date range: {start_date} to {end_date}") + + # Create thaw request for existing repositories + self.logger.info(f"\n--- Creating thaw request for {len(test_repos)} repositories ---") + + thaw = Thaw( + self.client, + start_date=start_date.isoformat(), + end_date=end_date.isoformat(), + sync=False, + duration=1, + retrieval_tier="Standard" if not FAST_MODE else "Expedited", + porcelain=False, + ) + thaw.do_action() + + requests = list_thaw_requests(self.client) + if not requests: + raise AssertionError("No thaw request was created") + + # Sort by created_at to get the most recently created request + sorted_requests = sorted(requests, key=lambda r: r.get("created_at", ""), 
reverse=True) + thaw_request_id = sorted_requests[0]["id"] + self.thaw_request_ids.append(thaw_request_id) + self.logger.info(f"Created thaw request: {thaw_request_id}") + + # Wait for completion (real test: up to 6 hours) + timeout = 0.1 if FAST_MODE else THAW_TIMEOUT_HOURS + check_interval = 1 if FAST_MODE else CHECK_INTERVAL_MINUTES + + completed = self._wait_for_thaw_with_checks( + thaw_request_id, + timeout_hours=timeout, + check_interval_minutes=check_interval, + ) + + assert completed, f"Thaw did not complete within {timeout} hours" + + # Verify all expected repositories mounted + self.logger.info("\n--- Verifying all repositories mounted ---") + request = get_thaw_request(self.client, thaw_request_id) + repo_names = request.get("repos", []) + + self.logger.info(f"Thaw request includes {len(repo_names)} repositories") + assert len(repo_names) >= 1, \ + f"Expected at least 1 repository, got {len(repo_names)}" + + # Verify they're mounted (state can be either 'active' or 'thawed') + repos = get_repositories_by_names(self.client, repo_names) + for repo in repos: + assert repo.is_mounted, f"Repository {repo.name} should be mounted" + assert repo.thaw_state in ['active', 'thawed'], \ + f"Repository {repo.name} should be active or thawed, got {repo.thaw_state}" + + self.logger.info("\n✓ Test completed successfully - repositories thawed and mounted") + + @pytest.mark.slow + @pytest.mark.skipif(SKIP_LONG_TESTS, reason="Requires 24+ hours") + def test_one_day_duration_with_cleanup(self): + """ + Test 1-day duration thaw followed by automated cleanup after 24 hours. + + This test validates: + - Creating thaw with -d1 (1-day duration) + - Waiting for completion (up to 6 hours) + - Verifying indices mounted + - Waiting 24 hours for expiration + - Running cleanup + - Verifying repositories removed and marked frozen + + Duration: ~30 hours (6hr restore + 24hr wait + verification) + + IMPORTANT: This test requires DEEPFREEZE_SKIP_LONG_TESTS=0 and actually waits 24 hours. 
+ """ + self.logger.info("\n" + "="*80) + self.logger.info("TEST: 1-Day Duration with 24-Hour Cleanup") + self.logger.info("="*80) + + if FAST_MODE: + pytest.skip("This test cannot run in FAST_MODE - requires real 24-hour wait") + + # Get existing repositories + repos = self._get_available_repositories() + if len(repos) < 1: + pytest.skip("Need at least 1 repository for this test") + + # Find a frozen repository + frozen_repos = [r for r in repos if r['thaw_state'] == 'frozen'] + if len(frozen_repos) < 1: + pytest.skip("Need at least 1 frozen repository for this test") + + test_repo = frozen_repos[0] + start_date = test_repo['start'] + end_date = test_repo['end'] + + self.logger.info(f"Testing with repository: {test_repo['name']}") + self.logger.info(f"Date range: {start_date} to {end_date}") + + # Create thaw with 1-day duration + self.logger.info("\n--- Creating thaw with 1-day duration ---") + + thaw = Thaw( + self.client, + start_date=start_date.isoformat(), + end_date=end_date.isoformat(), + sync=False, + duration=1, # 1 day + retrieval_tier="Standard", + porcelain=False, + ) + thaw.do_action() + + requests = list_thaw_requests(self.client) + sorted_requests = sorted(requests, key=lambda r: r.get("created_at", ""), reverse=True) + thaw_request_id = sorted_requests[0]["id"] + self.thaw_request_ids.append(thaw_request_id) + self.logger.info(f"Created 1-day thaw request: {thaw_request_id}") + + # Wait for thaw completion (up to 6 hours) + self.logger.info("\n--- Waiting for thaw to complete (up to 6 hours) ---") + completed = self._wait_for_thaw_with_checks( + thaw_request_id, + timeout_hours=THAW_TIMEOUT_HOURS, + check_interval_minutes=CHECK_INTERVAL_MINUTES, + ) + + assert completed, "Thaw did not complete within 6 hours" + + # Verify repository state + request = get_thaw_request(self.client, thaw_request_id) + repo_names = request.get("repos", []) + self._verify_repo_state(repo_names, expected_mounted=True, expected_thaw_state="active") + + # Wait 24 hours for 
expiration + self.logger.info("\n" + "="*80) + self.logger.info("SLEEPING FOR 24 HOURS TO ALLOW EXPIRATION") + self.logger.info(f"Started at: {datetime.now(timezone.utc)}") + self.logger.info("="*80) + + sleep_hours = 24 + sleep_seconds = sleep_hours * 3600 + check_interval_seconds = 3600 # Check every hour + + for hour in range(sleep_hours): + elapsed = hour + 1 + remaining = sleep_hours - elapsed + self.logger.info( + f"Hour {elapsed}/{sleep_hours} elapsed, {remaining} hours remaining" + ) + time.sleep(check_interval_seconds) + + self.logger.info(f"24-hour wait complete at: {datetime.now(timezone.utc)}") + + # Run cleanup + self.logger.info("\n--- Running cleanup after 24-hour wait ---") + cleanup = Cleanup(self.client) + cleanup.do_action() + + time.sleep(INTERVAL * 2) + + # Verify repositories unmounted and frozen + self.logger.info("\n--- Verifying cleanup results ---") + repos_after = get_repositories_by_names(self.client, repo_names) + + for repo in repos_after: + assert not repo.is_mounted, \ + f"Repository {repo.name} should be unmounted after cleanup" + assert repo.thaw_state == "frozen", \ + f"Repository {repo.name} should be frozen after cleanup, got {repo.thaw_state}" + + # Verify thaw request marked as completed + request_after = get_thaw_request(self.client, thaw_request_id) + assert request_after["status"] == "completed", \ + f"Expected status 'completed', got {request_after['status']}" + + self.logger.info("\n✓ Test completed successfully after 24+ hours") + + @pytest.mark.skipif(SKIP_LONG_TESTS, reason="Requires up to 6.5 hours") + def test_thaw_complete_then_refreeze(self): + """ + Test thaw completion followed by immediate user-initiated refreeze. 
+ + This test validates: + - Waiting for thaw to complete + - Verifying all repositories mounted + - Verifying all indices searchable + - Executing refreeze action + - Verifying repositories unmounted + - Verifying indices no longer accessible (searchable snapshot behavior) + - Verifying thaw request status changed to "refrozen" + + Duration: Up to 6.5 hours (6hr restore + refreeze + verification) + """ + self.logger.info("\n" + "="*80) + self.logger.info("TEST: Thaw Complete Then Refreeze") + self.logger.info("="*80) + + # Get existing repositories + repos = self._get_available_repositories() + if len(repos) < 1: + pytest.skip("Need at least 1 repository for this test") + + # Find a frozen repository + frozen_repos = [r for r in repos if r['thaw_state'] in ['frozen', 'active']] + if len(frozen_repos) < 1: + pytest.skip("Need at least 1 frozen repository for this test") + + test_repo = frozen_repos[0] + start_date = test_repo['start'] + end_date = test_repo['end'] + + self.logger.info(f"Testing with repository: {test_repo['name']}") + self.logger.info(f"Date range: {start_date} to {end_date}") + + # Create thaw request + self.logger.info("\n--- Creating thaw request ---") + + thaw = Thaw( + self.client, + start_date=start_date.isoformat(), + end_date=end_date.isoformat(), + sync=False, + duration=1, # 1-day duration + retrieval_tier="Standard", + porcelain=False, + ) + thaw.do_action() + + requests = list_thaw_requests(self.client) + sorted_requests = sorted(requests, key=lambda r: r.get("created_at", ""), reverse=True) + thaw_request_id = sorted_requests[0]["id"] + self.thaw_request_ids.append(thaw_request_id) + self.logger.info(f"Created thaw request: {thaw_request_id}") + + # Wait for completion + timeout = 0.1 if FAST_MODE else THAW_TIMEOUT_HOURS + check_interval = 1 if FAST_MODE else CHECK_INTERVAL_MINUTES + + self.logger.info(f"\n--- Waiting for thaw to complete (up to {timeout} hours) ---") + completed = self._wait_for_thaw_with_checks( + thaw_request_id, + 
timeout_hours=timeout, + check_interval_minutes=check_interval, + ) + + assert completed, f"Thaw did not complete within {timeout} hours" + + # Verify all repositories mounted + self.logger.info("\n--- Verifying repositories mounted ---") + request = get_thaw_request(self.client, thaw_request_id) + repo_names = request.get("repos", []) + + # Verify they're mounted (state can be either 'active' or 'thawed') + repos = get_repositories_by_names(self.client, repo_names) + for repo in repos: + assert repo.is_mounted, f"Repository {repo.name} should be mounted" + assert repo.thaw_state in ['active', 'thawed'], \ + f"Repository {repo.name} should be active or thawed, got {repo.thaw_state}" + + # Execute refreeze + self.logger.info("\n--- Executing refreeze ---") + refreeze = Refreeze( + self.client, + thaw_request_id=thaw_request_id, + porcelain=False, + ) + refreeze.do_action() + + time.sleep(INTERVAL * 2) + + # Verify repositories unmounted + self.logger.info("\n--- Verifying repositories unmounted ---") + self._verify_repo_state(repo_names, expected_mounted=False, expected_thaw_state="frozen") + + # Verify thaw request status + self.logger.info("\n--- Verifying thaw request status ---") + request_after = get_thaw_request(self.client, thaw_request_id) + assert request_after["status"] == "refrozen", \ + f"Expected status 'refrozen', got {request_after['status']}" + + # Verify repository thaw_state + repos_after = get_repositories_by_names(self.client, repo_names) + for repo in repos_after: + assert repo.thaw_state == "frozen", \ + f"Expected thaw_state 'frozen' for {repo.name}, got {repo.thaw_state}" + + self.logger.info("\n✓ Test completed successfully") + + @pytest.mark.skipif(SKIP_LONG_TESTS, reason="Requires up to 6.5 hours") + def test_multiple_concurrent_thaw_requests(self): + """ + Test handling multiple simultaneous thaw requests. 
+ + This test validates: + - Creating multiple thaw requests for different date ranges + - Monitoring all requests concurrently + - Verifying each completes independently + - No repository conflicts + - thaw --list shows all active requests + - thaw --check-status (no ID) processes all requests + - Selective refreeze of individual requests + + Duration: Up to 6.5 hours + """ + self.logger.info("\n" + "="*80) + self.logger.info("TEST: Multiple Concurrent Thaw Requests") + self.logger.info("="*80) + + # Get frozen repositories NOT currently in any active thaw requests + available_repos = self._get_repos_not_in_active_requests() + + if len(available_repos) < 3: + pytest.skip(f"Need at least 3 frozen repositories not in active requests, found {len(available_repos)}") + + # Select 3 repositories - each will get its own thaw request + test_repos = available_repos[:3] + + self.logger.info(f"Testing with {len(test_repos)} frozen repositories not in active requests:") + for r in test_repos: + self.logger.info(f" {r['name']}: {r['start']} to {r['end']}") + + # Create 3 different thaw requests, one for each repository + self.logger.info("\n--- Creating 3 concurrent thaw requests ---") + + request_ids = [] + + for i, repo in enumerate(test_repos): + start = repo['start'] + end = repo['end'] + self.logger.info(f"Creating thaw request {i+1}/3 for {start} to {end}") + + thaw = Thaw( + self.client, + start_date=start.isoformat(), + end_date=end.isoformat(), + sync=False, + duration=1, + retrieval_tier="Standard" if not FAST_MODE else "Expedited", + porcelain=False, + ) + thaw.do_action() + + # Get the request ID that was generated by the Thaw action + request_id = thaw.request_id + request_ids.append(request_id) + self.thaw_request_ids.append(request_id) + self.logger.info(f"Created request: {request_id}") + + time.sleep(INTERVAL) + + # Test thaw --list shows all requests + self.logger.info("\n--- Testing thaw --list ---") + thaw_list = Thaw( + self.client, + list_requests=True, + 
include_completed=False, + porcelain=False, + ) + thaw_list.do_action() + + all_requests = list_thaw_requests(self.client) + for request_id in request_ids: + assert any(r["id"] == request_id for r in all_requests), \ + f"Request {request_id} not found in list" + + # Wait for all requests to complete + timeout = 0.1 if FAST_MODE else THAW_TIMEOUT_HOURS + check_interval = 1 if FAST_MODE else CHECK_INTERVAL_MINUTES + + self.logger.info(f"\n--- Waiting for all requests to complete (up to {timeout} hours) ---") + + for i, request_id in enumerate(request_ids): + self.logger.info(f"Waiting for request {i+1}/3: {request_id}") + completed = self._wait_for_thaw_with_checks( + request_id, + timeout_hours=timeout, + check_interval_minutes=check_interval, + ) + assert completed, f"Request {request_id} did not complete" + + # Verify no repository conflicts - all should be mounted + self.logger.info("\n--- Verifying no repository conflicts ---") + for request_id in request_ids: + request = get_thaw_request(self.client, request_id) + repo_names = request.get("repos", []) + repos = get_repositories_by_names(self.client, repo_names) + + for repo in repos: + assert repo.is_mounted, f"Repository {repo.name} should be mounted" + assert repo.thaw_state in ['active', 'thawed'], \ + f"Repository {repo.name} should be active or thawed, got {repo.thaw_state}" + + # Test thaw --check-status (no ID) processes all requests + self.logger.info("\n--- Testing check-status on all requests ---") + thaw_check_all = Thaw( + self.client, + check_status="", # Empty string means check all + porcelain=False, + ) + thaw_check_all.do_action() + + # Selectively refreeze the middle request + self.logger.info("\n--- Selectively refreezing middle request ---") + middle_request_id = request_ids[1] + + refreeze = Refreeze( + self.client, + thaw_request_id=middle_request_id, + porcelain=False, + ) + refreeze.do_action() + + time.sleep(INTERVAL * 2) + + # Verify middle request refrozen, others still active + 
self.logger.info("\n--- Verifying selective refreeze ---") + + # Middle request should be refrozen + middle_request = get_thaw_request(self.client, middle_request_id) + assert middle_request["status"] == "refrozen", \ + f"Middle request should be refrozen, got {middle_request['status']}" + + middle_repo_names = middle_request.get("repos", []) + self._verify_repo_state(middle_repo_names, expected_mounted=False, expected_thaw_state="frozen") + + # Other requests should still be active (not refrozen) + for request_id in [request_ids[0], request_ids[2]]: + request = get_thaw_request(self.client, request_id) + assert request["status"] in ["in_progress", "completed"], \ + f"Request {request_id} should still be in_progress or completed, got {request['status']}" + + repo_names = request.get("repos", []) + repos = get_repositories_by_names(self.client, repo_names) + for repo in repos: + assert repo.is_mounted, f"Repository {repo.name} should still be mounted" + assert repo.thaw_state in ['active', 'thawed'], \ + f"Repository {repo.name} should be active or thawed, got {repo.thaw_state}" + + self.logger.info("\n✓ Test completed successfully") + + def test_cleanup_mixed_expiration_states(self): + """ + Test cleanup with mix of expired and active thaw requests. 
+ + This test validates: + - Creating multiple thaw requests with different durations + - Manually adjusting timestamps to simulate various expiration states + - Running cleanup + - Verifying only expired requests are cleaned up + - Active requests remain untouched + + Duration: ~30 minutes (uses timestamp manipulation) + """ + self.logger.info("\n" + "="*80) + self.logger.info("TEST: Cleanup Mixed Expiration States") + self.logger.info("="*80) + + # Get a frozen repository NOT currently in any active thaw requests + available_repos = self._get_repos_not_in_active_requests() + + if len(available_repos) < 1: + pytest.skip("Need at least 1 frozen repository not in active requests") + + test_repo = available_repos[0] + start_date = test_repo['start'] + end_date = test_repo['end'] + + self.logger.info(f"Testing with repository: {test_repo['name']}") + self.logger.info(f"Date range: {start_date} to {end_date}") + + # Create thaw request + self.logger.info("\n--- Creating thaw request ---") + + thaw = Thaw( + self.client, + start_date=start_date.isoformat(), + end_date=end_date.isoformat(), + sync=False, + duration=1, + retrieval_tier="Standard" if not FAST_MODE else "Expedited", + porcelain=False, + ) + thaw.do_action() + + requests = list_thaw_requests(self.client) + sorted_requests = sorted(requests, key=lambda r: r.get("created_at", ""), reverse=True) + thaw_request_id = sorted_requests[0]["id"] + self.thaw_request_ids.append(thaw_request_id) + + # Wait for completion + timeout = 0.1 if FAST_MODE else THAW_TIMEOUT_HOURS + completed = self._wait_for_thaw_with_checks( + thaw_request_id, + timeout_hours=timeout, + check_interval_minutes=1 if FAST_MODE else CHECK_INTERVAL_MINUTES, + ) + assert completed, "Thaw did not complete" + + # Manually expire the request by updating timestamps + self.logger.info("\n--- Manually expiring thaw request ---") + past_time = datetime.now(timezone.utc) - timedelta(days=10) + + self.client.update( + index=STATUS_INDEX, + 
id=thaw_request_id, + body={ + "doc": { + "created_at": past_time.isoformat(), + "expires_at": (past_time + timedelta(days=1)).isoformat(), + } + }, + ) + self.client.indices.refresh(index=STATUS_INDEX) + + # Run cleanup + self.logger.info("\n--- Running cleanup ---") + cleanup = Cleanup(self.client) + cleanup.do_action() + + time.sleep(INTERVAL * 2) + + # Verify thaw request marked as refrozen/cleaned up + self.logger.info("\n--- Verifying cleanup results ---") + + # Wait for refreeze operation to complete + # Cleanup starts the refreeze, but we need to wait for it to finish + if FAST_MODE: + # In FAST_MODE, simulate immediate refreeze completion + self.logger.info("FAST_MODE: Waiting for refreeze to complete") + time.sleep(INTERVAL * 2) + request = get_thaw_request(self.client, thaw_request_id) + repo_names = request.get("repos", []) + repos = get_repositories_by_names(self.client, repo_names) + + # Unmount all repositories + for repo in repos: + if repo.is_mounted: + repo.is_mounted = False + repo.thaw_state = "frozen" + repo.persist(self.client) + + # Unregister repository + try: + self.client.snapshot.delete_repository(name=repo.name) + except Exception: + pass # May not be registered + + # Update request status to refrozen + self.client.update( + index=STATUS_INDEX, + id=thaw_request_id, + body={"doc": {"status": "refrozen"}}, + ) + self.client.indices.refresh(index=STATUS_INDEX) + self.logger.info("FAST_MODE: Refreeze marked as complete") + else: + # In real mode, wait for actual refreeze to complete + max_wait_time = 300 # 5 minutes + start_wait = time.time() + while (time.time() - start_wait) < max_wait_time: + request = get_thaw_request(self.client, thaw_request_id) + if request.get("status") == "refrozen": + break + self.logger.info(f"Waiting for refreeze... 
status: {request.get('status')}")
+                time.sleep(10)
+
+        request_after_cleanup = get_thaw_request(self.client, thaw_request_id)
+
+        # The cleanup should have processed the expired request
+        self.logger.info(f"Request status after cleanup: {request_after_cleanup.get('status')}")
+        assert request_after_cleanup.get("status") == "refrozen", \
+            f"Expected status 'refrozen' after cleanup, got {request_after_cleanup.get('status')}"
+
+        # Verify repositories unmounted
+        repo_names = request_after_cleanup.get("repos", [])
+        repos_after = get_repositories_by_names(self.client, repo_names)
+        for repo in repos_after:
+            assert not repo.is_mounted, \
+                f"Repository {repo.name} should be unmounted after cleanup"
+            assert repo.thaw_state == "frozen", \
+                f"Repository {repo.name} should be frozen after cleanup, got {repo.thaw_state}"
+
+        self.logger.info("\n✓ Test completed successfully - cleanup processed expired request")
+
+
+if __name__ == "__main__":
+    # Allow running individual tests
+    pytest.main([__file__, "-v", "-s"])
diff --git a/tests/integration/test_deepfreeze_rotate.py b/tests/integration/test_deepfreeze_rotate.py
new file mode 100644
index 00000000..8760bb92
--- /dev/null
+++ b/tests/integration/test_deepfreeze_rotate.py
@@ -0,0 +1,253 @@
+"""
+Test deepfreeze rotate functionality
+"""
+
+# pylint: disable=missing-function-docstring, missing-class-docstring, line-too-long
+import os
+import random
+import warnings
+
+from curator.actions.deepfreeze import PROVIDERS
+from curator.actions.deepfreeze.constants import STATUS_INDEX
+from curator.actions.deepfreeze.exceptions import MissingIndexError
+from curator.actions.deepfreeze.rotate import Rotate
+from curator.actions.deepfreeze.utilities import get_all_repos, get_repository
+from curator.exceptions import ActionError
+from curator.s3client import s3_client_factory
+from tests.integration import testvars
+
+from . 
import DeepfreezeTestCase, random_suffix + +HOST = os.environ.get("TEST_ES_SERVER", "http://127.0.0.1:9200") +MET = "metadata" + + +class TestDeepfreezeRotate(DeepfreezeTestCase): + def test_rotate_happy_path(self): + warnings.filterwarnings( + "ignore", category=DeprecationWarning, module="botocore.auth" + ) + + for provider in PROVIDERS: + self.provider = provider + if self.bucket_name == "": + self.bucket_name = f"{testvars.df_bucket_name}-{random_suffix()}" + + setup = self.do_setup(create_ilm_policy=True) + prefix = setup.settings.repo_name_prefix + csi = self.client.cluster.state(metric=MET)[MET]["indices"] + + # Specific assertions + # Settings index should exist + assert csi[STATUS_INDEX] + + # Assert that there is only one document in the STATUS_INDEX + status_index_docs = self.client.search(index=STATUS_INDEX, size=0) + assert status_index_docs["hits"]["total"]["value"] == 2 + rotate = Rotate( + self.client, + ) + assert len(rotate.repo_list) == 1 + assert rotate.repo_list == [f"{prefix}-000001"] + # Perform the first rotation + rotate.do_action() + # There should now be one repositories. 
+ + # Save off the current repo list + orig_list = rotate.repo_list + # Do another rotation with keep=1 + rotate = Rotate( + self.client, + keep=1, + ) + rotate.do_action() + # There should now be two (one kept and one new) + assert len(rotate.repo_list) == 2 + assert rotate.repo_list == [f"{prefix}-000002", f"{prefix}-000001"] + # They should not be the same two as before + assert rotate.repo_list != orig_list + + # Save off the current repo list + orig_list = rotate.repo_list + # Do another rotation with keep=1 + rotate = Rotate( + self.client, + keep=1, + ) + rotate.do_action() + # There should now be two (one kept and one new) + assert len(rotate.repo_list) == 2 + assert rotate.repo_list == [f"{prefix}-000003", f"{prefix}-000002"] + # They should not be the same two as before + assert rotate.repo_list != orig_list + # Query the settings index to get the unmountd repos + unmounted = get_all_repos(self.client) + assert len(unmounted) == 1 + assert unmounted[0].name == f"{prefix}-000001" + + def test_rotate_with_data(self): + warnings.filterwarnings( + "ignore", category=DeprecationWarning, module="botocore.auth" + ) + + for provider in PROVIDERS: + self.provider = provider + if self.bucket_name == "": + self.bucket_name = f"{testvars.df_bucket_name}-{random_suffix()}" + + setup = self.do_setup(create_ilm_policy=True) + prefix = setup.settings.repo_name_prefix + csi = self.client.cluster.state(metric=MET)[MET]["indices"] + + # Specific assertions + # Settings index should exist + assert csi[STATUS_INDEX] + + # Assert that there is only one document in the STATUS_INDEX + status_index_docs = self.client.search(index=STATUS_INDEX, size=0) + assert status_index_docs["hits"]["total"]["value"] == 2 + rotate = self.do_rotate(populate_index=True) + # There should now be one repositories. 
+ assert len(rotate.repo_list) == 1 + + # Save off the current repo list + orig_list = rotate.repo_list + # Do another rotation with keep=1 + rotate = self.do_rotate(populate_index=True) + # There should now be two (one kept and one new) + assert len(rotate.repo_list) == 2 + assert rotate.repo_list == [f"{prefix}-000002", f"{prefix}-000001"] + # They should not be the same two as before + assert rotate.repo_list != orig_list + + # Save off the current repo list + orig_list = rotate.repo_list + # Do another rotation with keep=1 + rotate = self.do_rotate(populate_index=True, keep=1) + # There should now be two (one kept and one new) + assert len(rotate.repo_list) == 3 + assert rotate.repo_list == [ + f"{prefix}-000003", + f"{prefix}-000002", + f"{prefix}-000001", + ] + # Query the settings index to get the unmounted repos + unmounted = get_all_repos(self.client) + assert len(unmounted) == 2 + assert f"{prefix}-000001" in [x.name for x in unmounted] + assert f"{prefix}-000002" in [x.name for x in unmounted] + repos = [get_repository(self.client, name=r) for r in rotate.repo_list] + assert len(repos) == 3 + for repo in repos: + if repo: + assert repo.earliest is not None + assert repo.latest is not None + assert repo.earliest < repo.latest + assert len(repo.indices) > 1 + else: + print(f"{repo} is None") + + def test_missing_status_index(self): + warnings.filterwarnings( + "ignore", category=DeprecationWarning, module="botocore.auth" + ) + + for provider in PROVIDERS: + self.provider = provider + if self.bucket_name == "": + self.bucket_name = f"{testvars.df_bucket_name}-{random_suffix()}" + + setup = self.do_setup(create_ilm_policy=True) + prefix = setup.settings.repo_name_prefix + csi = self.client.cluster.state(metric=MET)[MET]["indices"] + + # Specific assertions + # Settings index should exist + assert csi[STATUS_INDEX] + + # Assert that there is only one document in the STATUS_INDEX + status_index_docs = self.client.search(index=STATUS_INDEX, size=0) + assert 
status_index_docs["hits"]["total"]["value"] == 2 + + # Now, delete the status index completely + self.client.indices.delete(index=STATUS_INDEX) + csi = self.client.cluster.state(metric=MET)[MET]["indices"] + assert STATUS_INDEX not in csi + + with self.assertRaises(MissingIndexError): + rotate = self.do_rotate(populate_index=True) + + def test_missing_repo(self): + warnings.filterwarnings( + "ignore", category=DeprecationWarning, module="botocore.auth" + ) + + for provider in PROVIDERS: + self.provider = provider + if self.bucket_name == "": + self.bucket_name = f"{testvars.df_bucket_name}-{random_suffix()}" + + setup = self.do_setup(create_ilm_policy=True) + prefix = setup.settings.repo_name_prefix + csi = self.client.cluster.state(metric=MET)[MET]["indices"] + + # Specific assertions + # Settings index should exist + assert csi[STATUS_INDEX] + + # Assert that there is only one document in the STATUS_INDEX + status_index_docs = self.client.search(index=STATUS_INDEX, size=0) + assert status_index_docs["hits"]["total"]["value"] == 2 + + rotate = self.do_rotate(6) + # There should now be one repositories. 
+ assert len(rotate.repo_list) == 6 + + # Delete a random repo + repo_to_delete = rotate.repo_list[random.randint(0, 5)] + self.client.snapshot.delete_repository( + name=repo_to_delete, + ) + + # Do another rotation with keep=1 + rotate = self.do_rotate(populate_index=True) + # There should now be two (one kept and one new) + assert len(rotate.repo_list) == 6 + assert repo_to_delete not in rotate.repo_list + + def test_missing_bucket(self): + warnings.filterwarnings( + "ignore", category=DeprecationWarning, module="botocore.auth" + ) + + for provider in PROVIDERS: + self.provider = provider + if self.bucket_name == "": + self.bucket_name = f"{testvars.df_bucket_name}-{random_suffix()}" + + setup = self.do_setup(create_ilm_policy=True) + prefix = setup.settings.repo_name_prefix + csi = self.client.cluster.state(metric=MET)[MET]["indices"] + + # Specific assertions + # Settings index should exist + assert csi[STATUS_INDEX] + + # Assert that there is only one document in the STATUS_INDEX + status_index_docs = self.client.search(index=STATUS_INDEX, size=0) + assert status_index_docs["hits"]["total"]["value"] == 2 + + rotate = self.do_rotate(6, populate_index=True) + # There should now be one repositories. + assert len(rotate.repo_list) == 6 + + # Delete the bucket + s3 = s3_client_factory(self.provider) + s3.delete_bucket(setup.settings.bucket_name_prefix) + + # Do another rotation with keep=1 + with self.assertRaises(ActionError): + rotate = self.do_rotate(populate_index=True) + + # This indicates a Bad Thing, but I'm not sure what the correct response + # should be from a DF standpoint. 
diff --git a/tests/integration/test_deepfreeze_setup.py b/tests/integration/test_deepfreeze_setup.py new file mode 100644 index 00000000..1e133824 --- /dev/null +++ b/tests/integration/test_deepfreeze_setup.py @@ -0,0 +1,156 @@ +""" +Test deepfreeze setup functionality +""" + +# pylint: disable=missing-function-docstring, missing-class-docstring, line-too-long +import os +import time +import warnings + +from curator.actions.deepfreeze import PROVIDERS, SETTINGS_ID, STATUS_INDEX, Setup +from curator.exceptions import ActionError, RepositoryException +from curator.s3client import s3_client_factory + +from . import DeepfreezeTestCase, random_suffix, testvars + +HOST = os.environ.get("TEST_ES_SERVER", "http://127.0.0.1:9200") +MET = "metadata" +INTERVAL = 1 # Because we can't go too fast or cloud providers can't keep up. + + +class TestDeepfreezeSetup(DeepfreezeTestCase): + def test_setup(self): + for provider in PROVIDERS: + warnings.filterwarnings( + "ignore", category=DeprecationWarning, module="botocore.auth" + ) + + self.provider = provider + if self.bucket_name == "": + self.bucket_name = f"{testvars.df_bucket_name}-{random_suffix()}" + + self.do_setup() + csi = self.client.cluster.state(metric=MET)[MET]["indices"] + + # Specific assertions + # Settings index should exist + assert csi[STATUS_INDEX] + # Settings doc should exist within index + assert self.client.get(index=STATUS_INDEX, id=SETTINGS_ID) + # Settings index should only have settings doc (count == 1) + assert 1 == self.client.count(index=STATUS_INDEX)["count"] + # Repo should exist + assert self.client.snapshot.get_repository( + name=f"{testvars.df_repo_name}-000001" + ) + # Bucket should exist + s3 = s3_client_factory(provider) + assert s3.bucket_exists(self.bucket_name) + # We can't test the base path on AWS because it won't be created until the + # first object is written, but we can test the settings to see if it's correct + # there. 
+ s = self.get_settings() + assert s.base_path_prefix == testvars.df_base_path + assert s.last_suffix == "000001" + assert s.canned_acl == testvars.df_acl + assert s.storage_class == testvars.df_storage_class + assert s.provider == "aws" + assert s.rotate_by == testvars.df_rotate_by + assert s.style == testvars.df_style + assert s.repo_name_prefix == testvars.df_repo_name + assert s.bucket_name_prefix == self.bucket_name + + # Clean up + self.client.snapshot.delete_repository( + name=f"{testvars.df_repo_name}-000001" + ) + + def test_setup_with_ilm(self): + warnings.filterwarnings( + "ignore", category=DeprecationWarning, module="botocore.auth" + ) + for provider in PROVIDERS: + self.provider = provider + if self.bucket_name == "": + self.bucket_name = f"{testvars.df_bucket_name}-{random_suffix()}" + + self.do_setup(create_ilm_policy=True) + # ILM policy should exist + assert self.client.ilm.get_lifecycle(name=testvars.df_ilm_policy) + # We can't test the base path on AWS because it won't be created until the + # first object is written, but we can test the settings to see if it's correct + # there. 
+ s = self.get_settings() + assert s.base_path_prefix == testvars.df_base_path + assert s.last_suffix == "000001" + assert s.canned_acl == testvars.df_acl + assert s.storage_class == testvars.df_storage_class + assert s.provider == "aws" + assert s.rotate_by == testvars.df_rotate_by + assert s.style == testvars.df_style + assert s.repo_name_prefix == testvars.df_repo_name + assert s.bucket_name_prefix == self.bucket_name + + def test_setup_bucket_exists(self): + for provider in PROVIDERS: + warnings.filterwarnings( + "ignore", category=DeprecationWarning, module="botocore.auth" + ) + self.provider = provider + if self.bucket_name == "": + self.bucket_name = f"{testvars.df_bucket_name}-{random_suffix()}" + s3 = s3_client_factory(provider) + print(f"Pre-creating {provider} with {self.bucket_name}") + s3.create_bucket(f"{self.bucket_name}-000001") + time.sleep(INTERVAL) + # This should raise an ActionError because the bucket already exists + setup = self.do_setup(do_action=False, rotate_by="bucket") + s = setup.settings + print(f"Settings: {s}") + with self.assertRaises(ActionError): + setup.do_action() + + def test_setup_repo_exists(self): + warnings.filterwarnings( + "ignore", category=DeprecationWarning, module="botocore.auth" + ) + for provider in PROVIDERS: + self.provider = provider + if self.bucket_name == "": + self.bucket_name = f"{testvars.df_bucket_name}-{random_suffix()}" + s3 = s3_client_factory(provider) + self.bucket_name_2 = f"{testvars.df_bucket_name_2}-{random_suffix()}" + + # Pre-create the bucket and repo to simulate picking a repo that already \ + # exists. We use a different bucket name to avoid the bucket already exists + # error. 
+ s3.create_bucket(self.bucket_name_2) + time.sleep(INTERVAL) + self.client.snapshot.create_repository( + name=f"{testvars.df_repo_name}-000001", + body={ + "type": "s3", + "settings": { + "bucket": self.bucket_name_2, + "base_path": testvars.df_base_path_2, + "storage_class": testvars.df_storage_class, + }, + }, + ) + + with self.assertRaises(RepositoryException): + setup = Setup( + self.client, + bucket_name_prefix=self.bucket_name, + repo_name_prefix=testvars.df_repo_name, + base_path_prefix=testvars.df_base_path, + storage_class=testvars.df_storage_class, + rotate_by=testvars.df_rotate_by, + style=testvars.df_style, + ) + setup.do_action() + + # Clean up + self.client.snapshot.delete_repository( + name=f"{testvars.df_repo_name}-000001" + ) diff --git a/tests/integration/test_deepfreeze_thaw.py b/tests/integration/test_deepfreeze_thaw.py new file mode 100644 index 00000000..e5433ead --- /dev/null +++ b/tests/integration/test_deepfreeze_thaw.py @@ -0,0 +1,863 @@ +""" +Test deepfreeze thaw functionality + +These are long-running integration tests that test the complete thaw lifecycle: +1. Creating thaw requests +2. Monitoring restore progress using porcelain output +3. Verifying indices are mounted correctly +4. Verifying data can be searched +5. Cleaning up and verifying repositories are unmounted + +IMPORTANT: Real thaw operations can take up to 6 hours due to AWS Glacier restore times. +Set DEEPFREEZE_FAST_MODE=1 to use mocked/accelerated tests for CI. +Set DEEPFREEZE_FULL_TEST=1 to run full integration tests against real AWS Glacier. + +Configuration is loaded from ~/.curator/curator.yml by default. +Set CURATOR_CONFIG environment variable to use a different config file. 
+""" + +# pylint: disable=missing-function-docstring, missing-class-docstring, line-too-long +import os +import time +import warnings +import yaml +from datetime import datetime, timedelta, timezone +from typing import Dict, List, Tuple + +import pytest +from es_client.builder import Builder + +from curator.actions.deepfreeze import STATUS_INDEX, Cleanup, Refreeze, Thaw +from curator.actions.deepfreeze.utilities import ( + get_repositories_by_names, + get_settings, + get_thaw_request, + list_thaw_requests, +) +from curator.defaults.settings import VERSION_MAX, VERSION_MIN, default_config_file +from curator.s3client import s3_client_factory + +from . import DeepfreezeTestCase, random_suffix, testvars + +# Configuration file path +CONFIG_FILE = os.environ.get("CURATOR_CONFIG", default_config_file()) +INTERVAL = 1 # Base interval for sleep operations + +# Test mode configuration +FAST_MODE = os.environ.get("DEEPFREEZE_FAST_MODE", "0") == "1" +FULL_TEST = os.environ.get("DEEPFREEZE_FULL_TEST", "0") == "1" + +# Skip long-running tests unless explicitly enabled +pytestmark = pytest.mark.skipif( + not FULL_TEST and not FAST_MODE, + reason="Thaw tests are long-running. Set DEEPFREEZE_FULL_TEST=1 or DEEPFREEZE_FAST_MODE=1 to run.", +) + + +class ThawStatusParser: + """Helper class to parse porcelain output from thaw commands""" + + @staticmethod + def parse_status_output(output: str) -> Dict: + """ + Parse porcelain output from thaw --check-status command. 
+ + Expected format: + REQUEST {request_id} {status} {created_at} {start_date} {end_date} + REPO {name} {bucket} {path} {state} {mounted} {progress} + + :param output: Raw porcelain output string + :type output: str + :return: Parsed status information + :rtype: Dict + """ + result = {"request": None, "repos": []} + + for line in output.strip().split("\n"): + if not line.strip(): + continue + + parts = line.split("\t") + record_type = parts[0] + + if record_type == "REQUEST": + result["request"] = { + "id": parts[1], + "status": parts[2], + "created_at": parts[3], + "start_date": parts[4], + "end_date": parts[5], + } + elif record_type == "REPO": + result["repos"].append( + { + "name": parts[1], + "bucket": parts[2], + "path": parts[3], + "state": parts[4], + "mounted": parts[5] == "yes", + "progress": parts[6], + } + ) + + return result + + @staticmethod + def parse_list_output(output: str) -> List[Dict]: + """ + Parse porcelain output from thaw --list command. + + Expected format: + THAW_REQUEST {request_id} {status} {created_at} {start_date} {end_date} {repo_count} + + :param output: Raw porcelain output string + :type output: str + :return: List of thaw request information + :rtype: List[Dict] + """ + requests = [] + + for line in output.strip().split("\n"): + if not line.strip(): + continue + + parts = line.split("\t") + if parts[0] == "THAW_REQUEST": + requests.append( + { + "id": parts[1], + "status": parts[2], + "created_at": parts[3], + "start_date": parts[4], + "end_date": parts[5], + "repo_count": int(parts[6]), + } + ) + + return requests + + @staticmethod + def is_restore_complete(status_data: Dict) -> bool: + """ + Check if restoration is complete for all repositories. 
+ + :param status_data: Parsed status data from parse_status_output + :type status_data: Dict + :return: True if all repos show "Complete" progress + :rtype: bool + """ + if not status_data.get("repos"): + return False + + return all(repo["progress"] == "Complete" for repo in status_data["repos"]) + + @staticmethod + def all_repos_mounted(status_data: Dict) -> bool: + """ + Check if all repositories are mounted. + + :param status_data: Parsed status data from parse_status_output + :type status_data: Dict + :return: True if all repos are mounted + :rtype: bool + """ + if not status_data.get("repos"): + return False + + return all(repo["mounted"] for repo in status_data["repos"]) + + +class TestDeepfreezeThaw(DeepfreezeTestCase): + """Test suite for deepfreeze thaw operations""" + + def setUp(self): + """Set up test environment""" + # Load configuration from curator.yml + if not os.path.exists(CONFIG_FILE): + pytest.skip(f"Configuration file not found: {CONFIG_FILE}") + + # Get configuration dictionary + try: + with open(CONFIG_FILE, 'r') as f: + config = yaml.safe_load(f) + # Builder expects full config with 'elasticsearch' key, not just elasticsearch section + configdict = config + except Exception as e: + pytest.skip(f"Failed to load configuration from {CONFIG_FILE}: {e}") + + # Build client using configuration + try: + builder = Builder( + configdict=configdict, + version_max=VERSION_MAX, + version_min=VERSION_MIN, + ) + builder.connect() + self.client = builder.client + except Exception as e: + pytest.skip(f"Failed to connect to Elasticsearch using config from {CONFIG_FILE}: {e}") + + # Initialize logger + import logging + self.logger = logging.getLogger("TestDeepfreezeThaw") + + # Set provider and suppress warnings + self.provider = "aws" + warnings.filterwarnings( + "ignore", category=DeprecationWarning, module="botocore.auth" + ) + + # Initialize bucket name for cleanup + self.bucket_name = "" + + def tearDown(self): + """Clean up test resources""" + # Clean 
up S3 buckets + if self.bucket_name: + try: + s3 = s3_client_factory(self.provider) + buckets = s3.list_buckets(testvars.df_bucket_name) + for bucket in buckets: + s3.delete_bucket(bucket_name=bucket) + except Exception as e: + self.logger.warning(f"Failed to clean up buckets: {e}") + + # Clean up Elasticsearch resources + try: + # Delete status index + if self.client.indices.exists(index=STATUS_INDEX): + self.client.indices.delete(index=STATUS_INDEX) + + # Delete all test repositories + repos = self.client.snapshot.get_repository(name="*") + for repo in repos: + if repo.startswith(testvars.df_repo_name): + try: + self.client.snapshot.delete_repository(name=repo) + except Exception: + pass + + # Delete all test indices + indices = list( + self.client.indices.get( + index="test-logs-*,df-*", + expand_wildcards="open,closed", + ignore_unavailable=True + ).keys() + ) + if indices: + self.client.indices.delete(index=",".join(indices), ignore_unavailable=True) + + except Exception as e: + self.logger.warning(f"Failed to clean up Elasticsearch resources: {e}") + + def _setup_test_environment(self) -> Tuple[str, str]: + """ + Set up the test environment with repositories and test data. + + :return: Tuple of (bucket_name, repo_name_prefix) + :rtype: Tuple[str, str] + """ + # Generate unique test identifiers + self.bucket_name = f"{testvars.df_bucket_name}-thaw-{random_suffix()}" + + # Run deepfreeze setup + self.do_setup() + + repo_name = f"{testvars.df_repo_name}-000001" + + return self.bucket_name, repo_name + + def _create_test_indices_with_dates( + self, repo_name: str, date_ranges: List[Tuple[datetime, datetime]], docs_per_index: int = 100 + ) -> List[str]: + """ + Create test indices with specific date ranges and snapshot them. 
+ + :param repo_name: The repository to snapshot to + :type repo_name: str + :param date_ranges: List of (start_date, end_date) tuples for each index + :type date_ranges: List[Tuple[datetime, datetime]] + :param docs_per_index: Number of documents to create per index + :type docs_per_index: int + :return: List of created index names + :rtype: List[str] + """ + created_indices = [] + + for i, (start_date, end_date) in enumerate(date_ranges): + # Create index name based on date range + index_name = f"test-logs-{start_date.strftime('%Y%m%d')}-{i:03d}" + + # Create the index + self.create_index(index_name) + + # Add documents with timestamps in the date range + doc_count = docs_per_index + time_delta = (end_date - start_date) / doc_count + + for j in range(doc_count): + doc_time = start_date + (time_delta * j) + self.client.index( + index=index_name, + document={ + "@timestamp": doc_time.isoformat(), + "message": f"Test document {j} for index {index_name}", + "test_id": f"{index_name}-{j}", + }, + ) + + # Refresh the index + self.client.indices.refresh(index=index_name) + + # Create a snapshot of this index + snapshot_name = f"snap-{index_name}" + self.client.snapshot.create( + repository=repo_name, + snapshot=snapshot_name, + body={ + "indices": index_name, + "include_global_state": False, + "partial": False, + }, + wait_for_completion=True, + ) + + created_indices.append(index_name) + + # Small delay to ensure snapshots are distinct + time.sleep(INTERVAL) + + return created_indices + + def _push_repo_to_glacier(self, repo_name: str): + """ + Push a repository to Glacier storage (simulated in fast mode). 
+ + :param repo_name: The repository name to push to Glacier + :type repo_name: str + """ + # Get repository object + repos = get_repositories_by_names(self.client, [repo_name]) + if not repos: + raise ValueError(f"Repository {repo_name} not found") + + repo = repos[0] + + if FAST_MODE: + # In fast mode, just mark as unmounted + repo.is_mounted = False + repo.persist(self.client) + self.client.snapshot.delete_repository(name=repo_name) + else: + # In full mode, actually push to Glacier + from curator.actions.deepfreeze.utilities import push_to_glacier + + s3 = s3_client_factory(self.provider) + push_to_glacier(s3, repo) + repo.is_mounted = False + repo.persist(self.client) + self.client.snapshot.delete_repository(name=repo_name) + + def _wait_for_restore_completion( + self, thaw_request_id: str, timeout_seconds: int = 300, poll_interval: int = 10 + ) -> bool: + """ + Wait for thaw restore operation to complete using porcelain output. + + :param thaw_request_id: The thaw request ID to monitor + :type thaw_request_id: str + :param timeout_seconds: Maximum time to wait in seconds + :type timeout_seconds: int + :param poll_interval: Seconds between status checks + :type poll_interval: int + :return: True if restore completed, False if timeout + :rtype: bool + """ + start_time = time.time() + parser = ThawStatusParser() + + while (time.time() - start_time) < timeout_seconds: + # Create Thaw action to check status + thaw = Thaw( + self.client, + check_status=thaw_request_id, + porcelain=True, + ) + + # In fast mode, we simulate completion + if FAST_MODE: + # After first poll, mark as complete + request = get_thaw_request(self.client, thaw_request_id) + repo_names = request.get("repos", []) + repos = get_repositories_by_names(self.client, repo_names) + + # Mount all repositories + for repo in repos: + if not repo.is_mounted: + repo.is_mounted = True + repo.thaw_state = "active" + repo.persist(self.client) + + # Re-register the repository with Elasticsearch + 
self.client.snapshot.create_repository( + name=repo.name, + body={ + "type": "s3", + "settings": { + "bucket": repo.bucket, + "base_path": repo.base_path, + }, + }, + ) + + return True + + # In full mode, actually poll for status + # This would use the real porcelain output + # For now, we'll use the action's internal check + try: + thaw_action = Thaw( + self.client, + check_status=thaw_request_id, + porcelain=False, + ) + + # Check if all repos are mounted + request = get_thaw_request(self.client, thaw_request_id) + repo_names = request.get("repos", []) + repos = get_repositories_by_names(self.client, repo_names) + + if all(repo.is_mounted for repo in repos): + return True + + except Exception as e: + self.logger.warning(f"Error checking thaw status: {e}") + + time.sleep(poll_interval) + + return False + + def test_thaw_single_repository(self): + """ + Test thawing a single repository with a specific date range. + + This test: + 1. Sets up a repository with test data spanning multiple dates + 2. Pushes the repository to Glacier + 3. Creates a thaw request for a specific date range + 4. Monitors restore progress using porcelain output + 5. Verifies indices are mounted correctly + 6. 
Verifies data can be searched + """ + # Set up environment + bucket_name, repo_name = self._setup_test_environment() + + # Create test indices with specific date ranges + # We'll create 3 indices spanning January, February, March 2024 + now = datetime.now(timezone.utc) + date_ranges = [ + ( + datetime(2024, 1, 1, tzinfo=timezone.utc), + datetime(2024, 1, 31, tzinfo=timezone.utc), + ), + ( + datetime(2024, 2, 1, tzinfo=timezone.utc), + datetime(2024, 2, 28, tzinfo=timezone.utc), + ), + ( + datetime(2024, 3, 1, tzinfo=timezone.utc), + datetime(2024, 3, 31, tzinfo=timezone.utc), + ), + ] + + created_indices = self._create_test_indices_with_dates(repo_name, date_ranges) + self.logger.info(f"Created indices: {created_indices}") + + # Push repository to Glacier + self.logger.info(f"Pushing repository {repo_name} to Glacier") + self._push_repo_to_glacier(repo_name) + + # Wait a moment for the unmount to complete + time.sleep(INTERVAL * 2) + + # Create a thaw request for January data only + start_date = datetime(2024, 1, 1, tzinfo=timezone.utc) + end_date = datetime(2024, 1, 31, 23, 59, 59, tzinfo=timezone.utc) + + self.logger.info( + f"Creating thaw request for date range: {start_date} to {end_date}" + ) + + thaw = Thaw( + self.client, + start_date=start_date.isoformat(), + end_date=end_date.isoformat(), + sync=False, # Async mode + duration=7, + retrieval_tier="Standard", + porcelain=True, + ) + + # Capture the thaw request ID + # In a real scenario, we'd parse porcelain output + # For now, we'll get it from the status index + thaw.do_action() + + # Get the thaw request ID + requests = list_thaw_requests(self.client) + assert len(requests) > 0, "No thaw requests found after thaw action" + thaw_request_id = requests[-1]["id"] + + self.logger.info(f"Created thaw request: {thaw_request_id}") + + # Wait for restore to complete (with timeout) + timeout = 300 if FAST_MODE else 21600 # 5 min for fast, 6 hours for full + restore_completed = self._wait_for_restore_completion( + 
thaw_request_id, timeout_seconds=timeout + ) + + assert restore_completed, "Restore did not complete within timeout period" + + # Verify indices are mounted + self.logger.info("Verifying mounted indices") + request = get_thaw_request(self.client, thaw_request_id) + repo_names = request.get("repos", []) + repos = get_repositories_by_names(self.client, repo_names) + + # Should have exactly one repository (January data) + assert len(repos) == 1, f"Expected 1 repository, got {len(repos)}" + assert repos[0].is_mounted, "Repository should be mounted" + + # Verify we can search the data + self.logger.info("Verifying data can be searched") + january_index = created_indices[0] # The January index + + # Try to search the index + search_result = self.client.search( + index=january_index, + body={"query": {"match_all": {}}, "size": 1}, + ) + + assert search_result["hits"]["total"]["value"] > 0, "No documents found in index" + + # Verify the document has correct timestamp + doc = search_result["hits"]["hits"][0]["_source"] + assert "@timestamp" in doc, "Document missing @timestamp field" + + doc_time = datetime.fromisoformat(doc["@timestamp"].replace("Z", "+00:00")) + assert start_date <= doc_time <= end_date, "Document timestamp outside expected range" + + # Refreeze the repository + self.logger.info("Refreezing repository") + refreeze = Refreeze(self.client, thaw_request_id=thaw_request_id, porcelain=True) + refreeze.do_action() + + # Verify repository is unmounted + time.sleep(INTERVAL * 2) + repos = get_repositories_by_names(self.client, [repos[0].name]) + assert not repos[0].is_mounted, "Repository should be unmounted after refreeze" + + def test_thaw_multiple_repositories(self): + """ + Test thawing multiple repositories spanning a date range. + + This test: + 1. Sets up multiple repositories with different date ranges + 2. Pushes all repositories to Glacier + 3. Creates a thaw request spanning multiple repositories + 4. 
Verifies all relevant repositories are restored and mounted + 5. Verifies indices outside the date range are NOT mounted + """ + # Set up initial environment + bucket_name, first_repo = self._setup_test_environment() + + # Create multiple repositories by rotating + # We'll create 3 repositories for Jan, Feb, Mar 2024 + from curator.actions.deepfreeze.rotate import Rotate + + repos_created = [first_repo] + + # Create additional repositories + for _ in range(2): + rotate = Rotate(self.client, keep=10) # Keep all repos mounted + rotate.do_action() + time.sleep(INTERVAL) + + # Get the latest repository + settings = get_settings(self.client) + last_suffix = settings.last_suffix + latest_repo = f"{testvars.df_repo_name}-{last_suffix}" + repos_created.append(latest_repo) + + self.logger.info(f"Created repositories: {repos_created}") + + # Create test data in each repository + all_indices = [] + date_ranges_per_repo = [ + [ + ( + datetime(2024, 1, 1, tzinfo=timezone.utc), + datetime(2024, 1, 31, tzinfo=timezone.utc), + ) + ], + [ + ( + datetime(2024, 2, 1, tzinfo=timezone.utc), + datetime(2024, 2, 28, tzinfo=timezone.utc), + ) + ], + [ + ( + datetime(2024, 3, 1, tzinfo=timezone.utc), + datetime(2024, 3, 31, tzinfo=timezone.utc), + ) + ], + ] + + for repo_name, date_ranges in zip(repos_created, date_ranges_per_repo): + indices = self._create_test_indices_with_dates( + repo_name, date_ranges, docs_per_index=50 + ) + all_indices.extend(indices) + + self.logger.info(f"Created total indices: {all_indices}") + + # Push all repositories to Glacier + for repo_name in repos_created: + self.logger.info(f"Pushing repository {repo_name} to Glacier") + self._push_repo_to_glacier(repo_name) + time.sleep(INTERVAL) + + # Wait for unmounting to complete + time.sleep(INTERVAL * 2) + + # Create a thaw request spanning January and February (2 repos) + start_date = datetime(2024, 1, 1, tzinfo=timezone.utc) + end_date = datetime(2024, 2, 28, 23, 59, 59, tzinfo=timezone.utc) + + 
self.logger.info( + f"Creating thaw request for date range: {start_date} to {end_date}" + ) + + thaw = Thaw( + self.client, + start_date=start_date.isoformat(), + end_date=end_date.isoformat(), + sync=False, + duration=7, + retrieval_tier="Standard", + porcelain=True, + ) + + thaw.do_action() + + # Get the thaw request ID + requests = list_thaw_requests(self.client) + thaw_request_id = requests[-1]["id"] + + self.logger.info(f"Created thaw request: {thaw_request_id}") + + # Wait for restore to complete + timeout = 300 if FAST_MODE else 21600 + restore_completed = self._wait_for_restore_completion( + thaw_request_id, timeout_seconds=timeout + ) + + assert restore_completed, "Restore did not complete within timeout period" + + # Verify exactly 2 repositories are mounted (Jan and Feb) + request = get_thaw_request(self.client, thaw_request_id) + repo_names = request.get("repos", []) + repos = get_repositories_by_names(self.client, repo_names) + + assert len(repos) == 2, f"Expected 2 repositories, got {len(repos)}" + assert all(repo.is_mounted for repo in repos), "All repos should be mounted" + + # Verify the March repository is NOT in the thaw request + march_repo = repos_created[2] + assert march_repo not in repo_names, "March repository should not be in thaw request" + + # Verify we can search data in both January and February indices + for index_name in [all_indices[0], all_indices[1]]: + search_result = self.client.search( + index=index_name, body={"query": {"match_all": {}}, "size": 1} + ) + assert search_result["hits"]["total"]["value"] > 0, f"No documents found in {index_name}" + + # Cleanup - run refreeze + self.logger.info("Running cleanup") + cleanup = Cleanup(self.client) + cleanup.do_action() + + # Verify repositories are unmounted after cleanup + time.sleep(INTERVAL * 2) + repos_after = get_repositories_by_names(self.client, repo_names) + # Note: After cleanup, repos should be unmounted if they've expired + # In this test, they won't have expired yet, so 
they'll still be mounted + # This is expected behavior + + def test_thaw_with_porcelain_output_parsing(self): + """ + Test parsing porcelain output from thaw operations. + + This test focuses on the porcelain output format and parsing logic. + """ + # Set up environment + bucket_name, repo_name = self._setup_test_environment() + + # Create simple test data + date_ranges = [ + ( + datetime(2024, 1, 1, tzinfo=timezone.utc), + datetime(2024, 1, 31, tzinfo=timezone.utc), + ) + ] + created_indices = self._create_test_indices_with_dates( + repo_name, date_ranges, docs_per_index=10 + ) + + # Push to Glacier + self._push_repo_to_glacier(repo_name) + time.sleep(INTERVAL * 2) + + # Create thaw request + start_date = datetime(2024, 1, 1, tzinfo=timezone.utc) + end_date = datetime(2024, 1, 31, tzinfo=timezone.utc) + + thaw = Thaw( + self.client, + start_date=start_date.isoformat(), + end_date=end_date.isoformat(), + sync=False, + duration=7, + retrieval_tier="Standard", + porcelain=True, + ) + + thaw.do_action() + + # Get the thaw request + requests = list_thaw_requests(self.client) + thaw_request_id = requests[-1]["id"] + + # Test porcelain output parsing + parser = ThawStatusParser() + + # Simulate porcelain output (in real scenario, we'd capture stdout) + sample_output = f"""REQUEST\t{thaw_request_id}\tin_progress\t2024-01-01T00:00:00Z\t2024-01-01T00:00:00Z\t2024-01-31T23:59:59Z +REPO\t{repo_name}\t{bucket_name}\t/df-test-path-000001\tthawing\tno\t0/100""" + + parsed = parser.parse_status_output(sample_output) + + # Verify parsed structure + assert parsed["request"] is not None, "Request data not parsed" + assert parsed["request"]["id"] == thaw_request_id, "Request ID mismatch" + assert len(parsed["repos"]) == 1, "Expected 1 repository in parsed output" + + repo_data = parsed["repos"][0] + assert repo_data["name"] == repo_name, "Repository name mismatch" + assert not repo_data["mounted"], "Repository should not be mounted yet" + assert not 
parser.is_restore_complete(parsed), "Restore should not be complete" + assert not parser.all_repos_mounted(parsed), "Repos should not be mounted" + + # Simulate completed output + completed_output = f"""REQUEST\t{thaw_request_id}\tin_progress\t2024-01-01T00:00:00Z\t2024-01-01T00:00:00Z\t2024-01-31T23:59:59Z +REPO\t{repo_name}\t{bucket_name}\t/df-test-path-000001\tactive\tyes\tComplete""" + + parsed_complete = parser.parse_status_output(completed_output) + + assert parser.is_restore_complete(parsed_complete), "Restore should be complete" + assert parser.all_repos_mounted(parsed_complete), "All repos should be mounted" + + def test_cleanup_removes_expired_repositories(self): + """ + Test that cleanup properly removes expired thawed repositories. + + This test: + 1. Creates a thaw request + 2. Manually sets the expiration to past + 3. Runs cleanup + 4. Verifies repositories are unmounted and marked as frozen + """ + # Set up environment + bucket_name, repo_name = self._setup_test_environment() + + # Create test data + date_ranges = [ + ( + datetime(2024, 1, 1, tzinfo=timezone.utc), + datetime(2024, 1, 31, tzinfo=timezone.utc), + ) + ] + self._create_test_indices_with_dates(repo_name, date_ranges, docs_per_index=10) + + # Push to Glacier + self._push_repo_to_glacier(repo_name) + time.sleep(INTERVAL * 2) + + # Create thaw request with short duration + start_date = datetime(2024, 1, 1, tzinfo=timezone.utc) + end_date = datetime(2024, 1, 31, tzinfo=timezone.utc) + + thaw = Thaw( + self.client, + start_date=start_date.isoformat(), + end_date=end_date.isoformat(), + sync=False, + duration=1, # 1 day duration + retrieval_tier="Standard", + porcelain=False, + ) + + thaw.do_action() + + # Wait for restore in fast mode + if FAST_MODE: + requests = list_thaw_requests(self.client) + thaw_request_id = requests[-1]["id"] + self._wait_for_restore_completion(thaw_request_id, timeout_seconds=60) + + # Manually expire the thaw request by updating its timestamp + requests = 
list_thaw_requests(self.client) + thaw_request_id = requests[-1]["id"] + + # Update the request to have an expiration in the past + past_time = datetime.now(timezone.utc) - timedelta(days=2) + self.client.update( + index=STATUS_INDEX, + id=thaw_request_id, + body={ + "doc": { + "created_at": past_time.isoformat(), + "expires_at": (past_time + timedelta(days=1)).isoformat(), + } + }, + ) + self.client.indices.refresh(index=STATUS_INDEX) + + # Get repository state before cleanup + request = get_thaw_request(self.client, thaw_request_id) + repo_names = request.get("repos", []) + + # Run cleanup + self.logger.info("Running cleanup on expired thaw request") + cleanup = Cleanup(self.client) + cleanup.do_action() + + time.sleep(INTERVAL * 2) + + # Verify repositories are unmounted + repos_after = get_repositories_by_names(self.client, repo_names) + for repo in repos_after: + assert not repo.is_mounted, f"Repository {repo.name} should be unmounted after cleanup" + assert repo.thaw_state == "frozen", f"Repository {repo.name} should be frozen after cleanup" + + # Verify the thaw request is marked as completed + request_after = get_thaw_request(self.client, thaw_request_id) + assert request_after["status"] == "completed", "Thaw request should be marked as completed" + + +if __name__ == "__main__": + # Allow running individual tests + pytest.main([__file__, "-v", "-s"]) diff --git a/tests/integration/test_isolation.py b/tests/integration/test_isolation.py new file mode 100644 index 00000000..3f135e61 --- /dev/null +++ b/tests/integration/test_isolation.py @@ -0,0 +1,232 @@ +""" +Test isolation utilities for parallel deepfreeze integration tests. + +Provides repository locking to prevent multiple tests from operating on +the same repository when running tests in parallel with pytest-xdist. 
+""" + +import logging +import time +from datetime import datetime, timezone +from typing import List, Optional + +from elasticsearch8 import Elasticsearch + +LOCK_INDEX = ".deepfreeze_test_locks" +LOCK_TIMEOUT_SECONDS = 7200 # 2 hours - longer than any single test + +logger = logging.getLogger(__name__) + + +class RepositoryLock: + """ + Distributed lock for test repositories using Elasticsearch. + + Uses optimistic locking with document versioning to ensure + only one test can reserve a repository at a time. + """ + + def __init__(self, client: Elasticsearch, repo_name: str, test_id: str): + """ + Initialize repository lock. + + :param client: Elasticsearch client + :param repo_name: Repository name to lock + :param test_id: Unique test identifier (pytest node ID) + """ + self.client = client + self.repo_name = repo_name + self.test_id = test_id + self.locked = False + + def acquire(self, timeout: int = 30) -> bool: + """ + Attempt to acquire lock on repository. + + :param timeout: Maximum time to wait for lock (seconds) + :return: True if lock acquired, False otherwise + """ + start_time = time.time() + + while (time.time() - start_time) < timeout: + try: + # Try to create lock document + lock_doc = { + "repo_name": self.repo_name, + "locked_by": self.test_id, + "locked_at": datetime.now(timezone.utc).isoformat(), + "expires_at": datetime.now(timezone.utc).timestamp() + LOCK_TIMEOUT_SECONDS, + } + + self.client.index( + index=LOCK_INDEX, + id=self.repo_name, + body=lock_doc, + op_type="create", # Fails if document exists + ) + + self.locked = True + logger.info(f"Acquired lock on repository {self.repo_name} for test {self.test_id}") + return True + + except Exception as e: + # Lock exists - check if it's expired + try: + doc = self.client.get(index=LOCK_INDEX, id=self.repo_name, ignore=[404]) + if doc.get("found"): + source = doc["_source"] + expires_at = source.get("expires_at", 0) + + # If lock is expired, try to delete and retry + if time.time() > expires_at: 
+ logger.warning( + f"Found expired lock on {self.repo_name} " + f"by {source.get('locked_by')}. Releasing..." + ) + self.client.delete(index=LOCK_INDEX, id=self.repo_name, ignore=[404]) + continue + + logger.debug( + f"Repository {self.repo_name} locked by {source.get('locked_by')}, " + f"waiting..." + ) + except Exception as check_error: + logger.debug(f"Error checking lock: {check_error}") + + # Wait before retry + time.sleep(1) + + logger.warning( + f"Failed to acquire lock on repository {self.repo_name} " + f"after {timeout} seconds" + ) + return False + + def release(self): + """Release lock on repository.""" + if not self.locked: + return + + try: + self.client.delete(index=LOCK_INDEX, id=self.repo_name, ignore=[404]) + logger.info(f"Released lock on repository {self.repo_name}") + self.locked = False + except Exception as e: + logger.error(f"Error releasing lock on {self.repo_name}: {e}") + + def __enter__(self): + """Context manager entry.""" + if not self.acquire(): + raise RuntimeError(f"Failed to acquire lock on repository {self.repo_name}") + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + """Context manager exit.""" + self.release() + + +def get_available_unlocked_repositories( + client: Elasticsearch, + available_repos: List[dict], + count: int = 1, +) -> List[dict]: + """ + Get N available repositories that are not currently locked by other tests. 
+ + :param client: Elasticsearch client + :param available_repos: List of available repository dictionaries + :param count: Number of repositories needed + :return: List of unlocked repositories (may be fewer than count) + """ + unlocked = [] + + # Ensure lock index exists + try: + if not client.indices.exists(index=LOCK_INDEX): + client.indices.create( + index=LOCK_INDEX, + body={ + "settings": {"number_of_shards": 1, "number_of_replicas": 0}, + "mappings": { + "properties": { + "repo_name": {"type": "keyword"}, + "locked_by": {"type": "keyword"}, + "locked_at": {"type": "date"}, + "expires_at": {"type": "date"}, + } + }, + }, + ) + except Exception: + pass # Index already exists + + # Check each repository + for repo in available_repos: + if len(unlocked) >= count: + break + + repo_name = repo["name"] + + try: + # Check if repository is locked + doc = client.get(index=LOCK_INDEX, id=repo_name, ignore=[404]) + + if not doc.get("found"): + # Not locked + unlocked.append(repo) + continue + + # Check if lock is expired + source = doc["_source"] + expires_at = source.get("expires_at", 0) + + if time.time() > expires_at: + # Lock expired, can use this repo + logger.info(f"Found expired lock on {repo_name}, marking as available") + unlocked.append(repo) + else: + logger.debug(f"Repository {repo_name} is locked by {source.get('locked_by')}") + + except Exception as e: + logger.warning(f"Error checking lock for {repo_name}: {e}") + # Assume available on error + unlocked.append(repo) + + return unlocked + + +def cleanup_expired_locks(client: Elasticsearch): + """ + Clean up expired test locks from the lock index. + + Should be called before test suite starts. 
+ """ + try: + if not client.indices.exists(index=LOCK_INDEX): + return + + # Query for all locks + response = client.search( + index=LOCK_INDEX, + body={"query": {"match_all": {}}, "size": 1000}, + ) + + current_time = time.time() + expired_count = 0 + + for hit in response.get("hits", {}).get("hits", []): + expires_at = hit["_source"].get("expires_at", 0) + + if current_time > expires_at: + repo_name = hit["_id"] + locked_by = hit["_source"].get("locked_by", "unknown") + + logger.info(f"Cleaning up expired lock on {repo_name} (was locked by {locked_by})") + client.delete(index=LOCK_INDEX, id=repo_name, ignore=[404]) + expired_count += 1 + + if expired_count > 0: + logger.info(f"Cleaned up {expired_count} expired lock(s)") + + except Exception as e: + logger.error(f"Error cleaning up expired locks: {e}") diff --git a/tests/integration/testvars.py b/tests/integration/testvars.py index 4359da58..200b05e0 100644 --- a/tests/integration/testvars.py +++ b/tests/integration/testvars.py @@ -1,7 +1,3 @@ -"""Test variables""" - -# pylint: disable=C0103, C0302 - client_config = ( '---\n' 'elasticsearch:\n' @@ -571,21 +567,6 @@ ' exclude: {1}\n' ) -filter_closed = ( - '---\n' - 'actions:\n' - ' 1:\n' - ' description: "Delete indices as filtered"\n' - ' action: delete_indices\n' - ' options:\n' - ' ignore_empty_list: True\n' - ' continue_if_exception: False\n' - ' disable_action: False\n' - ' filters:\n' - ' - filtertype: closed\n' - ' exclude: {0}\n' -) - bad_option_proto_test = ( '---\n' 'actions:\n' @@ -632,8 +613,7 @@ '---\n' 'actions:\n' ' 1:\n' - ' description: >-\n' - ' forceMerge segment count per shard to provided value with optional delay\n' + ' description: "forceMerge segment count per shard to provided value with optional delay"\n' ' action: forcemerge\n' ' options:\n' ' max_num_segments: {0}\n' @@ -1053,3 +1033,37 @@ ' stats_result: {7}\n' ' epoch: {8}\n' ) +df_ilm_policy = "df-test-ilm-policy" +df_ilm_body = { + "policy": { + "phases": { + "hot": { + "min_age": 
"0s", + "actions": {"rollover": {"max_size": "45gb", "max_age": "7s"}}, + }, + "frozen": { + "min_age": "7s", + "actions": { + "searchable_snapshot": {"snapshot_repository": "SNAPSHOT_REPO"} + }, + }, + "delete": { + "min_age": "30s", + "actions": {"delete": {"delete_searchable_snapshot": False}}, + }, + } + } +} +df_bucket_name = "df" +df_bucket_name_2 = "df-test" +df_repo_name = "df-test-repo" +df_providers = ["aws", "gcp", "azure"] +df_base_path = "/df-test-path" +df_base_path_2 = "/df-another-test-path" +df_acl = "private" +df_storage_class = "Standard" +df_rotate_by = "path" +df_style = "oneup" +df_month = "05" +df_year = "2024" +df_test_index = "df-test-idx" diff --git a/tests/unit/test_action_deepfreeze_helpers.py b/tests/unit/test_action_deepfreeze_helpers.py new file mode 100644 index 00000000..f9444865 --- /dev/null +++ b/tests/unit/test_action_deepfreeze_helpers.py @@ -0,0 +1,336 @@ +"""Test deepfreeze helpers module""" +# pylint: disable=attribute-defined-outside-init +from unittest import TestCase +from unittest.mock import Mock, patch, MagicMock +from datetime import datetime +import json +import pytest + +from curator.actions.deepfreeze.helpers import Deepfreeze, Repository, Settings +from curator.actions.deepfreeze.constants import STATUS_INDEX, SETTINGS_ID + + +class TestDeepfreeze(TestCase): + """Test Deepfreeze class""" + + def test_deepfreeze_init(self): + """Test Deepfreeze class initialization""" + df = Deepfreeze() + assert isinstance(df, Deepfreeze) + + +class TestRepository(TestCase): + """Test Repository dataclass""" + + def test_repository_init_with_all_params(self): + """Test Repository initialization with all parameters""" + start = datetime(2024, 1, 1) + end = datetime(2024, 12, 31) + + repo = Repository( + name="test-repo", + bucket="test-bucket", + base_path="/path/to/repo", + start=start, + end=end, + is_thawed=True, + is_mounted=False, + doctype="repository", + docid="repo-id-123" + ) + + assert repo.name == "test-repo" + assert 
repo.bucket == "test-bucket" + assert repo.base_path == "/path/to/repo" + assert repo.start == start + assert repo.end == end + assert repo.is_thawed is True + assert repo.is_mounted is False + assert repo.doctype == "repository" + assert repo.docid == "repo-id-123" + + def test_repository_init_with_defaults(self): + """Test Repository initialization with default values""" + repo = Repository(name="test-repo") + + assert repo.name == "test-repo" + assert repo.bucket is None + assert repo.base_path is None + assert repo.start is None + assert repo.end is None + assert repo.is_thawed is False + assert repo.is_mounted is True + assert repo.doctype == "repository" + assert repo.docid is None + + def test_repository_from_elasticsearch_success(self): + """Test Repository.from_elasticsearch successful retrieval""" + mock_client = Mock() + mock_response = { + 'hits': { + 'hits': [{ + '_id': 'repo-id-123', + '_source': { + 'name': 'test-repo', + 'bucket': 'test-bucket', + 'base_path': '/path/to/repo', + 'start': '2024-01-01T00:00:00', + 'end': '2024-12-31T23:59:59', + 'is_thawed': True, + 'is_mounted': False, + 'doctype': 'repository' + } + }] + } + } + mock_client.search.return_value = mock_response + + with patch('curator.actions.deepfreeze.helpers.logging'): + repo = Repository.from_elasticsearch(mock_client, 'test-repo') + + assert repo is not None + assert repo.name == 'test-repo' + assert repo.bucket == 'test-bucket' + assert repo.base_path == '/path/to/repo' + assert repo.docid == 'repo-id-123' + + mock_client.search.assert_called_once_with( + index=STATUS_INDEX, + query={"match": {"name.keyword": "test-repo"}}, + size=1 + ) + + def test_repository_from_elasticsearch_not_found(self): + """Test Repository.from_elasticsearch when repository not found""" + mock_client = Mock() + mock_response = { + 'hits': { + 'hits': [] + } + } + mock_client.search.return_value = mock_response + + with patch('curator.actions.deepfreeze.helpers.logging'): + repo = 
Repository.from_elasticsearch(mock_client, 'nonexistent-repo') + + assert repo is None + + def test_repository_from_elasticsearch_with_custom_index(self): + """Test Repository.from_elasticsearch with custom index""" + mock_client = Mock() + mock_response = { + 'hits': { + 'hits': [{ + '_id': 'repo-id', + '_source': { + 'name': 'test-repo', + 'doctype': 'repository' + } + }] + } + } + mock_client.search.return_value = mock_response + + with patch('curator.actions.deepfreeze.helpers.logging'): + repo = Repository.from_elasticsearch( + mock_client, + 'test-repo', + index='custom-index' + ) + + mock_client.search.assert_called_once_with( + index='custom-index', + query={"match": {"name.keyword": "test-repo"}}, + size=1 + ) + + def test_repository_to_dict(self): + """Test Repository.to_dict method""" + repo = Repository( + name="test-repo", + bucket="test-bucket", + base_path="/path/to/repo", + start="2024-01-01", + end="2024-12-31", + is_thawed=True, + is_mounted=False, + doctype="repository" + ) + + result = repo.to_dict() + + assert isinstance(result, dict) + assert result['name'] == "test-repo" + assert result['bucket'] == "test-bucket" + assert result['base_path'] == "/path/to/repo" + assert result['is_thawed'] is True + assert result['is_mounted'] is False + assert result['doctype'] == "repository" + # Dates are converted to ISO format with time component + assert result['start'] == "2024-01-01T00:00:00" + assert result['end'] == "2024-12-31T00:00:00" + + def test_repository_to_dict_with_none_dates(self): + """Test Repository.to_dict with None dates""" + repo = Repository( + name="test-repo", + start=None, + end=None + ) + + result = repo.to_dict() + + assert result['start'] is None + assert result['end'] is None + + def test_repository_to_json(self): + """Test Repository.to_json method""" + repo = Repository( + name="test-repo", + bucket="test-bucket", + base_path="/path/to/repo", + is_thawed=False, + is_mounted=True + ) + + result = repo.to_json() + + assert 
isinstance(result, str) + data = json.loads(result) + assert data['name'] == "test-repo" + assert data['bucket'] == "test-bucket" + assert data['base_path'] == "/path/to/repo" + assert data['is_thawed'] is False + assert data['is_mounted'] is True + + def test_repository_lt_comparison(self): + """Test Repository __lt__ comparison method""" + repo1 = Repository(name="repo-001") + repo2 = Repository(name="repo-002") + repo3 = Repository(name="repo-010") + + assert repo1 < repo2 + assert repo2 < repo3 + assert not repo2 < repo1 + assert not repo3 < repo2 + + def test_repository_persist(self): + """Test Repository.persist method""" + mock_client = Mock() + mock_client.update.return_value = {'_id': 'updated-id-123'} + + repo = Repository( + name="test-repo", + bucket="test-bucket", + base_path="/path/to/repo", + docid="existing-id-123" + ) + + with patch('curator.actions.deepfreeze.helpers.logging'): + repo.persist(mock_client) + + # Should call update with existing ID + mock_client.update.assert_called_once() + call_args = mock_client.update.call_args + assert call_args[1]['index'] == STATUS_INDEX + assert call_args[1]['id'] == 'existing-id-123' + assert call_args[1]['body']['doc']['name'] == 'test-repo' + + def test_repository_unmount(self): + """Test Repository.unmount method""" + repo = Repository( + name="test-repo", + is_mounted=True + ) + + repo.unmount() + + # Should update is_mounted + assert repo.is_mounted is False + + +class TestSettings(TestCase): + """Test Settings dataclass""" + + def test_settings_init_with_all_params(self): + """Test Settings initialization with all parameters""" + settings = Settings( + repo_name_prefix="deepfreeze", + bucket_name_prefix="deepfreeze", + base_path_prefix="snapshots", + canned_acl="private", + storage_class="GLACIER", + provider="aws", + rotate_by="path", + style="oneup", + last_suffix="000001" + ) + + assert settings.repo_name_prefix == "deepfreeze" + assert settings.bucket_name_prefix == "deepfreeze" + assert 
settings.base_path_prefix == "snapshots" + assert settings.canned_acl == "private" + assert settings.storage_class == "GLACIER" + assert settings.provider == "aws" + assert settings.rotate_by == "path" + assert settings.style == "oneup" + assert settings.last_suffix == "000001" + + def test_settings_init_with_defaults(self): + """Test Settings initialization with default values""" + settings = Settings() + + assert settings.repo_name_prefix == "deepfreeze" + assert settings.bucket_name_prefix == "deepfreeze" + assert settings.base_path_prefix == "snapshots" + assert settings.canned_acl == "private" + assert settings.storage_class == "intelligent_tiering" + assert settings.provider == "aws" + assert settings.rotate_by == "path" + assert settings.style == "oneup" + assert settings.last_suffix is None + + def test_settings_init_with_hash(self): + """Test Settings initialization with settings hash""" + settings_hash = { + 'repo_name_prefix': 'custom-prefix', + 'storage_class': 'STANDARD_IA', + 'rotate_by': 'bucket' + } + + settings = Settings(settings_hash=settings_hash) + + # Settings constructor overrides hash values with defaults if they're passed as parameters + # Since we're not passing explicit parameters, the hash should be applied first, + # then defaults override them + assert settings.repo_name_prefix == "deepfreeze" # Default overrides hash + assert settings.storage_class == "intelligent_tiering" # Default overrides hash + assert settings.rotate_by == "path" # Default overrides hash + # But the hash values should be set via setattr + # Let's test with no default parameters + settings2 = Settings(settings_hash=settings_hash, repo_name_prefix=None, storage_class=None, rotate_by=None) + assert settings2.repo_name_prefix == "custom-prefix" + assert settings2.storage_class == "STANDARD_IA" + assert settings2.rotate_by == "bucket" + + def test_settings_dataclass_behavior(self): + """Test Settings dataclass behavior""" + settings = Settings( + 
repo_name_prefix="test-prefix", + bucket_name_prefix="test-bucket", + provider="gcp" + ) + + # Settings is a dataclass, so we can access attributes directly + assert settings.repo_name_prefix == "test-prefix" + assert settings.bucket_name_prefix == "test-bucket" + assert settings.provider == "gcp" + assert settings.doctype == "settings" + + # Test that we can convert to dict using dataclasses + import dataclasses + result = dataclasses.asdict(settings) + assert isinstance(result, dict) + assert result['repo_name_prefix'] == "test-prefix" + assert result['bucket_name_prefix'] == "test-bucket" + assert result['provider'] == "gcp" \ No newline at end of file diff --git a/tests/unit/test_action_deepfreeze_rotate.py b/tests/unit/test_action_deepfreeze_rotate.py new file mode 100644 index 00000000..216bb455 --- /dev/null +++ b/tests/unit/test_action_deepfreeze_rotate.py @@ -0,0 +1,397 @@ +"""Test deepfreeze Rotate action""" +# pylint: disable=attribute-defined-outside-init +from unittest import TestCase +from unittest.mock import Mock, patch, MagicMock +from datetime import datetime +import pytest + +from curator.actions.deepfreeze.rotate import Rotate +from curator.actions.deepfreeze.helpers import Settings, Repository +from curator.actions.deepfreeze.constants import STATUS_INDEX +from curator.actions.deepfreeze.exceptions import MissingIndexError, PreconditionError, ActionException + + +class TestDeepfreezeRotate(TestCase): + """Test Deepfreeze Rotate action""" + + def setUp(self): + """Set up test fixtures""" + self.client = Mock() + # Mock ILM get_lifecycle to return empty dict by default + self.client.ilm.get_lifecycle.return_value = {} + self.mock_settings = Settings( + repo_name_prefix="deepfreeze", + bucket_name_prefix="deepfreeze", + base_path_prefix="snapshots", + rotate_by="path", + style="oneup", + last_suffix="000001" + ) + self.mock_latest_repo = Repository( + name="deepfreeze-000001", + bucket="deepfreeze", + base_path="snapshots-000001", + 
is_mounted=True, + is_thawed=False + ) + + def test_init_defaults(self): + """Test Rotate initialization with default values""" + with patch('curator.actions.deepfreeze.rotate.get_settings', return_value=self.mock_settings): + with patch('curator.actions.deepfreeze.rotate.get_matching_repo_names', return_value=["deepfreeze-000001"]): + with patch('curator.actions.deepfreeze.rotate.get_next_suffix', return_value="000002"): + with patch('curator.actions.deepfreeze.rotate.s3_client_factory') as mock_factory: + with patch('curator.actions.deepfreeze.rotate.get_policies_for_repo') as mock_policies: + mock_s3 = Mock() + mock_factory.return_value = mock_s3 + mock_policies.return_value = {"test-policy": {}} + self.client.indices.exists.return_value = True + + rotate = Rotate(self.client) + + assert rotate.client == self.client + assert rotate.s3 == mock_s3 + assert rotate.settings == self.mock_settings + assert rotate.latest_repo == "deepfreeze-000001" + assert rotate.keep == 6 # default value + + def test_calculate_new_names_rotate_by_path_oneup(self): + """Test name calculation for path rotation with oneup style""" + with patch('curator.actions.deepfreeze.rotate.get_settings', return_value=self.mock_settings): + with patch('curator.actions.deepfreeze.rotate.get_matching_repo_names', return_value=["deepfreeze-000001"]): + with patch('curator.actions.deepfreeze.rotate.get_next_suffix', return_value="000002"): + with patch('curator.actions.deepfreeze.rotate.s3_client_factory'): + with patch('curator.actions.deepfreeze.rotate.get_policies_for_repo') as mock_policies: + mock_policies.return_value = {"test-policy": {}} # Mock at least one policy + self.client.indices.exists.return_value = True + rotate = Rotate(self.client) + + assert rotate.new_repo_name == "deepfreeze-000002" + assert rotate.new_bucket_name == "deepfreeze" + assert rotate.base_path == "snapshots-000002" + + def test_calculate_new_names_rotate_by_bucket(self): + """Test name calculation for bucket rotation""" 
+ settings = Settings( + repo_name_prefix="deepfreeze", + bucket_name_prefix="deepfreeze", + base_path_prefix="snapshots", + rotate_by="bucket", + style="oneup", + last_suffix="000003" + ) + + with patch('curator.actions.deepfreeze.rotate.get_settings', return_value=settings): + with patch('curator.actions.deepfreeze.rotate.get_matching_repo_names', return_value=["deepfreeze-000003"]): + with patch('curator.actions.deepfreeze.rotate.get_next_suffix', return_value="000004"): + with patch('curator.actions.deepfreeze.rotate.s3_client_factory'): + with patch('curator.actions.deepfreeze.rotate.get_policies_for_repo') as mock_policies: + mock_policies.return_value = {"test-policy": {}} + self.client.indices.exists.return_value = True + rotate = Rotate(self.client) + + assert rotate.new_repo_name == "deepfreeze-000004" + assert rotate.new_bucket_name == "deepfreeze-000004" + assert rotate.base_path == "snapshots" + + def test_calculate_new_names_monthly_style(self): + """Test name calculation with monthly style""" + settings = Settings( + repo_name_prefix="deepfreeze", + bucket_name_prefix="deepfreeze", + base_path_prefix="snapshots", + rotate_by="path", + style="monthly", + last_suffix="2024.02" + ) + + with patch('curator.actions.deepfreeze.rotate.get_settings', return_value=settings): + with patch('curator.actions.deepfreeze.rotate.get_matching_repo_names', return_value=["deepfreeze-2024.02"]): + with patch('curator.actions.deepfreeze.rotate.get_next_suffix', return_value="2024.03"): + with patch('curator.actions.deepfreeze.rotate.s3_client_factory'): + with patch('curator.actions.deepfreeze.rotate.get_policies_for_repo') as mock_policies: + mock_policies.return_value = {"test-policy": {}} + self.client.indices.exists.return_value = True + rotate = Rotate(self.client) + + assert rotate.new_repo_name == "deepfreeze-2024.03" + assert rotate.base_path == "snapshots-2024.03" + + def test_check_preconditions_missing_index(self): + """Test preconditions check when status 
index is missing""" + from elasticsearch8 import NotFoundError + + with patch('curator.actions.deepfreeze.rotate.get_settings') as mock_get_settings: + mock_get_settings.side_effect = MissingIndexError("Status index missing") + + with pytest.raises(MissingIndexError): + Rotate(self.client) + + def test_check_preconditions_new_repo_exists(self): + """Test preconditions check when new repository already exists""" + # Return repo list that includes the new repo name that will be calculated + with patch('curator.actions.deepfreeze.rotate.get_settings', return_value=self.mock_settings): + with patch('curator.actions.deepfreeze.rotate.get_matching_repo_names', return_value=["deepfreeze-000001", "deepfreeze-000002"]): + with patch('curator.actions.deepfreeze.rotate.get_next_suffix', return_value="000002"): + with patch('curator.actions.deepfreeze.rotate.s3_client_factory'): + self.client.indices.exists.return_value = True + from curator.exceptions import RepositoryException + with pytest.raises(RepositoryException, match="already exists"): + Rotate(self.client) + + def test_check_preconditions_success(self): + """Test successful preconditions check""" + with patch('curator.actions.deepfreeze.rotate.get_settings', return_value=self.mock_settings): + with patch('curator.actions.deepfreeze.rotate.get_matching_repo_names', return_value=["deepfreeze-000001"]): + with patch('curator.actions.deepfreeze.rotate.get_next_suffix', return_value="000002"): + with patch('curator.actions.deepfreeze.rotate.s3_client_factory') as mock_factory: + with patch('curator.actions.deepfreeze.rotate.get_policies_for_repo') as mock_policies: + mock_s3 = Mock() + mock_factory.return_value = mock_s3 + mock_policies.return_value = {"test-policy": {}} + self.client.indices.exists.return_value = True + + # Should not raise any exceptions + rotate = Rotate(self.client) + assert rotate is not None + + def test_update_ilm_policies_creates_versioned_policies(self): + """Test that update_ilm_policies creates 
versioned policies instead of modifying existing ones""" + with patch('curator.actions.deepfreeze.rotate.get_settings', return_value=self.mock_settings): + with patch('curator.actions.deepfreeze.rotate.get_matching_repo_names', return_value=["deepfreeze-000001"]): + with patch('curator.actions.deepfreeze.rotate.get_next_suffix', return_value="000002"): + with patch('curator.actions.deepfreeze.rotate.s3_client_factory'): + with patch('curator.actions.deepfreeze.rotate.get_policies_for_repo') as mock_get_policies: + with patch('curator.actions.deepfreeze.rotate.create_versioned_ilm_policy') as mock_create: + with patch('curator.actions.deepfreeze.rotate.get_composable_templates') as mock_get_composable: + with patch('curator.actions.deepfreeze.rotate.get_index_templates') as mock_get_templates: + with patch('curator.actions.deepfreeze.rotate.update_template_ilm_policy') as mock_update_template: + self.client.indices.exists.return_value = True + + # Mock policy that references the old repo + mock_get_policies.return_value = { + "my-policy": { + "policy": { + "phases": { + "cold": { + "actions": { + "searchable_snapshot": { + "snapshot_repository": "deepfreeze-000001" + } + } + } + } + } + } + } + + mock_create.return_value = "my-policy-000002" + mock_get_composable.return_value = {"index_templates": []} + mock_get_templates.return_value = {} + + rotate = Rotate(self.client) + rotate.update_ilm_policies(dry_run=False) + + # Verify versioned policy was created + mock_create.assert_called_once() + call_args = mock_create.call_args + assert call_args[0][1] == "my-policy" # base policy name + assert call_args[0][3] == "deepfreeze-000002" # new repo name + assert call_args[0][4] == "000002" # suffix + + def test_update_ilm_policies_updates_templates(self): + """Test that update_ilm_policies updates index templates to use new versioned policies""" + with patch('curator.actions.deepfreeze.rotate.get_settings', return_value=self.mock_settings): + with 
patch('curator.actions.deepfreeze.rotate.get_matching_repo_names', return_value=["deepfreeze-000001"]): + with patch('curator.actions.deepfreeze.rotate.get_next_suffix', return_value="000002"): + with patch('curator.actions.deepfreeze.rotate.s3_client_factory'): + with patch('curator.actions.deepfreeze.rotate.get_policies_for_repo') as mock_get_policies: + with patch('curator.actions.deepfreeze.rotate.create_versioned_ilm_policy') as mock_create: + with patch('curator.actions.deepfreeze.rotate.get_composable_templates') as mock_get_composable: + with patch('curator.actions.deepfreeze.rotate.get_index_templates') as mock_get_templates: + with patch('curator.actions.deepfreeze.rotate.update_template_ilm_policy') as mock_update_template: + self.client.indices.exists.return_value = True + + mock_get_policies.return_value = { + "my-policy": {"policy": {"phases": {}}} + } + mock_create.return_value = "my-policy-000002" + + # Mock templates + mock_get_composable.return_value = { + "index_templates": [{"name": "logs-template"}] + } + mock_get_templates.return_value = {"metrics-template": {}} + mock_update_template.return_value = True + + rotate = Rotate(self.client) + rotate.update_ilm_policies(dry_run=False) + + # Verify templates were updated (both composable and legacy) + assert mock_update_template.call_count >= 2 + + def test_update_ilm_policies_dry_run(self): + """Test that update_ilm_policies dry-run mode doesn't create policies""" + with patch('curator.actions.deepfreeze.rotate.get_settings', return_value=self.mock_settings): + with patch('curator.actions.deepfreeze.rotate.get_matching_repo_names', return_value=["deepfreeze-000001"]): + with patch('curator.actions.deepfreeze.rotate.get_next_suffix', return_value="000002"): + with patch('curator.actions.deepfreeze.rotate.s3_client_factory'): + with patch('curator.actions.deepfreeze.rotate.get_policies_for_repo') as mock_get_policies: + with patch('curator.actions.deepfreeze.rotate.create_versioned_ilm_policy') as 
mock_create: + with patch('curator.actions.deepfreeze.rotate.get_composable_templates') as mock_get_composable: + with patch('curator.actions.deepfreeze.rotate.get_index_templates') as mock_get_templates: + self.client.indices.exists.return_value = True + + mock_get_policies.return_value = { + "my-policy": {"policy": {"phases": {}}} + } + mock_get_composable.return_value = {"index_templates": []} + mock_get_templates.return_value = {} + + rotate = Rotate(self.client) + rotate.update_ilm_policies(dry_run=True) + + # Verify no policies were created in dry-run + mock_create.assert_not_called() + + def test_cleanup_policies_for_repo(self): + """Test cleanup_policies_for_repo deletes policies with matching suffix""" + with patch('curator.actions.deepfreeze.rotate.get_settings', return_value=self.mock_settings): + with patch('curator.actions.deepfreeze.rotate.get_matching_repo_names', return_value=["deepfreeze-000001"]): + with patch('curator.actions.deepfreeze.rotate.get_next_suffix', return_value="000002"): + with patch('curator.actions.deepfreeze.rotate.s3_client_factory'): + with patch('curator.actions.deepfreeze.rotate.get_policies_for_repo') as mock_policies: + with patch('curator.actions.deepfreeze.rotate.get_policies_by_suffix') as mock_get_by_suffix: + with patch('curator.actions.deepfreeze.rotate.is_policy_safe_to_delete') as mock_is_safe: + mock_policies.return_value = {"test-policy": {}} + self.client.indices.exists.return_value = True + + # Mock policies with suffix 000001 + mock_get_by_suffix.return_value = { + "my-policy-000001": {"policy": {}}, + "other-policy-000001": {"policy": {}} + } + mock_is_safe.return_value = True + + rotate = Rotate(self.client) + rotate.cleanup_policies_for_repo("deepfreeze-000001", dry_run=False) + + # Verify policies were deleted + assert self.client.ilm.delete_lifecycle.call_count == 2 + self.client.ilm.delete_lifecycle.assert_any_call(name="my-policy-000001") + 
self.client.ilm.delete_lifecycle.assert_any_call(name="other-policy-000001") + + def test_cleanup_policies_for_repo_skips_in_use(self): + """Test cleanup_policies_for_repo skips policies still in use""" + with patch('curator.actions.deepfreeze.rotate.get_settings', return_value=self.mock_settings): + with patch('curator.actions.deepfreeze.rotate.get_matching_repo_names', return_value=["deepfreeze-000001"]): + with patch('curator.actions.deepfreeze.rotate.get_next_suffix', return_value="000002"): + with patch('curator.actions.deepfreeze.rotate.s3_client_factory'): + with patch('curator.actions.deepfreeze.rotate.get_policies_for_repo') as mock_policies: + with patch('curator.actions.deepfreeze.rotate.get_policies_by_suffix') as mock_get_by_suffix: + with patch('curator.actions.deepfreeze.rotate.is_policy_safe_to_delete') as mock_is_safe: + mock_policies.return_value = {"test-policy": {}} + self.client.indices.exists.return_value = True + + mock_get_by_suffix.return_value = { + "my-policy-000001": {"policy": {}} + } + # Policy is still in use + mock_is_safe.return_value = False + + rotate = Rotate(self.client) + rotate.cleanup_policies_for_repo("deepfreeze-000001", dry_run=False) + + # Verify policy was NOT deleted + self.client.ilm.delete_lifecycle.assert_not_called() + + def test_cleanup_policies_for_repo_dry_run(self): + """Test cleanup_policies_for_repo dry-run mode doesn't delete policies""" + with patch('curator.actions.deepfreeze.rotate.get_settings', return_value=self.mock_settings): + with patch('curator.actions.deepfreeze.rotate.get_matching_repo_names', return_value=["deepfreeze-000001"]): + with patch('curator.actions.deepfreeze.rotate.get_next_suffix', return_value="000002"): + with patch('curator.actions.deepfreeze.rotate.s3_client_factory'): + with patch('curator.actions.deepfreeze.rotate.get_policies_for_repo') as mock_policies: + with patch('curator.actions.deepfreeze.rotate.get_policies_by_suffix') as mock_get_by_suffix: + with 
patch('curator.actions.deepfreeze.rotate.is_policy_safe_to_delete') as mock_is_safe: + mock_policies.return_value = {"test-policy": {}} + self.client.indices.exists.return_value = True + + mock_get_by_suffix.return_value = { + "my-policy-000001": {"policy": {}} + } + mock_is_safe.return_value = True + + rotate = Rotate(self.client) + rotate.cleanup_policies_for_repo("deepfreeze-000001", dry_run=True) + + # Verify no policies were deleted in dry-run + self.client.ilm.delete_lifecycle.assert_not_called() + + def test_unmount_oldest_repos_calls_cleanup(self): + """Test that unmount_oldest_repos calls cleanup_policies_for_repo""" + with patch('curator.actions.deepfreeze.rotate.get_settings', return_value=self.mock_settings): + with patch('curator.actions.deepfreeze.rotate.get_matching_repo_names', return_value=["deepfreeze-000002", "deepfreeze-000001"]): + with patch('curator.actions.deepfreeze.rotate.get_next_suffix', return_value="000003"): + with patch('curator.actions.deepfreeze.rotate.s3_client_factory'): + with patch('curator.actions.deepfreeze.rotate.get_policies_for_repo') as mock_policies: + with patch('curator.actions.deepfreeze.rotate.unmount_repo') as mock_unmount: + with patch('curator.actions.deepfreeze.rotate.push_to_glacier'): + with patch('curator.actions.deepfreeze.rotate.Repository') as mock_repo_class: + mock_policies.return_value = {"test-policy": {}} + self.client.indices.exists.return_value = True + + mock_repo = Mock() + mock_repo.name = "deepfreeze-000001" + mock_repo.thaw_state = "frozen" # Make sure repo is not thawed + mock_repo_class.from_elasticsearch.return_value = mock_repo + + rotate = Rotate(self.client, keep="1") + + with patch.object(rotate, 'cleanup_policies_for_repo') as mock_cleanup: + with patch.object(rotate, 'is_thawed', return_value=False): + rotate.unmount_oldest_repos(dry_run=False) + + # Verify cleanup was called for the unmounted repo + mock_cleanup.assert_called_once_with("deepfreeze-000001", dry_run=False) + + def 
test_unmount_oldest_repos_sets_thaw_state_frozen(self): + """ + Test that unmount_oldest_repos properly sets thaw_state to 'frozen' after push_to_glacier. + + This is a regression test for the bug where repositories were pushed to Glacier + but their metadata still showed thaw_state='active' instead of 'frozen'. + """ + with patch('curator.actions.deepfreeze.rotate.get_settings', return_value=self.mock_settings): + with patch('curator.actions.deepfreeze.rotate.get_matching_repo_names', return_value=["deepfreeze-000002", "deepfreeze-000001"]): + with patch('curator.actions.deepfreeze.rotate.get_next_suffix', return_value="000003"): + with patch('curator.actions.deepfreeze.rotate.s3_client_factory'): + with patch('curator.actions.deepfreeze.rotate.get_policies_for_repo') as mock_policies: + with patch('curator.actions.deepfreeze.rotate.unmount_repo') as mock_unmount: + with patch('curator.actions.deepfreeze.rotate.push_to_glacier'): + with patch('curator.actions.deepfreeze.rotate.Repository') as mock_repo_class: + mock_policies.return_value = {"test-policy": {}} + self.client.indices.exists.return_value = True + + # Create a mock repository that will be returned by from_elasticsearch + mock_repo = Mock() + mock_repo.name = "deepfreeze-000001" + mock_repo.thaw_state = "active" # Initially active (bug scenario) + mock_repo.is_mounted = True + mock_repo.is_thawed = False + mock_repo_class.from_elasticsearch.return_value = mock_repo + + rotate = Rotate(self.client, keep="1") + + with patch.object(rotate, 'cleanup_policies_for_repo'): + with patch.object(rotate, 'is_thawed', return_value=False): + # Run the unmount operation + rotate.unmount_oldest_repos(dry_run=False) + + # Verify reset_to_frozen was called (which sets thaw_state='frozen') + mock_repo.reset_to_frozen.assert_called_once() + + # Verify persist was called to save the updated state + mock_repo.persist.assert_called_once_with(self.client) + + diff --git a/tests/unit/test_action_deepfreeze_setup.py 
b/tests/unit/test_action_deepfreeze_setup.py new file mode 100644 index 00000000..d695961a --- /dev/null +++ b/tests/unit/test_action_deepfreeze_setup.py @@ -0,0 +1,307 @@ +"""Test deepfreeze Setup action""" +# pylint: disable=attribute-defined-outside-init +from unittest import TestCase +from unittest.mock import Mock, patch, MagicMock +from datetime import datetime +import pytest + +from curator.actions.deepfreeze.setup import Setup +from curator.actions.deepfreeze.helpers import Settings, Repository +from curator.actions.deepfreeze.constants import STATUS_INDEX, SETTINGS_ID +from curator.actions.deepfreeze.exceptions import PreconditionError, ActionException +from curator.s3client import AwsS3Client + + +class TestDeepfreezeSetup(TestCase): + """Test Deepfreeze Setup action""" + + def setUp(self): + """Set up test fixtures""" + self.client = Mock() + self.client.indices.exists.return_value = False + self.client.snapshot.get_repository.return_value = {} + self.client.ilm.get_lifecycle.return_value = {} + # Mock info() for version checking + self.client.info.return_value = { + 'version': { + 'number': '8.0.0' + } + } + + def test_init_defaults(self): + """Test Setup initialization with default values""" + with patch('curator.actions.deepfreeze.setup.s3_client_factory') as mock_factory: + with patch('curator.actions.deepfreeze.setup.get_matching_repo_names') as mock_get_repos: + mock_s3 = Mock() + mock_factory.return_value = mock_s3 + mock_get_repos.return_value = [] + + setup = Setup(self.client) + + assert setup.client == self.client + assert setup.s3 == mock_s3 + assert setup.settings.repo_name_prefix == "deepfreeze" + assert setup.settings.bucket_name_prefix == "deepfreeze" + assert setup.settings.base_path_prefix == "snapshots" + assert setup.settings.canned_acl == "private" + assert setup.settings.storage_class == "intelligent_tiering" + assert setup.settings.provider == "aws" + assert setup.settings.rotate_by == "path" + assert setup.settings.style == 
"oneup" + assert setup.ilm_policy_name == "deepfreeze-sample-policy" + assert setup.create_sample_ilm_policy is False + + def test_init_custom_values(self): + """Test Setup initialization with custom values""" + with patch('curator.actions.deepfreeze.setup.s3_client_factory') as mock_factory: + with patch('curator.actions.deepfreeze.setup.get_matching_repo_names') as mock_get_repos: + mock_s3 = Mock() + mock_factory.return_value = mock_s3 + mock_get_repos.return_value = [] + + setup = Setup( + self.client, + year=2024, + month=3, + repo_name_prefix="custom-repo", + bucket_name_prefix="custom-bucket", + base_path_prefix="custom-path", + canned_acl="public-read", + storage_class="GLACIER", + provider="gcp", + rotate_by="bucket", + style="monthly", + ilm_policy_name="custom-policy", + create_sample_ilm_policy=True + ) + + assert setup.settings.repo_name_prefix == "custom-repo" + assert setup.settings.bucket_name_prefix == "custom-bucket" + assert setup.settings.base_path_prefix == "custom-path" + assert setup.settings.canned_acl == "public-read" + assert setup.settings.storage_class == "GLACIER" + assert setup.settings.provider == "gcp" + assert setup.settings.rotate_by == "bucket" + assert setup.settings.style == "monthly" + assert setup.ilm_policy_name == "custom-policy" + assert setup.create_sample_ilm_policy is True + + def test_check_preconditions_status_index_exists(self): + """Test preconditions check when status index exists""" + self.client.indices.exists.return_value = True + + with patch('curator.actions.deepfreeze.setup.s3_client_factory'): + with patch('curator.actions.deepfreeze.setup.get_matching_repo_names') as mock_get_repos: + mock_get_repos.return_value = [] + setup = Setup(self.client) + + with pytest.raises(PreconditionError, match="precondition error"): + setup._check_preconditions() + + def test_check_preconditions_repository_exists(self): + """Test preconditions check when repository already exists""" + self.client.indices.exists.return_value = 
False + self.client.snapshot.get_repository.return_value = { + 'deepfreeze-000001': {} + } + + with patch('curator.actions.deepfreeze.setup.s3_client_factory'): + with patch('curator.actions.deepfreeze.setup.get_matching_repo_names') as mock_get_repos: + mock_get_repos.return_value = [] + setup = Setup(self.client) + + with pytest.raises(PreconditionError, match="precondition error"): + setup._check_preconditions() + + def test_check_preconditions_bucket_exists(self): + """Test preconditions check when bucket already exists""" + self.client.indices.exists.return_value = False + self.client.snapshot.get_repository.return_value = {} + + with patch('curator.actions.deepfreeze.setup.s3_client_factory') as mock_factory: + with patch('curator.actions.deepfreeze.setup.get_matching_repo_names') as mock_get_repos: + mock_s3 = Mock() + mock_s3.bucket_exists.return_value = True + mock_factory.return_value = mock_s3 + mock_get_repos.return_value = [] + + setup = Setup(self.client, rotate_by="bucket") + + with pytest.raises(PreconditionError, match="precondition error"): + setup._check_preconditions() + + def test_check_preconditions_success(self): + """Test successful preconditions check""" + self.client.indices.exists.return_value = False + self.client.snapshot.get_repository.return_value = {} + + with patch('curator.actions.deepfreeze.setup.s3_client_factory') as mock_factory: + with patch('curator.actions.deepfreeze.setup.get_matching_repo_names') as mock_get_repos: + mock_s3 = Mock() + mock_s3.bucket_exists.return_value = False + mock_factory.return_value = mock_s3 + mock_get_repos.return_value = [] + + setup = Setup(self.client) + + # Should not raise any exceptions + setup._check_preconditions() + + def test_do_dry_run(self): + """Test dry run mode""" + with patch('curator.actions.deepfreeze.setup.s3_client_factory') as mock_factory: + with patch('curator.actions.deepfreeze.setup.get_matching_repo_names') as mock_get_repos: + with 
patch('curator.actions.deepfreeze.setup.create_repo') as mock_create_repo: + mock_s3 = Mock() + mock_s3.bucket_exists.return_value = False + mock_factory.return_value = mock_s3 + mock_get_repos.return_value = [] + + setup = Setup(self.client) + setup.do_dry_run() + + # Should call create_repo with dry_run=True + mock_create_repo.assert_called_once() + call_args = mock_create_repo.call_args + assert call_args.kwargs.get('dry_run') is True + + def test_do_action_success_rotate_by_path(self): + """Test successful setup action with rotate_by='path'""" + self.client.indices.exists.return_value = False + + with patch('curator.actions.deepfreeze.setup.s3_client_factory') as mock_factory: + with patch('curator.actions.deepfreeze.setup.get_matching_repo_names') as mock_get_repos: + mock_s3 = Mock() + mock_s3.bucket_exists.return_value = False + mock_factory.return_value = mock_s3 + mock_get_repos.return_value = [] + + with patch('curator.actions.deepfreeze.setup.ensure_settings_index'): + with patch('curator.actions.deepfreeze.setup.save_settings'): + with patch('curator.actions.deepfreeze.setup.create_repo'): + setup = Setup(self.client, rotate_by="path") + + setup.do_action() + + # Should create bucket (only one for path rotation) + mock_s3.create_bucket.assert_called_once_with("deepfreeze") + + def test_do_action_success_rotate_by_bucket(self): + """Test successful setup action with rotate_by='bucket'""" + self.client.indices.exists.return_value = False + + with patch('curator.actions.deepfreeze.setup.s3_client_factory') as mock_factory: + with patch('curator.actions.deepfreeze.setup.get_matching_repo_names') as mock_get_repos: + mock_s3 = Mock() + mock_s3.bucket_exists.return_value = False + mock_factory.return_value = mock_s3 + mock_get_repos.return_value = [] + + with patch('curator.actions.deepfreeze.setup.ensure_settings_index'): + with patch('curator.actions.deepfreeze.setup.save_settings'): + with patch('curator.actions.deepfreeze.setup.create_repo'): + setup = 
Setup(self.client, rotate_by="bucket") + + setup.do_action() + + # Should create bucket with suffix for bucket rotation + mock_s3.create_bucket.assert_called_once_with("deepfreeze-000001") + + def test_do_action_with_ilm_policy(self): + """Test setup action creates ILM policy""" + self.client.indices.exists.return_value = False + + with patch('curator.actions.deepfreeze.setup.s3_client_factory') as mock_factory: + with patch('curator.actions.deepfreeze.setup.get_matching_repo_names') as mock_get_repos: + mock_s3 = Mock() + mock_s3.bucket_exists.return_value = False + mock_factory.return_value = mock_s3 + mock_get_repos.return_value = [] + + with patch('curator.actions.deepfreeze.setup.ensure_settings_index'): + with patch('curator.actions.deepfreeze.setup.save_settings'): + with patch('curator.actions.deepfreeze.setup.create_repo'): + with patch('curator.actions.deepfreeze.setup.create_ilm_policy') as mock_create_ilm: + setup = Setup( + self.client, + create_sample_ilm_policy=True, + ilm_policy_name="test-policy" + ) + + setup.do_action() + + # Should create ILM policy + mock_create_ilm.assert_called_once() + + def test_calculate_names_rotate_by_path(self): + """Test name calculation for path rotation""" + with patch('curator.actions.deepfreeze.setup.s3_client_factory'): + with patch('curator.actions.deepfreeze.setup.get_matching_repo_names') as mock_get_repos: + mock_get_repos.return_value = [] + setup = Setup(self.client, rotate_by="path") + + # Should calculate names correctly + assert setup.new_repo_name == "deepfreeze-000001" + assert setup.new_bucket_name == "deepfreeze" + assert setup.base_path == "snapshots-000001" + + def test_calculate_names_rotate_by_bucket(self): + """Test name calculation for bucket rotation""" + with patch('curator.actions.deepfreeze.setup.s3_client_factory'): + with patch('curator.actions.deepfreeze.setup.get_matching_repo_names') as mock_get_repos: + mock_get_repos.return_value = [] + setup = Setup(self.client, rotate_by="bucket") + 
+ # Should calculate names correctly + assert setup.new_repo_name == "deepfreeze-000001" + assert setup.new_bucket_name == "deepfreeze-000001" + assert setup.base_path == "snapshots" + + def test_calculate_names_monthly_style(self): + """Test name calculation with monthly style""" + with patch('curator.actions.deepfreeze.setup.s3_client_factory'): + with patch('curator.actions.deepfreeze.setup.get_matching_repo_names') as mock_get_repos: + mock_get_repos.return_value = [] + setup = Setup( + self.client, + year=2024, + month=3, + style="monthly", + rotate_by="path" + ) + + assert setup.new_repo_name == "deepfreeze-2024.03" + assert setup.base_path == "snapshots-2024.03" + + def test_action_with_existing_repo_name_fails(self): + """Test that setup fails if repository name already exists""" + self.client.indices.exists.return_value = False + self.client.snapshot.get_repository.return_value = { + 'deepfreeze-000001': {} # Repository already exists + } + + with patch('curator.actions.deepfreeze.setup.s3_client_factory'): + with patch('curator.actions.deepfreeze.setup.get_matching_repo_names') as mock_get_repos: + mock_get_repos.return_value = [] + setup = Setup(self.client) + + with pytest.raises(PreconditionError, match="precondition error"): + setup._check_preconditions() + + def test_action_with_existing_bucket_fails(self): + """Test that setup fails if bucket already exists for bucket rotation""" + self.client.indices.exists.return_value = False + self.client.snapshot.get_repository.return_value = {} + + with patch('curator.actions.deepfreeze.setup.s3_client_factory') as mock_factory: + with patch('curator.actions.deepfreeze.setup.get_matching_repo_names') as mock_get_repos: + mock_s3 = Mock() + mock_s3.bucket_exists.return_value = True # Bucket exists + mock_factory.return_value = mock_s3 + mock_get_repos.return_value = [] + + setup = Setup(self.client, rotate_by="bucket") + + with pytest.raises(PreconditionError, match="precondition error"): + 
setup._check_preconditions() + diff --git a/tests/unit/test_action_deepfreeze_status.py b/tests/unit/test_action_deepfreeze_status.py new file mode 100644 index 00000000..c7da2906 --- /dev/null +++ b/tests/unit/test_action_deepfreeze_status.py @@ -0,0 +1,358 @@ +"""Test deepfreeze Status action""" +# pylint: disable=attribute-defined-outside-init +from unittest import TestCase +from unittest.mock import Mock, patch, MagicMock +import pytest + +from curator.actions.deepfreeze.status import Status +from curator.actions.deepfreeze.helpers import Settings, Repository + + +class TestDeepfreezeStatus(TestCase): + """Test Deepfreeze Status action""" + + def setUp(self): + """Set up test fixtures""" + self.client = Mock() + # Mock search to return empty hits by default + self.client.search.return_value = {"hits": {"hits": []}} + self.mock_settings = Settings( + repo_name_prefix="deepfreeze", + bucket_name_prefix="deepfreeze", + base_path_prefix="snapshots", + canned_acl="private", + storage_class="GLACIER", + provider="aws", + rotate_by="path", + style="oneup", + last_suffix="000003" + ) + + def test_init(self): + """Test Status initialization""" + with patch('curator.actions.deepfreeze.status.get_settings', return_value=self.mock_settings): + with patch('curator.actions.deepfreeze.status.Console') as mock_console: + status = Status(self.client) + + assert status.client == self.client + assert status.settings == self.mock_settings + mock_console.assert_called_once() + mock_console.return_value.clear.assert_called_once() + + def test_get_cluster_name_success(self): + """Test successful cluster name retrieval""" + self.client.cluster.health.return_value = { + 'cluster_name': 'test-cluster', + 'status': 'green' + } + + with patch('curator.actions.deepfreeze.status.get_settings', return_value=self.mock_settings): + status = Status(self.client) + cluster_name = status.get_cluster_name() + + assert cluster_name == 'test-cluster' + + def test_get_cluster_name_error(self): + 
"""Test cluster name retrieval with error""" + self.client.cluster.health.side_effect = Exception("Connection failed") + + with patch('curator.actions.deepfreeze.status.get_settings', return_value=self.mock_settings): + status = Status(self.client) + cluster_name = status.get_cluster_name() + + assert cluster_name.startswith("Error:") + assert "Connection failed" in cluster_name + + def test_do_config(self): + """Test configuration display""" + with patch('curator.actions.deepfreeze.status.get_settings', return_value=self.mock_settings): + with patch('curator.actions.deepfreeze.status.Table') as mock_table_class: + with patch('curator.actions.deepfreeze.status.Console'): + mock_table = Mock() + mock_table_class.return_value = mock_table + + status = Status(self.client) + status.get_cluster_name = Mock(return_value="test-cluster") + + status.do_config() + + # Should create table with title "Configuration" + mock_table_class.assert_called_with(title="Configuration") + + # Should add columns (check that calls were made, not exact parameters) + assert mock_table.add_column.call_count >= 2 + # Verify column names were added + call_args_list = [call[0][0] for call in mock_table.add_column.call_args_list] + assert "Setting" in call_args_list + assert "Value" in call_args_list + + # Should add rows for all settings + expected_calls = [ + ("Repo Prefix", "deepfreeze"), + ("Bucket Prefix", "deepfreeze"), + ("Base Path Prefix", "snapshots"), + ("Canned ACL", "private"), + ("Storage Class", "GLACIER"), + ("Provider", "aws"), + ("Rotate By", "path"), + ("Style", "oneup"), + ("Last Suffix", "000003"), + ("Cluster Name", "test-cluster") + ] + + for expected_call in expected_calls: + mock_table.add_row.assert_any_call(*expected_call) + + def test_do_ilm_policies(self): + """Test ILM policies display""" + self.client.ilm.get_lifecycle.return_value = { + 'policy1': { + 'policy': { + 'phases': { + 'frozen': { + 'actions': { + 'searchable_snapshot': { + 'snapshot_repository': 
'deepfreeze-000003' + } + } + } + } + }, + 'in_use_by': { + 'indices': ['index1', 'index2'], + 'data_streams': ['stream1'] + } + }, + 'policy2': { + 'policy': { + 'phases': { + 'cold': { + 'actions': { + 'searchable_snapshot': { + 'snapshot_repository': 'deepfreeze-000003' + } + } + } + } + }, + 'in_use_by': { + 'indices': ['index3'], + 'data_streams': [] + } + }, + 'policy3': { + 'policy': { + 'phases': { + 'hot': { + 'actions': {} + } + } + }, + 'in_use_by': { + 'indices': [], + 'data_streams': [] + } + } + } + + with patch('curator.actions.deepfreeze.status.get_settings', return_value=self.mock_settings): + with patch('curator.actions.deepfreeze.status.Table') as mock_table_class: + with patch('curator.actions.deepfreeze.status.Console'): + mock_table = Mock() + mock_table_class.return_value = mock_table + + status = Status(self.client) + + status.do_ilm_policies() + + # Should create table with title "ILM Policies" + mock_table_class.assert_called_with(title="ILM Policies") + + # Should add columns + # Check columns were added (not exact parameters) + assert mock_table.add_column.call_count >= 4 + call_args_list = [call[0][0] for call in mock_table.add_column.call_args_list] + assert "Policy" in call_args_list + assert "Repository" in call_args_list + assert "Indices" in call_args_list + assert "Datastreams" in call_args_list + + # Should add rows for matching policies (policy1 and policy2) + mock_table.add_row.assert_any_call("policy1", "deepfreeze-000003*", "2", "1") + mock_table.add_row.assert_any_call("policy2", "deepfreeze-000003*", "1", "0") + + def test_do_buckets_path_rotation(self): + """Test buckets display for path rotation""" + mock_repos = [ + Repository( + name="deepfreeze-000003", + bucket="deepfreeze", + base_path="snapshots-000003" + ) + ] + + with patch('curator.actions.deepfreeze.status.get_settings', return_value=self.mock_settings): + with patch('curator.actions.deepfreeze.status.get_all_repos', return_value=mock_repos): + with 
patch('curator.actions.deepfreeze.status.Table') as mock_table_class: + with patch('curator.actions.deepfreeze.status.Console'): + mock_table = Mock() + mock_table_class.return_value = mock_table + + status = Status(self.client) + + status.do_buckets() + + # Should create table with title "Buckets" + mock_table_class.assert_called_with(title="Buckets") + + # Should add columns + # Check columns were added (not exact parameters) + assert mock_table.add_column.call_count >= 3 + call_args_list = [call[0][0] for call in mock_table.add_column.call_args_list] + assert "Provider" in call_args_list + assert "Bucket" in call_args_list + assert "Base_path" in call_args_list + + # For path rotation, should show single bucket with suffixed path + # Bucket gets marked with asterisk since it matches current bucket/base_path + mock_table.add_row.assert_called_with( + "aws", + "deepfreeze*", + "snapshots-000003" + ) + + def test_do_buckets_bucket_rotation(self): + """Test buckets display for bucket rotation""" + bucket_rotation_settings = Settings( + repo_name_prefix="deepfreeze", + bucket_name_prefix="deepfreeze", + base_path_prefix="snapshots", + rotate_by="bucket", + style="oneup", + last_suffix="000003", + provider="aws" + ) + + mock_repos = [ + Repository( + name="deepfreeze-000003", + bucket="deepfreeze-000003", + base_path="snapshots" + ) + ] + + with patch('curator.actions.deepfreeze.status.get_settings', return_value=bucket_rotation_settings): + with patch('curator.actions.deepfreeze.status.get_all_repos', return_value=mock_repos): + with patch('curator.actions.deepfreeze.status.Table') as mock_table_class: + with patch('curator.actions.deepfreeze.status.Console'): + mock_table = Mock() + mock_table_class.return_value = mock_table + + status = Status(self.client) + + status.do_buckets() + + # For bucket rotation, should show suffixed bucket with static path + mock_table.add_row.assert_called_with( + "aws", + "deepfreeze-000003*", + "snapshots" + ) + + + def 
test_do_action(self): + """Test main action execution""" + with patch('curator.actions.deepfreeze.status.get_settings', return_value=self.mock_settings): + with patch('curator.actions.deepfreeze.status.Console'): + status = Status(self.client) + + # Mock all sub-methods + status.do_repositories = Mock() + status.do_buckets = Mock() + status.do_ilm_policies = Mock() + status.do_config = Mock() + + with patch('curator.actions.deepfreeze.status.print') as mock_print: + status.do_action() + + # Should call all display methods in order + status.do_repositories.assert_called_once() + status.do_buckets.assert_called_once() + status.do_ilm_policies.assert_called_once() + status.do_config.assert_called_once() + + # Should print empty line + mock_print.assert_called_once() + + def test_do_singleton_action(self): + """Test singleton action execution""" + with patch('curator.actions.deepfreeze.status.get_settings', return_value=self.mock_settings): + with patch('curator.actions.deepfreeze.status.Console'): + status = Status(self.client) + + with patch.object(status, 'do_action') as mock_do_action: + status.do_singleton_action() + + mock_do_action.assert_called_once() + + + def test_repository_status_with_snapshots(self): + """Test repository status display with snapshot counts""" + mock_repos = [ + Repository( + name="deepfreeze-000001", + is_mounted=True, + is_thawed=False + ) + ] + + # Mock successful snapshot retrieval + self.client.snapshot.get.return_value = { + 'snapshots': [ + {'name': 'snap1'}, + {'name': 'snap2'}, + {'name': 'snap3'} + ] + } + + with patch('curator.actions.deepfreeze.status.get_settings', return_value=self.mock_settings): + with patch('curator.actions.deepfreeze.status.get_all_repos', return_value=mock_repos): + with patch('curator.actions.deepfreeze.status.Table') as mock_table_class: + with patch('curator.actions.deepfreeze.status.Console'): + mock_table = Mock() + mock_table_class.return_value = mock_table + + status = Status(self.client) + + 
status.do_repositories() + + # Should show snapshot count - format changed to include state and mount status + # New format: name, state, mounted, count, start, end + mock_table.add_row.assert_called_with( + "deepfreeze-000001", "active", "yes", "3", "N/A", "N/A" + ) + + def test_repository_unmount_on_error(self): + """Test repository gets unmounted when snapshot check fails""" + mock_repo = Repository( + name="deepfreeze-000001", + is_mounted=True, + is_thawed=False + ) + + # Mock snapshot retrieval error + self.client.snapshot.get.side_effect = Exception("Repository not accessible") + + with patch('curator.actions.deepfreeze.status.get_settings', return_value=self.mock_settings): + with patch('curator.actions.deepfreeze.status.get_all_repos', return_value=[mock_repo]): + with patch('curator.actions.deepfreeze.status.Table') as mock_table_class: + with patch('curator.actions.deepfreeze.status.Console'): + mock_table = Mock() + mock_table_class.return_value = mock_table + + status = Status(self.client) + + status.do_repositories() + + # Repository should be unmounted after error + assert mock_repo.is_mounted is False \ No newline at end of file diff --git a/tests/unit/test_action_deepfreeze_thaw.py b/tests/unit/test_action_deepfreeze_thaw.py new file mode 100644 index 00000000..93dc0ca1 --- /dev/null +++ b/tests/unit/test_action_deepfreeze_thaw.py @@ -0,0 +1,648 @@ +"""Test deepfreeze Thaw action""" +# pylint: disable=attribute-defined-outside-init +from datetime import datetime, timezone +from unittest import TestCase +from unittest.mock import Mock, patch, call + +from curator.actions.deepfreeze.thaw import Thaw +from curator.actions.deepfreeze.helpers import Settings, Repository + + +class TestDeepfreezeThaw(TestCase): + """Test Deepfreeze Thaw action""" + + def setUp(self): + """Set up test fixtures""" + self.client = Mock() + self.mock_settings = Settings( + repo_name_prefix="deepfreeze", + bucket_name_prefix="deepfreeze", + base_path_prefix="snapshots", + 
canned_acl="private", + storage_class="GLACIER", + provider="aws", + rotate_by="path", + style="oneup", + last_suffix="000003", + ) + + self.start_date = "2025-01-01T00:00:00Z" + self.end_date = "2025-01-31T23:59:59Z" + + @patch("curator.actions.deepfreeze.thaw.s3_client_factory") + @patch("curator.actions.deepfreeze.thaw.get_settings") + def test_init_success(self, mock_get_settings, mock_s3_factory): + """Test Thaw initialization with valid dates""" + mock_get_settings.return_value = self.mock_settings + mock_s3_factory.return_value = Mock() + + thaw = Thaw( + self.client, + start_date=self.start_date, + end_date=self.end_date, + ) + + assert thaw.client == self.client + assert thaw.sync is False + assert thaw.duration == 7 + assert thaw.retrieval_tier == "Standard" + assert thaw.start_date.year == 2025 + assert thaw.start_date.month == 1 + assert thaw.end_date.month == 1 + mock_get_settings.assert_called_once_with(self.client) + mock_s3_factory.assert_called_once_with("aws") + + @patch("curator.actions.deepfreeze.thaw.s3_client_factory") + @patch("curator.actions.deepfreeze.thaw.get_settings") + def test_init_with_custom_params(self, mock_get_settings, mock_s3_factory): + """Test Thaw initialization with custom parameters""" + mock_get_settings.return_value = self.mock_settings + mock_s3_factory.return_value = Mock() + + thaw = Thaw( + self.client, + start_date=self.start_date, + end_date=self.end_date, + sync=True, + duration=14, + retrieval_tier="Expedited", + ) + + assert thaw.sync is True + assert thaw.duration == 14 + assert thaw.retrieval_tier == "Expedited" + + @patch("curator.actions.deepfreeze.thaw.s3_client_factory") + @patch("curator.actions.deepfreeze.thaw.get_settings") + def test_init_invalid_date_format(self, mock_get_settings, mock_s3_factory): + """Test Thaw initialization with invalid date format""" + mock_get_settings.return_value = self.mock_settings + mock_s3_factory.return_value = Mock() + + with self.assertRaises(ValueError) as context: + 
Thaw( + self.client, + start_date="not-a-date", + end_date=self.end_date, + ) + + assert "Invalid start_date" in str(context.exception) + + @patch("curator.actions.deepfreeze.thaw.s3_client_factory") + @patch("curator.actions.deepfreeze.thaw.get_settings") + def test_init_start_after_end(self, mock_get_settings, mock_s3_factory): + """Test Thaw initialization with start_date after end_date""" + mock_get_settings.return_value = self.mock_settings + mock_s3_factory.return_value = Mock() + + with self.assertRaises(ValueError) as context: + Thaw( + self.client, + start_date=self.end_date, + end_date=self.start_date, + ) + + assert "start_date must be before or equal to end_date" in str( + context.exception + ) + + @patch("curator.actions.deepfreeze.thaw.find_repos_by_date_range") + @patch("curator.actions.deepfreeze.thaw.s3_client_factory") + @patch("curator.actions.deepfreeze.thaw.get_settings") + def test_do_dry_run_no_repos( + self, mock_get_settings, mock_s3_factory, mock_find_repos + ): + """Test dry run with no matching repositories""" + mock_get_settings.return_value = self.mock_settings + mock_s3_factory.return_value = Mock() + mock_find_repos.return_value = [] + + thaw = Thaw( + self.client, + start_date=self.start_date, + end_date=self.end_date, + ) + + thaw.do_dry_run() + + mock_find_repos.assert_called_once() + + @patch("curator.actions.deepfreeze.thaw.find_repos_by_date_range") + @patch("curator.actions.deepfreeze.thaw.s3_client_factory") + @patch("curator.actions.deepfreeze.thaw.get_settings") + def test_do_dry_run_with_repos( + self, mock_get_settings, mock_s3_factory, mock_find_repos + ): + """Test dry run with matching repositories""" + mock_get_settings.return_value = self.mock_settings + mock_s3_factory.return_value = Mock() + + mock_repos = [ + Repository( + name="deepfreeze-000001", + bucket="deepfreeze", + base_path="snapshots-000001", + start="2025-01-01T00:00:00Z", + end="2025-01-15T23:59:59Z", + is_mounted=False, + is_thawed=False, + ), + 
Repository( + name="deepfreeze-000002", + bucket="deepfreeze", + base_path="snapshots-000002", + start="2025-01-16T00:00:00Z", + end="2025-01-31T23:59:59Z", + is_mounted=False, + is_thawed=False, + ), + ] + mock_find_repos.return_value = mock_repos + + thaw = Thaw( + self.client, + start_date=self.start_date, + end_date=self.end_date, + ) + + thaw.do_dry_run() + + mock_find_repos.assert_called_once() + + @patch("curator.actions.deepfreeze.thaw.save_thaw_request") + @patch("curator.actions.deepfreeze.thaw.find_repos_by_date_range") + @patch("curator.actions.deepfreeze.thaw.s3_client_factory") + @patch("curator.actions.deepfreeze.thaw.get_settings") + def test_do_action_async_mode( + self, + mock_get_settings, + mock_s3_factory, + mock_find_repos, + mock_save_request, + ): + """Test thaw action in async mode""" + mock_get_settings.return_value = self.mock_settings + mock_s3 = Mock() + mock_s3_factory.return_value = mock_s3 + + mock_repo = Repository( + name="deepfreeze-000001", + bucket="deepfreeze", + base_path="snapshots-000001", + start="2025-01-01T00:00:00Z", + end="2025-01-15T23:59:59Z", + is_mounted=False, + is_thawed=False, + ) + mock_find_repos.return_value = [mock_repo] + + # Mock list_objects to return some objects + mock_s3.list_objects.return_value = [ + {"Key": "snapshots-000001/index1/data.dat"}, + {"Key": "snapshots-000001/index2/data.dat"}, + ] + + thaw = Thaw( + self.client, + start_date=self.start_date, + end_date=self.end_date, + sync=False, + ) + + thaw.do_action() + + # Should list objects and call thaw + mock_s3.list_objects.assert_called_once_with( + "deepfreeze", "snapshots-000001" + ) + mock_s3.thaw.assert_called_once() + + # Should save thaw request in async mode + mock_save_request.assert_called_once() + args = mock_save_request.call_args[0] + assert args[0] == self.client + assert args[2] == [mock_repo] # repos list + assert args[3] == "in_progress" # status + + @patch("curator.actions.deepfreeze.thaw.mount_repo") + 
@patch("curator.actions.deepfreeze.thaw.check_restore_status") + @patch("curator.actions.deepfreeze.thaw.find_repos_by_date_range") + @patch("curator.actions.deepfreeze.thaw.s3_client_factory") + @patch("curator.actions.deepfreeze.thaw.get_settings") + def test_do_action_sync_mode( + self, + mock_get_settings, + mock_s3_factory, + mock_find_repos, + mock_check_status, + mock_mount_repo, + ): + """Test thaw action in sync mode""" + mock_get_settings.return_value = self.mock_settings + mock_s3 = Mock() + mock_s3_factory.return_value = mock_s3 + + mock_repo = Repository( + name="deepfreeze-000001", + bucket="deepfreeze", + base_path="snapshots-000001", + start="2025-01-01T00:00:00Z", + end="2025-01-15T23:59:59Z", + is_mounted=False, + is_thawed=False, + ) + mock_find_repos.return_value = [mock_repo] + + # Mock list_objects to return some objects + mock_s3.list_objects.return_value = [ + {"Key": "snapshots-000001/index1/data.dat"}, + ] + + # Mock restore status to indicate completion + mock_check_status.return_value = { + "total": 1, + "restored": 1, + "in_progress": 0, + "not_restored": 0, + "complete": True, + } + + thaw = Thaw( + self.client, + start_date=self.start_date, + end_date=self.end_date, + sync=True, + ) + + thaw.do_action() + + # Should list objects and call thaw + mock_s3.list_objects.assert_called_once() + mock_s3.thaw.assert_called_once() + + # Should check restore status and mount in sync mode + mock_check_status.assert_called() + mock_mount_repo.assert_called_once_with(self.client, mock_repo) + + @patch("curator.actions.deepfreeze.thaw.find_repos_by_date_range") + @patch("curator.actions.deepfreeze.thaw.s3_client_factory") + @patch("curator.actions.deepfreeze.thaw.get_settings") + def test_do_action_no_repos( + self, mock_get_settings, mock_s3_factory, mock_find_repos + ): + """Test thaw action with no matching repositories""" + mock_get_settings.return_value = self.mock_settings + mock_s3_factory.return_value = Mock() + mock_find_repos.return_value 
= [] + + thaw = Thaw( + self.client, + start_date=self.start_date, + end_date=self.end_date, + ) + + thaw.do_action() + + mock_find_repos.assert_called_once() + + @patch("curator.actions.deepfreeze.thaw.find_repos_by_date_range") + @patch("curator.actions.deepfreeze.thaw.s3_client_factory") + @patch("curator.actions.deepfreeze.thaw.get_settings") + def test_thaw_repository_already_thawed( + self, mock_get_settings, mock_s3_factory, mock_find_repos + ): + """Test thawing a repository that is already thawed""" + mock_get_settings.return_value = self.mock_settings + mock_s3 = Mock() + mock_s3_factory.return_value = mock_s3 + + mock_repo = Repository( + name="deepfreeze-000001", + bucket="deepfreeze", + base_path="snapshots-000001", + is_mounted=True, + is_thawed=True, + ) + + thaw = Thaw( + self.client, + start_date=self.start_date, + end_date=self.end_date, + ) + + result = thaw._thaw_repository(mock_repo) + + assert result is True + # Should not call S3 operations for already thawed repo + mock_s3.list_objects.assert_not_called() + mock_s3.thaw.assert_not_called() + + @patch("curator.actions.deepfreeze.thaw.s3_client_factory") + @patch("curator.actions.deepfreeze.thaw.get_settings") + def test_thaw_repository_s3_error(self, mock_get_settings, mock_s3_factory): + """Test thawing a repository when S3 operations fail""" + mock_get_settings.return_value = self.mock_settings + mock_s3 = Mock() + mock_s3_factory.return_value = mock_s3 + + mock_repo = Repository( + name="deepfreeze-000001", + bucket="deepfreeze", + base_path="snapshots-000001", + is_mounted=False, + is_thawed=False, + ) + + # Mock list_objects to return objects + mock_s3.list_objects.return_value = [ + {"Key": "snapshots-000001/index1/data.dat"}, + ] + + # Mock thaw to raise an exception + mock_s3.thaw.side_effect = Exception("S3 error") + + thaw = Thaw( + self.client, + start_date=self.start_date, + end_date=self.end_date, + ) + + result = thaw._thaw_repository(mock_repo) + + assert result is False + + 
@patch("curator.actions.deepfreeze.thaw.check_restore_status") + @patch("curator.actions.deepfreeze.thaw.s3_client_factory") + @patch("curator.actions.deepfreeze.thaw.get_settings") + @patch("curator.actions.deepfreeze.thaw.time.sleep") + def test_wait_for_restore_success( + self, mock_sleep, mock_get_settings, mock_s3_factory, mock_check_status + ): + """Test waiting for restore to complete""" + mock_get_settings.return_value = self.mock_settings + mock_s3 = Mock() + mock_s3_factory.return_value = mock_s3 + + mock_repo = Repository( + name="deepfreeze-000001", + bucket="deepfreeze", + base_path="snapshots-000001", + ) + + # Three calls: initial, in-progress, then complete + # (the initial call is made to get total objects count) + mock_check_status.side_effect = [ + { + "total": 2, + "restored": 0, + "in_progress": 2, + "not_restored": 0, + "complete": False, + }, + { + "total": 2, + "restored": 1, + "in_progress": 1, + "not_restored": 0, + "complete": False, + }, + { + "total": 2, + "restored": 2, + "in_progress": 0, + "not_restored": 0, + "complete": True, + }, + ] + + thaw = Thaw( + self.client, + start_date=self.start_date, + end_date=self.end_date, + ) + + result = thaw._wait_for_restore(mock_repo, poll_interval=1, show_progress=False) + + assert result is True + assert mock_check_status.call_count == 3 + # Should sleep once between the second and third check + mock_sleep.assert_called_once_with(1) + + @patch("curator.actions.deepfreeze.thaw.s3_client_factory") + @patch("curator.actions.deepfreeze.thaw.get_settings") + def test_do_singleton_action(self, mock_get_settings, mock_s3_factory): + """Test singleton action execution""" + mock_get_settings.return_value = self.mock_settings + mock_s3_factory.return_value = Mock() + + thaw = Thaw( + self.client, + start_date=self.start_date, + end_date=self.end_date, + ) + + with patch.object(thaw, "do_action") as mock_do_action: + thaw.do_singleton_action() + + mock_do_action.assert_called_once() + + 
@patch("curator.actions.deepfreeze.thaw.s3_client_factory") + @patch("curator.actions.deepfreeze.thaw.get_settings") + @patch("curator.actions.deepfreeze.thaw.get_repositories_by_names") + @patch("curator.actions.deepfreeze.thaw.get_thaw_request") + def test_check_status_mode_initialization( + self, mock_get_request, mock_get_repos, mock_get_settings, mock_s3_factory + ): + """Test initialization in check_status mode""" + mock_get_settings.return_value = self.mock_settings + mock_s3_factory.return_value = Mock() + + thaw = Thaw( + self.client, + check_status="test-request-id", + ) + + assert thaw.mode == "check_status" + assert thaw.check_status == "test-request-id" + + def test_list_mode_initialization(self): + """Test initialization in list mode""" + thaw = Thaw( + self.client, + list_requests=True, + ) + + assert thaw.mode == "list" + assert thaw.list_requests is True + + def test_create_mode_missing_dates_error(self): + """Test error when creating thaw without dates""" + with self.assertRaises(ValueError) as context: + Thaw(self.client) + + assert "start_date and end_date are required" in str(context.exception) + + @patch("curator.actions.deepfreeze.thaw.update_thaw_request") + @patch("curator.actions.deepfreeze.thaw.mount_repo") + @patch("curator.actions.deepfreeze.thaw.check_restore_status") + @patch("curator.actions.deepfreeze.thaw.get_repositories_by_names") + @patch("curator.actions.deepfreeze.thaw.get_thaw_request") + @patch("curator.actions.deepfreeze.thaw.s3_client_factory") + @patch("curator.actions.deepfreeze.thaw.get_settings") + def test_do_check_status_restoration_complete( + self, + mock_get_settings, + mock_s3_factory, + mock_get_request, + mock_get_repos, + mock_check_status, + mock_mount_repo, + mock_update_request, + ): + """Test check_status when restoration is complete""" + mock_get_settings.return_value = self.mock_settings + mock_s3 = Mock() + mock_s3_factory.return_value = mock_s3 + + # Mock thaw request + mock_get_request.return_value = 
{ + "request_id": "test-id", + "repos": ["deepfreeze-000001"], + "status": "in_progress", + "created_at": "2025-01-15T10:00:00Z", + } + + # Mock repository + mock_repo = Repository( + name="deepfreeze-000001", + bucket="deepfreeze", + base_path="snapshots-000001", + is_mounted=False, + is_thawed=False, + ) + mock_get_repos.return_value = [mock_repo] + + # Mock complete restoration status + mock_check_status.return_value = { + "total": 10, + "restored": 10, + "in_progress": 0, + "not_restored": 0, + "complete": True, + } + + thaw = Thaw(self.client, check_status="test-id") + thaw.do_check_status() + + # Should mount the repository + mock_mount_repo.assert_called_once_with(self.client, mock_repo) + # Should update request status to completed + mock_update_request.assert_called_once_with( + self.client, "test-id", status="completed" + ) + + @patch("curator.actions.deepfreeze.thaw.check_restore_status") + @patch("curator.actions.deepfreeze.thaw.get_repositories_by_names") + @patch("curator.actions.deepfreeze.thaw.get_thaw_request") + @patch("curator.actions.deepfreeze.thaw.s3_client_factory") + @patch("curator.actions.deepfreeze.thaw.get_settings") + def test_do_check_status_restoration_in_progress( + self, + mock_get_settings, + mock_s3_factory, + mock_get_request, + mock_get_repos, + mock_check_status, + ): + """Test check_status when restoration is still in progress""" + mock_get_settings.return_value = self.mock_settings + mock_s3 = Mock() + mock_s3_factory.return_value = mock_s3 + + mock_get_request.return_value = { + "request_id": "test-id", + "repos": ["deepfreeze-000001"], + "status": "in_progress", + "created_at": "2025-01-15T10:00:00Z", + } + + mock_repo = Repository( + name="deepfreeze-000001", + bucket="deepfreeze", + base_path="snapshots-000001", + is_mounted=False, + is_thawed=False, + ) + mock_get_repos.return_value = [mock_repo] + + # Mock in-progress restoration status + mock_check_status.return_value = { + "total": 10, + "restored": 5, + 
"in_progress": 5, + "not_restored": 0, + "complete": False, + } + + thaw = Thaw(self.client, check_status="test-id") + thaw.do_check_status() + + # Should check status but not mount + # Note: may be called multiple times depending on implementation + assert mock_check_status.call_count >= 1 + # Verify it was called with correct parameters + mock_check_status.assert_called_with(mock_s3, "deepfreeze", "snapshots-000001") + + @patch("curator.actions.deepfreeze.thaw.list_thaw_requests") + def test_do_list_requests_empty(self, mock_list_requests): + """Test listing thaw requests when none exist""" + mock_list_requests.return_value = [] + + thaw = Thaw(self.client, list_requests=True) + thaw.do_list_requests() + + mock_list_requests.assert_called_once_with(self.client) + + @patch("curator.actions.deepfreeze.thaw.list_thaw_requests") + def test_do_list_requests_with_data(self, mock_list_requests): + """Test listing thaw requests with data""" + mock_list_requests.return_value = [ + { + "id": "request-1", + "request_id": "request-1", + "repos": ["deepfreeze-000001", "deepfreeze-000002"], + "status": "in_progress", + "created_at": "2025-01-15T10:00:00Z", + }, + { + "id": "request-2", + "request_id": "request-2", + "repos": ["deepfreeze-000003"], + "status": "completed", + "created_at": "2025-01-14T14:00:00Z", + }, + ] + + thaw = Thaw(self.client, list_requests=True) + thaw.do_list_requests() + + mock_list_requests.assert_called_once_with(self.client) + + @patch("curator.actions.deepfreeze.thaw.s3_client_factory") + @patch("curator.actions.deepfreeze.thaw.get_settings") + def test_mode_routing_in_do_action(self, mock_get_settings, mock_s3_factory): + """Test that do_action routes to correct handler based on mode""" + mock_get_settings.return_value = self.mock_settings + mock_s3_factory.return_value = Mock() + + # Test list mode + thaw_list = Thaw(self.client, list_requests=True) + with patch.object(thaw_list, "do_list_requests") as mock_list: + thaw_list.do_action() + 
mock_list.assert_called_once() + + # Test check_status mode + thaw_check = Thaw(self.client, check_status="test-id") + with patch.object(thaw_check, "do_check_status") as mock_check: + thaw_check.do_action() + mock_check.assert_called_once() diff --git a/tests/unit/test_action_deepfreeze_utilities.py b/tests/unit/test_action_deepfreeze_utilities.py new file mode 100644 index 00000000..3d2dcd53 --- /dev/null +++ b/tests/unit/test_action_deepfreeze_utilities.py @@ -0,0 +1,1350 @@ +"""Test deepfreeze utilities module""" +# pylint: disable=attribute-defined-outside-init +from unittest import TestCase +from unittest.mock import Mock, patch, MagicMock +from datetime import datetime, timezone +import pytest +import botocore.exceptions + +from curator.actions.deepfreeze.utilities import ( + push_to_glacier, + get_all_indices_in_repo, + get_timestamp_range, + get_repository, + get_all_repos, + get_settings, + save_settings, + get_next_suffix, + get_matching_repo_names, + get_matching_repos, + unmount_repo, + decode_date, + create_ilm_policy, + update_repository_date_range, + get_index_templates, + get_composable_templates, + update_template_ilm_policy, + create_versioned_ilm_policy, + get_policies_for_repo, + get_policies_by_suffix, + is_policy_safe_to_delete, + get_index_datastream_name, + add_index_to_datastream, +) +from curator.actions.deepfreeze.helpers import Repository, Settings +from curator.actions.deepfreeze.constants import STATUS_INDEX, SETTINGS_ID +from curator.actions.deepfreeze.exceptions import MissingIndexError +from curator.exceptions import ActionError + + +class TestPushToGlacier(TestCase): + """Test push_to_glacier function""" + + def test_push_to_glacier_success(self): + """Test successful push to Glacier""" + mock_s3 = Mock() + mock_s3.list_objects.return_value = [ + {'Key': 'snapshots/file1', 'StorageClass': 'STANDARD'}, + {'Key': 'snapshots/file2', 'StorageClass': 'STANDARD'} + ] + mock_s3.copy_object.return_value = None + + repo = Repository( + 
name='test-repo', + bucket='test-bucket', + base_path='snapshots' + ) + + with patch('curator.actions.deepfreeze.utilities.logging'): + result = push_to_glacier(mock_s3, repo) + + assert result is True + assert mock_s3.copy_object.call_count == 2 + mock_s3.copy_object.assert_any_call( + Bucket='test-bucket', + Key='snapshots/file1', + CopySource={'Bucket': 'test-bucket', 'Key': 'snapshots/file1'}, + StorageClass='GLACIER' + ) + + def test_push_to_glacier_with_trailing_slash(self): + """Test push to Glacier with trailing slash in base_path""" + mock_s3 = Mock() + mock_s3.list_objects.return_value = [ + {'Key': 'snapshots/file1', 'StorageClass': 'STANDARD'} + ] + + repo = Repository( + name='test-repo', + bucket='test-bucket', + base_path='snapshots/' # With trailing slash + ) + + with patch('curator.actions.deepfreeze.utilities.logging'): + push_to_glacier(mock_s3, repo) + + # Should normalize the path + mock_s3.list_objects.assert_called_once_with('test-bucket', 'snapshots/') + + def test_push_to_glacier_partial_failure(self): + """Test push to Glacier with partial failure""" + mock_s3 = Mock() + mock_s3.list_objects.return_value = [ + {'Key': 'snapshots/file1', 'StorageClass': 'STANDARD'}, + {'Key': 'snapshots/file2', 'StorageClass': 'STANDARD'} + ] + + # First call succeeds, second fails + mock_s3.copy_object.side_effect = [ + None, + botocore.exceptions.ClientError({'Error': {'Code': 'AccessDenied'}}, 'copy_object') + ] + + repo = Repository( + name='test-repo', + bucket='test-bucket', + base_path='snapshots' + ) + + with patch('curator.actions.deepfreeze.utilities.logging'): + result = push_to_glacier(mock_s3, repo) + + assert result is False # Should return False due to partial failure + assert mock_s3.copy_object.call_count == 2 + + def test_push_to_glacier_list_error(self): + """Test push to Glacier with list objects error""" + mock_s3 = Mock() + mock_s3.list_objects.side_effect = botocore.exceptions.ClientError( + {'Error': {'Code': 'NoSuchBucket'}}, 
'list_objects' + ) + + repo = Repository( + name='test-repo', + bucket='test-bucket', + base_path='snapshots' + ) + + with patch('curator.actions.deepfreeze.utilities.logging'): + result = push_to_glacier(mock_s3, repo) + + assert result is False + + +class TestGetAllIndicesInRepo(TestCase): + """Test get_all_indices_in_repo function""" + + def test_get_all_indices_success(self): + """Test successful retrieval of all indices""" + mock_client = Mock() + mock_client.snapshot.get.return_value = { + 'snapshots': [ + {'indices': ['index1', 'index2']}, + {'indices': ['index2', 'index3']}, + {'indices': ['index4']} + ] + } + + result = get_all_indices_in_repo(mock_client, 'test-repo') + + assert sorted(result) == ['index1', 'index2', 'index3', 'index4'] + mock_client.snapshot.get.assert_called_once_with( + repository='test-repo', + snapshot='_all' + ) + + def test_get_all_indices_empty_repo(self): + """Test get_all_indices with empty repository""" + mock_client = Mock() + mock_client.snapshot.get.return_value = {'snapshots': []} + + result = get_all_indices_in_repo(mock_client, 'test-repo') + + assert result == [] + + def test_get_all_indices_no_indices(self): + """Test get_all_indices with snapshots but no indices""" + mock_client = Mock() + mock_client.snapshot.get.return_value = { + 'snapshots': [ + {'indices': []}, + {'indices': []} + ] + } + + result = get_all_indices_in_repo(mock_client, 'test-repo') + + assert result == [] + + +class TestGetTimestampRange(TestCase): + """Test get_timestamp_range function""" + + def test_get_timestamp_range_success(self): + """Test successful timestamp range retrieval""" + mock_client = Mock() + mock_client.indices.exists.return_value = True + mock_client.search.return_value = { + 'aggregations': { + 'earliest': {'value_as_string': '2021-01-01T00:00:00.000Z'}, + 'latest': {'value_as_string': '2022-01-01T00:00:00.000Z'} + } + } + + with patch('curator.actions.deepfreeze.utilities.logging'): + earliest, latest = 
get_timestamp_range(mock_client, ['index1', 'index2']) + + assert earliest == datetime(2021, 1, 1, 0, 0, tzinfo=timezone.utc) + assert latest == datetime(2022, 1, 1, 0, 0, tzinfo=timezone.utc) + + def test_get_timestamp_range_empty_indices(self): + """Test timestamp range with empty indices list""" + mock_client = Mock() + + with patch('curator.actions.deepfreeze.utilities.logging'): + earliest, latest = get_timestamp_range(mock_client, []) + + assert earliest is None + assert latest is None + + def test_get_timestamp_range_nonexistent_indices(self): + """Test timestamp range with non-existent indices""" + mock_client = Mock() + mock_client.indices.exists.return_value = False + # Mock search to raise exception when called with empty index + mock_client.search.side_effect = Exception("No indices to search") + + with patch('curator.actions.deepfreeze.utilities.logging'): + earliest, latest = get_timestamp_range(mock_client, ['index1', 'index2']) + + # Should return None, None when no valid indices after filtering (exception caught) + assert earliest is None + assert latest is None + + def test_get_timestamp_range_mixed_indices(self): + """Test timestamp range with mix of existing and non-existing indices""" + mock_client = Mock() + mock_client.indices.exists.side_effect = [True, False, True] # index1 exists, index2 doesn't, index3 exists + mock_client.search.return_value = { + 'aggregations': { + 'earliest': {'value_as_string': '2021-01-01T00:00:00.000Z'}, + 'latest': {'value_as_string': '2022-01-01T00:00:00.000Z'} + } + } + + with patch('curator.actions.deepfreeze.utilities.logging'): + earliest, latest = get_timestamp_range( + mock_client, + ['index1', 'index2', 'index3'] + ) + + # Should only search on existing indices + mock_client.search.assert_called_once() + call_args = mock_client.search.call_args + assert call_args[1]['index'] == 'index1,index3' + + +class TestGetRepository(TestCase): + """Test get_repository function""" + + def 
test_get_repository_found(self): + """Test get_repository when repository exists""" + mock_client = Mock() + mock_response = { + 'hits': { + 'total': {'value': 1}, + 'hits': [{ + '_id': 'repo-id', + '_source': { + 'name': 'test-repo', + 'bucket': 'test-bucket' + } + }] + } + } + mock_client.search.return_value = mock_response + + with patch('curator.actions.deepfreeze.utilities.logging'): + result = get_repository(mock_client, 'test-repo') + + assert result.name == 'test-repo' + assert result.bucket == 'test-bucket' + assert result.docid == 'repo-id' + + def test_get_repository_not_found(self): + """Test get_repository when repository doesn't exist""" + mock_client = Mock() + mock_response = { + 'hits': { + 'total': {'value': 0}, + 'hits': [] + } + } + mock_client.search.return_value = mock_response + + with patch('curator.actions.deepfreeze.utilities.logging'): + result = get_repository(mock_client, 'test-repo') + + assert result.name == 'test-repo' + assert result.bucket is None + + +class TestGetAllRepos(TestCase): + """Test get_all_repos function""" + + def test_get_all_repos_success(self): + """Test successful retrieval of all repositories""" + mock_client = Mock() + mock_client.search.return_value = { + 'hits': { + 'hits': [ + { + '_id': 'id1', + '_source': { + 'name': 'repo1', + 'bucket': 'bucket1', + 'doctype': 'repository' + } + }, + { + '_id': 'id2', + '_source': { + 'name': 'repo2', + 'bucket': 'bucket2', + 'doctype': 'repository' + } + } + ] + } + } + + with patch('curator.actions.deepfreeze.utilities.logging'): + result = get_all_repos(mock_client) + + assert len(result) == 2 + assert all(isinstance(repo, Repository) for repo in result) + assert result[0].name == 'repo1' + assert result[1].name == 'repo2' + + def test_get_all_repos_empty(self): + """Test get_all_repos when no repositories exist""" + mock_client = Mock() + mock_client.search.return_value = {'hits': {'hits': []}} + + with patch('curator.actions.deepfreeze.utilities.logging'): + result = 
get_all_repos(mock_client) + + assert result == [] + + +class TestGetSettings(TestCase): + """Test get_settings function""" + + def test_get_settings_success(self): + """Test successful retrieval of settings""" + mock_client = Mock() + mock_client.indices.exists.return_value = True + mock_client.get.return_value = { + '_source': { + 'repo_name_prefix': 'deepfreeze', + 'bucket_name_prefix': 'deepfreeze', + 'storage_class': 'GLACIER', + 'provider': 'aws', + 'doctype': 'settings' # Include doctype to test filtering + } + } + + with patch('curator.actions.deepfreeze.utilities.logging'): + result = get_settings(mock_client) + + assert isinstance(result, Settings) + assert result.repo_name_prefix == 'deepfreeze' + assert result.storage_class == 'GLACIER' + + def test_get_settings_index_missing(self): + """Test get_settings when status index doesn't exist""" + mock_client = Mock() + mock_client.indices.exists.return_value = False + + with patch('curator.actions.deepfreeze.utilities.logging'): + with pytest.raises(MissingIndexError): + get_settings(mock_client) + + def test_get_settings_not_found(self): + """Test get_settings when settings don't exist""" + mock_client = Mock() + mock_client.indices.exists.return_value = True + from elasticsearch8 import NotFoundError + mock_client.get.side_effect = NotFoundError(404, 'not_found', {}) + + with patch('curator.actions.deepfreeze.utilities.logging'): + result = get_settings(mock_client) + + assert result is None + + +class TestSaveSettings(TestCase): + """Test save_settings function""" + + def test_save_settings_new(self): + """Test saving new settings""" + mock_client = Mock() + from elasticsearch8 import NotFoundError + mock_client.get.side_effect = NotFoundError(404, 'not_found', {}) + + settings = Settings( + repo_name_prefix='test', + storage_class='GLACIER' + ) + + with patch('curator.actions.deepfreeze.utilities.logging'): + save_settings(mock_client, settings) + + mock_client.create.assert_called_once() + call_args = 
mock_client.create.call_args + assert call_args[1]['index'] == STATUS_INDEX + assert call_args[1]['id'] == SETTINGS_ID + + def test_save_settings_update(self): + """Test updating existing settings""" + mock_client = Mock() + mock_client.get.return_value = {'_source': {}} + + settings = Settings( + repo_name_prefix='test', + storage_class='GLACIER' + ) + + with patch('curator.actions.deepfreeze.utilities.logging'): + save_settings(mock_client, settings) + + mock_client.update.assert_called_once() + call_args = mock_client.update.call_args + assert call_args[1]['index'] == STATUS_INDEX + assert call_args[1]['id'] == SETTINGS_ID + + +class TestGetNextSuffix(TestCase): + """Test get_next_suffix function""" + + def test_get_next_suffix_oneup(self): + """Test get_next_suffix with oneup style""" + assert get_next_suffix('oneup', '000001', None, None) == '000002' + assert get_next_suffix('oneup', '000009', None, None) == '000010' + assert get_next_suffix('oneup', '000099', None, None) == '000100' + assert get_next_suffix('oneup', '999999', None, None) == '1000000' + + def test_get_next_suffix_date(self): + """Test get_next_suffix with date style""" + assert get_next_suffix('date', '2024.01', 2024, 3) == '2024.03' + + def test_get_next_suffix_date_current(self): + """Test get_next_suffix with date style using current date""" + with patch('curator.actions.deepfreeze.utilities.datetime') as mock_dt: + mock_dt.now.return_value = datetime(2024, 3, 15) + assert get_next_suffix('date', '2024.02', None, None) == '2024.03' + + def test_get_next_suffix_invalid_style(self): + """Test get_next_suffix with invalid style""" + with pytest.raises(ValueError, match="Invalid style"): + get_next_suffix('invalid', '000001', None, None) + + +class TestGetMatchingRepoNames(TestCase): + """Test get_matching_repo_names function""" + + def test_get_matching_repo_names_success(self): + """Test successful retrieval of matching repository names""" + mock_client = Mock() + 
mock_client.snapshot.get_repository.return_value = { + 'deepfreeze-001': {}, + 'deepfreeze-002': {}, + 'other-repo': {}, + 'deepfreeze-003': {} + } + + with patch('curator.actions.deepfreeze.utilities.logging'): + result = get_matching_repo_names(mock_client, 'deepfreeze-') + + assert sorted(result) == ['deepfreeze-001', 'deepfreeze-002', 'deepfreeze-003'] + + def test_get_matching_repo_names_no_matches(self): + """Test get_matching_repo_names with no matches""" + mock_client = Mock() + mock_client.snapshot.get_repository.return_value = { + 'other-repo-1': {}, + 'other-repo-2': {} + } + + with patch('curator.actions.deepfreeze.utilities.logging'): + result = get_matching_repo_names(mock_client, 'deepfreeze-') + + assert result == [] + + +class TestGetMatchingRepos(TestCase): + """Test get_matching_repos function""" + + def test_get_matching_repos_success(self): + """Test successful retrieval of matching repositories""" + mock_client = Mock() + mock_client.search.return_value = { + 'hits': { + 'hits': [ + { + '_id': 'id1', + '_source': { + 'name': 'deepfreeze-001', + 'bucket': 'bucket1', + 'is_mounted': True + } + }, + { + '_id': 'id2', + '_source': { + 'name': 'other-repo', + 'bucket': 'bucket2', + 'is_mounted': False + } + }, + { + '_id': 'id3', + '_source': { + 'name': 'deepfreeze-002', + 'bucket': 'bucket3', + 'is_mounted': False + } + } + ] + } + } + + with patch('curator.actions.deepfreeze.utilities.logging'): + result = get_matching_repos(mock_client, 'deepfreeze-') + + # Should return only deepfreeze repos + assert len(result) == 2 + repo_names = [repo.name for repo in result] + assert 'deepfreeze-001' in repo_names + assert 'deepfreeze-002' in repo_names + + def test_get_matching_repos_mounted_only(self): + """Test get_matching_repos with mounted filter""" + mock_client = Mock() + mock_client.search.return_value = { + 'hits': { + 'hits': [ + { + '_id': 'id1', + '_source': { + 'name': 'deepfreeze-001', + 'bucket': 'bucket1', + 'is_mounted': True + } + }, + { 
+ '_id': 'id2', + '_source': { + 'name': 'deepfreeze-002', + 'bucket': 'bucket2', + 'is_mounted': False + } + } + ] + } + } + + with patch('curator.actions.deepfreeze.utilities.logging'): + result = get_matching_repos(mock_client, 'deepfreeze-', mounted=True) + + # Should return only mounted repos + assert len(result) == 1 + assert result[0].name == 'deepfreeze-001' + + +class TestUnmountRepo(TestCase): + """Test unmount_repo function""" + + def test_unmount_repo_success(self): + """Test successful repository unmounting""" + mock_client = Mock() + mock_client.snapshot.get_repository.return_value = { + 'test-repo': { + 'settings': { + 'bucket': 'test-bucket', + 'base_path': 'test-path' + } + } + } + mock_client.search.return_value = { + 'hits': { + 'total': {'value': 1}, + 'hits': [{ + '_id': 'repo-id', + '_source': { + 'name': 'test-repo', + 'bucket': 'test-bucket' + } + }] + } + } + + with patch('curator.actions.deepfreeze.utilities.get_all_indices_in_repo', return_value=['index1']): + with patch('curator.actions.deepfreeze.utilities.get_timestamp_range', return_value=(None, None)): + with patch('curator.actions.deepfreeze.utilities.decode_date', return_value=datetime.now()): + with patch('curator.actions.deepfreeze.utilities.logging'): + result = unmount_repo(mock_client, 'test-repo') + + mock_client.snapshot.delete_repository.assert_called_once_with(name='test-repo') + mock_client.update.assert_called_once() + assert result.name == 'test-repo' + assert result.is_mounted is False + + +class TestDecodeDate(TestCase): + """Test decode_date function""" + + def test_decode_date_datetime_utc(self): + """Test decode_date with datetime object in UTC""" + dt = datetime(2024, 1, 1, 12, 0, 0, tzinfo=timezone.utc) + result = decode_date(dt) + assert result == dt + + def test_decode_date_datetime_naive(self): + """Test decode_date with naive datetime object""" + dt = datetime(2024, 1, 1, 12, 0, 0) + result = decode_date(dt) + assert result == dt.replace(tzinfo=timezone.utc) 
+ + def test_decode_date_string(self): + """Test decode_date with ISO string""" + date_str = "2024-01-01T12:00:00" + with patch('curator.actions.deepfreeze.utilities.logging'): + result = decode_date(date_str) + + expected = datetime(2024, 1, 1, 12, 0, 0, tzinfo=timezone.utc) + assert result == expected + + def test_decode_date_invalid(self): + """Test decode_date with invalid input""" + with pytest.raises(ValueError): + decode_date(12345) + + +class TestCreateIlmPolicy(TestCase): + """Test create_ilm_policy function""" + + def test_create_ilm_policy_success(self): + """Test successful ILM policy creation""" + mock_client = Mock() + policy_body = {'phases': {'hot': {}}} + + with patch('curator.actions.deepfreeze.utilities.logging'): + create_ilm_policy(mock_client, 'test-policy', policy_body) + + mock_client.ilm.put_lifecycle.assert_called_once_with( + name='test-policy', + body=policy_body + ) + + def test_create_ilm_policy_error(self): + """Test ILM policy creation error""" + mock_client = Mock() + mock_client.ilm.put_lifecycle.side_effect = Exception('Policy creation failed') + policy_body = {'phases': {'hot': {}}} + + with patch('curator.actions.deepfreeze.utilities.logging'): + with pytest.raises(ActionError): + create_ilm_policy(mock_client, 'test-policy', policy_body) + +class TestUpdateRepositoryDateRange(TestCase): + """Test update_repository_date_range function""" + + def test_update_date_range_success(self): + """Test successful date range update""" + mock_client = Mock() + # Mock get_all_indices_in_repo + mock_client.snapshot.get.return_value = { + 'snapshots': [{'indices': ['index1', 'index2']}] + } + # Mock index existence checks - simulating partial- prefix + mock_client.indices.exists.side_effect = [False, True, False, True] + # Mock status index search for update + mock_client.search.return_value = { + 'hits': {'total': {'value': 1}, 'hits': [{'_id': 'repo-doc-id'}]} + } + + repo = Repository(name='test-repo') + + # Mock the get_timestamp_range 
function directly + earliest = datetime(2024, 1, 1, 0, 0, tzinfo=timezone.utc) + latest = datetime(2024, 12, 31, 23, 59, 59, tzinfo=timezone.utc) + + with patch('curator.actions.deepfreeze.utilities.get_timestamp_range', return_value=(earliest, latest)): + with patch('curator.actions.deepfreeze.utilities.logging'): + result = update_repository_date_range(mock_client, repo) + + assert result is True + assert repo.start is not None + assert repo.end is not None + mock_client.update.assert_called_once() + + def test_update_date_range_no_mounted_indices(self): + """Test update with no mounted indices""" + mock_client = Mock() + mock_client.snapshot.get.return_value = { + 'snapshots': [{'indices': ['index1']}] + } + # All index existence checks return False + mock_client.indices.exists.return_value = False + + repo = Repository(name='test-repo') + + with patch('curator.actions.deepfreeze.utilities.logging'): + result = update_repository_date_range(mock_client, repo) + + assert result is False + mock_client.update.assert_not_called() + + def test_update_date_range_handles_original_names(self): + """Test update with indices mounted using original names""" + mock_client = Mock() + mock_client.snapshot.get.return_value = { + 'snapshots': [{'indices': ['index1']}] + } + # Original name exists + mock_client.indices.exists.side_effect = [True] + # Mock status index search for update + mock_client.search.return_value = { + 'hits': {'total': {'value': 1}, 'hits': [{'_id': 'repo-doc-id'}]} + } + + repo = Repository(name='test-repo') + + # Mock the get_timestamp_range function directly + earliest = datetime(2024, 1, 1, 0, 0, tzinfo=timezone.utc) + latest = datetime(2024, 12, 31, 23, 59, 59, tzinfo=timezone.utc) + + with patch('curator.actions.deepfreeze.utilities.get_timestamp_range', return_value=(earliest, latest)): + with patch('curator.actions.deepfreeze.utilities.logging'): + result = update_repository_date_range(mock_client, repo) + + assert result is True + + def 
test_update_date_range_handles_restored_prefix(self): + """Test update with indices using restored- prefix""" + mock_client = Mock() + mock_client.snapshot.get.return_value = { + 'snapshots': [{'indices': ['index1']}] + } + # Original and partial- don't exist, restored- does + mock_client.indices.exists.side_effect = [False, False, True] + # Mock status index search for update + mock_client.search.return_value = { + 'hits': {'total': {'value': 1}, 'hits': [{'_id': 'repo-doc-id'}]} + } + + repo = Repository(name='test-repo') + + # Mock the get_timestamp_range function directly + earliest = datetime(2024, 1, 1, 0, 0, tzinfo=timezone.utc) + latest = datetime(2024, 12, 31, 23, 59, 59, tzinfo=timezone.utc) + + with patch('curator.actions.deepfreeze.utilities.get_timestamp_range', return_value=(earliest, latest)): + with patch('curator.actions.deepfreeze.utilities.logging'): + result = update_repository_date_range(mock_client, repo) + + assert result is True + + def test_update_date_range_no_timestamp_data(self): + """Test update when timestamp query returns None""" + mock_client = Mock() + mock_client.snapshot.get.return_value = { + 'snapshots': [{'indices': ['index1']}] + } + mock_client.indices.exists.return_value = True + + repo = Repository(name='test-repo') + + with patch('curator.actions.deepfreeze.utilities.get_timestamp_range', return_value=(None, None)): + with patch('curator.actions.deepfreeze.utilities.logging'): + result = update_repository_date_range(mock_client, repo) + + assert result is False + mock_client.update.assert_not_called() + + def test_update_date_range_exception_handling(self): + """Test update handles exceptions gracefully""" + mock_client = Mock() + mock_client.snapshot.get.side_effect = Exception("Repository error") + + repo = Repository(name='test-repo') + + with patch('curator.actions.deepfreeze.utilities.logging'): + result = update_repository_date_range(mock_client, repo) + + assert result is False + + def 
test_update_date_range_creates_new_document(self): + """Test update creates document if it doesn't exist""" + mock_client = Mock() + mock_client.snapshot.get.return_value = { + 'snapshots': [{'indices': ['index1']}] + } + mock_client.indices.exists.return_value = True + mock_client.search.side_effect = [ + # First search for timestamp data + { + 'aggregations': { + 'earliest': {'value_as_string': '2024-01-01T00:00:00.000Z'}, + 'latest': {'value_as_string': '2024-12-31T23:59:59.000Z'} + } + }, + # Second search for existing document - returns nothing + {'hits': {'total': {'value': 0}, 'hits': []}} + ] + + repo = Repository(name='test-repo') + + with patch('curator.actions.deepfreeze.utilities.logging'): + result = update_repository_date_range(mock_client, repo) + + assert result is True + mock_client.index.assert_called_once() + + +class TestGetIndexTemplates(TestCase): + """Test get_index_templates function""" + + def test_get_index_templates_success(self): + """Test successful retrieval of legacy templates""" + mock_client = Mock() + mock_client.indices.get_template.return_value = { + 'template1': {'settings': {}}, + 'template2': {'settings': {}} + } + + with patch('curator.actions.deepfreeze.utilities.logging'): + result = get_index_templates(mock_client) + + assert len(result) == 2 + assert 'template1' in result + assert 'template2' in result + + def test_get_index_templates_error(self): + """Test get_index_templates error handling""" + mock_client = Mock() + mock_client.indices.get_template.side_effect = Exception("API error") + + with patch('curator.actions.deepfreeze.utilities.logging'): + with pytest.raises(ActionError): + get_index_templates(mock_client) + + +class TestGetComposableTemplates(TestCase): + """Test get_composable_templates function""" + + def test_get_composable_templates_success(self): + """Test successful retrieval of composable templates""" + mock_client = Mock() + mock_client.indices.get_index_template.return_value = { + 'index_templates': 
[ + {'name': 'template1'}, + {'name': 'template2'} + ] + } + + with patch('curator.actions.deepfreeze.utilities.logging'): + result = get_composable_templates(mock_client) + + assert 'index_templates' in result + assert len(result['index_templates']) == 2 + + def test_get_composable_templates_error(self): + """Test get_composable_templates error handling""" + mock_client = Mock() + mock_client.indices.get_index_template.side_effect = Exception("API error") + + with patch('curator.actions.deepfreeze.utilities.logging'): + with pytest.raises(ActionError): + get_composable_templates(mock_client) + + +class TestUpdateTemplateIlmPolicy(TestCase): + """Test update_template_ilm_policy function""" + + def test_update_composable_template_success(self): + """Test successful update of composable template""" + mock_client = Mock() + mock_client.indices.get_index_template.return_value = { + 'index_templates': [{ + 'name': 'test-template', + 'index_template': { + 'template': { + 'settings': { + 'index': { + 'lifecycle': {'name': 'old-policy'} + } + } + } + } + }] + } + + with patch('curator.actions.deepfreeze.utilities.logging'): + result = update_template_ilm_policy( + mock_client, 'test-template', 'old-policy', 'new-policy', is_composable=True + ) + + assert result is True + mock_client.indices.put_index_template.assert_called_once() + + def test_update_legacy_template_success(self): + """Test successful update of legacy template""" + mock_client = Mock() + mock_client.indices.get_template.return_value = { + 'test-template': { + 'settings': { + 'index': { + 'lifecycle': {'name': 'old-policy'} + } + } + } + } + + with patch('curator.actions.deepfreeze.utilities.logging'): + result = update_template_ilm_policy( + mock_client, 'test-template', 'old-policy', 'new-policy', is_composable=False + ) + + assert result is True + mock_client.indices.put_template.assert_called_once() + + def test_update_template_no_match(self): + """Test template update when policy doesn't match""" + 
mock_client = Mock() + mock_client.indices.get_index_template.return_value = { + 'index_templates': [{ + 'name': 'test-template', + 'index_template': { + 'template': { + 'settings': { + 'index': { + 'lifecycle': {'name': 'different-policy'} + } + } + } + } + }] + } + + with patch('curator.actions.deepfreeze.utilities.logging'): + result = update_template_ilm_policy( + mock_client, 'test-template', 'old-policy', 'new-policy', is_composable=True + ) + + assert result is False + mock_client.indices.put_index_template.assert_not_called() + + +class TestCreateVersionedIlmPolicy(TestCase): + """Test create_versioned_ilm_policy function""" + + def test_create_versioned_policy_success(self): + """Test successful creation of versioned policy""" + mock_client = Mock() + policy_body = { + 'phases': { + 'cold': { + 'actions': { + 'searchable_snapshot': { + 'snapshot_repository': 'old-repo' + } + } + } + } + } + + with patch('curator.actions.deepfreeze.utilities.logging'): + result = create_versioned_ilm_policy( + mock_client, 'my-policy', policy_body, 'new-repo', '000005' + ) + + assert result == 'my-policy-000005' + mock_client.ilm.put_lifecycle.assert_called_once() + call_args = mock_client.ilm.put_lifecycle.call_args + assert call_args[1]['name'] == 'my-policy-000005' + # Verify repo was updated in policy + policy_arg = call_args[1]['policy'] + assert policy_arg['phases']['cold']['actions']['searchable_snapshot']['snapshot_repository'] == 'new-repo' + + def test_create_versioned_policy_multiple_phases(self): + """Test versioned policy with multiple phases""" + mock_client = Mock() + policy_body = { + 'phases': { + 'cold': { + 'actions': { + 'searchable_snapshot': { + 'snapshot_repository': 'old-repo' + } + } + }, + 'frozen': { + 'actions': { + 'searchable_snapshot': { + 'snapshot_repository': 'old-repo' + } + } + } + } + } + + with patch('curator.actions.deepfreeze.utilities.logging'): + result = create_versioned_ilm_policy( + mock_client, 'my-policy', policy_body, 
'new-repo', '000005' + ) + + # Verify all phases were updated + call_args = mock_client.ilm.put_lifecycle.call_args + policy_arg = call_args[1]['policy'] + assert policy_arg['phases']['cold']['actions']['searchable_snapshot']['snapshot_repository'] == 'new-repo' + assert policy_arg['phases']['frozen']['actions']['searchable_snapshot']['snapshot_repository'] == 'new-repo' + + def test_create_versioned_policy_error(self): + """Test versioned policy creation error""" + mock_client = Mock() + mock_client.ilm.put_lifecycle.side_effect = Exception("Policy creation failed") + policy_body = {'phases': {}} + + with patch('curator.actions.deepfreeze.utilities.logging'): + with pytest.raises(ActionError): + create_versioned_ilm_policy( + mock_client, 'my-policy', policy_body, 'new-repo', '000005' + ) + + +class TestGetPoliciesForRepo(TestCase): + """Test get_policies_for_repo function""" + + def test_get_policies_for_repo_success(self): + """Test successful retrieval of policies for repository""" + mock_client = Mock() + mock_client.ilm.get_lifecycle.return_value = { + 'policy1': { + 'policy': { + 'phases': { + 'cold': { + 'actions': { + 'searchable_snapshot': { + 'snapshot_repository': 'target-repo' + } + } + } + } + } + }, + 'policy2': { + 'policy': { + 'phases': { + 'frozen': { + 'actions': { + 'searchable_snapshot': { + 'snapshot_repository': 'other-repo' + } + } + } + } + } + }, + 'policy3': { + 'policy': { + 'phases': { + 'cold': { + 'actions': { + 'searchable_snapshot': { + 'snapshot_repository': 'target-repo' + } + } + } + } + } + } + } + + with patch('curator.actions.deepfreeze.utilities.logging'): + result = get_policies_for_repo(mock_client, 'target-repo') + + assert len(result) == 2 + assert 'policy1' in result + assert 'policy3' in result + assert 'policy2' not in result + + def test_get_policies_for_repo_no_matches(self): + """Test get_policies_for_repo with no matches""" + mock_client = Mock() + mock_client.ilm.get_lifecycle.return_value = { + 'policy1': { + 
'policy': { + 'phases': { + 'cold': { + 'actions': {} + } + } + } + } + } + + with patch('curator.actions.deepfreeze.utilities.logging'): + result = get_policies_for_repo(mock_client, 'target-repo') + + assert len(result) == 0 + + +class TestGetPoliciesBySuffix(TestCase): + """Test get_policies_by_suffix function""" + + def test_get_policies_by_suffix_success(self): + """Test successful retrieval of policies by suffix""" + mock_client = Mock() + mock_client.ilm.get_lifecycle.return_value = { + 'my-policy-000003': {'policy': {}}, + 'other-policy-000003': {'policy': {}}, + 'different-policy-000004': {'policy': {}}, + 'my-policy': {'policy': {}} + } + + with patch('curator.actions.deepfreeze.utilities.logging'): + result = get_policies_by_suffix(mock_client, '000003') + + assert len(result) == 2 + assert 'my-policy-000003' in result + assert 'other-policy-000003' in result + assert 'different-policy-000004' not in result + assert 'my-policy' not in result + + def test_get_policies_by_suffix_no_matches(self): + """Test get_policies_by_suffix with no matches""" + mock_client = Mock() + mock_client.ilm.get_lifecycle.return_value = { + 'policy1': {'policy': {}}, + 'policy2': {'policy': {}} + } + + with patch('curator.actions.deepfreeze.utilities.logging'): + result = get_policies_by_suffix(mock_client, '000003') + + assert len(result) == 0 + + +class TestIsPolicySafeToDelete(TestCase): + """Test is_policy_safe_to_delete function""" + + def test_policy_safe_to_delete(self): + """Test policy that is safe to delete""" + mock_client = Mock() + mock_client.ilm.get_lifecycle.return_value = { + 'test-policy': { + 'policy': {}, + 'in_use_by': { + 'indices': [], + 'data_streams': [], + 'composable_templates': [] + } + } + } + + with patch('curator.actions.deepfreeze.utilities.logging'): + result = is_policy_safe_to_delete(mock_client, 'test-policy') + + assert result is True + + def test_policy_in_use_by_indices(self): + """Test policy that is in use by indices""" + mock_client = 
Mock() + mock_client.ilm.get_lifecycle.return_value = { + 'test-policy': { + 'policy': {}, + 'in_use_by': { + 'indices': ['index1', 'index2'], + 'data_streams': [], + 'composable_templates': [] + } + } + } + + with patch('curator.actions.deepfreeze.utilities.logging'): + result = is_policy_safe_to_delete(mock_client, 'test-policy') + + assert result is False + + def test_policy_in_use_by_data_streams(self): + """Test policy that is in use by data streams""" + mock_client = Mock() + mock_client.ilm.get_lifecycle.return_value = { + 'test-policy': { + 'policy': {}, + 'in_use_by': { + 'indices': [], + 'data_streams': ['logs-stream'], + 'composable_templates': [] + } + } + } + + with patch('curator.actions.deepfreeze.utilities.logging'): + result = is_policy_safe_to_delete(mock_client, 'test-policy') + + assert result is False + + def test_policy_in_use_by_templates(self): + """Test policy that is in use by templates""" + mock_client = Mock() + mock_client.ilm.get_lifecycle.return_value = { + 'test-policy': { + 'policy': {}, + 'in_use_by': { + 'indices': [], + 'data_streams': [], + 'composable_templates': ['template1'] + } + } + } + + with patch('curator.actions.deepfreeze.utilities.logging'): + result = is_policy_safe_to_delete(mock_client, 'test-policy') + + assert result is False + + def test_policy_not_found(self): + """Test policy that doesn't exist""" + mock_client = Mock() + mock_client.ilm.get_lifecycle.return_value = {} + + with patch('curator.actions.deepfreeze.utilities.logging'): + result = is_policy_safe_to_delete(mock_client, 'test-policy') + + assert result is False + + def test_policy_not_found_exception(self): + """Test policy check with NotFoundError""" + mock_client = Mock() + from elasticsearch8 import NotFoundError + mock_client.ilm.get_lifecycle.side_effect = NotFoundError(404, 'not_found', {}) + + with patch('curator.actions.deepfreeze.utilities.logging'): + result = is_policy_safe_to_delete(mock_client, 'test-policy') + + assert result is False + 
+ +class TestGetIndexDatastreamName(TestCase): + """Test get_index_datastream_name function""" + + def test_datastream_from_metadata(self): + """Test extracting data stream name from index metadata""" + mock_client = Mock() + mock_client.indices.get_settings.return_value = { + '.ds-logs-2024.01.01-000001': { + 'settings': { + 'index': { + 'provided_name': '.ds-logs-2024.01.01-000001' + } + } + } + } + + with patch('curator.actions.deepfreeze.utilities.logging'): + result = get_index_datastream_name(mock_client, '.ds-logs-2024.01.01-000001') + + assert result == 'logs' + + def test_datastream_from_index_name_fallback(self): + """Test extracting data stream name from index name when metadata is missing""" + mock_client = Mock() + mock_client.indices.get_settings.return_value = { + '.ds-metrics-cpu-2024.01.01-000002': { + 'settings': { + 'index': { + # No provided_name - testing fallback to index name + } + } + } + } + + with patch('curator.actions.deepfreeze.utilities.logging'): + result = get_index_datastream_name(mock_client, '.ds-metrics-cpu-2024.01.01-000002') + + assert result == 'metrics-cpu' + + def test_non_datastream_index(self): + """Test that non-datastream indices return None""" + mock_client = Mock() + mock_client.indices.get_settings.return_value = { + 'regular-index-2024.01.01': { + 'settings': { + 'index': { + 'provided_name': 'regular-index-2024.01.01' + } + } + } + } + + with patch('curator.actions.deepfreeze.utilities.logging'): + result = get_index_datastream_name(mock_client, 'regular-index-2024.01.01') + + assert result is None + + def test_exception_handling(self): + """Test error handling when getting index settings fails""" + mock_client = Mock() + mock_client.indices.get_settings.side_effect = Exception('Connection error') + + with patch('curator.actions.deepfreeze.utilities.logging'): + result = get_index_datastream_name(mock_client, '.ds-logs-2024.01.01-000001') + + assert result is None + + +class TestAddIndexToDatastream(TestCase): + 
"""Test add_index_to_datastream function""" + + def test_add_index_successfully(self): + """Test successfully adding an index to a data stream""" + mock_client = Mock() + mock_client.indices.get_data_stream.return_value = {'data_streams': [{'name': 'logs'}]} + mock_client.indices.modify_data_stream.return_value = {'acknowledged': True} + + with patch('curator.actions.deepfreeze.utilities.logging'): + result = add_index_to_datastream(mock_client, 'logs', '.ds-logs-2024.01.01-000001') + + assert result is True + mock_client.indices.modify_data_stream.assert_called_once_with( + body={ + 'actions': [ + {'add_backing_index': {'data_stream': 'logs', 'index': '.ds-logs-2024.01.01-000001'}} + ] + } + ) + + def test_datastream_not_found(self): + """Test adding index when data stream doesn't exist""" + mock_client = Mock() + from elasticsearch8 import NotFoundError + mock_client.indices.get_data_stream.side_effect = NotFoundError(404, 'not_found', {}) + + with patch('curator.actions.deepfreeze.utilities.logging'): + result = add_index_to_datastream(mock_client, 'logs', '.ds-logs-2024.01.01-000001') + + assert result is False + + def test_add_index_fails(self): + """Test when adding index to data stream fails""" + mock_client = Mock() + mock_client.indices.get_data_stream.return_value = {'data_streams': [{'name': 'logs'}]} + mock_client.indices.modify_data_stream.side_effect = Exception('Failed to modify') + + with patch('curator.actions.deepfreeze.utilities.logging'): + result = add_index_to_datastream(mock_client, 'logs', '.ds-logs-2024.01.01-000001') + + assert result is False diff --git a/tests/unit/test_class_s3client.py b/tests/unit/test_class_s3client.py new file mode 100644 index 00000000..dd419697 --- /dev/null +++ b/tests/unit/test_class_s3client.py @@ -0,0 +1,618 @@ +"""Test S3Client classes""" +from unittest.mock import MagicMock, patch, call +import pytest +from botocore.exceptions import ClientError +from curator.exceptions import ActionError +from 
curator.s3client import AwsS3Client, S3Client, s3_client_factory + + +class TestS3ClientAbstract: + """Test abstract S3Client class""" + + def test_abstract_methods_not_implemented(self): + """Test that abstract methods raise NotImplementedError""" + # S3Client is abstract, cannot instantiate directly + with pytest.raises(TypeError): + S3Client() + + +class TestAwsS3Client: + """Test AwsS3Client class""" + + def setup_method(self): + """Setup for each test""" + with patch('boto3.client'): + self.s3 = AwsS3Client() + self.s3.client = MagicMock() + + def test_init(self): + """Test AwsS3Client initialization""" + with patch('boto3.client') as mock_boto: + mock_client = MagicMock() + mock_boto.return_value = mock_client + s3 = AwsS3Client() + mock_boto.assert_called_with("s3") + assert s3.loggit is not None + # Verify credential validation call + mock_client.list_buckets.assert_called_once() + + def test_init_invalid_credentials(self): + """Test AwsS3Client initialization with invalid credentials""" + with patch('boto3.client') as mock_boto: + mock_client = MagicMock() + mock_client.list_buckets.side_effect = ClientError( + {"Error": {"Code": "InvalidAccessKeyId"}}, "list_buckets" + ) + mock_boto.return_value = mock_client + + with pytest.raises(ActionError, match="AWS credentials are invalid"): + AwsS3Client() + + def test_init_access_denied(self): + """Test AwsS3Client initialization with insufficient permissions""" + with patch('boto3.client') as mock_boto: + mock_client = MagicMock() + mock_client.list_buckets.side_effect = ClientError( + {"Error": {"Code": "AccessDenied"}}, "list_buckets" + ) + mock_boto.return_value = mock_client + + with pytest.raises(ActionError, match="do not have sufficient permissions"): + AwsS3Client() + + def test_test_connection_success(self): + """Test successful connection test""" + self.s3.client.list_buckets.return_value = {"Buckets": []} + assert self.s3.test_connection() is True + + def test_test_connection_failure(self): + """Test 
failed connection test""" + self.s3.client.list_buckets.side_effect = ClientError( + {"Error": {"Code": "NetworkError"}}, "list_buckets" + ) + assert self.s3.test_connection() is False + + def test_create_bucket_success_us_east_1(self): + """Test successful bucket creation in us-east-1""" + self.s3.bucket_exists = MagicMock(return_value=False) + self.s3.client.meta.region_name = 'us-east-1' + self.s3.create_bucket("test-bucket") + self.s3.client.create_bucket.assert_called_with(Bucket="test-bucket") + + def test_create_bucket_success_other_region(self): + """Test successful bucket creation in non-us-east-1 region""" + self.s3.bucket_exists = MagicMock(return_value=False) + self.s3.client.meta.region_name = 'us-west-2' + self.s3.create_bucket("test-bucket") + self.s3.client.create_bucket.assert_called_with( + Bucket="test-bucket", + CreateBucketConfiguration={'LocationConstraint': 'us-west-2'} + ) + + def test_create_bucket_success_no_region(self): + """Test successful bucket creation with no region specified""" + self.s3.bucket_exists = MagicMock(return_value=False) + self.s3.client.meta.region_name = None + self.s3.create_bucket("test-bucket") + self.s3.client.create_bucket.assert_called_with(Bucket="test-bucket") + + def test_create_bucket_already_exists(self): + """Test bucket creation when bucket already exists""" + self.s3.bucket_exists = MagicMock(return_value=True) + with pytest.raises(ActionError, match="already exists"): + self.s3.create_bucket("test-bucket") + + def test_create_bucket_client_error(self): + """Test bucket creation with ClientError""" + self.s3.bucket_exists = MagicMock(return_value=False) + self.s3.client.create_bucket.side_effect = ClientError( + {"Error": {"Code": "BucketAlreadyExists"}}, "create_bucket" + ) + with pytest.raises(ActionError): + self.s3.create_bucket("test-bucket") + + def test_bucket_exists_true(self): + """Test bucket_exists returns True when bucket exists""" + self.s3.client.head_bucket.return_value = {} + assert 
self.s3.bucket_exists("test-bucket") is True + self.s3.client.head_bucket.assert_called_with(Bucket="test-bucket") + + def test_bucket_exists_false(self): + """Test bucket_exists returns False when bucket doesn't exist""" + self.s3.client.head_bucket.side_effect = ClientError( + {"Error": {"Code": "404"}}, "head_bucket" + ) + assert self.s3.bucket_exists("test-bucket") is False + + def test_bucket_exists_other_error(self): + """Test bucket_exists raises ActionError for non-404 errors""" + self.s3.client.head_bucket.side_effect = ClientError( + {"Error": {"Code": "403"}}, "head_bucket" + ) + with pytest.raises(ActionError): + self.s3.bucket_exists("test-bucket") + + def test_thaw_glacier_objects(self): + """Test thawing objects from Glacier with dict metadata""" + # Test with dict metadata (preferred format) + object_keys = [ + {"Key": "base_path/file1", "StorageClass": "GLACIER"}, + {"Key": "base_path/file2", "StorageClass": "GLACIER"} + ] + + self.s3.thaw( + "test-bucket", + "base_path", + object_keys, + 7, + "Standard" + ) + + assert self.s3.client.restore_object.call_count == 2 + self.s3.client.restore_object.assert_any_call( + Bucket="test-bucket", + Key="base_path/file1", + RestoreRequest={ + "Days": 7, + "GlacierJobParameters": {"Tier": "Standard"} + } + ) + + def test_thaw_glacier_objects_string_keys(self): + """Test thawing objects from Glacier with string keys (legacy)""" + self.s3.client.head_object.return_value = {"StorageClass": "GLACIER"} + + self.s3.thaw( + "test-bucket", + "base_path", + ["base_path/file1", "base_path/file2"], + 7, + "Standard" + ) + + assert self.s3.client.restore_object.call_count == 2 + assert self.s3.client.head_object.call_count == 2 + + def test_thaw_deep_archive_objects(self): + """Test thawing objects from Deep Archive""" + self.s3.client.head_object.return_value = {"StorageClass": "DEEP_ARCHIVE"} + + self.s3.thaw( + "test-bucket", + "base_path", + ["base_path/file1"], + 7, + "Expedited" + ) + + 
self.s3.client.restore_object.assert_called_once_with( + Bucket="test-bucket", + Key="base_path/file1", + RestoreRequest={ + "Days": 7, + "GlacierJobParameters": {"Tier": "Expedited"} + } + ) + + def test_thaw_skip_non_glacier(self): + """Test thaw skips non-Glacier storage classes""" + self.s3.client.head_object.return_value = {"StorageClass": "STANDARD"} + + self.s3.thaw("test-bucket", "base_path", ["base_path/file1"], 7, "Standard") + self.s3.client.restore_object.assert_not_called() + + def test_thaw_skip_wrong_path(self): + """Test thaw skips objects outside base_path""" + self.s3.client.head_object.return_value = {"StorageClass": "GLACIER"} + + self.s3.thaw("test-bucket", "base_path", ["wrong_path/file1"], 7, "Standard") + self.s3.client.restore_object.assert_not_called() + + def test_thaw_exception_handling(self): + """Test thaw handles exceptions gracefully""" + self.s3.client.head_object.side_effect = Exception("Test error") + + # Should not raise, just log the error + self.s3.thaw("test-bucket", "base_path", ["base_path/file1"], 7, "Standard") + self.s3.client.restore_object.assert_not_called() + + def test_refreeze_success(self): + """Test successful refreezing of objects""" + self.s3.client.get_paginator.return_value.paginate.return_value = [ + {"Contents": [ + {"Key": "base_path/file1"}, + {"Key": "base_path/file2"} + ]} + ] + + self.s3.refreeze("test-bucket", "base_path", "GLACIER") + + assert self.s3.client.copy_object.call_count == 2 + self.s3.client.copy_object.assert_any_call( + Bucket="test-bucket", + CopySource={"Bucket": "test-bucket", "Key": "base_path/file1"}, + Key="base_path/file1", + StorageClass="GLACIER" + ) + + def test_refreeze_deep_archive(self): + """Test refreezing to Deep Archive""" + self.s3.client.get_paginator.return_value.paginate.return_value = [ + {"Contents": [{"Key": "base_path/file1"}]} + ] + + self.s3.refreeze("test-bucket", "base_path", "DEEP_ARCHIVE") + + self.s3.client.copy_object.assert_called_with( + 
Bucket="test-bucket", + CopySource={"Bucket": "test-bucket", "Key": "base_path/file1"}, + Key="base_path/file1", + StorageClass="DEEP_ARCHIVE" + ) + + def test_refreeze_no_contents(self): + """Test refreeze when no contents returned""" + self.s3.client.get_paginator.return_value.paginate.return_value = [{}] + + self.s3.refreeze("test-bucket", "base_path", "GLACIER") + self.s3.client.copy_object.assert_not_called() + + def test_refreeze_exception_handling(self): + """Test refreeze handles exceptions gracefully""" + self.s3.client.get_paginator.return_value.paginate.return_value = [ + {"Contents": [{"Key": "base_path/file1"}]} + ] + self.s3.client.copy_object.side_effect = Exception("Test error") + + # Should not raise, just log the error + self.s3.refreeze("test-bucket", "base_path", "GLACIER") + + def test_list_objects_success(self): + """Test successful listing of objects""" + mock_objects = [ + {"Key": "file1", "Size": 100}, + {"Key": "file2", "Size": 200} + ] + self.s3.client.get_paginator.return_value.paginate.return_value = [ + {"Contents": mock_objects} + ] + + result = self.s3.list_objects("test-bucket", "prefix") + + assert result == mock_objects + self.s3.client.get_paginator.assert_called_with("list_objects_v2") + + def test_list_objects_multiple_pages(self): + """Test listing objects across multiple pages""" + self.s3.client.get_paginator.return_value.paginate.return_value = [ + {"Contents": [{"Key": "file1"}]}, + {"Contents": [{"Key": "file2"}]} + ] + + result = self.s3.list_objects("test-bucket", "prefix") + + assert len(result) == 2 + assert result[0]["Key"] == "file1" + assert result[1]["Key"] == "file2" + + def test_list_objects_no_contents(self): + """Test listing objects when no contents""" + self.s3.client.get_paginator.return_value.paginate.return_value = [{}] + + result = self.s3.list_objects("test-bucket", "prefix") + assert result == [] + + def test_delete_bucket_success(self): + """Test successful bucket deletion""" + 
self.s3.delete_bucket("test-bucket") + self.s3.client.delete_bucket.assert_called_with(Bucket="test-bucket") + + def test_delete_bucket_with_force(self): + """Test bucket deletion with force=True empties bucket first""" + paginator = MagicMock() + paginator.paginate.return_value = [ + {"Contents": [{"Key": "file1.txt"}, {"Key": "file2.txt"}]} + ] + self.s3.client.get_paginator.return_value = paginator + + self.s3.delete_bucket("test-bucket", force=True) + + # Should list objects + self.s3.client.get_paginator.assert_called_with('list_objects_v2') + paginator.paginate.assert_called_with(Bucket="test-bucket") + + # Should delete objects + self.s3.client.delete_objects.assert_called_once() + call_args = self.s3.client.delete_objects.call_args + assert call_args[1]["Bucket"] == "test-bucket" + assert len(call_args[1]["Delete"]["Objects"]) == 2 + + # Should delete bucket + self.s3.client.delete_bucket.assert_called_with(Bucket="test-bucket") + + def test_delete_bucket_force_empty(self): + """Test bucket deletion with force=True on empty bucket""" + paginator = MagicMock() + paginator.paginate.return_value = [{}] # No Contents + self.s3.client.get_paginator.return_value = paginator + + self.s3.delete_bucket("test-bucket", force=True) + + # Should not call delete_objects + self.s3.client.delete_objects.assert_not_called() + # Should still delete bucket + self.s3.client.delete_bucket.assert_called_with(Bucket="test-bucket") + + def test_delete_bucket_error(self): + """Test bucket deletion error""" + self.s3.client.delete_bucket.side_effect = ClientError( + {"Error": {"Code": "BucketNotEmpty"}}, "delete_bucket" + ) + + with pytest.raises(ActionError): + self.s3.delete_bucket("test-bucket") + + def test_put_object_success(self): + """Test successful object put""" + self.s3.put_object("test-bucket", "key", "body content") + self.s3.client.put_object.assert_called_with( + Bucket="test-bucket", + Key="key", + Body="body content" + ) + + def test_put_object_empty_body(self): + 
"""Test putting object with empty body""" + self.s3.put_object("test-bucket", "key") + self.s3.client.put_object.assert_called_with( + Bucket="test-bucket", + Key="key", + Body="" + ) + + def test_put_object_error(self): + """Test put object error""" + self.s3.client.put_object.side_effect = ClientError( + {"Error": {"Code": "AccessDenied"}}, "put_object" + ) + + with pytest.raises(ActionError): + self.s3.put_object("test-bucket", "key", "body") + + def test_list_buckets_success(self): + """Test successful bucket listing""" + self.s3.client.list_buckets.return_value = { + "Buckets": [ + {"Name": "bucket1"}, + {"Name": "bucket2"}, + {"Name": "test-bucket3"} + ] + } + + result = self.s3.list_buckets() + assert result == ["bucket1", "bucket2", "test-bucket3"] + + def test_list_buckets_with_prefix(self): + """Test bucket listing with prefix filter""" + self.s3.client.list_buckets.return_value = { + "Buckets": [ + {"Name": "bucket1"}, + {"Name": "test-bucket2"}, + {"Name": "test-bucket3"} + ] + } + + result = self.s3.list_buckets(prefix="test-") + assert result == ["test-bucket2", "test-bucket3"] + + def test_list_buckets_empty(self): + """Test listing buckets when none exist""" + self.s3.client.list_buckets.return_value = {"Buckets": []} + + result = self.s3.list_buckets() + assert result == [] + + def test_list_buckets_error(self): + """Test bucket listing error""" + self.s3.client.list_buckets.side_effect = ClientError( + {"Error": {"Code": "AccessDenied"}}, "list_buckets" + ) + + with pytest.raises(ActionError): + self.s3.list_buckets() + + def test_head_object_success(self): + """Test successful head object retrieval""" + mock_response = { + "ContentLength": 1024, + "StorageClass": "GLACIER", + "Restore": "ongoing-request=\"false\", expiry-date=\"Fri, 21 Dec 2025 00:00:00 GMT\"" + } + self.s3.client.head_object.return_value = mock_response + + result = self.s3.head_object("test-bucket", "test-key") + + assert result == mock_response + 
self.s3.client.head_object.assert_called_with(Bucket="test-bucket", Key="test-key") + + def test_head_object_error(self): + """Test head object error""" + self.s3.client.head_object.side_effect = ClientError( + {"Error": {"Code": "NoSuchKey"}}, "head_object" + ) + + with pytest.raises(ActionError, match="Error getting metadata"): + self.s3.head_object("test-bucket", "test-key") + + def test_copy_object_success(self): + """Test successful object copy""" + self.s3.copy_object( + Bucket="dest-bucket", + Key="dest-key", + CopySource={"Bucket": "src-bucket", "Key": "src-key"}, + StorageClass="STANDARD_IA" + ) + + self.s3.client.copy_object.assert_called_with( + Bucket="dest-bucket", + CopySource={"Bucket": "src-bucket", "Key": "src-key"}, + Key="dest-key", + StorageClass="STANDARD_IA" + ) + + def test_copy_object_default_storage_class(self): + """Test object copy with default storage class""" + self.s3.copy_object( + Bucket="dest-bucket", + Key="dest-key", + CopySource={"Bucket": "src-bucket", "Key": "src-key"} + ) + + self.s3.client.copy_object.assert_called_with( + Bucket="dest-bucket", + CopySource={"Bucket": "src-bucket", "Key": "src-key"}, + Key="dest-key", + StorageClass="GLACIER" + ) + + def test_copy_object_error(self): + """Test object copy error""" + self.s3.client.copy_object.side_effect = ClientError( + {"Error": {"Code": "NoSuchKey"}}, "copy_object" + ) + + with pytest.raises(ActionError): + self.s3.copy_object( + Bucket="dest-bucket", + Key="dest-key", + CopySource={"Bucket": "src-bucket", "Key": "src-key"} + ) + + +class TestS3ClientFactory: + """Test s3_client_factory function""" + + def test_factory_aws(self): + """Test factory returns AwsS3Client for aws provider""" + with patch('boto3.client'): + client = s3_client_factory("aws") + assert isinstance(client, AwsS3Client) + + def test_factory_gcp_not_implemented(self): + """Test factory raises NotImplementedError for gcp provider""" + with pytest.raises(NotImplementedError, match="GCP S3Client is not 
implemented"): + s3_client_factory("gcp") + + def test_factory_azure_not_implemented(self): + """Test factory raises NotImplementedError for azure provider""" + with pytest.raises(NotImplementedError, match="Azure S3Client is not implemented"): + s3_client_factory("azure") + + def test_factory_unknown_provider(self): + """Test factory raises ValueError for unknown provider""" + with pytest.raises(ValueError, match="Unsupported provider"): + s3_client_factory("unknown") + + +# Legacy tests for backward compatibility +def test_create_bucket(): + s3 = AwsS3Client() + s3.client = MagicMock() + s3.client.meta.region_name = 'us-east-1' # Set region to us-east-1 for simple assertion + s3.bucket_exists = MagicMock(return_value=False) # Mock the method directly + + assert s3.bucket_exists("test-bucket") is False + + s3.create_bucket("test-bucket") + s3.client.create_bucket.assert_called_with(Bucket="test-bucket") + + +def test_create_bucket_error(): + s3 = AwsS3Client() + s3.client = MagicMock() + s3.client.create_bucket.side_effect = ClientError( + {"Error": {"Code": "Error"}}, "create_bucket" + ) + + with pytest.raises(Exception): + s3.create_bucket("test-bucket") + + +def test_thaw(): + s3 = AwsS3Client() + s3.client = MagicMock() + s3.client.head_object.return_value = {"StorageClass": "GLACIER"} + + s3.thaw( + "test-bucket", + "base_path", + ["base_path/file1", "base_path/file2"], + 7, + "Standard", + ) + assert s3.client.restore_object.call_count == 2 + + +def test_thaw_skip_non_glacier(): + s3 = AwsS3Client() + s3.client = MagicMock() + s3.client.head_object.return_value = {"StorageClass": "STANDARD"} + + s3.thaw("test-bucket", "base_path", ["base_path/file1"], 7, "Standard") + s3.client.restore_object.assert_not_called() + + +def test_refreeze(): + s3 = AwsS3Client() + s3.client = MagicMock() + s3.client.get_paginator.return_value.paginate.return_value = [ + {"Contents": [{"Key": "base_path/file1"}]} + ] + + s3.refreeze("test-bucket", "base_path", "GLACIER") + 
s3.client.copy_object.assert_called_with( + Bucket="test-bucket", + CopySource={"Bucket": "test-bucket", "Key": "base_path/file1"}, + Key="base_path/file1", + StorageClass="GLACIER", + ) + + +def test_s3_client_factory(): + assert isinstance(s3_client_factory("aws"), AwsS3Client) + with pytest.raises(NotImplementedError): + s3_client_factory("gcp") + with pytest.raises(NotImplementedError): + s3_client_factory("azure") + with pytest.raises(ValueError): + s3_client_factory("unknown") + + +def test_s3_client_init(): + with patch("boto3.client") as mock_boto: + s3 = AwsS3Client() + mock_boto.assert_called_with("s3") + + +def test_thaw_invalid_key(): + s3 = AwsS3Client() + s3.client = MagicMock() + s3.client.head_object.return_value = {"StorageClass": "GLACIER"} + + s3.thaw("test-bucket", "base_path", ["wrong_path/file1"], 7, "Standard") + s3.client.restore_object.assert_not_called() + + +def test_refreeze_no_contents(): + s3 = AwsS3Client() + s3.client = MagicMock() + s3.client.get_paginator.return_value.paginate.return_value = [{}] + + s3.refreeze("test-bucket", "base_path", "GLACIER") + s3.client.copy_object.assert_not_called() + + +def test_uniimplemented(): + # S3Client is abstract and cannot be instantiated + with pytest.raises(TypeError): + S3Client() \ No newline at end of file diff --git a/tests/unit/testvars.py b/tests/unit/testvars.py index 63d29ffd..8184d8dd 100644 --- a/tests/unit/testvars.py +++ b/tests/unit/testvars.py @@ -1,450 +1,611 @@ from elasticsearch8 import ConflictError, NotFoundError, TransportError -fake_fail = Exception('Simulated Failure') -four_oh_one = TransportError(401, "simulated error") -four_oh_four = TransportError(404, "simulated error") -get_alias_fail = NotFoundError(404, 'simulated error', 'simulated error') -named_index = 'index_name' -named_indices = [ "index-2015.01.01", "index-2015.02.01" ] -open_index = {'metadata': {'indices' : { named_index : {'state' : 'open'}}}} -closed_index = {'metadata': {'indices' : { named_index : 
{'state' : 'close'}}}} -cat_open_index = [{'status': 'open'}] -cat_closed_index = [{'status': 'close'}] -open_indices = { 'metadata': { 'indices' : { 'index1' : { 'state' : 'open' }, - 'index2' : { 'state' : 'open' }}}} -closed_indices = { 'metadata': { 'indices' : { 'index1' : { 'state' : 'close' }, - 'index2' : { 'state' : 'close' }}}} -named_alias = 'alias_name' -alias_retval = { "pre_aliased_index": { "aliases" : { named_alias : { }}}} -rollable_alias = { "index-000001": { "aliases" : { named_alias : { }}}} -rollover_conditions = { 'conditions': { 'max_age': '1s' } } +fake_fail = Exception("Simulated Failure") +four_oh_one = TransportError(401, "simulated error") +four_oh_four = TransportError(404, "simulated error") +get_alias_fail = NotFoundError(404, "simulated error", "simulated error") +named_index = "index_name" +named_indices = ["index-2015.01.01", "index-2015.02.01"] +open_index = {"metadata": {"indices": {named_index: {"state": "open"}}}} +closed_index = {"metadata": {"indices": {named_index: {"state": "close"}}}} +cat_open_index = [{"status": "open"}] +cat_closed_index = [{"status": "close"}] +open_indices = { + "metadata": {"indices": {"index1": {"state": "open"}, "index2": {"state": "open"}}} +} +closed_indices = { + "metadata": { + "indices": {"index1": {"state": "close"}, "index2": {"state": "close"}} + } +} +named_alias = "alias_name" +alias_retval = {"pre_aliased_index": {"aliases": {named_alias: {}}}} +rollable_alias = {"index-000001": {"aliases": {named_alias: {}}}} +rollover_conditions = {"conditions": {"max_age": "1s"}} dry_run_rollover = { - "acknowledged": True, - "shards_acknowledged": True, - "old_index": "index-000001", - "new_index": "index-000002", - "rolled_over": False, - "dry_run": True, - "conditions": { - "max_age" : "1s" - } + "acknowledged": True, + "shards_acknowledged": True, + "old_index": "index-000001", + "new_index": "index-000002", + "rolled_over": False, + "dry_run": True, + "conditions": {"max_age": "1s"}, } 
aliases_retval = { - "index1": { "aliases" : { named_alias : { } } }, - "index2": { "aliases" : { named_alias : { } } }, + "index1": {"aliases": {named_alias: {}}}, + "index2": {"aliases": {named_alias: {}}}, +} +alias_one_add = [{"add": {"alias": "alias", "index": "index_name"}}] +alias_one_add_with_extras = [ + { + "add": { + "alias": "alias", + "index": "index_name", + "filter": {"term": {"user": "kimchy"}}, + } } -alias_one_add = [{'add': {'alias': 'alias', 'index': 'index_name'}}] -alias_one_add_with_extras = [ - { 'add': { - 'alias': 'alias', 'index': 'index_name', - 'filter' : { 'term' : { 'user' : 'kimchy' }} - } - }] -alias_one_rm = [{'remove': {'alias': 'my_alias', 'index': named_index}}] -alias_one_body = { "actions" : [ - {'remove': {'alias': 'alias', 'index': 'index_name'}}, - {'add': {'alias': 'alias', 'index': 'index_name'}} - ]} -alias_two_add = [ - {'add': {'alias': 'alias', 'index': 'index-2016.03.03'}}, - {'add': {'alias': 'alias', 'index': 'index-2016.03.04'}}, - ] -alias_two_rm = [ - {'remove': {'alias': 'my_alias', 'index': 'index-2016.03.03'}}, - {'remove': {'alias': 'my_alias', 'index': 'index-2016.03.04'}}, - ] -alias_success = { "acknowledged": True } -allocation_in = {named_index: {'settings': {'index': {'routing': {'allocation': {'require': {'foo': 'bar'}}}}}}} -allocation_out = {named_index: {'settings': {'index': {'routing': {'allocation': {'require': {'not': 'foo'}}}}}}} -indices_space = { 'indices' : { - 'index1' : { 'index' : { 'primary_size_in_bytes': 1083741824 }}, - 'index2' : { 'index' : { 'primary_size_in_bytes': 1083741824 }}}} -snap_name = 'snap_name' -repo_name = 'repo_name' -test_repo = {repo_name: {'type': 'fs', 'settings': {'compress': 'true', 'location': '/tmp/repos/repo_name'}}} -test_repos = {'TESTING': {'type': 'fs', 'settings': {'compress': 'true', 'location': '/tmp/repos/TESTING'}}, - repo_name: {'type': 'fs', 'settings': {'compress': 'true', 'location': '/rmp/repos/repo_name'}}} -snap_running = { 'snapshots': 
['running'] } -nosnap_running = { 'snapshots': [] } -snapshot = { 'snapshots': [ - { - 'duration_in_millis': 60000, 'start_time': '2015-02-01T00:00:00.000Z', - 'shards': {'successful': 4, 'failed': 0, 'total': 4}, - 'end_time_in_millis': 0, 'state': 'SUCCESS', - 'snapshot': snap_name, 'end_time': '2015-02-01T00:00:01.000Z', - 'indices': named_indices, - 'failures': [], 'start_time_in_millis': 1422748800 - }]} -oneinprogress = { 'snapshots': [ - { - 'duration_in_millis': 60000, 'start_time': '2015-03-01T00:00:02.000Z', - 'shards': {'successful': 4, 'failed': 0, 'total': 4}, - 'end_time_in_millis': 0, 'state': 'IN_PROGRESS', - 'snapshot': snap_name, 'end_time': '2015-03-01T00:00:03.000Z', - 'indices': named_indices, - 'failures': [], 'start_time_in_millis': 1425168002 - }]} -partial = { 'snapshots': [ - { - 'duration_in_millis': 60000, 'start_time': '2015-02-01T00:00:00.000Z', - 'shards': {'successful': 4, 'failed': 0, 'total': 4}, - 'end_time_in_millis': 0, 'state': 'PARTIAL', - 'snapshot': snap_name, 'end_time': '2015-02-01T00:00:01.000Z', - 'indices': named_indices, - 'failures': [], 'start_time_in_millis': 1422748800 - }]} -failed = { 'snapshots': [ - { - 'duration_in_millis': 60000, 'start_time': '2015-02-01T00:00:00.000Z', - 'shards': {'successful': 4, 'failed': 0, 'total': 4}, - 'end_time_in_millis': 0, 'state': 'FAILED', - 'snapshot': snap_name, 'end_time': '2015-02-01T00:00:01.000Z', - 'indices': named_indices, - 'failures': [], 'start_time_in_millis': 1422748800 - }]} -othersnap = { 'snapshots': [ - { - 'duration_in_millis': 60000, 'start_time': '2015-02-01T00:00:00.000Z', - 'shards': {'successful': 4, 'failed': 0, 'total': 4}, - 'end_time_in_millis': 0, 'state': 'SOMETHINGELSE', - 'snapshot': snap_name, 'end_time': '2015-02-01T00:00:01.000Z', - 'indices': named_indices, - 'failures': [], 'start_time_in_millis': 1422748800 - }]} -snapshots = { 'snapshots': [ - { - 'duration_in_millis': 60000, 'start_time': '2015-02-01T00:00:00.000Z', - 'shards': 
{'successful': 4, 'failed': 0, 'total': 4}, - 'end_time_in_millis': 0, 'state': 'SUCCESS', - 'snapshot': snap_name, 'end_time': '2015-02-01T00:00:01.000Z', - 'indices': named_indices, - 'failures': [], 'start_time_in_millis': 1422748800 - }, - { - 'duration_in_millis': 60000, 'start_time': '2015-03-01T00:00:02.000Z', - 'shards': {'successful': 4, 'failed': 0, 'total': 4}, - 'end_time_in_millis': 0, 'state': 'SUCCESS', - 'snapshot': 'snapshot-2015.03.01', 'end_time': '2015-03-01T00:00:03.000Z', - 'indices': named_indices, - 'failures': [], 'start_time_in_millis': 1425168002 - }]} -inprogress = { 'snapshots': [ - { - 'duration_in_millis': 60000, 'start_time': '2015-02-01T00:00:00.000Z', - 'shards': {'successful': 4, 'failed': 0, 'total': 4}, - 'end_time_in_millis': 0, 'state': 'SUCCESS', - 'snapshot': snap_name, 'end_time': '2015-02-01T00:00:01.000Z', - 'indices': named_indices, - 'failures': [], 'start_time_in_millis': 1422748800 - }, - { - 'duration_in_millis': 60000, 'start_time': '2015-03-01T00:00:02.000Z', - 'shards': {'successful': 4, 'failed': 0, 'total': 4}, - 'end_time_in_millis': 0, 'state': 'IN_PROGRESS', - 'snapshot': 'snapshot-2015.03.01', 'end_time': '2015-03-01T00:00:03.000Z', - 'indices': named_indices, - 'failures': [], 'start_time_in_millis': 1425168002 - }]} -highly_unlikely = { 'snapshots': [ - { - 'duration_in_millis': 60000, 'start_time': '2015-02-01T00:00:00.000Z', - 'shards': {'successful': 4, 'failed': 0, 'total': 4}, - 'end_time_in_millis': 0, 'state': 'IN_PROGRESS', - 'snapshot': snap_name, 'end_time': '2015-02-01T00:00:01.000Z', - 'indices': named_indices, - 'failures': [], 'start_time_in_millis': 1422748800 - }, - { - 'duration_in_millis': 60000, 'start_time': '2015-03-01T00:00:02.000Z', - 'shards': {'successful': 4, 'failed': 0, 'total': 4}, - 'end_time_in_millis': 0, 'state': 'IN_PROGRESS', - 'snapshot': 'snapshot-2015.03.01', 'end_time': '2015-03-01T00:00:03.000Z', - 'indices': named_indices, - 'failures': [], 'start_time_in_millis': 
1425168002 - }]} -snap_body_all = { - "ignore_unavailable": False, - "include_global_state": True, - "partial": False, - "indices" : "_all" - } -snap_body = { - "ignore_unavailable": False, - "include_global_state": True, - "partial": False, - "indices" : "index-2015.01.01,index-2015.02.01" - } -verified_nodes = {'nodes': {'nodeid1': {'name': 'node1'}, 'nodeid2': {'name': 'node2'}}} -synced_pass = { - "_shards":{"total":1,"successful":1,"failed":0}, - "index_name":{ - "total":1,"successful":1,"failed":0, - "failures":[], - } - } -synced_fail = { - "_shards":{"total":1,"successful":0,"failed":1}, - "index_name":{ - "total":1,"successful":0,"failed":1, - "failures":[ - {"shard":0,"reason":"pending operations","routing":{"state":"STARTED","primary":True,"node":"nodeid1","relocating_node":None,"shard":0,"index":"index_name"}}, - ] - } - } -sync_conflict = ConflictError(409, '{"_shards":{"total":1,"successful":0,"failed":1},"index_name":{"total":1,"successful":0,"failed":1,"failures":[{"shard":0,"reason":"pending operations","routing":{"state":"STARTED","primary":true,"node":"nodeid1","relocating_node":null,"shard":0,"index":"index_name"}}]}})', synced_fail) -synced_fails = { - "_shards":{"total":2,"successful":1,"failed":1}, - "index1":{ - "total":1,"successful":0,"failed":1, - "failures":[ - {"shard":0,"reason":"pending operations","routing":{"state":"STARTED","primary":True,"node":"nodeid1","relocating_node":None,"shard":0,"index":"index_name"}}, - ] - }, - "index2":{ - "total":1,"successful":1,"failed":0, - "failures":[] - }, - } +] +alias_one_rm = [{"remove": {"alias": "my_alias", "index": named_index}}] +alias_one_body = { + "actions": [ + {"remove": {"alias": "alias", "index": "index_name"}}, + {"add": {"alias": "alias", "index": "index_name"}}, + ] +} +alias_two_add = [ + {"add": {"alias": "alias", "index": "index-2016.03.03"}}, + {"add": {"alias": "alias", "index": "index-2016.03.04"}}, +] +alias_two_rm = [ + {"remove": {"alias": "my_alias", "index": 
"index-2016.03.03"}}, + {"remove": {"alias": "my_alias", "index": "index-2016.03.04"}}, +] +alias_success = {"acknowledged": True} +allocation_in = { + named_index: { + "settings": {"index": {"routing": {"allocation": {"require": {"foo": "bar"}}}}} + } +} +allocation_out = { + named_index: { + "settings": {"index": {"routing": {"allocation": {"require": {"not": "foo"}}}}} + } +} +indices_space = { + "indices": { + "index1": {"index": {"primary_size_in_bytes": 1083741824}}, + "index2": {"index": {"primary_size_in_bytes": 1083741824}}, + } +} +snap_name = "snap_name" +repo_name = "repo_name" +test_repo = { + repo_name: { + "type": "fs", + "settings": {"compress": "true", "location": "/tmp/repos/repo_name"}, + } +} +test_repos = { + "TESTING": { + "type": "fs", + "settings": {"compress": "true", "location": "/tmp/repos/TESTING"}, + }, + repo_name: { + "type": "fs", + "settings": {"compress": "true", "location": "/rmp/repos/repo_name"}, + }, +} +snap_running = {"snapshots": ["running"]} +nosnap_running = {"snapshots": []} +snapshot = { + "snapshots": [ + { + "duration_in_millis": 60000, + "start_time": "2015-02-01T00:00:00.000Z", + "shards": {"successful": 4, "failed": 0, "total": 4}, + "end_time_in_millis": 0, + "state": "SUCCESS", + "snapshot": snap_name, + "end_time": "2015-02-01T00:00:01.000Z", + "indices": named_indices, + "failures": [], + "start_time_in_millis": 1422748800, + } + ] +} +oneinprogress = { + "snapshots": [ + { + "duration_in_millis": 60000, + "start_time": "2015-03-01T00:00:02.000Z", + "shards": {"successful": 4, "failed": 0, "total": 4}, + "end_time_in_millis": 0, + "state": "IN_PROGRESS", + "snapshot": snap_name, + "end_time": "2015-03-01T00:00:03.000Z", + "indices": named_indices, + "failures": [], + "start_time_in_millis": 1425168002, + } + ] +} +partial = { + "snapshots": [ + { + "duration_in_millis": 60000, + "start_time": "2015-02-01T00:00:00.000Z", + "shards": {"successful": 4, "failed": 0, "total": 4}, + "end_time_in_millis": 0, + "state": 
"PARTIAL", + "snapshot": snap_name, + "end_time": "2015-02-01T00:00:01.000Z", + "indices": named_indices, + "failures": [], + "start_time_in_millis": 1422748800, + } + ] +} +failed = { + "snapshots": [ + { + "duration_in_millis": 60000, + "start_time": "2015-02-01T00:00:00.000Z", + "shards": {"successful": 4, "failed": 0, "total": 4}, + "end_time_in_millis": 0, + "state": "FAILED", + "snapshot": snap_name, + "end_time": "2015-02-01T00:00:01.000Z", + "indices": named_indices, + "failures": [], + "start_time_in_millis": 1422748800, + } + ] +} +othersnap = { + "snapshots": [ + { + "duration_in_millis": 60000, + "start_time": "2015-02-01T00:00:00.000Z", + "shards": {"successful": 4, "failed": 0, "total": 4}, + "end_time_in_millis": 0, + "state": "SOMETHINGELSE", + "snapshot": snap_name, + "end_time": "2015-02-01T00:00:01.000Z", + "indices": named_indices, + "failures": [], + "start_time_in_millis": 1422748800, + } + ] +} +snapshots = { + "snapshots": [ + { + "duration_in_millis": 60000, + "start_time": "2015-02-01T00:00:00.000Z", + "shards": {"successful": 4, "failed": 0, "total": 4}, + "end_time_in_millis": 0, + "state": "SUCCESS", + "snapshot": snap_name, + "end_time": "2015-02-01T00:00:01.000Z", + "indices": named_indices, + "failures": [], + "start_time_in_millis": 1422748800, + }, + { + "duration_in_millis": 60000, + "start_time": "2015-03-01T00:00:02.000Z", + "shards": {"successful": 4, "failed": 0, "total": 4}, + "end_time_in_millis": 0, + "state": "SUCCESS", + "snapshot": "snapshot-2015.03.01", + "end_time": "2015-03-01T00:00:03.000Z", + "indices": named_indices, + "failures": [], + "start_time_in_millis": 1425168002, + }, + ] +} +inprogress = { + "snapshots": [ + { + "duration_in_millis": 60000, + "start_time": "2015-02-01T00:00:00.000Z", + "shards": {"successful": 4, "failed": 0, "total": 4}, + "end_time_in_millis": 0, + "state": "SUCCESS", + "snapshot": snap_name, + "end_time": "2015-02-01T00:00:01.000Z", + "indices": named_indices, + "failures": [], + 
"start_time_in_millis": 1422748800, + }, + { + "duration_in_millis": 60000, + "start_time": "2015-03-01T00:00:02.000Z", + "shards": {"successful": 4, "failed": 0, "total": 4}, + "end_time_in_millis": 0, + "state": "IN_PROGRESS", + "snapshot": "snapshot-2015.03.01", + "end_time": "2015-03-01T00:00:03.000Z", + "indices": named_indices, + "failures": [], + "start_time_in_millis": 1425168002, + }, + ] +} +highly_unlikely = { + "snapshots": [ + { + "duration_in_millis": 60000, + "start_time": "2015-02-01T00:00:00.000Z", + "shards": {"successful": 4, "failed": 0, "total": 4}, + "end_time_in_millis": 0, + "state": "IN_PROGRESS", + "snapshot": snap_name, + "end_time": "2015-02-01T00:00:01.000Z", + "indices": named_indices, + "failures": [], + "start_time_in_millis": 1422748800, + }, + { + "duration_in_millis": 60000, + "start_time": "2015-03-01T00:00:02.000Z", + "shards": {"successful": 4, "failed": 0, "total": 4}, + "end_time_in_millis": 0, + "state": "IN_PROGRESS", + "snapshot": "snapshot-2015.03.01", + "end_time": "2015-03-01T00:00:03.000Z", + "indices": named_indices, + "failures": [], + "start_time_in_millis": 1425168002, + }, + ] +} +snap_body_all = { + "ignore_unavailable": False, + "include_global_state": True, + "partial": False, + "indices": "_all", +} +snap_body = { + "ignore_unavailable": False, + "include_global_state": True, + "partial": False, + "indices": "index-2015.01.01,index-2015.02.01", +} +verified_nodes = {"nodes": {"nodeid1": {"name": "node1"}, "nodeid2": {"name": "node2"}}} +synced_pass = { + "_shards": {"total": 1, "successful": 1, "failed": 0}, + "index_name": { + "total": 1, + "successful": 1, + "failed": 0, + "failures": [], + }, +} +synced_fail = { + "_shards": {"total": 1, "successful": 0, "failed": 1}, + "index_name": { + "total": 1, + "successful": 0, + "failed": 1, + "failures": [ + { + "shard": 0, + "reason": "pending operations", + "routing": { + "state": "STARTED", + "primary": True, + "node": "nodeid1", + "relocating_node": None, + 
"shard": 0, + "index": "index_name", + }, + }, + ], + }, +} +sync_conflict = ConflictError( + 409, + '{"_shards":{"total":1,"successful":0,"failed":1},"index_name":{"total":1,"successful":0,"failed":1,"failures":[{"shard":0,"reason":"pending operations","routing":{"state":"STARTED","primary":true,"node":"nodeid1","relocating_node":null,"shard":0,"index":"index_name"}}]}})', + synced_fail, +) +synced_fails = { + "_shards": {"total": 2, "successful": 1, "failed": 1}, + "index1": { + "total": 1, + "successful": 0, + "failed": 1, + "failures": [ + { + "shard": 0, + "reason": "pending operations", + "routing": { + "state": "STARTED", + "primary": True, + "node": "nodeid1", + "relocating_node": None, + "shard": 0, + "index": "index_name", + }, + }, + ], + }, + "index2": {"total": 1, "successful": 1, "failed": 0, "failures": []}, +} -state_one = [{'index': named_index, 'status': 'open'}] +state_one = [{"index": named_index, "status": "open"}] -settings_one = { +settings_one = { named_index: { - 'aliases': ['my_alias'], - 'mappings': {}, - 'settings': { - 'index': { - 'number_of_replicas': '1', 'uuid': 'random_uuid_string_here', - 'number_of_shards': '2', 'creation_date': '1456963200172', - 'routing': {'allocation': {'include': {'tag': 'foo'}}}, - 'version': {'created': '2020099'}, 'refresh_interval': '5s' + "aliases": ["my_alias"], + "mappings": {}, + "settings": { + "index": { + "number_of_replicas": "1", + "uuid": "random_uuid_string_here", + "number_of_shards": "2", + "creation_date": "1456963200172", + "routing": {"allocation": {"include": {"tag": "foo"}}}, + "version": {"created": "2020099"}, + "refresh_interval": "5s", } - } + }, } } -settings_1_get_aliases = { named_index: { "aliases" : { 'my_alias' : { } } } } +settings_1_get_aliases = {named_index: {"aliases": {"my_alias": {}}}} state_two = [ - {'index': 'index-2016.03.03', 'status': 'open'}, - {'index': 'index-2016.03.04', 'status': 'open'} + {"index": "index-2016.03.03", "status": "open"}, + {"index": 
"index-2016.03.04", "status": "open"}, ] -settings_two = { - 'index-2016.03.03': { - 'aliases': ['my_alias'], - 'mappings': {}, - 'settings': { - 'index': { - 'number_of_replicas': '1', 'uuid': 'random_uuid_string_here', - 'number_of_shards': '5', 'creation_date': '1456963200172', - 'routing': {'allocation': {'include': {'tag': 'foo'}}}, - 'version': {'created': '2020099'}, 'refresh_interval': '5s' +settings_two = { + "index-2016.03.03": { + "aliases": ["my_alias"], + "mappings": {}, + "settings": { + "index": { + "number_of_replicas": "1", + "uuid": "random_uuid_string_here", + "number_of_shards": "5", + "creation_date": "1456963200172", + "routing": {"allocation": {"include": {"tag": "foo"}}}, + "version": {"created": "2020099"}, + "refresh_interval": "5s", } - } + }, }, - 'index-2016.03.04': { - 'aliases': ['my_alias'], - 'mappings': {}, - 'settings': { - 'index': { - 'number_of_replicas': '1', 'uuid': 'another_random_uuid_string', - 'number_of_shards': '5', 'creation_date': '1457049600812', - 'routing': {'allocation': {'include': {'tag': 'bar'}}}, - 'version': {'created': '2020099'}, 'refresh_interval': '5s' + "index-2016.03.04": { + "aliases": ["my_alias"], + "mappings": {}, + "settings": { + "index": { + "number_of_replicas": "1", + "uuid": "another_random_uuid_string", + "number_of_shards": "5", + "creation_date": "1457049600812", + "routing": {"allocation": {"include": {"tag": "bar"}}}, + "version": {"created": "2020099"}, + "refresh_interval": "5s", } - } - } + }, + }, } settings_2_get_aliases = { - "index-2016.03.03": { "aliases" : { 'my_alias' : { } } }, - "index-2016.03.04": { "aliases" : { 'my_alias' : { } } }, + "index-2016.03.03": {"aliases": {"my_alias": {}}}, + "index-2016.03.04": {"aliases": {"my_alias": {}}}, } -state_2_closed = [ - {'index': 'index-2016.03.03', 'status': 'close'}, - {'index': 'index-2016.03.04', 'status': 'open'}, +state_2_closed = [ + {"index": "index-2016.03.03", "status": "close"}, + {"index": "index-2016.03.04", "status": 
"open"}, ] settings_2_closed = { - 'index-2016.03.03': { - 'aliases': ['my_alias'], - 'mappings': {}, - 'settings': { - 'index': { - 'number_of_replicas': '1', 'uuid': 'random_uuid_string_here', - 'number_of_shards': '5', 'creation_date': '1456963200172', - 'routing': {'allocation': {'include': {'tag': 'foo'}}}, - 'version': {'created': '2020099'}, 'refresh_interval': '5s' + "index-2016.03.03": { + "aliases": ["my_alias"], + "mappings": {}, + "settings": { + "index": { + "number_of_replicas": "1", + "uuid": "random_uuid_string_here", + "number_of_shards": "5", + "creation_date": "1456963200172", + "routing": {"allocation": {"include": {"tag": "foo"}}}, + "version": {"created": "2020099"}, + "refresh_interval": "5s", } - } + }, }, - 'index-2016.03.04': { - 'aliases': ['my_alias'], - 'mappings': {}, - 'settings': { - 'index': { - 'number_of_replicas': '1', 'uuid': 'another_random_uuid_string', - 'number_of_shards': '5', 'creation_date': '1457049600812', - 'routing': {'allocation': {'include': {'tag': 'bar'}}}, - 'version': {'created': '2020099'}, 'refresh_interval': '5s' + "index-2016.03.04": { + "aliases": ["my_alias"], + "mappings": {}, + "settings": { + "index": { + "number_of_replicas": "1", + "uuid": "another_random_uuid_string", + "number_of_shards": "5", + "creation_date": "1457049600812", + "routing": {"allocation": {"include": {"tag": "bar"}}}, + "version": {"created": "2020099"}, + "refresh_interval": "5s", } - } - } + }, + }, } -state_four = [ - {'index': 'a-2016.03.03', 'status': 'open'}, - {'index': 'b-2016.03.04', 'status': 'open'}, - {'index': 'c-2016.03.05', 'status': 'close'}, - {'index': 'd-2016.03.06', 'status': 'open'}, +state_four = [ + {"index": "a-2016.03.03", "status": "open"}, + {"index": "b-2016.03.04", "status": "open"}, + {"index": "c-2016.03.05", "status": "close"}, + {"index": "d-2016.03.06", "status": "open"}, ] -settings_four = { - 'a-2016.03.03': { - 'aliases': ['my_alias'], - 'mappings': {}, - 'settings': { - 'index': { - 
'number_of_replicas': '1', 'uuid': 'random_uuid_string_here', - 'number_of_shards': '5', 'creation_date': '1456963200172', - 'routing': {'allocation': {'include': {'tag': 'foo'}}}, - 'version': {'created': '2020099'}, 'refresh_interval': '5s' +settings_four = { + "a-2016.03.03": { + "aliases": ["my_alias"], + "mappings": {}, + "settings": { + "index": { + "number_of_replicas": "1", + "uuid": "random_uuid_string_here", + "number_of_shards": "5", + "creation_date": "1456963200172", + "routing": {"allocation": {"include": {"tag": "foo"}}}, + "version": {"created": "2020099"}, + "refresh_interval": "5s", } - } + }, }, - 'b-2016.03.04': { - 'aliases': ['my_alias'], - 'mappings': {}, - 'settings': { - 'index': { - 'number_of_replicas': '1', 'uuid': 'another_random_uuid_string', - 'number_of_shards': '5', 'creation_date': '1457049600812', - 'routing': {'allocation': {'include': {'tag': 'bar'}}}, - 'version': {'created': '2020099'}, 'refresh_interval': '5s' + "b-2016.03.04": { + "aliases": ["my_alias"], + "mappings": {}, + "settings": { + "index": { + "number_of_replicas": "1", + "uuid": "another_random_uuid_string", + "number_of_shards": "5", + "creation_date": "1457049600812", + "routing": {"allocation": {"include": {"tag": "bar"}}}, + "version": {"created": "2020099"}, + "refresh_interval": "5s", } - } + }, }, - 'c-2016.03.05': { - 'aliases': ['my_alias'], - 'mappings': {}, - 'settings': { - 'index': { - 'number_of_replicas': '1', 'uuid': 'random_uuid_string_here', - 'number_of_shards': '5', 'creation_date': '1457136000933', - 'routing': {'allocation': {'include': {'tag': 'foo'}}}, - 'version': {'created': '2020099'}, 'refresh_interval': '5s' + "c-2016.03.05": { + "aliases": ["my_alias"], + "mappings": {}, + "settings": { + "index": { + "number_of_replicas": "1", + "uuid": "random_uuid_string_here", + "number_of_shards": "5", + "creation_date": "1457136000933", + "routing": {"allocation": {"include": {"tag": "foo"}}}, + "version": {"created": "2020099"}, + 
"refresh_interval": "5s", } - } + }, }, - 'd-2016.03.06': { - 'aliases': ['my_alias'], - 'mappings': {}, - 'settings': { - 'index': { - 'number_of_replicas': '1', 'uuid': 'another_random_uuid_string', - 'number_of_shards': '5', 'creation_date': '1457222400527', - 'routing': {'allocation': {'include': {'tag': 'bar'}}}, - 'version': {'created': '2020099'}, 'refresh_interval': '5s' + "d-2016.03.06": { + "aliases": ["my_alias"], + "mappings": {}, + "settings": { + "index": { + "number_of_replicas": "1", + "uuid": "another_random_uuid_string", + "number_of_shards": "5", + "creation_date": "1457222400527", + "routing": {"allocation": {"include": {"tag": "bar"}}}, + "version": {"created": "2020099"}, + "refresh_interval": "5s", } - } - } + }, + }, } -state_named = [ - {'index': 'index-2015.01.01', 'status': 'open'}, - {'index': 'index-2015.02.01', 'status': 'open'}, +state_named = [ + {"index": "index-2015.01.01", "status": "open"}, + {"index": "index-2015.02.01", "status": "open"}, ] settings_named = { - 'index-2015.01.01': { - 'aliases': ['my_alias'], - 'mappings': {}, - 'settings': { - 'index': { - 'number_of_replicas': '1', 'uuid': 'random_uuid_string_here', - 'number_of_shards': '5', 'creation_date': '1456963200172', - 'routing': {'allocation': {'include': {'tag': 'foo'}}}, - 'version': {'created': '2020099'}, 'refresh_interval': '5s' + "index-2015.01.01": { + "aliases": ["my_alias"], + "mappings": {}, + "settings": { + "index": { + "number_of_replicas": "1", + "uuid": "random_uuid_string_here", + "number_of_shards": "5", + "creation_date": "1456963200172", + "routing": {"allocation": {"include": {"tag": "foo"}}}, + "version": {"created": "2020099"}, + "refresh_interval": "5s", } - } + }, }, - 'index-2015.02.01': { - 'aliases': ['my_alias'], - 'mappings': {}, - 'settings': { - 'index': { - 'number_of_replicas': '1', 'uuid': 'another_random_uuid_string', - 'number_of_shards': '5', 'creation_date': '1457049600812', - 'routing': {'allocation': {'include': {'tag': 
'bar'}}}, - 'version': {'created': '2020099'}, 'refresh_interval': '5s' + "index-2015.02.01": { + "aliases": ["my_alias"], + "mappings": {}, + "settings": { + "index": { + "number_of_replicas": "1", + "uuid": "another_random_uuid_string", + "number_of_shards": "5", + "creation_date": "1457049600812", + "routing": {"allocation": {"include": {"tag": "bar"}}}, + "version": {"created": "2020099"}, + "refresh_interval": "5s", } - } - } + }, + }, } -stats_one = { - 'indices': { - named_index : { - 'total': { - 'docs': {'count': 6374962, 'deleted': 0}, - 'store': {'size_in_bytes': 1115219663, 'throttle_time_in_millis': 0} +stats_one = { + "indices": { + named_index: { + "total": { + "docs": {"count": 6374962, "deleted": 0}, + "store": {"size_in_bytes": 1115219663, "throttle_time_in_millis": 0}, + }, + "primaries": { + "docs": {"count": 3187481, "deleted": 0}, + "store": {"size_in_bytes": 557951789, "throttle_time_in_millis": 0}, }, - 'primaries': { - 'docs': {'count': 3187481, 'deleted': 0}, - 'store': {'size_in_bytes': 557951789, 'throttle_time_in_millis': 0} - } } } } -stats_two = { - 'indices': { - 'index-2016.03.03': { - 'total': { - 'docs': {'count': 6374962, 'deleted': 0}, - 'store': {'size_in_bytes': 1115219663, 'throttle_time_in_millis': 0} +stats_two = { + "indices": { + "index-2016.03.03": { + "total": { + "docs": {"count": 6374962, "deleted": 0}, + "store": {"size_in_bytes": 1115219663, "throttle_time_in_millis": 0}, + }, + "primaries": { + "docs": {"count": 3187481, "deleted": 0}, + "store": {"size_in_bytes": 557951789, "throttle_time_in_millis": 0}, }, - 'primaries': { - 'docs': {'count': 3187481, 'deleted': 0}, - 'store': {'size_in_bytes': 557951789, 'throttle_time_in_millis': 0} - } }, - 'index-2016.03.04': { - 'total': { - 'docs': {'count': 6377544, 'deleted': 0}, - 'store': {'size_in_bytes': 1120891046, 'throttle_time_in_millis': 0} + "index-2016.03.04": { + "total": { + "docs": {"count": 6377544, "deleted": 0}, + "store": {"size_in_bytes": 1120891046, 
"throttle_time_in_millis": 0}, }, - 'primaries': { - 'docs': {'count': 3188772, 'deleted': 0}, - 'store': {'size_in_bytes': 560677114, 'throttle_time_in_millis': 0} - } - } + "primaries": { + "docs": {"count": 3188772, "deleted": 0}, + "store": {"size_in_bytes": 560677114, "throttle_time_in_millis": 0}, + }, + }, } } -stats_four = { - 'indices': { - 'a-2016.03.03': { - 'total': { - 'docs': {'count': 6374962, 'deleted': 0}, - 'store': {'size_in_bytes': 1115219663, 'throttle_time_in_millis': 0} +stats_four = { + "indices": { + "a-2016.03.03": { + "total": { + "docs": {"count": 6374962, "deleted": 0}, + "store": {"size_in_bytes": 1115219663, "throttle_time_in_millis": 0}, + }, + "primaries": { + "docs": {"count": 3187481, "deleted": 0}, + "store": {"size_in_bytes": 557951789, "throttle_time_in_millis": 0}, }, - 'primaries': { - 'docs': {'count': 3187481, 'deleted': 0}, - 'store': {'size_in_bytes': 557951789, 'throttle_time_in_millis': 0} - } }, - 'b-2016.03.04': { - 'total': { - 'docs': {'count': 6377544, 'deleted': 0}, - 'store': {'size_in_bytes': 1120891046, 'throttle_time_in_millis': 0} + "b-2016.03.04": { + "total": { + "docs": {"count": 6377544, "deleted": 0}, + "store": {"size_in_bytes": 1120891046, "throttle_time_in_millis": 0}, + }, + "primaries": { + "docs": {"count": 3188772, "deleted": 0}, + "store": {"size_in_bytes": 560677114, "throttle_time_in_millis": 0}, }, - 'primaries': { - 'docs': {'count': 3188772, 'deleted': 0}, - 'store': {'size_in_bytes': 560677114, 'throttle_time_in_millis': 0} - } }, # CLOSED, ergo, not present # 'c-2016.03.05': { @@ -457,116 +618,165 @@ # 'store': {'size_in_bytes': 560441083, 'throttle_time_in_millis': 0} # } # }, - 'd-2016.03.06': { - 'total': { - 'docs': {'count': 6266436, 'deleted': 0}, - 'store': {'size_in_bytes': 1120882168, 'throttle_time_in_millis': 0} + "d-2016.03.06": { + "total": { + "docs": {"count": 6266436, "deleted": 0}, + "store": {"size_in_bytes": 1120882168, "throttle_time_in_millis": 0}, }, - 'primaries': { 
- 'docs': {'count': 3133218, 'deleted': 0}, - 'store': {'size_in_bytes': 560441084, 'throttle_time_in_millis': 0} - } - } - + "primaries": { + "docs": {"count": 3133218, "deleted": 0}, + "store": {"size_in_bytes": 560441084, "throttle_time_in_millis": 0}, + }, + }, } } fieldstats_one = { - 'indices': { - named_index : { - 'fields': { - 'timestamp': { - 'density': 100, - 'min_value_as_string': '2016-03-03T00:00:06.189Z', - 'max_value': 1457049599152, 'max_doc': 415651, - 'min_value': 1456963206189, 'doc_count': 415651, - 'max_value_as_string': '2016-03-03T23:59:59.152Z', - 'sum_total_term_freq': -1, 'sum_doc_freq': 1662604}}}} + "indices": { + named_index: { + "fields": { + "timestamp": { + "density": 100, + "min_value_as_string": "2016-03-03T00:00:06.189Z", + "max_value": 1457049599152, + "max_doc": 415651, + "min_value": 1456963206189, + "doc_count": 415651, + "max_value_as_string": "2016-03-03T23:59:59.152Z", + "sum_total_term_freq": -1, + "sum_doc_freq": 1662604, + } + } + } } +} fieldstats_two = { - 'indices': { - 'index-2016.03.03': { - 'fields': { - 'timestamp': { - 'density': 100, - 'min_value_as_string': '2016-03-03T00:00:06.189Z', - 'max_value': 1457049599152, 'max_doc': 415651, - 'min_value': 1456963206189, 'doc_count': 415651, - 'max_value_as_string': '2016-03-03T23:59:59.152Z', - 'sum_total_term_freq': -1, 'sum_doc_freq': 1662604}}}, - 'index-2016.03.04': { - 'fields': { - 'timestamp': { - 'density': 100, - 'min_value_as_string': '2016-03-04T00:00:00.812Z', - 'max_value': 1457135999223, 'max_doc': 426762, - 'min_value': 1457049600812, 'doc_count': 426762, - 'max_value_as_string': '2016-03-04T23:59:59.223Z', - 'sum_total_term_freq': -1, 'sum_doc_freq': 1673715}}}, + "indices": { + "index-2016.03.03": { + "fields": { + "timestamp": { + "density": 100, + "min_value_as_string": "2016-03-03T00:00:06.189Z", + "max_value": 1457049599152, + "max_doc": 415651, + "min_value": 1456963206189, + "doc_count": 415651, + "max_value_as_string": 
"2016-03-03T23:59:59.152Z", + "sum_total_term_freq": -1, + "sum_doc_freq": 1662604, + } + } + }, + "index-2016.03.04": { + "fields": { + "timestamp": { + "density": 100, + "min_value_as_string": "2016-03-04T00:00:00.812Z", + "max_value": 1457135999223, + "max_doc": 426762, + "min_value": 1457049600812, + "doc_count": 426762, + "max_value_as_string": "2016-03-04T23:59:59.223Z", + "sum_total_term_freq": -1, + "sum_doc_freq": 1673715, + } + } + }, } } fieldstats_four = { - 'indices': { - 'a-2016.03.03': { - 'fields': { - 'timestamp': { - 'density': 100, - 'min_value_as_string': '2016-03-03T00:00:06.189Z', - 'max_value': 1457049599152, 'max_doc': 415651, - 'min_value': 1456963206189, 'doc_count': 415651, - 'max_value_as_string': '2016-03-03T23:59:59.152Z', - 'sum_total_term_freq': -1, 'sum_doc_freq': 1662604}}}, - 'b-2016.03.04': { - 'fields': { - 'timestamp': { - 'density': 100, - 'min_value_as_string': '2016-03-04T00:00:00.812Z', - 'max_value': 1457135999223, 'max_doc': 426762, - 'min_value': 1457049600812, 'doc_count': 426762, - 'max_value_as_string': '2016-03-04T23:59:59.223Z', - 'sum_total_term_freq': -1, 'sum_doc_freq': 1673715}}}, - 'd-2016.03.06': { - 'fields': { - 'timestamp': { - 'density': 100, - 'min_value_as_string': '2016-03-04T00:00:00.812Z', - 'max_value': 1457308799223, 'max_doc': 426762, - 'min_value': 1457222400567, 'doc_count': 426762, - 'max_value_as_string': '2016-03-04T23:59:59.223Z', - 'sum_total_term_freq': -1, 'sum_doc_freq': 1673715}}}, + "indices": { + "a-2016.03.03": { + "fields": { + "timestamp": { + "density": 100, + "min_value_as_string": "2016-03-03T00:00:06.189Z", + "max_value": 1457049599152, + "max_doc": 415651, + "min_value": 1456963206189, + "doc_count": 415651, + "max_value_as_string": "2016-03-03T23:59:59.152Z", + "sum_total_term_freq": -1, + "sum_doc_freq": 1662604, + } + } + }, + "b-2016.03.04": { + "fields": { + "timestamp": { + "density": 100, + "min_value_as_string": "2016-03-04T00:00:00.812Z", + "max_value": 1457135999223, 
+ "max_doc": 426762, + "min_value": 1457049600812, + "doc_count": 426762, + "max_value_as_string": "2016-03-04T23:59:59.223Z", + "sum_total_term_freq": -1, + "sum_doc_freq": 1673715, + } + } + }, + "d-2016.03.06": { + "fields": { + "timestamp": { + "density": 100, + "min_value_as_string": "2016-03-04T00:00:00.812Z", + "max_value": 1457308799223, + "max_doc": 426762, + "min_value": 1457222400567, + "doc_count": 426762, + "max_value_as_string": "2016-03-04T23:59:59.223Z", + "sum_total_term_freq": -1, + "sum_doc_freq": 1673715, + } + } + }, } } fieldstats_query = { - 'aggregations': { - 'min' : { - 'value_as_string': '2016-03-03T00:00:06.189Z', - 'value': 1456963206189, - }, - 'max' : { - 'value': 1457049599152, - 'value_as_string': '2016-03-03T23:59:59.152Z', - } + "aggregations": { + "min": { + "value_as_string": "2016-03-03T00:00:06.189Z", + "value": 1456963206189, + }, + "max": { + "value": 1457049599152, + "value_as_string": "2016-03-03T23:59:59.152Z", + }, } } -shards = { 'indices': { named_index: { 'shards': { - '0': [ { 'num_search_segments' : 15 }, { 'num_search_segments' : 21 } ], - '1': [ { 'num_search_segments' : 19 }, { 'num_search_segments' : 16 } ] }}}} -fm_shards = { 'indices': { named_index: { 'shards': { - '0': [ { 'num_search_segments' : 1 }, { 'num_search_segments' : 1 } ], - '1': [ { 'num_search_segments' : 1 }, { 'num_search_segments' : 1 } ] }}}} +shards = { + "indices": { + named_index: { + "shards": { + "0": [{"num_search_segments": 15}, {"num_search_segments": 21}], + "1": [{"num_search_segments": 19}, {"num_search_segments": 16}], + } + } + } +} +fm_shards = { + "indices": { + named_index: { + "shards": { + "0": [{"num_search_segments": 1}, {"num_search_segments": 1}], + "1": [{"num_search_segments": 1}, {"num_search_segments": 1}], + } + } + } +} -loginfo = { "loglevel": "INFO", - "logfile": None, - "logformat": "default" - } -default_format = '%(asctime)s %(levelname)-9s %(message)s' -debug_format = '%(asctime)s %(levelname)-9s %(name)22s 
%(funcName)22s:%(lineno)-4d %(message)s' +loginfo = {"loglevel": "INFO", "logfile": None, "logformat": "default"} +default_format = "%(asctime)s %(levelname)-9s %(message)s" +debug_format = ( + "%(asctime)s %(levelname)-9s %(name)22s %(funcName)22s:%(lineno)-4d %(message)s" +) -yamlconfig = ''' +yamlconfig = """ --- # Remember, leave a key empty to use the default value. None will be a string, # not a Python "NoneType" @@ -585,8 +795,8 @@ logfile: logformat: default quiet: False -''' -pattern_ft = ''' +""" +pattern_ft = """ --- actions: 1: @@ -600,8 +810,8 @@ kind: prefix value: a exclude: False -''' -age_ft = ''' +""" +age_ft = """ --- actions: 1: @@ -618,8 +828,8 @@ unit: seconds unit_count: 0 epoch: 1456963201 -''' -space_ft = ''' +""" +space_ft = """ --- actions: 1: @@ -634,8 +844,8 @@ source: name use_age: True timestring: '%Y.%m.%d' -''' -forcemerge_ft = ''' +""" +forcemerge_ft = """ --- actions: 1: @@ -647,8 +857,8 @@ filters: - filtertype: forcemerged max_num_segments: 2 -''' -allocated_ft = ''' +""" +allocated_ft = """ --- actions: 1: @@ -662,8 +872,8 @@ key: tag value: foo allocation_type: include -''' -kibana_ft = ''' +""" +kibana_ft = """ --- actions: 1: @@ -674,8 +884,8 @@ disable_action: False filters: - filtertype: kibana -''' -opened_ft = ''' +""" +opened_ft = """ --- actions: 1: @@ -686,8 +896,8 @@ disable_action: False filters: - filtertype: opened -''' -closed_ft = ''' +""" +closed_ft = """ --- actions: 1: @@ -698,8 +908,8 @@ disable_action: False filters: - filtertype: closed -''' -none_ft = ''' +""" +none_ft = """ --- actions: 1: @@ -710,8 +920,8 @@ disable_action: False filters: - filtertype: none -''' -invalid_ft = ''' +""" +invalid_ft = """ --- actions: 1: @@ -722,8 +932,8 @@ disable_action: False filters: - filtertype: sir_not_appearing_in_this_film -''' -snap_age_ft = ''' +""" +snap_age_ft = """ --- actions: 1: @@ -737,8 +947,8 @@ direction: older unit: days unit_count: 1 -''' -snap_pattern_ft= ''' +""" +snap_pattern_ft = """ --- actions: 
1: @@ -751,8 +961,8 @@ - filtertype: pattern kind: prefix value: sna -''' -snap_none_ft = ''' +""" +snap_none_ft = """ --- actions: 1: @@ -763,8 +973,8 @@ disable_action: False filters: - filtertype: none -''' -size_ft = ''' +""" +size_ft = """ --- actions: 1: @@ -778,28 +988,836 @@ size_threshold: 1.04 size_behavior: total threshold_behavior: less_than -''' +""" -generic_task = {'task': 'I0ekFjMhSPCQz7FUs1zJOg:54510686'} -incomplete_task = {'completed': False, 'task': {'node': 'I0ekFjMhSPCQz7FUs1zJOg', 'status': {'retries': {'bulk': 0, 'search': 0}, 'updated': 0, 'batches': 3647, 'throttled_until_millis': 0, 'throttled_millis': 0, 'noops': 0, 'created': 3646581, 'deleted': 0, 'requests_per_second': -1.0, 'version_conflicts': 0, 'total': 3646581}, 'description': 'UNIT TEST', 'running_time_in_nanos': 1637039537721, 'cancellable': True, 'action': 'indices:data/write/reindex', 'type': 'transport', 'id': 54510686, 'start_time_in_millis': 1489695981997}, 'response': {'retries': {'bulk': 0, 'search': 0}, 'updated': 0, 'batches': 3647, 'throttled_until_millis': 0, 'throttled_millis': 0, 'noops': 0, 'created': 3646581, 'deleted': 0, 'took': 1636917, 'requests_per_second': -1.0, 'timed_out': False, 'failures': [], 'version_conflicts': 0, 'total': 3646581}} -completed_task = {'completed': True, 'task': {'node': 'I0ekFjMhSPCQz7FUs1zJOg', 'status': {'retries': {'bulk': 0, 'search': 0}, 'updated': 0, 'batches': 3647, 'throttled_until_millis': 0, 'throttled_millis': 0, 'noops': 0, 'created': 3646581, 'deleted': 0, 'requests_per_second': -1.0, 'version_conflicts': 0, 'total': 3646581}, 'description': 'UNIT TEST', 'running_time_in_nanos': 1637039537721, 'cancellable': True, 'action': 'indices:data/write/reindex', 'type': 'transport', 'id': 54510686, 'start_time_in_millis': 1489695981997}, 'response': {'retries': {'bulk': 0, 'search': 0}, 'updated': 0, 'batches': 3647, 'throttled_until_millis': 0, 'throttled_millis': 0, 'noops': 0, 'created': 3646581, 'deleted': 0, 'took': 1636917, 
'requests_per_second': -1.0, 'timed_out': False, 'failures': [], 'version_conflicts': 0, 'total': 3646581}} -completed_task_zero_total = {'completed': True, 'task': {'node': 'I0ekFjMhSPCQz7FUs1zJOg', 'status': {'retries': {'bulk': 0, 'search': 0}, 'updated': 0, 'batches': 0, 'throttled_until_millis': 0, 'throttled_millis': 0, 'noops': 0, 'created': 0, 'deleted': 0, 'requests_per_second': -1.0, 'version_conflicts': 0, 'total': 0}, 'description': 'UNIT TEST', 'running_time_in_nanos': 1637039537721, 'cancellable': True, 'action': 'indices:data/write/reindex', 'type': 'transport', 'id': 54510686, 'start_time_in_millis': 1489695981997}, 'response': {'retries': {'bulk': 0, 'search': 0}, 'updated': 0, 'batches': 0, 'throttled_until_millis': 0, 'throttled_millis': 0, 'noops': 0, 'created': 0, 'deleted': 0, 'took': 1636917, 'requests_per_second': -1.0, 'timed_out': False, 'failures': [], 'version_conflicts': 0, 'total': 0}} -recovery_output = {'index-2015.01.01': {'shards' : [{'stage':'DONE'}]}, 'index-2015.02.01': {'shards' : [{'stage':'DONE'}]}} -unrecovered_output = {'index-2015.01.01': {'shards' : [{'stage':'INDEX'}]}, 'index-2015.02.01': {'shards' : [{'stage':'INDEX'}]}} -cluster_health = { "cluster_name": "unit_test", "status": "green", "timed_out": False, "number_of_nodes": 7, "number_of_data_nodes": 3, "active_primary_shards": 235, "active_shards": 471, "relocating_shards": 0, "initializing_shards": 0, "unassigned_shards": 0, "delayed_unassigned_shards": 0, "number_of_pending_tasks": 0, "task_max_waiting_in_queue_millis": 0, "active_shards_percent_as_number": 100} -reindex_basic = { 'source': { 'index': named_index }, 'dest': { 'index': 'other_index' } } -reindex_replace = { 'source': { 'index': 'REINDEX_SELECTION' }, 'dest': { 'index': 'other_index' } } -reindex_migration = { 'source': { 'index': named_index }, 'dest': { 'index': 'MIGRATION' } } -index_list_966 = ['indexv0.2_2017-02-12_536a9247f9fa4fc7a7942ad46ea14e0d'] -recovery_966 = 
{'indexv0.2_2017-02-12_536a9247f9fa4fc7a7942ad46ea14e0d': {'shards': [{'total_time': '10.1m', 'index': {'files': {'reused': 0, 'total': 15, 'percent': '100.0%', 'recovered': 15}, 'total_time': '10.1m', 'target_throttle_time': '-1', 'total_time_in_millis': 606577, 'source_throttle_time_in_millis': 0, 'source_throttle_time': '-1', 'target_throttle_time_in_millis': 0, 'size': {'recovered_in_bytes': 3171596177, 'reused': '0b', 'total_in_bytes': 3171596177, 'percent': '100.0%', 'reused_in_bytes': 0, 'total': '2.9gb', 'recovered': '2.9gb'}}, 'verify_index': {'total_time': '0s', 'total_time_in_millis': 0, 'check_index_time_in_millis': 0, 'check_index_time': '0s'}, 'target': {'ip': 'x.x.x.7', 'host': 'x.x.x.7', 'transport_address': 'x.x.x.7:9300', 'id': 'K4xQPaOFSWSPLwhb0P47aQ', 'name': 'staging-es5-forcem'}, 'source': {'index': 'indexv0.2_2017-02-12_536a9247f9fa4fc7a7942ad46ea14e0d', 'version': '5.1.1', 'snapshot': 'force-merge', 'repository': 'force-merge'}, 'translog': {'total_time': '45ms', 'percent': '100.0%', 'total_time_in_millis': 45, 'total_on_start': 0, 'total': 0, 'recovered': 0}, 'start_time': '2017-05-16T11:54:48.183Z', 'primary': True, 'total_time_in_millis': 606631, 'stop_time_in_millis': 1494936294815, 'stop_time': '2017-05-16T12:04:54.815Z', 'stage': 'DONE', 'type': 'SNAPSHOT', 'id': 1, 'start_time_in_millis': 1494935688183}, {'total_time': '10m', 'index': {'files': {'reused': 0, 'total': 15, 'percent': '100.0%', 'recovered': 15}, 'total_time': '10m', 'target_throttle_time': '-1', 'total_time_in_millis': 602302, 'source_throttle_time_in_millis': 0, 'source_throttle_time': '-1', 'target_throttle_time_in_millis': 0, 'size': {'recovered_in_bytes': 3162299781, 'reused': '0b', 'total_in_bytes': 3162299781, 'percent': '100.0%', 'reused_in_bytes': 0, 'total': '2.9gb', 'recovered': '2.9gb'}}, 'verify_index': {'total_time': '0s', 'total_time_in_millis': 0, 'check_index_time_in_millis': 0, 'check_index_time': '0s'}, 'target': {'ip': 'x.x.x.7', 'host': 'x.x.x.7', 
'transport_address': 'x.x.x.7:9300', 'id': 'K4xQPaOFSWSPLwhb0P47aQ', 'name': 'staging-es5-forcem'}, 'source': {'index': 'indexv0.2_2017-02-12_536a9247f9fa4fc7a7942ad46ea14e0d', 'version': '5.1.1', 'snapshot': 'force-merge', 'repository': 'force-merge'}, 'translog': {'total_time': '389ms', 'percent': '100.0%', 'total_time_in_millis': 389, 'total_on_start': 0, 'total': 0, 'recovered': 0}, 'start_time': '2017-05-16T12:04:51.606Z', 'primary': True, 'total_time_in_millis': 602698, 'stop_time_in_millis': 1494936894305, 'stop_time': '2017-05-16T12:14:54.305Z', 'stage': 'DONE', 'type': 'SNAPSHOT', 'id': 5, 'start_time_in_millis': 1494936291606}, {'total_time': '10.1m', 'index': {'files': {'reused': 0, 'total': 15, 'percent': '100.0%', 'recovered': 15}, 'total_time': '10.1m', 'target_throttle_time': '-1', 'total_time_in_millis': 606692, 'source_throttle_time_in_millis': 0, 'source_throttle_time': '-1', 'target_throttle_time_in_millis': 0, 'size': {'recovered_in_bytes': 3156050994, 'reused': '0b', 'total_in_bytes': 3156050994, 'percent': '100.0%', 'reused_in_bytes': 0, 'total': '2.9gb', 'recovered': '2.9gb'}}, 'verify_index': {'total_time': '0s', 'total_time_in_millis': 0, 'check_index_time_in_millis': 0, 'check_index_time': '0s'}, 'target': {'ip': 'x.x.x.7', 'host': 'x.x.x.7', 'transport_address': 'x.x.x.7:9300', 'id': 'K4xQPaOFSWSPLwhb0P47aQ', 'name': 'staging-es5-forcem'}, 'source': {'index': 'indexv0.2_2017-02-12_536a9247f9fa4fc7a7942ad46ea14e0d', 'version': '5.1.1', 'snapshot': 'force-merge', 'repository': 'force-merge'}, 'translog': {'total_time': '38ms', 'percent': '100.0%', 'total_time_in_millis': 38, 'total_on_start': 0, 'total': 0, 'recovered': 0}, 'start_time': '2017-05-16T11:54:48.166Z', 'primary': True, 'total_time_in_millis': 606737, 'stop_time_in_millis': 1494936294904, 'stop_time': '2017-05-16T12:04:54.904Z', 'stage': 'DONE', 'type': 'SNAPSHOT', 'id': 3, 'start_time_in_millis': 1494935688166}, {'total_time': '10m', 'index': {'files': {'reused': 0, 'total': 
15, 'percent': '100.0%', 'recovered': 15}, 'total_time': '10m', 'target_throttle_time': '-1', 'total_time_in_millis': 602010, 'source_throttle_time_in_millis': 0, 'source_throttle_time': '-1', 'target_throttle_time_in_millis': 0, 'size': {'recovered_in_bytes': 3153017440, 'reused': '0b', 'total_in_bytes': 3153017440, 'percent': '100.0%', 'reused_in_bytes': 0, 'total': '2.9gb', 'recovered': '2.9gb'}}, 'verify_index': {'total_time': '0s', 'total_time_in_millis': 0, 'check_index_time_in_millis': 0, 'check_index_time': '0s'}, 'target': {'ip': 'x.x.x.7', 'host': 'x.x.x.7', 'transport_address': 'x.x.x.7:9300', 'id': 'K4xQPaOFSWSPLwhb0P47aQ', 'name': 'staging-es5-forcem'}, 'source': {'index': 'indexv0.2_2017-02-12_536a9247f9fa4fc7a7942ad46ea14e0d', 'version': '5.1.1', 'snapshot': 'force-merge', 'repository': 'force-merge'}, 'translog': {'total_time': '558ms', 'percent': '100.0%', 'total_time_in_millis': 558, 'total_on_start': 0, 'total': 0, 'recovered': 0}, 'start_time': '2017-05-16T12:04:51.369Z', 'primary': True, 'total_time_in_millis': 602575, 'stop_time_in_millis': 1494936893944, 'stop_time': '2017-05-16T12:14:53.944Z', 'stage': 'DONE', 'type': 'SNAPSHOT', 'id': 4, 'start_time_in_millis': 1494936291369}, {'total_time': '10m', 'index': {'files': {'reused': 0, 'total': 15, 'percent': '100.0%', 'recovered': 15}, 'total_time': '10m', 'target_throttle_time': '-1', 'total_time_in_millis': 600492, 'source_throttle_time_in_millis': 0, 'source_throttle_time': '-1', 'target_throttle_time_in_millis': 0, 'size': {'recovered_in_bytes': 3153347402, 'reused': '0b', 'total_in_bytes': 3153347402, 'percent': '100.0%', 'reused_in_bytes': 0, 'total': '2.9gb', 'recovered': '2.9gb'}}, 'verify_index': {'total_time': '0s', 'total_time_in_millis': 0, 'check_index_time_in_millis': 0, 'check_index_time': '0s'}, 'target': {'ip': 'x.x.x.7', 'host': 'x.x.x.7', 'transport_address': 'x.x.x.7:9300', 'id': 'K4xQPaOFSWSPLwhb0P47aQ', 'name': 'staging-es5-forcem'}, 'source': {'index': 
'indexv0.2_2017-02-12_536a9247f9fa4fc7a7942ad46ea14e0d', 'version': '5.1.1', 'snapshot': 'force-merge', 'repository': 'force-merge'}, 'translog': {'total_time': '445ms', 'percent': '100.0%', 'total_time_in_millis': 445, 'total_on_start': 0, 'total': 0, 'recovered': 0}, 'start_time': '2017-05-16T12:04:54.817Z', 'primary': True, 'total_time_in_millis': 600946, 'stop_time_in_millis': 1494936895764, 'stop_time': '2017-05-16T12:14:55.764Z', 'stage': 'DONE', 'type': 'SNAPSHOT', 'id': 6, 'start_time_in_millis': 1494936294817}, {'total_time': '10m', 'index': {'files': {'reused': 0, 'total': 15, 'percent': '100.0%', 'recovered': 15}, 'total_time': '10m', 'target_throttle_time': '-1', 'total_time_in_millis': 603194, 'source_throttle_time_in_millis': 0, 'source_throttle_time': '-1', 'target_throttle_time_in_millis': 0, 'size': {'recovered_in_bytes': 3148003580, 'reused': '0b', 'total_in_bytes': 3148003580, 'percent': '100.0%', 'reused_in_bytes': 0, 'total': '2.9gb', 'recovered': '2.9gb'}}, 'verify_index': {'total_time': '0s', 'total_time_in_millis': 0, 'check_index_time_in_millis': 0, 'check_index_time': '0s'}, 'target': {'ip': 'x.x.x.7', 'host': 'x.x.x.7', 'transport_address': 'x.x.x.7:9300', 'id': 'K4xQPaOFSWSPLwhb0P47aQ', 'name': 'staging-es5-forcem'}, 'source': {'index': 'indexv0.2_2017-02-12_536a9247f9fa4fc7a7942ad46ea14e0d', 'version': '5.1.1', 'snapshot': 'force-merge', 'repository': 'force-merge'}, 'translog': {'total_time': '225ms', 'percent': '100.0%', 'total_time_in_millis': 225, 'total_on_start': 0, 'total': 0, 'recovered': 0}, 'start_time': '2017-05-16T11:54:48.173Z', 'primary': True, 'total_time_in_millis': 603429, 'stop_time_in_millis': 1494936291602, 'stop_time': '2017-05-16T12:04:51.602Z', 'stage': 'DONE', 'type': 'SNAPSHOT', 'id': 2, 'start_time_in_millis': 1494935688173}, {'total_time': '10m', 'index': {'files': {'reused': 0, 'total': 15, 'percent': '100.0%', 'recovered': 15}, 'total_time': '10m', 'target_throttle_time': '-1', 'total_time_in_millis': 
601453, 'source_throttle_time_in_millis': 0, 'source_throttle_time': '-1', 'target_throttle_time_in_millis': 0, 'size': {'recovered_in_bytes': 3168132171, 'reused': '0b', 'total_in_bytes': 3168132171, 'percent': '100.0%', 'reused_in_bytes': 0, 'total': '2.9gb', 'recovered': '2.9gb'}}, 'verify_index': {'total_time': '0s', 'total_time_in_millis': 0, 'check_index_time_in_millis': 0, 'check_index_time': '0s'}, 'target': {'ip': 'x.x.x.7', 'host': 'x.x.x.7', 'transport_address': 'x.x.x.7:9300', 'id': 'K4xQPaOFSWSPLwhb0P47aQ', 'name': 'staging-es5-forcem'}, 'source': {'index': 'indexv0.2_2017-02-12_536a9247f9fa4fc7a7942ad46ea14e0d', 'version': '5.1.1', 'snapshot': 'force-merge', 'repository': 'force-merge'}, 'translog': {'total_time': '43ms', 'percent': '100.0%', 'total_time_in_millis': 43, 'total_on_start': 0, 'total': 0, 'recovered': 0}, 'start_time': '2017-05-16T12:04:54.905Z', 'primary': True, 'total_time_in_millis': 601503, 'stop_time_in_millis': 1494936896408, 'stop_time': '2017-05-16T12:14:56.408Z', 'stage': 'DONE', 'type': 'SNAPSHOT', 'id': 7, 'start_time_in_millis': 1494936294905}, {'total_time': '10m', 'index': {'files': {'reused': 0, 'total': 15, 'percent': '100.0%', 'recovered': 15}, 'total_time': '10m', 'target_throttle_time': '-1', 'total_time_in_millis': 602897, 'source_throttle_time_in_millis': 0, 'source_throttle_time': '-1', 'target_throttle_time_in_millis': 0, 'size': {'recovered_in_bytes': 3153750393, 'reused': '0b', 'total_in_bytes': 3153750393, 'percent': '100.0%', 'reused_in_bytes': 0, 'total': '2.9gb', 'recovered': '2.9gb'}}, 'verify_index': {'total_time': '0s', 'total_time_in_millis': 0, 'check_index_time_in_millis': 0, 'check_index_time': '0s'}, 'target': {'ip': 'x.x.x.7', 'host': 'x.x.x.7', 'transport_address': 'x.x.x.7:9300', 'id': 'K4xQPaOFSWSPLwhb0P47aQ', 'name': 'staging-es5-forcem'}, 'source': {'index': 'indexv0.2_2017-02-12_536a9247f9fa4fc7a7942ad46ea14e0d', 'version': '5.1.1', 'snapshot': 'force-merge', 'repository': 'force-merge'}, 
'translog': {'total_time': '271ms', 'percent': '100.0%', 'total_time_in_millis': 271, 'total_on_start': 0, 'total': 0, 'recovered': 0}, 'start_time': '2017-05-16T11:54:48.191Z', 'primary': True, 'total_time_in_millis': 603174, 'stop_time_in_millis': 1494936291366, 'stop_time': '2017-05-16T12:04:51.366Z', 'stage': 'DONE', 'type': 'SNAPSHOT', 'id': 0, 'start_time_in_millis': 1494935688191}]}} -no_snap_tasks = {'nodes': {'node1': {'tasks': {'task1': {'action': 'cluster:monitor/tasks/lists[n]'}}}}} -snap_task = {'nodes': {'node1': {'tasks': {'task1': {'action': 'cluster:admin/snapshot/delete'}}}}} -watermark_persistent = {'persistent':{'cluster':{'routing':{'allocation':{'disk':{'watermark':{'low':'11%','high':'60gb'}}}}}}} -watermark_transient = {'transient':{'cluster':{'routing':{'allocation':{'disk':{'watermark':{'low':'9%','high':'50gb'}}}}}}} +generic_task = {"task": "I0ekFjMhSPCQz7FUs1zJOg:54510686"} +incomplete_task = { + "completed": False, + "task": { + "node": "I0ekFjMhSPCQz7FUs1zJOg", + "status": { + "retries": {"bulk": 0, "search": 0}, + "updated": 0, + "batches": 3647, + "throttled_until_millis": 0, + "throttled_millis": 0, + "noops": 0, + "created": 3646581, + "deleted": 0, + "requests_per_second": -1.0, + "version_conflicts": 0, + "total": 3646581, + }, + "description": "UNIT TEST", + "running_time_in_nanos": 1637039537721, + "cancellable": True, + "action": "indices:data/write/reindex", + "type": "transport", + "id": 54510686, + "start_time_in_millis": 1489695981997, + }, + "response": { + "retries": {"bulk": 0, "search": 0}, + "updated": 0, + "batches": 3647, + "throttled_until_millis": 0, + "throttled_millis": 0, + "noops": 0, + "created": 3646581, + "deleted": 0, + "took": 1636917, + "requests_per_second": -1.0, + "timed_out": False, + "failures": [], + "version_conflicts": 0, + "total": 3646581, + }, +} +completed_task = { + "completed": True, + "task": { + "node": "I0ekFjMhSPCQz7FUs1zJOg", + "status": { + "retries": {"bulk": 0, "search": 0}, + 
"updated": 0, + "batches": 3647, + "throttled_until_millis": 0, + "throttled_millis": 0, + "noops": 0, + "created": 3646581, + "deleted": 0, + "requests_per_second": -1.0, + "version_conflicts": 0, + "total": 3646581, + }, + "description": "UNIT TEST", + "running_time_in_nanos": 1637039537721, + "cancellable": True, + "action": "indices:data/write/reindex", + "type": "transport", + "id": 54510686, + "start_time_in_millis": 1489695981997, + }, + "response": { + "retries": {"bulk": 0, "search": 0}, + "updated": 0, + "batches": 3647, + "throttled_until_millis": 0, + "throttled_millis": 0, + "noops": 0, + "created": 3646581, + "deleted": 0, + "took": 1636917, + "requests_per_second": -1.0, + "timed_out": False, + "failures": [], + "version_conflicts": 0, + "total": 3646581, + }, +} +completed_task_zero_total = { + "completed": True, + "task": { + "node": "I0ekFjMhSPCQz7FUs1zJOg", + "status": { + "retries": {"bulk": 0, "search": 0}, + "updated": 0, + "batches": 0, + "throttled_until_millis": 0, + "throttled_millis": 0, + "noops": 0, + "created": 0, + "deleted": 0, + "requests_per_second": -1.0, + "version_conflicts": 0, + "total": 0, + }, + "description": "UNIT TEST", + "running_time_in_nanos": 1637039537721, + "cancellable": True, + "action": "indices:data/write/reindex", + "type": "transport", + "id": 54510686, + "start_time_in_millis": 1489695981997, + }, + "response": { + "retries": {"bulk": 0, "search": 0}, + "updated": 0, + "batches": 0, + "throttled_until_millis": 0, + "throttled_millis": 0, + "noops": 0, + "created": 0, + "deleted": 0, + "took": 1636917, + "requests_per_second": -1.0, + "timed_out": False, + "failures": [], + "version_conflicts": 0, + "total": 0, + }, +} +recovery_output = { + "index-2015.01.01": {"shards": [{"stage": "DONE"}]}, + "index-2015.02.01": {"shards": [{"stage": "DONE"}]}, +} +unrecovered_output = { + "index-2015.01.01": {"shards": [{"stage": "INDEX"}]}, + "index-2015.02.01": {"shards": [{"stage": "INDEX"}]}, +} +cluster_health = { + 
"cluster_name": "unit_test", + "status": "green", + "timed_out": False, + "number_of_nodes": 7, + "number_of_data_nodes": 3, + "active_primary_shards": 235, + "active_shards": 471, + "relocating_shards": 0, + "initializing_shards": 0, + "unassigned_shards": 0, + "delayed_unassigned_shards": 0, + "number_of_pending_tasks": 0, + "task_max_waiting_in_queue_millis": 0, + "active_shards_percent_as_number": 100, +} +reindex_basic = {"source": {"index": named_index}, "dest": {"index": "other_index"}} +reindex_replace = { + "source": {"index": "REINDEX_SELECTION"}, + "dest": {"index": "other_index"}, +} +reindex_migration = {"source": {"index": named_index}, "dest": {"index": "MIGRATION"}} +index_list_966 = ["indexv0.2_2017-02-12_536a9247f9fa4fc7a7942ad46ea14e0d"] +recovery_966 = { + "indexv0.2_2017-02-12_536a9247f9fa4fc7a7942ad46ea14e0d": { + "shards": [ + { + "total_time": "10.1m", + "index": { + "files": { + "reused": 0, + "total": 15, + "percent": "100.0%", + "recovered": 15, + }, + "total_time": "10.1m", + "target_throttle_time": "-1", + "total_time_in_millis": 606577, + "source_throttle_time_in_millis": 0, + "source_throttle_time": "-1", + "target_throttle_time_in_millis": 0, + "size": { + "recovered_in_bytes": 3171596177, + "reused": "0b", + "total_in_bytes": 3171596177, + "percent": "100.0%", + "reused_in_bytes": 0, + "total": "2.9gb", + "recovered": "2.9gb", + }, + }, + "verify_index": { + "total_time": "0s", + "total_time_in_millis": 0, + "check_index_time_in_millis": 0, + "check_index_time": "0s", + }, + "target": { + "ip": "x.x.x.7", + "host": "x.x.x.7", + "transport_address": "x.x.x.7:9300", + "id": "K4xQPaOFSWSPLwhb0P47aQ", + "name": "staging-es5-forcem", + }, + "source": { + "index": "indexv0.2_2017-02-12_536a9247f9fa4fc7a7942ad46ea14e0d", + "version": "5.1.1", + "snapshot": "force-merge", + "repository": "force-merge", + }, + "translog": { + "total_time": "45ms", + "percent": "100.0%", + "total_time_in_millis": 45, + "total_on_start": 0, + "total": 0, + 
"recovered": 0, + }, + "start_time": "2017-05-16T11:54:48.183Z", + "primary": True, + "total_time_in_millis": 606631, + "stop_time_in_millis": 1494936294815, + "stop_time": "2017-05-16T12:04:54.815Z", + "stage": "DONE", + "type": "SNAPSHOT", + "id": 1, + "start_time_in_millis": 1494935688183, + }, + { + "total_time": "10m", + "index": { + "files": { + "reused": 0, + "total": 15, + "percent": "100.0%", + "recovered": 15, + }, + "total_time": "10m", + "target_throttle_time": "-1", + "total_time_in_millis": 602302, + "source_throttle_time_in_millis": 0, + "source_throttle_time": "-1", + "target_throttle_time_in_millis": 0, + "size": { + "recovered_in_bytes": 3162299781, + "reused": "0b", + "total_in_bytes": 3162299781, + "percent": "100.0%", + "reused_in_bytes": 0, + "total": "2.9gb", + "recovered": "2.9gb", + }, + }, + "verify_index": { + "total_time": "0s", + "total_time_in_millis": 0, + "check_index_time_in_millis": 0, + "check_index_time": "0s", + }, + "target": { + "ip": "x.x.x.7", + "host": "x.x.x.7", + "transport_address": "x.x.x.7:9300", + "id": "K4xQPaOFSWSPLwhb0P47aQ", + "name": "staging-es5-forcem", + }, + "source": { + "index": "indexv0.2_2017-02-12_536a9247f9fa4fc7a7942ad46ea14e0d", + "version": "5.1.1", + "snapshot": "force-merge", + "repository": "force-merge", + }, + "translog": { + "total_time": "389ms", + "percent": "100.0%", + "total_time_in_millis": 389, + "total_on_start": 0, + "total": 0, + "recovered": 0, + }, + "start_time": "2017-05-16T12:04:51.606Z", + "primary": True, + "total_time_in_millis": 602698, + "stop_time_in_millis": 1494936894305, + "stop_time": "2017-05-16T12:14:54.305Z", + "stage": "DONE", + "type": "SNAPSHOT", + "id": 5, + "start_time_in_millis": 1494936291606, + }, + { + "total_time": "10.1m", + "index": { + "files": { + "reused": 0, + "total": 15, + "percent": "100.0%", + "recovered": 15, + }, + "total_time": "10.1m", + "target_throttle_time": "-1", + "total_time_in_millis": 606692, + "source_throttle_time_in_millis": 0, + 
"source_throttle_time": "-1", + "target_throttle_time_in_millis": 0, + "size": { + "recovered_in_bytes": 3156050994, + "reused": "0b", + "total_in_bytes": 3156050994, + "percent": "100.0%", + "reused_in_bytes": 0, + "total": "2.9gb", + "recovered": "2.9gb", + }, + }, + "verify_index": { + "total_time": "0s", + "total_time_in_millis": 0, + "check_index_time_in_millis": 0, + "check_index_time": "0s", + }, + "target": { + "ip": "x.x.x.7", + "host": "x.x.x.7", + "transport_address": "x.x.x.7:9300", + "id": "K4xQPaOFSWSPLwhb0P47aQ", + "name": "staging-es5-forcem", + }, + "source": { + "index": "indexv0.2_2017-02-12_536a9247f9fa4fc7a7942ad46ea14e0d", + "version": "5.1.1", + "snapshot": "force-merge", + "repository": "force-merge", + }, + "translog": { + "total_time": "38ms", + "percent": "100.0%", + "total_time_in_millis": 38, + "total_on_start": 0, + "total": 0, + "recovered": 0, + }, + "start_time": "2017-05-16T11:54:48.166Z", + "primary": True, + "total_time_in_millis": 606737, + "stop_time_in_millis": 1494936294904, + "stop_time": "2017-05-16T12:04:54.904Z", + "stage": "DONE", + "type": "SNAPSHOT", + "id": 3, + "start_time_in_millis": 1494935688166, + }, + { + "total_time": "10m", + "index": { + "files": { + "reused": 0, + "total": 15, + "percent": "100.0%", + "recovered": 15, + }, + "total_time": "10m", + "target_throttle_time": "-1", + "total_time_in_millis": 602010, + "source_throttle_time_in_millis": 0, + "source_throttle_time": "-1", + "target_throttle_time_in_millis": 0, + "size": { + "recovered_in_bytes": 3153017440, + "reused": "0b", + "total_in_bytes": 3153017440, + "percent": "100.0%", + "reused_in_bytes": 0, + "total": "2.9gb", + "recovered": "2.9gb", + }, + }, + "verify_index": { + "total_time": "0s", + "total_time_in_millis": 0, + "check_index_time_in_millis": 0, + "check_index_time": "0s", + }, + "target": { + "ip": "x.x.x.7", + "host": "x.x.x.7", + "transport_address": "x.x.x.7:9300", + "id": "K4xQPaOFSWSPLwhb0P47aQ", + "name": "staging-es5-forcem", + 
}, + "source": { + "index": "indexv0.2_2017-02-12_536a9247f9fa4fc7a7942ad46ea14e0d", + "version": "5.1.1", + "snapshot": "force-merge", + "repository": "force-merge", + }, + "translog": { + "total_time": "558ms", + "percent": "100.0%", + "total_time_in_millis": 558, + "total_on_start": 0, + "total": 0, + "recovered": 0, + }, + "start_time": "2017-05-16T12:04:51.369Z", + "primary": True, + "total_time_in_millis": 602575, + "stop_time_in_millis": 1494936893944, + "stop_time": "2017-05-16T12:14:53.944Z", + "stage": "DONE", + "type": "SNAPSHOT", + "id": 4, + "start_time_in_millis": 1494936291369, + }, + { + "total_time": "10m", + "index": { + "files": { + "reused": 0, + "total": 15, + "percent": "100.0%", + "recovered": 15, + }, + "total_time": "10m", + "target_throttle_time": "-1", + "total_time_in_millis": 600492, + "source_throttle_time_in_millis": 0, + "source_throttle_time": "-1", + "target_throttle_time_in_millis": 0, + "size": { + "recovered_in_bytes": 3153347402, + "reused": "0b", + "total_in_bytes": 3153347402, + "percent": "100.0%", + "reused_in_bytes": 0, + "total": "2.9gb", + "recovered": "2.9gb", + }, + }, + "verify_index": { + "total_time": "0s", + "total_time_in_millis": 0, + "check_index_time_in_millis": 0, + "check_index_time": "0s", + }, + "target": { + "ip": "x.x.x.7", + "host": "x.x.x.7", + "transport_address": "x.x.x.7:9300", + "id": "K4xQPaOFSWSPLwhb0P47aQ", + "name": "staging-es5-forcem", + }, + "source": { + "index": "indexv0.2_2017-02-12_536a9247f9fa4fc7a7942ad46ea14e0d", + "version": "5.1.1", + "snapshot": "force-merge", + "repository": "force-merge", + }, + "translog": { + "total_time": "445ms", + "percent": "100.0%", + "total_time_in_millis": 445, + "total_on_start": 0, + "total": 0, + "recovered": 0, + }, + "start_time": "2017-05-16T12:04:54.817Z", + "primary": True, + "total_time_in_millis": 600946, + "stop_time_in_millis": 1494936895764, + "stop_time": "2017-05-16T12:14:55.764Z", + "stage": "DONE", + "type": "SNAPSHOT", + "id": 6, + 
"start_time_in_millis": 1494936294817, + }, + { + "total_time": "10m", + "index": { + "files": { + "reused": 0, + "total": 15, + "percent": "100.0%", + "recovered": 15, + }, + "total_time": "10m", + "target_throttle_time": "-1", + "total_time_in_millis": 603194, + "source_throttle_time_in_millis": 0, + "source_throttle_time": "-1", + "target_throttle_time_in_millis": 0, + "size": { + "recovered_in_bytes": 3148003580, + "reused": "0b", + "total_in_bytes": 3148003580, + "percent": "100.0%", + "reused_in_bytes": 0, + "total": "2.9gb", + "recovered": "2.9gb", + }, + }, + "verify_index": { + "total_time": "0s", + "total_time_in_millis": 0, + "check_index_time_in_millis": 0, + "check_index_time": "0s", + }, + "target": { + "ip": "x.x.x.7", + "host": "x.x.x.7", + "transport_address": "x.x.x.7:9300", + "id": "K4xQPaOFSWSPLwhb0P47aQ", + "name": "staging-es5-forcem", + }, + "source": { + "index": "indexv0.2_2017-02-12_536a9247f9fa4fc7a7942ad46ea14e0d", + "version": "5.1.1", + "snapshot": "force-merge", + "repository": "force-merge", + }, + "translog": { + "total_time": "225ms", + "percent": "100.0%", + "total_time_in_millis": 225, + "total_on_start": 0, + "total": 0, + "recovered": 0, + }, + "start_time": "2017-05-16T11:54:48.173Z", + "primary": True, + "total_time_in_millis": 603429, + "stop_time_in_millis": 1494936291602, + "stop_time": "2017-05-16T12:04:51.602Z", + "stage": "DONE", + "type": "SNAPSHOT", + "id": 2, + "start_time_in_millis": 1494935688173, + }, + { + "total_time": "10m", + "index": { + "files": { + "reused": 0, + "total": 15, + "percent": "100.0%", + "recovered": 15, + }, + "total_time": "10m", + "target_throttle_time": "-1", + "total_time_in_millis": 601453, + "source_throttle_time_in_millis": 0, + "source_throttle_time": "-1", + "target_throttle_time_in_millis": 0, + "size": { + "recovered_in_bytes": 3168132171, + "reused": "0b", + "total_in_bytes": 3168132171, + "percent": "100.0%", + "reused_in_bytes": 0, + "total": "2.9gb", + "recovered": "2.9gb", + }, 
+ }, + "verify_index": { + "total_time": "0s", + "total_time_in_millis": 0, + "check_index_time_in_millis": 0, + "check_index_time": "0s", + }, + "target": { + "ip": "x.x.x.7", + "host": "x.x.x.7", + "transport_address": "x.x.x.7:9300", + "id": "K4xQPaOFSWSPLwhb0P47aQ", + "name": "staging-es5-forcem", + }, + "source": { + "index": "indexv0.2_2017-02-12_536a9247f9fa4fc7a7942ad46ea14e0d", + "version": "5.1.1", + "snapshot": "force-merge", + "repository": "force-merge", + }, + "translog": { + "total_time": "43ms", + "percent": "100.0%", + "total_time_in_millis": 43, + "total_on_start": 0, + "total": 0, + "recovered": 0, + }, + "start_time": "2017-05-16T12:04:54.905Z", + "primary": True, + "total_time_in_millis": 601503, + "stop_time_in_millis": 1494936896408, + "stop_time": "2017-05-16T12:14:56.408Z", + "stage": "DONE", + "type": "SNAPSHOT", + "id": 7, + "start_time_in_millis": 1494936294905, + }, + { + "total_time": "10m", + "index": { + "files": { + "reused": 0, + "total": 15, + "percent": "100.0%", + "recovered": 15, + }, + "total_time": "10m", + "target_throttle_time": "-1", + "total_time_in_millis": 602897, + "source_throttle_time_in_millis": 0, + "source_throttle_time": "-1", + "target_throttle_time_in_millis": 0, + "size": { + "recovered_in_bytes": 3153750393, + "reused": "0b", + "total_in_bytes": 3153750393, + "percent": "100.0%", + "reused_in_bytes": 0, + "total": "2.9gb", + "recovered": "2.9gb", + }, + }, + "verify_index": { + "total_time": "0s", + "total_time_in_millis": 0, + "check_index_time_in_millis": 0, + "check_index_time": "0s", + }, + "target": { + "ip": "x.x.x.7", + "host": "x.x.x.7", + "transport_address": "x.x.x.7:9300", + "id": "K4xQPaOFSWSPLwhb0P47aQ", + "name": "staging-es5-forcem", + }, + "source": { + "index": "indexv0.2_2017-02-12_536a9247f9fa4fc7a7942ad46ea14e0d", + "version": "5.1.1", + "snapshot": "force-merge", + "repository": "force-merge", + }, + "translog": { + "total_time": "271ms", + "percent": "100.0%", + "total_time_in_millis": 
271, + "total_on_start": 0, + "total": 0, + "recovered": 0, + }, + "start_time": "2017-05-16T11:54:48.191Z", + "primary": True, + "total_time_in_millis": 603174, + "stop_time_in_millis": 1494936291366, + "stop_time": "2017-05-16T12:04:51.366Z", + "stage": "DONE", + "type": "SNAPSHOT", + "id": 0, + "start_time_in_millis": 1494935688191, + }, + ] + } +} +no_snap_tasks = { + "nodes": { + "node1": {"tasks": {"task1": {"action": "cluster:monitor/tasks/lists[n]"}}} + } +} +snap_task = { + "nodes": { + "node1": {"tasks": {"task1": {"action": "cluster:admin/snapshot/delete"}}} + } +} +watermark_persistent = { + "persistent": { + "cluster": { + "routing": { + "allocation": {"disk": {"watermark": {"low": "11%", "high": "60gb"}}} + } + } + } +} +watermark_transient = { + "transient": { + "cluster": { + "routing": { + "allocation": {"disk": {"watermark": {"low": "9%", "high": "50gb"}}} + } + } + } +} watermark_both = { - 'persistent': {'cluster':{'routing':{'allocation':{'disk':{'watermark':{'low':'11%','high':'60gb'}}}}}}, - 'transient': {'cluster':{'routing':{'allocation':{'disk':{'watermark':{'low':'9%','high':'50gb'}}}}}}, + "persistent": { + "cluster": { + "routing": { + "allocation": {"disk": {"watermark": {"low": "11%", "high": "60gb"}}} + } + } + }, + "transient": { + "cluster": { + "routing": { + "allocation": {"disk": {"watermark": {"low": "9%", "high": "50gb"}}} + } + } + }, +} +empty_cluster_settings = {"persistent": {}, "transient": {}} +data_only_node_role = ["data"] +master_data_node_role = ["data", "master"] +# +# Deepfreeze values +# +repo_name_prefix = "deepfreeze-" +bucket_name_prefix = "deepfreeze-" +base_path = "snapshots" +canned_acl = "private" +storage_class = "intelligent_tiering" +keep = "6" +year = "2024" +month = "08" +month_exists = "06" +repositories = [ + "foo", + "deepfreeze-2024.01", + "deepfreeze-2024.02", + "deepfreeze-2024.03", + "deepfreeze-2024.04", + "deepfreeze-2024.05", + "deepfreeze-2024.06", + "deepfreeze-2024.07", +] 
+repositories_filtered = [ + "deepfreeze-2024.01", + "deepfreeze-2024.02", + "deepfreeze-2024.03", + "deepfreeze-2024.04", + "deepfreeze-2024.05", + "deepfreeze-2024.06", + "deepfreeze-2024.07", +] +ilm_policy_to_update = { + "deepfreeze-ilm-policy": { + "version": 3, + "modified_date": "2024-09-08T13:44:16.327Z", + "policy": { + "phases": { + "frozen": { + "min_age": "2d", + "actions": { + "searchable_snapshot": { + "snapshot_repository": "deepfreeze-2024.07", + "force_merge_index": True, + } + }, + }, + "delete": { + "min_age": "3d", + "actions": {"delete": {"delete_searchable_snapshot": False}}, + }, + "cold": { + "min_age": "1d", + "actions": { + "allocate": { + "number_of_replicas": 0, + "include": {}, + "exclude": {}, + "require": {}, + }, + "searchable_snapshot": { + "snapshot_repository": "deepfreeze-2024.07", + "force_merge_index": True, + }, + "set_priority": {"priority": 0}, + }, + }, + "hot": { + "min_age": "0ms", + "actions": { + "rollover": { + "max_age": "30d", + "max_primary_shard_size": "50gb", + }, + "set_priority": {"priority": 100}, + }, + }, + } + }, + "in_use_by": {"indices": [], "data_streams": [], "composable_templates": []}, + } +} +ilm_policy_updated = { + "phases": { + "frozen": { + "min_age": "2d", + "actions": { + "searchable_snapshot": { + "snapshot_repository": "deepfreeze-2024.08", + "force_merge_index": True, + } + }, + }, + "delete": { + "min_age": "3d", + "actions": {"delete": {"delete_searchable_snapshot": False}}, + }, + "cold": { + "min_age": "1d", + "actions": { + "allocate": { + "number_of_replicas": 0, + "include": {}, + "exclude": {}, + "require": {}, + }, + "searchable_snapshot": { + "snapshot_repository": "deepfreeze-2024.08", + "force_merge_index": True, + }, + "set_priority": {"priority": 0}, + }, + }, + "hot": { + "min_age": "0ms", + "actions": { + "rollover": { + "max_age": "30d", + "max_primary_shard_size": "50gb", + }, + "set_priority": {"priority": 100}, + }, + }, + } } -empty_cluster_settings = 
{'persistent':{},'transient':{}} -data_only_node_role = ['data'] -master_data_node_role = ['data','master']