Commit 1a92c3f

WalBehamotl authored and committed

XMover/problematic-translogs: Large and small shards analysis/tracking

- Add max shard size tracking to size buckets
- Add small shards analysis view to analyzer command

1 parent c7fc33a commit 1a92c3f

File tree: 2 files changed (+291, -11 lines)


cratedb_toolkit/admin/xmover/analysis/shard.py

Lines changed: 288 additions & 10 deletions
@@ -20,6 +20,7 @@
     ShardInfo,
     ShardRelocationConstraints,
     ShardRelocationResponse,
+    TableStatsType,
 )
 from cratedb_toolkit.admin.xmover.util.database import CrateDBClient
 from cratedb_toolkit.admin.xmover.util.format import format_percentage, format_size
@@ -621,11 +622,11 @@ def get_shard_size_overview(self) -> Dict[str, Any]:
 
         # Define size buckets (in GB)
         size_buckets = {
-            "<1GB": {"count": 0, "total_size": 0.0, "avg_size_gb": 0.0},
-            "1GB-5GB": {"count": 0, "total_size": 0.0, "avg_size_gb": 0.0},
-            "5GB-10GB": {"count": 0, "total_size": 0.0, "avg_size_gb": 0.0},
-            "10GB-50GB": {"count": 0, "total_size": 0.0, "avg_size_gb": 0.0},
-            ">=50GB": {"count": 0, "total_size": 0.0, "avg_size_gb": 0.0},
+            "<1GB": {"count": 0, "total_size": 0.0, "avg_size_gb": 0.0, "max_size": 0.0},
+            "1GB-5GB": {"count": 0, "total_size": 0.0, "avg_size_gb": 0.0, "max_size": 0.0},
+            "5GB-10GB": {"count": 0, "total_size": 0.0, "avg_size_gb": 0.0, "max_size": 0.0},
+            "10GB-50GB": {"count": 0, "total_size": 0.0, "avg_size_gb": 0.0, "max_size": 0.0},
+            ">=50GB": {"count": 0, "total_size": 0.0, "avg_size_gb": 0.0, "max_size": 0.0},
         }
 
         if not started_shards:
@@ -652,19 +653,24 @@ def get_shard_size_overview(self) -> Dict[str, Any]:
             if size_gb >= 50:
                 size_buckets[">=50GB"]["count"] += 1
                 size_buckets[">=50GB"]["total_size"] += size_gb
+                size_buckets[">=50GB"]["max_size"] = max(size_buckets[">=50GB"]["max_size"], size_gb)
                 large_shards_count += 1
             elif size_gb >= 10:
                 size_buckets["10GB-50GB"]["count"] += 1
                 size_buckets["10GB-50GB"]["total_size"] += size_gb
+                size_buckets["10GB-50GB"]["max_size"] = max(size_buckets["10GB-50GB"]["max_size"], size_gb)
             elif size_gb >= 5:
                 size_buckets["5GB-10GB"]["count"] += 1
                 size_buckets["5GB-10GB"]["total_size"] += size_gb
+                size_buckets["5GB-10GB"]["max_size"] = max(size_buckets["5GB-10GB"]["max_size"], size_gb)
             elif size_gb >= 1:
                 size_buckets["1GB-5GB"]["count"] += 1
                 size_buckets["1GB-5GB"]["total_size"] += size_gb
+                size_buckets["1GB-5GB"]["max_size"] = max(size_buckets["1GB-5GB"]["max_size"], size_gb)
             else:
                 size_buckets["<1GB"]["count"] += 1
                 size_buckets["<1GB"]["total_size"] += size_gb
+                size_buckets["<1GB"]["max_size"] = max(size_buckets["<1GB"]["max_size"], size_gb)
                 very_small_shards += 1
 
         # Calculate the average size for each bucket
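
Editor's note: the hunk above repeats the same three bucket updates in every branch. A minimal refactoring sketch (not part of this commit) that resolves the bucket label once and then updates a single dict entry:

```python
def classify_bucket(size_gb: float) -> str:
    """Map a shard size in GB to its size-bucket label."""
    if size_gb >= 50:
        return ">=50GB"
    if size_gb >= 10:
        return "10GB-50GB"
    if size_gb >= 5:
        return "5GB-10GB"
    if size_gb >= 1:
        return "1GB-5GB"
    return "<1GB"


def update_bucket(size_buckets: dict, size_gb: float) -> None:
    """Accumulate count, total, and running maximum for the matching bucket."""
    bucket = size_buckets[classify_bucket(size_gb)]
    bucket["count"] += 1
    bucket["total_size"] += size_gb
    bucket["max_size"] = max(bucket["max_size"], size_gb)
```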
@@ -686,6 +692,123 @@ def get_shard_size_overview(self) -> Dict[str, Any]:
             "very_small_shards_percentage": very_small_percentage,
         }
 
+    def get_large_shards_details(self) -> List[Dict[str, Any]]:
+        """Get detailed information about large shards (>=50GB) including partition values"""
+        # Optimized query to fetch only large shards directly from database
+        query = """
+            SELECT
+                s.schema_name,
+                s.table_name,
+                translate(p.values::text, ':{}', '=()') as partition_values,
+                s.id as shard_id,
+                s.size / 1024^3 as size_gb,
+                s."primary" as is_primary,
+                s.node['name'] as node_name,
+                s.node['id'] as node_id
+            FROM sys.shards s
+            LEFT JOIN information_schema.table_partitions p
+                ON s.table_name = p.table_name
+                AND s.schema_name = p.table_schema
+                AND s.partition_ident = p.partition_ident
+            WHERE s.state = 'STARTED'
+            AND s.size >= 50 * 1024^3  -- 50GB in bytes
+            ORDER BY s.size DESC
+        """
+
+        result = self.client.execute_query(query)
+
+        large_shards = []
+        for row in result.get("rows", []):
+            # Get zone information from our nodes data
+            node_id = row[7]
+            zone = next((node.zone for node in self.nodes if node.id == node_id), "unknown")
+
+            large_shards.append(
+                {
+                    "schema_name": row[0] or "doc",
+                    "table_name": row[1],
+                    "partition_values": row[2],
+                    "shard_id": row[3],
+                    "size_gb": float(row[4]) if row[4] else 0.0,
+                    "is_primary": row[5] or False,
+                    "node_name": row[6],
+                    "zone": zone,
+                }
+            )
+
+        return large_shards
+
+    def get_small_shards_details(self, limit: int = 10) -> List[Dict[str, Any]]:
+        """Get detailed information about the smallest shards, grouped by table/partition"""
+        # Query to get all shards, ordered by size ascending to get the smallest
+        query = """
+            SELECT
+                s.schema_name,
+                s.table_name,
+                translate(p.values::text, ':{}', '=()') as partition_values,
+                s.id as shard_id,
+                s.size / 1024^3 as size_gb,
+                s."primary" as is_primary,
+                s.node['name'] as node_name,
+                s.node['id'] as node_id
+            FROM sys.shards s
+            LEFT JOIN information_schema.table_partitions p
+                ON s.table_name = p.table_name
+                AND s.schema_name = p.table_schema
+                AND s.partition_ident = p.partition_ident
+            WHERE s.state = 'STARTED'
+            ORDER BY s.size ASC
+        """
+
+        result = self.client.execute_query(query)
+
+        # Group by table/partition to get aggregated stats
+        table_partition_stats: TableStatsType = {}
+        for row in result.get("rows", []):
+            # Get zone information from our nodes data
+            node_id = row[7]
+
+            # FIXME: `zone` does not get used.
+            zone = next((node.zone for node in self.nodes if node.id == node_id), "unknown")  # noqa: F841
+
+            # Create table key with schema
+            schema_name = row[0] or "doc"
+            table_name = row[1]
+            table_display = table_name
+            if schema_name and schema_name != "doc":
+                table_display = f"{schema_name}.{table_name}"
+
+            # Create partition key
+            partition_key = row[2] or "N/A"
+
+            # Create combined key
+            key = (table_display, partition_key)
+
+            if key not in table_partition_stats:
+                table_partition_stats[key] = {"sizes": [], "primary_count": 0, "replica_count": 0, "total_size": 0.0}
+
+            # Aggregate stats
+            stats = table_partition_stats[key]
+            size_gb = float(row[4]) if row[4] else 0.0
+            stats["sizes"].append(size_gb)
+            stats["total_size"] += size_gb
+            if row[5]:  # is_primary
+                stats["primary_count"] += 1
+            else:
+                stats["replica_count"] += 1
+
+        # Sort by average size ascending (smallest first) and return top tables/partitions
+        sorted_stats: List[Dict[str, Any]] = []
+        for (table_name, partition_key), stats in table_partition_stats.items():
+            avg_size = sum(stats["sizes"]) / len(stats["sizes"]) if stats["sizes"] else 0
+            sorted_stats.append(
+                {"table_name": table_name, "partition_key": partition_key, "stats": stats, "avg_size": avg_size}
+            )
+
+        # Sort by average size and take the top 'limit' entries
+        sorted_stats.sort(key=lambda x: x["avg_size"])
+        return sorted_stats[:limit]
+
     def get_cluster_overview(self) -> Dict[str, Any]:
         """Get a comprehensive overview of the cluster"""
         # Get cluster watermark settings
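
Editor's note: both new accessors return plain lists of dicts, so they can be consumed without the Rich rendering shown further down. A hypothetical usage sketch follows; the `ShardAnalyzer` class name and its constructor are assumptions (the diff only shows `self.client` and `self.nodes`), not confirmed by this commit:

```python
# Hypothetical consumer of the two new accessors. Class name, import path,
# and constructor signature are assumptions for illustration only.
from cratedb_toolkit.admin.xmover.util.database import CrateDBClient

analyzer = ShardAnalyzer(CrateDBClient())  # assumed constructor

for shard in analyzer.get_large_shards_details():
    # Each entry carries schema, table, partition, node, and zone context.
    print(shard["table_name"], f"{shard['size_gb']:.1f}GB", shard["zone"])

for entry in analyzer.get_small_shards_details(limit=5):
    # Entries are grouped per table/partition, smallest average size first.
    print(entry["table_name"], entry["partition_key"], f"{entry['avg_size']:.3f}GB")
```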
@@ -1114,6 +1237,7 @@ def distribution(self, table: str = None):
         size_table.add_column("Count", justify="right", style="magenta")
         size_table.add_column("Percentage", justify="right", style="green")
         size_table.add_column("Avg Size", justify="right", style="blue")
+        size_table.add_column("Max Size", justify="right", style="red")
         size_table.add_column("Total Size", justify="right", style="yellow")
 
         total_shards = size_overview["total_shards"]
@@ -1147,6 +1271,7 @@ def distribution(self, table: str = None):
                 count_str,
                 percentage_str,
                 f"{avg_size:.2f}GB" if avg_size > 0 else "0GB",
+                f"{bucket_data['max_size']:.2f}GB" if bucket_data["max_size"] > 0 else "0GB",
                 format_size(total_size),
             )
 
@@ -1177,23 +1302,176 @@ def distribution(self, table: str = None):
         for warning in warnings:
             console.print(warning)
 
+        # Show compact table/partition breakdown of large shards if any exist
+        if size_overview["large_shards_count"] > 0:
+            console.print()
+            large_shards_details = self.analyzer.get_large_shards_details()
+
+            # Aggregate by table/partition
+            table_partition_stats: TableStatsType = {}
+            for shard in large_shards_details:
+                # Create table key with schema
+                table_display = shard["table_name"]
+                if shard["schema_name"] and shard["schema_name"] != "doc":
+                    table_display = f"{shard['schema_name']}.{shard['table_name']}"
+
+                # Create partition key
+                partition_key = shard["partition_values"] or "N/A"
+
+                # Create combined key
+                key = (table_display, partition_key)
+
+                if key not in table_partition_stats:
+                    table_partition_stats[key] = {
+                        "sizes": [],
+                        "primary_count": 0,
+                        "replica_count": 0,
+                        "total_size": 0.0,
+                    }
+
+                # Aggregate stats
+                stats = table_partition_stats[key]
+                stats["sizes"].append(shard["size_gb"])
+                stats["total_size"] += shard["size_gb"]
+                if shard["is_primary"]:
+                    stats["primary_count"] += 1
+                else:
+                    stats["replica_count"] += 1
+
+            # Create compact table
+            large_shards_table = Table(title="Large Shards Breakdown by Table/Partition (>=50GB)", box=box.ROUNDED)
+            large_shards_table.add_column("Table", style="cyan")
+            large_shards_table.add_column("Partition", style="blue")
+            large_shards_table.add_column("Shards", justify="right", style="magenta")
+            large_shards_table.add_column("P/R", justify="center", style="yellow")
+            large_shards_table.add_column("Min Size", justify="right", style="green")
+            large_shards_table.add_column("Avg Size", justify="right", style="red")
+            large_shards_table.add_column("Max Size", justify="right", style="red")
+            large_shards_table.add_column("Total Size", justify="right", style="red")
+
+            # Sort by total size descending (most problematic first)
+            sorted_stats = sorted(table_partition_stats.items(), key=lambda x: x[1]["total_size"], reverse=True)
+
+            for (table_name, partition_key), stats in sorted_stats:
+                # Format partition display
+                partition_display = partition_key
+                if partition_display != "N/A" and len(partition_display) > 25:
+                    partition_display = partition_display[:22] + "..."
+
+                # Calculate size stats
+                sizes = stats["sizes"]
+                min_size = min(sizes)
+                avg_size = sum(sizes) / len(sizes)
+                max_size = max(sizes)
+                total_size = stats["total_size"]
+                total_shards = len(sizes)
+
+                # Format primary/replica ratio
+                p_r_display = f"{stats['primary_count']}P/{stats['replica_count']}R"
+
+                large_shards_table.add_row(
+                    table_name,
+                    partition_display,
+                    str(total_shards),
+                    p_r_display,
+                    f"{min_size:.1f}GB",
+                    f"{avg_size:.1f}GB",
+                    f"{max_size:.1f}GB",
+                    f"{total_size:.1f}GB",
+                )
+
+            console.print(large_shards_table)
+
+            # Add summary stats
+            total_primary = sum(stats["primary_count"] for stats in table_partition_stats.values())
+            total_replica = sum(stats["replica_count"] for stats in table_partition_stats.values())
+            affected_table_partitions = len(table_partition_stats)
+
+            console.print()
+            console.print(
+                f"[dim]📊 Summary: {total_primary} primary, {total_replica} replica shards "
+                f"across {affected_table_partitions} table/partition(s)[/dim]"
+            )
+
+        # Show compact table/partition breakdown of smallest shards (top 10)
+        console.print()
+        small_shards_details = self.analyzer.get_small_shards_details(limit=10)
+
+        if small_shards_details:
+            # Create compact table
+            small_shards_table = Table(title="Smallest Shards Breakdown by Table/Partition (Top 10)", box=box.ROUNDED)
+            small_shards_table.add_column("Table", style="cyan")
+            small_shards_table.add_column("Partition", style="blue")
+            small_shards_table.add_column("Shards", justify="right", style="magenta")
+            small_shards_table.add_column("P/R", justify="center", style="yellow")
+            small_shards_table.add_column("Min Size", justify="right", style="green")
+            small_shards_table.add_column("Avg Size", justify="right", style="red")
+            small_shards_table.add_column("Max Size", justify="right", style="red")
+            small_shards_table.add_column("Total Size", justify="right", style="red")
+
+            for entry in small_shards_details:
+                table_name = entry["table_name"]
+                partition_key = entry["partition_key"]
+                stats = entry["stats"]
+
+                # Format partition display
+                partition_display = partition_key
+                if partition_display != "N/A" and len(partition_display) > 25:
+                    partition_display = partition_display[:22] + "..."
+
+                # Calculate size stats
+                sizes = stats["sizes"]
+                min_size = min(sizes)
+                avg_size = sum(sizes) / len(sizes)
+                max_size = max(sizes)
+                total_size = stats["total_size"]
+                total_shards = len(sizes)
+
+                # Format primary/replica ratio
+                p_r_display = f"{stats['primary_count']}P/{stats['replica_count']}R"
+
+                small_shards_table.add_row(
+                    table_name,
+                    partition_display,
+                    str(total_shards),
+                    p_r_display,
+                    f"{min_size:.1f}GB",
+                    f"{avg_size:.1f}GB",
+                    f"{max_size:.1f}GB",
+                    f"{total_size:.1f}GB",
+                )
+
+            console.print(small_shards_table)
+
+            # Add summary stats for smallest shards
+            total_small_primary = sum(entry["stats"]["primary_count"] for entry in small_shards_details)
+            total_small_replica = sum(entry["stats"]["replica_count"] for entry in small_shards_details)
+            small_table_partitions = len(small_shards_details)
+
+            console.print()
+            console.print(
+                f"[dim]📊 Summary: {total_small_primary} primary, "
+                f"{total_small_replica} replica shards across {small_table_partitions} table/partition(s) "
+                f"with smallest average sizes[/dim]"
+            )
+
         console.print()
 
         # Table-specific analysis if requested
         if table:
             console.print()
             console.print(Panel.fit(f"[bold blue]Analysis for table: {table}[/bold blue]"))
 
-            stats = self.analyzer.analyze_distribution(table)
+            distribution_stats = self.analyzer.analyze_distribution(table)
 
             table_summary = Table(title=f"Table {table} Distribution", box=box.ROUNDED)
             table_summary.add_column("Metric", style="cyan")
             table_summary.add_column("Value", style="magenta")
 
-            table_summary.add_row("Total Shards", str(stats.total_shards))
-            table_summary.add_row("Total Size", format_size(stats.total_size_gb))
-            table_summary.add_row("Zone Balance Score", f"{stats.zone_balance_score:.1f}/100")
-            table_summary.add_row("Node Balance Score", f"{stats.node_balance_score:.1f}/100")
+            table_summary.add_row("Total Shards", str(distribution_stats.total_shards))
+            table_summary.add_row("Total Size", format_size(distribution_stats.total_size_gb))
+            table_summary.add_row("Zone Balance Score", f"{distribution_stats.zone_balance_score:.1f}/100")
+            table_summary.add_row("Node Balance Score", f"{distribution_stats.node_balance_score:.1f}/100")
 
             console.print(table_summary)
 
cratedb_toolkit/admin/xmover/model.py

Lines changed: 3 additions & 1 deletion
@@ -1,5 +1,7 @@
 from dataclasses import dataclass
-from typing import Dict, Optional
+from typing import Any, Dict, Optional, Tuple
+
+TableStatsType = Dict[Tuple[str, str], Dict[str, Any]]
 
 
 @dataclass
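
Editor's note: a minimal sketch of how the new alias is meant to be read. Keys are `(table_display, partition_key)` tuples and values are per-group aggregates, matching the grouping code in shard.py above; the table and partition values below are hypothetical.

```python
from typing import Any, Dict, Tuple

TableStatsType = Dict[Tuple[str, str], Dict[str, Any]]

stats: TableStatsType = {}
key = ("doc.sensor_readings", "(day=2024-01-01)")  # hypothetical table/partition
group = stats.setdefault(key, {"sizes": [], "primary_count": 0, "replica_count": 0, "total_size": 0.0})
group["sizes"].append(0.4)  # one shard's size in GB
group["total_size"] += 0.4
group["primary_count"] += 1
```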
