Skip to content

Commit 44331e6

Browse files
committed
Admin/XMover: Refactor -- analyze -- shard vs. zone
1 parent 978dd24 commit 44331e6

File tree

7 files changed

+178
-149
lines changed

7 files changed

+178
-149
lines changed

cratedb_toolkit/admin/xmover/analyze/__init__.py

Whitespace-only changes.

cratedb_toolkit/admin/xmover/reporter.py renamed to cratedb_toolkit/admin/xmover/analyze/report.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from rich.panel import Panel
66
from rich.table import Table
77

8-
from cratedb_toolkit.admin.xmover.analyzer import ShardAnalyzer
8+
from cratedb_toolkit.admin.xmover.analyze.shard import ShardAnalyzer
99
from cratedb_toolkit.admin.xmover.model import ShardMoveRecommendation, ShardMoveRequest, SizeCriteria
1010
from cratedb_toolkit.admin.xmover.util import format_percentage, format_size
1111

cratedb_toolkit/admin/xmover/analyzer.py renamed to cratedb_toolkit/admin/xmover/analyze/shard.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,14 @@
77
from collections import defaultdict
88
from typing import Any, Dict, List, Optional, Set, Tuple, Union
99

10-
from .database import CrateDBClient
11-
from .model import DistributionStats, NodeInfo, RecommendationConstraints, ShardInfo, ShardMoveRecommendation
10+
from cratedb_toolkit.admin.xmover.database import CrateDBClient
11+
from cratedb_toolkit.admin.xmover.model import (
12+
DistributionStats,
13+
NodeInfo,
14+
RecommendationConstraints,
15+
ShardInfo,
16+
ShardMoveRecommendation,
17+
)
1218

1319
logger = logging.getLogger(__name__)
1420

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
from typing import Dict, List, Optional
2+
3+
from rich import box
4+
from rich.console import Console
5+
from rich.panel import Panel
6+
from rich.table import Table
7+
8+
from cratedb_toolkit.admin.xmover.analyze.shard import ShardAnalyzer
9+
from cratedb_toolkit.admin.xmover.database import CrateDBClient
10+
from cratedb_toolkit.admin.xmover.model import ShardInfo
11+
12+
console = Console()
13+
14+
15+
class ZoneReport:
16+
def __init__(self, client: CrateDBClient):
17+
self.client = client
18+
self.analyzer = ShardAnalyzer(self.client)
19+
20+
def shard_balance(self, tolerance: float, table: Optional[str] = None):
21+
"""Check zone balance for shards"""
22+
console.print(Panel.fit("[bold blue]Zone Balance Check[/bold blue]"))
23+
console.print("[dim]Note: Analyzing all shards regardless of state for complete cluster view[/dim]")
24+
console.print()
25+
26+
zone_stats = self.analyzer.check_zone_balance(table, tolerance)
27+
28+
if not zone_stats:
29+
console.print("[yellow]No shards found for analysis[/yellow]")
30+
return
31+
32+
# Calculate totals and targets
33+
total_shards = sum(stats["TOTAL"] for stats in zone_stats.values())
34+
zones = list(zone_stats.keys())
35+
target_per_zone = total_shards // len(zones) if zones else 0
36+
tolerance_range = (target_per_zone * (1 - tolerance / 100), target_per_zone * (1 + tolerance / 100))
37+
38+
balance_table = Table(title=f"Zone Balance Analysis (Target: {target_per_zone} ±{tolerance}%)", box=box.ROUNDED)
39+
balance_table.add_column("Zone", style="cyan")
40+
balance_table.add_column("Primary", justify="right", style="blue")
41+
balance_table.add_column("Replica", justify="right", style="green")
42+
balance_table.add_column("Total", justify="right", style="magenta")
43+
balance_table.add_column("Status", style="bold")
44+
45+
for zone, stats in zone_stats.items():
46+
total = stats["TOTAL"]
47+
48+
if tolerance_range[0] <= total <= tolerance_range[1]:
49+
status = "[green]✓ Balanced[/green]"
50+
elif total < tolerance_range[0]:
51+
status = f"[yellow]⚠ Under ({total - target_per_zone:+})[/yellow]"
52+
else:
53+
status = f"[red]⚠ Over ({total - target_per_zone:+})[/red]"
54+
55+
balance_table.add_row(zone, str(stats["PRIMARY"]), str(stats["REPLICA"]), str(total), status)
56+
57+
console.print(balance_table)
58+
59+
def distribution_conflicts(self, shard_details: bool = False, table: Optional[str] = None):
60+
"""Detailed analysis of zone distribution and potential conflicts"""
61+
console.print(Panel.fit("[bold blue]Detailed Zone Analysis[/bold blue]"))
62+
console.print("[dim]Comprehensive zone distribution analysis for CrateDB cluster[/dim]")
63+
console.print()
64+
65+
# Get all shards for analysis
66+
shards = self.client.get_shards_info(table_name=table, for_analysis=True)
67+
68+
if not shards:
69+
console.print("[yellow]No shards found for analysis[/yellow]")
70+
return
71+
72+
# Organize by table and shard
73+
tables: Dict[str, Dict[int, List[ShardInfo]]] = {}
74+
for shard in shards:
75+
table_key = f"{shard.schema_name}.{shard.table_name}"
76+
if table_key not in tables:
77+
tables[table_key] = {}
78+
79+
shard_key = shard.shard_id
80+
if shard_key not in tables[table_key]:
81+
tables[table_key][shard_key] = []
82+
83+
tables[table_key][shard_key].append(shard)
84+
85+
# Analyze each table
86+
zone_conflicts = 0
87+
under_replicated = 0
88+
89+
for table_name, table_shards in tables.items():
90+
console.print(f"\n[bold cyan]Table: {table_name}[/bold cyan]")
91+
92+
# Create analysis table
93+
analysis_table = Table(title=f"Shard Distribution for {table_name}", box=box.ROUNDED)
94+
analysis_table.add_column("Shard ID", justify="right", style="magenta")
95+
analysis_table.add_column("Primary Zone", style="blue")
96+
analysis_table.add_column("Replica Zones", style="green")
97+
analysis_table.add_column("Total Copies", justify="right", style="cyan")
98+
analysis_table.add_column("Status", style="bold")
99+
100+
for shard_id, shard_copies in sorted(table_shards.items()):
101+
primary_zone = "Unknown"
102+
replica_zones = set()
103+
total_copies = len(shard_copies)
104+
zones_with_copies = set()
105+
106+
for shard_copy in shard_copies:
107+
zones_with_copies.add(shard_copy.zone)
108+
if shard_copy.is_primary:
109+
primary_zone = shard_copy.zone
110+
else:
111+
replica_zones.add(shard_copy.zone)
112+
113+
# Determine status
114+
status_parts = []
115+
if len(zones_with_copies) == 1:
116+
zone_conflicts += 1
117+
status_parts.append("[red]⚠ ZONE CONFLICT[/red]")
118+
119+
if total_copies < 2: # Assuming we want at least 1 replica
120+
under_replicated += 1
121+
status_parts.append("[yellow]⚠ Under-replicated[/yellow]")
122+
123+
if not status_parts:
124+
status_parts.append("[green]✓ Good[/green]")
125+
126+
replica_zones_str = ", ".join(sorted(replica_zones)) if replica_zones else "None"
127+
128+
analysis_table.add_row(
129+
str(shard_id), primary_zone, replica_zones_str, str(total_copies), " ".join(status_parts)
130+
)
131+
132+
# Show individual shard details if requested
133+
if shard_details:
134+
for shard_copy in shard_copies:
135+
health_indicator = "✓" if shard_copy.routing_state == "STARTED" else "⚠"
136+
console.print(
137+
f" {health_indicator} {shard_copy.shard_type} "
138+
f"on {shard_copy.node_name} ({shard_copy.zone}) - {shard_copy.routing_state}"
139+
)
140+
141+
console.print(analysis_table)
142+
143+
# Summary
144+
console.print("\n[bold]Zone Analysis Summary:[/bold]")
145+
console.print(f" • Tables analyzed: [cyan]{len(tables)}[/cyan]")
146+
console.print(f" • Zone conflicts detected: [red]{zone_conflicts}[/red]")
147+
console.print(f" • Under-replicated shards: [yellow]{under_replicated}[/yellow]")
148+
149+
if zone_conflicts > 0:
150+
console.print(f"\n[red]⚠ Found {zone_conflicts} zone conflicts that need attention![/red]")
151+
console.print("[dim]Zone conflicts occur when all copies of a shard are in the same zone.[/dim]")
152+
console.print("[dim]This violates CrateDB's zone-awareness and creates availability risks.[/dim]")
153+
154+
if under_replicated > 0:
155+
console.print(f"\n[yellow]⚠ Found {under_replicated} under-replicated shards.[/yellow]")
156+
console.print("[dim]Consider increasing replication for better availability.[/dim]")
157+
158+
if zone_conflicts == 0 and under_replicated == 0:
159+
console.print("\n[green]✓ No critical zone distribution issues detected![/green]")

cratedb_toolkit/admin/xmover/cli.py

Lines changed: 8 additions & 144 deletions
Original file line numberDiff line numberDiff line change
@@ -5,24 +5,22 @@
55
"""
66

77
import sys
8-
from typing import Dict, List, Optional, cast
8+
from typing import List, Optional, cast
99

1010
import click
11-
from rich import box
1211
from rich.console import Console
1312
from rich.panel import Panel
14-
from rich.table import Table
1513

14+
from cratedb_toolkit.admin.xmover.analyze.report import ShardReporter
15+
from cratedb_toolkit.admin.xmover.analyze.shard import ShardAnalyzer
16+
from cratedb_toolkit.admin.xmover.analyze.zone import ZoneReport
1617
from cratedb_toolkit.admin.xmover.model import (
1718
RecommendationConstraints,
18-
ShardInfo,
1919
ShardMoveRequest,
2020
SizeCriteria,
2121
)
2222
from cratedb_toolkit.admin.xmover.recommender import Recommender
23-
from cratedb_toolkit.admin.xmover.reporter import ShardReporter
2423

25-
from .analyzer import ShardAnalyzer
2624
from .database import CrateDBClient
2725
from .recovery import RecoveryMonitor, RecoveryOptions
2826

@@ -182,44 +180,8 @@ def test_connection(ctx, connection_string: Optional[str]):
182180
def check_balance(ctx, table: Optional[str], tolerance: float):
183181
"""Check zone balance for shards"""
184182
client = ctx.obj["client"]
185-
analyzer = ShardAnalyzer(client)
186-
187-
console.print(Panel.fit("[bold blue]Zone Balance Check[/bold blue]"))
188-
console.print("[dim]Note: Analyzing all shards regardless of state for complete cluster view[/dim]")
189-
console.print()
190-
191-
zone_stats = analyzer.check_zone_balance(table, tolerance)
192-
193-
if not zone_stats:
194-
console.print("[yellow]No shards found for analysis[/yellow]")
195-
return
196-
197-
# Calculate totals and targets
198-
total_shards = sum(stats["TOTAL"] for stats in zone_stats.values())
199-
zones = list(zone_stats.keys())
200-
target_per_zone = total_shards // len(zones) if zones else 0
201-
tolerance_range = (target_per_zone * (1 - tolerance / 100), target_per_zone * (1 + tolerance / 100))
202-
203-
balance_table = Table(title=f"Zone Balance Analysis (Target: {target_per_zone} ±{tolerance}%)", box=box.ROUNDED)
204-
balance_table.add_column("Zone", style="cyan")
205-
balance_table.add_column("Primary", justify="right", style="blue")
206-
balance_table.add_column("Replica", justify="right", style="green")
207-
balance_table.add_column("Total", justify="right", style="magenta")
208-
balance_table.add_column("Status", style="bold")
209-
210-
for zone, stats in zone_stats.items():
211-
total = stats["TOTAL"]
212-
213-
if tolerance_range[0] <= total <= tolerance_range[1]:
214-
status = "[green]✓ Balanced[/green]"
215-
elif total < tolerance_range[0]:
216-
status = f"[yellow]⚠ Under ({total - target_per_zone:+})[/yellow]"
217-
else:
218-
status = f"[red]⚠ Over ({total - target_per_zone:+})[/red]"
219-
220-
balance_table.add_row(zone, str(stats["PRIMARY"]), str(stats["REPLICA"]), str(total), status)
221-
222-
console.print(balance_table)
183+
report = ZoneReport(client=client)
184+
report.shard_balance(tolerance=tolerance, table=table)
223185

224186

225187
@main.command()
@@ -229,106 +191,8 @@ def check_balance(ctx, table: Optional[str], tolerance: float):
229191
def zone_analysis(ctx, table: Optional[str], show_shards: bool):
230192
"""Detailed analysis of zone distribution and potential conflicts"""
231193
client = ctx.obj["client"]
232-
233-
console.print(Panel.fit("[bold blue]Detailed Zone Analysis[/bold blue]"))
234-
console.print("[dim]Comprehensive zone distribution analysis for CrateDB cluster[/dim]")
235-
console.print()
236-
237-
# Get all shards for analysis
238-
shards = client.get_shards_info(table_name=table, for_analysis=True)
239-
240-
if not shards:
241-
console.print("[yellow]No shards found for analysis[/yellow]")
242-
return
243-
244-
# Organize by table and shard
245-
tables: Dict[str, Dict[str, List[ShardInfo]]] = {}
246-
for shard in shards:
247-
table_key = f"{shard.schema_name}.{shard.table_name}"
248-
if table_key not in tables:
249-
tables[table_key] = {}
250-
251-
shard_key = shard.shard_id
252-
if shard_key not in tables[table_key]:
253-
tables[table_key][shard_key] = []
254-
255-
tables[table_key][shard_key].append(shard)
256-
257-
# Analyze each table
258-
zone_conflicts = 0
259-
under_replicated = 0
260-
261-
for table_name, table_shards in tables.items():
262-
console.print(f"\n[bold cyan]Table: {table_name}[/bold cyan]")
263-
264-
# Create analysis table
265-
analysis_table = Table(title=f"Shard Distribution for {table_name}", box=box.ROUNDED)
266-
analysis_table.add_column("Shard ID", justify="right", style="magenta")
267-
analysis_table.add_column("Primary Zone", style="blue")
268-
analysis_table.add_column("Replica Zones", style="green")
269-
analysis_table.add_column("Total Copies", justify="right", style="cyan")
270-
analysis_table.add_column("Status", style="bold")
271-
272-
for shard_id, shard_copies in sorted(table_shards.items()):
273-
primary_zone = "Unknown"
274-
replica_zones = set()
275-
total_copies = len(shard_copies)
276-
zones_with_copies = set()
277-
278-
for shard_copy in shard_copies:
279-
zones_with_copies.add(shard_copy.zone)
280-
if shard_copy.is_primary:
281-
primary_zone = shard_copy.zone
282-
else:
283-
replica_zones.add(shard_copy.zone)
284-
285-
# Determine status
286-
status_parts = []
287-
if len(zones_with_copies) == 1:
288-
zone_conflicts += 1
289-
status_parts.append("[red]⚠ ZONE CONFLICT[/red]")
290-
291-
if total_copies < 2: # Assuming we want at least 1 replica
292-
under_replicated += 1
293-
status_parts.append("[yellow]⚠ Under-replicated[/yellow]")
294-
295-
if not status_parts:
296-
status_parts.append("[green]✓ Good[/green]")
297-
298-
replica_zones_str = ", ".join(sorted(replica_zones)) if replica_zones else "None"
299-
300-
analysis_table.add_row(
301-
str(shard_id), primary_zone, replica_zones_str, str(total_copies), " ".join(status_parts)
302-
)
303-
304-
# Show individual shard details if requested
305-
if show_shards:
306-
for shard_copy in shard_copies:
307-
health_indicator = "✓" if shard_copy.routing_state == "STARTED" else "⚠"
308-
console.print(
309-
f" {health_indicator} {shard_copy.shard_type} "
310-
f"on {shard_copy.node_name} ({shard_copy.zone}) - {shard_copy.routing_state}"
311-
)
312-
313-
console.print(analysis_table)
314-
315-
# Summary
316-
console.print("\n[bold]Zone Analysis Summary:[/bold]")
317-
console.print(f" • Tables analyzed: [cyan]{len(tables)}[/cyan]")
318-
console.print(f" • Zone conflicts detected: [red]{zone_conflicts}[/red]")
319-
console.print(f" • Under-replicated shards: [yellow]{under_replicated}[/yellow]")
320-
321-
if zone_conflicts > 0:
322-
console.print(f"\n[red]⚠ Found {zone_conflicts} zone conflicts that need attention![/red]")
323-
console.print("[dim]Zone conflicts occur when all copies of a shard are in the same zone.[/dim]")
324-
console.print("[dim]This violates CrateDB's zone-awareness and creates availability risks.[/dim]")
325-
326-
if under_replicated > 0:
327-
console.print(f"\n[yellow]⚠ Found {under_replicated} under-replicated shards.[/yellow]")
328-
console.print("[dim]Consider increasing replication for better availability.[/dim]")
329-
330-
if zone_conflicts == 0 and under_replicated == 0:
331-
console.print("\n[green]✓ No critical zone distribution issues detected![/green]")
194+
report = ZoneReport(client=client)
195+
report.distribution_conflicts(shard_details=show_shards, table=table)
332196

333197

334198
@main.command()

cratedb_toolkit/admin/xmover/recommender.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from rich.panel import Panel
66
from rich.table import Table
77

8-
from .analyzer import ShardAnalyzer
8+
from .analyze.shard import ShardAnalyzer
99
from .database import CrateDBClient
1010
from .model import RecommendationConstraints
1111
from .recovery import RecoveryMonitor, RecoveryOptions

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -324,7 +324,7 @@ lint.extend-ignore = [
324324
"S108",
325325
]
326326

327-
lint.per-file-ignores."cratedb_toolkit/admin/xmover/analyzer.py" = [ "T201" ] # Allow `print`
327+
lint.per-file-ignores."cratedb_toolkit/admin/xmover/analyze/shard.py" = [ "T201" ] # Allow `print`
328328
lint.per-file-ignores."cratedb_toolkit/retention/cli.py" = [ "T201" ] # Allow `print`
329329
lint.per-file-ignores."cratedb_toolkit/sqlalchemy/__init__.py" = [ "F401" ] # Allow `module´ imported but unused
330330
lint.per-file-ignores."doc/conf.py" = [ "A001", "ERA001" ]

0 commit comments

Comments
 (0)