From ff62d16de116e688d0107fa02760c3943e4dd7c7 Mon Sep 17 00:00:00 2001 From: bvolovat Date: Wed, 19 Feb 2025 14:37:08 +0200 Subject: [PATCH] get the cpu and memory data from all pods in kubescape Signed-off-by: bvolovat --- get_data_from_prometheus.py | 6 +++--- threshold_check.py | 40 +++++++++++++++++++++++++------------ 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/get_data_from_prometheus.py b/get_data_from_prometheus.py index 133309c..1672a9a 100644 --- a/get_data_from_prometheus.py +++ b/get_data_from_prometheus.py @@ -19,8 +19,8 @@ class PrometheusConfig: # url: str = "http://localhost:9090" url: str = "http://prometheus-operated.monitoring.svc.cluster.local:9090" namespace: str = "kubescape" - pod_regex: str = "node-agent.*" - step_minutes: str = "1" + pod_regex: str = ".*" # All pods + step_seconds: str = "30" # Step size for Prometheus queries class PrometheusMetricsCollector: def __init__(self, config: Optional[PrometheusConfig] = None): @@ -57,7 +57,7 @@ def query_prometheus_range(self, query: str) -> Optional[List[Dict]]: 'query': query, 'start': self.start_time.isoformat(), 'end': self.end_time.isoformat(), - 'step': f"{self.config.step_minutes}m" + 'step': f"{self.config.step_seconds}s" } try: diff --git a/threshold_check.py b/threshold_check.py index ae5d5c3..bbbae4d 100644 --- a/threshold_check.py +++ b/threshold_check.py @@ -15,14 +15,12 @@ class ThresholdChecker: def __init__( self, output_dir: str = "output", - memory_threshold: float = 500, - cpu_threshold: float = 0.5, - duration_threshold: int = 30 # in seconds + duration_threshold: int = 30, # in seconds + pod_thresholds: Optional[Dict[str, Dict[str, float]]] = None ): self.output_dir = output_dir - self.memory_threshold = memory_threshold - self.cpu_threshold = cpu_threshold self.duration_threshold = duration_threshold + self.pod_thresholds = pod_thresholds or {} self.violations = { "Memory": [], @@ -33,6 +31,12 @@ def calculate_breach_percentage(self, value: float, 
threshold: float) -> float: """Calculate how much the value exceeded the threshold by percentage.""" return ((value - threshold) / threshold) * 100 + def get_threshold(self, pod: str, metric_type: str) -> Optional[float]: + """Get the threshold for a specific pod and metric type.""" + if pod in self.pod_thresholds and metric_type in self.pod_thresholds[pod]: + return self.pod_thresholds[pod][metric_type] + return None + def check_thresholds(self, file_path: str, metric_type: str) -> None: """Check if any values exceed the threshold for a sustained period.""" if not os.path.exists(file_path): @@ -45,7 +49,6 @@ def check_thresholds(self, file_path: str, metric_type: str) -> None: logger.info(f"{metric_type} data file is empty.") return - threshold = self.memory_threshold if metric_type == "Memory" else self.cpu_threshold df["Time"] = pd.to_datetime(df["Time"]) # Analyze each pod separately @@ -53,6 +56,12 @@ def check_thresholds(self, file_path: str, metric_type: str) -> None: pod_data = df[df["Pod"] == pod].copy() pod_data = pod_data.sort_values("Time") + # Get the threshold for the current pod + threshold = self.get_threshold(pod, metric_type) + if threshold is None: + logger.warning(f"No threshold defined for pod {pod} and metric {metric_type}. 
Skipping.") + continue + # Find periods where threshold is exceeded pod_data["violation"] = pod_data["Value"] > threshold pod_data["violation_group"] = ( @@ -115,8 +124,6 @@ def generate_report(self) -> Dict: "total_cpu_violations": len(self.violations["CPU"]), "violations_by_pod": pod_summary, "thresholds": { - "memory_mib": self.memory_threshold, - "cpu_cores": self.cpu_threshold, "duration_seconds": self.duration_threshold } }, @@ -152,8 +159,6 @@ def run(self) -> None: """Execute threshold checking on collected metrics.""" logger.info( f"Starting threshold analysis:" - f"\n Memory Threshold: {self.memory_threshold} MiB" - f"\n CPU Threshold: {self.cpu_threshold} cores" f"\n Duration Threshold: {self.duration_threshold} seconds" ) @@ -173,10 +178,19 @@ def run(self) -> None: output_dir = os.getenv('OUTPUT_DIR', 'output') logger.info(f"Using output directory: {output_dir}") + pod_thresholds = { + "kubescape": {"Memory": 400, "CPU": 0.2}, + "kubevuln": {"Memory": 500, "CPU": 0.3}, + "node-agent": {"Memory": 300, "CPU": 0.1}, + "operator": {"Memory": 200, "CPU": 0.05}, + "otel-collector": {"Memory": 600, "CPU": 0.4}, + "storage": {"Memory": 100, "CPU": 0.05}, + "synchronizer": {"Memory": 150, "CPU": 0.1} + } + checker = ThresholdChecker( output_dir=output_dir, - memory_threshold=350, # 500 MiB - cpu_threshold=0.1, # 0.5 cores - duration_threshold=10 # 30 seconds + duration_threshold=10, # 10 seconds + pod_thresholds=pod_thresholds ) checker.run() \ No newline at end of file