Skip to content

Commit

Permalink
taking in node resources in account
Browse files Browse the repository at this point in the history
  • Loading branch information
mudverma committed Mar 11, 2024
1 parent 606fb60 commit 3147a84
Showing 1 changed file with 46 additions and 7 deletions.
53 changes: 46 additions & 7 deletions kraken/chaos_recommender/prometheus.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,27 +17,57 @@ def convert_data_to_dataframe(data, label):


def convert_data(data, service):
    """Return the metric value for *service* from a Prometheus query result.

    Each entry is expected to look like
    ``{'metric': {'pod': <pod_name>}, 'value': [<timestamp>, <value>]}``
    (the shape returned by ``prometheus.custom_query``).

    Returns the value string for the matching pod, or ``None`` when
    *service* does not appear in the results.
    """
    result = {}
    for entry in data:
        pod_name = entry['metric']['pod']
        value = entry['value'][1]
        result[pod_name] = value
    # NOTE: unlike convert_data_limits there is deliberately no fallback
    # value here — a missing service simply yields None.
    return result.get(service)


def convert_data_limits(data, node_data, service, prometheus):
    """Return the resource limit for *service* from Prometheus results.

    ``data`` holds per-pod limit samples shaped like
    ``{'metric': {'pod': <name>}, 'value': [<ts>, <value>]}``; ``node_data``
    holds pre-fetched node-capacity samples used for the fallback.

    Pods with no explicit limit configured may consume up to the whole
    node, so the capacity of the node hosting the pod is returned instead.
    """
    result = {}
    for entry in data:
        pod_name = entry['metric']['pod']
        value = entry['value'][1]
        result[pod_name] = value
    if service in result:
        return result[service]
    # Fall back lazily: dict.get(key, default) would evaluate the default
    # eagerly and fire an extra Prometheus query even when the service has
    # an explicit limit.
    return get_node_capacity(node_data, service, prometheus)

def get_node_capacity(node_data, pod_name, prometheus):
    """Return the capacity of the node on which *pod_name* is running.

    Queries ``kube_pod_info`` to find the hosting node, then looks that
    node up in *node_data* (pre-fetched ``kube_node_status_capacity``
    samples shaped like ``{'metric': {'node': <name>}, 'value': [ts, v]}``).

    Falls back to a very large value ('1000000000') whenever the node
    cannot be determined or matched, mirroring the "effectively
    unlimited" semantics callers expect for pods without limits.
    """
    # Which node is the pod scheduled on?
    query = f'kube_pod_info{{pod="{pod_name}"}}'
    result = prometheus.custom_query(query)
    if not result:
        # Previously returned None here, which is inconsistent with the
        # string fallback below and breaks downstream numeric handling.
        return '1000000000'

    node_name = result[0]['metric']['node']

    for item in node_data:
        if item['metric']['node'] == node_name:
            return item['value'][1]

    return '1000000000'


def save_utilization_to_file(cpu_data, cpu_limits_result,
node_cpu_limits_result, mem_data, mem_limits_result,
node_mem_limits_result, network_data, filename, prometheus):

def save_utilization_to_file(cpu_data, cpu_limits_result, mem_data, mem_limits_result, network_data, filename):
df_cpu = convert_data_to_dataframe(cpu_data, "CPU")
merged_df = pd.DataFrame(columns=['service','CPU','CPU_LIMITS','MEM','MEM_LIMITS','NETWORK'])
services = df_cpu.service.unique()
logging.info(services)

for s in services:

new_row_df = pd.DataFrame( {"service": s, "CPU" : convert_data(cpu_data, s),
"CPU_LIMITS" : convert_data(cpu_limits_result, s),
"MEM" : convert_data(mem_data, s), "MEM_LIMITS" : convert_data(mem_limits_result, s),
"NETWORK" : convert_data(network_data, s)}, index=[0])
new_row_df = pd.DataFrame( {"service": s,
"CPU" : convert_data(cpu_data, s),
"CPU_LIMITS" : convert_data_limits(cpu_limits_result, node_cpu_limits_result, s, prometheus),
"MEM" : convert_data(mem_data, s),
"MEM_LIMITS" : convert_data_limits(mem_limits_result, node_mem_limits_result, s, prometheus),
"NETWORK" : convert_data(network_data, s)}, index=[0])

merged_df = pd.concat([merged_df, new_row_df], ignore_index=True)

# Convert columns to string
Expand Down Expand Up @@ -69,17 +99,26 @@ def fetch_utilization_from_prometheus(prometheus_endpoint, auth_token, namespace
cpu_limits_query = '(sum by (pod) (kube_pod_container_resource_limits{resource="cpu", namespace="%s"}))*1000' %(namespace)
cpu_limits_result = prometheus.custom_query(cpu_limits_query)

node_cpu_limits_query = 'kube_node_status_capacity{resource="cpu", unit="core"}'
node_cpu_limits_result = prometheus.custom_query(node_cpu_limits_query)

mem_query = 'sum by (pod) (avg_over_time(container_memory_usage_bytes{image!="", namespace="%s"}[%s]))' % (namespace, scrape_duration)
mem_result = prometheus.custom_query(mem_query)

mem_limits_query = 'sum by (pod) (kube_pod_container_resource_limits{resource="memory", namespace="%s"}) ' %(namespace)
mem_limits_result = prometheus.custom_query(mem_limits_query)

node_mem_limits_query = 'kube_node_status_capacity{resource="memory", unit="byte"}'
node_mem_limits_result = prometheus.custom_query(node_mem_limits_query)

network_query = 'sum by (pod) ((avg_over_time(container_network_transmit_bytes_total{namespace="%s"}[%s])) + \
(avg_over_time(container_network_receive_bytes_total{namespace="%s"}[%s])))' % (namespace, scrape_duration, namespace, scrape_duration)
network_result = prometheus.custom_query(network_query)

save_utilization_to_file(cpu_result, cpu_limits_result, mem_result, mem_limits_result, network_result, saved_metrics_path)
save_utilization_to_file(cpu_result, cpu_limits_result,
node_cpu_limits_result, mem_result, mem_limits_result,
node_mem_limits_result, network_result, saved_metrics_path, prometheus)

queries = json_queries(cpu_query, cpu_limits_query, mem_query, mem_limits_query)
return saved_metrics_path, queries

Expand Down

0 comments on commit 3147a84

Please sign in to comment.