Skip to content

Commit

Permalink
Incorrect capacity for remote execution nodes 14051 (ansible#14315)
Browse files Browse the repository at this point in the history
  • Loading branch information
djyasin committed Nov 11, 2024
1 parent fc0b048 commit 058eff4
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 17 deletions.
13 changes: 9 additions & 4 deletions awx/main/models/ha.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,8 +309,8 @@ def refresh_capacity_fields(self):
self.cpu_capacity = 0
self.mem_capacity = 0 # formula has a non-zero offset, so we make sure it is 0 for hop nodes
else:
self.cpu_capacity = get_cpu_effective_capacity(self.cpu)
self.mem_capacity = get_mem_effective_capacity(self.memory)
self.cpu_capacity = get_cpu_effective_capacity(self.cpu, is_control_node=bool(self.node_type in (Instance.Types.CONTROL, Instance.Types.HYBRID)))
self.mem_capacity = get_mem_effective_capacity(self.memory, is_control_node=bool(self.node_type in (Instance.Types.CONTROL, Instance.Types.HYBRID)))
self.set_capacity_value()

def save_health_data(self, version=None, cpu=0, memory=0, uuid=None, update_last_seen=False, errors=''):
Expand All @@ -333,12 +333,17 @@ def save_health_data(self, version=None, cpu=0, memory=0, uuid=None, update_last
self.version = version
update_fields.append('version')

new_cpu = get_corrected_cpu(cpu)
if self.node_type == Instance.Types.EXECUTION:
new_cpu = cpu
new_memory = memory
else:
new_cpu = get_corrected_cpu(cpu)
new_memory = get_corrected_memory(memory)

if new_cpu != self.cpu:
self.cpu = new_cpu
update_fields.append('cpu')

new_memory = get_corrected_memory(memory)
if new_memory != self.memory:
self.memory = new_memory
update_fields.append('memory')
Expand Down
1 change: 0 additions & 1 deletion awx/main/tasks/system.py
Original file line number Diff line number Diff line change
Expand Up @@ -466,7 +466,6 @@ def execution_node_health_check(node):
data = worker_info(node)

prior_capacity = instance.capacity

instance.save_health_data(
version='ansible-runner-' + data.get('runner_version', '???'),
cpu=data.get('cpu_count', 0),
Expand Down
4 changes: 2 additions & 2 deletions awx/main/tests/functional/test_jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ def test_orphan_unified_job_creation(instance, inventory):

@pytest.mark.django_db
@mock.patch('awx.main.tasks.system.inspect_execution_and_hop_nodes', lambda *args, **kwargs: None)
@mock.patch('awx.main.models.ha.get_cpu_effective_capacity', lambda cpu: 8)
@mock.patch('awx.main.models.ha.get_mem_effective_capacity', lambda mem: 62)
@mock.patch('awx.main.models.ha.get_cpu_effective_capacity', lambda cpu, is_control_node: 8)
@mock.patch('awx.main.models.ha.get_mem_effective_capacity', lambda mem, is_control_node: 62)
def test_job_capacity_and_with_inactive_node():
i = Instance.objects.create(hostname='test-1')
i.save_health_data('18.0.1', 2, 8000)
Expand Down
8 changes: 6 additions & 2 deletions awx/main/tests/unit/settings/test_k8s_resource_setttings.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@ def test_SYSTEM_TASK_ABS_MEM_conversion(value, converted_value, mem_capacity):
mock_settings.IS_K8S = True
assert convert_mem_str_to_bytes(value) == converted_value
assert get_corrected_memory(-1) == converted_value
assert get_mem_effective_capacity(-1) == mem_capacity
assert get_mem_effective_capacity(1, is_control_node=True) == mem_capacity
# SYSTEM_TASK_ABS_MEM should not affect memory and capacity for execution nodes
assert get_mem_effective_capacity(2147483648, is_control_node=False) == 20


@pytest.mark.parametrize(
Expand All @@ -58,4 +60,6 @@ def test_SYSTEM_TASK_ABS_CPU_conversion(value, converted_value, cpu_capacity):
mock_settings.SYSTEM_TASK_FORKS_CPU = 4
assert convert_cpu_str_to_decimal_cpu(value) == converted_value
assert get_corrected_cpu(-1) == converted_value
assert get_cpu_effective_capacity(-1) == cpu_capacity
assert get_cpu_effective_capacity(-1, is_control_node=True) == cpu_capacity
# SYSTEM_TASK_ABS_CPU should not effect cpu count and capacity for execution nodes
assert get_cpu_effective_capacity(2.0, is_control_node=False) == 8
15 changes: 7 additions & 8 deletions awx/main/utils/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -768,14 +768,13 @@ def get_corrected_cpu(cpu_count): # formerly get_cpu_capacity
return cpu_count # no correction


def get_cpu_effective_capacity(cpu_count):
def get_cpu_effective_capacity(cpu_count, is_control_node=False):
from django.conf import settings

cpu_count = get_corrected_cpu(cpu_count)

settings_forkcpu = getattr(settings, 'SYSTEM_TASK_FORKS_CPU', None)
env_forkcpu = os.getenv('SYSTEM_TASK_FORKS_CPU', None)

if is_control_node:
cpu_count = get_corrected_cpu(cpu_count)
if env_forkcpu:
forkcpu = int(env_forkcpu)
elif settings_forkcpu:
Expand Down Expand Up @@ -834,6 +833,7 @@ def get_corrected_memory(memory):

# Runner returns memory in bytes
# so we convert memory from settings to bytes as well.

if env_absmem is not None:
return convert_mem_str_to_bytes(env_absmem)
elif settings_absmem is not None:
Expand All @@ -842,14 +842,13 @@ def get_corrected_memory(memory):
return memory


def get_mem_effective_capacity(mem_bytes):
def get_mem_effective_capacity(mem_bytes, is_control_node=False):
from django.conf import settings

mem_bytes = get_corrected_memory(mem_bytes)

settings_mem_mb_per_fork = getattr(settings, 'SYSTEM_TASK_FORKS_MEM', None)
env_mem_mb_per_fork = os.getenv('SYSTEM_TASK_FORKS_MEM', None)

if is_control_node:
mem_bytes = get_corrected_memory(mem_bytes)
if env_mem_mb_per_fork:
mem_mb_per_fork = int(env_mem_mb_per_fork)
elif settings_mem_mb_per_fork:
Expand Down

0 comments on commit 058eff4

Please sign in to comment.