From ec55e96a09ac825c84c67d53ed86289759196bfb Mon Sep 17 00:00:00 2001 From: Jason Sollom Date: Tue, 29 Oct 2024 15:47:26 -0500 Subject: [PATCH] Provide an option to control the PCS transition deadline --- CHANGELOG.md | 4 ++++ api/openapi.yaml.in | 7 +++++++ src/bos/common/options.py | 5 +++++ src/bos/operators/power_off_forceful.py | 2 +- src/bos/operators/power_off_graceful.py | 3 ++- src/bos/operators/power_on.py | 3 ++- 6 files changed, 21 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4105b927..56c6a8a7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] +### Added +- BOS option `pcs_transition_deadline` to control the deadline (in minutes) that BOS gives PCS to complete a power transition + ## [2.30.5] - 2024-10-15 ### Fixed - Fix per-bootset CFS setting diff --git a/api/openapi.yaml.in b/api/openapi.yaml.in index 31475590..b7b96faa 100644 --- a/api/openapi.yaml.in +++ b/api/openapi.yaml.in @@ -976,6 +976,13 @@ components: Options for the Boot Orchestration Service. 
type: object properties: + pcs_transition_deadline: + type: integer + description: | + The deadline (in minutes) that BOS gives PCS to complete a power transition. + example: 1 + minimum: 1 + maximum: 1440 cfs_read_timeout: type: integer description: | diff --git a/src/bos/common/options.py b/src/bos/common/options.py index f01914c6..4d922cd9 100644 --- a/src/bos/common/options.py +++ b/src/bos/common/options.py @@ -29,6 +29,7 @@ # code should either import this dict directly, or (preferably) access # its values indirectly using a DefaultOptions object DEFAULTS = { + 'pcs_transition_deadline': 60, 'cfs_read_timeout': 20, 'cleanup_completed_session_ttl': "7d", 'clear_stage': False, @@ -63,6 +64,10 @@ def get_option(self, key: str) -> Any: # All these do is convert the response to the appropriate type for the option, # and return it. + @property + def pcs_transition_deadline(self) -> int: + return int(self.get_option('pcs_transition_deadline')) + @property def cfs_read_timeout(self) -> int: return int(self.get_option('cfs_read_timeout')) diff --git a/src/bos/operators/power_off_forceful.py b/src/bos/operators/power_off_forceful.py index 5d258b7a..a2ad13ed 100644 --- a/src/bos/operators/power_off_forceful.py +++ b/src/bos/operators/power_off_forceful.py @@ -59,7 +59,7 @@ def filters(self): def _act(self, components): if components: component_ids = [component['id'] for component in components] - pcs.force_off(nodes=component_ids) + pcs.force_off(nodes=component_ids, task_deadline_minutes=options.pcs_transition_deadline) return components diff --git a/src/bos/operators/power_off_graceful.py b/src/bos/operators/power_off_graceful.py index 9a925b07..bad62c82 100644 --- a/src/bos/operators/power_off_graceful.py +++ b/src/bos/operators/power_off_graceful.py @@ -26,6 +26,7 @@ from bos.common.values import Action, Status from bos.operators.utils.clients import pcs +from bos.operators.utils.clients.bos.options import options from bos.operators.base import 
BaseOperator, main from bos.operators.filters import BOSQuery, HSMState @@ -55,7 +56,7 @@ def filters(self): def _act(self, components): if components: component_ids = [component['id'] for component in components] - pcs.soft_off(component_ids) + pcs.soft_off(component_ids, task_deadline_minutes=options.pcs_transition_deadline) return components diff --git a/src/bos/operators/power_on.py b/src/bos/operators/power_on.py index 8b06f84a..7bfe4fe1 100644 --- a/src/bos/operators/power_on.py +++ b/src/bos/operators/power_on.py @@ -39,6 +39,7 @@ from bos.operators.utils.clients import pcs from bos.operators.utils.clients.ims import tag_image from bos.operators.utils.clients.cfs import set_cfs +from bos.operators.utils.clients.bos.options import options from bos.operators.base import BaseOperator, main from bos.operators.filters import BOSQuery, HSMState from bos.server.dbs.boot_artifacts import record_boot_artifacts @@ -88,7 +89,7 @@ def _act(self, components: Union[List[dict],None]): raise Exception(f"Error encountered setting CFS information: {e}") from e component_ids = [component['id'] for component in components] try: - pcs.power_on(component_ids) + pcs.power_on(component_ids, task_deadline_minutes=options.pcs_transition_deadline) except Exception as e: raise Exception(f"Error encountered calling CAPMC to power on: {e}") from e return components