Skip to content

Commit

Permalink
Fix Status Handling in Charmed Operators (#1743)
Browse files Browse the repository at this point in the history
- Handle status of operators only in set_pod_spec function.
- Centralize all the logic in set_pod_spec and
add helper checks.
- Pin black version
  • Loading branch information
DomFleischmann authored Dec 1, 2021
1 parent a701fd6 commit bb439fa
Show file tree
Hide file tree
Showing 4 changed files with 130 additions and 58 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test-charmed-katib.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
set -eux
sudo apt update
sudo apt install python3-setuptools
sudo pip3 install black flake8
sudo pip3 install black==20.8b1 flake8
- name: Check black
run: black --check operators
Expand Down
38 changes: 29 additions & 9 deletions operators/katib-controller/src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,22 @@
from ops.charm import CharmBase
from ops.framework import StoredState
from ops.main import main
from ops.model import ActiveStatus, MaintenanceStatus
from ops.model import ActiveStatus, MaintenanceStatus, WaitingStatus

logger = logging.getLogger(__name__)


class CheckFailed(Exception):
""" Raise this exception if one of the checks in main fails. """

def __init__(self, msg, status_type=None):
super().__init__()

self.msg = msg
self.status_type = status_type
self.status = status_type(msg)


class Operator(CharmBase):
"""Deploys the katib-controller service."""

Expand All @@ -22,11 +33,6 @@ class Operator(CharmBase):
def __init__(self, framework):
super().__init__(framework)

if not self.model.unit.is_leader():
logger.info("Not a leader, skipping any work")
self.model.unit.status = ActiveStatus()
return

self._stored.set_default(**self.gen_certs())
self.image = OCIImageResource(self, "oci-image")
self.framework.observe(self.on.install, self.set_pod_spec)
Expand All @@ -36,9 +42,11 @@ def set_pod_spec(self, event):
self.model.unit.status = MaintenanceStatus("Setting pod spec")

try:
image_details = self.image.fetch()
except OCIImageResourceError as e:
self.model.unit.status = e.status
self._check_leader()

image_details = self._check_image_details()
except CheckFailed as check_failed:
self.model.unit.status = check_failed.status
return

validating, mutating = yaml.safe_load_all(Path("src/webhooks.yaml").read_text())
Expand Down Expand Up @@ -294,6 +302,18 @@ def gen_certs(self):
"ca": Path("/run/ca.crt").read_text(),
}

def _check_leader(self):
if not self.unit.is_leader():
# We can't do anything useful when not the leader, so do nothing.
raise CheckFailed("Waiting for leadership", WaitingStatus)

def _check_image_details(self):
try:
image_details = self.image.fetch()
except OCIImageResourceError as e:
raise CheckFailed(f"{e.status.message}", e.status_type)
return image_details


if __name__ == "__main__":
main(Operator)
64 changes: 43 additions & 21 deletions operators/katib-db-manager/src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,23 @@
logger = logging.getLogger(__name__)


class CheckFailed(Exception):
""" Raise this exception if one of the checks in main fails. """

def __init__(self, msg, status_type=None):
super().__init__()

self.msg = msg
self.status_type = status_type
self.status = status_type(msg)


class Operator(CharmBase):
"""Deploys the katib-db-manager service."""

def __init__(self, framework):
super().__init__(framework)

if not self.model.unit.is_leader():
logger.info("Not a leader, skipping any work")
self.model.unit.status = ActiveStatus()
return

self.image = OCIImageResource(self, "oci-image")
self.framework.observe(self.on.install, self.set_pod_spec)
self.framework.observe(self.on.config_changed, self.set_pod_spec)
Expand All @@ -29,26 +35,18 @@ def __init__(self, framework):
self.framework.observe(self.on["mysql"].relation_changed, self.set_pod_spec)

def set_pod_spec(self, event):
self.model.unit.status = MaintenanceStatus("Setting pod spec")

try:
image_details = self.image.fetch()
except OCIImageResourceError as e:
self.model.unit.status = e.status
return
self._check_leader()

try:
relation = self.model.relations["mysql"][0]
unit = next(iter(relation.units))
mysql_data = relation.data[unit]
# Ensure we've got some data sent over the relation
mysql_data["root_password"]
except (IndexError, StopIteration, KeyError):
self.model.unit.status = WaitingStatus(
"Waiting for mysql connection information"
)
image_details = self._check_image_details()

mysql_data = self._check_mysql()
except CheckFailed as check_failed:
self.model.unit.status = check_failed.status
return

self.model.unit.status = MaintenanceStatus("Setting pod spec")

self.model.pod.set_spec(
{
"version": 3,
Expand Down Expand Up @@ -117,6 +115,30 @@ def set_pod_spec(self, event):

self.model.unit.status = ActiveStatus()

def _check_leader(self):
if not self.unit.is_leader():
# We can't do anything useful when not the leader, so do nothing.
raise CheckFailed("Waiting for leadership", WaitingStatus)

def _check_image_details(self):
try:
image_details = self.image.fetch()
except OCIImageResourceError as e:
raise CheckFailed(f"{e.status.message}", e.status_type)
return image_details

def _check_mysql(self):
try:
relation = self.model.relations["mysql"][0]
unit = next(iter(relation.units))
mysql_data = relation.data[unit]
# Ensure we've got some data sent over the relation
mysql_data["root_password"]
except (IndexError, StopIteration, KeyError):
raise CheckFailed("Waiting for mysql connection information", WaitingStatus)

return mysql_data


if __name__ == "__main__":
main(Operator)
84 changes: 57 additions & 27 deletions operators/katib-ui/src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,47 +15,46 @@
logger = logging.getLogger(__name__)


class CheckFailed(Exception):
""" Raise this exception if one of the checks in main fails. """

def __init__(self, msg, status_type=None):
super().__init__()

self.msg = msg
self.status_type = status_type
self.status = status_type(msg)


class Operator(CharmBase):
"""Deploys the katib-ui service."""

def __init__(self, framework):
super().__init__(framework)

if not self.model.unit.is_leader():
logger.info("Not a leader, skipping any work")
self.model.unit.status = ActiveStatus()
return

try:
self.interfaces = get_interfaces(self)
except NoVersionsListed as err:
self.model.unit.status = WaitingStatus(str(err))
return
except NoCompatibleVersions as err:
self.model.unit.status = BlockedStatus(str(err))
return
else:
self.model.unit.status = ActiveStatus()

self.image = OCIImageResource(self, "oci-image")
self.framework.observe(self.on.install, self.set_pod_spec)
self.framework.observe(self.on.upgrade_charm, self.set_pod_spec)
self.framework.observe(
self.on["ingress"].relation_changed,
self.configure_ingress,
self.set_pod_spec,
)

def configure_ingress(self, event):
if self.interfaces["ingress"]:
self.interfaces["ingress"].send_data(
{
"prefix": "/katib/",
"service": self.model.app.name,
"port": self.model.config["port"],
}
)

def set_pod_spec(self, event):
try:

self._check_leader()

interfaces = self._get_interfaces()

image_details = self._check_image_details()

except CheckFailed as check_failed:
self.model.unit.status = check_failed.status
return

self._configure_ingress(interfaces)

self.model.unit.status = MaintenanceStatus("Setting pod spec")

try:
Expand Down Expand Up @@ -115,6 +114,37 @@ def set_pod_spec(self, event):

self.model.unit.status = ActiveStatus()

def _configure_ingress(self, interfaces):
if interfaces["ingress"]:
interfaces["ingress"].send_data(
{
"prefix": "/katib/",
"service": self.model.app.name,
"port": self.model.config["port"],
}
)

def _check_leader(self):
if not self.unit.is_leader():
# We can't do anything useful when not the leader, so do nothing.
raise CheckFailed("Waiting for leadership", WaitingStatus)

def _get_interfaces(self):
try:
interfaces = get_interfaces(self)
except NoVersionsListed as err:
raise CheckFailed(err, WaitingStatus)
except NoCompatibleVersions as err:
raise CheckFailed(err, BlockedStatus)
return interfaces

def _check_image_details(self):
try:
image_details = self.image.fetch()
except OCIImageResourceError as e:
raise CheckFailed(f"{e.status.message}", e.status_type)
return image_details


if __name__ == "__main__":
main(Operator)

0 comments on commit bb439fa

Please sign in to comment.