-
Notifications
You must be signed in to change notification settings - Fork 26
[MISC] Parallel patroni calls #925
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
f965fb1
cbda477
70a5e15
7306e3a
0606d7c
e838463
df0747f
ac3d954
73f48d3
feceb4f
230ef0d
d0d7c3a
76d45b9
54a0072
6ccfaa7
391ec19
e5d69ce
b6af020
a450c25
7d58999
102f522
05d6f0d
9b7c79c
9d1df0e
2c61ec2
d3ee0a5
ea5cba7
7c1f6c0
712adc3
6e5948f
0bdb2e9
f776858
23613ed
fb75670
36a49b8
dbac958
19f145e
72ef410
7370bd9
71475ac
2f6e307
62d055a
9dc4ca2
94760a7
672a10d
c022891
16934cb
9e3924c
42c3c47
4ad28b2
8e31186
0b5e447
e6141b1
5b6fc0a
94c42a8
427abb1
b0e5f23
8a79798
77ad9ef
70e7776
611d723
61d998a
803dead
a86cb17
3586af1
29194b7
f625efc
ca4d8c5
0c73c02
e1a82de
d3c741d
9ca2287
fb27850
b2ac81b
a58aeb8
afa2fb9
74903f1
5078dd1
0862ace
e4baaf3
bd50fdd
05a400d
d45b125
c2b1c2f
a6bc887
dc147c8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,18 +11,21 @@ | |
import re | ||
import shutil | ||
import subprocess | ||
from asyncio import as_completed, create_task, run, wait | ||
from contextlib import suppress | ||
from pathlib import Path | ||
from ssl import CERT_NONE, create_default_context | ||
from typing import TYPE_CHECKING, Any, TypedDict | ||
|
||
import charm_refresh | ||
import psutil | ||
import requests | ||
from charms.operator_libs_linux.v2 import snap | ||
from httpx import AsyncClient, BasicAuth, HTTPError | ||
from jinja2 import Template | ||
from ops import BlockedStatus | ||
from pysyncobj.utility import TcpUtility, UtilityException | ||
from tenacity import ( | ||
AttemptManager, | ||
RetryError, | ||
Retrying, | ||
retry, | ||
|
@@ -172,6 +175,10 @@ def __init__( | |
def _patroni_auth(self) -> requests.auth.HTTPBasicAuth: | ||
return requests.auth.HTTPBasicAuth("patroni", self.patroni_password) | ||
|
||
@property | ||
def _patroni_async_auth(self) -> BasicAuth: | ||
return BasicAuth("patroni", password=self.patroni_password) | ||
|
||
@property | ||
def _patroni_url(self) -> str: | ||
"""Patroni REST API URL.""" | ||
|
@@ -249,28 +256,14 @@ def get_postgresql_version(self) -> str: | |
if snp["name"] == charm_refresh.snap_name(): | ||
return snp["version"] | ||
|
||
def cluster_status( | ||
self, alternative_endpoints: list | None = None | ||
) -> list[ClusterMember] | None: | ||
def cluster_status(self, alternative_endpoints: list | None = None) -> list[ClusterMember]: | ||
"""Query the cluster status.""" | ||
# Request info from cluster endpoint (which returns all members of the cluster). | ||
# TODO we don't know the other cluster's ca | ||
verify = self.verify if not alternative_endpoints else False | ||
for attempt in Retrying( | ||
stop=stop_after_attempt( | ||
len(alternative_endpoints) if alternative_endpoints else len(self.peers_ips) | ||
) | ||
if response := self.parallel_patroni_get_request( | ||
f"/{PATRONI_CLUSTER_STATUS_ENDPOINT}", alternative_endpoints | ||
): | ||
with attempt: | ||
request_url = self._get_alternative_patroni_url(attempt, alternative_endpoints) | ||
|
||
cluster_status = requests.get( | ||
f"{request_url}/{PATRONI_CLUSTER_STATUS_ENDPOINT}", | ||
verify=verify, | ||
timeout=API_REQUEST_TIMEOUT, | ||
auth=self._patroni_auth, | ||
) | ||
return cluster_status.json()["members"] | ||
return response["members"] | ||
raise RetryError(last_attempt=Exception("Unable to reach any units")) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Most existing code should handle RetryErrors instead of empty lists. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Great move! |
||
|
||
def get_member_ip(self, member_name: str) -> str | None: | ||
"""Get cluster member IP address. | ||
|
@@ -281,13 +274,14 @@ def get_member_ip(self, member_name: str) -> str | None: | |
Returns: | ||
IP address of the cluster member. | ||
""" | ||
cluster_status = self.cluster_status() | ||
if not cluster_status: | ||
return | ||
try: | ||
cluster_status = self.cluster_status() | ||
|
||
for member in cluster_status: | ||
if member["name"] == member_name: | ||
return member["host"] | ||
for member in cluster_status: | ||
if member["name"] == member_name: | ||
return member["host"] | ||
except RetryError: | ||
logger.debug("Unable to get IP. Cluster status unreachable") | ||
|
||
def get_member_status(self, member_name: str) -> str: | ||
"""Get cluster member status. | ||
|
@@ -307,6 +301,44 @@ def get_member_status(self, member_name: str) -> str: | |
return member["state"] | ||
return "" | ||
|
||
async def _httpx_get_request(self, url: str, verify: bool = True): | ||
ssl_ctx = create_default_context() | ||
if verify: | ||
with suppress(FileNotFoundError): | ||
ssl_ctx.load_verify_locations(cafile=f"{PATRONI_CONF_PATH}/{TLS_CA_BUNDLE_FILE}") | ||
else: | ||
ssl_ctx.check_hostname = False | ||
ssl_ctx.verify_mode = CERT_NONE | ||
async with AsyncClient( | ||
auth=self._patroni_async_auth, timeout=API_REQUEST_TIMEOUT, verify=ssl_ctx | ||
) as client: | ||
try: | ||
return (await client.get(url)).json() | ||
except (HTTPError, ValueError): | ||
return None | ||
|
||
async def _async_get_request(self, uri: str, endpoints: list[str], verify: bool = True): | ||
tasks = [ | ||
create_task(self._httpx_get_request(f"https://{ip}:8008{uri}", verify)) | ||
for ip in endpoints | ||
] | ||
for task in as_completed(tasks): | ||
if result := await task: | ||
for task in tasks: | ||
task.cancel() | ||
await wait(tasks) | ||
Comment on lines
+326
to
+329
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Get the first result, cancel the other requests. |
||
return result | ||
|
||
def parallel_patroni_get_request(self, uri: str, endpoints: list[str] | None = None) -> dict: | ||
"""Call all possible patroni endpoints in parallel.""" | ||
if not endpoints: | ||
endpoints = (self.unit_ip, *self.peers_ips) | ||
verify = True | ||
else: | ||
# TODO we don't know the other cluster's ca | ||
verify = False | ||
return run(self._async_get_request(uri, endpoints, verify)) | ||
|
||
def get_primary( | ||
self, unit_name_pattern=False, alternative_endpoints: list[str] | None = None | ||
) -> str | None: | ||
|
@@ -320,14 +352,17 @@ def get_primary( | |
primary pod or unit name. | ||
""" | ||
# Request info from cluster endpoint (which returns all members of the cluster). | ||
if cluster_status := self.cluster_status(alternative_endpoints): | ||
try: | ||
cluster_status = self.cluster_status(alternative_endpoints) | ||
for member in cluster_status: | ||
if member["role"] == "leader": | ||
primary = member["name"] | ||
if unit_name_pattern: | ||
# Change the last dash to / in order to match unit name pattern. | ||
primary = label2name(primary) | ||
return primary | ||
except RetryError: | ||
logger.debug("Unable to get primary. Cluster status unreachable") | ||
|
||
def get_standby_leader( | ||
self, unit_name_pattern=False, check_whether_is_running: bool = False | ||
|
@@ -366,31 +401,6 @@ def get_sync_standby_names(self) -> list[str]: | |
sync_standbys.append(label2name(member["name"])) | ||
return sync_standbys | ||
|
||
def _get_alternative_patroni_url( | ||
self, attempt: AttemptManager, alternative_endpoints: list[str] | None = None | ||
) -> str: | ||
"""Get an alternative REST API URL from another member each time. | ||
|
||
When the Patroni process is not running in the current unit it's needed | ||
to use a URL from another cluster member REST API to do some operations. | ||
""" | ||
if alternative_endpoints is not None: | ||
return self._patroni_url.replace( | ||
self.unit_ip, alternative_endpoints[attempt.retry_state.attempt_number - 1] | ||
) | ||
attempt_number = attempt.retry_state.attempt_number | ||
if attempt_number > 1: | ||
url = self._patroni_url | ||
if (attempt_number - 1) <= len(self.peers_ips): | ||
unit_number = attempt_number - 2 | ||
else: | ||
unit_number = attempt_number - 2 - len(self.peers_ips) | ||
other_unit_ip = list(self.peers_ips)[unit_number] | ||
url = url.replace(self.unit_ip, other_unit_ip) | ||
else: | ||
url = self._patroni_url | ||
return url | ||
|
||
def are_all_members_ready(self) -> bool: | ||
"""Check if all members are correctly running Patroni and PostgreSQL. | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No deps when executing the script, so running urllib requests in an executor.