-
Notifications
You must be signed in to change notification settings - Fork 26
[DPE-4533] Pause Patroni in the TLS test #588
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
9158ea6
3c41be3
8e7db4d
0b04b40
55925a0
7b5cd24
c1ad590
704ebd7
4fe6fc3
a3c2464
a0161fa
fa105fb
02c651e
68948f5
89dba90
c005133
85812f3
bcac1b9
7eafe84
cbadff1
844655d
5ec3edf
bbc6e77
4de0ac1
691133e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -100,22 +100,30 @@ def get_patroni_cluster(unit_ip: str) -> Dict[str, str]: | |
return resp.json() | ||
|
||
|
||
async def change_patroni_setting(ops_test: OpsTest, setting: str, value: int) -> None: | ||
async def change_patroni_setting( | ||
ops_test: OpsTest, setting: str, value: Union[int, str], tls: bool = False | ||
) -> None: | ||
"""Change the value of one of the Patroni settings. | ||
|
||
Args: | ||
ops_test: ops_test instance. | ||
setting: the name of the setting. | ||
value: the value to assign to the setting. | ||
tls: if Patroni is serving using tls. | ||
""" | ||
if tls: | ||
schema = "https" | ||
else: | ||
schema = "http" | ||
for attempt in Retrying(stop=stop_after_delay(30 * 2), wait=wait_fixed(3)): | ||
with attempt: | ||
app = await app_name(ops_test) | ||
primary_name = await get_primary(ops_test, app) | ||
unit_ip = await get_unit_address(ops_test, primary_name) | ||
requests.patch( | ||
f"http://{unit_ip}:8008/config", | ||
f"{schema}://{unit_ip}:8008/config", | ||
json={setting: value}, | ||
verify=not tls, | ||
) | ||
|
||
|
||
|
@@ -404,22 +412,27 @@ def get_host_ip(host: str) -> str: | |
return member_ips | ||
|
||
|
||
async def get_patroni_setting(ops_test: OpsTest, setting: str) -> Optional[int]: | ||
async def get_patroni_setting(ops_test: OpsTest, setting: str, tls: bool = False) -> Optional[int]: | ||
"""Get the value of one of the integer Patroni settings. | ||
|
||
Args: | ||
ops_test: ops_test instance. | ||
setting: the name of the setting. | ||
tls: if Patroni is serving using tls. | ||
|
||
Returns: | ||
the value of the configuration or None if it's using the default value. | ||
""" | ||
if tls: | ||
schema = "https" | ||
else: | ||
schema = "http" | ||
for attempt in Retrying(stop=stop_after_delay(30 * 2), wait=wait_fixed(3)): | ||
with attempt: | ||
app = await app_name(ops_test) | ||
primary_name = await get_primary(ops_test, app) | ||
unit_ip = await get_unit_address(ops_test, primary_name) | ||
configuration_info = requests.get(f"http://{unit_ip}:8008/config") | ||
configuration_info = requests.get(f"{schema}://{unit_ip}:8008/config", verify=not tls) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please share the reason for |
||
primary_start_timeout = configuration_info.json().get(setting) | ||
return int(primary_start_timeout) if primary_start_timeout is not None else None | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,6 +9,9 @@ | |
from tenacity import Retrying, stop_after_delay, wait_fixed | ||
|
||
from . import architecture, markers | ||
from .ha_tests.helpers import ( | ||
change_patroni_setting, | ||
) | ||
from .helpers import ( | ||
DATABASE_APP_NAME, | ||
build_and_deploy, | ||
|
@@ -55,7 +58,6 @@ async def test_build_and_deploy(ops_test: OpsTest) -> None: | |
await build_and_deploy(ops_test, DATABASE_UNITS, wait_for_idle=False) | ||
|
||
|
||
@pytest.mark.group(1) | ||
async def check_tls_rewind(ops_test: OpsTest) -> None: | ||
"""Checks if TLS was used by rewind.""" | ||
for unit in ops_test.model.applications[DATABASE_APP_NAME].units: | ||
|
@@ -76,23 +78,16 @@ async def check_tls_rewind(ops_test: OpsTest) -> None: | |
|
||
|
||
@pytest.mark.group(1) | ||
@markers.amd64_only # mattermost-k8s charm not available for arm64 | ||
async def test_mattermost_db(ops_test: OpsTest) -> None: | ||
"""Deploy Mattermost to test the 'db' relation. | ||
|
||
Mattermost needs TLS enabled on PostgreSQL to correctly connect to it. | ||
|
||
Args: | ||
ops_test: The ops test framework | ||
""" | ||
@pytest.mark.abort_on_fail | ||
async def test_tls(ops_test: OpsTest) -> None: | ||
async with ops_test.fast_forward(): | ||
# Deploy TLS Certificates operator. | ||
await ops_test.model.deploy( | ||
tls_certificates_app_name, config=tls_config, channel=tls_channel | ||
) | ||
# Relate it to the PostgreSQL to enable TLS. | ||
await ops_test.model.relate(DATABASE_APP_NAME, tls_certificates_app_name) | ||
await ops_test.model.wait_for_idle(status="active", timeout=1000) | ||
await ops_test.model.wait_for_idle(status="active", timeout=1000, raise_on_error=False) | ||
|
||
# Wait for all units enabling TLS. | ||
for unit in ops_test.model.applications[DATABASE_APP_NAME].units: | ||
|
@@ -122,7 +117,10 @@ async def test_mattermost_db(ops_test: OpsTest) -> None: | |
await ops_test.model.wait_for_idle( | ||
apps=[DATABASE_APP_NAME], status="active", idle_period=30 | ||
) | ||
# Pause Patroni so it doesn't wipe the custom changes | ||
await change_patroni_setting(ops_test, "pause", True, tls=True) | ||
|
||
async with ops_test.fast_forward("24h"): | ||
for attempt in Retrying(stop=stop_after_delay(60), wait=wait_fixed(2), reraise=True): | ||
with attempt: | ||
# Promote the replica to primary. | ||
|
@@ -154,7 +152,13 @@ async def test_mattermost_db(ops_test: OpsTest) -> None: | |
|
||
# Stop the initial primary. | ||
logger.info(f"stopping database on {primary}") | ||
await run_command_on_unit(ops_test, primary, "/charm/bin/pebble stop postgresql") | ||
try: | ||
await run_command_on_unit(ops_test, primary, "/charm/bin/pebble stop postgresql") | ||
except Exception as e: | ||
# pebble stop on juju 2 errors out and leaves dangling PG processes | ||
if juju_major_version > 2: | ||
raise e | ||
await run_command_on_unit(ops_test, primary, "pkill --signal SIGTERM -x postgres") | ||
Comment on lines
+155
to
+161
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. On Juju 2 if the Alternatively, we can do this check and termination in the charm code, but I would prefer not to add Juju 2 specific live code. Thoughts? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. On 2.9 if patroni stop failed: kill -SIGTERM $postgresql_pid ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Pretty much. We can do this in the ChangeError in the restart check. Question is do we want to add that code for Juju 2's sake? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I prefer that we keep this SIGTERM call in the test code and not in the charm code. Even if it is a SIGTERM, I'd prefer either to wait for the database process to gracefully finish by itself or analyze the situation manually before doing any action to avoid any data corruption. |
||
|
||
# Check that the primary changed. | ||
assert await primary_changed(ops_test, primary), "primary not changed" | ||
|
@@ -163,12 +167,26 @@ async def test_mattermost_db(ops_test: OpsTest) -> None: | |
for attempt in Retrying(stop=stop_after_delay(60 * 3), wait=wait_fixed(2), reraise=True): | ||
with attempt: | ||
await check_tls_rewind(ops_test) | ||
await change_patroni_setting(ops_test, "pause", False, tls=True) | ||
|
||
async with ops_test.fast_forward(): | ||
# Await for postgresql to be stable if not already | ||
await ops_test.model.wait_for_idle( | ||
apps=[DATABASE_APP_NAME], status="active", idle_period=15 | ||
) | ||
|
||
|
||
@pytest.mark.group(1) | ||
@markers.amd64_only # mattermost-k8s charm not available for arm64 | ||
async def test_mattermost_db(ops_test: OpsTest) -> None: | ||
"""Deploy Mattermost to test the 'db' relation. | ||
|
||
Mattermost needs TLS enabled on PostgreSQL to correctly connect to it. | ||
|
||
Args: | ||
ops_test: The ops test framework | ||
""" | ||
async with ops_test.fast_forward(): | ||
# Deploy and check Mattermost user and database existence. | ||
relation_id = await deploy_and_relate_application_with_postgresql( | ||
ops_test, "mattermost-k8s", MATTERMOST_APP_NAME, APPLICATION_UNITS, status="waiting" | ||
|
@@ -179,6 +197,10 @@ async def test_mattermost_db(ops_test: OpsTest) -> None: | |
|
||
await check_database_users_existence(ops_test, mattermost_users, []) | ||
|
||
|
||
@pytest.mark.group(1) | ||
async def test_remove_tls(ops_test: OpsTest) -> None: | ||
async with ops_test.fast_forward(): | ||
Comment on lines
+200
to
+203
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Splitting off TLS removal as well. |
||
# Remove the relation. | ||
await ops_test.model.applications[DATABASE_APP_NAME].remove_relation( | ||
f"{DATABASE_APP_NAME}:certificates", f"{tls_certificates_app_name}:certificates" | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If there's a dangling Postgresql process, Patroni will be unable to start