Skip to content

Commit

Permalink
mzbuild: Don't try to build on non-builder CI machines
Browse files Browse the repository at this point in the history
They won't succeed anyway, better to fail the test than time out while
uselessly building. Seen in https://buildkite.com/materialize/test/builds/90870

Also retry longer (~1 minute) and not just with a 1s sleep inbetween
  • Loading branch information
def- committed Sep 21, 2024
1 parent 2b128b7 commit d66c2f2
Show file tree
Hide file tree
Showing 7 changed files with 36 additions and 4 deletions.
3 changes: 3 additions & 0 deletions misc/python/materialize/checks/mzcompose_actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def __init__(
deploy_generation: int | None = None,
restart: str | None = None,
force_migrations: str | None = None,
publish: bool | None = None,
) -> None:
if healthcheck is None:
healthcheck = ["CMD", "curl", "-f", "localhost:6878/api/readyz"]
Expand All @@ -59,6 +60,7 @@ def __init__(
self.deploy_generation = deploy_generation
self.restart = restart
self.force_migrations = force_migrations
self.publish = publish

def execute(self, e: Executor) -> None:
c = e.mzcompose_composition()
Expand All @@ -81,6 +83,7 @@ def execute(self, e: Executor) -> None:
deploy_generation=self.deploy_generation,
restart=self.restart,
force_migrations=self.force_migrations,
publish=self.publish,
)

# Don't fail since we are careful to explicitly kill and collect logs
Expand Down
2 changes: 2 additions & 0 deletions misc/python/materialize/checks/scenarios_upgrade.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ def start_mz_read_only(
system_parameter_defaults: dict[str, str] | None = None,
system_parameter_version: MzVersion | None = None,
force_migrations: str | None = None,
publish: bool | None = None,
) -> StartMz:
return StartMz(
scenario,
Expand All @@ -79,6 +80,7 @@ def start_mz_read_only(
system_parameter_defaults=system_parameter_defaults,
system_parameter_version=system_parameter_version,
force_migrations=force_migrations,
publish=publish,
)


Expand Down
1 change: 1 addition & 0 deletions misc/python/materialize/checks/scenarios_zero_downtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,7 @@ def actions(self) -> list[Action]:
deploy_generation=2,
mz_service="mz_3",
system_parameter_defaults=system_parameter_defaults,
publish=False, # Allows us to build the image during the test in CI
),
Validate(self, mz_service="mz_2"),
*wait_ready_and_promote("mz_3"),
Expand Down
22 changes: 18 additions & 4 deletions misc/python/materialize/mzbuild.py
Original file line number Diff line number Diff line change
Expand Up @@ -835,6 +835,7 @@ def try_pull(self, max_retries: int) -> bool:
command.append("--quiet")
command.append(self.spec())
if not self.acquired:
sleep_time = 1
for retry in range(1, max_retries + 1):
try:
spawn.runv(
Expand All @@ -848,8 +849,9 @@ def try_pull(self, max_retries: int) -> bool:
# happened based on error code
# (https://github.com/docker/cli/issues/538) and we
# want to print output directly to terminal.
print("Retrying ...")
time.sleep(1)
print(f"Retrying in {sleep_time}s ...")
time.sleep(sleep_time)
sleep_time *= 2
continue
else:
break
Expand Down Expand Up @@ -1009,10 +1011,22 @@ def acquire(self, max_retries: int | None = None) -> None:

# Only retry in CI runs since we struggle with flaky docker pulls there
if not max_retries:
max_retries = 3 if ui.env_is_truthy("CI") else 1
max_retries = 5 if ui.env_is_truthy("CI") else 1
assert max_retries > 0

deps_to_build = [dep for dep in self if not dep.try_pull(max_retries)]
deps_to_build = [
dep for dep in self if not dep.publish or not dep.try_pull(max_retries)
]

# Don't attempt to build in CI, as our timeouts and small machines won't allow it anyway
if ui.env_is_truthy("CI"):
expected_deps = [dep for dep in deps_to_build if dep.publish]
if expected_deps:
print(
f"+++ Expected builds to be available, the build probably failed, so not proceeding: {expected_deps}"
)
sys.exit(5)

prep = self._prepare_batch(deps_to_build)
for dep in deps_to_build:
dep.build(prep)
Expand Down
5 changes: 5 additions & 0 deletions misc/python/materialize/mzcompose/composition.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,11 @@ def _munge_services(
image.rd.arch = Arch.AARCH64
else:
raise ValueError(f"Unknown platform {config['platform']}")
if config.get("publish") is not None:
# Override whether an image is expected to be published, so
# that we will build it in CI instead of failing.
image.publish = config["publish"]
del config["publish"]
images.append(image)

if "propagate_uid_gid" in config:
Expand Down
3 changes: 3 additions & 0 deletions misc/python/materialize/mzcompose/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,9 @@ class ServiceConfig(TypedDict, total=False):
platform: str
"""Target platform for service to run on. Syntax: os[/arch[/variant]]"""

publish: bool | None
"""Override whether an image is publishable. Unpublishable images can be built during normal test runs in CI."""


class Service:
"""A Docker Compose service in a `Composition`.
Expand Down
4 changes: 4 additions & 0 deletions misc/python/materialize/mzcompose/services/materialized.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def __init__(
healthcheck: list[str] | None = None,
deploy_generation: int | None = None,
force_migrations: str | None = None,
publish: bool | None = None,
) -> None:
if name is None:
name = "materialized"
Expand Down Expand Up @@ -200,6 +201,9 @@ def __init__(
"hostname": name,
}

if publish is not None:
config["publish"] = publish

if image:
config["image"] = image
else:
Expand Down

0 comments on commit d66c2f2

Please sign in to comment.