Skip to content

Commit

Permalink
Merge pull request #217 from lsst-sqre/tickets/DM-41622
Browse files: browse the repository at this point in the history
DM-41622: Add explicit timeout for creating file servers
  • Loading branch information
rra authored Nov 7, 2023
2 parents 41b98a7 + 43d6e45 commit 9f59450
Showing 1 changed file with 17 additions and 3 deletions.
20 changes: 17 additions & 3 deletions controller/src/controller/services/fileserver.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -107,11 +107,22 @@ async def create(self, user: UserInfo) -> None:
if user.username not in self._servers:
self._servers[user.username] = _State(running=False)
state = self._servers[user.username]
timeout = timedelta(seconds=self._config.creation_timeout)
start = current_datetime(microseconds=True)
async with state.lock:
if state.running:
return
try:
await self._create_file_server(user)
async with asyncio.timeout(timeout.total_seconds()):
await self._create_file_server(user, timeout)
except TimeoutError:
now = current_datetime(microseconds=True)
elapsed = (now - start).total_seconds()
msg = f"File server creation timed out after {elapsed}s"
logger.exception(msg)
logger.info("Cleaning up orphaned file server objects")
await self._delete_file_server(user.username)
raise
except Exception as e:
logger.exception("File server creation failed")
await self._maybe_post_slack_exception(e, user.username)
Expand Down Expand Up @@ -180,7 +191,9 @@ async def stop(self) -> None:
await self._scheduler.close()
self._scheduler = None

async def _create_file_server(self, user: UserInfo) -> None:
async def _create_file_server(
self, user: UserInfo, timeout: timedelta
) -> None:
"""Create a fileserver for the given user.
Waits for the file server to be operational. Should be called with
Expand All @@ -190,14 +203,15 @@ async def _create_file_server(self, user: UserInfo) -> None:
----------
user
User for which to create a file server.
timeout
How long to wait for the file server to start.
Raises
------
KubernetesError
Raised if there is some failure in a Kubernetes API call.
"""
fileserver = self._builder.build(user)
timeout = timedelta(seconds=self._config.creation_timeout)
self._logger.info("Creating new file server", user=user.username)
await self._storage.create(self._config.namespace, fileserver, timeout)

Expand Down

0 comments on commit 9f59450

Please sign in to comment.