Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Debug 5s connect timeout #4

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
23 changes: 16 additions & 7 deletions .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,18 +22,16 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
os: [macos-latest]
python-version: ["3.8", "3.9", "3.10"]
# Cherry-pick test modules to split the overall runtime roughly in half
partition: [ci1, not ci1]
partition: ["ci3"]
exclude:
- os: macos-latest
python-version: 3.9
include:
- partition: "ci1"
partition-label: "ci1"
- partition: "not ci1"
partition-label: "notci1"
- partition: "ci3"
partition-label: "ci3"

# Uncomment to stress-test the test suite for random failures.
# Must also change env.TEST_ID below.
Expand Down Expand Up @@ -122,8 +120,9 @@ jobs:
source continuous_integration/scripts/set_ulimit.sh
set -o pipefail
mkdir reports
mkdir profiles

pytest distributed \
sudo pytest distributed \
-m "not avoid_ci and ${{ matrix.partition }}" --runslow \
--leaks=fds,processes,threads \
--junitxml reports/pytest.xml -o junit_suite_name=$TEST_ID \
Expand Down Expand Up @@ -168,6 +167,16 @@ jobs:
name: ${{ env.TEST_ID }}_cluster_dumps
path: test_cluster_dump
if-no-files-found: ignore
- name: Upload py-spy profiles
# ensure this runs even if pytest fails
if: >
always() &&
(steps.run_tests.outcome == 'success' || steps.run_tests.outcome == 'failure')
uses: actions/upload-artifact@v2
with:
name: ${{ env.TEST_ID }}_profiles
path: profiles
if-no-files-found: ignore

# Publish an artifact for the event; used by publish-test-results.yaml
event_file:
Expand Down
1 change: 1 addition & 0 deletions continuous_integration/environment-3.10.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,4 @@ dependencies:
- git+https://github.com/dask/zict
- git+https://github.com/fsspec/filesystem_spec
- keras
- py-spy
1 change: 1 addition & 0 deletions continuous_integration/environment-3.8.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,4 @@ dependencies:
- git+https://github.com/dask/dask
- git+https://github.com/jcrist/crick # Only tested here
- keras
- py-spy
1 change: 1 addition & 0 deletions continuous_integration/environment-3.9.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,4 @@ dependencies:
- pip:
- git+https://github.com/dask/dask
- keras
- py-spy
62 changes: 62 additions & 0 deletions distributed/cli/tests/test_dask_scheduler.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import functools
import re

import psutil
Expand All @@ -15,6 +16,7 @@
import tempfile
from time import sleep

import pytest
import requests
from click.testing import CliRunner

Expand All @@ -32,13 +34,67 @@
popen,
)

pytestmark = pytest.mark.ci3


def popen_pyspy(args):
# HACK https://stackoverflow.com/a/51955499/17100540
curtest = (
os.environ["PYTEST_CURRENT_TEST"]
.split(" ")[0]
.replace("/", ".")
.replace(":", "_")
)
pyspy_args = [
"py-spy",
"record",
"--idle",
"-f",
"speedscope",
"-o",
f"profiles/{curtest}.json",
"--",
]
return popen(pyspy_args + args)


def pyspy(testfunc):
@functools.wraps(testfunc)
def inner(*args, **kwargs):
# HACK https://stackoverflow.com/a/51955499/17100540
curtest = (
os.environ["PYTEST_CURRENT_TEST"]
.split(" ")[0]
.replace("/", ".")
.replace(":", "_")
)
with popen(
[
"py-spy",
"record",
"--idle",
"--subprocesses",
"-f",
"speedscope",
"-o",
f"profiles/{curtest}.json",
"--pid",
str(os.getpid()),
]
):
sleep(0.5)
return testfunc(*args, **kwargs)

return inner


def _get_dashboard_port(client: Client) -> int:
match = re.search(r":(\d+)\/status", client.dashboard_link)
assert match
return int(match.group(1))


@pyspy
def test_defaults(loop, requires_default_ports):
with popen(["dask-scheduler"]):

Expand All @@ -51,6 +107,7 @@ async def f():
assert _get_dashboard_port(c) == 8787


@pyspy
def test_hostport(loop):
port = open_port()
with popen(["dask-scheduler", "--no-dashboard", "--host", f"127.0.0.1:{port}"]):
Expand All @@ -64,6 +121,7 @@ async def f():
c.sync(f)


@pyspy
def test_no_dashboard(loop, requires_default_ports):
with popen(["dask-scheduler", "--no-dashboard"]):
with Client(f"127.0.0.1:{Scheduler.default_port}", loop=loop):
Expand Down Expand Up @@ -104,6 +162,7 @@ def test_dashboard(loop):
requests.get(f"http://127.0.0.1:{dashboard_port}/status/")


@pyspy
def test_dashboard_non_standard_ports(loop):
pytest.importorskip("bokeh")
port1 = open_port()
Expand Down Expand Up @@ -247,6 +306,7 @@ def check_pidfile(proc, pidfile):
check_pidfile(worker, w)


@pyspy
def test_scheduler_port_zero(loop):
with tmpfile() as fn:
with popen(
Expand All @@ -257,6 +317,7 @@ def test_scheduler_port_zero(loop):
assert c.scheduler.port != 8786


@pyspy
def test_dashboard_port_zero(loop):
pytest.importorskip("bokeh")
port = open_port()
Expand Down Expand Up @@ -488,6 +549,7 @@ def raise_ki():
signal.signal(signal.SIGINT, original_handler)


@pyspy
def test_multiple_workers_2(loop):
text = """
def dask_setup(worker):
Expand Down
3 changes: 3 additions & 0 deletions distributed/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from contextlib import suppress
from functools import partial
from numbers import Number
from time import sleep
from typing import Any, ClassVar, Literal, cast

import psutil
Expand Down Expand Up @@ -4628,6 +4629,8 @@ async def add_client(
msg.update(version_warning)
bcomm.send(msg)

sleep(0.2) # make it show up in pyspy

try:
await self.handle_stream(comm=comm, extra={"client": client})
finally:
Expand Down
6 changes: 6 additions & 0 deletions distributed/utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1350,14 +1350,20 @@ def popen(
args[0] = os.path.join(
os.environ.get("DESTDIR", "") + sys.prefix, "bin", args[0]
)

logger.info(f"Starting subprocess {args}")
with subprocess.Popen(args, **kwargs) as proc:
logger.info("Subprocess started")
try:
yield proc
finally:
logger.info("popen contextmanager exited; terminating process")
try:
_terminate_process(proc, terminate_timeout)
logger.info("process terminated successfully")
finally:
out, err = proc.communicate(timeout=kill_timeout)
logger.info("process terminated or killed successfully")
if out:
print(f"------ stdout: returncode {proc.returncode}, {args} ------")
print(out.decode() if isinstance(out, bytes) else out)
Expand Down
3 changes: 2 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ ignore =
W503, # Line break occurred before a binary operator
per-file-ignores =
**/tests/*:
# Module level import not at top of file (to silence on pytest.importorskip)
# Module level import not at top of file (to silence on pytest.importorskip)
# See https://github.com/PyCQA/pycodestyle/issues/472
E402,
# Do not use variables named 'I', 'O', or 'l'
Expand Down Expand Up @@ -90,6 +90,7 @@ filterwarnings =
ignore:setDaemon\(\) is deprecated, set the daemon attribute instead:DeprecationWarning:paramiko
minversion = 6
markers =
ci3: foo
ci1: marks tests as belonging to 1 out of 2 partitions to run on CI ('-m "not ci1"' for second partition)
slow: marks tests as slow (deselected by default; select with '--runslow')
avoid_ci: marks tests as flaky or broken on CI on all OSs
Expand Down