Skip to content

Commit f16e67a

Browse files
Add option to write flux log files (#519)
* Add option to write flux log files
* [pre-commit.ci] auto fixes from pre-commit.com hooks
  for more information, see https://pre-commit.ci
* Add tests and docstrings
* Add test
* fix tests
* extensions
* Update test_executor_backend_flux.py
* Update test_executor_backend_flux.py
* remove pmi
* disable new tests
* test cwd
* abspath
* create new directory
* do not remove
* cwd
* fix working directory
* fix makedir
* try higher level
---------
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent e9a81f8 commit f16e67a

File tree

7 files changed

+84
-0
lines changed

7 files changed

+84
-0
lines changed

executorlib/__init__.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ class Executor:
4545
flux_executor (flux.job.FluxExecutor): Flux Python interface to submit the workers to flux
4646
flux_executor_pmi_mode (str): PMI interface to use (OpenMPI v5 requires pmix) default is None (Flux only)
4747
flux_executor_nesting (bool): Provide hierarchically nested Flux job scheduler inside the submitted function.
48+
flux_log_files (bool, optional): Write flux stdout and stderr files. Defaults to False.
4849
pysqa_config_directory (str, optional): path to the pysqa config directory (only for pysqa based backend).
4950
hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
5051
context of an HPC cluster this is essential to be able to communicate to an
@@ -95,6 +96,7 @@ def __init__(
9596
flux_executor=None,
9697
flux_executor_pmi_mode: Optional[str] = None,
9798
flux_executor_nesting: bool = False,
99+
flux_log_files: bool = False,
98100
pysqa_config_directory: Optional[str] = None,
99101
hostname_localhost: Optional[bool] = None,
100102
block_allocation: bool = False,
@@ -117,6 +119,7 @@ def __new__(
117119
flux_executor=None,
118120
flux_executor_pmi_mode: Optional[str] = None,
119121
flux_executor_nesting: bool = False,
122+
flux_log_files: bool = False,
120123
pysqa_config_directory: Optional[str] = None,
121124
hostname_localhost: Optional[bool] = None,
122125
block_allocation: bool = False,
@@ -153,6 +156,7 @@ def __new__(
153156
flux_executor (flux.job.FluxExecutor): Flux Python interface to submit the workers to flux
154157
flux_executor_pmi_mode (str): PMI interface to use (OpenMPI v5 requires pmix) default is None (Flux only)
155158
flux_executor_nesting (bool): Provide hierarchically nested Flux job scheduler inside the submitted function.
159+
flux_log_files (bool, optional): Write flux stdout and stderr files. Defaults to False.
156160
pysqa_config_directory (str, optional): path to the pysqa config directory (only for pysqa based backend).
157161
hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
158162
context of an HPC cluster this is essential to be able to communicate to an
@@ -198,6 +202,7 @@ def __new__(
198202
flux_executor=flux_executor,
199203
flux_executor_pmi_mode=flux_executor_pmi_mode,
200204
flux_executor_nesting=flux_executor_nesting,
205+
flux_log_files=flux_log_files,
201206
pysqa_config_directory=pysqa_config_directory,
202207
hostname_localhost=hostname_localhost,
203208
block_allocation=block_allocation,
@@ -215,6 +220,7 @@ def __new__(
215220
flux_executor=flux_executor,
216221
flux_executor_pmi_mode=flux_executor_pmi_mode,
217222
flux_executor_nesting=flux_executor_nesting,
223+
flux_log_files=flux_log_files,
218224
hostname_localhost=hostname_localhost,
219225
block_allocation=block_allocation,
220226
init_function=init_function,
@@ -235,6 +241,7 @@ def __new__(
235241
flux_executor=flux_executor,
236242
flux_executor_pmi_mode=flux_executor_pmi_mode,
237243
flux_executor_nesting=flux_executor_nesting,
244+
flux_log_files=flux_log_files,
238245
hostname_localhost=hostname_localhost,
239246
block_allocation=block_allocation,
240247
init_function=init_function,

executorlib/cache/executor.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from executorlib.standalone.inputcheck import (
1111
check_executor,
1212
check_flux_executor_pmi_mode,
13+
check_flux_log_files,
1314
check_hostname_localhost,
1415
check_max_workers_and_cores,
1516
check_nested_flux_executor,
@@ -88,6 +89,7 @@ def create_file_executor(
8889
flux_executor=None,
8990
flux_executor_pmi_mode: Optional[str] = None,
9091
flux_executor_nesting: bool = False,
92+
flux_log_files: bool = False,
9193
pysqa_config_directory: Optional[str] = None,
9294
hostname_localhost: Optional[bool] = None,
9395
block_allocation: bool = False,
@@ -109,6 +111,7 @@ def create_file_executor(
109111
check_hostname_localhost(hostname_localhost=hostname_localhost)
110112
check_executor(executor=flux_executor)
111113
check_nested_flux_executor(nested_flux_executor=flux_executor_nesting)
114+
check_flux_log_files(flux_log_files=flux_log_files)
112115
return FileExecutor(
113116
cache_directory=cache_directory,
114117
resource_dict=resource_dict,

executorlib/interactive/executor.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from executorlib.standalone.inputcheck import (
1212
check_command_line_argument_lst,
1313
check_executor,
14+
check_flux_log_files,
1415
check_gpus_per_worker,
1516
check_init_function,
1617
check_nested_flux_executor,
@@ -163,6 +164,7 @@ def create_executor(
163164
flux_executor=None,
164165
flux_executor_pmi_mode: Optional[str] = None,
165166
flux_executor_nesting: bool = False,
167+
flux_log_files: bool = False,
166168
hostname_localhost: Optional[bool] = None,
167169
block_allocation: bool = False,
168170
init_function: Optional[callable] = None,
@@ -193,6 +195,7 @@ def create_executor(
193195
flux_executor (flux.job.FluxExecutor): Flux Python interface to submit the workers to flux
194196
flux_executor_pmi_mode (str): PMI interface to use (OpenMPI v5 requires pmix) default is None (Flux only)
195197
flux_executor_nesting (bool): Provide hierarchically nested Flux job scheduler inside the submitted function.
198+
flux_log_files (bool, optional): Write flux stdout and stderr files. Defaults to False.
196199
hostname_localhost (boolean): use localhost instead of the hostname to establish the zmq connection. In the
197200
context of an HPC cluster this is essential to be able to communicate to an Executor
198201
running on a different compute node within the same allocation. And in principle
@@ -222,6 +225,7 @@ def create_executor(
222225
resource_dict["flux_executor"] = flux_executor
223226
resource_dict["flux_executor_pmi_mode"] = flux_executor_pmi_mode
224227
resource_dict["flux_executor_nesting"] = flux_executor_nesting
228+
resource_dict["flux_log_files"] = flux_log_files
225229
if block_allocation:
226230
resource_dict["init_function"] = init_function
227231
max_workers = validate_number_of_cores(
@@ -250,6 +254,7 @@ def create_executor(
250254
elif backend == "slurm_allocation":
251255
check_executor(executor=flux_executor)
252256
check_nested_flux_executor(nested_flux_executor=flux_executor_nesting)
257+
check_flux_log_files(flux_log_files=flux_log_files)
253258
if block_allocation:
254259
resource_dict["init_function"] = init_function
255260
return InteractiveExecutor(
@@ -272,6 +277,7 @@ def create_executor(
272277
elif backend == "local":
273278
check_executor(executor=flux_executor)
274279
check_nested_flux_executor(nested_flux_executor=flux_executor_nesting)
280+
check_flux_log_files(flux_log_files=flux_log_files)
275281
check_gpus_per_worker(gpus_per_worker=resource_dict["gpus_per_core"])
276282
check_command_line_argument_lst(
277283
command_line_argument_lst=resource_dict["slurm_cmd_args"]

executorlib/interactive/flux.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ class FluxPythonSpawner(BaseSpawner):
3333
flux_executor (flux.job.FluxExecutor, optional): The FluxExecutor instance. Defaults to None.
3434
flux_executor_pmi_mode (str, optional): The PMI option. Defaults to None.
3535
flux_executor_nesting (bool, optional): Whether to use nested FluxExecutor. Defaults to False.
36+
flux_log_files (bool, optional): Write flux stdout and stderr files. Defaults to False.
3637
"""
3738

3839
def __init__(
@@ -45,6 +46,7 @@ def __init__(
4546
flux_executor: Optional[flux.job.FluxExecutor] = None,
4647
flux_executor_pmi_mode: Optional[str] = None,
4748
flux_executor_nesting: bool = False,
49+
flux_log_files: bool = False,
4850
):
4951
super().__init__(
5052
cwd=cwd,
@@ -56,6 +58,7 @@ def __init__(
5658
self._flux_executor = flux_executor
5759
self._flux_executor_pmi_mode = flux_executor_pmi_mode
5860
self._flux_executor_nesting = flux_executor_nesting
61+
self._flux_log_files = flux_log_files
5962
self._future = None
6063

6164
def bootup(
@@ -99,6 +102,12 @@ def bootup(
99102
jobspec.setattr_shell_option("pmi", self._flux_executor_pmi_mode)
100103
if self._cwd is not None:
101104
jobspec.cwd = self._cwd
105+
if self._flux_log_files and self._cwd is not None:
106+
jobspec.stderr = os.path.join(self._cwd, "flux.err")
107+
jobspec.stdout = os.path.join(self._cwd, "flux.out")
108+
elif self._flux_log_files:
109+
jobspec.stderr = os.path.abspath("flux.err")
110+
jobspec.stdout = os.path.abspath("flux.out")
102111
self._future = self._flux_executor.submit(jobspec)
103112

104113
def shutdown(self, wait: bool = True):

executorlib/standalone/inputcheck.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,16 @@ def check_flux_executor_pmi_mode(flux_executor_pmi_mode: Optional[str]) -> None:
147147
)
148148

149149

150+
def check_flux_log_files(flux_log_files: Optional[bool]) -> None:
151+
"""
152+
Check if flux_log_files is True and raise a ValueError if it is.

Args:
flux_log_files (bool, optional): Flag requesting that flux stdout and stderr files are written.

Raises:
ValueError: If flux_log_files is truthy; the message states that the parameter is
only supported for the flux framework backend.
153+
"""
154+
if flux_log_files:
155+
raise ValueError(
156+
"The flux_log_files parameter is only supported for the flux framework backend."
157+
)
158+
159+
150160
def check_pysqa_config_directory(pysqa_config_directory: Optional[str]) -> None:
151161
"""
152162
Check if pysqa_config_directory is None and raise a ValueError if it is not.

tests/test_executor_backend_flux.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,50 @@ def test_single_task(self):
100100
[[(1, 2, 0), (1, 2, 1)], [(2, 2, 0), (2, 2, 1)], [(3, 2, 0), (3, 2, 1)]],
101101
)
102102

103+
def test_output_files_cwd(self):
# With flux_log_files=True and an explicit "cwd" in resource_dict, the test
# expects flux.out and flux.err to be created inside that directory.
104+
dirname = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
105+
os.makedirs(dirname, exist_ok=True)
106+
file_stdout = os.path.join(dirname, "flux.out")
107+
file_stderr = os.path.join(dirname, "flux.err")
108+
with Executor(
109+
max_cores=1,
110+
resource_dict={"cores": 1, "cwd": dirname},
111+
flux_executor=self.executor,
112+
backend="flux_allocation",
113+
block_allocation=True,
114+
flux_log_files=True,
115+
) as p:
116+
output = p.map(calc, [1, 2, 3])
117+
self.assertEqual(
118+
list(output),
119+
[1, 2, 3],
120+
)
121+
self.assertTrue(os.path.exists(file_stdout))
122+
self.assertTrue(os.path.exists(file_stderr))
# NOTE(review): the removals below are not reached if an assertion above
# fails, so the log files would be left behind in that case.
123+
os.remove(file_stdout)
124+
os.remove(file_stderr)
125+
126+
def test_output_files_abs(self):
# With flux_log_files=True and no "cwd" in resource_dict, the test expects
# flux.out and flux.err at os.path.abspath() of the bare file names.
127+
file_stdout = os.path.abspath("flux.out")
128+
file_stderr = os.path.abspath("flux.err")
129+
with Executor(
130+
max_cores=1,
131+
resource_dict={"cores": 1},
132+
flux_executor=self.executor,
133+
backend="flux_allocation",
134+
block_allocation=True,
135+
flux_log_files=True,
136+
) as p:
137+
output = p.map(calc, [1, 2, 3])
138+
self.assertEqual(
139+
list(output),
140+
[1, 2, 3],
141+
)
142+
self.assertTrue(os.path.exists(file_stdout))
143+
self.assertTrue(os.path.exists(file_stderr))
# NOTE(review): the removals below are not reached if an assertion above
# fails, so the log files would be left behind in that case.
144+
os.remove(file_stdout)
145+
os.remove(file_stderr)
146+
103147
def test_internal_memory(self):
104148
with Executor(
105149
max_cores=1,

tests/test_shared_input_check.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
check_executor,
88
check_init_function,
99
check_nested_flux_executor,
10+
check_flux_log_files,
1011
check_pmi,
1112
check_plot_dependency_graph,
1213
check_refresh_rate,
@@ -67,6 +68,10 @@ def test_check_nested_flux_executor(self):
6768
with self.assertRaises(ValueError):
6869
check_nested_flux_executor(nested_flux_executor=True)
6970

71+
def test_check_flux_log_files(self):
# check_flux_log_files is expected to raise ValueError when called with
# flux_log_files=True.
72+
with self.assertRaises(ValueError):
73+
check_flux_log_files(flux_log_files=True)
74+
7075
def test_check_plot_dependency_graph(self):
7176
with self.assertRaises(ValueError):
7277
check_plot_dependency_graph(plot_dependency_graph=True)

0 commit comments

Comments
 (0)