Merged

30 commits
d9122f6
Add an option to specify initializer function for processes
shwina Nov 18, 2023
e2832a8
Add tests
shwina Nov 20, 2023
cb2478a
Update docs
shwina Nov 20, 2023
f86bfab
Don't be too prescriptive about how to use initializer
shwina Nov 20, 2023
aa8fc42
Move initializer definition and use batch_size=1
shwina Nov 20, 2023
3cb1440
Revert "Move initializer definition and use batch_size=1"
shwina Nov 20, 2023
a06e405
Merge branch 'main' of github.com:joblib/joblib into add-initializer-…
tomMoral Feb 27, 2025
3e33540
ENH allow initializer implicitely
tomMoral Feb 27, 2025
1f9ab6c
ENH allow initializer in multiprocessing+CLN bad merge
tomMoral Feb 27, 2025
c095bcf
FIX linter
tomMoral Feb 27, 2025
943110a
FIX code spell and typo
tomMoral Feb 27, 2025
6de81c4
FIX bad merge
tomMoral Feb 28, 2025
49991e5
FIX bad merge
tomMoral Feb 28, 2025
8e398db
FIX make test more robust
tomMoral Feb 28, 2025
509437f
DOC add entry in changelog
tomMoral Feb 28, 2025
5e18921
Merge branch 'main' into add-initializer-for-loky-and-mp
tomMoral Mar 6, 2025
9f57de3
FIX one test
tomMoral Mar 9, 2025
380e853
FIX linter
tomMoral Mar 9, 2025
a9ee170
TST make test_initializer more robust
tomMoral Mar 9, 2025
8d13fc6
TST make test_initializer_reuse more robust too
tomMoral Mar 9, 2025
733893a
FIX linter
tomMoral Mar 9, 2025
72bee0d
TST quickend test_initializer
tomMoral Mar 17, 2025
b746448
ENH further improve loky backend args
tomMoral Mar 19, 2025
dbb5ca8
Update joblib/test/test_parallel.py
tomMoral Apr 7, 2025
5bb8f66
Apply suggestions from code review
tomMoral Apr 7, 2025
9039d82
TST make test_initializer_reuse_* check worker reuse
tomMoral Apr 7, 2025
54897ba
FIx linter
tomMoral Apr 7, 2025
6ad8287
FIX more stable initializer tests
tomMoral Apr 7, 2025
1107003
CI trigger
tomMoral Apr 7, 2025
0d9412d
CI trigger
tomMoral Apr 7, 2025
4 changes: 4 additions & 0 deletions CHANGES.rst
@@ -34,6 +34,10 @@ Parallel:
- Pretty printing of ``Parallel`` execution progress when the number of tasks is
known. https://github.com/joblib/joblib/pull/1608

- Make it possible to pass extra arguments to the ``LokyBackend`` and
``MultiprocessingBackend``, enabling the use of ``initializer``.
https://github.com/joblib/joblib/pull/1525

- Refactor and document the custom parallel backend API.
https://github.com/joblib/joblib/pull/1667

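As an illustration of the changelog entry above, a minimal sketch of passing ``initializer`` through :func:`~joblib.parallel_config`; the ``set_flag`` function and ``MY_FLAG`` variable are hypothetical names, and the snippet assumes a joblib version that includes this PR:

import os

from joblib import Parallel, delayed, parallel_config

def set_flag(value):
    # Hypothetical setup function: runs once in each worker process
    # before that worker executes any task.
    os.environ["MY_FLAG"] = value

# Extra keyword arguments given to parallel_config are forwarded to the
# backend, so the loky workers are created with the initializer below.
with parallel_config(backend="loky", initializer=set_flag, initargs=("1",)):
    results = Parallel(n_jobs=2)(delayed(os.getenv)("MY_FLAG") for _ in range(4))

print(results)  # expected: ['1', '1', '1', '1']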
16 changes: 14 additions & 2 deletions doc/parallel.rst
@@ -129,8 +129,20 @@ The context manager allow to set various backend parameters:
of the backend. See :ref:`auto_memmapping_doc` for more details.

Extra arguments passed in the :func:`~joblib.parallel_config` context are
passed to the backend constructor, giving the possibility to set up the
backend with additional parameters.
passed to the backend constructor, allowing additional parameters to be set up:

- ``LokyBackend``
- ``initializer``, ``initargs``: setup function and its arguments to call
in each worker process.
- ``idle_worker_timeout``: timeout in seconds for a worker to wait
for a new task before being cleaned up. The default is `300 s`.
- ``MultiprocessingBackend``
- ``initializer``, ``initargs``: setup function and its arguments to call
in each worker process.
- ``maxtasksperchild``: maximum number of tasks a worker can
execute before being replaced with a fresh one.
- ``context``: specify the start method to use for creating new worker
processes.

In addition to the builtin joblib backends, there are several cluster-specific
backends you can use:
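A short sketch of the ``MultiprocessingBackend`` options documented above; ``init_worker`` is a hypothetical function, and the snippet assumes a joblib version with this PR merged:

from joblib import Parallel, delayed, parallel_config

def init_worker():
    # Hypothetical setup run in each newly created worker process.
    print("worker started")

# maxtasksperchild recycles each worker after 5 completed tasks; the
# initializer then runs again in every replacement worker.
with parallel_config(
    backend="multiprocessing", initializer=init_worker, maxtasksperchild=5
):
    out = Parallel(n_jobs=2)(delayed(abs)(-i) for i in range(20))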
53 changes: 38 additions & 15 deletions joblib/_parallel_backends.py
@@ -48,10 +48,13 @@

nesting_level = None

def __init__(self, nesting_level=None, inner_max_num_threads=None, **kwargs):
super().__init__(**kwargs)
def __init__(
self, nesting_level=None, inner_max_num_threads=None, **backend_kwargs
):
super().__init__()
self.nesting_level = nesting_level
self.inner_max_num_threads = inner_max_num_threads
self.backend_kwargs = backend_kwargs

MAX_NUM_THREADS_VARS = [
"OMP_NUM_THREADS",
@@ -152,12 +155,7 @@
return out.get()

def configure(
self,
n_jobs=1,
parallel=None,
prefer=None,
require=None,
**backend_args,
self, n_jobs=1, parallel=None, prefer=None, require=None, **backend_kwargs
):
"""Reconfigure the backend and return the number of workers.

@@ -360,7 +358,7 @@
self.configure(
n_jobs=self.parallel.n_jobs,
parallel=self.parallel,
**self.parallel._backend_args,
**self.parallel._backend_kwargs,
)


@@ -489,7 +487,7 @@
uses_threads = True
supports_sharedmem = True

def configure(self, n_jobs=1, parallel=None, **backend_args):
def configure(self, n_jobs=1, parallel=None, **backend_kwargs):
"""Build a process or thread pool and return the number of workers"""
n_jobs = self.effective_n_jobs(n_jobs)
if n_jobs == 1:
@@ -574,16 +572,26 @@
return super(MultiprocessingBackend, self).effective_n_jobs(n_jobs)

def configure(
self, n_jobs=1, parallel=None, prefer=None, require=None, **memmappingpool_args
self,
n_jobs=1,
parallel=None,
prefer=None,
require=None,
**memmapping_pool_kwargs,
):
"""Build a process or thread pool and return the number of workers"""
n_jobs = self.effective_n_jobs(n_jobs)
if n_jobs == 1:
raise FallbackToBackend(SequentialBackend(nesting_level=self.nesting_level))

memmapping_pool_kwargs = {
**self.backend_kwargs,
**memmapping_pool_kwargs,
}

# Make sure to free as much memory as possible before forking
gc.collect()
self._pool = MemmappingPool(n_jobs, **memmappingpool_args)
self._pool = MemmappingPool(n_jobs, **memmapping_pool_kwargs)
self.parallel = parallel
return n_jobs

@@ -605,20 +613,35 @@
parallel=None,
prefer=None,
require=None,
idle_worker_timeout=300,
**memmappingexecutor_args,
idle_worker_timeout=None,
**memmapping_executor_kwargs,
):
"""Build a process executor and return the number of workers"""
n_jobs = self.effective_n_jobs(n_jobs)
if n_jobs == 1:
raise FallbackToBackend(SequentialBackend(nesting_level=self.nesting_level))

memmapping_executor_kwargs = {
**self.backend_kwargs,
**memmapping_executor_kwargs,
}

# Prohibit the use of 'timeout' in the LokyBackend, as 'idle_worker_timeout'
# better describes the backend's behavior.
if "timeout" in memmapping_executor_kwargs:
raise ValueError(
"The 'timeout' parameter is not supported by the LokyBackend. "
"Please use the `idle_worker_timeout` parameter instead."
)
if idle_worker_timeout is None:
idle_worker_timeout = self.backend_kwargs.get("idle_worker_timeout", 300)

self._workers = get_memmapping_executor(
n_jobs,
timeout=idle_worker_timeout,
env=self._prepare_worker_env(n_jobs=n_jobs),
context_id=parallel._id,
**memmappingexecutor_args,
**memmapping_executor_kwargs,
)
self.parallel = parallel
return n_jobs
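To make the behavior of ``LokyBackend.configure`` above concrete, here is a standalone sketch (a hypothetical helper, not joblib code) of the two rules it applies: kwargs passed at ``configure()`` time override kwargs stored at construction time, and ``timeout`` is rejected in favor of ``idle_worker_timeout``:

def resolve_executor_kwargs(constructor_kwargs, configure_kwargs):
    # Hypothetical helper mirroring the merge and checks above.
    merged = {**constructor_kwargs, **configure_kwargs}  # configure() wins
    if "timeout" in merged:
        raise ValueError(
            "The 'timeout' parameter is not supported by the LokyBackend. "
            "Please use the `idle_worker_timeout` parameter instead."
        )
    # Fall back to the constructor-time value, then to the 300 s default.
    merged.setdefault("idle_worker_timeout", 300)
    return merged

kwargs = resolve_executor_kwargs({"idle_worker_timeout": 60}, {})
assert kwargs["idle_worker_timeout"] == 60  # constructor-time value survives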
48 changes: 26 additions & 22 deletions joblib/parallel.py
@@ -175,7 +175,6 @@
# context manager or the context manager did not set a backend.
# create the default backend instance now.
backend = BACKENDS[DEFAULT_BACKEND](nesting_level=0)

explicit_backend = False

# Try to use the backend set by the user with the context manager.
@@ -300,7 +299,7 @@
overridden with ``TMP``, ``TMPDIR`` or ``TEMP`` environment
variables, typically ``/tmp`` under Unix operating systems.

max_nbytes int, str, or None, optional, default='1M'
max_nbytes: int, str, or None, optional, default='1M'
Threshold on the size of arrays passed to the workers that
triggers automated memory mapping in temp_folder. Can be an int
in Bytes, or a human-readable string, e.g., '1M' for 1 megabyte.
@@ -542,8 +541,7 @@

See Also
--------
joblib.parallel_config: context manager to change the backend
configuration.
joblib.parallel_config: context manager to change the backend configuration.
"""

def __init__(
@@ -1084,6 +1082,8 @@
disable memmapping, other modes defined in the numpy.memmap doc:
https://numpy.org/doc/stable/reference/generated/numpy.memmap.html
Also, see 'max_nbytes' parameter documentation for more details.
backend_kwargs: dict, optional
Additional parameters to pass to the backend `configure` method.

Notes
-----
@@ -1222,6 +1222,7 @@
mmap_mode=default_parallel_config["mmap_mode"],
prefer=default_parallel_config["prefer"],
require=default_parallel_config["require"],
**backend_kwargs,
):
# Initiate parent Logger class state
super().__init__()
@@ -1253,28 +1254,31 @@
# Check if we are under a parallel_config or parallel_backend
# context manager and use the config from the context manager
# for arguments that are not explicitly set.
self._backend_args = {
k: _get_config_param(param, context_config, k)
for param, k in [
(max_nbytes, "max_nbytes"),
(temp_folder, "temp_folder"),
(mmap_mode, "mmap_mode"),
(prefer, "prefer"),
(require, "require"),
(verbose, "verbose"),
]
self._backend_kwargs = {
**backend_kwargs,
**{
k: _get_config_param(param, context_config, k)
for param, k in [
(max_nbytes, "max_nbytes"),
(temp_folder, "temp_folder"),
(mmap_mode, "mmap_mode"),
(prefer, "prefer"),
(require, "require"),
(verbose, "verbose"),
]
},
}

if isinstance(self._backend_args["max_nbytes"], str):
self._backend_args["max_nbytes"] = memstr_to_bytes(
self._backend_args["max_nbytes"]
if isinstance(self._backend_kwargs["max_nbytes"], str):
self._backend_kwargs["max_nbytes"] = memstr_to_bytes(
self._backend_kwargs["max_nbytes"]
)
self._backend_args["verbose"] = max(0, self._backend_args["verbose"] - 50)
self._backend_kwargs["verbose"] = max(0, self._backend_kwargs["verbose"] - 50)

if DEFAULT_MP_CONTEXT is not None:
self._backend_args["context"] = DEFAULT_MP_CONTEXT
self._backend_kwargs["context"] = DEFAULT_MP_CONTEXT
elif hasattr(mp, "get_context"):
self._backend_args["context"] = mp.get_context()
self._backend_kwargs["context"] = mp.get_context()

if backend is default_parallel_config["backend"] or backend is None:
backend = active_backend
Expand All @@ -1289,7 +1293,7 @@
# Make it possible to pass a custom multiprocessing context as
# backend to change the start method to forkserver or spawn or
# preload modules on the forkserver helper process.
self._backend_args["context"] = backend
self._backend_kwargs["context"] = backend
backend = MultiprocessingBackend(nesting_level=nesting_level)

elif backend not in BACKENDS and backend in MAYBE_AVAILABLE_BACKENDS:
@@ -1372,7 +1376,7 @@
"""Build a process or thread pool and return the number of workers"""
try:
n_jobs = self._backend.configure(
n_jobs=self.n_jobs, parallel=self, **self._backend_args
n_jobs=self.n_jobs, parallel=self, **self._backend_kwargs
)
if self.timeout is not None and not self._backend.supports_timeout:
warnings.warn(
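Putting the ``joblib/parallel.py`` changes together: extra keyword arguments to ``Parallel`` itself now reach the backend's ``configure`` method, so an initializer can also be passed without a context manager. A minimal sketch, assuming a joblib version with this PR; ``warm_up`` is a hypothetical function:

from joblib import Parallel, delayed

def warm_up():
    # Hypothetical per-worker setup, e.g. importing a heavy module or
    # opening a connection before any task runs.
    import math  # noqa: F401

results = Parallel(n_jobs=2, backend="loky", initializer=warm_up)(
    delayed(pow)(i, 2) for i in range(4)
)
print(results)  # [0, 1, 4, 9]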