@@ -215,6 +205,7 @@ def render(self):
self.sssp_installation,
self.qe_setup,
self.submission_blocker_messages,
+ self.submission_warning_messages,
ipw.HTML("""
Labeling Your Job
@@ -233,10 +224,20 @@ def render(self):
self.rendered = True
+ # Render and set up default PW code
pw_code = self._model.get_code("dft", "pw")
pw_code.activate()
+ pw_code_widget = pw_code.get_setup_widget()
+ pw_code_widget.num_cpus.observe(
+ self._on_pw_code_resource_change,
+ "value",
+ )
+ pw_code_widget.num_nodes.observe(
+ self._on_pw_code_resource_change,
+ "value",
+ )
- # Render any active codes starting with pw
+ # Render any other active codes
self._toggle_code(pw_code)
for _, code in self._model.get_codes(flat=True):
if code is not pw_code and code.is_active:
@@ -251,6 +252,14 @@ def reset(self):
def _on_previous_step_state_change(self, _):  # callback; the change payload is ignored
self._update_state()  # recompute this step's state via _update_state()
+ def _on_input_structure_change(self, _):  # callback; presumably fired when the model's input structure changes - confirm where it is registered
+ self._model.check_resources()  # re-run the model's resource check, which rewrites submission_warning_messages
+
+ def _on_input_parameters_change(self, _):  # callback; the change payload is ignored
+ self._model.update_active_codes()  # three model refreshes follow, executed in this order
+ self._model.update_process_label()
+ self._model.update_submission_blockers()
+
def _on_process_change(self, _):
with self.hold_trait_notifications():
# TODO why here? Do we not populate traits earlier that would cover this?
@@ -258,11 +267,6 @@ def _on_process_change(self, _):
self._model.input_structure = self._model.process.inputs.structure
self._update_state()
- def _on_input_parameters_change(self, _):
- self._model.update_active_codes()
- self._model.update_process_label()
- self._model.update_submission_blockers()
-
def _on_submission_blockers_change(self, _):
self._model.update_submission_blocker_message()
self._update_state()
@@ -282,6 +286,9 @@ def _on_code_activation_change(self, change):
def _on_code_selection_change(self, _):  # callback; the change payload is ignored
self._model.update_submission_blockers()  # ask the model to recompute its submission blockers
+ def _on_pw_code_resource_change(self, _):  # observer attached in render() to the pw code widget's num_cpus and num_nodes "value" traits
+ self._model.check_resources()  # re-run the model's resource check, which rewrites submission_warning_messages
+
def _on_submission(self, _):
self._model.submit()
self._update_state()
diff --git a/src/aiidalab_qe/app/submission/model.py b/src/aiidalab_qe/app/submission/model.py
index 7dc775ac4..51baf0572 100644
--- a/src/aiidalab_qe/app/submission/model.py
+++ b/src/aiidalab_qe/app/submission/model.py
@@ -1,5 +1,6 @@
from __future__ import annotations
+import os
import typing as t
from copy import deepcopy
@@ -35,6 +36,7 @@ class SubmissionModel(tl.HasTraits):
process_description = tl.Unicode("")
submission_blocker_messages = tl.Unicode("")
+ submission_warning_messages = tl.Unicode("")
installing_qe = tl.Bool(False)
installing_sssp = tl.Bool(False)
@@ -55,6 +57,21 @@ class SubmissionModel(tl.HasTraits):
code_widgets: dict[str, QEAppComputationalResourcesWidget] = {}
def __init__(self, *args, **kwargs):
    """Initialise the model and the settings used by ``check_resources``.

    All positional and keyword arguments are forwarded unchanged to the
    parent initialiser.
    """
    super().__init__(*args, **kwargs)

    # A structure counts as "large" in ``check_resources`` when it exceeds
    # either of these thresholds.
    self._RUN_ON_LOCALHOST_NUM_SITES_WARN_THRESHOLD = 10
    self._RUN_ON_LOCALHOST_VOLUME_WARN_THRESHOLD = 1000  # \AA^3

    # Template filled in by ``check_resources`` through
    # ``.format(alert_class=..., message=...)``; it therefore has to contain
    # both placeholders, otherwise every warning renders as blank text.
    # NOTE(review): the markup of this template was missing from the
    # reviewed text; the wrapper below is a minimal reconstruction --
    # confirm the intended HTML/CSS classes against the rendered widget.
    self._ALERT_MESSAGE = """
        <div class="alert alert-{alert_class}">
            {message}
        </div>
    """
+
@property
def is_blocked(self):
return any(
@@ -84,6 +101,89 @@ def submit(self):
)
self.process = process
def check_resources(self):
    """Refresh ``submission_warning_messages`` for the selected pw.x resources.

    Two independent situations are reported, and both may appear at once:

    * Warning 1/2 -- the structure is "large" (more sites or a larger cell
      volume than the ``_RUN_ON_LOCALHOST_*`` thresholds) and fewer CPUs are
      requested than ``_estimate_min_cpus`` suggests. The wording and the
      suggestions differ depending on whether the code runs on localhost.
    * Warning 3 -- the code runs on localhost and requests more than 80% of
      the CPUs available to this process.

    When neither applies the message is set to the empty string. When no
    input structure is set or no pw.x code is selected the method returns
    early and leaves the current message untouched.
    """
    pw_code_model = self.get_code("dft", "pw")

    if not self.input_structure or not pw_code_model.selected:
        return  # No code selected or no structure, so nothing to do

    pw_code = pw_code_model.get_setup_widget()
    num_cpus = pw_code.num_cpus.value * pw_code.num_nodes.value
    on_localhost = orm.load_node(pw_code.value).computer.hostname == "localhost"
    num_sites = len(self.input_structure.sites)
    volume = self.input_structure.get_cell_volume()

    try:
        localhost_cpus = len(os.sched_getaffinity(0))
    except (AttributeError, OSError):
        # os.sched_getaffinity is not available on every OS. os.cpu_count()
        # is the fallback (less reliable inside containers) and may itself
        # return None when the count cannot be determined.
        localhost_cpus = os.cpu_count()

    large_system = (
        num_sites > self._RUN_ON_LOCALHOST_NUM_SITES_WARN_THRESHOLD
        or volume > self._RUN_ON_LOCALHOST_VOLUME_WARN_THRESHOLD
    )

    # Estimated number of CPUs for a run less than 12 hours.
    estimated_CPUs = self._estimate_min_cpus(num_sites, volume)

    under_resourced = large_system and estimated_CPUs > num_cpus
    # Skip the overload check when the local CPU count is unknown instead of
    # dividing by None.
    overloading = bool(
        on_localhost and localhost_cpus and num_cpus / localhost_cpus > 0.8
    )

    # List of possible suggestions for warnings.
    # NOTE(review): the HTML tags inside these literals were missing from
    # the reviewed text. <ul>/<li>/<sup> are reconstructed from the list
    # structure; restore any styling wrapper around the "Warning:" prefix or
    # emphasis inside the items if the original had one.
    suggestions = {
        "more_resources": f"<li>Increase the resources (total number of CPUs should be equal or more than {min(100, estimated_CPUs)}, if possible) </li>",
        "change_configuration": "<li>Review the configuration (e.g. choosing fast protocol - this will affect precision) </li>",
        "go_remote": "<li>Select a code that runs on a larger machine</li>",
        "avoid_overloading": "<li>Reduce the number of CPUs to avoid the overloading of the local machine </li>",
    }

    alert_message = ""
    if under_resourced:
        # Warning 1 (on localhost) / Warning 2 (remote): big system, few CPUs.
        # Only the tail of the sentence depends on where the code runs. The
        # branch is computed separately because a conditional expression
        # binds looser than implicit string concatenation; written inline it
        # drops the whole prefix in the remote case.
        if on_localhost:
            reason = "to run at the localhost. Consider the following: "
            advice = ("more_resources", "change_configuration")
        else:
            reason = "to run in a reasonable amount of time. Consider the following: "
            advice = ("go_remote", "more_resources", "change_configuration")
        alert_message += (
            f"⚠ Warning: The selected structure is large, with {num_sites} atoms "
            f"and a volume of {int(volume)} Å<sup>3</sup>, "
            "making it computationally demanding "
            f"{reason}"
            "<ul>" + "".join(suggestions[key] for key in advice) + "</ul>"
        )
    if overloading:
        # Warning 3: on localhost, more than 80% of the available CPUs.
        alert_message += (
            "⚠ Warning: the selected pw.x code will run locally, but "
            f"the number of requested CPUs ({num_cpus}) is larger than the 80% of the available resources ({localhost_cpus}). "
            "Please be sure that your local "
            "environment has enough free CPUs for the calculation. Consider the following: "
            "<ul>"
            + suggestions["avoid_overloading"]
            + suggestions["go_remote"]
            + "</ul>"
        )

    # An empty alert body means neither warning fired: clear the message
    # rather than rendering an empty alert box.
    self.submission_warning_messages = (
        self._ALERT_MESSAGE.format(
            alert_class="warning",
            message=alert_message,
        )
        if alert_message
        else ""
    )
+
def update_active_codes(self):
for name, code in self.get_codes(flat=True):
if name != "pw":
@@ -278,3 +378,49 @@ def _check_submission_blockers(self):
yield (
f"Error: hi, plugin developer, please use the QEAppComputationalResourcesWidget from aiidalab_qe.common.widgets for code {name}."
)
+
def _estimate_min_cpus(
    self,
    n,
    v,
    n0=9,
    v0=117,
    num_cpus0=4,
    t0=129.6,
    tmax=12 * 60 * 60,
    scf_cycles=5,
):
    """Estimate the minimum number of CPUs required to
    complete a task within a given time limit.

    The estimate scales a reference run (``n0`` atoms, volume ``v0``,
    ``num_cpus0`` CPUs, duration ``t0``) as
    ``(n / n0) ** 3 * (v / v0) ** 1.5``, multiplies it by ``scf_cycles``
    and divides by ``tmax``; the result is rounded up.

    Parameters
    ----------
    `n` : `int`
        The number of atoms in the system.
    `v` : `float`
        The volume of the system.
    `n0` : `int`, optional
        Reference number of atoms. Default is 9.
    `v0` : `float`, optional
        Reference volume, in the same unit as `v`. Default is 117.
    `num_cpus0` : `int`, optional
        Reference number of CPUs. Default is 4.
    `t0` : `float`, optional
        Reference time, in the same unit as `tmax`. Default is 129.6.
    `tmax` : `float`, optional
        Maximum time limit. Default is 12 hours (``12 * 60 * 60``).
    `scf_cycles` : `int`, optional
        Reference number of SCF cycles in a relaxation. Default is 5.

    Returns
    -------
    `int`
        The estimated minimum number of CPUs required.
    """
    # math.ceil already returns an int for a scalar, so no third-party
    # import is needed for this single rounding step.
    import math

    return math.ceil(
        scf_cycles * num_cpus0 * (n / n0) ** 3 * (v / v0) ** 1.5 * t0 / tmax
    )
diff --git a/tests/conftest.py b/tests/conftest.py
index 64ac89d0b..851c96d46 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -117,6 +117,13 @@ def _generate_structure_data(name="silicon", pbc=(True, True, True)):
structure.append_atom(position=(0.0, 0.0, 1.0), symbols="O")
structure.append_atom(position=(0.0, 1.0, 0.0), symbols="H")
+ elif name == "H2O-larger":
+ cell = [[20.0, 0.0, 0.0], [0.0, 20.0, 0.0], [0.0, 0.0, 20.0]]
+ structure = orm.StructureData(cell=cell)
+ structure.append_atom(position=(0.0, 0.0, 0.0), symbols="H")
+ structure.append_atom(position=(0.0, 0.0, 1.0), symbols="O")
+ structure.append_atom(position=(0.0, 1.0, 0.0), symbols="H")
+
structure.pbc = pbc
return structure
diff --git a/tests/test_submit_qe_workchain.py b/tests/test_submit_qe_workchain.py
index 1a5955691..cfa6afe57 100644
--- a/tests/test_submit_qe_workchain.py
+++ b/tests/test_submit_qe_workchain.py
@@ -129,6 +129,55 @@ def test_create_builder_advanced_settings(
)
@pytest.mark.usefixtures("aiida_profile_clean", "sssp")
def test_warning_messages(
    generate_structure_data,
    submit_app_generator,
):
    """Test the creation of the warning messages.

    For now, we test that the suggestions are indeed there.
    We should check the whole message, but this is for now not easy to do: the message is built
    on the fly with variables which are not accessible in this namespace.
    """
    import os

    suggestions = {
        "more_resources": "Increase the resources",
        "change_configuration": "Review the configuration",
        "go_remote": "Select a code that runs on a larger machine",
        "avoid_overloading": "Reduce the number of CPUs to avoid the overloading of the local machine",
    }

    # Mirror the CPU detection of ``check_resources``: os.sched_getaffinity
    # does not exist on every platform, so fall back to os.cpu_count().
    try:
        localhost_cpus = len(os.sched_getaffinity(0))
    except AttributeError:
        localhost_cpus = os.cpu_count()

    app: App = submit_app_generator(properties=["bands", "pdos"])
    submit_step = app.submit_step
    submit_model = app.submit_model

    pw_code = submit_model.get_code("dft", "pw").get_setup_widget()
    pw_code.num_cpus.value = 1
    submit_model.check_resources()
    # no warning (this presumes the test host exposes at least two CPUs,
    # otherwise a single CPU is already above the 80% threshold):
    assert submit_step.submission_warning_messages.value == ""

    # now we increase the resources, so we should have the Warning-3
    pw_code.num_cpus.value = localhost_cpus
    submit_model.check_resources()
    for suggestion in ["avoid_overloading", "go_remote"]:
        assert suggestions[suggestion] in submit_step.submission_warning_messages.value

    # now we use a large structure, so we should have the Warning-1 (and 2 if not on localhost)
    structure = generate_structure_data("H2O-larger")
    submit_model.input_structure = structure
    pw_code.num_cpus.value = 1
    submit_model.check_resources()
    num_sites = len(structure.sites)
    volume = structure.get_cell_volume()
    estimated_CPUs = submit_model._estimate_min_cpus(num_sites, volume)
    assert estimated_CPUs == 2
    for suggestion in ["more_resources", "change_configuration"]:
        assert suggestions[suggestion] in submit_step.submission_warning_messages.value
+
+
def builder_to_readable_dict(builder):
"""transverse the builder and return a dictionary with readable values."""
from aiida import orm