Skip to content

Commit

Permalink
PwBaseWorkChain: Improve restart and validate inputs (#722)
Browse files Browse the repository at this point in the history
Improve the code regarding restarting in the `PwBaseWorkChain` in several ways:

* Remove some of the logic in the `PwBaseWorkChain` regarding restarting
from a previous calculation using a `RemoteData` provided to the
`pw.parent_folder` input. The current logic expected the `RemoteData` to
have a `PwCalculation` creator, which is not always the case. Moreover, the
`restart_mode` chosen by the user was overridden, which means that e.g.
restarting from _only_ the charge density with `startingpot` was not possible.
* For users who want to restart in the first `PwCalculation`, the inputs are
now validated to make sure that they are sensible. In case the calculation will
still run correctly but the inputs are not consistent, a warning is raised
during the validation. In case the inputs lead to a failed calculation, an error
is raised.
* For restarts made by the `PwBaseWorkChain`, the restart logic is gathered
inside the `set_restart_type` method. A new `Enum`, `RestartType` is added for
the different modes of restarting. Each of the error handlers is updated to
use this new method.
* Only for the `sanity_check_insufficient_bands` error handler, the restart
method is changed to restart from the charge density.

Finally, the `validate_parameters` step in the outline of the `PwBaseWorkChain`
is merged into the `setup` step, since no more validation is performed and the
other code in this step is more at home in the `setup` step.
  • Loading branch information
mbercx authored Sep 21, 2021
1 parent a2a109b commit cb32be5
Show file tree
Hide file tree
Showing 8 changed files with 279 additions and 92 deletions.
34 changes: 34 additions & 0 deletions aiida_quantumespresso/calculations/pw.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
"""`CalcJob` implementation for the pw.x code of Quantum ESPRESSO."""
import os
import warnings

from aiida import orm
from aiida.common.lang import classproperty
Expand Down Expand Up @@ -69,6 +70,8 @@ def define(cls, spec):
help='kpoint mesh or kpoint path')
spec.input('hubbard_file', valid_type=orm.SinglefileData, required=False,
help='SinglefileData node containing the output Hubbard parameters from a HpCalculation')
spec.inputs.validator = cls.validate_inputs

spec.output('output_parameters', valid_type=orm.Dict,
help='The `output_parameters` output node of the successful calculation.')
spec.output('output_structure', valid_type=orm.StructureData, required=False,
Expand Down Expand Up @@ -152,6 +155,37 @@ def define(cls, spec):
'is `False` and/or `electron_maxstep` is 0.')
# yapf: enable

@staticmethod
def validate_inputs(value, _):
    """Validate the top level namespace.

    Check that the restart-related input parameters are consistent with the presence of ``parent_folder``:

    * For ``nscf`` and ``bands`` calculations, ``parent_folder`` must be provided and ``startingpot`` must be
      ``file`` (both are errors otherwise); a ``restart_mode`` other than ``from_scratch`` only raises a warning.
    * For any other calculation type, if ``parent_folder`` is provided but neither ``restart_mode = 'restart'``,
      ``startingpot = 'file'`` nor ``startingwfc = 'file'`` is set, a warning is raised.

    :param value: mapping of the inputs passed to the namespace; ``value['parameters']`` must expose ``get_dict()``.
    :param _: the port namespace, unused.
    :return: an error message string if the inputs are invalid, otherwise ``None``.
    """
    # Without the `parameters` input there is nothing to cross-check; bail out instead of raising a `KeyError`.
    if 'parameters' not in value:
        return None

    parameters = value['parameters'].get_dict()
    control = parameters.get('CONTROL', {})
    electrons = parameters.get('ELECTRONS', {})
    calculation_type = control.get('calculation', 'scf')

    if calculation_type in ('nscf', 'bands'):
        if 'parent_folder' not in value:
            return f'`parent_folder` not provided for `{calculation_type}` calculation.'
        # An unset `startingpot` is accepted here: only an explicit value other than `file` is rejected.
        if electrons.get('startingpot', 'file') != 'file':
            return f'`startingpot` should be set to `file` for a `{calculation_type}` calculation.'
        if control.get('restart_mode', 'from_scratch') != 'from_scratch':
            warnings.warn(f'`restart_mode` should be set to `from_scratch` for a `{calculation_type}` calculation.')
    elif 'parent_folder' in value:
        uses_parent_folder = (
            control.get('restart_mode') == 'restart' or electrons.get('startingpot') == 'file' or
            electrons.get('startingwfc') == 'file'
        )
        if not uses_parent_folder:
            warnings.warn(
                '`parent_folder` input was provided for the `PwCalculation`, but no '
                'input parameters are set to restart from these files.'
            )

    return None

@classproperty
def filename_input_hubbard_parameters(cls):
"""Return the relative file name of the file containing the Hubbard parameters.
Expand Down
9 changes: 9 additions & 0 deletions aiida_quantumespresso/common/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,12 @@ class SpinType(enum.Enum):
COLLINEAR = 'collinear'
NON_COLLINEAR = 'non_collinear'
SPIN_ORBIT = 'spin_orbit'


class RestartType(enum.Enum):
    """Supported strategies for restarting a Quantum ESPRESSO calculation.

    Each member's value is its lowercase string identifier.
    """

    # Continue the previous run as a whole.
    FULL = 'full'
    # Start over without reusing output of a previous run.
    FROM_SCRATCH = 'from_scratch'
    # Start a new run that reads the charge density of a previous run.
    FROM_CHARGE_DENSITY = 'from_charge_density'
    # Start a new run that reads the wave functions of a previous run.
    FROM_WAVE_FUNCTIONS = 'from_wave_functions'
119 changes: 73 additions & 46 deletions aiida_quantumespresso/workflows/pw/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from aiida.plugins import CalculationFactory, GroupFactory

from aiida_quantumespresso.calculations.functions.create_kpoints_from_distance import create_kpoints_from_distance
from aiida_quantumespresso.common.types import ElectronicType, SpinType
from aiida_quantumespresso.common.types import ElectronicType, SpinType, RestartType
from aiida_quantumespresso.utils.defaults.calculation import pw as qe_defaults
from aiida_quantumespresso.utils.mapping import update_mapping, prepare_process_inputs
from aiida_quantumespresso.utils.pseudopotential import validate_and_prepare_pseudos_inputs
Expand Down Expand Up @@ -73,7 +73,6 @@ def define(cls, spec):

spec.outline(
cls.setup,
cls.validate_parameters,
cls.validate_kpoints,
cls.validate_pseudos,
if_(cls.should_run_init)(
Expand Down Expand Up @@ -230,30 +229,25 @@ def get_builder_from_protocol(
return builder

def setup(self):
"""Call the `setup` of the `BaseRestartWorkChain` and then create the inputs dictionary in `self.ctx.inputs`.
"""Call the ``setup`` of the ``BaseRestartWorkChain`` and create the inputs dictionary in ``self.ctx.inputs``.
This `self.ctx.inputs` dictionary will be used by the `BaseRestartWorkChain` to submit the calculations in the
internal loop.
This ``self.ctx.inputs`` dictionary will be used by the ``BaseRestartWorkChain`` to submit the calculations
in the internal loop.
The ``parameters`` and ``settings`` input ``Dict`` nodes are converted into a regular dictionary and the
default namelists for the ``parameters`` are set to empty dictionaries if not specified.
"""
super().setup()
self.ctx.restart_calc = None
self.ctx.inputs = AttributeDict(self.exposed_inputs(PwCalculation, 'pw'))

def validate_parameters(self):
"""Validate inputs that might depend on each other and cannot be validated by the spec.
Also define dictionary `inputs` in the context, that will contain the inputs for the calculation that will be
launched in the `run_calculation` step.
"""
self.ctx.inputs.parameters = self.ctx.inputs.parameters.get_dict()
self.ctx.inputs.settings = self.ctx.inputs.settings.get_dict() if 'settings' in self.ctx.inputs else {}

if 'parent_folder' in self.ctx.inputs:
self.ctx.restart_calc = self.ctx.inputs.parent_folder.creator

self.ctx.inputs.parameters.setdefault('CONTROL', {})
self.ctx.inputs.parameters.setdefault('ELECTRONS', {})
self.ctx.inputs.parameters.setdefault('SYSTEM', {})
self.ctx.inputs.parameters['CONTROL'].setdefault('calculation', 'scf')

self.ctx.inputs.settings = self.ctx.inputs.settings.get_dict() if 'settings' in self.ctx.inputs else {}

def validate_kpoints(self):
"""Validate the inputs related to k-points.
Expand Down Expand Up @@ -305,6 +299,36 @@ def set_max_seconds(self, max_wallclock_seconds):
max_seconds = max_wallclock_seconds * max_seconds_factor
self.ctx.inputs.parameters['CONTROL']['max_seconds'] = max_seconds

def set_restart_type(self, restart_type, parent_folder=None):
    """Set the restart type for the next iteration.

    Updates ``CONTROL.restart_mode``, ``ELECTRONS.startingpot`` and ``ELECTRONS.startingwfc`` in
    ``self.ctx.inputs.parameters`` and sets or removes ``self.ctx.inputs.parent_folder`` accordingly.

    :param restart_type: a ``RestartType`` member, or the value of one.
    :param parent_folder: the folder to restart from; required unless restarting from scratch, in which case it is
        ignored and any ``parent_folder`` already in the inputs is removed.
    :raises ValueError: if ``restart_type`` is not a valid ``RestartType``, or if ``parent_folder`` is ``None``
        while not restarting from scratch.
    """
    # Normalise first: an unknown restart type fails loudly here instead of silently leaving the inputs untouched.
    restart_type = RestartType(restart_type)

    if parent_folder is None and restart_type != RestartType.FROM_SCRATCH:
        raise ValueError('When not restarting from scratch, a `parent_folder` must be provided.')

    # Per restart type: (`restart_mode`, `startingpot`, `startingwfc`), where `None` means "remove the key".
    settings = {
        RestartType.FROM_SCRATCH: ('from_scratch', None, None),
        RestartType.FULL: ('restart', None, None),
        RestartType.FROM_CHARGE_DENSITY: ('from_scratch', 'file', None),
        RestartType.FROM_WAVE_FUNCTIONS: ('from_scratch', None, 'file'),
    }
    restart_mode, startingpot, startingwfc = settings[restart_type]

    parameters = self.ctx.inputs.parameters
    parameters['CONTROL']['restart_mode'] = restart_mode

    electrons = parameters['ELECTRONS']
    for key, setting in (('startingpot', startingpot), ('startingwfc', startingwfc)):
        if setting is None:
            electrons.pop(key, None)
        else:
            electrons[key] = setting

    if restart_type == RestartType.FROM_SCRATCH:
        self.ctx.inputs.pop('parent_folder', None)
    else:
        self.ctx.inputs.parent_folder = parent_folder

def should_run_init(self):
"""Return whether an initialization calculation should be run.
Expand Down Expand Up @@ -407,24 +431,12 @@ def inspect_init(self):
return

def prepare_process(self):
"""Prepare the inputs for the next calculation.
If a `restart_calc` has been set in the context, its `remote_folder` will be used as the `parent_folder` input
for the next calculation and the `restart_mode` is set to `restart`. Otherwise, no `parent_folder` is used and
`restart_mode` is set to `from_scratch`.
"""
"""Prepare the inputs for the next calculation."""
max_wallclock_seconds = self.ctx.inputs.metadata.options.get('max_wallclock_seconds', None)

if max_wallclock_seconds is not None and 'max_seconds' not in self.ctx.inputs.parameters['CONTROL']:
self.set_max_seconds(max_wallclock_seconds)

if self.ctx.restart_calc:
self.ctx.inputs.parameters['CONTROL']['restart_mode'] = 'restart'
self.ctx.inputs.parent_folder = self.ctx.restart_calc.outputs.remote_folder
else:
self.ctx.inputs.parameters['CONTROL']['restart_mode'] = 'from_scratch'
self.ctx.inputs.pop('parent_folder', None)

def report_error_handled(self, calculation, action):
"""Report an action taken for a calculation that has failed.
Expand All @@ -443,7 +455,8 @@ def sanity_check_insufficient_bands(self, calculation):
Verify that the occupation of the last band is below a certain threshold, unless `occupations` was explicitly
set to `fixed` in the input parameters. If this is violated, the calculation used too few bands and cannot be
trusted. The number of bands is increased and the calculation is restarted, starting from the last.
trusted. The number of bands is increased and the calculation is restarted, using the charge density from the
previous calculation.
"""
from aiida_quantumespresso.utils.bands import get_highest_occupied_band

Expand Down Expand Up @@ -476,10 +489,14 @@ def sanity_check_insufficient_bands(self, calculation):

nbnd_cur = calculation.outputs.output_parameters.get_dict()['number_of_bands']
nbnd_new = nbnd_cur + max(int(nbnd_cur * self.defaults.delta_factor_nbnd), self.defaults.delta_minimum_nbnd)
self.ctx.inputs.parameters['SYSTEM']['nbnd'] = nbnd_new

self.ctx.inputs.parameters.setdefault('SYSTEM', {})['nbnd'] = nbnd_new
self.set_restart_type(RestartType.FROM_CHARGE_DENSITY, calculation.outputs.remote_folder)
self.report(
f'Action taken: increased number of bands to {nbnd_new} and restarting from the previous charge '
'density.'
)

self.report(f'Action taken: increased number of bands to {nbnd_new} and restarting from scratch')
return ProcessHandlerReport(True)

@process_handler(priority=600)
Expand All @@ -504,14 +521,20 @@ def handle_known_unrecoverable_failure(self, calculation):
PwCalculation.exit_codes.ERROR_OUT_OF_WALLTIME,
])
def handle_out_of_walltime(self, calculation):
"""Handle `ERROR_OUT_OF_WALLTIME` exit code: calculation shut down neatly and we can simply restart."""
"""Handle `ERROR_OUT_OF_WALLTIME` exit code.
In this case the calculation shut down neatly and we can simply restart. We consider two cases:
1. If the structure is unchanged, we do a full restart.
2. If the structure has changed during the calculation, we restart from scratch.
"""
try:
self.ctx.inputs.structure = calculation.outputs.output_structure
except exceptions.NotExistent:
self.ctx.restart_calc = calculation
self.set_restart_type(RestartType.FULL, calculation.outputs.remote_folder)
self.report_error_handled(calculation, 'simply restart from the last calculation')
else:
self.ctx.restart_calc = None
self.set_restart_type(RestartType.FROM_SCRATCH)
self.report_error_handled(calculation, 'out of walltime: structure changed so restarting from scratch')

return ProcessHandlerReport(True)
Expand All @@ -527,7 +550,6 @@ def handle_vcrelax_converged_except_final_scf(self, calculation):
Convergence reached in `vc-relax` except thresholds exceeded in final scf: consider as converged.
"""
self.ctx.is_finished = True
self.ctx.restart_calc = calculation
action = 'ionic convergence thresholds met except in final scf: consider structure relaxed.'
self.report_error_handled(calculation, action)
self.results() # Call the results method to attach the output nodes
Expand All @@ -548,9 +570,10 @@ def handle_relax_recoverable_ionic_convergence_error(self, calculation):
These exit codes signify that the ionic convergence thresholds were not met, but the output structure is usable,
so the solution is to simply restart from scratch but from the output structure.
"""
self.ctx.restart_calc = None
self.ctx.inputs.structure = calculation.outputs.output_structure
action = 'no ionic convergence but clean shutdown: restarting from scratch but using output structure.'

self.set_restart_type(RestartType.FROM_SCRATCH)
self.report_error_handled(calculation, action)
return ProcessHandlerReport(True)

Expand All @@ -565,33 +588,38 @@ def handle_relax_recoverable_electronic_convergence_error(self, calculation):
"""Handle various exit codes for recoverable `relax` calculations with failed electronic convergence.
These exit codes signify that the electronic convergence thresholds were not met, but the output structure is
usable, so the solution is to simply restart from scratch but from the output structure.
usable, so the solution is to simply restart from scratch but from the output structure and with a reduced
``mixing_beta``.
"""
factor = self.defaults.delta_factor_mixing_beta
mixing_beta = self.ctx.inputs.parameters.get('ELECTRONS', {}).get('mixing_beta', self.defaults.qe.mixing_beta)
mixing_beta_new = mixing_beta * factor

self.ctx.restart_calc = None
self.ctx.inputs.parameters.setdefault('ELECTRONS', {})['mixing_beta'] = mixing_beta_new
self.ctx.inputs.parameters['ELECTRONS']['mixing_beta'] = mixing_beta_new
self.ctx.inputs.structure = calculation.outputs.output_structure
action = 'no electronic convergence but clean shutdown: reduced beta mixing from {} to {} restarting from ' \
'scratch but using output structure.'.format(mixing_beta, mixing_beta_new)

self.set_restart_type(RestartType.FROM_SCRATCH)
self.report_error_handled(calculation, action)
return ProcessHandlerReport(True)

@process_handler(priority=410, exit_codes=[
PwCalculation.exit_codes.ERROR_ELECTRONIC_CONVERGENCE_NOT_REACHED,
])
def handle_electronic_convergence_not_achieved(self, calculation):
"""Handle `ERROR_ELECTRONIC_CONVERGENCE_NOT_REACHED`: decrease the mixing beta and restart from scratch."""
"""Handle `ERROR_ELECTRONIC_CONVERGENCE_NOT_REACHED` error.
Decrease the mixing beta and fully restart from the previous calculation.
"""
factor = self.defaults.delta_factor_mixing_beta
mixing_beta = self.ctx.inputs.parameters.get('ELECTRONS', {}).get('mixing_beta', self.defaults.qe.mixing_beta)
mixing_beta_new = mixing_beta * factor

self.ctx.restart_calc = None
self.ctx.inputs.parameters.setdefault('ELECTRONS', {})['mixing_beta'] = mixing_beta_new

self.ctx.inputs.parameters['ELECTRONS']['mixing_beta'] = mixing_beta_new
action = f'reduced beta mixing from {mixing_beta} to {mixing_beta_new} and restarting from the last calculation'

self.set_restart_type(RestartType.FULL, calculation.outputs.remote_folder)
self.report_error_handled(calculation, action)
return ProcessHandlerReport(True)

Expand All @@ -601,7 +629,6 @@ def handle_electronic_convergence_not_achieved(self, calculation):
def handle_electronic_convergence_warning(self, calculation):
"""Handle `WARNING_ELECTRONIC_CONVERGENCE_NOT_REACHED': consider finished."""
self.ctx.is_finished = True
self.ctx.restart_calc = calculation
action = 'electronic convergence not reached but inputs say this is ok: consider finished.'
self.report_error_handled(calculation, action)
self.results() # Call the results method to attach the output nodes
Expand Down
Loading

0 comments on commit cb32be5

Please sign in to comment.