Skip to content

Commit

Permalink
Add extra JobTemplateCodeInfo class to hold the code info for JobTemplate
Browse files Browse the repository at this point in the history
In the current design, the code_info of a calc job is read from the
code setup and the plugin, and is then passed to the job template to
create the bash script. However, the job template needs more flexibility
to control the different parts of the script run line, whereas currently
all of the parts
  - exec_name from code uuid,
  - code_info.cmdline_params,
  - mpi parameters from computer setting

are stacked together in the job template's code_info. In this PR, the class
`JobTemplateCodeInfo` is created to hold these elements; it omits the
`code_uuid` and `withmpi` fields, which are not used in job script generation.
The code_info of `JobTemplate` and that of `CalcJob` are thereby decoupled
from each other, which leads to more flexibility.
  • Loading branch information
unkcpz committed Mar 11, 2022
1 parent 5b10cd3 commit a8348d6
Show file tree
Hide file tree
Showing 8 changed files with 163 additions and 126 deletions.
26 changes: 19 additions & 7 deletions aiida/engine/processes/calcjobs/calcjob.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
# For further information please visit http://www.aiida.net #
###########################################################################
"""Implementation of the CalcJob process."""
import dataclasses
import io
import json
import os
Expand Down Expand Up @@ -587,7 +588,7 @@ def presubmit(self, folder: Folder) -> CalcInfo:
from aiida.common.exceptions import InputValidationError, InvalidOperation, PluginInternalError, ValidationError
from aiida.common.utils import validate_list_of_string_tuples
from aiida.orm import Code, Computer, load_node
from aiida.schedulers.datastructures import JobTemplate
from aiida.schedulers.datastructures import JobTemplate, JobTemplateCodeInfo

inputs = self.node.get_incoming(link_type=LinkType.INPUT_CALC)

Expand Down Expand Up @@ -682,7 +683,7 @@ def presubmit(self, folder: Folder) -> CalcInfo:
if not isinstance(calc_info.codes_info, (list, tuple)):
raise PluginInternalError('codes_info passed to CalcInfo must be a list of CalcInfo objects')

codes_info = []
tmpl_codes_info = []
for code_info in calc_info.codes_info:

if not isinstance(code_info, CodeInfo):
Expand Down Expand Up @@ -713,11 +714,15 @@ def presubmit(self, folder: Folder) -> CalcInfo:
this_argv = [this_code.get_execname()
] + (code_info.cmdline_params if code_info.cmdline_params is not None else [])

# overwrite the old cmdline_params and add codename and mpirun stuff
code_info.cmdline_params = this_argv
tmpl_code_info = JobTemplateCodeInfo()
tmpl_code_info.cmdline_params = this_argv
tmpl_code_info.stdin_name = code_info.stdin_name
tmpl_code_info.stdout_name = code_info.stdout_name
tmpl_code_info.stderr_name = code_info.stderr_name
tmpl_code_info.join_files = code_info.join_files

codes_info.append(code_info)
job_tmpl.codes_info = codes_info
tmpl_codes_info.append(tmpl_code_info)
job_tmpl.codes_info = tmpl_codes_info

# set the codes execution mode, default set to `SERIAL`
codes_run_mode = CodeRunMode.SERIAL
Expand Down Expand Up @@ -759,8 +764,15 @@ def presubmit(self, folder: Folder) -> CalcInfo:
script_content = scheduler.get_submit_script(job_tmpl)
folder.create_file_from_filelike(io.StringIO(script_content), submit_script_filename, 'w', encoding='utf8')

def encoder(obj):
    """Convert dataclass instances to plain dictionaries for ``json.dumps``.

    Meant to be passed as the ``default`` argument of ``json.dumps``, which calls it for
    every object that it cannot serialize natively.

    :param obj: the object that could not be serialized natively.
    :return: dictionary with the fields of ``obj`` as produced by ``dataclasses.asdict``.
    :raises TypeError: if ``obj`` is not a dataclass instance.
    """
    # ``is_dataclass`` is also true for dataclass *types*, which ``asdict`` rejects with an
    # unrelated message; exclude classes so every unsupported object gets the same error.
    if dataclasses.is_dataclass(obj) and not isinstance(obj, type):
        return dataclasses.asdict(obj)
    raise TypeError(f' {obj!r} is not JSON serializable')

subfolder = folder.get_subfolder('.aiida', create=True)
subfolder.create_file_from_filelike(io.StringIO(json.dumps(job_tmpl)), 'job_tmpl.json', 'w', encoding='utf8')
subfolder.create_file_from_filelike(
io.StringIO(json.dumps(job_tmpl, default=encoder)), 'job_tmpl.json', 'w', encoding='utf8'
)
subfolder.create_file_from_filelike(io.StringIO(json.dumps(calc_info)), 'calcinfo.json', 'w', encoding='utf8')

if calc_info.local_copy_list is None:
Expand Down
26 changes: 25 additions & 1 deletion aiida/schedulers/datastructures.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@
the data structure that is returned when querying for jobs in the scheduler
(JobInfo).
"""
from __future__ import annotations

import abc
from dataclasses import dataclass, field
import enum
import json

Expand Down Expand Up @@ -298,7 +301,7 @@ class JobTemplate(DefaultFieldsAttributeDict): # pylint: disable=too-many-insta
* ``append_text``: a (possibly multi-line) string to be inserted
in the scheduler script after the main execution line
* ``import_sys_environment``: import the system environment variables
* ``codes_info``: a list of aiida.common.datastructures.CalcInfo objects.
* ``codes_info``: a list of aiida.schedulers.datastructures.JobTemplateCodeInfo objects.
Each contains the information necessary to run a single code. At the
moment, it can contain:
Expand Down Expand Up @@ -357,6 +360,27 @@ class JobTemplate(DefaultFieldsAttributeDict): # pylint: disable=too-many-insta
)


@dataclass
class JobTemplateCodeInfo:
    """Data structure to communicate to a `Scheduler` how a code should be run in the submit script.

    `Scheduler.get_submit_script` will pass a list of these objects to `Scheduler._get_run_line`, which
    should build up the code execution line based on the parameters specified in this dataclass.

    :param cmdline_params: list of unescaped command line parameters.
    :param stdin_name: filename of the `stdin` file descriptor.
    :param stdout_name: filename of the `stdout` file descriptor.
    :param stderr_name: filename of the `stderr` file descriptor.
    :param join_files: boolean, if true, `stderr` should be redirected to `stdout`.
    """

    # A ``default_factory`` is used so that every instance gets its own, independent list.
    cmdline_params: list[str] = field(default_factory=list)
    stdin_name: None | str = None
    stdout_name: None | str = None
    stderr_name: None | str = None
    join_files: bool = False


class MachineInfo(DefaultFieldsAttributeDict):
"""
Similarly to what is defined in the DRMAA v.2 as SlotInfo; this identifies
Expand Down
7 changes: 4 additions & 3 deletions aiida/schedulers/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,9 +206,10 @@ def _get_submit_script_footer(self, job_tmpl):
def _get_run_line(self, codes_info, codes_run_mode):
"""Return a string with the line to execute a specific code with specific arguments.
:parameter codes_info: a list of `aiida.common.datastructures.CodeInfo` objects. Each contains the information
needed to run the code. I.e. `cmdline_params`, `stdin_name`, `stdout_name`, `stderr_name`, `join_files`. See
the documentation of `JobTemplate` and `CodeInfo`.
:parameter codes_info: a list of `aiida.schedulers.datastructures.JobTemplateCodeInfo` objects.
Each contains the information needed to run the code. I.e. `cmdline_params`, `stdin_name`,
`stdout_name`, `stderr_name`, `join_files`. See
the documentation of `JobTemplate` and `JobTemplateCodeInfo`.
:parameter codes_run_mode: instance of `aiida.common.datastructures.CodeRunMode` contains the information on how
to launch the multiple codes.
:return: string with format: [executable] [args] {[ < stdin ]} {[ < stdout ]} {[2>&1 | 2> stderr]}
Expand Down
10 changes: 5 additions & 5 deletions tests/schedulers/test_direct.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@
"""Tests for the ``DirectScheduler`` plugin."""
import pytest

from aiida.common.datastructures import CodeInfo, CodeRunMode
from aiida.common.datastructures import CodeRunMode
from aiida.schedulers import SchedulerError
from aiida.schedulers.datastructures import JobTemplate
from aiida.schedulers.datastructures import JobTemplate, JobTemplateCodeInfo
from aiida.schedulers.plugins.direct import DirectScheduler


Expand All @@ -26,11 +26,11 @@ def scheduler():
@pytest.fixture
def template():
"""Return an instance of the ``JobTemplate`` with some required presets."""
code_info = CodeInfo()
code_info.cmdline_params = []
tmpl_code_info = JobTemplateCodeInfo()
tmpl_code_info.cmdline_params = []

template = JobTemplate()
template.codes_info = [code_info]
template.codes_info = [tmpl_code_info]
template.codes_run_mode = CodeRunMode.SERIAL

return template
Expand Down
22 changes: 11 additions & 11 deletions tests/schedulers/test_lsf.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,8 @@ def test_parse_common_joblist_output():

def test_submit_script():
"""Test the creation of a simple submission script"""
from aiida.common.datastructures import CodeInfo, CodeRunMode
from aiida.schedulers.datastructures import JobTemplate
from aiida.common.datastructures import CodeRunMode
from aiida.schedulers.datastructures import JobTemplate, JobTemplateCodeInfo

scheduler = LsfScheduler()

Expand All @@ -122,10 +122,10 @@ def test_submit_script():
job_tmpl.uuid = str(uuid.uuid4())
job_tmpl.job_resource = scheduler.create_job_resource(tot_num_mpiprocs=2, parallel_env='b681e480bd.cern.ch')
job_tmpl.max_wallclock_seconds = 24 * 3600
code_info = CodeInfo()
code_info.cmdline_params = ['mpirun', '-np', '2', 'pw.x', '-npool', '1']
code_info.stdin_name = 'aiida.in'
job_tmpl.codes_info = [code_info]
tmpl_code_info = JobTemplateCodeInfo()
tmpl_code_info.cmdline_params = ['mpirun', '-np', '2', 'pw.x', '-npool', '1']
tmpl_code_info.stdin_name = 'aiida.in'
job_tmpl.codes_info = [tmpl_code_info]
job_tmpl.codes_run_mode = CodeRunMode.SERIAL
job_tmpl.account = 'account_id'

Expand All @@ -142,16 +142,16 @@ def test_submit_script():

def test_submit_script_rerunnable():
"""Test the `rerunnable` option of the submit script."""
from aiida.common.datastructures import CodeInfo, CodeRunMode
from aiida.schedulers.datastructures import JobTemplate
from aiida.common.datastructures import CodeRunMode
from aiida.schedulers.datastructures import JobTemplate, JobTemplateCodeInfo

scheduler = LsfScheduler()

job_tmpl = JobTemplate()
job_tmpl.job_resource = scheduler.create_job_resource(tot_num_mpiprocs=2, parallel_env='b681e480bd.cern.ch')
code_info = CodeInfo()
code_info.cmdline_params = []
job_tmpl.codes_info = [code_info]
tmpl_code_info = JobTemplateCodeInfo()
tmpl_code_info.cmdline_params = []
job_tmpl.codes_info = [tmpl_code_info]
job_tmpl.codes_run_mode = CodeRunMode.SERIAL

job_tmpl.rerunnable = True
Expand Down
70 changes: 35 additions & 35 deletions tests/schedulers/test_pbspro.py
Original file line number Diff line number Diff line change
Expand Up @@ -896,8 +896,8 @@ def test_submit_script(self):
"""
Test to verify if scripts works fine with default options
"""
from aiida.common.datastructures import CodeInfo, CodeRunMode
from aiida.schedulers.datastructures import JobTemplate
from aiida.common.datastructures import CodeRunMode
from aiida.schedulers.datastructures import JobTemplate, JobTemplateCodeInfo

scheduler = PbsproScheduler()

Expand All @@ -906,10 +906,10 @@ def test_submit_script(self):
job_tmpl.job_resource = scheduler.create_job_resource(num_machines=1, num_mpiprocs_per_machine=1)
job_tmpl.uuid = str(uuid.uuid4())
job_tmpl.max_wallclock_seconds = 24 * 3600
code_info = CodeInfo()
code_info.cmdline_params = ['mpirun', '-np', '23', 'pw.x', '-npool', '1']
code_info.stdin_name = 'aiida.in'
job_tmpl.codes_info = [code_info]
tmpl_code_info = JobTemplateCodeInfo()
tmpl_code_info.cmdline_params = ['mpirun', '-np', '23', 'pw.x', '-npool', '1']
tmpl_code_info.stdin_name = 'aiida.in'
job_tmpl.codes_info = [tmpl_code_info]
job_tmpl.codes_run_mode = CodeRunMode.SERIAL

submit_script_text = scheduler.get_submit_script(job_tmpl)
Expand All @@ -924,13 +924,13 @@ def test_submit_script_bad_shebang(self):
"""
Test to verify if scripts works fine with default options
"""
from aiida.common.datastructures import CodeInfo, CodeRunMode
from aiida.schedulers.datastructures import JobTemplate
from aiida.common.datastructures import CodeRunMode
from aiida.schedulers.datastructures import JobTemplate, JobTemplateCodeInfo

scheduler = PbsproScheduler()
code_info = CodeInfo()
code_info.cmdline_params = ['mpirun', '-np', '23', 'pw.x', '-npool', '1']
code_info.stdin_name = 'aiida.in'
tmpl_code_info = JobTemplateCodeInfo()
tmpl_code_info.cmdline_params = ['mpirun', '-np', '23', 'pw.x', '-npool', '1']
tmpl_code_info.stdin_name = 'aiida.in'

for (shebang, expected_first_line) in ((None, '#!/bin/bash'), ('', ''), ('NOSET', '#!/bin/bash')):
job_tmpl = JobTemplate()
Expand All @@ -939,7 +939,7 @@ def test_submit_script_bad_shebang(self):
else:
job_tmpl.shebang = shebang
job_tmpl.job_resource = scheduler.create_job_resource(num_machines=1, num_mpiprocs_per_machine=1)
job_tmpl.codes_info = [code_info]
job_tmpl.codes_info = [tmpl_code_info]
job_tmpl.codes_run_mode = CodeRunMode.SERIAL

submit_script_text = scheduler.get_submit_script(job_tmpl)
Expand All @@ -952,8 +952,8 @@ def test_submit_script_with_num_cores_per_machine(self):
Test to verify if script works fine if we specify only
num_cores_per_machine value.
"""
from aiida.common.datastructures import CodeInfo, CodeRunMode
from aiida.schedulers.datastructures import JobTemplate
from aiida.common.datastructures import CodeRunMode
from aiida.schedulers.datastructures import JobTemplate, JobTemplateCodeInfo

scheduler = PbsproScheduler()

Expand All @@ -964,10 +964,10 @@ def test_submit_script_with_num_cores_per_machine(self):
)
job_tmpl.uuid = str(uuid.uuid4())
job_tmpl.max_wallclock_seconds = 24 * 3600
code_info = CodeInfo()
code_info.cmdline_params = ['mpirun', '-np', '23', 'pw.x', '-npool', '1']
code_info.stdin_name = 'aiida.in'
job_tmpl.codes_info = [code_info]
tmpl_code_info = JobTemplateCodeInfo()
tmpl_code_info.cmdline_params = ['mpirun', '-np', '23', 'pw.x', '-npool', '1']
tmpl_code_info.stdin_name = 'aiida.in'
job_tmpl.codes_info = [tmpl_code_info]
job_tmpl.codes_run_mode = CodeRunMode.SERIAL

submit_script_text = scheduler.get_submit_script(job_tmpl)
Expand All @@ -985,8 +985,8 @@ def test_submit_script_with_num_cores_per_mpiproc(self):
Test to verify if scripts works fine if we pass only
num_cores_per_mpiproc value
"""
from aiida.common.datastructures import CodeInfo, CodeRunMode
from aiida.schedulers.datastructures import JobTemplate
from aiida.common.datastructures import CodeRunMode
from aiida.schedulers.datastructures import JobTemplate, JobTemplateCodeInfo

scheduler = PbsproScheduler()

Expand All @@ -997,10 +997,10 @@ def test_submit_script_with_num_cores_per_mpiproc(self):
)
job_tmpl.uuid = str(uuid.uuid4())
job_tmpl.max_wallclock_seconds = 24 * 3600
code_info = CodeInfo()
code_info.cmdline_params = ['mpirun', '-np', '23', 'pw.x', '-npool', '1']
code_info.stdin_name = 'aiida.in'
job_tmpl.codes_info = [code_info]
tmpl_code_info = JobTemplateCodeInfo()
tmpl_code_info.cmdline_params = ['mpirun', '-np', '23', 'pw.x', '-npool', '1']
tmpl_code_info.stdin_name = 'aiida.in'
job_tmpl.codes_info = [tmpl_code_info]
job_tmpl.codes_run_mode = CodeRunMode.SERIAL

submit_script_text = scheduler.get_submit_script(job_tmpl)
Expand All @@ -1020,8 +1020,8 @@ def test_submit_script_with_num_cores_per_machine_and_mpiproc1(self):
It should pass in check:
res.num_cores_per_mpiproc * res.num_mpiprocs_per_machine = res.num_cores_per_machine
"""
from aiida.common.datastructures import CodeInfo, CodeRunMode
from aiida.schedulers.datastructures import JobTemplate
from aiida.common.datastructures import CodeRunMode
from aiida.schedulers.datastructures import JobTemplate, JobTemplateCodeInfo

scheduler = PbsproScheduler()

Expand All @@ -1032,10 +1032,10 @@ def test_submit_script_with_num_cores_per_machine_and_mpiproc1(self):
)
job_tmpl.uuid = str(uuid.uuid4())
job_tmpl.max_wallclock_seconds = 24 * 3600
code_info = CodeInfo()
code_info.cmdline_params = ['mpirun', '-np', '23', 'pw.x', '-npool', '1']
code_info.stdin_name = 'aiida.in'
job_tmpl.codes_info = [code_info]
tmpl_code_info = JobTemplateCodeInfo()
tmpl_code_info.cmdline_params = ['mpirun', '-np', '23', 'pw.x', '-npool', '1']
tmpl_code_info.stdin_name = 'aiida.in'
job_tmpl.codes_info = [tmpl_code_info]
job_tmpl.codes_run_mode = CodeRunMode.SERIAL

submit_script_text = scheduler.get_submit_script(job_tmpl)
Expand Down Expand Up @@ -1066,16 +1066,16 @@ def test_submit_script_with_num_cores_per_machine_and_mpiproc2(self):

def test_submit_script_rerunnable(self): # pylint: disable=no-self-use
"""Test the `rerunnable` option of the submit script."""
from aiida.common.datastructures import CodeInfo, CodeRunMode
from aiida.schedulers.datastructures import JobTemplate
from aiida.common.datastructures import CodeRunMode
from aiida.schedulers.datastructures import JobTemplate, JobTemplateCodeInfo

scheduler = PbsproScheduler()

job_tmpl = JobTemplate()
job_tmpl.job_resource = scheduler.create_job_resource(num_machines=1, num_mpiprocs_per_machine=1)
code_info = CodeInfo()
code_info.cmdline_params = []
job_tmpl.codes_info = [code_info]
tmpl_code_info = JobTemplateCodeInfo()
tmpl_code_info.cmdline_params = []
job_tmpl.codes_info = [tmpl_code_info]
job_tmpl.codes_run_mode = CodeRunMode.SERIAL

job_tmpl.rerunnable = True
Expand Down
Loading

0 comments on commit a8348d6

Please sign in to comment.