Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for submit option to HTCondor site adapter #354

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 26 additions & 3 deletions docs/source/adapters/site.rst
Original file line number Diff line number Diff line change
Expand Up @@ -213,9 +213,22 @@ Available adapter configuration options
| | Default: ShellExecutor is used! | |
+----------------+-----------------------------------------------------------------------------------+-----------------+

The only available option in the `MachineTypeConfiguration` section is a template jdl used to submit drones to the
HTCondor batch system. The template jdl is using the `Python template string`_ syntax
(see example HTCondor JDL for details).

Available machine type configuration options
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. content-tabs:: left-col

+----------------+------------------------------------------------------------------------------+-----------------+
| Option | Short Description | Requirement |
+================+==============================================================================+=================+
| jdl | Path to the templated jdl used to submit drones to the HTCondor batch system | **Required** |
+----------------+------------------------------------------------------------------------------+-----------------+
| SubmitOptions | Options to add to the `condor_submit` command. (see example) | **Optional** |
+----------------+------------------------------------------------------------------------------+-----------------+

.. Note::
The template jdl uses the `Python template string`_ syntax (see example HTCondor JDL for details).

.. Warning::
HTCondor variables written as `$(...)` must be escaped as `$$(...)` in the templated JDL, because a single `$` introduces a placeholder in the Python template string syntax.
Expand Down Expand Up @@ -245,14 +258,24 @@ Available adapter configuration options
max_age: 1
MachineTypes:
- wholenode
- remotenode
MachineTypeConfiguration:
wholenode:
jdl: pilot_wholenode.jdl
remotenode:
jdl: pilot_remotenode.jdl
SubmitOptions:
spool: null
pool: remote-pool.somewhere.de
MachineMetaData:
wholenode:
Cores: 42
Memory: 256
Disk: 840
remotenode:
Cores: 8
Memory: 20
Disk: 100

.. rubric:: Example HTCondor JDL (Vanilla Universe)

Expand Down
10 changes: 9 additions & 1 deletion docs/source/changelog.rst
Original file line number Diff line number Diff line change
@@ -1,11 +1,19 @@
.. Created by changelog.py at 2024-05-24, command
.. Created by changelog.py at 2024-10-16, command
'/Users/giffler/.cache/pre-commit/repoecmh3ah8/py_env-python3.12/bin/changelog docs/source/changes compile --categories Added Changed Fixed Security Deprecated --output=docs/source/changelog.rst'
based on the format of 'https://keepachangelog.com/'

#########
CHANGELOG
#########

[Unreleased] - 2024-10-16
=========================

Added
-----

* Add support for submit options to HTCondor site adapter

[0.8.2] - 2024-05-15
====================

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
category: added
summary: "Add support for submit options to HTCondor site adapter"
description: |
Add support for additional submit options to the `condor_submit` command used to submit HTCondor batch jobs in the
HTCondor site adapter. For example: `-spool` to transfer input files to a remote HTCondor Schedd for job submission.
pull requests:
- 354
issues:
- 353
37 changes: 27 additions & 10 deletions tardis/adapters/sites/htcondor.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from ...utilities.utils import (
csv_parser,
drone_environment_to_str,
htcondor_cmd_option_formatter,
machine_meta_data_translation,
)

Expand Down Expand Up @@ -44,7 +45,9 @@ async def condor_q(
_job_id(resource.remote_resource_uuid) for resource in resource_attributes
)

queue_command = f"condor_q {remote_resource_ids} -af:t {attributes_string}"
queue_command = (
f"condor_q {remote_resource_ids} -af:t {attributes_string}" # noqa E231
Copy link
Member Author

@giffels giffels Oct 16, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not related to the feature. Flake8 complained about a missing space after the colon in `-af:t` (as if it should read `-af: t`), hence the `noqa`. 🙈

)

htcondor_queue = {}
try:
Expand Down Expand Up @@ -97,7 +100,9 @@ def _submit_description(resource_jdls: Tuple[JDL, ...]) -> str:
return "\n".join(commands)


async def condor_submit(*resource_jdls: JDL, executor: Executor) -> Iterable[str]:
async def condor_submit(
*resource_jdls: JDL, executor: Executor, submit_option_string: str
) -> Iterable[str]:
"""Submit a number of resources from their JDL, reporting the new Job ID for each"""
# verbose submit gives an ordered listing of class ads, such as
# ** Proc 15556.0:
Expand All @@ -110,7 +115,9 @@ async def condor_submit(*resource_jdls: JDL, executor: Executor) -> Iterable[str
#
# ** Proc 15556.1:
# ...
command = f"condor_submit -verbose -maxjobs {len(resource_jdls)}"
command = (
f"condor_submit -verbose -maxjobs {len(resource_jdls)} {submit_option_string}"
)
response = await executor.run_command(
command,
stdin_input=_submit_description(resource_jdls),
Expand Down Expand Up @@ -213,20 +220,30 @@ def __init__(
self._machine_type = machine_type
self._site_name = site_name
self._executor = getattr(self.configuration, "executor", ShellExecutor())

submit_option_string = htcondor_cmd_option_formatter(
self.machine_type_configuration.get("SubmitOptions", AttributeDict())
)

bulk_size = getattr(self.configuration, "bulk_size", 100)
bulk_delay = getattr(self.configuration, "bulk_delay", 1.0)
self._condor_submit, self._condor_suspend, self._condor_rm = (

self._condor_submit = AsyncBulkCall(
partial(
condor_submit,
executor=self._executor,
submit_option_string=submit_option_string,
),
size=bulk_size,
delay=bulk_delay,
)
self._condor_suspend, self._condor_rm, self._condor_q = (
AsyncBulkCall(
partial(tool, executor=self._executor),
size=bulk_size,
delay=bulk_delay,
)
for tool in (condor_submit, condor_suspend, condor_rm)
)
self._condor_q = AsyncBulkCall(
partial(condor_q, executor=self._executor),
size=bulk_size,
delay=bulk_delay,
for tool in (condor_suspend, condor_rm, condor_q)
)

key_translator = StaticMapping(
Expand Down
27 changes: 27 additions & 0 deletions tests/adapters_t/sites_t/test_htcondorsiteadapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ def machine_meta_data(self):
test2large_args=AttributeDict(Cores=8, Memory=32, Disk=160),
test2large_deprecated=AttributeDict(Cores=8, Memory=32, Disk=160),
testunkownresource=AttributeDict(Cores=8, Memory=32, Disk=160, Foo=3),
testsubmitoptions=AttributeDict(Cores=8, Memory=32, Disk=160),
)

@property
Expand All @@ -148,6 +149,10 @@ def machine_type_configuration(self):
test2large_args=AttributeDict(jdl="tests/data/submit_per_arguments.jdl"),
test2large_deprecated=AttributeDict(jdl="tests/data/submit_deprecated.jdl"),
testunkownresource=AttributeDict(jdl="tests/data/submit.jdl"),
testsubmitoptions=AttributeDict(
jdl="tests/data/submit.jdl",
SubmitOptions=AttributeDict(pool="my_remote_pool", spool=None),
giffels marked this conversation as resolved.
Show resolved Hide resolved
),
)

@mock_executor_run_command(stdout=CONDOR_SUBMIT_OUTPUT)
Expand Down Expand Up @@ -231,6 +236,28 @@ def test_deploy_resource_htcondor_obs(self):
)
self.mock_executor.reset()

self.adapter = HTCondorAdapter(
machine_type="testsubmitoptions", site_name="TestSite"
)

# Test support for submit options
run_async(
self.adapter.deploy_resource,
AttributeDict(
drone_uuid="test-123",
obs_machine_meta_data_translation_mapping=AttributeDict(
Cores=1,
Memory=1024,
Disk=1024 * 1024,
),
),
)

args, _ = self.mock_executor.return_value.run_command.call_args
self.assertEqual(
"condor_submit -verbose -maxjobs 1 -pool my_remote_pool -spool", args[0]
)

def test_translate_resources_raises_logs(self):
self.adapter = HTCondorAdapter(
machine_type="testunkownresource", site_name="TestSite"
Expand Down