Skip to content

Commit

Permalink
[ASP-4238] Add on-site submission support (#427)
Browse files Browse the repository at this point in the history
* feat(api): add support for on-site job submissions

* feat(cli): add support for on-site job submissions

* docs: describe support for on-site job submissions

* code review
  • Loading branch information
fschuch authored Nov 30, 2023
1 parent dc48245 commit 02d09b5
Show file tree
Hide file tree
Showing 17 changed files with 381 additions and 134 deletions.
18 changes: 12 additions & 6 deletions jobbergate-api/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,17 @@ This file keeps track of all notable changes to jobbergate-api

## Unreleased

- Added support for on-site job submissions [ASP-4238]

## 4.2.0a2 -- 2023-11-29

- Moved database session management into a dedicated context manager and removed test-aware logic
- Modified testing harnesses to override session management context manager and fail if test session is not used


## 4.2.0a1 -- 2023-11-13

## 4.2.0a0 -- 2023-11-09

## 4.1.0 -- 2023-11-07

- Changed internals to avoid committing to the database when a GET request is made
Expand Down Expand Up @@ -183,27 +186,30 @@ This file keeps track of all notable changes to jobbergate-api

## 2.1.2 -- 2022-02-02

* Revised permissions to use a view/edit model for each data model
- Revised permissions to use a view/edit model for each data model

- Added parameter to filter job_submissions by slurm_job_id

## 2.1.1 -- 2022-01-13

* Refactored the Dockerfile
- Refactored the Dockerfile

## 2.1.0 -- 2021-12-22

* Added graceful handling of delete failures due to FK constraints
- Added graceful handling of delete failures due to FK constraints

- Added Alembic support
- Added application_identifier to response payload
- Added pagination support back in

## 2.0.1 -- 2021-12-10

* Removed CORS origins parameter from settings and set all origins as the allowed ones
- Removed CORS origins parameter from settings and set all origins as the allowed ones

## 2.0.0 -- 2021-12-08

* Added support for auth via Armasec & Auth0
- Added support for auth via Armasec & Auth0

- Added unit tests
- Migrated model definitions from legacy `jobbergate-api`
- Migrated endpoint definitions from legacy `jobbergate-api`
Expand Down
20 changes: 14 additions & 6 deletions jobbergate-api/jobbergate_api/apps/job_submissions/routers.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,16 +69,24 @@ async def job_submission_create(
logger.warning(message)
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=message)

main_file_content = await secure_services.file.job_script.get_file_content(job_script_files[0])
new_execution_parameters = get_job_properties_from_job_script(
main_file_content.decode(), **create_request.execution_parameters.dict(exclude_unset=True)
)
create_request.execution_parameters = new_execution_parameters
if create_request.slurm_job_id is None:
main_file_content = await secure_services.file.job_script.get_file_content(job_script_files[0])
execution_parameters = get_job_properties_from_job_script(
main_file_content.decode(), **create_request.execution_parameters.dict(exclude_unset=True)
)
create_request.execution_parameters = execution_parameters
submission_status = JobSubmissionStatus.CREATED
else:
if create_request.execution_parameters.dict(exclude_unset=True):
message = "Execution parameters are not allowed for on-site job submissions"
logger.warning(message)
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=message)
submission_status = JobSubmissionStatus.SUBMITTED

new_job_submission = await secure_services.crud.job_submission.create(
**create_request.dict(exclude_unset=True),
owner_email=secure_services.identity_payload.email,
status=JobSubmissionStatus.CREATED,
status=submission_status,
)
return new_job_submission

Expand Down
4 changes: 2 additions & 2 deletions jobbergate-api/jobbergate_api/apps/job_submissions/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,8 +163,7 @@ class JobProperties(BaseModel, extra=Extra.forbid):
"'user' option or with the 'mcs' option)."
)
)
get_user_environment: int = Field(
default=1,
get_user_environment: Optional[int] = Field(
description="Load new login environment for user on job node.",
ge=0,
le=1,
Expand Down Expand Up @@ -305,6 +304,7 @@ class JobSubmissionCreateRequest(BaseModel):
name: str
description: Optional[str]
job_script_id: int
slurm_job_id: Optional[int]
execution_directory: Optional[str]
client_id: Optional[str]
execution_parameters: JobProperties = Field(default_factory=JobProperties)
Expand Down
110 changes: 110 additions & 0 deletions jobbergate-api/tests/apps/job_submissions/test_routers.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,116 @@
pytest.mark.usefixtures("synth_session")


async def test_create_job_submission__on_site_submission(
fill_job_script_data,
fill_job_submission_data,
client,
inject_security_header,
tester_email,
job_script_data_as_string,
synth_session,
synth_bucket,
synth_services,
):
"""
Test POST /job-submissions/ correctly creates an on-site job_submission.

This test proves that a job_submission is successfully created via a POST request to the /job-submissions/
endpoint when a ``slurm_job_id`` is included in the request. We show this by asserting that the
job_submission is created in the database after the post request is made and that the correct status
code (201) is returned. We also show that the ``status`` is ``SUBMITTED`` and that the
``execution_parameters`` were not processed: ``get_job_properties_from_job_script`` is never called and
no execution parameter is set in the response.
"""
base_job_script = await synth_services.crud.job_script.create(**fill_job_script_data())

job_script_file_name = "entrypoint.py"

await synth_services.file.job_script.upsert(
parent_id=base_job_script.id,
filename=job_script_file_name,
upload_content=job_script_data_as_string,
file_type="ENTRYPOINT",
)

inserted_job_script_id = base_job_script.id
# Including a slurm_job_id in the payload is what distinguishes this request as an on-site submission
slurm_job_id = 1234

inject_security_header(tester_email, Permissions.JOB_SUBMISSIONS_EDIT, client_id="dummy-cluster-client")
create_data = fill_job_submission_data(job_script_id=inserted_job_script_id, slurm_job_id=slurm_job_id)

# Remove the defaults so we can verify that these fields are set by other mechanisms
create_data.pop("status", None)
create_data.pop("client_id", None)
create_data.pop("execution_parameters", None)

# Patch the job-script parser so we can verify that it is never invoked for this request
with mock.patch(
"jobbergate_api.apps.job_submissions.routers.get_job_properties_from_job_script"
) as mocked:
response = await client.post("/jobbergate/job-submissions", json=create_data)
mocked.assert_not_called()

assert response.status_code == status.HTTP_201_CREATED, f"Create failed: {response.text}"

with synth_services.crud.job_submission.bound_session(synth_session):
assert (await synth_services.crud.job_submission.count()) == 1

response_data = response.json()

# Check that each field is correctly set
assert response_data["name"] == create_data["name"]
assert response_data["owner_email"] == tester_email
assert response_data["description"] == create_data["description"]
assert response_data["job_script_id"] == inserted_job_script_id
assert response_data["execution_directory"] is None
# The client_id was popped from the payload, so it must match the one injected in the security header
assert response_data["client_id"] == "dummy-cluster-client"
assert response_data["status"] == JobSubmissionStatus.SUBMITTED

# Ignoring None values, no execution parameter may be set in the response
assert isinstance(response_data["execution_parameters"], dict)
assert {k: v for k, v in response_data["execution_parameters"].items() if v is not None} == {}


async def test_create_job_submission__on_site_submission_with_execution_parameters(
fill_job_script_data,
fill_job_submission_data,
client,
inject_security_header,
tester_email,
job_script_data_as_string,
synth_session,
synth_bucket,
synth_services,
):
"""
Test POST /job-submissions/ rejects an on-site job_submission that includes execution parameters.

This test proves that a request carrying both a ``slurm_job_id`` and ``execution_parameters`` is not
accepted. We show this by asserting that the status code 400 is returned and that the response text
contains the corresponding error message.
"""
base_job_script = await synth_services.crud.job_script.create(**fill_job_script_data())

job_script_file_name = "entrypoint.py"

await synth_services.file.job_script.upsert(
parent_id=base_job_script.id,
filename=job_script_file_name,
upload_content=job_script_data_as_string,
file_type="ENTRYPOINT",
)

inserted_job_script_id = base_job_script.id
slurm_job_id = 1234

inject_security_header(tester_email, Permissions.JOB_SUBMISSIONS_EDIT, client_id="dummy-cluster-client")
# Send a slurm_job_id together with explicit execution parameters, the combination under test
create_data = fill_job_submission_data(
job_script_id=inserted_job_script_id, slurm_job_id=slurm_job_id, execution_parameters={"name": "foo"}
)

# Remove the defaults so we can verify that these fields are set by other mechanisms
create_data.pop("status", None)
create_data.pop("client_id", None)

response = await client.post("/jobbergate/job-submissions", json=create_data)

assert response.status_code == status.HTTP_400_BAD_REQUEST
assert "Execution parameters are not allowed for on-site job submissions" in response.text


async def test_create_job_submission__with_client_id_in_token(
fill_job_script_data,
fill_job_submission_data,
Expand Down
2 changes: 2 additions & 0 deletions jobbergate-cli/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@ This file keeps track of all notable changes to jobbergate-cli

## Unreleased

- Added support for on-site job submissions using the `sbatch` command [ASP-4238]

## 4.2.0a2 -- 2023-11-29
## 4.2.0a1 -- 2023-11-13

- Patched create-job-script command on submit mode when parameter file is provided
- Added setting to control the timeout on `httpx` requests

Expand Down
2 changes: 2 additions & 0 deletions jobbergate-cli/jobbergate_cli/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ class Settings(BaseSettings):

ARMADA_API_BASE: AnyHttpUrl = Field("https://armada-k8s.staging.omnivector.solutions")

SBATCH_PATH: Optional[Path]

# enable http tracing
JOBBERGATE_DEBUG: bool = Field(False)
JOBBERGATE_REQUESTS_TIMEOUT: Optional[int] = 15
Expand Down
5 changes: 3 additions & 2 deletions jobbergate-cli/jobbergate_cli/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ class ApplicationResponse(pydantic.BaseModel, extra=pydantic.Extra.ignore):
workflow_files: List[WorkflowFileResponse] = []


class JobScriptFiles(pydantic.BaseModel, extra=pydantic.Extra.ignore):
class JobScriptFile(pydantic.BaseModel, extra=pydantic.Extra.ignore):
"""
Model containing job-script files.
"""
Expand Down Expand Up @@ -173,7 +173,7 @@ class JobScriptResponse(
created_at: Optional[datetime] = None
updated_at: Optional[datetime] = None

files: List[JobScriptFiles] = []
files: List[JobScriptFile] = []

@pydantic.validator("files", pre=True)
def null_files(cls, value):
Expand Down Expand Up @@ -239,6 +239,7 @@ class JobSubmissionCreateRequestData(pydantic.BaseModel):
name: str
description: Optional[str] = None
job_script_id: int
slurm_job_id: Optional[int] = None
client_id: Optional[str] = pydantic.Field(None, alias="cluster_name")
execution_directory: Optional[Path] = None
execution_parameters: Dict[str, Any] = pydantic.Field(default_factory=dict)
Expand Down
61 changes: 32 additions & 29 deletions jobbergate-cli/jobbergate_cli/subapps/job_scripts/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import typer

from jobbergate_cli.config import settings
from jobbergate_cli.constants import SortOrder
from jobbergate_cli.exceptions import Abort, handle_abort
from jobbergate_cli.render import StyleMapper, render_list_results, render_single_result, terminal_message
Expand All @@ -18,7 +19,6 @@
fetch_job_script_data,
question_helper,
render_job_script,
save_job_script_files,
upload_job_script_files,
)
from jobbergate_cli.subapps.job_submissions.app import HIDDEN_FIELDS as JOB_SUBMISSION_HIDDEN_FIELDS
Expand Down Expand Up @@ -211,9 +211,23 @@ def render(
"""
),
),
fast: bool = typer.Option(
False,
"--fast",
"-f",
help="Use default answers (when available) instead of asking the user.",
),
download: Optional[bool] = typer.Option(
None,
help="Download the job script files to the current working directory",
),
submit: Optional[bool] = typer.Option(
None,
help="Do not ask the user if they want to submit a job.",
),
cluster_name: Optional[str] = typer.Option(
None,
help="The name of the cluster where the job should be submitted (i.g. 'nash-staging')",
help="The name of the cluster where the job should be submitted to (i.g. 'nash-staging')",
),
execution_directory: Optional[pathlib.Path] = typer.Option(
None,
Expand All @@ -226,20 +240,6 @@ def render(
"""
).strip(),
),
download: Optional[bool] = typer.Option(
None,
help="Download the job script files to the current working directory",
),
fast: bool = typer.Option(
False,
"--fast",
"-f",
help="Use default answers (when available) instead of asking the user.",
),
submit: Optional[bool] = typer.Option(
None,
help="Do not ask the user if they want to submit a job.",
),
):
"""
Render a new job script from an application.
Expand Down Expand Up @@ -289,16 +289,18 @@ def render(
actual_value=submit,
)

download = question_helper(
question_func=typer.confirm,
text="Would you like to download the job script files?",
default=True,
fast=fast,
actual_value=download,
)
if settings.SBATCH_PATH is None:
# Note: on-site submission downloads the job script files anyway, so the user is only asked in remote mode.
download = question_helper(
question_func=typer.confirm,
text="Would you like to download the job script files?",
default=True,
fast=fast,
actual_value=download,
)

if download:
download_job_script_files(job_script_result.job_script_id, jg_ctx)
if download:
download_job_script_files(job_script_result.job_script_id, jg_ctx, pathlib.Path.cwd())

if not submit:
return
Expand Down Expand Up @@ -430,14 +432,15 @@ def show_files(
Show the files for a single job script by id.
"""
jg_ctx: JobbergateContext = ctx.obj
result = fetch_job_script_data(jg_ctx, id)

with tempfile.TemporaryDirectory() as tmp_dir:
tmp_path = pathlib.Path(tmp_dir)
file_list = save_job_script_files(jg_ctx, result, tmp_path)

for metadata, file_path in zip(result.files, file_list):
files = download_job_script_files(id, jg_ctx, tmp_path)

for metadata in files:
filename = metadata.filename
file_path = tmp_path / filename
file_content = file_path.read_text()
is_main_file = metadata.file_type.upper() == "ENTRYPOINT"
if plain or jg_ctx.raw_output:
Expand All @@ -461,7 +464,7 @@ def download_files(
Download the files from a job script to the current working directory.
"""
jg_ctx: JobbergateContext = ctx.obj
downloaded_files = download_job_script_files(id, jg_ctx)
downloaded_files = download_job_script_files(id, jg_ctx, pathlib.Path.cwd())

terminal_message(
dedent(
Expand Down
Loading

0 comments on commit 02d09b5

Please sign in to comment.