[ASP-4238] Add on-site submission support #427

Merged
merged 4 commits on Nov 30, 2023
Changes from 3 commits
18 changes: 12 additions & 6 deletions jobbergate-api/CHANGELOG.md
@@ -4,14 +4,17 @@ This file keeps track of all notable changes to jobbergate-api

## Unreleased

- Added support for on-site job submissions [ASP-4238]

## 4.2.0a2 -- 2023-11-29

- Moved database session management into a dedicated context manager and removed test-aware logic
- Modified testing harnesses to override session management context manager and fail if test session is not used


## 4.2.0a1 -- 2023-11-13

## 4.2.0a0 -- 2023-11-09

## 4.1.0 -- 2023-11-07

- Changed internals to avoid committing to the database when a GET request is made
@@ -183,27 +186,30 @@ This file keeps track of all notable changes to jobbergate-api

## 2.1.2 -- 2022-02-02

* Revised permissions to use a view/edit model for each data model
- Revised permissions to use a view/edit model for each data model

- Added parameter to filter job_submissions by slurm_job_id

## 2.1.1 -- 2022-01-13

* Refactored the Dockerfile
- Refactored the Dockerfile

## 2.1.0 -- 2021-12-22

* Added graceful handling of delete failures due to FK constraints
- Added graceful handling of delete failures due to FK constraints

- Added Alembic support
- Added application_identifier to response payload
- Added pagination support back in

## 2.0.1 -- 2021-12-10

* Removed CORS origins parameter from settings and set all origins as the allowed ones
- Removed CORS origins parameter from settings and set all origins as the allowed ones

## 2.0.0 -- 2021-12-08

* Added support for auth via Armasec & Auth0
- Added support for auth via Armasec & Auth0

- Added unit tests
- Migrated model definitions from legacy `jobbergate-api`
- Migrated endpoint definitions from legacy `jobbergate-api`
20 changes: 14 additions & 6 deletions jobbergate-api/jobbergate_api/apps/job_submissions/routers.py
@@ -69,16 +69,24 @@ async def job_submission_create(
logger.warning(message)
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=message)

main_file_content = await secure_services.file.job_script.get_file_content(job_script_files[0])
new_execution_parameters = get_job_properties_from_job_script(
main_file_content.decode(), **create_request.execution_parameters.dict(exclude_unset=True)
)
create_request.execution_parameters = new_execution_parameters
if create_request.slurm_job_id is None:
main_file_content = await secure_services.file.job_script.get_file_content(job_script_files[0])
execution_parameters = get_job_properties_from_job_script(
main_file_content.decode(), **create_request.execution_parameters.dict(exclude_unset=True)
)
create_request.execution_parameters = execution_parameters
submission_status = JobSubmissionStatus.CREATED
else:
if create_request.execution_parameters.dict(exclude_unset=True):
message = "Execution parameters are not allowed for on-site job submissions"
logger.warning(message)
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=message)
submission_status = JobSubmissionStatus.SUBMITTED

new_job_submission = await secure_services.crud.job_submission.create(
**create_request.dict(exclude_unset=True),
owner_email=secure_services.identity_payload.email,
status=JobSubmissionStatus.CREATED,
status=submission_status,
)
return new_job_submission

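For readers following the new branch above, here is a hedged sketch (not part of this PR) of how a client could exercise the on-site path over HTTP. The endpoint path, payload fields, and resulting status come from this diff and the tests below; the base URL, the bearer token, and the example values are assumptions.

```python
# Hedged sketch: creating an on-site job submission over HTTP.
# Assumptions: base URL, token, and example values; the endpoint path and
# payload fields come from this PR's router, schema, and tests.
import httpx

payload = {
    "name": "on-site-example",  # hypothetical submission name
    "job_script_id": 1,         # id of an existing job script (hypothetical)
    "slurm_job_id": 1234,       # id already assigned by sbatch on the cluster
    # no "execution_parameters": the API rejects them for on-site submissions
}

response = httpx.post(
    "https://jobbergate-api.example.com/jobbergate/job-submissions",  # assumed base URL
    json=payload,
    headers={"Authorization": "Bearer <access-token>"},  # auth flow not shown
)
response.raise_for_status()

# Because slurm_job_id was provided, the submission is stored as SUBMITTED
# rather than CREATED, and the job script is not parsed for execution parameters.
print(response.json()["status"])
```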
4 changes: 2 additions & 2 deletions jobbergate-api/jobbergate_api/apps/job_submissions/schemas.py
@@ -163,8 +163,7 @@ class JobProperties(BaseModel, extra=Extra.forbid):
"'user' option or with the 'mcs' option)."
)
)
get_user_environment: int = Field(
default=1,
get_user_environment: Optional[int] = Field(
description="Load new login environment for user on job node.",
ge=0,
le=1,
@@ -305,6 +304,7 @@ class JobSubmissionCreateRequest(BaseModel):
name: str
description: Optional[str]
job_script_id: int
slurm_job_id: Optional[int]
execution_directory: Optional[str]
client_id: Optional[str]
execution_parameters: JobProperties = Field(default_factory=JobProperties)
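A minimal sketch of the extended request model follows, assuming the module path shown in this file's header; all field values are hypothetical. An on-site submission carries the `slurm_job_id` that `sbatch` already assigned and leaves `execution_parameters` at its default.

```python
# Minimal sketch of an on-site JobSubmissionCreateRequest; values are hypothetical.
from jobbergate_api.apps.job_submissions.schemas import JobSubmissionCreateRequest

request = JobSubmissionCreateRequest(
    name="on-site-example",  # hypothetical name
    job_script_id=1,         # hypothetical job script id
    slurm_job_id=1234,       # id reported by sbatch on the cluster
)

# execution_parameters keeps its JobProperties() default; the router rejects
# explicitly provided parameters whenever slurm_job_id is set.
print(request.dict(exclude_unset=True))
```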
110 changes: 110 additions & 0 deletions jobbergate-api/tests/apps/job_submissions/test_routers.py
@@ -14,6 +14,116 @@
pytest.mark.usefixtures("synth_session")


async def test_create_job_submission__on_site_submission(
fill_job_script_data,
fill_job_submission_data,
client,
inject_security_header,
tester_email,
job_script_data_as_string,
synth_session,
synth_bucket,
synth_services,
):
"""
Test that POST /job-submissions/ correctly creates an on-site job_submission.

This test proves that a job_submission is successfully created via a POST request to the /job-submissions/
endpoint. We show this by asserting that the job_submission is created in the database after the POST
request is made and that the correct status code (201) is returned. We also show that the ``status``
is ``SUBMITTED`` and that the ``execution_parameters`` were not processed.
"""
base_job_script = await synth_services.crud.job_script.create(**fill_job_script_data())

job_script_file_name = "entrypoint.py"

await synth_services.file.job_script.upsert(
parent_id=base_job_script.id,
filename=job_script_file_name,
upload_content=job_script_data_as_string,
file_type="ENTRYPOINT",
)

inserted_job_script_id = base_job_script.id
slurm_job_id = 1234

inject_security_header(tester_email, Permissions.JOB_SUBMISSIONS_EDIT, client_id="dummy-cluster-client")
create_data = fill_job_submission_data(job_script_id=inserted_job_script_id, slurm_job_id=slurm_job_id)

# Removed defaults to make sure these are correctly set by other mechanisms
create_data.pop("status", None)
create_data.pop("client_id", None)
create_data.pop("execution_parameters", None)

with mock.patch(
"jobbergate_api.apps.job_submissions.routers.get_job_properties_from_job_script"
) as mocked:
response = await client.post("/jobbergate/job-submissions", json=create_data)
mocked.assert_not_called()

assert response.status_code == status.HTTP_201_CREATED, f"Create failed: {response.text}"

with synth_services.crud.job_submission.bound_session(synth_session):
assert (await synth_services.crud.job_submission.count()) == 1

response_data = response.json()

# Check that each field is correctly set
assert response_data["name"] == create_data["name"]
assert response_data["owner_email"] == tester_email
assert response_data["description"] == create_data["description"]
assert response_data["job_script_id"] == inserted_job_script_id
assert response_data["execution_directory"] is None
assert response_data["client_id"] == "dummy-cluster-client"
assert response_data["status"] == JobSubmissionStatus.SUBMITTED

assert isinstance(response_data["execution_parameters"], dict)
assert {k: v for k, v in response_data["execution_parameters"].items() if v is not None} == {}


async def test_create_job_submission__on_site_submission_with_execution_parameters(
fill_job_script_data,
fill_job_submission_data,
client,
inject_security_header,
tester_email,
job_script_data_as_string,
synth_session,
synth_bucket,
synth_services,
):
"""
Test that POST /job-submissions/ rejects an on-site job_submission that includes execution_parameters.
"""
base_job_script = await synth_services.crud.job_script.create(**fill_job_script_data())

job_script_file_name = "entrypoint.py"

await synth_services.file.job_script.upsert(
parent_id=base_job_script.id,
filename=job_script_file_name,
upload_content=job_script_data_as_string,
file_type="ENTRYPOINT",
)

inserted_job_script_id = base_job_script.id
slurm_job_id = 1234

inject_security_header(tester_email, Permissions.JOB_SUBMISSIONS_EDIT, client_id="dummy-cluster-client")
create_data = fill_job_submission_data(
job_script_id=inserted_job_script_id, slurm_job_id=slurm_job_id, execution_parameters={"name": "foo"}
)

# Removed defaults to make sure these are correctly set by other mechanisms
create_data.pop("status", None)
create_data.pop("client_id", None)

response = await client.post("/jobbergate/job-submissions", json=create_data)

assert response.status_code == status.HTTP_400_BAD_REQUEST
assert "Execution parameters are not allowed for on-site job submissions" in response.text


async def test_create_job_submission__with_client_id_in_token(
fill_job_script_data,
fill_job_submission_data,
2 changes: 2 additions & 0 deletions jobbergate-cli/CHANGELOG.md
@@ -4,9 +4,11 @@ This file keeps track of all notable changes to jobbergate-cli

## Unreleased

- Added support for on-site job submissions using the `sbatch` command [ASP-4238]

## 4.2.0a2 -- 2023-11-29
## 4.2.0a1 -- 2023-11-13

- Patched create-job-script command on submit mode when parameter file is provided
- Added setting to control the timeout on `httpx` requests

2 changes: 2 additions & 0 deletions jobbergate-cli/jobbergate_cli/config.py
@@ -24,6 +24,8 @@ class Settings(BaseSettings):

ARMADA_API_BASE: AnyHttpUrl = Field("https://armada-k8s.staging.omnivector.solutions")

SBATCH_PATH: Optional[Path]

# enable http tracing
JOBBERGATE_DEBUG: bool = Field(False)
JOBBERGATE_REQUESTS_TIMEOUT: Optional[int] = 15
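A hedged sketch of enabling the new setting from the environment, assuming the settings class reads unprefixed variables (as the other fields suggest); the sbatch location is only an example.

```python
# Hedged sketch: point SBATCH_PATH at the local sbatch binary to enable
# on-site submission in the CLI. The variable name assumes no env prefix;
# the path is an example.
import os

os.environ["SBATCH_PATH"] = "/usr/bin/sbatch"  # example sbatch location

from jobbergate_cli.config import settings  # import after setting the variable

# When SBATCH_PATH is set, the render command skips downloading the job script
# files to the current directory, since the on-site submission path downloads
# them anyway (see the `settings.SBATCH_PATH is None` check in job_scripts/app.py).
assert settings.SBATCH_PATH is not None
```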
5 changes: 3 additions & 2 deletions jobbergate-cli/jobbergate_cli/schemas.py
@@ -140,7 +140,7 @@ class ApplicationResponse(pydantic.BaseModel, extra=pydantic.Extra.ignore):
workflow_files: List[WorkflowFileResponse] = []


class JobScriptFiles(pydantic.BaseModel, extra=pydantic.Extra.ignore):
class JobScriptFile(pydantic.BaseModel, extra=pydantic.Extra.ignore):
"""
Model containing job-script files.
"""
@@ -173,7 +173,7 @@ class JobScriptResponse(
created_at: Optional[datetime] = None
updated_at: Optional[datetime] = None

files: List[JobScriptFiles] = []
files: List[JobScriptFile] = []

@pydantic.validator("files", pre=True)
def null_files(cls, value):
@@ -239,6 +239,7 @@ class JobSubmissionCreateRequestData(pydantic.BaseModel):
name: str
description: Optional[str] = None
job_script_id: int
slurm_job_id: Optional[int] = None
client_id: Optional[str] = pydantic.Field(None, alias="cluster_name")
execution_directory: Optional[Path] = None
execution_parameters: Dict[str, Any] = pydantic.Field(default_factory=dict)
53 changes: 23 additions & 30 deletions jobbergate-cli/jobbergate_cli/subapps/job_scripts/app.py
@@ -8,6 +8,7 @@

import typer

from jobbergate_cli.config import settings
from jobbergate_cli.constants import SortOrder
from jobbergate_cli.exceptions import Abort, handle_abort
from jobbergate_cli.render import StyleMapper, render_list_results, render_single_result, terminal_message
Expand All @@ -18,7 +19,6 @@
fetch_job_script_data,
question_helper,
render_job_script,
save_job_script_files,
upload_job_script_files,
)
from jobbergate_cli.subapps.job_submissions.app import HIDDEN_FIELDS as JOB_SUBMISSION_HIDDEN_FIELDS
@@ -211,9 +211,23 @@ def render(
"""
),
),
fast: bool = typer.Option(
False,
"--fast",
"-f",
help="Use default answers (when available) instead of asking the user.",
),
download: Optional[bool] = typer.Option(
None,
help="Download the job script files to the current working directory",
),
submit: Optional[bool] = typer.Option(
None,
help="Do not ask the user if they want to submit a job.",
),
cluster_name: Optional[str] = typer.Option(
None,
help="The name of the cluster where the job should be submitted (i.g. 'nash-staging')",
help="The name of the cluster where the job should be submitted to (i.g. 'nash-staging')",
),
execution_directory: Optional[pathlib.Path] = typer.Option(
None,
Expand All @@ -226,20 +240,6 @@ def render(
"""
).strip(),
),
download: Optional[bool] = typer.Option(
None,
help="Download the job script files to the current working directory",
),
fast: bool = typer.Option(
False,
"--fast",
"-f",
help="Use default answers (when available) instead of asking the user.",
),
submit: Optional[bool] = typer.Option(
None,
help="Do not ask the user if they want to submit a job.",
),
):
"""
Render a new job script from an application.
@@ -289,16 +289,8 @@ def render(
actual_value=submit,
)

download = question_helper(
question_func=typer.confirm,
text="Would you like to download the job script files?",
default=True,
fast=fast,
actual_value=download,
)

if download:
download_job_script_files(job_script_result.job_script_id, jg_ctx)
if download and settings.SBATCH_PATH is None: # on-site submission will download the job script files anyway
download_job_script_files(job_script_result.job_script_id, jg_ctx, pathlib.Path.cwd())

if not submit:
return
@@ -430,14 +422,15 @@ def show_files(
Show the files for a single job script by id.
"""
jg_ctx: JobbergateContext = ctx.obj
result = fetch_job_script_data(jg_ctx, id)

with tempfile.TemporaryDirectory() as tmp_dir:
tmp_path = pathlib.Path(tmp_dir)
file_list = save_job_script_files(jg_ctx, result, tmp_path)

for metadata, file_path in zip(result.files, file_list):
files = download_job_script_files(id, jg_ctx, tmp_path)

for metadata in files:
filename = metadata.filename
file_path = tmp_path / filename
file_content = file_path.read_text()
is_main_file = metadata.file_type.upper() == "ENTRYPOINT"
if plain or jg_ctx.raw_output:
@@ -461,7 +454,7 @@ def download_files(
Download the files from a job script to the current working directory.
"""
jg_ctx: JobbergateContext = ctx.obj
downloaded_files = download_job_script_files(id, jg_ctx)
downloaded_files = download_job_script_files(id, jg_ctx, pathlib.Path.cwd())

terminal_message(
dedent(