diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index db2cb8a..67f3542 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -56,7 +56,6 @@ jobs: name: "Docker website" steps: - uses: actions/checkout@v4 - - run: echo "VITE_REST_API_LOCATION=https://predictcr.lkeegan.dev/api" > frontend/.env - run: docker compose build - uses: docker/login-action@v3 with: @@ -92,8 +91,8 @@ jobs: - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - - run: pip install pytest - - run: python -m pytest -sv + - run: pip install -e .[tests] + - run: pytest -sv runner-docker: runs-on: ubuntu-latest name: "Docker runner" diff --git a/README_DEPLOYMENT.md b/README_DEPLOYMENT.md index 335e77d..abffaea 100644 --- a/README_DEPLOYMENT.md +++ b/README_DEPLOYMENT.md @@ -6,7 +6,7 @@ Some information on how to deploy the website. Production docker container images are automatically built by CI. To deploy the latest version on a virtual machine with docker compose installed, -download [docker compose.yml](https://raw.githubusercontent.com/ssciwr/predicTCR/main/docker compose.yml), then do +download [docker-compose.yml](https://raw.githubusercontent.com/ssciwr/predicTCR/main/docker-compose.yml), then do ``` sudo docker compose pull @@ -16,7 +16,7 @@ sudo docker compose up -d The location of data directory, SSL keys and secret key should be set either in env vars or in a file `.env` in the same location as the docker compose.yml. 
-For example the current deployment on heicloud looks like this: +For example the current test deployment on heicloud looks like this: ``` PREDICTCR_DATA="/home/ubuntu/predicTCR/docker_volume" diff --git a/backend/src/predicTCR_server/app.py b/backend/src/predicTCR_server/app.py index f5fefc0..f47e2c2 100644 --- a/backend/src/predicTCR_server/app.py +++ b/backend/src/predicTCR_server/app.py @@ -47,8 +47,8 @@ def create_app(data_path: str = "/predictcr_data"): app.config["JWT_ACCESS_TOKEN_EXPIRES"] = datetime.timedelta(minutes=60) app.config["SQLALCHEMY_DATABASE_URI"] = f"sqlite:///{data_path}/predicTCR.db" app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = False - # limit max file upload size to 20mb - app.config["MAX_CONTENT_LENGTH"] = 20 * 1024 * 1024 + # limit max file upload size to 100mb + app.config["MAX_CONTENT_LENGTH"] = 100 * 1024 * 1024 app.config["PREDICTCR_DATA_PATH"] = data_path CORS(app) @@ -147,9 +147,9 @@ def change_password(): def samples(): return get_samples(current_user.email) - @app.route("/api/input_file", methods=["POST"]) + @app.route("/api/input_h5_file", methods=["POST"]) @jwt_required() - def input_file(): + def input_h5_file(): sample_id = request.json.get("sample_id", None) logger.info( f"User {current_user.email} requesting results for sample {sample_id}" @@ -163,7 +163,25 @@ def input_file(): if user_sample is None: logger.info(f" -> sample {sample_id} not found") return jsonify(message="Sample not found"), 400 - return flask.send_file(user_sample.input_file_path(), as_attachment=True) + return flask.send_file(user_sample.input_h5_file_path(), as_attachment=True) + + @app.route("/api/input_csv_file", methods=["POST"]) + @jwt_required() + def input_csv_file(): + sample_id = request.json.get("sample_id", None) + logger.info( + f"User {current_user.email} requesting results for sample {sample_id}" + ) + filters = {"id": sample_id} + if not current_user.is_admin and not current_user.is_runner: + filters["email"] = current_user.email + 
user_sample = db.session.execute( + db.select(Sample).filter_by(**filters) + ).scalar_one_or_none() + if user_sample is None: + logger.info(f" -> sample {sample_id} not found") + return jsonify(message="Sample not found"), 400 + return flask.send_file(user_sample.input_csv_file_path(), as_attachment=True) @app.route("/api/result", methods=["POST"]) @jwt_required() @@ -199,10 +217,11 @@ def add_sample(): name = form_as_dict.get("name", "") tumor_type = form_as_dict.get("tumor_type", "") source = form_as_dict.get("source", "") - infile = request.files.get("file") + h5_file = request.files.get("h5_file") + csv_file = request.files.get("csv_file") logger.info(f"Adding sample {name} from {email}") new_sample, error_message = add_new_sample( - email, name, tumor_type, source, infile + email, name, tumor_type, source, h5_file, csv_file ) if new_sample is not None: logger.info(" - > success") diff --git a/backend/src/predicTCR_server/model.py b/backend/src/predicTCR_server/model.py index 9435943..df5be0e 100644 --- a/backend/src/predicTCR_server/model.py +++ b/backend/src/predicTCR_server/model.py @@ -50,8 +50,11 @@ def _base_path(self) -> pathlib.Path: data_path = flask.current_app.config["PREDICTCR_DATA_PATH"] return pathlib.Path(f"{data_path}/{self.id}") - def input_file_path(self) -> pathlib.Path: - return self._base_path() / "input.zip" + def input_h5_file_path(self) -> pathlib.Path: + return self._base_path() / "input.h5" + + def input_csv_file_path(self) -> pathlib.Path: + return self._base_path() / "input.csv" def result_file_path(self) -> pathlib.Path: return self._base_path() / "result.zip" @@ -110,6 +113,7 @@ def get_samples(email: str | None = None) -> list[Sample]: def request_job() -> int | None: + # todo: go through running jobs and reset to queued if they have been running for more than e.g. 
2 hrs selected_samples = ( db.select(Sample) .filter(Sample.status == Status.QUEUED) @@ -288,6 +292,7 @@ def enable_user(email: str, enabled: bool) -> tuple[str, int]: if user is None: logger.info(f" -> Unknown email address '{email}'") return f"Unknown email address {email}", 400 + user.activated = True user.enabled = enabled db.session.commit() return f"Account {email} activated", 200 @@ -345,7 +350,8 @@ def add_new_sample( name: str, tumor_type: str, source: str, - input_file: FileStorage, + h5_file: FileStorage, + csv_file: FileStorage, ) -> tuple[Sample | None, str]: user = db.session.execute( db.select(User).filter(User.email == email) @@ -378,6 +384,7 @@ def add_new_sample( ) db.session.add(new_sample) db.session.commit() - new_sample.input_file_path().parent.mkdir(parents=True, exist_ok=True) - input_file.save(new_sample.input_file_path()) + new_sample.input_h5_file_path().parent.mkdir(parents=True, exist_ok=True) + h5_file.save(new_sample.input_h5_file_path()) + csv_file.save(new_sample.input_csv_file_path()) return new_sample, "" diff --git a/backend/tests/helpers/flask_test_utils.py b/backend/tests/helpers/flask_test_utils.py index ed74aa1..b2af717 100644 --- a/backend/tests/helpers/flask_test_utils.py +++ b/backend/tests/helpers/flask_test_utils.py @@ -1,8 +1,6 @@ import argon2 from predicTCR_server.model import User, Sample, db, Status import pathlib -import shutil -import tempfile def add_test_users(app): @@ -42,10 +40,9 @@ def add_test_samples(app, data_path: pathlib.Path): ): ref_dir = data_path / f"{sample_id}" ref_dir.mkdir(parents=True, exist_ok=True) - with tempfile.TemporaryDirectory() as tmp_dir: - with open(f"{tmp_dir}/test.txt", "w") as f: - f.write(name) - shutil.make_archive(f"{ref_dir}/input", "zip", tmp_dir) + for input_file_type in ["h5", "csv"]: + with open(f"{ref_dir}/input.{input_file_type}", "w") as f: + f.write(input_file_type) new_sample = Sample( email="user@abc.xy", name=name, diff --git a/backend/tests/test_app.py 
b/backend/tests/test_app.py index 16e6b97..bca8739 100644 --- a/backend/tests/test_app.py +++ b/backend/tests/test_app.py @@ -1,7 +1,7 @@ from __future__ import annotations from typing import Dict import io -import zipfile +import pytest import pathlib import predicTCR_server import flask_test_utils as ftu @@ -129,17 +129,18 @@ def test_samples_valid(client): assert len(response.json) == 4 -def test_input_file_invalid(client): +@pytest.mark.parametrize("input_file_type", ["h5", "csv"]) +def test_input_file_invalid(client, input_file_type: str): # no auth header response = client.post( - "/api/input_file", + f"/api/input_{input_file_type}_file", json={"sample_id": 2}, ) assert response.status_code == 401 # invalid sample id headers = _get_auth_headers(client) response = client.post( - "/api/input_file", + f"/api/input_{input_file_type}_file", json={"sample_id": 66}, headers=headers, ) @@ -147,18 +148,17 @@ def test_input_file_invalid(client): assert "not found" in response.json["message"] -def test_input_file_valid(client): +@pytest.mark.parametrize("input_file_type", ["h5", "csv"]) +def test_input_file_valid(client, input_file_type: str): headers = _get_auth_headers(client) response = client.post( - "/api/input_file", + f"/api/input_{input_file_type}_file", json={"sample_id": 2}, headers=headers, ) assert response.status_code == 200 - zip_file = zipfile.ZipFile(io.BytesIO(response.data)) - filenames = [f.filename for f in zip_file.filelist] - assert len(filenames) == 1 - assert "test.txt" in filenames + with io.BytesIO(response.data) as f: + assert input_file_type in f.read().decode("utf-8") def test_result_invalid(client): @@ -222,14 +222,15 @@ def test_admin_runner_token_invalid(client): assert response.status_code == 400 -def test_admin_runner_token_valid(client): +@pytest.mark.parametrize("input_file_type", ["h5", "csv"]) +def test_admin_runner_token_valid(client, input_file_type: str): headers = _get_auth_headers(client, "admin@abc.xy", "admin") response = 
client.get("/api/admin/runner_token", headers=headers) assert response.status_code == 200 new_token = response.json["access_token"] assert ( client.post( - "/api/input_file", + f"/api/input_{input_file_type}_file", json={"sample_id": 1}, headers={"Authorization": f"Bearer {new_token}"}, ).status_code diff --git a/frontend/Dockerfile b/frontend/Dockerfile index 7750396..d4a261d 100644 --- a/frontend/Dockerfile +++ b/frontend/Dockerfile @@ -14,6 +14,8 @@ RUN pnpm install COPY . . +RUN echo "VITE_REST_API_LOCATION=/api" > .env + RUN pnpm run build-only FROM nginx diff --git a/frontend/nginx.conf b/frontend/nginx.conf index 119d9a8..472d6c1 100644 --- a/frontend/nginx.conf +++ b/frontend/nginx.conf @@ -7,7 +7,7 @@ server { ssl_certificate_key /predictcr_ssl_key.pem; # Maximum file upload size - client_max_body_size 20M; + client_max_body_size 100M; # Improve HTTPS performance with session resumption ssl_session_cache shared:SSL:10m; diff --git a/frontend/pnpm-lock.yaml b/frontend/pnpm-lock.yaml index 4613b4d..ccb3041 100644 --- a/frontend/pnpm-lock.yaml +++ b/frontend/pnpm-lock.yaml @@ -13,9 +13,6 @@ importers: bootstrap-icons: specifier: ^1.11.3 version: 1.11.3 - jsbarcode: - specifier: ^3.11.6 - version: 3.11.6 pinia: specifier: ^2.2.2 version: 2.2.2(typescript@5.5.4)(vue@3.4.38(typescript@5.5.4)) @@ -2799,12 +2796,6 @@ packages: } hasBin: true - jsbarcode@3.11.6: - resolution: - { - integrity: sha512-G5TKGyKY1zJo0ZQKFM1IIMfy0nF2rs92BLlCz+cU4/TazIc4ZH+X1GYeDRt7TKjrYqmPfTjwTBkU/QnQlsYiuA==, - } - jsdom@24.1.1: resolution: { @@ -6243,8 +6234,6 @@ snapshots: dependencies: argparse: 2.0.1 - jsbarcode@3.11.6: {} - jsdom@24.1.1: dependencies: cssstyle: 4.0.1 diff --git a/frontend/src/components/SamplesTable.vue b/frontend/src/components/SamplesTable.vue index 9bcd760..47260e9 100644 --- a/frontend/src/components/SamplesTable.vue +++ b/frontend/src/components/SamplesTable.vue @@ -1,6 +1,10 @@ @@ -114,21 +153,39 @@ function add_sample() {

- + + +

+

+

diff --git a/runner/Dockerfile b/runner/Dockerfile index 3a162ff..df6b5c7 100644 --- a/runner/Dockerfile +++ b/runner/Dockerfile @@ -12,8 +12,12 @@ RUN micromamba install -y -n base -f /tmp/env.yaml && \ WORKDIR /app -COPY runner.py /app/runner.py +COPY --chown=$MAMBA_USER:$MAMBA_USER . . -COPY scripts /app/scripts +ARG MAMBA_DOCKERFILE_ACTIVATE=1 -CMD ["python", "runner.py"] +RUN ls + +RUN python -m pip install . + +CMD ["predicTCR_runner"] diff --git a/runner/pyproject.toml b/runner/pyproject.toml new file mode 100644 index 0000000..9b72c2a --- /dev/null +++ b/runner/pyproject.toml @@ -0,0 +1,33 @@ +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "predicTCR_runner" +description = "predicTCR runner" +readme = "README.md" +maintainers = [{ name = "Liam Keegan", email = "ssc@iwr.uni-heidelberg.de" }] +dynamic = ["version"] +requires-python = ">=3.10" +license = { text = "MIT" } +classifiers = [ + "Programming Language :: Python :: 3", + "Operating System :: OS Independent", + "License :: OSI Approved :: MIT License", +] +dependencies = [ + "requests", + "click", +] + +[project.scripts] +predicTCR_runner = "predicTCR_runner.main:main" + +[project.optional-dependencies] +tests = ["pytest", "requests-mock", ] + +[tool.setuptools.dynamic] +version = { attr = "predicTCR_runner.__version__" } + +[tool.pytest.ini_options] +testpaths = ["tests"] diff --git a/runner/src/predicTCR_runner/__init__.py b/runner/src/predicTCR_runner/__init__.py new file mode 100644 index 0000000..f102a9c --- /dev/null +++ b/runner/src/predicTCR_runner/__init__.py @@ -0,0 +1 @@ +__version__ = "0.0.1" diff --git a/runner/src/predicTCR_runner/main.py b/runner/src/predicTCR_runner/main.py new file mode 100644 index 0000000..6419315 --- /dev/null +++ b/runner/src/predicTCR_runner/main.py @@ -0,0 +1,31 @@ +from __future__ import annotations + +import click +import logging +from .runner import Runner + + +@click.command(context_settings={"auto_envvar_prefix": "PREDICTCR"}) 
+@click.option("--api-url", type=str) +@click.option("--jwt-token", type=str) +@click.option("--poll-interval", type=int, default=5, show_default=True) +@click.option( + "--log-level", + default="INFO", + type=click.Choice( + ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], case_sensitive=False + ), + help="Log level", + show_default=True, + show_choices=True, +) +def main(api_url, jwt_token, poll_interval, log_level): + logging.basicConfig( + level=log_level, format="%(levelname)s %(module)s.%(funcName)s :: %(message)s" + ) + runner = Runner(api_url, jwt_token, poll_interval) + runner.start() + + +if __name__ == "__main__": + main(auto_envvar_prefix="PREDICTCR") diff --git a/runner/runner.py b/runner/src/predicTCR_runner/runner.py similarity index 72% rename from runner/runner.py rename to runner/src/predicTCR_runner/runner.py index 8d2d854..cbc2f68 100644 --- a/runner/runner.py +++ b/runner/src/predicTCR_runner/runner.py @@ -3,12 +3,9 @@ import requests import time import logging -import click import os import tempfile import shutil -import zipfile -import io import subprocess @@ -78,25 +75,28 @@ def _upload_result(self, sample_id: int, result_file: str): def _run_job(self, sample_id: int): self.logger.info(f"Starting job for sample id {sample_id}...") - self.logger.debug("Downloading input file...") - response = requests.post( - url=f"{self.api_url}/input_file", - json={"sample_id": sample_id}, - headers=self.auth_header, - timeout=30, - ) - if response.status_code != 200: - self.logger.error(f"Failed to download input file: {response.content}") - return self._report_job_failed( - sample_id, f"Failed to download input file on {self.runner_hostname}" - ) + self.logger.debug("Downloading input files...") with tempfile.TemporaryDirectory(delete=False) as tmpdir: - try: - zip_file = zipfile.ZipFile(io.BytesIO(response.content)) - self.logger.debug( - f" - extracting {zip_file.namelist()} to {tmpdir}..." 
+ for input_file_type in ["h5", "csv"]: + response = requests.post( + url=f"{self.api_url}/input_{input_file_type}_file", + json={"sample_id": sample_id}, + headers=self.auth_header, + timeout=30, ) - zip_file.extractall(tmpdir) + if response.status_code != 200: + self.logger.error( + f"Failed to download {input_file_type}: {response.content}" + ) + return self._report_job_failed( + sample_id, + f"Failed to download {input_file_type} on {self.runner_hostname}", + ) + input_file_name = f"input.{input_file_type}" + self.logger.debug(f" - writing {input_file_name} to {tmpdir}...") + with open(f"{tmpdir}/{input_file_name}", "wb") as input_file: + input_file.write(response.content) + try: self.logger.debug( f" - copying contents of scripts folder to {tmpdir}..." ) @@ -121,29 +121,3 @@ def start(self): self._run_job(job_id) else: time.sleep(self.poll_interval) - - -@click.command() -@click.option("--api-url", type=str) -@click.option("--jwt-token", type=str) -@click.option("--poll-interval", type=int, default=5, show_default=True) -@click.option( - "--log-level", - default="INFO", - type=click.Choice( - ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], case_sensitive=False - ), - help="Log level", - show_default=True, - show_choices=True, -) -def main(api_url, jwt_token, poll_interval, log_level): - logging.basicConfig( - level=log_level, format="%(levelname)s %(module)s.%(funcName)s :: %(message)s" - ) - runner = Runner(api_url, jwt_token, poll_interval) - runner.start() - - -if __name__ == "__main__": - main(auto_envvar_prefix="PREDICTCR") diff --git a/runner/tests/__init__.py b/runner/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/runner/tests/test_runner.py b/runner/tests/test_runner.py new file mode 100644 index 0000000..f044eaa --- /dev/null +++ b/runner/tests/test_runner.py @@ -0,0 +1,9 @@ +from predicTCR_runner.runner import Runner + + +def test_runner_request_job(requests_mock): + requests_mock.post("http://api/runner/request_job", 
status_code=204) + runner = Runner(api_url="http://api", jwt_token="abc") + assert runner._request_job() is None + requests_mock.post("http://api/runner/request_job", json={"sample_id": 44}) + assert runner._request_job() == 44