Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove pandas from GISAID API #3393

Open
wants to merge 42 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
84f208f
feat(rm_pandas): WIP
henrikstranneheim Jul 2, 2024
7d8d619
feat(rm_pandas): WIP
henrikstranneheim Jul 3, 2024
b056ad7
feat(rm_pandas): WIP
henrikstranneheim Jul 3, 2024
029cf08
feat(rm_pandas):
henrikstranneheim Jul 4, 2024
095574e
feat(rm_pandas):
henrikstranneheim Jul 4, 2024
5dbbeff
feat(rm_pandas):
henrikstranneheim Jul 4, 2024
c2cd1c4
feat(update): Linters
henrikstranneheim Aug 6, 2024
2150c07
Merge remote-tracking branch 'origin/master'
henrikstranneheim Aug 8, 2024
1735c03
Merge remote-tracking branch 'origin/master'
henrikstranneheim Aug 8, 2024
0da7f66
Merge remote-tracking branch 'origin/master'
henrikstranneheim Aug 9, 2024
5926546
Merge
henrikstranneheim Aug 9, 2024
69ce6f2
Merge remote-tracking branch 'origin/master'
henrikstranneheim Aug 9, 2024
3a20a70
Merge branch 'master' into remove-pandas-from-gsaid-api
henrikstranneheim Aug 9, 2024
2904d9e
feat(remove): pandas
henrikstranneheim Aug 9, 2024
bceafdc
Merge remote-tracking branch 'origin/master'
henrikstranneheim Aug 9, 2024
a5d21d6
Merge branch 'master' into remove-pandas-from-gsaid-api
henrikstranneheim Aug 9, 2024
b795184
Merge remote-tracking branch 'origin/master'
henrikstranneheim Aug 11, 2024
90c06ef
Merge remote-tracking branch 'origin/master'
henrikstranneheim Aug 14, 2024
b1e548b
Merge branch 'master' into remove-pandas-from-gsaid-api
henrikstranneheim Aug 16, 2024
ca58e05
feat(header): Use complementary
henrikstranneheim Aug 16, 2024
82644c3
Merge remote-tracking branch 'origin/master'
henrikstranneheim Aug 19, 2024
13c0352
Merge branch 'master' into remove-pandas-from-gsaid-api
henrikstranneheim Aug 19, 2024
26e627f
feat(rm_pandas): WIP
henrikstranneheim Aug 19, 2024
cab4d11
feat(rm_pandas): WIP
henrikstranneheim Aug 19, 2024
57e4ddd
feat(rm_pandas): WIP
henrikstranneheim Aug 20, 2024
dd6c9f1
feat(rm_pandas): WIP
henrikstranneheim Aug 20, 2024
676f8a4
Merge branch 'master' into remove-pandas-from-gsaid-api
henrikstranneheim Aug 20, 2024
40a3c1f
feat(rm_pandas): WIP
henrikstranneheim Aug 21, 2024
97db74f
feat(rm_pandas): WIP
henrikstranneheim Aug 21, 2024
6869e0d
feat(rm_pandas): WIP
henrikstranneheim Aug 21, 2024
6ebdfbd
feat(rm_pandas): WIP
henrikstranneheim Aug 21, 2024
7a22141
feat(rm_pandas): WIP
henrikstranneheim Aug 21, 2024
d331d12
Merge branch 'master' into remove-pandas-from-gsaid-api
henrikstranneheim Aug 22, 2024
6dea4dd
feat(rm_pandas): WIP
henrikstranneheim Aug 22, 2024
64f17b4
Merge remote-tracking branch 'origin/master'
henrikstranneheim Sep 1, 2024
3a5a550
Merge branch 'master' into remove-pandas-from-gsaid-api
henrikstranneheim Sep 1, 2024
a1073ed
feat(gisaid: Add test
henrikstranneheim Sep 1, 2024
259688e
feat(gisaid: Add test
henrikstranneheim Sep 1, 2024
f1c8fb6
feat(gisaid): WIP
henrikstranneheim Sep 1, 2024
12e817f
feat(gisaid): WIP
henrikstranneheim Sep 1, 2024
6d927b4
feat(gisaid): WIP
henrikstranneheim Sep 1, 2024
3c7ca9c
Merge branch 'master' into remove-pandas-from-gsaid-api
henrikstranneheim Sep 16, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cg/cli/upload/fohm.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def preprocess_all(
upload_cases = []
for case_id in cases:
try:
gisaid_api.upload(case_id=case_id)
gisaid_api.upload_to_gisaid(case_id)
fohm_api.update_upload_started_at(case_id=case_id)
LOG.info(f"Upload of case {case_id} to GISAID was successful")
upload_cases.append(case_id)
Expand Down
6 changes: 3 additions & 3 deletions cg/cli/upload/gisaid.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Code for uploading genotype data via CLI"""
"""Code for uploading genotype data via CLI."""

import logging

Expand All @@ -18,7 +18,7 @@ def upload_to_gisaid(context: CGConfig, case_id: str):

LOG.info("----------------- GISAID UPLOAD -------------------")

gisaid_api = GisaidAPI(config=context)
gisaid_api = GisaidAPI(context)

gisaid_api.upload(case_id=case_id)
gisaid_api.upload_to_gisaid(case_id)
LOG.info("Upload to GISAID successful")
1 change: 1 addition & 0 deletions cg/constants/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ class FileExtensions(StrEnum):
CONFIG: str = ".config"
CRAM: str = ".cram"
CSV: str = ".csv"
FASTA: str = ".fasta"
FASTQ: str = ".fastq"
FASTQ_GZ: str = ".fastq.gz"
GPG: str = ".gpg"
Expand Down
7 changes: 7 additions & 0 deletions cg/constants/housekeeper_tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,3 +250,10 @@ class JanusTags:
class FohmTag(StrEnum):
COMPLEMENTARY = "komplettering"
PANGOLIN_TYPING = "pangolin-typing-fohm"


class GisaidTag(StrEnum):
FASTA = "gisaid-fasta"
CONSENSUS_SAMPLE = "consensus-sample"
CSV = "gisaid-csv"
LOG = "gisaid-log"
337 changes: 199 additions & 138 deletions cg/meta/upload/gisaid/gisaid.py

Large diffs are not rendered by default.

Empty file added cg/models/gisaid/__init__.py
Empty file.
11 changes: 11 additions & 0 deletions cg/models/gisaid/reports.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
"""GISAID report models."""

from pydantic import BaseModel, Field


class GisaidComplementaryReport(BaseModel):
    """Model for validating a GISAID complementary report.

    Fields are populated by alias, i.e. from the keys ``GISAID_accession``,
    ``provnummer`` and ``urvalskriterium`` of the raw report.

    Raises:
        pydantic.ValidationError: If ``provnummer`` or ``urvalskriterium`` is
            missing or is not a string.
    """

    # Optional: stays None when the raw report carries no accession.
    gisaid_accession: str | None = Field(default=None, alias="GISAID_accession")
    # Required: `...` marks the field as mandatory. The previous first argument
    # was the `str` type object itself, which pydantic treats as the default
    # value and would silently store on the model when the key is absent.
    sample_number: str = Field(..., alias="provnummer")
    selection_criteria: str = Field(..., alias="urvalskriterium")
Comment on lines +10 to +11
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the str in the default position might cause a bug

133 changes: 1 addition & 132 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ marshmallow = "*"
MarkupSafe = "*"
openpyxl = "*"
packaging = "*"
pandas = "*"
paramiko = "*"
petname = "*"
psutil = "*"
Expand Down
2 changes: 2 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@
"tests.fixture_plugins.encryption_fixtures.encryption_fixtures",
"tests.fixture_plugins.fohm.fohm_fixtures",
"tests.fixture_plugins.io.csv_fixtures",
"tests.fixture_plugins.io.csv_fixtures",
"tests.fixture_plugins.gisaid_fixtures.gisaid_fixtures",
"tests.fixture_plugins.illumina_clean_fixtures.clean_fixtures",
"tests.fixture_plugins.loqusdb_fixtures.loqusdb_api_fixtures",
"tests.fixture_plugins.loqusdb_fixtures.loqusdb_output_fixtures",
Expand Down
47 changes: 47 additions & 0 deletions tests/fixture_plugins/gisaid_fixtures/gisaid_fixtures.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import pytest

from cg.meta.upload.gisaid import GisaidAPI
from cg.models.cg_config import CGConfig
from cg.models.gisaid.reports import GisaidComplementaryReport


@pytest.fixture
def gisaid_complementary_report_raw() -> dict[str, str]:
    """Return the raw, unvalidated content of one GISAID complementary report."""
    return dict(
        provnummer="a_sample_number",
        urvalskriterium="a_selection_criteria",
    )


@pytest.fixture
def gisaid_complementary_report(
    gisaid_complementary_report_raw: dict[str, str],
) -> GisaidComplementaryReport:
    """Return a GisaidComplementaryReport validated from the raw report dict."""
    # The raw fixture yields a single dict (not a list of dicts), which is
    # what model_validate expects here.
    return GisaidComplementaryReport.model_validate(gisaid_complementary_report_raw)


@pytest.fixture
def gisaid_complementary_reports(
    gisaid_complementary_report: GisaidComplementaryReport,
) -> list[GisaidComplementaryReport]:
    """Return three GISAID complementary reports.

    The list holds, in order: the report from the single-report fixture, a
    report with sample number 44CS000000 and no accession, and a report with
    sample number 1CS that already has a GISAID accession.
    """
    report_with_accession = GisaidComplementaryReport.model_validate(
        {
            "provnummer": "1CS",
            "urvalskriterium": "criteria",
            "GISAID_accession": "an_accession",
        }
    )
    report_without_accession = GisaidComplementaryReport.model_validate(
        {"urvalskriterium": "criteria", "provnummer": "44CS000000"}
    )
    reports = [gisaid_complementary_report]
    reports.append(report_without_accession)
    reports.append(report_with_accession)
    return reports


@pytest.fixture
def gisaid_api(cg_context: CGConfig) -> GisaidAPI:
    """Return a GisaidAPI built from the cg context fixture."""
    api = GisaidAPI(cg_context)
    return api
82 changes: 82 additions & 0 deletions tests/meta/upload/gisaid/test_gsaid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
from pathlib import Path

from cg.meta.upload.gisaid import GisaidAPI
from cg.models.gisaid.reports import GisaidComplementaryReport


def test_get_complementary_report_content(gisaid_api: GisaidAPI, csv_file_path: Path):
    """Test getting the content of a complementary report from a CSV file."""
    # GIVEN a path to a CSV file

    # WHEN getting the report content from the file
    rows: list[dict] = gisaid_api.get_complementary_report_content(csv_file_path)

    # THEN the first item of the content is a dict
    first_row = rows[0]
    assert isinstance(first_row, dict)

    # THEN the content holds three items
    assert len(rows) == 3


def test_validate_gisaid_complementary_reports(
gisaid_api: GisaidAPI, gisaid_complementary_report_raw: dict[str, str]
):
# GIVEN a dict
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# GIVEN a dict
# GIVEN a valid GISAID complementary report as a dict


# WHEN validating the dict
content: list[GisaidComplementaryReport] = gisaid_api.validate_gisaid_complementary_reports(
[gisaid_complementary_report_raw]
)

# THEN a list of reports is returned
assert isinstance(content[0], GisaidComplementaryReport)


def test_get_sars_cov_complementary_reports(
    gisaid_complementary_reports: list[GisaidComplementaryReport], gisaid_api: GisaidAPI
):
    """Test selecting the Sars-cov reports from a list of complementary reports."""
    # GIVEN a list of complementary reports

    # WHEN getting the Sars-cov reports from the list
    sars_cov_reports: list[GisaidComplementaryReport] = (
        gisaid_api.get_sars_cov_complementary_reports(gisaid_complementary_reports)
    )

    # THEN the first returned item is a complementary report
    first_report = sars_cov_reports[0]
    assert isinstance(first_report, GisaidComplementaryReport)

    # THEN exactly one report remains, the one with sample number 44CS000000
    assert len(sars_cov_reports) == 1
    assert first_report.sample_number == "44CS000000"


def test_get_complementary_report_sample_number(
    gisaid_complementary_reports: list[GisaidComplementaryReport], gisaid_api: GisaidAPI
):
    """Test collecting the sample numbers from a list of complementary reports."""
    # GIVEN a list of complementary reports

    # WHEN getting the sample numbers of the reports
    sample_numbers: set[str] = gisaid_api.get_complementary_report_sample_number(
        gisaid_complementary_reports
    )

    # THEN the sample number of every report is among the returned sample numbers
    assert all(
        report.sample_number in sample_numbers for report in gisaid_complementary_reports
    )


def test_add_gisaid_accession_to_reports(
gisaid_complementary_reports: list[GisaidComplementaryReport], gisaid_api: GisaidAPI
):
"""Test adding gisaid accession to the reports."""
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we stick to using capitals for GISAID?

Suggested change
"""Test adding gisaid accession to the reports."""
"""Test adding GISAID accession to the reports."""

# GIVEN a GISAID API

# GIVEN a list of reports

# WHEN adding GISAID accession to reports
gisaid_api.add_gisaid_accession_to_complementary_reports(
gisaid_accession={gisaid_complementary_reports[0].sample_number: "a_gisaid_accession"},
reports=[gisaid_complementary_reports[0]],
)

# THEN a GISAID accession has been added
assert isinstance(gisaid_complementary_reports[0].gisaid_accession, str)
Loading
Loading