Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TST: add tests for MetaCoAG #13

Merged
merged 4 commits into from
Sep 16, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
155 changes: 151 additions & 4 deletions tests/test_cli_metacoag.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
import os
import pathlib
import pytest

import pytest
from click.testing import CliRunner

from gbintk.cli import metacoag


__author__ = "Vijini Mallawaarachchi"
__credits__ = ["Vijini Mallawaarachchi"]


DATADIR = pathlib.Path(__file__).parent / "data"

@pytest.fixture(scope="function")
def tmp_dir(tmpdir_factory):
    """Provide a temporary output directory for a single test.

    The fixture is function-scoped so that every test (and every fixture
    that runs the CLI) gets its own directory instead of sharing one for
    the whole session.

    Args:
        tmpdir_factory: pytest's built-in temporary-directory factory.

    Returns:
        The directory object returned by ``tmpdir_factory.mktemp("tmp")``.
    """
    return tmpdir_factory.mktemp("tmp")

Expand All @@ -29,12 +30,158 @@ def runner():
return CliRunner()


def test_metacoag_run(runner, tmp_dir):
def get_files_and_seq_counts(output_path):
    """Count the files in a directory and the FASTA records in each file.

    A record is any line that, once surrounding whitespace is stripped,
    starts with ``">"``.

    Args:
        output_path: Directory to inspect (string or path-like object).

    Returns:
        A tuple ``(n_files, seq_counts)`` where ``n_files`` is the number of
        regular files directly inside ``output_path`` and ``seq_counts`` is
        the list of per-file record counts sorted in ascending order.
    """
    seq_counts = []
    for name in os.listdir(output_path):
        file_path = os.path.join(output_path, name)
        # Only regular files can be read as FASTA; a stray sub-directory
        # would otherwise make open() raise.
        if not os.path.isfile(file_path):
            continue
        with open(file_path, "r") as handle:
            seq_counts.append(
                sum(1 for line in handle if line.strip().startswith(">"))
            )

    seq_counts.sort()

    return len(seq_counts), seq_counts


@pytest.fixture(scope="function")
def test_metacoag_spades_run(runner, tmp_dir):
    """Run the ``metacoag`` command on the 5G metaSPAdes data set.

    Args:
        runner: Click test runner used to invoke the command.
        tmp_dir: Temporary directory used as the ``--output`` location.

    Returns:
        The ``(n_bins, seq_counts)`` tuple computed by
        ``get_files_and_seq_counts`` for the ``bins`` directory inside the
        output location.
    """
    outpath = tmp_dir
    data = DATADIR / "5G_metaSPAdes"
    # Pass the arguments as an explicit list: splitting a formatted string
    # on whitespace breaks as soon as any path contains a space.
    args = [
        "--assembler",
        "spades",
        "--graph",
        str(data / "assembly_graph_with_scaffolds.gfa"),
        "--contigs",
        str(data / "contigs.fasta"),
        "--paths",
        str(data / "contigs.paths"),
        "--abundance",
        str(data / "coverm_mean_coverage.tsv"),
        "--output",
        str(outpath),
    ]
    r = runner.invoke(metacoag, args, catch_exceptions=False)

    assert r.exit_code == 0, r.output  # Check if the command ran successfully

    n_bins, seq_counts = get_files_and_seq_counts(outpath / "bins")
    return n_bins, seq_counts


@pytest.fixture(scope="function")
def test_metacoag_megahit_run(runner, tmp_dir):
    """Run the ``metacoag`` command on the 5G MEGAHIT data set.

    Args:
        runner: Click test runner used to invoke the command.
        tmp_dir: Temporary directory used as the ``--output`` location.

    Returns:
        The ``(n_bins, seq_counts)`` tuple computed by
        ``get_files_and_seq_counts`` for the ``bins`` directory inside the
        output location.
    """
    outpath = tmp_dir
    data = DATADIR / "5G_MEGAHIT"
    # Pass the arguments as an explicit list: splitting a formatted string
    # on whitespace breaks as soon as any path contains a space.
    args = [
        "--assembler",
        "megahit",
        "--graph",
        str(data / "final.gfa"),
        "--contigs",
        str(data / "final.contigs.fa"),
        "--abundance",
        str(data / "abundance.tsv"),
        "--output",
        str(outpath),
    ]
    r = runner.invoke(metacoag, args, catch_exceptions=False)

    assert r.exit_code == 0, r.output  # Check if the command ran successfully

    n_bins, seq_counts = get_files_and_seq_counts(outpath / "bins")
    return n_bins, seq_counts


@pytest.fixture(scope="function")
def test_metacoag_flye_run(tmp_dir, runner):
    """Run the ``metacoag`` command on the 1Y3B Flye data set.

    Args:
        tmp_dir: Temporary directory used as the ``--output`` location.
        runner: Click test runner used to invoke the command.

    Returns:
        The ``(n_bins, seq_counts)`` tuple computed by
        ``get_files_and_seq_counts`` for the ``bins`` directory inside the
        output location.
    """
    outpath = tmp_dir
    data = DATADIR / "1Y3B_Flye"
    # Pass the arguments as an explicit list: splitting a formatted string
    # on whitespace breaks as soon as any path contains a space.
    args = [
        "--assembler",
        "flye",
        "--graph",
        str(data / "assembly_graph.gfa"),
        "--contigs",
        str(data / "assembly.fasta"),
        "--paths",
        str(data / "assembly_info.txt"),
        "--abundance",
        str(data / "abundance.tsv"),
        "--output",
        str(outpath),
    ]
    r = runner.invoke(metacoag, args, catch_exceptions=False)

    assert r.exit_code == 0, r.output  # Check if the command ran successfully

    n_bins, seq_counts = get_files_and_seq_counts(outpath / "bins")
    return n_bins, seq_counts


def test_n_bins_metacoag_spades(test_metacoag_spades_run):
    """Check the bin count and per-bin sequence counts of the metaSPAdes run."""
    bin_total, records_per_bin = test_metacoag_spades_run

    # Five bins are expected for this data set.
    assert bin_total == 5

    # Sequence counts per bin, in ascending order.
    assert records_per_bin == [10, 23, 48, 69, 78]


@pytest.mark.filterwarnings("ignore::RuntimeWarning")
def test_n_bins_metacoag_megahit(test_metacoag_megahit_run):
    """Check the bin count and per-bin sequence counts of the MEGAHIT run."""
    bin_total, records_per_bin = test_metacoag_megahit_run

    # Five bins are expected for this data set.
    assert bin_total == 5

    # Sequence counts per bin, in ascending order.
    assert records_per_bin == [36, 40, 46, 84, 127]


@pytest.mark.filterwarnings("ignore::RuntimeWarning")
def test_n_bins_metacoag_flye(test_metacoag_flye_run):
    """Check the bin count and per-bin sequence counts of the Flye run."""
    bin_total, records_per_bin = test_metacoag_flye_run

    # Three bins are expected for this data set.
    assert bin_total == 3

    # Each bin holds a single sequence.
    assert records_per_bin == [1, 1, 1]


@pytest.mark.parametrize(
    "graph, contigs, paths, abundance",
    [
        (
            DATADIR / "invalid_path" / "assembly_graph_with_scaffolds.gfa",
            DATADIR / "5G_metaSPAdes" / "contigs.fasta",
            DATADIR / "5G_metaSPAdes" / "contigs.paths",
            DATADIR / "5G_metaSPAdes" / "coverm_mean_coverage.tsv",
        ),
    ],
)
def test_metacoag_spades_invalid_run(runner, tmp_dir, graph, contigs, paths, abundance):
    """Check that a non-existent graph file makes the spades run fail.

    The command must exit with a non-zero code and its output must contain
    both "Error" and "Path".
    """
    outpath = tmp_dir
    # Pass the arguments as an explicit list: splitting a formatted string
    # on whitespace breaks as soon as any path contains a space.
    args = [
        "--assembler",
        "spades",
        "--graph",
        str(graph),
        "--contigs",
        str(contigs),
        "--paths",
        str(paths),
        "--abundance",
        str(abundance),
        "--output",
        str(outpath),
    ]
    r = runner.invoke(metacoag, args, catch_exceptions=False)
    assert r.exit_code != 0
    assert "Error" in r.output and "Path" in r.output  # Check for error messages


@pytest.mark.parametrize(
    "graph, contigs, abundance",
    [
        (
            DATADIR / "invalid_path" / "final.gfa",
            DATADIR / "5G_MEGAHIT" / "final.contigs.fa",
            DATADIR / "5G_MEGAHIT" / "abundance.tsv",
        ),
    ],
)
def test_metacoag_megahit_invalid_run(
    runner, tmp_path_factory, graph, contigs, abundance
):
    """Check that a non-existent graph file makes the megahit run fail.

    The command must exit with a non-zero code and its output must contain
    both "Error" and "Path".
    """
    outpath = tmp_path_factory.mktemp("tmp")
    # Pass the arguments as an explicit list: splitting a formatted string
    # on whitespace breaks as soon as any path contains a space.
    args = [
        "--assembler",
        "megahit",
        "--graph",
        str(graph),
        "--contigs",
        str(contigs),
        "--abundance",
        str(abundance),
        "--output",
        str(outpath),
    ]
    r = runner.invoke(metacoag, args, catch_exceptions=True)

    assert r.exit_code != 0
    assert "Error" in r.output and "Path" in r.output


@pytest.mark.parametrize(
    "graph, contigs, paths, abundance",
    [
        (
            DATADIR / "invalid_path" / "assembly_graph.gfa",
            DATADIR / "1Y3B_Flye" / "assembly.fasta",
            DATADIR / "1Y3B_Flye" / "assembly_info.txt",
            DATADIR / "1Y3B_Flye" / "abundance.tsv",
        ),
    ],
)
def test_metacoag_flye_invalid_run(
    runner, tmp_path_factory, graph, contigs, paths, abundance
):
    """Check that a non-existent graph file makes the flye run fail.

    The command must exit with a non-zero code and its output must contain
    both "Error" and "Path".
    """
    outpath = tmp_path_factory.mktemp("tmp")
    # Pass the arguments as an explicit list: splitting a formatted string
    # on whitespace breaks as soon as any path contains a space.
    args = [
        "--assembler",
        "flye",
        "--graph",
        str(graph),
        "--contigs",
        str(contigs),
        "--paths",
        str(paths),
        "--abundance",
        str(abundance),
        "--output",
        str(outpath),
    ]
    r = runner.invoke(metacoag, args, catch_exceptions=True)

    assert r.exit_code != 0
    assert "Error" in r.output and "Path" in r.output
Loading