From 4e6f882ed9793b57d76c97a4b07fee4e2271724a Mon Sep 17 00:00:00 2001 From: Keisuke Oshima Date: Tue, 14 May 2024 21:21:04 -0400 Subject: [PATCH] Refactored test dir. Added length subcommand. --- .gitattributes | 29 +-- README.md | 10 +- censtats/length/__init__.py | 0 censtats/length/cli.py | 213 ++++++++++++++++++ censtats/length/constants.py | 9 + censtats/main.py | 6 +- test/helpers/__init__.py | 0 test/helpers/integration.py | 17 ++ test/length/__init__.py | 0 test/length/expected/AS-HOR_chm13_lengths.bed | 26 +++ .../AS-HOR-vs-chm13_cens_v18.stv_row.all.bed | 3 + test/length/test_integration.py | 27 +++ test/status/__init__.py | 0 .../expected/correct_chr21_cens.tsv | 0 .../correct_chr21_cens_false_neg_mismap.tsv | 0 .../correct_chr21_chr13_cens_mismap.tsv | 0 .../expected/correct_chr22_cens.tsv | 0 .../expected/correct_chr4_cens_partials.tsv | 0 .../expected/correct_chr9_cens_partials.tsv | 0 ...hm13_chm1_cens_v21.trimmed.fa.noheader.out | 0 ..._chm1_cens_v21.trimmed.fa.noheader.out.png | 0 test/{ => status}/input/chr21_cens.fa.out | 0 test/{ => status}/input/chr21_cens.png | 0 .../input/chr21_cens_false_neg_mismap.fa.out | 0 .../input/chr21_cens_false_neg_mismap.png | 0 .../input/chr21_chr13_cens_mismap.fa.out | 0 .../input/chr21_chr13_cens_mismap.png | 0 test/{ => status}/input/chr22_cens.fa.out | 0 test/{ => status}/input/chr22_cens.png | 0 .../input/chr4_cens_partials.fa.out | 0 .../{ => status}/input/chr4_cens_partials.png | 0 .../input/chr9_cens_partials.fa.out | 0 .../{ => status}/input/chr9_cens_partials.png | 0 test/{ => status}/plot_inputs.sh | 0 .../plot_repeatStructure_onlyRM.R | 0 test/status/test_integration.py | 54 +++++ test/test_integration.py | 57 ----- 37 files changed, 375 insertions(+), 76 deletions(-) create mode 100644 censtats/length/__init__.py create mode 100644 censtats/length/cli.py create mode 100644 censtats/length/constants.py create mode 100644 test/helpers/__init__.py create mode 100644 test/helpers/integration.py create 
mode 100644 test/length/__init__.py create mode 100644 test/length/expected/AS-HOR_chm13_lengths.bed create mode 100644 test/length/input/AS-HOR-vs-chm13_cens_v18.stv_row.all.bed create mode 100644 test/length/test_integration.py create mode 100644 test/status/__init__.py rename test/{ => status}/expected/correct_chr21_cens.tsv (100%) rename test/{ => status}/expected/correct_chr21_cens_false_neg_mismap.tsv (100%) rename test/{ => status}/expected/correct_chr21_chr13_cens_mismap.tsv (100%) rename test/{ => status}/expected/correct_chr22_cens.tsv (100%) rename test/{ => status}/expected/correct_chr4_cens_partials.tsv (100%) rename test/{ => status}/expected/correct_chr9_cens_partials.tsv (100%) rename test/{ => status}/input/chm13_chm1_cens_v21.trimmed.fa.noheader.out (100%) rename test/{ => status}/input/chm13_chm1_cens_v21.trimmed.fa.noheader.out.png (100%) rename test/{ => status}/input/chr21_cens.fa.out (100%) rename test/{ => status}/input/chr21_cens.png (100%) rename test/{ => status}/input/chr21_cens_false_neg_mismap.fa.out (100%) rename test/{ => status}/input/chr21_cens_false_neg_mismap.png (100%) rename test/{ => status}/input/chr21_chr13_cens_mismap.fa.out (100%) rename test/{ => status}/input/chr21_chr13_cens_mismap.png (100%) rename test/{ => status}/input/chr22_cens.fa.out (100%) rename test/{ => status}/input/chr22_cens.png (100%) rename test/{ => status}/input/chr4_cens_partials.fa.out (100%) rename test/{ => status}/input/chr4_cens_partials.png (100%) rename test/{ => status}/input/chr9_cens_partials.fa.out (100%) rename test/{ => status}/input/chr9_cens_partials.png (100%) rename test/{ => status}/plot_inputs.sh (100%) rename test/{ => status}/plot_repeatStructure_onlyRM.R (100%) create mode 100644 test/status/test_integration.py delete mode 100644 test/test_integration.py diff --git a/.gitattributes b/.gitattributes index 3b7d158..78223f2 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,14 +1,15 @@ -test/input/chr9_cens_partials.fa.out 
filter=lfs diff=lfs merge=lfs -text -test/input/chm13_chm1_cens_v21.trimmed.fa.noheader.out filter=lfs diff=lfs merge=lfs -text -test/input/chr21_cens.fa.out filter=lfs diff=lfs merge=lfs -text -test/input/chr21_chr13_cens_mismap.fa.out filter=lfs diff=lfs merge=lfs -text -test/input/chr22_cens.fa.out filter=lfs diff=lfs merge=lfs -text -test/input/chr4_cens_partials.fa.out filter=lfs diff=lfs merge=lfs -text -test/input/chr4_cens_partials.png filter=lfs diff=lfs merge=lfs -text -test/input/chr9_cens_partials.png filter=lfs diff=lfs merge=lfs -text -test/input/chm13_chm1_cens_v21.trimmed.fa.noheader.out.png filter=lfs diff=lfs merge=lfs -text -test/input/chr21_cens_false_neg_mismap.png filter=lfs diff=lfs merge=lfs -text -test/input/chr21_cens_false_neg_mismap.fa.out filter=lfs diff=lfs merge=lfs -text -test/input/chr21_cens.png filter=lfs diff=lfs merge=lfs -text -test/input/chr21_chr13_cens_mismap.png filter=lfs diff=lfs merge=lfs -text -test/input/chr22_cens.png filter=lfs diff=lfs merge=lfs -text +test/length/input/AS-HOR-vs-chm13_cens_v18.stv_row.all.bed filter=lfs diff=lfs merge=lfs -text +test/status/input/chr9_cens_partials.png filter=lfs diff=lfs merge=lfs -text +test/status/input/chr21_cens_false_neg_mismap.fa.out filter=lfs diff=lfs merge=lfs -text +test/status/input/chr21_chr13_cens_mismap.fa.out filter=lfs diff=lfs merge=lfs -text +test/status/input/chr22_cens.png filter=lfs diff=lfs merge=lfs -text +test/status/input/chr4_cens_partials.fa.out filter=lfs diff=lfs merge=lfs -text +test/status/input/chr4_cens_partials.png filter=lfs diff=lfs merge=lfs -text +test/status/input/chr21_cens_false_neg_mismap.png filter=lfs diff=lfs merge=lfs -text +test/status/input/chr21_cens.fa.out filter=lfs diff=lfs merge=lfs -text +test/status/input/chr22_cens.fa.out filter=lfs diff=lfs merge=lfs -text +test/status/input/chr21_chr13_cens_mismap.png filter=lfs diff=lfs merge=lfs -text +test/status/input/chr9_cens_partials.fa.out filter=lfs diff=lfs merge=lfs -text 
+test/status/input/chm13_chm1_cens_v21.trimmed.fa.noheader.out filter=lfs diff=lfs merge=lfs -text +test/status/input/chm13_chm1_cens_v21.trimmed.fa.noheader.out.png filter=lfs diff=lfs merge=lfs -text +test/status/input/chr21_cens.png filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md index 5a47f6e..551c568 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,8 @@ Centromere statistics toolkit. * `status` * Determine the status of centromeric contigs based on [`RepeatMasker`](https://www.repeatmasker.org/) annotations. +* `length` + * Estimate HOR array length from [`stv`](https://github.com/fedorrik/stv) bed file and [`HumAS-HMMER`](https://github.com/fedorrik/HumAS-HMMER_for_AnVIL) output. ### Setup ```bash @@ -14,15 +16,15 @@ pip install censtats ### Usage ```bash -usage: censtats [-h] {status} ... +usage: censtats [-h] {status,length} ... -Centromere statistics tool kit. +Centromere statistics toolkit. positional arguments: - {status} + {status,length} options: - -h, --help show this help message and exit + -h, --help show this help message and exit ``` Read the docs [here](https://github.com/logsdon-lab/CenStats/wiki/Usage). 
diff --git a/censtats/length/__init__.py b/censtats/length/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/censtats/length/cli.py b/censtats/length/cli.py
new file mode 100644
index 0000000..481a5e0
--- /dev/null
+++ b/censtats/length/cli.py
@@ -0,0 +1,222 @@
+import re
+import sys
+import argparse
+import polars as pl
+
+from typing import TYPE_CHECKING, Any, TextIO
+
+from .constants import (
+    RGX_CHR,
+    DEF_BP_JUMP_LEN_THR,
+    DEF_ARR_LEN_THR,
+    HOR_BP_LEN,
+    DEF_EXP_STV_ROW_BED_COLS,
+    DEF_OUTPUT_BED_COLS,
+)
+
+
+if TYPE_CHECKING:
+    SubArgumentParser = argparse._SubParsersAction[argparse.ArgumentParser]
+else:
+    SubArgumentParser = Any
+
+
+def add_hor_length_cli(parser: SubArgumentParser) -> None:
+    """Register the `length` subcommand and its arguments on `parser`."""
+    ap = parser.add_parser(
+        "length",
+        description="Estimate HOR array length from stv bed file / HumAS-HMMER output.",
+    )
+    ap.add_argument(
+        "-i",
+        "--input",
+        help=f"Input stv row bed file produced by HumAS-HMMER and stv. Expects columns: {DEF_EXP_STV_ROW_BED_COLS}",
+        type=str,
+        # There is no default to fall back on; fail during argument parsing.
+        required=True,
+    )
+    ap.add_argument(
+        "-o",
+        "--output",
+        help=f"Output bed file with columns: {DEF_OUTPUT_BED_COLS}",
+        default=sys.stdout,
+        type=argparse.FileType("wt"),
+    )
+    ap.add_argument(
+        "--bp_jump_thr",
+        help="Base pair jump threshold to group by",
+        type=int,
+        default=DEF_BP_JUMP_LEN_THR,
+    )
+    ap.add_argument(
+        "--arr_len_thr",
+        help="Length threshold to filter out.",
+        type=int,
+        default=DEF_ARR_LEN_THR,
+    )
+    return None
+
+
+def calculate_hor_length(
+    infile: str, bp_jump_thr: int, arr_len_thr: int, output: TextIO
+) -> int:
+    """
+    Calculate HOR array length from HumAS-HMMER structural variation row output.
+
+    ### Parameters
+    `infile`
+        Input bed file made from HumAS-HMMER output.
+        Expects the following columns: `{chr, start, stop, hor, 0, strand, ...}`.
+    `bp_jump_thr`
+        Base pair jump threshold to group by.
+        chr8, chr10 and chr16 use 10,000 and chrY uses 2,000 instead.
+    `arr_len_thr`
+        Length threshold of HOR array to filter out.
+        Arrays split by jumps on chr8, chr10, chr17 and chrY use 100,000 instead.
+    `output`
+        Open text handle that the bed rows with HOR array lengths are written to.
+        Columns: `{chr_name, start_pos, stop_pos, len}`.
+
+    ### Returns
+    0 if successful.
+    """
+    df = pl.read_csv(
+        infile,
+        separator="\t",
+        columns=[0, 1, 2, 3, 4, 5],
+        new_columns=DEF_EXP_STV_ROW_BED_COLS,
+        has_header=False,
+    )
+
+    dfs = []
+    for ctg_name, df_chr in df.group_by(["chr"], maintain_order=True):
+        df_chr = df_chr.with_columns(len=pl.col("stop") - pl.col("start")).with_columns(
+            mer=(pl.col("len") / HOR_BP_LEN).round()
+        )
+        ctg_name = ctg_name[0]
+        mtch_chr_name = re.search(RGX_CHR, ctg_name)
+        if mtch_chr_name is None:
+            continue
+
+        chr_name = mtch_chr_name.group(0)
+        df_live_hor = df_chr.filter(pl.col("hor").str.contains("L"))
+        # Without live HOR rows min()/max() below are None and cannot be subtracted.
+        if df_live_hor.is_empty():
+            continue
+
+        # Chromosome-specific jump thresholds (chr8, chr10, chr16 and chrY).
+        # Kept in a local so that an override never leaks into later contigs
+        # and the caller's `bp_jump_thr` applies to every other chromosome.
+        if chr_name in ("chr8", "chr10", "chr16"):
+            ctg_bp_jump_thr = 10_000
+        elif chr_name == "chrY":
+            ctg_bp_jump_thr = 2_000
+        else:
+            ctg_bp_jump_thr = bp_jump_thr
+
+        df_bp_jumps = df_live_hor.with_columns(
+            diff=pl.col("start") - pl.col("stop").shift(1)
+        ).filter(pl.col("diff") > ctg_bp_jump_thr)
+
+        if df_bp_jumps.is_empty():
+            adj_start = df_live_hor.get_column("start").min()
+            adj_stop = df_live_hor.get_column("stop").max()
+            adj_len = adj_stop - adj_start
+
+            if adj_len < arr_len_thr:
+                continue
+
+            dfs.append(
+                pl.DataFrame(
+                    {
+                        "chr_name": ctg_name,
+                        "start_pos": adj_start,
+                        "stop_pos": adj_stop,
+                        "len": adj_len,
+                    }
+                )
+            )
+            continue
+
+        starts, stops = [], []
+        for i, row in enumerate(df_bp_jumps.iter_rows()):
+            prev_row = pl.DataFrame() if i == 0 else df_bp_jumps.slice(i - 1)
+            next_row = df_bp_jumps.slice(i + 1)
+
+            if prev_row.is_empty():
+                starts.append(df_chr.get_column("start").min())
+                stops.append(
+                    df_chr.filter(pl.col("start") < row[1]).row(-1, named=True)["stop"]
+                )
+
+            if next_row.is_empty():
+                starts.append(row[1])
+                stops.append(df_chr.get_column("stop").max())
+            else:
+                starts.append(row[1])
+                stops.append(
+                    df_chr.filter(
+                        pl.col("start") < next_row.get_column("start")[0]
+                    ).row(-1, named=True)["stop"]
+                )
+
+        lens = []
+        chr_mer_filter = None
+        if chr_name == "chr10" or chr_name == "chr20":
+            chr_mer_filter = pl.col("mer") >= 5
+        elif chr_name == "chrY":
+            chr_mer_filter = pl.col("mer") >= 30
+        elif chr_name == "chr17":
+            chr_mer_filter = pl.col("mer") >= 4
+
+        for start, stop in zip(starts, stops):
+            df_slice = (
+                df_chr.filter(pl.col("start") >= start, pl.col("stop") <= stop)
+                .with_columns(bp_jump=pl.col("start") - pl.col("stop").shift(1))
+                .fill_null(0)
+            )
+            # Filter out mers based on chr.
+            if chr_mer_filter is not None:
+                df_slice = df_slice.filter(chr_mer_filter)
+
+            if df_slice.is_empty():
+                lens.append(0)
+                continue
+            df_slice_dst = (
+                df_slice.get_column("stop").max() - df_slice.get_column("start").min()
+            )
+            lens.append(df_slice_dst)
+
+        lf = pl.LazyFrame(
+            {
+                "chr_name": ctg_name,
+                "start_pos": starts,
+                "stop_pos": stops,
+                "len": lens,
+            }
+        )
+        # As with the jump threshold: a per-contig local, never the argument.
+        if chr_name in ("chr8", "chr10", "chr17", "chrY"):
+            ctg_arr_len_thr = 100_000
+        else:
+            ctg_arr_len_thr = arr_len_thr
+
+        dfs.append(lf.filter(pl.col("len") > ctg_arr_len_thr).collect())
+
+    # `pl.concat` raises on an empty list; nothing passed the filters.
+    if not dfs:
+        return 0
+
+    df_all_dsts: pl.DataFrame = pl.concat(dfs)
+    (
+        df_all_dsts.with_columns(
+            sort_idx=pl.col("chr_name")
+            .str.extract("chr([0-9XY]+)")
+            .replace({"X": "23", "Y": "24"})
+            .cast(pl.Int32)
+        )
+        .sort(by=["sort_idx", "chr_name", "start_pos"])
+        .select(DEF_OUTPUT_BED_COLS)
+        .write_csv(output, include_header=False, separator="\t")
+    )
+    return 0
diff --git a/censtats/length/constants.py b/censtats/length/constants.py
new file mode 100644
index 0000000..54c392a
--- /dev/null
+++ b/censtats/length/constants.py
@@ -0,0 +1,9 @@
+import re
+
+
+RGX_CHR = re.compile(r"chr[0-9XY]{1,2}")
+HOR_BP_LEN = 170
+DEF_ARR_LEN_THR = 30_000
+DEF_BP_JUMP_LEN_THR = 100_000
+DEF_EXP_STV_ROW_BED_COLS = ["chr", "start", "stop", "hor", "other", "strand"]
+DEF_OUTPUT_BED_COLS = ["chr_name", "start_pos", "stop_pos", "len"]
diff --git a/censtats/main.py
b/censtats/main.py
index 2a90082..057424c 100644
--- a/censtats/main.py
+++ b/censtats/main.py
@@ -2,6 +2,7 @@
 from typing import Any, TYPE_CHECKING
 
 from .status.cli import add_status_cli, check_cens_status
+from .length.cli import add_hor_length_cli, calculate_hor_length
 
 if TYPE_CHECKING:
     SubArgumentParser = argparse._SubParsersAction[argparse.ArgumentParser]
@@ -13,6 +14,7 @@ def main() -> int:
     ap = argparse.ArgumentParser(description="Centromere statistics toolkit.")
     sub_ap = ap.add_subparsers(dest="cmd")
     add_status_cli(sub_ap)
+    add_hor_length_cli(sub_ap)
 
     args = ap.parse_args()
 
@@ -30,7 +32,9 @@ def main() -> int:
             restrict_14_22=args.restrict_14_22,
         )
     elif args.cmd == "length":
-        raise NotImplementedError("Length command not implemented.")
+        return calculate_hor_length(
+            args.input, args.bp_jump_thr, args.arr_len_thr, args.output
+        )
     else:
         raise ValueError(f"Unknown command: {args.cmd}")
 
diff --git a/test/helpers/__init__.py b/test/helpers/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/test/helpers/integration.py b/test/helpers/integration.py
new file mode 100644
index 0000000..cdbd2be
--- /dev/null
+++ b/test/helpers/integration.py
@@ -0,0 +1,37 @@
+import subprocess
+
+
+def run_integration_test(*args: str, expected_output: str) -> None:
+    """
+    Run a command and compare its tab-separated stdout with an expected file.
+
+    Rows are compared after sorting, so row order does not matter.
+    Blank lines are ignored on both sides.
+
+    ### Parameters
+    `args`
+        Command and its arguments, passed to `subprocess.run` as a list.
+    `expected_output`
+        Path to the file holding the expected tab-separated rows.
+
+    ### Raises
+    `subprocess.CalledProcessError` if the command exits with a non-zero status.
+    `AssertionError` if the rows differ.
+    """
+    process = subprocess.run(
+        [*args],
+        capture_output=True,
+        check=True,
+    )
+    res = sorted(
+        line.strip().split("\t")
+        for line in process.stdout.decode().split("\n")
+        if line.strip()
+    )
+    with open(expected_output, "rt") as exp_res_fh:
+        # Lines read from a file keep their newline and are always truthy,
+        # so emptiness has to be tested on the stripped text.
+        exp_res = sorted(
+            line.strip().split("\t") for line in exp_res_fh.readlines() if line.strip()
+        )
+    assert res == exp_res
diff --git a/test/length/__init__.py b/test/length/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/test/length/expected/AS-HOR_chm13_lengths.bed b/test/length/expected/AS-HOR_chm13_lengths.bed
new file mode 100644
index 0000000..e711c07
--- /dev/null
+++ b/test/length/expected/AS-HOR_chm13_lengths.bed
@@ -0,0 +1,26 @@
+chr1:121100001-127300000 121796049 126300488 4504439 +chr2:91800001-95600000 92316506 94672686 2356180 +chr3:89850001-97000000 91735802 92595822 860020 +chr3:89850001-97000000 95863533 96414943 551410 +chr4:49200001-55800000 49705154 50433573 728419 +chr4:49200001-55800000 52115487 54870511 2755024 +chr4:49200001-55800000 54980290 55199795 219505 +chr5:45650001-51600000 47077202 49596627 2519425 +chr6:57750001-63100000 58286707 61058391 2771684 +chr7:57650001-64700000 60414373 63714498 3300125 +chr8:43350001-47450000 44243546 46325081 2081535 +chr9:44200001-48100000 44952797 47585318 2632521 +chr10:38500001-42550000 39631089 41664590 2030117 +chr11:48300001-55700000 51023872 54413487 3389615 +chr12:33800001-38500000 34620840 37203843 2583003 +chr13:10650001-18100000 15547595 17509783 1962188 +chr14:5600001-13300000 10089210 12708411 2619201 +chr15:13500001-18250000 16677773 17694129 1016356 +chr16:32400001-38950000 35848276 37829524 1981248 +chr17:22850001-28650000 23892422 27488300 3595878 +chr18:15050001-21650000 15966042 20933551 4967509 +chr19:23850001-30750000 25808855 29768170 3959315 +chr20:25800001-32500000 26925855 29099656 2173801 +chr21:7700001-11850000 10962852 11306205 343353 +chr22:8000001-17400000 12786986 15711065 2924079 +chrX:56950001-61750000 57818585 60927025 3108440 diff --git a/test/length/input/AS-HOR-vs-chm13_cens_v18.stv_row.all.bed b/test/length/input/AS-HOR-vs-chm13_cens_v18.stv_row.all.bed new file mode 100644 index 0000000..5dd06f6 --- /dev/null +++ b/test/length/input/AS-HOR-vs-chm13_cens_v18.stv_row.all.bed @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4cc0df305d8f6f7081e7201fb9c320eff10e10116baec59e8084d68eec6c5e2 +size 5237795 diff --git a/test/length/test_integration.py b/test/length/test_integration.py new file mode 100644 index 0000000..76808ef --- /dev/null +++ b/test/length/test_integration.py @@ -0,0 +1,27 @@ +import pytest + +from test.helpers.integration import run_integration_test + + 
+@pytest.mark.parametrize(
+    ["input_stv_row_bed", "expected_hor_len_bed"],
+    [
+        (
+            "test/length/input/AS-HOR-vs-chm13_cens_v18.stv_row.all.bed",
+            "test/length/expected/AS-HOR_chm13_lengths.bed",
+        ),
+    ],
+)
+def test_check_cens_status(  # NOTE(review): name mirrors the status test; this runs `length`.
+    input_stv_row_bed: str,
+    expected_hor_len_bed: str,
+):
+    run_integration_test(
+        "python",
+        "-m",
+        "censtats.main",
+        "length",
+        "-i",
+        input_stv_row_bed,
+        expected_output=expected_hor_len_bed,
+    )
diff --git a/test/status/__init__.py b/test/status/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/test/expected/correct_chr21_cens.tsv b/test/status/expected/correct_chr21_cens.tsv
similarity index 100%
rename from test/expected/correct_chr21_cens.tsv
rename to test/status/expected/correct_chr21_cens.tsv
diff --git a/test/expected/correct_chr21_cens_false_neg_mismap.tsv b/test/status/expected/correct_chr21_cens_false_neg_mismap.tsv
similarity index 100%
rename from test/expected/correct_chr21_cens_false_neg_mismap.tsv
rename to test/status/expected/correct_chr21_cens_false_neg_mismap.tsv
diff --git a/test/expected/correct_chr21_chr13_cens_mismap.tsv b/test/status/expected/correct_chr21_chr13_cens_mismap.tsv
similarity index 100%
rename from test/expected/correct_chr21_chr13_cens_mismap.tsv
rename to test/status/expected/correct_chr21_chr13_cens_mismap.tsv
diff --git a/test/expected/correct_chr22_cens.tsv b/test/status/expected/correct_chr22_cens.tsv
similarity index 100%
rename from test/expected/correct_chr22_cens.tsv
rename to test/status/expected/correct_chr22_cens.tsv
diff --git a/test/expected/correct_chr4_cens_partials.tsv b/test/status/expected/correct_chr4_cens_partials.tsv
similarity index 100%
rename from test/expected/correct_chr4_cens_partials.tsv
rename to test/status/expected/correct_chr4_cens_partials.tsv
diff --git a/test/expected/correct_chr9_cens_partials.tsv b/test/status/expected/correct_chr9_cens_partials.tsv
similarity index 100%
rename from
test/expected/correct_chr9_cens_partials.tsv rename to test/status/expected/correct_chr9_cens_partials.tsv diff --git a/test/input/chm13_chm1_cens_v21.trimmed.fa.noheader.out b/test/status/input/chm13_chm1_cens_v21.trimmed.fa.noheader.out similarity index 100% rename from test/input/chm13_chm1_cens_v21.trimmed.fa.noheader.out rename to test/status/input/chm13_chm1_cens_v21.trimmed.fa.noheader.out diff --git a/test/input/chm13_chm1_cens_v21.trimmed.fa.noheader.out.png b/test/status/input/chm13_chm1_cens_v21.trimmed.fa.noheader.out.png similarity index 100% rename from test/input/chm13_chm1_cens_v21.trimmed.fa.noheader.out.png rename to test/status/input/chm13_chm1_cens_v21.trimmed.fa.noheader.out.png diff --git a/test/input/chr21_cens.fa.out b/test/status/input/chr21_cens.fa.out similarity index 100% rename from test/input/chr21_cens.fa.out rename to test/status/input/chr21_cens.fa.out diff --git a/test/input/chr21_cens.png b/test/status/input/chr21_cens.png similarity index 100% rename from test/input/chr21_cens.png rename to test/status/input/chr21_cens.png diff --git a/test/input/chr21_cens_false_neg_mismap.fa.out b/test/status/input/chr21_cens_false_neg_mismap.fa.out similarity index 100% rename from test/input/chr21_cens_false_neg_mismap.fa.out rename to test/status/input/chr21_cens_false_neg_mismap.fa.out diff --git a/test/input/chr21_cens_false_neg_mismap.png b/test/status/input/chr21_cens_false_neg_mismap.png similarity index 100% rename from test/input/chr21_cens_false_neg_mismap.png rename to test/status/input/chr21_cens_false_neg_mismap.png diff --git a/test/input/chr21_chr13_cens_mismap.fa.out b/test/status/input/chr21_chr13_cens_mismap.fa.out similarity index 100% rename from test/input/chr21_chr13_cens_mismap.fa.out rename to test/status/input/chr21_chr13_cens_mismap.fa.out diff --git a/test/input/chr21_chr13_cens_mismap.png b/test/status/input/chr21_chr13_cens_mismap.png similarity index 100% rename from test/input/chr21_chr13_cens_mismap.png rename 
to test/status/input/chr21_chr13_cens_mismap.png
diff --git a/test/input/chr22_cens.fa.out b/test/status/input/chr22_cens.fa.out
similarity index 100%
rename from test/input/chr22_cens.fa.out
rename to test/status/input/chr22_cens.fa.out
diff --git a/test/input/chr22_cens.png b/test/status/input/chr22_cens.png
similarity index 100%
rename from test/input/chr22_cens.png
rename to test/status/input/chr22_cens.png
diff --git a/test/input/chr4_cens_partials.fa.out b/test/status/input/chr4_cens_partials.fa.out
similarity index 100%
rename from test/input/chr4_cens_partials.fa.out
rename to test/status/input/chr4_cens_partials.fa.out
diff --git a/test/input/chr4_cens_partials.png b/test/status/input/chr4_cens_partials.png
similarity index 100%
rename from test/input/chr4_cens_partials.png
rename to test/status/input/chr4_cens_partials.png
diff --git a/test/input/chr9_cens_partials.fa.out b/test/status/input/chr9_cens_partials.fa.out
similarity index 100%
rename from test/input/chr9_cens_partials.fa.out
rename to test/status/input/chr9_cens_partials.fa.out
diff --git a/test/input/chr9_cens_partials.png b/test/status/input/chr9_cens_partials.png
similarity index 100%
rename from test/input/chr9_cens_partials.png
rename to test/status/input/chr9_cens_partials.png
diff --git a/test/plot_inputs.sh b/test/status/plot_inputs.sh
similarity index 100%
rename from test/plot_inputs.sh
rename to test/status/plot_inputs.sh
diff --git a/test/plot_repeatStructure_onlyRM.R b/test/status/plot_repeatStructure_onlyRM.R
similarity index 100%
rename from test/plot_repeatStructure_onlyRM.R
rename to test/status/plot_repeatStructure_onlyRM.R
diff --git a/test/status/test_integration.py b/test/status/test_integration.py
new file mode 100644
index 0000000..8531a0a
--- /dev/null
+++ b/test/status/test_integration.py
@@ -0,0 +1,54 @@
+import pytest
+from test.helpers.integration import run_integration_test
+
+
+@pytest.mark.parametrize(
+    ["input_rm_out", "expected_rc_list", "additional_args"],
+    [
+        (
+            "test/status/input/chr21_cens.fa.out",
+            "test/status/expected/correct_chr21_cens.tsv",
+            (),
+        ),
+        (
+            "test/status/input/chr22_cens.fa.out",
+            "test/status/expected/correct_chr22_cens.tsv",
+            (),
+        ),
+        (
+            "test/status/input/chr21_chr13_cens_mismap.fa.out",
+            "test/status/expected/correct_chr21_chr13_cens_mismap.tsv",
+            ("--restrict_13_21",),
+        ),
+        (
+            "test/status/input/chr9_cens_partials.fa.out",
+            "test/status/expected/correct_chr9_cens_partials.tsv",
+            (),
+        ),
+        (
+            "test/status/input/chr4_cens_partials.fa.out",
+            "test/status/expected/correct_chr4_cens_partials.tsv",
+            (),
+        ),
+        (
+            "test/status/input/chr21_cens_false_neg_mismap.fa.out",
+            "test/status/expected/correct_chr21_cens_false_neg_mismap.tsv",
+            ("--restrict_13_21",),
+        ),
+    ],
+)
+def test_check_cens_status(
+    input_rm_out: str, expected_rc_list: str, additional_args: tuple[str, ...]
+):
+    run_integration_test(
+        "python",
+        "-m",
+        "censtats.main",
+        "status",
+        "-i",
+        input_rm_out,
+        "-r",
+        "test/status/input/chm13_chm1_cens_v21.trimmed.fa.noheader.out",
+        *additional_args,
+        expected_output=expected_rc_list,
+    )
diff --git a/test/test_integration.py b/test/test_integration.py
deleted file mode 100644
index a58bdf8..0000000
--- a/test/test_integration.py
+++ /dev/null
@@ -1,57 +0,0 @@
-import pytest
-import subprocess
-
-
-@pytest.mark.parametrize(
-    ["input_rm_out", "expected_rc_list", "additional_args"],
-    [
-        ("test/input/chr21_cens.fa.out", "test/expected/correct_chr21_cens.tsv", ()),
-        ("test/input/chr22_cens.fa.out", "test/expected/correct_chr22_cens.tsv", ()),
-        (
-            "test/input/chr21_chr13_cens_mismap.fa.out",
-            "test/expected/correct_chr21_chr13_cens_mismap.tsv",
-            tuple(["--restrict_13_21"]),
-        ),
-        (
-            "test/input/chr9_cens_partials.fa.out",
-            "test/expected/correct_chr9_cens_partials.tsv",
-            (),
-        ),
-        (
-            "test/input/chr4_cens_partials.fa.out",
-            "test/expected/correct_chr4_cens_partials.tsv",
-            (),
-        ),
-        (
-            "test/input/chr21_cens_false_neg_mismap.fa.out",
-            "test/expected/correct_chr21_cens_false_neg_mismap.tsv",
-            tuple(["--restrict_13_21"]),
-        ),
-    ],
-)
-def test_check_cens_status(
-    input_rm_out: str, expected_rc_list: str, additional_args: tuple[str]
-):
-    process = subprocess.run(
-        [
-            "python",
-            "-m",
-            "censtats.main",
-            "status",
-            "-i",
-            input_rm_out,
-            "-r",
-            "test/input/chm13_chm1_cens_v21.trimmed.fa.noheader.out",
-            *additional_args,
-        ],
-        capture_output=True,
-        check=True,
-    )
-    res = sorted(
-        line.split("\t") for line in process.stdout.decode().split("\n") if line
-    )
-    with open(expected_rc_list, "rt") as exp_res_fh:
-        exp_res = sorted(
-            line.strip().split("\t") for line in exp_res_fh.readlines() if line
-        )
-    assert res == exp_res