Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add Sentieon path argument to config #1461

Merged
merged 28 commits into from
Jul 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion BALSAMIC/commands/config/case.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
OPTION_PANEL_BED,
OPTION_PON_CNN,
OPTION_QUALITY_TRIM,
OPTION_SENTIEON_INSTALL_DIR,
OPTION_SENTIEON_LICENSE,
OPTION_SWEGEN_SNV,
OPTION_SWEGEN_SV,
OPTION_TUMOR_SAMPLE_NAME,
Expand All @@ -43,7 +45,11 @@
from BALSAMIC.constants.analysis import BIOINFO_TOOL_ENV, AnalysisWorkflow, Gender
from BALSAMIC.constants.cache import GenomeVersion
from BALSAMIC.constants.constants import FileType
from BALSAMIC.constants.paths import CONTAINERS_DIR
from BALSAMIC.constants.paths import (
CONTAINERS_DIR,
SENTIEON_DNASCOPE_MODEL,
SENTIEON_TNSCOPE_MODEL,
)
from BALSAMIC.constants.workflow_params import VCF_DICT
from BALSAMIC.models.config import ConfigModel
from BALSAMIC.utils.cli import (
Expand Down Expand Up @@ -84,6 +90,8 @@
@OPTION_PANEL_BED
@OPTION_PON_CNN
@OPTION_QUALITY_TRIM
@OPTION_SENTIEON_INSTALL_DIR
@OPTION_SENTIEON_LICENSE
@OPTION_SWEGEN_SNV
@OPTION_SWEGEN_SV
@OPTION_TUMOR_SAMPLE_NAME
Expand Down Expand Up @@ -117,6 +125,8 @@ def case_config(
panel_bed: Path,
pon_cnn: Path,
quality_trim: bool,
sentieon_install_dir: Path,
ivadym marked this conversation as resolved.
Show resolved Hide resolved
sentieon_license: str,
swegen_snv: Path,
swegen_sv: Path,
tumor_sample_name: str,
Expand Down Expand Up @@ -188,6 +198,13 @@ def case_config(
directory.mkdir(exist_ok=True)

config_collection_dict = ConfigModel(
sentieon={
"sentieon_install_dir": sentieon_install_dir,
"sentieon_license": sentieon_license,
"sentieon_exec": Path(sentieon_install_dir, "bin", "sentieon").as_posix(),
"dnascope_model": SENTIEON_DNASCOPE_MODEL.as_posix(),
"tnscope_model": SENTIEON_TNSCOPE_MODEL.as_posix(),
},
mathiasbio marked this conversation as resolved.
Show resolved Hide resolved
QC={
"quality_trim": quality_trim,
"adapter_trim": adapter_trim,
Expand Down
19 changes: 18 additions & 1 deletion BALSAMIC/commands/config/pon.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
OPTION_FASTQ_PATH,
OPTION_GENOME_INTERVAL,
OPTION_GENOME_VERSION,
OPTION_SENTIEON_INSTALL_DIR,
OPTION_SENTIEON_LICENSE,
OPTION_PANEL_BED,
OPTION_PON_VERSION,
OPTION_PON_WORKFLOW,
Expand All @@ -26,7 +28,11 @@
from BALSAMIC.constants.analysis import BIOINFO_TOOL_ENV, PONWorkflow
from BALSAMIC.constants.cache import GenomeVersion
from BALSAMIC.constants.constants import FileType
from BALSAMIC.constants.paths import CONTAINERS_DIR
from BALSAMIC.constants.paths import (
CONTAINERS_DIR,
SENTIEON_DNASCOPE_MODEL,
SENTIEON_TNSCOPE_MODEL,
)
from BALSAMIC.models.config import ConfigModel
from BALSAMIC.utils.cli import (
generate_graph,
Expand All @@ -49,6 +55,8 @@
@OPTION_FASTQ_PATH
@OPTION_GENOME_VERSION
@OPTION_GENOME_INTERVAL
@OPTION_SENTIEON_INSTALL_DIR
@OPTION_SENTIEON_LICENSE
@OPTION_PANEL_BED
@OPTION_PON_WORKFLOW
@OPTION_PON_VERSION
Expand All @@ -66,6 +74,8 @@ def pon_config(
fastq_path: Path,
genome_version: GenomeVersion,
genome_interval: Path,
sentieon_install_dir: Path,
sentieon_license: str,
panel_bed: Path,
pon_workflow: PONWorkflow,
quality_trim: bool,
Expand Down Expand Up @@ -105,6 +115,13 @@ def pon_config(
directory.mkdir(exist_ok=True)

config_collection_dict = ConfigModel(
sentieon={
"sentieon_install_dir": sentieon_install_dir,
"sentieon_license": sentieon_license,
"sentieon_exec": Path(sentieon_install_dir, "bin", "sentieon").as_posix(),
"dnascope_model": SENTIEON_DNASCOPE_MODEL.as_posix(),
"tnscope_model": SENTIEON_TNSCOPE_MODEL.as_posix(),
},
QC={
"adapter_trim": adapter_trim,
"quality_trim": quality_trim,
Expand Down
14 changes: 14 additions & 0 deletions BALSAMIC/commands/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,20 @@
help="Sample configuration file",
)

# Required CLI option pointing at the Sentieon installation directory.
# The path must exist and is resolved to an absolute path. Regular files are
# rejected here (file_okay=False) so that a wrong argument fails with a clear
# click usage error instead of surfacing later during config validation.
OPTION_SENTIEON_INSTALL_DIR = click.option(
    "--sentieon-install-dir",
    type=click.Path(exists=True, file_okay=False, dir_okay=True, resolve_path=True),
    required=True,
    help="Path to Sentieon install directory",
)

# Required CLI option carrying the Sentieon license as a plain string.
# The help text asks for the "IP:Port" form, but the option type is an
# unconstrained click.STRING, so the format itself is not checked here.
OPTION_SENTIEON_LICENSE = click.option(
    "--sentieon-license",
    required=True,
    type=click.STRING,
    help="Sentieon license in format IP:Port",
)
ivadym marked this conversation as resolved.
Show resolved Hide resolved

OPTION_SHOW_ONLY_MISSING_FILES = click.option(
"-m",
"--show-only-missing",
Expand Down
4 changes: 2 additions & 2 deletions BALSAMIC/constants/paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@

# Sentieon specific constants
SENTIEON_MODELS_DIR: Path = Path(BALSAMIC_DIR, "assets", "sentieon_models")
SENTIEON_DNASCOPE_DIR: Path = Path(
SENTIEON_DNASCOPE_MODEL: Path = Path(
SENTIEON_MODELS_DIR, "SentieonDNAscopeModelBeta0.4a-201808.05.model"
)
SENTIEON_TNSCOPE_DIR: Path = Path(
SENTIEON_TNSCOPE_MODEL: Path = Path(
SENTIEON_MODELS_DIR, "SentieonTNscopeModel_GiAB_HighAF_LowFP-201711.05.model"
)

Expand Down
19 changes: 19 additions & 0 deletions BALSAMIC/models/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,23 @@ class CustomFilters(BaseModel):
umi_min_reads: str | None = None


class Sentieon(BaseModel):
    """
    Class holding the Sentieon-specific settings of a BALSAMIC configuration.

    Attributes:
        sentieon_install_dir: Field(required); path to Sentieon installation directory
        sentieon_exec: Field(required); path to Sentieon executable
        sentieon_license: Field(required); Sentieon license string
        dnascope_model: Field(required); path to the Sentieon DNAscope model file
        tnscope_model: Field(required); path to the Sentieon TNscope model file
    """

    # Path-like fields are stored as strings and passed through the is_dir /
    # is_file validators (defined elsewhere) after standard parsing.
    sentieon_install_dir: Annotated[str, AfterValidator(is_dir)]
    sentieon_exec: Annotated[str, AfterValidator(is_file)]
    # Stored as given; no validator is attached to the license string.
    sentieon_license: str
    dnascope_model: Annotated[str, AfterValidator(is_file)]
    tnscope_model: Annotated[str, AfterValidator(is_file)]


class ConfigModel(BaseModel):
"""
Class providing common functions and variables for different balsamic workflows.
Expand All @@ -194,6 +211,7 @@ class ConfigModel(BaseModel):
background_variants: Field(Path(optional)); path to BACKGROUND VARIANTS for UMI
analysis: Field(AnalysisModel); Pydantic model containing workflow variables
custom_filters: Field(CustomFilters); custom parameters for variant filtering
sentieon: Field(required); Sentieon model attributes

This class also contains functions that help retrieve sample and file information,
facilitating BALSAMIC run operations in Snakemake.
Expand All @@ -220,6 +238,7 @@ class ConfigModel(BaseModel):
background_variants: Optional[str] = None
analysis: AnalysisModel
custom_filters: CustomFilters | None = None
sentieon: Sentieon

@field_validator("reference")
def abspath_as_str(cls, reference: Dict[str, Path]):
Expand Down
12 changes: 6 additions & 6 deletions BALSAMIC/snakemake_rules/align/sentieon_alignment.rule
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ rule sentieon_align_sort:
params:
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
header = params.common.align_header,
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
sample_id = "{sample}",
sample_type = lambda wildcards: config_model.get_sample_type_by_name(wildcards.sample, uppercase=True),
fastq_pattern = "{fastq_pattern}"
Expand Down Expand Up @@ -53,8 +53,8 @@ rule sentieon_dedup:
Path(benchmark_dir, "sentieon_dedup_{sample_type}.{sample}.tsv").as_posix()
params:
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
sample_id = "{sample}"
threads:
get_threads(cluster_config, 'sentieon_dedup')
Expand Down Expand Up @@ -98,8 +98,8 @@ rule sentieon_realign:
Path(benchmark_dir, "sentieon_realign_{sample_type}.{sample}.tsv").as_posix()
params:
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
sample_id = "{sample}"
threads:
get_threads(cluster_config, 'sentieon_realign')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ rule sentieon_wgs_metrics:
min_base_qual = '10',
gene_list = config["reference"]["refgene_txt"],
cov_threshold = repeat("--cov_thresh", [50, 100, 150, 200, 250]),
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
sample = '{sample}'
threads:
get_threads(cluster_config, 'sentieon_wgs_metrics')
Expand Down
12 changes: 6 additions & 6 deletions BALSAMIC/snakemake_rules/umi/sentieon_consensuscall.rule
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ rule sentieon_consensuscall_umi:
Path(benchmark_dir, "sentieon_consensuscall_umi_{sample}.tsv").as_posix()
params:
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
sentieon_install_dir = config["SENTIEON_INSTALL_DIR"],
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
sentieon_install_dir = config_model.sentieon.sentieon_install_dir,
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
tag = params.umiconsensuscall.tag,
ip_format = params.umiconsensuscall.align_format,
sample_id = '{sample}'
Expand Down Expand Up @@ -54,9 +54,9 @@ rule sentieon_bwa_umiconsensus:
Path(benchmark_dir, "sentieon_bwa_umiconsensus_{sample}.tsv").as_posix()
params:
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
sentieon_install_dir = config["SENTIEON_INSTALL_DIR"],
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
sentieon_install_dir = config_model.sentieon.sentieon_install_dir,
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
sheader = params.umicommon.align_header,
ip_bases = params.umicommon.align_intbases,
sample_id = "{sample}"
Expand Down
12 changes: 6 additions & 6 deletions BALSAMIC/snakemake_rules/umi/sentieon_umiextract.rule
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ rule sentieon_umiextract:
Path(benchmark_dir, "sentieon_umiextract_{sample}.tsv").as_posix()
params:
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
sentieon_install_dir = config["SENTIEON_INSTALL_DIR"],
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
sentieon_install_dir = config_model.sentieon.sentieon_install_dir,
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
ds_params = params.umiextract.read_structure,
sample = "{sample}"
threads:
Expand Down Expand Up @@ -50,9 +50,9 @@ rule sentieon_bwa_umiextract:
Path(benchmark_dir, "sentieon_bwa_umiextract_{sample}.tsv").as_posix()
params:
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
sentieon_install_dir = config["SENTIEON_INSTALL_DIR"],
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
sentieon_install_dir = config_model.sentieon.sentieon_install_dir,
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
sample_id = '{sample}',
sheader = params.umicommon.align_header,
ip_bases = params.umicommon.align_intbases
Expand Down
4 changes: 2 additions & 2 deletions BALSAMIC/snakemake_rules/umi/sentieon_varcall_tnscope.rule
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ rule sentieon_tnscope_umi:
params:
housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "research"},
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
tumor_af = params.umicommon.filter_tumor_af,
algo = params.tnscope_umi.algo,
disable_detect = params.tnscope_umi.disable_detect,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ rule sentieon_tnscope_umi_tn:
params:
housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "research"},
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
tumor_af = params.umicommon.filter_tumor_af,
algo = params.tnscope_umi.algo,
disable_detect = params.tnscope_umi.disable_detect,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ rule sentieon_DNAscope_gnomad:
params:
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
pcr_model = params.common.pcr_model,
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
sentieon_ml_dnascope = config["SENTIEON_DNASCOPE"],
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
sentieon_ml_dnascope = config_model.sentieon.dnascope_model,
sample = "{sample}"
benchmark:
Path(benchmark_dir, "sentieon_DNAscope_gnomad_{sample}.tsv").as_posix()
Expand Down
4 changes: 2 additions & 2 deletions BALSAMIC/snakemake_rules/variant_calling/germline.rule
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ rule sentieon_DNAscope:
Path(benchmark_dir, "sentieon_dnascope_{sample_type}.tsv").as_posix()
params:
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
sample = '{sample_type}'
threads:
get_threads(cluster_config, 'sentieon_DNAscope')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ rule sentieon_DNAscope:
params:
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
pcr_model = params.common.pcr_model,
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
sentieon_ml_dnascope = config["SENTIEON_DNASCOPE"],
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
sentieon_ml_dnascope = config_model.sentieon.dnascope_model,
sample = "{sample_type}"
benchmark:
Path(benchmark_dir, 'sentieon_DNAscope_' + "{sample_type}.tsv").as_posix()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ rule sentieon_base_calibration:
Path(benchmark_dir, "sentieon_base_calibration_{sample_type}.tsv").as_posix()
params:
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
sample = "sample_type"
threads:
get_threads(cluster_config, 'sentieon_base_calibration')
Expand Down Expand Up @@ -92,8 +92,8 @@ rule sentieon_TNscope_tumor_only:
tumor_options = VARCALL_PARAMS["tnscope"]["tumor"],
pon = " " if get_pon(config) is None else " ".join(["--pon", get_pon(config)]),
pcr_model = params.common.pcr_model,
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
case_name = config["analysis"]["case_id"]
threads:
get_threads(cluster_config, 'sentieon_TNscope_tumor_only')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ rule sentieon_base_calibration:
Path(benchmark_dir, "sentieon_base_calibration_{sample_type}.tsv").as_posix()
params:
tmpdir = tempfile.mkdtemp(prefix=tmp_dir),
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
sample = "{sample_type}"
threads:
get_threads(cluster_config, 'sentieon_base_calibration')
Expand Down Expand Up @@ -85,9 +85,9 @@ rule sentieon_TNscope:
pcr_model = params.common.pcr_model,
tumor_options = VARCALL_PARAMS["tnscope"]["tumor"],
normal_options = VARCALL_PARAMS["tnscope"]["normal"],
sentieon_ml_tnscope = config["SENTIEON_TNSCOPE"],
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
sentieon_ml_tnscope = config_model.sentieon.tnscope_model,
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
case_name = config["analysis"]["case_id"]
threads:
get_threads(cluster_config, 'sentieon_TNscope')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,8 @@ rule sentieon_TNhaplotyper_tumor_only:
params:
tumor = "TUMOR",
tmpdir= tempfile.mkdtemp(prefix=tmp_dir),
sentieon_exec = config["SENTIEON_EXEC"],
sentieon_lic = config["SENTIEON_LICENSE"],
sentieon_exec = config_model.sentieon.sentieon_exec,
sentieon_lic = config_model.sentieon.sentieon_license,
case_name = config["analysis"]["case_id"]
threads:
get_threads(cluster_config, 'sentieon_TNhaplotyper_tumor_only')
Expand Down
Loading
Loading