From 3380a05a1883e6c5ec509ce764af975b1620fc02 Mon Sep 17 00:00:00 2001 From: Wolf <22243650+Wolfffff@users.noreply.github.com> Date: Tue, 21 Nov 2023 09:34:05 -0600 Subject: [PATCH] fix: Fixing workflow for single-end sequencing (#70) * Correcting path for `cutadapt_se` and params for cutadapt wrapper. * fix formatting * better error message in gene2symbol.R * apply dirty fix in biomart.yaml the same fix as applied here: https://github.com/snakemake-workflows/rna-seq-kallisto-sleuth/commit/5bf0c528e7e4cf1687ba72b95a547ad16a33130e original solution can be found here: https://stackoverflow.com/a/77370920 --------- Co-authored-by: David Laehnemann --- workflow/envs/biomart.yaml | 4 ++++ workflow/rules/common.smk | 4 +++- workflow/rules/trim.smk | 2 +- workflow/scripts/gene2symbol.R | 11 ++++++++++- 4 files changed, 18 insertions(+), 3 deletions(-) diff --git a/workflow/envs/biomart.yaml b/workflow/envs/biomart.yaml index 04342c7..e9fe9c7 100644 --- a/workflow/envs/biomart.yaml +++ b/workflow/envs/biomart.yaml @@ -5,3 +5,7 @@ channels: dependencies: - bioconductor-biomart =2.56 - r-tidyverse =2.0 + # remove once we can update to bioconductor-biomart of the 3.18 release, which will + # include this proper fix for the underlying compatibility issue: + # https://github.com/Bioconductor/BiocFileCache/pull/50 + - r-dbplyr=2.3.4 diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 515aef1..1817063 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -119,7 +119,9 @@ def get_fq(wildcards): ) ) # single end sample - return {"fq1": "trimmed/{sample}_{unit}_single.fastq.gz".format(**wildcards)} + return { + "fq1": "results/trimmed/{sample}_{unit}_single.fastq.gz".format(**wildcards) + } else: # no trimming, use raw reads u = units.loc[(wildcards.sample, wildcards.unit)] diff --git a/workflow/rules/trim.smk b/workflow/rules/trim.smk index 9acf527..829e4d5 100644 --- a/workflow/rules/trim.smk +++ b/workflow/rules/trim.smk @@ -49,7 +49,7 @@ rule cutadapt_se: "logs/cutadapt/{sample}_{unit}.log", params: extra=config["params"]["cutadapt-se"], - adapters_r1=lambda w: str(units.loc[w.sample].loc[w.unit, "adapters"]), + adapters=lambda w: str(units.loc[w.sample].loc[w.unit, "adapters"]), threads: 8 wrapper: "v1.21.4/bio/cutadapt/se" diff --git a/workflow/scripts/gene2symbol.R b/workflow/scripts/gene2symbol.R index c14ef85..c2abe01 100644 --- a/workflow/scripts/gene2symbol.R +++ b/workflow/scripts/gene2symbol.R @@ -1,5 +1,7 @@ library(biomaRt) library(tidyverse) +# useful error messages upon aborting +library("cli") # this variable holds a mirror name until # useEnsembl succeeds ("www" is last, because @@ -24,7 +26,14 @@ while ( class(mart)[[1]] != "Mart" ) { # change or make configurable if you want more or # less rounds of tries of all the mirrors if (rounds >= 3) { - stop( + cli_abort( + str_c( + "Have tried all 4 available Ensembl biomaRt mirrors ", + rounds, + " times. You might have a connection problem, or no mirror is responsive.\n", + "The last error message was:\n", + message(e) + ) ) } # hop to next mirror