Skip to content

Commit

Permalink
Merge pull request #45 from ChangqingW/main
Browse files Browse the repository at this point in the history
update basilisk env: add minimap2, samtools
  • Loading branch information
ChangqingW authored Sep 25, 2024
2 parents c086f67 + 5c1990a commit 1892c9a
Show file tree
Hide file tree
Showing 31 changed files with 153 additions and 178 deletions.
20 changes: 6 additions & 14 deletions .github/workflows/check-bioc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,11 @@ name: R-CMD-check-bioc
## Note that you can always run a GHA test without the cache by using the word
## "/nocache" in the commit message.
env:
has_testthat: 'false'
has_testthat: 'true'
run_covr: 'false'
run_pkgdown: 'true'
has_RUnit: 'false'
cache-version: 'cache-v1'
run_docker: 'false'

jobs:
build-check:
Expand All @@ -52,8 +51,8 @@ jobs:
fail-fast: false
matrix:
config:
- { os: ubuntu-latest, r: '4.4', bioc: '3.19', cont: "bioconductor/bioconductor_docker:RELEASE_3_19", rspm: "https://packagemanager.rstudio.com/cran/__linux__/jammy/latest" }
- { os: macOS-latest, r: '4.4', bioc: '3.19'}
- { os: ubuntu-latest, r: '4.4', bioc: '3.20', cont: "bioconductor/bioconductor_docker:devel", rspm: "https://packagemanager.rstudio.com/cran/__linux__/jammy/latest" }
- { os: macOS-latest, r: '4.4', bioc: '3.20'}
# - { os: windows-latest, r: '4.3', bioc: '3.18'}
## Check https://github.com/r-lib/actions/tree/master/examples
## for examples using the http-user-agent
Expand All @@ -75,16 +74,6 @@ jobs:
mkdir /__w/_temp/Library
echo ".libPaths('/__w/_temp/Library')" > ~/.Rprofile
- name: Install minimap2 Ubuntu
if: runner.os == 'Linux'
run: |
apt-get update && apt-get -y install minimap2 && curl -L https://github.com/attractivechaos/k8/releases/download/v0.2.4/k8-0.2.4.tar.bz2 | tar -jxf - && mv k8-0.2.4/k8-Linux /usr/local/bin/k8
- name: Install minimap2 macOS
if: runner.os == 'macOS-latest'
run: |
brew install minimap2 && curl -L https://github.com/attractivechaos/k8/releases/download/v0.2.4/k8-0.2.4.tar.bz2 | tar -jxf - && mv k8-0.2.4/k8-Darwin /usr/local/bin/k8
## Most of these steps are the same as the ones in
## https://github.com/r-lib/actions/blob/master/examples/check-standard.yaml
## If they update their steps, we will also need to update ours.
Expand Down Expand Up @@ -153,6 +142,9 @@ jobs:
## Required for tcltk
brew install xquartz --cask
brew install minimap2
brew install samtools
- name: Install Windows system dependencies
if: runner.os == 'Windows'
run: |
Expand Down
3 changes: 2 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ export(cutadapt)
export(demultiplex_sockeye)
export(filter_annotation)
export(find_barcode)
export(find_bin)
export(find_isoform)
export(find_variants)
export(flexiplex)
Expand All @@ -30,7 +31,6 @@ export(sc_impute_transcript)
export(sc_long_multisample_pipeline)
export(sc_long_pipeline)
export(sc_mutations)
export(sys_which)
importFrom(BiocGenerics,cbind)
importFrom(BiocGenerics,colnames)
importFrom(BiocGenerics,end)
Expand Down Expand Up @@ -184,4 +184,5 @@ importFrom(utils,txtProgressBar)
importFrom(utils,write.csv)
importFrom(utils,write.table)
importFrom(withr,with_package)
importFrom(withr,with_path)
useDynLib(FLAMES, .registration=TRUE)
23 changes: 13 additions & 10 deletions R/basilisk.R
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
#' @importFrom basilisk BasiliskEnvironment
flames_env <- BasiliskEnvironment(
envname = "flames_env", pkgname = "FLAMES",
pip = c("fast-edit-distance==1.2.1", "blaze2==2.2.*", "matplotlib==3.5.3"),
pip = c("fast-edit-distance==1.2.1", "blaze2==2.2.*", "matplotlib==3.9.2"),
packages = c(
"python==3.10",
"numpy==1.25.0",
"scipy==1.11.1",
"pysam==0.21.0",
"cutadapt==4.4",
"tqdm==4.64.1",
"pandas==1.3.5",
"oarfish==0.6.2"
"python==3.12.3",
"numpy==2.1.1",
"scipy==1.14.1",
"pysam==0.22.1",
"cutadapt==4.9",
"tqdm==4.66.5",
"pandas==2.2.3",
"oarfish==0.6.2",
"minimap2==2.28",
"samtools==1.21",
"k8==0.2.5"
),
channels = c("conda-forge", "bioconda", "defaults")
channels = c("conda-forge", "bioconda")
)
4 changes: 2 additions & 2 deletions R/find_isoform.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
#' annotation <- bfc[[names(BiocFileCache::bfcadd(bfc, "annot.gtf", paste(file_url, "SIRV_isoforms_multi-fasta-annotation_C_170612a.gtf", sep = "/")))]]
#' outdir <- tempfile()
#' dir.create(outdir)
#' if (!any(is.na(sys_which(c("minimap2", "k8"))))) {
#' if (!any(is.na(find_bin(c("minimap2", "k8"))))) {
#' config <- jsonlite::fromJSON(system.file("extdata/config_sclr_nanopore_3end.json", package = "FLAMES"))
#' minimap2_align(
#' config = config,
Expand Down Expand Up @@ -219,7 +219,7 @@ annotation_to_fasta <- function(isoform_annotation, genome_fa, outdir, extract_f
#' annotation <- bfc[[names(BiocFileCache::bfcadd(bfc, "annot.gtf", paste(file_url, "SIRV_isoforms_multi-fasta-annotation_C_170612a.gtf", sep = "/")))]]
#' outdir <- tempfile()
#' dir.create(outdir)
#' if (!any(is.na(sys_which(c("minimap2", "k8"))))) {
#' if (!any(is.na(find_bin(c("minimap2", "k8"))))) {
#' config <- jsonlite::fromJSON(system.file("extdata/config_sclr_nanopore_3end.json", package = "FLAMES"))
#' minimap2_align(
#' config = config,
Expand Down
175 changes: 76 additions & 99 deletions R/minimap2_align.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
#' annotation <- bfc[[names(BiocFileCache::bfcadd(bfc, 'annot.gtf', paste(file_url, 'SIRV_isoforms_multi-fasta-annotation_C_170612a.gtf', sep = '/')))]]
#' outdir <- tempfile()
#' dir.create(outdir)
#' if (!any(is.na(sys_which(c("minimap2", "k8"))))) {
#' if (!any(is.na(find_bin(c("minimap2", "k8"))))) {
#' minimap2_align(
#' config = jsonlite::fromJSON(system.file('extdata/config_sclr_nanopore_3end.json', package = 'FLAMES')),
#' fa_file = genome_fa,
Expand All @@ -48,21 +48,21 @@ minimap2_align <- function(config, fa_file, fq_in, annot, outdir, minimap2 = NA,
}

if (missing("minimap2") || is.null(minimap2) || is.na(minimap2) || minimap2 =="") {
minimap2 <- sys_which("minimap2")
minimap2 <- find_bin("minimap2")
if (is.na(minimap2)) {
stop("minimap2 not found, please make sure it is installed and provide its path as the minimap2 argument")
}
}

if (missing("k8") || is.null(k8) || is.na(k8) || k8 =="") {
k8 <- sys_which("k8")
k8 <- find_bin("k8")
if (is.na(k8)) {
stop("k8 not found, please make sure it is installed and provide its path as the k8 argument")
}
}

if (missing("samtools") || is.null(samtools) || is.na(samtools) || samtools =="") {
samtools <- sys_which("samtools")
samtools <- find_bin("samtools")
}

minimap2_args <- c("-ax", "splice", "-t", threads, "-k14", "--secondary=no",
Expand Down Expand Up @@ -90,36 +90,19 @@ minimap2_align <- function(config, fa_file, fq_in, annot, outdir, minimap2 = NA,
# /.../FLAMES_out/tmp_splice_anno.bed12 --junc-bonus 1 -k14 --secondary=no -o
# /.../FLAMES_datasets/MuSC/FLAMES_out/tmp_align.sam --seed 2022
# /.../GRCm38.primary_assembly.genome.fa /.../trimmed_MSC.fastq.gz
if (!is.na(samtools)) {
minimap2_status <- base::system2(command = minimap2,
args = base::append(minimap2_args, c(fa_file, fq_in, "|", samtools, "view -bS -@ 4 -o",
file.path(outdir, paste0(prefix, "tmp_align.bam")), "-")))
if (!is.null(base::attr(minimap2_status, "status")) && base::attr(minimap2_status,
"status") != 0) {
stop(paste0("error running minimap2:\n", minimap2_status))
}
sort_status <- base::system2(command = samtools, args = c("sort", file.path(outdir,
paste0(prefix, "tmp_align.bam")), "-o", file.path(outdir, paste0(prefix,
"align2genome.bam"))))
index_status <- base::system2(command = samtools, args = c("index", file.path(outdir,
paste0(prefix, "align2genome.bam"))))
} else {
message("samtools not found, using Rsamtools instead")
minimap2_status <- base::system2(command = minimap2,
args = base::append(minimap2_args, c(fa_file, fq_in, "-o", file.path(outdir,
paste0(prefix, "tmp_align.sam")))))
if (!is.null(base::attr(minimap2_status, "status")) && base::attr(minimap2_status,
"status") != 0) {
stop(paste0("error running minimap2:\n", minimap2_status))
}

Rsamtools::asBam(file.path(outdir, paste0(prefix, "tmp_align.sam")), file.path(outdir,
paste0(prefix, "tmp_align")))
Rsamtools::sortBam(file.path(outdir, paste0(prefix, "tmp_align.bam")), file.path(outdir,
paste0(prefix, "align2genome")))
Rsamtools::indexBam(file.path(outdir, paste0(prefix, "align2genome.bam")))
file.remove(file.path(outdir, paste0(prefix, "tmp_align.sam")))
stopifnot("Samtools not found" = !is.na(samtools))
minimap2_status <- base::system2(command = minimap2,
args = base::append(minimap2_args, c(fa_file, fq_in, "|", samtools, "view -b -o",
file.path(outdir, paste0(prefix, "tmp_align.bam")), "-")))
if (!is.null(base::attr(minimap2_status, "status")) && base::attr(minimap2_status,
"status") != 0) {
stop(paste0("error running minimap2:\n", minimap2_status))
}
sort_status <- base::system2(command = samtools, args = c("sort", file.path(outdir,
paste0(prefix, "tmp_align.bam")), "-o", file.path(outdir, paste0(prefix,
"align2genome.bam"))))
index_status <- base::system2(command = samtools, args = c("index", file.path(outdir,
paste0(prefix, "align2genome.bam"))))
file.remove(file.path(outdir, paste0(prefix, "tmp_align.bam")))

if (config$alignment_parameters$use_junctions) {
Expand Down Expand Up @@ -158,7 +141,7 @@ minimap2_align <- function(config, fa_file, fq_in, annot, outdir, minimap2 = NA,
#' @examples
#' outdir <- tempfile()
#' dir.create(outdir)
#' if (!any(is.na(sys_which(c("minimap2", "k8"))))) {
#' if (!any(is.na(find_bin(c("minimap2", "k8"))))) {
#' annotation <- system.file('extdata', 'rps24.gtf.gz', package = 'FLAMES')
#' genome_fa <- system.file('extdata', 'rps24.fa.gz', package = 'FLAMES')
#' fasta <- annotation_to_fasta(annotation, genome_fa, outdir)
Expand All @@ -178,81 +161,67 @@ minimap2_realign <- function(config, fq_in, outdir, minimap2, samtools = NULL, p
}

if (missing("minimap2") || is.null(minimap2) || is.na(minimap2) || minimap2 =="") {
minimap2 <- sys_which("minimap2")
minimap2 <- find_bin("minimap2")
if (is.na(minimap2)) {
stop("minimap2 not found, please make sure it is installed and provide its path as the minimap2 argument")
}
}

if (missing("samtools") || is.null(samtools) || is.na(samtools) || samtools =="") {
samtools <- sys_which("samtools")
samtools <- find_bin("samtools")
}

if (missing("minimap2_args") || !is.character(minimap2_args)) {
minimap2_args <- c("-ax", "map-ont", "-p", "0.9", "--end-bonus", "10", "-N",
"3", "-t", threads, "--seed", config$pipeline_parameters$seed)
}

if (!is.na(samtools)) {
minimap2_status <- base::system2(command = minimap2,
args = base::append(minimap2_args, c(file.path(outdir, "transcript_assembly.fa"),
fq_in, "|", samtools, "view -bS -@ 4 -o", file.path(outdir,
paste0(prefix, "tmp_align.bam")), "-")))
if (!is.null(base::attr(minimap2_status, "status")) && base::attr(minimap2_status,
"status") != 0) {
stop(paste0("error running minimap2:\n", minimap2_status))
}
stopifnot("Samtools not found" = !is.na(samtools))
minimap2_status <- base::system2(command = minimap2,
args = base::append(minimap2_args, c(file.path(outdir, "transcript_assembly.fa"),
fq_in, "|", samtools, "view -b -o", file.path(outdir,
paste0(prefix, "tmp_align.bam")), "-")))
if (!is.null(base::attr(minimap2_status, "status")) && base::attr(minimap2_status,
"status") != 0) {
stop(paste0("error running minimap2:\n", minimap2_status))
}

if (missing(sort_by)) {
sort_status <- base::system2(
command = samtools,
args = c("sort",
file.path(outdir, paste0(prefix, "tmp_align.bam")), "-o",
file.path(outdir, paste0(prefix, "realign2transcript.bam"))
)
if (missing(sort_by)) {
cat("Sorting by position\n")
sort_status <- base::system2(
command = samtools,
args = c("sort",
file.path(outdir, paste0(prefix, "tmp_align.bam")), "-o",
file.path(outdir, paste0(prefix, "realign2transcript.bam"))
)
index_status <- base::system2(
command = samtools,
args = c("index",
file.path(outdir,paste0(prefix, "realign2transcript.bam"))
)
)
index_status <- base::system2(
command = samtools,
args = c("index",
file.path(outdir,paste0(prefix, "realign2transcript.bam"))
)
} else if (is.character(sort_by)) {
sort_status <- base::system2(
command = samtools,
args = c("sort", "-t", sort_by,
file.path(outdir, paste0(prefix, "tmp_align.bam")), "-o",
file.path(outdir, paste0(prefix, "realign2transcript.bam"))
)
)
} else if (is.character(sort_by)) {
cat("Sorting by ", sort_by, "\n")
sort_status <- base::system2(
command = samtools,
args = c("sort", "-t", sort_by,
file.path(outdir, paste0(prefix, "tmp_align.bam")), "-o",
file.path(outdir, paste0(prefix, "realign2transcript.bam"))
)
} else if (is.na(sort_by)) {
file.rename(file.path(outdir, paste0(prefix, "tmp_align.bam")), file.path(outdir, paste0(prefix, "realign2transcript.bam")))
} else {
stop("sort_by must be a character or NA")
}
)
} else if (is.na(sort_by)) {
cat("file renamed to ", paste0(prefix, "realign2transcript.bam"), "\n")
file.rename(file.path(outdir, paste0(prefix, "tmp_align.bam")), file.path(outdir, paste0(prefix, "realign2transcript.bam")))
# sort_status <- base::system2(
# command = samtools,
# args = c("sort", "-n",
# file.path(outdir, paste0(prefix, "tmp_align.bam")), "-o",
# file.path(outdir, paste0(prefix, "realign2transcript.bam"))
# )
# )
} else {
message("samtools not found, using Rsamtools instead")
minimap2_status <- base::system2(command = minimap2,
args = base::append(minimap2_args, c(file.path(outdir, "transcript_assembly.fa"),
fq_in, "-o", file.path(outdir, paste0(prefix, "tmp_align.sam")))))
if (!is.null(base::attr(minimap2_status, "status")) && base::attr(minimap2_status,
"status") != 0) {
stop(paste0("error running minimap2:\n", minimap2_status))
}

Rsamtools::asBam(file.path(outdir, paste0(prefix, "tmp_align.sam")), file.path(outdir,
paste0(prefix, "tmp_align")))
if (missing(sort_by)) {
Rsamtools::sortBam(
file.path(outdir, paste0(prefix, "tmp_align.bam")),
file.path(outdir,paste0(prefix, "realign2transcript"))
)
Rsamtools::indexBam(file.path(outdir, paste0(prefix, "realign2transcript.bam")))
} else if (is.character(sort_by)) {
Rsamtools::sortBam(file.path(outdir, paste0(prefix, "tmp_align.bam")), file.path(outdir,paste0(prefix, "realign2transcript")), byTag = sort_by)
} else if (is.na(sort_by)) {
file.rename(file.path(outdir, paste0(prefix, "tmp_align.bam")), file.path(outdir, paste0(prefix, "realign2transcript.bam")))
}
stop("sort_by must be a character or NA")
}
file.remove(file.path(outdir, paste0(prefix, "tmp_align.bam")))

Expand All @@ -262,19 +231,27 @@ minimap2_realign <- function(config, fq_in, outdir, minimap2, samtools = NULL, p
}
}

#' Sys.which wrapper
#' Wrapper for Sys.which that replaces "" with NA
#' Find path to a binary
#' Wrapper for Sys.which to find path to a binary
#' @importFrom withr with_path
#' @description
#' The \code{base::Sys.which} function returns "" if the command is not found
#' on some systems and \code{NA} on others. This wrapper replaces "" with
#' \code{NA}
#' This function is a wrapper for \code{base::Sys.which} to find the path
#' to a command. It also searches within the \code{FLAMES} basilisk conda
#' environment. This function also replaces "" with \code{NA} in the
#' output of \code{base::Sys.which} to make it easier to check if the
#' binary is found.
#' @param command character, the command to search for
#' @return character, the path to the command or \code{NA}
#' @examples
#' sys_which("minimap2")
#' find_bin("minimap2")
#' @export
sys_which <- function(command) {
which_command <- Sys.which(command)
find_bin <- function(command) {
conda_bins <- file.path(basilisk::obtainEnvironmentPath(flames_env), 'bin')
which_command <- withr::with_path(
new = conda_bins,
action = "suffix",
code = Sys.which(command)
)
# replace "" with NA
which_command[which_command == ""] <- NA
return(which_command)
Expand Down
2 changes: 1 addition & 1 deletion R/model_decay.R
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ filter_annotation <- function(annotation, keep = "tss_differ") {
#' annotation <- bfc[[names(BiocFileCache::bfcadd(bfc, 'annot.gtf', paste(file_url, 'SIRV_isoforms_multi-fasta-annotation_C_170612a.gtf', sep = '/')))]]
#' outdir <- tempfile()
#' dir.create(outdir)
#' if (!any(is.na(sys_which(c("minimap2", "k8"))))) {
#' if (!any(is.na(find_bin(c("minimap2", "k8"))))) {
#' fasta <- annotation_to_fasta(annotation, genome_fa, outdir)
#' minimap2_realign(
#' config = jsonlite::fromJSON(system.file('extdata/config_sclr_nanopore_3end.json', package = 'FLAMES')),
Expand Down
Loading

0 comments on commit 1892c9a

Please sign in to comment.