Skip to content

Commit

Permalink
merged devel into c++ find_isoform version
Browse files Browse the repository at this point in the history
  • Loading branch information
OliverVoogd committed Jul 31, 2023
2 parents 7b34d2c + cff743e commit 0794ce9
Show file tree
Hide file tree
Showing 65 changed files with 6,745 additions and 683 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
inst/doc
inst/python/*.pyc
inst/python/__pycache__/*
inst/blaze/__pycache__/*
vignettes/*.html
vignettes/*.R
inst/data/genocodeshortened.v33.annotation.gff3
Expand Down
20 changes: 12 additions & 8 deletions DESCRIPTION
100755 → 100644
Original file line number Diff line number Diff line change
@@ -1,28 +1,30 @@
Package: FLAMES
Type: Package
Title: FLAMES: Full Length Analysis of Mutations and Splicing in long read RNA-seq data
Version: 1.5.2
Version: 1.5.5
Date: 2022-4-21
Authors@R: c(person("Tian", "Luyi", role=c("aut"),
Authors@R: c(person("Luyi", "Tian", role=c("aut"),
email="tian.l@wehi.edu.au"),
person("Voogd", "Oliver", role=c("aut", "cre"),
person("Oliver", "Voogd", role=c("aut", "cre"),
email="voogd.o@wehi.edu.au"),
person("Schuster", "Jakob", role=c("aut"),
person("Jakob", "Schuster", role=c("aut"),
email="schuster.j@wehi.edu.au"),
person("Wang", "Changqing", role=c("aut"),
person("Changqing", "Wang", role=c("aut"),
email="wang.ch@wehi.edu.au"),
person("Su", "Shian", role=c("aut"),
person("Shian", "Su", role=c("aut"),
email="su.s@wehi.edu.au"),
person("Ritchie","Matthew", role=c("ctb"),
person("Matthew", "Ritchie",role=c("ctb"),
email="mritchie@wehi.edu.au"))
Description: Semi-supervised isoform detection and annotation from both bulk and single-cell
long read RNA-seq data. FLAMES provides automated pipelines for analysing isoforms,
as well as intermediate functions for manual execution.
biocViews: RNASeq, SingleCell, Transcriptomics, DataImport,
DifferentialSplicing, AlternativeSplicing, GeneExpression
DifferentialSplicing, AlternativeSplicing, GeneExpression,
LongRead
License: GPL (>= 2)
Encoding: UTF-8
Imports:
arrangements,
basilisk,
bambu,
Biostrings,
Expand All @@ -34,6 +36,7 @@ Imports:
DropletUtils,
GenomicRanges,
GenomicFeatures,
GenomicAlignments,
GenomeInfoDb,
ggplot2,
ggbio,
Expand Down Expand Up @@ -61,6 +64,7 @@ Imports:
utils,
withr,
zlibbioc,
future,
Suggests:
BiocStyle,
GEOquery,
Expand Down
38 changes: 38 additions & 0 deletions NAMESPACE
100755 → 100644
Original file line number Diff line number Diff line change
@@ -1,25 +1,32 @@
# Generated by roxygen2: do not edit by hand

export(annotation_to_fasta)
export(blaze)
export(bulk_long_pipeline)
export(combine_sce)
export(create_config)
export(create_sce_from_dir)
export(create_se_from_dir)
export(cutadapt)
export(demultiplex_sockeye)
export(filter_annotation)
export(find_barcode)
export(find_isoform)
export(flexiplex)
export(get_GRangesList)
export(locate_minimap2_dir)
export(minimap2_align)
export(minimap2_realign)
export(parse_gff_tree)
export(plot_coverage)
export(quantify)
export(sc_DTU_analysis)
export(sc_annotate_plots)
export(sc_heatmap_expression)
export(sc_long_multisample_pipeline)
export(sc_long_pipeline)
export(sc_mutations)
export(sc_umap_expression)
import(zlibbioc)
importFrom(BiocGenerics,cbind)
importFrom(BiocGenerics,colnames)
Expand All @@ -35,26 +42,38 @@ importFrom(ComplexHeatmap,columnAnnotation)
importFrom(ComplexHeatmap,rowAnnotation)
importFrom(DropletUtils,read10xCounts)
importFrom(GenomeInfoDb,seqlengths)
importFrom(GenomicAlignments,readGAlignments)
importFrom(GenomicAlignments,seqnames)
importFrom(GenomicFeatures,extractTranscriptSeqs)
importFrom(GenomicFeatures,makeTxDbFromGFF)
importFrom(GenomicFeatures,transcripts)
importFrom(GenomicRanges,GRanges)
importFrom(GenomicRanges,GRangesList)
importFrom(GenomicRanges,coverage)
importFrom(GenomicRanges,granges)
importFrom(GenomicRanges,strand)
importFrom(GenomicRanges,width)
importFrom(Matrix,colSums)
importFrom(Matrix,t)
importFrom(Matrix,tail)
importFrom(MultiAssayExperiment,"experiments<-")
importFrom(MultiAssayExperiment,MultiAssayExperiment)
importFrom(MultiAssayExperiment,experiments)
importFrom(RColorBrewer,brewer.pal)
importFrom(Rsamtools,ScanBamParam)
importFrom(Rsamtools,asBam)
importFrom(Rsamtools,indexBam)
importFrom(Rsamtools,indexFa)
importFrom(Rsamtools,sortBam)
importFrom(S4Vectors,DataFrame)
importFrom(S4Vectors,head)
importFrom(S4Vectors,split)
importFrom(SingleCellExperiment,"altExp<-")
importFrom(SingleCellExperiment,"colLabels<-")
importFrom(SingleCellExperiment,"counts<-")
importFrom(SingleCellExperiment,SingleCellExperiment)
importFrom(SingleCellExperiment,altExp)
importFrom(SingleCellExperiment,colLabels)
importFrom(SingleCellExperiment,counts)
importFrom(SingleCellExperiment,logcounts)
importFrom(SingleCellExperiment,reducedDimNames)
Expand All @@ -66,6 +85,7 @@ importFrom(SummarizedExperiment,assays)
importFrom(SummarizedExperiment,colData)
importFrom(SummarizedExperiment,rowData)
importFrom(SummarizedExperiment,rowRanges)
importFrom(arrangements,combinations)
importFrom(bambu,bambu)
importFrom(bambu,prepareAnnotations)
importFrom(bambu,writeToGTF)
Expand All @@ -76,35 +96,51 @@ importFrom(basilisk,basiliskStop)
importFrom(circlize,colorRamp2)
importFrom(cowplot,get_legend)
importFrom(cowplot,plot_grid)
importFrom(dplyr,across)
importFrom(dplyr,any_vars)
importFrom(dplyr,filter)
importFrom(dplyr,filter_at)
importFrom(dplyr,group_by)
importFrom(dplyr,groups)
importFrom(dplyr,left_join)
importFrom(dplyr,mutate)
importFrom(dplyr,slice_max)
importFrom(dplyr,summarise)
importFrom(dplyr,summarise_at)
importFrom(dplyr,summarize_at)
importFrom(dplyr,top_n)
importFrom(future,plan)
importFrom(ggbio,autoplot)
importFrom(ggbio,geom_alignment)
importFrom(ggbio,xlim)
importFrom(ggplot2,aes)
importFrom(ggplot2,coord_polar)
importFrom(ggplot2,element_blank)
importFrom(ggplot2,element_line)
importFrom(ggplot2,element_text)
importFrom(ggplot2,geom_bar)
importFrom(ggplot2,geom_histogram)
importFrom(ggplot2,geom_line)
importFrom(ggplot2,geom_point)
importFrom(ggplot2,geom_text)
importFrom(ggplot2,ggplot)
importFrom(ggplot2,ggtitle)
importFrom(ggplot2,labs)
importFrom(ggplot2,margin)
importFrom(ggplot2,position_stack)
importFrom(ggplot2,scale_colour_gradient2)
importFrom(ggplot2,theme)
importFrom(ggplot2,theme_bw)
importFrom(ggplot2,xlab)
importFrom(ggplot2,ylab)
importFrom(grid,unit)
importFrom(grid,viewport)
importFrom(gridExtra,grid.arrange)
importFrom(igraph,as_adjacency_matrix)
importFrom(jsonlite,fromJSON)
importFrom(jsonlite,toJSON)
importFrom(magrittr,"%>%")
importFrom(magrittr,'%>%')
importFrom(parallel,detectCores)
importFrom(reticulate,dict)
importFrom(reticulate,import_from_path)
Expand All @@ -123,9 +159,11 @@ importFrom(stats,chisq.test)
importFrom(stats,median)
importFrom(stats,quantile)
importFrom(stats,setNames)
importFrom(stats,weighted.mean)
importFrom(stringr,str_split)
importFrom(tidyr,as_tibble)
importFrom(tidyr,gather)
importFrom(tidyr,pivot_longer)
importFrom(tidyr,pivot_wider)
importFrom(utils,file_test)
importFrom(utils,modifyList)
Expand Down
65 changes: 65 additions & 0 deletions R/BLAZE_demultiplexing.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#' BLAZE Assign reads to cell barcodes.
#'
#' @description
#' Uses BLAZE to generate barcode list and assign reads to cell barcodes.
#' Uses default options for BLAZE, see BLAZE documentation for details
#' (https://github.com/shimlab/BLAZE).
#'
#' @details
#' The entries of \code{blaze_config} are converted into a single BLAZE
#' command-line string: every named entry becomes \code{--<name> <value>},
#' except \code{overwrite}, which is emitted as the bare switch
#' \code{--overwrite} when it is \code{TRUE} and is left out otherwise
#' (including when the entry is missing or \code{NA}). The full 10X barcode
#' whitelist is fetched into a fresh temporary \code{BiocFileCache} on each
#' call and passed via \code{--full-bc-whitelist}; \code{fq_in} is appended
#' as the final argument.
#'
#' @param blaze_config List, additional BLAZE configuration parameters. An
#' optional \code{overwrite} entry controls the \code{--overwrite} switch.
#' @param fq_in File path to the fastq file used as a query sequence file
#'
#' @return Invisibly, whatever the \code{main()} function of the bundled
#' BLAZE Python module returns. NOTE(review): presumably the file name of the
#' demultiplexed fastq -- confirm against the bundled BLAZE module.
#'
#' @importFrom parallel detectCores
#' @importFrom reticulate import_from_path dict
#' @export
#' @examples
#' temp_path <- tempfile()
#' bfc <- BiocFileCache::BiocFileCache(temp_path, ask = FALSE)
#' bc_list_10x_url <- 'https://github.com/shimlab/BLAZE/raw/main/10X_bc/3M-february-2018.zip'
#' bc_list_10x <- bfc[[names(BiocFileCache::bfcadd(bfc, 'bc_list_10x', bc_list_10x_url))]]
#' fastq1_url <- 'https://raw.githubusercontent.com/shimlab/BLAZE/main/test/data/FAR20033_pass_51e510db_100.fastq'
#' fastq1 <- bfc[[names(BiocFileCache::bfcadd(bfc, 'Fastq1', fastq1_url))]]
#' outdir <- tempfile()
#' dir.create(outdir)
#' config <- jsonlite::fromJSON(system.file('extdata/blaze_flames.json', package = 'FLAMES'))
#' config$blaze_parameters['output-prefix'] <- outdir
#' blaze(config$blaze_parameters, fastq1)
blaze <- function(blaze_config, fq_in) {
  # `overwrite` is a bare switch, not a `--name value` pair, so it is pulled
  # out before the generic loop below. Extracting with `[[` and wrapping the
  # comparison in isTRUE() makes a missing (NULL) or NA entry mean "off"
  # instead of aborting inside `if` with a zero-length / NA condition.
  overwrite <- isTRUE(blaze_config[["overwrite"]] == TRUE)
  blaze_config["overwrite"] <- NULL

  # command line arguments for blaze
  blaze_argv <- paste("")
  if (overwrite) {
    blaze_argv <- paste(blaze_argv, "--overwrite ")
  }
  for (arg in names(blaze_config)) {
    blaze_argv <- paste(blaze_argv, paste0("--", arg), blaze_config[arg])
  }

  # prepare 10X whitelist (downloaded into a throw-away cache directory)
  temp_path <- tempfile()
  bfc <- BiocFileCache::BiocFileCache(temp_path, ask = FALSE)
  bc_list_10x_url <- "https://github.com/shimlab/BLAZE/raw/main/10X_bc/3M-february-2018.zip"
  cat("Downloading the full whitelist from 10X...")
  bc_list_10x <- bfc[[names(BiocFileCache::bfcadd(bfc, "bc_list_10x", bc_list_10x_url))]]
  blaze_argv <- paste(blaze_argv, "--full-bc-whitelist", bc_list_10x)

  # the input fastq goes last on the command line
  blaze_argv <- paste(blaze_argv, fq_in)

  # Run the bundled BLAZE Python module inside the package's basilisk
  # environment; the argument string is handed over as one character value.
  ret <- callBasilisk(flames_env, function(blaze_argv) {
    blaze_path <- system.file("blaze", package = "FLAMES")
    cat("Running BLAZE...\n")
    cat("Argument: ", blaze_argv, "\n")
    blaze <- reticulate::import_from_path("blaze", blaze_path)
    blaze$main(blaze_argv)
  }, blaze_argv = blaze_argv)

  # The original ended on the assignment above, i.e. returned invisibly;
  # keep that contract but make it explicit.
  invisible(ret)
}
4 changes: 4 additions & 0 deletions R/FLAMES.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#' FLAMES: full-length analysis of mutations and splicing
#' @name FLAMES
#' @useDynLib FLAMES, .registration=TRUE
NULL
22 changes: 22 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,25 @@ find_isoform_multithread <- function(gff3, genome_bam, isoform_gff3, tss_tes_sta
invisible(.Call(`_FLAMES_find_isoform_multithread`, gff3, genome_bam, isoform_gff3, tss_tes_stat, genomefa, transcript_fa, isoform_parameters, raw_splice_isoform))
}

#' Rcpp port of flexiplex
#'
#' @description demultiplex reads with flexiplex, for detailed description, see
#' documentation for the original flexiplex: https://davidsongroup.github.io/flexiplex
#'
#' @param reads_in Input FASTQ or FASTA file
#' @param barcodes_file barcode allow-list file
#' @param bc_as_readid bool, whether to add the demultiplexed barcode to the
#' read ID field
#' @param max_bc_editdistance max edit distance for barcode
#' @param max_flank_editdistance max edit distance for the flanking sequences
#' @param pattern StringVector defining the barcode structure, see [find_barcode]
#' @param reads_out output file for demultiplexed reads
#' @param stats_out output file for demultiplexed stats
#' @param n_threads number of threads to be used during demultiplexing
#' @param bc_out WIP
#' @return integer return value. 0 represents normal return.
#' @export
# NOTE(review): this lives in R/RcppExports.R, which is presumably generated by
# Rcpp::compileAttributes(); if so, lasting documentation changes belong in the
# C++ source that declares flexiplex -- confirm before editing here.
flexiplex <- function(reads_in, barcodes_file, bc_as_readid, max_bc_editdistance, max_flank_editdistance, pattern, reads_out, stats_out, bc_out, n_threads) {
# Thin wrapper: forwards every argument, in the same order, to the registered
# native routine `_FLAMES_flexiplex` and returns its result unchanged.
.Call(`_FLAMES_flexiplex`, reads_in, barcodes_file, bc_as_readid, max_bc_editdistance, max_flank_editdistance, pattern, reads_out, stats_out, bc_out, n_threads)
}

85 changes: 25 additions & 60 deletions R/basilisk.R
100755 → 100644
Original file line number Diff line number Diff line change
@@ -1,66 +1,31 @@
# flames_nopysam_env <- BasiliskEnvironment(
# envname = "flames_nopysam_env", pkgname = "FLAMES",
# packages = c(
# "python==2.7.15.0",
# # "minimap2==2.17",
# "numpy==1.16.5",
# "editdistance==0.5.3",
# "bamnostic==1.1.7"
# ),
# channels = c("bioconda", "conda-forge")
# )

# Basilisk environment definition for FLAMES: names the environment
# "flames_env", owned by package "FLAMES", and lists the pinned conda packages,
# one pip-installed package and the conda channels to resolve them from.
# NOTE(review): as listed here the `packages` vector holds two consecutive sets
# of pins (python 3.7 ... pysam 0.18.0, then python 3.10 ... biopython 1.79)
# with no comma between them, and `channels` is supplied twice. That does not
# parse as R -- it looks like removed and added diff lines shown together.
# Confirm which set is current before relying on this block.
#' @importFrom basilisk BasiliskEnvironment
flames_env <- BasiliskEnvironment(
envname = "flames_env", pkgname = "FLAMES",
# installed with pip rather than conda
pip = c("fast-edit-distance==1.2.0"),
packages = c(
"python==3.7",
# "minimap2==2.17",
"numpy==1.16.5",
"editdistance==0.5.3",
"scipy==1.2.0",
"pysam==0.18.0"
"python==3.10",
"numpy==1.25.0",
"editdistance==0.6.2",
"scipy==1.11.1",
"pysam==0.21.0",
"cutadapt==4.4",
"tqdm==4.64.1",
"matplotlib==3.5.3",
"pandas==1.3.5",
"biopython==1.79"
),
channels = c("bioconda", "conda-forge")
channels = c("conda-forge", "bioconda", "defaults")
)
# flames_env <- BasiliskEnvironment(envname="full_env",
# pkgname="FLAMES",
# packages=c("python==2.7.15.0",
# "pysam==0.16.0.1",
# "minimap2==2.17",
# "numpy==1.16.5",
# "editdistance==0.5.3",
# "bzip2==1.0.8",
# "c-ares==1.11.0",
# "ca-certificates==2020.11.8",
# "certifi==2019.11.28",
# "htslib==1.11",
# "k8==0.2.5",
# "krb5==1.17.1",
# "libblas==3.9.0",
# "libcblas==3.9.0",
# "libcurl==7.71.1",
# "libcxx==11.0.0",
# "libdeflate==1.6",
# "libedit==3.1.20191231",
# "libev==4.33",
# "libffi==3.2.1",
# "libgfortran==3.0.0",
# "libgfortran5==9.3.0",
# "liblapack==3.9.0",
# "libnghttp2==1.41.0",
# "libopenblas==0.3.12",
# "libssh2==1.9.0",
# "llvm-openmp==11.0.0",
# "ncurses==6.2",
# "openssl==1.1",
# #"pip==20.1.1",
# "python_abi==2.7",
# "readline==8.0",
# "setuptools==44.0.0",
# "sqlite==3.33.0",
# "tk==8.6.10",
# "wheel==0.35.1",
# "xz==5.2.5",
# "zlib==1.2.11"),
# channels=c("bioconda", "conda-forge"))

# blaze_env <- BasiliskEnvironment(
# envname = "blaze_env", pkgname = "FLAMES",
# pip = c("fast-edit-distance==1.2.0"),
# channels = c('conda-forge','bioconda', 'defaults'),
# packages = c(
# "python==3.7",
# "biopython==1.79", #blaze specific
# "pandas==1.3.5",#blaze specific
# "numpy==1.21.6", #blaze specific
# "matplotlib==3.5.3", #blaze specific
# "tqdm==4.64.1"
# ))
Loading

0 comments on commit 0794ce9

Please sign in to comment.