hemberg-lab · seb-mueller · May 20, 2019 · Dec 17, 2019 · Dec 17, 2019 · Dec 17, 2019
diff --git a/course_files/clustering.Rmd b/course_files/clustering.Rmd
@@ -60,7 +60,7 @@ Now we are ready to run `SC3` (we also ask it to calculate biological properties
 deng <- sc3(deng, ks = 10, biology = TRUE, n_cores = 1)
 ```
 
-`SC3` result consists of several different outputs (please look in [@Kiselev2016-bq] and [SC3 vignette](http://bioconductor.org/packages/release/bioc/vignettes/SC3/inst/doc/my-vignette.html) for more details). Here we show some of them:
+`SC3` result consists of several different outputs (please look in [@Kiselev2016-bq] and [SC3 vignette](https://bioconductor.org/packages/release/bioc/vignettes/SC3/inst/doc/SC3.html) for more details). Here we show some of them:
 
 Consensus matrix:
 ```{r, fig.height=6}
@@ -123,7 +123,7 @@ Now we are going to apply _k_-means clustering algorithm to the cloud of points
 
 We will start with $k=8$:
 ```{r clust-tsne-kmeans2, fig.cap = "tSNE map of the patient data with 8 colored clusters, identified by the k-means clustering algorithm"}
-colData(deng)$tSNE_kmeans <- as.character(kmeans(deng@reducedDims$TSNE, centers = 8)$clust)
+colData(deng)$tSNE_kmeans <- as.character(kmeans(reducedDim(deng), centers = 8)$clust)
 plotTSNE(deng, colour_by = "tSNE_kmeans")
 ```
 

diff --git a/course_files/dropouts.Rmd b/course_files/dropouts.Rmd
@@ -61,7 +61,7 @@ expression matrix. This can be extracted from our SingleCellExperiment object us
 command below. 
 
 ```{r}
-expr_matrix <- M3Drop::M3DropConvertData(deng)
+expr_matrix <- M3Drop::M3DropConvertData(counts(deng))
 ```
 
 This function is compatible with most single-cell RNA-seq analysis packages including:
@@ -272,7 +272,7 @@ far too many genes will be called as significant. Thus we will take the top 1500
 by effect size.
 
 ```{r, fig.width=8, fig.height=5}
-deng_int <- NBumiConvertData(deng)
+deng_int <- NBumiConvertData(counts(deng))
 DANB_fit <- NBumiFitModel(deng_int) # DANB is fit to the raw count matrix
 # Perform DANB feature selection
 DropFS <- NBumiFeatureSelectionCombinedDrop(DANB_fit, method="fdr", qval.thresh=0.01, suppress.plot=FALSE)

diff --git a/course_files/exprs-norm-reads.Rmd b/course_files/exprs-norm-reads.Rmd
@@ -31,7 +31,7 @@ plotPCA(
 ```
 
 ```{r norm-pca-cpm-reads, fig.cap = "PCA plot of the tung data after CPM normalisation"}
-logcounts(reads.qc) <- log2(calculateCPM(reads.qc, use_size_factors = FALSE) + 1)
+logcounts(reads.qc) <- log2(calculateCPM(reads.qc, use_size_factors = NULL) + 1)
 plotPCA(
     reads.qc[endog_genes, ],
     colour_by = "batch",
@@ -55,7 +55,7 @@ plotRLE(
 ```{r norm-pca-lsf-umi, fig.cap = "PCA plot of the tung data after LSF normalisation"}
 qclust <- quickCluster(reads.qc, min.size = 30)
 reads.qc <- computeSumFactors(reads.qc, sizes = 15, clusters = qclust)
-reads.qc <- normalize(reads.qc)
+reads.qc <- logNormCounts(reads.qc)
 plotPCA(
     reads.qc[endog_genes, ],
     colour_by = "batch",

diff --git a/course_files/exprs-norm.Rmd b/course_files/exprs-norm.Rmd
@@ -131,7 +131,7 @@ plotPCA(
 
 ### CPM
 ```{r norm-pca-cpm, fig.cap = "PCA plot of the tung data after CPM normalisation"}
-logcounts(umi.qc) <- log2(calculateCPM(umi.qc, use_size_factors = FALSE) + 1)
+logcounts(umi.qc) <- log2(calculateCPM(umi.qc, use_size_factors = NULL) + 1)
 plotPCA(
     umi.qc[endog_genes, ],
     colour_by = "batch",
@@ -156,7 +156,7 @@ plotRLE(
 ```{r norm-pca-lsf, fig.cap = "PCA plot of the tung data after LSF normalisation"}
 qclust <- quickCluster(umi.qc, min.size = 30)
 umi.qc <- computeSumFactors(umi.qc, sizes = 15, clusters = qclust)
-umi.qc <- normalize(umi.qc)
+umi.qc <- logNormCounts(umi.qc)
 plotPCA(
     umi.qc[endog_genes, ],
     colour_by = "batch",

diff --git a/course_files/exprs-qc-reads.Rmd b/course_files/exprs-qc-reads.Rmd
@@ -38,13 +38,18 @@ reads <- reads[keep_feature, ]
 ```
 
 ```{r}
-isSpike(reads, "ERCC") <- grepl("^ERCC-", rownames(reads))
-isSpike(reads, "MT") <- rownames(reads) %in% 
-    c("ENSG00000198899", "ENSG00000198727", "ENSG00000198888",
+is.spike=grepl("^ERCC-", rownames(reads))
+reads  <- splitAltExps(reads, ifelse(is.spike, "ERCC", "gene"))
+altExpNames(reads)
+
+is.mt <- rownames(reads) %in%        
+  c("ENSG00000198899", "ENSG00000198727", "ENSG00000198888",
     "ENSG00000198886", "ENSG00000212907", "ENSG00000198786",
     "ENSG00000198695", "ENSG00000198712", "ENSG00000198804",
     "ENSG00000198763", "ENSG00000228253", "ENSG00000198938",
     "ENSG00000198840")
+reads <-  splitAltExps(reads, ifelse(is.mt, "MT", "gene"))
+altExpNames(reads)
 ```
 
 ```{r}
@@ -148,7 +153,7 @@ table(reads$outlier)
 ```{r}
 plotReducedDim(
     reads,
-    use_dimred = "PCA_coldata",
+    use_dimred = "PCA",
     size_by = "total_features_by_counts", 
     shape_by = "use", 
     colour_by = "outlier"

diff --git a/course_files/exprs-qc.Rmd b/course_files/exprs-qc.Rmd
@@ -64,13 +64,18 @@ umi <- umi[keep_feature, ]
 
 Define control features (genes) - ERCC spike-ins and mitochondrial genes ([provided](http://jdblischak.github.io/singleCellSeq/analysis/qc-filter-ipsc.html) by the authors):
 ```{r}
-isSpike(umi, "ERCC") <- grepl("^ERCC-", rownames(umi))
-isSpike(umi, "MT") <- rownames(umi) %in% 
-    c("ENSG00000198899", "ENSG00000198727", "ENSG00000198888",
+is.spike <- grepl("^ERCC-", rownames(umi))
+umi  <- splitAltExps(umi, ifelse(is.spike, "ERCC", "gene"))
+altExpNames(umi)
+
+is.mt <- rownames(umi) %in%        
+  c("ENSG00000198899", "ENSG00000198727", "ENSG00000198888",
     "ENSG00000198886", "ENSG00000212907", "ENSG00000198786",
     "ENSG00000198695", "ENSG00000198712", "ENSG00000198804",
     "ENSG00000198763", "ENSG00000228253", "ENSG00000198938",
     "ENSG00000198840")
+umi <-  splitAltExps(umi, ifelse(is.mt, "MT", "gene"))
+altExpNames(umi)
 ```
 
 Calculate the quality metrics:
@@ -253,7 +258,7 @@ Then, we can use a PCA plot to see a 2D representation of the cells ordered by t
 ```{r}
 plotReducedDim(
     umi,
-    use_dimred = "PCA_coldata",
+    use_dimred = "PCA",
     size_by = "total_features_by_counts", 
     shape_by = "use", 
     colour_by = "outlier"

diff --git a/course_files/figures/barcode_UMI_read1_adapter_fastqc.png b/course_files/figures/barcode_UMI_read1_adapter_fastqc.png
diff --git a/course_files/imputation.Rmd b/course_files/imputation.Rmd
@@ -142,7 +142,7 @@ enhances trajectory-like structure in a dataset, in contrast to scImpute and
 DrImpute which assume a cluster-like structure to the underlying data.
 
 ```{r, eval=FALSE}
-res <- magic(t(deng@assays[["logcounts"]]), genes="all_genes", k=10, t="auto")
+res <- magic(t(deng@assays@data$logcounts), genes="all_genes", k=10, t="auto")
 ```
 
 ```{r, eval=FALSE}

diff --git a/course_files/intro.Rmd b/course_files/intro.Rmd
@@ -26,7 +26,7 @@ opts_chunk$set(fig.align = "center", echo=FALSE)
 * Allows to study new biological questions in which __cell-specific changes in transcriptome are important__, e.g. cell type identification, heterogeneity of cell responses, stochasticity of gene expression, inference of gene regulatory networks across the cells.
 * Datasets range __from $10^2$ to $10^6$ cells__ and increase in size every year
 * Currently there are several different protocols in use, e.g. SMART-seq2 [@Picelli2013-sb], CELL-seq [@Hashimshony2012-kd] and Drop-seq [@Macosko2015-ix]
-* There are also commercial platforms available, including the [Fluidigm C1](https://www.fluidigm.com/products/c1-system), [Wafergen ICELL8](https://www.wafergen.com/products/icell8-single-cell-system) and the [10X Genomics Chromium](https://www.10xgenomics.com/single-cell/)
+* There are also commercial platforms available, including the [Fluidigm C1](https://www.fluidigm.com/products/c1-system), [Wafergen ICELL8](https://www.wafergen.com/products/icell8-single-cell-system), the [10X Genomics Chromium](https://www.10xgenomics.com/single-cell/) and even a platform developed in Cambridgeshire: [Nadia from Dolomite Bio](https://www.dolomite-bio.com/product/nadia-instrument).
 * Several computational analysis methods from bulk RNA-seq __can__ be used
 * __In most cases__ computational analysis requires adaptation of the existing methods or development of new ones
 
@@ -56,6 +56,7 @@ Today, there are also several different platforms available for carrying out one
 * [Seurat](http://satijalab.org/seurat/) is an R package designed for QC, analysis, and exploration of single cell RNA-seq data.
 * [ASAP](https://asap.epfl.ch/) (Automated Single-cell Analysis Pipeline) is an interactive web-based platform for single-cell analysis.
 * [Bioconductor](https://master.bioconductor.org/packages/release/workflows/html/simpleSingleCell.html) is a open-source, open-development software project for the analysis of high-throughput genomics data, including packages for the analysis of single-cell data.
+* [dropSeqPipe](https://github.com/Hoohm/dropSeqPipe) is dropSeq pre-processing pipeline based on the popular [snakmake](https://snakemake.readthedocs.io/en/stable/) framework combining some of the above software (co-developed by Sebastian Mueller, one of the instructors).
 
 
 ## Challenges

diff --git a/course_files/umis.Rmd b/course_files/umis.Rmd
@@ -38,6 +38,9 @@ After processing the reads from a UMI experiment, the following conventions are
 knitr::include_graphics("figures/UMI-Seq-reads.png")
 ```
 
+```{r intro-umi-fastqc-adapter, out.width = '90%', fig.cap="FastQC adapter content plot of too long Read 1 for a DropSeq sample. Barcode (12bp) and UMI (8bp) make up first 20 bp, the remaining sequence is ininformative poly-T."}
+knitr::include_graphics("figures/barcode_UMI_read1_adapter_fastqc.png")
+```
 ### Counting Barcodes
 
 In theory, every unique UMI-transcript pair should represent all reads originating from a single RNA molecule. However, in practice this is frequently not the case and the most common reasons are: