% Deng et al. (2014), Science 343(6167): allelic expression in single cells of mouse preimplantation embryos.
@article{Deng2014-mx,
  title       = {Single-cell {RNA-seq} reveals dynamic, random monoallelic gene expression in mammalian cells},
  author      = {Deng, Qiaolin and Ramsk{\"{o}}ld, Daniel and Reinius, Bj{\"{o}}rn and Sandberg, Rickard},
  affiliation = {Ludwig Institute for Cancer Research, Box 240, 171 77 Stockholm, Sweden.},
  abstract    = {Expression from both alleles is generally observed in analyses of diploid cell populations, but studies addressing allelic expression patterns genome-wide in single cells are lacking. Here, we present global analyses of allelic expression across individual cells of mouse preimplantation embryos of mixed background (CAST/EiJ \texttimes{} C57BL/6J). We discovered abundant (12 to 24\%) monoallelic expression of autosomal genes and that expression of the two alleles occurs independently. The monoallelic expression appeared random and dynamic because there was considerable variation among closely related embryonic cells. Similar patterns of monoallelic expression were observed in mature cells. Our allelic expression analysis also demonstrates the de novo inactivation of the paternal X chromosome. We conclude that independent and stochastic allelic transcription generates abundant random monoallelic expression in the mammalian cell.},
  journal     = {Science},
  volume      = 343,
  number      = 6167,
  pages       = {193--196},
  month       = "10~" # jan,
  year        = 2014
}

% The entry below contains non-ASCII chars that could not be converted
% to a LaTeX equivalent.
% Macosko et al. (2015), Cell 161(5): Drop-seq.
% The two typographic apostrophes in this abstract were replaced with the
% ASCII apostrophe so that the entry is pure ASCII.
@article{Macosko2015-ix,
  title       = {Highly Parallel Genome-wide Expression Profiling of Individual Cells Using Nanoliter Droplets},
  author      = {Macosko, Evan Z and Basu, Anindita and Satija, Rahul and Nemesh, James and Shekhar, Karthik and Goldman, Melissa and Tirosh, Itay and Bialas, Allison R and Kamitaki, Nolan and Martersteck, Emily M and Trombetta, John J and Weitz, David A and Sanes, Joshua R and Shalek, Alex K and Regev, Aviv and McCarroll, Steven A},
  abstract    = {Summary Cells, the basic units of biological structure and function, vary broadly in type and state. Single-cell genomics can characterize cell identity and function, but limitations of ease and scale have prevented its broad application. Here we describe Drop-seq, a strategy for quickly profiling thousands of individual cells by separating them into nanoliter-sized aqueous droplets, associating a different barcode with each cell's RNAs, and sequencing them all together. Drop-seq analyzes mRNA transcripts from thousands of individual cells simultaneously while remembering transcripts' cell of origin. We analyzed transcriptomes from 44,808 mouse retinal cells and identified 39 transcriptionally distinct cell populations, creating a molecular atlas of gene expression for known retinal cell classes and novel candidate cell subtypes. Drop-seq will accelerate biological discovery by enabling routine transcriptional profiling at single-cell resolution. Video Abstract},
  journal     = {Cell},
  volume      = 161,
  number      = 5,
  pages       = {1202--1214},
  month       = "21~" # may,
  year        = 2015
}

% Xu and Su (2015), Bioinformatics: SNN-Cliq clustering of single-cell transcriptomes.
@article{Xu2015-vf,
  title       = {Identification of cell types from single-cell transcriptomes using a novel clustering method},
  author      = {Xu, Chen and Su, Zhengchang},
  affiliation = {Department of Bioinformatics and Genomics, University of North Carolina at Charlotte, Charlotte, NC 28223, USA. Department of Bioinformatics and Genomics, University of North Carolina at Charlotte, Charlotte, NC 28223, USA.},
  abstract    = {MOTIVATION: The recent advance of single-cell technologies has brought new insights into complex biological phenomena. In particular, genome-wide single-cell measurements such as transcriptome sequencing enable the characterization of cellular composition as well as functional variation in homogenic cell populations. An important step in the single-cell transcriptome analysis is to group cells that belong to the same cell types based on gene expression patterns. The corresponding computational problem is to cluster a noisy high dimensional dataset with substantially fewer objects (cells) than the number of variables (genes). RESULTS: In this article, we describe a novel algorithm named shared nearest neighbor (SNN)-Cliq that clusters single-cell transcriptomes. SNN-Cliq utilizes the concept of shared nearest neighbor that shows advantages in handling high-dimensional data. When evaluated on a variety of synthetic and real experimental datasets, SNN-Cliq outperformed the state-of-the-art methods tested. More importantly, the clustering results of SNN-Cliq reflect the cell types or origins with high accuracy. AVAILABILITY AND IMPLEMENTATION: The algorithm is implemented in MATLAB and Python. The source code can be downloaded at http://bioinfo.uncc.edu/SNNCliq. CONTACT: zcsu@uncc.edu Supplementary information: Supplementary data are available at Bioinformatics online.},
  journal     = {Bioinformatics},
  month       = "11~" # feb,
  year        = 2015
}

% Zurauskiene and Yau (2016), BMC Bioinformatics 17: pcaReduce.
% Accented letters in the author name are wrapped as BibTeX special characters
% so sorting and label generation treat them as single letters; the tool name
% in the title is braced so sentence-casing styles keep its capitalisation.
@article{Zurauskiene2016-kg,
  title       = {{pcaReduce}: hierarchical clustering of single cell transcriptional profiles},
  author      = {{\v{Z}}urauskien{\.{e}}, Justina and Yau, Christopher},
  affiliation = {Wellcome Trust Centre for Human Genetics, University of Oxford, Roosevelt Drive, Oxford, OX3 7BN, UK. Wellcome Trust Centre for Human Genetics, University of Oxford, Roosevelt Drive, Oxford, OX3 7BN, UK. cyau@well.ox.ac.uk. Department of Statistics, University of Oxford, 1 S. Parks Rd, Oxford, OX1 3TG, UK. cyau@well.ox.ac.uk.},
  abstract    = {BACKGROUND: Advances in single cell genomics provide a way of routinely generating transcriptomics data at the single cell level. A frequent requirement of single cell expression analysis is the identification of novel patterns of heterogeneity across single cells that might explain complex cellular states or tissue composition. To date, classical statistical analysis tools have being routinely applied, but there is considerable scope for the development of novel statistical approaches that are better adapted to the challenges of inferring cellular hierarchies. RESULTS: We have developed a novel agglomerative clustering method that we call pcaReduce to generate a cell state hierarchy where each cluster branch is associated with a principal component of variation that can be used to differentiate two cell states. Using two real single cell datasets, we compared our approach to other commonly used statistical techniques, such as K-means and hierarchical clustering. We found that pcaReduce was able to give more consistent clustering structures when compared to broad and detailed cell type labels. CONCLUSIONS: Our novel integration of principal components analysis and hierarchical clustering establishes a connection between the representation of the expression data and the number of cell types that can be discovered. In doing so we found that pcaReduce performs better than either technique in isolation in terms of characterising putative cell states. Our methodology is complimentary to other single cell clustering techniques and adds to a growing palette of single cell bioinformatics tools for profiling heterogeneous cell populations.},
  journal     = {BMC Bioinformatics},
  volume      = 17,
  pages       = {140},
  month       = "22~" # mar,
  year        = 2016,
  keywords    = {Gene expression; Hierarchical clustering; Single cell RNA-Seq}
}

% Guo et al. (2015), PLoS Comput. Biol. 11(11): SINCERA pipeline.
@article{Guo2015-ok,
  title       = {{SINCERA}: A Pipeline for {Single-Cell} {RNA-Seq} Profiling Analysis},
  author      = {Guo, Minzhe and Wang, Hui and Potter, S Steven and Whitsett, Jeffrey A and Xu, Yan},
  affiliation = {The Perinatal Institute, Section of Neonatology, Perinatal and Pulmonary Biology, Cincinnati Children's Hospital Medical Center, Cincinnati, Ohio, United States of America. Department of Electrical Engineering and Computing Systems, College of Engineering and Applied Science, University of Cincinnati, Cincinnati, Ohio, United States of America. The Perinatal Institute, Section of Neonatology, Perinatal and Pulmonary Biology, Cincinnati Children's Hospital Medical Center, Cincinnati, Ohio, United States of America. Division of Developmental Biology, Cincinnati Children's Hospital Medical Center, Cincinnati, Ohio, United States of America. The Perinatal Institute, Section of Neonatology, Perinatal and Pulmonary Biology, Cincinnati Children's Hospital Medical Center, Cincinnati, Ohio, United States of America. The Perinatal Institute, Section of Neonatology, Perinatal and Pulmonary Biology, Cincinnati Children's Hospital Medical Center, Cincinnati, Ohio, United States of America. Division of Biomedical Informatics, Cincinnati Children's Hospital Medical Center, Cincinnati, Ohio, United States of America.},
  abstract    = {A major challenge in developmental biology is to understand the genetic and cellular processes/programs driving organ formation and differentiation of the diverse cell types that comprise the embryo. While recent studies using single cell transcriptome analysis illustrate the power to measure and understand cellular heterogeneity in complex biological systems, processing large amounts of RNA-seq data from heterogeneous cell populations creates the need for readily accessible tools for the analysis of single-cell RNA-seq (scRNA-seq) profiles. The present study presents a generally applicable analytic pipeline (SINCERA: a computational pipeline for SINgle CEll RNA-seq profiling Analysis) for processing scRNA-seq data from a whole organ or sorted cells. The pipeline supports the analysis for: 1) the distinction and identification of major cell types; 2) the identification of cell type specific gene signatures; and 3) the determination of driving forces of given cell types. We applied this pipeline to the RNA-seq analysis of single cells isolated from embryonic mouse lung at E16.5. Through the pipeline analysis, we distinguished major cell types of fetal mouse lung, including epithelial, endothelial, smooth muscle, pericyte, and fibroblast-like cell types, and identified cell type specific gene signatures, bioprocesses, and key regulators. SINCERA is implemented in R, licensed under the GNU General Public License v3, and freely available from CCHMC PBGE website, https://research.cchmc.org/pbge/sincera.html.},
  journal     = {PLoS Comput. Biol.},
  volume      = 11,
  number      = 11,
  pages       = {e1004575},
  month       = nov,
  year        = 2015
}

% Kiselev et al. (2016), bioRxiv preprint: SC3.
% The unpublished entry type requires a note field; one is supplied here from
% the entry's own journal and pages values so standard styles print where the
% preprint can be found. The original fields are kept untouched.
@unpublished{Kiselev2016-bq,
  title       = {{SC3} - consensus clustering of single-cell {RNA-Seq} data},
  author      = {Kiselev, Vladimir Yu and Kirschner, Kristina and Schaub, Michael T and Andrews, Tallulah and Chandra, Tamir and Natarajan, Kedar N and Reik, Wolf and Barahona, Mauricio and Green, Anthony R and Hemberg, Martin},
  abstract    = {Using single-cell RNA-seq (scRNA-seq), the full transcriptome of individual cells can be acquired, enabling a quantitative cell-type characterisation based on expression profiles. Due to the large variability in gene expression, assigning cells into groups based on the transcriptome remains challenging. We present Single-Cell Consensus Clustering (SC3), a tool for unsupervised clustering of scRNA-seq data. SC3 achieves high accuracy and robustness by consistently integrating different clustering solutions through a consensus approach. Tests on nine published datasets show that SC3 outperforms 4 existing methods, while remaining scalable for large datasets, as shown by the analysis of a dataset containing 44,808 cells. Moreover, an interactive graphical implementation makes SC3 accessible to a wide audience of users, and SC3 also aids biological interpretation by identifying marker genes, differentially expressed genes and outlier cells. We illustrate the capabilities of SC3 by characterising newly obtained transcriptomes from subclones of neoplastic cells collected from patients.},
  journal     = {bioRxiv},
  pages       = {036558},
  note        = {{bioRxiv} preprint 036558},
  month       = "1~" # jan,
  year        = 2016,
  language    = {en}
}

% Tang et al. (2009), Nat. Methods 6(5): mRNA-Seq of a single cell.
@article{Tang2009-bu,
  title       = {{mRNA-Seq} whole-transcriptome analysis of a single cell},
  author      = {Tang, Fuchou and Barbacioru, Catalin and Wang, Yangzhou and Nordman, Ellen and Lee, Clarence and Xu, Nanlan and Wang, Xiaohui and Bodeau, John and Tuch, Brian B and Siddiqui, Asim and Lao, Kaiqin and Surani, M Azim},
  affiliation = {Wellcome Trust-Cancer Research UK Gurdon Institute of Cancer and Developmental Biology, University of Cambridge, Cambridge, UK.},
  abstract    = {Next-generation sequencing technology is a powerful tool for transcriptome analysis. However, under certain conditions, only a small amount of material is available, which requires more sensitive techniques that can preferably be used at the single-cell level. Here we describe a single-cell digital gene expression profiling assay. Using our mRNA-Seq assay with only a single mouse blastomere, we detected the expression of 75\% (5,270) more genes than microarray techniques and identified 1,753 previously unknown splice junctions called by at least 5 reads. Moreover, 8-19\% of the genes with multiple known transcript isoforms expressed at least two isoforms in the same blastomere or oocyte, which unambiguously demonstrated the complexity of the transcript variants at whole-genome scale in individual cells. Finally, for Dicer1(-/-) and Ago2(-/-) (Eif2c2(-/-)) oocytes, we found that 1,696 and 1,553 genes, respectively, were abnormally upregulated compared to wild-type controls, with 619 genes in common.},
  journal     = {Nat. Methods},
  volume      = 6,
  number      = 5,
  pages       = {377--382},
  month       = may,
  year        = 2009
}

% Picelli et al. (2013), Nat. Methods 10(11): Smart-seq2.
% The A-ring in the second author's given name is wrapped as a BibTeX special
% character so it is handled as one letter.
@article{Picelli2013-sb,
  title       = {Smart-seq2 for sensitive full-length transcriptome profiling in single cells},
  author      = {Picelli, Simone and Bj{\"{o}}rklund, {\AA}sa K and Faridani, Omid R and Sagasser, Sven and Winberg, G{\"{o}}sta and Sandberg, Rickard},
  affiliation = {Ludwig Institute for Cancer Research, Stockholm, Sweden.},
  abstract    = {Single-cell gene expression analyses hold promise for characterizing cellular heterogeneity, but current methods compromise on either the coverage, the sensitivity or the throughput. Here, we introduce Smart-seq2 with improved reverse transcription, template switching and preamplification to increase both yield and length of cDNA libraries generated from individual cells. Smart-seq2 transcriptome libraries have improved detection, coverage, bias and accuracy compared to Smart-seq libraries and are generated with off-the-shelf reagents at lower cost.},
  journal     = {Nat. Methods},
  volume      = 10,
  number      = 11,
  pages       = {1096--1098},
  month       = nov,
  year        = 2013
}

% Hashimshony et al. (2012), Cell Rep. 2(3): CEL-Seq.
@article{Hashimshony2012-kd,
  title       = {{CEL-Seq}: single-cell {RNA-Seq} by multiplexed linear amplification},
  author      = {Hashimshony, Tamar and Wagner, Florian and Sher, Noa and Yanai, Itai},
  affiliation = {Department of Biology, Technion-Israel Institute of Technology, Haifa 32000, Israel.},
  abstract    = {High-throughput sequencing has allowed for unprecedented detail in gene expression analyses, yet its efficient application to single cells is challenged by the small starting amounts of RNA. We have developed CEL-Seq, a method for overcoming this limitation by barcoding and pooling samples before linearly amplifying mRNA with the use of one round of in vitro transcription. We show that CEL-Seq gives more reproducible, linear, and sensitive results than a PCR-based amplification method. We demonstrate the power of this method by studying early C. elegans embryonic development at single-cell resolution. Differential distribution of transcripts between sister cells is seen as early as the two-cell stage embryo, and zygotic expression in the somatic cell lineages is enriched for transcription factors. The robust transcriptome quantifications enabled by CEL-Seq will be useful for transcriptomic analyses of complex tissues containing populations of diverse cell types.},
  journal     = {Cell Rep.},
  volume      = 2,
  number      = 3,
  pages       = {666--673},
  month       = "27~" # sep,
  year        = 2012
}

% The entry below contains non-ASCII chars that could not be converted
% to a LaTeX equivalent.
% The entry that the note just above referred to was a second, identical copy
% of Macosko2015-ix, a key already defined earlier in this file. The repeated
% entry was removed here so the key is defined only once.

% Saliba et al. (2014), Nucleic Acids Res. 42(14): review of single-cell RNA-seq.
@article{Saliba2014-dy,
  title       = {Single-cell {RNA-seq}: advances and future challenges},
  author      = {Saliba, Antoine-Emmanuel and Westermann, Alexander J and Gorski, Stanislaw A and Vogel, J{\"{o}}rg},
  affiliation = {Institute for Molecular Infection Biology, University of W{\"{u}}rzburg, Josef-Schneider-Stra\ss{}e 2, D-97080 W{\"{u}}rzburg, Germany. Institute for Molecular Infection Biology, University of W{\"{u}}rzburg, Josef-Schneider-Stra\ss{}e 2, D-97080 W{\"{u}}rzburg, Germany. Institute for Molecular Infection Biology, University of W{\"{u}}rzburg, Josef-Schneider-Stra\ss{}e 2, D-97080 W{\"{u}}rzburg, Germany. Institute for Molecular Infection Biology, University of W{\"{u}}rzburg, Josef-Schneider-Stra\ss{}e 2, D-97080 W{\"{u}}rzburg, Germany joerg.vogel@uni-wuerzburg.de.},
  abstract    = {Phenotypically identical cells can dramatically vary with respect to behavior during their lifespan and this variation is reflected in their molecular composition such as the transcriptomic landscape. Single-cell transcriptomics using next-generation transcript sequencing (RNA-seq) is now emerging as a powerful tool to profile cell-to-cell variability on a genomic scale. Its application has already greatly impacted our conceptual understanding of diverse biological processes with broad implications for both basic and clinical research. Different single-cell RNA-seq protocols have been introduced and are reviewed here-each one with its own strengths and current limitations. We further provide an overview of the biological questions single-cell RNA-seq has been used to address, the major findings obtained from such studies, and current challenges and expected future developments in this booming field.},
  journal     = {Nucleic Acids Res.},
  volume      = 42,
  number      = 14,
  pages       = {8845--8860},
  month       = aug,
  year        = 2014
}

% Handley et al. (2015), Mol. Cell 58(4): designing cell-type-specific experiments.
% The accented letter in the second author's given name is wrapped as a BibTeX
% special character so it is handled as one letter.
@article{Handley2015-yi,
  title       = {Designing {Cell-Type-Specific} Genome-wide Experiments},
  author      = {Handley, Ava and Schauer, Tam{\'{a}}s and Ladurner, Andreas G and Margulies, Carla E},
  affiliation = {Department of Physiological Chemistry, Biomedical Center, Ludwig-Maximilians-University of Munich, Butenandtstrasse 5, 81377 Munich, Germany; International Max Planck Research School for Molecular and Cellular Life Sciences, Am Klopferspitz 18, 82152 Martinsried, Germany. Department of Molecular Biology, Biomedical Center, Ludwig-Maximilians-University of Munich, Schillerstrasse 44, 80336 Munich, Germany. Department of Physiological Chemistry, Biomedical Center, Ludwig-Maximilians-University of Munich, Butenandtstrasse 5, 81377 Munich, Germany; International Max Planck Research School for Molecular and Cellular Life Sciences, Am Klopferspitz 18, 82152 Martinsried, Germany; Center for Integrated Protein Science Munich (CIPSM), 81377 Munich, Germany; Munich Cluster for Systems Neurology (SyNergy), 80336 Munich, Germany. Department of Physiological Chemistry, Biomedical Center, Ludwig-Maximilians-University of Munich, Butenandtstrasse 5, 81377 Munich, Germany. Electronic address: carla.margulies@med.lmu.de.},
  abstract    = {Multicellular organisms depend on cell-type-specific division of labor for survival. Specific cell types have their unique developmental program and respond differently to environmental challenges, yet are orchestrated by the same genetic blueprint. A key challenge in biology is thus to understand how genes are expressed in the right place, at the right time, and to the right level. Further, this exquisite control of gene expression is perturbed in many diseases. As a consequence, coordinated physiological responses to the environment are compromised. Recently, innovative tools have been developed that are able to capture genome-wide gene expression using cell-type-specific approaches. These novel techniques allow us to understand gene regulation in vivo with unprecedented resolution and give us mechanistic insights into how multicellular organisms adapt to changing environments. In this article, we discuss the considerations needed when designing your own cell-type-specific experiment from the isolation of your starting material through selecting the appropriate controls and validating the data.},
  journal     = {Mol. Cell},
  volume      = 58,
  number      = 4,
  pages       = {621--631},
  month       = "21~" # may,
  year        = 2015
}

% Kolodziejczyk et al. (2015), Mol. Cell 58(4): technology and biology of single-cell RNA sequencing.
@article{Kolodziejczyk2015-xy,
  title       = {The Technology and Biology of {Single-Cell} {RNA} Sequencing},
  author      = {Kolodziejczyk, Aleksandra A and Kim, Jong Kyoung and Svensson, Valentine and Marioni, John C and Teichmann, Sarah A},
  affiliation = {European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI), Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, UK; Wellcome Trust Sanger Institute, Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SA, UK. European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI), Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, UK. European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI), Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, UK. European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI), Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, UK; Wellcome Trust Sanger Institute, Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SA, UK. European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI), Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SD, UK; Wellcome Trust Sanger Institute, Wellcome Trust Genome Campus, Hinxton, Cambridge CB10 1SA, UK. Electronic address: saraht@ebi.ac.uk.},
  abstract    = {The differences between individual cells can have profound functional consequences, in both unicellular and multicellular organisms. Recently developed single-cell mRNA-sequencing methods enable unbiased, high-throughput, and high-resolution transcriptomic analysis of individual cells. This provides an additional dimension to transcriptomic information relative to traditional methods that profile bulk populations of cells. Already, single-cell RNA-sequencing methods have revealed new biology in terms of the composition of tissues, the dynamics of transcription, and the regulatory relationships between genes. Rapid technological developments at the level of cell capture, phenotyping, molecular biology, and bioinformatics promise an exciting future with numerous biological and medical applications.},
  journal     = {Mol. Cell},
  volume      = 58,
  number      = 4,
  pages       = {610--620},
  month       = "21~" # may,
  year        = 2015
}

% Kharchenko et al. (2014), Nat. Methods 11(7): Bayesian single-cell differential expression.
@article{Kharchenko2014-ts,
  title       = {Bayesian approach to single-cell differential expression analysis},
  author      = {Kharchenko, Peter V and Silberstein, Lev and Scadden, David T},
  affiliation = {1] Center for Biomedical Informatics, Harvard Medical School, Boston, Massachusetts, USA. [2] Hematology/Oncology Program, Children's Hospital, Boston, Massachusetts, USA. [3] Harvard Stem Cell Institute, Cambridge, Massachusetts, USA. 1] Harvard Stem Cell Institute, Cambridge, Massachusetts, USA. [2] Center for Regenerative Medicine, Massachusetts General Hospital, Boston, Massachusetts, USA. [3] Department of Stem Cell and Regenerative Biology, Harvard University, Cambridge, Massachusetts, USA. 1] Harvard Stem Cell Institute, Cambridge, Massachusetts, USA. [2] Center for Regenerative Medicine, Massachusetts General Hospital, Boston, Massachusetts, USA. [3] Department of Stem Cell and Regenerative Biology, Harvard University, Cambridge, Massachusetts, USA.},
  abstract    = {Single-cell data provide a means to dissect the composition of complex tissues and specialized cellular environments. However, the analysis of such measurements is complicated by high levels of technical noise and intrinsic biological variability. We describe a probabilistic model of expression-magnitude distortions typical of single-cell RNA-sequencing measurements, which enables detection of differential expression signatures and identification of subpopulations of cells in a way that is more tolerant of noise.},
  journal     = {Nat. Methods},
  volume      = 11,
  number      = 7,
  pages       = {740--742},
  month       = jul,
  year        = 2014
}

% Jiang et al. (2011), Genome Res. 21(9): synthetic spike-in standards.
@article{Jiang2011-mu,
  title       = {Synthetic spike-in standards for {RNA-seq} experiments},
  author      = {Jiang, Lichun and Schlesinger, Felix and Davis, Carrie A and Zhang, Yu and Li, Renhua and Salit, Marc and Gingeras, Thomas R and Oliver, Brian},
  affiliation = {Section of Developmental Genomics, Laboratory of Cellular and Developmental Biology, National Institute of Diabetes and Digestive and Kidney Diseases, National Institutes of Health, Bethesda, MD 20892, USA.},
  abstract    = {High-throughput sequencing of cDNA (RNA-seq) is a widely deployed transcriptome profiling and annotation technique, but questions about the performance of different protocols and platforms remain. We used a newly developed pool of 96 synthetic RNAs with various lengths, and GC content covering a 2(20) concentration range as spike-in controls to measure sensitivity, accuracy, and biases in RNA-seq experiments as well as to derive standard curves for quantifying the abundance of transcripts. We observed linearity between read density and RNA input over the entire detection range and excellent agreement between replicates, but we observed significantly larger imprecision than expected under pure Poisson sampling errors. We use the control RNAs to directly measure reproducible protocol-dependent biases due to GC content and transcript length as well as stereotypic heterogeneity in coverage across transcripts correlated with position relative to RNA termini and priming sequence bias. These effects lead to biased quantification for short transcripts and individual exons, which is a serious problem for measurements of isoform abundances, but that can partially be corrected using appropriate models of bias. By using the control RNAs, we derive limits for the discovery and detection of rare transcripts in RNA-seq experiments. By using data collected as part of the model organism and human Encyclopedia of DNA Elements projects (ENCODE and modENCODE), we demonstrate that external RNA controls are a useful resource for evaluating sensitivity and accuracy of RNA-seq experiments for transcriptome discovery and quantification. These quality metrics facilitate comparable analysis across different samples, protocols, and platforms.},
  journal     = {Genome Res.},
  volume      = 21,
  number      = 9,
  pages       = {1543--1551},
  month       = sep,
  year        = 2011
}

% Kivioja et al. (2012), Nat. Methods 9(1): unique molecular identifiers.
@article{Kivioja2012-yt,
  title       = {Counting absolute numbers of molecules using unique molecular identifiers},
  author      = {Kivioja, Teemu and V{\"{a}}h{\"{a}}rautio, Anna and Karlsson, Kasper and Bonke, Martin and Enge, Martin and Linnarsson, Sten and Taipale, Jussi},
  affiliation = {Genome-Scale Biology Program, Institute of Biomedicine, University of Helsinki, Helsinki, Finland.},
  abstract    = {Counting individual RNA or DNA molecules is difficult because they are hard to copy quantitatively for detection. To overcome this limitation, we applied unique molecular identifiers (UMIs), which make each molecule in a population distinct, to genome-scale human karyotyping and mRNA sequencing in Drosophila melanogaster. Use of this method can improve accuracy of almost any next-generation sequencing method, including chromatin immunoprecipitation-sequencing, genome assembly, diagnostics and manufacturing-process control and monitoring.},
  journal     = {Nat. Methods},
  volume      = 9,
  number      = 1,
  pages       = {72--74},
  month       = jan,
  year        = 2012
}

% Stegle et al. (2015), Nat. Rev. Genet. 16(3): computational challenges in single-cell transcriptomics.
@article{Stegle2015-uv,
  title       = {Computational and analytical challenges in single-cell transcriptomics},
  author      = {Stegle, Oliver and Teichmann, Sarah A and Marioni, John C},
  affiliation = {European Molecular Biology Laboratory European Bioinformatics Institute, Wellcome Trust Genome Campus, Hinxton, Cambridge, CB10 1SD, UK. 1] European Molecular Biology Laboratory European Bioinformatics Institute, Wellcome Trust Genome Campus, Hinxton, Cambridge, CB10 1SD, UK. [2] Wellcome Trust Sanger Institute, Wellcome Trust Genome Campus, Hinxton, Cambridge, CB10 1SA, UK. 1] European Molecular Biology Laboratory European Bioinformatics Institute, Wellcome Trust Genome Campus, Hinxton, Cambridge, CB10 1SD, UK. [2] Wellcome Trust Sanger Institute, Wellcome Trust Genome Campus, Hinxton, Cambridge, CB10 1SA, UK.},
  abstract    = {The development of high-throughput RNA sequencing (RNA-seq) at the single-cell level has already led to profound new discoveries in biology, ranging from the identification of novel cell types to the study of global patterns of stochastic gene expression. Alongside the technological breakthroughs that have facilitated the large-scale generation of single-cell transcriptomic data, it is important to consider the specific computational and analytical challenges that still have to be overcome. Although some tools for analysing RNA-seq data from bulk cell populations can be readily applied to single-cell RNA-seq data, many new computational strategies are required to fully exploit this data type and to enable a comprehensive yet detailed study of gene expression at the single-cell level.},
  journal     = {Nat. Rev. Genet.},
  volume      = 16,
  number      = 3,
  pages       = {133--145},
  month       = mar,
  year        = 2015
}

% Levine et al. (2015), Cell 162(1): PhenoGraph.
@article{Levine2015-fk,
  title       = {{Data-Driven} Phenotypic Dissection of {AML} Reveals Progenitor-like Cells that Correlate with Prognosis},
  author      = {Levine, Jacob H and Simonds, Erin F and Bendall, Sean C and Davis, Kara L and Amir, El-Ad D and Tadmor, Michelle D and Litvin, Oren and Fienberg, Harris G and Jager, Astraea and Zunder, Eli R and Finck, Rachel and Gedman, Amanda L and Radtke, Ina and Downing, James R and Pe'er, Dana and Nolan, Garry P},
  affiliation = {Departments of Biological Sciences and Systems Biology, Columbia University, New York, NY 10027, USA. Baxter Laboratory in Stem Cell Biology, Department of Microbiology and Immunology, Stanford University, Stanford, CA 94305, USA. Department of Pathology, Stanford University, Stanford, CA 94305, USA. Baxter Laboratory in Stem Cell Biology, Department of Microbiology and Immunology, Stanford University, Stanford, CA 94305, USA. Departments of Biological Sciences and Systems Biology, Columbia University, New York, NY 10027, USA. Departments of Biological Sciences and Systems Biology, Columbia University, New York, NY 10027, USA. Departments of Biological Sciences and Systems Biology, Columbia University, New York, NY 10027, USA. Baxter Laboratory in Stem Cell Biology, Department of Microbiology and Immunology, Stanford University, Stanford, CA 94305, USA. Baxter Laboratory in Stem Cell Biology, Department of Microbiology and Immunology, Stanford University, Stanford, CA 94305, USA. Baxter Laboratory in Stem Cell Biology, Department of Microbiology and Immunology, Stanford University, Stanford, CA 94305, USA. Baxter Laboratory in Stem Cell Biology, Department of Microbiology and Immunology, Stanford University, Stanford, CA 94305, USA. Department of Pathology, St. Jude Children's Research Hospital, 262 Danny Thomas Place, Memphis, TN 38105, USA. Department of Pathology, St. Jude Children's Research Hospital, 262 Danny Thomas Place, Memphis, TN 38105, USA. Department of Pathology, St. Jude Children's Research Hospital, 262 Danny Thomas Place, Memphis, TN 38105, USA. Departments of Biological Sciences and Systems Biology, Columbia University, New York, NY 10027, USA. Electronic address: dpeer@biology.columbia.edu. Baxter Laboratory in Stem Cell Biology, Department of Microbiology and Immunology, Stanford University, Stanford, CA 94305, USA. Electronic address: gnolan@stanford.edu.},
  abstract    = {Acute myeloid leukemia (AML) manifests as phenotypically and functionally diverse cells, often within the same patient. Intratumor phenotypic and functional heterogeneity have been linked primarily by physical sorting experiments, which assume that functionally distinct subpopulations can be prospectively isolated by surface phenotypes. This assumption has proven problematic, and we therefore developed a data-driven approach. Using mass cytometry, we profiled surface and intracellular signaling proteins simultaneously in millions of healthy and leukemic cells. We developed PhenoGraph, which algorithmically defines phenotypes in high-dimensional single-cell data. PhenoGraph revealed that the surface phenotypes of leukemic blasts do not necessarily reflect their intracellular state. Using hematopoietic progenitors, we defined a signaling-based measure of cellular phenotype, which led to isolation of a gene expression signature that was predictive of survival in independent cohorts. This study presents new methods for large-scale analysis of single-cell heterogeneity and demonstrates their utility, yielding insights into AML pathophysiology.},
  journal     = {Cell},
  volume      = 162,
  number      = 1,
  pages       = {184--197},
  month       = "2~" # jul,
  year        = 2015,
  language    = {en}
}

% Tung et al. (2017), Sci. Rep. 7: batch effects in single-cell gene expression studies.
@article{Tung2017-ba,
  title       = {Batch effects and the effective design of single-cell gene expression studies},
  author      = {Tung, Po-Yuan and Blischak, John D and Hsiao, Chiaowen Joyce and Knowles, David A and Burnett, Jonathan E and Pritchard, Jonathan K and Gilad, Yoav},
  affiliation = {Department of Human Genetics, University of Chicago, Chicago, Illinois, USA. Department of Human Genetics, University of Chicago, Chicago, Illinois, USA. Committee on Genetics, Genomics, and Systems Biology, University of Chicago, Chicago, Illinois, USA. Department of Human Genetics, University of Chicago, Chicago, Illinois, USA. Department of Genetics, Stanford University, Stanford, CA, USA. Department of Radiology, Stanford University, Stanford, CA, USA. Department of Human Genetics, University of Chicago, Chicago, Illinois, USA. Department of Genetics, Stanford University, Stanford, CA, USA. Department of Biology, Stanford University, Stanford, CA, USA. Howard Hughes Medical Institute, Stanford University, CA, USA. Department of Human Genetics, University of Chicago, Chicago, Illinois, USA. Department of Medicine, University of Chicago, Chicago, Illinois, USA.},
  abstract    = {Single-cell RNA sequencing (scRNA-seq) can be used to characterize variation in gene expression levels at high resolution. However, the sources of experimental noise in scRNA-seq are not yet well understood. We investigated the technical variation associated with sample processing using the single-cell Fluidigm C1 platform. To do so, we processed three C1 replicates from three human induced pluripotent stem cell (iPSC) lines. We added unique molecular identifiers (UMIs) to all samples, to account for amplification bias. We found that the major source of variation in the gene expression data was driven by genotype, but we also observed substantial variation between the technical replicates. We observed that the conversion of reads to molecules using the UMIs was impacted by both biological and technical variation, indicating that UMI counts are not an unbiased estimator of gene expression levels. Based on our results, we suggest a framework for effective scRNA-seq studies.},
  journal     = {Sci. Rep.},
  volume      = 7,
  pages       = {39921},
  month       = "3~" # jan,
  year        = 2017,
  language    = {en}
}

@ARTICLE{Archer2016-zq, title = "Modeling Enzyme Processivity Reveals that {RNA-Seq} Libraries Are Biased in Characteristic and Correctable Ways", author = "Archer, Nathan and Walsh, Mark D and Shahrezaei, Vahid and Hebenstreit, Daniel", affiliation = "School of Life Sciences, University of Warwick, Coventry CV4 7AL, UK. School of Life Sciences, University of Warwick, Coventry CV4 7AL, UK. Department of Mathematics, Imperial College, London SW7 2AZ, UK. Electronic address: v.shahrezaei@imperial.ac.uk. 
School of Life Sciences, University of Warwick, Coventry CV4 7AL, UK. Electronic address: d.hebenstreit@warwick.ac.uk.", abstract = "Experimental procedures for preparing RNA-seq and single-cell (sc) RNA-seq libraries are based on assumptions regarding their underlying enzymatic reactions. Here, we show that the fairness of these assumptions varies within libraries: coverage by sequencing reads along and between transcripts exhibits characteristic, protocol-dependent biases. To understand the mechanistic basis of this bias, we present an integrated modeling framework that infers the relationship between enzyme reactions during library preparation and the characteristic coverage patterns observed for different protocols. Analysis of new and existing (sc)RNA-seq data from six different library preparation protocols reveals that polymerase processivity is the mechanistic origin of coverage biases. We apply our framework to demonstrate that lowering incubation temperature increases processivity, yield, and (sc)RNA-seq sensitivity in all protocols. We also provide correction factors based on our model for increasing accuracy of transcript quantification in existing samples prepared at standard temperatures. 
In total, our findings improve our ability to accurately reflect in vivo transcript abundances in (sc)RNA-seq libraries.", journal = "Cell Syst", volume = 3, number = 5, pages = "467--479.e12", month = "23~" # nov, year = 2016, keywords = "Bayesian framework; Markov Chain Monte Carlo; RNA-seq; bias; coverage; enzyme; mathematical modeling; polymerase; processivity; reverse transcriptase", language = "en" } @ARTICLE{Ziegenhain2017-cu, title = "Comparative Analysis of {Single-Cell} {RNA} Sequencing Methods", author = "Ziegenhain, Christoph and Vieth, Beate and Parekh, Swati and Reinius, Bj{\"o}rn and Guillaumet-Adkins, Amy and Smets, Martha and Leonhardt, Heinrich and Heyn, Holger and Hellmann, Ines and Enard, Wolfgang", affiliation = "Anthropology \& Human Genomics, Department of Biology II, Ludwig-Maximilians University, Gro{\ss}haderner Stra{\ss}e 2, 82152 Martinsried, Germany. Anthropology \& Human Genomics, Department of Biology II, Ludwig-Maximilians University, Gro{\ss}haderner Stra{\ss}e 2, 82152 Martinsried, Germany. Anthropology \& Human Genomics, Department of Biology II, Ludwig-Maximilians University, Gro{\ss}haderner Stra{\ss}e 2, 82152 Martinsried, Germany. Ludwig Institute for Cancer Research, Box 240, 171 77 Stockholm, Sweden; Department of Cell and Molecular Biology, Karolinska Institutet, 171 77 Stockholm, Sweden. CNAG-CRG, Centre for Genomic Regulation (CRG), Barcelona Institute of Science and Technology (BIST), 08028 Barcelona, Spain; Universitat Pompeu Fabra (UPF), 08002 Barcelona, Spain. Department of Biology II and Center for Integrated Protein Science Munich (CIPSM), Ludwig-Maximilians University, Gro{\ss}haderner Stra{\ss}e 2, 82152 Martinsried, Germany. Department of Biology II and Center for Integrated Protein Science Munich (CIPSM), Ludwig-Maximilians University, Gro{\ss}haderner Stra{\ss}e 2, 82152 Martinsried, Germany. 
CNAG-CRG, Centre for Genomic Regulation (CRG), Barcelona Institute of Science and Technology (BIST), 08028 Barcelona, Spain; Universitat Pompeu Fabra (UPF), 08002 Barcelona, Spain. Anthropology \& Human Genomics, Department of Biology II, Ludwig-Maximilians University, Gro{\ss}haderner Stra{\ss}e 2, 82152 Martinsried, Germany. Anthropology \& Human Genomics, Department of Biology II, Ludwig-Maximilians University, Gro{\ss}haderner Stra{\ss}e 2, 82152 Martinsried, Germany. Electronic address: enard@bio.lmu.de.", abstract = "Single-cell RNA sequencing (scRNA-seq) offers new possibilities to address biological and medical questions. However, systematic comparisons of the performance of diverse scRNA-seq protocols are lacking. We generated data from 583 mouse embryonic stem cells to evaluate six prominent scRNA-seq methods: CEL-seq2, Drop-seq, MARS-seq, SCRB-seq, Smart-seq, and Smart-seq2. While Smart-seq2 detected the most genes per cell and across cells, CEL-seq2, Drop-seq, MARS-seq, and SCRB-seq quantified mRNA levels with less amplification noise due to the use of unique molecular identifiers (UMIs). Power simulations at different sequencing depths showed that Drop-seq is more cost-efficient for transcriptome quantification of large numbers of cells, while MARS-seq, SCRB-seq, and Smart-seq2 are more efficient when analyzing fewer cells. Our quantitative comparison offers the basis for an informed choice among six prominent scRNA-seq methods, and it provides a framework for benchmarking further improvements of scRNA-seq protocols.", journal = "Mol. 
Cell", volume = 65, number = 4, pages = "631--643.e4", month = "16~" # feb, year = 2017, keywords = "cost-effectiveness; method comparison; power analysis; simulation; single-cell RNA-seq; transcriptomics", language = "en" } @ARTICLE{Welch2016-jr, title = "{SLICER: inferring branched, nonlinear cellular trajectories from single cell RNA-seq data}", author = "Welch, Joshua D and Hartemink, Alexander J and Prins, Jan F", affiliation = "Department of Computer Science, University of North Carolina at Chapel Hill, Chapel Hill, NC, 27599, USA. Curriculum in Bioinformatics and Computational Biology, University of North Carolina at Chapel Hill, Chapel Hill, NC, 27599, USA. Department of Computer Science, Duke University, Durham, NC, 27708, USA. Program in Computational Biology and Bioinformatics, Duke University, Durham, NC, 27708, USA. Department of Computer Science, University of North Carolina at Chapel Hill, Chapel Hill, NC, 27599, USA. prins@cs.unc.edu. Curriculum in Bioinformatics and Computational Biology, University of North Carolina at Chapel Hill, Chapel Hill, NC, 27599, USA. prins@cs.unc.edu.", abstract = "Single cell experiments provide an unprecedented opportunity to reconstruct a sequence of changes in a biological process from individual ``snapshots'' of cells. However, nonlinear gene expression changes, genes unrelated to the process, and the possibility of branching trajectories make this a challenging problem. We develop SLICER (Selective Locally Linear Inference of Cellular Expression Relationships) to address these challenges. SLICER can infer highly nonlinear trajectories, select genes without prior knowledge of the process, and automatically determine the location and number of branches and loops. SLICER recovers the ordering of points along simulated trajectories more accurately than existing methods. 
We demonstrate the effectiveness of SLICER on previously published data from mouse lung cells and neural stem cells.", journal = "Genome biology", volume = 17, number = 1, pages = "106", month = "23~" # may, year = 2016, url = "http://dx.doi.org/10.1186/s13059-016-0975-3", keywords = "Manifold learning; Single cell RNA-seq; Time series", language = "en", issn = "1465-6906", pmid = "27215581", doi = "10.1186/s13059-016-0975-3", pmc = "PMC4877799" } @ARTICLE{Cannoodt2016-uj, title = "{Computational methods for trajectory inference from single-cell transcriptomics}", author = "Cannoodt, Robrecht and Saelens, Wouter and Saeys, Yvan", affiliation = "Data Mining and Modelling for Biomedicine group, VIB Inflammation Research Center, Ghent, Belgium. Department of Internal Medicine, Ghent University, Ghent, Belgium. Center for Medical Genetics, Ghent University, Ghent, Belgium. Cancer Research Institute Ghent (CRIG), Ghent, Belgium. Data Mining and Modelling for Biomedicine group, VIB Inflammation Research Center, Ghent, Belgium. Department of Internal Medicine, Ghent University, Ghent, Belgium. Data Mining and Modelling for Biomedicine group, VIB Inflammation Research Center, Ghent, Belgium. yvan.saeys@ugent.be. Department of Internal Medicine, Ghent University, Ghent, Belgium. yvan.saeys@ugent.be.", abstract = "Recent developments in single-cell transcriptomics have opened new opportunities for studying dynamic processes in immunology in a high throughput and unbiased manner. Starting from a mixture of cells in different stages of a developmental process, unsupervised trajectory inference algorithms aim to automatically reconstruct the underlying developmental path that cells are following. In this review, we break down the strategies used by this novel class of methods, and organize their components into a common framework, highlighting several practical advantages and disadvantages of the individual methods. 
We also give an overview of new insights these methods have already providedregarding the wiring and gene regulation of cell differentiation. As the trajectory inference field is still in its infancy, we propose several future developments that will ultimately lead to a global and data-driven way of studying immune cell differentiation.", journal = "European journal of immunology", volume = 46, number = 11, pages = "2496--2506", month = nov, year = 2016, url = "http://dx.doi.org/10.1002/eji.201646347", keywords = "Bioinformatics; Cell differentiation; Single-cell transcriptomics", language = "en", issn = "0014-2980, 1521-4141", pmid = "27682842", doi = "10.1002/eji.201646347" } @ARTICLE{Pollen2014-cu, title = "Low-coverage single-cell {mRNA} sequencing reveals cellular heterogeneity and activated signaling pathways in developing cerebral cortex", author = "Pollen, Alex A and Nowakowski, Tomasz J and Shuga, Joe and Wang, Xiaohui and Leyrat, Anne A and Lui, Jan H and Li, Nianzhen and Szpankowski, Lukasz and Fowler, Brian and Chen, Peilin and Ramalingam, Naveen and Sun, Gang and Thu, Myo and Norris, Michael and Lebofsky, Ronald and Toppani, Dominique and Kemp, 2nd, Darnell W and Wong, Michael and Clerkson, Barry and Jones, Brittnee N and Wu, Shiquan and Knutsson, Lawrence and Alvarado, Beatriz and Wang, Jing and Weaver, Lesley S and May, Andrew P and Jones, Robert C and Unger, Marc A and Kriegstein, Arnold R and West, Jay A A", affiliation = "1] Eli and Edythe Broad Center of Regeneration Medicine and Stem Cell Research, University of California, San Francisco, San Francisco, California, USA. [2] Department of Neurology, University of California, San Francisco, San Francisco, California, USA. [3]. 1] Eli and Edythe Broad Center of Regeneration Medicine and Stem Cell Research, University of California, San Francisco, San Francisco, California, USA. [2] Department of Neurology, University of California, San Francisco, San Francisco, California, USA. [3]. 
1] Fluidigm Corporation, South San Francisco, California, USA. [2]. 1] Fluidigm Corporation, South San Francisco, California, USA. [2]. Fluidigm Corporation, South San Francisco, California, USA. 1] Eli and Edythe Broad Center of Regeneration Medicine and Stem Cell Research, University of California, San Francisco, San Francisco, California, USA. [2] Department of Neurology, University of California, San Francisco, San Francisco, California, USA. Fluidigm Corporation, South San Francisco, California, USA. Fluidigm Corporation, South San Francisco, California, USA. Fluidigm Corporation, South San Francisco, California, USA. Fluidigm Corporation, South San Francisco, California, USA. Fluidigm Corporation, South San Francisco, California, USA. Fluidigm Corporation, South San Francisco, California, USA. Fluidigm Corporation, South San Francisco, California, USA. Fluidigm Corporation, South San Francisco, California, USA. Fluidigm Corporation, South San Francisco, California, USA. Fluidigm Corporation, South San Francisco, California, USA. Fluidigm Corporation, South San Francisco, California, USA. Fluidigm Corporation, South San Francisco, California, USA. Fluidigm Corporation, South San Francisco, California, USA. Fluidigm Corporation, South San Francisco, California, USA. Fluidigm Corporation, South San Francisco, California, USA. Fluidigm Corporation, South San Francisco, California, USA. Fluidigm Corporation, South San Francisco, California, USA. Fluidigm Corporation, South San Francisco, California, USA. Fluidigm Corporation, South San Francisco, California, USA. Fluidigm Corporation, South San Francisco, California, USA. Fluidigm Corporation, South San Francisco, California, USA. Fluidigm Corporation, South San Francisco, California, USA. 1] Eli and Edythe Broad Center of Regeneration Medicine and Stem Cell Research, University of California, San Francisco, San Francisco, California, USA. 
[2] Department of Neurology, University of California, San Francisco, San Francisco, California, USA. Fluidigm Corporation, South San Francisco, California, USA.", abstract = "Large-scale surveys of single-cell gene expression have the potential to reveal rare cell populations and lineage relationships but require efficient methods for cell capture and mRNA sequencing. Although cellular barcoding strategies allow parallel sequencing of single cells at ultra-low depths, the limitations of shallow sequencing have not been investigated directly. By capturing 301 single cells from 11 populations using microfluidics and analyzing single-cell transcriptomes across downsampled sequencing depths, we demonstrate that shallow single-cell mRNA sequencing (~50,000 reads per cell) is sufficient for unbiased cell-type classification and biomarker identification. In the developing cortex, we identify diverse cell types, including multiple progenitor and neuronal subtypes, and we identify EGR1 and FOS as previously unreported candidate targets of Notch signaling in human but not mouse radial glia. Our strategy establishes an efficient method for unbiased analysis and comparison of cell populations from heterogeneous tissue by microfluidic single-cell capture and low-coverage sequencing of many cells.", journal = "Nat. Biotechnol.", volume = 32, number = 10, pages = "1053--1058", month = oct, year = 2014 } @ARTICLE{McCarthy2017-kb, title = "Scater: pre-processing, quality control, normalization and visualization of single-cell {RNA-seq} data in {R}", author = "McCarthy, Davis J and Campbell, Kieran R and Lun, Aaron T L and Wills, Quin F", affiliation = "European Molecular Biology Laboratory, European Bioinformatics Institute, Wellcome Genome Campus, CB10 1SD Hinxton, Cambridge, UK. Wellcome Trust Centre for Human Genetics, University of Oxford, Oxford OX3 7BN, UK. St Vincent's Institute of Medical Research, Fitzroy, Victoria 3065, Australia. 
Wellcome Trust Centre for Human Genetics, University of Oxford, Oxford OX3 7BN, UK. Department of Physiology, Anatomy and Genetics, University of Oxford, Oxford OX1 3QX, UK. CRUK Cambridge Institute, University of Cambridge, Cambridge CB2 0RE, UK. Wellcome Trust Centre for Human Genetics, University of Oxford, Oxford OX3 7BN, UK. Weatherall Institute for Molecular Medicine, University of Oxford, John Radcliffe Hospital, Oxford OX3 9DS, UK.", abstract = "MOTIVATION: Single-cell RNA sequencing (scRNA-seq) is increasingly used to study gene expression at the level of individual cells. However, preparing raw sequence data for further analysis is not a straightforward process. Biases, artifacts and other sources of unwanted variation are present in the data, requiring substantial time and effort to be spent on pre-processing, quality control (QC) and normalization. RESULTS: We have developed the R/Bioconductor package scater to facilitate rigorous pre-processing, quality control, normalization and visualization of scRNA-seq data. The package provides a convenient, flexible workflow to process raw sequencing reads into a high-quality expression dataset ready for downstream analysis. scater provides a rich suite of plotting tools for single-cell data and a flexible data structure that is compatible with existing tools and can be used as infrastructure for future software development. 
AVAILABILITY AND IMPLEMENTATION: The open-source code, along with installation instructions, vignettes and case studies, is available through Bioconductor at http://bioconductor.org/packages/scater CONTACT: davis@ebi.ac.ukSupplementary information: Supplementary data are available at Bioinformatics online.", journal = "Bioinformatics", month = "14~" # jan, year = 2017, language = "en" } @ARTICLE{Svensson2017-op, title = "Power analysis of single-cell {RNA-sequencing} experiments", author = "Svensson, Valentine and Natarajan, Kedar Nath and Ly, Lam-Ha and Miragaia, Ricardo J and Labalette, Charlotte and Macaulay, Iain C and Cvejic, Ana and Teichmann, Sarah A", affiliation = "European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI), Hinxton, Cambridge, UK. Wellcome Trust Sanger Institute, Hinxton, Cambridge, UK. European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI), Hinxton, Cambridge, UK. Wellcome Trust Sanger Institute, Hinxton, Cambridge, UK. Wellcome Trust Sanger Institute, Hinxton, Cambridge, UK. Wellcome Trust Sanger Institute, Hinxton, Cambridge, UK. Centre of Biological Engineering, University of Minho, Braga, Portugal. Wellcome Trust Sanger Institute, Hinxton, Cambridge, UK. Wellcome Trust-Medical Research Council Cambridge Stem Cell Institute, Cambridge, UK. Department of Haematology, University of Cambridge, Cambridge, UK. Wellcome Trust Sanger Institute, Hinxton, Cambridge, UK. Wellcome Trust Sanger Institute, Hinxton, Cambridge, UK. Wellcome Trust-Medical Research Council Cambridge Stem Cell Institute, Cambridge, UK. Department of Haematology, University of Cambridge, Cambridge, UK. European Molecular Biology Laboratory, European Bioinformatics Institute (EMBL-EBI), Hinxton, Cambridge, UK. 
Wellcome Trust Sanger Institute, Hinxton, Cambridge, UK.", abstract = "Single-cell RNA sequencing (scRNA-seq) has become an established and powerful method to investigate transcriptomic cell-to-cell variation, thereby revealing new cell types and providing insights into developmental processes and transcriptional stochasticity. A key question is how the variety of available protocols compare in terms of their ability to detect and accurately quantify gene expression. Here, we assessed the protocol sensitivity and accuracy of many published data sets, on the basis of spike-in standards and uniform data processing. For our workflow, we developed a flexible tool for counting the number of unique molecular identifiers (https://github.com/vals/umis/). We compared 15 protocols computationally and 4 protocols experimentally for batch-matched cell populations, in addition to investigating the effects of spike-in molecular degradation. Our analysis provides an integrated framework for comparing scRNA-seq protocols.", journal = "Nat. Methods", month = "6~" # mar, year = 2017, language = "en" }