YeoLab · olgabot · May 28, 2015 · May 22, 2015 · May 22, 2015 · May 22, 2015
diff --git a/.travis.yml b/.travis.yml
@@ -17,6 +17,7 @@ install:
 - source activate testenv
 - conda install --yes --file conda_requirements.txt
 - pip install -r requirements.txt
+- pip install coverage
 - pip install coveralls
 before_script:
 - git config --global user.email "olga.botvinnik@gmail.com"

diff --git a/Makefile b/Makefile
@@ -7,7 +7,7 @@ test:
 
 coverage:
 	cp testing/matplotlibrc .
-	py.test --durations=20 --cov flotilla --cov-report term-missing flotilla/test/
+	coverage run --source flotilla --omit=test --module py.test
 	rm matplotlibrc
 
 lint:

diff --git a/README.rst b/README.rst
@@ -1,115 +1,38 @@
-|Build Status|\ |Coverage Status|
-
 flotilla
 ========
 
-.. figure:: flotilla.png
-   :alt: flotilla Logo
-
-   flotilla Logo
-Installation instructions
-=========================
-
-From a clean install of Mavericks 10.9.4, follow these steps.
-
-All others must fend for themselves to install matplotlib, scipy and
-their third-party dependencies.
-
-*This part only needs to be done once*
-
--  `Install anaconda <https://store.continuum.io/cshop/anaconda/>`__
--  `Install Xcode (this can take an
-   hour) <https://itunes.apple.com/us/app/xcode/id497799835?mt=12>`__
--  Open Xcode and agree to terms and services (it is very important to
-   read them thoroughly)
--  Install `homebrew <http://brew.sh/>`__
-
-   ``ruby -e "$(curl -fsSL https://raw.github.com/Homebrew/homebrew/go/install)"``
-
--  Install freetype:
-
-   ``brew install freetype``
-
--  Install heavy packages (this can take an hour or more)
-
-::
-
-    conda install pip numpy scipy cython matplotlib nose six scikit-learn ipython networkx pandas tornado statsmodels setuptools pytest pyzmq jinja2 pyyaml`
-
--  Create a virtual environment
-   ``conda create -n flotilla_env pip numpy scipy cython matplotlib nose six scikit-learn ipython networkx pandas tornado statsmodels setuptools pytest pyzmq jinja2 pyyaml```
-
--  Switch to virtual environment
-
-   ``source activate flotilla_env``
-
--  Install flotilla and its dependencies (this can take a few minutes):
-
-   ``pip install git+https://github.com/YeoLab/flotilla.git``
-
--  Create a scratch space for your work
-
-   ``mkdir ~/flotilla_scratch``
-
--  Make a place to store flotilla projects
-
-   ``mkdir ~/flotilla_projects``
-
--  Go back to the real world
-
-   ``source deactivate``
-
-Start using flotilla:
-=====================
-
-Use the above instructions to create a flotilla-friendly environment,
-then:
-
--  switch to virtual environment
-
-   ``source activate flotilla_env``
-
--  start an ipython notebook:
-
-   ``ipython notebook --notebook-dir=~/flotilla_scratch``
-
--  create a new notebook by clicking ``New Notebook``
--  rename your notebook from "Untitled" to something more informative by
-   clicking the title panel.
--  load matplotlib backend using every notebook must use this to display
-   inline output
-
-   ``%matplotlib inline``
-
-Test interactive features with example data:
---------------------------------------------
-
-We have prepared a slice of the full dataset for testing and
-demonstration purposes.
-
-Run each of the following code lines in its own ipython notebook cell
-for an interactive feature.
-
-::
-
-    import flotilla
-    test_study = flotilla.embark('http://sauron.ucsd.edu/flotilla_projects/neural_diff_chr22/datapackage.json')
-
-    test_study.interactive_pca()
-
-    test_study.interactive_graph()
-
-    test_study.interactive_classifier()
-
-    test_study.interactive_lavalamp_pooled_inconsistent()
-
-IMPORTANT NOTE: for this test,several failures are expected since the
-test set is small. Adjust parameters to explore valid parameter spaces.
-For example, you can manually select ``all_genes`` as the
-``feature_subset`` from the drop-down menu that appears after running
-these interactive functions.
-
-.. |Build Status| image:: https://travis-ci.org/YeoLab/flotilla.svg?branch=master
-   :target: https://travis-ci.org/YeoLab/flotilla
-.. |Coverage Status| image:: https://img.shields.io/coveralls/YeoLab/flotilla.svg
-   :target: https://coveralls.io/r/YeoLab/flotilla?branch=master
+``flotilla`` is a Python package for visualizing transcriptome (RNA expression) data from hundreds of
+samples. We include utilities to perform common tasks on these large data matrices, including:
+
+* Dimensionality reduction
+* Classification and Regression
+* Outlier detection
+* Network graphs from covariance
+* Hierarchical clustering
+
+And common tasks for biological data including:
+
+* Renaming database features to gene symbols
+* Coloring/marking samples based on experimental phenotype
+* Removing poor-quality samples (technical outliers)
+
+
+Finally, ``flotilla`` is a platform for active collaboration between bioinformatics scientists and 
+traditional "wet lab" scientists. Leveraging `interactive widgets <https://github.com/ipython/ipython/tree/master/examples/Interactive%20Widgets>`_ 
+in the `IPython Notebook <http://ipython.org/notebook.html>`_, 
+we have created tools for simple and streamlined data exploration including:
+
+* Subsetting sample groups and feature (genes/splicing events) groups
+* Dynamically adjusting parameters for analysis
+* Integrating external lists of features from the web or local files
+
+These empower the "wet lab" scientists to ask questions on their own and give bioinformatics
+scientists a platform to share their analysis tools.
+
+
+What flotilla is **not**
+------------------------
+
+``flotilla`` is not a genomics pipeline. We expect that you have already generated
+data tables for gene expression, isoform expression and metadata. ``flotilla`` only makes 
+it easy to integrate all those data parts together once you have the pieces.
diff --git a/doc/releases/v0.2.7txt → doc/releases/v0.2.7.txt b/doc/releases/v0.2.7txt → doc/releases/v0.2.7.txt
diff --git a/doc/releases/v0.2.8.txt b/doc/releases/v0.2.8.txt
@@ -0,0 +1,14 @@
+v0.2.8 (........)
+------------------------
+
+Bug fixes
+~~~~~~~~~
+
+- ``Study.tidy_splicing_with_expression`` now handles splicing events that
+  map to multiple gene names. Fixes #304 with #309.
+
+Miscellaneous
+~~~~~~~~~~~~~
+
+- Rasterize lavalamp plot for visualizing many splicing events at once,
+  otherwise the image is too big. PR #308
diff --git a/doc/whatsnew.rst b/doc/whatsnew.rst
@@ -7,6 +7,10 @@ What's new in the package
 
 A catalog of new features, improvements, and bug-fixes in each release.
 
+.. include:: releases/v0.2.8.txt
+.. include:: releases/v0.2.7.txt
+.. include:: releases/v0.2.6.txt
+.. include:: releases/v0.2.5.txt
 .. include:: releases/v0.2.4.txt
 .. include:: releases/v0.2.3.txt
 .. include:: releases/v0.2.2.txt

diff --git a/flotilla/data_model/study.py b/flotilla/data_model/study.py
@@ -3,6 +3,7 @@
 heavier in terms of data load
 """
 import inspect
+import itertools
 import json
 import os
 import sys
@@ -1796,6 +1797,16 @@ def tidy_splicing_with_expression(self):
                                 id_vars=splicing_index_name,
                                 value_name='psi',
                                 var_name=splicing_columns_name)
+
+        s = splicing_common_id.dropna()
+
+        event_name_to_ensembl_ids = list(itertools.chain(
+            *[zip([k] * len(v.split(',')), v.split(',')) for k, v in
+              s.iteritems()]))
+        index, data = zip(*event_name_to_ensembl_ids)
+        event_name_to_ensembl_ids = pd.Series(data, index=index,
+                                              name=self._common_id)
+
         rename_columns = {}
         if splicing_index_name == 'index':
             rename_columns[splicing_index_name] = self._sample_id
@@ -1804,20 +1815,13 @@ def tidy_splicing_with_expression(self):
             splicing_columns_name = self._event_name
         splicing_tidy = splicing_tidy.rename(columns=rename_columns)
 
-        # Create a column of the common id on which to join splicing
-        # and expression
-        splicing_names = splicing_tidy[splicing_columns_name]
-        if isinstance(splicing_names, pd.Series):
-            splicing_tidy[self._common_id] = splicing_tidy[
-                splicing_columns_name].map(splicing_common_id)
-        else:
-            # Splicing ids are a multi-index, so the feature renamer will get
-            # the name of the feature.
-            splicing_tidy[self._common_id] = [
-                self.splicing.feature_renamer(x)
-                for x in splicing_names.itertuples(index=False)]
+        splicing_tidy = splicing_tidy.set_index(splicing_columns_name)
+        splicing_tidy = splicing_tidy.ix[event_name_to_ensembl_ids.index]
+        splicing_tidy = splicing_tidy.join(event_name_to_ensembl_ids)
 
-        splicing_tidy = splicing_tidy.dropna()
+        splicing_tidy = splicing_tidy.dropna().reset_index()
+        splicing_tidy = splicing_tidy.rename(
+            columns={'index': self._event_name})
 
         # Tidify expression
         expression = self.expression.data_original
@@ -1833,11 +1837,10 @@ def tidy_splicing_with_expression(self):
             columns={'index': self._sample_id})
         expression_tidy = expression_tidy.dropna()
 
-        splicing_tidy.set_index([self._sample_id, self._common_id],
-                                inplace=True)
-        expression_tidy.set_index([self._sample_id, self._common_id],
-                                  inplace=True)
-        return splicing_tidy.join(expression_tidy, how='inner').reset_index()
+        splicing_tidy_with_expression = splicing_tidy.merge(
+            expression_tidy, left_on=[self._sample_id, self._common_id],
+            right_on=[self._sample_id, self._common_id])
+        return splicing_tidy_with_expression
 
     def filter_splicing_on_expression(self, expression_thresh,
                                       sample_subset=None):