qiime2 · thermokarst · Jun 11, 2020 · Oct 12, 2019 · Oct 17, 2019 · Oct 17, 2019
diff --git a/.github/workflows/lint-build-test.yml b/.github/workflows/lint-build-test.yml
@@ -0,0 +1,42 @@
+name: lint-build-test
+# build on every PR and commit to master
+on:
+  pull_request:
+  push:
+    branches:
+      - master
+
+jobs:
+  # Static checks: installs and runs q2lint, then flake8, in the checked-out
+  # working tree. The build-and-test job declares `needs: lint`.
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v2
+    - name: set up python 3.6
+      uses: actions/setup-python@v1
+      with:
+        python-version: 3.6
+    # Only pip itself is upgraded in this step; each linter is installed in
+    # the "lint" step below, immediately before it is invoked.
+    - name: install dependencies
+      run: python -m pip install --upgrade pip
+    - name: lint
+      run: |
+        pip install -q https://github.com/qiime2/q2lint/archive/master.zip
+        q2lint
+        pip install -q flake8
+        flake8
+
+  # Depends on the lint job (`needs: lint`) and is instantiated once per
+  # entry of the `os` matrix below.
+  build-and-test:
+    needs: lint
+    strategy:
+      matrix:
+        os: [ubuntu-latest, macos-latest]
+    runs-on: ${{ matrix.os }}
+    steps:
+    # NOTE(review): fetch-depth 0 presumably requests full history rather
+    # than a shallow clone -- confirm against the actions/checkout docs.
+    - uses: actions/checkout@v2
+      with:
+        fetch-depth: 0
+    # fetch all tags from origin (for versioneer)
+    - run: git fetch --depth=1 origin +refs/tags/*:refs/tags/*
+    # NOTE(review): presumably this action builds the plugin package and then
+    # runs the `additional-tests` command -- confirm against the action's
+    # own documentation.
+    - uses: qiime2/action-library-packaging@alpha1
+      with: 
+        plugin-name: q2-diversity-lib
+        additional-tests: pytest --pyargs q2_diversity_lib
diff --git a/.gitignore b/.gitignore
@@ -73,3 +73,7 @@ node_modules
 
 # VSCode dotfiles
 .vscode/*
+*.code-workspace
+
+# project notes
+notes/
diff --git a/.travis.yml b/.travis.yml
diff --git a/Makefile b/Makefile
@@ -11,7 +11,7 @@ test: all
 	py.test
 
 test-cov: all
-	py.test --cov=q2_diversity_lib
+	py.test --cov-report=term-missing --cov=q2_diversity_lib
 
 install:
 	$(PYTHON) setup.py install

diff --git a/README.md b/README.md
@@ -1,6 +1,6 @@
 # q2-diversity-lib
 
-[![Build Status](https://travis-ci.org/qiime2/q2-diversity-lib.svg?branch=master)](https://travis-ci.org/qiime2/q2-diversity-lib)
+![Build Status](https://github.com/qiime2/q2-diversity-lib/workflows/lint-build-test/badge.svg)
 [![Coverage Status](https://coveralls.io/repos/github/qiime2/q2-diversity-lib/badge.svg?branch=master)](https://coveralls.io/github/qiime2/q2-diversity-lib?branch=master)
 
 This is a QIIME 2 plugin. For details on QIIME 2, see https://qiime2.org.
diff --git a/ci/recipe/meta.yaml b/ci/recipe/meta.yaml
@@ -18,11 +18,14 @@ requirements:
     - setuptools
 
   run:
-    - pandas
-    - scikit-bio
     - biom-format >=2.1.5,<2.2.0
+    - decorator
+    - pandas
+    - psutil
     - qiime2 {{ release }}.*
     - q2-types {{ release }}.*
+    - scikit-bio
+    - unifrac
 
 test:
   imports:

diff --git a/q2_diversity_lib/__init__.py b/q2_diversity_lib/__init__.py
@@ -8,11 +8,13 @@
 
 from .alpha import (faith_pd, observed_features, pielou_evenness,
                     shannon_entropy)
+from .beta import (bray_curtis, jaccard, unweighted_unifrac, weighted_unifrac)
 from ._version import get_versions
 
 __version__ = get_versions()['version']
 del get_versions
 
 
 __all__ = ['faith_pd', 'observed_features', 'pielou_evenness',
-           'shannon_entropy']
+           'shannon_entropy', 'bray_curtis', 'jaccard', 'unweighted_unifrac',
+           'weighted_unifrac']
diff --git a/q2_diversity_lib/_util.py b/q2_diversity_lib/_util.py
@@ -6,10 +6,19 @@
 # The full license is in the file LICENSE, distributed with this software.
 # ----------------------------------------------------------------------------
 
-import numpy as np
-from functools import wraps
 from inspect import signature
 
+import numpy as np
+from decorator import decorator
+import psutil
+import biom
+
+from q2_types.feature_table import BIOMV210Format
+
+skbio_methods = ["bray_curtis", "jaccard"]
+unifrac_methods = ["unweighted_unifrac", "weighted_unifrac",
+                   "faith_pd"]
+
 
 def _drop_undefined_samples(counts: np.ndarray, sample_ids: np.ndarray,
                             minimum_nonzero_elements: int) -> (np.ndarray,
@@ -22,17 +31,71 @@ def _drop_undefined_samples(counts: np.ndarray, sample_ids: np.ndarray,
     return (filtered_counts, filtered_sample_ids)
 
 
-def _disallow_empty_tables(some_function):
-    @wraps(some_function)
-    def wrapper(*args, **kwargs):
-        try:
-            bound_signature = signature(wrapper).bind(*args, **kwargs)
-            table = bound_signature.arguments['table']
-        except KeyError as ex:
-            raise TypeError("The wrapped function has no parameter "
-                            + str(ex) + ".")
-        else:
-            if table.is_empty():
-                raise ValueError("The provided table object is empty")
-        return some_function(*args, **kwargs)
-    return wrapper
+@decorator
+def _disallow_empty_tables(wrapped_function, *args, **kwargs):
+    """Refuse to call ``wrapped_function`` with an empty feature table.
+
+    The call's arguments are bound to the wrapped function's signature to
+    find the argument named ``table``. A ``BIOMV210Format`` is loaded with
+    ``biom.load_table`` (from ``str(table)``); a ``biom.Table`` is checked
+    as-is.
+
+    Raises
+    ------
+    TypeError
+        If no ``table`` argument was bound, or it was bound to ``None``.
+    ValueError
+        If ``table`` is neither a ``BIOMV210Format`` nor a ``biom.Table``,
+        or if the table reports itself as empty.
+    """
+    bound = signature(wrapped_function).bind(*args, **kwargs)
+    table = bound.arguments.get('table')
+    if table is None:
+        raise TypeError("The wrapped function has no parameter 'table'")
+
+    # Normalise both accepted view types to an in-memory biom.Table.
+    if isinstance(table, BIOMV210Format):
+        loaded = biom.load_table(str(table))
+    elif isinstance(table, biom.Table):
+        loaded = table
+    else:
+        raise ValueError(f"Invalid view type: table passed as {type(table)}")
+
+    if loaded.is_empty():
+        raise ValueError("The provided table is empty")
+
+    # The wrapped function receives the caller's original arguments.
+    return wrapped_function(*args, **kwargs)
+
+
+@decorator
+def _validate_requested_cpus(wrapped_function, *args, **kwargs):
+    """Validate, and resolve ``'auto'`` for, a callable's CPU-count argument.
+
+    The wrapped callable must accept exactly one of ``n_jobs`` or
+    ``threads``. An integer request larger than the number of CPUs available
+    is rejected; the string ``'auto'`` is replaced with that number before
+    the wrapped callable is invoked.
+
+    Raises
+    ------
+    TypeError
+        If the wrapped callable has both, or neither, of the ``n_jobs`` and
+        ``threads`` parameters.
+    ValueError
+        If an integer request exceeds the number of available CPUs.
+    """
+    bound_signature = signature(wrapped_function).bind(*args, **kwargs)
+    bound_signature.apply_defaults()
+    arguments = bound_signature.arguments
+
+    cpu_params = [name for name in ('n_jobs', 'threads') if name in arguments]
+
+    # Handle duplicate param names
+    if len(cpu_params) > 1:
+        raise TypeError("Duplicate parameters: The _validate_requested_cpus "
+                        "decorator may not be applied to callables with both "
+                        "'n_jobs' and 'threads' parameters. Do you really need"
+                        " both?")
+    if not cpu_params:
+        raise TypeError("The _validate_requested_cpus decorator may not be"
+                        " applied to callables without an 'n_jobs' or "
+                        "'threads' parameter.")
+
+    param_name = cpu_params[0]
+    cpus_requested = arguments[param_name]
+
+    # If `Process.cpu_affinity` unavailable on system, fall back
+    # https://psutil.readthedocs.io/en/latest/index.html#psutil.cpu_count
+    try:
+        cpus = len(psutil.Process().cpu_affinity())
+    except AttributeError:
+        # `cpu_count` may return None when the count is undetermined; fall
+        # back to the logical count, then to 1, so the comparison below
+        # never sees None.
+        cpus = psutil.cpu_count(logical=False) or psutil.cpu_count() or 1
+
+    if isinstance(cpus_requested, int) and cpus_requested > cpus:
+        raise ValueError(f"The value passed to '{param_name}' cannot exceed "
+                         f"the number of processors ({cpus}) available to "
+                         "the system.")
+
+    if cpus_requested == 'auto':
+        # Substitute by parameter name in the bound arguments. Removing the
+        # first positional value equal to 'auto' could strip an unrelated
+        # argument, or fail when 'auto' is not among the positionals.
+        arguments[param_name] = cpus
+        return wrapped_function(*bound_signature.args,
+                                **bound_signature.kwargs)
+
+    return wrapped_function(*args, **kwargs)
diff --git a/q2_diversity_lib/alpha.py b/q2_diversity_lib/alpha.py
@@ -6,31 +6,22 @@
 # The full license is in the file LICENSE, distributed with this software.
 # ----------------------------------------------------------------------------
 
-import biom
 import pandas as pd
 import skbio.diversity
+import biom
+from unifrac import faith_pd as f_pd
-from unifrac import faith_pd as f_pd
+import unifrac
-from unifrac import faith_pd as f_pd
+import unifrac
 
-from ._util import _drop_undefined_samples, _disallow_empty_tables
+from q2_types.feature_table import BIOMV210Format
+from q2_types.tree import NewickFormat
+from ._util import (_drop_undefined_samples,
+                    _disallow_empty_tables)
 
 
 @_disallow_empty_tables
-def faith_pd(table: biom.Table, phylogeny: skbio.TreeNode) -> pd.Series:
-    presence_absence_table = table.pa()
-    counts = presence_absence_table.matrix_data.toarray().astype(int).T
-    sample_ids = presence_absence_table.ids(axis='sample')
-    feature_ids = presence_absence_table.ids(axis='observation')
-
-    try:
-        result = skbio.diversity.alpha_diversity(metric='faith_pd',
-                                                 counts=counts,
-                                                 ids=sample_ids,
-                                                 otu_ids=feature_ids,
-                                                 tree=phylogeny)
-    except skbio.tree.MissingNodeError as e:
-        message = str(e).replace('otu_ids', 'feature_ids')
-        message = message.replace('tree', 'phylogeny')
-        raise skbio.tree.MissingNodeError(message)
-
+def faith_pd(table: BIOMV210Format, phylogeny: NewickFormat) -> pd.Series:
+    """Compute faith_pd by delegating to ``unifrac.faith_pd``.
+
+    ``table`` and ``phylogeny`` are converted with ``str()`` and passed to
+    ``unifrac.faith_pd``; the returned object's ``name`` is set to
+    ``'faith_pd'`` before it is returned.
+    """
+    table_str = str(table)
+    tree_str = str(phylogeny)
+    result = unifrac.faith_pd(table_str, tree_str)
     result.name = 'faith_pd'
     return result
 

diff --git a/q2_diversity_lib/beta.py b/q2_diversity_lib/beta.py
@@ -0,0 +1,71 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2018-2020, QIIME 2 development team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file LICENSE, distributed with this software.
+# ----------------------------------------------------------------------------
+
+import biom
+import skbio.diversity
+import sklearn.metrics
+import unifrac
+
+from q2_types.feature_table import BIOMV210Format
+from q2_types.tree import NewickFormat
+from ._util import (_disallow_empty_tables,
+                    _validate_requested_cpus)
+
+
+# --------------------Non-Phylogenetic-----------------------
+@_disallow_empty_tables
+@_validate_requested_cpus
+def bray_curtis(table: biom.Table, n_jobs: int = 1) -> skbio.DistanceMatrix:
+    """Return the 'braycurtis' beta-diversity result for ``table``.
+
+    The table's matrix data is converted to a dense array and transposed,
+    then passed to ``skbio.diversity.beta_diversity`` together with the
+    table's sample ids. ``sklearn.metrics.pairwise_distances`` is supplied
+    as the pairwise function and ``n_jobs`` is forwarded to that call.
+    """
+    ids = table.ids(axis='sample')
+    dense_counts = table.matrix_data.toarray().T
+    distances = skbio.diversity.beta_diversity(
+        metric='braycurtis',
+        counts=dense_counts,
+        ids=ids,
+        validate=True,
+        pairwise_func=sklearn.metrics.pairwise_distances,
+        n_jobs=n_jobs
+    )
+    return distances
+
+
+@_disallow_empty_tables
+@_validate_requested_cpus
+def jaccard(table: biom.Table, n_jobs: int = 1) -> skbio.DistanceMatrix:
+    """Return the 'jaccard' beta-diversity result for ``table``.
+
+    The table's matrix data is converted to a dense array and transposed,
+    then passed to ``skbio.diversity.beta_diversity`` together with the
+    table's sample ids. ``sklearn.metrics.pairwise_distances`` is supplied
+    as the pairwise function and ``n_jobs`` is forwarded to that call.
+    """
+    ids = table.ids(axis='sample')
+    dense_counts = table.matrix_data.toarray().T
+    distances = skbio.diversity.beta_diversity(
+        metric='jaccard',
+        counts=dense_counts,
+        ids=ids,
+        validate=True,
+        pairwise_func=sklearn.metrics.pairwise_distances,
+        n_jobs=n_jobs
+    )
+    return distances
+
+
+# ------------------------Phylogenetic-----------------------
+@_disallow_empty_tables
+@_validate_requested_cpus
+def unweighted_unifrac(table: BIOMV210Format,
+                       phylogeny: NewickFormat,
+                       threads: int = 1,
+                       bypass_tips: bool = False) -> skbio.DistanceMatrix:
+    """Return the result of ``unifrac.unweighted`` for ``table``.
+
+    ``table`` and ``phylogeny`` are converted with ``str()`` before the
+    call. ``threads`` and ``bypass_tips`` are forwarded unchanged, and
+    ``variance_adjusted`` is always ``False``.
+    """
+    table_str = str(table)
+    tree_str = str(phylogeny)
+    return unifrac.unweighted(table_str, tree_str,
+                              threads=threads,
+                              variance_adjusted=False,
+                              bypass_tips=bypass_tips)
+
+
+@_disallow_empty_tables
+@_validate_requested_cpus
+def weighted_unifrac(table: BIOMV210Format,
+                     phylogeny: NewickFormat,
+                     threads: int = 1,
+                     bypass_tips: bool = False) -> skbio.DistanceMatrix:
+    """Return the result of ``unifrac.weighted_unnormalized`` for ``table``.
+
+    ``table`` and ``phylogeny`` are converted with ``str()`` before the
+    call. ``threads`` and ``bypass_tips`` are forwarded unchanged, and
+    ``variance_adjusted`` is always ``False``.
+    """
+    table_str = str(table)
+    tree_str = str(phylogeny)
+    return unifrac.weighted_unnormalized(table_str, tree_str,
+                                         threads=threads,
+                                         variance_adjusted=False,
+                                         bypass_tips=bypass_tips)