Skip to content

Commit

Permalink
[FEATURE] Profiling for single and paired end
Browse files Browse the repository at this point in the history
- code
- tests
- documentation
- examples
  • Loading branch information
valentynbez committed Nov 5, 2022
1 parent 54a4706 commit cd85ac7
Show file tree
Hide file tree
Showing 28 changed files with 79,490 additions and 18 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,5 @@ dmypy.json

# Pyre type checker
.pyre/

q2_motus/tests/data/dev.ipynb
24 changes: 24 additions & 0 deletions .travis.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
dist: trusty
sudo: false
language: python
before_install:
- export MPLBACKEND='Agg'
- wget -q https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
- export MINICONDA_PREFIX="$HOME/miniconda"
- bash miniconda.sh -b -p $MINICONDA_PREFIX
- export PATH="$MINICONDA_PREFIX/bin:$PATH"
- conda config --set always_yes yes
- conda update -q conda
- conda info -a
install:
- wget -q https://raw.githubusercontent.com/qiime2/environment-files/master/latest/staging/qiime2-latest-py36-linux-conda.yml
- conda env create -q -n test-env --file qiime2-latest-py36-linux-conda.yml
- source activate test-env
- conda install -q pytest-cov
- pip install flake8 coveralls
- make install
script:
- make lint
- travis_wait make test-cov
after_success:
- coveralls
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ test-cov: all
py.test --cov=q2_motus

install:
which motus || pip install motu-profiler
which bwa || git clone https://github.com/lh3/bwa.git || cd bwa || make || cd .. || rm -rf bwa
motus downloadDB
$(PYTHON) setup.py install

dev: all
Expand Down
43 changes: 42 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1 +1,42 @@
# q2-mOTUs
# q2-mOTUs
This is a QIIME 2 wrapper for (mOTU-tool)[https://motu-tool.org/] . For details on QIIME 2, see https://qiime2.org. The tool will help you to assign taxonomy to your metagenomic samples

# Requirements
- QIIME 2 >= 2022.8 (https://qiime2.org/)
- Git
# Installation
## 1. Install QIIME 2
Follow the instructions on https://docs.qiime2.org/2022.8/install/native/ to install QIIME 2. You will need to install the latest version of QIIME 2 (2022.8 or later).
## 2. Activate QIIME 2
Activate the QIIME 2 environment by running the following command:
```
conda activate qiime2-2022.8
```
## 3. Install mOTU-tool
```
git clone https://github.com/motu-tool/q2-mOTUs
cd q2-mOTUs
make install
```

# Usage
## 1. Import your data to QIIME 2
Import your metagenomic sequencing data in `.fastq` format (don't forget to preprocess your data) to QIIME2 as a `SampleData` semantic type using manifest file. See examples in `q2_motus/test/data`.
## 2. Run mOTU-tool
Whether you have a single sample or multiple samples, you can run mOTU-tool using the following command:
```
qiime motus classify \
--i-samples paired-end.qza \
--o-table paired-end-classified.qza \
--p-threads 4
```
## 3. Process the results
Use `qiime2` tools to visualize the results downstream, for example use:
```
qiime feature-table summarize \
--i-table paired-end-classified.qza \
--o-visualization paired-end-classified.qzv
```
To get summary of your feature table.

![image](expected_output/table-summary.png)
Binary file added example_output/paired-end-classified.qza
Binary file not shown.
Binary file added example_output/paired-end-classified.qzv
Binary file not shown.
Binary file added example_output/table-summary.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
4 changes: 2 additions & 2 deletions q2_motus/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from ._hello import print_hello
from ._taxonomy import classify
from ._version import get_versions

__version__ = get_versions()['version']
del get_versions

__all__ = ['print_hello']
__all__ = ['classify']
2 changes: 0 additions & 2 deletions q2_motus/_hello.py

This file was deleted.

147 changes: 147 additions & 0 deletions q2_motus/_taxonomy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
import os
import subprocess
import tempfile
from functools import partial
from typing import List, NamedTuple, Union, Literal

import numpy as np
import pandas as pd
from q2_types.per_sample_sequences import (
SingleLanePerSamplePairedEndFastqDirFmt,
SingleLanePerSampleSingleEndFastqDirFmt)


def _run_command(cmd: List[str], verbose: bool = False) -> None:
"""Run a command in a subprocess.
Parameters
----------
cmd : str
The command to run.
verbose : bool, optional
Whether to print the command before running it, by default False.
Raises
------
subprocess.CalledProcessError
If the command returns a non-zero exit status.
"""
if verbose:
print(cmd)
subprocess.run(cmd, check=True)


def preprocess_manifest(data: Union[SingleLanePerSampleSingleEndFastqDirFmt,
SingleLanePerSamplePairedEndFastqDirFmt]) -> pd.DataFrame:

"""Extracts sample names and filepaths from manifest file."""

manifest = pd.read_csv(os.path.join(str(data), data.manifest.pathspec),
header=0, comment='#')

manifest.filename = manifest.filename.apply(lambda x: os.path.join(str(data), x))
id_to_fps = manifest.pivot(index='sample-id', columns='direction', values='filename')

return id_to_fps


def profile_sample(
output_directory: str,
threads: int,
min_alen: int,
marker_gene_cutoff: int,
mode: Literal["base.coverage", "insert.raw_counts", "insert.scaled_counts"],
reference_genomes: bool,
ncbi_taxonomy: bool,
full_taxonomy: bool,
taxonomy_level: Literal["mOTU", "genus", "family", "order", "class", "phylum", "kingdom"],
row: NamedTuple) -> List[str]:

"""Run motu-profiler on a single sample."""

sample_name = row.Index
reads = row[1:]

cmd = ["motus", "profile"]

# single-end reads
if len(reads) == 1:
cmd.extend(["-s", reads[0]])
# paired-end reads
elif len(reads) == 2:
cmd.extend(["-f", reads[0], "-r", reads[1]])

cmd.extend(["-n", sample_name,
"-o", os.path.join(output_directory, sample_name + ".motus"),
"-t", str(threads),
"-l", str(min_alen),
"-g", str(marker_gene_cutoff),
"-y", str(mode),
"-k", str(taxonomy_level),
"-c"])

if reference_genomes:
cmd.extend(["-e"])

if ncbi_taxonomy:
cmd.extend(["-p"])

if full_taxonomy:
cmd.extend(["-q"])

p = _run_command(cmd)

return cmd


def load_table(tab_fp: str) -> pd.DataFrame:
'''Converts merged mOTU table to biom feature table'''
df = pd.read_csv(tab_fp, sep='\t', index_col=0, skiprows=2)
df = df.replace({0 : np.nan})
# dropping unassigned species
tab = df.dropna(axis = 0, thresh = df.shape[1] - 1).fillna(0).T
return tab


def classify(
samples: Union[SingleLanePerSamplePairedEndFastqDirFmt,
SingleLanePerSampleSingleEndFastqDirFmt],
threads: int,
min_alen: int = 75,
marker_gene_cutoff: int = 3,
mode: Literal["base.coverage", "insert.raw_counts", "insert.scaled_counts"] = "insert.scaled_counts",
reference_genomes: bool = False,
ncbi_taxonomy: bool = False,
full_taxonomy: bool = False,
taxonomy_level: Literal["mOTU", "genus", "family", "order", "class", "phylum", "kingdom"] = "mOTU",
) -> pd.DataFrame:
"""Run motu-profiler on paired-end samples data.
Parameters
----------
sequencing_data : PairEndSequencesWithQuality
The paired-end sequencing data to be profiled.
threads : int
Number of threads to use.
"""

id_to_fps = preprocess_manifest(samples)
# Create temporary directory

with tempfile.TemporaryDirectory() as temp_dir:
profile_dir = os.path.join(temp_dir, "profiles")
# consistent output and number of threads
profile_sample_partial = partial(profile_sample, profile_dir, threads, min_alen, marker_gene_cutoff, mode,
reference_genomes, ncbi_taxonomy, full_taxonomy, taxonomy_level)
# iterate over samples
for row in id_to_fps.itertuples():
os.makedirs(profile_dir, exist_ok=True)
profile_sample_partial(row)

# merge profiles
taxatable = os.path.join(temp_dir, "motus.merged")
cmd = ["motus", "merge", "-d", profile_dir, "-o", taxatable]
_run_command(cmd)

# output merged profiles as feature table
return load_table(taxatable)
49 changes: 36 additions & 13 deletions q2_motus/plugin_setup.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,51 @@
import q2_motus

from qiime2.plugin import (Int, Str, Float, Plugin, Citations, Metadata,
Visualization)
from qiime2.plugin import (Int, Range, Str, Bool, Choices, Plugin, Citations)

from q2_motus._hello import print_hello
from q2_motus._taxonomy import classify
from q2_types.sample_data import SampleData
from q2_types.per_sample_sequences import SequencesWithQuality, PairedEndSequencesWithQuality
from q2_types.feature_table import FeatureTable, Frequency

citations = Citations.load('citations.bib', package='q2_motus')

plugin = Plugin(
name='motus',
version=q2_motus.__version__,
website="https://github.com/motu-tool/q2-motus",
package='q2_motus',
citations=Citations.load('citations.bib', package='q2_motus'),
citations=[citations["Milanese2019-gw"]],
description=('This QIIME 2 plugin allows taxonomical profiling of '
'metagenomic samples using mOTU tool.'),
short_description='Taxonomical profiling of metagenomic samples using mOTU.'
)


plugin.visualizers.register_function(
function=print_hello,
inputs={},
parameters={},
input_descriptions={},
parameter_descriptions={},
name='Print hello.',
description='Print hello on the screen.'
)
plugin.methods.register_function(
function=classify,
inputs={"samples": SampleData[SequencesWithQuality | PairedEndSequencesWithQuality]},
outputs=[("table", FeatureTable[Frequency]),
("taxonomy", FeatureData[Taxonomy])],
parameters={"threads": Int % Range(1, None),
"min_alen": Int % Range(1, None),
"marker_gene_cutoff": Int % Range(1, 10),
"mode": Str % Choices(["base.coverage", "insert.raw_counts", "insert.scaled_counts"]),
"reference_genomes": Bool,
"ncbi_taxonomy": Bool,
"full_taxonomy": Bool,
"taxonomy_level": Str % Choices(["mOTU", "genus", "family", "order", "class", "phylum", "kingdom"])},
name="mOTU paired end profiler",
description="Executes a taxonomical classification of paired-end sample.",
input_descriptions={"samples": "The paired-end samples to be classified."},
parameter_descriptions={"threads": "The number of threads to use.",
"min_alen": "Minimum alignment length.",
"marker_gene_cutoff": "Minimum number of marker genes to be considered a species."
"Ranges from 1 to 10. A higher value increases precision (and lowers recall)",
"mode": "The mode to use for abundance estimation."
"base.coverage measures the average base coverage of the gene."
"insert.raw_counts measures the number of reads that map to the gene."
"insert.scaled_counts measures the number of reads that map to the gene, scaled by the length of the gene.",
"reference_genomes": "Use only species with reference genomes (ref-mOTUs).",
"ncbi_taxonomy": "Use NCBI taxonomy instead of mOTU.",
"full_taxonomy": "Use full taxonomy.",
"taxonomy_level": "The taxonomy level to use for profiling."})
Empty file added q2_motus/tests/__init__.py
Empty file.
Loading

0 comments on commit cd85ac7

Please sign in to comment.