dhakim87 · dhakim87 · Sep 1, 2021 · Aug 12, 2021 · Aug 14, 2021 · Aug 27, 2021
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -0,0 +1,44 @@
+# Change Log
+
+## Version 0.1.3 (08/27/2021)
+
+### Changed
+- Migrated from Travis CI to GitHub Actions (#127).
+- Made `--map-as-rank` default when only mapping file(s) are provided (#132).
+- Renamed `--normalize|-z` to `--frac|-f` (#128).
+- Modified core algorithm which slightly improved performance (#124).
+
+### Added
+- Added `tool normalize` command, with multiple features (#124).
+- Added the feature to collapse a stratified table (#126).
+- Created a WoL FTP server, and added a link to it (#118).
+- Added a WoL standard operating procedure (`wolsop.sh`) and documentation (#116).
+- Added first [citation](https://www.biorxiv.org/content/10.1101/2021.04.04.438427v1.abstract) of Woltka (#111).
+- Added protocols for Bowtie2 / SHOGUN and Fastp (#121).
+- Added discussion about mapping uniqueness (#131).
+
+### Fixed
+- Fixed the "subject not found" issue in free-rank classification (#120).
+- Corrected paths to example files and directories (#117).
+
+
+## Version 0.1.2 (03/31/2021)
+
+### Changed
+- Updated Qiita documentation (#107).
+- Renamed "gOTU" to "OGU" (#104).
+
+### Added
+- Published at PyPI. Can be installed by `pip install woltka` (#108).
+- Added instructions for using MetaCyc and KEGG (#99, #101).
+- Added `tool collapse` command, which supports one-to-many classification (#99).
+
+### Fixed
+- Fixed handling of zero-length alignments (#105).
+
+
+## Version 0.1.1 (02/17/2021)
+
+### Added
+- First official release.
+
diff --git a/README.md b/README.md
@@ -4,7 +4,7 @@
 [![CI Status](https://github.com/qiyunzhu/woltka/actions/workflows/main.yml/badge.svg)](https://github.com/qiyunzhu/woltka/actions)
 [![Coverage Status](https://coveralls.io/repos/github/qiyunzhu/woltka/badge.svg?branch=master)](https://coveralls.io/github/qiyunzhu/woltka?branch=master)
 
-**Woltka** (Web of Life Toolkit App), is a bioinformatics package for shotgun metagenome data analysis. It takes full advantage of, and it not limited by, the [WoL](https://biocore.github.io/wol/) reference phylogeny. It bridges first-pass sequence aligners with advanced analytical platforms (such as QIIME 2). Highlights of this program include:
+**Woltka** (Web of Life Toolkit App) is a bioinformatics package for shotgun metagenome data analysis. It takes full advantage of, and is not limited by, the [WoL](https://biocore.github.io/wol/) reference phylogeny. It bridges first-pass sequence aligners with advanced analytical platforms (such as QIIME 2). Highlights of this program include:
 
 - OGU: fine-grained community ecology.
 - Tree-based, rank-free classification.

diff --git a/woltka/__init__.py b/woltka/__init__.py
@@ -10,7 +10,7 @@
 
 __name__ = 'woltka'
 __description__ = 'Web of Life ToolKit App'
-__version__ = '0.1.2'
+__version__ = '0.1.3'
 __license__ = 'BSD-3-Clause'
 __author__ = 'Qiyun Zhu'
 __email__ = 'qiyunzhu@gmail.com'

diff --git a/woltka/align.py b/woltka/align.py
@@ -30,7 +30,6 @@
 
 def plain_mapper(fh, fmt=None, n=1000):
     """Read an alignment file in chunks and yield query-to-subject(s) maps.
-
     Parameters
     ----------
     fh : file handle
@@ -39,14 +38,12 @@ def plain_mapper(fh, fmt=None, n=1000):
         Alignment file format.
     n : int, optional
         Number of lines per chunk.
-
     Yields
     ------
     deque of str
         Query queue.
-    deque of dict of str -> tuple
+    deque of set of str
         Subject(s) queue.
-
     Notes
     -----
     The design of this function aims to couple with the extremely large size of
@@ -72,20 +69,18 @@ def plain_mapper(fh, fmt=None, n=1000):
     for i, line in enumerate(chain(iter(head), fh)):
 
         # parse current alignment line
-        parsed = parser(line)
         try:
-            query, subject = parsed[:2]
+            query, subject = parser(line)[:2]
         except (TypeError, IndexError):
             continue
-        start, end = parsed[4:6] if len(parsed) >= 6 else (None, None)
 
-        # add subject to subject set of the same query Id,
-        # keeping track of read indices
+        # add subject to subject set of the same query Id
         if query == this:
-            subque[-1].setdefault(subject, []).append((start, end))
+            subque[-1].add(subject)
 
         # when query Id changes,
         else:
+
             # line number has reached target
             if i >= target:
 
@@ -102,9 +97,9 @@ def plain_mapper(fh, fmt=None, n=1000):
                 # next target line number
                 target = i + n
 
-            # create new query and subject map pair
+            # create new query and subject set pair
             qry_append(query)
-            sub_append({subject: [(start, end)]})
+            sub_append({subject})
 
             # update current query Id
             this = query
@@ -113,6 +108,68 @@ def plain_mapper(fh, fmt=None, n=1000):
     yield qryque, subque
 
 
+def range_mapper(fh, fmt=None, n=1000):
+    """Read an alignment file and yield maps of query to subject(s) and their
+    ranges.
+
+    Parameters
+    ----------
+    fh : file handle
+        Alignment file to parse.
+    fmt : str, optional
+        Alignment file format.
+    n : int, optional
+        Number of lines per chunk.
+
+    Yields
+    ------
+    deque of str
+        Query queue.
+    deque of dict of str to list of (int, int)
+        Subject-to-ranges queue.
+
+    Notes
+    -----
+    Same as `plain_mapper`, except that it also returns subject ranges, and
+    that lines without a usable range are skipped.
+
+    See Also
+    --------
+    plain_mapper
+    """
+    fmt, head = (fmt, []) if fmt else infer_align_format(fh)
+    parser = assign_parser(fmt)
+    qryque, subque = deque(), deque()
+    qry_append, sub_append = qryque.append, subque.append
+    this, target = None, n
+    for i, line in enumerate(chain(iter(head), fh)):
+        # retain subject range; skip lines lacking the six leading fields
+        try:
+            query, subject, _, _, start, end = parser(line)[:6]
+        except (TypeError, IndexError, ValueError):
+            continue
+
+        # skip alignments whose start or end is missing or zero
+        if not (start and end):
+            continue
+
+        # same query as previous line: extend its subject-to-ranges map
+        if query == this:
+            subque[-1].setdefault(subject, []).append((start, end))
+            continue
+
+        # query changed and chunk is full: emit it and start a new one
+        if i >= target:
+            yield qryque, subque
+            qryque, subque = deque(), deque()
+            qry_append, sub_append = qryque.append, subque.append
+            target = i + n
+        qry_append(query)
+        sub_append({subject: [(start, end)]})
+        this = query
+    yield qryque, subque
+
+
 def infer_align_format(fh):
     """Guess the format of an alignment file based on content.
 

diff --git a/woltka/cover.py b/woltka/cover.py