diff --git a/.appveyor.yml b/.appveyor.yml
index 2f6ccfd..74a9397 100644
--- a/.appveyor.yml
+++ b/.appveyor.yml
@@ -51,7 +51,8 @@ install:
   # pip will build them from source using the MSVC compiler matching the
   # target Python version and architecture
   - "python -m pip install wheel pytest"
-  - "python -m pip install -r requirements/prod.txt"
+  - "python -m pip install -r requirements/win.txt -f https://download.pytorch.org/whl/torch_stable.html"
+  - "python -m pip install -r requirements/dev.txt"
 
 build_script:
   # Build the compiled extension
diff --git a/LICENSE b/LICENSE
index 9fe4561..02d4aca 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,22 +1,15 @@
-MIT License
+phenotrex
+Copyright (C) 2019-2020, Lukas Lüftinger
 
-Copyright (c) 2019, Lukas Lüftinger
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
 
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
 
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
diff --git a/phenotrex/bin/prodigal.linux b/phenotrex/bin/prodigal.linux
new file mode 100755
index 0000000..c9812b0
Binary files /dev/null and b/phenotrex/bin/prodigal.linux differ
diff --git a/phenotrex/bin/prodigal.osx.10.9.5 b/phenotrex/bin/prodigal.osx.10.9.5
new file mode 100755
index 0000000..9b51733
Binary files /dev/null and b/phenotrex/bin/prodigal.osx.10.9.5 differ
diff --git a/phenotrex/bin/prodigal b/phenotrex/bin/prodigal.windows.exe
similarity index 86%
rename from phenotrex/bin/prodigal
rename to phenotrex/bin/prodigal.windows.exe
index 2c7dfc7..fcff06f 100755
Binary files a/phenotrex/bin/prodigal and b/phenotrex/bin/prodigal.windows.exe differ
diff --git a/phenotrex/cli/predict.py b/phenotrex/cli/predict.py
index d13d141..7eb4766 100644
--- a/phenotrex/cli/predict.py
+++ b/phenotrex/cli/predict.py
@@ -8,6 +8,9 @@
               required=False, help='Input genotype file.')
 @click.option('--classifier', required=True, type=click.Path(exists=True),
               help='Path of pickled classifier file.')
+@click.option('--min_proba', type=click.FloatRange(0.5, 1.0), default=0.5,
+              help='Class probability threshold for displaying predictions. '
+                   'Predictions below the threshold will be given as "N/A".')
 @click.option('--out_explain_per_sample', type=click.Path(dir_okay=False),
               help='Write SHAP explanations for each predicted sample to file (optional).')
 @click.option('--out_explain_summary', type=click.Path(dir_okay=False),
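Note on the lower bound of click.FloatRange(0.5, 1.0): for a binary classifier the predicted class always carries a probability of at least 0.5, so a smaller threshold could never suppress a prediction. A minimal sketch of using the new option through the Python API (the file names are illustrative, not part of this diff):

    from phenotrex.ml.prediction import predict

    # Hypothetical inputs; any pickled phenotrex classifier plus a matching
    # genotype file will do.
    predict(
        genotype='Sulfate_reducer.genotype',
        classifier='Sulfate_reducer.svm.pkl',
        min_proba=0.75,  # calls with class probability < 0.75 print as "N/A"
    )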
diff --git a/phenotrex/ml/cccv.py b/phenotrex/ml/cccv.py
index 0ffbbd1..ce059f5 100644
--- a/phenotrex/ml/cccv.py
+++ b/phenotrex/ml/cccv.py
@@ -124,7 +124,8 @@ def _completeness_cv(self, param, **kwargs) -> Dict[float, Dict[float, float]]:
         X_train, y_train, tn = get_x_y_tn(training_records)
         classifier.fit(X=X_train, y=y_train, **kwargs)
 
-        # initialize the resampler with the test_records only, so the samples are unknown to the classifier
+        # initialize the resampler with the test_records only,
+        # so the samples are unknown to the classifier
         resampler = TrainingRecordResampler(random_state=self.random_state, verb=False)
         resampler.fit(records=test_records)
         cv_scores = {}
@@ -147,7 +148,8 @@ def run(self, records: List[TrainingRecord]):
         """
         # TODO: run compress_vocabulary before?
 
-        self.logger.info("Begin completeness/contamination matrix crossvalidation on training data.")
+        self.logger.info(
+            "Begin completeness/contamination matrix crossvalidation on training data.")
         t1 = time()
         if self.n_jobs is None:
             cv_scores = map(self._completeness_cv, self._replicates(records, self.cv,
@@ -168,7 +170,8 @@ def run(self, records: List[TrainingRecord]):
         for comple in cv_scores_list[0].keys():
             mba[comple] = {}
             for conta in cv_scores_list[0][comple].keys():
-                single_result = [cv_scores_list[r][comple][conta] for r in range(self.n_replicates * self.cv)]
+                single_result = [cv_scores_list[r][comple][conta] for r in
+                                 range(self.n_replicates * self.cv)]
                 mean_over_fold_and_replicates = np.mean(single_result)
                 std_over_fold_and_replicates = np.std(single_result)
                 mba[comple][conta] = {"score_mean": mean_over_fold_and_replicates,
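For orientation: run() collects one score per replicate and fold, then averages with np.mean/np.std over all n_replicates * cv values per grid point. The resulting nested structure looks roughly like this (values invented for illustration):

    # completeness -> contamination -> aggregated statistics
    cccv_result = {
        0.9: {0.1: {'score_mean': 0.93, 'score_std': 0.02}},
        # one inner entry per (completeness, contamination) grid point
    }
    print(cccv_result[0.9][0.1]['score_mean'])  # -> 0.93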
diff --git a/phenotrex/ml/feature_select.py b/phenotrex/ml/feature_select.py
index 297bb38..56b1090 100644
--- a/phenotrex/ml/feature_select.py
+++ b/phenotrex/ml/feature_select.py
@@ -17,17 +17,14 @@
 
 def compress_vocabulary(records: List[TrainingRecord], pipeline: Pipeline):
     """
-    Method to group features, that store redundant information, to avoid overfitting and speed up process (in some
-    cases). Might be replaced or complemented by a feature selection method in future versions.
-
-    Compressing vocabulary is optional, for the test dataset it took 30 seconds, while the time saved later on is not
-    significant.
+    Method to group features that store redundant information,
+    to avoid overfitting and speed up the process (in some cases).
+    Might be replaced or complemented by a feature selection method in future versions.
 
     :param records: a list of TrainingRecord objects.
    :param pipeline: the targeted pipeline where the vocabulary should be modified
     :return: nothing, sets the vocabulary for CountVectorizer step
     """
-
     X, y, tn = get_x_y_tn(records)  # we actually only need X
     vec = pipeline.named_steps["vec"]
     if not vec.vocabulary:
@@ -58,8 +55,10 @@ def compress_vocabulary(records: List[TrainingRecord], pipeline: Pipeline):
     pipeline.named_steps["vec"].fixed_vocabulary_ = True
 
 
-def recursive_feature_elimination(records: List[TrainingRecord], pipeline: Pipeline, step: float = DEFAULT_STEP_SIZE,
-                                  n_features: int = None, random_state: np.random.RandomState = None):
+def recursive_feature_elimination(records: List[TrainingRecord], pipeline: Pipeline,
+                                  step: float = DEFAULT_STEP_SIZE,
+                                  n_features: int = None,
+                                  random_state: np.random.RandomState = None):
     """
     Function to apply RFE to limit the vocabulary used by the CustomVectorizer, optional step.
 
@@ -101,13 +100,15 @@ def recursive_feature_elimination(records: List[TrainingRecord], pipeline: Pipel
     support = selector.get_support()
     support = support.nonzero()[0]
     new_id = {support[x]: x for x in range(len(support))}
-    vocabulary = {feature: new_id[i] for feature, i in previous_vocabulary.items() if not new_id.get(i) is None}
+    vocabulary = {feature: new_id[i] for feature, i in previous_vocabulary.items() if
+                  not new_id.get(i) is None}
     size_after = selector.n_features_
 
     t2 = time()
-    logger.info(f"{size_after} features were selected of {original_size} using Recursive Feature Eliminiation"
-                f" in {np.round(t2 - t1, 2)} seconds.")
+    logger.info(
+        f"{size_after} features were selected of {original_size} using Recursive Feature Elimination"
+        f" in {np.round(t2 - t1, 2)} seconds.")
 
     # set vocabulary to vectorizer
     pipeline.named_steps["vec"].vocabulary = vocabulary
@@ -115,26 +116,3 @@ def recursive_feature_elimination(records: List[TrainingRecord], pipeline: Pipel
     pipeline.named_steps["vec"].fixed_vocabulary_ = True
 
     return size_after
-
-
-def multiple_step_rfecv(records: List[TrainingRecord], pipeline: Pipeline, n_features: int, step=(0.01, 0.01, 0.01, ),
-                        random_state: np.random.RandomState = None):
-    """
-    Function to apply multiple steps-sizes of RFECV in series, currently not used. Strategy might be problematic,
-    no clear benefit. #TODO rethink or remove
-
-    :param records: Data used
-    :param pipeline: The base estimator used
-    :param n_features: Goal number of features
-    :param step: List of steps that should be applied
-    :param random_state: random state for deterministic results
-    :return:
-    """
-    # step = [0.0025]
-    for s in step:
-        size_after = recursive_feature_elimination(records, pipeline=pipeline, step=s, n_features=n_features,
-                                                   random_state=random_state)
-        if size_after == n_features:
-            break
-
-    return size_after
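The subtle part of recursive_feature_elimination is the vocabulary remapping after RFE: the surviving feature indices are compacted into a gapless 0..n-1 index space. A self-contained sketch of that pattern with scikit-learn, on toy data (phenotrex wires this into its own vectorizer/pipeline instead):

    import numpy as np
    from sklearn.feature_selection import RFE
    from sklearn.svm import LinearSVC

    rng = np.random.RandomState(2)
    X = rng.rand(20, 10)                    # toy feature matrix
    y = rng.randint(0, 2, 20)               # toy binary labels
    vocabulary = {f'feat_{i}': i for i in range(10)}

    selector = RFE(LinearSVC(), n_features_to_select=5, step=0.2)
    selector.fit(X, y)
    support = selector.get_support().nonzero()[0]    # surviving column indices
    new_id = {support[x]: x for x in range(len(support))}
    # old column index -> compacted index; dropped features fall out
    vocabulary = {f: new_id[i] for f, i in vocabulary.items()
                  if new_id.get(i) is not None}

(The sketch uses the more idiomatic `is not None` where the diff keeps `not ... is None`.)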
diff --git a/phenotrex/ml/prediction.py b/phenotrex/ml/prediction.py
index 839978d..8e635d5 100644
--- a/phenotrex/ml/prediction.py
+++ b/phenotrex/ml/prediction.py
@@ -10,7 +10,7 @@
     from phenotrex.util.helpers import fail_missing_dependency as fastas_to_grs
 
 
-def predict(fasta_files=tuple(), genotype=None, classifier=None,
+def predict(fasta_files=tuple(), genotype=None, classifier=None, min_proba=0.5,
             out_explain_per_sample=None, out_explain_summary=None,
             shap_n_samples=None, n_max_explained_features=None, verb=False):
     """
@@ -71,4 +71,8 @@ def predict(fasta_files=tuple(), genotype=None, classifier=None,
     print(f"# Trait: {model.trait_name}")
     print("Identifier\tTrait present\tConfidence")
     for record, result, probability in zip(gr, preds, probas):
-        print(f"{record.identifier}\t{translate_output[result]}\t{str(round(probability[result], 4))}")
+        if probability[result] < min_proba:
+            result_disp = "N/A"
+        else:
+            result_disp = translate_output[result]
+        print(f"{record.identifier}\t{result_disp}\t{str(round(probability[result], 4))}")
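Worth noting: the confidence column is still printed for suppressed calls, so "N/A" rows stay auditable. Illustrative output only — the numbers are invented and the exact present/absent labels come from translate_output, which is outside this hunk:

    # Trait: Sulfate_reducer
    Identifier	Trait present	Confidence
    GCA_000007085.1	YES	0.9823
    GCA_000011905.1	N/A	0.5531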
""" X_agg, shap_agg, _ = self._get_feature_data() @@ -125,12 +125,11 @@ def _get_sorted_by_shap_data(self, sort_by_idx=None) -> Tuple[ absshap = np.apply_along_axis(np.abs, feature_axis, shap_agg) if sort_by_idx is None: # sort features by absolute change in shap over all classes and samples sort_criterion = np.apply_over_axes(np.sum, absshap, nonfeature_axes) - feature_sort_inds = np.squeeze(np.argsort(sort_criterion))[::-1] else: # sort features by absolute change in shap over all classes for given sample idx sort_criterion = np.sum(absshap[sort_by_idx, ...], axis=0) feature_sort_inds = np.squeeze(np.argsort(sort_criterion))[::-1] - return X_agg[:, feature_sort_inds], shap_agg[..., feature_sort_inds], \ - self._used_feature_names[feature_sort_inds] + return (X_agg[:, feature_sort_inds], shap_agg[..., feature_sort_inds], + self._used_feature_names[feature_sort_inds]) def plot_shap_force(self, sample_name: str, **kwargs): """ @@ -196,7 +195,6 @@ def get_shap_force(self, sample_name: str, n_max_features: int = 20) -> pd.DataF :param sample_name: :param n_max_features: - :param nsamples: :return: a dataframe of the n_max_features most influential features, their value in the sample, and the associated SHAP value(s). @@ -215,6 +213,7 @@ def get_shap_force(self, sample_name: str, n_max_features: int = 20) -> pd.DataF shap_vals = [shap_agg_s[i, :n_max_features].round(5), ] sample_names = [sample_name] * len(fns) df_arrs = [sample_names, fns, feature_vals, *shap_vals] + df_arrs = [np.array(x) for x in df_arrs] df_labels = ['sample', 'feature', 'feature_value', *[f'SHAP value ({x})' for x in self._class_names]][:len(df_arrs)] df = pd.DataFrame(df_arrs, index=df_labels).T @@ -223,7 +222,8 @@ def get_shap_force(self, sample_name: str, n_max_features: int = 20) -> pd.DataF def get_shap_summary(self, n_max_features: int = 50): """ - Get summary of features for all predictions, sorted by average impact of feature on shap value. + Get summary of features for all predictions, + sorted by average impact of feature on shap value. :param n_max_features: :return: a DataFrame of most important SHAP values for samples in the given dataset. diff --git a/phenotrex/structure/records.py b/phenotrex/structure/records.py index cc980e4..b83d714 100644 --- a/phenotrex/structure/records.py +++ b/phenotrex/structure/records.py @@ -1,15 +1,15 @@ # # Created by Lukas Lüftinger on 2/5/19. # -from typing import List +from typing import List, Optional from dataclasses import dataclass -"""Data structures containing Genotype and Phenotype information.""" - @dataclass class GenotypeRecord: - """ TODO add docstring """ + """ + Genomic features of a sample referenced by `identifier`. + """ identifier: str features: List[str] @@ -19,7 +19,12 @@ def __repr__(self): @dataclass class PhenotypeRecord: - """ TODO add docstring """ + """ + Ground truth labels of sample `identifier`, + indicating presence/absence of trait `trait_name`: + - 0 if trait is absent + - 1 if trait is present + """ identifier: str trait_name: str trait_sign: int @@ -30,10 +35,16 @@ def __repr__(self): @dataclass class GroupRecord: - """ TODO add docstring """ + """ + Group label of sample `identifier`. + Notes + ----- + Useful for leave-one-group-out cross-validation (LOGO-CV), + for example, to take taxonomy into account. 
+ """ identifier: str - group_name: str - group_id: int + group_name: Optional[str] + group_id: Optional[int] def __repr__(self): return f"{self.identifier} group({self.group_name})={self.group_id}" @@ -41,11 +52,15 @@ def __repr__(self): @dataclass class TrainingRecord(GenotypeRecord, PhenotypeRecord, GroupRecord): - """ TODO add docstring """ - pass - + """ + Sample containing Genotype-, Phenotype- and GroupRecords, + suitable as machine learning input for a single observation. + """ def __repr__(self): gr_repr = GenotypeRecord.__repr__(self).split(' ')[1] pr_repr = PhenotypeRecord.__repr__(self).split(' ')[1] - gro_repr = GroupRecord.__repr__(self).split(' ')[1] + if self.group_name is not None and self.group_id is not None: + gro_repr = GroupRecord.__repr__(self).split(' ')[1] + else: + gro_repr = '' return f"id={self.identifier} {' '.join([gr_repr, pr_repr, gro_repr])}" diff --git a/phenotrex/transforms/annotation.py b/phenotrex/transforms/annotation.py index 5c2ed77..4dbbfa1 100644 --- a/phenotrex/transforms/annotation.py +++ b/phenotrex/transforms/annotation.py @@ -1,4 +1,6 @@ import os +import sys +from collections import namedtuple from typing import List from pathlib import Path from pkg_resources import resource_filename @@ -7,29 +9,51 @@ from concurrent.futures import ProcessPoolExecutor import torch -from Bio.SeqIO import SeqRecord, parse -from deepnog.deepnog import load_nn, predict, set_device, create_df -from deepnog.dataset import ProteinDataset +from deepnog.inference import load_nn, predict +from deepnog.io import create_df, get_weights_path +from deepnog.utils import set_device +from deepnog.dataset import ProteinDataset, ProteinIterator +from Bio.SeqIO import SeqRecord, parse, write from tqdm.auto import tqdm from phenotrex.io.flat import load_fasta_file from phenotrex.structure.records import GenotypeRecord - -PRODIGAL_BIN_PATH = resource_filename('phenotrex', 'bin/prodigal') -DEEPNOG_WEIGHTS_PATH = resource_filename('deepnog', 'parameters/eggNOG5/2/deepencoding.pth') +PRODIGAL_BIN_SUFFIX = {'win32': 'windows.exe', 'darwin': 'osx.10.9.5'}.get(sys.platform, 'linux') +PRODIGAL_BIN_PATH = resource_filename('phenotrex', f'bin/prodigal.{PRODIGAL_BIN_SUFFIX}') DEEPNOG_ARCH = 'deepencoding' -EGGNOGDB_VERS = '5.0' +DEEPNOG_VALID_DBS = { + 'eggNOG5', +} +DEEPNOG_VALID_TAX_LEVELS = { + 1, + 2, +} + + +class PreloadedProteinIterator(ProteinIterator): + """Hack ProteinDataset to load from list directly.""" + def __init__(self, protein_list: List[SeqRecord], aa_vocab, format): + self.iterator = (x for x in protein_list) + self.vocab = aa_vocab + self.format = format + self.start = 0 + self.pos = None + self.step = 0 + self.sequence = namedtuple('sequence', + ['index', 'id', 'string', 'encoded']) class PreloadedProteinDataset(ProteinDataset): """Hack ProteinDataset to load from list directly.""" def __init__(self, protein_list: List[SeqRecord]): - try: - super().__init__(file='') - except ValueError: - pass # >:D - self.iter = (x for x in protein_list) + super().__init__(file=None) + self.protein_list = protein_list + + def __iter__(self): + return PreloadedProteinIterator(protein_list=self.protein_list, + aa_vocab=self.vocab, + format=self.f_format) def fastas_to_grs(fasta_files: List[str], verb: bool = False, @@ -40,7 +64,7 @@ def fastas_to_grs(fasta_files: List[str], verb: bool = False, :param fasta_files: a list of DNA and/or protein FASTA files to be converted into GenotypeRecords. :param verb: Whether to display progress of annotation with tqdm. 
diff --git a/phenotrex/transforms/annotation.py b/phenotrex/transforms/annotation.py
index 5c2ed77..4dbbfa1 100644
--- a/phenotrex/transforms/annotation.py
+++ b/phenotrex/transforms/annotation.py
@@ -1,4 +1,6 @@
 import os
+import sys
+from collections import namedtuple
 from typing import List
 from pathlib import Path
 from pkg_resources import resource_filename
@@ -7,29 +9,51 @@
 from concurrent.futures import ProcessPoolExecutor
 
 import torch
-from Bio.SeqIO import SeqRecord, parse
-from deepnog.deepnog import load_nn, predict, set_device, create_df
-from deepnog.dataset import ProteinDataset
+from deepnog.inference import load_nn, predict
+from deepnog.io import create_df, get_weights_path
+from deepnog.utils import set_device
+from deepnog.dataset import ProteinDataset, ProteinIterator
+from Bio.SeqIO import SeqRecord, parse, write
 from tqdm.auto import tqdm
 
 from phenotrex.io.flat import load_fasta_file
 from phenotrex.structure.records import GenotypeRecord
 
-
-PRODIGAL_BIN_PATH = resource_filename('phenotrex', 'bin/prodigal')
-DEEPNOG_WEIGHTS_PATH = resource_filename('deepnog', 'parameters/eggNOG5/2/deepencoding.pth')
+PRODIGAL_BIN_SUFFIX = {'win32': 'windows.exe', 'darwin': 'osx.10.9.5'}.get(sys.platform, 'linux')
+PRODIGAL_BIN_PATH = resource_filename('phenotrex', f'bin/prodigal.{PRODIGAL_BIN_SUFFIX}')
 DEEPNOG_ARCH = 'deepencoding'
-EGGNOGDB_VERS = '5.0'
+DEEPNOG_VALID_DBS = {
+    'eggNOG5',
+}
+DEEPNOG_VALID_TAX_LEVELS = {
+    1,
+    2,
+}
+
+
+class PreloadedProteinIterator(ProteinIterator):
+    """Hack ProteinDataset to load from list directly."""
+    def __init__(self, protein_list: List[SeqRecord], aa_vocab, format):
+        self.iterator = (x for x in protein_list)
+        self.vocab = aa_vocab
+        self.format = format
+        self.start = 0
+        self.pos = None
+        self.step = 0
+        self.sequence = namedtuple('sequence',
+                                   ['index', 'id', 'string', 'encoded'])
 
 
 class PreloadedProteinDataset(ProteinDataset):
     """Hack ProteinDataset to load from list directly."""
     def __init__(self, protein_list: List[SeqRecord]):
-        try:
-            super().__init__(file='')
-        except ValueError:
-            pass  # >:D
-        self.iter = (x for x in protein_list)
+        super().__init__(file=None)
+        self.protein_list = protein_list
+
+    def __iter__(self):
+        return PreloadedProteinIterator(protein_list=self.protein_list,
+                                        aa_vocab=self.vocab,
+                                        format=self.f_format)
 
 
 def fastas_to_grs(fasta_files: List[str], verb: bool = False,
@@ -40,7 +64,7 @@ def fastas_to_grs(fasta_files: List[str], verb: bool = False,
     :param fasta_files: a list of DNA and/or protein FASTA files to be converted into
                         GenotypeRecords.
     :param verb: Whether to display progress of annotation with tqdm.
-    :param n_threads: Number of threads to run in parallel. Default, use up to all available CPU cores.
+    :param n_threads: Number of parallel threads. By default, use all available CPU cores.
     :returns: A list of GenotypeRecords corresponding with supplied FASTA files.
     """
     n_threads = min(os.cpu_count(), n_threads) if n_threads is not None else os.cpu_count()
@@ -66,43 +90,54 @@ def fasta_to_gr(fasta_file: str, verb: bool = False) -> GenotypeRecord:
     fname = Path(str(fasta_file)).name
     seqtype, seqs = load_fasta_file(fasta_file)
     if seqtype == 'protein':
-        return annotate_with_deepnog(fname, seqs, verb)
+        return annotate_with_deepnog(fname, seqs, verb=verb)
     else:
-        return annotate_with_deepnog(fname, call_proteins(fasta_file), verb)
+        return annotate_with_deepnog(fname, call_proteins(seqs), verb=verb)
 
 
-def call_proteins(fna_file: str) -> List[SeqRecord]:
+def call_proteins(seqs: List[SeqRecord]) -> List[SeqRecord]:
     """
     Perform protein calling with prodigal.
 
-    :param fna_file: A nucleotide fasta file.
-    :returns: a list of SeqRecords suitable for annotation with deepnog.
+    :param seqs: A list of DNA fasta SeqRecords.
+    :returns: a list of protein fasta SeqRecords suitable for annotation with deepnog.
     """
-    with NamedTemporaryFile(mode='w+') as tmp_f:
-        check_call([PRODIGAL_BIN_PATH, '-i', fna_file, '-a', tmp_f.name],
-                   stderr=DEVNULL, stdout=DEVNULL)
-        tmp_f.seek(0)
-        return list(parse(tmp_f, 'fasta'))
+    with NamedTemporaryFile(mode='w', delete=False) as fna_file:
+        write(seqs, fna_file, format='fasta')
+    with NamedTemporaryFile(mode='r', delete=False) as faa_file:
+        check_call([
+            PRODIGAL_BIN_PATH,
+            '-i', fna_file.name,
+            '-a', faa_file.name
+        ], stderr=DEVNULL, stdout=DEVNULL)
+        parsed = list(parse(faa_file, 'fasta'))
+    os.unlink(fna_file.name)
+    os.unlink(faa_file.name)  # cannot re-use .name of open NamedTemporaryFile under Win32
+    return parsed
 
 
 def annotate_with_deepnog(identifier: str, protein_list: List[SeqRecord],
+                          database: str = 'eggNOG5', tax_level: int = 2,
                           verb: bool = True) -> GenotypeRecord:
     """
     Perform calling of EggNOG5 clusters on a list of SeqRecords belonging to a sample,
     using deepnog.
 
     :param identifier: The name associated with the sample.
     :param protein_list: A list of SeqRecords containing protein sequences.
-    :param verb: Whether to use tqdm for progress calculation
+    :param database: Orthologous group/family database to use.
+    :param tax_level: The NCBI taxon ID of the taxonomic level to use from the given database.
+    :param verb: Whether to print verbose progress messages.
    :returns: a GenotypeRecord suitable for use with phenotrex.
     """
     device = set_device('auto')
     torch.set_num_threads(1)
-
-    model_dict = torch.load(DEEPNOG_WEIGHTS_PATH, map_location=device)
+    weights_path = get_weights_path(
+        database=database, level=str(tax_level), architecture=DEEPNOG_ARCH,
+    )
+    model_dict = torch.load(weights_path, map_location=device)
     model = load_nn(DEEPNOG_ARCH, model_dict, device)
     class_labels = model_dict['classes']
     dataset = PreloadedProteinDataset(protein_list)
-
     preds, confs, ids, indices = predict(model, dataset, device,
                                          batch_size=1, num_workers=1,
@@ -111,6 +146,6 @@ def annotate_with_deepnog(identifier: str, protein_list: List[SeqRecord],
     if hasattr(model, 'threshold'):
         threshold = model.threshold
     df = create_df(class_labels, preds, confs, ids, indices,
-                   threshold=threshold, device=device, verbose=0)
+                   threshold=threshold, verbose=0)
     cogs = [x for x in df.prediction.unique() if x]
     return GenotypeRecord(identifier=identifier, features=cogs)
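The delete=False dance in call_proteins is the Windows-compatibility crux: per the Python documentation, a file created by NamedTemporaryFile cannot be opened a second time by name on Windows while it is still held open, so the file is closed first and cleaned up manually with os.unlink. The portable pattern, sketched on toy data:

    import os
    from tempfile import NamedTemporaryFile

    # On Windows, a NamedTemporaryFile that is still open cannot be reopened
    # by name (e.g. by the prodigal child process); closing it first and
    # deleting it manually afterwards works on all platforms.
    with NamedTemporaryFile(mode='w', delete=False, suffix='.fna') as tmp:
        tmp.write('>contig1\nATGAAATAA\n')   # toy DNA record
    # ... hand tmp.name to the external tool here, after the file is closed ...
    os.unlink(tmp.name)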
""" device = set_device('auto') torch.set_num_threads(1) - - model_dict = torch.load(DEEPNOG_WEIGHTS_PATH, map_location=device) + weights_path = get_weights_path( + database=database, level=str(tax_level), architecture=DEEPNOG_ARCH, + ) + model_dict = torch.load(weights_path, map_location=device) model = load_nn(DEEPNOG_ARCH, model_dict, device) class_labels = model_dict['classes'] dataset = PreloadedProteinDataset(protein_list) - preds, confs, ids, indices = predict(model, dataset, device, batch_size=1, num_workers=1, @@ -111,6 +146,6 @@ def annotate_with_deepnog(identifier: str, protein_list: List[SeqRecord], if hasattr(model, 'threshold'): threshold = model.threshold df = create_df(class_labels, preds, confs, ids, indices, - threshold=threshold, device=device, verbose=0) + threshold=threshold, verbose=0) cogs = [x for x in df.prediction.unique() if x] return GenotypeRecord(identifier=identifier, features=cogs) diff --git a/phenotrex/util/plotting.py b/phenotrex/util/plotting.py index 489d0af..e24cf7f 100644 --- a/phenotrex/util/plotting.py +++ b/phenotrex/util/plotting.py @@ -5,7 +5,12 @@ from typing import Dict, List, Union -def compleconta_plot(cccv_results: List[Dict[float, Dict[float, Dict[str, float]]]], +CccvType = Union[ + Dict[float, Dict[float, Dict[str, float]]], + List[Dict[float, Dict[float, Dict[str, float]]]] +] + +def compleconta_plot(cccv_results: CccvType, conditions: List[str] = (), each_n: List[int] = None, title: str = "", fontsize: int = 16, figsize=(10, 7), plot_comple: bool = True, plot_conta: bool = True, diff --git a/phenotrex/util/taxonomy.py b/phenotrex/util/taxonomy.py index 6f5a16e..c648fb2 100644 --- a/phenotrex/util/taxonomy.py +++ b/phenotrex/util/taxonomy.py @@ -10,15 +10,14 @@ def get_taxonomic_group_mapping(group_ids: List[str], selected_rank: str) -> Tuple[Dict, Dict]: """ - Function to create a mapping from NCBI-taxon ids to groups which are used to split the provided training records - into training and validation sets + Function to create a mapping from NCBI-taxon ids to groups which are used to split the provided + training records into training and validation sets :param group_ids: List of identifiers that should be NCBI taxon ids - :param selected_rank: selected standard rank determining on which level the set is split in training- and - validation-set + :param selected_rank: selected standard rank determining on which level the set is split in + training and validation-set :return: Mapping of input taxon_ids as string and groups as integers """ - ncbi = NCBITaxa() standard_ranks = ["superkingdom", "phylum", "class", "order", "family", "genus", "species"] if not selected_rank.lower() in standard_ranks: @@ -53,7 +52,5 @@ def auto_select_rank(group_ids: List[str]) -> str: :param group_ids: :return: selected rank """ - # TODO: implement some auto detection method, if needed - return DEFAULT_AUTO_SELECTED_RANK diff --git a/requirements/dev.txt b/requirements/dev.txt index bdece25..90fbdbb 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -1,4 +1,5 @@ -r prod.txt +-r extras.txt -r test.txt pip>=19.2.3 wheel>=0.33.6 diff --git a/requirements/extras.txt b/requirements/extras.txt index 0c345a9..cdb4f4e 100644 --- a/requirements/extras.txt +++ b/requirements/extras.txt @@ -1,2 +1,2 @@ -deepnog>=1.0.1 +deepnog>=1.1.0 ete3 diff --git a/requirements/prod.txt b/requirements/prod.txt index b65c1e8..0fdaab0 100644 --- a/requirements/prod.txt +++ b/requirements/prod.txt @@ -5,3 +5,4 @@ xgboost>=1.0.1 shap>=0.34 click>=7.0 biopython>=1.74 
diff --git a/requirements/win.txt b/requirements/win.txt
new file mode 100644
index 0000000..8d0b059
--- /dev/null
+++ b/requirements/win.txt
@@ -0,0 +1 @@
+torch>=1.2.0+cpu
diff --git a/setup.py b/setup.py
index 81328fd..7d85fef 100644
--- a/setup.py
+++ b/setup.py
@@ -1,7 +1,5 @@
 #!/usr/bin/env python
-# -*- coding: utf-8 -*-
 
-"""The setup script."""
 import codecs
 
 from os import path
diff --git a/tests/targets.py b/tests/targets.py
index 50eaa57..1f95d52 100644
--- a/tests/targets.py
+++ b/tests/targets.py
@@ -9,6 +9,10 @@
     "Sulfate_reducer": "GCA_000007085.1",
 }
 
+first_groups_accession = {
+    "Sulfate_reducer": "GCA_000007085.1",
+}
+
 cv_scores_trex = {
     'SVM': {'Sulfate_reducer': {3 : {'accuracy' : (0.9477939477939478, 0.04189200493982592),
                                      'balanced_accuracy': (0.94017094017094, 0.04262807124628204),
diff --git a/tests/test_data/Sulfate_reducer.taxids b/tests/test_data/Sulfate_reducer.taxids
new file mode 100644
index 0000000..0b33974
--- /dev/null
+++ b/tests/test_data/Sulfate_reducer.taxids
@@ -0,0 +1,115 @@
+GCA_900113335.1 857293
+GCA_000621705.1 639282
+GCA_002995805.1 643648
+GCA_000016785.1 655607
+GCA_900167305.1 656914
+GCA_000233715.3 667014
+GCA_000341895.1 690567
+GCA_001586935.1 690879
+GCA_000199675.1 697303
+GCA_000519125.1 708132
+GCA_000007085.1 720554
+GCA_000010985.1 760142
+GCA_001640105.1 767817
+GCA_000241815.1 768704
+GCA_002270055.1 768710
+GCA_900176035.1 632518
+GCA_900142105.1 858215
+GCA_000147695.3 868595
+GCA_000425265.1 871968
+GCA_001681735.1 872965
+GCA_000820845.2 880073
+GCA_000204645.1 891968
+GCA_900104045.1 926569
+GCA_000214435.1 935948
+GCA_900129935.1 994573
+GCA_000425245.1 999894
+GCA_900129975.1 1120989
+GCA_003057965.1 1121270
+GCA_000601455.1 1121301
+GCA_000321415.2 390874
+GCA_001730685.1 119641
+GCA_000217795.1 157592
+GCA_000378005.1 161156
+GCA_003201285.1 206665
+GCA_001306175.1 215580
+GCA_000166335.1 224999
+GCA_000023325.1 243164
+GCA_900167165.1 272564
+GCA_000429925.1 273068
+GCA_000016065.1 281362
+GCA_000231385.3 323850
+GCA_000010665.1 335543
+GCA_900167065.1 345632
+GCA_900130025.1 379347
+GCA_001687335.1 1121392
+GCA_900142275.1 401526
+GCA_000266925.1 404380
+GCA_002005145.1 416591
+GCA_000429345.1 419481
+GCA_900106765.1 454188
+GCA_900142245.1 454194
+GCA_000746255.1 500633
+GCA_002797735.1 520767
+GCA_001748245.1 521045
+GCA_000194135.1 522772
+GCA_900096945.1 573370
+GCA_000430885.1 596154
+GCA_000495435.3 598659
+GCA_000422245.1 1408160
+GCA_000711735.1 1185843
+GCA_000817955.1 1255043
+GCA_900142025.1 1259795
+GCA_000237085.1 1266720
+GCA_000482745.1 1294023
+GCA_900112485.1 1303518
+GCA_001311885.1 1304874
+GCA_000421585.1 1304885
+GCA_001047035.1 1322246
+GCA_000021725.1 1323370
+GCA_001293685.1 1346286
+GCA_000244895.1 1348163
+GCA_000828675.1 1390249
+GCA_001968985.1 1403537
+GCA_001672295.1 1168067
+GCA_000189775.3 1408473
+GCA_002895565.1 1429043
+GCA_002924615.1 1478221
+GCA_001551835.1 1560234
+GCA_900142145.1 1564487
+GCA_000231405.3 1619234
+GCA_900112235.1 1630136
+GCA_000156055.1 1716143
+GCA_001485475.2 1763509
+GCA_002933415.1 1763535
+GCA_000017865.1 1794699
+GCA_000092405.1 1795632
+GCA_000014965.1 1833852
+GCA_000983115.1 1121467
+GCA_000297115.1 1121394
+GCA_002724055.1 1121406
+GCA_000020725.1 1121420
+GCA_001613545.1 1121422
+GCA_900129915.1 1121424
+GCA_001652585.1 1121428
+GCA_003315275.1 1121432
+GCA_000429905.1 1121436
+GCA_000526715.1 1121439
+GCA_000021925.1 1121442
+GCA_000025725.1 1121447
+GCA_900101345.1 1121451
+GCA_000169155.1 1121457
+GCA_000526375.1 1914305
+GCA_000931935.2 1121468
+GCA_900129895.1 1121911
+GCA_001563225.1 1121917
+GCA_000011905.1 1121919
+GCA_001601575.1 1122184
+GCA_001595645.1 1122252
+GCA_001742305.1 1122953
+GCA_000426225.1 1122954
+GCA_000383875.1 1123281
+GCA_001184205.1 1123350
+GCA_000427095.1 1123352
+GCA_000427425.1 1123371
+GCA_001642325.1 1156395
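The new .taxids fixture is the groups file for LOGO-CV: two whitespace-separated columns, genome accession and NCBI taxon ID. It feeds the groups_file parameter of load_training_files, as exercised in tests/test_ml.py below; a minimal loading sketch (paths illustrative):

    from phenotrex.io.flat import load_training_files

    training_records, genotype, phenotype, group = load_training_files(
        genotype_file='Sulfate_reducer.genotype',
        phenotype_file='Sulfate_reducer.phenotype',
        groups_file='Sulfate_reducer.taxids',   # accession, NCBI taxon ID
        verb=True)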
diff --git a/tests/test_ml.py b/tests/test_ml.py
index a392737..941e60a 100644
--- a/tests/test_ml.py
+++ b/tests/test_ml.py
@@ -1,5 +1,3 @@
-from tempfile import TemporaryDirectory
-from pathlib import Path
 import sys
 import json
 
@@ -7,22 +5,25 @@
 import numpy as np
 import matplotlib as mpl
 mpl.use('Agg')
+from pathlib import Path
+from tempfile import TemporaryDirectory
 
-from tests.targets import (first_genotype_accession, first_phenotype_accession, cv_scores_trex,
-                           num_of_features_compressed, num_of_features_uncompressed)
-from phenotrex.io.flat import load_training_files
-from phenotrex.io.serialization import load_classifier
-from phenotrex.ml.shap_handler import ShapHandler
+from tests.targets import (first_genotype_accession, first_phenotype_accession, first_groups_accession,
+                           cv_scores_trex, num_of_features_compressed, num_of_features_uncompressed)
+from phenotrex.io.flat import (load_training_files,
+                               write_weights_file, write_params_file, write_misclassifications_file)
+from phenotrex.io.serialization import save_classifier
 from phenotrex.ml import TrexSVM, TrexXGB
 from phenotrex.util.helpers import get_x_y_tn
 from phenotrex.ml.feature_select import recursive_feature_elimination, compress_vocabulary
 from phenotrex.ml.prediction import predict
-
 from . import DATA_PATH, FROM_FASTA
 
+
 RANDOM_STATE = 2
+
 trait_names = [
     "Sulfate_reducer",
     # "Aerobe",
@@ -71,12 +72,15 @@ def test_load_training_files(self, trait_name):
         """
         full_path_genotype = DATA_PATH / f"{trait_name}.genotype"
         full_path_phenotype = DATA_PATH / f"{trait_name}.phenotype"
+        full_path_groups = DATA_PATH / f"{trait_name}.taxids"
         training_records, genotype, phenotype, group = load_training_files(
             genotype_file=full_path_genotype,
             phenotype_file=full_path_phenotype,
+            groups_file=full_path_groups,
             verb=True)
         assert genotype[0].identifier == first_genotype_accession[trait_name]
         assert phenotype[0].identifier == first_phenotype_accession[trait_name]
+        assert group[0].identifier == first_groups_accession[trait_name]
         return training_records, genotype, phenotype, group
 
@@ -91,28 +95,46 @@ def test_train(self, trait_name, classifier, use_shaps):
         """
         training_records, genotype, phenotype, group = self.test_load_training_files(trait_name)
         clf = classifier(verb=True, random_state=RANDOM_STATE)
-        _ = clf.train(records=training_records, train_explainer=use_shaps)
+        clf.train(records=training_records, train_explainer=use_shaps)
+        with TemporaryDirectory() as tmpdir:
+            clf_path = Path(tmpdir)/'classifier.pkl'
+            weights_path = Path(tmpdir)/'weights.rank'
+            save_classifier(clf, clf_path)
+            weights = clf.get_feature_weights()
+            write_weights_file(weights_file=weights_path, weights=weights)
+            assert clf_path.is_file()
+            assert weights_path.is_file()
 
     @pytest.mark.parametrize("trait_name", trait_names, ids=trait_names)
     @pytest.mark.parametrize("cv", cv_folds, ids=[str(x) for x in cv_folds])
-    @pytest.mark.parametrize("scoring", scoring_methods, ids=scoring_methods)
     @pytest.mark.parametrize("classifier", classifiers, ids=classifier_ids)
-    def test_crossvalidate(self, trait_name, cv, scoring, classifier):
+    @pytest.mark.parametrize("use_groups", [True, False], ids=['logo', 'nologo'])
+    def test_crossvalidate(self, trait_name, cv, classifier, use_groups):
         """
-        Test default crossvalidation of PICASVM class. Using several different traits, cv folds, and scoring methods.
+        Test default crossvalidation of TrexClassifier class.
+        Using several different traits, cv folds, and scoring methods.
         Compares with dictionary cv_scores.
+
         :param trait_name:
         :param cv:
         :param scoring:
         :param classifier:
+        :param use_groups:
         :return:
         """
         training_records, genotype, phenotype, group = self.test_load_training_files(trait_name)
         clf = classifier(verb=True, random_state=RANDOM_STATE)
-        score_pred = clf.crossvalidate(records=training_records, cv=cv, scoring=scoring)[:2]
-        if classifier.identifier in cv_scores_trex:
-            score_target = cv_scores_trex[classifier.identifier][trait_name][cv][scoring]
+        score_pred = clf.crossvalidate(records=training_records,
+                                       cv=cv, scoring=scoring_methods[0],
+                                       groups=use_groups)[:2]
+        if classifier.identifier in cv_scores_trex and not use_groups:
+            score_target = cv_scores_trex[classifier.identifier][trait_name][cv][scoring_methods[0]]
             np.testing.assert_almost_equal(actual=score_pred, desired=score_target, decimal=1)
+        with TemporaryDirectory() as tmpdir:
+            misclass_path = Path(tmpdir)/'misclassifications.tsv'
+            write_misclassifications_file(misclass_path, training_records,
+                                          score_pred, use_groups=use_groups)
+            assert misclass_path.is_file()
 
     @pytest.mark.parametrize("trait_name", trait_names, ids=trait_names)
     @pytest.mark.parametrize("classifier", classifiers, ids=classifier_ids)
@@ -126,10 +148,13 @@ def test_parameter_search(self, trait_name, classifier):
         """
         training_records, genotype, phenotype, group = self.test_load_training_files(trait_name)
         clf = classifier(verb=True, random_state=RANDOM_STATE)
-        clf_opt = clf.parameter_search(records=training_records, n_iter=5, return_optimized=True)
-        assert type(clf_opt) == type(clf)
+        clf_opt = clf.parameter_search(records=training_records, n_iter=5, return_optimized=False)
+        assert isinstance(clf_opt, dict)
+        with TemporaryDirectory() as tmpdir:
+            param_path = Path(tmpdir)/'params.json'
+            write_params_file(param_path, clf_opt)
+            assert param_path.is_file()
 
-    @pytest.mark.skipif(sys.platform != "linux", reason="Stallman was right")
     @pytest.mark.parametrize("trait_name", trait_names, ids=trait_names)
     @pytest.mark.parametrize("classifier", classifiers, ids=classifier_ids)
     def test_compleconta_cv(self, trait_name, classifier):
@@ -248,7 +273,6 @@ def test_recursive_feature_elimination(self, trait_name, n_features):
         for x in non_zero:
             if len(x) == 0:
                 one_is_zero = True
-
         assert not one_is_zero
 
     @pytest.mark.parametrize('trait_name', trait_names, ids=trait_names)
@@ -258,7 +282,6 @@ def test_predict_from_genotype(self, trait_name, classifier_type):
         genotype_file = DATA_PATH/f'{trait_name}.genotype'
         print(predict(classifier=model_path, genotype=genotype_file))
 
-
     @pytest.mark.skipif(not FROM_FASTA, reason='Missing optional dependencies')
     @pytest.mark.parametrize('trait_name', trait_names, ids=trait_names)
     @pytest.mark.parametrize('fasta_files', predict_files, ids=['fna', 'faa', 'fna+faa'])
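The new use_groups parametrization only compares against the recorded target scores in the ungrouped case, which makes sense: leave-one-group-out folds change the split and therefore the score. A sketch of the grouped call (the scoring string is illustrative; training_records as loaded via load_training_files with a groups_file):

    from phenotrex.ml import TrexXGB

    clf = TrexXGB(verb=True, random_state=2)
    # With groups=True, CV folds keep each taxonomic group intact,
    # leave-one-group-out style; requires records loaded with a groups_file.
    mean_score, std_score = clf.crossvalidate(
        records=training_records, cv=5,
        scoring='balanced_accuracy', groups=True)[:2]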
diff --git a/tests/test_transforms.py b/tests/test_transforms.py
index 91c065d..4617326 100644
--- a/tests/test_transforms.py
+++ b/tests/test_transforms.py
@@ -1,6 +1,9 @@
 import pytest
 
+from Bio.SeqRecord import SeqRecord
+
 from phenotrex.transforms.resampling import TrainingRecordResampler
+from phenotrex.transforms.annotation import call_proteins, load_fasta_file
 from phenotrex.io.flat import load_training_files
 from phenotrex.structure.records import GenotypeRecord
 
@@ -31,6 +34,14 @@ def test_resampling(trait_name):
     trr.get_resampled(td[0], comple=.5, conta=.5)
 
 
+@pytest.mark.skipif(not FROM_FASTA, reason='Missing optional dependencies')
+def test_call_proteins():
+    seqtype, seqs = load_fasta_file(predict_files[1])
+    proteins = call_proteins(seqs)
+    assert isinstance(proteins[0], SeqRecord)
+    print(proteins[0])
+
+
 @pytest.mark.skipif(not FROM_FASTA, reason='Missing optional dependencies')
 @pytest.mark.parametrize('infile', predict_files, ids=['fna-gz', 'fna', 'faa-gz', 'faa'])
 def test_compute_genotype(infile):