From d5c59ab89489c42ffe3727af3e5cb0497a677cde Mon Sep 17 00:00:00 2001
From: mirpedrol <mirp.julia@gmail.com>
Date: Tue, 7 Nov 2023 13:06:50 +0100
Subject: [PATCH] run black

---
 bin/BAGEL.py | 350 +++++++++++++++++++++++++++------------------------
 1 file changed, 189 insertions(+), 161 deletions(-)

diff --git a/bin/BAGEL.py b/bin/BAGEL.py
index 94bd734b..205fbb44 100755
--- a/bin/BAGEL.py
+++ b/bin/BAGEL.py
@@ -37,22 +37,21 @@
 """
 
 
+import sys
+import time
 
 import click
 import numpy as np
 import pandas as pd
 import scipy.stats as stats
 from sklearn.linear_model import LinearRegression
-import sys
-import time
 
 VERSION = 2.0
 
 BUILD = 115
 
 
-
-'''
+"""
 Update history
 
 Build 115
@@ -77,26 +76,23 @@
 2. Now user can input column names
 3. Fix Threshold function
 
-'''
+"""
 
 
 class OptionRequiredIf(click.Option):
-
     def full_process_value(self, ctx, value):
         value = super(OptionRequiredIf, self).full_process_value(ctx, value)
 
-        if value is None and ctx.params['filter_multi_target'] is True:
-            msg = 'Error! Multi-target-filtering selected and not align-info provided.\n' \
-                  'Please indicate align-info file.'
+        if value is None and ctx.params["filter_multi_target"] is True:
+            msg = (
+                "Error! Multi-target-filtering selected and not align-info provided.\n"
+                "Please indicate align-info file."
+            )
             raise click.MissingParameter(ctx=ctx, param=self, message=msg)
         return value
 
 
-
-
-
-
-CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help'])
+CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"])
 
 
 def round_to_hundredth(x):
@@ -109,7 +105,6 @@ def func_linear(x, a, b):
 
 class Training:
     def __init__(self, X, n=None, cvnum=10):
-
         if n == None:
             self._n = len(X)
         self._cvnum = cvnum
@@ -118,7 +113,6 @@ def __init__(self, X, n=None, cvnum=10):
         self._X = X
         self._step = 0
 
-
     def cross_validation(self):
         if self._bid < 1:  # bid check
             print("The number of genes is too small! n<" + str(self._cvnum))
@@ -157,7 +151,7 @@ def get_data(self, method=0):
         elif method == 1:
             train, test = self.cross_validation()
         else:
-            print('Method passed as a value that was neither 0 nor 1.')
+            print("Method passed as a value that was neither 0 nor 1.")
             sys.exit(1)
         return train, test
 
@@ -208,24 +202,24 @@ def bagel():
     """
 
 
-@click.command(name='version')
+@click.command(name="version")
 def report_bagel_version():
     """
     Report the current build and version no.
     """
     print(
-        'Bayesian Analysis of Gene EssentiaLity (BAGEL) suite:\n'
-        'Version: {VERSION}\n'
-        'Build: {BUILD}'.format(VERSION=VERSION, BUILD=BUILD)
+        "Bayesian Analysis of Gene EssentiaLity (BAGEL) suite:\n"
+        "Version: {VERSION}\n"
+        "Build: {BUILD}".format(VERSION=VERSION, BUILD=BUILD)
     )
 
 
-@click.command(name='fc')
-@click.option('-i', '--read-count-file', required=True, type=click.Path(exists=True))
-@click.option('-o', '--output-label', required=True)
-@click.option('-c', '--control-columns', required=True)
-@click.option('-m', '--min-reads', type=int, default=0)
-@click.option('-Np', '--pseudo-count', type=int, default=5)
+@click.command(name="fc")
+@click.option("-i", "--read-count-file", required=True, type=click.Path(exists=True))
+@click.option("-o", "--output-label", required=True)
+@click.option("-c", "--control-columns", required=True)
+@click.option("-m", "--min-reads", type=int, default=0)
+@click.option("-Np", "--pseudo-count", type=int, default=5)
 def calculate_fold_change(read_count_file, output_label, control_columns, min_reads, pseudo_count):
     """
     \b
@@ -259,9 +253,9 @@ def calculate_fold_change(read_count_file, output_label, control_columns, min_re
     #  Import raw read data, normalize, filter for T0 min read counts  #
     #  Output:   [output label].foldchange                             #
     # ---------------------------------------------------------------- #
-    reads = pd.read_csv(read_count_file, sep='\t', index_col=0)
+    reads = pd.read_csv(read_count_file, sep="\t", index_col=0)
 
-    reads[reads.columns.values[0]].fillna('NO_GENE_NAME', inplace=True)
+    reads[reads.columns.values[0]].fillna("NO_GENE_NAME", inplace=True)
     reads.fillna(0, inplace=True)
     control_columns = control_columns.split(",")
     #
@@ -277,7 +271,7 @@ def calculate_fold_change(read_count_file, output_label, control_columns, min_re
 
         ctrl_sum = reads[ctrl_labels].sum(axis=1)
         reads.drop(ctrl_labels, axis=1, inplace=True)
-        ctrl_label_new = ';'.join(ctrl_labels)
+        ctrl_label_new = ";".join(ctrl_labels)
         reads[ctrl_label_new] = ctrl_sum
     except:
         print(reads[ctrl_labels].sum(axis=1))
@@ -298,9 +292,10 @@ def calculate_fold_change(read_count_file, output_label, control_columns, min_re
     #
     sumReads = reads.iloc[:, list(range(1, numColumns))].sum(0)
     normed = pd.DataFrame(index=reads.index.values)
-    normed['GENE'] = reads.iloc[:, 0]  # first column is gene name
-    normed = reads.iloc[:, list(range(1, numColumns))] / np.tile(sumReads,
-                                                                 [numClones, 1]) * 10000000  # normalize to 10M reads
+    normed["GENE"] = reads.iloc[:, 0]  # first column is gene name
+    normed = (
+        reads.iloc[:, list(range(1, numColumns))] / np.tile(sumReads, [numClones, 1]) * 10000000
+    )  # normalize to 10M reads
 
     #
     # filter for minimum readcount
@@ -312,8 +307,8 @@ def calculate_fold_change(read_count_file, output_label, control_columns, min_re
     # calculate fold change
     #
     foldchange = pd.DataFrame(index=normed.index.values)
-    foldchange.index.name = 'REAGENT_ID'
-    foldchange['GENE'] = reads.iloc[f, 0]  # dataframe 'normed' has no GENE column
+    foldchange.index.name = "REAGENT_ID"
+    foldchange["GENE"] = reads.iloc[f, 0]  # dataframe 'normed' has no GENE column
     for i in range(numColumns - 1):
         foldchange[normed.columns.values[i]] = np.log2(
             (normed.loc[:, normed.columns.values[i]]) / normed[ctrl_label_new]
@@ -328,39 +323,57 @@ def calculate_fold_change(read_count_file, output_label, control_columns, min_re
     # write foldchange file
     #
 
-    foldchange_filename = output_label + '.foldchange'
-    foldchange.to_csv(foldchange_filename, sep='\t', float_format='%4.3f')
-
-    normedreads_filename = output_label + '.normed_readcount'
-    normed.to_csv(normedreads_filename, sep='\t', float_format='%3.2f')
-
-
-@click.command(name='bf')
-@click.option('-i', '--fold-change', required=True, type=click.Path(exists=True))
-@click.option('-o', '--output-file', required=True)
-@click.option('-e', '--essential-genes', required=True, type=click.Path(exists=True))
-@click.option('-n', '--non-essential-genes', required=True, type=click.Path(exists=True))
-@click.option('-c', '--columns-to-test', required=True)
-@click.option('-w', '--network-file', metavar='[network File]', default=None, type=click.Path(exists=True))
-@click.option('-m', '--filter-multi-target', is_flag=True)
-@click.option('-m0', '--loci-without-mismatch', type=int, default=10)
-@click.option('-m1', '--loci-with-mismatch', type=int, default=10)
-@click.option('--align-info', metavar='--align-info [File]', default=None,
-              type=click.Path(exists=True), cls=OptionRequiredIf)
-@click.option('-b', '--use-bootstrapping', is_flag=True)
-@click.option('-NS', '--no-resampling', is_flag=True)
-@click.option('-s', '--use-small-sample', is_flag=True)
-@click.option('-N', '--no-of-cross-validations', type=int, default=10)
-@click.option('-NB', '--bootstrap-iterations', type=int, default=1000)
-@click.option('-r', '--sgrna-bayes-factors', is_flag=True)
-@click.option('-f', '--equalise-sgrna-no', type=int)
-@click.option('-s', '--seed', default=int(time.time() * 100000 % 100000), type=int)
-@click.option('-t', '--run-test-mode', is_flag=True)
-@click.option('-p', '--equalise-rep-no', type=int)
+    foldchange_filename = output_label + ".foldchange"
+    foldchange.to_csv(foldchange_filename, sep="\t", float_format="%4.3f")
+
+    normedreads_filename = output_label + ".normed_readcount"
+    normed.to_csv(normedreads_filename, sep="\t", float_format="%3.2f")
+
+
+@click.command(name="bf")
+@click.option("-i", "--fold-change", required=True, type=click.Path(exists=True))
+@click.option("-o", "--output-file", required=True)
+@click.option("-e", "--essential-genes", required=True, type=click.Path(exists=True))
+@click.option("-n", "--non-essential-genes", required=True, type=click.Path(exists=True))
+@click.option("-c", "--columns-to-test", required=True)
+@click.option("-w", "--network-file", metavar="[network File]", default=None, type=click.Path(exists=True))
+@click.option("-m", "--filter-multi-target", is_flag=True)
+@click.option("-m0", "--loci-without-mismatch", type=int, default=10)
+@click.option("-m1", "--loci-with-mismatch", type=int, default=10)
+@click.option(
+    "--align-info", metavar="--align-info [File]", default=None, type=click.Path(exists=True), cls=OptionRequiredIf
+)
+@click.option("-b", "--use-bootstrapping", is_flag=True)
+@click.option("-NS", "--no-resampling", is_flag=True)
+@click.option("-s", "--use-small-sample", is_flag=True)
+@click.option("-N", "--no-of-cross-validations", type=int, default=10)
+@click.option("-NB", "--bootstrap-iterations", type=int, default=1000)
+@click.option("-r", "--sgrna-bayes-factors", is_flag=True)
+@click.option("-f", "--equalise-sgrna-no", type=int)
+@click.option("-s", "--seed", default=int(time.time() * 100000 % 100000), type=int)
+@click.option("-t", "--run-test-mode", is_flag=True)
+@click.option("-p", "--equalise-rep-no", type=int)
 def calculate_bayes_factors(
-        fold_change, output_file, essential_genes, non_essential_genes, columns_to_test, network_file, align_info,
-        use_bootstrapping, no_resampling, use_small_sample, filter_multi_target, loci_without_mismatch, loci_with_mismatch,
-        bootstrap_iterations, no_of_cross_validations, sgrna_bayes_factors, equalise_sgrna_no, seed, run_test_mode, equalise_rep_no
+    fold_change,
+    output_file,
+    essential_genes,
+    non_essential_genes,
+    columns_to_test,
+    network_file,
+    align_info,
+    use_bootstrapping,
+    no_resampling,
+    use_small_sample,
+    filter_multi_target,
+    loci_without_mismatch,
+    loci_with_mismatch,
+    bootstrap_iterations,
+    no_of_cross_validations,
+    sgrna_bayes_factors,
+    equalise_sgrna_no,
+    seed,
+    run_test_mode,
+    equalise_rep_no,
 ):
     """
     \b
@@ -460,7 +473,6 @@ def calculate_bayes_factors(
     multi_targeting_sgrnas_info = dict()
 
     if filter_multi_target:
-
         try:
             aligninfo = pd.read_csv(align_info, header=None, index_col=0, sep="\t").fillna("")
             for seqid in aligninfo.index:
@@ -480,7 +492,10 @@ def calculate_bayes_factors(
                     multi_targeting_sgrnas[seqid] = True
                 elif perfectmatch > 1 or mismatch_1bp > 0:
                     multi_targeting_sgrnas_info[seqid] = (
-                        perfectmatch, mismatch_1bp, perfectmatch_gene, mismatch_1bp_gene
+                        perfectmatch,
+                        mismatch_1bp,
+                        perfectmatch_gene,
+                        mismatch_1bp_gene,
                     )
 
         except:
@@ -494,19 +509,20 @@ def calculate_bayes_factors(
     #
     rnatagset = set()
     with open(fold_change) as fin:
-        fieldname = fin.readline().rstrip().split('\t')
+        fieldname = fin.readline().rstrip().split("\t")
         #
         # DEFINE CONTROLS
         #
-        columns = columns_to_test.split(',')
+        columns = columns_to_test.split(",")
         try:
             try:
                 column_list = list(map(int, columns))
                 column_labels = [fieldname[x + 1] for x in column_list]
             except ValueError:
                 column_labels = columns
-                column_list = [x for x in range(len(fieldname) - 1) if
-                               fieldname[x + 1] in column_labels]  # +1 because of First column start 2
+                column_list = [
+                    x for x in range(len(fieldname) - 1) if fieldname[x + 1] in column_labels
+                ]  # +1 because of First column start 2
             print("Using column:  " + ", ".join(column_labels))
         # print "Using column:  " + ", ".join(map(str,column_list))
 
@@ -515,7 +531,7 @@ def calculate_bayes_factors(
             sys.exit(1)
 
         for line in fin:
-            fields = line.rstrip().split('\t')
+            fields = line.rstrip().split("\t")
             rnatag = fields[0]
             if filter_multi_target is True:  # multitargeting sgrna filtering
                 if rnatag in multi_targeting_sgrnas:
@@ -547,7 +563,7 @@ def calculate_bayes_factors(
     with open(essential_genes) as fin:
         skip_header = fin.readline()
         for line in fin:
-            coreEss.append(line.rstrip().split('\t')[0])
+            coreEss.append(line.rstrip().split("\t")[0])
     coreEss = np.array(coreEss)
     print("Number of reference essentials: " + str(len(coreEss)))
 
@@ -555,7 +571,7 @@ def calculate_bayes_factors(
     with open(non_essential_genes) as fin:
         skip_header = fin.readline()
         for line in fin:
-            nonEss.append(line.rstrip().split('\t')[0])
+            nonEss.append(line.rstrip().split("\t")[0])
 
     nonEss = np.array(nonEss)
     print("Number of reference nonessentials: " + str(len(nonEss)))
@@ -569,7 +585,7 @@ def calculate_bayes_factors(
         edgecount = 0
         with open(network_file) as fin:
             for line in fin:
-                linearray = line.rstrip().split('\t')  # GeneA \t GeneB format
+                linearray = line.rstrip().split("\t")  # GeneA \t GeneB format
                 if linearray[0] in genes_array and linearray[1] in genes_array:
                     for i in [0, 1]:
                         if linearray[i] not in network:
@@ -586,8 +602,7 @@ def calculate_bayes_factors(
     # Define foldchange dynamic threshold. logarithm decay.
     # Parameters are defined by regression (achilles data)  2**-7 was used in previous version.
 
-    FC_THRESH = 2 ** (-1.1535 * np.log(len(np.intersect1d(genes_array,
-                                                          nonEss)) + 13.324) + 0.7728)
+    FC_THRESH = 2 ** (-1.1535 * np.log(len(np.intersect1d(genes_array, nonEss)) + 13.324) + 0.7728)
     bf = {}
     boostedbf = {}
     for g in genes_array:
@@ -603,8 +618,9 @@ def calculate_bayes_factors(
         # declare training class
         # training_data = Training(setdiff1d(gene_idx,np.where(in1d(genes_array,coreEss))),cvnum=NUMCV)
         # declare training class (only for Gold-standard gene set)
-        training_data = Training(np.where(np.in1d(genes_array, np.union1d(coreEss, nonEss)))[0],
-                                 cvnum=no_of_cross_validations)
+        training_data = Training(
+            np.where(np.in1d(genes_array, np.union1d(coreEss, nonEss)))[0], cvnum=no_of_cross_validations
+        )
         # all non-goldstandards
         all_non_gs = np.where(np.logical_not(np.in1d(genes_array, np.union1d(coreEss, nonEss))))[0]
     else:
@@ -649,7 +665,8 @@ def calculate_bayes_factors(
 
         if run_test_mode:
             fp.write(
-                "%d\n%s\n%s\n" % (loop, ",".join(genes_array[gene_train_idx]), ",".join(genes_array[gene_test_idx])))
+                "%d\n%s\n%s\n" % (loop, ",".join(genes_array[gene_train_idx]), ",".join(genes_array[gene_test_idx]))
+            )
 
         train_ess = np.where(np.in1d(genes_array[gene_train_idx], coreEss))[0]
         train_non = np.where(np.in1d(genes_array[gene_train_idx], nonEss))[0]
@@ -661,14 +678,16 @@ def calculate_bayes_factors(
         #
         # define ess_train: vector of observed fold changes of essential genes in training set
         #
-        ess_train_fc_list_of_lists = [fc[rnatag] for g in genes_array[gene_train_idx[train_ess]] for rnatag in
-                                      gene2rna[g]]
+        ess_train_fc_list_of_lists = [
+            fc[rnatag] for g in genes_array[gene_train_idx[train_ess]] for rnatag in gene2rna[g]
+        ]
         ess_train_fc_flat_list = [obs for sublist in ess_train_fc_list_of_lists for obs in list(sublist.values())]
         #
         # define non_train vector of observed fold changes of nonessential genes in training set
         #
-        non_train_fc_list_of_lists = [fc[rnatag] for g in genes_array[gene_train_idx[train_non]] for rnatag in
-                                      gene2rna[g]]
+        non_train_fc_list_of_lists = [
+            fc[rnatag] for g in genes_array[gene_train_idx[train_non]] for rnatag in gene2rna[g]
+        ]
         non_train_fc_flat_list = [obs for sublist in non_train_fc_list_of_lists for obs in list(sublist.values())]
         #
         # calculate empirical fold change distributions for both
@@ -781,21 +800,24 @@ def calculate_bayes_factors(
 
             if len(onlytarget) > 0:  # comparsion between sgRNAs targeting one locus and multiple loci
                 if len(multitarget) > 0:
-
                     bf_only = np.mean([sum(list(bf_mean_rna_rep[seqid].values())) for seqid in onlytarget])
                     for seqid in onlytarget:
                         trainset[seqid] = [1, 0, 0]
 
                     for seqid in multitarget:
-                        if multi_targeting_sgrnas_info[seqid][2] > 1 or multi_targeting_sgrnas_info[seqid][
-                            3] > 0:  # train model using multi-targeting only targeting one protein coding gene
+                        if (
+                            multi_targeting_sgrnas_info[seqid][2] > 1 or multi_targeting_sgrnas_info[seqid][3] > 0
+                        ):  # train model using multi-targeting only targeting one protein coding gene
                             continue
 
                         count += 1
                         increment = sum(list(bf_mean_rna_rep[seqid].values())) - bf_only
 
-                        trainset[seqid] = [multi_targeting_sgrnas_info[seqid][0], multi_targeting_sgrnas_info[seqid][1],
-                                           increment]
+                        trainset[seqid] = [
+                            multi_targeting_sgrnas_info[seqid][0],
+                            multi_targeting_sgrnas_info[seqid][1],
+                            increment,
+                        ]
 
         if count < 10:
             print("Not enough train set for calculating multi-targeting effect.\n")
@@ -804,35 +826,40 @@ def calculate_bayes_factors(
             filter_multi_target = False
 
         else:
-
             trainset = pd.DataFrame().from_dict(trainset).T
             X = trainset[[0, 1]]
             y = trainset[2]
 
             regressor = LinearRegression()
             regressor.fit(X, y)
-            coeff_df = pd.DataFrame(regressor.coef_, X.columns, columns=['Coefficient'])
+            coeff_df = pd.DataFrame(regressor.coef_, X.columns, columns=["Coefficient"])
             for i in [0, 1]:
-                if coeff_df['Coefficient'][i] < 0:
+                if coeff_df["Coefficient"][i] < 0:
                     print("Regression coefficient is below than zero. Substituted to zero\n")
-                    coeff_df['Coefficient'][i] = 0.0
-            print("Multiple effects from perfect matched loci = %.3f and 1bp mis-matched loci = %.3f" % (
-                coeff_df['Coefficient'][0], coeff_df['Coefficient'][1]))
+                    coeff_df["Coefficient"][i] = 0.0
+            print(
+                "Multiple effects from perfect matched loci = %.3f and 1bp mis-matched loci = %.3f"
+                % (coeff_df["Coefficient"][0], coeff_df["Coefficient"][1])
+            )
 
             if rna_level == False:
                 for g in gene2rna:
                     penalty = 0.0
                     for seqid in gene2rna[g]:
                         if seqid in multi_targeting_sgrnas_info:
-                            penalty += float(multi_targeting_sgrnas_info[seqid][0] - 1) * coeff_df['Coefficient'][
-                                0] + float(multi_targeting_sgrnas_info[seqid][1]) * coeff_df['Coefficient'][1]
+                            penalty += (
+                                float(multi_targeting_sgrnas_info[seqid][0] - 1) * coeff_df["Coefficient"][0]
+                                + float(multi_targeting_sgrnas_info[seqid][1]) * coeff_df["Coefficient"][1]
+                            )
                     bf_multi_corrected_gene[g] = bf_mean[g] - penalty
             else:
                 for g in gene2rna:
                     for seqid in gene2rna[g]:
                         if seqid in multi_targeting_sgrnas_info:
-                            penalty = float(multi_targeting_sgrnas_info[seqid][0] - 1) * coeff_df['Coefficient'][
-                                0] + float(multi_targeting_sgrnas_info[seqid][1]) * coeff_df['Coefficient'][1]
+                            penalty = (
+                                float(multi_targeting_sgrnas_info[seqid][0] - 1) * coeff_df["Coefficient"][0]
+                                + float(multi_targeting_sgrnas_info[seqid][1]) * coeff_df["Coefficient"][1]
+                            )
                         else:
                             penalty = 0.0
                         bf_multi_corrected_rna[seqid] = sum(list(bf_mean_rna_rep[seqid].values())) - penalty
@@ -850,12 +877,12 @@ def calculate_bayes_factors(
             multiple_factor = equalise_sgrna_no / float(len(gene2rna[g]))
             bf_norm[g] = targetbf[g] * multiple_factor
 
-    '''
+    """
     if bf_std[rnatag] == 0.0:
         bf_norm[rnatag] = float('inf')
     else:
         bf_norm[g] = ( bf[rnatag] - bf_mean[rnatag] ) / bf_std[rnatag]
-    '''
+    """
     training_data = Training(gene_idx)  # set training class reset
 
     #
@@ -961,102 +988,100 @@ def calculate_bayes_factors(
         if run_test_mode == True:
             fp.close()
 
-
-
-
     #
     # print out results
     #
 
     # Equalizing factor (Replicates)
-    if flat_rep==True:
-        eqf = equalise_rep_no/float(len(column_labels))
+    if flat_rep == True:
+        eqf = equalise_rep_no / float(len(column_labels))
     else:
         eqf = 1
 
     # print out
-    with open(output_file, 'w') as fout:
-
+    with open(output_file, "w") as fout:
         if rna_level == True:
-            fout.write('RNA\tGENE')
+            fout.write("RNA\tGENE")
             for i in range(len(column_list)):
-                fout.write('\t{0:s}'.format(column_labels[i]))
+                fout.write("\t{0:s}".format(column_labels[i]))
                 if train_method == 0:
-                    fout.write('\t{0:s}'.format(column_labels[i] + "_STD"))
-            fout.write('\tBF')
+                    fout.write("\t{0:s}".format(column_labels[i] + "_STD"))
+            fout.write("\tBF")
             if train_method == 0:
-                fout.write('\tNumObs')
-            fout.write('\n')
+                fout.write("\tNumObs")
+            fout.write("\n")
 
             for rnatag in sorted(bf.keys()):
                 # RNA tag
-                fout.write('{0:s}\t'.format(rnatag))
+                fout.write("{0:s}\t".format(rnatag))
                 # Gene
                 gene = rna2gene[rnatag]
-                fout.write('{0:s}\t'.format(gene))
+                fout.write("{0:s}\t".format(gene))
 
                 # BF of replicates
                 for rep in column_list:
-                    fout.write('{0:4.3f}\t'.format(bf_mean_rna_rep[rnatag][rep]))
+                    fout.write("{0:4.3f}\t".format(bf_mean_rna_rep[rnatag][rep]))
                     if train_method == 0:
-                        fout.write('{0:4.3f}\t'.format(bf_std_rna_rep[rnatag][rep]))
+                        fout.write("{0:4.3f}\t".format(bf_std_rna_rep[rnatag][rep]))
 
                 # Sum BF of replicates
                 if filter_multi_target == True:
-                    fout.write('{0:4.3f}'.format(float(bf_multi_corrected_rna[rnatag]) * eqf))  # eqf = equalizing factor for the number of replicates
+                    fout.write(
+                        "{0:4.3f}".format(float(bf_multi_corrected_rna[rnatag]) * eqf)
+                    )  # eqf = equalizing factor for the number of replicates
                 else:
-                    fout.write('{0:4.3f}'.format(float(sum(list(bf_mean_rna_rep[rnatag].values()))) * eqf))
+                    fout.write("{0:4.3f}".format(float(sum(list(bf_mean_rna_rep[rnatag].values()))) * eqf))
 
                 # Num obs
                 if train_method == 0:
-                    fout.write('\t{0:d}'.format(num_obs[gene]))
-                fout.write('\n')
+                    fout.write("\t{0:d}".format(num_obs[gene]))
+                fout.write("\n")
         else:
-            fout.write('GENE')
+            fout.write("GENE")
             if network_boost == True:
-                fout.write('\tBoostedBF')
+                fout.write("\tBoostedBF")
                 if train_method == 0:
-                    fout.write('\tSTD_BoostedBF')
-            fout.write('\tBF')
+                    fout.write("\tSTD_BoostedBF")
+            fout.write("\tBF")
             if train_method == 0:
-                fout.write('\tSTD\tNumObs')
+                fout.write("\tSTD\tNumObs")
             if flat_sgrna == True:
-                fout.write('\tNormBF')
-            fout.write('\n')
+                fout.write("\tNormBF")
+            fout.write("\n")
 
             for g in sorted(genes.keys()):
                 # Gene
-                fout.write('{0:s}'.format(g))
+                fout.write("{0:s}".format(g))
                 if network_boost == True:
                     boostedbf_mean = np.mean(boostedbf[g])
                     boostedbf_std = np.std(boostedbf[g])
-                    fout.write('\t{0:4.3f}'.format(float(boostedbf_mean) * eqf))
+                    fout.write("\t{0:4.3f}".format(float(boostedbf_mean) * eqf))
                     if train_method == 0:
-                        fout.write('\t{0:4.3f}'.format(float(boostedbf_std) * eqf))
+                        fout.write("\t{0:4.3f}".format(float(boostedbf_std) * eqf))
 
                 # BF
                 if filter_multi_target == True:
-                    fout.write('\t{0:4.3f}'.format(float(bf_multi_corrected_gene[g]) * eqf))  # eqf = equalizing factor for the number of replicates
+                    fout.write(
+                        "\t{0:4.3f}".format(float(bf_multi_corrected_gene[g]) * eqf)
+                    )  # eqf = equalizing factor for the number of replicates
                 else:
-                    fout.write('\t{0:4.3f}'.format(float(bf_mean[g]) * eqf ))
+                    fout.write("\t{0:4.3f}".format(float(bf_mean[g]) * eqf))
                 # STD, Count
                 if train_method == 0:
-                    fout.write('\t{0:4.3f}\t{1:d}'.format(float(bf_std[g]), num_obs[g]))
+                    fout.write("\t{0:4.3f}\t{1:d}".format(float(bf_std[g]), num_obs[g]))
                 # Normalized BF
                 if flat_sgrna == True:
-                    fout.write('\t{0:4.3f}'.format(float(bf_norm[g])))
+                    fout.write("\t{0:4.3f}".format(float(bf_norm[g])))
 
-                fout.write('\n')
+                fout.write("\n")
 
 
-@click.command(name='pr')
-@click.option('-i', '--bayes-factors', required=True,
-              type=click.Path(exists=True))
-@click.option('-o', '--output-file', required=True)
-@click.option('-e', '--essential-genes', required=True,
-              type=click.Path(exists=True))
-@click.option('-n', '--non-essential-genes', required=True, type=click.Path(exists=True))
-@click.option('-k', '--use-column', default=None)
+@click.command(name="pr")
+@click.option("-i", "--bayes-factors", required=True, type=click.Path(exists=True))
+@click.option("-o", "--output-file", required=True)
+@click.option("-e", "--essential-genes", required=True, type=click.Path(exists=True))
+@click.option("-n", "--non-essential-genes", required=True, type=click.Path(exists=True))
+@click.option("-k", "--use-column", default=None)
 def calculate_precision_recall(bayes_factors, output_file, essential_genes, non_essential_genes, use_column):
     """
     Calculate precision-recall from an input Bayes Factors file:
@@ -1093,38 +1118,41 @@ def calculate_precision_recall(bayes_factors, output_file, essential_genes, non_
             print("Error! the column name is not in the file")
             sys.exit(1)
     else:
-        bf_column = 'BF'
+        bf_column = "BF"
 
     bf.sort_values(by=bf_column, ascending=False, inplace=True)
 
-    cumulative_tp = 0.
-    cumulative_fp = 0.
-    precision = 1.
-    recall = 0.
+    cumulative_tp = 0.0
+    cumulative_fp = 0.0
+    precision = 1.0
+    recall = 0.0
     # note float formats
 
     ess = essentials.index.values
     non = nonessentials.index.values
     totNumEssentials = len([x for x in bf.index.values if x in ess])
 
-    with open(output_file, 'w') as fout:
-
-        fout.write('Gene\t')
+    with open(output_file, "w") as fout:
+        fout.write("Gene\t")
         fout.write(bf_column)
-        fout.write('\tRecall\tPrecision\tFDR\n')
+        fout.write("\tRecall\tPrecision\tFDR\n")
 
         for g in bf.index.values:
-            if (g in ess):
+            if g in ess:
                 cumulative_tp += 1
-            elif (g in non):
+            elif g in non:
                 cumulative_fp += 1
             recall = cumulative_tp / totNumEssentials
-            if ((cumulative_tp > 0) | (cumulative_fp > 0)):
+            if (cumulative_tp > 0) | (cumulative_fp > 0):
                 precision = cumulative_tp / (cumulative_tp + cumulative_fp)
-            fout.write('{0:s}\t{1:4.3f}\t{2:4.3f}\t{3:4.3f}\t{4:4.3f}\n'.format(g, bf.loc[g, bf_column], recall, precision, 1.0-precision))
+            fout.write(
+                "{0:s}\t{1:4.3f}\t{2:4.3f}\t{3:4.3f}\t{4:4.3f}\n".format(
+                    g, bf.loc[g, bf_column], recall, precision, 1.0 - precision
+                )
+            )
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     bagel.add_command(calculate_fold_change)
     bagel.add_command(calculate_bayes_factors)
     bagel.add_command(calculate_precision_recall)