From e16b4b834d6285d6460d571b014047ed2b9a1dc1 Mon Sep 17 00:00:00 2001 From: EvanRees Date: Wed, 19 Feb 2020 17:54:46 -0600 Subject: [PATCH 01/17] updated add_contig_taxonomy.py to merge any nodes if databases nodes.dmp and merged.dmp are out of sync with nr.gz --- pipeline/add_contig_taxonomy.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/pipeline/add_contig_taxonomy.py b/pipeline/add_contig_taxonomy.py index 02406fab8..083eff157 100755 --- a/pipeline/add_contig_taxonomy.py +++ b/pipeline/add_contig_taxonomy.py @@ -185,6 +185,19 @@ def parse_nodes(nodes_dmp_path): nodes_dmp.close() return(nodes) +def parse_merged(fpath): + print(strftime("%Y-%m-%d %H:%M:%S") + ' Processing merged taxid nodes') + wc_output = subprocess.check_output(['wc', '-l', fpath]) + wc_list = wc_output.split() + number_of_lines = int(wc_list[0]) + fh = open(fpath) + merged = {} + for line in tqdm(fh, desc='parsing merged', total=number_of_lines, leave=False): + old_taxid, new_taxid = [int(taxid) for taxid in line.strip('\t|\n').split('\t|\t')] + merged.update({old_taxid:new_taxid}) + fh.close() + return(merged) + def parse_lca(lca_fpath): print( strftime("%Y-%m-%d %H:%M:%S") + ' Parsing lca taxonomy table') # Work out number of lines in file @@ -199,6 +212,9 @@ def parse_lca(lca_fpath): orf, name, rank, taxid = line.strip().split('\t') contig, orf_num = orf.rsplit('_', 1) taxid = int(taxid) + # Convert any nodes that were recently suppressed/deprecated + # to their new node taxid. Otherwise keep the same taxid + taxid = merged.get(taxid, taxid) if taxid != 1: while rank not in set(rank_priority): taxid = nodes[taxid]['parent'] @@ -336,6 +352,7 @@ def write_taxa(ranked_ctgs, contig_table_fpath, outfpath): # Build taxid tree structure with associated canoncial ranks and names names = parse_names(name_fpath) nodes = parse_nodes(nodes_fpath) +merged = parse_merged(merged_fpath) rank_priority = [ 'species', From 2889600cccdd0e7324ecb2eaf282b0999898704a Mon Sep 17 00:00:00 2001 From: EvanRees Date: Wed, 19 Feb 2020 17:59:01 -0600 Subject: [PATCH 02/17] added filepath handling to merged.dmp --- pipeline/add_contig_taxonomy.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pipeline/add_contig_taxonomy.py b/pipeline/add_contig_taxonomy.py index 083eff157..132eed2c2 100755 --- a/pipeline/add_contig_taxonomy.py +++ b/pipeline/add_contig_taxonomy.py @@ -346,6 +346,7 @@ def write_taxa(ranked_ctgs, contig_table_fpath, outfpath): # Process NCBI taxdump files name_fpath = os.path.join(taxdump_dir_path, 'names.dmp') nodes_fpath = os.path.join(taxdump_dir_path, 'nodes.dmp') +nodes_fpath = os.path.join(taxdump_dir_path, 'merged.dmp') pp = pprint.PrettyPrinter(indent=4) From 6505fda146bbae04229d072a691a443251eac473 Mon Sep 17 00:00:00 2001 From: EvanRees Date: Wed, 19 Feb 2020 17:59:53 -0600 Subject: [PATCH 03/17] resolved merged_fpath variable --- pipeline/add_contig_taxonomy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipeline/add_contig_taxonomy.py b/pipeline/add_contig_taxonomy.py index 132eed2c2..f87fad690 100755 --- a/pipeline/add_contig_taxonomy.py +++ b/pipeline/add_contig_taxonomy.py @@ -346,7 +346,7 @@ def write_taxa(ranked_ctgs, contig_table_fpath, outfpath): # Process NCBI taxdump files name_fpath = os.path.join(taxdump_dir_path, 'names.dmp') nodes_fpath = os.path.join(taxdump_dir_path, 'nodes.dmp') -nodes_fpath = os.path.join(taxdump_dir_path, 'merged.dmp') +merged_fpath = os.path.join(taxdump_dir_path, 'merged.dmp') pp = pprint.PrettyPrinter(indent=4) From 7d16d90551e921b05064d7d7687c99fc1db7d69e Mon Sep 17 00:00:00 2001 From: EvanRees Date: Mon, 2 Mar 2020 10:53:17 -0600 Subject: [PATCH 04/17] added extraction of merged.dmp from taxdump.tar.gz --- pipeline/make_taxonomy_table.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pipeline/make_taxonomy_table.py b/pipeline/make_taxonomy_table.py index bfc47872f..507704d6d 100755 --- a/pipeline/make_taxonomy_table.py +++ b/pipeline/make_taxonomy_table.py @@ -164,7 +164,7 @@ def update_dbs(database_path, db='all'): download_file(database_path, taxdump_url, taxdump_md5_url) if os.path.isfile(database_path + '/taxdump.tar.gz'): - run_command('tar -xzf {}/taxdump.tar.gz -C {} names.dmp nodes.dmp'.format(database_path, database_path)) + run_command('tar -xzf {}/taxdump.tar.gz -C {} names.dmp nodes.dmp merged.dmp'.format(database_path, database_path)) os.remove('{}/taxdump.tar.gz'.format(database_path)) print("nodes.dmp and names.dmp updated") @@ -183,7 +183,7 @@ def check_dbs(db_path): db_dict = { 'nr': ['nr.dmnd'], 'acc2taxid': ['prot.accession2taxid'], - 'taxdump': ['names.dmp','nodes.dmp'] + 'taxdump': ['names.dmp','nodes.dmp', 'merged.dmp'] } db_files = os.listdir(db_path) for db in db_dict: @@ -303,7 +303,7 @@ def run_taxonomy(pipeline_path, assembly_path, tax_table_path, db_dir_path, parser.add_argument('-s', '--single_genome', help='Specifies single genome mode', action='store_true') parser.add_argument('-u', '--update', required=False, action='store_true', - help='Checks/Adds/Updates: nodes.dmp, names.dmp, accession2taxid, nr.dmnd files within specified directory.') + help='Checks/Adds/Updates: nodes.dmp, names.dmp, merged.dmp, accession2taxid, nr.dmnd files within specified directory.') args = vars(parser.parse_args()) From 3c2ef35794a63f0f9146b7a25c80787f5d91ee07 Mon Sep 17 00:00:00 2001 From: EvanRees Date: Thu, 5 Mar 2020 12:23:18 -0600 Subject: [PATCH 05/17] resolved #10 Contributors added and copyright year updated to 2020. --- Dockerfile | 10 +++++----- autometa.py | 18 ++++++++++++++++++ autometa/binning/recursive_dbscan.py | 20 +++++++++++++++++++- autometa/common/coverage.py | 19 +++++++++++++++++++ autometa/common/external/bedtools.py | 18 ++++++++++++++++++ autometa/common/external/bowtie.py | 18 ++++++++++++++++++ autometa/common/external/diamond.py | 19 ++++++++++++++++++- autometa/common/external/hmmer.py | 18 ++++++++++++++++++ autometa/common/external/prodigal.py | 18 ++++++++++++++++++ autometa/common/external/samtools.py | 18 ++++++++++++++++++ autometa/common/external/work_queue.py | 18 ++++++++++++++++++ autometa/common/kmers.py | 19 +++++++++++++++++++ autometa/common/mag.py | 24 +++++++++++++++++++++--- autometa/common/markers.py | 18 ++++++++++++++++++ autometa/common/metagenome.py | 20 +++++++++++++++++++- autometa/common/utilities.py | 18 ++++++++++++++++++ autometa/config/__init__.py | 20 ++++++++++++++++++++ autometa/config/databases.py | 18 ++++++++++++++++++ autometa/config/default.config | 20 ++++++++++++++++++++ autometa/config/environ.py | 18 ++++++++++++++++++ autometa/config/metagenome.config | 20 ++++++++++++++++++++ autometa/config/project.py | 18 ++++++++++++++++++ autometa/config/user.py | 21 ++++++++++++++++++++- autometa/taxonomy/lca.py | 18 ++++++++++++++++++ autometa/taxonomy/majority_vote.py | 18 ++++++++++++++++++ autometa/taxonomy/ncbi.py | 18 ++++++++++++++++++ docs/template.py | 18 ++++++++++++++++++ 27 files changed, 488 insertions(+), 12 deletions(-) diff --git a/Dockerfile b/Dockerfile index a67220d53..5179b959d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,9 +1,10 @@ FROM continuumio/anaconda -MAINTAINER Evan R. Rees "evan.rees@wisc.edu" +MAINTAINER Jason C. Kwan "jason.kwan@wisc.edu" -# Copyright 2019 Evan Rees, Jason C. Kwan +# Copyright 2020 Ian J. Miller, Evan R. Rees, Kyle Wolf, Siddharth Uppal, +# Shaurya Chanana, Izaak Miller, Jason C. Kwan # -# This file is part of Autometa v2. +# This file is part of Autometa. # # Autometa is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as published by @@ -38,5 +39,4 @@ conda install -c bioconda -c conda-forge --yes \ umap-learn \ && conda clean --all --yes -COPY autometa autometa/ -# RUN git clone https://bitbucket.org/jason_c_kwan/autometa +RUN git clone https://github.com/KwanLab/Autometa diff --git a/autometa.py b/autometa.py index e87c4423c..d9d8c0002 100755 --- a/autometa.py +++ b/autometa.py @@ -1,6 +1,24 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ +Copyright 2020 Ian J. Miller, Evan R. Rees, Kyle Wolf, Siddharth Uppal, +Shaurya Chanana, Izaak Miller, Jason C. Kwan + +This file is part of Autometa. + +Autometa is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Autometa is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with Autometa. If not, see . + Main script to run Autometa """ diff --git a/autometa/binning/recursive_dbscan.py b/autometa/binning/recursive_dbscan.py index f5966e4fc..67818f304 100644 --- a/autometa/binning/recursive_dbscan.py +++ b/autometa/binning/recursive_dbscan.py @@ -1,6 +1,24 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +# Copyright 2020 Ian J. Miller, Evan R. Rees, Kyle Wolf, Siddharth Uppal, +# Shaurya Chanana, Izaak Miller, Jason C. Kwan +# +# This file is part of Autometa. +# +# Autometa is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Autometa is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with Autometa. If not, see . + import logging import os @@ -435,7 +453,7 @@ def main(args): default='TSNE') parser.add_argument( '--clustering-method', - help='Embedding method to use', + help='Clustering method to use', choices=['DBSCAN','HDBSCAN'], default='DBSCAN') parser.add_argument('--completeness', help='', default=20., type=float) diff --git a/autometa/common/coverage.py b/autometa/common/coverage.py index 07de3a708..e6b425667 100644 --- a/autometa/common/coverage.py +++ b/autometa/common/coverage.py @@ -1,5 +1,24 @@ #!/usr/bin/env python +# -*- coding: utf-8 -*- """ +Copyright 2020 Ian J. Miller, Evan R. Rees, Kyle Wolf, Siddharth Uppal, +Shaurya Chanana, Izaak Miller, Jason C. Kwan + +This file is part of Autometa. + +Autometa is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Autometa is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with Autometa. If not, see . + Autometa Coverage """ diff --git a/autometa/common/external/bedtools.py b/autometa/common/external/bedtools.py index 0f76383e6..66dfa5610 100644 --- a/autometa/common/external/bedtools.py +++ b/autometa/common/external/bedtools.py @@ -1,6 +1,24 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- """ +Copyright 2020 Ian J. Miller, Evan R. Rees, Kyle Wolf, Siddharth Uppal, +Shaurya Chanana, Izaak Miller, Jason C. Kwan + +This file is part of Autometa. + +Autometa is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Autometa is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with Autometa. If not, see . + Script containing wrapper functions for bedtools. """ diff --git a/autometa/common/external/bowtie.py b/autometa/common/external/bowtie.py index 100213bbd..01ea6b5e6 100644 --- a/autometa/common/external/bowtie.py +++ b/autometa/common/external/bowtie.py @@ -1,6 +1,24 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- """ +Copyright 2020 Ian J. Miller, Evan R. Rees, Kyle Wolf, Siddharth Uppal, +Shaurya Chanana, Izaak Miller, Jason C. Kwan + +This file is part of Autometa. + +Autometa is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Autometa is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with Autometa. If not, see . + Script containing wrapper functions for bowtie2. """ diff --git a/autometa/common/external/diamond.py b/autometa/common/external/diamond.py index 82ef13861..3fe265c98 100644 --- a/autometa/common/external/diamond.py +++ b/autometa/common/external/diamond.py @@ -1,7 +1,24 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- - """ +Copyright 2020 Ian J. Miller, Evan R. Rees, Kyle Wolf, Siddharth Uppal, +Shaurya Chanana, Izaak Miller, Jason C. Kwan + +This file is part of Autometa. + +Autometa is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Autometa is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with Autometa. If not, see . + Class and functions related to running diamond on metagenome sequences """ diff --git a/autometa/common/external/hmmer.py b/autometa/common/external/hmmer.py index 982ec5745..49cf6787e 100644 --- a/autometa/common/external/hmmer.py +++ b/autometa/common/external/hmmer.py @@ -1,6 +1,24 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- """ +Copyright 2020 Ian J. Miller, Evan R. Rees, Kyle Wolf, Siddharth Uppal, +Shaurya Chanana, Izaak Miller, Jason C. Kwan + +This file is part of Autometa. + +Autometa is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Autometa is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with Autometa. If not, see . + Functions related to running hmmer on metagenome sequences """ diff --git a/autometa/common/external/prodigal.py b/autometa/common/external/prodigal.py index b17665c34..955294330 100644 --- a/autometa/common/external/prodigal.py +++ b/autometa/common/external/prodigal.py @@ -1,6 +1,24 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- """ +Copyright 2020 Ian J. Miller, Evan R. Rees, Kyle Wolf, Siddharth Uppal, +Shaurya Chanana, Izaak Miller, Jason C. Kwan + +This file is part of Autometa. + +Autometa is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Autometa is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with Autometa. If not, see . + Functions to retrieve orfs from provided assembly using prodigal """ diff --git a/autometa/common/external/samtools.py b/autometa/common/external/samtools.py index ac81858b1..ec3682cae 100644 --- a/autometa/common/external/samtools.py +++ b/autometa/common/external/samtools.py @@ -1,6 +1,24 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- """ +Copyright 2020 Ian J. Miller, Evan R. Rees, Kyle Wolf, Siddharth Uppal, +Shaurya Chanana, Izaak Miller, Jason C. Kwan + +This file is part of Autometa. + +Autometa is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Autometa is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with Autometa. If not, see . + Script containing wrapper functions for samtools """ diff --git a/autometa/common/external/work_queue.py b/autometa/common/external/work_queue.py index b7d102bd0..05a567f27 100644 --- a/autometa/common/external/work_queue.py +++ b/autometa/common/external/work_queue.py @@ -1,6 +1,24 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- """ +Copyright 2020 Ian J. Miller, Evan R. Rees, Kyle Wolf, Siddharth Uppal, +Shaurya Chanana, Izaak Miller, Jason C. Kwan + +This file is part of Autometa. + +Autometa is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Autometa is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with Autometa. If not, see . + Uses Work Queue as a task-manager to run different tasks within the autometa pipeline on a scalable computing system. """ diff --git a/autometa/common/kmers.py b/autometa/common/kmers.py index a3dc80f9d..35abceb4a 100644 --- a/autometa/common/kmers.py +++ b/autometa/common/kmers.py @@ -1,6 +1,25 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- """ +Copyright 2020 Ian J. Miller, Evan R. Rees, Kyle Wolf, Siddharth Uppal, +Shaurya Chanana, Izaak Miller, Jason C. Kwan + +This file is part of Autometa. + +Autometa is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Autometa is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with Autometa. If not, see . + + File containing functions to count, retrieve, k-mers given sequences TODO: Separate file to handle parallel,work-queue processing diff --git a/autometa/common/mag.py b/autometa/common/mag.py index 2fce4ba3b..928739555 100644 --- a/autometa/common/mag.py +++ b/autometa/common/mag.py @@ -1,6 +1,24 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- """ +Copyright 2020 Ian J. Miller, Evan R. Rees, Kyle Wolf, Siddharth Uppal, +Shaurya Chanana, Izaak Miller, Jason C. Kwan + +This file is part of Autometa. + +Autometa is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Autometa is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with Autometa. If not, see . + Autometa Bin Class """ @@ -23,8 +41,8 @@ logger = logging.getLogger(__name__) -class Mag: - """docstring for Autometa Mag class.""" +class MAG: + """docstring for Autometa MAG class.""" def __init__(self, assembly, contigs, outdir=None): self.assembly = os.path.realpath(assembly) @@ -107,7 +125,7 @@ def get_seqs(self, all=False): Parameters ---------- all : bool - Gets all sequences from assembly if True else sequences for Mag + Gets all sequences from assembly if True else sequences for MAG (the default is False). Returns diff --git a/autometa/common/markers.py b/autometa/common/markers.py index bdc0d2b1e..8d90bd7ec 100644 --- a/autometa/common/markers.py +++ b/autometa/common/markers.py @@ -1,6 +1,24 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- """ +Copyright 2020 Ian J. Miller, Evan R. Rees, Kyle Wolf, Siddharth Uppal, +Shaurya Chanana, Izaak Miller, Jason C. Kwan + +This file is part of Autometa. + +Autometa is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Autometa is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with Autometa. If not, see . + Autometa Marker class consisting of various methods to annotate sequences with marker sets depending on sequence set taxonomy """ diff --git a/autometa/common/metagenome.py b/autometa/common/metagenome.py index e6c79be3b..58625f0cf 100644 --- a/autometa/common/metagenome.py +++ b/autometa/common/metagenome.py @@ -1,6 +1,24 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- """ +Copyright 2020 Ian J. Miller, Evan R. Rees, Kyle Wolf, Siddharth Uppal, +Shaurya Chanana, Izaak Miller, Jason C. Kwan + +This file is part of Autometa. + +Autometa is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Autometa is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with Autometa. If not, see . + Script containing Metagenome class for general handling of metagenome assembly """ @@ -17,7 +35,7 @@ from autometa.common import kmers from autometa.common import coverage from autometa.common.external import prodigal -from autometa.common.mag import Mag +from autometa.common.mag import MAG from autometa.common.utilities import timeit from autometa.common.utilities import gunzip from autometa.taxonomy.majority_vote import majority_vote diff --git a/autometa/common/utilities.py b/autometa/common/utilities.py index 69aa50bf7..4ff45eaa7 100644 --- a/autometa/common/utilities.py +++ b/autometa/common/utilities.py @@ -1,5 +1,23 @@ #!/usr/bin/env python """ +Copyright 2020 Ian J. Miller, Evan R. Rees, Kyle Wolf, Siddharth Uppal, +Shaurya Chanana, Izaak Miller, Jason C. Kwan + +This file is part of Autometa. + +Autometa is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Autometa is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with Autometa. If not, see . + File containing common utilities functions to be used by Autometa scripts. """ diff --git a/autometa/config/__init__.py b/autometa/config/__init__.py index bd7dfa549..f98c87fcf 100644 --- a/autometa/config/__init__.py +++ b/autometa/config/__init__.py @@ -1,4 +1,24 @@ #!/usr/bin/env python +""" +Copyright 2020 Ian J. Miller, Evan R. Rees, Kyle Wolf, Siddharth Uppal, +Shaurya Chanana, Izaak Miller, Jason C. Kwan + +This file is part of Autometa. + +Autometa is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Autometa is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with Autometa. If not, see . +""" + import os diff --git a/autometa/config/databases.py b/autometa/config/databases.py index dd686d71f..b133e20d5 100644 --- a/autometa/config/databases.py +++ b/autometa/config/databases.py @@ -1,5 +1,23 @@ #!/usr/bin/env python """ +Copyright 2020 Ian J. Miller, Evan R. Rees, Kyle Wolf, Siddharth Uppal, +Shaurya Chanana, Izaak Miller, Jason C. Kwan + +This file is part of Autometa. + +Autometa is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Autometa is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with Autometa. If not, see . + Configuration handling for Autometa Databases. """ diff --git a/autometa/config/default.config b/autometa/config/default.config index d23420fe7..467f3cb62 100644 --- a/autometa/config/default.config +++ b/autometa/config/default.config @@ -1,3 +1,23 @@ +""" +Copyright 2020 Ian J. Miller, Evan R. Rees, Kyle Wolf, Siddharth Uppal, +Shaurya Chanana, Izaak Miller, Jason C. Kwan + +This file is part of Autometa. + +Autometa is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Autometa is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with Autometa. If not, see . +""" + [common] home_dir = None diff --git a/autometa/config/environ.py b/autometa/config/environ.py index e4ff4eaa5..790bfa61a 100644 --- a/autometa/config/environ.py +++ b/autometa/config/environ.py @@ -1,5 +1,23 @@ #!/usr/bin/env python """ +Copyright 2020 Ian J. Miller, Evan R. Rees, Kyle Wolf, Siddharth Uppal, +Shaurya Chanana, Izaak Miller, Jason C. Kwan + +This file is part of Autometa. + +Autometa is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Autometa is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with Autometa. If not, see . + Configuration handling for Autometa environment. """ diff --git a/autometa/config/metagenome.config b/autometa/config/metagenome.config index 5c5933e45..fa6be4dbb 100644 --- a/autometa/config/metagenome.config +++ b/autometa/config/metagenome.config @@ -1,3 +1,23 @@ +""" +Copyright 2020 Ian J. Miller, Evan R. Rees, Kyle Wolf, Siddharth Uppal, +Shaurya Chanana, Izaak Miller, Jason C. Kwan + +This file is part of Autometa. + +Autometa is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Autometa is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with Autometa. If not, see . +""" + ######################################## ### Metagenome Submission Parameters ### ######################################## diff --git a/autometa/config/project.py b/autometa/config/project.py index 6067a597f..e5e1b43f5 100644 --- a/autometa/config/project.py +++ b/autometa/config/project.py @@ -1,6 +1,24 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- """ +Copyright 2020 Ian J. Miller, Evan R. Rees, Kyle Wolf, Siddharth Uppal, +Shaurya Chanana, Izaak Miller, Jason C. Kwan + +This file is part of Autometa. + +Autometa is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Autometa is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with Autometa. If not, see . + Configuration handling for Autometa User Project. """ diff --git a/autometa/config/user.py b/autometa/config/user.py index 0e0061af8..5e24cade8 100644 --- a/autometa/config/user.py +++ b/autometa/config/user.py @@ -1,6 +1,25 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -""" Autometa User Configuration Class +""" +Copyright 2020 Ian J. Miller, Evan R. Rees, Kyle Wolf, Siddharth Uppal, +Shaurya Chanana, Izaak Miller, Jason C. Kwan + +This file is part of Autometa. + +Autometa is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Autometa is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with Autometa. If not, see . + +Autometa User Configuration Class """ diff --git a/autometa/taxonomy/lca.py b/autometa/taxonomy/lca.py index 81bcdbdc3..16f984060 100644 --- a/autometa/taxonomy/lca.py +++ b/autometa/taxonomy/lca.py @@ -1,5 +1,23 @@ #!/usr/bin/env python """ +Copyright 2020 Ian J. Miller, Evan R. Rees, Kyle Wolf, Siddharth Uppal, +Shaurya Chanana, Izaak Miller, Jason C. Kwan + +This file is part of Autometa. + +Autometa is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Autometa is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with Autometa. If not, see . + Determines the Lowest Common Ancestor given a tab-delimited BLAST table,fasta file, or iterable of SeqRecords Assumes BLAST outfmt=6 diff --git a/autometa/taxonomy/majority_vote.py b/autometa/taxonomy/majority_vote.py index 9338f18ca..487a73b64 100644 --- a/autometa/taxonomy/majority_vote.py +++ b/autometa/taxonomy/majority_vote.py @@ -1,5 +1,23 @@ #!/usr/bin/env python """ +Copyright 2020 Ian J. Miller, Evan R. Rees, Kyle Wolf, Siddharth Uppal, +Shaurya Chanana, Izaak Miller, Jason C. Kwan + +This file is part of Autometa. + +Autometa is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Autometa is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with Autometa. If not, see . + Modified majority vote algorithm (Autometa V1.0) """ diff --git a/autometa/taxonomy/ncbi.py b/autometa/taxonomy/ncbi.py index 1f7e39640..1dadb39c8 100644 --- a/autometa/taxonomy/ncbi.py +++ b/autometa/taxonomy/ncbi.py @@ -1,5 +1,23 @@ #!/usr/bin/env python """ +Copyright 2020 Ian J. Miller, Evan R. Rees, Kyle Wolf, Siddharth Uppal, +Shaurya Chanana, Izaak Miller, Jason C. Kwan + +This file is part of Autometa. + +Autometa is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Autometa is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with Autometa. If not, see . + Utilities file containing functions useful for handling NCBI taxonomy databases """ diff --git a/docs/template.py b/docs/template.py index d55441401..f42443d05 100644 --- a/docs/template.py +++ b/docs/template.py @@ -1,6 +1,24 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- """ +Copyright 2020 Ian J. Miller, Evan R. Rees, Kyle Wolf, Siddharth Uppal, +Shaurya Chanana, Izaak Miller, Jason C. Kwan + +This file is part of Autometa. + +Autometa is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Autometa is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with Autometa. If not, see . + Template Script for Autometa Modules Template Description: From d53fb038f2193a8c66ec5dddbc3127be56fb8276 Mon Sep 17 00:00:00 2001 From: EvanRees Date: Wed, 11 Mar 2020 17:49:29 -0500 Subject: [PATCH 06/17] Resolves KwanLab/Autometa#16, Resolves KwanLab/Autometa#17 and simplified config parsing. Renamed 'projects' to 'workspace' to avoid confusion with 'project'. test metagenome.config file has been updated with respective files & parameters. Reconfigured logger to stream info and write debug level to timestamped log file. Added exceptions. to be used across autometa pipeline. --- autometa.py | 202 +- autometa/binning/recursive_dbscan.py | 14 +- autometa/common/coverage.py | 5 + autometa/common/exceptions.py | 67 + autometa/common/external/diamond.py | 4 +- autometa/common/external/hmmer.py | 2 +- autometa/common/external/prodigal.py | 103 +- autometa/common/kmers.py | 205 +- autometa/common/mag.py | 74 +- autometa/common/markers.py | 1 + autometa/common/metagenome.py | 2 + autometa/config/__init__.py | 62 +- autometa/config/databases.py | 39 +- autometa/config/default.config | 36 +- autometa/config/metagenome.config | 38 +- autometa/config/project.py | 343 +-- autometa/config/user.py | 135 +- autometa/taxonomy/lca.py | 6 +- tests/data/coverage.tsv | 3426 ++++++++++++++++++++++++++ tests/metagenome.config | 8 +- 20 files changed, 4217 insertions(+), 555 deletions(-) create mode 100644 autometa/common/exceptions.py create mode 100644 tests/data/coverage.tsv diff --git a/autometa.py b/autometa.py index d9d8c0002..57c28433b 100755 --- a/autometa.py +++ b/autometa.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python # -*- coding: utf-8 -*- """ Copyright 2020 Ian J. Miller, Evan R. Rees, Kyle Wolf, Siddharth Uppal, @@ -27,14 +27,73 @@ import os import sys +import multiprocessing as mp + from autometa.config.user import AutometaUser -from autometa.config import PROJECTS_DIR -from autometa.config import parse_config from autometa.common.utilities import timeit from autometa.common.metagenome import Metagenome -logger = logging.getLogger(__name__) +logger = logging.getLogger('autometa') + + +def init_logger(fpath=None, level=None): + """Initialize logger. + + By default will initialize streaming logger with DEBUG level messages. + If `fpath` is provided, will write DEBUG level messages to `fpath` and + set streaming messages to INFO. + + Parameters + ---------- + fpath : str + + level : int + Overwrite default logging level behavior with provided `level`. + This must be a constant from logging levels. + See https://docs.python.org/3/library/logging.html#levels for details. + i.e. logging.DEBUG, logging.INFO, etc. translates to 0,10, etc... + + Returns + ------- + logging.Logger + logging's Logger object to emit messages via methods: + 'warn','info','debug','error','exception','critical','fatal' + + Raises + ------- + ValueError + `level` must be int and one of 0, 10, 20, 30, 40, 50 + """ + levels = { + logging.NOTSET, + logging.DEBUG, + logging.INFO, + logging.WARNING, + logging.ERROR, + logging.CRITICAL} + if level and type(level) is not int: + raise ValueError(f'{level} must be an int! {type(level)}') + if level and level not in levels: + raise ValueError(f'{level} not in levels: {levels}!') + formatter = logging.Formatter( + fmt='[%(asctime)s %(levelname)s] %(name)s: %(message)s', + datefmt='%m/%d/%Y %I:%M:%S %p') + # Construct file/stream logging handlers + streamhandler = logging.StreamHandler() + streamhandler.setFormatter(formatter) + if fpath: + filehandler = logging.FileHandler(fpath) + filehandler.setFormatter(formatter) + logger.addHandler(filehandler) + lvl = level if level else logging.INFO + else: + lvl = level if level else logging.DEBUG + + streamhandler.setLevel(lvl) + logger.addHandler(streamhandler) + logger.setLevel(logging.DEBUG) + return logger @timeit def run(mgargs): @@ -47,14 +106,13 @@ def run(mgargs): Returns ------- - None - Description of returned object. + NoneType Raises ------- - ExceptionName - Why the exception is raised. - + TODO: Need to enumerate all exceptions raised from within binning pipeline. + I.e. Demarkate new exception (not yet handled) vs. handled exception. + Subclassing an AutometaException class may be most appropriate use case here. """ mg = Metagenome( assembly=mgargs.files.metagenome, @@ -127,121 +185,45 @@ def run(mgargs): sep='\t', index=True, header=True) - # TODO: Refine bins by connection mapping, taxon, or other methods - # mag.refine(by='connections') - # mag.refine(by='taxa') def main(args): - if not args.metagenomes_configs and not args.metagenomes and not args.resume: - raise ValueError('Must provide metagenomes-configs or metagenomes') - if args.config: - user = AutometaUser(args.config, dryrun=args.dryrun) - else: - user = AutometaUser(dryrun=args.dryrun) - # Configure environment and databases - user.configure(nproc=args.cpus) - # Workflow control... - # TODO: WorkQueue handling. to process multiple metagenomes at once. - if args.resume: - mg_configs = user.get_mgargs( - projects_dir=args.projects, - project_num=args.project, - metagenome_num=args.resume) - elif args.metagenomes_configs: - try: - mg_configs = user.add_metagenomes(args.metagenomes_configs) - except FileNotFoundError as err: - project_configs = user.new_project(args) - mg_configs = user.add_metagenomes(args.metagenomes_configs) - else: - project_configs = user.new_project(args) - mg_configs = project_configs.get('metagenomes') - # Run autometa on workflow metagenome args... - for metagenome,mgargs in mg_configs.items(): + user = AutometaUser(dryrun=args.dryrun, nproc=args.cpus) + for config in args.config: + mgargs = user.prepare_run(config) run(mgargs) - # user.bin_metagenome(metagenome_config) + # cluster process -> mgargs.files.binning + # TODO: Refine bins by connection mapping, taxon, or other methods # TODO: Construct pangenomes from multiple datasets # get_pangenomes() if __name__ == '__main__': import argparse - import logging as logger - logger.basicConfig( - format='%(asctime)s : %(name)s : %(levelname)s : %(message)s', - datefmt='%m/%d/%Y %I:%M:%S %p', - level=logger.DEBUG) - - ############################### - # AutometaUser Project(s) API # - ############################### - + import time + cpus = mp.cpu_count() parser = argparse.ArgumentParser('Main script to run Autometa') - parser.add_argument('--projects', - help=f' (Default is {PROJECTS_DIR}).', - default=PROJECTS_DIR, - required=False) - parser.add_argument('--project', - help='project number for which to resume autometa binning (required with `--resume` and --add-metagenome).', - type=int) - parser.add_argument('--resume', - help='metagenome number for which to resume autometa binning (`--project` num is required).', - type=int, - default=0) - parser.add_argument('--metagenomes-configs', + parser.add_argument('config', help='', nargs='*') parser.add_argument('--dryrun', help='whether to perform database updating/construction', action='store_true', default=False) + parser.add_argument('--cpus', + help=f'Num. cpus to use when updating/constructing databases (default: {cpus} cpus)', + type=int, + default=cpus) + args = parser.parse_args() + timestamp = time.strftime("%Y-%m-%d_%H-%M-%S",time.gmtime()) + logger = init_logger(f'{timestamp}_autometa.log') + try: + main(args) + except Exception as err: + issue_request = ''' + An error was encountered! - ####################### - # Autometa Parameters # - ####################### + Please help us fix your problem! - parser.add_argument('metagenomes', nargs='*') - parser.add_argument('--length-cutoff', default=3000, type=int) - parser.add_argument('--cov-from-spades', - help='retrieve coverage from spades headers. (Only may be used when SPAdes assemblies are provided)', - action='store_true', - default=False) - parser.add_argument( - '--kmer-size', - help='size of k-mer to calculate frequencies.', - default=5, type=int) - parser.add_argument( - '--kmer-multiprocess', - help='use multiprocessing to count k-mers.', - action='store_true', default=False) - parser.add_argument( - '--kmer-normalize', - help='Perform CLR transform on k-mer frequencies.', - action='store_true', default=False) - parser.add_argument('--do-pca', - help='Perform PCA prior to running embedding method', default=False, action='store_true') - parser.add_argument( - '--pca-dims', - help='Number of dimesions to reduce k-mer frequencies using PCA', - default=50, type=int) - parser.add_argument( - '--embedding-method', - help='Embedding method for dimension reduction of contig k-mer frequencies', - default='UMAP', - choices=['TSNE','UMAP']) - parser.add_argument('--taxon-method', default='majority_vote', choices=['majority_vote']) - parser.add_argument('--kingdom',default='bacteria',choices=['bacteria','archaea']) - parser.add_argument('--reversed', help='Reverse order at which taxonomic ranks are clustered', default=True, action='store_false') - parser.add_argument('--binning-method', - default='recursive_dbscan', - choices=['recursive_dbscan']) - parser.add_argument('--completeness', type=float, default=20.) - parser.add_argument('--purity', type=float, default=90.) - parser.add_argument('--verbose', action='store_true', default=False) - parser.add_argument('--force', action='store_true', default=False) - parser.add_argument('--usepickle', action='store_true', default=False) - parser.add_argument('--parallel', help="Use GNU parallel", - action='store_true', default=False) - parser.add_argument('--cpus',default=1, type=int) - parser.add_argument('--config',help='user defined config file') - args = parser.parse_args() - main(args) + You may file an issue with us at https://github.com/KwanLab/Autometa/issues/new + ''' + logger.exception(err) + print(issue_request) diff --git a/autometa/binning/recursive_dbscan.py b/autometa/binning/recursive_dbscan.py index 67818f304..d12ed3821 100644 --- a/autometa/binning/recursive_dbscan.py +++ b/autometa/binning/recursive_dbscan.py @@ -30,6 +30,7 @@ from autometa.common.markers import Markers from autometa.common import kmers +from autometa.common.exceptions import RecursiveDBSCANError # TODO: This should be # from autometa.common.kmers import Kmers # So later we can simply/and more clearly do Kmers.load(kmers_fpath).embed(method) @@ -331,7 +332,16 @@ def binning(master, markers, domain='bacteria', completeness=20., purity=90., pd.DataFrame master with ['cluster','completeness','purity'] columns added + Raises + ------- + RecursiveDBSCANError + No marker information is availble for contigs to be binned. """ + # First check needs to ensure we have markers available to check binning quality... + if master.loc[master.index.isin(markers.index)].empty: + err = 'No markers for contigs in table. Unable to assess binning quality' + raise RecursiveDBSCANError(err) + if not taxonomy: return get_clusters( master, @@ -356,6 +366,9 @@ def binning(master, markers, domain='bacteria', completeness=20., purity=90., for rank in ranks: # TODO: We should account for novel taxa here instead of removing 'unclassified' unclassified_filter = master[rank] != 'unclassified' + n_contigs_in_taxa = master.loc[unclassified_filter].groupby(rank)[rank].count().sum() + n_taxa = master.loc[unclassified_filter].groupby(rank)[rank].count().index.nunique() + logger.info(f'Examining {rank}: {n_taxa:,} unique taxa ({n_contigs_in_taxa:,} contigs)') # Group contigs by rank and find best clusters within subset for rank_name_txt, dff in master.loc[unclassified_filter].groupby(rank): if dff.empty: @@ -390,7 +403,6 @@ def binning(master, markers, domain='bacteria', completeness=20., purity=90., clustered.cluster = clustered.cluster.map(rename_cluster) num_clusters += clustered.cluster.nunique() clusters.append(clustered) - clustered_df = pd.concat(clusters, sort=True) unclustered_df = master.loc[~master.index.isin(clustered_df.index)] unclustered_df.loc[:,'cluster'] = pd.np.nan diff --git a/autometa/common/coverage.py b/autometa/common/coverage.py index e6b425667..85129a951 100644 --- a/autometa/common/coverage.py +++ b/autometa/common/coverage.py @@ -117,6 +117,11 @@ def get(fasta, out, fwd_reads=None, rev_reads=None, sam=None, bam=None, lengths= pd.DataFrame index=contig cols=['coverage'] """ + if os.path.exists(out) and os.stat(out).st_size > 0: + # COMBAK: checksum comparison [checkpoint] + logger.debug(f'coverage ({out}) already exists. skipping...') + cols = ['contig','coverage'] + return pd.read_csv(out, sep='\t', usecols=cols, index_col='contig') try: outdir = os.path.dirname(out) tempdir = tempfile.mkdtemp(suffix=None, prefix='cov-alignments', dir=outdir) diff --git a/autometa/common/exceptions.py b/autometa/common/exceptions.py new file mode 100644 index 000000000..6e2ee79c1 --- /dev/null +++ b/autometa/common/exceptions.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +Copyright 2020 Ian J. Miller, Evan R. Rees, Kyle Wolf, Siddharth Uppal, +Shaurya Chanana, Izaak Miller, Jason C. Kwan + +This file is part of Autometa. + +Autometa is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Autometa is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with Autometa. If not, see . + +File containing customized AutometaExceptions for more specific exception handling +""" + +class AutometaException(Exception): + """docstring for AutometaException.""" + issue_request = ''' + An error was encountered! + + Please help us fix your problem! + + You may file an issue with us at https://github.com/KwanLab/Autometa/issues/new + ''' + pass + + +class KmerFormatError(Exception): + """KmerFormatError exception class.""" + + def __init__(self, fpath): + self.fpath = fpath + + def __str__(self): + return f'{self.fpath} does not contain a \"contig\" column. '\ + 'Ensure the k-mer matrix was properly generated.' + +class KmerEmbeddingError(Exception): + """KmerEmbeddingError exception class.""" + + def __init__(self, value): + self.value = value + + def __str__(self): + return self.value + +class RecursiveDBSCANError(Exception): + """RecursiveDBSCANError exception class.""" + + def __init__(self, value): + self.value = value + + def __str__(self): + return self.value + +if __name__ == '__main__': + print('This file contains custom exceptions for Autometa and should not be run directly') + import sys;sys.exit(1) diff --git a/autometa/common/external/diamond.py b/autometa/common/external/diamond.py index 3fe265c98..a2b1310a7 100644 --- a/autometa/common/external/diamond.py +++ b/autometa/common/external/diamond.py @@ -237,8 +237,8 @@ def parse(results, top_pct=0.9, verbose=False): qend = llist[7] sstart = llist[8] send = llist[9] - evalue = llist[10] - bitscore = llist[11] + evalue = float(llist[10]) + bitscore = float(llist[11]) hit = DiamondResult( qseqid=qseqid, sseqid=sseqid, diff --git a/autometa/common/external/hmmer.py b/autometa/common/external/hmmer.py index 49cf6787e..945592ee9 100644 --- a/autometa/common/external/hmmer.py +++ b/autometa/common/external/hmmer.py @@ -192,7 +192,7 @@ def filter_markers(infpath, outfpath, cutoffs, prodigal_annotations=None, force= mdf = mdf[cols] if prodigal_annotations: logger.debug('Retrieving ORF->contig translations from ORF Caller') - translations = prodigal.get_orf_translations(prodigal_annotations) + translations = prodigal.contigs_from_headers(prodigal_annotations) translater = lambda x: translations.get(x, x.rsplit('_',1)[0]) else: translater = lambda x: x.rsplit('_',1)[0] diff --git a/autometa/common/external/prodigal.py b/autometa/common/external/prodigal.py index 955294330..92cd21cd3 100644 --- a/autometa/common/external/prodigal.py +++ b/autometa/common/external/prodigal.py @@ -48,7 +48,7 @@ def run(assembly, nucls_out, prots_out, force=False,cpus=0,parallel=True): force : bool overwrite outfpath if it already exists (the default is False). cpus : int - num `cpus` to use. By default will run as many `cpus` as possible + num `cpus` to use. **Default (cpus=0) will run as many `cpus` as possible** parallel : bool Will parallelize prodigal using GNU parallel (the default is True). @@ -104,6 +104,7 @@ def run(assembly, nucls_out, prots_out, force=False,cpus=0,parallel=True): '-d',tmpnucl_fpath, '-q', '-p','meta', + '-m', '-o',os.devnull, '<',assembly, '2>',os.devnull, @@ -121,41 +122,50 @@ def run(assembly, nucls_out, prots_out, force=False,cpus=0,parallel=True): cmd = [str(arg) for arg in cmd] logger.debug(f'cmd: {" ".join(cmd)}') if parallel: - returncode = subprocess.call(" ".join(cmd), shell=True) - tmpfpaths = glob(os.path.join(tmpdir,'*.faa')) - lines = '' - for fp in tmpfpaths: - with open(fp) as fh: - for line in fh: - lines += line - out = open(prots_out, 'w') - out.write(lines) - out.close() - tmpfpaths = glob(os.path.join(tmpdir, '*.fna')) - lines = '' - for fp in tmpfpaths: - with open(fp) as fh: - for line in fh: - lines += line - out = open(nucls_out, 'w') - out.write(lines) - out.close() - shutil.rmtree(tmpdir) + try: + returncode = subprocess.call(" ".join(cmd), shell=True) + tmpfpaths = glob(os.path.join(tmpdir,'*.faa')) + lines = '' + for fp in tmpfpaths: + with open(fp) as fh: + for line in fh: + lines += line + out = open(prots_out, 'w') + out.write(lines) + out.close() + tmpfpaths = glob(os.path.join(tmpdir, '*.fna')) + lines = '' + for fp in tmpfpaths: + with open(fp) as fh: + for line in fh: + lines += line + out = open(nucls_out, 'w') + out.write(lines) + out.close() + except Exception as err: + # COMBAK: Should probably be more descriptive as to what errors could occur here. + logger.exception(err) + finally: + shutil.rmtree(tmpdir) else: with open(os.devnull, 'w') as stdout, open(os.devnull, 'w') as stderr: proc = subprocess.run(cmd, stdout=stdout, stderr=stderr) returncode = proc.returncode if returncode: logger.warning(f'Args:{cmd} ReturnCode:{returncode}') + # COMBAK: Check all possible return codes for GNU parallel for fp in [nucls_out, prots_out]: if not os.path.exists(fp): raise OSError(f'{fp} not written') return nucls_out, prots_out -def get_orf_translations(fpath): - """Translate Prodigal ORF ID to contig ID using prodigal assigned ID from +def contigs_from_headers(fpath): + """Get ORF id to contig id translations using prodigal assigned ID from description. + First determines if all of ID=3495691_2 from description is in header. + "3495691_2" represents the 3,495,691st gene in the 2nd sequence. + i.e. : record.description 'k119_1383959_3495691_2 # 688 # 1446 # 1 # ID=3495691_2;partial=01;start_type=ATG;rbs_motif=None;rbs_spacer=None' ^ ^ @@ -178,10 +188,47 @@ def get_orf_translations(fpath): translations = {} for record in SeqIO.parse(fpath, 'fasta'): orf_id = record.description.split('#')[-1].split(';')[0].strip().replace('ID=','') - contig_id = record.id.replace(f'_{orf_id}', '') + if orf_id in record.id: + contig_id = record.id.replace(f'_{orf_id}', '') + else: + contig_id = record.id.rsplit('_',1)[0] translations.update({record.id:contig_id}) return translations +def orf_records_from_contigs(contigs, fpath): + """Retrieve list of *ORFs headers* from `contigs`. Prodigal annotated ORFs + are required as the input `fpath`. + + Parameters + ---------- + contigs: iterable + iterable of contigs from which to retrieve ORFs + fpath : str + + + Returns + ------- + list + ORF SeqIO.SeqRecords from provided `contigs`. i.e. [SeqRecord, ...] + + Raises + ------- + ExceptionName + Why the exception is raised. + + """ + records = [] + for record in SeqIO.parse(fpath, 'fasta'): + orf_id = record.description.split('#')[-1].split(';')[0].strip().replace('ID=','') + if orf_id in record.id: + contig_id = record.id.replace(f'_{orf_id}', '') + else: + contig_id = record.id.rsplit('_',1)[0] + if contig_id not in contigs: + continue + records.append(record) + return records + def main(args): if args.verbose: logger.setLevel(logger.DEBUG) @@ -202,12 +249,12 @@ def main(args): datefmt='%m/%d/%Y %I:%M:%S %p', level=logger.DEBUG) parser = argparse.ArgumentParser('Calls ORFs with provided input assembly') - parser.add_argument('assembly', help='') - parser.add_argument('nucls_out', help='') - parser.add_argument('prots_out', help='') + parser.add_argument('assembly', help='', type=str) + parser.add_argument('nucls_out', help='', type=str) + parser.add_argument('prots_out', help='', type=str) parser.add_argument('--force', help="force overwrite of ORFs out filepaths", action='store_true') - parser.add_argument('--cpus', help='num cpus to use', default=0) + parser.add_argument('--cpus', help='num cpus to use', type=int, default=0) parser.add_argument('--parallel', help="Enable GNU parallel", action='store_true', default=False) parser.add_argument('--verbose', help="add verbosity", action='store_true') diff --git a/autometa/common/kmers.py b/autometa/common/kmers.py index 35abceb4a..479501513 100644 --- a/autometa/common/kmers.py +++ b/autometa/common/kmers.py @@ -41,12 +41,27 @@ from umap import UMAP from autometa.common.utilities import gunzip +from autometa.common.exceptions import KmerFormatError +from autometa.common.exceptions import KmerEmbeddingError -logger = logging.getLogger(__name__) # Suppress numba logger debug output numba_logger = logging.getLogger("numba").setLevel(logging.ERROR) +logger = logging.getLogger(__name__) def revcomp(string): + """Revers complement the provided `string`. + + Parameters + ---------- + string : str + Description of parameter `string`. + + Returns + ------- + str or int(-1) + reverse complemented string. + Note: If any weird letters are encountered, int value of -1 is returned. + """ complement = {'A':'T','T':'A','C':'G','G':'C'} complements = [] for i in range(len(string)): @@ -57,10 +72,21 @@ def revcomp(string): return ''.join(reversed(complements)) def init_kmers(kmer_size=5): - # Count K-mer frequencies - # Holds lists of k-mer counts, keyed by contig name + """Initialize k-mers from `kmer_size`. Respective reverse complements will + be removed. + + Parameters + ---------- + kmer_size : int + pattern size of k-mer to intialize dict (the default is 5). + + Returns + ------- + dict + {kmer:index, ...} + """ kmers = {} - counts = 0 + index = 0 uniq_kmers = dict() dna_letters = ['A', 'T', 'C', 'G'] all_kmers = list(dna_letters) @@ -70,14 +96,14 @@ def init_kmers(kmer_size=5): for char in dna_letters: new_list.append(current_seq + char) all_kmers = new_list - # Now we trim k-mers and put them in the dictionary - # Q: What is being trimmed here? - # A: I think trim means subset by unique from the reverse complement for kmer in all_kmers: kmer_reverse = revcomp(kmer) + if type(kmer_reverse) is int: + logger.warning(f'Encountered non-standard string: {kmer}. skipping...') + continue if (kmer not in uniq_kmers) and (kmer_reverse not in uniq_kmers): - uniq_kmers[kmer] = counts - counts += 1 + uniq_kmers[kmer] = index + index += 1 return uniq_kmers def load(kmers_fpath): @@ -101,18 +127,58 @@ def load(kmers_fpath): """ if not os.path.exists(kmers_fpath) or os.stat(kmers_fpath).st_size == 0: raise FileNotFoundError(kmers_fpath) - return pd.read_csv(kmers_fpath, sep='\t', index_col='contig') + try: + df = pd.read_csv(kmers_fpath, sep='\t', index_col='contig') + except ValueError as err: + raise KmerFormatError(kmers_fpath) from ValueError + return df def mp_counter(assembly, ref_kmers, nproc=mp.cpu_count()): + """Multiprocessing k-mer counter used in `count`. (Should not be used directly). + + Parameters + ---------- + assembly : str + (nucleotides) + ref_kmers : dict + {kmer:index, ...} + nproc : int + Number of cpus to use. (the default will use all available). + + Returns + ------- + list + [{record:counts}, {record:counts}, ...] + + Raises + ------- + ExceptionName + Why the exception is raised. + + """ pool = mp.Pool(nproc) args = [(record,ref_kmers) for record in SeqIO.parse(assembly, 'fasta')] logger.debug(f'Pool counter (nproc={nproc}): counting {len(args):,} records k-mer frequencies') - results = pool.map(kmer_counter, args) + results = pool.map(record_counter, args) pool.close() pool.join() return results -def kmer_counter(args): +def record_counter(args): + """single record counter used when multiprocessing. + + Parameters + ---------- + args : 2-tuple + (record, ref_kmers) + - record : SeqIO.SeqRecord + - ref_kmers : {kmer:index, ...} + + Returns + ------- + dict + {contig:[count,count,...]} count index is respective to ref_kmers.keys() + """ record, ref_kmers = args for ref_kmer in ref_kmers: kmer_size = len(ref_kmer) @@ -123,11 +189,11 @@ def kmer_counter(args): max_length = record_length - kmer_size if max_length <= 0: logger.warning(f'{record.id} can not be counted! k-mer size exceeds length. {record_length}') + contig_kmer_counts = [pd.np.nan] * n_uniq_kmers return {record.id:contig_kmer_counts} - # contig_kmer_counts.insert(0, record.id) - # return '\t'.join([str(c) for c in contig_kmer_counts])+'\n' for i in range(max_length): kmer = record.seq[i:i+kmer_size] + # reverse_complement() is Biopython specific method for SeqRecord object kmer_revcomp = kmer.reverse_complement() kmer, kmer_revcomp = map(str, [kmer,kmer_revcomp]) if kmer not in ref_kmers and kmer_revcomp not in ref_kmers: @@ -137,10 +203,32 @@ def kmer_counter(args): else: index = ref_kmers[kmer_revcomp] contig_kmer_counts[index] += 1 - # contig_kmer_counts.insert(0, record.id) + contig_kmer_counts = [c if c != 0 else pd.np.nan for c in contig_kmer_counts] return {record.id:contig_kmer_counts} def seq_counter(assembly, ref_kmers, verbose=True): + """Sequentially count k-mer frequencies. + + Parameters + ---------- + assembly : str + (nucleotides) + ref_kmers : dict + {kmer:index, ...} + verbose : bool + enable progress bar `verbose` (the default is True). + + Returns + ------- + dict + {contig:[count,count,...]} count index is respective to ref_kmers.keys() + + Raises + ------- + ExceptionName + Why the exception is raised. + + """ n_uniq_kmers = len(ref_kmers) for ref_kmer in ref_kmers: kmer_size = len(ref_kmer) @@ -154,10 +242,12 @@ def seq_counter(assembly, ref_kmers, verbose=True): max_length = len(record.seq) - kmer_size if max_length <= 0: logger.warning(f'{record.id} can not be counted! k-mer size exceeds length. {len(record.seq)}') + contig_kmer_counts = [pd.np.nan] * n_uniq_kmers kmer_counts.update({record.id:contig_kmer_counts}) continue for i in range(max_length): kmer = record.seq[i:i+kmer_size] + # reverse_complement() is Biopython specific method for SeqRecord object kmer_revcomp = kmer.reverse_complement() kmer, kmer_revcomp = map(str, [kmer,kmer_revcomp]) if kmer not in ref_kmers and kmer_revcomp not in ref_kmers: @@ -167,6 +257,7 @@ def seq_counter(assembly, ref_kmers, verbose=True): else: index = ref_kmers[kmer_revcomp] contig_kmer_counts[index] += 1 + contig_kmer_counts = [c if c != 0 else pd.np.nan for c in contig_kmer_counts] kmer_counts.update({record.id:contig_kmer_counts}) return kmer_counts @@ -200,7 +291,7 @@ def count(assembly, kmer_size=5, normalized=False, verbose=True, multiprocess=Tr """ if not type(kmer_size) is int: - raise TypeError(f'kmer_size must be an int! Given: {kmer_size}') + raise TypeError(f'kmer_size must be an int! Given: {type(kmer_size)}') ref_kmers = init_kmers(kmer_size) if assembly.endswith('.gz'): assembly = gunzip(assembly, assembly.rstrip('.gz')) @@ -221,7 +312,9 @@ def count(assembly, kmer_size=5, normalized=False, verbose=True, multiprocess=Tr def normalize(df): """Normalize k-mers by Centered Log Ratio transformation - 1. Drop any k-mers not contained by any contigs + 1a. Drop any k-mers not present for all contigs + 1b. Drop any contigs not containing any kmer counts + 1c. Fill any remaining na values with 0 2a. Normalize the k-mer count by the total count of all k-mers for a given contig 2b. Add 1 as 0 can not be utilized for CLR 3. Perform CLR transformation log(norm. value / geometric mean norm. value) @@ -245,32 +338,39 @@ def normalize(df): index='contig', cols=[kmer, kmer, ...] Columns have been transformed by CLR normalization. """ - # OPTIMIZE: May be able to implement this transformation with dask? - return df.dropna(axis='columns', how='all')\ - .transform(lambda x: (x+1) / x.sum(), axis='columns')\ - .transform(lambda x: np.log(x / gmean(x)), axis='columns') - - -def embed(kmers=None, embedded=None, n_components=2, do_pca=True, pca_dimensions=50, method='TSNE', perplexity=30, **kwargs): + # steps in 1: data cleaning + df.dropna(axis='columns', how='all', inplace=True) + df.dropna(axis='index', how='all', inplace=True) + df.fillna(0, inplace=True) + # steps in 2 and 3: normalization and CLR transformation + step_2a = lambda x: (x+1) / x.sum() + step_2b = lambda x: np.log(x / gmean(x)) + return df.transform(step_2a, axis='columns').transform(step_2b, axis='columns') + +def embed(kmers=None, embedded=None, n_components=2, do_pca=True, pca_dimensions=50, + method='UMAP', perplexity=30.0, **kwargs): """Embed k-mers using provided `method`. Parameters ---------- kmers : str or pd.DataFrame - Description of parameter `kmers` (the default is None). + embedded : str - Description of parameter `embedded` (the default is None). + [optional] If provided will write to `embedded`. n_components : int - `n_components` to embed k-mer frequencies (the default is 3). + `n_components` to embed k-mer frequencies (the default is 2). do_pca : bool Perform PCA decomposition prior to embedding (the default is True). pca_dimensions : int Reduce k-mer frequencies dimensions to `pca_dimensions` (the default is 50). If None, will estimate based on method : str - Description of parameter `method` (the default is 'TSNE'). + embedding method to use (the default is 'UMAP'). perplexity : float - Description of parameter `perplexity` (the default is 30). + hyperparameter used to tune TSNE (the default is 30.0). + See below for details: + https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html#sklearn.manifold.TSNE + **kwargs : dict Other keyword arguments to be supplied to respective `method`. @@ -281,23 +381,33 @@ def embed(kmers=None, embedded=None, n_components=2, do_pca=True, pca_dimensions Raises ------- - ValueError + KmerEmbeddingError Either `kmers` or `embedded` must be provided. + KmerFormatError + Provided `kmers` or `embedded` are not formatted correctly for use. ValueError Provided `method` is not an available choice. FileNotFoundError `kmers` type must be a pd.DataFrame or filepath. """ if not kmers and not embedded: - raise ValueError('kmers or embedded is required') + raise KmerEmbeddingError('kmers or embedded is required') df = None if kmers and type(kmers) is str and os.path.exists(kmers) and os.stat(kmers).st_size >0: - df = pd.read_csv(kmers, sep='\t', index_col='contig') + try: + df = pd.read_csv(kmers, sep='\t', index_col='contig') + except ValueError as err: + raise KmerFormatError(embedded) from ValueError elif kmers and type(kmers) is pd.DataFrame: df = kmers if embedded and os.path.exists(embedded) and os.stat(embedded).st_size > 0: logger.debug(f'k-mers frequency embedding already exists {embedded}') - return pd.read_csv(embedded, sep='\t', index_col='contig') + try: + df = pd.read_csv(embedded, sep='\t', index_col='contig') + except ValueError as err: + raise KmerFormatError(embedded) from ValueError + return df + if df is None or df.empty: kmers_desc = f'kmers:{kmers} type:{type(kmers)}' embed_desc = f'embedded:{embedded} type:{type(embedded)}' @@ -310,7 +420,8 @@ def embed(kmers=None, embedded=None, n_components=2, do_pca=True, pca_dimensions # PCA n_samples, n_dims = df.shape # Drop any rows that all cols contain NaN. This may occur if the contig length is below the k-mer size - df.dropna(how='all', inplace=True) + df.dropna(axis='index', how='all', inplace=True) + df.fillna(0, inplace=True) X = df.to_numpy() if n_dims > pca_dimensions and do_pca: logger.debug(f'Performing decomposition with PCA: {n_dims} to {pca_dimensions} dims') @@ -319,13 +430,13 @@ def embed(kmers=None, embedded=None, n_components=2, do_pca=True, pca_dimensions n_samples, n_dims = X.shape logger.debug(f'{method}: {n_samples} data points and {n_dims} dimensions') - # Adjust perplexity according to the number of data points - n_rows = n_samples-1 - scaler = 3.0 - if n_rows < (scaler*perplexity): - perplexity = (n_rows/scaler) - 1 def do_TSNE(perplexity=perplexity, n_components=n_components): + # Adjust perplexity according to the number of data points + n_rows = n_samples-1 + scaler = 3.0 + if n_rows < (scaler*perplexity): + perplexity = (n_rows/scaler) - 1 return TSNE( n_components=n_components, perplexity=perplexity, @@ -366,14 +477,17 @@ def main(args): logger.debug(f'Wrote {len(df)} contigs {args.size}-mers frequencies to {args.kmers}.') if args.normalized: + ndf = None try: ndf = load(args.normalized) logger.debug(f'{args.normalized} exists... loaded: df.shape {ndf.shape}') except FileNotFoundError as err: logger.debug(f'Normalizing {df.shape} k-mers DataFrame.') - ndf = normalize(df) - ndf.to_csv(args.normalized, sep='\t', header=True, index=True) - logger.debug(f'Wrote {len(df)} normalized k-mer freqs. to {args.normalized}.') + if ndf is not None: + return ndf + ndf = normalize(df) + ndf.to_csv(args.normalized, sep='\t', header=True, index=True) + logger.debug(f'Wrote {len(df)} normalized k-mer freqs. to {args.normalized}.') if not args.embedded: import sys;sys.exit(0) @@ -396,10 +510,11 @@ def main(args): import argparse import logging as logger logger.basicConfig( - format='%(asctime)s : %(name)s : %(levelname)s : %(message)s', + format='[%(asctime)s %(levelname)s] %(name)s: %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p', level=logger.DEBUG) skip_desc = '(will skip if file exists)' + cpus = mp.cpu_count() parser = argparse.ArgumentParser('Count k-mers') parser.add_argument('--fasta', help='', required=True) parser.add_argument('--kmers', help=f' {skip_desc}', required=True) @@ -415,7 +530,7 @@ def main(args): type=int, default=50) parser.add_argument('--multiprocess', help='count k-mers using multiprocessing', action='store_true', default=False) - parser.add_argument('--nproc', help='num. processors to use if multiprocess is selected', - default=mp.cpu_count(), type=int) + parser.add_argument('--nproc', help=f'num. processors to use if multiprocess is selected. (default = {cpus})', + default=cpus, type=int) args = parser.parse_args() main(args) diff --git a/autometa/common/mag.py b/autometa/common/mag.py index 928739555..f8e69532b 100644 --- a/autometa/common/mag.py +++ b/autometa/common/mag.py @@ -35,6 +35,7 @@ from autometa.common.markers import Markers,MARKERS_DIR from autometa.common import kmers from autometa.common.utilities import timeit +from autometa.common.external import prodigal from autometa.binning import recursive_dbscan @@ -108,16 +109,42 @@ def prepared(self, fpath): return True return False - def get_orfs(self, orf_type='prot'): + def get_orfs(self, orf_type='prot', prodigal_fpath=None): + """Retrieve ORFs corresponding to MAG. + + Parameters + ---------- + orf_type : str + Type of ORF to retrieve (the default is 'prot'). Amino acid or nucleotide + choices = ['prot','nucl'] + prodigal_fpath : str + (the default is MARKERS_DIR). + Should contain pressed hmms and cutoffs table. + force : bool + Will overwrite existing marker annotations (the default is False). + orf_caller : str + Will use `orf_caller` to connect ORFs with contigs and contigs with + their respective ORFs (the default is 'prodigal'). + + Returns + ------- + pd.DataFrame + wide format - index_col='contig', columns=[PFAM,...] + + Raises + ------- + ExceptionName + Why the exception is raised. + + """ + + logger.debug(f'Retrieving markers for {kingdom} kingdom') orfs_fp = os.path.join(self.outdir, f'{kingdom.lower()}.orfs.faa') if (not os.path.exists(orfs_fp)) or (os.path.exists(orfs_fp) and force): - self.write_orfs(orfs_fp) + if orf_caller == 'prodigal': + self.write_orfs(orfs_fp, prodigal_fpath=self.prot_orfs_fpath) + else: + self.write_orfs(orfs_fp) return Markers(orfs_fp, kingdom=kingdom, dbdir=dbdir).get_markers() def subset_df(self, df): diff --git a/autometa/common/markers.py b/autometa/common/markers.py index 8d90bd7ec..57293dc3d 100644 --- a/autometa/common/markers.py +++ b/autometa/common/markers.py @@ -159,6 +159,7 @@ def load(fpath, format='wide'): else: params = ['wide','long','list','counts'] err_msg = f'{format} is not a supported format.\n\tSupported formats: {params}' + # TODO: Write Marker specific AutometaException raise ValueError(err_msg) def get_markers(self, format='wide', **kwargs): diff --git a/autometa/common/metagenome.py b/autometa/common/metagenome.py index 58625f0cf..cbeede46b 100644 --- a/autometa/common/metagenome.py +++ b/autometa/common/metagenome.py @@ -430,6 +430,7 @@ def assign_taxonomy(self, method, force=False, *args, **kwargs): Why the exception is raised. """ + logger.debug(f'assigning taxonomy via {method}') if not self.orfs_called: cpus = kwargs.get('cpus',0) try: @@ -473,6 +474,7 @@ def get_kingdoms(self, **kwargs): """ if not self.taxonomy_assigned: + logger.info('Assigning taxonomy. This may take a while...') self.taxonomy = self.assign_taxonomy(method=self.taxon_method, **kwargs) if self.taxonomy.shape[1] <= 2: # taxonomy_fp should only contain contig and taxid columns from voting method diff --git a/autometa/config/__init__.py b/autometa/config/__init__.py index f98c87fcf..b27db1a8c 100644 --- a/autometa/config/__init__.py +++ b/autometa/config/__init__.py @@ -20,18 +20,19 @@ """ +import logging import os from argparse import Namespace - from configparser import ConfigParser from configparser import ExtendedInterpolation +logger = logging.getLogger(__name__) + DEFAULT_FPATH = os.path.join(os.path.dirname(__file__), 'default.config') AUTOMETA_DIR = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) -PROJECTS_DIR = os.path.join(AUTOMETA_DIR, 'projects') - +WORKSPACE = os.path.join(AUTOMETA_DIR, 'workspace') def get_config(fpath): @@ -54,34 +55,6 @@ def update_config(fpath, section, option, value): DEFAULT_CONFIG = get_config(fpath=DEFAULT_FPATH) -parameters = { - 'projects':str, - 'project':int, - 'kingdoms':list, - 'resume':int, - 'length_cutoff':float, - 'cov_from_spades':bool, - 'kmer_size':int, - 'kmer_multiprocess':bool, - 'kmer_normalize':bool, - 'do_pca':bool, - 'pca_dims':int, - 'embedding_method':str, - 'taxon_method':str, - 'reversed':bool, - 'completeness':float, - 'purity':float, - 'binning_method':str, - 'verbose':bool, - 'force':bool, - 'usepickle':bool, - 'parallel':bool, - 'cpus':int, - 'config':str, - 'resume':bool, -} - - def parse_config(fpath=None): """Generate argparse namespace (args) from config file. @@ -101,6 +74,32 @@ def parse_config(fpath=None): provided `fpath` does not exist. """ + parameters = { + 'workspace':str, + 'project':int, + 'kingdoms':list, + 'metagenome_num':int, + 'length_cutoff':float, + 'cov_from_spades':bool, + 'kmer_size':int, + 'kmer_multiprocess':bool, + 'kmer_normalize':bool, + 'do_pca':bool, + 'pca_dims':int, + 'embedding_method':str, + 'taxon_method':str, + 'reversed':bool, + 'completeness':float, + 'purity':float, + 'binning_method':str, + 'verbose':bool, + 'force':bool, + 'usepickle':bool, + 'parallel':bool, + 'cpus':int, + 'config':str, + 'resume':bool, + } if fpath and (not os.path.exists(fpath) or os.stat(fpath).st_size == 0): raise FileNotFoundError(fpath) config = get_config(fpath) if fpath else DEFAULT_CONFIG @@ -122,6 +121,5 @@ def parse_config(fpath=None): value = config.getfloat(section,key) elif parameters.get(key) is list: value = value.split(',') - namespace.__dict__[section].__dict__[key] = value return namespace diff --git a/autometa/config/databases.py b/autometa/config/databases.py index b133e20d5..e8b12825c 100644 --- a/autometa/config/databases.py +++ b/autometa/config/databases.py @@ -44,6 +44,7 @@ 'ncbi':[ 'nodes', 'names', + 'merged', 'accession2taxid', 'nr', ], @@ -68,11 +69,11 @@ def format_nr(config, dryrun, nproc=2): return config def extract_taxdump(config, dryrun): - """Short summary. + """Extract autometa required files from ncbi taxdump directory. Parameters ---------- - config : type + config: configparser.ConfigParser Description of parameter `config`. dryrun : type Description of parameter `dryrun`. @@ -104,11 +105,11 @@ def extract_taxdump(config, dryrun): return config def update_ncbi(config, options, dryrun, nproc=2): - """Update NCBI database. + """Update NCBI database files (taxdump.tar.gz and nr.gz). Parameters ---------- - config : type + config: configparser.ConfigParser Description of parameter `config`. options : type Description of parameter `options`. @@ -163,7 +164,7 @@ def update_markers(config, options, dryrun): Parameters ---------- - config : type + config: configparser.ConfigParser Description of parameter `config`. options : type Description of parameter `options`. @@ -215,7 +216,7 @@ def validate_fpaths(config, section): Parameters ---------- - config : type + config: configparser.ConfigParser Description of parameter `config`. section : type Description of parameter `section`. @@ -267,12 +268,13 @@ def update_missing(config, section, dryrun, options=None, nproc=2): """ if section not in DB_SECTIONS: - raise KeyError(f'section not in DB_SECTIONS : {section}') + raise KeyError(f'section ({section}) not in DB_SECTIONS ({DB_SECTIONS.keys()})') options = set(options) if options else set(DB_SECTIONS.get(section)) if section == 'ncbi': - if 'nodes' in options or 'names' in options: + if 'nodes' in options or 'names' in options or 'merged' in options: options.discard('nodes') options.discard('names') + options.discard('merged') options.add('taxdump') config = update_ncbi(config, options, dryrun, nproc) if section == 'markers': @@ -280,18 +282,18 @@ def update_missing(config, section, dryrun, options=None, nproc=2): return config def check_format(config, dryrun, nproc=2): - """Short summary. + """Checks database files Parameters ---------- - config : type + config : configparser.ConfigParser Description of parameter `config`. dryrun : bool Description of parameter `dryrun`. Returns ------- - type + configparser.ConfigParser Description of returned object. Raises @@ -304,14 +306,23 @@ def check_format(config, dryrun, nproc=2): if not config.has_section(section): logger.warning(f'Missing section : {section}') config.add_section(section) - config = update_missing(config, section=section, dryrun=dryrun, nproc=nproc) + config = update_missing( + config=config, + section=section, + options=None, + dryrun=dryrun, + nproc=nproc) continue config = validate_fpaths(config, section) missing = set(options) - set(config.options(section)) if missing: logger.warning(f'Missing options : {", ".join(missing)}') - config = update_missing(config, section=section, options=missing, dryrun=dryrun, nproc=nproc) - + config = update_missing( + config=config, + section=section, + options=missing, + dryrun=dryrun, + nproc=nproc) return config def configure(config=DEFAULT_CONFIG, dryrun=True, nproc=2): diff --git a/autometa/config/default.config b/autometa/config/default.config index 467f3cb62..bbbbfee8e 100644 --- a/autometa/config/default.config +++ b/autometa/config/default.config @@ -1,22 +1,20 @@ -""" -Copyright 2020 Ian J. Miller, Evan R. Rees, Kyle Wolf, Siddharth Uppal, -Shaurya Chanana, Izaak Miller, Jason C. Kwan - -This file is part of Autometa. - -Autometa is free software: you can redistribute it and/or modify -it under the terms of the GNU Affero General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -Autometa is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU Affero General Public License for more details. - -You should have received a copy of the GNU Affero General Public License -along with Autometa. If not, see . -""" +# Copyright 2020 Ian J. Miller, Evan R. Rees, Kyle Wolf, Siddharth Uppal, +# Shaurya Chanana, Izaak Miller, Jason C. Kwan +# +# This file is part of Autometa. +# +# Autometa is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Autometa is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with Autometa. If not, see . [common] home_dir = None diff --git a/autometa/config/metagenome.config b/autometa/config/metagenome.config index fa6be4dbb..db3950a93 100644 --- a/autometa/config/metagenome.config +++ b/autometa/config/metagenome.config @@ -1,22 +1,20 @@ -""" -Copyright 2020 Ian J. Miller, Evan R. Rees, Kyle Wolf, Siddharth Uppal, -Shaurya Chanana, Izaak Miller, Jason C. Kwan - -This file is part of Autometa. - -Autometa is free software: you can redistribute it and/or modify -it under the terms of the GNU Affero General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -Autometa is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU Affero General Public License for more details. - -You should have received a copy of the GNU Affero General Public License -along with Autometa. If not, see . -""" +# Copyright 2020 Ian J. Miller, Evan R. Rees, Kyle Wolf, Siddharth Uppal, +# Shaurya Chanana, Izaak Miller, Jason C. Kwan +# +# This file is part of Autometa. +# +# Autometa is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Autometa is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with Autometa. If not, see . ######################################## ### Metagenome Submission Parameters ### @@ -51,7 +49,7 @@ checkpoints = checkpoints.tsv [parameters] projects = project = 1 -resume = 0 +metagenome_num = 0 kingdom = bacteria length_cutoff = 3000 cov_from_spades = False diff --git a/autometa/config/project.py b/autometa/config/project.py index e5e1b43f5..c1f4717a0 100644 --- a/autometa/config/project.py +++ b/autometa/config/project.py @@ -28,235 +28,136 @@ import os import argparse -from configparser import ConfigParser -from configparser import ExtendedInterpolation - -from autometa.config import DEFAULT_FPATH from autometa.config import DEFAULT_CONFIG -from autometa.config import AUTOMETA_DIR from autometa.config import get_config +from autometa.config import parse_config from autometa.config import put_config logger = logging.getLogger(__name__) -def update_project(config, args): - """Update input args within config file. - - Parameters - ---------- - args : argparse.Namespace - Description of parameter `arg`. - - Returns - ------- - configparser.ConfigParser - Updated ConfigParser with user parameters - - Raises - ------- - ExceptionName - Why the exception is raised. - - """ - param_section = 'parameters' - metagenome_section = 'metagenomes' - config.add_section(param_section) - for param,value in args.__dict__.items(): - if param == metagenome_section: - config.add_section(metagenome_section) - metagenome_count = 1 - for metagenome in value: - mg = f'metagenome_{metagenome_count:03d}' - config.set(metagenome_section, mg, metagenome) - metagenome_count += 1 - if param == 'config' and not value: - value = DEFAULT_FPATH - config.set(param_section, param, str(value)) - return config - -def setup_project(config): - """Write project.config to . If directory does not exist, - one will be made. - - Parameters - ---------- - config : configparser.ConfigParser - project config required sections: ['parameters'] - - Returns - ------- - str - - - Raises - ------- - ExceptionName - Why the exception is raised. - - """ - projects_dp = config.get('parameters','projects') - try: - project_num = config.getint('parameters','project') - except ValueError as err: - n_projects = 0 - for dp in os.listdir(projects_dp): - if 'project_' in dp and os.path.isdir(os.path.join(projects_dp,dp)): - n_projects += 1 - project_num = n_projects + 1 - config.set('parameters','project',str(project_num)) - project_dirname = f'project_{project_num:03d}' - project_dirpath = os.path.join(projects_dp, project_dirname) - if not os.path.exists(project_dirpath): - os.makedirs(project_dirpath) - config_fp = os.path.join(project_dirpath, 'project.config') - outconfig = copy.deepcopy(config) - outconfig.remove_option('parameters','metagenomes') - outconfig.remove_option('parameters','resume') - put_config(outconfig, config_fp) - return config_fp - -def setup_metagenome(config): - """Setup Autometa metagenome directory given a config file of metagenome - Submission [files] and [parameters] sections. - - Parameters - ---------- - config : str or configparser.ConfigParser - or already loaded metagenome ConfigParser - - Returns - ------- - str - - - Raises - ------- - FileNotFoundError - project directory or project config does not exist. - - """ - if type(config) is str and os.path.exists(config): - config = get_config(config) - mg_config = copy.deepcopy(config) - # Determine what project metagenome belongs... - projects_dirpath = mg_config.get('parameters','projects') - project_num = mg_config.getint('parameters','project') - project_dname = f"project_{project_num:03d}" - project_dirpath = os.path.join(projects_dirpath,project_dname) - if not os.path.exists(project_dirpath): - raise FileNotFoundError(f'ProjectDirectoryNotFound: {project_dirpath}') - project_config_fpath = os.path.join(project_dirpath, 'project.config') - if not os.path.exists(project_config_fpath): - raise FileNotFoundError(project_config_fpath) - # Determine metagenome number added to project and update project.config - metagenomes = [dpath for dpath in os.listdir(project_dirpath) - if 'metagenome_' in dpath and os.path.isdir(os.path.join(project_dirpath,dpath))] - metagenome_num = 1 + len(metagenomes) - metagenome_dirname = f'metagenome_{metagenome_num:03d}' - metagenome_dirpath = os.path.join(project_dirpath, metagenome_dirname) - if os.path.exists(metagenome_dirpath): - raise FileExistsError(metagenome_dirpath) - os.makedirs(metagenome_dirpath) - metagenome_fpath = config.get('files','metagenome') - proj_config = get_config(project_config_fpath) - proj_config.set('metagenomes', metagenome_dirname, metagenome_fpath) - put_config(proj_config, project_config_fpath) - for section in ['databases','environ']: - if not mg_config.has_section(section): - mg_config.add_section(section) - for option,value in proj_config.items(section): - mg_config.set(section,option,value) - #Remove project config sections/options if project.config was provided. - # Change mg config section and parameters to suit respective directory. - if mg_config.has_section('metagenomes'): - mg_config.remove_section('metagenomes') - if mg_config.has_option('parameters','metagenomes'): - mg_config.remove_option('parameters','metagenomes') - #symlink any files that already exist and were specified - for option in mg_config.options('files'): - default_fname = os.path.basename(DEFAULT_CONFIG.get('files',option)) - fpath = mg_config.get('files',option) - if os.path.exists(fpath): - if fpath.endswith('.gz') and not default_fname.endswith('.gz'): - default_fname += '.gz' - full_fpath = os.path.join(metagenome_dirpath, default_fname) - os.symlink(os.path.realpath(fpath),full_fpath) - elif os.path.basename(fpath).title() == 'None': - full_fpath = os.path.join(metagenome_dirpath, default_fname) - else: - fname = os.path.basename(fpath) - full_fpath = os.path.join(metagenome_dirpath, fname) - mg_config.set('files', option, full_fpath) - mg_config.set('parameters','outdir', metagenome_dirpath) - mg_config_fpath = os.path.join(metagenome_dirpath, f'{metagenome_dirname}.config') - mg_config.add_section('config') - mg_config.set('config','project', project_config_fpath) - mg_config.set('config','metagenome', mg_config_fpath) - put_config(mg_config, mg_config_fpath) - logger.debug(f'updated {project_config_fpath} metagenomes: {metagenome_dirname} : {mg_config_fpath}') - # Only write updated project config after successful metagenome configuration. - return mg_config_fpath - -def setup_metagenomes(project_config): - """Build directories for each provided metagenome in `project_config`. - - Parameters - ---------- - project_config : configparser.ConfigParser - Description of parameter `project_config`. - - Returns - ------- - dict - { - 'metagenome_num':', ...} + """ + return {k:v for k,v in self.config.items('metagenomes')} + + def save(self): + put_config(self.config, self.config_fpath) + + def add(self, fpath): + """Setup Autometa metagenome directory given a metagenome.config file. + + Parameters + ---------- + fpath : str + + + Returns + ------- + argparse.Namespace + + Raises + ------- + FileExistsError + metagenome.config already exists in project + + """ + metagenome_num = 1 + self.n_metagenomes + metagenome_name = f'metagenome_{metagenome_num:03d}' + metagenome_dirpath = os.path.join(self.dirpath, metagenome_name) + if os.path.exists(metagenome_dirpath): + raise FileExistsError(metagenome_dirpath) + os.makedirs(metagenome_dirpath) + mg_config = get_config(fpath) + # Add database and env for debugging individual metagenome binning runs. + for section in ['databases','environ']: + if not mg_config.has_section(section): + mg_config.add_section(section) + for option,value in self.config.items(section): + mg_config.set(section,option,value) + #symlink any files that already exist and were specified + for option in mg_config.options('files'): + default_fname = os.path.basename(DEFAULT_CONFIG.get('files',option)) + option_fpath = os.path.realpath(mg_config.get('files',option)) + if os.path.exists(option_fpath): + if option_fpath.endswith('.gz') and not default_fname.endswith('.gz'): + default_fname += '.gz' + full_fpath = os.path.join(metagenome_dirpath, default_fname) + os.symlink(option_fpath,full_fpath) + else: + full_fpath = os.path.join(metagenome_dirpath, default_fname) + mg_config.set('files', option, full_fpath) + mg_config.set('parameters','outdir', metagenome_dirpath) + mg_config_fpath = os.path.join(metagenome_dirpath, f'{metagenome_name}.config') + mg_config.add_section('config') + mg_config.set('config','project', self.config_fpath) + mg_config.set('config','metagenome', mg_config_fpath) + put_config(mg_config, mg_config_fpath) + # Only write updated project config after successful metagenome configuration. + self.config.set('metagenomes',metagenome_name,mg_config_fpath) + logger.debug(f'updated {self.config_fpath} metagenome: {metagenome_name} : {mg_config_fpath}') + return parse_config(mg_config_fpath) + + def update(self, metagenome_num, fpath): + """Update project config metagenomes section with input metagenome.config file. + + Parameters + ---------- + metagenome_num: int + metagenome number to update + fpath : str + This config will overwrite any values in old config + that are different + + Returns + ------- + argparse.Namespace + + Raises + ------- + ValueError + `metagenome` must be an int and within project config! + """ + metagenome = f'metagenome_{metagenome_num:03d}' + if not self.config.has_option('metagenomes',metagenome): + raise ValueError(f'{metagenome_num} must be an int and within project config!') + old_config_fp = self.config.get('metagenomes', metagenome) + old_config = get_config(old_config_fp) + new_config = get_config(fpath) + for section in new_config.sections(): + if not old_config.has_section(section): + old_config.add_section(section) + for option in new_config.options(section): + new_value = new_config.get(section,option) + # TODO: Update file checksums (checkpoint update) + # TODO: Check if new value exists... Otherwise keep old option + if section == 'files' and not os.path.exists(new_value): + continue + old_config.set(section, option, new_value) + put_config(old_config, old_config_fp) + logger.debug(f'Updated {metagenome}.config with {fpath}') + return parse_config(old_config_fp) if __name__ == '__main__': import sys;sys.exit(1) diff --git a/autometa/config/user.py b/autometa/config/user.py index 5e24cade8..a4f5d2009 100644 --- a/autometa/config/user.py +++ b/autometa/config/user.py @@ -19,7 +19,7 @@ You should have received a copy of the GNU Affero General Public License along with Autometa. If not, see . -Autometa User Configuration Class +AutometaUser configuration class """ @@ -28,14 +28,17 @@ import argparse +# TODO: Refactor autometa.config later as AutometaConfigUtils lib or something from autometa.config import get_config +from autometa.config import put_config from autometa.config import parse_config -from autometa.config import DEFAULT_CONFIG from autometa.config import AUTOMETA_DIR +from autometa.config import DEFAULT_CONFIG +from autometa.config import DEFAULT_FPATH from autometa.config import databases from autometa.config import environ -from autometa.config import project -from autometa.common.utilities import get_checkpoints +from autometa.config.project import Project +from autometa.common import utilities logger = logging.getLogger(__name__) @@ -44,38 +47,74 @@ class AutometaUser: """docstring for AutometaUser.""" - def __init__(self, config_fpath=None, dryrun=True): + def __init__(self, config_fpath=None, dryrun=True, nproc=2): self.dryrun= dryrun + self.nproc = nproc self.config_fp = config_fpath self.config = get_config(self.config_fp) if self.config_fp else DEFAULT_CONFIG if not self.config.has_section('common'): self.config.add_section('common') self.config.set('common','home_dir', AUTOMETA_DIR) - def configure(self, configure_environ=True, configure_databases=True, nproc=2): + def configure(self, configure_environ=True, configure_databases=True): if configure_environ: self.config = environ.configure(self.config) if configure_databases: - self.config = databases.configure(self.config, dryrun=self.dryrun, nproc=nproc) + self.config = databases.configure(self.config, dryrun=self.dryrun, nproc=self.nproc) + + def new_workspace(self, fpath): + """Configure new project at `outdir`. + + Parameters + ---------- + fpath : str + /project.config> - def new_project(self, args): - """Configure new project with input args. + Returns + ------- + autometa.config.project.Project object + + Raises + ------- + ExceptionName + Why the exception is raised. + + """ + # 1. configure project from default config and provided config file + self.configure() + dpath = os.path.dirname(fpath) + if not os.path.exists(dpath): + os.makedirs(dpath) + put_config(self.config, fpath) + return Project(fpath) + + def prepare_run(self, config_fpath): + """Prepares metagenome binning run using provided `config_fpath`. + + This method performs a number of configuration checks to ensure the + binning run will perform without conflicts. + 1. workspace check: Will construct workspace directory if provided does not + exist. + 2. Project check: Will configure a new project if project number is not + found in workspace directory. + 3. Metagenome check: Will update if existing with edits or resume + if existing without edits. Otherwise will add new metagenome to project. Parameters ---------- - args : argparse.Namespace - Description of parameter `args`. + config_fpath : str + Returns ------- - dict - {'project':, - 'metagenomes':{ - 'metagenome_num':'', - 'metagenome_num':'', - ... - }, - } + argparse.Namespace + access to parameters and files from config via syntax... + i.e. + generate namespace: + mgargs = prepare_run(mg_config) + access namespace: + mgargs.files. + mgargs.parameters. Raises ------- @@ -83,35 +122,35 @@ def new_project(self, args): Why the exception is raised. """ - proj_config = project.configure(self.config, args) - metagenomes_configs = project.setup_metagenomes(get_config(proj_config)) - mgargs = {mg:parse_config(mg_config) for mg,mg_config in metagenomes_configs.items()} - return {'project':proj_config, 'metagenomes':mgargs} - - def add_metagenomes(self, metagenomes_configs): - mg_configs = {} - for metagenome_config in metagenomes_configs: - mg_config_fpath = project.setup_metagenome(metagenome_config) - mg_name = os.path.basename(mg_config_fpath).strip('.config') - mgargs = parse_config(mg_config_fpath) - fpaths = list(vars(mgargs.files).values()) - __ = get_checkpoints(mgargs.files.checkpoints, fpaths) - mg_configs.update({mg_name:mgargs}) - return mg_configs - - def get_mgargs(self, projects_dir, project_num, metagenome_num): - for arg in [project_num, metagenome_num]: - if type(arg) is not int: - raise TypeError(f'{arg} is type: {type(arg)}') - if project_num <= 0: - raise ValueError(f'project num: {project_num} is invalid') - if metagenome_num <= 0: - raise ValueError(f'metagenome_num {metagenome_num} is invalid') - project_name = f'project_{project_num:03d}' - metagenome_dirname = f'metagenome_{metagenome_num:03d}' - metagenome_fname = f'{metagenome_dirname}.config' - metagenome_config = os.path.join(projects_dir, project_name, metagenome_dirname, metagenome_fname) - return {metagenome_dirname:parse_config(metagenome_config)} + mgargs = parse_config(config_fpath) + # 1 check workspace exists + workspace = os.path.realpath(mgargs.parameters.workspace) + if not os.path.exists(workspace): + os.makedirs(workspace) + # 2 check project exists + proj_name = f'project_{mgargs.parameters.project:03d}' + project_dirpath = os.path.realpath(os.path.join(workspace,proj_name)) + project_config_fp = os.path.join(project_dirpath, 'project.config') + if not os.path.exists(project_dirpath) or not os.path.exists(project_config_fp): + project = self.new_workspace(project_config_fp) + else: + project = Project(project_config_fp) + # 3 check whether existing or new run with metagenome_num + metagenome = f'metagenome_{mgargs.parameters.metagenome_num:03d}' + if metagenome not in project.metagenomes: + mgargs = project.add(config_fpath) + project.save() + return mgargs + # If resuming existing metagenome run. Check whether config file has changed. + old_config_fp = project.metagenomes.get(metagenome) + old_chksum = utilities.get_checksum(old_config_fp) + new_chksum = utilities.get_checksum(config_fpath) + if old_chksum != new_chksum: + mgargs = project.update( + metagenome_num=mgargs.parameters.metagenome_num, + fpath=config_fpath) + project.save() + return mgargs def main(args): logger.info(args.user) diff --git a/autometa/taxonomy/lca.py b/autometa/taxonomy/lca.py index 16f984060..5c165e102 100644 --- a/autometa/taxonomy/lca.py +++ b/autometa/taxonomy/lca.py @@ -331,7 +331,7 @@ def blast2lca(self, fasta, outfpath, blast=None, hits_fpath=None, force=False): `outfpath` """ if self.verbose: - logger.info(f'Running BLAST to LCA for {fasta}') + logger.debug(f'Running BLAST to LCA for {fasta}') if os.path.exists(outfpath) and not force: logger.warning(f'FileAlreadyExists {outfpath}') return outfpath @@ -422,8 +422,8 @@ def parse(self, lca_fpath, prodigal_annotations=None): raise FileNotFoundError(prodigal_annotations) if prodigal_annotations: - logger.debug('Retrieving ORF->contig translations from ORF Caller') - translations = prodigal.get_orf_translations(prodigal_annotations) + logger.debug('getting contig headers from prodigal ORFs') + translations = prodigal.contigs_from_headers(prodigal_annotations) fname = os.path.basename(lca_fpath) n_lines = file_length(lca_fpath) if self.verbose else None diff --git a/tests/data/coverage.tsv b/tests/data/coverage.tsv new file mode 100644 index 000000000..5dd4f98b5 --- /dev/null +++ b/tests/data/coverage.tsv @@ -0,0 +1,3426 @@ +contig total_breadth bases coverage +NODE_1000_length_15201_cov_224.564 6049903 15201 397.9937504111572 +NODE_1001_length_15189_cov_223.754 6049858 15189 398.30522088353416 +NODE_1002_length_15170_cov_223.896 6049788 15170 398.79947264337505 +NODE_1003_length_15161_cov_224.737 6049812 15161 399.0377943407427 +NODE_1004_length_15148_cov_223.915 6049785 15148 399.37846580406654 +NODE_1005_length_15117_cov_222.976 5999824 15117 396.8925051266786 +NODE_1006_length_15099_cov_231.479 6048981 15099 400.6212994238029 +NODE_1007_length_15092_cov_223.109 5999851 15092 397.551749271137 +NODE_1008_length_15085_cov_225.002 6040880 15085 400.4560822008618 +NODE_1009_length_14996_cov_222.895 5949812 14996 396.75993598292877 +NODE_100_length_142487_cov_223.46 56802661 142487 398.6515331223199 +NODE_1010_length_14973_cov_222.706 5948624 14973 397.2900554331129 +NODE_1011_length_14968_cov_224.696 5991194 14968 400.26683591662214 +NODE_1012_length_14947_cov_223.68 5949830 14947 398.06181842510205 +NODE_1013_length_14897_cov_223.89 5949721 14897 399.3905484325703 +NODE_1014_length_14894_cov_224.121 5949018 14894 399.4237948167047 +NODE_1015_length_14868_cov_224.641 5949855 14868 400.17857142857144 +NODE_1016_length_14865_cov_222.741 5899822 14865 396.8935082408342 +NODE_1017_length_14855_cov_223.142 5899844 14855 397.16216762032985 +NODE_1018_length_14779_cov_224.269 5899812 14779 399.20238175789973 +NODE_1019_length_14776_cov_224.242 5899781 14776 399.2813345966432 +NODE_101_length_139096_cov_225.266 55637367 139096 399.99257347443495 +NODE_1020_length_14719_cov_230.4 5841010 14719 396.83470344452746 +NODE_1021_length_14697_cov_223.673 5849807 14697 398.0272844798258 +NODE_1022_length_14670_cov_224.021 5849818 14670 398.7605998636673 +NODE_1023_length_14648_cov_224.155 5849863 14648 399.3625750955762 +NODE_1024_length_14635_cov_224.375 5849793 14635 399.71253843525795 +NODE_1025_length_14591_cov_223.103 5799830 14591 397.4936604756357 +NODE_1026_length_14554_cov_223.897 5799866 14554 398.5066648344098 +NODE_1027_length_14532_cov_227.712 5897027 14532 405.795967519956 +NODE_1028_length_14525_cov_224.06 5799856 14525 399.3016179001721 +NODE_1029_length_14503_cov_223.35 5753105 14503 396.68378956078055 +NODE_102_length_138307_cov_223.908 55298529 138307 399.8245135821036 +NODE_1030_length_14440_cov_222.925 5749727 14440 398.180540166205 +NODE_1031_length_14402_cov_222.067 5630990 14402 390.98666851826135 +NODE_1032_length_14379_cov_222.741 5699850 14379 396.4010014604632 +NODE_1033_length_14348_cov_224.009 5701602 14348 397.37956509618067 +NODE_1034_length_14321_cov_224.9 5734566 14321 400.4305565253823 +NODE_1035_length_14277_cov_224.245 5699838 14277 399.2321916368985 +NODE_1036_length_14271_cov_223.943 5699753 14271 399.39408590848575 +NODE_1037_length_14262_cov_223.873 5649897 14262 396.1503996634413 +NODE_1038_length_14231_cov_270.704 6553721 14231 460.5242779846813 +NODE_1039_length_14197_cov_223.624 5649854 14197 397.96111854617175 +NODE_103_length_137756_cov_225.461 55030051 137607 399.90735209691366 +NODE_1040_length_14189_cov_224.737 5675536 14189 399.99548946366906 +NODE_1041_length_14185_cov_216.348 5458555 14175 385.0832451499118 +NODE_1042_length_14180_cov_223.681 5649850 14180 398.4379407616361 +NODE_1043_length_14160_cov_224.182 5649864 14160 399.00169491525423 +NODE_1044_length_14105_cov_223.061 5599876 14105 397.01354129741225 +NODE_1045_length_14036_cov_223.862 5599855 14036 398.963736107153 +NODE_1046_length_13906_cov_224.346 5549792 13906 399.0933410038832 +NODE_1047_length_13901_cov_225.602 5549883 13901 399.24343572404865 +NODE_1048_length_13870_cov_224.409 5547537 13870 399.96661860129774 +NODE_1049_length_13866_cov_222.688 5499812 13866 396.6401269291793 +NODE_104_length_136707_cov_224.138 54747964 136707 400.4766690805884 +NODE_1050_length_13828_cov_228.932 5491777 13828 397.1490454150998 +NODE_1051_length_13795_cov_227.94 5599760 13795 405.9267850670533 +NODE_1052_length_13792_cov_224.116 5499913 13792 398.7755945475638 +NODE_1053_length_13783_cov_223.981 5499696 13783 399.0202423275049 +NODE_1054_length_13782_cov_224.114 5499886 13782 399.0629806994631 +NODE_1055_length_13768_cov_224.209 5499825 13768 399.46433759442186 +NODE_1056_length_13763_cov_223.958 5488185 13763 398.7637143064739 +NODE_1057_length_13739_cov_222.742 5449797 13739 396.6662056918262 +NODE_1058_length_13680_cov_225.817 5499853 13680 402.0360380116959 +NODE_1059_length_13673_cov_223.999 5449812 13673 398.58202296496745 +NODE_105_length_136131_cov_224.464 54422691 136121 399.8111312729116 +NODE_1060_length_13671_cov_224.088 5449864 13671 398.6441372247824 +NODE_1061_length_13665_cov_224.156 5449788 13665 398.81361141602633 +NODE_1062_length_13597_cov_219.22 5302549 13587 390.26635754765584 +NODE_1063_length_13590_cov_223.263 5399773 13590 397.3342899190581 +NODE_1064_length_13585_cov_223.38 5399847 13585 397.48597718071403 +NODE_1065_length_13555_cov_218.969 5270104 13535 389.3685999261175 +NODE_1066_length_13504_cov_224.498 5399915 13504 399.8752221563981 +NODE_1067_length_13464_cov_223.08 5349835 13464 397.34365715983364 +NODE_1068_length_13462_cov_223.302 5349995 13462 397.4145743574506 +NODE_1069_length_13448_cov_228.25 5473492 13448 407.0116002379536 +NODE_106_length_135357_cov_223.837 54180233 135357 400.2765501599474 +NODE_1070_length_13428_cov_219.638 5247129 13428 390.7602770330652 +NODE_1071_length_13419_cov_224.084 5349765 13419 398.6709143751397 +NODE_1072_length_13412_cov_222.1 5299896 13412 395.160751565762 +NODE_1073_length_13348_cov_222.786 5299740 13348 397.04375187293977 +NODE_1074_length_13345_cov_224.047 5301892 13345 397.2942675159236 +NODE_1075_length_13306_cov_368.219 5299961 13305 398.3435550544908 +NODE_1076_length_13288_cov_223.745 5249615 13288 395.06434376881396 +NODE_1077_length_13283_cov_225.244 5320999 13283 400.58714145900774 +NODE_1078_length_13280_cov_224.313 5299818 13280 399.08268072289155 +NODE_1079_length_13251_cov_224.441 5288208 13251 399.07991849671726 +NODE_107_length_135043_cov_223.938 53998258 135043 399.85973356634554 +NODE_1080_length_13246_cov_222.788 5249850 13246 396.3347425637928 +NODE_1081_length_13236_cov_222.632 5249837 13236 396.63319734058626 +NODE_1082_length_13236_cov_222.952 5249854 13236 396.6344817165307 +NODE_1083_length_13191_cov_223.702 5249894 13191 397.99059965127736 +NODE_1084_length_13155_cov_224.346 5249894 13155 399.0797415431395 +NODE_1085_length_13147_cov_222.348 5199807 13116 396.44762122598354 +NODE_1086_length_13138_cov_224.524 5249807 13138 399.58951134114784 +NODE_1087_length_13117_cov_222.67 5199920 13117 396.42601204543723 +NODE_1088_length_13066_cov_223.5 5199820 13066 397.9657125363539 +NODE_1089_length_13050_cov_223.958 5199855 13050 398.4563218390805 +NODE_108_length_134368_cov_224.716 53874377 134368 400.94648279352225 +NODE_1090_length_13049_cov_228.505 5197803 13049 398.32960380105754 +NODE_1091_length_12991_cov_224.024 5149845 12991 396.416365175891 +NODE_1092_length_12980_cov_225.672 5147576 12980 396.5775038520801 +NODE_1093_length_12966_cov_225.317 5201573 12966 401.17021440691036 +NODE_1094_length_12963_cov_223.277 5149796 12963 397.2688420890226 +NODE_1095_length_12949_cov_223.367 5149872 12949 397.70422426442195 +NODE_1096_length_12931_cov_221.989 5082018 12931 393.01044002784005 +NODE_1097_length_12914_cov_224.167 5149824 12914 398.778380052656 +NODE_1098_length_12891_cov_226.319 5199908 12891 403.37506787681326 +NODE_1099_length_12877_cov_224.559 5149949 12877 399.93391317853536 +NODE_109_length_132210_cov_222.18 52449879 132180 396.8064684521108 +NODE_10_length_622659_cov_224.054 248767213 622659 399.5239978864836 +NODE_1100_length_12859_cov_222.986 5099887 12859 396.6005910257407 +NODE_1101_length_12855_cov_223.056 5099880 12855 396.7234539089848 +NODE_1102_length_12778_cov_224.037 5099000 12778 399.0452339959305 +NODE_1103_length_12686_cov_223.763 5049907 12686 398.06928897997796 +NODE_1104_length_12674_cov_228.346 5133535 12674 405.0445794540003 +NODE_1105_length_12666_cov_224.071 5049874 12666 398.6952471182694 +NODE_1106_length_12658_cov_224.014 5049880 12658 398.94770105861903 +NODE_1107_length_12656_cov_224.491 5054018 12655 399.3692611615962 +NODE_1108_length_12653_cov_239.099 5049853 12653 399.1032166284676 +NODE_1109_length_12644_cov_225.074 4999792 12644 395.4280291047137 +NODE_110_length_132131_cov_223.987 52848766 132131 399.97249699162194 +NODE_1110_length_12619_cov_226.5 5099845 12619 404.14018543466204 +NODE_1111_length_12613_cov_222.686 4999856 12613 396.404978989931 +NODE_1112_length_12607_cov_222.797 4999883 12607 396.59578012215434 +NODE_1113_length_12602_cov_223.092 4999907 12602 396.75503888271703 +NODE_1114_length_12599_cov_222.864 4999887 12599 396.8479244384475 +NODE_1115_length_12564_cov_223.656 4999848 12564 397.9503342884432 +NODE_1116_length_12555_cov_231.948 5099573 12555 406.17865392273995 +NODE_1117_length_12538_cov_224.2 4999887 12538 398.7786728345829 +NODE_1118_length_12532_cov_224.189 4999861 12532 398.9675231407597 +NODE_1119_length_12528_cov_224.345 4999874 12528 399.09594508301404 +NODE_111_length_129576_cov_223.313 51660260 129576 398.686948200284 +NODE_1120_length_12501_cov_224.693 4999901 12501 399.96008319334453 +NODE_1121_length_12458_cov_223.359 4949906 12458 397.3275004013485 +NODE_1122_length_12409_cov_246.801 5558969 12409 447.97880570553633 +NODE_1123_length_12398_cov_221.99 4899801 12398 395.20898532021295 +NODE_1124_length_12398_cov_224.214 4949978 12398 399.25617035005644 +NODE_1125_length_12395_cov_234.329 4950943 12395 399.43065752319484 +NODE_1126_length_12393_cov_221.06 4865731 12393 392.61930121842977 +NODE_1127_length_12391_cov_224.301 4949886 12391 399.4742958598983 +NODE_1128_length_12361_cov_222.719 4899880 12361 396.3983496480867 +NODE_1129_length_12353_cov_222.882 4899821 12353 396.6502873795839 +NODE_112_length_129575_cov_223.985 51798699 129575 399.7584333397646 +NODE_1130_length_12333_cov_223.334 4899845 12333 397.2954674450661 +NODE_1131_length_12267_cov_224.55 4899912 12267 399.43849351919783 +NODE_1132_length_12262_cov_224.399 4899835 12262 399.59509052356873 +NODE_1133_length_12251_cov_224.739 4905648 12251 400.42837319402497 +NODE_1134_length_12245_cov_222.431 4849971 12245 396.07766435279706 +NODE_1135_length_12243_cov_225.38 4903623 12243 400.52462631707914 +NODE_1136_length_12215_cov_222.998 4849893 12215 397.04404420794106 +NODE_1137_length_12196_cov_219.893 4784688 12186 392.63810930576074 +NODE_1138_length_12188_cov_227.479 4883791 12188 400.70487364620936 +NODE_1139_length_12125_cov_224.583 4849881 12125 399.9901855670103 +NODE_113_length_128969_cov_224.569 51596995 128969 400.0728469632237 +NODE_1140_length_12120_cov_223.899 4847158 12120 399.9305280528053 +NODE_1141_length_12097_cov_219.518 4722638 12087 390.72044345164227 +NODE_1142_length_12094_cov_222.971 4799894 12094 396.8822556639656 +NODE_1143_length_12059_cov_223.852 4799862 12059 398.03151173397464 +NODE_1144_length_12035_cov_242.89 5182028 12035 430.57980889073536 +NODE_1145_length_12025_cov_224.576 4767741 12025 396.48573804573806 +NODE_1146_length_12008_cov_224.216 4799862 12008 399.7220186542305 +NODE_1147_length_12002_cov_224.758 4799811 12002 399.9175970671555 +NODE_1148_length_11979_cov_225.754 4749846 11979 396.5144002003506 +NODE_1149_length_11976_cov_225.227 4799791 11976 400.78415163660657 +NODE_114_length_128058_cov_223.59 51198716 128058 399.8088053850599 +NODE_1150_length_11970_cov_222.942 4749896 11970 396.81670843776106 +NODE_1151_length_11948_cov_221.833 4691229 11948 392.63717777033816 +NODE_1152_length_11944_cov_223.377 4749884 11944 397.6795043536504 +NODE_1153_length_11901_cov_224.239 4749831 11901 399.11192336778424 +NODE_1154_length_11835_cov_227.452 4764640 11835 402.5889311364597 +NODE_1155_length_11829_cov_223.458 4699875 11829 397.31803195536395 +NODE_1156_length_11829_cov_223.243 4699917 11829 397.3215825513568 +NODE_1157_length_11817_cov_223.491 4699898 11817 397.7234492680037 +NODE_1158_length_11816_cov_223.308 4699951 11816 397.7615944482058 +NODE_1159_length_11793_cov_224.111 4699904 11793 398.5333672517595 +NODE_115_length_125276_cov_224.211 50139334 125276 400.2309620358249 +NODE_1160_length_11764_cov_224.431 4700037 11764 399.5271166269976 +NODE_1161_length_11763_cov_221.802 4623630 11753 393.3999829830681 +NODE_1162_length_11725_cov_222.986 4649946 11725 396.5838805970149 +NODE_1163_length_11697_cov_225.328 4679230 11697 400.03676156279386 +NODE_1164_length_11637_cov_224.538 4649794 11637 399.5698204004469 +NODE_1165_length_11596_cov_222.755 4599870 11596 396.6773025181097 +NODE_1166_length_11507_cov_224.578 4599820 11507 399.74102720083425 +NODE_1167_length_11502_cov_225.661 4600018 11502 399.9320118240306 +NODE_1168_length_11495_cov_222.59 4549909 11495 395.8163549369291 +NODE_1169_length_11487_cov_223.066 4537277 11471 395.5432830616337 +NODE_116_length_123186_cov_224.013 49248674 123186 399.7911613332684 +NODE_1170_length_11480_cov_223.163 4549906 11480 396.33327526132405 +NODE_1171_length_11474_cov_223 4549882 11474 396.53843472198014 +NODE_1172_length_11464_cov_218.594 4457710 11464 388.84420795533845 +NODE_1173_length_11460_cov_233.474 4519365 11460 394.35994764397907 +NODE_1174_length_11450_cov_223.215 4549872 11450 397.36873362445414 +NODE_1175_length_11450_cov_226.362 4628133 11450 404.2037554585153 +NODE_1176_length_11447_cov_223.366 4549856 11447 397.47147724294575 +NODE_1177_length_11446_cov_223.387 4549849 11446 397.5055914730037 +NODE_1178_length_11443_cov_224.346 4549918 11443 397.61583500830204 +NODE_1179_length_11435_cov_223.548 4549815 11435 397.88500218627024 +NODE_117_length_121926_cov_232.463 50300311 121856 412.7848526129202 +NODE_1180_length_11429_cov_223.81 4549921 11429 398.1031586315513 +NODE_1181_length_11428_cov_223.729 4549864 11428 398.1330066503325 +NODE_1182_length_11422_cov_225.701 4547681 11422 398.15102433899494 +NODE_1183_length_11421_cov_223.69 4549914 11421 398.381402679275 +NODE_1184_length_11416_cov_223.926 4549832 11416 398.54870357393133 +NODE_1185_length_11402_cov_228.453 4649887 11402 407.8132783722154 +NODE_1186_length_11353_cov_224.39 4518099 11353 397.96520743415834 +NODE_1187_length_11343_cov_218.167 4401889 11333 388.41339451160326 +NODE_1188_length_11343_cov_223.147 4499844 11343 396.7066913514943 +NODE_1189_length_11328_cov_223.547 4499882 11328 397.23534604519773 +NODE_118_length_120968_cov_225.235 48377023 120968 399.91587031281 +NODE_1190_length_11301_cov_227.934 4448801 11301 393.664365985311 +NODE_1191_length_11292_cov_224.194 4499895 11292 398.5029224229543 +NODE_1192_length_11279_cov_224.412 4499774 11279 398.9515027928008 +NODE_1193_length_11276_cov_222.013 4449795 11243 395.78359868362537 +NODE_1194_length_11221_cov_223.074 4449905 11221 396.5693788432404 +NODE_1195_length_11220_cov_222.86 4449863 11220 396.6009803921569 +NODE_1196_length_11219_cov_229.29 4446433 11219 396.3305998752117 +NODE_1197_length_11199_cov_223.479 4449866 11199 397.34494151263505 +NODE_1198_length_11198_cov_223.295 4449920 11198 397.385247365601 +NODE_1199_length_11195_cov_223.601 4449888 11195 397.48887896382314 +NODE_119_length_119677_cov_223.947 47805975 119677 399.45833368149266 +NODE_11_length_590705_cov_223.126 235269296 590625 398.3395487830688 +NODE_1200_length_11188_cov_223.435 4449909 11188 397.7394529853414 +NODE_1201_length_11174_cov_224.039 4449912 11174 398.2380526221586 +NODE_1202_length_11146_cov_224.429 4449863 11146 399.23407500448593 +NODE_1203_length_11141_cov_224.428 4449855 11141 399.41253029351043 +NODE_1204_length_11135_cov_224.562 4449895 11135 399.6313426133812 +NODE_1205_length_11074_cov_228.373 4499908 11074 406.3489254108723 +NODE_1206_length_11062_cov_231.471 4533788 11062 409.85246790815404 +NODE_1207_length_11053_cov_223.905 4399943 11053 398.07681172532347 +NODE_1208_length_11045_cov_224.143 4399893 11045 398.36061566319603 +NODE_1209_length_11009_cov_224.821 4399914 11009 399.66518303206465 +NODE_120_length_119298_cov_224.179 47662979 119298 399.52873476504215 +NODE_1210_length_11003_cov_227.935 4399250 11003 399.8227756066527 +NODE_1211_length_10965_cov_223.156 4349806 10965 396.6991336069311 +NODE_1212_length_10912_cov_224.203 4349863 10912 398.63114002932554 +NODE_1213_length_10912_cov_224.02 4349866 10912 398.6314149560117 +NODE_1214_length_10859_cov_224.975 4349831 10859 400.5738097430703 +NODE_1215_length_10821_cov_223.293 4299900 10821 397.36623232603273 +NODE_1216_length_10787_cov_221.355 4249901 10787 393.9835913599703 +NODE_1217_length_10774_cov_231.474 4457477 10774 413.72535734174863 +NODE_1218_length_10773_cov_225.131 4300354 10773 399.17887310869764 +NODE_1219_length_10767_cov_224.397 4299909 10767 399.3599888548342 +NODE_121_length_119013_cov_222.361 47242783 118983 397.05489859895954 +NODE_1220_length_10748_cov_222.502 4249847 10748 395.4081689616673 +NODE_1221_length_10729_cov_222.89 4249801 10729 396.1041103551123 +NODE_1222_length_10699_cov_225.782 4299896 10699 401.8969997196 +NODE_1223_length_10691_cov_224.509 4249844 10673 398.18645179424715 +NODE_1224_length_10671_cov_224.002 4249813 10671 398.2581763658514 +NODE_1225_length_10666_cov_224.01 4249945 10666 398.457247327958 +NODE_1226_length_10662_cov_224.233 4249921 10662 398.604483211405 +NODE_1227_length_10647_cov_226.199 4264456 10647 400.53122945430636 +NODE_1228_length_10638_cov_224.463 4249935 10638 399.505076142132 +NODE_1229_length_10624_cov_225.105 4249862 10624 400.0246611445783 +NODE_122_length_118234_cov_230.379 47369470 118234 400.64169359067614 +NODE_1230_length_10608_cov_222.558 4199872 10608 395.9155354449472 +NODE_1231_length_10582_cov_219.793 4115898 10582 388.95274995274997 +NODE_1232_length_10561_cov_223.656 4199890 10561 397.6791970457343 +NODE_1233_length_10532_cov_224.172 4199824 10532 398.7679453095329 +NODE_1234_length_10522_cov_224.308 4199873 10522 399.1515871507318 +NODE_1235_length_10521_cov_224.633 4199879 10521 399.19009599847925 +NODE_1236_length_10517_cov_224.397 4199892 10517 399.3431586954455 +NODE_1237_length_10515_cov_236.36 4175861 10515 397.13371374227296 +NODE_1238_length_10515_cov_224.584 4199931 10515 399.4228245363766 +NODE_1239_length_10507_cov_224.438 4199898 10507 399.7238031788332 +NODE_123_length_117878_cov_224.371 47197115 117878 400.3895128862044 +NODE_1240_length_10484_cov_222.75 4149931 10484 395.8347004959939 +NODE_1241_length_10481_cov_222.791 4149942 10481 395.9490506631047 +NODE_1242_length_10475_cov_222.868 4149837 10475 396.16582338902145 +NODE_1243_length_10450_cov_226.51 4196691 10450 401.5972248803828 +NODE_1244_length_10446_cov_222.197 4105739 10446 393.04413172506224 +NODE_1245_length_10420_cov_223.859 4149905 10420 398.2634357005758 +NODE_1246_length_10408_cov_222.748 4106187 10408 394.52219446579556 +NODE_1247_length_10393_cov_224.748 4149903 10393 399.2978928124699 +NODE_1248_length_10351_cov_223.432 4082162 10351 394.37368370205775 +NODE_1249_length_10321_cov_224.053 4108234 10321 398.0461195620579 +NODE_124_length_116649_cov_223.972 46648251 116649 399.90270812437313 +NODE_1250_length_10299_cov_223.679 4099943 10299 398.09136809398973 +NODE_1251_length_10292_cov_224.071 4099925 10292 398.36037699183834 +NODE_1252_length_10265_cov_224.755 4099930 10265 399.4086702386751 +NODE_1253_length_10261_cov_224.594 4099895 10261 399.56095897086055 +NODE_1254_length_10258_cov_224.951 4099926 10258 399.68083447065703 +NODE_1255_length_10252_cov_228.55 4170418 10252 406.7906749902458 +NODE_1256_length_10238_cov_222.436 4049904 10238 395.57569837858955 +NODE_1257_length_10215_cov_222.869 4049902 10215 396.46617719040626 +NODE_1258_length_10210_cov_226.83 4050546 10210 396.7234084231146 +NODE_1259_length_10183_cov_225.039 4041132 10183 396.85082981439655 +NODE_125_length_115833_cov_224.259 46353988 115833 400.17946526464823 +NODE_1260_length_10179_cov_232.188 3949897 10179 388.0437174575106 +NODE_1261_length_10173_cov_224.409 4049816 10173 398.09456404207214 +NODE_1262_length_10158_cov_229.487 4080382 10158 401.69147469974405 +NODE_1263_length_10155_cov_229.608 4149841 10155 408.6500246184146 +NODE_1264_length_10155_cov_230.279 3999667 10155 393.86184145741015 +NODE_1265_length_10152_cov_224.348 4049908 10152 398.92710795902286 +NODE_1266_length_10146_cov_227.088 4026771 10146 396.8826138379657 +NODE_1267_length_10131_cov_224.87 4049906 10131 399.75382489389006 +NODE_1268_length_10122_cov_222.471 3999879 10122 395.1668642560759 +NODE_1269_length_10086_cov_224.163 4013710 10086 397.9486416815388 +NODE_126_length_115614_cov_223.525 46139276 115614 399.08035359039565 +NODE_1270_length_10042_cov_224.121 3999879 10042 398.314977096196 +NODE_1271_length_10008_cov_224.974 3999929 10008 399.67316147082335 +NODE_1272_length_9986_cov_224.443 3999806 9986 400.5413579010615 +NODE_1273_length_9980_cov_222.683 3949785 9980 395.7700400801603 +NODE_1274_length_9976_cov_222.661 3949917 9976 395.94196070569365 +NODE_1275_length_9969_cov_222.728 3949882 9969 396.2164710602869 +NODE_1276_length_9966_cov_223.065 3949821 9966 396.32962071041544 +NODE_1277_length_9964_cov_231.406 3880559 9964 389.45794861501406 +NODE_1278_length_9959_cov_223.178 3949851 9959 396.61120594437193 +NODE_1279_length_9938_cov_223.684 3949835 9938 397.44767558864964 +NODE_127_length_115095_cov_224.529 46113906 115095 400.65950736348236 +NODE_1280_length_9921_cov_223.943 3949816 9921 398.1268017336962 +NODE_1281_length_9905_cov_224.181 3949906 9905 398.77900050479553 +NODE_1282_length_9900_cov_224.207 3949890 9900 398.9787878787879 +NODE_1283_length_9858_cov_222.341 3899875 9858 395.6050923108136 +NODE_1284_length_9847_cov_222.92 3899918 9847 396.05138620899766 +NODE_1285_length_9845_cov_222.797 3899949 9845 396.13499238191974 +NODE_1286_length_9824_cov_230.833 4016214 9824 408.8165716612378 +NODE_1287_length_9811_cov_223.747 3899889 9811 397.5016817857507 +NODE_1288_length_9802_cov_231.324 3958571 9802 403.8533972658641 +NODE_1289_length_9794_cov_224.151 3885452 9794 396.7175821931795 +NODE_128_length_115061_cov_222.739 45751587 115058 397.63933842062266 +NODE_1290_length_9767_cov_224.542 3899912 9767 399.294768096652 +NODE_1291_length_9751_cov_225.128 3899898 9751 399.94851810070764 +NODE_1292_length_9737_cov_224.717 3867795 9737 397.22655848824076 +NODE_1293_length_9722_cov_228.56 3949822 9722 406.27669203867515 +NODE_1294_length_9717_cov_222.902 3849836 9717 396.19594525059176 +NODE_1295_length_9688_cov_223.711 3849907 9688 397.38924442609414 +NODE_1296_length_9681_cov_224.066 3849934 9681 397.679371965706 +NODE_1297_length_9679_cov_225.014 3849708 9671 398.0672112501293 +NODE_1298_length_9663_cov_224.627 3849899 9663 398.4165373072545 +NODE_1299_length_9662_cov_224.074 3849957 9662 398.46377561581454 +NODE_129_length_114394_cov_230.781 45756676 114394 399.991922653286 +NODE_12_length_584723_cov_225.634 234195438 584723 400.5237317499055 +NODE_1300_length_9655_cov_226.094 3850045 9655 398.76178146038325 +NODE_1301_length_9630_cov_224.813 3849909 9630 399.7828660436137 +NODE_1302_length_9614_cov_222.153 3799719 9614 395.22768878718534 +NODE_1303_length_9611_cov_222.561 3799908 9611 395.37072104879826 +NODE_1304_length_9576_cov_220.668 3753931 9566 392.424315283295 +NODE_1305_length_9569_cov_223.338 3799897 9569 397.1049221444247 +NODE_1306_length_9561_cov_224.882 3812234 9561 398.7275389603598 +NODE_1307_length_9559_cov_223.627 3799886 9532 398.6451951321863 +NODE_1308_length_9538_cov_217.586 3686203 9528 386.8810873215785 +NODE_1309_length_9500_cov_224.877 3799885 9500 399.9878947368421 +NODE_130_length_113714_cov_224.263 45447995 113714 399.66930193291944 +NODE_1310_length_9447_cov_223.283 3749880 9447 396.9387107018101 +NODE_1311_length_9418_cov_223.91 3749900 9418 398.1630919515821 +NODE_1312_length_9417_cov_224.889 3750413 9417 398.25984920887754 +NODE_1313_length_9399_cov_224.636 3749874 9399 398.9652090647941 +NODE_1314_length_9395_cov_224.517 3749889 9395 399.13666844065995 +NODE_1315_length_9383_cov_224.37 3749889 9383 399.6471277842907 +NODE_1316_length_9370_cov_222.148 3699887 9370 394.8652081109925 +NODE_1317_length_9366_cov_225.201 3749900 9366 400.3736920777279 +NODE_1318_length_9342_cov_225.951 3749865 9342 401.3985228002569 +NODE_1319_length_9332_cov_236.302 3682824 9332 394.64466352336046 +NODE_131_length_112177_cov_224.701 44798357 112177 399.35420808187064 +NODE_1320_length_9331_cov_228.865 3799323 9290 408.9691065662002 +NODE_1321_length_9306_cov_232.842 3803023 9306 408.6635503975929 +NODE_1322_length_9290_cov_223.68 3699870 9290 398.2637244348762 +NODE_1323_length_9289_cov_275.331 3695913 9289 397.8806114759393 +NODE_1324_length_9276_cov_224.487 3699922 9276 398.870418283743 +NODE_1325_length_9248_cov_238.11 3642875 9248 393.90949394463667 +NODE_1326_length_9247_cov_225.045 3699954 9247 400.12479723153456 +NODE_1327_length_9239_cov_223.208 3661137 9239 396.26983439766207 +NODE_1328_length_9236_cov_222.208 3649873 9236 395.1789735816371 +NODE_1329_length_9212_cov_222.889 3649934 9212 396.2151541467651 +NODE_132_length_112038_cov_223.935 44798783 112038 399.8534693586105 +NODE_1330_length_9199_cov_226.721 3686305 9199 400.72888357430156 +NODE_1331_length_9196_cov_223.524 3649908 9196 396.9016963897347 +NODE_1332_length_9168_cov_218.972 3564637 9158 389.2374972701463 +NODE_1333_length_9154_cov_230.267 3749798 9154 409.63491369892944 +NODE_1334_length_9143_cov_222.317 3603342 9143 394.10937329104235 +NODE_1335_length_9114_cov_222.774 3601609 9114 395.17324994513933 +NODE_1336_length_9100_cov_222.794 3599940 9100 395.5978021978022 +NODE_1337_length_9091_cov_222.959 3599834 9091 395.9777802221978 +NODE_1338_length_9075_cov_223.359 3600000 9075 396.6942148760331 +NODE_1339_length_9052_cov_232.163 3733339 9052 412.43250110472826 +NODE_133_length_111688_cov_223.412 44549443 111688 398.87403302055725 +NODE_1340_length_9016_cov_226.397 3671284 9016 407.1965394853594 +NODE_1341_length_9010_cov_225.007 3599921 9010 399.5472807991121 +NODE_1342_length_9008_cov_224.807 3599884 9008 399.63188277087033 +NODE_1343_length_9003_cov_227.775 3599896 9003 399.85515939131403 +NODE_1344_length_9001_cov_224.966 3599894 9001 399.94378402399735 +NODE_1345_length_8984_cov_222.332 3549896 8984 395.1353517364203 +NODE_1346_length_8980_cov_222.668 3549862 8980 395.3075723830735 +NODE_1347_length_8923_cov_228.837 3549960 8923 397.8437745152975 +NODE_1348_length_8916_cov_227.158 3599914 8916 403.75886047554957 +NODE_1349_length_8910_cov_224.246 3549895 8910 398.4169472502806 +NODE_134_length_111466_cov_225.817 44767464 111466 401.6243877056681 +NODE_1350_length_8861_cov_217.661 3423873 8851 386.83459496102137 +NODE_1351_length_8859_cov_222.202 3499889 8859 395.0659216615871 +NODE_1352_length_8847_cov_222.831 3499923 8847 395.60562902678873 +NODE_1353_length_8830_cov_223.225 3499916 8830 396.3664779161948 +NODE_1354_length_8820_cov_223.274 3491888 8820 395.90566893424034 +NODE_1355_length_8818_cov_223.385 3499969 8818 396.9118847811295 +NODE_1356_length_8812_cov_223.638 3499831 8812 397.16647753064 +NODE_1357_length_8810_cov_223.813 3499943 8810 397.26935300794554 +NODE_1358_length_8789_cov_224.11 3499930 8789 398.2170895437479 +NODE_1359_length_8787_cov_224.332 3499946 8787 398.30954819619893 +NODE_135_length_111190_cov_224.301 44498090 111190 400.198668945049 +NODE_1360_length_8766_cov_224.543 3499879 8766 399.25610312571297 +NODE_1361_length_8760_cov_225.078 3499909 8760 399.5329908675799 +NODE_1362_length_8741_cov_222.34 3449890 8741 394.67909850131565 +NODE_1363_length_8724_cov_216.598 3355644 8724 384.64511691884456 +NODE_1364_length_8695_cov_223.202 3449955 8695 396.774583093732 +NODE_1365_length_8689_cov_223.567 3449846 8689 397.0360225572563 +NODE_1366_length_8654_cov_251.862 3400590 8654 392.9500808874509 +NODE_1367_length_8646_cov_224.672 3449895 8646 399.01630811936155 +NODE_1368_length_8642_cov_224.901 3449884 8642 399.1997222865078 +NODE_1369_length_8630_cov_225.206 3449955 8630 399.7630359212051 +NODE_136_length_111142_cov_224.73 44448983 111142 399.92966655269834 +NODE_1370_length_8627_cov_224.861 3449923 8627 399.8983424133534 +NODE_1371_length_8599_cov_222.776 3399902 8599 395.38341667635774 +NODE_1372_length_8598_cov_222.791 3399901 8598 395.4292858804373 +NODE_1373_length_8594_cov_222.845 3399901 8594 395.61333488480335 +NODE_1374_length_8568_cov_223.311 3399920 8568 396.81605975723625 +NODE_1375_length_8561_cov_226.787 3449939 8561 402.98317953510104 +NODE_1376_length_8557_cov_223.85 3399857 8557 397.3188033189202 +NODE_1377_length_8535_cov_224.328 3399885 8535 398.3462214411248 +NODE_1378_length_8509_cov_225.027 3399863 8509 399.56081795745683 +NODE_1379_length_8505_cov_228.207 3449890 8505 405.63080540858317 +NODE_137_length_110232_cov_222.759 43785785 110202 397.3229614707537 +NODE_1380_length_8502_cov_221.373 3320314 8502 390.5332862855799 +NODE_1381_length_8479_cov_225.565 3399913 8479 400.9804222196014 +NODE_1382_length_8472_cov_229.297 3449926 8472 407.21506137865913 +NODE_1383_length_8468_cov_222.605 3349963 8468 395.6026216343883 +NODE_1384_length_8456_cov_225.919 3399750 8456 402.05179754020816 +NODE_1385_length_8448_cov_216.11 3239694 8438 383.9409812751837 +NODE_1386_length_8419_cov_224.19 3349883 8419 397.89559330086706 +NODE_1387_length_8418_cov_224.323 3325878 8418 395.0912330719886 +NODE_1388_length_8388_cov_230.859 3349149 8388 399.2786123032904 +NODE_1389_length_8381_cov_225.067 3349922 8381 399.7043312253908 +NODE_138_length_110015_cov_223.823 43948768 110015 399.47978002999594 +NODE_1390_length_8378_cov_230.353 3409739 8378 406.98722845547866 +NODE_1391_length_8373_cov_222.111 3299880 8373 394.10963812253675 +NODE_1392_length_8361_cov_222.368 3299879 8361 394.67515847386676 +NODE_1393_length_8343_cov_221.895 3291607 8343 394.5351791921371 +NODE_1394_length_8330_cov_223.247 3299905 8330 396.1470588235294 +NODE_1395_length_8305_cov_223.665 3299929 8305 397.3424443106562 +NODE_1396_length_8293_cov_224.265 3299917 8293 397.9159532135536 +NODE_1397_length_8292_cov_224.08 3283109 8292 395.9369271587072 +NODE_1398_length_8286_cov_225.946 3310939 8286 399.5823075066377 +NODE_1399_length_8278_cov_224.527 3299831 8278 398.62660062817105 +NODE_139_length_108919_cov_226.739 43848675 108919 402.580587408992 +NODE_13_length_533917_cov_222.896 212504553 533827 398.07756632766797 +NODE_1400_length_8228_cov_225.766 3299965 8228 401.0652649489548 +NODE_1401_length_8221_cov_221.193 3210846 8221 390.56635445809513 +NODE_1402_length_8216_cov_222.727 3249894 8216 395.55671859785787 +NODE_1403_length_8213_cov_215.122 3135153 8213 381.73054912942894 +NODE_1404_length_8212_cov_222.876 3249923 8212 395.7529225523624 +NODE_1405_length_8211_cov_226.955 3292191 8211 400.94884910485933 +NODE_1406_length_8209_cov_223.968 3249066 8209 395.79315385552445 +NODE_1407_length_8191_cov_223.248 3249841 8191 396.7575387620559 +NODE_1408_length_8187_cov_223.636 3249831 8187 396.95016489556616 +NODE_1409_length_8180_cov_224.072 3249530 8180 397.2530562347188 +NODE_140_length_108629_cov_224.149 43440382 108629 399.8967310754955 +NODE_1410_length_8157_cov_224.317 3249852 8157 398.41265171018756 +NODE_1411_length_8151_cov_224.695 3249919 8151 398.7141455036192 +NODE_1412_length_8147_cov_224.677 3249923 8147 398.9103964649564 +NODE_1413_length_8145_cov_224.895 3236380 8145 397.3456108041743 +NODE_1414_length_8142_cov_446.18 6389713 8124 786.5230182176268 +NODE_1415_length_8139_cov_224.792 3249892 8139 399.2986853421796 +NODE_1416_length_8117_cov_222.143 3199915 8117 394.22385117654306 +NODE_1417_length_8117_cov_221.924 3199875 8117 394.21892324750524 +NODE_1418_length_8103_cov_222.29 3199905 8103 394.90373935579413 +NODE_1419_length_8073_cov_223.435 3199874 8073 396.36739749783226 +NODE_141_length_108342_cov_225.343 43420891 108342 400.77616252238283 +NODE_1420_length_8065_cov_223.355 3199905 8065 396.76441413515187 +NODE_1421_length_8065_cov_223.399 3199884 8065 396.7618102913825 +NODE_1422_length_8065_cov_226.135 3199895 8065 396.7631742095474 +NODE_1423_length_8061_cov_223.673 3199940 8061 396.9656370177397 +NODE_1424_length_8050_cov_207.502 2963812 8050 368.17540372670805 +NODE_1425_length_8050_cov_226.084 3213668 8050 399.21341614906834 +NODE_1426_length_8040_cov_224.077 3199913 8040 397.99912935323385 +NODE_1427_length_8032_cov_224.524 3199874 8032 398.390687250996 +NODE_1428_length_7970_cov_225.097 3176266 7970 398.52772898368886 +NODE_1429_length_7888_cov_225.067 3149988 7888 399.3392494929006 +NODE_142_length_107924_cov_223.475 43021661 107924 398.62922982839774 +NODE_1430_length_7886_cov_225.019 3149881 7886 399.4269591681461 +NODE_1431_length_7878_cov_225.238 3149953 7878 399.84171109418634 +NODE_1432_length_7863_cov_225.644 3102664 7863 394.590359913519 +NODE_1433_length_7857_cov_225.968 3149890 7857 400.9023800432735 +NODE_1434_length_7843_cov_222.851 3099941 7843 395.2493943644014 +NODE_1435_length_7837_cov_222.722 3099908 7837 395.5477861426566 +NODE_1436_length_7833_cov_266.998 3699416 7833 472.28596961572833 +NODE_1437_length_7826_cov_223.302 3099899 7826 396.10260669562996 +NODE_1438_length_7810_cov_223.701 3099918 7810 396.91651728553137 +NODE_1439_length_7809_cov_227.431 3127850 7809 400.54424382123193 +NODE_143_length_106656_cov_223.985 42649069 106656 399.87500937593757 +NODE_1440_length_7794_cov_226.77 3115871 7794 399.7781626892481 +NODE_1441_length_7790_cov_227.47 3149900 7790 404.3517329910141 +NODE_1442_length_7790_cov_223.605 3099838 7790 397.9252888318357 +NODE_1443_length_7773_cov_224.864 3099918 7773 398.80586646082594 +NODE_1444_length_7769_cov_224.922 3099855 7769 399.0030892006693 +NODE_1445_length_7749_cov_225.324 3099929 7749 400.04245709123757 +NODE_1446_length_7738_cov_228.474 3141214 7738 405.9464978030499 +NODE_1447_length_7704_cov_223.099 3040030 7704 394.6041017653167 +NODE_1448_length_7696_cov_223.259 3049883 7696 396.2945686070686 +NODE_1449_length_7693_cov_223.312 3049876 7693 396.4481996620304 +NODE_144_length_106511_cov_224.019 42598780 106511 399.94723549680316 +NODE_1450_length_7690_cov_223.637 3049873 7690 396.60247074122236 +NODE_1451_length_7684_cov_223.824 3049956 7684 396.9229567933368 +NODE_1452_length_7679_cov_223.966 3049983 7679 397.1849199114468 +NODE_1453_length_7678_cov_223.695 3049947 7678 397.2319614482938 +NODE_1454_length_7671_cov_230.074 3149876 7671 410.62130100378045 +NODE_1455_length_7654_cov_224.465 3049894 7654 398.47060360595765 +NODE_1456_length_7645_cov_225.807 3051580 7645 399.1602354480052 +NODE_1457_length_7638_cov_225.225 3049671 7638 399.2761194029851 +NODE_1458_length_7625_cov_225.412 3049901 7625 399.9870163934426 +NODE_1459_length_7614_cov_226.562 2996422 7614 393.5411084843709 +NODE_145_length_106107_cov_223.794 42398829 106107 399.58559755718284 +NODE_1460_length_7612_cov_222.224 2999924 7612 394.1045717288492 +NODE_1461_length_7606_cov_229.633 3099935 7606 407.56442282408625 +NODE_1462_length_7599_cov_222.45 2999910 7599 394.77694433478086 +NODE_1463_length_7590_cov_214.907 2894938 7590 381.41475625823455 +NODE_1464_length_7587_cov_222.957 2999938 7587 395.40503492816663 +NODE_1465_length_7584_cov_223.762 3000072 7584 395.57911392405066 +NODE_1466_length_7578_cov_222.809 2999938 7578 395.8746371074162 +NODE_1467_length_7569_cov_223.319 2999865 7569 396.3357114546175 +NODE_1468_length_7545_cov_220.474 2949929 7545 390.97799867461896 +NODE_1469_length_7538_cov_224.43 2999909 7538 397.9714778455824 +NODE_146_length_105613_cov_224.716 41876696 105609 396.5258263973714 +NODE_1470_length_7529_cov_224.461 2999939 7529 398.45118873688403 +NODE_1471_length_7526_cov_224.563 2999912 7526 398.6064310390646 +NODE_1472_length_7507_cov_225.906 2962539 7507 394.6368722525643 +NODE_1473_length_7506_cov_225.338 2999937 7506 399.671862509992 +NODE_1474_length_7503_cov_225.443 2999910 7503 399.828068772491 +NODE_1475_length_7497_cov_224.138 2949830 7497 393.46805388822196 +NODE_1476_length_7496_cov_221.196 2949924 7496 393.5330843116329 +NODE_1477_length_7491_cov_222.02 2949904 7491 393.7930850353758 +NODE_1478_length_7490_cov_222.097 2949908 7490 393.8461949265688 +NODE_1479_length_7489_cov_225.186 2950454 7489 393.9716918146615 +NODE_147_length_105090_cov_227.076 42174537 105090 401.31827005423924 +NODE_1480_length_7487_cov_222.189 2949916 7487 394.00507546413786 +NODE_1481_length_7483_cov_222.293 2949925 7483 394.2168916210076 +NODE_1482_length_7478_cov_222.4 2949936 7478 394.48194704466437 +NODE_1483_length_7469_cov_222.435 2949914 7469 394.95434462444774 +NODE_1484_length_7435_cov_223.694 2949931 7435 396.76274377942167 +NODE_1485_length_7431_cov_223.678 2949888 7431 396.97052886556315 +NODE_1486_length_7415_cov_200.168 2631407 7395 355.83597025016905 +NODE_1487_length_7405_cov_224.684 2949936 7405 398.37083051991897 +NODE_1488_length_7375_cov_225.506 2949880 7375 399.9837288135593 +NODE_1489_length_7360_cov_221.934 2899888 7360 394.00652173913045 +NODE_148_length_105076_cov_224.712 41999983 105076 399.71052381133654 +NODE_1490_length_7348_cov_222.516 2899975 7348 394.66181273816005 +NODE_1491_length_7347_cov_226.016 2949954 7347 401.518170681911 +NODE_1492_length_7332_cov_223.021 2899870 7332 395.5087288597927 +NODE_1493_length_7309_cov_223.785 2899919 7309 396.76002189081953 +NODE_1494_length_7297_cov_223.858 2899964 7297 397.4186652048787 +NODE_1495_length_7291_cov_224.453 2900561 7291 397.8275956658895 +NODE_1496_length_7277_cov_229.294 2899935 7277 398.50693967294217 +NODE_1497_length_7272_cov_229.414 2899829 7272 398.76636413641364 +NODE_1498_length_7270_cov_221.111 2849845 7270 392.0006877579092 +NODE_1499_length_7263_cov_225.249 2899853 7263 399.26380283629356 +NODE_149_length_104884_cov_228.36 41929106 104884 399.7664658098471 +NODE_14_length_480305_cov_222.789 191054098 480215 397.8511666649313 +NODE_1500_length_7248_cov_221.727 2849916 7248 393.2003311258278 +NODE_1501_length_7237_cov_222.15 2849918 7237 393.7982589470775 +NODE_1502_length_7237_cov_221.949 2849935 7237 393.80060798673486 +NODE_1503_length_7231_cov_222.076 2849931 7231 394.1268151016457 +NODE_1504_length_7229_cov_222.375 2849952 7229 394.2387605477936 +NODE_1505_length_7227_cov_222.087 2849944 7227 394.34675522346754 +NODE_1506_length_7210_cov_222.785 2849984 7210 395.28210818307906 +NODE_1507_length_7166_cov_224.353 2849842 7166 397.68936645269326 +NODE_1508_length_7164_cov_224.311 2843978 7164 396.9818537130095 +NODE_1509_length_7158_cov_220.312 2799837 7158 391.1479463537301 +NODE_150_length_104840_cov_224.535 41991396 104840 400.52838611217095 +NODE_1510_length_7155_cov_224.414 2849869 7155 398.3045422781272 +NODE_1511_length_7151_cov_214.514 2719128 7141 380.77692199971995 +NODE_1512_length_7142_cov_224.648 2849915 7142 399.03598431811815 +NODE_1513_length_7133_cov_225.185 2849881 7133 399.53469788307865 +NODE_1514_length_7104_cov_222.351 2799945 7104 394.13640202702703 +NODE_1515_length_7100_cov_222.49 2799885 7100 394.35 +NODE_1516_length_7094_cov_222.268 2799899 7094 394.6855088807443 +NODE_1517_length_7085_cov_219.308 2726074 7085 384.7669724770642 +NODE_1518_length_7083_cov_232.079 2899493 7083 409.3594522095157 +NODE_1519_length_7074_cov_223.32 2799926 7074 395.8052021487136 +NODE_151_length_104103_cov_225.101 41612543 104103 399.724724551646 +NODE_1520_length_7060_cov_223.749 2799925 7060 396.5899433427762 +NODE_1521_length_7058_cov_223.43 2799926 7058 396.70246528761686 +NODE_1522_length_7054_cov_223.958 2799950 7054 396.9308193932521 +NODE_1523_length_7050_cov_224.066 2799931 7050 397.1533333333333 +NODE_1524_length_7031_cov_224.625 2799956 7031 398.2301237377329 +NODE_1525_length_7004_cov_225.263 2799959 7004 399.7657053112507 +NODE_1526_length_6983_cov_228.257 2820141 6983 403.8580839180868 +NODE_1527_length_6956_cov_222.831 2749924 6956 395.33122484186316 +NODE_1528_length_6954_cov_223.159 2749974 6954 395.4521138912856 +NODE_1529_length_6954_cov_230.829 2849903 6954 409.82211676732817 +NODE_152_length_103759_cov_224.033 41498888 103759 399.95458707196485 +NODE_1530_length_6952_cov_224.836 2783336 6952 400.36478711162255 +NODE_1531_length_6950_cov_223.283 2749952 6950 395.6765467625899 +NODE_1532_length_6945_cov_226.786 2799896 6945 403.1527717782577 +NODE_1533_length_6913_cov_224.277 2749885 6913 397.78460870823085 +NODE_1534_length_6901_cov_224.298 2749935 6901 398.4835531082452 +NODE_1535_length_6879_cov_221.814 2687803 6879 390.72583224305856 +NODE_1536_length_6853_cov_225.665 2700215 6853 394.0194075587334 +NODE_1537_length_6839_cov_222.576 2699922 6839 394.7831554320807 +NODE_1538_length_6830_cov_230.616 2799871 6830 409.93718887262077 +NODE_1539_length_6827_cov_223.151 2699905 6827 395.4745862018456 +NODE_153_length_101057_cov_224.127 40398624 101057 399.76076867510415 +NODE_1540_length_6810_cov_223.479 2699942 6810 396.46725403817914 +NODE_1541_length_6805_cov_238.354 2751919 6805 404.3966201322557 +NODE_1542_length_6785_cov_224.315 2696610 6785 397.4369933677229 +NODE_1543_length_6779_cov_224.737 2699862 6779 398.2684761764272 +NODE_1544_length_6774_cov_225.04 2699613 6749 400.00192621129054 +NODE_1545_length_6764_cov_225.299 2699908 6764 399.1584861028977 +NODE_1546_length_6743_cov_221.657 2649917 6743 392.98783924069403 +NODE_1547_length_6738_cov_221.54 2649922 6738 393.2802018403087 +NODE_1548_length_6725_cov_213.101 2539182 6715 378.13581533879375 +NODE_1549_length_6720_cov_222.348 2649938 6720 394.3360119047619 +NODE_154_length_100926_cov_223.929 40348639 100926 399.78438658026676 +NODE_1550_length_6717_cov_222.626 2649923 6717 394.5099002530892 +NODE_1551_length_6714_cov_222.798 2649938 6714 394.68841227286265 +NODE_1552_length_6708_cov_234.944 2649920 6708 395.0387596899225 +NODE_1553_length_6688_cov_223.513 2649926 6688 396.2209928229665 +NODE_1554_length_6664_cov_224.642 2649937 6664 397.6496098439376 +NODE_1555_length_6657_cov_224.581 2649942 6657 398.06849932401985 +NODE_1556_length_6652_cov_224.763 2649924 6652 398.36500300661453 +NODE_1557_length_6652_cov_225.111 2650154 6652 398.39957907396274 +NODE_1558_length_6649_cov_224.895 2649915 6649 398.54338998345617 +NODE_1559_length_6640_cov_225.145 2649949 6640 399.0887048192771 +NODE_155_length_100674_cov_223.648 40128002 100664 398.63309624096 +NODE_1560_length_6639_cov_228.243 2619388 6639 394.54556409097756 +NODE_1561_length_6634_cov_225.271 2649907 6634 399.44332227916794 +NODE_1562_length_6628_cov_233.276 2668536 6628 402.61557030778516 +NODE_1563_length_6613_cov_221.921 2599947 6613 393.1569635566309 +NODE_1564_length_6608_cov_230.458 2699536 6608 408.52542372881356 +NODE_1565_length_6595_cov_222.311 2599939 6595 394.22880970432146 +NODE_1566_length_6584_cov_222.909 2599925 6584 394.88532806804375 +NODE_1567_length_6581_cov_227.165 2642768 6581 401.57544446132806 +NODE_1568_length_6565_cov_216.336 2515891 6555 383.81250953470635 +NODE_1569_length_6553_cov_223.992 2599987 6553 396.76285670685184 +NODE_156_length_100002_cov_224.043 39998984 100002 399.98184036319276 +NODE_1570_length_6542_cov_224.315 2599949 6542 397.4241822072761 +NODE_1571_length_6534_cov_224.503 2599227 6534 397.80027548209364 +NODE_1572_length_6523_cov_224.637 2599905 6523 398.575042158516 +NODE_1573_length_6512_cov_225.291 2599901 6512 399.2476965601966 +NODE_1574_length_6496_cov_225.211 2587077 6496 398.2569273399015 +NODE_1575_length_6484_cov_222.032 2549957 6484 393.2691239975324 +NODE_1576_length_6480_cov_222.161 2549911 6480 393.5047839506173 +NODE_1577_length_6466_cov_224.809 2541952 6466 393.12588926693473 +NODE_1578_length_6457_cov_228.921 2613666 6457 404.7802385008518 +NODE_1579_length_6452_cov_223.155 2549949 6452 395.21838189708615 +NODE_157_length_98878_cov_225.159 39541823 98878 399.9051659620947 +NODE_1580_length_6442_cov_245.844 2555608 6440 396.8335403726708 +NODE_1581_length_6438_cov_223.341 2549944 6438 396.07704255980116 +NODE_1582_length_6435_cov_223.625 2549905 6435 396.25563325563326 +NODE_1583_length_6433_cov_221.499 2485963 6433 386.43914192445203 +NODE_1584_length_6410_cov_224.34 2549914 6410 397.802496099844 +NODE_1585_length_6401_cov_224.922 2549878 6401 398.35619434463365 +NODE_1586_length_6396_cov_224.833 2549919 6396 398.67401500938087 +NODE_1587_length_6387_cov_225.361 2549932 6387 399.23782683576013 +NODE_1588_length_6376_cov_225.828 2549946 6376 399.9287954830615 +NODE_1589_length_6369_cov_221.65 2499967 6369 392.5211179149003 +NODE_158_length_98590_cov_225.027 39549635 98590 401.1526016837407 +NODE_1590_length_6355_cov_221.842 2499849 6355 393.36726986624706 +NODE_1591_length_6353_cov_223.519 2483400 6353 390.9019360931843 +NODE_1592_length_6335_cov_222.857 2499871 6335 394.6126282557222 +NODE_1593_length_6317_cov_280.416 2127201 6316 336.79559848005067 +NODE_1594_length_6312_cov_223.48 2499889 6312 396.05339036755385 +NODE_1595_length_6309_cov_223.65 2499939 6309 396.2496433666191 +NODE_1596_length_6303_cov_223.805 2499905 6303 396.6214501031255 +NODE_1597_length_6300_cov_225.712 2486100 6300 394.6190476190476 +NODE_1598_length_6282_cov_224.655 2499909 6282 397.9479465138491 +NODE_1599_length_6280_cov_223.993 2499705 6280 398.0421974522293 +NODE_159_length_98172_cov_223.943 39248772 98165 399.82449956705545 +NODE_15_length_459631_cov_223.998 183810579 459631 399.90901179424367 +NODE_1600_length_6277_cov_220.026 2449891 6277 390.2964792098136 +NODE_1601_length_6276_cov_234.384 2491366 6276 396.96717654557045 +NODE_1602_length_6260_cov_225.28 2499928 6260 399.34952076677314 +NODE_1603_length_6252_cov_225.651 2499967 6252 399.8667626359565 +NODE_1604_length_6239_cov_221.356 2449955 6239 392.68392370572207 +NODE_1605_length_6229_cov_222.129 2449922 6229 393.3090383689196 +NODE_1606_length_6224_cov_226.499 2491198 6224 400.2567480719794 +NODE_1607_length_6181_cov_223.511 2428876 6181 392.958420967481 +NODE_1608_length_6170_cov_224.254 2449955 6170 397.0753646677472 +NODE_1609_length_6162_cov_224.678 2449342 6162 397.4913988964622 +NODE_160_length_97942_cov_222.841 38962074 97917 397.9091883942523 +NODE_1610_length_6149_cov_223.397 2412210 6149 392.2930557814279 +NODE_1611_length_6136_cov_229.891 2499659 6136 407.3759778357236 +NODE_1612_length_6130_cov_225.728 2449967 6130 399.668352365416 +NODE_1613_length_6125_cov_226.883 2449941 6125 399.9903673469388 +NODE_1614_length_6124_cov_225.964 2449973 6124 400.06090790333116 +NODE_1615_length_6102_cov_211.901 2288416 6092 375.64281024294155 +NODE_1616_length_6100_cov_226.417 2449784 6100 401.6039344262295 +NODE_1617_length_6073_cov_223.148 2399989 6073 395.1900214062243 +NODE_1618_length_6073_cov_915.65 9522480 6073 1568.0026346122181 +NODE_1619_length_6063_cov_223.336 2399950 6063 395.8353950189675 +NODE_161_length_97915_cov_223.088 39019578 97915 398.50460092937755 +NODE_1620_length_6058_cov_223.743 2399935 6058 396.1596236381644 +NODE_1621_length_6053_cov_224.001 2399951 6053 396.48950933421446 +NODE_1622_length_6041_cov_224.389 2399955 6041 397.2777685813607 +NODE_1623_length_6037_cov_224.543 2399944 6037 397.5391750869637 +NODE_1624_length_6035_cov_221.855 2353080 6035 389.90555095277546 +NODE_1625_length_6027_cov_224.779 2399943 6027 398.198606271777 +NODE_1626_length_6024_cov_225.108 2399965 6024 398.40056440903055 +NODE_1627_length_6016_cov_259.738 2344764 6016 389.75465425531917 +NODE_1628_length_5993_cov_221.459 2349952 5993 392.1161354914066 +NODE_1629_length_5985_cov_216.014 2288208 5985 382.32380952380953 +NODE_162_length_97904_cov_222.102 38835234 97884 396.74751746965796 +NODE_1630_length_5963_cov_222.419 2349986 5963 394.094583263458 +NODE_1631_length_5963_cov_222.652 2349927 5963 394.0846889149757 +NODE_1632_length_5961_cov_222.489 2349860 5961 394.2056701895655 +NODE_1633_length_5949_cov_226.841 2387765 5949 401.3724995797613 +NODE_1634_length_5940_cov_223.527 2349933 5940 395.61161616161615 +NODE_1635_length_5932_cov_223.782 2349982 5932 396.1534052596089 +NODE_1636_length_5920_cov_224.32 2349954 5920 396.9516891891892 +NODE_1637_length_5919_cov_224.212 2349944 5919 397.0170636931914 +NODE_1638_length_5916_cov_224.702 2349969 5916 397.22261663286 +NODE_1639_length_5915_cov_224.154 2349935 5915 397.28402366863907 +NODE_163_length_96922_cov_224.559 38747399 96922 399.77919357834134 +NODE_1640_length_5912_cov_224.375 2349947 5912 397.48765223274694 +NODE_1641_length_5907_cov_226.356 2349880 5881 399.57150144533244 +NODE_1642_length_5905_cov_236.421 2353464 5905 398.5544453852667 +NODE_1643_length_5901_cov_225.067 2349976 5901 398.23351974241655 +NODE_1644_length_5901_cov_223.344 2349962 5901 398.2311472631757 +NODE_1645_length_5897_cov_215.882 2253110 5897 382.07732745463795 +NODE_1646_length_5863_cov_221.631 2299944 5863 392.2810847688896 +NODE_1647_length_5858_cov_221.849 2299928 5858 392.6131785592352 +NODE_1648_length_5856_cov_221.894 2299958 5856 392.7523907103825 +NODE_1649_length_5843_cov_222.472 2299908 5843 393.6176621598494 +NODE_164_length_96188_cov_223.622 38385726 96188 399.06980080675345 +NODE_1650_length_5837_cov_222.632 2299911 5837 394.0227856775741 +NODE_1651_length_5834_cov_224.298 2299928 5834 394.22831676379843 +NODE_1652_length_5834_cov_222.767 2299852 5834 394.2152896811793 +NODE_1653_length_5833_cov_222.59 2299950 5833 394.29967426710095 +NODE_1654_length_5822_cov_223.362 2271782 5822 390.2064582617657 +NODE_1655_length_5816_cov_223.415 2299970 5816 395.45563961485556 +NODE_1656_length_5812_cov_277.745 2389740 5812 411.17343427391603 +NODE_1657_length_5806_cov_279.022 2320422 5806 399.65931794695143 +NODE_1658_length_5796_cov_224.057 2299940 5765 398.9488291413703 +NODE_1659_length_5793_cov_224.248 2299904 5793 397.01432763680305 +NODE_165_length_96175_cov_222.234 38148432 96175 396.65642838575513 +NODE_1660_length_5784_cov_225.64 2302520 5784 398.08437067773167 +NODE_1661_length_5765_cov_225.484 2299962 5765 398.95264527320035 +NODE_1662_length_5764_cov_225.522 2299934 5764 399.0170020818876 +NODE_1663_length_5759_cov_225.519 2299959 5759 399.3677721826706 +NODE_1664_length_5747_cov_226.122 2282801 5747 397.2161127544806 +NODE_1665_length_5722_cov_222.195 2249941 5722 393.2088430618665 +NODE_1666_length_5719_cov_222.927 2149926 5719 375.92691029900334 +NODE_1667_length_5716_cov_222.34 2249934 5716 393.6203638908328 +NODE_1668_length_5712_cov_222.386 2249927 5712 393.8947829131653 +NODE_1669_length_5710_cov_222.237 2249905 5710 394.0288966725044 +NODE_166_length_95533_cov_224.19 38198908 95533 399.85039724493106 +NODE_1670_length_5700_cov_222.97 2249942 5700 394.7266666666667 +NODE_1671_length_5680_cov_210.5 2114931 5670 373.0037037037037 +NODE_1672_length_5664_cov_224.064 2249825 5664 397.214865819209 +NODE_1673_length_5663_cov_220.818 2196692 5588 393.1088045812455 +NODE_1674_length_5662_cov_224.334 2249911 5662 397.3703638290357 +NODE_1675_length_5627_cov_225.871 2249936 5627 399.84645459392215 +NODE_1676_length_5626_cov_226.108 2249933 5626 399.9169925346605 +NODE_1677_length_5626_cov_228.936 2270824 5626 403.6302879488091 +NODE_1678_length_5622_cov_220.877 2199896 5622 391.30131625755956 +NODE_1679_length_5607_cov_221.78 2199948 5607 392.3574103798823 +NODE_167_length_95041_cov_224.08 37999664 95041 399.82390757673005 +NODE_1680_length_5604_cov_221.953 2199945 5604 392.5669164882227 +NODE_1681_length_5592_cov_222.429 2199971 5592 393.4139842632332 +NODE_1682_length_5591_cov_222.347 2199965 5591 393.4832766946879 +NODE_1683_length_5582_cov_222.757 2199937 5582 394.1126836259405 +NODE_1684_length_5582_cov_223.69 2207719 5582 395.5068075958438 +NODE_1685_length_5580_cov_218.513 2151217 5580 385.52275985663084 +NODE_1686_length_5576_cov_232.355 2232412 5576 400.36083213773315 +NODE_1687_length_5574_cov_222.762 2199915 5574 394.67438105489776 +NODE_1688_length_5570_cov_228.339 2197786 5570 394.57558348294435 +NODE_1689_length_5566_cov_223.296 2199916 5566 395.24182536830756 +NODE_168_length_94504_cov_223.548 37682829 94504 398.7432172183188 +NODE_1690_length_5562_cov_269.907 2294120 5562 412.4631427544049 +NODE_1691_length_5560_cov_223.372 2199949 5560 395.67428057553957 +NODE_1692_length_5531_cov_224.829 2199977 5531 397.75393238112457 +NODE_1693_length_5525_cov_219.79 2149923 5525 389.12633484162893 +NODE_1694_length_5520_cov_225.109 2199925 5520 398.53713768115944 +NODE_1695_length_5518_cov_225.245 2199976 5518 398.6908300108735 +NODE_1696_length_5512_cov_225.429 2199912 5512 399.1132075471698 +NODE_1697_length_5502_cov_236.158 2318636 5502 421.41693929480186 +NODE_1698_length_5488_cov_221.498 2149939 5488 391.7527332361516 +NODE_1699_length_5476_cov_221.589 2149922 5476 392.6081081081081 +NODE_169_length_94473_cov_225.282 37848493 94473 400.6276184730029 +NODE_16_length_457986_cov_223.76 182958044 457863 399.59124017446265 +NODE_1700_length_5456_cov_222.865 2149949 5456 394.05223607038124 +NODE_1701_length_5451_cov_223.058 2149963 5451 394.41625389836724 +NODE_1702_length_5450_cov_222.729 2149898 5450 394.4766972477064 +NODE_1703_length_5445_cov_222.996 2149978 5445 394.85362718089993 +NODE_1704_length_5439_cov_223.016 2148202 5439 394.96267696267694 +NODE_1705_length_5429_cov_223.791 2149923 5429 396.00718364339656 +NODE_1706_length_5407_cov_224.729 2148300 5407 397.3182911041243 +NODE_1707_length_5405_cov_224.714 2149938 5405 397.76836262719706 +NODE_1708_length_5401_cov_224.853 2149925 5401 398.06054434364006 +NODE_1709_length_5397_cov_219.707 2099933 5397 389.09264406151567 +NODE_170_length_94022_cov_223.957 37598819 94022 399.89384399395885 +NODE_1710_length_5387_cov_216.004 2057944 5377 382.73089083131856 +NODE_1711_length_5382_cov_225.891 2149913 5382 399.4635823114084 +NODE_1712_length_5360_cov_221.442 2099956 5360 391.7828358208955 +NODE_1713_length_5359_cov_221.586 2099891 5359 391.8438141444299 +NODE_1714_length_5355_cov_221.886 2100089 5355 392.1734827264239 +NODE_1715_length_5343_cov_222.035 2099904 5343 393.01965188096574 +NODE_1716_length_5341_cov_222.38 2099938 5341 393.17318854147163 +NODE_1717_length_5337_cov_222.487 2099968 5337 393.4734869777028 +NODE_1718_length_5331_cov_222.825 2099979 5331 393.9184018007878 +NODE_1719_length_5328_cov_233.273 2099195 5328 393.99305555555554 +NODE_171_length_93220_cov_223.906 37212070 93219 399.1897574528798 +NODE_1720_length_5318_cov_223.114 2099968 5289 397.04443183966725 +NODE_1721_length_5316_cov_223.46 2099952 5316 395.02483069977427 +NODE_1722_length_5314_cov_225.572 2111335 5314 397.3155814828754 +NODE_1723_length_5312_cov_223.089 2099974 5312 395.3264307228916 +NODE_1724_length_5303_cov_223.76 2099956 5303 395.99396567980386 +NODE_1725_length_5302_cov_223.839 2099946 5302 396.0667672576386 +NODE_1726_length_5290_cov_224.475 2099910 5290 396.95841209829865 +NODE_1727_length_5285_cov_221.37 2071661 5285 391.98883632923366 +NODE_1728_length_5269_cov_225.379 2099958 5269 398.549629910799 +NODE_1729_length_5264_cov_225.471 2099936 5264 398.9240121580547 +NODE_172_length_92910_cov_224.68 37176369 92910 400.13312883435583 +NODE_1730_length_5259_cov_225.295 2099955 5259 399.30690245293783 +NODE_1731_length_5258_cov_225.876 2099912 5258 399.37466717383035 +NODE_1732_length_5249_cov_222.677 2051717 5249 390.87769098875975 +NODE_1733_length_5238_cov_221.415 2049916 5238 391.35471554028254 +NODE_1734_length_5237_cov_211.435 1956947 5237 373.67710521290815 +NODE_1735_length_5236_cov_226.941 2070382 5236 395.412910618793 +NODE_1736_length_5236_cov_226.455 2099949 5236 401.0597784568373 +NODE_1737_length_5235_cov_221.313 2049878 5235 391.5717287488061 +NODE_1738_length_5233_cov_221.649 2049939 5233 391.73304032103954 +NODE_1739_length_5231_cov_221.637 2049999 5231 391.89428407570256 +NODE_173_length_92190_cov_224.104 36898853 92190 400.2478902267057 +NODE_1740_length_5229_cov_221.826 2049942 5229 392.0332759609868 +NODE_1741_length_5229_cov_221.615 2049923 5223 392.4799923415662 +NODE_1742_length_5224_cov_222.018 2049975 5224 392.41481623277184 +NODE_1743_length_5219_cov_217.398 1998003 5219 382.8325349683847 +NODE_1744_length_5212_cov_223.017 2050049 5212 393.33250191864926 +NODE_1745_length_5209_cov_222.333 2049948 5209 393.5396429257055 +NODE_1746_length_5201_cov_222.985 2049964 5201 394.14804845222073 +NODE_1747_length_5200_cov_223.048 2049938 5200 394.21884615384613 +NODE_1748_length_5199_cov_222.818 2049867 5199 394.2810155799192 +NODE_1749_length_5191_cov_223.273 2049979 5191 394.9102292429204 +NODE_174_length_91992_cov_222.095 36474335 91972 396.5808615665637 +NODE_1750_length_5177_cov_224.014 2049937 5177 395.97005988023955 +NODE_1751_length_5167_cov_224.385 2049954 5167 396.73969421327655 +NODE_1752_length_5154_cov_225.029 2049915 5154 397.73282887078 +NODE_1753_length_5152_cov_220.476 1974616 5152 383.2717391304348 +NODE_1754_length_5137_cov_225.677 2049944 5137 399.0547011874635 +NODE_1755_length_5134_cov_225.665 2049909 5134 399.281067393845 +NODE_1756_length_5132_cov_258.206 2049948 5132 399.44427123928295 +NODE_1757_length_5131_cov_813.073 6251803 5131 1218.4375365425842 +NODE_1758_length_5129_cov_225.843 2049939 5129 399.6761551959446 +NODE_1759_length_5124_cov_231.577 2036749 5124 397.49199843871975 +NODE_175_length_91487_cov_224.395 36628373 91487 400.36697017062534 +NODE_1760_length_5123_cov_220.63 1999914 5123 390.3794651571345 +NODE_1761_length_5118_cov_221.138 1999920 5118 390.7620164126612 +NODE_1762_length_5118_cov_220.884 1999994 5118 390.7764751856194 +NODE_1763_length_5103_cov_221.813 1999939 5103 391.9143640995493 +NODE_1764_length_5101_cov_228.314 2022982 5101 396.585375416585 +NODE_1765_length_4987_cov_218.589 1935258 4987 388.06055744936833 +NODE_1766_length_5096_cov_222.109 1999968 5096 392.458398744113 +NODE_1767_length_5095_cov_222.066 1999939 5095 392.5297350343474 +NODE_1768_length_5094_cov_222.181 1999776 5094 392.57479387514724 +NODE_1769_length_5091_cov_222.326 1999906 5091 392.8316637202907 +NODE_176_length_90355_cov_223.876 36099202 90355 399.5263350118975 +NODE_1770_length_5088_cov_222.434 1999976 5088 393.07704402515725 +NODE_1771_length_5087_cov_222.43 1999904 5087 393.14016119520346 +NODE_1772_length_5075_cov_223.043 1999950 5075 394.07881773399015 +NODE_1773_length_5072_cov_223.2 1986131 5072 391.5873422712934 +NODE_1774_length_5068_cov_223.45 1999441 5068 394.5226913970008 +NODE_1775_length_5060_cov_223.408 1999971 5060 395.25118577075096 +NODE_1776_length_5055_cov_223.932 1999917 5055 395.6314540059347 +NODE_1777_length_5046_cov_224.341 1999949 5046 396.34344034879115 +NODE_1778_length_5030_cov_265.015 1999939 5030 397.60218687872765 +NODE_1779_length_5025_cov_225.297 1999921 5025 397.9942288557214 +NODE_177_length_90155_cov_224.001 36098973 90155 400.41010481947757 +NODE_1780_length_5017_cov_325.079 2861979 5017 570.4562487542356 +NODE_1781_length_5015_cov_215.434 1908992 5005 381.416983016983 +NODE_1782_length_5006_cov_226.037 1999995 5006 399.5195765081902 +NODE_1783_length_5003_cov_226.043 1999922 5003 399.74455326803917 +NODE_1784_length_5002_cov_226.202 1999963 5002 399.83266693322673 +NODE_1785_length_4988_cov_221.327 1949962 4988 390.9306335204491 +NODE_1786_length_4965_cov_228.068 1981161 4965 399.02537764350456 +NODE_1787_length_4919_cov_314.663 2281225 4919 463.7578776174019 +NODE_1788_length_4916_cov_224.288 1949954 4916 396.6545972335232 +NODE_1789_length_4899_cov_225.049 1949910 4899 398.02204531537046 +NODE_178_length_89649_cov_224.08 35848945 89649 399.8811475866992 +NODE_1790_length_4892_cov_225.688 1949978 4892 398.6054783319706 +NODE_1791_length_4889_cov_221.151 1909753 4889 390.62241767232564 +NODE_1792_length_4876_cov_226.153 1949932 4876 399.90401968826905 +NODE_1793_length_4860_cov_221.042 1899993 4860 390.94506172839505 +NODE_1794_length_4853_cov_209.336 1794447 4843 370.5238488540161 +NODE_1795_length_4818_cov_223.384 1899942 4818 394.3424657534247 +NODE_1796_length_4816_cov_223.272 1899950 4816 394.50789036544853 +NODE_1797_length_4810_cov_223.454 1899957 4810 395.0014553014553 +NODE_1798_length_4810_cov_223.25 1899937 4810 394.9972972972973 +NODE_1799_length_4803_cov_223.788 1899971 4803 395.58005413283365 +NODE_179_length_89383_cov_223.979 35754482 89383 400.0143427721155 +NODE_17_length_457780_cov_225.875 183447389 457780 400.7326423172703 +NODE_1800_length_4782_cov_319.499 1899993 4781 397.4049362058147 +NODE_1801_length_4781_cov_225.052 1899942 4781 397.39426898138464 +NODE_1802_length_4777_cov_225.278 1899976 4777 397.7341427674273 +NODE_1803_length_4765_cov_225.519 1899931 4765 398.72633788037774 +NODE_1804_length_4744_cov_220.861 1849987 4744 389.9635328836425 +NODE_1805_length_4738_cov_220.968 1849997 4738 390.45947657239344 +NODE_1806_length_4726_cov_221.559 1849968 4726 391.4447735928904 +NODE_1807_length_4716_cov_220.917 1827383 4716 387.48579304495337 +NODE_1808_length_4699_cov_222.998 1849929 4699 393.68567780378805 +NODE_1809_length_4689_cov_209.014 1730440 4689 369.0424397526125 +NODE_180_length_89105_cov_224.008 35526855 89105 398.7077605072667 +NODE_1810_length_4671_cov_231.269 1849130 4671 395.8745450652965 +NODE_1811_length_4648_cov_225.487 1849916 4648 398.0025817555938 +NODE_1812_length_4648_cov_225.195 1849962 4648 398.01247848537 +NODE_1813_length_4623_cov_220.222 1799923 4623 389.3409041747783 +NODE_1814_length_4621_cov_225.659 1838621 4621 397.88379138714566 +NODE_1815_length_4613_cov_220.971 1799954 4613 390.1916323433774 +NODE_1816_length_4608_cov_221.34 1799940 4608 390.6119791666667 +NODE_1817_length_4584_cov_222.074 1799955 4584 392.66034031413614 +NODE_1818_length_4578_cov_225.301 1815487 4578 396.56771515945826 +NODE_1819_length_4574_cov_223.003 1799986 4574 393.5255793616091 +NODE_181_length_88191_cov_229.395 35355297 88191 400.8946150967786 +NODE_1820_length_4573_cov_245.489 1850652 4573 404.69101246446536 +NODE_1821_length_4566_cov_223.354 1799967 4566 394.21090670170827 +NODE_1822_length_4564_cov_228.349 1838016 4564 402.7204206836109 +NODE_1823_length_4531_cov_224.466 1799934 4531 397.24873096446703 +NODE_1824_length_4530_cov_225.173 1799937 4530 397.33708609271525 +NODE_1825_length_4524_cov_226.902 1799987 4524 397.87511052166224 +NODE_1826_length_4521_cov_225.426 1799934 4521 398.12740544127405 +NODE_1827_length_4514_cov_225.683 1799978 4514 398.7545414266726 +NODE_1828_length_4504_cov_226.101 1799966 4504 399.63721136767316 +NODE_1829_length_4489_cov_220.909 1749971 4489 389.835375361996 +NODE_182_length_88040_cov_222.686 34981344 88030 397.3798023401113 +NODE_1830_length_4482_cov_221.243 1749963 4482 390.44243641231594 +NODE_1831_length_4469_cov_225.113 1770405 4469 396.15238308346386 +NODE_1832_length_4468_cov_221.946 1749939 4468 391.6604744852283 +NODE_1833_length_4458_cov_222.521 1749918 4458 392.5343203230148 +NODE_1834_length_4447_cov_222.79 1749925 4447 393.5068585563301 +NODE_1835_length_4442_cov_223.607 1749937 4442 393.9524988743809 +NODE_1836_length_4441_cov_240.655 1853565 4441 417.3755910830894 +NODE_1837_length_4440_cov_223.221 1749926 4440 394.1274774774775 +NODE_1838_length_4432_cov_223.537 1749942 4432 394.8425090252708 +NODE_1839_length_4419_cov_224.345 1749955 4419 396.0070151618013 +NODE_183_length_87586_cov_223.459 34922001 87586 398.7167012992944 +NODE_1840_length_4414_cov_223.194 1749950 4414 396.4544630720435 +NODE_1841_length_4395_cov_212.338 1646205 4385 375.41733181299884 +NODE_1842_length_4394_cov_225.551 1749959 4394 398.2610377787893 +NODE_1843_length_4393_cov_222.891 1727088 4393 393.1454586842704 +NODE_1844_length_4390_cov_225.462 1749976 4390 398.6277904328018 +NODE_1845_length_4380_cov_226.436 1749977 4380 399.5381278538813 +NODE_1846_length_4378_cov_226.587 1749909 4378 399.70511649154867 +NODE_1847_length_4365_cov_210.771 1622736 4355 372.61446613088407 +NODE_1848_length_4342_cov_221.363 1699946 4342 391.5122063565177 +NODE_1849_length_4341_cov_221.765 1699989 4341 391.6123013130615 +NODE_184_length_87367_cov_224.405 34899119 87367 399.4542447377156 +NODE_1850_length_4340_cov_223.178 1699974 4340 391.69907834101383 +NODE_1851_length_4326_cov_222.383 1699978 4326 392.9676375404531 +NODE_1852_length_4323_cov_222.954 1699955 4323 393.23502197548 +NODE_1853_length_4317_cov_223.027 1699967 4317 393.7843409775307 +NODE_1854_length_4316_cov_223.36 1699910 4316 393.86237256719187 +NODE_1855_length_4303_cov_223.769 1699954 4303 395.06251452475016 +NODE_1856_length_4301_cov_223.696 1699834 4301 395.2183213206231 +NODE_1857_length_4300_cov_224.18 1699955 4300 395.3383720930233 +NODE_1858_length_4289_cov_224.781 1699993 4289 396.3611564467242 +NODE_1859_length_4289_cov_224.594 1699952 4289 396.3515971088832 +NODE_185_length_87290_cov_223.775 34854901 87290 399.300045824264 +NODE_1860_length_4288_cov_224.708 1699951 4288 396.44379664179104 +NODE_1861_length_4287_cov_1135.42 7077866 4287 1651.0067646372754 +NODE_1862_length_4285_cov_224.803 1699930 4285 396.7164527421237 +NODE_1863_length_4281_cov_225.964 1699965 4281 397.09530483531887 +NODE_1864_length_4278_cov_225.09 1699960 4278 397.37260402057035 +NODE_1865_length_4262_cov_670.121 5007496 4262 1174.9169404035665 +NODE_1866_length_4260_cov_226.325 1699993 4260 399.0593896713615 +NODE_1867_length_4246_cov_220.035 1649950 4246 388.5892604804522 +NODE_1868_length_4224_cov_221.036 1649976 4224 390.6193181818182 +NODE_1869_length_4222_cov_221.329 1649949 4222 390.79796305068686 +NODE_186_length_86785_cov_225.815 34723237 86785 400.10643544391314 +NODE_1870_length_4221_cov_224.884 1649816 4212 391.69420702754036 +NODE_1871_length_4221_cov_222.614 1655029 4221 392.09405354181473 +NODE_1872_length_4219_cov_221.838 1649917 4219 391.0682626214743 +NODE_1873_length_4194_cov_222.948 1649919 4194 393.39985693848354 +NODE_1874_length_4193_cov_223.227 1649939 4193 393.4984497972812 +NODE_1875_length_4186_cov_228.643 1691899 4186 404.1803631151457 +NODE_1876_length_4184_cov_231.186 1695249 4184 405.17423518164435 +NODE_1877_length_4183_cov_211.255 1557713 4183 372.391345923978 +NODE_1878_length_4181_cov_223.878 1649966 4181 394.63429801482897 +NODE_1879_length_4167_cov_224.62 1649927 4167 395.95080393568514 +NODE_187_length_86171_cov_223.156 34314918 86158 398.27895262192715 +NODE_1880_length_4155_cov_225.002 1649962 4155 397.1027677496992 +NODE_1881_length_4147_cov_225.091 1641258 4147 395.7699541837473 +NODE_1882_length_4146_cov_225.51 1649909 4146 397.95200192957066 +NODE_1883_length_4133_cov_226.542 1649976 4133 399.21993709170096 +NODE_1884_length_4127_cov_227.274 1649950 4127 399.7940392536952 +NODE_1885_length_4105_cov_220.924 1599949 4105 389.7561510353228 +NODE_1886_length_4086_cov_222.203 1600077 4086 391.59985315712186 +NODE_1887_length_4085_cov_222.274 1599947 4085 391.6638922888617 +NODE_1888_length_4064_cov_223.145 1599912 4064 393.67913385826773 +NODE_1889_length_4061_cov_2321.29 15420708 4061 3797.268653041123 +NODE_188_length_86162_cov_224.118 34348951 86162 398.6554513590678 +NODE_1890_length_4059_cov_223.411 1599986 4059 394.1823109140182 +NODE_1891_length_4055_cov_223.724 1599963 4055 394.5654747225647 +NODE_1892_length_4054_cov_228.451 1612083 4054 397.65244203256043 +NODE_1893_length_4053_cov_223.793 1599913 4053 394.74784110535404 +NODE_1894_length_4051_cov_224.067 1599943 4051 394.9501357689459 +NODE_1895_length_4044_cov_217.309 1549951 4044 383.2717606330366 +NODE_1896_length_4039_cov_448.289 3158817 4039 782.078979945531 +NODE_1897_length_4034_cov_234.893 1600096 4022 397.8359025360517 +NODE_1898_length_4032_cov_224.951 1599946 4032 396.812003968254 +NODE_1899_length_4029_cov_225.168 1599932 4029 397.10399602879124 +NODE_189_length_85746_cov_224.26 34248707 85746 399.42046276211136 +NODE_18_length_457640_cov_224.09 182802842 457640 399.4468184599248 +NODE_1900_length_4027_cov_225.041 1599934 4027 397.30171343431834 +NODE_1901_length_4015_cov_225.824 1599952 4015 398.4936488169365 +NODE_1902_length_4014_cov_233.932 1642787 4014 409.26432486297955 +NODE_1903_length_4007_cov_226.677 1599970 4007 399.29373596206636 +NODE_1904_length_3973_cov_221.507 1549933 3973 390.1165366221999 +NODE_1905_length_3973_cov_221.413 1549905 3973 390.1094890510949 +NODE_1906_length_3970_cov_221.557 1549971 3970 390.42090680100756 +NODE_1907_length_3960_cov_222.033 1549971 3960 391.4068181818182 +NODE_1908_length_3952_cov_222.624 1549977 3952 392.2006578947368 +NODE_1909_length_3949_cov_222.685 1549959 3949 392.4940491263611 +NODE_190_length_85521_cov_221.678 33799837 85481 395.40759934956304 +NODE_1910_length_3945_cov_222.615 1549986 3943 393.0981486178037 +NODE_1911_length_3933_cov_223.732 1549971 3933 394.09382151029746 +NODE_1912_length_3918_cov_241.834 1577677 3918 402.67406840224606 +NODE_1913_length_3910_cov_224.811 1549979 3910 396.4140664961637 +NODE_1914_length_3900_cov_225.378 1549970 3900 397.42820512820515 +NODE_1915_length_3888_cov_226.358 1549974 3888 398.6558641975309 +NODE_1916_length_3887_cov_226.126 1549957 3887 398.7540519680988 +NODE_1917_length_3881_cov_226.788 1549967 3881 399.3730997165679 +NODE_1918_length_3871_cov_227.971 1550271 3871 400.483337638853 +NODE_1919_length_3864_cov_220.485 1499961 3864 388.18866459627327 +NODE_191_length_85241_cov_228.275 34129420 85241 400.3873722739057 +NODE_1920_length_3856_cov_228.536 1521925 3856 394.69009336099583 +NODE_1921_length_3856_cov_225.433 1496086 3856 387.98910788381744 +NODE_1922_length_3855_cov_220.559 1499944 3855 389.0905317769131 +NODE_1923_length_3840_cov_229.026 1549949 3840 403.63255208333334 +NODE_1924_length_3784_cov_224.757 1499941 3784 396.39032769556025 +NODE_1925_length_3780_cov_225.394 1499979 3780 396.8198412698413 +NODE_1926_length_3778_cov_215.947 1428216 3778 378.03493912122815 +NODE_1927_length_3760_cov_226.347 1499944 3760 398.9212765957447 +NODE_1928_length_3746_cov_219.373 1449970 3746 387.07154297917776 +NODE_1929_length_3745_cov_221.327 1449928 3745 387.16368491321765 +NODE_192_length_84999_cov_223.814 33958656 84999 399.5183002152967 +NODE_1930_length_3741_cov_226.598 1459713 3741 390.1932638331997 +NODE_1931_length_3736_cov_220.358 1449948 3736 388.1017130620985 +NODE_1932_length_3731_cov_220.415 1449832 3731 388.5907263468239 +NODE_1933_length_3725_cov_221.176 1449958 3725 389.2504697986577 +NODE_1934_length_3706_cov_222.332 1449969 3706 391.24905558553695 +NODE_1935_length_3700_cov_459.514 2904682 3700 785.0491891891892 +NODE_1936_length_3680_cov_223.949 1449974 3680 394.01467391304345 +NODE_1937_length_3668_cov_224.66 1449970 3668 395.3026172300981 +NODE_1938_length_3665_cov_224.553 1449977 3665 395.6281036834925 +NODE_1939_length_3657_cov_225.37 1449959 3657 396.48865190046484 +NODE_193_length_84292_cov_226.109 33659938 84292 399.3254164096237 +NODE_1940_length_3646_cov_225.495 1449974 3646 397.68897421832145 +NODE_1941_length_3638_cov_227.448 1447768 3638 397.95711929631665 +NODE_1942_length_3627_cov_226.92 1449931 3627 399.76040805073063 +NODE_1943_length_3626_cov_227.074 1449966 3626 399.88030888030886 +NODE_1944_length_3623_cov_219.614 1399974 3623 386.4129174717085 +NODE_1945_length_3617_cov_219.978 1399994 3617 387.05944152612665 +NODE_1946_length_3609_cov_220.514 1399995 3609 387.91770573566083 +NODE_1947_length_3607_cov_220.357 1399944 3607 388.1186581646798 +NODE_1948_length_3603_cov_229.411 1449580 3603 402.3258395781293 +NODE_1949_length_3598_cov_221.14 1399969 3598 389.0964424680378 +NODE_194_length_84272_cov_224.22 33699147 84263 399.92816538694325 +NODE_1950_length_3594_cov_221.804 1389038 3594 386.48803561491377 +NODE_1951_length_3592_cov_221.239 1399959 3592 389.7435968819599 +NODE_1952_length_3590_cov_221.169 1399931 3590 389.95292479108633 +NODE_1953_length_3590_cov_221.402 1399931 3590 389.95292479108633 +NODE_1954_length_3583_cov_222.135 1399937 3583 390.7164387384873 +NODE_1955_length_3583_cov_222.151 1399903 3583 390.7069494836729 +NODE_1956_length_3582_cov_226.004 1410869 3582 393.8774427694026 +NODE_1957_length_3577_cov_222.302 1399964 3577 391.3793681856304 +NODE_1958_length_3571_cov_302.572 1388242 3571 388.7544105292635 +NODE_1959_length_3565_cov_229.023 1434716 3565 402.4448807854138 +NODE_195_length_83962_cov_223.135 33451657 83952 398.46170430722316 +NODE_1960_length_3559_cov_223.12 1399949 3559 393.354593987075 +NODE_1961_length_3550_cov_223.932 1399983 3550 394.36140845070423 +NODE_1962_length_3549_cov_222.212 1399914 3549 394.4530853761623 +NODE_1963_length_3533_cov_225.359 1399977 3533 396.2572884234362 +NODE_1964_length_3530_cov_225.86 1396245 3530 395.5368271954674 +NODE_1965_length_3522_cov_218.751 1348077 3522 382.7589437819421 +NODE_1966_length_3520_cov_225.854 1399975 3520 397.72017045454544 +NODE_1967_length_3519_cov_226.223 1399979 3519 397.8343279340722 +NODE_1968_length_3518_cov_226.357 1399942 3518 397.9368959636157 +NODE_1969_length_3516_cov_226.138 1399992 3516 398.17747440273035 +NODE_196_length_83917_cov_223.822 33548929 83917 399.787039574818 +NODE_1970_length_3513_cov_226.662 1399970 3510 398.85185185185185 +NODE_1971_length_3510_cov_226.641 1399964 3510 398.85014245014247 +NODE_1972_length_3507_cov_226.793 1399967 3507 399.1921870544625 +NODE_1973_length_3504_cov_253.388 1546657 3504 441.39754566210047 +NODE_1974_length_3502_cov_286.598 1849135 3502 528.0225585379783 +NODE_1975_length_3500_cov_227.453 1399971 3500 399.9917142857143 +NODE_1976_length_3495_cov_219.465 1349968 3495 386.2569384835479 +NODE_1977_length_3492_cov_221.93 1348422 3492 386.14604810996565 +NODE_1978_length_3479_cov_319.666 1197086 3478 344.1880391029327 +NODE_1979_length_3477_cov_227.364 1349097 3426 393.78196147110333 +NODE_197_length_83879_cov_224.283 33551210 83879 399.99535044528426 +NODE_1980_length_3475_cov_220.896 1349994 3475 388.4874820143885 +NODE_1981_length_3468_cov_221.41 1349981 3468 389.267877739331 +NODE_1982_length_3464_cov_221.583 1349983 3464 389.71795612009237 +NODE_1983_length_3459_cov_221.949 1349980 3459 390.2804278693264 +NODE_1984_length_3451_cov_222.273 1349959 3451 391.17907852796293 +NODE_1985_length_3448_cov_222.278 1349967 3448 391.5217517401392 +NODE_1986_length_3442_cov_561.127 3395018 3442 986.3503776873911 +NODE_1987_length_3434_cov_223.326 1341337 3434 390.60483401281306 +NODE_1988_length_3432_cov_223.062 1349911 3432 393.33071095571097 +NODE_1989_length_3428_cov_224.648 1349614 3428 393.70303383897317 +NODE_198_length_83690_cov_222.942 33142032 83690 396.0094634962361 +NODE_1990_length_3422_cov_206.411 1241480 3412 363.8569753810082 +NODE_1991_length_3405_cov_225.335 1349967 3405 396.4660792951542 +NODE_1992_length_3403_cov_225.757 1349961 3403 396.6973258889215 +NODE_1993_length_3393_cov_226.976 1349872 3393 397.8402593575007 +NODE_1994_length_3393_cov_219.31 1299940 3320 391.54819277108436 +NODE_1995_length_3388_cov_226.697 1349954 3388 398.4515938606848 +NODE_1996_length_3368_cov_229.394 1299974 3368 385.97802850356294 +NODE_1997_length_3355_cov_220.172 1299986 3355 387.47719821162445 +NODE_1998_length_3351_cov_220.743 1299918 3351 387.9194270367055 +NODE_1999_length_3347_cov_220.883 1300000 3347 388.4075291305647 +NODE_199_length_82125_cov_224.071 32849091 82125 399.9889315068493 +NODE_19_length_451442_cov_224.264 180630595 451442 400.1191625945304 +NODE_1_length_1389215_cov_225.275 556562674 1389164 400.6457653667961 +NODE_2000_length_3342_cov_217.041 1270339 3342 380.1134051466188 +NODE_2001_length_3339_cov_217.702 1265698 3339 379.0649895178197 +NODE_2002_length_3337_cov_222.125 1299981 3337 389.5657776445909 +NODE_2003_length_3334_cov_226.913 1340059 3334 401.9373125374925 +NODE_2004_length_3334_cov_221.928 1299945 3334 389.90551889622077 +NODE_2005_length_3333_cov_228.178 1318793 3333 395.67746774677465 +NODE_2006_length_3331_cov_230.336 1349973 3331 405.27559291504053 +NODE_2007_length_3330_cov_225.654 1320845 3330 396.6501501501501 +NODE_2008_length_3324_cov_222.365 1299953 3324 391.08092659446453 +NODE_2009_length_3316_cov_222.811 1299979 3316 392.0322677925211 +NODE_200_length_82014_cov_225.527 32897656 82014 401.1224425098154 +NODE_2010_length_3312_cov_223.168 1299987 3312 392.50815217391306 +NODE_2011_length_3308_cov_223.4 1299968 3308 392.9770253929867 +NODE_2012_length_3306_cov_259.142 1299986 3306 393.2202056866304 +NODE_2013_length_3305_cov_223.519 1299966 3305 393.33313161875947 +NODE_2014_length_3286_cov_224.56 1299972 3286 395.60925136944616 +NODE_2015_length_3277_cov_225.34 1299990 3277 396.7012511443393 +NODE_2016_length_3268_cov_226.536 1299938 3268 397.7778457772338 +NODE_2017_length_3248_cov_218.939 1249950 3248 384.8368226600985 +NODE_2018_length_3244_cov_219.449 1249977 3244 385.31966707768186 +NODE_2019_length_3242_cov_219.446 1249945 3242 385.54750154225786 +NODE_201_length_81885_cov_225.664 32747189 81885 399.91682237283993 +NODE_2020_length_3234_cov_219.87 1249982 3234 386.5126777983921 +NODE_2021_length_3228_cov_220.319 1249953 3228 387.22211895910783 +NODE_2022_length_3226_cov_220.375 1249993 3226 387.4745815251085 +NODE_2023_length_3206_cov_222.073 1249962 3206 389.882096069869 +NODE_2024_length_3206_cov_362.764 2044244 3206 637.6306924516532 +NODE_2025_length_3203_cov_222.021 1249963 3203 390.2475803933812 +NODE_2026_length_3202_cov_222.335 1249972 3202 390.3722673329169 +NODE_2027_length_3199_cov_222.422 1249958 3199 390.7339793685527 +NODE_2028_length_3188_cov_223.36 1249978 3188 392.0884567126725 +NODE_2029_length_3183_cov_223.364 1249977 3183 392.70405278039584 +NODE_202_length_81266_cov_224.287 32499172 81266 399.91105751482786 +NODE_2030_length_3179_cov_207.543 1158593 3179 364.4520289399182 +NODE_2031_length_3176_cov_224.625 1226319 3176 386.1205919395466 +NODE_2032_length_3168_cov_936.648 4993847 3168 1576.3405934343434 +NODE_2033_length_3155_cov_225.608 1249972 3155 396.1876386687797 +NODE_2034_length_3152_cov_207.356 1147359 3142 365.1683640992998 +NODE_2035_length_3145_cov_226.226 1249947 3145 397.4394276629571 +NODE_2036_length_3144_cov_226.531 1249966 3144 397.57188295165395 +NODE_2037_length_3142_cov_226.468 1249995 3142 397.8341820496499 +NODE_2038_length_3139_cov_228.144 1241305 3139 395.4460019114368 +NODE_2039_length_3129_cov_201.924 1109230 3113 356.3218760038548 +NODE_203_length_81203_cov_223.854 32449216 81203 399.60612292649284 +NODE_2040_length_3119_cov_228.384 1242690 3119 398.42577749278615 +NODE_2041_length_3110_cov_219.77 1199953 3110 385.8369774919614 +NODE_2042_length_3103_cov_233.061 1261213 3103 406.4495649371576 +NODE_2043_length_3085_cov_219.547 1178406 3085 381.9792544570502 +NODE_2044_length_3080_cov_221.826 1199952 3080 389.5948051948052 +NODE_2045_length_3078_cov_223.065 1191087 3078 386.96783625730995 +NODE_2046_length_3074_cov_222.186 1199980 3074 390.3643461288224 +NODE_2047_length_3072_cov_222.687 1199971 3072 390.6155598958333 +NODE_2048_length_3072_cov_225.408 1210074 3072 393.904296875 +NODE_2049_length_3068_cov_222.83 1199965 3068 391.1228813559322 +NODE_204_length_79787_cov_224.315 31948919 79787 400.42762605436974 +NODE_2050_length_3046_cov_224.576 1199989 3046 393.95567957977676 +NODE_2051_length_3046_cov_224.232 1199972 3046 393.9500984898227 +NODE_2052_length_3044_cov_224.729 1199942 3044 394.19908015768726 +NODE_2053_length_3036_cov_225.316 1199975 3036 395.2486824769434 +NODE_2054_length_3034_cov_230.157 1220490 3034 402.2709294660514 +NODE_2055_length_3023_cov_226.367 1199937 3023 396.93582533906715 +NODE_2056_length_3017_cov_217.338 1149984 3017 381.16804772953265 +NODE_2057_length_3015_cov_226.655 1199986 3015 398.00530679933667 +NODE_2058_length_3011_cov_227.199 1199932 3011 398.5161076054467 +NODE_2059_length_3009_cov_227.189 1199982 3009 398.7976071784646 +NODE_205_length_79593_cov_223.498 31746575 79593 398.86139484628046 +NODE_2060_length_2998_cov_228.238 1199953 2998 400.2511674449633 +NODE_2061_length_2995_cov_221.196 1147978 2995 383.29816360601 +NODE_2062_length_2990_cov_219.408 1149982 2990 384.60936454849497 +NODE_2063_length_2989_cov_219.271 1149960 2989 384.73067915690865 +NODE_2064_length_2986_cov_218.655 1149953 2986 385.11486939048893 +NODE_2065_length_2977_cov_235.142 1219277 2977 409.5656701377225 +NODE_2066_length_2975_cov_220.393 1149955 2975 386.5394957983193 +NODE_2067_length_2958_cov_221.493 1149988 2958 388.77214334009466 +NODE_2068_length_2949_cov_222.391 1149962 2949 389.9498134961004 +NODE_2069_length_2944_cov_222.548 1149970 2944 390.6148097826087 +NODE_206_length_79531_cov_224.062 31799143 79531 399.8333102815254 +NODE_2070_length_2939_cov_223.219 1149965 2939 391.27764545763864 +NODE_2071_length_2938_cov_223.251 1149908 2938 391.39142273655546 +NODE_2072_length_2934_cov_223.606 1149989 2934 391.95262440354463 +NODE_2073_length_2928_cov_223.85 1149966 2928 392.74795081967216 +NODE_2074_length_2926_cov_224.043 1149999 2926 393.0276828434723 +NODE_2075_length_2924_cov_224.425 1149980 2924 393.29001367989054 +NODE_2076_length_2922_cov_224.274 1149959 2922 393.5520191649555 +NODE_2077_length_2918_cov_310.004 1139598 2918 390.5407813570939 +NODE_2078_length_2917_cov_584.535 2377206 2917 814.9489201234145 +NODE_2079_length_2906_cov_225.562 1149986 2906 395.7281486579491 +NODE_207_length_79221_cov_223.477 31492186 79221 397.52320723040606 +NODE_2080_length_2903_cov_228.036 1155702 2903 398.1060971408887 +NODE_2081_length_2900_cov_226.166 1149979 2900 396.5444827586207 +NODE_2082_length_2890_cov_226.769 1150000 2890 397.92387543252596 +NODE_2083_length_2886_cov_227.152 1149961 2886 398.46188496188495 +NODE_2084_length_2879_cov_227.965 1149951 2879 399.4272316776659 +NODE_2085_length_2878_cov_227.702 1149994 2878 399.5809589993051 +NODE_2086_length_2877_cov_228.045 1149969 2877 399.711157455683 +NODE_2087_length_2877_cov_227.744 1149973 2877 399.71254779283976 +NODE_2088_length_2876_cov_228.202 1149948 2876 399.8428372739917 +NODE_2089_length_2867_cov_219.032 1099976 2867 383.6679455877224 +NODE_208_length_78965_cov_223.445 31549136 78965 399.5331602608751 +NODE_2090_length_2866_cov_219.073 1099970 2866 383.7997208653175 +NODE_2091_length_2854_cov_220.038 1099941 2854 385.4032936229853 +NODE_2092_length_2854_cov_219.41 1099952 2854 385.4071478626489 +NODE_2093_length_2849_cov_220.304 1099987 2849 386.0958230958231 +NODE_2094_length_2845_cov_243.083 1178743 2845 414.32091388400704 +NODE_2095_length_2842_cov_235.275 1178950 2842 414.8311048557354 +NODE_2096_length_2835_cov_221.515 1099977 2835 387.9989417989418 +NODE_2097_length_2832_cov_221.753 1099964 2832 388.4053672316384 +NODE_2098_length_2829_cov_220.863 1087507 2829 384.4139271827501 +NODE_2099_length_2828_cov_221.566 1099962 2828 388.95403111739745 +NODE_209_length_78896_cov_225.682 31599691 78896 400.52335986615293 +NODE_20_length_436718_cov_222.09 173231157 436634 396.74225323726506 +NODE_2100_length_2826_cov_222.22 1099947 2826 389.223991507431 +NODE_2101_length_2811_cov_223.427 1099935 2811 391.2966915688367 +NODE_2102_length_2805_cov_223.951 1099999 2805 392.1565062388592 +NODE_2103_length_2802_cov_213.077 1038161 2802 370.50713775874374 +NODE_2104_length_2785_cov_234.778 1106660 2785 397.36445242369837 +NODE_2105_length_2783_cov_225.74 1099984 2783 395.2511678045275 +NODE_2106_length_2783_cov_228.547 1081475 2783 388.600431189364 +NODE_2107_length_2779_cov_226.055 1099964 2779 395.812882331774 +NODE_2108_length_2777_cov_225.909 1099961 2777 396.0968671227944 +NODE_2109_length_2768_cov_223.252 1027322 2768 371.14234104046244 +NODE_210_length_78825_cov_223.98 31499184 78825 399.60905803996195 +NODE_2110_length_2767_cov_227.035 1099985 2767 397.5370437296711 +NODE_2111_length_2767_cov_226.808 1099976 2767 397.5337911095049 +NODE_2112_length_2759_cov_223.68 1066137 2759 386.42152953968827 +NODE_2113_length_2757_cov_227.898 1099955 2757 398.96808124773304 +NODE_2114_length_2756_cov_227.984 1099976 2756 399.12046444121916 +NODE_2115_length_2755_cov_227.865 1099935 2755 399.2504537205082 +NODE_2116_length_2749_cov_228.533 1099954 2749 400.1287740996726 +NODE_2117_length_2742_cov_247.338 998858 2742 364.2808169219548 +NODE_2118_length_2732_cov_219.589 1049988 2732 384.3294289897511 +NODE_2119_length_2717_cov_220.841 1049962 2717 386.44166359955835 +NODE_211_length_78236_cov_223.693 31249074 78236 399.42065033999694 +NODE_2120_length_2715_cov_220.936 1049981 2715 386.73333333333335 +NODE_2121_length_2704_cov_220.327 1036141 2704 383.1882396449704 +NODE_2122_length_2701_cov_222.172 1049970 2701 388.73380229544614 +NODE_2123_length_2699_cov_222.262 1049968 2699 389.0211189329381 +NODE_2124_length_2699_cov_221.986 1049977 2699 389.02445350129676 +NODE_2125_length_2694_cov_222.734 1049941 2694 389.7331106161841 +NODE_2126_length_2692_cov_222.451 1049975 2692 390.0352897473997 +NODE_2127_length_2691_cov_222.956 1049953 2691 390.17205499814196 +NODE_2128_length_2684_cov_226.957 1067253 2684 397.63524590163934 +NODE_2129_length_2680_cov_223.887 1049973 2680 391.78097014925373 +NODE_212_length_78088_cov_223.989 31196551 78088 399.5050583956562 +NODE_2130_length_2674_cov_224.052 1049963 2674 392.6563201196709 +NODE_2131_length_2669_cov_224.785 1049984 2669 393.3997751967029 +NODE_2132_length_2665_cov_225.235 1049996 2665 393.99474671669793 +NODE_2133_length_2660_cov_225.197 1049990 2660 394.73308270676694 +NODE_2134_length_2659_cov_223.354 1022572 2659 384.57013915005643 +NODE_2135_length_2658_cov_235.823 1041281 2658 391.7535741158766 +NODE_2136_length_2657_cov_225.57 1049981 2657 395.1753857734287 +NODE_2137_length_2657_cov_473.069 2044452 2657 769.4587881068875 +NODE_2138_length_2656_cov_233.187 1081265 2656 407.1027861445783 +NODE_2139_length_2652_cov_226.371 1049927 2652 395.90007541478127 +NODE_213_length_77417_cov_223.95 30957996 77417 399.88627820762883 +NODE_2140_length_2648_cov_226.565 1049961 2648 396.5109516616314 +NODE_2141_length_2647_cov_226.664 1049978 2647 396.66717038156406 +NODE_2142_length_2645_cov_226.365 1049953 2645 396.9576559546314 +NODE_2143_length_2636_cov_179.412 827223 2636 313.81752655538696 +NODE_2144_length_2636_cov_227.786 1049972 2636 398.32018209408193 +NODE_2145_length_2633_cov_228.028 1049938 2633 398.7611090011394 +NODE_2146_length_2632_cov_227.674 1049961 2632 398.9213525835866 +NODE_2147_length_2627_cov_197.274 906465 2617 346.37562094000765 +NODE_2148_length_2609_cov_219.146 999974 2609 383.27865082407055 +NODE_2149_length_2599_cov_219.758 999972 2599 384.75259715275104 +NODE_214_length_76544_cov_224.961 30648567 76544 400.40456469481603 +NODE_2150_length_2588_cov_220.722 999994 2588 386.3964451313756 +NODE_2151_length_2585_cov_221.141 999968 2585 386.8348162475822 +NODE_2152_length_2576_cov_222.003 999990 2576 388.1948757763975 +NODE_2153_length_2576_cov_221.806 999937 2576 388.174301242236 +NODE_2154_length_2574_cov_221.486 999941 2574 388.477466977467 +NODE_2155_length_2552_cov_224.196 999979 2552 391.8413009404389 +NODE_2156_length_2551_cov_224.26 999981 2551 391.99568796550375 +NODE_2157_length_2549_cov_224.494 999999 2549 392.31031777167516 +NODE_2158_length_2536_cov_225.57 999966 2536 394.30835962145113 +NODE_2159_length_2422_cov_222.719 925411 2422 382.0854665565648 +NODE_215_length_75764_cov_224.153 30299046 75764 399.9134945356634 +NODE_2160_length_2532_cov_225.7 999999 2532 394.9443127962085 +NODE_2161_length_2531_cov_226.129 999995 2531 395.09877518767286 +NODE_2162_length_2526_cov_226.304 999955 2526 395.8650039588282 +NODE_2163_length_2508_cov_227.6 999990 2508 398.7200956937799 +NODE_2164_length_2506_cov_2720.34 11667904 2506 4655.987230646449 +NODE_2165_length_2504_cov_304.981 932659 2504 372.4676517571885 +NODE_2166_length_2494_cov_218.027 949954 2494 380.89574979951885 +NODE_2167_length_2494_cov_256.018 972262 2494 389.84041700080195 +NODE_2168_length_2489_cov_200.094 869944 2479 350.92537313432837 +NODE_2169_length_2487_cov_218.403 949985 2487 381.9802975472457 +NODE_216_length_75598_cov_224.638 30291666 75598 400.6940130691288 +NODE_2170_length_2482_cov_219.148 949967 2482 382.7425463336019 +NODE_2171_length_2479_cov_261.289 949985 2477 383.5224061364554 +NODE_2172_length_2476_cov_444.017 1891455 2476 763.9155896607431 +NODE_2173_length_2475_cov_234.076 949326 2475 383.5660606060606 +NODE_2174_length_2471_cov_220.096 949962 2471 384.4443545123432 +NODE_2175_length_2469_cov_220.303 949997 2469 384.7699473471041 +NODE_2176_length_2469_cov_219.611 949986 2469 384.7654921020656 +NODE_2177_length_2464_cov_220.5 949980 2464 385.54383116883116 +NODE_2178_length_2453_cov_227.472 969822 2453 395.3615980432124 +NODE_2179_length_2443_cov_222.725 949986 2443 388.86041751944333 +NODE_217_length_75277_cov_222.727 29923246 75277 397.5084820064562 +NODE_2180_length_2443_cov_222.673 949966 2443 388.85223086369217 +NODE_2181_length_2437_cov_223.254 949940 2437 389.798933114485 +NODE_2182_length_2434_cov_224.03 958766 2434 393.9055053410025 +NODE_2183_length_2425_cov_224.4 949975 2425 391.74226804123714 +NODE_2184_length_2421_cov_224.496 949926 2421 392.36926889714994 +NODE_2185_length_2416_cov_225.258 949988 2416 393.2069536423841 +NODE_2186_length_2414_cov_212.797 888115 2413 368.0542892664733 +NODE_2187_length_2410_cov_225.755 949994 2410 394.1883817427386 +NODE_2188_length_2400_cov_234.174 949744 2400 395.7266666666667 +NODE_2189_length_2400_cov_226.766 949987 2400 395.8279166666667 +NODE_218_length_75076_cov_222.777 29853954 75056 397.7557290556385 +NODE_2190_length_2390_cov_215.59 852580 2390 356.72803347280336 +NODE_2191_length_2388_cov_201.26 838965 2378 352.8027754415475 +NODE_2192_length_2386_cov_228.138 949955 2386 398.1370494551551 +NODE_2193_length_2385_cov_228.204 949977 2385 398.31320754716984 +NODE_2194_length_2375_cov_229.034 949968 2375 399.9865263157895 +NODE_2195_length_2365_cov_214.261 870767 2365 368.18900634249474 +NODE_2196_length_2362_cov_218.334 899981 2361 381.18636171113934 +NODE_2197_length_2357_cov_218.868 899984 2357 381.8345354263895 +NODE_2198_length_2355_cov_219.086 899970 2355 382.1528662420382 +NODE_2199_length_2353_cov_198.716 815750 2353 346.68508287292815 +NODE_219_length_74917_cov_223.282 29883328 74907 398.9390577649619 +NODE_21_length_435208_cov_224.27 173625849 435208 398.94912088013086 +NODE_2200_length_2349_cov_219.462 899990 2349 383.13750532141336 +NODE_2201_length_2349_cov_219.322 899951 2349 383.12090251170713 +NODE_2202_length_2342_cov_296.31 1067914 2327 458.9230769230769 +NODE_2203_length_2332_cov_221.251 899964 2332 385.91938250428814 +NODE_2204_length_2329_cov_221.012 899971 2329 386.4194933447832 +NODE_2205_length_2329_cov_221.146 899986 2329 386.4259338772005 +NODE_2206_length_2324_cov_221.904 899941 2324 387.2379518072289 +NODE_2207_length_2322_cov_222.251 899978 2322 387.5874246339363 +NODE_2208_length_2318_cov_222.341 899969 2318 388.25237273511647 +NODE_2209_length_2316_cov_222.371 899993 2316 388.59801381692574 +NODE_220_length_74406_cov_223.867 29699164 74406 399.15012230196487 +NODE_2210_length_2311_cov_223.293 899995 2311 389.4396365209866 +NODE_2211_length_2309_cov_223.071 899984 2309 389.77219575573844 +NODE_2212_length_2196_cov_229.232 859284 2196 391.2950819672131 +NODE_2213_length_2300_cov_224.037 899964 2300 391.2886956521739 +NODE_2214_length_2297_cov_224.712 899982 2297 391.8075750979539 +NODE_2215_length_2296_cov_224.788 899993 2296 391.98301393728224 +NODE_2216_length_2294_cov_224.782 899993 2294 392.32476024411505 +NODE_2217_length_2294_cov_664.163 2423153 2294 1056.3003487358326 +NODE_2218_length_2290_cov_561.215 2082938 2290 909.5799126637554 +NODE_2219_length_2285_cov_225.794 899956 2285 393.853829321663 +NODE_221_length_74348_cov_220.843 29308987 74318 394.3726553459458 +NODE_2220_length_2277_cov_226.278 899995 2277 395.25472112428633 +NODE_2221_length_2276_cov_233.594 915633 2276 402.29920913884007 +NODE_2222_length_2273_cov_227.137 899989 2273 395.9476462824461 +NODE_2223_length_2269_cov_220.058 849125 2269 374.228735125606 +NODE_2224_length_2263_cov_228.231 899964 2263 397.68625718073355 +NODE_2225_length_2263_cov_227.517 899977 2263 397.69200176756516 +NODE_2226_length_2257_cov_228.792 899987 2257 398.7536552946389 +NODE_2227_length_2239_cov_217.55 849986 2239 379.62751228226887 +NODE_2228_length_2239_cov_217.584 849990 2239 379.6292987941045 +NODE_2229_length_2238_cov_217.95 849968 2238 379.7890974084004 +NODE_222_length_74334_cov_225.095 29598297 74324 398.2333701092514 +NODE_2230_length_2238_cov_217.985 849958 2238 379.7846291331546 +NODE_2231_length_2238_cov_217.563 849969 2238 379.78954423592495 +NODE_2232_length_2237_cov_217.707 850000 2237 379.9731783638802 +NODE_2233_length_2235_cov_449.159 1754298 2235 784.9208053691275 +NODE_2234_length_2233_cov_218.486 849999 2233 380.653381101657 +NODE_2235_length_2233_cov_218.518 850000 2232 380.82437275985666 +NODE_2236_length_2228_cov_218.996 849995 2228 381.5058348294435 +NODE_2237_length_2221_cov_219.262 849974 2221 382.6987843313823 +NODE_2238_length_2217_cov_220.027 849968 2217 383.3865584122688 +NODE_2239_length_2216_cov_220.122 849966 2183 389.3568483737975 +NODE_223_length_74031_cov_223.64 29530497 74031 398.893666166876 +NODE_2240_length_2208_cov_221.024 849986 2208 384.9574275362319 +NODE_2241_length_2207_cov_224.484 849979 2207 385.1286814680562 +NODE_2242_length_2205_cov_193.647 743875 2205 337.3582766439909 +NODE_2243_length_2190_cov_222.154 849908 2190 388.08584474885845 +NODE_2244_length_2185_cov_223.108 849994 2185 389.0132723112128 +NODE_2245_length_2183_cov_223.011 849969 2183 389.35822262940906 +NODE_2246_length_2180_cov_223.91 849994 2180 389.905504587156 +NODE_2247_length_2176_cov_233.375 869517 2176 399.59420955882354 +NODE_2248_length_2175_cov_224.263 849973 2175 390.79218390804596 +NODE_2249_length_2174_cov_224.521 849951 2174 390.9618215271389 +NODE_224_length_73882_cov_223.991 29549117 73882 399.9501502395712 +NODE_2250_length_2172_cov_1470.22 5414540 2172 2492.8821362799263 +NODE_2251_length_2169_cov_225.033 849939 2169 391.8575380359613 +NODE_2252_length_2167_cov_225.28 849982 2167 392.2390401476696 +NODE_2253_length_2161_cov_225.697 849977 2161 393.32577510411846 +NODE_2254_length_2155_cov_226.153 850000 2155 394.43155452436196 +NODE_2255_length_2151_cov_226.962 849980 2151 395.15574151557416 +NODE_2256_length_2146_cov_227.38 849977 2146 396.0750232991612 +NODE_2257_length_2141_cov_297.385 868109 2141 405.4689397477814 +NODE_2258_length_2139_cov_228.335 849983 2139 397.3740065451145 +NODE_2259_length_2137_cov_228.831 857875 2137 401.43893308376227 +NODE_225_length_73875_cov_224.087 29544688 73875 399.9280947546531 +NODE_2260_length_2127_cov_229.417 849947 2127 399.5989656793606 +NODE_2261_length_2125_cov_229.916 849985 2125 399.9929411764706 +NODE_2262_length_2113_cov_228.983 807273 2113 382.050638902035 +NODE_2263_length_2113_cov_217.651 799999 2113 378.60814008518696 +NODE_2264_length_2104_cov_218.335 799982 2104 380.21958174904944 +NODE_2265_length_2100_cov_219.03 799983 2100 380.94428571428574 +NODE_2266_length_2086_cov_220.417 799975 2086 383.4971236816874 +NODE_2267_length_2079_cov_221.316 799986 2079 384.7936507936508 +NODE_2268_length_2079_cov_220.94 799970 2079 384.7859547859548 +NODE_2269_length_2079_cov_220.886 799955 2079 384.7787397787398 +NODE_226_length_73868_cov_223.403 29499202 73868 399.35021931012074 +NODE_2270_length_2077_cov_221.17 800000 2077 385.1709195955705 +NODE_2271_length_2076_cov_221.493 800000 2076 385.35645472061657 +NODE_2272_length_2076_cov_221.273 799934 2076 385.3246628131021 +NODE_2273_length_2075_cov_221.493 799999 2075 385.541686746988 +NODE_2274_length_2073_cov_221.515 800000 2073 385.9141341051616 +NODE_2275_length_2071_cov_181.054 651991 2061 316.3469189713731 +NODE_2276_length_2056_cov_223.635 799973 2056 389.09192607003894 +NODE_2277_length_2046_cov_224.735 799990 2046 391.0019550342131 +NODE_2278_length_2041_cov_225.341 799992 2041 391.9608035276825 +NODE_2279_length_2041_cov_225.111 799960 2041 391.9451249387555 +NODE_227_length_73745_cov_223.116 29329198 73745 397.7110041358736 +NODE_2280_length_2035_cov_229.242 799930 2035 393.0859950859951 +NODE_2281_length_2032_cov_226.403 799988 2032 393.6948818897638 +NODE_2282_length_2022_cov_227.725 800000 2022 395.64787339268054 +NODE_2283_length_2006_cov_229.548 799984 2006 398.7956131605184 +NODE_2284_length_2002_cov_229.923 799997 2002 399.5989010989011 +NODE_2285_length_2000_cov_223.121 762294 2000 381.147 +NODE_2286_length_1998_cov_208.039 714596 1998 357.65565565565566 +NODE_2287_length_1996_cov_215.976 749951 1996 375.72695390781564 +NODE_2288_length_1992_cov_232.925 772823 1992 387.9633534136546 +NODE_2289_length_1983_cov_217.29 749975 1983 378.20221886031266 +NODE_228_length_73659_cov_222.775 29300454 73659 397.7851179081986 +NODE_2290_length_1981_cov_217.549 749998 1981 378.59565875820294 +NODE_2291_length_1979_cov_240.312 819135 1979 413.9135927235978 +NODE_2292_length_1978_cov_218.216 749983 1978 379.1622851365015 +NODE_2293_length_1964_cov_219.913 750000 1964 381.87372708757636 +NODE_2294_length_1961_cov_219.909 749974 1961 382.4446710861805 +NODE_2295_length_1958_cov_228.941 763408 1958 389.8917262512768 +NODE_2296_length_1950_cov_221.332 749965 1950 384.5974358974359 +NODE_2297_length_1939_cov_222.63 749990 1939 386.7921609076844 +NODE_2298_length_1936_cov_223.036 749974 1904 393.8939075630252 +NODE_2299_length_1933_cov_223.563 749965 1933 387.97982410760477 +NODE_229_length_73312_cov_220 28790755 73292 392.8226136549692 +NODE_22_length_427389_cov_223.136 170281664 427379 398.4324545660877 +NODE_2300_length_1933_cov_223.59 749957 1933 387.97568546301085 +NODE_2301_length_1932_cov_223.659 749986 1932 388.1915113871636 +NODE_2302_length_1932_cov_223.465 749968 1932 388.1821946169772 +NODE_2303_length_1929_cov_223.676 749967 1929 388.7853810264386 +NODE_2304_length_1926_cov_224.441 750000 1926 389.4080996884735 +NODE_2305_length_1925_cov_224.186 749982 1925 389.60103896103897 +NODE_2306_length_1924_cov_246.792 749963 1924 389.79365904365903 +NODE_2307_length_1922_cov_245.885 806173 1922 419.44484911550467 +NODE_2308_length_1920_cov_225.138 749999 1920 390.6244791666667 +NODE_2309_length_1919_cov_225.264 749986 1919 390.8212610734758 +NODE_230_length_72898_cov_224.183 29154927 72898 399.94138385140883 +NODE_2310_length_1911_cov_232.609 766426 1911 401.0601779173208 +NODE_2311_length_1904_cov_226.73 749938 1904 393.875 +NODE_2312_length_1891_cov_228.566 749989 1891 396.6097303014278 +NODE_2313_length_1889_cov_228.848 749998 1889 397.0344097406035 +NODE_2314_length_1889_cov_228.558 749985 1889 397.0275277924828 +NODE_2315_length_1886_cov_229.269 749980 1886 397.6564156945917 +NODE_2316_length_1883_cov_229.672 749985 1883 398.29261816250664 +NODE_2317_length_1879_cov_230.218 749974 1879 399.1346460883449 +NODE_2318_length_1875_cov_230.573 750000 1875 400.0 +NODE_2319_length_1862_cov_216.851 699977 1862 375.92749731471537 +NODE_231_length_72500_cov_221.175 28606720 72500 394.5754482758621 +NODE_2320_length_1853_cov_217.675 699986 1853 377.7582298974636 +NODE_2321_length_1852_cov_218.094 699974 1851 378.1599135602377 +NODE_2322_length_1851_cov_217.942 699996 1851 378.17179902755265 +NODE_2323_length_1850_cov_217.69 699979 1850 378.367027027027 +NODE_2324_length_1850_cov_673.042 2111238 1850 1141.2097297297298 +NODE_2325_length_1847_cov_445.664 1397248 1847 756.4959393611261 +NODE_2326_length_1840_cov_219.484 699994 1840 380.43152173913046 +NODE_2327_length_1836_cov_219.825 699973 1836 381.24891067538124 +NODE_2328_length_1832_cov_230.497 723424 1832 394.882096069869 +NODE_2329_length_1831_cov_220.258 699955 1831 382.2801747678864 +NODE_232_length_72229_cov_224.347 28848322 72229 399.4008223843609 +NODE_2330_length_1829_cov_220.765 699973 1829 382.70803717878624 +NODE_2331_length_1828_cov_220.752 699999 1828 382.9316192560175 +NODE_2332_length_1815_cov_238.977 743153 1815 409.45068870523414 +NODE_2333_length_1809_cov_223.092 699999 1809 386.9535655058043 +NODE_2334_length_1796_cov_225.105 699966 1796 389.7360801781737 +NODE_2335_length_1796_cov_224.203 699977 1796 389.7422048997773 +NODE_2336_length_1783_cov_226.72 699961 1783 392.57487380818844 +NODE_2337_length_1776_cov_231.035 702271 1776 395.42286036036035 +NODE_2338_length_1775_cov_227.657 699984 1775 394.3571830985916 +NODE_2339_length_1773_cov_228.048 699968 1773 394.7930062041737 +NODE_233_length_72140_cov_223.664 28799286 72140 399.2138342112559 +NODE_2340_length_1750_cov_231.09 699995 1750 399.9971428571429 +NODE_2341_length_1733_cov_216.418 649989 1733 375.065781881131 +NODE_2342_length_1732_cov_216.872 649972 1732 375.27251732101615 +NODE_2343_length_1732_cov_216.727 649973 1732 375.2730946882217 +NODE_2344_length_1720_cov_218.584 649976 1720 377.89302325581394 +NODE_2345_length_1720_cov_218.557 649970 1720 377.88953488372096 +NODE_2346_length_1715_cov_392.698 798111 1715 465.37084548104957 +NODE_2347_length_1712_cov_238.668 696641 1712 406.9164719626168 +NODE_2348_length_1711_cov_219.27 649998 1711 379.8936294564582 +NODE_2349_length_1709_cov_219.938 649993 1709 380.33528379169104 +NODE_234_length_72015_cov_224.837 28872226 72005 400.975293382404 +NODE_2350_length_1704_cov_220.714 649983 1704 381.44542253521126 +NODE_2351_length_1700_cov_223.739 649963 1700 382.3311764705882 +NODE_2352_length_1691_cov_222.461 649970 1691 384.37019515079834 +NODE_2353_length_1681_cov_223.648 649999 1681 386.674003569304 +NODE_2354_length_1677_cov_194.046 562229 1667 337.26994601079787 +NODE_2355_length_1676_cov_224.155 649979 1676 387.8156324582339 +NODE_2356_length_1668_cov_272.066 874221 1668 524.113309352518 +NODE_2357_length_1658_cov_226.9 649984 1658 392.02895054282266 +NODE_2358_length_1650_cov_227.855 649941 1650 393.90363636363634 +NODE_2359_length_1648_cov_228.41 649956 1648 394.3907766990291 +NODE_235_length_71872_cov_223.708 28699350 71872 399.3119712822796 +NODE_2360_length_1647_cov_228.501 649991 1647 394.6514875531269 +NODE_2361_length_1644_cov_228.717 649985 1644 395.36800486618006 +NODE_2362_length_1636_cov_186.626 511000 1621 315.2375077112893 +NODE_2363_length_1633_cov_230.606 649996 1633 398.03796693202696 +NODE_2364_length_1633_cov_229.984 649895 1633 397.97611757501534 +NODE_2365_length_1630_cov_230.973 649992 1630 398.7680981595092 +NODE_2366_length_1612_cov_215.772 599974 1612 372.1923076923077 +NODE_2367_length_1610_cov_215.898 599935 1610 372.6304347826087 +NODE_2368_length_1605_cov_216.248 599969 1605 373.81246105919 +NODE_2369_length_1602_cov_217.111 599996 1602 374.5293383270911 +NODE_236_length_71689_cov_225.667 28599206 71689 398.9343692895702 +NODE_2370_length_1598_cov_217.555 600000 1598 375.4693366708386 +NODE_2371_length_1596_cov_179.539 494214 1586 311.6103404791929 +NODE_2372_length_1595_cov_217.69 599978 1595 376.1617554858934 +NODE_2373_length_1592_cov_218.269 599989 1592 376.87751256281405 +NODE_2374_length_1589_cov_218.954 599979 1589 377.58275645059786 +NODE_2375_length_1588_cov_150.556 412140 1578 261.1787072243346 +NODE_2376_length_1588_cov_219.001 599949 1588 377.801637279597 +NODE_2377_length_1584_cov_281.88 696519 1584 439.72159090909093 +NODE_2378_length_1582_cov_219.422 599999 1582 379.2661188369153 +NODE_2379_length_1579_cov_220.125 599987 1579 379.97910069664346 +NODE_237_length_71631_cov_224.751 28585104 71631 399.06051849059764 +NODE_2380_length_1578_cov_243.014 594048 1578 376.4562737642586 +NODE_2381_length_1571_cov_221.572 599983 1571 381.9115213239975 +NODE_2382_length_1570_cov_223.475 598570 1563 382.96225207933463 +NODE_2383_length_1568_cov_221.973 599999 1568 382.65242346938777 +NODE_2384_length_1567_cov_221.566 599999 1567 382.8966177409062 +NODE_2385_length_1567_cov_221.937 599970 1567 382.8781110402042 +NODE_2386_length_1567_cov_222.149 599984 1567 382.8870453095086 +NODE_2387_length_1566_cov_222.199 599969 1566 383.12196679438057 +NODE_2388_length_1560_cov_199.442 531980 1560 341.0128205128205 +NODE_2389_length_1560_cov_223.027 599981 1560 384.6032051282051 +NODE_238_length_71309_cov_224.233 28499470 71309 399.66161354106777 +NODE_2390_length_1550_cov_224.353 600000 1550 387.0967741935484 +NODE_2391_length_1546_cov_225.258 599999 1546 388.0976714100906 +NODE_2392_length_1542_cov_225.929 599950 1542 389.0726329442283 +NODE_2393_length_1541_cov_225.75 599991 1541 389.3517196625568 +NODE_2394_length_1539_cov_226.14 599986 1539 389.8544509421702 +NODE_2395_length_1536_cov_226.143 600000 1536 390.625 +NODE_2396_length_1534_cov_227.151 599992 1534 391.12907431551497 +NODE_2397_length_1530_cov_227.226 599988 1530 392.1490196078431 +NODE_2398_length_1526_cov_228.104 599986 1526 393.175622542595 +NODE_2399_length_1524_cov_228.668 599984 1524 393.6902887139108 +NODE_239_length_70704_cov_224.782 28330626 70704 400.6933978275628 +NODE_23_length_426686_cov_225.997 171615258 426686 402.20503602180526 +NODE_2400_length_1523_cov_228.671 599993 1523 393.9546946815496 +NODE_2401_length_1521_cov_228.452 599991 1521 394.4714003944773 +NODE_2402_length_1518_cov_229.176 599992 1518 395.2516469038208 +NODE_2403_length_1517_cov_229.718 599991 1517 395.5115359261701 +NODE_2404_length_1517_cov_229.71 599989 1517 395.51021753460776 +NODE_2405_length_1516_cov_229.949 599985 1516 395.76846965699207 +NODE_2406_length_1509_cov_230.917 599965 1509 397.59111994698475 +NODE_2407_length_1504_cov_231.336 599964 1504 398.91223404255317 +NODE_2408_length_1500_cov_232.093 599998 1500 399.9986666666667 +NODE_2409_length_1494_cov_213.763 550000 1494 368.1392235609103 +NODE_240_length_70530_cov_224.011 28189719 70530 399.68409187579755 +NODE_2410_length_1493_cov_213.873 549969 1493 368.36503683858007 +NODE_2411_length_1489_cov_233.707 592386 1489 397.8415043653459 +NODE_2412_length_1488_cov_214.829 549987 1488 369.6149193548387 +NODE_2413_length_1483_cov_215.465 549991 1483 370.86378961564395 +NODE_2414_length_1481_cov_215.927 549993 1481 371.3659689399055 +NODE_2415_length_1479_cov_215.802 549984 1479 371.86206896551727 +NODE_2416_length_1476_cov_216.599 549995 1476 372.62533875338755 +NODE_2417_length_1475_cov_216.835 549996 1475 372.8786440677966 +NODE_2418_length_1472_cov_219.306 548571 1472 372.6705163043478 +NODE_2419_length_1470_cov_217.526 549993 1470 374.14489795918365 +NODE_241_length_70334_cov_224.282 28196352 70334 400.89220007393294 +NODE_2420_length_1459_cov_219.283 549997 1459 376.96847155586016 +NODE_2421_length_1458_cov_219.316 549971 1458 377.2091906721536 +NODE_2422_length_1458_cov_219.337 550000 1458 377.22908093278465 +NODE_2423_length_1452_cov_219.904 549997 1452 378.7858126721763 +NODE_2424_length_1452_cov_220.099 549971 1452 378.7679063360882 +NODE_2425_length_1450_cov_231.825 569935 1450 393.0586206896552 +NODE_2426_length_1449_cov_220.816 549997 1449 379.57004830917873 +NODE_2427_length_1446_cov_220.968 549986 1446 380.3499308437068 +NODE_2428_length_1441_cov_221.411 549985 1408 390.61434659090907 +NODE_2429_length_1440_cov_416.406 906006 1440 629.1708333333333 +NODE_242_length_70146_cov_224.845 28049860 70146 399.8782539275226 +NODE_2430_length_1440_cov_222.251 549967 1440 381.9215277777778 +NODE_2431_length_1440_cov_274.112 736095 1440 511.1770833333333 +NODE_2432_length_1439_cov_210.963 509211 1439 353.864489228631 +NODE_2433_length_1435_cov_222.862 549976 1435 383.25853658536585 +NODE_2434_length_1432_cov_223.298 549991 1432 384.07192737430165 +NODE_2435_length_1430_cov_223.162 549999 1430 384.6146853146853 +NODE_2436_length_1429_cov_224.051 549969 1424 386.21418539325845 +NODE_2437_length_1429_cov_223.884 549994 1429 384.8803358992302 +NODE_2438_length_1428_cov_224.307 549996 1428 385.1512605042017 +NODE_2439_length_1428_cov_224.031 549989 1428 385.14635854341736 +NODE_243_length_69952_cov_223.384 27891693 69952 398.72616937328456 +NODE_2440_length_1422_cov_225.253 549982 1422 386.76652601969056 +NODE_2441_length_1420_cov_225.04 549983 1420 387.3119718309859 +NODE_2442_length_1418_cov_225.404 549994 1418 387.8660084626234 +NODE_2443_length_1415_cov_226.376 549990 1415 388.6855123674912 +NODE_2444_length_1409_cov_218.182 506566 1409 359.5216465578424 +NODE_2445_length_1408_cov_227.217 549954 1408 390.59232954545456 +NODE_2446_length_1407_cov_227.75 549977 1407 390.8862828713575 +NODE_2447_length_1407_cov_227.169 549987 1407 390.89339019189765 +NODE_2448_length_1404_cov_228.22 549993 1404 391.732905982906 +NODE_2449_length_1397_cov_229.398 549973 1397 393.68146027201146 +NODE_244_length_69668_cov_224.331 27896062 69668 400.4142791525521 +NODE_2450_length_1397_cov_229.315 549969 1397 393.6785969935576 +NODE_2451_length_1396_cov_409.105 939357 1396 672.8918338108882 +NODE_2452_length_1396_cov_229.638 550000 1396 393.98280802292265 +NODE_2453_length_1395_cov_208.778 499980 1395 358.40860215053766 +NODE_2454_length_1394_cov_179.149 428484 1394 307.377331420373 +NODE_2455_length_1391_cov_442.204 1027612 1391 738.7577282530553 +NODE_2456_length_1384_cov_3539.28 8220450 1384 5939.631502890174 +NODE_2457_length_1378_cov_205.686 454240 1378 329.6371552975327 +NODE_2458_length_1376_cov_233.093 549984 1376 399.69767441860466 +NODE_2459_length_1376_cov_233.107 549982 1376 399.6962209302326 +NODE_245_length_69269_cov_224.753 27700035 69269 399.89078808702305 +NODE_2460_length_1373_cov_232.533 549984 1373 400.571012381646 +NODE_2461_length_1371_cov_212.619 499986 1371 364.6870897155361 +NODE_2462_length_1369_cov_232.522 477237 1369 348.6026296566837 +NODE_2463_length_1366_cov_213.526 499969 1366 366.0095168374817 +NODE_2464_length_1364_cov_224.788 503347 1364 369.02272727272725 +NODE_2465_length_1361_cov_214.139 499984 1361 367.3651726671565 +NODE_2466_length_1355_cov_215.262 499999 1355 369.0029520295203 +NODE_2467_length_1354_cov_215.032 499991 1354 369.2695716395864 +NODE_2468_length_1349_cov_216.306 499989 1349 370.63676797627875 +NODE_2469_length_1348_cov_216.491 500000 1348 370.919881305638 +NODE_246_length_68890_cov_226.76 27712580 68890 402.2728988242125 +NODE_2470_length_1345_cov_217.001 499992 1345 371.74126394052047 +NODE_2471_length_1341_cov_217.575 500000 1341 372.85607755406414 +NODE_2472_length_1336_cov_218.482 499999 1336 374.250748502994 +NODE_2473_length_1335_cov_217.98 497095 1335 372.3558052434457 +NODE_2474_length_1334_cov_218.803 499996 1334 374.8095952023988 +NODE_2475_length_1333_cov_219.016 499971 1333 375.0720180045011 +NODE_2476_length_1331_cov_219.02 499984 1331 375.64537941397447 +NODE_2477_length_1328_cov_191.215 408614 1328 307.69126506024094 +NODE_2478_length_1318_cov_221.539 499978 1318 379.34597875569045 +NODE_2479_length_1317_cov_221.559 499983 1317 379.6378132118451 +NODE_247_length_68768_cov_228.048 27500454 68768 399.9019020474639 +NODE_2480_length_1316_cov_221.604 500000 1316 379.9392097264438 +NODE_2481_length_1310_cov_223.072 500000 1310 381.6793893129771 +NODE_2482_length_1310_cov_222.87 499962 1310 381.65038167938934 +NODE_2483_length_1305_cov_223.872 499980 1305 383.1264367816092 +NODE_2484_length_1305_cov_436.229 946718 1305 725.4544061302682 +NODE_2485_length_1303_cov_224.022 499994 1303 383.7252494244052 +NODE_2486_length_1296_cov_225.284 486459 1296 375.3541666666667 +NODE_2487_length_1285_cov_227.415 499968 1285 389.0801556420233 +NODE_2488_length_1279_cov_258.48 570089 1279 445.7302580140735 +NODE_2489_length_1279_cov_228.337 499967 1279 390.90461297888976 +NODE_248_length_68694_cov_223.877 27449303 68694 399.5880717384342 +NODE_2490_length_1278_cov_228.886 499983 1278 391.2230046948357 +NODE_2491_length_1277_cov_228.998 499957 1277 391.5090054815975 +NODE_2492_length_1276_cov_228.576 499994 1276 391.8448275862069 +NODE_2493_length_1275_cov_229.407 500000 1275 392.15686274509807 +NODE_2494_length_1274_cov_3268.61 6917611 1274 5429.8359497645215 +NODE_2495_length_1272_cov_240.988 491000 1272 386.0062893081761 +NODE_2496_length_1272_cov_456.397 966531 1272 759.8514150943396 +NODE_2497_length_1269_cov_182.307 398500 1269 314.026792750197 +NODE_2498_length_1262_cov_231.757 499983 1262 396.18304278922346 +NODE_2499_length_1262_cov_231.675 500000 1262 396.19651347068145 +NODE_249_length_68690_cov_223.935 27448945 68690 399.60612898529627 +NODE_24_length_372190_cov_224.35 148958841 372190 400.2225771783229 +NODE_2500_length_1258_cov_232.476 499999 1258 397.45548489666135 +NODE_2501_length_1256_cov_233.026 499992 1256 398.0828025477707 +NODE_2502_length_1255_cov_233.292 499972 1255 398.38406374501994 +NODE_2503_length_1253_cov_233.708 499990 1253 399.0343176376696 +NODE_2504_length_1253_cov_1331.41 2723829 1253 2173.8459696727855 +NODE_2505_length_1252_cov_233.757 499996 1252 399.3578274760383 +NODE_2506_length_1252_cov_224.785 476389 1252 380.5023961661342 +NODE_2507_length_1248_cov_884.028 1757446 1248 1408.2099358974358 +NODE_2508_length_1245_cov_211.574 450000 1245 361.4457831325301 +NODE_2509_length_1243_cov_344.992 390989 1243 314.5526950925181 +NODE_250_length_68593_cov_224.217 27322442 68583 398.38505168919414 +NODE_2510_length_1243_cov_211.785 449970 1243 362.00321802091713 +NODE_2511_length_1242_cov_211.6 450000 1242 362.3188405797101 +NODE_2512_length_1240_cov_452.945 933692 1240 752.9774193548387 +NODE_2513_length_1238_cov_212.581 449985 1238 363.4773828756058 +NODE_2514_length_1237_cov_212.939 449991 1237 363.7760711398545 +NODE_2515_length_1237_cov_204.898 424526 1237 343.1899757477769 +NODE_2516_length_1236_cov_212.967 450000 1236 364.07766990291265 +NODE_2517_length_1234_cov_213.644 449979 1234 364.6507293354943 +NODE_2518_length_1234_cov_213.707 450000 1234 364.6677471636953 +NODE_2519_length_1233_cov_213.322 449999 1233 364.9626926196269 +NODE_251_length_68498_cov_226.631 27352624 68498 399.3200385412713 +NODE_2520_length_1228_cov_214.298 449979 1228 366.4324104234528 +NODE_2521_length_1225_cov_215.265 450000 1225 367.3469387755102 +NODE_2522_length_1224_cov_215.39 449993 1224 367.64133986928107 +NODE_2523_length_1224_cov_215.165 449986 1224 367.6356209150327 +NODE_2524_length_1223_cov_213.919 446923 1223 365.43172526574 +NODE_2525_length_1221_cov_215.514 449978 1221 368.5323505323505 +NODE_2526_length_1217_cov_216.612 449980 1217 369.74527526705015 +NODE_2527_length_1214_cov_219.062 449737 1214 370.45881383855027 +NODE_2528_length_1213_cov_217.324 449997 1213 370.9785655399835 +NODE_2529_length_1212_cov_217.729 449968 1212 371.26072607260727 +NODE_252_length_68452_cov_223.806 27349377 68452 399.5409484017998 +NODE_2530_length_1208_cov_218.103 449997 1208 372.51407284768214 +NODE_2531_length_1202_cov_219.556 449989 1202 374.36688851913476 +NODE_2532_length_1201_cov_244.254 499992 1201 416.31307243963363 +NODE_2533_length_1199_cov_176.304 360500 1199 300.6672226855713 +NODE_2534_length_1194_cov_221.112 450000 1194 376.88442211055275 +NODE_2535_length_1193_cov_220.75 449982 1193 377.18524727577534 +NODE_2536_length_1192_cov_221.19 494979 1192 415.2508389261745 +NODE_2537_length_1190_cov_221.992 449981 1190 378.13529411764705 +NODE_2538_length_1190_cov_451.244 888857 1190 746.9386554621849 +NODE_2539_length_1190_cov_318.105 695076 1190 584.0974789915966 +NODE_253_length_68341_cov_224.285 27399276 68341 400.9200333621106 +NODE_2540_length_1189_cov_222.089 449999 1189 378.46846089150546 +NODE_2541_length_1188_cov_221.989 450000 1188 378.7878787878788 +NODE_2542_length_1180_cov_223.052 449996 1180 381.3525423728814 +NODE_2543_length_1180_cov_439.689 858318 1180 727.3881355932203 +NODE_2544_length_1180_cov_3037.26 5914044 1180 5011.901694915255 +NODE_2545_length_1180_cov_202.995 381101 1149 331.680591818973 +NODE_2546_length_1177_cov_224.156 449987 1177 382.31690739167374 +NODE_2547_length_1176_cov_224.128 449983 1176 382.63860544217687 +NODE_2548_length_1175_cov_241.003 462031 1175 393.2178723404255 +NODE_2549_length_1173_cov_225.296 450000 1173 383.6317135549872 +NODE_254_length_67623_cov_224.527 27062872 67623 400.20217973174806 +NODE_2550_length_1171_cov_245.155 476446 1171 406.87105038428695 +NODE_2551_length_1169_cov_225.69 450000 1169 384.94439692044483 +NODE_2552_length_1158_cov_461.622 879073 1158 759.1303972366148 +NODE_2553_length_1155_cov_229.075 450000 1155 389.61038961038963 +NODE_2554_length_1152_cov_228.861 449986 1152 390.61284722222223 +NODE_2555_length_1149_cov_230.287 450000 1149 391.644908616188 +NODE_2556_length_1140_cov_232.223 449968 1140 394.70877192982454 +NODE_2557_length_1138_cov_232.355 449981 1138 395.4138840070299 +NODE_2558_length_1138_cov_232.63 449987 1138 395.41915641476277 +NODE_2559_length_1138_cov_232.223 449972 1138 395.4059753954306 +NODE_255_length_67407_cov_224.646 27007750 67407 400.6668446897207 +NODE_2560_length_1137_cov_232.724 449994 1137 395.7730870712401 +NODE_2561_length_1136_cov_232.86 449976 1136 396.1056338028169 +NODE_2562_length_1136_cov_443.561 845718 1136 744.4700704225352 +NODE_2563_length_1132_cov_233.88 449991 1132 397.5185512367491 +NODE_2564_length_1128_cov_234.064 399968 1128 354.58156028368796 +NODE_2565_length_1127_cov_234.822 449991 1127 399.282165039929 +NODE_2566_length_1118_cov_446.611 822488 1118 735.6779964221824 +NODE_2567_length_1114_cov_211.473 399962 1114 359.03231597845604 +NODE_2568_length_1114_cov_211.457 399982 1114 359.05026929982046 +NODE_2569_length_1114_cov_211.504 399989 1114 359.05655296229804 +NODE_256_length_67339_cov_224.942 27012738 67339 401.14551745645167 +NODE_2570_length_1111_cov_204.556 378247 1111 340.45634563456343 +NODE_2571_length_1107_cov_212.774 400000 1107 361.3369467028004 +NODE_2572_length_1102_cov_213.923 399988 1102 362.9655172413793 +NODE_2573_length_1098_cov_214.654 399978 1098 364.27868852459017 +NODE_2574_length_1097_cov_214.798 399978 1097 364.61075660893346 +NODE_2575_length_1094_cov_215.525 400000 1094 365.6307129798903 +NODE_2576_length_1094_cov_215.48 400000 1094 365.6307129798903 +NODE_2577_length_1093_cov_221.668 373810 1093 342.00365965233306 +NODE_2578_length_1086_cov_249.098 405464 1086 373.35543278084714 +NODE_2579_length_1084_cov_217.408 399978 1084 368.98339483394835 +NODE_257_length_67324_cov_224.645 26898799 67324 399.5424959895431 +NODE_2580_length_1079_cov_218.629 399983 1079 370.6978683966636 +NODE_2581_length_1077_cov_219.076 399997 1077 371.3992571959146 +NODE_2582_length_1074_cov_219.003 400000 1074 372.43947858472995 +NODE_2583_length_1070_cov_220.367 399994 1070 373.82616822429907 +NODE_2584_length_1063_cov_222.139 400000 1063 376.29350893697085 +NODE_2585_length_1062_cov_222.324 399996 1062 376.64406779661016 +NODE_2586_length_1060_cov_222.613 399985 1060 377.3443396226415 +NODE_2587_length_1059_cov_222.745 399979 1059 377.6949952785647 +NODE_2588_length_1058_cov_223.14 400000 1058 378.0718336483932 +NODE_2589_length_1058_cov_223.202 399978 1058 378.05103969754254 +NODE_258_length_67091_cov_224.297 26847996 67091 400.1728398742007 +NODE_2590_length_1057_cov_223.48 400000 1057 378.4295175023652 +NODE_2591_length_1057_cov_223.492 399985 1057 378.41532639545886 +NODE_2592_length_1055_cov_236.6 385352 1055 365.2625592417062 +NODE_2593_length_1053_cov_224.291 399967 1053 379.8357075023742 +NODE_2594_length_1052_cov_224.136 400000 1052 380.22813688212926 +NODE_2595_length_1052_cov_224.577 399991 1052 380.21958174904944 +NODE_2596_length_1051_cov_224.64 399987 1051 380.5775451950523 +NODE_2597_length_1050_cov_683.275 1170592 1050 1114.8495238095238 +NODE_2598_length_1050_cov_2569.46 4386173 1050 4177.3076190476195 +NODE_2599_length_1048_cov_225.552 399999 1048 381.6784351145038 +NODE_259_length_66871_cov_223.814 26692958 66871 399.17091115730284 +NODE_25_length_363542_cov_225.568 145909300 363542 401.3547265515401 +NODE_2600_length_1046_cov_927.153 1540693 1046 1472.9378585086042 +NODE_2601_length_1045_cov_225.47 399998 1045 382.7732057416268 +NODE_2602_length_1044_cov_225.902 514441 1044 492.7595785440613 +NODE_2603_length_1044_cov_226.341 399981 1044 383.1235632183908 +NODE_2604_length_1043_cov_234.03 408467 1043 391.62703739213805 +NODE_2605_length_1039_cov_433.891 734995 1039 707.4061597690087 +NODE_2606_length_1039_cov_969.152 1523974 1039 1466.7699711260827 +NODE_2607_length_1039_cov_4073.2 6942647 1039 6682.047160731473 +NODE_2608_length_1037_cov_227.786 399986 1037 385.7145612343298 +NODE_2609_length_1034_cov_228.775 399999 1034 386.8462282398453 +NODE_260_length_66805_cov_224.585 26762988 66805 400.6135468902028 +NODE_2610_length_1031_cov_229.418 399987 1031 387.9602327837051 +NODE_2611_length_1030_cov_229.211 399978 1030 388.32815533980585 +NODE_2612_length_1029_cov_229.964 399989 1029 388.7162293488824 +NODE_2613_length_1027_cov_201.485 350000 1027 340.79844206426486 +NODE_2614_length_1026_cov_229.878 399988 1026 389.85185185185185 +NODE_2615_length_1024_cov_231.06 399993 1024 390.6181640625 +NODE_2616_length_1020_cov_460.531 767326 1020 752.2803921568627 +NODE_2617_length_1019_cov_232.334 399951 1019 392.4936211972522 +NODE_2618_length_1019_cov_232.109 399988 1019 392.52993130520116 +NODE_2619_length_1019_cov_231.546 399974 1019 392.5161923454367 +NODE_261_length_66610_cov_224.651 26747110 66610 401.54796577090525 +NODE_2620_length_1015_cov_233.324 399975 1015 394.064039408867 +NODE_2621_length_1014_cov_233.274 400000 1014 394.47731755424064 +NODE_2622_length_1014_cov_222.129 348255 1014 343.44674556213016 +NODE_2623_length_1013_cov_233.8 399984 1013 394.8509378084896 +NODE_2624_length_1013_cov_233.704 400000 1013 394.86673247778873 +NODE_2625_length_1010_cov_234.45 400000 1010 396.03960396039605 +NODE_2626_length_1007_cov_234.641 399987 1007 397.20655412115195 +NODE_2627_length_1004_cov_271.784 444748 1004 442.9760956175299 +NODE_2628_length_1002_cov_236.525 399981 1002 399.18263473053895 +NODE_2629_length_999_cov_207.577 349983 999 350.3333333333333 +NODE_262_length_66567_cov_222.285 26405467 66557 396.73463347206155 +NODE_2630_length_997_cov_208.006 350000 997 351.0531594784353 +NODE_2631_length_990_cov_209.387 349984 990 353.51919191919194 +NODE_2632_length_990_cov_271.936 346261 990 349.75858585858583 +NODE_2633_length_986_cov_210.492 350000 986 354.9695740365112 +NODE_2634_length_985_cov_210.718 349992 985 355.32182741116753 +NODE_2635_length_985_cov_210.694 349969 985 355.2984771573604 +NODE_2636_length_985_cov_237.783 393402 985 399.3928934010152 +NODE_2637_length_983_cov_210.999 349986 983 356.0386571719227 +NODE_2638_length_983_cov_437.047 699706 983 711.8067141403866 +NODE_2639_length_981_cov_211.484 349998 981 356.77675840978594 +NODE_263_length_66302_cov_220.717 26092785 66282 393.6632117316919 +NODE_2640_length_976_cov_212.772 349992 976 358.5983606557377 +NODE_2641_length_972_cov_213.266 349988 972 360.06995884773664 +NODE_2642_length_971_cov_213.894 349998 971 360.45108135942326 +NODE_2643_length_969_cov_214.342 350000 969 361.1971104231166 +NODE_2644_length_964_cov_215.33 349987 964 363.0570539419087 +NODE_2645_length_964_cov_215.469 350000 964 363.07053941908714 +NODE_2646_length_962_cov_216.062 349997 962 363.8222453222453 +NODE_2647_length_961_cov_216.241 349975 961 364.1779396462019 +NODE_2648_length_959_cov_216.702 350000 959 364.963503649635 +NODE_2649_length_952_cov_218.059 350246 935 374.59465240641714 +NODE_264_length_66157_cov_224.204 26240609 66157 396.64145895370103 +NODE_2650_length_950_cov_443.588 684968 950 721.0189473684211 +NODE_2651_length_948_cov_219.462 349988 948 369.1856540084388 +NODE_2652_length_948_cov_219.33 349991 948 369.1888185654008 +NODE_2653_length_945_cov_188.539 299822 945 317.2719576719577 +NODE_2654_length_943_cov_452.17 688245 943 729.846235418876 +NODE_2655_length_942_cov_220.823 349999 942 371.5488322717622 +NODE_2656_length_938_cov_462.815 676341 938 721.045842217484 +NODE_2657_length_927_cov_245.54 341653 927 368.5577130528587 +NODE_2658_length_923_cov_225.749 349957 923 379.1516793066089 +NODE_2659_length_919_cov_467.096 681310 919 741.3601741022851 +NODE_265_length_66134_cov_223.109 26321013 66134 397.9951764599147 +NODE_2660_length_918_cov_461.494 685800 918 747.0588235294117 +NODE_2661_length_917_cov_227.154 349980 917 381.65757906215924 +NODE_2662_length_916_cov_227.612 349989 916 382.0840611353712 +NODE_2663_length_916_cov_227.588 350000 916 382.09606986899564 +NODE_2664_length_914_cov_228.052 349999 914 382.9310722100656 +NODE_2665_length_907_cov_229.934 350000 907 385.8875413450937 +NODE_2666_length_905_cov_230.435 350000 905 386.7403314917127 +NODE_2667_length_898_cov_232.357 350000 898 389.75501113585744 +NODE_2668_length_897_cov_232.724 349985 897 390.1727982162765 +NODE_2669_length_897_cov_216.933 326231 897 363.6911928651059 +NODE_266_length_65984_cov_219.047 25631638 65944 388.68794734926604 +NODE_2670_length_896_cov_231.131 349997 896 390.6216517857143 +NODE_2671_length_886_cov_235.813 349970 886 395.0 +NODE_2672_length_886_cov_235.698 349999 886 395.03273137697516 +NODE_2673_length_885_cov_236.075 349996 885 395.47570621468924 +NODE_2674_length_883_cov_236.528 349971 883 396.3431483578709 +NODE_2675_length_882_cov_236.99 349957 882 396.7766439909297 +NODE_2676_length_878_cov_238.002 350000 878 398.6332574031891 +NODE_2677_length_877_cov_238.298 349983 877 399.06841505131126 +NODE_2678_length_877_cov_238.337 349972 877 399.0558722919042 +NODE_2679_length_877_cov_193.798 265251 877 302.45267958950967 +NODE_267_length_65936_cov_224.813 26382388 65936 400.12114777966514 +NODE_2680_length_875_cov_238.977 349988 875 399.9862857142857 +NODE_2681_length_872_cov_205.57 299985 872 344.01949541284404 +NODE_2682_length_872_cov_205.378 300000 872 344.0366972477064 +NODE_2683_length_871_cov_205.846 300000 871 344.4316877152698 +NODE_2684_length_870_cov_206.06 300000 870 344.82758620689657 +NODE_2685_length_868_cov_206.493 300000 868 345.6221198156682 +NODE_2686_length_866_cov_207.116 300000 866 346.4203233256351 +NODE_2687_length_866_cov_207.036 299999 866 346.41916859122404 +NODE_2688_length_863_cov_207.859 299999 863 347.6234067207416 +NODE_2689_length_863_cov_299.267 424226 863 491.5712630359212 +NODE_268_length_65577_cov_224.34 26269772 65577 400.59429373103376 +NODE_2690_length_862_cov_208.169 300000 862 348.0278422273782 +NODE_2691_length_862_cov_208.141 299988 862 348.0139211136891 +NODE_2692_length_861_cov_208.333 299975 861 348.4030197444832 +NODE_2693_length_860_cov_208.534 299999 860 348.8360465116279 +NODE_2694_length_859_cov_208.913 300000 859 349.2433061699651 +NODE_2695_length_856_cov_209.693 300000 856 350.4672897196262 +NODE_2696_length_853_cov_210.436 299984 853 351.68112543962485 +NODE_2697_length_853_cov_210.429 300000 853 351.69988276670574 +NODE_2698_length_851_cov_210.992 299998 851 352.524089306698 +NODE_2699_length_850_cov_211.283 300000 850 352.94117647058823 +NODE_269_length_65469_cov_223.682 26149109 65469 399.4120728894591 +NODE_26_length_356957_cov_224.046 142631367 356957 399.57576682905784 +NODE_2700_length_850_cov_225.675 305770 850 359.7294117647059 +NODE_2701_length_845_cov_212.615 299988 845 355.0153846153846 +NODE_2702_length_843_cov_213.129 299997 843 355.8683274021352 +NODE_2703_length_840_cov_213.964 300000 840 357.14285714285717 +NODE_2704_length_835_cov_215.365 299974 835 359.2502994011976 +NODE_2705_length_834_cov_215.576 299991 834 359.7014388489209 +NODE_2706_length_833_cov_215.794 300000 833 360.1440576230492 +NODE_2707_length_830_cov_216.612 299994 829 361.8745476477684 +NODE_2708_length_826_cov_2218.12 2626963 826 3180.3426150121068 +NODE_2709_length_823_cov_218.673 300000 823 364.52004860267317 +NODE_270_length_65245_cov_227.119 26052373 65245 399.3006820446011 +NODE_2710_length_820_cov_219.525 299994 820 365.84634146341466 +NODE_2711_length_818_cov_220.024 299987 818 366.7322738386308 +NODE_2712_length_814_cov_221.177 300000 814 368.55036855036855 +NODE_2713_length_813_cov_221.566 300000 813 369.00369003690037 +NODE_2714_length_812_cov_221.897 299999 812 369.4568965517241 +NODE_2715_length_811_cov_222.054 300000 811 369.9136868064118 +NODE_2716_length_810_cov_222.322 299983 810 370.3493827160494 +NODE_2717_length_808_cov_223.073 300000 808 371.28712871287127 +NODE_2718_length_808_cov_223.005 299981 808 371.26361386138615 +NODE_2719_length_808_cov_222.749 299999 808 371.28589108910893 +NODE_271_length_65238_cov_223.885 22982600 65226 352.35335602367155 +NODE_2720_length_801_cov_232.883 305122 801 380.9263420724095 +NODE_2721_length_799_cov_225.726 299982 799 375.4468085106383 +NODE_2722_length_798_cov_226.047 299977 798 375.9110275689223 +NODE_2723_length_794_cov_276.793 190248 790 240.82025316455696 +NODE_2724_length_794_cov_250.724 300000 785 382.1656050955414 +NODE_2725_length_793_cov_227.11 300988 793 379.55611601513243 +NODE_2726_length_789_cov_208.91 263360 789 333.7896070975919 +NODE_2727_length_788_cov_229.115 299999 788 380.7093908629442 +NODE_2728_length_788_cov_172.457 254933 788 323.5190355329949 +NODE_2729_length_784_cov_230.425 299982 784 382.6301020408163 +NODE_272_length_64833_cov_223.104 25805258 64833 398.0265913963568 +NODE_2730_length_783_cov_230.72 300000 783 383.1417624521073 +NODE_2731_length_783_cov_230.581 299980 783 383.1162196679438 +NODE_2732_length_781_cov_249.358 300431 764 393.2342931937173 +NODE_2733_length_775_cov_233.321 299987 775 387.08 +NODE_2734_length_767_cov_235.83 299999 767 391.1329856584094 +NODE_2735_length_766_cov_236.108 299990 766 391.6318537859008 +NODE_2736_length_764_cov_236.898 300000 764 392.67015706806285 +NODE_2737_length_759_cov_238.599 300000 759 395.25691699604744 +NODE_2738_length_759_cov_238.561 299968 759 395.2147562582345 +NODE_2739_length_756_cov_239.646 299990 756 396.8121693121693 +NODE_273_length_64747_cov_224.53 25837134 64747 399.04758521630345 +NODE_2740_length_754_cov_240.313 299995 754 397.8713527851459 +NODE_2741_length_745_cov_232.665 249249 731 340.9699042407661 +NODE_2742_length_743_cov_203.448 250000 743 336.47375504710635 +NODE_2743_length_739_cov_204.649 250000 739 338.29499323410016 +NODE_2744_length_739_cov_632.471 740681 739 1002.2746955345061 +NODE_2745_length_737_cov_205.132 249985 737 339.19267299864316 +NODE_2746_length_735_cov_205.85 250000 735 340.13605442176873 +NODE_2747_length_732_cov_206.784 249979 732 341.50136612021856 +NODE_2748_length_732_cov_206.557 249989 732 341.51502732240436 +NODE_2749_length_732_cov_206.573 249999 732 341.52868852459017 +NODE_274_length_64652_cov_221.398 25548246 64632 395.28787597474934 +NODE_2750_length_728_cov_207.866 250000 728 343.4065934065934 +NODE_2751_length_727_cov_208.274 249997 727 343.8748280605227 +NODE_2752_length_726_cov_208.571 250000 726 344.3526170798898 +NODE_2753_length_725_cov_208.863 250000 725 344.82758620689657 +NODE_2754_length_723_cov_209.554 249994 723 345.7731673582296 +NODE_2755_length_723_cov_209.564 249989 723 345.7662517289073 +NODE_2756_length_722_cov_222.214 264610 722 366.4958448753463 +NODE_2757_length_721_cov_233.79 264848 721 367.3342579750347 +NODE_2758_length_721_cov_2325.71 2530634 721 3509.8945908460473 +NODE_2759_length_720_cov_209.895 249996 720 347.21666666666664 +NODE_275_length_64190_cov_224.33 25699223 64190 400.36178532481694 +NODE_2760_length_716_cov_211.784 249982 716 349.1368715083799 +NODE_2761_length_716_cov_211.466 249997 716 349.1578212290503 +NODE_2762_length_715_cov_211.944 249985 715 349.6293706293706 +NODE_2763_length_712_cov_213.002 249999 712 351.12219101123594 +NODE_2764_length_712_cov_212.699 249990 712 351.10955056179773 +NODE_2765_length_709_cov_214.032 249986 709 352.58956276445696 +NODE_2766_length_707_cov_214.69 249962 707 353.55304101838755 +NODE_2767_length_707_cov_214.715 249981 707 353.5799151343706 +NODE_2768_length_707_cov_222.15 211541 707 299.2093352192362 +NODE_2769_length_705_cov_95.6031 111000 705 157.4468085106383 +NODE_276_length_63859_cov_224.373 25499313 63859 399.3064877307819 +NODE_2770_length_705_cov_215.205 250000 705 354.6099290780142 +NODE_2771_length_704_cov_215.653 249992 704 355.10227272727275 +NODE_2772_length_703_cov_137.67 159736 703 227.22048364153628 +NODE_2773_length_703_cov_215.89 249998 703 355.6159317211949 +NODE_2774_length_701_cov_216.686 249990 701 356.6191155492154 +NODE_2775_length_700_cov_216.899 249983 700 357.11857142857144 +NODE_2776_length_699_cov_181.042 200895 699 287.40343347639487 +NODE_2777_length_699_cov_447.679 484329 699 692.8884120171674 +NODE_2778_length_696_cov_218.371 249979 696 359.1652298850575 +NODE_2779_length_696_cov_218.09 250000 696 359.1954022988506 +NODE_277_length_62813_cov_224.74 25099788 62813 399.5954340661965 +NODE_2780_length_696_cov_442.897 444152 696 638.1494252873563 +NODE_2781_length_695_cov_218.653 250000 695 359.71223021582733 +NODE_2782_length_694_cov_219 250000 694 360.2305475504323 +NODE_2783_length_694_cov_218.994 250000 694 360.2305475504323 +NODE_2784_length_693_cov_219.381 249999 693 360.74891774891773 +NODE_2785_length_692_cov_219.73 249974 692 361.23410404624275 +NODE_2786_length_692_cov_220.702 239127 692 345.55924855491327 +NODE_2787_length_691_cov_220.116 249960 691 361.73661360347324 +NODE_2788_length_689_cov_220.792 250000 689 362.84470246734395 +NODE_2789_length_688_cov_221.006 249977 688 363.3386627906977 +NODE_278_length_62736_cov_223.709 25049355 62736 399.2819912012242 +NODE_2790_length_687_cov_221.432 249994 687 363.89228529839886 +NODE_2791_length_687_cov_463.847 497959 687 724.8311499272198 +NODE_2792_length_682_cov_223.195 250000 682 366.5689149560117 +NODE_2793_length_681_cov_223.548 249993 681 367.0969162995595 +NODE_2794_length_679_cov_224.346 250000 679 368.1885125184094 +NODE_2795_length_678_cov_197.446 176223 653 269.8667687595712 +NODE_2796_length_677_cov_225.037 249989 677 369.2599704579025 +NODE_2797_length_676_cov_225.406 249982 676 369.79585798816566 +NODE_2798_length_674_cov_226.157 249984 674 370.89614243323444 +NODE_2799_length_673_cov_226.506 249988 673 371.45319465081724 +NODE_279_length_62605_cov_223.747 24999283 62591 399.40699142049175 +NODE_27_length_340090_cov_224.315 136202552 340090 400.48972918933225 +NODE_2800_length_671_cov_880.588 915254 671 1364.0149031296573 +NODE_2801_length_670_cov_227.618 250000 670 373.13432835820896 +NODE_2802_length_670_cov_219.444 227578 670 339.6686567164179 +NODE_2803_length_669_cov_226.184 235234 669 351.6203288490284 +NODE_2804_length_669_cov_235.912 252585 669 377.5560538116592 +NODE_2805_length_668_cov_228.318 250000 668 374.251497005988 +NODE_2806_length_667_cov_228.711 249987 667 374.7931034482759 +NODE_2807_length_666_cov_229.074 250000 666 375.37537537537537 +NODE_2808_length_666_cov_200.637 210577 666 316.1816816816817 +NODE_2809_length_665_cov_229.413 250000 665 375.9398496240602 +NODE_280_length_62487_cov_223.293 24908465 62477 398.68215503305214 +NODE_2810_length_662_cov_193.952 198482 662 299.82175226586105 +NODE_2811_length_661_cov_230.817 249749 657 380.13546423135466 +NODE_2812_length_660_cov_230.851 249989 627 398.7065390749601 +NODE_2813_length_659_cov_375.369 385959 659 585.6737481031867 +NODE_2814_length_658_cov_158.965 171618 658 260.81762917933133 +NODE_2815_length_658_cov_487.914 348470 658 529.5896656534954 +NODE_2816_length_657_cov_232.54 250000 657 380.517503805175 +NODE_2817_length_655_cov_254.598 263997 655 403.04885496183203 +NODE_2818_length_651_cov_234.856 249981 651 383.9953917050691 +NODE_2819_length_651_cov_234.71 250000 651 384.0245775729647 +NODE_281_length_62105_cov_223.317 24799323 62105 399.3128250543435 +NODE_2820_length_649_cov_235.667 250000 649 385.2080123266564 +NODE_2821_length_648_cov_443.592 445423 648 687.3811728395061 +NODE_2822_length_647_cov_236.355 250000 647 386.39876352395675 +NODE_2823_length_647_cov_236.471 250000 647 386.39876352395675 +NODE_2824_length_647_cov_236.061 249992 647 386.38639876352397 +NODE_2825_length_645_cov_189.793 200000 645 310.07751937984494 +NODE_2826_length_645_cov_237.193 249988 645 387.5782945736434 +NODE_2827_length_643_cov_238.07 249980 643 388.7713841368585 +NODE_2828_length_643_cov_237.968 249998 643 388.79937791601867 +NODE_2829_length_642_cov_238.47 250000 642 389.4080996884735 +NODE_282_length_61945_cov_224.472 24749329 61945 399.5371539268706 +NODE_2830_length_642_cov_154.416 162000 642 252.33644859813083 +NODE_2831_length_641_cov_238.763 249987 639 391.21596244131456 +NODE_2832_length_640_cov_239.299 250000 640 390.625 +NODE_2833_length_640_cov_239.121 249981 640 390.5953125 +NODE_2834_length_640_cov_226.161 221518 640 346.121875 +NODE_2835_length_640_cov_241.832 250120 640 390.8125 +NODE_2836_length_634_cov_311.463 241501 634 380.91640378548897 +NODE_2837_length_633_cov_242.13 250000 633 394.9447077409163 +NODE_2838_length_632_cov_242.411 249991 632 395.55537974683546 +NODE_2839_length_631_cov_188.819 196910 631 312.06022187004754 +NODE_283_length_61933_cov_226.681 24859836 61933 401.3988665170426 +NODE_2840_length_631_cov_242.922 249997 631 396.19175911251983 +NODE_2841_length_630_cov_243.407 250000 630 396.8253968253968 +NODE_2842_length_629_cov_243.679 250000 629 397.456279809221 +NODE_2843_length_627_cov_244.698 250000 627 398.72408293460927 +NODE_2844_length_627_cov_686.643 658703 627 1050.5629984051036 +NODE_2845_length_625_cov_245.402 250000 625 400.0 +NODE_2846_length_625_cov_164.437 158059 625 252.8944 +NODE_2847_length_624_cov_284.902 173273 624 277.6810897435897 +NODE_2848_length_624_cov_455.179 377822 624 605.4839743589744 +NODE_2849_length_623_cov_197.165 199968 623 320.97592295345106 +NODE_284_length_61619_cov_223.683 24599375 61619 399.21736801960435 +NODE_2850_length_623_cov_246.458 250000 623 401.2841091492777 +NODE_2851_length_622_cov_197.374 184390 622 296.4469453376206 +NODE_2852_length_622_cov_197.511 200000 622 321.54340836012864 +NODE_2853_length_622_cov_197.46 199995 622 321.5353697749196 +NODE_2854_length_621_cov_197.564 200000 621 322.061191626409 +NODE_2855_length_621_cov_226.905 217073 621 349.5539452495974 +NODE_2856_length_620_cov_61.9186 62500 610 102.45901639344262 +NODE_2857_length_620_cov_198.129 200000 620 322.5806451612903 +NODE_2858_length_620_cov_198.113 200000 620 322.5806451612903 +NODE_2859_length_619_cov_198.291 200000 619 323.10177705977384 +NODE_285_length_61358_cov_224.149 24544118 61358 400.0149613742299 +NODE_2860_length_613_cov_200.681 200000 613 326.2642740619902 +NODE_2861_length_612_cov_201.068 200000 612 326.797385620915 +NODE_2862_length_612_cov_201.052 199990 612 326.781045751634 +NODE_2863_length_612_cov_190.223 182037 612 297.44607843137254 +NODE_2864_length_609_cov_202.144 200000 609 328.4072249589491 +NODE_2865_length_607_cov_202.862 199999 607 329.48764415156506 +NODE_2866_length_600_cov_205.394 200000 600 333.3333333333333 +NODE_2867_length_600_cov_205.372 199962 600 333.27 +NODE_2868_length_600_cov_204.69 199995 600 333.325 +NODE_2869_length_598_cov_206.204 200000 598 334.44816053511704 +NODE_286_length_61108_cov_224.178 24447755 61108 400.07454015840807 +NODE_2870_length_596_cov_206.987 200000 596 335.5704697986577 +NODE_2871_length_594_cov_207.77 200000 594 336.7003367003367 +NODE_2872_length_594_cov_206.952 199995 594 336.69191919191917 +NODE_2873_length_591_cov_223.687 199875 552 362.0923913043478 +NODE_2874_length_589_cov_266.266 246435 589 418.3955857385399 +NODE_2875_length_588_cov_210.105 200000 588 340.13605442176873 +NODE_2876_length_587_cov_313.821 201397 587 343.0954003407155 +NODE_2877_length_586_cov_210.817 199997 586 341.29180887372013 +NODE_2878_length_583_cov_212.097 199981 583 343.0205831903945 +NODE_2879_length_579_cov_213.628 199999 579 345.4214162348877 +NODE_287_length_61030_cov_224.24 24399368 61030 399.79301982631495 +NODE_2880_length_577_cov_214.544 200000 577 346.6204506065858 +NODE_2881_length_577_cov_214.538 200000 577 346.6204506065858 +NODE_2882_length_576_cov_214.965 199986 576 347.1979166666667 +NODE_2883_length_575_cov_215.313 200000 575 347.82608695652175 +NODE_2884_length_571_cov_216.936 199975 571 350.2189141856392 +NODE_2885_length_570_cov_217.396 200000 570 350.87719298245617 +NODE_2886_length_570_cov_217.402 199986 570 350.85263157894735 +NODE_2887_length_570_cov_217.128 199999 570 350.8754385964912 +NODE_2888_length_570_cov_216.317 199494 570 349.9894736842105 +NODE_2889_length_569_cov_217.881 200000 569 351.493848857645 +NODE_288_length_61022_cov_224.026 24399230 61022 399.84317131526336 +NODE_2890_length_566_cov_219.117 200000 566 353.35689045936397 +NODE_2891_length_565_cov_468.425 300133 565 531.2088495575222 +NODE_2892_length_563_cov_220.388 199973 563 355.1918294849023 +NODE_2893_length_563_cov_220.356 199999 563 355.2380106571936 +NODE_2894_length_561_cov_220.136 199995 561 356.49732620320856 +NODE_2895_length_561_cov_221.549 184625 561 329.0998217468806 +NODE_2896_length_560_cov_221.689 200000 560 357.14285714285717 +NODE_2897_length_558_cov_222.624 199982 558 358.39068100358423 +NODE_2898_length_557_cov_222.799 199986 557 359.04129263913825 +NODE_2899_length_556_cov_223.439 200000 556 359.71223021582733 +NODE_289_length_60945_cov_223.862 24349385 60945 399.53047830010667 +NODE_28_length_331107_cov_224.832 132019945 331107 398.72290528439447 +NODE_2900_length_555_cov_223.996 200000 555 360.36036036036035 +NODE_2901_length_555_cov_223.98 199986 555 360.33513513513515 +NODE_2902_length_555_cov_223.908 199984 555 360.33153153153154 +NODE_2903_length_555_cov_223.762 199996 555 360.35315315315313 +NODE_2904_length_554_cov_224.427 199979 554 360.97292418772565 +NODE_2905_length_554_cov_224.331 199992 554 360.99638989169677 +NODE_2906_length_553_cov_224.873 200000 553 361.6636528028933 +NODE_2907_length_552_cov_225.241 200000 552 362.3188405797101 +NODE_2908_length_548_cov_227.124 199990 548 364.94525547445255 +NODE_2909_length_548_cov_227.057 199998 548 364.95985401459853 +NODE_290_length_60818_cov_223.439 24324475 60818 399.9551941859318 +NODE_2910_length_547_cov_227.435 199989 547 365.6106032906764 +NODE_2911_length_547_cov_227.575 199988 547 365.60877513711154 +NODE_2912_length_546_cov_228.067 199999 546 366.2985347985348 +NODE_2913_length_545_cov_174.316 145625 545 267.20183486238534 +NODE_2914_length_543_cov_229.08 199995 543 368.31491712707185 +NODE_2915_length_542_cov_229.971 200000 542 369.00369003690037 +NODE_2916_length_541_cov_230.389 200000 541 369.68576709796673 +NODE_2917_length_541_cov_272.16 239221 541 442.1829944547135 +NODE_2918_length_540_cov_230.784 199997 540 370.3648148148148 +NODE_2919_length_539_cov_444.419 233324 539 432.8831168831169 +NODE_291_length_60673_cov_224.58 24299866 60673 400.5054307517347 +NODE_2920_length_538_cov_221.3 171158 538 318.13754646840147 +NODE_2921_length_538_cov_231.783 199997 538 371.74163568773236 +NODE_2922_length_537_cov_232.334 199987 537 372.415270018622 +NODE_2923_length_537_cov_232.334 200000 537 372.43947858472995 +NODE_2924_length_537_cov_232.195 200000 537 372.43947858472995 +NODE_2925_length_536_cov_232.692 199999 536 373.1324626865672 +NODE_2926_length_536_cov_232.532 200000 536 373.13432835820896 +NODE_2927_length_535_cov_233.269 200000 535 373.8317757009346 +NODE_2928_length_535_cov_462.048 359238 535 671.4728971962617 +NODE_2929_length_534_cov_233.797 199992 534 374.5168539325843 +NODE_292_length_60304_cov_226.913 24100729 60304 399.6539035553197 +NODE_2930_length_534_cov_233.687 199993 534 374.5187265917603 +NODE_2931_length_532_cov_234.755 199999 532 375.937969924812 +NODE_2932_length_532_cov_234.574 200000 532 375.9398496240602 +NODE_2933_length_531_cov_235.17 200000 531 376.6478342749529 +NODE_2934_length_531_cov_230.494 191925 531 361.4406779661017 +NODE_2935_length_530_cov_283.992 223600 530 421.8867924528302 +NODE_2936_length_529_cov_236.249 199994 529 378.0604914933837 +NODE_2937_length_527_cov_237.278 200000 527 379.5066413662239 +NODE_2938_length_527_cov_237.108 199999 527 379.5047438330171 +NODE_2939_length_526_cov_237.626 200000 526 380.22813688212926 +NODE_293_length_60097_cov_223.229 23924525 60097 398.0984907732499 +NODE_2940_length_526_cov_258.507 168757 526 320.83079847908743 +NODE_2941_length_525_cov_238.27 200000 525 380.95238095238096 +NODE_2942_length_525_cov_238.281 200000 525 380.95238095238096 +NODE_2943_length_525_cov_237.996 200000 525 380.95238095238096 +NODE_2944_length_524_cov_238.661 199999 524 381.67748091603056 +NODE_2945_length_522_cov_239.812 199999 522 383.13984674329504 +NODE_2946_length_521_cov_239.97 199999 521 383.87523992322457 +NODE_2947_length_521_cov_240.339 200000 521 383.8771593090211 +NODE_2948_length_519_cov_241.364 200000 519 385.35645472061657 +NODE_2949_length_519_cov_240.918 199999 519 385.354527938343 +NODE_294_length_60004_cov_224.57 24030117 60004 400.47525164989 +NODE_2950_length_518_cov_307.33 193046 518 372.6756756756757 +NODE_2951_length_518_cov_241.711 200000 518 386.1003861003861 +NODE_2952_length_517_cov_242.273 199999 517 386.8452611218569 +NODE_2953_length_517_cov_242.348 199999 517 386.8452611218569 +NODE_2954_length_516_cov_242.928 200000 516 387.5968992248062 +NODE_2955_length_516_cov_242.941 199999 516 387.59496124031006 +NODE_2956_length_514_cov_243.791 200000 514 389.10505836575874 +NODE_2957_length_513_cov_244.539 200000 513 389.8635477582846 +NODE_2958_length_513_cov_244.498 200000 513 389.8635477582846 +NODE_2959_length_510_cov_246.141 200000 510 392.15686274509807 +NODE_295_length_59926_cov_224.106 23951109 59926 399.67808630644464 +NODE_2960_length_510_cov_246.086 199999 510 392.1549019607843 +NODE_2961_length_510_cov_246.112 200000 510 392.15686274509807 +NODE_2962_length_509_cov_246.687 200000 509 392.92730844793715 +NODE_2963_length_508_cov_247.148 199999 508 393.6988188976378 +NODE_2964_length_506_cov_248.335 200000 506 395.25691699604744 +NODE_2965_length_505_cov_248.822 200000 505 396.03960396039605 +NODE_2966_length_503_cov_249.871 200000 503 397.61431411530816 +NODE_2967_length_500_cov_251.658 200000 500 400.0 +NODE_2968_length_495_cov_190.325 150125 495 303.2828282828283 +NODE_2969_length_493_cov_191.772 150000 493 304.25963488843814 +NODE_296_length_59615_cov_224.368 23849066 59615 400.05143000922584 +NODE_2970_length_493_cov_191.674 150000 493 304.25963488843814 +NODE_2971_length_492_cov_192.215 150000 492 304.8780487804878 +NODE_2972_length_488_cov_193.949 150000 488 307.37704918032784 +NODE_2973_length_482_cov_196.719 150000 482 311.2033195020747 +NODE_2974_length_481_cov_197.16 150000 481 311.85031185031187 +NODE_2975_length_480_cov_197.631 150000 480 312.5 +NODE_2976_length_478_cov_421.69 289180 478 604.979079497908 +NODE_2977_length_474_cov_200.468 150000 474 316.45569620253167 +NODE_2978_length_474_cov_200.451 150000 474 316.45569620253167 +NODE_2979_length_474_cov_464.47 135443 474 285.7447257383966 +NODE_297_length_59582_cov_223.779 23775289 59582 399.0347588197778 +NODE_2980_length_472_cov_236.062 160794 472 340.66525423728814 +NODE_2981_length_471_cov_201.745 150000 471 318.47133757961785 +NODE_2982_length_471_cov_199.812 104972 469 223.82089552238807 +NODE_2983_length_470_cov_202.275 149999 470 319.1468085106383 +NODE_2984_length_468_cov_282.755 188300 468 402.35042735042737 +NODE_2985_length_465_cov_99.6585 73250 465 157.5268817204301 +NODE_2986_length_464_cov_205.333 150000 464 323.2758620689655 +NODE_2987_length_464_cov_352.362 242975 464 523.6530172413793 +NODE_2988_length_463_cov_205.777 150000 463 323.97408207343415 +NODE_2989_length_460_cov_295.37 193472 460 420.5913043478261 +NODE_298_length_59561_cov_223.162 23713398 59561 398.13633082050336 +NODE_2990_length_459_cov_207.653 149999 459 326.79520697167754 +NODE_2991_length_457_cov_208.881 150000 457 328.2275711159737 +NODE_2992_length_457_cov_208.876 150000 457 328.2275711159737 +NODE_2993_length_457_cov_524.527 192953 457 422.21663019693653 +NODE_2994_length_455_cov_192.923 134331 455 295.23296703296705 +NODE_2995_length_455_cov_209.985 150000 455 329.6703296703297 +NODE_2996_length_455_cov_670.65 434205 455 954.2967032967033 +NODE_2997_length_454_cov_210.454 150000 454 330.3964757709251 +NODE_2998_length_452_cov_211.524 150000 452 331.85840707964604 +NODE_2999_length_451_cov_246.614 114433 451 253.73170731707316 +NODE_299_length_59258_cov_224.274 23699229 59258 399.93298795099395 +NODE_29_length_327293_cov_225.047 131076412 327293 400.4864509781755 +NODE_2_length_1166739_cov_224.155 466597209 1166739 399.9156700855975 +NODE_3000_length_450_cov_212.628 150000 450 333.3333333333333 +NODE_3001_length_446_cov_214.821 150000 446 336.32286995515693 +NODE_3002_length_442_cov_216.938 149999 442 339.36425339366514 +NODE_3003_length_441_cov_40.6218 28000 431 64.96519721577727 +NODE_3004_length_441_cov_236.977 141218 441 320.22222222222223 +NODE_3005_length_441_cov_222.415 122505 441 277.7891156462585 +NODE_3006_length_440_cov_218.106 150000 440 340.90909090909093 +NODE_3007_length_438_cov_219.313 150000 438 342.4657534246575 +NODE_3008_length_436_cov_234.903 147223 436 337.66743119266056 +NODE_3009_length_435_cov_221.011 150000 435 344.82758620689657 +NODE_300_length_58775_cov_224.291 23521772 58775 400.20028923862185 +NODE_3010_length_434_cov_149.726 86982 434 200.41935483870967 +NODE_3011_length_432_cov_222.745 150000 432 347.22222222222223 +NODE_3012_length_432_cov_462.775 65062 432 150.6064814814815 +NODE_3013_length_431_cov_223.391 150000 431 348.0278422273782 +NODE_3014_length_431_cov_182.109 109894 431 254.97447795823666 +NODE_3015_length_431_cov_223.356 150000 431 348.0278422273782 +NODE_3016_length_431_cov_223.346 149999 431 348.02552204176334 +NODE_3017_length_430_cov_223.68 150000 430 348.83720930232556 +NODE_3018_length_430_cov_169.715 86209 430 200.4860465116279 +NODE_3019_length_429_cov_224.572 150000 429 349.65034965034965 +NODE_301_length_57982_cov_229.79 23216623 57982 400.4108688903453 +NODE_3020_length_428_cov_214.804 130642 428 305.2383177570093 +NODE_3021_length_427_cov_225.726 150000 427 351.288056206089 +NODE_3022_length_425_cov_226.962 150000 425 352.94117647058823 +NODE_3023_length_425_cov_1270.18 654201 425 1539.2964705882353 +NODE_3024_length_422_cov_215.049 144411 422 342.20616113744074 +NODE_3025_length_421_cov_229.355 150000 421 356.29453681710214 +NODE_3026_length_419_cov_230.626 150000 419 357.99522673031026 +NODE_3027_length_419_cov_225.673 132100 419 315.2744630071599 +NODE_3028_length_417_cov_441.693 259539 417 622.3956834532374 +NODE_3029_length_416_cov_232.657 150000 416 360.5769230769231 +NODE_302_length_57834_cov_227.488 23143387 57834 400.16922571497736 +NODE_3030_length_415_cov_233.106 150000 415 361.4457831325301 +NODE_3031_length_412_cov_235.221 150000 412 364.07766990291265 +NODE_3032_length_412_cov_235.261 150000 412 364.07766990291265 +NODE_3033_length_411_cov_235.935 150000 411 364.963503649635 +NODE_3034_length_411_cov_445.152 254857 411 620.0900243309003 +NODE_3035_length_411_cov_235.933 150000 411 364.963503649635 +NODE_3036_length_409_cov_237.26 150000 409 366.7481662591687 +NODE_3037_length_409_cov_237.023 150000 409 366.7481662591687 +NODE_3038_length_408_cov_237.856 150000 408 367.6470588235294 +NODE_3039_length_407_cov_238.509 149999 407 368.54791154791155 +NODE_303_length_57598_cov_223.895 22999241 57598 399.30624327233585 +NODE_3040_length_406_cov_238.419 150000 406 369.4581280788177 +NODE_3041_length_405_cov_239.971 150000 405 370.3703703703704 +NODE_3042_length_405_cov_239.966 150000 405 370.3703703703704 +NODE_3043_length_405_cov_239.84 150000 405 370.3703703703704 +NODE_3044_length_404_cov_240.582 149999 404 371.28465346534654 +NODE_3045_length_402_cov_242.072 150000 402 373.13432835820896 +NODE_3046_length_401_cov_242.301 149999 401 374.06234413965086 +NODE_3047_length_400_cov_243.339 150000 400 375.0 +NODE_3048_length_400_cov_243.29 150000 400 375.0 +NODE_3049_length_395_cov_246.882 150000 395 379.746835443038 +NODE_304_length_57178_cov_223.932 22849434 57178 399.6193291125958 +NODE_3050_length_394_cov_247.584 150000 394 380.71065989847716 +NODE_3051_length_393_cov_217.979 102824 393 261.63867684478373 +NODE_3052_length_393_cov_248.464 150000 393 381.6793893129771 +NODE_3053_length_392_cov_249.249 150000 392 382.6530612244898 +NODE_3054_length_391_cov_219.682 118830 391 303.9130434782609 +NODE_3055_length_389_cov_251.389 149999 389 385.60154241645245 +NODE_3056_length_387_cov_253.006 150000 387 387.5968992248062 +NODE_3057_length_387_cov_231.774 118312 387 305.7157622739018 +NODE_3058_length_386_cov_253.728 150000 386 388.60103626943004 +NODE_3059_length_385_cov_226.988 99696 385 258.95064935064937 +NODE_305_length_57136_cov_224.047 22849270 57136 399.9102142257071 +NODE_3060_length_383_cov_256.055 150000 383 391.644908616188 +NODE_3061_length_383_cov_255.713 149999 383 391.64229765013056 +NODE_3062_length_381_cov_459.267 121498 381 318.8923884514436 +NODE_3063_length_381_cov_191.791 59516 381 156.20997375328085 +NODE_3064_length_380_cov_258.452 150000 380 394.7368421052632 +NODE_3065_length_380_cov_258.425 150000 380 394.7368421052632 +NODE_3066_length_378_cov_227.529 115935 378 306.7063492063492 +NODE_3067_length_377_cov_260.82 150000 377 397.8779840848806 +NODE_3068_length_377_cov_465.314 238875 377 633.6206896551724 +NODE_3069_length_377_cov_706.301 368412 377 977.2201591511937 +NODE_306_length_56639_cov_223.656 22649225 56639 399.88744504669927 +NODE_3070_length_376_cov_261.623 150000 376 398.93617021276594 +NODE_3071_length_376_cov_261.667 150000 376 398.93617021276594 +NODE_3072_length_374_cov_175.527 100000 374 267.379679144385 +NODE_3073_length_374_cov_175.483 100000 374 267.379679144385 +NODE_3074_length_374_cov_175.382 100000 374 267.379679144385 +NODE_3075_length_372_cov_176.618 100000 372 268.81720430107526 +NODE_3076_length_371_cov_177.193 100000 371 269.54177897574124 +NODE_3077_length_370_cov_144.933 76074 370 205.6054054054054 +NODE_3078_length_370_cov_270.159 169658 370 458.53513513513514 +NODE_3079_length_369_cov_227.79 73786 369 199.9620596205962 +NODE_307_length_56566_cov_223.686 22599258 56566 399.5201711275324 +NODE_3080_length_366_cov_180.064 100000 366 273.224043715847 +NODE_3081_length_366_cov_179.9 100000 366 273.224043715847 +NODE_3082_length_365_cov_180.642 100000 365 273.972602739726 +NODE_3083_length_364_cov_153.828 80253 364 220.47527472527472 +NODE_3084_length_364_cov_181.133 100000 364 274.72527472527474 +NODE_3085_length_363_cov_163.867 48898 363 134.70523415977962 +NODE_3086_length_363_cov_181.789 100000 363 275.4820936639118 +NODE_3087_length_362_cov_182.397 100000 362 276.24309392265195 +NODE_3088_length_360_cov_183.597 100000 360 277.77777777777777 +NODE_3089_length_360_cov_695.423 332017 360 922.2694444444444 +NODE_308_length_56260_cov_221.735 22262088 56250 395.7704533333333 +NODE_3090_length_360_cov_1504.9 723376 360 2009.3777777777777 +NODE_3091_length_359_cov_184.201 100000 359 278.55153203342616 +NODE_3092_length_359_cov_184.039 100000 359 278.55153203342616 +NODE_3093_length_359_cov_184.02 100000 359 278.55153203342616 +NODE_3094_length_358_cov_184.752 99999 358 279.3268156424581 +NODE_3095_length_357_cov_139.927 58353 357 163.45378151260505 +NODE_3096_length_357_cov_210.464 42819 357 119.94117647058823 +NODE_3097_length_356_cov_186.03 100000 356 280.8988764044944 +NODE_3098_length_356_cov_186.017 100000 356 280.8988764044944 +NODE_3099_length_356_cov_261.236 107278 356 301.3426966292135 +NODE_309_length_56199_cov_224.64 22495712 56199 400.2866954928024 +NODE_30_length_320616_cov_227.122 127726301 320606 398.39023910968604 +NODE_3100_length_355_cov_227.68 96295 355 271.2535211267606 +NODE_3101_length_353_cov_187.735 99999 353 283.2832861189802 +NODE_3102_length_353_cov_187.715 100000 353 283.28611898017 +NODE_3103_length_353_cov_160.899 73637 353 208.60339943342777 +NODE_3104_length_350_cov_189.681 100000 350 285.7142857142857 +NODE_3105_length_350_cov_189.766 100000 350 285.7142857142857 +NODE_3106_length_349_cov_242.204 113776 349 326.0057306590258 +NODE_3107_length_348_cov_274.198 138212 348 397.1609195402299 +NODE_3108_length_348_cov_191.082 100000 348 287.35632183908046 +NODE_3109_length_347_cov_174.027 82102 347 236.60518731988472 +NODE_310_length_56103_cov_224.157 22399296 56103 399.25308807015665 +NODE_3110_length_347_cov_191.777 100000 347 288.1844380403458 +NODE_3111_length_347_cov_191.771 100000 347 288.1844380403458 +NODE_3112_length_347_cov_191.712 100000 347 288.1844380403458 +NODE_3113_length_347_cov_191.733 100000 347 288.1844380403458 +NODE_3114_length_346_cov_149.509 37732 346 109.05202312138728 +NODE_3115_length_346_cov_192.357 100000 346 289.01734104046244 +NODE_3116_length_345_cov_193.052 100000 345 289.8550724637681 +NODE_3117_length_342_cov_92.1847 47250 342 138.1578947368421 +NODE_3118_length_342_cov_180.606 82260 342 240.52631578947367 +NODE_3119_length_341_cov_195.717 100000 341 293.2551319648094 +NODE_311_length_55946_cov_224.576 22399631 55946 400.3794909376899 +NODE_3120_length_341_cov_195.549 99999 341 293.25219941348973 +NODE_3121_length_340_cov_196.474 100000 340 294.11764705882354 +NODE_3122_length_340_cov_196.474 100000 340 294.11764705882354 +NODE_3123_length_339_cov_197.123 100000 339 294.9852507374631 +NODE_3124_length_339_cov_197.141 100000 339 294.9852507374631 +NODE_3125_length_338_cov_197.827 100000 338 295.85798816568047 +NODE_3126_length_338_cov_283.901 126061 338 372.96153846153845 +NODE_3127_length_337_cov_198.582 100000 337 296.7359050445104 +NODE_3128_length_337_cov_198.507 100000 337 296.7359050445104 +NODE_3129_length_335_cov_199.736 100000 335 298.5074626865672 +NODE_312_length_55940_cov_233.259 22086822 55930 394.9011621669945 +NODE_3130_length_334_cov_200.695 100000 334 299.4011976047904 +NODE_3131_length_334_cov_200.699 100000 334 299.4011976047904 +NODE_3132_length_333_cov_201.388 100000 333 300.3003003003003 +NODE_3133_length_333_cov_201.04 100000 333 300.3003003003003 +NODE_3134_length_333_cov_233.888 80619 333 242.0990990990991 +NODE_3135_length_332_cov_201.235 100000 302 331.12582781456956 +NODE_3136_length_332_cov_138.635 44323 332 133.50301204819277 +NODE_3137_length_331_cov_202.884 100000 331 302.11480362537765 +NODE_3138_length_330_cov_203.585 100000 330 303.030303030303 +NODE_3139_length_330_cov_764.88 90375 330 273.8636363636364 +NODE_313_length_55855_cov_224.597 22398246 55855 401.0070002685525 +NODE_3140_length_329_cov_204.204 100000 329 303.951367781155 +NODE_3141_length_325_cov_207.152 99999 325 307.6892307692308 +NODE_3142_length_323_cov_208.948 100000 323 309.59752321981426 +NODE_3143_length_322_cov_209.723 100000 322 310.55900621118013 +NODE_3144_length_321_cov_210.301 100000 321 311.52647975077883 +NODE_3145_length_320_cov_211.317 100000 320 312.5 +NODE_3146_length_320_cov_139.238 61743 320 192.946875 +NODE_3147_length_320_cov_211.313 100000 320 312.5 +NODE_3148_length_320_cov_419.177 170889 320 534.028125 +NODE_3149_length_319_cov_212.114 100000 319 313.47962382445144 +NODE_314_length_55776_cov_224.002 22299445 55776 399.8035893574297 +NODE_3150_length_319_cov_225.523 91718 319 287.51724137931035 +NODE_3151_length_319_cov_132.564 47261 319 148.15360501567397 +NODE_3152_length_318_cov_212.791 100000 318 314.4654088050315 +NODE_3153_length_317_cov_213.676 100000 317 315.45741324921136 +NODE_3154_length_316_cov_214.517 100000 316 316.45569620253167 +NODE_3155_length_316_cov_139.471 58461 313 186.77635782747603 +NODE_3156_length_315_cov_215.377 100000 315 317.46031746031747 +NODE_3157_length_315_cov_215.365 100000 315 317.46031746031747 +NODE_3158_length_315_cov_215.35 100000 315 317.46031746031747 +NODE_3159_length_315_cov_215.262 100000 315 317.46031746031747 +NODE_315_length_55652_cov_223.193 22131986 55642 397.7568383595126 +NODE_3160_length_315_cov_215.177 100000 315 317.46031746031747 +NODE_3161_length_314_cov_215.896 100000 314 318.47133757961785 +NODE_3162_length_313_cov_142.384 25105 313 80.2076677316294 +NODE_3163_length_312_cov_217.864 100000 312 320.5128205128205 +NODE_3164_length_312_cov_217.677 100000 312 320.5128205128205 +NODE_3165_length_312_cov_196.027 76702 312 245.8397435897436 +NODE_3166_length_310_cov_219.592 100000 310 322.5806451612903 +NODE_3167_length_310_cov_438.071 171250 310 552.4193548387096 +NODE_3168_length_309_cov_220.461 100000 309 323.62459546925567 +NODE_3169_length_309_cov_220.386 100000 309 323.62459546925567 +NODE_316_length_55369_cov_222.995 22034172 55369 397.9514168578085 +NODE_3170_length_309_cov_220.457 100000 309 323.62459546925567 +NODE_3171_length_308_cov_221.186 99999 308 324.6720779220779 +NODE_3172_length_308_cov_221.336 100000 308 324.6753246753247 +NODE_3173_length_307_cov_213.27 79393 307 258.60912052117266 +NODE_3174_length_306_cov_164.347 82810 306 270.62091503267976 +NODE_3175_length_305_cov_223.972 100000 305 327.8688524590164 +NODE_3176_length_305_cov_223.904 100000 305 327.8688524590164 +NODE_3177_length_303_cov_225.798 100000 303 330.03300330033005 +NODE_3178_length_303_cov_225.798 100000 303 330.03300330033005 +NODE_3179_length_302_cov_226.696 100000 302 331.12582781456956 +NODE_317_length_55217_cov_223.77 22071774 55217 399.72787366209684 +NODE_3180_length_302_cov_226.709 100000 302 331.12582781456956 +NODE_3181_length_302_cov_480.692 172670 302 571.7549668874173 +NODE_3182_length_301_cov_227.614 100000 301 332.22591362126246 +NODE_3183_length_300_cov_228.453 99999 300 333.33 +NODE_3184_length_300_cov_220.171 82223 300 274.07666666666665 +NODE_3185_length_298_cov_230.449 100000 298 335.5704697986577 +NODE_3186_length_297_cov_231.397 100000 297 336.7003367003367 +NODE_3187_length_296_cov_232.349 100000 296 337.8378378378378 +NODE_3188_length_295_cov_232.479 100000 295 338.9830508474576 +NODE_3189_length_294_cov_234.305 100000 294 340.13605442176873 +NODE_318_length_55165_cov_223.919 21815910 55165 395.4665095622224 +NODE_3190_length_293_cov_235.197 100000 293 341.29692832764505 +NODE_3191_length_292_cov_236.278 100000 292 342.4657534246575 +NODE_3192_length_292_cov_236.278 100000 292 342.4657534246575 +NODE_3193_length_292_cov_226.747 66149 292 226.5376712328767 +NODE_3194_length_291_cov_237.284 100000 291 343.64261168384877 +NODE_3195_length_291_cov_237.275 100000 291 343.64261168384877 +NODE_3196_length_291_cov_237.03 100000 291 343.64261168384877 +NODE_3197_length_291_cov_237.233 100000 291 343.64261168384877 +NODE_3198_length_290_cov_577.434 78165 290 269.5344827586207 +NODE_3199_length_289_cov_134.641 51504 289 178.21453287197232 +NODE_319_length_54926_cov_223.554 21949127 54926 399.6126970833485 +NODE_31_length_318595_cov_223.603 127096544 318595 398.9282443227295 +NODE_3200_length_289_cov_125.274 50842 288 176.53472222222223 +NODE_3201_length_288_cov_240.322 100000 288 347.22222222222223 +NODE_3202_length_288_cov_133.734 54052 288 187.68055555555554 +NODE_3203_length_287_cov_241.371 100000 287 348.4320557491289 +NODE_3204_length_287_cov_240.862 100000 287 348.4320557491289 +NODE_3205_length_286_cov_242.416 100000 286 349.65034965034965 +NODE_3206_length_285_cov_243.039 100000 285 350.87719298245617 +NODE_3207_length_284_cov_229.568 61180 284 215.42253521126761 +NODE_3208_length_284_cov_243.319 100000 252 396.8253968253968 +NODE_3209_length_284_cov_229.934 78330 284 275.8098591549296 +NODE_320_length_54575_cov_223.475 21772288 54575 398.9425194686212 +NODE_3210_length_283_cov_245.605 100000 283 353.35689045936397 +NODE_3211_length_283_cov_244.654 100000 283 353.35689045936397 +NODE_3212_length_282_cov_246.031 100000 282 354.6099290780142 +NODE_3213_length_282_cov_246.626 100000 282 354.6099290780142 +NODE_3214_length_281_cov_247.774 100000 281 355.87188612099646 +NODE_3215_length_281_cov_247.584 100000 281 355.87188612099646 +NODE_3216_length_280_cov_248.84 100000 280 357.14285714285717 +NODE_3217_length_280_cov_248.453 100000 280 357.14285714285717 +NODE_3218_length_280_cov_248.573 100000 280 357.14285714285717 +NODE_3219_length_279_cov_249.929 100000 279 358.42293906810033 +NODE_321_length_54549_cov_224.054 21649451 54380 398.1142147848474 +NODE_3220_length_279_cov_376.728 62604 279 224.38709677419354 +NODE_3221_length_278_cov_250.973 100000 278 359.71223021582733 +NODE_3222_length_278_cov_250.507 100000 278 359.71223021582733 +NODE_3223_length_277_cov_252.243 100000 277 361.01083032490976 +NODE_3224_length_276_cov_253.344 100000 276 362.3188405797101 +NODE_3225_length_276_cov_203.48 36322 276 131.6014492753623 +NODE_3226_length_275_cov_254.541 100000 275 363.6363636363636 +NODE_3227_length_275_cov_254.386 100000 275 363.6363636363636 +NODE_3228_length_274_cov_255.644 100000 274 364.963503649635 +NODE_3229_length_274_cov_567.438 75185 274 274.3978102189781 +NODE_322_length_54469_cov_223.799 21749150 54469 399.29409388826673 +NODE_3230_length_273_cov_256.817 100000 273 366.3003663003663 +NODE_3231_length_273_cov_256.867 100000 273 366.3003663003663 +NODE_3232_length_272_cov_257.949 100000 272 367.6470588235294 +NODE_3233_length_271_cov_43.7222 16875 271 62.269372693726936 +NODE_3234_length_271_cov_226.912 66291 265 250.15471698113208 +NODE_3235_length_270_cov_260.433 100000 270 370.3703703703704 +NODE_3236_length_270_cov_260.423 100000 270 370.3703703703704 +NODE_3237_length_269_cov_261.65 100000 269 371.74721189591077 +NODE_3238_length_269_cov_205.182 82118 269 305.271375464684 +NODE_3239_length_268_cov_262.77 100000 268 373.13432835820896 +NODE_323_length_54184_cov_224.495 21619041 54184 398.99307913775283 +NODE_3240_length_268_cov_262.606 100000 268 373.13432835820896 +NODE_3241_length_268_cov_249.108 61327 268 228.8320895522388 +NODE_3242_length_267_cov_264.137 100000 267 374.53183520599254 +NODE_3243_length_266_cov_2319.72 707027 266 2657.996240601504 +NODE_3244_length_265_cov_135.805 39527 265 149.15849056603773 +NODE_3245_length_265_cov_266.529 100000 265 377.35849056603774 +NODE_3246_length_264_cov_267.861 100000 264 378.7878787878788 +NODE_3247_length_264_cov_121.507 39980 264 151.43939393939394 +NODE_3248_length_264_cov_267.861 100000 264 378.7878787878788 +NODE_3249_length_263_cov_63.2885 23625 263 89.82889733840304 +NODE_324_length_54096_cov_223.026 21514863 54096 397.7163376220053 +NODE_3250_length_261_cov_271.84 100000 261 383.1417624521073 +NODE_3251_length_261_cov_271.796 100000 261 383.1417624521073 +NODE_3252_length_261_cov_214.927 67214 261 257.5249042145594 +NODE_3253_length_260_cov_273.161 100000 260 384.61538461538464 +NODE_3254_length_260_cov_273.049 100000 260 384.61538461538464 +NODE_3255_length_259_cov_274.402 100000 259 386.1003861003861 +NODE_3256_length_259_cov_123.034 38301 259 147.8803088803089 +NODE_3257_length_259_cov_274.088 100000 259 386.1003861003861 +NODE_3258_length_147_cov_116.282 12603 147 85.73469387755102 +NODE_3259_length_256_cov_118.289 17237 254 67.86220472440945 +NODE_325_length_53799_cov_223.879 21499355 53799 399.6236918901838 +NODE_3260_length_256_cov_278.478 100000 256 390.625 +NODE_3261_length_256_cov_278.478 100000 256 390.625 +NODE_3262_length_253_cov_282.818 100000 253 395.25691699604744 +NODE_3263_length_253_cov_282.646 100000 253 395.25691699604744 +NODE_3264_length_253_cov_282.641 100000 253 395.25691699604744 +NODE_3265_length_252_cov_284.046 100000 252 396.8253968253968 +NODE_3266_length_252_cov_283.528 100000 252 396.8253968253968 +NODE_3267_length_251_cov_285.704 100000 251 398.40637450199205 +NODE_3268_length_251_cov_285.689 100000 251 398.40637450199205 +NODE_3269_length_251_cov_189.556 20694 251 82.44621513944223 +NODE_326_length_53236_cov_220.975 20992715 53226 394.4071506406643 +NODE_3270_length_250_cov_286.631 100000 250 400.0 +NODE_3271_length_249_cov_144.33 50000 249 200.80321285140562 +NODE_3272_length_249_cov_144.325 50000 249 200.80321285140562 +NODE_3273_length_249_cov_144.186 50000 249 200.80321285140562 +NODE_3274_length_249_cov_144.186 49999 249 200.7991967871486 +NODE_3275_length_249_cov_131.088 59046 249 237.13253012048193 +NODE_3276_length_249_cov_350.356 10225 247 41.39676113360324 +NODE_3277_length_248_cov_145.062 50000 248 201.61290322580646 +NODE_3278_length_248_cov_144.772 50000 248 201.61290322580646 +NODE_3279_length_248_cov_627.207 110420 248 445.241935483871 +NODE_327_length_52993_cov_223.646 21149488 52993 399.0996546713717 +NODE_3280_length_248_cov_860.648 138861 248 559.9233870967741 +NODE_3281_length_247_cov_79.3385 27250 247 110.32388663967612 +NODE_3282_length_245_cov_147.274 50000 245 204.08163265306123 +NODE_3283_length_245_cov_147.295 50000 245 204.08163265306123 +NODE_3284_length_245_cov_216.684 58871 245 240.28979591836736 +NODE_3285_length_244_cov_148.138 50000 244 204.91803278688525 +NODE_3286_length_244_cov_148.148 50000 244 204.91803278688525 +NODE_3287_length_244_cov_148.143 50000 244 204.91803278688525 +NODE_3288_length_244_cov_148.138 50000 244 204.91803278688525 +NODE_3289_length_244_cov_148.127 50000 244 204.91803278688525 +NODE_328_length_52986_cov_224.277 21194530 52986 400.0024534782773 +NODE_3290_length_244_cov_147.984 50000 244 204.91803278688525 +NODE_3291_length_243_cov_148.904 50000 243 205.76131687242798 +NODE_3292_length_242_cov_149.733 50000 242 206.61157024793388 +NODE_3293_length_242_cov_149.733 50000 242 206.61157024793388 +NODE_3294_length_242_cov_149.722 50000 242 206.61157024793388 +NODE_3295_length_242_cov_149.695 50000 242 206.61157024793388 +NODE_3296_length_241_cov_236.161 22209 241 92.15352697095436 +NODE_3297_length_240_cov_151.351 50000 240 208.33333333333334 +NODE_3298_length_239_cov_152.174 50000 239 209.20502092050208 +NODE_3299_length_239_cov_152.163 50000 239 209.20502092050208 +NODE_329_length_52906_cov_223.141 21058615 52896 398.11356246219 +NODE_32_length_315756_cov_224.26 126550595 315756 400.78603415295356 +NODE_3300_length_239_cov_152.147 50000 239 209.20502092050208 +NODE_3301_length_238_cov_153.005 50000 238 210.08403361344537 +NODE_3302_length_237_cov_153.841 50000 237 210.9704641350211 +NODE_3303_length_237_cov_153.819 50000 237 210.9704641350211 +NODE_3304_length_236_cov_166.608 16518 236 69.99152542372882 +NODE_3305_length_235_cov_155.539 50000 235 212.7659574468085 +NODE_3306_length_125_cov_185.567 11927 125 95.416 +NODE_3307_length_234_cov_156.257 50000 234 213.67521367521368 +NODE_3308_length_233_cov_123.831 29561 233 126.87124463519314 +NODE_3309_length_233_cov_157.028 49999 233 214.58798283261802 +NODE_330_length_52804_cov_224.791 21100041 52804 399.591716536626 +NODE_3310_length_233_cov_157.185 50000 233 214.59227467811158 +NODE_3311_length_232_cov_158.186 50000 232 215.51724137931035 +NODE_3312_length_232_cov_158.119 50000 232 215.51724137931035 +NODE_3313_length_232_cov_158.051 50000 232 215.51724137931035 +NODE_3314_length_232_cov_157.78 50000 232 215.51724137931035 +NODE_3315_length_232_cov_1621.72 408299 232 1759.9094827586207 +NODE_3316_length_230_cov_160 50000 230 217.3913043478261 +NODE_3317_length_230_cov_159.994 50000 230 217.3913043478261 +NODE_3318_length_230_cov_159.989 50000 230 217.3913043478261 +NODE_3319_length_229_cov_160.914 50000 229 218.34061135371178 +NODE_331_length_52450_cov_224.328 20978945 52450 399.97988560533844 +NODE_3320_length_229_cov_160.914 50000 229 218.34061135371178 +NODE_3321_length_229_cov_160.598 49999 229 218.33624454148472 +NODE_3322_length_228_cov_161.763 50000 228 219.2982456140351 +NODE_3323_length_227_cov_162.791 50000 227 220.26431718061673 +NODE_3324_length_227_cov_162.791 50000 227 220.26431718061673 +NODE_3325_length_226_cov_303.158 53020 226 234.60176991150442 +NODE_3326_length_226_cov_114.199 17669 141 125.31205673758865 +NODE_3327_length_225_cov_22.2353 6750 225 30.0 +NODE_3328_length_224_cov_165.68 50000 224 223.21428571428572 +NODE_3329_length_224_cov_46.8047 14125 224 63.058035714285715 +NODE_332_length_52382_cov_223.864 20945879 52382 399.8678744606926 +NODE_3330_length_224_cov_165.68 50000 224 223.21428571428572 +NODE_3331_length_224_cov_165.592 50000 224 223.21428571428572 +NODE_3332_length_224_cov_165.379 50000 224 223.21428571428572 +NODE_3333_length_224_cov_165.556 49999 224 223.20982142857142 +NODE_3334_length_223_cov_166.661 50000 223 224.2152466367713 +NODE_3335_length_222_cov_242.994 53816 222 242.4144144144144 +NODE_3336_length_222_cov_167.599 50000 222 225.22522522522522 +NODE_3337_length_221_cov_168.645 50000 221 226.2443438914027 +NODE_3338_length_221_cov_185.934 65369 221 295.7873303167421 +NODE_3339_length_220_cov_414.424 95533 220 434.2409090909091 +NODE_333_length_51999_cov_224.096 20822976 51999 400.44954710667514 +NODE_3340_length_220_cov_169.673 50000 220 227.27272727272728 +NODE_3341_length_218_cov_171.761 50000 218 229.3577981651376 +NODE_3342_length_218_cov_2716.09 512918 218 2352.8348623853212 +NODE_3343_length_217_cov_172.84 50000 217 230.4147465437788 +NODE_3344_length_217_cov_172.833 50000 217 230.4147465437788 +NODE_3345_length_217_cov_172.827 50000 217 230.4147465437788 +NODE_3346_length_217_cov_172.802 50000 217 230.4147465437788 +NODE_3347_length_217_cov_172.648 50000 217 230.4147465437788 +NODE_3348_length_217_cov_172.488 50000 217 230.4147465437788 +NODE_3349_length_217_cov_172.84 50000 217 230.4147465437788 +NODE_334_length_51812_cov_223.179 20620683 51812 397.9904848297692 +NODE_3350_length_216_cov_173.913 50000 216 231.4814814814815 +NODE_3351_length_216_cov_173.901 50000 216 231.4814814814815 +NODE_3352_length_216_cov_173.677 50000 216 231.4814814814815 +NODE_3353_length_216_cov_173.335 50000 216 231.4814814814815 +NODE_3354_length_215_cov_174.994 50000 215 232.5581395348837 +NODE_3355_length_215_cov_174.988 50000 215 232.5581395348837 +NODE_3356_length_215_cov_788.375 26846 215 124.86511627906977 +NODE_3357_length_215_cov_174.606 50000 215 232.5581395348837 +NODE_3358_length_215_cov_440.869 99148 215 461.153488372093 +NODE_3359_length_214_cov_175.377 50000 214 233.6448598130841 +NODE_335_length_51808_cov_224.304 20619424 51808 397.9969116738728 +NODE_3360_length_214_cov_176.038 50000 214 233.6448598130841 +NODE_3361_length_213_cov_177.184 50000 213 234.7417840375587 +NODE_3362_length_213_cov_176.987 49999 213 234.73708920187792 +NODE_3363_length_212_cov_178.229 50000 212 235.8490566037736 +NODE_3364_length_211_cov_460.788 93933 211 445.18009478672985 +NODE_3365_length_211_cov_179.128 50000 211 236.96682464454977 +NODE_3366_length_209_cov_181.805 50000 209 239.23444976076556 +NODE_3367_length_209_cov_181.63 50000 209 239.23444976076556 +NODE_3368_length_209_cov_181.74 50000 209 239.23444976076556 +NODE_3369_length_209_cov_181.468 50000 209 239.23444976076556 +NODE_336_length_51788_cov_224.521 20749323 51788 400.65889781416547 +NODE_3370_length_208_cov_183 50000 208 240.3846153846154 +NODE_3371_length_208_cov_182.882 50000 208 240.3846153846154 +NODE_3372_length_207_cov_209.928 39148 207 189.12077294685992 +NODE_3373_length_206_cov_185.43 50000 206 242.71844660194174 +NODE_3374_length_206_cov_185.43 50000 206 242.71844660194174 +NODE_3375_length_206_cov_185.43 50000 206 242.71844660194174 +NODE_3376_length_206_cov_185.43 50000 206 242.71844660194174 +NODE_3377_length_206_cov_185.841 39877 206 193.57766990291262 +NODE_3378_length_206_cov_185.093 50000 206 242.71844660194174 +NODE_3379_length_206_cov_184.801 50000 206 242.71844660194174 +NODE_337_length_51757_cov_226.722 20688697 51757 399.7275151187279 +NODE_3380_length_206_cov_184.887 50000 206 242.71844660194174 +NODE_3381_length_206_cov_185.351 49999 206 242.7135922330097 +NODE_3382_length_205_cov_185.813 43555 205 212.46341463414635 +NODE_3383_length_204_cov_237.228 43087 204 211.2107843137255 +NODE_3385_length_192_cov_233.971 13216 192 68.83333333333333 +NODE_3386_length_187_cov_360.985 63607 187 340.14438502673795 +NODE_3389_length_180_cov_427.088 68595 180 381.0833333333333 +NODE_338_length_51742_cov_224.505 20650451 51742 399.10422867303157 +NODE_3391_length_175_cov_401.275 58617 175 334.95428571428573 +NODE_3392_length_174_cov_232.597 32916 174 189.17241379310346 +NODE_3393_length_174_cov_234.412 35142 174 201.9655172413793 +NODE_3394_length_172_cov_168.393 27907 172 162.25 +NODE_3395_length_172_cov_1356.68 9902 170 58.247058823529414 +NODE_3396_length_171_cov_430.776 62205 171 363.7719298245614 +NODE_3397_length_171_cov_373.009 35538 171 207.82456140350877 +NODE_3398_length_171_cov_134.267 17290 171 101.11111111111111 +NODE_3399_length_171_cov_672.759 31130 171 182.046783625731 +NODE_339_length_51439_cov_223.868 20549487 51439 399.4923501623282 +NODE_33_length_312266_cov_223.829 124910622 312266 400.0135205241685 +NODE_3400_length_170_cov_221.417 29832 170 175.48235294117646 +NODE_3401_length_164_cov_246.083 31743 164 193.5548780487805 +NODE_3402_length_163_cov_246.241 31645 163 194.14110429447854 +NODE_3403_length_162_cov_222.28 29904 162 184.59259259259258 +NODE_3404_length_160_cov_221.419 25791 160 161.19375 +NODE_3405_length_160_cov_237.962 26276 160 164.225 +NODE_3406_length_157_cov_184.686 16114 157 102.63694267515923 +NODE_3407_length_153_cov_625.765 5062 153 33.08496732026144 +NODE_3409_length_149_cov_226.011 13665 149 91.71140939597315 +NODE_340_length_51279_cov_224.243 20531677 51279 400.39152479572533 +NODE_3410_length_149_cov_213.213 23771 149 159.53691275167785 +NODE_3411_length_148_cov_216.581 1239 147 8.428571428571429 +NODE_3412_length_147_cov_250.967 14185 147 96.49659863945578 +NODE_3413_length_146_cov_110.374 5362 146 36.726027397260275 +NODE_3414_length_142_cov_115.149 10225 142 72.00704225352112 +NODE_3415_length_142_cov_217.471 20528 142 144.56338028169014 +NODE_3416_length_139_cov_224.262 20521 139 147.63309352517985 +NODE_3417_length_139_cov_223.655 9825 139 70.68345323741008 +NODE_3418_length_135_cov_108.3 9785 135 72.48148148148148 +NODE_3419_length_135_cov_219.625 17899 135 132.5851851851852 +NODE_341_length_51138_cov_221.705 20231817 51118 395.7865526820298 +NODE_3420_length_134_cov_365.443 19474 134 145.32835820895522 +NODE_3421_length_133_cov_466.603 18317 133 137.7218045112782 +NODE_3422_length_132_cov_222.299 16006 132 121.25757575757575 +NODE_3423_length_132_cov_408.883 16837 132 127.5530303030303 +NODE_3424_length_130_cov_242.08 19117 130 147.05384615384617 +NODE_3426_length_128_cov_104.616 8420 128 65.78125 +NODE_3428_length_120_cov_413.938 2707 120 22.558333333333334 +NODE_342_length_51052_cov_222.915 20320514 51052 398.0356107498237 +NODE_3431_length_111_cov_234.304 4660 111 41.98198198198198 +NODE_3432_length_111_cov_145.643 2609 111 23.504504504504503 +NODE_3434_length_108_cov_453.679 4916 108 45.51851851851852 +NODE_3437_length_105_cov_45.64 315 105 3.0 +NODE_343_length_50985_cov_225.855 20408205 50985 400.27861135628126 +NODE_344_length_50946_cov_223.815 20349471 50946 399.43216346720055 +NODE_345_length_50565_cov_223.864 20199403 50565 399.4740037575398 +NODE_346_length_50372_cov_226.218 20232701 50372 401.6656277296911 +NODE_347_length_50134_cov_223.729 20000521 50134 398.9412574300874 +NODE_348_length_49823_cov_225.868 19894689 49823 399.30732794091085 +NODE_349_length_49772_cov_223.03 19806987 49762 398.03438366625136 +NODE_34_length_307722_cov_223.803 122936583 307722 399.50534248445024 +NODE_350_length_49771_cov_223.944 19849875 49738 399.08872491857335 +NODE_351_length_49770_cov_230.098 20279556 49762 407.53096740484705 +NODE_352_length_49623_cov_221.382 19587729 49613 394.8104125934735 +NODE_353_length_49405_cov_230.125 19712411 49405 398.9962756805991 +NODE_354_length_48949_cov_224.611 19603162 48949 400.4813581482768 +NODE_355_length_48738_cov_223.63 19449388 48738 399.06003529073826 +NODE_356_length_48710_cov_223.793 19449314 48710 399.28790802709915 +NODE_357_length_48428_cov_224.153 19349531 48428 399.55255224250436 +NODE_358_length_48353_cov_223.696 19299411 48353 399.1357516596695 +NODE_359_length_48284_cov_249.266 19197155 48284 397.5883315383978 +NODE_35_length_289881_cov_223.799 115809965 289881 399.5086432018656 +NODE_360_length_48081_cov_224.875 19221705 48081 399.77756286266924 +NODE_361_length_48012_cov_224.078 19199353 48012 399.8865491960343 +NODE_362_length_47953_cov_222.792 19048706 47953 397.2370028986716 +NODE_363_length_47917_cov_230.106 19143698 47917 399.5178746582632 +NODE_364_length_47912_cov_221.489 18935624 47902 395.29923594004424 +NODE_365_length_47751_cov_224.481 19099579 47751 399.9828066427928 +NODE_366_length_47751_cov_224.179 19099589 47751 399.98301606249083 +NODE_367_length_47503_cov_226.559 18998359 47503 399.9401932509526 +NODE_368_length_47452_cov_223.793 18949423 47452 399.33876338194386 +NODE_369_length_47325_cov_224.486 18897859 47325 399.3208452192287 +NODE_36_length_281779_cov_223.764 112620960 281779 399.6783294709684 +NODE_370_length_47285_cov_223.522 18899613 47285 399.69573860632335 +NODE_371_length_47223_cov_223.183 18801254 47223 398.13764479173284 +NODE_372_length_47211_cov_223.97 18899520 47211 400.32026434517377 +NODE_373_length_47091_cov_223.459 18749438 47091 398.1533201673356 +NODE_374_length_46913_cov_224.22 18774294 46913 400.1938481870697 +NODE_375_length_46831_cov_227.668 18712293 46831 399.5706476479255 +NODE_376_length_46799_cov_223.817 18699587 46799 399.5723626573217 +NODE_377_length_46797_cov_223.762 18699347 46797 399.5843109601043 +NODE_378_length_46791_cov_223.985 18699115 46773 399.7843841532508 +NODE_379_length_46739_cov_224.854 18699472 46739 400.0828430218875 +NODE_37_length_281124_cov_225.682 112272812 281124 399.3711387145886 +NODE_380_length_46602_cov_223.291 18600196 46602 399.1287069224497 +NODE_381_length_46599_cov_223.665 18599456 46599 399.138522285886 +NODE_382_length_46549_cov_223.964 18599455 46549 399.5672302305098 +NODE_383_length_46543_cov_224.963 18667843 46543 401.0880905829018 +NODE_384_length_46525_cov_225 18659807 46525 401.0705427189683 +NODE_385_length_46525_cov_223.912 18599151 46525 399.76681354110696 +NODE_386_length_46386_cov_222.963 18453764 46372 397.9505736220133 +NODE_387_length_46166_cov_223.901 18453024 46166 399.7102629640861 +NODE_388_length_45825_cov_229.639 18670517 45825 407.43081287506817 +NODE_389_length_45773_cov_223.63 18172462 45773 397.0126930723352 +NODE_38_length_280079_cov_224.186 112090780 280079 400.2112975267692 +NODE_390_length_45666_cov_224.645 18248477 45666 399.60751981780754 +NODE_391_length_45656_cov_231.499 18135269 45656 397.2154590853338 +NODE_392_length_45479_cov_223.66 18149436 45479 399.0728907847578 +NODE_393_length_45329_cov_222.185 17911393 45309 395.31644927056436 +NODE_394_length_45318_cov_223.595 17992784 45318 397.0339379496006 +NODE_395_length_45123_cov_224.504 18099504 45123 401.11481949338474 +NODE_396_length_45079_cov_223.641 17999196 45079 399.28117305175357 +NODE_397_length_44998_cov_226.552 17989647 44998 399.78770167563005 +NODE_398_length_44967_cov_223.86 17956027 44967 399.3156537016034 +NODE_399_length_44872_cov_225.987 17906591 44872 399.0593465858442 +NODE_39_length_279050_cov_223.61 111406010 279040 399.2474555619266 +NODE_3_length_1063064_cov_225.095 425755276 1063064 400.49825410323365 +NODE_400_length_44752_cov_224.458 17899496 44752 399.9708616374687 +NODE_401_length_44651_cov_234.238 17849261 44651 399.7505319029809 +NODE_402_length_44535_cov_222.41 17651286 44525 396.4353958450309 +NODE_403_length_44485_cov_225.871 17945978 44485 403.4163875463639 +NODE_404_length_44211_cov_222.259 17523311 44201 396.4460306327911 +NODE_405_length_44052_cov_223.894 17599388 44052 399.51393807318624 +NODE_406_length_43937_cov_222.642 17455105 43927 397.36619846563616 +NODE_407_length_43905_cov_223.167 17499530 43905 398.57715522150096 +NODE_408_length_43802_cov_223.964 17500027 43802 399.52575224875574 +NODE_409_length_43746_cov_223.579 17449427 43746 398.88051478992367 +NODE_40_length_269259_cov_224.279 107886785 269259 400.6803300911019 +NODE_410_length_43742_cov_221.611 17233732 43732 394.0760084148907 +NODE_411_length_43707_cov_223.794 17449547 43707 399.23918365479216 +NODE_412_length_43550_cov_223.725 17399245 43550 399.52342135476465 +NODE_413_length_43526_cov_225.207 17447473 43526 400.8517437853237 +NODE_414_length_43401_cov_225.008 17349552 43401 399.75005184212347 +NODE_415_length_43372_cov_221.558 17136073 43372 395.0952918933874 +NODE_416_length_43176_cov_223.837 17253963 43176 399.6193023902168 +NODE_417_length_42898_cov_229.045 17195768 42898 400.85244067322486 +NODE_418_length_42796_cov_223.688 17061205 42796 398.6635433218058 +NODE_419_length_42554_cov_222.829 16916033 42544 397.61265983452427 +NODE_41_length_259699_cov_224.012 103925884 259699 400.1782217105187 +NODE_420_length_42528_cov_229.604 17049469 42528 400.8998542136945 +NODE_421_length_42395_cov_224.404 16995907 42395 400.89413845972405 +NODE_422_length_42272_cov_224.018 16899461 42272 399.7790736184709 +NODE_423_length_42172_cov_223.741 16762383 42172 397.47659584558477 +NODE_424_length_42092_cov_223.914 16799547 42092 399.1149624631759 +NODE_425_length_41823_cov_225.011 16778589 41823 401.1809052435263 +NODE_426_length_41805_cov_223.636 16699350 41805 399.4581987800502 +NODE_427_length_41772_cov_229.532 16799521 41772 402.17181365508 +NODE_428_length_41752_cov_225.002 16749589 41752 401.16854282429586 +NODE_429_length_41631_cov_224.266 16648928 41631 399.91660061012226 +NODE_42_length_245814_cov_223.764 98225443 245814 399.5925496513624 +NODE_430_length_41450_cov_225.1 16625260 41450 401.091917973462 +NODE_431_length_41429_cov_223.541 16549496 41429 399.4664606917859 +NODE_432_length_41390_cov_225.02 16549954 41390 399.8539260690988 +NODE_433_length_41363_cov_224.243 16549389 41363 400.1012740855354 +NODE_434_length_41336_cov_224.405 16549537 41336 400.3661941165086 +NODE_435_length_41335_cov_223.715 16499455 41335 399.16426756985607 +NODE_436_length_41327_cov_223.843 16499491 41327 399.2424081109202 +NODE_437_length_41325_cov_225.079 16599450 41325 401.68058076225043 +NODE_438_length_41269_cov_223.569 16420476 41269 397.888875427076 +NODE_439_length_41199_cov_223.336 16381602 41199 397.6213500327678 +NODE_43_length_245367_cov_223.921 98097326 245347 399.83095778631895 +NODE_440_length_41000_cov_226.687 16399603 41000 399.99031707317073 +NODE_441_length_40940_cov_224.345 16349656 40940 399.3565217391304 +NODE_442_length_40927_cov_224.262 16452918 40927 402.00645050944365 +NODE_443_length_40838_cov_224.063 16318964 40838 399.6024291101425 +NODE_444_length_40703_cov_223.763 16249727 40703 399.22676461194504 +NODE_445_length_40660_cov_224.023 16249538 40660 399.6443187407772 +NODE_446_length_40631_cov_225.268 16311648 40631 401.4581969432207 +NODE_447_length_40477_cov_224.319 16206645 40477 400.3914568767448 +NODE_448_length_40456_cov_223.76 16149577 40456 399.188674115088 +NODE_449_length_40241_cov_224.536 16099262 40241 400.0711214930046 +NODE_44_length_237090_cov_224.885 94748675 237090 399.6316799527606 +NODE_450_length_40178_cov_223.631 16031168 40178 399.0036338294589 +NODE_451_length_40163_cov_222.151 15914820 40140 396.4828101644245 +NODE_452_length_40063_cov_224.835 16023093 40063 399.94740783266354 +NODE_453_length_40038_cov_223.832 15999494 40038 399.6077226634697 +NODE_454_length_40027_cov_228.378 16022282 40027 400.28685637194894 +NODE_455_length_39932_cov_225.599 15949570 39899 399.74861525351514 +NODE_456_length_39739_cov_223.489 15849479 39739 398.839402098694 +NODE_457_length_39689_cov_223.655 15793693 39689 397.9362795736854 +NODE_458_length_39549_cov_228.363 15830746 39549 400.2818276062606 +NODE_459_length_39510_cov_224.663 15799649 39510 399.8898759807644 +NODE_45_length_236726_cov_224.26 94820122 236726 400.5479837449203 +NODE_460_length_39480_cov_223.884 15774855 39480 399.56572948328267 +NODE_461_length_39428_cov_223.424 15689446 39428 397.92649893476715 +NODE_462_length_39307_cov_220.403 15453301 39297 393.2437845128127 +NODE_463_length_39221_cov_223.675 15649441 39221 399.0066800948471 +NODE_464_length_39204_cov_223.864 15648061 39204 399.1445005611672 +NODE_465_length_39022_cov_224.115 15599290 39022 399.75629132284354 +NODE_466_length_39013_cov_223.365 15538322 39013 398.28575090354497 +NODE_467_length_38859_cov_223.814 15533298 38859 399.7348876708098 +NODE_468_length_38825_cov_225.241 15591620 38825 401.5871216999356 +NODE_469_length_38741_cov_230.552 15449537 38741 398.79035130740044 +NODE_46_length_234447_cov_223.916 93746852 234447 399.8637303953559 +NODE_470_length_38693_cov_224.261 15449744 38693 399.2904142868219 +NODE_471_length_38648_cov_222.629 15350831 38648 397.1959997930035 +NODE_472_length_38376_cov_224.238 15349579 38376 399.97860642067957 +NODE_473_length_38019_cov_225.942 15164510 38019 398.866619321918 +NODE_474_length_37924_cov_225.145 15148950 37924 399.45548992722286 +NODE_475_length_37692_cov_223.646 15022616 37692 398.56245357104956 +NODE_476_length_37440_cov_223.507 14912150 37440 398.2946047008547 +NODE_477_length_37546_cov_225.102 15069652 37546 401.36504554413256 +NODE_478_length_37494_cov_223.527 14949604 37494 398.7199018509628 +NODE_479_length_37429_cov_221.161 14764186 37429 394.4584680328088 +NODE_47_length_231993_cov_230.018 92531609 231993 398.85517666481314 +NODE_480_length_37268_cov_224.144 14899697 37268 399.79867446602987 +NODE_481_length_37227_cov_223.254 14849579 37227 398.89271227872246 +NODE_482_length_37151_cov_224.03 14849561 37151 399.7082447309628 +NODE_483_length_37030_cov_223.86 14799623 37030 399.6657574939238 +NODE_484_length_37029_cov_224.674 14845291 37029 400.90985443841316 +NODE_485_length_36924_cov_223.929 14749702 36924 399.4611093056007 +NODE_486_length_36830_cov_224.438 14743798 36830 400.3203366820527 +NODE_487_length_36766_cov_224.132 14699538 36766 399.81336016972205 +NODE_488_length_36656_cov_224.046 14649711 36656 399.65383566128327 +NODE_489_length_36646_cov_217.054 14187936 36616 387.4791348044571 +NODE_48_length_229132_cov_224.892 91646917 229132 399.97432484332177 +NODE_490_length_36636_cov_228.078 14686221 36636 400.86857189649527 +NODE_491_length_36561_cov_223.852 14599637 36561 399.32269358059136 +NODE_492_length_36519_cov_222.313 14473610 36411 397.5065227541128 +NODE_493_length_36379_cov_227.984 14449414 36379 397.19107177217626 +NODE_494_length_36280_cov_224.081 14499583 36280 399.6577453142227 +NODE_495_length_36188_cov_225.647 14504119 36188 400.7991323090527 +NODE_496_length_36128_cov_217.11 13991823 36108 387.4992522432702 +NODE_497_length_36076_cov_222.693 14308376 36076 396.6175850981262 +NODE_498_length_35991_cov_223.994 14364003 35991 399.0998582979078 +NODE_499_length_35884_cov_219.383 13977042 35854 389.83215261895464 +NODE_49_length_224269_cov_223.325 89381665 224249 398.5822233320996 +NODE_4_length_1031470_cov_223.812 412423419 1030920 400.053756838552 +NODE_500_length_35879_cov_223.241 14299594 35879 398.5505170155244 +NODE_501_length_35878_cov_225.395 14357001 35878 400.16168682758234 +NODE_502_length_35834_cov_223.813 14299674 35834 399.05324552101354 +NODE_503_length_35769_cov_224.622 14299596 35769 399.77623081439236 +NODE_504_length_35690_cov_224.518 14249659 35690 399.26195012608576 +NODE_505_length_35584_cov_224.256 14233555 35584 399.99873538669067 +NODE_506_length_35545_cov_236.663 14196484 35545 399.3946827964552 +NODE_507_length_35527_cov_224.416 14194443 35527 399.53958960790385 +NODE_508_length_35494_cov_223.482 14149533 35494 398.645771116245 +NODE_509_length_35463_cov_224.708 14208033 35463 400.64385415785466 +NODE_50_length_219083_cov_222.891 87143987 219063 397.80331228915884 +NODE_510_length_35399_cov_229.775 14332692 35399 404.8897426480974 +NODE_511_length_35391_cov_224.091 14149600 35391 399.8078607555593 +NODE_512_length_35276_cov_223.081 14049518 35276 398.27412405034585 +NODE_513_length_35068_cov_227.215 14053426 35068 400.7478612980495 +NODE_514_length_34789_cov_225.404 13906217 34789 399.73028830952313 +NODE_515_length_34654_cov_224.086 13849544 34654 399.65210365325794 +NODE_516_length_34607_cov_223.365 13799483 34607 398.7483168145173 +NODE_517_length_34487_cov_224.058 13726880 34487 398.0305622408444 +NODE_518_length_34396_cov_223.854 13749554 34396 399.742818932434 +NODE_519_length_34244_cov_227.534 13649689 34244 398.60089358719773 +NODE_51_length_210346_cov_223.865 84097533 210346 399.80571534519316 +NODE_520_length_34209_cov_224.178 13695886 34209 400.35914525417286 +NODE_521_length_34044_cov_223.823 13599567 34044 399.4703031371167 +NODE_522_length_34032_cov_223.004 13549580 34032 398.14233662435356 +NODE_523_length_33830_cov_223.75 13499726 33830 399.04599467927875 +NODE_524_length_33810_cov_223.657 13499697 33810 399.28118899733806 +NODE_525_length_33771_cov_223.545 13472966 33771 398.95075656628467 +NODE_526_length_33714_cov_224.157 13464750 33714 399.3815625556149 +NODE_527_length_33590_cov_224.371 13449706 33590 400.40803810657934 +NODE_528_length_33539_cov_224.007 13399483 33516 399.7936209571548 +NODE_529_length_33538_cov_223.751 13399816 33538 399.54129643985925 +NODE_52_length_209828_cov_223.672 83793614 209828 399.3442915149551 +NODE_530_length_33523_cov_223.886 13399531 33523 399.711571160099 +NODE_531_length_33361_cov_224.764 13277036 33361 397.9807559725428 +NODE_532_length_33326_cov_222.532 13234752 33326 397.12992858428856 +NODE_533_length_33283_cov_227.776 13303656 33283 399.71324700297447 +NODE_534_length_33223_cov_223.24 13249743 33223 398.8123589079854 +NODE_535_length_33160_cov_230.669 13180623 33160 397.485615199035 +NODE_536_length_33147_cov_222.755 13153477 33147 396.82254804356353 +NODE_537_length_33123_cov_223.43 13199644 33123 398.5038794795157 +NODE_538_length_33040_cov_223.975 13199632 33040 399.5046004842615 +NODE_539_length_33024_cov_227.026 13146325 33024 398.0839692344961 +NODE_53_length_209805_cov_225.011 83857246 209805 399.69136102571434 +NODE_540_length_32958_cov_223.706 13149650 32958 398.9820377450088 +NODE_541_length_32810_cov_222.24 13003021 32800 396.4335670731707 +NODE_542_length_32754_cov_227.176 13099551 32754 399.93744275508334 +NODE_543_length_32725_cov_223.618 13049527 32725 398.76323911382735 +NODE_544_length_32651_cov_224.463 13063638 32651 400.0991700101069 +NODE_545_length_32650_cov_228.218 13149577 32650 402.7435528330781 +NODE_546_length_32567_cov_223.761 12999566 32567 399.16375472103664 +NODE_547_length_32507_cov_220.701 12788615 32497 393.53217220051084 +NODE_548_length_32434_cov_223.352 12918882 32434 398.312943207745 +NODE_549_length_32408_cov_224.051 12949719 32408 399.58402246358924 +NODE_54_length_207642_cov_224.315 83149350 207642 400.44571907417577 +NODE_550_length_32401_cov_223.806 12949639 32401 399.6678806209685 +NODE_551_length_32320_cov_223.376 12899429 32320 399.1159962871287 +NODE_552_length_32292_cov_223.999 12899645 32292 399.4687538709278 +NODE_553_length_32231_cov_223.535 12849754 32231 398.6768638888027 +NODE_554_length_32224_cov_231.493 12941734 32224 401.6178624627607 +NODE_555_length_32039_cov_223.862 12799691 32039 399.5034489216268 +NODE_556_length_31996_cov_223.372 12749515 31996 398.4721527690961 +NODE_557_length_31813_cov_224.192 12721131 31813 399.8720963128281 +NODE_558_length_31793_cov_223.378 12598045 31793 396.2521624256912 +NODE_559_length_31759_cov_224.245 12699649 31759 399.8755943197204 +NODE_55_length_204393_cov_224.005 81814006 204392 400.27988375278875 +NODE_560_length_31720_cov_222.363 12545398 31720 395.50435056746534 +NODE_561_length_31699_cov_223.728 12649615 31699 399.05407110634405 +NODE_562_length_31692_cov_223.806 12649466 31692 399.13751104379656 +NODE_563_length_31692_cov_223.744 12649657 31692 399.14353780133786 +NODE_564_length_31615_cov_225.278 12649766 31615 400.1191206705678 +NODE_565_length_31578_cov_224.597 12599734 31578 399.00354677306984 +NODE_566_length_31562_cov_223.788 12599768 31562 399.20689436664344 +NODE_567_length_31552_cov_224.406 12599633 31552 399.32913919878297 +NODE_568_length_31495_cov_223.981 12579561 31495 399.4145419907922 +NODE_569_length_31461_cov_225.162 12649752 31461 402.07723848574426 +NODE_56_length_204156_cov_221.641 80846316 204116 396.0802484861549 +NODE_570_length_31293_cov_225.958 12568385 31293 401.63566931901704 +NODE_571_length_31149_cov_224.05 12449750 31149 399.68377797040034 +NODE_572_length_31027_cov_226.326 12387111 31027 399.23650369033425 +NODE_573_length_31017_cov_223.984 12399718 31017 399.7716735983493 +NODE_574_length_31013_cov_223.898 12399450 31013 399.8145938799858 +NODE_575_length_30840_cov_223.36 12298244 30840 398.7757457846952 +NODE_576_length_30757_cov_226.755 12211095 30757 397.0184023149202 +NODE_577_length_30719_cov_223.484 12249314 30719 398.7536703668739 +NODE_578_length_30713_cov_223.677 12249788 30713 398.84700289779573 +NODE_579_length_30709_cov_226.951 12405050 30709 403.95486665147024 +NODE_57_length_203554_cov_224.112 81429483 203498 400.1488122733393 +NODE_580_length_30528_cov_223.929 12199637 30528 399.621232966457 +NODE_581_length_30527_cov_219.764 11892728 30517 389.70829373791656 +NODE_582_length_30497_cov_223.001 12112913 30497 397.18375577925696 +NODE_583_length_30460_cov_223.687 12149632 30460 398.87170059093893 +NODE_584_length_30457_cov_223.707 12149721 30457 398.91391141609483 +NODE_585_length_30452_cov_225.057 12217785 30452 401.2145343491396 +NODE_586_length_30392_cov_223.059 12082825 30392 397.565971308239 +NODE_587_length_30368_cov_226.113 12238936 30368 403.0208113804004 +NODE_588_length_30331_cov_223.742 12099693 30331 398.92166430384754 +NODE_589_length_30329_cov_220.806 11960221 30309 394.60955491768124 +NODE_58_length_202143_cov_224.131 80763878 202143 399.53833672202353 +NODE_590_length_30245_cov_224.27 12097895 30245 399.9965283517937 +NODE_591_length_30184_cov_222.65 11999483 30184 397.5444937715346 +NODE_592_length_30157_cov_224.085 12049675 30157 399.5647776635607 +NODE_593_length_30110_cov_224.839 11955402 30110 397.0575224178014 +NODE_594_length_29986_cov_223.496 11949676 29986 398.50850396851865 +NODE_595_length_29939_cov_224.418 11951518 29939 399.19563111660375 +NODE_596_length_29867_cov_224.529 11913429 29867 398.8826798808049 +NODE_597_length_29835_cov_223.537 11899737 29835 398.8515837104072 +NODE_598_length_29761_cov_224.261 11899691 29761 399.8417727898928 +NODE_599_length_29731_cov_223.402 11849476 29731 398.5562544145841 +NODE_59_length_198185_cov_219.307 77618749 198051 391.9129365668439 +NODE_5_length_937195_cov_225.122 375174896 937195 400.3167921297062 +NODE_600_length_29684_cov_223.925 11849665 29684 399.1936733593855 +NODE_601_length_29601_cov_224.309 11821798 29601 399.37157528461876 +NODE_602_length_29501_cov_224.31 11799682 29501 399.9756618419715 +NODE_603_length_29389_cov_223.822 11749608 29389 399.7961141923849 +NODE_604_length_29209_cov_224.862 11712234 29209 400.9803142866925 +NODE_605_length_29170_cov_223.348 11596160 29170 397.5371957490573 +NODE_606_length_29108_cov_224.46 11649153 29108 400.20451422289403 +NODE_607_length_29106_cov_223.435 11599687 29106 398.5325018896447 +NODE_608_length_29062_cov_223.865 11599753 29062 399.1381529144588 +NODE_609_length_29061_cov_221.315 11485860 29061 395.2327862083204 +NODE_60_length_197785_cov_225.075 79240766 197785 400.64092828070886 +NODE_610_length_29061_cov_223.623 11599675 29061 399.1492033997454 +NODE_611_length_29050_cov_222.795 11549609 29050 397.5769018932874 +NODE_612_length_28999_cov_221.892 11477126 28999 395.7766129866547 +NODE_613_length_28988_cov_223.467 11549650 28988 398.42866013522837 +NODE_614_length_28956_cov_223.523 11549753 28956 398.87253073628955 +NODE_615_length_28864_cov_223.364 11499753 28864 398.4116200110865 +NODE_616_length_28863_cov_224.417 11548473 28863 400.113397775699 +NODE_617_length_28792_cov_223.766 11499454 28792 399.3975409836066 +NODE_618_length_28687_cov_223.818 11449735 28687 399.1262592812075 +NODE_619_length_28686_cov_223.687 11399661 28686 397.39458272327965 +NODE_61_length_195291_cov_224.121 78154875 195291 400.1970136872667 +NODE_620_length_28619_cov_225.058 11429576 28619 399.37020860267654 +NODE_621_length_28493_cov_223.194 11329361 28493 397.61909942792965 +NODE_622_length_28317_cov_223.802 11299773 28317 399.0455556732705 +NODE_623_length_28306_cov_225.41 11354060 28306 401.11849077934005 +NODE_624_length_28289_cov_224.022 11299679 28289 399.4372017391919 +NODE_625_length_28271_cov_224.276 11299720 28271 399.69297159633544 +NODE_626_length_28212_cov_221.643 11150329 28202 395.373696900929 +NODE_627_length_28061_cov_223.832 11199602 28061 399.11628238480455 +NODE_628_length_28046_cov_223.626 11194874 28046 399.1611638023248 +NODE_629_length_28018_cov_224.18 11199673 28018 399.7313512741809 +NODE_62_length_193756_cov_223.972 77447498 193756 399.7166436136171 +NODE_630_length_27886_cov_223.839 11149632 27886 399.8290181453059 +NODE_631_length_27781_cov_223.871 11099754 27781 399.54479680357076 +NODE_632_length_27682_cov_223.892 11049707 27682 399.165775594249 +NODE_633_length_27677_cov_220.986 10879349 27677 393.0826679192109 +NODE_634_length_27676_cov_223.689 11035260 27676 398.7303078479549 +NODE_635_length_27632_cov_224.294 11049769 27626 399.9771591978571 +NODE_636_length_27616_cov_237.593 11049619 27616 400.11656286210894 +NODE_637_length_27598_cov_223.502 10999729 27598 398.5697876657729 +NODE_638_length_27557_cov_224.043 11001420 27557 399.22415357259496 +NODE_639_length_27496_cov_223.367 10949575 27476 398.5141578104528 +NODE_63_length_192114_cov_222.758 76247201 192084 396.94717415297475 +NODE_640_length_27483_cov_224.151 10977817 27483 399.44027216824946 +NODE_641_length_27461_cov_224.479 10999763 27461 400.55944794435743 +NODE_642_length_27444_cov_223.79 10949624 27444 398.9806150706894 +NODE_643_length_27372_cov_223.352 10899787 27372 398.209374543329 +NODE_644_length_27347_cov_223.494 10899683 27347 398.56960544118186 +NODE_645_length_27318_cov_227.223 10895762 27318 398.84918368841056 +NODE_646_length_27313_cov_223.83 10899698 27313 399.06630542232637 +NODE_647_length_27182_cov_221.552 10737275 27172 395.1595392315619 +NODE_648_length_27170_cov_223.978 10849764 27170 399.3288185498712 +NODE_649_length_27102_cov_223.286 10799802 27102 398.48727031215407 +NODE_64_length_191597_cov_223.888 76599715 191597 399.79600411279927 +NODE_650_length_27090_cov_223.408 10799630 27090 398.6574381690661 +NODE_651_length_27082_cov_223.682 10799756 27082 398.77985377741675 +NODE_652_length_27061_cov_224.953 10849640 27061 400.9327075865637 +NODE_653_length_26988_cov_223.426 10749613 26988 398.3108418556395 +NODE_654_length_26979_cov_221.978 10671137 26979 395.53493457874646 +NODE_655_length_26832_cov_223.596 10699597 26832 398.7625596302922 +NODE_656_length_26756_cov_224.129 10699511 26756 399.8920242188668 +NODE_657_length_26705_cov_227.735 10722867 26705 401.53031267552893 +NODE_658_length_26542_cov_234.014 10612883 26542 399.85242257554063 +NODE_659_length_26506_cov_224.274 10599621 26506 399.895155813778 +NODE_65_length_190222_cov_224.248 76143122 190222 400.2855715952939 +NODE_660_length_26425_cov_223.769 10549707 26425 399.23205298013244 +NODE_661_length_26340_cov_218.829 10274464 26330 390.2189137865553 +NODE_662_length_26159_cov_219.895 10253438 26159 391.96597729270997 +NODE_663_length_26151_cov_223.531 10449849 26151 399.59653550533443 +NODE_664_length_26131_cov_257.786 10719199 26131 410.21005702039724 +NODE_665_length_26066_cov_223.846 10399663 26066 398.9742576536484 +NODE_666_length_25903_cov_224.278 10380607 25903 400.7492182372698 +NODE_667_length_25997_cov_223.04 10349765 25997 398.1138208254799 +NODE_668_length_25875_cov_225.639 10348754 25875 399.95184541062804 +NODE_669_length_25849_cov_222.833 10247578 25849 396.4400170219351 +NODE_66_length_188316_cov_222.886 74927812 188296 397.92567022135364 +NODE_670_length_25785_cov_226.564 10342812 25785 401.1173938336242 +NODE_671_length_25779_cov_243.196 11019132 25779 427.44606074711976 +NODE_672_length_25739_cov_225.041 10318492 25739 400.88938964217726 +NODE_673_length_25618_cov_223.34 10199716 25618 398.14645952064956 +NODE_674_length_25598_cov_223.537 10199741 25598 398.45851238378 +NODE_675_length_25567_cov_224.508 10220819 25567 399.76606563147806 +NODE_676_length_25494_cov_223.355 10149832 25494 398.1263042284459 +NODE_677_length_25454_cov_223.708 10149733 25454 398.74805531547105 +NODE_678_length_25328_cov_221.086 9981949 25318 394.2629354609369 +NODE_679_length_25283_cov_224.139 10099661 25283 399.46450183918046 +NODE_67_length_187178_cov_223.355 74660148 187178 398.8724529592153 +NODE_680_length_25255_cov_224.007 10099575 25255 399.9039794100178 +NODE_681_length_25202_cov_223.522 10049733 25202 398.76728037457343 +NODE_682_length_25196_cov_223.862 10049676 25196 398.8599777742499 +NODE_683_length_25147_cov_224.201 10049686 25147 399.6375710820376 +NODE_684_length_25131_cov_221.888 9917001 25121 394.7693563154333 +NODE_685_length_25101_cov_224.407 9999737 25101 398.38002470021115 +NODE_686_length_25095_cov_223.919 10006426 25095 398.7418210798964 +NODE_687_length_25093_cov_223.505 9999617 25093 398.5022516239589 +NODE_688_length_25085_cov_226.192 9981625 25085 397.91209886386287 +NODE_689_length_25054_cov_225.691 9968957 25054 397.89881855192783 +NODE_68_length_186486_cov_223.86 74547977 186486 399.7510644230666 +NODE_690_length_25040_cov_223.836 9999745 25040 399.350838658147 +NODE_691_length_25035_cov_222.001 9882320 25035 394.740163770721 +NODE_692_length_24992_cov_224.936 10050052 24992 402.13076184379 +NODE_693_length_24899_cov_223.872 9949580 24899 399.59757419976705 +NODE_694_length_24888_cov_224.291 9949740 24888 399.78061716489873 +NODE_695_length_24865_cov_229.454 9938424 24865 399.69531469937664 +NODE_696_length_24848_cov_224.623 9900872 24848 398.4575016097875 +NODE_697_length_24844_cov_219.562 9578951 24834 385.71921559152776 +NODE_698_length_24834_cov_223.613 9899803 24834 398.63908351453654 +NODE_699_length_24800_cov_224.885 9949764 24800 401.20016129032257 +NODE_69_length_185516_cov_224.263 74379788 185516 400.9346255848552 +NODE_6_length_801699_cov_224.213 320898043 801699 400.27247508104665 +NODE_700_length_24779_cov_226.317 10000593 24779 403.5914685822672 +NODE_701_length_24673_cov_224.026 9849714 24673 399.21022980586065 +NODE_702_length_24455_cov_225.858 9770418 24455 399.5263954201595 +NODE_703_length_24356_cov_223.111 9699733 24356 398.24819346362295 +NODE_704_length_24269_cov_228.214 9699421 24269 399.66298570192424 +NODE_705_length_24226_cov_223.475 9649751 24226 398.32209196730787 +NODE_706_length_24158_cov_224.08 9649749 24158 399.4432072191407 +NODE_707_length_24118_cov_224.221 9649615 24118 400.10013268098515 +NODE_708_length_24100_cov_223.241 9599724 24100 398.32879668049793 +NODE_709_length_24011_cov_226.032 9598709 24011 399.76298363250174 +NODE_70_length_184853_cov_224.33 73924308 184853 399.90861928126674 +NODE_710_length_24008_cov_224.059 9599756 24008 399.8565478173942 +NODE_711_length_24001_cov_225.132 9611971 24001 400.4821049122953 +NODE_712_length_23892_cov_224.021 9549691 23892 399.7024527038339 +NODE_713_length_23873_cov_222.819 9499672 23873 397.9253550035605 +NODE_714_length_23871_cov_224.574 9499777 23871 397.9630932931172 +NODE_715_length_23854_cov_223.29 9499740 23854 398.2451580447724 +NODE_716_length_23724_cov_223.344 9449666 23724 398.3167256786377 +NODE_717_length_23678_cov_222.583 9399650 23678 396.97820761888676 +NODE_718_length_23674_cov_224.151 9422084 23674 397.99290360733295 +NODE_719_length_23566_cov_220.922 9278421 23546 394.05508366601543 +NODE_71_length_184044_cov_223.493 73385245 184024 398.7808383689084 +NODE_720_length_23546_cov_223.696 9399705 23546 399.2060222543107 +NODE_721_length_23534_cov_230.512 9742853 23534 413.99052434775217 +NODE_722_length_23463_cov_224.726 9399983 23463 400.63005583258746 +NODE_723_length_23423_cov_222.337 9288017 23423 396.5340477308628 +NODE_724_length_23414_cov_225.026 9399782 23414 401.4598957888443 +NODE_725_length_23394_cov_224.265 9349783 23394 399.6658544926049 +NODE_726_length_23390_cov_224.309 9349713 23390 399.7312099187687 +NODE_727_length_23382_cov_224.167 9349783 23382 399.87096912154647 +NODE_728_length_23364_cov_224.502 9349719 23364 400.17629686697484 +NODE_729_length_23349_cov_224.361 9300221 23349 398.31346096192556 +NODE_72_length_183241_cov_223.107 73041880 183241 398.61100954480713 +NODE_730_length_23342_cov_223.337 9299706 23342 398.4108473995373 +NODE_731_length_23309_cov_223.896 9299785 23309 398.9782916470033 +NODE_732_length_23246_cov_223.543 9249674 23246 397.9038974447217 +NODE_733_length_23232_cov_223.574 9249658 23232 398.14299242424244 +NODE_734_length_23174_cov_223.91 9249678 23174 399.1403296798136 +NODE_735_length_23126_cov_224.307 9249729 23126 399.9709850384848 +NODE_736_length_23116_cov_225.571 9299753 23116 402.30805502682125 +NODE_737_length_23102_cov_223.4 9199798 23102 398.225175309497 +NODE_738_length_23055_cov_225.51 9248646 23055 401.1557579700716 +NODE_739_length_22919_cov_223.623 9149668 22919 399.217592390593 +NODE_73_length_181041_cov_225.016 72243273 181041 399.04371385487264 +NODE_740_length_22897_cov_223.222 9093201 22897 397.1350395248286 +NODE_741_length_22888_cov_224.086 9149725 22888 399.76079168123033 +NODE_742_length_22867_cov_223.452 9105497 22867 398.1937726855294 +NODE_743_length_22861_cov_223.349 9099739 22861 398.04641091815756 +NODE_744_length_22737_cov_226.512 9146895 22737 402.29119936667104 +NODE_745_length_22653_cov_224.97 9093389 22653 401.42095969628747 +NODE_746_length_22552_cov_223.963 8999732 22552 399.06580347641005 +NODE_747_length_22542_cov_223.763 8999735 22542 399.24296868068495 +NODE_748_length_22540_cov_224.06 8999668 22540 399.275421472937 +NODE_749_length_22502_cov_224.093 8999962 22502 399.9627588658786 +NODE_74_length_177686_cov_228.138 70970121 177686 399.4131276521504 +NODE_750_length_22438_cov_223.81 8949805 22438 398.8682146358856 +NODE_751_length_22408_cov_224.117 8949698 22408 399.39744734023566 +NODE_752_length_22401_cov_224.195 8949790 22401 399.52636043033795 +NODE_753_length_22367_cov_223.299 8900521 22367 397.93092502347207 +NODE_754_length_22364_cov_225.262 8938746 22364 399.69352530853155 +NODE_755_length_22326_cov_223.61 8899874 22326 398.63271521992294 +NODE_756_length_22294_cov_224.944 8927618 22294 400.44935857181304 +NODE_757_length_22290_cov_224.515 8914001 22290 399.9103185284881 +NODE_758_length_22203_cov_223.614 8849786 22203 398.585146151421 +NODE_759_length_22187_cov_223.345 8849755 22187 398.8711858295398 +NODE_75_length_176226_cov_225.289 70652205 176226 400.91816758026624 +NODE_760_length_22058_cov_226.084 8798136 22058 398.86372291232203 +NODE_761_length_22045_cov_224.961 8832116 22045 400.6403266046723 +NODE_762_length_22039_cov_233.591 8804050 22039 399.4759290348927 +NODE_763_length_22004_cov_224.962 8798898 22004 399.87720414470095 +NODE_764_length_21923_cov_224.003 8749803 21923 399.11522145691737 +NODE_765_length_21823_cov_223.504 8699722 21823 398.649223296522 +NODE_766_length_21789_cov_223.741 8699747 21789 399.27243104318694 +NODE_767_length_21773_cov_224.856 8699714 21773 399.5643227851008 +NODE_768_length_21746_cov_224.168 8649664 21746 397.75885220270396 +NODE_769_length_21730_cov_223.359 8649700 21730 398.0533824206167 +NODE_76_length_176017_cov_223.651 70266096 176007 399.22330361860605 +NODE_770_length_21695_cov_228.02 8653794 21695 398.8842590458631 +NODE_771_length_21597_cov_223.21 8599702 21597 398.1896559707367 +NODE_772_length_21572_cov_223.741 8599830 21572 398.65705544223994 +NODE_773_length_21561_cov_224.882 8558201 21561 396.92968786234405 +NODE_774_length_21541_cov_223.801 8599752 21541 399.22714822895875 +NODE_775_length_21519_cov_222.882 8536002 21519 396.6728007807054 +NODE_776_length_21488_cov_223.323 8549774 21488 397.8859828741623 +NODE_777_length_21458_cov_223.415 8549694 21458 398.438531083978 +NODE_778_length_21456_cov_224.915 8599751 21456 400.8086782252051 +NODE_779_length_21391_cov_224.871 8567782 21391 400.53209293628163 +NODE_77_length_171623_cov_223.292 68350797 171613 398.28449476438266 +NODE_780_length_21335_cov_223.449 8499697 21335 398.3921724865245 +NODE_781_length_21318_cov_223.342 8500023 21318 398.72516183506895 +NODE_782_length_21288_cov_224.101 8499645 21288 399.26930665163474 +NODE_783_length_21193_cov_223.521 8449722 21193 398.7034398150333 +NODE_784_length_21189_cov_225.753 8501295 21189 401.2126575109727 +NODE_785_length_21186_cov_223.856 8449739 21186 398.83597658831303 +NODE_786_length_21186_cov_224.536 8400082 21186 396.4921174360427 +NODE_787_length_21170_cov_223.864 8449764 21170 399.1385923476618 +NODE_788_length_21100_cov_231.448 8404468 21100 398.316018957346 +NODE_789_length_21087_cov_220.702 8292228 21087 393.2388675487267 +NODE_78_length_171012_cov_227.789 68361779 171012 399.74843285851284 +NODE_790_length_21076_cov_226.397 8493506 21076 402.99421142531787 +NODE_791_length_20880_cov_224.4 8349804 20874 400.0097729232538 +NODE_792_length_20826_cov_223.118 8299757 20826 398.5286180735619 +NODE_793_length_20821_cov_223.757 8299795 20821 398.6261466788339 +NODE_794_length_20730_cov_223.347 8249751 20730 397.96193921852387 +NODE_795_length_20722_cov_223.422 8249623 20722 398.10940063700417 +NODE_796_length_20700_cov_223.355 8249920 20700 398.54685990338163 +NODE_797_length_20679_cov_224.159 8249618 20679 398.936989216113 +NODE_798_length_20627_cov_224.287 8249711 20627 399.94720511950356 +NODE_799_length_20620_cov_222.681 8167607 20620 396.10121241513093 +NODE_79_length_170363_cov_223.487 67974708 170363 398.99924279332953 +NODE_7_length_734119_cov_224.107 293695243 734119 400.06489819770366 +NODE_800_length_20601_cov_225.858 8291705 20601 402.4904130867434 +NODE_801_length_20573_cov_223.489 8199793 20573 398.5706022456618 +NODE_802_length_20527_cov_223.967 8199818 20518 399.64021834486795 +NODE_803_length_20521_cov_224.037 8199922 20521 399.58686223868233 +NODE_804_length_20497_cov_223.192 8149776 20497 397.60823535151485 +NODE_805_length_20420_cov_223.812 8149773 20420 399.10739471106757 +NODE_806_length_20398_cov_255.439 8491240 20398 416.2780664771056 +NODE_807_length_20347_cov_223.646 8087588 20347 397.4830687570649 +NODE_808_length_20340_cov_221.192 8019635 20330 394.4729463846532 +NODE_809_length_20303_cov_231.165 8074210 20303 397.68556370979655 +NODE_80_length_169648_cov_223.265 67578753 169648 398.34688885221163 +NODE_810_length_20300_cov_223.556 8099686 20300 398.99931034482756 +NODE_811_length_20164_cov_222.5 7999821 20164 396.73780003967465 +NODE_812_length_20150_cov_223.359 8023707 20150 398.19885856079406 +NODE_813_length_20148_cov_226.551 8146953 20148 404.35541989279335 +NODE_814_length_20107_cov_224.424 8023288 20107 399.02959168448797 +NODE_815_length_20074_cov_223.514 7999763 20074 398.5136494968616 +NODE_816_length_20050_cov_223.676 7999809 20050 398.99296758104737 +NODE_817_length_20030_cov_240.052 7905065 20030 394.66125811283075 +NODE_818_length_20021_cov_224.263 7999724 20021 399.56665501223716 +NODE_819_length_20001_cov_224.891 7999965 20001 399.97825108744564 +NODE_81_length_168253_cov_224.198 67318638 168243 400.12742283482817 +NODE_820_length_19988_cov_224.76 7999628 19988 400.22153291975184 +NODE_821_length_19962_cov_224.857 7949778 19962 398.2455665764953 +NODE_822_length_19959_cov_224.526 7949700 19950 398.4812030075188 +NODE_823_length_19878_cov_224.442 7949792 19878 399.92916792433846 +NODE_824_length_19855_cov_223.317 7899680 19855 397.8685469654999 +NODE_825_length_19741_cov_225.834 7949713 19741 402.7006230687402 +NODE_826_length_19724_cov_221.671 7790883 19724 394.9950821334415 +NODE_827_length_19689_cov_223.693 7849921 19689 398.6957692112347 +NODE_828_length_19681_cov_224.517 7858213 19681 399.27915248208933 +NODE_829_length_19645_cov_226.439 7907402 19645 402.5147365741919 +NODE_82_length_165793_cov_224.349 66355589 165793 400.2315477734283 +NODE_830_length_19628_cov_224.476 7830505 19628 398.945638883228 +NODE_831_length_19622_cov_224.079 7849743 19622 400.048058301906 +NODE_832_length_19537_cov_224.876 7699724 19537 394.10984286226136 +NODE_833_length_19491_cov_225.785 7826216 19491 401.52973167102766 +NODE_834_length_19486_cov_224.041 7749766 19486 397.7094324130145 +NODE_835_length_19474_cov_223.346 7749752 19474 397.95378453322377 +NODE_836_length_19460_cov_223.25 7749774 19460 398.24121274409043 +NODE_837_length_19441_cov_224.054 7745961 19441 398.43428835965227 +NODE_838_length_19431_cov_223.896 7749695 19431 398.8315063558232 +NODE_839_length_19405_cov_219.587 7592866 19405 391.2839989693378 +NODE_83_length_162517_cov_224.639 64963668 162517 399.7346000726078 +NODE_840_length_19307_cov_223.887 7699811 19307 398.8092919666442 +NODE_841_length_19293_cov_225.102 7700041 19293 399.11061006582696 +NODE_842_length_19283_cov_223.735 7699847 19283 399.3075247627444 +NODE_843_length_19251_cov_223.118 7632706 19251 396.4836112409745 +NODE_844_length_19218_cov_223.254 7649763 19218 398.05198251639086 +NODE_845_length_19206_cov_227.545 7779926 19206 405.077892325315 +NODE_846_length_19174_cov_223.888 7649883 19174 398.9716804005424 +NODE_847_length_19168_cov_224.404 7655702 19168 399.40014607679467 +NODE_848_length_19148_cov_224.288 7649754 19148 399.5066847712555 +NODE_849_length_19121_cov_222.961 7599843 19121 397.4605407666963 +NODE_84_length_161314_cov_226.738 64518132 161314 399.95370519607724 +NODE_850_length_19119_cov_223.16 7584827 19119 396.716721585857 +NODE_851_length_19108_cov_227.784 7793411 19108 407.86115763031194 +NODE_852_length_19094_cov_223.229 7599782 19094 398.01937781502045 +NODE_853_length_19086_cov_223.595 7599959 19086 398.1954836005449 +NODE_854_length_19014_cov_224.497 7599804 19014 399.69517197854213 +NODE_855_length_18971_cov_223.027 7549777 18971 397.9641031047388 +NODE_856_length_18936_cov_223.835 7549755 18936 398.6985107731305 +NODE_857_length_18919_cov_228.698 7543759 18919 398.739838257836 +NODE_858_length_18916_cov_222.568 7499739 18916 396.475946288856 +NODE_859_length_18902_cov_220.859 7435804 18892 393.5953842896464 +NODE_85_length_159391_cov_223.033 63455180 159381 398.13516040180446 +NODE_860_length_18878_cov_224.203 7556781 18878 400.29563513084014 +NODE_861_length_18843_cov_223.786 7501818 18843 398.12227352332434 +NODE_862_length_18838_cov_223.334 7499651 18838 398.11291007537955 +NODE_863_length_18820_cov_218.501 7315618 18810 388.9217437533227 +NODE_864_length_18739_cov_223.226 7449761 18739 397.55381824003416 +NODE_865_length_18737_cov_225.72 7515922 18737 401.12728825318885 +NODE_866_length_18707_cov_224.291 7458296 18707 398.6901159993585 +NODE_867_length_18599_cov_223.394 7399784 18599 397.8592397440723 +NODE_868_length_18582_cov_224.011 7399781 18582 398.22306533204176 +NODE_869_length_18577_cov_223.991 7399822 18577 398.3324541099209 +NODE_86_length_158470_cov_224.398 63291384 158470 399.3903199343724 +NODE_870_length_18570_cov_223.696 7399778 18570 398.4802369413032 +NODE_871_length_18485_cov_223.995 7349738 18439 398.5974293616791 +NODE_872_length_18483_cov_224.062 7361102 18483 398.2633771573879 +NODE_873_length_18458_cov_224.338 7255082 18448 393.2720078057242 +NODE_874_length_18438_cov_223.81 7349770 18438 398.6207831652023 +NODE_875_length_18427_cov_223.97 7349910 18427 398.86633743962665 +NODE_876_length_18425_cov_223.606 7349692 18425 398.89780189959293 +NODE_877_length_18420_cov_224.406 7349748 18420 399.00912052117263 +NODE_878_length_18417_cov_224.05 7349838 18417 399.0790030949666 +NODE_879_length_18406_cov_225.52 7399798 18406 402.03183744431163 +NODE_87_length_157504_cov_224.253 62933569 157463 399.67210709817545 +NODE_880_length_18405_cov_224.236 7349840 18405 399.3393099701168 +NODE_881_length_18377_cov_224.329 7349792 18377 399.9451488273385 +NODE_882_length_18352_cov_223.407 7297944 18352 397.6647776809067 +NODE_883_length_18330_cov_225.11 7349823 18330 400.9723404255319 +NODE_884_length_18326_cov_223.708 7299714 18326 398.32554840117865 +NODE_885_length_18318_cov_223.594 7299795 18318 398.50393056010483 +NODE_886_length_18286_cov_224.152 7299861 18286 399.2049108607678 +NODE_887_length_18268_cov_224.083 7299748 18268 399.5920735712722 +NODE_888_length_18246_cov_223.094 7249818 18246 397.33738901677077 +NODE_889_length_18240_cov_226.375 7203554 18240 394.93168859649126 +NODE_88_length_156468_cov_224.271 62643054 156468 400.35696755886187 +NODE_890_length_18209_cov_223.429 7249846 18209 398.146301279587 +NODE_891_length_18204_cov_223.594 7249777 18204 398.25186772137994 +NODE_892_length_18195_cov_224.929 7299788 18195 401.1974718329211 +NODE_893_length_18155_cov_225.512 7299819 18155 402.0831175984577 +NODE_894_length_18141_cov_222.875 7199847 18141 396.8825864064826 +NODE_895_length_18096_cov_220.232 7099348 18086 392.53278779166203 +NODE_896_length_18072_cov_223.704 7199798 18072 398.3951969898185 +NODE_897_length_18049_cov_224.011 7199803 18049 398.903152529226 +NODE_898_length_18035_cov_223.543 7199583 18035 399.20060992514556 +NODE_899_length_18022_cov_224.301 7199847 18022 399.5032182887582 +NODE_89_length_155135_cov_223.847 62011848 155125 399.7540564061241 +NODE_8_length_686892_cov_223.785 274477822 686892 399.5938546379926 +NODE_900_length_18008_cov_224.334 7199788 18008 399.8105286539316 +NODE_901_length_18007_cov_222.759 7149800 18007 397.0567001721553 +NODE_902_length_17983_cov_230.339 7149877 17983 397.5908913974309 +NODE_903_length_17948_cov_223.585 7149854 17948 398.3649431691553 +NODE_904_length_17892_cov_224.413 7149768 17892 399.60697518444 +NODE_905_length_17833_cov_220.194 6992764 17823 392.3449475396959 +NODE_906_length_17826_cov_224.703 7117128 17826 399.2554695388758 +NODE_907_length_17822_cov_223.039 7078876 17819 397.26561535439697 +NODE_908_length_17811_cov_223.464 7099788 17811 398.618157318511 +NODE_909_length_17800_cov_228.162 7150691 17800 401.72421348314606 +NODE_90_length_154475_cov_223.259 61583420 154475 398.6626962291633 +NODE_910_length_17754_cov_224.694 7099838 17754 399.90075475949084 +NODE_911_length_17739_cov_222.997 7049765 17739 397.41614521675405 +NODE_912_length_17698_cov_223.401 7049916 17698 398.3453497570347 +NODE_913_length_17697_cov_221.836 6999791 17697 395.53545798722945 +NODE_914_length_17693_cov_223.735 7049824 17693 398.45272141524896 +NODE_915_length_17653_cov_224.261 7049721 17653 399.3497422534413 +NODE_916_length_17629_cov_224.539 7049779 17629 399.8967042940609 +NODE_917_length_17561_cov_223.961 6999853 17561 398.60218666362965 +NODE_918_length_17525_cov_222.743 6912290 17515 394.6497288038824 +NODE_919_length_17512_cov_224.46 6999301 17512 399.6859867519415 +NODE_91_length_154316_cov_223.813 61709279 154316 399.8890523341714 +NODE_920_length_17496_cov_216.604 6769705 17476 387.3715381094072 +NODE_921_length_17495_cov_223.714 6951905 17495 397.36524721348957 +NODE_922_length_17495_cov_225.545 7002518 17495 400.258245212918 +NODE_923_length_17485_cov_222.923 6949809 17485 397.4726336860166 +NODE_924_length_17385_cov_224.268 6949880 17385 399.76301409260856 +NODE_925_length_17345_cov_224.761 6934620 17345 399.80513116171807 +NODE_926_length_17296_cov_223.811 6899874 17296 398.92888529139685 +NODE_927_length_17291_cov_223.937 6899833 17291 399.04187149384074 +NODE_928_length_17227_cov_223.064 6849701 17227 397.614268299762 +NODE_929_length_17212_cov_229.712 6856393 17212 398.349581687195 +NODE_92_length_152811_cov_223.975 61102886 152811 399.85921170596356 +NODE_930_length_17148_cov_226.03 6904303 17148 402.63021926755306 +NODE_931_length_17132_cov_235.208 6866393 17132 400.7934275040859 +NODE_932_length_17118_cov_222.861 6799803 17118 397.23116018226426 +NODE_933_length_17117_cov_223.08 6799819 17117 397.2553017468014 +NODE_934_length_17111_cov_223.728 6816974 17111 398.39717141020395 +NODE_935_length_17103_cov_223.276 6799740 17103 397.5758638835292 +NODE_936_length_17004_cov_224.558 6799719 17004 399.8893789696542 +NODE_937_length_16994_cov_222.832 6749750 16994 397.18430034129693 +NODE_938_length_16969_cov_225.66 6811448 16969 401.4053862926513 +NODE_939_length_16953_cov_225.248 6799717 16953 401.0922550581018 +NODE_93_length_152002_cov_224.233 60799648 152002 399.99242115235324 +NODE_940_length_16940_cov_225.401 6799870 16940 401.40909090909093 +NODE_941_length_16937_cov_223.875 6767949 16937 399.5955009741985 +NODE_942_length_16911_cov_224.074 6749750 16911 399.1336999586068 +NODE_943_length_16887_cov_224.478 6749782 16887 399.7028483448807 +NODE_944_length_16879_cov_224.61 6749717 16879 399.8884412583684 +NODE_945_length_16851_cov_225.976 6760282 16851 401.1798706308231 +NODE_946_length_16839_cov_226.765 6652254 16839 395.05041867094246 +NODE_947_length_16823_cov_223.519 6699848 16823 398.25524579444806 +NODE_948_length_16802_cov_223.669 6699877 16802 398.75473157957384 +NODE_949_length_16777_cov_225.993 6749815 16777 402.32550515586814 +NODE_94_length_150624_cov_223.537 60030575 150614 398.57234387241556 +NODE_950_length_16771_cov_223.833 6663539 16771 397.3250849680997 +NODE_951_length_16730_cov_223.157 6649746 16730 397.47435744172145 +NODE_952_length_16642_cov_224.215 6649858 16642 399.5828626367023 +NODE_953_length_16595_cov_223.129 6585429 16595 396.83211810786383 +NODE_954_length_16549_cov_390.849 10550437 16549 637.52716176204 +NODE_955_length_16546_cov_224.401 6599859 16546 398.87942705185543 +NODE_956_length_16533_cov_229.993 6717493 16533 406.30817153571644 +NODE_957_length_16529_cov_223.794 6599840 16529 399.2885232016456 +NODE_958_length_16494_cov_220.768 6482076 16484 393.23440912399906 +NODE_959_length_16454_cov_223.573 6549844 16454 398.070013370609 +NODE_95_length_149436_cov_223.744 59769322 149436 399.9660188977221 +NODE_960_length_16450_cov_223.648 6549816 16450 398.1651063829787 +NODE_961_length_16405_cov_225.637 6449810 16405 393.161231331911 +NODE_962_length_16378_cov_222.92 6482616 16378 395.8124313102943 +NODE_963_length_16376_cov_224.316 6549843 16376 399.9659868099658 +NODE_964_length_16367_cov_222.941 6499782 16367 397.12726828374167 +NODE_965_length_16364_cov_217.828 6342214 16364 387.5711317526277 +NODE_966_length_16363_cov_222.872 6499786 16363 397.2245920674693 +NODE_967_length_16286_cov_224.178 6499864 16286 399.1074542551885 +NODE_968_length_16241_cov_223.09 6449897 16241 397.1366910904501 +NODE_969_length_16239_cov_222.861 6449866 16239 397.1836935771907 +NODE_96_length_144936_cov_226.858 57592194 144916 397.41777305473516 +NODE_970_length_16239_cov_226.299 6548501 16239 403.25765133321016 +NODE_971_length_16235_cov_223.14 6449872 16235 397.28192177394516 +NODE_972_length_16207_cov_223.549 6449938 16207 397.9723576232492 +NODE_973_length_16150_cov_220.497 6334743 16150 392.24414860681117 +NODE_974_length_16102_cov_223.277 6399816 16102 397.45472612097876 +NODE_975_length_16085_cov_223.448 6399799 16085 397.8737332918869 +NODE_976_length_16085_cov_223.209 6399869 16085 397.87808517252097 +NODE_977_length_16022_cov_225.506 6449714 16022 402.55361378105107 +NODE_978_length_15971_cov_223.313 6349759 15971 397.58055225095484 +NODE_979_length_15940_cov_223.779 6349818 15940 398.35746549560855 +NODE_97_length_144723_cov_223.834 57676815 144723 398.53247237826747 +NODE_980_length_15929_cov_224.12 6348971 15929 398.5793835143449 +NODE_981_length_15879_cov_220.136 6224096 15879 391.97027520624727 +NODE_982_length_15874_cov_226.448 6399712 15874 403.15686027466296 +NODE_983_length_15808_cov_220.184 6195289 15798 392.15653880238006 +NODE_984_length_15779_cov_220.592 6195031 15769 392.8613735810768 +NODE_985_length_15753_cov_226.105 6349769 15753 403.08315876341015 +NODE_986_length_15729_cov_223.152 6249884 15729 397.34782885116664 +NODE_987_length_15728_cov_288.518 8051606 15728 511.92815361139367 +NODE_988_length_15688_cov_223.92 6249795 15688 398.38060938296786 +NODE_989_length_15560_cov_268.742 6197083 15560 398.27011568123396 +NODE_98_length_143507_cov_224.136 57407950 143507 400.03588675116896 +NODE_990_length_15547_cov_218.956 6063886 15547 390.03576252653244 +NODE_991_length_15531_cov_224.335 6186991 15531 398.3639817139914 +NODE_992_length_15454_cov_223.567 6149859 15454 397.9460980975799 +NODE_993_length_15449_cov_224.901 6149852 15415 398.95244891339604 +NODE_994_length_15433_cov_225.125 6156984 15433 398.9492645629495 +NODE_995_length_15380_cov_224.419 6149756 15380 399.8540962288687 +NODE_996_length_15376_cov_222.55 6099588 15376 396.6953694068678 +NODE_997_length_15274_cov_224.298 6099665 15274 399.34954825193137 +NODE_998_length_15254_cov_224.395 6099896 15254 399.88829159564705 +NODE_999_length_15210_cov_223.159 6049806 15210 397.75187376725836 +NODE_99_length_143125_cov_224.739 57408956 143125 401.1106096069869 +NODE_9_length_644926_cov_224.104 257902794 644926 399.8951724693996 diff --git a/tests/metagenome.config b/tests/metagenome.config index 31525924d..a31aba0c2 100644 --- a/tests/metagenome.config +++ b/tests/metagenome.config @@ -11,7 +11,7 @@ bam = None lengths = None bed = None length_filtered = None -coverages = None +coverages = tests/data/coverage.tsv kmer_counts = None kmer_normalized = None kmer_embedded = None @@ -33,9 +33,9 @@ checkpoints = None ######################################## [parameters] -projects = projects +workspace = workspace project = 1 -resume = 0 +metagenome_num = 0 kingdom = bacteria length_cutoff = 3000 cov_from_spades = False @@ -53,5 +53,5 @@ purity = 90.0 verbose = False force = False usepickle = True -parallel = False +parallel = True cpus = 1 From b71e81371180008d7c45e357fde6433cb8841cfd Mon Sep 17 00:00:00 2001 From: EvanRees Date: Thu, 12 Mar 2020 14:55:30 -0500 Subject: [PATCH 07/17] updates to project configuration handling metagenome numbering. Now retrieves versions from each executable dependency in environ.py. This is used in prodigal to parse corresponding to the prodigal version. I.e. 2.5 differs from version >=2.6. Prodigal now will parse ORF headers and convert contigs to ORF headers according to version available. Default config now has versions section and generated config files now contain versions section related to executable dependencies. Renamed 'new_workspace' in user.py to 'new_project' as this is more appropriate. --- autometa.py | 10 +- autometa/common/external/prodigal.py | 22 ++- autometa/config/default.config | 10 ++ autometa/config/environ.py | 243 ++++++++++++++++++++++++++- autometa/config/project.py | 45 ++++- autometa/config/user.py | 4 +- 6 files changed, 313 insertions(+), 21 deletions(-) diff --git a/autometa.py b/autometa.py index 57c28433b..9d946f2eb 100755 --- a/autometa.py +++ b/autometa.py @@ -212,11 +212,19 @@ def main(args): help=f'Num. cpus to use when updating/constructing databases (default: {cpus} cpus)', type=int, default=cpus) + parser.add_argument('--debug', + help=f'Stream debugging information to terminal', + action='store_true', + default=False) args = parser.parse_args() timestamp = time.strftime("%Y-%m-%d_%H-%M-%S",time.gmtime()) - logger = init_logger(f'{timestamp}_autometa.log') + level = logging.DEBUG if args.debug else None + logger = init_logger(fpath=f'{timestamp}_autometa.log', level=level) try: main(args) + except KeyboardInterrupt as err: + logger.info('User cancelled run. Exiting...') + sys.exit(1) except Exception as err: issue_request = ''' An error was encountered! diff --git a/autometa/common/external/prodigal.py b/autometa/common/external/prodigal.py index 92cd21cd3..1644231cc 100644 --- a/autometa/common/external/prodigal.py +++ b/autometa/common/external/prodigal.py @@ -31,6 +31,9 @@ from glob import glob from Bio import SeqIO +from autometa.config.environ import get_versions + + logger = logging.getLogger(__name__) @@ -185,10 +188,15 @@ def contigs_from_headers(fpath): Why the exception is raised. """ + version = get_versions('prodigal').get('prodigal') + if version.count('.') >= 2: + version = float('.'.join(version.split('.')[:2])) + else: + version = float(version) translations = {} for record in SeqIO.parse(fpath, 'fasta'): - orf_id = record.description.split('#')[-1].split(';')[0].strip().replace('ID=','') - if orf_id in record.id: + if version < 2.6: + orf_id = record.description.split('#')[-1].split(';')[0].strip().replace('ID=','') contig_id = record.id.replace(f'_{orf_id}', '') else: contig_id = record.id.rsplit('_',1)[0] @@ -217,10 +225,16 @@ def orf_records_from_contigs(contigs, fpath): Why the exception is raised. """ + version = get_versions('prodigal').get('prodigal') + if version.count('.') >= 2: + version = float('.'.join(version.split('.')[:2])) + else: + version = float(version) + records = [] for record in SeqIO.parse(fpath, 'fasta'): - orf_id = record.description.split('#')[-1].split(';')[0].strip().replace('ID=','') - if orf_id in record.id: + if version < 2.6: + orf_id = record.description.split('#')[-1].split(';')[0].strip().replace('ID=','') contig_id = record.id.replace(f'_{orf_id}', '') else: contig_id = record.id.rsplit('_',1)[0] diff --git a/autometa/config/default.config b/autometa/config/default.config index bbbbfee8e..4a4029c6b 100644 --- a/autometa/config/default.config +++ b/autometa/config/default.config @@ -33,6 +33,16 @@ bowtie2 = None samtools = None bedtools = None +[versions] +diamond = None +hmmsearch = None +hmmpress = None +hmmscan = None +prodigal = None +bowtie2 = None +samtools = None +bedtools = None + ######################################## ######### Database Parameters ########## ######################################## diff --git a/autometa/config/environ.py b/autometa/config/environ.py index 790bfa61a..b4a9e5eaa 100644 --- a/autometa/config/environ.py +++ b/autometa/config/environ.py @@ -25,6 +25,7 @@ import logging import os import sys +import subprocess from configparser import ConfigParser from configparser import ExtendedInterpolation @@ -47,7 +48,6 @@ 'bedtools', ] - def which(program): """Finds the full path for an executable and checks read permissions exist. @@ -86,33 +86,260 @@ def is_exe(fpath): return '' def find_executables(): + """Short summary. + + Returns + ------- + type + Description of returned object. + + Raises + ------- + ExceptionName + Why the exception is raised. + + """ return {exe:which(exe) for exe in EXECUTABLES} -def update_config(config): +def diamond(): + """Get diamond version. + + Returns + ------- + str + version of diamond + """ + exe = which('diamond') + proc = subprocess.Popen( + [exe,'version'], + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL) + stdout, stderr = proc.communicate() + # stdout = b'diamond version 0.9.24\n' + return stdout.decode().split()[-1] + +def hmmsearch(): + """Get hmmsearch version. + + Returns + ------- + str + version of hmmsearch + """ + exe = which('hmmsearch') + proc = subprocess.Popen( + [exe,'-h'], + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL) + stdout, stderr = proc.communicate() + stdout = stdout.decode().split('#')[2] + # stdout = ' HMMER 3.2.1 (June 2018); http://hmmer.org/\n' + return stdout.strip().split()[1] + +def hmmpress(): + """Get hmmpress version. + + Returns + ------- + str + version of hmmpress + """ + exe = which('hmmpress') + proc = subprocess.Popen( + [exe,'-h'], + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL) + stdout, stderr = proc.communicate() + stdout = stdout.decode().split('#')[2] + # stdout = ' HMMER 3.2.1 (June 2018); http://hmmer.org/\n' + return stdout.strip().split()[1] + +def hmmscan(): + """Get hmmscan version. + + Returns + ------- + str + version of hmmscan + """ + exe = which('hmmscan') + proc = subprocess.Popen( + [exe,'-h'], + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL) + stdout, stderr = proc.communicate() + stdout = stdout.decode().split('#')[2] + # stdout = ' HMMER 3.2.1 (June 2018); http://hmmer.org/\n' + return stdout.strip().split()[1] + +def prodigal(): + """Get prodigal version. + + Returns + ------- + str + version of prodigal + """ + exe = which('prodigal') + proc = subprocess.Popen( + [exe,'-v'], + stdout=subprocess.DEVNULL, + stderr=subprocess.PIPE) + stdout, stderr = proc.communicate() + # stderr = b'\nProdigal V2.6.3: February, 2016\n\n' + return stderr.decode().strip().split(':')[0].replace('Prodigal V','') + +def bowtie2(): + """Get bowtie2 version. + + Returns + ------- + str + version of bowtie2 + """ + exe = which('bowtie2') + proc = subprocess.Popen( + [exe,'--version'], + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL) + stdout, stderr = proc.communicate() + # stdout 'bowtie2-align-s version 2.3.5\n64-bit\n + return stdout.decode().split()[2] + +def samtools(): + """Get samtools version. + + Returns + ------- + str + version of samtools + """ + exe = which('samtools') + proc = subprocess.Popen([exe], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + stdout, stderr = proc.communicate() + stderr = stderr.decode().strip().split('\n')[1] + # stderr = 'Version: 1.10 (using htslib 1.10.2)' + return stderr.split()[1] + +def bedtools(): + """Get bedtools version. + + Returns + ------- + str + version of bedtools + """ + exe = which('bedtools') + proc = subprocess.Popen( + [exe, '--version'], + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL) + stdout, stderr = proc.communicate() + # stdout = b'bedtools v2.29.2\n' + return stdout.decode().strip().split()[-1].strip('v') + +def get_versions(program=None): + """Retrieve versions from all required executable dependencies. + If `program` is provided will only return version for `program`. + + Parameters + ---------- + program : str + the program to retrieve the version. + + Returns + ------- + dict + {program:version, ...} + + Raises + ------- + ValueError + `program` is not a string + KeyError + `program` is not an executable dependency. + + """ + dispatcher = { + 'prodigal':prodigal, + 'diamond':diamond, + 'hmmsearch':hmmsearch, + 'hmmpress':hmmpress, + 'hmmscan':hmmscan, + 'prodigal':prodigal, + 'bowtie2':bowtie2, + 'samtools':samtools, + 'bedtools':bedtools, + } + if program: + if type(program) is not str: + raise ValueError(f'program is not string. given:{type(program)}') + if program not in dispatcher: + raise KeyError(f'{program} not in executables') + return {program:dispatcher[program]()} + versions = {} executables = find_executables() + for exe,found in executables.items(): + if found: + version = dispatcher[exe]() + else: + logger.warning(f'VersionUnavailable {exe}') + version = 'ExecutableNotFound' + versions.update({exe:version}) + return versions + +def configure(config=DEFAULT_CONFIG): + """Checks executable dependencies necessary to run autometa. + Will update `config` with executable dependencies with details: + 1. presence/absence of dependency and its location + 2. versions + + Parameters + ---------- + config : configparser.ConfigParser + Description of parameter `config`. + + Returns + ------- + configparser.ConfigParser + config updated with executables details + Details: + 1. location of executable + 2. version of executable + + Raises + ------- + ExceptionName + Why the exception is raised. + + """ if not config.has_section('environ'): config.add_section('environ') + if not config.has_section('versions'): + config.add_section('versions') + executables = find_executables() + versions = get_versions() satisfied = True for executable,found in executables.items(): + version = versions.get(executable) if not config.has_option('environ', executable) and not found: satisfied = False logger.warning(f'executable not found: {executable}') elif not config.has_option('environ', executable): logger.debug(f'Updated executable: {executable} : {found}') config.set('environ', executable, found) + config.set('versions', executable, version) user_executable = config.get('environ', executable) if not which(user_executable): logger.debug(f'Updated executable: {executable} : {found}') config.set('environ', executable, found) + config.set('versions', executable, version) + else: + version = get_versions(user_executable).get(user_executable) + config.set('versions',user_executable, version) logger.debug(f'Executable dependencies satisfied : {satisfied}') return config -def configure(config=DEFAULT_CONFIG): - """ - Checks executable dependencies necessary to run autometa - """ - return update_config(config) - def main(args): config = configure(infpath=args.infpath) if not args.out: diff --git a/autometa/config/project.py b/autometa/config/project.py index c1f4717a0..4c12b8824 100644 --- a/autometa/config/project.py +++ b/autometa/config/project.py @@ -53,14 +53,38 @@ def n_metagenomes(self): @property def metagenomes(self): - """retrieve metagenomes from project.config + """retrieve metagenome configs from project.config Returns ------- dict {metagenome_num:, ...} """ - return {k:v for k,v in self.config.items('metagenomes')} + return {int(k.strip('metagenome_')):v for k,v in self.config.items('metagenomes') if os.path.exists(v)} + + def new_metagenome_num(self): + """Retrieve new minimum metagenome num from metagenomes in project. + + Returns + ------- + int + Description of returned object. + + Raises + ------- + ExceptionName + Why the exception is raised. + + """ + max_num = max(self.metagenomes) + if max_num == self.n_metagenomes: + return self.n_metagenomes + 1 + # Otherwise metagenome_num in between max and others has been removed + # Therefore new metagenome may be inserted. + for mg_num in range(1, max_num): + if mg_num in self.metagenomes: + continue + return mg_num def save(self): put_config(self.config, self.config_fpath) @@ -79,19 +103,28 @@ def add(self, fpath): Raises ------- + FileNotFoundError + Directory found but metagenome.config not present FileExistsError metagenome.config already exists in project - """ - metagenome_num = 1 + self.n_metagenomes + # metagenome_num = 1 + self.n_metagenomes + metagenome_num = self.new_metagenome_num() metagenome_name = f'metagenome_{metagenome_num:03d}' metagenome_dirpath = os.path.join(self.dirpath, metagenome_name) - if os.path.exists(metagenome_dirpath): + mg_config_fpath = os.path.join(metagenome_dirpath, f'{metagenome_name}.config') + # Check presence of metagenome directory and config + mg_config_present = os.path.exists(mg_config_fpath) + mg_dir_present = os.path.exists(metagenome_dirpath) + if not mg_config_present and mg_dir_present: + raise FileNotFoundError(f'{mg_config_fpath} is not present but the directory exists! Either remove the directory or locate the config file before continuing.') + if mg_dir_present: raise FileExistsError(metagenome_dirpath) + os.makedirs(metagenome_dirpath) mg_config = get_config(fpath) # Add database and env for debugging individual metagenome binning runs. - for section in ['databases','environ']: + for section in ['databases','environ','versions']: if not mg_config.has_section(section): mg_config.add_section(section) for option,value in self.config.items(section): diff --git a/autometa/config/user.py b/autometa/config/user.py index a4f5d2009..288c404ed 100644 --- a/autometa/config/user.py +++ b/autometa/config/user.py @@ -62,7 +62,7 @@ def configure(self, configure_environ=True, configure_databases=True): if configure_databases: self.config = databases.configure(self.config, dryrun=self.dryrun, nproc=self.nproc) - def new_workspace(self, fpath): + def new_project(self, fpath): """Configure new project at `outdir`. Parameters @@ -132,7 +132,7 @@ def prepare_run(self, config_fpath): project_dirpath = os.path.realpath(os.path.join(workspace,proj_name)) project_config_fp = os.path.join(project_dirpath, 'project.config') if not os.path.exists(project_dirpath) or not os.path.exists(project_config_fp): - project = self.new_workspace(project_config_fp) + project = self.new_project(project_config_fp) else: project = Project(project_config_fp) # 3 check whether existing or new run with metagenome_num From 2b812dadab81520097f6f41f3ca1d4c831b404c3 Mon Sep 17 00:00:00 2001 From: EvanRees Date: Fri, 13 Mar 2020 15:29:58 -0500 Subject: [PATCH 08/17] significant simplification in API. Created Databases class in databases.py for handling databases config. Default behavior is to download and format required databases. Changed flag to flag to be more clear. autometa will print an issue request to the user upon any exceptions being encountered (NOT KeyboardInterrupt.. Although this will also be logged). Logging behavior changed slightly, where user can specify level (default is INFO) and path to log file. binning call has been moved to user.py. autometa.config imports in user.py have been removed and general autometa.config module is imported via to perform respective func call. --- autometa.py | 153 ++------- autometa/common/exceptions.py | 15 +- autometa/common/utilities.py | 6 +- autometa/config/databases.py | 574 +++++++++++++++------------------ autometa/config/default.config | 2 +- autometa/config/environ.py | 21 +- autometa/config/project.py | 6 +- autometa/config/user.py | 159 +++++++-- 8 files changed, 450 insertions(+), 486 deletions(-) diff --git a/autometa.py b/autometa.py index 9d946f2eb..2f3b5f589 100755 --- a/autometa.py +++ b/autometa.py @@ -30,14 +30,11 @@ import multiprocessing as mp from autometa.config.user import AutometaUser -from autometa.common.utilities import timeit -from autometa.common.metagenome import Metagenome - logger = logging.getLogger('autometa') -def init_logger(fpath=None, level=None): +def init_logger(fpath=None, level=logging.INFO): """Initialize logger. By default will initialize streaming logger with DEBUG level messages. @@ -62,8 +59,10 @@ def init_logger(fpath=None, level=None): Raises ------- + TypeError + `level` must be an int ValueError - `level` must be int and one of 0, 10, 20, 30, 40, 50 + `level` must be one of 0, 10, 20, 30, 40, 50 """ levels = { logging.NOTSET, @@ -72,8 +71,8 @@ def init_logger(fpath=None, level=None): logging.WARNING, logging.ERROR, logging.CRITICAL} - if level and type(level) is not int: - raise ValueError(f'{level} must be an int! {type(level)}') + if type(level) is not int: + raise TypeError(f'{level} must be an int! {type(level)}') if level and level not in levels: raise ValueError(f'{level} not in levels: {levels}!') formatter = logging.Formatter( @@ -86,115 +85,31 @@ def init_logger(fpath=None, level=None): filehandler = logging.FileHandler(fpath) filehandler.setFormatter(formatter) logger.addHandler(filehandler) - lvl = level if level else logging.INFO - else: - lvl = level if level else logging.DEBUG - streamhandler.setLevel(lvl) + streamhandler.setLevel(level) logger.addHandler(streamhandler) logger.setLevel(logging.DEBUG) return logger -@timeit -def run(mgargs): - """Run autometa. - - Parameters - ---------- - mgargs : argparse.Namespace - metagenome args - - Returns - ------- - NoneType - - Raises - ------- - TODO: Need to enumerate all exceptions raised from within binning pipeline. - I.e. Demarkate new exception (not yet handled) vs. handled exception. - Subclassing an AutometaException class may be most appropriate use case here. - """ - mg = Metagenome( - assembly=mgargs.files.metagenome, - outdir=mgargs.parameters.outdir, - nucl_orfs_fpath=mgargs.files.nucleotide_orfs, - prot_orfs_fpath=mgargs.files.amino_acid_orfs, - taxonomy_fpath=mgargs.files.taxonomy, - fwd_reads=mgargs.files.fwd_reads, - rev_reads=mgargs.files.rev_reads, - taxon_method=mgargs.parameters.taxon_method) - try: - # Original (raw) file should not be manipulated so return new object - mg = mg.length_filter( - out=mgargs.files.length_filtered, - cutoff=mgargs.parameters.length_cutoff) - except FileExistsError as err: - logger.debug(f'{mgargs.files.length_filtered} already exists. Continuing..') - mg = Metagenome( - assembly=mgargs.files.length_filtered, - outdir=mgargs.parameters.outdir, - nucl_orfs_fpath=mgargs.files.nucleotide_orfs, - prot_orfs_fpath=mgargs.files.amino_acid_orfs, - taxonomy_fpath=mgargs.files.taxonomy, - fwd_reads=mgargs.files.fwd_reads, - rev_reads=mgargs.files.rev_reads, - taxon_method=mgargs.parameters.taxon_method) - # I.e. asynchronous execution here (work-queue tasks) - mg.get_kmers( - kmer_size=mgargs.parameters.kmer_size, - normalized=mgargs.files.kmer_normalized, - out=mgargs.files.kmer_counts, - multiprocess=mgargs.parameters.kmer_multiprocess, - nproc=mgargs.parameters.cpus, - force=mgargs.parameters.force) - - coverages = mg.get_coverages( - out=mgargs.files.coverages, - from_spades=mgargs.parameters.cov_from_spades, - sam=mgargs.files.sam, - bam=mgargs.files.bam, - lengths=mgargs.files.lengths, - bed=mgargs.files.bed) - # Filter by Kingdom - kingdoms = mg.get_kingdoms( - ncbi=mgargs.databases.ncbi, - usepickle=mgargs.parameters.usepickle, - blast=mgargs.files.blastp, - hits=mgargs.files.blastp_hits, - force=mgargs.parameters.force, - cpus=mgargs.parameters.cpus) - - if not mgargs.parameters.kingdom in kingdoms: - raise KeyError(f'{mgargs.parameters.kingdom} not recovered in dataset. Recovered: {", ".join(kingdoms.keys())}') - mag = kingdoms.get(mgargs.parameters.kingdom) - bins_df = mag.get_binning( - method=mgargs.parameters.binning_method, - kmers=mgargs.files.kmer_counts, - embedded=mgargs.files.kmer_embedded, - do_pca=mgargs.parameters.do_pca, - pca_dims=mgargs.parameters.pca_dims, - embedding_method=mgargs.parameters.embedding_method, - coverage=coverages, - domain=mgargs.parameters.kingdom, - taxonomy=mgargs.files.taxonomy, - reverse=mgargs.parameters.reversed, - ) - binning_cols = ['cluster','completeness','purity'] - bins_df[binning_cols].to_csv( - mgargs.files.binning, - sep='\t', - index=True, - header=True) - def main(args): - user = AutometaUser(dryrun=args.dryrun, nproc=args.cpus) + # Setup logger + timestamp = time.strftime("%Y-%m-%d_%H-%M-%S",time.gmtime()) + log_fpath = args.log if args.log else f'{timestamp}_autometa.log' + if args.debug: + logger = init_logger(fpath=log_fpath, level=logging.DEBUG) + else: + logger = init_logger(fpath=log_fpath) + # Configure AutometaUser + # TODO: master from WorkQueue is AutometaUser + user = AutometaUser(dryrun=args.check_dependencies, nproc=args.cpus) + for config in args.config: - mgargs = user.prepare_run(config) - run(mgargs) - # cluster process -> mgargs.files.binning - # TODO: Refine bins by connection mapping, taxon, or other methods - # TODO: Construct pangenomes from multiple datasets - # get_pangenomes() + # TODO: Add directions to master from WorkQueue + mgargs = user.prepare_binning_args(config) + user.run_binning(mgargs) + # user.refine_binning() + # user.process_binning() + # user.get_pangenomes() if __name__ == '__main__': import argparse @@ -204,34 +119,32 @@ def main(args): parser.add_argument('config', help='', nargs='*') - parser.add_argument('--dryrun', - help='whether to perform database updating/construction', - action='store_true', - default=False) parser.add_argument('--cpus', help=f'Num. cpus to use when updating/constructing databases (default: {cpus} cpus)', type=int, default=cpus) parser.add_argument('--debug', - help=f'Stream debugging information to terminal', + help='Stream debugging information to terminal', + action='store_true', + default=False) + parser.add_argument('--log', help='', type=str) + parser.add_argument('--check-dependencies', + help='Check user executables and databases accessible to Autometa and exit.', action='store_true', default=False) args = parser.parse_args() - timestamp = time.strftime("%Y-%m-%d_%H-%M-%S",time.gmtime()) - level = logging.DEBUG if args.debug else None - logger = init_logger(fpath=f'{timestamp}_autometa.log', level=level) try: main(args) - except KeyboardInterrupt as err: + except KeyboardInterrupt: logger.info('User cancelled run. Exiting...') sys.exit(1) except Exception as err: issue_request = ''' - An error was encountered! Please help us fix your problem! You may file an issue with us at https://github.com/KwanLab/Autometa/issues/new ''' + err.issue_request = issue_request logger.exception(err) - print(issue_request) + logger.info(err.issue_request) diff --git a/autometa/common/exceptions.py b/autometa/common/exceptions.py index 6e2ee79c1..f0b37ab78 100644 --- a/autometa/common/exceptions.py +++ b/autometa/common/exceptions.py @@ -24,6 +24,10 @@ class AutometaException(Exception): """docstring for AutometaException.""" + + def __init__(self, value): + self.value = value + issue_request = ''' An error was encountered! @@ -31,20 +35,21 @@ class AutometaException(Exception): You may file an issue with us at https://github.com/KwanLab/Autometa/issues/new ''' - pass - + def __str__(self): + return f'{self.value}\n\n{issue_request}' -class KmerFormatError(Exception): +class KmerFormatError(AutometaException): """KmerFormatError exception class.""" def __init__(self, fpath): + super(AutometaException, self).__init__(fpath) self.fpath = fpath def __str__(self): return f'{self.fpath} does not contain a \"contig\" column. '\ 'Ensure the k-mer matrix was properly generated.' -class KmerEmbeddingError(Exception): +class KmerEmbeddingError(AutometaException): """KmerEmbeddingError exception class.""" def __init__(self, value): @@ -53,7 +58,7 @@ def __init__(self, value): def __str__(self): return self.value -class RecursiveDBSCANError(Exception): +class RecursiveDBSCANError(AutometaException): """RecursiveDBSCANError exception class.""" def __init__(self, value): diff --git a/autometa/common/utilities.py b/autometa/common/utilities.py index 4ff45eaa7..55f7f1d5a 100644 --- a/autometa/common/utilities.py +++ b/autometa/common/utilities.py @@ -133,7 +133,7 @@ def gunzip(infpath, outfpath): def untar(tarchive, outdir, member=None): """Decompress a tar archive (may be gzipped or bzipped). passing in `member` - requires an `outfpath` also be provided. + requires an `outdir` also be provided. See: https://docs.python.org/3.8/library/tarfile.html#module-tarfile @@ -196,10 +196,10 @@ def tarchive_results(outfpath, src_dirpath): Parameters ---------- - fpaths : list - outfpath : str Returns ------- diff --git a/autometa/config/databases.py b/autometa/config/databases.py index e8b12825c..7475be0f1 100644 --- a/autometa/config/databases.py +++ b/autometa/config/databases.py @@ -26,6 +26,8 @@ import os import requests +import multiprocessing as mp + from configparser import ConfigParser from configparser import ExtendedInterpolation from ftplib import FTP @@ -40,322 +42,255 @@ logger = logging.getLogger(__name__) -DB_SECTIONS = { - 'ncbi':[ - 'nodes', - 'names', - 'merged', - 'accession2taxid', - 'nr', - ], - 'markers':[ - 'bacteria_single_copy', - 'bacteria_single_copy_cutoffs', - 'archaea_single_copy', - 'archaea_single_copy_cutoffs', - ], -} - - -def format_nr(config, dryrun, nproc=2): - outdir = config.get('databases','ncbi') - nr = config.get('ncbi','nr') - formatted_nr = os.path.splitext(os.path.basename(nr))[0] - outfpath = os.path.join(outdir, formatted_nr) - if not dryrun and not os.path.exists(outfpath): - diamond.makedatabase(fasta=nr, database=outfpath, nproc=nproc) - config.set('ncbi','nr',outfpath) - logger.debug(f'set ncbi nr to {outfpath}') - return config - -def extract_taxdump(config, dryrun): - """Extract autometa required files from ncbi taxdump directory. - - Parameters - ---------- - config: configparser.ConfigParser - Description of parameter `config`. - dryrun : type - Description of parameter `dryrun`. - - Returns - ------- - type - Description of returned object. - - Raises - ------- - ExceptionName - Why the exception is raised. - - """ - outdir = config.get('databases','ncbi') - taxdump = config.get('ncbi','taxdump') - taxdump_files = [ - ('nodes','nodes.dmp'), - ('names','names.dmp'), - ('merged','merged.dmp'), - ] - for option,fname in taxdump_files: - outfp = os.path.join(outdir,fname) - if not dryrun and not os.path.exists(outfp): - outfp = untar(taxdump, outdir, fname) - logger.debug(f'update ncbi : {option} : {outfp}') - config.set('ncbi',option,outfp) - return config - -def update_ncbi(config, options, dryrun, nproc=2): - """Update NCBI database files (taxdump.tar.gz and nr.gz). - - Parameters - ---------- - config: configparser.ConfigParser - Description of parameter `config`. - options : type - Description of parameter `options`. - dryrun : type - Description of parameter `dryrun`. - - Returns - ------- - type - Description of returned object. - - Raises - ------- - ExceptionName - Why the exception is raised. - - """ - section = 'ncbi' - if not config.has_section('databases'): - config.add_section('databases') - if not config.has_option('databases',section): - outdir = DEFAULT_CONFIG.get('databases', section) - config.set('databases', section, outdir) - else: - outdir = config.get('databases',section) - host = DEFAULT_CONFIG.get(section,'host') - for option in options: - ftp_fullpath = DEFAULT_CONFIG.get('database_urls',option) - ftp_fpath = ftp_fullpath.split(host)[-1] - if config.has_option(section, option): - outfpath = config.get(section, option) - else: - outfname = os.path.basename(ftp_fpath) - outfpath = os.path.join(outdir, outfname) - download_success = True - if not dryrun and not os.path.exists(outfpath): - with FTP(host) as ftp, open(outfpath, 'wb') as fp: - ftp.login() - logger.debug(f'starting {option} download') - result = ftp.retrbinary(f'RETR {ftp_fpath}', fp.write) - download_success = True if result.startswith('226 Transfer complete') else False - logger.debug(f'{option} download successful : {download_success}') - ftp.quit() - logger.debug(f'update {section} : {option} : {outfpath}') - config.set(section, option, outfpath) - - config = extract_taxdump(config, dryrun) - return format_nr(config, dryrun, nproc) - -def update_markers(config, options, dryrun): - """Update single-copy markers hmms and cutoffs. - - Parameters - ---------- - config: configparser.ConfigParser - Description of parameter `config`. - options : type - Description of parameter `options`. - dryrun : type - Description of parameter `dryrun`. - - Returns - ------- - type - Description of returned object. - - Raises - ------- - ExceptionName - Why the exception is raised. - - """ - section = 'markers' - if not config.has_section('databases'): - config.add_section('databases') - if not config.has_option('databases',section): - outdir = DEFAULT_CONFIG.get('databases', section) - config.set('databases', section, outdir) - else: - outdir = config.get('databases',section) - for option in options: - url = DEFAULT_CONFIG.get('database_urls', option) - if config.has_option(section, option): - outfpath = config.get(section, option) - else: - outfname = os.path.basename(url) - outfpath = os.path.join(outdir, outfname) - if dryrun: - logger.debug(f'update {section} : {option} : {outfpath}') - config.set(section, option, outfpath) - continue - with requests.Session() as session: - resp = session.get(url) - if not resp.ok: - logger.warning(f'Failed to retrieve {url}') - continue - with open(outfpath, 'w') as outfh: - outfh.write(resp.text) - config.set(section, option, outfpath) - return config - -def validate_fpaths(config, section): - """Check all files from section exist and are not empty. - - Parameters - ---------- - config: configparser.ConfigParser - Description of parameter `config`. - section : type - Description of parameter `section`. - - Returns - ------- - type - Description of returned object. - - Raises - ------- - ExceptionName - Why the exception is raised. - - """ - for opt in config.options(section): - if opt not in DB_SECTIONS.get(section): - continue - fp = config.get(section,opt) - if not os.path.exists(fp) or os.stat(fp).st_size == 0: - logger.warning(f'removing invalid filepath {fp} : {section} : {opt}') - config.remove_option(section, opt) - return config - -def update_missing(config, section, dryrun, options=None, nproc=2): - """Download databases using provided `options` in `section`. If `options` is - None, all `options` in `section` will be downloaded and formatted. - - Parameters - ---------- - config: configparser.ConfigParser - Description of parameter `config`. - section : str - Description of parameter `section` (the default is None). - options : iterable - Options to be updated in section. Default will retrieve all required for section. - dryrun : bool - Do not perform retrieval/formatting of databases. - - Returns - ------- - configparser.ConfigParser - config updated with required missing sections. - - Raises - ------- - KeyError - provided `section` is not in DB_SECTIONS - - """ - if section not in DB_SECTIONS: - raise KeyError(f'section ({section}) not in DB_SECTIONS ({DB_SECTIONS.keys()})') - options = set(options) if options else set(DB_SECTIONS.get(section)) - if section == 'ncbi': - if 'nodes' in options or 'names' in options or 'merged' in options: - options.discard('nodes') - options.discard('names') - options.discard('merged') - options.add('taxdump') - config = update_ncbi(config, options, dryrun, nproc) - if section == 'markers': - config = update_markers(config, options, dryrun) - return config - -def check_format(config, dryrun, nproc=2): - """Checks database files - - Parameters - ---------- - config : configparser.ConfigParser - Description of parameter `config`. - dryrun : bool - Description of parameter `dryrun`. - - Returns - ------- - configparser.ConfigParser - Description of returned object. - - Raises - ------- - ExceptionName - Why the exception is raised. - - """ - for section,options in DB_SECTIONS.items(): - if not config.has_section(section): - logger.warning(f'Missing section : {section}') - config.add_section(section) - config = update_missing( - config=config, - section=section, - options=None, - dryrun=dryrun, - nproc=nproc) - continue - config = validate_fpaths(config, section) - missing = set(options) - set(config.options(section)) - if missing: - logger.warning(f'Missing options : {", ".join(missing)}') - config = update_missing( - config=config, - section=section, - options=missing, - dryrun=dryrun, - nproc=nproc) - return config - -def configure(config=DEFAULT_CONFIG, dryrun=True, nproc=2): - """Configures database dependencies necessary to run Autometa. - - Parameters - ---------- - config : configparser.ConfigParser - (the default is DEFAULT_CONFIG). - dryrun : bool - Log configuration actions but do not perform them. (the default is False). - - Returns - ------- - configparser.ConfigParser - config with updated options in respective databases sections - - Raises - ------- - TypeError - Provided `config` is not the python built-in ConfigParser type. - TypeError - `dryrun` can not be interpretted as a boolean. - - """ - if type(config) is not ConfigParser: - raise TypeError(f'config is not ConfigParser : {type(config)}') - if type(dryrun) is not bool: - raise TypeError(f'dryrun must be True or False. type: {type(dryrun)}') - return check_format(config, dryrun=dryrun, nproc=nproc) +class Databases: + """docstring for Databases.""" + SECTIONS = { + 'ncbi':[ + 'nodes', + 'names', + 'merged', + 'accession2taxid', + 'nr', + ], + 'markers':[ + 'bacteria_single_copy', + 'bacteria_single_copy_cutoffs', + 'archaea_single_copy', + 'archaea_single_copy_cutoffs', + ], + } + def __init__(self, config=DEFAULT_CONFIG, dryrun=False, nproc=mp.cpu_count()): + if type(config) is not ConfigParser: + raise TypeError(f'config is not ConfigParser : {type(config)}') + if type(dryrun) is not bool: + raise TypeError(f'dryrun must be True or False. type: {type(dryrun)}') + + self.config=config + self.dryrun=dryrun + self.nproc=nproc + self.prepare_sections() + self.ncbi_dir = self.config.get('databases','ncbi') + self.markers_dir = self.config.get('databases','markers') + + @property + def satisfied(self): + return self.get_missing(validate=True) + + def prepare_sections(self): + """Add database sections to 'databases' if missing. + + Returns + ------- + NoneType + + """ + if not self.config.has_section('databases'): + self.config.add_section('databases') + for section in Databases.SECTIONS: + if self.config.has_option('databases', section): + continue + outdir = DEFAULT_CONFIG.get('databases', section) + self.config.set('databases', section, outdir) + + def format_nr(self): + """Format NCBI nr.gz database into diamond formatted database nr.dmnd. + + Returns + ------- + NoneType + config updated option:'nr' in section:'ncbi'. + + """ + nr = self.config.get('ncbi','nr') + nr_base = os.path.splitext(os.path.basename(nr))[0] + db_infpath = os.path.join(self.ncbi_dir, nr_base) + db_outfpath = db_infpath.replace('.gz','.dmnd') + if not self.dryrun and not os.path.exists(db_infpath): + diamond.makedatabase(fasta=nr, database=db_infpath, nproc=self.nproc) + self.config.set('ncbi','nr', db_outfpath) + logger.debug(f'set ncbi nr: {db_outfpath}') + + def extract_taxdump(self): + """Extract autometa required files from ncbi taxdump directory. + + Extracts nodes.dmp, names.dmp and merged.dmp from taxdump.tar.gz + + Returns + ------- + NoneType + + """ + taxdump = self.config.get('ncbi','taxdump') + taxdump_files = [ + ('nodes','nodes.dmp'), + ('names','names.dmp'), + ('merged','merged.dmp'), + ] + for option,fname in taxdump_files: + outfpath = os.path.join(self.ncbi_dir,fname) + if not self.dryrun and not os.path.exists(outfpath): + outfpath = untar(taxdump, self.ncbi_dir, fname) + logger.debug(f'update ncbi : {option} : {outfpath}') + self.config.set('ncbi',option,outfpath) + + def update_ncbi(self, options): + """Update NCBI database files (taxdump.tar.gz and nr.gz). + + Parameters + ---------- + options : set + Set of options to update + + Returns + ------- + NoneType + Description of returned object. + + Raises + ------- + ConnectionError + NCBI file download failed. + + """ + # Download required NCBI database files + host = DEFAULT_CONFIG.get('ncbi','host') + for option in options: + ftp_fullpath = DEFAULT_CONFIG.get('database_urls',option) + ftp_fpath = ftp_fullpath.split(host)[-1] + if self.config.has_option('ncbi', option): + outfpath = self.config.get('ncbi', option) + else: + outfname = os.path.basename(ftp_fpath) + outfpath = os.path.join(self.ncbi_dir, outfname) + if not self.dryrun and not os.path.exists(outfpath): + with FTP(host) as ftp, open(outfpath, 'wb') as fp: + ftp.login() + logger.debug(f'starting {option} download') + result = ftp.retrbinary(f'RETR {ftp_fpath}', fp.write) + if not result.startswith('226 Transfer complete'): + raise ConnectionError(f'{option} download failed') + ftp.quit() + logger.debug(f'update ncbi {option}: {outfpath}') + self.config.set('ncbi', option, outfpath) + # Extract/format respective NCBI files + self.extract_taxdump() + self.format_nr() + + def update_markers(self, options): + """Update single-copy markers hmms and cutoffs. + + Parameters + ---------- + options : set + Description of parameter `options`. + + Returns + ------- + NoneType + + Raises + ------- + ConnectionError + marker file download failed. + + """ + for option in options: + url = DEFAULT_CONFIG.get('database_urls', option) + if self.config.has_option('markers', option): + outfpath = self.config.get('markers', option) + else: + outfname = os.path.basename(url) + outfpath = os.path.join(self.markers_dir, outfname) + if self.dryrun: + logger.debug(f'update markers {option}: {outfpath}') + self.config.set('markers', option, outfpath) + continue + with requests.Session() as session: + resp = session.get(url) + if not resp.ok: + raise ConnectionError(f'Failed to retrieve {url}') + with open(outfpath, 'w') as outfh: + outfh.write(resp.text) + self.config.set('markers', option, outfpath) + + def get_missing(self, validate=False): + """Retrieve all database files from all database sections that are not + available. + + Returns + ------- + bool or dict + if `validate` is True : bool + - True if all available, else False + if `validate` is False : dict + - {section:{option, option,...}, section:{...}, ...} + + """ + missing = {} + for section in Databases.SECTIONS: + for option in self.config.options(section): + if option not in Databases.SECTIONS.get(section): + # Skip user added options not required by Autometa + continue + fpath = self.config.get(section,option) + if os.path.exists(fpath) and os.stat(fpath).st_size >= 0: + # TODO: [Checkpoint validation] + continue + if validate: + return False + if section in missing: + missing[section].add(option) + else: + missing.update({section:set([option])}) + return True if validate else missing + + def update_missing(self): + """Download and format databases for all options in each section. + + NOTE: This will only perform the download and formatting if self.dryrun is False + + Returns + ------- + NoneType + config updated with required missing sections. + + """ + dispatcher = {'ncbi':self.update_ncbi, 'markers':self.update_markers} + missing = self.get_missing() + for section,options in missing.items(): + if section == 'ncbi': + if 'nodes' in options or 'names' in options or 'merged' in options: + options.discard('nodes') + options.discard('names') + options.discard('merged') + options.add('taxdump') + dispatcher[section](options) + + def configure(self): + """Checks database files + + Returns + ------- + 2-tuple + i.e. (config, satisfied) + config : configparser.ConfigParser + - config with updated options in respective databases sections. + satisfied : bool + - whether all required Autometa databases are available. + + Raises + ------- + ExceptionName + Why the exception is raised. + + """ + self.update_missing() + return self.config, self.satisfied def main(args): - config = configure(infpath=args.config, dryrun=args.dryrun) + dbs = Databases(config=args.config, dryrun=args.dryrun, nproc=args.nproc) + config, satisfied = dbs.configure() + logger.info(f'Database dependencies satisfied: {satisfied}') if not args.out: import sys;sys.exit(0) put_config(config, args.out) @@ -364,14 +299,19 @@ def main(args): if __name__ == '__main__': import argparse import logging as logger + + cpus = mp.cpu_count() logger.basicConfig( format='%(asctime)s : %(name)s : %(levelname)s : %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p', level=logger.DEBUG) - parser = argparse.ArgumentParser('databases config') - parser.add_argument('config', help='') - parser.add_argument('--out', help='') + + parser = argparse.ArgumentParser('databases config', epilog='By default, with no arguments, will download/format databases into default databases directory.') + parser.add_argument('--config', help='') parser.add_argument('--dryrun', help='Log configuration actions but do not perform them.', action='store_true', default=False) + parser.add_argument('--nproc', + help=f'num. cpus to use for DB formatting. (default {cpus})', type=int, default=cpus) + parser.add_argument('--out', help='') args = parser.parse_args() main(args) diff --git a/autometa/config/default.config b/autometa/config/default.config index 4a4029c6b..04e585b17 100644 --- a/autometa/config/default.config +++ b/autometa/config/default.config @@ -73,7 +73,7 @@ nodes = ${databases:ncbi}/nodes.dmp names = ${databases:ncbi}/names.dmp merged = ${databases:ncbi}/merged.dmp accession2taxid = ${databases:ncbi}/prot.accession2taxid.gz -nr = ${databases:ncbi}/nr.dmnd +nr = ${databases:ncbi}/nr.gz [markers] host = raw.githubusercontent.com diff --git a/autometa/config/environ.py b/autometa/config/environ.py index b4a9e5eaa..f11ba5f2b 100644 --- a/autometa/config/environ.py +++ b/autometa/config/environ.py @@ -301,16 +301,14 @@ def configure(config=DEFAULT_CONFIG): Returns ------- - configparser.ConfigParser + 2-tuple + (config, satisfied) config updated with executables details Details: 1. location of executable 2. version of executable - - Raises - ------- - ExceptionName - Why the exception is raised. + config : configparser.ConfigParser + satisfied : bool """ if not config.has_section('environ'): @@ -326,22 +324,21 @@ def configure(config=DEFAULT_CONFIG): satisfied = False logger.warning(f'executable not found: {executable}') elif not config.has_option('environ', executable): - logger.debug(f'Updated executable: {executable} : {found}') + logger.debug(f'Updated executable {executable}: {found}') config.set('environ', executable, found) config.set('versions', executable, version) user_executable = config.get('environ', executable) if not which(user_executable): - logger.debug(f'Updated executable: {executable} : {found}') + logger.debug(f'Updated executable {executable}: {found}') config.set('environ', executable, found) config.set('versions', executable, version) else: version = get_versions(user_executable).get(user_executable) - config.set('versions',user_executable, version) - logger.debug(f'Executable dependencies satisfied : {satisfied}') - return config + config.set('versions', user_executable, version) + return config, satisfied def main(args): - config = configure(infpath=args.infpath) + config,satisfied = configure(infpath=args.infpath) if not args.out: import sys;sys.exit(0) put_config(config, args.out) diff --git a/autometa/config/project.py b/autometa/config/project.py index 4c12b8824..36b7042ae 100644 --- a/autometa/config/project.py +++ b/autometa/config/project.py @@ -105,8 +105,8 @@ def add(self, fpath): ------- FileNotFoundError Directory found but metagenome.config not present - FileExistsError - metagenome.config already exists in project + IsADirectoryError + Metagenome output directory already exists """ # metagenome_num = 1 + self.n_metagenomes metagenome_num = self.new_metagenome_num() @@ -119,7 +119,7 @@ def add(self, fpath): if not mg_config_present and mg_dir_present: raise FileNotFoundError(f'{mg_config_fpath} is not present but the directory exists! Either remove the directory or locate the config file before continuing.') if mg_dir_present: - raise FileExistsError(metagenome_dirpath) + raise IsADirectoryError(metagenome_dirpath) os.makedirs(metagenome_dirpath) mg_config = get_config(fpath) diff --git a/autometa/config/user.py b/autometa/config/user.py index 288c404ed..0afbe4137 100644 --- a/autometa/config/user.py +++ b/autometa/config/user.py @@ -28,18 +28,14 @@ import argparse -# TODO: Refactor autometa.config later as AutometaConfigUtils lib or something -from autometa.config import get_config -from autometa.config import put_config -from autometa.config import parse_config -from autometa.config import AUTOMETA_DIR -from autometa.config import DEFAULT_CONFIG -from autometa.config import DEFAULT_FPATH -from autometa.config import databases +from autometa import config from autometa.config import environ -from autometa.config.project import Project from autometa.common import utilities +from autometa.common.metagenome import Metagenome +from autometa.config.databases import Databases +from autometa.config.project import Project +from autometa.common.utilities import timeit logger = logging.getLogger(__name__) @@ -51,16 +47,37 @@ def __init__(self, config_fpath=None, dryrun=True, nproc=2): self.dryrun= dryrun self.nproc = nproc self.config_fp = config_fpath - self.config = get_config(self.config_fp) if self.config_fp else DEFAULT_CONFIG + self.config = config.get_config(self.config_fp) if self.config_fp else config.DEFAULT_CONFIG if not self.config.has_section('common'): self.config.add_section('common') - self.config.set('common','home_dir', AUTOMETA_DIR) + self.config.set('common','home_dir', config.AUTOMETA_DIR) - def configure(self, configure_environ=True, configure_databases=True): - if configure_environ: - self.config = environ.configure(self.config) - if configure_databases: - self.config = databases.configure(self.config, dryrun=self.dryrun, nproc=self.nproc) + if self.dryrun: + self.configure() + import sys;sys.exit(1) + + def configure(self): + """Configure user execution environment and databases. + + Returns + ------- + NoteType + + """ + # Execution env + self.config, exe_satisfied = environ.configure(self.config) + if self.dryrun: + logger.info(f'Executable dependencies satisfied: {exe_satisfied}') + # Database env + dbs = Databases(self.config, dryrun=self.dryrun, nproc=self.nproc) + self.config, db_satisfied = dbs.configure() + if self.dryrun: + logger.info(f'Database dependencies satisfied: {db_satisfied}') + + if not db_satisfied: + raise LookupError('Database dependencies not satisfied!') + if not exe_satisfied: + raise LookupError('Executable dependencies not satisfied!') def new_project(self, fpath): """Configure new project at `outdir`. @@ -80,15 +97,13 @@ def new_project(self, fpath): Why the exception is raised. """ - # 1. configure project from default config and provided config file - self.configure() dpath = os.path.dirname(fpath) if not os.path.exists(dpath): os.makedirs(dpath) - put_config(self.config, fpath) + config.put_config(self.config, fpath) return Project(fpath) - def prepare_run(self, config_fpath): + def prepare_binning_args(self, config_fpath): """Prepares metagenome binning run using provided `config_fpath`. This method performs a number of configuration checks to ensure the @@ -122,12 +137,14 @@ def prepare_run(self, config_fpath): Why the exception is raised. """ - mgargs = parse_config(config_fpath) - # 1 check workspace exists + # 1. configure user environment + self.configure() + # 2. check workspace exists + mgargs = config.parse_config(config_fpath) workspace = os.path.realpath(mgargs.parameters.workspace) if not os.path.exists(workspace): os.makedirs(workspace) - # 2 check project exists + # 3. check project exists proj_name = f'project_{mgargs.parameters.project:03d}' project_dirpath = os.path.realpath(os.path.join(workspace,proj_name)) project_config_fp = os.path.join(project_dirpath, 'project.config') @@ -135,7 +152,7 @@ def prepare_run(self, config_fpath): project = self.new_project(project_config_fp) else: project = Project(project_config_fp) - # 3 check whether existing or new run with metagenome_num + # 4. check whether existing or new run with metagenome_num metagenome = f'metagenome_{mgargs.parameters.metagenome_num:03d}' if metagenome not in project.metagenomes: mgargs = project.add(config_fpath) @@ -152,6 +169,98 @@ def prepare_run(self, config_fpath): project.save() return mgargs + @utilities.timeit + def run_binning(self, mgargs): + """Run autometa. + + Parameters + ---------- + mgargs : argparse.Namespace + metagenome args + + Returns + ------- + NoneType + + Raises + ------- + TODO: Need to enumerate all exceptions raised from within binning pipeline. + I.e. Demarkate new exception (not yet handled) vs. handled exception. + Subclassing an AutometaException class may be most appropriate use case here. + """ + mg = Metagenome( + assembly=mgargs.files.metagenome, + outdir=mgargs.parameters.outdir, + nucl_orfs_fpath=mgargs.files.nucleotide_orfs, + prot_orfs_fpath=mgargs.files.amino_acid_orfs, + taxonomy_fpath=mgargs.files.taxonomy, + fwd_reads=mgargs.files.fwd_reads, + rev_reads=mgargs.files.rev_reads, + taxon_method=mgargs.parameters.taxon_method) + try: + # Original (raw) file should not be manipulated so return new object + mg = mg.length_filter( + out=mgargs.files.length_filtered, + cutoff=mgargs.parameters.length_cutoff) + except FileExistsError as err: + logger.debug(f'{mgargs.files.length_filtered} already exists. Continuing..') + mg = Metagenome( + assembly=mgargs.files.length_filtered, + outdir=mgargs.parameters.outdir, + nucl_orfs_fpath=mgargs.files.nucleotide_orfs, + prot_orfs_fpath=mgargs.files.amino_acid_orfs, + taxonomy_fpath=mgargs.files.taxonomy, + fwd_reads=mgargs.files.fwd_reads, + rev_reads=mgargs.files.rev_reads, + taxon_method=mgargs.parameters.taxon_method) + # I.e. asynchronous execution here (work-queue tasks) + mg.get_kmers( + kmer_size=mgargs.parameters.kmer_size, + normalized=mgargs.files.kmer_normalized, + out=mgargs.files.kmer_counts, + multiprocess=mgargs.parameters.kmer_multiprocess, + nproc=mgargs.parameters.cpus, + force=mgargs.parameters.force) + + coverages = mg.get_coverages( + out=mgargs.files.coverages, + from_spades=mgargs.parameters.cov_from_spades, + sam=mgargs.files.sam, + bam=mgargs.files.bam, + lengths=mgargs.files.lengths, + bed=mgargs.files.bed) + # Filter by Kingdom + kingdoms = mg.get_kingdoms( + ncbi=mgargs.databases.ncbi, + usepickle=mgargs.parameters.usepickle, + blast=mgargs.files.blastp, + hits=mgargs.files.blastp_hits, + force=mgargs.parameters.force, + cpus=mgargs.parameters.cpus) + + if not mgargs.parameters.kingdom in kingdoms: + raise KeyError(f'{mgargs.parameters.kingdom} not recovered in dataset. Recovered: {", ".join(kingdoms.keys())}') + + mag = kingdoms.get(mgargs.parameters.kingdom) + bins_df = mag.get_binning( + method=mgargs.parameters.binning_method, + kmers=mgargs.files.kmer_counts, + embedded=mgargs.files.kmer_embedded, + do_pca=mgargs.parameters.do_pca, + pca_dims=mgargs.parameters.pca_dims, + embedding_method=mgargs.parameters.embedding_method, + coverage=coverages, + domain=mgargs.parameters.kingdom, + taxonomy=mgargs.files.taxonomy, + reverse=mgargs.parameters.reversed, + ) + binning_cols = ['cluster','completeness','purity'] + bins_df[binning_cols].to_csv( + mgargs.files.binning, + sep='\t', + index=True, + header=True) + def main(args): logger.info(args.user) @@ -163,6 +272,6 @@ def main(args): datefmt='%m/%d/%Y %I:%M:%S %p', level=logger.DEBUG) parser = argparse.ArgumentParser('Concise Functional Description of Script') - parser.add_argument('user',help='') + parser.add_argument('user', help='') args = parser.parse_args() main(args) From caeb6f9567cbe604769d0b45d30abff38c95fc0a Mon Sep 17 00:00:00 2001 From: EvanRees Date: Sun, 15 Mar 2020 11:47:27 -0500 Subject: [PATCH 09/17] updates to check dependencies and control of debugging information when checking dependencies. Executable versions are now logged in debug info. log is now only written when flag is supplied. Timestamped log has been commented out. In the future, this could be a flag to log each run via a timestamped log. in databases now only returns the config and the method of databases is used when checking dependencies. --- autometa.py | 8 ++++---- autometa/config/databases.py | 14 +++++++------- autometa/config/environ.py | 4 ++-- autometa/config/user.py | 7 ++++--- 4 files changed, 17 insertions(+), 16 deletions(-) diff --git a/autometa.py b/autometa.py index 2f3b5f589..98042aab4 100755 --- a/autometa.py +++ b/autometa.py @@ -93,12 +93,12 @@ def init_logger(fpath=None, level=logging.INFO): def main(args): # Setup logger - timestamp = time.strftime("%Y-%m-%d_%H-%M-%S",time.gmtime()) - log_fpath = args.log if args.log else f'{timestamp}_autometa.log' + # timestamp = time.strftime("%Y-%m-%d_%H-%M-%S",time.gmtime()) + # log_fpath = args.log if args.log else f'{timestamp}_autometa.log' if args.debug: - logger = init_logger(fpath=log_fpath, level=logging.DEBUG) + logger = init_logger(fpath=args.log, level=logging.DEBUG) else: - logger = init_logger(fpath=log_fpath) + logger = init_logger(fpath=args.log) # Configure AutometaUser # TODO: master from WorkQueue is AutometaUser user = AutometaUser(dryrun=args.check_dependencies, nproc=args.cpus) diff --git a/autometa/config/databases.py b/autometa/config/databases.py index 7475be0f1..f9b010ad0 100644 --- a/autometa/config/databases.py +++ b/autometa/config/databases.py @@ -235,6 +235,7 @@ def get_missing(self, validate=False): fpath = self.config.get(section,option) if os.path.exists(fpath) and os.stat(fpath).st_size >= 0: # TODO: [Checkpoint validation] + logger.debug(f'({section},{option}): {fpath}') continue if validate: return False @@ -242,6 +243,9 @@ def get_missing(self, validate=False): missing[section].add(option) else: missing.update({section:set([option])}) + for section,opts in missing.items(): + for opt in opts: + logger.debug(f'MISSING: ({section},{opt})') return True if validate else missing def update_missing(self): @@ -271,12 +275,8 @@ def configure(self): Returns ------- - 2-tuple - i.e. (config, satisfied) - config : configparser.ConfigParser - - config with updated options in respective databases sections. - satisfied : bool - - whether all required Autometa databases are available. + configparser.ConfigParser + config with updated options in respective databases sections. Raises ------- @@ -285,7 +285,7 @@ def configure(self): """ self.update_missing() - return self.config, self.satisfied + return self.config def main(args): dbs = Databases(config=args.config, dryrun=args.dryrun, nproc=args.nproc) diff --git a/autometa/config/environ.py b/autometa/config/environ.py index f11ba5f2b..f51c9ef87 100644 --- a/autometa/config/environ.py +++ b/autometa/config/environ.py @@ -324,12 +324,12 @@ def configure(config=DEFAULT_CONFIG): satisfied = False logger.warning(f'executable not found: {executable}') elif not config.has_option('environ', executable): - logger.debug(f'Updated executable {executable}: {found}') + logger.debug(f'{executable}: {found} (version: {version})') config.set('environ', executable, found) config.set('versions', executable, version) user_executable = config.get('environ', executable) if not which(user_executable): - logger.debug(f'Updated executable {executable}: {found}') + logger.debug(f'{executable}: {found} (version: {version})') config.set('environ', executable, found) config.set('versions', executable, version) else: diff --git a/autometa/config/user.py b/autometa/config/user.py index 0afbe4137..8bcb8477b 100644 --- a/autometa/config/user.py +++ b/autometa/config/user.py @@ -70,11 +70,12 @@ def configure(self): logger.info(f'Executable dependencies satisfied: {exe_satisfied}') # Database env dbs = Databases(self.config, dryrun=self.dryrun, nproc=self.nproc) - self.config, db_satisfied = dbs.configure() + self.config = dbs.configure() if self.dryrun: - logger.info(f'Database dependencies satisfied: {db_satisfied}') + logger.info(f'Database dependencies satisfied: {dbs.satisfied}') + return - if not db_satisfied: + if not dbs.satisfied: raise LookupError('Database dependencies not satisfied!') if not exe_satisfied: raise LookupError('Executable dependencies not satisfied!') From 138c273c64a1d6be550b7e103527e1510415d99f Mon Sep 17 00:00:00 2001 From: EvanRees Date: Sun, 15 Mar 2020 12:30:51 -0500 Subject: [PATCH 10/17] updated 'get_versions' function to return the version string if a program is provided as input. Updated respective files using this function. This should be clearer than returning a dict of the program passed in as key and removes redundant calls to pass in the program as input and then again as a key to retrieve the version value. --- autometa/common/external/prodigal.py | 4 ++-- autometa/config/environ.py | 9 +++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/autometa/common/external/prodigal.py b/autometa/common/external/prodigal.py index 1644231cc..d4f1f6eed 100644 --- a/autometa/common/external/prodigal.py +++ b/autometa/common/external/prodigal.py @@ -188,7 +188,7 @@ def contigs_from_headers(fpath): Why the exception is raised. """ - version = get_versions('prodigal').get('prodigal') + version = get_versions('prodigal') if version.count('.') >= 2: version = float('.'.join(version.split('.')[:2])) else: @@ -225,7 +225,7 @@ def orf_records_from_contigs(contigs, fpath): Why the exception is raised. """ - version = get_versions('prodigal').get('prodigal') + version = get_versions('prodigal') if version.count('.') >= 2: version = float('.'.join(version.split('.')[:2])) else: diff --git a/autometa/config/environ.py b/autometa/config/environ.py index f51c9ef87..23eb755bb 100644 --- a/autometa/config/environ.py +++ b/autometa/config/environ.py @@ -249,8 +249,9 @@ def get_versions(program=None): Returns ------- - dict - {program:version, ...} + dict or str + if program is None: dict - {program:version, ...} + if program: str - version Raises ------- @@ -276,7 +277,7 @@ def get_versions(program=None): raise ValueError(f'program is not string. given:{type(program)}') if program not in dispatcher: raise KeyError(f'{program} not in executables') - return {program:dispatcher[program]()} + return dispatcher[program]() versions = {} executables = find_executables() for exe,found in executables.items(): @@ -333,7 +334,7 @@ def configure(config=DEFAULT_CONFIG): config.set('environ', executable, found) config.set('versions', executable, version) else: - version = get_versions(user_executable).get(user_executable) + version = get_versions(user_executable) config.set('versions', user_executable, version) return config, satisfied From ea282d143a68f63965d5dbaeddf8baa8ebc2e63c Mon Sep 17 00:00:00 2001 From: EvanRees Date: Sun, 15 Mar 2020 12:44:48 -0500 Subject: [PATCH 11/17] hotfix to case where new project does not contain any metagenomes. skip performing check to place appropriate metagenome number and just return 1. --- autometa/config/project.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/autometa/config/project.py b/autometa/config/project.py index 36b7042ae..5e0d26491 100644 --- a/autometa/config/project.py +++ b/autometa/config/project.py @@ -76,6 +76,9 @@ def new_metagenome_num(self): Why the exception is raised. """ + # I.e. no metagenomes have been added to project yet. + if not self.metagenomes: + return 1 max_num = max(self.metagenomes) if max_num == self.n_metagenomes: return self.n_metagenomes + 1 From ee1fbec70b9ac08cb0e78414b1a083d92f8d2bca Mon Sep 17 00:00:00 2001 From: EvanRees Date: Mon, 16 Mar 2020 00:06:05 -0500 Subject: [PATCH 12/17] Changed OSError to subclass ChildProcessError in prodigal.py. This is a bug fix related to exception hierarchy. changed timeit logging message format. Respective exception handling updatedin metagenome.py --- autometa/common/external/prodigal.py | 4 ++-- autometa/common/metagenome.py | 4 ++-- autometa/common/utilities.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/autometa/common/external/prodigal.py b/autometa/common/external/prodigal.py index d4f1f6eed..613149f00 100644 --- a/autometa/common/external/prodigal.py +++ b/autometa/common/external/prodigal.py @@ -64,7 +64,7 @@ def run(assembly, nucls_out, prots_out, force=False,cpus=0,parallel=True): ------- FileExistsError `nucls_out` or `prots_out` already exists - OSError + ChildProcessError Prodigal Failed """ if not os.path.exists(assembly): @@ -159,7 +159,7 @@ def run(assembly, nucls_out, prots_out, force=False,cpus=0,parallel=True): # COMBAK: Check all possible return codes for GNU parallel for fp in [nucls_out, prots_out]: if not os.path.exists(fp): - raise OSError(f'{fp} not written') + raise ChildProcessError(f'{fp} not written') return nucls_out, prots_out def contigs_from_headers(fpath): diff --git a/autometa/common/metagenome.py b/autometa/common/metagenome.py index cbeede46b..76b8529cf 100644 --- a/autometa/common/metagenome.py +++ b/autometa/common/metagenome.py @@ -315,10 +315,10 @@ def call_orfs(self, force=False, cpus=0, parallel=True): cpus=cpus, parallel=parallel, ) - except OSError as err: - logger.exception(err) except FileExistsError as err: return self.nucl_orfs_fpath, self.prot_orfs_fpath + except ChildProcessError as err: + logger.exception(err) return nucls_fp, prots_fp def orfs(self, orf_type='prot', cpus=0): diff --git a/autometa/common/utilities.py b/autometa/common/utilities.py index 55f7f1d5a..2d90ba1b4 100644 --- a/autometa/common/utilities.py +++ b/autometa/common/utilities.py @@ -436,7 +436,7 @@ def wrapper(*args, **kwds): obj = func(*args, **kwds) end = time.time() time_taken = end - start - logger.info(f'{func.__name__} : {time_taken:.2f} seconds') + logger.info(f'{func.__name__} took {time_taken:.2f} seconds') # runlogger.info(f'func={func.__name__} : {time_taken} seconds') return obj return wrapper From bbfaecde5232510613115ab6d3581ba5ae50cf32 Mon Sep 17 00:00:00 2001 From: EvanRees Date: Mon, 16 Mar 2020 23:35:44 -0500 Subject: [PATCH 13/17] mostly resolves KwanLab/Autometa#21 and resolves KwanLab/Autometa#18. --- autometa.py | 2 +- autometa/common/coverage.py | 105 ++++++++++++++++++++------- autometa/common/external/bedtools.py | 47 ++++++++---- 3 files changed, 112 insertions(+), 42 deletions(-) diff --git a/autometa.py b/autometa.py index 98042aab4..6a76151c0 100755 --- a/autometa.py +++ b/autometa.py @@ -115,7 +115,7 @@ def main(args): import argparse import time cpus = mp.cpu_count() - parser = argparse.ArgumentParser('Main script to run Autometa') + parser = argparse.ArgumentParser(description='Main script to run Autometa pipeline.') parser.add_argument('config', help='', nargs='*') diff --git a/autometa/common/coverage.py b/autometa/common/coverage.py index 85129a951..ae9223439 100644 --- a/autometa/common/coverage.py +++ b/autometa/common/coverage.py @@ -60,16 +60,33 @@ def from_spades_names(records): Why the exception is raised. """ - return pd.Series( + logger.info(f'Retrieving coverages from contig ID in {args.assembly}') + coverages = pd.Series( {record.id:record.id.split('_cov_')[-1] for record in records}, name='coverage', dtype=float) + coverages.index.name = 'contig' + return coverages def make_length_table(fasta, out): - seqs = {r.id:len(r) for r in SeqIO.parse(fasta, 'fasta')} - length_s = pd.Series(seqs, name='length') - length_s.index.name = 'contig' - length_s.to_csv(out, sep='\t', index=True, header=True) + """Writes a tab-delimited length table to `out` given an input `fasta`. + + Parameters + ---------- + fasta : str + + out : str + + + Returns + ------- + str + + """ + seqs = {record.id:len(record) for record in SeqIO.parse(fasta, 'fasta')} + lengths = pd.Series(seqs, name='length') + lengths.index.name = 'contig' + lengths.to_csv(out, sep='\t', index=True, header=True) return out def get(fasta, out, fwd_reads=None, rev_reads=None, sam=None, bam=None, lengths=None, @@ -92,6 +109,12 @@ def get(fasta, out, fwd_reads=None, rev_reads=None, sam=None, bam=None, lengths= 3. `sam` 4. `fwd_reads` and `rev_reads` + Event sequence to calculate contig coverages: + 1. align paired-end reads to generate alignment.sam + 2. sort samfile to generate alignment.bam + 3. calculate assembly coverages to generate alignment.bed + 4. calculate contig coverages to generate coverage.tsv + Parameters ---------- @@ -130,34 +153,59 @@ def get(fasta, out, fwd_reads=None, rev_reads=None, sam=None, bam=None, lengths= lengths = lengths if lengths else os.path.join(tempdir, 'lengths.tsv') sam = sam if sam else os.path.join(tempdir, 'alignment.sam') db = os.path.join(tempdir, 'alignment.db') - if os.path.exists(bed): + + def parse_bed(bed=bed, out=out): return bedtools.parse(bed, out) - if os.path.exists(bam): + + def make_bed(lengths=lengths, fasta=fasta, bam=bam, bed=bed): if not os.path.exists(lengths): lengths = make_length_table(fasta, lengths) bedtools.genomecov(bam, lengths, bed) - return bedtools.parse(bed, out) - if os.path.exists(sam): + + def sort_samfile(sam=sam,bam=bam,nproc=nproc): samtools.sort(sam, bam, nproc=nproc) - if not os.path.exists(lengths): - lengths = make_length_table(fasta, lengths) - bedtools.genomecov(bam, lengths, bed) - return bedtools.parse(bed, out) - if not fwd_reads or not rev_reads: - raise ValueError(f'{fwd_reads} and {rev_reads} are required if no other alignments are specified!') - bowtie.build(fasta, db) - bowtie.align(db, sam, fwd_reads, rev_reads, nproc=nproc) - samtools.sort(sam, bam, nproc=nproc) - if not os.path.exists(lengths): - lengths = make_length_table(fasta, lengths) - bedtools.genomecov(bam, lengths, bed) - return bedtools.parse(bed, out) + + def align_pe_reads(fasta=fasta,db=db,sam=sam,fwd_reads=fwd_reads,rev_reads=rev_reads,nproc=nproc): + bowtie.build(fasta, db) + bowtie.align(db, sam, fwd_reads, rev_reads, nproc=nproc) + # Setup of coverage calculation sequence depending on file(s) provided + calculation_sequence = { + 'bed_exists':[parse_bed], + 'bam_exists':[make_bed, parse_bed], + 'sam_exists':[sort_samfile, make_bed, parse_bed], + 'full':[align_pe_reads, sort_samfile, make_bed, parse_bed]} + # Now need to determine which point to start calculation... + for fp,argname in zip([bed,bam,sam],['bed','bam','sam']): + step = 'full' + if os.path.exists(fp): + step = f'{argname}_exists' + break + + if (not fwd_reads or not rev_reads) and step == 'full': + raise ValueError(f'fwd_reads and rev_reads are required if no other alignments are specified!') + logger.debug(f'starting coverage calculation sequence from {step}') + for calculation in calculation_sequence[step]: + logger.debug(f'running {calculation.__name__}') + if calculation.__name__ == 'parse_bed': + return calculation() + calculation() + except Exception as err: logger.exception(err) + raise err finally: shutil.rmtree(tempdir, ignore_errors=True) def main(args): + + if args.from_spades: + records = [rec for rec in SeqIO.parse(args.assembly, 'fasta')] + coverages = from_spades_names(records) + logger.info(f'{coverages.index.nunique():,} contig coverages retrieved from {args.assembly}') + coverages.to_csv(args.out, sep='\t', index=True, header=True) + logger.info(f'written: {args.out}') + return + get(fasta=args.assembly, fwd_reads=args.fwd_reads, rev_reads=args.rev_reads, @@ -176,7 +224,7 @@ def main(args): format='%(asctime)s : %(name)s : %(levelname)s : %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p', level=logger.DEBUG) - parser = argparse.ArgumentParser('Autometa Coverage') + parser = argparse.ArgumentParser(description='Constuct contig coverage table given an input assembly and reads.') parser.add_argument('-f','--assembly', help='', required=True) parser.add_argument('-1', '--fwd-reads', help='') parser.add_argument('-2', '--rev-reads', help='') @@ -184,7 +232,14 @@ def main(args): parser.add_argument('--bam', help='') parser.add_argument('--lengths', help='') parser.add_argument('--bed', help='') - parser.add_argument('--nproc', help=f'Num processors to use. (default: {mp.cpu_count()})', default=mp.cpu_count()) - parser.add_argument('--out', help='') + parser.add_argument('--nproc', + help=f'Num processors to use. (default: {mp.cpu_count()})', + default=mp.cpu_count(), + type=int) + parser.add_argument('--from-spades', + help='Extract k-mer coverages from contig IDs. (Input assembly is output from SPAdes)', + action='store_true', + default=False) + parser.add_argument('--out', help='', required=True) args = parser.parse_args() main(args) diff --git a/autometa/common/external/bedtools.py b/autometa/common/external/bedtools.py index 66dfa5610..b2f9db3b6 100644 --- a/autometa/common/external/bedtools.py +++ b/autometa/common/external/bedtools.py @@ -32,7 +32,7 @@ logger = logging.getLogger(__name__) -def genomecov(ibam, lengths, out): +def genomecov(ibam, lengths, out, force=False): """Run bedtools genomecov with input `ibam` and `lengths` to retrieve metagenome coverages. @@ -43,7 +43,7 @@ def genomecov(ibam, lengths, out): lengths : str tab-delimited cols=[contig,length] out : str - + The bedtools genomecov output is a tab-delimited file with the following columns: 1. Chromosome 2. Depth of coverage @@ -51,29 +51,33 @@ def genomecov(ibam, lengths, out): 4. Size of chromosome 5. Fraction of bases on that chromosome with that coverage See also: http://bedtools.readthedocs.org/en/latest/content/tools/genomecov.html + force : bool + force overwrite of `out` if it already exists (default is False). Returns ------- - type - Description of returned object. + str + Raises ------- FileExistsError - `out` file already exists + `out` file already exists and force is False OSError Why the exception is raised. """ cmd = f'bedtools genomecov -ibam {ibam} -g {lengths}' - if os.path.exists(out): - raise FileExistsError(out) + if os.path.exists(out) and not force: + logger.debug(f'{out} already exists. skipping...') + return out with open(os.devnull,'w') as stderr, open(out,'w') as stdout: retcode = subprocess.call(cmd, stdout=stdout, stderr=stderr, shell=True) if retcode or not os.path.exists(out) or os.stat(out).st_size == 0: - raise OSError(f'bedtools failed: {cmd}') + raise ChildProcessError(f'bedtools failed: {cmd}') + return out -def parse(bed, out=None): +def parse(bed, out=None, force=False): """Calculate coverages from bed file. Parameters @@ -82,6 +86,8 @@ def parse(bed, out=None): out : str if provided will write to `out`. I.e. + force : bool + force overwrite of `out` if it already exists (default is False). Returns ------- @@ -90,13 +96,18 @@ def parse(bed, out=None): Raises ------- + ValueError + `out` incorrectly formatted to be read as pandas DataFrame. FileNotFoundError `bed` does not exist """ - if out and os.path.exists(out): - cols = ['contig','coverage'] - return pd.read_csv(out, sep='\t', usecols=cols, index_col='contig') + if out and os.path.exists(out) and not os.stat(out).st_size == 0: + try: + cols = ['contig','coverage'] + return pd.read_csv(out, sep='\t', usecols=cols, index_col='contig') + except ValueError as err: + raise ValueError(f'InvalidTableFormat: {out}') if not os.path.exists(bed): raise FileNotFoundError(bed) names = ['contig','depth','bases','length','breadth'] @@ -107,15 +118,15 @@ def parse(bed, out=None): df = df.assign(total_breadth=lambda x: x.depth * x.bases) dff = df.groupby('contig')['total_breadth', 'bases'].sum() dff = dff.assign(coverage=lambda x: x.total_breadth/x.bases) - if out: + if out and (not os.path.exists(out) or (os.path.exists(out) and force)): dff.to_csv(out, sep='\t', index=True, header=True) logger.debug(f'{out} written') logger.debug(f'{os.path.basename(out)} shape: {dff.shape}') - return dff['coverage'] + return dff[['coverage']] def main(args): - genomecov(ibam=args.ibam, lengths=args.lengths, out=args.bed) - df = parse(bed=args.bed, out=args.coverage) + bed = genomecov(ibam=args.ibam, lengths=args.lengths, out=args.bed, force=args.force_bed) + df = parse(bed=bed, out=args.coverage, force=args.force_cov) if __name__ == '__main__': import argparse @@ -131,5 +142,9 @@ def main(args): parser.add_argument('bed', help=' tab-delimited cols=[contig,length]') parser.add_argument('--coverage', help='') + parser.add_argument('--force-bed', help='force overwrite `bed`', + action='store_true',default=False) + parser.add_argument('--force-cov', help='force overwrite `--coverage`', + action='store_true',default=False) args = parser.parse_args() main(args) From 5c735796764df761558d5035bfd7248409a5fe1a Mon Sep 17 00:00:00 2001 From: EvanRees Date: Thu, 19 Mar 2020 12:15:45 -0500 Subject: [PATCH 14/17] fix to extract contigs from orf_ids using specific prodigal version. Note: entire pipeline currently assumes orf calling was performed using prodigal. Update to template.py where ArgumentParser now has default description, where previously this was by default usage. (Which the usage by default should be the name of the script). Updates to respective files where ORF to contig translations are necessary. --- autometa/common/external/hmmer.py | 30 +++++++++----------- autometa/common/mag.py | 44 ++++++++++++------------------ autometa/common/markers.py | 16 +++++++---- autometa/common/metagenome.py | 2 +- autometa/taxonomy/lca.py | 20 ++++++-------- autometa/taxonomy/majority_vote.py | 12 ++------ docs/template.py | 2 +- 7 files changed, 54 insertions(+), 72 deletions(-) diff --git a/autometa/common/external/hmmer.py b/autometa/common/external/hmmer.py index 945592ee9..43f93e926 100644 --- a/autometa/common/external/hmmer.py +++ b/autometa/common/external/hmmer.py @@ -50,13 +50,13 @@ def hmmscan(orfs, hmmdb, outfpath, cpus=0, force=False, parallel=True, log=None) outfpath : str - cpus : int + cpus : int, optional Num. cpus to use. 0 will run as many cpus as possible (the default is 0). - force : bool + force : bool, optional Overwrite existing `outfpath` (the default is False). - parallel : bool + parallel : bool, optional Will parallelize hmmscan using GNU parallel (the default is True). - log : str + log : str, optional (the default is None). If provided will write parallel log to `log`. @@ -133,7 +133,7 @@ def hmmscan(orfs, hmmdb, outfpath, cpus=0, force=False, parallel=True, log=None) raise OSError(f'{outfpath} not written.') return outfpath -def filter_markers(infpath, outfpath, cutoffs, prodigal_annotations=None, force=False): +def filter_markers(infpath, outfpath, cutoffs, orfs=None, force=False): """Filter markers from hmmscan output table that are above cutoff values. Parameters @@ -144,10 +144,10 @@ def filter_markers(infpath, outfpath, cutoffs, prodigal_annotations=None, force= cutoffs : str - prodigal_annotations : str + orfs : str, optional Default will attempt to translate recovered qseqids to contigs - force : bool + force : bool, optional Overwrite existing `outfpath` (the default is False). Returns @@ -190,12 +190,8 @@ def filter_markers(infpath, outfpath, cutoffs, prodigal_annotations=None, force= raise AssertionError(f'No markers in {infpath} pass cutoff thresholds') cols = ['orf','sacc','sname','score','cutoff'] mdf = mdf[cols] - if prodigal_annotations: - logger.debug('Retrieving ORF->contig translations from ORF Caller') - translations = prodigal.contigs_from_headers(prodigal_annotations) - translater = lambda x: translations.get(x, x.rsplit('_',1)[0]) - else: - translater = lambda x: x.rsplit('_',1)[0] + translations = prodigal.contigs_from_headers(orfs) + translater = lambda x: translations.get(x, x.rsplit('_',1)[0]) mdf['contig'] = mdf['orf'].map(translater) mdf.set_index('contig', inplace=True) mdf.to_csv(outfpath, sep='\t', index=True, header=True) @@ -212,7 +208,7 @@ def main(args): outfpath=args.hmmscan, cpus=args.cpus, force=args.force, - parallel=args.noparallel, + parallel=args.parallel, log=args.log) except FileExistsError as err: logger.debug(err) @@ -222,7 +218,7 @@ def main(args): infpath=result, outfpath=args.markers, cutoffs=args.cutoffs, - prodigal_annotations=args.orfs, + orfs=args.orfs, force=args.force) @@ -241,8 +237,8 @@ def main(args): parser.add_argument('--log', help='') parser.add_argument('--force', help="force overwrite of out filepath", action='store_true') - parser.add_argument('--cpus', help='num cpus to use',default=0) - parser.add_argument('--noparallel',help="Disable GNU parallel", action='store_false') + parser.add_argument('--cpus', help='num cpus to use',default=0, type=int) + parser.add_argument('--parallel',help="Enable GNU parallel", action='store_true') parser.add_argument('--verbose', help="add verbosity", action='store_true') args = parser.parse_args() main(args) diff --git a/autometa/common/mag.py b/autometa/common/mag.py index f8e69532b..59df84c67 100644 --- a/autometa/common/mag.py +++ b/autometa/common/mag.py @@ -109,7 +109,7 @@ def prepared(self, fpath): return True return False - def get_orfs(self, orf_type='prot', prodigal_fpath=None): + def get_orfs(self, orf_type='prot'): """Retrieve ORFs corresponding to MAG. Parameters @@ -117,8 +117,6 @@ def get_orfs(self, orf_type='prot', prodigal_fpath=None): orf_type : str Type of ORF to retrieve (the default is 'prot'). Amino acid or nucleotide choices = ['prot','nucl'] - prodigal_fpath : str - (the default is MARKERS_DIR). + dbdir : str, optional + (the default is {MARKERS_DIR}). Should contain pressed hmms and cutoffs table. - force : bool - Will overwrite existing marker annotations (the default is False). - orf_caller : str - Will use `orf_caller` to connect ORFs with contigs and contigs with - their respective ORFs (the default is 'prodigal'). + force : bool, optional + Will overwrite existing marker annotations (the default is {force}). Returns ------- @@ -291,11 +283,9 @@ def markers(self, kingdom='bacteria', dbdir=MARKERS_DIR, force=False, orf_caller logger.debug(f'Retrieving markers for {kingdom} kingdom') orfs_fp = os.path.join(self.outdir, f'{kingdom.lower()}.orfs.faa') if (not os.path.exists(orfs_fp)) or (os.path.exists(orfs_fp) and force): - if orf_caller == 'prodigal': - self.write_orfs(orfs_fp, prodigal_fpath=self.prot_orfs_fpath) - else: - self.write_orfs(orfs_fp) - return Markers(orfs_fp, kingdom=kingdom, dbdir=dbdir).get_markers() + self.write_orfs(orfs_fp) + markers = Markers(orfs_fp, kingdom=kingdom, dbdir=dbdir) + return markers.get() def subset_df(self, df): if type(df) not in [pd.DataFrame, pd.Series]: diff --git a/autometa/common/markers.py b/autometa/common/markers.py index 57293dc3d..e72b6b9ea 100644 --- a/autometa/common/markers.py +++ b/autometa/common/markers.py @@ -44,9 +44,9 @@ class Markers: ---------- orfs_fpath : str Description of parameter `orfs_fpath`. - kingdom : str + kingdom : str, optional Description of parameter `kingdom` (the default is 'bacteria'). - dbdir : str + dbdir : str, optional Description of parameter `dbdir` (the default is {MARKERS_DIR}). Attributes @@ -162,7 +162,7 @@ def load(fpath, format='wide'): # TODO: Write Marker specific AutometaException raise ValueError(err_msg) - def get_markers(self, format='wide', **kwargs): + def get(self, format='wide', **kwargs): """Retrieve contigs' markers from markers database that pass cutoffs filter. Parameters @@ -189,18 +189,22 @@ def get_markers(self, format='wide', **kwargs): Why the exception is raised. """ if not self.searched: - hmmer.hmmscan(self.orfs_fpath, self.hmmdb, self.hmmscan_fp, **kwargs) + hmmer.hmmscan( + orfs=self.orfs_fpath, + hmmdb=self.hmmdb, + outfpath=self.hmmscan_fp, + **kwargs) if not self.found: hmmer.filter_markers( infpath=self.hmmscan_fp, outfpath=self.markers_fp, cutoffs=self.cutoffs, - prodigal_annotations=self.orfs_fpath) + orfs=self.orfs_fpath) return Markers.load(fpath=self.markers_fp, format=format) def main(args): markers = Markers(orfs_fpath=args.orfs, kingdom=args.kingdom, dbdir=args.dbdir) - markers.get_markers() + markers.get() if __name__ == '__main__': import argparse diff --git a/autometa/common/metagenome.py b/autometa/common/metagenome.py index 76b8529cf..cc05f7e91 100644 --- a/autometa/common/metagenome.py +++ b/autometa/common/metagenome.py @@ -417,7 +417,7 @@ def assign_taxonomy(self, method, force=False, *args, **kwargs): Parameters ---------- - force : bool + force : bool, optional overwrite existing voting method's file (the default is False). *args : type Description of parameter `*args`. diff --git a/autometa/taxonomy/lca.py b/autometa/taxonomy/lca.py index 5c165e102..d5709b00e 100644 --- a/autometa/taxonomy/lca.py +++ b/autometa/taxonomy/lca.py @@ -390,7 +390,7 @@ def write_lcas(self, lcas, outfpath): outfile.write(outlines) return outfpath - def parse(self, lca_fpath, prodigal_annotations=None): + def parse(self, lca_fpath, orfs_fpath): """Retrieve and construct contig dictionary from provided `lca_fpath`. Parameters @@ -399,8 +399,7 @@ def parse(self, lca_fpath, prodigal_annotations=None): tab-delimited ordered columns: qseqid, name, rank, lca_taxid - prodigal_annotations : str - Default will attempt to translate recovered qseqids to contigs + orfs_fpath : str Note: These ORFs should correspond to the ORFs provided in the BLAST table. @@ -413,17 +412,17 @@ def parse(self, lca_fpath, prodigal_annotations=None): ------- FileNotFoundError `lca_fpath` does not exist + FileNotFoundError + `orfs_fpath` does not exist """ logger.debug(f'Parsing LCA table: {lca_fpath}') if not os.path.exists(lca_fpath): raise FileNotFoundError(lca_fpath) # logger.exception(FileNotFoundError) - if prodigal_annotations and not os.path.exists(prodigal_annotations): - raise FileNotFoundError(prodigal_annotations) + if orfs_fpath and not os.path.exists(orfs_fpath): + raise FileNotFoundError(orfs_fpath) - if prodigal_annotations: - logger.debug('getting contig headers from prodigal ORFs') - translations = prodigal.contigs_from_headers(prodigal_annotations) + translations = prodigal.contigs_from_headers(orfs_fpath) fname = os.path.basename(lca_fpath) n_lines = file_length(lca_fpath) if self.verbose else None @@ -432,10 +431,9 @@ def parse(self, lca_fpath, prodigal_annotations=None): with open(lca_fpath) as fh: header = fh.readline() for line in tqdm(fh, total=n_lines, disable=disable, desc=f'Parsing {fname}', leave=False): - contig_orf_id, name, rank, taxid = line.strip().split('\t') - contig_ = contig_orf_id.rsplit('_', 1)[0] + orf_id, name, rank, taxid = line.strip().split('\t') taxid = int(taxid) - contig = translations.get(contig_orf_id, contig_) if prodigal_annotations else contig_ + contig = translations.get(orf_id) if taxid != 1: while rank not in set(NCBI.CANONICAL_RANKS): taxid = self.parent(taxid) diff --git a/autometa/taxonomy/majority_vote.py b/autometa/taxonomy/majority_vote.py index 487a73b64..765569fba 100644 --- a/autometa/taxonomy/majority_vote.py +++ b/autometa/taxonomy/majority_vote.py @@ -237,10 +237,8 @@ def majority_vote(fasta, ncbi_dir, outdir, votes_fname, lca_fname=None, **kwargs votes_fname : str - lca_fname : str - (the default is None). - If None, filename will be generated from fasta. - I.e. -> (the default is Date: Mon, 23 Mar 2020 13:14:23 -0500 Subject: [PATCH 15/17] updated pandas numpy module call for nan to pd.NA from pandas version 1.0. in kmers and recursive_dbscan. Updated main function for recursive dbscan with required coverage table input and subsetting taxonomy by the provided domain. Datatype conversion in pandas dataframes are now performed to optimize space in mag.py and recursive_dbscan.py. Added script description to coverage.py and removed unused exception handling in docstring. Renamed bedtools column 'breadth' to 'depth_fraction' and 'total_breadth' to 'depth_product'. Added KmerFormatError in docstring in kmers.load() func. Updated docstring in autometa.config.environ.find_executables() --- autometa/binning/recursive_dbscan.py | 19 ++++++++++++------- autometa/common/coverage.py | 7 +------ autometa/common/external/bedtools.py | 8 ++++---- autometa/common/kmers.py | 14 ++++++++------ autometa/common/mag.py | 1 + autometa/config/environ.py | 14 +++++--------- 6 files changed, 31 insertions(+), 32 deletions(-) diff --git a/autometa/binning/recursive_dbscan.py b/autometa/binning/recursive_dbscan.py index d12ed3821..5a4e9fb17 100644 --- a/autometa/binning/recursive_dbscan.py +++ b/autometa/binning/recursive_dbscan.py @@ -166,7 +166,7 @@ def run_dbscan(df, eps, dropcols=['cluster','purity','completeness'], usecols=[' """ n_samples = df.shape[0] if n_samples == 1: - clusters = pd.Series([pd.np.nan], index=df.index, name='cluster') + clusters = pd.Series([pd.NA], index=df.index, name='cluster') return pd.merge(df, clusters, how='left', left_index=True, right_index=True) for col in dropcols: if col in df.columns: @@ -220,7 +220,7 @@ def add_metrics(df, markers_df, domain='bacteria'): completeness = nunique_markers / expected_number * 100 # Protect from divide by zero if nunique_markers == 0: - purity = pd.np.nan + purity = pd.NA else: purity = num_single_copy_markers / nunique_markers * 100 metrics['completeness'].update({cluster:completeness}) @@ -272,7 +272,7 @@ def get_clusters(master_df, markers_df, domain='bacteria', completeness=20., pur # No contigs can be clustered, label as unclustered and add the final df # of (unclustered) contigs if clustered_df.empty: - unclustered_df = unclustered_df.assign(cluster=pd.np.nan) + unclustered_df = unclustered_df.assign(cluster=pd.NA) clusters.append(unclustered_df) break @@ -405,7 +405,7 @@ def binning(master, markers, domain='bacteria', completeness=20., purity=90., clusters.append(clustered) clustered_df = pd.concat(clusters, sort=True) unclustered_df = master.loc[~master.index.isin(clustered_df.index)] - unclustered_df.loc[:,'cluster'] = pd.np.nan + unclustered_df.loc[:,'cluster'] = pd.NA return pd.concat([clustered_df,unclustered_df], sort=True) def main(args): @@ -415,20 +415,24 @@ def main(args): embedded=args.embedded_kmers, method=args.embedding_method) - markers_df = Markers.load(args.markers) + cov_df = pd.read_csv(args.coverage, sep='\t', index_col='contig') + master_df = pd.merge(kmers_df, cov_df[['coverage']], how='left', left_index=True, right_index=True) + markers_df = Markers.load(args.markers) + markers_df = markers_df.convert_dtypes() # Taxonomy.load() - master_df = kmers_df if args.taxonomy: taxa_df = pd.read_csv(args.taxonomy, sep='\t', index_col='contig') + taxa_df = taxa_df[taxa_df.superkingdom == args.domain] master_df = pd.merge( left=master_df, right=taxa_df, - how='left', + how='inner', left_index=True, right_index=True) taxa_present = True if 'taxid' in master_df else False + master_df = master_df.convert_dtypes() logger.debug(f'master_df shape: {master_df.shape}') master_out = binning( master=master_df, @@ -455,6 +459,7 @@ def main(args): description="Perform decomposition/embedding/clustering via PCA/[TSNE|UMAP]/DBSCAN." ) parser.add_argument('kmers', help='') + parser.add_argument('coverage', help='') parser.add_argument('markers', help='') parser.add_argument('out', help='') parser.add_argument('--embedded-kmers',help='') diff --git a/autometa/common/coverage.py b/autometa/common/coverage.py index ae9223439..919923bde 100644 --- a/autometa/common/coverage.py +++ b/autometa/common/coverage.py @@ -54,11 +54,6 @@ def from_spades_names(records): pd.Series index=contig, name='coverage', dtype=float - Raises - ------- - ExceptionName - Why the exception is raised. - """ logger.info(f'Retrieving coverages from contig ID in {args.assembly}') coverages = pd.Series( @@ -224,7 +219,7 @@ def main(args): format='%(asctime)s : %(name)s : %(levelname)s : %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p', level=logger.DEBUG) - parser = argparse.ArgumentParser(description='Constuct contig coverage table given an input assembly and reads.') + parser = argparse.ArgumentParser(description='Construct contig coverage table given an input assembly and reads.') parser.add_argument('-f','--assembly', help='', required=True) parser.add_argument('-1', '--fwd-reads', help='') parser.add_argument('-2', '--rev-reads', help='') diff --git a/autometa/common/external/bedtools.py b/autometa/common/external/bedtools.py index b2f9db3b6..bdff4ae1d 100644 --- a/autometa/common/external/bedtools.py +++ b/autometa/common/external/bedtools.py @@ -110,14 +110,14 @@ def parse(bed, out=None, force=False): raise ValueError(f'InvalidTableFormat: {out}') if not os.path.exists(bed): raise FileNotFoundError(bed) - names = ['contig','depth','bases','length','breadth'] + names = ['contig','depth','bases','length','depth_fraction'] df = pd.read_csv(bed, sep='\t', names=names, index_col='contig') criterion1 = df.depth != 0 criterion2 = df.index != 'genome' df = df[criterion1 & criterion2] - df = df.assign(total_breadth=lambda x: x.depth * x.bases) - dff = df.groupby('contig')['total_breadth', 'bases'].sum() - dff = dff.assign(coverage=lambda x: x.total_breadth/x.bases) + df = df.assign(depth_product=lambda x: x.depth * x.bases) + dff = df.groupby('contig')['depth_product', 'bases'].sum() + dff = dff.assign(coverage=lambda x: x.depth_product/x.bases) if out and (not os.path.exists(out) or (os.path.exists(out) and force)): dff.to_csv(out, sep='\t', index=True, header=True) logger.debug(f'{out} written') diff --git a/autometa/common/kmers.py b/autometa/common/kmers.py index 479501513..a9c1813ca 100644 --- a/autometa/common/kmers.py +++ b/autometa/common/kmers.py @@ -49,7 +49,7 @@ logger = logging.getLogger(__name__) def revcomp(string): - """Revers complement the provided `string`. + """Reverse complement the provided `string`. Parameters ---------- @@ -96,6 +96,7 @@ def init_kmers(kmer_size=5): for char in dna_letters: new_list.append(current_seq + char) all_kmers = new_list + # subset uniq_kmers by removing any reverse complements for kmer in all_kmers: kmer_reverse = revcomp(kmer) if type(kmer_reverse) is int: @@ -123,7 +124,8 @@ def load(kmers_fpath): ------- FileNotFoundError `kmers_fpath` does not exist or is empty - + KmerFormatError + `kmers_fpath` file format is invalid """ if not os.path.exists(kmers_fpath) or os.stat(kmers_fpath).st_size == 0: raise FileNotFoundError(kmers_fpath) @@ -189,7 +191,7 @@ def record_counter(args): max_length = record_length - kmer_size if max_length <= 0: logger.warning(f'{record.id} can not be counted! k-mer size exceeds length. {record_length}') - contig_kmer_counts = [pd.np.nan] * n_uniq_kmers + contig_kmer_counts = [pd.NA] * n_uniq_kmers return {record.id:contig_kmer_counts} for i in range(max_length): kmer = record.seq[i:i+kmer_size] @@ -203,7 +205,7 @@ def record_counter(args): else: index = ref_kmers[kmer_revcomp] contig_kmer_counts[index] += 1 - contig_kmer_counts = [c if c != 0 else pd.np.nan for c in contig_kmer_counts] + contig_kmer_counts = [c if c != 0 else pd.NA for c in contig_kmer_counts] return {record.id:contig_kmer_counts} def seq_counter(assembly, ref_kmers, verbose=True): @@ -242,7 +244,7 @@ def seq_counter(assembly, ref_kmers, verbose=True): max_length = len(record.seq) - kmer_size if max_length <= 0: logger.warning(f'{record.id} can not be counted! k-mer size exceeds length. {len(record.seq)}') - contig_kmer_counts = [pd.np.nan] * n_uniq_kmers + contig_kmer_counts = [pd.NA] * n_uniq_kmers kmer_counts.update({record.id:contig_kmer_counts}) continue for i in range(max_length): @@ -257,7 +259,7 @@ def seq_counter(assembly, ref_kmers, verbose=True): else: index = ref_kmers[kmer_revcomp] contig_kmer_counts[index] += 1 - contig_kmer_counts = [c if c != 0 else pd.np.nan for c in contig_kmer_counts] + contig_kmer_counts = [c if c != 0 else pd.NA for c in contig_kmer_counts] kmer_counts.update({record.id:contig_kmer_counts}) return kmer_counts diff --git a/autometa/common/mag.py b/autometa/common/mag.py index 59df84c67..39d38d817 100644 --- a/autometa/common/mag.py +++ b/autometa/common/mag.py @@ -238,6 +238,7 @@ def get_binning(self, method='recursive_dbscan', **kwargs): left_index=True, right_index=True) master_df = self.subset_df(master_df) + master_df = master_df.convert_dtypes() use_taxonomy = True if 'taxid' in master_df else False markers = self.markers(kwargs.get('domain','bacteria')) logger.info(f'Binning {kwargs.get("domain")} with {method}') diff --git a/autometa/config/environ.py b/autometa/config/environ.py index 23eb755bb..295f270e9 100644 --- a/autometa/config/environ.py +++ b/autometa/config/environ.py @@ -51,7 +51,7 @@ def which(program): """Finds the full path for an executable and checks read permissions exist. - See: https://stackoverflow.com/questions/377017/test-if-executable-exists-in-python + See: https://stackoverflow.com/a/377028 Returns: The path if it was valid or None if not @@ -86,17 +86,12 @@ def is_exe(fpath): return '' def find_executables(): - """Short summary. + """Retrieves executable file paths by looking in Autometa dependent executables. Returns ------- - type - Description of returned object. - - Raises - ------- - ExceptionName - Why the exception is raised. + dict + {executable:, ...} """ return {exe:which(exe) for exe in EXECUTABLES} @@ -108,6 +103,7 @@ def diamond(): ------- str version of diamond + """ exe = which('diamond') proc = subprocess.Popen( From 4688efb242b7c4d27389b8ef4b8d6d96958d5fa1 Mon Sep 17 00:00:00 2001 From: EvanRees Date: Tue, 24 Mar 2020 16:59:52 -0500 Subject: [PATCH 16/17] update to docstrings added new file key in config and comma-delimited list handling for multiple reads files in input. Added fasta format check and simple fasta parser from Biopython for performance and Exception handling. Docstrings noting where discussions should be placed on readthedocs relating to specific autometa functionality. --- autometa/binning/recursive_dbscan.py | 44 ++++++++------- autometa/common/coverage.py | 32 ++++++----- autometa/common/external/bowtie.py | 83 ++++++++++++++++++---------- autometa/common/external/prodigal.py | 9 ++- autometa/common/kmers.py | 62 ++++++++++----------- autometa/common/metagenome.py | 55 +++++++++++------- autometa/config/__init__.py | 29 ++++++---- autometa/config/default.config | 1 + autometa/config/metagenome.config | 4 +- autometa/config/user.py | 2 + tests/metagenome.config | 2 + 11 files changed, 196 insertions(+), 127 deletions(-) diff --git a/autometa/binning/recursive_dbscan.py b/autometa/binning/recursive_dbscan.py index 5a4e9fb17..e0aeefed5 100644 --- a/autometa/binning/recursive_dbscan.py +++ b/autometa/binning/recursive_dbscan.py @@ -1,23 +1,26 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +""" +Copyright 2020 Ian J. Miller, Evan R. Rees, Kyle Wolf, Siddharth Uppal, +Shaurya Chanana, Izaak Miller, Jason C. Kwan -# Copyright 2020 Ian J. Miller, Evan R. Rees, Kyle Wolf, Siddharth Uppal, -# Shaurya Chanana, Izaak Miller, Jason C. Kwan -# -# This file is part of Autometa. -# -# Autometa is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# Autometa is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with Autometa. If not, see . +This file is part of Autometa. + +Autometa is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +Autometa is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with Autometa. If not, see . + +Cluster contigs recursively searching for bins with highest completeness and purity. +""" import logging import os @@ -30,15 +33,14 @@ from autometa.common.markers import Markers from autometa.common import kmers -from autometa.common.exceptions import RecursiveDBSCANError -# TODO: This should be -# from autometa.common.kmers import Kmers +# TODO: This should be from autometa.common.kmers import Kmers # So later we can simply/and more clearly do Kmers.load(kmers_fpath).embed(method) +from autometa.common.exceptions import RecursiveDBSCANError from autometa.taxonomy.ncbi import NCBI pd.set_option('mode.chained_assignment',None) -#logger + logger = logging.getLogger(__name__) diff --git a/autometa/common/coverage.py b/autometa/common/coverage.py index 919923bde..1d8264097 100644 --- a/autometa/common/coverage.py +++ b/autometa/common/coverage.py @@ -19,7 +19,7 @@ You should have received a copy of the GNU Affero General Public License along with Autometa. If not, see . -Autometa Coverage +Construct contig coverage table given an input assembly and reads or alignments. """ @@ -84,12 +84,12 @@ def make_length_table(fasta, out): lengths.to_csv(out, sep='\t', index=True, header=True) return out -def get(fasta, out, fwd_reads=None, rev_reads=None, sam=None, bam=None, lengths=None, - bed=None, nproc=1): +def get(fasta, out, fwd_reads=None, rev_reads=None, se_reads=None, sam=None, + bam=None, lengths=None, bed=None, nproc=1): """Get coverages for assembly `fasta` file using provided files: Either: - `fwd_reads` and `rev_reads` + `fwd_reads` and `rev_reads` and/or `se_reads` or: `sam` or: @@ -102,7 +102,7 @@ def get(fasta, out, fwd_reads=None, rev_reads=None, sam=None, bam=None, lengths= 1. `bed` 2. `bam` 3. `sam` - 4. `fwd_reads` and `rev_reads` + 4. `fwd_reads` and `rev_reads` and `se_reads` Event sequence to calculate contig coverages: 1. align paired-end reads to generate alignment.sam @@ -117,10 +117,12 @@ def get(fasta, out, fwd_reads=None, rev_reads=None, sam=None, bam=None, lengths= out : str - fwd_reads : str - - rev_reads : str - + fwd_reads : list, optional + [, ...] + rev_reads : list, optional + [, ...] + se_reads : list, optional + [, ...] sam : str bam : str @@ -160,15 +162,16 @@ def make_bed(lengths=lengths, fasta=fasta, bam=bam, bed=bed): def sort_samfile(sam=sam,bam=bam,nproc=nproc): samtools.sort(sam, bam, nproc=nproc) - def align_pe_reads(fasta=fasta,db=db,sam=sam,fwd_reads=fwd_reads,rev_reads=rev_reads,nproc=nproc): + def align_reads(fasta=fasta,db=db,sam=sam,fwd_reads=fwd_reads, + rev_reads=rev_reads, se_reads=se_reads, nproc=nproc): bowtie.build(fasta, db) - bowtie.align(db, sam, fwd_reads, rev_reads, nproc=nproc) + bowtie.align(db, sam, fwd_reads, rev_reads, se_reads, nproc=nproc) # Setup of coverage calculation sequence depending on file(s) provided calculation_sequence = { 'bed_exists':[parse_bed], 'bam_exists':[make_bed, parse_bed], 'sam_exists':[sort_samfile, make_bed, parse_bed], - 'full':[align_pe_reads, sort_samfile, make_bed, parse_bed]} + 'full':[align_reads, sort_samfile, make_bed, parse_bed]} # Now need to determine which point to start calculation... for fp,argname in zip([bed,bam,sam],['bed','bam','sam']): step = 'full' @@ -221,8 +224,9 @@ def main(args): level=logger.DEBUG) parser = argparse.ArgumentParser(description='Construct contig coverage table given an input assembly and reads.') parser.add_argument('-f','--assembly', help='', required=True) - parser.add_argument('-1', '--fwd-reads', help='') - parser.add_argument('-2', '--rev-reads', help='') + parser.add_argument('-1', '--fwd-reads', help='', nargs='*') + parser.add_argument('-2', '--rev-reads', help='', nargs='*') + parser.add_argument('-U', '--se-reads', help='', nargs='*') parser.add_argument('--sam', help='') parser.add_argument('--bam', help='') parser.add_argument('--lengths', help='') diff --git a/autometa/common/external/bowtie.py b/autometa/common/external/bowtie.py index 01ea6b5e6..28e881781 100644 --- a/autometa/common/external/bowtie.py +++ b/autometa/common/external/bowtie.py @@ -42,7 +42,7 @@ def run(cmd): Returns ------- bool - True if no returncode from subprocess.run else False + True if no returncode from subprocess.call else False """ logger.debug(f'run: {cmd}') @@ -71,17 +71,17 @@ def build(assembly, out): Raises ------- - OSError + ChildProcessError bowtie2-build failed """ cmd = f'bowtie2-build {assembly} {out}' success = run(cmd) if not success: - raise OSError(f'{cmd} failed. {out} not written') + raise ChildProcessError(f'{cmd} failed. {out} not written') return out -def align(db, sam, fwd_reads, rev_reads, nproc=0, **kwargs): - """Align reads to bowtie2-index `db`. +def align(db, sam, fwd_reads=None, rev_reads=None, se_reads=None, nproc=0, **kwargs): + """Align reads to bowtie2-index `db` (at least one `*_reads` argument is required). Parameters ---------- @@ -89,13 +89,15 @@ def align(db, sam, fwd_reads, rev_reads, nproc=0, **kwargs): . I.e. `db`.{#}.bt2 sam : str - fwd_reads : str - - rev_reads : str - - nproc : int + fwd_reads : list, optional + [, ...] + rev_reads : list, optional + [, ...] + se_reads : list, optional + [, ...] + nproc : int, optional Num. processors to use (the default is 0). - **kwargs : dict + **kwargs : dict, optional Additional optional args to supply to bowtie2. Must be in format: key = flag value = flag-value @@ -107,28 +109,43 @@ def align(db, sam, fwd_reads, rev_reads, nproc=0, **kwargs): Raises ------- - OSError + ChildProcessError bowtie2 failed """ - exc = f'bowtie2 -x {db}' - opts = '-q --phred33 --very-sensitive --no-unal' - added_opts = [f'{k} {v}' for k,v in kwargs.items()] if kwargs else None - nprocs = f'-p {nproc}' + exe = f'bowtie2 -x {db}' + flags = '-q --phred33 --very-sensitive --no-unal' sam_out = f'-S {sam}' - fwd_reads_param = f'-1 {fwd_reads}' - rev_reads_param = f'-2 {rev_reads}' - params = [exc,opts,nprocs,fwd_reads_param,rev_reads_param,sam_out] - if added_opts: - params += added_opts + params = [exe,flags,sam_out] + if type(nproc) is not int or nproc < 0: + raise ValueError(f'nproc must be an integer greater than 0. given: {nproc}') + # nproc==0 will skip adding -p/--threads flag + if nproc: + params.append(f'-p {nproc}') + reads_provided = False + for flag,reads in zip(['-1','-2','-U'],[fwd_reads,rev_reads,se_reads]): + if reads: + reads_provided = True + params.append(f'{flag} {",".join(reads)}') + if not reads_provided: + raise ValueError(f'At least one fastq file is required!') + if kwargs: + params += [f'{flag} {value}' for flag,value in kwargs.items()] cmd = ' '.join(params) success = run(cmd) if not success: - raise OSError(f'{cmd} failed. {sam} not written') + raise ChildProcessError(f'{cmd} failed. {sam} not written') return sam def main(args): db = build(args.assembly, args.database) - sam = align(args.database, args.sam, args.fwd_reads, args.rev_reads, args.nproc) + sam = align( + database=args.database, + sam=args.sam, + fwd_reads=args.fwd_reads, + rev_reads=args.rev_reads, + se_reads=args.se_reads, + nproc=args.nproc, + kwargs=args.kwargs) if __name__ == '__main__': import argparse @@ -136,12 +153,22 @@ def main(args): logger.basicConfig( format='%(asctime)s : %(name)s : %(levelname)s : %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p') - parser = argparse.ArgumentParser() + desc=''' + Align provided reads to metagenome `assembly` and write alignments to `sam`. + NOTE: At least one reads file is required.''' + parser = argparse.ArgumentParser(description=desc) parser.add_argument('assembly', help='') - parser.add_argument('database', help='') + parser.add_argument('database', help='. Will construct database at provided path if not found.') parser.add_argument('sam', help='') - parser.add_argument('-1', '--fwd-reads', help='') - parser.add_argument('-2', '--rev-reads', help='') - parser.add_argument('--nproc', help='Num processors to use.', default=1) + parser.add_argument('-1', '--fwd-reads', + help='', + nargs='*') + parser.add_argument('-2', '--rev-reads', + help='', + nargs='*') + parser.add_argument('-U', '--se-reads', + help='', + nargs='*') + parser.add_argument('--nproc', help='Num processors to use.', default=1, type=int) args = parser.parse_args() main(args) diff --git a/autometa/common/external/prodigal.py b/autometa/common/external/prodigal.py index 613149f00..52ed9e709 100644 --- a/autometa/common/external/prodigal.py +++ b/autometa/common/external/prodigal.py @@ -30,6 +30,7 @@ from glob import glob from Bio import SeqIO +from Bio.SeqIO.FastaIO import SimpleFastaParser from autometa.config.environ import get_versions @@ -158,8 +159,14 @@ def run(assembly, nucls_out, prots_out, force=False,cpus=0,parallel=True): logger.warning(f'Args:{cmd} ReturnCode:{returncode}') # COMBAK: Check all possible return codes for GNU parallel for fp in [nucls_out, prots_out]: - if not os.path.exists(fp): + if not os.path.exists(fp) or os.stat(fp).st_size == 0: raise ChildProcessError(f'{fp} not written') + try: + with open(fp) as fh: + for _ in SimpleFastaParser(fh): + pass + except (IOError, ValueError): + raise IOError(f'InvalidFileFormat: {fp}') return nucls_out, prots_out def contigs_from_headers(fpath): diff --git a/autometa/common/kmers.py b/autometa/common/kmers.py index a9c1813ca..f448558cb 100644 --- a/autometa/common/kmers.py +++ b/autometa/common/kmers.py @@ -48,28 +48,21 @@ numba_logger = logging.getLogger("numba").setLevel(logging.ERROR) logger = logging.getLogger(__name__) -def revcomp(string): +def _revcomp(string): """Reverse complement the provided `string`. Parameters ---------- string : str - Description of parameter `string`. + A k-mer string generated from `init_kmers` Returns ------- - str or int(-1) + str reverse complemented string. - Note: If any weird letters are encountered, int value of -1 is returned. """ complement = {'A':'T','T':'A','C':'G','G':'C'} - complements = [] - for i in range(len(string)): - if string[i] in complement: - complements.append(complement[string[i]]) - else: - return -1 - return ''.join(reversed(complements)) + return ''.join(complement.get(char) for char in reversed(string)) def init_kmers(kmer_size=5): """Initialize k-mers from `kmer_size`. Respective reverse complements will @@ -77,7 +70,7 @@ def init_kmers(kmer_size=5): Parameters ---------- - kmer_size : int + kmer_size : int, optional pattern size of k-mer to intialize dict (the default is 5). Returns @@ -98,10 +91,7 @@ def init_kmers(kmer_size=5): all_kmers = new_list # subset uniq_kmers by removing any reverse complements for kmer in all_kmers: - kmer_reverse = revcomp(kmer) - if type(kmer_reverse) is int: - logger.warning(f'Encountered non-standard string: {kmer}. skipping...') - continue + kmer_reverse = _revcomp(kmer) if (kmer not in uniq_kmers) and (kmer_reverse not in uniq_kmers): uniq_kmers[kmer] = index index += 1 @@ -144,7 +134,7 @@ def mp_counter(assembly, ref_kmers, nproc=mp.cpu_count()): (nucleotides) ref_kmers : dict {kmer:index, ...} - nproc : int + nproc : int, optional Number of cpus to use. (the default will use all available). Returns @@ -217,7 +207,7 @@ def seq_counter(assembly, ref_kmers, verbose=True): (nucleotides) ref_kmers : dict {kmer:index, ...} - verbose : bool + verbose : bool, optional enable progress bar `verbose` (the default is True). Returns @@ -275,10 +265,16 @@ def count(assembly, kmer_size=5, normalized=False, verbose=True, multiprocess=Tr ---------- assembly : str Description of parameter `assembly`. - kmer_size : int + kmer_size : int, optional length of k-mer to count `kmer_size` (the default is 5). - normalized : bool + normalized : bool, optional Whether to return the CLR normalized dataframe (the default is True). + verbose : bool, optional + Enable progress bar `verbose` (the default is True). + multiprocess : bool, optional + Use multiple cores to count k-mer frequencies (the default is True). + nproc : int, optional + Number of cpus to use. (the default will use all available). Returns ------- @@ -327,6 +323,7 @@ def normalize(df): K-mers Dataframe where index_col='contig' and column values are k-mer frequencies. + # TODO: Place these references in readthedocs documentation and remove from def. References: - Aitchison, J. The Statistical Analysis of Compositional Data (1986) - Pawlowsky-Glahn, Egozcue, Tolosana-Delgado. Lecture Notes on Compositional Data Analysis (2011) @@ -357,23 +354,23 @@ def embed(kmers=None, embedded=None, n_components=2, do_pca=True, pca_dimensions ---------- kmers : str or pd.DataFrame - embedded : str - [optional] If provided will write to `embedded`. - n_components : int + embedded : str, optional + If provided will write to `embedded`. + n_components : int, optional `n_components` to embed k-mer frequencies (the default is 2). - do_pca : bool + do_pca : bool, optional Perform PCA decomposition prior to embedding (the default is True). - pca_dimensions : int + pca_dimensions : int, optional Reduce k-mer frequencies dimensions to `pca_dimensions` (the default is 50). If None, will estimate based on - method : str + method : str, optional embedding method to use (the default is 'UMAP'). - perplexity : float + perplexity : float, optional hyperparameter used to tune TSNE (the default is 30.0). See below for details: + # COMBAK: Insert link to readthedocs documentation https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html#sklearn.manifold.TSNE - - **kwargs : dict + **kwargs : dict, optional Other keyword arguments to be supplied to respective `method`. Returns @@ -393,7 +390,8 @@ def embed(kmers=None, embedded=None, n_components=2, do_pca=True, pca_dimensions `kmers` type must be a pd.DataFrame or filepath. """ if not kmers and not embedded: - raise KmerEmbeddingError('kmers or embedded is required') + msg = f'`kmers` (given: {kmers}) or `embedded` (given: {embedded}) is required' + raise KmerEmbeddingError(msg) df = None if kmers and type(kmers) is str and os.path.exists(kmers) and os.stat(kmers).st_size >0: try: @@ -435,6 +433,7 @@ def embed(kmers=None, embedded=None, n_components=2, do_pca=True, pca_dimensions def do_TSNE(perplexity=perplexity, n_components=n_components): # Adjust perplexity according to the number of data points + # COMBAK: Insert link to readthedocs discussion on python2.7 vs. python3 TSNE implementations n_rows = n_samples-1 scaler = 3.0 if n_rows < (scaler*perplexity): @@ -532,7 +531,8 @@ def main(args): type=int, default=50) parser.add_argument('--multiprocess', help='count k-mers using multiprocessing', action='store_true', default=False) - parser.add_argument('--nproc', help=f'num. processors to use if multiprocess is selected. (default = {cpus})', + parser.add_argument('--nproc', + help=f'num. processors to use if multiprocess is selected. (default = {cpus})', default=cpus, type=int) args = parser.parse_args() main(args) diff --git a/autometa/common/metagenome.py b/autometa/common/metagenome.py index cc05f7e91..4890883d9 100644 --- a/autometa/common/metagenome.py +++ b/autometa/common/metagenome.py @@ -30,6 +30,7 @@ import pandas as pd from Bio import SeqIO +from Bio.SeqIO.FastaIO import SimpleFastaParser from Bio import SeqUtils from autometa.common import kmers @@ -53,30 +54,38 @@ class Metagenome: Parameters ---------- assembly : str - - outdir : type + + outdir : str (the default is None) - taxon_method : str + nucl_orfs_fpath : str + + prot_orfs_fpath : str + + taxonomy_fpath : str + + taxon_method : str, optional method to assign taxonomy (the default is 'majority_vote'). choices=['majority_vote'] + fwd_reads : list, optional + [, ...] + rev_reads : list, optional + [, ...] + se_reads : list, optional + [, ...] Attributes ---------- - nucl_orfs_fpath : str - Description of attribute `nucl_orfs_fpath`. - prot_orfs_fpath : str - Description of attribute `prot_orfs_fpath`. taxonomy_fname : str - Description of attribute `taxonomy_fname`. - taxonomy_fpath : str - Description of attribute `taxonomy_fpath`. + basename of `taxonomy_fpath` taxonomy : pd.DataFrame index=contig cols=[taxid] may also contain lineage of taxid taxonomy_assigned : bool - `taxonomy_fpath` exists + True if `taxonomy_fpath` exists else False orfs_called : bool - `nucl_orfs_fpath` and `prot_orfs_fpath` exists + True if both `nucl_orfs_fpath` and `prot_orfs_fpath` exist else False sequences : list + [seq,...] + seqrecords : list [SeqRecord,...] nseqs : int Number of sequences in assembly. @@ -104,10 +113,12 @@ class Metagenome: - self.write_ranks() """ def __init__(self, assembly, outdir, nucl_orfs_fpath, prot_orfs_fpath, - taxonomy_fpath, fwd_reads=None, rev_reads=None, taxon_method='majority_vote'): + taxonomy_fpath, taxon_method='majority_vote', fwd_reads=None, + rev_reads=None, se_reads=None): self.assembly = os.path.realpath(assembly) self.fwd_reads = fwd_reads self.rev_reads = rev_reads + self.se_reads = se_reads self.outdir = outdir self.taxon_method = taxon_method self.nucl_orfs_fpath = nucl_orfs_fpath @@ -124,6 +135,11 @@ def __str__(self): @property def sequences(self): + with open(self.assembly) as fh: + return [seq for title,seq in SimpleFastaParser(fh)] + + @property + def seqrecords(self): return [seq for seq in SeqIO.parse(self.assembly, 'fasta')] @property @@ -132,17 +148,17 @@ def nseqs(self): @property def mean_gc(self): - return np.mean([SeqUtils.GC(record.seq) for record in self.sequences]) + return np.mean([SeqUtils.GC(seq) for seq in self.sequences]) @property def size(self): - return sum(len(record) for record in self.sequences) + return sum(len(seq) for seq in self.sequences) @property def largest_seq(self): max = float('-inf') largest = None - for rec in self.sequences: + for rec in self.seqrecords: if len(rec) > max: largest = rec max = len(rec) @@ -193,7 +209,7 @@ def fragmentation_metric(self, quality_measure=.50): """ target_size = self.size * quality_measure lengths = [] - for length in sorted([len(r) for r in self.sequences], reverse=True): + for length in sorted([len(seq) for seq in self.sequences], reverse=True): lengths.append(length) if sum(lengths) > target_size: return length @@ -262,7 +278,7 @@ def length_filter(self, out, cutoff=3000): if not os.path.exists(gunzipped_fpath): gunzip(self.assembly, gunzipped_fpath) self.assembly = gunzipped_fpath - records = [seq for seq in self.sequences if len(seq) >= cutoff] + records = [seq for seq in self.seqrecords if len(seq) >= cutoff] SeqIO.write(records, out, 'fasta') return Metagenome( assembly=out, @@ -399,12 +415,13 @@ def get_kmers(self, kmer_size=5, multiprocess=True, out=None, normalized=None, @timeit def get_coverages(self, out, from_spades=True, **kwargs): if from_spades: - return coverage.from_spades_names(self.sequences) + return coverage.from_spades_names(self.seqrecords) return coverage.get( fasta=self.assembly, out=out, fwd_reads=self.fwd_reads, rev_reads=self.rev_reads, + se_reads=self.se_reads, sam=kwargs.get('sam'), bam=kwargs.get('bam'), lengths=kwargs.get('lengths'), diff --git a/autometa/config/__init__.py b/autometa/config/__init__.py index b27db1a8c..e0b533863 100644 --- a/autometa/config/__init__.py +++ b/autometa/config/__init__.py @@ -30,19 +30,23 @@ logger = logging.getLogger(__name__) + DEFAULT_FPATH = os.path.join(os.path.dirname(__file__), 'default.config') AUTOMETA_DIR = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) WORKSPACE = os.path.join(AUTOMETA_DIR, 'workspace') - def get_config(fpath): if not os.path.exists(fpath) or os.stat(fpath).st_size == 0: raise FileNotFoundError(fpath) - config = ConfigParser(interpolation=ExtendedInterpolation()) + # https://stackoverflow.com/a/53274707/13118765 + converters={'list': lambda x: [val.strip() for val in x.split(',')]} + config = ConfigParser(interpolation=ExtendedInterpolation(), converters=converters) with open(fpath) as fh: config.read_file(fh) return config +DEFAULT_CONFIG = get_config(fpath=DEFAULT_FPATH) + def put_config(config, out): with open(out, 'w') as fh: config.write(fh) @@ -53,8 +57,6 @@ def update_config(fpath, section, option, value): put_config(c, fpath) logger.debug(f'updated {fpath} [{section}] option: {option} : {value}') -DEFAULT_CONFIG = get_config(fpath=DEFAULT_FPATH) - def parse_config(fpath=None): """Generate argparse namespace (args) from config file. @@ -74,7 +76,7 @@ def parse_config(fpath=None): provided `fpath` does not exist. """ - parameters = { + type_converter = { 'workspace':str, 'project':int, 'kingdoms':list, @@ -99,6 +101,9 @@ def parse_config(fpath=None): 'cpus':int, 'config':str, 'resume':bool, + 'fwd_reads':list, + 'rev_reads':list, + 'se_reads':list, } if fpath and (not os.path.exists(fpath) or os.stat(fpath).st_size == 0): raise FileNotFoundError(fpath) @@ -109,17 +114,17 @@ def parse_config(fpath=None): namespace.__dict__[section] = Namespace() for key, value in config.items(section): key = key.replace('-', '_') - if section != 'parameters' or key == 'metagenomes': + if section not in {'parameters','files'} or key == 'metagenomes': namespace.__dict__[section].__dict__[key] = value continue - if parameters.get(key) is not None: - if parameters.get(key) is bool: + if type_converter.get(key) is not None: + if type_converter.get(key) is bool: value = config.getboolean(section,key) - elif parameters.get(key) is int: + elif type_converter.get(key) is int: value = config.getint(section, key) - elif parameters.get(key) is float: + elif type_converter.get(key) is float: value = config.getfloat(section,key) - elif parameters.get(key) is list: - value = value.split(',') + elif type_converter.get(key) is list: + value = config.getlist(section,key) namespace.__dict__[section].__dict__[key] = value return namespace diff --git a/autometa/config/default.config b/autometa/config/default.config index 04e585b17..1982c156c 100644 --- a/autometa/config/default.config +++ b/autometa/config/default.config @@ -90,6 +90,7 @@ archaea_single_copy_cutoffs = ${databases:markers}/archaea.single_copy.cutoffs metagenome = metagenome.fna fwd_reads = fwd_reads.fastq rev_reads = rev_reads.fastq +se_reads = se_reads.fastq sam = alignments.sam bam = alignments.bam lengths = lengths.tsv diff --git a/autometa/config/metagenome.config b/autometa/config/metagenome.config index db3950a93..6ea6b5e37 100644 --- a/autometa/config/metagenome.config +++ b/autometa/config/metagenome.config @@ -22,8 +22,10 @@ [files] metagenome = metagenome.fna -fwd_reads = fwd_reads.fastq +# Multiple Reads files of respective format may be provided using a comma-delimiter +fwd_reads = fwd_reads.fastq, fwd_reads.fastq rev_reads = rev_reads.fastq +se_reads = se_reads.fastq sam = alignments.sam bam = alignments.bam lengths = lengths.tsv diff --git a/autometa/config/user.py b/autometa/config/user.py index 8bcb8477b..da02456a4 100644 --- a/autometa/config/user.py +++ b/autometa/config/user.py @@ -197,6 +197,7 @@ def run_binning(self, mgargs): taxonomy_fpath=mgargs.files.taxonomy, fwd_reads=mgargs.files.fwd_reads, rev_reads=mgargs.files.rev_reads, + se_reads=mgargs.files.se_reads, taxon_method=mgargs.parameters.taxon_method) try: # Original (raw) file should not be manipulated so return new object @@ -213,6 +214,7 @@ def run_binning(self, mgargs): taxonomy_fpath=mgargs.files.taxonomy, fwd_reads=mgargs.files.fwd_reads, rev_reads=mgargs.files.rev_reads, + se_reads=mgargs.files.se_reads, taxon_method=mgargs.parameters.taxon_method) # I.e. asynchronous execution here (work-queue tasks) mg.get_kmers( diff --git a/tests/metagenome.config b/tests/metagenome.config index a31aba0c2..25b7e15ed 100644 --- a/tests/metagenome.config +++ b/tests/metagenome.config @@ -4,8 +4,10 @@ [files] metagenome = tests/data/metagenome.fna +# Multiple Reads files of respective format may be provided using a comma-delimiter fwd_reads = None rev_reads = None +se_reads = None sam = None bam = None lengths = None From 406041bcbca9e9e14fc3991cad8ad5104509de83 Mon Sep 17 00:00:00 2001 From: EvanRees Date: Tue, 24 Mar 2020 17:07:30 -0500 Subject: [PATCH 17/17] returning from main rather than unnecessary sys import. --- autometa/common/kmers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autometa/common/kmers.py b/autometa/common/kmers.py index f448558cb..e44a08dd7 100644 --- a/autometa/common/kmers.py +++ b/autometa/common/kmers.py @@ -491,7 +491,7 @@ def main(args): logger.debug(f'Wrote {len(df)} normalized k-mer freqs. to {args.normalized}.') if not args.embedded: - import sys;sys.exit(0) + return if args.normalized: logger.debug(f'Embedding {args.normalized}')