Skip to content

Commit

Permalink
v1.6.3a
Browse files Browse the repository at this point in the history
  • Loading branch information
Kinggerm committed Feb 27, 2020
1 parent 0450133 commit 5c79c12
Show file tree
Hide file tree
Showing 7 changed files with 66 additions and 58 deletions.
23 changes: 13 additions & 10 deletions GetOrganelleLib/assembly_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,11 @@ def minimize(self, fun=None, x0=None, jac=None, method=None, bounds=None, constr
import random
from copy import deepcopy

major_version, minor_version = sys.version_info[:2]
if major_version == 2 and minor_version >= 7:
MAJOR_VERSION, MINOR_VERSION = sys.version_info[:2]
if MAJOR_VERSION == 2 and MINOR_VERSION >= 7:
python_version = "2.7+"
RecursionError = RuntimeError
elif major_version == 3 and minor_version >= 5:
elif MAJOR_VERSION == 3 and MINOR_VERSION >= 5:
python_version = "3.5+"
else:
sys.stdout.write("Python version have to be 2.7+ or 3.5+")
Expand All @@ -50,7 +50,7 @@ def minimize(self, fun=None, x0=None, jac=None, method=None, bounds=None, constr


class ProcessingGraphFailed(Exception):
def __init__(self, value):
def __init__(self, value=""):
self.value = value

def __str__(self):
Expand Down Expand Up @@ -3001,16 +3001,16 @@ def export_path(self, in_path):
return Sequence(",".join(seq_names), "".join(seq_segments))


class NaiveDeBruijnGraph(Assembly):
def __init__(self, fasta_file, kmer_len=55, circular="auto", circular_head_ends="(circular)"):
class NaiveKmerNodeGraph(Assembly):
def __init__(self, fasta_file, kmer_len=55, circular="auto", circular_head_ends="(circular)", single_chain=False):
"""
:param fasta_file:
:param kmer_len:
:param circular: "auto" (default), "yes", "no"
:param circular_head_ends:
:return:
"""
super(NaiveDeBruijnGraph, self).__init__(overlap=kmer_len - 1)
super(NaiveKmerNodeGraph, self).__init__(overlap=kmer_len - 1)
assert circular in ("auto", "yes", "no")
assert kmer_len >= 3 and kmer_len % 2 == 1
self.__kmer = kmer_len # overlap is actually kmer_len - 1
Expand All @@ -3033,7 +3033,8 @@ def __init__(self, fasta_file, kmer_len=55, circular="auto", circular_head_ends=
self.vertex_info[this_vertex] = this_v_info = Vertex(this_vertex, kmer_len, 1., this_kmer_seq)
# record the connection as dict() rather than set() for counting
self.vertex_info[this_vertex].connections = {True: {}, False: {}}
recorded_kmers[this_v_info.seq[False]] = this_vertex, not this_end
if not single_chain:
recorded_kmers[this_v_info.seq[False]] = this_vertex, not this_end
if go_circle:
# add connection between the first kmer and the last kmer if the seq is circular
prev_kmer_seq = kmer_list[- 1]
Expand All @@ -3045,7 +3046,8 @@ def __init__(self, fasta_file, kmer_len=55, circular="auto", circular_head_ends=
recorded_kmers[prev_kmer_seq] = prev_vertex, prev_end = str(count_vertices), True
self.vertex_info[prev_vertex] = prev_v_info = Vertex(prev_vertex, kmer_len, 0., prev_kmer_seq)
self.vertex_info[prev_vertex].connections = {True: {}, False: {}}
recorded_kmers[prev_v_info.seq[False]] = prev_vertex, not prev_end
if not single_chain:
recorded_kmers[prev_v_info.seq[False]] = prev_vertex, not prev_end
if (this_vertex, not this_end) not in self.vertex_info[prev_vertex].connections[prev_end]:
self.vertex_info[prev_vertex].connections[prev_end][(this_vertex, not this_end)] = 0
self.vertex_info[prev_vertex].connections[prev_end][(this_vertex, not this_end)] += 1
Expand All @@ -3064,7 +3066,8 @@ def __init__(self, fasta_file, kmer_len=55, circular="auto", circular_head_ends=
recorded_kmers[this_kmer_seq] = this_vertex, this_end = str(count_vertices), True
self.vertex_info[this_vertex] = this_v_info = Vertex(this_vertex, kmer_len, 1., this_kmer_seq)
self.vertex_info[this_vertex].connections = {True: {}, False: {}}
recorded_kmers[this_v_info.seq[False]] = this_vertex, not this_end
if not single_chain:
recorded_kmers[this_v_info.seq[False]] = this_vertex, not this_end
# add the connection between this_kmer_seq and prev_kmer_seq
prev_kmer_seq = kmer_list[go_to - 1]
prev_vertex, prev_end = recorded_kmers[prev_kmer_seq]
Expand Down
15 changes: 12 additions & 3 deletions GetOrganelleLib/pipe_control_func.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
import os
from multiprocessing import Pool

major_version, minor_version = sys.version_info[:2]
if major_version == 2 and minor_version >= 7:
MAJOR_VERSION, MINOR_VERSION = sys.version_info[:2]
if MAJOR_VERSION == 2 and MINOR_VERSION >= 7:
python_version = "2.7+"
elif major_version == 3 and minor_version >= 5:
elif MAJOR_VERSION == 3 and MINOR_VERSION >= 5:
python_version = "3.5+"
else:
sys.stdout.write("Python version have to be 2.7+ or 3.5+")
Expand Down Expand Up @@ -74,6 +74,15 @@ def timed_log(log, output_base, prefix, log_level="NOTSET"):
return log_timed


# Python 2 only: the built-in TimeoutError was introduced in Python 3.3, so a
# class with the same name is defined here when MAJOR_VERSION == 2.
if MAJOR_VERSION == 2:
    class TimeoutError(Exception):
        """Exception that carries an optional value describing the timeout."""

        def __init__(self, value=""):
            # value defaults to "" so the exception can be raised without arguments
            self.value = value

        def __str__(self):
            # str(exc) yields the repr() of the stored value
            return repr(self.value)


def set_time_limit(num, flag_str="'--time-limit'"):
def wrap(func):
def handle(sig_num, interrupted_stack_frame):
Expand Down
30 changes: 17 additions & 13 deletions GetOrganelleLib/versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,26 @@ def get_versions():


versions = [
{"number": "1.6.3a",
"features": [
"1. Minor bugs fixes",
],
"time": "2020-02-27 17:14 GMT-6"},
{"number": "1.6.3-beta",
"features": [
"1. log plastome info",
"2. get_organelle_from_assembly.py & disentangle_organelle_assembly.py: --max-multiplicity added",
"3. Assembly.estimate_copy_and_depth_precisely() modified: constraint_max_function() for --max-multiplicity",
"4. Assembly.tag_in_between() modified",
"5. Assembly.estimate_copy_and_depth_by_cov() modified: min average coverage limit",
"6. Assembly.processing_polymorphism():"
"1. get_organelle_from_assembly.py & disentangle_organelle_assembly.py: --max-multiplicity added",
"2. Assembly.estimate_copy_and_depth_precisely() modified: constraint_max_function() for --max-multiplicity",
"3. Assembly.tag_in_between() modified",
"4. Assembly.estimate_copy_and_depth_by_cov() modified: min average coverage limit",
"5. Assembly.processing_polymorphism():"
" fix a bug when kmer-len repeats shared by two contigs; fix a bug that cause RuntimeError",
"7. Assembly: too many results due to palindromic repeats, problem solved",
"8. Utilities/reconstruct_graph_from_fasta.py & NaiveDeBruijnGraph added",
"9. Utilities/gfa_to_fasta.py, Utilities/fastg_to_gfa.py: description corrected",
"10. Assembly.parse_gfa(): compatibility increased",
"11. Utilities/gfa2fastg.py: compatibility increased",
"12. Assembly.estimate_copy_and_depth_precisely(): fix a bug on a rare case that multiplicities res are 4,8,4",
"13. README.md: updated",
"6. Assembly: too many results due to palindromic repeats, problem solved",
"7. Utilities/reconstruct_graph_from_fasta.py & NaiveKmerNodeGraph added",
"8. Utilities/gfa_to_fasta.py, Utilities/fastg_to_gfa.py: description corrected",
"9. Assembly.parse_gfa(): compatibility increased",
"10. Utilities/gfa2fastg.py: compatibility increased",
"11. Assembly.estimate_copy_and_depth_precisely(): fix a bug on a rare case that multiplicities res are 4,8,4",
"12. README.md: updated",
],
"time": "2020-02-22 02:40 GMT-6"},
{"number": "1.6.2e",
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ Perl is required for the wrapper of Bowtie2, but we assume that it was builtin i

<a href=' '>Bandage</a> is a fantastic tool to view the assembly graph (`*.fastg`/`*.gfa`). If you have Bandage correctly configured and have added the binary folder of Bandage (which is `Bandage.app/Contents/MacOS` for macOS) to the $PATH, get_organelle_from_*.py will automatically generate a PNG image of the assembly graph.

If you have installed the Python library psutil (version >= 3.0; pip install psutil), the memory cost of get_organelle_from_reads.py will be automatically logged. If you want to evaluate your results and plot the evaluation with `evaluate_assembly_using_mapping.py` and `round_statistics.py`, you also have to install the Python library matplotlib (pip install matplotlib).
If you have installed the Python library psutil (version >= 3.0; pip install -U psutil), the memory cost of get_organelle_from_reads.py will be automatically logged. If you want to evaluate your results and plot the evaluation with `evaluate_assembly_using_mapping.py` and `round_statistics.py`, you also have to install the Python library matplotlib (pip install matplotlib).


## How To
Expand Down
32 changes: 21 additions & 11 deletions Utilities/reconstruct_graph_from_fasta.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def get_options():
usage="reconstruct_graph_from_fasta.py -i fasta_file -o out.gfa")
parser.add_option("-i", dest="input",
help="Input fasta file.")
parser.add_option("-o", dest="output",
parser.add_option("-o", dest="output", default="",
help="Output graph file. The output format is GFA by default, but FASTG only when "
"indicated with postfix '.fastg'.")
parser.add_option("-k", dest="kmer", default=55, type=int,
Expand All @@ -26,8 +26,13 @@ def get_options():
help="Sequences in input fasta file are all circular (yes/no/auto). "
"The auto mode enables detection by checking the existence of '(circular)' in "
"the end of the header of each sequence. Default:%default")
parser.add_option("--single-chain", dest="single_chain", default=False, action="store_true",
help="The input sequence(s) was by default treated as DNA double-chain with its complementary "
"sequence. Choose this flag to turn off.")
parser.add_option("--out-kg", dest="out_kg", default="",
help="Output kmer node graph.")
options, argv = parser.parse_args()
if not (options.output and options.input):
if not ((options.output or options.out_kg) and options.input):
parser.print_help()
sys.stdout.write("Insufficient arguments!\n")
sys.exit()
Expand All @@ -46,15 +51,20 @@ def main():
time_0 = time.time()
options, argv = get_options()
# detect postfix
de_burijn_graph = NaiveDeBruijnGraph(options.input, kmer_len=options.kmer, circular=options.circular)
assembly_graph = de_burijn_graph.generate_assembly_graph()
if options.output.endswith(".fastg"):
sys.stdout.warning("Fastg is not recommended!\n")
assembly_graph.write_to_fastg(options.output)
else:
assembly_graph.write_to_gfa(options.output)
# de_burijn_graph.write_to_gfa(options.output + ".db.gfa")
sys.stdout.write("Took " + "%.4f" % (time.time() - time_0) + "s in generating " + options.output + "\n")
kmer_node_graph = NaiveKmerNodeGraph(options.input, kmer_len=options.kmer,
circular=options.circular, single_chain=options.single_chain)
if options.output:
assembly_graph = kmer_node_graph.generate_assembly_graph()
if options.output.endswith(".fastg"):
sys.stdout.warning("Fastg is not recommended!\n")
assembly_graph.write_to_fastg(options.output)
else:
assembly_graph.write_to_gfa(options.output)
if options.out_kg:
kmer_node_graph.write_to_gfa(options.out_kg)
sys.stdout.write("Took " + "%.4f" % (time.time() - time_0) + "s in generating " +
options.output * int(bool(options.output)) + ", " * int(bool(options.output and options.out_kg)) +
options.out_kg * int(bool(options.out_kg)) + "\n")


if __name__ == '__main__':
Expand Down
13 changes: 2 additions & 11 deletions get_organelle_from_assembly.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,15 +421,6 @@ def get_options(description, version):
return options, log_handler


if MAJOR_VERSION == 2:
class TimeoutError(Exception):
def __init__(self, value):
self.value = value

def __str__(self):
return repr(self.value)


def slim_spades_result(organelle_types, in_custom, ex_custom, graph_in, graph_out_base,
verbose_log, log_handler, threads, which_slim, which_blast="", other_options="",
resume=False, keep_temp=False):
Expand Down Expand Up @@ -682,7 +673,7 @@ def disentangle_inside(fastg_f, tab_f, o_p, w_f, log_in, type_f=3., mode_in="emb
raise e
except RuntimeError as e:
log_handler.info("Disentangling failed: RuntimeError: " + str(e).strip())
except TimeoutError:
except TimeoutError as e:
log_handler.info("Disentangling timeout. (see " + timeout_flag + " for more)")
except ProcessingGraphFailed as e:
log_handler.info("Disentangling failed: " + str(e).strip())
Expand Down Expand Up @@ -719,7 +710,7 @@ def disentangle_inside(fastg_f, tab_f, o_p, w_f, log_in, type_f=3., mode_in="emb
if verbose:
log_handler.exception("")
log_handler.info("Disentangling failed: RuntimeError: " + str(e).strip())
except TimeoutError:
except TimeoutError as e:
log_handler.info("Disentangling timeout. (see " + timeout_flag + " for more)")
except ProcessingGraphFailed as e:
log_handler.info("Disentangling failed: " + str(e).strip())
Expand Down
9 changes: 0 additions & 9 deletions get_organelle_from_reads.py
Original file line number Diff line number Diff line change
Expand Up @@ -903,15 +903,6 @@ def get_options(description, version):
return options, log_handler, previous_attributes


if MAJOR_VERSION == 2:
class TimeoutError(Exception):
def __init__(self, value):
self.value = value

def __str__(self):
return repr(self.value)


def estimate_maximum_n_reads_using_mapping(
twice_max_coverage, check_dir, original_fq_list, reads_paired,
designed_maximum_n_reads, seed_files, organelle_types, target_genome_sizes,
Expand Down

0 comments on commit 5c79c12

Please sign in to comment.