From 6bcfe8949151dcb7df3601d2226d56181ba373d3 Mon Sep 17 00:00:00 2001 From: donkirkby Date: Tue, 29 Oct 2019 09:32:58 -0700 Subject: [PATCH] Add ArrowGroup to genome coverage diagrams, as part of #442. Also stop merging contigs into the full-genome reference. --- micall/core/plot_contigs.py | 82 +++++++++++--- micall/core/remap.py | 3 +- micall/tests/test_plot_contigs.py | 181 ++++++++++++++++++++++++++---- micall/tests/test_remap.py | 60 ++++------ micall/utils/compare_mapping.py | 119 ++++++++++++++++---- 5 files changed, 350 insertions(+), 95 deletions(-) diff --git a/micall/core/plot_contigs.py b/micall/core/plot_contigs.py index d78909c27..ded5deed6 100644 --- a/micall/core/plot_contigs.py +++ b/micall/core/plot_contigs.py @@ -1,9 +1,10 @@ +import typing from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter, FileType from collections import Counter, defaultdict from csv import DictReader from io import StringIO from itertools import groupby -from math import log10 +from math import log10, copysign from operator import itemgetter from pathlib import Path @@ -69,8 +70,14 @@ def get_color(self, coverage): class Arrow(Element): - def __init__(self, start, end, h=30, label=None): - super().__init__(x=start, y=0, w=end-start, h=h) + def __init__(self, start, end, h=20, label=None): + x = start + w = end-start + self.direction = copysign(1, w) + if w < 0: + x = end + w = -w + super().__init__(x=x, y=0, w=w, h=h) self.label = label def draw(self, x=0, y=0, xscale=1.0): @@ -78,27 +85,74 @@ def draw(self, x=0, y=0, xscale=1.0): a = self.x * xscale b = (self.x + self.w) * xscale x = x * xscale - direction = 1 - r = 10 * xscale + r = h/2 + font_size = h * 0.75 arrow_size = 7 * xscale - arrow_end = b - arrow_start = arrow_end - arrow_size*direction - centre = (a + b - direction*arrow_size)/2 - centre_start = centre - direction*r - centre_end = centre + direction*r + if self.direction >= 0: + line_start = a + arrow_end = b + else: + line_start = b + arrow_end = a + arrow_start = arrow_end - arrow_size*self.direction + centre = (a + b - self.direction*arrow_size)/2 + centre_start = centre - self.direction*r + centre_end = centre + self.direction*r group = draw.Group(transform="translate({} {})".format(x, y)) group.append(draw.Circle(centre, h/2, r, fill='ivory', stroke='black')) - group.append(draw.Line(a, h/2, centre_start, h/2, stroke='black')) + group.append(draw.Line(line_start, h/2, centre_start, h/2, stroke='black')) group.append(draw.Line(centre_end, h/2, arrow_start, h/2, stroke='black')) group.append(draw.Lines(arrow_end, h/2, arrow_start, (h + arrow_size)/2, arrow_start, (h - arrow_size)/2, fill='black')) group.append(draw.Text(self.label, - 15, - centre, h/2, + font_size, + centre, h / 2, text_anchor='middle', - dy="0.3em")) + dy="0.35em")) + return group + + +class ArrowGroup(Element): + def __init__(self, arrows: typing.Sequence[Arrow], gap=3): + self.arrows = [] + coordinates = [] # [(start, end, index)] + x1 = x2 = None + for i, arrow in enumerate(arrows): + self.arrows.append([0, arrow]) + arrow_x2 = arrow.x + arrow.w + coordinates.append((arrow.x, arrow_x2, i)) + if x1 is None: + x1, x2 = arrow.x, arrow_x2 + else: + x1 = min(x1, arrow.x) + x2 = max(x2, arrow_x2) + h = 0 + while coordinates: + if h > 0: + h += gap + row_end = coordinates[0][0]-1 + row_height = 0 + for key in coordinates[:]: + x1, x2, i = key + if x1 < row_end: + continue + coordinates.remove(key) + row = self.arrows[i] + arrow_h = row[1].h + row[0] = h + arrow_h + row_height = max(row_height, arrow_h) + row_end = x2 + h += row_height + for row in self.arrows: + row[0] -= h + super().__init__(x1, 0, w=x2-x1, h=h) + + def draw(self, x=0, y=0, xscale=1.0): + group = draw.Group(transform="translate({} {})".format(x, y)) + for i, (child_y, arrow) in enumerate(self.arrows): + group.append(arrow.draw(y=child_y, xscale=xscale)) return group diff --git a/micall/core/remap.py b/micall/core/remap.py index 917f4f4ba..7c6f70aec 100644 --- a/micall/core/remap.py +++ b/micall/core/remap.py @@ -39,6 +39,7 @@ PARTIAL_CONTIG_SUFFIX = 'partial' REVERSED_CONTIG_SUFFIX = 'reversed' EXCLUDED_CONTIG_SUFFIX = 'excluded' +ARE_CONTIGS_MERGED = False # SAM file format SAM_FIELDS = [ @@ -795,7 +796,7 @@ def read_contigs(contigs_csv, excluded_seeds=None): match_fraction = float(row['match']) is_match = 0.25 <= match_fraction is_reversed = match_fraction < 0 - if not is_match: + if not (ARE_CONTIGS_MERGED and is_match): contig_name = get_contig_name(i, row['ref'], is_match, diff --git a/micall/tests/test_plot_contigs.py b/micall/tests/test_plot_contigs.py index 696e41906..71f8d976c 100644 --- a/micall/tests/test_plot_contigs.py +++ b/micall/tests/test_plot_contigs.py @@ -9,7 +9,7 @@ from genetracks import Figure, Track, Multitrack, Label, Coverage from micall.core.plot_contigs import summarize_figure, \ - build_coverage_figure, SmoothCoverage, add_partial_banner, Arrow + build_coverage_figure, SmoothCoverage, add_partial_banner, Arrow, ArrowGroup HCV_HEADER = ('C[342-915], E1[915-1491], E2[1491-2580], P7[2580-2769], ' 'NS2[2769-3420], NS3[3420-5313], NS4A[5313-5475], ' @@ -24,28 +24,84 @@ class SvgDiffer: def __init__(self): self.work_dir: Path = Path(__file__).parent / 'svg_diffs' self.work_dir.mkdir(exist_ok=True) + self.mismatch_found = False for work_file in self.work_dir.iterdir(): if work_file.name == '.gitignore': continue assert work_file.suffix in ('.svg', '.png') work_file.unlink() + def diff_pixel(self, actual_pixel, expected_pixel): + ar, ag, ab, aa = actual_pixel + er, eg, eb, ea = expected_pixel + if actual_pixel != expected_pixel: + self.mismatch_found = True + # Colour + dr = 0xff + dg = (ag + eg) // 5 + db = (ab + eb) // 5 + + # Opacity + da = 0xff + else: + # Colour + dr, dg, db = ar, ag, ab + + # Opacity + da = aa // 3 + return dr, dg, db, da + def assert_equal(self, svg_actual: Drawing, svg_expected: Drawing, name: str): - # Display image when in live turtle mode. - display_image = getattr(Turtle, 'display_image', None) - if display_image is not None: - encoded = standard_b64encode(svg_actual.rasterize().pngData) - display_image(0, 0, image=encoded.decode('UTF-8')) - png_actual = drawing_to_image(svg_actual) png_expected = drawing_to_image(svg_expected) - png_diff = ImageChops.difference(png_actual, png_expected) + w = max(png_actual.width, png_expected.width) + h = max(png_actual.height, png_expected.height) + + png_actual_padded = Image.new(png_actual.mode, (w, h)) + png_expected_padded = Image.new(png_expected.mode, (w, h)) + png_actual_padded.paste(png_actual) + png_expected_padded.paste(png_expected) + png_diff = Image.new(png_actual.mode, (w, h)) + self.mismatch_found = False + png_diff.putdata([self.diff_pixel(actual_pixel, expected_pixel) + for actual_pixel, expected_pixel in zip( + png_actual_padded.getdata(), + png_expected_padded.getdata())]) - extrema = png_diff.getextrema() - if extrema == ((0, 0), (0, 0), (0, 0), (0, 0)): + # Display image when in live turtle mode. + display_image = getattr(Turtle, 'display_image', None) + if display_image is not None: + t = Turtle() + try: + w = t.screen.cv.cget('width') + h = t.screen.cv.cget('height') + ox, oy = w/2, h/2 + text_height = 20 + t.penup() + t.goto(-ox, oy) + t.right(90) + t.forward(text_height) + t.write(f'Actual') + display_image(ox+t.xcor(), oy-t.ycor(), + image=encode_image(png_actual)) + t.forward(png_actual.height) + t.forward(text_height) + t.write(f'Diff') + display_image(ox+t.xcor(), oy-t.ycor(), + image=encode_image(png_diff)) + t.forward(png_diff.height) + t.forward(text_height) + t.write('Expected') + display_image(ox+t.xcor(), oy-t.ycor(), + image=encode_image(png_expected)) + t.forward(png_expected.height) + except Exception as ex: + t.write(str(ex)) + + if not self.mismatch_found: return text_actual = svg_actual.asSvg() (self.work_dir / (name+'_actual.svg')).write_text(text_actual) @@ -63,6 +119,13 @@ def drawing_to_image(drawing: Drawing) -> Image: return image +def encode_image(image: Image) -> bytes: + writer = BytesIO() + image.save(writer, format='PNG') + encoded = standard_b64encode(writer.getvalue()) + return encoded.decode('UTF-8') + + @pytest.fixture(scope='session') def svg_differ(): return SvgDiffer() @@ -453,23 +516,101 @@ def test_plot_genome_coverage_insertion(): assert expected_figure == summarize_figure(figure) +# noinspection DuplicatedCode def test_arrow(svg_differ): - expected_svg = Drawing(200.0, 60.0, origin=(0, 0)) - expected_svg.append(Circle(168/2, 40, 10, stroke='black', fill='ivory')) + expected_svg = Drawing(175.0, 35.0, origin=(0, 0)) + expected_svg.append(Circle(168/2, 20, 10, stroke='black', fill='ivory')) expected_svg.append(Text('X', 15, - 168/2, 40, + 168/2, 20, text_anchor='middle', dy="0.3em")) - expected_svg.append(Line(0, 40, 74, 40, stroke='black')) - expected_svg.append(Line(94, 40, 168, 40, stroke='black')) - expected_svg.append(Lines(175, 40, - 168, 43.5, - 168, 36.5, - 175, 40, + expected_svg.append(Line(0, 20, 74, 20, stroke='black')) + expected_svg.append(Line(94, 20, 168, 20, stroke='black')) + expected_svg.append(Lines(175, 20, + 168, 23.5, + 168, 16.5, + 175, 20, fill='black')) f = Figure() - f.add(Arrow(0, 175, label='X')) + f.add(Arrow(0, 175, h=20, label='X')) svg = f.show() svg_differ.assert_equal(svg, expected_svg, 'test_arrow') + + +# noinspection DuplicatedCode +def test_reverse_arrow(svg_differ): + expected_svg = Drawing(175.0, 35.0, origin=(0, 0)) + expected_svg.append(Circle(7+168/2, 20, 10, stroke='black', fill='ivory')) + expected_svg.append(Text('X', + 15, + 7+168/2, 20, + text_anchor='middle', + dy="0.3em")) + expected_svg.append(Line(7, 20, 81, 20, stroke='black')) + expected_svg.append(Line(101, 20, 175, 20, stroke='black')) + expected_svg.append(Lines(0, 20, + 7, 23.5, + 7, 16.5, + 0, 20, + fill='black')) + f = Figure() + f.add(Arrow(175, 0, h=20, label='X')) + svg = f.show() + + svg_differ.assert_equal(svg, expected_svg, 'test_arrow') + + +# noinspection DuplicatedCode +def test_arrow_group(svg_differ): + expected_figure = Figure() + expected_figure.add(Track(1, 500, label='Header')) + h = 30 + expected_figure.add(Arrow(1, 200, label='X', h=h), gap=-h) + expected_figure.add(Arrow(300, 500, label='Y', h=h)) + expected_svg = expected_figure.show() + + f = Figure() + f.add(Track(1, 500, label='Header')) + f.add(ArrowGroup([Arrow(1, 200, label='X', h=h), + Arrow(300, 500, label='Y', h=h)])) + svg = f.show() + + svg_differ.assert_equal(svg, expected_svg, 'test_arrow_group') + + +# noinspection DuplicatedCode +def test_arrow_group_overlap(svg_differ): + expected_figure = Figure() + expected_figure.add(Track(1, 500, label='Header')) + h = 20 + expected_figure.add(Arrow(1, 300, label='X', h=h), gap=3) + expected_figure.add(Arrow(1, 300, label='Y', h=h)) + expected_svg = expected_figure.show() + + f = Figure() + f.add(Track(1, 500, label='Header')) + f.add(ArrowGroup([Arrow(1, 300, label='X', h=h), + Arrow(1, 300, label='Y', h=h)])) + svg = f.show() + + svg_differ.assert_equal(svg, expected_svg, 'test_arrow_group') + + +# noinspection DuplicatedCode +def test_arrow_group_reverse_overlap(svg_differ): + expected_figure = Figure() + expected_figure.add(Track(1, 500, label='Header')) + h = 20 + expected_figure.add(Arrow(1, 300, label='X', h=h), gap=3) + expected_figure.add(Arrow(400, 250, label='Y', h=h)) + expected_svg = expected_figure.show() + + f = Figure() + f.add(Track(1, 500, label='Header')) + f.add(ArrowGroup([Arrow(1, 300, label='X', h=h), + Arrow(400, 250, label='Y', h=h)])) + svg = f.show() + + svg_differ.assert_equal(svg, expected_svg, 'test_arrow_group') diff --git a/micall/tests/test_remap.py b/micall/tests/test_remap.py index 2d8139f91..43325dde8 100644 --- a/micall/tests/test_remap.py +++ b/micall/tests/test_remap.py @@ -1328,27 +1328,24 @@ def test_read_contigs(projects): HCV-1a,1.0,HCV-1a,TCACCAGGACAGCGGGTTGAATTCCTCGTGCAAGCGTGGAA HCV-2b,1.0,HCV-2b,GCCCGCCCCCTGATGGGGGCGACACTCCGCCA """) - hcv1a_seq = projects.getReference('HCV-1a') - hcv2b_seq = projects.getReference('HCV-2b') expected_conseqs = { - '1-HCV-1a': hcv1a_seq, - '2-HCV-2b': hcv2b_seq} + '1-HCV-1a': 'TCACCAGGACAGCGGGTTGAATTCCTCGTGCAAGCGTGGAA', + '2-HCV-2b': 'GCCCGCCCCCTGATGGGGGCGACACTCCGCCA'} conseqs = read_contigs(contigs_csv) assert expected_conseqs == conseqs -def test_read_contigs_filter(projects): +def test_read_contigs_filter(): contigs_csv = StringIO("""\ ref,match,group_ref,contig HCV-1a,0.24,HCV-1a,TCACCAGGACAGCGGGTTGAATTCCTCGTGCAAGCGTGGAA HCV-2b,0.25,HCV-2b,GCCCGCCCCCTGATGGGGGCGACACTCCGCCA """) - hcv2b_seq = projects.getReference('HCV-2b') expected_conseqs = { '1-HCV-1a-partial': 'TCACCAGGACAGCGGGTTGAATTCCTCGTGCAAGCGTGGAA', - '2-HCV-2b': hcv2b_seq} + '2-HCV-2b': 'GCCCGCCCCCTGATGGGGGCGACACTCCGCCA'} conseqs = read_contigs(contigs_csv) @@ -1370,16 +1367,15 @@ def test_read_contigs_reversed(projects): assert expected_conseqs == conseqs -def test_read_contigs_excluded(projects): +def test_read_contigs_excluded(): contigs_csv = StringIO("""\ ref,match,group_ref,contig HCV-1a,1.0,HCV-1a,TCACCAGGACAGCGGGTTGAATTCCTCGTGCAAGCGTGGAA HLA-B-seed,0.02,HLA-B-seed,ATGCGGGTCACGGCACCCCGAACCGT """) - hcv1a_seq = projects.getReference('HCV-1a') excluded_seeds = ['HLA-B-seed'] expected_conseqs = { - '1-HCV-1a': hcv1a_seq, + '1-HCV-1a': 'TCACCAGGACAGCGGGTTGAATTCCTCGTGCAAGCGTGGAA', '2-HLA-B-seed-excluded': 'ATGCGGGTCACGGCACCCCGAACCGT'} conseqs = read_contigs(contigs_csv, excluded_seeds) @@ -1387,27 +1383,22 @@ def test_read_contigs_excluded(projects): assert expected_conseqs == conseqs -def test_read_contigs_mutations(projects): +def test_read_contigs_mutations(): contigs_csv = StringIO("""\ ref,match,group_ref,contig HCV-1a,1.0,HCV-1a,TCACCAGGACAGCGGGTTGAATTCCTCGTGCAAGCGTGGAA HCV-2b,1.0,HCV-2b,GCCCGACCCATGATGGGGGCGACACTCCGCCA """) - hcv1a_seq = projects.getReference('HCV-1a') - hcv2b_seq = projects.getReference('HCV-2b').replace( - 'GCCCGCCCCCTGATGGGGGCGACACTCCGCCA', - 'GCCCGACCCATGATGGGGGCGACACTCCGCCA') - # Changes:^ ^ expected_conseqs = { - '1-HCV-1a': hcv1a_seq, - '2-HCV-2b': hcv2b_seq} + '1-HCV-1a': 'TCACCAGGACAGCGGGTTGAATTCCTCGTGCAAGCGTGGAA', + '2-HCV-2b': 'GCCCGACCCATGATGGGGGCGACACTCCGCCA'} conseqs = read_contigs(contigs_csv) assert expected_conseqs == conseqs -def test_read_contigs_merged(projects): +def test_read_contigs_merged(): contigs_csv = StringIO("""\ ref,match,group_ref,contig HCV-1a,1.0,HCV-1a,TCACCAGGACAGCGGGTTGAATTCCTCGTGCAAGCGTGGAA @@ -1415,50 +1406,45 @@ def test_read_contigs_merged(projects): HCV-2a,1.0,HCV-2b,CTCCACCATGAATCACTCCCCTG """) # Changes: ^G->A ^G->T - hcv1a_seq = projects.getReference('HCV-1a') - hcv2b_seq = projects.getReference('HCV-2b').replace( - 'GCCCGCCCCCTGATGGGGGCGACACTCCGCCATGAATCACTCCCCTG', - 'GCCCGCCCCCTGATGGGGGCGACACTCCTCCATGAATCACTCCCCTG') + # TODO: Merge contigs 2 and 3, because they overlap. + # 'GCCCGCCCCCTGATGGGGGCGACACTCCTCCATGAATCACTCCCCTG' # Change: ^ expected_conseqs = { - '1-HCV-1a': hcv1a_seq, - '2_3-HCV-2b': hcv2b_seq} + '1-HCV-1a': 'TCACCAGGACAGCGGGTTGAATTCCTCGTGCAAGCGTGGAA', + '2-HCV-2b': 'GCCCGCCCCCTGATGGGGGCGACACTCCTCCA', + '3-HCV-2a': 'CTCCACCATGAATCACTCCCCTG'} conseqs = read_contigs(contigs_csv) assert expected_conseqs == conseqs -def test_read_contigs_trim_left(projects): - """ Trim contigs that extend past reference start. """ +def test_read_contigs_untrimmed_left(): + """ Don't trim contigs that extend past reference start. """ contigs_csv = StringIO("""\ ref,match,group_ref,contig HCV-1a,1.0,HCV-1a,TCACCAGGACAGCGGGTTGAATTCCTCGTGCAAGCGTGGAA HCV-2b,1.0,HCV-2b,TAGACATATTACCGCCCGCCCCCTGATGGGGGCGACACTCCGCCATGAATCACTCCCCTGT """) - hcv1a_seq = projects.getReference('HCV-1a') - hcv2b_seq = projects.getReference('HCV-2b') expected_conseqs = { - '1-HCV-1a': hcv1a_seq, - '2-HCV-2b': hcv2b_seq} + '1-HCV-1a': 'TCACCAGGACAGCGGGTTGAATTCCTCGTGCAAGCGTGGAA', + '2-HCV-2b': 'TAGACATATTACCGCCCGCCCCCTGATGGGGGCGACACTCCGCCATGAATCACTCCCCTGT'} conseqs = read_contigs(contigs_csv) assert expected_conseqs == conseqs -def test_read_contigs_trim_right(projects): - """ Trim contigs that extend past reference end. """ +def test_read_contigs_untrimmed_right(): + """ Don't trim contigs that extend past reference end. """ contigs_csv = StringIO("""\ ref,match,group_ref,contig HCV-1a,1.0,HCV-1a,TCACCAGGACAGCGGGTTGAATTCCTCGTGCAAGCGTGGAA HCV-2b,1.0,HCV-2b,TAGTTTCCGTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTAGACATATTACC """) - hcv1a_seq = projects.getReference('HCV-1a') - hcv2b_seq = projects.getReference('HCV-2b') expected_conseqs = { - '1-HCV-1a': hcv1a_seq, - '2-HCV-2b': hcv2b_seq} + '1-HCV-1a': 'TCACCAGGACAGCGGGTTGAATTCCTCGTGCAAGCGTGGAA', + '2-HCV-2b': 'TAGTTTCCGTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTAGACATATTACC'} conseqs = read_contigs(contigs_csv) diff --git a/micall/utils/compare_mapping.py b/micall/utils/compare_mapping.py index 1cd04ca50..5c93321c8 100644 --- a/micall/utils/compare_mapping.py +++ b/micall/utils/compare_mapping.py @@ -1,10 +1,11 @@ from base64 import standard_b64encode +from math import copysign from turtle import Turtle from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter, FileType from collections import defaultdict, Counter from csv import DictReader -from drawSvg import Line, Lines, Drawing +from drawSvg import Line, Lines, Drawing, Text, Circle from micall.core.plot_contigs import build_coverage_figure from micall.core.remap import SAM_FLAG_IS_UNMAPPED, SAM_FLAG_IS_FIRST_SEGMENT @@ -55,48 +56,71 @@ def main(): diagram_path = args.compare_mapping_svg f = build_coverage_figure(args.genome_coverage1_csv) del f.elements[6:] + ref_y, ref = f.elements[5] f2 = build_coverage_figure(args.genome_coverage2_csv) - # del f2.elements[7:] - contig_y, contig = f2.elements[6] + coverage3_y, coverage3 = f2.elements[4] + contig3_y, contig3 = f2.elements[5] + coverage1_y, coverage1 = f2.elements[6] + contig1_y, contig1 = f2.elements[7] + dashes_y, dashes = f2.elements[10] + del f2.elements[4:] + # contig_y, contig = f2.elements[6] - f.h += 300 + # f.h += 50 f.w = max(f.w, f2.w) - f.elements.extend(((y + 120 + (100 if i > 1 else 0), track) - for i, (y, track) in enumerate(f2.elements[4:]))) + f.add(coverage3, gap=-4) + f.add(contig3, gap=30) + contig3_y = f.elements[-1][0] + coverage1.a = 0 + f.add(coverage1, gap=-4) + contig1_shift = contig1.tracks[0].a + for track in contig1.tracks: + if track.a >= contig1_shift: + track.a -= contig1_shift + track.b -= contig1_shift + f.add(contig1) + contig1_y = f.elements[-1][0] if __name__ != '__live_coding__': drawing_width = 970 else: # noinspection PyProtectedMember turtle_screen = Turtle._screen - drawing_width = turtle_screen.cv.cget('width') + drawing_width = turtle_screen.cv.cget('width') - 10 diagram_path = None drawing = f.show(w=drawing_width) seed1_y = f.h - f.elements[5][0] - 10 seed2_y = f.h - f.elements[7][0] + 25 - ref_y = f.h - f.elements[7][0] - contig_y = f.h - f.elements[8][0] + 10 - contig_x = contig.tracks[0].a + ref_y = f.h - ref_y + contig_y = f.h - contig1_y + 25 + contig_x = 0 x_scale = drawing_width / f.w blast_display = BlastDisplay(drawing, x_scale, ref_y, contig_x, contig_y) blast_rows = list(DictReader(args.blast2_csv)) - blast_rows.reverse() - best_score = 0 + blast_rows.sort(key=lambda match: int(match['score']), reverse=True) best_ref = None + matched_positions = set() for row in blast_rows: - score = int(row['score']) - if score > best_score: - best_score = score + if '003' not in row['contig_name']: + continue + if best_ref is None: best_ref = row['ref_name'] - for row in blast_rows: - if row['ref_name'] == best_ref: - start = int(row['start']) - end = int(row['end']) + elif row['ref_name'] != best_ref: + continue + start = int(row['start']) + end = int(row['end']) + new_positions = set(range(start, end)) + collisions = matched_positions & new_positions + collision_fraction = len(collisions) / len(new_positions) + if collision_fraction < 0.1: + matched_positions |= new_positions ref_start = int(row['ref_start']) ref_end = int(row['ref_end']) blast_display.add_match(start, end, ref_start, ref_end) + blast_display.draw() + for source_bin, bin_moves in sorted_moves: total = sum(bin_moves.values()) for i, (target_bin, count) in enumerate(bin_moves.most_common()): @@ -106,20 +130,26 @@ def main(): if i == 0: print(source_bin, end=':') print(f'\t{target_bin}({fraction})') + source_shift = 0 + target_shift = contig_x*x_scale if source_bin is None: source_x = target_bin source_y = seed2_y - 10 + source_shift = target_shift else: source_x = source_bin source_y = seed1_y if target_bin is None: target_x = source_bin target_y = seed1_y + 10 + target_shift = source_shift else: target_x = target_bin target_y = seed2_y source_x *= 100*x_scale target_x *= 100*x_scale + source_x += source_shift + target_x += target_shift drawing.append(Line(source_x, source_y, target_x, target_y, stroke='black', @@ -144,14 +174,27 @@ def __init__(self, self.ref_y = ref_y self.contig_x = contig_x self.contig_y = contig_y + self.matches = [] # [(start, end, ref_start, ref_end)] def add_match(self, start, end, ref_start, ref_end): - left1 = ref_start * self.x_scale + self.matches.append((start, end, ref_start, ref_end)) + + def draw(self, use_arrows: bool = True): + if use_arrows: + self.matches.sort() + for i, (start, end, ref_start, ref_end) in enumerate(self.matches, 1): + left1 = ref_start * self.x_scale + left2 = (self.contig_x + start) * self.x_scale + right1 = ref_end * self.x_scale + right2 = (self.contig_x + end) * self.x_scale + if use_arrows: + self.draw_arrows(left1, right1, left2, right2, str(i)) + else: + self.draw_region(left1, right1, left2, right2) + + def draw_region(self, left1, right1, left2, right2): top = self.ref_y bottom = self.contig_y - left2 = (self.contig_x + start) * self.x_scale - right1 = ref_end * self.x_scale - right2 = (self.contig_x + end) * self.x_scale self.drawing.append(Lines(left1, top, left2, bottom, right2, bottom, @@ -160,6 +203,36 @@ def add_match(self, start, end, ref_start, ref_end): stroke='black', fill='ivory')) + def draw_arrows(self, left1, right1, left2, right2, label): + self.draw_arrow(left1, right1, self.ref_y - 15, label) + self.draw_arrow(left2, right2, self.contig_y + 15, label) + + def draw_arrow(self, start, end, y, label): + r = 10 + arrow_size = 7 + direction = copysign(1, end-start) + centre = (start + end - direction*arrow_size)/2 + centre_start = centre - direction*r + centre_end = centre + direction*r + self.drawing.append(Line(start, y, + centre_start, y, + stroke='black')) + self.drawing.append(Line(centre_end, y, + end, y, + stroke='black')) + self.drawing.append(Circle(centre, y, r, fill='ivory', stroke='black')) + self.drawing.append(Text(label, + 15, + centre, y, + text_anchor='middle', + dy="0.3em")) + arrow_end = end + arrow_start = arrow_end - arrow_size*direction + self.drawing.append(Lines(arrow_end, y, + arrow_start, y + arrow_size/2, + arrow_start, y - arrow_size/2, + fill='black')) + def display_image(drawing): png_data = drawing.rasterize()