From d9aedd542b7d9af1845ab046127559a919665216 Mon Sep 17 00:00:00 2001 From: Danny Park Date: Fri, 30 Mar 2018 10:48:50 -0400 Subject: [PATCH 1/3] update tbl2asn --- tools/tbl2asn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/tbl2asn.py b/tools/tbl2asn.py index b32844e10..b812c11c1 100644 --- a/tools/tbl2asn.py +++ b/tools/tbl2asn.py @@ -14,7 +14,7 @@ import gzip TOOL_NAME = "tbl2asn" -TOOL_VERSION = "25.3" # quirk: versions error-out one year after their compilation date +TOOL_VERSION = "25.6" # quirk: versions error-out one year after their compilation date TOOL_URL = 'ftp://ftp.ncbi.nih.gov/toolbox/ncbi_tools/converters/by_program/tbl2asn/{os}.tbl2asn.gz' log = logging.getLogger(__name__) From 5f5e82563d8938b1536ce8070ce2e4e6b8cf1a95 Mon Sep 17 00:00:00 2001 From: Danny Park Date: Fri, 30 Mar 2018 11:02:35 -0400 Subject: [PATCH 2/3] keep left-trimmed CDS features in frame --- ncbi.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/ncbi.py b/ncbi.py index e8a29e652..49d1f7809 100755 --- a/ncbi.py +++ b/ncbi.py @@ -82,11 +82,19 @@ def tbl_transfer_common(cmap, ref_tbl, out_tbl, alt_chrlens, oob_clip=False): # feature overhangs end of sequence if oob_clip: if row[0] == None: - row[0] = '<1' + # clip the beginning + if row[2] == 'CDS': + # for CDS features, clip in multiples of 3 + r = (row[1] if row[1] is not None else alt_chrlens[altid]) + row[0] = '<{}'.format((r % 3) + 1) + else: + row[0] = '<1' if row[1] == None: + # clip the end row[1] = '>{}'.format(alt_chrlens[altid]) feature_keep = True else: + # drop the partially out of bounds feature feature_keep = False continue line = '\t'.join(map(str, row)) From f4fac0eabde043a99d2f4509db60d96e43d674d2 Mon Sep 17 00:00:00 2001 From: Danny Park Date: Fri, 30 Mar 2018 12:42:54 -0400 Subject: [PATCH 3/3] significant bugfixes to oob_clip treatment in tbl_transfer_prealigned when clipping features on the negative strand. also incorporate CDS rounding by 3 during clipping for these cases too. --- ncbi.py | 45 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/ncbi.py b/ncbi.py index 49d1f7809..37f44e367 100755 --- a/ncbi.py +++ b/ncbi.py @@ -64,15 +64,19 @@ def tbl_transfer_common(cmap, ref_tbl, out_tbl, alt_chrlens, oob_clip=False): raise Exception("this line has only one column?") row[0] = int(row[0]) row[1] = int(row[1]) + strand = None if row[1] >= row[0]: + strand = '+' row[0] = cmap.mapChr(refSeqID, altid, row[0], -1)[1] row[1] = cmap.mapChr(refSeqID, altid, row[1], 1)[1] else: # negative strand feature + strand = '-' row[0] = cmap.mapChr(refSeqID, altid, row[0], 1)[1] row[1] = cmap.mapChr(refSeqID, altid, row[1], -1)[1] if row[0] and row[1]: + # feature completely within bounds feature_keep = True elif row[0] == None and row[1] == None: # feature completely out of bounds @@ -81,17 +85,36 @@ def tbl_transfer_common(cmap, ref_tbl, out_tbl, alt_chrlens, oob_clip=False): else: # feature overhangs end of sequence if oob_clip: - if row[0] == None: - # clip the beginning - if row[2] == 'CDS': - # for CDS features, clip in multiples of 3 - r = (row[1] if row[1] is not None else alt_chrlens[altid]) - row[0] = '<{}'.format((r % 3) + 1) - else: - row[0] = '<1' - if row[1] == None: - # clip the end - row[1] = '>{}'.format(alt_chrlens[altid]) + if strand == '+': + # clip pos strand feature + if row[0] == None: + # clip the beginning + if row[2] == 'CDS': + # for CDS features, clip in multiples of 3 + r = (row[1] if row[1] is not None else alt_chrlens[altid]) + row[0] = '<{}'.format((r % 3) + 1) + else: + row[0] = '<1' + if row[1] == None: + # clip the end + row[1] = '>{}'.format(alt_chrlens[altid]) + else: + # clip neg strand feature + if row[0] == None: + # clip the beginning (right side) + r = alt_chrlens[altid] + if row[2] == 'CDS': + # for CDS features, clip in multiples of 3 + l = (row[1] if row[1] is not None else 1) # new left + r = r - ((r-l+1) % 3) # create new right in multiples of 3 from left + if (r-l) < 3: + # less than a codon remains, drop it + feature_keep = False + continue + row[0] = '>{}'.format(r) + if row[1] == None: + # clip the end (left side) + row[1] = '<1' feature_keep = True else: # drop the partially out of bounds feature