Skip to content

Commit

Permalink
fixes #276: customize delimiter between domains in exported DNA sequences
Browse files Browse the repository at this point in the history
  • Loading branch information
dave-doty committed Aug 25, 2023
1 parent 301edc4 commit 315c0f3
Show file tree
Hide file tree
Showing 2 changed files with 111 additions and 31 deletions.
102 changes: 72 additions & 30 deletions scadnano/scadnano.py
Original file line number Diff line number Diff line change
Expand Up @@ -2128,6 +2128,47 @@ def strand(self) -> Strand:
raise ValueError('_parent_strand has not yet been set')
return self._parent_strand

def idt_dna_sequence(self, domain_delimiter: str = '') -> Optional[str]:
    """
    :param domain_delimiter:
        delimiter to put between the bases of this :any:`Domain` and any internal modifications
        in the IDT DNA sequence; if specified then every internal modification is separated
        from the bases on *both* sides of it by this value. The returned string never starts
        or ends with the delimiter, so a caller can join the sequences of several domains
        with the same delimiter without producing doubled delimiters.
    :return:
        IDT DNA sequence of this :any:`Domain`, or ``None`` if no DNA sequence has been assigned.
        The difference between this and the field :data:`Domain.dna_sequence` is that this
        will add internal modification codes.
    :raises IllegalDesignError:
        if an internal modification that restricts its allowed bases sits on a base
        that is not one of them
    """
    if self.dna_sequence is None:
        return None

    strand = self.strand()

    # Internal modifications are looked up by position along the whole strand, so find how
    # many bases precede this domain on its strand.
    len_dna_prior = 0
    for domain in strand.domains:
        if domain is self:
            break
        len_dna_prior += domain.dna_length()

    tokens: List[str] = []  # alternating runs of plain bases and modification codes
    bases: List[str] = []  # current run of plain bases not yet flushed into tokens

    for pos, base in enumerate(self.dna_sequence):
        strand_pos = pos + len_dna_prior

        idt_text = None
        replaces_base = False
        if strand_pos in strand.modifications_int:
            mod = strand.modifications_int[strand_pos]
            if mod.idt_text is not None:
                idt_text = mod.idt_text
                if mod.allowed_bases is not None:
                    if base not in mod.allowed_bases:
                        msg = (f'internal modification {mod} can only replace one of these bases: '
                               f'{",".join(mod.allowed_bases)}, '
                               f'but the base at position {strand_pos} is {base}')
                        raise IllegalDesignError(msg)
                    # modification code stands in for the base itself
                    replaces_base = True

        if idt_text is None:
            bases.append(base)
            continue

        if not replaces_base:
            # modification goes between two bases, so the base itself is kept
            bases.append(base)
        if bases:
            tokens.append(''.join(bases))
            bases = []
        tokens.append(idt_text)

    if bases:
        tokens.append(''.join(bases))

    # Joining (rather than prepending the delimiter to each modification) puts the delimiter
    # on both sides of an internal modification and never at the ends of the result.
    return domain_delimiter.join(tokens)

def set_name(self, name: str) -> None:
    """
    Assigns `name` as the name of this :any:`Domain`.

    :param name: new value for the field ``name``
    """
    self.name = name
Expand Down Expand Up @@ -3042,8 +3083,8 @@ def _most_recently_added_substrand_is_extension_3p(self) -> bool:

def update_to(self, offset: int) -> StrandBuilder:
"""
Like :py:meth:`StrandBuilder.to`, but changes the current offset without creating
a new :any:`Domain`. So unlike :py:meth:`StrandBuilder.to`, several consecutive calls to
Like :meth:`StrandBuilder.to`, but changes the current offset without creating
a new :any:`Domain`. So unlike :meth:`StrandBuilder.to`, several consecutive calls to
:meth:`StrandBuilder.update_to` are equivalent to only making the final call.
Generally there's no point in calling :meth:`StrandBuilder.update_to` in one line of code.
Expand Down Expand Up @@ -3146,19 +3187,19 @@ def with_modification_3p(self, mod: Modification3Prime) -> StrandBuilder:
self._strand.set_modification_3p(mod)
return self

def with_modification_internal(self, idx: int, mod: ModificationInternal, warn_on_no_dna: bool) \
-> StrandBuilder:
def with_modification_internal(self, idx: int, mod: ModificationInternal,
warn_no_dna: bool = True) -> StrandBuilder:
"""
Sets Strand being built to have given internal modification.
:param idx: idx along DNA sequence of internal modification
:param mod: internal modification
:param warn_on_no_dna: whether to print warning to screen if DNA has not been assigned
:param warn_no_dna: whether to print warning to screen if DNA has not been assigned
:return: self
"""
if self._strand is None:
raise ValueError('no Strand created yet; make at least one domain first')
self._strand.set_modification_internal(idx, mod, warn_on_no_dna)
self._strand.set_modification_internal(idx, mod, warn_no_dna)
return self

def with_color(self, color: Color) -> StrandBuilder:
Expand Down Expand Up @@ -3469,15 +3510,15 @@ class Strand(_JSONSerializable):
"""

domains: List[Union[Domain, Loopout, Extension]]
""":any:`Domain`'s (or :any:`Loopout`'s) composing this Strand.
""":any:`Domain`'s (or :any:`Loopout`'s or :any:`Extension`'s) composing this :any:`Strand`.
Each :any:`Domain` is contiguous on a single :any:`Helix`
and could be either single-stranded or double-stranded,
whereas each :any:`Loopout` is single-stranded and has no associated :any:`Helix`."""
whereas each :any:`Loopout` and :any:`Extension` is single-stranded and has no associated :any:`Helix`."""

circular: bool = False
"""If True, this :any:`Strand` is circular and has no 5' or 3' end. Although there is still a
first and last :any:`Domain`, we interpret there to be a crossover from the 3' end of the last domain
to the 5' end of the first domain, and any circular permutation of :py:data:`Strand.domains`
to the 5' end of the first domain, and any circular permutation of :data:`Strand.domains`
should result in a functionally equivalent :any:`Strand`. It is illegal to have a
:any:`Modification5Prime` or :any:`Modification3Prime` on a circular :any:`Strand`."""

Expand All @@ -3489,12 +3530,12 @@ def dna_sequence(self) -> Optional[str]:
Note that this does not include any IDT codes for :any:`Modification`'s.
To include those call :meth:`Strand.idt_dna_sequence`."""
sequence = ''
sequence_list = []
for domain in self.domains:
if domain.dna_sequence is None:
return None
sequence += domain.dna_sequence
return sequence
sequence_list.append(domain.dna_sequence)
return ''.join(sequence_list)

color: Optional[Color] = None
"""Color to show this strand in the main view. If not specified in the constructor,
Expand Down Expand Up @@ -4217,8 +4258,10 @@ def _ensure_domains_nonoverlapping(self) -> None:
f'\n{d1}'
f'\n{d2}')

def idt_dna_sequence(self) -> str:
def idt_dna_sequence(self, domain_delimiter: str = '') -> str:
"""
:param domain_delimiter:
string to put in between DNA sequences of each domain, and between modifications and DNA
:return: DNA sequence as it needs to be typed to order from IDT, with
:py:data:`Modification5Prime`'s,
:py:data:`Modification3Prime`'s,
Expand All @@ -4232,27 +4275,17 @@ def idt_dna_sequence(self) -> str:
raise ValueError('DNA sequence has not been assigned yet')

ret_list: List[str] = []

if self.modification_5p is not None and self.modification_5p.idt_text is not None:
ret_list.append(self.modification_5p.idt_text)

for offset, base in enumerate(self.dna_sequence):
ret_list.append(base)
if offset in self.modifications_int: # if internal mod attached to base, replace base
mod = self.modifications_int[offset]
if mod.idt_text is not None:
if mod.allowed_bases is not None:
if base not in mod.allowed_bases:
msg = f'internal modification {mod} can only replace one of these bases: ' \
f'{",".join(mod.allowed_bases)}, but the base at offset {offset} is {base}'
raise IllegalDesignError(msg)
ret_list[-1] = mod.idt_text # replace base with modified base
else:
ret_list.append(mod.idt_text) # append modification between two bases
for substrand in self.domains:
ret_list.append(substrand.idt_dna_sequence(domain_delimiter=domain_delimiter))

if self.modification_3p is not None and self.modification_3p.idt_text is not None:
ret_list.append(self.modification_3p.idt_text)

return ''.join(ret_list)
return domain_delimiter.join(ret_list)

def no_modifications_version(self) -> Strand:
"""
Expand Down Expand Up @@ -7063,7 +7096,8 @@ def move_strands_on_helices(self, delta: int) -> None:
self._check_strands_reference_helices_legally()

def assign_dna(self, strand: Strand, sequence: str, assign_complement: bool = True,
domain: Union[Domain, Loopout, Extension] = None, check_length: bool = False) -> None:
domain: Union[Domain, Loopout, Extension, None] = None,
check_length: bool = False) -> None:
"""
Assigns `sequence` as DNA sequence of `strand`.
Expand Down Expand Up @@ -7172,6 +7206,7 @@ def assign_dna(self, strand: Strand, sequence: str, assign_complement: bool = Tr

def to_idt_bulk_input_format(self,
delimiter: str = ',',
domain_delimiter: str = '',
key: Optional[KeyFunction[Strand]] = None,
warn_duplicate_name: bool = False,
only_strands_with_idt: bool = False,
Expand Down Expand Up @@ -7203,7 +7238,7 @@ def to_idt_bulk_input_format(self,
scale = default_idt_scale
purification = default_idt_purification
idt_lines.append(delimiter.join(
[strand.idt_export_name(), strand.idt_dna_sequence(),
[strand.idt_export_name(), strand.idt_dna_sequence(domain_delimiter=domain_delimiter),
scale, purification]
))

Expand Down Expand Up @@ -7293,6 +7328,7 @@ def write_idt_bulk_input_file(self, *, directory: str = '.', filename: str = Non
key: Optional[KeyFunction[Strand]] = None,
extension: Optional[str] = None,
delimiter: str = ',',
domain_delimiter: str = '',
warn_duplicate_name: bool = True,
only_strands_with_idt: bool = False,
export_scaffold: bool = False,
Expand Down Expand Up @@ -7321,7 +7357,12 @@ def write_idt_bulk_input_file(self, *, directory: str = '.', filename: str = Non
:param extension:
alternate filename extension to use (instead of idt)
:param delimiter:
is the symbol to delimit the four IDT fields name,sequence,scale,purification.
symbol to delimit the four IDT fields name,sequence,scale,purification.
:param domain_delimiter:
This is placed between the DNA sequences of adjacent domains on a strand. For instance, IDT
(Integrated DNA Technologies, Coralville, IA, https://www.idtdna.com/) ignores spaces,
so setting `domain_delimiter` to ``' '`` will insert a space between adjacent domains while
remaining readable by IDT's website.
:param warn_duplicate_name:
if ``True`` prints a warning when two different :any:`Strand`'s have the same
:data:`IDTFields.name` and the same :data:`Strand.dna_sequence`. An :any:`IllegalDesignError` is
Expand All @@ -7344,6 +7385,7 @@ def write_idt_bulk_input_file(self, *, directory: str = '.', filename: str = Non
'_nomods' appended to it.
"""
contents = self.to_idt_bulk_input_format(delimiter=delimiter,
domain_delimiter=domain_delimiter,
key=key,
warn_duplicate_name=warn_duplicate_name,
only_strands_with_idt=only_strands_with_idt,
Expand Down
40 changes: 39 additions & 1 deletion tests/scadnano_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -482,7 +482,7 @@ def test_strand__multiple_strands_overlap_no_error(self) -> None:
design = self.design_6helix
design.draw_strand(0, 0).to(10).cross(1).to(0) \
.as_scaffold() \
.with_modification_internal(5, mod.cy3_int, warn_on_no_dna=False)
.with_modification_internal(5, mod.cy3_int, warn_no_dna=False)
design.draw_strand(0, 10).to(0).cross(1).to(10).with_modification_5p(mod.biotin_5p)
expected_strand0 = sc.Strand([
sc.Domain(0, True, 0, 10),
Expand Down Expand Up @@ -1098,6 +1098,44 @@ def _get_names_idt(design: sc.Design, key: sc.KeyFunction[sc.Strand]) -> str:
names_joined = ''.join(names)
return names_joined

def test_domain_delimiters(self) -> None:
    """A space given as ``domain_delimiter`` appears between the three domain sequences
    of the single strand in the IDT bulk-input text."""
    strand_name = 's1'
    design = sc.Design(
        helices=[sc.Helix(max_offset=100) for _ in range(6)],
        strands=[],
        grid=sc.square,
    )

    # one strand with three domains on helices 0, 1, 2, each with its own 5-base sequence
    (design.draw_strand(0, 0)
     .move(5).with_domain_sequence('AAAAA')
     .cross(1).move(-5).with_domain_sequence('CCCCC')
     .cross(2).move(5).with_domain_sequence('GGGGG')
     .with_name(strand_name))

    expected = f'{strand_name},AAAAA CCCCC GGGGG,25nm,STD'
    actual = design.to_idt_bulk_input_format(delimiter=',', domain_delimiter=' ')
    self.assertEqual(expected, actual)

def test_domain_delimiters_modifications(self) -> None:
    """With a space as ``domain_delimiter``, the 5', internal and 3' modification codes are
    each separated by a space from the neighboring domain sequences, both in
    ``Strand.idt_dna_sequence`` and in the IDT bulk-input text."""
    strand_name = 's1'

    design = sc.Design(
        helices=[sc.Helix(max_offset=100) for _ in range(6)],
        strands=[],
        grid=sc.square,
    )

    mod_5 = sc.Modification5Prime(display_text='B', idt_text='/5Biosg/')
    mod_i = sc.ModificationInternal(display_text='B', idt_text='/iBiodT/', allowed_bases={'T'})
    mod_3 = sc.Modification3Prime(display_text='Cy3', idt_text='/3Cy3Sp/')

    # index 9 is the last base (the T) of the second domain 'CCCCT'
    (design.draw_strand(0, 0)
     .move(5).with_domain_sequence('AAAAA')
     .cross(1).move(-5).with_domain_sequence('CCCCT')
     .cross(2).move(5).with_domain_sequence('GGGGG')
     .with_name(strand_name)
     .with_modification_5p(mod_5)
     .with_modification_internal(9, mod_i)
     .with_modification_3p(mod_3))

    # check the strand-level sequence first
    strand = design.strands[0]
    strand_idt_dna_sequence = strand.idt_dna_sequence(domain_delimiter=' ')
    self.assertEqual('/5Biosg/ AAAAA CCCC /iBiodT/ GGGGG /3Cy3Sp/', strand_idt_dna_sequence)

    # then the full bulk-input line for the design
    idt_content = design.to_idt_bulk_input_format(delimiter=',', domain_delimiter=' ')
    self.assertEqual(f'{strand_name},/5Biosg/ AAAAA CCCC /iBiodT/ GGGGG /3Cy3Sp/,25nm,STD',
                     idt_content)

def test_to_idt_bulk_input_format__row_major_5p(self) -> None:
key = sc.strand_order_key_function(column_major=False, strand_order=sc.StrandOrder.five_prime)
names_joined = self._get_names_idt(self.design_6h, key)
Expand Down

0 comments on commit 315c0f3

Please sign in to comment.