Skip to content

Commit

Permalink
fixes #287: export DNA sequences from extensions and loopouts
Browse files Browse the repository at this point in the history
  • Loading branch information
dave-doty committed Dec 20, 2023
1 parent db8d302 commit 999c493
Show file tree
Hide file tree
Showing 2 changed files with 97 additions and 35 deletions.
101 changes: 66 additions & 35 deletions scadnano/scadnano.py
Original file line number Diff line number Diff line change
Expand Up @@ -2016,6 +2016,40 @@ def _is_close(x1: float, x2: float) -> bool:
return abs(x1 - x2) < _floating_point_tolerance


def _vendor_dna_sequence_substrand(substrand: Union[Domain, Loopout, Extension]) -> Optional[str]:
# used to share code between Domain, Loopout Extension
# for adding modification codes to exported DNA sequence
if substrand.dna_sequence is None:
return None

strand = substrand.strand()
len_dna_prior = 0
for other_substrand in strand.domains:
if other_substrand is substrand:
break
len_dna_prior += other_substrand.dna_length()

new_seq_list = []
for pos, base in enumerate(substrand.dna_sequence):
new_seq_list.append(base)
strand_pos = pos + len_dna_prior
if strand_pos in strand.modifications_int: # if internal mod attached to base, replace base
mod = strand.modifications_int[strand_pos]
if mod.vendor_code is not None:
vendor_code_with_delim = mod.vendor_code
if mod.allowed_bases is not None:
if base not in mod.allowed_bases:
msg = (f'internal modification {mod} can only replace one of these bases: '
f'{",".join(mod.allowed_bases)}, '
f'but the base at position {strand_pos} is {base}')
raise IllegalDesignError(msg)
new_seq_list[-1] = vendor_code_with_delim # replace base with modified base
else:
new_seq_list.append(vendor_code_with_delim) # append modification between two bases

return ''.join(new_seq_list)


@dataclass
class Domain(_JSONSerializable):
"""
Expand Down Expand Up @@ -2167,35 +2201,7 @@ def vendor_dna_sequence(self) -> Optional[str]:
The difference between this and the field :data:`Domain.dna_sequence` is that this
will add internal modification codes.
"""
if self.dna_sequence is None:
return None

strand = self.strand()
len_dna_prior = 0
for domain in strand.domains:
if domain is self:
break
len_dna_prior += domain.dna_length()

new_seq_list = []
for pos, base in enumerate(self.dna_sequence):
new_seq_list.append(base)
strand_pos = pos + len_dna_prior
if strand_pos in strand.modifications_int: # if internal mod attached to base, replace base
mod = strand.modifications_int[strand_pos]
if mod.vendor_code is not None:
vendor_code_with_delim = mod.vendor_code
if mod.allowed_bases is not None:
if base not in mod.allowed_bases:
msg = (f'internal modification {mod} can only replace one of these bases: '
f'{",".join(mod.allowed_bases)}, '
f'but the base at position {strand_pos} is {base}')
raise IllegalDesignError(msg)
new_seq_list[-1] = vendor_code_with_delim # replace base with modified base
else:
new_seq_list.append(vendor_code_with_delim) # append modification between two bases

return ''.join(new_seq_list)
return _vendor_dna_sequence_substrand(self)

def set_name(self, name: str) -> None:
"""Sets name of this :any:`Domain`."""
Expand Down Expand Up @@ -2565,6 +2571,15 @@ def __repr__(self) -> str:
def __str__(self) -> str:
return repr(self) if self.name is None else self.name

def vendor_dna_sequence(self) -> Optional[str]:
"""
:return:
vendor DNA sequence of this :any:`Loopout`, or ``None`` if no DNA sequence has been assigned.
The difference between this and the field :data:`Loopout.dna_sequence` is that this
will add internal modification codes.
"""
return _vendor_dna_sequence_substrand(self)

def set_name(self, name: str) -> None:
"""Sets name of this :any:`Loopout`."""
self.name = name
Expand Down Expand Up @@ -2701,6 +2716,15 @@ def dna_length(self) -> int:
"""Length of this :any:`Extension`; same as field :data:`Extension.num_bases`."""
return self.num_bases

def vendor_dna_sequence(self) -> Optional[str]:
"""
:return:
vendor DNA sequence of this :any:`Extension`, or ``None`` if no DNA sequence has been assigned.
The difference between this and the field :data:`Extension.dna_sequence` is that this
will add internal modification codes.
"""
return _vendor_dna_sequence_substrand(self)

def set_label(self, label: Optional[str]) -> None:
"""Sets label of this :any:`Extension`."""
self.label = label
Expand Down Expand Up @@ -3709,7 +3733,7 @@ def __init__(self,
def __post_init__(self) -> None:
self._ensure_domains_not_none()

self.set_domains(self.domains)
self.set_domains(self.domains) # some error-checking code is in this method

self._ensure_modifications_legal()
self._ensure_domains_nonoverlapping()
Expand Down Expand Up @@ -3886,15 +3910,18 @@ def set_linear(self) -> None:

def set_domains(self, domains: Iterable[Union[Domain, Loopout]]) -> None:
"""
Sets the :any:`Domain`'s/:any:`Loopout`'s of this :any:`Strand` to be `domains`,
which can contain a mix of :any:`Domain`'s and :any:`Loopout`'s,
Sets the :any:`Domain`'s/:any:`Loopout`'s/:any:`Extension`'s of this :any:`Strand` to be `domains`,
which can contain a mix of :any:`Domain`'s, :any:`Loopout`'s, and :any:`Extension`'s,
just like the field :py:data:`Strand.domains`.
:param domains:
The new sequence of :any:`Domain`'s/:any:`Loopout`'s to use for this :any:`Strand`.
The new sequence of :any:`Domain`'s/:any:`Loopout`'s/:any:`Extension`'s to use for this
:any:`Strand`.
:raises StrandError:
if domains has two consecutive :any:`Loopout`'s, consists of just a single :any:`Loopout`'s,
or starts or ends with a :any:`Loopout`
if domains has two consecutive :any:`Loopout`'s, consists of just a single :any:`Loopout`'s
or a single :any:`Extension`, or starts or ends with a :any:`Loopout`,
or has an :any:`Extension` on a circular :any:`Strand`,
or has an :any:`Extension` not as the first or last element of `domains`.
"""
self.domains = domains if isinstance(domains, list) else list(domains)

Expand All @@ -3912,6 +3939,10 @@ def set_domains(self, domains: Iterable[Union[Domain, Loopout]]) -> None:
if len(self.domains) == 0:
raise StrandError(self, 'domains cannot be empty')

for domain in self.domains[1:-1]:
if isinstance(domain, Extension):
raise StrandError(self, 'cannot have an Extension in the middle of domains')

for domain1, domain2 in _pairwise(self.domains):
if isinstance(domain1, Loopout) and isinstance(domain2, Loopout):
raise StrandError(self, 'cannot have two consecutive Loopouts in a strand')
Expand Down
31 changes: 31 additions & 0 deletions tests/scadnano_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -1309,6 +1309,37 @@ def test_write_idt_plate_excel_file(self) -> None:

os.remove(filename)

def test_export_dna_sequences_extension_5p(self) -> None:
design = sc.Design(helices=[sc.Helix(max_offset=100)])
design.draw_strand(0, 0) \
.extension_5p(3) \
.move(5) \
.with_sequence('TTT' + 'AAAAA') \
.with_name('strand')
contents = design.to_idt_bulk_input_format()
self.assertEqual('strand,TTTAAAAA,25nm,STD', contents)

def test_export_dna_sequences_extension_3p(self) -> None:
design = sc.Design(helices=[sc.Helix(max_offset=100)])
design.draw_strand(0, 0) \
.move(5) \
.extension_3p(3) \
.with_sequence('AAAAA' + 'TTT') \
.with_name('strand')
contents = design.to_idt_bulk_input_format()
self.assertEqual('strand,AAAAATTT,25nm,STD', contents)

def test_export_dna_sequences_loopout(self) -> None:
design = sc.Design(helices=[sc.Helix(max_offset=100), sc.Helix(max_offset=100)])
design.draw_strand(0, 0) \
.move(5) \
.loopout(1, 3) \
.move(-5) \
.with_sequence('AAAAA' + 'TTT' + 'AAAAA') \
.with_name('strand')
contents = design.to_idt_bulk_input_format()
self.assertEqual('strand,AAAAATTTAAAAA,25nm,STD', contents)


class TestExportCadnanoV2(unittest.TestCase):
"""
Expand Down

0 comments on commit 999c493

Please sign in to comment.