-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcommands.py
executable file
·488 lines (374 loc) · 16.5 KB
/
commands.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
#######################################################################
# Copyright 2012 Junghoon Kim
# jfkimberly@skku.edu
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#######################################################################
import os
import re
import random
from itertools import count, izip, product
from functions import *
# Module-level strand store: strandgen() writes its 'strand<N>' entries into
# this dict and returns it.
strands = {}
def help():
    """ 'help' command. Lists every available command with its short alias,
    followed by a hint on how to get per-command help.
    """
    menu = (
        "Possible commands are:",
        "1. newarms (na)",
        "2. show (s)",
        "3. link (l)",
        "4. crunch (c)",
        "5. strandgen (sg)",
        "6. repeatcheck (rp)",
        "7. dyadcheck (dc)",
        "8. save (sv)",
        "9. load (ld)",
        "10. exit",
        "Use '--help' for more information. e.g. 'na --help'",
    )
    # one menu entry per output line, in the order listed above
    for entry in menu:
        print(entry)
    return None
def _ask_int(prompt):
    """ Keeps asking with 'prompt' until the user types a valid integer and
    returns that integer.
    """
    while True:
        try:
            return int(raw_input(prompt))
        except ValueError:
            print("Come again?")


def newarms():
    """ 'newarms' command; returns a dictionary 'arms' of the newly created
    arms. The keys are arm1, arm2, etc. and their values are a 2-element
    list. The elements are "empty" strings of the arms of length 'arm_length'
    e.g. ['xxxx','xxxx']. The two strings are meant to become complementary
    segments when using the "crunch" function.

    The user is asked for batches of arms (a count and a subarm length per
    batch) until the answer to "Any more arms?" is 'n'. Arm numbers continue
    across batches. A batch with a count of zero (or less) adds no arms and
    does not advance the numbering.
    """
    next_arm = 1
    arms = {}

    while True:
        # e.g. if 4 arms of length 4 and 4 arms of length 8 are needed, the
        # first batch is (4, 4) and the second batch is (4, 8).
        arm_numbers = _ask_int("How many arms do you want?\n")
        arm_length = _ask_int(
            "What is the length of the subarms of these arms?\n")

        # e.g. arm1:['xxxx','xxxx'], arm2:['xxxxxx','xxxxxx'], etc.
        batch = range(next_arm, next_arm + arm_numbers)
        for arm_index in batch:
            arms['arm' + str(arm_index)] = ['x' * arm_length, 'x' * arm_length]
        # Advance by the number of arms actually created; an empty batch
        # leaves the numbering untouched instead of relying on a loop
        # variable that may never have been assigned.
        next_arm += len(batch)

        print("Number of arms: %d, Arm length: %d" % (arm_numbers, arm_length))

        # asks user if more arms are needed
        while True:
            decision = raw_input("Any more arms? (y/n)\n")
            if decision in ('y', 'n'):
                break
            print("please enter 'y' or 'n'\n")

        if decision == 'n':
            return arms
def sorted_nicely(l):
    """ Returns a new list with the items of 'l' in natural ("human") order,
    i.e. runs of digits are compared as numbers, so 'arm2' sorts before
    'arm10'.

    Required arguments:
    l -- The iterable of strings to be sorted.

    Taken from Jeff Atwood's blog
    https://blog.codinghorror.com/sorting-for-humans-natural-sort-order/
    """
    def natural_key(name):
        # split into alternating text / digit-run chunks; the capture group
        # keeps the digit runs in the result
        pieces = []
        for chunk in re.split('([0-9]+)', name):
            if chunk.isdigit():
                pieces.append(int(chunk))
            else:
                pieces.append(chunk)
        return pieces

    return sorted(l, key=natural_key)
def show(arms):
    """ show (s) command; prints every arm in 'arms' (a dictionary mapping an
    arm name to its two base sequences). Each arm is printed as a blank line,
    the arm name, and then both sequences split into groups of 5 bases.
    """
    # natural ordering: arm1, arm2, ..., arm10, ... rather than
    # arm1, arm10, arm2, ...
    ordered_names = sorted_nicely(arms.iterkeys())
    for name in ordered_names:
        print("")
        print(name)
        for strand_index in (0, 1):
            print(re.sub("(.{5})", "\\1 ", arms[name][strand_index]))

    return None
def linker():
    """ 'linker' function used in 'strandgen' function.

    Joins the arm chains returned by linker3() through the connectors
    returned by linker5(), prints the resulting list and returns it.

    linker3() and linker5() are not defined in this file (presumably they
    come from 'functions' -- verify). From the way they are used here:
    each 'link3' entry is an indexable chain of arm labels, and each 'link5'
    entry is a connector whose element [0] is matched against the LAST label
    of a chain and whose element [1] is matched against the FIRST label of a
    chain.

    Returns:
    linker_list -- list of the chains left in 'link3' after merging.
    """

    # NOTE(review): the nesting below was reconstructed from a copy of the
    # file without indentation -- confirm against the original layout.

    # create links (the arms)
    link3 = linker3()
    link5 = linker5()

    # join the 3'-linked and 5'-linked arms into 'linker_list' to create strands
    linker_list = []

    try:
        # The outer while loop keeps sweeping over 'link5' until every
        # connector has been removed from it.
        while link5:

            for l5 in link5:

                # For the connector 'l5', look for the chain that ends with
                # l5[0] ('forearms') and the chain that starts with l5[1]
                # ('postarms'), e.g. with link3 = [['5','1'],['2','3']] the
                # connector ['1','2'] picks ['5','1'] and ['2','3'].
                forearms = None
                postarms = None

                # 'link3' is rebound to a filtered copy inside this loop; the
                # loop itself keeps iterating over the list it started with.
                for l3 in link3:
                    if l3[-1] == l5[0]:
                        forearms = l3[:]
                        link3 = [x for x in link3 if x != l3]

                    if l3[0] == l5[1]:
                        postarms = l3[:]
                        link3 = [x for x in link3 if x != l3]

                if forearms and postarms:
                    # both ends found: store the merged chain
                    link3.append(forearms + postarms)

                    # remove 'l5' element (which connects matching 'l3'
                    # elements) from 'link5' list
                    # NOTE(review): this removes from 'link5' while the
                    # enclosing for loop is iterating over it, so the next
                    # connector is skipped in this sweep and picked up by a
                    # later pass of the while loop.
                    # NOTE(review): with this nesting a connector that never
                    # matches both ends is never removed, so the while loop
                    # would not terminate -- confirm the intended nesting.
                    link5.remove(l5)

    except (IndexError, UnboundLocalError):
        print "Hmm, something seems off, try the 'link' command again."

    # add remaining strands to 'linker_list'
    linker_list += link3
    print "linker_list", linker_list

    return linker_list
def crunch(arms, strands, linker_list, segment_list):
    """randomly generates or the user defines a sequence of bases for the each
    of the arms and returns dictionary 'arms' which consists of the arms as
    keys, e.g. arm1, arm2, etc., and each arms' sequence and its complementary
    sequence as a 2-element list as its value.

    The user input is the arm number 'arm', the starting base index number of
    the arm segment to be randomly generated 'start', and the end base index
    number of the arm segment to be randomly generated 'end'. 'criton' refers
    to the length of the segment to be generated. An optional fifth number
    'repeat' is passed on to repeats(); it is 0 when only four numbers are
    entered.

    Arguments:
    arms         -- dictionary of arms, handed to armgen() and strandgen()
    strands      -- strands handed to repeats(); replaced by the result of
                    strandgen() when a segment is accepted or set
    linker_list  -- arm chains handed to strandgen()
    segment_list -- list of already used segments; accepted/set segments and
                    their complements are appended to it in place

    Returns the tuple (arms, strands, segment_list).

    seggen(), repeats(), compgen() and armgen() are not defined in this file
    (presumably they come from 'functions' -- verify).
    """

    # NOTE(review): the nesting below was reconstructed from a copy of the
    # file without indentation -- confirm against the original layout.

    print "Please enter the following"
    print "arm #, starting base, end base, CRITON size, # of repeats (default: None)"

    # Reads one comma-separated line of integers. Both branches break, so the
    # input is read only once; after a ValueError 'crunch_dat' stays
    # unassigned.
    while True:
        try:
            crunch_dat = map(int, raw_input().split(','))
        except ValueError:
            print "Your input doesn't make any sense!"
            break
        else:
            break

    # check if repeats should be allowed
    # (default is no input meaning no repeats allowed)
    # An unassigned 'crunch_dat' (UnboundLocalError) or a wrong number of
    # values (ValueError on unpacking) is printed and the function returns
    # its arguments unchanged.
    try:
        if len(crunch_dat) == 4:
            repeat = 0
            arm, start, end, criton = crunch_dat
        else:
            arm, start, end, criton, repeat = crunch_dat

    except (ValueError, UnboundLocalError) as error:
        print error

    else:

        # segment size to crunch (length of random bases to produce)
        segsize = end - start + 1
        # 'critkey' is only printed here, it is not used further
        critkey = 'crit' + str(segsize)
        print critkey

        while True:
            # produce a random segment 'segment' of 'segsize' and chooses to
            # (a)ccept, (r)eject, or (s)et in 'decision'.
            segment, decision = seggen(segsize, segment_list)

            # check the number of repeats of 'segment' in 'strands' and changes
            # 'decision' accordingly
            decision = repeats(strands, segment, criton, repeat, decision)

            # actions according to 'decision'; any value other than 'r', 'a'
            # or 's' loops and generates another segment
            if decision == 'r':
                break

            elif decision == 'a':
                # create complementary segment
                comp_segment = compgen(segment)

                # add segment and complementary segment to 'segment_list'
                segment_list.append(segment)
                segment_list.append(comp_segment)

                # change the specified arm segment
                arms = armgen(arms,segment,crunch_dat)
                # change the corresponding strand segment
                strands = strandgen(arms,linker_list)

                break

            elif decision == 's':
                # create segment & complementary segment
                segment = raw_input("Enter desired segment:\n").upper()
                comp_segment = compgen(segment)

                # add segment and complementary segment to 'segment_list' only if
                # the segment is a nonempty string
                if segment != '':
                    segment_list.append(segment)
                    segment_list.append(comp_segment)

                # NOTE(review): the two updates below are placed outside the
                # 'if' above, i.e. they also run for an empty segment --
                # confirm the intended nesting.
                # change the specified arm segment
                arms = armgen(arms,segment,crunch_dat)
                # change the corresponding strand segment
                strands = strandgen(arms, linker_list)

                break

    return arms, strands, segment_list
def strandgen(arms, linker_list):
    """ 'strandgen (sg)' command; returns the dictionary DNA strands 'strands'
    created by combining the dictionary input 'arms'.

    Arguments:
    arms        -- dictionary mapping 'arm<N>' to a 2-element list
                   [segment, complementary segment]
    linker_list -- list of arm chains; each chain is a sequence of arm numbers
                   given as strings (they are appended to the text 'arm')

    For every chain the arms are concatenated in order: arms at even
    positions contribute their first sequence, arms at odd positions
    contribute their second sequence reversed. The result is stored in the
    module-level dictionary 'strands' under 'strand<N>' (N counts from 1).
    Entries already in 'strands' under other keys are left untouched.

    If a chain refers to an unknown arm, a message is printed, that strand is
    not stored and the remaining chains are skipped.

    All strands are then printed in natural key order (strand2 before
    strand10), numbered from 1, in groups of 5 bases.

    Returns the module-level dictionary 'strands'.
    """
    for strand_number, arm_chain in enumerate(linker_list, 1):
        try:
            pieces = []
            for position, arm_index in enumerate(arm_chain):
                arm_pair = arms['arm' + arm_index]
                if position % 2 == 0:
                    pieces.append(arm_pair[0])
                else:
                    pieces.append(arm_pair[1][::-1])
        except KeyError:
            print("Can't generate strands. Try 'link' command again then 'strandgen'.")
            break
        strands['strand' + str(strand_number)] = ''.join(pieces)

    # Natural ordering keeps the printed counter in step with the key number;
    # a plain sorted() would put 'strand10' before 'strand2'.
    for label, key in enumerate(sorted_nicely(strands), 1):
        print("")
        print("strand %d (%d bases)" % (label, len(strands[key])))
        # print out strand in 5 base units
        print(re.sub("(.{5})", "\\1 ", strands[key]))

    print("")

    return strands
def repeatcheck(strands):
    """ 'repeatcheck' command; checks the number of repeats and prints the
    positions of the repeating segments.

    Arguments:
    strands -- dictionary mapping a strand name to its base sequence (string)

    The user enters four comma-separated integers: min. CRITON size, max.
    CRITON size, min. # of repeats, max. # of repeats. The prompt is repeated
    until exactly four integers are given. For every CRITON size in that
    range (sizes below 1 are skipped) each distinct segment of that size is
    counted over all strands; segments whose number of occurrences lies
    between the min. and max. # of repeats are printed together with the
    1-based position of every occurrence.

    Returns None.
    """
    print("Enter min. CRITON size, max. CRITON size, min. # of repeats, "
          "max. # of repeats")

    while True:
        try:
            repeat_dat = [int(field) for field in raw_input().split(',')]
        except ValueError:
            print("Please enter only integers!")
            continue
        if len(repeat_dat) == 4:
            break
        print("Please enter 4 numbers")

    mincrit, maxcrit, minrep, maxrep = repeat_dat

    # CRITON repeat check
    for criton in range(max(mincrit, 1), maxcrit + 1):

        # One pass over all strands: remember where each segment occurs and
        # the order in which the distinct segments were first seen.
        occurrences = {}
        first_seen = []
        for key, value in strands.items():
            for rep_pos in range(len(value) - criton + 1):
                testseg = value[rep_pos:rep_pos + criton]
                if testseg not in occurrences:
                    occurrences[testseg] = []
                    first_seen.append(testseg)
                occurrences[testseg].append((key, rep_pos))

        for testseg in first_seen:
            reppos_list = occurrences[testseg]
            repeatseg = len(reppos_list)

            if minrep <= repeatseg <= maxrep:
                print("'%s' has %d repeats " % (testseg, repeatseg))
                print("strand # => base position")
                for strand_num, pos in reppos_list:
                    print("%s => %d" % (strand_num, pos + 1))
                print("")
def dyadcheck(strands):
""" 'dyadcheck' command; checks each strand for segments of dyad symmetry.
"""
print "Enter segment size (in nt's) of dyad symmetry check:"
while True:
try:
segment_size = int(raw_input())
except (ValueError, UnboundLocalError):
print "Please enter only integers!"
finally:
break
# dyad symmetry check
for strand_key in strands:
dyad_list = []
for base in range(len(strands[strand_key]) - segment_size + 1):
testseg = strands[strand_key][base:base + segment_size]
dyad = compgen(testseg)[::-1]
dyad_duplicates = False
# check dyad_list for duplicate dyads
for dyad_elem in dyad_list:
if testseg in dyad_elem:
dyad_duplicates = True
if dyad_duplicates is False:
dyad_repeats = 0
# check for dyad symmetric segments
for rep_pos in range(len(strands[strand_key]) - segment_size + 1):
if dyad == strands[strand_key][rep_pos:rep_pos + segment_size]:
dyad_repeats += 1
dyad_list.append((strand_key, rep_pos, dyad))
if dyad_repeats >= 1:
print "'%s' has %d dyad symmetric repeats ('%s')" %\
(testseg, dyad_repeats, dyad)
print "strand # => base position"
# print dyad symmetric repeat strand
print "%s =>" % (dyad_list[-dyad_repeats][0]),
print "%d" % (base + 1)
for strand_num, rep_pos, dyad in dyad_list[-dyad_repeats::]:
print "%d," % (rep_pos + 1),
print "\n"
def save(arms, strands):
    """ 'save' command; saves the arms 'arms' and the produced DNA strands
    'strands' to a file in the current working directory.

    Arguments:
    arms    -- dictionary mapping an arm name to its two sequences
    strands -- dictionary mapping a strand name to its base sequence

    The user is asked for the output file name and then for confirmation
    (y/n). The file is only opened (and therefore only created or
    overwritten) after the user answered 'y'; on 'n' nothing is written.

    File layout: for every arm its name and both sequences, then the line
    "5' -> 3'", then every strand as "strand <n>" followed by the sequence in
    groups of 5 bases. Arms and strands are written in natural key order
    (arm2 before arm10); strands are numbered from 1 in that order.

    Returns None.
    """
    outputfile = raw_input("Type the name of the output file:\n")

    while True:
        decision = raw_input("Save to file (y/n)?\n")
        if decision == 'n':
            return None
        if decision == 'y':
            break
        print("please enter 'y' or 'n'")

    target = os.path.join(os.getcwd(), outputfile)

    # 'with' closes the file even if a write fails
    with open(target, "w") as strandfile:
        for key in sorted_nicely(arms):
            strandfile.write(key)
            strandfile.write("\n")
            strandfile.write("%s\n%s\n\n" % (arms[key][0], arms[key][1]))

        strandfile.write("5' -> 3'\n")

        for strand_count, key in enumerate(sorted_nicely(strands), 1):
            strandfile.write("strand %d\n" % strand_count)
            strandfile.write(
                "%s\n\n" % re.sub("(.{5})", "\\1 ", strands[key]))

    return None
def load():
    """loads a file containing sequence information of strands from the user and
    returns a dictionary 'strands'

    The user is asked for a file name. Every non-empty line of the file is
    taken as one strand: it is converted to upper case and all spaces are
    removed. The strands are stored as 'strand1', 'strand2', ... in the order
    they appear; blank lines are skipped and do not consume a number. Each
    loaded strand is printed.

    Returns the dictionary of strands, or 0 if the user entered 'q' or the
    file could not be opened.
    """
    try:
        user_input = raw_input("Enter the name of the file (e.g., strands.txt) "
                               "or 'q' to exit command:\n")

        if user_input == 'q':
            return 0

        f = open(user_input, 'r')

    except IOError:
        print("file doesn't exist!")
        return 0

    strands = {}
    print('')

    # 'with' closes the file even if reading fails part-way
    with f:
        for lines in f:
            sequence = lines.upper().replace(" ", "").strip()
            if sequence:
                name = 'strand' + str(len(strands) + 1)
                strands[name] = sequence
                print('{}: {}'.format(name, sequence))

    return strands
if __name__ == '__main__':
    # Ad-hoc sample data used when the module is run directly: two lists of
    # arm-label pairs.
    list3 = [['1','2'],['3','4'],['5','6'],['7','1'],['9','3'],['2','8'],['11','5'],['4','10'],['6','12'],['12','11'],['10','9'],['8','7']]
    list5 = [['2','3'],['4','5'],['3','2'],['5','4'],['11','10'],['9','8']]
    arm =[]
    # NOTE(review): in the code visible here 'strands' is the module-level
    # dict, not a function, so this call would raise TypeError -- confirm
    # which function was meant to be exercised.
    strands(arm,list3,list5)