-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfiles_sorting.py
154 lines (134 loc) · 7.6 KB
/
files_sorting.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import os.path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
class Txt:
def __init__(self, dir_name, number) -> None:
self.dir_name = dir_name
self.number = str(number)
self.name = self.dir_name + '/' + self.number
self.first_time = False # if the file is open for the first time use method _removeBegin
def _removeBegin(self):
if self.first_time:
a_file = open(self.name+'A.txt', "r")
lines = a_file.readlines()
a_file.close()
if "#Begin Write:" in lines[0]:
del lines[0]
new_file = open(self.name+'A.txt', "w+")
for line in lines:
new_file.write(line)
new_file.close()
def bricolage(self):
self._removeBegin()
alphabet = ['A','B','C','D','E','F','G','H','I','L','M','N','O','P','Q','R','S','T','U','V','Z','J','K','X','Y','W']
data = []
# Here I check if there are files for the same molecul and I open them and put them into the same dataFrame
for i in alphabet:
moment_name = str(self.name+i+'.txt')
if os.path.isfile(moment_name):
data.append(pd.read_csv(moment_name, sep = "\t"))
else:
break
data_frame = pd.concat([j for j in data], ignore_index=True) # I add together all the datasets
data_frame.rename(columns = {'#CycleCount/n':'CycleCount'}, inplace = True)
# Remove the keywords: #EndWrite and #Skipped
data_frame.drop(data_frame[data_frame['CycleCount'] == "#End Write"].index, inplace = True)
data_frame.drop(data_frame[data_frame['CycleCount'] == "#Skipped"].index, inplace = True)
data_frame = data_frame.reset_index()
data_frame = data_frame.drop(['index'], axis=1)
# Create the new column lamnda, variable useful later
data_frame['lambda'] = ((data_frame['A_dist-Y']+data_frame['B_dist-Y']).values)/2
'''
Store this dataFrame into a .txt file
'''
if not os.path.isfile(self.name+'.txt'):
with open(os.path.join(self.dir_name, self.number+'.txt'),'w') as outfile:
data_frame.to_string(outfile, index=False)
data_frame['Y_force'] = data_frame['Y_force'].abs()
self._createTxt(data_frame)
return data_frame
def _createTxt(self, data_frame):
# Now I want to create short dataFrame with only folding or unfolding conditions creating little .txt files with only one folding or unfolding situations:
if not os.path.isfile(self.name+'f1'+'.txt'): # here I check if the first type of file exist, which is n_molecul_u1.txt, where "u" stands for unfolding
# status == 130 => FOLDING
# status == 131 => UNFOLDING
df_empty = pd.DataFrame(columns = data_frame.columns) # here I create an empty dataframe, only with same columns of the original one
df_empty2 = df_empty.copy() # better to work with a copy
df_empty3 = df_empty.copy()
lista_folding = data_frame.index[data_frame['Status'] == 130].tolist()
lista_unfolding = data_frame.index[data_frame['Status'] == 131].tolist()
lista_folding.append(0)
lista_unfolding.append(0)
j = 0
'''
With these two for loops we create files with single folding and unfolding states
'''
for i in range(len(lista_folding)-1):
if lista_folding[i+1] - lista_folding[i] == 1:
df_empty2 = df_empty2.append(data_frame.loc[lista_folding[i]], ignore_index=True)
else:
df_empty2 = df_empty2.append(data_frame.loc[lista_folding[i]], ignore_index=True)
with open(os.path.join(self.dir_name, self.number+'f'+str(j+1)+'.txt'), 'w') as outfile:
df_empty2[['X_force', 'Z_force', 'lambda', 'Y_force', 'time(sec)']].to_string(outfile, index=False)
j += 1
self._createLog(df_empty2, j, 0)
df_empty2 = df_empty.iloc[0:0].copy() # empty the dataframe
j = 0
for i in range(len(lista_unfolding)-1):
if lista_unfolding[i+1] - lista_unfolding[i] == 1:
df_empty3 = df_empty3.append(data_frame.loc[lista_unfolding[i]], ignore_index=True)
else:
df_empty3 = df_empty3.append(data_frame.loc[lista_unfolding[i]], ignore_index=True)
with open(os.path.join(self.dir_name, self.number+'u'+str(j+1)+'.txt'), 'w') as outfile:
df_empty3[['X_force', 'Z_force', 'lambda', 'Y_force', 'time(sec)']].to_string(outfile, index=False)
j += 1
self._createLog(df_empty3, j, 1)
df_empty3 = df_empty.iloc[0:0].copy() # empty the dataframe
self._controlplot()
def _createLog(self, data_frame, file_name, fold_unfold):
# minimum = min(len([name for name in os.listdir("Pulling_3bs") if os.path.isfile("Pulling_3bs"+'/'+name) if 'u' in name]), len([name for name in os.listdir("Pulling_3bs") if os.path.isfile("Pulling_3bs"+'/'+name) if 'f' in name]))
# data = []
moment_name = self.name
if fold_unfold == 0:
moment_name += 'f'+str(file_name)+'.txt'
else:
moment_name += 'u'+str(file_name)+'.txt'
# data.append(pd.read_fwf(moment_name))
# data_frame = pd.concat([j for j in data], ignore_index=True) # I add together all the datasets
count_in = [data_frame.loc[0].at['CycleCount']]
count_fin = [data_frame.loc[data_frame.shape[0]-1].at['CycleCount']]
force_x_mean = [data_frame['X_force'].mean()]
sigma_force_x = [data_frame['X_force'].std()]
force_y_mean = [data_frame['Y_force'].mean()]
sigma_force_y = [data_frame['Y_force'].std()]
force_z_mean = [data_frame['Z_force'].mean()]
sigma_force_z = [data_frame['Z_force'].std()]
temp_A = [data_frame['A_Temperature'].mean()]
sigma_temp_A = [data_frame['A_Temperature'].std()]
temp_B = [data_frame['B_Temperature'].mean()]
sigma_temp_B = [data_frame['B_Temperature'].std()]
t_in = [data_frame.loc[0].at['time(sec)']]
t_fin = [data_frame.loc[data_frame.shape[0]-1].at['time(sec)']]
delta_time = [t_fin[0]-t_in[0]]
df = pd.DataFrame({"NomeFile": moment_name, "count_in": count_in, "count_fin": count_fin, "force_x_mean": force_x_mean,
"sigma_force_x": sigma_force_x, "force_y_mean": force_y_mean, "sigma_force_y": sigma_force_y, "force_z_mean": force_z_mean,
"sigma_force_z": sigma_force_z, "temp_A": temp_A, "sigma_temp_A": sigma_temp_A, "temp_B": temp_B, "sigma_temp_B": sigma_temp_B,
"delta_time": delta_time})
with open(os.path.join(self.dir_name, 'Log'+self.number+'.txt'),'a') as outfile:
df.to_string(outfile, index=False)
def _controlplot(self):
data = []
moment_name = self.dir_name+'/'+'Log'+self.number+'.txt'
data.append(pd.read_fwf(moment_name))
data_frame = pd.concat([j for j in data], ignore_index=True) # I add together all the datasets
#Graphic
x = range(data_frame.shape[0])
plt.plot(x, data_frame['force_y_mean'], label='Force Y')
plt.plot(x, data_frame['force_x_mean'], label='Force X')
plt.plot(x, data_frame['force_z_mean'], label='Force Z')
plt.xlabel('#Steps')
plt.ylabel('f')
plt.title('Log'+self.number)
plt.legend()
plt.show()