#!/usr/bin/env python
# coding: utf-8
# File: An attempt on replicating Results.py
# The research article I've selected is **Orthogonality in Principal Component Analysis Allows the Discovery of Lipids in the Jejunum That Are Independent of Ad Libitum Feeding.** https://www.mdpi.com/2218-1989/12/9/866#app1-metabolites-12-00866
#
# I've tried to replicate Figures 2B and 4A from this article.
#
# **Fig 2B**. Loadings plot of the first and second principal components of the PCA of joining the lipidomes of the jejunum (circles) and the liver (triangles).
#
# **Fig 4A**. Barplots of the effect of the treatments on the log of ratio between sum of triacylglycerols (TGs) and the sum of fatty acids (FAs) in the jejunum.
# In[1]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
# In[2]:
# Load the lipid table; the first CSV column becomes the row index.
data = pd.read_csv("C:/Users/mahen/Downloads/Orthogonality.csv", index_col=0)
data
# In[3]:
# Natural-log transform of every value in the table.
data_1 = data.pipe(np.log)
data_1
# In[4]:
# Centre each column by subtracting its mean.
center_data = data_1.sub(data_1.mean())
center_data
# In[5]:
# Scale each centred column by the standard deviation of the logged data.
scaled_data = center_data.div(data_1.std())
scaled_data
# In[6]:
# Swap rows and columns; the PCA below is fitted on this transposed table,
# so the original columns act as its observations.
transpose_data = scaled_data.T
transpose_data
# In[7]:
# Principal component analysis, keeping the first two components.
pca = PCA(n_components=2)
# Fit on the transposed table and project it onto those two components.
pca_result = pca.fit_transform(transpose_data)
# Negate both components to flip the orientation of the axes.
pca_result_inverted = -pca_result
# Wrap the flipped projection in a DataFrame that keeps the PCA input's index.
pca_df = pd.DataFrame(
    pca_result_inverted,
    index=transpose_data.index,
    columns=['PC1', 'PC2'],
)
# Quick, unlabelled look at the projection.
_, overview_ax = plt.subplots(figsize=(8, 6))
overview_ax.scatter(pca_df['PC1'], pca_df['PC2'], c='blue', alpha=0.5)
overview_ax.set_title('PCA Plot')
overview_ax.set_xlabel('Principal Component 1 (PC1)')
overview_ax.set_ylabel('Principal Component 2 (PC2)')
plt.show()
# In[8]:
# Name the index 'Sample.ID' so it can serve as the merge key later on.
pca_df = pca_df.rename_axis('Sample.ID')
pca_df
# In[9]:
# Read the transformed sheet and keep only the annotation columns
# (positions 38-40) for the first 282 rows. The plotting code further down
# reads 'Tissue' and 'family' from the merged result, so these columns are
# expected to carry that information.
joiner = pd.read_csv("C:/Users/mahen/Downloads/RawData/BF550/Transformed.csv", index_col=0)
final_join = joiner.iloc[0:282, 38:41]
final_join
# In[10]:
# Hold the annotations and the PCA coordinates as two DataFrames.
tissue_family_df = pd.DataFrame(final_join)
pca_df = pd.DataFrame(pca_df)
# Join them on 'Sample.ID' (the index name given to pca_df earlier;
# presumably also the key of the annotation table -- verify against the CSV).
merged_df = tissue_family_df.merge(pca_df, on='Sample.ID')
merged_df
# In[11]:
# Figure 2B: plot the merged PCA coordinates, split by tissue.
liver_data = merged_df[merged_df['Tissue'] == 'Liver']
jejunum_data = merged_df[merged_df['Tissue'] == 'Jejunum']
plt.figure(figsize=(10, 8))
# Liver lipids: hollow triangles with a black outline
plt.scatter(liver_data['PC1'], liver_data['PC2'], c='none', edgecolor='black', marker='^', label='Lipid in Liver')
# Jejunum lipids: hollow circles with a black outline
plt.scatter(jejunum_data['PC1'], jejunum_data['PC2'], c='none', edgecolor='black', marker='o', label='Lipid in Jejunum')
# Highlight two jejunal lipid families with filled circles drawn on top.
# Each subset is filtered on the same family that its variable name and
# legend label state, so the legend describes the points it is attached to.
jej_fa = jejunum_data[jejunum_data['family'] == 'FA']
jej_tg = jejunum_data[jejunum_data['family'] == 'TG']
plt.scatter(jej_fa['PC1'], jej_fa['PC2'], c='red', marker='o', label='FA in Jejunum')
plt.scatter(jej_tg['PC1'], jej_tg['PC2'], c='blue', marker='o', label='TG in Jejunum')
# Customize plot
plt.title('PCA of the jejunal and hepatic lipidomes')
plt.xlabel('Loadings Principal Component 1')
plt.ylabel('Loadings Principal Component 2')
plt.legend()  # Show legend with labels
# Reference lines through the origin (horizontal, then vertical)
plt.axhline(0, color='black', linestyle='-', linewidth=1)
plt.axvline(0, color='black', linestyle='-', linewidth=1)
# Show the plot
plt.show()
# **Figure 2B**: Loadings plot of the first and second principal components of the PCA of joining the lipidomes of the jejunum (circles) and the liver (triangles).
#
# In[12]:
data
# In[13]:
# Select columns 124-156 of the raw table; per the original notebook these
# are the jejunal fatty acids (FA) -- the positions depend on the CSV layout.
# .copy() yields an independent frame, so adding a column below does not
# write into a slice of `data` (avoids pandas' SettingWithCopyWarning).
Jej_FA = data.iloc[:, 124:157].copy()
# Row-wise total of the selected FA columns
Jej_FA['Sum of FA'] = Jej_FA.sum(axis=1)
Jej_FA
# In[14]:
# Select columns 241-262 of the raw table; per the original notebook these
# are the jejunal triacylglycerols (TG) -- positions depend on the CSV layout.
Jej_TG = data.iloc[:, 241:263].copy()
# Row-wise total of the selected TG columns
Jej_TG['Sum of TG'] = Jej_TG.sum(axis=1)
Jej_TG
# In[15]:
# Pick the two sum columns by name (not by position, which would silently
# point at the wrong column if the slices above ever change width) and
# place them side by side.
s_tg = Jej_TG['Sum of TG']
s_fa = Jej_FA['Sum of FA']
result_df = pd.concat([s_fa, s_tg], axis=1)
# In[16]:
# Ratio sum(TG) / sum(FA) for every row
result_df['Result'] = result_df['Sum of TG'].div(result_df['Sum of FA'])
result_df
# In[17]:
# Natural log of the ratio, stored back as its own column
abc = result_df['Result']
z = np.log(abc)
result_df['Log'] = z
result_df
# In[18]:
# Mean and standard error of the log ratio within each 'Sample.ID' group.
log_by_group = result_df.groupby('Sample.ID')['Log']
result_summary = log_by_group.agg(['mean', 'sem'])
result_summary
# In[19]:
# Row order wanted for the bar plot.
desired_order = [
    'Control',
    'i.V. 6h',
    'i.V. 24h',
    'i.V. 72h',
    'i.V. 168h',
    'i.V. dd 72h',
    'i.P. 72h',
]
# Rearrange the summary rows into that order.
result_summary = result_summary.reindex(index=desired_order)
# Show the reordered summary.
print(result_summary)
# In[20]:
# Fill colour used for each treatment's bar.
colors = {
    'Control': 'white',
    'i.V. 6h': 'lightpink',
    'i.V. 24h': 'pink',
    'i.V. 72h': 'coral',
    'i.V. 168h': 'red',
    'i.V. dd 72h': 'lightgreen',
    'i.P. 72h': 'blue',
}
# One colour per row of the summary, in row order.
bar_colors = [colors[treatment] for treatment in result_summary.index]
# Bar plot of the group means with the standard error as error bars.
ax = result_summary.plot.bar(
    y='mean',
    yerr='sem',
    capsize=5,
    color=bar_colors,
    edgecolor='black',
    legend=False,
)
# Axis label and title
ax.set_ylabel('log(sum(TGs)/sum(FAs))')
ax.set_title('Effect of the treatment in the selected lipids in the jejunum')
# Dashed vertical separators half a bar to the right of 'Control',
# 'i.V. 168h' and 'i.V. dd 72h'.
for treatment in ('Control', 'i.V. 168h', 'i.V. dd 72h'):
    ax.axvline(result_summary.index.get_loc(treatment) + 0.5, linestyle='--', color='black')
# Keep the tick labels horizontal and drop the x-axis label.
plt.xticks(rotation=0)
ax.set_xlabel('')
# Show the plot
plt.show()
# **Figure 4A** : Barplots of the effect of the treatments on the log of ratio between sum of triacylglycerols (TGs) and the sum of fatty acids (FAs) in the jejunum
# In[ ]: