-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathStep_3_MM20221122_boxplot_mean.R
313 lines (233 loc) · 11.8 KB
/
Step_3_MM20221122_boxplot_mean.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
# load libraries
library(ggplot2)
library(dplyr)
library(plotly)
library("DescTools")
# clean environment
rm(list=ls())
# define wd
setwd("C:/Users/Mende012/Documents/Bioinformatics/ROS assays/DR_final_pipeline")
# import file
df <- read.csv("./Step_2_MM20230106_2_auc_data.csv")
# Accidentally named HopN1 as AvrN1 - change to correct name
df$treatment_id[df$treatment_id == 'D36E+AvrN1_Bacterial PAMPs'] <- 'D36E+HopN1_Bacterial PAMPs'
# select only one assays per treatment combination
df <- df[!(df$assay_id == "plate025"),]
df <- df[!(df$assay_id == "plate020"),]
df <- df[!(df$assay_id == "plate029"),]
df <- df[!(df$assay_id == "plate032"),]
df <- df[!(df$assay_id == "plate033"),]
df <- df[!(df$assay_id == "plate034"),]
df <- df[!(df$assay_id == "plate037"),]
df <- df[!((df$assay_id == "plate028") & (grepl(("AvrPto"), df$treatment_id))),]
#################################################################################
#################################################################################
# ANOVA after normalisation
# 1. ANOVA to analyse wheather there are differences between the groups
# H0 All means are the same
# H1 at least one mean is different
#anova_ROS <- aov(area_under_curve ~ treatment_id, data = df)
#{summary(anova_ROS)
# summary.lm(anova_ROS)}
# Dunnett's test - summarised analysis
#Dunnet <- DunnettTest(area_under_curve ~ treatment_id, data = df, control = "D36E_Bacterial PAMPs", conf.level = 0.95)
#print(Dunnet)
################################################################################
################################################################################
# Dunnett's test - per plate --> makes more sense due to inter-assay variation
# tbl_df <- df %>% as_tibble() %>% group_by(assay_id)
dunn_res <- lapply(unique(df$assay_id), function(x) as.data.frame(DunnettTest(area_under_curve ~ treatment_id,
data = df[df$assay_id==x,],
control = "D36E_Bacterial PAMPs",
conf.level = 0.95)$`D36E_Bacterial PAMPs`)) %>% bind_rows()
# add column with asterix for significant results
dunn_res$ast <- ''
dunn_res$ast[dunn_res$pval<0.1] <- '.'
dunn_res$ast[dunn_res$pval<0.05] <- '*'
dunn_res$ast[dunn_res$pval<0.01] <- '**'
dunn_res$ast[dunn_res$pval<0.001] <- '***'
dunn_res_1 <- dunn_res
dunn_res$sample <- row.names(dunn_res_1)
rm(dunn_res_1)
# remove comparisons present in every assay
remove <- c("D36E_MQ-D36E_Bacterial PAMPs","DC3000_Bacterial PAMPs-D36E_Bacterial PAMPs", "DC3000_MQ-D36E_Bacterial PAMPs")
dunn_res <- dunn_res[!grepl(paste(remove, collapse = "|"), dunn_res$sample),]
# sub string sample name to match names of treatments
dunn_res$sample <- sub("-D36E.*", "", dunn_res$sample)
# simple table
simple <- data.frame(dunn_res$sample, dunn_res$ast, row.names = NULL)
#################################################################################
###############################boxplot pre-scaling###############################
#################################################################################
# colour scheme
cbp1 <- c("#999999", "#44AA99", "#E69F00", "#661100", "#56B4E9", "#000000", "#117733", "#332288", "#F0E442", "#0072B2", "#D55E00", "#999933", "#CC79A7")
# https://stackoverflow.com/questions/57153428/r-plot-color-combinations-that-are-colorblind-accessible
# basic boxplot
g2 <- ggplot(
#input data
df, aes(x=as.factor(treatment_id), y=area_under_curve)) +
# generate basic boxplot
geom_boxplot(fill="white", # box colour
outlier.colour = "white", # Outliers color,
alpha=0) + # Box color transparency
# overlay with jitter
geom_jitter(shape=16, position=position_jitter(0.1),
aes(colour = factor(assay_id))) +
scale_colour_manual(values = cbp1) +
# define the theme of the boxplot
theme_bw() + # make the bg white
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.border = element_blank(), # remove background, frame
axis.line = element_line(colour = "black")) +
# label the axises
xlab("Pseudomonas strain") +
ylab("log2 (normalised area under the curve)") +
theme(axis.text.x = element_text(angle = 70, hjust = 1)) + # turn the tick marks on the x axis 45 degree
# define axis limits if needed
expand_limits( y = c(0, 5)) +
# add statistical information: error bars, statistical results
stat_boxplot(geom = "errorbar", # Error bars
width = 0.2)
# ANOVA/TUKEY test labels
#geom_text(data = simple, aes(y= dunn_res.sample, x = dunn_res.ast))
#vjust= -10 ,hjust= 0.5)
# significance for single comparisons
#geom_signif(comparisons = list(c("D36", "D36+AvrE1"),
# c("D36", "D36+HopM1"),
# c("D36", "D36+HopU1"),
# c("D36", "DC3000")),
# map_signif_level = TRUE,
# y_position = c(3, 3.5, 4, 4.5))+
#ggsignif::geom_signif(
#comparisons = list(c("D36E_Bacterial PAMPs", "D36E+AvrE1_Bacterial PAMPs"),
# c("D36E_Bacterial PAMPs", "D36E+HopAD1_Bacterial PAMPs"),
# c("D36E_Bacterial PAMPs", "D36E+HopAF1_Bacterial PAMPs"),
# c("D36E_Bacterial PAMPs", "D36E+HopAM1_Bacterial PAMPs"),
# c("D36E_Bacterial PAMPs", "D36E+HopF2_Bacterial PAMPs"),
# c("D36E_Bacterial PAMPs", "D36E+HopM1_Bacterial PAMPs"),
# c("D36E_Bacterial PAMPs", "dCEL_Bacterial PAMPs")
# ),
#annotations = c("***", "***", "***", "***", "***", "*", '***'),
#y_position = c(17, 19, 21, 23, 25, 27, 29)
#)
ggsave("MM20230106_ROS_boxpl_single_mean.svg",width = 10, height = 6)
g2
dev.off()
g2
ggplotly(g2)
###################################################################################
######################## scale by positive control - ##############################
###################################################################################
### STEP1: write function to show ratio between mean/mean positive control #####
### and sample measurement ### c
## write a function to identify the mean/average of the positive control D36+PAMPs
# identify unique assayIDs and sort them to ensure order
assayIDs <- unique(df$assay_id)
assayIDs <- sort(assayIDs)
# creat/test function
med_pos <- function(df, assayID, positive_control){
a <- as.data.frame(df[df$assay_id == assayID,])
b <- as.data.frame(a[a$treatment_id == positive_control,])
contr_mean <- mean(b$area_under_curve) # change here from mean to mean
return(contr_mean)
}
# apply the function over all the assays
contr_mean <- lapply(assayIDs, function(x){
a <- as.data.frame(df[df$assay_id == x,])
b <- as.data.frame(a[a$treatment_id == "D36E_Bacterial PAMPs",])
contr_mean <- mean(b$area_under_curve)
return(contr_mean)
})
## make small df to store mean/mean results per assay
df_ctrmed <- data.frame(assayIDs, unlist(contr_mean))
colnames(df_ctrmed) <- c("assay_id", "mean_positive_control") #change mean/mean
rm(ls = contr_mean)
## write function (function works with single assay)
mult <- function(assayID){
rows <- nrow(df[df$assay_id == assayID,]) # count rows for of each assay
a <- df_ctrmed[df_ctrmed$assay_id == assayID,] # access the mean/mean of a respective control
re_assay_id <- unlist(rep(a$assay_id, times = rows)) # times assayID as many times as there are rows for the assayID
re_mean <- unlist(rep(a$mean_positive_control, times = rows)) #update mean/mean, times control/mean as many times as there are rows for the assayID
b <- data.frame(unlist(re_assay_id), unlist(re_mean)) # make a df of length = assay rows
return(b)
}
# use the function with lapply to repeat the same for all plates
rows <- lapply(assayIDs, function(x){
rows <- nrow(df[df$assay_id == x,])
a <- df_ctrmed[df_ctrmed$assay_id == x,]
re_assay_id <- unlist(rep(a$assay_id, times = rows))
re_mean <- unlist(rep(a$mean_positive_control, times = rows))
b <- data.frame(unlist(re_assay_id), unlist(re_mean))
return(b)
}
) %>% bind_rows()
## reorganise and merge df
# rename columns in a meaningful way
colnames(rows) <- c("assay_id", "mean_positive_control") #change name mean/mean
# order df so that it is also ordered by plateID
df <- df[order(df$assay_id, decreasing = FALSE),]
# add additional column to original df
df["mean_positive_control"] <- rows$mean_positive_control
# clean up
rm(ls=df_ctrmed)
rm(ls=rows)
### use the mean/mean to scale the measurements
## write a function to calculate how the measurments compare to the positive control
## calculate this as a % of the positive_control_mean
prc_pos_avg <- (df$area_under_curve/df$mean_positive_control)*100
df["prc_pos_avg"] <- prc_pos_avg
cbp1 <- cbp1 <- c("#999999", "#44AA99", "#E69F00", "#661100", "#56B4E9", "#000000", "#117733", "#332288", "#F0E442", "#0072B2", "#D55E00", "#999933", "#CC79A7")
# https://stackoverflow.com/questions/57153428/r-plot-color-combinations-that-are-colorblind-accessible
# scaled boxplot
g3 <- ggplot(
#input data
df, aes(x=as.factor(treatment_id), y=prc_pos_avg)) +
# generate basic boxplot
geom_boxplot(fill="white", # box colour
outlier.colour = "white", # Outliers color,
alpha=0) + # Box color transparency
# overlay with jitter
geom_jitter(shape=16, position=position_jitter(0.1),
aes(colour = factor(assay_id))) +
scale_colour_manual(values = cbp1) +
# define the theme of the boxplot
theme_bw() + # make the bg white
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.border = element_blank(), # remove background, frame
axis.line = element_line(colour = "black")) +
# label the axises
xlab("Pseudomonas strain") +
ylab("% log2 (normalised area under the curve)") +
theme(axis.text.x = element_text(angle = 70, hjust = 1)) + # turn the tick marks on the x axis 45 degree
# define axis limits if needed
expand_limits( y = c(0, 5)) +
# add statistical information: error bars, statistical results
stat_boxplot(geom = "errorbar", # Error bars
width = 0.2)
ggsave("MM20230106_ROS_boxpl_single_scaled_mean.svg",width = 10, height = 6)
g3
dev.off()
ggplotly(g3)
#################################################################################
#################################################################################
# Dunnett's test - per plate with scaled data
dunn_scal <- lapply(unique(df$assay_id), function(x) as.data.frame(DunnettTest(prc_pos_avg ~ treatment_id,
data = df[df$assay_id==x,],
control = "D36E_Bacterial PAMPs",
conf.level = 0.95)$`D36E_Bacterial PAMPs`)) %>% bind_rows()
# add column with asterix for significant results
dunn_scal$ast <- ''
dunn_scal$ast[dunn_scal$pval<0.1] <- '.'
dunn_scal$ast[dunn_scal$pval<0.05] <- '*'
dunn_scal$ast[dunn_scal$pval<0.01] <- '**'
dunn_scal$ast[dunn_scal$pval<0.001] <- '***'
dunn_scal_1 <- dunn_scal
dunn_scal$sample <- row.names(dunn_scal_1)
rm(dunn_scal_1)
# remove comparisons present in every assay
remove <- c("D36E_MQ-D36E_Bacterial PAMPs","DC3000_Bacterial PAMPs-D36E_Bacterial PAMPs", "DC3000_MQ-D36E_Bacterial PAMPs")
dunn_scal <- dunn_scal[!grepl(paste(remove, collapse = "|"), dunn_scal$sample),]
# sub string sample name to match names of treatments
dunn_scal$sample <- sub("-D36E.*", "", dunn_scal$sample)
# simple table
simple_scal <- data.frame(dunn_scal$sample, dunn_scal$ast, row.names = NULL)