-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMAIN.R
103 lines (82 loc) · 3.93 KB
/
MAIN.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# Main script. It sources the auxiliary scripts and then runs the analysis
# steps below using the functions they define.

# Directory that contains this script and the auxiliary scripts sourced below.
# EDIT THIS PATH before running. The original line passed unquoted prose to
# setwd(), which is not valid R and prevented the whole file from parsing.
code_dir <- "path/to/the/directory/where/your/codes/are/saved"
if (!dir.exists(code_dir)) {
  stop("Please set 'code_dir' to the directory where your codes are saved; ",
       "directory not found: ", code_dir, call. = FALSE)
}
setwd(code_dir)

# Auxiliary scripts; source() resolves these relative to the working
# directory set just above.
source("setLabels.R")        # label_sepsis()
source("ModuleAnalysis.R")   # cellular-deconvolution helpers
source("DataPreparation.R")  # convCEL2patID(), getGenelist()
source("TextMining.R")       # textMining()
####### Bioconductor Libraries required
library(Biobase)
library(limma)
library(mygene)
library(ComplexHeatmap)
####### CRAN Libraries required
library(ggplot2)
library(circlize)
library(reshape2)
library(EnvStats)
library(devtools)
library(rentrez)
library(plyr)
library(e1071)
library(tidyr)
library(dplyr)
library(plyr)
# Directory that contains the "1 Data" folder used by every relative path
# below. EDIT THIS PATH before running. The original line passed unquoted
# prose to setwd(), which is not valid R and prevented the file from parsing.
data_dir <- "path/to/the/directory/where/your/Data/and/Results/are/saved"
if (!dir.exists(data_dir)) {
  stop("Please set 'data_dir' to the directory where your Data and Results ",
       "are saved; directory not found: ", data_dir, call. = FALSE)
}
setwd(data_dir)

# Expression matrix for all probes; the first CSV column becomes the row names.
exp_matrix_all <- read.csv(
  "./1 Data/TAC software/Expression matrices/Expression matrix_all_noUTI.csv",
  header = TRUE,
  row.names = 1
)

# Fold-change / p-value table for all probes; first CSV column -> row names.
fc_pval_all <- read.csv(
  "./1 Data/TAC software/Fold Change_P Val/Limma_all.csv",
  header = TRUE,
  row.names = 1
)

# Drop every probe whose Gene.Symbol is the first factor level (the lowest
# value in sort order).
# NOTE(review): presumably that level is the placeholder used for probes
# without a gene symbol -- confirm against the CSV.
# The explicit factor() keeps the level-code test working on R >= 4.0, where
# read.csv() returns character columns: as.numeric() on the raw strings would
# give NA for every row and the subset would consist of all-NA rows.
fc_pval_known <- fc_pval_all[
  !(as.numeric(factor(fc_pval_all$Gene.Symbol)) == 1),
]

# DataPreparation.R script. Converts column names from CEL file name to
# patient ID.
exp_matrix_all <- convCEL2patID(exp_matrix_all)
# Per-sample labels from label_sepsis() (setLabels.R script).
label <- label_sepsis(exp_matrix_all)

# Convert the 0/1 labels to Control/Case class names ("Vascular"/"Sepsis").
# This is required by many functions. The labels go through factor ->
# character exactly as before, so "1" maps to "Sepsis" and every other value
# maps to "Vascular".
# Vectorized replacement for the former `for (i in 1:length(outcome))` loop.
# A missing label now stays NA instead of aborting the script inside `if`.
outcome <- as.character(as.factor(label))
outcome <- ifelse(outcome == "1", "Sepsis", "Vascular")
# Restrict the expression matrix to the probes kept in fc_pval_known
# (rows are matched by row name).
exp_matrix_known <- exp_matrix_all[rownames(fc_pval_known),]
# Cellular deconvolution. All helper functions called in this section are
# defined in the ModuleAnalysis.R script; what each one returns is not visible
# from this file, so the step descriptions below are hedged.
# Note: the variable name `iris` masks R's built-in `iris` dataset for the
# rest of the session.
iris<-read.csv("./1 Data/IRIS_data.csv", header=TRUE)
# Keep only rows whose Cell.Specificity is one of the six listed cell types,
# and only the Name and Cell.Specificity columns.
iris_reduced <- as.data.frame(iris[iris$Cell.Specificity %in%
c("Neutrophil", "Dendritic Cell", "B Cell",
"T Cell", "NK Cell", "Monocyte"),
c("Name", "Cell.Specificity")])
write.csv(iris_reduced, "./1 Data/iris_genesymbol.csv")
# Presumably maps gene aliases in iris_reduced to official symbols -- TODO
# confirm in ModuleAnalysis.R.
a2s.obj_clean <- alias2symbol_transform(iris_reduced)
write.csv(a2s.obj_clean, "./1 Data/iris_genesymbol_alias2symbol.csv")
# Presumably expands multi-gene rows of the fold-change table -- TODO confirm.
fc_pval_known_unnest <- data_unnest(fc_pval_known)
# Combines the unnested fold-change table with the cleaned IRIS symbols;
# presumably yields a probe-to-cell-type mapping -- TODO confirm.
probe_cell_uniquePID <- probe2cell(fc_pval_known_unnest,
a2s.obj_clean)
write.csv(probe_cell_uniquePID, "./1 Data/Probe2Cell_avgexpression.csv")
total_iris <- analysis_ready_data(exp_matrix_known, probe_cell_uniquePID)
write.csv(total_iris, "./1 Data/IRIS_probes_allinfo.csv")
# NOTE(review): `exp_matrix_iris` is never assigned anywhere in this script.
# Unless a sourced helper creates it as a global, the next two calls fail with
# "object not found" -- confirm where it is supposed to come from.
heatmap_sorted(exp_matrix_iris, total_iris)
aggdata_iris_trunc <- aggregate_data(total_iris, exp_matrix_iris)
write.csv(aggdata_iris_trunc, "./1 Data/IRIS_probe_TAC_forstatisticaltest.csv")
# NOTE(review): `aggdata_iris` is not assigned in this script either; only
# `aggdata_iris_trunc` is. Confirm which object avg_exp_plt() should receive.
avg_exp_plt(aggdata_iris)
dir_reg_plt(total_iris)
# Post-feature selection tasks.

# Expression matrix of the differentially expressed probes; the first CSV
# column becomes the row names.
exp_matrix_diff <- read.csv(
  "./1 Data/TAC software/Expression matrices/Expression matrix_diffexp_noUTI.csv",
  header = TRUE,
  row.names = 1
)

# Fold-change / p-value table for the differentially expressed probes
# (file name indicates FDR 0.01 and logFC 1 thresholds).
fc_pval_diff <- read.csv(
  "./1 Data/TAC software/Fold Change_P Val/Limma_FDR0.01_logFC1.csv",
  header = TRUE,
  row.names = 1
)

# Drop every probe whose Gene.Symbol is the first factor level (the lowest
# value in sort order).
# NOTE(review): presumably that level is the placeholder used for probes
# without a gene symbol -- confirm against the CSV.
# The explicit factor() keeps the level-code test working on R >= 4.0, where
# read.csv() returns character columns and as.numeric() on them would be NA.
fc_pval_diff_known <- fc_pval_diff[
  !(as.numeric(factor(fc_pval_diff$Gene.Symbol)) == 1),
]

voted_probes <- read.csv("./1 Data/Voted_probes.csv",
                         header = TRUE,
                         row.names = 1)

# Select the voted probes from the fold-change table.
# A factor used as a row index is treated as its integer level codes, not its
# labels, so it would silently pick the wrong rows. Converting factors to
# character makes the lookup go by row name. Other index types are unchanged.
voted_features <- voted_probes$Features
if (is.factor(voted_features)) {
  voted_features <- as.character(voted_features)
}
fc_pval_vote <- fc_pval_diff_known[voted_features, ]
write.csv(fc_pval_vote, "./1 Data/FC_pVal_votedprobes.csv")

# DataPreparation.R script.
genes_unique <- getGenelist(fc_pval_vote)
# Run text mining on the selected genes; textMining() is defined in the
# TextMining.R script.
textMining(genes_unique)

# Prepare the voted-probe table for Ingenuity Pathway Analysis (IPA) and save
# it as an unquoted, tab-separated text file.
newdata_ipa <- format_forIPA(fc_pval_vote)
write.table(
  x = newdata_ipa,
  file = "./1 Data/IPA/IPA_TAC_RMA_selectedprobes.txt",
  sep = "\t",
  quote = FALSE
)