-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path5_GetMatchedPhenoData.R
60 lines (47 loc) · 1.72 KB
/
5_GetMatchedPhenoData.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# Title: Get Matched Phenotype Data
# Author: Olivia Williamson, Natalie Goulett, Gabriel Odom
# Edited: 2024-04-29
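# Load packages (they must already be installed; the Bioconductor packages
#   GEOquery, Biobase, and biomaRt can be installed with BiocManager::install())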
library(BiocManager)
library(GEOquery)
library(Biobase)
library(biomaRt)
library(tidyverse)
### GSE 186332 ###
# Download the GSE186332 series from GEO and keep the first element of the
#   returned list.
GSE186332 <- getGEO("GSE186332")
GSE186332_data <- GSE186332[[1]]
# Preview the phenotype table to see which columns are available.
head(pData(GSE186332_data))

# Phenotype columns of interest for this series. This vector is kept for
#   reference only: the pipeline below uses just `title` and
#   `characteristics_ch1.1`.
GSE186332_pheno_names_to_keep <- c(
  "title", "characteristics_ch1.1", "characteristics_ch1.2",
  "characteristics_ch1.3", "treatment_protocol_ch1.1",
  "treatment_protocol_ch1.3", "treatment_protocol_ch1.4",
  "treatment_protocol_ch1.5", "treatment_protocol_ch1.6",
  "experimental set:ch1", "patient response:ch1", "trial arm:ch1"
)

# Build a two-column tibble (treatment, SampleID) for this series.
pheno_GSE186332_df <- pData(GSE186332_data) %>%
  # NOTE(review): `characteristics_ch1.1` is a raw GEO characteristics field;
  #   confirm its values are comparable with the `treatment:ch1` values used
  #   for GSE154041 before analysing the merged table.
  select(title, treatment = `characteristics_ch1.1`) %>%
  # Replace the first "-" in the title with "." to form the sample ID.
  # NOTE(review): only the FIRST hyphen is replaced; if any title contains
  #   more than one, confirm whether str_replace_all() is needed.
  mutate(SampleID = str_replace(title, pattern = "-", replacement = "\\.")) %>%
  # Prefix the ID with "X".
  mutate(SampleID = paste0("X", SampleID)) %>%
  select(-title) %>%
  as_tibble()
### GSE 154041 ###
# Download the GSE154041 series from GEO and keep the first element of the
#   returned list.
GSE154041 <- getGEO("GSE154041")
GSE154041_data <- GSE154041[[1]]
# Preview the phenotype table to see which columns are available.
head(pData(GSE154041_data))

# Phenotype columns of interest for this series. This vector is kept for
#   reference only: the pipeline below uses just `title` and `treatment:ch1`.
GSE154041_pheno_names_to_keep <- c(
  "title", "characteristics_ch1", "characteristics_ch1.1", "description",
  "overall survival (months):ch1", "treatment:ch1"
)

# Build a two-column tibble (treatment, SampleID) for this series.
pheno_GSE154041_df <- pData(GSE154041_data) %>%
  select(title, treatment = `treatment:ch1`) %>%
  # The sample ID is the first "three digits, underscore, two digits" run in
  #   the title; str_extract() yields NA when a title has no such run.
  mutate(SampleID = str_extract(title, pattern = "\\d{3}_\\d{2}")) %>%
  # Prefix the ID with "X". str_c() propagates NA, so a title without a
  #   recognisable ID stays NA instead of silently becoming the string "XNA"
  #   (which is what paste0() would produce).
  mutate(SampleID = str_c("X", SampleID)) %>%
  select(-title) %>%
  as_tibble()
### Merge & Save ###
# Stack the two per-series phenotype tables (GSE154041 rows first, then
#   GSE186332 rows).
pheno_bind_df <- list(pheno_GSE154041_df, pheno_GSE186332_df) %>%
  bind_rows()

# Write the combined table to disk as CSV, without a row-name column.
pheno_bind_df %>%
  write.csv(
    file = "./Data_Clean/phenotype_joined_information_20240429.csv",
    row.names = FALSE
  )