-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathstep3.DeconstructSigs.R
125 lines (106 loc) · 4.17 KB
/
step3.DeconstructSigs.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# deconstructSigs
# https://github.com/raerose01/deconstructSigs
# https://jakeconway.github.io/docs/deconstructSigs/
library(deconstructSigs)
library(data.table)
library(tidyverse)
library(BSgenome.Hsapiens.UCSC.hg38)
library(pals)
sample.mut.ref.smoking <- fread("/dir/BCFtools.filter.tumoronly.autosomal.vcf/smoking_38_vcf/smoking_tumor_autosomal_MPF_SBS.txt", header = F)
sample.mut.ref.smoking %>%
count(V1) %>%
pull(V1) %>%
dput() -> sampleid_teeth
# Convert to deconstructSigs input
sigs.input <- mut.to.sigs.input(
mut.ref = sample.mut.ref.smoking,
sample.id = "V1",
chr = "V2",
pos = "V3",
ref = "V4",
alt = "V5",
bsg = BSgenome.Hsapiens.UCSC.hg38
)
# signature analysis
# https://www.rdocumentation.org/packages/deconstructSigs/versions/1.8.0/topics/whichSignatures
plot_function <- function(i) {
plot_dataset <- whichSignatures(
tumor.ref = sigs.input,
signatures.ref = signatures.cosmic,
sample.id = i,
contexts.needed = TRUE
)
plot_dataset
}
sampleid_teeth %>% map(
plot_function
) -> plot_function_output
# make pie graphs
for (i in 1:25) {
filename <- paste0("/dir/", rownames(plot_function_output[[i]]$diff), "_pie.png")
png(file = filename, bg = "transparent")
makePie(plot_function_output[[i]])
}
dev.off()
# make signatures graphs
for (i in 1:25) {
filename <- paste0("/dir/", rownames(plot_function_output[[i]]$diff), "_plotSignatures.png")
png(file = filename, bg = "transparent")
deconstructSigs::plotSignatures(plot_function_output[[i]])
}
dev.off()
# make dataframe
data_function <- function(i) {
deconvolute_df <- plot_function_output[[i]]$weights
deconvolute_df$unknwon <- plot_function_output[[i]]$unknown
deconvolute_df$SSE <- sum(plot_function_output[[i]]$diff^2)
deconvolute_df
}
c(1:38) %>% map(
data_function
) -> deconvolute_df_output
deconvolute_df_output_summary <- deconvolute_df_output %>% bind_rows()
deconvolute_df_output_summary %>%
select(c(
"unknwon", "Signature.1", "Signature.2", "Signature.3", "Signature.4",
"Signature.5", "Signature.6", "Signature.7", "Signature.8", "Signature.9",
"Signature.10", "Signature.11", "Signature.12", "Signature.13",
"Signature.14", "Signature.15", "Signature.16", "Signature.17",
"Signature.18", "Signature.19", "Signature.20", "Signature.21",
"Signature.22", "Signature.23", "Signature.24", "Signature.25",
"Signature.26", "Signature.27", "Signature.28", "Signature.29",
"Signature.30"
)) %>%
rownames_to_column("sampleid") %>%
fwrite("/dir/deconstructSigs.out/deconvolute_df_output_summary.txt")
# plot out signature proportions
ggplot_df <- deconvolute_df_output_summary %>%
rownames_to_column("sampleid") %>%
pivot_longer(!c("sampleid", "SSE"), names_to = "signatures", values_to = "proportion") %>%
filter(proportion != 0)
ggplot_df %>%
pull(signatures) %>%
unique() %>%
dput()
ggplot_df %>% fwrite("/dir/deconstructSigs.out/deconstructSigs.out.txt")
# percent
png(file = "/dir/stacked_bar.png", bg = "transparent", width = 700, height = 500, units = "px")
ggplot(ggplot_df, aes(fill = factor(signatures, levels = c("unknwon", "Signature.25", "Signature.24", "Signature.20", "Signature.18", "Signature.16", "Signature.12", "Signature.13", "Signature.2", "Signature.4", "Signature.9", "Signature.8", "Signature.7", "Signature.6", "Signature.3", "Signature.5", "Signature.1")), y = proportion, x = reorder(sampleid, sampleid))) +
geom_bar(position = "fill", stat = "identity") +
theme_light() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
scale_fill_manual(values = as.vector(cols25(25))) +
guides(fill = guide_legend(title = "Signature")) +
labs(x = "sample id", title = "deconstructSigs signature deconvolution")
dev.off()
# plot out error rate
png(file = "/dir/SSE.png", bg = "transparent", width = 800, height = 600, units = "px")
deconvolute_df_output_summary %>%
rownames_to_column("sampleid") %>%
ggplot(aes(y = SSE, x = reorder(sampleid, sampleid), label = round(SSE, 4))) +
geom_point(color = "red") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
labs(x = "sample id", title = "deconstructSigs SSE") +
geom_text(size = 0.1) +
ggrepel::geom_text_repel()
dev.off()