Skip to content

Commit b51c174

Browse files
committed
First commit
0 parents  commit b51c174

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

58 files changed

+56150
-0
lines changed

.Rbuildignore

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
^.*\.Rproj$
2+
^\.Rproj\.user$

.gitignore

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
.Rproj.user
2+
.Rhistory
3+
.RData

DESCRIPTION

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
Package: mutationProfiles
2+
Title: What the Package Does (one line, title case)
3+
Version: 0.0.0.9000
4+
Authors@R: person("First", "Last", email = "first.last@example.com", role = c("aut", "cre"))
5+
Description: What the package does (one paragraph).
6+
Depends: R (>= 3.4.1)
7+
License: What license is it under?
8+
Encoding: UTF-8
9+
LazyData: true
10+
RoxygenNote: 6.0.1.9000

NAMESPACE

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Generated by roxygen2: do not edit by hand
2+
3+
export(cleanTheme)
4+
export(genomeSnvs)
5+
export(getData)
6+
export(mutSigs)
7+
export(mutSpectrum)
8+
export(notchSnvs)
9+
export(printTris)
10+
export(samplesPlot)
11+
export(snvStats)
12+
export(triFreq)
13+
import(BSgenome.Dmelanogaster.UCSC.dm6)
14+
import(deconstructSigs)
15+
import(dplyr)
16+
import(ggplot2)

R/cleanTheme.R

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#' cleanTheme
2+
#'
3+
#' Clean theme for plotting
4+
#' @param base_size Base font size [Default 12]
5+
#' @import ggplot2
6+
#' @keywords theme
7+
#' @export
8+
9+
10+
cleanTheme <- function(base_size = 12){
11+
theme(
12+
plot.title = element_text(hjust = 0.5, size = 20),
13+
panel.background = element_blank(),
14+
plot.background = element_rect(fill = "transparent",colour = NA),
15+
panel.grid.minor = element_blank(),
16+
panel.grid.major = element_blank(),
17+
axis.line.x = element_line(color="black", size = 0.5),
18+
axis.line.y = element_line(color="black", size = 0.5),
19+
axis.text = element_text(size=12),
20+
axis.title = element_text(size=30)
21+
)
22+
}

R/genTris.R

+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
#' printTris
2+
#'
3+
#' This function returns all possible trinucleotide combinations
4+
#' @keywords trinucleotides
5+
#' @export
6+
#' @return Character string containing all 96 trinucleotides
7+
#' printTris()
8+
9+
10+
printTris <- function(){
11+
all.tri = c()
12+
for(i in c("A", "C", "G", "T")){
13+
for(j in c("C", "T")){
14+
for(k in c("A", "C", "G", "T")){
15+
if(j != k){
16+
for(l in c("A", "C", "G", "T")){
17+
tmp = paste(i, "[", j, ">", k, "]", l, sep = "")
18+
all.tri = c(all.tri, tmp)
19+
}
20+
}
21+
}
22+
}
23+
}
24+
all.tri <- all.tri[order(substr(all.tri, 3, 5))]
25+
return(all.tri)
26+
}

R/genomeSnvs.R

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#' genomeSnvs
2+
#'
3+
#' Plot snvs accross genome
4+
#' @import ggplot2
5+
#' @keywords genome
6+
#' @export
7+
8+
9+
genomeSnvs <- function(){
10+
data<-getData()
11+
data<-filter(data, chrom != "Y" & chrom != "4")
12+
p<-ggplot(data)
13+
p<-p + geom_point(aes(pos/1000000, sample, colour = trans))
14+
# p<-p + guides(color = FALSE)
15+
p<-p + theme(axis.text.x = element_text(angle=45, hjust = 1))
16+
17+
p<-p + facet_wrap(~chrom, scale = "free_x", ncol = 2)
18+
p<-p + scale_x_continuous("Mbs", breaks = seq(0,33,by=1), limits = c(0, 33), expand = c(0.01, 0.01))
19+
20+
p
21+
}
22+
23+

R/getData.R

+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
#' getData
2+
#'
3+
#' Function to clean cnv files
4+
#' @param infile File to process [Required]
5+
#' @keywords get
6+
#' @import dplyr
7+
#' @export
8+
#' @return Dataframe
9+
10+
getData <- function(infile = "data/GW.snv.dist.txt"){
11+
data<-read.delim(infile, header = F)
12+
13+
colnames(data)=c("sample", "chrom", "pos", "ref", "alt", "tri", "trans", "decomposed_tri", "grouped_trans", "type")
14+
levels(data$type) <- tolower(levels(data$type))
15+
#data <- filter(data, type == 'germline')
16+
17+
#filter on chroms
18+
# data<-filter(data, chrom != "Y" & chrom != "4")
19+
#filter out samples
20+
# data<-filter(data, sample != "A373R1" & sample != "A373R7" & sample != "A512R17" )
21+
data<-droplevels(data)
22+
dir.create(file.path("plots"), showWarnings = FALSE)
23+
return(data)
24+
}
25+

R/mutSigs.R

+71
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
#' mutSigs
2+
#'
3+
#' Calculate and plot the mutational signatures accross samples using the package `deconstructSigs`
4+
#' @param samples Calculates and plots mutational signatures on a per-sample basis [Default no]
5+
#' @param pie Plot a pie chart shwoing contribution of each signature to overall profile [Default no]
6+
#' @import deconstructSigs
7+
#' @import BSgenome.Dmelanogaster.UCSC.dm6
8+
#' @keywords signatures
9+
#' @export
10+
11+
12+
mutSigs <- function(samples=NA, pie=NA){
13+
suppressMessages(require(BSgenome.Dmelanogaster.UCSC.dm6))
14+
suppressMessages(require(deconstructSigs))
15+
16+
if(!exists('scaling_factor')){
17+
source('R/dmel6.trinucs.R')
18+
cat("calculationg trinucleotide frequencies in genome\n")
19+
scaling_factor <-triFreq()
20+
}
21+
22+
data<-getData()
23+
genome <- BSgenome.Dmelanogaster.UCSC.dm6
24+
25+
if(is.na(samples)){
26+
data$tissue = 'All'
27+
sigs.input <- mut.to.sigs.input(mut.ref = data, sample.id = "tissue", chr = "chrom", pos = "pos", alt = "alt", ref = "ref", bsg = genome)
28+
sig_plot<-whichSignatures(tumor.ref = sigs.input, signatures.ref = signatures.cosmic, sample.id = 'All',
29+
contexts.needed = TRUE,
30+
tri.counts.method = scaling_factor
31+
)
32+
33+
cat("Writing to file 'plots/all_signatures.pdf'\n")
34+
pdf('plots/all_signatures.pdf', width = 20, height = 10)
35+
plotSignatures(sig_plot)
36+
dev.off()
37+
plotSignatures(sig_plot)
38+
39+
40+
if(!is.na(pie)){
41+
makePie(sig_plot)
42+
}
43+
}
44+
45+
else{
46+
sigs.input <- mut.to.sigs.input(mut.ref = data, sample.id = "sample", chr = "chrom", pos = "pos", alt = "alt", ref = "ref", bsg = genome)
47+
cat("sample", "snv_count", sep="\t", "\n")
48+
for(s in levels(data$sample)) {
49+
snv_count<-nrow(filter(data, sample == s))
50+
51+
if(snv_count > 50){
52+
cat(s, snv_count, sep="\t", "\n")
53+
54+
sig_plot<-whichSignatures(tumor.ref = sigs.input, signatures.ref = signatures.nature2013, sample.id = s,
55+
contexts.needed = TRUE,
56+
tri.counts.method = scaling_factor)
57+
58+
outfile<-(paste('plots/', s, '_signatures.pdf', sep = ''))
59+
cat("Writing to file", outfile, "\n")
60+
pdf(outfile, width = 20, height = 10)
61+
plotSignatures(sig_plot)
62+
dev.off()
63+
plotSignatures(sig_plot)
64+
65+
if(!is.na(pie)){
66+
makePie(sig_plot)
67+
}
68+
}
69+
}
70+
}
71+
}

R/mutSpectrum.R

+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
#' mutSpectrum
2+
#'
3+
#' Plots the mutations spectrum for all samples combined
4+
#' @import ggplot2
5+
#' @keywords spectrum
6+
#' @export
7+
8+
mutSpectrum <- function(){
9+
data<-getData()
10+
cat("Showing global contribution of tri class to mutation load", "\n")
11+
12+
p<-ggplot(data)
13+
p<-p + geom_bar(aes(x = decomposed_tri, y = (..count..)/sum(..count..), group = decomposed_tri, fill = grouped_trans), position="dodge",stat="count")
14+
p<-p + scale_y_continuous("Relative contribution to mutation load", expand = c(0.0, .0005))
15+
p<-p + scale_x_discrete("Genomic context", expand = c(.005, .005))
16+
p<-p + cleanTheme() +
17+
theme(panel.grid.major.y = element_line(color="grey80", size = 0.5, linetype = "dotted"),
18+
axis.text.x = element_text(angle = 45, hjust=1),
19+
axis.title = element_text(size=20),
20+
strip.text.x = element_text(size = 15)
21+
)
22+
p<-p + labs(fill="Mutation class")
23+
p<-p + facet_wrap(~grouped_trans, ncol = 3, scale = "free_x" )
24+
25+
mut_spectrum<-paste("mutation_spectrum.pdf")
26+
cat("Writing file", mut_spectrum, "\n")
27+
ggsave(paste("plots/", mut_spectrum, sep=""), width = 20, height = 10)
28+
p
29+
}

R/notchSnvs.R

+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
#' notchSnvs
2+
#'
3+
#' Plot snvs around Notch by sample
4+
#' @import ggplot2
5+
#' @keywords notch
6+
#' @export
7+
8+
notchSnvs <- function(){
9+
data<-getData()
10+
data<-filter(data, chrom == "X", pos >= 3000000, pos <= 3300000)
11+
12+
if(nrow(data) == 0){
13+
stop("There are no snvs in Notch. Exiting\n")
14+
}
15+
16+
p<-ggplot(data)
17+
p<-p + geom_point(aes(pos/1000000, sample, colour = trans, size = 2))
18+
p<-p + guides(size = FALSE, sample = FALSE)
19+
p<-p + cleanTheme() +
20+
theme(axis.title.y=element_blank(),
21+
panel.grid.major.y = element_line(color="grey80", size = 0.5, linetype = "dotted")
22+
)
23+
p<-p + scale_x_continuous("Mbs", expand = c(0,0), breaks = seq(3,3.3,by=0.05), limits=c(3, 3.301))
24+
p<-p + annotate("rect", xmin=3.000000, xmax=3.134532, ymin=0, ymax=0.1, alpha=.2, fill="green")
25+
p<-p + annotate("rect", xmin=3.134870, xmax=3.172221, ymin=0, ymax=0.1, alpha=.2, fill="skyblue")
26+
p<-p + annotate("rect", xmin=3.176440, xmax=3.300000, ymin=0, ymax=0.1, alpha=.2, fill="red")
27+
28+
p
29+
}

R/samplesPlot.R

+35
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
#' samplesPlot
2+
#'
3+
#' Plot the snv distribution for each sample
4+
#' @import ggplot2
5+
#' @param count Output total counts instead of frequency if set [Default no]
6+
#' @keywords spectrum
7+
#' @export
8+
9+
samplesPlot <- function(count=NA){
10+
data<-getData()
11+
12+
p<-ggplot(data)
13+
14+
if(is.na(count)){
15+
p<-p + geom_bar(aes(x = grouped_trans, y = (..count..)/sum(..count..), group = sample, fill = sample), position="dodge",stat="count")
16+
tag='_freq'
17+
}
18+
else{
19+
p<-p + geom_bar(aes(x = grouped_trans, y = ..count.., group = sample, fill = sample), position="dodge",stat="count")
20+
tag='_count'
21+
}
22+
p<-p + scale_y_continuous("Relative contribution to total mutation load", expand = c(0.0, .001))
23+
p<-p + scale_x_discrete("Mutation class")
24+
p<-p + cleanTheme() +
25+
theme(panel.grid.major.y = element_line(color="grey80", size = 0.5, linetype = "dotted"),
26+
axis.title = element_text(size=20),
27+
strip.text.x = element_text(size = 10)
28+
)
29+
p<-p + facet_wrap(~sample, ncol = 4, scale = "free_x" )
30+
31+
samples_mut_spect<-paste("mutation_spectrum_samples", tag, ".pdf", sep = '')
32+
cat("Writing file", samples_mut_spect, "\n")
33+
ggsave(paste("plots/", samples_mut_spect, sep=""), width = 20, height = 10)
34+
p
35+
}

R/snvStats.R

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#' snvStats
2+
#'
3+
#' Calculate some basic stats for snv data
4+
#' @import dplyr
5+
#' @keywords stats
6+
#' @export
7+
8+
9+
snvStats <- function(){
10+
data<-getData()
11+
cat("sample", "snvs", sep='\t', "\n")
12+
rank<-sort(table(data$sample), decreasing = TRUE)
13+
rank<-as.array(rank)
14+
15+
for (i in 1:nrow(rank)){
16+
cat(names(rank[i]), rank[i], sep='\t', "\n")
17+
}
18+
19+
all_ts<-nrow(filter(data, trans == "A>G" | trans == "C>T" | trans == "G>A" | trans == "T>C"))
20+
all_tv<-nrow(filter(data, trans != "A>G" & trans != "C>T" & trans != "G>A" & trans != "T>C"))
21+
ts_tv<-all_ts/all_tv
22+
cat("ts/tv =", ts_tv)
23+
}

R/triCount.R

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
#' triFreq
2+
#'
3+
#' This function counts the number of times each triunucleotide is found in a supplied genome
4+
#' @param genome BS.genome file defaults to BSgenome.Dmelanogaster.UCSC.dm6
5+
#' @param count Output total counts instead of frequency if set [Default no]
6+
#' @import dplyr
7+
#' @keywords trinucleotides
8+
#' @export
9+
#' @return Dataframe of trinucs and freqs (or counts if count=1)
10+
11+
triFreq <- function(genome=NA, count=NA){
12+
if(is.na(genome)){
13+
cat("No genome specfied, defaulting to 'BSgenome.Dmelanogaster.UCSC.dm6'\n")
14+
library(BSgenome.Dmelanogaster.UCSC.dm6, quietly = TRUE)
15+
genome <- BSgenome.Dmelanogaster.UCSC.dm6
16+
}
17+
18+
params <- new("BSParams", X = Dmelanogaster, FUN = trinucleotideFrequency, exclude = c("M", "_"), simplify = TRUE)
19+
data<-as.data.frame(bsapply(params))
20+
data$genome<-as.integer(rowSums(data))
21+
data$genome_adj<-(data$genome*2)
22+
23+
if(!is.na(count)){
24+
tri_count<-data['genome_adj']
25+
tri_count<-cbind(tri = rownames(tri_count), tri_count)
26+
colnames(tri_count) <- c("tri", "count")
27+
rownames(tri_count) <- NULL
28+
return(tri_count)
29+
}
30+
else{
31+
data$x <- (1/data$genome)
32+
scaling_factor<-data['x']
33+
return(scaling_factor)
34+
}
35+
36+
}

0 commit comments

Comments
 (0)