Skip to content

Commit 71cd248

Browse files
committed
Update for SNV/INDEL; options via ellipses
1 parent 02e19fa commit 71cd248

37 files changed

+1589
-592
lines changed

NAMESPACE

+19-1
Original file line number | Diff line number | Diff line change
@@ -1,16 +1,24 @@
11
# Generated by roxygen2: do not edit by hand
22

3+
export(EnrichmentVolcano)
4+
export(bpSim)
35
export(chromDist)
46
export(cleanTheme)
7+
export(dist2motif2)
8+
export(distOverlay2)
59
export(featureEnrichment)
610
export(featuresHit)
711
export(genTris)
812
export(geneEnrichment)
913
export(geneHit)
14+
export(generateData)
1015
export(getData)
16+
export(getTriFromFasta)
1117
export(mutSigs)
1218
export(mutSpectrum)
19+
export(plotdistanceOverlay2)
1320
export(rainfall)
21+
export(readBed)
1422
export(samplesPlot)
1523
export(setCols)
1624
export(sigTypes)
@@ -19,12 +27,22 @@ export(snvStats)
1927
export(snvinGene)
2028
export(triFreq)
2129
export(tssDist)
22-
import(BSgenome)
2330
import(BSgenome.Dmelanogaster.UCSC.dm6)
31+
import(Biostrings)
2432
import(GenomicRanges)
2533
import(RColorBrewer)
34+
import(colorspace)
35+
import(cowplot)
2636
import(data.table)
2737
import(deconstructSigs)
2838
import(dplyr)
39+
import(forcats)
2940
import(ggplot2)
41+
import(ggpubr)
42+
import(plotly)
43+
import(plyr)
3044
import(reshape)
45+
import(scales)
46+
import(tidyr)
47+
importFrom(data.table,as.data.table)
48+
importFrom(data.table,fread)

R/featureEnrichment.R

+73-24
Original file line number | Diff line number | Diff line change
@@ -10,33 +10,36 @@
1010
#' @param features File containing total genomic lengths of features [Default 'data/genomic_features.txt']
1111
#' @param genome_length The total length of the genome [Default 118274340 (mappable regions on chroms 2, 3, 4, X & Y for Drosophila melanogaster Dmel6.12)]
1212
#' @keywords enrichment
13-
#' @import dplyr
14-
#' @return A data frame with FC scores for all genes seen at least n times in snv data
15-
#' @export
13+
#' @import dplyr ggpubr
14+
#' @return A data frame with FC scores for all genes seen at least n times in snv_data
15+
#' @export
16+
featureEnrichment <- function(..., snv_data=NULL, features='data/genomic_features.txt', genome_length=118274340, write=FALSE){
1617

17-
featureEnrichment <- function(features='data/genomic_features.txt', genome_length=118274340){
1818
genome_features<-read.delim(features, header = T)
19-
data<-getData()
20-
mutCount<-nrow(data)
21-
19+
20+
if(missing(snv_data)){
21+
snv_data<-getData(...)
22+
}
23+
mutCount<-nrow(snv_data)
24+
2225
# To condense exon counts into "exon"
23-
data$feature<-as.factor(gsub("exon_.*", "exon", data$feature))
24-
25-
classCount<-table(data$feature)
26+
snv_data$feature<-as.factor(gsub("exon_.*", "exon", snv_data$feature))
27+
28+
classCount<-table(snv_data$feature)
2629
classLengths<-setNames(as.list(genome_features$length), genome_features$feature)
27-
30+
2831
fun <- function(f) {
2932
# Calculate the fraction of the genome occupied by each feature
3033
featureFraction<-classLengths[[f]]/genome_length
31-
32-
# How many times should we expect to see this feature hit in our data (given number of obs. and fraction)?
34+
35+
# How many times should we expect to see this feature hit in our snv_data (given number of obs. and fraction)?
3336
featureExpect<-(mutCount*featureFraction)
34-
35-
# observed/expected
37+
38+
# observed/expected
3639
fc<-classCount[[f]]/featureExpect
37-
fc<-round(fc,digits=1)
40+
Log2FC<-log2(fc)
3841
featureExpect<-round(featureExpect,digits=3)
39-
42+
4043
# Binomial test
4144
if(!is.null(classLengths[[f]])){
4245
if(classCount[f] >= featureExpect){
@@ -47,17 +50,63 @@ featureEnrichment <- function(features='data/genomic_features.txt', genome_lengt
4750
stat<-binom.test(x = classCount[f], n = mutCount, p = featureFraction, alternative = "less")
4851
test<-"depletion"
4952
}
50-
sig_val<-'F'
51-
if(stat$p.value <= 0.05){ sig_val<-'T'}
53+
sig_val <- ifelse(stat$p.value <= 0.001, "***",
54+
ifelse(stat$p.value <= 0.01, "**",
55+
ifelse(stat$p.value <= 0.05, "*", "")))
56+
5257
p_val<-format.pval(stat$p.value, digits = 3, eps=0.0001)
53-
list(feature = f, observed = classCount[f], expected = featureExpect, fc = fc, test = test, sig = sig_val, p_val = p_val)
58+
list(feature = f, observed = classCount[f], expected = featureExpect, Log2FC = Log2FC, test = test, sig = sig_val, p_val = p_val)
5459
}
5560
}
56-
57-
enriched<-lapply(levels(data$feature), fun)
61+
62+
enriched<-lapply(levels(snv_data$feature), fun)
5863
enriched<-do.call(rbind, enriched)
5964
featuresFC<-as.data.frame(enriched)
6065
# Sort by absolute Log2FC value (largest change first)
61-
featuresFC<-arrange(featuresFC,desc(as.integer(fc)))
62-
return(featuresFC)
66+
featuresFC<-dplyr::arrange(featuresFC,desc(abs(as.numeric(Log2FC))))
67+
featuresFC$Log2FC<-round(as.numeric(featuresFC$Log2FC), 1)
68+
69+
if(write){
70+
featuresFC <- filter(featuresFC, observed >= 5)
71+
first.step <- lapply(featuresFC, unlist)
72+
second.step <- as.data.frame(first.step, stringsAsFactors = F)
73+
ggpubr::ggtexttable(second.step, rows = NULL, theme = ttheme("mGreen"))
74+
feat_enrichment_table <- paste("feature_enrichment_table.tiff")
75+
cat("Writing to file: ", 'plots/', feat_enrichment_table, sep = '')
76+
ggsave(paste("plots/", feat_enrichment_table, sep=""), width = 5.5, height = (nrow(featuresFC)/3), dpi=300)
77+
} else{
78+
return(featuresFC)
79+
}
80+
}
81+
82+
83+
featureEnrichmentPlot <- function(write=FALSE) {
84+
feature_enrichment<-featureEnrichment()
85+
86+
feature_enrichment$feature <- as.character(feature_enrichment$feature)
87+
feature_enrichment$Log2FC <- as.numeric(feature_enrichment$Log2FC)
88+
89+
feature_enrichment <- transform(feature_enrichment, feature = reorder(feature, -Log2FC))
90+
91+
feature_enrichment <- filter(feature_enrichment, observed >= 5)
92+
93+
# Custom sorting
94+
# feature_enrichment$feature <- factor(feature_enrichment$feature, levels=c("intron", "intergenic", "exon", "3UTR", "ncRNA", "5UTR"))
95+
96+
p<-ggplot(feature_enrichment)
97+
p<-p + geom_bar(aes(feature, Log2FC, fill = as.character(test)), stat="identity")
98+
p<-p + guides(fill=FALSE)
99+
p<-p + ylim(-2,2)
100+
p<-p + cleanTheme() +
101+
theme(panel.grid.major.y = element_line(color="grey80", size = 0.5, linetype = "dotted"),
102+
axis.text.x = element_text(angle = 45, hjust=1),
103+
axis.text = element_text(size=20)
104+
)
105+
106+
if(write){
107+
feat_plot <- paste("feat_plot.pdf")
108+
cat("Writing file", feat_plot, "\n")
109+
ggsave(paste("plots/", feat_plot, sep=""), width = 5, height = 10)
110+
}
111+
p
63112
}

R/featuresHit.R

+29-20
Original file line number | Diff line number | Diff line change
@@ -5,30 +5,39 @@
55
#' @keywords features
66
#' @export
77

8+
featuresHit <- function(..., write=FALSE){
9+
snv_data<-getData(...)
810

9-
featuresHit <- function(){
10-
data<-getData()
11-
1211
# To condense exon counts into "exon"
13-
data$feature<-as.factor(gsub("exon_.*", "exon", data$feature))
14-
12+
snv_data$feature<-as.factor(gsub("exon_.*", "exon", snv_data$feature))
13+
1514
# Reorder feature levels by descending count
16-
data$feature<-factor(data$feature, levels = names(sort(table(data$feature), decreasing = TRUE)))
17-
18-
#cols<-set_cols(data, "feature")
19-
20-
p<-ggplot(data)
21-
p<-p + geom_bar(aes(feature, fill = feature))
15+
snv_data$feature<-factor(snv_data$feature, levels = names(sort(table(snv_data$feature), decreasing = TRUE)))
16+
17+
snv_data <- snv_data %>%
18+
dplyr::group_by(feature) %>%
19+
dplyr::add_tally() %>%
20+
ungroup() %>%
21+
dplyr::filter(n >= 5) %>%
22+
droplevels()
23+
#cols<-setCols(snv_data, "feature")
24+
25+
p <- ggplot(snv_data)
26+
p <- p + geom_bar(aes(feature, fill = feature))
2227
#p<-p + cols
23-
p<-p + cleanTheme() +
28+
p <- p + cleanTheme() +
2429
theme(axis.title.x=element_blank(),
2530
panel.grid.major.y = element_line(color="grey80", size = 0.5, linetype = "dotted"))
26-
p<-p + scale_x_discrete(expand = c(0.01, 0.01))
27-
p<-p + scale_y_continuous(expand = c(0.01, 0.01))
28-
29-
features_outfile<-paste("hit_features_count.pdf")
30-
cat("Writing file", features_outfile, "\n")
31-
ggsave(paste("plots/", features_outfile, sep=""), width = 20, height = 10)
32-
31+
p <- p + scale_x_discrete(expand = c(0.01, 0.01))
32+
p <- p + scale_y_continuous(expand = c(0.01, 0.01))
33+
34+
# colour to a pub palette:
35+
# p<-p + ggpar(p, palette = 'jco')
36+
37+
if(write){
38+
features_outfile<-paste("hit_features_count.pdf")
39+
cat("Writing file", features_outfile, "\n")
40+
ggsave(paste("plots/", features_outfile, sep=""), width = 20, height = 10)
41+
}
3342
p
34-
}
43+
}

0 commit comments

Comments
 (0)