Skip to content

Commit

Permalink
Added more functions
Browse files Browse the repository at this point in the history
  • Loading branch information
salsalsal97 committed Oct 22, 2024
1 parent 3560809 commit 4e9c315
Show file tree
Hide file tree
Showing 19 changed files with 579 additions and 74 deletions.
8 changes: 7 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# Generated by roxygen2: do not edit by hand

export(sc_cell_type_de)
export(DGE_analysis)
export(power_analysis)
export(within_data_correlation)
importFrom(EnsDb.Hsapiens.v79,EnsDb.Hsapiens.v79)
importFrom(Matrix,rowSums)
importFrom(SingleCellExperiment,colData)
Expand Down Expand Up @@ -36,7 +38,11 @@ importFrom(ggplot2,geom_point)
importFrom(ggplot2,ggplot)
importFrom(ggplot2,ggsave)
importFrom(ggplot2,ggtitle)
importFrom(ggplot2,guide_legend)
importFrom(ggplot2,guides)
importFrom(ggplot2,labs)
importFrom(ggplot2,scale_alpha)
importFrom(ggplot2,scale_colour_brewer)
importFrom(ggplot2,scale_colour_manual)
importFrom(ggplot2,scale_fill_gradient2)
importFrom(ggplot2,scale_shape_manual)
Expand Down
14 changes: 7 additions & 7 deletions R/sc_cell_type_de.R → R/DGE_analysis.R
Original file line number Diff line number Diff line change
Expand Up @@ -70,11 +70,11 @@
#' .N,by=.(celltype,chromosome_name)]
#'}

sc_cell_type_de <- function(SCE, design, pseudobulk_ID, celltype_ID, y=NULL,
region="single_region", coef=NULL, control=NULL,
pval_adjust_method = "BH", adj_pval=0.05,
folder="sc.cell.type.de.graphs/",
rmv_zero_count_genes=TRUE, verbose=F){
DGE_analysis <- function(SCE, design, pseudobulk_ID, celltype_ID, y=NULL,
region="single_region", coef=NULL, control=NULL,
pval_adjust_method = "BH", adj_pval=0.05,
folder="sc.cell.type.de.graphs/",
rmv_zero_count_genes=TRUE, verbose=F){

# need to load SCE if a directory is passed
if(class(SCE)[1]=="character"){
Expand Down Expand Up @@ -135,8 +135,8 @@ sc_cell_type_de <- function(SCE, design, pseudobulk_ID, celltype_ID, y=NULL,

# run edgeR LRT DE analysis
celltype_de <-
de_analysis(pb_dat,formula,y_name=y,y_contin,coef,control,
pval_adjust_method, adj_pval, verbose)
differential_expression(pb_dat,formula,y_name=y,y_contin,coef,control,
pval_adjust_method, adj_pval, verbose)
# get sig DEGs for each
celltype_DEGs <- lapply(celltype_de, function(x) x[x$adj_pval<adj_pval,])

Expand Down
236 changes: 236 additions & 0 deletions R/correlation_boxplots.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,236 @@
# Define global variables
utils::globalVariables(c("alpha"))

#' Obtain box plots for the correlations of all celltypes, and the mean correlations at a specified cutoff p-value

#' @importFrom ggplot2 geom_boxplot ggplot labs facet_wrap theme aes element_text element_blank scale_colour_brewer scale_alpha guides guide_legend
#' @importFrom cowplot theme_cowplot

#' @param corrMats (named) list of correlation matrices for each celltype with the final element being the mean correlation matrix, all at specified p-value
#' @param numRealDatasets total number of *real* datasets (most likely the number of studies, but sometimes a study may be split e.g. into 2 brain regions, so in this case it would be the number of studies plus 1)
#' @param pval the cut-off p-value which was used to select DEGs
#' @param alphaval (alpha) transparency of the non-mean boxplots
#' @param numPerms number of random permutations of the dataset used to select significant DEGs from
#' @param numSubsets number of pairs of random subsets of the dataset used to select significant DEGs from
#' @param sexDEGs true if DEGs come from sex chromosomes, else false
#' @param fontsize_yaxislabels font size for axis labels in plot
#' @param fontsize_yaxisticks font size for axis tick labels in plot
#' @param fontsize_title font size for plot title
#' @param fontsize_legendlabels font size for legend labels in plot
#' @param fontsize_legendtitle font size for legend title in plot
#' @param fontsize_facet_labels font size for facet labels

#' @return box plots for correlation matrices at a certain p-value cut-off, sorted by celltype and then type of correlation

correlation_boxplots <- function(corrMats,
numRealDatasets,
pval,
alphaval=0.25,
numPerms=5,
numSubsets=5,
sexDEGs=FALSE,
fontsize_yaxislabels=12,
fontsize_yaxisticks=9,
fontsize_title=14,
fontsize_legendlabels=9,
fontsize_legendtitle=9,
fontsize_facet_labels=9){

# check input parameters are fine
if(class(corrMats)!="list"){
stop("Error: corrMats should be a list of matrices")
}
if(floor(numRealDatasets)!=numRealDatasets){
stop("Error: numRealDatasets should be an integer specifying the number of real datasets (see description)")
}
if(!is.numeric(pval) | pval <= 0 | pval > 1){
stop("Error: pval should be a (positive) number between 0 and 1")
}
if(floor(numPerms)!=numPerms){
stop("Error: numPerms should be an integer specifying the number of random permutations of Tsai (see description)")
}
if(floor(numSubsets)!=numSubsets){
stop("Error: numSubsets should be an integer specifying the number of subsets of Tsai (see description)")
}
if(class(sexDEGs)!="logical"){
stop("Error: sexDEGs should be TRUE (if DEGs are chosen only from sex chromosomes) or FALSE")
}
if(class(fontsize_yaxislabels)!="numeric"){
stop("Error: fontsize_yaxislabels should be numerical.")
}else{
if(fontsize_yaxislabels-floor(fontsize_yaxislabels)!=0|fontsize_yaxislabels<0){
stop("Error: fontsize_yaxislabels should be a positive integer.")
}
}
if(class(fontsize_yaxisticks)!="numeric"){
stop("Error: fontsize_yaxisticks should be numerical.")
}else{
if(fontsize_yaxisticks-floor(fontsize_yaxisticks)!=0|fontsize_yaxisticks<0){
stop("Error: fontsize_yaxisticks should be a positive integer.")
}
}
if(class(fontsize_title)!="numeric"){
stop("Error: fontsize_title should be numerical.")
}else{
if(fontsize_title-floor(fontsize_title)!=0|fontsize_title<0){
stop("Error: fontsize_title should be a positive integer.")
}
}
if(class(fontsize_legendlabels)!="numeric"){
stop("Error: fontsize_legendlabels should be numerical.")
}else{
if(fontsize_legendlabels-floor(fontsize_legendlabels)!=0|fontsize_legendlabels<0){
stop("Error: fontsize_legendlabels should be a positive integer.")
}
}
if(class(fontsize_legendtitle)!="numeric"){
stop("Error: fontsize_legendtitle should be numerical.")
}else{
if(fontsize_legendtitle-floor(fontsize_legendtitle)!=0|fontsize_legendtitle<0){
stop("Error: fontsize_legendtitle should be a positive integer.")
}
}
if(class(fontsize_facet_labels)!="numeric"){
stop("Error: fontsize_facet_labels should be numerical.")
}else{
if(fontsize_facet_labels-floor(fontsize_facet_labels)!=0|fontsize_facet_labels<0){
stop("Error: fontsize_facet_labels should be a positive integer.")
}
}

# midCor submatrix limits
midCorLim <- numPerms + numRealDatasets
# list to hold results
corrOuts <- c()
# index
j <- 1

# get lists with all correlations
for(corrMat in corrMats){
# specify submatrices with upper/middle/lower bounds
lower <- corrMat[1:numPerms,1:numPerms]
middle <- corrMat[(numPerms+1):midCorLim,(numPerms+1):midCorLim]
upper <- corrMat[(midCorLim+1):(midCorLim+numSubsets),(midCorLim+1):(midCorLim+numSubsets)]
# convert each one to a list and remove "1" (selfcorrelation)
lower <- unique(unlist(as.list(lower)))
lower <- lower[-c(1)]
middle <- unique(unlist(as.list(middle)))
middle <- middle[-c(1)]
upper <- unique(unlist(as.list(upper)))
upper <- upper[-c(1)]
# store in list
corrOuts[[j]] <- list(lower,middle,upper)
names(corrOuts)[[j]] <- names(corrMats)[[j]]
# increment
j <- j+1
}

# store in dataframe
i <- 1
# empty dataframe
df <- data.frame()
# fill dataframe
for(out in corrOuts){
# define variables
var1 <- replicate(length(out[[1]])+length(out[[2]])+length(out[[3]]), names(corrOuts)[[i]]) #celltype
var2 <- c(replicate(length(out[[1]]),"Random Permutations"),replicate(length(out[[2]]),"Between Study"),replicate(length(out[[3]]),"Within-study subsamples"))
val <- unlist(out)
# put in dataframe
df_new <- data.frame(var1)
df_new$var2 <- var2
df_new$val <- val
# join
df <- rbind(df,df_new)
i <- i+1
}

df$alpha <- replicate(dim(df)[[1]],alphaval)
df$alpha <- ifelse(df$var1=="Mean", 1, ifelse(df$var1!="Mean",alphaval,alphaval))
unique_alphas <- df[!duplicated(df[,c("var1")]),]$alpha

# box plot
if(pval == 1){
if(sexDEGs == FALSE){
fig.plot <- ggplot(df,
aes(x=factor(var1,levels=c("Mean","Astro","Endo","Micro","Oligo")),y=val))+
geom_boxplot(outlier.shape=NA,aes(fill=factor(var1,levels=c("Mean","Astro","Endo","Micro","Oligo")),alpha=alpha),width=5)+
theme_cowplot()+
scale_colour_brewer(palette = "Set1")+
labs(y="Correlation", x="Type", fill="Cell Type",title=paste0("Using all DEGs (from all chromosomes)"))+
facet_wrap(factor(df$var2,levels=c("Random Permutations","Between Study","Within-study subsamples")), scales="fixed")+
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.y = element_text(size = fontsize_yaxislabels),
axis.text.y = element_text(size = fontsize_yaxisticks),
plot.title = element_text(size = fontsize_title),
legend.text = element_text(size = fontsize_legendlabels),
legend.title = element_text(size = fontsize_legendtitle),
strip.text = element_text(size = fontsize_facet_labels))+
scale_alpha(guide = 'none')+
guides(fill=guide_legend(override.aes = list(alpha=unique_alphas)))
}else{
fig.plot <- ggplot(df,
aes(x=factor(var1,levels=c("Mean","Astro","Endo","Micro","Oligo")),y=val))+
geom_boxplot(outlier.shape=NA,aes(fill=factor(var1,levels=c("Mean","Astro","Endo","Micro","Oligo")),alpha=alpha),width=5)+
theme_cowplot()+
scale_colour_brewer(palette = "Set1")+
labs(y="Correlation", x="Type", fill="Cell Type",title=paste0("Using all DEGs (from sex chromosomes)"))+
facet_wrap(factor(df$var2,levels=c("Random Permutations","Between Study","Within-study subsamples")), scales="fixed")+
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.y = element_text(size = fontsize_yaxislabels),
axis.text.y = element_text(size = fontsize_yaxisticks),
plot.title = element_text(size = fontsize_title),
legend.text = element_text(size = fontsize_legendlabels),
legend.title = element_text(size = fontsize_legendtitle),
strip.text = element_text(size = fontsize_facet_labels))+
scale_alpha(guide = 'none')+
guides(fill=guide_legend(override.aes = list(alpha=unique_alphas)))
}
}else{
if(sexDEGs == FALSE){
fig.plot <- ggplot(df,
aes(x=factor(var1,levels=c("Mean","Astro","Endo","Micro","Oligo")),y=val))+
geom_boxplot(outlier.shape=NA,aes(fill=factor(var1,levels=c("Mean","Astro","Endo","Micro","Oligo")),alpha=alpha),width=5)+
theme_cowplot()+
scale_colour_brewer(palette = "Set1")+
labs(y="Correlation", x="Type", fill="Cell Type",title=paste0("DEGs selected at a ",pval*100,"% cut-off (from all chromosomes)"))+
facet_wrap(factor(df$var2,levels=c("Random Permutations","Between Study","Within-study subsamples")), scales="fixed")+
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.y = element_text(size = fontsize_yaxislabels),
axis.text.y = element_text(size = fontsize_yaxisticks),
plot.title = element_text(size = fontsize_title),
legend.text = element_text(size = fontsize_legendlabels),
legend.title = element_text(size = fontsize_legendtitle),
strip.text = element_text(size = fontsize_facet_labels))+
scale_alpha(guide = 'none')+
guides(fill=guide_legend(override.aes = list(alpha=unique_alphas)))
}else{
fig.plot <- ggplot(df,
aes(x=factor(var1,levels=c("Mean","Astro","Endo","Micro","Oligo")),y=val))+
geom_boxplot(outlier.shape=NA,aes(fill=factor(var1,levels=c("Mean","Astro","Endo","Micro","Oligo")),alpha=alpha),width=5)+
theme_cowplot()+
scale_colour_brewer(palette = "Set1")+
labs(y="Correlation", x="Type", fill="Cell Type",title=paste0("DEGs selected at a ",pval*100,"% cut-off (from sex chromosomes)"))+
facet_wrap(factor(df$var2,levels=c("Random Permutations","Between Study","Within-study subsamples")), scales="fixed")+
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.y = element_text(size = fontsize_yaxislabels),
axis.text.y = element_text(size = fontsize_yaxisticks),
plot.title = element_text(size = fontsize_title),
legend.text = element_text(size = fontsize_legendlabels),
legend.title = element_text(size = fontsize_legendtitle),
strip.text = element_text(size = fontsize_facet_labels))+
scale_alpha(guide = 'none')+
guides(fill=guide_legend(override.aes = list(alpha=unique_alphas)))
}
}

return(fig.plot)

}
4 changes: 2 additions & 2 deletions R/de_analysis.R → R/differential_expression.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
#' @return a list containing differential expression data (a dataframe) for each cell type. The dataframe contains:
#' log fold change (logFC), log counts per million (logCPM), log ratio (LR), p-value (PValue), adjusted p-value (adj_pval)

de_analysis <- function(pb_dat,formula,y_name,y_contin,coef, control,
pval_adjust_method, adj_pval,verbose){
differential_expression <- function(pb_dat,formula,y_name,y_contin,coef, control,
pval_adjust_method, adj_pval,verbose){
# use Likelihood ratio test from edgeR, found to have best perf in a recent benchmark: https://www.biorxiv.org/content/10.1101/2021.03.12.435024v1.full
# other option for edgeR is quasi-likelihood F-tests
# run each cell type through
Expand Down
6 changes: 3 additions & 3 deletions R/downsampling_DEanalysis.r
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ downsampling_DEanalysis <- function(data,
# check if DE analysis output present already in output_path
if(!"DEout.RData" %in% list.files(output_path)){
# run and save DE analysis
assign("DEout", sc_cell_type_de(data, design=design, pseudobulk_ID=sampleID, celltype_ID=celltypeID, y=y, region=region, control=control, pval_adjust_method=pval_adjust_method, rmv_zero_count_genes=rmv_zero_count_genes, verbose=T, coef=coeff))
assign("DEout", DGE_analysis(data, design=design, pseudobulk_ID=sampleID, celltype_ID=celltypeID, y=y, region=region, control=control, pval_adjust_method=pval_adjust_method, rmv_zero_count_genes=rmv_zero_count_genes, verbose=T, coef=coeff))
save(DEout,file=paste0(output_path,"/DEout.RData"))
}else{
load(paste0(output_path,"/DEout.RData"))
Expand Down Expand Up @@ -115,7 +115,7 @@ downsampling_DEanalysis <- function(data,
dir.create(path,showWarnings=FALSE)
setwd(path)
# ensure sexID isnt "Sex", has to be lower case (change this in SCE if needed)
assign(paste0("DEout_",toString(value)), sc_cell_type_de(samples[[j]], design=design, pseudobulk_ID=sampleID, celltype_ID=celltypeID, y=y, region=region, control=control, pval_adjust_method=pval_adjust_method, rmv_zero_count_genes=rmv_zero_count_genes, verbose=T, coef=coeff))
assign(paste0("DEout_",toString(value)), DGE_analysis(samples[[j]], design=design, pseudobulk_ID=sampleID, celltype_ID=celltypeID, y=y, region=region, control=control, pval_adjust_method=pval_adjust_method, rmv_zero_count_genes=rmv_zero_count_genes, verbose=T, coef=coeff))
# save output
save(list=eval(paste0("DEout_",toString(value))),file=paste0("DEout",toString(value),"_",j,".RData"))
# get number of TP DEGs
Expand Down Expand Up @@ -170,7 +170,7 @@ downsampling_DEanalysis <- function(data,
dir.create(path,showWarnings=FALSE)
setwd(path)
# ensure sexID isnt "Sex", has to be lower case (change this in SCE if needed)
assign(paste0("DEout_",toString(value)), sc_cell_type_de(cells[[j]], design=design, pseudobulk_ID=sampleID, celltype_ID=celltypeID, y=y, region=region, control=control, pval_adjust_method=pval_adjust_method, rmv_zero_count_genes=rmv_zero_count_genes, verbose=T, coef=coeff))
assign(paste0("DEout_",toString(value)), DGE_analysis(cells[[j]], design=design, pseudobulk_ID=sampleID, celltype_ID=celltypeID, y=y, region=region, control=control, pval_adjust_method=pval_adjust_method, rmv_zero_count_genes=rmv_zero_count_genes, verbose=T, coef=coeff))
# save output
save(list=eval(paste0("DEout_",toString(value))),file=paste0("DEout",toString(value),"_",j,".RData"))
# get number of TP DEGs
Expand Down
2 changes: 1 addition & 1 deletion R/downsampling_corrplots.r
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ downsampling_corrplots <- function(data,
# check if DE analysis output present already in output_path
if(!"DEout.RData" %in% list.files(output_path)){
# run and save DE analysis
assign("DEout", sc_cell_type_de(data, design=design, pseudobulk_ID=sampleID, celltype_ID=celltypeID, y=y, region=region, control=control, pval_adjust_method=pval_adjust_method, rmv_zero_count_genes=rmv_zero_count_genes, verbose=T, coef=coeff))
assign("DEout", DGE_analysis(data, design=design, pseudobulk_ID=sampleID, celltype_ID=celltypeID, y=y, region=region, control=control, pval_adjust_method=pval_adjust_method, rmv_zero_count_genes=rmv_zero_count_genes, verbose=T, coef=coeff))
save(DEout,file=paste0(output_path,"/DEout.RData"))
}else{
load(paste0(output_path,"/DEout.RData"))
Expand Down
2 changes: 1 addition & 1 deletion R/plot_de_analysis.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Define global variables
utils::globalVariables(c("deg_direction",".I","adj_pval","celltype","i.deg_direction","phenotype","gene_name","x.hgnc_symbol",".N","N","num_cells","prop","N_prop","colour_ident","."))

#' Create differential expression analysis plots. Run by sc_cell_type_de()
#' Create differential expression analysis plots. Run by DGE_analysis()

#' @importFrom EnsDb.Hsapiens.v79 EnsDb.Hsapiens.v79
#' @importFrom data.table rbindlist setnames as.data.table setkey data.table setorder :=
Expand Down
Loading

0 comments on commit 4e9c315

Please sign in to comment.