Multiple changes, adding bulk functions

neurogenomics · Jan 26, 2025 · 5e0a163 · 5e0a163
1 parent 1acd66c
commit 5e0a163
Show file tree

Hide file tree

Showing 31 changed files with 972 additions and 164 deletions.
diff --git a/.Rhistory b/.Rhistory
@@ -1,73 +1,3 @@
-cells_per_control = 100,
-ncells_variation_type = "Poisson",
-pval = 0.05,
-foldchange = 2)
-error_hierarchicell(data_summ,
-method = "Pseudobulk_mean",
-n_genes = 1000,
-n_per_group = 10,
-cells_per_case = 100,
-cells_per_control = 100,
-ncells_variation_type = "Poisson",
-pval = 0.05,
-foldchange = 3)
-error_hierarchicell(data_summ,
-method = "Pseudobulk_sum",
-n_genes = 1000,
-n_per_group = 10,
-cells_per_case = 50,
-cells_per_control = 50,
-ncells_variation_type = "Poisson",
-pval = 0.05,
-foldchange = 2)
-error_hierarchicell(data_summ,
-method = "Pseudobulk_sum",
-n_genes = 1000,
-n_per_group = 10,
-cells_per_case = 50,
-cells_per_control = 50,
-ncells_variation_type = "NegBin",
-pval = 0.05,
-foldchange = 2)
-?error_hierarchicell
-data_summ
-error_hierarchicell(data_summ,
-method = "Pseudobulk_sum",
-n_genes = 1000,
-n_per_group = 10,
-cells_per_case = 50,
-cells_per_control = 50,
-ncells_variation_type = "NB",
-pval = 0.05,
-foldchange = 2)
-error_hierarchicell(data_summ,
-method = "Pseudobulk_sum",
-n_genes = 1000,
-n_per_group = 10,
-cells_per_case = 500,
-cells_per_control = 500,
-ncells_variation_type = "NB",
-pval = 0.05,
-foldchange = 2)
-error_hierarchicell(data_summ,
-method = "Pseudobulk_sum",
-n_genes = 10000,
-n_per_group = 10,
-cells_per_case = 50,
-cells_per_control = 50,
-ncells_variation_type = "Poisson",
-pval = 0.05,
-foldchange = 2)
-error_hierarchicell(data_summ,
-method = "Pseudobulk_sum",
-n_genes = 1000,
-n_per_group = 10,
-cells_per_case = 50,
-cells_per_control = 50,
-ncells_variation_type = "Poisson",
-pval = 0.05,
-foldchange = 2)
-devtools::install_github("neurogenomics/hierarchicell")
 library(rlang)
 devtools::install_github("neurogenomics/hierarchicell")
 install.packages("rlang")
@@ -470,43 +400,113 @@ test <- c(1,2,3)
 for(j in test){
 print(j)
 }
+mean_cl_normal()
+library(ggplot2)
+mean_cl_normal()
+?mean_cl_normal
 getwd()
-setwd("C:\Users\salma\Power_Analysis_package")
-setwd("C:/sers/salma/Power_Analysis_package")
 setwd("C:/Users/salma/Power_Analysis_package")
-### TESTING PACKAGE
-## TESTING ACTUAL PACKAGE
 devtools::document()
-#usethis::use_vignette("poweranalysis")
 devtools::load_all()
 devtools::install()
 devtools::check()
 devtools::build()
-## TESTING FUNCTION WORKS
+?rep
+?list.dirs
+?paste0
+?is.na
+is.na()
+is.na([1,2])
+is.na(c(1,2;1))
+is.na(c(1,2))
+~length
+?length
+?nrow
+?rownames
+?round
+?strsplit
+?sapply
+?str_split
+?subn
+?sub
+?toupper
+?substring
+?tail
+?as.name
+?brewer.pal
+?colorRampPalette
+?geom_boxplot
+?factor
+?position_dodge2
+?scale_y_continuous
+?guides
+?guide_legend
+?scale_fill_manual
+?scale_alpha
+None
+NA
+f
+class(c(1,2,3))
+class(c(1,2,3)) == "character"
+c(1,2,3) != "test"
+if(c(1,2,3) != "test"){}
+if(c(1,2,3) != "test"){
+print("ok")
+}
+if(c(1,2,3)=="test"){
+print("OK")
+}
+len(c(1,2,3))
+length(c(1,2,3))
+"test"*2
+rep("test")
+rep("test",3)
+rep("test",length(c(1,2,3)))
+c(1,2,3) != "test"
+c(1,2,3) != "placeholder"
+a=c(1,2,3)
+if(a!="test"){
+print(*"OK")
+}
+if(a!="test"){
+print("OK")
+}
+if(a=="test"){
+print("ok")
+}
+c(1,2,3)=="placeholder"
+if(c(1,2,3)=="placeholder"){
+print(test)
+}
+identical("a","a")
+identical(c(1,2),"a")
+!identical("a","a")
+!identical(c(1,2),"a")
+celltype_names <- c("Astro","Micro")
+celltype_names[0]
+celltype_names[1]
+"Microglia"%in%"aseto"
+"Microglia"%in%"test (Microglia)"
+"allen"%in%"allen (astr0)"
+"allen"%in%"allen_atreuy"
+c("allen")%in%"allen_astro"
+"allen_VIP"%in%c("allen")
+getwd()
+devtools::document()
+rlang::last_trace()
+devtools::document()
 devtools::document()
-#usethis::use_vignette("poweranalysis")
 devtools::load_all()
-devtools::install()
-devtools::check()
-devtools::build()
-### TESTING PACKAGE
-## TESTING ACTUAL PACKAGE
+?tail
+devtools::document()
+gerwd()
+getwd()
 devtools::document()
-#usethis::use_vignette("poweranalysis")
 devtools::load_all()
 devtools::install()
 devtools::check()
-devtools::build()
-getwd()
-devtools::check()
 devtools::document()
 #usethis::use_vignette("poweranalysis")
 devtools::load_all()
 devtools::install()
 devtools::check()
-mean_cl_normal
-?mean_cl_normal
-mean_cl_normal()
-library(Hmisc)
-mean_cl_normal()
-?mean_cl_normal
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -40,7 +40,9 @@ Imports:
     stringr,
     gridExtra,
     ggcorrplot,
-    infotheo
+    infotheo,
+    RColorBrewer,
+    grDevices
 Suggests: 
     knitr,
     rmarkdown,

diff --git a/NAMESPACE b/NAMESPACE
@@ -6,6 +6,7 @@ export(power_analysis)
 export(within_data_correlation)
 importFrom(EnsDb.Hsapiens.v79,EnsDb.Hsapiens.v79)
 importFrom(Matrix,rowSums)
+importFrom(RColorBrewer,brewer.pal)
 importFrom(SingleCellExperiment,colData)
 importFrom(SingleCellExperiment,counts)
 importFrom(biomaRt,getBM)
@@ -44,15 +45,19 @@ importFrom(ggplot2,guide_legend)
 importFrom(ggplot2,guides)
 importFrom(ggplot2,labs)
 importFrom(ggplot2,mean_cl_normal)
+importFrom(ggplot2,position_dodge2)
 importFrom(ggplot2,scale_alpha)
 importFrom(ggplot2,scale_colour_brewer)
 importFrom(ggplot2,scale_colour_manual)
 importFrom(ggplot2,scale_fill_gradient2)
+importFrom(ggplot2,scale_fill_manual)
 importFrom(ggplot2,scale_shape_manual)
+importFrom(ggplot2,scale_y_continuous)
 importFrom(ggplot2,stat_summary)
 importFrom(ggplot2,theme)
 importFrom(ggplot2,unit)
 importFrom(ggrepel,geom_text_repel)
+importFrom(grDevices,colorRampPalette)
 importFrom(grid,unit)
 importFrom(gridExtra,arrangeGrob)
 importFrom(gtools,mixedsort)
@@ -68,7 +73,9 @@ importFrom(stats,model.matrix)
 importFrom(stats,p.adjust)
 importFrom(stats,quantile)
 importFrom(stats,reshape)
+importFrom(stringr,str_split)
 importFrom(stringr,str_sub)
 importFrom(utils,globalVariables)
+importFrom(utils,tail)
 importFrom(viridis,scale_colour_viridis)
 importFrom(viridis,scale_fill_viridis)
diff --git a/R/compute_downsampled_corr.r b/R/compute_downsampled_corr.r
@@ -1,7 +1,7 @@
 # Define global variables
 utils::globalVariables(c(".","dataset"))
 
-#' For a given down-sampled DE output, computes the correlation of the log-foldchange of the DEGs (at specified p-value) for a given dataset (celltype)
+#' For a given down-sampled DGE analysis output, computes the correlation of the log-foldchange of the DEGs (at specified p-value) for a given dataset (celltype)
 
 #' @importFrom ggcorrplot ggcorrplot
 #' @importFrom data.table rbindlist setkey

diff --git a/R/correlation_analysis.r b/R/correlation_analysis.r
@@ -10,7 +10,7 @@
 #' @param numSubsets number of pairs of random subsets of the dataset used to select significant DEGs from
 #' @param output_path base path in which outputs will be stored
 
-#' Saves all plots and DE outputs in the appropriate directories
+#' Saves all plots and DGE analysis outputs in the appropriate directories
 #' @export
 
 correlation_analysis <- function(dataset_name="placeholder",

diff --git a/R/downsampling_DEanalysis.r b/R/downsampling_DEanalysis.r
@@ -6,7 +6,7 @@ utils::globalVariables(c("PValue","name"))
 #' @importFrom stats as.formula
 
 #' @param data the input data (should be an SCE object)
-#' @param range_downsampled range of values to be downsampled for, in ascending order
+#' @param range_downsampled vector or list containing the values at which the data will be downsampled, in ascending order
 #' @param output_path base path in which outputs will be stored
 #' @param sampled downsampling carried out based on what (either "individuals" or "cells")
 #' @param sampleID sample ID
@@ -23,7 +23,7 @@ utils::globalVariables(c("PValue","name"))
 #' @param pval_adjust_method the adjustment method for the p-value in the differential expression analysis. Default is benjamini hochberg "BH". See  stats::p.adjust for available options
 #' @param rmv_zero_count_genes whether genes with no count values in any cell should be removed. Default is TRUE
 
-#' Saves all DE outputs for downsampled files as well as a summary table of results showing number of true DEGs detected at each number of samples/cells
+#' Saves all DGE analysis outputs for downsampled files as well as a summary table of results showing number of true DEGs detected at each number of samples/cells
 
 downsampling_DEanalysis <- function(data,
                                     range_downsampled="placeholder",
@@ -44,7 +44,7 @@ downsampling_DEanalysis <- function(data,
                                     rmv_zero_count_genes=TRUE){
 
     # alter range_downsampled
-    if(range_downsampled=="placeholder"){
+    if(identical(range_downsampled,"placeholder")){
         range_downsampled <- downsampling_range(data, sampled, sampleID)
     }
     # alter design

diff --git a/R/downsampling_corrplots.r b/R/downsampling_corrplots.r
@@ -6,9 +6,9 @@
 #' @importFrom ggcorrplot ggcorrplot
 
 #' @param data the input data (should be an SCE object)
-#' @param range_downsampled range of values to be downsampled for, in ascending order
+#' @param range_downsampled vector or list containing the values at which the data will be downsampled, in ascending order
 #' @param output_path base path in which outputs will be stored
-#' @param inpath base path where downsampled DE output folders are stored (taken to be output_path if not provided)
+#' @param inpath base path where downsampled DGE analysis output folders are stored (taken to be output_path if not provided)
 #' @param sampled downsampling carried out based on what (either "individuals" or "cells")
 #' @param sampleID sample ID
 #' @param celltypeID cell type ID
@@ -38,7 +38,7 @@ downsampling_corrplots <- function(data,
                                    rmv_zero_count_genes=TRUE){
 
     # alter range_downsampled
-    if(range_downsampled=="placeholder"){
+    if(identical(range_downsampled,"placeholder")){
         range_downsampled <- downsampling_range(data, sampled, sampleID)
     }
     # alter inpath
@@ -106,7 +106,7 @@ downsampling_corrplots <- function(data,
                     # go into permutation
                     subpath <- paste0(newpath,"/",paste0(num_samples,"_",i))
                     setwd(subpath)
-                    # read DE output
+                    # read DGE analysis output
                     load(paste0("DEout",num_samples,"_",i,".RData"))
                     # get df for top 1000 genes
                     all_genes <- get(paste0("DEout_",num_samples))$celltype_all_genes[[celltype_name]]
@@ -153,7 +153,7 @@ downsampling_corrplots <- function(data,
                     # go into permutation
                     subpath <- paste0(newpath,"/",paste0(num_samples,"_",i))
                     setwd(subpath)
-                    # read DE output
+                    # read DGE analysis output
                     load(paste0("DEout",num_samples,"_",i,".RData"))
                     # get df for top 500 genes
                     all_genes <- get(paste0("DEout_",num_samples))$celltype_all_genes[[celltype_name]]
@@ -206,7 +206,7 @@ downsampling_corrplots <- function(data,
                     # go into permutation
                     subpath <- paste0(newpath,"/",paste0(num_cells,"_",i))
                     setwd(subpath)
-                    # read DE output
+                    # read DGE analysis output
                     load(paste0("DEout",num_cells,"_",i,".RData"))
                     # get df for top 1000 genes
                     all_genes <- get(paste0("DEout_",num_cells))$celltype_all_genes[[celltype_name]]
@@ -239,7 +239,7 @@ downsampling_corrplots <- function(data,
         ggsave("meanCorr_downsampling_cells_1000.pdf",meanCorr_downsampling_cells_1000.plot,width=25,height=20,units="cm",bg="white")
 
         ## correlation using down-sampled datasets (cells) - top 500 genes
-        # load in DE outputs for down-sampled datasets, take union of DEGs across perms
+        # load in DGE analysis outputs for down-sampled datasets, take union of DEGs across perms
         setwd(path)
         # get corr matrices for each permutation
         corrMats_cells_500 <- list()
@@ -256,7 +256,7 @@ downsampling_corrplots <- function(data,
                     # go into permutation
                     subpath <- paste0(newpath,"/",paste0(num_cells,"_",i))
                     setwd(subpath)
-                    # read DE output
+                    # read DGE analysis output
                     load(paste0("DEout",num_cells,"_",i,".RData"))
                     # get df for top 500 genes
                     all_genes <- get(paste0("DEout_",num_cells))$celltype_all_genes[[celltype_name]]

diff --git a/R/downsampling_range.r b/R/downsampling_range.r
@@ -7,7 +7,7 @@
 #' @param sampled downsampling carried out based on what (either "individuals" or "cells")
 #' @param sampleID sample ID
 
-#' @return a list of values to be downsampled at
+#' @return list containing the values at which the data will be downsampled, in ascending order
 
 downsampling_range <- function(data,
                                sampled="individuals",