-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathnormalize_sim_rand.R
59 lines (46 loc) · 2.01 KB
/
normalize_sim_rand.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# quantile normalization for dyn data
library(preprocessCore)
library(Seurat)
library(dplyr)
design <- "5000.gene.4ct"
n.cells <- 4000
n.cells.per.type <- 1000
n.genes <- 5000
version.n <- 1
output_dyn_data <- function(design, version.n){
gen <- read.csv(paste0(design, ".dyn.v", version.n, ".S.clean.csv"), header = F)
rand <- read.csv(paste0(design, ".random.dyn.", version.n, ".txt"), header = F, row.names = 1)
a <- normalize.quantiles.use.target(as.matrix(gen),target = as.vector(as.matrix(rand)),copy=TRUE,subset=NULL)
rownames(a) <- rownames(a) - 1
colnames(a) <- colnames(rand)
gex <- rbind(a, rand)
write.table(gex, paste0(design, ".v", version.n, ".dyn.clean.csv"), quote = F, row.names = F, col.names = F, sep = ",")
}
output_dyn_data(design = design, version.n = 3)
output_dyn_data(design = "5000.gene.6ct", version.n = 1)
output_dyn_data(design = "5000.gene.6ct", version.n = 2)
output_dyn_data(design = "5000.gene.6ct", version.n = 3)
# ``````````````````
# below are codes for code testing
gen <- read.csv(paste0(design, ".dyn.v", version.n, ".S.clean.csv"), header = F)
rand <- read.csv(paste0(design, ".random.dyn.", version.n, ".txt"), header = F, row.names = 1)
a <- normalize.quantiles.use.target(as.matrix(gen),target = as.vector(as.matrix(rand)),copy=TRUE,subset=NULL)
rownames(a) <- rownames(a) - 1
colnames(a) <- colnames(rand)
gex <- rbind(a, rand)
# colnames(gex) <- paste0("V", 1:n.cells)
write.table(gex, paste0(design, ".v", version.n, ".dyn.clean.csv"), quote = F, row.names = F, col.names = F, sep = ",")
#
rownames(gex) <- paste0("gene", rownames(gex))
colnames(gex) <- paste0("cell_", colnames(gex))
sr <- CreateSeuratObject(gex)
sr$cell.type <- c(rep("cell.type.1", n.cells.per.type), rep("cell.type.2", n.cells.per.type),
rep("cell.type.3", n.cells.per.type), rep("cell.type.4", n.cells.per.type))
sr <- sr %>%
NormalizeData() %>%
FindVariableFeatures() %>%
ScaleData() %>%
RunPCA() %>%
FindNeighbors() %>%
RunUMAP(dims = 1:10)
DimPlot(sr, group.by = "cell.type")