-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathmiso_modeling.R
89 lines (80 loc) · 3.75 KB
/
miso_modeling.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
library(stats)
# read in attributes
args = commandArgs(trailingOnly = T)
path = args[1]
D_dist = args[2]
# list of half_lives
half_lives <- c(seq(0.2,0.9,by=0.1),seq(1,10,by=0.75),seq(11,100,by=2))
expression_levels <- seq(1,50,5)
# list of introns
introns = c(seq(0.04,0.09,by=0.02), seq(0.1,1,by=0.1), seq(1,50,by=2))
uplength = 500
downlength = 300
intronnames <- c()
for(intron in introns){
#print(intron)
chr = paste0("intron",(intron*1000))
genelength = uplength + downlength + (intron*1000)
downexon_start = uplength + (intron*1000)
# gene row
intronnames <- c(intronnames,
paste0("chr",chr,":1:",uplength,":+@chr",chr,":",downexon_start,":",genelength,":+"))
}
getLM_throughzero_tau_lowadj <- function(input){
vals <- as.numeric(input[1:3])
distance <- as.numeric(input[4])*1000
# assume txn rate is 1.5kb/minute
tau = distance/1500
# get corrected time
times <- c(5,10,20)
times_with <- (times+tau)/2
lm.row = c(NA,NA,NA,NA)
try(lm.hold <- lm(log(vals)~0+times_with))
try(lm.row <- c(as.numeric(lm.hold$coefficients[1]),summary(lm.hold)$coefficients[1,4],
summary(lm.hold)$adj.r.squared, "fit"))
# correct for low half-life; if second effective timepoint should have PSI < 0.1
k <- -log(vals[1])/times_with[1]
meansecond <- exp(-k*times_with[2])
try(if(meansecond <= 0.1){ lm.row <- c(-k, NA, NA, "corrected") })
return(lm.row)
}
get_halflife <- function(x){
#x = [constant]
#half.val <- 1-x[3]
#return((x[2] + log(half.val))/x[1])
return(log(2)/(-x))
}
## go through all half-lives for given D_dist
psi.data.all <- c()
for(exp in expression_levels){
for(halflife in half_lives){
print(paste0(exp," - ",halflife))
# name files
min5.name <- paste0(path,'summary/m5/m5_D',D_dist,'_X',exp,'_H',halflife,'_reads_sorted_introns.psi.miso_summary')
min10.name <- paste0(path,'summary/m10/m10_D',D_dist,'_X',exp,'_H',halflife,'_reads_sorted_introns.psi.miso_summary')
min20.name <- paste0(path,'summary/m20/m20_D',D_dist,'_X',exp,'_H',halflife,'_reads_sorted_introns.psi.miso_summary')
# read in files
if(file.exists(min5.name) & file.exists(min10.name) & file.exists(min20.name)){
min5 <- read.table(min5.name,header=T,sep="\t")
min10 <- read.table(min10.name,header=T,sep="\t")
min20 <- read.table(min20.name,header=T,sep="\t")
# make dataframe of data
psi.data <- data.frame(intron = intronnames,
intron_len = introns,
D_dist = D_dist,
expression = exp,
sim_half_life = halflife,
psi5 = min5$miso_posterior_mean[match(intronnames, min5$event_name)],
psi10 = min10$miso_posterior_mean[match(intronnames, min10$event_name)],
psi20 = min20$miso_posterior_mean[match(intronnames, min20$event_name)])
# get rates
psi.data$coef <- t(apply(psi.data[,c(6:8,3)], 1, getLM_throughzero_tau_lowadj))[,1]
psi.data$halflife <- sapply(as.numeric(psi.data$coef), get_halflife)
# add to overall dataframe
psi.data.all <- rbind(psi.data.all, psi.data)
}
else { print("... no files!") }
}
}
psi.file <- paste0(path,'psiSIM_D',D_dist,'.txt')
write.table(psi.data.all, file=psi.file, quote=F, sep="\t",row.names=F,col.names=T)