-
Notifications
You must be signed in to change notification settings - Fork 39
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
f506f26
commit e28b624
Showing
15 changed files
with
1,835 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
Version: 1.0 | ||
|
||
RestoreWorkspace: Default | ||
SaveWorkspace: Default | ||
AlwaysSaveHistory: Default | ||
|
||
EnableCodeIndexing: Yes | ||
UseSpacesForTab: Yes | ||
NumSpacesForTab: 2 | ||
Encoding: UTF-8 | ||
|
||
RnwWeave: Sweave | ||
LaTeX: pdfLaTeX |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,196 @@ | ||
library(exactRankTests) | ||
library(nlme) | ||
library(ggplot2) | ||
|
||
ancom.W = function(otu_data,var_data, | ||
adjusted,repeated, | ||
main.var,adj.formula, | ||
repeat.var,long,rand.formula, | ||
multcorr,sig){ | ||
|
||
n_otu=dim(otu_data)[2]-1 | ||
|
||
otu_ids=colnames(otu_data)[-1] | ||
|
||
if(repeated==F){ | ||
data_comp=data.frame(merge(otu_data,var_data,by="Sample.ID",all.y=T),row.names=NULL, check.names = F) | ||
#data_comp=data.frame(merge(otu_data,var_data[,c("Sample.ID",main.var)],by="Sample.ID",all.y=T),row.names=NULL) | ||
}else if(repeated==T){ | ||
data_comp=data.frame(merge(otu_data,var_data,by="Sample.ID"),row.names=NULL, check.names = F) | ||
# data_comp=data.frame(merge(otu_data,var_data[,c("Sample.ID",main.var,repeat.var)],by="Sample.ID"),row.names=NULL) | ||
} | ||
|
||
base.formula = paste0("lr ~ ",main.var) | ||
if(repeated==T){ | ||
repeat.formula = paste0(base.formula," | ", repeat.var) | ||
} | ||
if(adjusted==T){ | ||
adjusted.formula = paste0(base.formula," + ", adj.formula) | ||
} | ||
|
||
if( adjusted == F & repeated == F ){ | ||
fformula <- formula(base.formula) | ||
} else if( adjusted == F & repeated == T & long == T ){ | ||
fformula <- formula(base.formula) | ||
}else if( adjusted == F & repeated == T & long == F ){ | ||
fformula <- formula(repeat.formula) | ||
}else if( adjusted == T & repeated == F ){ | ||
fformula <- formula(adjusted.formula) | ||
}else if( adjusted == T & repeated == T ){ | ||
fformula <- formula(adjusted.formula) | ||
}else{ | ||
stop("Problem with data. Dataset should contain OTU abundances, groups, | ||
and optionally an ID for repeated measures.") | ||
} | ||
|
||
|
||
|
||
if( repeated==FALSE & adjusted == FALSE){ | ||
if( length(unique(data_comp[,which(colnames(data_comp)==main.var)]))==2 ){ | ||
tfun <- exactRankTests::wilcox.exact | ||
} else{ | ||
tfun <- stats::kruskal.test | ||
} | ||
}else if( repeated==FALSE & adjusted == TRUE){ | ||
tfun <- stats::aov | ||
}else if( repeated== TRUE & adjusted == FALSE & long == FALSE){ | ||
tfun <- stats::friedman.test | ||
}else if( repeated== TRUE & adjusted == FALSE & long == TRUE){ | ||
tfun <- nlme::lme | ||
}else if( repeated== TRUE & adjusted == TRUE){ | ||
tfun <- nlme::lme | ||
} | ||
|
||
logratio.mat <- matrix(NA, nrow=n_otu, ncol=n_otu) | ||
for(ii in 1:(n_otu-1)){ | ||
for(jj in (ii+1):n_otu){ | ||
data.pair <- data_comp[,which(colnames(data_comp)%in%otu_ids[c(ii,jj)])] | ||
lr <- log((1+as.numeric(data.pair[,1]))/(1+as.numeric(data.pair[,2]))) | ||
|
||
lr_dat <- data.frame( lr=lr, data_comp,row.names=NULL, check.names = F ) | ||
|
||
if(adjusted==FALSE&repeated==FALSE){ ## Wilcox, Kruskal Wallis | ||
logratio.mat[ii,jj] <- tfun( formula=fformula, data = lr_dat)$p.value | ||
}else if(adjusted==FALSE&repeated==TRUE&long==FALSE){ ## Friedman's | ||
logratio.mat[ii,jj] <- tfun( formula=fformula, data = lr_dat)$p.value | ||
}else if(adjusted==TRUE&repeated==FALSE){ ## ANOVA | ||
model=tfun(formula=fformula, data = lr_dat,na.action=na.omit) | ||
picker=which(gsub(" ","",row.names(summary(model)[[1]]))==main.var) | ||
logratio.mat[ii,jj] <- summary(model)[[1]][["Pr(>F)"]][picker] | ||
}else if(repeated==TRUE&long==TRUE){ ## GEE | ||
model=tfun(fixed=fformula,data = lr_dat, | ||
random = formula(rand.formula), | ||
correlation=corAR1(), | ||
na.action=na.omit) | ||
picker=which(gsub(" ","",row.names(anova(model)))==main.var) | ||
logratio.mat[ii,jj] <- anova(model)[["p-value"]][picker] | ||
} | ||
|
||
} | ||
} | ||
|
||
ind <- lower.tri(logratio.mat) | ||
logratio.mat[ind] <- t(logratio.mat)[ind] | ||
|
||
|
||
logratio.mat[which(is.finite(logratio.mat)==FALSE)] <- 1 | ||
|
||
mc.pval <- t(apply(logratio.mat,1,function(x){ | ||
s <- p.adjust(x, method = "BH") | ||
return(s) | ||
})) | ||
|
||
a <- logratio.mat[upper.tri(logratio.mat,diag=FALSE)==TRUE] | ||
|
||
b <- matrix(0,ncol=n_otu,nrow=n_otu) | ||
b[upper.tri(b)==T] <- p.adjust(a, method = "BH") | ||
diag(b) <- NA | ||
ind.1 <- lower.tri(b) | ||
b[ind.1] <- t(b)[ind.1] | ||
|
||
######################################### | ||
### Code to extract surrogate p-value | ||
surr.pval <- apply(mc.pval,1,function(x){ | ||
s0=quantile(x[which(as.numeric(as.character(x))<sig)],0.95) | ||
# s0=max(x[which(as.numeric(as.character(x))<alpha)]) | ||
return(s0) | ||
}) | ||
######################################### | ||
### Conservative | ||
if(multcorr==1){ | ||
W <- apply(b,1,function(x){ | ||
subp <- length(which(x<sig)) | ||
}) | ||
### Moderate | ||
} else if(multcorr==2){ | ||
W <- apply(mc.pval,1,function(x){ | ||
subp <- length(which(x<sig)) | ||
}) | ||
### No correction | ||
} else if(multcorr==3){ | ||
W <- apply(logratio.mat,1,function(x){ | ||
subp <- length(which(x<sig)) | ||
}) | ||
} | ||
|
||
return(W) | ||
} | ||
|
||
|
||
|
||
ANCOM.main = function(OTUdat,Vardat, | ||
adjusted,repeated, | ||
main.var,adj.formula, | ||
repeat.var,longitudinal, | ||
random.formula, | ||
multcorr,sig, | ||
prev.cut){ | ||
|
||
p.zeroes=apply(OTUdat[,-1],2,function(x){ | ||
s=length(which(x==0))/length(x) | ||
}) | ||
|
||
zeroes.dist=data.frame(colnames(OTUdat)[-1],p.zeroes,row.names=NULL) | ||
colnames(zeroes.dist)=c("Taxon","Proportion_zero") | ||
|
||
zero.plot = ggplot(zeroes.dist, aes(x=Proportion_zero)) + | ||
geom_histogram(binwidth=0.1,colour="black",fill="white") + | ||
xlab("Proportion of zeroes") + ylab("Number of taxa") + | ||
theme_bw() | ||
|
||
#print(zero.plot) | ||
|
||
OTUdat.thinned=OTUdat | ||
OTUdat.thinned=OTUdat.thinned[,c(1,1+which(p.zeroes<prev.cut))] | ||
|
||
otu.names=colnames(OTUdat.thinned)[-1] | ||
|
||
W.detected <- ancom.W(OTUdat.thinned,Vardat, | ||
adjusted,repeated, | ||
main.var,adj.formula, | ||
repeat.var,longitudinal,random.formula, | ||
multcorr,sig) | ||
|
||
W_stat <- W.detected | ||
|
||
|
||
### Bubble plot | ||
|
||
W_frame = data.frame(otu.names,W_stat,row.names=NULL) | ||
W_frame = W_frame[order(-W_frame$W_stat),] | ||
|
||
W_frame$detected_0.9=rep(FALSE,dim(W_frame)[1]) | ||
W_frame$detected_0.8=rep(FALSE,dim(W_frame)[1]) | ||
W_frame$detected_0.7=rep(FALSE,dim(W_frame)[1]) | ||
W_frame$detected_0.6=rep(FALSE,dim(W_frame)[1]) | ||
|
||
W_frame$detected_0.9[which(W_frame$W_stat>0.9*dim(OTUdat.thinned[,-1])[2])]=TRUE | ||
W_frame$detected_0.8[which(W_frame$W_stat>0.8*dim(OTUdat.thinned[,-1])[2])]=TRUE | ||
W_frame$detected_0.7[which(W_frame$W_stat>0.7*dim(OTUdat.thinned[,-1])[2])]=TRUE | ||
W_frame$detected_0.6[which(W_frame$W_stat>0.6*dim(OTUdat.thinned[,-1])[2])]=TRUE | ||
|
||
final_results=list(W_frame,zero.plot) | ||
names(final_results)=c("W.taxa","PLot.zeroes") | ||
return(final_results) | ||
} | ||
|
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
library(readr) | ||
library(tidyverse) | ||
|
||
otu_data = read_tsv("data/ecam-table-taxa.tsv", skip = 1) | ||
otu_id = otu_data$`feature-id` | ||
otu_data = data.frame(t(otu_data[, -1]), check.names = FALSE) | ||
colnames(otu_data) = otu_id | ||
otu_data = otu_data%>%rownames_to_column("Sample.ID") | ||
|
||
meta_data = read_tsv("data/ecam-sample-metadata.tsv")[-1, ] | ||
meta_data = meta_data%>%rename(Sample.ID = `#SampleID`) | ||
|
||
otu_data = otu_data%>%arrange(Sample.ID) | ||
meta_data = meta_data%>%arrange(Sample.ID) | ||
all(otu_data$Sample.ID == meta_data$Sample.ID) | ||
|
||
main_var = "delivery"; zero_cut = 0.90; p_adjust_method = "BH"; alpha = 0.05 | ||
adj_formula = NULL; rand_formula = "~ 1 | studyid" | ||
|
||
# Run ANCOM | ||
source("ancom_v2.0.R") | ||
|
||
t_start = Sys.time() | ||
res = ANCOM(otu_data, meta_data, main_var, zero_cut, p_adjust_method, alpha, | ||
adj_formula, rand_formula) | ||
t_end = Sys.time() | ||
t_run = t_end - t_start # around 30s | ||
|
||
write_csv(res, "outputs/res_ecam.csv") | ||
|
||
# Run old ANCOM | ||
source("ancom_v1.0.R") | ||
|
||
var_data = meta_data; long = T; rand.formula="~1|studyid" | ||
OTUdat = otu_data; Vardat = meta_data | ||
adjusted=F; repeated=T; main.var="delivery"; adj.formula=NULL | ||
repeat.var="month"; longitudinal=T; random.formula="~1|studyid" | ||
multcorr=2; sig=0.05; prev.cut=0.90 | ||
|
||
t_start = Sys.time() | ||
longitudinal_comparison_test=ANCOM.main(OTUdat, Vardat, adjusted, repeated, | ||
main.var, adj.formula, repeat.var, longitudinal, | ||
random.formula, multcorr, sig, prev.cut) | ||
t_end = Sys.time() | ||
t_run_old = t_end - t_start # around 30s | ||
|
||
res_old = longitudinal_comparison_test$W.taxa | ||
|
||
|
||
|
||
|
Oops, something went wrong.