From 1f98ddd83366db53d4aabfa6f42b1153f8e1553d Mon Sep 17 00:00:00 2001 From: jcolomb Date: Tue, 22 Oct 2013 20:41:35 +0200 Subject: [PATCH] pca3d+ test figshare --- CeTrAn/other_codes/3Dplots_for_PCA_2.r | 36 +++-- CeTrAn/other_codes/test3dpca.r | 208 +++++++++++++++++++++++++ CeTrAn/scripts/PCA.r | 14 +- figshare upload code.R | 10 +- 4 files changed, 250 insertions(+), 18 deletions(-) create mode 100644 CeTrAn/other_codes/test3dpca.r diff --git a/CeTrAn/other_codes/3Dplots_for_PCA_2.r b/CeTrAn/other_codes/3Dplots_for_PCA_2.r index eebb76d..7c5341e 100644 --- a/CeTrAn/other_codes/3Dplots_for_PCA_2.r +++ b/CeTrAn/other_codes/3Dplots_for_PCA_2.r @@ -66,23 +66,32 @@ text3d(loadings[j,1]*scalingfact*1.1,loadings[j,3]*scalingfact*1.1,loadings[j,2] ### ### Plot data, mean +- ses on the 3 axes, and ellipse of X=20% confidence interval -## beware: no legend -LEV=0.4 -plot3d(loadings[,1]*scalingfact,loadings[,3]*scalingfact,loadings[,2]*scalingfact, - type = "n",box =FALSE,axes=TRUE,xlab="PC1",ylab="PC3",zlab="PC2") +## beware: no legend +setwd(outputpath) +pdf("samplesize_color_forpca.pdf") +par(mai= c(1.5,1.5,1.5,1.5)) +plot(PCA_res$group, type="n", main="samplesize", ylim=c(0,30)) +for (i in 1:length(levels(PCA_res$group))){ + + plot(PCA_res$group[PCA_res$group == levels(PCA_res$group)[i]], col=i, add=TRUE) + } +dev.off() +LEV=0.4 plot3d(c(0,PCA_res$PC1)*scalingfact,c(0,PCA_res$PC3)*scalingfact,c(0,PCA_res$PC2)*scalingfact, type = "n",box =FALSE,axes=TRUE,xlab="PC1",ylab="PC3",zlab="PC2") for (i in 1:length(levels(PCA_res$group))){ X = subset(PCA_res,PCA_res$group == levels(PCA_res$group)[i]) - points= data.frame(X$PC1, X$PC3,X$PC2) + points= data.frame(X$PC1, X$PC3,X$PC2) + plot(PCA_res$group, type="n") + plot(PCA_res$group[PCA_res$group == levels(PCA_res$group)[i]], col=i, add=TRUE) plot3d(points, col=i,type= "p",add=TRUE) #plot3d(X$PC1, X$PC3,X$PC2, col=i,type= "p",add=TRUE) #Ell= sd(points)/sqrt(nrow(points)) #plot3d(ellipse3d(cor(points), centre = mean(points), level=LEV), col=i, alpha=0.3 , add=TRUE) - plot3d(ellipse3d(cov(points), centre = mean(points), level=LEV), col=i+2, alpha=0.3 , add=TRUE) + x=Mean_PCA_3d$means$PCA_res.PC1[i] y=Mean_PCA_3d$means$PCA_res.PC3[i] @@ -92,7 +101,9 @@ y2=Mean_PCA_3d$ses$PCA_res.PC3[i] z2 =Mean_PCA_3d$ses$PCA_res.PC3[i] A= matrix (c(x-x2,y,z,x+x2,y,z,x,y-y2,z,x,y+y2,z,x,y, z-z2,x,y,z+z2),6,3, byrow =TRUE) segments3d(A, col=i) - #textplot (levels(PCA_res$group)[i],col=i, add=TRUE) + #textplot (levels(PCA_res$group)[i],col=i, add=TRUE) + + plot3d(ellipse3d(cov(points), centre = c(x,y,z), level=LEV), col=i, alpha=0.3 , add=TRUE) } # text3d(max(points[1]),max(points[3]),max(points[2]), @@ -108,18 +119,21 @@ z2 =Mean_PCA_3d$ses$PCA_res.PC3[i] ########## code for making a snapshot -######## multiple snapshot to transform into a video +######## multiple snapshot to transform into a video +setwd(paste(outputpath,"/3dpca",sep="")) + for (i in 0:180) { rgl.viewpoint(theta = -180+2*i, phi =-90+(i ), fov = 45, zoom = 1) filename <- paste("pic1_",1000+i,".png",sep="") - #rgl.snapshot(filename, fmt="png") -} + rgl.snapshot(filename, fmt="png") +} + for (k in 0:90) { rgl.viewpoint(theta = 0, phi =k, fov = 45, zoom = 1) filename <- paste("pic2_",1000+k,".png",sep="") - #rgl.snapshot(filename, fmt="png") + rgl.snapshot(filename, fmt="png") } diff --git a/CeTrAn/other_codes/test3dpca.r b/CeTrAn/other_codes/test3dpca.r new file mode 100644 index 0000000..9e62179 --- /dev/null +++ b/CeTrAn/other_codes/test3dpca.r @@ -0,0 +1,208 @@ +library(corrplot) + +message("start PCA") +#f_table= f_table_ori + +f_table_new = f_table +f_table2 =f_table_new +#colnames(f_table_new) +# +##firstpaperorder +# colnames(f_table2)=c ("AA_id","AB_group","H_speeds","N_distance_traveled","D_turning_angle","E_meander", + # "O_activitytime_ST", + # "K_act_bouts_ST","C_pause_length_ST","I_#pauses_ST","P_activitytime_timeT", + # "L_act_bouts_DtimeT","B_pause_length_timeT","J_#pauses_timeT", "F_thigmotaxis_moving", "G_thigmotasix_pause","M_#walks","AC_stripe_deviation") + +if (MINCOL==2){ + colnames(f_table2)= c("AA_id","AB_group","H_speeds","N_distance_traveled","D_turning_angle","E_meander","XA_activitytime_ST","XB_act_bouts_ST","XC_pause_length_ST","XD_#pauses_ST","P_activitytime_timeT","L_act_bouts_DtimeT","B_pause_length_timeT","J_#pauses_timeT","F_thigmotaxis_moving", "G_thigmotasix_pause","M_#walks","AC_stripe_deviation") +} + +if (MINCOL==8){ + colnames(f_table2)= c("AA_id","AB_group","ZA_genotype", "ZB_treatment", "ZC_machine","ZD_other", "ZE_date","ZF_timeofday","H_speeds","N_distance_traveled","D_turning_angle","E_meander","XA_activitytime_ST","XB_act_bouts_ST","XC_pause_length_ST","XD_#pauses_ST","P_activitytime_timeT","L_act_bouts_DtimeT","B_pause_length_timeT","J_#pauses_timeT","F_thigmotaxis_moving", "G_thigmotasix_pause","M_#walks","AC_stripe_deviation") +} +###add this to make the pca over the genotype group only + +# if (MINCOL==8){ + # colnames(f_table2)= c("AA_id","ZA_group2","AB_group", "ZB_treatment", "ZC_machine","ZD_other", "ZE_date","ZF_timeofday","H_speeds","N_distance_traveled","D_turning_angle","E_meander","XA_activitytime_ST","XB_act_bouts_ST","XC_pause_length_ST","XD_#pauses_ST","P_activitytime_timeT","L_act_bouts_DtimeT","B_pause_length_timeT","J_#pauses_timeT","F_thigmotaxis_moving", "G_thigmotasix_pause","M_#walks","AC_stripe_deviation") +# } + +data.frame(names(f_table),names(f_table2)) +## get linearity score out +#f_table2=f_table2[,-16] + +## reorder variables following the free walk analysis +f_table2= f_table2[,order(colnames(f_table2))] + +### gives output if not all calculation are made +#if(length(f_table_new) != 19) f_table2=f_table_new + + +#### ENTER HERE MANUALLY A NEW DATA AS ENTRY FOR THE PCA ANALYSIS +# f_table2= f_table + + +i_table =na.omit(f_table2) +h_table<-f_table2[,c(3:(length(f_table2)-MINCOL+2-4))] #+2 from group and id, -4 to take ST out +rownames (h_table) = paste (c(1:length(f_table2[,1])),f_table2[,2]) +#h_table<-f_table2[,c(3:13,16:length(f_table2))] +g_table <- na.omit(h_table) + head(g_table) +mydata.pca <- prcomp(g_table, retx= TRUE, center= TRUE, scale.=TRUE) + + + +sd <- mydata.pca$sdev +loadings <- mydata.pca$rotation +rownames(loadings) <- colnames(g_table) +scores <- mydata.pca$x + + +t=mydata.pca$sdev^2 / sum(mydata.pca$sdev^2) +t2= cumsum(t) +plot(t2*10, main= "variance explained cumulative") + + +PCA_res= data.frame(scores) +PCA_res$group = as.factor(id_table$genotype) + +#PCA_res$group = i_table[,19] + +#Plot PC1 and 2 info on one graph +PCA1to3 = data.frame(PCA_res$PC1,PCA_res$PC2,PCA_res$PC3,PCA_res$group) + +Mean_PCA = create.mean.table(PCA1to3,levels(PCA1to3$PCA_res.group),1:2) +Mean_PCA2 = create.mean.table(PCA1to3,levels(PCA1to3$PCA_res.group),c(1,3)) +################################################# +################################################### +################################################## +setwd(outputpath) +scalingfact=7 +scalingaxis=1 + + +###prepare mean tables +PCA1to3 = data.frame(PCA_res$PC1,PCA_res$PC2,PCA_res$PC3,PCA_res$group) +Mean_PCA_3d = create.mean.table(PCA1to3,levels(PCA1to3$PCA_res.group),1:3) +Mean_PCA = create.mean.table(PCA1to3,levels(PCA1to3$PCA_res.group),1:2) + + +abc= c(1:8, rgb(202,100,20,maxColorValue = 255),rgb(100,0,200,maxColorValue = 255)) + + plot(scores[,1], scores[,2], xlab="PCA 1", ylab="PCA 2", + type="n", main="distance biplot",xlim=c(-max(scores[,1:2]*scalingaxis), max(scores[,1:2]*scalingaxis)), + ylim=c(-max(scores[,1:2]*scalingaxis), max(scores[,1:2]*scalingaxis))) + + +for (i in 1:length(levels(PCA_res$group))){ + X = subset(PCA_res,PCA_res$group == levels(PCA_res$group)[i]) + points(X$PC2~ X$PC1, col=abc[i]) + #legend(list(x=-6,y=-i/2+5), legend=(levels(PCA_res$group)[i]), fill= i, bty="n") + #text(x+1,y-0.1, levels(PCA_res$group)[i], col=i, cex=0.7) +legend("topleft", legend=(levels(PCA_res$group)[i]), fill= abc[i], bty="n", inset = c(0,i/30)) + + x=Mean_PCA$means$PCA_res.PC1[i] +y=Mean_PCA$means$PCA_res.PC2[i] +x2=Mean_PCA$ses$PCA_res.PC1[i] +y2=Mean_PCA$ses$PCA_res.PC2[i] +segments (x-x2,y,x+x2,y, col=abc[i]) +segments (x,y-y2,x,y+y2, col=abc[i]) + } + abline (v=0, h=0) + + plot(scores[,1], scores[,3], xlab="PCA 1", ylab="PCA 3", + type="n", main="distance biplot",xlim=c(-max(scores[,1:3]*scalingaxis), max(scores[,1:3]*scalingaxis)), + ylim=c(-max(scores[,1:3]*scalingaxis), max(scores[,1:3]*scalingaxis))) + + +for (i in 1:length(levels(PCA_res$group))){ + X = subset(PCA_res,PCA_res$group == levels(PCA_res$group)[i]) + points(X$PC3~ X$PC1, col=abc[i]) + #legend(list(x=-6,y=-i/2+5), legend=(levels(PCA_res$group)[i]), fill= i, bty="n") + #text(x+1,y-0.1, levels(PCA_res$group)[i], col=i, cex=0.7) +legend("topleft", legend=(levels(PCA_res$group)[i]), fill= abc[i], bty="n", inset = c(0,i/30)) + + x=Mean_PCA2$means$PCA_res.PC1[i] +y=Mean_PCA2$means$PCA_res.PC3[i] +x2=Mean_PCA2$ses$PCA_res.PC1[i] +y2=Mean_PCA2$ses$PCA_res.PC3[i] +segments (x-x2,y,x+x2,y, col=abc[i]) +segments (x,y-y2,x,y+y2, col=abc[i]) + } + abline (v=0, h=0) + + + +########################################## +## plot data pc1 and 2 / 1 and 3 without points +M=max(Mean_PCA$means)+max(Mean_PCA$ses)*1.1 + plot(scores[,1], scores[,2], xlab="PCA 1", ylab="PCA 2", + type="n", main="distance biplot",xlim=c(-M, M), + ylim=c(-M, M)) + +#plot(PCA_res$PC2~PCA_res$PC1, type= "n", add=TRUE) +for (i in 1:length(levels(PCA_res$group))){ + X = subset(PCA_res,PCA_res$group == levels(PCA_res$group)[i]) + #points(X$PC2~ X$PC1, col=i, pch= i+10) +legend("topleft", legend=(levels(PCA_res$group)[i]), fill= abc[i], bty="n", inset = c(0,i/30)) + #text(x+1,y-0.1, levels(PCA_res$group)[i], col=i, +# cex=0.7) + x=Mean_PCA$means$PCA_res.PC1[i] +y=Mean_PCA$means$PCA_res.PC2[i] +x2=Mean_PCA$ses$PCA_res.PC1[i] +y2=Mean_PCA$ses$PCA_res.PC2[i] +segments (x-x2,y,x+x2,y, col=abc[i]) +segments (x,y-y2,x,y+y2, col=abc[i]) + } + abline (v=0, h=0) + + + + +####### +###################################################### +#plot all +require (rgl) +load("/Users/choupi/Desktop/pca3d.rdata") + + +setwd(outputpath) +pdf("samplesize_color_forpca.pdf") +par(mai= c(1.5,1.5,1.5,1.5)) +plot(PCA_res$group, type="n", main="samplesize", ylim=c(0,30)) +for (i in 1:length(levels(PCA_res$group))){ + + plot(PCA_res$group[PCA_res$group == levels(PCA_res$group)[i]], col=abc[i], add=TRUE) + } + +dev.off() +par(mai= c(1,1,1,1)) + +LEV=0.4 +scalingfact=1 + +plot3d(c(0,PCA_res$PC1)*scalingfact,c(0,PCA_res$PC3)*scalingfact,c(0,PCA_res$PC2)*scalingfact, + type = "n",box =FALSE,axes=TRUE,xlab="PC1",ylab="PC3",zlab="PC2") + + for (i in 1:length(levels(PCA_res$group))){ + #add points: + #X = subset(PCA_res,PCA_res$group == levels(PCA_res$group)[i]) + points= data.frame(X$PC1, X$PC3,X$PC2) + #plot3d(points, col=i,type= "p",add=TRUE) + + + + +x=Mean_PCA_3d$means$PCA_res.PC1[i] +y=Mean_PCA_3d$means$PCA_res.PC3[i] +z=Mean_PCA_3d$means$PCA_res.PC2[i] +x2=Mean_PCA_3d$ses$PCA_res.PC1[i] +y2=Mean_PCA_3d$ses$PCA_res.PC3[i] +z2 =Mean_PCA_3d$ses$PCA_res.PC3[i] + A= matrix (c(x-x2,y,z,x+x2,y,z,x,y-y2,z,x,y+y2,z,x,y, z-z2,x,y,z+z2),6,3, byrow =TRUE) + segments3d(A, col=abc[i]) + #textplot (levels(PCA_res$group)[i],col=i, add=TRUE) + + plot3d(ellipse3d(cov(points), centre = c(x,y,z), level=LEV), col=i, alpha=0.3 , add=TRUE) + } + +#PCA_res$group= as.factor(id_table$genotype) \ No newline at end of file diff --git a/CeTrAn/scripts/PCA.r b/CeTrAn/scripts/PCA.r index 144a715..cc224a6 100644 --- a/CeTrAn/scripts/PCA.r +++ b/CeTrAn/scripts/PCA.r @@ -19,7 +19,12 @@ if (MINCOL==2){ if (MINCOL==8){ colnames(f_table2)= c("AA_id","AB_group","ZA_genotype", "ZB_treatment", "ZC_machine","ZD_other", "ZE_date","ZF_timeofday","H_speeds","N_distance_traveled","D_turning_angle","E_meander","XA_activitytime_ST","XB_act_bouts_ST","XC_pause_length_ST","XD_#pauses_ST","P_activitytime_timeT","L_act_bouts_DtimeT","B_pause_length_timeT","J_#pauses_timeT","F_thigmotaxis_moving", "G_thigmotasix_pause","M_#walks","AC_stripe_deviation") -} +} +###add this to make the pca over the genotype group only + +# if (MINCOL==8){ + # colnames(f_table2)= c("AA_id","ZA_group2","AB_group", "ZB_treatment", "ZC_machine","ZD_other", "ZE_date","ZF_timeofday","H_speeds","N_distance_traveled","D_turning_angle","E_meander","XA_activitytime_ST","XB_act_bouts_ST","XC_pause_length_ST","XD_#pauses_ST","P_activitytime_timeT","L_act_bouts_DtimeT","B_pause_length_timeT","J_#pauses_timeT","F_thigmotaxis_moving", "G_thigmotasix_pause","M_#walks","AC_stripe_deviation") +# } data.frame(names(f_table),names(f_table2)) ## get linearity score out @@ -58,9 +63,9 @@ plot(t2*10, main= "variance explained cumulative") PCA_res= data.frame(scores) -PCA_res$group = i_table[,2] +PCA_res$group = as.factor(i_table[,2]) -PCA_res$group = i_table[,19] +#PCA_res$group = i_table[,19] #Plot PC1 and 2 info on one graph PCA1to3 = data.frame(PCA_res$PC1,PCA_res$PC2,PCA_res$PC3,PCA_res$group) @@ -81,7 +86,8 @@ Mean_PCA_3d = create.mean.table(PCA1to3,levels(PCA1to3$PCA_res.group),1:3) Mean_PCA = create.mean.table(PCA1to3,levels(PCA1to3$PCA_res.group),1:2) -abc= c(1:8, rgb(202,100,20,maxColorValue = 255),rgb(100,0,200,maxColorValue = 255)) +abc= c(1:8, rgb(202,100,20,maxColorValue = 255),rgb(100,0,200,maxColorValue = 255)) + plot(scores[,1], scores[,2], xlab="PCA 1", ylab="PCA 2", type="n", main="distance biplot",xlim=c(-max(scores[,1:2]*scalingaxis), max(scores[,1:2]*scalingaxis)), ylim=c(-max(scores[,1:2]*scalingaxis), max(scores[,1:2]*scalingaxis))) diff --git a/figshare upload code.R b/figshare upload code.R index 6726720..ef52f76 100644 --- a/figshare upload code.R +++ b/figshare upload code.R @@ -7,7 +7,10 @@ options(FigshareKey = "jBkrLNsYbeov2oM09cXBBw") options(FigsharePrivateKey = "LhQkDeJaVJzAhWRIKYeobA") options(FigshareToken = "BYuYn4OjWjd8njBKCyeFXQOEAYY1MfLJ1Y0z80rVWj6AXYuYn4OjWjd8njXKCyeFXQ") options(FigsharePrivateToken = "aC2q4lBod3Xl52CeKwp7Fg") -filesetid= "807697" +#filesetid= "807697" + +filesetid="826026" + fs_auth() @@ -25,7 +28,8 @@ alreadyuploaded=c() newuploaded = c() nup=0 newup=0 -for (i in c(1: length(DFN))){ +for (i in c(1: 8)){ +#for (i in c(1: length(DFN))){ test= DFN[i] if (!all(!grepl(test, det))){ nup=nup+1 @@ -35,7 +39,7 @@ for (i in c(1: length(DFN))){ print(i) if (as.character(test) != "not_accessible"){ - fs_upload(article_id=filesetid, file= test) + fs_upload(article_id=filesetid, file= as.character(test)) #scan(as.character(test),nlines = 1, what = character()) }