From d5c98ab5dad4eef5ba2c1a1afa55016a16c7dc91 Mon Sep 17 00:00:00 2001 From: zhaotianjing Date: Mon, 12 Feb 2024 14:03:55 -0600 Subject: [PATCH 1/2] fix estimate_scale --- src/1.JWAS/src/JWAS.jl | 22 +++++++++------ src/1.JWAS/src/MCMC/MCMC_BayesianAlphabet.jl | 28 +++++++++++-------- .../src/RRM/MCMC_BayesianAlphabet_RRM.jl | 4 +-- src/1.JWAS/src/build_MME.jl | 6 +++- src/1.JWAS/src/markers/readgenotypes.jl | 5 +--- src/1.JWAS/src/output.jl | 4 +-- src/1.JWAS/src/random_effects.jl | 5 +++- src/1.JWAS/src/types.jl | 8 +++--- test/Unitest.jl | 8 +++--- test/test_BayesianAlphabet.jl | 8 +++--- test/test_BayesianAlphabet_deprecated.jl | 8 +++--- 11 files changed, 59 insertions(+), 47 deletions(-) diff --git a/src/1.JWAS/src/JWAS.jl b/src/1.JWAS/src/JWAS.jl index 13669bc9..349d6afc 100644 --- a/src/1.JWAS/src/JWAS.jl +++ b/src/1.JWAS/src/JWAS.jl @@ -81,7 +81,6 @@ export dataset output_samples_frequency::Integer = chain_length>1000 ? div(chain_length,1000) : 1, update_priors_frequency::Integer = 0, #Methods - estimate_variance = true, single_step_analysis = false, #parameters for single-step analysis pedigree = false, #parameters for single-step analysis fitting_J_vector = true, #parameters for single-step analysis @@ -107,8 +106,7 @@ export dataset ##for deprecated JWAS methods = "conventional (no markers)", Pi = 0.0, - estimatePi = false, - estimate_scale = false) + estimatePi = false) **Run MCMC for Bayesian Linear Mixed Models with or without estimation of variance components.** @@ -126,7 +124,6 @@ export dataset * Priors are updated every `update_priors_frequency` iterations, defaulting to `0`. * Methods * Single step analysis is allowed if `single_step_analysis` = `true` and `pedigree` is provided. - * Variance components are estimated if `estimate_variance`=true, defaulting to `true`. * Miscellaneous Options * Missing phenotypes are allowed in multi-trait analysis with `missing_phenotypes`=true, defaulting to `true`. * Catogorical Traits are allowed if `categorical_trait`=true, defaulting to `false`. Phenotypes should be coded as 1,2,3... @@ -155,7 +152,6 @@ function runMCMC(mme::MME,df; output_samples_frequency::Integer = chain_length>1000 ? div(chain_length,1000) : 1, update_priors_frequency::Integer = 0, #Methods - estimate_variance = true, single_step_analysis = false, #parameters for single-step analysis pedigree = false, #parameters for single-step analysis fitting_J_vector = true, #parameters for single-step analysis @@ -181,7 +177,8 @@ function runMCMC(mme::MME,df; methods = "conventional (no markers)", Pi = 0.0, estimatePi = false, - estimate_scale = false, + estimate_scale = false, #this has been moved to get_genotypes() + estimate_variance = true, #this has been moved to build_MME() and set_random() categorical_trait = false, #this has been moved to build_model() censored_trait = false) #this has been moved to build_model() @@ -230,7 +227,7 @@ function runMCMC(mme::MME,df; Mi.name = "geno" Mi.π = Pi Mi.estimatePi = estimatePi - Mi.estimate_scale = estimate_scale + Mi.G.estimate_scale = estimate_scale Mi.method = methods end end @@ -239,6 +236,13 @@ function runMCMC(mme::MME,df; print_single_categorical_censored_trait_example() error("The arguments 'categorical_trait' and 'censored_trait' has been moved to build_model(). Please check our latest example.") end + if estimate_scale != false #user set estimate_variance=true in runMCMC() + error("The argument 'estimate_scale' for marker effect variance has been moved to get_genotypes().") + end + if estimate_variance != true #user set estimate_variance=false in runMCMC() + error("The argument 'estimate_variance' for non-marker variance components has been moved to build_MME() for residual variance, + and set_random() for random terms.") + end ############################################################################ # censored traits ############################################################################ @@ -277,7 +281,7 @@ function runMCMC(mme::MME,df; mme.MCMCinfo = MCMCinfo(heterogeneous_residuals, chain_length,burnin,output_samples_frequency, printout_model_info,printout_frequency, single_step_analysis, - fitting_J_vector,missing_phenotypes,estimate_variance, + fitting_J_vector,missing_phenotypes, update_priors_frequency,outputEBV,output_heritability,prediction_equation, seed,double_precision,output_folder,RRM) ############################################################################ @@ -575,7 +579,7 @@ function getMCMCinfo(mme) end @printf("%-30s %20s\n","estimatePi",Mi.estimatePi ? "true" : "false") end - @printf("%-30s %20s\n","estimate_scale",Mi.estimate_scale ? "true" : "false") + @printf("%-30s %20s\n","estimate_scale",Mi.G.estimate_scale ? "true" : "false") end end end diff --git a/src/1.JWAS/src/MCMC/MCMC_BayesianAlphabet.jl b/src/1.JWAS/src/MCMC/MCMC_BayesianAlphabet.jl index 44af600b..ebaeff16 100644 --- a/src/1.JWAS/src/MCMC/MCMC_BayesianAlphabet.jl +++ b/src/1.JWAS/src/MCMC/MCMC_BayesianAlphabet.jl @@ -7,7 +7,6 @@ function MCMC_BayesianAlphabet(mme,df) burnin = mme.MCMCinfo.burnin output_samples_frequency = mme.MCMCinfo.output_samples_frequency output_folder = mme.MCMCinfo.output_folder - estimate_variance = mme.MCMCinfo.estimate_variance invweights = mme.invweights update_priors_frequency = mme.MCMCinfo.update_priors_frequency has_categorical_trait = "categorical" ∈ mme.traits_type @@ -267,7 +266,7 @@ function MCMC_BayesianAlphabet(mme,df) ######################################################################## # Variance of Marker Effects ######################################################################## - if Mi.estimate_variance == true #methd specific estimate_variance + if Mi.G.estimate_variance == true #methd specific estimate_variance sample_marker_effect_variance(Mi) if mme.MCMCinfo.double_precision == false && Mi.method != "BayesB" Mi.G.val = Float32.(Mi.G.val) @@ -276,7 +275,7 @@ function MCMC_BayesianAlphabet(mme,df) ######################################################################## # Scale Parameter in Priors for Marker Effect Variances ######################################################################## - if Mi.estimate_scale == true + if Mi.G.estimate_scale == true if !is_multi_trait a = size(Mi.G.val,1)*Mi.G.df/2 + 1 b = sum(Mi.G.df ./ (2*Mi.G.val)) + 1 @@ -288,15 +287,20 @@ function MCMC_BayesianAlphabet(mme,df) ######################################################################## # 3. Non-marker Variance Components ######################################################################## - if estimate_variance == true - ######################################################################## - # 3.1 Variance of Non-marker Random Effects - # e.g, i.i.d; polygenic effects (pedigree) - ######################################################################## - sampleVCs(mme,mme.sol) - ######################################################################## - # 3.2 Residual Variance - ######################################################################## + + ######################################################################## + # 3.1 Variance of Non-marker Random Effects + # e.g, i.i.d; polygenic effects (pedigree) + ######################################################################## + if length(mme.rndTrmVec)>0 + if mme.rndTrmVec[1].Gi.estimate_variance == true + sampleVCs(mme,mme.sol) + end + end + ######################################################################## + # 3.2 Residual Variance + ######################################################################## + if mme.R.estimate_variance == true if is_multi_trait mme.R.val = sample_variance(wArray, length(mme.obsID), mme.R.df, mme.R.scale, diff --git a/src/1.JWAS/src/RRM/MCMC_BayesianAlphabet_RRM.jl b/src/1.JWAS/src/RRM/MCMC_BayesianAlphabet_RRM.jl index 02784678..f61e78d3 100644 --- a/src/1.JWAS/src/RRM/MCMC_BayesianAlphabet_RRM.jl +++ b/src/1.JWAS/src/RRM/MCMC_BayesianAlphabet_RRM.jl @@ -181,7 +181,7 @@ function MCMC_BayesianAlphabet_RRM(mme,df; ######################################################################## if mme.M != 0 for Mi in mme.M - if Mi.estimate_scale == true + if Mi.G.estimate_scale == true a = size(Mi.G.val,1)*Mi.G.df/2 + 1 b = sum(Mi.G.df ./ (2*Mi.G.val )) + 1 Mi.G.scale = rand(Gamma(a,1/b)) @@ -227,7 +227,7 @@ function MCMC_BayesianAlphabet_RRM(mme,df; Mi.meanVara2 += (Mi.G.val.^2 - Mi.meanVara2)/nsamples end - if Mi.estimate_scale == true + if Mi.G.estimate_scale == true Mi.meanScaleVara += (Mi.G.scale - Mi.meanScaleVara)/nsamples Mi.meanScaleVara2 += (Mi.G.scale .^2 - Mi.meanScaleVara2)/nsamples end diff --git a/src/1.JWAS/src/build_MME.jl b/src/1.JWAS/src/build_MME.jl index a6917a20..cd4298aa 100644 --- a/src/1.JWAS/src/build_MME.jl +++ b/src/1.JWAS/src/build_MME.jl @@ -16,13 +16,14 @@ function mkDict(a::Vector{T}) where T <: Any end """ - build_model(model_equations::AbstractString,R=false; df::AbstractFloat=4.0) + build_model(model_equations::AbstractString,R=false; df::AbstractFloat=4.0, estimate_variance=true) * Build a model from **model equations** with the residual variance **R**. In Bayesian analysis, **R** is the mean for the prior assigned for the residual variance with degree of freedom **df**, defaulting to 4.0. If **R** is not provided, a value is calculated from responses (phenotypes). * By default, all variabels in model_equations are factors (categorical) and fixed. Set variables to be covariates (continuous) or random using functions `set_covariate()` or `set_random()`. +* The argument `estimate_variance` indicates whether to estimate the residual variance; `estimate_variance=true` is the default. ```julia #single-trait @@ -56,6 +57,9 @@ function build_model(model_equations::AbstractString, error("Model equations are wrong.\n To find an example, type ?build_model and press enter.\n") end + if estimate_scale != false + error("estimate scale for residual variance is not supported now.") + end ############################################################################ # Bayesian Neural Network diff --git a/src/1.JWAS/src/markers/readgenotypes.jl b/src/1.JWAS/src/markers/readgenotypes.jl index 28dc07da..41cdcfd9 100644 --- a/src/1.JWAS/src/markers/readgenotypes.jl +++ b/src/1.JWAS/src/markers/readgenotypes.jl @@ -54,7 +54,7 @@ end get_genotypes(file::Union{AbstractString,Array{Float64,2},Array{Float32,2},Array{Any,2},DataFrames.DataFrame},G=false; separator=',',header=true,rowID=false, center=true,quality_control=false, - method = "RR-BLUP",Pi = 0.0,estimatePi = true,estimate_scale=false, + method = "RR-BLUP",Pi = 0.0,estimatePi = true,estimate_scale=false,estimate_variance=true, G_is_marker_variance = false,df = 4.0) * Get marker informtion from a genotype file/matrix. This file needs to be column-wise sorted by marker positions. * If a text file is provided, the file format should be: @@ -218,9 +218,6 @@ function get_genotypes(file::Union{AbstractString,Array{Float64,2},Array{Float32 genotypes.method = method genotypes.estimatePi = estimatePi genotypes.π = Pi - # genotypes.df = df #It will be modified base on number of traits in build_model() - # genotypes.estimate_scale = estimate_scale - # genotypes.estimate_variance = estimate_variance writedlm("IDs_for_individuals_with_genotypes.txt",genotypes.obsID) println("Genotype informatin:") diff --git a/src/1.JWAS/src/output.jl b/src/1.JWAS/src/output.jl index f76a456f..75e1c938 100644 --- a/src/1.JWAS/src/output.jl +++ b/src/1.JWAS/src/output.jl @@ -145,7 +145,7 @@ function output_result(mme,output_folder, if Mi.estimatePi == true output["pi_"*Mi.name] = dict2dataframe(Mi.mean_pi,Mi.mean_pi2) end - if Mi.estimate_scale == true + if Mi.G.estimate_scale == true output["ScaleEffectVar"*Mi.name] = matrix2dataframe(string.(mme.lhsVec),Mi.meanScaleVara,Mi.meanScaleVara2) end end @@ -587,7 +587,7 @@ function output_posterior_mean_variance(mme,nsamples) Mi.meanVara += (Mi.G.val - Mi.meanVara)/nsamples Mi.meanVara2 += (Mi.G.val .^2 - Mi.meanVara2)/nsamples end - if Mi.estimate_scale == true + if Mi.G.estimate_scale == true Mi.meanScaleVara += (Mi.G.scale - Mi.meanScaleVara)/nsamples Mi.meanScaleVara2 += (Mi.G.scale .^2 - Mi.meanScaleVara2)/nsamples end diff --git a/src/1.JWAS/src/random_effects.jl b/src/1.JWAS/src/random_effects.jl index 129a264e..2911d335 100644 --- a/src/1.JWAS/src/random_effects.jl +++ b/src/1.JWAS/src/random_effects.jl @@ -106,7 +106,10 @@ function set_random(mme::MME,randomStr::AbstractString,G=false;Vinv=0,names=[],d df = Float32(df) if constraint != false #check constraint - error("Constraint for set_random is not supported now.") + error("Constraint for variance of random term is not supported now.") + end + if estimate_scale != false + error("Estimate scale for variance of random term is not supported now.") end ############################################################################ diff --git a/src/1.JWAS/src/types.jl b/src/1.JWAS/src/types.jl index a50e483d..fc9a5744 100644 --- a/src/1.JWAS/src/types.jl +++ b/src/1.JWAS/src/types.jl @@ -117,8 +117,8 @@ mutable struct Genotypes method #prior for marker effects (Bayesian ALphabet, GBLUP ...) estimatePi - estimate_variance - estimate_scale +# estimate_variance +# estimate_scale mArray #a collection of matrices used in Bayesian Alphabet mRinvArray #a collection of matrices used in Bayesian Alphabet @@ -149,7 +149,7 @@ mutable struct Genotypes Genotypes(a1,a2,a3,a4,a5,a6,a7,a8,a9)=new(false,false, a1,a2,a3,a4,a5,a6,a7,a8,a4,false, Variance(false,false,false,true,false,false),Variance(false,false,false,true,false,false), #false,false, - false,true,true,false, + false,true, #true,false, false,false,false,false,false,false, false,false,false,false, false,false,false,false,false,false,false,false,false, @@ -175,7 +175,7 @@ mutable struct MCMCinfo missing_phenotypes # constraint # mega_trait - estimate_variance + # estimate_variance update_priors_frequency outputEBV output_heritability diff --git a/test/Unitest.jl b/test/Unitest.jl index 1da8b583..f67bf2ab 100644 --- a/test/Unitest.jl +++ b/test/Unitest.jl @@ -65,7 +65,7 @@ for single_step in [false, true] if single_step == false # genomic models or PBLUP - out1 = runMCMC(model1, phenotypes, estimate_variance=true, heterogeneous_residuals=false, + out1 = runMCMC(model1, phenotypes, heterogeneous_residuals=false, #estimate all variances==true by default double_precision=true, chain_length=1000, output_samples_frequency=10, printout_frequency=100, seed=314) @@ -75,7 +75,7 @@ for single_step in [false, true] PA_dict[newdir] = accuracy elseif single_step == true && test_method != "PBLUP" && test_method != "GBLUP" # genomic model, no PBLUP/GBLUP - out1 = runMCMC(model1, phenotypes, estimate_variance=true, heterogeneous_residuals=false, + out1 = runMCMC(model1, phenotypes, heterogeneous_residuals=false, #estimate all variances==true by default chain_length=1000, output_samples_frequency=10, printout_frequency=100, single_step_analysis=true, pedigree=pedigree, seed=314) results = innerjoin(out1["EBV_t1"], phenotypes, on=:ID) @@ -134,14 +134,14 @@ for single_step in [false, true] if single_step == false - out2 = runMCMC(model2, phenotypes, estimate_variance=true, heterogeneous_residuals=false, double_precision=true, + out2 = runMCMC(model2, phenotypes, heterogeneous_residuals=false, double_precision=true, #estimate all variances==true by default chain_length=1000, output_samples_frequency=10, printout_frequency=100, seed=314) results = innerjoin(out2["EBV_t1"], phenotypes, on=:ID) ind_id = findall(x -> !ismissing(x), results[!, :t1]) accuracy = cor(results[ind_id, :EBV], results[ind_id, :t1]) PA_dict[newdir] = accuracy elseif single_step == true && test_method != "PBLUP" && test_method != "GBLUP" - out2 = runMCMC(model2, phenotypes, estimate_variance=true, heterogeneous_residuals=false, double_precision=true, + out2 = runMCMC(model2, phenotypes, heterogeneous_residuals=false, double_precision=true, #estimate all variances==true by default chain_length=1000, output_samples_frequency=10, printout_frequency=100, single_step_analysis=true, pedigree=pedigree, seed=314) results = innerjoin(out2["EBV_t1"], phenotypes, on=:ID) diff --git a/test/test_BayesianAlphabet.jl b/test/test_BayesianAlphabet.jl index dd05501d..1ec8f809 100644 --- a/test/test_BayesianAlphabet.jl +++ b/test/test_BayesianAlphabet.jl @@ -51,12 +51,12 @@ for single_step in [false,true] outputMCMCsamples(model1,"x2") if single_step == false - out1=runMCMC(model1,phenotypes,estimate_variance=true,heterogeneous_residuals=false, + out1=runMCMC(model1,phenotypes,heterogeneous_residuals=false, #estimate all variances==true by default double_precision=true, chain_length=100,output_samples_frequency=10, printout_frequency=50,seed=314); elseif single_step == true && test_method!="conventional (no markers)" && test_method!="GBLUP" - out1=runMCMC(model1,phenotypes_ssbr,estimate_variance=true,heterogeneous_residuals=false, + out1=runMCMC(model1,phenotypes_ssbr,heterogeneous_residuals=false, #estimate all variances==true by default chain_length=100,output_samples_frequency=10,printout_frequency=50, single_step_analysis=true,pedigree=pedigree,seed=314); end @@ -110,10 +110,10 @@ for single_step in [false,true] outputMCMCsamples(model2,"x2") if single_step == false - out2=runMCMC(model2,phenotypes,estimate_variance=true,heterogeneous_residuals=false,double_precision=true, + out2=runMCMC(model2,phenotypes,heterogeneous_residuals=false,double_precision=true, #estimate all variances==true by default chain_length=100,output_samples_frequency=10,printout_frequency=50,seed=314); elseif single_step == true && test_method!="conventional (no markers)" && test_method!="GBLUP" - out2=runMCMC(model2,phenotypes_ssbr,estimate_variance=true,heterogeneous_residuals=false,double_precision=true, + out2=runMCMC(model2,phenotypes_ssbr,heterogeneous_residuals=false,double_precision=true, #estimate all variances==true by default chain_length=100,output_samples_frequency=10,printout_frequency=50, single_step_analysis=true,pedigree=pedigree,seed=314); end diff --git a/test/test_BayesianAlphabet_deprecated.jl b/test/test_BayesianAlphabet_deprecated.jl index 885f8b3f..c57024ea 100644 --- a/test/test_BayesianAlphabet_deprecated.jl +++ b/test/test_BayesianAlphabet_deprecated.jl @@ -47,12 +47,12 @@ for single_step in [false,true] outputMCMCsamples(model1,"x2") if single_step == false - out1=runMCMC(model1,phenotypes,estimate_variance=true,heterogeneous_residuals=false, + out1=runMCMC(model1,phenotypes,heterogeneous_residuals=false, #estimate all variances==true by default methods=test_method,estimatePi=test_estimatePi,double_precision=true, chain_length=100,output_samples_frequency=10, printout_frequency=50,seed=314); elseif single_step == true && test_method!="conventional (no markers)" && test_method!="GBLUP" - out1=runMCMC(model1,phenotypes_ssbr,estimate_variance=true,heterogeneous_residuals=false, + out1=runMCMC(model1,phenotypes_ssbr,heterogeneous_residuals=false, #estimate all variances==true by default methods=test_method,estimatePi=test_estimatePi,chain_length=100,output_samples_frequency=10,printout_frequency=50, single_step_analysis=true,pedigree=pedigree,seed=314); end @@ -99,10 +99,10 @@ for single_step in [false,true] outputMCMCsamples(model2,"x2") if single_step == false - out2=runMCMC(model2,phenotypes,estimate_variance=true,heterogeneous_residuals=false,double_precision=true, + out2=runMCMC(model2,phenotypes,heterogeneous_residuals=false,double_precision=true, #estimate all variances==true by default methods=test_method,estimatePi=test_estimatePi,chain_length=100,output_samples_frequency=10,printout_frequency=50,seed=314); elseif single_step == true && test_method!="conventional (no markers)" && test_method!="GBLUP" - out2=runMCMC(model2,phenotypes_ssbr,estimate_variance=true,heterogeneous_residuals=false,double_precision=true, + out2=runMCMC(model2,phenotypes_ssbr,heterogeneous_residuals=false,double_precision=true, #estimate all variances==true by default methods=test_method,estimatePi=test_estimatePi,chain_length=100,output_samples_frequency=10,printout_frequency=50, single_step_analysis=true,pedigree=pedigree,seed=314); end From df204bb6700bd8a3426fd6f3fa999dfd6e696bfd Mon Sep 17 00:00:00 2001 From: zhaotianjing Date: Mon, 12 Feb 2024 14:33:58 -0600 Subject: [PATCH 2/2] add error message when sample scale of marker effect variance in multi-trait model --- src/1.JWAS/src/input_data_validation.jl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/1.JWAS/src/input_data_validation.jl b/src/1.JWAS/src/input_data_validation.jl index 1172c243..73eb661f 100644 --- a/src/1.JWAS/src/input_data_validation.jl +++ b/src/1.JWAS/src/input_data_validation.jl @@ -58,6 +58,13 @@ function errors_args(mme) if mme.causal_structure != false && mme.nModels == 1 error("Causal strutures are only allowed in multi-trait analysis") end + if mme.M!=0 && mme.nModels>1 #multi-trait with genotypes + for Mi in mme.M + if Mi.G.estimate_scale==true + error("estimate_scale=true is only supported for single trait now.") + end + end + end end function check_outputID(mme)