Skip to content

Commit

Permalink
Merge pull request #147 from reworkhow/chatgpt
Browse files Browse the repository at this point in the history
Setting the estimate_variance and estimate_scale parameters within their respective functions.
  • Loading branch information
reworkhow authored Feb 13, 2024
2 parents 64d5618 + df204bb commit 9281a16
Show file tree
Hide file tree
Showing 12 changed files with 66 additions and 47 deletions.
22 changes: 13 additions & 9 deletions src/1.JWAS/src/JWAS.jl
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,6 @@ export dataset
output_samples_frequency::Integer = chain_length>1000 ? div(chain_length,1000) : 1,
update_priors_frequency::Integer = 0,
#Methods
estimate_variance = true,
single_step_analysis = false, #parameters for single-step analysis
pedigree = false, #parameters for single-step analysis
fitting_J_vector = true, #parameters for single-step analysis
Expand All @@ -107,8 +106,7 @@ export dataset
##for deprecated JWAS
methods = "conventional (no markers)",
Pi = 0.0,
estimatePi = false,
estimate_scale = false)
estimatePi = false)
**Run MCMC for Bayesian Linear Mixed Models with or without estimation of variance components.**
Expand All @@ -126,7 +124,6 @@ export dataset
* Priors are updated every `update_priors_frequency` iterations, defaulting to `0`.
* Methods
* Single step analysis is allowed if `single_step_analysis` = `true` and `pedigree` is provided.
* Variance components are estimated if `estimate_variance`=true, defaulting to `true`.
* Miscellaneous Options
* Missing phenotypes are allowed in multi-trait analysis with `missing_phenotypes`=true, defaulting to `true`.
* Catogorical Traits are allowed if `categorical_trait`=true, defaulting to `false`. Phenotypes should be coded as 1,2,3...
Expand Down Expand Up @@ -155,7 +152,6 @@ function runMCMC(mme::MME,df;
output_samples_frequency::Integer = chain_length>1000 ? div(chain_length,1000) : 1,
update_priors_frequency::Integer = 0,
#Methods
estimate_variance = true,
single_step_analysis = false, #parameters for single-step analysis
pedigree = false, #parameters for single-step analysis
fitting_J_vector = true, #parameters for single-step analysis
Expand All @@ -181,7 +177,8 @@ function runMCMC(mme::MME,df;
methods = "conventional (no markers)",
Pi = 0.0,
estimatePi = false,
estimate_scale = false,
estimate_scale = false, #this has been moved to get_genotypes()
estimate_variance = true, #this has been moved to build_MME() and set_random()
categorical_trait = false, #this has been moved to build_model()
censored_trait = false) #this has been moved to build_model()

Expand Down Expand Up @@ -230,7 +227,7 @@ function runMCMC(mme::MME,df;
Mi.name = "geno"
Mi.π = Pi
Mi.estimatePi = estimatePi
Mi.estimate_scale = estimate_scale
Mi.G.estimate_scale = estimate_scale
Mi.method = methods
end
end
Expand All @@ -239,6 +236,13 @@ function runMCMC(mme::MME,df;
print_single_categorical_censored_trait_example()
error("The arguments 'categorical_trait' and 'censored_trait' has been moved to build_model(). Please check our latest example.")
end
if estimate_scale != false #user set estimate_variance=true in runMCMC()
error("The argument 'estimate_scale' for marker effect variance has been moved to get_genotypes().")
end
if estimate_variance != true #user set estimate_variance=false in runMCMC()
error("The argument 'estimate_variance' for non-marker variance components has been moved to build_MME() for residual variance,
and set_random() for random terms.")
end
############################################################################
# censored traits
############################################################################
Expand Down Expand Up @@ -277,7 +281,7 @@ function runMCMC(mme::MME,df;
mme.MCMCinfo = MCMCinfo(heterogeneous_residuals,
chain_length,burnin,output_samples_frequency,
printout_model_info,printout_frequency, single_step_analysis,
fitting_J_vector,missing_phenotypes,estimate_variance,
fitting_J_vector,missing_phenotypes,
update_priors_frequency,outputEBV,output_heritability,prediction_equation,
seed,double_precision,output_folder,RRM)
############################################################################
Expand Down Expand Up @@ -575,7 +579,7 @@ function getMCMCinfo(mme)
end
@printf("%-30s %20s\n","estimatePi",Mi.estimatePi ? "true" : "false")
end
@printf("%-30s %20s\n","estimate_scale",Mi.estimate_scale ? "true" : "false")
@printf("%-30s %20s\n","estimate_scale",Mi.G.estimate_scale ? "true" : "false")
end
end
end
Expand Down
28 changes: 16 additions & 12 deletions src/1.JWAS/src/MCMC/MCMC_BayesianAlphabet.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ function MCMC_BayesianAlphabet(mme,df)
burnin = mme.MCMCinfo.burnin
output_samples_frequency = mme.MCMCinfo.output_samples_frequency
output_folder = mme.MCMCinfo.output_folder
estimate_variance = mme.MCMCinfo.estimate_variance
invweights = mme.invweights
update_priors_frequency = mme.MCMCinfo.update_priors_frequency
has_categorical_trait = "categorical" mme.traits_type
Expand Down Expand Up @@ -267,7 +266,7 @@ function MCMC_BayesianAlphabet(mme,df)
########################################################################
# Variance of Marker Effects
########################################################################
if Mi.estimate_variance == true #methd specific estimate_variance
if Mi.G.estimate_variance == true #methd specific estimate_variance
sample_marker_effect_variance(Mi)
if mme.MCMCinfo.double_precision == false && Mi.method != "BayesB"
Mi.G.val = Float32.(Mi.G.val)
Expand All @@ -276,7 +275,7 @@ function MCMC_BayesianAlphabet(mme,df)
########################################################################
# Scale Parameter in Priors for Marker Effect Variances
########################################################################
if Mi.estimate_scale == true
if Mi.G.estimate_scale == true
if !is_multi_trait
a = size(Mi.G.val,1)*Mi.G.df/2 + 1
b = sum(Mi.G.df ./ (2*Mi.G.val)) + 1
Expand All @@ -288,15 +287,20 @@ function MCMC_BayesianAlphabet(mme,df)
########################################################################
# 3. Non-marker Variance Components
########################################################################
if estimate_variance == true
########################################################################
# 3.1 Variance of Non-marker Random Effects
# e.g, i.i.d; polygenic effects (pedigree)
########################################################################
sampleVCs(mme,mme.sol)
########################################################################
# 3.2 Residual Variance
########################################################################

########################################################################
# 3.1 Variance of Non-marker Random Effects
# e.g, i.i.d; polygenic effects (pedigree)
########################################################################
if length(mme.rndTrmVec)>0
if mme.rndTrmVec[1].Gi.estimate_variance == true
sampleVCs(mme,mme.sol)
end
end
########################################################################
# 3.2 Residual Variance
########################################################################
if mme.R.estimate_variance == true
if is_multi_trait
mme.R.val = sample_variance(wArray, length(mme.obsID),
mme.R.df, mme.R.scale,
Expand Down
4 changes: 2 additions & 2 deletions src/1.JWAS/src/RRM/MCMC_BayesianAlphabet_RRM.jl
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ function MCMC_BayesianAlphabet_RRM(mme,df;
########################################################################
if mme.M != 0
for Mi in mme.M
if Mi.estimate_scale == true
if Mi.G.estimate_scale == true
a = size(Mi.G.val,1)*Mi.G.df/2 + 1
b = sum(Mi.G.df ./ (2*Mi.G.val )) + 1
Mi.G.scale = rand(Gamma(a,1/b))
Expand Down Expand Up @@ -227,7 +227,7 @@ function MCMC_BayesianAlphabet_RRM(mme,df;
Mi.meanVara2 += (Mi.G.val.^2 - Mi.meanVara2)/nsamples
end

if Mi.estimate_scale == true
if Mi.G.estimate_scale == true
Mi.meanScaleVara += (Mi.G.scale - Mi.meanScaleVara)/nsamples
Mi.meanScaleVara2 += (Mi.G.scale .^2 - Mi.meanScaleVara2)/nsamples
end
Expand Down
6 changes: 5 additions & 1 deletion src/1.JWAS/src/build_MME.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,14 @@ function mkDict(a::Vector{T}) where T <: Any
end

"""
build_model(model_equations::AbstractString,R=false; df::AbstractFloat=4.0)
build_model(model_equations::AbstractString,R=false; df::AbstractFloat=4.0, estimate_variance=true)
* Build a model from **model equations** with the residual variance **R**. In Bayesian analysis, **R**
is the mean for the prior assigned for the residual variance with degree of freedom **df**, defaulting
to 4.0. If **R** is not provided, a value is calculated from responses (phenotypes).
* By default, all variabels in model_equations are factors (categorical) and fixed. Set variables
to be covariates (continuous) or random using functions `set_covariate()` or `set_random()`.
* The argument `estimate_variance` indicates whether to estimate the residual variance; `estimate_variance=true` is the default.
```julia
#single-trait
Expand Down Expand Up @@ -56,6 +57,9 @@ function build_model(model_equations::AbstractString,
error("Model equations are wrong.\n
To find an example, type ?build_model and press enter.\n")
end
if estimate_scale != false
error("estimate scale for residual variance is not supported now.")
end

############################################################################
# Bayesian Neural Network
Expand Down
7 changes: 7 additions & 0 deletions src/1.JWAS/src/input_data_validation.jl
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,13 @@ function errors_args(mme)
if mme.causal_structure != false && mme.nModels == 1
error("Causal strutures are only allowed in multi-trait analysis")
end
if mme.M!=0 && mme.nModels>1 #multi-trait with genotypes
for Mi in mme.M
if Mi.G.estimate_scale==true
error("estimate_scale=true is only supported for single trait now.")
end
end
end
end

function check_outputID(mme)
Expand Down
5 changes: 1 addition & 4 deletions src/1.JWAS/src/markers/readgenotypes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ end
get_genotypes(file::Union{AbstractString,Array{Float64,2},Array{Float32,2},Array{Any,2},DataFrames.DataFrame},G=false;
separator=',',header=true,rowID=false,
center=true,quality_control=false,
method = "RR-BLUP",Pi = 0.0,estimatePi = true,estimate_scale=false,
method = "RR-BLUP",Pi = 0.0,estimatePi = true,estimate_scale=false,estimate_variance=true,
G_is_marker_variance = false,df = 4.0)
* Get marker informtion from a genotype file/matrix. This file needs to be column-wise sorted by marker positions.
* If a text file is provided, the file format should be:
Expand Down Expand Up @@ -218,9 +218,6 @@ function get_genotypes(file::Union{AbstractString,Array{Float64,2},Array{Float32
genotypes.method = method
genotypes.estimatePi = estimatePi
genotypes.π = Pi
# genotypes.df = df #It will be modified base on number of traits in build_model()
# genotypes.estimate_scale = estimate_scale
# genotypes.estimate_variance = estimate_variance

writedlm("IDs_for_individuals_with_genotypes.txt",genotypes.obsID)
println("Genotype informatin:")
Expand Down
4 changes: 2 additions & 2 deletions src/1.JWAS/src/output.jl
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ function output_result(mme,output_folder,
if Mi.estimatePi == true
output["pi_"*Mi.name] = dict2dataframe(Mi.mean_pi,Mi.mean_pi2)
end
if Mi.estimate_scale == true
if Mi.G.estimate_scale == true
output["ScaleEffectVar"*Mi.name] = matrix2dataframe(string.(mme.lhsVec),Mi.meanScaleVara,Mi.meanScaleVara2)
end
end
Expand Down Expand Up @@ -587,7 +587,7 @@ function output_posterior_mean_variance(mme,nsamples)
Mi.meanVara += (Mi.G.val - Mi.meanVara)/nsamples
Mi.meanVara2 += (Mi.G.val .^2 - Mi.meanVara2)/nsamples
end
if Mi.estimate_scale == true
if Mi.G.estimate_scale == true
Mi.meanScaleVara += (Mi.G.scale - Mi.meanScaleVara)/nsamples
Mi.meanScaleVara2 += (Mi.G.scale .^2 - Mi.meanScaleVara2)/nsamples
end
Expand Down
5 changes: 4 additions & 1 deletion src/1.JWAS/src/random_effects.jl
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,10 @@ function set_random(mme::MME,randomStr::AbstractString,G=false;Vinv=0,names=[],d
df = Float32(df)

if constraint != false #check constraint
error("Constraint for set_random is not supported now.")
error("Constraint for variance of random term is not supported now.")
end
if estimate_scale != false
error("Estimate scale for variance of random term is not supported now.")
end

############################################################################
Expand Down
8 changes: 4 additions & 4 deletions src/1.JWAS/src/types.jl
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,8 @@ mutable struct Genotypes

method #prior for marker effects (Bayesian ALphabet, GBLUP ...)
estimatePi
estimate_variance
estimate_scale
# estimate_variance
# estimate_scale

mArray #a collection of matrices used in Bayesian Alphabet
mRinvArray #a collection of matrices used in Bayesian Alphabet
Expand Down Expand Up @@ -149,7 +149,7 @@ mutable struct Genotypes
Genotypes(a1,a2,a3,a4,a5,a6,a7,a8,a9)=new(false,false,
a1,a2,a3,a4,a5,a6,a7,a8,a4,false,
Variance(false,false,false,true,false,false),Variance(false,false,false,true,false,false), #false,false,
false,true,true,false,
false,true, #true,false,
false,false,false,false,false,false,
false,false,false,false,
false,false,false,false,false,false,false,false,false,
Expand All @@ -175,7 +175,7 @@ mutable struct MCMCinfo
missing_phenotypes
# constraint
# mega_trait
estimate_variance
# estimate_variance
update_priors_frequency
outputEBV
output_heritability
Expand Down
8 changes: 4 additions & 4 deletions test/Unitest.jl
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ for single_step in [false, true]


if single_step == false # genomic models or PBLUP
out1 = runMCMC(model1, phenotypes, estimate_variance=true, heterogeneous_residuals=false,
out1 = runMCMC(model1, phenotypes, heterogeneous_residuals=false, #estimate all variances==true by default
double_precision=true,
chain_length=1000, output_samples_frequency=10,
printout_frequency=100, seed=314)
Expand All @@ -75,7 +75,7 @@ for single_step in [false, true]
PA_dict[newdir] = accuracy
elseif single_step == true && test_method != "PBLUP" && test_method != "GBLUP"
# genomic model, no PBLUP/GBLUP
out1 = runMCMC(model1, phenotypes, estimate_variance=true, heterogeneous_residuals=false,
out1 = runMCMC(model1, phenotypes, heterogeneous_residuals=false, #estimate all variances==true by default
chain_length=1000, output_samples_frequency=10, printout_frequency=100,
single_step_analysis=true, pedigree=pedigree, seed=314)
results = innerjoin(out1["EBV_t1"], phenotypes, on=:ID)
Expand Down Expand Up @@ -134,14 +134,14 @@ for single_step in [false, true]


if single_step == false
out2 = runMCMC(model2, phenotypes, estimate_variance=true, heterogeneous_residuals=false, double_precision=true,
out2 = runMCMC(model2, phenotypes, heterogeneous_residuals=false, double_precision=true, #estimate all variances==true by default
chain_length=1000, output_samples_frequency=10, printout_frequency=100, seed=314)
results = innerjoin(out2["EBV_t1"], phenotypes, on=:ID)
ind_id = findall(x -> !ismissing(x), results[!, :t1])
accuracy = cor(results[ind_id, :EBV], results[ind_id, :t1])
PA_dict[newdir] = accuracy
elseif single_step == true && test_method != "PBLUP" && test_method != "GBLUP"
out2 = runMCMC(model2, phenotypes, estimate_variance=true, heterogeneous_residuals=false, double_precision=true,
out2 = runMCMC(model2, phenotypes, heterogeneous_residuals=false, double_precision=true, #estimate all variances==true by default
chain_length=1000, output_samples_frequency=10, printout_frequency=100,
single_step_analysis=true, pedigree=pedigree, seed=314)
results = innerjoin(out2["EBV_t1"], phenotypes, on=:ID)
Expand Down
8 changes: 4 additions & 4 deletions test/test_BayesianAlphabet.jl
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,12 @@ for single_step in [false,true]
outputMCMCsamples(model1,"x2")

if single_step == false
out1=runMCMC(model1,phenotypes,estimate_variance=true,heterogeneous_residuals=false,
out1=runMCMC(model1,phenotypes,heterogeneous_residuals=false, #estimate all variances==true by default
double_precision=true,
chain_length=100,output_samples_frequency=10,
printout_frequency=50,seed=314);
elseif single_step == true && test_method!="conventional (no markers)" && test_method!="GBLUP"
out1=runMCMC(model1,phenotypes_ssbr,estimate_variance=true,heterogeneous_residuals=false,
out1=runMCMC(model1,phenotypes_ssbr,heterogeneous_residuals=false, #estimate all variances==true by default
chain_length=100,output_samples_frequency=10,printout_frequency=50,
single_step_analysis=true,pedigree=pedigree,seed=314);
end
Expand Down Expand Up @@ -110,10 +110,10 @@ for single_step in [false,true]
outputMCMCsamples(model2,"x2")

if single_step == false
out2=runMCMC(model2,phenotypes,estimate_variance=true,heterogeneous_residuals=false,double_precision=true,
out2=runMCMC(model2,phenotypes,heterogeneous_residuals=false,double_precision=true, #estimate all variances==true by default
chain_length=100,output_samples_frequency=10,printout_frequency=50,seed=314);
elseif single_step == true && test_method!="conventional (no markers)" && test_method!="GBLUP"
out2=runMCMC(model2,phenotypes_ssbr,estimate_variance=true,heterogeneous_residuals=false,double_precision=true,
out2=runMCMC(model2,phenotypes_ssbr,heterogeneous_residuals=false,double_precision=true, #estimate all variances==true by default
chain_length=100,output_samples_frequency=10,printout_frequency=50,
single_step_analysis=true,pedigree=pedigree,seed=314);
end
Expand Down
Loading

0 comments on commit 9281a16

Please sign in to comment.