-
Notifications
You must be signed in to change notification settings - Fork 0
/
khamix.sh
executable file
·109 lines (95 loc) · 4.82 KB
/
khamix.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#!/bin/sh
#=======================================================================#
# kernelized haplotype-based mixed model association mapping (KHAMIX) #
#=======================================================================#
# source modules for R
# source module_load_r.sh
#--------------------------------------------------------------------------#
#1. setting haplotype size, in number of markers, and trait to be analyzed #
#--------------------------------------------------------------------------#
trait_name="LLGTH"
nb_snp_hap=6
nb_chromosomes=17 # total number of chromosomes for the analyzed data set
kernel_index=1 # 1 for VanRaden linear kernel and 2 for Gaussian kernel (i.e. RBF)
signif_level=0.01 # significance level for the restricted likelihood ratio test (RLRT)
local_or_cluster=2 # 1 for local computation and 2 for parallelize computation on a cluster
echo "$trait_name" > trait_name.txt
echo "$nb_snp_hap" > nb_snp_hap.txt
echo "$nb_chromosomes" > nb_chromosomes.txt
echo "$kernel_index" > kernel_index.txt
echo "$signif_level" > signif_level.txt
mv trait_name.txt data_parameters/
mv nb_snp_hap.txt data_parameters/
mv nb_chromosomes.txt data_parameters/
mv kernel_index.txt data_parameters/
mv signif_level.txt data_parameters/
#------------------------------------------------#
#2. compute estimates under null hypothesis (h0) #
#------------------------------------------------#
mkdir estimates_h0/
cp data_parameters/trait_name.txt estimates_h0/
cp data_parameters/phenotypes.txt estimates_h0/
if [ -f data_parameters/incidence_fixed_effects.txt ] ; then
cp data_parameters/incidence_fixed_effects.txt estimates_h0/
fi
if [ -f data_parameters/incidence_polygenic_effects.txt ] ; then
cp data_parameters/incidence_polygenic_effects.txt estimates_h0/
fi
if [ -f data_parameters/k_matrix.txt ] ; then
cp data_parameters/k_matrix.txt estimates_h0/
fi
cp data_parameters/kernel_index.txt estimates_h0/
cp data_parameters/genotypes.txt estimates_h0/
cp programs/get_data_trait_name.R estimates_h0/
cp programs/get_data_incidence.R estimates_h0/
cp programs/compute_k_matrix.R estimates_h0/
cp programs/compute_estimates_h0.R estimates_h0/
cd estimates_h0/
R -q --vanilla<get_data_trait_name.R
R -q --vanilla<get_data_incidence.R
R -q --vanilla<compute_k_matrix.R
R -q --vanilla<compute_estimates_h0.R
cd ../
#-------------------------------------------------------------------------#
#3. genome scan, for each chromosome, associated with the analyzed trait #
#-------------------------------------------------------------------------#
for chromo_num_k in $(seq 1 1 $nb_chromosomes)
do
echo "$chromo_num_k">chromo_num_k.txt
#-------------------------------------------------------------------------#
#3.1 creation of the directory for chromosome chromo_num_k to be analyzed #
#-------------------------------------------------------------------------#
mkdir genome_scan_chromo_num_$chromo_num_k
# copy data into the directory for chromosome chromo_num_k
mv chromo_num_k.txt genome_scan_chromo_num_$chromo_num_k
cp data_parameters/nb_snp_hap.txt genome_scan_chromo_num_$chromo_num_k
cp data_parameters/nb_chromosomes.txt genome_scan_chromo_num_$chromo_num_k
cp data_parameters/genotypes.txt genome_scan_chromo_num_$chromo_num_k
cp data_parameters/phased_genotypes.txt genome_scan_chromo_num_$chromo_num_k
cp data_parameters/physical_map.txt genome_scan_chromo_num_$chromo_num_k
# copy data and estimates for model under null hypothesis (h0) into directory for chromosome chromo_num_k
cp estimates_h0/phenotypes_trait_name.txt genome_scan_chromo_num_$chromo_num_k
cp estimates_h0/x_matrix.txt genome_scan_chromo_num_$chromo_num_k
cp estimates_h0/z_u_matrix.txt genome_scan_chromo_num_$chromo_num_k
cp estimates_h0/k_matrix.txt genome_scan_chromo_num_$chromo_num_k
cp estimates_h0/emmreml_h0 genome_scan_chromo_num_$chromo_num_k
# copy programs into the directory for chromosome chromo_num_k
cp programs/get_data_chromo_num_k.R genome_scan_chromo_num_$chromo_num_k
cp programs/ibs_haplotypes_window genome_scan_chromo_num_$chromo_num_k
cp programs/compute_z_h_matrix genome_scan_chromo_num_$chromo_num_k
cp programs/emmreml_rlrt.R genome_scan_chromo_num_$chromo_num_k
cp programs/scan_chromo_num_k.sh genome_scan_chromo_num_$chromo_num_k
# extract data associated to trait, fixed effects and compute Gram matrix (i.e. kernel matrix)
cd genome_scan_chromo_num_$chromo_num_k
# perform genome scan by sliding window for chromosome $chromo_num_k
if [ "$local_or_cluster" -gt 1 ] ; then
# qsub -q normal.q -v scan_chromo_num_k.sh # a cc2 queue
sbatch scan_chromo_num_k.sh
# qsub -q workq scan_chromo_num_k.sh
else
./scan_chromo_num_k.sh
fi
# moving to the next directory "genome_scan_chromo_num_$chromo_num_k",
# i.e. moving up the directory tree
cd ../
done