-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathpreparing_gvcfs.sh
executable file
·68 lines (54 loc) · 2.12 KB
/
preparing_gvcfs.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/bin/bash
# Prepare the GVCFs found under
# <projectDir>/cromwell-executions/gatk_variant_calling for later steps.
#
# Usage: <script> <projectDir>
projectDir=$1
if [[ -z "$projectDir" ]]; then
  # Without a project directory every path below would be rooted at "/".
  printf 'Usage: %s <projectDir>\n' "${0##*/}" >&2
  exit 1
fi

## Create a directory called "gvcfs" where all GVCFs are gathered in one place,
## grouped by shard directory name, by creating hard links.
echo "${projectDir}/"
# -p: do not complain when the directory is left over from an earlier run.
mkdir -p -- "${projectDir}/gvcfs/"
for f in "${projectDir}"/cromwell-executions/gatk_variant_calling/*/call-haplotypeCaller/shard-*/execution/*.g.vcf; do
  # An unmatched glob is handed over as the literal pattern; skip it.
  [[ -e "$f" ]] || continue

  # .../shard-N/execution/<file>  ->  shard-N
  shard_path=${f%/*/*}
  folder=${shard_path##*/}

  # Progress output: the file name up to its first dot.
  filename=${f##*/}
  printf '%s\n' "${filename%%.*}"

  # -p: a shard directory may already exist (several GVCFs in one shard).
  mkdir -p -- "${projectDir}/gvcfs/${folder}/"
  # -P: hard-link the path itself, even when it is a symbolic link.
  ln -P -- "$f" "${projectDir}/gvcfs/${folder}/"
done
## After gathering the GVCFs, rename each one after the shard directory it sits
## in (<shard>_<name>.g.vcf) while moving it into <projectDir>/allgvcfs,
## then remove the intermediate "gvcfs" directory.
mkdir -p -- "${projectDir}/allgvcfs/"
for gf in "${projectDir}"/gvcfs/shard-*/*.g.vcf; do
  # An unmatched glob is handed over as the literal pattern; skip it.
  [[ -e "$gf" ]] || continue
  printf '%s\n' "$gf"

  # .../gvcfs/shard-N/<file>  ->  shard-N
  # (Previously derived from variables left over from the gathering loop,
  # so the value printed here did not belong to the current file.)
  shard_dir=${gf%/*}
  shard=${shard_dir##*/}
  printf '%s\n' "$shard"

  # New name: shard directory + file name up to its first dot.
  file_name=${gf##*/}
  name="${shard}_${file_name%%.*}"
  mv -- "$gf" "${projectDir}/allgvcfs/${name}.g.vcf"
done
# ":?" aborts instead of expanding to "/gvcfs" if projectDir is empty or unset.
rm -rf -- "${projectDir:?}/gvcfs"
# Create a list of the full paths of all GVCFs in allgvcfs, one per line.
# The list is rewritten from scratch so that a file left behind by an
# interrupted run does not end up with duplicated entries.
for f in "${projectDir}"/allgvcfs/*.g.vcf; do
  # An unmatched glob is handed over as the literal pattern; skip it.
  [[ -e "$f" ]] || continue
  printf '%s\n' "$f"
done > "${projectDir}/lists/gvcfs.txt"
# Get the sample names: the first TAB-separated column of fastq_list.txt.
cut -f 1 -- "${projectDir}/lists/fastq_list.txt" > "${projectDir}/lists/samples_names.txt"

# From the GVCF path list, write one .list file per sample holding every path
# that contains "<sample>.g.vcf".
# NOTE(review): this is a substring match, so a sample whose name is the tail
# of another sample's name would also collect that sample's paths - confirm
# that sample names cannot overlap this way.
while IFS= read -r line; do
  # A blank name would turn the search string into ".g.vcf" and match every path.
  [[ -n "$line" ]] || continue
  # The list file is named after the sample name up to its first dot - the
  # same name the former "<sample>.g.vcf.list" -> "<name>.list" rename produced.
  grep -F -- "${line}.g.vcf" "${projectDir}/lists/gvcfs.txt" \
    > "${projectDir}/lists/${line%%.*}.list"
done < "${projectDir}/lists/samples_names.txt"
# Create the list of lists: the path of every .list file in lists/, one per line.
lists_index="${projectDir}/lists/gvcfs_samples_lists.list"
for f in "${projectDir}"/lists/*.list; do
  # An unmatched glob is handed over as the literal pattern; skip it.
  [[ -e "$f" ]] || continue
  # The index itself matches *.list; it must not list itself.
  [[ "$f" == "$lists_index" ]] && continue
  printf '%s\n' "$f"
done > "$lists_index"   # rewritten on every run, so reruns do not duplicate entries
rm -f -- "${projectDir}/lists/gvcfs.txt"
echo "GVCFs are ready"