-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathbsv-R1-S20_genomicsdbimport_missing_make_input.sh
executable file
·74 lines (64 loc) · 2 KB
/
bsv-R1-S20_genomicsdbimport_missing_make_input.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#! /bin/bash
#########################################################
#
# Platform: NCI Gadi HPC
# Description: Check logs from genomicsdbimport - write failed tasks or tarball logs if all passed
# Author: Tracy Chew and Cali Willet
# tracy.chew@sydney.edu.au;cali.willet@sydney.edu.au
# Date last modified: 14/10/2020
#
# If you use this script towards a publication, please acknowledge the
# Sydney Informatics Hub (or co-authorship, where appropriate).
#
# Suggested acknowledgement:
# The authors acknowledge the scientific and technical assistance
# <or e.g. bioinformatics assistance of <PERSON>> of Sydney Informatics
# Hub and resources and services from the National Computational
# Infrastructure (NCI), which is supported by the Australian Government
# with access facilitated by the University of Sydney.
#
#########################################################
# Run-specific settings: cohort name and GenomicsDBImport round number.
# NOTE(review): the angle-bracket tokens on the right-hand side are template
# placeholders and are not valid shell as written; presumably they are
# replaced with real values before this script is run - TODO confirm.
cohort=<cohort>
round=<round>
# Per-round directories holding the GenomicsDBImport task logs and the
# captured error files.
logdir="./GATK_logs/GenomicsDBImport_round${round}"
errordir="./Error_capture/GenomicsDBImport_round${round}"

# Log-checking perl script and the per-interval summary file read back
# from the log directory later in this script.
perlscript="./bsv-R1-S20_genomicsdbimport_checklogs.pl"
perlout="${logdir}/${cohort}_interval_duration_memory.txt"

# List of intervals to re-run. Any previous copy is removed so that every
# run of this script starts from an empty list.
input="./Inputs/genomicsdbimport_missing.inputs"
rm -rf "${input}"
# Run the perl log checker to get duration and memory per interval.
# It is passed the log directory and the cohort name, and is only run when
# the summary file ${perlout} does not exist yet.
# NOTE(review): an earlier comment here said this existence check had been
# disabled so that an existing summary is refreshed with tasks completed by
# the "missing" parallel run, but the check is active: an existing summary
# file is reused as-is. Confirm which behaviour is intended.
if [[ ! -f "${perlout}" ]]
then
  # Call perl directly. Wrapping the call in backticks would make the shell
  # execute whatever the script prints to stdout as a command.
  if ! perl "${perlscript}" "${logdir}" "${cohort}"
  then
    echo "ERROR: ${perlscript} failed for ${logdir}" >&2
    exit 1
  fi
fi
# Check output file.
# Reads ${perlout}: the first line is a header and is skipped; every other
# line is split on whitespace into interval, duration and memory. An interval
# is added to the redo array when its duration or memory contains "NA", or
# when an error file ${errordir}/<interval>.err exists.

# Stop if the summary cannot be read. Otherwise the redo list would stay
# empty and be indistinguishable from "every interval passed".
if [[ ! -r "${perlout}" ]]
then
  echo "ERROR: cannot read ${perlout}" >&2
  exit 1
fi

redo=()
{
  read -r _ # skip header
  # The "|| [[ -n ... ]]" keeps a final line that has no trailing newline.
  while read -r interval duration memory || [[ -n "${interval}" ]]
  do
    # Ignore blank lines.
    [[ -n "${interval}" ]] || continue
    err="${errordir}/${interval}.err"
    if [[ "${duration}" =~ NA || "${memory}" =~ NA || -f "${err}" ]]
    then
      redo+=("${interval}")
    fi
  done
} < "${perlout}"
# Report errors.
# If any interval needs a re-run, write one interval per line to ${input}.
# Otherwise archive the *.oe logs in ${logdir} into a tarball; the archived
# files are deleted by tar (--remove-files).
#
# The count is compared arithmetically against zero. Inside [[ ]] the ">"
# operator compares strings lexicographically, and a threshold of 1 would
# treat a single failed interval as "nothing to re-run".
if (( ${#redo[@]} > 0 ))
then
  echo "There are ${#redo[@]} intervals that need to be re-run."
  echo "Writing inputs to ${input}"
  printf '%s\n' "${redo[@]}" >> "${input}"
else
  echo "There are no intervals that need to be re-run. Tidying up..."
  # Never archive-and-delete from the wrong directory: stop if the log
  # directory cannot be entered.
  cd "${logdir}" || { echo "ERROR: cannot cd to ${logdir}" >&2; exit 1; }
  tar --remove-files -czvf genomicsdbimport_logs.tar.gz *.oe
fi