Merge pull request #11 from TRON-Bioinformatics/create-indices
Create indices
priesgo authored Oct 19, 2022
2 parents 5faa1bd + 0c151ff commit 266c18b
Showing 8 changed files with 66,753 additions and 10 deletions.
4 changes: 3 additions & 1 deletion .gitignore
@@ -6,4 +6,6 @@ report.html*
timeline.html*
trace.txt*
dag.dot*
*.swp
*.swp
/test_data/ucsc.hg19.minimal.without_indices.dict
/test_data/ucsc.hg19.minimal.without_indices.fasta.fai
1 change: 1 addition & 0 deletions Makefile
@@ -17,3 +17,4 @@ test:
bash tests/test_08.sh
bash tests/test_09.sh
bash tests/test_10.sh
bash tests/test_11.sh
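The new tests/test_11.sh is not itself part of this diff. As a rough sketch only, assuming it exercises the new index-creation path against the un-indexed test reference named in .gitignore above, and assuming a hypothetical input manifest and output directory, it could look roughly like this:

    #!/bin/bash
    # Hypothetical sketch of tests/test_11.sh (not taken from the repository):
    # run the pipeline on a reference that ships without .fai/.dict files and
    # verify that the indices were created next to it.
    set -euo pipefail
    nextflow run main.nf \
        --input_files test_data/test_input.txt \
        --reference $(pwd)/test_data/ucsc.hg19.minimal.without_indices.fasta \
        --output output/test11
    # the indexing processes receive the reference path as a plain value, so the
    # indices are written next to the original FASTA rather than into the work dir
    test -s test_data/ucsc.hg19.minimal.without_indices.fasta.fai
    test -s test_data/ucsc.hg19.minimal.without_indices.dict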
37 changes: 32 additions & 5 deletions main.nf
@@ -7,6 +7,7 @@ include { MARK_DUPLICATES; SPLIT_CIGAR_N_READS } from './modules/02_mark_duplica
include { METRICS; HS_METRICS; COVERAGE_ANALYSIS; FLAGSTAT } from './modules/03_metrics'
include { REALIGNMENT_AROUND_INDELS } from './modules/04_realignment_around_indels'
include { BQSR; CREATE_OUTPUT } from './modules/05_bqsr'
include { CREATE_FAIDX; CREATE_DICT } from './modules/00_reference_indices'

params.help= false
params.input_files = false
@@ -82,10 +83,36 @@ else if (params.input_files) {
.set { input_files }
}

workflow CHECK_REFERENCE {
take:
reference

emit:
checked_reference = reference

main:
// checks the reference and its indices; if the indices are not there, creates them
reference_file = file(reference)
if (reference_file.isEmpty()) {
log.error "--reference points to a non existing file"
exit 1
}
faidx = file("${reference}.fai")
if (faidx.isEmpty()) {
CREATE_FAIDX(reference)
}
dict = file("${reference_file.getParent() }/${reference_file.baseName }*.dict")
if (dict.isEmpty()) {
CREATE_DICT(reference)
}
}


workflow {

PREPARE_BAM(input_files, params.reference)
CHECK_REFERENCE(params.reference)

PREPARE_BAM(input_files, CHECK_REFERENCE.out.checked_reference)

if (!params.skip_deduplication) {
MARK_DUPLICATES(PREPARE_BAM.out.prepared_bams)
@@ -97,29 +124,29 @@ workflow {
}

if (params.split_cigarn) {
SPLIT_CIGAR_N_READS(deduplicated_bams, params.reference)
SPLIT_CIGAR_N_READS(deduplicated_bams, CHECK_REFERENCE.out.checked_reference)
deduplicated_bams = SPLIT_CIGAR_N_READS.out.split_cigarn_bams
}

if (! params.skip_metrics) {
if (params.intervals) {
HS_METRICS(deduplicated_bams)
}
METRICS(deduplicated_bams, params.reference)
METRICS(deduplicated_bams, CHECK_REFERENCE.out.checked_reference)
COVERAGE_ANALYSIS(deduplicated_bams)
FLAGSTAT(deduplicated_bams)
}

if (!params.skip_realignment) {
REALIGNMENT_AROUND_INDELS(deduplicated_bams, params.reference)
REALIGNMENT_AROUND_INDELS(deduplicated_bams, CHECK_REFERENCE.out.checked_reference)
realigned_bams = REALIGNMENT_AROUND_INDELS.out.realigned_bams
}
else {
realigned_bams = deduplicated_bams
}

if (!params.skip_bqsr) {
BQSR(realigned_bams, params.reference)
BQSR(realigned_bams, CHECK_REFERENCE.out.checked_reference)
preprocessed_bams = BQSR.out.recalibrated_bams
}
else {
…
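Expressed outside Nextflow, CHECK_REFERENCE boils down to probing for the reference and its two companion files and only indexing what is missing. A rough shell equivalent, assuming a hypothetical reference path /data/ucsc.hg19.fasta:

    # What CHECK_REFERENCE probes for --reference /data/ucsc.hg19.fasta (hypothetical path):
    ls /data/ucsc.hg19.fasta       # the reference itself: must exist and be non-empty, otherwise the run aborts
    ls /data/ucsc.hg19.fasta.fai   # samtools FASTA index: CREATE_FAIDX runs only when this file is missing
    ls /data/ucsc.hg19*.dict       # sequence dictionary: CREATE_DICT runs only when this glob matches nothing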
30 changes: 30 additions & 0 deletions modules/00_reference_indices.nf
@@ -0,0 +1,30 @@

process CREATE_FAIDX {
cpus "1"
memory "4g"
tag "${name}"

conda (params.enable_conda ? "bioconda::samtools=1.12" : null)

input:
val(reference)

"""
samtools faidx ${reference}
"""
}

process CREATE_DICT {
cpus "1"
memory "4g"
tag "${name}"

conda (params.enable_conda ? "bioconda::gatk4=4.2.5.0" : null)

input:
val(reference)

"""
gatk CreateSequenceDictionary --REFERENCE ${reference}
"""
}
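Both processes wrap a single indexing command and rely on the tool's default output naming, which writes the index next to the reference FASTA. Run by hand, with the reference path as a placeholder, they amount to:

    # Manual equivalents of CREATE_FAIDX and CREATE_DICT (reference path is a placeholder):
    samtools faidx /data/ucsc.hg19.fasta                             # writes /data/ucsc.hg19.fasta.fai
    gatk CreateSequenceDictionary --REFERENCE /data/ucsc.hg19.fasta  # writes /data/ucsc.hg19.dict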
8 changes: 5 additions & 3 deletions modules/02_mark_duplicates.nf
@@ -1,5 +1,7 @@
params.mark_duplicates_cpus = 2
params.mark_duplicates_memory = "16g"
params.split_reads_cpus = 2
params.split_reads_memory = "4g"
params.remove_duplicates = true
params.output = 'output'

@@ -50,8 +52,8 @@ process MARK_DUPLICATES {
}

process SPLIT_CIGAR_N_READS {
cpus "${params.prepare_bam_cpus}"
memory "${params.prepare_bam_memory}"
cpus "${params.split_reads_cpus}"
memory "${params.split_reads_memory}"
tag "${name}"
publishDir "${params.output}/${name}/", mode: "copy", pattern: "software_versions.*"

@@ -70,7 +72,7 @@ process SPLIT_CIGAR_N_READS {
mkdir tmp
gatk SplitNCigarReads \
--java-options '-Xmx${params.prepare_bam_memory} -Djava.io.tmpdir=./tmp' \
--java-options '-Xmx${params.split_reads_memory} -Djava.io.tmpdir=./tmp' \
--input ${bam} \
--output ${name}.split_cigarn.bam \
--create-output-bam-index true \
…
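With this change, SplitNCigarReads no longer reuses the prepare_bam resource settings but gets its own split_reads_cpus and split_reads_memory parameters. Like any other pipeline parameter they can be overridden at launch time; a sketch, with the input manifest and reference paths as placeholders:

    # Overriding the new SplitNCigarReads resource parameters (paths are placeholders):
    nextflow run main.nf \
        --input_files test_data/test_input.txt \
        --reference /data/ucsc.hg19.fasta \
        --split_cigarn \
        --split_reads_cpus 4 \
        --split_reads_memory 8g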
2 changes: 1 addition & 1 deletion nextflow.config
@@ -44,7 +44,7 @@ process.shell = ['/bin/bash', '-euo', 'pipefail']

cleanup = true

VERSION = '2.0.1'
VERSION = '2.1.0'
DOI = 'https://zenodo.org/badge/latestdoi/358400957'

manifest {
…
