From 6fd62067d6c4ecee186e24a49373e0df25c04140 Mon Sep 17 00:00:00 2001 From: priesgof Date: Fri, 23 Apr 2021 07:06:52 +0200 Subject: [PATCH 1/7] upgrade version to 1.1.0 --- environment.yml | 2 +- nextflow.config | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/environment.yml b/environment.yml index 4cfb454..1d7a1eb 100644 --- a/environment.yml +++ b/environment.yml @@ -1,6 +1,6 @@ # You can use this file to create a conda environment for this pipeline: # conda env create -f environment.yml -name: tronflow-bam-preprocessing-1.0.1 +name: tronflow-bam-preprocessing-1.1.0 channels: - conda-forge - bioconda diff --git a/nextflow.config b/nextflow.config index 99f3241..7392cf1 100644 --- a/nextflow.config +++ b/nextflow.config @@ -59,5 +59,5 @@ manifest { description = 'Picard and GATK BAM preprocessing pipeline' mainScript = 'main.nf' nextflowVersion = '>=19.10.0' - version = '1.0.1' + version = '1.1.0' } From 128cbf26eef9b54e99c0c0afca7c3a20d2a92ae3 Mon Sep 17 00:00:00 2001 From: priesgof Date: Fri, 23 Apr 2021 11:39:43 +0200 Subject: [PATCH 2/7] add collect metrics to the workflow --- main.nf | 50 +++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 45 insertions(+), 5 deletions(-) diff --git a/main.nf b/main.nf index 2185f3c..9986869 100755 --- a/main.nf +++ b/main.nf @@ -10,6 +10,7 @@ params.known_indels2 = "/projects/data/gatk_bundle/hg19/Mills_and_1000G_gold_sta params.skip_bqsr = false params.skip_realignment = false params.skip_deduplication = false +params.skip_metrics = false params.output = false params.platform = "ILLUMINA" @@ -47,6 +48,7 @@ Optional input: * skip_bqsr: optionally skip BQSR * skip_realignment: optionally skip realignment * skip_deduplication: optionally skip deduplication + * skip_metrics: optionally skip the calculation of metrics from the BAM * output: the folder where to publish output * platform: the platform to be added to the BAM header. 
Valid values: [ILLUMINA, SOLID, LS454, HELICOS and PACBIO] (default: ILLUMINA) * prepare_bam_cpus: default 3 @@ -103,8 +105,10 @@ process prepareBam { set name, type, file(bam) from input_files output: - set val(name), val("${bam.baseName}"), val(type), - file("${bam.baseName}.prepared.bam"), file("${bam.baseName}.prepared.bai") into prepared_bams + set val(name), + val("${bam.baseName}"), + val(type), file("${bam.baseName}.prepared.bam"), + file("${bam.baseName}.prepared.bai") into prepared_bams, prepared_bams_for_metrics """ mkdir tmp @@ -145,7 +149,7 @@ if (!params.skip_deduplication) { cpus "${params.mark_duplicates_cpus}" memory "${params.mark_duplicates_memory}" tag "${name}" - publishDir "${publish_dir}/${name}", mode: "copy", pattern: "*.dedup_metrics.txt" + publishDir "${publish_dir}/${name}/metrics", mode: "copy", pattern: "*.dedup_metrics" input: set name, bam_name, type, file(bam), file(bai) from prepared_bams @@ -153,7 +157,7 @@ if (!params.skip_deduplication) { output: set val(name), val(bam_name), val(type), file("${bam.baseName}.dedup.bam"), file("${bam.baseName}.dedup.bam.bai") into deduplicated_bams - file("${bam.baseName}.dedup_metrics.txt") into deduplication_metrics + file("${bam.baseName}.dedup_metrics") into deduplication_metrics """ mkdir tmp @@ -163,7 +167,7 @@ if (!params.skip_deduplication) { --input ${bam} \ --output ${bam.baseName}.dedup.bam \ --conf 'spark.executor.cores=${task.cpus}' \ - --metrics-file ${bam.baseName}.dedup_metrics.txt + --metrics-file ${bam.baseName}.dedup_metrics rm -rf tmp """ @@ -173,6 +177,42 @@ else { deduplicated_bams = prepared_bams } +if (! 
params.skip_metrics) { + process metrics { + cpus 1 + memory "2g" + tag "${name}" + publishDir "${publish_dir}/${name}/metrics", mode: "copy" + + input: + set name, bam_name, type, file(bam), file(bai) from prepared_bams_for_metrics + + output: + file("*_metrics") optional true into txt_metrics + file("*.pdf") optional true into pdf_metrics + + """ + mkdir tmp + + gatk CollectMultipleMetrics \ + --java-options '-Xmx2g -Djava.io.tmpdir=tmp' \ + --INPUT ${bam} \ + --OUTPUT ${bam.baseName} \ + --REFERENCE_SEQUENCE ${params.reference} \ + --PROGRAM QualityScoreDistribution \ + --PROGRAM MeanQualityByCycle \ + --PROGRAM CollectAlignmentSummaryMetrics \ + --PROGRAM CollectBaseDistributionByCycle \ + --PROGRAM CollectGcBiasMetrics \ + --PROGRAM CollectInsertSizeMetrics \ + --PROGRAM CollectSequencingArtifactMetrics \ + --PROGRAM CollectSequencingArtifactMetrics + + rm -rf tmp + """ + } +} + if (!params.skip_realignment) { process realignmentAroundindels { cpus "${params.realignment_around_indels_cpus}" From df5dddd1cfe7e048578ed95161f990c888d2564b Mon Sep 17 00:00:00 2001 From: priesgof Date: Fri, 23 Apr 2021 12:14:59 +0200 Subject: [PATCH 3/7] enforce the cleanup of work folder as it becomes unmanageable --- nextflow.config | 2 ++ 1 file changed, 2 insertions(+) diff --git a/nextflow.config b/nextflow.config index 7392cf1..fbea7e0 100644 --- a/nextflow.config +++ b/nextflow.config @@ -35,6 +35,8 @@ env { // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] +cleanup = true + timeline { enabled = true //file = "${params.output}/execution_timeline.html" From e1db1e8c0969ec9d464710467dac8d348c478559 Mon Sep 17 00:00:00 2001 From: priesgof Date: Fri, 23 Apr 2021 12:20:05 +0200 Subject: [PATCH 4/7] update documentation --- Makefile | 9 +++++---- README.md | 10 ++++++---- main.nf | 2 +- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/Makefile b/Makefile index 53ecb0d..93e9b80 100644 --- a/Makefile +++ 
b/Makefile @@ -8,7 +8,8 @@ clean: rm -rf .nextflow* test: - nextflow main.nf -profile test,conda --output output/test1 - nextflow main.nf -profile test,conda --skip_bqsr --output output/test2 - nextflow main.nf -profile test,conda --skip_realignment --output output/test3 - nextflow main.nf -profile test,conda --skip_deduplication --output output/test4 + #nextflow main.nf -profile test,conda --output output/test1 + #nextflow main.nf -profile test,conda --skip_bqsr --output output/test2 + #nextflow main.nf -profile test,conda --skip_realignment --output output/test3 + #nextflow main.nf -profile test,conda --skip_deduplication --output output/test4 + nextflow main.nf -profile test,conda --output output/test5 --skip_metrics diff --git a/README.md b/README.md index 2e476f4..0e0d10e 100644 --- a/README.md +++ b/README.md @@ -23,9 +23,10 @@ Steps: * **Clean BAM**. Sets the mapping quality to 0 for all unmapped reads and avoids soft clipping going beyond the reference genome boundaries. Implemented in Picard * **Reorder chromosomes**. Makes the chromosomes in the BAM follow the same order as the reference genome. Implemented in Picard * **Add read groups**. GATK requires that some headers are adde to the BAM, also we want to flag somehow the normal and tumor BAMs in the header as some callers, such as Mutect2 require it. Implemented in Picard. - * **Mark duplicates** (optional). Identify the PCR and the optical duplications and marks those reads. This uses the parallelized version on Spark, it is reported to scale linearly up to 16 CPUs. - * **Realignment around indels** (optional). This procedure is important for locus based variant callers, but for any variant caller doing haplotype assembly it is not needed. This is computing intensive as it first finds regions for realignment where there are indication of indels and then it performs a local realignment over those regions. 
Implemented in GATK3, deprecated in GATK4 - * **Base Quality Score Recalibration (BQSR)** (optional). It aims at correcting systematic errors in the sequencer when assigning the base call quality errors, as these scores are used by variant callers it improves variant calling in some situations. Implemented in GATK4 +* **Mark duplicates** (optional). Identify the PCR and the optical duplications and marks those reads. This uses the parallelized version on Spark, it is reported to scale linearly up to 16 CPUs. +* **Realignment around indels** (optional). This procedure is important for locus based variant callers, but for any variant caller doing haplotype assembly it is not needed. This is computing intensive as it first finds regions for realignment where there are indication of indels and then it performs a local realignment over those regions. Implemented in GATK3, deprecated in GATK4 +* **Base Quality Score Recalibration (BQSR)** (optional). It aims at correcting systematic errors in the sequencer when assigning the base call quality errors, as these scores are used by variant callers it improves variant calling in some situations. Implemented in GATK4 +* **Metrics** (optional). A number of metrics are obtained over the BAM file with Picard's CollectMultipleMetrics (eg: duplication, insert size, alignment, etc.). ![Pipeline](bam_preprocessing2.png) @@ -56,6 +57,7 @@ Optional input: * skip_bqsr: optionally skip BQSR * skip_realignment: optionally skip realignment * skip_deduplication: optionally skip deduplication + * skip_metrics: optionally skip metrics * output: the folder where to publish output, if not provided they will be moved to "output" folder inside the workflow folder* prepare_bam_cpus: default 3 * platform: the platform to be added to the BAM header. 
Valid values: [ILLUMINA, SOLID, LS454, HELICOS and PACBIO] (default: ILLUMINA) * prepare_bam_memory: default 8g @@ -73,5 +75,5 @@ Optional input: Optional output: * Recalibration report * Realignment intervals - * Duplication metrics + * Metrics ``` diff --git a/main.nf b/main.nf index 9986869..5560808 100755 --- a/main.nf +++ b/main.nf @@ -67,7 +67,7 @@ Output: Optional output: * Recalibration report * Realignment intervals - * Duplication metrics + * Metrics """ } From 285bdb0bf81d55bac9c2031b8ba64894cafa6bc7 Mon Sep 17 00:00:00 2001 From: priesgof Date: Fri, 23 Apr 2021 12:22:23 +0200 Subject: [PATCH 5/7] commented tests by mistake --- Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 93e9b80..8149b75 100644 --- a/Makefile +++ b/Makefile @@ -8,8 +8,8 @@ clean: rm -rf .nextflow* test: - #nextflow main.nf -profile test,conda --output output/test1 - #nextflow main.nf -profile test,conda --skip_bqsr --output output/test2 - #nextflow main.nf -profile test,conda --skip_realignment --output output/test3 - #nextflow main.nf -profile test,conda --skip_deduplication --output output/test4 + nextflow main.nf -profile test,conda --output output/test1 + nextflow main.nf -profile test,conda --skip_bqsr --output output/test2 + nextflow main.nf -profile test,conda --skip_realignment --output output/test3 + nextflow main.nf -profile test,conda --skip_deduplication --output output/test4 nextflow main.nf -profile test,conda --output output/test5 --skip_metrics From c9f0dd2574fecfebbe1ac2b20dbe389e0261497a Mon Sep 17 00:00:00 2001 From: priesgof Date: Tue, 4 May 2021 12:14:46 +0200 Subject: [PATCH 6/7] integrate CollectHsMetrics --- Makefile | 1 + README.md | 54 +++++++++++++++--------- main.nf | 107 ++++++++++++++++++++++++++++++------------------ nextflow.config | 1 + 4 files changed, 103 insertions(+), 60 deletions(-) diff --git a/Makefile b/Makefile index 8149b75..e6d97df 100644 --- a/Makefile +++ b/Makefile @@ -13,3 
+13,4 @@ test: nextflow main.nf -profile test,conda --skip_realignment --output output/test3 nextflow main.nf -profile test,conda --skip_deduplication --output output/test4 nextflow main.nf -profile test,conda --output output/test5 --skip_metrics + nextflow main.nf -profile test,conda --output output/test6 --intervals false diff --git a/README.md b/README.md index 0e0d10e..2459998 100644 --- a/README.md +++ b/README.md @@ -30,17 +30,25 @@ Steps: ![Pipeline](bam_preprocessing2.png) +## References + +The bam preprocessing workflow uses some required references (`--reference`, `--dbsnp`, `--known_indels1` and `--known_indels2`). +These resources can be fetched from the GATK bundle https://gatk.broadinstitute.org/hc/en-us/articles/360035890811-Resource-bundle. + +Optionally, in order to run Picard's CollectHsMetrics an intervals file will need to be provided (`--intervals`). +This can be built from a BED file using Picard's BedToIntervalList (https://gatk.broadinstitute.org/hc/en-us/articles/360036883931-BedToIntervalList-Picard-) + ## How to run it ``` -$ nextflow run tron-bioinformatics/tronflow-bam-preprocessing -r v1.0.0 --help +$ nextflow run tron-bioinformatics/tronflow-bam-preprocessing -r v1.1.0 --help N E X T F L O W ~ version 19.07.0 Launching `main.nf` [intergalactic_shannon] - revision: e707c77d7b Usage: main.nf --input_files input_files Input: - * input_files: the path to a tab-separated values file containing in each row the sample name, sample type (eg: tumor or normal) and path to the BAM file + * --input_files: the path to a tab-separated values file containing in each row the sample name, sample type (eg: tumor or normal) and path to the BAM file Sample type will be added to the BAM header @SN sample name The input file does not have header! 
Example input file: @@ -49,24 +57,30 @@ Input: name2 tumor tumor.2.bam Optional input: - * reference: path to the FASTA genome reference (indexes expected *.fai, *.dict) - * dbsnp: path to the dbSNP VCF - * known_indels1: path to a VCF of known indels - * known_indels2: path to a second VCF of known indels - * NOTE: if any of the above parameters is not provided, default hg19 resources will be used - * skip_bqsr: optionally skip BQSR - * skip_realignment: optionally skip realignment - * skip_deduplication: optionally skip deduplication - * skip_metrics: optionally skip metrics - * output: the folder where to publish output, if not provided they will be moved to "output" folder inside the workflow folder* prepare_bam_cpus: default 3 - * platform: the platform to be added to the BAM header. Valid values: [ILLUMINA, SOLID, LS454, HELICOS and PACBIO] (default: ILLUMINA) - * prepare_bam_memory: default 8g - * mark_duplicates_cpus: default 16 - * mark_duplicates_memory: default 64g - * realignment_around_indels_cpus: default 2 - * realignment_around_indels_memory: default 32g - * bqsr_cpus: default 3 - * bqsr_memory: default 4g + * --reference: path to the FASTA genome reference (indexes expected *.fai, *.dict) + * --dbsnp: path to the dbSNP VCF + * --known_indels1: path to a VCF of known indels + * --known_indels2: path to a second VCF of known indels + **NOTE**: if any of the above parameters is not provided, default hg19 resources under + /projects/data/gatk_bundle/hg19/ will be used + + * --intervals: path to an intervals file to collect HS metrics from, this can be built with Picard's BedToIntervalList (default: None) + * --skip_bqsr: optionally skip BQSR (default: false) + * --skip_realignment: optionally skip realignment (default: false) + * --skip_deduplication: optionally skip deduplication (default: false) + * --skip_metrics: optionally skip metrics (default: false) + * --output: the folder where to publish output (default: ./output) + * --platform: the 
platform to be added to the BAM header. Valid values: [ILLUMINA, SOLID, LS454, HELICOS and PACBIO] (default: ILLUMINA) + +Computational resources: + * --prepare_bam_cpus: (default: 3) + * --prepare_bam_memory: (default: 8g) + * --mark_duplicates_cpus: (default: 16) + * --mark_duplicates_memory: (default: 64g) + * --realignment_around_indels_cpus: (default: 2) + * --realignment_around_indels_memory: (default: 32g) + * --bqsr_cpus: (default: 3) + * --bqsr_memory: (default: 4g) Output: * Preprocessed and indexed BAMs diff --git a/main.nf b/main.nf index 5560808..46c4ea4 100755 --- a/main.nf +++ b/main.nf @@ -7,6 +7,7 @@ params.reference = "/projects/data/gatk_bundle/hg19/ucsc.hg19.fasta" params.dbsnp = "/projects/data/gatk_bundle/hg19/dbsnp_138.hg19.vcf" params.known_indels1 = "/projects/data/gatk_bundle/hg19/1000G_phase1.indels.hg19.sites.vcf" params.known_indels2 = "/projects/data/gatk_bundle/hg19/Mills_and_1000G_gold_standard.indels.hg19.sites.sorted.vcf" +params.intervals = false params.skip_bqsr = false params.skip_realignment = false params.skip_deduplication = false @@ -28,40 +29,45 @@ params.bqsr_memory = "4g" def helpMessage() { log.info""" Usage: - bam_preprocessing.nf --input_files input_files --reference reference.fasta + main.nf --input_files input_files Input: - * input_files: the path to a tab-separated values file containing in each row the sample name, sample type (tumor or normal) and path to the BAM file + * --input_files: the path to a tab-separated values file containing in each row the sample name, sample type (eg: tumor or normal) and path to the BAM file Sample type will be added to the BAM header @SN sample name The input file does not have header! 
Example input file: - name1 tumor tumor.1.bam - name1 normal normal.1.bam - name2 tumor tumor.2.bam + name1 tumor tumor.1.bam + name1 normal normal.1.bam + name2 tumor tumor.2.bam Optional input: - * reference: path to the FASTA genome reference (indexes expected *.fai, *.dict) - * dbsnp: path to the dbSNP VCF - * known_indels1: path to a VCF of known indels - * known_indels2: path to a second VCF of known indels - * NOTE: if any of the above parameters is not provided, default hg19 resources will be used - * skip_bqsr: optionally skip BQSR - * skip_realignment: optionally skip realignment - * skip_deduplication: optionally skip deduplication - * skip_metrics: optionally skip the calculation of metrics from the BAM - * output: the folder where to publish output - * platform: the platform to be added to the BAM header. Valid values: [ILLUMINA, SOLID, LS454, HELICOS and PACBIO] (default: ILLUMINA) - * prepare_bam_cpus: default 3 - * prepare_bam_memory: default 8g - * mark_duplicates_cpus: default 16 - * mark_duplicates_memory: default 64g - * realignment_around_indels_cpus: default 2 - * realignment_around_indels_memory: default 32g - * bqsr_cpus: default 3 - * bqsr_memory: default 4g - -Output: - * Preprocessed and indexed BAM + * --reference: path to the FASTA genome reference (indexes expected *.fai, *.dict) + * --dbsnp: path to the dbSNP VCF + * --known_indels1: path to a VCF of known indels + * --known_indels2: path to a second VCF of known indels + **NOTE**: if any of the above parameters is not provided, default hg19 resources under + /projects/data/gatk_bundle/hg19/ will be used + + * --intervals: path to an intervals file to collect HS metrics from, this can be built with Picard's BedToIntervalList (default: None) + * --skip_bqsr: optionally skip BQSR (default: false) + * --skip_realignment: optionally skip realignment (default: false) + * --skip_deduplication: optionally skip deduplication (default: false) + * --skip_metrics: optionally skip metrics 
(default: false) + * --output: the folder where to publish output (default: ./output) + * --platform: the platform to be added to the BAM header. Valid values: [ILLUMINA, SOLID, LS454, HELICOS and PACBIO] (default: ILLUMINA) + +Computational resources: + * --prepare_bam_cpus: (default: 3) + * --prepare_bam_memory: (default: 8g) + * --mark_duplicates_cpus: (default: 16) + * --mark_duplicates_memory: (default: 64g) + * --realignment_around_indels_cpus: (default: 2) + * --realignment_around_indels_memory: (default: 32g) + * --bqsr_cpus: (default: 3) + * --bqsr_memory: (default: 4g) + + Output: + * Preprocessed and indexed BAMs * Tab-separated values file with the absolute paths to the preprocessed BAMs, preprocessed_bams.txt Optional output: @@ -108,7 +114,7 @@ process prepareBam { set val(name), val("${bam.baseName}"), val(type), file("${bam.baseName}.prepared.bam"), - file("${bam.baseName}.prepared.bai") into prepared_bams, prepared_bams_for_metrics + file("${bam.baseName}.prepared.bai") into prepared_bams, prepared_bams_for_metrics, prepared_bams_for_hs_metrics """ mkdir tmp @@ -135,8 +141,6 @@ process prepareBam { --RGPL ${params.platform} \ --SORT_ORDER coordinate \ --CREATE_INDEX true - - rm -rf tmp """ } @@ -157,8 +161,10 @@ if (!params.skip_deduplication) { output: set val(name), val(bam_name), val(type), file("${bam.baseName}.dedup.bam"), file("${bam.baseName}.dedup.bam.bai") into deduplicated_bams - file("${bam.baseName}.dedup_metrics") into deduplication_metrics + file("${bam.baseName}.dedup_metrics") optional true into deduplication_metrics + script: + dedup_metrics = params.skip_metrics ? "" : "--metrics-file ${bam.baseName}.dedup_metrics" /* write the MarkDuplicates metrics file unless --skip_metrics is set */ """ mkdir tmp @@ -167,9 +173,7 @@ if (!params.skip_deduplication) { --input ${bam} \ --output ${bam.baseName}.dedup.bam \ --conf 'spark.executor.cores=${task.cpus}' \ - --metrics-file ${bam.baseName}.dedup_metrics - - rm -rf tmp + ${dedup_metrics} """ } } @@ -178,6 +182,35 @@ else { } if (! 
params.skip_metrics) { + + if (params.intervals) { + + process hsMetrics { + cpus 1 + memory "2g" + tag "${name}" + publishDir "${publish_dir}/${name}/metrics", mode: "copy" + + input: + set name, bam_name, type, file(bam), file(bai) from prepared_bams_for_hs_metrics + + output: + file("*_metrics") optional true into txt_hs_metrics + file("*.pdf") optional true into pdf_hs_metrics + + """ + mkdir tmp + + gatk CollectHsMetrics \ + --java-options '-Xmx2g -Djava.io.tmpdir=tmp' \ + --INPUT ${bam} \ + --OUTPUT ${bam.baseName} \ + --TARGET_INTERVALS ${params.intervals} \ + --BAIT_INTERVALS ${params.intervals} + """ + } + } + process metrics { cpus 1 memory "2g" @@ -207,8 +240,6 @@ if (! params.skip_metrics) { --PROGRAM CollectInsertSizeMetrics \ --PROGRAM CollectSequencingArtifactMetrics \ --PROGRAM CollectSequencingArtifactMetrics - - rm -rf tmp """ } } @@ -247,8 +278,6 @@ if (!params.skip_realignment) { --consensusDeterminationModel USE_SW \ --LODThresholdForCleaning 0.4 \ --maxReadsInMemory 600000 - - rm -rf tmp """ } } @@ -288,8 +317,6 @@ if (!params.skip_bqsr) { --output ${bam_name}.preprocessed.bam \ --reference ${params.reference} \ --bqsr-recal-file ${bam_name}.recalibration_report.grp - - rm -rf tmp """ } } diff --git a/nextflow.config b/nextflow.config index fbea7e0..50d514a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -23,6 +23,7 @@ profiles { params.bqsr_memory = "3g" params.known_indels1 = "$baseDir/test_data/1000G_phase1.indels.hg19.sites.minimal.vcf" params.known_indels2 = "$baseDir/test_data/Mills_and_1000G_gold_standard.indels.hg19.sites.sorted.minimal.vcf" + params.intervals = "$baseDir/test_data/minimal_intervals.intervals" params.dbsnp = "$baseDir/test_data/dbsnp_138.hg19.minimal.vcf" } } From bead81acb7a77bb7e0c8838a6009a8e907e0ab31 Mon Sep 17 00:00:00 2001 From: priesgof Date: Tue, 4 May 2021 14:39:03 +0200 Subject: [PATCH 7/7] add optional per target and per base metrics --- Makefile | 13 +- README.md | 2 + main.nf | 18 +- 
test_data/minimal_intervals.bed | 351 +++++++++++++++++++++++++ test_data/minimal_intervals.intervals | 356 ++++++++++++++++++++++++++ 5 files changed, 732 insertions(+), 8 deletions(-) create mode 100644 test_data/minimal_intervals.bed create mode 100644 test_data/minimal_intervals.intervals diff --git a/Makefile b/Makefile index e6d97df..eee69aa 100644 --- a/Makefile +++ b/Makefile @@ -8,9 +8,10 @@ clean: rm -rf .nextflow* test: - nextflow main.nf -profile test,conda --output output/test1 - nextflow main.nf -profile test,conda --skip_bqsr --output output/test2 - nextflow main.nf -profile test,conda --skip_realignment --output output/test3 - nextflow main.nf -profile test,conda --skip_deduplication --output output/test4 - nextflow main.nf -profile test,conda --output output/test5 --skip_metrics - nextflow main.nf -profile test,conda --output output/test6 --intervals false + nextflow main.nf -profile test,conda --output output/test1 + nextflow main.nf -profile test,conda --skip_bqsr --output output/test2 + nextflow main.nf -profile test,conda --skip_realignment --output output/test3 + nextflow main.nf -profile test,conda --skip_deduplication --output output/test4 + nextflow main.nf -profile test,conda --output output/test5 --skip_metrics + nextflow main.nf -profile test,conda --output output/test6 --intervals false + nextflow main.nf -profile test,conda --output output/test7 --hs_metrics_target_coverage target_coverage.txt --hs_metrics_per_base_coverage per_base_coverage.txt diff --git a/README.md b/README.md index 2459998..08501fe 100644 --- a/README.md +++ b/README.md @@ -65,6 +65,8 @@ Optional input: /projects/data/gatk_bundle/hg19/ will be used * --intervals: path to an intervals file to collect HS metrics from, this can be built with Picard's BedToIntervalList (default: None) + * --hs_metrics_target_coverage: name of output file for target HS metrics (default: None) + * --hs_metrics_per_base_coverage: name of output file for per base HS metrics (default: 
None) * --skip_bqsr: optionally skip BQSR (default: false) * --skip_realignment: optionally skip realignment (default: false) * --skip_deduplication: optionally skip deduplication (default: false) diff --git a/main.nf b/main.nf index 46c4ea4..1c663ff 100755 --- a/main.nf +++ b/main.nf @@ -8,6 +8,8 @@ params.dbsnp = "/projects/data/gatk_bundle/hg19/dbsnp_138.hg19.vcf" params.known_indels1 = "/projects/data/gatk_bundle/hg19/1000G_phase1.indels.hg19.sites.vcf" params.known_indels2 = "/projects/data/gatk_bundle/hg19/Mills_and_1000G_gold_standard.indels.hg19.sites.sorted.vcf" params.intervals = false +params.hs_metrics_target_coverage = false +params.hs_metrics_per_base_coverage = false params.skip_bqsr = false params.skip_realignment = false params.skip_deduplication = false @@ -49,6 +51,8 @@ Optional input: /projects/data/gatk_bundle/hg19/ will be used * --intervals: path to an intervals file to collect HS metrics from, this can be built with Picard's BedToIntervalList (default: None) + * --hs_metrics_target_coverage: name of output file for target HS metrics (default: None) + * --hs_metrics_per_base_coverage: name of output file for per base HS metrics (default: None) * --skip_bqsr: optionally skip BQSR (default: false) * --skip_realignment: optionally skip realignment (default: false) * --skip_deduplication: optionally skip deduplication (default: false) @@ -197,7 +201,16 @@ if (! params.skip_metrics) { output: file("*_metrics") optional true into txt_hs_metrics file("*.pdf") optional true into pdf_hs_metrics - + file(params.hs_metrics_target_coverage) optional true into target_hs_metrics + file(params.hs_metrics_per_base_coverage) optional true into per_base_hs_metrics + + script: + hs_metrics_target_coverage= params.hs_metrics_target_coverage ? + "--PER_TARGET_COVERAGE ${params.hs_metrics_target_coverage} --REFERENCE_SEQUENCE ${params.reference}" : + "" + hs_metrics_per_base_coverage= params.hs_metrics_per_base_coverage ? 
+ "--PER_BASE_COVERAGE ${params.hs_metrics_per_base_coverage}" : + "" """ mkdir tmp @@ -206,7 +219,8 @@ if (! params.skip_metrics) { --INPUT ${bam} \ --OUTPUT ${bam.baseName} \ --TARGET_INTERVALS ${params.intervals} \ - --BAIT_INTERVALS ${params.intervals} + --BAIT_INTERVALS ${params.intervals} \ + ${hs_metrics_target_coverage} ${hs_metrics_per_base_coverage} """ } } diff --git a/test_data/minimal_intervals.bed b/test_data/minimal_intervals.bed new file mode 100644 index 0000000..a039493 --- /dev/null +++ b/test_data/minimal_intervals.bed @@ -0,0 +1,351 @@ +chr1 10002 10106 +chr1 12395 12496 +chr1 12635 12736 +chr1 21326 21427 +chr1 21506 21607 +chr1 23920 24021 +chr1 24150 24251 +chr1 25063 25164 +chr1 25222 25323 +chr1 33633 33700 +chr1 33741 33842 +chr1 53092 53193 +chr1 53253 53354 +chr1 55922 56023 +chr1 56025 56126 +chr1 61916 62017 +chr1 64445 64546 +chr1 65717 65818 +chr1 65887 65988 +chr1 68698 68841 +chr1 68864 69055 +chr1 69334 69435 +chr1 69552 69653 +chr1 74145 74392 +chr1 74409 74510 +chr1 74564 74665 +chr1 75046 75147 +chr1 75179 75330 +chr1 75385 75755 +chr1 75877 76015 +chr1 76031 76332 +chr1 76339 76445 +chr1 76450 76592 +chr1 130116 130217 +chr1 140562 140663 +chr1 140747 140848 +chr1 146624 146725 +chr1 329353 329454 +chr1 334232 334333 +chr1 334351 334452 +chr1 358159 358260 +chr1 372848 372949 +chr1 375793 375894 +chr1 376518 376619 +chr1 376773 376856 +chr1 381192 381291 +chr1 381342 381505 +chr1 404974 405081 +chr1 471013 471114 +chr1 471229 471330 +chr1 526832 526933 +chr1 526988 527089 +chr1 559951 560050 +chr1 565313 565477 +chr1 565497 565603 +chr1 565797 565898 +chr1 565913 566014 +chr1 566048 566149 +chr1 566205 566306 +chr1 566352 566453 +chr1 567808 567909 +chr1 567942 568103 +chr1 568179 568280 +chr1 568992 569093 +chr1 569206 569307 +chr1 582199 582300 +chr1 582403 582504 +chr1 585389 585490 +chr1 608253 608354 +chr1 608884 608985 +chr1 611357 611458 +chr1 611471 611572 +chr1 621356 621457 +chr1 621552 621653 +chr1 647770 647871 
+chr1 721779 721880 +chr1 721963 722064 +chr1 735857 735958 +chr1 736077 736159 +chr1 808726 808827 +chr1 808900 809050 +chr1 809115 809216 +chr1 827626 827724 +chr1 827833 827934 +chr1 834422 834557 +chr1 834637 834702 +chr1 863500 863581 +chr1 863615 863716 +chr1 887624 887823 +chr1 900330 900431 +chr1 900479 900578 +chr1 914191 914292 +chr1 914400 914501 +chr1 916458 916559 +chr1 916623 916724 +chr1 943149 943396 +chr1 954072 954173 +chr1 954462 954485 +chr1 979016 979117 +chr1 979243 979344 +chr1 979581 979682 +chr1 979703 979804 +chr1 985111 985212 +chr1 985217 985318 +chr1 996570 996740 +chr2 41557 41658 +chr2 41735 41836 +chr2 97468 97569 +chr2 127422 127587 +chr2 146032 146133 +chr2 164737 164838 +chr2 164960 165037 +chr2 171536 171718 +chr2 172804 172891 +chr2 172955 173055 +chr2 218712 218813 +chr2 218821 218922 +chr2 229582 229683 +chr2 247497 247593 +chr2 247713 247814 +chr2 271575 271774 +chr2 299657 299755 +chr2 299788 299889 +chr2 331993 332094 +chr2 332170 332271 +chr2 334096 334197 +chr2 334309 334410 +chr2 405070 405171 +chr2 409128 409207 +chr2 409213 409313 +chr2 578949 579075 +chr2 579178 579213 +chr2 679778 679879 +chr2 703338 703439 +chr2 703548 703586 +chr2 703928 704073 +chr2 704136 704220 +chr2 730187 730298 +chr2 786695 786796 +chr2 799807 800033 +chr2 800149 800250 +chr2 800436 800538 +chr2 800549 800678 +chr2 800798 800899 +chr2 800936 801231 +chr2 801240 801420 +chr2 801444 801605 +chr2 801681 802115 +chr2 802125 802225 +chr2 802319 802588 +chr2 802635 802842 +chr2 802883 803045 +chr2 803079 803349 +chr2 803402 803757 +chr2 803789 804024 +chr2 804384 804568 +chr2 804692 804798 +chr2 804830 805255 +chr2 805272 805373 +chr2 829800 829903 +chr2 966068 966238 +chr2 966259 966449 +chr3 228643 228744 +chr3 262289 262390 +chr3 262428 262529 +chr3 273416 273517 +chr3 343633 343731 +chr3 343749 343850 +chr3 343927 344025 +chr3 344132 344234 +chr3 345427 345528 +chr3 387191 387292 +chr3 402285 402474 +chr3 402489 402592 +chr3 402818 403021 +chr3 
403095 403204 +chr3 430010 430134 +chr3 430202 430251 +chr3 443171 443372 +chr3 447173 447274 +chr3 447341 447442 +chr3 517251 517352 +chr3 621597 621698 +chr3 622286 622392 +chr3 622463 622564 +chr3 622602 622968 +chr3 622972 623073 +chr3 623276 623456 +chr3 623606 623707 +chr3 652283 652309 +chr3 652458 652623 +chr3 652671 652773 +chr3 652867 652957 +chr3 652992 653093 +chr3 653158 653517 +chr3 653538 653661 +chr3 654575 654677 +chr3 654702 654828 +chr3 654854 654955 +chr3 655006 655108 +chr3 655197 655315 +chr3 655463 655608 +chr3 661724 661825 +chr3 728606 728707 +chr3 739237 739434 +chr3 739469 739770 +chr3 739783 740025 +chr3 740026 740127 +chr3 801636 801816 +chr3 802246 802587 +chr3 802604 802717 +chr3 802756 802857 +chr3 803134 803235 +chr3 803859 804232 +chr3 814259 814382 +chr3 814442 814513 +chr3 814727 814828 +chr3 851082 851183 +chr3 852774 852875 +chr3 853536 853936 +chr3 853938 854408 +chr3 854428 854704 +chr3 854915 855401 +chr3 855452 855825 +chr3 855859 855960 +chr3 856010 856111 +chr3 856177 856278 +chr3 856451 856552 +chr3 856589 856723 +chr3 863818 863918 +chr3 863995 864096 +chr3 868516 868617 +chr3 868777 868878 +chr3 922513 922616 +chr3 922653 922732 +chr3 932974 933072 +chr3 933176 933273 +chr3 934695 934896 +chr3 941113 941214 +chr3 953589 953690 +chr3 982605 982706 +chr4 14896 14997 +chr4 15400 15589 +chr4 15599 15704 +chr4 18362 18463 +chr4 18545 18646 +chr4 29961 30024 +chr4 30060 30170 +chr4 30234 30335 +chr4 30776 30877 +chr4 37707 37808 +chr4 37930 38029 +chr4 39215 39316 +chr4 39386 39482 +chr4 40146 40247 +chr4 40256 40357 +chr4 50208 50309 +chr4 57853 58047 +chr4 59193 59294 +chr4 59370 59471 +chr4 60068 60169 +chr4 60229 60330 +chr4 128773 128874 +chr4 146190 146291 +chr4 146357 146458 +chr4 157123 157224 +chr4 159823 160029 +chr4 160041 160159 +chr4 160309 160410 +chr4 160575 160676 +chr4 160879 160952 +chr4 161116 161364 +chr4 162448 162587 +chr4 162664 162852 +chr4 162909 163031 +chr4 163047 163358 +chr4 163411 163508 +chr4 
163595 163696 +chr4 163953 164078 +chr4 164161 164261 +chr4 164836 164937 +chr4 169661 169762 +chr4 248378 248479 +chr4 248567 248668 +chr4 263976 264241 +chr4 264744 264845 +chr4 264920 265021 +chr4 265110 265211 +chr4 265311 265412 +chr4 294589 294690 +chr4 295212 295308 +chr4 295337 295438 +chr4 304137 304237 +chr4 306456 306557 +chr4 337533 337643 +chr4 337719 337859 +chr4 357603 357704 +chr4 358074 358175 +chr4 358308 358386 +chr4 375755 375931 +chr4 397495 397558 +chr4 397634 397735 +chr4 401090 401246 +chr4 435886 435987 +chr4 436164 436265 +chr4 457311 457412 +chr4 457551 457652 +chr4 466254 466355 +chr4 466429 466594 +chr4 466597 466698 +chr4 470902 471003 +chr4 471067 471168 +chr4 479500 479602 +chr4 490689 490800 +chr4 502604 502705 +chr4 502724 502825 +chr4 524223 524325 +chr4 524346 524562 +chr4 544531 544617 +chr4 544887 544988 +chr4 551799 551881 +chr4 551926 552028 +chr4 552539 552640 +chr4 586314 586415 +chr4 586465 586596 +chr4 586714 586742 +chr4 639080 639386 +chr4 666980 667081 +chr4 667230 667331 +chr4 690005 690105 +chr4 705301 705467 +chr4 705489 705545 +chr4 727439 727540 +chr4 727622 727723 +chr4 742955 743012 +chr4 743061 743162 +chr4 797368 797469 +chr4 804016 804117 +chr4 835367 835493 +chr4 841067 841168 +chr4 844773 844874 +chr4 844993 845094 +chr4 914456 914619 +chr4 921888 921989 +chr4 927108 927209 +chr4 932076 932177 +chr4 936539 936640 +chr4 972413 972514 +chr4 977792 977893 +chr4 988230 988278 +chr4 988344 988445 diff --git a/test_data/minimal_intervals.intervals b/test_data/minimal_intervals.intervals new file mode 100644 index 0000000..a277e19 --- /dev/null +++ b/test_data/minimal_intervals.intervals @@ -0,0 +1,356 @@ +@HD VN:1.6 SO:coordinate +@SQ SN:chr1 LN:1000000 +@SQ SN:chr2 LN:1000000 +@SQ SN:chr3 LN:1000000 +@SQ SN:chr4 LN:1000000 +chr1 10003 10106 + . +chr1 12396 12496 + . +chr1 12636 12736 + . +chr1 21327 21427 + . +chr1 21507 21607 + . +chr1 23921 24021 + . +chr1 24151 24251 + . +chr1 25064 25164 + . 
+chr1 25223 25323 + . +chr1 33634 33700 + . +chr1 33742 33842 + . +chr1 53093 53193 + . +chr1 53254 53354 + . +chr1 55923 56023 + . +chr1 56026 56126 + . +chr1 61917 62017 + . +chr1 64446 64546 + . +chr1 65718 65818 + . +chr1 65888 65988 + . +chr1 68699 68841 + . +chr1 68865 69055 + . +chr1 69335 69435 + . +chr1 69553 69653 + . +chr1 74146 74392 + . +chr1 74410 74510 + . +chr1 74565 74665 + . +chr1 75047 75147 + . +chr1 75180 75330 + . +chr1 75386 75755 + . +chr1 75878 76015 + . +chr1 76032 76332 + . +chr1 76340 76445 + . +chr1 76451 76592 + . +chr1 130117 130217 + . +chr1 140563 140663 + . +chr1 140748 140848 + . +chr1 146625 146725 + . +chr1 329354 329454 + . +chr1 334233 334333 + . +chr1 334352 334452 + . +chr1 358160 358260 + . +chr1 372849 372949 + . +chr1 375794 375894 + . +chr1 376519 376619 + . +chr1 376774 376856 + . +chr1 381193 381291 + . +chr1 381343 381505 + . +chr1 404975 405081 + . +chr1 471014 471114 + . +chr1 471230 471330 + . +chr1 526833 526933 + . +chr1 526989 527089 + . +chr1 559952 560050 + . +chr1 565314 565477 + . +chr1 565498 565603 + . +chr1 565798 565898 + . +chr1 565914 566014 + . +chr1 566049 566149 + . +chr1 566206 566306 + . +chr1 566353 566453 + . +chr1 567809 567909 + . +chr1 567943 568103 + . +chr1 568180 568280 + . +chr1 568993 569093 + . +chr1 569207 569307 + . +chr1 582200 582300 + . +chr1 582404 582504 + . +chr1 585390 585490 + . +chr1 608254 608354 + . +chr1 608885 608985 + . +chr1 611358 611458 + . +chr1 611472 611572 + . +chr1 621357 621457 + . +chr1 621553 621653 + . +chr1 647771 647871 + . +chr1 721780 721880 + . +chr1 721964 722064 + . +chr1 735858 735958 + . +chr1 736078 736159 + . +chr1 808727 808827 + . +chr1 808901 809050 + . +chr1 809116 809216 + . +chr1 827627 827724 + . +chr1 827834 827934 + . +chr1 834423 834557 + . +chr1 834638 834702 + . +chr1 863501 863581 + . +chr1 863616 863716 + . +chr1 887625 887823 + . +chr1 900331 900431 + . +chr1 900480 900578 + . +chr1 914192 914292 + . +chr1 914401 914501 + . 
+chr1 916459 916559 + . +chr1 916624 916724 + . +chr1 943150 943396 + . +chr1 954073 954173 + . +chr1 954463 954485 + . +chr1 979017 979117 + . +chr1 979244 979344 + . +chr1 979582 979682 + . +chr1 979704 979804 + . +chr1 985112 985212 + . +chr1 985218 985318 + . +chr1 996571 996740 + . +chr2 41558 41658 + . +chr2 41736 41836 + . +chr2 97469 97569 + . +chr2 127423 127587 + . +chr2 146033 146133 + . +chr2 164738 164838 + . +chr2 164961 165037 + . +chr2 171537 171718 + . +chr2 172805 172891 + . +chr2 172956 173055 + . +chr2 218713 218813 + . +chr2 218822 218922 + . +chr2 229583 229683 + . +chr2 247498 247593 + . +chr2 247714 247814 + . +chr2 271576 271774 + . +chr2 299658 299755 + . +chr2 299789 299889 + . +chr2 331994 332094 + . +chr2 332171 332271 + . +chr2 334097 334197 + . +chr2 334310 334410 + . +chr2 405071 405171 + . +chr2 409129 409207 + . +chr2 409214 409313 + . +chr2 578950 579075 + . +chr2 579179 579213 + . +chr2 679779 679879 + . +chr2 703339 703439 + . +chr2 703549 703586 + . +chr2 703929 704073 + . +chr2 704137 704220 + . +chr2 730188 730298 + . +chr2 786696 786796 + . +chr2 799808 800033 + . +chr2 800150 800250 + . +chr2 800437 800538 + . +chr2 800550 800678 + . +chr2 800799 800899 + . +chr2 800937 801231 + . +chr2 801241 801420 + . +chr2 801445 801605 + . +chr2 801682 802115 + . +chr2 802126 802225 + . +chr2 802320 802588 + . +chr2 802636 802842 + . +chr2 802884 803045 + . +chr2 803080 803349 + . +chr2 803403 803757 + . +chr2 803790 804024 + . +chr2 804385 804568 + . +chr2 804693 804798 + . +chr2 804831 805255 + . +chr2 805273 805373 + . +chr2 829801 829903 + . +chr2 966069 966238 + . +chr2 966260 966449 + . +chr3 228644 228744 + . +chr3 262290 262390 + . +chr3 262429 262529 + . +chr3 273417 273517 + . +chr3 343634 343731 + . +chr3 343750 343850 + . +chr3 343928 344025 + . +chr3 344133 344234 + . +chr3 345428 345528 + . +chr3 387192 387292 + . +chr3 402286 402474 + . +chr3 402490 402592 + . +chr3 402819 403021 + . +chr3 403096 403204 + . 
+chr3 430011 430134 + . +chr3 430203 430251 + . +chr3 443172 443372 + . +chr3 447174 447274 + . +chr3 447342 447442 + . +chr3 517252 517352 + . +chr3 621598 621698 + . +chr3 622287 622392 + . +chr3 622464 622564 + . +chr3 622603 622968 + . +chr3 622973 623073 + . +chr3 623277 623456 + . +chr3 623607 623707 + . +chr3 652284 652309 + . +chr3 652459 652623 + . +chr3 652672 652773 + . +chr3 652868 652957 + . +chr3 652993 653093 + . +chr3 653159 653517 + . +chr3 653539 653661 + . +chr3 654576 654677 + . +chr3 654703 654828 + . +chr3 654855 654955 + . +chr3 655007 655108 + . +chr3 655198 655315 + . +chr3 655464 655608 + . +chr3 661725 661825 + . +chr3 728607 728707 + . +chr3 739238 739434 + . +chr3 739470 739770 + . +chr3 739784 740025 + . +chr3 740027 740127 + . +chr3 801637 801816 + . +chr3 802247 802587 + . +chr3 802605 802717 + . +chr3 802757 802857 + . +chr3 803135 803235 + . +chr3 803860 804232 + . +chr3 814260 814382 + . +chr3 814443 814513 + . +chr3 814728 814828 + . +chr3 851083 851183 + . +chr3 852775 852875 + . +chr3 853537 853936 + . +chr3 853939 854408 + . +chr3 854429 854704 + . +chr3 854916 855401 + . +chr3 855453 855825 + . +chr3 855860 855960 + . +chr3 856011 856111 + . +chr3 856178 856278 + . +chr3 856452 856552 + . +chr3 856590 856723 + . +chr3 863819 863918 + . +chr3 863996 864096 + . +chr3 868517 868617 + . +chr3 868778 868878 + . +chr3 922514 922616 + . +chr3 922654 922732 + . +chr3 932975 933072 + . +chr3 933177 933273 + . +chr3 934696 934896 + . +chr3 941114 941214 + . +chr3 953590 953690 + . +chr3 982606 982706 + . +chr4 14897 14997 + . +chr4 15401 15589 + . +chr4 15600 15704 + . +chr4 18363 18463 + . +chr4 18546 18646 + . +chr4 29962 30024 + . +chr4 30061 30170 + . +chr4 30235 30335 + . +chr4 30777 30877 + . +chr4 37708 37808 + . +chr4 37931 38029 + . +chr4 39216 39316 + . +chr4 39387 39482 + . +chr4 40147 40247 + . +chr4 40257 40357 + . +chr4 50209 50309 + . +chr4 57854 58047 + . +chr4 59194 59294 + . +chr4 59371 59471 + . +chr4 60069 60169 + . 
+chr4 60230 60330 + . +chr4 128774 128874 + . +chr4 146191 146291 + . +chr4 146358 146458 + . +chr4 157124 157224 + . +chr4 159824 160029 + . +chr4 160042 160159 + . +chr4 160310 160410 + . +chr4 160576 160676 + . +chr4 160880 160952 + . +chr4 161117 161364 + . +chr4 162449 162587 + . +chr4 162665 162852 + . +chr4 162910 163031 + . +chr4 163048 163358 + . +chr4 163412 163508 + . +chr4 163596 163696 + . +chr4 163954 164078 + . +chr4 164162 164261 + . +chr4 164837 164937 + . +chr4 169662 169762 + . +chr4 248379 248479 + . +chr4 248568 248668 + . +chr4 263977 264241 + . +chr4 264745 264845 + . +chr4 264921 265021 + . +chr4 265111 265211 + . +chr4 265312 265412 + . +chr4 294590 294690 + . +chr4 295213 295308 + . +chr4 295338 295438 + . +chr4 304138 304237 + . +chr4 306457 306557 + . +chr4 337534 337643 + . +chr4 337720 337859 + . +chr4 357604 357704 + . +chr4 358075 358175 + . +chr4 358309 358386 + . +chr4 375756 375931 + . +chr4 397496 397558 + . +chr4 397635 397735 + . +chr4 401091 401246 + . +chr4 435887 435987 + . +chr4 436165 436265 + . +chr4 457312 457412 + . +chr4 457552 457652 + . +chr4 466255 466355 + . +chr4 466430 466594 + . +chr4 466598 466698 + . +chr4 470903 471003 + . +chr4 471068 471168 + . +chr4 479501 479602 + . +chr4 490690 490800 + . +chr4 502605 502705 + . +chr4 502725 502825 + . +chr4 524224 524325 + . +chr4 524347 524562 + . +chr4 544532 544617 + . +chr4 544888 544988 + . +chr4 551800 551881 + . +chr4 551927 552028 + . +chr4 552540 552640 + . +chr4 586315 586415 + . +chr4 586466 586596 + . +chr4 586715 586742 + . +chr4 639081 639386 + . +chr4 666981 667081 + . +chr4 667231 667331 + . +chr4 690006 690105 + . +chr4 705302 705467 + . +chr4 705490 705545 + . +chr4 727440 727540 + . +chr4 727623 727723 + . +chr4 742956 743012 + . +chr4 743062 743162 + . +chr4 797369 797469 + . +chr4 804017 804117 + . +chr4 835368 835493 + . +chr4 841068 841168 + . +chr4 844774 844874 + . +chr4 844994 845094 + . +chr4 914457 914619 + . +chr4 921889 921989 + . 
+chr4 927109 927209 + . +chr4 932077 932177 + . +chr4 936540 936640 + . +chr4 972414 972514 + . +chr4 977793 977893 + . +chr4 988231 988278 + . +chr4 988345 988445 + .