diff --git a/.github/workflows/automated_tests.yml b/.github/workflows/automated_tests.yml index 3b27f45..fde92fb 100644 --- a/.github/workflows/automated_tests.yml +++ b/.github/workflows/automated_tests.yml @@ -29,4 +29,5 @@ jobs: key: ${{ runner.os }}-tronflow-bam-preprocessing - name: Run tests run: | + export NXF_VER=22.04.5 make \ No newline at end of file diff --git a/Makefile b/Makefile index 434cfd2..7eb699b 100644 --- a/Makefile +++ b/Makefile @@ -16,3 +16,4 @@ test: bash tests/test_07.sh bash tests/test_08.sh bash tests/test_09.sh + bash tests/test_10.sh diff --git a/main.nf b/main.nf index 8e517de..d7a37c8 100755 --- a/main.nf +++ b/main.nf @@ -3,8 +3,8 @@ nextflow.enable.dsl = 2 include { PREPARE_BAM; INDEX_BAM } from './modules/01_prepare_bam' -include { MARK_DUPLICATES } from './modules/02_mark_duplicates' -include { METRICS; HS_METRICS; COVERAGE_ANALYSIS } from './modules/03_metrics' +include { MARK_DUPLICATES; SPLIT_CIGAR_N_READS } from './modules/02_mark_duplicates' +include { METRICS; HS_METRICS; COVERAGE_ANALYSIS; FLAGSTAT } from './modules/03_metrics' include { REALIGNMENT_AROUND_INDELS } from './modules/04_realignment_around_indels' include { BQSR; CREATE_OUTPUT } from './modules/05_bqsr' @@ -26,6 +26,7 @@ params.output = 'output' params.platform = "ILLUMINA" params.collect_hs_metrics_min_base_quality = false params.collect_hs_metrics_min_mapping_quality = false +params.split_cigarn = false // computational resources params.prepare_bam_cpus = 3 @@ -84,7 +85,7 @@ else if (params.input_files) { workflow { - PREPARE_BAM(input_files) + PREPARE_BAM(input_files, params.reference) if (!params.skip_deduplication) { MARK_DUPLICATES(PREPARE_BAM.out.prepared_bams) @@ -95,16 +96,22 @@ workflow { deduplicated_bams = INDEX_BAM.out.indexed_bams } + if (params.split_cigarn) { + SPLIT_CIGAR_N_READS(deduplicated_bams, params.reference) + deduplicated_bams = SPLIT_CIGAR_N_READS.out.split_cigarn_bams + } + if (! params.skip_metrics) { if (params.intervals) { HS_METRICS(deduplicated_bams) } - METRICS(deduplicated_bams) + METRICS(deduplicated_bams, params.reference) COVERAGE_ANALYSIS(deduplicated_bams) + FLAGSTAT(deduplicated_bams) } if (!params.skip_realignment) { - REALIGNMENT_AROUND_INDELS(deduplicated_bams) + REALIGNMENT_AROUND_INDELS(deduplicated_bams, params.reference) realigned_bams = REALIGNMENT_AROUND_INDELS.out.realigned_bams } else { @@ -112,7 +119,7 @@ workflow { } if (!params.skip_bqsr) { - BQSR(realigned_bams) + BQSR(realigned_bams, params.reference) preprocessed_bams = BQSR.out.recalibrated_bams } else { diff --git a/modules/01_prepare_bam.nf b/modules/01_prepare_bam.nf index c1b3c01..d410496 100644 --- a/modules/01_prepare_bam.nf +++ b/modules/01_prepare_bam.nf @@ -3,8 +3,6 @@ params.prepare_bam_memory = "8g" params.index_cpus = 1 params.index_memory = "8g" params.platform = "ILLUMINA" -params.reference = false -params.skip_deduplication = false params.output = 'output' /* @@ -18,35 +16,31 @@ process PREPARE_BAM { tag "${name}" publishDir "${params.output}/${name}/", mode: "copy", pattern: "software_versions.*" - conda (params.enable_conda ? "bioconda::gatk4=4.2.5.0 bioconda::samtools=1.12" : null) + conda (params.enable_conda ? "bioconda::gatk4=4.2.5.0" : null) input: tuple val(name), val(type), file(bam) + val(reference) output: tuple val(name), val(type), file("${name}.prepared.bam"), emit: prepared_bams file("software_versions.${task.process}.txt") script: - order = params.skip_deduplication ? 
"--SORT_ORDER coordinate": "--SORT_ORDER queryname" """ mkdir tmp - samtools sort \ - --threads ${params.prepare_bam_cpus} \ - -o ${name}.sorted.bam ${bam} - gatk AddOrReplaceReadGroups \ --java-options '-Xmx${params.prepare_bam_memory} -Djava.io.tmpdir=./tmp' \ --VALIDATION_STRINGENCY SILENT \ - --INPUT ${name}.sorted.bam \ + --INPUT ${bam} \ --OUTPUT /dev/stdout \ - --REFERENCE_SEQUENCE ${params.reference} \ + --REFERENCE_SEQUENCE ${reference} \ --RGPU 1 \ --RGID 1 \ --RGSM ${type} \ --RGLB 1 \ - --RGPL ${params.platform} ${order} | \ + --RGPL ${params.platform} | \ gatk CleanSam \ --java-options '-Xmx${params.prepare_bam_memory} -Djava.io.tmpdir=./tmp' \ --INPUT /dev/stdin \ @@ -55,13 +49,10 @@ process PREPARE_BAM { --java-options '-Xmx${params.prepare_bam_memory} -Djava.io.tmpdir=./tmp' \ --INPUT /dev/stdin \ --OUTPUT ${name}.prepared.bam \ - --SEQUENCE_DICTIONARY ${params.reference} - - rm -f ${name}.sorted.bam + --SEQUENCE_DICTIONARY ${reference} echo ${params.manifest} >> software_versions.${task.process}.txt gatk --version >> software_versions.${task.process}.txt - samtools --version >> software_versions.${task.process}.txt """ } @@ -71,24 +62,32 @@ process INDEX_BAM { tag "${name}" publishDir "${params.output}/${name}", mode: "copy", pattern: "software_versions.*" - conda (params.enable_conda ? "bioconda::gatk4=4.2.5.0" : null) + conda (params.enable_conda ? "bioconda::sambamba=0.8.2" : null) input: tuple val(name), val(type), file(bam) output: - tuple val(name), val(type), file("${bam}"), file("${bam.baseName}.bai"), emit: indexed_bams + tuple val(name), val(type), file("${name}.sorted.bam"), file("${name}.sorted.bam.bai"), emit: indexed_bams file("software_versions.${task.process}.txt") script: """ mkdir tmp - gatk BuildBamIndex \ - --java-options '-Xmx8g -Djava.io.tmpdir=./tmp' \ - --INPUT ${bam} + # sort + sambamba sort \ + --nthreads=${task.cpus} \ + --tmpdir=./tmp \ + --out=${name}.sorted.bam \ + ${bam} + + # indexes the output BAM file + sambamba index \ + --nthreads=${task.cpus} \ + ${name}.sorted.bam ${name}.sorted.bam.bai echo ${params.manifest} >> software_versions.${task.process}.txt - gatk --version >> software_versions.${task.process}.txt + sambamba --version >> software_versions.${task.process}.txt """ } diff --git a/modules/02_mark_duplicates.nf b/modules/02_mark_duplicates.nf index 3e4307f..3be3305 100644 --- a/modules/02_mark_duplicates.nf +++ b/modules/02_mark_duplicates.nf @@ -1,7 +1,6 @@ params.mark_duplicates_cpus = 2 params.mark_duplicates_memory = "16g" params.remove_duplicates = true -params.skip_metrics = false params.output = 'output' @@ -9,42 +8,76 @@ process MARK_DUPLICATES { cpus "${params.mark_duplicates_cpus}" memory "${params.mark_duplicates_memory}" tag "${name}" - publishDir "${params.output}/${name}/metrics/mark_duplicates", mode: "copy", pattern: "*.dedup_metrics.txt" publishDir "${params.output}/${name}/", mode: "copy", pattern: "software_versions.*" - conda (params.enable_conda ? "bioconda::gatk4=4.2.5.0" : null) + conda (params.enable_conda ? "bioconda::sambamba=0.8.2" : null) input: tuple val(name), val(type), file(bam) output: tuple val(name), val(type), file("${name}.dedup.bam"), file("${name}.dedup.bam.bai"), emit: deduplicated_bams - file("${name}.dedup_metrics.txt") optional true file("software_versions.${task.process}.txt") script: - dedup_metrics = params.skip_metrics ? "": "--METRICS_FILE ${name}.dedup_metrics.txt" - remove_duplicates = params.remove_duplicates ? 
"--REMOVE_DUPLICATES true" : "--REMOVE_DUPLICATES false" + remove_duplicates_param = params.remove_duplicates ? "--remove-duplicates" : "" """ mkdir tmp - gatk SortSam \ - --java-options '-Xmx${params.mark_duplicates_memory} -Djava.io.tmpdir=./tmp' \ - --INPUT ${bam} \ - --OUTPUT ${name}.sorted.bam \ - --SORT_ORDER coordinate - - gatk MarkDuplicates \ - --java-options '-Xmx${params.mark_duplicates_memory} -Djava.io.tmpdir=./tmp' \ - --INPUT ${name}.sorted.bam \ - --OUTPUT ${name}.dedup.bam \ - --ASSUME_SORT_ORDER coordinate \ - --CREATE_INDEX true ${remove_duplicates} ${dedup_metrics} + # sort + sambamba sort \ + --nthreads=${task.cpus} \ + --tmpdir=./tmp \ + --out=${name}.sorted.bam \ + ${bam} - cp ${name}.dedup.bai ${name}.dedup.bam.bai + # removes duplicates (sorted from the alignment process) + sambamba markdup ${remove_duplicates_param} \ + --nthreads=${task.cpus} \ + --tmpdir=./tmp \ + ${name}.sorted.bam ${name}.dedup.bam rm -f ${name}.sorted.bam + # indexes the output BAM file + sambamba index \ + --nthreads=${task.cpus} \ + ${name}.dedup.bam ${name}.dedup.bam.bai + + echo ${params.manifest} >> software_versions.${task.process}.txt + sambamba --version >> software_versions.${task.process}.txt + """ +} + +process SPLIT_CIGAR_N_READS { + cpus "${params.prepare_bam_cpus}" + memory "${params.prepare_bam_memory}" + tag "${name}" + publishDir "${params.output}/${name}/", mode: "copy", pattern: "software_versions.*" + + conda (params.enable_conda ? "bioconda::gatk4=4.2.5.0" : null) + + input: + tuple val(name), val(type), file(bam), file(bai) + val(reference) + + output: + tuple val(name), val(type), file("${name}.split_cigarn.bam"), file("${name}.split_cigarn.bam.bai"), emit: split_cigarn_bams + file("software_versions.${task.process}.txt") + + script: + """ + mkdir tmp + + gatk SplitNCigarReads \ + --java-options '-Xmx${params.prepare_bam_memory} -Djava.io.tmpdir=./tmp' \ + --input ${bam} \ + --output ${name}.split_cigarn.bam \ + --create-output-bam-index true \ + --reference ${reference} + + cp ${name}.split_cigarn.bai ${name}.split_cigarn.bam.bai + echo ${params.manifest} >> software_versions.${task.process}.txt gatk --version >> software_versions.${task.process}.txt """ diff --git a/modules/03_metrics.nf b/modules/03_metrics.nf index ed08e96..f279158 100644 --- a/modules/03_metrics.nf +++ b/modules/03_metrics.nf @@ -2,7 +2,6 @@ params.metrics_cpus = 1 params.metrics_memory = "8g" params.collect_hs_metrics_min_base_quality = false params.collect_hs_metrics_min_mapping_quality = false -params.reference = false params.output = 'output' params.intervals = false @@ -63,6 +62,7 @@ process METRICS { input: tuple val(name), val(type), file(bam), file(bai) + val(reference) output: file("*_metrics") optional true @@ -76,7 +76,7 @@ process METRICS { --java-options '-Xmx${params.metrics_memory} -Djava.io.tmpdir=./tmp' \ --INPUT ${bam} \ --OUTPUT ${name} \ - --REFERENCE_SEQUENCE ${params.reference} \ + --REFERENCE_SEQUENCE ${reference} \ --PROGRAM QualityScoreDistribution \ --PROGRAM MeanQualityByCycle \ --PROGRAM CollectAlignmentSummaryMetrics \ @@ -122,3 +122,31 @@ process COVERAGE_ANALYSIS { samtools --version >> software_versions.${task.process}.txt """ } + +process FLAGSTAT { + cpus "${params.metrics_cpus}" + memory "${params.metrics_memory}" + tag "${name}" + publishDir "${params.output}/${name}/metrics/flagstat", mode: "copy", pattern: "*.flagstat.csv" + publishDir "${params.output}/${name}/", mode: "copy", pattern: "software_versions.*" + + conda (params.enable_conda ? 
"bioconda::sambamba=0.8.2" : null) + + input: + tuple val(name), val(type), file(bam), file(bai) + + output: + file("${name}.flagstat.csv") + file("software_versions.${task.process}.txt") + + script: + """ + sambamba flagstat \ + --nthreads=${task.cpus} \ + --tabular \ + ${bam} > ${name}.flagstat.csv + + echo ${params.manifest} >> software_versions.${task.process}.txt + sambamba --version >> software_versions.${task.process}.txt + """ +} diff --git a/modules/04_realignment_around_indels.nf b/modules/04_realignment_around_indels.nf index 15fa013..f517d94 100644 --- a/modules/04_realignment_around_indels.nf +++ b/modules/04_realignment_around_indels.nf @@ -2,7 +2,6 @@ params.realignment_around_indels_cpus = 2 params.realignment_around_indels_memory = "31g" params.known_indels1 = false params.known_indels2 = false -params.reference = false params.output = 'output' @@ -19,6 +18,7 @@ process REALIGNMENT_AROUND_INDELS { input: tuple val(name), val(type), file(bam), file(bai) + val(reference) output: tuple val(name), val(type), file("${name}.realigned.bam"), file("${name}.realigned.bai"), emit: realigned_bams @@ -36,12 +36,12 @@ process REALIGNMENT_AROUND_INDELS { gatk3 -Xmx${params.realignment_around_indels_memory} -Djava.io.tmpdir=./tmp -T RealignerTargetCreator \ --input_file ${bam} \ --out ${name}.RA.intervals \ - --reference_sequence ${params.reference} ${known_indels1} ${known_indels2} + --reference_sequence ${reference} ${known_indels1} ${known_indels2} gatk3 -Xmx${params.realignment_around_indels_memory} -Djava.io.tmpdir=./tmp -T IndelRealigner \ --input_file ${bam} \ --out ${name}.realigned.bam \ - --reference_sequence ${params.reference} \ + --reference_sequence ${reference} \ --targetIntervals ${name}.RA.intervals \ --consensusDeterminationModel USE_SW \ --LODThresholdForCleaning 0.4 \ diff --git a/modules/05_bqsr.nf b/modules/05_bqsr.nf index 0fa8405..ba94d75 100644 --- a/modules/05_bqsr.nf +++ b/modules/05_bqsr.nf @@ -1,6 +1,5 @@ params.bqsr_cpus = 3 params.bqsr_memory = "4g" -params.reference = false params.dbsnp = false params.output = 'output' @@ -16,6 +15,7 @@ process BQSR { input: tuple val(name), val(type), file(bam), file(bai) + val(reference) output: tuple val("${name}"), val("${type}"), val("${params.output}/${name}/${bam_name}.preprocessed.bam"), emit: recalibrated_bams @@ -31,14 +31,14 @@ process BQSR { --java-options '-Xmx${params.bqsr_memory} -Djava.io.tmpdir=./tmp' \ --input ${bam} \ --output ${name}.recalibration_report.grp \ - --reference ${params.reference} \ + --reference ${reference} \ --known-sites ${params.dbsnp} gatk ApplyBQSR \ --java-options '-Xmx${params.bqsr_memory} -Djava.io.tmpdir=./tmp' \ --input ${bam} \ --output ${name}.preprocessed.bam \ - --reference ${params.reference} \ + --reference ${reference} \ --bqsr-recal-file ${name}.recalibration_report.grp echo ${params.manifest} >> software_versions.${task.process}.txt diff --git a/nextflow.config b/nextflow.config index 5573d6f..c806cb0 100644 --- a/nextflow.config +++ b/nextflow.config @@ -44,7 +44,7 @@ process.shell = ['/bin/bash', '-euo', 'pipefail'] cleanup = true -VERSION = '1.9.1' +VERSION = '2.0.0' DOI = 'https://zenodo.org/badge/latestdoi/358400957' manifest { diff --git a/tests/test_08.sh b/tests/test_08.sh index 4337444..9d3f387 100644 --- a/tests/test_08.sh +++ b/tests/test_08.sh @@ -9,7 +9,7 @@ nextflow main.nf -profile test,conda --output $output --collect_hs_metrics_min_b test -s $output/sample1/sample1.preprocessed.bam || { echo "Missing BAM file!"; exit 1; } test -s 
$output/sample1/sample1.preprocessed.bai || { echo "Missing BAI file!"; exit 1; }
test -s $output/sample1/metrics/hs_metrics/sample1.hs_metrics.txt || { echo "Missing HS metrics file!"; exit 1; }
-test -s $output/sample1/metrics/mark_duplicates/sample1.dedup_metrics.txt || { echo "Missing dedup metrics file!"; exit 1; }
+test -s $output/sample1/metrics/flagstat/sample1.flagstat.csv || { echo "Missing flagstat file!"; exit 1; }
test -s $output/sample1/metrics/coverage/sample1.coverage.tsv || { echo "Missing horizontal coverage file!"; exit 1; }
test -s $output/sample1/metrics/coverage/sample1.depth.tsv || { echo "Missing depth of coverage file!"; exit 1; }
test -s $output/sample2/sample2.preprocessed.bam || { echo "Missing BAM file!"; exit 1; }
diff --git a/tests/test_10.sh b/tests/test_10.sh
new file mode 100644
index 0000000..c0e57fc
--- /dev/null
+++ b/tests/test_10.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+
+source tests/assert.sh
+output=output/test10
+nextflow main.nf -profile test,conda --output $output --skip_realignment --split_cigarn
+
+test -s $output/sample1/sample1.preprocessed.bam || { echo "Missing output BAM file!"; exit 1; }
+test -s $output/sample1/sample1.preprocessed.bai || { echo "Missing output BAI file!"; exit 1; }
+test -s $output/sample1/metrics/flagstat/sample1.flagstat.csv || { echo "Missing output metrics file!"; exit 1; }
+test -s $output/sample2/sample2.preprocessed.bam || { echo "Missing output BAM file!"; exit 1; }
+test -s $output/sample2/sample2.preprocessed.bai || { echo "Missing output BAI file!"; exit 1; }
+test -s $output/sample2/metrics/flagstat/sample2.flagstat.csv || { echo "Missing output metrics file!"; exit 1; }
\ No newline at end of file