From 4fcdcfb2896bbe2c0e3e51474fd025744cf4767c Mon Sep 17 00:00:00 2001 From: priesgof Date: Tue, 1 Jun 2021 19:47:30 +0200 Subject: [PATCH] add support for a single file --- Makefile | 67 ++++++++++++++++++++++++++----------------------- README.md | 6 +++-- main.nf | 18 ++++++++++--- nextflow.config | 8 +++--- 4 files changed, 58 insertions(+), 41 deletions(-) diff --git a/Makefile b/Makefile index 61615f1..2edf503 100644 --- a/Makefile +++ b/Makefile @@ -11,41 +11,44 @@ test: nextflow main.nf -profile test,conda --skip_bqsr --output output/test2 nextflow main.nf -profile test,conda --skip_realignment --output output/test3 nextflow main.nf -profile test,conda --skip_deduplication --output output/test4 - nextflow main.nf -profile test,conda --output output/test5 --skip_metrics --known_indels1 false --known_indels2 false - nextflow main.nf -profile test,conda --output output/test6 --intervals false - nextflow main.nf -profile test,conda --output output/test7 --hs_metrics_target_coverage target_coverage.txt --hs_metrics_per_base_coverage per_base_coverage.txt - nextflow main.nf -profile test,conda --output output/test8 --hs_metrics_target_coverage target_coverage.txt --hs_metrics_per_base_coverage per_base_coverage.txt --collect_hs_metrics_min_base_quality 10 --collect_hs_metrics_min_mapping_quality 10 --remove_duplicates false + nextflow main.nf -profile test,conda --output output/test5 --skip_deduplication --skip_bqsr --skip_metrics --known_indels1 false --known_indels2 false + nextflow main.nf -profile test,conda --output output/test6 --intervals false --skip_deduplication --skip_bqsr --skip_realignment + nextflow main.nf -profile test,conda --output output/test7 --hs_metrics_target_coverage target_coverage.txt --hs_metrics_per_base_coverage per_base_coverage.txt --skip_bqsr --skip_realignment + nextflow main.nf -profile test,conda --output output/test8 --hs_metrics_target_coverage target_coverage.txt --hs_metrics_per_base_coverage per_base_coverage.txt 
--collect_hs_metrics_min_base_quality 10 --collect_hs_metrics_min_mapping_quality 10 --remove_duplicates false --skip_bqsr --skip_realignment + nextflow main.nf -profile test,conda --output output/test9 --skip_deduplication --skip_bqsr --skip_realignment --input_files false --input_bam test_data/TESTX_S1_L001.bam check: test -s output/test1/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; } test -s output/test1/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; } test -s output/test1/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; } test -s output/test1/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; } - test -s output/test2/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; } - test -s output/test2/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; } - test -s output/test2/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; } - test -s output/test2/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; } - test -s output/test3/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; } - test -s output/test3/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; } - test -s output/test3/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; } - test -s output/test3/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; } - test -s output/test4/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; } - test -s output/test4/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; } - test -s output/test4/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 1 output file!"; 
exit 1; } - test -s output/test4/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; } - test -s output/test5/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; } - test -s output/test5/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; } - test -s output/test5/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; } - test -s output/test5/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; } - test -s output/test6/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; } - test -s output/test6/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; } - test -s output/test6/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; } - test -s output/test6/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; } - test -s output/test7/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; } - test -s output/test7/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; } - test -s output/test7/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; } - test -s output/test7/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; } - test -s output/test8/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; } - test -s output/test8/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; } - test -s output/test8/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 1 output file!"; exit 1; } - test -s output/test8/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 1 output file!"; exit 1; } \ No newline at end of file + test -s 
output/test2/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 2 output file!"; exit 1; } + test -s output/test2/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 2 output file!"; exit 1; } + test -s output/test2/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 2 output file!"; exit 1; } + test -s output/test2/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 2 output file!"; exit 1; } + test -s output/test3/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 3 output file!"; exit 1; } + test -s output/test3/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 3 output file!"; exit 1; } + test -s output/test3/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 3 output file!"; exit 1; } + test -s output/test3/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 3 output file!"; exit 1; } + test -s output/test4/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 4 output file!"; exit 1; } + test -s output/test4/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 4 output file!"; exit 1; } + test -s output/test4/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 4 output file!"; exit 1; } + test -s output/test4/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 4 output file!"; exit 1; } + test -s output/test5/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 5 output file!"; exit 1; } + test -s output/test5/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 5 output file!"; exit 1; } + test -s output/test5/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 5 output file!"; exit 1; } + test -s output/test5/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 5 output file!"; exit 1; } + test -s output/test6/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 6 output file!"; exit 1; } + test -s output/test6/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 6 
output file!"; exit 1; } + test -s output/test6/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 6 output file!"; exit 1; } + test -s output/test6/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 6 output file!"; exit 1; } + test -s output/test7/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 7 output file!"; exit 1; } + test -s output/test7/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 7 output file!"; exit 1; } + test -s output/test7/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 7 output file!"; exit 1; } + test -s output/test7/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 7 output file!"; exit 1; } + test -s output/test8/sample1/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 8 output file!"; exit 1; } + test -s output/test8/sample1/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 8 output file!"; exit 1; } + test -s output/test8/sample2/TESTX_S1_L002.preprocessed.bam || { echo "Missing test 8 output file!"; exit 1; } + test -s output/test8/sample2/TESTX_S1_L002.preprocessed.bai || { echo "Missing test 8 output file!"; exit 1; } + test -s output/test9/TESTX_S1_L001/TESTX_S1_L001.preprocessed.bam || { echo "Missing test 9 output file!"; exit 1; } + test -s output/test9/TESTX_S1_L001/TESTX_S1_L001.preprocessed.bai || { echo "Missing test 9 output file!"; exit 1; } \ No newline at end of file diff --git a/README.md b/README.md index 6611599..65292cb 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,8 @@ Usage: main.nf --input_files input_files Input: - * --input_files: the path to a tab-separated values file containing in each row the sample name, sample type (eg: tumor or normal) and path to the BAM file + * --input_bam: the path to a single BAM (this option is not compatible with --input_files) + * --input_files: the path to a tab-separated values file containing in each row the sample name, sample type (eg: tumor or normal) and path to the BAM file (this 
option is not compatible with --input_bam) Sample type will be added to the BAM header @SN sample name The input file does not have header! Example input file: @@ -64,6 +65,7 @@ Input: * --reference: path to the FASTA genome reference (indexes expected *.fai, *.dict) Optional input: + * --input_name: the name of the sample. Only used when --input_bam is provided (default: normal) * --dbsnp: path to the dbSNP VCF (required to perform BQSR) * --known_indels1: path to a VCF of known indels (optional to perform realignment around indels) * --known_indels2: path to a second VCF of known indels (optional to perform realignment around indels) @@ -86,7 +88,7 @@ Computational resources: * --mark_duplicates_cpus: (default: 16) * --mark_duplicates_memory: (default: 64g) * --realignment_around_indels_cpus: (default: 2) - * --realignment_around_indels_memory: (default: 32g) + * --realignment_around_indels_memory: (default: 31g) * --bqsr_cpus: (default: 3) * --bqsr_memory: (default: 4g) * --metrics_cpus: (default: 1) diff --git a/main.nf b/main.nf index 71fcc0c..154ea01 100755 --- a/main.nf +++ b/main.nf @@ -3,6 +3,8 @@ publish_dir = 'output' params.help= false params.input_files = false +params.input_name = "normal" +params.input_bam = false params.reference = false params.dbsnp = false params.known_indels1 = false @@ -59,15 +61,23 @@ if (params.output) { publish_dir = params.output } -// checks required inputs -if (params.input_files) { +if (! params.input_files && ! params.input_bam) { + exit 1, "Neither --input_files or --input_bam are provided!" +} +else if (params.input_files && params.input_bam) { + exit 1, "Both --input_files and --input_bam are provided! Please, provide only one." +} +else if (params.input_files) { Channel .fromPath(params.input_files) .splitCsv(header: ['name', 'type', 'bam'], sep: "\t") .map{ row-> tuple(row.name, row.type, file(row.bam)) } .set { input_files } -} else { - exit 1, "Input file not specified!" 
+} else { /* only --input_bam is set here (the branches above reject every other combination); an empty --input_name falls back to the documented default "normal" */ + input_bam = file(params.input_bam) + Channel + .fromList([tuple(input_bam.baseName, params.input_name ?: "normal", input_bam)]) + .set { input_files } } /* diff --git a/nextflow.config b/nextflow.config index bc3c25f..dbc239d 100644 --- a/nextflow.config +++ b/nextflow.config @@ -46,7 +46,7 @@ process.shell = ['/bin/bash', '-euo', 'pipefail'] cleanup = true -VERSION = '1.4.1' +VERSION = '1.5.0' DOI = 'https://zenodo.org/badge/latestdoi/358400957' manifest { @@ -67,7 +67,8 @@ Usage: main.nf --input_files input_files Input: - * --input_files: the path to a tab-separated values file containing in each row the sample name, sample type (eg: tumor or normal) and path to the BAM file + * --input_bam: the path to a single BAM (this option is not compatible with --input_files) + * --input_files: the path to a tab-separated values file containing in each row the sample name, sample type (eg: tumor or normal) and path to the BAM file (this option is not compatible with --input_bam) Sample type will be added to the BAM header @SN sample name The input file does not have header! Example input file: @@ -77,6 +78,7 @@ Input: * --reference: path to the FASTA genome reference (indexes expected *.fai, *.dict) Optional input: + * --input_name: the name of the sample.
Only used when --input_bam is provided (default: normal) * --dbsnp: path to the dbSNP VCF (required to perform BQSR) * --known_indels1: path to a VCF of known indels (optional to perform realignment around indels) * --known_indels2: path to a second VCF of known indels (optional to perform realignment around indels) @@ -99,7 +101,7 @@ Computational resources: * --mark_duplicates_cpus: (default: 16) * --mark_duplicates_memory: (default: 64g) * --realignment_around_indels_cpus: (default: 2) - * --realignment_around_indels_memory: (default: 32g) + * --realignment_around_indels_memory: (default: 31g) * --bqsr_cpus: (default: 3) * --bqsr_memory: (default: 4g) * --metrics_cpus: (default: 1)