From 789911f8285fcaaaa4c07824213ca86ab61c0658 Mon Sep 17 00:00:00 2001 From: Timothy Parnell Date: Sat, 14 Sep 2024 17:14:34 -0600 Subject: [PATCH] update help doc --- bin/bam_umi_dedup.pl | 48 +++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/bin/bam_umi_dedup.pl b/bin/bam_umi_dedup.pl index 09f3b0e..f12213c 100755 --- a/bin/bam_umi_dedup.pl +++ b/bin/bam_umi_dedup.pl @@ -44,7 +44,7 @@ # version 2.1 - change default to SAM tag instead of read name # version 2.2 - bug fix, improve optical detection # version 2.3 - add explicit support for cram, remove Bio::ToolBox dependency, -# fix untagged paired-end counting +# fix untagged paired-end counting, fix missing header @PG #### Inputs my $infile; @@ -132,29 +132,31 @@ END USAGE: bam_umi_dedup.pl --in in.bam --out out.bam OPTIONS: - Required: - -i --in The input bam file, should be sorted and indexed - -o --out The output bam file + Required: + -i --in The input bam file, should be sorted and indexed + -o --out The output bam file - UMI options: - -u --umi SAM tag name for UMI sequence. Default 'RX' - Specify 'name' when UMI appended to read name. - -m --mark Mark duplicates (flag 0x400) instead of discarding - -t --tolerance UMI sequence edit distance tolerance ($edit_tolerance) - --indel Set insertion/deletion penalty score ($indel_score) - --skip Skip mismatch detection if depth exceeds ($skip_mismatch_depth) - - Other options: - -f --fasta Provide fasta file for Cram files - -d --distance Set optical duplicate distance threshold. - Use 100 for unpatterned flowcell (HiSeq) or - 2500 for patterned flowcell (NovaSeq). Default 0. - --coord Provide the tile:X:Y integer 1-base positions in the - read name for optical checking. For Illumina CASAVA 1.8 - 7-element names, this is 5:6:7 (default) - -c --cpu Specify the number of forks to use ($cpu) - --samtools Path to samtools ($sam_app) - -h --help Display full description and help + UMI options: + -u --umi SAM tag name for UMI sequence. Default 'RX' + Specify 'name' when UMI appended to read name. + -m --mark Mark duplicates (flag 0x400) instead of discarding + -t --tolerance UMI sequence edit distance tolerance ($edit_tolerance) + --indel Set insertion/deletion penalty score ($indel_score) + --skip Skip mismatch detection if depth exceeds ($skip_mismatch_depth) + + Other options: + -f --fasta Provide indexed fasta file for Cram files + -d --distance Set optical duplicate distance threshold. + Use 100 for unpatterned flowcell (HiSeq) or + 2500 for patterned flowcell (NextSeq or NovaSeq6000) + or 200 for NovaseqX. Default 0. + --coord Provide the tile:X:Y integer 1-base positions in the + read name for optical checking. For Illumina CASAVA 1.8 + 7-element names, this is 5:6:7 (default) + -c --cpu Specify the number of forks to use ($cpu) + --samtools Path to samtools ($sam_app) + --nosam Do not use samtools for final concatenation (slower) + -h --help Display full description and help END