Skip to content

Commit

Permalink
Merge pull request #48 from Roco-scientist/args
Browse files Browse the repository at this point in the history
Args
  • Loading branch information
Roco-scientist authored Nov 7, 2021
2 parents cf00608 + 497dd23 commit 75e2f91
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 106 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "barcode-count"
description = "NGS barcode counter for DEL, CRISPR-seq, and Barcode-seq"
version = "0.8.6"
version = "0.8.7"
edition = "2018"
license = "Apache-2.0"
readme = "README.md"
Expand Down
30 changes: 13 additions & 17 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -164,28 +164,27 @@ Run NGS-Barcode-Count<br>

```
barcode-count --fastq <fastq_file> \
--sample_barcodes <sample_barcodes_file> \
--sequence_format <sequence_format_file> \
--counted_barcodes <counted_barcodes_file> \
--output_dir <output_dir> \
--sample-barcodes <sample_barcodes_file> \
--sequence-format <sequence_format_file> \
--counted-barcodes <counted_barcodes_file> \
--output-dir <output_dir> \
--prefix <file_prefix> \
--threads <num_of_threads> \
--merge_output \
--min_quality <min_barcode_read_quality>\
--single\
--double\
--merge-output \
--min-quality <min_barcode_read_quality>\
--enrich
```

<br>
<ul>
<li>
--counted_barcodes is optional. If it is not used, the output counts use the DNA barcode to count, with no error handling on these barcodes.
--counted-barcodes is optional. If it is not used, the output counts use the DNA barcode to count, with no error handling on these barcodes.
</li>
<li>
--sample_barcodes is optional. If it is not used, all samples are marked as unknown.
--sample-barcodes is optional. If it is not used, all samples are marked as unknown.
</li>
<li>
--output_dir defaults to the current directory if not used.
--output-dir defaults to the current directory if not used.
</li>
<li>
--prefix defaults to the current date. All files end with _sample_name_counts.csv
Expand All @@ -194,16 +193,13 @@ barcode-count --fastq <fastq_file> \
--threads defaults to the number of threads on the machine if not used.
</li>
<li>
--merge_output is a flag that merges the sample output counts into a single csv file in which each sample has one column.
--merge-output is a flag that merges the sample output counts into a single csv file in which each sample has one column.
</li>
<li>
--min_quality will filter out reads where any of the barcodes have an average quality score below the threshold set here. The default is 0, which applies no filtering.
--min-quality will filter out reads where any of the barcodes have an average quality score below the threshold set here. The default is 0, which applies no filtering.
</li>
<li>
--single argument flag that will find the counts for each barcode if there are 2 or more counted barcodes included, and output the file. Useful for DEL
</li>
<li>
--double argument flag that will find the counts for each pair of barcodes if there are 3 or more counted barcodes included, and output the file. Useful for DEL
--enrich is a flag that finds the counts for each single barcode when 2 or more counted barcodes are included, and outputs them to a file. It also does the same for each pair of barcodes when there are 3 or more. Useful for DEL.
</li>
</ul>

Expand Down
101 changes: 44 additions & 57 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,26 +4,25 @@ pub mod output;
pub mod parse;

use chrono::Local;
use clap::{App, Arg};
use clap::{crate_version, App, Arg};
// use rayon::prelude::*;
use std::error::Error;

/// A struct that contains and initiates all input arguments.
///
/// Each field is filled from the command-line arguments parsed with clap in `Args::new`.
pub struct Args {
pub fastq: String, // FASTQ file path
pub format: String, // sequence format scheme file path
pub sample_barcodes_option: Option<String>, // sample barcode file path. Optional
pub counted_barcodes_option: Option<String>, // building block barcode file path. Optional
pub output_dir: String, // output directory. Defaults to './'
pub fastq: String, // FASTQ file path
pub format: String, // sequence format scheme file path
pub sample_barcodes_option: Option<String>, // sample barcode file path. Optional
pub counted_barcodes_option: Option<String>, // building block barcode file path. Optional
pub output_dir: String, // output directory. Defaults to './'
pub threads: u8, // Number of threads to use. Defaults to number of threads on the machine
pub prefix: String, // Prefix string for the output files
pub merge_output: bool, // Whether or not to create an additional output file that merges all samples
pub barcodes_errors_option: Option<u8>, // Optional input of how many errors are allowed in each building block barcode. Defaults to 20% of the length
pub sample_errors_option: Option<u8>, // Optional input of how many errors are allowed in each sample barcode. Defaults to 20% of the length
pub constant_errors_option: Option<u8>, // Optional input of how many errors are allowed in each constant region barcode. Defaults to 20% of the length
// Minimum average read quality score per barcode. The argument defaults to "0"
pub min_average_quality_score: f32,
// Set by the `--double` flag: output double barcode enrichment counts
pub double_barcode_enrichment: bool,
// Set by the `--single` flag: output single barcode enrichment counts
pub single_barcode_enrichment: bool,
// Set by the `--enrich` flag: create enrichment output files for single and double barcodes
pub enrich: bool,
}

impl Args {
Expand All @@ -32,7 +31,7 @@ impl Args {
let today = Local::today().format("%Y-%m-%d").to_string();
// parse arguments
let args = App::new("NGS-Barcode-Count")
.version("0.8.6")
.version(crate_version!())
.author("Rory Coffey <coffeyrt@gmail.com>")
.about("Counts barcodes located in sequencing data")
.arg(
Expand All @@ -44,24 +43,24 @@ impl Args {
.help("FASTQ file unzipped"),
)
.arg(
Arg::with_name("sequence_format")
Arg::with_name("format_file")
.short("q")
.long("sequence_format")
.long("sequence-format")
.takes_value(true)
.required(true)
.help("Sequence format file"),
)
.arg(
Arg::with_name("sample_barcodes")
Arg::with_name("sample_file")
.short("s")
.long("sample_barcodes")
.long("sample-barcodes")
.takes_value(true)
.help("Sample barcodes file"),
)
.arg(
Arg::with_name("counted_barcodes")
Arg::with_name("barcode_file")
.short("c")
.long("counted_barcodes")
.long("counted-barcodes")
.takes_value(true)
.help("Counted barcodes file"),
)
Expand All @@ -74,9 +73,9 @@ impl Args {
.help("Number of threads"),
)
.arg(
Arg::with_name("output_dir")
Arg::with_name("dir")
.short("o")
.long("output_dir")
.long("output-dir")
.takes_value(true)
.default_value("./")
.help("Directory to output the counts to"),
Expand All @@ -90,111 +89,100 @@ impl Args {
.help("File prefix name. THe output will end with '_<sample_name>_counts.csv'"),
)
.arg(
Arg::with_name("merge_output")
Arg::with_name("merge-output")
.short("m")
.long("merge_output")
.long("merge-output")
.takes_value(false)
.help("Merge sample output counts into a single file. Not necessary when there is only one sample"),
)
.arg(
Arg::with_name("double")
.long("double")
Arg::with_name("enrich")
.long("enrich")
.short("e")
.takes_value(false)
.help("Output double barcode enrichment counts"),
.help("Create output files of enrichment for single and double synthons/barcodes"),
)
.arg(
Arg::with_name("single")
.long("single")
.takes_value(false)
.help("Output single barcode enrichment counts"),
)
.arg(
Arg::with_name("barcodes_errors")
.long("barcodes_errors")
Arg::with_name("max_barcode")
.long("max-errors-counted-barcode")
.takes_value(true)
.help("Maximimum number of sequence errors allowed within each counted barcode. Defaults to 20% of the total."),
)
.arg(
Arg::with_name("sample_errors")
.long("sample_errors")
Arg::with_name("max_sample")
.long("max-errors-sample")
.takes_value(true)
.help("Maximimum number of sequence errors allowed within sample barcode. Defaults to 20% of the total."),
)
.arg(
Arg::with_name("contant_errors")
.long("constant_errors")
Arg::with_name("max_constant")
.long("max-errors-constant")
.takes_value(true)
.help("Maximimum number of sequence errors allowed within constant region. Defaults to 20% of the total."),
)
.arg(
Arg::with_name("min_quality")
.long("min_quality")
Arg::with_name("min")
.long("min-quality")
.takes_value(true)
.default_value("0")
.help("Minimum average read quality score per barcode"),
)
.get_matches();

let sample_barcodes_option;
if let Some(sample) = args.value_of("sample_barcodes") {
if let Some(sample) = args.value_of("sample_file") {
sample_barcodes_option = Some(sample.to_string())
} else {
sample_barcodes_option = None
}

let counted_barcodes_option;
if let Some(barcodes) = args.value_of("counted_barcodes") {
if let Some(barcodes) = args.value_of("barcode_file") {
counted_barcodes_option = Some(barcodes.to_string())
} else {
counted_barcodes_option = None
}

let barcodes_errors_option;
if let Some(barcodes) = args.value_of("barcodes_errors") {
if let Some(barcodes) = args.value_of("max_barcode") {
barcodes_errors_option = Some(barcodes.parse::<u8>()?)
} else {
barcodes_errors_option = None
}

let sample_errors_option;
if let Some(sample) = args.value_of("sample_errors") {
if let Some(sample) = args.value_of("max_sample") {
sample_errors_option = Some(sample.parse::<u8>()?)
} else {
sample_errors_option = None
}

let constant_errors_option;
if let Some(constant) = args.value_of("constant_errors") {
if let Some(constant) = args.value_of("max_constant") {
constant_errors_option = Some(constant.parse::<u8>()?)
} else {
constant_errors_option = None
}

let merge_output;
if args.is_present("merge_output") {
if args.is_present("merge-output") {
merge_output = true
} else {
merge_output = false
}
let double_barcode_enrichment;
if args.is_present("double") {
double_barcode_enrichment = true
} else {
double_barcode_enrichment = false
}
let single_barcode_enrichment;
if args.is_present("single") {
single_barcode_enrichment = true
let enrich;
if args.is_present("enrich") {
enrich = true
} else {
single_barcode_enrichment = false
enrich = false
}
let fastq = args.value_of("fastq").unwrap().to_string();
let format = args.value_of("sequence_format").unwrap().to_string();
let output_dir = args.value_of("output_dir").unwrap().to_string();
let format = args.value_of("format_file").unwrap().to_string();
let output_dir = args.value_of("dir").unwrap().to_string();
let threads = args.value_of("threads").unwrap().parse::<u8>().unwrap();
let prefix = args.value_of("prefix").unwrap().to_string();
let min_average_quality_score = args
.value_of("min_quality")
.value_of("min")
.unwrap()
.parse::<f32>()
.unwrap();
Expand All @@ -212,8 +200,7 @@ impl Args {
sample_errors_option,
constant_errors_option,
min_average_quality_score,
double_barcode_enrichment,
single_barcode_enrichment,
enrich,
})
}
}
17 changes: 7 additions & 10 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,17 @@ fn main() {
let start_time = Local::now();

// get the argument inputs
let mut args = barcode_count::Args::new().unwrap_or_else(|err| panic!("Argument error: {}", err));
let mut args =
barcode_count::Args::new().unwrap_or_else(|err| panic!("Argument error: {}", err));

let sequence_format = barcode_count::info::SequenceFormat::new(args.format.clone())
.unwrap_or_else(|err| panic!("sequence format error: {}", err));
println!("{}\n", sequence_format);

// Check how many barcodes occur if either single or double barcode enrichment is called. If there are too few, ignore the argument flag
if args.single_barcode_enrichment && sequence_format.barcode_num < 2 {
eprintln!("Fewer than 2 counted barcodes. Too few for single barcode enrichment. Argument flag is ignored");
args.single_barcode_enrichment = false;
}

if args.double_barcode_enrichment && sequence_format.barcode_num < 3 {
eprintln!("Fewer than 3 counted barcodes. Too few for double barcode enrichment. Argument flag is ignored");
args.double_barcode_enrichment = false;
if args.enrich && sequence_format.barcode_num < 2 {
eprintln!("Fewer than 2 counted barcodes. Too few for barcode enrichment. Argument flag is ignored");
args.enrich = false;
}

// Start getting the barcode conversion with the BarcodeConversions struct
Expand Down Expand Up @@ -96,7 +92,8 @@ fn main() {
finished_clone.store(true, Ordering::Relaxed);
});

let shared_mut = barcode_count::parse::SharedMutData::new(seq, finished, Arc::clone(&results));
let shared_mut =
barcode_count::parse::SharedMutData::new(seq, finished, Arc::clone(&results));
// Create processing threads. One less than the total threads because of the single reading thread
for _ in 1..args.threads {
// Clone all variables needed to pass into each thread
Expand Down
Loading

0 comments on commit 75e2f91

Please sign in to comment.