Skip to content

Commit

Permalink
bug fix
Browse files Browse the repository at this point in the history
  • Loading branch information
dagou committed Aug 12, 2024
1 parent 609a086 commit a3d2b25
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 37 deletions.
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -346,8 +346,11 @@ Options:
Minimum quality score for FASTQ data [default: 0]
-p, --num-threads <NUM_THREADS>
The number of threads to use [default: 10]
--batch-size <BATCH_SIZE>
--buffer-size <BUFFER_SIZE>
[default: 16777216]
--batch-size <BATCH_SIZE>
The size of each batch for processing taxid match results, used to control memory usage
[default: 16]
-T, --confidence-threshold <CONFIDENCE_THRESHOLD>
Confidence score threshold [default: 0]
-g, --minimum-hit-groups <MINIMUM_HIT_GROUPS>
Expand All @@ -358,8 +361,6 @@ Options:
In comb. w/ -R, provide minimizer information in report
-z, --report-zero-counts
In comb. w/ -R, report taxa w/ 0 count
--full-output
output file contains all unclassified sequence
-h, --help
Print help (see more with '--help')
-V, --version
Expand Down
2 changes: 1 addition & 1 deletion kr2r/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "kr2r"
version = "0.6.3"
version = "0.6.8"
edition = "2021"
authors = ["eric9n@gmail.com"]

Expand Down
7 changes: 3 additions & 4 deletions kr2r/src/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,10 +117,9 @@ pub struct ClassifyArgs {
#[clap(short = 'z', long, value_parser, default_value_t = false)]
pub report_zero_counts: bool,

/// output file contains all unclassified sequence
#[clap(long, value_parser, default_value_t = false)]
pub full_output: bool,

// /// Output file contains all unclassified sequences
// #[clap(long, value_parser, default_value_t = false)]
// pub full_output: bool,
/// A list of input file paths (FASTA/FASTQ) to be processed by the classify program.
/// Supports fasta or fastq format files (e.g., .fasta, .fastq) and gzip compressed files (e.g., .fasta.gz, .fastq.gz).
// #[clap(short = 'F', long = "files")]
Expand Down
2 changes: 1 addition & 1 deletion kr2r/src/bin/kun.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ impl From<ClassifyArgs> for resolve::Args {
kraken_output_dir: item.kraken_output_dir,
report_kmer_data: item.report_kmer_data,
report_zero_counts: item.report_zero_counts,
full_output: item.full_output,
// full_output: item.full_output,
num_threads: item.num_threads,
}
}
Expand Down
52 changes: 24 additions & 28 deletions kr2r/src/bin/resolve.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use kr2r::utils::{find_and_trans_bin_files, find_and_trans_files, open_file};
use kr2r::HitGroup;
// use rayon::prelude::*;
use seqkmer::{buffer_map_parallel, trim_pair_info, OptionPair};
use std::collections::{HashMap, HashSet};
use std::collections::HashMap;
use std::fs::File;
use std::io::{self, BufRead, BufReader, BufWriter, Read, Result, Write};
use std::path::{Path, PathBuf};
Expand Down Expand Up @@ -65,9 +65,9 @@ pub struct Args {
#[clap(long = "output-dir", value_parser)]
pub kraken_output_dir: Option<PathBuf>,

/// output file contains all unclassified sequence
#[clap(long, value_parser, default_value_t = false)]
pub full_output: bool,
// /// Output file contains all unclassified sequences
// #[clap(long, value_parser, default_value_t = false)]
// pub full_output: bool,
/// Confidence score threshold, default is 0.0.
#[clap(
short = 'T',
Expand Down Expand Up @@ -120,8 +120,7 @@ fn process_batch<P: AsRef<Path>>(
id_map: &HashMap<u32, (String, String, usize, Option<usize>)>,
writer: &mut Box<dyn Write + Send>,
value_mask: usize,
) -> Result<(TaxonCountersDash, usize, HashSet<u32>)> {
let hit_seq_id_set = HashSet::new();
) -> Result<(TaxonCountersDash, usize)> {
let confidence_threshold = args.confidence_threshold;
let minimum_hit_groups = args.minimum_hit_groups;

Expand All @@ -138,6 +137,7 @@ fn process_batch<P: AsRef<Path>>(
if let Some(item) = id_map.get(&k) {
let mut rows = rows.to_owned();
rows.sort_unstable();

let dna_id = trim_pair_info(&item.0);
let range =
OptionPair::from(((0, item.2), item.3.map(|size| (item.2, size + item.2))));
Expand Down Expand Up @@ -179,11 +179,7 @@ fn process_batch<P: AsRef<Path>>(
.expect("failed");
}

Ok((
cur_taxon_counts,
classify_counter.load(Ordering::SeqCst),
hit_seq_id_set,
))
Ok((cur_taxon_counts, classify_counter.load(Ordering::SeqCst)))
}

pub fn run(args: Args) -> Result<()> {
Expand Down Expand Up @@ -218,7 +214,7 @@ pub fn run(args: Args) -> Result<()> {
}
None => Box::new(BufWriter::new(io::stdout())) as Box<dyn Write + Send>,
};
let (thread_taxon_counts, thread_classified, hit_seq_set) = process_batch::<PathBuf>(
let (thread_taxon_counts, thread_classified) = process_batch::<PathBuf>(
sam_files,
&args,
&taxo,
Expand All @@ -227,22 +223,22 @@ pub fn run(args: Args) -> Result<()> {
value_mask,
)?;

if args.full_output {
sample_id_map
.iter()
.filter(|(key, _)| !hit_seq_set.contains(key))
.for_each(|(_, value)| {
let dna_id = trim_pair_info(&value.0); // assumes key is of type &str
let output_line = format!(
"U\t{}\t0\t{}\t{}\n",
dna_id,
value.1,
if value.3.is_none() { "" } else { " |:| " }
);

writer.write_all(output_line.as_bytes()).unwrap();
});
}
// if args.full_output {
// sample_id_map
// .iter()
// .filter(|(key, _)| !hit_seq_set.contains(key))
// .for_each(|(_, value)| {
// let dna_id = trim_pair_info(&value.0); // assumes key is of type &str
// let output_line = format!(
// "U\t{}\t0\t{}\t{}\n",
// dna_id,
// value.1,
// if value.3.is_none() { "" } else { " |:| " }
// );

// writer.write_all(output_line.as_bytes()).unwrap();
// });
// }

let mut sample_taxon_counts: HashMap<
u64,
Expand Down

0 comments on commit a3d2b25

Please sign in to comment.