Skip to content

Commit

Permalink
Merge pull request #64 from noamteyssier/53-bed12-support
Browse files Browse the repository at this point in the history
53 bed12 support
  • Loading branch information
noamteyssier authored Nov 20, 2023
2 parents f3fb345 + 828c6f4 commit 8b08fb0
Show file tree
Hide file tree
Showing 41 changed files with 1,681 additions and 74 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "gia"
version = "0.1.21"
version = "0.1.22"
edition = "2021"
description = "A tool for set theoretic operations of genomic intervals"
license = "MIT"
Expand Down
54 changes: 46 additions & 8 deletions src/commands/closest.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::{
io::{
match_input, match_output, read_paired_bed3_sets, read_paired_bed6_sets,
write_pairs_iter_with,
match_input, match_output, read_paired_bed12_sets, read_paired_bed3_sets,
read_paired_bed6_sets, write_pairs_iter_with,
},
types::{InputFormat, IntervalPair},
utils::sort_pairs,
Expand Down Expand Up @@ -112,6 +112,34 @@ pub fn closest_bed6(
Ok(())
}

/// Runs the closest-interval search for BED12 formatted inputs.
///
/// The query (`a`) and target (`b`) inputs are opened, read together as a pair
/// of BED12 sets, passed through `sort_pairs` with the `sorted` flag, and then
/// searched with the strategy selected by `upstream` / `downstream`. The
/// resulting pairs are written to the handle opened for `output`.
///
/// # Errors
/// Propagates any error raised while opening the inputs or the output, while
/// reading the paired sets, or while writing the result pairs.
pub fn closest_bed12(
    a: Option<String>,
    b: String,
    output: Option<String>,
    upstream: bool,
    downstream: bool,
    named: bool,
    sorted: bool,
    compression_threads: usize,
    compression_level: u32,
) -> Result<()> {
    // Open both inputs; the target path is always explicit, the query may be absent.
    let query_reader = match_input(a)?;
    let target_reader = match_input(Some(b))?;

    // Read both sets together so they share a single (optional) name translater.
    let (mut queries, mut targets, name_map) =
        read_paired_bed12_sets(query_reader, target_reader, named)?;
    sort_pairs(&mut queries, &mut targets, sorted);

    // Build the iterator of closest pairs for the requested search direction.
    let closest_pairs = run_closest(
        &queries,
        &targets,
        ClosestType::new(upstream, downstream),
    );

    // Open the output only after the search has been set up, then emit the pairs.
    let writer = match_output(output, compression_threads, compression_level)?;
    write_pairs_iter_with(closest_pairs, writer, name_map.as_ref())?;
    Ok(())
}

pub fn closest(
a: Option<String>,
b: String,
Expand All @@ -124,8 +152,19 @@ pub fn closest(
compression_threads: usize,
compression_level: u32,
) -> Result<()> {
if format == InputFormat::Bed3 {
closest_bed3(
match format {
InputFormat::Bed3 => closest_bed3(
a,
b,
output,
upstream,
downstream,
named,
sorted,
compression_threads,
compression_level,
),
InputFormat::Bed6 => closest_bed6(
a,
b,
output,
Expand All @@ -135,9 +174,8 @@ pub fn closest(
sorted,
compression_threads,
compression_level,
)
} else {
closest_bed6(
),
InputFormat::Bed12 => closest_bed12(
a,
b,
output,
Expand All @@ -147,7 +185,7 @@ pub fn closest(
sorted,
compression_threads,
compression_level,
)
),
}
}

Expand Down
49 changes: 47 additions & 2 deletions src/commands/extend.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::{
io::{
match_input, match_output, read_bed3_set, read_bed6_set, write_records_iter_with,
WriteNamedIter, WriteNamedIterImpl,
match_input, match_output, read_bed12_set, read_bed3_set, read_bed6_set,
write_records_iter_with, WriteNamedIter, WriteNamedIterImpl,
},
types::{Genome, InputFormat, Translater},
};
Expand Down Expand Up @@ -135,6 +135,40 @@ fn extend_bed6(
Ok(())
}

/// Extends the intervals of a BED12 input and writes the result.
///
/// The input is read into a BED12 set (with an optional name translater when
/// `named` is set). When `genome_path` is provided, a `Genome` is loaded from
/// it using that same translater. Everything is then handed to `extend_set`
/// together with the `both` / `left` / `right` amounts and the output settings.
///
/// # Errors
/// Propagates any error raised while opening or reading the input, while
/// loading the genome, or from `extend_set`.
fn extend_bed12(
    input: Option<String>,
    output: Option<String>,
    both: Option<usize>,
    left: Option<usize>,
    right: Option<usize>,
    genome_path: Option<String>,
    named: bool,
    compression_threads: usize,
    compression_level: u32,
) -> Result<()> {
    let reader = match_input(input)?;
    let (mut intervals, name_map) = read_bed12_set(reader, named)?;

    // The genome is optional: it is only loaded when a path was supplied.
    let genome = match genome_path {
        Some(path) => Some(Genome::from_reader_immutable(
            match_input(Some(path))?,
            name_map.as_ref(),
            false,
        )?),
        None => None,
    };

    extend_set(
        output,
        &mut intervals,
        both,
        left,
        right,
        genome,
        name_map.as_ref(),
        compression_threads,
        compression_level,
    )?;
    Ok(())
}

pub fn extend(
input: Option<String>,
output: Option<String>,
Expand Down Expand Up @@ -170,5 +204,16 @@ pub fn extend(
compression_threads,
compression_level,
),
InputFormat::Bed12 => extend_bed12(
input,
output,
both,
left,
right,
genome_path,
named,
compression_threads,
compression_level,
),
}
}
51 changes: 50 additions & 1 deletion src/commands/get_fasta.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::{
io::{build_reader, match_input, match_output},
types::{Bed6, InputFormat},
types::{Bed12, Bed6, InputFormat},
};
use anyhow::Result;
use bedrs::{Coordinates, NamedInterval};
Expand Down Expand Up @@ -85,6 +85,52 @@ fn get_fasta_bed6(
Ok(())
}

/// Extracts the FASTA sequence of every BED12 record in `bed` and writes it to
/// `output`.
///
/// For each record a header line of the form
/// `>chr:start-end::name::score::strand::thick_start::thick_end::item_rgb::block_count::block_sizes::block_starts`
/// is written, followed by the queried sequence with its embedded newlines
/// removed and a single terminating newline.
///
/// Records whose lookup in the indexed FASTA fails are skipped without
/// reporting an error.
///
/// # Errors
/// Propagates any error raised while opening the input or output, building or
/// opening the FASTA index, reading or deserializing a record, or writing to
/// the output.
fn get_fasta_bed12(
    bed: Option<String>,
    fasta: &str,
    output: Option<String>,
    compression_threads: usize,
    compression_level: u32,
) -> Result<()> {
    let handle = match_input(bed)?;
    let fasta_index = build_fasta_index(fasta)?;
    let fasta = IndexedFasta::new(fasta_index, fasta)?;

    let mut csv_reader = build_reader(handle);
    // A single record buffer is reused for every row of the input.
    let mut byterecord = ByteRecord::new();
    let mut output = match_output(output, compression_threads, compression_level)?;

    while csv_reader.read_byte_record(&mut byterecord)? {
        let record: Bed12 = byterecord.deserialize(None)?;
        match fasta.query_buffer(record.chr, record.start, record.end) {
            Ok(buffer) => {
                writeln!(
                    output,
                    ">{}:{}-{}::{}::{}::{}::{}::{}::{}::{}::{}::{}",
                    record.chr,
                    record.start,
                    record.end,
                    record.name,
                    record.score,
                    record.strand,
                    record.thick_start,
                    record.thick_end,
                    record.item_rgb,
                    record.block_count,
                    record.block_sizes,
                    record.block_starts,
                )?;
                // `write_all` is required here: a plain `write` may accept only
                // part of the slice, which would silently truncate the sequence.
                for subseq in buffer.split_str("\n") {
                    output.write_all(subseq)?;
                }
                output.write_all(b"\n")?;
            }
            // Deliberate best-effort: records that cannot be queried are skipped.
            Err(_) => continue,
        }
    }
    Ok(())
}

pub fn get_fasta(
bed: Option<String>,
fasta: &str,
Expand All @@ -100,5 +146,8 @@ pub fn get_fasta(
InputFormat::Bed6 => {
get_fasta_bed6(bed, fasta, output, compression_threads, compression_level)
}
InputFormat::Bed12 => {
get_fasta_bed12(bed, fasta, output, compression_threads, compression_level)
}
}
}
41 changes: 38 additions & 3 deletions src/commands/intersect/intersect.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ use super::iter::{run_function, OutputMethod};
use crate::{
commands::{run_find, OverlapMethod},
io::{
build_reader, match_input, match_output, read_paired_bed3_sets, read_paired_bed6_sets,
write_named_records_iter_dashmap, write_records_iter_with, NamedIter, UnnamedIter,
WriteNamedIter, WriteNamedIterImpl,
build_reader, match_input, match_output, read_paired_bed12_sets, read_paired_bed3_sets,
read_paired_bed6_sets, write_named_records_iter_dashmap, write_records_iter_with,
NamedIter, UnnamedIter, WriteNamedIter, WriteNamedIterImpl,
},
types::{InputFormat, StreamTranslater, Translater},
};
Expand Down Expand Up @@ -90,6 +90,31 @@ fn intersect_bed6(
)
}

/// Intersects two BED12 inputs and writes the result.
///
/// The query (`a`) and target (`b`) inputs are opened and read together as a
/// pair of BED12 sets, then forwarded to `run_intersect_set` along with the
/// overlap method, the output method, and the output settings.
///
/// # Errors
/// Propagates any error raised while opening or reading the inputs, and returns
/// whatever `run_intersect_set` returns.
fn intersect_bed12(
    a: Option<String>,
    b: String,
    output: Option<String>,
    overlap_method: OverlapMethod,
    output_method: OutputMethod,
    named: bool,
    compression_threads: usize,
    compression_level: u32,
) -> Result<()> {
    // Open the query first, then the target, before reading them as a pair.
    let query_reader = match_input(a)?;
    let target_reader = match_input(Some(b))?;
    let (queries, targets, name_map) =
        read_paired_bed12_sets(query_reader, target_reader, named)?;

    run_intersect_set(
        &queries,
        &targets,
        overlap_method,
        output_method,
        output,
        name_map.as_ref(),
        compression_threads,
        compression_level,
    )
}

pub fn intersect_set(
a: Option<String>,
b: String,
Expand Down Expand Up @@ -131,6 +156,16 @@ pub fn intersect_set(
compression_threads,
compression_level,
),
InputFormat::Bed12 => intersect_bed12(
a,
b,
output,
overlap_method,
output_method,
named,
compression_threads,
compression_level,
),
}
}

Expand Down
34 changes: 30 additions & 4 deletions src/commands/merge.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ use std::io::{Read, Write};

use crate::{
io::{
build_reader, iter_unnamed, match_input, match_output, read_bed3_set, read_bed6_set,
write_records_iter, write_records_iter_with,
build_reader, iter_unnamed, match_input, match_output, read_bed12_set, read_bed3_set,
read_bed6_set, write_3col_iter_with, write_records_iter,
},
types::InputFormat,
};
Expand All @@ -27,7 +27,7 @@ where
set.set_sorted();
}
let merged = set.merge()?;
write_records_iter_with(
write_3col_iter_with(
merged.records().into_iter(),
output_handle,
translater.as_ref(),
Expand All @@ -52,7 +52,32 @@ where
set.set_sorted();
}
let merged = set.merge()?;
write_records_iter_with(
write_3col_iter_with(
merged.records().into_iter(),
output_handle,
translater.as_ref(),
)?;
Ok(())
}

fn merge_in_memory_bed12<R, W>(
input_handle: R,
output_handle: W,
sorted: bool,
named: bool,
) -> Result<()>
where
R: Read,
W: Write,
{
let (mut set, translater) = read_bed12_set(input_handle, named)?;
if !sorted {
set.sort();
} else {
set.set_sorted();
}
let merged = set.merge()?;
write_3col_iter_with(
merged.records().into_iter(),
output_handle,
translater.as_ref(),
Expand Down Expand Up @@ -91,6 +116,7 @@ pub fn merge(
match format {
InputFormat::Bed3 => merge_in_memory_bed3(input_handle, output_handle, sorted, named),
InputFormat::Bed6 => merge_in_memory_bed6(input_handle, output_handle, sorted, named),
InputFormat::Bed12 => merge_in_memory_bed12(input_handle, output_handle, sorted, named),
}
}
}
Loading

0 comments on commit 8b08fb0

Please sign in to comment.