-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
20 changed files
with
884 additions
and
24 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
//! Data container implementations. | ||
pub mod vec; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
//! Data container implementations for [`ndarray::Array1`] and [`ndarray::Array2`]. | ||
use ndarray::{Array1, Array2, ArrayView1}; | ||
use crate::traits::IndexedDataContainer; | ||
|
||
impl<'a, U> IndexedDataContainer<'a> for Array1<U> | ||
where | ||
U: Copy + Default + 'a, | ||
{ | ||
type Item = U; | ||
type Output = Array1<U>; | ||
|
||
fn get_value(&'a self, index: usize) -> Self::Item { | ||
self[index] | ||
} | ||
|
||
fn len(&self) -> usize { | ||
self.len() | ||
} | ||
|
||
fn is_valid_index(&self, index: usize) -> bool { | ||
index < self.shape()[0] | ||
} | ||
|
||
fn new_from_indices(&self, indices: &[usize]) -> Self::Output { | ||
Array1::from_iter(indices.iter().map(|&idx| self.get_value(idx))) | ||
} | ||
} | ||
|
||
impl<'a, U> IndexedDataContainer<'a> for Array2<U> | ||
where | ||
U: Copy + Default + 'a, | ||
{ | ||
type Item = ArrayView1<'a, U>; | ||
type Output = Array2<U>; | ||
|
||
fn get_value(&'a self, index: usize) -> Self::Item { | ||
self.row(index) | ||
} | ||
|
||
fn len(&self) -> usize { | ||
self.shape()[0] | ||
} | ||
|
||
fn is_valid_index(&self, index: usize) -> bool { | ||
index < self.shape()[0] | ||
} | ||
|
||
fn new_from_indices(&self, indices: &[usize]) -> Self::Output { | ||
let cols = self.shape()[1]; | ||
|
||
let rows_data: Vec<U> = indices | ||
.iter() | ||
.flat_map(|&idx| self.row(idx).iter().cloned().collect::<Vec<_>>()) | ||
.collect(); | ||
|
||
// create a new Array2<U> from the rows | ||
// shape is (number of indices, number of columns) | ||
Array2::from_shape_vec((indices.len(), cols), rows_data) | ||
.expect("Shape and collected data size mismatch") | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
//! Data container implementations for [`Vec<U>`]. | ||
|
||
/// Trait methods for the commonly-used `Vec<U>` data container. | ||
/// | ||
/// Note that the associated `Item` type is always a *reference* to the data elements. | ||
impl<'a, U> IndexedDataContainer<'a> for Vec<U> | ||
where | ||
U: Clone + 'a, | ||
{ | ||
type Item = &'a U; | ||
type Output = Vec<U>; | ||
|
||
fn get_value(&'a self, index: usize) -> Self::Item { | ||
self.get(index).unwrap() | ||
} | ||
|
||
fn len(&self) -> usize { | ||
self.len() | ||
} | ||
|
||
fn is_valid_index(&self, index: usize) -> bool { | ||
self.get(index).is_some() | ||
} | ||
|
||
fn new_from_indices(&self, indices: &[usize]) -> Self::Output { | ||
Vec::from_iter(indices.iter().map(|&idx| (*self.get_value(idx)).clone())) | ||
} | ||
} | ||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,170 @@ | ||
//! Input/Output file handling with [`InputFile`] and [`OutputFile`]. | ||
//! | ||
//! These types abstract over reading/writing both plaintext and gzip-compressed | ||
//! input/output. | ||
use flate2::read::GzDecoder; | ||
use flate2::write::GzEncoder; | ||
use flate2::Compression; | ||
use std::fs::File; | ||
use std::io::Write; | ||
use std::io::{self, BufWriter}; | ||
use std::io::{BufRead, BufReader, Read}; | ||
|
||
/// Checks whether a file is gzip-compressed by looking for the gzip magic number.
///
/// Only the first two bytes of the file are read. A file that is shorter than two
/// bytes (including an empty file) cannot be gzip data and yields `Ok(false)`
/// rather than an "unexpected end of file" error.
///
/// # Errors
///
/// Returns the underlying `io::Error` if the file cannot be opened or read.
fn is_gzipped_file(file_path: &str) -> io::Result<bool> {
    const GZIP_MAGIC: [u8; 2] = [0x1f, 0x8b];

    // `take(2)` caps the read at the magic-number length; `read_to_end` simply
    // stops early on a short file instead of failing like `read_exact` would.
    let mut magic = Vec::with_capacity(GZIP_MAGIC.len());
    File::open(file_path)?
        .take(GZIP_MAGIC.len() as u64)
        .read_to_end(&mut magic)?;

    Ok(magic == GZIP_MAGIC)
}
|
||
/// Represents an input file.
///
/// This struct is used to handle operations on an input file, such as reading from the file.
/// This abstracts how data is read in, allowing for both plaintext and gzip-compressed input
/// to be read through a common interface.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct InputFile {
    /// Path to the file on disk.
    pub filepath: String,
    /// Comment lines collected from the start of the file, if any have been collected.
    pub comments: Option<Vec<String>>,
    /// Header line collected from the start of the file, if one has been found.
    pub header: Option<String>,
    /// Number of leading lines to skip before the data begins.
    pub skip_lines: usize,
}
|
||
impl InputFile { | ||
/// Constructs a new `InputFile`. | ||
/// | ||
/// # Arguments | ||
/// | ||
/// * `filepath` - A string slice that holds the path to the file. If the file extension is | ||
/// `.gz`, `InputFile` will automatically uncompress the input. | ||
pub fn new(filepath: &str) -> Self { | ||
Self { | ||
filepath: filepath.to_string(), | ||
comments: None, | ||
header: None, | ||
skip_lines: 0, | ||
} | ||
} | ||
|
||
/// Opens the file and returns a buffered reader. | ||
/// | ||
/// If the file is gzip-compressed (indicated by a ".gz" extension), this method will | ||
/// automatically handle the decompression. | ||
/// | ||
/// # Returns | ||
/// | ||
/// A result containing a `BufReader<Box<dyn Read>>` on success, or a `FileError` on failure. | ||
/// | ||
pub fn reader(&self) -> io::Result<BufReader<Box<dyn Read>>> { | ||
let file = File::open(self.filepath.clone())?; | ||
//let is_gzipped_name = self.filepath.ends_with(".gz"); | ||
let is_gzipped = is_gzipped_file(&self.filepath)?; | ||
let reader: Box<dyn Read> = if is_gzipped { | ||
Box::new(GzDecoder::new(file)) | ||
} else { | ||
Box::new(file) | ||
}; | ||
Ok(BufReader::new(reader)) | ||
} | ||
|
||
/// Collects comment lines and/or a line at the start of the file. | ||
pub fn collect_metadata(&mut self, comment: &str, header: Option<&str>) -> io::Result<bool> { | ||
let mut buf_reader = self.reader()?; | ||
let mut comments = Vec::new(); | ||
let mut line = String::new(); | ||
|
||
while buf_reader.read_line(&mut line)? > 0 { | ||
if line.starts_with(comment) { | ||
comments.push(line.trim_end().to_string()); | ||
self.skip_lines += 1; | ||
} else if let Some(header_string) = header { | ||
if line.starts_with(header_string) { | ||
self.header = Some(line.trim_end().to_string()); | ||
self.skip_lines += 1; | ||
// We only handle one header line. If there are more, the | ||
// file is *very* poorly formatted. So just let downstream | ||
// parsing errors catch this. In the future, we could have a specialized | ||
// error. | ||
break; | ||
} | ||
// break on the first non-header/comment line | ||
break; | ||
} | ||
line.clear(); | ||
} | ||
|
||
self.comments = Some(comments); | ||
Ok(self.skip_lines > 0) | ||
} | ||
|
||
/// Method to continue reading after skipping the comment and header lines. | ||
pub fn continue_reading(&self) -> io::Result<BufReader<Box<dyn Read>>> { | ||
let mut buf_reader = self.reader()?; | ||
let mut skipped_lines = 0; | ||
let mut line = String::new(); | ||
|
||
// skip the lines that were previously read as comments or header | ||
while skipped_lines < self.skip_lines { | ||
buf_reader.read_line(&mut line)?; | ||
skipped_lines += 1; | ||
line.clear(); | ||
} | ||
Ok(buf_reader) | ||
} | ||
} | ||
|
||
/// Represents an output file.
///
/// This struct is used to handle operations on an output file, such as writing to the file.
/// This abstracts writing both plaintext and gzip-compressed files.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct OutputFile {
    /// Path of the file to create.
    pub filepath: String,
    /// Optional header entries; each is written as its own `#`-prefixed line.
    pub header: Option<Vec<String>>,
}
|
||
impl OutputFile { | ||
/// Constructs a new `OutputFile`. | ||
/// | ||
/// # Arguments | ||
/// | ||
/// * `filepath` - A string slice that holds the path to the file. If the file extension is | ||
/// `.gz`, `OutputFile` will automatically write gzip-compressed output. | ||
/// * `header` - An optional vector of strings representing commented header lines to be written to the file. | ||
pub fn new(filepath: &str, header: Option<Vec<String>>) -> Self { | ||
Self { | ||
filepath: filepath.to_string(), | ||
header, | ||
} | ||
} | ||
|
||
/// Opens the file and returns a writer. | ||
/// | ||
/// If the file path ends with ".gz", the file is treated as gzip-compressed, and the | ||
/// function will handle compression automatically. If a header is set, it will be written | ||
/// to the file. | ||
/// | ||
/// # Returns | ||
/// | ||
/// A result containing a `Box<dyn Write>` on success, or an `io::Error` on failure. | ||
pub fn writer(&self) -> io::Result<Box<dyn Write>> { | ||
let outfile = &self.filepath; | ||
let is_gzip = outfile.ends_with(".gz"); | ||
let mut writer: Box<dyn Write> = if is_gzip { | ||
Box::new(BufWriter::new(GzEncoder::new( | ||
File::create(outfile)?, | ||
Compression::default(), | ||
))) | ||
} else { | ||
Box::new(BufWriter::new(File::create(outfile)?)) | ||
}; | ||
// write header if one is set | ||
if let Some(entries) = &self.header { | ||
for entry in entries { | ||
writeln!(writer, "#{}", entry)?; | ||
} | ||
} | ||
Ok(writer) | ||
} | ||
} |
Oops, something went wrong.