From c1724d5e33293fd630afd60d54369db84389443d Mon Sep 17 00:00:00 2001 From: Christophe Labouisse Date: Sat, 20 Jul 2024 14:29:16 +0200 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20Add=20support=20of=20zstandard?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.toml | 28 ++++++++++++++++++++++------ README.md | 2 ++ src/oneio/compressions/mod.rs | 2 ++ src/oneio/compressions/zstd.rs | 22 ++++++++++++++++++++++ src/oneio/mod.rs | 4 ++++ tests/oneio_test.rs | 2 ++ tests/test_data.txt.zst | Bin 0 -> 45 bytes 7 files changed, 54 insertions(+), 6 deletions(-) create mode 100644 src/oneio/compressions/zstd.rs create mode 100644 tests/test_data.txt.zst diff --git a/Cargo.toml b/Cargo.toml index 0667c32..bca7c8c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,13 +21,18 @@ required-features = ["cli"] [dependencies] # remote -reqwest = { version = "0.12", default-features = false, features = ["blocking", "http2", "charset"], optional = true } +reqwest = { version = "0.12", default-features = false, features = [ + "blocking", + "http2", + "charset", +], optional = true } # compression flate2 = { version = "1", optional = true } bzip2 = { version = "0.4.4", optional = true } lz4 = { version = "1.24", optional = true } xz2 = { version = "0.1", optional = true } +zstd = { version = "0.13.2", optional = true } # sha256 ring = { version = "0.17", optional = true } @@ -42,7 +47,9 @@ serde = { version = "1.0", optional = true } serde_json = { version = "1.0", optional = true } # s3 -rust-s3 = { version = "0.34.0-rc4", optional = true, default-features = false, features = ["sync"] } +rust-s3 = { version = "0.34.0-rc4", optional = true, default-features = false, features = [ + "sync", +] } dotenvy = { version = "0.15", optional = true } # ftp @@ -60,23 +67,32 @@ lib-core = ["remote", "compressions", "json"] # cli dependencies cli = [ # core dependency - "lib-core", "rustls", "s3", "digest", + "lib-core", + "rustls", + "s3", + "digest", # 
CLI specific - "clap", "tracing", + "clap", + "tracing", ] # optional flags to select native-tls or rust-tls -native-tls = ["reqwest?/default-tls", "suppaftp?/native-tls", "rust-s3?/sync-native-tls"] +native-tls = [ + "reqwest?/default-tls", + "suppaftp?/native-tls", + "rust-s3?/sync-native-tls", +] rustls = ["reqwest?/rustls-tls", "suppaftp?/rustls", "rust-s3?/sync-rustls-tls"] digest = ["ring", "hex"] # supported compression algorithms, which can be toggled on/off individually -compressions = ["gz", "bz", "lz", "xz"] +compressions = ["gz", "bz", "lz", "xz", "zstd"] gz = ["flate2"] bz = ["bzip2"] lz = ["lz4"] xz = ["xz2"] +zstd = ["dep:zstd"] remote = ["reqwest", "suppaftp"] json = ["serde", "serde_json"] diff --git a/README.md b/README.md index adf2a80..90cd8c1 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,7 @@ Default flags include `lib-core` and `rustls`. - `bz`: support `bzip2` files using `bzip2` crate - `lz`: support `lz4` files using `lz4` crate - `xz`: support `xz` files using `xz2` crate (requires xz library installed) + - `zstd`: support `zst` files using `zstd` crate - `json`: allow reading JSON content into structs with `serde` and `serde_json` ### TLS choice: `rustls` or `native-tls` @@ -126,6 +127,7 @@ The returned reader implements BufRead, and handles decompression from the follo - `bzip2`: files ending with `bz` or `bz2` - `lz4`: files ending with `lz4` or `lz` - `xz`: files ending with `xz` or `xz2` +- `zstd`: files ending with `zst` or `zstd` It also handles reading from remote or local files transparently. 
diff --git a/src/oneio/compressions/mod.rs b/src/oneio/compressions/mod.rs index fc60d1c..b573c2a 100644 --- a/src/oneio/compressions/mod.rs +++ b/src/oneio/compressions/mod.rs @@ -10,6 +10,8 @@ pub(crate) mod gzip; pub(crate) mod lz4; #[cfg(feature = "xz")] pub(crate) mod xz; +#[cfg(feature = "zstd")] +pub(crate) mod zstd; pub trait OneIOCompression { fn get_reader(raw_reader: Box) -> Result, OneIoError>; diff --git a/src/oneio/compressions/zstd.rs b/src/oneio/compressions/zstd.rs new file mode 100644 index 0000000..b9d21f4 --- /dev/null +++ b/src/oneio/compressions/zstd.rs @@ -0,0 +1,22 @@ +use crate::oneio::compressions::OneIOCompression; +use crate::OneIoError; +use std::fs::File; +use std::io::{BufWriter, Read, Write}; + +pub(crate) struct OneIOZstd; + +impl OneIOCompression for OneIOZstd { + fn get_reader(raw_reader: Box) -> Result, OneIoError> { + match zstd::Decoder::new(raw_reader) { + Ok(dec) => Ok(Box::new(dec)), + Err(e) => Err(OneIoError::IoError(e)), + } + } + + fn get_writer(raw_writer: BufWriter) -> Result, OneIoError> { + match zstd::Encoder::new(raw_writer, 9) { + Ok(dec) => Ok(Box::new(dec.auto_finish())), + Err(e) => Err(OneIoError::IoError(e)), + } + } +} diff --git a/src/oneio/mod.rs b/src/oneio/mod.rs index 94c5d53..652e8bb 100644 --- a/src/oneio/mod.rs +++ b/src/oneio/mod.rs @@ -70,6 +70,8 @@ pub fn get_reader(path: &str) -> Result, OneIoError> { "lz4" | "lz" => compressions::lz4::OneIOLz4::get_reader(raw_reader), #[cfg(feature = "xz")] "xz" | "xz2" | "lzma" => compressions::xz::OneIOXz::get_reader(raw_reader), + #[cfg(feature = "zstd")] + "zst" | "zstd" => compressions::zstd::OneIOZstd::get_reader(raw_reader), _ => { // unknown file type of file {}. 
try to read as uncompressed file Ok(Box::new(raw_reader)) @@ -165,6 +167,8 @@ pub fn get_writer(path: &str) -> Result, OneIoError> { "lz4" | "lz" => compressions::lz4::OneIOLz4::get_writer(output_file), #[cfg(feature = "xz")] "xz" | "xz2" | "lzma" => compressions::xz::OneIOXz::get_writer(output_file), + #[cfg(feature = "zstd")] + "zst" | "zstd" => compressions::zstd::OneIOZstd::get_writer(output_file), _ => Ok(Box::new(BufWriter::new(output_file))), } } diff --git a/tests/oneio_test.rs b/tests/oneio_test.rs index feb435a..8f1eb0b 100644 --- a/tests/oneio_test.rs +++ b/tests/oneio_test.rs @@ -88,6 +88,7 @@ fn test_reader_local() { test_read("tests/test_data.txt.bz2"); test_read("tests/test_data.txt.lz4"); test_read("tests/test_data.txt.xz"); + test_read("tests/test_data.txt.zst"); } #[test] @@ -104,6 +105,7 @@ fn test_writer() { test_write("tests/test_write_data.txt", "tests/test_data.txt"); test_write("tests/test_write_data.txt.gz", "tests/test_data.txt.gz"); test_write("tests/test_write_data.txt.bz2", "tests/test_data.txt.bz2"); + test_write("tests/test_write_data.txt.zst", "tests/test_data.txt.zst"); // lz4 writer is not currently supported } diff --git a/tests/test_data.txt.zst b/tests/test_data.txt.zst new file mode 100644 index 0000000000000000000000000000000000000000..1bd0c3bc59d2aec775462fa3642446af25b52db2 GIT binary patch literal 45 wcmdPcs{dC-fsv8HKQGnOU!f$mxI`f>GbdG#D