Skip to content

Commit

Permalink
Support compressed gzip multistream
Browse files Browse the repository at this point in the history
  • Loading branch information
milesgranger committed Sep 6, 2021
1 parent 446a671 commit b9cfa13
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 6 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "cramjam"
version = "2.3.2"
version = "2.4.0"
authors = ["Miles Granger <miles59923@gmail.com>"]
edition = "2018"
license = "MIT License"
Expand Down
27 changes: 23 additions & 4 deletions src/gzip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,15 +54,17 @@ pub fn decompress_into(input: BytesType, mut output: BytesType) -> PyResult<usiz
}

pub(crate) mod internal {
use flate2::read::{GzDecoder, GzEncoder};
use flate2::read::{GzEncoder, MultiGzDecoder};
use flate2::Compression;
use std::io::prelude::*;
use std::io::Error;
use std::io::{Cursor, Error};

/// Decompress gzip data from `input` into `output`.
///
/// The post-commit implementation wraps `input` in a `MultiGzDecoder`, so an
/// input made of several concatenated gzip members is decoded in full (this
/// is what `test_gzip_multiple_streams` exercises).
///
/// Returns the number of decompressed bytes. Any I/O error raised while
/// decoding or while writing to `output` is propagated to the caller via `?`.
pub fn decompress<W: Write + ?Sized, R: Read>(input: R, output: &mut W) -> Result<usize, Error> {
// Diff view: the next two lines are the pre-commit implementation that this
// commit removes (single-member `GzDecoder`, streamed straight into `output`).
let mut decoder = GzDecoder::new(input);
let n_bytes = std::io::copy(&mut decoder, output)?;
// Diff view: the lines below are the implementation this commit adds.
let mut decoder = MultiGzDecoder::new(input);
// The whole decompressed payload is collected in this in-memory buffer first...
let mut out = vec![];
let n_bytes = decoder.read_to_end(&mut out)?;
// ...and only then copied into `output` through a `Cursor` over the buffer.
std::io::copy(&mut Cursor::new(out.as_slice()), output)?;
// NOTE(review): `read_to_end` already yields a `usize`, so this cast looks like
// a leftover from the `std::io::copy` (`u64`) version — confirm and simplify.
Ok(n_bytes as usize)
}

Expand All @@ -73,4 +75,21 @@ pub(crate) mod internal {
let n_bytes = std::io::copy(&mut encoder, output)?;
Ok(n_bytes as usize)
}

#[cfg(test)]
mod tests {

    /// Two independently compressed gzip members, concatenated back to back,
    /// must decompress to the concatenation of their payloads.
    #[test]
    fn test_gzip_multiple_streams() {
        // Compress each payload into its own, self-contained gzip member.
        let mut first_member = vec![];
        super::compress(&b"foo"[..], &mut first_member, None).unwrap();
        let mut second_member = vec![];
        super::compress(&b"bar"[..], &mut second_member, None).unwrap();

        // Glue the members together to form a multistream input.
        let mut multistream = first_member;
        multistream.extend_from_slice(&second_member);

        let mut decompressed = vec![];
        super::decompress(multistream.as_slice(), &mut decompressed).unwrap();
        assert_eq!(decompressed, b"foobar".to_vec());
    }
}
}
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ pub enum BytesType<'a> {
RustyBuffer(&'a PyCell<RustyBuffer>),
/// `numpy.array` with `dtype=np.uint8`
#[pyo3(transparent, annotation = "numpy")]
NumpyArray(RustyNumpyArray<'a>)
NumpyArray(RustyNumpyArray<'a>),
}

impl<'a> AsBytes for BytesType<'a> {
Expand Down
18 changes: 18 additions & 0 deletions tests/test_variants.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import gzip
import pytest
import numpy as np
import cramjam
Expand Down Expand Up @@ -221,3 +222,20 @@ def test_lz4_block(compress_kwargs):
output_len=len(data) if not compress_kwargs["store_size"] else None,
)
assert bytes(out) == data


def test_gzip_multiple_streams():
    """Concatenated gzip members must decompress to the concatenated payloads."""
    expected = b"foobar"

    # Reference behaviour: the stdlib decoder accepts back-to-back members.
    std_members = gzip.compress(b"foo") + gzip.compress(b"bar")
    assert gzip.decompress(std_members) == expected

    # works with data compressed by std gzip lib
    assert bytes(cramjam.gzip.decompress(std_members)) == expected

    # works with data compressed by cramjam
    cramjam_members = b"".join(
        bytes(cramjam.gzip.compress(part)) for part in (b"foo", b"bar")
    )
    assert bytes(cramjam.gzip.decompress(cramjam_members)) == expected

0 comments on commit b9cfa13

Please sign in to comment.