Actually use LZ4 (#84)
* actually use lz4 and lz4hc

* fix tests

* dry (don't repeat yourself)
normanrz authored Jun 2, 2023
1 parent b5c8f8a commit f1cdb9e
Showing 4 changed files with 77 additions and 23 deletions.
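
In short: `write_block_lz4` previously compressed every block with `lz4::compress_hc`, so datasets created with `BLOCK_TYPE_LZ4` were in fact written with the slower LZ4HC encoder. Both encoders emit the same compressed block format, so existing data stays readable; the fix restores the intended speed/ratio trade-off by threading the block type through to the compressor. The key change in rust/src/file.rs, excerpted from the diff below:

    // before: both block types funneled into the LZ4HC path
    BlockType::LZ4 | BlockType::LZ4HC => self.write_block_lz4(buf),

    // after: each block type picks its matching compressor
    BlockType::LZ4 => self.write_block_lz4(buf, BlockType::LZ4),
    BlockType::LZ4HC => self.write_block_lz4(buf, BlockType::LZ4HC),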
1 change: 1 addition & 0 deletions .github/workflows/python-module.yml
@@ -145,6 +145,7 @@ jobs:
       - build_mac
       - build_win
     runs-on: ubuntu-latest
+    if: startsWith(github.event.ref, 'refs/tags')
     steps:
       - uses: actions/checkout@v3
         with:
64 changes: 44 additions & 20 deletions python/tests/test_wkw.py
@@ -65,7 +65,31 @@ def test_readwrite():
         path.getsize(path.join("tests/tmp", "z0", "y0", "x0.wkw"))
         == np.prod(SIZE) * (dataset.header.file_len**3) + header_size
     )
-    assert np.all(dataset.read(POSITION, SIZE) == test_data)
+    assert np.array_equiv(dataset.read(POSITION, SIZE), test_data)
+
+
+def test_readwrite_lz4():
+    with wkw.Dataset.create(
+        "tests/tmp",
+        wkw.Header(np.uint8, block_type=wkw.Header.BLOCK_TYPE_LZ4, file_len=4),
+    ) as dataset:
+        SIZE128 = (128, 128, 128)
+        test_data = generate_test_data(dataset.header.voxel_type, SIZE128)
+
+        dataset.write(POSITION, test_data)
+        assert np.array_equiv(dataset.read(POSITION, SIZE128), test_data)
+
+
+def test_readwrite_lz4hc():
+    with wkw.Dataset.create(
+        "tests/tmp",
+        wkw.Header(np.uint8, block_type=wkw.Header.BLOCK_TYPE_LZ4HC, file_len=4),
+    ) as dataset:
+        SIZE128 = (128, 128, 128)
+        test_data = generate_test_data(dataset.header.voxel_type, SIZE128)
+
+        dataset.write(POSITION, test_data)
+        assert np.array_equiv(dataset.read(POSITION, SIZE128), test_data)
 
 
 def test_readwrite_live_compression():
@@ -87,13 +111,13 @@ def test_readwrite_live_compression():
     )
 
     with wkw.Dataset.open("tests/tmp") as dataset:
-        assert np.all(dataset.read(POSITION, SIZE128) == test_data)
+        assert np.array_equiv(dataset.read(POSITION, SIZE128), test_data)
 
 
 def test_readwrite_live_compression_should_enforce_full_file_write():
     with pytest.raises(Exception):
         with wkw.Dataset.create(
-            "tests/tmp", wkw.Header(np.uint8, block_type=BLOCK_TYPE_LZ4)
+            "tests/tmp", wkw.Header(np.uint8, block_type=wkw.Header.BLOCK_TYPE_LZ4)
         ) as dataset:
             test_data = generate_test_data(dataset.header.voxel_type)
             dataset.write(POSITION, test_data)
@@ -113,7 +137,7 @@ def test_readwrite_live_compression_should_not_allow_inconsistent_writes():
             dataset.write(POSITION, test_data)
 
     with wkw.Dataset.open("tests/tmp") as dataset:
-        assert np.all(dataset.read(POSITION, SIZE129) == empty_data)
+        assert np.array_equiv(dataset.read(POSITION, SIZE129), empty_data)
 
 
 def test_readwrite_live_compression_should_truncate():
@@ -138,7 +162,7 @@ def test_readwrite_live_compression_should_truncate():
     assert empty_compressed_size < random_compressed_size
 
     with wkw.Dataset.open("tests/tmp") as dataset:
-        assert np.all(dataset.read(POSITION, SIZE128) == ones_data)
+        assert np.array_equiv(dataset.read(POSITION, SIZE128), ones_data)
 
 
 def test_compress():
@@ -157,7 +181,7 @@ def test_compress():
             path.getsize(path.join("tests/tmp2", "z0", "y0", "x0.wkw"))
             < np.prod(SIZE) * (dataset2.header.file_len**3) + header_size
         )
-        assert np.all(dataset2.read(POSITION, SIZE) == test_data)
+        assert np.array_equiv(dataset2.read(POSITION, SIZE), test_data)
 
 
 def test_row_major_order():
@@ -167,15 +191,15 @@ def test_row_major_order():
         dataset.write((0, 0, 0), data)
         read_data = dataset.read((0, 0, 0), data_shape)
 
-    assert np.all(data == read_data)
+    assert np.array_equiv(data, read_data)
 
     with wkw.Dataset.create("tests/tmp2", wkw.Header(np.uint8)) as dataset:
         fortran_data = np.asfortranarray(data)
         dataset.write((0, 0, 0), fortran_data)
         fortran_read_data = dataset.read((0, 0, 0), data_shape)
 
-    assert np.all(fortran_read_data == read_data)
-    assert np.all(fortran_read_data == fortran_data)
+    assert np.array_equiv(fortran_read_data, read_data)
+    assert np.array_equiv(fortran_read_data, fortran_data)
 
 
 def test_row_major_order_with_offset():
@@ -185,7 +209,7 @@ def test_row_major_order_with_offset():
         dataset.write((15, 2, 0), data)
         read_data = dataset.read((15, 2, 0), data_shape)
 
-    assert np.all(data == read_data)
+    assert np.array_equiv(data, read_data)
 
 
 def test_row_major_order_with_different_voxel_size():
@@ -195,7 +219,7 @@ def test_row_major_order_with_different_voxel_size():
         dataset.write((3, 1, 0), data)
         read_data = dataset.read((3, 1, 0), data_shape)
 
-    assert np.all(data == read_data)
+    assert np.array_equiv(data, read_data)
 
 
 def test_row_major_order_with_channels():
@@ -207,7 +231,7 @@ def test_row_major_order_with_channels():
         dataset.write((3, 1, 0), data)
         read_data = dataset.read((3, 1, 0), data_shape[1:])
 
-    assert np.all(data == read_data)
+    assert np.array_equiv(data, read_data)
 
 
 def test_row_major_order_with_channels_and_different_voxel_size():
@@ -219,7 +243,7 @@ def test_row_major_order_with_channels_and_different_voxel_size():
         dataset.write((3, 1, 0), data)
         read_data = dataset.read((3, 1, 0), data_shape[1:])
 
-    assert np.all(data == read_data)
+    assert np.array_equiv(data, read_data)
 
 
 def test_column_major_order_with_channels_and_different_voxel_size():
@@ -231,7 +255,7 @@ def test_column_major_order_with_channels_and_different_voxel_size():
         dataset.write((3, 1, 0), data)
         read_data = dataset.read((3, 1, 0), data_shape[1:])
 
-    assert np.all(data == read_data)
+    assert np.array_equiv(data, read_data)
 
 
 def test_view_on_np_array():
@@ -242,7 +266,7 @@ def test_view_on_np_array():
         dataset.write((3, 1, 0), data)
         read_data = dataset.read((3, 1, 0), data.shape)
 
-    assert np.all(data == read_data)
+    assert np.array_equiv(data, read_data)
 
 
 def test_not_too_much_data_is_written():
@@ -257,9 +281,9 @@ def write_and_test_in_given_order(wkw_path, order):
             before = dataset.read((0, 0, 0), (1, 2, 3))
             after = dataset.read((0, 0, 38), (35, 35, 26))
 
-            assert np.all(data == read_data)
-            assert np.all(before == 1)
-            assert np.all(after == 1)
+            assert np.array_equiv(data, read_data)
+            assert np.array_equiv(before, 1)
+            assert np.array_equiv(after, 1)
 
     write_and_test_in_given_order("tests/tmp", "F")
     write_and_test_in_given_order("tests/tmp2", "C")
@@ -281,7 +305,7 @@ def test_multiple_writes_and_reads():
         ] = data
 
         read_data = dataset.read((0, 0, 0), (200, 200, 200))
-        assert np.all(mem_buffer == read_data)
+        assert np.array_equiv(mem_buffer, read_data)
 
 
 def test_multi_channel_column_major_order():
@@ -295,7 +319,7 @@ def test_multi_channel_column_major_order():
         dataset.write(offset, data)
 
         read_data = dataset.read(offset, data_shape[1:])
-        assert np.all(data == read_data)
+        assert np.array_equiv(data, read_data)
 
 
 def test_big_read():
16 changes: 13 additions & 3 deletions rust/src/file.rs
@@ -302,7 +302,8 @@ impl File {
 
         let result = match self.header.block_type {
             BlockType::Raw => self.write_block_raw(buf),
-            BlockType::LZ4 | BlockType::LZ4HC => self.write_block_lz4(buf),
+            BlockType::LZ4 => self.write_block_lz4(buf, BlockType::LZ4),
+            BlockType::LZ4HC => self.write_block_lz4(buf, BlockType::LZ4HC),
         };
 
         // advance
@@ -328,10 +329,19 @@ impl File {
         }
     }
 
-    fn write_block_lz4(&mut self, buf: &[u8]) -> Result<usize> {
+    fn write_block_lz4(&mut self, buf: &[u8], block_type: BlockType) -> Result<usize> {
         // compress data
         let mut buf_lz4 = &mut *self.disk_block_buf.as_mut().unwrap();
-        let len_lz4 = lz4::compress_hc(buf, &mut buf_lz4)?;
+        let len_lz4 = match block_type {
+            BlockType::LZ4 => lz4::compress(buf, &mut buf_lz4)?,
+            BlockType::LZ4HC => lz4::compress_hc(buf, &mut buf_lz4)?,
+            _ => {
+                return Err(format!(
+                    "Invalid block_type {:?} for compression.",
+                    block_type
+                ));
+            }
+        };
 
         // write data
         self.file
19 changes: 19 additions & 0 deletions rust/src/lz4.rs
@@ -6,6 +6,25 @@ pub fn compress_bound(input_size: usize) -> usize {
     unsafe { liblz4::LZ4_compressBound(input_size as i32) as usize }
 }
 
+pub fn compress(src_buf: &[u8], dst_buf: &mut [u8]) -> Result<usize> {
+    let src_size = src_buf.len() as i32;
+    let dst_capacity = dst_buf.len() as i32;
+
+    let dst_len = unsafe {
+        liblz4::LZ4_compress_default(
+            std::mem::transmute::<&[u8], &[i8]>(src_buf).as_ptr(),
+            std::mem::transmute::<&mut [u8], &mut [i8]>(dst_buf).as_mut_ptr(),
+            src_size,
+            dst_capacity,
+        )
+    };
+
+    match dst_len == 0 {
+        true => Err(String::from("Error in LZ4_compress_default")),
+        false => Ok(dst_len as usize),
+    }
+}
+
 pub fn compress_hc(src_buf: &[u8], dst_buf: &mut [u8]) -> Result<usize> {
     let src_size = src_buf.len() as i32;
     let dst_capacity = dst_buf.len() as i32;
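
For orientation, here is a minimal sketch of how the two lz4 entry points pair with `compress_bound`, mirroring the dispatch in `write_block_lz4` above. This helper is an illustration, not code from the commit; it assumes it lives in this crate, where `BlockType`, the string-error `Result` alias, and the `lz4` module are in scope:

    // Hypothetical helper (not part of this commit): compress one block
    // into a worst-case-sized buffer and trim it to the produced length.
    fn compress_block(block_type: BlockType, src: &[u8]) -> Result<Vec<u8>> {
        // LZ4_compressBound gives the worst-case output size for any input.
        let mut dst = vec![0u8; lz4::compress_bound(src.len())];
        let len = match block_type {
            BlockType::LZ4 => lz4::compress(src, &mut dst)?,
            BlockType::LZ4HC => lz4::compress_hc(src, &mut dst)?,
            _ => return Err(String::from("Invalid block_type for compression.")),
        };
        dst.truncate(len); // keep only the compressed bytes
        Ok(dst)
    }

Note that in file.rs the outer match already routes BlockType::Raw to write_block_raw, so the `_` arm there (and here) is purely defensive.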
