Actually use LZ4 (#84)
* actually use lz4 and lz4hc

* fix tests

* dry (don't repeat yourself)
normanrz authored Jun 2, 2023
1 parent b5c8f8a commit f1cdb9e
Showing 4 changed files with 77 additions and 23 deletions.
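
In short: `write_block_lz4` previously compressed every block with `lz4::compress_hc`, so datasets created with `BLOCK_TYPE_LZ4` were in fact written with the slower LZ4HC encoder. Both encoders emit the same compressed block format, so existing data stays readable; the fix restores the intended speed/ratio trade-off by threading the block type through to the compressor. The key change in rust/src/file.rs, excerpted from the diff below:

    // before: both block types funneled into the LZ4HC path
    BlockType::LZ4 | BlockType::LZ4HC => self.write_block_lz4(buf),

    // after: each block type picks its matching compressor
    BlockType::LZ4 => self.write_block_lz4(buf, BlockType::LZ4),
    BlockType::LZ4HC => self.write_block_lz4(buf, BlockType::LZ4HC),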
1 change: 1 addition & 0 deletions .github/workflows/python-module.yml
@@ -145,6 +145,7 @@ jobs:
       - build_mac
       - build_win
     runs-on: ubuntu-latest
+    if: startsWith(github.event.ref, 'refs/tags')
     steps:
       - uses: actions/checkout@v3
         with:
64 changes: 44 additions & 20 deletions python/tests/test_wkw.py
@@ -65,7 +65,31 @@ def test_readwrite():
         path.getsize(path.join("tests/tmp", "z0", "y0", "x0.wkw"))
         == np.prod(SIZE) * (dataset.header.file_len**3) + header_size
     )
-    assert np.all(dataset.read(POSITION, SIZE) == test_data)
+    assert np.array_equiv(dataset.read(POSITION, SIZE), test_data)
+
+
+def test_readwrite_lz4():
+    with wkw.Dataset.create(
+        "tests/tmp",
+        wkw.Header(np.uint8, block_type=wkw.Header.BLOCK_TYPE_LZ4, file_len=4),
+    ) as dataset:
+        SIZE128 = (128, 128, 128)
+        test_data = generate_test_data(dataset.header.voxel_type, SIZE128)
+
+        dataset.write(POSITION, test_data)
+        assert np.array_equiv(dataset.read(POSITION, SIZE128), test_data)
+
+
+def test_readwrite_lz4hc():
+    with wkw.Dataset.create(
+        "tests/tmp",
+        wkw.Header(np.uint8, block_type=wkw.Header.BLOCK_TYPE_LZ4HC, file_len=4),
+    ) as dataset:
+        SIZE128 = (128, 128, 128)
+        test_data = generate_test_data(dataset.header.voxel_type, SIZE128)
+
+        dataset.write(POSITION, test_data)
+        assert np.array_equiv(dataset.read(POSITION, SIZE128), test_data)
 
 
 def test_readwrite_live_compression():
@@ -87,13 +111,13 @@ def test_readwrite_live_compression():
     )
 
     with wkw.Dataset.open("tests/tmp") as dataset:
-        assert np.all(dataset.read(POSITION, SIZE128) == test_data)
+        assert np.array_equiv(dataset.read(POSITION, SIZE128), test_data)
 
 
 def test_readwrite_live_compression_should_enforce_full_file_write():
     with pytest.raises(Exception):
         with wkw.Dataset.create(
-            "tests/tmp", wkw.Header(np.uint8, block_type=BLOCK_TYPE_LZ4)
+            "tests/tmp", wkw.Header(np.uint8, block_type=wkw.Header.BLOCK_TYPE_LZ4)
         ) as dataset:
             test_data = generate_test_data(dataset.header.voxel_type)
             dataset.write(POSITION, test_data)
@@ -113,7 +137,7 @@ def test_readwrite_live_compression_should_not_allow_inconsistent_writes():
             dataset.write(POSITION, test_data)
 
     with wkw.Dataset.open("tests/tmp") as dataset:
-        assert np.all(dataset.read(POSITION, SIZE129) == empty_data)
+        assert np.array_equiv(dataset.read(POSITION, SIZE129), empty_data)
 
 
 def test_readwrite_live_compression_should_truncate():
@@ -138,7 +162,7 @@ def test_readwrite_live_compression_should_truncate():
     assert empty_compressed_size < random_compressed_size
 
     with wkw.Dataset.open("tests/tmp") as dataset:
-        assert np.all(dataset.read(POSITION, SIZE128) == ones_data)
+        assert np.array_equiv(dataset.read(POSITION, SIZE128), ones_data)
 
 
 def test_compress():
@@ -157,7 +181,7 @@ def test_compress():
             path.getsize(path.join("tests/tmp2", "z0", "y0", "x0.wkw"))
             < np.prod(SIZE) * (dataset2.header.file_len**3) + header_size
         )
-        assert np.all(dataset2.read(POSITION, SIZE) == test_data)
+        assert np.array_equiv(dataset2.read(POSITION, SIZE), test_data)
 
 
 def test_row_major_order():
@@ -167,15 +191,15 @@ def test_row_major_order():
         dataset.write((0, 0, 0), data)
         read_data = dataset.read((0, 0, 0), data_shape)
 
-    assert np.all(data == read_data)
+    assert np.array_equiv(data, read_data)
 
     with wkw.Dataset.create("tests/tmp2", wkw.Header(np.uint8)) as dataset:
         fortran_data = np.asfortranarray(data)
         dataset.write((0, 0, 0), fortran_data)
         fortran_read_data = dataset.read((0, 0, 0), data_shape)
 
-    assert np.all(fortran_read_data == read_data)
-    assert np.all(fortran_read_data == fortran_data)
+    assert np.array_equiv(fortran_read_data, read_data)
+    assert np.array_equiv(fortran_read_data, fortran_data)
 
 
 def test_row_major_order_with_offset():
@@ -185,7 +209,7 @@ def test_row_major_order_with_offset():
         dataset.write((15, 2, 0), data)
         read_data = dataset.read((15, 2, 0), data_shape)
 
-    assert np.all(data == read_data)
+    assert np.array_equiv(data, read_data)
 
 
 def test_row_major_order_with_different_voxel_size():
@@ -195,7 +219,7 @@ def test_row_major_order_with_different_voxel_size():
         dataset.write((3, 1, 0), data)
         read_data = dataset.read((3, 1, 0), data_shape)
 
-    assert np.all(data == read_data)
+    assert np.array_equiv(data, read_data)
 
 
 def test_row_major_order_with_channels():
@@ -207,7 +231,7 @@ def test_row_major_order_with_channels():
         dataset.write((3, 1, 0), data)
         read_data = dataset.read((3, 1, 0), data_shape[1:])
 
-    assert np.all(data == read_data)
+    assert np.array_equiv(data, read_data)
 
 
 def test_row_major_order_with_channels_and_different_voxel_size():
@@ -219,7 +243,7 @@ def test_row_major_order_with_channels_and_different_voxel_size():
         dataset.write((3, 1, 0), data)
         read_data = dataset.read((3, 1, 0), data_shape[1:])
 
-    assert np.all(data == read_data)
+    assert np.array_equiv(data, read_data)
 
 
 def test_column_major_order_with_channels_and_different_voxel_size():
@@ -231,7 +255,7 @@ def test_column_major_order_with_channels_and_different_voxel_size():
         dataset.write((3, 1, 0), data)
         read_data = dataset.read((3, 1, 0), data_shape[1:])
 
-    assert np.all(data == read_data)
+    assert np.array_equiv(data, read_data)
 
 
 def test_view_on_np_array():
@@ -242,7 +266,7 @@ def test_view_on_np_array():
         dataset.write((3, 1, 0), data)
         read_data = dataset.read((3, 1, 0), data.shape)
 
-    assert np.all(data == read_data)
+    assert np.array_equiv(data, read_data)
 
 
 def test_not_too_much_data_is_written():
@@ -257,9 +281,9 @@ def write_and_test_in_given_order(wkw_path, order):
             before = dataset.read((0, 0, 0), (1, 2, 3))
             after = dataset.read((0, 0, 38), (35, 35, 26))
 
-            assert np.all(data == read_data)
-            assert np.all(before == 1)
-            assert np.all(after == 1)
+            assert np.array_equiv(data, read_data)
+            assert np.array_equiv(before, 1)
+            assert np.array_equiv(after, 1)
 
     write_and_test_in_given_order("tests/tmp", "F")
     write_and_test_in_given_order("tests/tmp2", "C")
@@ -281,7 +305,7 @@ def test_multiple_writes_and_reads():
         ] = data
 
         read_data = dataset.read((0, 0, 0), (200, 200, 200))
-        assert np.all(mem_buffer == read_data)
+        assert np.array_equiv(mem_buffer, read_data)
 
 
 def test_multi_channel_column_major_order():
@@ -295,7 +319,7 @@ def test_multi_channel_column_major_order():
         dataset.write(offset, data)
 
         read_data = dataset.read(offset, data_shape[1:])
-        assert np.all(data == read_data)
+        assert np.array_equiv(data, read_data)
 
 
 def test_big_read():
16 changes: 13 additions & 3 deletions rust/src/file.rs
@@ -302,7 +302,8 @@ impl File {
 
         let result = match self.header.block_type {
             BlockType::Raw => self.write_block_raw(buf),
-            BlockType::LZ4 | BlockType::LZ4HC => self.write_block_lz4(buf),
+            BlockType::LZ4 => self.write_block_lz4(buf, BlockType::LZ4),
+            BlockType::LZ4HC => self.write_block_lz4(buf, BlockType::LZ4HC),
         };
 
         // advance
@@ -328,10 +329,19 @@ impl File {
         }
     }
 
-    fn write_block_lz4(&mut self, buf: &[u8]) -> Result<usize> {
+    fn write_block_lz4(&mut self, buf: &[u8], block_type: BlockType) -> Result<usize> {
         // compress data
         let mut buf_lz4 = &mut *self.disk_block_buf.as_mut().unwrap();
-        let len_lz4 = lz4::compress_hc(buf, &mut buf_lz4)?;
+        let len_lz4 = match block_type {
+            BlockType::LZ4 => lz4::compress(buf, &mut buf_lz4)?,
+            BlockType::LZ4HC => lz4::compress_hc(buf, &mut buf_lz4)?,
+            _ => {
+                return Err(format!(
+                    "Invalid block_type {:?} for compression.",
+                    block_type
+                ));
+            }
+        };
 
         // write data
         self.file
19 changes: 19 additions & 0 deletions rust/src/lz4.rs
@@ -6,6 +6,25 @@ pub fn compress_bound(input_size: usize) -> usize {
     unsafe { liblz4::LZ4_compressBound(input_size as i32) as usize }
 }
 
+pub fn compress(src_buf: &[u8], dst_buf: &mut [u8]) -> Result<usize> {
+    let src_size = src_buf.len() as i32;
+    let dst_capacity = dst_buf.len() as i32;
+
+    let dst_len = unsafe {
+        liblz4::LZ4_compress_default(
+            std::mem::transmute::<&[u8], &[i8]>(src_buf).as_ptr(),
+            std::mem::transmute::<&mut [u8], &mut [i8]>(dst_buf).as_mut_ptr(),
+            src_size,
+            dst_capacity,
+        )
+    };
+
+    match dst_len == 0 {
+        true => Err(String::from("Error in LZ4_compress_default")),
+        false => Ok(dst_len as usize),
+    }
+}
+
 pub fn compress_hc(src_buf: &[u8], dst_buf: &mut [u8]) -> Result<usize> {
     let src_size = src_buf.len() as i32;
     let dst_capacity = dst_buf.len() as i32;
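
For orientation, here is a minimal sketch of how the two lz4 entry points pair with `compress_bound`, mirroring the dispatch in `write_block_lz4` above. This helper is an illustration, not code from the commit; it assumes it lives in this crate, where `BlockType`, the string-error `Result` alias, and the `lz4` module are in scope:

    // Hypothetical helper (not part of this commit): compress one block
    // into a worst-case-sized buffer and trim it to the produced length.
    fn compress_block(block_type: BlockType, src: &[u8]) -> Result<Vec<u8>> {
        // LZ4_compressBound gives the worst-case output size for any input.
        let mut dst = vec![0u8; lz4::compress_bound(src.len())];
        let len = match block_type {
            BlockType::LZ4 => lz4::compress(src, &mut dst)?,
            BlockType::LZ4HC => lz4::compress_hc(src, &mut dst)?,
            _ => return Err(String::from("Invalid block_type for compression.")),
        };
        dst.truncate(len); // keep only the compressed bytes
        Ok(dst)
    }

Note that in file.rs the outer match already routes BlockType::Raw to write_block_raw, so the `_` arm there (and here) is purely defensive.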
