-
-
Notifications
You must be signed in to change notification settings - Fork 39
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #655 from jamesmudd/implicit-index
Add Implicit Index Support
- Loading branch information
Showing
6 changed files
with
174 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
46 changes: 46 additions & 0 deletions
46
jhdf/src/main/java/io/jhdf/dataset/chunked/indexing/ImplicitChunkIndex.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
/* | ||
* This file is part of jHDF. A pure Java library for accessing HDF5 files. | ||
* | ||
* https://jhdf.io | ||
* | ||
* Copyright (c) 2024 James Mudd | ||
* | ||
* MIT License see 'LICENSE' file | ||
*/ | ||
package io.jhdf.dataset.chunked.indexing; | ||
|
||
import io.jhdf.Utils; | ||
import io.jhdf.dataset.chunked.Chunk; | ||
import io.jhdf.dataset.chunked.DatasetInfo; | ||
|
||
import java.util.ArrayList; | ||
import java.util.Collection; | ||
import java.util.List; | ||
|
||
public class ImplicitChunkIndex implements ChunkIndex { | ||
|
||
private final int chunkSize; | ||
private final int[] datasetDimensions; | ||
private final int[] chunkDimensions; | ||
private final long baseAddress; | ||
|
||
public ImplicitChunkIndex(long baseAddress, DatasetInfo datasetInfo) { | ||
this.baseAddress = baseAddress; | ||
this.chunkSize = datasetInfo.getChunkSizeInBytes(); | ||
this.datasetDimensions = datasetInfo.getDatasetDimensions(); | ||
this.chunkDimensions = datasetInfo.getChunkDimensions(); | ||
} | ||
|
||
@Override | ||
public Collection<Chunk> getAllChunks() { | ||
int totalChunks = Utils.totalChunks(datasetDimensions, chunkDimensions); | ||
List<Chunk> chunks = new ArrayList<>(totalChunks); | ||
for (int i = 0; i < totalChunks; i++) { | ||
chunks.add(new ChunkImpl(baseAddress + i* chunkSize, | ||
chunkSize, | ||
Utils.chunkIndexToChunkOffset(i, chunkDimensions, datasetDimensions))); | ||
} | ||
return chunks; | ||
} | ||
|
||
} |
57 changes: 57 additions & 0 deletions
57
jhdf/src/test/java/io/jhdf/dataset/chunked/indexing/ImplicitIndexTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
/* | ||
* This file is part of jHDF. A pure Java library for accessing HDF5 files. | ||
* | ||
* https://jhdf.io | ||
* | ||
* Copyright (c) 2024 James Mudd | ||
* | ||
* MIT License see 'LICENSE' file | ||
*/ | ||
package io.jhdf.dataset.chunked.indexing; | ||
|
||
import io.jhdf.HdfFile; | ||
import io.jhdf.api.Dataset; | ||
import org.junit.jupiter.api.AfterAll; | ||
import org.junit.jupiter.api.BeforeAll; | ||
import org.junit.jupiter.api.Test; | ||
|
||
import static io.jhdf.TestUtils.loadTestHdfFile; | ||
import static org.assertj.core.api.AssertionsForClassTypes.assertThat; | ||
|
||
class ImplicitIndexTest { | ||
|
||
private static final String HDF5_TEST_FILE_NAME = "implicit_index_datasets.hdf5"; | ||
|
||
private static HdfFile hdfFile; | ||
|
||
@BeforeAll | ||
static void setup() throws Exception { | ||
hdfFile = loadTestHdfFile(HDF5_TEST_FILE_NAME); | ||
} | ||
|
||
@AfterAll | ||
static void tearDown() { | ||
hdfFile.close(); | ||
} | ||
|
||
@Test | ||
void testDataReadCorrectly() { | ||
// Unfiltered | ||
Dataset implicitIndex = hdfFile.getDatasetByPath("implicit_index_exact"); | ||
int[] implicitIndexDataFlat = (int[]) implicitIndex.getDataFlat(); | ||
assertThat(implicitIndexDataFlat).isEqualTo(expectedData(Math.toIntExact(implicitIndex.getSize()))); | ||
|
||
Dataset indexMismatch = hdfFile.getDatasetByPath("implicit_index_mismatch"); | ||
int[] indexMismatchDataFlat = (int[]) indexMismatch.getDataFlat(); | ||
assertThat(indexMismatchDataFlat).isEqualTo(expectedData(Math.toIntExact(indexMismatch.getSize()))); | ||
|
||
} | ||
|
||
private int[] expectedData(int length) { | ||
int[] data = new int[length]; | ||
for (int i = 0; i < length; i++) { | ||
data[i] = i; | ||
} | ||
return data; | ||
} | ||
} |
Binary file not shown.
56 changes: 56 additions & 0 deletions
56
jhdf/src/test/resources/scripts/implicit_index_datasets.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
# ------------------------------------------------------------------------------- | ||
# This file is part of jHDF. A pure Java library for accessing HDF5 files. | ||
# | ||
# https://jhdf.io | ||
# | ||
# Copyright (c) 2024 James Mudd | ||
# | ||
# MIT License see 'LICENSE' file | ||
# ------------------------------------------------------------------------------- | ||
import h5py | ||
import numpy | ||
|
||
def write_early_allocated_dataset(h5_file, name, values, chunk_shape):
    """Create a chunked, early-allocated int32 dataset and write `values` into it.

    Uses the h5py low-level API so that the allocation time can be set on the
    dataset creation property list.

    h5_file     -- open h5py.File to create the dataset in
    name        -- dataset name as str (encoded to UTF-8 bytes for the low-level API)
    values      -- numpy array to store; its shape defines the dataspace
    chunk_shape -- tuple of chunk dimensions
    """
    space = h5py.h5s.create_simple(values.shape)  # Simple dataspace matching the data
    int32_type = h5py.h5t.NATIVE_INT32

    # Dataset creation property list: early allocation + chunked layout
    create_plist = h5py.h5p.create(h5py.h5p.DATASET_CREATE)
    create_plist.set_alloc_time(h5py.h5d.ALLOC_TIME_EARLY)
    create_plist.set_chunk(chunk_shape)

    # Dataset name must be bytes
    dset = h5py.h5d.create(h5_file.id, name.encode('utf-8'), int32_type, space, create_plist)

    # Write the whole array to the whole dataset
    dset.write(h5py.h5s.ALL, h5py.h5s.ALL, values)
    dset.close()


f = h5py.File("implicit_index_datasets.hdf5", "w", libver='latest')

# First dataset: 20 values in chunks of 5, so the chunks tile the data exactly
write_early_allocated_dataset(f, "implicit_index_exact", numpy.arange(20), (5,))

# Second dataset: (10, 5) data in (3, 2) chunks, which do not divide the data shape evenly
write_early_allocated_dataset(f, "implicit_index_mismatch", numpy.arange(50).reshape(10, 5), (3, 2))

f.close()