diff --git a/jhdf/src/main/java/io/jhdf/Utils.java b/jhdf/src/main/java/io/jhdf/Utils.java index c5dd7c60..21a186c0 100644 --- a/jhdf/src/main/java/io/jhdf/Utils.java +++ b/jhdf/src/main/java/io/jhdf/Utils.java @@ -456,4 +456,15 @@ private static void flattenInternal(Object data, List flat) { flat.add(data); } } + + public static int totalChunks(int[] datasetDimensions, int[] chunkDimensions) { + int chunks = 1; + for (int i = 0; i < datasetDimensions.length; i++) { + int chunksInDim = datasetDimensions[i] / chunkDimensions[i]; + // If there is a partial chunk then we need to add one chunk in this dim + if(datasetDimensions[i] % chunkDimensions[i] != 0 ) chunksInDim++; + chunks *= chunksInDim; + } + return chunks; + } } diff --git a/jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetV4.java b/jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetV4.java index e17ef193..5c86a257 100644 --- a/jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetV4.java +++ b/jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetV4.java @@ -17,9 +17,9 @@ import io.jhdf.dataset.chunked.indexing.EmptyChunkIndex; import io.jhdf.dataset.chunked.indexing.ExtensibleArrayIndex; import io.jhdf.dataset.chunked.indexing.FixedArrayIndex; +import io.jhdf.dataset.chunked.indexing.ImplicitChunkIndex; import io.jhdf.dataset.chunked.indexing.SingleChunkIndex; import io.jhdf.exceptions.HdfException; -import io.jhdf.exceptions.UnsupportedHdfException; import io.jhdf.object.message.DataLayoutMessage.ChunkedDataLayoutMessageV4; import io.jhdf.storage.HdfBackingStorage; import org.apache.commons.lang3.ArrayUtils; @@ -85,7 +85,9 @@ protected Map initialize() { chunkIndex = new SingleChunkIndex(layoutMessage, datasetInfo); break; case 2: // Implicit - throw new UnsupportedHdfException("Implicit indexing is currently not supported"); + logger.debug("Reading implicit indexed dataset"); + chunkIndex = new ImplicitChunkIndex(layoutMessage.getAddress(), datasetInfo); + break; case 3: // Fixed 
/*
 * This file is part of jHDF. A pure Java library for accessing HDF5 files.
 *
 * https://jhdf.io
 *
 * Copyright (c) 2024 James Mudd
 *
 * MIT License see 'LICENSE' file
 */
package io.jhdf.dataset.chunked.indexing;

import io.jhdf.Utils;
import io.jhdf.dataset.chunked.Chunk;
import io.jhdf.dataset.chunked.DatasetInfo;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

/**
 * Chunk index for implicitly indexed datasets. No index structure is read from
 * the file: every chunk has the same size and chunk {@code i} is located at
 * {@code baseAddress + i * chunkSize}.
 */
public class ImplicitChunkIndex implements ChunkIndex {

    private final int chunkSize;
    private final int[] datasetDimensions;
    private final int[] chunkDimensions;
    private final long baseAddress;

    /**
     * Creates an index over the chunks starting at the given address.
     *
     * @param baseAddress the address of the first chunk
     * @param datasetInfo supplies the chunk size in bytes and the dataset and chunk dimensions
     */
    public ImplicitChunkIndex(long baseAddress, DatasetInfo datasetInfo) {
        this.baseAddress = baseAddress;
        this.chunkSize = datasetInfo.getChunkSizeInBytes();
        this.datasetDimensions = datasetInfo.getDatasetDimensions();
        this.chunkDimensions = datasetInfo.getChunkDimensions();
    }

    /**
     * Builds the full list of chunks by computing each chunk's address and offset.
     *
     * @return one chunk per chunk position in the dataset, in chunk index order
     */
    @Override
    public Collection<Chunk> getAllChunks() {
        int totalChunks = Utils.totalChunks(datasetDimensions, chunkDimensions);
        List<Chunk> chunks = new ArrayList<>(totalChunks);
        for (int i = 0; i < totalChunks; i++) {
            // Widen to long before multiplying: in int arithmetic i * chunkSize
            // wraps around once the byte offset exceeds Integer.MAX_VALUE
            long chunkAddress = baseAddress + (long) i * chunkSize;
            chunks.add(new ChunkImpl(chunkAddress,
                chunkSize,
                Utils.chunkIndexToChunkOffset(i, chunkDimensions, datasetDimensions)));
        }
        return chunks;
    }

}
/dev/null +++ b/jhdf/src/test/java/io/jhdf/dataset/chunked/indexing/ImplicitIndexTest.java @@ -0,0 +1,57 @@ +/* + * This file is part of jHDF. A pure Java library for accessing HDF5 files. + * + * https://jhdf.io + * + * Copyright (c) 2024 James Mudd + * + * MIT License see 'LICENSE' file + */ +package io.jhdf.dataset.chunked.indexing; + +import io.jhdf.HdfFile; +import io.jhdf.api.Dataset; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import static io.jhdf.TestUtils.loadTestHdfFile; +import static org.assertj.core.api.AssertionsForClassTypes.assertThat; + +class ImplicitIndexTest { + + private static final String HDF5_TEST_FILE_NAME = "implicit_index_datasets.hdf5"; + + private static HdfFile hdfFile; + + @BeforeAll + static void setup() throws Exception { + hdfFile = loadTestHdfFile(HDF5_TEST_FILE_NAME); + } + + @AfterAll + static void tearDown() { + hdfFile.close(); + } + + @Test + void testDataReadCorrectly() { + // Unfiltered + Dataset implicitIndex = hdfFile.getDatasetByPath("implicit_index_exact"); + int[] implicitIndexDataFlat = (int[]) implicitIndex.getDataFlat(); + assertThat(implicitIndexDataFlat).isEqualTo(expectedData(Math.toIntExact(implicitIndex.getSize()))); + + Dataset indexMismatch = hdfFile.getDatasetByPath("implicit_index_mismatch"); + int[] indexMismatchDataFlat = (int[]) indexMismatch.getDataFlat(); + assertThat(indexMismatchDataFlat).isEqualTo(expectedData(Math.toIntExact(indexMismatch.getSize()))); + + } + + private int[] expectedData(int length) { + int[] data = new int[length]; + for (int i = 0; i < length; i++) { + data[i] = i; + } + return data; + } +} diff --git a/jhdf/src/test/resources/hdf5/implicit_index_datasets.hdf5 b/jhdf/src/test/resources/hdf5/implicit_index_datasets.hdf5 new file mode 100644 index 00000000..627eda66 Binary files /dev/null and b/jhdf/src/test/resources/hdf5/implicit_index_datasets.hdf5 differ diff --git 
# -------------------------------------------------------------------------------
# This file is part of jHDF. A pure Java library for accessing HDF5 files.
#
# https://jhdf.io
#
# Copyright (c) 2024 James Mudd
#
# MIT License see 'LICENSE' file
# -------------------------------------------------------------------------------
import h5py
import numpy


def create_early_allocated_chunked_dataset(hdf_file, name, data, chunk_dims):
    """Create a chunked int32 dataset with early allocation and write data to it.

    Uses the low-level h5py API so the allocation time can be set on the
    dataset creation property list. Early allocation of an unfiltered chunked
    dataset is intended to produce an implicitly indexed dataset.

    :param hdf_file: open, writable h5py.File to create the dataset in
    :param name: dataset name as a str (encoded to bytes for the low-level API)
    :param data: numpy array to write; its shape defines the dataspace
    :param chunk_dims: tuple of chunk sizes, one per dimension of data
    """
    dataspace = h5py.h5s.create_simple(data.shape)  # Create simple dataspace
    datatype = h5py.h5t.NATIVE_INT32

    # Dataset creation property list
    dcpl = h5py.h5p.create(h5py.h5p.DATASET_CREATE)
    dcpl.set_alloc_time(h5py.h5d.ALLOC_TIME_EARLY)
    dcpl.set_chunk(chunk_dims)

    # Dataset name must be bytes
    dataset = h5py.h5d.create(hdf_file.id, name.encode('utf-8'), datatype, dataspace, dcpl)

    # Write data to the dataset
    dataset.write(h5py.h5s.ALL, h5py.h5s.ALL, data)
    dataset.close()


# The context manager closes the file even if dataset creation fails part way
with h5py.File("implicit_index_datasets.hdf5", "w", libver='latest') as f:
    # Chunks of 5 divide the 20 elements exactly
    create_early_allocated_chunked_dataset(
        f, "implicit_index_exact", numpy.arange(20, dtype=numpy.int32), (5,))

    # Second dataset with chunk size mismatch: (3, 2) chunks do not divide (10, 5)
    create_early_allocated_chunked_dataset(
        f, "implicit_index_mismatch", numpy.arange(50, dtype=numpy.int32).reshape(10, 5), (3, 2))