From c924d03c109d94c2748cdb5ee3f405b325f1e163 Mon Sep 17 00:00:00 2001
From: James Mudd
Date: Mon, 25 Nov 2024 20:47:18 +0000
Subject: [PATCH 1/6] Start adding ImplicitChunkIndex

---
 .../dataset/chunked/ChunkedDatasetV4.java     |  5 ++-
 .../chunked/indexing/ImplicitChunkIndex.java  | 39 +++++++++++++++++++
 2 files changed, 43 insertions(+), 1 deletion(-)
 create mode 100644 jhdf/src/main/java/io/jhdf/dataset/chunked/indexing/ImplicitChunkIndex.java

diff --git a/jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetV4.java b/jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetV4.java
index e17ef193..2be73520 100644
--- a/jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetV4.java
+++ b/jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetV4.java
@@ -17,6 +17,7 @@
 import io.jhdf.dataset.chunked.indexing.EmptyChunkIndex;
 import io.jhdf.dataset.chunked.indexing.ExtensibleArrayIndex;
 import io.jhdf.dataset.chunked.indexing.FixedArrayIndex;
+import io.jhdf.dataset.chunked.indexing.ImplicitChunkIndex;
 import io.jhdf.dataset.chunked.indexing.SingleChunkIndex;
 import io.jhdf.exceptions.HdfException;
 import io.jhdf.exceptions.UnsupportedHdfException;
@@ -85,7 +86,9 @@ protected Map initialize() {
 					chunkIndex = new SingleChunkIndex(layoutMessage, datasetInfo);
 					break;
 				case 2: // Implicit
-					throw new UnsupportedHdfException("Implicit indexing is currently not supported");
+					logger.debug("Reading implicit indexed dataset");
+					chunkIndex = new ImplicitChunkIndex(layoutMessage.getAddress(), datasetInfo);
+					break;
 				case 3: // Fixed array
 					logger.debug("Reading fixed array indexed dataset");
 					chunkIndex = new FixedArrayIndex(hdfBackingStorage, layoutMessage.getAddress(), datasetInfo);
diff --git a/jhdf/src/main/java/io/jhdf/dataset/chunked/indexing/ImplicitChunkIndex.java b/jhdf/src/main/java/io/jhdf/dataset/chunked/indexing/ImplicitChunkIndex.java
new file mode 100644
index 00000000..9d66c42a
--- /dev/null
+++ b/jhdf/src/main/java/io/jhdf/dataset/chunked/indexing/ImplicitChunkIndex.java
@@ -0,0 +1,39 @@
+/*
+ * This file is part of jHDF. A pure Java library for accessing HDF5 files.
+ *
+ * https://jhdf.io
+ *
+ * Copyright (c) 2024 James Mudd
+ *
+ * MIT License see 'LICENSE' file
+ */
+package io.jhdf.dataset.chunked.indexing;
+
+import io.jhdf.dataset.chunked.Chunk;
+import io.jhdf.dataset.chunked.DatasetInfo;
+import io.jhdf.object.message.DataLayoutMessage.ChunkedDataLayoutMessageV4;
+
+import java.util.Collection;
+import java.util.Collections;
+
+public class ImplicitChunkIndex implements ChunkIndex {
+
+	private final int unfilteredChunkSize;
+	private final int[] datasetDimensions;
+	private final int[] chunkDimensions;
+	private final long baseAddress;
+
+	public ImplicitChunkIndex(long baseAddress, DatasetInfo datasetInfo) {
+
+		this.baseAddress = baseAddress;
+		this.unfilteredChunkSize = datasetInfo.getChunkSizeInBytes();
+		this.datasetDimensions = datasetInfo.getDatasetDimensions();
+		this.chunkDimensions = datasetInfo.getChunkDimensions();
+	}
+
+	@Override
+	public Collection<Chunk> getAllChunks() {
+		return Collections.emptyList();
+	}
+
+}
From 37f4c19e1d5aa64088eb7252d1224d33e6533069 Mon Sep 17 00:00:00 2001
From: James Mudd
Date: Sun, 8 Dec 2024 12:09:04 +0000
Subject: [PATCH 2/6] Working implicit index impl

---
Review notes (ignored when the patch is applied):
  * getAllChunks() lays chunks out back to back: chunk i starts at
    baseAddress + i * chunkSize. The product is now computed in long;
    as an int product it wrapped once the byte offset passed 2 GiB.
  * Utils.totalChunks() multiplies the per-dimension chunk counts with
    Math.multiplyExact, so an overflowing chunk count raises
    ArithmeticException instead of silently wrapping.
  * Both changes stay on existing lines, so hunk ranges and the diffstat
    below are unchanged. The blob ids on the "index" lines are those of
    the original commits and do not describe the amended content.

 jhdf/src/main/java/io/jhdf/Utils.java         | 11 +++++++++++
 .../chunked/indexing/ImplicitChunkIndex.java  | 17 +++++++++++++----
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/jhdf/src/main/java/io/jhdf/Utils.java b/jhdf/src/main/java/io/jhdf/Utils.java
index c5dd7c60..21a186c0 100644
--- a/jhdf/src/main/java/io/jhdf/Utils.java
+++ b/jhdf/src/main/java/io/jhdf/Utils.java
@@ -456,4 +456,15 @@ private static void flattenInternal(Object data, List flat) {
 			flat.add(data);
 		}
 	}
+
+	public static int totalChunks(int[] datasetDimensions, int[] chunkDimensions) {
+		int chunks = 1;
+		for (int i = 0; i < datasetDimensions.length; i++) {
+			int chunksInDim = datasetDimensions[i] / chunkDimensions[i];
+			// If there is a partial chunk then we need to add one chunk in this dim
+			if(datasetDimensions[i] % chunkDimensions[i] != 0 ) chunksInDim++;
+			chunks = Math.multiplyExact(chunks, chunksInDim); // throws ArithmeticException rather than wrapping on overflow
+		}
+		return chunks;
+	}
 }
diff --git a/jhdf/src/main/java/io/jhdf/dataset/chunked/indexing/ImplicitChunkIndex.java b/jhdf/src/main/java/io/jhdf/dataset/chunked/indexing/ImplicitChunkIndex.java
index 9d66c42a..3ee47884 100644
--- a/jhdf/src/main/java/io/jhdf/dataset/chunked/indexing/ImplicitChunkIndex.java
+++ b/jhdf/src/main/java/io/jhdf/dataset/chunked/indexing/ImplicitChunkIndex.java
@@ -9,16 +9,18 @@
  */
 package io.jhdf.dataset.chunked.indexing;
 
+import io.jhdf.Utils;
 import io.jhdf.dataset.chunked.Chunk;
 import io.jhdf.dataset.chunked.DatasetInfo;
-import io.jhdf.object.message.DataLayoutMessage.ChunkedDataLayoutMessageV4;
 
+import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
+import java.util.List;
 
 public class ImplicitChunkIndex implements ChunkIndex {
 
-	private final int unfilteredChunkSize;
+	private final int chunkSize;
 	private final int[] datasetDimensions;
 	private final int[] chunkDimensions;
 	private final long baseAddress;
@@ -26,14 +28,21 @@ public class ImplicitChunkIndex implements ChunkIndex {
 	public ImplicitChunkIndex(long baseAddress, DatasetInfo datasetInfo) {
 
 		this.baseAddress = baseAddress;
-		this.unfilteredChunkSize = datasetInfo.getChunkSizeInBytes();
+		this.chunkSize = datasetInfo.getChunkSizeInBytes();
 		this.datasetDimensions = datasetInfo.getDatasetDimensions();
 		this.chunkDimensions = datasetInfo.getChunkDimensions();
 	}
 
 	@Override
 	public Collection<Chunk> getAllChunks() {
-		return Collections.emptyList();
+		int totalChunks = Utils.totalChunks(datasetDimensions, chunkDimensions);
+		List<Chunk> chunks = new ArrayList<>(totalChunks);
+		for (int i = 0; i < totalChunks; i++) {
+			chunks.add(new ChunkImpl(baseAddress + (long) i * chunkSize, // long math: an int product wraps past 2 GiB
+				chunkSize,
+				Utils.chunkIndexToChunkOffset(i, chunkDimensions, datasetDimensions)));
+		}
+		return chunks;
 	}
 
 }
From 3b0d74dbef117d483751bd47fb38617b25ed2e8b Mon Sep 17 00:00:00 2001
From: James Mudd
Date: Fri, 10 Jan 2025 22:24:17 +0000
Subject: [PATCH 3/6] Add test file for implicit index

---
Review notes (ignored when the patch is applied):
  * NOTE(review): this copy of the patch is incomplete. The binary
    literal for implicit_index_datasets.hdf5 stops short, the diff that
    creates implicit_index_datasets.py (48 lines per the diffstat) is
    absent, and the header of patch 4/6 that follows has lost its
    "From" lines. The surviving text is kept exactly as received;
    re-export the series from the repository before applying it.

 .../chunked/indexing/ImplicitChunkIndex.java  |   1 -
 .../hdf5/implicit_index_datasets.hdf5         | Bin 0 -> 2416 bytes
 .../scripts/implicit_index_datasets.py        |  48 ++++++++++++++++++
 3 files changed, 48 insertions(+), 1 deletion(-)
 create mode 100644 jhdf/src/test/resources/hdf5/implicit_index_datasets.hdf5
 create mode 100644 jhdf/src/test/resources/scripts/implicit_index_datasets.py

diff --git a/jhdf/src/main/java/io/jhdf/dataset/chunked/indexing/ImplicitChunkIndex.java b/jhdf/src/main/java/io/jhdf/dataset/chunked/indexing/ImplicitChunkIndex.java
index 3ee47884..ad945d66 100644
--- a/jhdf/src/main/java/io/jhdf/dataset/chunked/indexing/ImplicitChunkIndex.java
+++ b/jhdf/src/main/java/io/jhdf/dataset/chunked/indexing/ImplicitChunkIndex.java
@@ -26,7 +26,6 @@ public class ImplicitChunkIndex implements ChunkIndex {
 	private final long baseAddress;
 
 	public ImplicitChunkIndex(long baseAddress, DatasetInfo datasetInfo) {
-
 		this.baseAddress = baseAddress;
 		this.chunkSize = datasetInfo.getChunkSizeInBytes();
 		this.datasetDimensions = datasetInfo.getDatasetDimensions();
diff --git a/jhdf/src/test/resources/hdf5/implicit_index_datasets.hdf5 b/jhdf/src/test/resources/hdf5/implicit_index_datasets.hdf5
new file mode 100644
index 0000000000000000000000000000000000000000..627eda66cfc7402df6844884694335a8bd9c5b54
GIT binary patch
literal 2416
zcmeH}yG{Z@6o${t0>TOkilTS}@Au0in4qE4SlSSSL^lybA_b+fv$wYQ1uSfBeFrav
z6^)gp{%3Yh+>lUOknBl5{+VUYnc2VCGq>f^e#*_ePCQN$KQ3MO{sbC^kcotXFSbcvY`i@h(qmtxBzRQf)rl%CT8qlIvJf_pa(E
z&Fc9pikKL*`X|?W?RRseV(>UTCZ)1OS{X43n3`~sq|$iXh?x~Aic>m?c$!p1BO!`k
z@#4v*^rTcizCOEpGY@ZIj%(j6YPECT#jA|*%2AGv=tD8
Date: Sat, 11 Jan 2025 22:20:38 +0000
Subject: [PATCH 4/6] Add test checking the data

---
 .../chunked/indexing/ImplicitIndexTest.java   | 57 +++++++++++++++++++
 1 file changed, 57 insertions(+)
 create mode 100644 jhdf/src/test/java/io/jhdf/dataset/chunked/indexing/ImplicitIndexTest.java

diff --git a/jhdf/src/test/java/io/jhdf/dataset/chunked/indexing/ImplicitIndexTest.java b/jhdf/src/test/java/io/jhdf/dataset/chunked/indexing/ImplicitIndexTest.java
new file mode 100644
index 00000000..83a0b9ff
--- /dev/null
+++ b/jhdf/src/test/java/io/jhdf/dataset/chunked/indexing/ImplicitIndexTest.java
@@ -0,0 +1,57 @@
+/*
+ * This file is part of jHDF. A pure Java library for accessing HDF5 files.
+ *
+ * https://jhdf.io
+ *
+ * Copyright (c) 2024 James Mudd
+ *
+ * MIT License see 'LICENSE' file
+ */
+package io.jhdf.dataset.chunked.indexing;
+
+import io.jhdf.HdfFile;
+import io.jhdf.api.Dataset;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+
+import static io.jhdf.TestUtils.loadTestHdfFile;
+import static org.assertj.core.api.AssertionsForClassTypes.assertThat;
+
+class ImplicitIndexTest {
+
+	private static final String HDF5_TEST_FILE_NAME = "implicit_index_datasets.hdf5";
+
+	private static HdfFile hdfFile;
+
+	@BeforeAll
+	static void setup() throws Exception {
+		hdfFile = loadTestHdfFile(HDF5_TEST_FILE_NAME);
+	}
+
+	@AfterAll
+	static void tearDown() {
+		hdfFile.close();
+	}
+
+	@Test
+	void testDataReadCorrectly() {
+		// Unfiltered
+		Dataset implicitIndex = hdfFile.getDatasetByPath("implicit_index_exact");
+		int[] implicitIndexDataFlat = (int[]) implicitIndex.getDataFlat();
+		assertThat(implicitIndexDataFlat).isEqualTo(expectedData(Math.toIntExact(implicitIndex.getSize())));
+
+		Dataset indexMismatch = hdfFile.getDatasetByPath("implicit_index_mismatch");
+		int[] indexMismatchDataFlat = (int[]) indexMismatch.getDataFlat();
+		assertThat(indexMismatchDataFlat).isEqualTo(expectedData(Math.toIntExact(indexMismatch.getSize())));
+
+	}
+
+	private int[] expectedData(int length) {
+		int[] data = new int[length];
+		for (int i = 0; i < length; i++) {
+			data[i] = i;
+		}
+		return data;
+	}
+}
From d106f4979f0b96ab6b7c775f3dd87f171b10ab7c Mon Sep 17 00:00:00 2001
From: James Mudd
Date: Sat, 11 Jan 2025 22:33:01 +0000
Subject: [PATCH 5/6] Add header

---
 .../test/resources/scripts/implicit_index_datasets.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/jhdf/src/test/resources/scripts/implicit_index_datasets.py b/jhdf/src/test/resources/scripts/implicit_index_datasets.py
index 79eeddf5..c3f5f0cc 100644
--- a/jhdf/src/test/resources/scripts/implicit_index_datasets.py
+++ b/jhdf/src/test/resources/scripts/implicit_index_datasets.py
@@ -1,7 +1,15 @@
+# -------------------------------------------------------------------------------
+# This file is part of jHDF. A pure Java library for accessing HDF5 files.
+#
+# https://jhdf.io
+#
+# Copyright (c) 2024 James Mudd
+#
+# MIT License see 'LICENSE' file
+# -------------------------------------------------------------------------------
 import h5py
 import numpy
 
-
 f = h5py.File("implicit_index_datasets.hdf5", "w", libver='latest')
 
 data = numpy.arange(20)
From fe4e6e1746572f177a48bd9a6d03abc3b86793e2 Mon Sep 17 00:00:00 2001
From: James Mudd
Date: Sat, 11 Jan 2025 22:34:10 +0000
Subject: [PATCH 6/6] Linting

---
 jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetV4.java | 1 -
 .../io/jhdf/dataset/chunked/indexing/ImplicitChunkIndex.java     | 1 -
 2 files changed, 2 deletions(-)

diff --git a/jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetV4.java b/jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetV4.java
index 2be73520..5c86a257 100644
--- a/jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetV4.java
+++ b/jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetV4.java
@@ -20,7 +20,6 @@
 import io.jhdf.dataset.chunked.indexing.ImplicitChunkIndex;
 import io.jhdf.dataset.chunked.indexing.SingleChunkIndex;
 import io.jhdf.exceptions.HdfException;
-import io.jhdf.exceptions.UnsupportedHdfException;
 import io.jhdf.object.message.DataLayoutMessage.ChunkedDataLayoutMessageV4;
 import io.jhdf.storage.HdfBackingStorage;
 import org.apache.commons.lang3.ArrayUtils;
diff --git a/jhdf/src/main/java/io/jhdf/dataset/chunked/indexing/ImplicitChunkIndex.java b/jhdf/src/main/java/io/jhdf/dataset/chunked/indexing/ImplicitChunkIndex.java
index ad945d66..ef275769 100644
--- a/jhdf/src/main/java/io/jhdf/dataset/chunked/indexing/ImplicitChunkIndex.java
+++ b/jhdf/src/main/java/io/jhdf/dataset/chunked/indexing/ImplicitChunkIndex.java
@@ -15,7 +15,6 @@
 
 import java.util.ArrayList;
 import java.util.Collection;
-import java.util.Collections;
 import java.util.List;
 
 public class ImplicitChunkIndex implements ChunkIndex {