From 3e05f8de9b76a091494f7f0bf32abb974b2eecfb Mon Sep 17 00:00:00 2001
From: James Mudd <james.mudd@gmail.com>
Date: Sun, 9 Feb 2020 19:42:19 +0000
Subject: [PATCH 1/7] Add test files for not allocated datasets

---
 .../hdf5/test_odd_datasets_earliest.hdf5   | Bin 103530 -> 103530 bytes
 .../hdf5/test_odd_datasets_latest.hdf5     | Bin 52374 -> 52374 bytes
 .../test/resources/scripts/odd_datasets.py |   7 +++++--
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/jhdf/src/test/resources/hdf5/test_odd_datasets_earliest.hdf5 b/jhdf/src/test/resources/hdf5/test_odd_datasets_earliest.hdf5
index 6419019812c52bed7123893595f447f429db39e4..2773b90f774c09f9eeda0013f6c3e1a9dd3ccd03 100644
GIT binary patch
delta 318
zcmaF0g6-7`wh0>;H8yT!XJTZWypzdXD>*-}Bs0A<zqB|$FF(GxB)=#zJ(YohAvvQo
zFFQ2_NpQ0ivlS!D-FSz%%@Hh3s~A}(-(DBU<p5L*0ydi_Zj|HFfN&UeHcs5QZ?epW
zE9@Zk3=9SnH=0gXVCE2EV1%e(WZ(cZm>3wCChuWZwPIod%WyDAfW?^^|3d-PL?|T$
z)dx}xF?zzpjZ;BJ3nPq%7{Lhiq5THY$vGP|jN!&IF~Q6Q+ROmLV1Q|C!^Vx@88;qy
P!8q}N-sUUYrriYq4bxK?

delta 93
zcmaF0g6-7`wh0>;IW}%&XJXWtypzeClaT=e3?^<=pRBWyW3vXc4kOF-Us17}BUqYN
rF)~fQy)JO#fe({qHe6u`2{SMlY~1*rapQp(U_9}F*ybzSrriYq_;Dr3

diff --git a/jhdf/src/test/resources/hdf5/test_odd_datasets_latest.hdf5 b/jhdf/src/test/resources/hdf5/test_odd_datasets_latest.hdf5
index 9779c2f9e9b0bcfa182ad433922de6f74df7768b..b2e4b3903f576a6a299a4d3d17d99b2b352da431 100644
GIT binary patch
delta 397
zcmbO>lX==q<_Tu1cjF!6ka2~800YBYRt7MzhfoX?V@<r+6c`v78AOxw^GY()OY=*M
z<MZ<4i%as064O(+GD8(|KxrVA$g5Z6@8J@}q=;-hh`sS96C=xH;~>4s_ZgK0&}BCZ
zGJ&M0hwh#%&OFZ#yAceGj4TWcOhClU!@wv2v=+z)0s#hACI&`kE)F3E1{TKuPym(!
z5ho@eWIm-bf92eTIE@tn8pX)SIN6#>O`JmzXbKaM&&0&c1vhLb(2(8EJhzOw{hd9$
vnSeGiFu){XG@C3au!NE`O7pT)Q;<UHBGB0&;4^2{rp*VLuP|=nn05pJT@HU$

delta 129
zcmbO>lX==q<_TtkA~*NNBIAn5A6dmFMw>7(Xic8Tq$A~NKT*%$!zGAG5m^<8z40a!
yBa2ej-{+I>Gb#z7%Wf8A0!nkKM8r)NXP&p&knKF<W(T$djFTPM_H5#qb_4(+j55Ih

diff --git a/jhdf/src/test/resources/scripts/odd_datasets.py b/jhdf/src/test/resources/scripts/odd_datasets.py
index b498a9c8..b034a2cc 100644
--- a/jhdf/src/test/resources/scripts/odd_datasets.py
+++ b/jhdf/src/test/resources/scripts/odd_datasets.py
@@ -21,9 +21,12 @@ def write_chunked_datasets(f):
     f.create_dataset('8D_int16', data=data, dtype='i2', chunks=(2,3,1,2,3,1,1,2), compression="gzip")
 
     # Small data with inappropriate chunking
-    data = np.arange(5*5*5).reshape(5, 5, 5);
+    data = np.arange(5*5*5).reshape(5, 5, 5)
     f.create_dataset('1D_int16', data=data, dtype='i2', chunks=(4, 4, 4), compression="gzip")
 
+    f.create_dataset('contiguous_no_storage', dtype='i2')
+    f.create_dataset('chunked_no_storage', dtype='i2', shape=(5,), chunks=(2,))
+
     f.flush()
     f.close()
 
@@ -32,7 +35,7 @@ def write_chunked_datasets(f):
 
     f = h5py.File('test_odd_datasets_earliest.hdf5', 'w', libver='earliest')
     write_chunked_datasets(f)
-    print('test_compressed_chunked_datasets_earliest.hdf5')
+    print('created test_odd_datasets_earliest.hdf5')
 
     f = h5py.File('test_odd_datasets_latest.hdf5', 'w', libver='latest')
     write_chunked_datasets(f)

From c533f4ada04394062b0fcd8c4d2996270318e5b2 Mon Sep 17 00:00:00 2001
From: James Mudd <james.mudd@gmail.com>
Date: Sun, 9 Feb 2020 17:03:26 +0000
Subject: [PATCH 2/7] Improve exception when accessing datasets with no storage allocated

---
 .../main/java/io/jhdf/dataset/chunked/ChunkedDatasetV3.java | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetV3.java b/jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetV3.java
index e5247e87..893496f0 100644
--- a/jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetV3.java
+++ b/jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetV3.java
@@ -9,6 +9,7 @@
  */
 package io.jhdf.dataset.chunked;
 
+import io.jhdf.Constants;
 import io.jhdf.HdfFileChannel;
 import io.jhdf.ObjectHeader;
 import io.jhdf.api.Group;
@@ -68,6 +69,10 @@ private final class ChunkLookupLazyInitializer extends LazyInitializer<Map<Chunk
 		protected Map<ChunkOffset, Chunk> initialize() {
 			logger.debug("Creating chunk lookup for '{}'", getPath());
 
+			if(layoutMessage.getBTreeAddress() == Constants.UNDEFINED_ADDRESS) {
+				throw new HdfException("No storage allocated for '" + getPath() + "'");
+			}
+
 			final BTreeV1Data bTree = BTreeV1.createDataBTree(hdfFc, layoutMessage.getBTreeAddress(), getDimensions().length);
 			final Collection<Chunk> allChunks = bTree.getChunks();
 
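A minimal sketch of what this patch changes for a caller (the file and dataset names come from the odd_datasets.py script in patch 1; the wrapper class is illustrative, and later patches in this series relax the failure into an empty chunk lookup):

import io.jhdf.HdfFile;
import io.jhdf.api.Dataset;

import java.io.File;

public class NoStorageExample {
	public static void main(String[] args) {
		try (HdfFile hdfFile = new HdfFile(new File("test_odd_datasets_earliest.hdf5"))) {
			Dataset dataset = hdfFile.getDatasetByPath("/chunked_no_storage");
			// Building the chunk lookup previously tried to read a B-tree at the
			// undefined address; with this patch it fails fast with
			// HdfException: No storage allocated for '/chunked_no_storage'
			dataset.getData();
		}
	}
}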
From c1b1e0d05e5f48da455ed63a318f6e3e08628694 Mon Sep 17 00:00:00 2001
From: James Mudd <james.mudd@gmail.com>
Date: Mon, 17 Aug 2020 21:33:09 +0100
Subject: [PATCH 3/7] Refactor methods to allow distinction between dataset size and storage size

Renames Dataset#getDiskSize -> Dataset#getSizeInBytes
Adds new method Dataset#getStorageInBytes
---
 jhdf/src/main/java/io/jhdf/api/Attribute.java      |  7 +++----
 jhdf/src/main/java/io/jhdf/api/Dataset.java        | 13 ++++++++++---
 .../src/main/java/io/jhdf/dataset/DatasetBase.java | 14 +++++++++-----
 .../jhdf/dataset/chunked/ChunkedDatasetBase.java   | 11 ++++++++++-
 .../io/jhdf/dataset/chunked/ChunkedDatasetV3.java  |  8 ++++++--
 jhdf/src/test/java/io/jhdf/DatasetImplTest.java    |  6 +++---
 .../java/io/jhdf/dataset/EmptyDatasetTest.java     |  2 +-
 .../java/io/jhdf/dataset/ScalarDatasetTest.java    |  2 +-
 .../test/java/io/jhdf/examples/TestAllFiles.java   | 10 +++++-----
 9 files changed, 48 insertions(+), 25 deletions(-)

diff --git a/jhdf/src/main/java/io/jhdf/api/Attribute.java b/jhdf/src/main/java/io/jhdf/api/Attribute.java
index be021f9c..c364e7df 100644
--- a/jhdf/src/main/java/io/jhdf/api/Attribute.java
+++ b/jhdf/src/main/java/io/jhdf/api/Attribute.java
@@ -51,11 +51,10 @@ public interface Attribute {
 	long getSize();
 
 	/**
-	 * Gets the disk size used by this attributes dataset. <blockquote>i.e. number
-	 * of elements * size of each element</blockquote>
+	 * Gets the size of this dataset. <blockquote>i.e. number of elements * size of each element</blockquote>
 	 *
-	 * @return the total number of bytes the attributes dataset is using
-	 * @see Dataset#getDiskSize()
+	 * @return the total number of bytes the dataset is using
+	 * @see Dataset#getSizeInBytes()
 	 */
 	long getDiskSize();
 
diff --git a/jhdf/src/main/java/io/jhdf/api/Dataset.java b/jhdf/src/main/java/io/jhdf/api/Dataset.java
index 0ff679c8..8192ac0a 100644
--- a/jhdf/src/main/java/io/jhdf/api/Dataset.java
+++ b/jhdf/src/main/java/io/jhdf/api/Dataset.java
@@ -27,12 +27,19 @@ public interface Dataset extends Node {
 	long getSize();
 
 	/**
-	 * Gets the disk size used by this dataset. <blockquote>i.e. number of elements
-	 * * size of each element</blockquote>
+	 * Gets the size of this dataset. <blockquote>i.e. number of elements * size of each element</blockquote>
 	 *
 	 * @return the total number of bytes the dataset is using
 	 */
-	long getDiskSize();
+	long getSizeInBytes();
+
+	/**
+	 * Gets the size of the storage used for this dataset in bytes. This may differ from
+	 * {@link #getSizeInBytes()} due to the use of compression.
+	 *
+	 * @return the total number of bytes the dataset's storage is using
+	 */
+	long getStorageInBytes();
 
 	/**
 	 * Gets the dimensions of this dataset
diff --git a/jhdf/src/main/java/io/jhdf/dataset/DatasetBase.java b/jhdf/src/main/java/io/jhdf/dataset/DatasetBase.java
index 51fa2ab5..4531df7d 100644
--- a/jhdf/src/main/java/io/jhdf/dataset/DatasetBase.java
+++ b/jhdf/src/main/java/io/jhdf/dataset/DatasetBase.java
@@ -75,7 +75,7 @@ public long getSize() {
 	}
 
 	@Override
-	public long getDiskSize() {
+	public long getSizeInBytes() {
 		return getSize() * dataType.getSize();
 	}
 
@@ -118,12 +118,11 @@ public DataType getDataType() {
 	public Object getData() {
 		logger.debug("Getting data for '{}'...", getPath());
 
-		final ByteBuffer bb = getDataBuffer();
-		if (bb == null) {
-			// Empty
+		if (isEmpty()) {
 			return null;
 		}
 
+		final ByteBuffer bb = getDataBuffer();
 		final DataType type = getDataType();
 
 		return DatasetReader.readDataset(type, bb, getDimensions(), hdfFc);
@@ -136,7 +135,7 @@ public boolean isScalar() {
 
 	@Override
 	public boolean isEmpty() {
-		return getDiskSize() == 0;
+		return getSizeInBytes() == 0;
 	}
 
 	@Override
@@ -171,4 +170,9 @@ public String toString() {
 	public boolean isVariableLength() {
 		return getDataType() instanceof VariableLength;
 	}
+
+	@Override
+	public long getStorageInBytes() {
+		return getSizeInBytes();
+	}
 }
diff --git a/jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetBase.java b/jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetBase.java
index 0b4a12dd..2d0fe03c 100644
--- a/jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetBase.java
+++ b/jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetBase.java
@@ -143,7 +143,7 @@ public ByteBuffer getDataBuffer() {
 		logger.trace("Getting data buffer for {}", getPath());
 
 		// Need to load the full buffer into memory so create the array
-		final byte[] dataArray = new byte[toIntExact(getDiskSize())];
+		final byte[] dataArray = new byte[toIntExact(getSizeInBytes())];
 		logger.trace("Created data buffer for '{}' of size {} bytes", getPath(), dataArray.length);
 
 		final int elementSize = getDataType().getSize();
@@ -296,4 +296,13 @@ private Chunk getChunk(ChunkOffset chunkOffset) {
 
 	protected abstract Map<ChunkOffset, Chunk> getChunkLookup();
 
+	@Override
+	public boolean isEmpty() {
+		return getChunkLookup().isEmpty();
+	}
+
+	@Override
+	public long getStorageInBytes() {
+		return getChunkLookup().values().stream().mapToLong(Chunk::getSize).sum();
+	}
 }
diff --git a/jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetV3.java b/jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetV3.java
index 893496f0..bde1a347 100644
--- a/jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetV3.java
+++ b/jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetV3.java
@@ -15,6 +15,7 @@ import io.jhdf.api.Group;
 import io.jhdf.btree.BTreeV1;
 import io.jhdf.btree.BTreeV1Data;
+import io.jhdf.exceptions.HdfEmptyDatasetException;
 import io.jhdf.exceptions.HdfException;
 import io.jhdf.object.message.DataLayoutMessage.ChunkedDataLayoutMessageV3;
 import org.apache.commons.lang3.concurrent.ConcurrentException;
@@ -23,8 +24,10 @@
 import org.slf4j.LoggerFactory;
 
 import java.util.Collection;
+import java.util.Collections;
 import java.util.Map;
 import java.util.function.Function;
+import java.util.stream.Collectors;
 
 import static java.util.stream.Collectors.toMap;
 
@@ -67,10 +70,10 @@ public int[] getChunkDimensions() {
 	private final class ChunkLookupLazyInitializer extends LazyInitializer<Map<ChunkOffset, Chunk>> {
 		@Override
 		protected Map<ChunkOffset, Chunk> initialize() {
-			logger.debug("Creating chunk lookup for '{}'", getPath());
+			logger.debug("Creating chunk lookup for [{}]", getPath());
 
 			if(layoutMessage.getBTreeAddress() == Constants.UNDEFINED_ADDRESS) {
-				throw new HdfException("No storage allocated for '" + getPath() + "'");
+				return Collections.emptyMap();
 			}
 
 			final BTreeV1Data bTree = BTreeV1.createDataBTree(hdfFc, layoutMessage.getBTreeAddress(), getDimensions().length);
@@ -81,4 +84,5 @@ protected Map<ChunkOffset, Chunk> initialize() {
 				, Function.identity())); // values
 		}
 	}
+
 }
diff --git a/jhdf/src/test/java/io/jhdf/DatasetImplTest.java b/jhdf/src/test/java/io/jhdf/DatasetImplTest.java
index e8cf53bb..cf02aa98 100644
--- a/jhdf/src/test/java/io/jhdf/DatasetImplTest.java
+++ b/jhdf/src/test/java/io/jhdf/DatasetImplTest.java
@@ -156,7 +156,7 @@ void testByteDatasetSize() {
 	@Test
 	void testByteDatasetDiskSize() {
 		Dataset dataset = (Dataset) hdfFile.getByPath(INT8_PATH);
-		assertThat(dataset.getDiskSize(), is(equalTo(21L)));
+		assertThat(dataset.getSizeInBytes(), is(equalTo(21L)));
 	}
 
 	@Test
@@ -168,7 +168,7 @@ void testShortDatasetSize() {
 	@Test
 	void testShortDatasetDiskSize() {
 		Dataset dataset = (Dataset) hdfFile.getByPath(INT16_PATH);
-		assertThat(dataset.getDiskSize(), is(equalTo(42L)));
+		assertThat(dataset.getSizeInBytes(), is(equalTo(42L)));
 	}
 
 	@Test
@@ -180,7 +180,7 @@ void testInt32DatasetSize() {
 	@Test
 	void testInt32DatasetDiskSize() {
 		Dataset dataset = (Dataset) hdfFile.getByPath(INT32_PATH);
-		assertThat(dataset.getDiskSize(), is(equalTo(84L)));
+		assertThat(dataset.getSizeInBytes(), is(equalTo(84L)));
 	}
 
 	@Test
diff --git a/jhdf/src/test/java/io/jhdf/dataset/EmptyDatasetTest.java b/jhdf/src/test/java/io/jhdf/dataset/EmptyDatasetTest.java
index c8d3f125..49d22667 100644
--- a/jhdf/src/test/java/io/jhdf/dataset/EmptyDatasetTest.java
+++ b/jhdf/src/test/java/io/jhdf/dataset/EmptyDatasetTest.java
@@ -107,7 +107,7 @@ private Executable createTest(HdfFile file, String datasetPath, Class<?> expecte
 			Dataset dataset = file.getDatasetByPath(datasetPath);
 			// should have 0 length dimensions
 			assertThat(dataset.getDimensions(), is(equalTo(new int[0])));
-			assertThat(dataset.getDiskSize(), is(equalTo(0L)));
+			assertThat(dataset.getSizeInBytes(), is(equalTo(0L)));
 			// Should be empty
 			assertThat(dataset.isEmpty(), is(true));
 			// Empty dataset getData should return null
diff --git a/jhdf/src/test/java/io/jhdf/dataset/ScalarDatasetTest.java b/jhdf/src/test/java/io/jhdf/dataset/ScalarDatasetTest.java
index d086569f..39d00241 100644
--- a/jhdf/src/test/java/io/jhdf/dataset/ScalarDatasetTest.java
+++ b/jhdf/src/test/java/io/jhdf/dataset/ScalarDatasetTest.java
@@ -109,7 +109,7 @@ private Executable createTest(HdfFile file, String datasetPath, Object expected)
 			assertThat(dataset.getDimensions(), is(equalTo(new int[0])));
 			assertThat(dataset.isScalar(), is(true));
 			assertThat(dataset.getData(), is(equalTo(expected)));
-			assertThat(dataset.getDiskSize(), is(greaterThan(0L)));
+			assertThat(dataset.getSizeInBytes(), is(greaterThan(0L)));
 		};
 	}
 
diff --git a/jhdf/src/test/java/io/jhdf/examples/TestAllFiles.java b/jhdf/src/test/java/io/jhdf/examples/TestAllFiles.java
index 228e4fee..cccc0710 100644
--- a/jhdf/src/test/java/io/jhdf/examples/TestAllFiles.java
+++ b/jhdf/src/test/java/io/jhdf/examples/TestAllFiles.java
@@ -158,24 +158,24 @@ private void verifyDataset(Dataset dataset, Group group) {
 		if (dataset.isEmpty()) {
 			assertThat(data, is(nullValue()));
 			// Empty so should have 0 size
-			assertThat(dataset.getDiskSize(), is(equalTo(0L)));
+			assertThat(dataset.getStorageInBytes(), is(equalTo(0L)));
 		} else if (dataset.isScalar()) {
 			assertThat(data.getClass(), is(equalTo(dataset.getJavaType())));
 			// Should have some size
-			assertThat(dataset.getDiskSize(), is(greaterThan(0L)));
+			assertThat(dataset.getSizeInBytes(), is(greaterThan(0L)));
 		} else if (dataset.isCompound()) {
 			// Compound datasets are currently returned as maps, maybe a custom CompoundDataset might be better in the future..
 			assertThat(data, is(instanceOf(Map.class)));
 			assertThat((Map<String, Object>) data, is(not(anEmptyMap())));
-			assertThat(dataset.getDiskSize(), is(greaterThan(0L)));
+			assertThat(dataset.getSizeInBytes(), is(greaterThan(0L)));
 		} else if (dataset.isVariableLength()) {
 			assertThat(getDimensions(data)[0], is(equalTo(dims[0])));
-			assertThat(dataset.getDiskSize(), is(greaterThan(0L)));
+			assertThat(dataset.getSizeInBytes(), is(greaterThan(0L)));
 		} else {
 			assertThat(getDimensions(data), is(equalTo(dims)));
 			assertThat(getType(data), is(equalTo(dataset.getJavaType())));
 			// Should have some size
-			assertThat(dataset.getDiskSize(), is(greaterThan(0L)));
+			assertThat(dataset.getSizeInBytes(), is(greaterThan(0L)));
 		}
 
 		if (dataset instanceof ContiguousDataset && !dataset.isEmpty()) {
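A usage sketch of the renamed and added methods (the file comes from patch 1's script; the wrapper class is illustrative, and the 250-byte figure is just the 5*5*5 int16 elements at 2 bytes each):

import io.jhdf.HdfFile;
import io.jhdf.api.Dataset;

import java.io.File;

public class SizeVsStorageExample {
	public static void main(String[] args) {
		try (HdfFile hdfFile = new HdfFile(new File("test_odd_datasets_earliest.hdf5"))) {
			Dataset dataset = hdfFile.getDatasetByPath("/1D_int16");
			long size = dataset.getSizeInBytes();       // number of elements * element size = 250
			long storage = dataset.getStorageInBytes(); // sum of the gzipped chunk sizes on disk
			System.out.println(size + " logical bytes in " + storage + " stored bytes");
		}
	}
}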
From 084774448a3bc2d247bf299bf0106c64484a6072 Mon Sep 17 00:00:00 2001
From: James Mudd <james.mudd@gmail.com>
Date: Mon, 13 Apr 2020 19:42:19 +0100
Subject: [PATCH 4/7] Add support for no storage assigned v4 chunked datasets

---
 .../dataset/chunked/ChunkedDatasetV4.java  | 52 +++++++++++--------
 .../chunked/indexing/EmptyChunkIndex.java  | 14 +++++
 2 files changed, 45 insertions(+), 21 deletions(-)
 create mode 100644 jhdf/src/main/java/io/jhdf/dataset/chunked/indexing/EmptyChunkIndex.java

diff --git a/jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetV4.java b/jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetV4.java
index 1303fa61..39be17b8 100644
--- a/jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetV4.java
+++ b/jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetV4.java
@@ -9,11 +9,13 @@
  */
 package io.jhdf.dataset.chunked;
 
+import io.jhdf.Constants;
 import io.jhdf.HdfFileChannel;
 import io.jhdf.ObjectHeader;
 import io.jhdf.api.Group;
 import io.jhdf.dataset.chunked.indexing.BTreeIndex;
 import io.jhdf.dataset.chunked.indexing.ChunkIndex;
+import io.jhdf.dataset.chunked.indexing.EmptyChunkIndex;
 import io.jhdf.dataset.chunked.indexing.ExtensibleArrayIndex;
 import io.jhdf.dataset.chunked.indexing.FixedArrayIndex;
 import io.jhdf.dataset.chunked.indexing.SingleChunkIndex;
@@ -28,6 +30,7 @@
 
 import java.util.Arrays;
 import java.util.Collection;
+import java.util.Collections;
 import java.util.Map;
 import java.util.function.Function;
 
@@ -71,27 +74,34 @@ protected Map<ChunkOffset, Chunk> initialize() {
 			final DatasetInfo datasetInfo = new DatasetInfo(getChunkSizeInBytes(), getDimensions(), getChunkDimensions());
 			final ChunkIndex chunkIndex;
 
-			switch (layoutMessage.getIndexingType()) {
-				case 1: // Single chunk
-					logger.debug("Reading single chunk indexed dataset");
-					chunkIndex = new SingleChunkIndex(layoutMessage, datasetInfo);
-					break;
-				case 2: // Implicit
-					throw new UnsupportedHdfException("Implicit indexing is currently not supported");
-				case 3: // Fixed array
-					logger.debug("Reading fixed array indexed dataset");
-					chunkIndex = new FixedArrayIndex(hdfFc, layoutMessage.getAddress(), datasetInfo);
-					break;
-				case 4: // Extensible Array
-					logger.debug("Reading extensible array indexed dataset");
-					chunkIndex = new ExtensibleArrayIndex(hdfFc, layoutMessage.getAddress(), datasetInfo);
-					break;
-				case 5: // B Tree V2
-					logger.debug("Reading B tree v2 indexed dataset");
-					chunkIndex = new BTreeIndex(hdfFc, layoutMessage.getAddress(), datasetInfo);
-					break;
-				default:
-					throw new HdfException("Unrecognized chunk indexing type = " + layoutMessage.getIndexingType());
+
+			if(layoutMessage.getAddress() == Constants.UNDEFINED_ADDRESS) {
+				logger.debug("No storage allocated for '{}'", getPath());
+				chunkIndex = new EmptyChunkIndex();
+			} else {
+
+				switch (layoutMessage.getIndexingType()) {
+					case 1: // Single chunk
+						logger.debug("Reading single chunk indexed dataset");
+						chunkIndex = new SingleChunkIndex(layoutMessage, datasetInfo);
+						break;
+					case 2: // Implicit
+						throw new UnsupportedHdfException("Implicit indexing is currently not supported");
+					case 3: // Fixed array
+						logger.debug("Reading fixed array indexed dataset");
+						chunkIndex = new FixedArrayIndex(hdfFc, layoutMessage.getAddress(), datasetInfo);
+						break;
+					case 4: // Extensible Array
+						logger.debug("Reading extensible array indexed dataset");
+						chunkIndex = new ExtensibleArrayIndex(hdfFc, layoutMessage.getAddress(), datasetInfo);
+						break;
+					case 5: // B Tree V2
+						logger.debug("Reading B tree v2 indexed dataset");
+						chunkIndex = new BTreeIndex(hdfFc, layoutMessage.getAddress(), datasetInfo);
+						break;
+					default:
+						throw new HdfException("Unrecognized chunk indexing type = " + layoutMessage.getIndexingType());
+				}
 			}
 
 			final Collection<Chunk> allChunks = chunkIndex.getAllChunks();
diff --git a/jhdf/src/main/java/io/jhdf/dataset/chunked/indexing/EmptyChunkIndex.java b/jhdf/src/main/java/io/jhdf/dataset/chunked/indexing/EmptyChunkIndex.java
new file mode 100644
index 00000000..5ed33c7f
--- /dev/null
+++ b/jhdf/src/main/java/io/jhdf/dataset/chunked/indexing/EmptyChunkIndex.java
@@ -0,0 +1,14 @@
+package io.jhdf.dataset.chunked.indexing;
+
+import io.jhdf.dataset.chunked.Chunk;
+
+import java.util.Collection;
+import java.util.Collections;
+
+public class EmptyChunkIndex implements ChunkIndex {
+
+	@Override
+	public Collection<Chunk> getAllChunks() {
+		return Collections.emptyList();
+	}
+}

From 19d03447d5cef03ddd741beab8c92ed3568dd0cc Mon Sep 17 00:00:00 2001
From: James Mudd <james.mudd@gmail.com>
Date: Mon, 17 Aug 2020 21:42:35 +0100
Subject: [PATCH 5/7] Add tests checking compression ratios

---
 .../dataset/CompressedChunkedDatasetTest.java | 24 ++++++++++---------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/jhdf/src/test/java/io/jhdf/dataset/CompressedChunkedDatasetTest.java b/jhdf/src/test/java/io/jhdf/dataset/CompressedChunkedDatasetTest.java
index 836a6523..b0f6298f 100644
--- a/jhdf/src/test/java/io/jhdf/dataset/CompressedChunkedDatasetTest.java
+++ b/jhdf/src/test/java/io/jhdf/dataset/CompressedChunkedDatasetTest.java
@@ -53,21 +53,21 @@ Collection<DynamicNode> compressedChunkedDatasetReadTests() {
 		// List of all the datasetPaths
 		return Arrays.asList(
 			dynamicContainer(HDF5_TEST_EARLIEST_FILE_NAME, Arrays.asList(
-				dynamicTest("float32", createTest(earliestHdfFile,"/float/float32")),
-				dynamicTest("float64", createTest(earliestHdfFile,"/float/float64")),
-				dynamicTest("int8", createTest(earliestHdfFile,"/int/int8")),
-				dynamicTest("int16", createTest(earliestHdfFile,"/int/int16")),
-				dynamicTest("int32", createTest(earliestHdfFile,"/int/int32")))),
+				dynamicTest("float32", createTest(earliestHdfFile,"/float/float32", 0.4560260586319218)),
+				dynamicTest("float64", createTest(earliestHdfFile,"/float/float64", 1.6374269005847952)),
+				dynamicTest("int8", createTest(earliestHdfFile,"/int/int8", 0.45454545454545453)),
+				dynamicTest("int16", createTest(earliestHdfFile,"/int/int16", 0.2)),
+				dynamicTest("int32", createTest(earliestHdfFile,"/int/int32", 0.625)))),
 
 			dynamicContainer(HDF5_TEST_LATEST_FILE_NAME, Arrays.asList(
-				dynamicTest("float32", createTest(latestHdfFile, "/float/float32")),
-				dynamicTest("float64", createTest(latestHdfFile,"/float/float64")),
-				dynamicTest("int8", createTest(latestHdfFile,"/int/int8")),
-				dynamicTest("int16", createTest(latestHdfFile,"/int/int16")),
-				dynamicTest("int32", createTest(latestHdfFile,"/int/int32")))));
+				dynamicTest("float32", createTest(latestHdfFile, "/float/float32", 0.4560260586319218)),
+				dynamicTest("float64", createTest(latestHdfFile,"/float/float64", 1.6374269005847952)),
+				dynamicTest("int8", createTest(latestHdfFile,"/int/int8", 0.45454545454545453)),
+				dynamicTest("int16", createTest(latestHdfFile,"/int/int16", 0.2)),
+				dynamicTest("int32", createTest(latestHdfFile,"/int/int32", 0.625)))));
 	}
 
-	private Executable createTest(HdfFile hdfFile, String datasetPath) {
+	private Executable createTest(HdfFile hdfFile, String datasetPath, double expectedCompressionRatio) {
 		return () -> {
 			Dataset dataset = hdfFile.getDatasetByPath(datasetPath);
 			Object data = dataset.getData();
@@ -78,6 +78,8 @@ private Executable createTest(HdfFile hdfFile, String datasetPath) {
 				// convert to double
 				assertThat(Double.valueOf(flatData[i].toString()), is(equalTo((double) i)));
 			}
+			double actualCompressionRatio = (double) dataset.getSizeInBytes() / dataset.getStorageInBytes();
+			assertThat(actualCompressionRatio, is(equalTo(expectedCompressionRatio)));
 		};
 	}
 
From 383c1a4a92645923cc7789787150698fe1957d56 Mon Sep 17 00:00:00 2001
From: James Mudd <james.mudd@gmail.com>
Date: Tue, 18 Aug 2020 17:07:27 +0100
Subject: [PATCH 6/7] Fixup PR issues

---
 .../java/io/jhdf/dataset/chunked/ChunkedDatasetV3.java | 2 --
 .../java/io/jhdf/dataset/chunked/ChunkedDatasetV4.java | 1 -
 .../jhdf/dataset/chunked/indexing/EmptyChunkIndex.java | 9 +++++++++
 3 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetV3.java b/jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetV3.java
index bde1a347..0ee1d4a2 100644
--- a/jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetV3.java
+++ b/jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetV3.java
@@ -15,7 +15,6 @@ import io.jhdf.api.Group;
 import io.jhdf.btree.BTreeV1;
 import io.jhdf.btree.BTreeV1Data;
-import io.jhdf.exceptions.HdfEmptyDatasetException;
 import io.jhdf.exceptions.HdfException;
 import io.jhdf.object.message.DataLayoutMessage.ChunkedDataLayoutMessageV3;
 import org.apache.commons.lang3.concurrent.ConcurrentException;
@@ -27,7 +26,6 @@
 import java.util.Collections;
 import java.util.Map;
 import java.util.function.Function;
-import java.util.stream.Collectors;
 
 import static java.util.stream.Collectors.toMap;
 
diff --git a/jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetV4.java b/jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetV4.java
index 39be17b8..dd466f68 100644
--- a/jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetV4.java
+++ b/jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetV4.java
@@ -30,7 +30,6 @@
 
 import java.util.Arrays;
 import java.util.Collection;
-import java.util.Collections;
 import java.util.Map;
 import java.util.function.Function;
 
diff --git a/jhdf/src/main/java/io/jhdf/dataset/chunked/indexing/EmptyChunkIndex.java b/jhdf/src/main/java/io/jhdf/dataset/chunked/indexing/EmptyChunkIndex.java
index 5ed33c7f..34fcc366 100644
--- a/jhdf/src/main/java/io/jhdf/dataset/chunked/indexing/EmptyChunkIndex.java
+++ b/jhdf/src/main/java/io/jhdf/dataset/chunked/indexing/EmptyChunkIndex.java
@@ -1,3 +1,12 @@
+/*
+ * This file is part of jHDF. A pure Java library for accessing HDF5 files.
+ *
+ * http://jhdf.io
+ *
+ * Copyright (c) 2020 James Mudd
+ *
+ * MIT License see 'LICENSE' file
+ */
 package io.jhdf.dataset.chunked.indexing;
 
 import io.jhdf.dataset.chunked.Chunk;

From 9d85dd5aa92b2ba621c7cd938f9f6a16be4f9864 Mon Sep 17 00:00:00 2001
From: James Mudd <james.mudd@gmail.com>
Date: Thu, 20 Aug 2020 17:19:51 +0100
Subject: [PATCH 7/7] Make Attribute consistent with Dataset

---
 jhdf/src/main/java/io/jhdf/AttributeImpl.java         | 2 +-
 jhdf/src/main/java/io/jhdf/api/Attribute.java         | 6 +++---
 jhdf/src/main/java/io/jhdf/api/Dataset.java           | 2 +-
 jhdf/src/test/java/io/jhdf/AttributesTest.java        | 2 +-
 jhdf/src/test/java/io/jhdf/examples/TestAllFiles.java | 6 +++---
 5 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/jhdf/src/main/java/io/jhdf/AttributeImpl.java b/jhdf/src/main/java/io/jhdf/AttributeImpl.java
index 99d9db8f..514ee904 100644
--- a/jhdf/src/main/java/io/jhdf/AttributeImpl.java
+++ b/jhdf/src/main/java/io/jhdf/AttributeImpl.java
@@ -52,7 +52,7 @@ public long getSize() {
 	}
 
 	@Override
-	public long getDiskSize() {
+	public long getSizeInBytes() {
 		return getSize() * message.getDataType().getSize();
 	}
 
diff --git a/jhdf/src/main/java/io/jhdf/api/Attribute.java b/jhdf/src/main/java/io/jhdf/api/Attribute.java
index c364e7df..669fdb61 100644
--- a/jhdf/src/main/java/io/jhdf/api/Attribute.java
+++ b/jhdf/src/main/java/io/jhdf/api/Attribute.java
@@ -51,12 +51,12 @@ public interface Attribute {
 	long getSize();
 
 	/**
-	 * Gets the size of this dataset. <blockquote>i.e. number of elements * size of each element</blockquote>
+	 * Gets the size of this dataset. i.e. <blockquote>number of elements * size of each element</blockquote>
 	 *
-	 * @return the total number of bytes the dataset is using
+	 * @return the total number of bytes this attribute's dataset is using
 	 * @see Dataset#getSizeInBytes()
 	 */
-	long getDiskSize();
+	long getSizeInBytes();
 
 	/**
 	 * Gets the dimensions of this attributes dataset
diff --git a/jhdf/src/main/java/io/jhdf/api/Dataset.java b/jhdf/src/main/java/io/jhdf/api/Dataset.java
index 8192ac0a..c696cd64 100644
--- a/jhdf/src/main/java/io/jhdf/api/Dataset.java
+++ b/jhdf/src/main/java/io/jhdf/api/Dataset.java
@@ -27,7 +27,7 @@ public interface Dataset extends Node {
 	long getSize();
 
 	/**
-	 * Gets the size of this dataset. <blockquote>i.e. number of elements * size of each element</blockquote>
+	 * Gets the size of this dataset. i.e. <blockquote>number of elements * size of each element</blockquote>
 	 *
 	 * @return the total number of bytes the dataset is using
 	 */
diff --git a/jhdf/src/test/java/io/jhdf/AttributesTest.java b/jhdf/src/test/java/io/jhdf/AttributesTest.java
index 9f6d74aa..d978fcc0 100644
--- a/jhdf/src/test/java/io/jhdf/AttributesTest.java
+++ b/jhdf/src/test/java/io/jhdf/AttributesTest.java
@@ -489,7 +489,7 @@ private Executable createTest(HdfFile file, String nodePath, String attributeNam
 				assertThat(attribute.isEmpty(), is(true));
 				assertThat(attribute.isScalar(), is(false));
 				assertThat(attribute.getSize(), is(0L));
-				assertThat(attribute.getDiskSize(), is(0L));
+				assertThat(attribute.getSizeInBytes(), is(0L));
 			} else if (expectedData.getClass().isArray()) { // Array
 				assertThat(attribute.getJavaType(), is(equalTo(getArrayType(expectedData))));
 				assertThat(attribute.isEmpty(), is(false));
diff --git a/jhdf/src/test/java/io/jhdf/examples/TestAllFiles.java b/jhdf/src/test/java/io/jhdf/examples/TestAllFiles.java
index cccc0710..435e6d8d 100644
--- a/jhdf/src/test/java/io/jhdf/examples/TestAllFiles.java
+++ b/jhdf/src/test/java/io/jhdf/examples/TestAllFiles.java
@@ -112,16 +112,16 @@ private void verifyAttributes(Node node) {
 			assertThat(attribute.getJavaType(), is(notNullValue()));
 			if (attribute.isEmpty()) {
 				assertThat(attribute.getSize(), is(equalTo(0L)));
-				assertThat(attribute.getDiskSize(), is(equalTo(0L)));
+				assertThat(attribute.getSizeInBytes(), is(equalTo(0L)));
 				assertThat(attribute.getData(), is(nullValue()));
 			} else if (attribute.isScalar()) {
 				assertThat(attribute.getSize(), is(equalTo(1L)));
-				assertThat(attribute.getDiskSize(), is(greaterThan(0L)));
+				assertThat(attribute.getSizeInBytes(), is(greaterThan(0L)));
 				assertThat(attribute.getData(), is(notNullValue()));
 				assertThat(attribute.getBuffer(), is(notNullValue()));
 			} else {
 				assertThat(attribute.getSize(), is(greaterThan(0L)));
-				assertThat(attribute.getDiskSize(), is(greaterThan(0L)));
+				assertThat(attribute.getSizeInBytes(), is(greaterThan(0L)));
 				assertThat(attribute.getData(), is(notNullValue()));
 				assertThat(attribute.getBuffer(), is(notNullValue()));
 			}
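Taken together, the series makes datasets with no allocated storage read as empty instead of erroring. A sketch of the end state, with the expected results inferred from the overrides added in patches 3 and 4 (the wrapper class is illustrative):

import io.jhdf.HdfFile;
import io.jhdf.api.Dataset;

import java.io.File;

public class NoStorageAfterSeries {
	public static void main(String[] args) {
		try (HdfFile hdfFile = new HdfFile(new File("test_odd_datasets_latest.hdf5"))) {
			Dataset dataset = hdfFile.getDatasetByPath("/chunked_no_storage");
			// ChunkedDatasetBase.isEmpty() is true because the chunk lookup is
			// empty, getStorageInBytes() sums zero chunks, and
			// DatasetBase.getData() returns null for empty datasets
			System.out.println(dataset.isEmpty());           // true
			System.out.println(dataset.getStorageInBytes()); // 0
			System.out.println(dataset.getData());           // null
		}
	}
}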