diff --git a/jhdf/src/main/java/io/jhdf/dataset/ContiguousDatasetImpl.java b/jhdf/src/main/java/io/jhdf/dataset/ContiguousDatasetImpl.java index c2590765..3a6c8da9 100644 --- a/jhdf/src/main/java/io/jhdf/dataset/ContiguousDatasetImpl.java +++ b/jhdf/src/main/java/io/jhdf/dataset/ContiguousDatasetImpl.java @@ -22,22 +22,17 @@ public class ContiguousDatasetImpl extends DatasetBase implements ContiguousDataset { + final ContiguousDataLayoutMessage contiguousDataLayoutMessage; + public ContiguousDatasetImpl(HdfFileChannel hdfFc, long address, String name, Group parent, ObjectHeader oh) { super(hdfFc, address, name, parent, oh); + this.contiguousDataLayoutMessage = getHeaderMessage(ContiguousDataLayoutMessage.class); } @Override public ByteBuffer getDataBuffer() { - ContiguousDataLayoutMessage contiguousDataLayoutMessage = getHeaderMessage(ContiguousDataLayoutMessage.class); - - // Check for empty dataset - if (contiguousDataLayoutMessage.getAddress() == UNDEFINED_ADDRESS) { - return null; - } - try { - ByteBuffer data = hdfFc.map(contiguousDataLayoutMessage.getAddress(), - contiguousDataLayoutMessage.getSize()); + ByteBuffer data = hdfFc.map(contiguousDataLayoutMessage.getAddress(), getSizeInBytes()); convertToCorrectEndiness(data); return data; } catch (Exception e) { @@ -52,8 +47,11 @@ public ByteBuffer getBuffer() { @Override public long getDataAddress() { - ContiguousDataLayoutMessage contiguousDataLayoutMessage = getHeaderMessage(ContiguousDataLayoutMessage.class); return contiguousDataLayoutMessage.getAddress(); } + @Override + public boolean isEmpty() { + return contiguousDataLayoutMessage.getAddress() == UNDEFINED_ADDRESS; + } } diff --git a/jhdf/src/main/java/io/jhdf/dataset/DatasetLoader.java b/jhdf/src/main/java/io/jhdf/dataset/DatasetLoader.java index 9205d562..24f8d3a2 100644 --- a/jhdf/src/main/java/io/jhdf/dataset/DatasetLoader.java +++ b/jhdf/src/main/java/io/jhdf/dataset/DatasetLoader.java @@ -17,7 +17,7 @@ import io.jhdf.dataset.chunked.ChunkedDatasetV4; import io.jhdf.exceptions.HdfException; import io.jhdf.object.message.DataLayoutMessage; -import io.jhdf.object.message.DataLayoutMessage.ChunkedDataLayoutMessageV3; +import io.jhdf.object.message.DataLayoutMessage.ChunkedDataLayoutMessage; import io.jhdf.object.message.DataLayoutMessage.ChunkedDataLayoutMessageV4; import io.jhdf.object.message.DataLayoutMessage.CompactDataLayoutMessage; import io.jhdf.object.message.DataLayoutMessage.ContiguousDataLayoutMessage; @@ -42,7 +42,7 @@ public static Dataset createDataset(HdfFileChannel hdfFc, ObjectHeader oh, Strin } else if (dlm instanceof ContiguousDataLayoutMessage) { return new ContiguousDatasetImpl(hdfFc, address, name, parent, oh); - } else if (dlm instanceof ChunkedDataLayoutMessageV3) { + } else if (dlm instanceof ChunkedDataLayoutMessage) { return new ChunkedDatasetV3(hdfFc, address, name, parent, oh); } else if (dlm instanceof ChunkedDataLayoutMessageV4) { diff --git a/jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetV3.java b/jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetV3.java index 0ee1d4a2..11ad26be 100644 --- a/jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetV3.java +++ b/jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetV3.java @@ -16,7 +16,7 @@ import io.jhdf.btree.BTreeV1; import io.jhdf.btree.BTreeV1Data; import io.jhdf.exceptions.HdfException; -import io.jhdf.object.message.DataLayoutMessage.ChunkedDataLayoutMessageV3; +import io.jhdf.object.message.DataLayoutMessage.ChunkedDataLayoutMessage; import org.apache.commons.lang3.concurrent.ConcurrentException; import org.apache.commons.lang3.concurrent.LazyInitializer; import org.slf4j.Logger; @@ -39,14 +39,14 @@ public class ChunkedDatasetV3 extends ChunkedDatasetBase { private static final Logger logger = LoggerFactory.getLogger(ChunkedDatasetV3.class); - private final ChunkedDataLayoutMessageV3 layoutMessage; + private final ChunkedDataLayoutMessage layoutMessage; private final ChunkLookupLazyInitializer chunkLookupLazyInitializer; public ChunkedDatasetV3(HdfFileChannel hdfFc, long address, String name, Group parent, ObjectHeader oh) { super(hdfFc, address, name, parent, oh); - layoutMessage = oh.getMessageOfType(ChunkedDataLayoutMessageV3.class); + layoutMessage = oh.getMessageOfType(ChunkedDataLayoutMessage.class); chunkLookupLazyInitializer = new ChunkLookupLazyInitializer(); } diff --git a/jhdf/src/main/java/io/jhdf/object/message/DataLayoutMessage.java b/jhdf/src/main/java/io/jhdf/object/message/DataLayoutMessage.java index 9f7ac1ad..a2309528 100644 --- a/jhdf/src/main/java/io/jhdf/object/message/DataLayoutMessage.java +++ b/jhdf/src/main/java/io/jhdf/object/message/DataLayoutMessage.java @@ -9,6 +9,7 @@ */ package io.jhdf.object.message; +import io.jhdf.Constants; import io.jhdf.Superblock; import io.jhdf.Utils; import io.jhdf.exceptions.HdfException; @@ -29,28 +30,75 @@ public DataLayoutMessage(BitSet flags) { public static DataLayoutMessage createDataLayoutMessage(ByteBuffer bb, Superblock sb, BitSet flags) { final byte version = bb.get(); - if (version != 3 && version != 4) { - throw new UnsupportedHdfException( - "Only v3 and v4 data layout messages are supported. Detected version = " + version); + switch (version) { + case 1: + case 2: + return readV1V2Message(bb, sb, flags); + case 3: + case 4: + return readV3V4Message(bb, sb, flags, version); + default: + throw new UnsupportedHdfException("Unsupported data layout message version detected. Detected version = " + version); + } + } + + private static DataLayoutMessage readV1V2Message(ByteBuffer bb, Superblock sb, BitSet flags) { + byte dimensionality = bb.get(); // for chunked is +1 than actual dims + + final byte layoutClass = bb.get(); + + bb.position(bb.position() + 5); // skip reserved bytes + + final long dataAddress; + if (layoutClass != 0) { // not compact + dataAddress = Utils.readBytesAsUnsignedLong(bb, sb.getSizeOfOffsets()); + } else { + dataAddress = Constants.UNDEFINED_ADDRESS; + } + + // If chunked value stored is +1 so correct it here + if (layoutClass == 2) { + dimensionality--; } + int[] dimensions = new int[dimensionality]; + for (int i = 0; i < dimensions.length; i++) { + dimensions[i] = Utils.readBytesAsUnsignedInt(bb, 4); + } + + switch (layoutClass) { + case 0: // Compact Storage + final int compactDataSize = Utils.readBytesAsUnsignedInt(bb, 4); + final ByteBuffer compactDataBuffer = Utils.createSubBuffer(bb, compactDataSize); + return new CompactDataLayoutMessage(flags, compactDataBuffer); + case 1: // Contiguous + return new ContiguousDataLayoutMessage(flags, dataAddress, -1L); + case 2: // Chunked + final int dataElementSize = Utils.readBytesAsUnsignedInt(bb, 4); + return new ChunkedDataLayoutMessage(flags, dataAddress, dataElementSize, dimensions); + default: + throw new UnsupportedHdfException("Unknown storage layout " + layoutClass); + } + } + + private static DataLayoutMessage readV3V4Message(ByteBuffer bb, Superblock sb, BitSet flags, byte version) { final byte layoutClass = bb.get(); switch (layoutClass) { - case 0: // Compact Storage - return new CompactDataLayoutMessage(bb, flags); - case 1: // Contiguous Storage - return new ContiguousDataLayoutMessage(bb, sb, flags); - case 2: // Chunked Storage - if (version == 3) { - return new ChunkedDataLayoutMessageV3(bb, sb, flags); - } else { // v4 - return new ChunkedDataLayoutMessageV4(bb, sb, flags); - } - case 3: // Virtual storage - throw new UnsupportedHdfException("Virtual storage is not supported"); - default: - throw new UnsupportedHdfException("Unknown storage layout " + layoutClass); + case 0: // Compact Storage + return new CompactDataLayoutMessage(bb, flags); + case 1: // Contiguous Storage + return new ContiguousDataLayoutMessage(bb, sb, flags); + case 2: // Chunked Storage + if (version == 3) { + return new ChunkedDataLayoutMessage(bb, sb, flags); + } else { // v4 + return new ChunkedDataLayoutMessageV4(bb, sb, flags); + } + case 3: // Virtual storage + throw new UnsupportedHdfException("Virtual storage is not supported"); + default: + throw new UnsupportedHdfException("Unknown storage layout " + layoutClass); } } @@ -58,10 +106,15 @@ public static class CompactDataLayoutMessage extends DataLayoutMessage { private final ByteBuffer dataBuffer; + private CompactDataLayoutMessage(BitSet flags, ByteBuffer dataBuffer) { + super(flags); + this.dataBuffer = dataBuffer; + } + private CompactDataLayoutMessage(ByteBuffer bb, BitSet flags) { super(flags); final int compactDataSize = Utils.readBytesAsUnsignedInt(bb, 2); - dataBuffer = Utils.createSubBuffer(bb, compactDataSize); + this.dataBuffer = Utils.createSubBuffer(bb, compactDataSize); } @Override @@ -70,7 +123,7 @@ public DataLayout getDataLayout() { } public ByteBuffer getDataBuffer() { - return dataBuffer; + return dataBuffer.slice(); } } @@ -79,6 +132,12 @@ public static class ContiguousDataLayoutMessage extends DataLayoutMessage { private final long address; private final long size; + private ContiguousDataLayoutMessage(BitSet flags, long address, long size) { + super(flags); + this.address = address; + this.size = size; + } + private ContiguousDataLayoutMessage(ByteBuffer bb, Superblock sb, BitSet flags) { super(flags); address = Utils.readBytesAsUnsignedLong(bb, sb.getSizeOfOffsets()); @@ -94,21 +153,31 @@ public long getAddress() { return address; } + /** + * @return size in bytes if known or -1 otherwise + */ public long getSize() { return size; } } - public static class ChunkedDataLayoutMessageV3 extends DataLayoutMessage { + public static class ChunkedDataLayoutMessage extends DataLayoutMessage { - private final long address; + private final long bTreeAddress; private final int size; private final int[] chunkDimensions; - private ChunkedDataLayoutMessageV3(ByteBuffer bb, Superblock sb, BitSet flags) { + public ChunkedDataLayoutMessage(BitSet flags, long bTreeAddress, int size, int[] chunkDimensions) { + super(flags); + this.bTreeAddress = bTreeAddress; + this.size = size; + this.chunkDimensions = ArrayUtils.clone(chunkDimensions); + } + + private ChunkedDataLayoutMessage(ByteBuffer bb, Superblock sb, BitSet flags) { super(flags); final int chunkDimensionality = bb.get() - 1; - address = Utils.readBytesAsUnsignedLong(bb, sb.getSizeOfOffsets()); + bTreeAddress = Utils.readBytesAsUnsignedLong(bb, sb.getSizeOfOffsets()); chunkDimensions = new int[chunkDimensionality]; for (int i = 0; i < chunkDimensions.length; i++) { chunkDimensions[i] = Utils.readBytesAsUnsignedInt(bb, 4); @@ -122,7 +191,7 @@ public DataLayout getDataLayout() { } public long getBTreeAddress() { - return address; + return bTreeAddress; } public int getSize() { @@ -160,7 +229,7 @@ public static class ChunkedDataLayoutMessageV4 extends DataLayoutMessage { private ChunkedDataLayoutMessageV4(ByteBuffer bb, Superblock sb, BitSet flags) { super(flags); - final BitSet chunkedFlags = BitSet.valueOf(new byte[] { bb.get() }); + final BitSet chunkedFlags = BitSet.valueOf(new byte[]{bb.get()}); final int chunkDimensionality = bb.get(); final int dimSizeBytes = bb.get(); @@ -172,37 +241,37 @@ private ChunkedDataLayoutMessageV4(ByteBuffer bb, Superblock sb, BitSet flags) { indexingType = bb.get(); switch (indexingType) { - case 1: // Single Chunk - if (chunkedFlags.get(SINGLE_INDEX_WITH_FILTER)) { - isFilteredSingleChunk = true; - sizeOfFilteredSingleChunk = Utils.readBytesAsUnsignedInt(bb, sb.getSizeOfLengths()); - filterMaskFilteredSingleChunk = BitSet.valueOf(new byte[] { bb.get(), bb.get(), bb.get(), bb.get() }); - } - break; - - case 2: // Implicit - break; // There is nothing for this case - - case 3: // Fixed Array - pageBits = bb.get(); - break; - - case 4: // Extensible Array - maxBits = bb.get(); - indexElements = bb.get(); - minPointers = bb.get(); - minElements = bb.get(); - pageBits = bb.get(); // This is wrong in the spec says 2 bytes its actually 1 - break; - - case 5: // B tree v2 - nodeSize = bb.getInt(); - splitPercent = bb.get(); - mergePercent = bb.get(); - break; - - default: - throw new UnsupportedHdfException("Unrecognized chunk indexing type. type=" + indexingType); + case 1: // Single Chunk + if (chunkedFlags.get(SINGLE_INDEX_WITH_FILTER)) { + isFilteredSingleChunk = true; + sizeOfFilteredSingleChunk = Utils.readBytesAsUnsignedInt(bb, sb.getSizeOfLengths()); + filterMaskFilteredSingleChunk = BitSet.valueOf(new byte[]{bb.get(), bb.get(), bb.get(), bb.get()}); + } + break; + + case 2: // Implicit + break; // There is nothing for this case + + case 3: // Fixed Array + pageBits = bb.get(); + break; + + case 4: // Extensible Array + maxBits = bb.get(); + indexElements = bb.get(); + minPointers = bb.get(); + minElements = bb.get(); + pageBits = bb.get(); // This is wrong in the spec says 2 bytes its actually 1 + break; + + case 5: // B tree v2 + nodeSize = bb.getInt(); + splitPercent = bb.get(); + mergePercent = bb.get(); + break; + + default: + throw new UnsupportedHdfException("Unrecognized chunk indexing type. type=" + indexingType); } address = Utils.readBytesAsUnsignedLong(bb, sb.getSizeOfOffsets()); @@ -258,14 +327,14 @@ public int[] getChunkDimensions() { } public int getSizeOfFilteredSingleChunk() { - if(!isFilteredSingleChunk) { + if (!isFilteredSingleChunk) { throw new HdfException("Requested size of filtered single chunk when its not set."); } return sizeOfFilteredSingleChunk; } public BitSet getFilterMaskFilteredSingleChunk() { - if(!isFilteredSingleChunk){ + if (!isFilteredSingleChunk) { throw new HdfException("Requested filter mask of filtered single chunk when its not set."); } return filterMaskFilteredSingleChunk; diff --git a/jhdf/src/main/java/io/jhdf/object/message/Message.java b/jhdf/src/main/java/io/jhdf/object/message/Message.java index 43861589..e9be496d 100644 --- a/jhdf/src/main/java/io/jhdf/object/message/Message.java +++ b/jhdf/src/main/java/io/jhdf/object/message/Message.java @@ -115,6 +115,8 @@ private static Message readMessage(ByteBuffer bb, Superblock sb, int messageType return new AttributeMessage(bb, sb, flags); case 13: // 0x000D return new ObjectCommentMessage(bb, flags); + case 14: // 0x000E + return new OldObjectModificationTimeMessage(bb, flags); case 16: // 0x0010 return new ObjectHeaderContinuationMessage(bb, sb, flags); case 17: // 0x0011 diff --git a/jhdf/src/main/java/io/jhdf/object/message/OldObjectModificationTimeMessage.java b/jhdf/src/main/java/io/jhdf/object/message/OldObjectModificationTimeMessage.java new file mode 100644 index 00000000..42b7f8a5 --- /dev/null +++ b/jhdf/src/main/java/io/jhdf/object/message/OldObjectModificationTimeMessage.java @@ -0,0 +1,68 @@ +/* + * This file is part of jHDF. A pure Java library for accessing HDF5 files. + * + * http://jhdf.io + * + * Copyright (c) 2020 James Mudd + * + * MIT License see 'LICENSE' file + */ +package io.jhdf.object.message; + +import io.jhdf.Utils; + +import java.nio.ByteBuffer; +import java.time.LocalDateTime; +import java.util.BitSet; + +import static java.lang.Integer.parseInt; +import static java.nio.charset.StandardCharsets.US_ASCII; + +/** + *
+ * Old Object Modification Time Message + *
+ * + *+ * Format + * Spec + *
+ * + * @author James Mudd + */ +public class OldObjectModificationTimeMessage extends Message { + + final LocalDateTime modificationTime; + + public OldObjectModificationTimeMessage(ByteBuffer bb, BitSet flags) { + super(flags); + + final ByteBuffer yearBuffer = Utils.createSubBuffer(bb, 4); + final int year = parseInt(US_ASCII.decode(yearBuffer).toString()); + + final ByteBuffer monthBuffer = Utils.createSubBuffer(bb, 2); + final int month = parseInt(US_ASCII.decode(monthBuffer).toString()); + + final ByteBuffer dayBuffer = Utils.createSubBuffer(bb, 2); + final int day = parseInt(US_ASCII.decode(dayBuffer).toString()); + + final ByteBuffer hourBuffer = Utils.createSubBuffer(bb, 2); + final int hour = parseInt(US_ASCII.decode(hourBuffer).toString()); + + final ByteBuffer minuteBuffer = Utils.createSubBuffer(bb, 2); + final int minute = parseInt(US_ASCII.decode(minuteBuffer).toString()); + + final ByteBuffer secondBuffer = Utils.createSubBuffer(bb, 2); + final int second = parseInt(US_ASCII.decode(secondBuffer).toString()); + + // Skip reserved bytes + bb.position(bb.position() + 2); + + this.modificationTime = LocalDateTime.of(year, month, day, hour, minute, second); + } + + public LocalDateTime getModifiedTime() { + return modificationTime; + } +} diff --git a/jhdf/src/test/resources/hdf5/hdf_v14_test1.hdf5 b/jhdf/src/test/resources/hdf5/hdf_v14_test1.hdf5 new file mode 100644 index 00000000..9c6815e6 Binary files /dev/null and b/jhdf/src/test/resources/hdf5/hdf_v14_test1.hdf5 differ diff --git a/jhdf/src/test/resources/hdf5/hdf_v14_test2.hdf5 b/jhdf/src/test/resources/hdf5/hdf_v14_test2.hdf5 new file mode 100644 index 00000000..7681231a Binary files /dev/null and b/jhdf/src/test/resources/hdf5/hdf_v14_test2.hdf5 differ diff --git a/jhdf/src/test/resources/hdf5/test_compact_datasets_earliest.hdf5 b/jhdf/src/test/resources/hdf5/test_compact_datasets_earliest.hdf5 new file mode 100644 index 00000000..c213a3f9 Binary files /dev/null and b/jhdf/src/test/resources/hdf5/test_compact_datasets_earliest.hdf5 differ diff --git a/jhdf/src/test/resources/hdf5/test_compact_datasets_latest.hdf5 b/jhdf/src/test/resources/hdf5/test_compact_datasets_latest.hdf5 new file mode 100644 index 00000000..cb73fbe0 Binary files /dev/null and b/jhdf/src/test/resources/hdf5/test_compact_datasets_latest.hdf5 differ diff --git a/jhdf/src/test/resources/scripts/compact_datasets.py b/jhdf/src/test/resources/scripts/compact_datasets.py new file mode 100644 index 00000000..0b868fdd --- /dev/null +++ b/jhdf/src/test/resources/scripts/compact_datasets.py @@ -0,0 +1,71 @@ +#------------------------------------------------------------------------------- +# This file is part of jHDF. A pure Java library for accessing HDF5 files. +# +# http://jhdf.io +# +# Copyright (c) 2020 James Mudd +# +# MIT License see 'LICENSE' file +#------------------------------------------------------------------------------- +import h5py + +import numpy as np + +''' +The idea of this test is to write compact datasets +''' +def write_chunked_datasets(f): + + data = np.arange(10) + + compact = h5py.h5p.create(h5py.h5p.DATASET_CREATE) + compact.set_layout(h5py.h5d.COMPACT) + + float_group = f.create_group('float') + float_group.create_dataset('float16', data=data, dtype='f2', dcpl=compact) + float_group.create_dataset('float32', data=data, dtype='f4', dcpl=compact) + float_group.create_dataset('float64', data=data, dtype='f8', dcpl=compact) + + int_group = f.create_group('int') + int_group.create_dataset('int8', data=data, dtype='i1', dcpl=compact) + int_group.create_dataset('int16', data=data, dtype='i2', dcpl=compact) + int_group.create_dataset('int32', data=data, dtype='i4', dcpl=compact) + + string_group = f.create_group('string') + # Fixed length (20) ASCII dataset + fixed_length = 'S20' + fixed_ds = string_group.create_dataset('fixed_length_ascii', (10,), dtype=fixed_length, dcpl=compact) + for i in range(10): + fixed_ds[i] = ('string number ' + str(i)).encode('ascii') + + # Fixed length (15) ASCII dataset the exact length of 'string number 0' + fixed_length = 'S15' + fixed_ds = string_group.create_dataset('fixed_length_ascii_1_char', (10,), dtype=fixed_length, dcpl=compact) + for i in range(10): + fixed_ds[i] = ('string number ' + str(i)).encode('ascii') + + # Variable length ASCII dataset + ascii = h5py.special_dtype(vlen=bytes) + varaible_ascii_ds = string_group.create_dataset('variable_length_ascii', (10,), dtype=ascii, dcpl=compact) + for i in range(10): + varaible_ascii_ds[i] = ('string number ' + str(i)).encode('ascii') + + # Variable length UTF8 dataset + utf8 = h5py.special_dtype(vlen=str) + varaible_ascii_ds = string_group.create_dataset('variable_length_utf8', (10,), dtype=utf8, dcpl=compact) + for i in range(10): + varaible_ascii_ds[i] = 'string number ' + str(i) + + f.flush() + f.close() + +if __name__ == '__main__': + print('Making compact dataset test files...') + + f = h5py.File('test_compact_datasets_earliest.hdf5', 'w', libver='earliest') + write_chunked_datasets(f) + print('created test_compact_datasets_earliest.hdf5') + + f = h5py.File('test_compact_datasets_latest.hdf5', 'w', libver='latest') + write_chunked_datasets(f) + print('created test_compact_datasets_latest.hdf5')