diff --git a/README.md b/README.md index 194d8b1f..163d0593 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # jHDF - Pure Java HDF5 library [![Build Status](https://dev.azure.com/jamesmudd/jhdf/_apis/build/status/jhdf-CI)](https://dev.azure.com/jamesmudd/jhdf/_build/latest?definitionId=3) [![Quality Gate Status](https://sonarcloud.io/api/project_badges/measure?project=jamesmudd_jhdf&metric=alert_status)](https://sonarcloud.io/dashboard?id=jamesmudd_jhdf) [![Coverage](https://sonarcloud.io/api/project_badges/measure?project=jamesmudd_jhdf&metric=coverage)](https://sonarcloud.io/dashboard?id=jamesmudd_jhdf) [ ![Download](https://api.bintray.com/packages/jamesmudd/jhdf/jhdf/images/download.svg) ](https://bintray.com/jamesmudd/jhdf/jhdf/_latestVersion) [![Javadocs](http://javadoc.io/badge/io.jhdf/jhdf.svg)](http://javadoc.io/doc/io.jhdf/jhdf) -This is a hobby project for me to see if I can write a loader for HDF5 files. The file format specification is available from the HDF Group [here](https://support.hdfgroup.org/HDF5/doc/H5.format.html). +This is a hobby project for me to see if I can write a loader for HDF5 files. The file format specification is available from the HDF Group [here](https://support.hdfgroup.org/HDF5/doc/H5.format.html). More information on the format is available on [Wikipedia](https://en.wikipedia.org/wiki/Hierarchical_Data_Format). The intension is to make a clean Java API to access HDF5 data. Currently the project is targeting HDF5 1.8 read-only compatibility. 
For progress see the [change log](CHANGES.md) diff --git a/jhdf/src/main/java/io/jhdf/ObjectHeader.java b/jhdf/src/main/java/io/jhdf/ObjectHeader.java index 926122c4..ad158e2d 100644 --- a/jhdf/src/main/java/io/jhdf/ObjectHeader.java +++ b/jhdf/src/main/java/io/jhdf/ObjectHeader.java @@ -273,7 +273,7 @@ private ObjectHeaderV2(FileChannel fc, Superblock sb, long address) { } private ByteBuffer readMessages(FileChannel fc, Superblock sb, ByteBuffer bb) throws IOException { - while (bb.remaining() > 4) { + while (bb.remaining() >= 8) { Message m = Message.readObjectHeaderV2Message(bb, sb); messages.add(m); diff --git a/jhdf/src/main/java/io/jhdf/dataset/DatasetReader.java b/jhdf/src/main/java/io/jhdf/dataset/DatasetReader.java index e461457d..31ef71be 100644 --- a/jhdf/src/main/java/io/jhdf/dataset/DatasetReader.java +++ b/jhdf/src/main/java/io/jhdf/dataset/DatasetReader.java @@ -1,5 +1,7 @@ package io.jhdf.dataset; +import static java.nio.charset.StandardCharsets.US_ASCII; + import java.lang.reflect.Array; import java.math.BigInteger; import java.nio.ByteBuffer; @@ -9,7 +11,6 @@ import java.nio.IntBuffer; import java.nio.LongBuffer; import java.nio.ShortBuffer; -import java.nio.charset.Charset; import java.util.Arrays; import io.jhdf.Utils; @@ -18,7 +19,6 @@ import io.jhdf.object.datatype.FixedPoint; import io.jhdf.object.datatype.FloatingPoint; import io.jhdf.object.datatype.StringData; -import io.jhdf.object.datatype.VariableLength; /** *

@@ -108,11 +108,6 @@ private DatasetReader() { StringData stringData = (StringData) type; int stringLength = stringData.getSize(); fillFixedLentghStringData(data, dimensions, buffer, stringLength); - } else if (type instanceof VariableLength) { - VariableLength variableLentgh = (VariableLength) type; - Charset encoding = variableLentgh.getEncoding(); - int stringLength = variableLentgh.getSize(); - fillVariableLentghStringData(data, dimensions, buffer, stringLength, encoding); } return data; @@ -270,24 +265,8 @@ private static void fillFixedLentghStringData(Object data, int[] dims, ByteBuffe } else { for (int i = 0; i < dims[0]; i++) { buffer.position(i * stringLength); - Array.set(data, i, Utils.readUntilNull(buffer)); - } - } - } - - private static void fillVariableLentghStringData(Object data, int[] dims, ByteBuffer buffer, int stringLength, - Charset charset) { - if (dims.length > 1) { - for (int i = 0; i < dims[0]; i++) { - Object newArray = Array.get(data, i); - fillData(newArray, stripLeadingIndex(dims), buffer); - } - } else { - byte[] elementBuffer = new byte[stringLength]; - for (int i = 0; i < dims[0]; i++) { - buffer.position(i * stringLength); - buffer.get(elementBuffer); - Array.set(data, i, charset.decode(ByteBuffer.wrap(elementBuffer)).toString()); + ByteBuffer elementBuffer = Utils.createSubBuffer(buffer, stringLength); + Array.set(data, i, US_ASCII.decode(elementBuffer).toString().trim()); } } } diff --git a/jhdf/src/test/java/io/jhdf/dataset/StringDatasetTest.java b/jhdf/src/test/java/io/jhdf/dataset/StringDatasetTest.java index e4154382..5883efd6 100644 --- a/jhdf/src/test/java/io/jhdf/dataset/StringDatasetTest.java +++ b/jhdf/src/test/java/io/jhdf/dataset/StringDatasetTest.java @@ -44,12 +44,16 @@ Collection stringDataset1DTests() { return Arrays.asList( dynamicTest("earliest - fixed ASCII", createTest(earliestHdfFile, "/fixed_lentgh_ascii")), + dynamicTest("earliest - fixed ASCII 1 char", + createTest(earliestHdfFile, 
"/fixed_lentgh_ascii_1_char")), dynamicTest("earliest - variable ASCII", createTest(earliestHdfFile, "/variable_lentgh_ascii")), dynamicTest("earliest - variable UTF8", createTest(earliestHdfFile, "/variable_lentgh_utf8")), dynamicTest("latest - fixed ASCII", createTest(latestHdfFile, "/fixed_lentgh_ascii")), + dynamicTest("latest - fixed ASCII 1 char", + createTest(latestHdfFile, "/fixed_lentgh_ascii_1_char")), dynamicTest("latest - variable ASCII", createTest(latestHdfFile, "/variable_lentgh_ascii")), dynamicTest("latest - variable UTF8", diff --git a/jhdf/src/test/resources/io/jhdf/string_datasets.py b/jhdf/src/test/resources/io/jhdf/string_datasets.py index c55e1f7b..fc5be93c 100644 --- a/jhdf/src/test/resources/io/jhdf/string_datasets.py +++ b/jhdf/src/test/resources/io/jhdf/string_datasets.py @@ -12,6 +12,12 @@ def write_string_datasets(f): for i in range(10): fixed_ds[i] = ('string number ' + str(i)).encode('ascii') + # Fixed length (15) ASCII dataset the exact lentgh of 'string number 0' + fixed_length = 'S15' + fixed_ds = f.create_dataset('fixed_lentgh_ascii_1_char', (10,), dtype=fixed_length) + for i in range(10): + fixed_ds[i] = ('string number ' + str(i)).encode('ascii') + # Variable length ASCII dataset ascii = h5py.special_dtype(vlen=bytes) varaible_ascii_ds = f.create_dataset('variable_lentgh_ascii', (10,), dtype=ascii) diff --git a/jhdf/src/test/resources/io/jhdf/test_string_datasets_earliest.hdf5 b/jhdf/src/test/resources/io/jhdf/test_string_datasets_earliest.hdf5 index b562c42c..00140b89 100644 Binary files a/jhdf/src/test/resources/io/jhdf/test_string_datasets_earliest.hdf5 and b/jhdf/src/test/resources/io/jhdf/test_string_datasets_earliest.hdf5 differ diff --git a/jhdf/src/test/resources/io/jhdf/test_string_datasets_latest.hdf5 b/jhdf/src/test/resources/io/jhdf/test_string_datasets_latest.hdf5 index 9f7f4458..d071d4a5 100644 Binary files a/jhdf/src/test/resources/io/jhdf/test_string_datasets_latest.hdf5 and 
b/jhdf/src/test/resources/io/jhdf/test_string_datasets_latest.hdf5 differ