Skip to content

Commit

Permalink
Merge pull request #57 from jamesmudd/string-dataset-fix
Browse files Browse the repository at this point in the history
String dataset fix
  • Loading branch information
jamesmudd authored Feb 27, 2019
2 parents b11130b + 109ff00 commit d0af7a6
Show file tree
Hide file tree
Showing 7 changed files with 16 additions and 27 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# jHDF - Pure Java HDF5 library
[![Build Status](https://dev.azure.com/jamesmudd/jhdf/_apis/build/status/jhdf-CI)](https://dev.azure.com/jamesmudd/jhdf/_build/latest?definitionId=3) [![Quality Gate Status](https://sonarcloud.io/api/project_badges/measure?project=jamesmudd_jhdf&metric=alert_status)](https://sonarcloud.io/dashboard?id=jamesmudd_jhdf) [![Coverage](https://sonarcloud.io/api/project_badges/measure?project=jamesmudd_jhdf&metric=coverage)](https://sonarcloud.io/dashboard?id=jamesmudd_jhdf) [ ![Download](https://api.bintray.com/packages/jamesmudd/jhdf/jhdf/images/download.svg) ](https://bintray.com/jamesmudd/jhdf/jhdf/_latestVersion) [![Javadocs](http://javadoc.io/badge/io.jhdf/jhdf.svg)](http://javadoc.io/doc/io.jhdf/jhdf)

This is a hobby project for me to see if I can write a loader for HDF5 files. The file format specification is available from the HDF Group [here](https://support.hdfgroup.org/HDF5/doc/H5.format.html).
This is a hobby project for me to see if I can write a loader for HDF5 files. The file format specification is available from the HDF Group [here](https://support.hdfgroup.org/HDF5/doc/H5.format.html). More information on the format is available on [Wikipedia](https://en.wikipedia.org/wiki/Hierarchical_Data_Format).

The intention is to make a clean Java API to access HDF5 data. Currently the project is targeting HDF5 1.8 read-only compatibility. For progress, see the [change log](CHANGES.md).

Expand Down
2 changes: 1 addition & 1 deletion jhdf/src/main/java/io/jhdf/ObjectHeader.java
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ private ObjectHeaderV2(FileChannel fc, Superblock sb, long address) {
}

private ByteBuffer readMessages(FileChannel fc, Superblock sb, ByteBuffer bb) throws IOException {
while (bb.remaining() > 4) {
while (bb.remaining() >= 8) {
Message m = Message.readObjectHeaderV2Message(bb, sb);
messages.add(m);

Expand Down
29 changes: 4 additions & 25 deletions jhdf/src/main/java/io/jhdf/dataset/DatasetReader.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package io.jhdf.dataset;

import static java.nio.charset.StandardCharsets.US_ASCII;

import java.lang.reflect.Array;
import java.math.BigInteger;
import java.nio.ByteBuffer;
Expand All @@ -9,7 +11,6 @@
import java.nio.IntBuffer;
import java.nio.LongBuffer;
import java.nio.ShortBuffer;
import java.nio.charset.Charset;
import java.util.Arrays;

import io.jhdf.Utils;
Expand All @@ -18,7 +19,6 @@
import io.jhdf.object.datatype.FixedPoint;
import io.jhdf.object.datatype.FloatingPoint;
import io.jhdf.object.datatype.StringData;
import io.jhdf.object.datatype.VariableLength;

/**
* <p>
Expand Down Expand Up @@ -108,11 +108,6 @@ private DatasetReader() {
StringData stringData = (StringData) type;
int stringLength = stringData.getSize();
fillFixedLentghStringData(data, dimensions, buffer, stringLength);
} else if (type instanceof VariableLength) {
VariableLength variableLentgh = (VariableLength) type;
Charset encoding = variableLentgh.getEncoding();
int stringLength = variableLentgh.getSize();
fillVariableLentghStringData(data, dimensions, buffer, stringLength, encoding);
}

return data;
Expand Down Expand Up @@ -270,24 +265,8 @@ private static void fillFixedLentghStringData(Object data, int[] dims, ByteBuffe
} else {
for (int i = 0; i < dims[0]; i++) {
buffer.position(i * stringLength);
Array.set(data, i, Utils.readUntilNull(buffer));
}
}
}

private static void fillVariableLentghStringData(Object data, int[] dims, ByteBuffer buffer, int stringLength,
Charset charset) {
if (dims.length > 1) {
for (int i = 0; i < dims[0]; i++) {
Object newArray = Array.get(data, i);
fillData(newArray, stripLeadingIndex(dims), buffer);
}
} else {
byte[] elementBuffer = new byte[stringLength];
for (int i = 0; i < dims[0]; i++) {
buffer.position(i * stringLength);
buffer.get(elementBuffer);
Array.set(data, i, charset.decode(ByteBuffer.wrap(elementBuffer)).toString());
ByteBuffer elementBuffer = Utils.createSubBuffer(buffer, stringLength);
Array.set(data, i, US_ASCII.decode(elementBuffer).toString().trim());
}
}
}
Expand Down
4 changes: 4 additions & 0 deletions jhdf/src/test/java/io/jhdf/dataset/StringDatasetTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,16 @@ Collection<DynamicNode> stringDataset1DTests() {
return Arrays.asList(
dynamicTest("earliest - fixed ASCII",
createTest(earliestHdfFile, "/fixed_lentgh_ascii")),
dynamicTest("earliest - fixed ASCII 1 char",
createTest(earliestHdfFile, "/fixed_lentgh_ascii_1_char")),
dynamicTest("earliest - variable ASCII",
createTest(earliestHdfFile, "/variable_lentgh_ascii")),
dynamicTest("earliest - variable UTF8",
createTest(earliestHdfFile, "/variable_lentgh_utf8")),
dynamicTest("latest - fixed ASCII",
createTest(latestHdfFile, "/fixed_lentgh_ascii")),
dynamicTest("latest - fixed ASCII 1 char",
createTest(latestHdfFile, "/fixed_lentgh_ascii_1_char")),
dynamicTest("latest - variable ASCII",
createTest(latestHdfFile, "/variable_lentgh_ascii")),
dynamicTest("latest - variable UTF8",
Expand Down
6 changes: 6 additions & 0 deletions jhdf/src/test/resources/io/jhdf/string_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@ def write_string_datasets(f):
for i in range(10):
fixed_ds[i] = ('string number ' + str(i)).encode('ascii')

# Fixed length (15) ASCII dataset, exactly the length of 'string number 0'
fixed_length = 'S15'
fixed_ds = f.create_dataset('fixed_lentgh_ascii_1_char', (10,), dtype=fixed_length)
for i in range(10):
fixed_ds[i] = ('string number ' + str(i)).encode('ascii')

# Variable length ASCII dataset
ascii = h5py.special_dtype(vlen=bytes)
varaible_ascii_ds = f.create_dataset('variable_lentgh_ascii', (10,), dtype=ascii)
Expand Down
Binary file modified jhdf/src/test/resources/io/jhdf/test_string_datasets_earliest.hdf5
Binary file not shown.
Binary file modified jhdf/src/test/resources/io/jhdf/test_string_datasets_latest.hdf5
Binary file not shown.

0 comments on commit d0af7a6

Please sign in to comment.