Merge pull request #162 from jamesmudd/no-allocated-storage
Fix exception when accessing datasets with no storage allocated
jamesmudd authored Aug 20, 2020
2 parents d5e4bb2 + 9d85dd5 commit d3d22d8
Showing 17 changed files with 127 additions and 64 deletions.
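As a caller-level illustration of the behaviour this merge targets, here is a minimal sketch. The file name and dataset path are hypothetical, and opening an HdfFile from a java.nio Path is assumed; the methods shown (getDatasetByPath, isEmpty, getData, getStorageInBytes) are the ones touched or introduced in this diff.

```java
import io.jhdf.HdfFile;
import io.jhdf.api.Dataset;

import java.nio.file.Paths;

public class NoAllocatedStorageSketch {
    public static void main(String[] args) {
        // Hypothetical file containing a chunked dataset that was created but never written,
        // so no storage was ever allocated for it.
        try (HdfFile hdfFile = new HdfFile(Paths.get("example.hdf5"))) {
            Dataset dataset = hdfFile.getDatasetByPath("/never_written"); // illustrative path

            // Before this fix, reading such a dataset could throw; it is now reported as empty.
            System.out.println(dataset.isEmpty());           // true
            System.out.println(dataset.getData());           // null
            System.out.println(dataset.getStorageInBytes()); // 0, since no chunks are allocated
        }
    }
}
```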
2 changes: 1 addition & 1 deletion jhdf/src/main/java/io/jhdf/AttributeImpl.java
@@ -52,7 +52,7 @@ public long getSize() {
}

@Override
public long getDiskSize() {
public long getSizeInBytes() {
return getSize() * message.getDataType().getSize();
}

9 changes: 4 additions & 5 deletions jhdf/src/main/java/io/jhdf/api/Attribute.java
@@ -51,13 +51,12 @@ public interface Attribute {
long getSize();

/**
* Gets the disk size used by this attributes dataset. <blockquote>i.e. number
* of elements * size of each element</blockquote>
* Gets the size of this dataset. i.e. <blockquote>number of elements * size of each element</blockquote>
*
* @return the total number of bytes the attributes dataset is using
* @see Dataset#getDiskSize()
* @return the total number of bytes this attributes dataset is using
* @see Dataset#getSizeInBytes()
*/
long getDiskSize();
long getSizeInBytes();

/**
* Gets the dimensions of this attributes dataset
13 changes: 10 additions & 3 deletions jhdf/src/main/java/io/jhdf/api/Dataset.java
@@ -27,12 +27,19 @@ public interface Dataset extends Node {
long getSize();

/**
* Gets the disk size used by this dataset. <blockquote>i.e. number of elements
* * size of each element</blockquote>
* Gets the size of this dataset. i.e. <blockquote>number of elements * size of each element</blockquote>
*
* @return the total number of bytes the dataset is using
*/
long getDiskSize();
long getSizeInBytes();

/**
* Gets the size of the storage used for this dataset in bytes. This may differ from
* {@link #getSizeInBytes()} due to the use of compression.
*
* @return the total number of bytes the datasets storage is using
*/
long getStorageInBytes();

/**
* Gets the dimensions of this dataset
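To make the distinction between the two accessors on the revised interface concrete, a small hedged sketch follows. The file and dataset path are illustrative, and the remark about contiguous datasets reflects the default implementation added to DatasetBase below.

```java
import io.jhdf.HdfFile;
import io.jhdf.api.Dataset;

import java.nio.file.Paths;

public class SizeVersusStorageSketch {
    public static void main(String[] args) {
        try (HdfFile hdfFile = new HdfFile(Paths.get("compressed.hdf5"))) { // hypothetical file
            Dataset dataset = hdfFile.getDatasetByPath("/some/compressed/dataset"); // hypothetical path

            long sizeInBytes = dataset.getSizeInBytes();       // number of elements * size of each element
            long storageInBytes = dataset.getStorageInBytes(); // bytes the stored (possibly compressed) chunks occupy

            // For compressed chunked datasets the two usually differ; for plain contiguous
            // datasets the default implementation simply returns getSizeInBytes().
            double compressionRatio = (double) sizeInBytes / storageInBytes;
            System.out.println("compression ratio = " + compressionRatio);
        }
    }
}
```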
14 changes: 9 additions & 5 deletions jhdf/src/main/java/io/jhdf/dataset/DatasetBase.java
@@ -75,7 +75,7 @@ public long getSize() {
}

@Override
public long getDiskSize() {
public long getSizeInBytes() {
return getSize() * dataType.getSize();
}

@@ -118,12 +118,11 @@ public DataType getDataType() {
public Object getData() {
logger.debug("Getting data for '{}'...", getPath());

final ByteBuffer bb = getDataBuffer();
if (bb == null) {
// Empty
if (isEmpty()) {
return null;
}

final ByteBuffer bb = getDataBuffer();
final DataType type = getDataType();

return DatasetReader.readDataset(type, bb, getDimensions(), hdfFc);
@@ -136,7 +135,7 @@ public boolean isScalar() {

@Override
public boolean isEmpty() {
return getDiskSize() == 0;
return getSizeInBytes() == 0;
}

@Override
@@ -171,4 +170,9 @@ public String toString() {
public boolean isVariableLength() {
return getDataType() instanceof VariableLength;
}

@Override
public long getStorageInBytes() {
return getSizeInBytes();
}
}
jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetBase.java
@@ -143,7 +143,7 @@ public ByteBuffer getDataBuffer() {
logger.trace("Getting data buffer for {}", getPath());

// Need to load the full buffer into memory so create the array
final byte[] dataArray = new byte[toIntExact(getDiskSize())];
final byte[] dataArray = new byte[toIntExact(getSizeInBytes())];
logger.trace("Created data buffer for '{}' of size {} bytes", getPath(), dataArray.length);

final int elementSize = getDataType().getSize();
@@ -296,4 +296,13 @@ private Chunk getChunk(ChunkOffset chunkOffset) {

protected abstract Map<ChunkOffset, Chunk> getChunkLookup();

@Override
public boolean isEmpty() {
return getChunkLookup().isEmpty();
}

@Override
public long getStorageInBytes() {
return getChunkLookup().values().stream().mapToLong(Chunk::getSize).sum();
}
}
jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetV3.java
@@ -9,6 +9,7 @@
*/
package io.jhdf.dataset.chunked;

import io.jhdf.Constants;
import io.jhdf.HdfFileChannel;
import io.jhdf.ObjectHeader;
import io.jhdf.api.Group;
@@ -22,6 +23,7 @@
import org.slf4j.LoggerFactory;

import java.util.Collection;
import java.util.Collections;
import java.util.Map;
import java.util.function.Function;

@@ -66,7 +68,11 @@ public int[] getChunkDimensions() {
private final class ChunkLookupLazyInitializer extends LazyInitializer<Map<ChunkOffset, Chunk>> {
@Override
protected Map<ChunkOffset, Chunk> initialize() {
logger.debug("Creating chunk lookup for '{}'", getPath());
logger.debug("Creating chunk lookup for [{}]", getPath());

if(layoutMessage.getBTreeAddress() == Constants.UNDEFINED_ADDRESS) {
return Collections.emptyMap();
}

final BTreeV1Data bTree = BTreeV1.createDataBTree(hdfFc, layoutMessage.getBTreeAddress(), getDimensions().length);
final Collection<Chunk> allChunks = bTree.getChunks();
@@ -76,4 +82,5 @@ protected Map<ChunkOffset, Chunk> initialize() {
, Function.identity())); // values
}
}

}
51 changes: 30 additions & 21 deletions jhdf/src/main/java/io/jhdf/dataset/chunked/ChunkedDatasetV4.java
@@ -9,11 +9,13 @@
*/
package io.jhdf.dataset.chunked;

import io.jhdf.Constants;
import io.jhdf.HdfFileChannel;
import io.jhdf.ObjectHeader;
import io.jhdf.api.Group;
import io.jhdf.dataset.chunked.indexing.BTreeIndex;
import io.jhdf.dataset.chunked.indexing.ChunkIndex;
import io.jhdf.dataset.chunked.indexing.EmptyChunkIndex;
import io.jhdf.dataset.chunked.indexing.ExtensibleArrayIndex;
import io.jhdf.dataset.chunked.indexing.FixedArrayIndex;
import io.jhdf.dataset.chunked.indexing.SingleChunkIndex;
@@ -71,27 +73,34 @@ protected Map<ChunkOffset, Chunk> initialize() {

final DatasetInfo datasetInfo = new DatasetInfo(getChunkSizeInBytes(), getDimensions(), getChunkDimensions());
final ChunkIndex chunkIndex;
switch (layoutMessage.getIndexingType()) {
case 1: // Single chunk
logger.debug("Reading single chunk indexed dataset");
chunkIndex = new SingleChunkIndex(layoutMessage, datasetInfo);
break;
case 2: // Implicit
throw new UnsupportedHdfException("Implicit indexing is currently not supported");
case 3: // Fixed array
logger.debug("Reading fixed array indexed dataset");
chunkIndex = new FixedArrayIndex(hdfFc, layoutMessage.getAddress(), datasetInfo);
break;
case 4: // Extensible Array
logger.debug("Reading extensible array indexed dataset");
chunkIndex = new ExtensibleArrayIndex(hdfFc, layoutMessage.getAddress(), datasetInfo);
break;
case 5: // B Tree V2
logger.debug("Reading B tree v2 indexed dataset");
chunkIndex = new BTreeIndex(hdfFc, layoutMessage.getAddress(), datasetInfo);
break;
default:
throw new HdfException("Unrecognized chunk indexing type = " + layoutMessage.getIndexingType());

if(layoutMessage.getAddress() == Constants.UNDEFINED_ADDRESS) {
logger.debug("No storage allocated for '{}'", getPath());
chunkIndex = new EmptyChunkIndex();
} else {

switch (layoutMessage.getIndexingType()) {
case 1: // Single chunk
logger.debug("Reading single chunk indexed dataset");
chunkIndex = new SingleChunkIndex(layoutMessage, datasetInfo);
break;
case 2: // Implicit
throw new UnsupportedHdfException("Implicit indexing is currently not supported");
case 3: // Fixed array
logger.debug("Reading fixed array indexed dataset");
chunkIndex = new FixedArrayIndex(hdfFc, layoutMessage.getAddress(), datasetInfo);
break;
case 4: // Extensible Array
logger.debug("Reading extensible array indexed dataset");
chunkIndex = new ExtensibleArrayIndex(hdfFc, layoutMessage.getAddress(), datasetInfo);
break;
case 5: // B Tree V2
logger.debug("Reading B tree v2 indexed dataset");
chunkIndex = new BTreeIndex(hdfFc, layoutMessage.getAddress(), datasetInfo);
break;
default:
throw new HdfException("Unrecognized chunk indexing type = " + layoutMessage.getIndexingType());
}
}

final Collection<Chunk> allChunks = chunkIndex.getAllChunks();
23 changes: 23 additions & 0 deletions jhdf/src/main/java/io/jhdf/dataset/chunked/indexing/EmptyChunkIndex.java
@@ -0,0 +1,23 @@
/*
* This file is part of jHDF. A pure Java library for accessing HDF5 files.
*
* http://jhdf.io
*
* Copyright (c) 2020 James Mudd
*
* MIT License see 'LICENSE' file
*/
package io.jhdf.dataset.chunked.indexing;

import io.jhdf.dataset.chunked.Chunk;

import java.util.Collection;
import java.util.Collections;

public class EmptyChunkIndex implements ChunkIndex {

@Override
public Collection<Chunk> getAllChunks() {
return Collections.emptyList();
}
}
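EmptyChunkIndex acts as a null object: when a v4 chunked dataset's layout message has an undefined address, the ChunkLookupLazyInitializer above substitutes it instead of reading a chunk index from disk. A minimal sketch of what that buys downstream; the surrounding code is illustrative, not part of this diff.

```java
import io.jhdf.dataset.chunked.Chunk;
import io.jhdf.dataset.chunked.indexing.ChunkIndex;
import io.jhdf.dataset.chunked.indexing.EmptyChunkIndex;

import java.util.Collection;

public class EmptyChunkIndexSketch {
    public static void main(String[] args) {
        ChunkIndex chunkIndex = new EmptyChunkIndex();
        Collection<Chunk> chunks = chunkIndex.getAllChunks();

        // An empty chunk collection produces an empty chunk lookup, so the dataset reports
        // isEmpty() == true and getStorageInBytes() == 0 rather than failing on an undefined address.
        System.out.println(chunks.isEmpty()); // true
    }
}
```

Returning an empty collection rather than null keeps the existing chunk-lookup code on a single path.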
2 changes: 1 addition & 1 deletion jhdf/src/test/java/io/jhdf/AttributesTest.java
@@ -489,7 +489,7 @@ private Executable createTest(HdfFile file, String nodePath, String attributeNam
assertThat(attribute.isEmpty(), is(true));
assertThat(attribute.isScalar(), is(false));
assertThat(attribute.getSize(), is(0L));
assertThat(attribute.getDiskSize(), is(0L));
assertThat(attribute.getSizeInBytes(), is(0L));
} else if (expectedData.getClass().isArray()) { // Array
assertThat(attribute.getJavaType(), is(equalTo(getArrayType(expectedData))));
assertThat(attribute.isEmpty(), is(false));
6 changes: 3 additions & 3 deletions jhdf/src/test/java/io/jhdf/DatasetImplTest.java
@@ -156,7 +156,7 @@ void testByteDatasetSize() {
@Test
void testByteDatasetDiskSize() {
Dataset dataset = (Dataset) hdfFile.getByPath(INT8_PATH);
assertThat(dataset.getDiskSize(), is(equalTo(21L)));
assertThat(dataset.getSizeInBytes(), is(equalTo(21L)));
}

@Test
@@ -168,7 +168,7 @@ void testShortDatasetSize() {
@Test
void testShortDatasetDiskSize() {
Dataset dataset = (Dataset) hdfFile.getByPath(INT16_PATH);
assertThat(dataset.getDiskSize(), is(equalTo(42L)));
assertThat(dataset.getSizeInBytes(), is(equalTo(42L)));
}

@Test
@@ -180,7 +180,7 @@ void testInt32DatasetSize() {
@Test
void testInt32DatasetDiskSize() {
Dataset dataset = (Dataset) hdfFile.getByPath(INT32_PATH);
assertThat(dataset.getDiskSize(), is(equalTo(84L)));
assertThat(dataset.getSizeInBytes(), is(equalTo(84L)));
}

@Test
jhdf/src/test/java/io/jhdf/dataset/CompressedChunkedDatasetTest.java
@@ -53,21 +53,21 @@ Collection<DynamicNode> compressedChunkedDatasetReadTests() {
// List of all the datasetPaths
return Arrays.asList(
dynamicContainer(HDF5_TEST_EARLIEST_FILE_NAME, Arrays.asList(
dynamicTest("float32", createTest(earliestHdfFile,"/float/float32")),
dynamicTest("float64", createTest(earliestHdfFile,"/float/float64")),
dynamicTest("int8", createTest(earliestHdfFile,"/int/int8")),
dynamicTest("int16", createTest(earliestHdfFile,"/int/int16")),
dynamicTest("int32", createTest(earliestHdfFile,"/int/int32")))),
dynamicTest("float32", createTest(earliestHdfFile,"/float/float32", 0.4560260586319218)),
dynamicTest("float64", createTest(earliestHdfFile,"/float/float64", 1.6374269005847952)),
dynamicTest("int8", createTest(earliestHdfFile,"/int/int8", 0.45454545454545453)),
dynamicTest("int16", createTest(earliestHdfFile,"/int/int16", 0.2)),
dynamicTest("int32", createTest(earliestHdfFile,"/int/int32", 0.625)))),

dynamicContainer(HDF5_TEST_LATEST_FILE_NAME, Arrays.asList(
dynamicTest("float32", createTest(latestHdfFile, "/float/float32")),
dynamicTest("float64", createTest(latestHdfFile,"/float/float64")),
dynamicTest("int8", createTest(latestHdfFile,"/int/int8")),
dynamicTest("int16", createTest(latestHdfFile,"/int/int16")),
dynamicTest("int32", createTest(latestHdfFile,"/int/int32")))));
dynamicTest("float32", createTest(latestHdfFile, "/float/float32", 0.4560260586319218)),
dynamicTest("float64", createTest(latestHdfFile,"/float/float64", 1.6374269005847952)),
dynamicTest("int8", createTest(latestHdfFile,"/int/int8", 0.45454545454545453)),
dynamicTest("int16", createTest(latestHdfFile,"/int/int16", 0.2)),
dynamicTest("int32", createTest(latestHdfFile,"/int/int32", 0.625)))));
}

private Executable createTest(HdfFile hdfFile, String datasetPath) {
private Executable createTest(HdfFile hdfFile, String datasetPath, double expectedCompressionRatio) {
return () -> {
Dataset dataset = hdfFile.getDatasetByPath(datasetPath);
Object data = dataset.getData();
@@ -78,6 +78,8 @@ private Executable createTest(HdfFile hdfFile, String datasetPath) {
// convert to double
assertThat(Double.valueOf(flatData[i].toString()), is(equalTo((double) i)));
}
double actualCompressionRatio = (double) dataset.getSizeInBytes() / dataset.getStorageInBytes();
assertThat(actualCompressionRatio, is(equalTo(expectedCompressionRatio)));
};
}

Expand Down
2 changes: 1 addition & 1 deletion jhdf/src/test/java/io/jhdf/dataset/EmptyDatasetTest.java
@@ -107,7 +107,7 @@ private Executable createTest(HdfFile file, String datasetPath, Class<?> expecte
Dataset dataset = file.getDatasetByPath(datasetPath);
// should have 0 length dimensions
assertThat(dataset.getDimensions(), is(equalTo(new int[0])));
assertThat(dataset.getDiskSize(), is(equalTo(0L)));
assertThat(dataset.getSizeInBytes(), is(equalTo(0L)));
// Should be empty
assertThat(dataset.isEmpty(), is(true));
// Empty dataset getData should return null
2 changes: 1 addition & 1 deletion jhdf/src/test/java/io/jhdf/dataset/ScalarDatasetTest.java
@@ -109,7 +109,7 @@ private Executable createTest(HdfFile file, String datasetPath, Object expected)
assertThat(dataset.getDimensions(), is(equalTo(new int[0])));
assertThat(dataset.isScalar(), is(true));
assertThat(dataset.getData(), is(equalTo(expected)));
assertThat(dataset.getDiskSize(), is(greaterThan(0L)));
assertThat(dataset.getSizeInBytes(), is(greaterThan(0L)));
};
}

16 changes: 8 additions & 8 deletions jhdf/src/test/java/io/jhdf/examples/TestAllFiles.java
@@ -112,16 +112,16 @@ private void verifyAttributes(Node node) {
assertThat(attribute.getJavaType(), is(notNullValue()));
if (attribute.isEmpty()) {
assertThat(attribute.getSize(), is(equalTo(0L)));
assertThat(attribute.getDiskSize(), is(equalTo(0L)));
assertThat(attribute.getSizeInBytes(), is(equalTo(0L)));
assertThat(attribute.getData(), is(nullValue()));
} else if (attribute.isScalar()) {
assertThat(attribute.getSize(), is(equalTo(1L)));
assertThat(attribute.getDiskSize(), is(greaterThan(0L)));
assertThat(attribute.getSizeInBytes(), is(greaterThan(0L)));
assertThat(attribute.getData(), is(notNullValue()));
assertThat(attribute.getBuffer(), is(notNullValue()));
} else {
assertThat(attribute.getSize(), is(greaterThan(0L)));
assertThat(attribute.getDiskSize(), is(greaterThan(0L)));
assertThat(attribute.getSizeInBytes(), is(greaterThan(0L)));
assertThat(attribute.getData(), is(notNullValue()));
assertThat(attribute.getBuffer(), is(notNullValue()));
}
@@ -158,24 +158,24 @@ private void verifyDataset(Dataset dataset, Group group) {
if (dataset.isEmpty()) {
assertThat(data, is(nullValue()));
// Empty so should have 0 size
assertThat(dataset.getDiskSize(), is(equalTo(0L)));
assertThat(dataset.getStorageInBytes(), is(equalTo(0L)));
} else if (dataset.isScalar()) {
assertThat(data.getClass(), is(equalTo(dataset.getJavaType())));
// Should have some size
assertThat(dataset.getDiskSize(), is(greaterThan(0L)));
assertThat(dataset.getSizeInBytes(), is(greaterThan(0L)));
} else if (dataset.isCompound()) {
// Compound datasets are currently returned as maps, maybe a custom CompoundDataset might be better in the future..
assertThat(data, is(instanceOf(Map.class)));
assertThat((Map<String, Object>) data, is(not(anEmptyMap())));
assertThat(dataset.getDiskSize(), is(greaterThan(0L)));
assertThat(dataset.getSizeInBytes(), is(greaterThan(0L)));
} else if (dataset.isVariableLength()) {
assertThat(getDimensions(data)[0], is(equalTo(dims[0])));
assertThat(dataset.getDiskSize(), is(greaterThan(0L)));
assertThat(dataset.getSizeInBytes(), is(greaterThan(0L)));
} else {
assertThat(getDimensions(data), is(equalTo(dims)));
assertThat(getType(data), is(equalTo(dataset.getJavaType())));
// Should have some size
assertThat(dataset.getDiskSize(), is(greaterThan(0L)));
assertThat(dataset.getSizeInBytes(), is(greaterThan(0L)));
}

if (dataset instanceof ContiguousDataset && !dataset.isEmpty()) {
Binary file modified jhdf/src/test/resources/hdf5/test_odd_datasets_earliest.hdf5
Binary file not shown.
Binary file modified jhdf/src/test/resources/hdf5/test_odd_datasets_latest.hdf5
Binary file not shown.