diff --git a/docs/src/UserGuide/cache.md b/docs/src/UserGuide/cache.md
new file mode 100644
index 00000000..67b12c2a
--- /dev/null
+++ b/docs/src/UserGuide/cache.md
@@ -0,0 +1,18 @@
+# Caching YAXArrays
+
+For some applications, like interactive plotting of large datasets, repeated access to the same data cannot be avoided. In these cases it can be useful to store recently accessed data in a cache. In YAXArrays this is easily achieved with the `cache` function. For example, if we open a large dataset from a remote source and want to keep up to 500MB of data in a cache, one can use:
+
+````julia
+using YAXArrays, Zarr
+ds = open_dataset("path/to/source")
+cachesize = 500 # MB
+cache(ds, maxsize=cachesize)
+````
+
+The above will wrap every array in the dataset into its own cache, with the 500MB distributed equally across the arrays.
+Alternatively, individual caches can be applied to single `YAXArray`s:
+
+````julia
+yax = ds.avariable
+cache(yax, maxsize=1000)
+````
diff --git a/src/Cubes/Cubes.jl b/src/Cubes/Cubes.jl
index e2cf5fe2..6666cde6 100644
--- a/src/Cubes/Cubes.jl
+++ b/src/Cubes/Cubes.jl
@@ -17,7 +17,7 @@ using Tables: istable, schema, columns
 using DimensionalData: DimensionalData as DD, AbstractDimArray, NoName
 import DimensionalData: name
 
-export concatenatecubes, caxes, subsetcube, readcubedata, renameaxis!, YAXArray, setchunks
+export concatenatecubes, caxes, subsetcube, readcubedata, renameaxis!, YAXArray, setchunks, cache
 
 """
 This function calculates a subset of a cube's data
diff --git a/test/Datasets/datasets.jl b/test/Datasets/datasets.jl
index 423177d5..c6dba655 100644
--- a/test/Datasets/datasets.jl
+++ b/test/Datasets/datasets.jl
@@ -411,6 +411,10 @@ end
 end
 
 @testset "Caching" begin
+    using YAXArrays.Cubes.DiskArrays.TestTypes
+    using YAXArrays.Cubes: DiskArrays
+    using Test
+    import DimensionalData as DD
     a = AccessCountDiskArray(reshape(1:100, 5, 20), chunksize=(2, 10))
     ds = Dataset(; ar=YAXArray((DD.X(1:5), DD.Y(1:20)), a))
     dscached = DiskArrays.cache(ds)
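
As a companion to the test hunk above, here is a minimal standalone sketch of the documented workflow, mirroring the `AccessCountDiskArray` setup added to `test/Datasets/datasets.jl`. The call to `getindex_count` is an assumption about the access-counting helper provided by DiskArrays' `TestTypes`; the exact accessor may differ.

````julia
using YAXArrays
using YAXArrays.Cubes: DiskArrays
using YAXArrays.Cubes.DiskArrays.TestTypes   # provides AccessCountDiskArray
import DimensionalData as DD

# Backing array that records every read hitting the "disk"
a = AccessCountDiskArray(reshape(1:100, 5, 20), chunksize=(2, 10))
ds = Dataset(; ar=YAXArray((DD.X(1:5), DD.Y(1:20)), a))

# Wrap all variables of the dataset in a cache (maxsize is given in MB)
dscached = cache(ds, maxsize=1)

dscached.ar[1:2, 1:5]   # first read pulls the chunk from the backing array
dscached.ar[1:2, 1:5]   # a repeated read should be served from the cache

# Assumed TestTypes accessor: the second read should not increase the count
TestTypes.getindex_count(a)
````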