Skip to content

Commit

Permalink
Add option to persist handle to NetCDF files (#31)
Browse files Browse the repository at this point in the history
* add interface for keeping handles open for faster dataset opening

* update tests

* test on lts instead of 1.9

* Add dependabot

* test 1.10 since workflows are too old
  • Loading branch information
meggart authored Nov 22, 2024
1 parent 7773192 commit 2ebac48
Show file tree
Hide file tree
Showing 7 changed files with 91 additions and 27 deletions.
7 changes: 7 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
# Dependabot configuration: check the GitHub Actions used by this repository
# for updates once a week.
version: 2
updates:
  - package-ecosystem: "github-actions"
    directory: "/" # Location of package manifests
    schedule:
      interval: "weekly"
2 changes: 1 addition & 1 deletion .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
fail-fast: false
matrix:
version:
- '1.9'
- '1.10'
- '1'
- 'nightly'
os:
Expand Down
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "YAXArrayBase"
uuid = "90b8fcef-0c2d-428d-9c56-5f86629e9d14"
authors = ["Fabian Gans <fgans@bgc-jena.mpg.de>"]
version = "0.7.4"
version = "0.7.5"

[deps]
DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
Expand Down
2 changes: 1 addition & 1 deletion ext/ArchGDALExt/archgdaldataset.jl
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ function GDALDataset(filename; mode="r")
end
Base.haskey(ds::GDALDataset, k) = in(k, ("X", "Y")) || haskey(ds.bands, k)
#Implement Dataset interface
function YAB.get_var_handle(ds::GDALDataset, name)
function YAB.get_var_handle(ds::GDALDataset, name; persist=true)
if name == "X"
range(ds.trans[1], length = ds.bandsize[1], step = ds.trans[2])
elseif name == "Y"
Expand Down
47 changes: 38 additions & 9 deletions ext/NetCDFExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,33 @@ as a data sink:
struct NetCDFDataset
# Path of the NetCDF file backing this dataset
filename::String
# Open mode handed to `NetCDF.open`; the keyword constructor stores NC_NOWRITE or NC_WRITE
mode::UInt16
# File handle held while `open_dataset_handle` runs; `nothing` when no handle is kept open
handle::Base.RefValue{Union{Nothing, NcFile}}
end
NetCDFDataset(filename;mode="r") = mode == "r" ? NetCDFDataset(filename,NC_NOWRITE) : NetCDFDataset(filename,NC_WRITE)
# Keyword constructor: `mode == "r"` selects NC_NOWRITE, any other value NC_WRITE.
# The dataset starts without a persisted file handle.
function NetCDFDataset(filename; mode="r")
    ncmode = if mode == "r"
        NC_NOWRITE
    else
        NC_WRITE
    end
    no_handle = Ref{Union{Nothing, NcFile}}(nothing)
    return NetCDFDataset(filename, ncmode, no_handle)
end
# Apply `f` to a NetCDF file object for `ds`.
# If `ds` currently holds a handle, `f` is called on it directly; otherwise the
# call is delegated to `NetCDF.open(f, ds.filename)`.
function dsopen(f, ds::NetCDFDataset)
    held = ds.handle[]
    return held === nothing ? NetCDF.open(f, ds.filename) : f(held)
end
# Run `f(ds)` while `ds` holds an open NetCDF file handle.
# When a handle is already stored (e.g. a nested call) `f` is simply applied;
# otherwise the file is opened with the dataset's mode, stored in `ds.handle`
# for the duration of `f`, and the reference is reset to `nothing` afterwards,
# also when opening or `f` throws.
function YAB.open_dataset_handle(f, ds::NetCDFDataset)
    # Handle already held by an enclosing call: nothing to manage here
    ds.handle[] === nothing || return f(ds)
    try
        ds.handle[] = NetCDF.open(ds.filename, mode=ds.mode)
        return f(ds)
    finally
        # NOTE(review): the handle is only dropped here, not explicitly closed;
        # presumably NetCDF.jl closes it when the NcFile is finalized - confirm.
        ds.handle[] = nothing
    end
end



import .NetCDF: AbstractDiskArray, readblock!, writeblock!, haschunks, eachchunk

Expand Down Expand Up @@ -49,15 +74,19 @@ YAB.iscompressed(v::NetCDFVariable) = NetCDF.open(v->v.compress > 0, v.filename,

Base.size(v::NetCDFVariable) = v.size

YAB.get_var_dims(ds::NetCDFDataset,name) = NetCDF.open(v->map(i->i.name,v[name].dim),ds.filename)
YAB.get_varnames(ds::NetCDFDataset) = NetCDF.open(v->collect(keys(v.vars)),ds.filename)
YAB.get_var_attrs(ds::NetCDFDataset, name) = NetCDF.open(v->v[name].atts,ds.filename)
YAB.get_global_attrs(ds::NetCDFDataset) = NetCDF.open(nc->nc.gatts, ds.filename)
function Base.getindex(ds::NetCDFDataset, i)
s,et = NetCDF.open(j->(size(j),eltype(j)),ds.filename,i)
NetCDFVariable{et,length(s)}(ds.filename, i, s)
# Names of the dimensions of variable `name`, looked up through `dsopen`.
function YAB.get_var_dims(ds::NetCDFDataset, name)
    return dsopen(ds) do nc
        map(d -> d.name, nc[name].dim)
    end
end
# Collect the keys of the file's `vars` collection, i.e. the variable names.
function YAB.get_varnames(ds::NetCDFDataset)
    return dsopen(ds) do nc
        collect(keys(nc.vars))
    end
end
# Attributes (`atts`) of variable `name`, looked up through `dsopen`.
function YAB.get_var_attrs(ds::NetCDFDataset, name)
    return dsopen(ds) do nc
        nc[name].atts
    end
end
# Global attributes (`gatts`) of the file, looked up through `dsopen`.
function YAB.get_global_attrs(ds::NetCDFDataset)
    return dsopen(ds) do nc
        nc.gatts
    end
end
# Return an array handle for variable `i` of the NetCDF dataset `ds`.
#
# - `persist = true` (default): return a `NetCDFVariable` that stores only the
#   file name, variable name and size, so it does not depend on the handle
#   currently held by `ds`.
# - `persist = false`: if `ds` currently holds an open file handle, return the
#   variable object obtained by indexing that handle; without a held handle
#   this falls back to the `NetCDFVariable` result.
#
# When a handle is held, size and element type are read from it instead of
# re-opening the file, which avoids an extra open per variable.
function YAB.get_var_handle(ds::NetCDFDataset, i; persist = true)
    nc = ds.handle[]
    if nc === nothing
        # No handle is held: open the file only to query size and element type
        s, et = NetCDF.open(j -> (size(j), eltype(j)), ds.filename, i)
        return NetCDFVariable{et,length(s)}(ds.filename, i, s)
    end
    v = nc[i]
    # Caller asked for the variable tied to the open handle
    persist || return v
    s = size(v)
    return NetCDFVariable{eltype(v),length(s)}(ds.filename, i, s)
end
Base.haskey(ds::NetCDFDataset,k) = NetCDF.open(nc->haskey(nc.vars,k),ds.filename)
# `true` if the file's `vars` collection has an entry `k`.
function Base.haskey(ds::NetCDFDataset, k)
    return dsopen(ds) do nc
        haskey(nc.vars, k)
    end
end

function YAB.add_var(p::NetCDFDataset, T::Type, varname, s, dimnames, attr;
chunksize=s, compress = -1)
Expand Down
7 changes: 6 additions & 1 deletion src/datasets/datasetinterface.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#Functions to be implemented for Dataset sources:
"Return a DiskArray handle to a dataset"
get_var_handle(ds, name) = ds[name]
function get_var_handle(ds, name; persist=true)
    # Generic fallback: `persist` is accepted for interface compatibility but is
    # not used here; the handle is whatever indexing the dataset returns.
    return ds[name]
end

"Return a list of variable names"
function get_varnames end
Expand All @@ -18,6 +18,11 @@ function get_global_attrs end
"Initialize and return a handle to a new empty dataset"
function create_empty end

"Apply a function `f` on a dataset `ds` while keeping possible file handles open during the operations"
open_dataset_handle(f, ds) = f(ds)  # generic fallback: no handle to manage, just call `f`

"""
add_var(ds, T, name, s, dimlist, atts)
Expand Down
51 changes: 37 additions & 14 deletions test/datasets.jl
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,29 @@ h = get_var_handle(ds_nc, "tas")
@test all(isapprox.(h[1:2,1:2], [215.893 217.168; 215.805 217.03]))
@test allow_parallel_write(ds_nc) == false
@test allow_missings(ds_nc) == false
#Repeat the same tests while the dataset's file handle is kept open
ds_nc2 = YAXArrayBase.to_dataset(p2)
YAXArrayBase.open_dataset_handle(ds_nc2) do ds_nc
@test ds_nc.handle[] !== nothing
vn = get_varnames(ds_nc)
@test sort(vn) == ["area", "lat", "lat_bnds", "lon", "lon_bnds", "msk_rgn",
"plev", "pr", "tas", "time", "time_bnds", "ua"]
@test get_var_dims(ds_nc, "tas") == ["lon", "lat", "time"]
@test get_var_dims(ds_nc, "area") == ["lon", "lat"]
@test get_var_dims(ds_nc, "time") == ["time"]
@test get_var_dims(ds_nc, "time_bnds") == ["bnds", "time"]
@test get_var_attrs(ds_nc,"tas")["long_name"] == "air_temperature"
h1 = get_var_handle(ds_nc, "tas",persist=true)
@test !(h1 isa NetCDF.NcVar)
@test !YAXArrayBase.iscompressed(h1)
@test all(isapprox.(h1[1:2,1:2], [215.893 217.168; 215.805 217.03]))
h2 = get_var_handle(ds_nc, "tas",persist=false)
@test h2 isa NetCDF.NcVar
@test !YAXArrayBase.iscompressed(h2)
@test all(isapprox.(h2[1:2,1:2], [215.893 217.168; 215.805 217.03]))
@test allow_parallel_write(ds_nc) == false
@test allow_missings(ds_nc) == false
end
end

@testset "Reading Zarr" begin
Expand Down Expand Up @@ -71,22 +94,22 @@ end
@test allow_missings(ds_tif) == true
end
# Write/read round trip for a writable dataset backend `T`:
# create an empty dataset at a temporary path, add two coordinate variables and
# one 10x20 Float32 variable, write data into it, then check variable names,
# dimensions, attributes and values through the dataset interface.
function test_write(T)
    p = tempname()
    ds = create_empty(T, p)
    add_var(ds, 0.5:1:9.5, "lon", ("lon",), Dict("units"=>"degrees_east"))
    add_var(ds, 20:-1.0:1, "lat", ("lat",), Dict("units"=>"degrees_north"))
    v = add_var(ds, Float32, "tas", (10,20), ("lon", "lat"), Dict{String,Any}("units"=>"Celsius"))

    # Fill the data variable with 1:200 in column-major order
    v[:,:] = collect(reshape(1:200, 10, 20))

    @test sort(get_varnames(ds)) == ["lat","lon","tas"]
    @test get_var_dims(ds, "tas") == ["lon", "lat"]
    @test get_var_dims(ds, "lon") == ["lon"]
    @test get_var_attrs(ds,"tas")["units"] == "Celsius"
    h = get_var_handle(ds, "lon")
    @test h[:] == 0.5:1:9.5
    v = get_var_handle(ds, "tas")
    @test v[1:2,1:2] == [1 11; 2 12]
end

@testset "Writing NetCDF" begin
Expand Down

2 comments on commit 2ebac48

@meggart
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/119980

Tip: Release Notes

Did you know you can add release notes too? Just add markdown formatted text underneath the comment after the text
"Release notes:" and it will be added to the registry PR, and if TagBot is installed it will also be added to the
release that TagBot creates. i.e.

@JuliaRegistrator register

Release notes:

## Breaking changes

- blah

To add them here just re-invoke and the PR will be updated.

Tagging

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.7.5 -m "<description of version>" 2ebac481ecbdbf5929544d955eb5e176b5e73148
git push origin v0.7.5

Please sign in to comment.