From c55e8d7565073cabe57cab58308fdca55ce3ef27 Mon Sep 17 00:00:00 2001 From: Lazaro Alonso Date: Thu, 12 Dec 2024 16:35:55 +0100 Subject: [PATCH 01/14] no more Ti --- src/DatasetAPI/Datasets.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DatasetAPI/Datasets.jl b/src/DatasetAPI/Datasets.jl index 7a4bcf68..e3de1f1f 100644 --- a/src/DatasetAPI/Datasets.jl +++ b/src/DatasetAPI/Datasets.jl @@ -285,7 +285,7 @@ function toaxis(dimname, g, offs, len) catch ar[:] end - DD.Ti(tsteps[offs+1:end]) + DD.rebuild(DD.name2dim(axname), tsteps[offs+1:end]) elseif haskey(aratts, "_ARRAYVALUES") vals = identity.(aratts["_ARRAYVALUES"]) DD.rebuild(DD.name2dim(axname),(vals)) From f845879248726e702e3cef9bca9756e9bad8b05b Mon Sep 17 00:00:00 2001 From: Lazaro Alonso Date: Thu, 12 Dec 2024 16:53:19 +0100 Subject: [PATCH 02/14] go back to :time --- docs/src/UserGuide/group.md | 28 ++++++++++++++-------------- src/Cubes/Cubes.jl | 2 +- test/Datasets/datasets.jl | 6 +++--- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/docs/src/UserGuide/group.md b/docs/src/UserGuide/group.md index 389efbe0..11c50731 100644 --- a/docs/src/UserGuide/group.md +++ b/docs/src/UserGuide/group.md @@ -50,22 +50,22 @@ nothing # hide ````julia function weighted_seasons(ds) # calculate weights - tempo = dims(ds, :Ti) + tempo = dims(ds, :time) month_length = YAXArray((tempo,), daysinmonth.(tempo)) g_tempo = groupby(month_length, Ti => seasons(; start=December)) - sum_days = sum.(g_tempo, dims=:Ti) + sum_days = sum.(g_tempo, dims=:time) weights = map(./, g_tempo, sum_days) # unweighted seasons g_ds = groupby(ds, Ti => seasons(; start=December)) - mean_g = mean.(g_ds, dims=:Ti) - mean_g = dropdims.(mean_g, dims=:Ti) + mean_g = mean.(g_ds, dims=:time) + mean_g = dropdims.(mean_g, dims=:time) # weighted seasons g_dsW = broadcast_dims.(*, weights, g_ds) - weighted_g = sum.(g_dsW, dims = :Ti); - weighted_g = dropdims.(weighted_g, dims=:Ti) + weighted_g = sum.(g_dsW, dims = :time); + 
weighted_g = dropdims.(weighted_g, dims=:time) # differences diff_g = map(.-, weighted_g, mean_g) - seasons_g = lookup(mean_g, :Ti) + seasons_g = lookup(mean_g, :time) return mean_g, weighted_g, diff_g, seasons_g end ```` @@ -80,7 +80,7 @@ g_ds = groupby(ds, Ti => seasons(; start=December)) And the mean per season is calculated as follows ````@ansi compareXarray -mean_g = mean.(g_ds, dims=:Ti) +mean_g = mean.(g_ds, dims=:time) ```` ### dropdims @@ -88,7 +88,7 @@ mean_g = mean.(g_ds, dims=:Ti) Note that now the time dimension has length one, we can use `dropdims` to remove it ````@ansi compareXarray -mean_g = dropdims.(mean_g, dims=:Ti) +mean_g = dropdims.(mean_g, dims=:time) ```` ### seasons @@ -96,7 +96,7 @@ mean_g = dropdims.(mean_g, dims=:Ti) Due to the `groupby` function we will obtain new grouping names, in this case in the time dimension: ````@example compareXarray -seasons_g = lookup(mean_g, :Ti) +seasons_g = lookup(mean_g, :time) ```` Next, we will weight this grouping by days/month in each group. @@ -106,7 +106,7 @@ Next, we will weight this grouping by days/month in each group. 
Create a `YAXArray` for the month length ````@example compareXarray -tempo = dims(ds, :Ti) +tempo = dims(ds, :time) month_length = YAXArray((tempo,), daysinmonth.(tempo)) ```` @@ -119,7 +119,7 @@ g_tempo = groupby(month_length, Ti => seasons(; start=December)) Get the number of days per season ````@ansi compareXarray -sum_days = sum.(g_tempo, dims=:Ti) +sum_days = sum.(g_tempo, dims=:time) ```` ### weights @@ -146,8 +146,8 @@ g_dsW = broadcast_dims.(*, weights, g_ds) apply a `sum` over the time dimension and drop it ````@ansi compareXarray -weighted_g = sum.(g_dsW, dims = :Ti); -weighted_g = dropdims.(weighted_g, dims=:Ti) +weighted_g = sum.(g_dsW, dims = :time); +weighted_g = dropdims.(weighted_g, dims=:time) ```` Calculate the differences diff --git a/src/Cubes/Cubes.jl b/src/Cubes/Cubes.jl index 6487b718..271bfa6a 100644 --- a/src/Cubes/Cubes.jl +++ b/src/Cubes/Cubes.jl @@ -484,7 +484,7 @@ function Base.getindex(a::YAXArray, args::DD.Dimension...; kwargs...) if v isa UnitRange{Int} v = Date(first(v))..Date(last(v),12,31) end - d2[:Ti] = v + d2[:time] = v else d2[DD.name(d)] = v end diff --git a/test/Datasets/datasets.jl b/test/Datasets/datasets.jl index 67e31d09..21493d21 100644 --- a/test/Datasets/datasets.jl +++ b/test/Datasets/datasets.jl @@ -24,7 +24,7 @@ using Dates YAXArray(axlist2, data[3], props[3]), ) ds = Dataset(avar=c1, something=c2, smaller=c3) - # previous version will throw this error: `KeyError: key :Ti not found` + # previous version will throw this error: `KeyError: key :time not found` f = "./temp.zarr" @test_nowarn savedataset(ds; path=f) rm(f, recursive=true, force=true) @@ -215,11 +215,11 @@ end ds = open_dataset("test.mock") @test size(ds.Var1) == (10, 5, 2) @test size(ds.Var2) == (10, 5) - @test all(in(keys(ds.axes)), (:Ti, :d2, :d3)) + @test all(in(keys(ds.axes)), (:time, :d2, :d3)) ar = Cube(ds) @test ar isa YAXArray @test size(ar) == (10, 5, 2, 2) - @test DD.name.(ar.axes) == (:Ti, :d2, :d3, :Variable) + @test DD.name.(ar.axes) == 
(:time, :d2, :d3, :Variable) @test DD.lookup(ar.axes[4]) == ["Var1", "Var3"] end @testset "Dataset creation" begin From 1ac2dff147384605a4ead43c79a8fd7d4fd6f15f Mon Sep 17 00:00:00 2001 From: Lazaro Alonso Date: Fri, 13 Dec 2024 09:27:17 +0100 Subject: [PATCH 03/14] no Ti check --- src/Cubes/Cubes.jl | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/Cubes/Cubes.jl b/src/Cubes/Cubes.jl index 271bfa6a..ac1c2e6a 100644 --- a/src/Cubes/Cubes.jl +++ b/src/Cubes/Cubes.jl @@ -480,14 +480,7 @@ function Base.getindex(a::YAXArray, args::DD.Dimension...; kwargs...) for (k,v) in kwargsdict d = getAxis(k,a) if d !== nothing - if d isa DD.Ti - if v isa UnitRange{Int} - v = Date(first(v))..Date(last(v),12,31) - end - d2[:time] = v - else - d2[DD.name(d)] = v - end + d2[DD.name(d)] = v else d2[k] = v end From 273b8c9790d4400fe2c2347865a5557ce53dad49 Mon Sep 17 00:00:00 2001 From: Lazaro Alonso Date: Fri, 13 Dec 2024 12:26:50 +0100 Subject: [PATCH 04/14] use Dim{:time} --- docs/src/UserGuide/group.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/src/UserGuide/group.md b/docs/src/UserGuide/group.md index 11c50731..4101e21e 100644 --- a/docs/src/UserGuide/group.md +++ b/docs/src/UserGuide/group.md @@ -52,11 +52,11 @@ function weighted_seasons(ds) # calculate weights tempo = dims(ds, :time) month_length = YAXArray((tempo,), daysinmonth.(tempo)) - g_tempo = groupby(month_length, Ti => seasons(; start=December)) + g_tempo = groupby(month_length, Dim{:time} => seasons(; start=December)) sum_days = sum.(g_tempo, dims=:time) weights = map(./, g_tempo, sum_days) # unweighted seasons - g_ds = groupby(ds, Ti => seasons(; start=December)) + g_ds = groupby(ds, Dim{:time} => seasons(; start=December)) mean_g = mean.(g_ds, dims=:time) mean_g = dropdims.(mean_g, dims=:time) # weighted seasons @@ -74,7 +74,7 @@ end Now, we continue with the `groupby` operations as usual ````@ansi compareXarray -g_ds = groupby(ds, Ti => seasons(; 
start=December)) +g_ds = groupby(ds, Dim{:time} => seasons(; start=December)) ```` And the mean per season is calculated as follows @@ -113,7 +113,7 @@ month_length = YAXArray((tempo,), daysinmonth.(tempo)) Now group it by season ````@ansi compareXarray -g_tempo = groupby(month_length, Ti => seasons(; start=December)) +g_tempo = groupby(month_length, Dim{:time} => seasons(; start=December)) ```` Get the number of days per season @@ -181,9 +181,9 @@ with_theme(theme_ggplot2()) do fig = Figure(; size = (850,500)) axs = [Axis(fig[i,j], aspect=DataAspect()) for i in 1:3, j in 1:4] for (j, s) in enumerate(seasons_g) - hm_o = heatmap!(axs[1,j], mean_g[Ti=At(s)]; colorrange, lowclip, highclip, colormap) - hm_w = heatmap!(axs[2,j], weighted_g[Ti=At(s)]; colorrange, lowclip, highclip, colormap) - hm_d = heatmap!(axs[3,j], diff_g[Ti=At(s)]; colorrange=(-0.1,0.1), lowclip, highclip, + hm_o = heatmap!(axs[1,j], mean_g[time=At(s)]; colorrange, lowclip, highclip, colormap) + hm_w = heatmap!(axs[2,j], weighted_g[time=At(s)]; colorrange, lowclip, highclip, colormap) + hm_d = heatmap!(axs[3,j], diff_g[time=At(s)]; colorrange=(-0.1,0.1), lowclip, highclip, colormap=:diverging_bwr_20_95_c54_n256) end Colorbar(fig[1:2,5], hm_o, label=cb_label) From 7c99072331d197698f66ffac90a34ea314c09732 Mon Sep 17 00:00:00 2001 From: Lazaro Alonso Date: Sun, 15 Dec 2024 17:49:56 +0100 Subject: [PATCH 05/14] adds dims --- docs/src/UserGuide/group.md | 9 +++++---- src/YAXArrays.jl | 2 +- src/dims.jl | 28 ++++++++++++++++++++++++++++ 3 files changed, 34 insertions(+), 5 deletions(-) create mode 100644 src/dims.jl diff --git a/docs/src/UserGuide/group.md b/docs/src/UserGuide/group.md index 4101e21e..c2500768 100644 --- a/docs/src/UserGuide/group.md +++ b/docs/src/UserGuide/group.md @@ -4,6 +4,7 @@ The following examples will use the `groupby` function to calculate temporal and ````@example compareXarray using YAXArrays, DimensionalData +using YAXArrays: YAXArrays as YAX using NetCDF using Downloads 
using Dates @@ -52,11 +53,11 @@ function weighted_seasons(ds) # calculate weights tempo = dims(ds, :time) month_length = YAXArray((tempo,), daysinmonth.(tempo)) - g_tempo = groupby(month_length, Dim{:time} => seasons(; start=December)) + g_tempo = groupby(month_length, YAX.time => seasons(; start=December)) sum_days = sum.(g_tempo, dims=:time) weights = map(./, g_tempo, sum_days) # unweighted seasons - g_ds = groupby(ds, Dim{:time} => seasons(; start=December)) + g_ds = groupby(ds, YAX.time => seasons(; start=December)) mean_g = mean.(g_ds, dims=:time) mean_g = dropdims.(mean_g, dims=:time) # weighted seasons @@ -74,7 +75,7 @@ end Now, we continue with the `groupby` operations as usual ````@ansi compareXarray -g_ds = groupby(ds, Dim{:time} => seasons(; start=December)) +g_ds = groupby(ds, YAX.time => seasons(; start=December)) ```` And the mean per season is calculated as follows @@ -113,7 +114,7 @@ month_length = YAXArray((tempo,), daysinmonth.(tempo)) Now group it by season ````@ansi compareXarray -g_tempo = groupby(month_length, Dim{:time} => seasons(; start=December)) +g_tempo = groupby(month_length, YAX.time => seasons(; start=December)) ```` Get the number of days per season diff --git a/src/YAXArrays.jl b/src/YAXArrays.jl index 8de4ed5e..500568ff 100644 --- a/src/YAXArrays.jl +++ b/src/YAXArrays.jl @@ -33,7 +33,7 @@ recalculate() = YAXDefaults.recal[] YAXdir() = YAXDefaults.workdir[] export YAXdir include("helpers.jl") - +include("dims.jl") include("YAXTools.jl") include("Cubes/Cubes.jl") include("DatasetAPI/Datasets.jl") diff --git a/src/dims.jl b/src/dims.jl new file mode 100644 index 00000000..cde346bd --- /dev/null +++ b/src/dims.jl @@ -0,0 +1,28 @@ +using DimensionalData: @dim, YDim, XDim, ZDim, TimeDim +export Lat, lat, latitude, Latitude +export rlat, lat_c +export Lon, lon, longitude, long, Longitude +export rlon, lon_c +export height, depth + +@dim Lat YDim "Latitude" +@dim lat YDim "Latitude" +@dim latitude YDim "Latitude" +@dim Latitude YDim 
"Latitude" +@dim rlat YDim +@dim lat_c YDim + +@dim Lon XDim "Longitude" +@dim lon XDim "Longitude" +@dim long XDim "Longitude" +@dim longitude XDim "Longitude" +@dim Longitude XDim "Longitude" +@dim rlon XDim +@dim lon_c XDim + +@dim height ZDim +@dim depth ZDim + +@dim time TimeDim "time" +@dim Time TimeDim "time" + From 2adb67c409c0b5454097ced51836c03bc55e794b Mon Sep 17 00:00:00 2001 From: Lazaro Alonso Date: Sun, 15 Dec 2024 18:48:02 +0100 Subject: [PATCH 06/14] add YAX --- README.md | 6 +++-- docs/src/UserGuide/combine.md | 12 ++++----- docs/src/UserGuide/compute.md | 46 +++++++++++++++++------------------ docs/src/UserGuide/create.md | 8 +++--- docs/src/UserGuide/faq.md | 12 ++++----- docs/src/get_started.md | 4 +-- src/dims.jl | 3 +-- test/Datasets/datasets.jl | 9 ++++--- 8 files changed, 51 insertions(+), 49 deletions(-) diff --git a/README.md b/README.md index 9ff6ef55..dc1f1539 100644 --- a/README.md +++ b/README.md @@ -107,9 +107,11 @@ using YAXArrays Let's assemble a `YAXArray` with 4 dimensions i.e. time, x,y and a variable dimension with two variables. ```julia -using YAXArrays, DimensionalData +using YAXArrays: YAXArrays as YAX, YAXArrays +using DimensionalData + axlist = ( - Dim{:time}(range(1, 20, length=20)), + YAX.time(range(1, 20, length=20)), X(range(1, 10, length=10)), Y(range(1, 5, length=15)), Dim{:Variable}(["var1", "var2"])) diff --git a/docs/src/UserGuide/combine.md b/docs/src/UserGuide/combine.md index 73bde3c5..33ee037b 100644 --- a/docs/src/UserGuide/combine.md +++ b/docs/src/UserGuide/combine.md @@ -10,10 +10,10 @@ We glue the arrays along the first dimension using `dims = 1`: The resulting array `whole_year` still has one dimension, i.e. time, but with 12 instead of 6 elements. 
````@example cat -using YAXArrays +using YAXArrays: YAXArrays as YAX, YAXArrays -first_half = YAXArray((Dim{:time}(1:6),), rand(6)) -second_half = YAXArray((Dim{:time}(7:12),), rand(6)) +first_half = YAXArray((YAX.time(1:6),), rand(6)) +second_half = YAXArray((YAX.time(7:12),), rand(6)) whole_year = cat(first_half, second_half, dims = 1) ```` @@ -24,10 +24,10 @@ The resulting array `combined` has an additional dimension `variable` indicating Note that using a `Dataset` instead is a more flexible approach in handling different variables. ````@example concatenatecubes -using YAXArrays +using YAXArrays: YAXArrays as YAX, YAXArrays -temperature = YAXArray((Dim{:time}(1:6),), rand(6)) -precipitation = YAXArray((Dim{:time}(1:6),), rand(6)) +temperature = YAXArray((YAX.time(1:6),), rand(6)) +precipitation = YAXArray((YAX.time(1:6),), rand(6)) cubes = [temperature,precipitation] var_axis = Dim{:variable}(["temp", "prep"]) combined = concatenatecubes(cubes, var_axis) diff --git a/docs/src/UserGuide/compute.md b/docs/src/UserGuide/compute.md index 89229c31..3510b2c2 100644 --- a/docs/src/UserGuide/compute.md +++ b/docs/src/UserGuide/compute.md @@ -11,13 +11,13 @@ This section describes how to create new YAXArrays by performing operations on t Let's start by creating an example dataset: ````@example compute -using YAXArrays +using YAXArrays: YAXArrays as YAX, YAXArrays using Dates axlist = ( - Dim{:time}(Date("2022-01-01"):Day(1):Date("2022-01-30")), - Dim{:lon}(range(1, 10, length=10)), - Dim{:lat}(range(1, 5, length=15)), + YAX.time(Date("2022-01-01"):Day(1):Date("2022-01-30")), + lon(range(1, 10, length=10)), + lat(range(1, 5, length=15)), ) data = rand(30, 10, 15) properties = Dict(:origin => "user guide") @@ -102,7 +102,7 @@ Dimensions may be added or removed. Here, we will define a simple function, that will take as input several `YAXArrays`. But first, let's load the necessary packages. 
````@example mapCube -using YAXArrays, Zarr +using YAXArrays: YAXArrays as YAX, YAXArrays, Zarr using Dates ```` @@ -130,15 +130,15 @@ Note the `.` after `f`, this is because we will slice across time, namely, the f Here, we do create `YAXArrays` only with the desired dimensions as ````@ansi mapCube -lon = YAXArray(Dim{:lon}(range(1, 15))) -lat = YAXArray(Dim{:lat}(range(1, 10))) +lon = YAXArray(lon(range(1, 15))) +lat = YAXArray(lat(range(1, 10))) ```` And a time Cube's Axis ````@example mapCube tspan = Date("2022-01-01"):Day(1):Date("2022-01-30") -time = YAXArray(Dim{:time}(tspan)) +time = YAXArray(YAX.time(tspan)) ```` note that the following can be extended to arbitrary `YAXArrays` with additional data and dimensions. @@ -197,14 +197,14 @@ which outputs the same as the `gen_cube.data[1, :, :]` called above. Here, we will consider different scenarios, namely how we deal with different input cubes and how to specify the output ones. We will illustrate this with the following test example and the subsequent function definitions. 
````@example outdims -using YAXArrays, Dates +using YAXArrays: YAXArrays as YAX, YAXArrays, Dates using Zarr using Random axlist = ( - Dim{:time}(Date("2022-01-01"):Day(1):Date("2022-01-05")), - Dim{:lon}(range(1, 4, length=4)), - Dim{:lat}(range(1, 3, length=3)), + YAX.time(Date("2022-01-01"):Day(1):Date("2022-01-05")), + lon(range(1, 4, length=4)), + lat(range(1, 3, length=3)), Dim{:variables}(["a", "b"]) ) @@ -308,7 +308,7 @@ Here, the goal is to operate at the pixel level (longitude, latitude), and then Random.seed!(123) data = rand(3.0:5.0, 5, 4, 3) -axlist = (Dim{:lon}(1:4), Dim{:lat}(1:3), Dim{:depth}(1:7),) +axlist = (lon(1:4), lat(1:3), Dim{:depth}(1:7),) yax_2d = YAXArray(axlist, rand(-3.0:0.0, 4, 3, 7)) ```` @@ -318,8 +318,8 @@ and Random.seed!(123) data = rand(3.0:5.0, 5, 4, 3) -axlist = (Dim{:time}(Date("2022-01-01"):Day(1):Date("2022-01-05")), - Dim{:lon}(1:4), Dim{:lat}(1:3),) +axlist = (YAX.time(Date("2022-01-01"):Day(1):Date("2022-01-05")), + lon(1:4), lat(1:3),) properties = Dict("description" => "multi dimensional test cube") yax_test = YAXArray(axlist, data, properties) @@ -358,14 +358,14 @@ Here we transform a raster array with spatial dimension lat and lon into a vecto First, create the raster array: ````@example compute_mapcube -using YAXArrays +using YAXArrays: YAXArrays as YAX, YAXArrays using DimensionalData using Dates axlist = ( - Dim{:time}(Date("2022-01-01"):Day(1):Date("2022-01-30")), - Dim{:lon}(range(1, 10, length=10)), - Dim{:lat}(range(1, 5, length=15)), + YAX.time(Date("2022-01-01"):Day(1):Date("2022-01-30")), + lon(range(1, 10, length=10)), + lat(range(1, 5, length=15)), ) data = rand(30, 10, 15) raster_arr = YAXArray(axlist, data) @@ -431,14 +431,14 @@ For example, we can execute each date of a time series in a different CPU thread The following code does a time mean over all grid points using multiple CPUs of a local machine: ````julia -using YAXArrays +using YAXArrays: YAXArrays as YAX, YAXArrays using Dates using Distributed 
axlist = ( - Dim{:time}(Date("2022-01-01"):Day(1):Date("2022-01-30")), - Dim{:lon}(range(1, 10, length=10)), - Dim{:lat}(range(1, 5, length=15)), + YAX.time(Date("2022-01-01"):Day(1):Date("2022-01-30")), + lon(range(1, 10, length=10)), + lat(range(1, 5, length=15)), ) data = rand(30, 10, 15) properties = Dict(:origin => "user guide") diff --git a/docs/src/UserGuide/create.md b/docs/src/UserGuide/create.md index 835c7436..b2ee4771 100644 --- a/docs/src/UserGuide/create.md +++ b/docs/src/UserGuide/create.md @@ -7,7 +7,7 @@ This section describes how to create arrays and datasets by filling values direc We can create a new YAXArray by filling the values directly: ````@example create -using YAXArrays +using YAXArrays: YAXArrays as YAX, YAXArrays a1 = YAXArray(rand(10, 20, 5)) ```` @@ -18,9 +18,9 @@ We can also specify the dimensions with custom names enabling easier access: using Dates axlist = ( - Dim{:time}(Date("2022-01-01"):Day(1):Date("2022-01-30")), - Dim{:lon}(range(1, 10, length=10)), - Dim{:lat}(range(1, 5, length=15)), + YAX.time(Date("2022-01-01"):Day(1):Date("2022-01-30")), + lon(range(1, 10, length=10)), + lat(range(1, 5, length=15)), ) data2 = rand(30, 10, 15) properties = Dict(:origin => "user guide") diff --git a/docs/src/UserGuide/faq.md b/docs/src/UserGuide/faq.md index cd8d394e..4fc0f650 100644 --- a/docs/src/UserGuide/faq.md +++ b/docs/src/UserGuide/faq.md @@ -80,11 +80,11 @@ It is possible to concatenate several cubes that shared the same dimensions usin Let's create two dummy cubes ````@example howdoi -using YAXArrays +using YAXArrays: YAXArrays as YAX, YAXArrays axlist = ( - Dim{:time}(range(1, 20, length=20)), - Dim{:lon}(range(1, 10, length=10)), - Dim{:lat}(range(1, 5, length=15)) + YAX.time(range(1, 20, length=20)), + lon(range(1, 10, length=10)), + lat(range(1, 5, length=15)) ) data1 = rand(20, 10, 15) @@ -324,7 +324,7 @@ You will not be able to save this dataset, first you will need to rename those ` In this section we will use 
`MarketData.jl` and `TimeSeries.jl` to simulate some stocks. ````@example howdoi -using YAXArrays, DimensionalData +using YAXArrays: YAXArrays as YAX, YAXArrays, DimensionalData using MarketData, TimeSeries stocks = Dict(:Stock1 => random_ohlcv(), :Stock2 => random_ohlcv(), :Stock3 => random_ohlcv()) @@ -334,7 +334,7 @@ d_keys = keys(stocks) currently there is not direct support to obtain `dims` from a `TimeArray`, but we can code a function for it ````@example howdoi -getTArrayAxes(ta::TimeArray) = (Dim{:time}(timestamp(ta)), Dim{:variable}(colnames(ta)), ); +getTArrayAxes(ta::TimeArray) = (YAX.time(timestamp(ta)), Dim{:variable}(colnames(ta)), ); nothing # hide ```` then, we create the `YAXArrays` as diff --git a/docs/src/get_started.md b/docs/src/get_started.md index 7a885704..e3b4bf70 100644 --- a/docs/src/get_started.md +++ b/docs/src/get_started.md @@ -19,7 +19,7 @@ import Pkg; Pkg.add("YAXArrays") Create a simple array from random numbers given the size of each dimension or axis: ```@example quickstart -using YAXArrays +using YAXArrays: YAXArrays as YAX, YAXArrays a = YAXArray(rand(2,3)) ``` @@ -31,7 +31,7 @@ using DimensionalData # axes or dimensions with name and tick values axlist = ( - Dim{:time}(range(1, 20, length=20)), + YAX.time(range(1, 20, length=20)), X(range(1, 10, length=10)), Y(range(1, 5, length=15)), Dim{:variable}(["temperature", "precipitation"]) diff --git a/src/dims.jl b/src/dims.jl index cde346bd..10fb78da 100644 --- a/src/dims.jl +++ b/src/dims.jl @@ -24,5 +24,4 @@ export height, depth @dim depth ZDim @dim time TimeDim "time" -@dim Time TimeDim "time" - +@dim Time TimeDim "time" \ No newline at end of file diff --git a/test/Datasets/datasets.jl b/test/Datasets/datasets.jl index 21493d21..6d69d2b0 100644 --- a/test/Datasets/datasets.jl +++ b/test/Datasets/datasets.jl @@ -416,7 +416,8 @@ end end @testset "Saving, OutDims" begin - using YAXArrays, Zarr, NetCDF, ArchGDAL + using YAXArrays: YAXArrays as YAX, YAXArrays + using Zarr, NetCDF, 
ArchGDAL using Dates flolat(lo, la, t) = (lo + la + Dates.dayofyear(t)) @@ -430,10 +431,10 @@ end xout .= flola.(lo, la) end - lon = YAXArray(Dim{:lon}(range(1, 15))) - lat = YAXArray(Dim{:lat}(range(1, 10))) + lon = YAXArray(lon(range(1, 15))) + lat = YAXArray(lat(range(1, 10))) tspan = Date("2022-01-01"):Day(1):Date("2022-01-30") - time = YAXArray(Dim{:time}(tspan)) + time = YAXArray(YAX.time(tspan)) properties = Dict{String, Any}("name" => "out_array") From b19dad38f74113a24973243a06cf7c697afec9eb Mon Sep 17 00:00:00 2001 From: Lazaro Alonso Date: Sun, 15 Dec 2024 19:38:40 +0100 Subject: [PATCH 07/14] adds dim Variables --- README.md | 6 ++--- docs/src/UserGuide/combine.md | 6 +++-- docs/src/UserGuide/compute.md | 16 ++++++++---- docs/src/UserGuide/convert.md | 4 +-- docs/src/UserGuide/create.md | 4 ++- docs/src/UserGuide/faq.md | 14 ++++++---- docs/src/get_started.md | 3 ++- .../esdl/examples_from_esdl_study_1.jl | 4 +-- .../esdl/examples_from_esdl_study_2.jl | 4 +-- .../esdl/examples_from_esdl_study_3.jl | 4 +-- docs/src/tutorials/mean_seasonal_cycle.md | 2 +- docs/src/tutorials/plottingmaps.md | 2 +- src/Cubes/TransformedCubes.jl | 4 +-- src/DatasetAPI/Datasets.jl | 26 +++++++++---------- src/dims.jl | 4 ++- test/DAT/DAT.jl | 2 +- test/DAT/MovingWindow.jl | 14 +++++----- test/Datasets/datasets.jl | 19 +++++++------- test/dimarray.jl | 26 +++++++++---------- test/runtests.jl | 1 + 20 files changed, 92 insertions(+), 73 deletions(-) diff --git a/README.md b/README.md index dc1f1539..f6dae575 100644 --- a/README.md +++ b/README.md @@ -107,14 +107,14 @@ using YAXArrays Let's assemble a `YAXArray` with 4 dimensions i.e. time, x,y and a variable dimension with two variables. 
```julia -using YAXArrays: YAXArrays as YAX, YAXArrays -using DimensionalData +using YAXArrays +using YAXArrays: YAXArrays as YAX axlist = ( YAX.time(range(1, 20, length=20)), X(range(1, 10, length=10)), Y(range(1, 5, length=15)), - Dim{:Variable}(["var1", "var2"])) + Variables(["var1", "var2"])) ``` ``` ↓ time 1.0:1.0:20.0, diff --git a/docs/src/UserGuide/combine.md b/docs/src/UserGuide/combine.md index 33ee037b..71a8e306 100644 --- a/docs/src/UserGuide/combine.md +++ b/docs/src/UserGuide/combine.md @@ -10,7 +10,8 @@ We glue the arrays along the first dimension using `dims = 1`: The resulting array `whole_year` still has one dimension, i.e. time, but with 12 instead of 6 elements. ````@example cat -using YAXArrays: YAXArrays as YAX, YAXArrays +using YAXArrays +using YAXArrays: YAXArrays as YAX first_half = YAXArray((YAX.time(1:6),), rand(6)) second_half = YAXArray((YAX.time(7:12),), rand(6)) @@ -24,7 +25,8 @@ The resulting array `combined` has an additional dimension `variable` indicating Note that using a `Dataset` instead is a more flexible approach in handling different variables. ````@example concatenatecubes -using YAXArrays: YAXArrays as YAX, YAXArrays +using YAXArrays +using YAXArrays: YAXArrays as YAX temperature = YAXArray((YAX.time(1:6),), rand(6)) precipitation = YAXArray((YAX.time(1:6),), rand(6)) diff --git a/docs/src/UserGuide/compute.md b/docs/src/UserGuide/compute.md index 3510b2c2..66bbb090 100644 --- a/docs/src/UserGuide/compute.md +++ b/docs/src/UserGuide/compute.md @@ -11,7 +11,8 @@ This section describes how to create new YAXArrays by performing operations on t Let's start by creating an example dataset: ````@example compute -using YAXArrays: YAXArrays as YAX, YAXArrays +using YAXArrays +using YAXArrays: YAXArrays as YAX using Dates axlist = ( @@ -102,7 +103,8 @@ Dimensions may be added or removed. Here, we will define a simple function, that will take as input several `YAXArrays`. But first, let's load the necessary packages. 
````@example mapCube -using YAXArrays: YAXArrays as YAX, YAXArrays, Zarr +using YAXArrays, Zarr +using YAXArrays: YAXArrays as YAX using Dates ```` @@ -197,7 +199,9 @@ which outputs the same as the `gen_cube.data[1, :, :]` called above. Here, we will consider different scenarios, namely how we deal with different input cubes and how to specify the output ones. We will illustrate this with the following test example and the subsequent function definitions. ````@example outdims -using YAXArrays: YAXArrays as YAX, YAXArrays, Dates +using YAXArrays +using YAXArrays: YAXArrays as YAX +using Dates using Zarr using Random @@ -358,7 +362,8 @@ Here we transform a raster array with spatial dimension lat and lon into a vecto First, create the raster array: ````@example compute_mapcube -using YAXArrays: YAXArrays as YAX, YAXArrays +using YAXArrays +using YAXArrays: YAXArrays as YAX using DimensionalData using Dates @@ -431,7 +436,8 @@ For example, we can execute each date of a time series in a different CPU thread The following code does a time mean over all grid points using multiple CPUs of a local machine: ````julia -using YAXArrays: YAXArrays as YAX, YAXArrays +using YAXArrays +using YAXArrays: YAXArrays as YAX using Dates using Distributed diff --git a/docs/src/UserGuide/convert.md b/docs/src/UserGuide/convert.md index 4c3684cc..883d0516 100644 --- a/docs/src/UserGuide/convert.md +++ b/docs/src/UserGuide/convert.md @@ -34,7 +34,7 @@ m2 = collect(a.data) A `Raster` as defined in [Rasters.jl](https://rafaqz.github.io/Rasters.jl/stable/) has a same supertype of a `YAXArray`, i.e. 
`AbstractDimArray`, allowing easy conversion between those types: -````@example convert +````julia using Rasters lon, lat = X(25:1:30), Y(25:1:30) @@ -43,7 +43,7 @@ ras = Raster(rand(lon, lat, time)) a = YAXArray(dims(ras), ras.data) ```` -````@example convert +````julia ras2 = Raster(a) ```` diff --git a/docs/src/UserGuide/create.md b/docs/src/UserGuide/create.md index b2ee4771..e5aa8eed 100644 --- a/docs/src/UserGuide/create.md +++ b/docs/src/UserGuide/create.md @@ -7,7 +7,9 @@ This section describes how to create arrays and datasets by filling values direc We can create a new YAXArray by filling the values directly: ````@example create -using YAXArrays: YAXArrays as YAX, YAXArrays +using YAXArrays +using YAXArrays: YAXArrays as YAX + a1 = YAXArray(rand(10, 20, 5)) ```` diff --git a/docs/src/UserGuide/faq.md b/docs/src/UserGuide/faq.md index 4fc0f650..59f0d0a1 100644 --- a/docs/src/UserGuide/faq.md +++ b/docs/src/UserGuide/faq.md @@ -80,7 +80,9 @@ It is possible to concatenate several cubes that shared the same dimensions usin Let's create two dummy cubes ````@example howdoi -using YAXArrays: YAXArrays as YAX, YAXArrays +using YAXArrays +using YAXArrays: YAXArrays as YAX + axlist = ( YAX.time(range(1, 20, length=20)), lon(range(1, 10, length=10)), @@ -125,7 +127,7 @@ t = Date("2020-01-01"):Month(1):Date("2022-12-31") create YAXArray axes ```@example howdoi -axes = (Dim{:Lon}(1:10), Dim{:Lat}(1:10), Dim{:Time}(t)) +axes = (Lon(1:10), Lat(1:10), YAX.Time(t)) ``` create the YAXArray @@ -169,7 +171,7 @@ using Dates # To generate the dates of the time axis using DimensionalData # To use the "Between" option for selecting data t = Date("2020-01-01"):Month(1):Date("2022-12-31") -axes = (Dim{:Lon}(1:10), Dim{:Lat}(1:10), Dim{:Time}(t)) +axes = (Lon(1:10), Lat(1:10), YAX.Time(t)) var1 = YAXArray(axes, reshape(1:3600, (10, 10, 36))) var2 = YAXArray(axes, reshape((1:3600)*5, (10, 10, 36))) @@ -195,7 +197,7 @@ using DimensionalData # To use the "Between" selector for 
selecting data t = Date("2020-01-01"):Month(1):Date("2022-12-31") common_axis = Dim{:points}(1:100) -time_axis = Dim{:Time}(t) +time_axis = YAX.Time(t) # Note that longitudes and latitudes are not dimensions, but YAXArrays longitudes = YAXArray((common_axis,), rand(1:369, 100)) # 100 random values taken from 1 to 359 @@ -324,7 +326,9 @@ You will not be able to save this dataset, first you will need to rename those ` In this section we will use `MarketData.jl` and `TimeSeries.jl` to simulate some stocks. ````@example howdoi -using YAXArrays: YAXArrays as YAX, YAXArrays, DimensionalData +using YAXArrays +using YAXArrays: YAXArrays as YAX +using DimensionalData using MarketData, TimeSeries stocks = Dict(:Stock1 => random_ohlcv(), :Stock2 => random_ohlcv(), :Stock3 => random_ohlcv()) diff --git a/docs/src/get_started.md b/docs/src/get_started.md index e3b4bf70..51cb70fc 100644 --- a/docs/src/get_started.md +++ b/docs/src/get_started.md @@ -19,7 +19,8 @@ import Pkg; Pkg.add("YAXArrays") Create a simple array from random numbers given the size of each dimension or axis: ```@example quickstart -using YAXArrays: YAXArrays as YAX, YAXArrays +using YAXArrays +using YAXArrays: YAXArrays as YAX a = YAXArray(rand(2,3)) ``` diff --git a/docs/src/tutorials/esdl/examples_from_esdl_study_1.jl b/docs/src/tutorials/esdl/examples_from_esdl_study_1.jl index c2240d63..601aef97 100644 --- a/docs/src/tutorials/esdl/examples_from_esdl_study_1.jl +++ b/docs/src/tutorials/esdl/examples_from_esdl_study_1.jl @@ -190,7 +190,7 @@ function zonal_polar_plot(d_msc_lat, sbp, it, vari, lab) for j = it jj = convert(Int, j) try - var_idx = findall(vari .== getAxis("Variable", d_msc_lat).values)[1] + var_idx = findall(vari .== getAxis("Variables", d_msc_lat).values)[1] ts = d_msc_lat[:, jj, var_idx] va = [ts; ts[1]] ## correction for temperature @@ -216,7 +216,7 @@ L = collect(getAxis("lat", caxes(cube_msc_lat)).values) sbps = 321:2:332 labtoshow = ["a)", "b)", "c)", "d)", "e)", "f)"] -vari = 
getAxis("Variable", caxes(cube_msc_lat)).values +vari = getAxis("Variables", caxes(cube_msc_lat)).values for (sbp, lab, vari) in zip(sbps, labtoshow, vari) it1 = range(72/2, stop = 1, step = -2) diff --git a/docs/src/tutorials/esdl/examples_from_esdl_study_2.jl b/docs/src/tutorials/esdl/examples_from_esdl_study_2.jl index 0732b5aa..18d0e519 100644 --- a/docs/src/tutorials/esdl/examples_from_esdl_study_2.jl +++ b/docs/src/tutorials/esdl/examples_from_esdl_study_2.jl @@ -182,7 +182,7 @@ end # We first apply the function `cube_decomp` to the standard data cube with the threshold of 95% of retained variance. As we see from the description of the atomic function above, we need as minimum input dimension `Time` and `Variable`. We call the output cube `cube_int_dim`, which efficiently is a map. -cube_int_dim = mapslices(sufficient_dimensions, cube_fill_itp, 0.95, dims = ("Time","Variable")) +cube_int_dim = mapslices(sufficient_dimensions, cube_fill_itp, 0.95, dims = ("Time","Variables")) # Saving intermediate results can save CPU later, not needed to guarantee reproducibility tough # `savecube(cube_int_dim, "../data/IntDim", overwrite=true)` @@ -199,7 +199,7 @@ cube_int_dim = mapslices(sufficient_dimensions, cube_fill_itp, 0.95, dims = ("Ti # f_{\{time, var\}}^{\{\}} : \mathcal{C}(\{lat, lon, time, var, freq\})\rightarrow \mathcal{C}(\{lat, lon, freq\}) # \end{equation} -cube_int_dim_dec = mapslices(sufficient_dimensions, cube_decomp, 0.95, dims = ("Time","Variable")) +cube_int_dim_dec = mapslices(sufficient_dimensions, cube_decomp, 0.95, dims = ("Time","Variables")) # for saving the output please use the command line below # `savecube(cube_int_dim_dec, "../data/IntDimDec", overwrite=true)` diff --git a/docs/src/tutorials/esdl/examples_from_esdl_study_3.jl b/docs/src/tutorials/esdl/examples_from_esdl_study_3.jl index ece85930..eebd851e 100644 --- a/docs/src/tutorials/esdl/examples_from_esdl_study_3.jl +++ b/docs/src/tutorials/esdl/examples_from_esdl_study_3.jl @@ -78,7 
+78,7 @@ world_τ = map(tair -> (tair - Float32(273.15+15))/10, world_tair) world_ρ = map(log, world_resp) # ... and we combine them into a Data Cube again using `concatenatecubes` -world_new = concatenatecubes([world_τ, world_ρ], Dim{:Variable}(["τ","ρ"])) +world_new = concatenatecubes([world_τ, world_ρ], Variables(["τ","ρ"])) # First we need a function for time-series filtering. Using a moving average filter is the simplest way @@ -237,7 +237,7 @@ ds = concatenatecubes([world_tair, rb_scape_reshaped], Dim{:Variables}(["tair", # And compute the correlation between Air temperature and Base respiration -cor_tair_rb = mapslices(i->cor(eachcol(i)...),ds, dims=("Time","Variable")) +cor_tair_rb = mapslices(i->cor(eachcol(i)...),ds, dims=("Time","Variables")) q10_diff = map((x,y)->x-y, q10_direct, q10_scape) crange = (-1,1) diff --git a/docs/src/tutorials/mean_seasonal_cycle.md b/docs/src/tutorials/mean_seasonal_cycle.md index aab984e9..36c64ff6 100644 --- a/docs/src/tutorials/mean_seasonal_cycle.md +++ b/docs/src/tutorials/mean_seasonal_cycle.md @@ -36,7 +36,7 @@ fig ````@ansi mean_season using YAXArrays, DimensionalData -axes = (Dim{:Time}(t),) +axes = (YAX.Time(t),) c = YAXArray(axes, var) ```` diff --git a/docs/src/tutorials/plottingmaps.md b/docs/src/tutorials/plottingmaps.md index 7c5222e4..a71f4480 100644 --- a/docs/src/tutorials/plottingmaps.md +++ b/docs/src/tutorials/plottingmaps.md @@ -22,7 +22,7 @@ nothing # hide Subset, first time step ````@ansi plots -ct1_slice = c[Ti = Near(Date("2015-01-01"))]; +ct1_slice = c[time = Near(Date("2015-01-01"))]; nothing # hide ```` diff --git a/src/Cubes/TransformedCubes.jl b/src/Cubes/TransformedCubes.jl index 110fefca..3cd678a5 100644 --- a/src/Cubes/TransformedCubes.jl +++ b/src/Cubes/TransformedCubes.jl @@ -63,7 +63,7 @@ function concatenatecubes(; kwargs...) 
push!(cubenames, string(n)) end cubes = map(i -> i[2], collect(kwargs)) - findAxis("Variable", cubes[1]) === nothing || + findAxis("Variables", cubes[1]) === nothing || error("Input cubes must not contain a variable kwarg concatenation") - concatenateCubes(cubes, CategoricalAxis("Variable", cubenames)) + concatenateCubes(cubes, CategoricalAxis("Variables", cubenames)) end diff --git a/src/DatasetAPI/Datasets.jl b/src/DatasetAPI/Datasets.jl index e3de1f1f..74eddb47 100644 --- a/src/DatasetAPI/Datasets.jl +++ b/src/DatasetAPI/Datasets.jl @@ -43,7 +43,7 @@ function Dataset(; properties = Dict{String,Any}(), cubes...) end """ -to_dataset(c;datasetaxis = "Variable", layername = "layer") +to_dataset(c;datasetaxis = "Variables", layername = "layer") Convert a Data Cube into a Dataset. It is possible to treat one of the Cube's axes as a "DatasetAxis" i.e. the cube will be split into @@ -51,7 +51,7 @@ different parts that become variables in the Dataset. If no such axis is specified or found, there will only be a single variable in the dataset with the name `layername` """ -function to_dataset(c;datasetaxis = "Variable", layername = get(c.properties,"name","layer")) +function to_dataset(c;datasetaxis = "Variables", layername = get(c.properties,"name","layer")) axlist = DD.dims(c) splice_generic(x::AbstractArray, i) = [x[1:(i-1)]; x[(i+1:end)]] splice_generic(x::Tuple, i) = (x[1:(i-1)]..., x[(i+1:end)]...) @@ -372,20 +372,20 @@ end Opens and concatenates a list of dataset paths along the dimension specified in `files`. This method can be used when the generic glob-based version of open_mfdataset fails or is too slow. 
-For example, to concatenate a list of annual NetCDF files along the `Ti` dimension, +For example, to concatenate a list of annual NetCDF files along the `time` dimension, one can use: ````julia files = ["1990.nc","1991.nc","1992.nc"] -open_mfdataset(DD.DimArray(files,DD.Ti())) +open_mfdataset(DD.DimArray(files, YAX.time())) ```` alternatively, if the dimension to concatenate along does not exist yet, the dimension provided in the input arg is used: ````julia -files = ["a.nc","b.nc","c.nc"] -open_mfdataset(DD.DimArray(files,DD.Dim{:NewDim}(["a","b","c"]))) +files = ["a.nc", "b.nc", "c.nc"] +open_mfdataset(DD.DimArray(files, DD.Dim{:NewDim}(["a","b","c"]))) ```` """ function open_mfdataset(vec::DD.DimVector{<:AbstractString};kwargs...) @@ -457,14 +457,14 @@ end YAXDataset(; kwargs...) = Dataset(YAXArrays.YAXDefaults.cubedir[]; kwargs...) -to_array(ds::Dataset; joinname = "Variable") = Cube(ds;joinname) +to_array(ds::Dataset; joinname = "Variables") = Cube(ds;joinname) """ - Cube(ds::Dataset; joinname="Variable") + Cube(ds::Dataset; joinname="Variables") Construct a single YAXArray from the dataset `ds` by concatenating the cubes in the datset on the `joinname` dimension. """ -function Cube(ds::Dataset; joinname = "Variable", target_type = nothing) +function Cube(ds::Dataset; joinname = "Variables", target_type = nothing) dl = collect(keys(ds.axes)) dls = string.(dl) @@ -732,7 +732,7 @@ Save a [`YAXArray`](@ref) to the `path`. The keyword arguments are: * `name`: -* `datasetaxis="Variable"` special treatment of a categorical axis that gets written into separate zarr arrays +* `datasetaxis="Variables"` special treatment of a categorical axis that gets written into separate zarr arrays * `max_cache`: The number of bits that are used as cache for the data handling. * `backend`: The backend, that is used to save the data. Falls back to searching the backend according to the extension of the path. * `driver`: The same setting as `backend`. 
@@ -744,7 +744,7 @@ function savecube( c, path::AbstractString; layername = get(c.properties,"name","layer"), - datasetaxis = "Variable", + datasetaxis = "Variables", max_cache = 5e8, backend = :all, driver = backend, @@ -783,7 +783,7 @@ function createdataset(DS::Type,axlist; kwargs...) * `properties=Dict{String,Any}()` additional cube properties * `globalproperties=Dict{String,Any}` global attributes to be added to the dataset * `fillvalue= T>:Missing ? defaultfillval(Base.nonmissingtype(T)) : nothing` fill value - * `datasetaxis="Variable"` special treatment of a categorical axis that gets written into separate zarr arrays + * `datasetaxis="Variables"` special treatment of a categorical axis that gets written into separate zarr arrays * `layername="layer"` Fallback name of the variable stored in the dataset if no `datasetaxis` is found """ function createdataset( @@ -797,7 +797,7 @@ function createdataset( overwrite::Bool = false, properties = Dict{String,Any}(), globalproperties = Dict{String,Any}(), - datasetaxis = "Variable", + datasetaxis = "Variables", layername = get(properties, "name", "layer"), kwargs..., ) diff --git a/src/dims.jl b/src/dims.jl index 10fb78da..4f862eaf 100644 --- a/src/dims.jl +++ b/src/dims.jl @@ -24,4 +24,6 @@ export height, depth @dim depth ZDim @dim time TimeDim "time" -@dim Time TimeDim "time" \ No newline at end of file +@dim Time TimeDim "time" + +@dim Variables \ No newline at end of file diff --git a/test/DAT/DAT.jl b/test/DAT/DAT.jl index 3ba66ac0..f3a9e1fc 100644 --- a/test/DAT/DAT.jl +++ b/test/DAT/DAT.jl @@ -9,7 +9,7 @@ m[4,1,:] .= 10 lon = X(1:4) lat = Y(1:2) - tim = Ti(1:10) + tim = YAX.time(1:10) c = YAXArray((lon, lat, tim), m) indims = InDims("Time") outdims = OutDims() diff --git a/test/DAT/MovingWindow.jl b/test/DAT/MovingWindow.jl index a2eef852..5bd9406a 100644 --- a/test/DAT/MovingWindow.jl +++ b/test/DAT/MovingWindow.jl @@ -1,9 +1,9 @@ @testset "MovingWindow" begin using Zarr a = 
Array{Union{Float64,Missing}}(rand(40, 20, 10)) - lon = Dim{:Lon}(1:40) - lat = Dim{:Lat}(1:20) - tim = Dim{:Time}(1:10) + lon = Lon(1:40) + lat = Lat(1:20) + tim = YAX.Time(1:10) c = YAXArray((lon, lat, tim), a) d = tempname() # Why is this done in the Moving Window testset? @@ -44,10 +44,10 @@ @test all(ismissing, r3.data[:, :, end]) a = Array{Union{Float64,Missing}}(rand(10,4, 40, 20)); - varax = Dim{:Variable}('a':'d') - tim = Dim{:Time}(1:10) - lon = Dim{:Lon}(1:40) - lat = Dim{:Lat}(1:20) + varax = Variables('a':'d') + tim = YAX.Time(1:10) + lon = Lon(1:40) + lat = Lat(1:20) c = YAXArray((tim, varax, lon,lat), a) indims = InDims("Time",YAXArrays.MovingWindow("Lon",1,1)) r1 = mapCube(c, indims=indims, outdims=OutDims("Time")) do xout,xin diff --git a/test/Datasets/datasets.jl b/test/Datasets/datasets.jl index 6d69d2b0..76fc21e4 100644 --- a/test/Datasets/datasets.jl +++ b/test/Datasets/datasets.jl @@ -4,13 +4,13 @@ using DimensionalData: DimensionalData as DD using Dates -@testset "Datasets axes Ti" begin +@testset "Datasets axes time" begin using Zarr, NetCDF ## first example data = [rand(4, 5, 12), rand(4, 5, 12), rand(4, 5)] - # dim_time = DD.Dim{:Time}(Date(2001, 1, 15):Month(1):Date(2001, 12, 15)) - dim_time = Ti(Date(2001, 1, 15):Month(1):Date(2001, 12, 15)) + # dim_time = YAX.Time(Date(2001, 1, 15):Month(1):Date(2001, 12, 15)) + dim_time = YAX.time(Date(2001, 1, 15):Month(1):Date(2001, 12, 15)) axlist1 = ( DD.Dim{:XVals}(1.0:4.0), DD.Dim{:YVals}([1, 2, 3, 4, 5]), @@ -58,7 +58,7 @@ end axlist1 = ( DD.Dim{:XVals}(1.0:4.0), DD.Dim{:YVals}([1, 2, 3, 4, 5]), - DD.Dim{:Time}(Date(2001, 1, 15):Month(1):Date(2001, 12, 15)), + YAX.Time(Date(2001, 1, 15):Month(1):Date(2001, 12, 15)), ) axlist2 = (DD.Dim{:XVals}(1.0:4.0), DD.Dim{:YVals}([1, 2, 3, 4, 5])) props = [Dict("att$i" => i) for i = 1:3] @@ -224,8 +224,8 @@ end end @testset "Dataset creation" begin al = ( - DD.Dim{:Time}(Date(2001):Month(1):Date(2001, 12, 31)), - DD.Dim{:Variable}(["A", "B"]), + 
YAX.Time(Date(2001):Month(1):Date(2001, 12, 31)), + Variables(["A", "B"]), DD.Dim{:Xvals}(1:10), ) # Basic @@ -416,7 +416,8 @@ end end @testset "Saving, OutDims" begin - using YAXArrays: YAXArrays as YAX, YAXArrays + using YAXArrays + using YAXArrays: YAXArrays as YAX using Zarr, NetCDF, ArchGDAL using Dates @@ -548,14 +549,14 @@ end savecube(array1,f1) savecube(array2,f2) - ds = open_mfdataset(DD.DimArray([f1,f2],(DD.Ti(1:2),))) + ds = open_mfdataset(DD.DimArray([f1,f2],(YAX.time(1:2),))) @test ds.layer.data[:,:,1] == array1 @test ds.layer.data[:,:,2] == array2 td = mktempdir() f1, f2 = joinpath.(td,("file_1.nc","file_2.nc")) - td1, td2 = DD.Ti(1:2), DD.Ti(3:4) + td1, td2 = YAX.time(1:2), YAX.time(3:4) a1,a2 = rand(20,10,2), rand(20,10,2) array1,array2 = YAXArray((d1,d2,td1),a1), YAXArray((d1,d2,td2),a2) savecube(array1,f1) diff --git a/test/dimarray.jl b/test/dimarray.jl index 38bbdcc5..e425d66e 100644 --- a/test/dimarray.jl +++ b/test/dimarray.jl @@ -10,9 +10,9 @@ m[3,1,6:10] .=missing m[4,1,:] .= 10 - lon = Dim{:Lon}(1:4) - lat = Dim{:Lat}(1:2) - tim = Dim{:Time}(1:10) + lon = Lon(1:4) + lat = Lat(1:2) + tim = YAX.Time(1:10) c = DimArray(m, (lon, lat, tim)) indims = InDims("Time") outdims = OutDims() @@ -96,9 +96,9 @@ end using YAXArrays using DimensionalData a = Array{Union{Float64,Missing}}(rand(40, 20, 10)) - lon = Dim{:Lon}(1:40) - lat = Dim{:Lat}(1:20) - tim = Dim{:Time}(1:10) + lon = Lon(1:40) + lat = Lat(1:20) + tim = YAX.Time(1:10) c = DimArray(a,(lon, lat, tim)) indims = InDims("Time",YAXArrays.MovingWindow("Lon",1,1),window_oob_value = -9999.0) @@ -143,10 +143,10 @@ end @test r3 isa AbstractDimArray a = Array{Union{Float64,Missing}}(rand(10,4, 40, 20)); - varax = Dim{:Variable}('a':'d') - lon = Dim{:Lon}(1:40) - lat = Dim{:Lat}(1:20) - tim = Dim{:Time}(1:10) + varax = Variables('a':'d') + lon = Lon(1:40) + lat = Lat(1:20) + tim = YAX.Time(1:10) c = DimArray(a, (tim, varax, lon,lat)) indims = InDims("Time",YAXArrays.MovingWindow("Lon",1,1)) @@ -163,9 
+163,9 @@ end using DimensionalData a = Array{Union{Float64,Missing}}(rand(40, 20, 10)) - lon = Dim{:Lon}(1:40) - lat = Dim{:Lat}(1:20) - tim = Dim{:Time}(1:10) + lon = Lon(1:40) + lat = Lat(1:20) + tim = YAX.Time(1:10) c = DimArray(a,(lon, lat, tim)) d = tempname() @test_broken c_chunked = setchunks(c,Dict("Lon" => 7, "Lat" => 9)) diff --git a/test/runtests.jl b/test/runtests.jl index c34a3291..a09b04d6 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,6 +3,7 @@ using Test using TestItemRunner using Dates using YAXArrayBase +using YAXArrays: YAXArrays as YAX @run_package_tests From 1d9f0b51e9331acaae68b0c3fe4de7e6fafbc2f0 Mon Sep 17 00:00:00 2001 From: Lazaro Alonso Date: Sun, 15 Dec 2024 19:40:05 +0100 Subject: [PATCH 08/14] export it --- src/dims.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/dims.jl b/src/dims.jl index 4f862eaf..0ad73006 100644 --- a/src/dims.jl +++ b/src/dims.jl @@ -4,6 +4,7 @@ export rlat, lat_c export Lon, lon, longitude, long, Longitude export rlon, lon_c export height, depth +export Variables @dim Lat YDim "Latitude" @dim lat YDim "Latitude" From 8dd7fd730a51662b0ac0e04437d6e4bc462ac037 Mon Sep 17 00:00:00 2001 From: Lazaro Alonso Date: Sun, 15 Dec 2024 20:28:14 +0100 Subject: [PATCH 09/14] fix tests --- test/Datasets/datasets.jl | 20 ++++++++++---------- test/dimarray.jl | 7 +++++-- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/test/Datasets/datasets.jl b/test/Datasets/datasets.jl index 76fc21e4..b702b522 100644 --- a/test/Datasets/datasets.jl +++ b/test/Datasets/datasets.jl @@ -219,7 +219,7 @@ end ar = Cube(ds) @test ar isa YAXArray @test size(ar) == (10, 5, 2, 2) - @test DD.name.(ar.axes) == (:time, :d2, :d3, :Variable) + @test DD.name.(ar.axes) == (:time, :d2, :d3, :Variables) @test DD.lookup(ar.axes[4]) == ["Var1", "Var3"] end @testset "Dataset creation" begin @@ -230,7 +230,7 @@ end ) # Basic newds, newds2 = YAXArrays.Datasets.createdataset(MockDataset, al) - @test DD.name.(newds2.axes) == (:Time, 
:Xvals, :Variable) + @test DD.name.(newds2.axes) == (:Time, :Xvals, :Variables) @test DD.lookup(newds2.axes[1]) == Date(2001):Month(1):Date(2001, 12, 31) @test DD.lookup(newds2.axes[3]) == ["A", "B"] @test DD.lookup(newds2.axes[2]) == 1:10 @@ -432,28 +432,28 @@ end xout .= flola.(lo, la) end - lon = YAXArray(lon(range(1, 15))) - lat = YAXArray(lat(range(1, 10))) + lon_yax = YAXArray(lon(range(1, 15))) + lat_yax = YAXArray(lat(range(1, 10))) tspan = Date("2022-01-01"):Day(1):Date("2022-01-30") - time = YAXArray(YAX.time(tspan)) + time_yax = YAXArray(YAX.time(tspan)) properties = Dict{String, Any}("name" => "out_array") - gen_cube = mapCube(g, (lon, lat, time); + gen_cube = mapCube(g, (lon_yax, lat_yax, time_yax); indims = (InDims(), InDims(), InDims("time")), outdims = OutDims("time"; properties, outtype = Float32) # max_cache=1e9 ) - gen_cube2d = mapCube(g2d, (lon, lat); + gen_cube2d = mapCube(g2d, (lon_yax, lat_yax); indims = (InDims(), InDims()), outdims = OutDims(; outtype = Float32) # max_cache=1e9 ) properties = Dict{String, Any}("name" => "out_zarr") # test saves, zarr - mapCube(g, (lon, lat, time); + mapCube(g, (lon_yax, lat_yax, time_yax); indims = (InDims(), InDims(), InDims("time")), outdims = OutDims("time"; overwrite=true, path="my_gen_cube.zarr", properties, @@ -463,14 +463,14 @@ end ds_zarr = open_dataset("my_gen_cube.zarr") # test saves, nc properties = Dict{String, Any}("name" => "out_nc") - mapCube(g, (lon, lat, time); + mapCube(g, (lon_yax, lat_yax, time_yax); indims = (InDims(), InDims(), InDims("time")), outdims = OutDims("time"; overwrite=true, path="my_gen_cube.nc", properties, outtype = Float32) # max_cache=1e9 ) - mapCube(g, (lon, lat, time); + mapCube(g, (lon_yax, lat_yax, time_yax); indims = (InDims(), InDims(), InDims("time")), outdims = OutDims("time"; overwrite=true, path="my_gen_cube_no_p.nc", outtype = Float32) diff --git a/test/dimarray.jl b/test/dimarray.jl index e425d66e..46958e76 100644 --- a/test/dimarray.jl +++ 
b/test/dimarray.jl @@ -1,6 +1,7 @@ @testitem "DimensionalData mapcube" begin using DimensionalData using YAXArrays + using YAXArrays: YAXArrays as YAX m = Array{Union{Int, Missing}}(undef, 4,2,10) for i in 1:size(m,3) m[:,1,i] .= i @@ -94,6 +95,7 @@ end @testitem "Moving Window DimArray" begin using YAXArrays + using YAXArrays: YAXArrays as YAX using DimensionalData a = Array{Union{Float64,Missing}}(rand(40, 20, 10)) lon = Lon(1:40) @@ -159,7 +161,8 @@ end end @testitem "DimArray Chunking" begin - using YAXArrays + using YAXArrays + using YAXArrays: YAXArrays as YAX using DimensionalData a = Array{Union{Float64,Missing}}(rand(40, 20, 10)) @@ -175,7 +178,7 @@ end @testitem "DimArray tablestats" begin using DimensionalData - using YAXArrays + using YAXArrays using OnlineStats data = collect(reshape(1:20.,4,5)) axlist = (Dim{:XVals}(1.0:4.0), Dim{:YVals}([1,2,3,4,5])) From 115440bd1d1c2665767120b316da3dac6c6646ce Mon Sep 17 00:00:00 2001 From: Lazaro Alonso Date: Sun, 15 Dec 2024 21:04:12 +0100 Subject: [PATCH 10/14] fix docs --- README.md | 28 ++++++++++++++-------------- docs/src/UserGuide/combine.md | 2 +- docs/src/UserGuide/compute.md | 12 ++++++------ docs/src/UserGuide/faq.md | 2 +- docs/src/get_started.md | 10 ++++------ 5 files changed, 26 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index f6dae575..878ab9e0 100644 --- a/README.md +++ b/README.md @@ -112,15 +112,15 @@ using YAXArrays: YAXArrays as YAX axlist = ( YAX.time(range(1, 20, length=20)), - X(range(1, 10, length=10)), - Y(range(1, 5, length=15)), + lon(range(1, 10, length=10)), + lat(range(1, 5, length=15)), Variables(["var1", "var2"])) ``` ``` ↓ time 1.0:1.0:20.0, -→ X 1.0:1.0:10.0, -↗ Y 1.0:0.2857142857142857:5.0, -⬔ Variable ["var1", "var2"] +→ lon 1.0:1.0:10.0, +↗ lat 1.0:0.2857142857142857:5.0, +⬔ Variables ["var1", "var2"] ``` and the corresponding data. 
@@ -150,8 +150,8 @@ ds = YAXArray(axlist, data, props) │ 20×10×15×2 YAXArray{Float64,4} │ ├────────────────────────────────┴─────────────────────────────────────────────── dims ┐ ↓ time Sampled{Float64} 1.0:1.0:20.0 ForwardOrdered Regular Points, - → X Sampled{Float64} 1.0:1.0:10.0 ForwardOrdered Regular Points, - ↗ Y Sampled{Float64} 1.0:0.2857142857142857:5.0 ForwardOrdered Regular Points, + → lon Sampled{Float64} 1.0:1.0:10.0 ForwardOrdered Regular Points, + ↗ lat Sampled{Float64} 1.0:0.2857142857142857:5.0 ForwardOrdered Regular Points, ⬔ Variable Categorical{String} ["var1", "var2"] ForwardOrdered ├──────────────────────────────────────────────────────────────────────────── metadata ┤ Dict{String, String} with 5 entries: @@ -170,17 +170,17 @@ ds = YAXArray(axlist, data, props) For axis can be via `.` ```julia -ds.X +ds.lon ``` ``` -X Sampled{Float64} ForwardOrdered Regular Points +lon Sampled{Float64} ForwardOrdered Regular Points wrapping: 1.0:1.0:10.0 ``` or better yet via `lookup` ```julia -lookup(ds, :X) +lookup(ds, :lon) ``` ``` Sampled{Float64} ForwardOrdered Regular Points @@ -189,7 +189,7 @@ wrapping: 1.0:1.0:10.0 note that also the `.data` field can be use ```julia -lookup(ds, :X).data +lookup(ds, :lon).data ``` ``` 1.0:1.0:10.0 @@ -198,15 +198,15 @@ lookup(ds, :X).data The data for one variables, i.e. 
`var1` can be accessed via: ```julia -ds[Variable=At("var1")] +ds[Variables=At("var1")] ``` ``` ╭──────────────────────────────╮ │ 20×10×15 YAXArray{Float64,3} │ ├──────────────────────────────┴────────────────────────────────────────────── dims ┐ ↓ time Sampled{Float64} 1.0:1.0:20.0 ForwardOrdered Regular Points, - → X Sampled{Float64} 1.0:1.0:10.0 ForwardOrdered Regular Points, - ↗ Y Sampled{Float64} 1.0:0.2857142857142857:5.0 ForwardOrdered Regular Points + → lon Sampled{Float64} 1.0:1.0:10.0 ForwardOrdered Regular Points, + ↗ lat Sampled{Float64} 1.0:0.2857142857142857:5.0 ForwardOrdered Regular Points ├───────────────────────────────────────────────────────────────────────── metadata ┤ Dict{String, String} with 5 entries: "var1" => "one of your variables" diff --git a/docs/src/UserGuide/combine.md b/docs/src/UserGuide/combine.md index 71a8e306..01df2e6b 100644 --- a/docs/src/UserGuide/combine.md +++ b/docs/src/UserGuide/combine.md @@ -31,6 +31,6 @@ using YAXArrays: YAXArrays as YAX temperature = YAXArray((YAX.time(1:6),), rand(6)) precipitation = YAXArray((YAX.time(1:6),), rand(6)) cubes = [temperature,precipitation] -var_axis = Dim{:variable}(["temp", "prep"]) +var_axis = Variables(["temp", "prep"]) combined = concatenatecubes(cubes, var_axis) ```` diff --git a/docs/src/UserGuide/compute.md b/docs/src/UserGuide/compute.md index 66bbb090..f9e033df 100644 --- a/docs/src/UserGuide/compute.md +++ b/docs/src/UserGuide/compute.md @@ -132,15 +132,15 @@ Note the `.` after `f`, this is because we will slice across time, namely, the f Here, we do create `YAXArrays` only with the desired dimensions as ````@ansi mapCube -lon = YAXArray(lon(range(1, 15))) -lat = YAXArray(lat(range(1, 10))) +lon_yax = YAXArray(lon(range(1, 15))) +lat_yax = YAXArray(lat(range(1, 10))) ```` And a time Cube's Axis ````@example mapCube tspan = Date("2022-01-01"):Day(1):Date("2022-01-30") -time = YAXArray(YAX.time(tspan)) +time_yax = YAXArray(YAX.time(tspan)) ```` note that the following can be 
extended to arbitrary `YAXArrays` with additional data and dimensions. @@ -148,7 +148,7 @@ note that the following can be extended to arbitrary `YAXArrays` with additional Let's generate a new `cube` using `mapCube` and saving the output directly into disk. ````@ansi mapCube -gen_cube = mapCube(g, (lon, lat, time); +gen_cube = mapCube(g, (lon_yax, lat_yax, time_yax); indims = (InDims(), InDims(), InDims("time")), outdims = OutDims("time", overwrite=true, path="my_gen_cube.zarr", backend=:zarr, outtype = Float32) @@ -172,7 +172,7 @@ gen_cube.data[1, :, :] but, we can generate a another cube with a different `output order` as follows ````@ansi mapCube -gen_cube = mapCube(g, (lon, lat, time); +gen_cube = mapCube(g, (lon_yax, lat_yax, time_yax); indims = (InDims("lon"), InDims(), InDims()), outdims = OutDims("lon", overwrite=true, path="my_gen_cube.zarr", backend=:zarr, outtype = Float32) @@ -209,7 +209,7 @@ axlist = ( YAX.time(Date("2022-01-01"):Day(1):Date("2022-01-05")), lon(range(1, 4, length=4)), lat(range(1, 3, length=3)), - Dim{:variables}(["a", "b"]) + Variables(["a", "b"]) ) Random.seed!(123) diff --git a/docs/src/UserGuide/faq.md b/docs/src/UserGuide/faq.md index 59f0d0a1..81402892 100644 --- a/docs/src/UserGuide/faq.md +++ b/docs/src/UserGuide/faq.md @@ -338,7 +338,7 @@ d_keys = keys(stocks) currently there is not direct support to obtain `dims` from a `TimeArray`, but we can code a function for it ````@example howdoi -getTArrayAxes(ta::TimeArray) = (YAX.time(timestamp(ta)), Dim{:variable}(colnames(ta)), ); +getTArrayAxes(ta::TimeArray) = (YAX.time(timestamp(ta)), Variables(colnames(ta)), ); nothing # hide ```` then, we create the `YAXArrays` as diff --git a/docs/src/get_started.md b/docs/src/get_started.md index 51cb70fc..2af42a7c 100644 --- a/docs/src/get_started.md +++ b/docs/src/get_started.md @@ -28,14 +28,12 @@ a = YAXArray(rand(2,3)) Assemble a more complex `YAXArray` with 4 dimensions, i.e. 
time, x, y and a variable type: ```@example quickstart -using DimensionalData - # axes or dimensions with name and tick values axlist = ( YAX.time(range(1, 20, length=20)), - X(range(1, 10, length=10)), - Y(range(1, 5, length=15)), - Dim{:variable}(["temperature", "precipitation"]) + lon(range(1, 10, length=10)), + lat(range(1, 5, length=15)), + Variables(["temperature", "precipitation"]) ) # the actual data matching the dimensions defined in axlist @@ -54,7 +52,7 @@ a2 = YAXArray(axlist, data, props) Get the temperature map at the first point in time: ```@example quickstart -a2[variable=At("temperature"), time=1].data +a2[Variables=At("temperature"), time=1].data ``` ## Updates From dc0af09359573606adfee121aecb34f7abd2518c Mon Sep 17 00:00:00 2001 From: Lazaro Alonso Date: Sun, 15 Dec 2024 21:30:44 +0100 Subject: [PATCH 11/14] fix msc --- docs/src/tutorials/mean_seasonal_cycle.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/src/tutorials/mean_seasonal_cycle.md b/docs/src/tutorials/mean_seasonal_cycle.md index 36c64ff6..3fe13f61 100644 --- a/docs/src/tutorials/mean_seasonal_cycle.md +++ b/docs/src/tutorials/mean_seasonal_cycle.md @@ -36,6 +36,8 @@ fig ````@ansi mean_season using YAXArrays, DimensionalData +using YAXArrays: YAXArrays as YAX + axes = (YAX.Time(t),) c = YAXArray(axes, var) ```` From 63261817f855984efc4a0204b541781a6a4ac4fd Mon Sep 17 00:00:00 2001 From: Lazaro Alonso Date: Sun, 15 Dec 2024 23:22:24 +0100 Subject: [PATCH 12/14] fixes doc strings --- docs/src/UserGuide/group.md | 12 +++------ src/DAT/DAT.jl | 11 ++++---- src/DatasetAPI/Datasets.jl | 54 ++++++++++++++++++------------------- 3 files changed, 37 insertions(+), 40 deletions(-) diff --git a/docs/src/UserGuide/group.md b/docs/src/UserGuide/group.md index c2500768..4357eaa7 100644 --- a/docs/src/UserGuide/group.md +++ b/docs/src/UserGuide/group.md @@ -23,7 +23,6 @@ Where the goal is to calculate the seasonal average. 
And in order to do this pro url_path = "https://github.com/pydata/xarray-data/raw/master/rasm.nc" filename = Downloads.download(url_path, "rasm.nc") ds_o = Cube(filename) -nothing # hide ```` ::: warning @@ -34,14 +33,8 @@ Related to https://github.com/rafaqz/DimensionalData.jl/issues/642 ::: ````@example compareXarray - -axs = dims(ds_o) # get the dimensions -data = ds_o.data[:,:,:] # read the data _FillValue = ds_o.properties["_FillValue"] -data = replace(data, _FillValue => NaN) -# create new YAXArray -ds = YAXArray(axs, data) -nothing # hide +ds = replace(ds_o[:,:,:], _FillValue => NaN) # load into memory and replace _FillValue by NaN ```` ## GroupBy: seasons @@ -78,6 +71,9 @@ Now, we continue with the `groupby` operations as usual g_ds = groupby(ds, YAX.time => seasons(; start=December)) ```` +> [!IMPORTANT] +> Note how we are referencing the `time` dimension via `YAX.time`. This approach is used to avoid name clashes with `time` (`Time`) from `Base` (`Dates`). For convenience, we have defined the `Dimensions` `time` and `Time` in `YAXArrays.jl`, which are only accessible when explicitly called. + And the mean per season is calculated as follows ````@ansi compareXarray diff --git a/src/DAT/DAT.jl b/src/DAT/DAT.jl index 0e9f6917..37e8e59a 100644 --- a/src/DAT/DAT.jl +++ b/src/DAT/DAT.jl @@ -315,12 +315,13 @@ mapCube(fu::Function, cdata, addargs...; kwargs...) = """ mapCube(fun, cube, addargs...;kwargs...) - Map a given function `fun` over slices of all cubes of the dataset `ds`. - Use InDims to discribe the input dimensions and OutDims to describe the output dimensions of the function. - For Datasets, only one output cube can be specified. - In contrast to the mapCube function for cubes, additional arguments for the inner function should be set as keyword arguments. +Map a given function `fun` over slices of all cubes of the dataset `ds`. +Use InDims to describe the input dimensions and OutDims to describe the output dimensions of the function.
+ +For Datasets, only one output cube can be specified. +In contrast to the mapCube function for cubes, additional arguments for the inner function should be set as keyword arguments. - For the specific keyword arguments see the docstring of the mapCube function for cubes. +For the specific keyword arguments see the docstring of the mapCube function for cubes. """ function mapCube( f::Function, diff --git a/src/DatasetAPI/Datasets.jl b/src/DatasetAPI/Datasets.jl index 74eddb47..54d552a9 100644 --- a/src/DatasetAPI/Datasets.jl +++ b/src/DatasetAPI/Datasets.jl @@ -26,7 +26,7 @@ struct Dataset properties::Dict end """ -Dataset(; properties = Dict{String,Any}, cubes...) + Dataset(; properties = Dict{String,Any}, cubes...) Construct a YAXArray Dataset with global attributes `properties` a and a list of named YAXArrays cubes... """ @@ -43,7 +43,7 @@ function Dataset(; properties = Dict{String,Any}(), cubes...) end """ -to_dataset(c;datasetaxis = "Variables", layername = "layer") + to_dataset(c;datasetaxis = "Variables", layername = "layer") Convert a Data Cube into a Dataset. It is possible to treat one of the Cube's axes as a "DatasetAxis" i.e. the cube will be split into @@ -406,7 +406,7 @@ end """ -open_dataset(g; driver=:all) + open_dataset(g; driver=:all) Open the dataset at `g` with the given `driver`. The default driver will search for available drivers and tries to detect the useable driver from the filename extension. @@ -461,8 +461,8 @@ to_array(ds::Dataset; joinname = "Variables") = Cube(ds;joinname) """ Cube(ds::Dataset; joinname="Variables") -Construct a single YAXArray from the dataset `ds` - by concatenating the cubes in the datset on the `joinname` dimension. + +Construct a single YAXArray from the dataset `ds` by concatenating the cubes in the dataset on the `joinname` dimension.
""" function Cube(ds::Dataset; joinname = "Variables", target_type = nothing) @@ -635,8 +635,7 @@ function setchunks(ds::Dataset, chunks) end """ -savedataset(ds::Dataset; path = "", persist = nothing, overwrite = false, append = false, skeleton=false, backend = :all, - driver = backend, max_cache = 5e8, writefac=4.0) + savedataset(ds::Dataset; path = "", persist = nothing, overwrite = false, append = false, skeleton=false, backend = :all, driver = backend, max_cache = 5e8, writefac=4.0) Saves a Dataset into a file at `path` with the format given by `driver`, i.e., driver=:netcdf or driver=:zarr. @@ -725,6 +724,7 @@ end """ savecube(cube,name::String) + Save a [`YAXArray`](@ref) to the `path`. # Extended Help @@ -766,26 +766,26 @@ end """ -function createdataset(DS::Type,axlist; kwargs...) - - Creates a new dataset with axes specified in `axlist`. Each axis must be a subtype - of `CubeAxis`. A new empty Zarr array will be created and can serve as a sink for - `mapCube` operations. - - ### Keyword arguments - - * `path=""` location where the new cube is stored - * `T=Union{Float32,Missing}` data type of the target cube - * `chunksize = ntuple(i->length(axlist[i]),length(axlist))` chunk sizes of the array - * `chunkoffset = ntuple(i->0,length(axlist))` offsets of the chunks - * `persist::Bool=true` shall the disk data be garbage-collected when the cube goes out of scope? - * `overwrite::Bool=false` overwrite cube if it already exists - * `properties=Dict{String,Any}()` additional cube properties - * `globalproperties=Dict{String,Any}` global attributes to be added to the dataset - * `fillvalue= T>:Missing ? defaultfillval(Base.nonmissingtype(T)) : nothing` fill value - * `datasetaxis="Variables"` special treatment of a categorical axis that gets written into separate zarr arrays - * `layername="layer"` Fallback name of the variable stored in the dataset if no `datasetaxis` is found - """ + function createdataset(DS::Type,axlist; kwargs...) 
+ +Creates a new dataset with axes specified in `axlist`. Each axis must be a subtype +of `CubeAxis`. A new empty Zarr array will be created and can serve as a sink for +`mapCube` operations. + +### Keyword arguments + +* `path=""` location where the new cube is stored +* `T=Union{Float32,Missing}` data type of the target cube +* `chunksize = ntuple(i->length(axlist[i]),length(axlist))` chunk sizes of the array +* `chunkoffset = ntuple(i->0,length(axlist))` offsets of the chunks +* `persist::Bool=true` shall the disk data be garbage-collected when the cube goes out of scope? +* `overwrite::Bool=false` overwrite cube if it already exists +* `properties=Dict{String,Any}()` additional cube properties +* `globalproperties=Dict{String,Any}` global attributes to be added to the dataset +* `fillvalue= T>:Missing ? defaultfillval(Base.nonmissingtype(T)) : nothing` fill value +* `datasetaxis="Variables"` special treatment of a categorical axis that gets written into separate zarr arrays +* `layername="layer"` Fallback name of the variable stored in the dataset if no `datasetaxis` is found +""" function createdataset( DS, axlist; From d1955813bf26d867b8d94037b0033352fff230dc Mon Sep 17 00:00:00 2001 From: Lazaro Alonso Date: Sun, 15 Dec 2024 23:38:56 +0100 Subject: [PATCH 13/14] no Rasters --- docs/Project.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/Project.toml b/docs/Project.toml index 63bbbb9c..e6ae7333 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -24,7 +24,6 @@ MultivariateStats = "6f286f6a-111f-5878-ab1e-185364afe411" NetCDF = "30363a11-5582-574a-97bb-aa9a979735b9" OnlineStats = "a15396b6-48d5-5d58-9928-6d29437db91e" PlotUtils = "995b91a9-d308-5afd-9ec6-746e21dbc043" -Rasters = "a3a2b9e3-a471-40c9-b274-f788e487c689" SkipNan = "aed68c70-c8b0-4309-8cd1-d392a74f991a" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" TimeSeries = "9e3dc215-6440-5c97-bce1-76c03772f85e" From 856eaf0859bb4579e3ca998784692bbc848570a0 Mon Sep 17 00:00:00 2001 From: 
Lazaro Alonso Date: Mon, 16 Dec 2024 14:02:30 +0100 Subject: [PATCH 14/14] more docs --- docs/src/UserGuide/group.md | 6 ++--- docs/src/UserGuide/types.md | 49 +++++++++++++++++++++++++++++++++++-- src/DAT/DAT.jl | 2 +- src/DatasetAPI/Datasets.jl | 16 ++++++------ src/dims.jl | 6 +---- 5 files changed, 60 insertions(+), 19 deletions(-) diff --git a/docs/src/UserGuide/group.md b/docs/src/UserGuide/group.md index 4357eaa7..0a9a8c4d 100644 --- a/docs/src/UserGuide/group.md +++ b/docs/src/UserGuide/group.md @@ -65,15 +65,15 @@ end ```` ::: +> [!IMPORTANT] +> In what follows, note how we are referencing the _time_ dimension via _YAX.time_. This approach is used to avoid name clashes with _time_ (_Time_) from **Base** (**Dates**). For convenience, we have defined the **Dimensions** _time_ and _Time_ in **YAXArrays.jl**, which are only accessible when explicitly called. + Now, we continue with the `groupby` operations as usual ````@ansi compareXarray g_ds = groupby(ds, YAX.time => seasons(; start=December)) ```` -> [!IMPORTANT] -> Note how we are referencing the `time` dimension via `YAX.time`. This approach is used to avoid name clashes with `time` (`Time`) from `Base` (`Dates`). For convenience, we have defined the `Dimensions` `time` and `Time` in `YAXArrays.jl`, which are only accessible when explicitly called. - And the mean per season is calculated as follows ````@ansi compareXarray diff --git a/docs/src/UserGuide/types.md b/docs/src/UserGuide/types.md index 9f9698db..b574838a 100644 --- a/docs/src/UserGuide/types.md +++ b/docs/src/UserGuide/types.md @@ -33,6 +33,51 @@ This data structure is useful when we want to use all variables at once. For example, the arrays temperature and precipitation which are measured at the same locations and dates can be combined into a single cube. A more formal definition of Data Cubes are given in [Mahecha et al. 
2020](https://doi.org/10.5194/esd-11-201-2020) -## Dimension +## Dimensions -A `Dimension` or axis as defined by [DimensionalData.jl](https://rafaqz.github.io/DimensionalData.jl/dev/dimensions) adds tick labels, e.g., to each row or column of an array. It's name is used to access particular subsets of that array. \ No newline at end of file +A `Dimension` or axis as defined by [DimensionalData.jl](https://rafaqz.github.io/DimensionalData.jl/dev/dimensions) adds tick labels, e.g., to each row or column of an array. Its name is used to access particular subsets of that array. + +### Lon, Lat, time + +For convenience, several `Dimensions` have been defined in `YAXArrays.jl`, but only a few have been exported. The remaining dimensions can be used by calling them explicitly. See the following table for an overview: + + +| Dimension | exported | usage: `using YAXArrays: YAXArrays as YAX` | +| :-----------------| :--------|--------------------------------------------| +| `lon` | ✔ | `lon` or `YAX.lon` | +| `Lon` | ✔ | `Lon` or `YAX.Lon` | +| `longitude` | ✔ | `longitude` or `YAX.longitude` | +| `Longitude` | ✔ | `Longitude` or `YAX.Longitude` | +| `lat` | ✔ | `lat` or `YAX.lat` | +| `Lat` | ✔ | `Lat` or `YAX.Lat` | +| `latitude` | ✔ | `latitude` or `YAX.latitude` | +| `Latitude` | ✔ | `Latitude` or `YAX.Latitude` | +| `time` | ✘ | `YAX.time` | +| `Time` | ✘ | `YAX.Time` | +| `rlat` | ✘ | `YAX.rlat` | +| `rlon` | ✘ | `YAX.rlon` | +| `lat_c` | ✘ | `YAX.lat_c` | +| `lon_c` | ✘ | `YAX.lon_c` | +| `height` | ✘ | `YAX.height` | +| `depth` | ✘ | `YAX.depth` | +| `Variables` | ✔ | `Variables` or `YAX.Variables` | + + +::: info + +If the dimension you are looking for is not in that table, you can define your own by doing + +````julia +using DimensionalData: @dim, XDim # If you want it to be a subtype of XDim +@dim newDim XDim "Your newDim label" +```` + +Sometimes you want to operate on a specific dimension in your dataset (for example, a dimension named `date`). In that case, doing + 
+````julia +groupby(ds, Dim{:date} => seasons()) +```` + +should do the job. + +::: \ No newline at end of file diff --git a/src/DAT/DAT.jl b/src/DAT/DAT.jl index 37e8e59a..f9f3e2ed 100644 --- a/src/DAT/DAT.jl +++ b/src/DAT/DAT.jl @@ -402,7 +402,7 @@ Map a given function `fun` over slices of the data cube `cube`. Use InDims to discribe the input dimensions and OutDims to describe the output dimensions of the function. ### Keyword arguments -* `max_cache=YAXDefaults.max_cache` Float64 maximum size of blocks that are read into memory in bits e.g. ```max_cache=5.0e8```. Or String. e.g. ```max_cache="10MB" or ```max_cache=1GB``` defaults to approx 10Mb. +* `max_cache=YAXDefaults.max_cache` Float64 maximum size of blocks that are read into memory in bits e.g. `max_cache=5.0e8`. Or String. e.g. `max_cache="10MB"` or `max_cache="1GB"`. Defaults to approx 10Mb. * `indims::InDims` List of input cube descriptors of type [`InDims`](@ref) for each input data cube. * `outdims::OutDims` List of output cube descriptors of type [`OutDims`](@ref) for each output cube. * `inplace` does the function write to an output array inplace or return a single value> defaults to `true` diff --git a/src/DatasetAPI/Datasets.jl b/src/DatasetAPI/Datasets.jl index 54d552a9..16a6bd5d 100644 --- a/src/DatasetAPI/Datasets.jl +++ b/src/DatasetAPI/Datasets.jl @@ -16,9 +16,9 @@ using DimensionalData: DimensionalData as DD export Dataset, Cube, open_dataset, to_dataset, savecube, savedataset, open_mfdataset """ - Dataset object which stores an `OrderedDict` of YAXArrays with Symbol keys. - a dictionary of CubeAxes and a Dictionary of general properties. - A dictionary can hold cubes with differing axes. But it will share the common axes between the subcubes. +Dataset object which stores an `OrderedDict` of YAXArrays with Symbol keys, +a dictionary of CubeAxes and a dictionary of general properties. +A dictionary can hold cubes with differing axes. But it will share the common axes between the subcubes. 
""" struct Dataset cubes::OrderedDict{Symbol,YAXArray} @@ -46,10 +46,10 @@ end to_dataset(c;datasetaxis = "Variables", layername = "layer") Convert a Data Cube into a Dataset. It is possible to treat one of -the Cube's axes as a "DatasetAxis" i.e. the cube will be split into +the Cube's axes as a `datasetaxis` i.e. the cube will be split into different parts that become variables in the Dataset. If no such axis is specified or found, there will only be a single variable -in the dataset with the name `layername` +in the dataset with the name `layername`. """ function to_dataset(c;datasetaxis = "Variables", layername = get(c.properties,"name","layer")) axlist = DD.dims(c) @@ -635,13 +635,13 @@ function setchunks(ds::Dataset, chunks) end """ - savedataset(ds::Dataset; path = "", persist = nothing, overwrite = false, append = false, skeleton=false, backend = :all, driver = backend, max_cache = 5e8, writefac=4.0) + savedataset(ds::Dataset; path= "", persist=nothing, overwrite=false, append=false, skeleton=false, backend=:all, driver=backend, max_cache=5e8, writefac=4.0) -Saves a Dataset into a file at `path` with the format given by `driver`, i.e., driver=:netcdf or driver=:zarr. +Saves a Dataset into a file at `path` with the format given by `driver`, i.e., `driver=:netcdf` or `driver=:zarr`. !!! warning - overwrite = true, deletes ALL your data and it will create a new file. + `overwrite=true`, deletes ALL your data and it will create a new file. 
""" function savedataset( ds::Dataset; diff --git a/src/dims.jl b/src/dims.jl index 0ad73006..c7aefda9 100644 --- a/src/dims.jl +++ b/src/dims.jl @@ -1,9 +1,6 @@ using DimensionalData: @dim, YDim, XDim, ZDim, TimeDim export Lat, lat, latitude, Latitude -export rlat, lat_c -export Lon, lon, longitude, long, Longitude -export rlon, lon_c -export height, depth +export Lon, lon, longitude, Longitude export Variables @dim Lat YDim "Latitude" @@ -15,7 +12,6 @@ export Variables @dim Lon XDim "Longitude" @dim lon XDim "Longitude" -@dim long XDim "Longitude" @dim longitude XDim "Longitude" @dim Longitude XDim "Longitude" @dim rlon XDim