Skip to content

Commit

Permalink
Detrend on different domains (#54)
Browse files Browse the repository at this point in the history
  • Loading branch information
juliohm authored Sep 17, 2024
1 parent b0e0512 commit e2c7898
Show file tree
Hide file tree
Showing 11 changed files with 50 additions and 147 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ ColumnSelectors = "0.1"
Combinatorics = "1.0"
DataScienceTraits = "0.4"
Distances = "0.10"
GeoStatsModels = "0.4"
GeoStatsModels = "0.5"
GeoStatsProcesses = "0.6"
GeoTables = "1.21"
LinearAlgebra = "1.9"
Expand Down
125 changes: 29 additions & 96 deletions src/detrend.jl
Original file line number Diff line number Diff line change
Expand Up @@ -42,121 +42,54 @@ Detrend(cols::C...; degree=1) where {C<:Column} = Detrend(selector(cols), degree
isrevertible(::Type{<:Detrend}) = true

function apply(transform::Detrend, geotable)
table = values(geotable)
cols = Tables.columns(table)
dom = domain(geotable)
tab = values(geotable)
cols = Tables.columns(tab)
names = Tables.columnnames(cols)
snames = transform.selector(names)

tdata = trend(geotable, snames; degree=transform.degree)
ttable = values(tdata)
tcols = Tables.columns(ttable)
gview = geotable |> Select(snames)
model = Polynomial(transform.degree)
fitted = GeoStatsModels.fit(model, gview)

ncols = map(names) do n
x = Tables.getcolumn(cols, n)
if n snames
μ = Tables.getcolumn(tcols, n)
x .- μ
ncols = map(names) do name
z = Tables.getcolumn(cols, name)
(i) = GeoStatsModels.predict(fitted, name, centroid(dom, i))
if name snames
@inbounds [z[i] - (i) for i in 1:nelements(dom)]
else
x
z
end
end

𝒯 = (; zip(names, ncols)...)
newtable = 𝒯 |> Tables.materializer(table)
newtab = 𝒯 |> Tables.materializer(tab)

newgeotable = georef(newtable, domain(geotable))
newgeotable = georef(newtab, dom)

newgeotable, (snames, tcols)
newgeotable, (snames, fitted)
end

function revert(::Detrend, newgeotable, cache)
newtable = values(newgeotable)
cols = Tables.columns(newtable)
names = Tables.schema(newtable).names
newdom = domain(newgeotable)
newtab = values(newgeotable)
cols = Tables.columns(newtab)
names = Tables.columnnames(cols)

snames, tcols = cache
snames, fitted = cache

ncols = map(names) do n
x = Tables.getcolumn(cols, n)
if n snames
μ = Tables.getcolumn(tcols, n)
x .+ μ
ocols = map(names) do name
z = Tables.getcolumn(cols, name)
(i) = GeoStatsModels.predict(fitted, name, centroid(newdom, i))
if name snames
@inbounds [z[i] + (i) for i in 1:nelements(newdom)]
else
x
z
end
end

𝒯 = (; zip(names, ncols)...)
table = 𝒯 |> Tables.materializer(newtable)

georef(table, domain(newgeotable))
end

"""
polymat(xs, d)
Return the matrix of monomials for the iterator `xs`, i.e.
for each item `x = (x₁, x₂,…, xₙ)` in `xs`, evaluate
the monomial terms of the expansion `(x₁ + x₂ + ⋯ + xₙ)ᵈ`
for a given degree `d`.
The resulting matrix has a number of rows that is equal
to the number of items in the iterator `xs`. The number
of columns is a function of the degree. For `d=0`, a
single column of ones is returned that corresponds to
the constant term `x₁⁰⋅x₂⁰⋅⋯⋅xₙ⁰` for all items in `xs`.
"""
function polymat(xs, d)
x = first(xs)
n = length(x)
es = Iterators.flatten(multiexponents(n, d) for d in 0:d)
ps = [[prod(x .^ e) for x in xs] for e in es]
reduce(hcat, ps)
end

"""
trend(data, vars; degree=1)
Return the deterministic spatial trend for the variables `vars`
in the spatial `data`. Approximate the trend with a polynomial
of given `degree`.
## References
* Menafoglio, A., Secchi, P. 2013. [A Universal Kriging predictor
for spatially dependent functional data of a Hilbert Space]
(https://doi.org/10.1214/13-EJS843)
"""
function trend(data, vars::AbstractVector{Symbol}; degree=1)
𝒯 = values(data)
𝒟 = domain(data)

# retrieve columns
cols = Tables.columns(𝒯)

# build polynomial drift terms
coords(𝒟, i) = ustrip.(to(centroid(𝒟, i)))
xs = (coords(𝒟, i) for i in 1:nelements(𝒟))
F = polymat(xs, degree)

# eqs 25 and 26 in Menafoglio, A., Secchi, P. 2013.
ms = map(vars) do var
z = Tables.getcolumn(cols, var)
a = (F'F \ F') * z
F * a
end

ctor = Tables.materializer(𝒯)
means = ctor((; zip(vars, ms)...))

georef(means, 𝒟)
end

trend(data, var::Symbol; kwargs...) = trend(data, [var]; kwargs...)
𝒯 = (; zip(names, ocols)...)
table = 𝒯 |> Tables.materializer(newtab)

function trend(data; kwargs...)
𝒯 = values(data)
s = Tables.schema(𝒯)
vars = collect(s.names)
trend(data, vars; kwargs...)
georef(table, newdom)
end
2 changes: 1 addition & 1 deletion test/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ GeoTables = "e502b557-6362-48c1-8219-d30d308dcdb0"
ImageIO = "82e4d734-157c-48bb-816b-45c225c6df19"
ImageQuilting = "e8712464-036d-575c-85ac-952ae31322ab"
Meshes = "eacbb407-ea5a-433e-ab97-5258b1ca43fa"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
TableTransforms = "0d432bfd-3ee1-4ac1-886a-39f05cc69a3e"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
Expand Down
1 change: 0 additions & 1 deletion test/clustering.jl
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,6 @@
end

@testset "GSC" begin
Random.seed!(2022)
𝒮 = georef((Z=[10sin(i / 10) + j for i in 1:100, j in 1:100],))
C = 𝒮 |> GSC(50, 2.0)
@test Set(C.CLUSTER) == Set(1:50)
Expand Down
39 changes: 11 additions & 28 deletions test/detrend.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
@testset "Detrend" begin
rng = MersenneTwister(42)
# reversibility on the same domain
rng = StableRNG(42)
l = range(-1, stop=1, length=100)
μ = [x^2 + y^2 for x in l, y in l]
ϵ = 0.1rand(rng, 100, 100)
Expand All @@ -11,31 +12,13 @@
R = Tables.matrix(values(r))
@test isapprox(D, R, atol=1e-6)

rng = MersenneTwister(42)
# constant trend
d = georef((z=rand(rng, 100),), CartesianGrid(100))
= GeoStatsTransforms.trend(d, :z).z
@test all(abs.(diff(z̄)) .< 0.01)

# linear trend
μ = range(0, stop=1, length=100)
ϵ = 0.1rand(rng, 100)
d = georef((z=μ + ϵ,), CartesianGrid(100))
= GeoStatsTransforms.trend(d, :z).z
@test all([abs(z̄[i] - μ[i]) < 0.1 for i in 1:length(z̄)])

# quadratic trend
r = range(-1, stop=1, length=100)
μ = [x^2 + y^2 for x in r, y in r]
ϵ = 0.1rand(rng, 100, 100)
d = georef((z=μ + ϵ,))
= GeoStatsTransforms.trend(d, :z, degree=2)
= reshape(d̄.z, 100, 100)
@test all([abs(z̄[i] - μ[i]) < 0.1 for i in 1:length(z̄)])

d = georef((x=rand(rng, 10), y=rand(rng, 10)), rand(rng, Point, 10))
𝒯 = d |> GeoStatsTransforms.trend |> values
s = Tables.schema(𝒯)
@test s.names == (:x, :y)
@test s.types == (Float64, Float64)
# reversibility on different domains
g = CartesianGrid(10, 10)
d = georef((z=rand(100),), g)
p = Detrend(:z, degree=2)
n, c = apply(p, d)
n2 = georef((z=[n.z; n.z],), [centroid.(g); centroid.(g)])
r2 = revert(p, n2, c)
@test r2.z[1:100] d.z
@test r2.z[101:200] d.z
end
10 changes: 3 additions & 7 deletions test/interpmissing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,37 +10,33 @@
gtb = georef((; z), grid)
variogram = GaussianVariogram(range=35.0, nugget=0.0)

Random.seed!(2021)
ngtb = gtb |> InterpolateMissing(:z => Kriging(variogram), maxneighbors=3)
@test isapprox(ngtb.z[linds[25, 25]], 1.0, atol=1e-3)
@test isapprox(ngtb.z[linds[50, 75]], 0.0, atol=1e-3)
@test isapprox(ngtb.z[linds[75, 50]], 1.0, atol=1e-3)

Random.seed!(2021)
ngtb = gtb |> InterpolateMissing(:z => Kriging(variogram), maxneighbors=3)
@test isapprox(ngtb.z[linds[25, 25]], 1.0, atol=1e-3)
@test isapprox(ngtb.z[linds[50, 75]], 0.0, atol=1e-3)
@test isapprox(ngtb.z[linds[75, 50]], 1.0, atol=1e-3)

Random.seed!(2021)
ngtb = gtb |> InterpolateMissing(:z => Kriging(variogram), maxneighbors=3, neighborhood=MetricBall(100.0))
@test isapprox(ngtb.z[linds[25, 25]], 1.0, atol=1e-3)
@test isapprox(ngtb.z[linds[50, 75]], 0.0, atol=1e-3)
@test isapprox(ngtb.z[linds[75, 50]], 1.0, atol=1e-3)

# units
grid = CartesianGrid(5, 5)
gtb = georef((; T=shuffle([[1.0, 0.0, 1.0]; fill(missing, 25 - 3)]) * u"K"), grid)
grid = CartesianGrid(3)
gtb = georef((; T=[1.0, NaN, 1.0] * u"K"), grid)
ngtb = gtb |> InterpolateMissing(IDW())
@test unit(eltype(ngtb.T)) == u"K"

# affine units
gtb = georef((; T=shuffle([[-272.15, -273.15, -272.15]; fill(missing, 25 - 3)]) * u"°C"), grid)
gtb = georef((; T=[1.0, NaN, 1.0] * u"°C"), grid)
ngtb = gtb |> InterpolateMissing(IDW())
@test unit(eltype(ngtb.T)) == u"K"

# default model is NN
Random.seed!(2021)
pset = PointSet(rand(Point, 5))
gtb = georef((; z=[1.0, missing, 2.0, missing, 3.0], c=[missing, "a", "b", "c", missing]), pset)
ngtb = gtb |> InterpolateMissing()
Expand Down
10 changes: 3 additions & 7 deletions test/interpnan.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,37 +10,33 @@
gtb = georef((; z), grid)
variogram = GaussianVariogram(range=35.0, nugget=0.0)

Random.seed!(2021)
ngtb = gtb |> InterpolateNaN(:z => Kriging(variogram), maxneighbors=3)
@test isapprox(ngtb.z[linds[25, 25]], 1.0, atol=1e-3)
@test isapprox(ngtb.z[linds[50, 75]], 0.0, atol=1e-3)
@test isapprox(ngtb.z[linds[75, 50]], 1.0, atol=1e-3)

Random.seed!(2021)
ngtb = gtb |> InterpolateNaN(:z => Kriging(variogram), maxneighbors=3)
@test isapprox(ngtb.z[linds[25, 25]], 1.0, atol=1e-3)
@test isapprox(ngtb.z[linds[50, 75]], 0.0, atol=1e-3)
@test isapprox(ngtb.z[linds[75, 50]], 1.0, atol=1e-3)

Random.seed!(2021)
ngtb = gtb |> InterpolateNaN(:z => Kriging(variogram), maxneighbors=3, neighborhood=MetricBall(100.0))
@test isapprox(ngtb.z[linds[25, 25]], 1.0, atol=1e-3)
@test isapprox(ngtb.z[linds[50, 75]], 0.0, atol=1e-3)
@test isapprox(ngtb.z[linds[75, 50]], 1.0, atol=1e-3)

# units
grid = CartesianGrid(5, 5)
gtb = georef((; T=shuffle([[1.0, 0.0, 1.0]; fill(NaN, 25 - 3)]) * u"K"), grid)
grid = CartesianGrid(3)
gtb = georef((; T=[1.0, NaN, 1.0] * u"K"), grid)
ngtb = gtb |> InterpolateNaN(IDW())
@test unit(eltype(ngtb.T)) == u"K"

# affine units
gtb = georef((; T=shuffle([[-272.15, -273.15, -272.15]; fill(NaN, 25 - 3)]) * u"°C"), grid)
gtb = georef((; T=[1.0, NaN, 1.0] * u"°C"), grid)
ngtb = gtb |> InterpolateNaN(IDW())
@test unit(eltype(ngtb.T)) == u"K"

# default model is NN
Random.seed!(2021)
pset = PointSet(rand(Point, 5))
gtb = georef((; z=[1.0, NaN, 2.0, NaN, 3.0]), pset)
ngtb = gtb |> InterpolateNaN()
Expand Down
3 changes: 0 additions & 3 deletions test/interpneighbors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,16 @@
linds = LinearIndices(size(grid))
variogram = GaussianVariogram(range=35.0, nugget=0.0)

Random.seed!(2021)
ngtb = gtb |> InterpolateNeighbors(grid, :z => Kriging(variogram), maxneighbors=3)
@test isapprox(ngtb.z[linds[25, 25]], 1.0, atol=1e-3)
@test isapprox(ngtb.z[linds[50, 75]], 0.0, atol=1e-3)
@test isapprox(ngtb.z[linds[75, 50]], 1.0, atol=1e-3)

Random.seed!(2021)
ngtb = gtb |> InterpolateNeighbors(grid, :z => Kriging(variogram), maxneighbors=3)
@test isapprox(ngtb.z[linds[25, 25]], 1.0, atol=1e-3)
@test isapprox(ngtb.z[linds[50, 75]], 0.0, atol=1e-3)
@test isapprox(ngtb.z[linds[75, 50]], 1.0, atol=1e-3)

Random.seed!(2021)
ngtb = gtb |> InterpolateNeighbors(grid, :z => Kriging(variogram), maxneighbors=3, neighborhood=MetricBall(100.0))
@test isapprox(ngtb.z[linds[25, 25]], 1.0, atol=1e-3)
@test isapprox(ngtb.z[linds[50, 75]], 0.0, atol=1e-3)
Expand Down
1 change: 0 additions & 1 deletion test/interpolate.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
linds = LinearIndices(size(grid))
variogram = GaussianVariogram(range=35.0, nugget=0.0)

Random.seed!(2021)
ngtb = gtb |> Interpolate(grid, :z => Kriging(variogram))
@test isapprox(ngtb.z[linds[25, 25]], 1.0, atol=1e-3)
@test isapprox(ngtb.z[linds[50, 75]], 0.0, atol=1e-3)
Expand Down
2 changes: 1 addition & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ using GeoStatsImages
using TableTransforms
using CategoricalArrays
using Statistics
using Test, Random
using Test, StableRNGs
using FileIO: load
import DataScienceTraits as DST

Expand Down
2 changes: 1 addition & 1 deletion test/uniquecoords.jl
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@
end

# units and missings
sdata = georef((; T=shuffle([fill(missing, 50); rand(50)]) * u"K"), pset)
sdata = georef((; T=[fill(missing, 50); rand(50)] * u"K"), pset)
ndata = sdata |> UniqueCoords()
@test nrow(ndata) == 10
@test unit(eltype(ndata.T)) == u"K"
Expand Down

0 comments on commit e2c7898

Please sign in to comment.