Skip to content

Commit

Permalink
Allow indexing with nd arrays of integers and CartesianIndex (#151)
Browse files Browse the repository at this point in the history
* Allow indexing with nd arrays of integers and cart

* simplify
  • Loading branch information
meggart authored Feb 21, 2024
1 parent a48787c commit c817c5f
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 32 deletions.
1 change: 0 additions & 1 deletion src/array.jl
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,6 @@ end
_reverse1(a) = _reverse(a, 1)
function _reverse1(a, start::Int, stop::Int)
inds = [firstindex(a):start-1; stop:-1:start; stop+1:lastindex(a)]
@show inds
return view(a, inds)
end

Expand Down
33 changes: 21 additions & 12 deletions src/batchgetindex.jl
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@ end
has_chunk_gap(cs,ids) = true

#Compute the number of possible indices in the hyperrectangle
span(v::AbstractVector{<:Integer}) = 1 -(-(extrema(v)...))
function span(v::AbstractVector{CartesianIndex{N}}) where N
span(v::AbstractArray{<:Integer}) = 1 -(-(extrema(v)...))
function span(v::AbstractArray{CartesianIndex{N}}) where N
minind,maxind = extrema(v)
prod((maxind-minind+oneunit(minind)).I)
end
Expand All @@ -78,7 +78,7 @@ function span(v::AbstractArray{Bool})
end
#The number of indices to actually be read
numind(v::AbstractArray{Bool}) = sum(v)
numind(v::Union{AbstractVector{<:Integer},AbstractVector{<:CartesianIndex}})=length(v)
numind(v::Union{AbstractArray{<:Integer},AbstractArray{<:CartesianIndex}})=length(v)

function is_sparse_index(ids; density_threshold = 0.5)
indexdensity = numind(ids) / span(ids)
Expand All @@ -91,16 +91,17 @@ function process_index(i, cs, strategy::Union{ChunkRead,SubRanges})
end


function process_index(i::AbstractVector{<:Integer}, cs, ::ChunkRead)
function process_index(i::AbstractArray{<:Integer,N}, cs, ::ChunkRead) where N
csnow = first(cs)
chunksdict = Dict{Int,Vector{Pair{Int,Int}}}()
chunksdict = Dict{Int,Vector{Pair{Int,CartesianIndex{N}}}}()
# Look for affected chunks
for (outindex,dataindex) in enumerate(i)
for outindex in CartesianIndices(i)
dataindex = i[outindex]
cI = findchunk(csnow,dataindex)
a = get!(()->Pair{Int,Int}[],chunksdict,cI)
push!(a,(dataindex=>outindex))
end
tempinds,datainds,outinds = Tuple{Vector{Int}}[], Tuple{UnitRange{Int}}[], Tuple{Vector{Int}}[]
tempinds,datainds,outinds = Tuple{Vector{Int}}[], Tuple{UnitRange{Int}}[], Tuple{Vector{CartesianIndex{N}}}[]
maxtempind = -1
for (cI,a) in chunksdict
dataind = extrema(first,a)
Expand All @@ -110,7 +111,7 @@ function process_index(i::AbstractVector{<:Integer}, cs, ::ChunkRead)
push!(tempinds, (tempind,))
maxtempind = max(maxtempind,maximum(tempind))
end
(length(i),), ((maxtempind),), (outinds,), (tempinds,), (datainds,), Base.tail(cs)
size(i), ((maxtempind),), (outinds,), (tempinds,), (datainds,), Base.tail(cs)
end

function find_subranges_sorted(inds,allow_steprange=false)
Expand Down Expand Up @@ -154,9 +155,17 @@ function find_subranges_sorted(inds,allow_steprange=false)
rangelist, outputinds
end

#For index arrays >1D we need to store the cartesian indices in the sort
#perm result
function mysortperm(i)
p = collect(vec(CartesianIndices(i)))
sort!(p;by=Base.Fix1(getindex,i))
p
end
mysortperm(i::AbstractVector) = sortperm(i)
##Implement NCDatasets behavior of splitting list of indices into ranges
function process_index(i::AbstractVector{<:Integer}, cs, s::SubRanges)
if issorted(i)
function process_index(i::AbstractArray{<:Integer,N}, cs, s::SubRanges) where N
if i isa AbstractVector && issorted(i)
rangelist, outputinds = find_subranges_sorted(i,allow_steprange(s))
datainds = tuple.(rangelist)
tempinds = map(rangelist,outputinds) do rl,oi
Expand All @@ -168,7 +177,7 @@ function process_index(i::AbstractVector{<:Integer}, cs, s::SubRanges)
tempsize = maximum(length(rangelist))
(length(i),), (tempsize,), (outinds,), (tempinds,), (datainds,), Base.tail(cs)
else
p = sortperm(i)
p = mysortperm(i)
i_sorted = view(i,p)
rangelist, outputinds = find_subranges_sorted(i_sorted,allow_steprange(s))
datainds = tuple.(rangelist)
Expand All @@ -181,7 +190,7 @@ function process_index(i::AbstractVector{<:Integer}, cs, s::SubRanges)
(view(p,oi),)
end
tempsize = maximum(length(rangelist))
(length(i),), (tempsize,), (outinds,), (tempinds,), (datainds,), Base.tail(cs)
size(i), (tempsize,), (outinds,), (tempinds,), (datainds,), Base.tail(cs)
end
end
function process_index(i::AbstractArray{Bool,N}, cs, cr::ChunkRead) where N
Expand Down
28 changes: 11 additions & 17 deletions src/diskarray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -87,22 +87,15 @@ _resolve_indices(::Tuple{}, ::Tuple{}, output_size, temp_sizes, output_indices,
#No dimension left in array, only singular indices allowed
function _resolve_indices(::Tuple{}, i, output_size, temp_sizes, output_indices, temp_indices, data_indices, nb)
inow = first(i)
if inow isa Integer
inow == 1 || throw(ArgumentError("Trailing indices must be 1"))
_resolve_indices((), Base.tail(i), output_size, temp_sizes, output_indices, temp_indices, data_indices, nb)
elseif inow isa AbstractVector
(length(inow) == 1 && first(inow) == 1) || throw(ArgumentError("Trailing indices must be 1"))
output_size = (output_size..., 1)
output_indices = (output_indices..., 1)
_resolve_indices((), Base.tail(i), output_size, temp_sizes, output_indices, temp_indices, data_indices, nb)
else
throw(ArgumentError("Trailing indices must be 1"))
end
(length(inow) == 1 && only(inow) == 1) || throw(ArgumentError("Trailing indices must be 1"))
output_size = (output_size..., size(inow)...)
output_indices = (output_indices..., size(inow)...)
_resolve_indices((), Base.tail(i), output_size, temp_sizes, output_indices, temp_indices, data_indices, nb)
end
#Still dimensions left, but no indices available
function _resolve_indices(cs, ::Tuple{}, output_size, temp_sizes, output_indices, temp_indices, data_indices, nb)
csnow = first(cs)
arraysize_from_chunksize(csnow) == 1 || throw(ArgumentError("Wrong indexing"))
arraysize_from_chunksize(csnow) == 1 || throw(ArgumentError("Indices can only be omitted for trailing singleton dimensions"))
data_indices = (data_indices..., 1:1)
temp_sizes = (temp_sizes..., 1)
temp_indices = (temp_indices..., 1)
Expand All @@ -120,9 +113,9 @@ end
function process_index(i::AbstractUnitRange, cs)
(length(i),), (length(i),), (Colon(),), (Colon(),), (i,), Base.tail(cs)
end
function process_index(i::AbstractVector{<:Integer}, cs, ::NoBatch)
function process_index(i::AbstractArray{<:Integer}, cs, ::NoBatch)
indmin, indmax = extrema(i)
(length(i),), ((indmax - indmin + 1),), (Colon(),), ((i .- (indmin - 1)),), (indmin:indmax,), Base.tail(cs)
size(i), ((indmax - indmin + 1),), map(_->Colon(),size(i)), ((i .- (indmin - 1)),), (indmin:indmax,), Base.tail(cs)
end
function process_index(i::AbstractArray{Bool,N}, cs, ::NoBatch) where {N}
csnow, csrem = splitcs(i, cs)
Expand All @@ -133,22 +126,23 @@ function process_index(i::AbstractArray{Bool,N}, cs, ::NoBatch) where {N}
tempinds = view(i, range.(indmin, indmax)...)
(sum(i),), tempsize, (Colon(),), (tempinds,), range.(indmin, indmax), csrem
end
function process_index(i::AbstractVector{<:CartesianIndex{N}}, cs, ::NoBatch) where {N}
function process_index(i::AbstractArray{<:CartesianIndex{N}}, cs, ::NoBatch) where {N}
csnow, csrem = splitcs(i, cs)
s = arraysize_from_chunksize.(csnow)
cindmin, cindmax = extrema(view(CartesianIndices(s), i))
indmin, indmax = cindmin.I, cindmax.I
tempsize = indmax .- indmin .+ 1
tempoffset = cindmin - oneunit(cindmin)
tempinds = i .- tempoffset
(length(i),), tempsize, (Colon(),), (tempinds,), range.(indmin, indmax), csrem
outinds = map(_->Colon(),size(i))
size(i), tempsize, outinds, (tempinds,), range.(indmin, indmax), csrem
end
function process_index(i::CartesianIndices{N}, cs, ::NoBatch) where {N}
_, csrem = splitcs(i, cs)
cols = map(_ -> Colon(), i.indices)
length.(i.indices), length.(i.indices), cols, cols, i.indices, csrem
end
splitcs(i::AbstractVector{<:CartesianIndex}, cs) = splitcs(first(i).I, (), cs)
splitcs(i::AbstractArray{<:CartesianIndex}, cs) = splitcs(first(i).I, (), cs)
splitcs(i::AbstractArray{Bool}, cs) = splitcs(size(i), (), cs)
splitcs(i::CartesianIndices, cs) = splitcs(i.indices, (), cs)
splitcs(_, cs) = (first(cs),), Base.tail(cs)
Expand Down
41 changes: 39 additions & 2 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,7 @@ end
@testset "Getindex/Setindex with vectors" begin
a = AccessCountDiskArray(reshape(1:20, 4, 5, 1); chunksize=(4, 1, 1))
@test a[:, [1, 4], 1] == trueparent(a)[:, [1, 4], 1]
@test_broken getindex_count(a) == 2
@test getindex_count(a) == 1
coords = CartesianIndex.([(1, 1, 1), (3, 1, 1), (2, 4, 1), (4, 4, 1)])
@test a[coords] == trueparent(a)[coords]
@test_broken getindex_count(a) == 4
Expand Down Expand Up @@ -497,6 +497,8 @@ end
a_inner = rand(100)
inds_sorted = [1,1,1,3,5,6,6,7,10,13,16,16,19,20]
inds_unsorted = [7, 5, 1, 16, 1, 10, 20, 6, 19, 1, 13, 6, 3, 16]
inds_sorted_matrix = reshape(inds_sorted,7,2)
inds_unsorted_matrix = reshape(inds_unsorted,7,2)
a = AccessCountDiskArray(a_inner,chunksize=(10,),batchstrategy=DiskArrays.ChunkRead(NoStepRange(),0.5));
b1 = a[inds_sorted];
@test b1 == a_inner[inds_sorted]
Expand All @@ -505,6 +507,17 @@ end
b2 = a[inds_unsorted]
@test b2 == a_inner[inds_unsorted]
@test getindex_log(a) == [(1:20,)]
empty!(a.getindex_log)
b3 = a[inds_sorted_matrix];
@test size(b3) == size(a_inner[inds_sorted_matrix])
@test b3 == a_inner[inds_sorted_matrix]
@test getindex_log(a) == [(1:20,)]
empty!(a.getindex_log)
b4 = a[inds_unsorted_matrix]
@test b4 == a_inner[inds_unsorted_matrix]
@test getindex_log(a) == [(1:20,)]
empty!(a.getindex_log)


a = AccessCountDiskArray(a_inner,chunksize=(5,),batchstrategy=DiskArrays.ChunkRead(CanStepRange(),0.8));
b1 = a[inds_sorted];
Expand All @@ -514,6 +527,14 @@ end
b2 = a[inds_unsorted]
@test b2 == a_inner[inds_unsorted]
@test sort(getindex_log(a)) == [(1:5,), (6:10,), (13:13,), (16:20,)]
empty!(a.getindex_log)
b3 = a[inds_sorted_matrix];
@test b3 == a_inner[inds_sorted_matrix]
@test sort(getindex_log(a)) == [(1:5,), (6:10,), (13:13,), (16:20,)]
empty!(a.getindex_log)
b4 = a[inds_unsorted_matrix]
@test b4 == a_inner[inds_unsorted_matrix]
@test sort(getindex_log(a)) == [(1:5,), (6:10,), (13:13,), (16:20,)]


a = AccessCountDiskArray(a_inner,chunksize=(10,),batchstrategy=DiskArrays.SubRanges(CanStepRange(),0.5));
Expand All @@ -533,6 +554,14 @@ end
b2 = a[inds_unsorted]
@test b2 == a_inner[inds_unsorted]
@test sort(getindex_log(a)) == [(1:2:5,), (6:7,), (10:3:19,), (20:20,)]
empty!(a.getindex_log)
b3 = a[inds_sorted_matrix];
@test b3 == a_inner[inds_sorted_matrix]
@test sort(getindex_log(a)) == [(1:2:5,), (6:7,), (10:3:19,), (20:20,)]
empty!(a.getindex_log)
b4 = a[inds_unsorted_matrix]
@test b4 == a_inner[inds_unsorted_matrix]
@test sort(getindex_log(a)) == [(1:2:5,), (6:7,), (10:3:19,), (20:20,)]

a = AccessCountDiskArray(a_inner,chunksize=(5,),batchstrategy=DiskArrays.SubRanges(NoStepRange(),0.8));
b1 = a[inds_sorted];
Expand All @@ -542,6 +571,14 @@ end
b2 = a[inds_unsorted]
@test b2 == a_inner[inds_unsorted]
@test sort(getindex_log(a)) == [(1:1,), (3:3,), (5:7,), (10:10,), (13:13,), (16:16,), (19:20,)]
empty!(a.getindex_log)
b3 = a[inds_sorted_matrix];
@test b3 == a_inner[inds_sorted_matrix]
@test sort(getindex_log(a)) == [(1:1,), (3:3,), (5:7,), (10:10,), (13:13,), (16:16,), (19:20,)]
empty!(a.getindex_log)
b4 = a[inds_unsorted_matrix]
@test b4 == a_inner[inds_unsorted_matrix]
@test sort(getindex_log(a)) == [(1:1,), (3:3,), (5:7,), (10:10,), (13:13,), (16:16,), (19:20,)]
end

@testset "generator" begin
Expand Down Expand Up @@ -580,7 +617,7 @@ end

@test collect(reverse(a_disk)) == reverse(a)
@test reverse(view(a_disk, :, 1)) == reverse(a[:, 1])
@test reverse(view(a_disk, :, 1), 1) == reverse(a[:, 1], 1)
@test_broken reverse(view(a_disk, :, 1), 1) == reverse(a[:, 1], 1)
# ERROR: ArgumentError: Can only subset chunks for sorted indices
@test_broken reverse(view(a_disk, :, 1), 5) == reverse(a[:, 1], 5)
@test_broken reverse(view(a_disk, :, 1), 5, 10) == reverse(a[:, 1], 5, 10)
Expand Down

0 comments on commit c817c5f

Please sign in to comment.