From 87aa1f1c0b7e9f7e0d97d9dd069f09b2cc7902f6 Mon Sep 17 00:00:00 2001
From: Fabian Gans <fgans@bgc-jena.mpg.de>
Date: Fri, 2 Feb 2024 13:50:29 +0100
Subject: [PATCH] cont

---
 src/batchgetindex.jl | 38 +++++++++++++++-----------
 src/diskarray.jl     | 63 ++++++++++++++++++++++++++++++--------------
 2 files changed, 65 insertions(+), 36 deletions(-)

diff --git a/src/batchgetindex.jl b/src/batchgetindex.jl
index 5752529..998ad5f 100644
--- a/src/batchgetindex.jl
+++ b/src/batchgetindex.jl
@@ -1,29 +1,35 @@
-
-
-function has_chunk_gap(cs,ids)
+function has_chunk_gap(cs,ids::AbstractVector{<:Integer})
     #Find largest jump in indices
     largest_jump = foldl(ids,init=(0,first(ids))) do (largest,last),next
         largest = max(largest,next-last)
         (largest,next)
     end |> first
-    largest_jump > cs
+    largest_jump > first(cs)
 end
-function is_sparse_index(ids; density_threshold = 0.5)
-    minid, maxid = extrema(ids)
-    indexdensity = (length(ids) / (maxid - minid))
-    return indexdensity < density_threshold
+#Return true for all multidimensional indices for now, could be optimised in the future
+has_chunk_gap(cs,ids) = true
+
+#Compute the number of possible indices in the hyperrectangle
+span(v::AbstractVector{<:Integer}) = 1 -(-(extrema(v)...))
+function span(v::AbstractVector{CartesianIndex{N}}) where N
+    minind,maxind = extrema(v)
+    prod((maxind-mindind+oneunit(minind)).I)
 end
+function span(v::AbstractArray{Bool})
+    mindind,maxind = extrema(view(CartesianIndices(size(v)),v))
+    prod((maxind-mindind+oneunit(minind)).I)
+end
+#The number of indices to actually be read
+numind(v::AbstractArray{Bool}) = sum(v)
+numind(v::Union{AbstractVector{<:Integer},AbstractVector{<:CartesianIndex}})=length(v)
 
-function need_batch(a,inds) 
-    allow_multi = allow_multi_chunk_access(a)
-    cs = approx_chunksize(eachchunk(a))
-    map(cs,inds) do c, i
-        (allow_multi || has_chunk_gap(c,i)) && is_sparse_index(i)
-    end |> any
+function is_sparse_index(ids; density_threshold = 0.5)
+    indexdensity = numind(ids) / span(ids)
+    return indexdensity < density_threshold
 end
 
 # Define fallbacks for reading and writing sparse data
-function _readblock!(A::AbstractArray, A_ret, r::AbstractVector...)
+#= function _readblock!(A::AbstractArray, A_ret, r::AbstractVector...)
     if need_batch(A,r)
         # Fall back to batchgetindex to do the readblock
         A_ret .= batchgetindex(A, r...)
@@ -57,7 +63,7 @@ function _writeblock!(A::AbstractArray, A_ret, r::AbstractVector...)
         writeblock!(A, A_temp, map(:, mi, ma)...)
     end
     return nothing
-end
+end =#
 
 macro implement_batchgetindex(t)
     t = esc(t)
diff --git a/src/diskarray.jl b/src/diskarray.jl
index 8688a69..c10a9e1 100644
--- a/src/diskarray.jl
+++ b/src/diskarray.jl
@@ -38,21 +38,31 @@ Determines a list of tuples used to perform the read or write operations. The re
 - `temp_indices` indices for reading from temp array
 - `data_indices` indices for reading from data array
 """
-Base.@assume_effects :foldable resolve_indices(a, i) = _resolve_indices(eachchunk(a).chunks,i,(),(),(),(),())
-Base.@assume_effects :foldable resolve_indices(a::AbstractVector,i::Tuple{AbstractVector{<:Integer}}) = _resolve_indices(eachchunk(a).chunks,i,(),(),(),(),())
-Base.@assume_effects :foldable need_batch(a,i) = _need_batch(eachchunk(a).chunks,i)
-function _need_batch(cs, i)
-    res, csrem = need_batch_index(first(i),)
+Base.@assume_effects :foldable resolve_indices(a, i, nb=Val{false}()) = _resolve_indices(eachchunk(a).chunks,i,(),(),(),(),(),nb)
+Base.@assume_effects :foldable resolve_indices(a::AbstractVector,i::Tuple{AbstractVector{<:Integer}},nb=Val{false}()) = _resolve_indices(eachchunk(a).chunks,i,(),(),(),(),(),nb)
+Base.@assume_effects :foldable need_batch(a,i) = _need_batch(eachchunk(a).chunks,i,allow_multi_chunk_access(a))
+function _need_batch(cs, i, am)
+    nb, csrem = need_batch_index(first(i),cs,am)
+    nb ? true : _need_batch(csrem,Base.tail(i),am)
 end
-function _resolve_indices(cs,i,output_size,temp_sizes,output_indices,temp_indices,data_indices)
+_need_batch(::Tuple{},::Tuple{},_) = false
+_need_batch(::Tuple{},_,_) = false
+_need_batch(_,::Tuple{},_) = false
+need_batch_index(::Union{Integer,UnitRange,Colon},cs,_) = false, Base.tail(cs)
+function need_batch_index(i, cs,allow_multi)
+    csnow,csrem = splitcs(i,cs)
+    nb = (allow_multi || has_chunk_gap(approx_chunksize.(csnow),i)) && is_sparse_index(i)
+    nb, csrem
+end
+function _resolve_indices(cs,i,output_size,temp_sizes,output_indices,temp_indices,data_indices,nb)
     inow = first(i)
-    outsize, tempsize, outinds,tempinds,datainds,cs = process_index(inow, cs)
+    outsize, tempsize, outinds,tempinds,datainds,cs = process_index(inow, cs, nb)
     output_size = (output_size...,outsize...)
     output_indices = (output_indices...,outinds...)
     temp_sizes = (temp_sizes...,tempsize...)
     temp_indices = (temp_indices...,tempinds...)
     data_indices = (data_indices...,datainds...)
-    _resolve_indices(cs,Base.tail(i),output_size,temp_sizes,output_indices,temp_indices, data_indices)
+    _resolve_indices(cs,Base.tail(i),output_size,temp_sizes,output_indices,temp_indices, data_indices, nb)
 end
 _resolve_indices(::Tuple{},::Tuple{},output_size,temp_sizes,output_indices,temp_indices,data_indices) = output_size,temp_sizes,output_indices,temp_indices,data_indices
 #No dimension left in array, only singular indices allowed
@@ -96,6 +106,11 @@ function resolve_indices(a, i::Tuple{<:AbstractVector{<:Integer}})
     length.(i),tempsize,(Colon(),),(tempinds,),datainds
 end
 #outsize, tempsize, outinds,tempinds,datainds,cs
+process_index(i, cs, ::Val{false}) = process_index(i,cs)
+function process_index(i, cs, ::Val{true})
+    outsize, tempsize, outinds,tempinds,datainds,cs = process_index(i,cs)
+    outsize, tempsize, MultiRead(outinds), MultiRead(tempinds), MultiRead(datainds)
+end
 process_index(inow::Integer, cs) = ((), 1, (), (1,),(inow:inow,), Base.tail(cs))
 function process_index(::Colon, cs)
     s = arraysize_from_chunksize(first(cs))
@@ -104,13 +119,12 @@ end
 function process_index(i::AbstractUnitRange, cs)
     (length(i),), (length(i),), (Colon(),), (Colon(),), (i,), Base.tail(cs)
 end
-function process_index(i::AbstractVector{<:Integer}, cs)
+function process_index(i::AbstractVector{<:Integer}, cs, ::Val{false})
     indmin,indmax = extrema(i)
-    
     (length(i),), ((indmax-indmin+1),), (Colon(),), ((i.-(indmin-1)),), (indmin:indmax,), Base.tail(cs) 
 end
-function process_index(i::AbstractArray{Bool,N}, cs) where N
-    csnow, csrem = splitcs(size(i),(),cs)
+function process_index(i::AbstractArray{Bool,N}, cs, ::Val{false}) where N
+    csnow, csrem = splitcs(i,cs)
     s = arraysize_from_chunksize.(csnow)
     cindmin,cindmax = extrema(view(CartesianIndices(s),i))
     indmin,indmax = cindmin.I,cindmax.I
@@ -118,8 +132,8 @@ function process_index(i::AbstractArray{Bool,N}, cs) where N
     tempinds = view(i,range.(indmin,indmax)...)
     (sum(i),), tempsize, (Colon(),),(tempinds,), range.(indmin,indmax), csrem
 end
-function process_index(i::AbstractVector{<:CartesianIndex{N}}, cs) where N
-    csnow, csrem = splitcs(first(i).I,(),cs)
+function process_index(i::AbstractVector{<:CartesianIndex{N}}, cs, ::Val{false}) where N
+    csnow, csrem = splitcs(i,cs)
     s = arraysize_from_chunksize.(csnow)
     cindmin,cindmax = extrema(view(CartesianIndices(s),i))
     indmin,indmax = cindmin.I,cindmax.I
@@ -128,6 +142,9 @@ function process_index(i::AbstractVector{<:CartesianIndex{N}}, cs) where N
     tempinds = i .- tempoffset
     (length(i),), tempsize, (Colon(),), (tempinds,), range.(indmin,indmax), csrem
 end
+splitcs(i::AbstractVector{<:CartesianIndex},cs) = splitcs(first(i).I,(),cs)
+splitcs(i::AbstractArray{Bool},cs) = splitcs(size(i),(),cs)
+splitcs(_,cs) = (first(cs),), Base.tail(cs)
 splitcs(si,csnow,csrem) = splitcs(Base.tail(si),(csnow...,first(csrem)),Base.tail(csrem))
 splitcs(::Tuple{},csnow,csrem) = (csnow,csrem)
 
@@ -156,12 +173,18 @@ function create_outputarray(out,a,output_size)
 end
 create_outputarray(::Nothing,a,output_size) = Array{eltype(a)}(undef, output_size...)
 function getindex_disk!(out, a, i...)
-    output_size, temparray_size, output_indices, temparray_indices, data_indices = resolve_indices(a,i)
-    #@debug output_size, temparray_size, output_indices, temparray_indices, data_indices
-    outputarray = create_outputarray(out,a,output_size)
-    temparray = Array{eltype(a)}(undef, temparray_size...)  
-    readblock!(a, temparray, data_indices...)
-    transfer_results!(outputarray, temparray, output_indices, temparray_indices)
+    if need_batch(a,i)
+        output_size, temparray_size, output_indices, temparray_indices, data_indices = resolve_indices(a,i,batch=true)
+        println("Doing batch stuff")
+
+    else
+        output_size, temparray_size, output_indices, temparray_indices, data_indices = resolve_indices(a,i)
+        #@debug output_size, temparray_size, output_indices, temparray_indices, data_indices
+        outputarray = create_outputarray(out,a,output_size)
+        temparray = Array{eltype(a)}(undef, temparray_size...)  
+        readblock!(a, temparray, data_indices...)
+        transfer_results!(outputarray, temparray, output_indices, temparray_indices)
+    end
 end
 
 function transfer_results!(outputarray, temparray, output_indices, temparray_indices)