diff --git a/Project.toml b/Project.toml
index 9663ca1..5a010e9 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,19 +1,15 @@
 name = "NaNStatistics"
 uuid = "b946abbf-3ea7-4610-9019-9858bfdeaf2d"
 authors = ["C. Brenhin Keller"]
-version = "0.6.35"
+version = "0.6.36"
 
 [deps]
-IfElse = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173"
-LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890"
 PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
 Static = "aedffcd0-7271-4cad-89d0-dc628f76c6d3"
-VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
+StaticArrayInterface = "0d7ed370-da01-4f52-bd93-41d350b8b718"
 
 [compat]
-IfElse = "0.1"
-LoopVectorization = "0.12.113"
-VectorizationBase = "0.21.0 - 0.21.64"
 PrecompileTools = "1"
 Static = "0.8"
-julia = "1.8"
+StaticArrayInterface = "1"
+julia = "1.10"
diff --git a/src/ArrayStats/ArrayStats.jl b/src/ArrayStats/ArrayStats.jl
index f5f8a96..dc7d832 100644
--- a/src/ArrayStats/ArrayStats.jl
+++ b/src/ArrayStats/ArrayStats.jl
@@ -11,16 +11,9 @@
     ```
     Return the number of elements of `A` that are `NaN`s.
     """
-    function countnans(A::StridedArray{T}) where T<:PrimitiveNumber
-        n = 0
-        @turbo check_empty=true for i ∈ eachindex(A)
-            n += A[i]!=A[i]
-        end
-        return n
-    end
     function countnans(A)
         n = 0
-        @inbounds for i ∈ eachindex(A)
+        @inbounds @simd ivdep for i ∈ eachindex(A)
             n += A[i]!=A[i]
         end
         return n
@@ -35,7 +28,7 @@
     """
     function countnotnans(A)
         n = 0
-        @turbo check_empty=true for i ∈ eachindex(A)
+        @inbounds @simd ivdep for i ∈ eachindex(A)
             n += A[i]==A[i]
         end
         return n
@@ -57,14 +50,8 @@
     ```
     Fill a Boolean mask of dimensions `size(A)` that is false wherever `A` is `NaN`
     """
-    function nanmask!(mask::StridedArray, A::StridedArray{T}) where T<:PrimitiveNumber
-        @turbo for i ∈ eachindex(A)
-            mask[i] = A[i]==A[i]
-        end
-        return mask
-    end
     function nanmask!(mask, A)
-        @inbounds for i ∈ eachindex(A)
+        @inbounds @simd ivdep for i ∈ eachindex(A)
             mask[i] = A[i]==A[i]
         end
         return mask
@@ -81,21 +68,11 @@
     ```
     Replace all `NaN`s in A with zeros of the same type
     """
-    function zeronan!(A::StridedArray{T}) where T<:PrimitiveNumber
-        ∅ = zero(T)
-        @turbo for i ∈ eachindex(A)
-            Aᵢ = A[i]
-            A[i] = ifelse(Aᵢ==Aᵢ, Aᵢ, ∅)
-        end
-        return A
-    end
     function zeronan!(A::AbstractArray{T}) where T
         ∅ = zero(T)
-        @inbounds for i ∈ eachindex(A)
+        @inbounds @simd ivdep for i ∈ eachindex(A)
             Aᵢ = A[i]
-            if isnan(Aᵢ)
-                A[i] = ∅
-            end
+            A[i] = ifelse(Aᵢ==Aᵢ, Aᵢ, ∅)
         end
         return A
     end
@@ -153,9 +130,8 @@
     function nanadd(A::AbstractArray, B::AbstractArray)
         result_type = promote_type(eltype(A), eltype(B))
         result = similar(A, result_type)
-        @inbounds @simd for i ∈ eachindex(A)
-            Aᵢ = A[i]
-            Bᵢ = B[i]
+        @inbounds @simd ivdep for i ∈ eachindex(A,B)
+            Aᵢ, Bᵢ = A[i], B[i]
             result[i] = (Aᵢ * (Aᵢ==Aᵢ)) + (Bᵢ * (Bᵢ==Bᵢ))
         end
         return result
@@ -169,9 +145,8 @@
     Add the non-NaN elements of `B` to `A`, treating `NaN`s as zeros
     """
     function nanadd!(A::Array, B::AbstractArray)
-        @inbounds @simd for i ∈ eachindex(A)
-            Aᵢ = A[i]
-            Bᵢ = B[i]
+        @inbounds @simd for i ∈ eachindex(A,B)
+            Aᵢ, Bᵢ = A[i], B[i]
             A[i] = (Aᵢ * (Aᵢ==Aᵢ)) + (Bᵢ * (Bᵢ==Bᵢ))
         end
         return A
@@ -198,7 +173,6 @@
     __nanminimum(A, ::Colon, region) = reducedims(_nanminimum(A, region), region)
     _nanminimum(A, region) = reduce(nanmin, A, dims=region, init=float(eltype(A))(NaN))
     _nanminimum(A, ::Colon) = reduce(nanmin, A, init=float(eltype(A))(NaN))
-    _nanminimum(A::Array{<:Number}, ::Colon) = vreduce(nanmin, A)
     export nanminimum
 
 
@@ -219,7 +193,6 @@
     __nanmaximum(A, ::Colon, region) = reducedims(_nanmaximum(A, region), region)
     _nanmaximum(A, region) = reduce(nanmax, A, dims=region, init=float(eltype(A))(NaN))
     _nanmaximum(A, ::Colon) = reduce(nanmax, A, init=float(eltype(A))(NaN))
-    _nanmaximum(A::Array{<:Number}, ::Colon) = vreduce(nanmax, A)
     export nanmaximum
 
     """
@@ -312,41 +285,25 @@
         mask = nanmask(A)
         return sum(A.*W.*mask, dims=region) ./ sum(W.*mask, dims=region)
     end
-    # Fallback method for non-StridedArrays
-    function _nanmean(A, W, ::Colon)
-        n = zero(eltype(W))
-        m = zero(promote_type(eltype(W), eltype(A)))
-        @inbounds @simd for i ∈ eachindex(A)
-            Aᵢ = A[i]
-            Wᵢ = W[i]
-            t = Aᵢ == Aᵢ
-            n += Wᵢ * t
-            m += Wᵢ * Aᵢ * t
-        end
-        return m / n
-    end
     # Can't have NaNs if array is all Integers
-    function _nanmean(A::StridedArray{<:Integer}, W, ::Colon)
+    function _nanmean(A::AbstractArray{<:Integer}, W, ::Colon)
         n = zero(eltype(W))
         m = zero(promote_type(eltype(W), eltype(A)))
-        @turbo for i ∈ eachindex(A)
+        @inbounds @simd ivdep for i ∈ eachindex(A,W)
             Wᵢ = W[i]
             n += Wᵢ
             m += Wᵢ * A[i]
         end
         return m / n
     end
-    function _nanmean(A::StridedArray, W, ::Colon)
-        T1 = eltype(W)
-        T2 = promote_type(eltype(W), eltype(A))
-        n = zero(T1)
-        m = zero(T2)
-        @turbo for i ∈ eachindex(A)
-            Aᵢ = A[i]
-            Wᵢ = W[i]
+    function _nanmean(A, W, ::Colon)
+        n = ∅ₙ = zero(eltype(W))
+        m = ∅ₘ = zero(promote_type(eltype(W), eltype(A)))
+        @inbounds @simd ivdep for i ∈ eachindex(A,W)
+            Aᵢ, Wᵢ = A[i], W[i]
             t = Aᵢ==Aᵢ
-            n += ifelse(t, Wᵢ, zero(T1))
-            m += ifelse(t, Wᵢ * Aᵢ, zero(T2))
+            n += ifelse(t, Wᵢ, ∅ₙ)
+            m += ifelse(t, Wᵢ * Aᵢ, ∅ₘ)
         end
         return m / n
     end
@@ -374,23 +331,23 @@
         w = sum(W.*mask, dims=region)
         s = sum(A.*W.*mask, dims=region) ./ w
         d = A .- s # Subtract mean, using broadcasting
-        @turbo for i ∈ eachindex(d)
+        @inbounds @simd ivdep for i ∈ eachindex(d, W)
             dᵢ = d[i]
             d[i] = (dᵢ * dᵢ * W[i]) * mask[i]
         end
         s .= sum(d, dims=region)
-        @turbo for i ∈ eachindex(s)
+        @inbounds @simd ivdep for i ∈ eachindex(s,n,w)
             s[i] = sqrt((s[i] * n[i]) / (w[i] * (n[i] - 1)))
         end
         return s
     end
     function _nanstd(A, W, ::Colon)
+        @assert eachindex(A) == eachindex(W)
         n = 0
         w = zero(eltype(W))
         m = zero(promote_type(eltype(W), eltype(A)))
-        @inbounds @simd for i ∈ eachindex(A)
-            Aᵢ = A[i]
-            Wᵢ = W[i]
+        @inbounds @simd ivdep for i ∈ eachindex(A,W)
+            Aᵢ, Wᵢ = A[i], W[i]
             t = Aᵢ == Aᵢ
             n += t
             w += Wᵢ * t
@@ -398,36 +355,13 @@
         end
         mu = m / w
         s = zero(typeof(mu))
-        @inbounds @simd for i ∈ eachindex(A)
+        @inbounds @simd ivdep for i ∈ eachindex(A,W)
             Aᵢ = A[i]
             d = Aᵢ - mu
             s += (d * d * W[i]) * (Aᵢ == Aᵢ) # Zero if Aᵢ is NaN
         end
         return sqrt(s / w * n / (n-1))
     end
-    function _nanstd(A::StridedArray{Ta}, W::StridedArray{Tw}, ::Colon) where {Ta<:PrimitiveNumber, Tw<:PrimitiveNumber}
-        n = 0
-        Tm = promote_type(Tw, Ta)
-        w = zero(Tw)
-        m = zero(Tm)
-        @turbo for i ∈ eachindex(A)
-            Aᵢ = A[i]
-            Wᵢ = W[i]
-            t = Aᵢ==Aᵢ
-            n += t
-            w += ifelse(t, Wᵢ,  zero(Tw))
-            m += ifelse(t, Wᵢ * Aᵢ, zero(Tm))
-        end
-        mu = m / w
-        Tmu = typeof(mu)
-        s = zero(Tmu)
-        @turbo for i ∈ eachindex(A)
-            Aᵢ = A[i]
-            d = Aᵢ - mu
-            s += ifelse(Aᵢ==Aᵢ, d * d * W[i], zero(Tmu))
-        end
-        return sqrt(s / w * n / (n-1))
-    end
     export nanstd
 
 
diff --git a/src/ArrayStats/nancov.jl b/src/ArrayStats/nancov.jl
index 6f09d95..c668949 100644
--- a/src/ArrayStats/nancov.jl
+++ b/src/ArrayStats/nancov.jl
@@ -2,7 +2,7 @@ function _nancov(x::AbstractVector, y::AbstractVector, corrected::Bool, μᵪ::N
     # Calculate covariance
     σᵪᵧ = ∅ = zero(promote_type(typeof(μᵪ), typeof(μᵧ), Int))
     n = 0
-    @turbo check_empty=true for i ∈ eachindex(x,y)
+    @inbounds @simd ivdep for i ∈ eachindex(x,y)
             δᵪ = x[i] - μᵪ
             δᵧ = y[i] - μᵧ
             δ² = δᵪ * δᵧ
@@ -19,7 +19,7 @@ function _nancov(x::AbstractVector, y::AbstractVector, corrected::Bool)
     n = 0
     Σᵪ = ∅ᵪ = zero(eltype(x))
     Σᵧ = ∅ᵧ = zero(eltype(y))
-    @turbo check_empty=true for i ∈ eachindex(x,y)
+    @inbounds @simd ivdep for i ∈ eachindex(x,y)
         xᵢ, yᵢ = x[i], y[i]
         notnan = (xᵢ==xᵢ) & (yᵢ==yᵢ)
         n += notnan
@@ -103,49 +103,13 @@ function nancor(x::AbstractVector, y::AbstractVector; corrected::Bool=true)
     @assert eachindex(x) == eachindex(y)
     return _nancor(x, y, corrected)
 end
-
 # Pair-wise nan-covariance
-function _nancor(x::StridedVector{T}, y::StridedVector{T}, corrected::Bool) where T<:PrimitiveNumber
+function _nancor(x::AbstractVector{Tx}, y::AbstractVector{Ty}, corrected::Bool) where {Tx, Ty}
     # Parwise nan-means
     n = 0
-    Σᵪ = ∅ᵪ = zero(T)
-    Σᵧ = ∅ᵧ = zero(T)
-    @turbo check_empty=true for i ∈ eachindex(x,y)
-        xᵢ, yᵢ = x[i], y[i]
-        notnan = (xᵢ==xᵢ) & (yᵢ==yᵢ)
-        n += notnan
-        Σᵪ += ifelse(notnan, xᵢ, ∅ᵪ)
-        Σᵧ += ifelse(notnan, yᵢ, ∅ᵧ)
-    end
-    μᵪ = Σᵪ/n
-    μᵧ = Σᵧ/n
-    n == 0 && return (∅ᵪ+∅ᵧ)/0 # Return appropriate NaN if no data
-
-    # Pairwise nan-variances
-    σ²ᵪ = ∅ᵪ = zero(typeof(μᵪ))
-    σ²ᵧ = ∅ᵧ = zero(typeof(μᵧ))
-    @turbo check_empty=true for i ∈ eachindex(x,y)
-        δᵪ = x[i] - μᵪ
-        δᵧ = y[i] - μᵧ
-        notnan = (δᵪ==δᵪ) & (δᵧ==δᵧ)
-        σ²ᵪ += ifelse(notnan, δᵪ * δᵪ, ∅ᵪ)
-        σ²ᵧ += ifelse(notnan, δᵧ * δᵧ, ∅ᵧ)
-    end
-    σᵪ = sqrt(σ²ᵪ / max(n-corrected, 0))
-    σᵧ = sqrt(σ²ᵧ / max(n-corrected, 0))
-
-    # Covariance and correlation
-    σᵪᵧ = _nancov(x, y, corrected, μᵪ, μᵧ)
-    ρᵪᵧ = σᵪᵧ / (σᵪ * σᵧ)
-
-    return ρᵪᵧ
-end
-function _nancor(x::AbstractVector, y::AbstractVector, corrected::Bool)
-    # Parwise nan-means
-    n = 0
-    Σᵪ = ∅ᵪ = zero(eltype(x))
-    Σᵧ = ∅ᵧ = zero(eltype(y))
-    @inbounds for i ∈ eachindex(x,y)
+    Σᵪ = ∅ᵪ = zero(Tx)
+    Σᵧ = ∅ᵧ = zero(Ty)
+    @inbounds @simd ivdep for i ∈ eachindex(x,y)
         xᵢ, yᵢ = x[i], y[i]
         notnan = (xᵢ==xᵢ) & (yᵢ==yᵢ)
         n += notnan
@@ -159,7 +123,7 @@ function _nancor(x::AbstractVector, y::AbstractVector, corrected::Bool)
     # Pairwise nan-variances
     σ²ᵪ = ∅ᵪ = zero(typeof(μᵪ))
     σ²ᵧ = ∅ᵧ = zero(typeof(μᵧ))
-    @inbounds for i ∈ eachindex(x,y)
+    @inbounds @simd ivdep for i ∈ eachindex(x,y)
         δᵪ = x[i] - μᵪ
         δᵧ = y[i] - μᵧ
         notnan = (δᵪ==δᵪ) & (δᵧ==δᵧ)
diff --git a/src/ArrayStats/nanmean.jl b/src/ArrayStats/nanmean.jl
index 4ec214e..48238fe 100644
--- a/src/ArrayStats/nanmean.jl
+++ b/src/ArrayStats/nanmean.jl
@@ -50,11 +50,11 @@ function _nanmean(A::AbstractArray{T,N}, dims::Tuple) where {T,N}
 end
 
 # Reduce all the dims!
-function _nanmean(A::StridedArray{T}, ::Colon) where T<:PrimitiveFloat
-    Tₒ = Base.promote_op(/, T, Int)
+function _nanmean(A, ::Colon)
+    Tₒ = Base.promote_op(/, eltype(A), Int)
     n = 0
     Σ = ∅ = zero(Tₒ)
-    @turbo check_empty=true for i ∈ eachindex(A)
+    @inbounds @simd ivdep for i ∈ eachindex(A)
         Aᵢ = A[i]
         notnan = Aᵢ==Aᵢ
         n += notnan
@@ -62,33 +62,20 @@ function _nanmean(A::StridedArray{T}, ::Colon) where T<:PrimitiveFloat
     end
     return Σ / n
 end
-function _nanmean(A::StridedArray{T}, ::Colon) where T<:PrimitiveInteger
+function _nanmean(A::AbstractArray{T}, ::Colon) where T<:Integer
     Tₒ = Base.promote_op(/, T, Int)
     Σ = zero(Tₒ)
-    @turbo check_empty=true for i ∈ eachindex(A)
+    @inbounds @simd ivdep for i ∈ eachindex(A)
         Σ += A[i]
     end
     return Σ / length(A)
 end
-# Fallback method for non-StridedArrays
-function _nanmean(A, ::Colon)
-    Tₒ = Base.promote_op(/, eltype(A), Int)
-    n = 0
-    Σ = ∅ = zero(Tₒ)
-    @inbounds for i ∈ eachindex(A)
-        Aᵢ = A[i]
-        notnan = Aᵢ==Aᵢ
-        n += notnan
-        Σ += ifelse(notnan, A[i], ∅)
-    end
-    return Σ / n
-end
 
 
 # Metaprogramming magic adapted from Chris Elrod example:
 # Generate customized set of loops for a given ndims and a vector
 # `static_dims` of dimensions to reduce over
-function staticdim_nanmean_quote_turbo(static_dims::Vector{Int}, N::Int)
+function staticdim_nanmean_quote(static_dims::Vector{Int}, N::Int)
   M = length(static_dims)
   # `static_dims` now contains every dim we're taking the mean over.
   Bᵥ = Expr(:call, :view, :B)
@@ -129,111 +116,11 @@ function staticdim_nanmean_quote_turbo(static_dims::Vector{Int}, N::Int)
   # Build the reduction loop
   for n ∈ reduct_inds
     newblock = Expr(:block)
-    push!(block.args, Expr(:for, :($(inds[n]) = axes(A,$n)), newblock))
-    block = newblock
-  end
-  # Push more things here if you want them in the innermost loop
-  push!(block.args, :(Aᵢ = $Aind))
-  push!(block.args, :(notnan = Aᵢ==Aᵢ))
-  push!(block.args, :(n += notnan))
-  push!(block.args, :(Σ += ifelse(notnan, Aᵢ, ∅)))
-  # Push more things here if you want them at the end of the reduction loop
-  push!(rblock.args, :($Bind = Σ * inv(n)))
-  # Put it all together
-  quote
-    ∅ = zero(eltype(B))
-    Bᵥ = $Bᵥ
-    @turbo $loops
-    return B
-  end
-end
-
-# Chris Elrod metaprogramming magic:
-# Turn non-static integers in `dims` tuple into `StaticInt`s
-# so we can construct `static_dims` vector within @generated code
-function branches_nanmean_quote_turbo(N::Int, M::Int, D)
-  static_dims = Int[]
-  for m ∈ 1:M
-    param = D.parameters[m]
-    if param <: StaticInt
-      new_dim = _dim(param)::Int
-      @assert new_dim ∉ static_dims
-      push!(static_dims, new_dim)
+    if n==last(reduct_inds)
+      push!(block.args, Expr(:macrocall, Symbol("@simd"), :ivdep, Expr(:for, :($(inds[n]) = axes(A,$n)), newblock)))
     else
-      t = Expr(:tuple)
-      for n ∈ static_dims
-        push!(t.args, :(StaticInt{$n}()))
-      end
-      q = Expr(:block, :(dimm = dims[$m]))
-      qold = q
-      ifsym = :if
-      for n ∈ 1:N
-        n ∈ static_dims && continue
-        tc = copy(t)
-        push!(tc.args, :(StaticInt{$n}()))
-        qnew = Expr(ifsym, :(dimm == $n), :(return _nanmean!(B, A, $tc)))
-        for r ∈ m+1:M
-          push!(tc.args, :(dims[$r]))
-        end
-        push!(qold.args, qnew)
-        qold = qnew
-        ifsym = :elseif
-      end
-      push!(qold.args, Expr(:block, :(throw("Dimension `$dimm` not found."))))
-      return q
-    end
-  end
-  staticdim_nanmean_quote_turbo(static_dims, N)
-end
-
-# Efficient @generated in-place mean
-@generated function _nanmean!(B::StridedArray{Tₒ,N}, A::StridedArray{T,N}, dims::D) where {Tₒ<:PrimitiveNumber,T,N,M,D<:Tuple{Vararg{IntOrStaticInt,M}}}
-  N == M && return :(B[1] = _nanmean(A, :); B)
-  branches_nanmean_quote_turbo(N, M, D)
-end
-
-function staticdim_nanmean_quote(static_dims::Vector{Int}, N::Int)
-  M = length(static_dims)
-  # `static_dims` now contains every dim we're taking the mean over.
-  Bᵥ = Expr(:call, :view, :B)
-  reduct_inds = Int[]
-  nonreduct_inds = Int[]
-  # Firstly, build our expressions for indexing each array
-  Aind = :(A[])
-  Bind = :(Bᵥ[])
-  inds = Vector{Symbol}(undef, N)
-  for n ∈ 1:N
-    ind = Symbol(:i_,n)
-    inds[n] = ind
-    push!(Aind.args, ind)
-    if n ∈ static_dims
-      push!(reduct_inds, n)
-      push!(Bᵥ.args, :(firstindex(B,$n)))
-    else
-      push!(nonreduct_inds, n)
-      push!(Bᵥ.args, :)
-      push!(Bind.args, ind)
-    end
-  end
-  firstn = first(nonreduct_inds)
-  # Secondly, build up our set of loops
-  block = Expr(:block)
-  loops = Expr(:for, :($(inds[firstn]) = indices((A,B),$firstn)), block)
-  if length(nonreduct_inds) > 1
-    for n ∈ @view(nonreduct_inds[2:end])
-      newblock = Expr(:block)
-      push!(block.args, Expr(:for, :($(inds[n]) = indices((A,B),$n)), newblock))
-      block = newblock
+      push!(block.args, Expr(:for, :($(inds[n]) = axes(A,$n)), newblock))
     end
-  end
-  rblock = block
-  # Push more things here if you want them at the beginning of the reduction loop
-  push!(rblock.args, :(n = 0))
-  push!(rblock.args, :(Σ = ∅))
-  # Build the reduction loop
-  for n ∈ reduct_inds
-    newblock = Expr(:block)
-    push!(block.args, Expr(:for, :($(inds[n]) = axes(A,$n)), newblock))
     block = newblock
   end
   # Push more things here if you want them in the innermost loop
@@ -252,6 +139,8 @@ function staticdim_nanmean_quote(static_dims::Vector{Int}, N::Int)
   end
 end
 
+# Turn non-static integers in `dims` tuple into `StaticInt`s
+# so we can construct `static_dims` vector within @generated code
 function branches_nanmean_quote(N::Int, M::Int, D)
   static_dims = Int[]
   for m ∈ 1:M
@@ -287,6 +176,7 @@ function branches_nanmean_quote(N::Int, M::Int, D)
   staticdim_nanmean_quote(static_dims, N)
 end
 
+# Efficient @generated in-place mean
 @generated function _nanmean!(B::AbstractArray{Tₒ,N}, A::AbstractArray{T,N}, dims::D) where {Tₒ,T,N,M,D<:Tuple{Vararg{IntOrStaticInt,M}}}
   N == M && return :(B[1] = _nanmean(A, :); B)
   branches_nanmean_quote(N, M, D)
diff --git a/src/ArrayStats/nansem.jl b/src/ArrayStats/nansem.jl
index c0d86af..64b51a8 100644
--- a/src/ArrayStats/nansem.jl
+++ b/src/ArrayStats/nansem.jl
@@ -45,60 +45,40 @@ _nansem(μ, corrected::Bool, A, dims::Int) = _nansem(μ, corrected, A, (dims,))
 # If the mean isn't known, compute it
 _nansem(::Nothing, corrected::Bool, A, dims::Tuple) = _nansem!(_nanmean(A, dims), corrected, A, dims)
 # Reduce all the dims!
-function _nansem(::Nothing, corrected::Bool, A::StridedArray{T}, ::Colon) where T<:PrimitiveFloat
-    Tₒ = Base.promote_op(/, T, Int)
-    n = 0
-    Σ = ∅ = zero(Tₒ)
-    @turbo check_empty=true for i ∈ eachindex(A)
-        Aᵢ = A[i]
-        notnan = Aᵢ==Aᵢ
-        n += notnan
-        Σ += ifelse(notnan, Aᵢ, ∅)
-    end
-    μ = Σ / n
-    σ² = ∅ = zero(typeof(μ))
-    @turbo check_empty=true for i ∈ eachindex(A)
-        δ = A[i] - μ
-        notnan = δ==δ
-        σ² += ifelse(notnan, δ * δ, ∅)
-    end
-    return sqrt(σ² / max(n-corrected,0) / n)
+function _nansem(::Nothing, corrected::Bool, A, ::Colon)
+  Tₒ = Base.promote_op(/, eltype(A), Int)
+  n = 0
+  Σ = ∅ = zero(Tₒ)
+  @inbounds @simd ivdep for i ∈ eachindex(A)
+      Aᵢ = A[i]
+      notnan = Aᵢ==Aᵢ
+      n += notnan
+      Σ += ifelse(notnan, Aᵢ, ∅)
+  end
+  μ = Σ / n
+  σ² = ∅ = zero(typeof(μ))
+  @inbounds @simd ivdep for i ∈ eachindex(A)
+      δ = A[i] - μ
+      notnan = δ==δ
+      σ² += ifelse(notnan, δ * δ, ∅)
+  end
+  return sqrt(σ² / max(n-corrected,0) / n)
 end
-function _nansem(::Nothing, corrected::Bool, A::StridedArray{T}, ::Colon) where T<:PrimitiveInteger
+function _nansem(::Nothing, corrected::Bool, A::AbstractArray{T}, ::Colon) where T<:Integer
     Tₒ = Base.promote_op(/, T, Int)
     n = length(A)
     Σ = zero(Tₒ)
-    @turbo check_empty=true for i ∈ eachindex(A)
+    @inbounds @simd ivdep for i ∈ eachindex(A)
         Σ += A[i]
     end
     μ = Σ / n
     σ² = zero(typeof(μ))
-    @turbo check_empty=true for i ∈ eachindex(A)
+    @inbounds @simd ivdep for i ∈ eachindex(A)
         δ = A[i] - μ
         σ² += δ * δ
     end
     return sqrt(σ² / max(n-corrected,0) / n)
 end
-# Fallback method for non-StridedArrays
-function _nansem(::Nothing, corrected::Bool, A, ::Colon)
-    Tₒ = Base.promote_op(/, eltype(A), Int)
-    n = 0
-    Σ = ∅ = zero(Tₒ)
-    @inbounds for i ∈ eachindex(A)
-        Aᵢ = A[i]
-        notnan = Aᵢ==Aᵢ
-        n += notnan
-        Σ += ifelse(notnan, Aᵢ, ∅)
-    end
-    μ = Σ / n
-    σ² = ∅ = zero(typeof(μ))
-    @inbounds for i ∈ eachindex(A)
-        δ = A[i] - μ
-        notnan = δ==δ
-        σ² += ifelse(notnan, δ * δ, ∅)
-    end
-    return sqrt(σ² / max(n-corrected,0) / n)
-end
 
 
 # If the mean is known, pass it on in the appropriate form
@@ -106,21 +86,21 @@ _nansem(μ, corrected::Bool, A, dims::Tuple) = _nansem!(collect(μ), corrected,
 _nansem(μ::Array, corrected::Bool, A, dims::Tuple) = _nansem!(copy(μ), corrected, A, dims)
 _nansem(μ::Number, corrected::Bool, A, dims::Tuple) = _nansem!([μ], corrected, A, dims)
 # Reduce all the dims!
-function _nansem(μ::Number, corrected::Bool, A::StridedArray{T}, ::Colon) where T<:PrimitiveFloat
-    n = 0
-    σ² = ∅ = zero(typeof(μ))
-    @turbo check_empty=true for i ∈ eachindex(A)
-        δ = A[i] - μ
-        notnan = δ==δ
-        n += notnan
-        σ² += ifelse(notnan, δ * δ, ∅)
-    end
-    return sqrt(σ² / max(n-corrected, 0) / n)
+function _nansem(μ::Number, corrected::Bool, A, ::Colon)
+  n = 0
+  σ² = ∅ = zero(typeof(μ))
+  @inbounds @simd ivdep for i ∈ eachindex(A)
+      δ = A[i] - μ
+      notnan = δ==δ
+      n += notnan
+      σ² += ifelse(notnan, δ * δ, ∅)
+  end
+  return sqrt(σ² / max(n-corrected, 0) / n)
 end
-function _nansem(μ::Number, corrected::Bool, A::StridedArray{T}, ::Colon) where T<:PrimitiveInteger
+function _nansem(μ::Number, corrected::Bool, A::AbstractArray{T}, ::Colon) where T<:Integer
     σ² = zero(typeof(μ))
     if μ==μ
-        @turbo check_empty=true for i ∈ eachindex(A)
+        @inbounds @simd ivdep for i ∈ eachindex(A)
             δ = A[i] - μ
             σ² += δ * δ
         end
@@ -130,36 +110,6 @@ function _nansem(μ::Number, corrected::Bool, A::StridedArray{T}, ::Colon) where
     end
     return sqrt(σ² / max(n-corrected, 0) / n)
 end
-# Fallback method for non-strided-arrays
-function _nansem(μ::Number, corrected::Bool, A, ::Colon)
-    n = 0
-    σ² = ∅ = zero(typeof(μ))
-    @inbounds for i ∈ eachindex(A)
-        δ = A[i] - μ
-        notnan = δ==δ
-        n += notnan
-        σ² += ifelse(notnan, δ * δ, ∅)
-    end
-    return sqrt(σ² / max(n-corrected, 0) / n)
-end
-
-# # Fallback method for overly-complex reductions
-# function _nansem_fallback!(B::AbstractArray, corrected::Bool, A::AbstractArray,region)
-#     mask = nanmask(A)
-#     N = sum(mask, dims=region)
-#     Σ = sum(A.*mask, dims=region)./N
-#     δ = A .- Σ # Subtract mean, using broadcasting
-#     @turbo check_empty=true for i ∈ eachindex(δ)
-#         δᵢ = δ[i]
-#         δ[i] = ifelse(mask[i], δᵢ * δᵢ, 0)
-#     end
-#     B .= sum(δ, dims=region)
-#     @turbo check_empty=true for i ∈ eachindex(B)
-#         B[i] = B[i] / max(N[i] - corrected, 0)
-#     end
-#     return B
-# end
-
 
 
 function staticdim_nansem_quote(static_dims::Vector{Int}, N::Int)
diff --git a/src/ArrayStats/nanstd.jl b/src/ArrayStats/nanstd.jl
index 147ef67..6b75e8a 100644
--- a/src/ArrayStats/nanstd.jl
+++ b/src/ArrayStats/nanstd.jl
@@ -36,14 +36,8 @@ nanstd(A; dims=:, dim=:, mean=nothing, corrected=true) = sqrt!(__nanvar(mean, co
 export nanstd
 
 sqrt!(x::Number) = sqrt(x)
-function sqrt!(A::StridedArray{<:PrimitiveNumber})
-    @turbo check_empty=true for i ∈ eachindex(A)
-        A[i] = sqrt(A[i])
-    end
-    return A
-end
 function sqrt!(A::AbstractArray)
-    @inbounds for i ∈ eachindex(A)
+    @inbounds @simd ivdep for i ∈ eachindex(A)
         A[i] = sqrt(A[i])
     end
     return A
diff --git a/src/ArrayStats/nansum.jl b/src/ArrayStats/nansum.jl
index 022e430..0e8e4a7 100644
--- a/src/ArrayStats/nansum.jl
+++ b/src/ArrayStats/nansum.jl
@@ -50,41 +50,30 @@ function _nansum(A::AbstractArray{T,N}, dims::Tuple) where {T,N}
 end
 
 # Reduce all the dims!
-function _nansum(A::StridedArray{T}, ::Colon) where T<:PrimitiveFloat
-    Tₒ = Base.promote_op(+, T, Int)
+function _nansum(A, ::Colon)
+    Tₒ = Base.promote_op(+, eltype(A), Int)
     Σ = ∅ = zero(Tₒ)
-    @turbo check_empty=true for i ∈ eachindex(A)
+    @inbounds @simd ivdep for i ∈ eachindex(A)
         Aᵢ = A[i]
         notnan = Aᵢ==Aᵢ
         Σ += ifelse(notnan, Aᵢ, ∅)
     end
     return Σ
 end
-function _nansum(A::StridedArray{T}, ::Colon) where T<:PrimitiveInteger
+function _nansum(A::AbstractArray{T}, ::Colon) where T<:Integer
     Tₒ = Base.promote_op(+, T, Int)
     Σ = zero(Tₒ)
-    @turbo check_empty=true for i ∈ eachindex(A)
+    @inbounds @simd ivdep for i ∈ eachindex(A)
         Σ += A[i]
     end
     return Σ
 end
-# Fallback method for non-StridedArrays
-function _nansum(A, ::Colon)
-    Tₒ = Base.promote_op(+, eltype(A), Int)
-    Σ = ∅ = zero(Tₒ)
-    @inbounds for i ∈ eachindex(A)
-        Aᵢ = A[i]
-        notnan = Aᵢ==Aᵢ
-        Σ += ifelse(notnan, Aᵢ, ∅)
-    end
-    return Σ
-end
 
 
 # Metaprogramming magic adapted from Chris Elrod example:
 # Generate customized set of loops for a given ndims and a vector
 # `static_dims` of dimensions to reduce over
-function staticdim_nansum_quote_turbo(static_dims::Vector{Int}, N::Int)
+function staticdim_nansum_quote(static_dims::Vector{Int}, N::Int)
   M = length(static_dims)
   # `static_dims` now contains every dim we're taking the sum over.
   Bᵥ = Expr(:call, :view, :B)
@@ -124,110 +113,11 @@ function staticdim_nansum_quote_turbo(static_dims::Vector{Int}, N::Int)
   # Build the reduction loop
   for n ∈ reduct_inds
     newblock = Expr(:block)
-    push!(block.args, Expr(:for, :($(inds[n]) = axes(A,$n)), newblock))
-    block = newblock
-  end
-  # Push more things here if you want them in the innermost loop
-  push!(block.args, :(Aᵢ = $Aind))
-  push!(block.args, :(notnan = Aᵢ==Aᵢ))
-  push!(block.args, :(Σ += ifelse(notnan, Aᵢ, ∅)))
-  # Push more things here if you want them at the end of the reduction loop
-  push!(rblock.args, :($Bind = Σ))
-  # Put it all together
-  quote
-    ∅ = zero(eltype(B))
-    Bᵥ = $Bᵥ
-    @turbo $loops
-    return B
-  end
-end
-
-# Chris Elrod metaprogramming magic:
-# Turn non-static integers in `dims` tuple into `StaticInt`s
-# so we can construct `static_dims` vector within @generated code
-function branches_nansum_quote_turbo(N::Int, M::Int, D)
-  static_dims = Int[]
-  for m ∈ 1:M
-    param = D.parameters[m]
-    if param <: StaticInt
-      new_dim = _dim(param)::Int
-      @assert new_dim ∉ static_dims
-      push!(static_dims, new_dim)
+    if n==last(reduct_inds)
+      push!(block.args, Expr(:macrocall, Symbol("@simd"), :ivdep, Expr(:for, :($(inds[n]) = axes(A,$n)), newblock)))
     else
-      t = Expr(:tuple)
-      for n ∈ static_dims
-        push!(t.args, :(StaticInt{$n}()))
-      end
-      q = Expr(:block, :(dimm = dims[$m]))
-      qold = q
-      ifsym = :if
-      for n ∈ 1:N
-        n ∈ static_dims && continue
-        tc = copy(t)
-        push!(tc.args, :(StaticInt{$n}()))
-        qnew = Expr(ifsym, :(dimm == $n), :(return _nansum!(B, A, $tc)))
-        for r ∈ m+1:M
-          push!(tc.args, :(dims[$r]))
-        end
-        push!(qold.args, qnew)
-        qold = qnew
-        ifsym = :elseif
-      end
-      push!(qold.args, Expr(:block, :(throw("Dimension `$dimm` not found."))))
-      return q
-    end
-  end
-  staticdim_nansum_quote_turbo(static_dims, N)
-end
-
-# Efficient @generated in-place sum
-@generated function _nansum!(B::StridedArray{Tₒ,N}, A::StridedArray{T,N}, dims::D) where {Tₒ<:PrimitiveNumber,T,N,M,D<:Tuple{Vararg{IntOrStaticInt,M}}}
-  N == M && return :(B[1] = _nansum(A, :); B)
-  branches_nansum_quote_turbo(N, M, D)
-end
-
-
-function staticdim_nansum_quote(static_dims::Vector{Int}, N::Int)
-  M = length(static_dims)
-  # `static_dims` now contains every dim we're taking the sum over.
-  Bᵥ = Expr(:call, :view, :B)
-  reduct_inds = Int[]
-  nonreduct_inds = Int[]
-  # Firstly, build our expressions for indexing each array
-  Aind = :(A[])
-  Bind = :(Bᵥ[])
-  inds = Vector{Symbol}(undef, N)
-  for n ∈ 1:N
-    ind = Symbol(:i_,n)
-    inds[n] = ind
-    push!(Aind.args, ind)
-    if n ∈ static_dims
-      push!(reduct_inds, n)
-      push!(Bᵥ.args, :(firstindex(B,$n)))
-    else
-      push!(nonreduct_inds, n)
-      push!(Bᵥ.args, :)
-      push!(Bind.args, ind)
+      push!(block.args, Expr(:for, :($(inds[n]) = axes(A,$n)), newblock))
     end
-  end
-  firstn = first(nonreduct_inds)
-  # Secondly, build up our set of loops
-  block = Expr(:block)
-  loops = Expr(:for, :($(inds[firstn]) = indices((A,B),$firstn)), block)
-  if length(nonreduct_inds) > 1
-    for n ∈ @view(nonreduct_inds[2:end])
-      newblock = Expr(:block)
-      push!(block.args, Expr(:for, :($(inds[n]) = indices((A,B),$n)), newblock))
-      block = newblock
-    end
-  end
-  rblock = block
-  # Push more things here if you want them at the beginning of the reduction loop
-  push!(rblock.args, :(Σ = ∅))
-  # Build the reduction loop
-  for n ∈ reduct_inds
-    newblock = Expr(:block)
-    push!(block.args, Expr(:for, :($(inds[n]) = axes(A,$n)), newblock))
     block = newblock
   end
   # Push more things here if you want them in the innermost loop
@@ -245,6 +135,8 @@ function staticdim_nansum_quote(static_dims::Vector{Int}, N::Int)
   end
 end
 
+# Turn non-static integers in `dims` tuple into `StaticInt`s
+# so we can construct `static_dims` vector within @generated code
 function branches_nansum_quote(N::Int, M::Int, D)
   static_dims = Int[]
   for m ∈ 1:M
diff --git a/src/ArrayStats/nanvar.jl b/src/ArrayStats/nanvar.jl
index 0cbdc6c..12a68cc 100644
--- a/src/ArrayStats/nanvar.jl
+++ b/src/ArrayStats/nanvar.jl
@@ -45,11 +45,11 @@ _nanvar(μ, corrected::Bool, A, dims::Int) = _nanvar(μ, corrected, A, (dims,))
 # If the mean isn't known, compute it
 _nanvar(::Nothing, corrected::Bool, A, dims::Tuple) = _nanvar!(_nanmean(A, dims), corrected, A, dims)
 # Reduce all the dims!
-function _nanvar(::Nothing, corrected::Bool, A::StridedArray{T}, ::Colon) where T<:PrimitiveFloat
-    Tₒ = Base.promote_op(/, T, Int)
+function _nanvar(::Nothing, corrected::Bool, A, ::Colon)
+    Tₒ = Base.promote_op(/, eltype(A), Int)
     n = 0
     Σ = ∅ = zero(Tₒ)
-    @turbo check_empty=true for i ∈ eachindex(A)
+    @inbounds @simd ivdep for i ∈ eachindex(A)
         Aᵢ = A[i]
         notnan = Aᵢ==Aᵢ
         n += notnan
@@ -57,48 +57,28 @@ function _nanvar(::Nothing, corrected::Bool, A::StridedArray{T}, ::Colon) where
     end
     μ = Σ / n
     σ² = ∅ = zero(typeof(μ))
-    @turbo check_empty=true for i ∈ eachindex(A)
+    @inbounds @simd ivdep for i ∈ eachindex(A)
         δ = A[i] - μ
         notnan = δ==δ
         σ² += ifelse(notnan, δ * δ, ∅)
     end
     return σ² / max(n-corrected,0)
 end
-function _nanvar(::Nothing, corrected::Bool, A::StridedArray{T}, ::Colon) where T<:PrimitiveInteger
+function _nanvar(::Nothing, corrected::Bool, A::AbstractArray{T}, ::Colon) where T<:Integer
     Tₒ = Base.promote_op(/, T, Int)
     n = length(A)
     Σ = zero(Tₒ)
-    @turbo check_empty=true for i ∈ eachindex(A)
+    @inbounds @simd ivdep for i ∈ eachindex(A)
         Σ += A[i]
     end
     μ = Σ / n
     σ² = zero(typeof(μ))
-    @turbo check_empty=true for i ∈ eachindex(A)
+    @inbounds @simd ivdep for i ∈ eachindex(A)
         δ = A[i] - μ
         σ² += δ * δ
     end
     return σ² / max(n-corrected,0)
 end
-# Fallback method for non-StridedArrays
-function _nanvar(::Nothing, corrected::Bool, A, ::Colon)
-    Tₒ = Base.promote_op(/, eltype(A), Int)
-    n = 0
-    Σ = ∅ = zero(Tₒ)
-    @inbounds for i ∈ eachindex(A)
-        Aᵢ = A[i]
-        notnan = Aᵢ==Aᵢ
-        n += notnan
-        Σ += ifelse(notnan, Aᵢ, ∅)
-    end
-    μ = Σ / n
-    σ² = ∅ = zero(typeof(μ))
-    @inbounds for i ∈ eachindex(A)
-        δ = A[i] - μ
-        notnan = δ==δ
-        σ² += ifelse(notnan, δ * δ, ∅)
-    end
-    return σ² / max(n-corrected,0)
-end
 
 
 # If the mean is known, pass it on in the appropriate form
@@ -106,21 +86,21 @@ _nanvar(μ, corrected::Bool, A, dims::Tuple) = _nanvar!(collect(μ), corrected,
 _nanvar(μ::Array, corrected::Bool, A, dims::Tuple) = _nanvar!(copy(μ), corrected, A, dims)
 _nanvar(μ::Number, corrected::Bool, A, dims::Tuple) = _nanvar!([μ], corrected, A, dims)
 # Reduce all the dims!
-function _nanvar(μ::Number, corrected::Bool, A::StridedArray{T}, ::Colon) where T<:PrimitiveFloat
-    n = 0
-    σ² = ∅ = zero(typeof(μ))
-    @turbo check_empty=true for i ∈ eachindex(A)
-        δ = A[i] - μ
-        notnan = δ==δ
-        n += notnan
-        σ² += ifelse(notnan, δ * δ, ∅)
-    end
-    return σ² / max(n-corrected, 0)
+function _nanvar(μ::Number, corrected::Bool, A, ::Colon)
+  n = 0
+  σ² = ∅ = zero(typeof(μ))
+  @inbounds @simd ivdep for i ∈ eachindex(A)
+      δ = A[i] - μ
+      notnan = δ==δ
+      n += notnan
+      σ² += ifelse(notnan, δ * δ, ∅)
+  end
+  return σ² / max(n-corrected, 0)
 end
-function _nanvar(μ::Number, corrected::Bool, A::StridedArray{T}, ::Colon) where T<:PrimitiveInteger
+function _nanvar(μ::Number, corrected::Bool, A::AbstractArray{T}, ::Colon) where T<:Integer
     σ² = zero(typeof(μ))
     if μ==μ
-        @turbo check_empty=true for i ∈ eachindex(A)
+        @inbounds @simd ivdep for i ∈ eachindex(A)
             δ = A[i] - μ
             σ² += δ * δ
         end
@@ -130,41 +110,11 @@ function _nanvar(μ::Number, corrected::Bool, A::StridedArray{T}, ::Colon) where
     end
     return σ² / max(n-corrected, 0)
 end
-# Fallback method for non-strided-arrays
-function _nanvar(μ::Number, corrected::Bool, A, ::Colon)
-    n = 0
-    σ² = ∅ = zero(typeof(μ))
-    @inbounds for i ∈ eachindex(A)
-        δ = A[i] - μ
-        notnan = δ==δ
-        n += notnan
-        σ² += ifelse(notnan, δ * δ, ∅)
-    end
-    return σ² / max(n-corrected, 0)
-end
-
-# # Fallback method for overly-complex reductions
-# function _nanvar_fallback!(B::AbstractArray, corrected::Bool, A::AbstractArray,region)
-#     mask = nanmask(A)
-#     N = sum(mask, dims=region)
-#     Σ = sum(A.*mask, dims=region)./N
-#     δ = A .- Σ # Subtract mean, using broadcasting
-#     @turbo check_empty=true for i ∈ eachindex(δ)
-#         δᵢ = δ[i]
-#         δ[i] = ifelse(mask[i], δᵢ * δᵢ, 0)
-#     end
-#     B .= sum(δ, dims=region)
-#     @turbo check_empty=true for i ∈ eachindex(B)
-#         B[i] = B[i] / max(N[i] - corrected, 0)
-#     end
-#     return B
-# end
-
 
 # Metaprogramming magic adapted from Chris Elrod example:
 # Generate customized set of loops for a given ndims and a vector
 # `static_dims` of dimensions to reduce over
-function staticdim_nanvar_quote_turbo(static_dims::Vector{Int}, N::Int)
+function staticdim_nanvar_quote(static_dims::Vector{Int}, N::Int)
   M = length(static_dims)
   # `static_dims` now contains every dim we're taking the var over.
   Bᵥ = Expr(:call, :view, :B)
@@ -206,120 +156,11 @@ function staticdim_nanvar_quote_turbo(static_dims::Vector{Int}, N::Int)
   # Build the reduction loop
   for n ∈ reduct_inds
     newblock = Expr(:block)
-    push!(block.args, Expr(:for, :($(inds[n]) = axes(A,$n)), newblock))
-    block = newblock
-  end
-  # Push more things here if you want them in the innermost loop
-  push!(block.args, :(δ = $Aind - μ))
-  push!(block.args, :(notnan = δ==δ))
-  push!(block.args, :(n += notnan))
-  push!(block.args, :(σ² += ifelse(notnan, δ * δ, ∅)))
-  # Push more things here if you want them at the end of the reduction loop
-  push!(rblock.args, :($Bind = σ² * inv(max(n-corrected,0))))
-
-  # Put it all together
-  quote
-    ∅ = zero(eltype(B))
-    Bᵥ = $Bᵥ
-    @turbo $loops
-    return B
-  end
-end
-
-# Chris Elrod metaprogramming magic:
-# Turn non-static integers in `dims` tuple into `StaticInt`s
-# so we can construct `static_dims` vector within @generated code
-function branches_nanvar_quote_turbo(N::Int, M::Int, D)
-  static_dims = Int[]
-  for m ∈ 1:M
-    param = D.parameters[m]
-    if param <: StaticInt
-      new_dim = _dim(param)::Int
-      @assert new_dim ∉ static_dims
-      push!(static_dims, new_dim)
+    if n==last(reduct_inds)
+      push!(block.args, Expr(:macrocall, Symbol("@simd"), :ivdep, Expr(:for, :($(inds[n]) = axes(A,$n)), newblock)))
     else
-      t = Expr(:tuple)
-      for n ∈ static_dims
-        push!(t.args, :(StaticInt{$n}()))
-      end
-      q = Expr(:block, :(dimm = dims[$m]))
-      qold = q
-      ifsym = :if
-      for n ∈ 1:N
-        n ∈ static_dims && continue
-        tc = copy(t)
-        push!(tc.args, :(StaticInt{$n}()))
-        qnew = Expr(ifsym, :(dimm == $n), :(return _nanvar!(B, corrected, A, $tc)))
-        for r ∈ m+1:M
-          push!(tc.args, :(dims[$r]))
-        end
-        push!(qold.args, qnew)
-        qold = qnew
-        ifsym = :elseif
-      end
-      push!(qold.args, Expr(:block, :(throw("Dimension `$dimm` not found."))))
-      return q
-    end
-  end
-  staticdim_nanvar_quote_turbo(static_dims, N)
-end
-
-# Efficient @generated in-place var
-@generated function _nanvar!(B::StridedArray{Tₒ,N}, corrected::Bool, A::StridedArray{T,N}, dims::D) where {Tₒ<:PrimitiveNumber,T,N,M,D<:Tuple{Vararg{IntOrStaticInt,M}}}
-  N == M && return :(B[1] = _nanvar(B[1], corrected, A, :); B)
-  # total_combinations = binomial(N,M)
-  # if total_combinations > 6
-  #   # Fallback, for overly-complex reductions
-  #   return :(_nanvar_fallback!(B, corrected, A, dims))
-  # else
-    branches_nanvar_quote_turbo(N, M, D)
-  # end
-end
-
-
-function staticdim_nanvar_quote(static_dims::Vector{Int}, N::Int)
-  M = length(static_dims)
-  # `static_dims` now contains every dim we're taking the var over.
-  Bᵥ = Expr(:call, :view, :B)
-  reduct_inds = Int[]
-  nonreduct_inds = Int[]
-  # Firstly, build our expressions for indexing each array
-  Aind = :(A[])
-  Bind = :(Bᵥ[])
-  inds = Vector{Symbol}(undef, N)
-  for n ∈ 1:N
-    ind = Symbol(:i_,n)
-    inds[n] = ind
-    push!(Aind.args, ind)
-    if n ∈ static_dims
-      push!(reduct_inds, n)
-      push!(Bᵥ.args, :(firstindex(B,$n)))
-    else
-      push!(nonreduct_inds, n)
-      push!(Bᵥ.args, :)
-      push!(Bind.args, ind)
+      push!(block.args, Expr(:for, :($(inds[n]) = axes(A,$n)), newblock))
     end
-  end
-  firstn = first(nonreduct_inds)
-  # Secondly, build up our set of loops
-  block = Expr(:block)
-  loops = Expr(:for, :($(inds[firstn]) = indices((A,B),$firstn)), block)
-  if length(nonreduct_inds) > 1
-    for n ∈ @view(nonreduct_inds[2:end])
-      newblock = Expr(:block)
-      push!(block.args, Expr(:for, :($(inds[n]) = indices((A,B),$n)), newblock))
-      block = newblock
-    end
-  end
-  rblock = block
-  # Push more things here if you want them at the beginning of the reduction loop
-  push!(rblock.args, :(μ = $Bind))
-  push!(rblock.args, :(n = 0))
-  push!(rblock.args, :(σ² = ∅))
-  # Build the reduction loop
-  for n ∈ reduct_inds
-    newblock = Expr(:block)
-    push!(block.args, Expr(:for, :($(inds[n]) = axes(A,$n)), newblock))
     block = newblock
   end
   # Push more things here if you want them in the innermost loop
@@ -339,6 +180,8 @@ function staticdim_nanvar_quote(static_dims::Vector{Int}, N::Int)
   end
 end
 
+# Turn non-static integers in `dims` tuple into `StaticInt`s
+# so we can construct `static_dims` vector within @generated code
 function branches_nanvar_quote(N::Int, M::Int, D)
   static_dims = Int[]
   for m ∈ 1:M
diff --git a/src/NaNStatistics.jl b/src/NaNStatistics.jl
index 6399f42..4c15cf0 100644
--- a/src/NaNStatistics.jl
+++ b/src/NaNStatistics.jl
@@ -1,16 +1,11 @@
 module NaNStatistics
 
-    # AVX vectorziation tools
-    using IfElse: ifelse
-    using LoopVectorization
-
     using Static
     const IntOrStaticInt = Union{Integer, StaticInt}
-    const PrimitiveFloat = Union{Float16, Float32, Float64}
-    const PrimitiveInteger = Union{Int8, Int16, Int32, Int64, Int128, UInt8, UInt16, UInt32, UInt64, UInt128}
-    const PrimitiveNumber = Union{PrimitiveFloat, PrimitiveInteger}
     _dim(::Type{StaticInt{N}}) where {N} = N::Int
 
+    using StaticArrayInterface: indices
+
     include("ArrayStats/ArrayStats.jl")
     include("ArrayStats/nanmean.jl")
     include("ArrayStats/nansum.jl")
diff --git a/src/Sorting/nanmedian.jl b/src/Sorting/nanmedian.jl
index 7968e9a..9be0aa6 100644
--- a/src/Sorting/nanmedian.jl
+++ b/src/Sorting/nanmedian.jl
@@ -108,20 +108,20 @@ function _nanmedian!(A::AbstractArray{T}, ::Colon) where {T}
     n < 2 && return float(T)(A[iₗ])
     i½ = (iₗ + iᵤ) ÷ 2
     if iseven(n)
-        if n < 384
+        Aᵢ₋, Aᵢ₊ =  if n < 384
             quicksort!(A, iₗ, iᵤ)
+            A[i½], A[i½+1]
         else
-            quickselect!(A, iₗ, iᵤ, i½)
-            quickselect!(A, i½+1, iᵤ, i½+1)
+            quickselect!(A, iₗ, iᵤ, i½), quickselect!(A, iₗ, iᵤ, i½+1)
         end
-        return (A[i½] + A[i½+1]) / 2
+        return float(T)(0.5*Aᵢ₋ + 0.5*Aᵢ₊)
     else
         if n < 192
             quicksort!(A, iₗ, iᵤ)
         else
             quickselect!(A, iₗ, iᵤ, i½)
         end
-        return A[i½] / 1
+        return float(T)(A[i½])
     end
 end
 
diff --git a/src/Sorting/nanpctile.jl b/src/Sorting/nanpctile.jl
index d3271bf..3b8db57 100644
--- a/src/Sorting/nanpctile.jl
+++ b/src/Sorting/nanpctile.jl
@@ -182,14 +182,14 @@ function _nanquantile!(A::AbstractArray{T}, q::Real, ::Colon) where {T}
     iₚ = q*n₋ + iₗ
     iₚ₋ = floor(Int, iₚ)
     iₚ₊ = ceil(Int, iₚ)
-    if n₋ < 384
+    f = iₚ - iₚ₋
+    Aᵢ₋, Aᵢ₊ = if n₋ < 384
         quicksort!(A, iₗ, iᵤ)
+        A[iₚ₋], A[iₚ₊]
     else
-        quickselect!(A, iₗ, iᵤ, iₚ₋)
-        quickselect!(A, iₚ₊, iᵤ, iₚ₊)
+        quickselect!(A, iₗ, iᵤ, iₚ₋), quickselect!(A, iₗ, iᵤ, iₚ₊)
     end
-    f = iₚ - iₚ₋
-    return f*A[iₚ₊] + (1-f)*A[iₚ₋]
+    return f*Aᵢ₊ + (1-f)*Aᵢ₋
 end
 
 # Generate customized set of loops for a given ndims and a vector
diff --git a/src/Sorting/quicksort.jl b/src/Sorting/quicksort.jl
index e353641..2f740ff 100644
--- a/src/Sorting/quicksort.jl
+++ b/src/Sorting/quicksort.jl
@@ -5,7 +5,7 @@ function sortnans!(A, iₗ=firstindex(A), iᵤ=lastindex(A))
 
     # Count up NaNs
     Nₙₐₙ = 0
-    @turbo for i = iₗ:iᵤ
+    @inbounds @simd ivdep for i = iₗ:iᵤ
         Nₙₐₙ += A[i] != A[i]
     end
     # If none, return early
@@ -61,7 +61,7 @@ end
             A[𝔦ₗ], A[𝔦ᵤ] = A[𝔦ᵤ], A[𝔦ₗ]
         end
     else
-        @turbo for i ∈ 0:n
+        @inbounds @simd ivdep for i ∈ 0:n
             𝔦ₗ = iₗ+i
             𝔦ᵤ = iᵤ-i
             l = A[𝔦ₗ]
@@ -115,7 +115,7 @@ function quicksort!(A, iₗ=firstindex(A), iᵤ=lastindex(A))
 
         # Count up elements that must be moved to upper partition
         Nᵤ = 0
-        @turbo for i = (iₗ+1):iᵤ
+        @inbounds @simd ivdep for i = (iₗ+1):iᵤ
             Nᵤ += A[i] >= pivot
         end
         Nₗ = N - Nᵤ