FixedSizeArrays.jl Showcase #62
Replies: 3 comments
-
Adding vectors of
|
Beta Was this translation helpful? Give feedback.
-
Reshaping
|
Beta Was this translation helpful? Give feedback.
-
Reduced allocations in a function with side effects
julia> using FixedSizeArrays, BenchmarkTools, Random
julia> function f(T=Vector{Float64})
v = T(undef, 8)
rand!(v)
return length(v)
end
f (generic function with 2 methods)
julia> @code_llvm debuginfo=:none f(Vector{Float64})
; Function Signature: f(Type{Array{Float64, 1}})
define i64 @julia_f_1330() local_unnamed_addr #0 {
top:
%gcframe1 = alloca [3 x ptr], align 16
call void @llvm.memset.p0.i64(ptr align 16 %gcframe1, i8 0, i64 24, i1 true)
%thread_ptr = call ptr asm "movq %fs:0, $0", "=r"() #12
%tls_ppgcstack = getelementptr inbounds i8, ptr %thread_ptr, i64 -8
%tls_pgcstack = load ptr, ptr %tls_ppgcstack, align 8
store i64 4, ptr %gcframe1, align 8
%frame.prev = getelementptr inbounds ptr, ptr %gcframe1, i64 1
%task.gcstack = load ptr, ptr %tls_pgcstack, align 8
store ptr %task.gcstack, ptr %frame.prev, align 8
store ptr %gcframe1, ptr %tls_pgcstack, align 8
%ptls_field = getelementptr inbounds i8, ptr %tls_pgcstack, i64 16
%ptls_load = load ptr, ptr %ptls_field, align 8
%"Memory{Float64}[]" = call noalias nonnull align 8 dereferenceable(96) ptr @ijl_gc_small_alloc(ptr %ptls_load, i32 600, i32 96, i64 139898984951520) #8
%"Memory{Float64}[].tag_addr" = getelementptr inbounds i64, ptr %"Memory{Float64}[]", i64 -1
store atomic i64 139898984951520, ptr %"Memory{Float64}[].tag_addr" unordered, align 8
%memory_ptr = getelementptr inbounds { i64, ptr }, ptr %"Memory{Float64}[]", i64 0, i32 1
%memory_data = getelementptr inbounds i8, ptr %"Memory{Float64}[]", i64 16
store ptr %memory_data, ptr %memory_ptr, align 8
store i64 8, ptr %"Memory{Float64}[]", align 8
%gc_slot_addr_0 = getelementptr inbounds ptr, ptr %gcframe1, i64 2
store ptr %"Memory{Float64}[]", ptr %gc_slot_addr_0, align 8
%ptls_load16 = load ptr, ptr %ptls_field, align 8
%"new::Array" = call noalias nonnull align 8 dereferenceable(32) ptr @ijl_gc_small_alloc(ptr %ptls_load16, i32 408, i32 32, i64 139899057726624) #8
%"new::Array.tag_addr" = getelementptr inbounds i64, ptr %"new::Array", i64 -1
store atomic i64 139899057726624, ptr %"new::Array.tag_addr" unordered, align 8
%0 = getelementptr inbounds i8, ptr %"new::Array", i64 8
store ptr %memory_data, ptr %"new::Array", align 8
store ptr %"Memory{Float64}[]", ptr %0, align 8
%"new::Array.size_ptr" = getelementptr inbounds i8, ptr %"new::Array", i64 16
store i64 8, ptr %"new::Array.size_ptr", align 8
store ptr %"new::Array", ptr %gc_slot_addr_0, align 8
%1 = call i64 @j_xoshiro_bulk_simd_1334(ptr nonnull %memory_data, i64 signext 64)
%2 = sub i64 64, %1
%3 = getelementptr i8, ptr %memory_data, i64 %1
%4 = icmp eq i64 %1, 64
br i1 %4, label %L26, label %L24
L24: ; preds = %top
store ptr %"new::Array", ptr %gc_slot_addr_0, align 8
call void @j_xoshiro_bulk_nosimd_1335(ptr %3, i64 signext %2)
br label %L26
L26: ; preds = %L24, %top
%"new::Array.size6.0.copyload" = load i64, ptr %"new::Array.size_ptr", align 8
%frame.prev20 = load ptr, ptr %frame.prev, align 8
store ptr %frame.prev20, ptr %tls_pgcstack, align 8
ret i64 %"new::Array.size6.0.copyload"
}
julia> @code_llvm debuginfo=:none f(FixedSizeVectorDefault{Float64})
; Function Signature: f(Type{FixedSizeArrays.FixedSizeArray{Float64, 1, Memory{Float64}}})
define i64 @julia_f_1347() local_unnamed_addr #0 {
top:
%gcframe1 = alloca [4 x ptr], align 16
call void @llvm.memset.p0.i64(ptr align 16 %gcframe1, i8 0, i64 32, i1 true)
%0 = getelementptr inbounds ptr, ptr %gcframe1, i64 3
%1 = getelementptr inbounds ptr, ptr %gcframe1, i64 2
%"new::FixedSizeArray" = alloca [2 x i64], align 8
%sret_box = alloca [2 x i64], align 8
%thread_ptr = call ptr asm "movq %fs:0, $0", "=r"() #11
%tls_ppgcstack = getelementptr inbounds i8, ptr %thread_ptr, i64 -8
%tls_pgcstack = load ptr, ptr %tls_ppgcstack, align 8
store i64 8, ptr %gcframe1, align 8
%frame.prev = getelementptr inbounds ptr, ptr %gcframe1, i64 1
%task.gcstack = load ptr, ptr %tls_pgcstack, align 8
store ptr %task.gcstack, ptr %frame.prev, align 8
store ptr %gcframe1, ptr %tls_pgcstack, align 8
%ptls_field = getelementptr inbounds i8, ptr %tls_pgcstack, i64 16
%ptls_load = load ptr, ptr %ptls_field, align 8
%"Memory{Float64}[]" = call noalias nonnull align 8 dereferenceable(96) ptr @ijl_gc_small_alloc(ptr %ptls_load, i32 600, i32 96, i64 139898984951520) #7
%"Memory{Float64}[].tag_addr" = getelementptr inbounds i64, ptr %"Memory{Float64}[]", i64 -1
store atomic i64 139898984951520, ptr %"Memory{Float64}[].tag_addr" unordered, align 8
%memory_ptr = getelementptr inbounds { i64, ptr }, ptr %"Memory{Float64}[]", i64 0, i32 1
%memory_data = getelementptr inbounds i8, ptr %"Memory{Float64}[]", i64 16
store ptr %memory_data, ptr %memory_ptr, align 8
store i64 8, ptr %"Memory{Float64}[]", align 8
%2 = getelementptr inbounds i8, ptr %"new::FixedSizeArray", i64 8
store i64 8, ptr %2, align 8
store ptr %"Memory{Float64}[]", ptr %0, align 8
call void @"j_rand!_1350"(ptr noalias nocapture noundef nonnull sret({ ptr, [1 x i64] }) %sret_box, ptr noalias nocapture noundef nonnull %1, ptr nocapture nonnull readonly %"new::FixedSizeArray", ptr nocapture nonnull readonly %0)
%frame.prev3 = load ptr, ptr %frame.prev, align 8
store ptr %frame.prev3, ptr %tls_pgcstack, align 8
ret i64 8
}
julia> @benchmark f(Vector{Float64})
BenchmarkTools.Trial: 10000 samples with 992 evaluations per sample.
Range (min … max): 36.956 ns … 3.587 μs ┊ GC (min … max): 0.00% … 98.49%
Time (median): 37.688 ns ┊ GC (median): 0.00%
Time (mean ± σ): 43.966 ns ± 66.210 ns ┊ GC (mean ± σ): 11.74% ± 9.13%
█▂ ▁
███▇▅▄▃▆▁▅▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▃▄▃▃▁▁▁▃▁▁▁▁▃▁▁▁▁▁▁▁▁▁▃▁▁▁▁▁▁▁▁▃▅ █
37 ns Histogram: log(frequency) by time 445 ns <
Memory estimate: 128 bytes, allocs estimate: 2.
julia> @benchmark f(FixedSizeVectorDefault{Float64})
BenchmarkTools.Trial: 10000 samples with 999 evaluations per sample.
Range (min … max): 11.305 ns … 2.407 μs ┊ GC (min … max): 0.00% … 98.80%
Time (median): 12.714 ns ┊ GC (median): 0.00%
Time (mean ± σ): 16.228 ns ± 50.026 ns ┊ GC (mean ± σ): 16.37% ± 6.04%
▄▂ ▄█▃
██████▅▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▂▂▂▂▂▂▂▂▁▁▂▁▂▁▁▁▁▁▁▂▁▁▁▁▁▂▂▂▂▂▂▂▂ ▃
11.3 ns Histogram: frequency by time 33.3 ns <
Memory estimate: 96 bytes, allocs estimate: 1.
julia> versioninfo()
Julia Version 1.13.0-DEV.111
Commit 58399e22ad0 (2025-02-25 00:18 UTC)
Build Info:
Official https://julialang.org release
Platform Info:
OS: Linux (x86_64-linux-gnu)
CPU: 22 × Intel(R) Core(TM) Ultra 7 155H
WORD_SIZE: 64
LLVM: libLLVM-18.1.7 (ORCJIT, alderlake)
GC: Built with stock GC
Threads: 1 default, 1 interactive, 1 GC (on 22 virtual cores)
This isn't much different from the other similar examples above: the return value is fully statically inferred, and while the memory allocations aren't fully removed, they're reduced compared to `Vector` (one allocation instead of two).
Beta Was this translation helpful? Give feedback.
-
I'm starting this discussion to collect examples of how `FixedSizeArray` can be better (smaller generated code and/or faster) than the standard `Array`. I'll start with the example in the `README.md`.
Simple function which can constant-propagate the size of the array
julia> code_llvm(h)
julia> versioninfo()
Julia Version 1.12.0-DEV.1082
Commit 58c7186d19* (2024-08-22 02:53 UTC)
Platform Info:
OS: macOS (arm64-apple-darwin23.4.0)
CPU: 8 × Apple M1
WORD_SIZE: 64
LLVM: libLLVM-18.1.7 (ORCJIT, apple-m1)
Threads: 1 default, 0 interactive, 1 GC (on 4 virtual cores)
Beta Was this translation helpful? Give feedback.
All reactions