From 01f6029c12d72c4f3f9153c858ad045a7deb4b31 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Tue, 7 Nov 2023 14:58:22 +0000 Subject: [PATCH 1/3] Add API call for Arm64 Sve.LoadVectorNonFaulting --- src/coreclr/jit/CMakeLists.txt | 1 + src/coreclr/jit/emitarm64.cpp | 34 +++++++ src/coreclr/jit/emitarm64.h | 3 + src/coreclr/jit/gentree.cpp | 5 +- src/coreclr/jit/hwintrinsic.h | 20 ++++ src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 6 ++ src/coreclr/jit/hwintrinsiclistarm64.h | 2 + src/coreclr/jit/hwintrinsiclistarm64sve.h | 26 +++++ .../ILLink.Substitutions.NoArmIntrinsics.xml | 6 ++ .../System.Private.CoreLib.Shared.projitems | 2 + .../Arm/Sve.PlatformNotSupported.cs | 94 ++++++++++++++++++ .../src/System/Runtime/Intrinsics/Arm/Sve.cs | 94 ++++++++++++++++++ .../ref/System.Runtime.Intrinsics.cs | 17 ++++ .../HardwareIntrinsics/Arm/Shared/Program.cs | 1 + .../HardwareIntrinsics/Arm/Sve/Program.Sve.cs | 97 +++++++++++++++++++ .../JIT/HardwareIntrinsics/Arm/Sve/Sve.csproj | 15 +++ 16 files changed, 422 insertions(+), 1 deletion(-) create mode 100644 src/coreclr/jit/hwintrinsiclistarm64sve.h create mode 100644 src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs create mode 100644 src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs create mode 100644 src/tests/JIT/HardwareIntrinsics/Arm/Sve/Program.Sve.cs create mode 100644 src/tests/JIT/HardwareIntrinsics/Arm/Sve/Sve.csproj diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt index 480d9d50350ddc..aa660321075890 100644 --- a/src/coreclr/jit/CMakeLists.txt +++ b/src/coreclr/jit/CMakeLists.txt @@ -402,6 +402,7 @@ set( JIT_ARM64_HEADERS emitfmtsarm64.h emitfmtsarm64sve.h hwintrinsiclistarm64.h + hwintrinsiclistarm64sve.h instrsarm64.h instrsarm64sve.h registerarm64.h diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 28f05dfe53be5f..4cc833b0a8ca47 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -5801,6 +5801,34 @@ emitter::code_t emitter::emitInsCodeSve(instruction ins, insFormat fmt) } } +// For the given 'elemsize' returns the 'arrangement' when used in a SVE vector register arrangement. +// Asserts and returns INS_OPTS_NONE if an invalid 'elemsize' is passed +// +/*static*/ insOpts emitter::optGetSveInsOpt(emitAttr elemsize) +{ + switch (elemsize) + { + case EA_1BYTE: + return INS_OPTS_SCALABLE_B; + + case EA_2BYTE: + return INS_OPTS_SCALABLE_H; + + case EA_4BYTE: + return INS_OPTS_SCALABLE_S; + + case EA_8BYTE: + return INS_OPTS_SCALABLE_D; + + case EA_16BYTE: + return INS_OPTS_SCALABLE_Q; + + default: + assert(!"Invalid emitAttr for sve vector register"); + return INS_OPTS_NONE; + } +} + // For the given 'arrangement' returns the 'elemsize' specified by the SVE vector register arrangement // asserts and returns EA_UNKNOWN if an invalid 'arrangement' value is passed // @@ -9924,6 +9952,12 @@ void emitter::emitIns_R_R_R(instruction ins, fmt = IF_SVE_CZ_4A; break; + case INS_sve_ldnf1b: + case INS_sve_ldnf1h: + case INS_sve_ldnf1w: + case INS_sve_ldnf1d: + return emitIns_R_R_R_I(ins, size, reg1, reg2, reg3, 0, opt); + default: unreached(); break; diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 56dbe2e2c52d1c..fca60c3c3c7d06 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -726,6 +726,9 @@ static emitAttr optGetDatasize(insOpts arrangement); // For the given 'arrangement' returns the 'elemsize' specified by the vector register arrangement static emitAttr optGetElemsize(insOpts arrangement); +// For the given 'elemsize' returns the 'arrangement' when used in a SVE vector register arrangement. +static insOpts optGetSveInsOpt(emitAttr elemsize); + // For the given 'arrangement' returns the 'elemsize' specified by the SVE vector register arrangement static emitAttr optGetSveElemsize(insOpts arrangement); diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 31afa8983c9e19..386450ce96e629 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -25711,9 +25711,12 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad(GenTree** pAddr) const case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x2: case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x3: case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x4: - addr = Op(3); break; + + case NI_Sve_LoadVectorNonFaulting: + addr = Op(2); + break; #endif // TARGET_ARM64 default: diff --git a/src/coreclr/jit/hwintrinsic.h b/src/coreclr/jit/hwintrinsic.h index dcd5c86129b74d..9b34d38bbb82ea 100644 --- a/src/coreclr/jit/hwintrinsic.h +++ b/src/coreclr/jit/hwintrinsic.h @@ -175,6 +175,13 @@ enum HWIntrinsicFlag : unsigned int // The intrinsic needs consecutive registers HW_Flag_NeedsConsecutiveRegisters = 0x4000, + + // The intrinsic uses scalable registers + HW_Flag_Scalable = 0x8000, + + // The intrinsic uses a mask in arg1 to predicate the result + HW_Flag_Predicated = 0x10000, + #else #error Unsupported platform #endif @@ -846,6 +853,19 @@ struct HWIntrinsicInfo const HWIntrinsicFlag flags = lookupFlags(id); return (flags & HW_Flag_HasImmediateOperand) != 0; } + + static bool isScalable(NamedIntrinsic id) + { + const HWIntrinsicFlag flags = lookupFlags(id); + return (flags & HW_Flag_Scalable) != 0; + } + + // TODO-SVE: Check this flag when register allocating + static bool HasPredicatedResult(NamedIntrinsic id) + { + const HWIntrinsicFlag flags = lookupFlags(id); + return (flags & HW_Flag_Predicated) != 0; + } #endif // TARGET_ARM64 static bool HasSpecialSideEffect(NamedIntrinsic id) diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index eba1b6f33a09c4..7c243bd585a1e0 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -265,6 +265,12 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) emitSize = EA_UNKNOWN; opt = INS_OPTS_NONE; } + else if (HWIntrinsicInfo::isScalable(intrin.id)) + { + emitSize = EA_SCALABLE; + // TODO-SVE: This shouldn't require GetEmitter() + opt = GetEmitter()->optGetSveInsOpt(emitTypeSize(intrin.baseType)); + } else { emitSize = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->GetSimdSize())); diff --git a/src/coreclr/jit/hwintrinsiclistarm64.h b/src/coreclr/jit/hwintrinsiclistarm64.h index cb4c8269d61171..e334a20bd59fe1 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64.h +++ b/src/coreclr/jit/hwintrinsiclistarm64.h @@ -802,6 +802,8 @@ HARDWARE_INTRINSIC(Sha256, ScheduleUpdate1, #endif // FEATURE_HW_INTRINSIC +#include "hwintrinsiclistarm64sve.h" + #undef HARDWARE_INTRINSIC // clang-format on diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h new file mode 100644 index 00000000000000..5cae83e46ecc97 --- /dev/null +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -0,0 +1,26 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/*****************************************************************************/ +#ifndef HARDWARE_INTRINSIC +#error Define HARDWARE_INTRINSIC before including this file +#endif +/*****************************************************************************/ + +// clang-format off + +#ifdef FEATURE_HW_INTRINSICS +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// ISA Function name SIMD size NumArg EncodesExtraTypeArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// SVE Intrinsics + +// Sve +HARDWARE_INTRINSIC(Sve, LoadVectorNonFaulting, -1, 2, true, {INS_sve_ldnf1b, INS_sve_ldnf1b, INS_sve_ldnf1h, INS_sve_ldnf1h, INS_sve_ldnf1w, INS_sve_ldnf1w, INS_sve_ldnf1d, INS_sve_ldnf1d, INS_sve_ldnf1w, INS_sve_ldnf1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_Predicated) + +#endif // FEATURE_HW_INTRINSIC + +#undef HARDWARE_INTRINSIC + +// clang-format on diff --git a/src/libraries/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.NoArmIntrinsics.xml b/src/libraries/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.NoArmIntrinsics.xml index 1da6f6d57d26e3..7d05d2b47a61fe 100644 --- a/src/libraries/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.NoArmIntrinsics.xml +++ b/src/libraries/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.NoArmIntrinsics.xml @@ -51,5 +51,11 @@ + + + + + + diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index e46bd04f7d6958..1213cb5d3cb22b 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -2632,6 +2632,7 @@ + @@ -2642,6 +2643,7 @@ + diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs new file mode 100644 index 00000000000000..6670cebf7423df --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs @@ -0,0 +1,94 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics.CodeAnalysis; +using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics; +using System.Numerics; + +namespace System.Runtime.Intrinsics.Arm +{ + /// + /// This class provides access to the ARM SVE hardware instructions via intrinsics + /// + [Intrinsic] + [CLSCompliant(false)] + public abstract class Sve : AdvSimd + { + internal Sve() { } + + public static new bool IsSupported { get => IsSupported; } + + [Intrinsic] + public new abstract class Arm64 : AdvSimd.Arm64 + { + internal Arm64() { } + + public static new bool IsSupported { get => IsSupported; } + } + + + /// LoadVectorNonFaulting : Unextended load, non-faulting + + /// + /// svint8_t svldnf1[_s8](svbool_t pg, const int8_t *base) + /// LDNF1B Zresult.B, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVectorNonFaulting(Vector mask, sbyte* address) { throw new PlatformNotSupportedException(); } + + /// + /// svint16_t svldnf1[_s16](svbool_t pg, const int16_t *base) + /// LDNF1H Zresult.H, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVectorNonFaulting(Vector mask, short* address) { throw new PlatformNotSupportedException(); } + + /// + /// svint32_t svldnf1[_s32](svbool_t pg, const int32_t *base) + /// LDNF1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVectorNonFaulting(Vector mask, int* address) { throw new PlatformNotSupportedException(); } + + /// + /// svint64_t svldnf1[_s64](svbool_t pg, const int64_t *base) + /// LDNF1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVectorNonFaulting(Vector mask, long* address) { throw new PlatformNotSupportedException(); } + + /// + /// svuint8_t svldnf1[_u8](svbool_t pg, const uint8_t *base) + /// LDNF1B Zresult.B, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVectorNonFaulting(Vector mask, byte* address) { throw new PlatformNotSupportedException(); } + + /// + /// svuint16_t svldnf1[_u16](svbool_t pg, const uint16_t *base) + /// LDNF1H Zresult.H, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVectorNonFaulting(Vector mask, ushort* address) { throw new PlatformNotSupportedException(); } + + /// + /// svuint32_t svldnf1[_u32](svbool_t pg, const uint32_t *base) + /// LDNF1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVectorNonFaulting(Vector mask, uint* address) { throw new PlatformNotSupportedException(); } + + /// + /// svuint64_t svldnf1[_u64](svbool_t pg, const uint64_t *base) + /// LDNF1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVectorNonFaulting(Vector mask, ulong* address) { throw new PlatformNotSupportedException(); } + + /// + /// svfloat32_t svldnf1[_f32](svbool_t pg, const float32_t *base) + /// LDNF1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVectorNonFaulting(Vector mask, float* address) { throw new PlatformNotSupportedException(); } + + /// + /// svfloat64_t svldnf1[_f64](svbool_t pg, const float64_t *base) + /// LDNF1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVectorNonFaulting(Vector mask, double* address) { throw new PlatformNotSupportedException(); } + + } +} diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs new file mode 100644 index 00000000000000..f9c13c7e082bd9 --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs @@ -0,0 +1,94 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics.CodeAnalysis; +using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics; +using System.Numerics; + +namespace System.Runtime.Intrinsics.Arm +{ + /// + /// This class provides access to the ARM SVE hardware instructions via intrinsics + /// + [Intrinsic] + [CLSCompliant(false)] + public abstract class Sve : AdvSimd + { + internal Sve() { } + + public static new bool IsSupported { get => IsSupported; } + + [Intrinsic] + public new abstract class Arm64 : AdvSimd.Arm64 + { + internal Arm64() { } + + public static new bool IsSupported { get => IsSupported; } + } + + /// LoadVectorNonFaulting : Unextended load, non-faulting + + /// + /// svint8_t svldnf1[_s8](svbool_t pg, const int8_t *base) + /// LDNF1B Zresult.B, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVectorNonFaulting(Vector mask, sbyte* address) => LoadVectorNonFaulting(mask, address); + + /// + /// svint16_t svldnf1[_s16](svbool_t pg, const int16_t *base) + /// LDNF1H Zresult.H, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVectorNonFaulting(Vector mask, short* address) => LoadVectorNonFaulting(mask, address); + + /// + /// svint32_t svldnf1[_s32](svbool_t pg, const int32_t *base) + /// LDNF1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVectorNonFaulting(Vector mask, int* address) => LoadVectorNonFaulting(mask, address); + + /// + /// svint64_t svldnf1[_s64](svbool_t pg, const int64_t *base) + /// LDNF1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVectorNonFaulting(Vector mask, long* address) => LoadVectorNonFaulting(mask, address); + + /// + /// svuint8_t svldnf1[_u8](svbool_t pg, const uint8_t *base) + /// LDNF1B Zresult.B, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVectorNonFaulting(Vector mask, byte* address) => LoadVectorNonFaulting(mask, address); + + /// + /// svuint16_t svldnf1[_u16](svbool_t pg, const uint16_t *base) + /// LDNF1H Zresult.H, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVectorNonFaulting(Vector mask, ushort* address) => LoadVectorNonFaulting(mask, address); + + /// + /// svuint32_t svldnf1[_u32](svbool_t pg, const uint32_t *base) + /// LDNF1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVectorNonFaulting(Vector mask, uint* address) => LoadVectorNonFaulting(mask, address); + + /// + /// svuint64_t svldnf1[_u64](svbool_t pg, const uint64_t *base) + /// LDNF1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVectorNonFaulting(Vector mask, ulong* address) => LoadVectorNonFaulting(mask, address); + + /// + /// svfloat32_t svldnf1[_f32](svbool_t pg, const float32_t *base) + /// LDNF1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVectorNonFaulting(Vector mask, float* address) => LoadVectorNonFaulting(mask, address); + + /// + /// svfloat64_t svldnf1[_f64](svbool_t pg, const float64_t *base) + /// LDNF1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL] + /// + public static unsafe Vector LoadVectorNonFaulting(Vector mask, double* address) => LoadVectorNonFaulting(mask, address); + + + } +} diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index b3db125c2c7ece..cd578b78934a83 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -4142,6 +4142,23 @@ internal Arm64() { } public static new bool IsSupported { get { throw null; } } } } + [System.CLSCompliantAttribute(false)] + public abstract partial class Sve : System.Runtime.Intrinsics.Arm.AdvSimd + { + internal Sve() { } + public static new bool IsSupported { get { throw null; } } + + public static unsafe System.Numerics.Vector LoadVectorNonFaulting(System.Numerics.Vector mask, sbyte* address) { throw null; } + public static unsafe System.Numerics.Vector LoadVectorNonFaulting(System.Numerics.Vector mask, short* address) { throw null; } + public static unsafe System.Numerics.Vector LoadVectorNonFaulting(System.Numerics.Vector mask, int* address) { throw null; } + public static unsafe System.Numerics.Vector LoadVectorNonFaulting(System.Numerics.Vector mask, long* address) { throw null; } + public static unsafe System.Numerics.Vector LoadVectorNonFaulting(System.Numerics.Vector mask, byte* address) { throw null; } + public static unsafe System.Numerics.Vector LoadVectorNonFaulting(System.Numerics.Vector mask, ushort* address) { throw null; } + public static unsafe System.Numerics.Vector LoadVectorNonFaulting(System.Numerics.Vector mask, uint* address) { throw null; } + public static unsafe System.Numerics.Vector LoadVectorNonFaulting(System.Numerics.Vector mask, ulong* address) { throw null; } + public static unsafe System.Numerics.Vector LoadVectorNonFaulting(System.Numerics.Vector mask, float* address) { throw null; } + public static unsafe System.Numerics.Vector LoadVectorNonFaulting(System.Numerics.Vector mask, double* address) { throw null; } + } } namespace System.Runtime.Intrinsics.X86 { diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Program.cs b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Program.cs index 08b254882aa74f..4ad42ce51fd0a1 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Program.cs +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Program.cs @@ -21,6 +21,7 @@ public static void PrintSupportedIsa() TestLibrary.TestFramework.LogInformation($" Rdm: {Rdm.IsSupported}"); TestLibrary.TestFramework.LogInformation($" Sha1: {Sha1.IsSupported}"); TestLibrary.TestFramework.LogInformation($" Sha256: {Sha256.IsSupported}"); + TestLibrary.TestFramework.LogInformation($" Sve: {Sve.IsSupported}"); TestLibrary.TestFramework.LogInformation(string.Empty); } } diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Sve/Program.Sve.cs b/src/tests/JIT/HardwareIntrinsics/Arm/Sve/Program.Sve.cs new file mode 100644 index 00000000000000..036cf37f6a81af --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Sve/Program.Sve.cs @@ -0,0 +1,97 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/****************************************************************************** + * This file is auto-generated from a template file by the GenerateTests.csx * + * script in tests\src\JIT\HardwareIntrinsics.Arm\Shared. In order to make * + * changes, please update the corresponding template and run according to the * + * directions listed in the file. * + ******************************************************************************/ + +using System; +using System.Reflection; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; +using System.Numerics; +using Xunit; + +// TODO-SVE: This file should be replaced with a .template test. + +namespace JIT.HardwareIntrinsics.Arm._AdvSimd +{ + public static partial class Program + { + [Fact] + public static void SveTest() + { + var test = new SveTest__SveTest(); + test.Succeeded = true; + + if (test.IsSupported) + { + test.RunBasicScenario_LoadVector(); + } + else + { + Console.WriteLine("SVE is not Supported."); + } + + if (!test.Succeeded) + { + throw new Exception("One or more scenarios did not complete as expected."); + } + } + } + + public sealed unsafe class SveTest__SveTest + { + public bool IsSupported => Sve.IsSupported; + + public bool Succeeded { get; set; } + + private static unsafe void* Align(byte* buffer, ulong expectedAlignment) + { + return (void*)(((ulong)buffer + expectedAlignment - 1) & ~(expectedAlignment - 1)); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + public Vector do_LoadVectorNonFaulting(Vector mask, int* address) + { + return Sve.LoadVectorNonFaulting(mask, address); + } + + public void RunBasicScenario_LoadVector() + { + Vector mask = Vector.One; + + int elemsInVector = 4; + int OpElementCount = elemsInVector * 2; + int[] inArray1 = new int[OpElementCount]; + for (var i = 0; i < OpElementCount; i++) { inArray1[i] = i+1; } + + GCHandle inHandle1; + inHandle1 = GCHandle.Alloc(inArray1, GCHandleType.Pinned); + int* inArray1Ptr = (int*)Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), 128); + + Vector outVector1 = do_LoadVectorNonFaulting(mask, inArray1Ptr); + + // TODO-SVE: There is no register allocation for predicate registers. + // Instead, the jit will allocate a Z register for mask, but codegen will use the equivalent + // register number as a predicate. But that predicate register will have an invalid value + // (probably zero) and load the wrong vector elements. + for (var i = 0; i < elemsInVector; i++) + { + if (inArray1[i] != outVector1[i]) + { + Console.WriteLine("{0} {1} != {2}", i, inArray1[i], outVector1[i]); + Succeeded = false; + } + Console.WriteLine(outVector1[i]); + } + + Console.WriteLine("Done"); + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Sve/Sve.csproj b/src/tests/JIT/HardwareIntrinsics/Arm/Sve/Sve.csproj new file mode 100644 index 00000000000000..095623946b2d3c --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Sve/Sve.csproj @@ -0,0 +1,15 @@ + + + + Exe + net9.0 + true + AnyCPU;ARM64 + true + + + + + + + From db3f33acbc477902924a93669d13c1c13ed3b109 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 31 Jan 2024 10:42:37 +0000 Subject: [PATCH 2/3] Check for predicated results in lsra --- src/coreclr/jit/lsra.cpp | 29 +++++++++++++++++++++++++++++ src/coreclr/jit/lsra.h | 6 ++++++ src/coreclr/jit/lsraarm64.cpp | 31 +++++++++++++++++++++++++++++++ src/coreclr/jit/targetarm64.h | 4 ++++ 4 files changed, 70 insertions(+) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index ac2e25f95a11d6..04ddb9b73b2b61 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -271,6 +271,33 @@ regMaskTP LinearScan::lowSIMDRegs() #endif } + +#ifdef TARGET_ARM64 + +//------------------------------------------------------------------------ +// allPredicateRegs(): Return the set of all predicate SVE registers. +// +// Return Value: +// Register mask of the SVE predicate registers +// +regMaskTP LinearScan::allPredicateRegs() +{ + return (availablePredicateRegs & RBM_ALLPREDICATE); +} + +//------------------------------------------------------------------------ +// lowPredicateRegs(): Return the set of all the lower predicate SVE registers. +// +// Return Value: +// Register mask of the low SVE predicate registers +// +regMaskTP LinearScan::lowPredicateRegs() +{ + return (availablePredicateRegs & RBM_LOWPREDICATE); +} + +#endif + void LinearScan::updateNextFixedRef(RegRecord* regRecord, RefPosition* nextRefPosition) { LsraLocation nextLocation; @@ -790,6 +817,8 @@ LinearScan::LinearScan(Compiler* theCompiler) availableDoubleRegs = RBM_ALLDOUBLE; #if defined(TARGET_XARCH) availableMaskRegs = RBM_ALLMASK; +#elif defined(TARGET_ARM64) + availablePredicateRegs = RBM_ALLPREDICATE; #endif #if defined(TARGET_AMD64) || defined(TARGET_ARM64) diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index c0e0f5d2fdbd34..bd14f62496b155 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1098,6 +1098,10 @@ class LinearScan : public LinearScanInterface regMaskTP allSIMDRegs(); regMaskTP lowSIMDRegs(); regMaskTP internalFloatRegCandidates(); +#ifdef TARGET_ARM64 + regMaskTP allPredicateRegs(); + regMaskTP lowPredicateRegs(); +#endif void makeRegisterInactive(RegRecord* physRegRecord); void freeRegister(RegRecord* physRegRecord); @@ -1664,6 +1668,8 @@ class LinearScan : public LinearScanInterface PhasedVar availableDoubleRegs; #if defined(TARGET_XARCH) PhasedVar availableMaskRegs; +#elif defined(TARGET_ARM64) + PhasedVar availablePredicateRegs; #endif PhasedVar* availableRegs[TYP_COUNT]; diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index ea3bc9d7fb37e0..5835024bf7698e 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1518,6 +1518,24 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou srcCount++; } } + else if (HWIntrinsicInfo::HasPredicatedResult(intrin.id)) + { + // TODO-SVE: Allocate a predicate register instead of a vector regiter + + regMaskTP predMask = RBM_NONE; + switch (intrin.id) + { + case NI_Sve_LoadVectorNonFaulting: + predMask = lowPredicateRegs(); + break; + + // TODO-SVE: allPredicateRegs() cases + + default: + noway_assert(!"Not a supported predicated result SVE operation"); + } + srcCount += BuildOperandUses(intrin.op1, predMask); + } else if (intrinsicTree->OperIsMemoryLoadOrStore()) { srcCount += BuildAddrUses(intrin.op1); @@ -1716,6 +1734,19 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } return srcCount; } + else if (HWIntrinsicInfo::HasPredicatedResult(intrin.id)) + { + // For intrinsics with a predicated result, op2 is the same as op1 in other intrinsics. + if (intrinsicTree->OperIsMemoryLoadOrStore()) + { + srcCount += BuildAddrUses(intrin.op2); + } + else + { + // TODO-SVE: Support more SVE cases here. + noway_assert(!"Not a supported predicated result SVE operation"); + } + } else if (intrin.op2 != nullptr) { // RMW intrinsic operands doesn't have to be delayFree when they can be assigned the same register as op1Reg diff --git a/src/coreclr/jit/targetarm64.h b/src/coreclr/jit/targetarm64.h index 3646ecb4407bf7..6bd67280f0971c 100644 --- a/src/coreclr/jit/targetarm64.h +++ b/src/coreclr/jit/targetarm64.h @@ -140,6 +140,10 @@ #define REG_JUMP_THUNK_PARAM REG_R12 #define RBM_JUMP_THUNK_PARAM RBM_R12 + #define RBM_LOWPREDICATE (RBM_P0 | RBM_P1 | RBM_P2 | RBM_P3 | RBM_P4 | RBM_P5 | RBM_P6 | RBM_P7) + #define RBM_HIGHPREDICATE (RBM_P8 | RBM_P9 | RBM_P10 | RBM_P11 | RBM_P12 | RBM_P13 | RBM_P14 | RBM_P15) + #define RBM_ALLPREDICATE (RBM_LOWPREDICATE | RBM_HIGHPREDICATE) + // ARM64 write barrier ABI (see vm\arm64\asmhelpers.asm, vm\arm64\asmhelpers.S): // CORINFO_HELP_ASSIGN_REF (JIT_WriteBarrier), CORINFO_HELP_CHECKED_ASSIGN_REF (JIT_CheckedWriteBarrier): // On entry: From 034806eeba193b214aff09ba3316a1d55d022af6 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 31 Jan 2024 10:46:19 +0000 Subject: [PATCH 3/3] Add Sve preview marker --- .../src/System/Runtime/Intrinsics/Arm/Sve.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs index f9c13c7e082bd9..76cdb6f5c0063c 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs @@ -13,6 +13,7 @@ namespace System.Runtime.Intrinsics.Arm /// [Intrinsic] [CLSCompliant(false)] + [System.Runtime.Versioning.RequiresPreviewFeaturesAttribute("Sve is in preview.")] public abstract class Sve : AdvSimd { internal Sve() { }