Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add API call for Arm64 Sve.LoadVectorNonFaulting #97695

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/coreclr/jit/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,7 @@ set( JIT_ARM64_HEADERS
emitfmtsarm64.h
emitfmtsarm64sve.h
hwintrinsiclistarm64.h
hwintrinsiclistarm64sve.h
instrsarm64.h
instrsarm64sve.h
registerarm64.h
Expand Down
34 changes: 34 additions & 0 deletions src/coreclr/jit/emitarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5801,6 +5801,34 @@ emitter::code_t emitter::emitInsCodeSve(instruction ins, insFormat fmt)
}
}

//------------------------------------------------------------------------
// optGetSveInsOpt: Map a per-element size to the matching SVE vector
//    register arrangement option.
//
// Arguments:
//    elemsize - element size of the scalable-vector operation
//
// Return Value:
//    The corresponding INS_OPTS_SCALABLE_* value. Asserts and returns
//    INS_OPTS_NONE when 'elemsize' is not a valid SVE element size.
//
/*static*/ insOpts emitter::optGetSveInsOpt(emitAttr elemsize)
{
    insOpts result = INS_OPTS_NONE;

    switch (elemsize)
    {
        case EA_1BYTE:
            result = INS_OPTS_SCALABLE_B;
            break;

        case EA_2BYTE:
            result = INS_OPTS_SCALABLE_H;
            break;

        case EA_4BYTE:
            result = INS_OPTS_SCALABLE_S;
            break;

        case EA_8BYTE:
            result = INS_OPTS_SCALABLE_D;
            break;

        case EA_16BYTE:
            result = INS_OPTS_SCALABLE_Q;
            break;

        default:
            assert(!"Invalid emitAttr for sve vector register");
            break;
    }

    return result;
}

// For the given 'arrangement' returns the 'elemsize' specified by the SVE vector register arrangement
// asserts and returns EA_UNKNOWN if an invalid 'arrangement' value is passed
//
Expand Down Expand Up @@ -9924,6 +9952,12 @@ void emitter::emitIns_R_R_R(instruction ins,
fmt = IF_SVE_CZ_4A;
break;

case INS_sve_ldnf1b:
case INS_sve_ldnf1h:
case INS_sve_ldnf1w:
case INS_sve_ldnf1d:
return emitIns_R_R_R_I(ins, size, reg1, reg2, reg3, 0, opt);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this doesn't look right. The caller should make sure to call appropriate emitIns* method.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed, but there are lots of places this is done elsewhere:

        case INS_adds:
        case INS_subs:
            emitIns_R_R_R_I(ins, attr, reg1, reg2, reg3, 0, opt);
            return;

Which means it can all use the existing table generation code. Plus, we get a handy shortcut for elsewhere where we don't need an immediate offset. This ideally needs some codegen test cases.

The alternative would be to use HW_Flag_SpecialCodeGen and then add a case in genHWIntrinsic(). That's more code and possibly slower in the long run? I suspect we'll get a lot of things added in genHWIntrinsic() by the end of SVE so it'd be nice to keep it short.


default:
unreached();
break;
Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/jit/emitarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -726,6 +726,9 @@ static emitAttr optGetDatasize(insOpts arrangement);
// For the given 'arrangement' returns the 'elemsize' specified by the vector register arrangement
static emitAttr optGetElemsize(insOpts arrangement);

// For the given 'elemsize' returns the 'arrangement' when used in a SVE vector register arrangement.
static insOpts optGetSveInsOpt(emitAttr elemsize);

// For the given 'arrangement' returns the 'elemsize' specified by the SVE vector register arrangement
static emitAttr optGetSveElemsize(insOpts arrangement);

Expand Down
5 changes: 4 additions & 1 deletion src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25711,9 +25711,12 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad(GenTree** pAddr) const
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x2:
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x3:
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x4:

addr = Op(3);
break;

case NI_Sve_LoadVectorNonFaulting:
addr = Op(2);
break;
#endif // TARGET_ARM64

default:
Expand Down
20 changes: 20 additions & 0 deletions src/coreclr/jit/hwintrinsic.h
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,13 @@ enum HWIntrinsicFlag : unsigned int

// The intrinsic needs consecutive registers
HW_Flag_NeedsConsecutiveRegisters = 0x4000,

// The intrinsic uses scalable registers
HW_Flag_Scalable = 0x8000,

// The intrinsic uses a mask in arg1 to predicate the result
HW_Flag_Predicated = 0x10000,

#else
#error Unsupported platform
#endif
Expand Down Expand Up @@ -846,6 +853,19 @@ struct HWIntrinsicInfo
const HWIntrinsicFlag flags = lookupFlags(id);
return (flags & HW_Flag_HasImmediateOperand) != 0;
}

static bool isScalable(NamedIntrinsic id)
{
const HWIntrinsicFlag flags = lookupFlags(id);
return (flags & HW_Flag_Scalable) != 0;
}

// TODO-SVE: Check this flag when register allocating
static bool HasPredicatedResult(NamedIntrinsic id)
{
const HWIntrinsicFlag flags = lookupFlags(id);
return (flags & HW_Flag_Predicated) != 0;
}
#endif // TARGET_ARM64

static bool HasSpecialSideEffect(NamedIntrinsic id)
Expand Down
6 changes: 6 additions & 0 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,12 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
emitSize = EA_UNKNOWN;
opt = INS_OPTS_NONE;
}
else if (HWIntrinsicInfo::isScalable(intrin.id))
{
emitSize = EA_SCALABLE;
// TODO-SVE: This shouldn't require GetEmitter()
opt = GetEmitter()->optGetSveInsOpt(emitTypeSize(intrin.baseType));
}
else
{
emitSize = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->GetSimdSize()));
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/hwintrinsiclistarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -802,6 +802,8 @@ HARDWARE_INTRINSIC(Sha256, ScheduleUpdate1,

#endif // FEATURE_HW_INTRINSIC

#include "hwintrinsiclistarm64sve.h"

#undef HARDWARE_INTRINSIC

// clang-format on
26 changes: 26 additions & 0 deletions src/coreclr/jit/hwintrinsiclistarm64sve.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

// This file is the HARDWARE_INTRINSIC table for Arm64 SVE intrinsics. It is
// included by hwintrinsiclistarm64.h, which must define HARDWARE_INTRINSIC
// before including it; each row maps a managed Sve.* API to per-base-type
// instructions, a category, and flags.

/*****************************************************************************/
#ifndef HARDWARE_INTRINSIC
#error Define HARDWARE_INTRINSIC before including this file
#endif
/*****************************************************************************/

// clang-format off

#ifdef FEATURE_HW_INTRINSICS
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg EncodesExtraTypeArg Instructions Category Flags
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// SVE Intrinsics

// Sve
// SIMD size is -1 (scalable; not a fixed vector width); 2 args (mask, address);
// instruction column is selected by the intrinsic's base type.
HARDWARE_INTRINSIC(Sve, LoadVectorNonFaulting, -1, 2, true, {INS_sve_ldnf1b, INS_sve_ldnf1b, INS_sve_ldnf1h, INS_sve_ldnf1h, INS_sve_ldnf1w, INS_sve_ldnf1w, INS_sve_ldnf1d, INS_sve_ldnf1d, INS_sve_ldnf1w, INS_sve_ldnf1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_Predicated)

#endif // FEATURE_HW_INTRINSICS

#undef HARDWARE_INTRINSIC

// clang-format on
Original file line number Diff line number Diff line change
Expand Up @@ -51,5 +51,11 @@
<type fullname="System.Runtime.Intrinsics.Arm.Sha256/Arm64">
<method signature="System.Boolean get_IsSupported()" body="stub" value="false" />
</type>
<type fullname="System.Runtime.Intrinsics.Arm.Sve">
<method signature="System.Boolean get_IsSupported()" body="stub" value="false" />
</type>
<type fullname="System.Runtime.Intrinsics.Arm.Sve/Arm64">
<method signature="System.Boolean get_IsSupported()" body="stub" value="false" />
</type>
</assembly>
</linker>
Original file line number Diff line number Diff line change
Expand Up @@ -2632,6 +2632,7 @@
<Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\Arm\Rdm.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\Arm\Sha1.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\Arm\Sha256.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\Arm\Sve.cs" />
</ItemGroup>
<ItemGroup Condition="'$(SupportsArmIntrinsics)' != 'true'">
<Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\Arm\AdvSimd.PlatformNotSupported.cs" />
Expand All @@ -2642,6 +2643,7 @@
<Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\Arm\Rdm.PlatformNotSupported.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\Arm\Sha1.PlatformNotSupported.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\Arm\Sha256.PlatformNotSupported.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\Arm\Sve.PlatformNotSupported.cs" />
</ItemGroup>
<ItemGroup Condition="'$(SupportsWasmIntrinsics)' == 'true'">
<Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\Wasm\WasmBase.cs" />
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Diagnostics.CodeAnalysis;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Numerics;

namespace System.Runtime.Intrinsics.Arm
{
    /// <summary>
    /// This class provides access to the ARM SVE hardware instructions via intrinsics.
    /// This is the PlatformNotSupported surface (compiled in when
    /// 'SupportsArmIntrinsics' is not true): IsSupported reports false and every
    /// intrinsic entry point throws <see cref="PlatformNotSupportedException"/>.
    /// </summary>
    [Intrinsic]
    [CLSCompliant(false)]
    public abstract class Sve : AdvSimd
    {
        internal Sve() { }

        // Must be a literal 'false' here, NOT the recursive intrinsic pattern
        // ('get => IsSupported'): on platforms where the JIT performs no
        // intrinsic expansion a recursive getter would overflow the stack
        // instead of reporting "unsupported".
        public static new bool IsSupported { [Intrinsic] get { return false; } }

        [Intrinsic]
        public new abstract class Arm64 : AdvSimd.Arm64
        {
            internal Arm64() { }

            // Same as the outer class: must return false on this surface.
            public static new bool IsSupported { [Intrinsic] get { return false; } }
        }


        /// LoadVectorNonFaulting : Unextended load, non-faulting

        /// <summary>
        /// svint8_t svldnf1[_s8](svbool_t pg, const int8_t *base)
        /// LDNF1B Zresult.B, Pg/Z, [Xbase, #0, MUL VL]
        /// </summary>
        public static unsafe Vector<sbyte> LoadVectorNonFaulting(Vector<sbyte> mask, sbyte* address) { throw new PlatformNotSupportedException(); }

        /// <summary>
        /// svint16_t svldnf1[_s16](svbool_t pg, const int16_t *base)
        /// LDNF1H Zresult.H, Pg/Z, [Xbase, #0, MUL VL]
        /// </summary>
        public static unsafe Vector<short> LoadVectorNonFaulting(Vector<short> mask, short* address) { throw new PlatformNotSupportedException(); }

        /// <summary>
        /// svint32_t svldnf1[_s32](svbool_t pg, const int32_t *base)
        /// LDNF1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL]
        /// </summary>
        public static unsafe Vector<int> LoadVectorNonFaulting(Vector<int> mask, int* address) { throw new PlatformNotSupportedException(); }

        /// <summary>
        /// svint64_t svldnf1[_s64](svbool_t pg, const int64_t *base)
        /// LDNF1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL]
        /// </summary>
        public static unsafe Vector<long> LoadVectorNonFaulting(Vector<long> mask, long* address) { throw new PlatformNotSupportedException(); }

        /// <summary>
        /// svuint8_t svldnf1[_u8](svbool_t pg, const uint8_t *base)
        /// LDNF1B Zresult.B, Pg/Z, [Xbase, #0, MUL VL]
        /// </summary>
        public static unsafe Vector<byte> LoadVectorNonFaulting(Vector<byte> mask, byte* address) { throw new PlatformNotSupportedException(); }

        /// <summary>
        /// svuint16_t svldnf1[_u16](svbool_t pg, const uint16_t *base)
        /// LDNF1H Zresult.H, Pg/Z, [Xbase, #0, MUL VL]
        /// </summary>
        public static unsafe Vector<ushort> LoadVectorNonFaulting(Vector<ushort> mask, ushort* address) { throw new PlatformNotSupportedException(); }

        /// <summary>
        /// svuint32_t svldnf1[_u32](svbool_t pg, const uint32_t *base)
        /// LDNF1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL]
        /// </summary>
        public static unsafe Vector<uint> LoadVectorNonFaulting(Vector<uint> mask, uint* address) { throw new PlatformNotSupportedException(); }

        /// <summary>
        /// svuint64_t svldnf1[_u64](svbool_t pg, const uint64_t *base)
        /// LDNF1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL]
        /// </summary>
        public static unsafe Vector<ulong> LoadVectorNonFaulting(Vector<ulong> mask, ulong* address) { throw new PlatformNotSupportedException(); }

        /// <summary>
        /// svfloat32_t svldnf1[_f32](svbool_t pg, const float32_t *base)
        /// LDNF1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL]
        /// </summary>
        public static unsafe Vector<float> LoadVectorNonFaulting(Vector<float> mask, float* address) { throw new PlatformNotSupportedException(); }

        /// <summary>
        /// svfloat64_t svldnf1[_f64](svbool_t pg, const float64_t *base)
        /// LDNF1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL]
        /// </summary>
        public static unsafe Vector<double> LoadVectorNonFaulting(Vector<double> mask, double* address) { throw new PlatformNotSupportedException(); }

    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Diagnostics.CodeAnalysis;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Numerics;

namespace System.Runtime.Intrinsics.Arm
{
/// <summary>
/// This class provides access to the ARM SVE hardware instructions via intrinsics
/// </summary>
[Intrinsic]
[CLSCompliant(false)]
public abstract class Sve : AdvSimd
{
internal Sve() { }

public static new bool IsSupported { get => IsSupported; }
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we will have to make sure to return false for Mono

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you know where that check would be added? Not sure if that would be in the API or the part that checks if SVE is supported in the OS.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@fanyang-mono - do you know?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One way of doing it is to add a new element to this array
https://github.com/dotnet/runtime/blob/52e1ad3779e57c35d2416cd10d8ad7d75b2c0c8b/src/mono/mono/mini/simd-intrinsics.c#L3896C26-L3896C50

It will be something like

"Sve", MONO_CPU_ARM64_SVE, unsupported, sizeof (unsupported)

Additionally, you need to define the enum MONO_CPU_ARM64_SVE here:

MONO_CPU_ARM64_DP = 1 << 6,

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One way of doing it is to add a new element to this array

aren't these the entries of things that are supported? so probably no SVE entry is needed in that array?

Copy link
Member

@fanyang-mono fanyang-mono Feb 5, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When you specify unsupported, IsSupported will return false. So it is needed.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks! I can also see some examples of unsupported in supported_x86_intrinsics.


// Nested class for SVE intrinsics that require AArch64 execution state.
[Intrinsic]
public new abstract class Arm64 : AdvSimd.Arm64
{
internal Arm64() { }

// Recursive getter is the intrinsic-stub pattern: the JIT replaces this
// property access with a constant on supporting runtimes.
// NOTE(review): runtimes that do not expand the intrinsic (e.g. Mono without
// an SVE entry in its intrinsic tables) must still intercept this, or the
// getter recurses - confirm Mono returns false here.
public static new bool IsSupported { get => IsSupported; }
}

/// LoadVectorNonFaulting : Unextended load, non-faulting

/// <summary>
/// svint8_t svldnf1[_s8](svbool_t pg, const int8_t *base)
/// LDNF1B Zresult.B, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
/// <remarks>Intrinsic stub: the JIT replaces the recursive call with ldnf1b.</remarks>
public static unsafe Vector<sbyte> LoadVectorNonFaulting(Vector<sbyte> mask, sbyte* address)
{
    return LoadVectorNonFaulting(mask, address);
}

/// <summary>
/// svint16_t svldnf1[_s16](svbool_t pg, const int16_t *base)
/// LDNF1H Zresult.H, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
/// <remarks>Intrinsic stub: the JIT replaces the recursive call with ldnf1h.</remarks>
public static unsafe Vector<short> LoadVectorNonFaulting(Vector<short> mask, short* address)
{
    return LoadVectorNonFaulting(mask, address);
}

/// <summary>
/// svint32_t svldnf1[_s32](svbool_t pg, const int32_t *base)
/// LDNF1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
/// <remarks>Intrinsic stub: the JIT replaces the recursive call with ldnf1w.</remarks>
public static unsafe Vector<int> LoadVectorNonFaulting(Vector<int> mask, int* address)
{
    return LoadVectorNonFaulting(mask, address);
}

/// <summary>
/// svint64_t svldnf1[_s64](svbool_t pg, const int64_t *base)
/// LDNF1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
/// <remarks>Intrinsic stub: the JIT replaces the recursive call with ldnf1d.</remarks>
public static unsafe Vector<long> LoadVectorNonFaulting(Vector<long> mask, long* address)
{
    return LoadVectorNonFaulting(mask, address);
}

/// <summary>
/// svuint8_t svldnf1[_u8](svbool_t pg, const uint8_t *base)
/// LDNF1B Zresult.B, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
/// <remarks>Intrinsic stub: the JIT replaces the recursive call with ldnf1b.</remarks>
public static unsafe Vector<byte> LoadVectorNonFaulting(Vector<byte> mask, byte* address)
{
    return LoadVectorNonFaulting(mask, address);
}

/// <summary>
/// svuint16_t svldnf1[_u16](svbool_t pg, const uint16_t *base)
/// LDNF1H Zresult.H, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
/// <remarks>Intrinsic stub: the JIT replaces the recursive call with ldnf1h.</remarks>
public static unsafe Vector<ushort> LoadVectorNonFaulting(Vector<ushort> mask, ushort* address)
{
    return LoadVectorNonFaulting(mask, address);
}

/// <summary>
/// svuint32_t svldnf1[_u32](svbool_t pg, const uint32_t *base)
/// LDNF1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
/// <remarks>Intrinsic stub: the JIT replaces the recursive call with ldnf1w.</remarks>
public static unsafe Vector<uint> LoadVectorNonFaulting(Vector<uint> mask, uint* address)
{
    return LoadVectorNonFaulting(mask, address);
}

/// <summary>
/// svuint64_t svldnf1[_u64](svbool_t pg, const uint64_t *base)
/// LDNF1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
/// <remarks>Intrinsic stub: the JIT replaces the recursive call with ldnf1d.</remarks>
public static unsafe Vector<ulong> LoadVectorNonFaulting(Vector<ulong> mask, ulong* address)
{
    return LoadVectorNonFaulting(mask, address);
}

/// <summary>
/// svfloat32_t svldnf1[_f32](svbool_t pg, const float32_t *base)
/// LDNF1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
/// <remarks>Intrinsic stub: the JIT replaces the recursive call with ldnf1w.</remarks>
public static unsafe Vector<float> LoadVectorNonFaulting(Vector<float> mask, float* address)
{
    return LoadVectorNonFaulting(mask, address);
}

/// <summary>
/// svfloat64_t svldnf1[_f64](svbool_t pg, const float64_t *base)
/// LDNF1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
/// <remarks>Intrinsic stub: the JIT replaces the recursive call with ldnf1d.</remarks>
public static unsafe Vector<double> LoadVectorNonFaulting(Vector<double> mask, double* address)
{
    return LoadVectorNonFaulting(mask, address);
}


}
}
Loading
Loading