Add API call for Arm64 Sve.LoadVectorNonFaulting #97695

Closed · wants to merge 3 commits
1 change: 1 addition & 0 deletions src/coreclr/jit/CMakeLists.txt
@@ -402,6 +402,7 @@ set( JIT_ARM64_HEADERS
emitfmtsarm64.h
emitfmtsarm64sve.h
hwintrinsiclistarm64.h
hwintrinsiclistarm64sve.h
instrsarm64.h
instrsarm64sve.h
registerarm64.h
34 changes: 34 additions & 0 deletions src/coreclr/jit/emitarm64.cpp
@@ -5801,6 +5801,34 @@ emitter::code_t emitter::emitInsCodeSve(instruction ins, insFormat fmt)
}
}

// For the given 'elemsize' returns the 'arrangement' when used in an SVE vector register arrangement.
// Asserts and returns INS_OPTS_NONE if an invalid 'elemsize' is passed
//
/*static*/ insOpts emitter::optGetSveInsOpt(emitAttr elemsize)
{
switch (elemsize)
{
case EA_1BYTE:
return INS_OPTS_SCALABLE_B;

case EA_2BYTE:
return INS_OPTS_SCALABLE_H;

case EA_4BYTE:
return INS_OPTS_SCALABLE_S;

case EA_8BYTE:
return INS_OPTS_SCALABLE_D;

case EA_16BYTE:
return INS_OPTS_SCALABLE_Q;

default:
assert(!"Invalid emitAttr for sve vector register");
return INS_OPTS_NONE;
}
}

// For the given 'arrangement' returns the 'elemsize' specified by the SVE vector register arrangement
// asserts and returns EA_UNKNOWN if an invalid 'arrangement' value is passed
//
@@ -9924,6 +9952,12 @@ void emitter::emitIns_R_R_R(instruction ins,
fmt = IF_SVE_CZ_4A;
break;

case INS_sve_ldnf1b:
case INS_sve_ldnf1h:
case INS_sve_ldnf1w:
case INS_sve_ldnf1d:
return emitIns_R_R_R_I(ins, size, reg1, reg2, reg3, 0, opt);
Member:
This doesn't look right. The caller should make sure to call the appropriate emitIns* method.

Contributor Author:
Agreed, but this is already done in lots of other places:

        case INS_adds:
        case INS_subs:
            emitIns_R_R_R_I(ins, attr, reg1, reg2, reg3, 0, opt);
            return;

That means it can all use the existing table generation code. Plus, we get a handy shortcut for places where we don't need an immediate offset. This ideally needs some codegen test cases.

The alternative would be to use HW_Flag_SpecialCodeGen and then add a case in genHWIntrinsic(). That's more code and possibly slower in the long run? I suspect we'll get a lot of things added in genHWIntrinsic() by the end of SVE so it'd be nice to keep it short.
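
For reference, a rough sketch of what the HW_Flag_SpecialCodeGen route could look like inside genHWIntrinsic() (a sketch only, not part of this PR; the locals targetReg, op1Reg, op2Reg and the exact case shape are assumptions based on the surrounding codegen code):

        case NI_Sve_LoadVectorNonFaulting:
            // op1 holds the governing predicate and op2 the base address;
            // emit the ldnf1 variant picked by the intrinsic table with a zero offset.
            GetEmitter()->emitIns_R_R_R_I(ins, emitSize, targetReg, op1Reg, op2Reg, 0, opt);
            break;

Both routes end up emitting the same ldnf1 instruction; the table-driven forwarding just keeps the switch in genHWIntrinsic() from growing.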


default:
unreached();
break;
3 changes: 3 additions & 0 deletions src/coreclr/jit/emitarm64.h
@@ -726,6 +726,9 @@ static emitAttr optGetDatasize(insOpts arrangement);
// For the given 'arrangement' returns the 'elemsize' specified by the vector register arrangement
static emitAttr optGetElemsize(insOpts arrangement);

// For the given 'elemsize' returns the 'arrangement' when used in an SVE vector register arrangement.
static insOpts optGetSveInsOpt(emitAttr elemsize);

// For the given 'arrangement' returns the 'elemsize' specified by the SVE vector register arrangement
static emitAttr optGetSveElemsize(insOpts arrangement);

5 changes: 4 additions & 1 deletion src/coreclr/jit/gentree.cpp
@@ -25711,9 +25711,12 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad(GenTree** pAddr) const
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x2:
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x3:
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x4:

addr = Op(3);
break;

case NI_Sve_LoadVectorNonFaulting:
addr = Op(2);
break;
#endif // TARGET_ARM64

default:
20 changes: 20 additions & 0 deletions src/coreclr/jit/hwintrinsic.h
@@ -175,6 +175,13 @@ enum HWIntrinsicFlag : unsigned int

// The intrinsic needs consecutive registers
HW_Flag_NeedsConsecutiveRegisters = 0x4000,

// The intrinsic uses scalable registers
HW_Flag_Scalable = 0x8000,

// The intrinsic uses a mask in arg1 to predicate the result
HW_Flag_Predicated = 0x10000,

#else
#error Unsupported platform
#endif
@@ -846,6 +853,19 @@ struct HWIntrinsicInfo
const HWIntrinsicFlag flags = lookupFlags(id);
return (flags & HW_Flag_HasImmediateOperand) != 0;
}

static bool isScalable(NamedIntrinsic id)
{
const HWIntrinsicFlag flags = lookupFlags(id);
return (flags & HW_Flag_Scalable) != 0;
}

// TODO-SVE: Check this flag when register allocating
static bool HasPredicatedResult(NamedIntrinsic id)
{
const HWIntrinsicFlag flags = lookupFlags(id);
return (flags & HW_Flag_Predicated) != 0;
}
#endif // TARGET_ARM64

static bool HasSpecialSideEffect(NamedIntrinsic id)
6 changes: 6 additions & 0 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
@@ -265,6 +265,12 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
emitSize = EA_UNKNOWN;
opt = INS_OPTS_NONE;
}
else if (HWIntrinsicInfo::isScalable(intrin.id))
{
emitSize = EA_SCALABLE;
// TODO-SVE: This shouldn't require GetEmitter()
opt = GetEmitter()->optGetSveInsOpt(emitTypeSize(intrin.baseType));
}
else
{
emitSize = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->GetSimdSize()));
2 changes: 2 additions & 0 deletions src/coreclr/jit/hwintrinsiclistarm64.h
@@ -802,6 +802,8 @@ HARDWARE_INTRINSIC(Sha256, ScheduleUpdate1,

#endif // FEATURE_HW_INTRINSIC

#include "hwintrinsiclistarm64sve.h"

#undef HARDWARE_INTRINSIC

// clang-format on
26 changes: 26 additions & 0 deletions src/coreclr/jit/hwintrinsiclistarm64sve.h
@@ -0,0 +1,26 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

/*****************************************************************************/
#ifndef HARDWARE_INTRINSIC
#error Define HARDWARE_INTRINSIC before including this file
#endif
/*****************************************************************************/

// clang-format off

#ifdef FEATURE_HW_INTRINSICS
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg EncodesExtraTypeArg Instructions Category Flags
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// SVE Intrinsics

// Sve
HARDWARE_INTRINSIC(Sve, LoadVectorNonFaulting, -1, 2, true, {INS_sve_ldnf1b, INS_sve_ldnf1b, INS_sve_ldnf1h, INS_sve_ldnf1h, INS_sve_ldnf1w, INS_sve_ldnf1w, INS_sve_ldnf1d, INS_sve_ldnf1d, INS_sve_ldnf1w, INS_sve_ldnf1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_Predicated)

#endif // FEATURE_HW_INTRINSIC

#undef HARDWARE_INTRINSIC

// clang-format on
29 changes: 29 additions & 0 deletions src/coreclr/jit/lsra.cpp
@@ -271,6 +271,33 @@ regMaskTP LinearScan::lowSIMDRegs()
#endif
}


#ifdef TARGET_ARM64

//------------------------------------------------------------------------
// allPredicateRegs(): Return the set of all predicate SVE registers.
//
// Return Value:
// Register mask of the SVE predicate registers
//
regMaskTP LinearScan::allPredicateRegs()
{
return (availablePredicateRegs & RBM_ALLPREDICATE);
}

//------------------------------------------------------------------------
// lowPredicateRegs(): Return the set of all the lower predicate SVE registers.
//
// Return Value:
// Register mask of the low SVE predicate registers
//
regMaskTP LinearScan::lowPredicateRegs()
{
return (availablePredicateRegs & RBM_LOWPREDICATE);
}

#endif

void LinearScan::updateNextFixedRef(RegRecord* regRecord, RefPosition* nextRefPosition)
{
LsraLocation nextLocation;
@@ -790,6 +817,8 @@ LinearScan::LinearScan(Compiler* theCompiler)
availableDoubleRegs = RBM_ALLDOUBLE;
#if defined(TARGET_XARCH)
availableMaskRegs = RBM_ALLMASK;
#elif defined(TARGET_ARM64)
availablePredicateRegs = RBM_ALLPREDICATE;
#endif

#if defined(TARGET_AMD64) || defined(TARGET_ARM64)
6 changes: 6 additions & 0 deletions src/coreclr/jit/lsra.h
@@ -1098,6 +1098,10 @@ class LinearScan : public LinearScanInterface
regMaskTP allSIMDRegs();
regMaskTP lowSIMDRegs();
regMaskTP internalFloatRegCandidates();
#ifdef TARGET_ARM64
regMaskTP allPredicateRegs();
regMaskTP lowPredicateRegs();
#endif

void makeRegisterInactive(RegRecord* physRegRecord);
void freeRegister(RegRecord* physRegRecord);
@@ -1664,6 +1668,8 @@ class LinearScan : public LinearScanInterface
PhasedVar<regMaskTP> availableDoubleRegs;
#if defined(TARGET_XARCH)
PhasedVar<regMaskTP> availableMaskRegs;
#elif defined(TARGET_ARM64)
PhasedVar<regMaskTP> availablePredicateRegs;
#endif
PhasedVar<regMaskTP>* availableRegs[TYP_COUNT];

31 changes: 31 additions & 0 deletions src/coreclr/jit/lsraarm64.cpp
@@ -1518,6 +1518,24 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
srcCount++;
}
}
else if (HWIntrinsicInfo::HasPredicatedResult(intrin.id))
{
// TODO-SVE: Allocate a predicate register instead of a vector register

regMaskTP predMask = RBM_NONE;
switch (intrin.id)
{
case NI_Sve_LoadVectorNonFaulting:
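// The ldnf1 forms encode their governing predicate in a 3-bit field, so only
// the low predicate registers (P0-P7) are valid candidates.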
predMask = lowPredicateRegs();
break;

// TODO-SVE: allPredicateRegs() cases

default:
noway_assert(!"Not a supported predicated result SVE operation");
}
srcCount += BuildOperandUses(intrin.op1, predMask);
}
else if (intrinsicTree->OperIsMemoryLoadOrStore())
{
srcCount += BuildAddrUses(intrin.op1);
@@ -1716,6 +1734,19 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
}
return srcCount;
}
else if (HWIntrinsicInfo::HasPredicatedResult(intrin.id))
{
// For intrinsics with a predicated result, op1 is the predicate, so op2 takes the role that op1 has in other intrinsics.
if (intrinsicTree->OperIsMemoryLoadOrStore())
{
srcCount += BuildAddrUses(intrin.op2);
}
else
{
// TODO-SVE: Support more SVE cases here.
noway_assert(!"Not a supported predicated result SVE operation");
}
}
else if (intrin.op2 != nullptr)
{
// RMW intrinsic operands doesn't have to be delayFree when they can be assigned the same register as op1Reg
4 changes: 4 additions & 0 deletions src/coreclr/jit/targetarm64.h
@@ -140,6 +140,10 @@
#define REG_JUMP_THUNK_PARAM REG_R12
#define RBM_JUMP_THUNK_PARAM RBM_R12

// SVE predicated loads and stores encode their governing predicate in a 3-bit field,
// so those instructions are restricted to the low predicate registers P0-P7.
#define RBM_LOWPREDICATE (RBM_P0 | RBM_P1 | RBM_P2 | RBM_P3 | RBM_P4 | RBM_P5 | RBM_P6 | RBM_P7)
#define RBM_HIGHPREDICATE (RBM_P8 | RBM_P9 | RBM_P10 | RBM_P11 | RBM_P12 | RBM_P13 | RBM_P14 | RBM_P15)
#define RBM_ALLPREDICATE (RBM_LOWPREDICATE | RBM_HIGHPREDICATE)

// ARM64 write barrier ABI (see vm\arm64\asmhelpers.asm, vm\arm64\asmhelpers.S):
// CORINFO_HELP_ASSIGN_REF (JIT_WriteBarrier), CORINFO_HELP_CHECKED_ASSIGN_REF (JIT_CheckedWriteBarrier):
// On entry:
@@ -51,5 +51,11 @@
<type fullname="System.Runtime.Intrinsics.Arm.Sha256/Arm64">
<method signature="System.Boolean get_IsSupported()" body="stub" value="false" />
</type>
<type fullname="System.Runtime.Intrinsics.Arm.Sve">
<method signature="System.Boolean get_IsSupported()" body="stub" value="false" />
</type>
<type fullname="System.Runtime.Intrinsics.Arm.Sve/Arm64">
<method signature="System.Boolean get_IsSupported()" body="stub" value="false" />
</type>
</assembly>
</linker>
@@ -2632,6 +2632,7 @@
<Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\Arm\Rdm.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\Arm\Sha1.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\Arm\Sha256.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\Arm\Sve.cs" />
</ItemGroup>
<ItemGroup Condition="'$(SupportsArmIntrinsics)' != 'true'">
<Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\Arm\AdvSimd.PlatformNotSupported.cs" />
@@ -2642,6 +2643,7 @@
<Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\Arm\Rdm.PlatformNotSupported.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\Arm\Sha1.PlatformNotSupported.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\Arm\Sha256.PlatformNotSupported.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\Arm\Sve.PlatformNotSupported.cs" />
</ItemGroup>
<ItemGroup Condition="'$(SupportsWasmIntrinsics)' == 'true'">
<Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\Wasm\WasmBase.cs" />