Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ARM64-SVE: Implement ShiftLeftLogical, ShiftRightArithmetic, ShiftRightLogical #104119

Merged
merged 12 commits into from
Jun 28, 2024
6 changes: 6 additions & 0 deletions src/coreclr/jit/hwintrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1800,6 +1800,12 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
retNode->AsHWIntrinsic()->SetAuxiliaryJitType(sigReader.op1JitType);
break;

case NI_Sve_ShiftLeftLogical:
case NI_Sve_ShiftRightArithmetic:
case NI_Sve_ShiftRightLogical:
retNode->AsHWIntrinsic()->SetAuxiliaryJitType(getBaseJitTypeOfSIMDType(sigReader.op2ClsHnd));
break;

default:
break;
}
Expand Down
34 changes: 28 additions & 6 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -558,6 +558,28 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
{
assert(instrIsRMW);

insScalableOpts sopt;

switch (intrinEmbMask.id)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure if this is the best place to put this switch statement, but these intrinsics need to be wrapped with ConditionalSelect, so implementing special codegen for them off this path doesn't seem feasible without some more invasive refactoring.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, we do similar handling for ConvertTo for numOperands == 1 case.

{
case NI_Sve_ShiftLeftLogical:
case NI_Sve_ShiftRightArithmetic:
case NI_Sve_ShiftRightLogical:
{
const emitAttr op2Size = emitTypeSize(op2->AsHWIntrinsic()->GetAuxiliaryType());
if (op2Size != emitTypeSize(intrinEmbMask.baseType))
{
assert(emitter::optGetSveInsOpt(op2Size) == INS_OPTS_SCALABLE_D);
sopt = INS_SCALABLE_OPTS_WIDE;
break;
}
}

default:
sopt = INS_SCALABLE_OPTS_NONE;
break;
}

if (intrin.op3->IsVectorZero())
{
// If `falseReg` is zero, then move the first operand of `intrinEmbMask` in the
Expand All @@ -568,7 +590,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)

// Finally, perform the actual "predicated" operation so that `targetReg` is the first operand
// and `embMaskOp2Reg` is the second operand.
GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, opt);
GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, opt, sopt);
}
else if (targetReg != falseReg)
{
Expand All @@ -583,7 +605,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
// If the embedded instruction supports optional mask operation, use the "unpredicated"
// version of the instruction, followed by "sel" to select the active lanes.
GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, embMaskOp1Reg,
embMaskOp2Reg, opt);
embMaskOp2Reg, opt, sopt);
}
else
{
Expand All @@ -598,7 +620,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
GetEmitter()->emitIns_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, embMaskOp1Reg);

GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg,
opt);
opt, sopt);
}

GetEmitter()->emitIns_R_R_R_R(INS_sve_sel, emitSize, targetReg, maskReg, targetReg,
Expand All @@ -615,13 +637,13 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)

// Finally, perform the actual "predicated" operation so that `targetReg` is the first operand
// and `embMaskOp2Reg` is the second operand.
GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, opt);
GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, opt, sopt);
}
else
{
// Just perform the actual "predicated" operation so that `targetReg` is the first operand
// and `embMaskOp2Reg` is the second operand.
GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, opt);
GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, opt, sopt);
}

break;
Expand Down Expand Up @@ -2168,7 +2190,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
/* canSkip */ true);
}

GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op2Reg, opt);
GetEmitter()->emitInsSve_R_R(ins, emitSize, targetReg, op2Reg, opt);
break;
}

Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/jit/hwintrinsiclistarm64sve.h
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,9 @@ HARDWARE_INTRINSIC(Sve, SaturatingIncrementBy64BitElementCount,
HARDWARE_INTRINSIC(Sve, SaturatingIncrementBy8BitElementCount, 0, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqincb, INS_sve_uqincb, INS_sve_sqincb, INS_sve_uqincb, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve, SaturatingIncrementByActiveElementCount, -1, 2, true, {INS_invalid, INS_sve_sqincp, INS_sve_sqincp, INS_sve_sqincp, INS_sve_sqincp, INS_sve_sqincp, INS_sve_sqincp, INS_sve_sqincp, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve, Scale, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fscale, INS_sve_fscale}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve, ShiftLeftLogical, -1, -1, false, {INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve, ShiftRightArithmetic, -1, -1, false, {INS_sve_asr, INS_invalid, INS_sve_asr, INS_invalid, INS_sve_asr, INS_invalid, INS_sve_asr, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve, ShiftRightLogical, -1, -1, false, {INS_invalid, INS_sve_lsr, INS_invalid, INS_sve_lsr, INS_invalid, INS_sve_lsr, INS_invalid, INS_sve_lsr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve, SignExtend16, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sxth, INS_invalid, INS_sve_sxth, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, SignExtend32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sxtw, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, SignExtend8, -1, -1, false, {INS_invalid, INS_invalid, INS_sve_sxtb, INS_invalid, INS_sve_sxtb, INS_invalid, INS_sve_sxtb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
Expand Down
Loading
Loading