From abf6f85b918bade28952c53e19578d36f1947a37 Mon Sep 17 00:00:00 2001 From: Min Hsu Date: Thu, 19 Dec 2024 17:26:19 -0800 Subject: [PATCH 1/6] Pre-commit test --- llvm/test/CodeGen/RISCV/neg-abs.ll | 226 +++++++++++++++++++++++++++++ 1 file changed, 226 insertions(+) diff --git a/llvm/test/CodeGen/RISCV/neg-abs.ll b/llvm/test/CodeGen/RISCV/neg-abs.ll index 7d6a6d7ed4ce64..9d2397756300b4 100644 --- a/llvm/test/CodeGen/RISCV/neg-abs.ll +++ b/llvm/test/CodeGen/RISCV/neg-abs.ll @@ -258,3 +258,229 @@ define i64 @neg_abs64_multiuse(i64 %x, ptr %y) { %neg = sub nsw i64 0, %abs ret i64 %neg } + +define i32 @expanded_neg_abs32(i32 %x) { +; RV32I-LABEL: expanded_neg_abs32: +; RV32I: # %bb.0: +; RV32I-NEXT: neg a1, a0 +; RV32I-NEXT: blt a0, a1, .LBB6_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: .LBB6_2: +; RV32I-NEXT: neg a0, a1 +; RV32I-NEXT: ret +; +; RV32ZBB-LABEL: expanded_neg_abs32: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: neg a1, a0 +; RV32ZBB-NEXT: max a0, a1, a0 +; RV32ZBB-NEXT: neg a0, a0 +; RV32ZBB-NEXT: ret +; +; RV64I-LABEL: expanded_neg_abs32: +; RV64I: # %bb.0: +; RV64I-NEXT: sext.w a1, a0 +; RV64I-NEXT: negw a0, a0 +; RV64I-NEXT: blt a1, a0, .LBB6_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: .LBB6_2: +; RV64I-NEXT: negw a0, a0 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: expanded_neg_abs32: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: sext.w a1, a0 +; RV64ZBB-NEXT: negw a0, a0 +; RV64ZBB-NEXT: max a0, a0, a1 +; RV64ZBB-NEXT: negw a0, a0 +; RV64ZBB-NEXT: ret + %n = sub i32 0, %x + %t = call i32 @llvm.smax.i32(i32 %n, i32 %x) + %r = sub i32 0, %t + ret i32 %r +} + +define i32 @expanded_neg_abs32_unsigned(i32 %x) { +; RV32I-LABEL: expanded_neg_abs32_unsigned: +; RV32I: # %bb.0: +; RV32I-NEXT: neg a1, a0 +; RV32I-NEXT: bltu a0, a1, .LBB7_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: .LBB7_2: +; RV32I-NEXT: neg a0, a1 +; RV32I-NEXT: ret +; +; RV32ZBB-LABEL: expanded_neg_abs32_unsigned: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: neg a1, a0 +; RV32ZBB-NEXT: maxu a0, a1, a0 +; RV32ZBB-NEXT: neg a0, a0 +; RV32ZBB-NEXT: ret +; +; RV64I-LABEL: expanded_neg_abs32_unsigned: +; RV64I: # %bb.0: +; RV64I-NEXT: sext.w a1, a0 +; RV64I-NEXT: negw a0, a0 +; RV64I-NEXT: bltu a1, a0, .LBB7_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: .LBB7_2: +; RV64I-NEXT: negw a0, a0 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: expanded_neg_abs32_unsigned: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: sext.w a1, a0 +; RV64ZBB-NEXT: negw a0, a0 +; RV64ZBB-NEXT: maxu a0, a0, a1 +; RV64ZBB-NEXT: negw a0, a0 +; RV64ZBB-NEXT: ret + %n = sub i32 0, %x + %t = call i32 @llvm.umax.i32(i32 %n, i32 %x) + %r = sub i32 0, %t + ret i32 %r +} + +define i64 @expanded_neg_abs64(i64 %x) { +; RV32I-LABEL: expanded_neg_abs64: +; RV32I: # %bb.0: +; RV32I-NEXT: snez a2, a0 +; RV32I-NEXT: neg a3, a1 +; RV32I-NEXT: sub a2, a3, a2 +; RV32I-NEXT: neg a3, a0 +; RV32I-NEXT: beq a2, a1, .LBB8_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: slt a4, a1, a2 +; RV32I-NEXT: beqz a4, .LBB8_3 +; RV32I-NEXT: j .LBB8_4 +; RV32I-NEXT: .LBB8_2: +; RV32I-NEXT: sltu a4, a0, a3 +; RV32I-NEXT: bnez a4, .LBB8_4 +; RV32I-NEXT: .LBB8_3: +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: mv a3, a0 +; RV32I-NEXT: .LBB8_4: +; RV32I-NEXT: snez a0, a3 +; RV32I-NEXT: add a0, a2, a0 +; RV32I-NEXT: neg a1, a0 +; RV32I-NEXT: neg a0, a3 +; RV32I-NEXT: ret +; +; RV32ZBB-LABEL: expanded_neg_abs64: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: snez a2, a0 +; RV32ZBB-NEXT: neg a3, a1 +; RV32ZBB-NEXT: sub a2, a3, a2 +; RV32ZBB-NEXT: neg a3, a0 +; 
RV32ZBB-NEXT: beq a2, a1, .LBB8_2
+; RV32ZBB-NEXT: # %bb.1:
+; RV32ZBB-NEXT: slt a4, a1, a2
+; RV32ZBB-NEXT: beqz a4, .LBB8_3
+; RV32ZBB-NEXT: j .LBB8_4
+; RV32ZBB-NEXT: .LBB8_2:
+; RV32ZBB-NEXT: sltu a4, a0, a3
+; RV32ZBB-NEXT: bnez a4, .LBB8_4
+; RV32ZBB-NEXT: .LBB8_3:
+; RV32ZBB-NEXT: mv a2, a1
+; RV32ZBB-NEXT: mv a3, a0
+; RV32ZBB-NEXT: .LBB8_4:
+; RV32ZBB-NEXT: snez a0, a3
+; RV32ZBB-NEXT: add a0, a2, a0
+; RV32ZBB-NEXT: neg a1, a0
+; RV32ZBB-NEXT: neg a0, a3
+; RV32ZBB-NEXT: ret
+;
+; RV64I-LABEL: expanded_neg_abs64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: neg a1, a0
+; RV64I-NEXT: blt a0, a1, .LBB8_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: mv a1, a0
+; RV64I-NEXT: .LBB8_2:
+; RV64I-NEXT: neg a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: expanded_neg_abs64:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: neg a1, a0
+; RV64ZBB-NEXT: max a0, a1, a0
+; RV64ZBB-NEXT: neg a0, a0
+; RV64ZBB-NEXT: ret
+  %n = sub i64 0, %x
+  %t = call i64 @llvm.smax.i64(i64 %n, i64 %x)
+  %r = sub i64 0, %t
+  ret i64 %r
+}
+
+define i64 @expanded_neg_abs64_unsigned(i64 %x) {
+; RV32I-LABEL: expanded_neg_abs64_unsigned:
+; RV32I: # %bb.0:
+; RV32I-NEXT: snez a2, a0
+; RV32I-NEXT: neg a3, a1
+; RV32I-NEXT: sub a2, a3, a2
+; RV32I-NEXT: neg a3, a0
+; RV32I-NEXT: beq a2, a1, .LBB9_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: sltu a4, a1, a2
+; RV32I-NEXT: beqz a4, .LBB9_3
+; RV32I-NEXT: j .LBB9_4
+; RV32I-NEXT: .LBB9_2:
+; RV32I-NEXT: sltu a4, a0, a3
+; RV32I-NEXT: bnez a4, .LBB9_4
+; RV32I-NEXT: .LBB9_3:
+; RV32I-NEXT: mv a2, a1
+; RV32I-NEXT: mv a3, a0
+; RV32I-NEXT: .LBB9_4:
+; RV32I-NEXT: snez a0, a3
+; RV32I-NEXT: add a0, a2, a0
+; RV32I-NEXT: neg a1, a0
+; RV32I-NEXT: neg a0, a3
+; RV32I-NEXT: ret
+;
+; RV32ZBB-LABEL: expanded_neg_abs64_unsigned:
+; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: snez a2, a0
+; RV32ZBB-NEXT: neg a3, a1
+; RV32ZBB-NEXT: sub a2, a3, a2
+; RV32ZBB-NEXT: neg a3, a0
+; RV32ZBB-NEXT: beq a2, a1, .LBB9_2
+; RV32ZBB-NEXT: # %bb.1:
+; RV32ZBB-NEXT: sltu a4, a1, a2
+; RV32ZBB-NEXT: beqz a4, .LBB9_3
+; RV32ZBB-NEXT: j .LBB9_4
+; RV32ZBB-NEXT: .LBB9_2:
+; RV32ZBB-NEXT: sltu a4, a0, a3
+; RV32ZBB-NEXT: bnez a4, .LBB9_4
+; RV32ZBB-NEXT: .LBB9_3:
+; RV32ZBB-NEXT: mv a2, a1
+; RV32ZBB-NEXT: mv a3, a0
+; RV32ZBB-NEXT: .LBB9_4:
+; RV32ZBB-NEXT: snez a0, a3
+; RV32ZBB-NEXT: add a0, a2, a0
+; RV32ZBB-NEXT: neg a1, a0
+; RV32ZBB-NEXT: neg a0, a3
+; RV32ZBB-NEXT: ret
+;
+; RV64I-LABEL: expanded_neg_abs64_unsigned:
+; RV64I: # %bb.0:
+; RV64I-NEXT: neg a1, a0
+; RV64I-NEXT: bltu a0, a1, .LBB9_2
+; RV64I-NEXT: # %bb.1:
+; RV64I-NEXT: mv a1, a0
+; RV64I-NEXT: .LBB9_2:
+; RV64I-NEXT: neg a0, a1
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: expanded_neg_abs64_unsigned:
+; RV64ZBB: # %bb.0:
+; RV64ZBB-NEXT: neg a1, a0
+; RV64ZBB-NEXT: maxu a0, a1, a0
+; RV64ZBB-NEXT: neg a0, a0
+; RV64ZBB-NEXT: ret
+  %n = sub i64 0, %x
+  %t = call i64 @llvm.umax.i64(i64 %n, i64 %x)
+  %r = sub i64 0, %t
+  ret i64 %r
+}

From 25c43d8a1e28c96ca7b2fecefb47da47dfb7fe67 Mon Sep 17 00:00:00 2001
From: Min Hsu
Date: Thu, 19 Dec 2024 17:27:47 -0800
Subject: [PATCH 2/6] [DAGCombiner] Turn `(neg (max x, (neg x)))` into
 `(min x, (neg x))`

We already have a rule that turns `(neg (abs x))` into `(min x, (neg x))`.
But in some cases `(neg (max x, (neg x)))` is formed from an expanded
`abs` followed by a `neg` that is only generated after the expansion, so
the existing rule never sees it. This patch adds a separate pattern to
match this kind of case.
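For illustration, here is a minimal IR sketch of the shape this rule
targets (the function name is hypothetical; it mirrors the scalar tests
added in the pre-commit patch):

  declare i32 @llvm.smax.i32(i32, i32)

  define i32 @neg_of_expanded_abs(i32 %x) {
    %n = sub i32 0, %x                            ; the (neg x) half of the expansion
    %t = call i32 @llvm.smax.i32(i32 %n, i32 %x)  ; expanded abs: smax(x, neg x)
    %r = sub i32 0, %t                            ; neg generated after the expansion
    ret i32 %r
  }

On RV32ZBB this can now select `neg` + `min` instead of `neg` + `max` +
`neg`. The fold is also sound at the INT_MIN edge: there `x` and
`(neg x)` are both INT_MIN, so smax(x, neg x) and smin(x, neg x) agree,
and negating INT_MIN wraps back to INT_MIN.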
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 14 ++++++++++++++ llvm/test/CodeGen/RISCV/neg-abs.ll | 12 ++++-------- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 6cbfef2d238bbe..3cb33bdd02ef39 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -3949,6 +3949,20 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true)) return Result; + // Similar to the previous rule, but this time targeting an expanded abs. + // (sub 0, (max X, (sub 0, X))) --> (min X, (sub 0, X)) + // Note that this is applicable to both signed and unsigned min/max. + SDValue X; + if (LegalOperations && + sd_match(N1, + m_OneUse(m_AnyOf(m_SMax(m_Value(X), m_Neg(m_Deferred(X))), + m_UMax(m_Value(X), m_Neg(m_Deferred(X))))))) { + unsigned MinOpc = N1->getOpcode() == ISD::SMAX ? ISD::SMIN : ISD::UMIN; + if (hasOperation(MinOpc, VT)) + return DAG.getNode(MinOpc, DL, VT, X, + DAG.getNode(ISD::SUB, DL, VT, N0, X)); + } + // Fold neg(splat(neg(x)) -> splat(x) if (VT.isVector()) { SDValue N1S = DAG.getSplatValue(N1, true); diff --git a/llvm/test/CodeGen/RISCV/neg-abs.ll b/llvm/test/CodeGen/RISCV/neg-abs.ll index 9d2397756300b4..c1695c88f1f384 100644 --- a/llvm/test/CodeGen/RISCV/neg-abs.ll +++ b/llvm/test/CodeGen/RISCV/neg-abs.ll @@ -273,8 +273,7 @@ define i32 @expanded_neg_abs32(i32 %x) { ; RV32ZBB-LABEL: expanded_neg_abs32: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: neg a1, a0 -; RV32ZBB-NEXT: max a0, a1, a0 -; RV32ZBB-NEXT: neg a0, a0 +; RV32ZBB-NEXT: min a0, a0, a1 ; RV32ZBB-NEXT: ret ; ; RV64I-LABEL: expanded_neg_abs32: @@ -315,8 +314,7 @@ define i32 @expanded_neg_abs32_unsigned(i32 %x) { ; RV32ZBB-LABEL: expanded_neg_abs32_unsigned: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: neg a1, a0 -; RV32ZBB-NEXT: maxu a0, a1, a0 -; RV32ZBB-NEXT: neg a0, a0 +; RV32ZBB-NEXT: minu a0, a0, a1 ; RV32ZBB-NEXT: ret ; ; RV64I-LABEL: expanded_neg_abs32_unsigned: @@ -405,8 +403,7 @@ define i64 @expanded_neg_abs64(i64 %x) { ; RV64ZBB-LABEL: expanded_neg_abs64: ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: neg a1, a0 -; RV64ZBB-NEXT: max a0, a1, a0 -; RV64ZBB-NEXT: neg a0, a0 +; RV64ZBB-NEXT: min a0, a0, a1 ; RV64ZBB-NEXT: ret %n = sub i64 0, %x %t = call i64 @llvm.smax.i64(i64 %n, i64 %x) @@ -476,8 +473,7 @@ define i64 @expanded_neg_abs64_unsigned(i64 %x) { ; RV64ZBB-LABEL: expanded_neg_abs64_unsigned: ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: neg a1, a0 -; RV64ZBB-NEXT: maxu a0, a1, a0 -; RV64ZBB-NEXT: neg a0, a0 +; RV64ZBB-NEXT: minu a0, a0, a1 ; RV64ZBB-NEXT: ret %n = sub i64 0, %x %t = call i64 @llvm.umax.i64(i64 %n, i64 %x) From 1abcdc3172121d95b309080fef162e7df32475fa Mon Sep 17 00:00:00 2001 From: Min Hsu Date: Fri, 20 Dec 2024 10:30:12 -0800 Subject: [PATCH 3/6] Address review comments Preserve the flags from the first sub --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 3cb33bdd02ef39..59d16d5cbb739a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -3953,14 +3953,16 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { // (sub 0, (max X, (sub 0, X))) --> (min X, (sub 0, X)) // Note that this is applicable to both signed and unsigned min/max. 
SDValue X;
+  SDValue S0;
   if (LegalOperations &&
-      sd_match(N1,
-               m_OneUse(m_AnyOf(m_SMax(m_Value(X), m_Neg(m_Deferred(X))),
-                                m_UMax(m_Value(X), m_Neg(m_Deferred(X))))))) {
+      sd_match(N1, m_OneUse(m_AnyOf(
+                       m_SMax(m_Value(X),
+                              m_AllOf(m_Neg(m_Deferred(X)), m_Value(S0))),
+                       m_UMax(m_Value(X), m_AllOf(m_Neg(m_Deferred(X)),
+                                                  m_Value(S0))))))) {
     unsigned MinOpc = N1->getOpcode() == ISD::SMAX ? ISD::SMIN : ISD::UMIN;
     if (hasOperation(MinOpc, VT))
-      return DAG.getNode(MinOpc, DL, VT, X,
-                         DAG.getNode(ISD::SUB, DL, VT, N0, X));
+      return DAG.getNode(MinOpc, DL, VT, X, S0);
   }

   // Fold neg(splat(neg(x)) -> splat(x)

From 55d7531db25c9b79db35daf321cc49cb996eb9bb Mon Sep 17 00:00:00 2001
From: Min Hsu
Date: Fri, 20 Dec 2024 10:51:16 -0800
Subject: [PATCH 4/6] Add `(sub 0, (min X, (sub 0, X))) --> (max X, (sub 0, X))`

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  36 ++-
 llvm/test/CodeGen/RISCV/neg-abs.ll            | 222 ++++++++++++++++++
 2 files changed, 249 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 59d16d5cbb739a..808dedc9d679fd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3951,18 +3951,36 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {

   // Similar to the previous rule, but this time targeting an expanded abs.
   // (sub 0, (max X, (sub 0, X))) --> (min X, (sub 0, X))
-  // Note that this is applicable to both signed and unsigned min/max.
+  // as well as
+  // (sub 0, (min X, (sub 0, X))) --> (max X, (sub 0, X))
+  // Note that these two are applicable to both signed and unsigned min/max.
   SDValue X;
   SDValue S0;
+  auto NegPat = m_AllOf(m_Neg(m_Deferred(X)), m_Value(S0));
   if (LegalOperations &&
-      sd_match(N1, m_OneUse(m_AnyOf(
-                       m_SMax(m_Value(X),
-                              m_AllOf(m_Neg(m_Deferred(X)), m_Value(S0))),
-                       m_UMax(m_Value(X), m_AllOf(m_Neg(m_Deferred(X)),
-                                                  m_Value(S0))))))) {
-    unsigned MinOpc = N1->getOpcode() == ISD::SMAX ?
ISD::SMIN : ISD::UMIN; - if (hasOperation(MinOpc, VT)) - return DAG.getNode(MinOpc, DL, VT, X, S0); + sd_match(N1, m_OneUse(m_AnyOf(m_SMax(m_Value(X), NegPat), + m_UMax(m_Value(X), NegPat), + m_SMin(m_Value(X), NegPat), + m_UMin(m_Value(X), NegPat))))) { + unsigned NewOpc = 0; + switch (N1->getOpcode()) { + case ISD::SMAX: + NewOpc = ISD::SMIN; + break; + case ISD::UMAX: + NewOpc = ISD::UMIN; + break; + case ISD::SMIN: + NewOpc = ISD::SMAX; + break; + case ISD::UMIN: + NewOpc = ISD::UMAX; + break; + default: + llvm_unreachable("unrecognized opcode"); + } + if (hasOperation(NewOpc, VT)) + return DAG.getNode(NewOpc, DL, VT, X, S0); } // Fold neg(splat(neg(x)) -> splat(x) diff --git a/llvm/test/CodeGen/RISCV/neg-abs.ll b/llvm/test/CodeGen/RISCV/neg-abs.ll index c1695c88f1f384..fe19a4fa8bbd81 100644 --- a/llvm/test/CodeGen/RISCV/neg-abs.ll +++ b/llvm/test/CodeGen/RISCV/neg-abs.ll @@ -480,3 +480,225 @@ define i64 @expanded_neg_abs64_unsigned(i64 %x) { %r = sub i64 0, %t ret i64 %r } + +define i32 @expanded_neg_inv_abs32(i32 %x) { +; RV32I-LABEL: expanded_neg_inv_abs32: +; RV32I: # %bb.0: +; RV32I-NEXT: neg a1, a0 +; RV32I-NEXT: blt a1, a0, .LBB10_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: .LBB10_2: +; RV32I-NEXT: neg a0, a1 +; RV32I-NEXT: ret +; +; RV32ZBB-LABEL: expanded_neg_inv_abs32: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: neg a1, a0 +; RV32ZBB-NEXT: max a0, a0, a1 +; RV32ZBB-NEXT: ret +; +; RV64I-LABEL: expanded_neg_inv_abs32: +; RV64I: # %bb.0: +; RV64I-NEXT: sext.w a1, a0 +; RV64I-NEXT: negw a0, a0 +; RV64I-NEXT: blt a0, a1, .LBB10_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: .LBB10_2: +; RV64I-NEXT: negw a0, a0 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: expanded_neg_inv_abs32: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: sext.w a1, a0 +; RV64ZBB-NEXT: negw a0, a0 +; RV64ZBB-NEXT: min a0, a0, a1 +; RV64ZBB-NEXT: negw a0, a0 +; RV64ZBB-NEXT: ret + %n = sub i32 0, %x + %t = call i32 @llvm.smin.i32(i32 %n, i32 %x) + %r = sub i32 0, %t + ret i32 %r +} + +define i32 @expanded_neg_inv_abs32_unsigned(i32 %x) { +; RV32I-LABEL: expanded_neg_inv_abs32_unsigned: +; RV32I: # %bb.0: +; RV32I-NEXT: neg a1, a0 +; RV32I-NEXT: bltu a1, a0, .LBB11_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: .LBB11_2: +; RV32I-NEXT: neg a0, a1 +; RV32I-NEXT: ret +; +; RV32ZBB-LABEL: expanded_neg_inv_abs32_unsigned: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: neg a1, a0 +; RV32ZBB-NEXT: maxu a0, a0, a1 +; RV32ZBB-NEXT: ret +; +; RV64I-LABEL: expanded_neg_inv_abs32_unsigned: +; RV64I: # %bb.0: +; RV64I-NEXT: sext.w a1, a0 +; RV64I-NEXT: negw a0, a0 +; RV64I-NEXT: bltu a0, a1, .LBB11_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: .LBB11_2: +; RV64I-NEXT: negw a0, a0 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: expanded_neg_inv_abs32_unsigned: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: sext.w a1, a0 +; RV64ZBB-NEXT: negw a0, a0 +; RV64ZBB-NEXT: minu a0, a0, a1 +; RV64ZBB-NEXT: negw a0, a0 +; RV64ZBB-NEXT: ret + %n = sub i32 0, %x + %t = call i32 @llvm.umin.i32(i32 %n, i32 %x) + %r = sub i32 0, %t + ret i32 %r +} + +define i64 @expanded_neg_inv_abs64(i64 %x) { +; RV32I-LABEL: expanded_neg_inv_abs64: +; RV32I: # %bb.0: +; RV32I-NEXT: snez a2, a0 +; RV32I-NEXT: neg a3, a1 +; RV32I-NEXT: sub a2, a3, a2 +; RV32I-NEXT: neg a3, a0 +; RV32I-NEXT: beq a2, a1, .LBB12_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: slt a4, a2, a1 +; RV32I-NEXT: beqz a4, .LBB12_3 +; RV32I-NEXT: j .LBB12_4 +; RV32I-NEXT: .LBB12_2: +; RV32I-NEXT: sltu a4, a3, a0 +; RV32I-NEXT: bnez a4, .LBB12_4 +; RV32I-NEXT: 
.LBB12_3: +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: mv a3, a0 +; RV32I-NEXT: .LBB12_4: +; RV32I-NEXT: snez a0, a3 +; RV32I-NEXT: add a0, a2, a0 +; RV32I-NEXT: neg a1, a0 +; RV32I-NEXT: neg a0, a3 +; RV32I-NEXT: ret +; +; RV32ZBB-LABEL: expanded_neg_inv_abs64: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: snez a2, a0 +; RV32ZBB-NEXT: neg a3, a1 +; RV32ZBB-NEXT: sub a2, a3, a2 +; RV32ZBB-NEXT: neg a3, a0 +; RV32ZBB-NEXT: beq a2, a1, .LBB12_2 +; RV32ZBB-NEXT: # %bb.1: +; RV32ZBB-NEXT: slt a4, a2, a1 +; RV32ZBB-NEXT: beqz a4, .LBB12_3 +; RV32ZBB-NEXT: j .LBB12_4 +; RV32ZBB-NEXT: .LBB12_2: +; RV32ZBB-NEXT: sltu a4, a3, a0 +; RV32ZBB-NEXT: bnez a4, .LBB12_4 +; RV32ZBB-NEXT: .LBB12_3: +; RV32ZBB-NEXT: mv a2, a1 +; RV32ZBB-NEXT: mv a3, a0 +; RV32ZBB-NEXT: .LBB12_4: +; RV32ZBB-NEXT: snez a0, a3 +; RV32ZBB-NEXT: add a0, a2, a0 +; RV32ZBB-NEXT: neg a1, a0 +; RV32ZBB-NEXT: neg a0, a3 +; RV32ZBB-NEXT: ret +; +; RV64I-LABEL: expanded_neg_inv_abs64: +; RV64I: # %bb.0: +; RV64I-NEXT: neg a1, a0 +; RV64I-NEXT: blt a1, a0, .LBB12_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: .LBB12_2: +; RV64I-NEXT: neg a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: expanded_neg_inv_abs64: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: neg a1, a0 +; RV64ZBB-NEXT: max a0, a0, a1 +; RV64ZBB-NEXT: ret + %n = sub i64 0, %x + %t = call i64 @llvm.smin.i64(i64 %n, i64 %x) + %r = sub i64 0, %t + ret i64 %r +} + +define i64 @expanded_neg_inv_abs64_unsigned(i64 %x) { +; RV32I-LABEL: expanded_neg_inv_abs64_unsigned: +; RV32I: # %bb.0: +; RV32I-NEXT: snez a2, a0 +; RV32I-NEXT: neg a3, a1 +; RV32I-NEXT: sub a2, a3, a2 +; RV32I-NEXT: neg a3, a0 +; RV32I-NEXT: beq a2, a1, .LBB13_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: sltu a4, a2, a1 +; RV32I-NEXT: beqz a4, .LBB13_3 +; RV32I-NEXT: j .LBB13_4 +; RV32I-NEXT: .LBB13_2: +; RV32I-NEXT: sltu a4, a3, a0 +; RV32I-NEXT: bnez a4, .LBB13_4 +; RV32I-NEXT: .LBB13_3: +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: mv a3, a0 +; RV32I-NEXT: .LBB13_4: +; RV32I-NEXT: snez a0, a3 +; RV32I-NEXT: add a0, a2, a0 +; RV32I-NEXT: neg a1, a0 +; RV32I-NEXT: neg a0, a3 +; RV32I-NEXT: ret +; +; RV32ZBB-LABEL: expanded_neg_inv_abs64_unsigned: +; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: snez a2, a0 +; RV32ZBB-NEXT: neg a3, a1 +; RV32ZBB-NEXT: sub a2, a3, a2 +; RV32ZBB-NEXT: neg a3, a0 +; RV32ZBB-NEXT: beq a2, a1, .LBB13_2 +; RV32ZBB-NEXT: # %bb.1: +; RV32ZBB-NEXT: sltu a4, a2, a1 +; RV32ZBB-NEXT: beqz a4, .LBB13_3 +; RV32ZBB-NEXT: j .LBB13_4 +; RV32ZBB-NEXT: .LBB13_2: +; RV32ZBB-NEXT: sltu a4, a3, a0 +; RV32ZBB-NEXT: bnez a4, .LBB13_4 +; RV32ZBB-NEXT: .LBB13_3: +; RV32ZBB-NEXT: mv a2, a1 +; RV32ZBB-NEXT: mv a3, a0 +; RV32ZBB-NEXT: .LBB13_4: +; RV32ZBB-NEXT: snez a0, a3 +; RV32ZBB-NEXT: add a0, a2, a0 +; RV32ZBB-NEXT: neg a1, a0 +; RV32ZBB-NEXT: neg a0, a3 +; RV32ZBB-NEXT: ret +; +; RV64I-LABEL: expanded_neg_inv_abs64_unsigned: +; RV64I: # %bb.0: +; RV64I-NEXT: neg a1, a0 +; RV64I-NEXT: bltu a1, a0, .LBB13_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: .LBB13_2: +; RV64I-NEXT: neg a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: expanded_neg_inv_abs64_unsigned: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: neg a1, a0 +; RV64ZBB-NEXT: maxu a0, a0, a1 +; RV64ZBB-NEXT: ret + %n = sub i64 0, %x + %t = call i64 @llvm.umin.i64(i64 %n, i64 %x) + %r = sub i64 0, %t + ret i64 %r +} From 3e85bc412d1fb8ec6c0214724fd918f2a513af25 Mon Sep 17 00:00:00 2001 From: Min Hsu Date: Mon, 23 Dec 2024 09:50:10 -0800 Subject: [PATCH 5/6] Extract min<->max conversion into its own helper function --- llvm/include/llvm/CodeGen/ISDOpcodes.h | 2 ++ 
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 18 +-----------------
 llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 15 +++++++++++++++
 3 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 0b6d155b6d161e..01346f01cead35 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -1506,6 +1506,8 @@ inline bool isBitwiseLogicOp(unsigned Opcode) {
   return Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR;
 }

+NodeType getInverseMinMaxOpcode(unsigned MinMaxOpc);
+
 /// Get underlying scalar opcode for VECREDUCE opcode.
 /// For example ISD::AND for ISD::VECREDUCE_AND.
 NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode);
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 808dedc9d679fd..74a5a64f616b30 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3962,23 +3962,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
                                     m_UMax(m_Value(X), NegPat),
                                     m_SMin(m_Value(X), NegPat),
                                     m_UMin(m_Value(X), NegPat))))) {
-    unsigned NewOpc = 0;
-    switch (N1->getOpcode()) {
-    case ISD::SMAX:
-      NewOpc = ISD::SMIN;
-      break;
-    case ISD::UMAX:
-      NewOpc = ISD::UMIN;
-      break;
-    case ISD::SMIN:
-      NewOpc = ISD::SMAX;
-      break;
-    case ISD::UMIN:
-      NewOpc = ISD::UMAX;
-      break;
-    default:
-      llvm_unreachable("unrecognized opcode");
-    }
+    unsigned NewOpc = ISD::getInverseMinMaxOpcode(N1->getOpcode());
     if (hasOperation(NewOpc, VT))
       return DAG.getNode(NewOpc, DL, VT, X, S0);
   }
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 07749ec87d0b20..6a65e3b88d0fc0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -430,6 +430,21 @@ bool ISD::matchBinaryPredicate(
   return true;
 }

+ISD::NodeType ISD::getInverseMinMaxOpcode(unsigned MinMaxOpc) {
+  switch (MinMaxOpc) {
+  default:
+    llvm_unreachable("unrecognized opcode");
+  case ISD::UMIN:
+    return ISD::UMAX;
+  case ISD::UMAX:
+    return ISD::UMIN;
+  case ISD::SMIN:
+    return ISD::SMAX;
+  case ISD::SMAX:
+    return ISD::SMIN;
+  }
+}
+
 ISD::NodeType ISD::getVecReduceBaseOpcode(unsigned VecReduceOpcode) {
   switch (VecReduceOpcode) {
   default:

From b8b253b0afd4cd16d6b8b15e056e8c7ca9505f12 Mon Sep 17 00:00:00 2001
From: Min Hsu
Date: Thu, 26 Dec 2024 15:39:15 -0800
Subject: [PATCH 6/6] Address review comments

Also run this combiner rule in both the pre- and post-legalization
phases.

---
 llvm/include/llvm/CodeGen/ISDOpcodes.h        |  2 +
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  3 +-
 llvm/test/CodeGen/RISCV/rvv/fixed-neg-abs.ll  | 54 +++++++++++++++++++
 3 files changed, 57 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/fixed-neg-abs.ll

diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 01346f01cead35..69cc850d325cb2 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -1506,6 +1506,8 @@ inline bool isBitwiseLogicOp(unsigned Opcode) {
   return Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR;
 }

+/// Given a \p MinMaxOpc of ISD::(U|S)MIN or ISD::(U|S)MAX, returns
+/// ISD::(U|S)MAX and ISD::(U|S)MIN, respectively.
 NodeType getInverseMinMaxOpcode(unsigned MinMaxOpc);

 /// Get underlying scalar opcode for VECREDUCE opcode.
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 74a5a64f616b30..9a451750ef8bf1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3957,8 +3957,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
   SDValue X;
   SDValue S0;
   auto NegPat = m_AllOf(m_Neg(m_Deferred(X)), m_Value(S0));
-  if (LegalOperations &&
-      sd_match(N1, m_OneUse(m_AnyOf(m_SMax(m_Value(X), NegPat),
+  if (sd_match(N1, m_OneUse(m_AnyOf(m_SMax(m_Value(X), NegPat),
                                     m_UMax(m_Value(X), NegPat),
                                     m_SMin(m_Value(X), NegPat),
                                     m_UMin(m_Value(X), NegPat))))) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-neg-abs.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-neg-abs.ll
new file mode 100644
index 00000000000000..6f1efb6885deef
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-neg-abs.ll
@@ -0,0 +1,54 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+
+define <2 x i64> @expanded_fixed_neg_abs64(<2 x i64> %x) {
+; CHECK-LABEL: expanded_fixed_neg_abs64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-NEXT: vrsub.vi v9, v8, 0
+; CHECK-NEXT: vmin.vv v8, v8, v9
+; CHECK-NEXT: ret
+  %t = sub <2 x i64> <i64 0, i64 0>, %x
+  %t1 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %t, <2 x i64> %x)
+  %t2 = sub <2 x i64> <i64 0, i64 0>, %t1
+  ret <2 x i64> %t2
+}
+
+define <2 x i64> @expanded_fixed_neg_abs64_unsigned(<2 x i64> %x) {
+; CHECK-LABEL: expanded_fixed_neg_abs64_unsigned:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-NEXT: vrsub.vi v9, v8, 0
+; CHECK-NEXT: vminu.vv v8, v8, v9
+; CHECK-NEXT: ret
+  %t = sub <2 x i64> <i64 0, i64 0>, %x
+  %t1 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> %t, <2 x i64> %x)
+  %t2 = sub <2 x i64> <i64 0, i64 0>, %t1
+  ret <2 x i64> %t2
+}
+
+define <2 x i64> @expanded_fixed_neg_inv_abs64(<2 x i64> %x) {
+; CHECK-LABEL: expanded_fixed_neg_inv_abs64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-NEXT: vrsub.vi v9, v8, 0
+; CHECK-NEXT: vmax.vv v8, v8, v9
+; CHECK-NEXT: ret
+  %t = sub <2 x i64> <i64 0, i64 0>, %x
+  %t1 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %t, <2 x i64> %x)
+  %t2 = sub <2 x i64> <i64 0, i64 0>, %t1
+  ret <2 x i64> %t2
+}
+
+define <2 x i64> @expanded_fixed_neg_inv_abs64_unsigned(<2 x i64> %x) {
+; CHECK-LABEL: expanded_fixed_neg_inv_abs64_unsigned:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-NEXT: vrsub.vi v9, v8, 0
+; CHECK-NEXT: vmaxu.vv v8, v8, v9
+; CHECK-NEXT: ret
+  %t = sub <2 x i64> <i64 0, i64 0>, %x
+  %t1 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %t, <2 x i64> %x)
+  %t2 = sub <2 x i64> <i64 0, i64 0>, %t1
+  ret <2 x i64> %t2
+}
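One closing illustration: the combine is guarded by `m_OneUse`, so a
min/max with additional users is intentionally left alone, in the same
spirit as the existing `neg_abs64_multiuse` test. A hypothetical IR
sketch (not part of this series' test files) of a case the fold should
skip:

  declare i32 @llvm.smax.i32(i32, i32)

  define i32 @neg_max_multiuse(i32 %x, ptr %p) {
    %n = sub i32 0, %x
    %t = call i32 @llvm.smax.i32(i32 %n, i32 %x)
    store i32 %t, ptr %p    ; second use keeps the smax alive,
    %r = sub i32 0, %t      ; so rewriting the neg into a min would not
    ret i32 %r              ; eliminate the max node
  }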