From 0f57d4e9dec602b68671be8891e7af876285f275 Mon Sep 17 00:00:00 2001 From: Radomir Djogo <159184120+rdjogoTT@users.noreply.github.com> Date: Tue, 10 Dec 2024 12:02:28 -0500 Subject: [PATCH] Fix load/store mode for add int32 (#48) * Add template arg for load/store mode --- common/inc/sfpu/ckernel_sfpu_add_int32.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/common/inc/sfpu/ckernel_sfpu_add_int32.h b/common/inc/sfpu/ckernel_sfpu_add_int32.h index 71547ad..df02231 100644 --- a/common/inc/sfpu/ckernel_sfpu_add_int32.h +++ b/common/inc/sfpu/ckernel_sfpu_add_int32.h @@ -17,22 +17,26 @@ namespace ckernel namespace sfpu { -template +template inline void _add_int32_(const uint dst_offset) { + // Use '12' if Dest is in sign-magnitude format and '4' for 2's complement, + // because TTI_SFPIADD requires 2's complement format in LREGs + constexpr int sfpload_instr_mod = SIGN_MAGNITUDE_FORMAT ? 12 : 4; + // Operand A is input1 (int32) // Operand B is input2 (int32) // Output is int32 #pragma GCC unroll 8 for (int d = 0; d < ITERATIONS; d++) { // operand A - int32 - TTI_SFPLOAD(0, 12, 3, 0); + TTI_SFPLOAD(0, sfpload_instr_mod, 3, 0); // operand B - int32 - TT_SFPLOAD(1, 12, 3, dst_offset * 64); + TT_SFPLOAD(1, sfpload_instr_mod, 3, dst_offset * 64); TTI_SFPIADD(0, 1, 0, 4); // MAD has a 2-cycle pipeline latency so we need one cycle latency until next instr can consume the result TTI_NOP; // LREG_0 -> dest as int32 - TTI_SFPSTORE(0, 12, 3, 0); + TTI_SFPSTORE(0, sfpload_instr_mod, 3, 0); dst_reg++; } }