Skip to content
This repository has been archived by the owner on Feb 20, 2025. It is now read-only.

Commit

Permalink
Reciprocal: round the fp32 result to fp16b when the destination register is in 16-bit mode
Browse files (browse the repository at this point in the history)
  • Loading branch information
rdjogoTT authored and rtawfik01 committed Aug 7, 2024
1 parent 3a492fb commit f16a78e
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions common/inc/sfpu/ckernel_sfpu_recip.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ sfpi_inline vFloat _sfpu_reciprocal_(const vFloat in)
return setexp(result, new_exp);
}

template <bool APPROXIMATION_MODE, int ITERATIONS>
template <bool APPROXIMATION_MODE, int ITERATIONS, bool is_fp32_dest_acc_en = true>
inline void _calculate_reciprocal_(const int iterations)
{
#pragma GCC unroll 8
Expand All @@ -70,7 +70,11 @@ inline void _calculate_reciprocal_(const int iterations)
}
v_endif;

dst_reg[0] = out;
if constexpr (is_fp32_dest_acc_en) {
dst_reg[0] = out;
} else {
dst_reg[0] = reinterpret<vFloat>(float_to_fp16b(out, 0));
}

dst_reg++;
}
Expand Down

0 comments on commit f16a78e

Please sign in to comment.