Skip to content
This repository has been archived by the owner on Feb 20, 2025. It is now read-only.

Move typecast to llk submodule #25

Merged
merged 1 commit into from
Jul 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions common/inc/ckernel_sfpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#include "sfpu/ckernel_sfpu_tanh.h"
#include "sfpu/ckernel_sfpu_topk.h"
#include "sfpu/ckernel_sfpu_trigonometry.h"
#include "sfpu/ckernel_sfpu_typecast.h"

// using namespace sfpi;

Expand Down
225 changes: 225 additions & 0 deletions common/inc/sfpu/ckernel_sfpu_typecast.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
// SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC
//
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "ckernel_defs.h"
#include "ckernel.h"
#include "noc_nonblocking_api.h"
#include <limits>

#include "sfpi.h"

using namespace sfpi;

namespace ckernel
{
namespace sfpu
{

// Typecast kernel: fp16b -> uint16 (per the function name).
//
// Template parameters:
//   APPROXIMATION_MODE - not referenced anywhere in the body.
//   ITERATIONS         - number of loop passes; dst_reg is advanced once per pass.
//
// NOTE(review): the numeric arguments of the TTI_* macros are raw instruction
// fields whose encodings are not defined in this file. The per-line notes below
// are a best reading and must be confirmed against the SFPU ISA documentation.
template <bool APPROXIMATION_MODE, int ITERATIONS>
inline void _calculate_typecast_fp16b_to_uint16_()
{
// "unroll 0" asks GCC not to unroll this loop.
#pragma GCC unroll 0
for (int d = 0; d < ITERATIONS; d++) {
// presumably resets lane predication so that all lanes are active -- confirm
TTI_SFPENCC(0,0,0,0);
// load the current dst_reg element (first operand 0: presumably LREG0)
TTI_SFPLOAD(0,0,3,0);
// presumably enables only the lanes whose loaded value is negative -- confirm
TTI_SFPSETCC(0,0,0,0);
// load immediate 0; together with the SETCC above this looks like a clamp of
// negative inputs to zero before the unsigned conversion -- TODO confirm
TTI_SFPLOADI(0,0,0);
// presumably re-enables all lanes
TTI_SFPENCC(0,0,0,0);
// rounding/conversion step; last operand 14 presumably selects the uint16
// output format, reading LREG0 and writing LREG1 -- confirm
TTI_SFP_STOCH_RND(0,0,2,0,1,14);
// store the converted value back (mode 6: presumably the 16-bit unsigned format)
TTI_SFPSTORE(1,6,3,0);
// advance to the next dst_reg element for the following iteration
dst_reg++;
}
}

// Typecast kernel: uint16 -> fp16b (per the function name).
//
// Template parameters:
//   APPROXIMATION_MODE - not referenced anywhere in the body.
//   ITERATIONS         - number of loop passes; dst_reg is advanced once per pass.
//
// NOTE(review): the TTI_* operand encodings are not defined in this file; the
// per-line notes are a best reading to be confirmed against the SFPU ISA docs.
template <bool APPROXIMATION_MODE, int ITERATIONS>
inline void _calculate_typecast_uint16_to_fp16b_()
{
// "unroll 0" asks GCC not to unroll this loop.
#pragma GCC unroll 0
for (int d = 0; d < ITERATIONS; d++) {
// load the current element (mode 6: presumably the 16-bit unsigned format)
TTI_SFPLOAD(0,6,3,0);
// integer -> float cast (presumably LREG0 -> LREG1)
TTI_SFPCAST(0,1,0);
// rounding step; last operand 1 presumably selects fp16b output
// (presumably LREG1 -> LREG2) -- confirm
TTI_SFP_STOCH_RND(0,0,3,1,2,1);
// store the result (mode 2: presumably fp16b)
TTI_SFPSTORE(2,2,3,0);
// advance to the next dst_reg element for the following iteration
dst_reg++;
}
}

// Typecast kernel: int32 -> fp16b (per the function name).
//
// Template parameters:
//   APPROXIMATION_MODE - not referenced anywhere in the body.
//   ITERATIONS         - number of loop passes; dst_reg is advanced once per pass.
//
// NOTE(review): the TTI_* operand encodings are not defined in this file; the
// per-line notes are a best reading to be confirmed against the SFPU ISA docs.
template <bool APPROXIMATION_MODE, int ITERATIONS>
inline void _calculate_typecast_int32_to_fp16b_()
{
// "unroll 0" asks GCC not to unroll this loop.
#pragma GCC unroll 0
for (int d = 0; d < ITERATIONS; d++) {
// load the current element (mode 12: presumably a signed 32-bit integer
// load format -- TODO confirm which integer representation it expects)
TTI_SFPLOAD(0,12,3,0);
// integer -> float cast (presumably LREG0 -> LREG1)
TTI_SFPCAST(0,1,0);
// rounding step; last operand 1 presumably selects fp16b output
// (presumably LREG1 -> LREG2) -- confirm
TTI_SFP_STOCH_RND(0,0,3,1,2,1);
// store the result (mode 2: presumably fp16b)
TTI_SFPSTORE(2,2,3,0);
// advance to the next dst_reg element for the following iteration
dst_reg++;
}
}

// Typecast kernel: fp16b -> int32 (per the function name), written with the
// sfpi vector types and v_if/v_elseif/v_else predication macros.
//
// Per element the body selects one of three cases on the extracted exponent:
//   exp < 0   -> writes 0
//   exp > 30  -> writes int32 max, with the sign bit set when the input is negative
//   otherwise -> writes the mantissa shifted by (exp - 23), with the sign bit set
//                when the input is negative
//
// Template parameters:
//   APPROXIMATION_MODE - not referenced anywhere in the body.
//   ITERATIONS         - number of loop passes; dst_reg is advanced once per pass.
//
// NOTE(review): negative results are produced by setting only the sign bit
// (setsgn), which implies a sign-magnitude integer encoding rather than
// two's complement -- confirm against the consumer of this output.
template <bool APPROXIMATION_MODE, int ITERATIONS>
inline void _calculate_typecast_fp16b_to_int32_()
{
// "unroll 0" asks GCC not to unroll this loop.
#pragma GCC unroll 0
for (int d = 0; d < ITERATIONS; d++) {
vFloat in = dst_reg[0];

// extract exponent (presumably the unbiased exponent, given the comparisons
// against 0 and 30 below -- confirm)
vInt exp = exexp(in);

v_if (exp < 0) {
// negative exponent: the result is written as 0
dst_reg[0] = 0;
} v_elseif (exp > 30) {
// set to int32 max value in case of overflow
vInt tmp = std::numeric_limits<int32_t>::max();
// check sign: for negative inputs, set the sign bit on the saturated value
v_if (in < 0) {
tmp = reinterpret<vInt>(setsgn(reinterpret<vFloat>(tmp), 1));
} v_endif
dst_reg[0] = tmp;
} v_else {
// extract mantissa (presumably including the implicit leading 1 -- confirm)
vInt man = exman8(in);
// shift amount is (exp - 23); it is negative whenever exp < 23, which
// presumably makes shft() shift right by (23 - exp) -- confirm shft's
// sign convention
vInt shift = exp - 23;
man = shft(reinterpret<vUInt>(man), shift);
// check sign: for negative inputs, set the sign bit on the magnitude
v_if (in < 0) {
man = reinterpret<vInt>(setsgn(reinterpret<vFloat>(man), 1));
} v_endif
dst_reg[0] = man;
} v_endif

// advance to the next dst_reg element for the following iteration
dst_reg++;
}
}

// Typecast kernel: fp32 -> fp16b (per the function name).
//
// Template parameters:
//   APPROXIMATION_MODE - not referenced anywhere in the body.
//   ITERATIONS         - number of loop passes; dst_reg is advanced once per pass.
//
// NOTE(review): the TTI_* operand encodings are not defined in this file; the
// per-line notes are a best reading to be confirmed against the SFPU ISA docs.
template <bool APPROXIMATION_MODE, int ITERATIONS>
inline void _calculate_typecast_fp32_to_fp16b_()
{
// "unroll 0" asks GCC not to unroll this loop.
#pragma GCC unroll 0
for (int d = 0; d < ITERATIONS; d++) {
// load the current element (mode 0: presumably the default/configured format)
TTI_SFPLOAD(0,0,3,0);
// rounding step; last operand 1 presumably selects fp16b output
// (presumably LREG0 -> LREG1) -- confirm
TTI_SFP_STOCH_RND(0,0,2,0,1,1);
// store the rounded value (mode 0: presumably the default/configured format)
TTI_SFPSTORE(1,0,3,0);
// advance to the next dst_reg element for the following iteration
dst_reg++;
}
}

// Typecast kernel: uint16 -> fp32 (per the function name).
//
// Template parameters:
//   APPROXIMATION_MODE - not referenced anywhere in the body.
//   ITERATIONS         - number of loop passes; dst_reg is advanced once per pass.
//
// NOTE(review): the TTI_* operand encodings are not defined in this file; the
// per-line notes are a best reading to be confirmed against the SFPU ISA docs.
template <bool APPROXIMATION_MODE, int ITERATIONS>
inline void _calculate_typecast_uint16_to_fp32_()
{
// "unroll 0" asks GCC not to unroll this loop.
#pragma GCC unroll 0
for (int d = 0; d < ITERATIONS; d++) {
// load the current element (mode 6: presumably the 16-bit unsigned format)
TTI_SFPLOAD(0,6,3,0);
// integer -> float cast (presumably LREG0 -> LREG1)
TTI_SFPCAST(0,1,0);
// store the cast result (mode 3: presumably fp32); no rounding step is issued
TTI_SFPSTORE(1,3,3,0);
// advance to the next dst_reg element for the following iteration
dst_reg++;
}
}

// Typecast kernel: int32 -> fp32 (per the function name).
//
// Template parameters:
//   APPROXIMATION_MODE - not referenced anywhere in the body.
//   ITERATIONS         - number of loop passes; dst_reg is advanced once per pass.
//
// NOTE(review): the TTI_* operand encodings are not defined in this file; the
// per-line notes are a best reading to be confirmed against the SFPU ISA docs.
template <bool APPROXIMATION_MODE, int ITERATIONS>
inline void _calculate_typecast_int32_to_fp32_()
{
// "unroll 0" asks GCC not to unroll this loop.
#pragma GCC unroll 0
for (int d = 0; d < ITERATIONS; d++) {
// load the current element (mode 12: presumably a signed 32-bit integer
// load format -- TODO confirm which integer representation it expects)
TTI_SFPLOAD(0,12,3,0);
// integer -> float cast (presumably LREG0 -> LREG1)
TTI_SFPCAST(0,1,0);
// store the cast result (mode 3: presumably fp32); no rounding step is issued
TTI_SFPSTORE(1,3,3,0);
// advance to the next dst_reg element for the following iteration
dst_reg++;
}
}

// Typecast kernel: fp16b -> uint32 (per the function name), written with the
// sfpi vector types and v_if/v_elseif/v_else predication macros.
//
// Per element the body selects one of these cases:
//   in <= 0    -> writes 0 (negative inputs and zero)
//   exp < 0    -> writes 0
//   exp > 31   -> writes int32 max with the top bit additionally set
//                 (intended as uint32 max, per the original comment)
//   exp == 31  -> mantissa from exman9 shifted by (exp - 23), then the top bit
//                 is set via setsgn
//   otherwise  -> mantissa from exman8 shifted by (exp - 23)
//
// Template parameters:
//   APPROXIMATION_MODE - not referenced anywhere in the body.
//   ITERATIONS         - number of loop passes; dst_reg is advanced once per pass.
template <bool APPROXIMATION_MODE, int ITERATIONS>
inline void _calculate_typecast_fp16b_to_uint32_()
{
// "unroll 0" asks GCC not to unroll this loop.
#pragma GCC unroll 0
for (int d = 0; d < ITERATIONS; d++) {
vFloat in = dst_reg[0];

// check sign: non-positive inputs (negative or zero) are written as 0
v_if (in <= 0) {
dst_reg[0] = 0;
} v_else {
// extract exponent (presumably the unbiased exponent, given the comparisons
// against 0 and 31 below -- confirm)
vInt exp = exexp(in);

v_if (exp < 0) {
// negative exponent: the result is written as 0
dst_reg[0] = 0;
} v_elseif (exp > 31) {
// set to uint32 max value in case of overflow: start from int32 max and
// set the remaining top bit through setsgn
vInt tmp = std::numeric_limits<int32_t>::max();
dst_reg[0] = setsgn(reinterpret<vFloat>(tmp), 1);
} v_elseif (exp == 31) {
// extract mantissa without hidden bit
vInt man = exman9(in);
// exp == 31 in this branch, so the shift amount (exp - 23) is always +8;
// presumably a positive amount makes shft() shift left -- confirm
vInt shift = exp - 23;
man = shft(reinterpret<vUInt>(man), shift);
// add hidden bit back (due to bug when shifting a 1 into MSB): the hidden
// bit belongs in the top bit here, so it is set with setsgn instead of
// being shifted into place
dst_reg[0] = setsgn(reinterpret<vFloat>(man), 1);
} v_else {
// extract mantissa (presumably including the hidden bit, in contrast to
// exman9 above -- confirm)
vInt man = exman8(in);
// shift amount is (exp - 23); it is negative whenever exp < 23, which
// presumably makes shft() shift right by (23 - exp) -- confirm shft's
// sign convention
vInt shift = exp - 23;
man = shft(reinterpret<vUInt>(man), shift);
dst_reg[0] = man;
} v_endif
} v_endif

// advance to the next dst_reg element for the following iteration
dst_reg++;
}
}

// Typecast kernel: uint32 -> fp16b (per the function name).
//
// The instruction sequence appears to convert the value with its top bit
// cleared and then conditionally add 2^31 back (see the "2^31" note on the
// SFPADDI line) -- NOTE(review): confirm this reading against the SFPU ISA docs;
// the TTI_* operand encodings are not defined in this file.
//
// Template parameters:
//   APPROXIMATION_MODE - not referenced anywhere in the body.
//   ITERATIONS         - number of loop passes; dst_reg is advanced once per pass.
template <bool APPROXIMATION_MODE, int ITERATIONS>
inline void _calculate_typecast_uint32_to_fp16b_()
{
// "unroll 0" asks GCC not to unroll this loop.
#pragma GCC unroll 0
for (int d = 0; d < ITERATIONS; d++) {
// presumably resets lane predication so that all lanes are active -- confirm
TTI_SFPENCC(0,0,0,0);
// load the current element (mode 4: presumably a 32-bit integer format)
TTI_SFPLOAD(0,4,3,0);
// presumably copies LREG0 to LREG1 with the sign (top) bit cleared -- confirm
TTI_SFPSETSGN(0,0,1,1);
// integer -> float cast of the top-bit-cleared value (presumably LREG1 -> LREG2)
TTI_SFPCAST(1,2,0);
// rounding step; last operand 1 presumably selects fp16b output
// (presumably LREG2 -> LREG3) -- confirm
TTI_SFP_STOCH_RND(0,0,4,2,3,1);
// presumably enables only the lanes whose original value (LREG0) had its top
// bit set -- confirm
TTI_SFPSETCC(0,0,0,0);
TTI_SFPADDI(0x4f00, 3, 0); // 2^31
// presumably re-enables all lanes before the store
TTI_SFPENCC(0,0,0,0);
// store the result (mode 2: presumably fp16b)
TTI_SFPSTORE(3,2,3,0);
// advance to the next dst_reg element for the following iteration
dst_reg++;
}
}

// Typecast kernel: uint32 -> fp32 (per the function name).
//
// The instruction sequence appears to convert the value with its top bit
// cleared and then conditionally add 2^31 back (see the "2^31" note on the
// SFPADDI line) -- NOTE(review): confirm this reading against the SFPU ISA docs;
// the TTI_* operand encodings are not defined in this file.
//
// Template parameters:
//   APPROXIMATION_MODE - not referenced anywhere in the body.
//   ITERATIONS         - number of loop passes; dst_reg is advanced once per pass.
template <bool APPROXIMATION_MODE, int ITERATIONS>
inline void _calculate_typecast_uint32_to_fp32_()
{
// "unroll 0" asks GCC not to unroll this loop.
#pragma GCC unroll 0
for (int d = 0; d < ITERATIONS; d++) {
// presumably resets lane predication so that all lanes are active -- confirm
TTI_SFPENCC(0,0,0,0);
// load the current element (mode 4: presumably a 32-bit integer format)
TTI_SFPLOAD(0,4,3,0);
// presumably copies LREG0 to LREG1 with the sign (top) bit cleared -- confirm
TTI_SFPSETSGN(0,0,1,1);
// integer -> float cast of the top-bit-cleared value (presumably LREG1 -> LREG2)
TTI_SFPCAST(1,2,0);
// presumably enables only the lanes whose original value (LREG0) had its top
// bit set -- confirm
TTI_SFPSETCC(0,0,0,0);
TTI_SFPADDI(0x4f00, 2, 0); // 2^31
// presumably re-enables all lanes before the store
TTI_SFPENCC(0,0,0,0);
// store the result (mode 3: presumably fp32)
TTI_SFPSTORE(2,3,3,0);
// advance to the next dst_reg element for the following iteration
dst_reg++;
}
}

// Typecast kernel: uint16 -> uint32 (per the function name).
//
// The body only loads each element with one format mode and stores it back
// with another; no arithmetic or cast instruction is issued in between.
//
// Template parameters:
//   APPROXIMATION_MODE - not referenced anywhere in the body.
//   ITERATIONS         - number of loop passes; dst_reg is advanced once per pass.
//
// NOTE(review): the TTI_* operand encodings are not defined in this file; the
// per-line notes are a best reading to be confirmed against the SFPU ISA docs.
template <bool APPROXIMATION_MODE, int ITERATIONS>
inline void _calculate_typecast_uint16_to_uint32_()
{
// "unroll 0" asks GCC not to unroll this loop.
#pragma GCC unroll 0
for (int d = 0; d < ITERATIONS; d++) {
// load the current element (mode 6: presumably the 16-bit unsigned format)
TTI_SFPLOAD(0,6,3,0);
// store the same register back (mode 4: presumably a 32-bit integer format)
TTI_SFPSTORE(0,4,3,0);
// advance to the next dst_reg element for the following iteration
dst_reg++;
}
}

} // namespace sfpu
} // namespace ckernel
2 changes: 1 addition & 1 deletion llk_lib/llk_math_eltwise_unary_datacopy.h
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ inline void _llk_math_eltwise_unary_datacopy_init_(const std::uint32_t transpose
if constexpr (type == A2D) {
eltwise_unary_configure_mop<type, src_b_bcast_type, is_fp32_dest_acc_en, is_int_fpu_en>(p_mova2d::MOV_8_ROWS, 16, num_faces, dst_format);
} else if constexpr (type == B2D) {
eltwise_unary_configure_mop<type, src_b_bcast_type>(p_movb2d::MOV_4_ROWS, 16, num_faces);
eltwise_unary_configure_mop<type, src_b_bcast_type>(p_movb2d::MOV_4_ROWS, 16, num_faces, dst_format);
} else {
FWASSERT("Unsupported op!", false);
}
Expand Down
Loading