This repository has been archived by the owner on Feb 20, 2025. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add binary shift left/right LLK (#49)
* Add binary shift LLK
- Loading branch information
Showing
2 changed files
with
70 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
// SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC | ||
// | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
#pragma once | ||
|
||
#include "ckernel_defs.h" | ||
#include "ckernel.h" | ||
#include "noc_nonblocking_api.h" | ||
|
||
#include "sfpi.h" | ||
|
||
using namespace sfpi; | ||
|
||
namespace ckernel | ||
{ | ||
namespace sfpu | ||
{ | ||
|
||
// Element-wise integer left shift implemented as raw SFPU instructions. | ||
// | ||
// Template parameters: | ||
//   APPROXIMATION_MODE - not referenced anywhere in this body. | ||
//   ITERATIONS         - number of times the load/shift/store sequence is issued. | ||
// Parameter: | ||
//   dst_offset - scaled by dst_tile_size (64) to form the address operand of the | ||
//                second load; presumably the tile offset of the shift_amount | ||
//                operand relative to the value operand - TODO confirm with caller. | ||
// | ||
// Register roles as implied by the inline comments (assumption, verify against | ||
// the SFPU ISA): LREG0 = value being shifted / result, LREG1 = shift_amount, | ||
// LREG2 = scratch for the range check. | ||
template <bool APPROXIMATION_MODE, int ITERATIONS> | ||
inline void _calculate_binary_left_shift_(const uint dst_offset) { | ||
// SFPU microcode | ||
for (int d = 0; d < ITERATIONS; d++) { | ||
constexpr uint dst_tile_size = 64; | ||
// load | ||
// First load uses address 0; second load uses dst_offset*dst_tile_size. | ||
// The second one uses the TT_ (not TTI_) macro form, presumably because its | ||
// address operand depends on the runtime argument dst_offset - TODO confirm. | ||
TTI_SFPLOAD(0,4,3,0); | ||
TT_SFPLOAD(1,4,3,dst_offset*dst_tile_size); | ||
// if (shift_amount < 0 OR shift_amount >= 32) -> result should be 0 | ||
// NOTE(review): the five instructions below appear to implement that check via | ||
// the per-lane condition code: SETCC/IADD presumably narrow the enabled lanes | ||
// to 0 <= shift_amount < 32, COMPC presumably inverts that selection, MOV then | ||
// overwrites the value with register 9 (presumably a zero constant) on the | ||
// out-of-range lanes, and ENCC restores unconditional execution. Operand | ||
// encodings are not visible here - confirm against the SFPU ISA reference. | ||
TTI_SFPSETCC(0,1,0,4); | ||
TTI_SFPIADD(0xFE0,1,2,1); // 0xFE0 = -32 | ||
TTI_SFPCOMPC(0,0,0,0); | ||
TTI_SFPMOV(0,9,0,0); | ||
TTI_SFPENCC(0,0,0,0); | ||
// shift left | ||
// Runs after ENCC, i.e. outside the conditional region above; lanes zeroed by | ||
// the guard are shifted too (a zero value stays zero for a plain bit shift). | ||
TTI_SFPSHFT(0,1,0,0); | ||
// store result | ||
// Same operand tuple as the first load, so the result presumably overwrites | ||
// the value operand in place - TODO confirm. | ||
TTI_SFPSTORE(0,4,3,0); | ||
// Advance to the next chunk of rows before the next iteration (dst_reg comes | ||
// from sfpi; exact step size is not visible in this file). | ||
dst_reg++; | ||
} | ||
} | ||
|
||
// Element-wise arithmetic (sign-extending) integer right shift implemented as | ||
// raw SFPU instructions: a shift by the negated amount followed by a manual | ||
// fill of the vacated high bits with 1's when the original value was negative. | ||
// | ||
// Template parameters: | ||
//   APPROXIMATION_MODE - not referenced anywhere in this body. | ||
//   ITERATIONS         - number of times the load/shift/store sequence is issued. | ||
// Parameter: | ||
//   dst_offset - scaled by dst_tile_size (64) to form the address operand of the | ||
//                second load; presumably the tile offset of the shift_amount | ||
//                operand relative to the value operand - TODO confirm with caller. | ||
// | ||
// Register roles as implied by the inline comments (assumption, verify against | ||
// the SFPU ISA): LREG0 = shift_value / result, LREG1 = shift_amount (negated | ||
// in place), LREG2 = 32-shift_amount, LREG3 = all-ones mask, LREG4 = saved copy | ||
// of the original shift_value. | ||
// | ||
// NOTE(review): unlike _calculate_binary_left_shift_, there is no guard here | ||
// for shift_amount < 0 or shift_amount >= 32 - confirm whether callers | ||
// guarantee the range or whether a guard is missing. | ||
template <bool APPROXIMATION_MODE, int ITERATIONS> | ||
inline void _calculate_binary_right_shift_(const uint dst_offset) { | ||
// SFPU microcode | ||
for (int d = 0; d < ITERATIONS; d++) { | ||
constexpr uint dst_tile_size = 64; | ||
// load | ||
// First load uses address 0; second load uses dst_offset*dst_tile_size. | ||
// The second one uses the TT_ (not TTI_) macro form, presumably because its | ||
// address operand depends on the runtime argument dst_offset - TODO confirm. | ||
TTI_SFPLOAD(0,4,3,0); | ||
TT_SFPLOAD(1,4,3,dst_offset*dst_tile_size); | ||
TTI_SFPMOV(0,0,4,0); // save shift_value for later | ||
// shift right | ||
// The shift instruction is the same one the left-shift kernel uses; the | ||
// direction is selected by the sign of the amount, hence the negation first. | ||
TTI_SFPIADD(0,9,1,6); // take negative of shift_amount to shift right | ||
TTI_SFPSHFT(0,1,0,0); | ||
// if shift_value was negative, need to shift in 1's manually | ||
// The two SETCC instructions are issued back to back; presumably their | ||
// conditions combine so the fix-up below only runs on lanes satisfying both | ||
// - TODO confirm against the SFPU ISA. The sign test reads the saved copy in | ||
// register 4 because register 0 has already been shifted. | ||
TTI_SFPSETCC(0,4,0,0); // only run if shift_value is negative | ||
TTI_SFPSETCC(0,1,0,2); // only needed if shift_amount>0 | ||
// Register 1 holds the negated shift_amount at this point (see the SFPIADD | ||
// above), so adding 32 to it yields 32-shift_amount. | ||
TTI_SFPIADD(0x020,1,2,5); // take 32-shift_amount (0x020 = 32) | ||
TTI_SFPNOT(0,9,3,0); // put all 1's into LREG3 | ||
TTI_SFPSHFT(0,2,3,0); // shift all 1's by 32-shift_amount | ||
TTI_SFPOR(0,3,0,0); // OR in the 1's | ||
// Leave the conditional region so the store below is unconditional. | ||
TTI_SFPENCC(0,0,0,0); | ||
// store result | ||
// Same operand tuple as the first load, so the result presumably overwrites | ||
// the value operand in place - TODO confirm. | ||
TTI_SFPSTORE(0,4,3,0); | ||
// Advance to the next chunk of rows before the next iteration (dst_reg comes | ||
// from sfpi; exact step size is not visible in this file). | ||
dst_reg++; | ||
} | ||
} | ||
|
||
} // namespace sfpu | ||
} // namespace ckernel |