Skip to content
This repository has been archived by the owner on Feb 20, 2025. It is now read-only.

Commit

Permalink
Remove tile clear to enable output accumulation
Browse files Browse the repository at this point in the history
  • Loading branch information
rdjogoTT committed Jul 19, 2024
1 parent 3f90b5c commit 0c6abb2
Showing 1 changed file with 7 additions and 6 deletions.
13 changes: 7 additions & 6 deletions common/inc/sfpu/ckernel_sfpu_reshuffle_rows.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,13 @@ inline void _calculate_reshuffle_rows_(const uint idx_addr)
constexpr uint output_tile_offset = 64;

// Clear DEST tile 1
for (uint row=0; row < 32; row+=4) {
TT_SFPSTORE(p_sfpu::LCONST_0, 0, ADDR_MOD_3, output_tile_offset + row);
TT_SFPSTORE(p_sfpu::LCONST_0, 0, ADDR_MOD_3, output_tile_offset + row + 2);
TT_SFPSTORE(p_sfpu::LCONST_0, 0, ADDR_MOD_3, output_tile_offset + row + 32);
TT_SFPSTORE(p_sfpu::LCONST_0, 0, ADDR_MOD_3, output_tile_offset + row + 34);
}
// TODO (Radomir): Add an optional, more efficient clear implemented with a tile copy
// for (uint row=0; row < 32; row+=4) {
// TT_SFPSTORE(p_sfpu::LCONST_0, 0, ADDR_MOD_3, output_tile_offset + row);
// TT_SFPSTORE(p_sfpu::LCONST_0, 0, ADDR_MOD_3, output_tile_offset + row + 2);
// TT_SFPSTORE(p_sfpu::LCONST_0, 0, ADDR_MOD_3, output_tile_offset + row + 32);
// TT_SFPSTORE(p_sfpu::LCONST_0, 0, ADDR_MOD_3, output_tile_offset + row + 34);
// }

volatile tt_l1_ptr uint8_t *idx_ptr = reinterpret_cast<volatile tt_l1_ptr uint8_t*>(idx_addr+(1<<4));
static constexpr uint input_lreg[4] = {p_sfpu::LREG0, p_sfpu::LREG1, p_sfpu::LREG2, p_sfpu::LREG3};
Expand Down

0 comments on commit 0c6abb2

Please sign in to comment.