Skip to content
This repository has been archived by the owner on Feb 20, 2025. It is now read-only.

Enable uint8 A2D and (un)pack reconfig #31

Merged
merged 1 commit into from
Aug 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 16 additions & 3 deletions common/inc/cpack_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -343,11 +343,24 @@ namespace ckernel::packer
TTI_REG2FLOP(2,0,0,0,THCON_SEC1_REG1_Row_start_section_size_ADDR32+2-THCON_CFGREG_BASE_ADDR32, p_gpr_pack::TMP_LO);
TTI_REG2FLOP(2,0,0,0,THCON_SEC1_REG8_Row_start_section_size_ADDR32+2-THCON_CFGREG_BASE_ADDR32, p_gpr_pack::TMP_LO);

uint32_t reconfig_PCK_DEST_RD_CTRL_Read_unsigned = 0;
dest_rd_ctrl_u dest_rd_ctrl;
dest_rd_ctrl.val = 0;
dest_rd_ctrl.f.PCK_DEST_RD_CTRL_Read_32b_data = (pack_src_format == (uint)DataFormat::Int8) |
(pack_src_format == (uint)DataFormat::UInt8) |
(pack_src_format == (uint)DataFormat::Int32) |
(pack_src_format == (uint)DataFormat::Float32) |
(is_fp32_dest_acc_en ? 1 : 0);
if (pack_dst_format == (uint)DataFormat::UInt8) {
reconfig_PCK_DEST_RD_CTRL_Read_unsigned = 1;
dest_rd_ctrl.f.PCK_DEST_RD_CTRL_Read_unsigned = 1;
}
//Round to 10 bit mantissa from fp32 dest
if(is_fp32_dest_acc_en && (pack_src_format!=(uint)DataFormat::Float32)) {
dest_rd_ctrl.f.PCK_DEST_RD_CTRL_Round_10b_mant = 1;
}
cfg_reg_rmw_tensix<PCK_DEST_RD_CTRL_Read_unsigned_RMW>(reconfig_PCK_DEST_RD_CTRL_Read_unsigned);
cfg_reg_rmw_tensix<PCK_DEST_RD_CTRL_Read_32b_data_ADDR32,
PCK_DEST_RD_CTRL_Read_32b_data_SHAMT,
PCK_DEST_RD_CTRL_Read_32b_data_MASK | PCK_DEST_RD_CTRL_Read_unsigned_MASK | PCK_DEST_RD_CTRL_Round_10b_mant_MASK>
(dest_rd_ctrl.val);

if (IS_BFP_FORMAT(pack_dst_format)) {
// Override exp section size for packers 1,2,3
Expand Down
4 changes: 2 additions & 2 deletions llk_lib/llk_math_eltwise_unary_datacopy.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,8 @@ inline void eltwise_unary_configure_mop(uint rows_per_inst, uint total_rows, con
uint innerloop = (rows_per_inst == p_mova2d::MOV_1_ROW) ? total_rows : (total_rows >> 3);
uint outerloop = num_faces;

if ((is_fp32_dest_acc_en || is_int_fpu_en) && !(dst_format == (uint)DataFormat::UInt16)) {
//use elwadd to handle unpacking data into src A as fp16, but dest is in fp32 mode
if (((is_fp32_dest_acc_en || is_int_fpu_en) && !(dst_format == (uint)DataFormat::UInt16)) || (dst_format == (uint)DataFormat::UInt8)) {
// use elwadd to handle unpacking data into src A as fp16, but dest is in fp32 mode OR to handle uint8 datums
ckernel_template tmp(outerloop, innerloop, TT_OP_ELWADD(0, 0, p_elwise::SRCB_NO_BCAST, ADDR_MOD_2, 0));
tmp.set_end_op(TT_OP_SETRWC(p_setrwc::CLR_AB, 0, 0, 0, 0, p_setrwc::SET_AB));
tmp.program(instrn_buffer);
Expand Down
11 changes: 11 additions & 0 deletions llk_lib/llk_unpack_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,17 @@ inline void _llk_unpack_config_tile_dim_srcb_impl_(const std::uint32_t face_r_di

inline void _llk_unpack_reconfig_data_format_srca_impl_(const std::uint32_t unpack_src_format, const std::uint32_t unpack_dst_format, const std::uint32_t tile_size)
{
alu_config_u alu_payload = {.val = 0};
alu_payload.f.ALU_FORMAT_SPEC_REG0_SrcA = unpack_dst_format;
if ((uint)unpack_src_format == (uint)DataFormat::UInt8) {
alu_payload.f.ALU_FORMAT_SPEC_REG0_SrcAUnsigned = 1;
}
alu_payload.f.ALU_ACC_CTRL_INT8_math_enabled = ((uint)unpack_dst_format == (uint)DataFormat::Int8) ||
((uint)unpack_dst_format == (uint)DataFormat::UInt8) ||
((uint)unpack_dst_format == (uint)DataFormat::Int32);
constexpr uint alu_mask = ALU_FORMAT_SPEC_REG0_SrcA_MASK | ALU_FORMAT_SPEC_REG0_SrcAUnsigned_MASK | ALU_ACC_CTRL_INT8_math_enabled_MASK;
cfg_reg_rmw_tensix<ALU_FORMAT_SPEC_REG0_SrcA_ADDR32, 0, alu_mask>(alu_payload.val);

cfg_reg_rmw_tensix<THCON_SEC0_REG0_TileDescriptor_ADDR32, 0, 0x0f>(unpack_src_format);
cfg_reg_rmw_tensix<THCON_SEC0_REG2_Out_data_format_RMW>(unpack_dst_format);
TT_SETDMAREG(0, LOWER_HALFWORD(tile_size), 0, LO_16(p_gpr_unpack::TILE_SIZE_A)); // update gpr which holds tile size A
Expand Down
Loading