Skip to content
This repository has been archived by the owner on Feb 20, 2025. It is now read-only.

Commit

Permalink
Used hardware PRNG for dropout
Browse files Browse the repository at this point in the history
  • Loading branch information
Anil Mahmud authored and amahmudTT committed Aug 1, 2024
1 parent 250dd61 commit c92609b
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 48 deletions.
13 changes: 13 additions & 0 deletions common/inc/ckernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -553,5 +553,18 @@ inline void serialize_input_loop_end() {
#endif
}
#endif
}

inline void init_prng_seed(const uint seed)
{
// The seed for PRNG should at least be initialzied during chip bootup time.
volatile uint tt_reg_ptr *cfg = get_cfg_pointer();
cfg[PRNG_SEED_Seed_Val_ADDR32] = seed;

// TODO: ckernel::wait does not work properly. Use ckernel::wait when fixed.
for(int i = 0; i < 600; i++) {
TTI_SFPNOP;
}
}

}
76 changes: 28 additions & 48 deletions common/inc/sfpu/ckernel_sfpu_dropout.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,76 +17,56 @@ namespace ckernel
namespace sfpu
{

// Applies dropout to `iterations` consecutive dst_reg entries: each sample is
// multiplied by `scale`, then set to zero when a per-sample pseudorandom value
// is below the drop probability.
//
// probability should be between 0 - INT_MAX (signed)
// scale should be binary representation of a float32
//
// NOTE(review): this listing appears to be a rendered commit diff, so the
// pre-change lines (`prob`, software LFSR kept in LReg3) and the post-change
// lines (`probability`, SFPMOV-based random numbers) are interleaved below,
// including two signature lines. Confirm against the actual header before
// relying on statement order.
template <bool APPROXIMATION_MODE, int ITERATIONS>
inline void _calculate_dropout_(const int iterations, uint prob, uint scale)
inline void _calculate_dropout_(const int iterations, uint probability, uint scale)
{
// SFPU microcode

FWLOG1("calculate_dropout() -- prob:%x", prob);
FWLOG1("calculate_dropout() -- probability:%x", probability);
FWLOG1("calculate_dropout() -- scale:%x", scale);

// The random value is read from LReg3 here and written back to LReg3 after the loop.
vUInt rand = l_reg[LRegs::LReg3];

// Load `scale` into LREG1 and `probability` into LREG2, 16 bits at a time
// (low half first, then high half).
// NOTE(review): the mode arguments 10 and 8 presumably select the lower and
// upper 16-bit halves of the destination register -- confirm against the
// SFPLOADI documentation.
TT_SFPLOADI(p_sfpu::LREG1, 10, scale&0xFFFF);
TT_SFPLOADI(p_sfpu::LREG1, 8, scale>>16);
TT_SFPLOADI(p_sfpu::LREG2, 10, probability&0xFFFF);
TT_SFPLOADI(p_sfpu::LREG2, 8, probability>>16);
// Tell GCC not to unroll the per-sample loop.
#pragma GCC unroll 0
for (int d = 0; d < iterations; d++) {

////////////////////////
// Scale samples
// dst_reg[0] = dst_reg[0] * s2vFloat16b(scale);
///////////////////////
dst_reg[0] = dst_reg[0] * s2vFloat16b(scale);
TTI_SFPLOAD(p_sfpu::LREG0, 0, 3, 0);
TTI_SFPMUL(p_sfpu::LREG0, p_sfpu::LREG1, p_sfpu::LCONST_0, p_sfpu::LREG0, 0);

////////////////////////
// Instruction SFPMOV generates a uint32_t pseudorandom number
// when instr_mod1 = 8 and lreg_c = 9.
// Arguments: (imm12_math, lreg_c, lreg_dest, instr_mod1)
// Unset sign-bit for easy comparison with probability
////////////////////////
TTI_SFPMOV(0, 9, p_sfpu::LREG3, 8);
TTI_SFPSETSGN(0, p_sfpu::LREG3, p_sfpu::LREG3, 1);

////////////////////////
// Drop samples
// v_if (rand < probability)
// dst_reg[0] = vConst0;
///////////////////////
v_if (rand < prob) {
dst_reg[0] = vConst0;
}
v_endif;

////////////////////////
// 16-bit PRNG update
///////////////////////
// Shift `rand` right by one; when the bit selected by vConstIntPrgm1 was set
// before the shift, XOR in the mask held in vConstIntPrgm0.
vUInt lfsr = vConstIntPrgm1;
vUInt tmp = lfsr & rand;
rand = rand >> 1;
v_if (tmp != 0) {
vUInt mask = vConstIntPrgm0;
rand ^= mask;
}
v_endif;
// NOTE(review): presumably this sequence compares the random value in LREG3
// with the probability in LREG2, replaces LREG0 with the zero constant on the
// lanes selected by that comparison, re-enables all lanes, and stores LREG0
// back to the destination -- confirm against the SFPU instruction reference.
TTI_SFPIADD(0, p_sfpu::LREG2, p_sfpu::LREG3, 10);
TTI_SFPMOV(0, p_sfpu::LCONST_0, p_sfpu::LREG0, 0);
TTI_SFPENCC(0,0,0,0);
TTI_SFPSTORE(0,0,3,0);

// Advance to the next dst_reg entry.
dst_reg++;
}

l_reg[LRegs::LReg3] = rand;
}

inline void _init_dropout_seed_(uint16_t p2){
FWLOG1("calculate_dropout() -- input seed:%x", p2);

uint32_t noc_id_reg = NOC_CMD_BUF_READ_REG(0, 0, NOC_NODE_ID);

uint16_t my_x = noc_id_reg & NOC_NODE_ID_MASK;
uint16_t my_y = (noc_id_reg >> NOC_ADDR_NODE_ID_BITS) & NOC_NODE_ID_MASK;

uint16_t per_tensix_input_seed = p2 ^ (my_x << my_y);

FWLOG1("calculate_dropout() -- calculated seed:%x", per_tensix_input_seed);

vInt result = l_reg[LRegs::LReg3];

vInt tmp = vConstTileId << 10;
vInt ptis = per_tensix_input_seed;
result = ~(tmp & ptis) & (tmp | ptis);

l_reg[LRegs::LReg3] = result;
}

// Prepares dropout state from `seed`.
// NOTE(review): this listing appears to interleave the pre- and post-commit
// bodies (the vConstIntPrgm*/_init_dropout_seed_ lines alongside the
// init_prng_seed call) -- confirm against the actual header.
inline void _init_dropout_(const uint seed)
{
// Mask that the 16-bit PRNG update in _calculate_dropout_ XORs into the random value.
vConstIntPrgm0 = 0xb400;
vConstIntPrgm1 = 0x1; // binary 0b1 - used to extract LSB

// _init_dropout_seed_ takes a uint16_t, so `seed` is implicitly narrowed here
// (assuming uint is wider than 16 bits).
_init_dropout_seed_(seed);
// Writes `seed` to the PRNG seed config register.
init_prng_seed(seed);
}

} // namespace sfpu
Expand Down

0 comments on commit c92609b

Please sign in to comment.