Used hardware PRNG for dropout

tenstorrent · Aug 1, 2024 · c92609b · c92609b
1 parent 250dd61
commit c92609b
Show file tree

Hide file tree

Showing 2 changed files with 41 additions and 48 deletions.
diff --git a/common/inc/ckernel.h b/common/inc/ckernel.h
@@ -553,5 +553,18 @@ inline void serialize_input_loop_end() {
         #endif
             }
     #endif
+}
+
+inline void init_prng_seed(const uint seed)
+{
+    // The seed for the PRNG should be initialized at least once, during chip bootup.
+    volatile uint tt_reg_ptr *cfg = get_cfg_pointer();
+    cfg[PRNG_SEED_Seed_Val_ADDR32] = seed;
+
+    // TODO: ckernel::wait does not work properly. Use ckernel::wait when fixed.
+    for(int i = 0; i < 600; i++) {
+	    TTI_SFPNOP;
     }
 }
+
+}
diff --git a/common/inc/sfpu/ckernel_sfpu_dropout.h b/common/inc/sfpu/ckernel_sfpu_dropout.h
@@ -17,76 +17,56 @@ namespace ckernel
 namespace sfpu
 {
 
+// probability should be in the range [0, INT_MAX] (signed)
+// scale should be the binary representation of a float32
 template <bool APPROXIMATION_MODE, int ITERATIONS>
-inline void _calculate_dropout_(const int iterations, uint prob, uint scale)
+inline void _calculate_dropout_(const int iterations, uint probability, uint scale)
 {
     // SFPU microcode
 
-    FWLOG1("calculate_dropout() -- prob:%x", prob);
+    FWLOG1("calculate_dropout() -- probability:%x", probability);
     FWLOG1("calculate_dropout() -- scale:%x", scale);
 
-    vUInt rand = l_reg[LRegs::LReg3];
-
+    TT_SFPLOADI(p_sfpu::LREG1, 10, scale&0xFFFF);
+    TT_SFPLOADI(p_sfpu::LREG1, 8, scale>>16);
+    TT_SFPLOADI(p_sfpu::LREG2, 10, probability&0xFFFF);
+    TT_SFPLOADI(p_sfpu::LREG2, 8, probability>>16);
     #pragma GCC unroll 0
     for (int d = 0; d < iterations; d++) {
+
         ////////////////////////
         // Scale samples
+        // dst_reg[0] = dst_reg[0] * s2vFloat16b(scale);
         ///////////////////////
-        dst_reg[0] = dst_reg[0] * s2vFloat16b(scale);
+	TTI_SFPLOAD(p_sfpu::LREG0, 0, 3, 0);
+	TTI_SFPMUL(p_sfpu::LREG0, p_sfpu::LREG1, p_sfpu::LCONST_0, p_sfpu::LREG0, 0);
+
+	////////////////////////
+        // Instruction SFPMOV generates a uint32_t pseudorandom number
+        // when instr_mod1 = 8 and lreg_c =  9.
+        // Arguments: (imm12_math, lreg_c, lreg_dest, instr_mod1)
+	// Unset sign-bit for easy comparison with probability
+	////////////////////////
+        TTI_SFPMOV(0, 9, p_sfpu::LREG3, 8);
+	TTI_SFPSETSGN(0, p_sfpu::LREG3, p_sfpu::LREG3, 1);
 
         ////////////////////////
         // Drop samples
+	// v_if (rand < probability)
+        //   dst_reg[0] = vConst0;
         ///////////////////////
-        v_if (rand < prob) {
-            dst_reg[0] = vConst0;
-        }
-        v_endif;
-
-        ////////////////////////
-        // 16-bit PRNG update
-        ///////////////////////
-        vUInt lfsr = vConstIntPrgm1;
-        vUInt tmp = lfsr & rand;
-        rand = rand >> 1;
-        v_if (tmp != 0) {
-            vUInt mask = vConstIntPrgm0;
-            rand ^= mask;
-        }
-        v_endif;
+	TTI_SFPIADD(0, p_sfpu::LREG2, p_sfpu::LREG3, 10);
+	TTI_SFPMOV(0, p_sfpu::LCONST_0, p_sfpu::LREG0, 0);
+	TTI_SFPENCC(0,0,0,0);
+	TTI_SFPSTORE(0,0,3,0);
 
         dst_reg++;
     }
-
-    l_reg[LRegs::LReg3] = rand;
-}
-
-inline void _init_dropout_seed_(uint16_t p2){
-    FWLOG1("calculate_dropout() -- input seed:%x", p2);
-
-    uint32_t noc_id_reg = NOC_CMD_BUF_READ_REG(0, 0, NOC_NODE_ID);
-
-    uint16_t my_x = noc_id_reg & NOC_NODE_ID_MASK;
-    uint16_t my_y = (noc_id_reg >> NOC_ADDR_NODE_ID_BITS) & NOC_NODE_ID_MASK;
-
-    uint16_t per_tensix_input_seed = p2 ^ (my_x << my_y);
-
-    FWLOG1("calculate_dropout() -- calculated seed:%x", per_tensix_input_seed);
-
-    vInt result = l_reg[LRegs::LReg3];
-
-    vInt tmp = vConstTileId << 10;
-    vInt ptis = per_tensix_input_seed;
-    result = ~(tmp & ptis) & (tmp | ptis);
-
-    l_reg[LRegs::LReg3] = result;
 }
 
 inline void _init_dropout_(const uint seed)
 {
-    vConstIntPrgm0 = 0xb400;
-    vConstIntPrgm1 = 0x1; // binary 0b1 - used to extract LSB
-
-    _init_dropout_seed_(seed);
+    init_prng_seed(seed);
 }
 
 } // namespace sfpu