thompson_sampling.py
import numpy as np

from .base_mab import BaseMAB
from .randmax import randmax


class ThompsonSampling(BaseMAB):
    """Thompson Sampling with a Beta(alpha, beta) prior and a Bernoulli likelihood.

    Parameters
    ----------
    nbArms : int
        Number of arms of the bandit.
    alpha : float
        Prior value added to the first parameter of the Beta distribution.
    beta : float
        Prior value added to the second parameter of the Beta distribution.
    """

    def __init__(self, nbArms, alpha=1, beta=1):
        self.nbArms = nbArms
        self.clear()
        # Beta distribution prior parameters
        self.alpha = alpha
        self.beta = beta

    def clear(self):
        # Per-arm statistics: number of draws, cumulative (binarized) rewards, empirical means
        self.nbDraws = np.zeros(self.nbArms)
        self.cumRewards = np.zeros(self.nbArms)
        self.arm_means = np.zeros(self.nbArms)

    def chooseArmToPlay(self):
        # Sample one value per arm from the posterior Beta(alpha + successes, beta + failures)
        # and play the arm with the largest sample (randmax breaks ties).
        samples = np.random.beta(self.alpha + self.cumRewards,
                                 self.beta + self.nbDraws - self.cumRewards)
        return randmax(samples)

    def receiveReward(self, arm, reward):
        # Binarization trick in case rewards are not binary:
        # draw a Bernoulli sample with success probability equal to the reward.
        bin_reward = float(np.random.random() < reward)
        self.cumRewards[arm] += bin_reward
        self.nbDraws[arm] += 1
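

if __name__ == "__main__":
    # Minimal usage sketch on a toy Bernoulli bandit. The arm means below are
    # illustrative only, and randmax is assumed to return the index of a maximal
    # entry. Because of the relative imports, run this as a module, e.g.
    # ``python -m <package>.thompson_sampling``.
    rng = np.random.default_rng(0)
    true_means = [0.2, 0.5, 0.8]
    agent = ThompsonSampling(nbArms=len(true_means))
    total_reward = 0.0
    horizon = 1000
    for _ in range(horizon):
        arm = agent.chooseArmToPlay()
        reward = float(rng.random() < true_means[arm])  # Bernoulli(true_means[arm]) draw
        agent.receiveReward(arm, reward)
        total_reward += reward
    print("draws per arm:", agent.nbDraws)
    print("average reward:", total_reward / horizon)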