johnmyleswhite · yoshidan · Oct 20, 2016
diff --git a/generate_plots.sh b/generate_plots.sh
@@ -22,6 +22,7 @@
 # python algorithms/softmax/test_annealing.py
 # python algorithms/ucb/test_ucb1.py
 # python algorithms/exp3/test_exp3.py
+# python algorithms/ts/test_thompson_sampling.py
 # cd ..
 
 # Use R for generating graphs

diff --git a/python/algorithms/ts/test_thompson_sampling.py b/python/algorithms/ts/test_thompson_sampling.py
@@ -0,0 +1,20 @@
+execfile("core.py")
+
+import random
+
+random.seed(1)
+means = [0.1, 0.1, 0.1, 0.1, 0.9]
+n_arms = len(means)
+random.shuffle(means)
+arms = map(lambda (mu): BernoulliArm(mu), means)
+print("Best arm is " + str(ind_max(means)))
+
+f = open("algorithms/ts/thompson_sampling_results.csv", "w")
+
+algo = ThompsonSampling(1,1, [], [], [])
+algo.initialize(n_arms)
+results = test_algorithm(algo, arms, 5000, 250)
+for i in range(len(results[0])):
+  f.write(",".join([str(results[j][i]) for j in range(len(results))]) + "\n")
+
+f.close()
diff --git a/python/algorithms/ts/thompson_sampling.py b/python/algorithms/ts/thompson_sampling.py
@@ -0,0 +1,36 @@
+import random
+
+def ind_max(x):
+  m = max(x)
+  return x.index(m)
+
+class ThompsonSampling():
+  def __init__(self, initial_alpha, initial_beta, counts, values, s_counts):
+    self.counts = counts
+    self.s_counts = s_counts
+    self.values = values
+    self.alpha = initial_alpha
+    self.beta = initial_beta
+    return
+
+  def initialize(self, n_arms):
+    self.counts = [0 for col in range(n_arms)]
+    self.values = [0.0 for col in range(n_arms)]
+    self.s_counts = [0 for col in range(n_arms)]
+    return
+
+  def select_arm(self):
+    rho = lambda i:random.betavariate(self.alpha + self.s_counts[i], self.beta + self.counts[i] - self.s_counts[i])
+    mu = map(rho, range(len(self.counts)))
+    return ind_max(mu);
+
+  def update(self, chosen_arm, reward):
+    self.counts[chosen_arm] = self.counts[chosen_arm] + 1
+    if reward == 1:
+      self.s_counts[chosen_arm] += 1
+
+    n = self.counts[chosen_arm]
+    value = self.values[chosen_arm]
+    new_value = ((n - 1) / float(n)) * value + (1 / float(n)) * reward
+    self.values[chosen_arm] = new_value
+    return
diff --git a/python/core.py b/python/core.py
@@ -20,6 +20,7 @@ def ind_max(x):
 from algorithms.ucb.ucb2 import *
 from algorithms.exp3.exp3 import *
 from algorithms.hedge.hedge import *
+from algorithms.ts.thompson_sampling import *
 
 # # Testing framework
 from testing_framework.tests import *
diff --git a/r/ts/plot_thompson_sampling.R b/r/ts/plot_thompson_sampling.R
@@ -0,0 +1,52 @@
+library("plyr")
+library("ggplot2")
+
+results <- read.csv("python/algorithms/ts/thompson_sampling_results.csv", header = FALSE)
+names(results) <- c("Sim", "T", "ChosenArm", "Reward", "CumulativeReward")
+
+# Plot average reward as a function of time.
+stats <- ddply(results,
+          c("T"),
+          function (df) {mean(df$Reward)})
+ggplot(stats, aes(x = T, y = V1)) +
+  geom_line() +
+  ylim(0, 1) +
+  xlab("Time") +
+  ylab("Average Reward") +
+ggtitle("Performance of the Thompson Sampling Algorithm")
+ggsave("r/graphs/ts_average_reward.pdf")
+
+# Plot frequency of selecting correct arm as a function of time.
+# In this instance, arm 1 is taken as the correct arm; it must match the "Best arm is ..." index printed by the Python test.
+stats <- ddply(results,
+         c("T"),
+         function (df) {mean(df$ChosenArm == 1)})
+ggplot(stats, aes(x = T, y = V1)) +
+  geom_line() +
+  ylim(0, 1) +
+  xlab("Time") +
+  ylab("Probability of Selecting Best Arm") +
+ggtitle("Accuracy of the Thompson Sampling Algorithm")
+ggsave("r/graphs/ts_average_accuracy.pdf")
+
+# Plot variance of chosen arms as a function of time.
+stats <- ddply(results,
+  c("T"),
+  function (df) {var(df$ChosenArm)})
+ggplot(stats, aes(x = T, y = V1)) +
+  geom_line() +
+  xlab("Time") +
+  ylab("Variance of Chosen Arm") +
+ggtitle("Variability of the Thompson Sampling Algorithm")
+ggsave("r/graphs/ts_variance_choices.pdf")
+
+# Plot cumulative reward as a function of time.
+stats <- ddply(results,
+  c("T"),
+  function (df) {mean(df$CumulativeReward)})
+ggplot(stats, aes(x = T, y = V1)) +
+  geom_line() +
+  xlab("Time") +
+  ylab("Cumulative Reward of Chosen Arm") +
+ggtitle("Cumulative Reward of the Thompson Sampling Algorithm")
+ggsave("r/graphs/ts_cumulative_reward.pdf")