Note: the blob ids on the "index" lines of the three new files are those of
the originally committed contents, not of the revised contents below.

diff --git a/generate_plots.sh b/generate_plots.sh
index e8eb60d..c9aee30 100644
--- a/generate_plots.sh
+++ b/generate_plots.sh
@@ -22,6 +22,7 @@
 # python algorithms/softmax/test_annealing.py
 # python algorithms/ucb/test_ucb1.py
 # python algorithms/exp3/test_exp3.py
+# python algorithms/ts/test_thompson_sampling.py
 # cd ..
 
 # Use R for generating graphs
diff --git a/python/algorithms/ts/test_thompson_sampling.py b/python/algorithms/ts/test_thompson_sampling.py
new file mode 100644
index 0000000..22967c3
--- /dev/null
+++ b/python/algorithms/ts/test_thompson_sampling.py
@@ -0,0 +1,22 @@
+# Simulate Thompson Sampling on five Bernoulli arms and write the results to CSV.
+# NOTE: execfile() exists only in Python 2. core.py is expected to bring
+# BernoulliArm, ThompsonSampling, ind_max and test_algorithm into scope.
+execfile("core.py")
+
+import random
+
+random.seed(1)
+means = [0.1, 0.1, 0.1, 0.1, 0.9]
+n_arms = len(means)
+random.shuffle(means)
+arms = [BernoulliArm(mu) for mu in means]
+print("Best arm is " + str(ind_max(means)))
+
+algo = ThompsonSampling(1, 1, [], [], [])
+algo.initialize(n_arms)
+results = test_algorithm(algo, arms, 5000, 250)
+
+# results holds one list per CSV column; emit one row per recorded step.
+with open("algorithms/ts/thompson_sampling_results.csv", "w") as f:
+    for row in zip(*results):
+        f.write(",".join(str(x) for x in row) + "\n")
diff --git a/python/algorithms/ts/thompson_sampling.py b/python/algorithms/ts/thompson_sampling.py
new file mode 100644
index 0000000..11321f9
--- /dev/null
+++ b/python/algorithms/ts/thompson_sampling.py
@@ -0,0 +1,51 @@
+import random
+
+
+def ind_max(x):
+    """Return the index of the first occurrence of the largest item in list x."""
+    m = max(x)
+    return x.index(m)
+
+
+class ThompsonSampling(object):
+    """Thompson Sampling bandit algorithm with a Beta(alpha, beta) prior per arm.
+
+    counts[i] is the number of pulls of arm i, s_counts[i] the number of those
+    pulls whose reward equalled 1, and values[i] the running mean reward.
+    """
+
+    def __init__(self, initial_alpha, initial_beta, counts, values, s_counts):
+        self.counts = counts
+        self.s_counts = s_counts
+        self.values = values
+        self.alpha = initial_alpha
+        self.beta = initial_beta
+
+    def initialize(self, n_arms):
+        """Reset the per-arm statistics for a problem with n_arms arms."""
+        self.counts = [0] * n_arms
+        self.values = [0.0] * n_arms
+        self.s_counts = [0] * n_arms
+
+    def select_arm(self):
+        """Draw one Beta sample per arm and return the index of the largest draw."""
+        # Each arm is sampled from Beta(alpha + successes, beta + failures).
+        # A real list rather than map(): ind_max() needs both max() and
+        # .index(), which a Python 3 map object cannot provide.
+        draws = [
+            random.betavariate(self.alpha + wins, self.beta + pulls - wins)
+            for pulls, wins in zip(self.counts, self.s_counts)
+        ]
+        return ind_max(draws)
+
+    def update(self, chosen_arm, reward):
+        """Record that pulling chosen_arm produced reward."""
+        self.counts[chosen_arm] += 1
+        if reward == 1:
+            self.s_counts[chosen_arm] += 1
+
+        # Incremental update of the arm's running mean reward.
+        n = self.counts[chosen_arm]
+        value = self.values[chosen_arm]
+        new_value = ((n - 1) / float(n)) * value + (1 / float(n)) * reward
+        self.values[chosen_arm] = new_value
diff --git a/python/core.py b/python/core.py
index 4a2266d..37ef735 100644
--- a/python/core.py
+++ b/python/core.py
@@ -20,6 +20,7 @@ def ind_max(x):
 from algorithms.ucb.ucb2 import *
 from algorithms.exp3.exp3 import *
 from algorithms.hedge.hedge import *
+from algorithms.ts.thompson_sampling import *
 
 # # Testing framework
 from testing_framework.tests import *
diff --git a/r/ts/plot_thompson_sampling.R b/r/ts/plot_thompson_sampling.R
new file mode 100644
index 0000000..84a85ec
--- /dev/null
+++ b/r/ts/plot_thompson_sampling.R
@@ -0,0 +1,53 @@
+library("plyr")
+library("ggplot2")
+
+results <- read.csv("python/algorithms/ts/thompson_sampling_results.csv", header = FALSE)
+names(results) <- c("Sim", "T", "ChosenArm", "Reward", "CumulativeReward")
+
+# Plot average reward as a function of time.
+stats <- ddply(results,
+               c("T"),
+               function (df) {mean(df$Reward)})
+ggplot(stats, aes(x = T, y = V1)) +
+  geom_line() +
+  ylim(0, 1) +
+  xlab("Time") +
+  ylab("Average Reward") +
+  ggtitle("Performance of the Thompson Sampling Algorithm")
+ggsave("r/graphs/ts_average_reward.pdf")
+
+# Plot frequency of selecting correct arm as a function of time.
+# NOTE(review): the arm compared below (1) must be the index printed as
+# "Best arm is ..." by test_thompson_sampling.py -- confirm it.
+stats <- ddply(results,
+               c("T"),
+               function (df) {mean(df$ChosenArm == 1)})
+ggplot(stats, aes(x = T, y = V1)) +
+  geom_line() +
+  ylim(0, 1) +
+  xlab("Time") +
+  ylab("Probability of Selecting Best Arm") +
+  ggtitle("Accuracy of the Thompson Sampling Algorithm")
+ggsave("r/graphs/ts_average_accuracy.pdf")
+
+# Plot variance of chosen arms as a function of time.
+stats <- ddply(results,
+               c("T"),
+               function (df) {var(df$ChosenArm)})
+ggplot(stats, aes(x = T, y = V1)) +
+  geom_line() +
+  xlab("Time") +
+  ylab("Variance of Chosen Arm") +
+  ggtitle("Variability of the Thompson Sampling Algorithm")
+ggsave("r/graphs/ts_variance_choices.pdf")
+
+# Plot cumulative reward as a function of time.
+stats <- ddply(results,
+               c("T"),
+               function (df) {mean(df$CumulativeReward)})
+ggplot(stats, aes(x = T, y = V1)) +
+  geom_line() +
+  xlab("Time") +
+  ylab("Cumulative Reward of Chosen Arm") +
+  ggtitle("Cumulative Reward of the Thompson Sampling Algorithm")
+ggsave("r/graphs/ts_cumulative_reward.pdf")