-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathblackjack.py
457 lines (399 loc) · 18.4 KB
/
blackjack.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
import random
from enum import IntEnum
import sys
import argparse
import math
class Card:
    """
    A single playing card.

    color is the suit name, rank is the printed rank (for example "King"),
    and value is the number of points the card contributes to a hand before
    any Ace adjustment.
    """
    def __init__(self, color, rank, value):
        self.color = color
        self.rank = rank
        self.value = value

    def __str__(self):
        return self.rank + " of " + self.color

    def __eq__(self, other):
        """
        Two cards are equal when suit and rank match; value is not compared.
        Objects that lack those attributes are reported as "not comparable"
        instead of raising, so `card == "text"` or `card == None` is simply False.
        """
        try:
            return self.color == other.color and self.rank == other.rank
        except AttributeError:
            return NotImplemented

    def __hash__(self):
        # Defining __eq__ alone would make cards unhashable; hash on the same
        # fields that __eq__ compares so equal cards share a hash.
        return hash((self.color, self.rank))
def generate_deck(suits=["Hearts", "Spades", "Clubs", "Diamonds"],
                  ranks=[("2",2), ("3",3), ("4",4), ("5",5), ("6",6), ("7",7), ("8",8), ("9",9), ("10",10), ("Jack",10), ("Queen",10), ("King",10), ("Ace",11)]):
    """
    Build a deck containing one Card for every (suit, rank) combination.

    suits is an iterable of suit names; ranks is an iterable of
    (rank name, point value) pairs. Cards are ordered suit by suit, with the
    ranks in the given order inside each suit. The default arguments are
    only read, never modified.
    """
    return [Card(suit, rank, value) for suit in suits for (rank, value) in ranks]
def format(cards):
    """
    Render either a single Card or an iterable of cards as text.
    A collection becomes the individual card strings joined by ", ".
    """
    if not isinstance(cards, Card):
        return ", ".join(str(card) for card in cards)
    return str(cards)
def get_value(cards):
    """
    Return the point total of a hand.

    Every card contributes its `value`. While the total is above 21, each
    card whose rank is "Ace" may give up 10 points (counting as 1 instead of
    11), one Ace at a time, until the total is no longer above 21 or no
    Aces are left to demote.
    """
    total = 0
    demotable_aces = 0
    for card in cards:
        total += card.value
        demotable_aces += card.rank == "Ace"
    # Demote at most one time per Ace, and stop as soon as the hand is safe.
    for _ in range(demotable_aces):
        if total <= 21:
            break
        total -= 10
    return total
class PlayerType(IntEnum):
    """Integer constants naming the two participant roles."""
    PLAYER = 1
    DEALER = 2
class Action(IntEnum):
    """Moves a participant can return from get_action; Game.play interprets them."""
    # Draw one card and keep playing the hand.
    HIT = 1
    # Stop drawing; the hand is final.
    STAND = 2
    # Draw exactly one more card, double the bet, and end the hand.
    DOUBLE_DOWN = 3
    # Divide a two-card hand into two hands that are then played separately.
    SPLIT = 4
class Player:
    """
    Baseline agent and base class for the other agents: it picks uniformly
    at random among the actions it is offered.
    """
    def __init__(self, name, deck):
        self.name, self.deck = name, deck

    def get_action(self, cards, actions, dealer_cards):
        """Return one element of `actions`, chosen at random; the hands are ignored."""
        picked = random.choice(actions)
        return picked

    def reset(self):
        """Hook invoked by Game.round before a new round; nothing to reset here."""
        return None
class TimidPlayer(Player):
    """
    The timid player always stands, and never takes additional cards.
    """
    def get_action(self, cards, actions, dealer_cards):
        # All three arguments are ignored; the answer is unconditionally STAND.
        return Action.STAND
class BasicStrategyPlayer(Player):
    """
    Two-threshold strategy keyed on the dealer's visible card.

    When the dealer shows a card worth less than 7 the dealer is more likely
    to bust, so we only hit below 12. Against a stronger card the dealer is
    more likely to stand on a high total that we have to beat, so we hit
    below 17. In every other situation we stand.
    """
    def get_action(self, cards, actions, dealer_cards):
        hand_total = get_value(cards)
        # Pick the "hit below" limit from the strength of the dealer's open card.
        hit_below = 12 if dealer_cards[0].value < 7 else 17
        return Action.HIT if hand_total < hit_below else Action.STAND
"""
Represents a node in the MCTS tree which stores information
about itself along with references to its parent/children.
For the UCB1 formula I defined a constant CURIOSITY_FACTOR
to alter the favorability of less explored nodes over
those best known for higher overall values.
"""
# Exploration weight: MCTSNode.ucb1 multiplies its square-root term by this
# value, so larger values favor less-visited nodes over high-scoring ones.
CURIOSITY_FACTOR = 3.5
class MCTSNode:
    """
    One node of the search tree.

    A node remembers its parent, its children, the sequence of actions that
    leads from the root to it (`action_path`), the sum of all rollout
    results that passed through it (`total`) and how often it was visited.
    """
    # Called without arguments this creates the root node (empty action path).
    def __init__(self, action = None, parent = None):
        self.parent = parent
        extends_parent = parent is not None and action is not None
        self.action_path = parent.action_path + [action] if extends_parent else []
        self.children = []
        self.total = 0
        self.visits = 0

    def score(self):
        """Average rollout result of this node, or 0 if it was never visited."""
        if self.visits == 0:
            return 0
        return self.total * 1.0 / self.visits

    def best_action(self):
        """Last action on the path of the child with the highest average result."""
        top_child = max(self.children, key=lambda child: child.score())
        return top_child.action_path[-1]

    def ucb1(self, num_iterations):
        """
        UCB1 value of this node given the number of iterations so far.
        Unvisited nodes get infinity so they are always preferred.
        """
        if self.visits == 0:
            return math.inf
        exploration = math.sqrt(math.log(num_iterations) / self.visits)
        return self.score() + CURIOSITY_FACTOR * exploration

    def select_child(self, num_iterations):
        """
        Pick the child to descend into: the first unvisited child encountered,
        otherwise the child with the highest UCB1 value (the earliest one on
        ties). Returns None when there are no children.
        """
        best_node = None
        best_bound = None
        for child in self.children:
            if child.visits == 0:
                return child
            bound = child.ucb1(num_iterations)
            if best_node is None or bound > best_bound:
                best_node, best_bound = child, bound
        return best_node

    def expand(self, actions):
        """Append one new child node per action, in the order given."""
        self.children.extend(MCTSNode(action, self) for action in actions)

    def backpropogate(self, value):
        """Add `value` and one visit to this node and to every ancestor up to the root."""
        node = self
        while node is not None:
            node.total += value
            node.visits += 1
            node = node.parent
# Number of search iterations (one rollout each) that MCTSPlayer.get_action
# runs every time it has to choose an action.
MCTS_N = 1000
class MCTSPlayer(Player):
    """
    Agent that chooses its action with Monte Carlo tree search.

    Every decision runs MCTS_N iterations. The cards the player has not yet
    seen form the assumed deck, which the simulated game reshuffles for each
    rollout. In each iteration a tree node is selected, the `RolloutPlayer`
    is given that node's action sequence, and it finishes the game randomly.
    The money won in the rollout is propagated back up the tree.
    """
    def __init__(self, name, deck):
        super().__init__(name, deck)
        # Mirrors the bet of the real game so rollouts start from the right stake.
        self.bet = 2

    def get_action(self, cards, actions, dealer_cards):
        """
        Return the action from `actions` with the highest average rollout
        result. `cards` is our hand and `dealer_cards` the dealer's visible
        cards. Raises ValueError if one of those cards is not in our deck.
        """
        # Work on a copy of the deck and take out every card we can already see
        # (ours, and the open dealer card).
        unseen = self.deck[:]
        for seen in cards:
            unseen.remove(seen)
        for seen in dealer_cards:
            unseen.remove(seen)

        # The rollout player replays a queued action prefix and then acts randomly.
        rollout_player = RolloutPlayer("Rollout", unseen)
        # Simulated game over the reduced deck, played by the rollout player.
        simulation = Game(unseen, rollout_player, verbose=False)

        # The root corresponds to the current state; its children are our options.
        root = MCTSNode()
        root.expand(actions)

        # The iteration counter is 1-based and the same value is used at every
        # depth, so the UCB1 exploration term is computed consistently.
        for iteration in range(1, MCTS_N + 1):
            selected = root.select_child(iteration)
            # Descend through visited nodes; a visited leaf is expanded first,
            # after which one of its new (unvisited) children is selected.
            while selected.visits > 0:
                next_selection = selected.select_child(iteration)
                if next_selection is None:
                    selected.expand(actions)
                else:
                    selected = next_selection

            # Clear the recorded history, then queue the path leading to the node.
            rollout_player.reset()
            for action in selected.action_path:
                rollout_player.queue_action(action)

            # continue_round resumes from a partial state (our cards, the open
            # dealer card, the current bet) instead of dealing a fresh hand.
            # Its return value is the money won across *all* hands after a split.
            res = simulation.continue_round(cards, dealer_cards, self.bet)
            selected.backpropogate(res)

        # Choose the action with the highest *average* return.
        act = root.best_action()
        # Keep our own record of the bet in step with the game when doubling down.
        if act == Action.DOUBLE_DOWN:
            self.bet *= 2
        return act

    def reset(self):
        """Restore the starting bet for a new round."""
        self.bet = 2
class RolloutPlayer(Player):
    """
    Helper for the MCTS player's rollouts: it first replays any queued
    actions in order, then plays randomly, and records every action it took.
    """
    def __init__(self, name, deck):
        self.name = name
        self.deck = deck
        self.actions = []
        self.queued_actions = []

    def queue_action(self, action):
        """Schedule `action` to be played before any random choice is made."""
        self.queued_actions.append(action)

    def get_action(self, cards, actions, dealer_cards):
        """Return the oldest queued action, or a random one if the queue is empty."""
        if self.queued_actions:
            chosen = self.queued_actions.pop(0)
        else:
            chosen = random.choice(actions)
        self.actions.append(chosen)
        return chosen

    def reset(self):
        """Forget both the recorded history and any actions still queued."""
        self.actions, self.queued_actions = [], []
class ConsolePlayer(Player):
    """
    Interactive player: shows both hands on the console and asks the user
    to pick one of the available actions by number.
    """
    def get_action(self, cards, actions, dealer_cards):
        """
        Prompt until the user enters a number between 1 and len(actions) and
        return the corresponding action.
        """
        print()
        print(" Your cards:", format(cards), "(%.1f points)"%get_value(cards))
        print(" Dealer's visible card:", format(dealer_cards), "(%.1f points)"%get_value(dealer_cards))
        while True:
            print(" Which action do you want to take?")
            for number, action in enumerate(actions, 1):
                print(" ", number, action.name)
            answer = input()
            try:
                choice = int(answer)
            except ValueError:
                choice = 0
            # Explicit range check: 0 or a negative number must not pick an
            # action through Python's negative indexing.
            if 1 <= choice <= len(actions):
                return actions[choice - 1]
            print(" >>> Please enter a valid action number <<<")

    def reset(self):
        pass
class Dealer(Player):
    """
    The dealer follows a fixed rule: hit below 17 points, stand otherwise.
    """
    def __init__(self):
        self.name = "Dealer"

    def get_action(self, cards, actions, dealer_cards):
        """Return HIT while the hand is worth fewer than 17 points, else STAND."""
        return Action.HIT if get_value(cards) < 17 else Action.STAND
def same_rank(a, b):
    """Split rule: True when both cards carry the same rank."""
    first_rank, second_rank = a.rank, b.rank
    return first_rank == second_rank
def same_value(a, b):
    """Split rule: True when both cards are worth the same number of points."""
    first_value, second_value = a.value, b.value
    return first_value == second_value
class Game:
    """
    A blackjack table with one player and one dealer.

    cards is the full deck (copied and shuffled for every round), player is
    the agent whose get_action is consulted, split_rule decides whether two
    cards may be split, and verbose turns the console output on or off.
    """
    def __init__(self, cards, player, split_rule=same_value, verbose=True):
        self.cards = cards
        self.player = player
        self.dealer = Dealer()
        self.dealer_cards = []
        self.player_cards = []
        self.split_cards = []
        self.verbose = verbose
        self.split_rule = split_rule

    def round(self):
        """
        Play one round of black jack. First, the player is asked to take actions until they
        either stand or have more than 21 points. The return value of this function is the
        amount of money the player won.
        """
        self.deck = self.cards[:]
        random.shuffle(self.deck)
        self.dealer_cards = []
        self.player_cards = []
        self.bet = 2
        self.player.reset()
        self.dealer.reset()
        for i in range(2):
            self.deal(self.player_cards, self.player.name)
            # Only the dealer's first card is announced; the second stays hidden.
            self.deal(self.dealer_cards, self.dealer.name, i < 1)
        return self.play_round()

    def continue_round(self, player_cards, dealer_cards, bet):
        """
        Like round, but allows passing an initial game state in order to finish a partially played game.
        player_cards are the cards the player has in their hand
        dealer_cards are the visible cards (typically 1) of the dealer
        bet is the current bet of the player
        Note: For best results create a *new* Game object with a deck that has player_cards and dealer_cards removed.
        """
        self.deck = self.cards[:]
        random.shuffle(self.deck)
        self.bet = bet
        # Copies, so the caller's lists are not changed by the simulated play.
        self.player_cards = player_cards[:]
        self.dealer_cards = dealer_cards[:]
        # Give the dealer the card(s) that were still hidden from the caller.
        while len(self.dealer_cards) < 2:
            self.deal(self.dealer_cards, self.dealer.name)
        return self.play_round()

    def play_round(self):
        """
        Function used to actually play a round of blackjack after the initial setup done in round or continue_round.
        Will first let the player take their actions and then proceed with the dealer.
        Returns the total reward over all of the player's hands.
        """
        cards = self.play(self.player, self.player_cards)
        if self.verbose:
            print("Dealer reveals: ", format(self.dealer_cards[-1]))
            print("Dealer has:", format(self.dealer_cards), "(%.1f points)"%get_value(self.dealer_cards))
        self.play(self.dealer, self.dealer_cards)
        reward = sum(self.reward(c) for c in cards)
        if self.verbose:
            print("Bet:", self.bet, "won:", reward, "\n")
        return reward

    def deal(self, cards, name, public=True):
        """
        Deal the next card to the given hand. Raises IndexError when the deck is empty.
        """
        # Take the top card in place instead of re-slicing the whole deck.
        card = self.deck.pop(0)
        if self.verbose and public:
            print(name, "draws", format(card))
        cards.append(card)

    def play(self, player, cards, cansplit=True, postfix=""):
        """
        Play a round of blackjack for *one* participant (player or dealer).
        Note that a player may only split once, and only if the split_rule is satisfied (either two cards of the same rank, or of the same value)
        Returns a list of hands: one hand normally, two after a split.
        """
        while get_value(cards) < 21:
            actions = [Action.HIT, Action.STAND, Action.DOUBLE_DOWN]
            if len(cards) == 2 and cansplit and self.split_rule(cards[0], cards[1]):
                actions.append(Action.SPLIT)
            act = player.get_action(cards, actions, self.dealer_cards[:1])
            # An action that is not currently allowed is ignored and the
            # participant is simply asked again.
            if act in actions:
                if self.verbose:
                    print(player.name, "does", act.name)
                if act == Action.STAND:
                    break
                if act == Action.HIT or act == Action.DOUBLE_DOWN:
                    self.deal(cards, player.name)
                if act == Action.DOUBLE_DOWN:
                    # self.bet is shared by all hands of the round, so after a
                    # split this also raises the stake used to score the other hand.
                    self.bet *= 2
                    break
                if act == Action.SPLIT:
                    pilea = cards[:1]
                    pileb = cards[1:]
                    if self.verbose:
                        print(player.name, "now has 2 hands")
                        print("Hand 1:", format(pilea))
                        print("Hand 2:", format(pileb))
                    self.play(player, pilea, False, " (hand 1)")
                    self.play(player, pileb, False, " (hand 2)")
                    return [pilea, pileb]
        if self.verbose:
            print(player.name, "ends with%s"%(postfix), format(cards), "with value", get_value(cards), "\n")
        return [cards]

    def reward(self, player_cards):
        """
        Calculate amount of money won by the player for one hand. Blackjack pays 3:2.

        Returns -bet for a bust or a lower score, 0 for a tie, +bet for a win,
        and 1.5 * bet when the hand is a two-card 21.
        """
        pscore = get_value(player_cards)
        dscore = get_value(self.dealer_cards)
        if self.verbose:
            print(self.player.name + ":", format(player_cards), "(%.1f points)"%(pscore))
            print(self.dealer.name + ":", format(self.dealer_cards), "(%.1f points)"%(dscore))
        # A busted hand loses even if the dealer busts as well.
        if pscore > 21:
            return -self.bet
        # Judge the hand being scored, not the original pre-split hand.
        natural = pscore == 21 and len(player_cards) == 2
        # A natural is never treated as a push: tied with a dealer 21 it still wins.
        # NOTE(review): casino rules push a natural against a dealer natural - confirm intent.
        if pscore > dscore or dscore > 21 or (natural and pscore == dscore):
            return 3*self.bet/2 if natural else self.bet
        if pscore == dscore:
            return 0
        return -self.bet
# Maps the player name accepted on the command line to the class implementing it.
player_types = {"default": Player, "timid": TimidPlayer, "basic": BasicStrategyPlayer, "mcts": MCTSPlayer, "console": ConsolePlayer}
# Our implementation allows us to define different deck "types", such as only even cards,
# or even use made-up card values like "1.5"
# All decks are built once, when this module is loaded; in particular the
# "random" deck is sampled a single time per program run.
# NOTE(review): the "low" deck lists rank "3" twice (values 3 and 4). Card equality
# compares suit and rank only, so those cards compare equal - possibly ("4", 4) was meant; confirm.
deck_types = {"default": generate_deck(),
              "high": generate_deck(ranks=[("2", 2), ("10", 10), ("Ace", 11), ("Fool", 12)]),
              "low": generate_deck(ranks=[("1.5", 1.5), ("2", 2),("2.2", 2.2), ("3", 3), ("3", 4), ("Ace", 11)], suits=["Hearts", "Spades", "Clubs", "Diamonds", "Swords", "Wands", "Bows"]),
              "even": generate_deck(ranks=[("2",2), ("4",4), ("6",6), ("8",8), ("10",10), ("Jack",10), ("Queen",10), ("King",10)]),
              "odd": generate_deck(ranks=[("3",3), ("5",5), ("7",7), ("9",9), ("Ace",11)]),
              "red": generate_deck(suits=["Diamonds", "Hearts"]),
              "random": generate_deck(ranks=random.sample([("2",2), ("3",3), ("4",4), ("5",5), ("6",6), ("7",7), ("8",8), ("9",9), ("10",10), ("Jack",10), ("Queen",10), ("King",10), ("Ace",11)], random.randint(5,13)))}
def main(ptype="default", dtype="default", n=100, split_rule=same_value, verbose=True):
    """
    Play n rounds and print the average amount won per round.

    ptype and dtype are keys of player_types and deck_types (KeyError if
    unknown), split_rule is handed to the Game, and verbose controls the
    per-round output. With n <= 0 no round is played and no average exists,
    so a short notice is printed instead of dividing by zero.
    """
    deck = deck_types[dtype]
    # The player receives its own copy of the deck to reason about.
    player = player_types[ptype]("Sir Gladington III, Esq.", deck[:])
    g = Game(deck, player, split_rule, verbose)
    if n <= 0:
        print("No games were played, so there is no average to report.")
        return
    points = [g.round() for _ in range(n)]
    print("Average points: ", sum(points)*1.0/n)
# run `python blackjack.py --help` for usage information
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Run a simulation of a Blackjack agent.')
    parser.add_argument('player', nargs="?", default="default",
                        help='the player type (available values: %s)'%(", ".join(player_types.keys())))
    # type=int lets argparse reject a non-numeric count with a usage message
    # instead of a traceback later on.
    parser.add_argument('-n', '--count', dest='count', action='store', default=100, type=int,
                        help='How many games to run')
    parser.add_argument('-s', '-q', '--silent', '--quiet', dest='verbose', action='store_const', default=True, const=False,
                        help='Do not print game output (only average score at the end is printed)')
    parser.add_argument('-r', '--rank', '--rank-split', dest='split', action='store_const', default=same_value, const=same_rank,
                        help="Only allow split when the player's cards have the same rank (default: allow split when they have the same value)")
    # nargs=1 makes args.deck a one-element list, hence the [0] below.
    parser.add_argument('-d', "--deck", metavar='D', dest="deck", nargs=1, default=["default"],
                        help='the deck type to use (available values: %s)'%(", ".join(deck_types.keys())))
    args = parser.parse_args()
    if args.player not in player_types:
        print("Invalid player type: %s. Available options are: \n%s"%(args.player, ", ".join(player_types.keys())))
        sys.exit(-1)
    if args.deck[0] not in deck_types:
        # Report the deck name itself rather than the one-element list.
        print("Invalid deck type: %s. Available options are: \n%s"%(args.deck[0], ", ".join(deck_types.keys())))
        sys.exit(-1)
    main(args.player, args.deck[0], args.count, args.split, args.verbose)