from functools import reduce
def to_bits(*l):
return reduce(lambda r,i: r |(1<<i), l, 0)
def winning_patterns():
v1 = to_bits(0,1,2)
h1 = to_bits(0,3,6)
return [v1, v1<<3, v1<<6, h1, h1<<1, h1<<2, to_bits(0,4,8), to_bits(2,4,6)]
WINNING_PATTERNS = winning_patterns()
class Game:
def __init__(self, board=None, step=-1, result=None):
if board == None:
board = [0, 0]
self.board=[0, 0]
self.load((board, step, result))
def save(self):
return self.board[:], self.step, self.result
def load(self, state):
self.board[:], self.step, self.result = state
def possible_moves(self):
occupied = self.board[0]|self.board[1]
return [i for i in range(9) if occupied&(1<<i) ==0]
def move(self, i):
self.step+=1
color = self.step&1
occupied = self.board[0]|self.board[1]
assert (occupied >> i)&1 == 0
self.board[color] |= (1<<i)
if any(p&self.board[color] == p for p in WINNING_PATTERNS):
self.end = True
self.result = 2*color-1
elif self.step == 8:
self.result = 0
def sim_move(self, i):
color = (self.step+1)&1
board = self.board[:]
board[color] |= (1<<i)
return (board[0]<<9)|board[1]
def repr(self):
rtn = ""
for i in range(9):
is_o = (self.board[0]>>i)&1
is_x = (self.board[1]>>i)&1
p = '.ox'[is_o + 2*is_x]
rtn+=p
if i%3==2:
rtn+="\n"
return rtn
from random import choice, random, shuffle
Q=[None]*2**18
reg = lambda x: 1. if x is None else -x
def run_game(game, verbose = False):
board = (game.board[0]<<9)|game.board[1]
if game.result is not None:
Q[board] = -abs(float(game.result))
if verbose:
print("result", game.result)
return
color = (game.step+1)&1
d = 2*color -1
moves = game.possible_moves()
boards = [game.sim_move(m) for m in moves]
r = random()
if r < 0.01:
m = choice(moves)
else:
values = [reg(Q[b]) for b in boards]
m = max(zip(values,moves))[1]
s = game.repr()
game.move(m)
run_game(game, verbose)
estQ = max(-Q[b] for b in boards if Q[b] is not None)
if Q[board] is None:
Q[board] = 0.
Q[board]+=0.1*(estQ-Q[board])
if verbose:
print("color", color, "new score", Q[board], [Q[b] for b in boards])
print(s)
print()
for i in range(100):
run_game(Game())
run_game(Game(), True)
result -1 color 0 new score 0.1 [-1.0] xxo .oo xox color 1 new score 0.0 [0.1, 0.0] .xo .oo xox color 0 new score 0.0 [None, None, 0.0] .xo ..o xox color 1 new score 0.0 [None, None, None, 0.0] .xo ..o .ox color 0 new score 0.0 [None, None, None, None, 0.0] .xo ..o ..x color 1 new score 0.0 [None, None, None, None, None, 0.0] .xo ..o ... color 0 new score 4.685590000000002e-05 [None, 0.0, -0.00010000000000000003, -0.00010000000000000003, -0.00010000000000000003, 1.0000000000000004e-05, -0.00010000000000000003] .x. ..o ... color 1 new score -2.7951601083128245e-05 [4.0951000000000015e-05, 4.685590000000002e-05, 4.111102279000002e-05, 4.098229373826158e-05, 4.685590000000002e-05, 0.0010090000000000003, 0.0010090000000000003, 4.685590000000002e-05] ... ..o ... color 0 new score 1.808124087450758e-05 [1.0000000000000005e-07, -1.0000000000000005e-08, -1.0000000000000005e-08, 0.0, 0.0, -2.7951601083128245e-05, 7.574869000000003e-06, -9.100000000000004e-07, -8.290000000000003e-07] ... ... ...
def alphabeta(game, ub=1, lb=-1, level=0, verbose=False):
#print(level, "alphabeta", ub, lb)
#print(game.repr())
board = (game.board[0]<<9)|game.board[1]
if game.result is not None:
return abs(game.result), -1
color = (game.step+1)&1
d = 2*color -1
moves = game.possible_moves()
state = game.save()
best_m = None
for m in moves:
game.move(m)
v, _ = alphabeta(game, -lb, -ub, level+1)
game.load(state)
if verbose:
print(m, v, lb)
if v >= ub:
return -ub, best_m
if v > lb:
lb = v
best_m = m
return -lb, best_m
game = Game()
while game.result is None:
v, m = alphabeta(game)
print(v,m)
game.move(m)
print(game.repr())
0 0 o.. ... ... 0 4 o.. .x. ... 0 1 oo. .x. ... 0 2 oox .x. ... 0 6 oox .x. o.. 0 3 oox xx. o.. 0 5 oox xxo o.. 0 7 oox xxo ox. 0 8 oox xxo oxo