import numpy as np
import math
import pickle
import operator
import random
import time
import sys
class tictactoe:
    """Tic-tac-toe on an N x N board with a tabular state-value function.

    The board is stored flat (row-major) as a NumPy object array of
    one-character marks.  ``V`` maps a board position (the tuple returned by
    :meth:`pos_board`) to a value; positions that were never seen are
    treated as 0.5 when moves are ranked.
    """

    N = 3            # board side length
    alpha = 0.3      # step size of the value update in game_play_train
    epsilon = 0.2    # probability of a random move in non-interactive game_play
    player1 = "O"
    player2 = "X"    # the side typed in by the human when game_play(user=True)
    empty = "-"

    # Per-game state, (re)created by init_board().
    board = None
    player = None
    moves = None

    def __init__(self):
        # The value table is created per instance: as a class attribute the
        # dict would be shared (and mutated) by every instance.
        self.V = {}

    def init_board(self):
        """Reset the board, seed the empty-board value and pick who starts."""
        self.board = np.empty(self.N ** 2, dtype=object)
        self.board[:] = self.empty
        self.moves = set()
        # setdefault (rather than "only when V is empty") guarantees the
        # start position is present even if V was replaced from outside.
        self.V.setdefault(self.pos_board(), 0.5)
        self.player = random.choice([self.player1, self.player2])

    def pos_board(self):
        """Return the current position as a hashable tuple (key into ``V``)."""
        return tuple(self.board)

    def print_board(self):
        """Print the board as an N x N grid followed by a blank line."""
        print(np.reshape(self.board, (self.N, self.N)), flush=True)
        print()

    def flip_player(self):
        """Return the opponent of ``self.player`` (does not modify it)."""
        if self.player == self.player1:
            return self.player2
        return self.player1

    def game_win(self, player):
        """Return True if either player owns a full row, column or diagonal.

        ``player`` is not used; it is kept so existing calls such as
        ``game_win(None)`` keep working.
        """
        grid = np.reshape(self.board, (self.N, self.N))
        for mark in (self.player1, self.player2):
            # Compare marks directly instead of summing character codes, so
            # the check does not depend on the symbols chosen or on N.
            owned = grid == mark
            if (owned.all(axis=0).any()
                    or owned.all(axis=1).any()
                    or owned.diagonal().all()
                    or np.fliplr(owned).diagonal().all()):
                return True
        return False

    def _empty_squares(self):
        """Return the flat indices of all unoccupied squares."""
        return np.flatnonzero(self.board == self.empty)

    def _random_empty_square(self):
        """Return a uniformly random unoccupied square as a plain int."""
        return int(np.random.choice(self._empty_squares()))

    def game_play_train(self):
        """Play one game of uniformly random moves and update ``V``.

        After every move the value of the position before the move is
        shifted towards the value of the position after it, with step size
        ``alpha``.  A winning position is stored as 1, a drawn full board
        as 0, and any other new position starts at 0.5.

        Returns:
            The number of moves played before the final one.
        """
        # NOTE(review): the position before the move was produced by the
        # opponent, yet it is moved towards the mover's value (1 on a win
        # for either side) -- confirm this backup is the intended one.
        self.init_board()
        count = 0
        while True:
            state = self.pos_board()
            self.board[self._random_empty_square()] = self.player
            next_state = self.pos_board()

            won = self.game_win(None)
            drawn = not won and self._empty_squares().size == 0
            if won or drawn:
                terminal_value = 1 if won else 0
                # A finished position always evaluates the same way, so a
                # previously stored value must agree with it.
                known = self.V.get(next_state)
                assert known is None or known == terminal_value
                self.V[next_state] = terminal_value
            else:
                self.V.setdefault(next_state, 0.5)

            self.V[state] += self.alpha * (self.V[next_state] - self.V[state])
            if won or drawn:
                return count
            self.player = self.flip_player()
            count += 1

    def value_move(self, player):
        """Return the empty square whose resulting position has the top value.

        Every empty square is tried with ``player``'s mark; positions missing
        from ``V`` count as 0.5 and ties are broken at random.

        Raises:
            ValueError: if the board has no empty square.
        """
        candidates = self._empty_squares()
        if candidates.size == 0:
            raise ValueError("no empty square left to move to")
        values = []
        for idx in candidates:
            after = list(self.board)
            after[idx] = player
            values.append(self.V.get(tuple(after), 0.5))
        best = max(values)
        best_moves = [int(idx) for idx, val in zip(candidates, values)
                      if val == best]
        return random.choice(best_moves)

    def _parse_move(self, text):
        """Convert ``"row col"`` into a flat index, or None if unusable.

        The move is rejected when fewer than two integers are given, when
        the row or the column lies outside ``0 .. N-1``, or when the square
        is already taken.
        """
        fields = text.split()
        try:
            row, col = int(fields[0]), int(fields[1])
        except (IndexError, ValueError):
            return None
        # Check both coordinates, not only the combined index: "0 5" would
        # otherwise silently address another square.
        if not (0 <= row < self.N and 0 <= col < self.N):
            return None
        pos = self.N * row + col
        if self.board[pos] != self.empty:
            return None
        return pos

    def _read_user_move(self):
        """Prompt on stdin until a legal move is entered; return its index."""
        while True:
            pos = self._parse_move(input("Enter valid move? "))
            if pos is not None:
                return pos

    def game_play(self, user=False):
        """Play one game, printing the board after every move.

        With ``user=True`` the ``player2`` side is read from stdin as
        ``"row col"`` and the other side always plays the highest-valued
        move.  Otherwise both sides play the highest-valued move, except
        that with probability ``epsilon`` a random empty square is taken.

        Returns:
            The number of moves played before the final one.
        """
        self.init_board()
        count = 0
        while True:
            if user:
                if self.player == self.player2:
                    pos = self._read_user_move()
                else:
                    pos = self.value_move(self.player)
            elif random.random() < self.epsilon:
                pos = self._random_empty_square()
            else:
                pos = self.value_move(self.player)

            self.board[pos] = self.player
            self.print_board()

            if self.game_win(None):
                print("Player Win", self.player)
                return count
            if self._empty_squares().size == 0:
                print("Draw")
                return count
            self.player = self.flip_player()
            count += 1
if __name__ == "__main__":
    # Train the value table by self-play, store it in V.pkl, then reload it
    # and play one interactive game against the user.
    a = tictactoe()

    train = True  # set to False to skip training and reuse an existing V.pkl
    if train:
        games = 10000
        count = 0
        for x in range(1, games + 1):
            count = count + a.game_play_train()
            if x % 1000 == 0:
                print("Train ", x)
        print("Avg ", count / games)
        # "with" closes the file even if pickling fails.
        with open('V.pkl', 'wb') as output:
            pickle.dump(a.V, output)

    # NOTE: pickle.load can execute arbitrary code; only load a V.pkl that
    # was produced by this script.
    with open('V.pkl', 'rb') as pkl_file:
        a.V = pickle.load(pkl_file)

    count = 0
    games = 1
    for _ in range(games):
        count = count + a.game_play(True)
    print("Avg ", count / games)