#!/usr/bin/env python3
from math import inf as infinity
from random import choice
import platform
import time
from os import system
HUMAN = -1
COMP = +1
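# Board cell encoding: 0 = empty, -1 = human's mark, +1 = computer's mark.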
board = [
[0, 0, 0],
[0, 0, 0],
[0, 0, 0],
]
def evaluate(state):
"""
    Heuristic evaluation of the current board state.
:param state: the state of the current board
:return: +1 if the computer wins; -1 if the human wins; 0 draw
"""
if wins(state, COMP):
score = +1
elif wins(state, HUMAN):
score = -1
else:
score = 0
return score
def wins(state, player):
"""
This function tests if a specific player wins. Possibilities:
* Three rows [X X X] or [O O O]
* Three cols [X X X] or [O O O]
* Two diagonals [X X X] or [O O O]
:param state: the state of the current board
:param player: a human or a computer
:return: True if the player wins
"""
win_state = [
[state[0][0], state[0][1], state[0][2]],
[state[1][0], state[1][1], state[1][2]],
[state[2][0], state[2][1], state[2][2]],
[state[0][0], state[1][0], state[2][0]],
[state[0][1], state[1][1], state[2][1]],
[state[0][2], state[1][2], state[2][2]],
[state[0][0], state[1][1], state[2][2]],
[state[2][0], state[1][1], state[0][2]],
]
if [player, player, player] in win_state:
return True
else:
return False
def game_over(state):
"""
    Tests whether the human or the computer has won.
:param state: the state of the current board
:return: True if the human or computer wins
"""
return wins(state, HUMAN) or wins(state, COMP)
def empty_cells(state):
"""
    Collects every empty cell into a list.
:param state: the state of the current board
:return: a list of empty cells
"""
cells = []
for x, row in enumerate(state):
for y, cell in enumerate(row):
if cell == 0:
cells.append([x, y])
return cells
def valid_move(x, y):
"""
A move is valid if the chosen cell is empty
:param x: X coordinate
:param y: Y coordinate
:return: True if the board[x][y] is empty
"""
if [x, y] in empty_cells(board):
return True
else:
return False
def set_move(x, y, player):
"""
    Sets the move on the board, if the coordinates are valid.
:param x: X coordinate
:param y: Y coordinate
:param player: the current player
"""
if valid_move(x, y):
board[x][y] = player
return True
else:
return False
def minimax(state, depth, player):
"""
    AI function that chooses the best move.
    :param state: current state of the board
    :param depth: node index in the tree (0 <= depth <= 9),
    but never nine in this case (see the ai_turn() function)
    :param player: a human or a computer
:return: a list with [the best row, best col, best score]
"""
if player == COMP:
best = [-1, -1, -infinity]
else:
best = [-1, -1, +infinity]
if depth == 0 or game_over(state):
score = evaluate(state)
return [-1, -1, score]
for cell in empty_cells(state):
x, y = cell[0], cell[1]
state[x][y] = player
score = minimax(state, depth - 1, -player)
state[x][y] = 0
score[0], score[1] = x, y
if player == COMP:
if score[2] > best[2]:
best = score # max value
else:
if score[2] < best[2]:
best = score # min value
return best
def clean():
"""
Clears the console
"""
os_name = platform.system().lower()
if 'windows' in os_name:
system('cls')
else:
system('clear')
def render(state, c_choice, h_choice):
"""
    Prints the board to the console.
    :param state: current state of the board
    :param c_choice: computer's choice X or O
    :param h_choice: human's choice X or O
"""
chars = {
-1: h_choice,
+1: c_choice,
0: ' '
}
str_line = '---------------'
print('\n' + str_line)
for row in state:
for cell in row:
symbol = chars[cell]
print(f'| {symbol} |', end='')
print('\n' + str_line)
def ai_turn(c_choice, h_choice):
"""
    Calls the minimax function if depth < 9,
    otherwise chooses a random coordinate.
:param c_choice: computer's choice X or O
:param h_choice: human's choice X or O
:return:
"""
depth = len(empty_cells(board))
if depth == 0 or game_over(board):
return
clean()
print(f'Computer turn [{c_choice}]')
render(board, c_choice, h_choice)
if depth == 9:
x = choice([0, 1, 2])
y = choice([0, 1, 2])
else:
move = minimax(board, depth, COMP)
x, y = move[0], move[1]
set_move(x, y, COMP)
time.sleep(1)
def human_turn(c_choice, h_choice):
"""
    The human plays by choosing a valid move.
:param c_choice: computer's choice X or O
:param h_choice: human's choice X or O
:return:
"""
depth = len(empty_cells(board))
if depth == 0 or game_over(board):
return
# Dictionary of valid moves
move = -1
moves = {
1: [0, 0], 2: [0, 1], 3: [0, 2],
4: [1, 0], 5: [1, 1], 6: [1, 2],
7: [2, 0], 8: [2, 1], 9: [2, 2],
}
clean()
print(f'Human turn [{h_choice}]')
render(board, c_choice, h_choice)
while move < 1 or move > 9:
try:
move = int(input('Use numpad (1..9): '))
coord = moves[move]
can_move = set_move(coord[0], coord[1], HUMAN)
if not can_move:
print('Bad move')
move = -1
except (EOFError, KeyboardInterrupt):
print('Bye')
exit()
except (KeyError, ValueError):
print('Bad choice')
def main():
"""
    Main function that runs the game loop.
"""
clean()
h_choice = '' # X or O
c_choice = '' # X or O
    first = ''  # whether the human goes first ('Y'/'N')
# Human chooses X or O to play
while h_choice != 'O' and h_choice != 'X':
try:
print('')
h_choice = input('Choose X or O\nChosen: ').upper()
except (EOFError, KeyboardInterrupt):
print('Bye')
exit()
except (KeyError, ValueError):
print('Bad choice')
# Setting computer's choice
if h_choice == 'X':
c_choice = 'O'
else:
c_choice = 'X'
    # Human may start first
clean()
while first != 'Y' and first != 'N':
try:
first = input('First to start?[y/n]: ').upper()
except (EOFError, KeyboardInterrupt):
print('Bye')
exit()
except (KeyError, ValueError):
print('Bad choice')
# Main loop of this game
while len(empty_cells(board)) > 0 and not game_over(board):
if first == 'N':
ai_turn(c_choice, h_choice)
first = ''
human_turn(c_choice, h_choice)
ai_turn(c_choice, h_choice)
# Game over message
if wins(board, HUMAN):
clean()
print(f'Human turn [{h_choice}]')
render(board, c_choice, h_choice)
print('YOU WIN!')
elif wins(board, COMP):
clean()
print(f'Computer turn [{c_choice}]')
render(board, c_choice, h_choice)
print('YOU LOSE!')
else:
clean()
render(board, c_choice, h_choice)
print('DRAW!')
exit()
main()
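The solver can also be exercised non-interactively. A minimal sketch (assuming the functions above are already in scope, and using a hand-picked mid-game position):

# The computer (+1) is to move; minimax searches the remaining empty cells
# and returns [row, col, score]. Here it finds a move that forces a win (+1).
test_state = [
    [+1, -1,  0],
    [-1, +1,  0],
    [ 0,  0,  0],
]
print(minimax(test_state, len(empty_cells(test_state)), COMP))  # -> [0, 2, 1]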
Naive Learning
import itertools
import time
import numpy as np
import cv2
from moviepy.editor import VideoClip
WORLD_HEIGHT = 4
WORLD_WIDTH = 4
WALL_FRAC = .2
NUM_WINS = 5
NUM_LOSE = 10
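# World-generation defaults: a 4x4 grid with roughly 20% of cells turned into walls,
# plus 5 win cells and 10 lose cells sampled at random (see gen_world_config below).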
class GridWorld:
def __init__(self, world_height=3, world_width=4, discount_factor=.5, default_reward=-.5, wall_penalty=-.6,
win_reward=5., lose_reward=-10., viz=True, patch_side=120, grid_thickness=2, arrow_thickness=3,
wall_locs=[[1, 1], [1, 2]], win_locs=[[0, 3]], lose_locs=[[1, 3]], start_loc=[0, 0],
reset_prob=.2):
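        # Reward grid: every cell starts at the default step reward; wall, win and lose cells are overwritten below.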
self.world = np.ones([world_height, world_width]) * default_reward
self.reset_prob = reset_prob
self.world_height = world_height
self.world_width = world_width
self.wall_penalty = wall_penalty
self.win_reward = win_reward
self.lose_reward = lose_reward
self.default_reward = default_reward
self.discount_factor = discount_factor
self.patch_side = patch_side
self.grid_thickness = grid_thickness
self.arrow_thickness = arrow_thickness
self.wall_locs = np.array(wall_locs)
self.win_locs = np.array(win_locs)
self.lose_locs = np.array(lose_locs)
self.at_terminal_state = False
self.auto_reset = True
self.random_respawn = True
self.step = 0
self.viz_canvas = None
self.viz = viz
self.path_color = (128, 128, 128)
self.wall_color = (0, 255, 0)
self.win_color = (0, 0, 255)
self.lose_color = (255, 0, 0)
self.world[self.wall_locs[:, 0], self.wall_locs[:, 1]] = self.wall_penalty
self.world[self.lose_locs[:, 0], self.lose_locs[:, 1]] = self.lose_reward
self.world[self.win_locs[:, 0], self.win_locs[:, 1]] = self.win_reward
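        # Cells still carrying the default reward are the only valid (re)spawn locations.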
spawn_condn = lambda loc: self.world[loc[0], loc[1]] == self.default_reward
self.spawn_locs = np.array([loc for loc in itertools.product(np.arange(self.world_height),
np.arange(self.world_width))
if spawn_condn(loc)])
self.start_state = np.array(start_loc)
self.bot_rc = None
self.reset()
self.actions = [self.up, self.left, self.right, self.down, self.noop]
self.action_labels = ['UP', 'LEFT', 'RIGHT', 'DOWN', 'NOOP']
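        # Q-table: one value per (row, col, action), initialised uniformly.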
self.q_values = np.ones([self.world.shape[0], self.world.shape[1], len(self.actions)]) * 1. / len(self.actions)
if self.viz:
self.init_grid_canvas()
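            # Frames are produced by make_frame() and written out as an .mp4 via moviepy's VideoClip.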
self.video_out_fpath = 'shm_dqn_gridsolver-' + str(time.time()) + '.mp4'
self.clip = VideoClip(self.make_frame, duration=15)
def make_frame(self, t):
self.action()
frame = self.highlight_loc(self.viz_canvas, self.bot_rc[0], self.bot_rc[1])
return frame
def check_terminal_state(self):
if self.world[self.bot_rc[0], self.bot_rc[1]] == self.lose_reward \
or self.world[self.bot_rc[0], self.bot_rc[1]] == self.win_reward:
self.at_terminal_state = True
# print('------++++---- TERMINAL STATE ------++++----')
# if self.world[self.bot_rc[0], self.bot_rc[1]] == self.win_reward:
# print('GAME WON! :D')
# elif self.world[self.bot_rc[0], self.bot_rc[1]] == self.lose_reward:
# print('GAME LOST! :(')
if self.auto_reset:
self.reset()
def reset(self):
# print('Resetting')
if not self.random_respawn:
self.bot_rc = self.start_state.copy()
else:
self.bot_rc = self.spawn_locs[np.random.choice(np.arange(len(self.spawn_locs)))].copy()
self.at_terminal_state = False
def up(self):
action_idx = 0
# print(self.action_labels[action_idx])
new_r = self.bot_rc[0] - 1
if new_r < 0 or self.world[new_r, self.bot_rc[1]] == self.wall_penalty:
return self.wall_penalty, action_idx
self.bot_rc[0] = new_r
reward = self.world[self.bot_rc[0], self.bot_rc[1]]
self.check_terminal_state()
return reward, action_idx
def left(self):
action_idx = 1
# print(self.action_labels[action_idx])
new_c = self.bot_rc[1] - 1
if new_c < 0 or self.world[self.bot_rc[0], new_c] == self.wall_penalty:
return self.wall_penalty, action_idx
self.bot_rc[1] = new_c
reward = self.world[self.bot_rc[0], self.bot_rc[1]]
self.check_terminal_state()
return reward, action_idx
def right(self):
action_idx = 2
# print(self.action_labels[action_idx])
new_c = self.bot_rc[1] + 1
if new_c >= self.world.shape[1] or self.world[self.bot_rc[0], new_c] == self.wall_penalty:
return self.wall_penalty, action_idx
self.bot_rc[1] = new_c
reward = self.world[self.bot_rc[0], self.bot_rc[1]]
self.check_terminal_state()
return reward, action_idx
def down(self):
action_idx = 3
# print(self.action_labels[action_idx])
new_r = self.bot_rc[0] + 1
if new_r >= self.world.shape[0] or self.world[new_r, self.bot_rc[1]] == self.wall_penalty:
return self.wall_penalty, action_idx
self.bot_rc[0] = new_r
reward = self.world[self.bot_rc[0], self.bot_rc[1]]
self.check_terminal_state()
return reward, action_idx
def noop(self):
action_idx = 4
# print(self.action_labels[action_idx])
reward = self.world[self.bot_rc[0], self.bot_rc[1]]
self.check_terminal_state()
return reward, action_idx
def qvals2probs(self, q_vals, epsilon=1e-4):
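        # Shift Q-values so they are strictly positive, then normalise them into a probability distribution over actions.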
action_probs = q_vals - q_vals.min() + epsilon
action_probs = action_probs / action_probs.sum()
return action_probs
def action(self):
# print('================ ACTION =================')
if self.at_terminal_state:
print('At terminal state, please call reset()')
exit()
# print('Start position:', self.bot_rc)
start_bot_rc = self.bot_rc[0], self.bot_rc[1]
q_vals = self.q_values[self.bot_rc[0], self.bot_rc[1]]
action_probs = self.qvals2probs(q_vals)
reward, action_idx = np.random.choice(self.actions, p=action_probs)()
# print('End position:', self.bot_rc)
# print('Reward:', reward)
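        # Learning rate: starts near 1 and decays (very slowly) with the step count.
        # Q-learning update: Q(s,a) <- (1 - alpha) * Q(s,a) + alpha * (r + gamma * max_a' Q(s',a')).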
alpha = np.exp(-self.step / 10e9)
self.step += 1
qv = (1 - alpha) * q_vals[action_idx] + alpha * (reward + self.discount_factor
* self.q_values[self.bot_rc[0], self.bot_rc[1]].max())
self.q_values[start_bot_rc[0], start_bot_rc[1], action_idx] = qv
if self.viz:
self.update_viz(start_bot_rc[0], start_bot_rc[1])
if np.random.rand() < self.reset_prob:
# print('-----> Randomly resetting to a random spawn point with probability', self.reset_prob)
self.reset()
def highlight_loc(self, viz_in, i, j):
starty = i * (self.patch_side + self.grid_thickness)
endy = starty + self.patch_side
startx = j * (self.patch_side + self.grid_thickness)
endx = startx + self.patch_side
viz = viz_in.copy()
cv2.rectangle(viz, (startx, starty), (endx, endy), (255, 255, 255), thickness=self.grid_thickness)
return viz
def update_viz(self, i, j):
starty = i * (self.patch_side + self.grid_thickness)
endy = starty + self.patch_side
startx = j * (self.patch_side + self.grid_thickness)
endx = startx + self.patch_side
patch = np.zeros([self.patch_side, self.patch_side, 3]).astype(np.uint8)
if self.world[i, j] == self.default_reward:
patch[:, :, :] = self.path_color
elif self.world[i, j] == self.wall_penalty:
patch[:, :, :] = self.wall_color
elif self.world[i, j] == self.win_reward:
patch[:, :, :] = self.win_color
elif self.world[i, j] == self.lose_reward:
patch[:, :, :] = self.lose_color
if self.world[i, j] == self.default_reward:
action_probs = self.qvals2probs(self.q_values[i, j])
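            # Arrow direction encodes the net RIGHT-vs-LEFT and UP-vs-DOWN preference; its brightness scales with 1 - P(NOOP).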
x_component = action_probs[2] - action_probs[1]
y_component = action_probs[0] - action_probs[3]
magnitude = 1. - action_probs[-1]
s = self.patch_side // 2
x_patch = int(s * x_component)
y_patch = int(s * y_component)
arrow_canvas = np.zeros_like(patch)
vx = s + x_patch
vy = s - y_patch
cv2.arrowedLine(arrow_canvas, (s, s), (vx, vy), (255, 255, 255), thickness=self.arrow_thickness,
tipLength=0.5)
gridbox = (magnitude * arrow_canvas + (1 - magnitude) * patch).astype(np.uint8)
self.viz_canvas[starty:endy, startx:endx] = gridbox
else:
self.viz_canvas[starty:endy, startx:endx] = patch
def init_grid_canvas(self):
org_h, org_w = self.world_height, self.world_width
viz_w = (self.patch_side * org_w) + (self.grid_thickness * (org_w - 1))
viz_h = (self.patch_side * org_h) + (self.grid_thickness * (org_h - 1))
self.viz_canvas = np.zeros([viz_h, viz_w, 3]).astype(np.uint8)
for i in range(org_h):
for j in range(org_w):
self.update_viz(i, j)
def solve(self):
if not self.viz:
while True:
self.action()
else:
self.clip.write_videofile(self.video_out_fpath, fps=460)
def gen_world_config(h, w, wall_frac=.5, num_wins=2, num_lose=3):
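    # Sample random (row, col) coordinates for walls, win cells and lose cells; overlapping picks are possible.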
n = h * w
num_wall_blocks = int(wall_frac * n)
    wall_locs = (np.random.rand(num_wall_blocks, 2) * [h, w]).astype(int)
    win_locs = (np.random.rand(num_wins, 2) * [h, w]).astype(int)
    lose_locs = (np.random.rand(num_lose, 2) * [h, w]).astype(int)
return wall_locs, win_locs, lose_locs
if __name__ == '__main__':
wall_locs, win_locs, lose_locs = gen_world_config(WORLD_HEIGHT, WORLD_WIDTH, WALL_FRAC, NUM_WINS, NUM_LOSE)
g = GridWorld(world_height=WORLD_HEIGHT, world_width=WORLD_WIDTH,
wall_locs=wall_locs, win_locs=win_locs, lose_locs=lose_locs, viz=True)
g.solve()
k = 0
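To inspect the learned Q-table without rendering a video, a minimal sketch (using the class defaults; the step count is arbitrary) is:

# Hypothetical headless run: viz=False skips the canvas and VideoClip setup,
# so we step the agent a fixed number of times instead of calling solve(),
# whose non-visual branch loops forever.
env = GridWorld(viz=False)
for _ in range(50000):
    env.action()
print(env.q_values.argmax(axis=-1))  # greedy action index for each cell
print(env.action_labels)             # index -> action name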