#!/usr/bin/env python
# coding: utf-8

# # Negamax
#
# Last time we discussed the minimax search method for searching trees
# in adversarial games. The alternating maximizing and minimizing steps
# can be replaced with a single maximizing step if we negate the values
# returned each time **and** the game is truly a zero-sum game, with the
# utility in terminal states for one player always being the negative of
# the utility for the other player.
#
# Here is an illustration of Negamax applied to Tic-Tac-Toe.

# In[7]:

from IPython.display import IFrame
IFrame("http://www.cs.colostate.edu/~anderson/cs440/notebooks/negamax.pdf", width=800, height=600)

# Here is a python implementation.

# In[8]:

### Assumes that the argument 'game' is an object with the following methods:
###   game.get_moves()
###   game.make_move(move)    changes lookahead player
###   game.unmake_move(move)  changes lookahead player
###   game.change_player()    changes next turn player
###   game.get_utility()
###   game.is_over()
###   game.__str__()

inf = float('infinity')

def negamax(game, depth_left):
    if debug:
        print(' ' * (10 - depth_left), game, end='')
    # If at a terminal state or the depth limit, return the utility value and move None.
    if game.is_over() or depth_left == 0:
        if debug:
            print('terminal value', game.get_utility())
        return game.get_utility(), None
    if debug:
        print()
    # Find the best move and its value from the current state.
    bestValue = -inf
    bestMove = None
    for move in game.get_moves():
        # Apply a move to the current state.
        game.make_move(move)
        # print('trying', game)
        # Use depth-first search to find the eventual utility value and back it up.
        # Negate it because it comes back in the context of the next player.
        value, _ = negamax(game, depth_left - 1)
        value = -value
        # Remove the move from the current state, to prepare for trying a different move.
        game.unmake_move(move)
        if debug:
            print(' ' * (10 - depth_left), game, 'move', move, 'backed up value', value)
        if value > bestValue:
            # Value for this move is better than the moves tried so far from this state.
            bestValue = value
            bestMove = move
            if debug:
                print('new best')
        elif debug:
            print()
    return bestValue, bestMove
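# Before the full Tic-Tac-Toe example, here is a minimal sketch of a
# made-up two-move game that implements just the methods `negamax`
# itself calls. The class `TinyGame` and its payoff numbers are
# invented for this illustration; the terminal utilities are given
# from the perspective of the player to move at the terminal state,
# matching the convention `negamax` assumes.

# In[ ]:

class TinyGame(object):
    # Terminal utilities after two moves, keyed by the move sequence
    # (made-up numbers for this illustration).
    utilities = {('a', 'c'): -1, ('a', 'd'): 1,
                 ('b', 'c'): -2, ('b', 'd'): 4}

    def __init__(self):
        self.moves_made = []

    def get_moves(self):
        return ['a', 'b'] if len(self.moves_made) == 0 else ['c', 'd']

    def make_move(self, move):
        self.moves_made.append(move)

    def unmake_move(self, move):
        self.moves_made.pop()

    def is_over(self):
        return len(self.moves_made) == 2

    def get_utility(self):
        # None for non-terminal states, as in the Tic-Tac-Toe class below.
        return TinyGame.utilities.get(tuple(self.moves_made))

    def __str__(self):
        return str(self.moves_made)

debug = False          # negamax reads this global, so define it before calling
negamax(TinyGame(), 2)  # returns (-1, 'a')

# The first player's best move is 'a' with backed-up value -1: after
# 'b', the opponent's best reply 'c' reaches a state worth -2 to the
# first player, which is worse.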
# And we can apply `negamax` to Tic-Tac-Toe using the following
# `game` class definition.

# In[9]:

class TTT(object):

    def __init__(self):
        self.board = [' '] * 9
        self.player = 'X'
        if False:
            self.board = ['X', 'X', ' ', 'X', 'O', 'O', ' ', ' ', ' ']
            self.player = 'O'
        self.player_look_ahead = self.player

    def locations(self, c):
        return [i for i, mark in enumerate(self.board) if mark == c]

    def get_moves(self):
        moves = self.locations(' ')
        return moves

    def get_utility(self):
        where_X = self.locations('X')
        where_O = self.locations('O')
        wins = [[0, 1, 2], [3, 4, 5], [6, 7, 8],
                [0, 3, 6], [1, 4, 7], [2, 5, 8],
                [0, 4, 8], [2, 4, 6]]
        X_won = any([all([wi in where_X for wi in w]) for w in wins])
        O_won = any([all([wi in where_O for wi in w]) for w in wins])
        if X_won:
            return 1 if self.player_look_ahead == 'X' else -1
        elif O_won:
            return 1 if self.player_look_ahead == 'O' else -1
        elif ' ' not in self.board:
            return 0
        else:
            return None

    def is_over(self):
        return self.get_utility() is not None

    def make_move(self, move):
        self.board[move] = self.player_look_ahead
        self.player_look_ahead = 'X' if self.player_look_ahead == 'O' else 'O'

    def change_player(self):
        self.player = 'X' if self.player == 'O' else 'O'
        self.player_look_ahead = self.player

    def unmake_move(self, move):
        self.board[move] = ' '
        self.player_look_ahead = 'X' if self.player_look_ahead == 'O' else 'O'

    def __str__(self):
        s = '{}|{}|{}\n-----\n{}|{}|{}\n-----\n{}|{}|{}'.format(*self.board)
        return s

# Now, let's try an example.

# In[10]:

def play_game_negamax(game):
    print(game)
    while not game.is_over():
        value, move = negamax(game, 9)
        if move is None:
            print('move is None. Stopping')
            break
        game.make_move(move)
        print('\nPlayer', game.player, 'to', move, 'for value', value)
        print(game)
        game.change_player()

# In[11]:

debug = False
play_game_negamax(TTT())

# ## Negamax with Alpha-Beta Pruning
#
# For a negamax version, the meanings of *alpha* and *beta* must be
# swapped as players alternate, and their values must be negated.

# In[12]:

IFrame("http://www.cs.colostate.edu/~anderson/cs440/notebooks/negamax2.pdf", width=800, height=600)

# Modify Negamax to perform alpha-beta cutoffs by making these changes
# (a sketch following these steps appears at the end of this section):
#
# * Add two new arguments, `alpha` and `beta`, whose initial values are $-\infty$ and $\infty$.
# * In the for loop for trying moves, negate and swap the values of `alpha` and `beta` passed to the recursive call, and negate the value it returns.
# * Return early if `bestValue` is greater than or equal to `beta`.
# * Update `alpha` to the maximum of `bestValue` and the current `alpha`.
#
# What if you cannot search to the end of the game?
#
# Apply an **evaluation function** to non-terminal states. It must
# *estimate* the expected utility of the game from the current position
# (the utility function itself applies only to terminal states).
#
# A good evaluation function
#
# * orders the terminal states in the same way as the utility function,
# * cannot take too much execution time (it can't search the whole remaining tree!),
# * should be strongly correlated with the actual expected utility.
#
# An evaluation function is often a simple function of **features** of
# the current game position. The choice of good features is key, and
# requires considerable knowledge of the game and of good strategies.
#
# A strict cutoff of search at a specific depth, with the evaluation
# function applied there, can lead to problems. What if the advantage
# in the game swings quickly just after the cutoff? If a state can be
# judged likely to change this way (a non-**quiescent** state),
# additional search should be performed for that state.
#
# Current methods allow computers to search about 14 plies in chess.
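# Here is one possible sketch of the alpha-beta version, following the
# four changes listed above. The name `negamax_ab` and the fail-soft
# details are illustrative choices, not the official solution.

# In[ ]:

def negamax_ab(game, depth_left, alpha=-inf, beta=inf):
    # At a terminal state or the depth limit, return the utility value and move None.
    if game.is_over() or depth_left == 0:
        return game.get_utility(), None
    bestValue = -inf
    bestMove = None
    for move in game.get_moves():
        game.make_move(move)
        # Negate and swap alpha and beta for the next player, and negate
        # the backed-up value, exactly as in plain negamax.
        value, _ = negamax_ab(game, depth_left - 1, -beta, -alpha)
        value = -value
        game.unmake_move(move)
        if value > bestValue:
            bestValue = value
            bestMove = move
        if bestValue >= beta:
            # Cutoff: the previous player already has a better alternative,
            # so no other move from this state needs to be tried.
            return bestValue, bestMove
        # Keep alpha as the best value found so far along this path.
        alpha = max(bestValue, alpha)
    return bestValue, bestMove

# Replacing `negamax(game, 9)` with `negamax_ab(game, 9)` in
# `play_game_negamax` should select the same moves while visiting far
# fewer states.
#
# And as an illustration of an evaluation function built from simple
# features, here is one common choice for Tic-Tac-Toe. This sketch is
# not part of the code above; to use it, `negamax` would also have to
# call it when `depth_left` reaches 0 in a non-terminal state.

# In[ ]:

def ttt_evaluation(game):
    # Feature: the number of winning lines still open to each player.
    # A line is open to a player if the opponent has no mark in it.
    wins = [[0, 1, 2], [3, 4, 5], [6, 7, 8],
            [0, 3, 6], [1, 4, 7], [2, 5, 8],
            [0, 4, 8], [2, 4, 6]]
    me = game.player_look_ahead
    opp = 'X' if me == 'O' else 'O'
    open_to_me = sum(all(game.board[i] != opp for i in w) for w in wins)
    open_to_opp = sum(all(game.board[i] != me for i in w) for w in wins)
    # Divide by 8 (the number of lines) so the estimate stays within the
    # terminal utility range of -1 to 1.
    return (open_to_me - open_to_opp) / 8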