#!/usr/bin/env python
# coding: utf-8

# # Negamax
#
# Last time we discussed the minimax search method for searching trees
# in adversarial games. The alternating maximizing and minimizing steps
# can be replaced with a single maximizing step if we negate the values
# returned each time **and** the game is truly a zero-sum game, with the
# utility in terminal states for one player always being the negative of
# the utility for the other player.
#
# Here is an illustration of Negamax applied to Tic-Tac-Toe.

# In[7]:

from IPython.display import IFrame
IFrame("http://www.cs.colostate.edu/~anderson/cs440/notebooks/negamax.pdf", width=800, height=600)

# Here is a python implementation.

# In[8]:

### Assumes that the argument 'game' is an object with the following methods:
###   game.get_moves()
###   game.make_move(move)    changes lookahead player
###   game.unmake_move(move)  changes lookahead player
###   game.change_player()    changes next turn player
###   game.get_utility()
###   game.is_over()
###   game.__str__()

inf = float('infinity')

def negamax(game, depth_left):
    if debug:
        print(' ' * (10 - depth_left), game, end='')
    # If at a terminal state or the depth limit, return the utility value and move None.
    if game.is_over() or depth_left == 0:
        if debug:
            print('terminal value', game.get_utility())
        return game.get_utility(), None
    if debug:
        print()
    # Find the best move and its value from the current state.
    bestValue = -inf
    bestMove = None
    for move in game.get_moves():
        # Apply a move to the current state.
        game.make_move(move)
        # print('trying', game)
        # Use depth-first search to find the eventual utility value and back it up.
        # Negate it because it comes back in the context of the next player.
        value, _ = negamax(game, depth_left - 1)
        value = -value
        # Remove the move from the current state, to prepare for trying a different move.
        game.unmake_move(move)
        if debug:
            print(' ' * (10 - depth_left), game, 'move', move, 'backed up value', value)
        if value > bestValue:
            # Value for this move is better than the moves tried so far from this state.
            bestValue = value
            bestMove = move
            if debug:
                print('new best')
        elif debug:
            print()
    return bestValue, bestMove
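# Before the full Tic-Tac-Toe example, here is a minimal sketch of a
# made-up two-move game that implements just the methods `negamax`
# itself calls. The class `TinyGame` and its payoff numbers are
# invented for this illustration; the terminal utilities are given
# from the perspective of the player to move at the terminal state,
# matching the convention `negamax` assumes.

# In[ ]:

class TinyGame(object):
    # Terminal utilities after two moves, keyed by the move sequence
    # (made-up numbers for this illustration).
    utilities = {('a', 'c'): -1, ('a', 'd'): 1,
                 ('b', 'c'): -2, ('b', 'd'): 4}

    def __init__(self):
        self.moves_made = []

    def get_moves(self):
        return ['a', 'b'] if len(self.moves_made) == 0 else ['c', 'd']

    def make_move(self, move):
        self.moves_made.append(move)

    def unmake_move(self, move):
        self.moves_made.pop()

    def is_over(self):
        return len(self.moves_made) == 2

    def get_utility(self):
        # None for non-terminal states, as in the Tic-Tac-Toe class below.
        return TinyGame.utilities.get(tuple(self.moves_made))

    def __str__(self):
        return str(self.moves_made)

debug = False          # negamax reads this global, so define it before calling
negamax(TinyGame(), 2)  # returns (-1, 'a')

# The first player's best move is 'a' with backed-up value -1: after
# 'b', the opponent's best reply 'c' reaches a state worth -2 to the
# first player, which is worse.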
# And we can apply `negamax` to Tic-Tac-Toe using the following
# `game` class definition.

# In[9]:

class TTT(object):

    def __init__(self):
        self.board = [' '] * 9
        self.player = 'X'
        if False:
            self.board = ['X', 'X', ' ', 'X', 'O', 'O', ' ', ' ', ' ']
            self.player = 'O'
        self.player_look_ahead = self.player

    def locations(self, c):
        return [i for i, mark in enumerate(self.board) if mark == c]

    def get_moves(self):
        moves = self.locations(' ')
        return moves

    def get_utility(self):
        where_X = self.locations('X')
        where_O = self.locations('O')
        wins = [[0, 1, 2], [3, 4, 5], [6, 7, 8],
                [0, 3, 6], [1, 4, 7], [2, 5, 8],
                [0, 4, 8], [2, 4, 6]]
        X_won = any([all([wi in where_X for wi in w]) for w in wins])
        O_won = any([all([wi in where_O for wi in w]) for w in wins])
        if X_won:
            return 1 if self.player_look_ahead == 'X' else -1
        elif O_won:
            return 1 if self.player_look_ahead == 'O' else -1
        elif ' ' not in self.board:
            return 0
        else:
            return None

    def is_over(self):
        return self.get_utility() is not None

    def make_move(self, move):
        self.board[move] = self.player_look_ahead
        self.player_look_ahead = 'X' if self.player_look_ahead == 'O' else 'O'

    def change_player(self):
        self.player = 'X' if self.player == 'O' else 'O'
        self.player_look_ahead = self.player

    def unmake_move(self, move):
        self.board[move] = ' '
        self.player_look_ahead = 'X' if self.player_look_ahead == 'O' else 'O'

    def __str__(self):
        s = '{}|{}|{}\n-----\n{}|{}|{}\n-----\n{}|{}|{}'.format(*self.board)
        return s

# Now, let's try an example.

# In[10]:

def play_game_negamax(game):
    print(game)
    while not game.is_over():
        value, move = negamax(game, 9)
        if move is None:
            print('move is None. Stopping')
            break
        game.make_move(move)
        print('\nPlayer', game.player, 'to', move, 'for value', value)
        print(game)
        game.change_player()

# In[11]:

debug = False
play_game_negamax(TTT())

# ## Negamax with Alpha-Beta Pruning
#
# For a negamax version, the meanings of *alpha* and *beta* must be
# swapped as players alternate, and their values must be negated.

# In[12]:

IFrame("http://www.cs.colostate.edu/~anderson/cs440/notebooks/negamax2.pdf", width=800, height=600)

# Modify Negamax to perform alpha-beta cutoffs by making these changes
# (a sketch following these steps appears at the end of this section):
#
# * Add two new arguments, `alpha` and `beta`, whose initial values are $-\infty$ and $\infty$.
# * In the for loop for trying moves, negate and swap the values of `alpha` and `beta` passed to the recursive call, and negate the value it returns.
# * Return early if `bestValue` is greater than or equal to `beta`.
# * Update `alpha` to the maximum of `bestValue` and the current `alpha`.
#
# What if you cannot search to the end of the game?
#
# Apply an **evaluation function** to non-terminal states. It must
# *estimate* the expected utility of the game from the current position
# (the utility function itself applies only to terminal states).
#
# A good evaluation function
#
# * orders the terminal states in the same way as the utility function,
# * cannot take too much execution time (it can't search the whole remaining tree!),
# * should be strongly correlated with the actual expected utility.
#
# An evaluation function is often a simple function of **features** of
# the current game position. The choice of good features is key, and
# requires considerable knowledge of the game and of good strategies.
#
# A strict cutoff of search at a specific depth, with the evaluation
# function applied there, can lead to problems. What if the advantage
# in the game swings quickly just after the cutoff? If a state can be
# judged likely to change this way (a non-**quiescent** state),
# additional search should be performed for that state.
#
# Current methods allow computers to search about 14 plies in chess.
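# Here is one possible sketch of the alpha-beta version, following the
# four changes listed above. The name `negamax_ab` and the fail-soft
# details are illustrative choices, not the official solution.

# In[ ]:

def negamax_ab(game, depth_left, alpha=-inf, beta=inf):
    # At a terminal state or the depth limit, return the utility value and move None.
    if game.is_over() or depth_left == 0:
        return game.get_utility(), None
    bestValue = -inf
    bestMove = None
    for move in game.get_moves():
        game.make_move(move)
        # Negate and swap alpha and beta for the next player, and negate
        # the backed-up value, exactly as in plain negamax.
        value, _ = negamax_ab(game, depth_left - 1, -beta, -alpha)
        value = -value
        game.unmake_move(move)
        if value > bestValue:
            bestValue = value
            bestMove = move
        if bestValue >= beta:
            # Cutoff: the previous player already has a better alternative,
            # so no other move from this state needs to be tried.
            return bestValue, bestMove
        # Keep alpha as the best value found so far along this path.
        alpha = max(bestValue, alpha)
    return bestValue, bestMove

# Replacing `negamax(game, 9)` with `negamax_ab(game, 9)` in
# `play_game_negamax` should select the same moves while visiting far
# fewer states.
#
# And as an illustration of an evaluation function built from simple
# features, here is one common choice for Tic-Tac-Toe. This sketch is
# not part of the code above; to use it, `negamax` would also have to
# call it when `depth_left` reaches 0 in a non-terminal state.

# In[ ]:

def ttt_evaluation(game):
    # Feature: the number of winning lines still open to each player.
    # A line is open to a player if the opponent has no mark in it.
    wins = [[0, 1, 2], [3, 4, 5], [6, 7, 8],
            [0, 3, 6], [1, 4, 7], [2, 5, 8],
            [0, 4, 8], [2, 4, 6]]
    me = game.player_look_ahead
    opp = 'X' if me == 'O' else 'O'
    open_to_me = sum(all(game.board[i] != opp for i in w) for w in wins)
    open_to_opp = sum(all(game.board[i] != me for i in w) for w in wins)
    # Divide by 8 (the number of lines) so the estimate stays within the
    # terminal utility range of -1 to 1.
    return (open_to_me - open_to_opp) / 8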