%matplotlib inline
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
import matplotlib.pyplot as plt
import copy
def get_max_estimate_index(estimates):
    """Return the indices of every entry that ties for the maximum value.

    Args:
        estimates: 1-D sequence (list or NumPy array) of comparable numbers.

    Returns:
        A list of integer positions whose value equals the largest value in
        ``estimates``, in ascending order. An empty input yields an empty
        list.
    """
    # Nothing to compare; np.argmax would raise on an empty sequence.
    if len(estimates) == 0:
        return []
    best = estimates[np.argmax(estimates)]
    # Scan the whole input instead of assuming exactly 10 entries.
    return [i for i, value in enumerate(estimates) if value == best]
def random(array):
    """Return one element of ``array`` chosen uniformly at random.

    Args:
        array: any non-empty indexable sequence (list, tuple, range,
            NumPy array). The input is never modified.

    Returns:
        A single element of ``array``.

    Raises:
        IndexError: if ``array`` is empty.
    """
    if len(array) == 0:
        raise IndexError("cannot choose from an empty sequence")
    # Indexing with a random position works on immutable sequences such as
    # ``range`` and tuples, which an in-place shuffle cannot handle, and it
    # avoids copying the whole input just to pick one element.
    return array[np.random.randint(len(array))]
def probability(epsilon):
    """Return True with probability ``epsilon``, otherwise False.

    Args:
        epsilon: chance of returning True. Values <= 0 always give False
            and values >= 1 always give True.

    Returns:
        A plain Python ``bool``.
    """
    # The draw is uniform on [0, 1). A strict comparison makes the chance of
    # True exactly epsilon, so epsilon == 0 can never return True.
    return bool(np.random.rand() < epsilon)
# Mean reward of each arm: 2000 independent bandit problems with 10 arms
# each, drawn from a standard normal distribution.
Qstar = np.random.standard_normal(size=(2000, 10))
def play(Qstar, epsilon, repeat):
    """Run epsilon-greedy action selection on a batch of bandit problems.

    At every step each bandit either explores (with probability ``epsilon``
    a uniformly random arm is pulled) or exploits (a random arm among those
    tied for the highest current estimate is pulled). Rewards are the arm's
    mean from ``Qstar`` plus standard normal noise, and each arm's estimate
    is the running average of the rewards it has produced.

    Args:
        Qstar: 2-D array-like of shape (bandits, arms) holding the mean
            reward of every arm.
        epsilon: exploration probability passed to ``probability``.
        repeat: number of steps to play.

    Returns:
        A list of length ``repeat``; entry ``t`` is the reward received at
        step ``t`` averaged over all bandits.

    Raises:
        ValueError: if ``Qstar`` is not 2-dimensional.
    """
    # Take the problem size from the data instead of assuming 2000 x 10.
    n_bandits, n_arms = np.asarray(Qstar).shape
    estimates = np.zeros([n_bandits, n_arms])
    # Pull counts let the average be updated in O(1) without keeping (and
    # re-summing) the full reward history of every arm.
    pulls = np.zeros([n_bandits, n_arms], dtype=int)
    # A real list (not a bare ``range``) so the helper can copy/shuffle it.
    all_arms = list(range(n_arms))
    result = []
    for _ in range(repeat):
        total = 0
        for i in range(n_bandits):
            if probability(epsilon):
                candidates = all_arms
            else:
                candidates = get_max_estimate_index(estimates[i])
            n = random(candidates)
            reward = Qstar[i][n] + np.random.randn()
            pulls[i, n] += 1
            # Incremental sample average: new = old + (reward - old) / k.
            estimates[i, n] += (reward - estimates[i, n]) / pulls[i, n]
            total += reward
        result.append(total / n_bandits)
    return result
# Play 500 steps for each fixed exploration rate, in increasing order of
# epsilon: 0, 0.01, 0.1, 0.5 and 1.
zero, one, ten, fifty, hundred = (
    play(Qstar, rate, 500) for rate in (0, 0.01, 0.1, 0.5, 1)
)
def play2(Qstar, repeat, initial_epsilon=0.1, final_epsilon=0.01,
          switch_fraction=0.8):
    """Run epsilon-greedy selection with an exploration rate that drops once.

    Steps whose index ``j`` satisfies ``j <= repeat * switch_fraction`` use
    ``initial_epsilon``; all later steps use ``final_epsilon``. Apart from
    that schedule, each step works as follows: with probability epsilon a
    uniformly random arm is pulled, otherwise a random arm among those tied
    for the highest estimate. Rewards are the arm's mean from ``Qstar`` plus
    standard normal noise, and estimates are running averages of the rewards.

    Args:
        Qstar: 2-D array-like of shape (bandits, arms) holding the mean
            reward of every arm.
        repeat: number of steps to play.
        initial_epsilon: exploration probability for the early steps.
        final_epsilon: exploration probability for the remaining steps.
        switch_fraction: fraction of ``repeat`` after which the rate drops.

    Returns:
        A list of length ``repeat``; entry ``t`` is the reward received at
        step ``t`` averaged over all bandits.

    Raises:
        ValueError: if ``Qstar`` is not 2-dimensional.
    """
    # Take the problem size from the data instead of assuming 2000 x 10.
    n_bandits, n_arms = np.asarray(Qstar).shape
    estimates = np.zeros([n_bandits, n_arms])
    # Pull counts let the average be updated in O(1) without keeping (and
    # re-summing) the full reward history of every arm.
    pulls = np.zeros([n_bandits, n_arms], dtype=int)
    # A real list (not a bare ``range``) so the helper can copy/shuffle it.
    all_arms = list(range(n_arms))
    result = []
    for j in range(repeat):
        total = 0
        if j <= repeat * switch_fraction:
            epsilon = initial_epsilon
        else:
            epsilon = final_epsilon
        for i in range(n_bandits):
            if probability(epsilon):
                candidates = all_arms
            else:
                candidates = get_max_estimate_index(estimates[i])
            n = random(candidates)
            reward = Qstar[i][n] + np.random.randn()
            pulls[i, n] += 1
            # Incremental sample average: new = old + (reward - old) / k.
            estimates[i, n] += (reward - estimates[i, n]) / pulls[i, n]
            total += reward
        result.append(total / n_bandits)
    return result
# 500 steps with the exploration rate that drops from 0.1 to 0.01.
one_to_zero = play2(Qstar=Qstar, repeat=500)
# Figure 1: average reward per step for every fixed exploration rate.
plt.title("n-Armed Bandit Problem")
plt.xlabel("number of times")
plt.ylabel("average of total reward")
plt.xlim(0, 500)
plt.ylim(-0.2, 1.6)
plt.grid()
for _curve, _caption in (
    (zero, 'e = 0'),
    (one, 'e = 0.01'),
    (ten, 'e = 0.1'),
    (fifty, 'e = 0.5'),
    (hundred, 'e = 1'),
):
    plt.plot(_curve, label=_caption)
plt.legend(loc='lower right')
# Out: <matplotlib.legend.Legend at 0x7f1d044eff90>
# Figure 2: fixed rates 0.01 and 0.1 against the rate that drops from
# 0.1 to 0.01.
plt.title("n-Armed Bandit Problem")
plt.xlabel("number of times")
plt.ylabel("average of total reward")
plt.xlim(0, 500)
plt.ylim(-0.2, 1.6)
plt.grid()
for _curve, _caption in (
    (one, 'e = 0.01'),
    (ten, 'e = 0.1'),
    (one_to_zero, 'e = 0.1 to 0.01'),
):
    plt.plot(_curve, label=_caption)
plt.legend(loc='lower right')
# Out: <matplotlib.legend.Legend at 0x7f1d03bdbd50>