%matplotlib inline

import numpy as np
import pandas as pd

from matplotlib import pyplot as plt
import prettyplotlib as pplt
import seaborn as sns

from statsmodels.formula.api import glm
import statsmodels.api as sm

np.set_printoptions(precision=2)

# local imports from the http://github.com/pjbull/civil_conflict
# repository
from ConflictMap import ConflictMap

import ConflictModel
reload(ConflictModel)
#from ConflictModel import ConflictModel

from graphics import plot_locations
from graphics import plot_route_with_colors

from models import Configuration
from models import Route
from models import HqLocator
from models import HqLocatorUganda

from optimization import sim_anneal

from SliceSampler import SliceSampler

# Load in the data
with open("data/ACLED-Uganda_19970101-to-20131231_final.xlsx") as f:
    uganda_file = pd.ExcelFile(f)
    
uganda_data = uganda_file.parse('Sheet1')
uganda_data.head()

# create the conflict map object
# based on the uganda data and plot it
conflict_map = ConflictMap(uganda_data)
fig, ax = conflict_map.plot_map()
fig.show()

# overplot the kde onto the map of the conflict data
conflict_model = ConflictModel.ConflictModel(uganda_data)
fig, ax = conflict_map.plot_map()
conflict_model.plot_kde(fig=fig, ax=ax)
fig.show()

slice_samples = conflict_model.draw_samples(50000)
SliceSampler.run_diagnostics(slice_samples)

# Visualize the samples on our map of Uganda
fig, ax = conflict_map.plot_map()
pplt.scatter(ax, slice_samples[:1000,0], 
             slice_samples[:1000,1], 
             zorder=20, 
             s=20, 
             color='b', 
             alpha=0.3)
fig.show()

# strip the dataframe just to the variables we care about
dt = uganda_data[['EVENT_DATE', 'FATALITIES']]
dt.set_index('EVENT_DATE', inplace=True)

# lambda for our groupby
by = lambda x: lambda y: getattr(y, x)

# get a column with the count of the number of events
fatality_df = dt.groupby([by('year'), by('month')]).count()

# get a column with the sum of the fatalities
fatality_df['sum'] = dt.groupby([by('year'), by('month')]).sum()

# adds a column for each lag that we want to the dataframe
def add_lags(orig_df, num_lags):
    df = orig_df.copy()
    
    lag_names = []
    for i in range(1, num_lags+1):
        fat_name = 'prev{}fat'.format(i)
        
        df[fat_name] = np.zeros(df.shape[0])
        df[fat_name][i:] = df['FATALITIES'][:-i]
        lag_names.append(fat_name)
        
    return df.iloc[i:,:], lag_names

fatality_df, new_lags = add_lags(fatality_df, 5)
fatality_df.head()

# fit the poisson model and summarize it
model_string = "FATALITIES ~ " + "+".join(new_lags) 
poisson_glm_lags = glm(model_string, data = fatality_df, family = sm.families.Poisson(sm.families.links.log)).fit()
print poisson_glm_lags.summary()

# create figure
fig, ax = plt.subplots(1)
fig.set_size_inches(16,4)
plt.xlim(0, 195)

# update reset index and get tick labels
melt = fatality_df.reset_index()
melt["ticks"] = ["{}-{}".format(melt.level_0.iloc[i], melt.level_1.iloc[i]) for i in range(melt.shape[0])]

# plot the fitted values of the GLM
pplt.plot(poisson_glm_lags.fittedvalues, label="Auto-regressive Poisson GLM")
pplt.plot(fatality_df.FATALITIES, label="True Values")
plt.ylabel("Count of Violent Events")
loc, label = plt.xticks()

# update the ticks
plt.xticks(np.array(loc[:-1], dtype=int), melt.ticks.iloc[np.array(loc[:-1], dtype=int)].values)
pplt.legend()

# We can now predict the next time step
newrow = pd.DataFrame([dict(prev1fat=fatality_df.FATALITIES[-1],
                             prev2fat=fatality_df.FATALITIES[-2],
                             prev3fat=fatality_df.FATALITIES[-3],
                             prev4fat=fatality_df.FATALITIES[-4],
                             prev5fat=fatality_df.FATALITIES[-5])])

new_fatality_df = fatality_df.append(newrow, ignore_index=True)
poisson_mean = poisson_glm_lags.predict(exog=new_fatality_df[-1:])
n_conflicts = np.random.poisson(poisson_mean)

# use slice sampling, and get a number of points drawn from the Poisson distribution
c = Configuration(sample_method=conflict_model.sample_fake_uniform, n=n_conflicts)

# put headquarters in the capitol city, Kampala
c.set_hq(np.array([[32.5811, 0.3136]]))

# initialize the supplies
r = Route(c, np.array([5]*3))

# now we can use simulated annealing to find the best
# path between the simulated events.
x0 = np.arange(c.n)
res, losses, dists = sim_anneal(r.loss, r.perturb, x0,
                               t0=10.0, ntrial=55000, reanneal=1000,
                               other_func=r.dist, verbose=True)

# plot the results
fig, ax = conflict_map.plot_map(outline_only=True)
ax = plot_route_with_colors(c, res, ax)
ax.legend()
plt.show()

fig = plt.figure(figsize=(14, 4))
plt.plot(losses, label='loss')
plt.semilogy()
plt.legend()
plt.xlabel('accepted proposal')
plt.ylabel('loss')
plt.show()

r2 = HqLocatorUganda(n=50, proposed_location=np.array([32.5811, 0.3136]), data_model=conflict_model)

x0 = np.zeros(2, dtype=np.float)
res, losses, hqs = sim_anneal(r2.loss, r2.perturb, x0,
                                t0=1000.0, ntrial=1000, reanneal=1000,
                                other_func=r2.proposed_location, verbose=True)

# plot the map of hq locations
fig, ax = conflict_map.plot_map(outline_only=True)
ax.scatter(*np.hsplit(r2.cities[:, :], 2), c='b', zorder=35, label="Events of Political Violence")
ax.scatter(*np.hsplit(hqs[:, :], 2), c='r', marker='x', zorder=35, label="Path of Proposed HQs")
ax.scatter(*np.hsplit(hqs[-1, :], 2), c='g', marker='o', s=150, zorder=35, label="Final HQ")
plt.legend()
plt.show()

# plot the loss function
fig = plt.figure(figsize=(8, 4))
plt.plot(losses, label='loss')

plt.legend()
plt.xlabel('accepted proposal')
plt.ylabel('loss')
plt.semilogy()
plt.show()

c.set_hq(hqs[-1])
r = Route(c, np.array([20]*3))

x0 = np.arange(c.n)
res, losses, dists = sim_anneal(r.loss, r.perturb, x0,
                               t0=10.0, ntrial=55000, reanneal=1000,
                               other_func=r.dist, verbose=True)

fig, ax = conflict_map.plot_map(outline_only=True)
ax = graphics.plot_route_with_colors(c, res, ax)
ax.legend()
plt.show()

fig = plt.figure(figsize=(14, 4))
plt.plot(losses, label='loss')
plt.semilogy()
plt.legend()
plt.xlabel('accepted proposal')
plt.ylabel('loss')
plt.show()