%matplotlib inline import numpy as np import pandas as pd from matplotlib import pyplot as plt import prettyplotlib as pplt import seaborn as sns from statsmodels.formula.api import glm import statsmodels.api as sm np.set_printoptions(precision=2) # local imports from the http://github.com/pjbull/civil_conflict # repository from ConflictMap import ConflictMap import ConflictModel reload(ConflictModel) #from ConflictModel import ConflictModel from graphics import plot_locations from graphics import plot_route_with_colors from models import Configuration from models import Route from models import HqLocator from models import HqLocatorUganda from optimization import sim_anneal from SliceSampler import SliceSampler # Load in the data with open("data/ACLED-Uganda_19970101-to-20131231_final.xlsx") as f: uganda_file = pd.ExcelFile(f) uganda_data = uganda_file.parse('Sheet1') uganda_data.head() # create the conflict map object # based on the uganda data and plot it conflict_map = ConflictMap(uganda_data) fig, ax = conflict_map.plot_map() fig.show() # overplot the kde onto the map of the conflict data conflict_model = ConflictModel.ConflictModel(uganda_data) fig, ax = conflict_map.plot_map() conflict_model.plot_kde(fig=fig, ax=ax) fig.show() slice_samples = conflict_model.draw_samples(50000) SliceSampler.run_diagnostics(slice_samples) # Visualize the samples on our map of Uganda fig, ax = conflict_map.plot_map() pplt.scatter(ax, slice_samples[:1000,0], slice_samples[:1000,1], zorder=20, s=20, color='b', alpha=0.3) fig.show() # strip the dataframe just to the variables we care about dt = uganda_data[['EVENT_DATE', 'FATALITIES']] dt.set_index('EVENT_DATE', inplace=True) # lambda for our groupby by = lambda x: lambda y: getattr(y, x) # get a column with the count of the number of events fatality_df = dt.groupby([by('year'), by('month')]).count() # get a column with the sum of the fatalities fatality_df['sum'] = dt.groupby([by('year'), by('month')]).sum() # adds a column for each lag that we want to the dataframe def add_lags(orig_df, num_lags): df = orig_df.copy() lag_names = [] for i in range(1, num_lags+1): fat_name = 'prev{}fat'.format(i) df[fat_name] = np.zeros(df.shape[0]) df[fat_name][i:] = df['FATALITIES'][:-i] lag_names.append(fat_name) return df.iloc[i:,:], lag_names fatality_df, new_lags = add_lags(fatality_df, 5) fatality_df.head() # fit the poisson model and summarize it model_string = "FATALITIES ~ " + "+".join(new_lags) poisson_glm_lags = glm(model_string, data = fatality_df, family = sm.families.Poisson(sm.families.links.log)).fit() print poisson_glm_lags.summary() # create figure fig, ax = plt.subplots(1) fig.set_size_inches(16,4) plt.xlim(0, 195) # update reset index and get tick labels melt = fatality_df.reset_index() melt["ticks"] = ["{}-{}".format(melt.level_0.iloc[i], melt.level_1.iloc[i]) for i in range(melt.shape[0])] # plot the fitted values of the GLM pplt.plot(poisson_glm_lags.fittedvalues, label="Auto-regressive Poisson GLM") pplt.plot(fatality_df.FATALITIES, label="True Values") plt.ylabel("Count of Violent Events") loc, label = plt.xticks() # update the ticks plt.xticks(np.array(loc[:-1], dtype=int), melt.ticks.iloc[np.array(loc[:-1], dtype=int)].values) pplt.legend() # We can now predict the next time step newrow = pd.DataFrame([dict(prev1fat=fatality_df.FATALITIES[-1], prev2fat=fatality_df.FATALITIES[-2], prev3fat=fatality_df.FATALITIES[-3], prev4fat=fatality_df.FATALITIES[-4], prev5fat=fatality_df.FATALITIES[-5])]) new_fatality_df = fatality_df.append(newrow, ignore_index=True) poisson_mean = poisson_glm_lags.predict(exog=new_fatality_df[-1:]) n_conflicts = np.random.poisson(poisson_mean) # use slice sampling, and get a number of points drawn from the Poisson distribution c = Configuration(sample_method=conflict_model.sample_fake_uniform, n=n_conflicts) # put headquarters in the capitol city, Kampala c.set_hq(np.array([[32.5811, 0.3136]])) # initialize the supplies r = Route(c, np.array([5]*3)) # now we can use simulated annealing to find the best # path between the simulated events. x0 = np.arange(c.n) res, losses, dists = sim_anneal(r.loss, r.perturb, x0, t0=10.0, ntrial=55000, reanneal=1000, other_func=r.dist, verbose=True) # plot the results fig, ax = conflict_map.plot_map(outline_only=True) ax = plot_route_with_colors(c, res, ax) ax.legend() plt.show() fig = plt.figure(figsize=(14, 4)) plt.plot(losses, label='loss') plt.semilogy() plt.legend() plt.xlabel('accepted proposal') plt.ylabel('loss') plt.show() r2 = HqLocatorUganda(n=50, proposed_location=np.array([32.5811, 0.3136]), data_model=conflict_model) x0 = np.zeros(2, dtype=np.float) res, losses, hqs = sim_anneal(r2.loss, r2.perturb, x0, t0=1000.0, ntrial=1000, reanneal=1000, other_func=r2.proposed_location, verbose=True) # plot the map of hq locations fig, ax = conflict_map.plot_map(outline_only=True) ax.scatter(*np.hsplit(r2.cities[:, :], 2), c='b', zorder=35, label="Events of Political Violence") ax.scatter(*np.hsplit(hqs[:, :], 2), c='r', marker='x', zorder=35, label="Path of Proposed HQs") ax.scatter(*np.hsplit(hqs[-1, :], 2), c='g', marker='o', s=150, zorder=35, label="Final HQ") plt.legend() plt.show() # plot the loss function fig = plt.figure(figsize=(8, 4)) plt.plot(losses, label='loss') plt.legend() plt.xlabel('accepted proposal') plt.ylabel('loss') plt.semilogy() plt.show() c.set_hq(hqs[-1]) r = Route(c, np.array([20]*3)) x0 = np.arange(c.n) res, losses, dists = sim_anneal(r.loss, r.perturb, x0, t0=10.0, ntrial=55000, reanneal=1000, other_func=r.dist, verbose=True) fig, ax = conflict_map.plot_map(outline_only=True) ax = graphics.plot_route_with_colors(c, res, ax) ax.legend() plt.show() fig = plt.figure(figsize=(14, 4)) plt.plot(losses, label='loss') plt.semilogy() plt.legend() plt.xlabel('accepted proposal') plt.ylabel('loss') plt.show()