# Some code to style the IPython notebook and make it more legible.
# CSS styling adapted from 
# https://github.com/CamDavidsonPilon/Probabilistic-Programming-and-Bayesian-Methods-for-Hackers
from IPython.core.display import HTML
# Load the notebook stylesheet; the context manager closes the file as soon
# as its contents have been read instead of leaving the handle open.
with open("Style.css") as style_file:
    styles = style_file.read()
# Wrap the CSS text in an IPython HTML display object (in a notebook this
# is rendered when it is the last expression of a cell).
HTML(styles)

import datetime as dt
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.basemap import Basemap

# Set this variable to the directory where the GDELT data files are
PATH = "GDELT.1979-2012.reduced/"

with open(PATH+"2010.reduced.txt") as f:
    col_names = f.readline().split("\t")
for i, col_name in enumerate(col_names):
    print i, col_name

data = []
for year in range(1979, 2013):
    f = open(PATH + str(year) + ".reduced.txt")
    for raw_row in f:
        row = raw_row.split("\t")
        actor1 = row[1][:3]
        actor2 = row[2][:3]
        both = actor1 + actor2
        if "RUS" in both:
            data.append(raw_row)
print "Russia-related records:", len(data)

# Count how many collected records fall on each distinct coordinate pair
# taken from columns 10 and 11.
point_counts = defaultdict(int) # Defaultdict with (lat, long) as key
for row in data:
    row = row.split("\t")
    try:
        lat = float(row[10])
        lon = float(row[11])
    except (IndexError, ValueError):
        # Skip rows whose coordinate fields are missing or not numeric;
        # any other error is left to propagate instead of being hidden.
        continue
    point_counts[(lat, lon)] += 1

# Get some summary statistics
counts = np.array(point_counts.values())
print "Total points:", len(counts) 
print "Min events:", counts.min()
print "Max events:", counts.max()
print "Mean events:", counts.mean()
print "Median points:", np.median(counts)

def get_size(count, scale_factor=2):
    '''
    Convert an event count to a marker size on a log scale.

    count: number of events (a scalar or a NumPy array), expected >= 0.
    scale_factor: multiplier applied to the logarithm; defaults to 2.

    Returns log10(count + 1) * scale_factor. The +1 maps a count of 0 to
    a size of 0 instead of -inf.
    '''
    return np.log10(count + 1) * scale_factor

# Basemap draws onto an ordinary matplotlib figure, so the figure is created
# and sized first, the same way as for any other plot.
plt.figure(figsize=(12,12))

# Build the map: Mercator projection, low-resolution boundaries.
event_map = Basemap(
    projection='merc',
    resolution='l',
    area_thresh=1000.0,
    lat_0=55.0, lon_0=60.0,        # Map center
    llcrnrlon=10, llcrnrlat=20,    # Lower left corner
    urcrnrlon=100, urcrnrlat=70,   # Upper right corner
)

# Background layers: coastlines, country borders, light-gray land, frame.
event_map.drawcoastlines()
event_map.drawcountries()
event_map.fillcontinents(color='0.8')
event_map.drawmapboundary()

# Plot one translucent red marker per distinct point, sized by event count:
for (lat, lon), n_events in point_counts.iteritems():
    # Calling the map with (longitude, latitude) yields plot coordinates.
    map_x, map_y = event_map(lon, lat)
    event_map.plot(map_x, map_y, 'ro',
                   markersize=get_size(n_events), alpha=0.3)

# Defaultdict with ((lat, long), (lat,long)) as key
# Counts, for 2012 records only, how often each pair of coordinates taken
# from columns 6-7 and 8-9 occurs together.
interaction_counts = defaultdict(int) 

for row in data:
    row = row.split("\t")
    # Skip row if not in 2012
    if row[0][:4] != '2012':
        continue
    try:
        lat_1 = float(row[6])
        lon_1 = float(row[7])
        lat_2 = float(row[8])
        lon_2 = float(row[9])
    except (IndexError, ValueError):
        # Skip rows with a missing or non-numeric coordinate; any other
        # error is left to propagate instead of being silently dropped.
        continue
    interaction_counts[((lat_1, lon_1), (lat_2, lon_2))] += 1
# Check point data:
counts = np.array(interaction_counts.values())
print "Total point-pairs:", len(counts) 
print "Min events:", counts.min()
print "Max events:", counts.max()
print "Mean events:", counts.mean()
print "Median points:", np.median(counts)

max_val = np.log10(counts.max())

def get_alpha(count, max_log=None):
    '''
    Convert an event count to a line alpha (opacity) on a log scale.

    count: number of events, expected >= 1.
    max_log: log10 of the largest count, used to normalise the result.
        When omitted, the module-level max_val is used.

    Returns (log10(count) / max_log) * 0.25, so the largest count maps to
    0.25 and a count of 1 maps to 0. If max_log is not positive (the
    largest count is 1), returns 0.25 instead of dividing by zero.
    '''
    if max_log is None:
        max_log = max_val
    if max_log <= 0:
        # Every count equals the maximum; avoid a 0/0 that yields NaN.
        return 0.25
    scale = np.log10(count)
    return (scale/max_log) * 0.25

# Fresh figure and an identically configured map for the interaction plot.
plt.figure(figsize=(12,12))
event_map = Basemap(
    projection='merc',
    resolution='l',
    area_thresh=1000.0,
    lat_0=55.0, lon_0=60.0,        # Map center
    llcrnrlon=10, llcrnrlat=20,    # Lower left corner
    urcrnrlon=100, urcrnrlat=70,   # Upper right corner
)

# Background layers: coastlines, country borders, light-gray land, frame.
event_map.drawcoastlines()
event_map.drawcountries()
event_map.fillcontinents(color='0.8')
event_map.drawmapboundary()

# Plot one red great-circle arc per point pair, with opacity set by count:
for (start, end), n_events in interaction_counts.iteritems():
    lat_a, lon_a = start
    lat_b, lon_b = end

    # Skip any arc with an endpoint outside the mapped region
    # (longitude 10..100, latitude 20..70, bounds exclusive).
    start_on_map = 10 < lon_a < 100 and 20 < lat_a < 70
    end_on_map = 10 < lon_b < 100 and 20 < lat_b < 70
    if not (start_on_map and end_on_map):
        continue

    event_map.drawgreatcircle(lon_a, lat_a, lon_b, lat_b, linewidth=2,
                              color='r', alpha=get_alpha(n_events))