# Some code to style the IPython notebook and make it more legible. # CSS styling adapted from # https://github.com/CamDavidsonPilon/Probabilistic-Programming-and-Bayesian-Methods-for-Hackers from IPython.core.display import HTML styles = open("Style.css").read() HTML(styles) import datetime as dt from collections import defaultdict import matplotlib.pyplot as plt from mpl_toolkits.basemap import Basemap # Set this variable to the directory where the GDELT data files are PATH = "GDELT.1979-2012.reduced/" with open(PATH+"2010.reduced.txt") as f: col_names = f.readline().split("\t") for i, col_name in enumerate(col_names): print i, col_name data = [] for year in range(1979, 2013): f = open(PATH + str(year) + ".reduced.txt") for raw_row in f: row = raw_row.split("\t") actor1 = row[1][:3] actor2 = row[2][:3] both = actor1 + actor2 if "RUS" in both: data.append(raw_row) print "Russia-related records:", len(data) point_counts = defaultdict(int) # Defaultdict with (lat, long) as key for row in data: row = row.split("\t") try: lat = float(row[10]) lon = float(row[11]) point_counts[(lat, lon)] += 1 except: pass # Get some summary statistics counts = np.array(point_counts.values()) print "Total points:", len(counts) print "Min events:", counts.min() print "Max events:", counts.max() print "Mean events:", counts.mean() print "Median points:", np.median(counts) def get_size(count): ''' Convert a count to a point size. Log-scaled. ''' scale_factor = 2 return np.log10(count + 1) * scale_factor # Note that we're drawing on a regular matplotlib figure, so we set the # figure size just like we would any other. plt.figure(figsize=(12,12)) # Create the Basemap event_map = Basemap(projection='merc', resolution='l', area_thresh=1000.0, # Low resolution lat_0 = 55.0, lon_0=60.0, # Map center llcrnrlon=10, llcrnrlat=20, # Lower left corner urcrnrlon=100, urcrnrlat=70) # Upper right corner # Draw important features event_map.drawcoastlines() event_map.drawcountries() event_map.fillcontinents(color='0.8') # Light gray event_map.drawmapboundary() # Draw the points on the map: for point, count in point_counts.iteritems(): x, y = event_map(point[1], point[0]) # Convert lat, long to y,x marker_size = get_size(count) event_map.plot(x,y, 'ro', markersize=marker_size, alpha=0.3) # Defaultdict with ((lat, long), (lat,long)) as key interaction_counts = defaultdict(int) for row in data: row = row.split("\t") # Skip row if not in 2012 if row[0][:4] != '2012': continue try: lat_1 = float(row[6]) lon_1 = float(row[7]) lat_2 = float(row[8]) lon_2 = float(row[9]) interaction_counts[((lat_1, lon_1), (lat_2, lon_2))] += 1 except: pass # Check point data: counts = np.array(interaction_counts.values()) print "Total point-pairs:", len(counts) print "Min events:", counts.min() print "Max events:", counts.max() print "Mean events:", counts.mean() print "Median points:", np.median(counts) max_val = np.log10(counts.max()) def get_alpha(count): ''' Convert a count to an alpha val. Log-scaled ''' scale = np.log10(count) return (scale/max_val) * 0.25 # Draw the basemap like before plt.figure(figsize=(12,12)) event_map = Basemap(projection='merc', resolution='l', area_thresh=1000.0, # Low resolution lat_0 = 55.0, lon_0=60.0, # Map center llcrnrlon=10, llcrnrlat=20, # Lower left corner urcrnrlon=100, urcrnrlat=70) # Upper right corner # Draw important features event_map.drawcoastlines() event_map.drawcountries() event_map.fillcontinents(color='0.8') event_map.drawmapboundary() # Draw the lines on the map: for arc, count in interaction_counts.iteritems(): point1, point2 = arc y1, x1 = point1 y2, x2 = point2 # Only plot lines where both points are on our map: if ((x1 > 10 and x1 < 100 and y1 > 20 and y1 < 70) and (x2 > 10 and x2 < 100 and y2 > 20 and y2 < 70)): line_alpha = get_alpha(count) # Draw the great circle line event_map.drawgreatcircle(x1, y1, x2, y2, linewidth=2, color='r', alpha=line_alpha)