from IPython.core.display import HTML
# Footer: Creative Commons notice shared by every post on the blog.
with open('creative_commons.txt', 'r') as cc:
    license_html = cc.read()

# Notebook file name, '<date>-<slug>'; also used to build the links below.
name = '2015-10-05-brazil_germany'

# Prepend the notebook download / nbviewer links to the license footer.
# (Rendered at the very end of the notebook via HTML(html).)
html = '''
<small>
<p> This post was written as an IPython notebook.
It is available for <a href='https://ocefpaf.github.com/python4oceanographers/downloads/notebooks/{0}.ipynb'>download</a>
or as a static <a href='https://nbviewer.ipython.org/url/ocefpaf.github.com/python4oceanographers/downloads/notebooks/{0}.ipynb'>html</a>.</p>
<p></p>
{1}'''.format(name, license_html)
%matplotlib inline
import os
from datetime import datetime
# Post metadata for the Pelican markdown header.
title = "Exploring what's new in Cartopy"
hour = datetime.utcnow().strftime('%H:%M')  # publication time (UTC, HH:MM)
comments = "true"
# `name` is '<YYYY>-<MM>-<DD>-<slug>': the first three tokens are the date,
# the rest is the slug.
date = '-'.join(name.split('-')[:3])
slug = '-'.join(name.split('-')[3:])
metadata = dict(title=title,
                date=date,
                hour=hour,
                comments=comments,
                slug=slug,
                name=name)
# Markdown stub: Pelican header plus a liquid-style `notebook` tag that
# embeds the notebook's cells from index 2 onward (skipping setup cells).
markdown = """Title: {title}
date: {date} {hour}
comments: {comments}
slug: {slug}
{{% notebook {name}.ipynb cells[2:] %}}
""".format(**metadata)
# Write the stub two directories above the current working directory.
content = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir,
                                       '{}.md'.format(name)))
# Fixes vs. original: dropped the no-op "'{}'.format(content)" wrapper and
# replaced writelines() with write() -- `markdown` is a single string, and
# writelines() would have iterated it character by character.
with open(content, 'w') as f:
    f.write(markdown)
import time
import json
import requests
import mechanize
from bs4 import BeautifulSoup
# Configure a mechanize browser for scraping the HuffPost data pages.
br = mechanize.Browser()
# Spoof a desktop Firefox user agent so the site serves the normal page.
br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
# Follow meta-refresh redirects, waiting at most 10 s for each.
# NOTE: mechanize._http is a private module, but this is the usual recipe
# for enabling the refresh processor.
br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=10)

# Index page for HuffPost's 2014 World Cup data and the match of interest.
url = 'http://data.huffingtonpost.com/2014/world-cup/matches/'
match = 'brazil-vs-germany-731827'
def get_match_data(match):
    """Fetch and decode the JSON payload for a match.

    The numeric match id is the last '-'-separated token of *match*
    (e.g. 'brazil-vs-germany-731827' -> '731827').
    """
    match_id = match.rsplit('-', 1)[-1]
    url = 'http://data.huffingtonpost.com/2014/world-cup/matches/{}.json'.format(match_id)
    response = mechanize.urlopen(url)
    return json.loads(response.read())
def get_match_names(match):
    """Scrape the ``HPIN.*`` javascript variables from a match page.

    Each data line in the page's second <script> block has the form
    ``HPIN.<name> = <json>;``.  Returns a dict of name -> decoded value.
    """
    page = mechanize.urlopen(
        'http://data.huffingtonpost.com/2014/world-cup/matches/%s' % match)
    soup = BeautifulSoup(page)
    # Second script block carries the data; assumes every match page
    # follows the same layout -- TODO confirm.
    script = soup.findAll("script")[1]
    names = {}
    for raw in script.text.split('\n')[1:]:
        if not raw:
            continue
        # 'HPIN.<name> = <json>;' -> key from the left side, JSON from the
        # right side with the trailing ';' stripped.
        parts = raw.split(' = ')
        key = parts[0].split('.')[1]
        names[key] = json.loads(parts[1][:-1])
    return names
import os
import pickle
# Cache the scraped match data locally so re-running the notebook does not
# hit the HuffPost site again.
fname = "./data/brazil_vs_germany_2014.pickle"
if os.path.isfile(fname):
    # BUG FIX: the original called pickle.load() but discarded its return
    # value, leaving `data` unbound on the cached path.  Bind it, and close
    # the file deterministically with a context manager.
    with open(fname, "rb") as f:
        data = pickle.load(f)  # trusted local cache written below
else:
    data = {}
    match_data = get_match_data(match)
    match_names = get_match_names(match)
    data[match] = {'data': match_data, 'names': match_names}
    with open(fname, "wb") as f:
        pickle.dump(data, f)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from footyviz import draw_events, draw_pitch, type_names
# Use pandas' built-in matplotlib styling for the plots below.
# NOTE(review): `display.mpl_style` was deprecated and later removed from
# pandas (modern versions raise OptionError here) -- fine for the pandas
# vintage this notebook targets; today use matplotlib style sheets instead.
pd.options.display.mpl_style = 'default'
# Event-level match data; column 0 of the CSV is the index.
# NOTE(review): this loads the germany-vs-argentina final while the scrape
# above targets the brazil-vs-germany semifinal -- confirm that is intended.
# Assumes columns include x, y, to_x, to_y, min, type -- TODO confirm.
df = pd.read_csv("../datasets/germany-vs-argentina-731830.csv", encoding='utf-8', index_col=0)
# Standard FIFA pitch dimensions, in metres.
x_size = 105.0  # pitch length
y_size = 68.0   # pitch width
# Penalty box: 16.5 m deep; its height spans 16.5 m either side of the
# 7.32 m goal mouth.
box_width = 16.5
box_height = 16.5 * 2 + 7.32
# The box is centred on the horizontal midline of the pitch.
half_box = box_height / 2
y_box_start = y_size / 2 - half_box
y_box_end = y_size / 2 + half_box
# The raw event coordinates live on a 100-by-100 grid; rescale every
# coordinate column in place to metres on the 105 x 68 pitch defined above.
# NOTE: later columns (dx/dy/distance, box flags) read the already-rescaled
# values, so these four assignments must come first.
df['x']=df['x']/100*x_size
df['y']=df['y']/100*y_size
df['to_x']=df['to_x']/100*x_size
df['to_y']=df['to_y']/100*y_size
# Derived measures and classifiers from the original columns.
df['count'] = 1  # unit weight so groupby(...).sum() counts events
df['dx'] = df['to_x'] - df['x']  # event displacement, metres
df['dy'] = df['to_y'] - df['y']
df['distance'] = np.sqrt(df['dx']**2+df['dy']**2)
# Bucket the match minute into 5-minute bins (0, 5, 10, ...).
df['fivemin'] = np.floor(df['min']/5)*5
# Human-readable event type; unmapped codes map to None (dict.get default).
df['type_name'] = df['type'].map(type_names.get)
# True when the event ends (to_box) / starts (from_box) inside the
# right-hand penalty area.
df['to_box'] = (df['to_x'] > x_size - box_width) & (y_box_start < df['to_y']) & (df['to_y'] < y_box_end)
df['from_box'] = (df['x'] > x_size - box_width) & (y_box_start < df['y']) & (df['y'] < y_box_end)
# Event happens in the attacking (right) half of the pitch.
df['on_offense'] = df['x']>x_size/2
# Render the footer built at the top of the notebook (download links plus
# the Creative Commons notice).
HTML(html)