from IPython.core.display import HTML
# Footer: Creative Commons notice shared by every post on the blog.
with open('creative_commons.txt', 'r') as cc:
    license_html = cc.read()

# Notebook file name, '<date>-<slug>'; also used to build the links below.
name = '2015-10-05-brazil_germany'

# Prepend the notebook download / nbviewer links to the license footer.
# (Rendered at the very end of the notebook via HTML(html).)
html = '''
<small>
<p> This post was written as an IPython notebook.
It is available for <a href='https://ocefpaf.github.com/python4oceanographers/downloads/notebooks/{0}.ipynb'>download</a>
or as a static <a href='https://nbviewer.ipython.org/url/ocefpaf.github.com/python4oceanographers/downloads/notebooks/{0}.ipynb'>html</a>.</p>
<p></p>
{1}'''.format(name, license_html)
%matplotlib inline
import os
from datetime import datetime
# Post metadata for the Pelican markdown header.
title = "Exploring what's new in Cartopy"
hour = datetime.utcnow().strftime('%H:%M')  # publication time (UTC, HH:MM)
comments = "true"
# `name` is '<YYYY>-<MM>-<DD>-<slug>': the first three tokens are the date,
# the rest is the slug.
date = '-'.join(name.split('-')[:3])
slug = '-'.join(name.split('-')[3:])
metadata = dict(title=title,
                date=date,
                hour=hour,
                comments=comments,
                slug=slug,
                name=name)
# Markdown stub: Pelican header plus a liquid-style `notebook` tag that
# embeds the notebook's cells from index 2 onward (skipping setup cells).
markdown = """Title: {title}
date: {date} {hour}
comments: {comments}
slug: {slug}
{{% notebook {name}.ipynb cells[2:] %}}
""".format(**metadata)
# Write the stub two directories above the current working directory.
content = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir,
                                       '{}.md'.format(name)))
# Fixes vs. original: dropped the no-op "'{}'.format(content)" wrapper and
# replaced writelines() with write() -- `markdown` is a single string, and
# writelines() would have iterated it character by character.
with open(content, 'w') as f:
    f.write(markdown)
import time
import json
import requests
import mechanize
from bs4 import BeautifulSoup
# Configure a mechanize browser for scraping the HuffPost data pages.
br = mechanize.Browser()
# Spoof a desktop Firefox user agent so the site serves the normal page.
br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
# Follow meta-refresh redirects, waiting at most 10 s for each.
# NOTE: mechanize._http is a private module, but this is the usual recipe
# for enabling the refresh processor.
br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=10)

# Index page for HuffPost's 2014 World Cup data and the match of interest.
url = 'http://data.huffingtonpost.com/2014/world-cup/matches/'
match = 'brazil-vs-germany-731827'
def get_match_data(match):
    """Fetch and decode the JSON payload for a match.

    The numeric match id is the last '-'-separated token of *match*
    (e.g. 'brazil-vs-germany-731827' -> '731827').
    """
    match_id = match.rsplit('-', 1)[-1]
    url = 'http://data.huffingtonpost.com/2014/world-cup/matches/{}.json'.format(match_id)
    response = mechanize.urlopen(url)
    return json.loads(response.read())
def get_match_names(match):
    """Scrape the ``HPIN.*`` javascript variables from a match page.

    Each data line in the page's second <script> block has the form
    ``HPIN.<name> = <json>;``.  Returns a dict of name -> decoded value.
    """
    page = mechanize.urlopen(
        'http://data.huffingtonpost.com/2014/world-cup/matches/%s' % match)
    soup = BeautifulSoup(page)
    # Second script block carries the data; assumes every match page
    # follows the same layout -- TODO confirm.
    script = soup.findAll("script")[1]
    names = {}
    for raw in script.text.split('\n')[1:]:
        if not raw:
            continue
        # 'HPIN.<name> = <json>;' -> key from the left side, JSON from the
        # right side with the trailing ';' stripped.
        parts = raw.split(' = ')
        key = parts[0].split('.')[1]
        names[key] = json.loads(parts[1][:-1])
    return names
import os
import pickle
# Cache the scraped match data locally so re-running the notebook does not
# hit the HuffPost site again.
fname = "./data/brazil_vs_germany_2014.pickle"
if os.path.isfile(fname):
    # BUG FIX: the original called pickle.load() but discarded its return
    # value, leaving `data` unbound on the cached path.  Bind it, and close
    # the file deterministically with a context manager.
    with open(fname, "rb") as f:
        data = pickle.load(f)  # trusted local cache written below
else:
    data = {}
    match_data = get_match_data(match)
    match_names = get_match_names(match)
    data[match] = {'data': match_data, 'names': match_names}
    with open(fname, "wb") as f:
        pickle.dump(data, f)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from footyviz import draw_events, draw_pitch, type_names
# Use pandas' built-in matplotlib styling for the plots below.
# NOTE(review): `display.mpl_style` was deprecated and later removed from
# pandas (modern versions raise OptionError here) -- fine for the pandas
# vintage this notebook targets; today use matplotlib style sheets instead.
pd.options.display.mpl_style = 'default'
# Event-level match data; column 0 of the CSV is the index.
# NOTE(review): this loads the germany-vs-argentina final while the scrape
# above targets the brazil-vs-germany semifinal -- confirm that is intended.
# Assumes columns include x, y, to_x, to_y, min, type -- TODO confirm.
df = pd.read_csv("../datasets/germany-vs-argentina-731830.csv", encoding='utf-8', index_col=0)
# Standard FIFA pitch dimensions, in metres.
x_size = 105.0  # pitch length
y_size = 68.0   # pitch width
# Penalty box: 16.5 m deep; its height spans 16.5 m either side of the
# 7.32 m goal mouth.
box_width = 16.5
box_height = 16.5 * 2 + 7.32
# The box is centred on the horizontal midline of the pitch.
half_box = box_height / 2
y_box_start = y_size / 2 - half_box
y_box_end = y_size / 2 + half_box
# The raw event coordinates live on a 100-by-100 grid; rescale every
# coordinate column in place to metres on the 105 x 68 pitch defined above.
# NOTE: later columns (dx/dy/distance, box flags) read the already-rescaled
# values, so these four assignments must come first.
df['x']=df['x']/100*x_size
df['y']=df['y']/100*y_size
df['to_x']=df['to_x']/100*x_size
df['to_y']=df['to_y']/100*y_size
# Derived measures and classifiers from the original columns.
df['count'] = 1  # unit weight so groupby(...).sum() counts events
df['dx'] = df['to_x'] - df['x']  # event displacement, metres
df['dy'] = df['to_y'] - df['y']
df['distance'] = np.sqrt(df['dx']**2+df['dy']**2)
# Bucket the match minute into 5-minute bins (0, 5, 10, ...).
df['fivemin'] = np.floor(df['min']/5)*5
# Human-readable event type; unmapped codes map to None (dict.get default).
df['type_name'] = df['type'].map(type_names.get)
# True when the event ends (to_box) / starts (from_box) inside the
# right-hand penalty area.
df['to_box'] = (df['to_x'] > x_size - box_width) & (y_box_start < df['to_y']) & (df['to_y'] < y_box_end)
df['from_box'] = (df['x'] > x_size - box_width) & (y_box_start < df['y']) & (df['y'] < y_box_end)
# Event happens in the attacking (right) half of the pitch.
df['on_offense'] = df['x']>x_size/2
# Render the footer built at the top of the notebook (download links plus
# the Creative Commons notice).
HTML(html)