Up and Down the Python Data and Web Visualization Stack

USGS dataset listing every wind turbine in the United States:

In [35]:
from collections import OrderedDict

import bearcart
import bokeh
import bokeh.plotting as bp
from bokeh.plotting import output_notebook
import folium
import ggplot as gg
from ggplot import ggplot
from IPython.html.widgets import interact
import matplotlib.pyplot as plt
import mpld3
import numpy as np
import pandas as pd
import vincent

%matplotlib inline
mpld3.enable_notebook()
bearcart.initialize_notebook()
vincent.initialize_notebook()
folium.initialize_notebook()

# axis_color = 'black'
axis_color = '#d0d0d0'
In [36]:
df = pd.read_csv('USGS_WindTurbine_201307_cleaned.csv')
df.head()
Out[36]:
Unnamed: 0 Unique ID Site Name Online Year Turbine Manufacturer Turbine Model Tower Type Turbine MW Total Height Tower/Hub Height Blade Length Rotor Diameter Rotor Swept Area Latitude-Decimal Degrees Longitude-Decimal Degrees State County Attribute Confidence Location Confidence WENDI Site Name
0 0 1836 Sand Point 2012 Vestas V39 monopole 0.500 59.5 40 19.5 39 1194.59 55.3436 -160.4891 AK Aleutians East 2 2 unknown ...
1 1 1837 Sand Point 2012 Vestas V39 monopole 0.500 59.5 40 19.5 39 1194.59 55.3448 -160.4906 AK Aleutians East 2 2 unknown ...
2 2 1838 St. Paul Island 2007 Vestas V27 monopole 0.225 50.5 37 13.5 27 572.55 57.1571 -170.2352 AK Aleutians West 2 2 St. Paul Island ...
3 3 1839 St. Paul Island 1999 Vestas V27 monopole 0.225 50.5 37 13.5 27 572.55 57.1576 -170.2359 AK Aleutians West 2 2 St. Paul Island ...
4 4 1840 St. Paul Island 2007 Vestas V27 monopole 0.225 50.5 37 13.5 27 572.55 57.1576 -170.2373 AK Aleutians West 2 2 St. Paul Island ...

5 rows × 27 columns

In [37]:
ws = pd.read_table('CO_WS_2011_2012.txt')
ws = ws.set_index('Date & Time Stamp')
ws.index = ws.index.to_datetime()
ws.head()
Out[37]:
WS1_50mMean WS1_50mStdev WS1_50mMax WS1_50mMin WS2_50mMean WS2_50mStDev WS2_50mMax WS2_50mMin WS3_30mMean WS3_30mStDev WS3_30mMax WS3_30mMin WS4_40mMean WS4_40mStDev WS4_40mMax WS4_40mMin WD1_49mMean WD1_49mStDev WD1_49mMax WD1_49mMin
2011-06-03 00:00:00 9.50 1.12 11.70 6.40 9.39 1.03 11.39 6.43 7.96 1.33 10.97 4.51 8.76 1.17 11.01 4.95 172 7 173 240 ...
2011-06-03 00:10:00 8.37 0.56 10.19 6.78 8.27 0.56 9.86 6.43 6.83 0.76 9.06 3.75 7.60 0.64 9.49 5.70 172 5 169 240 ...
2011-06-03 00:20:00 8.38 0.43 9.44 7.17 8.28 0.41 9.48 7.20 7.13 0.64 8.69 4.89 7.81 0.45 9.10 6.83 169 5 177 240 ...
2011-06-03 00:30:00 7.48 1.17 9.44 3.76 7.43 1.12 9.48 3.77 6.43 1.03 8.29 3.37 6.93 1.03 9.10 3.43 163 11 166 240 ...
2011-06-03 00:40:00 7.13 0.79 9.06 5.26 7.10 0.76 9.09 5.28 6.21 0.67 7.91 2.99 6.61 0.66 8.34 3.81 172 12 166 240 ...

5 rows × 24 columns

In [38]:
# Rotor Diameter vs. Turbine Manufacturer
mf_grouped = df.groupby('Turbine Manufacturer')
mean_grouped = mf_grouped.mean().dropna()
mean_rd = mean_grouped.sort('Rotor Diameter')['Rotor Diameter']
rotor_diam = vincent.Bar(mean_rd)
rotor_diam.axis_titles(x='Turbine Manufacturer', y='Rotor Diameter')
# The Hard Way
from vincent.axes import AxisProperties
from vincent.properties import PropertySet
from vincent.values import ValueRef
for axis in rotor_diam.axes:
    axis.properties = AxisProperties()
    for prop in ['ticks', 'axis', 'major_ticks', 'minor_ticks']:
        setattr(axis.properties, prop, PropertySet(stroke=ValueRef(value=axis_color)))
    axis.properties.title = PropertySet(font_size=ValueRef(value=20), 
                                        fill=ValueRef(value=axis_color))
    axis.properties.labels = PropertySet(fill=ValueRef(value=axis_color))
rotor_diam.axes[0].properties.labels.angle = ValueRef(value=50)
rotor_diam.axes[0].properties.labels.align = ValueRef(value='left')
rotor_diam.axes[0].properties.title.dy = ValueRef(value=115)
rotor_diam.scales[2].range = ['#b48ead']
rotor_diam
Out[38]:
In [39]:
# Total Turbine Count
turbine_ct = mf_grouped.count().dropna().sort('Unique ID', ascending=False)['Unique ID']
num_turbines = (vincent.Bar(turbine_ct[:25])
                       .axis_titles(x='Turbine Manufacturer', 
                                    y='Number of Turbines in the US')
                       .colors(range_=['#6a9fb5']))
# Shortcuts!
def lighten_axes(vis, x_offset=50):
    (vis.common_axis_properties(color=axis_color, title_size=20)
        .x_axis_properties(label_angle=50, label_align='left', 
                           title_offset=x_offset)
        .y_axis_properties(title_offset=-40))
# If Area Chart
# num_turbines.scales[0].type = 'ordinal'
lighten_axes(num_turbines)
num_turbines
Out[39]:
In [40]:
# Turbine Count vs. Date
grouped_date = df.groupby(['Online Year', 'Turbine Manufacturer'])
by_year = grouped_date.count()['Unique ID'].reset_index()
by_year['Online Year'] = pd.to_datetime(by_year['Online Year'], coerce=True)
by_year = by_year.rename(columns={'Unique ID': 'Turbine Count'}).dropna()
by_year = by_year.pivot(index='Online Year', columns='Turbine Manufacturer', values='Turbine Count')
by_year = by_year[turbine_ct[:10].index.tolist()]

online_by_year = (vincent.StackedArea(by_year)
                         .axis_titles(x='Date', y='Turbine Count')
                         .legend(title='Turbine Manufacturer', text_color=axis_color)
                         .colors(range_=['#ac4142', '#d28445', '#f4bf75', '#90a959', 
                                         '#75b5aa', '#6a9fb5', '#aa759f', '#8f5536']))
lighten_axes(online_by_year, x_offset=30)
online_by_year
Out[40]:
In [41]:
height_diam = (vincent.GroupedBar(mean_grouped[['Tower/Hub Height', 'Rotor Diameter']]
                                  .sort(['Rotor Diameter', 'Tower/Hub Height'], ascending=False))
                      .axis_titles(x='Turbine Manufacturer', y='Meters')
                      .legend(title='Parameters', text_color=axis_color)
                      .colors(range_=['#f4bf75', '#75b5aa']))
lighten_axes(height_diam, 100)
height_diam
Out[41]:
In [42]:
november_2011 = ws['2011-11-15':'2011-12-01']
ws_line = (vincent.Line(november_2011['WS1_50mMean'])
                  .axis_titles(x='Date', y='Wind Speed (m/s)')
                  .colors(range_=['#d28445']))
lighten_axes(ws_line, x_offset=30)
ws_line
Out[42]:
In [43]:
# Rotor Diameter vs. Power
min_heights = df[df['Rotor Diameter'] > 10]
diameter_vs_mw = (vincent.Scatter(min_heights[['Turbine MW', 'Rotor Diameter']], iter_idx='Turbine MW')
                         .axis_titles(x='Power (MW)', y='Rotor Diameter (m)')
                         .colors(range_=['#75b5aa']))
lighten_axes(diameter_vs_mw, x_offset=30)
diameter_vs_mw
Out[43]: