# Exploratory plots of baseball pitch data built with the ggplot package.
# NOTE(review): this looks like a notebook export; bare plot expressions only
# render automatically when they are the last statement of a notebook cell.
from ggplot import *
import pandas as pd
import numpy as np

# Script-safe spelling of the ``%matplotlib inline`` magic: the bare ``%`` form
# is not valid Python syntax.  Still requires an IPython/Jupyter session.
get_ipython().run_line_magic("matplotlib", "inline")

# Load the pitch data and keep only the columns listed here.
df = pd.read_csv("./baseball-pitches-clean.csv")
df = df[[
    'pitch_time',
    'inning',
    'pitcher_name',
    'hitter_name',
    'pitch_type',
    'px',
    'pz',
    'pitch_name',
    'start_speed',
    'end_speed',
    'type_confidence',
]]
df.head()

# Scatter plots: px against pz, then start speed against end speed.
ggplot(df, aes(x='px', y='pz')) + geom_point()

ggplot(aes(x='start_speed'
           , y='end_speed'), data=df) + geom_point()

# Histogram of start speed over the whole data set.
ggplot(df, aes(x='start_speed')) + geom_histogram()

# One start-speed histogram per pitch name.  ``print(...)`` with a single
# argument behaves the same on Python 2 and Python 3, unlike the former
# ``print`` statement, which is a syntax error on Python 3.
for name, frame in df.groupby("pitch_name"):
    print(
        ggplot(aes(x='start_speed'), data=frame)
        + geom_histogram()
        + ggtitle("Distribution of " + str(name))
    )

# The same per-pitch-name histograms, drawn as facets of a single figure.
(
    ggplot(aes(x='start_speed'), data=df)
    + geom_histogram()
    + facet_wrap('pitch_name')
)

from IPython.display import YouTubeVideo
YouTubeVideo("ikLlRT2j7EQ")

# Bar chart of pitch_type.
ggplot(aes(x='pitch_type'), data=df) + geom_bar()

# Start-speed histograms faceted by pitch_type.
(
    ggplot(aes(x='start_speed'), data=df)
    + geom_histogram()
    + facet_grid('pitch_type')
)

# Start-speed histograms on a pitch_name x pitch_type grid with free scales.
(
    ggplot(aes(x='start_speed'), data=df)
    + geom_histogram()
    + facet_grid('pitch_name', 'pitch_type', scales="free")
)

# Density of start speed: overall, then coloured by pitch name.
(
    ggplot(df, aes(x='start_speed'))
    + geom_density()
)

(
    ggplot(df, aes(x='start_speed', color='pitch_name'))
    + geom_density()
)