"""Plotting exercises built on pandas and ggplot (yhat).

Run as a script, this module renders four charts in order: home runs by
year, home runs by year per team, subway entries by hour, and the units
with the highest mean hourly entries.  The CSV files are read from the
relative ``data/`` directory.
"""
# Import order is kept as in the original scripts: names star-exported by
# ggplot win over pandas' star exports, and ``pandas`` is bound last.
from pandas import *
from ggplot import *
import pandas


def lineplot(hr_year_csv):
    """Build a red point-and-line chart from the ``HR`` and ``yearID`` columns.

    You can check out the data in the csv file at the link below:
    https://www.dropbox.com/s/awgdal71hc1u06d/hr_year.csv

    You can read more about ggplot at the following link:
    https://github.com/yhat/ggplot/

    Args:
        hr_year_csv: CSV location handed straight to ``pandas.read_csv``;
            the file must provide ``HR`` and ``yearID`` columns.

    Returns:
        The assembled ggplot object (the script entry point prints it).
    """
    dataframe = pandas.read_csv(hr_year_csv)
    # NOTE(review): the columns are handed to aes() as Series, whereas the
    # turnstile plots below pass column names -- kept exactly as written.
    gg = (
        ggplot(dataframe, aes(dataframe['HR'], dataframe['yearID']))
        + geom_point(color='red')
        + geom_line(color='red')
        + xlab('HR')
        + ylab('Year')
    )
    return gg


def lineplot_compare(hr_by_team_year_sf_la_csv):
    """Build a point-and-line chart of ``HR`` and ``yearID`` colored by team.

    You can see the data in hr_by_team_year_sf_la_csv at the link below:
    https://www.dropbox.com/s/wn43cngo2wdle2b/hr_by_team_year_sf_la.csv

    Args:
        hr_by_team_year_sf_la_csv: CSV location handed straight to
            ``pandas.read_csv``; the file must provide ``HR``, ``yearID``
            and ``teamID`` columns.

    Returns:
        The assembled ggplot object (the script entry point prints it).
    """
    dataframe = pandas.read_csv(hr_by_team_year_sf_la_csv)
    gg = (
        ggplot(
            dataframe,
            aes(dataframe['HR'], dataframe['yearID'], color='teamID'),
        )
        + geom_point()
        + geom_line()
        + xlab('HR')
        + ylab('Year')
    )
    return gg


def plot_entries_by_hour(turnstile_weather):
    """Print a scatter of hourly entries against hour with a smoothed trend.

    Part of a visualization exercise on the MTA and weather data.
    Suggested things to investigate and illustrate:

    * Ridership by time of day or day of week
    * How ridership varies based on Subway station
    * Which stations have more exits or entries at different times of day

    You can check out the data at:
    https://www.dropbox.com/s/meyki2wl9xfa7yk/turnstile_data_master_with_weather.csv

    Args:
        turnstile_weather: CSV location handed to ``pandas.read_csv`` (not
            an already-loaded DataFrame); the file must provide ``Hour``
            and ``ENTRIESn_hourly`` columns.

    Returns:
        None.  The plot is printed rather than returned.
    """
    weather = pandas.read_csv(turnstile_weather)
    # NOTE(review): geom_density() is layered on top of a scatter that maps
    # a y aesthetic; it is kept from the original -- confirm it is intended.
    plot = (
        ggplot(weather, aes('Hour', 'ENTRIESn_hourly'))
        + geom_point(color='lightblue')
        + stat_smooth(span=.15, color='red', se=True)
        + xlab('Hour')
        + ylab('Entries')
        + geom_density()
    )
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(plot)


# Data Visualization 2
def plot_entries_by_unit(turnstile_weather, top_n=14):
    """Print a bar chart of the units with the highest mean hourly entries.

    Suggested things to investigate and illustrate:

    * Ridership by time of day or day of week
    * How ridership varies based on Subway station
    * Which stations have more exits or entries at different times of day

    You can check out the data at:
    https://www.dropbox.com/s/meyki2wl9xfa7yk/turnstile_data_master_with_weather.csv

    Args:
        turnstile_weather: CSV location handed to ``pandas.read_csv`` (not
            an already-loaded DataFrame); the file must provide ``UNIT``
            and ``ENTRIESn_hourly`` columns.
        top_n: How many of the highest-ranked units to draw (default 14).

    Returns:
        None.  The plot is printed rather than returned.
    """
    weather = pandas.read_csv(turnstile_weather)
    unit_means = (
        weather[['UNIT', 'ENTRIESn_hourly']]
        .groupby('UNIT', as_index=False)
        .mean()
    )
    # sort_values replaces DataFrame.sort, which newer pandas no longer has.
    # Descending on both keys, so ties in the mean are broken by UNIT.
    busiest = (
        unit_means
        .sort_values(['ENTRIESn_hourly', 'UNIT'], ascending=False)
        .head(top_n)
        .reset_index(drop=True)
    )
    plot = (
        ggplot(busiest, aes(x='UNIT', y='ENTRIESn_hourly'))
        + geom_bar(
            aes(weight='ENTRIESn_hourly'),
            fill='lightblue',
            stat='identity',
        )
        + ggtitle('How ridership varies by subway station')
        + xlab('Station')
        + ylab('Entries')
    )
    print(plot)


# Both turnstile plots used to be defined under this one name, so the later
# definition was the one importers received; the alias keeps that binding.
plot_weather_data = plot_entries_by_unit


if __name__ == '__main__':
    print(lineplot('data/hr_year.csv'))
    print(lineplot_compare('data/hr_by_team_year_sf_la.csv'))
    plot_entries_by_hour('data/turnstile_data_master_with_weather.csv')
    plot_entries_by_unit('data/turnstile_data_master_with_weather.csv')