from IPython.display import IFrame IFrame("http://www.randalolson.com/2014/06/28/" "how-to-make-beautiful-data-visualizations-in-python-with-matplotlib/", 720, 400) IFrame("http://www.randalolson.com/wp-content/uploads/" "percent-bachelors-degrees-women-usa.png", 1000, 1000) import plotly plotly.__version__ import plotly.plotly as py import plotly.tools as tls %pylab inline from pandas import read_csv # Read the data into a pandas DataFrame. gender_degree_data = read_csv("http://www.randalolson.com/wp-content/uploads/" "percent-bachelors-degrees-women-usa.csv") # These are the "Tableau 20" colors as RGB. tableau20 = [(31, 119, 180), (174, 199, 232), (255, 127, 14), (255, 187, 120), (44, 160, 44), (152, 223, 138), (214, 39, 40), (255, 152, 150), (148, 103, 189), (197, 176, 213), (140, 86, 75), (196, 156, 148), (227, 119, 194), (247, 182, 210), (127, 127, 127), (199, 199, 199), (188, 189, 34), (219, 219, 141), (23, 190, 207), (158, 218, 229)] # Scale the RGB values to the [0, 1] range, which is the format matplotlib accepts. for i in range(len(tableau20)): r, g, b = tableau20[i] tableau20[i] = (r / 255., g / 255., b / 255.) # You typically want your plot to be ~1.33x wider than tall. This plot is a rare # exception because of the number of lines being plotted on it. # Common sizes: (10, 7.5) and (12, 9) figure(figsize=(12, 14)) # Remove the plot frame lines. They are unnecessary chartjunk. ax = subplot(111) ax.spines["top"].set_visible(False) ax.spines["bottom"].set_visible(False) ax.spines["right"].set_visible(False) ax.spines["left"].set_visible(False) # Ensure that the axis ticks only show up on the bottom and left of the plot. # Ticks on the right and top of the plot are generally unnecessary chartjunk. ax.get_xaxis().tick_bottom() ax.get_yaxis().tick_left() # Limit the range of the plot to only where the data is. # Avoid unnecessary whitespace. ylim(0, 90) xlim(1968, 2014) # Make sure your axis ticks are large enough to be easily read. # You don't want your viewers squinting to read your plot. yticks(range(0, 91, 10), [str(x) + "%" for x in range(0, 91, 10)], fontsize=14) xticks(fontsize=14) # Provide tick lines across the plot to help your viewers trace along # the axis ticks. Make sure that the lines are light and small so they # don't obscure the primary data lines. for y in range(10, 91, 10): plot(range(1968, 2012), [y] * len(range(1968, 2012)), "--", lw=0.5, color="black", alpha=0.3) # Remove the tick marks; they are unnecessary with the tick lines we just plotted. plt.tick_params(axis="both", which="both", bottom="off", top="off", labelbottom="on", left="off", right="off", labelleft="on") # Now that the plot is prepared, it's time to actually plot the data! # Note that I plotted the majors in order of the highest % in the final year. majors = ['Health Professions', 'Public Administration', 'Education', 'Psychology', 'Foreign Languages', 'English', 'Communications\nand Journalism', 'Art and Performance', 'Biology', 'Agriculture', 'Social Sciences and History', 'Business', 'Math and Statistics', 'Architecture', 'Physical Sciences', 'Computer Science', 'Engineering'] for rank, column in enumerate(majors): # Plot each line separately with its own color, using the Tableau 20 # color set in order. plot(gender_degree_data.Year.values, gender_degree_data[column.replace("\n", " ")].values, lw=2.5, color=tableau20[rank]) # Add a text label to the right end of every line. Most of the code below # is adding specific offsets y position because some labels overlapped. y_pos = gender_degree_data[column.replace("\n", " ")].values[-1] - 0.5 if column == "Foreign Languages": y_pos += 0.5 elif column == "English": y_pos -= 0.5 elif column == "Communications\nand Journalism": y_pos += 0.75 elif column == "Art and Performance": y_pos -= 0.25 elif column == "Agriculture": y_pos += 1.25 elif column == "Social Sciences and History": y_pos += 0.25 elif column == "Business": y_pos -= 0.75 elif column == "Math and Statistics": y_pos += 0.75 elif column == "Architecture": y_pos -= 0.75 elif column == "Computer Science": y_pos += 0.75 elif column == "Engineering": y_pos -= 0.25 # Again, make sure that all labels are large enough to be easily read # by the viewer. text(2011.5, y_pos, column, fontsize=14, color=tableau20[rank]) # matplotlib's title() call centers the title on the plot, but not the graph, # so I used the text() call to customize where the title goes. # Make the title big enough so it spans the entire plot, but don't make it # so big that it requires two lines to show. # Note that if the title is descriptive enough, it is unnecessary to include # axis labels; they are self-evident, in this plot's case. text(1995, 93, "Percentage of Bachelor's degrees conferred to women in the U.S.A." ", by major (1970-2012)", fontsize=17, ha="center") # Always include your data source(s) and copyright notice! And for your # data sources, tell your viewers exactly where the data came from, # preferably with a direct link to the data. Just telling your viewers # that you used data from the "U.S. Census Bureau" is completely useless: # the U.S. Census Bureau provides all kinds of data, so how are your # viewers supposed to know which data set you used? text(1966, -8, "Data source: nces.ed.gov/programs/digest/2013menu_tables.asp" "\nAuthor: Randy Olson (randalolson.com / @randal_olson)" "\nNote: Some majors are missing because the historical data " "is not available for them", fontsize=10) # Finally, save the figure as a PNG. # You can also save it as a PDF, JPEG, etc. # Just change the file extension in this call. # bbox_inches="tight" removes all the extra whitespace on the edges of your plot. #savefig("percent-bachelors-degrees-women-usa.png", bbox_inches="tight"); # (!) Grab figure object and link it to variable (must be in same cell as figure) dataviz1 = gcf() py.iplot_mpl(dataviz1, resize=False, filename='dataviz1', width=960, height=1120) dataviz1_plotly = tls.mpl_to_plotly(dataviz1) print dataviz1_plotly.to_string() # show plotly figure object in notebook # List of all annotation texts, show it in notebook annos_text = [anno['text'] for anno in dataviz1_plotly['layout']['annotations']] annos_text # List all majors in dataset, show it in notebook majors = annos_text[:-2] majors # (1) Adjust margins (use our web GUI to easier find the appropriate values) dataviz1_plotly['layout']['margin'].update( l=50, # left margin in pixels r=160, # right " " " b=100, # bottom " " " t=100 # top " " " ) # (2) Add title (appears in figure's URL, nice for sharing), remove title annotation dataviz1_plotly['layout'].update( title=annos_text[-2], titlefont=dict(size=20) # increase font size ) dataviz1_plotly['layout']['annotations'][-2].update(text=' ') # (3) Remove tick lines dataviz1_plotly['layout']['xaxis1'].update(ticks='') dataviz1_plotly['layout']['yaxis1'].update(ticks='') # (4) Add hover label to data trace, remove hover label from grid traces N_traces = len(dataviz1_plotly['data']) N_majors = len(majors) update_name = [{'name': ' '} for i in range(N_traces)] update_name[N_traces-N_majors:] = [{'name': major} for major in majors] dataviz1_plotly['data'].update(update_name) # (5) Make every y coordinate show when hovering over a given x coordinate dataviz1_plotly['layout'].update(hovermode='x') py.iplot(dataviz1_plotly, filename='dataviz1_updated', width=960, height=1120) from IPython.display import display, HTML import urllib2 url = 'https://raw.githubusercontent.com/plotly/python-user-guide/master/custom.css' display(HTML(urllib2.urlopen(url).read()))