#!/usr/bin/env python # coding: utf-8 # # `df.style` # # You can apply **conditional formatting**, the visual styling of a DataFrame # depending on the data within, by using the ``DataFrame.style`` property. # This is a property that returns a ``Styler`` object, which has # useful methods for formatting and displaying DataFrames. # # The styling is accomplished using CSS. # You write functions that take `DataFrame`s or `Series`, and return *like-indexed* # DataFrames or Series with CSS `"attribute: value"` pairs for the values. # You can build up your styles incrementally using method chains, before rending. # ## Initialization # # The main class you'll work with is `pd.Styler`. # You can create this class by either # # - accessing it with the `DataFrame.style` property # - passing in a DataFrame, `pd.Styler(dataframe)` # # This class holds a reference to your dataframe in the `.data` attribute. # ## Building Styles # # Pass your style functions into one of the following methods: # # - `Styler.applymap`: elementwise # - `Styler.apply`: column/row-wise # - `Styler.tee`: tablewise # # Each of those methods take a function (and some other keyword arguments) and apply your function to the DataFrame in a certain way. `applymap` works through the DataFrame elementwise, `apply` passes each column or row into your DataFrame one-at-a-time, and `.tee` pass in the entire DataFrame at once. # Let's create a DataFrame to work with. # In[11]: import pandas as pd import numpy as np np.random.seed(24) df = pd.DataFrame({'A': np.linspace(1, 10, 10)}) df = pd.concat([df, pd.DataFrame(np.random.randn(10, 4), columns=list('BCDE'))], axis=1) df.iloc[0, 2] = np.nan # Here's a (boring) example: # In[12]: df.style # *Note*: The `DataFrame.style` is a propetry that returns a `Styler` object. `Styler` has a `_repr_html_` method defined on it so they are rendered automatically. 
# If you want the actual HTML back for further processing or for writing to
# file, call the `.render()` method, which returns a string.
#
# The above output looks very similar to the standard DataFrame HTML
# representation. But we've done some work behind the scenes to attach CSS
# classes to each cell. We can view these by calling the `.render` method.

# In[13]:

df.style.render().split('\n')[:10]

# The `row0_col0` is the identifier for that particular cell. We've also
# prepended each row/column identifier with a UUID unique to each DataFrame so
# that the style from one doesn't collide with the styling from another within
# the same notebook / page (you can set the `uuid` if you'd like to tie
# together the styling of two DataFrames).

# Let's write a simple function that will color negative numbers red and
# positive numbers black.

# In[14]:


def color_negative_red(val):
    """
    Take a scalar and return a CSS ``color`` declaration for it.

    Returns ``'color: red'`` for negative numbers and ``'color: black'``
    otherwise.
    """
    # NaN compares False against 0, so missing values come out black.
    color = 'red' if val < 0 else 'black'
    return 'color: %s' % color


# In[15]:

df.style.applymap(color_negative_red)

# First, notice the similarity with the standard `df.applymap`, which operates
# on DataFrames elementwise. We want you to be able to reuse your existing
# knowledge of how to interact with DataFrames. `df.style.applymap` also takes
# a function that operates elementwise.
#
# Second, notice that our function returned a string containing the CSS
# attribute and value. This will be a common theme.

# Now suppose you wanted to highlight the maximum value in each column.
# We can't use `.applymap` anymore since that operates elementwise.
# Instead, we'll turn to `.apply`, which operates columnwise (or rowwise using
# the `axis` keyword).
# In[16]:


def highlight_max(s):
    """
    Highlight the maximum in a Series.

    Returns a Series of CSS strings (``'background-color: yellow'`` at the
    maximum, ``''`` elsewhere) carrying the same index and name as ``s``.
    """
    is_max = s == s.max()
    # Reuse the input's index so the result stays like-indexed; without it the
    # styles would be labelled 0..n-1 regardless of the labels on ``s``.
    return pd.Series(
        ['background-color: yellow' if v else '' for v in is_max],
        index=s.index,
        name=s.name,
    )


# In[18]:

df.style.apply(highlight_max)

# We encourage you to use method chains to build up a style piecewise, before
# finally rendering at the end of the chain.

# In[19]:

(df.style
   .applymap(color_negative_red)
   .apply(highlight_max))

# We've seen `.applymap` and `.apply`. The final method for passing in style
# functions is `.tee`.
#
# Let's rewrite our `highlight_max` to handle either Series (from `.apply`) or
# DataFrames (from `.tee`). We'll also allow the color to be adjustable, to
# demonstrate that `.tee`, `.apply`, and `.applymap` all pass along keyword
# arguments.

# In[20]:


# Deliberately replaces the Series-only `highlight_max` defined above.
def highlight_max(data, color='yellow'):
    """
    Highlight the maximum in a Series or DataFrame.

    Parameters
    ----------
    data : Series or DataFrame
        A Series (as passed by ``.apply``) or a DataFrame (as passed by
        ``.tee``).
    color : str, default 'yellow'
        Value used for the ``background-color`` CSS property.

    Returns
    -------
    Series or DataFrame
        Same kind and labels as ``data``; holds the CSS string at each
        position equal to the maximum and ``''`` elsewhere. For a DataFrame
        the maximum is taken over the whole table.
    """
    attr = 'background-color: {}'.format(color)
    if data.ndim == 1:  # Series from .apply
        is_max = data == data.max()
        return pd.Series(np.where(is_max, attr, ''),
                         index=data.index, name=data.name)
    # DataFrame from .tee
    is_max = data == data.max().max()
    return pd.DataFrame(np.where(is_max, attr, ''),
                        index=data.index, columns=data.columns)


# In[21]:

df.style.tee(highlight_max, color='darkorange')

# You can control the precision of floats using pandas' regular
# `display.precision` option.

# In[22]:

with pd.option_context('display.precision', 2):
    html = (df.style
              .applymap(color_negative_red)
              .apply(highlight_max))
html

# ## Finer Control

# The signatures for `Styler.apply`, `Styler.applymap` and `Styler.tee` all
# include a `subset` keyword.
# This allows you to apply styles to specific rows or columns, without having
# to code that logic into your `style` function.

# In[23]:

df.style.applymap(color_negative_red,
                  subset=pd.IndexSlice[2:5, ['B', 'D']])

# We reuse `DataFrame.loc` internally, so `subset` can be anything that will
# slice the original DataFrame. Consider using `pd.IndexSlice` to make writing
# the slices easier.
#
# **N.B.** If your style function uses a `subset` keyword argument, consider
# wrapping your function in a `functools.partial`, partialing out that keyword.
#
# ```python
# my_func2 = functools.partial(my_func, subset=42)
# ```

# ## Builtins

# Finally, we expect certain styling functions to be common enough that we've
# included a few "built-in" to the `Styler`, so you don't have to write them
# yourself.

# In[24]:

df.style.highlight_null(null_color='red')

# In[25]:

import seaborn as sns

cm = sns.light_palette("green", as_cmap=True)

s = df.style.color_bg_range(cmap=cm)
s

# In[26]:

# Use .set_properties when it doesn't actually depend on the values
df.style.set_properties(**{'background-color': 'black',
                           'color': 'lawngreen',
                           'border-color': 'white'})

# ## Other options
#
# You've seen a few methods for data-driven styling.
# `Styler` also provides a few other options for styling that don't depend on
# the data.
#
# - captions
# - table-wide styles
#
# Each of these can be specified in two ways:
#
# - A keyword argument to `pd.Styler`
# - A keyword argument to `.render`
#
# ### Captions

# In[27]:

from pandas.core.style import Styler

# In[28]:

s = Styler(df, caption='Colormaps, with a caption.')
s.color_bg_range(cmap=cm)

# Alternatively, pass the caption in when rendering.

# In[29]:

from IPython.display import HTML

HTML(
    df.style.color_bg_range(cmap=cm).render(caption="Caption, from render.")
)

# The next option you have to pass in are "table styles".
# These are styles that apply to the table as a whole, and don't look at the
# data.
# Certain stylings, including pseudo-selectors like `:hover`, can only be used
# this way.

# In[35]:


def hover(hover_color="#ffff99"):
    """
    Build a table-style entry that sets the row background on hover.

    Parameters
    ----------
    hover_color : str, default "#ffff99"
        Value for the ``background-color`` property of ``tr:hover``.

    Returns
    -------
    dict
        ``{'selector': 'tr:hover', 'props': [('background-color', ...)]}``
    """
    return dict(selector="tr:hover",
                props=[("background-color", str(hover_color))])


styles = [
    hover(),
    dict(selector="th", props=[("font-size", "150%"),
                               ("text-align", "center")]),
]
html = df.style.render(style=styles)
HTML(html)

# A few example styling functions. Perhaps we'll have a repo of these
# somewhere.
# Fun stuff.

# In[39]:


# https://developer.mozilla.org/en-US/docs/Web/CSS/animation#Cylon_Eye
# no animation yet :(
def cylon(s):
    """
    Style every cell of a Series with the same "Cylon eye" gradient.

    Returns a Series with the index and name of ``s`` whose values are all
    the same CSS declaration string.
    """
    # NOTE(review): `width: 20` carries no CSS unit -- confirm the intended
    # value (px? %?) before relying on it.
    tpl = """
    background-color: red;
    background-image: -webkit-linear-gradient(
        left, rgba( 0,0,0,0.9 ) 25%, rgba( 0,0,0,0.1 ) 50%, rgba( 0,0,0,0.9 ) 75%);
    background-image: -moz-linear-gradient(
        left, rgba( 0,0,0,0.9 ) 25%, rgba( 0,0,0,0.1 ) 50%, rgba( 0,0,0,0.9 ) 75%);
    background-image: -o-linear-gradient(
        left, rgba( 0,0,0,0.9 ) 25%, rgba( 0,0,0,0.1 ) 50%, rgba( 0,0,0,0.9 ) 75%);
    background-image: linear-gradient(to right,
        rgba( 0,0,0,0.9 ) 25%, rgba( 0,0,0,0.1 ) 50%, rgba( 0,0,0,0.9 ) 75%);
    color: white;
    height: 100%;
    width: 20"""
    return pd.Series([tpl] * len(s), index=s.index, name=s.name)


# In[40]:

df.style.apply(cylon)

# Interacts pretty well with widgets.

# In[41]:

from IPython.html import widgets


@widgets.interact
def f(h_neg=(0, 359, 1), h_pos=(0, 359), s=(0., 99.9), l=(0., 99.9)):
    """
    Color the DataFrame background with a seaborn diverging palette.

    All four arguments are forwarded unchanged to
    ``sns.palettes.diverging_palette``. The tuple defaults are what
    ``widgets.interact`` receives to build its controls (presumably sliders
    over those ranges -- confirm against the widgets documentation).
    """
    return df.style.color_bg_range(
        cmap=sns.palettes.diverging_palette(h_neg=h_neg, h_pos=h_pos,
                                            s=s, l=l, as_cmap=True)
    )


# In[46]:

from IPython.display import HTML

np.random.seed(24)
df = pd.DataFrame({'A': np.linspace(1, 10, 10)})
df = pd.concat(
    [df, pd.DataFrame(np.random.randn(10, 4), columns=list('BCDE'))],
    axis=1,
)
df.iloc[0, 2] = np.nan

# In[56]:


def panda(df):
    """
    Build a black-and-white style table that draws a panda face.

    Parameters
    ----------
    df : DataFrame
        Frame to style. Cells whose ``(row label, column label)`` pair is one
        of the hard-coded spots are painted black; all others white.

    Returns
    -------
    DataFrame
        CSS strings, with the same index and columns as ``df``.
    """
    # A set gives constant-time membership tests for every cell.
    spots = {
        # left ear
        (0, 0), (1, 0), (0, 1),
        # right ear
        (0, 10), (1, 10), (0, 9),
        # left eye
        (5, 2), (4, 3), (5, 3),
        # right eye
        (5, 7), (4, 7), (5, 8),
        # nose
        (10, 4), (10, 5), (10, 6), (11, 5),
        # remaining spots (presumably the mouth)
        (15, 5), (16, 5), (17, 5), (17, 4), (17, 6), (17, 3), (17, 7),
        (16, 2), (16, 8),
    }
    dark = "background-color: black; color: white"
    light = "background-color: white; color: black"
    rows = [
        [dark if (r, c) in spots else light for c in df.columns]
        for r in df.index
    ]
    # Carry the input's labels so the styles are like-indexed with the data.
    return pd.DataFrame(rows, index=df.index, columns=df.columns)


df = pd.DataFrame(np.random.randn(20, 11))
s = df.round(2).style
s.update_ctx(panda(df))
s

# # Subclassing
#
# Unlike most objects that pandas exposes, `Styler` should be designed with
# subclassing in mind. There's no particular reason why CSS should be used
# over other backends, other than that it's convenient for users of Notebooks,
# and relatively straightforward compared to, say, $\LaTeX$. I've laid out a
# bit of the internal implementation here.
#
# As users apply styles (via `.apply`, `.applymap` and `.tee`), we modify an
# internal dict, `self.ctx`. This maps `(row_position, col_position)` to a
# list of (for CSS) `'attribute: value'` strings. Given this `ctx`, the
# rendering roughly consists of two steps:
#
# - `translate` to a dictionary ready to be passed into the template `Styler.t`
# - render the template with the dictionary from `Styler.translate`
#
# This is done in `Styler.render`, but most of the heavy lifting is done in
# `translate`.
#
#
# ## Alternate templates
#
# We've used [Jinja](http://jinja.pocoo.org/) templates to build up the HTML.
# The template is stored as a class variable on `Styler` (called `Styler.t`
# for now). Subclasses can override that.
#
# ```python
# class CustomStyle(Styler):
#     t = Template("""...""")
# ```