#!/usr/bin/env python
# coding: utf-8

# # Scatter demonstration
#
# - This notebook is part of sequana, which was originally part of biokit
#   - https://github.com/biokit/biokit
#   - https://pypi.python.org/pypi/biokit
#
# - Scatter plot with histogram: scatter_hist

# In[1]:

# The %pylab magic is what brings the ``pylab`` name used below into scope,
# so this script is meant to be run from an IPython/Jupyter session.
get_ipython().run_line_magic('pylab', 'inline')
from sequana.viz import ScatterHist
import pandas as pd


# # Scatter plot with histogram

# In[2]:

# The input can be a 2-column matrix or a dataframe with 2 columns.
# Here: two independent samples of 1000 normally distributed values.
X = pylab.randn(1000)
Y = pylab.randn(1000)


# In[3]:

df = pd.DataFrame({'X': X, 'Y': Y})


# In[4]:

sh = ScatterHist(df)
sh.plot()


# In[5]:

# The scatter plot and the histograms can be tuned with any optional
# argument accepted by the underlying pylab functions; see the help of
# pylab.scatter and pylab.hist for details.
_ = sh.plot(
    kargs_scatter=dict(c='r', s=30, alpha=.3),
    kargs_histy=dict(color='g', bins=20),
)


# In[8]:

# The marker size and color can be fixed for the whole scatter plot with
# the 's' and 'c' parameters, as shown above.
# With a dataframe, one can instead populate 'size' and 'color' columns
# holding one value per point.
xs = df.iloc[:, 0]
ys = df.iloc[:, 1]
df['color'] = [abs(int(1 - (x**2 + y**2))) / 10. for x, y in zip(xs, ys)]
df['size'] = [int(x**2 * 10 + y**2 * 10) for x, y in zip(xs, ys)]


# In[9]:

# The position and size of the different axes in the figure can be tuned:
#   - width fixes the width of the scatter plot (and therefore of the top
#     histogram)
#   - hist_position='left' moves the right histogram to the left
dummy = sh.plot(
    kargs_scatter={'c': df['color'], 's': df['size']},
    kargs_histy=dict(color='g'),
    axisbg='lightgrey',
    width=.6,
    hist_position='left',
)


# In[ ]: