#!/usr/bin/env python
# coding: utf-8

# # scatter demonstration
# - This notebook is part of sequana, which was originally part of biokit 
#  - https://github.com/biokit/biokit
#  - https://pypi.python.org/pypi/biokit
# 
# - scatter plot with histogram: scatter_hist
# 

# In[1]:


get_ipython().run_line_magic('pylab', 'inline')
from sequana.viz import ScatterHist
import pandas as pd


# # Scatter plot with histogram

# In[2]:


# Demo data: two independent samples of 1000 normally distributed values.
# (ScatterHist input can be a 2-column matrix or a dataframe with 2 columns.)
X, Y = pylab.randn(1000), pylab.randn(1000)


# In[3]:


# Gather both samples into a dataframe with one column per sample.
df = pd.DataFrame(
    {
        'X': X,
        'Y': Y,
    }
)


# In[4]:


# Wrap the two-column dataframe in a ScatterHist object (scatter plot
# with histograms, see the header of this notebook).
sh = ScatterHist(df)
# Draw the figure using the default options of ScatterHist.plot.
sh.plot()


# In[5]:


# The scatter plot and the histogram can be customised through optional
# dictionaries holding any argument accepted by the underlying pylab
# functions (see the help of pylab.scatter and pylab.hist for details).
scatter_options = {'c': 'r', 's': 30, 'alpha': .3}
histy_options = {'color': 'g', 'bins': 20}
_ = sh.plot(kargs_scatter=scatter_options, kargs_histy=histy_options)


# In[8]:


# The 'c' and 's' parameters shown above fix the color and size of the
# markers in the scatter plot.
# When working with a dataframe, the same information can instead be stored
# in 'color' and 'size' columns, which are meant to be interpreted
# automatically.
first_column = df.iloc[:, 0]
second_column = df.iloc[:, 1]
df['color'] = [
    abs(int(1 - (u**2 + v**2))) / 10.
    for u, v in zip(first_column, second_column)
]
df['size'] = [
    int(u**2*10 + v**2*10)
    for u, v in zip(first_column, second_column)
]


# In[9]:


# The position and size of the different axes in the figure can be tuned
# as well:
#  - width fixes the width of the scatter plot (and therefore of the top
#    histogram)
#  - hist_position='left' moves the right histogram to the left
# NOTE(review): 'axisbg' is the old matplotlib keyword name (renamed to
# 'facecolor' there) -- confirm ScatterHist.plot still accepts it.
plot_options = dict(
    kargs_scatter={'c': df['color'], 's': df['size']},
    kargs_histy={'color': 'g'},
    axisbg='lightgrey',
    width=.6,
    hist_position='left',
)
dummy = sh.plot(**plot_options)


# In[ ]: