#!/usr/bin/env python
# coding: utf-8

# # How to leverage the entire PyData Stack

# # A quick poll ...

# ### Who uses pandas?

# ### Who uses numpy?

# ## Blaze - A Quick Tour
# 
# Blaze provides a lightweight interface on top of pre-existing computational infrastructure.  This notebook gives a quick overview of how Blaze interacts with a variety of data types.

# In[37]:


# `get_ipython` is only defined inside an IPython/Jupyter session.  Look it up
# defensively so this exported notebook does not die with a NameError on its
# first statement when started with a plain `python` interpreter.
try:
    ipython_shell = get_ipython()
except NameError:
    ipython_shell = None

# Reload the `autotime` extension only when a shell is actually available;
# nothing else in this file reads anything from that extension.
if ipython_shell is not None:
    ipython_shell.run_line_magic('reload_ext', 'autotime')

from blaze import Data, by, compute


# ### Blaze wraps pre-existing data
# 
# Blaze interacts with normal Python objects.  Operations on Blaze `Data` objects create expression trees.  
# 
# These expressions deliver an intuitive numpy/pandas-like feel.

# ### Lists
# 
# Starting small, Blaze interacts happily with collections of data.  
# 
# It uses Pandas for pretty notebook printing.

# In[38]:


# Wrap a plain Python list in a Blaze `Data` object.  The bare `x` that ends
# the cell is there so the notebook displays the object.
x = Data([1, 2, 3, 4, 5])
x


# In[5]:


# Select the elements greater than 2 and multiply them by 10.
x[x > 2] * 10


# In[42]:


# Show the `dshape` attribute Blaze reports for `x`.
x.dshape


# ## Or Tabular, Pandas-like datasets
# 
# Slightly more exciting: Blaze also operates on tabular data.

# In[9]:


# Sample bank records kept as plain nested lists, one [id, name, amount] row
# per entry (the column names are supplied later via `fields=`).
L = [
    [1, 'Alice', 100],
    [2, 'Bob', -200],
    [3, 'Charlie', 300],
    [4, 'Dennis', 400],
    [5, 'Edith', -500],
]


# In[10]:


# Wrap the nested list `L`, naming its three columns through `fields`.
x = Data(L, fields=['id', 'name', 'amount'])


# In[43]:


# Mean of the `amount` column.
x.amount.mean()


# In[12]:


# Show the `dshape` attribute now that `x` is tabular.
x.dshape


# ### Here's `x` again

# In[13]:


x


# In[14]:


# Names taken from the rows whose `amount` is negative; the bare name on the
# following line displays the result in the notebook.
deadbeats = x[x.amount < 0].name
deadbeats


# ## Or it can even just drive pandas

# Blaze doesn't do work itself; it just tells other systems to do work.
# 
# In the previous example, Blaze told Python which for-loops to write.  In this example, it calls the right functions in Pandas.  
# 
# The user experience is mostly identical, only performance differs.

# In[15]:


from pandas import DataFrame

# The same five bank records as the list-of-lists example earlier in this
# file, this time held in a pandas DataFrame with explicit column names.
# Row 4 is spelled 'Dennis' so that it matches that earlier table (it used to
# read 'Denis' here), keeping both walkthroughs on identical data.
df = DataFrame(
    [
        [1, 'Alice', 100],
        [2, 'Bob', -200],
        [3, 'Charlie', 300],
        [4, 'Dennis', 400],
        [5, 'Edith', -500],
    ],
    columns=['id', 'name', 'amount'],
)


# In[16]:


# Display the raw DataFrame before handing it to Blaze.
df


# In[17]:


# Wrap the DataFrame; from here on `x` refers to the pandas-backed data.
x = Data(df)
x


# In[18]:


# Same filter expression as in the list-backed example, rebinding `deadbeats`.
deadbeats = x[x.amount < 0].name
deadbeats


# ### Outputs are Blaze expressions

# In[19]:


# Inspect the type of `deadbeats` itself, before anything is computed.
type(deadbeats)


# ### `compute` turns Blaze expressions into something concrete

# In[20]:


# Evaluate the expression with `compute`.
compute(deadbeats)


# In[21]:


# Inspect the type of the computed result, for comparison with the
# `type(deadbeats)` call above.
type(compute(deadbeats))


# ### Blaze also works with other data types like SQLAlchemy `Table`s

# Blaze extends beyond just Python and Pandas (that's the main motivation).  
# 
# Here it drives SQLAlchemy.

# In[22]:


from sqlalchemy import Table, Column, MetaData, Integer, String, create_engine

# Describe a `bank` table with an integer `id`, a string `name` and an integer
# `amount`.  It is attached to a fresh MetaData object; no engine is passed.
tab = Table(
    'bank',
    MetaData(),
    Column('id', Integer),
    Column('name', String),
    Column('amount', Integer),
)


# In[23]:


# Wrap the SQLAlchemy table description; `tab` was built without an engine.
x = Data(tab)
x.dshape


# Just as computations on pandas objects produce pandas objects, computations on SQLAlchemy tables produce SQLAlchemy Select statements.

# In[24]:


# Same filter expression as in the earlier examples, now built on the
# SQLAlchemy-backed `x`.
deadbeats = x[x.amount < 0].name
compute(deadbeats)


# In[25]:


# Printing the computed object shows its string form.
print(compute(deadbeats))  # SQLAlchemy generates SQL


# ### Let's connect to a real database
# 
# When we drive a SQLAlchemy table connected to a database we get actual computation.

# In[26]:


# SQLite engine for `iris.db`, addressed with a relative path.
# NOTE(review): presumably this expects a blaze source checkout one level
# above the working directory — confirm before running from elsewhere.
engine = create_engine('sqlite:///../blaze/blaze/examples/data/iris.db')


# In[28]:


# Wrap the engine itself rather than a single table object.
x = Data(engine)
x


# In[29]:


# List the field names of the wrapped engine.
x.fields


# In[30]:


# Mean of `sepal_length`, reached through the `iris` field of `x`.
x.iris.sepal_length.mean()


# In[31]:


# Group the iris rows by `species` and, for each group, take the minimum and
# maximum `sepal_length`.  The expression is bound to a name so that the cell
# below no longer relies on IPython's `_` (the most recent output): `_` is
# undefined under a plain `python` interpreter and refers to something else as
# soon as any other cell is executed in between.
species_extremes = by(
    x.iris.species,
    shortest=x.iris.sepal_length.min(),
    longest=x.iris.sepal_length.max()
)
# Bare name so the notebook still displays the expression.
species_extremes


# In[32]:


# Evaluate the grouped expression and print the result.
print(compute(species_extremes))


# ### Use URI strings to ease access
# 
# Often just figuring out how to produce the relevant Python object can be a challenge.
# 
# Blaze supports many formats of URI strings.

# In[33]:


# Build the `Data` object straight from a URI string instead of creating an
# engine first.  The path is the same relative `iris.db` location used for
# `create_engine` earlier in this file.
# NOTE(review): the `::iris` suffix presumably selects the `iris` table inside
# the database — confirm against the Blaze/odo URI documentation.
x = Data('sqlite:///../blaze/blaze/examples/data/iris.db::iris')


# In[34]:


x