#!/usr/bin/env python
# coding: utf-8

# # How to leverage the entire PyData Stack
#
# A quick poll ...

# ### Who uses pandas?

# ### Who uses numpy?

# ## Blaze - A Quick Tour
#
# Blaze provides a lightweight interface on top of pre-existing computational
# infrastructure. This notebook gives a quick overview of how Blaze interacts
# with a variety of data types.

# In[37]:


# NOTE: this script is a notebook export. `get_ipython` only exists inside an
# IPython/Jupyter session, and the `autotime` extension must be installed.
# Bare expressions below (e.g. `x`) display their value only in a notebook.
get_ipython().run_line_magic('reload_ext', 'autotime')
from blaze import Data, by, compute


# ### Blaze wraps pre-existing data
#
# Blaze interacts with normal Python objects. Operations on Blaze `Data`
# objects create expression trees.
#
# These expressions deliver an intuitive numpy/pandas-like feel.

# ### Lists
#
# Starting small, Blaze interacts happily with collections of data.
#
# It uses Pandas for pretty notebook printing.

# In[38]:


x = Data([1, 2, 3, 4, 5])
x


# In[5]:


x[x > 2] * 10


# In[42]:


x.dshape


# ## Or Tabular, Pandas-like datasets
#
# Slightly more exciting, Blaze operates on tabular data

# In[9]:


L = [
    [1, 'Alice', 100],
    [2, 'Bob', -200],
    [3, 'Charlie', 300],
    [4, 'Dennis', 400],
    [5, 'Edith', -500],
]


# In[10]:


x = Data(L, fields=['id', 'name', 'amount'])


# In[43]:


x.amount.mean()


# In[12]:


x.dshape


# ### Here's `x` again

# In[13]:


x


# In[14]:


deadbeats = x[x.amount < 0].name
deadbeats


# ## Or it can even just drive pandas

# Blaze doesn't do work, it just tells other systems to do work.
#
# In the previous example, Blaze told Python which for-loops to write. In
# this example, it calls the right functions in Pandas.
#
# The user experience is mostly identical, only performance differs.

# In[15]:


from pandas import DataFrame

# Same records as the list example above, this time held in a DataFrame.
df = DataFrame(
    [
        [1, 'Alice', 100],
        [2, 'Bob', -200],
        [3, 'Charlie', 300],
        [4, 'Dennis', 400],
        [5, 'Edith', -500],
    ],
    columns=['id', 'name', 'amount'],
)


# In[16]:


df


# In[17]:


x = Data(df)
x


# In[18]:


deadbeats = x[x.amount < 0].name
deadbeats


# ### Outputs are Blaze expressions

# In[19]:


type(deadbeats)


# ### `compute` turns Blaze expressions into something concrete

# In[20]:


compute(deadbeats)


# In[21]:


type(compute(deadbeats))


# ### Blaze also works with other data types like SQLAlchemy `Table`s

# Blaze extends beyond just Python and Pandas (that's the main motivation.)
#
# Here it drives SQLAlchemy.

# In[22]:


from sqlalchemy import Table, Column, MetaData, Integer, String, create_engine

tab = Table(
    'bank',
    MetaData(),
    Column('id', Integer),
    Column('name', String),
    Column('amount', Integer),
)


# In[23]:


x = Data(tab)
x.dshape


# Just like computations on pandas objects produce pandas objects,
# computations on SQLAlchemy tables produce SQLAlchemy Select statements.

# In[24]:


deadbeats = x[x.amount < 0].name
compute(deadbeats)


# In[25]:


print(compute(deadbeats))  # SQLAlchemy generates SQL


# ### Let's connect to a real database
#
# When we drive a SQLAlchemy table connected to a database we get actual
# computation.

# In[26]:


engine = create_engine('sqlite:///../blaze/blaze/examples/data/iris.db')


# In[28]:


x = Data(engine)
x


# In[29]:


x.fields


# In[30]:


x.iris.sepal_length.mean()


# In[31]:


# Bind the grouped expression to a name so the next cell does not have to
# rely on IPython's `_` (last displayed output), which only exists in an
# interactive session and changes whenever another cell is displayed.
sepal_length_range = by(
    x.iris.species,
    shortest=x.iris.sepal_length.min(),
    longest=x.iris.sepal_length.max(),
)
sepal_length_range


# In[32]:


print(compute(sepal_length_range))


# ### Use URI strings to ease access
#
# Often just figuring out how to produce the relevant Python object can be a
# challenge.
#
# Blaze supports many formats of URI strings

# In[33]:


x = Data('sqlite:///../blaze/blaze/examples/data/iris.db::iris')


# In[34]:


x