"""Blaze expression walkthrough on a small bank table and the iris CSV.

The statements below are bare expressions: each one builds a Blaze (or
pandas) expression and discards the result, so they are only informative in
an interactive session that echoes values.  The question comments are left
unanswered in this file.
"""

from blaze import Data, by, join, transform
import pandas as pd

# --- Bank table: built from an in-memory list of rows ----------------------
bank = Data(
    [
        [1, 'Alice', 100],
        [2, 'Bob', -200],
        [3, 'Charlie', 300],
        [4, 'Dennis', 400],
        [5, 'Edith', -500],
    ],
    columns=['id', 'name', 'amount'],
)

# Single column, arithmetic on a column, and a reduction over the result.
bank.amount
bank.amount / 100
(bank.amount / 100).mean()

# Projection to two columns, ordered by ``amount``.
bank[['name', 'amount']].sort('amount')

# Row selection with a boolean predicate, then further column operations.
bank[bank.amount < 0]
bank[bank.amount < 0].amount / 100
bank[bank.amount < 0].name

# What are the IDs of everyone with a positive amount?

# What is the name of the person with amount 400?

# What is the difference between the minimum and maximum amounts?

# --- Iris table: loaded from a CSV path relative to the working dir --------
iris = Data('data/iris.csv')
iris
iris.dshape

# Distinct values and distinct counts of individual columns.
iris.species.distinct()
iris.species.nunique()
iris.sepal_length.nunique()

# Add two derived ratio columns alongside the existing ones.
transform(
    iris,
    sepal_ratio=iris.sepal_length / iris.sepal_width,
    petal_ratio=iris.petal_length / iris.petal_width,
)

# Split-apply-combine: group by species, then one or more named reductions.
by(iris.species, count=iris.species.count())
by(
    iris.species,
    count=iris.species.count(),
    longest_petal=iris.petal_length.max(),
)

# What are the longest and shortest sepal_lengths per species?

# What is the difference of longest to shortest sepal length per species?

# --- pandas: minimum sepal_length per species on the same CSV --------------
df = pd.read_csv('data/iris.csv')
df.groupby(df.species).sepal_length.min()