# Exploratory look at the "diamonds" data set with pandas and matplotlib,
# followed by a few basic Python examples (functions, lambdas, loops).
#
# NOTE(review): this code appears to come from an IPython notebook whose cell
# boundaries were lost. Bare expressions such as `diamonds.head()` only
# display their value as the last line of a notebook cell; when running this
# as a plain script, wrap them in print(), and call plt.figure() before /
# plt.show() after the plots you want to see.

# Special IPython command to prepare the notebook for matplotlib. It is not
# valid Python outside IPython, so it is kept here as a comment only:
# %matplotlib inline

import matplotlib.pyplot as plt
import pandas as pd

# `pd.options.display.mpl_style = 'default'` no longer exists in pandas;
# plot styling is selected through matplotlib's style sheets instead.
# NOTE(review): 'ggplot' is picked as the closest stock style -- confirm.
plt.style.use('ggplot')

url = 'https://raw.githubusercontent.com/cs109/2014_data/master/diamonds.csv'
diamonds = pd.read_csv(url, sep=',', index_col=0)
diamonds.head()
diamonds.describe()

# --- Histograms and density plots -------------------------------------------
diamonds['price'].hist(bins=50, color='black')
plt.title('Distribution of Price')
plt.xlabel('Price')

# Try changing bin size from 20 to 500. What do you notice?
diamonds['carat'].hist(bins=20, color='black', figsize=(6, 4))
plt.title('Distribution of weights in carats')
plt.xlim(0, 3)
plt.xlabel('Weight in Carats')

diamonds['price'].plot(kind='kde', color='black')
plt.title('Distribution of Price')

# --- Scatter plots: pandas API first, then plain matplotlib -----------------
diamonds.plot(x='carat', y='price', kind='scatter', color='black', alpha=1)

plt.scatter(diamonds['carat'], diamonds['price'], color='black', alpha=0.05)
plt.xlabel('Carat')
plt.ylabel('Price')

# One scatter plot per value of the 'color' column.
diamonds.groupby('color').plot(x='carat', y='price', kind='scatter',
                               color='black', alpha=1)

# try here

diamonds.boxplot('price', by='color')

# --- Summary statistics ------------------------------------------------------
# Restrict to numeric columns explicitly: recent pandas raises instead of
# silently skipping non-numeric columns in these reductions.
numeric = diamonds.select_dtypes(include=['number'])
numeric.mean()
numeric.corr()  # correlation
numeric.var()  # variance

# `DataFrame.sort` was removed from pandas; `sort_values` is its replacement.
diamonds.sort_values('price', ascending=True, inplace=False).head()  # sorting

# --- Indexing ---------------------------------------------------------------
# `.ix` was removed from pandas. `.loc` (label-based) replaces it below.
# NOTE(review): this assumes the index read from the first CSV column is
# integer-valued, in which case `.ix` was label-based too -- confirm.
subtable = diamonds.iloc[0:2, 0:2]  # first two rows and columns, by position
print("subtable")
print(subtable)
print("")

column = diamonds['color']
print("head of the color column")
print(column.head())
print("")

row = diamonds.loc[1:2]  # rows labelled 1 and 2 (label slices are inclusive)
print("row")
print(row)
print("")

rows = diamonds.loc[:3]  # all rows up to and including label 3
print("rows")
print(rows)
print("")

color = diamonds.loc[1, 'color']
print("color of diamond in row 1")
print(color)
print("")

# max along column
print("max price %g" % diamonds['price'].max())
print("")

# axes
print("axes")
print(diamonds.axes)
print("")

row = diamonds.loc[1]
print("row info")
print(row.name)
print(row.index)
print("")


# --- Plain Python examples --------------------------------------------------
def squared(x):
    """Return the square of a value."""
    return x ** 2


squared(4)

# The same operation written as an anonymous function.
f = lambda x: x**2
f(4)

for _ in range(4):
    print('Hello world!')
# Build and print one word per prefix letter: 'Jack', 'Kack', 'Lack', ...
prefixes = 'JKLMNOPQ'
suffix = 'ack'
for letter in prefixes:
    print(letter + suffix)


def countdown(n):
    """Print n, n - 1, ..., 1 on separate lines, then 'Blastoff!'.

    If ``n`` is not greater than zero, only 'Blastoff!' is printed.
    """
    while n > 0:
        print(n)
        n = n - 1
    print('Blastoff!')


countdown(3)

# List comprehensions: square every element, optionally filtering some out.
a = [5, 10, 15, 20]
b = [i**2 for i in a]
c = [i**2 for i in a if i != 10]
# Single-argument print(...) behaves the same on Python 2 and 3. The two
# spaces reproduce the output of the original `print "a: ", a` statement.
print("a:  %s" % (a,))
print("b:  %s" % (b,))
print("c:  %s" % (c,))