# IPython shell-style command (no leading '%', so it relies on IPython's
# automagic): make this directory the working directory. 'olive.csv' is read
# further down through a relative path, so it is looked up here.
# NOTE(review): the path is specific to one machine.
cd C:\Users\tk\Desktop

# IPython magic: draw matplotlib figures inline in the notebook output.
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import brewer2mpl
from matplotlib import rcParams

# ColorBrewer "Dark2" qualitative palette with 7 colors; it supplies the line
# color cycle and the default patch fill configured below.
dark2_cmap = brewer2mpl.get_map('Dark2', 'Qualitative', 7)
dark2_colors = dark2_cmap.mpl_colors

rcParams['figure.figsize'] = (10, 6)
rcParams['figure.dpi'] = 150
# 'axes.color_cycle' was superseded by 'axes.prop_cycle' (matplotlib 1.5) and
# is no longer accepted by current releases. Use the new key whenever this
# matplotlib knows it, and fall back to the old key on older installations.
if 'axes.prop_cycle' in rcParams:
    rcParams['axes.prop_cycle'] = plt.cycler('color', dark2_colors)
else:
    rcParams['axes.color_cycle'] = dark2_colors
rcParams['lines.linewidth'] = 2
rcParams['axes.facecolor'] = 'white'
rcParams['font.size'] = 14
rcParams['patch.edgecolor'] = 'white'
rcParams['patch.facecolor'] = dark2_colors[0]
rcParams['font.family'] = 'StixGeneral'


def remove_border(axes=None, top=False, right=False, left=True, bottom=True):
    """
    Minimize chartjunk by stripping out unnecessary plot borders and axis ticks

    axes is the matplotlib axes to modify; when it is None the current axes
    (plt.gca()) is used.

    The top/right/left/bottom keywords toggle whether the corresponding plot
    border is drawn. Ticks are kept only on sides whose border is drawn; when
    two opposite borders are both drawn, ticks appear on both of them.
    """
    # Compare against None explicitly so only a missing argument triggers the
    # fallback to the current axes.
    ax = plt.gca() if axes is None else axes

    for side, visible in (('top', top), ('right', right),
                          ('left', left), ('bottom', bottom)):
        ax.spines[side].set_visible(visible)

    #turn off all ticks
    ax.yaxis.set_ticks_position('none')
    ax.xaxis.set_ticks_position('none')

    #now re-enable visibles
    # tick_top()/tick_bottom() (and tick_left()/tick_right()) each move the
    # ticks to one side only, so calling both in sequence would leave just the
    # last one in effect. Opposite sides that are both visible need 'both'.
    if top and bottom:
        ax.xaxis.set_ticks_position('both')
    elif top:
        ax.xaxis.tick_top()
    elif bottom:
        ax.xaxis.tick_bottom()

    if left and right:
        ax.yaxis.set_ticks_position('both')
    elif left:
        ax.yaxis.tick_left()
    elif right:
        ax.yaxis.tick_right()

# Raise pandas' display width and column limits before inspecting the frame.
for option_name, option_value in (('display.width', 500),
                                  ('display.max_columns', 100)):
    pd.set_option(option_name, option_value)

# Load the data set; the relative path is resolved against the working
# directory.
olive_oil = pd.read_csv('olive.csv')
olive_oil.head(5)

olive_oil.shape

# Relabel the first column of the frame as 'area_Idili'.
first_column = olive_oil.columns[0]
olive_oil.rename(columns={first_column: 'area_Idili'}, inplace=True)
olive_oil.head(5)

# Show the column labels as a one-column frame.
pd.DataFrame(olive_oil.columns)

# Collect the distinct entries of the 'region' and 'area' columns.
unique_in_region = olive_oil.region.unique()
unique_in_area = olive_oil.area.unique()
# print(...) with a single argument produces the same output on Python 2 and
# Python 3, whereas the bare print statement only parses on Python 2.
print(unique_in_region)
print(unique_in_area)

# Count rows for every (area, region) combination.
pd.crosstab(olive_oil.area, olive_oil.region)

olive_oil.head(5)

# Keep only the text that follows the last '.' in every 'area_Idili' label.
olive_oil['area_Idili'] = olive_oil['area_Idili'].map(
    lambda label: label.rsplit('.', 1)[-1])
olive_oil.head()

# How the split function works
x = '1.northapulia'
y = x.split('.')  # list of the pieces found between the '.' separators
# print(...) with a single argument works on Python 2 and Python 3 alike; the
# bare print statement is a SyntaxError on Python 3.
print(y)
z = x.split('.')[-1]  # -1 returns the last element of the list
z

# Indexing with a list of labels returns a sub-DataFrame holding just those
# columns. (http://bit.ly/1sPHf1u)
olive_oil[['palmitic', 'palmitoleic']].head(5)

# Indexing with a single label returns that column on its own.
olive_oil['palmitic']

# Each message is handed to print(...) as ONE pre-formatted argument: that
# parses on Python 3 and, unlike print(a, b), does not print a tuple on
# Python 2. The label and the type are still separated by a single space.
print("{0} {1}".format(
    " the type of olive_oil[['palmitic']]: \t", type(olive_oil[['palmitic']])))
print("{0} {1}".format(
    " the type of olive_oil['palmitic']: \t", type(olive_oil['palmitic'])))


olive_oil.palmitic # this is a convenient way to access a specific column

list_of_acids =['palmitic', 'palmitoleic', 'stearic', 'oleic', 'linoleic', 'linolenic', 'arachidic', 'eicosenoic']
# Divide every acid column by 100 in one vectorized operation
# (presumably converting percentages to fractions -- confirm against the data).
df = olive_oil[list_of_acids] / 100.0
df.head(5)

olive_oil[list_of_acids] = df # we are replacing the acid list values in olive_oil
olive_oil.head(5)

# Histogram of the 'palmitic' column drawn through the pyplot interface.
plt.hist(olive_oil['palmitic'])

# 2x2 grid: connected line plot, point markers, scatter plot, and histogram.
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 10))
axes[0, 0].plot(olive_oil['palmitic'], olive_oil['linolenic'])
axes[0, 1].plot(olive_oil['palmitic'], olive_oil['linolenic'], '.')
axes[1, 0].scatter(olive_oil['palmitic'], olive_oil['linolenic'])
axes[1, 1].hist(olive_oil['palmitic'])
fig.tight_layout()

# Group the rows by 'region' once and reuse the grouping below.
region_groupby = olive_oil.groupby('region')
grp_reg = region_groupby.describe()
grp_reg.head(20)

# Aggregate only the numeric, non-key columns. 'area_Idili' holds text; older
# pandas dropped such columns from std()/mean() silently, while recent pandas
# (2.0+) raises on them instead, so the selection is made explicit here.
numeric_columns = [
    column
    for column in olive_oil.select_dtypes(include=[np.number]).columns
    if column != 'region'
]
numeric_by_region = region_groupby[numeric_columns]

# Per-region standard deviation of every numeric column.
olstd = numeric_by_region.std()
olstd

# Per-region mean of every numeric column.
olmean = numeric_by_region.mean()
olmean.head()

# Suffix the acid columns so that the mean and std frames can be joined
# without their column names clashing.
renamedict_std = {k: k + "_std" for k in list_of_acids}
renamedict_mean = {k: k + "_mean" for k in list_of_acids}
olstd.rename(columns=renamedict_std, inplace=True)
olmean.rename(columns=renamedict_mean, inplace=True)
olstd.head()

# Double brackets keep each selection a one-column DataFrame; the join then
# aligns the two on the shared 'region' index.
olpalmiticmean = olmean[['palmitic_mean']]
olpalmiticstd = olstd[['palmitic_std']]
newolbyregion = olpalmiticmean.join(olpalmiticstd)
newolbyregion

# Boolean Series: True on the rows whose 'eicosenoic' value is below 0.05.
eico = olive_oil['eicosenoic'] < 0.05
eico

# Small frame with one missing value in 'Examiner' and one in 'Data science'.
new_data = pd.DataFrame({
    'Bigdata': [12, 34, 99, 45, 13],
    'Examiner': [0.9, 0.8, 0.7, 0.6, None],
    'Data science': ['L', 'M', None, 'c', 'a'],
})
new_data

# Drop every row that contains at least one missing value (result not stored).
new_data.dropna(axis=0, how='any')

# Single-column frame with two missing entries.
data = pd.DataFrame([1., None, 3.5, None, 7])
data

# Fill the gaps with the column mean; the mean itself ignores missing entries.
mean = data.mean(axis=0)
data.fillna(value=mean)