import pandas as pd #I am importing pandas as pd
from pandas import Series, DataFrame # Series and DataFrame are the two core data structures provided by pandas

# A Series is shown with its index on the left and its values on the right.
mjp = Series([5, 4, 3, 2, 1])  # no index supplied, so the default 0..4 is used
print(mjp)
print(mjp.values)  # the values held by the Series

print(mjp.index)  # the index labels of the Series

import numpy as np

# Build a Series with an explicitly specified, label-based index.
jeeva = Series([5, 4, 3, 2, 1, -7, -29],
               index=['a', 'b', 'c', 'd', 'e', 'f', 'h'])
print(jeeva)  # try jeeva.index and jeeva.values as well
print(jeeva['a'])  # select a single value by its index label

jeeva['d'] = 9  # overwrite the value stored under label 'd'
print(jeeva)
print(jeeva[['a', 'b', 'c']])  # select a group of values with a list of labels

print(jeeva[jeeva > 0])  # boolean mask: keeps only the positive values
print(jeeva * 2)  # multiplies every element of the Series by 2

print(np.mean(jeeva))  # NumPy functions can be applied to a Series

# `in` checks whether a label is present in the index of the Series.
print('b' in jeeva)
print('z' in jeeva)

player_salary = {
    'Rooney': 50000,
    'Messi': 75000,
    'Ronaldo': 85000,
    'Fabregas': 40000,
    'Van persie': 67000,
}
new_player = Series(player_salary)  # converting a dictionary to a Series
print(new_player)  # the dictionary keys become the index of the Series

players = ['Klose', 'Messi', 'Ronaldo', 'Van persie', 'Ballack']
# Passing an explicit index selects those labels from the dictionary.
# Klose and Ballack have no salary entry, so their values appear as NaN.
player_1 = Series(player_salary, index=players)
print(player_1)

# `pd` is the alias of the pandas module imported at the top of the file.
print(pd.isnull(player_1))  # True where the value is missing

print(pd.notnull(player_1))  # True where the value is present

player_1.name = 'Bundesliga players'  # name for the Series
player_1.index.name = 'Player names'  # name of the index
print(player_1)

# Assigning to .index relabels the Series in place.
player_1.index = ['Neymar', 'Hulk', 'Pirlo', 'Buffon', 'Anderson']
print(player_1)

states = {
    'State': ['Gujarat', 'Tamil Nadu', ' Andhra', 'Karnataka', 'Kerala'],
    'Population': [36, 44, 67, 89, 34],
    'Language': ['Gujarati', 'Tamil', 'Telugu', 'Kannada', 'Malayalam'],
}
india = DataFrame(states)  # creating a data frame from a dict of lists
print(india)

# `columns` fixes the order in which the columns appear.
print(DataFrame(states, columns=['State', 'Language', 'Population']))

# If you pass a column that isn't in `states`, it appears filled with NaN.
new_farme = DataFrame(
    states,
    columns=['State', 'Language', 'Population', 'Per Capita Income'],
    index=['a', 'b', 'c', 'd', 'e'],
)

print(new_farme.columns)
print(new_farme['State'])  # retrieving a column like a dictionary entry

print(new_farme.Population)  # retrieving a column as an attribute

# Rows are retrieved by position with .iloc (the old .ix indexer no longer
# exists in current pandas). Position 3 is the fourth row, labelled 'd'.
print(new_farme.iloc[3])

print(new_farme)

# The empty 'Per Capita Income' column can be assigned a single value...
new_farme['Per Capita Income'] = 99
print(new_farme)

# ...or a list/array, whose length must match the length of the DataFrame.
new_farme['Per Capita Income'] = np.arange(5)
print(new_farme)

# Assigning a Series fills in the rows whose labels it contains ('b', 'c',
# 'd'); the remaining rows are displayed as NaN.
series = Series([44, 33, 22], index=['b', 'c', 'd'])
new_farme['Per Capita Income'] = series
print(new_farme)

# Assigning to a column name that does not exist creates a new column.
new_farme['Development'] = new_farme.State == 'Gujarat'
print(new_farme)
del new_farme['Development']  # deletes the column 'Development'
print(new_farme)

new_data = {
    'Modi': {2010: 72, 2012: 78, 2014: 98},
    'Rahul': {2010: 55, 2012: 34, 2014: 22},
}
# The outer dict keys become the columns and the inner dict keys the rows.
elections = DataFrame(new_data)
print(elections)
print(elections.T)  # transpose of the data frame

# An explicit index can be given for the data frame.
print(DataFrame(new_data, index=[2012, 2014, 2016]))

# A dict of Series also builds a data frame. The slices are positional, so
# .iloc is used to make that explicit on the integer (year) index.
ex = {
    'Gujarat': elections['Modi'].iloc[:-1],
    'India': elections['Rahul'].iloc[:2],
}
px = DataFrame(ex)
print(px)

from IPython.display import Image, display

# Image listing the things you can pass to a DataFrame; it is read from the
# local file 'Constructors.png'.
i = Image(filename='Constructors.png')
# A bare `i` is only rendered when it is the last expression of a notebook
# cell, so ask IPython to display it explicitly.
display(i)

# Both the row index and the column index of a data frame can be named.
px.index.name = 'year'
px.columns.name = 'politicians'
print(px)

print(px.values)  # the data held by the data frame

jeeva = Series([5, 4, 3, 2, 1, -7, -29],
               index=['a', 'b', 'c', 'd', 'e', 'f', 'h'])
index = jeeva.index
print(index)
print(index[1:])  # returns all the index elements except 'a'
# Index elements cannot be modified: index objects are immutable, so the
# assignment below raises an error. It is caught and printed so that the
# rest of the script keeps running.
try:
    index[1] = 'f'
except TypeError as err:
    print(err)

print(px)
print(2013 in px.index)  # checks if 2013 is a label in the index of data frame px

var = Series(['Python', 'Java', 'c', 'c++', 'Php'], index=[5, 4, 3, 2, 1])
print(var)
var1 = var.reindex([1, 2, 3, 4, 5])  # reindex creates a new object
print(var1)

# Labels that are not in the original index are introduced with NaN values.
print(var.reindex([1, 2, 3, 4, 5, 6, 7]))

# fill_value replaces those NaN values; 1 is used here, any value works.
print(var.reindex([1, 2, 3, 4, 5, 6, 7], fill_value=1))

gh = Series(['Dhoni', 'Sachin', 'Kohli'], index=[0, 2, 4])
print(gh)
# 'ffill' is forward fill: a new label takes the value of the label before it.
print(gh.reindex(range(6), method='ffill'))

# 'bfill' is backward fill: a new label takes the value of the label after it.
print(gh.reindex(range(6), method='bfill'))

import numpy as np

fp = DataFrame(np.arange(9).reshape((3, 3)),
               index=['a', 'b', 'c'],
               columns=['Gujarat', 'Tamil Nadu', 'Kerala'])
print(fp)

# Reindex the rows and the columns in one call. The column labels are given
# as an explicit list of state names: passing the `states` dict used by the
# data-frame examples would use its keys ('State', 'Population', 'Language')
# as labels, none of which exist in `fp`, leaving every cell NaN.
state_columns = ['Tamil Nadu', 'Karnataka', 'Kerala']
fp1 = fp.reindex(index=['a', 'b', 'c', 'd'], columns=state_columns)
print(fp1)  # row 'd' and column 'Karnataka' are new, so they hold NaN

er = Series(np.arange(5), index=['a', 'b', 'c', 'd', 'e'])
print(er)
# drop returns a new object with the given labels removed from an axis.
print(er.drop(['a', 'b']))

states = {
    'State': ['Gujarat', 'Tamil Nadu', ' Andhra', 'Karnataka', 'Kerala'],
    'Population': [36, 44, 67, 89, 34],
    'Language': ['Gujarati', 'Tamil', 'Telugu', 'Kannada', 'Malayalam'],
}
india = DataFrame(states, columns=['State', 'Population', 'Language'])
print(india)
print(india.drop([0, 1]))  # drops the rows with index 0 and 1

# axis=1 drops columns: here 'State' and 'Population' are removed.
# The same concept applies to rows with axis=0.
print(india.drop(['State', 'Population'], axis=1))

var = Series(['Python', 'Java', 'c', 'c++', 'Php'], index=[5, 4, 3, 2, 1])
print(var)

print(var[5])  # a single integer is looked up as an index label
print(var[2:4])  # an integer slice selects by position

print(var[[3, 2, 1]])  # a list of integers is looked up as index labels

print(var[var == 'Php'])  # boolean mask: keeps the matching elements

states = {
    'State': ['Gujarat', 'Tamil Nadu', ' Andhra', 'Karnataka', 'Kerala'],
    'Population': [36, 44, 67, 89, 34],
    'Language': ['Gujarati', 'Tamil', 'Telugu', 'Kannada', 'Malayalam'],
}
india = DataFrame(states, columns=['State', 'Population', 'Language'])
print(india)

print(india[['Population', 'Language']])  # retrieve columns from the data frame

print(india[india['Population'] > 50])  # rows with population greater than 50

print(india[:3])  # first three rows

# For selecting specific rows and columns by label, use the .loc indexer
# (the older .ix indexer is no longer available in current pandas).
import pandas as pd
states = {
    'State': ['Gujarat', 'Tamil Nadu', ' Andhra', 'Karnataka', 'Kerala'],
    'Population': [36, 44, 67, 89, 34],
    'Language': ['Gujarati', 'Tamil', 'Telugu', 'Kannada', 'Malayalam'],
}
india = DataFrame(states,
                  columns=['State', 'Population', 'Language'],
                  index=['a', 'b', 'c', 'd', 'e'])
print(india)

# This is how you select a subset of rows and columns: row labels first,
# then column labels.
print(india.loc[['a', 'b'], ['State', 'Language']])