#create DataFrame
import pandas as pd
import numpy as np
from pandas import DataFrame, Series
# Build a small example frame with an integer column, a float column and a
# string column; the string column contains one missing value (None).
df = pd.DataFrame(
    {
        'integer': [1, 2, 3, 6, 7, 23, 8, 3],
        'float': [2, 3.4, 5, 6, 2, 4.7, 4, 8],
        'string': ['saya', None, 'aku', 'cinta', 'kamu', 'a', 'b', 'jika'],
    }
)
# Display the whole DataFrame
df
| | float | integer | string |
---|---|---|---|
0 | 2.0 | 1 | saya |
1 | 3.4 | 2 | None |
2 | 5.0 | 3 | aku |
3 | 6.0 | 6 | cinta |
4 | 2.0 | 7 | kamu |
5 | 4.7 | 23 | a |
6 | 4.0 | 8 | b |
7 | 8.0 | 3 | jika |
# Show every row for the columns selected by label.
# The old `.ix` indexer was deprecated in pandas 0.20 and removed in 1.0;
# `.loc` is its label-based replacement and returns the same selection here.
df.loc[:, ['string', 'float']]
| | string | float |
---|---|---|
0 | saya | 2.0 |
1 | None | 3.4 |
2 | aku | 5.0 |
3 | cinta | 6.0 |
4 | kamu | 2.0 |
5 | a | 4.7 |
6 | b | 4.0 |
7 | jika | 8.0 |
# Show only the selected columns by indexing the frame with a list of
# column labels; the columns come back in the order given in the list.
columns_to_show = ['string', 'float']
df[columns_to_show]
| | string | float |
---|---|---|
0 | saya | 2.0 |
1 | None | 3.4 |
2 | aku | 5.0 |
3 | cinta | 6.0 |
4 | kamu | 2.0 |
5 | a | 4.7 |
6 | b | 4.0 |
7 | jika | 8.0 |
# Show only the rows that satisfy a condition: build a boolean mask that is
# True where the 'float' value is greater than 4, then index the frame with it.
float_above_four = df['float'] > 4
df[float_above_four]
| | float | integer | string |
---|---|---|---|
2 | 5.0 | 3 | aku |
3 | 6.0 | 6 | cinta |
5 | 4.7 | 23 | a |
7 | 8.0 | 3 | jika |
# Rename the 'string' column to 'characters'. The result is stored in df2;
# df itself is not reassigned here.
column_renames = {'string': 'characters'}
df2 = df.rename(columns=column_renames)
# Display the DataFrame after the column rename
df2
| | float | integer | characters |
---|---|---|---|
0 | 2.0 | 1 | saya |
1 | 3.4 | 2 | None |
2 | 5.0 | 3 | aku |
3 | 6.0 | 6 | cinta |
4 | 2.0 | 7 | kamu |
5 | 4.7 | 23 | a |
6 | 4.0 | 8 | b |
7 | 8.0 | 3 | jika |
# Drop every row that contains at least one missing (NA) value.
# axis=0 / how='any' are the defaults, spelled out for clarity. The result is
# only displayed, not assigned, so df2 keeps all of its rows.
df2.dropna(axis=0, how='any')
| | float | integer | characters |
---|---|---|---|
0 | 2.0 | 1 | saya |
2 | 5.0 | 3 | aku |
3 | 6.0 | 6 | cinta |
4 | 2.0 | 7 | kamu |
5 | 4.7 | 23 | a |
6 | 4.0 | 8 | b |
7 | 8.0 | 3 | jika |
# Show the mean, median, and maximum of the 'float' column of df2.
# print(...) with a single %-formatted argument produces the same output on
# Python 2 and Python 3; the original `print "..."` statement form is a
# SyntaxError on Python 3.
float_column = df2['float']
mean = float_column.mean()
print("mean %f" % mean)
median = float_column.median()
print("median %f" % median)
# Stored as max_value rather than max so the builtin max() is not shadowed
# for the rest of the session.
max_value = float_column.max()
print("max %f" % max_value)
mean 4.387500
median 4.350000
max 8.000000