import pandas as pd
import numpy as np
%pylab inline
Populating the interactive namespace from numpy and matplotlib
# Default (width, height) applied to the inline figures drawn below.
figsize(7, 4)
# Read the Titanic passenger table; the path is relative to the working directory.
df = pd.read_csv(filepath_or_buffer='titanic.csv')
# Preview the first five rows (five is head()'s default).
df.head()
PassengerId | Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S |
1 | 2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
2 | 3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
3 | 4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35 | 1 | 0 | 113803 | 53.1000 | C123 | S |
4 | 5 | 0 | 3 | Allen, Mr. William Henry | male | 35 | 0 | 0 | 373450 | 8.0500 | NaN | S |
# Display the Survived column; pandas abbreviates the middle of a long Series.
df.loc[:, 'Survived']
0 0 1 1 2 1 3 1 4 0 5 0 6 0 7 0 8 1 9 1 10 1 11 1 12 0 13 0 14 0 ... 876 0 877 0 878 0 879 1 880 1 881 0 882 0 883 0 884 0 885 0 886 0 887 1 888 0 889 1 890 0 Name: Survived, Length: 891, dtype: int64
# Number of rows (passengers) in the table.
df.shape[0]
891
# How many passengers fall under each value of Sex, most frequent first.
df.loc[:, 'Sex'].value_counts()
male 577 female 314 dtype: int64
# First ten entries of the Survived column.
df['Survived'].iloc[:10]
0 0 1 1 2 1 3 1 4 0 5 0 6 0 7 0 8 1 9 1 Name: Survived, dtype: int64
# Passenger counts per sex, drawn as a bar chart.
sex_counts = df['Sex'].value_counts()
sex_counts.plot(kind='bar')
<matplotlib.axes.AxesSubplot at 0x10cca2050>
# Keep only the rows whose Survived flag equals 1.
is_survivor = df['Survived'] == 1
df_survived = df.loc[is_survivor]
# Peek at the first five surviving passengers (five is head()'s default).
df_survived.head()
PassengerId | Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
2 | 3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
3 | 4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35 | 1 | 0 | 113803 | 53.1000 | C123 | S |
8 | 9 | 1 | 3 | Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) | female | 27 | 0 | 2 | 347742 | 11.1333 | NaN | S |
9 | 10 | 1 | 2 | Nasser, Mrs. Nicholas (Adele Achem) | female | 14 | 1 | 0 | 237736 | 30.0708 | NaN | C |
# Passengers younger than 10; a row with a missing Age compares False and is left out.
df0 = df.loc[df['Age'] < 10]
# NOTE(review): this counts a literal list, not df0 -- it looks like leftover
# scratch work; confirm nothing relies on it before removing.
len([1, 4, 5, 6, 6])
5
# Survival rate per passenger class.  Survived is a 0/1 flag, so its mean
# within each class is the fraction of that class that survived.  Grouping the
# full table (instead of dividing two value_counts() results) reports 0.0
# rather than NaN for a class with no survivors, and always orders the bars
# by class label.
df.groupby('Pclass')['Survived'].mean().plot(
    kind='bar', title='Survival rate by passenger class')
<matplotlib.axes.AxesSubplot at 0x10cca9450>
# Survival rate per sex.  Survived is a 0/1 flag, so its mean within each
# group is the fraction of that group that survived.  Grouping the full table
# (instead of dividing two value_counts() results) reports 0.0 rather than NaN
# for a group with no survivors, and always orders the bars by group label.
df.groupby('Sex')['Survived'].mean().plot(
    kind='bar', title='Survival rate by sex')
<matplotlib.axes.AxesSubplot at 0x10ce8fd10>
# One bar per distinct Age value.  value_counts() orders the bars by
# descending count (not by age) and leaves out missing ages.
age_counts = df['Age'].value_counts()
age_counts.plot(kind='bar')
<matplotlib.axes.AxesSubplot at 0x10ce46f10>
# Histogram of passenger ages using 30 bins.
ages = df['Age']
ages.hist(bins=30)
<matplotlib.axes.AxesSubplot at 0x10d61a410>
# Overlay two kernel-density estimates of Age on the same axes: every
# passenger with a recorded age versus survivors only.  Missing ages are
# dropped before each density is estimated.  The label/legend arguments make
# the two otherwise indistinguishable curves identifiable.
df['Age'].dropna().plot(kind='kde', label='All passengers', legend=True)
df_survived['Age'].dropna().plot(kind='kde', label='Survived', legend=True)
<matplotlib.axes.AxesSubplot at 0x10d698750>
# Same age-density comparison restricted to male passengers: all males with a
# recorded age versus male survivors.  A single .loc[row_mask, column] lookup
# replaces the chained [][] indexing, and label/legend identify each curve.
df.loc[df['Sex'] == 'male', 'Age'].dropna().plot(
    kind='kde', label='All male passengers', legend=True)
df_survived.loc[df_survived['Sex'] == 'male', 'Age'].dropna().plot(
    kind='kde', label='Male survivors', legend=True)
<matplotlib.axes.AxesSubplot at 0x10dc09890>