%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
# Read the flying-etiquette survey responses and preview the first five rows.
file_name = "flying-etiquette.csv"
data = pd.read_csv(filepath_or_buffer=file_name)
data.head(n=5)
RespondentID | How often do you travel by plane? | Do you ever recline your seat when you fly? | How tall are you? | Do you have any children under 18? | In a row of three seats, who should get to use the two arm rests? | In a row of two seats, who should get to use the middle arm rest? | Who should have control over the window shade? | Is itrude to move to an unsold seat on a plane? | Generally speaking, is it rude to say more than a few words tothe stranger sitting next to you on a plane? | ... | Is itrude to wake a passenger up if you are trying to walk around? | In general, is itrude to bring a baby on a plane? | In general, is it rude to knowingly bring unruly children on a plane? | Have you ever used personal electronics during take off or landing in violation of a flight attendant's direction? | Have you ever smoked a cigarette in an airplane bathroom when it was against the rules? | Gender | Age | Household Income | Education | Location (Census Region) | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 3436139758 | Once a year or less | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1 | 3434278696 | Once a year or less | About half the time | 6'3" | Yes | The arm rests should be shared | The arm rests should be shared | Everyone in the row should have some say | No, not rude at all | No, not at all rude | ... | No, not at all rude | No, not at all rude | No, not at all rude | No | No | Male | 30-44 | NaN | Graduate degree | Pacific |
2 | 3434275578 | Once a year or less | Usually | 5'8" | No | Whoever puts their arm on the arm rest first | The arm rests should be shared | The person in the window seat should have excl... | No, not rude at all | No, not at all rude | ... | Yes, somewhat rude | Yes, somewhat rude | Yes, very rude | No | No | Male | 30-44 | $100,000 - $149,999 | Bachelor degree | Pacific |
3 | 3434268208 | Once a year or less | Always | 5'11" | No | The arm rests should be shared | The arm rests should be shared | Everyone in the row should have some say | No, not rude at all | No, not at all rude | ... | Yes, somewhat rude | Yes, somewhat rude | Yes, very rude | No | No | Male | 30-44 | $0 - $24,999 | Bachelor degree | Pacific |
4 | 3434250245 | Once a month or less | About half the time | 5'7" | No | The person in the middle seat gets both arm rests | The person in aisle | Everyone in the row should have some say | No, not rude at all | No, not at all rude | ... | Yes, somewhat rude | Yes, somewhat rude | Yes, very rude | Yes | No | Male | 30-44 | $50,000 - $99,999 | Bachelor degree | Pacific |
5 rows × 27 columns
# Count how many respondents gave each "Household Income" answer.
# NOTE(review): the output recorded below lists age brackets (e.g. "30-44"), so it
# appears to come from a run on the "Age" column -- re-run this cell to confirm.
data["Household Income"].value_counts()
45-60 275 > 60 258 30-44 254 18-29 220 dtype: int64
Now let's apply some filters and build a few cross-tabulations.
# For every survey question, chart the share of each answer per census region.
# NOTE: despite its name, gender_edu_demos groups respondents by census region.
gender_edu_demos = data.groupby("Location (Census Region)")
# Column labels of the per-group "first row" frame; the grouping column becomes
# the index there, so it is not charted against itself.
questions = gender_edu_demos.first().columns
for q in questions:
    if q in ["RespondentID", "Questions"]:
        continue
    # Build the frame from a dict of Series so the answer categories of all
    # regions are unioned. Assigning columns one at a time into an empty frame
    # would lock the index to the first region's answers and silently drop any
    # answer that region never gave.
    df = pd.DataFrame({
        name: group[q].value_counts(normalize=True)
        for name, group in gender_edu_demos
    })
    df.plot(kind="barh", title=q)
    plt.show()
# Break the "baby on a plane" answers down by how often the respondent flies
# and draw the raw counts as horizontal bars.
baby_question = "In general, is itrude to bring a baby on a plane?"
by_travel_frequency = data.groupby("How often do you travel by plane?")
rude_vs_freq = by_travel_frequency[baby_question]
answer_counts = rude_vs_freq.value_counts()
answer_counts.plot(kind='barh')
<matplotlib.axes.AxesSubplot at 0x107eaa4d0>
# Chart and save the answer distribution of every survey column.
# NOTE: create_all_graphs is defined in the "helper functions" section further
# down this file; that definition has to be executed before this call.
create_all_graphs(data)
# Compare how Pacific ("West Coast") respondents answer the baby question with
# how respondents from all other census regions combined answer it.
q = "In general, is itrude to bring a baby on a plane?"
region = data["Location (Census Region)"]
# Respondents without a recorded region belong to neither side of the comparison.
has_region = region.notnull()
is_pacific = region == "Pacific"
# Pool every non-Pacific respondent before normalizing. The previous loop
# discarded the result of Series.append (it is not in-place, and no longer
# exists in pandas 2.x), so "Rest of Nation" only ever reflected one region.
df = pd.DataFrame({
    "West Coast": data.loc[is_pacific, q].value_counts(normalize=True),
    "Rest of Nation": data.loc[has_region & ~is_pacific, q].value_counts(normalize=True),
})
#title = "charts/" + "01-" + q.replace(" ","_") + "_results_.png"
#plt.savefig(title,format="png")
df.plot(kind='barh', title=q, figsize=(10.67, 5.33))
## helper functions here ##
def create_all_graphs(data):
    """Plot and save a horizontal bar chart of the answer shares of each column.

    For every column of ``data`` except "RespondentID" and "Questions", the
    normalized value counts are drawn as a horizontal bar chart, written to
    ``charts/<column name with spaces replaced by underscores>_results.png``
    and then shown.

    Parameters
    ----------
    data : pandas.DataFrame
        Table whose columns are charted one at a time.
    """
    # savefig does not create missing directories, so make sure the target exists.
    if not os.path.isdir("charts"):
        os.makedirs("charts")
    for index, question in enumerate(data.columns):
        if question in ["RespondentID", "Questions"]:
            continue
        # Positional column lookup; DataFrame.icol has been removed from pandas
        # and iloc is its replacement.
        answers = data.iloc[:, index]
        answers.value_counts(normalize=True).plot(
            kind='barh', title=question, figsize=(10.67, 5.33))
        title = "charts/" + question.replace(" ", "_") + "_results.png"
        plt.savefig(title, format="png")
        plt.show()