# !pip install conda
# !conda install numpy pandas matplotlib ipython-notebook
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
#!pip install seaborn
import seaborn as sns
# Use seaborn's "deep" colour palette, desaturated to 60%.
sns.set_palette("deep", desat=.6)
# Set the default figure size directly in matplotlib's rcParams.
# seaborn's set_context() only applies rc keys that belong to its plotting
# context (font sizes, line widths, ...); "figure.figsize" is not one of
# them, so passing it there is silently dropped by current seaborn releases.
plt.rcParams["figure.figsize"] = (11, 6)
#sns.set(font='Liberation Sans')
import matplotlib as mpl
# Prefer Liberation Sans, then Arial, ahead of matplotlib's other sans-serif
# fallbacks.  A fresh list is assigned (instead of insert()-ing into the live
# rcParams list) so that re-running this cell does not pile up duplicate
# entries at the front of the list.
preferred_fonts = ['Liberation Sans', 'Arial']
fallback_fonts = [
    name for name in mpl.rcParams['font.sans-serif']
    if name not in preferred_fonts
]
mpl.rcParams['font.sans-serif'] = preferred_fonts + fallback_fonts
mpl.rcParams['font.family'] = 'sans-serif'
# Load the survey responses and attach a human-readable label for each
# numeric status code; a status that is not a key of the mapping (e.g. a
# missing value) gets None as its label.
status_labels = {
    -1: 'No Interest',
    0: 'Limited Interest',
    1: 'Interest',
}
df = pd.read_csv('./data.csv')
df['status_label'] = df['status'].map(status_labels.get)
df
company | headquartered | status | status_label | |
---|---|---|---|---|
0 | Amplify | Brooklyn, NY | 1 | Interest |
1 | Bank of America | NYC/Toronto | 0 | Limited Interest |
2 | Cars.com | San Franciso, CA; Boston, MA | 1 | Interest |
3 | CashStar | Portland, ME | 0 | Limited Interest |
4 | Continuum Analytics | Austin, TX | 1 | Interest |
5 | Coursera | Mountain View, CA | -1 | No Interest |
6 | Cox Media | Atlanta, GA | 1 | Interest |
7 | DataPad | San Francisco, CA | 0 | Limited Interest |
8 | Demonware | Vancouver, Canada | 0 | Limited Interest |
9 | Disney Animation Studio | Burbank, CA | -1 | No Interest |
10 | Divio | Zurich, Switzerland | 1 | Interest |
11 | Dow Jones | New York, NY | 1 | Interest |
12 | EMC | NaN | 1 | Interest |
13 | ESRI | Redlands, CA | 0 | Limited Interest |
14 | Enovance | Montreal, QC, CA; Paris, France | 1 | Interest |
15 | Enthought | Austin, TX | 0 | Limited Interest |
16 | EventBrite | San Francisco, CA | -1 | No Interest |
17 | Excella | Washington, DC | -1 | No Interest |
18 | Menlo Park, CA | 1 | Interest | |
19 | FusionBox | Denver, CO | -1 | No Interest |
20 | Google (Nest) | Palo Alto, CA | -1 | No Interest |
21 | Guidebook | San Francisco, CA | 0 | Limited Interest |
22 | HP | NaN | 0 | Limited Interest |
23 | HearSaySocial | San Francisco, CA | -1 | No Interest |
24 | LaunchKey | NaN | NaN | None |
25 | LexMachina | Menlo Park, CA | 1 | Interest |
26 | Lightspeed | Montreal, QC, Canada | -1 | No Interest |
27 | Mountain View, CA | 0 | Limited Interest | |
28 | Mozilla | Mountain View, CA | 1 | Interest |
29 | New Relic | Portland, OR | -1 | No Interest |
30 | PDTPartners | New York, NY | -1 | No Interest |
31 | Prism Skylabs | San Francisco, CA | -1 | No Interest |
32 | Rackspace | San Antonio, TX | 1 | Interest |
33 | Red Hat | Raleigh, NC | 1 | Interest |
34 | SauceLabs | San Francisco, CA | 1 | Interest |
35 | Savoir-Faire Linux | Montreal, QC, CA | -1 | No Interest |
36 | Scraping Hub | NaN | 1 | Interest |
37 | Simple Energy | Boulder, CO | -1 | No Interest |
38 | Sourceforge | San Francisco, CA | 0 | Limited Interest |
39 | Stripe | San Francisco, CA | 1 | Interest |
40 | TNDTechnologies | NaN | NaN | None |
41 | Thumbtack | San Francisco, CA | -1 | No Interest |
42 | Twilio | San Francisco, CA | -1 | No Interest |
43 | San Francisco, CA | 0 | Limited Interest | |
44 | Vigilant Global | Montreal, QC, Canada | -1 | No Interest |
45 | Wargaming.net | Austin, TX | 0 | Limited Interest |
46 | iWeb | Montreal, QC, Canada | -1 | No Interest |
47 rows × 4 columns
# Number of non-missing values in each column of the survey table.
df.count()
company 47 headquartered 42 status 45 status_label 45 dtype: int64
# Summary statistics for the numeric status code (-1, 0 or 1).
df['status'].describe()
count 45.000000 mean -0.022222 std 0.865734 min -1.000000 25% -1.000000 50% 0.000000 75% 1.000000 max 1.000000 Name: status, dtype: float64
# Cross-tabulate headquarters location against the interest label, once
# without totals (used for plotting) and once with an "All" margin row and
# column (displayed below).
crosstab_args = (df['headquartered'], df['status_label'])
crosstab_colnames = ['Interest in hiring remote developers']
df_crosstab = pd.crosstab(*crosstab_args, colnames=crosstab_colnames)
df_crosstab_margins = pd.crosstab(
    *crosstab_args,
    margins=True,
    colnames=crosstab_colnames,
)
df_crosstab_margins
Interest in hiring remote developers | Interest | Limited Interest | No Interest | All |
---|---|---|---|---|
headquartered | ||||
Atlanta, GA | 1 | 0 | 0 | 1 |
Austin, TX | 1 | 2 | 0 | 3 |
Boulder, CO | 0 | 0 | 1 | 1 |
Brooklyn, NY | 1 | 0 | 0 | 1 |
Burbank, CA | 0 | 0 | 1 | 1 |
Denver, CO | 0 | 0 | 1 | 1 |
Menlo Park, CA | 2 | 0 | 0 | 2 |
Montreal, QC, CA | 0 | 0 | 1 | 1 |
Montreal, QC, CA; Paris, France | 1 | 0 | 0 | 1 |
Montreal, QC, Canada | 0 | 0 | 3 | 3 |
Mountain View, CA | 1 | 1 | 1 | 3 |
NYC/Toronto | 0 | 1 | 0 | 1 |
New York, NY | 1 | 0 | 1 | 2 |
Palo Alto, CA | 0 | 0 | 1 | 1 |
Portland, ME | 0 | 1 | 0 | 1 |
Portland, OR | 0 | 0 | 1 | 1 |
Raleigh, NC | 1 | 0 | 0 | 1 |
Redlands, CA | 0 | 1 | 0 | 1 |
San Antonio, TX | 1 | 0 | 0 | 1 |
San Francisco, CA | 2 | 4 | 5 | 11 |
San Franciso, CA; Boston, MA | 1 | 0 | 0 | 1 |
Vancouver, Canada | 0 | 1 | 0 | 1 |
Washington, DC | 0 | 0 | 1 | 1 |
Zurich, Switzerland | 1 | 0 | 0 | 1 |
All | 16 | 12 | 17 | 47 |
25 rows × 4 columns
# Draw the crosstab as a stacked bar chart (one bar per headquarters
# location) and write the current figure to a PNG with a tight bounding box.
crosstab_plot = df_crosstab.plot(
    kind='bar',
    stacked=True,
    title="PyCon 2014 Job Fair Survey",
)
chart_path = r'pycon_2014_job_fair_survey_crosstab_stacked_bar_chart.png'
plt.savefig(chart_path, bbox_inches='tight')