import numpy as np
import pandas as pd
# Set the default plot size to 15x5.
# NOTE(review): `figsize` is not imported in this file -- presumably provided
# by the IPython pylab environment; confirm before running as a plain script.
figsize(15, 5)

# Load the Crunchbase company export and preview the first five rows
# (five is the default for DataFrame.head).
df = pd.read_csv('crunchbase.csv')
df.head()
| | permalink | name | homepage_url | category_code | funding_total_usd | status | country_code | state_code | region | city | funding_rounds | founded_at | founded_month | founded_quarter | founded_year | first_funding_at | last_funding_at | last_milestone_at |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | /company/canal-do-credito | Canal do Credito | http://www.canaldocredito.com.br | finance | 750000 | operating | BRA | NaN | Belo Horizonte | Belo Horizonte | 1 | NaN | NaN | NaN | NaN | 1/1/10 | 1/1/10 | NaN |
1 | /company/waywire | #waywire | http://www.waywire.com | news | 1750000 | acquired | USA | NY | New York | New York | 1 | 6/1/12 | 2012-06 | 2012-Q2 | 2012 | 6/30/12 | 6/30/12 | 10/17/13 |
2 | /company/tv-communications | &TV Communications | http://enjoyandtv.com | games_video | 4000000 | operating | USA | CA | Los Angeles | Los Angeles | 2 | NaN | NaN | NaN | NaN | 6/4/10 | 9/23/10 | NaN |
3 | /company/in-touch-network | (In)Touch Network | http://www.InTouchNetwork.com | ecommerce | 1500000 | operating | GBR | NaN | London | London | 1 | 4/1/11 | 2011-04 | 2011-Q2 | 2011 | 4/1/11 | 4/1/11 | 11/15/13 |
4 | /company/n-plusn | #NAME? | http://plusn.com | software | 600000 | operating | USA | NY | New York | New York | 1 | 1/1/12 | 2012-01 | 2012-Q1 | 2012 | 8/29/12 | 8/29/12 | NaN |
# Drop rows that have no founding year or no category.
df = df.dropna(subset=['founded_year', 'category_code'])

# Keep companies founded from 2000 through 2013 (both ends inclusive) whose
# region is not the literal string 'unknown'.
founded_in_window = df['founded_year'].between(2000, 2013)
has_known_region = df['region'] != 'unknown'
df = df[founded_in_window & has_known_region]

# Bar chart of the ten regions with the most remaining rows.
top_regions = df['region'].value_counts().head(10)
top_regions.plot(kind='bar')
<matplotlib.axes.AxesSubplot at 0x10d49ecd0>
# Five regions with the most companies, counted as non-null `name` values per
# region and sorted from largest to smallest.
# Series.order() was removed from pandas; sort_values() is its replacement.
df.groupby('region')['name'].count().sort_values(ascending=False).head(5)
region
SF Bay         4247
New York       1642
Boston         1047
Los Angeles     966
London          857
dtype: int64
# Per founding year: how many companies were founded (non-null names) and how
# much funding those companies have raised in total.
by_year = df.groupby('founded_year')
num_companies = by_year['name'].count()
total_funding = by_year['funding_total_usd'].sum()

# Plot both series against founding year; total funding goes on a secondary
# y-axis because its scale differs from the company count.
yearly = pd.DataFrame({
    'num_companies': num_companies,
    'total_funding': total_funding,
})
yearly.plot(secondary_y='total_funding')
<matplotlib.axes.AxesSubplot at 0x1086d2990>
# Total funding per founding year for each of the three largest regions.
sf_funding = df[df['region'] == 'SF Bay'].groupby('founded_year')['funding_total_usd'].sum()
bos_funding = df[df['region'] == 'Boston'].groupby('founded_year')['funding_total_usd'].sum()
ny_funding = df[df['region'] == 'New York'].groupby('founded_year')['funding_total_usd'].sum()

# Each region's share of all funding for companies founded in a given year.
# The three lines share one axes, so each gets an explicit label and a legend;
# without them the lines cannot be told apart. Years in which a region has no
# companies are absent from its series and come out as NaN after the division.
(sf_funding / total_funding).plot(label='SF Bay', legend=True)
(bos_funding / total_funding).plot(label='Boston', legend=True)
(ny_funding / total_funding).plot(label='New York', legend=True)
<matplotlib.axes.AxesSubplot at 0x10b6eb310>