import numpy as np
import pandas as pd

# NOTE(review): `figsize` is neither defined nor imported in this file;
# presumably it is injected by an IPython `%pylab` session -- confirm before
# running this as a plain script.
figsize(15, 5)

# Load the dataset from the working directory.
df = pd.read_csv('crunchbase.csv')
# Bare expression: only shows a preview when evaluated interactively.
df.head(5)

# Keep rows that have both a founding year and a category code.
df = df.dropna(subset=['founded_year', 'category_code'])
# Restrict to founding years 2000 through 2013, inclusive on both ends.
df = df[
    (df['founded_year'] >= 2000) &
    (df['founded_year'] <= 2013)
]
# Drop rows whose region is the literal string 'unknown'.
df = df[df['region'] != 'unknown']

# Bar chart of the ten most frequent regions by row count.
df['region'].value_counts().head(10).plot(kind='bar')

# Five regions with the most non-null `name` entries, largest first.
# `Series.order` was removed from pandas (deprecated in 0.17, gone in 0.20);
# `sort_values` is its direct replacement.
df.groupby('region')['name'].count().sort_values(ascending=False).head(5)

# Per-founding-year aggregates over all remaining regions.
num_companies = df.groupby('founded_year')['name'].count()
total_funding = df.groupby('founded_year')['funding_total_usd'].sum()

# Plot both series together; total funding gets its own (secondary) y-axis
# so that it does not share a scale with the company counts.
pd.DataFrame({
    'num_companies': num_companies,
    'total_funding': total_funding,
}).plot(secondary_y='total_funding')

# Per-founding-year funding totals for three individual regions.
sf_funding = (
    df[df['region'] == 'SF Bay']
    .groupby('founded_year')['funding_total_usd']
    .sum()
)
bos_funding = (
    df[df['region'] == 'Boston']
    .groupby('founded_year')['funding_total_usd']
    .sum()
)
ny_funding = (
    df[df['region'] == 'New York']
    .groupby('founded_year')['funding_total_usd']
    .sum()
)

# Each region's share of the overall funding per founding year. The division
# aligns on the `founded_year` index, so a year absent from a region's series
# yields NaN rather than 0.
(sf_funding / total_funding).plot()
(bos_funding / total_funding).plot()
(ny_funding / total_funding).plot()