# Earthquake counts by U.S. state: per-state yearly charts and a comparison
# of the 1995-2004 and 2005-2014 decades.
import pandas as pd
from matplotlib import pyplot as plt

# Enable inline figures when running under IPython/Jupyter. A plain Python
# interpreter has no ``get_ipython`` builtin and simply skips this step.
try:
    get_ipython().run_line_magic("matplotlib", "inline")  # noqa: F821
except NameError:
    pass

# ``DataFrame.from_csv`` was removed from pandas; ``read_csv`` with the same
# arguments is the documented replacement.
all_quakes = pd.read_csv(
    "../data/earthquake_states.csv",
    index_col=None,
    parse_dates=["time", "updated"],
)

# Keep only the rows that have a value in the "state" column.
us_quakes = all_quakes.dropna(subset=["state"])

state_counts = pd.DataFrame(us_quakes.state.value_counts())
state_counts.columns = ["total_number_of_earthquakes"]
print(state_counts)


def plot_yearly_quake_counts(state, ylim=None, **plot_kwargs):
    """Plot the number of earthquakes per calendar year for one state.

    Each call draws on a fresh figure so successive charts do not overlay
    one another when the file is run top to bottom.

    Args:
        state: Value of the "state" column to select from ``us_quakes``.
        ylim: Optional ``(low, high)`` pair passed to ``Axes.set_ylim``.
        **plot_kwargs: Extra keyword arguments forwarded to ``Series.plot``
            (for example ``color="b"``).

    Returns:
        The matplotlib ``Axes`` the chart was drawn on.
    """
    state_quakes = us_quakes[us_quakes["state"] == state]
    # ``resample(..., how="count")`` is no longer supported; call ``.count()``
    # on the resampler instead.  ``YearEnd()`` is the offset behind the old
    # "A" alias and is accepted regardless of alias renames.
    yearly = (
        state_quakes.set_index("time")["id"]
        .resample(pd.offsets.YearEnd())
        .count()
    )
    _, ax = plt.subplots()
    yearly.plot(ax=ax, **plot_kwargs)
    ax.set_title("{} Earthquake Count By Year".format(state))
    if ylim is not None:
        ax.set_ylim(ylim)
    return ax


plot_yearly_quake_counts("Oklahoma")
plot_yearly_quake_counts("California")
plot_yearly_quake_counts("Texas")
plot_yearly_quake_counts("Ohio", ylim=(0, 5), color="b")
plot_yearly_quake_counts("Colorado", ylim=(0, 25))
plot_yearly_quake_counts("Tennessee", ylim=(0, 10))
plot_yearly_quake_counts("Kentucky", ylim=(0, 10))
plot_yearly_quake_counts("Kansas")
plot_yearly_quake_counts("Arkansas")


def quake_percentage_change(state):
    """Compare a state's earthquake totals for 2005-2014 against 1995-2004.

    Reads the module-level ``us_quakes`` frame and counts non-null "id"
    values whose "time" falls in each (inclusive) range of years.

    Args:
        state: Value of the "state" column to select from ``us_quakes``.

    Returns:
        A ``(pct, total_05_14, total_95_04)`` tuple, where ``pct`` is the
        percentage change from the earlier decade to the later one rounded
        to two decimals, or ``None`` when the earlier decade has no
        earthquakes (the change is undefined).
    """
    state_quakes = us_quakes[us_quakes["state"] == state]
    years = state_quakes["time"].dt.year

    def _count_between(first_year, last_year):
        # ``between`` is inclusive on both ends, matching ``>=`` / ``<=``.
        in_range = years.between(first_year, last_year)
        return state_quakes.loc[in_range, "id"].count()

    total_05_14 = _count_between(2005, 2014)
    total_95_04 = _count_between(1995, 2004)

    if total_95_04 != 0:
        pct = round(100.0 * (total_05_14 - total_95_04) / total_95_04, 2)
    else:
        pct = None
    return pct, total_05_14, total_95_04


state_counts["name"] = state_counts.index
# One (pct, total_05_14, total_95_04) tuple per state, transposed by ``zip``
# into three column-shaped tuples.
decade_changes = [quake_percentage_change(name) for name in state_counts["name"]]
(
    state_counts["percentage_change"],
    state_counts["total_05-14"],
    state_counts["total_95-04"],
) = zip(*decade_changes)

# Percentage change across all states combined.
grand_total_05_14 = state_counts["total_05-14"].sum()
grand_total_95_04 = state_counts["total_95-04"].sum()
overall_percentage_change = round(
    100.0 * (grand_total_05_14 - grand_total_95_04) / grand_total_95_04, 2
)
print(overall_percentage_change)

# Rank states by percentage change, ignoring those with fewer than five
# earthquakes in the earlier decade.  ``DataFrame.sort`` was removed from
# pandas; ``sort_values`` is its replacement.
ranked_states = state_counts[state_counts["total_95-04"] >= 5].sort_values(
    "percentage_change", ascending=False
)
print(ranked_states)