import pandas as pd

# Load the raw police incident records.
df = pd.read_csv('police_inct/police_inct.csv')
df

# Columns of our data frame
df.columns

# First entry. Positional lookup via `.iloc`; the old `.ix` indexer was
# removed in pandas 1.0.
df.iloc[0]

# Keep only the columns used below. The explicit copy makes `crime`
# independent of `df`, so assigning to one of its columns later cannot raise
# a SettingWithCopyWarning or silently write into a temporary.
crime = df[[
    'DC_DIST',
    'DISPATCH_DATE_TIME',
    'LOCATION_BLOCK',
    'UCR_GENERAL',
    'OBJECTID',
    'TEXT_GENERAL_CODE',
]].copy()
crime

# Count the different types of crime
crime.TEXT_GENERAL_CODE.value_counts()

# Homicide - Criminal is listed twice because of a trailing space.
# Clean leading and trailing whitespace. The vectorised `.str.strip()` leaves
# missing values as NaN instead of raising like a per-element `x.strip()`.
crime['TEXT_GENERAL_CODE'] = crime['TEXT_GENERAL_CODE'].str.strip()

crime.TEXT_GENERAL_CODE.value_counts()

# Create a new DataFrame for the 22nd district
dist_22 = crime[crime.DC_DIST == 22]

# Look at the number of distinct crimes in District 22
dist_22['TEXT_GENERAL_CODE'].value_counts()

# Create a cross tabulation of crime type across all districts
crime_counts = pd.crosstab(crime.DC_DIST, crime.TEXT_GENERAL_CODE)
crime_counts

# Normalize types of crime for each district: divide every row by its own
# total so each district's shares sum to 1.
crime_pct = crime_counts.div(crime_counts.sum(axis=1).astype(float), axis=0)

# Sort by the Thefts column. This returns a new, sorted DataFrame and does
# not change `crime_pct` itself. (`DataFrame.sort` was removed from pandas;
# `sort_values` is its replacement.)
crime_pct.sort_values('Thefts')

# Plot normalized crime vs district
colors=['r', 'g', 'b', 'c', 'y', 'w', 'm', 'k', 'burlywood','navy', 'teal', 'LightSteelBlue', 'Honeydew', 'Goldenrod']
p = crime_pct.plot(kind='bar', stacked=True, color=colors)
p.legend(loc=0, bbox_to_anchor=(1,1))

def code_rename(code):
    """Lazy consolidation of crime codes into broad categories.

    Performs a case-insensitive substring match of ``code`` against a fixed
    list of categories, tried in order; the first category found wins. A
    description containing both "vehicle" and "theft" therefore maps to
    ``'Vehicle'``, because that category is checked first.

    Args:
        code: Crime description to consolidate. Values without a ``lower``
            method (e.g. ``None`` or the float NaN pandas uses for missing
            cells) are returned unchanged.

    Returns:
        The matching category name, or ``code`` itself when no category
        matches.
    """
    try:
        lower_code = code.lower()
    except AttributeError:
        # Not string-like (missing value); nothing to consolidate.
        return code
    new_codes = ('Assault', 'Burglary', 'Homicide', 'Vehicle', 'Robbery', 'Theft')
    for new_code in new_codes:
        if new_code.lower() in lower_code:
            return new_code
    return code

# Consolidate crimes. `assign` builds a new DataFrame rather than writing
# into `crime` in place, which avoids a SettingWithCopyWarning when `crime`
# is itself a selection taken from another frame.
crime = crime.assign(TEXT_GENERAL_CODE=crime['TEXT_GENERAL_CODE'].map(code_rename))

crime.TEXT_GENERAL_CODE.unique()

# Cross tabulation of the consolidated crime types across districts
simple_crime_count = pd.crosstab(crime.DC_DIST, crime.TEXT_GENERAL_CODE)

# Normalize crime types: divide every row by its own total.
crime_pct = simple_crime_count.div(simple_crime_count.sum(axis=1).astype(float), axis=0)
crime_pct

plot = crime_pct.plot(kind='bar', stacked=True, color=['r', 'g', 'b', 'c', 'y', 'k', 'm', 'w'])
plot.legend(loc=0, bbox_to_anchor=(1,1))

# Apply the same consolidation to the full data set
df['TEXT_GENERAL_CODE'] = df['TEXT_GENERAL_CODE'].map(code_rename)

# Count of each crime type per dispatch date
ct_date = pd.crosstab(df['DISPATCH_DATE'], df['TEXT_GENERAL_CODE'])
ct_date

# Set index as a DateTime format
ct_date.index = pd.to_datetime(ct_date.index)

# Date filtering. Partial-string selection on a DatetimeIndex goes through
# `.loc`; the old `.ix` indexer was removed in pandas 1.0.
ct_date.loc['2012-01'].sum()

year_2012 = ct_date.loc['2012']

# Month-end bins given as an offset object instead of the 'M' string alias,
# so the same rule works on old and new pandas releases.
month_end = pd.offsets.MonthEnd()

# Resample data to monthly periods. `resample(...).mean()` replaces the
# removed `how='mean'` argument and `.to_period('M')` replaces the deprecated
# `kind='period'`.
ax_2012 = year_2012.resample(month_end).mean().to_period('M').plot()
# Call legend on the axes returned by plot(); a bare `legend(...)` only
# exists when the session was started with pylab's star import.
ax_2012.legend(loc=0, bbox_to_anchor=(1,1))

# Plot of all years available
ax_all = ct_date.resample(month_end).mean().to_period('M').plot()
ax_all.legend(loc=0, bbox_to_anchor=(1,1))