import pandas as pd pd.options.display.max_columns = 5200 pd.options.display.max_rows = 5200 inFluCDC = '/Users/danielmsheehan/GitHub/d3-presentation/data/flu/cdc/StateData.csv' #http://www.cdc.gov/flu/weekly/fluactivitysurv.htm inFluGoo = '/Users/danielmsheehan/GitHub/d3-presentation/data/flu/google/data_20150210.txt' inFluGoo = 'https://www.google.org/flutrends/us/data.txt' inImmCal = '/Users/danielmsheehan/GitHub/d3-presentation/data/immunization/schools/ca/2014-2015 CA Seventh Grade Data.xls' dfFluCDC = pd.read_csv(inFluCDC) dfFluGoo = pd.read_csv(inFluGoo, header=11) inImmCal = pd.io.excel.read_excel(inImmCal, 'SA1415', header=3) dfFluCDC.head(2) inImmCal.head(1) #Keep around for Dr. R. dfFluGoo.head(10) #Split into States, HHS Regions and Cities and Rtoate dataframe 90 with Transpose statesList = ['Alabama','Alaska','Arizona','Arkansas','California','Colorado','Connecticut','Delaware','District of Columbia','Florida','Georgia','Hawaii','Idaho','Illinois','Indiana','Iowa','Kansas','Kentucky','Louisiana','Maine','Maryland','Massachusetts','Michigan','Minnesota','Mississippi','Missouri','Montana','Nebraska','Nevada','New Hampshire','New Jersey','New Mexico','New York','North Carolina','North Dakota','Ohio','Oklahoma','Oregon','Pennsylvania','Rhode Island','South Carolina','South Dakota','Tennessee','Texas','Utah','Vermont','Virginia','Washington','West Virginia','Wisconsin','Wyoming'] hhsList = ['HHS Region 1 (CT, ME, MA, NH, RI, VT)','HHS Region 2 (NJ, NY)','HHS Region 3 (DE, DC, MD, PA, VA, WV)','HHS Region 4 (AL, FL, GA, KY, MS, NC, SC, TN)','HHS Region 5 (IL, IN, MI, MN, OH, WI)','HHS Region 6 (AR, LA, NM, OK, TX)','HHS Region 7 (IA, KS, MO, NE)','HHS Region 8 (CO, MT, ND, SD, UT, WY)','HHS Region 9 (AZ, CA, HI, NV)','HHS Region 10 (AK, ID, OR, WA)'] citiesList = ['Anchorage, AK','Birmingham, AL','Little Rock, AR','Mesa, AZ','Phoenix, AZ','Scottsdale, AZ','Tempe, AZ','Tucson, AZ','Berkeley, CA','Fresno, CA','Irvine, CA','Los Angeles, CA','Oakland, CA','Sacramento, CA','San Diego, CA','San Francisco, CA','San Jose, CA','Santa Clara, CA','Sunnyvale, CA','Colorado Springs, CO','Denver, CO','Washington, DC','Gainesville, FL','Jacksonville, FL','Miami, FL','Orlando, FL','Tampa, FL','Atlanta, GA','Roswell, GA','Honolulu, HI','Des Moines, IA','Boise, ID','Chicago, IL','Indianapolis, IN','Wichita, KS','Lexington, KY','Baton Rouge, LA','New Orleans, LA','Boston, MA','Somerville, MA','Baltimore, MD','Grand Rapids, MI','St Paul, MN','Kansas City, MO','Springfield, MO','St Louis, MO','Jackson, MS','Cary, NC','Charlotte, NC','Durham, NC','Greensboro, NC','Raleigh, NC','Lincoln, NE','Omaha, NE','Newark, NJ','Albuquerque, NM','Las Vegas, NV','Reno, NV','Albany, NY','Buffalo, NY','New York, NY','Rochester, NY','Cleveland, OH','Columbus, OH','Dayton, OH','Oklahoma City, OK','Tulsa, OK','Beaverton, OR','Eugene, OR','Portland, OR','Philadelphia, PA','Pittsburgh, PA','State College, PA','Providence, RI','Columbia, SC','Greenville, SC','Knoxville, TN','Memphis, TN','Nashville, TN','Austin, TX','Dallas, TX','Ft Worth, TX','Houston, TX','Irving, TX','Lubbock, TX','Plano, TX','San Antonio, TX','Salt Lake City, UT','Arlington, VA','Norfolk, VA','Reston, VA','Richmond, VA','Bellevue, WA','Seattle, WA','Spokane, WA','Madison, WI','Milwaukee, WI'] dfS = dfFluGoo[['Date']+statesList] dfS = dfS.T dfH = dfFluGoo[['Date']+hhsList] dfH = dfH.T dfC = dfFluGoo[['Date']+citiesList] dfC = dfC.T StatesFlu = '/Users/danielmsheehan/GitHub/d3-presentation/data/flu/google/states_flu_data.csv' HHSFlu = '/Users/danielmsheehan/GitHub/d3-presentation/data/flu/google/hhs_flu_data.csv' CitiesFlu = '/Users/danielmsheehan/GitHub/d3-presentation/data/flu/google/cities_flu_data.csv' dfS.to_csv(StatesFlu, index=True, header = False) dfH.to_csv(HHSFlu, index=True, header = False) dfC.to_csv(CitiesFlu, index=True, header = False) dfS = pd.read_csv(StatesFlu) dfC = pd.read_csv(CitiesFlu) dfS = dfS.rename(columns=lambda x: x.replace('Date', 'state')) #dfH = dfH.rename(columns=lambda x: x.replace('Date', 'State')) dfC = dfC.rename(columns=lambda x: x.replace('Date', 'City')) #print dfS.head(10) #print dfC.head(10) dfS.to_csv(StatesFlu, index=False) dfC.to_csv(CitiesFlu, index=False) #Download Shapefile import urllib zipLoc = "/Users/danielmsheehan/GitHub/d3-presentation/data/census/states/cb_2013_us_state_20m.zip" urllib.urlretrieve ("http://www2.census.gov/geo/tiger/GENZ2013/cb_2013_us_state_20m.zip", zipLoc) import zipfile zip = zipfile.ZipFile(zipLoc) zip.extractall("/Users/danielmsheehan/GitHub/d3-presentation/data/census/states/") #Need to get US State Population Dataset to create a Flu Rate. dfPop = pd.io.json.read_json('http://api.census.gov/data/2010/sf1?key=30699f15ab4d04a1e0943715b539d256c9a3ee44&get=P0010001&for=state') #dfPop = dfPop.header(1) dfPop = dfPop.ix[1:] dfPop.columns = ['pop', 'fips'] print dfPop.head(5) dfFIPS = pd.read_csv('/Users/danielmsheehan/GitHub/data/census/state_fips.csv', dtype={'fips':object}) print dfFIPS.head(5) df = dfS.merge(dfFIPS, how='left', on='state') df = df.merge(dfPop, how='left', on='fips') df.head(10) df.to_csv('/Users/danielmsheehan/GitHub/d3-presentation/data/flu/google/states_pop_week.csv', index=False) #print list(df.columns.values) df = pd.read_csv('/Users/danielmsheehan/GitHub/d3-presentation/data/flu/google/states_pop_week.csv', dtype={'fips':object}) #df['pop'] = df['pop'].astype(float) #print df.types for i in weekList: df['rate'+i] = df[i]/df['pop'] * 10000 df = df.drop(weekList, axis=1) df.head(53) df.to_csv('/Users/danielmsheehan/GitHub/d3-presentation/data/flu/google/states_pop_week_rate_state.csv', index=False) #Get a list of County FIPS dfCounties = pd.read_csv('http://www2.census.gov/geo/docs/reference/codes/files/national_county.txt', header=None, dtype={1:object,2:object}) dfCounties.columns = ['state_abbrev', 'state_fips','county_fips','fullname','note'] dfCounties['fips'] = dfCounties['state_fips'] dfCounties.head(5) dfAll = dfCounties.merge(df, on='fips', how='left') dfAll['id'] = dfAll.state_fips.map(str) + dfAll.county_fips.map(str) dfAll['id'] = dfAll['id'].astype(int) dfAll.head(10) dfAll.to_csv('/Users/danielmsheehan/GitHub/d3-presentation/data/flu/google/states_pop_week_rates_county.csv', index=False) weekList = ['2003-09-28','2003-10-05','2003-10-12','2003-10-19','2003-10-26','2003-11-02','2003-11-09','2003-11-16','2003-11-23','2003-11-30','2003-12-07','2003-12-14','2003-12-21','2003-12-28','2004-01-04','2004-01-11','2004-01-18','2004-01-25','2004-02-01','2004-02-08','2004-02-15','2004-02-22','2004-02-29','2004-03-07','2004-03-14','2004-03-21','2004-03-28','2004-04-04','2004-04-11','2004-04-18','2004-04-25','2004-05-02','2004-05-09','2004-05-16','2004-05-23','2004-05-30','2004-06-06','2004-06-13','2004-06-20','2004-06-27','2004-07-04','2004-07-11','2004-07-18','2004-07-25','2004-08-01','2004-08-08','2004-08-15','2004-08-22','2004-08-29','2004-09-05','2004-09-12','2004-09-19','2004-09-26','2004-10-03','2004-10-10','2004-10-17','2004-10-24','2004-10-31','2004-11-07','2004-11-14','2004-11-21','2004-11-28','2004-12-05','2004-12-12','2004-12-19','2004-12-26','2005-01-02','2005-01-09','2005-01-16','2005-01-23','2005-01-30','2005-02-06','2005-02-13','2005-02-20','2005-02-27','2005-03-06','2005-03-13','2005-03-20','2005-03-27','2005-04-03','2005-04-10','2005-04-17','2005-04-24','2005-05-01','2005-05-08','2005-05-15','2005-05-22','2005-05-29','2005-06-05','2005-06-12','2005-06-19','2005-06-26','2005-07-03','2005-07-10','2005-07-17','2005-07-24','2005-07-31','2005-08-07','2005-08-14','2005-08-21','2005-08-28','2005-09-04','2005-09-11','2005-09-18','2005-09-25','2005-10-02','2005-10-09','2005-10-16','2005-10-23','2005-10-30','2005-11-06','2005-11-13','2005-11-20','2005-11-27','2005-12-04','2005-12-11','2005-12-18','2005-12-25','2006-01-01','2006-01-08','2006-01-15','2006-01-22','2006-01-29','2006-02-05','2006-02-12','2006-02-19','2006-02-26','2006-03-05','2006-03-12','2006-03-19','2006-03-26','2006-04-02','2006-04-09','2006-04-16','2006-04-23','2006-04-30','2006-05-07','2006-05-14','2006-05-21','2006-05-28','2006-06-04','2006-06-11','2006-06-18','2006-06-25','2006-07-02','2006-07-09','2006-07-16','2006-07-23','2006-07-30','2006-08-06','2006-08-13','2006-08-20','2006-08-27','2006-09-03','2006-09-10','2006-09-17','2006-09-24','2006-10-01','2006-10-08','2006-10-15','2006-10-22','2006-10-29','2006-11-05','2006-11-12','2006-11-19','2006-11-26','2006-12-03','2006-12-10','2006-12-17','2006-12-24','2006-12-31','2007-01-07','2007-01-14','2007-01-21','2007-01-28','2007-02-04','2007-02-11','2007-02-18','2007-02-25','2007-03-04','2007-03-11','2007-03-18','2007-03-25','2007-04-01','2007-04-08','2007-04-15','2007-04-22','2007-04-29','2007-05-06','2007-05-13','2007-05-20','2007-05-27','2007-06-03','2007-06-10','2007-06-17','2007-06-24','2007-07-01','2007-07-08','2007-07-15','2007-07-22','2007-07-29','2007-08-05','2007-08-12','2007-08-19','2007-08-26','2007-09-02','2007-09-09','2007-09-16','2007-09-23','2007-09-30','2007-10-07','2007-10-14','2007-10-21','2007-10-28','2007-11-04','2007-11-11','2007-11-18','2007-11-25','2007-12-02','2007-12-09','2007-12-16','2007-12-23','2007-12-30','2008-01-06','2008-01-13','2008-01-20','2008-01-27','2008-02-03','2008-02-10','2008-02-17','2008-02-24','2008-03-02','2008-03-09','2008-03-16','2008-03-23','2008-03-30','2008-04-06','2008-04-13','2008-04-20','2008-04-27','2008-05-04','2008-05-11','2008-05-18','2008-05-25','2008-06-01','2008-06-08','2008-06-15','2008-06-22','2008-06-29','2008-07-06','2008-07-13','2008-07-20','2008-07-27','2008-08-03','2008-08-10','2008-08-17','2008-08-24','2008-08-31','2008-09-07','2008-09-14','2008-09-21','2008-09-28','2008-10-05','2008-10-12','2008-10-19','2008-10-26','2008-11-02','2008-11-09','2008-11-16','2008-11-23','2008-11-30','2008-12-07','2008-12-14','2008-12-21','2008-12-28','2009-01-04','2009-01-11','2009-01-18','2009-01-25','2009-02-01','2009-02-08','2009-02-15','2009-02-22','2009-03-01','2009-03-08','2009-03-15','2009-03-22','2009-03-29','2009-04-05','2009-04-12','2009-04-19','2009-04-26','2009-05-03','2009-05-10','2009-05-17','2009-05-24','2009-05-31','2009-06-07','2009-06-14','2009-06-21','2009-06-28','2009-07-05','2009-07-12','2009-07-19','2009-07-26','2009-08-02','2009-08-09','2009-08-16','2009-08-23','2009-08-30','2009-09-06','2009-09-13','2009-09-20','2009-09-27','2009-10-04','2009-10-11','2009-10-18','2009-10-25','2009-11-01','2009-11-08','2009-11-15','2009-11-22','2009-11-29','2009-12-06','2009-12-13','2009-12-20','2009-12-27','2010-01-03','2010-01-10','2010-01-17','2010-01-24','2010-01-31','2010-02-07','2010-02-14','2010-02-21','2010-02-28','2010-03-07','2010-03-14','2010-03-21','2010-03-28','2010-04-04','2010-04-11','2010-04-18','2010-04-25','2010-05-02','2010-05-09','2010-05-16','2010-05-23','2010-05-30','2010-06-06','2010-06-13','2010-06-20','2010-06-27','2010-07-04','2010-07-11','2010-07-18','2010-07-25','2010-08-01','2010-08-08','2010-08-15','2010-08-22','2010-08-29','2010-09-05','2010-09-12','2010-09-19','2010-09-26','2010-10-03','2010-10-10','2010-10-17','2010-10-24','2010-10-31','2010-11-07','2010-11-14','2010-11-21','2010-11-28','2010-12-05','2010-12-12','2010-12-19','2010-12-26','2011-01-02','2011-01-09','2011-01-16','2011-01-23','2011-01-30','2011-02-06','2011-02-13','2011-02-20','2011-02-27','2011-03-06','2011-03-13','2011-03-20','2011-03-27','2011-04-03','2011-04-10','2011-04-17','2011-04-24','2011-05-01','2011-05-08','2011-05-15','2011-05-22','2011-05-29','2011-06-05','2011-06-12','2011-06-19','2011-06-26','2011-07-03','2011-07-10','2011-07-17','2011-07-24','2011-07-31','2011-08-07','2011-08-14','2011-08-21','2011-08-28','2011-09-04','2011-09-11','2011-09-18','2011-09-25','2011-10-02','2011-10-09','2011-10-16','2011-10-23','2011-10-30','2011-11-06','2011-11-13','2011-11-20','2011-11-27','2011-12-04','2011-12-11','2011-12-18','2011-12-25','2012-01-01','2012-01-08','2012-01-15','2012-01-22','2012-01-29','2012-02-05','2012-02-12','2012-02-19','2012-02-26','2012-03-04','2012-03-11','2012-03-18','2012-03-25','2012-04-01','2012-04-08','2012-04-15','2012-04-22','2012-04-29','2012-05-06','2012-05-13','2012-05-20','2012-05-27','2012-06-03','2012-06-10','2012-06-17','2012-06-24','2012-07-01','2012-07-08','2012-07-15','2012-07-22','2012-07-29','2012-08-05','2012-08-12','2012-08-19','2012-08-26','2012-09-02','2012-09-09','2012-09-16','2012-09-23','2012-09-30','2012-10-07','2012-10-14','2012-10-21','2012-10-28','2012-11-04','2012-11-11','2012-11-18','2012-11-25','2012-12-02','2012-12-09','2012-12-16','2012-12-23','2012-12-30','2013-01-06','2013-01-13','2013-01-20','2013-01-27','2013-02-03','2013-02-10','2013-02-17','2013-02-24','2013-03-03','2013-03-10','2013-03-17','2013-03-24','2013-03-31','2013-04-07','2013-04-14','2013-04-21','2013-04-28','2013-05-05','2013-05-12','2013-05-19','2013-05-26','2013-06-02','2013-06-09','2013-06-16','2013-06-23','2013-06-30','2013-07-07','2013-07-14','2013-07-21','2013-07-28','2013-08-04','2013-08-11','2013-08-18','2013-08-25','2013-09-01','2013-09-08','2013-09-15','2013-09-22','2013-09-29','2013-10-06','2013-10-13','2013-10-20','2013-10-27','2013-11-03','2013-11-10','2013-11-17','2013-11-24','2013-12-01','2013-12-08','2013-12-15','2013-12-22','2013-12-29','2014-01-05','2014-01-12','2014-01-19','2014-01-26','2014-02-02','2014-02-09','2014-02-16','2014-02-23','2014-03-02','2014-03-09','2014-03-16','2014-03-23','2014-03-30','2014-04-06','2014-04-13','2014-04-20','2014-04-27','2014-05-04','2014-05-11','2014-05-18','2014-05-25','2014-06-01','2014-06-08','2014-06-15','2014-06-22','2014-06-29','2014-07-06','2014-07-13','2014-07-20','2014-07-27','2014-08-03','2014-08-10','2014-08-17','2014-08-24','2014-08-31','2014-09-07','2014-09-14','2014-09-21','2014-09-28','2014-10-05','2014-10-12','2014-10-19','2014-10-26','2014-11-02','2014-11-09','2014-11-16','2014-11-23','2014-11-30','2014-12-07','2014-12-14','2014-12-21','2014-12-28','2015-01-04','2015-01-11','2015-01-18','2015-01-25','2015-02-01','2015-02-08','2015-02-15','2015-02-22','2015-03-01'] for i in weekList: #print i dfSamp = dfAll[['id','rate'+i]] dfSamp.columns = ['id', 'rate'] dfSamp.to_csv('/Users/danielmsheehan/GitHub/stat4701-edav-d3.github.com/viz/choropleth/pages/data/rate-'+i+'.tsv', index=False, sep='\t')