Import Pandas and set IPython Notebook display settings.
import pandas as pd
pd.options.display.max_columns = 5200
pd.options.display.max_rows = 5200
wk = "/Users/danielmsheehan/Desktop/" #Define our workspace
#Download Shapefile
import urllib
zipLoc = wk+"cb_2013_us_state_20m.zip"
fileURL = "http://www2.census.gov/geo/tiger/GENZ2013/cb_2013_us_state_20m.zip"
urllib.urlretrieve (fileURL, zipLoc) #get that file
import zipfile
zip = zipfile.ZipFile(zipLoc)
zip.extractall(wk) #unzip!
And now the date table associated with the shapefile (in the .dbf). Note: data or columns in GIS files is often referred to as an 'attribute table.'
Using GDAL/OGR convert the Shapefile to JSON.
ogr2ogr -f GeoJSON /Users/danielmsheehan/GitHub/d3-presentation/data/census/states/states.json /Users/danielmsheehan/GitHub/d3-presentation/data/census/states/cb_2013_us_state_20m.shp
Then, after installing node.js, we can create TopoJson
topojson /Users/danielmsheehan/GitHub/d3-presentation/data/census/states/states.topo.json /Users/danielmsheehan/GitHub/d3-presentation/data/census/states/states.json
More simply: topojson states_cln.topo.json states.json
inFluGoo = 'https://www.google.org/flutrends/us/data.txt' #The online Google Flu Trends .txt
dfFluGoo = pd.read_csv(inFluGoo, header=11) #Let's read the Google data into a dataframe
dfFluGoo.head(3) #Let's see the data
Date | United States | Alabama | Alaska | Arizona | Arkansas | California | Colorado | Connecticut | Delaware | District of Columbia | Florida | Georgia | Hawaii | Idaho | Illinois | Indiana | Iowa | Kansas | Kentucky | Louisiana | Maine | Maryland | Massachusetts | Michigan | Minnesota | Mississippi | Missouri | Montana | Nebraska | Nevada | New Hampshire | New Jersey | New Mexico | New York | North Carolina | North Dakota | Ohio | Oklahoma | Oregon | Pennsylvania | Rhode Island | South Carolina | South Dakota | Tennessee | Texas | Utah | Vermont | Virginia | Washington | West Virginia | Wisconsin | Wyoming | HHS Region 1 (CT, ME, MA, NH, RI, VT) | HHS Region 2 (NJ, NY) | HHS Region 3 (DE, DC, MD, PA, VA, WV) | HHS Region 4 (AL, FL, GA, KY, MS, NC, SC, TN) | HHS Region 5 (IL, IN, MI, MN, OH, WI) | HHS Region 6 (AR, LA, NM, OK, TX) | HHS Region 7 (IA, KS, MO, NE) | HHS Region 8 (CO, MT, ND, SD, UT, WY) | HHS Region 9 (AZ, CA, HI, NV) | HHS Region 10 (AK, ID, OR, WA) | Anchorage, AK | Birmingham, AL | Little Rock, AR | Mesa, AZ | Phoenix, AZ | Scottsdale, AZ | Tempe, AZ | Tucson, AZ | Berkeley, CA | Fresno, CA | Irvine, CA | Los Angeles, CA | Oakland, CA | Sacramento, CA | San Diego, CA | San Francisco, CA | San Jose, CA | Santa Clara, CA | Sunnyvale, CA | Colorado Springs, CO | Denver, CO | Washington, DC | Gainesville, FL | Jacksonville, FL | Miami, FL | Orlando, FL | Tampa, FL | Atlanta, GA | Roswell, GA | Honolulu, HI | Des Moines, IA | Boise, ID | Chicago, IL | Indianapolis, IN | Wichita, KS | Lexington, KY | Baton Rouge, LA | New Orleans, LA | Boston, MA | Somerville, MA | Baltimore, MD | Grand Rapids, MI | St Paul, MN | Kansas City, MO | Springfield, MO | St Louis, MO | Jackson, MS | Cary, NC | Charlotte, NC | Durham, NC | Greensboro, NC | Raleigh, NC | Lincoln, NE | Omaha, NE | Newark, NJ | Albuquerque, NM | Las Vegas, NV | Reno, NV | Albany, NY | Buffalo, NY | New York, NY | Rochester, NY | Cleveland, OH | Columbus, OH | Dayton, OH | Oklahoma City, OK | Tulsa, OK | Beaverton, OR | Eugene, OR | Portland, OR | Philadelphia, PA | Pittsburgh, PA | State College, PA | Providence, RI | Columbia, SC | Greenville, SC | Knoxville, TN | Memphis, TN | Nashville, TN | Austin, TX | Dallas, TX | Ft Worth, TX | Houston, TX | Irving, TX | Lubbock, TX | Plano, TX | San Antonio, TX | Salt Lake City, UT | Arlington, VA | Norfolk, VA | Reston, VA | Richmond, VA | Bellevue, WA | Seattle, WA | Spokane, WA | Madison, WI | Milwaukee, WI | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2003-09-28 | 902 | 477 | NaN | 606 | NaN | 929 | 233 | 223 | NaN | 927 | 587 | 514 | NaN | NaN | 677 | 544 | 303 | 272 | 420 | 1017 | NaN | 1268 | 344 | 685 | 484 | NaN | 349 | NaN | NaN | NaN | NaN | 695 | NaN | 649 | 565 | NaN | 616 | 1040 | 409 | 1186 | NaN | 462 | NaN | 551 | 1398 | NaN | NaN | 1112 | 588 | NaN | 466 | NaN | 322 | 666 | 1366 | 631 | 690 | 1385 | 385 | 266 | 878 | 624 | NaN | 407 | NaN | NaN | 757 | NaN | 585 | 598 | NaN | NaN | NaN | 901 | 848 | 448 | 562 | 1003 | 731 | 990 | 602 | NaN | 235 | 1153 | NaN | NaN | 373 | 609 | 461 | 519 | NaN | 794 | NaN | NaN | 731 | 641 | NaN | NaN | NaN | 1154 | 314 | 332 | 1505 | NaN | 426 | 330 | NaN | 391 | NaN | NaN | 561 | 521 | NaN | 503 | NaN | 314 | 540 | NaN | 843 | NaN | 505 | NaN | 579 | 406 | 466 | 437 | NaN | 924 | 1034 | NaN | NaN | 444 | 1204 | 1122 | NaN | NaN | NaN | NaN | NaN | NaN | 425 | 1150 | 1200 | NaN | 1412 | 1122 | NaN | NaN | 986 | 261 | 1066 | 948 | NaN | 1035 | NaN | 668 | NaN | 622 | 452 |
1 | 2003-10-05 | 952 | 501 | NaN | 663 | NaN | 849 | 251 | 243 | NaN | 993 | 582 | 532 | NaN | NaN | 732 | 607 | 303 | 270 | 442 | 1096 | NaN | 1374 | 362 | 748 | 514 | NaN | 359 | NaN | NaN | NaN | NaN | 716 | NaN | 725 | 660 | NaN | 699 | 1065 | 409 | 1176 | NaN | 478 | NaN | 597 | 1517 | NaN | NaN | 1198 | 624 | NaN | 504 | NaN | 381 | 711 | 1335 | 652 | 775 | 1613 | 400 | 271 | 853 | 688 | NaN | 402 | NaN | NaN | 796 | NaN | 608 | 674 | NaN | NaN | NaN | 891 | 888 | 436 | 840 | 1115 | 740 | 915 | 594 | NaN | 270 | 1310 | NaN | NaN | 386 | 663 | 581 | 484 | NaN | 877 | NaN | NaN | 850 | 657 | NaN | NaN | NaN | 1162 | 323 | 375 | 1535 | NaN | 423 | 316 | NaN | 397 | NaN | NaN | 673 | 536 | NaN | 586 | NaN | 331 | 549 | NaN | 831 | NaN | 508 | NaN | 730 | 483 | 535 | 415 | NaN | 894 | 1042 | NaN | NaN | 471 | 1124 | 1193 | NaN | NaN | NaN | NaN | NaN | NaN | 468 | 1331 | 1487 | NaN | 2057 | 1208 | NaN | NaN | 989 | 249 | 1249 | 963 | NaN | 1135 | NaN | 787 | NaN | 626 | 449 |
2 | 2003-10-12 | 1092 | 492 | NaN | 700 | NaN | 1032 | 283 | 261 | NaN | 1033 | 606 | 557 | NaN | NaN | 799 | 637 | 312 | 280 | 460 | 1144 | NaN | 1445 | 372 | 791 | 588 | NaN | 381 | NaN | NaN | NaN | NaN | 815 | NaN | 739 | 861 | NaN | 729 | 1122 | 428 | 1340 | NaN | 521 | NaN | 670 | 2010 | NaN | NaN | 1343 | 777 | NaN | 538 | NaN | 410 | 819 | 1411 | 735 | 760 | 2089 | 422 | 285 | 1102 | 791 | NaN | 428 | NaN | NaN | 766 | NaN | 629 | 731 | NaN | NaN | NaN | 1165 | 839 | 468 | 938 | 1311 | 826 | 989 | 609 | NaN | 257 | 1309 | 641 | NaN | 370 | 615 | 567 | 497 | NaN | 1030 | NaN | NaN | 799 | 685 | NaN | NaN | NaN | 1274 | 369 | 447 | 1549 | NaN | 457 | 343 | NaN | 408 | NaN | NaN | 738 | 521 | NaN | 838 | NaN | 373 | 575 | 1068 | 824 | NaN | 555 | NaN | 652 | 476 | 671 | 442 | NaN | 922 | 1089 | NaN | NaN | 574 | 1249 | 1306 | NaN | NaN | NaN | NaN | NaN | NaN | 497 | 1492 | 1869 | NaN | 3770 | 1191 | NaN | NaN | 1463 | 295 | 1289 | 970 | NaN | 1170 | NaN | 994 | NaN | 661 | 437 |
#Need to get US State Population Dataset to create a Flu Rate.
dfPop = pd.io.json.read_json('http://api.census.gov/data/2010/sf1?key=30699f15ab4d04a1e0943715b539d256c9a3ee44&get=P0010001&for=state')
#dfPop = dfPop.header(1)
dfPop = dfPop.ix[1:]
dfPop.columns = ['pop', 'fips']
print dfPop.head(5)
dfFIPS = pd.read_csv('https://raw.githubusercontent.com/nygeog/data/master/census/state_fips.csv', dtype={'fips':object})
print dfFIPS.head(5)
pop fips 1 4779736 01 2 710231 02 3 6392017 04 4 2915918 05 5 37253956 06 name abbrev fips info 0 Alabama AL 01 State; counties 1 Alaska AK 02 State; boroughs 2 American Samoa AS 60 Outlying area under U.S. sovereignty 3 American Samoa * NaN 03 (FIPS 5-1 reserved code) 4 Arizona AZ 04 State; counties
df = dfS.merge(dfFIPS, how='left', on='state')
df = df.merge(dfPop, how='left', on='fips')
df.head(10)
df.to_csv('/Users/danielmsheehan/GitHub/d3-presentation/data/flu/google/states_pop_week.csv', index=False)
#print list(df.columns.values)
df = pd.read_csv('/Users/danielmsheehan/GitHub/d3-presentation/data/flu/google/states_pop_week.csv', dtype={'fips':object})
#df['pop'] = df['pop'].astype(float)
#print df.types
for i in weekList:
df['rate'+i] = df[i]/df['pop'] * 10000
df = df.drop(weekList, axis=1)
df.head(53)
df.to_csv('/Users/danielmsheehan/GitHub/d3-presentation/data/flu/google/states_pop_week_rate_state.csv', index=False)
#Get a list of County FIPS
dfCounties = pd.read_csv('http://www2.census.gov/geo/docs/reference/codes/files/national_county.txt', header=None, dtype={1:object,2:object})
dfCounties.columns = ['state_abbrev', 'state_fips','county_fips','fullname','note']
dfCounties['fips'] = dfCounties['state_fips']
dfCounties.head(5)
dfAll = dfCounties.merge(df, on='fips', how='left')
dfAll['id'] = dfAll.state_fips.map(str) + dfAll.county_fips.map(str)
dfAll['id'] = dfAll['id'].astype(int)
dfAll.head(10)
dfAll.to_csv('/Users/danielmsheehan/GitHub/d3-presentation/data/flu/google/states_pop_week_rates_county.csv', index=False)
weekList = ['2003-09-28','2003-10-05','2003-10-12','2003-10-19','2003-10-26','2003-11-02','2003-11-09','2003-11-16','2003-11-23','2003-11-30','2003-12-07','2003-12-14','2003-12-21','2003-12-28','2004-01-04','2004-01-11','2004-01-18','2004-01-25','2004-02-01','2004-02-08','2004-02-15','2004-02-22','2004-02-29','2004-03-07','2004-03-14','2004-03-21','2004-03-28','2004-04-04','2004-04-11','2004-04-18','2004-04-25','2004-05-02','2004-05-09','2004-05-16','2004-05-23','2004-05-30','2004-06-06','2004-06-13','2004-06-20','2004-06-27','2004-07-04','2004-07-11','2004-07-18','2004-07-25','2004-08-01','2004-08-08','2004-08-15','2004-08-22','2004-08-29','2004-09-05','2004-09-12','2004-09-19','2004-09-26','2004-10-03','2004-10-10','2004-10-17','2004-10-24','2004-10-31','2004-11-07','2004-11-14','2004-11-21','2004-11-28','2004-12-05','2004-12-12','2004-12-19','2004-12-26','2005-01-02','2005-01-09','2005-01-16','2005-01-23','2005-01-30','2005-02-06','2005-02-13','2005-02-20','2005-02-27','2005-03-06','2005-03-13','2005-03-20','2005-03-27','2005-04-03','2005-04-10','2005-04-17','2005-04-24','2005-05-01','2005-05-08','2005-05-15','2005-05-22','2005-05-29','2005-06-05','2005-06-12','2005-06-19','2005-06-26','2005-07-03','2005-07-10','2005-07-17','2005-07-24','2005-07-31','2005-08-07','2005-08-14','2005-08-21','2005-08-28','2005-09-04','2005-09-11','2005-09-18','2005-09-25','2005-10-02','2005-10-09','2005-10-16','2005-10-23','2005-10-30','2005-11-06','2005-11-13','2005-11-20','2005-11-27','2005-12-04','2005-12-11','2005-12-18','2005-12-25','2006-01-01','2006-01-08','2006-01-15','2006-01-22','2006-01-29','2006-02-05','2006-02-12','2006-02-19','2006-02-26','2006-03-05','2006-03-12','2006-03-19','2006-03-26','2006-04-02','2006-04-09','2006-04-16','2006-04-23','2006-04-30','2006-05-07','2006-05-14','2006-05-21','2006-05-28','2006-06-04','2006-06-11','2006-06-18','2006-06-25','2006-07-02','2006-07-09','2006-07-16','2006-07-23','2006-07-30','2006-08-06','2006-08-13','2006-08-20','2006-08-27','2006-09-03','2006-09-10','2006-09-17','2006-09-24','2006-10-01','2006-10-08','2006-10-15','2006-10-22','2006-10-29','2006-11-05','2006-11-12','2006-11-19','2006-11-26','2006-12-03','2006-12-10','2006-12-17','2006-12-24','2006-12-31','2007-01-07','2007-01-14','2007-01-21','2007-01-28','2007-02-04','2007-02-11','2007-02-18','2007-02-25','2007-03-04','2007-03-11','2007-03-18','2007-03-25','2007-04-01','2007-04-08','2007-04-15','2007-04-22','2007-04-29','2007-05-06','2007-05-13','2007-05-20','2007-05-27','2007-06-03','2007-06-10','2007-06-17','2007-06-24','2007-07-01','2007-07-08','2007-07-15','2007-07-22','2007-07-29','2007-08-05','2007-08-12','2007-08-19','2007-08-26','2007-09-02','2007-09-09','2007-09-16','2007-09-23','2007-09-30','2007-10-07','2007-10-14','2007-10-21','2007-10-28','2007-11-04','2007-11-11','2007-11-18','2007-11-25','2007-12-02','2007-12-09','2007-12-16','2007-12-23','2007-12-30','2008-01-06','2008-01-13','2008-01-20','2008-01-27','2008-02-03','2008-02-10','2008-02-17','2008-02-24','2008-03-02','2008-03-09','2008-03-16','2008-03-23','2008-03-30','2008-04-06','2008-04-13','2008-04-20','2008-04-27','2008-05-04','2008-05-11','2008-05-18','2008-05-25','2008-06-01','2008-06-08','2008-06-15','2008-06-22','2008-06-29','2008-07-06','2008-07-13','2008-07-20','2008-07-27','2008-08-03','2008-08-10','2008-08-17','2008-08-24','2008-08-31','2008-09-07','2008-09-14','2008-09-21','2008-09-28','2008-10-05','2008-10-12','2008-10-19','2008-10-26','2008-11-02','2008-11-09','2008-11-16','2008-11-23','2008-11-30','2008-12-07','2008-12-14','2008-12-21','2008-12-28','2009-01-04','2009-01-11','2009-01-18','2009-01-25','2009-02-01','2009-02-08','2009-02-15','2009-02-22','2009-03-01','2009-03-08','2009-03-15','2009-03-22','2009-03-29','2009-04-05','2009-04-12','2009-04-19','2009-04-26','2009-05-03','2009-05-10','2009-05-17','2009-05-24','2009-05-31','2009-06-07','2009-06-14','2009-06-21','2009-06-28','2009-07-05','2009-07-12','2009-07-19','2009-07-26','2009-08-02','2009-08-09','2009-08-16','2009-08-23','2009-08-30','2009-09-06','2009-09-13','2009-09-20','2009-09-27','2009-10-04','2009-10-11','2009-10-18','2009-10-25','2009-11-01','2009-11-08','2009-11-15','2009-11-22','2009-11-29','2009-12-06','2009-12-13','2009-12-20','2009-12-27','2010-01-03','2010-01-10','2010-01-17','2010-01-24','2010-01-31','2010-02-07','2010-02-14','2010-02-21','2010-02-28','2010-03-07','2010-03-14','2010-03-21','2010-03-28','2010-04-04','2010-04-11','2010-04-18','2010-04-25','2010-05-02','2010-05-09','2010-05-16','2010-05-23','2010-05-30','2010-06-06','2010-06-13','2010-06-20','2010-06-27','2010-07-04','2010-07-11','2010-07-18','2010-07-25','2010-08-01','2010-08-08','2010-08-15','2010-08-22','2010-08-29','2010-09-05','2010-09-12','2010-09-19','2010-09-26','2010-10-03','2010-10-10','2010-10-17','2010-10-24','2010-10-31','2010-11-07','2010-11-14','2010-11-21','2010-11-28','2010-12-05','2010-12-12','2010-12-19','2010-12-26','2011-01-02','2011-01-09','2011-01-16','2011-01-23','2011-01-30','2011-02-06','2011-02-13','2011-02-20','2011-02-27','2011-03-06','2011-03-13','2011-03-20','2011-03-27','2011-04-03','2011-04-10','2011-04-17','2011-04-24','2011-05-01','2011-05-08','2011-05-15','2011-05-22','2011-05-29','2011-06-05','2011-06-12','2011-06-19','2011-06-26','2011-07-03','2011-07-10','2011-07-17','2011-07-24','2011-07-31','2011-08-07','2011-08-14','2011-08-21','2011-08-28','2011-09-04','2011-09-11','2011-09-18','2011-09-25','2011-10-02','2011-10-09','2011-10-16','2011-10-23','2011-10-30','2011-11-06','2011-11-13','2011-11-20','2011-11-27','2011-12-04','2011-12-11','2011-12-18','2011-12-25','2012-01-01','2012-01-08','2012-01-15','2012-01-22','2012-01-29','2012-02-05','2012-02-12','2012-02-19','2012-02-26','2012-03-04','2012-03-11','2012-03-18','2012-03-25','2012-04-01','2012-04-08','2012-04-15','2012-04-22','2012-04-29','2012-05-06','2012-05-13','2012-05-20','2012-05-27','2012-06-03','2012-06-10','2012-06-17','2012-06-24','2012-07-01','2012-07-08','2012-07-15','2012-07-22','2012-07-29','2012-08-05','2012-08-12','2012-08-19','2012-08-26','2012-09-02','2012-09-09','2012-09-16','2012-09-23','2012-09-30','2012-10-07','2012-10-14','2012-10-21','2012-10-28','2012-11-04','2012-11-11','2012-11-18','2012-11-25','2012-12-02','2012-12-09','2012-12-16','2012-12-23','2012-12-30','2013-01-06','2013-01-13','2013-01-20','2013-01-27','2013-02-03','2013-02-10','2013-02-17','2013-02-24','2013-03-03','2013-03-10','2013-03-17','2013-03-24','2013-03-31','2013-04-07','2013-04-14','2013-04-21','2013-04-28','2013-05-05','2013-05-12','2013-05-19','2013-05-26','2013-06-02','2013-06-09','2013-06-16','2013-06-23','2013-06-30','2013-07-07','2013-07-14','2013-07-21','2013-07-28','2013-08-04','2013-08-11','2013-08-18','2013-08-25','2013-09-01','2013-09-08','2013-09-15','2013-09-22','2013-09-29','2013-10-06','2013-10-13','2013-10-20','2013-10-27','2013-11-03','2013-11-10','2013-11-17','2013-11-24','2013-12-01','2013-12-08','2013-12-15','2013-12-22','2013-12-29','2014-01-05','2014-01-12','2014-01-19','2014-01-26','2014-02-02','2014-02-09','2014-02-16','2014-02-23','2014-03-02','2014-03-09','2014-03-16','2014-03-23','2014-03-30','2014-04-06','2014-04-13','2014-04-20','2014-04-27','2014-05-04','2014-05-11','2014-05-18','2014-05-25','2014-06-01','2014-06-08','2014-06-15','2014-06-22','2014-06-29','2014-07-06','2014-07-13','2014-07-20','2014-07-27','2014-08-03','2014-08-10','2014-08-17','2014-08-24','2014-08-31','2014-09-07','2014-09-14','2014-09-21','2014-09-28','2014-10-05','2014-10-12','2014-10-19','2014-10-26','2014-11-02','2014-11-09','2014-11-16','2014-11-23','2014-11-30','2014-12-07','2014-12-14','2014-12-21','2014-12-28','2015-01-04','2015-01-11','2015-01-18','2015-01-25','2015-02-01','2015-02-08','2015-02-15','2015-02-22','2015-03-01']
for i in weekList:
#print i
dfSamp = dfAll[['id','rate'+i]]
dfSamp.columns = ['id', 'rate']
dfSamp.to_csv('/Users/danielmsheehan/GitHub/stat4701-edav-d3.github.com/viz/choropleth/pages/data/rate-'+i+'.tsv', index=False, sep='\t')