%pylab --no-import-all inline
Populating the interactive namespace from numpy and matplotlib
import numpy as np
import matplotlib.pyplot as plt
from pandas import DataFrame, Series, Index
import pandas as pd
from itertools import islice
import census
import us
import settings
The census documentation has example URLs but needs your API key to work. In this notebook, we'll use the IPython notebook HTML display mechanism to help out.
# Shared Census API client used by the generator functions below; it is
# constructed with the API key stored in the local settings module.
c = census.Census(key=settings.CENSUS_KEY)
# generators for the various census geographic entities of interest
def states(variables='NAME'):
    """Yield one SF1 record per state, skipping non-state entities.

    `variables` is handed unchanged to ``c.sf1.get``. A record is yielded
    only when its 'state' code is the FIPS code of an entry in
    ``us.states.STATES``.
    """
    known_fips = set(entry.fips for entry in us.states.STATES)
    everything = {'for': 'state:*'}
    for record in c.sf1.get(variables, geo=everything):
        if record['state'] not in known_fips:
            # the response can contain entities that are not in us.states.STATES
            continue
        yield record
def counties(variables='NAME'):
    """Yield county records for all states, fetched with a single request.

    `variables` is handed unchanged to ``c.sf1.get``. Counties whose
    'state' code is not the FIPS code of an entry in ``us.states.STATES``
    are dropped.
    """
    request = {'for': 'county:*', 'in': 'state:*'}
    allowed_states = set(entry.fips for entry in us.states.STATES)
    for record in c.sf1.get(variables, geo=request):
        if record['state'] not in allowed_states:
            continue
        yield record
def counties2(variables='NAME'):
    """Yield county records by issuing one request per state.

    All counties can be fetched in a single call (see `counties`); this
    variant exists to demonstrate walking through the states to reach
    their counties.
    """
    for entry in us.states.STATES:
        in_state = 'state:{fips}'.format(fips=entry.fips)
        query = {'for': 'county:*', 'in': in_state}
        for record in c.sf1.get(variables, geo=query):
            yield record
def tracts(variables='NAME'):
    """Yield tract records by walking state -> county -> tract.

    For each entry in ``us.states.STATES`` the counties are listed first,
    then one request per county retrieves its tracts with `variables`.
    """
    for entry in us.states.STATES:
        state_clause = 'state:{fips}'.format(fips=entry.fips)
        county_query = {'for': 'county:*', 'in': state_clause}
        # counties are requested with 'NAME' only: just their codes are used
        for county_rec in c.sf1.get('NAME', geo=county_query):
            parent = 'state:{s_fips} county:{c_fips}'.format(
                s_fips=entry.fips, c_fips=county_rec['county'])
            tract_query = {'for': 'tract:*', 'in': parent}
            for record in c.sf1.get(variables, geo=tract_query):
                yield record
def block_groups(variables='NAME'):
    """Yield block-group records, one request per county.

    `variables` is passed to ``c.sf1.get`` for the block-group query.
    The counties to visit come from the `counties` generator.

    Example request:
    http://api.census.gov/data/2010/sf1?get=P0010001&for=block+group:*&in=state:02+county:170
    """
    # Only the 'state' and 'county' codes of each county record are used, so
    # enumerate counties with the default 'NAME' field rather than forwarding
    # the caller's (possibly long) variable list to the county-level query.
    for county in counties():
        geo = {
            'for': 'block group:*',
            'in': 'state:{state} county:{county}'.format(
                state=county['state'], county=county['county']),
        }
        for block_group in c.sf1.get(variables, geo=geo):
            yield block_group
def blocks(variables='NAME'):
    """Yield block records, one request per tract.

    `variables` is passed to ``c.sf1.get`` for the block query. The
    tracts to visit come from the `tracts` generator.

    Example request:
    http://api.census.gov/data/2010/sf1?get=P0010001&for=block:*&in=state:02+county:290+tract:00100
    """
    # Only the 'state', 'county' and 'tract' codes of each tract record are
    # used, so enumerate tracts with the default 'NAME' field rather than
    # forwarding the caller's variable list to the tract-level query.
    for tract in tracts():
        geo = {
            'for': 'block:*',
            'in': 'state:{state} county:{county} tract:{tract}'.format(
                state=tract['state'],
                county=tract['county'],
                tract=tract['tract']),
        }
        for block in c.sf1.get(variables, geo=geo):
            yield block
# msa, csas, districts, zip_codes
def msas(variables="NAME"):
    """Yield metropolitan/micropolitan statistical area records, state by state.

    One request is made per entry in ``us.STATES``; every record returned
    by ``c.sf1.get`` is yielded as is.
    """
    area_kind = 'metropolitan statistical area/micropolitan statistical area:*'
    for entry in us.STATES:
        within = 'state:{state_fips}'.format(state_fips=entry.fips)
        query = {'for': area_kind, 'in': within}
        for record in c.sf1.get(variables, geo=query):
            yield record
def csas(variables="NAME"):
    """Yield combined statistical area records, state by state.

    One request is made per entry in ``us.STATES``.

    Example request:
    http://api.census.gov/data/2010/sf1?get=P0010001&for=combined+statistical+area:*&in=state:24
    """
    area_kind = 'combined statistical area:*'
    for entry in us.STATES:
        within = 'state:{state_fips}'.format(state_fips=entry.fips)
        query = {'for': area_kind, 'in': within}
        for record in c.sf1.get(variables, geo=query):
            yield record
def districts(variables="NAME"):
    """Yield congressional district records, state by state.

    One request is made per entry in ``us.STATES``.

    Example request:
    http://api.census.gov/data/2010/sf1?get=P0010001&for=congressional+district:*&in=state:24
    """
    area_kind = 'congressional district:*'
    for entry in us.STATES:
        within = 'state:{state_fips}'.format(state_fips=entry.fips)
        query = {'for': area_kind, 'in': within}
        for record in c.sf1.get(variables, geo=query):
            yield record
def zip_code_tabulation_areas(variables="NAME"):
    """Yield ZIP code tabulation area records, state by state.

    One request is made per entry in ``us.STATES``.

    Example request:
    http://api.census.gov/data/2010/sf1?get=P0010001&for=zip+code+tabulation+area:*&in=state:02
    """
    area_kind = 'zip code tabulation area:*'
    for entry in us.STATES:
        within = 'state:{state_fips}'.format(state_fips=entry.fips)
        query = {'for': area_kind, 'in': within}
        for record in c.sf1.get(variables, geo=query):
            yield record
list(islice(msas(), 1))
[{u'NAME': u'Albertville, AL Micro Area', u'metropolitan statistical area/micropolitan statistical area': u'10700', u'state': u'01'}]
list(islice(csas(), 1))
[{u'NAME': u'Atlanta-Sandy Springs-Gainesville, GA-AL CSA (part)', u'combined statistical area': u'122', u'state': u'01'}]
districts_list = list(islice(districts(), 1))
districts_list
[{u'NAME': u'Congressional District 1', u'congressional district': u'01', u'state': u'01'}]
list(islice(zip_code_tabulation_areas(), 1))
[{u'NAME': u'ZCTA5 30165 (part)', u'state': u'01', u'zip code tabulation area': u'30165'}]
Note: There are definitely improvements to be made in these generators. One of the most important would be to limit the generators to specific geographies -- typically, we don't want all the blocks in the country, only the ones in a specific area. A good exercise is to rewrite our generators to accept a limited geography.
We can compare the total number of tracts we calculate to:
https://www.census.gov/geo/maps-data/data/tallies/tractblock.html
and
https://www.census.gov/geo/maps-data/data/docs/geo_tallies/Tract_Block2010.txt
http://www.census.gov/developers/data/sf1.xml
compare to http://www.census.gov/prod/cen2010/briefs/c2010br-02.pdf
I think the P0050001 might be the key category
P0050002 Not Hispanic or Latino (total) =
P0050003 Not Hispanic White only
P0050004 Not Hispanic Black only
P0050006 Not Hispanic Asian only
Not Hispanic Other (should also be P0050002 - (P0050003 + P0050004 + P0050006))
P0050010 Hispanic or Latino
P0050010 = P0050011...P0050017
From Hispanic and Latino Americans (Wikipedia):
While the two terms are sometimes used interchangeably, Hispanic is a narrower term which mostly refers to persons of Spanish speaking origin or ancestry, while Latino is more frequently used to refer more generally to anyone of Latin American origin or ancestry, including Brazilians.
and
The Census Bureau's 2010 census does provide a definition of the terms Latino or Hispanic and is as follows: “Hispanic or Latino” refers to a person of Cuban, Mexican, Puerto Rican, South or Central American, or other Spanish culture or origin regardless of race. It allows respondents to self-define whether they were Latino or Hispanic and then identify their specific country or place of origin.[52] On its website, the Census Bureau defines "Hispanic" or "Latino" persons as being "persons who trace their origin [to]... Spanish speaking Central and South America countries, and other Spanish cultures".
In the Racial Dot Map: "Whites are coded as blue; African-Americans, green; Asians, red; Hispanics, orange; and all other racial categories are coded as brown."
In this notebook, we will relate the Racial Dot Map 5-category scheme to the P005* variables.
# Fetch per-state totals. The total population is tabulated in two
# variables: P0010001 and P0050001.
# P0050002 Not Hispanic or Latino (total)
# P0050010 Hispanic or Latino
r = list(states(('NAME','P0010001','P0050001','P0050002','P0050010')))
# display the first five records
r[:5]
[{u'NAME': u'Alabama', u'P0010001': u'4779736', u'P0050001': u'4779736', u'P0050002': u'4594134', u'P0050010': u'185602', u'state': u'01'}, {u'NAME': u'Alaska', u'P0010001': u'710231', u'P0050001': u'710231', u'P0050002': u'670982', u'P0050010': u'39249', u'state': u'02'}, {u'NAME': u'Arizona', u'P0010001': u'6392017', u'P0050001': u'6392017', u'P0050002': u'4496868', u'P0050010': u'1895149', u'state': u'04'}, {u'NAME': u'Arkansas', u'P0010001': u'2915918', u'P0050001': u'2915918', u'P0050002': u'2729868', u'P0050010': u'186050', u'state': u'05'}, {u'NAME': u'California', u'P0010001': u'37253956', u'P0050001': u'37253956', u'P0050002': u'23240237', u'P0050010': u'14013719', u'state': u'06'}]
# Hispanic/Latino origin vs not-Hispanic/Latino
# Compare with http://www.census.gov/prod/cen2010/briefs/c2010br-02.pdf Table 1
# Hispanic/Latino: 50477594
# non-Hispanic/Latino: 258267944
df=DataFrame(r)
# the API returns the counts as strings, so convert them to int before summing
df[['P0010001', 'P0050001','P0050002','P0050010']] = \
df[['P0010001', 'P0050001','P0050002','P0050010']].astype('int')
# national totals: column sums over all the state rows
df[['P0010001', 'P0050001', 'P0050002', 'P0050010']].sum()
P0010001 308745538 P0050001 308745538 P0050002 258267944 P0050010 50477594 dtype: int64
# Are the total Hispanic/Latino and non-Hispanic/Latino populations the same
# as those reported in
# http://www.census.gov/prod/cen2010/briefs/c2010br-02.pdf Table 1 ?
# Evaluates to a pair of booleans: (Hispanic/Latino matches, non-Hispanic/Latino matches)
(df['P0050010'].sum() == 50477594,
df['P0050002'].sum() == 258267944)
(True, True)
# How about the non-Hispanic/Latino White only category?
# P0050003
# total should be 196817552
# (here the variables are given as one comma-separated string)
df = DataFrame(list(states('NAME,P0050003')))
# counts arrive as strings; convert before summing
df['P0050003'] = df['P0050003'].astype('int')
df.P0050003.sum()
196817552
SUGGESTED EXERCISE: write a function convert_to_rdotmap(row)
that takes an input Python dict that has the keys:
* NAME
* P0050001, P0050002, ..., P0050016, P0050017
and that returns a Pandas Series with the following columns:
* Total
* White
* Black
* Asian
* Hispanic
* Other
* Name (note lowercase)
that correspond to those used in the Racial Dot Map.
Also write a function convert_P005_to_int(df) that converts all the P005* columns to int.
# A little convenience function to calculate the variable names to be used
def P005_range(n0, n1):
    """Return the P005 variable names numbered n0 (inclusive) to n1 (exclusive).

    Each name is 'P005' followed by the number zero-padded to four digits,
    e.g. ``P005_range(1, 3) == ('P0050001', 'P0050002')``.
    """
    # range (rather than the Python-2-only xrange) works on Python 2 and 3
    return tuple('P005' + "{i:04d}".format(i=i) for i in range(n0, n1))
# Names P0050001 .. P0050017 (the upper bound 18 is exclusive)
P005_vars = P005_range(1,18)
# the same names as a single comma-separated string
P005_vars_str = ",".join(P005_vars)
# field list with 'NAME' first, in the form passed to the geography generators
P005_vars_with_name = ['NAME'] + list(P005_vars)
# display the list
P005_vars_with_name
['NAME', 'P0050001', 'P0050002', 'P0050003', 'P0050004', 'P0050005', 'P0050006', 'P0050007', 'P0050008', 'P0050009', 'P0050010', 'P0050011', 'P0050012', 'P0050013', 'P0050014', 'P0050015', 'P0050016', 'P0050017']
# HAVE YOU TRIED THE EXERCISE....IF NOT....TRY IT....HERE'S ONE POSSIBLE ANSWER#
# http://manishamde.github.io/blog/2013/03/07/pandas-and-python-top-10/#create
def convert_P005_to_int(df):
    """Cast every P005 column of `df` to int and return `df`.

    The columns named in the module-level ``P005_vars`` are overwritten
    on the frame that was passed in; the same frame is returned.
    """
    p005_columns = list(P005_vars)
    df[p005_columns] = df[p005_columns].astype('int')
    return df
def convert_to_rdotmap(row):
    """Map a row of P005 variables onto the five Racial Dot Map categories.

    `row` must support lookup by 'NAME' and by the P005 variable names.
    Returns a pandas Series indexed, in this order, by Name, Total, White,
    Black, Hispanic, Asian and Other.
    """
    # everything that is not White, Black, Asian or Hispanic is pooled here
    other_total = (row['P0050005'] + row['P0050007']
                   + row['P0050008'] + row['P0050009'])
    column_order = ['Name', 'Total', 'White', 'Black', 'Hispanic', 'Asian', 'Other']
    categories = {
        'Name': row['NAME'],
        'Total': row['P0050001'],
        'White': row['P0050003'],
        'Black': row['P0050004'],
        'Hispanic': row['P0050010'],
        'Asian': row['P0050006'],
        'Other': other_total,
    }
    return pd.Series(categories, index=column_order)
from census import Census
import settings
from settings import CENSUS_KEY
import time
from itertools import islice
def P005_range(n0, n1):
    """Return the P005 variable names numbered n0 (inclusive) to n1 (exclusive).

    Each name is 'P005' followed by the number zero-padded to four digits,
    e.g. ``P005_range(1, 3) == ('P0050001', 'P0050002')``.
    """
    # range (rather than the Python-2-only xrange) works on Python 2 and 3
    return tuple('P005' + "{i:04d}".format(i=i) for i in range(n0, n1))
# Names P0050001 .. P0050017 (the upper bound 18 is exclusive)
P005_vars = P005_range(1,18)
# the same names as a single comma-separated string
P005_vars_str = ",".join(P005_vars)
# http://manishamde.github.io/blog/2013/03/07/pandas-and-python-top-10/#create
def convert_to_rdotmap(row):
    """Map a row of P005 variables onto the five Racial Dot Map categories.

    `row` must support lookup by 'NAME' and by the P005 variable names.
    Returns a pandas Series indexed, in this order, by Name, Total, White,
    Black, Hispanic, Asian and Other.
    """
    # everything that is not White, Black, Asian or Hispanic is pooled here
    other_total = (row['P0050005'] + row['P0050007']
                   + row['P0050008'] + row['P0050009'])
    column_order = ['Name', 'Total', 'White', 'Black', 'Hispanic', 'Asian', 'Other']
    categories = {
        'Name': row['NAME'],
        'Total': row['P0050001'],
        'White': row['P0050003'],
        'Black': row['P0050004'],
        'Hispanic': row['P0050010'],
        'Asian': row['P0050006'],
        'Other': other_total,
    }
    return pd.Series(categories, index=column_order)
def normalize(s):
    """Scale a Series so that its entries add up to 1.0.

    Every item is converted to float and divided by the sum of `s`.
    """
    as_float = s.astype('float')
    return as_float / np.sum(s)
def entropy(series):
    """Return the normalized Shannon index of `series`.

    `series` is a one-dimensional collection of non-negative counts; a
    pandas Series, a NumPy array or a plain list/tuple are all accepted.
    The Shannon entropy of the proportions is divided by the maximum
    entropy possible for ``len(series)`` categories, so a series whose
    entries are all equal scores 1.0.

    Zero entries are left out of the entropy sum but still count towards
    the number of categories used for the normalization. Inputs with
    fewer than two entries, or with no non-zero entry, score 0.0.
    """
    counts = np.asarray(series, dtype=float)
    n_categories = len(counts)
    if n_categories < 2:
        return 0.0

    # drop the zeros: their contribution is taken as 0 and log(0) is undefined
    nonzero = counts[counts != 0]
    if nonzero.size == 0:
        return 0.0

    # maximum possible entropy for the given number of categories
    max_s = -np.log(1.0 / n_categories)
    p = nonzero / nonzero.sum()
    return (-p * np.log(p)).sum() / max_s
def convert_P005_to_int(df):
    """Cast every P005 column of `df` to int and return `df`.

    The columns named in the module-level ``P005_vars`` are overwritten
    on the frame that was passed in; the same frame is returned.
    """
    p005_columns = list(P005_vars)
    df[p005_columns] = df[p005_columns].astype('int')
    return df
def diversity(r):
    """Build a table of Racial Dot Map categories and diversity scores.

    `r` is turned into a DataFrame, so it should be a collection of
    records holding 'NAME' and the P005 variables. The returned DataFrame
    has the columns produced by `convert_to_rdotmap` (Name, Total, White,
    Black, Hispanic, Asian, Other) plus:

    * entropy5 -- `entropy` over Asian, Black, Hispanic, White and Other
    * entropy4 -- `entropy` over Asian, Black, Hispanic and White
    """
    raw = convert_P005_to_int(DataFrame(r))
    table = raw.apply(convert_to_rdotmap, axis=1)

    five_groups = ['Asian', 'Black', 'Hispanic', 'White', 'Other']
    four_groups = ['Asian', 'Black', 'Hispanic', 'White']
    table['entropy5'] = table[five_groups].apply(entropy, axis=1)
    table['entropy4'] = table[four_groups].apply(entropy, axis=1)
    return table
# states: fetch NAME and the P005 variables for every state, then display
# the diversity table
r=list(states(P005_vars_with_name))
diversity(r)
Name | Total | White | Black | Hispanic | Asian | Other | entropy5 | entropy4 | |
---|---|---|---|---|---|---|---|---|---|
0 | Alabama | 4779736 | 3204402 | 1244437 | 185602 | 52937 | 92358 | 0.541001 | 0.570292 |
1 | Alaska | 710231 | 455320 | 21949 | 39249 | 37459 | 156254 | 0.646677 | 0.475235 |
2 | Arizona | 6392017 | 3695647 | 239101 | 1895149 | 170509 | 391611 | 0.663524 | 0.643529 |
3 | Arkansas | 2915918 | 2173469 | 447102 | 186050 | 35647 | 73650 | 0.515025 | 0.526205 |
4 | California | 37253956 | 14956253 | 2163804 | 14013719 | 4775070 | 1345110 | 0.796994 | 0.843670 |
5 | Colorado | 5029196 | 3520793 | 188778 | 1038687 | 135564 | 145374 | 0.558232 | 0.570130 |
6 | Connecticut | 3574097 | 2546262 | 335119 | 479087 | 134091 | 79538 | 0.584509 | 0.615330 |
7 | Delaware | 897934 | 586752 | 186782 | 73221 | 28308 | 22871 | 0.628490 | 0.660917 |
8 | District of Columbia | 601723 | 209464 | 301053 | 54749 | 20818 | 15639 | 0.710288 | 0.757369 |
9 | Florida | 18801310 | 10884722 | 2851100 | 4223806 | 445216 | 396466 | 0.688393 | 0.741076 |
10 | Georgia | 9687653 | 5413920 | 2910800 | 853689 | 311692 | 197552 | 0.677545 | 0.729666 |
11 | Hawaii | 1360301 | 309343 | 19904 | 120842 | 513294 | 396918 | 0.833108 | 0.750762 |
12 | Idaho | 1567582 | 1316243 | 8875 | 175901 | 18529 | 48034 | 0.360829 | 0.330227 |
13 | Illinois | 12830632 | 8167753 | 1832924 | 2027578 | 580586 | 221791 | 0.663131 | 0.719347 |
14 | Indiana | 6483802 | 5286453 | 582140 | 389707 | 101444 | 124058 | 0.430342 | 0.439752 |
15 | Iowa | 3046355 | 2701123 | 86906 | 151544 | 52597 | 54185 | 0.310137 | 0.300998 |
16 | Kansas | 2853118 | 2230539 | 162700 | 300042 | 66967 | 92870 | 0.492215 | 0.483675 |
17 | Kentucky | 4339367 | 3745655 | 333075 | 132836 | 48338 | 79463 | 0.344293 | 0.340010 |
18 | Louisiana | 4533372 | 2734884 | 1442420 | 192560 | 69327 | 94181 | 0.588919 | 0.623788 |
19 | Maine | 1328361 | 1254297 | 15154 | 16935 | 13442 | 28533 | 0.180061 | 0.137155 |
20 | Maryland | 5773552 | 3157958 | 1674229 | 470632 | 316694 | 154039 | 0.714090 | 0.760596 |
21 | Massachusetts | 6547629 | 4984800 | 391693 | 627654 | 347495 | 195987 | 0.535423 | 0.540767 |
22 | Michigan | 9883640 | 7569939 | 1383756 | 436358 | 236490 | 257097 | 0.498010 | 0.504299 |
23 | Minnesota | 5303925 | 4405142 | 269141 | 250258 | 212996 | 166388 | 0.427024 | 0.407947 |
24 | Mississippi | 2967297 | 1722287 | 1093512 | 81481 | 25477 | 44540 | 0.550642 | 0.591949 |
25 | Missouri | 5988927 | 4850748 | 687149 | 212470 | 97221 | 141339 | 0.430525 | 0.429356 |
26 | Montana | 989415 | 868628 | 3743 | 28565 | 6138 | 82341 | 0.295872 | 0.149198 |
27 | Nebraska | 1826341 | 1499753 | 80959 | 167405 | 31919 | 46305 | 0.424281 | 0.417907 |
28 | Nevada | 2700551 | 1462081 | 208058 | 716501 | 191047 | 122864 | 0.751622 | 0.774363 |
29 | New Hampshire | 1316470 | 1215050 | 13625 | 36704 | 28241 | 22850 | 0.232308 | 0.210183 |
30 | New Jersey | 8791894 | 5214878 | 1125401 | 1555144 | 719827 | 176644 | 0.722462 | 0.783517 |
31 | New Mexico | 2059179 | 833810 | 35462 | 953403 | 26305 | 210199 | 0.671781 | 0.603770 |
32 | New York | 19378102 | 11304247 | 2783857 | 3416922 | 1406194 | 466882 | 0.732727 | 0.787727 |
33 | North Carolina | 9535483 | 6223995 | 2019854 | 800120 | 206579 | 284935 | 0.623233 | 0.645955 |
34 | North Dakota | 672591 | 598007 | 7720 | 13467 | 6839 | 46558 | 0.289289 | 0.165826 |
35 | Ohio | 11536504 | 9359263 | 1389115 | 354674 | 190765 | 242687 | 0.422934 | 0.426370 |
36 | Oklahoma | 3751351 | 2575381 | 272071 | 332007 | 64154 | 507738 | 0.623426 | 0.506346 |
37 | Oregon | 3831074 | 3005848 | 64984 | 450062 | 139436 | 170744 | 0.478609 | 0.444008 |
38 | Pennsylvania | 12702379 | 10094652 | 1327091 | 719660 | 346288 | 214688 | 0.465015 | 0.486249 |
39 | Rhode Island | 1052567 | 803685 | 51560 | 130655 | 29988 | 36679 | 0.516377 | 0.508129 |
40 | South Carolina | 4625364 | 2962740 | 1279998 | 235682 | 58307 | 88637 | 0.573768 | 0.609445 |
41 | South Dakota | 814180 | 689502 | 9959 | 22119 | 7553 | 85047 | 0.355383 | 0.191061 |
42 | Tennessee | 6346105 | 4800782 | 1049391 | 290059 | 90311 | 115562 | 0.486619 | 0.508575 |
43 | Texas | 25145561 | 11397345 | 2886825 | 9460921 | 948426 | 452044 | 0.727466 | 0.793870 |
44 | Utah | 2763885 | 2221719 | 25951 | 358340 | 54176 | 103699 | 0.425283 | 0.393087 |
45 | Vermont | 625741 | 590223 | 5943 | 9208 | 7875 | 12492 | 0.183061 | 0.144800 |
46 | Virginia | 8001024 | 5186450 | 1523704 | 631825 | 436298 | 222747 | 0.655915 | 0.688954 |
47 | Washington | 6724540 | 4876804 | 229603 | 755790 | 475634 | 386709 | 0.587508 | 0.555274 |
48 | West Virginia | 1852994 | 1726256 | 62122 | 22268 | 12285 | 30063 | 0.206960 | 0.183409 |
49 | Wisconsin | 5686986 | 4738411 | 350898 | 336056 | 128052 | 133569 | 0.412929 | 0.408698 |
50 | Wyoming | 563626 | 483874 | 4351 | 50231 | 4279 | 20891 | 0.337501 | 0.288172 |
51 rows × 9 columns
# counties: fetch NAME and the P005 variables for every county, compute the
# diversity table and display it ordered from highest to lowest entropy5
r = list(counties(P005_vars_with_name))
df2 = diversity(r)
# NOTE(review): sort_index(by=...) was deprecated in later pandas releases in
# favour of sort_values(by=...); switch if the pandas version is upgraded.
df2.sort_index(by='entropy5',ascending=False)
Name | Total | White | Black | Hispanic | Asian | Other | entropy5 | entropy4 | |
---|---|---|---|---|---|---|---|---|---|
1868 | Queens County | 2230722 | 616727 | 395881 | 613750 | 508334 | 96030 | 0.925644 | 0.989171 |
68 | Aleutians West Census Area | 5561 | 1745 | 318 | 726 | 1575 | 1197 | 0.920216 | 0.882623 |
186 | Alameda County | 1510271 | 514559 | 184126 | 339889 | 390524 | 81173 | 0.910834 | 0.957875 |
233 | Solano County | 413344 | 168628 | 58743 | 99356 | 59027 | 27590 | 0.897416 | 0.926901 |
67 | Aleutians East Borough | 3141 | 425 | 212 | 385 | 1113 | 1006 | 0.896064 | 0.864996 |
2601 | Fort Bend County | 585375 | 211680 | 123267 | 138967 | 98762 | 12699 | 0.882673 | 0.970379 |
1851 | Kings County | 2504700 | 893306 | 799066 | 496285 | 260129 | 55914 | 0.853105 | 0.934130 |
219 | Sacramento County | 1418788 | 687166 | 139949 | 306196 | 198944 | 86533 | 0.842896 | 0.865689 |
453 | Gwinnett County | 805321 | 354316 | 184122 | 162035 | 84763 | 20085 | 0.838965 | 0.912596 |
550 | Maui County | 154834 | 49193 | 818 | 15710 | 43384 | 45729 | 0.833108 | 0.751354 |
546 | Hawaii County | 185079 | 57831 | 899 | 21383 | 39588 | 65378 | 0.830209 | 0.765941 |
192 | Contra Costa County | 1049025 | 500923 | 93604 | 255560 | 148881 | 50057 | 0.829415 | 0.865931 |
1207 | Montgomery County | 971777 | 478765 | 161689 | 165398 | 134677 | 31248 | 0.828220 | 0.887528 |
224 | San Joaquin County | 685306 | 245919 | 48540 | 266341 | 94547 | 29959 | 0.828052 | 0.869824 |
1782 | Hudson County | 634266 | 195510 | 71315 | 267853 | 83825 | 15763 | 0.827493 | 0.899094 |
1229 | Suffolk County | 722023 | 346979 | 142980 | 143455 | 58963 | 29646 | 0.826117 | 0.871302 |
549 | Kauai County | 67091 | 20611 | 258 | 6315 | 20296 | 19611 | 0.824890 | 0.737361 |
2936 | Manassas Park city | 14273 | 6070 | 1784 | 4645 | 1261 | 513 | 0.821891 | 0.873913 |
2892 | Prince William County | 402002 | 195656 | 78492 | 81460 | 29986 | 16408 | 0.818339 | 0.862245 |
547 | Honolulu County | 953207 | 181684 | 17929 | 77433 | 410019 | 266142 | 0.816249 | 0.722054 |
223 | San Francisco County | 805235 | 337451 | 46781 | 121774 | 265700 | 33529 | 0.816230 | 0.858482 |
226 | San Mateo County | 718451 | 303609 | 18763 | 182502 | 175934 | 37643 | 0.811677 | 0.837993 |
1967 | Robeson County | 134168 | 36160 | 32347 | 10932 | 971 | 53758 | 0.809458 | 0.757652 |
1936 | Hoke County | 46952 | 19142 | 15392 | 5823 | 467 | 6128 | 0.808919 | 0.758726 |
1858 | New York County | 1585873 | 761493 | 205340 | 403577 | 177624 | 37839 | 0.807452 | 0.877051 |
228 | Santa Clara County | 1781642 | 626909 | 42331 | 479210 | 565466 | 67726 | 0.806568 | 0.852239 |
1780 | Essex County | 783969 | 260177 | 308358 | 159117 | 35292 | 21025 | 0.803622 | 0.867143 |
1748 | Clark County | 1951269 | 935955 | 194821 | 568644 | 165121 | 86728 | 0.800982 | 0.835903 |
1785 | Middlesex County | 809858 | 398724 | 71557 | 148975 | 172534 | 18068 | 0.800874 | 0.872133 |
610 | Cook County | 5194675 | 2278358 | 1265778 | 1244762 | 318869 | 86908 | 0.800036 | 0.882240 |
2579 | Dallas County | 2368139 | 784693 | 518732 | 905940 | 117797 | 40977 | 0.798837 | 0.879632 |
204 | Los Angeles County | 9818605 | 2728321 | 815086 | 4687889 | 1325671 | 261638 | 0.796781 | 0.859287 |
2623 | Harris County | 4092459 | 1349646 | 754258 | 1671540 | 249853 | 67162 | 0.796176 | 0.878354 |
367 | Orange County | 1145956 | 526754 | 223200 | 308244 | 55541 | 32217 | 0.792964 | 0.852138 |
325 | Broward County | 1748066 | 760817 | 449677 | 438247 | 55692 | 43633 | 0.782936 | 0.845847 |
992 | Wyandotte County | 157505 | 68170 | 39046 | 41633 | 3886 | 4770 | 0.781122 | 0.834209 |
2848 | Fairfax County | 1081726 | 590622 | 96078 | 168482 | 188737 | 37807 | 0.780967 | 0.826231 |
1793 | Union County | 536499 | 243312 | 111705 | 146704 | 24496 | 10282 | 0.780779 | 0.854469 |
2294 | Philadelphia County | 1526006 | 562585 | 644287 | 187611 | 95521 | 36002 | 0.777585 | 0.842034 |
2935 | Manassas city | 37821 | 17994 | 4905 | 11876 | 1861 | 1185 | 0.769676 | 0.818704 |
2536 | Bell County | 310235 | 157289 | 63380 | 67010 | 8350 | 14206 | 0.769433 | 0.795595 |
2915 | Alexandria city | 139966 | 74878 | 29778 | 22524 | 8351 | 4435 | 0.767634 | 0.815651 |
1921 | Durham County | 267587 | 112697 | 100260 | 36077 | 12180 | 6373 | 0.765370 | 0.827083 |
222 | San Diego County | 3095313 | 1500047 | 146600 | 991348 | 328058 | 129260 | 0.764654 | 0.795817 |
1784 | Mercer County | 366513 | 199909 | 71378 | 55318 | 32545 | 7363 | 0.763097 | 0.831665 |
2645 | Jefferson County | 252273 | 112503 | 84500 | 42899 | 8525 | 3846 | 0.749335 | 0.825618 |
2742 | Tarrant County | 1809034 | 937135 | 262522 | 482977 | 83378 | 43022 | 0.748173 | 0.806698 |
215 | Orange County | 3010232 | 1328499 | 44000 | 1012973 | 532477 | 92283 | 0.747159 | 0.792896 |
80 | Kodiak Island Borough | 13592 | 7137 | 85 | 996 | 2620 | 2754 | 0.747039 | 0.631715 |
242 | Yolo County | 200849 | 100240 | 4752 | 60953 | 25640 | 9264 | 0.746835 | 0.767605 |
221 | San Bernardino County | 2035210 | 677598 | 170700 | 1001145 | 123978 | 61789 | 0.745345 | 0.791244 |
236 | Sutter County | 94737 | 47782 | 1713 | 27251 | 13442 | 4549 | 0.745009 | 0.762589 |
1949 | Mecklenburg County | 919628 | 465372 | 278042 | 111944 | 41991 | 22279 | 0.741724 | 0.798176 |
1915 | Cumberland County | 319431 | 150749 | 113939 | 30190 | 6885 | 17668 | 0.738064 | 0.743731 |
1789 | Passaic County | 501226 | 227144 | 55480 | 185677 | 24556 | 8369 | 0.737074 | 0.807972 |
2146 | Comanche County | 124098 | 73122 | 20794 | 13896 | 2663 | 13623 | 0.733880 | 0.676671 |
2185 | Oklahoma County | 718633 | 425791 | 108954 | 108543 | 21151 | 54194 | 0.733371 | 0.712225 |
195 | Fresno County | 930450 | 304522 | 45005 | 468070 | 86856 | 25997 | 0.732562 | 0.780302 |
2542 | Brazoria County | 313166 | 166674 | 36880 | 86643 | 17013 | 5956 | 0.731108 | 0.795982 |
1774 | Atlantic County | 274549 | 160871 | 40882 | 46241 | 20419 | 6136 | 0.730078 | 0.787988 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
3143 rows × 9 columns
# materialize every MSA record with its NAME and total population (P0010001)
msas_list = list(msas('NAME,P0010001'))
len(msas_list)
1013
df = DataFrame(msas_list)
# counts arrive as strings; convert before summing
df.P0010001 = df.P0010001.astype('int')
# The records are fetched state by state, so the same area code can occur in
# more than one record (1013 records vs. 942 distinct codes in the recorded
# output); add the populations up per area code.
df.groupby('metropolitan statistical area/micropolitan statistical area').apply(lambda x:sum(x['P0010001']))
metropolitan statistical area/micropolitan statistical area 10020 57999 10100 40602 10140 72797 10180 165252 10220 37492 10300 99892 10420 703200 10460 63797 10500 157308 10540 116672 10580 870716 10620 60585 10660 31255 10700 93019 10740 887077 ... 49060 36311 49100 51461 49180 477717 49260 20081 49300 114520 49340 798552 49380 21378 49420 243231 49460 22438 49540 28065 49620 434972 49660 565773 49700 166892 49740 195751 49780 86074 Length: 942, dtype: int64