# FILL IN WITH YOUR CODE

# Testing code


def to_unicode(vals):
    """Return a list with every element of ``vals`` converted to text.

    Uses the ``unicode`` builtin when it exists (Python 2) and falls back
    to ``str`` otherwise (Python 3, where ``unicode`` was removed), so the
    helper works on both interpreters.
    """
    try:
        text_type = unicode  # noqa: F821 -- Python 2 builtin
    except NameError:
        text_type = str
    return [text_type(v) for v in vals]


def _sort_desc(df, column):
    """Return ``df`` sorted by ``column`` in descending order."""
    # ``sort_index(by=...)`` was deprecated in favour of ``sort_values`` and
    # is gone from current pandas; keep the old call only as a fallback for
    # very old installations that predate ``sort_values``.
    if hasattr(df, 'sort_values'):
        return df.sort_values(by=column, ascending=False)
    return df.sort_index(by=column, ascending=False)


def test_msas_df(msas_df):
    """Check ``msas_df`` against the expected 2010 metro/micro area figures.

    Args:
        msas_df: DataFrame indexed by MSA code that holds the population
            counts, the population shares and the diversity measures named
            in ``required_columns`` below.

    Raises:
        AssertionError: if a required column is missing, or the row count,
            the population sums, or the ranking / entropy values of the ten
            most populous areas differ from the expected values.
    """
    required_columns = {
        'Asian', 'Black', 'Hispanic', 'Other', 'Total', 'White',
        'entropy4', 'entropy5', 'entropy_rice', 'gini_simpson',
        'p_Asian', 'p_Black', 'p_Hispanic', 'p_Other', 'p_White',
    }
    missing_columns = required_columns - set(msas_df.columns)
    assert not missing_columns, (
        "missing columns: %s" % sorted(missing_columns))

    # https://www.census.gov/geo/maps-data/data/tallies/national_geo_tallies.html
    # 366 metro areas
    # 576 micropolitan areas
    assert len(msas_df) == 942

    # total number of people in metro/micro areas
    assert msas_df['Total'].sum() == 289261315
    assert msas_df['White'].sum() == 180912856
    assert msas_df['Other'].sum() == 8540181

    # Take the 10 most populous areas, then list them in descending order
    # of entropy_rice.  Computed once and reused by every check below.
    top_10_metros = _sort_desc(msas_df, 'Total')[:10]
    top_10_by_entropy_rice = _sort_desc(top_10_metros, 'entropy_rice')

    assert to_unicode(top_10_by_entropy_rice.index) == [
        u'26420', u'35620', u'47900', u'31100', u'19100',
        u'33100', u'16980', u'12060', u'37980', u'14460',
    ]

    np.testing.assert_allclose(
        top_10_by_entropy_rice['entropy5'],
        [0.79628076626851163, 0.80528601550164602, 0.80809418318973791,
         0.7980698349711991, 0.75945930510650161, 0.74913610558765376,
         0.73683277781032397, 0.72964862063970914, 0.64082509648457675,
         0.55697288400004963])

    np.testing.assert_allclose(
        top_10_by_entropy_rice['entropy_rice'],
        [0.87361766576115552, 0.87272877244078051, 0.85931803868749834,
         0.85508015237749468, 0.82169723530719896, 0.81953527301129059,
         0.80589423784325431, 0.78602596561378812, 0.68611350427640316,
         0.56978827050565117])


# The checker takes the DataFrame as a plain argument, not as a fixture, so
# opt it out of pytest/nose auto-collection despite its ``test_`` prefix.
test_msas_df.__test__ = False

# you are on the right track if test_msas_df doesn't complain
# Run the checks on msas_df.  msas_df is not defined in this part of the
# file -- presumably it is built by the "FILL IN WITH YOUR CODE" section.
test_msas_df(msas_df)

# code to save your dataframe to a CSV
# upload the CSV to bCourses
# uncomment to run

# msas_df.to_csv("msas_2010.csv", encoding="UTF-8")

# load back the CSV and test again
# NOTE(review): DataFrame.from_csv is deprecated in favour of
# pandas.read_csv and is missing from recent pandas releases; there,
# read_csv("msas_2010.csv", index_col=0, encoding="UTF-8") looks like the
# closest equivalent -- confirm against your pandas version.

# df = DataFrame.from_csv("msas_2010.csv", encoding="UTF-8")
# test_msas_df(df)