"""Reshape Google Flu Trends city data to long format and attach coordinates.

Run as a script, this file:

1. downloads the wide Flu Trends table (one column per city) and rewrites it
   as one row per (date, city) with columns ``date``, ``flu``, ``city``;
2. geocodes every city name with Nominatim and saves ``city``/``lat``/``lng``;
3. left-merges the coordinates onto the long table, adds a parsed
   ``datetime`` column and ``fluround`` (``flu`` rounded to the nearest
   thousand), and overwrites the long CSV with the enriched table.

The reshaping/merging steps are plain functions so they can be imported and
exercised without touching the network.
"""
import time

import numpy as np
import pandas as pd

pd.options.display.max_columns = 5200
pd.options.display.max_rows = 5200

inFluGoo = 'https://www.google.org/flutrends/us/data.txt'
FLU_LONG_CSV = '/Users/danielmsheehan/GitHub/d3-presentation/data/flu/google/cities_flu_data_long.csv'
CITIES_XY_CSV = '/Users/danielmsheehan/GitHub/d3-presentation/data/flu/google/cities_xy.csv'

# Seconds to wait between geocoding requests.
pause = 2

# City columns to keep from the Flu Trends table; also the geocoding queries.
# Defined once: the same list drives both the reshape and the geocoding, so
# every city in the long table can receive coordinates in the merge.
citiesList = [
    'Anchorage, AK', 'Birmingham, AL', 'Little Rock, AR', 'Mesa, AZ',
    'Phoenix, AZ', 'Scottsdale, AZ', 'Tempe, AZ', 'Tucson, AZ',
    'Berkeley, CA', 'Fresno, CA', 'Irvine, CA', 'Los Angeles, CA',
    'Oakland, CA', 'Sacramento, CA', 'San Diego, CA', 'San Francisco, CA',
    'San Jose, CA', 'Santa Clara, CA', 'Sunnyvale, CA',
    'Colorado Springs, CO', 'Denver, CO', 'Washington, DC',
    'Gainesville, FL', 'Jacksonville, FL', 'Miami, FL', 'Orlando, FL',
    'Tampa, FL', 'Atlanta, GA', 'Roswell, GA', 'Honolulu, HI',
    'Des Moines, IA', 'Boise, ID', 'Chicago, IL', 'Indianapolis, IN',
    'Wichita, KS', 'Lexington, KY', 'Baton Rouge, LA', 'New Orleans, LA',
    'Boston, MA', 'Somerville, MA', 'Baltimore, MD', 'Grand Rapids, MI',
    'St Paul, MN', 'Kansas City, MO', 'Springfield, MO', 'St Louis, MO',
    'Jackson, MS', 'Cary, NC', 'Charlotte, NC', 'Durham, NC',
    'Greensboro, NC', 'Raleigh, NC', 'Lincoln, NE', 'Omaha, NE',
    'Newark, NJ', 'Albuquerque, NM', 'Las Vegas, NV', 'Reno, NV',
    'Albany, NY', 'Buffalo, NY', 'New York, NY', 'Rochester, NY',
    'Cleveland, OH', 'Columbus, OH', 'Dayton, OH', 'Oklahoma City, OK',
    'Tulsa, OK', 'Beaverton, OR', 'Eugene, OR', 'Portland, OR',
    'Philadelphia, PA', 'Pittsburgh, PA', 'State College, PA',
    'Providence, RI', 'Columbia, SC', 'Greenville, SC', 'Knoxville, TN',
    'Memphis, TN', 'Nashville, TN', 'Austin, TX', 'Dallas, TX',
    'Ft Worth, TX', 'Houston, TX', 'Irving, TX', 'Lubbock, TX',
    'Plano, TX', 'San Antonio, TX', 'Salt Lake City, UT', 'Arlington, VA',
    'Norfolk, VA', 'Reston, VA', 'Richmond, VA', 'Bellevue, WA',
    'Seattle, WA', 'Spokane, WA', 'Madison, WI', 'Milwaukee, WI',
]


def wide_to_long(wide, cities):
    """Return one row per (date, city) from a one-column-per-city table.

    Args:
        wide: DataFrame with a ``Date`` column plus one column per city.
        cities: City column names to keep, in the desired output order.

    Returns:
        DataFrame with columns ``date``, ``flu``, ``city``. Rows are grouped
        by city in the order given, each group in the original row order.

    Raises:
        KeyError: If ``Date`` or any requested city column is missing.
    """
    # A single melt replaces slicing one frame per city, assigning a column
    # into each slice and concatenating the pieces.
    long_df = pd.melt(
        wide,
        id_vars=['Date'],
        value_vars=list(cities),
        var_name='city',
        value_name='flu',
    )
    long_df = long_df.rename(columns={'Date': 'date'})
    return long_df[['date', 'flu', 'city']]


def geocode_cities(geolocator, cities, delay=pause):
    """Geocode each city name and return ``(city, lat, lng)`` tuples.

    Args:
        geolocator: Object with a ``geocode(query)`` method whose result is
            either ``None`` or exposes ``latitude`` and ``longitude``.
        cities: Iterable of query strings.
        delay: Seconds to sleep after every request.

    Returns:
        List of ``(city, lat, lng)`` tuples in input order. A city with no
        geocoding result gets NaN for both coordinates.
    """
    rows = []
    for city in cities:
        location = geolocator.geocode(city)
        print('{};{}'.format(city, location))
        if location is None:
            # Keep the city so the failure is visible in the output file.
            rows.append((city, np.nan, np.nan))
        else:
            # Read the coordinates directly instead of parsing them back out
            # of the result's string form.
            rows.append((city, location.latitude, location.longitude))
        time.sleep(delay)
    return rows


def build_coords_frame(rows):
    """Turn ``(city, lat, lng)`` tuples into a DataFrame with those columns."""
    return pd.DataFrame(rows, columns=['city', 'lat', 'lng'])


def attach_coords(flu_long, coords):
    """Left-merge coordinates onto the long flu table and add derived columns.

    Args:
        flu_long: DataFrame with at least ``date``, ``flu`` and ``city``.
        coords: DataFrame with ``city``, ``lat`` and ``lng``.

    Returns:
        New DataFrame with the columns of ``flu_long`` followed by ``lat``,
        ``lng``, ``datetime`` (``date`` parsed by ``pd.to_datetime``) and
        ``fluround`` (``flu`` rounded to the nearest thousand). Cities absent
        from ``coords`` keep their rows with missing coordinates.
    """
    merged = flu_long.merge(coords, how='left', on='city')
    merged['datetime'] = pd.to_datetime(merged['date'])
    merged['fluround'] = np.around(merged['flu'], -3)
    return merged


if __name__ == '__main__':
    # geopy is imported here, not at the top, so the helpers above can be
    # imported on a machine without geopy. The driver deliberately stays at
    # module level so the original global names (df, dfAll, s, citiesLoc)
    # remain available to any code that follows.
    import geopy  # noqa: F401  (kept from the original script)
    from geopy.geocoders import Nominatim

    # Stage 1: wide -> long.
    # header=11 makes row 11 (0-based) the header row; the rows above it are
    # skipped (presumably a text preamble in the published file).
    df = pd.read_csv(inFluGoo, header=11)
    df = df[['Date'] + citiesList]
    dfAll = wide_to_long(df, citiesList)
    dfAll.to_csv(FLU_LONG_CSV, index=False)

    # Stage 2: geocode the cities.
    # NOTE(review): recent geopy releases require an explicit user_agent for
    # Nominatim; adjust the string to identify the real application.
    geolocator = Nominatim(user_agent='d3-presentation-flu-geocoder')
    citiesLoc = geocode_cities(geolocator, citiesList, pause)
    print(citiesLoc)

    s = build_coords_frame(citiesLoc)
    print(s.head(10))
    s.to_csv(CITIES_XY_CSV, index=False)

    # Stage 3: enrich the long table and overwrite it.
    dfAll = pd.read_csv(FLU_LONG_CSV)
    dfAll = attach_coords(dfAll, s)
    print(dfAll.head(1000))
    dfAll.to_csv(FLU_LONG_CSV, index=False)