Google tracks where you've been. On the one hand, that's a little creepy. On the other hand, at least they give you access to the raw data: you can grab a copy by exporting your location history from Google and saving the resulting .json file to your working directory as LocationHistory.json.
The JSON is not structured according to any geospatial standard (it is not GeoJSON, for example), but the location data is in there. If you want to work with the data in GeoPandas or a GIS file format, the procedure below will get you started.
Inspired by http://nbviewer.ipython.org/github/chrisalbon/code_py/blob/master/matplotlib_map_your_google_data.ipynb
import geopandas as gpd
import json
import datetime
from shapely.geometry import Point
# IPython magic: draw matplotlib figures inline in the notebook output.
%matplotlib inline
import matplotlib.pylab as pylab
# Default figure size for every plot below, as (width, height) in inches.
pylab.rcParams['figure.figsize'] = 16, 12
# Load the raw Google location export; json.load() parses directly from the
# open file handle.
with open("LocationHistory.json", 'r') as fh:
    data = json.load(fh)

# Every recorded fix lives in the list under the top-level 'locations' key.
locations = data['locations']

# print(...) with a single argument gives the same output on Python 2 and 3.
print(len(locations))
# First pass: collect every activity type that occurs anywhere in the data,
# so that generate_locations() can give each record one column per type.
# NOTE: 'activitys' (sic) is the key spelling used in the export file.
act_types = set()
for loc in locations:
    try:
        # Only the first entry of the 'activitys' list is inspected.
        for act in loc['activitys'][0]['activities']:
            act_types.add(act['type'])
    except (KeyError, IndexError, TypeError):
        # This record has no (or malformed) activity information; skip it.
        # Only lookup-style errors are swallowed, so real problems such as
        # KeyboardInterrupt are no longer hidden.
        continue

# Sorted list gives a stable, reproducible column order.
act_types = sorted(act_types)
print(act_types)
def generate_locations(locations):
    """Yield one flattened record per raw location entry.

    ``locations`` is an iterable of dicts as found under the 'locations'
    key of the export. Each must carry 'longitudeE7', 'latitudeE7' and
    'timestampMs'; a missing one raises KeyError.

    Every yielded dict is a shallow copy of the input record with these
    keys added:

    * 'lon', 'lat'      -- the E7 integer coordinates scaled by 1e-7
    * 'geometry'        -- shapely ``Point(lon, lat)``
    * 'timestampSec'    -- 'timestampMs' converted to seconds
    * 'datetime'        -- 'YYYY-MM-DD HH:MM:SS' string built with
                           ``datetime.fromtimestamp`` (naive local time)
    * one key per entry of the module-level ``act_types`` holding the
      confidence reported for that activity (0 when not reported)
    * 'likelyActivity'  -- the activity type with the highest confidence,
                           or '' when the record has no activity data

    The raw 'activitys', 'latitudeE7', 'longitudeE7' and 'timestampMs'
    keys are removed from the yielded dict. The input dicts themselves are
    left untouched, so this generator (and the activity-type pass that
    reads 'activitys') can safely be re-run on the same ``locations``.
    """
    for raw in locations:
        # Work on a copy so the caller's records keep their original keys.
        loc = dict(raw)

        loc['lon'] = loc['longitudeE7'] * 0.0000001
        loc['lat'] = loc['latitudeE7'] * 0.0000001
        loc['geometry'] = Point(loc['lon'], loc['lat'])
        loc['timestampSec'] = int(loc['timestampMs']) * 0.001  # ms to s
        loc['datetime'] = datetime.datetime.fromtimestamp(
            loc['timestampSec']).strftime('%Y-%m-%d %H:%M:%S')

        # Start every known activity column at zero confidence.
        for typ in act_types:
            loc[typ] = 0

        maxprob = 0
        bestguess = ""
        try:
            # Only the first entry of the 'activitys' list is inspected.
            for act in loc['activitys'][0]['activities']:
                conf = act['confidence']
                act_type = act['type']
                if conf > maxprob:
                    maxprob = conf
                    bestguess = act_type
                loc[act_type] = conf
        except (KeyError, IndexError, TypeError):
            # No (or malformed) activity data: keep whatever was gathered
            # so far and fall through with the current best guess.
            pass
        loc['likelyActivity'] = bestguess

        # Get rid of the raw columns that have been converted above.
        for key in ('activitys', 'latitudeE7', 'longitudeE7', 'timestampMs'):
            loc.pop(key, None)

        yield loc
# Build a GeoDataFrame from all processed records, tagged with a plain
# WGS84 longitude/latitude coordinate reference system.
wgs84_crs = {'proj': 'longlat', 'ellps': 'WGS84', 'datum': 'WGS84', 'no_defs': True}
records = generate_locations(locations)
alldf = gpd.GeoDataFrame(records, crs=wgs84_crs)
# Last expression of the cell: the notebook displays the first three rows.
alldf.head(3)
75045 [u'exitingVehicle', u'inVehicle', u'onBicycle', u'onFoot', u'still', u'tilting', u'unknown']
accuracy | altitude | datetime | exitingVehicle | geometry | heading | inVehicle | lat | likelyActivity | lon | onBicycle | onFoot | still | tilting | timestampSec | unknown | velocity | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 26 | NaN | 2014-09-07 18:50:26 | 0 | POINT (-122.7784913 45.5437714) | NaN | 0 | 45.543771 | still | -122.778491 | 0 | 0 | 100 | 0 | 1.410141e+09 | 0 | NaN |
1 | 26 | NaN | 2014-09-07 18:43:27 | 0 | POINT (-122.778479 45.5437834) | NaN | 0 | 45.543783 | still | -122.778479 | 0 | 0 | 100 | 0 | 1.410141e+09 | 0 | NaN |
2 | 30 | NaN | 2014-09-07 18:38:25 | 0 | POINT (-122.7785237 45.5437296) | NaN | 0 | 45.543730 | still | -122.778524 | 0 | 0 | 100 | 0 | 1.410140e+09 | 0 | NaN |
# Keep only the fixes whose reported 'accuracy' value is below 50
# (presumably a radius in metres, so smaller is better -- confirm against
# the export format).
df = alldf[alldf['accuracy'] < 50]
# print(...) with a single argument gives the same output on Python 2 and 3.
print(len(df))
66429
Next, limit the data to Northwest Portland, where I spend most of my days...
# Bounding box passed to the spatial index; the values read as
# (min lon, min lat, max lon, max lat).
nw_portland_bbox = (-122.7147, 45.51435, -122.6708, 45.54110)
hits = df.sindex.intersection(nw_portland_bbox, objects=True)

# Each hit's stored object is used as the row label to pull the matching
# row out of df; the rows are then reassembled into a new GeoDataFrame.
rows_in_bbox = [df.ix[hit.object] for hit in hits]
dfsub = gpd.GeoDataFrame(rows_in_bbox)
# Last expression of the cell: the notebook displays the subset size.
len(dfsub)
17938
# Draw the subset selected above; the figure is shown in the cell output.
dfsub.plot()
<matplotlib.axes.AxesSubplot at 0x7ff2eaa98050>
!rm location_history.*
df.to_file('location_history.shp')
WARNING:Fiona:OGR Error 6: Normalized/laundered field name: 'exitingVehicle' to 'exitingVeh' WARNING:Fiona:OGR Error 6: Normalized/laundered field name: 'likelyActivity' to 'likelyActi' WARNING:Fiona:OGR Error 6: Normalized/laundered field name: 'timestampSec' to 'timestampS'