Valencia - PyConES 2015 - 2015/11/22
Slides and repo
You have to deal with data that has special rules.
Rules based on a reality not told in schools
Believe me, I know what I'm talking about
Although its the true shape of the Earth, we can't measure over it.
An Ellipsoid
On average, they are quite similar
Remember
The GEO information lays over something mathematical that has its own rules.
THE DATUM
You have to choose what you want to break:
In the best case you can choose two of the three.
(I'm skipping several courses on Cartography with this slide, trust me on this)
Remember
The GEO information uses a mathematical trick to make things flat orderly.
THE PROJECTION
Toghether a DATUM and a PROJECTION make a CRS
The most famous catalog of CRS is EPSG
OGC Simple Feature Access
There's a little bit of fuss
In theory it should be lon/lat (x,y)
But we are used to lat/lon
Always check things ... twice
KNOW YOUR (geo) DATA!!!
from fiona.collection import supported_drivers
for frmt in sorted(supported_drivers):
print("{:20}{}".format(frmt,supported_drivers[frmt]))
ARCGEN r AeronavFAA r BNA raw DGN raw DXF raw ESRI Shapefile raw FileGDB raw GMT raw GPKG rw GPSTrackMaker raw GPX raw GeoJSON rw Idrisi r MapInfo File raw OpenFileGDB r PCIDSK r PDS r SEGY r SUA r
select
ST_X(the_geom) as lon,
ST_Y(the_geom) as lat,
cartodb_id, actor_preferredusername, body, postedtime
from
jsanz.twitter_pycones_pycones2015_pycones15
import requests
url = 'https://jsanz.cartodb.com:443/api/v2/sql?q=select ST_X(the_geom) as lon, ST_Y(the_geom) as lat, cartodb_id, actor_preferredusername, body, postedtime from jsanz.twitter_pycones_pycones2015_pycones15&format=csv'
csv_file = '/tmp/tweets.csv'
with open(csv_file,'w') as csvfile:
req = requests.get(url)
csvfile.write(req.text)
target = '/tmp/tweets.shp'
epsg = 4258 # http://epsg.io/4258
driver = "ESRI Shapefile"
schema = {
"geometry": "Point",
"properties": {
("cartodb_id", "int"),
("lon","float"),
("lat","float"),
("author","str"),
("body","str"),
("postedtime","str")
}
}
import fiona, csv
from fiona.crs import from_epsg
output = fiona.open(target, "w", driver=driver,
crs=from_epsg(epsg), schema=schema)
with open(csv_file,'r') as csvfile:
csvreader = csv.reader(csvfile,delimiter=',',quotechar='"')
next(csvreader) #skip the header
for line in csvreader:
try:
x = float(line[0])
y = float(line[1])
feature = {
"geometry" : {
"coordinates" : (x, y), "type" : "Point"
},
"properties" : {
"cartodb_id" : int(line[2]),
"lon" : x,
"lat" : y,
"author" : line[3],
"body" : line[4],
"postedtime" : line[5]
}
}
output.write(feature)
except ValueError:
pass
try:
output.close()
except RuntimeError:
pass
WARNING:Fiona:CPLE_AppDefined in b"One or several characters couldn't be converted correctly from UTF-8 to ISO-8859-1.\nThis warning will not be emitted anymore."
source = fiona.open(target, 'r')
" ".join([atr for atr in dir(source) if atr[0] != '_'])
'bounds close closed crs crs_wkt driver enabled_drivers encoding env filter flush guard_driver_mode items iterator keys meta mode name next path schema session validate_record validate_record_geometry values write writerecords'
print("{} tweets\r\n".format(len(source)))
print("bounds: {}\r\n".format(source.bounds))
print("CRS: {}".format(source.crs))
1031 tweets bounds: (-123.26204, -43.24895, 151.77647, 55.95206) CRS: {'no_defs': True, 'proj': 'longlat', 'ellps': 'GRS80'}
print("{:3} - {:15} - {:^15}\r\n{:*^45}".format("ID","Author","Coords",""))
for f in source[:10]:
print("{:3} - {:15} - {}"
.format(f['properties']['cartodb_id'],
f['properties']['author'],
f['geometry']['coordinates']))
ID - Author - Coords ********************************************* 187 - rmajadas - (-3.69063, 40.42526) 39 - CValdeMontes - (2.15899, 41.38879) 249 - python_granada - (-3.60667, 37.18817) 247 - sdelquin - (-16.25462, 28.46824) 105 - ipedrazas - (-0.12574, 51.50853) 206 - rafbermudez - (-3.69063, 40.42526) 192 - rafbermudez - (-3.69063, 40.42526) 9 - seattle113 - (-4.52406, 42.00955) 35 - d1eg0_garc1a - (2.15899, 41.38879) 99 - ipedrazas - (-0.12574, 51.50853)
import folium
basemap = r'http://{s}.basemaps.cartocdn.com/light_all/{z}/{x}/{y}.png'
map = folium.Map(location=[39.5,-2.5],
zoom_start=6, width=960, height=600,
tiles=basemap, attr='OpenStreetMap and Twitter')
for f in source:
x,y = f['geometry']['coordinates']
map.simple_marker(
[y,x], #lat/lon!!!
popup=f['properties']['body'])
map
We can use the Well Known Text format to define a Shapely geometry.
from shapely.wkt import loads
pycones = loads("POINT (-0.346713 39.482767)")
pycones
" ".join([ atr for atr in dir(pycones) if atr[0] != '_'])
'almost_equals area array_interface array_interface_base boundary bounds buffer centroid contains convex_hull coords covers crosses ctypes difference disjoint distance empty envelope equals equals_exact geom_type geometryType has_z impl interpolate intersection intersects is_closed is_empty is_ring is_simple is_valid length overlaps project relate relate_pattern representative_point simplify svg symmetric_difference to_wkb to_wkt touches type union within wkb wkb_hex wkt x xy y z'
print('pycones {}\r\n'.format('is valid'
if pycones.is_valid else 'is not valid'))
print('WKT: {}\r\n'.format(pycones.wkt))
print('SVG: {}\r\n'.format(pycones.svg()))
pycones is valid WKT: POINT (-0.346713 39.482767) SVG: <circle cx="-0.346713" cy="39.482767" r="3.0" stroke="#555555" stroke-width="1.0" fill="#66cc99" opacity="0.6" />
We cannot create a buffer of 100km around a geodetic point
We need to compute the buffer on a projected CRS
Shapely provides a method to allow using pyproj with any geometry
import pyproj
from functools import partial
project = partial(
pyproj.transform,
pyproj.Proj(init='epsg:4258'),
pyproj.Proj(init='epsg:25830')
)
project_inv = partial(
pyproj.transform,
pyproj.Proj(init='epsg:25830'),
pyproj.Proj(init='epsg:4326')
)
import shapely.ops
pycones_25830 = shapely.ops.transform(project,pycones)
print(pycones_25830)
POINT (728199.1076832865 4373712.985082146)
Once projected we are ready to define a Shapely Point object, compute the buffer and project it back to lon/lat coordinates
from shapely.geometry import Point
p = Point(pycones_25830)
pycones_buffer_25830 = p.buffer(100000)
pycones_buffer = shapely.ops.transform(project_inv,pycones_buffer_25830)
pycones_buffer.bounds
(-1.5089067784607482, 38.582592551915646, 0.8139462109021063, 40.38277685271952)
All ready to read the Shapefile, check if every feature is intersected by the buffer and fill a list of tuples with the distance and the tweet.
import fiona
from shapely.geometry import shape
tweets = []
with fiona.open('/tmp/tweets.shp','r') as source:
for f in source:
geometry = shape(f['geometry'])
if pycones_buffer.intersects(geometry):
geometry_25830 = shapely.ops.transform(project,geometry)
distance = geometry_25830.distance(pycones_25830)
tweets.append((distance,f))
print("{} tweets at less than 100km".format(len(tweets)))
118 tweets at less than 100km
WARNING: We need to transform to projected coordinates to get a distance in meters!
Let's sort the results and print the closest 10 tweets to PyConES venue
tweets = sorted(tweets, key=lambda tweet: tweet[0])
print("{:17} - {:>10} - {:>4}\r\n{:*^37}".format("Author","Distance","ID",""))
for distance,tweet in tweets[:10]:
print("{:17} - {:10.2f} - {:4}"
.format(tweet['properties']['author'],
distance,tweet['properties']['cartodb_id']))
Author - Distance - ID ************************************* ch_doig - 30.17 - 666 ch_doig - 58.11 - 583 ch_doig - 60.24 - 601 manuerux - 379.37 - 718 luiyo - 514.44 - 779 xurxosanz - 514.44 - 780 vero4ka_ru - 514.44 - 783 sdelquin - 917.01 - 806 andres_sanchis - 3009.98 - 209 OFNblog - 3009.98 - 262
Let's use again folium but rendering also the buffer and with different colour for tweets inside.
basemap = r'http://{s}.basemaps.cartocdn.com/light_all/{z}/{x}/{y}.png'
map = folium.Map(location=[39.5,-2.5],
zoom_start=6, width=960, height=600,
tiles=basemap, attr='OpenStreetMap and Twitter')
buffer_coords = [ [lat,lon] for lon,lat in pycones_buffer.boundary.coords]
map.line(locations= buffer_coords)
with fiona.open('/tmp/tweets.shp','r') as source:
for f in source:
geometry = shape(f['geometry'])
color = 'red' if pycones_buffer.intersects(geometry) else 'blue'
x,y = f['geometry']['coordinates']
map.simple_marker([y,x], marker_color=color,
popup=f['properties']['body'])
map
From major Open Source desktop GIS projects:
Slides and repo