IRuby.html ''
require 'rgeo'
require 'rgeo-geojson'
require 'dbscan'
require 'nyaplot'
Nyaplot.init_iruby
geomstr = '{"type":"FeatureCollection","features":[{"type":"Feature","properties":{"user_id":638},"geometry":{"type":"Polygon","coordinates":[[[-73.98620970547199,40.7356342514617],[-73.98627072572708,40.735547874977094],[-73.98632504045963,40.73557226364293],[-73.98622445762157,40.73570995781772],[-73.9861835539341,40.73569268254945],[-73.98621775209902,40.735640856717666]]]}},{"type":"Feature","properties":{"user_id":666},"geometry":{"type":"Polygon","coordinates":[[[-73.98620769381522,40.73563526765495],[-73.9862660318613,40.735547874977094],[-73.98632504045963,40.735570739351566],[-73.98622579872608,40.73570944972167],[-73.98618154227734,40.73569217445325],[-73.98621775209902,40.73563933242788]]]}},{"type":"Feature","properties":{"session_id":"79e7ee062a9e0333926e3e1fdc3e92db"},"geometry":{"type":"Polygon","coordinates":[[[-73.98632369935513,40.735570739351566],[-73.98622512817383,40.73570944972167],[-73.98618154227734,40.73569014206842],[-73.98621909320354,40.735640856717666],[-73.98620970547199,40.73563526765495],[-73.98627005517483,40.73554889117169]]]}},{"type":"Feature","properties":{"session_id":"3d3003b26bb6b2f3b9577924b9ed5e0e"},"geometry":{"type":"Polygon","coordinates":[[[-73.98621842265129,40.7356423810074],[-73.98620903491974,40.73563577575159],[-73.98627139627934,40.735547874977094],[-73.98632436990738,40.735571755545806],[-73.98622579872608,40.73570995781772],[-73.98618087172508,40.735689633972214]]]}},{"type":"Feature","properties":{"user_id":596},"geometry":{"type":"Polygon","coordinates":[[[-73.98626938462257,40.73554889117167],[-73.98632369935513,40.735572771740024],[-73.98622445762157,40.73570894162559],[-73.98618154227734,40.73569065016463],[-73.98621775209902,40.735640856717666],[-73.98620836436749,40.735634251461676]]]}},{"type":"Feature","properties":{"session_id":"0afaf74383ce51aceba02fc49ce5a9e3"},"geometry":{"type":"Polygon","coordinates":[[[-73.98621775209902,40.73563984052446],[-73.98620836436749,40.73563272717173],[-73.98626938462257,40.735550415463514],[-73.98632235825062,40.73557124744871],[-73.98622360456956,40.73570641325812],[-73.98618768252459,40.73568957578454]]]}},{"type":"Feature","properties":{"user_id":538},"geometry":{"type":"Polygon","coordinates":[[[-73.98632571101189,40.735571755545806],[-73.98622378706932,40.73570995781772],[-73.98618288338184,40.73569268254945],[-73.98621775209902,40.73564034862108],[-73.9862110465765,40.7356362838482],[-73.98627005517483,40.735550923560815]]]}},{"type":"Feature","properties":{"user_id":580},"geometry":{"type":"Polygon","coordinates":[[[-73.98632436990738,40.73557124744871],[-73.98626066744328,40.7356581319994],[-73.98625999689102,40.7356581319994],[-73.98620903491974,40.735634759558316],[-73.98626804351805,40.735547874977094]]]}},{"type":"Feature","properties":{"user_id":580},"geometry":{"type":"Polygon","coordinates":[[[-73.98626133799553,40.7356581319994],[-73.98622579872608,40.73570944972167],[-73.98618154227734,40.73569166635704],[-73.98621842265129,40.73563984052446]]]}},{"type":"Feature","properties":{"user_id":548},"geometry":{"type":"Polygon","coordinates":[[[-73.98620970547199,40.73563475955834],[-73.98627005517483,40.73554990736624],[-73.98632369935513,40.735571755545806],[-73.98622360456956,40.73570641325812],[-73.9861848950386,40.735689633972214],[-73.98621842265129,40.735640856717666]]]}},{"type":"Feature","properties":{"session_id":"53056025663f6d6564a39975971cb87c"},"geometry":{"type":"Polygon","coordinates":[[[-73.98621909320354,40.735638316234656],[-73.98620836436749,40.7356362838482],[-73.98620769381522,40.73563577575159],[-73.98627005517483,40.73554939926897],[-73.98632302880287,40.73557023125444],[-73.98622360456956,40.73570641325812],[-73.98617953062057,40.735689633972214]]]}}]}'
geocollection = RGeo::GeoJSON.decode(geomstr, :json_parser => :json)
def parse_geojson(json)
RGeo::GeoJSON.decode(json, :json_parser => :json)
end
geocollection.first.geometry
def get_centroid(poly_feature)
return if (poly_feature.geometry.geometry_type.type_name != "Polygon")
c = poly_feature.geometry.centroid
return [c.x, c.y]
end
centroid = get_centroid(geocollection.first)
def get_all_centroids(geom)
centroids = {}
geom.each_with_index do |poly,index|
next if (poly.geometry.geometry_type.type_name != "Polygon")
centroids[index] = get_centroid(poly)
end
return centroids
end
centroids = get_all_centroids(geocollection)
plot = Nyaplot::Plot.new
plot.width(400)
plot.height(400)
plot.zoom(true)
plot.rotate_x_label(-60)
points_x = centroids.map { |p| p[1][0] }
points_y = centroids.map { |p| p[1][1] }
df = Nyaplot::DataFrame.new({x:points_x,y:points_y})
# add some padding
xmin = points_x.min - 1e-5
xmax = points_x.max + 1e-5
ymin = points_y.min - 1e-5
ymax = points_y.max + 1e-5
plot.xrange([xmin,xmax])
plot.yrange([ymin,ymax])
# end padding
sc = plot.add_with_df(df, :scatter, :x, :y)
plot.show
dists = []
done = {}
centroids.each_with_index do |cc1,i|
centroids.each_with_index do |cc2,j|
c1 = cc1[1]
c2 = cc2[1]
dists.push({:dist=>Math.hypot(c1[0]-c2[0],c1[1]-c2[1]),:from=>i,:to=>j,:from_centroid=>c1,:to_centroid=>c2}) if (c1 != c2 && !done[[c2,c1]])
done[[c1,c2]] = true
end
end
dists = dists.sort_by!{|k| k[:dist]}
dist_df = Nyaplot::DataFrame.new(dists)
def cluster_centroids(centroids, epsilon=1.8e-06, min_points=2)
dbscan = DBSCAN( centroids.map{|c| c[1]}, :epsilon => epsilon, :min_points => min_points, :distance => :euclidean_distance )
return dbscan.results.select{|k,v| k != -1} # omit the non-cluster
end
centroid_clusters = cluster_centroids(centroids)
def plot_clusters(clusters)
plot = Nyaplot::Plot.new
plot.width(300)
plot.height(400)
plot.zoom(true)
plot.rotate_x_label(-60)
pts = clusters.map{|c| c[1]}.flatten(1)
# add some padding
xmin = pts.map {|p| p[0]}.min - 1e-5
xmax = pts.map {|p| p[0]}.max + 1e-5
ymin = pts.map {|p| p[1]}.min - 1e-5
ymax = pts.map {|p| p[1]}.max + 1e-5
plot.xrange([xmin,xmax])
plot.yrange([ymin,ymax])
# now plot
clusters.each do |cluster|
if cluster[0] != -1 # ignore cluster -1 because not enough points
cluster_x = cluster[1].map { |c| c[0] }
cluster_y = cluster[1].map { |c| c[1] }
names = cluster[1].map { |c| cluster[0] }
df = Nyaplot::DataFrame.new({x:cluster_x,y:cluster_y,cluster:names})
sc = plot.add_with_df(df, :scatter, :x, :y)
sc.tooltip_contents([:cluster])
color = "#"+ "%06x" % (rand * 0xffffff)
sc.color(color)
end
end
plot.show
return plot
end
plot = plot_clusters(centroid_clusters)
plot.show
# given a list of centroids (lon,lat), find their poly's index in the centroid list (index => lon,lat)
def get_polys_for_centroid_cluster(cluster, centroids, original_polys)
polys = []
cluster.each do |cl|
index = centroids.select {|k,v| v == cl}.keys.first
polys.push(original_polys[index]) if index != -1
end
return polys
end
cluster_polygons = get_polys_for_centroid_cluster(centroid_clusters[0], centroids, geocollection)
def get_points(poly_feature)
geom = poly_feature.geometry
return false if (geom.geometry_type.type_name != "Polygon")
pts = []
points = geom.exterior_ring.points
points.each do |point|
pts.push([point.x,point.y])
end
return pts
end
def plot_polys(polys)
plot = Nyaplot::Plot.new
plot.width(500)
plot.height(500)
plot.zoom(true)
plot.rotate_x_label(-60)
polys.each do |poly|
plot_poly(poly, plot)
end
plot.show
end
def plot_poly(poly, plot = nil)
showplot = false
if plot == nil
showplot = true
plot = Nyaplot::Plot.new
plot.width(500)
plot.height(500)
plot.zoom(true)
plot.rotate_x_label(-60)
end
points = get_points(poly)
points_x = points.map { |p| p[0] }
points_y = points.map { |p| p[1] }
df = Nyaplot::DataFrame.new({x:points_x,y:points_y})
sc = plot.add_with_df(df, :scatter, :x, :y)
color = "#"+ "%06x" % (rand * 0xffffff)
sc.color(color)
plot.show if showplot
end
plot_polys(cluster_polygons)
def get_all_poly_points(polys)
points = []
polys.each do |poly|
points.push(get_points(poly))
end
return points
end
cluster_poly_points = get_all_poly_points(cluster_polygons)
def cluster_points(points, epsilon=6e-06, min_points=2)
dbscan = DBSCAN( points.flatten(1), :epsilon => epsilon, :min_points => min_points, :distance => :euclidean_distance )
return dbscan.results.select{|k,v| k != -1} # omit the non-cluster
end
# exclude first item in each poly since it is same as last
unique_points = cluster_poly_points.map{|poly| poly[1..-1]}
vertex_clusters = cluster_points(unique_points)
plot = plot_clusters(vertex_clusters)
plot.show
class Array
def sum
inject(0.0) { |result, el| result + el }
end
def mean
sum / size
end
end
def get_mean_poly(clusters)
means = {}
clusters.each do |cluster|
next if cluster[0] == -1 # ignore cluster -1
lon = cluster[1].map {|c| c[0]}.mean
lat = cluster[1].map {|c| c[1]}.mean
means[cluster[0]] = [lon,lat]
end
return means
end
mean_poly = get_mean_poly(vertex_clusters)
# plot clusters with overlaid (yellow) mean points
plot = plot_clusters(vertex_clusters)
# add means
m_x = mean_poly.map { |m| m[1][0] }
m_y = mean_poly.map { |m| m[1][1] }
sc = plot.add(:scatter, m_x, m_y)
color = "#ffff00"
sc.color(color)
sc.shape('diamond')
plot.show
def validate_clusters(clusters, unique_points)
average = (unique_points.flatten.count.to_f / (unique_points.size * 2).to_f).round
return clusters.select{|k,v| k!=-1}.size >= average
end
validate_clusters(vertex_clusters, unique_points)
def find_connected_point(point, original_points)
original_points.each do |poly|
poly.each_with_index do |p,index|
return poly[index+1] if point === p
end
end
return
end
def find_cluster_for_point(point, clusters)
clusters.each do |cluster|
cluster[1].each do |p|
return cluster[0] if point === p && cluster[0] != -1
end
end
return
end
def connect_clusters(clusters, original_points)
connections = {}
# for each cluster
clusters.each do |cluster|
# for each point in cluster
if cluster[0] != -1 # exclude invalid cluster
cluster_votes = {} # to weigh connection popularity (diff pts might be connected to diff clusters)
cluster[1].each do |point|
# find original point connected to it
connection = find_connected_point(point, original_points)
connected_cluster = find_cluster_for_point(connection, clusters)
# if original point belongs to another cluster
if connected_cluster != nil && connected_cluster != cluster[0]
# vote for the cluster
cluster_votes[connected_cluster] = 0 if cluster_votes[connected_cluster] == nil
cluster_votes[connected_cluster] += 1
end
end
connections[cluster[0]] = cluster_votes.sort_by{|k, v| v}
next if connections[cluster[0]].size == 0
connections[cluster[0]] = connections[cluster[0]].reverse[0][0]
end
end
return connections
end
connections = connect_clusters(vertex_clusters, cluster_poly_points)
def sort_connections(connections)
# does some simple check for non-circularity
sorted = []
seen = {}
as_list = connections.select{|k,v| k}
done = false
first = as_list.first[0]
from = first
while !done do
to = connections[from]
done = true if seen[to] || to == nil || to.size == 0
seen[to] = true
from = to
sorted.push(to)
done = true if seen.size == connections.size
end
return nil if seen.size != connections.size
return sorted
end
# testing sort function
sort_connections(connections)
def connect_mean_poly(mean_poly, connections)
connected = []
sorted = sort_connections(connections)
return nil if sorted == nil
sorted.each do |c|
connected.push([mean_poly[c][0], mean_poly[c][1]])
end
# for GeoJSON, last == first
first = sorted[0]
connected.push([mean_poly[first][0], mean_poly[first][1]])
return connected
end
final_polygon = connect_mean_poly(mean_poly, connections)
plot = plot_clusters(vertex_clusters)
m_x = final_polygon.map { |m| m[0] }
m_y = final_polygon.map { |m| m[1] }
sc = plot.add(:scatter, m_x, m_y)
color = "#ffff00"
sc.color(color)
sc.shape('diamond')
# add the MEAN POLYGON
final_polygon.each_with_index do |c, i|
next if i >= final_polygon.size-1
from = [ final_polygon[i][0], final_polygon[i+1][0] ]
to = [ final_polygon[i][1], final_polygon[i+1][1] ]
plot.add(:line, from, to)
end
plot.show
def calculate_polygonfix_consensus(geojson)
output = []
geom = parse_geojson(geojson)
centroids = get_all_centroids(geom)
centroid_clusters = cluster_centroids(centroids)
centroid_clusters.each do |ccluster|
next if ccluster[0] == -1
cluster = ccluster[1] # only the set of latlons
sub_geom = get_polys_for_centroid_cluster(cluster, centroids, geom)
next if sub_geom.size == 0
original_points = get_all_poly_points(sub_geom)
next if original_points == nil
unique_points = original_points.map{|poly| poly[1..-1]}
vertex_clusters = cluster_points(unique_points)
next if !validate_clusters(vertex_clusters, unique_points)
mean_poly = get_mean_poly(vertex_clusters)
next if mean_poly == {}
connections = connect_clusters(vertex_clusters, original_points)
next if connections == nil || connections == {}
poly = connect_mean_poly(mean_poly, connections)
next if poly == nil || poly.count == 0
output.push(poly)
end
return output
end
consensus = calculate_polygonfix_consensus(geomstr)
geo_json = {:type => "FeatureCollection", :features => consensus.map { |f| {:type => "Feature", :properties => { :id => 1 }, :geometry => { :type => "Polygon", :coordinates =>[f] } } } }.to_json
puts geo_json
IRuby.html ''