#Scenario 7
"""
scenario7 picks a runway but then has a switching probability
lat_init initial latitude of plane
lon_init initial longitude of plane
hrs hours to run simulation at
sph samples per hour. in other words, how often we update and plot the plane's position.
cruise_speed cruising speed of Boeing 777, 905 km/hr
eq_deg_km degrees per km at the equator
max_hrs amount of time plane can be in air
switch_prob probability of switching destination
"""
def scenario7(lat_init,lon_init,hrs,sph,cruise_speed,eq_deg_km,max_hrs,switch_prob):
samples = hrs * sph
max_flying_dist = cruise_speed * max_hrs # km/hr * hrs = km
sample_flying_dist = cruise_speed * hrs
runway_select = 0 #haven't chosen runway yet, so assign 0
while runway_select == 0:
runway_draw = np.int(np.random.choice(len(master_777_runway_avg),size=1))
dest_lat = master_777_runway_avg[runway_draw][0]
dest_lon = master_777_runway_avg[runway_draw][1]
dist_to_dest = haversine(earth_radius,lat_init,lon_init,dest_lat,dest_lon)
if dist_to_dest < max_flying_dist: #if we can fly there in the amount of time
if dist_to_dest > sample_flying_dist: #if we can't get there yet, in the amount of time to run simualtion
runway_select = 1 #we've chosen our runway
circle_dest_radius = haversine(earth_radius,lat_init,lon_init,dest_lat,dest_lon)
circle_of_dest = make_circle(circle_dest_radius,360,lon_init,lat_init)
heading = np.around(get_heading(lat_init,lon_init,dest_lat,dest_lon))
#make a master list of destinations to return, in event it'll change
dest_lat_list = []
dest_lon_list = []
dest_lat_list.append(dest_lat)
dest_lon_list.append(dest_lon)
#------------------------------------------------------------------------------#
plane_lat = np.zeros(samples+1)
plane_lon = np.zeros(samples+1)
plane_lat[0] = lat_init
plane_lon[0] = lon_init
plane_dist = (cruise_speed/eq_deg_km)/sph
for i in xrange(samples):
new_locations_circle = make_circle(plane_dist,360,plane_lon[0],plane_lat[0])
new_position = make_vector(plane_dist,heading, plane_lon[i], plane_lat[i])
plane_lat[i+1] = new_position[0] #i+1 because we had the initial coords be at 0th index, before 1st step
plane_lon[i+1] = new_position[1]
#add probabilistic condition of switching at the end
draw = np.random.uniform(low=0.0,high=1.0,size=1)
if draw <= switch_prob:
runway_select = 0 #haven't chosen runway yet, so assign 0
while runway_select == 0:
runway_draw = np.int(np.random.choice(len(master_777_runway_avg),size=1))
dest_lat = master_777_runway_avg[runway_draw][0]
dest_lon = master_777_runway_avg[runway_draw][1]
dist_to_dest = haversine(earth_radius,lat_init,lon_init,dest_lat,dest_lon)
if dist_to_dest < max_flying_dist: #if we can fly there in the amount of time
if dist_to_dest > sample_flying_dist: #if we can't get there yet, in the amount of time to run simualtion
runway_select = 1 #we've chosen our runway
circle_dest_radius = haversine(earth_radius,lat_init,lon_init,dest_lat,dest_lon)
circle_of_dest = make_circle(circle_dest_radius,360,lon_init,lat_init)
heading = np.around(get_heading(lat_init,lon_init,dest_lat,dest_lon))
dest_lat_list.append(dest_lat)
dest_lon_list.append(dest_lon)
return plane_lat,plane_lon,dest_lat_list,dest_lon_list
#test function
#w/ 5% switching probability
testlats,testlons,testdestlat,testdestlon = scenario7(igariwaypoint[0],igariwaypoint[1],4,sph,cruise_speed,eq_deg_km,max_hrs,0.05)
#Set figure size
fig = plt.figure(figsize=[30,20])
#Setup Basemap
fig = Basemap(width=13000000,height=15000000,projection='lcc',resolution='c',lat_0=5,lon_0=100,suppress_ticks=True)
#Call figure function
figure_function(fig)
#Put other plots in here -- they don't have to be in any strict order necessarily.
x6,y6 = fig(testlons,testlats)
x7,y7 = fig(testdestlon,testdestlat)
fig.plot(x6,y6,'go',markersize=5,label='Plane Path')
fig.plot(x7,y7,'go',markersize=10,label='Target Runway')
#Make legend
legend = plt.legend(loc='lower left',fontsize=10,frameon=True,title='Legend',markerscale=1,prop={'size':15})
legend.get_title().set_fontsize('20')
#Add title
plt.title('Scenario 7', fontsize=30)
#Show below
plt.show()
#Scenario 8
"""
scenario8 picks a city but then has a switching probability
lat_init initial latitude of plane
lon_init initial longitude of plane
hrs hours to run simulation at
sph samples per hour. in other words, how often we update and plot the plane's position.
cruise_speed cruising speed of Boeing 777, 905 km/hr
eq_deg_km degrees per km at the equator
max_hrs amount of time plane can be in air
switch_prob probability of switching destination
"""
def scenario8(lat_init,lon_init,hrs,sph,cruise_speed,eq_deg_km,max_hrs,switch_prob):
samples = hrs * sph
max_flying_dist = cruise_speed * max_hrs # km/hr * hrs = km
sample_flying_dist = cruise_speed * hrs
city_draw = np.int(np.random.choice(len(asiatoptencities),size=None,p=fractions))
dest_lat = asiatoptencities['Lat'][city_draw]
dest_lon = asiatoptencities['Lon'][city_draw]
circle_dest_radius = haversine(earth_radius,lat_init,lon_init,dest_lat,dest_lon)
circle_of_dest = make_circle(circle_dest_radius,360,lon_init,lat_init)
heading = np.around(get_heading(lat_init,lon_init,dest_lat,dest_lon))
#make a master list of destinations to return, in event it'll change
dest_lat_list = []
dest_lon_list = []
dest_lat_list.append(dest_lat)
dest_lon_list.append(dest_lon)
#------------------------------------------------------------------------------#
plane_lat = np.zeros(samples+1)
plane_lon = np.zeros(samples+1)
plane_lat[0] = lat_init
plane_lon[0] = lon_init
plane_dist = (cruise_speed/eq_deg_km)/sph
for i in xrange(samples):
new_locations_circle = make_circle(plane_dist,360,plane_lon[0],plane_lat[0])
new_position = make_vector(plane_dist,heading, plane_lon[i], plane_lat[i])
plane_lat[i+1] = new_position[0]
plane_lon[i+1] = new_position[1]
#add probabilistic condition of switching at the end
draw = np.random.uniform(low=0.0,high=1.0,size=1)
if draw <= switch_prob:
city_draw = np.int(np.random.choice(len(asiatoptencities),size=None,p=fractions))
dest_lat = asiatoptencities['Lat'][city_draw]
dest_lon = asiatoptencities['Lon'][city_draw]
dest_lat_list.append(dest_lat)
dest_lon_list.append(dest_lon)
circle_dest_radius = haversine(earth_radius,lat_init,lon_init,dest_lat,dest_lon)
circle_of_dest = make_circle(circle_dest_radius,360,lon_init,lat_init)
heading = np.around(get_heading(lat_init,lon_init,dest_lat,dest_lon))
return plane_lat,plane_lon,dest_lat_list,dest_lon_list
#test function
#w/ 5% switching probability
testlats,testlons,testdestlat,testdestlon = scenario8(igariwaypoint[0],igariwaypoint[1],4,sph,cruise_speed,eq_deg_km,max_hrs,0.05)
#Set figure size
fig = plt.figure(figsize=[30,20])
#Setup Basemap
fig = Basemap(width=13000000,height=15000000,projection='lcc',resolution='c',lat_0=5,lon_0=100,suppress_ticks=True)
#Call figure function
figure_function(fig)
#Put other plots in here -- they don't have to be in any strict order necessarily.
x6,y6 = fig(testlons,testlats)
x7,y7 = fig(testdestlon,testdestlat)
fig.plot(x6,y6,'go',markersize=5,label='Plane Path')
fig.plot(x7,y7,'go',markersize=10,label='Target City')
#Make legend
legend = plt.legend(loc='lower left',fontsize=10,frameon=True,title='Legend',markerscale=1,prop={'size':15})
legend.get_title().set_fontsize('20')
#Add title
plt.title('Scenario 8', fontsize=30)
#Show below
plt.show()
#Scenario 9
"""
scenario9 picks either scen1 or scen2 a certain portion of the time.
lat_init initial latitude of plane
lon_init initial longitude of plane
hrs hours to run simulation at
sph samples per hour. in other words, how often we update and plot the plane's position.
cruise_speed cruising speed of Boeing 777, 905 km/hr
eq_deg_km degrees per km at the equator
max_hrs amount of time plane can be in air
scen1 the name of the first scenario to test (get from function names above)
scen2 the name of the other scenario to test (get from function names above)
dominant_prob probability of the dominant scenario occurring
"""
def scenario9(lat_init,lon_init,hrs,sph,cruise_speed,eq_deg_km,max_hrs,scen1,scen2,dominant_prob):
draw = np.random.uniform(low=0.0,high=1.0,size=1)
if draw <= dominant_prob:
plane_lat,plane_lon,dest_lat,dest_lon = scen1(lat_init,lon_init,hrs,sph,cruise_speed,eq_deg_km,max_hrs)
else:
plane_lat,plane_lon,dest_lat,dest_lon = scen2(lat_init,lon_init,hrs,sph,cruise_speed,eq_deg_km,max_hrs)
return plane_lat,plane_lon,dest_lat,dest_lon
n = 10
#initialize to store multiple simulations as list -> to ensure that both cities and runways are being chosen
testlat_list, testlon_list, testdestlat_list, testdestlon_list = [],[],[],[]
for i in xrange(n):
testlats,testlons,testdestlat,testdestlon = scenario9(igariwaypoint[0],igariwaypoint[1],4,sph,cruise_speed,eq_deg_km,max_hrs,scenario1,scenario2,0.5)
testlat_list.append(testlats)
testlon_list.append(testlons)
testdestlat_list.append(testdestlat)
testdestlon_list.append(testdestlon)
#Set figure size
fig = plt.figure(figsize=[30,20])
#Setup Basemap
fig = Basemap(width=13000000,height=15000000,projection='lcc',resolution='c',lat_0=5,lon_0=100,suppress_ticks=True)
#Call figure function
figure_function(fig)
#Put other plots in here -- they don't have to be in any strict order necessarily.
x6,y6 = fig(np.array(testlon_list),np.array(testlat_list))
x7,y7 = fig(np.array(testdestlon_list),np.array(testdestlat_list))
fig.plot(x6,y6,'go',markersize=5)
fig.plot(x7,y7,'go',markersize=10,label='Target Destination')
#Make legend
legend = plt.legend(loc='lower left',fontsize=10,frameon=True,title='Legend',markerscale=1,prop={'size':15})
legend.get_title().set_fontsize('20')
#Add title
plt.title('Scenario 9', fontsize=30)
#Show below
plt.show()
#Scenario 10
"""
scenario10 picks either scen1 or scen2 a certain portion of the time.
right now, have in mind scenario7 and scenario8.
lat_init initial latitude of plane
lon_init initial longitude of plane
hrs hours to run simulation at
sph samples per hour. in other words, how often we update and plot the plane's position.
cruise_speed cruising speed of Boeing 777, 905 km/hr
eq_deg_km degrees per km at the equator
max_hrs amount of time plane can be in air
scen1 the name of the first scenario to test (get from function names above)
scen2 the name of the other scenario to test (get from function names above)
dominant_prob probability of the dominant scenario occurring
"""
def scenario10(lat_init,lon_init,hrs,sph,cruise_speed,eq_deg_km,max_hrs,scen1,scen2,dominant_prob):
draw = np.random.uniform(low=0.0,high=1.0,size=1)
if draw <= dominant_prob:
plane_lat,plane_lon,dest_lat,dest_lon = scen1(lat_init,lon_init,hrs,sph,cruise_speed,eq_deg_km,max_hrs,dominant_prob)
else:
plane_lat,plane_lon,dest_lat,dest_lon = scen2(lat_init,lon_init,hrs,sph,cruise_speed,eq_deg_km,max_hrs,dominant_prob)
return plane_lat,plane_lon,dest_lat,dest_lon
n = 10
#initialize to store multiple simulations as list -> to ensure that both cities and runways are being chosen
testlat_list, testlon_list, testdestlat_list, testdestlon_list = [],[],[],[]
for i in xrange(n):
testlats,testlons,testdestlat,testdestlon = scenario10(igariwaypoint[0],igariwaypoint[1],4,sph,cruise_speed,eq_deg_km,max_hrs,scenario7,scenario8,0.5)
testlat_list.append(testlats)
testlon_list.append(testlons)
testdestlat_list.append(testdestlat)
testdestlon_list.append(testdestlon)
testdestlat_list2 = []
testdestlon_list2 = []
for i in xrange(len(testdestlat_list)):
for j in xrange(len(testdestlat_list[i])):
testdestlat_list2.append(testdestlat_list[i][j])
for i in xrange(len(testdestlon_list)):
for j in xrange(len(testdestlon_list[i])):
testdestlon_list2.append(testdestlon_list[i][j])
#Set figure size
fig = plt.figure(figsize=[30,20])
#Setup Basemap
fig = Basemap(width=13000000,height=15000000,projection='lcc',resolution='c',lat_0=5,lon_0=100,suppress_ticks=True)
#Call figure function
figure_function(fig)
#Put other plots in here -- they don't have to be in any strict order necessarily.
x6,y6 = fig(np.array(testlon_list),np.array(testlat_list))
x7,y7 = fig(np.array(testdestlon_list2),np.array(testdestlat_list2))
fig.plot(x6,y6,'go',markersize=5)
fig.plot(x7,y7,'go',markersize=10,label='Target Destination')
#Make legend
legend = plt.legend(loc='lower left',fontsize=10,frameon=True,title='Legend',markerscale=1,prop={'size':15})
legend.get_title().set_fontsize('20')
#Add title
plt.title('Scenario 10', fontsize=30)
#Show below
plt.show()
"""
sim_gen generate simulations
inputs from other functions used in this function
---------------
lat_init initial latitude of plane (set to Igari Waypoint as default)
lon_init initial longitude of plane (set to Igari Waypoint as default)
hrs hours to run simulation at
sph samples per hour. in other words, how often we update and plot the plane's position.
(set to 60 by default)
cruise_speed cruising speed of Boeing 777, 905 km/hr (set to 905 by default)
eq_deg_km degrees per km at the equator (set to default of eq_deg_km = 111.32)
max_hrs amount of time plane can be in air (set to 6.5 hrs by default)
inputs specific to this function
---------------
n number of flights to simulate
dominant_prob probability of the dominant scenario occurring
scen1 the name of the first scenario to test (get from function names above)
scen2 the name of the other scenario to test (get from function names above)
scen1text text to label scen1 as (i.e. 'runway'). *pass in as string*
scen2text text to label scen2 as (i.e. 'city') *pass in as string*
"""
def sim_gen(n,dominant_prob,scen1,scen2,scen1text,scen2text,hrs,
eq_deg_km=111.32,max_hrs=6.5,cruise_speed=905,
sph=60,lat_init=igariwaypoint[0],lon_init=igariwaypoint[1]):
simdata = []
for i in xrange(n):
draw = np.random.uniform(low=0.0,high=1.0,size=1) #make uniform draw
if draw <= dominant_prob:
lats,lons,destLat,destLon = scen1(lat_init,lon_init,hrs,sph,cruise_speed,eq_deg_km,max_hrs)
tempDataToStore = [scen1text,lats,lons,destLat,destLon]
simdata.append(tempDataToStore)
else:
lats,lons,destLat,destLon = scen2(lat_init,lon_init,hrs,sph,cruise_speed,eq_deg_km,max_hrs)
tempDataToStore = [scen2text,lats,lons,destLat,destLon]
simdata.append(tempDataToStore)
return simdata
simdata = sim_gen(200,0.9,scenario1,scenario2,'runway','city',2)
"""
scenario_viz plots distribution of 2 option scenarios. right now, hardcoded for runway vs city.
simdata the simulated data
scen1text text to label scen1 as (i.e. 'runway'). *pass in as string*
scen2text text to label scen2 as (i.e. 'city') *pass in as string*
"""
def scenario_viz(simdata,scen1text,scen2text):
simtype = []
for i in xrange(len(simdata)):
simtypeI = simdata[i][0]
if simtypeI == scen1text:
simtype.append(0) #scen1text is 0
if simtypeI == scen2text:
simtype.append(1) #scen2text is 1
#Plot to make sure we generated the correct portion of runways vs. cities:
plt.figure()
plt.hist(simtype)
plt.title(scen1text +' vs.'+scen2text+' scenario ratio')
plt.ylabel('%')
plt.xlabel(scen1text +' (left) or '+scen2text+ ' (right)')
plt.show()
citysum = np.sum(simtype)
print "There are "+str(len(simtype)-citysum)+" "+ scen1text + " scenarios"
print "There are "+str(citysum)+" "+scen2text+ " scenarios"
scenario_viz(simdata,'runway','city')
There are 180 runway scenarios There are 20 city scenarios
"""
add_heading adds heading information to our simulated data
simdata the simulated data
"""
def add_heading(simdata):
for i in xrange(len(simdata)):
currentSim = simdata[i]
lats = simdata[i][1]
lons = simdata[i][2]
headingList = []
for j in xrange(1,len(lats)): #lats & lons are same length
startLat = lats[j-1]
startLon = lons[j-1]
endLat = lats[j]
endLon = lons[j]
heading = get_heading(startLat,startLon,endLat,endLon)
headingList.append(heading)
currentSim.append(headingList)
simdata[i] = currentSim
return currentSim
currentSim = add_heading(simdata)
"""
add_noise adds noise (error) to our simulated data
simdata the simulated data
currentSim the heading information to the simulated data
scen1text text to label scen1 as (i.e. 'runway') *pass in as string*
scen2text text to label scen2 as (i.e. 'city') *pass in as string*
"""
def add_noise(simdata,currentSim,scen1text,scen2text):
for i in xrange(len(simdata)):
# get the current simulation
currentSim = simdata[i]
headingList = currentSim[5]
# perturb more if scen2 (rarer scenario)
if currentSim[0] == scen1text:
headingListNew = [h*((100 + np.random.normal(0,0.2))/100) for h in headingList]
if currentSim[0] == scen2text:
headingListNew = [h*((100 + np.random.normal(0,2))/100) for h in headingList]
currentSim[5] = headingListNew
simdata[i] = currentSim
return simdata, currentSim
simdata, currentSim = add_noise(simdata,currentSim,'runway','city')
"""
hmm hidden markov model. our main hidden markov model function, from scikitlearn.
simdata our simulated data
n_components number of components
functions we use for indices
---------------
hrs hours to run simulation at
sph samples per hour. in other words, how often we update and plot the plane's position.
"""
def hmm(simdata,n_components,hrs,sph):
# Collect data into one column, getting differenced heading info
headingDifData = []
startIndices = []
for i in xrange(len(simdata)):
h = simdata[i][5]
headingDif = [ h[j] - h[j-1] for j in xrange(1,len(h)) ]
startIndices.append(len(headingDifData))
for h in headingDif:
headingDifData.append(h)
#print len(headingDifData)
#manipulate array via numpy's column_stack
# http://docs.scipy.org/doc/numpy/reference/generated/numpy.column_stack.html
X = np.column_stack([headingDifData])
# ------ Run Gaussian HMM ------
print "fitting to HMM and decoding ...",
# make an HMM instance and execute fit
model = GaussianHMM(n_components, "diag")
model.fit([X])
hidden_states = model.predict(X)
print 'mean of hidden states: '+ str(np.mean(hidden_states))
hiddenStatesList =[]
#define index ranges to separate simulations. for 2 hrs, 60 samples/hr
#it's every 119, since 60*2 120 but have 1 less heading pair change.
idx_range = sph*hrs-1
for i in xrange(len(simdata)):
startIdx = i*idx_range
endIdx = (i+1)*idx_range
predHiddenStates = [hidden_states[startIdx:endIdx]]
hiddenStatesList.append(predHiddenStates[0]) #append hidden states runway or city prediction for each simulation
#how many individual samples we have across all our simulations
print 'how many individual samples we have across all our simulations: '+str(len(hidden_states))
#how many simulations we have
print 'how many simulations we have: '+str(len(hiddenStatesList))
return simdata, hiddenStatesList
simdata, hiddenStatesList = hmm(simdata,2,2,60)
fitting to HMM and decoding ... mean of hidden states: 0.0896218487395 how many individual samples we have across all our simulations: 23800 how many simulations we have: 200
print "actual scenario is: ", simdata[0][0]
print "lats are:", simdata[0][1]
print "lons are:", simdata[0][2]
print "------------------------"
print "dest lat/lon is: "+str(simdata[0][3])+", "+str(simdata[0][4])
actual scenario is: city lats are: [ 6.93666667 6.80519619 6.67372571 6.54225523 6.41078475 6.27931426 6.14784378 6.0163733 5.88490282 5.75343234 5.62196186 5.49049138 5.3590209 5.22755042 5.09607994 4.96460946 4.83313898 4.7016685 4.57019802 4.43872754 4.30725706 4.17578658 4.0443161 3.91284562 3.78137514 3.64990466 3.51843418 3.3869637 3.25549322 3.12402274 2.99255226 2.86108178 2.7296113 2.59814082 2.46667034 2.33519985 2.20372937 2.07225889 1.94078841 1.80931793 1.67784745 1.54637697 1.41490649 1.28343601 1.15196553 1.02049505 0.88902457 0.75755409 0.62608361 0.49461313 0.36314265 0.23167217 0.10020169 -0.03126879 -0.16273927 -0.29420975 -0.42568023 -0.55715071 -0.68862119 -0.82009167 -0.95156215 -1.08303263 -1.21450311 -1.34597359 -1.47744408 -1.60891456 -1.74038504 -1.87185552 -2.003326 -2.13479648 -2.26626696 -2.39773744 -2.52920792 -2.6606784 -2.79214888 -2.92361936 -3.05508984 -3.18656032 -3.3180308 -3.44950128 -3.58097176 -3.71244224 -3.84391272 -3.9753832 -4.10685368 -4.23832416 -4.36979464 -4.50126512 -4.6327356 -4.76420608 -4.89567656 -5.02714704 -5.15861752 -5.290088 -5.42155849 -5.55302897 -5.68449945 -5.81596993 -5.94744041 -6.07891089 -6.21038137 -6.34185185 -6.47332233 -6.60479281 -6.73626329 -6.86773377 -6.99920425 -7.13067473 -7.26214521 -7.39361569 -7.52508617 -7.65655665 -7.78802713 -7.91949761 -8.05096809 -8.18243857 -8.31390905 -8.44537953 -8.57685001 -8.70832049 -8.83979097] lons are: [ 103.585 103.61801185 103.65101474 103.68400887 103.7169944 103.74997152 103.7829404 103.81590122 103.84885416 103.88179939 103.91473709 103.94766745 103.98059062 104.0135068 104.04641616 104.07931887 104.11221511 104.14510506 104.17798889 104.21086677 104.24373889 104.27660541 104.30946651 104.34232237 104.37517316 104.40801905 104.44086023 104.47369685 104.50652911 104.53935717 104.5721812 104.60500138 104.63781789 104.67063089 104.70344056 104.73624708 104.76905061 104.80185134 104.83464942 104.86744505 104.90023838 104.93302959 104.96581886 104.99860636 105.03139226 105.06417673 105.09695995 105.12974209 105.16252332 105.19530381 105.22808374 105.26086328 105.2936426 105.32642188 105.35920129 105.39198099 105.42476117 105.45754199 105.49032363 105.52310626 105.55589005 105.58867518 105.62146182 105.65425014 105.68704031 105.71983251 105.75262691 105.78542369 105.81822301 105.85102504 105.88382997 105.91663797 105.94944921 105.98226385 106.01508209 106.04790408 106.08073 106.11356004 106.14639435 106.17923312 106.21207652 106.24492472 106.2777779 106.31063623 106.34349989 106.37636905 106.40924388 106.44212457 106.47501129 106.50790421 106.5408035 106.57370936 106.60662194 106.63954142 106.67246799 106.70540182 106.73834308 106.77129196 106.80424862 106.83721326 106.87018603 106.90316713 106.93615673 106.969155 107.00216213 107.0351783 107.06820368 107.10123846 107.1342828 107.1673369 107.20040093 107.23347508 107.26655951 107.29965442 107.33275998 107.36587638 107.3990038 107.43214241 107.46529241 107.49845397 107.53162727] ------------------------ dest lat/lon is: -6.2, 106.8
"""
scen2_indices finds out which indices were the alternate rarer scenario we are trying to detect.
simdata our simulated data
scen2text text to label scen2 as (i.e. 'city') *pass in as string*
"""
def scen2_indices(simdata,scen2text):
actualcityscens = []
for i in xrange(len(simdata)):
if simdata[i][0] == scen2text:
actualcityscens.append(i)
print "indices of "+str(scen2text)+": "+str(actualcityscens)
return actualcityscens
actualcityscens = scen2_indices(simdata,'city')
indices of city: [0, 1, 12, 14, 31, 38, 44, 53, 80, 102, 103, 121, 137, 139, 140, 151, 164, 185, 193, 196]
"""
hmm_performance take indices of the actual rare events, and see whether HMM predicted them to be rare or the
other, more common scenario.
hiddenStatesList list of all of the HMM guesses for every sample of every flight simulation
actualcityscens the indices of the rare scenario
"""
def hmm_performance(hiddenStatesList,actualcityscens):
return [hiddenStatesList[i] for i in actualcityscens]
guess = hmm_performance(hiddenStatesList,actualcityscens)
guess
[array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0]), array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0]), array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), array([0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]), array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0]), array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0]), array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), array([1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1]), array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])]
"""
scen2_parameters returns lat,lon and dest lat/lon of the rare scenarios
for example, cities if runway/city and 90%/10%
simdata the simulated data
actualcityscens indices of the rare scenario
"""
def scen2_parameters(simdata,actualcityscens):
citylats = [simdata[i][1] for i in actualcityscens]; #print citylats
citylons = [simdata[i][2] for i in actualcityscens]; #print citylons
citydestlat = [simdata[i][3] for i in actualcityscens]; #print citydestlat
citydestlon = [simdata[i][4] for i in actualcityscens]; #print citydestlon
#convert to numpy arrays for plotting purposes
citylons = np.array(citylons)
citylats = np.array(citylats)
citydestlat = np.array(citydestlat)
citydestlon = np.array(citydestlon)
return citylons,citylats,citydestlat,citydestlon
citylons,citylats,citydestlat,citydestlon = scen2_parameters(simdata,actualcityscens)
print len(citylats[0])
print len(guess[0])
121 119
x,y = citylats.shape
#print x,y
#create new array 2 less than originals since we're dropping first and last elements
citylons2 = np.zeros((x,y-1))
citylats2 = np.zeros((x,y-1))
print citylons2.shape
citylons3 = np.zeros((x,y-2))
citylats3 = np.zeros((x,y-2))
print citylons3.shape
(20L, 120L) (20L, 119L)
# iterate through lats and lons and drop first & last entries
for i in xrange(len(citylats2)): #for each of the city scenario simulations except the end
citylons2[i] = citylons[i][:-1]
citylats2[i] = citylats[i][:-1]
for i in xrange(len(citylats3)):
citylons3[i] = citylons2[i][1:]
citylats3[i] = citylats2[i][1:]
#print citylons3.shape
#print citylats3.shape
#latitudes
citylats3_city = []
citylats3_runway = []
for i in xrange(len(citylats3)):
for j in xrange(len(citylats3[i])):
if guess[i][j] == 0: #if HMM predicts it's a runway scenario
citylats3_runway.append(citylats3[i][j])
if guess[i][j] == 1: #if HMM predicts it's a city scenario
citylats3_city.append(citylats3[i][j])
citylats3_city = np.array(citylats3_city)
citylats3_runway = np.array(citylats3_runway)
#longitudes
citylons3_city = []
citylons3_runway = []
for i in xrange(len(citylons3)):
for j in xrange(len(citylons3[i])):
if guess[i][j] == 0: #if HMM predicts it's a runway scenario (or the dominant scenario)
citylons3_runway.append(citylons3[i][j])
if guess[i][j] == 1: #if HMM predicts it's a city scenario (or the rare scenario)
citylons3_city.append(citylons3[i][j])
citylons3_city = np.array(citylons3_city)
citylons3_runway = np.array(citylons3_runway)
print "samples identified as city scenarios: ", len(citylats3_city)
print "samples misidentified as runway scenarios:", len(citylats3_runway)
#print len(citylons3_city)
#print len(citylons3_runway)
print "When it *is* a city scenario, the model is right %r percent of the time." %np.around(100*np.float(len(citylats3_city))/(len(citylats3_runway)+len(citylats3_city)))
samples identified as city scenarios: 1958 samples misidentified as runway scenarios: 422 When it *is* a city scenario, the model is right 82.0 percent of the time.
What this means is that when it's actually a city... the HMM is right more often than not.
That took longer than expected, but it worked. Now we plot some of the paths on a map, and color code accordingly if they're a 0 or 1:
¶#Set figure size
fig = plt.figure(figsize=[30,20])
#Setup Basemap
fig = Basemap(width=13000000,height=15000000,projection='lcc',resolution='c',lat_0=5,lon_0=100,suppress_ticks=True)
#Call figure function
figure_function(fig)
#Put other plots in here -- they don't have to be in any strict order necessarily.
x6,y6 = fig(citylons3_city,citylats3_city)
x7,y7 = fig(citylons3_runway,citylats3_runway)
x8,y8 = fig(citydestlon,citydestlat)
fig.plot(x6,y6,'go',markersize=5,alpha=0.8,label='HMM Correct - City') #alpha adjusts opacity. 0 is nothing. 1 is solid.
fig.plot(x7,y7,'ro',markersize=5,alpha=0.3,label='HMM Wrong - Runway') #adjust opacity since red darker than green
fig.plot(x8,y8,'go',markersize=10,label='Target City')
#Make legend
legend = plt.legend(loc='lower left',fontsize=10,frameon=True,title='Legend',markerscale=1,prop={'size':15})
legend.get_title().set_fontsize('20')
#Add title
plt.title('Actual City Scenarios vs. HMM Prediction', fontsize=30)
#Show below
plt.show()
allguess = [hiddenStatesList[i] for i in xrange(len(simdata))]
allcitylats = [simdata[i][1] for i in xrange(len(simdata))]
allcitylons = [simdata[i][2] for i in xrange(len(simdata))]
allcitydestlat = [simdata[i][3] for i in xrange(len(simdata))]
allcitydestlon = [simdata[i][4] for i in xrange(len(simdata))]
#convert for plotting purposes
allcitylons = np.array(allcitylons)
allcitylats = np.array(allcitylats)
allcitydestlat = np.array(allcitydestlat)
allcitydestlon = np.array(allcitydestlon)
allguess = np.array(allguess)
print len(allcitylats)
200
print allguess.shape
(200L, 119L)
x,y = allcitylats.shape
#create new array 2 less than originals since we're dropping first and last elements
allcitylons2 = np.zeros((x,y-1))
allcitylats2 = np.zeros((x,y-1))
allcitylons3 = np.zeros((x,y-2))
allcitylats3 = np.zeros((x,y-2))
# iterate through lats and lons and drop first & last entries
for i in xrange(len(allcitylats2)): #for each of the city scenario simulations except the end
allcitylons2[i] = allcitylons[i][:-1]
allcitylats2[i] = allcitylats[i][:-1]
for i in xrange(len(allcitylats3)):
allcitylons3[i] = allcitylons2[i][1:]
allcitylats3[i] = allcitylats2[i][1:]
#latitudes
allcitylats3_right = []
allcitylats3_wrong = []
for i in xrange(len(allcitylats3)):
for j in xrange(len(allcitylats3[i])):
if allguess[i][j] == 0 and simdata[i][0] == 'runway': #if HMM predicts it's a runway scenario and true
allcitylats3_right.append(allcitylats3[i][j])
if allguess[i][j] == 1 and simdata[i][0] == 'city': #if HMM predicts it's a city scenario and true
allcitylats3_right.append(allcitylats3[i][j])
else:
allcitylats3_wrong.append(allcitylats3[i][j])
allcitylats3_right = np.array(allcitylats3_right)
allcitylats3_wrong = np.array(allcitylats3_wrong)
#longitudes
allcitylons3_right = []
allcitylons3_wrong = []
for i in xrange(len(allcitylons3)):
for j in xrange(len(allcitylons3[i])):
if allguess[i][j] == 0 and simdata[i][0] == 'runway': #if HMM predicts it's a runway scenario and true
allcitylons3_right.append(allcitylons3[i][j])
if allguess[i][j] == 1 and simdata[i][0] == 'city': #if HMM predicts it's a city scenario and true
allcitylons3_right.append(allcitylons3[i][j])
else:
allcitylons3_wrong.append(allcitylons3[i][j])
allcitylons3_right = np.array(allcitylons3_right)
allcitylons3_wrong = np.array(allcitylons3_wrong)
right = np.float(len(allcitylats3_right))
print "number times HMM right: ", np.int(right)
#print len(allcitylons3_right)
wrong = np.float(len(allcitylats3_wrong))
print "number times HMM wrong: ", np.int(wrong)
#print len(allcitylons3_wrong)
percentright = 100*right/(right+wrong)
print "In an effort to detect most of the city scenarios when they occur, the model is right %r percent of the time." %np.around(percentright)
number times HMM right: 23203 number times HMM wrong: 21842 In an effort to detect most of the city scenarios when they occur, the model is right 52.0 percent of the time.
#Set figure size
fig = plt.figure(figsize=[30,20])
#Setup Basemap
fig = Basemap(width=13000000,height=15000000,projection='lcc',resolution='c',lat_0=5,lon_0=100,suppress_ticks=True)
#Call figure function
figure_function(fig)
#Put other plots in here -- they don't have to be in any strict order necessarily.
x6,y6 = fig(allcitylons3_right,allcitylats3_right)
x7,y7 = fig(allcitylons3_wrong,allcitylats3_wrong)
x8,y8 = fig(allcitydestlon,allcitydestlat)
fig.plot(x6,y6,'go',markersize=5,alpha=0.9,label='HMM -- Right') #alpha adjusts opacity. 0 is nothing. 1 is solid.
fig.plot(x7,y7,'ro',markersize=5,alpha=0.1,label='HMM -- Wrong')
fig.plot(x8,y8,'go',markersize=10,label='Target Destination')
#Make legend
legend = plt.legend(loc='lower left',fontsize=10,frameon=True,title='Legend',markerscale=1,prop={'size':15})
legend.get_title().set_fontsize('20')
#Add title
plt.title('All Simulations vs. HMM Predictions', fontsize=30)
#Show below
plt.show()
What this means is that as it stands, our HMM is great at identifying the rare city scenarios when they do occur, but it also raises many false alarms. So, this has only limited effectiveness, as if it picked everything to be runways, it'd be right 90% of the time but miss all of the scenarios it wanted it to find. Instead, it picks out most cities, and is right more than 10% of the time if it just picked everything to be cities -- over half the time, in fact -- but is overzealous in identifying city scenarios compared to a runway. (Of course, some runways are near cities so practically they're the same thing, even if in our model they're not -- but still...)
Above, we see the limitation of our HMM. We can be right when it counts...but then we'll be wrong when it doesn't count, more than we'd like to. The main way we detect the rare city is via higher variance in one scenario vs. another -- which is a plausible assumption about the scenarios, but is not a more tailor-made emissions function to describe the nuances of how the scenarios differ from each other. We pick up a lot of false instances of runways as cities (when they're in fact runways), to ensure we get and recognize most of the city scenarios when they do in fact occur.
¶Finally, we look at a separate problem that stems from our noisy data: reducing the noise of the flight path, which could aid in the detection of where the plane is heading (in a straight line). The key thing about both Scenario 1 and Scenario 2 that we have chosen as examples, is that the heading and velocity will be maintained throughout the scenario. It's not in the noisy data, but we should be able to tune our model to get it close to zero as possible. To do this we take the data from the simulations we ran as our test case. We will create a Kalman Filter model and try to come up with the ideal path of the plane, inferred through a change in heading that's as close to zero as possible, based on adjusting our Kalman parameters.
(We have to create a model in order to apply the filter, which is perhaps why Kalman Filters are not used more often. This is not a blackbox method approach -- you really need to understand what is going on, to set it up.)
¶kalmansimdata = sim_gen(100,0.9,scenario1,scenario2,'runway','city',2)
kalmancurrentSim = add_heading(kalmansimdata)
# Heading Change
headingDifData = []
startIndices = []
for i in xrange(len(kalmansimdata)):
h = kalmansimdata[i][5]
headingDif = [ h[j] - h[j-1] for j in xrange(1,len(h)) ]
startIndices.append(len(headingDifData))
for h in headingDif:
headingDifData.append(h)
print "heading changes"
print "# of heading changes: ", len(headingDifData)
headingDifData[0:10]
heading changes # of heading changes: 11900
[-0.0059499962516156302, -0.0060313425897220441, -0.0061127225129098406, -0.0061941364836570756, -0.0062755849487530213, -0.0063570683475404621, -0.006438587145908059, -0.0065201417769458203, -0.006601732700175944, -0.0066833603676741404]
kalmansimdata_err, kalmancurrentSim_err = add_noise(kalmansimdata,kalmancurrentSim,'runway','city')
headingDifData_err = []
startIndices_err = []
for i in xrange(len(kalmansimdata_err)):
h_err = kalmansimdata_err[i][5]
headingDif_err = [ h_err[j] - h_err[j-1] for j in xrange(1,len(h_err)) ]
startIndices_err.append(len(headingDifData_err))
for h_err in headingDif_err:
headingDifData_err.append(h_err)
print "heading changes w/ error"
print "# of heading changes: ", len(headingDifData_err)
headingDifData_err[0:10]
heading changes w/ error # of heading changes: 11900
[0.68562717552993035, -0.71829155424899227, -0.6360958819886946, 0.75619909293209275, -0.37927622502769509, -0.95258339006062442, 0.44235340684679159, 0.46924857015511634, 0.20350388374629347, 1.6054973788712346]
fig = plt.figure(figsize=[10,6])
plt.plot(headingDifData, 'ro', label='heading change')
plt.title('heading change, all simulations, pre-error add', fontsize=20)
legend = plt.legend()
plt.xlabel('sample #', fontsize=15)
plt.ylabel('heading change',fontsize=15)
plt.xlim([0,len(headingDifData)])
plt.show()
fig = plt.figure(figsize=[10,6])
plt.plot(headingDifData_err, 'bo', label='heading changes, observed values')
plt.plot(headingDifData, 'ro', label='heading change, true values')
plt.title('heading change due to error, all simulations', fontsize=20)
legend = plt.legend()
plt.xlabel('sample #', fontsize=15)
plt.ylabel('heading change',fontsize=15)
plt.xlim([0,len(headingDifData)])
plt.show()
Now we write our Kalman Filter function. The Kalman Filter is a predictive-corrective algorithm. We predict what the future will be, and then compare and adjust. The design pattern has two main steps: the Predictive Step and the Update Step (which are sometimes called slightly different names.)
The predict step is the first two lines which correspond to the first two lines of the 'for' loop. The first line updates the linear model (or, rather, predict the next step of the linear model.) The second line predicts the next covariance matrix. Next, comes the measurement update step, where we calculate the relevant variables on the fly at every iteration.
¶"""
kalman Kalman Filter model (sometimes called scalar/linear)
n number of steps
initial conditions
----------------------------------
init_state initial conditions estimate, state (choose) - change in heading. is [position,velocity].
init_covar initial conditions estimate, covariance (choose) - the expected error in the initial state.
the matrices that define the model
----------------------------------
F state transition matrix - at each discrete time step we update the model by multiplying the
state vector by the state transition matrix.
Q process variance matrix - the system noise. noise in the actual process itself
(not measurement noise.)
H measurement matrix - describes how the system state, or what parts, are measured
by the measurements.
R measurement covariance matrix - set at the start, and is our anticipated accuracy of
measuring.
data the data to perform the Kalman Filter on
"""
def kalman(n,init_state,init_covar,F,Q,H,R,data):
#need one more than samples since the first one is the initial values
steps = n+1
#initialize coordinates
results = np.zeros((steps,2))
#assign first entry to be the starting point
results[0] = init_state
#where the state of the system starts
x_new = init_state.T
#how much we anticipate this first system state, is off by
p_new = init_covar.T
#assume velocity is zero, but make a 2 array to add velocity to data if we had a different example
measured = np.zeros((n,2))
for i in xrange(len(data)):
measured[i][0] = data[i] #assign our observed data to first entry of array
measured[i][1] = 0 #assign velocity to 0 since we don't use this component
for i in xrange(1,steps):
#Predict Step
#update the model to the new timestep (called newt)
#Predicted (a priori) state estimate -> http://en.wikipedia.org/wiki/Kalman_filter#Details
x_newt = np.dot(F,x_new) #needed to transpose this to make it work
#the predicted value of the covariance matrix
#Predicted (a priori) estimate covariance -> http://en.wikipedia.org/wiki/Kalman_filter#Details
p_newt = np.dot(np.dot(F,p_new),F.T) + Q
#Measurement Update Step
#Calculate residuals by comparing the measured value to the model prediction.
#Innovation or measurement residual -> http://en.wikipedia.org/wiki/Kalman_filter#Details
y = measured[i-1] - np.dot(H,x_newt)
#Residual covariance matrix
#Innovation (or residual) covariance -> http://en.wikipedia.org/wiki/Kalman_filter#Details
s_new = np.dot(np.dot(H,p_newt),H.T) + R
#Kalman Gain
#Optimal Kalman gain -> http://en.wikipedia.org/wiki/Kalman_filter#Details
k_new = np.dot(np.dot(p_newt,H.T),np.linalg.inv(s_new))
#Corrected State Estimate
#Updated (a posteriori) state estimate -> http://en.wikipedia.org/wiki/Kalman_filter#Details
x_new = x_newt + np.dot(k_new,y)
#Saving the result
results[i] = x_new.T[0]
#Corrected covariance estimate that will be used as part of the next step
#Updated (a posteriori) estimate covariance -> http://en.wikipedia.org/wiki/Kalman_filter#Details
p_new = np.dot( (np.eye(2)-np.dot(k_new,H)), p_newt)
return results
#Setup
#Pick this constant -- can change
covariance_guess = 0.0001 #adjust this to be super small (played with results to tune)
#State transition matrix
F = np.matrix(([1,1],[0,1])); print "F: "; print F
#Process variance matrix
Q = covariance_guess*np.matrix(([1./4,1./2],[1./2,1])); print "Q: "; print Q
#Measurement matrix
H = np.matrix(([1,0],[0,0])); print "H: "; print H
#Measurement variance matrix
R = 1*np.matrix(([1,0],[0,1])); print "R: "; print R
#Initial Conditions
#The state of our system is position and velocity, which is represented as a length 2 vector.
#The first component is position; the second is velocity, the change of the first component with respect to time.
init_state = np.matrix(([0,0])); print "init_state: "; print init_state
#Next is how much we think the initial state is off by.
init_covar = np.matrix(([1,0],[0,1])); print "init_covar: "; print init_covar
F: [[1 1] [0 1]] Q: [[ 2.50000000e-05 5.00000000e-05] [ 5.00000000e-05 1.00000000e-04]] H: [[1 0] [0 0]] R: [[1 0] [0 1]] init_state: [[0 0]] init_covar: [[1 0] [0 1]]
The Kalman Filter is based on a linear model of the system. At every time step we take the position and add the velocity (change in position with time) to it. We assume our time step is 1.
For a simple dataset, we'll guess [0,0] for our initial state because we'll expect to have no heading change. [1,0],[0,1], our initial covariance, says that we have very little confidence that our initial state is what it is. We could pass in all zeros if we were confident that our initial guess would be close to the reality. All zeros mean that we have complete confidence in our measurements. Depending on context, we can start from a known location, in which case we would be more sure, or alternatively an unknown starting point which should converge over time to see the behavior we are examining.
¶#Run Kalman Filter on data
kf = kalman(len(headingDifData_err),init_state,init_covar,F,Q,H,R,headingDifData_err)
#Only keep 1st entry since we don't really have a control state in this example.
kf1 = np.zeros(len(kf))
for i in xrange(len(kf)):
kf1[i] = kf[i][0]
#Plot results
fig = plt.figure(figsize=[10,6])
plt.scatter(range(len(headingDifData_err)),headingDifData_err)
plt.plot(kf1,color='red')
plt.title('All heading changes', fontsize=20)
plt.xlabel('samples',fontsize=15)
plt.ylabel('value',fontsize=15)
plt.xlim([0,len(headingDifData)])
plt.show()
Our result is smoothed out almost like our original data. This may very well be since we have extreme variance in one type of rare scenario (city, in our case), whereas we are tuning our Kalman Filter for the more common scenario (runway). Recall that we know that there are two types of errors added -- one for city and one for runway.
Let's if this is the case by splitting up our runway and city scenarios. Ironically, if we make the result less smooth we are able to distinguish runway from city visually, even better.
¶#Split up runway and city scenarios
kalmansimdata_city = []
kalmansimdata_runway = []
for i in xrange(len(kalmansimdata_err)):
if kalmansimdata_err[i][0] == 'runway':
kalmansimdata_runway.append(kalmansimdata_err[i])
elif kalmansimdata_err[i][0] == 'city':
kalmansimdata_city.append(kalmansimdata_err[i])
print "# of city scenarios: ", len(kalmansimdata_city)
print "# of runway scenarios: ", len(kalmansimdata_runway)
# of city scenarios: 16 # of runway scenarios: 84
# Heading Change
#Runway
headingDifData_runway = []
startIndices_runway = []
for i in xrange(len(kalmansimdata_runway)):
h_runway = kalmansimdata_runway[i][5]
headingDif_runway = [ h_runway[j] - h_runway[j-1] for j in xrange(1,len(h_runway)) ]
startIndices_runway.append(len(headingDifData_runway))
for h_runway in headingDif_runway:
headingDifData_runway.append(h_runway)
#City
headingDifData_city = []
startIndices_city = []
for i in xrange(len(kalmansimdata_city)):
h_city = kalmansimdata_city[i][5]
headingDif_city = [ h_city[j] - h_city[j-1] for j in xrange(1,len(h_city)) ]
startIndices_city.append(len(headingDifData_city))
for h_city in headingDif_city:
headingDifData_city.append(h_city)
headingDifData_runway[0:20]
[0.68562717552993035, -0.71829155424899227, -0.6360958819886946, 0.75619909293209275, -0.37927622502769509, -0.95258339006062442, 0.44235340684679159, 0.46924857015511634, 0.20350388374629347, 1.6054973788712346, -1.4408957787704821, -0.071000980427072591, 1.088632477143392, -1.9005634710284767, 0.97018604527829666, 0.62885014822057883, -1.8574597701306743, 0.08263863336594568, 1.4517250487336923, -1.0146679033302917]
headingDifData_city[0:20]
[-0.1099432387491035, 1.8230157554691289, -0.91584992395443976, -0.18363385861638193, -0.84367125259485221, 0.3924123796650818, -0.00070315832054035354, 0.80330401767336923, -0.12189271271410718, -0.82546277948348035, 0.072262553244307526, 0.58615701256256614, -0.20963953198192087, -0.45313440950149442, 0.46746045385084756, 1.0758924386421782, -1.6754802953017176, -0.76357764472861334, 0.77881294225866782, 0.79346761065809801]
fig = plt.figure(figsize=[10,6])
plt.plot(headingDifData_city, 'ro', label='city heading change')
plt.plot(headingDifData_runway, 'bo', label='runway heading change')
plt.title('heading change, city vs. runway scenarios', fontsize=20)
legend = plt.legend()
plt.xlabel('sample #', fontsize=15)
plt.ylabel('heading change',fontsize=15)
plt.show()
#make test data
testdata = []
xaxis = range(100)
for i in xaxis:
#We made an arbitrary function below that's random to test out Kalman Filter on unrelated material
#to show that the implementation does not just depend on our sitation, if you adjust the starting parameters
testdata.append(0.5*i + np.random.uniform(low=1.0,high=3.0,size=1)*np.random.uniform(low=-2.0,high=2.0,size=1))
fig = plt.figure(figsize=[10,6])
plt.scatter(xaxis,testdata)
plt.title('test data', fontsize=20)
plt.xlabel('samples',fontsize=15)
plt.ylabel('value',fontsize=15)
plt.show()
#Setup
#Pick this constant -- can change
covariance_guess = 0.01 #in this case, a smaller starting number will give a more linear look to our result
#we can pump this number down, but leaving as is because this a good naive first guess
#State transition matrix
F = np.matrix(([1,1],[0,1])); print "F: "; print F
#Process variance matrix
Q = covariance_guess*np.matrix(([1./4,1./2],[1./2,1])); print "Q: "; print Q
#Measurement matrix
H = np.matrix(([1,0],[0,0])); print "H: "; print H
#Measurement variance matrix
R = 5*np.matrix(([1,0],[0,1])); print "R: "; print R #we pick this too -- 4 or 5 works better than 1 as a multiplier
#This initial guess doesn't matter too much in the end in this example
init_state = np.matrix(([0,0])); print "initcond1: "; print init_state
init_covar = np.matrix(([100,0],[0,100])); print "initcond2: "; print init_covar
#could be [10,0] or [1000,0], etc. This indicates we don't have much confidence in our starting state
F: [[1 1] [0 1]] Q: [[ 0.0025 0.005 ] [ 0.005 0.01 ]] H: [[1 0] [0 0]] R: [[5 0] [0 5]] initcond1: [[0 0]] initcond2: [[100 0] [ 0 100]]
thetest = kalman(100,initcond1,initcond2,F,Q,H,R,testdata)
thetest1 = np.zeros(len(thetest))
for i in xrange(len(thetest)):
thetest1[i] = thetest[i][0]
fig = plt.figure(figsize=[10,6])
plt.scatter(xaxis,testdata)
plt.plot(thetest1,color='red')
plt.title('Kalman Filter on Test Data', fontsize=20)
plt.xlabel('samples',fontsize=15)
plt.ylabel('value',fontsize=15)
plt.show()