Notebook

Now we'll write 7th and 8th scenarios that do the cities or the runways ... but with probabilites of switching.¶

In [58]:

#Scenario 7
"""    
scenario7         picks a runway but then has a switching probability

lat_init          initial latitude of plane
lon_init          initial longitude of plane

hrs               hours to run simulation at
sph               samples per hour. in other words, how often we update and plot the plane's position.
cruise_speed      cruising speed of Boeing 777, 905 km/hr
eq_deg_km         degrees per km at the equator
max_hrs           amount of time plane can be in air

switch_prob       probability of switching destination
"""
def scenario7(lat_init,lon_init,hrs,sph,cruise_speed,eq_deg_km,max_hrs,switch_prob):
    
    samples = hrs * sph
    
    max_flying_dist = cruise_speed * max_hrs # km/hr * hrs = km
    
    sample_flying_dist = cruise_speed * hrs
    
    runway_select = 0 #haven't chosen runway yet, so assign 0
    while runway_select == 0:
        runway_draw = np.int(np.random.choice(len(master_777_runway_avg),size=1))

        dest_lat = master_777_runway_avg[runway_draw][0]
        dest_lon = master_777_runway_avg[runway_draw][1]

        dist_to_dest = haversine(earth_radius,lat_init,lon_init,dest_lat,dest_lon)
        
        if dist_to_dest < max_flying_dist: #if we can fly there in the amount of time
            if dist_to_dest > sample_flying_dist: #if we can't get there yet, in the amount of time to run simualtion
                runway_select = 1 #we've chosen our runway

    circle_dest_radius = haversine(earth_radius,lat_init,lon_init,dest_lat,dest_lon)   
    circle_of_dest = make_circle(circle_dest_radius,360,lon_init,lat_init)

    heading = np.around(get_heading(lat_init,lon_init,dest_lat,dest_lon))

    #make a master list of destinations to return, in event it'll change
    dest_lat_list = []
    dest_lon_list = []
    dest_lat_list.append(dest_lat)
    dest_lon_list.append(dest_lon)
    
    #------------------------------------------------------------------------------#
    
    plane_lat = np.zeros(samples+1)
    plane_lon = np.zeros(samples+1)  
    plane_lat[0] = lat_init
    plane_lon[0] = lon_init

    plane_dist = (cruise_speed/eq_deg_km)/sph 
    
    for i in xrange(samples):
        new_locations_circle = make_circle(plane_dist,360,plane_lon[0],plane_lat[0])
        new_position = make_vector(plane_dist,heading, plane_lon[i], plane_lat[i])
    
        plane_lat[i+1] = new_position[0] #i+1 because we had the initial coords be at 0th index, before 1st step
        plane_lon[i+1] = new_position[1]
        
        #add probabilistic condition of switching at the end
        draw = np.random.uniform(low=0.0,high=1.0,size=1)
        if draw <= switch_prob:
            
            runway_select = 0 #haven't chosen runway yet, so assign 0
            
            while runway_select == 0:
                runway_draw = np.int(np.random.choice(len(master_777_runway_avg),size=1))
        
                dest_lat = master_777_runway_avg[runway_draw][0]
                dest_lon = master_777_runway_avg[runway_draw][1]
        
                dist_to_dest = haversine(earth_radius,lat_init,lon_init,dest_lat,dest_lon)
                
                if dist_to_dest < max_flying_dist: #if we can fly there in the amount of time
                    if dist_to_dest > sample_flying_dist: #if we can't get there yet, in the amount of time to run simualtion
                        runway_select = 1 #we've chosen our runway
        
            circle_dest_radius = haversine(earth_radius,lat_init,lon_init,dest_lat,dest_lon)   
            circle_of_dest = make_circle(circle_dest_radius,360,lon_init,lat_init)

            heading = np.around(get_heading(lat_init,lon_init,dest_lat,dest_lon))

            dest_lat_list.append(dest_lat)
            dest_lon_list.append(dest_lon)
    
    return plane_lat,plane_lon,dest_lat_list,dest_lon_list

In [335]:

#test function
#w/ 5% switching probability
testlats,testlons,testdestlat,testdestlon = scenario7(igariwaypoint[0],igariwaypoint[1],4,sph,cruise_speed,eq_deg_km,max_hrs,0.05)

In [336]:

#Set figure size
fig = plt.figure(figsize=[30,20])
#Setup Basemap
fig = Basemap(width=13000000,height=15000000,projection='lcc',resolution='c',lat_0=5,lon_0=100,suppress_ticks=True)
#Call figure function
figure_function(fig)
#Put other plots in here -- they don't have to be in any strict order necessarily.
x6,y6 = fig(testlons,testlats)
x7,y7 = fig(testdestlon,testdestlat)
fig.plot(x6,y6,'go',markersize=5,label='Plane Path')
fig.plot(x7,y7,'go',markersize=10,label='Target Runway')
#Make legend
legend = plt.legend(loc='lower left',fontsize=10,frameon=True,title='Legend',markerscale=1,prop={'size':15})
legend.get_title().set_fontsize('20')
#Add title
plt.title('Scenario 7', fontsize=30)
#Show below
plt.show()

In [61]:

#Scenario 8
"""    
scenario8         picks a city but then has a switching probability

lat_init          initial latitude of plane
lon_init          initial longitude of plane

hrs               hours to run simulation at
sph               samples per hour. in other words, how often we update and plot the plane's position.
cruise_speed      cruising speed of Boeing 777, 905 km/hr
eq_deg_km         degrees per km at the equator
max_hrs           amount of time plane can be in air

switch_prob       probability of switching destination
"""
def scenario8(lat_init,lon_init,hrs,sph,cruise_speed,eq_deg_km,max_hrs,switch_prob):
    
    samples = hrs * sph
    
    max_flying_dist = cruise_speed * max_hrs # km/hr * hrs = km    
    sample_flying_dist = cruise_speed * hrs
    
    city_draw = np.int(np.random.choice(len(asiatoptencities),size=None,p=fractions))

    dest_lat = asiatoptencities['Lat'][city_draw]
    dest_lon = asiatoptencities['Lon'][city_draw]

    circle_dest_radius = haversine(earth_radius,lat_init,lon_init,dest_lat,dest_lon)   
    circle_of_dest = make_circle(circle_dest_radius,360,lon_init,lat_init)

    heading = np.around(get_heading(lat_init,lon_init,dest_lat,dest_lon))
    
    #make a master list of destinations to return, in event it'll change
    dest_lat_list = []
    dest_lon_list = []
    dest_lat_list.append(dest_lat)
    dest_lon_list.append(dest_lon)
    
    #------------------------------------------------------------------------------#    
    plane_lat = np.zeros(samples+1)
    plane_lon = np.zeros(samples+1)    
    plane_lat[0] = lat_init
    plane_lon[0] = lon_init
    
    plane_dist = (cruise_speed/eq_deg_km)/sph 
    
    for i in xrange(samples): 
        new_locations_circle = make_circle(plane_dist,360,plane_lon[0],plane_lat[0]) 
        new_position = make_vector(plane_dist,heading, plane_lon[i], plane_lat[i])
    
        plane_lat[i+1] = new_position[0]
        plane_lon[i+1] = new_position[1]
        
        #add probabilistic condition of switching at the end
        draw = np.random.uniform(low=0.0,high=1.0,size=1)
        
        if draw <= switch_prob:
    
            city_draw = np.int(np.random.choice(len(asiatoptencities),size=None,p=fractions))

            dest_lat = asiatoptencities['Lat'][city_draw]
            dest_lon = asiatoptencities['Lon'][city_draw]
      
            dest_lat_list.append(dest_lat)
            dest_lon_list.append(dest_lon)
            
            circle_dest_radius = haversine(earth_radius,lat_init,lon_init,dest_lat,dest_lon)   
            circle_of_dest = make_circle(circle_dest_radius,360,lon_init,lat_init)
        
            heading = np.around(get_heading(lat_init,lon_init,dest_lat,dest_lon))
                
    return plane_lat,plane_lon,dest_lat_list,dest_lon_list

In [62]:

#test function
#w/ 5% switching probability
testlats,testlons,testdestlat,testdestlon = scenario8(igariwaypoint[0],igariwaypoint[1],4,sph,cruise_speed,eq_deg_km,max_hrs,0.05)

In [63]:

#Set figure size
fig = plt.figure(figsize=[30,20])
#Setup Basemap
fig = Basemap(width=13000000,height=15000000,projection='lcc',resolution='c',lat_0=5,lon_0=100,suppress_ticks=True)
#Call figure function
figure_function(fig)
#Put other plots in here -- they don't have to be in any strict order necessarily.
x6,y6 = fig(testlons,testlats)
x7,y7 = fig(testdestlon,testdestlat)
fig.plot(x6,y6,'go',markersize=5,label='Plane Path')
fig.plot(x7,y7,'go',markersize=10,label='Target City')
#Make legend
legend = plt.legend(loc='lower left',fontsize=10,frameon=True,title='Legend',markerscale=1,prop={'size':15})
legend.get_title().set_fontsize('20')
#Add title
plt.title('Scenario 8', fontsize=30)
#Show below
plt.show()

Now we'll write a 9th scenario which is an amalgamation of the runway and city: one that picks one and then runs with it. This is a wrapper function. And it'll induce a bit of flavor variety in picking a target.¶

In [64]:

#Scenario 9
"""    
scenario9         picks either scen1 or scen2 a certain portion of the time.

lat_init          initial latitude of plane
lon_init          initial longitude of plane

hrs               hours to run simulation at
sph               samples per hour. in other words, how often we update and plot the plane's position.
cruise_speed      cruising speed of Boeing 777, 905 km/hr
eq_deg_km         degrees per km at the equator
max_hrs           amount of time plane can be in air

scen1             the name of the first scenario to test (get from function names above)
scen2             the name of the other scenario to test (get from function names above)

dominant_prob     probability of the dominant scenario occurring
"""
def scenario9(lat_init,lon_init,hrs,sph,cruise_speed,eq_deg_km,max_hrs,scen1,scen2,dominant_prob):
    
    draw = np.random.uniform(low=0.0,high=1.0,size=1)
    
    if draw <= dominant_prob:
        plane_lat,plane_lon,dest_lat,dest_lon = scen1(lat_init,lon_init,hrs,sph,cruise_speed,eq_deg_km,max_hrs)
        
    else:
        plane_lat,plane_lon,dest_lat,dest_lon = scen2(lat_init,lon_init,hrs,sph,cruise_speed,eq_deg_km,max_hrs)
        
    return plane_lat,plane_lon,dest_lat,dest_lon

In [65]:

n = 10
#initialize to store multiple simulations as list -> to ensure that both cities and runways are being chosen
testlat_list, testlon_list, testdestlat_list, testdestlon_list = [],[],[],[]

for i in xrange(n):
    testlats,testlons,testdestlat,testdestlon = scenario9(igariwaypoint[0],igariwaypoint[1],4,sph,cruise_speed,eq_deg_km,max_hrs,scenario1,scenario2,0.5)
    testlat_list.append(testlats)
    testlon_list.append(testlons)
    testdestlat_list.append(testdestlat)
    testdestlon_list.append(testdestlon)

In [66]:

#Set figure size
fig = plt.figure(figsize=[30,20])
#Setup Basemap
fig = Basemap(width=13000000,height=15000000,projection='lcc',resolution='c',lat_0=5,lon_0=100,suppress_ticks=True)
#Call figure function
figure_function(fig)
#Put other plots in here -- they don't have to be in any strict order necessarily.
x6,y6 = fig(np.array(testlon_list),np.array(testlat_list))
x7,y7 = fig(np.array(testdestlon_list),np.array(testdestlat_list))
fig.plot(x6,y6,'go',markersize=5)
fig.plot(x7,y7,'go',markersize=10,label='Target Destination')
#Make legend
legend = plt.legend(loc='lower left',fontsize=10,frameon=True,title='Legend',markerscale=1,prop={'size':15})
legend.get_title().set_fontsize('20')
#Add title
plt.title('Scenario 9', fontsize=30)
#Show below
plt.show()

Finally, a 10th that combines switching. Here, we return a list of the destinations, keeping track of them, because recall that they'll change:¶

In [67]:

#Scenario 10
"""    
scenario10        picks either scen1 or scen2 a certain portion of the time.
                  right now, have in mind scenario7 and scenario8.

lat_init          initial latitude of plane
lon_init          initial longitude of plane

hrs               hours to run simulation at
sph               samples per hour. in other words, how often we update and plot the plane's position.
cruise_speed      cruising speed of Boeing 777, 905 km/hr
eq_deg_km         degrees per km at the equator
max_hrs           amount of time plane can be in air

scen1             the name of the first scenario to test (get from function names above)
scen2             the name of the other scenario to test (get from function names above)

dominant_prob     probability of the dominant scenario occurring
"""
def scenario10(lat_init,lon_init,hrs,sph,cruise_speed,eq_deg_km,max_hrs,scen1,scen2,dominant_prob):
    
    draw = np.random.uniform(low=0.0,high=1.0,size=1)
    
    if draw <= dominant_prob:
        plane_lat,plane_lon,dest_lat,dest_lon = scen1(lat_init,lon_init,hrs,sph,cruise_speed,eq_deg_km,max_hrs,dominant_prob)
        
    else:
        plane_lat,plane_lon,dest_lat,dest_lon = scen2(lat_init,lon_init,hrs,sph,cruise_speed,eq_deg_km,max_hrs,dominant_prob)
        
    return plane_lat,plane_lon,dest_lat,dest_lon

In [68]:

n = 10
#initialize to store multiple simulations as list -> to ensure that both cities and runways are being chosen
testlat_list, testlon_list, testdestlat_list, testdestlon_list = [],[],[],[]

for i in xrange(n):
    testlats,testlons,testdestlat,testdestlon = scenario10(igariwaypoint[0],igariwaypoint[1],4,sph,cruise_speed,eq_deg_km,max_hrs,scenario7,scenario8,0.5)
    testlat_list.append(testlats)
    testlon_list.append(testlons)
    testdestlat_list.append(testdestlat)
    testdestlon_list.append(testdestlon)

In [69]:

testdestlat_list2 = []
testdestlon_list2 = []

for i in xrange(len(testdestlat_list)):
    for j in xrange(len(testdestlat_list[i])):
        testdestlat_list2.append(testdestlat_list[i][j])
        
for i in xrange(len(testdestlon_list)):
    for j in xrange(len(testdestlon_list[i])):
        testdestlon_list2.append(testdestlon_list[i][j])

In [70]:

#Set figure size
fig = plt.figure(figsize=[30,20])
#Setup Basemap
fig = Basemap(width=13000000,height=15000000,projection='lcc',resolution='c',lat_0=5,lon_0=100,suppress_ticks=True)
#Call figure function
figure_function(fig)
#Put other plots in here -- they don't have to be in any strict order necessarily.
x6,y6 = fig(np.array(testlon_list),np.array(testlat_list))
x7,y7 = fig(np.array(testdestlon_list2),np.array(testdestlat_list2))
fig.plot(x6,y6,'go',markersize=5)
fig.plot(x7,y7,'go',markersize=10,label='Target Destination')
#Make legend
legend = plt.legend(loc='lower left',fontsize=10,frameon=True,title='Legend',markerscale=1,prop={'size':15})
legend.get_title().set_fontsize('20')
#Add title
plt.title('Scenario 10', fontsize=30)
#Show below
plt.show()

Hidden Markov Model¶

Now we simulate a number of flight paths that are one of two scenarios, to feed into our Hidden Markov Model (HMM) to try to detect systematic differences between how those two scenarios manifest themselves in our flight path. Suppose that 90% are Scenario 1 (runways), and 10% are Scenario 2 (cities). We perturb the data uniquely depending on the model chosen, since by visual inspection there's not enough of a heading differentiator in the first 2 hours, which is the current time interval we're looking at:¶

Generate simulations:¶

In [205]:

"""
sim_gen        generate simulations

inputs from other functions used in this function
---------------
lat_init       initial latitude of plane (set to Igari Waypoint as default)
lon_init       initial longitude of plane (set to Igari Waypoint as default)

hrs            hours to run simulation at
sph            samples per hour. in other words, how often we update and plot the plane's position.
               (set to 60 by default)
               
cruise_speed   cruising speed of Boeing 777, 905 km/hr (set to 905 by default)
eq_deg_km      degrees per km at the equator (set to default of eq_deg_km = 111.32)
max_hrs        amount of time plane can be in air (set to 6.5 hrs by default)

inputs specific to this function
---------------
n              number of flights to simulate
dominant_prob  probability of the dominant scenario occurring

scen1          the name of the first scenario to test (get from function names above)
scen2          the name of the other scenario to test (get from function names above)
scen1text      text to label scen1 as (i.e. 'runway'). *pass in as string*
scen2text      text to label scen2 as (i.e. 'city')    *pass in as string*

"""
def sim_gen(n,dominant_prob,scen1,scen2,scen1text,scen2text,hrs,
            eq_deg_km=111.32,max_hrs=6.5,cruise_speed=905,
            sph=60,lat_init=igariwaypoint[0],lon_init=igariwaypoint[1]):
    
    simdata = []
    
    for i in xrange(n):
            
        draw = np.random.uniform(low=0.0,high=1.0,size=1) #make uniform draw
        
        if draw <= dominant_prob:
            lats,lons,destLat,destLon = scen1(lat_init,lon_init,hrs,sph,cruise_speed,eq_deg_km,max_hrs)        
            tempDataToStore = [scen1text,lats,lons,destLat,destLon]
            simdata.append(tempDataToStore)
            
        else:
            lats,lons,destLat,destLon = scen2(lat_init,lon_init,hrs,sph,cruise_speed,eq_deg_km,max_hrs)
            tempDataToStore = [scen2text,lats,lons,destLat,destLon]
            simdata.append(tempDataToStore)
            
    return simdata

Let's visualize the data by seeing the breakdown of runway versus city scenarios:¶

In [206]:

simdata = sim_gen(200,0.9,scenario1,scenario2,'runway','city',2)

In [207]:

"""
scenario_viz    plots distribution of 2 option scenarios. right now, hardcoded for runway vs city.

simdata         the simulated data
scen1text       text to label scen1 as (i.e. 'runway'). *pass in as string*
scen2text       text to label scen2 as (i.e. 'city')    *pass in as string*

"""
def scenario_viz(simdata,scen1text,scen2text):
    
    simtype = []
    
    for i in xrange(len(simdata)):
        simtypeI = simdata[i][0]
        
        if simtypeI == scen1text:
            simtype.append(0) #scen1text is 0
        if simtypeI == scen2text:
            simtype.append(1) #scen2text is 1
            
    #Plot to make sure we generated the correct portion of runways vs. cities:
    plt.figure()
    plt.hist(simtype)
    plt.title(scen1text +' vs.'+scen2text+' scenario ratio')
    plt.ylabel('%')
    plt.xlabel(scen1text +' (left) or '+scen2text+ ' (right)')
    plt.show()
    
    citysum = np.sum(simtype)

    print "There are "+str(len(simtype)-citysum)+" "+ scen1text + " scenarios"
    print "There are "+str(citysum)+" "+scen2text+ " scenarios"

In [208]:

scenario_viz(simdata,'runway','city')

There are 180 runway scenarios
There are 20 city scenarios

Add heading information to our simulated data.¶

In [209]:

"""
add_heading      adds heading information to our simulated data

simdata          the simulated data

"""
def add_heading(simdata):
    
    for i in xrange(len(simdata)):
        
        currentSim = simdata[i]
        lats = simdata[i][1] 
        lons = simdata[i][2]
        headingList = [] 
        
        for j in xrange(1,len(lats)): #lats & lons are same length
            startLat = lats[j-1] 
            startLon = lons[j-1]
            endLat = lats[j] 
            endLon = lons[j]
            
            heading = get_heading(startLat,startLon,endLat,endLon)
            headingList.append(heading)
            
            currentSim.append(headingList)
            simdata[i] = currentSim
    
    return currentSim

In [210]:

currentSim = add_heading(simdata)

Perturb our data, more depending on if it's going to a city or not, given our scenario assumption of more disruption for that type of situation:¶

In [287]:

"""
add_noise        adds noise (error) to our simulated data

simdata          the simulated data
currentSim       the heading information to the simulated data

scen1text        text to label scen1 as (i.e. 'runway') *pass in as string*
scen2text        text to label scen2 as (i.e. 'city')   *pass in as string*

"""
def add_noise(simdata,currentSim,scen1text,scen2text):

    for i in xrange(len(simdata)):       
        # get the current simulation
        currentSim = simdata[i]
        headingList = currentSim[5]
        
        # perturb more if scen2 (rarer scenario)
        if currentSim[0] == scen1text:
            headingListNew = [h*((100 + np.random.normal(0,0.2))/100) for h in headingList]
            
        if currentSim[0] == scen2text:
            headingListNew = [h*((100 + np.random.normal(0,2))/100) for h in headingList]
            
        currentSim[5] = headingListNew
        simdata[i] = currentSim
    
    return simdata, currentSim

In [288]:

simdata, currentSim = add_noise(simdata,currentSim,'runway','city')

Now we write a function for the scikitlearn hmm model:¶

In [289]:

"""
hmm             hidden markov model. our main hidden markov model function, from scikitlearn.

simdata         our simulated data
n_components    number of components

functions we use for indices
---------------
hrs            hours to run simulation at
sph            samples per hour. in other words, how often we update and plot the plane's position.
"""
def hmm(simdata,n_components,hrs,sph):

    # Collect data into one column, getting differenced heading info
    headingDifData = []
    startIndices = []
    
    for i in xrange(len(simdata)):
        h = simdata[i][5]
        headingDif = [ h[j] - h[j-1] for j in xrange(1,len(h)) ]
        startIndices.append(len(headingDifData))
        
        for h in headingDif:
            headingDifData.append(h) 
    #print len(headingDifData)
    
    #manipulate array via numpy's column_stack 
    # http://docs.scipy.org/doc/numpy/reference/generated/numpy.column_stack.html
    X = np.column_stack([headingDifData])
    
    # ------ Run Gaussian HMM ------
    print "fitting to HMM and decoding ...",
    
    # make an HMM instance and execute fit
    model = GaussianHMM(n_components, "diag")
    model.fit([X])
    hidden_states = model.predict(X)
    print 'mean of hidden states: '+ str(np.mean(hidden_states))
    
    hiddenStatesList =[]
    #define index ranges to separate simulations. for 2 hrs, 60 samples/hr
    #it's every 119, since 60*2 120 but have 1 less heading pair change.
    idx_range = sph*hrs-1
    
    for i in xrange(len(simdata)): 
        startIdx = i*idx_range
        endIdx = (i+1)*idx_range
        
        predHiddenStates = [hidden_states[startIdx:endIdx]]
        hiddenStatesList.append(predHiddenStates[0]) #append hidden states runway or city prediction for each simulation
        
    #how many individual samples we have across all our simulations
    print 'how many individual samples we have across all our simulations: '+str(len(hidden_states))
    #how many simulations we have
    print 'how many simulations we have: '+str(len(hiddenStatesList))
    
    return simdata, hiddenStatesList

In [290]:

simdata, hiddenStatesList = hmm(simdata,2,2,60)

fitting to HMM and decoding ... mean of hidden states: 0.0896218487395
how many individual samples we have across all our simulations: 23800
how many simulations we have: 200

Each simdata has 5 columns ... what scenario was actually run, lats & lons for the timesteps and then the destination lat & lon:¶

In [291]:

print "actual scenario is: ", simdata[0][0]
print "lats are:", simdata[0][1]
print "lons are:", simdata[0][2]
print "------------------------"
print "dest lat/lon is: "+str(simdata[0][3])+", "+str(simdata[0][4])

actual scenario is:  city
lats are: [ 6.93666667  6.80519619  6.67372571  6.54225523  6.41078475  6.27931426
  6.14784378  6.0163733   5.88490282  5.75343234  5.62196186  5.49049138
  5.3590209   5.22755042  5.09607994  4.96460946  4.83313898  4.7016685
  4.57019802  4.43872754  4.30725706  4.17578658  4.0443161   3.91284562
  3.78137514  3.64990466  3.51843418  3.3869637   3.25549322  3.12402274
  2.99255226  2.86108178  2.7296113   2.59814082  2.46667034  2.33519985
  2.20372937  2.07225889  1.94078841  1.80931793  1.67784745  1.54637697
  1.41490649  1.28343601  1.15196553  1.02049505  0.88902457  0.75755409
  0.62608361  0.49461313  0.36314265  0.23167217  0.10020169 -0.03126879
 -0.16273927 -0.29420975 -0.42568023 -0.55715071 -0.68862119 -0.82009167
 -0.95156215 -1.08303263 -1.21450311 -1.34597359 -1.47744408 -1.60891456
 -1.74038504 -1.87185552 -2.003326   -2.13479648 -2.26626696 -2.39773744
 -2.52920792 -2.6606784  -2.79214888 -2.92361936 -3.05508984 -3.18656032
 -3.3180308  -3.44950128 -3.58097176 -3.71244224 -3.84391272 -3.9753832
 -4.10685368 -4.23832416 -4.36979464 -4.50126512 -4.6327356  -4.76420608
 -4.89567656 -5.02714704 -5.15861752 -5.290088   -5.42155849 -5.55302897
 -5.68449945 -5.81596993 -5.94744041 -6.07891089 -6.21038137 -6.34185185
 -6.47332233 -6.60479281 -6.73626329 -6.86773377 -6.99920425 -7.13067473
 -7.26214521 -7.39361569 -7.52508617 -7.65655665 -7.78802713 -7.91949761
 -8.05096809 -8.18243857 -8.31390905 -8.44537953 -8.57685001 -8.70832049
 -8.83979097]
lons are: [ 103.585       103.61801185  103.65101474  103.68400887  103.7169944
  103.74997152  103.7829404   103.81590122  103.84885416  103.88179939
  103.91473709  103.94766745  103.98059062  104.0135068   104.04641616
  104.07931887  104.11221511  104.14510506  104.17798889  104.21086677
  104.24373889  104.27660541  104.30946651  104.34232237  104.37517316
  104.40801905  104.44086023  104.47369685  104.50652911  104.53935717
  104.5721812   104.60500138  104.63781789  104.67063089  104.70344056
  104.73624708  104.76905061  104.80185134  104.83464942  104.86744505
  104.90023838  104.93302959  104.96581886  104.99860636  105.03139226
  105.06417673  105.09695995  105.12974209  105.16252332  105.19530381
  105.22808374  105.26086328  105.2936426   105.32642188  105.35920129
  105.39198099  105.42476117  105.45754199  105.49032363  105.52310626
  105.55589005  105.58867518  105.62146182  105.65425014  105.68704031
  105.71983251  105.75262691  105.78542369  105.81822301  105.85102504
  105.88382997  105.91663797  105.94944921  105.98226385  106.01508209
  106.04790408  106.08073     106.11356004  106.14639435  106.17923312
  106.21207652  106.24492472  106.2777779   106.31063623  106.34349989
  106.37636905  106.40924388  106.44212457  106.47501129  106.50790421
  106.5408035   106.57370936  106.60662194  106.63954142  106.67246799
  106.70540182  106.73834308  106.77129196  106.80424862  106.83721326
  106.87018603  106.90316713  106.93615673  106.969155    107.00216213
  107.0351783   107.06820368  107.10123846  107.1342828   107.1673369
  107.20040093  107.23347508  107.26655951  107.29965442  107.33275998
  107.36587638  107.3990038   107.43214241  107.46529241  107.49845397
  107.53162727]
------------------------
dest lat/lon is: -6.2, 106.8

Now we need to see which indices were cities:¶

In [292]:

"""
scen2_indices   finds out which indices were the alternate rarer scenario we are trying to detect.

simdata         our simulated data
scen2text       text to label scen2 as (i.e. 'city')    *pass in as string*
"""
def scen2_indices(simdata,scen2text):

    actualcityscens = []
    for i in xrange(len(simdata)):
        if simdata[i][0] == scen2text:
            actualcityscens.append(i)
            
    print "indices of "+str(scen2text)+": "+str(actualcityscens)
    return actualcityscens

In [293]:

actualcityscens = scen2_indices(simdata,'city')

indices of city: [0, 1, 12, 14, 31, 38, 44, 53, 80, 102, 103, 121, 137, 139, 140, 151, 164, 185, 193, 196]

Now that we have those indices, we can see what the HMM predicted them to be. 1s are city; 0s are runway:¶

In [294]:

"""
hmm_performance       take indices of the actual rare events, and see whether HMM predicted them to be rare or the 
                      other, more common scenario.

hiddenStatesList      list of all of the HMM guesses for every sample of every flight simulation
actualcityscens       the indices of the rare scenario
"""
def hmm_performance(hiddenStatesList,actualcityscens):

    return [hiddenStatesList[i] for i in actualcityscens]

In [295]:

guess = hmm_performance(hiddenStatesList,actualcityscens)
guess

Out[295]:

[array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1]),
 array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1]),
 array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 0]),
 array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1]),
 array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 0, 0, 0]),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0]),
 array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1]),
 array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1]),
 array([0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
       0, 0, 0, 0]),
 array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1]),
 array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       0, 0, 0, 0]),
 array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1]),
 array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 0]),
 array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1]),
 array([1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0]),
 array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1,
       1, 1, 1, 1]),
 array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1]),
 array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1]),
 array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1]),
 array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0])]

Now we can use that same list comprehension trick, to grab the lats and lons and plot them on the map! We'll only plot the city scenarios, and whether or not the program thought they were cities.¶

In [296]:

"""
scen2_parameters       returns lat,lon and dest lat/lon of the rare scenarios
                       for example, cities if runway/city and 90%/10%
                       
simdata                the simulated data
actualcityscens        indices of the rare scenario

"""
def scen2_parameters(simdata,actualcityscens):

    citylats = [simdata[i][1] for i in actualcityscens]; #print citylats
    citylons = [simdata[i][2] for i in actualcityscens]; #print citylons
    citydestlat = [simdata[i][3] for i in actualcityscens]; #print citydestlat
    citydestlon = [simdata[i][4] for i in actualcityscens]; #print citydestlon
    
    #convert to numpy arrays for plotting purposes
    citylons = np.array(citylons)
    citylats = np.array(citylats)
    citydestlat = np.array(citydestlat)
    citydestlon = np.array(citydestlon)
    
    return citylons,citylats,citydestlat,citydestlon

In [297]:

citylons,citylats,citydestlat,citydestlon = scen2_parameters(simdata,actualcityscens)

Okay, so we now want to color the points differently depending on what the guess is -- if the HMM is right or wrong for a given sample point. For 2 hours time, our city lats & lons are 121 long. Our guesses are 119 long. This is due to pairwise matching. We need to drop the first entry of the lat & lon since the model is just starting. And drop the last entry because there's nothing afterward to compare to. Then we can color the points, as a sort of hack for coloring the lines in between the points.¶

In [298]:

print len(citylats[0])
print len(guess[0])

121
119

Of course, if the hrs is in fact some other number, and we have a different sampling rate than the default 60/hr, then we just proportionately adjust the above example numbers. But the point is, or guess is going to be shorter than the number of lats and lons so we need to trim those down.¶

In [299]:

x,y = citylats.shape
#print x,y

#create new array 2 less than originals since we're dropping first and last elements
citylons2 = np.zeros((x,y-1))
citylats2 = np.zeros((x,y-1))
print citylons2.shape

citylons3 = np.zeros((x,y-2))
citylats3 = np.zeros((x,y-2))
print citylons3.shape

(20L, 120L)
(20L, 119L)

In [300]:

# iterate through lats and lons and drop first & last entries
for i in xrange(len(citylats2)): #for each of the city scenario simulations except the end
    citylons2[i] = citylons[i][:-1] 
    citylats2[i] = citylats[i][:-1]
  
for i in xrange(len(citylats3)):    
    citylons3[i] = citylons2[i][1:]
    citylats3[i] = citylats2[i][1:]
    
#print citylons3.shape
#print citylats3.shape

Now append into two lists for the different colors. Remember we're plotting points, not connecting lines, so this is a-okay and in fact is the ideal way to color.¶

In [301]:

#latitudes

citylats3_city = []
citylats3_runway = []

for i in xrange(len(citylats3)):
    for j in xrange(len(citylats3[i])):
        if guess[i][j] == 0: #if HMM predicts it's a runway scenario
            citylats3_runway.append(citylats3[i][j])
        if guess[i][j] == 1: #if HMM predicts it's a city scenario
            citylats3_city.append(citylats3[i][j])
            
citylats3_city = np.array(citylats3_city)
citylats3_runway = np.array(citylats3_runway)

#longitudes

citylons3_city = []
citylons3_runway = []  

for i in xrange(len(citylons3)):
    for j in xrange(len(citylons3[i])):
        if guess[i][j] == 0: #if HMM predicts it's a runway scenario (or the dominant scenario)
            citylons3_runway.append(citylons3[i][j])
        if guess[i][j] == 1: #if HMM predicts it's a city scenario (or the rare scenario)
            citylons3_city.append(citylons3[i][j])
            
citylons3_city = np.array(citylons3_city)
citylons3_runway = np.array(citylons3_runway)

In [338]:

print "samples identified as city scenarios: ", len(citylats3_city)
print "samples misidentified as runway scenarios:", len(citylats3_runway)
#print len(citylons3_city)
#print len(citylons3_runway)

print "When it *is* a city scenario, the model is right %r percent of the time." %np.around(100*np.float(len(citylats3_city))/(len(citylats3_runway)+len(citylats3_city)))

samples identified as city scenarios:  1958
samples misidentified as runway scenarios: 422
When it *is* a city scenario, the model is right 82.0 percent of the time.

What this means is that when it's actually a city... the HMM is right more often than not.

That took longer than expected, but it worked. Now we plot some of the paths on a map, and color code accordingly if they're a 0 or 1:
¶

In [337]:

#Set figure size
fig = plt.figure(figsize=[30,20])
#Setup Basemap
fig = Basemap(width=13000000,height=15000000,projection='lcc',resolution='c',lat_0=5,lon_0=100,suppress_ticks=True)
#Call figure function
figure_function(fig)

#Put other plots in here -- they don't have to be in any strict order necessarily.
x6,y6 = fig(citylons3_city,citylats3_city)
x7,y7 = fig(citylons3_runway,citylats3_runway)
x8,y8 = fig(citydestlon,citydestlat)
fig.plot(x6,y6,'go',markersize=5,alpha=0.8,label='HMM Correct - City') #alpha adjusts opacity. 0 is nothing. 1 is solid.
fig.plot(x7,y7,'ro',markersize=5,alpha=0.3,label='HMM Wrong - Runway') #adjust opacity since red darker than green
fig.plot(x8,y8,'go',markersize=10,label='Target City')

#Make legend
legend = plt.legend(loc='lower left',fontsize=10,frameon=True,title='Legend',markerscale=1,prop={'size':15})
legend.get_title().set_fontsize('20')
#Add title
plt.title('Actual City Scenarios vs. HMM Prediction', fontsize=30)
#Show below
plt.show()

All Simulations - Right or Wrong?¶

Next we organize the coloring of the simulations by whether they're right or wrong. So green is right, and red is wrong. Even though I know that we made the runways red and the cities blue. Whatever.¶

In [304]:

allguess = [hiddenStatesList[i] for i in xrange(len(simdata))]

allcitylats = [simdata[i][1] for i in xrange(len(simdata))]
allcitylons = [simdata[i][2] for i in xrange(len(simdata))]
allcitydestlat = [simdata[i][3] for i in xrange(len(simdata))]
allcitydestlon = [simdata[i][4] for i in xrange(len(simdata))]

#convert for plotting purposes
allcitylons = np.array(allcitylons)
allcitylats = np.array(allcitylats)
allcitydestlat = np.array(allcitydestlat)
allcitydestlon = np.array(allcitydestlon)

allguess = np.array(allguess)

In [305]:

print len(allcitylats)

In [306]:

print allguess.shape

(200L, 119L)

Now that we grab all of the city lats, we do what we did before to strip the first and last, and then check to see if they're a 1 or a 0 to see what the HMM guessed:¶

In [307]:

x,y = allcitylats.shape

#create new array 2 less than originals since we're dropping first and last elements
allcitylons2 = np.zeros((x,y-1))
allcitylats2 = np.zeros((x,y-1))
allcitylons3 = np.zeros((x,y-2))
allcitylats3 = np.zeros((x,y-2))

In [308]:

# iterate through lats and lons and drop first & last entries
for i in xrange(len(allcitylats2)): #for each of the city scenario simulations except the end
    allcitylons2[i] = allcitylons[i][:-1] 
    allcitylats2[i] = allcitylats[i][:-1]
    
for i in xrange(len(allcitylats3)):    
    allcitylons3[i] = allcitylons2[i][1:]
    allcitylats3[i] = allcitylats2[i][1:]

Below, add in a simdata[i][0] check to see if it actually was a runway or a city for that simulation. Plot right vs. wrong guesses and color that way, instead of city vs. runway¶

In [310]:

#latitudes

allcitylats3_right = []
allcitylats3_wrong = []

for i in xrange(len(allcitylats3)):
    for j in xrange(len(allcitylats3[i])):
        if allguess[i][j] == 0 and simdata[i][0] == 'runway': #if HMM predicts it's a runway scenario and true
            allcitylats3_right.append(allcitylats3[i][j])
        if allguess[i][j] == 1 and simdata[i][0] == 'city': #if HMM predicts it's a city scenario and true
            allcitylats3_right.append(allcitylats3[i][j])
        else:
            allcitylats3_wrong.append(allcitylats3[i][j])
            
allcitylats3_right = np.array(allcitylats3_right)
allcitylats3_wrong = np.array(allcitylats3_wrong)

#longitudes

allcitylons3_right = []
allcitylons3_wrong = []  

for i in xrange(len(allcitylons3)):
    for j in xrange(len(allcitylons3[i])):
        if allguess[i][j] == 0 and simdata[i][0] == 'runway': #if HMM predicts it's a runway scenario and true
            allcitylons3_right.append(allcitylons3[i][j])
        if allguess[i][j] == 1 and simdata[i][0] == 'city': #if HMM predicts it's a city scenario and true
            allcitylons3_right.append(allcitylons3[i][j])
        else:
            allcitylons3_wrong.append(allcitylons3[i][j])
            
allcitylons3_right = np.array(allcitylons3_right)
allcitylons3_wrong = np.array(allcitylons3_wrong)

In [339]:

right = np.float(len(allcitylats3_right))
print "number times HMM right: ", np.int(right)
#print len(allcitylons3_right)
wrong = np.float(len(allcitylats3_wrong))
print "number times HMM wrong: ", np.int(wrong)
#print len(allcitylons3_wrong)

percentright = 100*right/(right+wrong)

print "In an effort to detect most of the city scenarios when they occur, the model is right %r percent of the time." %np.around(percentright)

number times HMM right:  23203
number times HMM wrong:  21842
In an effort to detect most of the city scenarios when they occur, the model is right 52.0 percent of the time.

Now we'll plot all of the scenarios; the city and the runway:¶

In [342]:

#Set figure size
fig = plt.figure(figsize=[30,20])
#Setup Basemap
fig = Basemap(width=13000000,height=15000000,projection='lcc',resolution='c',lat_0=5,lon_0=100,suppress_ticks=True)
#Call figure function
figure_function(fig)

#Put other plots in here -- they don't have to be in any strict order necessarily.
x6,y6 = fig(allcitylons3_right,allcitylats3_right)
x7,y7 = fig(allcitylons3_wrong,allcitylats3_wrong)
x8,y8 = fig(allcitydestlon,allcitydestlat)
fig.plot(x6,y6,'go',markersize=5,alpha=0.9,label='HMM -- Right') #alpha adjusts opacity. 0 is nothing. 1 is solid.
fig.plot(x7,y7,'ro',markersize=5,alpha=0.1,label='HMM -- Wrong')
fig.plot(x8,y8,'go',markersize=10,label='Target Destination')

#Make legend
legend = plt.legend(loc='lower left',fontsize=10,frameon=True,title='Legend',markerscale=1,prop={'size':15})
legend.get_title().set_fontsize('20')
#Add title
plt.title('All Simulations vs. HMM Predictions', fontsize=30)
#Show below
plt.show()

What this means is that as it stands, our HMM is great at identifying the rare city scenarios when they do occur, but it also raises many false alarms. So, this has only limited effectiveness, as if it picked everything to be runways, it'd be right 90% of the time but miss all of the scenarios it wanted it to find. Instead, it picks out most cities, and is right more than 10% of the time if it just picked everything to be cities -- over half the time, in fact -- but is overzealous in identifying city scenarios compared to a runway. (Of course, some runways are near cities so practically they're the same thing, even if in our model they're not -- but still...)

Above, we see the limitation of our HMM. We can be right when it counts...but then we'll be wrong when it doesn't count, more than we'd like to. The main way we detect the rare city is via higher variance in one scenario vs. another -- which is a plausible assumption about the scenarios, but is not a more tailor-made emissions function to describe the nuances of how the scenarios differ from each other. We pick up a lot of false instances of runways as cities (when they're in fact runways), to ensure we get and recognize most of the city scenarios when they do in fact occur.
¶

Kalman Filter¶

Finally, we look at a separate problem that stems from our noisy data: reducing the noise of the flight path, which could aid in the detection of where the plane is heading (in a straight line). The key thing about both Scenario 1 and Scenario 2 that we have chosen as examples, is that the heading and velocity will be maintained throughout the scenario. It's not in the noisy data, but we should be able to tune our model to get it close to zero as possible. To do this we take the data from the simulations we ran as our test case. We will create a Kalman Filter model and try to come up with the ideal path of the plane, inferred through a change in heading that's as close to zero as possible, based on adjusting our Kalman parameters.

(We have to create a model in order to apply the filter, which is perhaps why Kalman Filters are not used more often. This is not a blackbox method approach -- you really need to understand what is going on, to set it up.)
¶

In [797]:

kalmansimdata = sim_gen(100,0.9,scenario1,scenario2,'runway','city',2)

In [798]:

kalmancurrentSim = add_heading(kalmansimdata)

We grab the change in heading data based on the lat/lons of all the simulations (and based on what we did in part to use our HMM, in Part 2):¶

In [799]:

# Heading Change

headingDifData = []
startIndices = []

for i in xrange(len(kalmansimdata)):
    h = kalmansimdata[i][5]
    headingDif = [ h[j] - h[j-1] for j in xrange(1,len(h)) ]
    startIndices.append(len(headingDifData))
    
    for h in headingDif:
        headingDifData.append(h) 

In [800]:

print "heading changes"
print "# of heading changes: ", len(headingDifData)
headingDifData[0:10]

heading changes
# of heading changes:  11900

Out[800]:

[-0.0059499962516156302,
 -0.0060313425897220441,
 -0.0061127225129098406,
 -0.0061941364836570756,
 -0.0062755849487530213,
 -0.0063570683475404621,
 -0.006438587145908059,
 -0.0065201417769458203,
 -0.006601732700175944,
 -0.0066833603676741404]

Now we add error and repeat the process:¶

In [801]:

kalmansimdata_err, kalmancurrentSim_err = add_noise(kalmansimdata,kalmancurrentSim,'runway','city')

In [802]:

headingDifData_err = []
startIndices_err = []

for i in xrange(len(kalmansimdata_err)):
    h_err = kalmansimdata_err[i][5]
    headingDif_err = [ h_err[j] - h_err[j-1] for j in xrange(1,len(h_err)) ]
    startIndices_err.append(len(headingDifData_err))
    
    for h_err in headingDif_err:
        headingDifData_err.append(h_err) 

In [803]:

print "heading changes w/ error"
print "# of heading changes: ", len(headingDifData_err)
headingDifData_err[0:10]

heading changes w/ error
# of heading changes:  11900

Out[803]:

[0.68562717552993035,
 -0.71829155424899227,
 -0.6360958819886946,
 0.75619909293209275,
 -0.37927622502769509,
 -0.95258339006062442,
 0.44235340684679159,
 0.46924857015511634,
 0.20350388374629347,
 1.6054973788712346]

The error adds much more variation than before. Now we plot the heading changes over time for all of the simulations, with and without error. Note that there will be small error for the 'without error' case, due to roundoff somewhere in the calculation (in how we set up and store -- not due to the computer memory). But the error pales in comparison to what is added afterward:¶

In [804]:

fig = plt.figure(figsize=[10,6])
plt.plot(headingDifData, 'ro', label='heading change')
plt.title('heading change, all simulations, pre-error add', fontsize=20)
legend = plt.legend()
plt.xlabel('sample #', fontsize=15)
plt.ylabel('heading change',fontsize=15)
plt.xlim([0,len(headingDifData)])
plt.show()

As you can see from the above figure, this is essentially zero, with some roundoff error in our calculation.¶

In [805]:

fig = plt.figure(figsize=[10,6])
plt.plot(headingDifData_err, 'bo', label='heading changes, observed values')
plt.plot(headingDifData, 'ro', label='heading change, true values')
plt.title('heading change due to error, all simulations', fontsize=20)
legend = plt.legend()
plt.xlabel('sample #', fontsize=15)
plt.ylabel('heading change',fontsize=15)
plt.xlim([0,len(headingDifData)])
plt.show()

The added error makes the blue a straight line in comparison. For some of the heading changes, we have drastic heading changes with the added error for just a few of the hundred simulations. This could be because very occasionally, we add a lot more error for some of our cities scenarios given the error input. We'll investigate this later, as a followup to these results. But whatever the reason, we can work with this data and apply a Kalman Filter to smooth it out.¶

Now we write our Kalman Filter function. The Kalman Filter is a predictive-corrective algorithm. We predict what the future will be, and then compare and adjust. The design pattern has two main steps: the Predictive Step and the Update Step (which are sometimes called slightly different names.)

The predict step is the first two lines which correspond to the first two lines of the 'for' loop. The first line updates the linear model (or, rather, predict the next step of the linear model.) The second line predicts the next covariance matrix. Next, comes the measurement update step, where we calculate the relevant variables on the fly at every iteration.
¶

In [806]:

"""
kalman      Kalman Filter model (sometimes called scalar/linear)

n           number of steps

initial conditions
----------------------------------
init_state   initial conditions estimate, state (choose)      - change in heading. is [position,velocity].
init_covar   initial conditions estimate, covariance (choose) - the expected error in the initial state.

the matrices that define the model
----------------------------------
F           state transition matrix       - at each discrete time step we update the model by multiplying the
                                            state vector by the state transition matrix.
Q           process variance matrix       - the system noise. noise in the actual process itself
                                            (not measurement noise.)
H           measurement matrix            - describes how the system state, or what parts, are measured
                                            by the measurements.
R           measurement covariance matrix - set at the start, and is our anticipated accuracy of
                                            measuring.

data        the data to perform the Kalman Filter on
"""
def kalman(n,init_state,init_covar,F,Q,H,R,data):
    
    #need one more than samples since the first one is the initial values
    steps = n+1
    
    #initialize coordinates
    results = np.zeros((steps,2)) 
    #assign first entry to be the starting point
    results[0] = init_state
    
    #where the state of the system starts 
    x_new = init_state.T
    #how much we anticipate this first system state, is off by
    p_new = init_covar.T
    
    #assume velocity is zero, but make a 2 array to add velocity to data if we had a different example
    measured = np.zeros((n,2))
    
    for i in xrange(len(data)):
        measured[i][0] = data[i] #assign our observed data to first entry of array
        measured[i][1] = 0       #assign velocity to 0 since we don't use this component
    
    for i in xrange(1,steps):

        #Predict Step
        
        #update the model to the new timestep (called newt)
        #Predicted (a priori) state estimate -> http://en.wikipedia.org/wiki/Kalman_filter#Details
        x_newt = np.dot(F,x_new) #needed to transpose this to make it work
        
        #the predicted value of the covariance matrix
        #Predicted (a priori) estimate covariance -> http://en.wikipedia.org/wiki/Kalman_filter#Details
        p_newt = np.dot(np.dot(F,p_new),F.T) + Q
        
        #Measurement Update Step
        
        #Calculate residuals by comparing the measured value to the model prediction.
        #Innovation or measurement residual -> http://en.wikipedia.org/wiki/Kalman_filter#Details
        y = measured[i-1] - np.dot(H,x_newt)
        
        #Residual covariance matrix
        #Innovation (or residual) covariance -> http://en.wikipedia.org/wiki/Kalman_filter#Details
        s_new = np.dot(np.dot(H,p_newt),H.T) + R
        
        #Kalman Gain
        #Optimal Kalman gain -> http://en.wikipedia.org/wiki/Kalman_filter#Details
        k_new = np.dot(np.dot(p_newt,H.T),np.linalg.inv(s_new))
        
        #Corrected State Estimate
        #Updated (a posteriori) state estimate	-> http://en.wikipedia.org/wiki/Kalman_filter#Details
        x_new = x_newt + np.dot(k_new,y)
        
        #Saving the result
        results[i] = x_new.T[0]
        
        #Corrected covariance estimate that will be used as part of the next step
        #Updated (a posteriori) estimate covariance -> http://en.wikipedia.org/wiki/Kalman_filter#Details
        p_new = np.dot( (np.eye(2)-np.dot(k_new,H)), p_newt)
        
    return results

In [807]:

#Setup

#Pick this constant -- can change
covariance_guess = 0.0001 #adjust this to be super small (played with results to tune)

#State transition matrix
F = np.matrix(([1,1],[0,1])); print "F: "; print F

#Process variance matrix
Q = covariance_guess*np.matrix(([1./4,1./2],[1./2,1])); print "Q: "; print Q

#Measurement matrix
H = np.matrix(([1,0],[0,0])); print "H: "; print H

#Measurement variance matrix
R = 1*np.matrix(([1,0],[0,1])); print "R: "; print R

#Initial Conditions

#The state of our system is position and velocity, which is represented as a length 2 vector. 
#The first component is position; the second is velocity, the change of the first component with respect to time.
init_state = np.matrix(([0,0])); print "init_state: "; print init_state
#Next is how much we think the initial state is off by.
init_covar = np.matrix(([1,0],[0,1])); print "init_covar: "; print init_covar

F: 
[[1 1]
 [0 1]]
Q: 
[[  2.50000000e-05   5.00000000e-05]
 [  5.00000000e-05   1.00000000e-04]]
H: 
[[1 0]
 [0 0]]
R: 
[[1 0]
 [0 1]]
init_state: 
[[0 0]]
init_covar: 
[[1 0]
 [0 1]]

The Kalman Filter is based on a linear model of the system. At every time step we take the position and add the velocity (change in position with time) to it. We assume our time step is 1.

For a simple dataset, we'll guess [0,0] for our initial state because we'll expect to have no heading change. [1,0],[0,1], our initial covariance, says that we have very little confidence that our initial state is what it is. We could pass in all zeros if we were confident that our initial guess would be close to the reality. All zeros mean that we have complete confidence in our measurements. Depending on context, we can start from a known location, in which case we would be more sure, or alternatively an unknown starting point which should converge over time to see the behavior we are examining.
¶

In [808]:

#Run Kalman Filter on data
kf = kalman(len(headingDifData_err),init_state,init_covar,F,Q,H,R,headingDifData_err)

#Only keep 1st entry since we don't really have a control state in this example.
kf1 = np.zeros(len(kf)) 
for i in xrange(len(kf)):
    kf1[i] = kf[i][0]

In [809]:

#Plot results
fig = plt.figure(figsize=[10,6])
plt.scatter(range(len(headingDifData_err)),headingDifData_err)
plt.plot(kf1,color='red')
plt.title('All heading changes', fontsize=20)
plt.xlabel('samples',fontsize=15)
plt.ylabel('value',fontsize=15)
plt.xlim([0,len(headingDifData)])
plt.show()

Our result is smoothed out almost like our original data. This may very well be since we have extreme variance in one type of rare scenario (city, in our case), whereas we are tuning our Kalman Filter for the more common scenario (runway). Recall that we know that there are two types of errors added -- one for city and one for runway.

Let's if this is the case by splitting up our runway and city scenarios. Ironically, if we make the result less smooth we are able to distinguish runway from city visually, even better.
¶

In [810]:

#Split up runway and city scenarios
kalmansimdata_city = []
kalmansimdata_runway = []

for i in xrange(len(kalmansimdata_err)):
    if kalmansimdata_err[i][0] == 'runway':
        kalmansimdata_runway.append(kalmansimdata_err[i])
    elif kalmansimdata_err[i][0] == 'city':
        kalmansimdata_city.append(kalmansimdata_err[i])

print "# of city scenarios: ", len(kalmansimdata_city)
print "# of runway scenarios: ", len(kalmansimdata_runway)    

# of city scenarios:  16
# of runway scenarios:  84

Now we redo the set up by grabbing the heading changes by runway and city, adding error, and creating plots like the above so we can see what is going on before instantiating our Kalman Filter:¶

In [811]:

# Heading Change

#Runway
headingDifData_runway = []
startIndices_runway = []

for i in xrange(len(kalmansimdata_runway)):
    h_runway = kalmansimdata_runway[i][5]
    headingDif_runway = [ h_runway[j] - h_runway[j-1] for j in xrange(1,len(h_runway)) ]
    startIndices_runway.append(len(headingDifData_runway))
    
    for h_runway in headingDif_runway:
        headingDifData_runway.append(h_runway) 

#City        
headingDifData_city = []
startIndices_city = []

for i in xrange(len(kalmansimdata_city)):
    h_city = kalmansimdata_city[i][5]
    headingDif_city = [ h_city[j] - h_city[j-1] for j in xrange(1,len(h_city)) ]
    startIndices_city.append(len(headingDifData_city))
    
    for h_city in headingDif_city:
        headingDifData_city.append(h_city) 

In [812]:

headingDifData_runway[0:20]

Out[812]:

[0.68562717552993035,
 -0.71829155424899227,
 -0.6360958819886946,
 0.75619909293209275,
 -0.37927622502769509,
 -0.95258339006062442,
 0.44235340684679159,
 0.46924857015511634,
 0.20350388374629347,
 1.6054973788712346,
 -1.4408957787704821,
 -0.071000980427072591,
 1.088632477143392,
 -1.9005634710284767,
 0.97018604527829666,
 0.62885014822057883,
 -1.8574597701306743,
 0.08263863336594568,
 1.4517250487336923,
 -1.0146679033302917]

In [813]:

headingDifData_city[0:20]

Out[813]:

[-0.1099432387491035,
 1.8230157554691289,
 -0.91584992395443976,
 -0.18363385861638193,
 -0.84367125259485221,
 0.3924123796650818,
 -0.00070315832054035354,
 0.80330401767336923,
 -0.12189271271410718,
 -0.82546277948348035,
 0.072262553244307526,
 0.58615701256256614,
 -0.20963953198192087,
 -0.45313440950149442,
 0.46746045385084756,
 1.0758924386421782,
 -1.6754802953017176,
 -0.76357764472861334,
 0.77881294225866782,
 0.79346761065809801]

From the above, we can already see that our city scenarios can have more variation than the runway cases, so our hunch is right...and further confirmed by the below plot.¶

In [814]:

fig = plt.figure(figsize=[10,6])
plt.plot(headingDifData_city, 'ro', label='city heading change')
plt.plot(headingDifData_runway, 'bo', label='runway heading change')
plt.title('heading change, city vs. runway scenarios', fontsize=20)
legend = plt.legend()
plt.xlabel('sample #', fontsize=15)
plt.ylabel('heading change',fontsize=15)
plt.show()

Ignore the positioning of the city versus the runway along the x axis -- we have them both start at 0, and the order in which the simulations are run is actually irrelevant to what we show here. The point is, we show now that the variation culprit is indeed our cities, and our Kalman Filter was effectively able to see where our city scenarios occurred in our long list of 100 simulations, because the small tell-tale city variation spikes remained.¶

Kalman Filter - Alternate Example¶

Here we show a final example that demonstrates that we have implemented a general Kalman Filter that doesn't just apply to the situation at hand:¶

In [815]:

#make test data
testdata = []
xaxis = range(100)

for i in xaxis:
    #We made an arbitrary function below that's random to test out Kalman Filter on unrelated material
    #to show that the implementation does not just depend on our sitation, if you adjust the starting parameters
    testdata.append(0.5*i + np.random.uniform(low=1.0,high=3.0,size=1)*np.random.uniform(low=-2.0,high=2.0,size=1))

In [816]:

fig = plt.figure(figsize=[10,6])
plt.scatter(xaxis,testdata)
plt.title('test data', fontsize=20)
plt.xlabel('samples',fontsize=15)
plt.ylabel('value',fontsize=15)
plt.show()

In [819]:

#Setup

#Pick this constant -- can change
covariance_guess = 0.01 #in this case, a smaller starting number will give a more linear look to our result
                        #we can pump this number down, but leaving as is because this a good naive first guess

#State transition matrix
F = np.matrix(([1,1],[0,1])); print "F: "; print F

#Process variance matrix
Q = covariance_guess*np.matrix(([1./4,1./2],[1./2,1])); print "Q: "; print Q

#Measurement matrix
H = np.matrix(([1,0],[0,0])); print "H: "; print H

#Measurement variance matrix
R = 5*np.matrix(([1,0],[0,1])); print "R: "; print R #we pick this too -- 4 or 5 works better than 1 as a multiplier

#This initial guess doesn't matter too much in the end in this example
init_state = np.matrix(([0,0])); print "initcond1: "; print init_state
init_covar = np.matrix(([100,0],[0,100])); print "initcond2: "; print init_covar  
#could be [10,0] or [1000,0], etc. This indicates we don't have much confidence in our starting state

F: 
[[1 1]
 [0 1]]
Q: 
[[ 0.0025  0.005 ]
 [ 0.005   0.01  ]]
H: 
[[1 0]
 [0 0]]
R: 
[[5 0]
 [0 5]]
initcond1: 
[[0 0]]
initcond2: 
[[100   0]
 [  0 100]]

Test Kalman Filter on sample synthetic data and plot:¶

In [820]:

thetest = kalman(100,initcond1,initcond2,F,Q,H,R,testdata)
thetest1 = np.zeros(len(thetest))
for i in xrange(len(thetest)):
    thetest1[i] = thetest[i][0]
    
fig = plt.figure(figsize=[10,6])
plt.scatter(xaxis,testdata)
plt.plot(thetest1,color='red')
plt.title('Kalman Filter on Test Data', fontsize=20)
plt.xlabel('samples',fontsize=15)
plt.ylabel('value',fontsize=15)
plt.show()