DEAP is a novel evolutionary computation framework for rapid prototyping and testing of ideas. It seeks to make algorithms explicit and data structures transparent. It works in perfect harmony with parallelisation mechanisms such as multiprocessing and SCOOP. The following documentation presents the key concepts and many features needed to build your own evolutionary algorithms.
Library documentation: http://deap.readthedocs.org/en/master/
This problem is very simple: we search for an individual whose list is entirely filled with 1s. This problem is widely used in the evolutionary computation community because it is very simple and it illustrates well the potential of evolutionary algorithms.
import random
from deap import base
from deap import creator
from deap import tools
# creator is a class factory that can build new classes at run-time
# FitnessMax: single-objective fitness, maximized (weight is positive)
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
# Individual: a plain list of genes that carries a FitnessMax
creator.create("Individual", list, fitness=creator.FitnessMax)
# a toolbox stores functions and their arguments
toolbox = base.Toolbox()
# attribute generator: each gene is a random bit (0 or 1)
toolbox.register("attr_bool", random.randint, 0, 1)
# structure initializers: an individual is 100 bits, a population is a list of individuals
toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_bool, 100)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
# evaluation function
def evalOneMax(individual):
    """Return the OneMax fitness of *individual*: the count of 1s in it.

    DEAP fitnesses are always tuples, hence the 1-tuple return.
    """
    return (sum(individual),)
# register the required genetic operators
toolbox.register("evaluate", evalOneMax)
# two-point crossover
toolbox.register("mate", tools.cxTwoPoint)
# flip each bit independently with probability 0.05
toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)
# tournament selection among 3 randomly drawn contenders
toolbox.register("select", tools.selTournament, tournsize=3)
# Fixed seed so the run is reproducible.
random.seed(64)

# Build the initial population and score every individual once.
pop = toolbox.population(n=300)
# crossover probability, mutation probability, number of generations
CXPB, MUTPB, NGEN = 0.5, 0.2, 40

fitnesses = [toolbox.evaluate(ind) for ind in pop]
for member, score in zip(pop, fitnesses):
    member.fitness.values = score
print(" Evaluated %i individuals" % len(pop))
Evaluated 300 individuals
# Main generational loop of the evolutionary algorithm.
for gen in range(NGEN):
    print("-- Generation %i --" % gen)

    # Select the next generation, then clone so that the variation
    # operators can modify individuals in place safely.
    offspring = [toolbox.clone(ind) for ind in toolbox.select(pop, len(pop))]

    # Crossover on consecutive pairs; children get a fresh (invalid) fitness.
    for first, second in zip(offspring[::2], offspring[1::2]):
        if random.random() < CXPB:
            toolbox.mate(first, second)
            del first.fitness.values
            del second.fitness.values

    # Independent mutation of each offspring.
    for candidate in offspring:
        if random.random() < MUTPB:
            toolbox.mutate(candidate)
            del candidate.fitness.values

    # Re-evaluate only the individuals whose fitness was invalidated above.
    needs_eval = [ind for ind in offspring if not ind.fitness.valid]
    for ind, fit in zip(needs_eval, map(toolbox.evaluate, needs_eval)):
        ind.fitness.values = fit
    print(" Evaluated %i individuals" % len(needs_eval))

    # Generational replacement: offspring fully replace the population.
    pop[:] = offspring

    # Report population statistics for this generation.
    fits = [ind.fitness.values[0] for ind in pop]
    length = len(pop)
    mean = sum(fits) / length
    sum2 = sum(x * x for x in fits)
    std = abs(sum2 / length - mean ** 2) ** 0.5
    print(" Min %s" % min(fits))
    print(" Max %s" % max(fits))
    print(" Avg %s" % mean)
    print(" Std %s" % std)
-- Generation 0 -- Evaluated 189 individuals Min 40.0 Max 65.0 Avg 54.7433333333 Std 4.46289766358 -- Generation 1 -- Evaluated 171 individuals Min 44.0 Max 70.0 Avg 58.48 Std 3.98533980149 -- Generation 2 -- Evaluated 169 individuals Min 54.0 Max 68.0 Avg 61.6066666667 Std 2.92779021714 -- Generation 3 -- Evaluated 185 individuals Min 57.0 Max 73.0 Avg 63.82 Std 2.74364720764 -- Generation 4 -- Evaluated 175 individuals Min 54.0 Max 73.0 Avg 65.67 Std 2.57961883489 -- Generation 5 -- Evaluated 164 individuals Min 60.0 Max 76.0 Avg 67.5466666667 Std 2.57833710407 -- Generation 6 -- Evaluated 185 individuals Min 63.0 Max 77.0 Avg 69.0666666667 Std 2.50510589707 -- Generation 7 -- Evaluated 194 individuals Min 62.0 Max 78.0 Avg 70.78 Std 2.39963886172 -- Generation 8 -- Evaluated 199 individuals Min 63.0 Max 79.0 Avg 72.3133333333 Std 2.57717330077 -- Generation 9 -- Evaluated 169 individuals Min 67.0 Max 81.0 Avg 74.0 Std 2.62551582234 -- Generation 10 -- Evaluated 180 individuals Min 67.0 Max 83.0 Avg 75.9166666667 Std 2.52910831893 -- Generation 11 -- Evaluated 193 individuals Min 67.0 Max 84.0 Avg 77.5966666667 Std 2.40291258453 -- Generation 12 -- Evaluated 177 individuals Min 72.0 Max 85.0 Avg 78.97 Std 2.29690371297 -- Generation 13 -- Evaluated 195 individuals Min 70.0 Max 86.0 Avg 80.13 Std 2.35650164439 -- Generation 14 -- Evaluated 175 individuals Min 74.0 Max 86.0 Avg 81.3966666667 Std 2.03780655499 -- Generation 15 -- Evaluated 181 individuals Min 74.0 Max 87.0 Avg 82.33 Std 2.18504767301 -- Generation 16 -- Evaluated 198 individuals Min 74.0 Max 88.0 Avg 83.4033333333 Std 2.22575580172 -- Generation 17 -- Evaluated 190 individuals Min 72.0 Max 88.0 Avg 84.14 Std 2.34955314901 -- Generation 18 -- Evaluated 170 individuals Min 76.0 Max 89.0 Avg 85.1 Std 2.20529665427 -- Generation 19 -- Evaluated 189 individuals Min 75.0 Max 90.0 Avg 85.77 Std 2.1564863397 -- Generation 20 -- Evaluated 188 individuals Min 77.0 Max 91.0 Avg 86.4833333333 Std 2.2589943682 
-- Generation 21 -- Evaluated 180 individuals Min 80.0 Max 91.0 Avg 87.24 Std 2.0613264338 -- Generation 22 -- Evaluated 179 individuals Min 80.0 Max 92.0 Avg 87.95 Std 1.95298916194 -- Generation 23 -- Evaluated 196 individuals Min 79.0 Max 93.0 Avg 88.42 Std 2.2249194742 -- Generation 24 -- Evaluated 168 individuals Min 82.0 Max 93.0 Avg 89.2833333333 Std 1.89289607627 -- Generation 25 -- Evaluated 186 individuals Min 78.0 Max 94.0 Avg 89.7666666667 Std 2.26102238428 -- Generation 26 -- Evaluated 182 individuals Min 82.0 Max 94.0 Avg 90.4633333333 Std 2.21404356075 -- Generation 27 -- Evaluated 179 individuals Min 81.0 Max 95.0 Avg 90.8733333333 Std 2.41328729238 -- Generation 28 -- Evaluated 183 individuals Min 83.0 Max 95.0 Avg 91.7166666667 Std 2.18701978856 -- Generation 29 -- Evaluated 167 individuals Min 83.0 Max 98.0 Avg 92.3466666667 Std 2.21656390739 -- Generation 30 -- Evaluated 170 individuals Min 84.0 Max 98.0 Avg 92.9533333333 Std 2.09868742048 -- Generation 31 -- Evaluated 172 individuals Min 83.0 Max 97.0 Avg 93.5266666667 Std 2.28238666507 -- Generation 32 -- Evaluated 196 individuals Min 86.0 Max 98.0 Avg 94.28 Std 2.16985406575 -- Generation 33 -- Evaluated 176 individuals Min 85.0 Max 98.0 Avg 94.9133333333 Std 2.22392046221 -- Generation 34 -- Evaluated 176 individuals Min 86.0 Max 99.0 Avg 95.6333333333 Std 2.13359373411 -- Generation 35 -- Evaluated 174 individuals Min 86.0 Max 99.0 Avg 96.2966666667 Std 2.23651266236 -- Generation 36 -- Evaluated 174 individuals Min 87.0 Max 100.0 Avg 96.5866666667 Std 2.41436442062 -- Generation 37 -- Evaluated 195 individuals Min 84.0 Max 100.0 Avg 97.3666666667 Std 2.16153237825 -- Generation 38 -- Evaluated 180 individuals Min 89.0 Max 100.0 Avg 97.7466666667 Std 2.32719191779 -- Generation 39 -- Evaluated 196 individuals Min 88.0 Max 100.0 Avg 98.1833333333 Std 2.33589145486
# report the single best individual of the final population
best_ind = tools.selBest(pop, 1)[0]
print("Best individual is %s, %s" % (best_ind, best_ind.fitness.values))
Best individual is [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], (100.0,)
Symbolic regression is one of the best known problems in GP. It is commonly used as a tuning problem for new algorithms, but is also widely used with real-life distributions, where other regression methods may not work.
All symbolic regression problems use an arbitrary data distribution, and try to fit the most accurately the data with a symbolic formula. Usually, a measure like the RMSE (Root Mean Square Error) is used to measure an individual’s fitness.
In this example, we use a classical distribution, the quartic polynomial (x^4 + x^3 + x^2 + x), a one-dimensional distribution. 20 equidistant points are generated in the range [-1, 1), and are used to evaluate the fitness.
import operator
import math
import random
import numpy
from deap import algorithms
from deap import base
from deap import creator
from deap import tools
from deap import gp
# Division primitive that guards against division by zero.
def protectedDiv(left, right):
    """Return ``left / right``, or 1 when the division raises ZeroDivisionError.

    GP trees may evolve a division by zero; returning a neutral constant
    keeps evaluation from crashing.
    """
    try:
        result = left / right
    except ZeroDivisionError:
        result = 1
    return result
# add arithmetic primitives to the GP primitive set ("MAIN" takes one input)
pset = gp.PrimitiveSet("MAIN", 1)
pset.addPrimitive(operator.add, 2)
pset.addPrimitive(operator.sub, 2)
pset.addPrimitive(operator.mul, 2)
# safe division defined above
pset.addPrimitive(protectedDiv, 2)
pset.addPrimitive(operator.neg, 1)
pset.addPrimitive(math.cos, 1)
pset.addPrimitive(math.sin, 1)
# constant terminal: an ephemeral random integer in {-1, 0, 1}
pset.addEphemeralConstant("rand101", lambda: random.randint(-1,1))
# define number of inputs: rename the single argument to 'x'
pset.renameArguments(ARG0='x')
# create fitness (single objective, minimized: negative weight) and individual (GP tree) classes
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMin)
# register evolution process parameters through the toolbox
toolbox = base.Toolbox()
# expression generator: half-and-half trees of depth 1 to 2
toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=1, max_=2)
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.expr)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
# 'compile' turns a tree into a callable Python function
toolbox.register("compile", gp.compile, pset=pset)
# evaluation function
def evalSymbReg(individual, points):
    """Mean squared error of *individual* against x**4 + x**3 + x**2 + x over *points*.

    Returns a 1-tuple, as DEAP fitnesses are always tuples.
    """
    # Compile the tree expression into an executable function of x.
    func = toolbox.compile(expr=individual)
    squared_errors = (
        (func(x) - x ** 4 - x ** 3 - x ** 2 - x) ** 2
        for x in points
    )
    # math.fsum keeps the floating-point summation accurate.
    return (math.fsum(squared_errors) / len(points),)
# evaluation over 20 equidistant points: -1.0, -0.9, ..., 0.9
toolbox.register("evaluate", evalSymbReg, points=[x/10. for x in range(-10,10)])
toolbox.register("select", tools.selTournament, tournsize=3)
# one-point subtree crossover
toolbox.register("mate", gp.cxOnePoint)
# mutation replaces a subtree with a new full tree of depth 0 to 2
toolbox.register("expr_mut", gp.genFull, min_=0, max_=2)
toolbox.register("mutate", gp.mutUniform, expr=toolbox.expr_mut, pset=pset)
# prevent functions from getting too deep/complex (bloat control: max tree height 17)
toolbox.decorate("mate", gp.staticLimit(key=operator.attrgetter("height"), max_value=17))
toolbox.decorate("mutate", gp.staticLimit(key=operator.attrgetter("height"), max_value=17))
# compute some statistics about the population: track both fitness and tree size
stats_fit = tools.Statistics(lambda ind: ind.fitness.values)
stats_size = tools.Statistics(len)
mstats = tools.MultiStatistics(fitness=stats_fit, size=stats_size)
mstats.register("avg", numpy.mean)
mstats.register("std", numpy.std)
mstats.register("min", numpy.min)
mstats.register("max", numpy.max)
# fixed seed for reproducibility
random.seed(318)
pop = toolbox.population(n=300)
# hall of fame keeps the single best individual ever seen
hof = tools.HallOfFame(1)
# run the canned simple EA: cxpb=0.5, mutpb=0.1, ngen=40
pop, log = algorithms.eaSimple(pop, toolbox, 0.5, 0.1, 40, stats=mstats,
halloffame=hof, verbose=True)
fitness size --------------------------------------- ------------------------------- gen nevals avg max min std avg max min std 0 300 2.39949 59.2593 0.165572 4.64122 3.69667 7 2 1.61389 1 146 1.0971 10.1 0.165572 0.845978 3.80667 13 1 1.78586 2 169 0.902365 6.5179 0.165572 0.72362 4.16 13 1 2.0366 3 167 0.852725 9.6327 0.165572 0.869381 4.63667 13 1 2.20408 4 158 0.74829 14.1573 0.165572 1.01281 4.88333 13 1 2.14392 5 160 0.630299 7.90605 0.165572 0.904373 5.52333 14 1 2.09351 6 181 0.495118 4.09456 0.165572 0.524658 6.08333 13 1 1.99409 7 170 0.403873 2.6434 0.165572 0.440596 6.34667 14 1 1.84386 8 173 0.393405 2.9829 0.165572 0.425415 6.37 12 1 1.78132 9 168 0.414299 13.5996 0.165572 0.841226 6.25333 11 2 1.76328 10 142 0.384179 4.07808 0.165572 0.477269 6.25667 13 1 1.78067 11 156 0.459639 19.8316 0.165572 1.47254 6.35333 15 1 2.04983 12 167 0.384348 6.79674 0.165572 0.495807 6.25 13 1 1.92029 13 157 0.42446 11.0636 0.165572 0.818953 6.43667 15 1 2.11959 14 175 0.342257 2.552 0.165572 0.325872 6.23333 15 1 2.14295 15 154 0.442374 13.8349 0.165572 0.950612 6.05667 14 1 1.90266 16 181 0.455697 19.7228 0.101561 1.39528 6.08667 13 1 1.84006 17 178 0.36256 2.54124 0.101561 0.340555 6.24 15 1 2.20055 18 171 0.411532 14.2339 0.101561 0.897785 6.44 15 1 2.2715 19 156 0.43193 15.5923 0.101561 0.9949 6.66667 15 1 2.40185 20 169 0.398163 4.09456 0.0976781 0.450231 6.96667 15 1 2.62022 21 162 0.385774 4.09456 0.0976781 0.421867 7.13 14 1 2.65577 22 162 0.35318 2.55465 0.0253803 0.389453 7.66667 19 2 3.04995 23 164 0.3471 3.66792 0.0253803 0.482334 8.24 21 1 3.48364 24 159 1.46248 331.247 0.0253803 19.0841 9.42667 19 3 3.238 25 164 0.382697 6.6452 0.0173316 0.652247 10.1867 25 1 3.46292 26 139 0.367651 11.9045 0.0173316 0.855067 10.67 19 3 3.32582 27 167 0.345866 6.6452 0.0173316 0.586155 11.4 27 3 3.44384 28 183 0.388404 4.53076 0.0173316 0.58986 11.5767 24 3 3.4483 29 163 0.356009 6.33264 0.0173316 0.563266 12.2433 29 2 4.23211 30 174 0.31506 2.54124 0.0173316 0.412507 
12.92 27 3 4.5041 31 206 0.361197 2.9829 0.0173316 0.486155 13.9333 33 1 5.6747 32 168 0.302704 4.01244 0.0173316 0.502277 15.04 31 3 5.40849 33 160 0.246509 3.30873 0.012947 0.433212 16.3967 34 2 5.66092 34 158 0.344791 26.1966 0.012947 1.57277 17.39 43 1 6.13008 35 162 0.212572 2.85856 0.0148373 0.363023 17.64 37 2 6.04349 36 183 0.240268 5.06093 0.0112887 0.482794 17.4333 41 3 6.33184 37 185 0.514635 65.543 0.0103125 3.7864 16.6167 41 1 6.58456 38 134 0.340433 11.2506 0.0103125 0.827213 16.2733 34 1 6.08484 39 158 0.329797 15.8145 4.50668e-33 1.05693 16.4133 34 1 6.09993 40 164 0.306543 14.3573 4.50668e-33 0.947046 17.9033 53 2 8.23695