DEAP is a novel evolutionary computation framework for rapid prototyping and testing of ideas. It seeks to make algorithms explicit and data structures transparent. It works in perfect harmony with parallelisation mechanisms such as multiprocessing and SCOOP. The following documentation presents the key concepts and many features needed to build your own evolutionary algorithms.
Library documentation: http://deap.readthedocs.org/en/master/
This problem is very simple: we search for an individual whose list is entirely filled with 1s. This problem is widely used in the evolutionary computation community because it is very simple and it illustrates well the potential of evolutionary algorithms.
import random
from deap import base
from deap import creator
from deap import tools
# creator is a class factory that can build new classes at run-time
# FitnessMax: single-objective fitness, maximized (weight is positive)
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
# Individual: a plain list of genes that carries a FitnessMax
creator.create("Individual", list, fitness=creator.FitnessMax)
# a toolbox stores functions and their arguments
toolbox = base.Toolbox()
# attribute generator: each gene is a random bit (0 or 1)
toolbox.register("attr_bool", random.randint, 0, 1)
# structure initializers: an individual is 100 bits, a population is a list of individuals
toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_bool, 100)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
# evaluation function
def evalOneMax(individual):
    """Return the OneMax fitness of *individual*: the count of 1s in it.

    DEAP fitnesses are always tuples, hence the 1-tuple return.
    """
    return (sum(individual),)
# register the required genetic operators
toolbox.register("evaluate", evalOneMax)
# two-point crossover
toolbox.register("mate", tools.cxTwoPoint)
# flip each bit independently with probability 0.05
toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)
# tournament selection among 3 randomly drawn contenders
toolbox.register("select", tools.selTournament, tournsize=3)
# Fixed seed so the run is reproducible.
random.seed(64)

# Build the initial population and score every individual once.
pop = toolbox.population(n=300)
# crossover probability, mutation probability, number of generations
CXPB, MUTPB, NGEN = 0.5, 0.2, 40

fitnesses = [toolbox.evaluate(ind) for ind in pop]
for member, score in zip(pop, fitnesses):
    member.fitness.values = score
print(" Evaluated %i individuals" % len(pop))
Evaluated 300 individuals
# Main generational loop of the evolutionary algorithm.
for gen in range(NGEN):
    print("-- Generation %i --" % gen)

    # Select the next generation, then clone so that the variation
    # operators can modify individuals in place safely.
    offspring = [toolbox.clone(ind) for ind in toolbox.select(pop, len(pop))]

    # Crossover on consecutive pairs; children get a fresh (invalid) fitness.
    for first, second in zip(offspring[::2], offspring[1::2]):
        if random.random() < CXPB:
            toolbox.mate(first, second)
            del first.fitness.values
            del second.fitness.values

    # Independent mutation of each offspring.
    for candidate in offspring:
        if random.random() < MUTPB:
            toolbox.mutate(candidate)
            del candidate.fitness.values

    # Re-evaluate only the individuals whose fitness was invalidated above.
    needs_eval = [ind for ind in offspring if not ind.fitness.valid]
    for ind, fit in zip(needs_eval, map(toolbox.evaluate, needs_eval)):
        ind.fitness.values = fit
    print(" Evaluated %i individuals" % len(needs_eval))

    # Generational replacement: offspring fully replace the population.
    pop[:] = offspring

    # Report population statistics for this generation.
    fits = [ind.fitness.values[0] for ind in pop]
    length = len(pop)
    mean = sum(fits) / length
    sum2 = sum(x * x for x in fits)
    std = abs(sum2 / length - mean ** 2) ** 0.5
    print(" Min %s" % min(fits))
    print(" Max %s" % max(fits))
    print(" Avg %s" % mean)
    print(" Std %s" % std)
-- Generation 0 -- Evaluated 189 individuals Min 40.0 Max 65.0 Avg 54.7433333333 Std 4.46289766358 -- Generation 1 -- Evaluated 171 individuals Min 44.0 Max 70.0 Avg 58.48 Std 3.98533980149 -- Generation 2 -- Evaluated 169 individuals Min 54.0 Max 68.0 Avg 61.6066666667 Std 2.92779021714 -- Generation 3 -- Evaluated 185 individuals Min 57.0 Max 73.0 Avg 63.82 Std 2.74364720764 -- Generation 4 -- Evaluated 175 individuals Min 54.0 Max 73.0 Avg 65.67 Std 2.57961883489 -- Generation 5 -- Evaluated 164 individuals Min 60.0 Max 76.0 Avg 67.5466666667 Std 2.57833710407 -- Generation 6 -- Evaluated 185 individuals Min 63.0 Max 77.0 Avg 69.0666666667 Std 2.50510589707 -- Generation 7 -- Evaluated 194 individuals Min 62.0 Max 78.0 Avg 70.78 Std 2.39963886172 -- Generation 8 -- Evaluated 199 individuals Min 63.0 Max 79.0 Avg 72.3133333333 Std 2.57717330077 -- Generation 9 -- Evaluated 169 individuals Min 67.0 Max 81.0 Avg 74.0 Std 2.62551582234 -- Generation 10 -- Evaluated 180 individuals Min 67.0 Max 83.0 Avg 75.9166666667 Std 2.52910831893 -- Generation 11 -- Evaluated 193 individuals Min 67.0 Max 84.0 Avg 77.5966666667 Std 2.40291258453 -- Generation 12 -- Evaluated 177 individuals Min 72.0 Max 85.0 Avg 78.97 Std 2.29690371297 -- Generation 13 -- Evaluated 195 individuals Min 70.0 Max 86.0 Avg 80.13 Std 2.35650164439 -- Generation 14 -- Evaluated 175 individuals Min 74.0 Max 86.0 Avg 81.3966666667 Std 2.03780655499 -- Generation 15 -- Evaluated 181 individuals Min 74.0 Max 87.0 Avg 82.33 Std 2.18504767301 -- Generation 16 -- Evaluated 198 individuals Min 74.0 Max 88.0 Avg 83.4033333333 Std 2.22575580172 -- Generation 17 -- Evaluated 190 individuals Min 72.0 Max 88.0 Avg 84.14 Std 2.34955314901 -- Generation 18 -- Evaluated 170 individuals Min 76.0 Max 89.0 Avg 85.1 Std 2.20529665427 -- Generation 19 -- Evaluated 189 individuals Min 75.0 Max 90.0 Avg 85.77 Std 2.1564863397 -- Generation 20 -- Evaluated 188 individuals Min 77.0 Max 91.0 Avg 86.4833333333 Std 2.2589943682 
-- Generation 21 -- Evaluated 180 individuals Min 80.0 Max 91.0 Avg 87.24 Std 2.0613264338 -- Generation 22 -- Evaluated 179 individuals Min 80.0 Max 92.0 Avg 87.95 Std 1.95298916194 -- Generation 23 -- Evaluated 196 individuals Min 79.0 Max 93.0 Avg 88.42 Std 2.2249194742 -- Generation 24 -- Evaluated 168 individuals Min 82.0 Max 93.0 Avg 89.2833333333 Std 1.89289607627 -- Generation 25 -- Evaluated 186 individuals Min 78.0 Max 94.0 Avg 89.7666666667 Std 2.26102238428 -- Generation 26 -- Evaluated 182 individuals Min 82.0 Max 94.0 Avg 90.4633333333 Std 2.21404356075 -- Generation 27 -- Evaluated 179 individuals Min 81.0 Max 95.0 Avg 90.8733333333 Std 2.41328729238 -- Generation 28 -- Evaluated 183 individuals Min 83.0 Max 95.0 Avg 91.7166666667 Std 2.18701978856 -- Generation 29 -- Evaluated 167 individuals Min 83.0 Max 98.0 Avg 92.3466666667 Std 2.21656390739 -- Generation 30 -- Evaluated 170 individuals Min 84.0 Max 98.0 Avg 92.9533333333 Std 2.09868742048 -- Generation 31 -- Evaluated 172 individuals Min 83.0 Max 97.0 Avg 93.5266666667 Std 2.28238666507 -- Generation 32 -- Evaluated 196 individuals Min 86.0 Max 98.0 Avg 94.28 Std 2.16985406575 -- Generation 33 -- Evaluated 176 individuals Min 85.0 Max 98.0 Avg 94.9133333333 Std 2.22392046221 -- Generation 34 -- Evaluated 176 individuals Min 86.0 Max 99.0 Avg 95.6333333333 Std 2.13359373411 -- Generation 35 -- Evaluated 174 individuals Min 86.0 Max 99.0 Avg 96.2966666667 Std 2.23651266236 -- Generation 36 -- Evaluated 174 individuals Min 87.0 Max 100.0 Avg 96.5866666667 Std 2.41436442062 -- Generation 37 -- Evaluated 195 individuals Min 84.0 Max 100.0 Avg 97.3666666667 Std 2.16153237825 -- Generation 38 -- Evaluated 180 individuals Min 89.0 Max 100.0 Avg 97.7466666667 Std 2.32719191779 -- Generation 39 -- Evaluated 196 individuals Min 88.0 Max 100.0 Avg 98.1833333333 Std 2.33589145486
# report the single best individual of the final population
best_ind = tools.selBest(pop, 1)[0]
print("Best individual is %s, %s" % (best_ind, best_ind.fitness.values))
Best individual is [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], (100.0,)
Symbolic regression is one of the best known problems in GP. It is commonly used as a tuning problem for new algorithms, but is also widely used with real-life distributions, where other regression methods may not work.
All symbolic regression problems use an arbitrary data distribution, and try to fit the most accurately the data with a symbolic formula. Usually, a measure like the RMSE (Root Mean Square Error) is used to measure an individual’s fitness.
In this example, we use a classical distribution, the quartic polynomial (x^4 + x^3 + x^2 + x), a one-dimensional distribution. 20 equidistant points are generated in the range [-1, 1), and are used to evaluate the fitness.
import operator
import math
import random
import numpy
from deap import algorithms
from deap import base
from deap import creator
from deap import tools
from deap import gp
# Division primitive that guards against division by zero.
def protectedDiv(left, right):
    """Return ``left / right``, or 1 when the division raises ZeroDivisionError.

    GP trees may evolve a division by zero; returning a neutral constant
    keeps evaluation from crashing.
    """
    try:
        result = left / right
    except ZeroDivisionError:
        result = 1
    return result
# add arithmetic primitives to the GP primitive set ("MAIN" takes one input)
pset = gp.PrimitiveSet("MAIN", 1)
pset.addPrimitive(operator.add, 2)
pset.addPrimitive(operator.sub, 2)
pset.addPrimitive(operator.mul, 2)
# safe division defined above
pset.addPrimitive(protectedDiv, 2)
pset.addPrimitive(operator.neg, 1)
pset.addPrimitive(math.cos, 1)
pset.addPrimitive(math.sin, 1)
# constant terminal: an ephemeral random integer in {-1, 0, 1}
pset.addEphemeralConstant("rand101", lambda: random.randint(-1,1))
# define number of inputs: rename the single argument to 'x'
pset.renameArguments(ARG0='x')
# create fitness (single objective, minimized: negative weight) and individual (GP tree) classes
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMin)
# register evolution process parameters through the toolbox
toolbox = base.Toolbox()
# expression generator: half-and-half trees of depth 1 to 2
toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=1, max_=2)
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.expr)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
# 'compile' turns a tree into a callable Python function
toolbox.register("compile", gp.compile, pset=pset)
# evaluation function
def evalSymbReg(individual, points):
    """Mean squared error of *individual* against x**4 + x**3 + x**2 + x over *points*.

    Returns a 1-tuple, as DEAP fitnesses are always tuples.
    """
    # Compile the tree expression into an executable function of x.
    func = toolbox.compile(expr=individual)
    squared_errors = (
        (func(x) - x ** 4 - x ** 3 - x ** 2 - x) ** 2
        for x in points
    )
    # math.fsum keeps the floating-point summation accurate.
    return (math.fsum(squared_errors) / len(points),)
# evaluation over 20 equidistant points: -1.0, -0.9, ..., 0.9
toolbox.register("evaluate", evalSymbReg, points=[x/10. for x in range(-10,10)])
toolbox.register("select", tools.selTournament, tournsize=3)
# one-point subtree crossover
toolbox.register("mate", gp.cxOnePoint)
# mutation replaces a subtree with a new full tree of depth 0 to 2
toolbox.register("expr_mut", gp.genFull, min_=0, max_=2)
toolbox.register("mutate", gp.mutUniform, expr=toolbox.expr_mut, pset=pset)
# prevent functions from getting too deep/complex (bloat control: max tree height 17)
toolbox.decorate("mate", gp.staticLimit(key=operator.attrgetter("height"), max_value=17))
toolbox.decorate("mutate", gp.staticLimit(key=operator.attrgetter("height"), max_value=17))
# compute some statistics about the population: track both fitness and tree size
stats_fit = tools.Statistics(lambda ind: ind.fitness.values)
stats_size = tools.Statistics(len)
mstats = tools.MultiStatistics(fitness=stats_fit, size=stats_size)
mstats.register("avg", numpy.mean)
mstats.register("std", numpy.std)
mstats.register("min", numpy.min)
mstats.register("max", numpy.max)
# fixed seed for reproducibility
random.seed(318)
pop = toolbox.population(n=300)
# hall of fame keeps the single best individual ever seen
hof = tools.HallOfFame(1)
# run the canned simple EA: cxpb=0.5, mutpb=0.1, ngen=40
pop, log = algorithms.eaSimple(pop, toolbox, 0.5, 0.1, 40, stats=mstats,
halloffame=hof, verbose=True)
fitness size --------------------------------------- ------------------------------- gen nevals avg max min std avg max min std 0 300 2.39949 59.2593 0.165572 4.64122 3.69667 7 2 1.61389 1 146 1.0971 10.1 0.165572 0.845978 3.80667 13 1 1.78586 2 169 0.902365 6.5179 0.165572 0.72362 4.16 13 1 2.0366 3 167 0.852725 9.6327 0.165572 0.869381 4.63667 13 1 2.20408 4 158 0.74829 14.1573 0.165572 1.01281 4.88333 13 1 2.14392 5 160 0.630299 7.90605 0.165572 0.904373 5.52333 14 1 2.09351 6 181 0.495118 4.09456 0.165572 0.524658 6.08333 13 1 1.99409 7 170 0.403873 2.6434 0.165572 0.440596 6.34667 14 1 1.84386 8 173 0.393405 2.9829 0.165572 0.425415 6.37 12 1 1.78132 9 168 0.414299 13.5996 0.165572 0.841226 6.25333 11 2 1.76328 10 142 0.384179 4.07808 0.165572 0.477269 6.25667 13 1 1.78067 11 156 0.459639 19.8316 0.165572 1.47254 6.35333 15 1 2.04983 12 167 0.384348 6.79674 0.165572 0.495807 6.25 13 1 1.92029 13 157 0.42446 11.0636 0.165572 0.818953 6.43667 15 1 2.11959 14 175 0.342257 2.552 0.165572 0.325872 6.23333 15 1 2.14295 15 154 0.442374 13.8349 0.165572 0.950612 6.05667 14 1 1.90266 16 181 0.455697 19.7228 0.101561 1.39528 6.08667 13 1 1.84006 17 178 0.36256 2.54124 0.101561 0.340555 6.24 15 1 2.20055 18 171 0.411532 14.2339 0.101561 0.897785 6.44 15 1 2.2715 19 156 0.43193 15.5923 0.101561 0.9949 6.66667 15 1 2.40185 20 169 0.398163 4.09456 0.0976781 0.450231 6.96667 15 1 2.62022 21 162 0.385774 4.09456 0.0976781 0.421867 7.13 14 1 2.65577 22 162 0.35318 2.55465 0.0253803 0.389453 7.66667 19 2 3.04995 23 164 0.3471 3.66792 0.0253803 0.482334 8.24 21 1 3.48364 24 159 1.46248 331.247 0.0253803 19.0841 9.42667 19 3 3.238 25 164 0.382697 6.6452 0.0173316 0.652247 10.1867 25 1 3.46292 26 139 0.367651 11.9045 0.0173316 0.855067 10.67 19 3 3.32582 27 167 0.345866 6.6452 0.0173316 0.586155 11.4 27 3 3.44384 28 183 0.388404 4.53076 0.0173316 0.58986 11.5767 24 3 3.4483 29 163 0.356009 6.33264 0.0173316 0.563266 12.2433 29 2 4.23211 30 174 0.31506 2.54124 0.0173316 0.412507 
12.92 27 3 4.5041 31 206 0.361197 2.9829 0.0173316 0.486155 13.9333 33 1 5.6747 32 168 0.302704 4.01244 0.0173316 0.502277 15.04 31 3 5.40849 33 160 0.246509 3.30873 0.012947 0.433212 16.3967 34 2 5.66092 34 158 0.344791 26.1966 0.012947 1.57277 17.39 43 1 6.13008 35 162 0.212572 2.85856 0.0148373 0.363023 17.64 37 2 6.04349 36 183 0.240268 5.06093 0.0112887 0.482794 17.4333 41 3 6.33184 37 185 0.514635 65.543 0.0103125 3.7864 16.6167 41 1 6.58456 38 134 0.340433 11.2506 0.0103125 0.827213 16.2733 34 1 6.08484 39 158 0.329797 15.8145 4.50668e-33 1.05693 16.4133 34 1 6.09993 40 164 0.306543 14.3573 4.50668e-33 0.947046 17.9033 53 2 8.23695