#!/usr/bin/env python # coding: utf-8 # # DEAP # DEAP is a novel evolutionary computation framework for rapid prototyping and testing of ideas. It seeks to make algorithms explicit and data structures transparent. It works in perfect harmony with parallelisation mechanism such as multiprocessing and SCOOP. The following documentation presents the key concepts and many features to build your own evolutions. # # Library documentation: http://deap.readthedocs.org/en/master/ # ## One Max Problem (GA) # This problem is very simple, we search for a 1 filled list individual. This problem is widely used in the evolutionary computation community since it is very simple and it illustrates well the potential of evolutionary algorithms. # In[1]: import random from deap import base from deap import creator from deap import tools # In[2]: # creator is a class factory that can build new classes at run-time creator.create("FitnessMax", base.Fitness, weights=(1.0,)) creator.create("Individual", list, fitness=creator.FitnessMax) # In[3]: # a toolbox stores functions and their arguments toolbox = base.Toolbox() # attribute generator toolbox.register("attr_bool", random.randint, 0, 1) # structure initializers toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_bool, 100) toolbox.register("population", tools.initRepeat, list, toolbox.individual) # In[4]: # evaluation function def evalOneMax(individual): return sum(individual), # In[5]: # register the required genetic operators toolbox.register("evaluate", evalOneMax) toolbox.register("mate", tools.cxTwoPoint) toolbox.register("mutate", tools.mutFlipBit, indpb=0.05) toolbox.register("select", tools.selTournament, tournsize=3) # In[6]: random.seed(64) # instantiate a population pop = toolbox.population(n=300) CXPB, MUTPB, NGEN = 0.5, 0.2, 40 # evaluate the entire population fitnesses = list(map(toolbox.evaluate, pop)) for ind, fit in zip(pop, fitnesses): ind.fitness.values = fit print(" Evaluated %i individuals" % len(pop)) # In[7]: # begin the evolution for g in range(NGEN): print("-- Generation %i --" % g) # select the next generation individuals offspring = toolbox.select(pop, len(pop)) # clone the selected individuals offspring = list(map(toolbox.clone, offspring)) # apply crossover and mutation on the offspring for child1, child2 in zip(offspring[::2], offspring[1::2]): if random.random() < CXPB: toolbox.mate(child1, child2) del child1.fitness.values del child2.fitness.values for mutant in offspring: if random.random() < MUTPB: toolbox.mutate(mutant) del mutant.fitness.values # evaluate the individuals with an invalid fitness invalid_ind = [ind for ind in offspring if not ind.fitness.valid] fitnesses = map(toolbox.evaluate, invalid_ind) for ind, fit in zip(invalid_ind, fitnesses): ind.fitness.values = fit print(" Evaluated %i individuals" % len(invalid_ind)) # the population is entirely replaced by the offspring pop[:] = offspring # gather all the fitnesses in one list and print the stats fits = [ind.fitness.values[0] for ind in pop] length = len(pop) mean = sum(fits) / length sum2 = sum(x*x for x in fits) std = abs(sum2 / length - mean**2)**0.5 print(" Min %s" % min(fits)) print(" Max %s" % max(fits)) print(" Avg %s" % mean) print(" Std %s" % std) # In[8]: best_ind = tools.selBest(pop, 1)[0] print("Best individual is %s, %s" % (best_ind, best_ind.fitness.values)) # ## Symbolic Regression (GP) # Symbolic regression is one of the best known problems in GP. It is commonly used as a tuning problem for new algorithms, but is also widely used with real-life distributions, where other regression methods may not work. # # All symbolic regression problems use an arbitrary data distribution, and try to fit the most accurately the data with a symbolic formula. Usually, a measure like the RMSE (Root Mean Square Error) is used to measure an individual’s fitness. # # In this example, we use a classical distribution, the quartic polynomial (x^4 + x^3 + x^2 + x), a one-dimension distribution. 20 equidistant points are generated in the range [-1, 1], and are used to evaluate the fitness. # In[9]: import operator import math import random import numpy from deap import algorithms from deap import base from deap import creator from deap import tools from deap import gp # define a new function for divison that guards against divide by 0 def protectedDiv(left, right): try: return left / right except ZeroDivisionError: return 1 # In[10]: # add aritmetic primitives pset = gp.PrimitiveSet("MAIN", 1) pset.addPrimitive(operator.add, 2) pset.addPrimitive(operator.sub, 2) pset.addPrimitive(operator.mul, 2) pset.addPrimitive(protectedDiv, 2) pset.addPrimitive(operator.neg, 1) pset.addPrimitive(math.cos, 1) pset.addPrimitive(math.sin, 1) # constant terminal pset.addEphemeralConstant("rand101", lambda: random.randint(-1,1)) # define number of inputs pset.renameArguments(ARG0='x') # In[11]: # create fitness and individual objects creator.create("FitnessMin", base.Fitness, weights=(-1.0,)) creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMin) # In[12]: # register evolution process parameters through the toolbox toolbox = base.Toolbox() toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=1, max_=2) toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.expr) toolbox.register("population", tools.initRepeat, list, toolbox.individual) toolbox.register("compile", gp.compile, pset=pset) # evaluation function def evalSymbReg(individual, points): # transform the tree expression in a callable function func = toolbox.compile(expr=individual) # evaluate the mean squared error between the expression # and the real function : x**4 + x**3 + x**2 + x sqerrors = ((func(x) - x**4 - x**3 - x**2 - x)**2 for x in points) return math.fsum(sqerrors) / len(points), toolbox.register("evaluate", evalSymbReg, points=[x/10. for x in range(-10,10)]) toolbox.register("select", tools.selTournament, tournsize=3) toolbox.register("mate", gp.cxOnePoint) toolbox.register("expr_mut", gp.genFull, min_=0, max_=2) toolbox.register("mutate", gp.mutUniform, expr=toolbox.expr_mut, pset=pset) # prevent functions from getting too deep/complex toolbox.decorate("mate", gp.staticLimit(key=operator.attrgetter("height"), max_value=17)) toolbox.decorate("mutate", gp.staticLimit(key=operator.attrgetter("height"), max_value=17)) # In[13]: # compute some statistics about the population stats_fit = tools.Statistics(lambda ind: ind.fitness.values) stats_size = tools.Statistics(len) mstats = tools.MultiStatistics(fitness=stats_fit, size=stats_size) mstats.register("avg", numpy.mean) mstats.register("std", numpy.std) mstats.register("min", numpy.min) mstats.register("max", numpy.max) # In[14]: random.seed(318) pop = toolbox.population(n=300) hof = tools.HallOfFame(1) # run the algorithm pop, log = algorithms.eaSimple(pop, toolbox, 0.5, 0.1, 40, stats=mstats, halloffame=hof, verbose=True)