from IPython.display import display
from IPython.display import HTML
By listing the first six prime numbers: 2, 3, 5, 7, 11, and 13, we can see that the 6th prime is 13.
Another prime question: this time we need the 10,001st prime. We already tested numbers for primality in question 3, so it is time to reuse that code.
Method 1: brute force
def isPrime(x):
    """Return True if ``x`` is prime, using brute-force trial division.

    Every integer in ``[2, x)`` is tried as a divisor. This is deliberately
    the naive O(x) approach: it serves as the slow baseline that the sieve
    below is compared against.

    Values below 2 (0, 1 and all negative numbers) are not prime.
    """
    # Without this guard range(2, x) is empty for x <= 1 and the function
    # would fall through to "prime" for 0 and for negative numbers.
    if x < 2:
        return False
    return all(x % divisor != 0 for divisor in range(2, x))
def getPrimes(maxValue):
    """Return, in ascending order, every prime strictly below ``maxValue``.

    Each candidate from 1 up to ``maxValue - 1`` is checked individually
    with ``isPrime``.
    """
    return [candidate for candidate in range(1, maxValue) if isPrime(candidate)]
primes = getPrimes(10000)
%%timeit
getPrimes(10000)
1 loops, best of 3: 1.25 s per loop
len(primes)
1229
The brute force solution takes more than a second to calculate the primes up to 10000. And how many primes did that yield? Only 1229! This doesn't look like a reasonable way to calculate 10000 primes. Luckily, there is a very simple and clever algorithm that can do this job much faster.
Method 2: Sieve of Eratosthenes
The basic notion of the sieve of Eratosthenes is to pre-allocate a list of numbers up to n, and then, taking a prime (starting with 2), cross out every multiple of that prime, as those multiples clearly can't be primes. The next prime is then the next unmarked value in the list. The process repeats until there are no more primes to be found.
def showState(l, p, nx):
    """Display one snapshot of the sieve list as a single-row HTML table.

    l  -- the sieve's working list; each entry becomes one table cell that
          shows the entry's absolute value
    p  -- the prime being processed; its cell gets a yellow background
    nx -- the next prime found; its cell gets a green background

    Negative entries are drawn struck through on a blue background, and
    zero entries are drawn grey-on-grey so they fade out. Nothing is
    returned; the table is handed to IPython's ``display``.
    """
    cell_template = '<td style="color: rgb(50,50,50); padding:0; width:1.5em; border-color: rgb(240,240,240); text-align:center;{0}">{1}</td>'

    cells = []
    for n in l:
        # The checks are independent and order matters: when several apply,
        # the declaration appended last wins in the browser.
        style = ''
        if n < 0:
            style += 'text-decoration: line-through; background-color: rgb(171, 231, 255);'
        if n == p:
            style += 'background-color: rgb(230,255,95);'
        if n == nx:
            style += 'background-color: rgb(150, 233, 150);'
        if n == 0:
            style += 'background-color: rgb(220,220,220); color: rgb(220,220,220);'
        cells.append(cell_template.format(style, abs(n)))

    # The original template had a stray apostrophe after the <tr> style
    # attribute ("'>), which is malformed markup; it is removed here.
    s = """<table style='font-size: 12px; height:0.7em; '>
<tr style="height: 0.7em;">{0}</tr></table>""".format(''.join(cells))
    display(HTML(s))
def sieve(size, showStates=True):
    """Return a list of all primes ``<= size`` (sieve of Eratosthenes).

    size       -- inclusive upper limit of the candidate range; values
                  below 2 yield an empty list
    showStates -- when true, the working list is rendered with
                  ``showState`` once per processed prime

    While a prime ``p`` is being processed its multiples are first stored
    negated (so the snapshot can show them as freshly struck) and are then
    reset to 0, which marks them as eliminated for all later passes.
    """
    if size < 2:
        return []

    cells = list(range(2, size + 1))  # cells[k] holds the number k + 2
    p = 2  # seed with the first prime
    while p:
        # Strike every multiple of p; negative means "struck in this pass".
        for multiple in range(2 * p, size + 1, p):
            cells[multiple - 2] = -multiple

        # The next prime is the first still-positive value to the right of p
        # (p + 1 lives at index p - 1); 0 signals that none is left.
        nextPrime = next((value for value in cells[p - 1:] if value > 0), 0)

        if showStates:
            showState(cells, p, nextPrime)

        # Fade this pass's strikes to 0 so later snapshots show them as old.
        for multiple in range(2 * p, size + 1, p):
            cells[multiple - 2] = 0

        p = nextPrime

    # Return a real list: a lazy filter object supports neither len() nor
    # indexing, both of which callers of this function rely on.
    return [value for value in cells if value > 0]
sieve(58, True)
2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 |
2 | 3 | 0 | 5 | 6 | 7 | 0 | 9 | 0 | 11 | 12 | 13 | 0 | 15 | 0 | 17 | 18 | 19 | 0 | 21 | 0 | 23 | 24 | 25 | 0 | 27 | 0 | 29 | 30 | 31 | 0 | 33 | 0 | 35 | 36 | 37 | 0 | 39 | 0 | 41 | 42 | 43 | 0 | 45 | 0 | 47 | 48 | 49 | 0 | 51 | 0 | 53 | 54 | 55 | 0 | 57 | 0 |
2 | 3 | 0 | 5 | 0 | 7 | 0 | 0 | 10 | 11 | 0 | 13 | 0 | 15 | 0 | 17 | 0 | 19 | 20 | 0 | 0 | 23 | 0 | 25 | 0 | 0 | 0 | 29 | 30 | 31 | 0 | 0 | 0 | 35 | 0 | 37 | 0 | 0 | 40 | 41 | 0 | 43 | 0 | 45 | 0 | 47 | 0 | 49 | 50 | 0 | 0 | 53 | 0 | 55 | 0 | 0 | 0 |
2 | 3 | 0 | 5 | 0 | 7 | 0 | 0 | 0 | 11 | 0 | 13 | 14 | 0 | 0 | 17 | 0 | 19 | 0 | 21 | 0 | 23 | 0 | 0 | 0 | 0 | 28 | 29 | 0 | 31 | 0 | 0 | 0 | 35 | 0 | 37 | 0 | 0 | 0 | 41 | 42 | 43 | 0 | 0 | 0 | 47 | 0 | 49 | 0 | 0 | 0 | 53 | 0 | 0 | 56 | 0 | 0 |
2 | 3 | 0 | 5 | 0 | 7 | 0 | 0 | 0 | 11 | 0 | 13 | 0 | 0 | 0 | 17 | 0 | 19 | 0 | 0 | 22 | 23 | 0 | 0 | 0 | 0 | 0 | 29 | 0 | 31 | 0 | 33 | 0 | 0 | 0 | 37 | 0 | 0 | 0 | 41 | 0 | 43 | 44 | 0 | 0 | 47 | 0 | 0 | 0 | 0 | 0 | 53 | 0 | 55 | 0 | 0 | 0 |
2 | 3 | 0 | 5 | 0 | 7 | 0 | 0 | 0 | 11 | 0 | 13 | 0 | 0 | 0 | 17 | 0 | 19 | 0 | 0 | 0 | 23 | 0 | 0 | 26 | 0 | 0 | 29 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 37 | 0 | 39 | 0 | 41 | 0 | 43 | 0 | 0 | 0 | 47 | 0 | 0 | 0 | 0 | 52 | 53 | 0 | 0 | 0 | 0 | 0 |
2 | 3 | 0 | 5 | 0 | 7 | 0 | 0 | 0 | 11 | 0 | 13 | 0 | 0 | 0 | 17 | 0 | 19 | 0 | 0 | 0 | 23 | 0 | 0 | 0 | 0 | 0 | 29 | 0 | 31 | 0 | 0 | 34 | 0 | 0 | 37 | 0 | 0 | 0 | 41 | 0 | 43 | 0 | 0 | 0 | 47 | 0 | 0 | 0 | 51 | 0 | 53 | 0 | 0 | 0 | 0 | 0 |
2 | 3 | 0 | 5 | 0 | 7 | 0 | 0 | 0 | 11 | 0 | 13 | 0 | 0 | 0 | 17 | 0 | 19 | 0 | 0 | 0 | 23 | 0 | 0 | 0 | 0 | 0 | 29 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 37 | 38 | 0 | 0 | 41 | 0 | 43 | 0 | 0 | 0 | 47 | 0 | 0 | 0 | 0 | 0 | 53 | 0 | 0 | 0 | 57 | 0 |
2 | 3 | 0 | 5 | 0 | 7 | 0 | 0 | 0 | 11 | 0 | 13 | 0 | 0 | 0 | 17 | 0 | 19 | 0 | 0 | 0 | 23 | 0 | 0 | 0 | 0 | 0 | 29 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 37 | 0 | 0 | 0 | 41 | 0 | 43 | 0 | 0 | 46 | 47 | 0 | 0 | 0 | 0 | 0 | 53 | 0 | 0 | 0 | 0 | 0 |
2 | 3 | 0 | 5 | 0 | 7 | 0 | 0 | 0 | 11 | 0 | 13 | 0 | 0 | 0 | 17 | 0 | 19 | 0 | 0 | 0 | 23 | 0 | 0 | 0 | 0 | 0 | 29 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 37 | 0 | 0 | 0 | 41 | 0 | 43 | 0 | 0 | 0 | 47 | 0 | 0 | 0 | 0 | 0 | 53 | 0 | 0 | 0 | 0 | 58 |
2 | 3 | 0 | 5 | 0 | 7 | 0 | 0 | 0 | 11 | 0 | 13 | 0 | 0 | 0 | 17 | 0 | 19 | 0 | 0 | 0 | 23 | 0 | 0 | 0 | 0 | 0 | 29 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 37 | 0 | 0 | 0 | 41 | 0 | 43 | 0 | 0 | 0 | 47 | 0 | 0 | 0 | 0 | 0 | 53 | 0 | 0 | 0 | 0 | 0 |
2 | 3 | 0 | 5 | 0 | 7 | 0 | 0 | 0 | 11 | 0 | 13 | 0 | 0 | 0 | 17 | 0 | 19 | 0 | 0 | 0 | 23 | 0 | 0 | 0 | 0 | 0 | 29 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 37 | 0 | 0 | 0 | 41 | 0 | 43 | 0 | 0 | 0 | 47 | 0 | 0 | 0 | 0 | 0 | 53 | 0 | 0 | 0 | 0 | 0 |
2 | 3 | 0 | 5 | 0 | 7 | 0 | 0 | 0 | 11 | 0 | 13 | 0 | 0 | 0 | 17 | 0 | 19 | 0 | 0 | 0 | 23 | 0 | 0 | 0 | 0 | 0 | 29 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 37 | 0 | 0 | 0 | 41 | 0 | 43 | 0 | 0 | 0 | 47 | 0 | 0 | 0 | 0 | 0 | 53 | 0 | 0 | 0 | 0 | 0 |
2 | 3 | 0 | 5 | 0 | 7 | 0 | 0 | 0 | 11 | 0 | 13 | 0 | 0 | 0 | 17 | 0 | 19 | 0 | 0 | 0 | 23 | 0 | 0 | 0 | 0 | 0 | 29 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 37 | 0 | 0 | 0 | 41 | 0 | 43 | 0 | 0 | 0 | 47 | 0 | 0 | 0 | 0 | 0 | 53 | 0 | 0 | 0 | 0 | 0 |
2 | 3 | 0 | 5 | 0 | 7 | 0 | 0 | 0 | 11 | 0 | 13 | 0 | 0 | 0 | 17 | 0 | 19 | 0 | 0 | 0 | 23 | 0 | 0 | 0 | 0 | 0 | 29 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 37 | 0 | 0 | 0 | 41 | 0 | 43 | 0 | 0 | 0 | 47 | 0 | 0 | 0 | 0 | 0 | 53 | 0 | 0 | 0 | 0 | 0 |
2 | 3 | 0 | 5 | 0 | 7 | 0 | 0 | 0 | 11 | 0 | 13 | 0 | 0 | 0 | 17 | 0 | 19 | 0 | 0 | 0 | 23 | 0 | 0 | 0 | 0 | 0 | 29 | 0 | 31 | 0 | 0 | 0 | 0 | 0 | 37 | 0 | 0 | 0 | 41 | 0 | 43 | 0 | 0 | 0 | 47 | 0 | 0 | 0 | 0 | 0 | 53 | 0 | 0 | 0 | 0 | 0 |
[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53]
Above is the state of the preallocated list at each iteration of sifting primes up to 58.
Starting with a fully unmarked list, and the first prime, 2 (shown in yellow), every multiple of 2 is marked off in the list (shown in blue). The next prime (green) is then found by moving up the list until the first unmarked number.
The next iteration starts at the newly found prime, 3, and proceeds to mark off every multiple of 3 in the list, and so forth.
Finally, the last iteration attempts to find unmarked values to the right of 53 and finds none. At that point the algorithm can terminate and return the remaining unmarked values in the list.
%%timeit
v = sieve(10000, False)
10 loops, best of 3: 55.2 ms per loop
len(sieve(10000, False))
1229
At less than 60ms to find all primes less than 10000, this algorithm is orders of magnitude faster.
It can be further optimized by recognizing that if one divisor or factor of a number (other than a perfect square) is greater than its square root, then the other factor will be less than its square root. Hence all multiples of primes greater than the square root of n need not be considered[1]. The sieve function can be trivially modified to use this knowledge by limiting the marking phase to primes no greater than $\sqrt{n}$.
# Optimized sieve: only primes p with p*p <= size need a marking pass.
def sieve(size, showStates=True):
    """Return a list of all primes ``<= size`` (sieve of Eratosthenes).

    size       -- inclusive upper limit of the candidate range; values
                  below 2 yield an empty list
    showStates -- when true, the working list is rendered with
                  ``showState`` once per processed prime

    Marking stops as soon as ``p * p > size``: any composite ``<= size``
    has a prime factor no larger than its square root, so by then every
    composite has already been eliminated.
    """
    cells = list(range(2, size + 1))  # cells[k] holds the number k + 2
    p = 2
    # Bound the loop by the prime's value rather than by a pass count.
    # Counting sqrt(len) passes would process far more primes than needed.
    while p and p * p <= size:
        # Still strike from 2*p (not p*p) so each snapshot shows every
        # multiple of the current prime; negative means "struck this pass".
        for multiple in range(2 * p, size + 1, p):
            cells[multiple - 2] = -multiple

        # First still-positive value to the right of p, or 0 if none.
        nextPrime = next((value for value in cells[p - 1:] if value > 0), 0)

        if showStates:
            showState(cells, p, nextPrime)

        # Fade this pass's strikes to 0 (eliminated).
        for multiple in range(2 * p, size + 1, p):
            cells[multiple - 2] = 0

        p = nextPrime

    # Return a real list: a lazy filter object supports neither len() nor
    # indexing, both of which callers of this function rely on.
    return [value for value in cells if value > 0]
%%timeit
v = sieve(10000, False)
100 loops, best of 3: 13.2 ms per loop
So the Eratosthenes sieve is very fast at finding primes up to some limit m. At m=10000, we find n=1229 primes. What range do we have to sieve to actually get our n=10001 primes?
Rosser's theorem[2] provides a useful inequality that establishes bounds on the value of the nth prime number:
$\ln n + \ln\ln n - 1 < \frac{p_n}{n} < \ln n + \ln \ln n \quad\text{for } n \ge 6$
[2] http://en.wikipedia.org/wiki/Prime_number_theorem#Approximations_for_the_nth_prime_number
def maxPrime(n):
    """Return an integer limit that is at least as large as the n-th prime.

    For ``n >= 6`` this is Rosser's upper bound ``n*ln(n) + n*ln(ln(n))``
    rounded to the nearest integer. Because the bound is strict and the
    n-th prime is an integer, the rounded value is never below that prime,
    so an inclusive sieve up to the result contains at least ``n`` primes.

    For ``1 <= n < 6`` the inequality does not apply (and ``n == 1`` would
    evaluate ``log(0)``), so a fixed limit covering the first five primes
    (2, 3, 5, 7, 11) is returned.

    Raises ValueError if ``n < 1``.
    """
    # Imported locally so the function does not depend on a pylab-style
    # global ``log`` being present in the namespace.
    from math import log

    if n < 1:
        raise ValueError('n must be a positive integer')
    if n < 6:
        return 13
    return int(0.5 + (float(n) * log(n) + n * log(log(n))))
limit = maxPrime(10000)
print('The 10000th prime has a value < {0}'.format(limit))
The 10000th prime has a value < 114307
primes = sieve(limit, False)
len(primes)
10816
The upper bound function appears to have done its job and netted just over 10000 primes. We can now obtain the 10001st prime:
primes[10000]
104743
The four adjacent digits in the 1000-digit number that have the greatest product are 9 × 9 × 8 × 9 = 5832.
73167176531330624919225119674426574742355349194934 96983520312774506326239578318016984801869478851843 85861560789112949495459501737958331952853208805511 12540698747158523863050715693290963295227443043557 66896648950445244523161731856403098711121722383113 62229893423380308135336276614282806444486645238749 30358907296290491560440772390713810515859307960866 70172427121883998797908792274921901699720888093776 65727333001053367881220235421809751254540594752243 52584907711670556013604839586446706324415722155397 53697817977846174064955149290862569321978468622482 83972241375657056057490261407972968652414535100474 82166370484403199890008895243450658541227588666881 16427171479924442928230863465674813919123162824586 17866458359124566529476545682848912883142607690042 24219022671055626321111109370544217506941658960408 07198403850962455444362981230987879927244284909188 84580156166097919133875499200524063689912560717606 05886116467109405077541002256983155200055935729725 71636269561882670428252483600823257530420752963450
Find the thirteen adjacent digits in the 1000-digit number that have the greatest product. What is the value of this product?
from functools import reduce
from operator import mul

# The 1000-digit number, laid out as 20 rows of 50 digits; the newlines
# introduced by the triple-quoted literal are stripped to get one string.
source = '''
73167176531330624919225119674426574742355349194934
96983520312774506326239578318016984801869478851843
85861560789112949495459501737958331952853208805511
12540698747158523863050715693290963295227443043557
66896648950445244523161731856403098711121722383113
62229893423380308135336276614282806444486645238749
30358907296290491560440772390713810515859307960866
70172427121883998797908792274921901699720888093776
65727333001053367881220235421809751254540594752243
52584907711670556013604839586446706324415722155397
53697817977846174064955149290862569321978468622482
83972241375657056057490261407972968652414535100474
82166370484403199890008895243450658541227588666881
16427171479924442928230863465674813919123162824586
17866458359124566529476545682848912883142607690042
24219022671055626321111109370544217506941658960408
07198403850962455444362981230987879927244284909188
84580156166097919133875499200524063689912560717606
05886116467109405077541002256983155200055935729725
71636269561882670428252483600823257530420752963450
'''.replace('\n','')
# Break the source string into 13-character slices, one per start position.
window_size = 13
slices = [source[x:x+window_size] for x in range(len(source) - window_size + 1)]
# Multiply the digits of each slice. reduce over Python ints is exact (no
# fixed-width overflow) and does not need a pylab/NumPy ``product`` global.
products = [reduce(mul, map(int, row), 1) for row in slices]
max(products)
23514624000