#!/usr/bin/env python
# coding: utf-8

# In[1]:


get_ipython().run_line_magic('load_ext', 'watermark')
get_ipython().run_line_magic('watermark', "-a 'Sebastian Raschka' -u -d -v")


# # Finding the Maximum Pairwise Product

# Our task is to simply fine the largest product between 2 different numbers in an array. For example, given the array
# 
# a = [7, 1, 2, 8, 1, 3],
# 
# the maximum product between a pair of numbers would be 7*8 = 56.
# For simplicity, we will only consider arrays consisting of positive numbers; however, the same concepts applies to negative valued arrays. Also, to keep the code simple, we assume that the array consists of at least 2 numbers.

# First, let us create 2 arrays, a small and a large one that we will use for testing later.

# In[2]:


import random

random.seed(1)

a = [7, 1, 2, 8, 1, 3]
rnd_lst = random.sample(range(0, 10000000), 1000)


# The probably simplest implementation may be to compute the product of each pair in this list while keeping track of the largest product as we traverse through the list.
# Here, we need to use two-nested for-loops. The outer for-loop traverses through the list indexes {*i ,..., n*}, and the inner loop traverses through the list indeces {*i+1, ..., n*}. Thus, we have an algorithm with quadratic complexity, $O(n^2)$.

# In[3]:


import sys

def max_pairprod_1(ary):
    n = len(ary)
    max_prod = -sys.float_info.max
    
    for i in range(0, n):
        for j in range(i + 1, n):
            prod = ary[i] * ary[j]
            if prod > max_prod:
                max_prod = prod
                
    return max_prod

print(max_pairprod_1(ary=a))
print(max_pairprod_1(ary=rnd_lst))


# Now, let's think about improving this solution in terms of computational efficiency. One solution would be to sort the list so that we just need to return the first (or last) 2 numbers and compute their product. This would be a substantial improvement over our previous solution, since efficient sorting algorithms have a time complexity of $O(n\;log(n))$. However, we can do better ...
# 
# If our array consists of positive numbers only, we just need to traverse through the array once and keep track of the 2 largest numbers in the array. (If the array contains negative values as well, we also want to track the 2 most negative numbers, since their product will be positive number as well.)

# In[4]:


import sys

def max_pairprod_2(ary):
    pos_1 = -sys.float_info.max
    pos_2 = -sys.float_info.max
    
    for i in range(0, len(ary)):
        if ary[i] > pos_1:
            tmp = pos_1
            pos_1 = ary[i]
            if tmp > pos_2:
                pos_2 = tmp
        elif ary[i] > pos_2:
            pos_2 = ary[i]

    return pos_1 * pos_2

print(max_pairprod_2(ary=a))
print(max_pairprod_2(ary=rnd_lst))


# Although our second implementation contains more if-else statements, which may come with some additional overhead, we now find the maximum pairwise product in linear time, $O(n)$, which is a big improvement over our previous $O(n^2)$ algorithm. Let's do a quick benchmark using the `%timeit` magic:

# In[5]:


get_ipython().run_line_magic('timeit', '-n 1000 -r 3 max_pairprod_1(ary=rnd_lst)')


# In[6]:


get_ipython().run_line_magic('timeit', '-n 1000 -r 3 max_pairprod_2(ary=rnd_lst)')


# Finally, to get a better sence of the **order of the function**, big O, let us repeat the benchmark for different array sizes:

# In[13]:


import timeit

funcs = ['max_pairprod_1', 'max_pairprod_2']
orders_n = [10**n for n in range(1, 5)]
times_n = {f:[] for f in funcs}

for n in orders_n:
    rnd_lst = random.sample(range(0, 10**6), n)
    for f in funcs:
        times_n[f].append(min(timeit.Timer('%s(rnd_lst)' % f, 
                'from __main__ import %s, rnd_lst' % f)
                    .repeat(repeat=3, number=5)))


# In[20]:


get_ipython().run_line_magic('matplotlib', 'inline')
import matplotlib.pyplot as plt

def plot_timing():

    labels = [('max_pairprod_1', 'max_pairprod_1'), 
              ('max_pairprod_2', 'max_pairprod_2')]

    plt.rcParams.update({'font.size': 12})

    fig = plt.figure(figsize=(10, 8))
    for lb in labels:
        plt.plot(orders_n, times_n[lb[0]], 
             alpha=0.5, label=lb[1], marker='o', lw=3)
    plt.xlabel('sample size n')
    plt.ylabel('time per computation in milliseconds [ms]')
    plt.legend(loc=2)
    plt.ylim([-1, 60])
    plt.grid()
    plt.show()


# In[21]:


plot_timing()