def python_lstsqr(x_list, y_list): """ Computes the least-squares solution to a linear matrix equation. """ N = len(x_list) x_avg = sum(x_list)/N y_avg = sum(y_list)/N var_x, cov_xy = 0, 0 for x,y in zip(x_list, y_list): temp = x - x_avg var_x += temp**2 cov_xy += temp * (y - y_avg) slope = cov_xy / var_x y_interc = y_avg - slope*x_avg return (slope, y_interc) %load_ext cythonmagic %%cython import numpy as np cimport numpy as np cimport cython @cython.boundscheck(False) @cython.wraparound(False) @cython.cdivision(True) cpdef cython_lstsqr(x_ary, y_ary): """ Computes the least-squares solution to a linear matrix equation. """ cdef double x_avg, y_avg, var_x, cov_xy,\ slope, y_interc, temp cdef double[:] x = x_ary # memoryview cdef double[:] y = y_ary cdef unsigned long N, i N = x.shape[0] x_avg = 0 y_avg = 0 for i in range(N): x_avg += x[i] y_avg += y[i] x_avg = x_avg/N y_avg = y_avg/N var_x = 0 cov_xy = 0 for i in range(N): temp = (x[i] - x_avg) var_x += temp**2 cov_xy += temp*(y[i] - y_avg) slope = cov_xy / var_x y_interc = y_avg - slope*x_avg return (slope, y_interc) %%cython cimport cython cpdef cython_lstsqr_untyped(x_list, y_list): """ Computes the least-squares solution to a linear matrix equation. """ N = len(x_list) x_avg = sum(x_list)/N y_avg = sum(y_list)/N var_x, cov_xy = 0, 0 for x,y in zip(x_list, y_list): temp = x - x_avg var_x += temp**2 cov_xy += temp * (y - y_avg) slope = cov_xy / var_x y_interc = y_avg - slope*x_avg return (slope, y_interc) %install_ext https://raw.github.com/mgaitan/fortran_magic/master/fortranmagic.py %load_ext fortranmagic %%fortran SUBROUTINE fortran_lstsqr(ary_x, ary_y, slope, y_interc) ! Computes the least-squares solution to a linear matrix equation. """ IMPLICIT NONE REAL(8), INTENT(in), DIMENSION(:) :: ary_x, ary_y REAL(8), INTENT(out) :: slope, y_interc REAL(8) :: x_avg, y_avg, var_x, cov_xy, temp INTEGER(8) :: N, i N = SIZE(ary_x) x_avg = SUM(ary_x) / N y_avg = SUM(ary_y) / N var_x = 0 cov_xy = 0 DO i = 1, N temp = ary_x(i) - x_avg var_x = var_x + temp**2 cov_xy = cov_xy + (temp*(ary_y(i) - y_avg)) END DO slope = cov_xy / var_x y_interc = y_avg - slope*x_avg END SUBROUTINE fortran_lstsqr import random import numpy as np random.seed(12345) x = [x_i*random.randrange(8,12)/10 for x_i in range(500)] y = [y_i*random.randrange(8,12)/10 for y_i in range(100,600)] x_ary = np.asarray(x) y_ary = np.asarray(y) reference = python_lstsqr(x_ary, y_ary) funcs = [python_lstsqr, cython_lstsqr, cython_lstsqr_untyped,fortran_lstsqr] for f in funcs: assert(reference == f(x_ary, y_ary)) print('ok') %matplotlib inline from matplotlib import pyplot as plt import random random.seed(12345) x = [x_i*random.randrange(8,12)/10 for x_i in range(500)] y = [y_i*random.randrange(8,12)/10 for y_i in range(100,600)] slope, intercept = python_lstsqr(x, y) line_x = [round(min(x)) - 1, round(max(x)) + 1] line_y = [slope*x_i + intercept for x_i in line_x] plt.figure(figsize=(8,8)) plt.scatter(x,y) plt.plot(line_x, line_y, color='red', lw='2') plt.ylabel('y') plt.xlabel('x') plt.title('Linear regression via least squares fit') ftext = 'y = ax + b = {:.3f} + {:.3f}x'\ .format(slope, intercept) plt.figtext(.15,.8, ftext, fontsize=11, ha='left') plt.show() import timeit import random random.seed(12345) orders_n = [10**n for n in range(1, 6)] timings = {f.__name__:[] for f in funcs} for n in orders_n: x = ([x_i*np.random.randint(8,12)/10 for x_i in range(n)]) y = ([y_i*np.random.randint(10,14)/10 for y_i in range(n)]) x_ary = np.asarray(x) y_ary = np.asarray(y) x_fary = np.asfortranarray(x) y_fary = np.asfortranarray(y) timings['python_lstsqr'].append(min(timeit.Timer('python_lstsqr(x, y)', 'from __main__ import python_lstsqr, x, y')\ .repeat(repeat=3, number=1000))) timings['cython_lstsqr'].append(min(timeit.Timer('cython_lstsqr(x_ary, y_ary)', 'from __main__ import cython_lstsqr, x_ary, y_ary')\ .repeat(repeat=3, number=1000))) timings['cython_lstsqr_untyped'].append(min(timeit.Timer('cython_lstsqr_untyped(x, y)', 'from __main__ import cython_lstsqr_untyped, x, y')\ .repeat(repeat=3, number=1000))) timings['fortran_lstsqr'].append(min(timeit.Timer('fortran_lstsqr(x_fary, y_fary)', 'from __main__ import fortran_lstsqr, x_fary, y_fary')\ .repeat(repeat=3, number=1000))) import platform import multiprocessing from cython import __version__ as cython__version__ def print_sysinfo(): print('\nPython version :', platform.python_version()) print('compiler :', platform.python_compiler()) print('Cython version :', cython__version__) print('NumPy version :', np.__version__) print('\nsystem :', platform.system()) print('release :', platform.release()) print('machine :', platform.machine()) print('processor :', platform.processor()) print('CPU count :', multiprocessing.cpu_count()) print('interpreter:', platform.architecture()[0]) print('\n\n') import matplotlib.pyplot as plt def plot(timings, title, labels, orders_n): plt.rcParams.update({'font.size': 12}) fig = plt.figure(figsize=(11,10)) for lb in labels: plt.plot(orders_n, timings[lb], alpha=0.5, label=labels[lb], marker='o', lw=3) plt.xlabel('sample size n') plt.ylabel('time per computation in milliseconds') plt.xlim([min(orders_n) / 10, max(orders_n)* 10]) plt.legend(loc=2) plt.grid() plt.xscale('log') plt.yscale('log') plt.title(title) plt.show() import prettytable labels = {'python_lstsqr':'Python (Standard Library func.)', 'cython_lstsqr':'Cython (NumPy arrays)', 'cython_lstsqr_untyped':'Cython untyped (equiv. to Python impl.)', 'fortran_lstsqr': 'Fortran (NumPy arrays)', } def summary_table(funcs): fit_table = prettytable.PrettyTable(['n=%s' %orders_n[-1], 'Implementation' , 'time in msec']) fit_table.align['Implementation'] = 'l' for l in ['python_lstsqr', 'cython_lstsqr_untyped', 'cython_lstsqr', 'fortran_lstsqr']: fit_table.add_row(['', labels[l], '{:.3f}'.format(timings[l][-1])]) print(fit_table) title = 'Performance of Linear Regression Least Squares Fits in Python, Cython, and Fortran' print_sysinfo() plot(timings, title, labels, orders_n) summary_table(funcs)