In [1]:
from hypergraph.hypergraph_models import HyperGraph
from hypergraph.markov_diffusion import (create_markov_matrix_model_nodes,
                                         create_markov_matrix_model_hyper_edges)
 
from matplotlib import pyplot as plt

def create_line_hypergraph(N, cardinality):
    HG = HyperGraph()
    nodes = range(N)
    hyper_edges = [range(i, i + cardinality) for i in range(N + 1 - cardinality)]
    HG.add_nodes_from(nodes)
    HG.add_edges_from(hyper_edges)
    return HG
In [2]:
import pykov


def transition_matrix_to_pykov_chain(matrix):
    chain = pykov.Chain()
    
    for i, row in enumerate(matrix):
        for j, column in enumerate(row):
            chain[(i, j)] = column
    return chain
In [3]:
%matplotlib inline

from collections import Counter
import numpy as np
In [4]:
np.random.multinomial(1000, [1/3.]*3, size=1)
Out[4]:
array([[325, 348, 327]])
In [5]:
import numpy as np
from scipy.stats import norm
import matplotlib.pyplot as plt


def fit_normal_distribution(data):
    # Fit a normal distribution to the data:
    mu, std = norm.fit(data)

    # Plot the PDF.
    xmin, xmax = plt.xlim()
    x = np.linspace(xmin, xmax, 100)
    p = norm.pdf(x, mu, std)
    plt.plot(x, p, 'k', linewidth=2)
    title = "Fit results: mu = %.2f,  std = %.2f" % (mu, std)
    plt.title(title)
    plt.show()
    return std
In [10]:
def centralize_bins(xs):
    return xs[:-1] + (1 / 2) * (xs[1] - xs[0])

def simulate_walk_on_line(size_of_edge=3, steps=1000):
    max_len = 500
    start = max_len / 2
    size = size_of_edge
    HG = create_line_hypergraph(max_len, size)
    hypergraph_line_walk_matrix = create_markov_matrix_model_nodes(HG)

    chain = transition_matrix_to_pykov_chain(hypergraph_line_walk_matrix)
    c = Counter()

    all_items = []
    for _ in range(200):
        all_items += chain.walk(steps=steps, start=start)
    plt.figure(figsize=(10, 8))
    ys, bins, _ = plt.hist(all_items, bins=25, normed=True)
    return centralize_bins(bins), ys, all_items
In [10]:
def compute_expected_std(n, steps):
    return np.sqrt(np.var([(i - n // 2) for i in range(n)]) * steps)

compute_expected_std(3, 2000)
Out[10]:
36.514837167011073

Quick summary

http://www.real-statistics.com/binomial-and-related-distributions/binomial-distribution-and-random-walks/

I tried to match multinomial distributions but it wasn't the good direction actually.

The much more fruitful approach was to use a normal distribution for big n and look for characteristic variance.

It's actually pretty easy to compute. And results look more less fine with this hypothesis. Nothing too much exciting, but actually pretty solid.

Example of variance for n = 3.

-1 0 1

v = ((-1 - 0)^2 + 0^2 + (1 - 0)^2) / 3

stdev = n * sqrt v / sqrt(n)

In [13]:
def parametrize(parameter_name, parameter_values):
    def decorator(fn):
        def wrapper(*args, **kwargs):
            return_values = []
            for parameter_value in parameter_values:
                kwargs[parameter_name] = parameter_value
                return_values.append(fn(*args, **kwargs))
            return return_values
        return wrapper
    return decorator
In [14]:
steps = [100, 200, 300, 1000, 2000]
sizes = [3, 4, 5, 6, 7]

@parametrize("steps", steps)
@parametrize("size", sizes)
def show_walks(size, steps):
    print("size", size)
    print("steps", steps)
    plt.figure()
    xs, ys, all_items = simulate_walk_on_line(size, steps)
    std = fit_normal_distribution(all_items)
    expected_std = compute_expected_std(size, steps)
    print(std, expected_std)
    return std, expected_std
In [15]:
res = show_walks()
size 3
steps 100
<matplotlib.figure.Figure at 0x7ff5e8e00ba8>
8.61147290147 8.16496580928
size 4
steps 100
<matplotlib.figure.Figure at 0x7ff5e8cd05f8>
11.2589653083 11.1803398875
size 5
steps 100
<matplotlib.figure.Figure at 0x7ff5e8b95400>
13.4079111045 14.1421356237
size 6
steps 100
<matplotlib.figure.Figure at 0x7ff5e8b85c50>
16.3895127332 17.0782512766
size 7
steps 100
<matplotlib.figure.Figure at 0x7ff5e8b44710>
20.4643439989 20.0
size 3
steps 200
<matplotlib.figure.Figure at 0x7ff5e8e2cbe0>
11.4705414369 11.5470053838
size 4
steps 200
<matplotlib.figure.Figure at 0x7ff60d583fd0>
16.0895632711 15.8113883008
size 5
steps 200
<matplotlib.figure.Figure at 0x7ff60d583fd0>
20.5725685481 20.0
size 6
steps 200
<matplotlib.figure.Figure at 0x7ff5f0260748>
25.4521575952 24.152294577
size 7
steps 200
<matplotlib.figure.Figure at 0x7ff5e8b3a588>
27.7959276979 28.2842712475
size 3
steps 300
<matplotlib.figure.Figure at 0x7ff5e8e10668>
15.5873472704 14.1421356237
size 4
steps 300
<matplotlib.figure.Figure at 0x7ff60d5750f0>
19.3829694401 19.364916731
size 5
steps 300
<matplotlib.figure.Figure at 0x7ff60d57a6d8>
23.0666830384 24.4948974278
size 6
steps 300
<matplotlib.figure.Figure at 0x7ff5e8a342b0>
28.6824204009 29.5803989155
size 7
steps 300
<matplotlib.figure.Figure at 0x7ff5e8a50320>
35.8055241519 34.6410161514
size 3
steps 1000
<matplotlib.figure.Figure at 0x7ff5e8e17d68>
25.8934232262 25.8198889747
size 4
steps 1000
<matplotlib.figure.Figure at 0x7ff5e8b87748>
36.3836229994 35.3553390593
size 5
steps 1000
<matplotlib.figure.Figure at 0x7ff5e8bb39e8>
42.1808189856 44.72135955
size 6
steps 1000
<matplotlib.figure.Figure at 0x7ff5f00566a0>
54.1145738391 54.0061724867
size 7
steps 1000
<matplotlib.figure.Figure at 0x7ff60d4b33c8>
63.2057432869 63.2455532034
size 3
steps 2000
<matplotlib.figure.Figure at 0x7ff5e8b304e0>
37.1977502263 36.514837167
size 4
steps 2000
<matplotlib.figure.Figure at 0x7ff60d575860>
51.2522159224 50.0
size 5
steps 2000
<matplotlib.figure.Figure at 0x7ff60d583ac8>
65.5838224584 63.2455532034
size 6
steps 2000
<matplotlib.figure.Figure at 0x7ff5e8cfae80>
75.8059988972 76.3762615826
size 7
steps 2000
<matplotlib.figure.Figure at 0x7ff60d575c88>
85.7221685986 89.4427191
In [16]:
plt.figure(figsize=(12, 9))
for step, series in zip(steps, res):
    first, second = zip(*series)
    plt.plot(sizes, first, 'o', label='real, %s steps' % step, markersize=10)
    plt.plot(sizes, second, label='expected, %s steps' % step)
plt.xlabel('Hyperedge cardinality')
plt.ylabel('Stdev of normal fit')
plt.legend(loc=0)
plt.title('Random walks 1D using hypergraphs')
Out[16]:
<matplotlib.text.Text at 0x7ff5e8b3add8>
In [24]:
ys = []
xs = [3, 5, 7, 9]
for x in xs:
    ys.append(compute_expected_std(x, 10000))
plt.plot(xs, ys)
Out[24]:
[<matplotlib.lines.Line2D at 0x7ff5e8a9fba8>]
In [29]:
def compute_expected_std_easy(n):
    return np.sqrt(np.var([(i - n // 2) for i in range(n)]))

def compute_expected_std_easy2(n):
    return np.sqrt(np.var([(i - n // 2) for i in range(n)]) * 10000)
In [32]:
xs = np.arange(1, 101, 1)
ys = [compute_expected_std_easy(x) for x in xs]
ys2 = [compute_expected_std_easy2(x) for x in xs]
In [38]:
from scipy.optimize import curve_fit
In [39]:
def line(x, a, b):
    return a * x + b
In [40]:
curve_fit(line, xs[1:], ys[1:])
Out[40]:
(array([ 0.28888443, -0.01686067]),
 array([[  8.78736157e-10,  -4.48155600e-08],
        [ -4.48155600e-08,   3.00322367e-06]]))
In [61]:
from matplotlib import pyplot as plt
from hypergraph.utils import plot_transition_matrix
---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-61-1617430f9667> in <module>()
      1 from matplotlib import pyplot as plt
----> 2 from hypergraph.utils import plot_transition_matrix

ImportError: cannot import name 'plot_transition_matrix'
In [62]:
max_len = 500
start = max_len / 2
size = 3
HG = create_line_hypergraph(max_len, size)
tiny_HG = create_line_hypergraph(10, size)
hypergraph_line_walk_edges_matrix = create_markov_matrix_model_hyper_edges(HG)
tiny_hypergraph_line_walk_edges_matrix = create_markov_matrix_model_hyper_edges(tiny_HG)
plot_transition_matrix(tiny_hypergraph_line_walk_edges_matrix)
In [63]:
tiny_HG.hyper_edges()
Out[63]:
[{0, 1, 2},
 {1, 2, 3},
 {2, 3, 4},
 {3, 4, 5},
 {4, 5, 6},
 {5, 6, 7},
 {6, 7, 8},
 {7, 8, 9}]
In [81]:
def simulate_walk_on_line_edge(HG, steps=1000):
    
    hypergraph_line_walk_matrix = create_markov_matrix_model_hyper_edges(HG)
    print('matrix ready')
    chain = transition_matrix_to_pykov_chain(hypergraph_line_walk_matrix)
    c = Counter()

    all_items = []
    for _ in range(2000):
        all_items += chain.walk(steps=steps, start=start)
        
    plt.figure(figsize=(10, 8))
    all_nodes = convert_covered_edges_to_nodes(all_items, HG.hyper_edges())
    ys, bins, _ = plt.hist(all_nodes, bins=25, normed=True)
    return centralize_bins(bins), ys, all_nodes
In [60]:
simulate_walk_on_line_edge(3, 100);
matrix ready