In [1]:
%matplotlib inline

In this notebook I ask a question how does diffusion simulation converge or does it converge at all?

todo

  1. write about purpose of this notebook
  2. write about assumptions
  3. write about steps
  4. write about results and conclusion
  5. roadmap to future?
In [2]:
from hypergraph import utils
In [3]:
n = 10
k = 3
f = 0.8
HG = utils.create_graph(n, k, f)
utils.plot_different_representations(HG.nodes(), HG.hyper_edges())
Drawing different representations of hypergraph
Bipartite graph
Graph of hypereges as nodes
Clique graph
In [4]:
from hypergraph import diffusion_convergence_analytic
In [7]:
from matplotlib import pyplot as plt


def diffusion_on_hypergraph(hypergraph, markov_matrix,
                            t_max, plot_results=False):
    """Simulate numerically diffusion on a hypergraph,

    Diffusion is simulated using markov chains.

    """
    nodes = hypergraph.nodes()
    hyper_edges = hypergraph.hyper_edges()

    most_common, most_common_nodes, states = simulate_diffusion(
        nodes, hyper_edges, markov_matrix, t_max)

    if plot_results:
        return plot_diffusion_results(most_common, most_common_nodes,
                               hyper_edges, "hypergraph")
    else:
        return utils.get_names_and_occurrences(most_common_nodes)[1]


def plot_diffusion_results(most_common, most_common_nodes, edges, name):
    plt.figure(figsize=(8, 4))
    utils.plot_hyperedges_frequencies(most_common, edges,
                                      ('Ocurrences of hyperedges in'
                                       ' a {}').format(name),
                                      normed=True)

    plt.figure(figsize=(8, 4))
    return utils.plot_nodes_frequencies(most_common_nodes,
                                        'Nodes in a {}'.format(name),
                                        normed=True)


def simulate_diffusion(nodes, edges, markov_matrix, t_max):
    engine = DiffusionEngine(markov_matrix)
    most_common, states = engine.simulate(t_max)
    most_common_nodes = count_nodes(nodes, edges, most_common)
    return most_common, most_common_nodes, states
In [9]:
from hypergraph.diffusion_engine import DiffusionEngine
from hypergraph.markov_diffusion import (create_markov_matrix,
                                         count_nodes)
In [10]:
t_max = 100
markov_matrix = create_markov_matrix(HG.hyper_edges())
nodes = HG.nodes()
edges = HG.hyper_edges()
engine = DiffusionEngine(markov_matrix, t_per_walker=20)
most_common, states = engine.simulate(t_max)
most_common_nodes = count_nodes(nodes, edges, most_common)
In [11]:
states
Out[11]:
[[2, 5, 1, 4, 3, 4, 1, 6, 1, 4, 2, 4, 2, 0, 2, 4, 3, 6, 1, 5],
 [0, 2, 0, 2, 6, 5, 6, 1, 5, 2, 6, 2, 6, 4, 6, 1, 3, 4, 2, 6],
 [7, 5, 1, 6, 5, 2, 6, 5, 6, 2, 0, 5, 6, 0, 5, 6, 0, 5, 0, 5],
 [1, 4, 3, 4, 3, 6, 5, 7, 0, 5, 6, 1, 5, 0, 7, 5, 2, 4, 3, 6],
 [5, 0, 7, 0, 7, 5, 1, 2, 6, 4, 2, 4, 1, 2, 6, 1, 3, 1, 2, 6]]
In [12]:
most_common
Out[12]:
[(6, 19), (5, 17), (2, 16), (1, 13), (4, 12), (0, 11), (3, 7), (7, 5)]
In [13]:
most_common_nodes
Out[13]:
Counter({1: 63, 8: 51, 10: 48, 7: 33, 6: 30, 4: 28, 2: 19, 5: 16, 9: 7, 3: 5})
In [14]:
plot_diffusion_results(most_common, most_common_nodes, nodes, 'Hyper 100')
Out[14]:
[0.21,
 0.06333333333333334,
 0.016666666666666666,
 0.09333333333333334,
 0.05333333333333334,
 0.1,
 0.11,
 0.17,
 0.023333333333333334,
 0.16]
In [15]:
def plot_diffusion_results(most_common, most_common_nodes, edges, name):
    plt.figure(figsize=(8, 4))
    utils.plot_hyperedges_frequencies(most_common, edges,
                                      ('Ocurrences of hyperedges in'
                                       ' a {}').format(name),
                                      normed=True)

    plt.figure(figsize=(8, 4))
    return utils.plot_nodes_frequencies(most_common_nodes,
                                        'Nodes in a {}'.format(name),
                                        normed=True)
In [16]:
from pylab import get_cmap, cm
import numpy as np


def plot_comparison_bars(x, ys, labels=None):
    plt.figure(figsize=(12, 8))
    colors = ["crimson", "burlywood", "#65df25", "#dcab11", "magenta"] + ["blue"] * 50
    N = len(ys)
    width = 1.0 / N - 0.04
    
    colors = cm.get_cmap('jet', N)
    if labels is None:
        labels = [''] * N
    print(width)
    for i,  y in enumerate(ys):
        
        plt.bar(np.array(x) + i * width, y,
                width=width, color=colors(i),
                label=labels[i], alpha=0.7)


    plt.legend(loc=0)
In [17]:
plot_comparison_bars(range(4), [range(4), [1, 2, 5, 2]])
0.46
/home/att/Projects/hypergraph/py3/lib/python3.3/site-packages/matplotlib/axes.py:4749: UserWarning: No labeled objects found. Use label='...' kwarg on individual plots.
  warnings.warn("No labeled objects found. "
In [18]:
t_max = 100
markov_matrix = create_markov_matrix(HG.hyper_edges())
nodes = HG.nodes()
edges = HG.hyper_edges()
engine = DiffusionEngine(markov_matrix, t_per_walker=30)
most_common, states = engine.simulate(t_max)
most_common_nodes = count_nodes(nodes, edges, most_common)

most_common_to_t_max = {}
for t_max in [100, 300, 600, 800, 900, 1200, 1300]:
    most_common, states = engine.simulate(t_max)
    most_common_nodes = count_nodes(nodes, edges, most_common)
    most_common = list(most_common)
    most_common.sort(key=lambda x: x[0])
    print(most_common)
    _, occurrences = zip(*most_common)
    
    
    most_common_to_t_max[t_max] = utils.get_names_and_occurrences(most_common_nodes)[1]
[(0, 11), (1, 14), (2, 13), (3, 7), (4, 8), (5, 16), (6, 14), (7, 7)]
[(0, 38), (1, 47), (2, 36), (3, 33), (4, 31), (5, 40), (6, 57), (7, 18)]
[(0, 92), (1, 76), (2, 70), (3, 55), (4, 56), (5, 93), (6, 101), (7, 57)]
[(0, 62), (1, 119), (2, 111), (3, 96), (4, 132), (5, 72), (6, 170), (7, 18)]
[(0, 117), (1, 96), (2, 128), (3, 81), (4, 99), (5, 149), (6, 162), (7, 68)]
[(0, 165), (1, 155), (2, 169), (3, 108), (4, 117), (5, 177), (6, 234), (7, 75)]
[(0, 147), (1, 177), (2, 162), (3, 153), (4, 178), (5, 188), (6, 216), (7, 69)]
In [19]:
most_common_to_t_max.values()
Out[19]:
dict_values([[0.17735042735042736, 0.09743589743589744, 0.007692307692307693, 0.10384615384615385, 0.03418803418803419, 0.08162393162393163, 0.06495726495726496, 0.22094017094017093, 0.041025641025641026, 0.17094017094017094], [0.20694444444444443, 0.0625, 0.020833333333333332, 0.07944444444444444, 0.06666666666666667, 0.09222222222222222, 0.11583333333333333, 0.17055555555555554, 0.03, 0.155], [0.20592592592592593, 0.06666666666666667, 0.025185185185185185, 0.08407407407407408, 0.06851851851851852, 0.09074074074074075, 0.1237037037037037, 0.1622222222222222, 0.03, 0.14296296296296296], [0.18423772609819122, 0.0855297157622739, 0.017829457364341085, 0.08785529715762273, 0.05581395348837209, 0.09431524547803617, 0.10439276485788114, 0.18708010335917313, 0.03953488372093023, 0.1434108527131783], [0.19777777777777777, 0.06166666666666667, 0.03166666666666667, 0.07, 0.08277777777777778, 0.09388888888888888, 0.13444444444444445, 0.16, 0.030555555555555555, 0.13722222222222222], [0.2, 0.05555555555555555, 0.025925925925925925, 0.07777777777777778, 0.06666666666666667, 0.1111111111111111, 0.1259259259259259, 0.15925925925925927, 0.025925925925925925, 0.15185185185185185], [0.19, 0.07111111111111111, 0.02, 0.07444444444444444, 0.06222222222222222, 0.09666666666666666, 0.10666666666666667, 0.18666666666666668, 0.03666666666666667, 0.15555555555555556]])
In [20]:
from hypergraph import analytical
from pylab import cm
to_compare = list(most_common_to_t_max.values())
nodes, predictions = analytical.analytical_hypergraph_diffusion(HG)
to_compare.append(predictions)
plot_comparison_bars(range(len(nodes)), to_compare)
0.08499999999999999

It look like it does converge. However, what is the impact of number of walkers?

In [21]:
markov_matrix
Out[21]:
array([[ 0.        ,  0.        ,  0.16666667,  0.        ,  0.        ,
         0.33333333,  0.16666667,  0.33333333],
       [ 0.        ,  0.        ,  0.16666667,  0.16666667,  0.16666667,
         0.16666667,  0.33333333,  0.        ],
       [ 0.16666667,  0.16666667,  0.        ,  0.        ,  0.16666667,
         0.16666667,  0.33333333,  0.        ],
       [ 0.        ,  0.25      ,  0.        ,  0.        ,  0.5       ,
         0.        ,  0.25      ,  0.        ],
       [ 0.        ,  0.2       ,  0.2       ,  0.4       ,  0.        ,
         0.        ,  0.2       ,  0.        ],
       [ 0.33333333,  0.16666667,  0.16666667,  0.        ,  0.        ,
         0.        ,  0.16666667,  0.16666667],
       [ 0.125     ,  0.25      ,  0.25      ,  0.125     ,  0.125     ,
         0.125     ,  0.        ,  0.        ],
       [ 0.66666667,  0.        ,  0.        ,  0.        ,  0.        ,
         0.33333333,  0.        ,  0.        ]])
In [21]:
 
In [ ]: