In [1]:
%matplotlib inline


# In this notebook I ask the question: does the diffusion simulation converge, and if so, how?

## TODO

1. Write about the purpose of this notebook.
2. Write about the results and conclusions.
In [2]:
from hypergraph import utils

In [3]:
n = 10
k = 3
f = 0.8
HG = utils.create_graph(n, k, f)
utils.plot_different_representations(HG.nodes(), HG.hyper_edges())

Drawing different representations of hypergraph
Bipartite graph
Graph of hypereges as nodes
Clique graph

In [4]:
from hypergraph import diffusion_convergence_analytic

In [7]:
from matplotlib import pyplot as plt

def diffusion_on_hypergraph(hypergraph, markov_matrix,
t_max, plot_results=False):
"""Simulate numerically diffusion on a hypergraph,

Diffusion is simulated using markov chains.

"""
nodes = hypergraph.nodes()
hyper_edges = hypergraph.hyper_edges()

most_common, most_common_nodes, states = simulate_diffusion(
nodes, hyper_edges, markov_matrix, t_max)

if plot_results:
return plot_diffusion_results(most_common, most_common_nodes,
hyper_edges, "hypergraph")
else:
return utils.get_names_and_occurrences(most_common_nodes)[1]

def plot_diffusion_results(most_common, most_common_nodes, edges, name):
plt.figure(figsize=(8, 4))
utils.plot_hyperedges_frequencies(most_common, edges,
('Ocurrences of hyperedges in'
' a {}').format(name),
normed=True)

plt.figure(figsize=(8, 4))
return utils.plot_nodes_frequencies(most_common_nodes,
'Nodes in a {}'.format(name),
normed=True)

def simulate_diffusion(nodes, edges, markov_matrix, t_max):
engine = DiffusionEngine(markov_matrix)
most_common, states = engine.simulate(t_max)
most_common_nodes = count_nodes(nodes, edges, most_common)
return most_common, most_common_nodes, states

In [9]:
from hypergraph.diffusion_engine import DiffusionEngine
from hypergraph.markov_diffusion import (create_markov_matrix,
count_nodes)

In [10]:
t_max = 100
markov_matrix = create_markov_matrix(HG.hyper_edges())
nodes = HG.nodes()
edges = HG.hyper_edges()
engine = DiffusionEngine(markov_matrix, t_per_walker=20)
most_common, states = engine.simulate(t_max)
most_common_nodes = count_nodes(nodes, edges, most_common)

In [11]:
states

Out[11]:
[[2, 5, 1, 4, 3, 4, 1, 6, 1, 4, 2, 4, 2, 0, 2, 4, 3, 6, 1, 5],
[0, 2, 0, 2, 6, 5, 6, 1, 5, 2, 6, 2, 6, 4, 6, 1, 3, 4, 2, 6],
[7, 5, 1, 6, 5, 2, 6, 5, 6, 2, 0, 5, 6, 0, 5, 6, 0, 5, 0, 5],
[1, 4, 3, 4, 3, 6, 5, 7, 0, 5, 6, 1, 5, 0, 7, 5, 2, 4, 3, 6],
[5, 0, 7, 0, 7, 5, 1, 2, 6, 4, 2, 4, 1, 2, 6, 1, 3, 1, 2, 6]]
In [12]:
most_common

Out[12]:
[(6, 19), (5, 17), (2, 16), (1, 13), (4, 12), (0, 11), (3, 7), (7, 5)]
In [13]:
most_common_nodes

Out[13]:
Counter({1: 63, 8: 51, 10: 48, 7: 33, 6: 30, 4: 28, 2: 19, 5: 16, 9: 7, 3: 5})
In [14]:
plot_diffusion_results(most_common, most_common_nodes, nodes, 'Hyper 100')

Out[14]:
[0.21,
0.06333333333333334,
0.016666666666666666,
0.09333333333333334,
0.05333333333333334,
0.1,
0.11,
0.17,
0.023333333333333334,
0.16]
In [15]:
def plot_diffusion_results(most_common, most_common_nodes, edges, name):
plt.figure(figsize=(8, 4))
utils.plot_hyperedges_frequencies(most_common, edges,
('Ocurrences of hyperedges in'
' a {}').format(name),
normed=True)

plt.figure(figsize=(8, 4))
return utils.plot_nodes_frequencies(most_common_nodes,
'Nodes in a {}'.format(name),
normed=True)

In [16]:
from pylab import get_cmap, cm
import numpy as np

def plot_comparison_bars(x, ys, labels=None):
plt.figure(figsize=(12, 8))
colors = ["crimson", "burlywood", "#65df25", "#dcab11", "magenta"] + ["blue"] * 50
N = len(ys)
width = 1.0 / N - 0.04

colors = cm.get_cmap('jet', N)
if labels is None:
labels = [''] * N
print(width)
for i,  y in enumerate(ys):

plt.bar(np.array(x) + i * width, y,
width=width, color=colors(i),
label=labels[i], alpha=0.7)

plt.legend(loc=0)

In [17]:
plot_comparison_bars(range(4), [range(4), [1, 2, 5, 2]])

0.46

/home/att/Projects/hypergraph/py3/lib/python3.3/site-packages/matplotlib/axes.py:4749: UserWarning: No labeled objects found. Use label='...' kwarg on individual plots.
warnings.warn("No labeled objects found. "

In [18]:
t_max = 100
markov_matrix = create_markov_matrix(HG.hyper_edges())
nodes = HG.nodes()
edges = HG.hyper_edges()
engine = DiffusionEngine(markov_matrix, t_per_walker=30)
most_common, states = engine.simulate(t_max)
most_common_nodes = count_nodes(nodes, edges, most_common)

most_common_to_t_max = {}
for t_max in [100, 300, 600, 800, 900, 1200, 1300]:
most_common, states = engine.simulate(t_max)
most_common_nodes = count_nodes(nodes, edges, most_common)
most_common = list(most_common)
most_common.sort(key=lambda x: x[0])
print(most_common)
_, occurrences = zip(*most_common)

most_common_to_t_max[t_max] = utils.get_names_and_occurrences(most_common_nodes)[1]

[(0, 11), (1, 14), (2, 13), (3, 7), (4, 8), (5, 16), (6, 14), (7, 7)]
[(0, 38), (1, 47), (2, 36), (3, 33), (4, 31), (5, 40), (6, 57), (7, 18)]
[(0, 92), (1, 76), (2, 70), (3, 55), (4, 56), (5, 93), (6, 101), (7, 57)]
[(0, 62), (1, 119), (2, 111), (3, 96), (4, 132), (5, 72), (6, 170), (7, 18)]
[(0, 117), (1, 96), (2, 128), (3, 81), (4, 99), (5, 149), (6, 162), (7, 68)]
[(0, 165), (1, 155), (2, 169), (3, 108), (4, 117), (5, 177), (6, 234), (7, 75)]
[(0, 147), (1, 177), (2, 162), (3, 153), (4, 178), (5, 188), (6, 216), (7, 69)]

In [19]:
most_common_to_t_max.values()

Out[19]:
dict_values([[0.17735042735042736, 0.09743589743589744, 0.007692307692307693, 0.10384615384615385, 0.03418803418803419, 0.08162393162393163, 0.06495726495726496, 0.22094017094017093, 0.041025641025641026, 0.17094017094017094], [0.20694444444444443, 0.0625, 0.020833333333333332, 0.07944444444444444, 0.06666666666666667, 0.09222222222222222, 0.11583333333333333, 0.17055555555555554, 0.03, 0.155], [0.20592592592592593, 0.06666666666666667, 0.025185185185185185, 0.08407407407407408, 0.06851851851851852, 0.09074074074074075, 0.1237037037037037, 0.1622222222222222, 0.03, 0.14296296296296296], [0.18423772609819122, 0.0855297157622739, 0.017829457364341085, 0.08785529715762273, 0.05581395348837209, 0.09431524547803617, 0.10439276485788114, 0.18708010335917313, 0.03953488372093023, 0.1434108527131783], [0.19777777777777777, 0.06166666666666667, 0.03166666666666667, 0.07, 0.08277777777777778, 0.09388888888888888, 0.13444444444444445, 0.16, 0.030555555555555555, 0.13722222222222222], [0.2, 0.05555555555555555, 0.025925925925925925, 0.07777777777777778, 0.06666666666666667, 0.1111111111111111, 0.1259259259259259, 0.15925925925925927, 0.025925925925925925, 0.15185185185185185], [0.19, 0.07111111111111111, 0.02, 0.07444444444444444, 0.06222222222222222, 0.09666666666666666, 0.10666666666666667, 0.18666666666666668, 0.03666666666666667, 0.15555555555555556]])
In [20]:
from hypergraph import analytical
from pylab import cm
to_compare = list(most_common_to_t_max.values())
nodes, predictions = analytical.analytical_hypergraph_diffusion(HG)
to_compare.append(predictions)
plot_comparison_bars(range(len(nodes)), to_compare)

0.08499999999999999


It looks like it does converge. However, what is the impact of the number of walkers?

In [21]:
markov_matrix

Out[21]:
array([[ 0.        ,  0.        ,  0.16666667,  0.        ,  0.        ,
0.33333333,  0.16666667,  0.33333333],
[ 0.        ,  0.        ,  0.16666667,  0.16666667,  0.16666667,
0.16666667,  0.33333333,  0.        ],
[ 0.16666667,  0.16666667,  0.        ,  0.        ,  0.16666667,
0.16666667,  0.33333333,  0.        ],
[ 0.        ,  0.25      ,  0.        ,  0.        ,  0.5       ,
0.        ,  0.25      ,  0.        ],
[ 0.        ,  0.2       ,  0.2       ,  0.4       ,  0.        ,
0.        ,  0.2       ,  0.        ],
[ 0.33333333,  0.16666667,  0.16666667,  0.        ,  0.        ,
0.        ,  0.16666667,  0.16666667],
[ 0.125     ,  0.25      ,  0.25      ,  0.125     ,  0.125     ,
0.125     ,  0.        ,  0.        ],
[ 0.66666667,  0.        ,  0.        ,  0.        ,  0.        ,
0.33333333,  0.        ,  0.        ]])
In [21]:


In [ ]: