# Rachunek teoretyczny prawdopodobieńst [hipoteza]¶

$C_{ij} = \frac{[i \in j-tej chmury]} {ile\ chmur\ ma\ i}$

$D_{ij} = \frac{[j \in i-tej chmury]} {ile\ wierzchołków\ ma\ j}$

A = DC

In [1]:
%matplotlib inline


Deprecated

# todo¶

• document properly what happens here
• how can I improve this pipeline?
In [2]:
import numpy as np
import networkx as nx
from collections import Counter
from matplotlib import pyplot as plt
from numpy.random import random_sample
from hypergraph.generators import uniform_hypergraph
from hypergraph import converters
from hypergraph.markov_diffusion import create_markov_matrix
from hypergraph import utils

def create_graph(number_of_nodes, cardinality, fraction_of_hyperedges):
HG = uniform_hypergraph(
n=number_of_nodes,
k=cardinality,
number_of_edges=int(
number_of_nodes *
fraction_of_hyperedges))

nodes = HG.nodes()
hyper_edges = HG.hyper_edges()
markov_matrix = create_markov_matrix(hyper_edges)
current_state = 1
return HG

In [3]:
def analytical_solution_of_diffusion(nodes, hyperedges, plot_results=False):
all_nodes = []
for hyperedge in hyperedges:
all_nodes += hyperedge

c = Counter(all_nodes)
for node in nodes:
if node not in all_nodes:
c[node]=0

xs, ys = zip(*c.items())

sum_of_ys = sum(ys)
ys = [float(y) / sum_of_ys for y in ys]

if plot_results:
plt.bar(xs, ys)
plt.title("Analytical prediction on diffusion on hypergraph")

return ys

In [4]:
HG = create_graph(10, 3, 0.7)

utils.plot_different_representations(HG.nodes(), HG.hyper_edges())

Drawing different representations of hypergraph
Bipartite graph
Graph of hypereges as nodes
Clique graph

In [5]:
analytical_solution_of_diffusion(HG.nodes(), HG.hyper_edges())

Out[5]:
[0.0,
0.09523809523809523,
0.09523809523809523,
0.19047619047619047,
0.14285714285714285,
0.09523809523809523,
0.09523809523809523,
0.09523809523809523,
0.14285714285714285,
0.047619047619047616]
In [6]:
def compute_C(hypergraph):
nodes = hypergraph.nodes()
hyper_edges = hypergraph.hyper_edges()

N = len(nodes)
M = len(hyper_edges)
C = np.zeros((N, M))
for i, node in enumerate(nodes):
for j, edge in enumerate(hyper_edges):
if node in edge:
C[i][j] = 1

for i in range(N):
if np.sum(C[i][:]):
C[i] /= np.sum(C[i][:])
return C

def compute_D(hypergraph):
nodes = hypergraph.nodes()
hyper_edges = hypergraph.hyper_edges()

N = len(nodes)
M = len(hyper_edges)
D = np.zeros((M, N))
for i, edge in enumerate(hyper_edges):
for j, node in enumerate(nodes):
if node in edge:
D[i][j] = 1
for i in range(M):
if np.sum(D[i][:]):
D[i] /= np.sum(D[i][:])
return D

In [7]:
C = compute_C(HG)

In [9]:
D = compute_D(HG)

In [10]:
C

Out[10]:
array([[ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
0.        ,  0.        ],
[ 0.        ,  0.        ,  0.        ,  0.        ,  1.        ,
0.        ,  0.        ],
[ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
1.        ,  0.        ],
[ 0.25      ,  0.        ,  0.25      ,  0.        ,  0.        ,
0.25      ,  0.25      ],
[ 0.        ,  0.        ,  0.33333333,  0.33333333,  0.33333333,
0.        ,  0.        ],
[ 0.33333333,  0.33333333,  0.        ,  0.        ,  0.        ,
0.33333333,  0.        ],
[ 0.        ,  0.33333333,  0.33333333,  0.        ,  0.        ,
0.        ,  0.33333333],
[ 0.        ,  0.        ,  0.        ,  0.        ,  0.5       ,
0.        ,  0.5       ],
[ 0.        ,  0.5       ,  0.        ,  0.5       ,  0.        ,
0.        ,  0.        ],
[ 0.5       ,  0.        ,  0.        ,  0.5       ,  0.        ,
0.        ,  0.        ]])
In [11]:
D

Out[11]:
array([[ 0.        ,  0.        ,  0.        ,  0.33333333,  0.        ,
0.33333333,  0.        ,  0.        ,  0.        ,  0.33333333],
[ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
0.33333333,  0.33333333,  0.        ,  0.33333333,  0.        ],
[ 0.        ,  0.        ,  0.        ,  0.33333333,  0.33333333,
0.        ,  0.33333333,  0.        ,  0.        ,  0.        ],
[ 0.        ,  0.        ,  0.        ,  0.        ,  0.33333333,
0.        ,  0.        ,  0.        ,  0.33333333,  0.33333333],
[ 0.        ,  0.33333333,  0.        ,  0.        ,  0.33333333,
0.        ,  0.        ,  0.33333333,  0.        ,  0.        ],
[ 0.        ,  0.        ,  0.33333333,  0.33333333,  0.        ,
0.33333333,  0.        ,  0.        ,  0.        ,  0.        ],
[ 0.        ,  0.        ,  0.        ,  0.33333333,  0.        ,
0.        ,  0.33333333,  0.33333333,  0.        ,  0.        ]])
In [12]:
np.dot(C, D)

Out[12]:
array([[ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
[ 0.        ,  0.33333333,  0.        ,  0.        ,  0.33333333,
0.        ,  0.        ,  0.33333333,  0.        ,  0.        ],
[ 0.        ,  0.        ,  0.33333333,  0.33333333,  0.        ,
0.33333333,  0.        ,  0.        ,  0.        ,  0.        ],
[ 0.        ,  0.        ,  0.08333333,  0.33333333,  0.08333333,
0.16666667,  0.16666667,  0.08333333,  0.        ,  0.08333333],
[ 0.        ,  0.11111111,  0.        ,  0.11111111,  0.33333333,
0.        ,  0.11111111,  0.11111111,  0.11111111,  0.11111111],
[ 0.        ,  0.        ,  0.11111111,  0.22222222,  0.        ,
0.33333333,  0.11111111,  0.        ,  0.11111111,  0.11111111],
[ 0.        ,  0.        ,  0.        ,  0.22222222,  0.11111111,
0.11111111,  0.33333333,  0.11111111,  0.11111111,  0.        ],
[ 0.        ,  0.16666667,  0.        ,  0.16666667,  0.16666667,
0.        ,  0.16666667,  0.33333333,  0.        ,  0.        ],
[ 0.        ,  0.        ,  0.        ,  0.        ,  0.16666667,
0.16666667,  0.16666667,  0.        ,  0.33333333,  0.16666667],
[ 0.        ,  0.        ,  0.        ,  0.16666667,  0.16666667,
0.16666667,  0.        ,  0.        ,  0.16666667,  0.33333333]])
In [13]:
%load_ext autoreload

In [14]:
markov_matrix = create_markov_matrix(HG.hyper_edges(), count_itself=False)

In [15]:
markov_matrix_with_itself = create_markov_matrix(HG.hyper_edges(), count_itself=True)

In [16]:
markov_matrix_with_itself

Out[16]:
array([[ 0.        ,  0.16666667,  0.16666667,  0.16666667,  0.        ,
0.33333333,  0.16666667],
[ 0.2       ,  0.        ,  0.2       ,  0.2       ,  0.        ,
0.2       ,  0.2       ],
[ 0.14285714,  0.14285714,  0.        ,  0.14285714,  0.14285714,
0.14285714,  0.28571429],
[ 0.25      ,  0.25      ,  0.25      ,  0.        ,  0.25      ,
0.        ,  0.        ],
[ 0.        ,  0.        ,  0.25      ,  0.25      ,  0.25      ,
0.        ,  0.25      ],
[ 0.33333333,  0.16666667,  0.16666667,  0.        ,  0.        ,
0.16666667,  0.16666667],
[ 0.16666667,  0.16666667,  0.33333333,  0.        ,  0.16666667,
0.16666667,  0.        ]])
In [17]:
markov_matrix

Out[17]:
array([[ 0.        ,  0.16666667,  0.16666667,  0.16666667,  0.        ,
0.33333333,  0.16666667],
[ 0.2       ,  0.        ,  0.2       ,  0.2       ,  0.        ,
0.2       ,  0.2       ],
[ 0.14285714,  0.14285714,  0.        ,  0.14285714,  0.14285714,
0.14285714,  0.28571429],
[ 0.25      ,  0.25      ,  0.25      ,  0.        ,  0.25      ,
0.        ,  0.        ],
[ 0.        ,  0.        ,  0.33333333,  0.33333333,  0.        ,
0.        ,  0.33333333],
[ 0.4       ,  0.2       ,  0.2       ,  0.        ,  0.        ,
0.        ,  0.2       ],
[ 0.16666667,  0.16666667,  0.33333333,  0.        ,  0.16666667,
0.16666667,  0.        ]])
In [18]:
np.dot(np.dot(np.dot(np.dot(markov_matrix, markov_matrix), markov_matrix), markov_matrix), markov_matrix)

Out[18]:
array([[ 0.15864414,  0.13472476,  0.19218626,  0.11842592,  0.07871192,
0.14532802,  0.17197897],
[ 0.16166972,  0.1348435 ,  0.19271618,  0.11746677,  0.07875497,
0.1422268 ,  0.17232206],
[ 0.16473108,  0.13765442,  0.19229482,  0.1135725 ,  0.08217065,
0.14007713,  0.1694994 ],
[ 0.17763888,  0.14683346,  0.19875188,  0.09810091,  0.09233917,
0.13118372,  0.15515198],
[ 0.15742385,  0.13125828,  0.19173151,  0.12311889,  0.07547052,
0.14369381,  0.17730313],
[ 0.17439363,  0.1422268 ,  0.19610798,  0.10494698,  0.08621629,
0.13322011,  0.16288822],
[ 0.17197897,  0.14360172,  0.1977493 ,  0.10343466,  0.08865157,
0.13574018,  0.15884361]])
In [19]:
from numpy import linalg as LA

LA.matrix_power(markov_matrix_with_itself, 1011)

Out[19]:
array([[ 0.15789474,  0.13157895,  0.18421053,  0.10526316,  0.10526316,
0.15789474,  0.15789474],
[ 0.15789474,  0.13157895,  0.18421053,  0.10526316,  0.10526316,
0.15789474,  0.15789474],
[ 0.15789474,  0.13157895,  0.18421053,  0.10526316,  0.10526316,
0.15789474,  0.15789474],
[ 0.15789474,  0.13157895,  0.18421053,  0.10526316,  0.10526316,
0.15789474,  0.15789474],
[ 0.15789474,  0.13157895,  0.18421053,  0.10526316,  0.10526316,
0.15789474,  0.15789474],
[ 0.15789474,  0.13157895,  0.18421053,  0.10526316,  0.10526316,
0.15789474,  0.15789474],
[ 0.15789474,  0.13157895,  0.18421053,  0.10526316,  0.10526316,
0.15789474,  0.15789474]])
In [20]:
LA.matrix_power(markov_matrix, 3)
len(markov_matrix)

Out[20]:
7
In [24]:
from hypergraph.markov_diffusion import DiffusionEngine, count_nodes
from hypergraph import utils

In [25]:
def simulate_diffusion(hypergraph, markov_matrix, t_max, current_state=1, plot_results=False):
nodes = hypergraph.nodes()
hyper_edges = hypergraph.hyper_edges()
engine = DiffusionEngine(markov_matrix)
most_common, states = engine.simulate(t_max)
most_common_nodes = count_nodes(nodes, hyper_edges, most_common)
if plot_results:
plt.figure(figsize=(8, 4))
utils.plot_hyperedges_frequencies(most_common, hyper_edges,
'Ocurrences of hyperedges in a hypergraph', normed=True)

plt.figure(figsize=(8, 4))
return utils.plot_nodes_frequencies(most_common_nodes, 'Nodes in a hypergraph', normed=True)
else:
return utils.get_names_and_occurrences(most_common_nodes)[1]


In [26]:
simulate_diffusion(HG, markov_matrix, 100)

Out[26]:
[0.0,
0.023333333333333334,
0.05,
0.21333333333333335,
0.11333333333333333,
0.15,
0.17666666666666667,
0.09333333333333334,
0.09666666666666666,
0.08333333333333333]
In [27]:
LA.matrix_power(markov_matrix_with_itself, 10)

Out[27]:
array([[ 0.15796624,  0.13162897,  0.18425894,  0.10517952,  0.1052864 ,
0.15786611,  0.15781383],
[ 0.15795476,  0.13162236,  0.18425508,  0.10519071,  0.10528765,
0.15786676,  0.15782268],
[ 0.15793623,  0.13161077,  0.18424984,  0.10520921,  0.10529296,
0.15786411,  0.15783688],
[ 0.15776928,  0.13148839,  0.18411611,  0.10541481,  0.10520923,
0.15795596,  0.15804622],
[ 0.15792959,  0.13160956,  0.18426268,  0.10520923,  0.10533039,
0.1578301 ,  0.15782845],
[ 0.15786611,  0.13155563,  0.18417479,  0.10530397,  0.10522007,
0.15793716,  0.15794226],
[ 0.15781383,  0.1315189 ,  0.18414303,  0.10536415,  0.10521897,
0.15794226,  0.15799886]])
In [28]:
analytical_solution_of_diffusion(HG.nodes(), HG.hyper_edges(), True)

Out[28]:
[0.0,
0.047619047619047616,
0.047619047619047616,
0.19047619047619047,
0.14285714285714285,
0.14285714285714285,
0.14285714285714285,
0.09523809523809523,
0.09523809523809523,
0.09523809523809523]
In [29]:
simulate_diffusion(HG, markov_matrix_with_itself, 100, 1,  True)

Out[29]:
[0.0,
0.06666666666666667,
0.04,
0.16,
0.18,
0.13333333333333333,
0.14,
0.08666666666666667,
0.10666666666666667,
0.08666666666666667]
In [30]:
def diffusion_on_clique(nodes, hyper_edges, t_max, plot_results=False):
clique_graph = converters.convert_to_clique_graph(nodes, hyper_edges)
clique_markov_matrix = create_markov_matrix(clique_graph.edges(), count_itself=False)

current_state = 1
engine = DiffusionEngine(clique_markov_matrix)
most_common, states = engine.simulate(t_max)
most_common_nodes = count_nodes(clique_graph.nodes(), clique_graph.edges(),
most_common)

if plot_results:
plt.figure(figsize=(8, 4))
utils.plot_hyperedges_frequencies(most_common, clique_graph.edges(),
'Ocurrences of edges in a graph')

plt.figure(figsize=(8, 4))
return utils.plot_nodes_frequencies(most_common_nodes, 'Nodes in a graph')
else:
return utils.get_names_and_occurrences(most_common_nodes)[1]


In [31]:
diffusion_on_clique(HG.nodes(), HG.hyper_edges(), t_max=1000)

Out[31]:
[0.0, 0.035, 0.047, 0.191, 0.172, 0.1255, 0.154, 0.095, 0.0895, 0.091]
In [32]:
from scipy import stats

def analytical_clique_diffusion(hypergraph, plot_results=False):
xs, ys = zip(*hypergraph.degree().items())
sum_of_ys = sum(ys)
ys = [float(y) / sum_of_ys for y in ys]

if plot_results:
plt.bar(xs, ys)
plt.title("Analytical prediction of diffusion on clique")
return ys

def correct_zero(node):
if node == 0:
node += 0.00001
return node

def compare_to_theory(experimental, theoretical_1, theoretical_2):
experimental = [correct_zero(node) for node in experimental]
theoretical_1 = [correct_zero(node) for node in theoretical_1]
theoretical_2 = [correct_zero(node) for node in theoretical_2]

return stats.chisquare(experimental, f_exp=[theoretical_1, theoretical_2], axis=1)

def comparing_pipeline(t_max=100000):
for n in range(5, 6, 5):
for k in range(3, 4):
for f in range(80, 81, 10):
f = float(f) / 100
# number_of_nodes, cardinality, fraction_of_hyperedges
print(n, k, f)
HG = create_graph(n, k, f)
number_of_nodes = analytical_solution_of_diffusion(HG.nodes(), HG.hyper_edges(), plot_results=False)
number_of_nodes_clique = analytical_clique_diffusion(HG, plot_results=False)
markov_matrix_with_itself = create_markov_matrix(HG.hyper_edges(), count_itself=True)
markov_matrix = create_markov_matrix(HG.hyper_edges(), count_itself=False)

simulated_n_o_n = simulate_diffusion(HG, markov_matrix, t_max, plot_results=False)
simulated_n_o_n_i = simulate_diffusion(HG, markov_matrix_with_itself, t_max)

simulated_n_o_n_c = diffusion_on_clique(HG.nodes(), HG.hyper_edges(), t_max=t_max)

print(compare_to_theory(simulated_n_o_n, number_of_nodes, number_of_nodes_clique))
print(compare_to_theory(simulated_n_o_n_i, number_of_nodes, number_of_nodes_clique))
print(compare_to_theory(simulated_n_o_n_c, number_of_nodes, number_of_nodes_clique))

print(simulated_n_o_n)
print(simulated_n_o_n_i)
print(simulated_n_o_n_c)
plt.figure(figsize=(12, 10))

width = 0.15
plt.bar(HG.nodes(), simulated_n_o_n,
width=width, color='crimson',
label='Simulated markov hypergraph')

plt.bar(np.array(HG.nodes()) + width, simulated_n_o_n_i, width,
color='burlywood',
label='Simulated markov hypergraph with loops')

plt.bar(np.array(HG.nodes()) + 2 * width, number_of_nodes, width,
label='Analytical diffusion model on hypergraph',
color="#65df25")

plt.bar(np.array(HG.nodes()) + 3 * width, simulated_n_o_n_c, width,
label='Simulated clique graph')

plt.bar(np.array(HG.nodes()) + 4 * width, number_of_nodes_clique, width,
label='Analytical diffusion model on clique', color="#dcab11")

plt.legend(loc=0)

In [33]:
comparing_pipeline()

5 3 0.8
(array([ 0.00473559,  0.06387746]), array([ 0.9999972 ,  0.99950069]))
(array([ 0.00083584,  0.04284002]), array([ 0.99999991,  0.99977384]))
(array([ 0.02596636,  0.00471509]), array([ 0.99991644,  0.99999723]))
[0.16686333333333334, 0.24992, 0.24988333333333335, 0.26720333333333335, 0.06613]
[0.15691333333333332, 0.25465333333333334, 0.2551, 0.25215666666666664, 0.08117666666666666]
[0.185145, 0.26233, 0.26336, 0.184525, 0.10464]

In [ ]: