In [1]:
%matplotlib inline


## todo¶

• Is this form of code the most recent one?
• How can I make this more educational?
• Divide code into smaller snippets and comment it a lot
In [2]:
%matplotlib inline

import numpy as np
import networkx as nx
from collections import Counter
from matplotlib import pyplot as plt
from numpy.random import random_sample
from hypergraph.generators import uniform_hypergraph
from hypergraph import converters
from hypergraph import utils

def create_markov_matrix(edges):
a_matrix = np.zeros((len(edges), len(edges)))

for i, edge_1 in enumerate(edges):
for j, edge_2 in enumerate(edges):
if i != j:
a_matrix[i][j] = len(set(edge_1) & set(edge_2))

for i, edge_1 in enumerate(edges):
a_matrix[i] = a_matrix[i] / float(sum(a_matrix[i]))

return a_matrix

def weighted_values(values, probabilities, size):
bins[-1] = max(1, bins[-1])
index = np.digitize(random_sample(size), bins)
return values[index]

def step_diffusion(current_state, markov_matrix):
values_size = len(markov_matrix)
values = range(values_size)
probs = markov_matrix[current_state]
next_state = weighted_values(values, probs, 1)
return next_state

def simulate(current_state, markov_matrix, t_max):
c = Counter()
states = []
for _ in range(t_max):
current_state = step_diffusion(current_state, markov_matrix)
visited_hyperedge = current_state
c[visited_hyperedge] += 1
states.append(visited_hyperedge)
return c.most_common(), states

def count_nodes(nodes, edges, occurences):
c = Counter()
for index, count in occurences:
for edge in edges[index]:
c[edge] += count
for node in nodes:
if node not in c:
c[node] = 0
return c

def plot_nodes_frequencies(most_common_nodes, title):
plt.bar(list(most_common_nodes.keys()),
list(most_common_nodes.values()),
alpha=0.7, color='magenta')

plt.title(title)
plt.show()

def plot_different_representations(nodes, hyperedges):
print("Drawing different representations of hypergraph")

print("Bipartite graph")
nx_bipartite = converters.convert_to_nx_bipartite_graph(nodes, hyperedges)
utils.draw_bipartite_graph(nx_bipartite,
*utils.hypergraph_to_bipartite_parts(nx_bipartite))

print("Graph of hypereges as nodes")
custom_hyper_g = converters.convert_to_custom_hyper_G(nodes, hyperedges)
plt.figure()
nx.draw(custom_hyper_g)

print("Clique graph")

clique_graph = converters.convert_to_clique_graph(nodes, hyperedges)
plt.figure()
nx.draw(clique_graph)

def plot_hyperedges_frequencies(most_common, hyperedges, title):
hyperedges_indexes, occurences = zip(*most_common)
plt.bar(hyperedges_indexes, occurences)
plt.title(title)
plt.xticks(range(len(hyperedges)), hyperedges)

def compare_hypergraph_with_cliques(number_of_nodes,
cardinality, fraction, t_max, plot_representations=False):
HG = uniform_hypergraph(
n=number_of_nodes,
k=cardinality,
number_of_edges=int(
number_of_nodes *
fraction))

nodes = HG.nodes()
hyperedges = HG.hyper_edges()

all_nodes = []
for hyperedge in hyperedges:
all_nodes += hyperedge
c = Counter(all_nodes)
xs, ys = zip(*c.items())
#plt.bar(xs, ys)

if plot_representations:
plot_different_representations(nodes, hyperedges)

markov_matrix = create_markov_matrix(hyperedges)
print(markov_matrix)
current_state = 1
most_common, states = simulate(current_state, markov_matrix, t_max)

plt.figure(figsize=(8, 4))
plot_hyperedges_frequencies(most_common, hyperedges,
'Ocurrences of hyperedges in a hypergraph')

most_common_nodes = count_nodes(nodes, hyperedges, most_common)

plt.figure(figsize=(8, 4))
plot_nodes_frequencies(most_common_nodes, 'Nodes in a hypergraph')

clique_graph = converters.convert_to_clique_graph(nodes, hyperedges)
clique_markov_matrix = create_markov_matrix(clique_graph.edges())

print("clique markov matrix")
print(clique_markov_matrix)
most_common, states = simulate(current_state, clique_markov_matrix, t_max)

plt.figure(figsize=(8, 4))
plot_hyperedges_frequencies(most_common, clique_graph.edges(),
'Ocurrences of edges in a graph')

most_common_nodes = count_nodes(clique_graph.nodes(), clique_graph.edges(),
most_common)
plt.figure(figsize=(12, 10))
plot_nodes_frequencies(most_common_nodes, 'Nodes in a graph')

def demo():
n = 10
k = 3
fraction = 2.0 / 3

for simulation_time in [100]:
compare_hypergraph_with_cliques(n, k, fraction, simulation_time)

if __name__ == '__main__':
demo()

[[ 0.          0.14285714  0.28571429  0.28571429  0.28571429]
[ 0.2         0.          0.4         0.          0.4       ]
[ 0.28571429  0.28571429  0.          0.14285714  0.28571429]
[ 0.5         0.          0.25        0.          0.25      ]
[ 0.28571429  0.28571429  0.28571429  0.14285714  0.        ]]

clique markov matrix
[[ 0.          0.14285714  0.14285714  0.14285714  0.14285714  0.
0.14285714  0.          0.14285714  0.14285714]
[ 0.2         0.          0.2         0.2         0.          0.          0.2
0.2         0.          0.        ]
[ 0.2         0.2         0.          0.2         0.          0.2         0.
0.          0.2         0.        ]
[ 0.2         0.2         0.2         0.          0.          0.          0.
0.2         0.          0.2       ]
[ 0.2         0.          0.          0.          0.          0.2         0.2
0.          0.2         0.2       ]
[ 0.          0.          0.33333333  0.          0.33333333  0.          0.
0.          0.33333333  0.        ]
[ 0.16666667  0.16666667  0.          0.          0.16666667  0.          0.
0.16666667  0.16666667  0.16666667]
[ 0.          0.25        0.          0.25        0.          0.          0.25
0.          0.          0.25      ]
[ 0.16666667  0.          0.16666667  0.          0.16666667  0.16666667
0.16666667  0.          0.          0.16666667]
[ 0.16666667  0.          0.          0.16666667  0.16666667  0.
0.16666667  0.16666667  0.16666667  0.        ]]


# How can we represent hypergraphs in code?¶

A very good representation is bipartite graph of nodes and hyperedges. Another representation is a graph with hyperedges as nodes connected if hyperedges have common nodes. Is it a good representation for our diffusion problem? Can we use it as a reference model?

# How different are hypergraphs from graphs with cliques?¶

Cliques are sets of nodes in which every node is connected to every other node. They are a bit similar to hypergraphs, but how the nodes are connected is conceptually different from hypergraphs.

How is it different in diffusion simulation with markov chain?

In [ ]: