In [2]:
import numpy as np

from functools import partial
from matplotlib import pyplot as plt

from numpy import linalg as LA

from hypergraph import generators
from hypergraph.analytical import prediction
from hypergraph.diffusion_engine import DiffusionEngine
from hypergraph import utils
from hypergraph.markov_diffusion import create_markov_matrix_model_nodes
from hypergraph.markov_diffusion import create_markov_matrix_model_hyper_edges

# Model definitions: maps each diffusion model to its analytical predictor
# (a partial of `prediction`) and its numerical transition-matrix builder.
ALL_MODELS = {
    "node": {
        "analytical": partial(prediction, model='hypergraph_nodes'),
        "numerical": create_markov_matrix_model_nodes,
        "name": "node",
    },
    "hyperedges": {
        "analytical": partial(prediction, model='hypergraph_edges'),
        "numerical": create_markov_matrix_model_hyper_edges,
        "name": "hyperedges",
    }
}

# Constants for atomistic simulation
t_max = 100000          # total number of diffusion steps shared by all walkers
number_of_walkers = 1   # steps per walker = t_max / number_of_walkers

In [3]:
%matplotlib inline
from hypergraph.hypergraph_models import HyperGraph
from hypergraph.generators import generic_hypergraph


How it works:

• generate random hypergraphs with given properties (is there another way of generating hypergraphs?)
• provide a function that evaluates each model so that its stationary distribution is available
• save the hypergraph and the results from the three (what about pykov?) ways of computing stationary distributions, to compare them later
• provide another function for comparing the saved results later, with nice graphs and pictures!
In [ ]:
import json
import pykov
import os

def generate_hypergraph(generator_function, hypergraph_properties):
    """Build a hypergraph by calling generator_function with the given properties as keyword arguments."""
    return generator_function(**hypergraph_properties)

def transition_matrix_to_pykov_chain(matrix):
    """Convert a transition matrix into a pykov.Chain keyed by (from_state, to_state)."""
    chain = pykov.Chain()
    for source, row in enumerate(matrix):
        for target, probability in enumerate(row):
            chain[(source, target)] = probability
    return chain

In [4]:
from collections import Counter

# for nodes' model

def compute_atomistic_results_nodes(HG):
    """Estimate the node-model stationary distribution by sampling random walks.

    Runs 100 independent pykov walks of 1000 steps each over the node-model
    Markov chain of HG and returns (xs, ys): state ids and their empirical
    visit frequencies normalized to a probability distribution.
    """
    markov_matrix = create_markov_matrix_model_nodes(HG)
    # BUG FIX: the original passed the undefined name `mm` here (NameError).
    chain = transition_matrix_to_pykov_chain(markov_matrix)
    pykov_chain = pykov.Chain(chain)
    all_states = []
    for _ in range(100):
        all_states += pykov_chain.walk(1000)

    freqs = Counter(all_states)
    total = len(all_states)
    for state in range(len(markov_matrix)):
        # BUG FIX: the original did `freqs = 0` for missing states (clobbering
        # the whole Counter) and divided by 100, which does not produce a
        # probability distribution. Counter returns 0 for missing keys, so this
        # single statement both fills gaps and normalizes every state.
        freqs[state] /= total
    # BUG FIX: the original referenced the undefined name `freq`. Sorting by
    # state id keeps positions aligned with the other methods' range(len(ys)).
    xs, ys = zip(*sorted(freqs.items()))
    return xs, ys

def compute_matrix_power_results_nodes(HG):
    """Approximate the node-model stationary distribution by repeated matrix powers.

    Raises the transition matrix to the 40th power and reads off the first row,
    which converges to the stationary distribution for an ergodic chain.
    """
    transition = create_markov_matrix_model_nodes(HG)
    stationary_row = LA.matrix_power(transition, 40)[0]
    return range(len(stationary_row)), stationary_row

def compute_pykov_results_nodes(HG):
    """Stationary distribution of the node model via pykov's steady-state solver.

    Returns (xs, ys): state ids and their steady-state probabilities, ordered
    by state id so positions line up with the other methods.
    """
    mm = create_markov_matrix_model_nodes(HG)
    chain = transition_matrix_to_pykov_chain(mm)
    pykov_chain = pykov.Chain(chain)
    # BUG FIX: the original returned the undefined names `xs`, `ys` (NameError)
    # and never queried the chain. Use pykov's steady() to get the stationary
    # vector; missing states read as 0 from the pykov Vector mapping.
    steady = pykov_chain.steady()
    ys = [steady[state] for state in range(len(mm))]
    xs = range(len(ys))
    return xs, ys

def compute_analytical_prediction_nodes(HG):
    """Analytical stationary distribution for the node model of HG."""
    pies = prediction(model='hypergraph_nodes', graph=HG)
    return range(len(pies)), pies

# for hyper edges' model
def compute_atomistic_results_edges(HG):
markov_matrix = create_markov_matrix_model_hyper_edges(HG)
t_per_walker = int(t_max / number_of_walkers)
engine = DiffusionEngine(markov_matrix, t_per_walker=t_per_walker)

frequencies, states = engine.simulate(t_max)

frequencies = [(node, frequency) for node, frequency in frequencies]
frequencies.sort(key=lambda x: x[0])
xs, ys = zip(*frequencies)

ys = np.array(ys, dtype='float')
ys /= sum(ys)
return xs, ys

def compute_matrix_power_results_edges(HG):
    """Approximate the hyperedge-model stationary distribution by matrix powers.

    Raises the transition matrix to the 40th power and reads off the first row,
    which converges to the stationary distribution for an ergodic chain.
    """
    transition = create_markov_matrix_model_hyper_edges(HG)
    stationary_row = LA.matrix_power(transition, 40)[0]
    return range(len(stationary_row)), stationary_row

def compute_pykov_results_edges(HG):
    """Stationary distribution of the hyperedge model via pykov's steady-state solver.

    Returns (xs, ys): state ids and their steady-state probabilities, ordered
    by state id so positions line up with the other methods.
    """
    mm = create_markov_matrix_model_hyper_edges(HG)
    chain = transition_matrix_to_pykov_chain(mm)
    pykov_chain = pykov.Chain(chain)
    # BUG FIX: the original returned the undefined names `xs`, `ys` (NameError)
    # and never queried the chain. Use pykov's steady() to get the stationary
    # vector; missing states read as 0 from the pykov Vector mapping.
    steady = pykov_chain.steady()
    ys = [steady[state] for state in range(len(mm))]
    xs = range(len(ys))
    return xs, ys

def compute_analytical_prediction_edges(HG):
    """Analytical stationary distribution for the hyperedge model of HG."""
    pies = prediction(model='hypergraph_edges', graph=HG)
    return range(len(pies)), pies

In [5]:
def compute_stationary_distributions(HG, name_to_computation_functions_mapping):
    """Run every computation method on HG and collect its stationary distribution.

    Each mapped function returns (xs, pies); only the distribution (pies) is
    kept, keyed by the method's name.
    """
    return {
        name: compute(HG)[1]
        for name, compute in name_to_computation_functions_mapping.items()
    }

def serialize(HG):
    """Serialize a hypergraph as a JSON array of its hyperedges (each a list of nodes)."""
    return json.dumps([list(edge) for edge in HG.hyper_edges()])

def save_result_distribution(filename, result_distribution):
    """Write a stationary distribution to filename, one value per line."""
    with open(filename, 'w') as output_file:
        output_file.write(''.join("%s\n" % value for value in result_distribution))

def save_hypergraph_values(filename, hg_description):
    """Write the serialized hypergraph description to filename."""
    with open(filename, 'w') as output_file:
        output_file.write(hg_description)

def save_results_to_files(HG, results, counter, directory_name=None):
    """Persist each method's distribution plus the serialized hypergraph.

    Files are named '<counter>_<method>.csv' (and '<counter>_hypergraph.csv'),
    created inside directory_name when given (the directory is created if
    missing).
    """
    base_filename = '%s_{name}.csv' % counter
    if directory_name:
        if not os.path.exists(directory_name):
            os.mkdir(directory_name)
        base_filename = directory_name + '/' + base_filename

    for name, result_distribution in results.items():
        save_result_distribution(base_filename.format(name=name), result_distribution)

    save_hypergraph_values(base_filename.format(name='hypergraph'), serialize(HG))

In [3]:
# Result-file name suffix -> computation function, for the node model.
nodes_mapping = {
    'analytical_nodes': compute_analytical_prediction_nodes,
    'atomistic_nodes': compute_atomistic_results_nodes,
    'matrix_power_nodes': compute_matrix_power_results_nodes,
    'pykov_nodes': compute_pykov_results_nodes,
}

# Result-file name suffix -> computation function, for the hyperedge model.
edges_mapping = {
    'analytical_edges': compute_analytical_prediction_edges,
    'atomistic_edges': compute_atomistic_results_edges,
    'matrix_power_edges': compute_matrix_power_results_edges,
    'pykov_edges': compute_pykov_results_edges,
}

def execute_pipeline(generator_function, hypergraph_properties, directory_name, name_to_computation_functions_mapping, n=10):
    """Generate n random hypergraphs, compute and save their stationary distributions.

    Prints a progress counter after each hypergraph and 'done' at the end.
    """
    for counter in range(n):
        hypergraph = generate_hypergraph(generator_function, hypergraph_properties)
        distributions = compute_stationary_distributions(hypergraph, name_to_computation_functions_mapping)
        save_results_to_files(hypergraph, distributions, counter, directory_name=directory_name)
        print("%s/%s" % (counter + 1, n))
    print('done')

In [50]:
# Sweep hypergraph sizes (50, 60, 70, 80 nodes) and run the full pipeline
# for both the node model and the hyperedge model at each size.
for number_of_nodes in range(50, 90, 10):
    print(number_of_nodes)
    generator_function = generic_hypergraph
    # edges_params: presumably (edge cardinality, edge count) pairs, e.g.
    # (2, 20) = twenty 2-node edges — TODO confirm against generic_hypergraph.
    hypergraph_properties = {
        'number_of_nodes': number_of_nodes,
        'edges_params': ((2, 20), (3, 30), (4, 20), (5, 15), (6, 10))
    }

    print('Nodes models')
    directory_name = 'hypergraph_nodes_%s' % number_of_nodes
    execute_pipeline(generator_function, hypergraph_properties, directory_name, nodes_mapping)

    print('\nEdges models')
    directory_name = 'hypergraph_edges_%s' % number_of_nodes
    execute_pipeline(generator_function, hypergraph_properties, directory_name, edges_mapping)

50
Nodes models
1/10
2/10
3/10
4/10
5/10
6/10
7/10
8/10
9/10
10/10
done

Edges models
1/10
2/10
3/10
4/10
5/10
6/10
7/10
8/10
9/10
10/10
done
60
Nodes models
1/10
2/10
3/10
4/10
5/10
6/10
7/10
8/10
9/10
10/10
done

Edges models
1/10
2/10
3/10
4/10
5/10
6/10
7/10
8/10
9/10
10/10
done
70
Nodes models
1/10
2/10
3/10
4/10
5/10
6/10
7/10
8/10
9/10
10/10
done

Edges models
1/10
2/10
3/10
4/10
5/10
6/10
7/10
8/10
9/10
10/10
done
80
Nodes models
1/10
2/10
3/10
4/10
5/10
6/10
7/10
8/10
9/10
10/10
done

Edges models
1/10
2/10
3/10
4/10
5/10
6/10
7/10
8/10
9/10
10/10
done

In [42]:
# show serialized form of hypergraph (possible to recreate it later)
!cat hypergraph_nodes_50/0_hypergraph.csv

[[21, 46], [9, 5], [17, 50], [28, 37], [42, 36], [14, 7], [25, 18], [27, 21], [40, 11], [28, 23], [18, 37], [42, 35], [18, 34], [19, 12], [22, 39], [10, 20], [10, 27], [18, 50], [20, 14], [2, 26], [2, 21, 14], [1, 34, 14], [24, 26, 50], [16, 13, 45], [11, 12, 43], [49, 5, 31], [2, 34, 30], [2, 5, 14], [16, 41, 39], [49, 18, 34], [32, 48, 27], [26, 19, 23], [8, 25, 18], [16, 3, 43], [31, 38, 6], [32, 40, 18], [25, 20, 22], [16, 17, 19], [25, 3, 19], [8, 9, 23], [16, 27, 36], [41, 27, 15], [40, 2, 37], [41, 12, 45], [27, 43, 21], [32, 10, 21], [33, 20, 44], [18, 43, 15], [16, 49, 44], [40, 48, 3], [2, 44, 22, 38], [24, 15, 42, 39], [48, 42, 27, 22], [19, 28, 5, 30], [8, 29, 6, 30], [16, 26, 43, 7], [25, 2, 4, 23], [48, 3, 44, 14], [7, 10, 29, 39], [17, 5, 38, 31], [1, 37, 22, 15], [25, 20, 45, 7], [8, 50, 11, 21], [24, 9, 42, 13], [16, 9, 3, 15], [1, 26, 19, 7], [25, 41, 27, 45], [24, 32, 50, 45], [16, 41, 30, 7], [9, 12, 36, 30], [24, 25, 29, 38, 31], [16, 17, 32, 5, 6], [9, 34, 41, 50, 15], [16, 17, 25, 20, 5], [40, 10, 35, 50, 29], [40, 49, 11, 21, 45], [32, 41, 7, 13, 47], [15, 25, 2, 36, 39], [3, 11, 29, 35, 15], [6, 14, 18, 22, 23], [17, 42, 3, 36, 39], [32, 49, 35, 37, 38], [40, 17, 2, 11, 44], [8, 33, 50, 13, 22], [24, 48, 43, 21, 14], [49, 2, 24, 42, 14, 47], [2, 23, 8, 40, 27, 44], [47, 7, 25, 29, 14, 15], [17, 49, 36, 7, 12, 14], [2, 36, 5, 20, 28, 14], [1, 4, 5, 21, 28, 14], [48, 2, 34, 5, 23, 41], [48, 23, 10, 42, 26, 13], [33, 17, 19, 25, 42, 12], [33, 18, 50, 21, 24, 41]]

This way I generated a lot of data for pretty complex hypergraphs. However, how do I analyze it? It would be nice to load the data from disk and compute some basic statistics. I expect the atomistic method to be the most divergent; the rest should agree closely.

If, for example, I compare pykov (steady-state distributions of the Markov chain built from the hypergraph's transition matrix) with the node model, how big will the differences be?

In [43]:
# read example of results with numpy

In [44]:
# Load the analytical node-model distribution for the first size-50 hypergraph
# (one value per line, as written by save_result_distribution).
analytical_results = np.loadtxt('hypergraph_nodes_50/0_analytical_nodes.csv')

In [45]:
# compare arrays of results by computing their difference
# NOTE(review): `pykov_results` is never defined in this notebook — this cell
# relies on leftover kernel state and will fail on Restart & Run All. Load it
# from 'hypergraph_nodes_50/0_pykov_nodes.csv' first.
pykov_results - analytical_results

Out[45]:
array([  4.92748203e-14,   1.01446629e-14,   3.62349040e-14,
4.63778321e-15,   2.31897834e-14,   4.92748203e-14,
-3.91284227e-14,  -2.60867716e-14,  -2.60867716e-14,
-2.60867716e-14,  -2.60867716e-14,  -2.60867716e-14,
1.15948917e-14,   1.01446629e-14,  -3.91284227e-14,
-1.44884105e-14,  -3.91284227e-14,   2.31897834e-14,
3.62349040e-14,   3.62349040e-14,   2.31897834e-14,
3.62349040e-14,  -1.45022883e-15,  -1.45022883e-15,
4.78228568e-14,  -2.60867716e-14,  -3.91284227e-14,
1.15948917e-14,  -2.60867716e-14,   1.15948917e-14,
4.92748203e-14,   3.62349040e-14,   4.92748203e-14,
-2.60867716e-14,   4.92748203e-14,   3.62349040e-14,
1.15948917e-14,   1.15948917e-14,  -2.60867716e-14,
-1.45022883e-15,  -3.91284227e-14,  -3.91284227e-14,
-2.60867716e-14,  -2.60867716e-14,  -2.60867716e-14,
2.31889161e-15,  -3.04443970e-15,   3.62349040e-14,
3.62349040e-14,  -3.91284227e-14])
In [46]:
def compare_results(base_directory, suffix_one, suffix_two):
    """Compute differences between two different methods for computing the same result.

    Pairs files in base_directory by sorted order of names ending in suffix_one
    vs. suffix_two, and returns a list of element-wise difference arrays.
    """
    filenames = os.listdir(base_directory)
    first_group = sorted(name for name in filenames if name.endswith(suffix_one))
    second_group = sorted(name for name in filenames if name.endswith(suffix_two))
    return [
        np.loadtxt(base_directory + '/' + first) - np.loadtxt(base_directory + '/' + second)
        for first, second in zip(first_group, second_group)
    ]

In [47]:
# compare all the sets of sizes
def compare_sets(base_directory_template, suffix_one, suffix_two, sizes=range(50, 90, 10)):
    """Print difference statistics between two methods across all hypergraph sizes.

    For each size in `sizes` (generalized from the previously hard-coded
    range(50, 90, 10); default preserves old behavior), formats the directory
    name from base_directory_template and prints the mean absolute difference
    and the variance of the differences between paired result files.
    """
    for number_of_nodes in sizes:
        directory = base_directory_template % number_of_nodes
        print(directory)
        differences = compare_results(directory, suffix_one, suffix_two)
        print('average difference', np.average(np.abs(differences)))
        print('variance of differences', np.var(differences))
        print('-' * 80)
        print()

compare_sets('hypergraph_nodes_%s', 'pykov_nodes.csv', 'analytical_nodes.csv')

hypergraph_nodes_50
average difference 2.48766320043e-14
variance of differences 8.226043135e-28
--------------------------------------------------------------------------------

hypergraph_nodes_60
average difference 2.36902345252e-14
variance of differences 7.83562261469e-28
--------------------------------------------------------------------------------

hypergraph_nodes_70
average difference 2.19816315447e-14
variance of differences 6.67990066372e-28
--------------------------------------------------------------------------------

hypergraph_nodes_80
average difference 2.07090556726e-14
variance of differences 6.26384928982e-28
--------------------------------------------------------------------------------


In [48]:
compare_sets('hypergraph_nodes_%s', 'atomistic_nodes.csv', 'analytical_nodes.csv')

hypergraph_nodes_50
average difference 0.000474616811594
variance of differences 3.63764214325e-07
--------------------------------------------------------------------------------

hypergraph_nodes_60
average difference 0.000444607246377
variance of differences 3.26531986978e-07
--------------------------------------------------------------------------------

hypergraph_nodes_70
average difference 0.000407904347826
variance of differences 2.66943587062e-07
--------------------------------------------------------------------------------

hypergraph_nodes_80
average difference 0.000356900724638
variance of differences 2.21134558129e-07
--------------------------------------------------------------------------------


In [51]:
compare_sets('hypergraph_edges_%s', 'matrix_power_edges.csv', 'analytical_edges.csv')

hypergraph_edges_50
average difference 8.44996189011e-11
variance of differences 5.54932472914e-20
--------------------------------------------------------------------------------

hypergraph_edges_60
average difference 2.99177497468e-09
variance of differences 1.80022375173e-16
--------------------------------------------------------------------------------

hypergraph_edges_70
average difference 8.39051564135e-09
variance of differences 5.36273893841e-16
--------------------------------------------------------------------------------

hypergraph_edges_80
average difference 3.22182446427e-06
variance of differences 6.41941085925e-10
--------------------------------------------------------------------------------



## Summing up:

• the differences between pykov (steady-state distributions of the Markov chain) and the analytical solutions are really small
In [ ]: