import numpy as np
from functools import partial
from matplotlib import pyplot as plt
from numpy import linalg as LA
from hypergraph import generators
from hypergraph.analytical import prediction
from hypergraph.diffusion_engine import DiffusionEngine
from hypergraph import utils
from hypergraph.markov_diffusion import create_markov_matrix_model_nodes
from hypergraph.markov_diffusion import create_markov_matrix_model_hyper_edges
# Model definitions: analytical prediction and numerical transition-matrix builder for each model
ALL_MODELS = {
    "node": {
        "analytical": partial(prediction, model='hypergraph_nodes'),
        "numerical": create_markov_matrix_model_nodes,
        "name": "node",
    },
    "hyperedges": {
        "analytical": partial(prediction, model='hypergraph_edges'),
        "numerical": create_markov_matrix_model_hyper_edges,
        "name": "hyperedges",
    },
}
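ALL_MODELS simply pairs each model with its analytical prediction and its numerical transition-matrix builder. A minimal sketch of how such a mapping could be consumed (the summarize_models helper is illustrative and not part of the pipeline below; HG is assumed to be a hypergraph generated elsewhere):
# Illustrative sketch: compute both the analytical distribution and the
# numerical transition matrix for each registered model.
def summarize_models(HG, models=ALL_MODELS):
    summaries = {}
    for key, model in models.items():
        analytical = model["analytical"](graph=HG)     # stationary distribution from theory
        transition_matrix = model["numerical"](HG)     # Markov transition matrix from the hypergraph
        summaries[model["name"]] = (analytical, transition_matrix)
    return summaries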
# Constants for atomistic simulation
t_max = 100000
number_of_walkers = 1
%matplotlib inline
from hypergraph.hypergraph_models import HyperGraph
from hypergraph.generators import generic_hypergraph
How it works: for each generated hypergraph, the stationary distribution is computed in several independent ways (analytical prediction, atomistic random walk, matrix powers, and pykov's steady state), and the results are written to disk for later comparison.
import json
import pykov
import os
def generate_hypergraph(generator_function, hypergraph_properties):
    HG = generator_function(**hypergraph_properties)
    return HG
def transition_matrix_to_pykov_chain(matrix):
    chain = pykov.Chain()
    for i, row in enumerate(matrix):
        for j, column in enumerate(row):
            chain[(i, j)] = column
    return chain
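As a quick sanity check of transition_matrix_to_pykov_chain, here is a toy example (not part of the pipeline) on a hand-made 2-state matrix whose stationary distribution is known to be (5/6, 1/6):
# Toy 2-state chain: [[0.9, 0.1], [0.5, 0.5]] has steady state (5/6, 1/6).
toy_matrix = [[0.9, 0.1],
              [0.5, 0.5]]
toy_chain = transition_matrix_to_pykov_chain(toy_matrix)
print(toy_chain.steady())   # expect roughly {0: 0.833, 1: 0.167}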
from collections import Counter
# for nodes' model
def compute_atomistic_results_nodes(HG):
    markov_matrix = create_markov_matrix_model_nodes(HG)
    pykov_chain = transition_matrix_to_pykov_chain(markov_matrix)
    all_states = []
    # 100 independent walks of 1000 steps each
    for _ in range(100):
        states = pykov_chain.walk(1000)
        all_states += states
    freqs = Counter(all_states)
    # normalize visit counts to a probability distribution over all nodes
    for x in range(len(markov_matrix)):
        if x not in freqs:
            freqs[x] = 0
        else:
            freqs[x] /= len(all_states)
    # sort by node index so distributions from different methods line up
    xs, ys = zip(*sorted(freqs.items()))
    return xs, ys
def compute_matrix_power_results_nodes(HG):
    markov_matrix = create_markov_matrix_model_nodes(HG)
    freqs_matrix = LA.matrix_power(markov_matrix, 40)[0]
    ys = freqs_matrix
    xs = range(len(ys))
    return xs, ys
def compute_pykov_results_nodes(HG):
    markov_matrix = create_markov_matrix_model_nodes(HG)
    pykov_chain = transition_matrix_to_pykov_chain(markov_matrix)
    # sort by node index so distributions from different methods line up
    xs, ys = zip(*sorted(pykov_chain.steady().items()))
    return xs, ys
def compute_analytical_prediction_nodes(HG):
    ys = prediction(model='hypergraph_nodes', graph=HG)
    xs = range(len(ys))
    return xs, ys
# for hyper edges' model
def compute_atomistic_results_edges(HG):
    markov_matrix = create_markov_matrix_model_hyper_edges(HG)
    t_per_walker = int(t_max / number_of_walkers)
    engine = DiffusionEngine(markov_matrix, t_per_walker=t_per_walker)
    frequencies, states = engine.simulate(t_max)
    frequencies = [(node, frequency) for node, frequency in frequencies]
    frequencies.sort(key=lambda x: x[0])
    xs, ys = zip(*frequencies)
    ys = np.array(ys, dtype='float')
    ys /= sum(ys)
    return xs, ys
def compute_matrix_power_results_edges(HG):
    markov_matrix = create_markov_matrix_model_hyper_edges(HG)
    freqs_matrix = LA.matrix_power(markov_matrix, 40)[0]
    ys = freqs_matrix
    xs = range(len(ys))
    return xs, ys
def compute_pykov_results_edges(HG):
    markov_matrix = create_markov_matrix_model_hyper_edges(HG)
    pykov_chain = transition_matrix_to_pykov_chain(markov_matrix)
    # sort by edge index so distributions from different methods line up
    xs, ys = zip(*sorted(pykov_chain.steady().items()))
    return xs, ys
def compute_analytical_prediction_edges(HG):
    ys = prediction(model='hypergraph_edges', graph=HG)
    xs = range(len(ys))
    return xs, ys
def compute_stationary_distributions(HG, name_to_computation_functions_mapping):
    results = {}
    for name, computation_function in name_to_computation_functions_mapping.items():
        xs, pies = computation_function(HG)
        results[name] = pies
    return results
def serialize(HG):
    edges = [list(edge) for edge in HG.hyper_edges()]
    return json.dumps(edges)
def save_result_distribution(filename, result_distribution):
    with open(filename, 'w') as f:
        for value in result_distribution:
            f.write("%s\n" % value)
def save_hypergraph_values(filename, hg_description):
    with open(filename, 'w') as f:
        f.write(hg_description)
def save_results_to_files(HG, results, counter, directory_name=None):
    base_filename = '%s_{name}.csv' % counter
    if directory_name:
        if not os.path.exists(directory_name):
            os.mkdir(directory_name)
        base_filename = directory_name + '/' + base_filename
    for name, result_distribution in results.items():
        filename = base_filename.format(name=name)
        save_result_distribution(filename, result_distribution)
    hg_description = serialize(HG)
    filename = base_filename.format(name='hypergraph')
    save_hypergraph_values(filename, hg_description)
nodes_mapping = {
    'analytical_nodes': compute_analytical_prediction_nodes,
    'atomistic_nodes': compute_atomistic_results_nodes,
    'matrix_power_nodes': compute_matrix_power_results_nodes,
    'pykov_nodes': compute_pykov_results_nodes,
}
edges_mapping = {
    'analytical_edges': compute_analytical_prediction_edges,
    'atomistic_edges': compute_atomistic_results_edges,
    'matrix_power_edges': compute_matrix_power_results_edges,
    'pykov_edges': compute_pykov_results_edges,
}
def execute_pipeline(generator_function, hypergraph_properties, directory_name,
                     name_to_computation_functions_mapping, n=10):
    for counter in range(n):
        HG = generate_hypergraph(generator_function, hypergraph_properties)
        results = compute_stationary_distributions(HG, name_to_computation_functions_mapping)
        save_results_to_files(HG, results, counter, directory_name=directory_name)
        print("%s/%s" % (counter + 1, n))
    print('done')
for number_of_nodes in range(50, 90, 10):
    print(number_of_nodes)
    generator_function = generic_hypergraph
    hypergraph_properties = {
        'number_of_nodes': number_of_nodes,
        'edges_params': ((2, 20), (3, 30), (4, 20), (5, 15), (6, 10)),
    }
    print('Nodes models')
    directory_name = 'hypergraph_nodes_%s' % number_of_nodes
    execute_pipeline(generator_function, hypergraph_properties, directory_name, nodes_mapping)
    print('\nEdges models')
    directory_name = 'hypergraph_edges_%s' % number_of_nodes
    execute_pipeline(generator_function, hypergraph_properties, directory_name, edges_mapping)
# show serialized form of hypergraph (possible to recreate it later)
!cat hypergraph_nodes_50/0_hypergraph.csv
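Since serialize() dumps the hyperedges with json.dumps, the file can be read back with json.loads. Rebuilding the HyperGraph object is only sketched here; the constructor call is an assumption about the HyperGraph API, not something shown in this notebook:
# Read the serialized hyperedges back; json.loads reverses the json.dumps in serialize().
with open('hypergraph_nodes_50/0_hypergraph.csv') as f:
    restored_edges = json.loads(f.read())
# Rebuilding a HyperGraph from these edges: the constructor arguments below are
# an assumption about the HyperGraph API; adjust to what the package expects.
restored_nodes = sorted({node for edge in restored_edges for node in edge})
HG_restored = HyperGraph(restored_nodes, restored_edges)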
This way I generated a lot of data for a fairly complex hypergraph. But how do I analyze it? It would be nice to load the data back from disk and compute some basic statistics. I expect the atomistic results to diverge the most; the remaining methods should agree closely.
For example, if I compare pykov (the steady-state distribution of the Markov chain built from the hypergraph's transition matrix) with the analytical prediction for the nodes model, how big will the differences be?
# read example of results with numpy
pykov_results = np.loadtxt('hypergraph_nodes_50/0_pykov_nodes.csv')
analytical_results = np.loadtxt('hypergraph_nodes_50/0_analytical_nodes.csv')
# compare arrays of results by computing their difference
pykov_results - analytical_results
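To put a number on "how big", a few plain-numpy summary statistics of that difference (this mirrors what compare_sets reports below for whole directories):
# Summary statistics of the per-node difference between the two methods
diff = pykov_results - analytical_results
print('max abs difference: ', np.abs(diff).max())
print('mean abs difference:', np.abs(diff).mean())
print('variance:           ', np.var(diff))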
def compare_results(base_directory, suffix_one, suffix_two):
    """Compute differences between two different methods for computing the same result."""
    filenames = os.listdir(base_directory)
    first_filenames = [filename for filename in filenames if filename.endswith(suffix_one)]
    first_filenames.sort()
    second_filenames = [filename for filename in filenames if filename.endswith(suffix_two)]
    second_filenames.sort()
    differences = []
    for first, second in zip(first_filenames, second_filenames):
        difference = np.loadtxt(base_directory + '/' + first) - np.loadtxt(base_directory + '/' + second)
        differences.append(difference)
    return differences
# compare all the sets of sizes
def compare_sets(base_directory_template, suffix_one, suffix_two):
    directories = (base_directory_template % number_of_nodes for number_of_nodes in range(50, 90, 10))
    for directory in directories:
        print(directory)
        differences = compare_results(directory, suffix_one, suffix_two)
        print('average difference', np.average(np.abs(differences)))
        print('variance of differences', np.var(differences))
        print('-' * 80)
        print()
compare_sets('hypergraph_nodes_%s', 'pykov_nodes.csv', 'analytical_nodes.csv')
compare_sets('hypergraph_nodes_%s', 'atomistic_nodes.csv', 'analytical_nodes.csv')
compare_sets('hypergraph_edges_%s', 'matrix_power_edges.csv', 'analytical_edges.csv')