In [2]:
import numpy as np

from functools import partial
from matplotlib import pyplot as plt

from numpy import linalg as LA

from hypergraph import generators
from hypergraph.analytical import prediction
from hypergraph.diffusion_engine import DiffusionEngine
from hypergraph import utils
from hypergraph.markov_diffusion import create_markov_matrix_model_nodes
from hypergraph.markov_diffusion import create_markov_matrix_model_hyper_edges


# Define the model configurations
ALL_MODELS = {
    "node": {
        "analytical": partial(prediction, model='hypergraph_nodes'),
        "numerical": create_markov_matrix_model_nodes,
        "name": "node",
    },
    "hyperedges": {
        "analytical": partial(prediction, model='hypergraph_edges'),
        "numerical": create_markov_matrix_model_hyper_edges,
        "name": "hyperedges",
    }
}

# Constants for atomistic simulation
t_max = 100000
number_of_walkers = 1
In [3]:
%matplotlib inline
from hypergraph.hypergraph_models import HyperGraph
from hypergraph.generators import generic_hypergraph

How it works:

  • generate random hypergraphs with given properties (could another generator be plugged in here?)
  • a function that evaluates each model so that its stationary distribution is available
  • save the hypergraph, plus the results of the four ways of computing the stationary distribution (analytical, atomistic, matrix power, and pykov), so they can be compared later
  • another function for comparing the results afterwards, with nice graphs and pictures! (a roadmap sketch follows this list)
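
As a roadmap, the pieces implemented in the cells below fit together roughly like this (every name refers to a function defined later in this notebook):

# Roadmap sketch, not meant to be run on its own:
#
#   HG = generate_hypergraph(generic_hypergraph, hypergraph_properties)  # 1. generate
#   results = compute_stationary_distributions(HG, nodes_mapping)        # 2. evaluate
#   save_results_to_files(HG, results, counter, directory_name)          # 3. persist
#   compare_sets('hypergraph_nodes_%s',
#                'pykov_nodes.csv', 'analytical_nodes.csv')              # 4. compare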
In [ ]:
import json
import pykov
import os


def generate_hypergraph(generator_function, hypergraph_properties):
    HG = generator_function(**hypergraph_properties)
    return HG


def transition_matrix_to_pykov_chain(matrix):
    chain = pykov.Chain()
    
    for i, row in enumerate(matrix):
        for j, column in enumerate(row):
            chain[(i, j)] = column
    return chain
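
As a quick sanity check of this conversion, here is a minimal sketch with a hand-computable 2-state chain; solving pi = pi * P by hand gives the steady state (5/6, 1/6):

# Minimal sketch: convert a small transition matrix and inspect its steady state.
example_matrix = np.array([[0.9, 0.1],
                           [0.5, 0.5]])
example_chain = transition_matrix_to_pykov_chain(example_matrix)
print(example_chain.steady())  # expected: close to {0: 0.833, 1: 0.167}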
In [4]:
from collections import Counter


# for the nodes model

def compute_atomistic_results_nodes(HG):
    markov_matrix = create_markov_matrix_model_nodes(HG)
    pykov_chain = transition_matrix_to_pykov_chain(markov_matrix)
    all_states = []
    for x in range(100):
        states = pykov_chain.walk(1000)
        all_states += states

    # Normalise visit counts into a probability distribution over all nodes.
    total = float(len(all_states))
    freqs = Counter(all_states)
    for x in range(len(markov_matrix)):
        freqs[x] = freqs[x] / total
    xs, ys = zip(*sorted(freqs.items()))
    return xs, ys


def compute_matrix_power_results_nodes(HG):
    markov_matrix = create_markov_matrix_model_nodes(HG)

    freqs_matrix = LA.matrix_power(markov_matrix, 40)[0]
    ys = freqs_matrix
    xs = range(len(ys))
    return xs, ys
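
# Why row 0 of matrix_power(markov_matrix, 40) approximates the stationary
# distribution: for an ergodic chain every row of P**t converges to pi as t
# grows, so any single row of a sufficiently high power will do. A quick
# convergence self-check could look like this (sketch):
#
#   P40 = LA.matrix_power(markov_matrix, 40)
#   P41 = LA.matrix_power(markov_matrix, 41)
#   assert np.allclose(P40[0], P41[0])  # rows stop changing once converged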


def compute_pykov_results_nodes(HG):
    markov_matrix = create_markov_matrix_model_nodes(HG)
    chain = transition_matrix_to_pykov_chain(markov_matrix)
    # Sort by state index so the ordering matches the other methods.
    xs, ys = zip(*sorted(chain.steady().items()))
    return xs, ys


def compute_analytical_prediction_nodes(HG):
    ys = prediction(model='hypergraph_nodes', graph=HG)
    xs = range(len(ys))
    return xs, ys


# for the hyperedges model
def compute_atomistic_results_edges(HG):
    markov_matrix = create_markov_matrix_model_hyper_edges(HG)
    t_per_walker = int(t_max / number_of_walkers)
    engine = DiffusionEngine(markov_matrix, t_per_walker=t_per_walker)

    frequencies, states = engine.simulate(t_max)

    # Sort by edge index, then normalise counts into a probability distribution.
    frequencies = sorted(frequencies, key=lambda x: x[0])
    xs, ys = zip(*frequencies)

    ys = np.array(ys, dtype='float')
    ys /= sum(ys)
    return xs, ys


def compute_matrix_power_results_edges(HG):
    markov_matrix = create_markov_matrix_model_hyper_edges(HG)

    freqs_matrix = LA.matrix_power(markov_matrix, 40)[0]
    ys = freqs_matrix
    xs = range(len(ys))
    return xs, ys


def compute_pykov_results_edges(HG):
    markov_matrix = create_markov_matrix_model_hyper_edges(HG)
    chain = transition_matrix_to_pykov_chain(markov_matrix)
    # Sort by state index so the ordering matches the other methods.
    xs, ys = zip(*sorted(chain.steady().items()))
    return xs, ys


def compute_analytical_prediction_edges(HG):
    ys = prediction(model='hypergraph_edges', graph=HG)
    xs = range(len(ys))
    return xs, ys
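
Before generating data at scale, it is worth confirming that the methods agree on one small instance. A minimal sketch, assuming these smaller parameters are valid inputs for generic_hypergraph:

# Sanity check (sketch): the analytical and matrix-power node methods
# should agree to near machine precision on a single small hypergraph.
HG_test = generic_hypergraph(number_of_nodes=20,
                             edges_params=((2, 10), (3, 10)))
_, pi_analytical = compute_analytical_prediction_nodes(HG_test)
_, pi_power = compute_matrix_power_results_nodes(HG_test)
print(np.max(np.abs(np.array(pi_analytical) - np.array(pi_power))))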
In [5]:
def compute_stationary_distributions(HG, name_to_computation_functions_mapping):
    results = {}
    for name, computation_function in name_to_computation_functions_mapping.items():
        xs, pies = computation_function(HG)
        results[name] = pies
    return results


def serialize(HG):
    edges = [list(edge) for edge in HG.hyper_edges()]
    return json.dumps(edges)


def save_result_distribution(filename, result_distribution):
    with open(filename, 'w') as f:
        for value in result_distribution:
            f.write("%s\n" % value)


def save_hypergraph_values(filename, hg_description):
    with open(filename, 'w') as f:
        f.write(hg_description)


def save_results_to_files(HG, results, counter, directory_name=None):
    base_filename = '%s_{name}.csv' % counter
    if directory_name:
        if not os.path.exists(directory_name):
            os.mkdir(directory_name)
        base_filename = os.path.join(directory_name, base_filename)
    
    for name, result_distribution in results.items():
        filename = base_filename.format(name=name)
        save_result_distribution(filename, result_distribution)
    
    hg_description = serialize(HG)
    filename = base_filename.format(name='hypergraph')
    save_hypergraph_values(filename, hg_description)
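
For example, with counter=0, directory_name='hypergraph_nodes_50', and the node mapping defined in the next cell, one pipeline iteration writes the following files (each distribution file holds one value per line):

hypergraph_nodes_50/0_analytical_nodes.csv
hypergraph_nodes_50/0_atomistic_nodes.csv
hypergraph_nodes_50/0_matrix_power_nodes.csv
hypergraph_nodes_50/0_pykov_nodes.csv
hypergraph_nodes_50/0_hypergraph.csv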
In [3]:
nodes_mapping = {
    'analytical_nodes': compute_analytical_prediction_nodes,
    'atomistic_nodes': compute_atomistic_results_nodes,
    'matrix_power_nodes': compute_matrix_power_results_nodes,
    'pykov_nodes': compute_pykov_results_nodes,
}

edges_mapping = {
    'analytical_edges': compute_analytical_prediction_edges,
    'atomistic_edges': compute_atomistic_results_edges,
    'matrix_power_edges': compute_matrix_power_results_edges,
    'pykov_edges': compute_pykov_results_edges,
}


def execute_pipeline(generator_function, hypergraph_properties, directory_name, name_to_computation_functions_mapping, n=10):    
    for counter in range(n):
        HG = generate_hypergraph(generator_function, hypergraph_properties)
        results = compute_stationary_distributions(HG, name_to_computation_functions_mapping)
        save_results_to_files(HG, results, counter, directory_name=directory_name)
        print("%s/%s" % (counter + 1, n))
    print('done')
In [50]:
for number_of_nodes in range(50, 90, 10):
    print(number_of_nodes)
    generator_function = generic_hypergraph
    hypergraph_properties = {
        'number_of_nodes': number_of_nodes,
        'edges_params': ((2, 20), (3, 30), (4, 20), (5, 15), (6, 10))
    }
    
    print('Nodes models')
    directory_name = 'hypergraph_nodes_%s' % number_of_nodes
    execute_pipeline(generator_function, hypergraph_properties, directory_name, nodes_mapping)
    
    print('\nEdges models')
    directory_name = 'hypergraph_edges_%s' % number_of_nodes
    execute_pipeline(generator_function, hypergraph_properties, directory_name, edges_mapping)
50
Nodes models
1/10
2/10
3/10
4/10
5/10
6/10
7/10
8/10
9/10
10/10
done

Edges models
1/10
2/10
3/10
4/10
5/10
6/10
7/10
8/10
9/10
10/10
done
60
Nodes models
1/10
2/10
3/10
4/10
5/10
6/10
7/10
8/10
9/10
10/10
done

Edges models
1/10
2/10
3/10
4/10
5/10
6/10
7/10
8/10
9/10
10/10
done
70
Nodes models
1/10
2/10
3/10
4/10
5/10
6/10
7/10
8/10
9/10
10/10
done

Edges models
1/10
2/10
3/10
4/10
5/10
6/10
7/10
8/10
9/10
10/10
done
80
Nodes models
1/10
2/10
3/10
4/10
5/10
6/10
7/10
8/10
9/10
10/10
done

Edges models
1/10
2/10
3/10
4/10
5/10
6/10
7/10
8/10
9/10
10/10
done
In [42]:
# show serialized form of hypergraph (possible to recreate it later)
!cat hypergraph_nodes_50/0_hypergraph.csv
[[21, 46], [9, 5], [17, 50], [28, 37], [42, 36], [14, 7], [25, 18], [27, 21], [40, 11], [28, 23], [18, 37], [42, 35], [18, 34], [19, 12], [22, 39], [10, 20], [10, 27], [18, 50], [20, 14], [2, 26], [2, 21, 14], [1, 34, 14], [24, 26, 50], [16, 13, 45], [11, 12, 43], [49, 5, 31], [2, 34, 30], [2, 5, 14], [16, 41, 39], [49, 18, 34], [32, 48, 27], [26, 19, 23], [8, 25, 18], [16, 3, 43], [31, 38, 6], [32, 40, 18], [25, 20, 22], [16, 17, 19], [25, 3, 19], [8, 9, 23], [16, 27, 36], [41, 27, 15], [40, 2, 37], [41, 12, 45], [27, 43, 21], [32, 10, 21], [33, 20, 44], [18, 43, 15], [16, 49, 44], [40, 48, 3], [2, 44, 22, 38], [24, 15, 42, 39], [48, 42, 27, 22], [19, 28, 5, 30], [8, 29, 6, 30], [16, 26, 43, 7], [25, 2, 4, 23], [48, 3, 44, 14], [7, 10, 29, 39], [17, 5, 38, 31], [1, 37, 22, 15], [25, 20, 45, 7], [8, 50, 11, 21], [24, 9, 42, 13], [16, 9, 3, 15], [1, 26, 19, 7], [25, 41, 27, 45], [24, 32, 50, 45], [16, 41, 30, 7], [9, 12, 36, 30], [24, 25, 29, 38, 31], [16, 17, 32, 5, 6], [9, 34, 41, 50, 15], [16, 17, 25, 20, 5], [40, 10, 35, 50, 29], [40, 49, 11, 21, 45], [32, 41, 7, 13, 47], [15, 25, 2, 36, 39], [3, 11, 29, 35, 15], [6, 14, 18, 22, 23], [17, 42, 3, 36, 39], [32, 49, 35, 37, 38], [40, 17, 2, 11, 44], [8, 33, 50, 13, 22], [24, 48, 43, 21, 14], [49, 2, 24, 42, 14, 47], [2, 23, 8, 40, 27, 44], [47, 7, 25, 29, 14, 15], [17, 49, 36, 7, 12, 14], [2, 36, 5, 20, 28, 14], [1, 4, 5, 21, 28, 14], [48, 2, 34, 5, 23, 41], [48, 23, 10, 42, 26, 13], [33, 17, 19, 25, 42, 12], [33, 18, 50, 21, 24, 41]]
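
Since the file is plain JSON, loading it back is straightforward. Rebuilding a HyperGraph object from it depends on the constructor's API, which is only sketched here (the nodes/edges keyword arguments are an assumption, not a confirmed signature):

# Reload sketch: the serialized file is a JSON list of hyperedges.
with open('hypergraph_nodes_50/0_hypergraph.csv') as f:
    edges = [tuple(edge) for edge in json.load(f)]
nodes = sorted({node for edge in edges for node in edge})
# Hypothetical reconstruction; HyperGraph's actual constructor may differ:
# HG = HyperGraph(nodes=nodes, edges=edges)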

This generated a lot of data for fairly complex hypergraphs. But how do I analyze it? It would be nice to load the data back from disk and compute some basic statistics. I expect the atomistic results to be the most divergent; the rest should agree closely.

If, for example, I compare pykov (the steady-state distribution of the Markov chain built from the hypergraph's transition matrix) with the analytical node model, how big will the differences be?

In [43]:
# read example of results with numpy
pykov_results = np.loadtxt('hypergraph_nodes_50/0_pykov_nodes.csv')
In [44]:
analytical_results = np.loadtxt('hypergraph_nodes_50/0_analytical_nodes.csv')
In [45]:
# compare arrays of results by computing their difference
pykov_results - analytical_results
Out[45]:
array([  4.92748203e-14,   1.01446629e-14,   3.62349040e-14,
         4.63778321e-15,   2.31897834e-14,   4.92748203e-14,
        -3.91284227e-14,  -2.60867716e-14,  -2.60867716e-14,
        -2.60867716e-14,  -2.60867716e-14,  -2.60867716e-14,
         1.15948917e-14,   1.01446629e-14,  -3.91284227e-14,
        -1.44884105e-14,  -3.91284227e-14,   2.31897834e-14,
         3.62349040e-14,   3.62349040e-14,   2.31897834e-14,
         3.62349040e-14,  -1.45022883e-15,  -1.45022883e-15,
         4.78228568e-14,  -2.60867716e-14,  -3.91284227e-14,
         1.15948917e-14,  -2.60867716e-14,   1.15948917e-14,
         4.92748203e-14,   3.62349040e-14,   4.92748203e-14,
        -2.60867716e-14,   4.92748203e-14,   3.62349040e-14,
         1.15948917e-14,   1.15948917e-14,  -2.60867716e-14,
        -1.45022883e-15,  -3.91284227e-14,  -3.91284227e-14,
        -2.60867716e-14,  -2.60867716e-14,  -2.60867716e-14,
         2.31889161e-15,  -3.04443970e-15,   3.62349040e-14,
         3.62349040e-14,  -3.91284227e-14])
In [46]:
def compare_results(base_directory, suffix_one, suffix_two):
    """Compute differences between two different methods for computing the same result"""
    filenames = os.listdir(base_directory)
    first_filenames = sorted(f for f in filenames if f.endswith(suffix_one))
    second_filenames = sorted(f for f in filenames if f.endswith(suffix_two))
    differences = []
    for first, second in zip(first_filenames, second_filenames):
        difference = (np.loadtxt(os.path.join(base_directory, first))
                      - np.loadtxt(os.path.join(base_directory, second)))
        differences.append(difference)
    return differences
In [47]:
# compare all the sets of sizes
def compare_sets(base_directory_template, suffix_one, suffix_two):
    directories = (base_directory_template % number_of_nodes for number_of_nodes in range(50, 90, 10))
    for directory in directories:
        print(directory)
        differences = compare_results(directory, suffix_one, suffix_two)
        print('average difference', np.average(np.abs(differences)))
        print('variance of differences', np.var(differences))
        print('-' * 80)
        print()
compare_sets('hypergraph_nodes_%s', 'pykov_nodes.csv', 'analytical_nodes.csv')
hypergraph_nodes_50
average difference 2.48766320043e-14
variance of differences 8.226043135e-28
--------------------------------------------------------------------------------

hypergraph_nodes_60
average difference 2.36902345252e-14
variance of differences 7.83562261469e-28
--------------------------------------------------------------------------------

hypergraph_nodes_70
average difference 2.19816315447e-14
variance of differences 6.67990066372e-28
--------------------------------------------------------------------------------

hypergraph_nodes_80
average difference 2.07090556726e-14
variance of differences 6.26384928982e-28
--------------------------------------------------------------------------------

In [48]:
compare_sets('hypergraph_nodes_%s', 'atomistic_nodes.csv', 'analytical_nodes.csv')
hypergraph_nodes_50
average difference 0.000474616811594
variance of differences 3.63764214325e-07
--------------------------------------------------------------------------------

hypergraph_nodes_60
average difference 0.000444607246377
variance of differences 3.26531986978e-07
--------------------------------------------------------------------------------

hypergraph_nodes_70
average difference 0.000407904347826
variance of differences 2.66943587062e-07
--------------------------------------------------------------------------------

hypergraph_nodes_80
average difference 0.000356900724638
variance of differences 2.21134558129e-07
--------------------------------------------------------------------------------

In [51]:
compare_sets('hypergraph_edges_%s', 'matrix_power_edges.csv', 'analytical_edges.csv')
hypergraph_edges_50
average difference 8.44996189011e-11
variance of differences 5.54932472914e-20
--------------------------------------------------------------------------------

hypergraph_edges_60
average difference 2.99177497468e-09
variance of differences 1.80022375173e-16
--------------------------------------------------------------------------------

hypergraph_edges_70
average difference 8.39051564135e-09
variance of differences 5.36273893841e-16
--------------------------------------------------------------------------------

hypergraph_edges_80
average difference 3.22182446427e-06
variance of differences 6.41941085925e-10
--------------------------------------------------------------------------------

Summing up:

  • the differences between pykov (the steady-state distributions of the Markov chain) and the analytical solutions are tiny, around 1e-14, i.e. floating-point noise
  • the atomistic simulations diverge the most, with average differences around 1e-4, as expected for a stochastic method
  • the matrix-power results fall in between (roughly 1e-10 to 1e-6), and the gap grows with the number of nodes, presumably because a fixed power of 40 converges more slowly on larger hypergraphs
In [ ]: