CSCS530 Winter 2015

Complex Systems 530 - Computer Modeling of Complex Systems (Winter 2015)

View this repository on NBViewer

Storing Model Results

In this notebook, we'll learn a common pattern for storing the results of a model run. Specifically, we'll:

  • create a model results folder to store all output
  • create a per-run results folder to store output for a single model run
  • learn to save our model parameters
  • learn to save figures
  • learn to save tabular data

    We'll do this using the basic HIV model.

    N.B.: We won't be dealing with RNG seeds in this notebook. However, please see the supplemental notebook for instructions on properly setting, using, and recording the RNG seed.

Local Imports

In the first import section, we use an import from our own module. We took the imports and class definitions (Model, Person) from our notebooks and pasted them into a .py file, creating a module. Please review the hiv_model.py file to understand how this works.

In [33]:
# Imports
from hiv_model import Model, Person
In [2]:
# Imports
import datetime
import os
import time

# Scientific computing imports
import numpy
import matplotlib.pyplot as plt
import networkx
import pandas
import seaborn; seaborn.set()

# Import widget methods
from IPython.html.widgets import *
:0: FutureWarning: IPython widgets are experimental and may change in the future.

Testing our model output functions

In this section, we'll define our model output functions. These will manage:

  • creating output directories
  • creating output CSV files
  • creating output figures

    We'll create one sample model, run it, and then test our methods.

In [3]:
# Create our test model and advance it by 10 steps.
# range() is used instead of xrange() so the cell runs on Python 2 and 3 alike.
m = Model(grid_size=10, num_people=10)
for t in range(10):
    m.step()
In [30]:
# Now, we'll define our methods to store a model's output

def store_model_parameters(model, run_output_path):
    """
    Store model parameters from a model to the run output path.

    Writes ``parameters.csv`` into ``run_output_path``. The file has one row
    per parameter with the columns ``parameter`` and ``value`` (plus the
    default pandas index column).

    model -- object exposing every attribute listed in ``parameter_names``.
    run_output_path -- directory that receives the CSV; it must already exist.
    """
    # An explicit sequence (instead of a dict) guarantees that rows are written
    # in the same order on every run and on every Python version.
    parameter_names = (
        "grid_size",
        "num_people",
        "min_subsidy",
        "max_subsidy",
        "min_condom_budget",
        "max_condom_budget",
        "condom_cost",
        "min_prob_hookup",
        "max_prob_hookup",
        "prob_transmit",
        "prob_transmit_condom",
    )

    # A concrete list of (name, value) tuples is accepted by the DataFrame
    # constructor everywhere, unlike a dict view object.
    rows = [(name, getattr(model, name)) for name in parameter_names]

    # Convert to dataframe and save
    model_parameters_df = pandas.DataFrame(rows, columns=["parameter", "value"])
    model_parameters_df.to_csv(os.path.join(run_output_path, "parameters.csv"))
    

def store_model_csv(model, run_output_path):
    """
    Store CSV data from a model to the run output path.

    Two files are written into ``run_output_path``:

      * ``interactions.csv`` -- one row per entry of
        ``model.history_interactions`` (header only when there are none).
      * ``timeseries.csv`` -- per-step columns taken from
        ``model.history_num_infected``, ``model.history_num_interactions``
        and ``model.history_num_interactions_condoms``.

    model -- object exposing the four history attributes named above.
    run_output_path -- directory that receives the CSVs; it must already exist.

    Raises whatever pandas raises when the interaction rows do not match the
    five expected columns, or when the time series have different lengths.
    """
    interaction_columns = ["time", "agent_a", "agent_b",
                           "use_condom", "is_transmission"]

    # Create interaction dataframe.
    # "Sparse" parameter configurations can produce no interactions at all.
    # Test for that explicitly rather than catching ValueError, which would
    # also hide genuinely malformed rows and silently write an empty file.
    interactions = model.history_interactions
    if len(interactions) == 0:
        interaction_df = pandas.DataFrame(columns=interaction_columns)
    else:
        interaction_df = pandas.DataFrame(interactions,
                                          columns=interaction_columns)

    # Create time series data frame
    tsdata_df = pandas.DataFrame(model.history_num_infected,
                                 columns=["num_infected"])
    tsdata_df["num_interactions"] = model.history_num_interactions
    tsdata_df["num_interactions_condoms"] = model.history_num_interactions_condoms

    # Save the dataframes
    interaction_df.to_csv(os.path.join(run_output_path, "interactions.csv"))
    tsdata_df.to_csv(os.path.join(run_output_path, "timeseries.csv"))

    
def _save_infected_space(model, step, cmap, figure_path, close_figure=True):
    """
    Plot ``model.get_space_infected(step)`` on its own figure and save it.

    A fresh figure is created for every call so that successive plots never
    stack their meshes or colorbars on top of each other.
    """
    f = plt.figure(figsize=(10, 10))
    plt.title("Infected space at t={0}".format(step))
    # The color scale is pinned to [-1, 1] so every saved frame is comparable.
    plt.pcolor(model.get_space_infected(step), vmin=-1, vmax=1, cmap=cmap)
    ax = f.gca()
    ax.set_aspect(1./ax.get_data_ratio())
    plt.tight_layout()
    plt.colorbar()

    # Save
    f.savefig(figure_path)
    if close_figure:
        plt.close(f)


def store_model_figures(model, run_output_path, close_figures=True):
    """
    Store figures data from a model to the run output path.

    Three PNG files are written into ``run_output_path``:

      * ``infections_interactions.png`` -- top panel: ``history_num_infected``;
        bottom panel: interactions without and with condoms.
      * ``space_initial.png`` -- ``model.get_space_infected(0)``.
      * ``space_final.png`` -- ``model.get_space_infected(model.t - 1)``.

    model -- object exposing the history lists, ``t`` and
        ``get_space_infected`` used above.
    run_output_path -- directory that receives the PNGs; it must already exist.
    close_figures -- close each figure once it is saved (default). Without
        this, every stored run leaves three figures open, which adds up
        quickly in a parameter sweep. Pass False to keep the figures open,
        e.g. for inline display in a notebook.
    """
    # Plot time series of infections and interactions.
    f = plt.figure(figsize=(10, 8))

    # Create our top panel.
    plt.subplot(211)
    plt.plot(model.history_num_infected)
    # The trailing comma matters: without it the labels argument is a plain
    # string and the legend would only show its first character.
    plt.legend(("Number of infections",), loc="best")

    # Create our bottom panel and add the legend
    plt.subplot(212)
    interactions_total = numpy.array(model.history_num_interactions)
    interactions_condoms = numpy.array(model.history_num_interactions_condoms)
    plt.plot(interactions_total - interactions_condoms)
    plt.plot(model.history_num_interactions_condoms)
    plt.legend(("Number of interactions without condoms",
                "Number of interactions with condoms"),
               loc="best")
    plt.tight_layout()

    # Save
    f.savefig(os.path.join(run_output_path, "infections_interactions.png"))
    if close_figures:
        plt.close(f)

    # Next, plot the initial and final space timesteps.

    # Get colormap
    cmap = seaborn.cubehelix_palette(light=1, as_cmap=True)

    # Plot and save the initial step.
    _save_infected_space(model, 0, cmap,
                         os.path.join(run_output_path, "space_initial.png"),
                         close_figure=close_figures)

    # Plot and save the final step.
    _save_infected_space(model, model.t-1, cmap,
                         os.path.join(run_output_path, "space_final.png"),
                         close_figure=close_figures)
    

def store_model(model, output_path="output"):
    """
    Store a model to the model output path.

    Creates ``output_path`` if needed, then creates a new per-run directory
    named ``run-YYYYMMDD-N`` inside it (``N`` is the first run number not
    already taken today) and stores the model's parameters, CSV data and
    figures there.

    model -- the model to store; passed through to the ``store_model_*``
        functions.
    output_path -- directory that collects all run directories.

    Returns the path of the run directory that was created.

    Raises OSError when a directory cannot be created for any reason other
    than that it already exists.
    """
    # First, we need to make sure the directory exists.
    try:
        os.makedirs(output_path)
    except OSError:
        # An existing directory is fine. Anything else (permissions, a regular
        # file in the way, ...) is a real error and must not be hidden.
        if not os.path.isdir(output_path):
            raise

    # Next, we need to create a unique directory for this run, named with a
    # timestamp of the form: YYYYMMDD-Run#
    timestamp_suffix = time.strftime("%Y%m%d")

    # Get a unique run #. The directory is claimed by creating it rather than
    # by checking for existence first, so two concurrent runs cannot both
    # settle on the same run number.
    run_id = 0
    while True:
        run_output_path = os.path.join(output_path,
                                       "run-{0}-{1}".format(timestamp_suffix,
                                                            run_id))
        try:
            os.mkdir(run_output_path)
        except OSError:
            if not os.path.exists(run_output_path):
                # The failure was not "already exists": report it.
                raise
            run_id += 1
        else:
            break

    # Finally, we need to store data and figures to the path.
    store_model_parameters(model, run_output_path)
    store_model_csv(model, run_output_path)
    store_model_figures(model, run_output_path)

    return run_output_path
In [36]:
# Finally, test our output method with the sample model `m` created above.
# With the default output_path this writes a new run folder under "output".
store_model(m)

Running our parameter sweep

In [37]:
# Set number of samples per parameter combination and steps per sample
num_samples = 10
num_steps = 100

# Set basic model parameters
grid_size = 10
num_people = 10

# Set subsidy and prob_hookup values to "sweep" over
subsidy_sweep_values = [0.0, 0.33, 0.66, 1.0]
prob_hookup_values = [0.1, 0.5, 0.9]
subsidy_sweep_output = []

# Iterate over subsidy
for subsidy_value in subsidy_sweep_values:
    # Iterate over prob_hookup
    for prob_hookup_value in prob_hookup_values:
        # Output info
        print("Running {0} samples for subsidy value {1}, prob_hookup value {2}"
              .format(num_samples, subsidy_value, prob_hookup_value))

        # range() is used instead of xrange() so the sweep runs on Python 2 and 3.
        for n in range(num_samples):
            # Create a fresh model for this sample. The subsidy is fixed at
            # subsidy_value (min == max); prob_hookup spans +/- 0.1 around
            # prob_hookup_value.
            m = Model(grid_size=grid_size,
                      num_people=num_people,
                      min_condom_budget=0.0,
                      max_condom_budget=1.0,
                      min_prob_hookup=prob_hookup_value-0.1,
                      max_prob_hookup=prob_hookup_value+0.1,
                      min_subsidy=subsidy_value,
                      max_subsidy=subsidy_value)

            # Run the model for num_steps
            for t in range(num_steps):
                m.step()

            # Output our model
            store_model(m)
Running 10 samples for subsidy value 0.0, prob_hookup value 0.1
Running 10 samples for subsidy value 0.0, prob_hookup value 0.5
Running 10 samples for subsidy value 0.0, prob_hookup value 0.9
Running 10 samples for subsidy value 0.33, prob_hookup value 0.1
Running 10 samples for subsidy value 0.33, prob_hookup value 0.5
Running 10 samples for subsidy value 0.33, prob_hookup value 0.9
Running 10 samples for subsidy value 0.66, prob_hookup value 0.1
Running 10 samples for subsidy value 0.66, prob_hookup value 0.5
Running 10 samples for subsidy value 0.66, prob_hookup value 0.9
Running 10 samples for subsidy value 1.0, prob_hookup value 0.1
Running 10 samples for subsidy value 1.0, prob_hookup value 0.5
Running 10 samples for subsidy value 1.0, prob_hookup value 0.9