#!/usr/bin/env python
# coding: utf-8

# # Influence propagation. Threshold Models

# During this lab we will consider simulation of influence propagation through
# a network and see what can stop its spread.

# In[1]:

from functools import reduce  # Python 3: reduce is no longer a builtin

import numpy as np
import scipy as sp
import networkx as nx
import matplotlib.pyplot as plt

get_ipython().run_line_magic('matplotlib', 'inline')


# ## Linear Threshold Model

# To recall the setting:
# * Influence spreads iteratively given some initially activated nodes (adopters)
# * Each node $v_i$ has an activation threshold $\theta_i \in [0, 1]$
# * If the fraction of "activated" neighbours is greater than $\theta_i$,
#   node $v_i$ also becomes activated
#
# This information (or behaviour) spreading process is also called a cascade.
# If after some number of iterations the whole network has become activated,
# we have a **complete cascade**.

# In[2]:

# Some initials
G = nx.erdos_renyi_graph(10, 0.4)
pos = nx.spring_layout(G)
A = np.array(nx.adjacency_matrix(G).todense())
n = A.shape[0]
theta = 2.0 / 5
idx = [0, 1, 2]
initActive = np.zeros((n,), dtype=bool)
initActive[idx] = 1

# In[3]:

# Influence propagation simulation
def InfluenceProp(A, initActive, theta, itersNum=np.inf):
    """Simulate the Linear Threshold propagation process.

    Parameters
    ----------
    A : (n, n) ndarray
        Adjacency matrix of the (undirected) network.
    initActive : (n,) bool ndarray
        Mask of the initially activated nodes; not modified in place.
    theta : float
        Activation threshold shared by all nodes: an inactive node becomes
        active when the fraction of its activated neighbours exceeds theta.
    itersNum : int or np.inf, optional
        Maximum number of iterations; by default runs until a fixpoint.

    Returns
    -------
    (n,) bool ndarray
        Activation mask when propagation stops.

    NOTE(fix): the original signature misspelled the parameter as
    `intiActive` while the body read the *global* `initActive`, so the
    caller's argument was silently ignored.
    """
    deg = np.sum(A, axis=0, dtype=float)
    # Isolated nodes can never be activated; an infinite degree keeps their
    # activated fraction at 0 instead of producing a 0/0 RuntimeWarning.
    deg[deg == 0] = np.inf
    resActive = initActive.copy()
    i = 1  # iteration counter
    while i < itersNum:
        i += 1
        # indices of the currently inactive nodes
        inactiveId = np.where(~resActive)[0]
        # fraction of activated neighbours for every inactive node
        frac = np.sum(A[np.ix_(resActive, ~resActive)], axis=0) / deg[~resActive]
        idx = frac > theta
        if np.any(idx):
            resActive[inactiveId[idx]] = True
        else:
            break  # fixpoint: no further node can be activated
    return resActive


# Demonstration
def ShowIteration(G, initActive, resultActive, pos):
    """Draw the initial (left) and final (right) activation side by side:
    activated nodes in red, inactive ones in blue.

    NOTE(fix): the original used `1 - initActive` on boolean arrays, which
    raises TypeError on modern NumPy (boolean subtract is unsupported);
    `~` is the supported element-wise negation.
    """
    fig = plt.figure(figsize=(15, 10))
    plt.subplot(1, 2, 1)
    nx.draw(G, pos=pos, nodelist=np.where(initActive)[0].tolist(), node_color='r')
    nx.draw(G, pos=pos, nodelist=np.where(~initActive)[0].tolist(), node_color='b')
    plt.subplot(1, 2, 2)
    nx.draw(G, pos=pos, nodelist=np.where(resultActive)[0].tolist(), node_color='r')
    nx.draw(G, pos=pos, nodelist=np.where(~resultActive)[0].tolist(), node_color='b')

# In[6]:

# Run
resultActive = InfluenceProp(A, initActive, theta, itersNum=2)
# Look
ShowIteration(G, initActive, resultActive, pos)

# ## Clusters

# Let's call a cluster with density $p$ a set of nodes s.t. each node has at
# least a fraction $p$ of its neighbours in the set. It turns out that the
# only thing that can stop cascades is clusters, particularly:
#
# *Consider a network with a threshold of $\theta$ and a set of initially activated nodes*
# 1. *If the remaining network contains a cluster of density greater than $1 - \theta$, then the set of initial adopters **will not** cause a complete cascade.*
# 2. *Whenever a set of initial adopters does not cause a complete cascade with threshold $\theta$, the remaining network must contain a cluster of density greater than $1 - \theta$.*

# In[7]:

# Illustrative Example
G = nx.cycle_graph(4)
arrG = [G] * 3
G = reduce(lambda g1, g2: nx.disjoint_union(g1, g2), arrG)
edList = [(0, 2), (4, 6), (9, 11)]
G.add_edges_from(edList)
edList = [(3, 5), (4, 7), (7, 11)]
G.add_edges_from(edList)
nx.draw_spring(G)

# In[10]:

# Randomized Option. (may not end well..)
# NOTE(fix): this cell uses `reduce`, which is not a builtin in Python 3;
# the local import keeps the cell self-contained.
from functools import reduce

# Two dense ER components of 10 nodes each plus one of 20 nodes — each dense
# component is a high-density cluster that can block a cascade.
arrG = [nx.erdos_renyi_graph(10, 0.6)] * 2
arrG.append(nx.erdos_renyi_graph(20, 0.6))
G = reduce(lambda g1, g2: nx.disjoint_union(g1, g2), arrG)
# nx.draw(G)

# Sprinkle a few random edges between the three components so influence
# has a chance to leak across them.
edList = zip(np.random.randint(0, 10, size=(5,)), np.random.randint(10, 20, size=(5,)))
G.add_edges_from(edList)
edList = zip(np.random.randint(0, 10, size=(5,)), np.random.randint(20, 40, size=(5,)))
G.add_edges_from(edList)
edList = zip(np.random.randint(10, 20, size=(5,)), np.random.randint(20, 40, size=(5,)))
G.add_edges_from(edList)
pos = nx.spring_layout(G)
nx.draw(G, pos=pos)

# In[17]:

A = np.array(nx.adjacency_matrix(G).todense())
n = A.shape[0]
theta = 1.0 / 10
# activate the first five nodes (all inside the first dense component)
idx = range(0, 5)
initActive = np.zeros((n,), dtype=bool)
initActive[idx] = 1

# In[18]:

# Run
resultActive = InfluenceProp(A, initActive, theta)
# Look
ShowIteration(G, initActive, resultActive, pos)

# ## Note on Greedy Algorithm for Threshold Models

# Let $S$ be the target nodes (the initially activated set of nodes). $f(S)$
# is the number of activated nodes once propagation stops.
#
# **Good news:**
# We have a greedy algorithm for submodular functions that gives a constant-factor approximation of the optimal solution:
# $$f(S) \geq \left(1 - \frac{1}{e}\right)f(S^*)$$
#
# **Bad news:**
# 1. It is not clear how to compute $f(S)$ in threshold and cascade models
# 2. Even if we knew how to, we would still need to speed up the process
#
# **Solutions:**
# 1. Instead of computing the exact value of $f(S)$ we can estimate it via numerous simulations of the Independent Cascade process (provides the same results)
# 2. The 'lazy' greedy algorithm uses the fact that if $A \subseteq B$ then:
# $$ f(A \cup \{s\}) - f(A) \geq f(B \cup \{s\}) - f(B) $$
# In other words: as the set grows, an element's marginal gain can only decrease. In practice it won't decrease dramatically, so essentially we need to keep track of only the top of the list.
#
# So the idea is
# * Keep an ordered list of marginal gains from previous iterations
# * Re-evaluate marginal gains only for the top elements
# * Re-sort and pick the $\arg\max$