# 113th Congress as News Commentators on Twitter

In this project I am answering the following questions:

• Who are the most active news commentators among members of Congress?
• Which events (news) got the most attention from these politicians?
• How many news items (of 7376) were commented on by Democrats and/or Republicans?
• How many comments were made on these news items by each group?
• Which news items in particular characterize each group?

See here for other iPython notebooks on this project.

Project (datasets and the source code) is available on GitHub

The news and the curated tweets used in this study were scraped from theplazz.com, covering approximately the duration of the 113th US Congress, i.e. Jan 2013 - Jan 2015. Here is an annotated screenshot of one of the news items published on this site:

In [1]:
cd ..

/Users/toz/Documents/workspace/News-Commentary-Tweets-of-Elites

In [2]:
import twitter
import pandas as pd
import numpy as np
import plotly.plotly as py
from plotly.graph_objs import *
from mykeys import tw
import networkx as nx
import itertools
from collections import Counter
%matplotlib inline

In [3]:
def oauth_login():
#tw is a dictionary, the only variable in mykeys.py
tw['CONSUMER_KEY'], tw['CONSUMER_SECRET'])

def get_members(members):
    """Reduce a twitter json response to one flat tuple per user.

    `members` must have a 'users' entry holding user dicts; every field
    listed below is looked up by key, so a missing one raises KeyError.
    Returns a list of tuples in the field order shown.
    """
    wanted = ('id', 'screen_name', 'name', 'location', 'description',
              'created_at', 'friends_count', 'followers_count',
              'statuses_count', 'favourites_count')
    rows = []
    for user in members['users']:
        rows.append(tuple(user[key] for key in wanted))
    return rows

"""Get members of a twitter list with known political group into a dataframe """
members = get_members(resp)
df['party'] = group
return df

def get_politicians():

'friends','followers','statuses','favorites']

polists = [{'slug':'senaterepublicans', 'owner_screen_name':'Senate_GOPs', 'group':'gop'}, #62
{'slug':'house-republicans', 'owner_screen_name':'HouseGOP', 'group':'gop'}, #260
{'slug':'elected-democrats', 'owner_screen_name':'TheDemocrats', 'group':'dem'}, #259
{'slug':'house-democrats', 'owner_screen_name':'DannyMariachi', 'group':'dem'}, #188
{'slug':'senatedemocrats', 'owner_screen_name':'SenateDems', 'group':'dem'} #52
]

for polist in polists:
df = df.drop_duplicates()
df.to_csv('data/US-politicians.csv',encoding='utf-8',index=False)
return df

In [3]:
# get twitter IDs of congressmen and senators
gop = df[df['party']=='gop']
dem = df[df['party']=='dem']
dem_tweeps = set(dem.screen_name.values)
gop_tweeps = set(gop.screen_name.values)
# Principal Accounts of Members of the U.S. Senate (a mix of campaign and government accounts)

In [4]:
# get commentary tweets of US newsmakers and opinion-shapers
tweets.twhandle = tweets.twhandle.str[1:]
#tweets.dt = pd.to_datetime(tweets.dt,unit='D')

In [5]:
# print politician counts curated at least once by theplazz.com
# One group per (title, date) pair; each group holds the handles that commented.
title = tweets.groupby(by=['title', 'dt'])['twhandle']

# Series.order() was removed from pandas. min()/max() give the same endpoints
# as the first/last element of the sorted dates (and skip missing dates).
first_day = tweets['dt'].min()
last_day = tweets['dt'].max()
print (len(title),'news commentated between',first_day.strftime('%d-%b-%Y'),
       'and',last_day.strftime('%d-%b-%Y'),'by')

tweepset = set(tweets.twhandle.unique())
senateset = set(senate.screen_name.values)
# Build the summary table in one go instead of growing it row by row.
twcounts = pd.DataFrame(
    {'# of tweeps': [len(senateset & tweepset),
                     len(dem_tweeps & tweepset),
                     len(gop_tweeps & tweepset),
                     len(tweepset)]},
    index=['senator', 'democrat', 'republican', 'total'])
twcounts

7376 news commentated between 14-Jan-2013 and 09-Jan-2015 by

Out[5]:
# of tweeps
senator 44
democrat 36
republican 30
total 1442
In [6]:
# plot commentating activity of these politicians
# Tweets per handle, busiest first. Series.order() no longer exists in pandas;
# sort_values() is its replacement.
tweeps = tweets.groupby(by='twhandle')['twtext'].count().sort_values(ascending=False)
# keep only the handles that appear in the politicians table
poltweeps = tweeps[tweeps.index.isin(df.screen_name)]
# blue for handles in dem_tweeps, red for every other handle
colors = ['blue' if x in dem_tweeps else 'red' for x in poltweeps.index]
data = Data([Bar(
    x=poltweeps.index,
    y=poltweeps.values,
    marker=Marker(color=colors)
)])
layout = Layout(yaxis=YAxis(title='# of news commentated (Jan 2013 - Jan 2015)'),
                title="News counts commentated by 113th US Congress (curated by theplazz.com)")
fig = Figure(data=data, layout=layout)
py.iplot(fig,filename="113th US Congress as News Commentators")

Out[6]:
In [7]:
# Stats: how many news are commentated by how many democrats and/or republicans...
# Per news item: number of distinct Democratic / Republican handles that commented.
demnews = title.apply(lambda g: len(dem_tweeps & set(g.values)))
gopnews = title.apply(lambda g: len(gop_tweeps & set(g.values)))
dgtotl = (demnews + gopnews)
print ('News commentated by any member of either group:',(dgtotl[dgtotl>0].size))
# Democratic commentators minus Republican commentators, per news item
dgdiff = (demnews - gopnews)
# Normalize the polarity: +1 = democrats only, -1 = republicans only.
# Items with no commentator from either party come out as 0/0 = NaN.
dgdiv = dgdiff/dgtotl
# Keep only items with at least one partisan commentator, sorted ascending.
# (Series.order() was removed from pandas; the explicit filter replaces the old
# trick of sorting NaN to the end and slicing it off.)
digdiv = dgdiv[dgtotl > 0].sort_values()
print ('News commentated by democrats only:',(digdiv[digdiv == 1].size))
print ('News commentated by republicans only:',(digdiv[digdiv == -1].size))
print ('News commentated by both of the parties:',(digdiv[(digdiv > -1) & (digdiv < 1)].size))

2829 comments made on 1239 news by democrats.
News commentated by any member of either group: 1916
News commentated by democrats only: 733
News commentated by republicans only: 677
News commentated by both of the parties: 506

In [8]:
# commentator group polarity distribution of news
# |polarity| < 1 keeps the items whose polarity lies strictly between -1 and 1
digdiv[digdiv.abs() < 1].plot();

In [9]:
# Bar chart of the polarity of news items whose polarity is strictly between -1 and 1.
# x and y must come from the same filtered series: taking the titles from the
# unfiltered digdiv pairs the bars with the wrong headlines.
bipartisan = digdiv[(digdiv > -1) & (digdiv < 1)]
data = Data([Bar(
    x=bipartisan.index.get_level_values(0),
    y=bipartisan.values
)])
# The plotted value is the normalized polarity, not a count of news items.
layout = Layout(yaxis=YAxis(title='Polarity: (dem - gop) / (dem + gop) commentators'),
                margin=Margin(l=150,r=150,b=150),
                title="News polarized by 113th US Congress (curated by theplazz.com)")
fig = Figure(data=data, layout=layout)
py.iplot(fig,filename="Polarity distribution of news")

Out[9]:
In [10]:
# Which news got the most attention by the politicians ?

Out[10]:
title                                               dt
A day to honor veterans’ courage, sacrifrice        2014-05-26    36
Apartheid resistance leader Mandela has died        2013-12-05    31
World marks 70th anniversary of D-Day               2014-06-06    24
US observes somber 9/11 anniversary                 2014-09-11    21
Senate passes 5-month UI extension                  2014-04-07    21
Dems’ attempt to raise minimum wage fails           2014-04-30    20
Shinseki resigns from VA following apology          2014-05-30    20
Deportation to focus on ‘felons not families’       2014-11-20    19
A reflection on rights on Constitution Day          2014-09-17    19
Dems push #EqualPay bill on Equal Pay Day           2014-04-08    19
Around the world, Jews welcome 5774                 2013-09-04    18
Remembering Rev. Dr. Martin Luther King Jr.         2014-01-20    18
4 dead after Fort Hood shooting incident            2014-04-02    18
Congress critters @work: #DontDoubleMyRate          2013-07-09    18
Politicians get patriotic in July 4th holiday rush  2014-07-04    17
Dems host climate-change talk-a-thon                2014-03-10    17
Boston honors dead in Marathon bombing              2014-04-15    16
Tweeps count blessings both big and small           2014-11-27    15
Obama unveils his \$4T budget based on ‘values’      2014-03-04    15
Earth Day observers consider climate change         2014-04-21    15
Senate ‘goes nuclear’ on filibuster option          2013-11-21    15
EPA unveils reduction plan for air pollution        2014-06-02    15
Politicos, businesses thank teachers, nurses        2014-05-06    15
Senate’s unemployment bill defeated by 1 vote       2014-02-06    15
Obama to governors: ‘sequester hurts states’        2013-02-25    15
VA bills unanimously pass House, Senate             2014-06-11    15
Sundown ushers in the Jewish New Year               2014-09-24    14
Jews worldwide reflect on Holocaust Day             2014-04-28    14
Renowned poet, author Maya Angelou dies             2014-05-28    14
6 dead in attack on Jerusalem synagogue             2014-11-18    14
Obama delivers climate change speech                2013-06-25    14
The Civil Rights Act of 1964 turns 50               2014-07-02    14
Season’s tweetings: It’s Christmas Day greetings    2014-12-25    14
Put on your yarmulke, it’s time for Hanukkah        2014-12-16    14
Americans mark 100 years of Mother’s Day            2014-05-11    14
World AIDS Day to raise awareness, funds            2014-12-01    14
Day of remembrance for Pearl Harbor veterans        2014-12-07    13
Military sexual assault bill rejected by Senate     2014-03-06    13
Most diverse Congress yet opens despite snow        2015-01-06    13
ACA website surpasses 6M signups                    2014-03-27    13
Dems tweet support for ‘Not My Boss’ Business’      2014-07-14    13
Hobby Lobby case begins in SCOTUS                   2014-03-25    13
Lawmakers pass bipartisan budget deal               2013-12-18    13
Paycheck Fairness Act voted down by GOP             2014-04-09    13
Nobel Peace Prize goes to child rights activists    2014-10-10    12
Senate passes farm bill over to the White House     2014-02-04    12
Feinstein, McCain unveil CIA ‘Torture Report’       2014-12-09    12
College affordability makes Dem agenda              2014-05-05    12
Obama to announce executive order in SOTU           2014-01-28    12
‘Not My Boss’ Business Act’ shot down in Senate     2014-07-16    12
#ConstitutionDay trends on Twitter                  2013-09-17    12
As Passover arrives, nontrad seders on rise         2014-04-13    12
SCOTUS ends overall campaign donor limits           2014-04-02    12
SCOTUS rules in favor of Hobby Lobby                2014-06-30    12
Keystone pipeline bill fails 59-41 in Senate        2014-11-18    12
Obama to tackle LGBT job discrimination             2014-06-16    11
Obama promises a year of action for 2014            2014-01-28    11
Obama pushes to keep student loan rates low         2013-05-31    11
ENDA clears first hurdle, Senate votes 64-32        2013-11-07    11
#WomensEqualityDay marks suffrage anniv.            2014-08-26    11
Name: twhandle, dtype: int64
In [11]:
# On which news the comment-count differences maximized?
# Series.order() was removed from pandas; sort_values() gives the same ascending
# ordering (most Republican-leaning first, most Democratic-leaning last).
dgdiff.sort_values()

Out[11]:
title                                             dt
Republicans call Obama executive action illegal   2014-11-20    -9
Tweeps wish US’ 40th prez a happy birthday        2014-02-06    -9
Britain’s Iron Lady dies of stroke at 87          2013-04-08    -8
Late filers beware: The taxman cometh             2014-04-15    -8
SCOTUS rules for prayer at public meetings        2014-05-05    -7
National Day of Prayer offers controversy, unity  2014-05-01    -6
GOP calls for ‘permanent delay’ of ObamaCare      2013-07-10    -6
WH may not cooperate with Benghazi probe          2014-05-05    -6
Repubs squelch short-term budget fix              2013-02-05    -6
Shinseki resigns from VA following apology        2014-05-30    -6
Boehner pressed for Obamacare alternative         2013-12-03    -5
Sudan detains Christian woman after release       2014-06-24    -5
214 days later: Mexican court frees Tahmooressi   2014-11-01    -5
Obama extends to 2016 ACA employer mandate        2014-02-10    -5
Obamacare head steps down from HHS                2014-04-10    -5
Vets at WWII Memorial become props in dispute     2013-10-02    -5
SCOTUS curbs Obama’s recess appointments          2014-06-26    -5
After IG report, calls for Shinseki to resign     2014-05-28    -5
A Presidential icon, now gone for a decade        2014-06-05    -5
CBO: O’care kills equivalent of 2M jobs           2014-02-04    -5
House holds Lerner in contempt of Congress        2014-05-07    -5
March for Life continues despite cold weather     2014-01-22    -5
House Republicans push Benghazi committee         2014-05-08    -4
Obamacare’s employer mandates delayed             2013-07-02    -4
GOP to form special Benghazi committee            2014-05-02    -4
Thousands gather on Washington Mall               2013-02-17    -4
Will beheadings become call to action for Obama?  2014-09-02    -4
O’care figures frustrate pro’bama fact checkers   2014-02-24    -4
IRS to investigators: Can’t find Lerner’s emails  2014-06-13    -4
..
Gun control in focus 2 years after Sandy Hook     2014-12-14     8
Senate advances bill to extend jobless aid        2014-01-07     8
SOTU puts spotlight on min. wage, gender gap      2014-01-29     8
Obama to governors: ‘sequester hurts states’      2013-02-25     9
Paycheck Fairness Act voted down by GOP           2014-04-09     9
Secret US-China climate agreement unveiled        2014-11-12     9
Senate’s unemployment bill defeated by 1 vote     2014-02-06     9
Exec. order ends federal LGBT job discrimination  2014-07-21     9
Politicos, businesses thank teachers, nurses      2014-05-06     9
Sundown ushers in the Jewish New Year             2014-09-24    10
LGBT non-discrimination act clears Senate hurdle  2013-11-04    10
Nobel Peace Prize goes to child rights activists  2014-10-10    10
World AIDS Day to raise awareness, funds          2014-12-01    10
Obama to tackle LGBT job discrimination           2014-06-16    11
Military sexual assault bill rejected by Senate   2014-03-06    11
ENDA clears first hurdle, Senate votes 64-32      2013-11-07    11
#WomensEqualityDay marks suffrage anniv.          2014-08-26    11
Renowned poet, author Maya Angelou dies           2014-05-28    12
‘Not My Boss’ Business Act’ shot down in Senate   2014-07-16    12
College affordability makes Dem agenda            2014-05-05    12
SCOTUS rules in favor of Hobby Lobby              2014-06-30    12
The Civil Rights Act of 1964 turns 50             2014-07-02    12
SCOTUS ends overall campaign donor limits         2014-04-02    12
Deportation to focus on ‘felons not families’     2014-11-20    13
Earth Day observers consider climate change       2014-04-21    13
Dems tweet support for ‘Not My Boss’ Business’    2014-07-14    13
Dems host climate-change talk-a-thon              2014-03-10    15
Senate passes 5-month UI extension                2014-04-07    17
Dems’ attempt to raise minimum wage fails         2014-04-30    18
Dems push #EqualPay bill on Equal Pay Day         2014-04-08    19
Name: twhandle, dtype: int64
In [19]:
#create bipartite network for bpnet
G2=nx.Graph()
for politician,color in list(zip(poltweeps.index.tolist(),colors)):
for e in dgtotl.index.tolist():

In [20]:
def updateG2(group,G2,politicians):
""" Create two-mode edges """
actors = set(group.tolist()) #this can be extended, no weight on two-mode
for actor in actors:
if actor not in politicians:
continue

In [21]:
# add edges: hand each news item's commentator handles to updateG2.
# Looping over the groups directly (instead of GroupBy.apply) calls the function
# exactly once per news item and avoids collecting its unused return values.
pol_handles = set(poltweeps.index)
for _, handles in title:
    updateG2(handles, G2, pol_handles)

# print number of nodes and edges
# nodes flagged bipartite == 0 are actors; every other node is an event
actors = sum(1 for _, attrs in G2.nodes(data=True) if attrs['bipartite'] == 0)
events = G2.number_of_nodes() - actors
print ('actors:',actors,'\tevents:',events,'\tedges:',G2.number_of_edges())

actors: 66 	events: 7376 	edges: 4938

In [23]:
"""The Network File is text file with a binary rectangular matrix.
The number of rows for the matrix should be the same as the number of Actors(A),
and the number of columns is the number of Actors(P)."""
# Split the nodes of G2 by their 'bipartite' flag: 0 = actor, 1 = event.
actors = [node for node, attrs in G2.nodes(data=True) if attrs['bipartite'] == 0]
events = [node for node, attrs in G2.nodes(data=True) if attrs['bipartite'] == 1]

# Binary incidence matrix as written to disk: one ROW per event and one COLUMN
# per actor; a cell is 1 when that actor has an edge to that event.
# NOTE(review): the R snippet in this notebook transposes its input before
# building the network, which matches this orientation - confirm it reads this file.
# A plain integer ndarray replaces the discouraged np.matrix; the saved text is
# the same because savetxt formats every cell with '%d'.
M = np.zeros((len(events), len(actors)), dtype=int)
for i, event in enumerate(events):
    for j, actor in enumerate(actors):
        if G2.has_edge(actor, event):
            M[i, j] = 1
np.savetxt("data/congress_2mode.txt", M, fmt='%d')

In [ ]:
#ERGM analysis:
"""
b2nodematch is a homophily based two-star statistic. This term adds one statistic to the model unless diff is set to TRUE, in which case the term adds multiple network statistics to the model, one for each of (a subset of) the unique values of the attrname attribute.
"""
R code (ERGM on the bipartite, two-mode network):
#two mode
commentaries <- as.data.frame(t(commentaries))
two_mode <-network(commentaries, vertex.attr=parties,  matrix.type='bipartite',
directed=F, hyper=F, loops=F, multiple=F, bipartite=66)
set.vertex.attribute(two_mode, 'party', NA, v=seq_len(network.size(two_mode)-66)+66)
two_mode.diff<-ergm(two_mode~edges+b1nodematch("party",diff=T))

In [ ]:
Monte Carlo MLE Results:
Estimate Std. Error MCMC % p-value
edges               -4.921374   0.016658      2  <1e-04 ***
b1nodematch.party.D  0.520067   0.003541      2  <1e-04 ***
b1nodematch.party.R  0.261082   0.017989      2  <1e-04 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Null Deviance: 674870  on 486816  degrees of freedom
Residual Deviance:  52601  on 486813  degrees of freedom

AIC: 52607    BIC: 52640    (Smaller is better.)
============================ (another run results)
Monte Carlo MLE Results:
Estimate Std. Error MCMC % p-value
edges               -4.950125   0.023855      1  <1e-04 ***
b1nodematch.party.D  0.377780   0.023558      2  <1e-04 ***
b1nodematch.party.R  0.628042   0.003964      2  <1e-04 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Null Deviance: 674870  on 486816  degrees of freedom
Residual Deviance:  52144  on 486813  degrees of freedom

AIC: 52150    BIC: 52183    (Smaller is better.)
============================ (another run results)
Monte Carlo MLE Results:
Estimate Std. Error MCMC % p-value
edges               -4.920042   0.019739      1  <1e-04 ***
b2nodematch.party.D  0.358749   0.034140      1  <1e-04 ***
b2nodematch.party.R  0.601879   0.002838      3  <1e-04 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Null Deviance: 674870  on 486816  degrees of freedom
Residual Deviance:  52254  on 486813  degrees of freedom

AIC: 52260    BIC: 52293    (Smaller is better.)
============================ (another run results)
Monte Carlo MLE Results:
Estimate Std. Error MCMC % p-value
edges               -4.949630   0.028055      1  <1e-04 ***
b2nodematch.party.D  0.530463   0.003897      1  <1e-04 ***
b2nodematch.party.R  0.273482   0.020214      2  <1e-04 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Null Deviance: 674870  on 486816  degrees of freedom
Residual Deviance:  52477  on 486813  degrees of freedom

AIC: 52483    BIC: 52516    (Smaller is better.)

In [ ]:
#when party types are not differentiated
two_mode_b<-ergm(two_mode~edges+b1nodematch("party"))
summary(two_mode_b)

==========================
Summary of model fit
==========================

Formula:   two_mode ~ edges + b1nodematch("party")

Iterations:  20 out of 20

Monte Carlo MLE Results:
Estimate Std. Error MCMC % p-value
edges             -4.82148    0.02591      1  <1e-04 ***
b1nodematch.party  0.31572    0.02996      1  <1e-04 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Null Deviance: 674870  on 486816  degrees of freedom
Residual Deviance:  52736  on 486814  degrees of freedom

AIC: 52740    BIC: 52762    (Smaller is better.)
============================ (another run results)
Monte Carlo MLE Results:
Estimate Std. Error MCMC % p-value
edges             -4.86517    0.01552      2  <1e-04 ***
b2nodematch.party  0.34602    0.01707      2  <1e-04 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Null Deviance: 674870  on 486816  degrees of freedom
Residual Deviance:  52590  on 486814  degrees of freedom

AIC: 52594    BIC: 52616    (Smaller is better.)
============================ (another run results)
Monte Carlo MLE Results:
Estimate Std. Error MCMC % p-value
edges             -4.77669    0.02745      0  <1e-04 ***
b2nodematch.party  0.23968    0.02059      1  <1e-04 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Null Deviance: 674870  on 486816  degrees of freedom
Residual Deviance:  53149  on 486814  degrees of freedom

AIC: 53153    BIC: 53175    (Smaller is better.)
============================ (another run results)
Monte Carlo MLE Results:
Estimate Std. Error MCMC % p-value
edges             -4.826963   0.018368      1  <1e-04 ***
b1nodematch.party  0.247133   0.007967      4  <1e-04 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Null Deviance: 674870  on 486816  degrees of freedom
Residual Deviance:  53158  on 486814  degrees of freedom

AIC: 53162    BIC: 53184    (Smaller is better.)

In [12]:
# let's create the actor network
# vertices <- commentators
# edges (weighted) <- number of news commentated by vertices incident to the edge

G=nx.Graph()
for politician,color in list(zip(poltweeps.index.tolist(),colors)):
# print(G.nodes(data=True))

In [13]:
def updateG(group,G,politicians):
""" Create weighted edges """
edges = itertools.combinations(group.tolist(), 2)
for v1,v2 in edges:
if v1 not in politicians or v2 not in politicians:
continue
if G.has_edge(v1, v2):
G[v1][v2]['weight'] += 1
else:

In [16]:
# add edges: hand each news item's commentator handles to updateG.
# updateG increments edge weights, so it must run exactly once per news item;
# an explicit loop guarantees that, whereas GroupBy.apply is not meant for
# functions that are called only for their side effects.
pol_handles = set(poltweeps.index)
for _, handles in title:
    updateG(handles, G, pol_handles)
# print number of nodes and edges
print (G.number_of_nodes(),G.number_of_edges())

66 1863

In [17]:
# Exporting to be read by Gephi for better visualization
# nx.write_gml(G,"data/theplazz_politics.gml")

# export for R-ergm: adjacency matrix whose cells hold the 'weight' edge attribute
# nx.to_numpy_matrix was removed in NetworkX 3.0; to_numpy_array returns the same
# values as a plain ndarray, and savetxt writes identical text for either.
A = nx.to_numpy_array(G, weight='weight')
np.savetxt("data/congress_actors_weighted.txt", A, fmt='%d')

In [15]:
# network file for pnet: unweighted adjacency matrix (weight=None marks every edge as 1)
# nx.to_numpy_matrix was removed in NetworkX 3.0; to_numpy_array is its replacement.
A = nx.to_numpy_array(G, weight=None)
np.savetxt("data/congress_actor.txt", A, fmt='%d')

In [22]:
#attribute file for pnet
# header line first, then one code per node of G: '2' for red nodes, '1' otherwise
party = ['party']
for _, attrs in G.nodes(data=True):
    party.append('2' if attrs['color'] == 'red' else '1')
with open('data/congress_attribute.txt','w') as fh:
    fh.write('\n'.join(party))

In [28]:
#attribute file for R-ergm
# header line first, then one letter per node of G: 'R' for red nodes, 'D' otherwise
party = ['party']
for _, attrs in G.nodes(data=True):
    party.append('R' if attrs['color'] == 'red' else 'D')
with open('data/congress_attributes.txt','w') as fh:
    fh.write('\n'.join(party))

In [23]:
# 62 of the 65 monitored Congress members are found to be in the same group as their co-party members
# 3 congresspeople not in the same group as their co-party members are circled
from IPython.display import Image