# Render the Creative Commons license notice for this replication notebook.
from IPython.display import HTML

license_text = """
"Replication of results for The socio-cultural sources of urban buzz" by Daniel Arribas-Bel is licensed under a Creative Commons Attribution 4.0 International License."""
HTML(license_text)
import copy
import pysal as ps
import numpy as np
import pandas as pd
import statsmodels.api as sm
from pyGDsandbox.dataIO import dbf2df, df2dbf
# Load the neighbourhood-level buzz dataset straight from the canonical copy
# in the `buzz_adam` GitHub repository, so the replication uses exactly the
# published data. The first CSV column holds the area identifier.
# NOTE(review): the original cell first read a local 'buzz_data.csv' and then
# immediately overwrote `db` with the remote version; the redundant local
# read has been dropped.
url = 'https://raw.github.com/darribas/buzz_adam/master/buzz_data.csv'
db = pd.read_csv(url, index_col=0)
db
# Explanatory variables: Foursquare category shares ('4sq-pct_*'), land-use
# shares ('*_pct_area'), venue/unit totals and the diversity index. The
# dependent variable 'checkins_all' is deliberately kept LAST so later cells
# can take vars[:-1] as the X matrix.
# NOTE(review): `vars` shadows the builtin of the same name; kept because the
# cells below reference it. Backslash continuations were redundant inside the
# bracketed literal and have been removed.
vars = [
    '4sq-pct_Arts & Entertainment',
    '4sq-pct_College & University',
    '4sq-pct_Food',
    '4sq-pct_Other',
    '4sq-pct_Outdoors & Recreation',
    '4sq-pct_Professional & Other Places',
    '4sq-pct_Travel & Transport',
    'industrie_pct_area',
    'kantoor_pct_area',
    'sport_pct_area',
    'winkel_pct_area',
    '4sq_total_venues',
    'total_units',
    'div_i',
    'checkins_all',
]
# Descriptive statistics for all variables, rounded to 3 decimal places.
np.round(db[vars].describe().T[['mean', 'std', 'min', '25%', '50%', '75%', 'max']], 3)
# Choropleth of total check-in volume over the Amsterdam neighbourhoods,
# classified into 9 quantile bins.
from pysal.contrib.viz import mapping as maps

shp_link = 'adam_buzz_buurts.shp'
checkin_values = db['checkins_all'].values
_ = maps.plot_choropleth(shp_link, checkin_values, 'quantiles',
                         k=9, cmap='YlGn', figsize=(12, 12),
                         title='Checkin volume')
# Baseline (non-spatial) OLS: log of total check-ins regressed on the
# category, land-use and diversity variables.
name_y = 'checkins_all'
# .copy() avoids pandas' chained-assignment hazard: without it, adding the
# constant column could write into a view of `db` itself (and raises
# SettingWithCopyWarning on modern pandas).
x = db[vars[:-1]].copy()
x['constant'] = 1
model = sm.OLS(np.log(db[name_y]), x).fit()
# print(...) form is valid in both Python 2 and 3 (original used the
# Python-2-only statement form).
print(model.summary())
# Build queen-contiguity weights from the shapefile, then manually connect
# observation 27 to 26 and 28 (presumably a disconnected polygon under pure
# queen contiguity — TODO confirm against the map).
wo = ps.queen_from_shapefile('adam_buzz_buurts.shp')
neigh = copy.copy(wo.neighbors)
weigh = copy.copy(wo.weights)
neigh[27].extend([26, 28])
weigh[27].extend([1., 1.])
w = ps.W(neigh, weigh)
# Persist the patched weights so the R cells below read the same structure.
# Close the handle explicitly: the original chained ps.open(...).write(w)
# never closed the file, risking an unflushed/incomplete .gal on disk.
gal = ps.open('adam_buzz_buurts_queenPS.gal', 'w')
gal.write(w)
gal.close()
# Row-standardise for the regressions (after writing the raw weights).
w.transform = 'r'
# Re-estimate the OLS with PySAL's spreg to obtain the spatial diagnostics
# (LM tests) under the patched, row-standardised queen weights.
mmodel = ps.spreg.OLS(np.log(db[[name_y]].values),
                      db[vars[:-1]].values, w=w, spat_diag=True,
                      nonspat_diag=False)
from pysal.spreg.summary_output import summary_spat_diag_ols
# print(...) form is valid in both Python 2 and 3 (original used the
# Python-2-only statement form).
print('\n\t\tSPATIAL DIAGNOSTICS')
print('\t\t-------------------\n')
print(summary_spat_diag_ols(mmodel, False))
%load_ext rmagic
%R library(spdep)
%%R
# --- Spatial lag model of overall buzz, estimated in R with spdep ---
# Load the dataset with the richer diversity measures; note the semicolon
# separator.
db.link = 'adam_vars_richdiv.csv'
db = read.csv(db.link, sep=';')
# Land-use shares: the grep result is immediately overridden by an explicit
# four-variable subset (the grep line is kept only for reference).
landuses = names(db)[grep('_pct_area', names(db))]
landuses = c('industrie_pct_area', 'kantoor_pct_area', 'sport_pct_area',
'winkel_pct_area')
# First seven Foursquare category shares (R prefixes names starting with a
# digit with 'X', hence 'X4sq').
sq4 = names(db)[grep('X4sq.pct', names(db))][1:7]
other = c()
# Full right-hand-side variable list, mirroring vars[:-1] in the Python cells.
name.X = c(sq4, landuses,
c(
'X4sq_total_venues',
'total_units',
'div_i'
)
)
x = db[name.X]
########### Rescaling ##############
# Put coefficients on comparable magnitudes: units in thousands, diversity
# index scaled up by 10.
x['total_units'] <- x['total_units'] / 1000
x['div_i'] <- x['div_i'] * 10
####################################
y = 'checkins_all'
y = log(db[, y])
# Read the queen weights written by the Python cell above (observation 27
# already patched) and row-standardise ("W" style).
wnb <- read.gal('adam_buzz_buurts_queenPS.gal',
override.id=T)
w <- nb2listw(wnb, style="W")
# Maximum-likelihood spatial lag model: log(buzz) on all columns of x.
model <- lagsarlm(y ~ ., data=x, w)
print(summary(model))
%%R
# --- Spatial lag models for each of the eight week-part/day-part buzz
# measures, collecting the diversity coefficient and its 95% bounds ---
# Map R's mangled column names for the eight time slices back to readable
# labels.
cross.walk <- list("X..WeekDay....Morning.." = "Morning-WeekDay",
"X..WeekDay....Afternoon.." = "Afternoon-WeekDay",
"X..WeekDay....Evening.." = "Evening-WeekDay",
"X..WeekDay....Night.." = "Night-WeekDay",
"X..WeekEnd....Morning.." = "Morning-WeekEnd",
"X..WeekEnd....Afternoon.." = "Afternoon-WeekEnd",
"X..WeekEnd....Evening.." = "Evening-WeekEnd",
"X..WeekEnd....Night.." = "Night-WeekEnd"
)
db.link = 'buzz_data.csv'
db = read.csv(db.link)
# Same variable construction as the previous cell (grep immediately
# overridden by the explicit land-use subset).
landuses = names(db)[grep('_pct_area', names(db))]
landuses = c('industrie_pct_area', 'kantoor_pct_area', 'sport_pct_area',
'winkel_pct_area')
sq4 = names(db)[grep('X4sq.pct', names(db))][1:7]
other = c()
name.X = c(sq4, landuses,
c(
'X4sq_total_venues',
'total_units',
'div_i'
)
)
########### Rescaling ##############
# Same rescaling as before, applied to db in place this time.
db['total_units'] <- db['total_units'] / 1000
db['div_i'] <- db['div_i'] * 10
####################################
wnb <- read.gal('adam_buzz_buurts_queenPS.gal',
override.id=T)
w <- nb2listw(wnb, style="W")
# Dependent variables: every column whose name contains 'Week'.
week.times <- names(db)[grep('Week', names(db))]
res <- data.frame()
divs <- data.frame()
vari <- 15 # Position for Div_i  (index into the coefficient vector — assumes
           # div_i is the 15th coefficient; TODO confirm if name.X changes)
print('Running regressions')
# Divert the verbose model summaries to a text file.
sink('lag_models_summaries.txt')
for(wtime in week.times){
print(paste('Model for week time:', wtime))
y = db[, wtime]
# Replace exact zeros with a tiny positive value so log() is finite.
y[y == 0] = 0.000001
y = log(y)
x = db[name.X]
model <- lagsarlm(y ~ ., data=x, w)
coefs <- as.data.frame(model$coefficients)
colnames(coefs) <- cross.walk[wtime]
# Accumulate one coefficient column per time slice.
res <- c(res, coefs)
print(summary(model))
# Diversity coefficient with a normal-approximation 95% interval.
div <- data.frame(beta=model$coefficients[vari], se=model$rest.se[vari],
lower=model$coefficients[vari] - model$rest.se[vari]*1.96,
upper=model$coefficients[vari] + model$rest.se[vari]*1.96
)
rownames(div) <- cross.walk[wtime]
divs <- rbind(div, divs)
}
sink()
res <- as.data.frame(res)
# Re-order the rows chronologically (rbind above prepended them).
time.line <- c("Morning-WeekDay", "Afternoon-WeekDay", "Evening-WeekDay", "Night-WeekDay",
"Morning-WeekEnd", "Afternoon-WeekEnd", "Evening-WeekEnd", "Night-WeekEnd")
divs <- divs[time.line, ]
print(divs)
%%R
# --- Plot the diversity coefficient across the eight time-slice models,
# with its 95% band (grey polygon) and a zero reference line ---
par(mar = c(7, 4, 4, 2) + 0.1, bty='n')  # extra bottom margin for rotated labels
ylab <- 'Div. coefficients'
# Base line of point estimates; y-limits sized to contain the whole band.
plot(seq(8), xaxt = "n", divs$beta, type='l', xlab='', ylab=ylab,
ylim=c(min(divs$lower), max(divs$upper)))
#lines(seq(8), xaxt = "n", divs$lower, col='green', lw=0.5, type='l', xlab='', ylab='Div_i Coef.')
#lines(seq(8), xaxt = "n", divs$upper, col='green', lw=0.5, type='l', xlab='', ylab='Div_i Coef.')
# Confidence band as a closed polygon: upper bounds left-to-right, then
# lower bounds right-to-left.
xx <- c(seq(8), rev(seq(8)))
yy <- c(divs$upper, rev(divs$lower))
polygon(xx, yy, col = 'darkgray', border = NA)
# Redraw the point-estimate line on top of the polygon.
lines(seq(8), xaxt = "n", divs$beta, type='l', xlab='', ylab='Div. Coef.',
ylim=c(min(divs$lower), max(divs$upper)))
abline(h=0, col='red', lw=2)      # zero (no-effect) reference
abline(v=4.5, col='black', lw=0.5)  # separator between weekday and weekend
# Custom rotated x-axis labels for the eight time slices.
axis(1, labels = FALSE)
text(1:8, par("usr")[3] - 0.075, srt = 45, adj = 1,
labels = time.line, xpd = TRUE)
text(2.25, 1.5, labels='Week days')
text(6.75, 1.5, labels='Weekend')
title(main=paste('Effect of diversity on buzz\n(spatial lag models)'))