from IPython.display import HTML
lic = """Creative Commons License
"Replication of results for The socio-cultural sources of urban buzz" by
Daniel Arribas-Bel is licensed under a Creative Commons Attribution 4.0
International License."""
HTML(lic)

import copy
import pysal as ps
import numpy as np
import pandas as pd
import statsmodels.api as sm
from pyGDsandbox.dataIO import dbf2df, df2dbf

# Read the data from a local copy...
db = pd.read_csv('buzz_data.csv', index_col=0)
# ...or pull it straight from the online repository
url = 'https://raw.github.com/darribas/buzz_adam/master/buzz_data.csv'
db = pd.read_csv(url, index_col=0)
db

# Variables used throughout the analysis; `checkins_all` (last) is the
# dependent variable
vars = ['4sq-pct_Arts & Entertainment',
        '4sq-pct_College & University',
        '4sq-pct_Food',
        '4sq-pct_Other',
        '4sq-pct_Outdoors & Recreation',
        '4sq-pct_Professional & Other Places',
        '4sq-pct_Travel & Transport',
        'industrie_pct_area',
        'kantoor_pct_area',
        'sport_pct_area',
        'winkel_pct_area',
        '4sq_total_venues',
        'total_units',
        'div_i',
        'checkins_all']
np.round(db[vars].describe().T[['mean', 'std', 'min', '25%', '50%', '75%', 'max']], 3)

# Choropleth of check-in volume
from pysal.contrib.viz import mapping as maps
shp_link = 'adam_buzz_buurts.shp'
_ = maps.plot_choropleth(shp_link, db['checkins_all'].values, 'quantiles',
                         k=9, cmap='YlGn', figsize=(12, 12),
                         title='Checkin volume')

# Baseline (non-spatial) OLS on the log of check-ins
name_y = 'checkins_all'
x = db[vars[:-1]].copy()  # copy so adding the constant does not touch `db`
x['constant'] = 1
model = sm.OLS(np.log(db[name_y]), x).fit()
print model.summary()

# Queen contiguity from the shapefile; polygon 27 is manually connected to
# 26 and 28, and the result is saved to a .gal file for reuse from R
wo = ps.queen_from_shapefile('adam_buzz_buurts.shp')
neigh = copy.copy(wo.neighbors)
weigh = copy.copy(wo.weights)
neigh[27].extend([26, 28])
weigh[27].extend([1., 1.])
w = ps.W(neigh, weigh)
ps.open('adam_buzz_buurts_queenPS.gal', 'w').write(w)
w.transform = 'r'

# Spatial diagnostics of the OLS specification
from pysal.spreg.summary_output import summary_spat_diag_ols
mmodel = ps.spreg.OLS(np.log(db[[name_y]].values), db[vars[:-1]].values,
                      w=w, spat_diag=True, nonspat_diag=False)
print '\n\t\tSPATIAL DIAGNOSTICS'
print '\t\t-------------------\n'
print summary_spat_diag_ols(mmodel, False)

%load_ext rmagic
%R library(spdep)

%%R
db.link = 'adam_vars_richdiv.csv'
db = read.csv(db.link, sep=';')
landuses = names(db)[grep('_pct_area', names(db))]
landuses = c('industrie_pct_area', 'kantoor_pct_area', 'sport_pct_area',
             'winkel_pct_area')
sq4 = names(db)[grep('X4sq.pct', names(db))][1:7]
other = c()
name.X = c(sq4, landuses, c('X4sq_total_venues', 'total_units', 'div_i'))
x = db[name.X]
########### Rescaling ##############
x['total_units'] <- x['total_units'] / 1000
x['div_i'] <- x['div_i'] * 10
####################################
y = 'checkins_all'
y = log(db[, y])
# Read the queen weights created above and row-standardize them
wnb <- read.gal('adam_buzz_buurts_queenPS.gal', override.id=T)
w <- nb2listw(wnb, style="W")
# Maximum-likelihood spatial lag model
model <- lagsarlm(y ~ ., data=x, w)
print(summary(model))
= "Night-WeekEnd" ) db.link = 'buzz_data.csv' db = read.csv(db.link) landuses = names(db)[grep('_pct_area', names(db))] landuses = c('industrie_pct_area', 'kantoor_pct_area', 'sport_pct_area', 'winkel_pct_area') sq4 = names(db)[grep('X4sq.pct', names(db))][1:7] other = c() name.X = c(sq4, landuses, c( 'X4sq_total_venues', 'total_units', 'div_i' ) ) ########### Rescaling ############## db['total_units'] <- db['total_units'] / 1000 db['div_i'] <- db['div_i'] * 10 #################################### wnb <- read.gal('adam_buzz_buurts_queenPS.gal', override.id=T) w <- nb2listw(wnb, style="W") week.times <- names(db)[grep('Week', names(db))] res <- data.frame() divs <- data.frame() vari <- 15 # Position for Div_i print('Running regressions') sink('lag_models_summaries.txt') for(wtime in week.times){ print(paste('Model for week time:', wtime)) y = db[, wtime] y[y == 0] = 0.000001 y = log(y) x = db[name.X] model <- lagsarlm(y ~ ., data=x, w) coefs <- as.data.frame(model$coefficients) colnames(coefs) <- cross.walk[wtime] res <- c(res, coefs) print(summary(model)) div <- data.frame(beta=model$coefficients[vari], se=model$rest.se[vari], lower=model$coefficients[vari] - model$rest.se[vari]*1.96, upper=model$coefficients[vari] + model$rest.se[vari]*1.96 ) rownames(div) <- cross.walk[wtime] divs <- rbind(div, divs) } sink() res <- as.data.frame(res) time.line <- c("Morning-WeekDay", "Afternoon-WeekDay", "Evening-WeekDay", "Night-WeekDay", "Morning-WeekEnd", "Afternoon-WeekEnd", "Evening-WeekEnd", "Night-WeekEnd") divs <- divs[time.line, ] print(divs) %%R par(mar = c(7, 4, 4, 2) + 0.1, bty='n') ylab <- 'Div. coefficients' plot(seq(8), xaxt = "n", divs$beta, type='l', xlab='', ylab=ylab, ylim=c(min(divs$lower), max(divs$upper))) #lines(seq(8), xaxt = "n", divs$lower, col='green', lw=0.5, type='l', xlab='', ylab='Div_i Coef.') #lines(seq(8), xaxt = "n", divs$upper, col='green', lw=0.5, type='l', xlab='', ylab='Div_i Coef.') xx <- c(seq(8), rev(seq(8))) yy <- c(divs$upper, rev(divs$lower)) polygon(xx, yy, col = 'darkgray', border = NA) lines(seq(8), xaxt = "n", divs$beta, type='l', xlab='', ylab='Div. Coef.', ylim=c(min(divs$lower), max(divs$upper))) abline(h=0, col='red', lw=2) abline(v=4.5, col='black', lw=0.5) axis(1, labels = FALSE) text(1:8, par("usr")[3] - 0.075, srt = 45, adj = 1, labels = time.line, xpd = TRUE) text(2.25, 1.5, labels='Week days') text(6.75, 1.5, labels='Weekend') title(main=paste('Effect of diversity on buzz\n(spatial lag models)'))