from pymongo import MongoClient mg_client = MongoClient() mg_db = mg_client.ocn_player_count data = mg_db.server_stats # This is what one entry looks like in mongodb: data.find_one() import psycopg2 # connect to postgres db pg_conn = psycopg2.connect("dbname='ocnserverstats' user='ben'") cur = pg_conn.cursor() # make table for data cur.execute("CREATE TABLE playercounts (id serial PRIMARY KEY, region varchar, section varchar, server varchar, timestamp timestamp, count int);") # Now we need to loop over all of the entries and convert them into SQL rows for d in data.find(): timestamp = d['time'] for key, counts in d['stats'].iteritems(): region = key[:2] # 'us' or 'eu' section = key[3:] # 'project-ares', 'blitz', etc. for server, count in counts.iteritems(): # insert into db cur.execute("INSERT INTO playercounts (region, section, server, timestamp, count) VALUES (%s, %s, %s, %s, %s)", (region, section, server, timestamp, count)) pg_conn.commit() mg_client.close() # and here's how the data looks in postgres print "playercount table:" cur.execute("""SELECT * from playercounts""") for row in cur.fetchmany(5): print row print "...\n\nNumber of entries in table: %s" % cur.rowcount pg_conn.close() # now onto the analysis! import pandas as pd import matplotlib as mpl # for displaying inline graphs in notebook %matplotlib inline def plot_counts(servers=('alpha',), region='us', errorbars=True): """ Get data from postgres and make some simple plots! """ plt = mpl.pyplot mpl.rcParams['savefig.dpi'] = 110 # make the graph display nicer # get data pg_conn = psycopg2.connect("dbname='ocnserverstats' user='ben'") cur = pg_conn.cursor() cur.execute("""SELECT timestamp, count, server from playercounts WHERE region=(%s) AND server IN %s""", (region, servers,)) d = cur.fetchall() pg_conn.close() df = pd.DataFrame(d, columns=['timestamp', 'count', 'server']) # replace datetime field with hour of the day df['hour'] = df['timestamp'].apply(lambda x: x.hour + x.minute / 60.0) df = df.drop('timestamp', 1) server_group = df.groupby('server') fig = plt.figure() ax = fig.add_subplot(111) ax.set_xlabel('hour of the day (EDT)') ax.set_ylabel('average player count') ax.xaxis.set_major_locator(mpl.ticker.MultipleLocator(2.0)) ax.set_xlim(0,23) ax.yaxis.grid(b=True, which='major', color='black', alpha='0.1') plot_filename = [] for sg in server_group: name = region + '-' + sg[0] plot_filename.append(name) gp = sg[1].groupby('hour') means = gp.mean() # plot line y = means['count'].tolist() x = means.index.tolist() if errorbars: errors = gp.std() # one standard deviation error bar yerr = errors['count'].tolist() ax.errorbar(x,y,yerr=yerr ,ls='None') ax.plot(x,y, label=name) ax.legend(loc='upper left') plot_filename = '--'.join(plot_filename) + '.png' fig.savefig(plot_filename, dpi=120) # make some plots! plot_counts(servers=('alpha','beta')) plot_counts(servers=('alpha','mini01')) plot_counts(servers=('capture1','capture2','destroy')) plot_counts(servers=('capture','destroy'), region='eu') # the error bars are helpful, but they crowd the graph when you have multiple series plot_counts(servers=('capture1','capture2','destroy'),errorbars=False) plot_counts(servers=('mini01','mini02','mini03','mini04','mini05'), region='eu',errorbars=False)