from __future__ import division import json, sys from datetime import datetime import numpy as np import pandas as pd %matplotlib inline import matplotlib.pyplot as plt import matplotlib matplotlib.rcParams["figure.figsize"] = (15, 7) %%sh cd /opt/bitcoin/git git rev-parse HEAD %%sh for x in `seq 1 25` do bitcoin-cli estimatefee $x done def get_estimates(n = 25): ret = [datetime.utcnow()] bc = !bitcoin-cli getblockcount ret.append(int(bc[0])) mempool = !bitcoin-cli getrawmempool mempool_size = len(json.loads("".join(mempool))) ret.append(mempool_size) # estimate fee for x in range(1, n + 1): fee = !bitcoin-cli estimatefee $x ret.append(float(fee[0])) # estimate priority for x in range(1, n + 1): fee = !bitcoin-cli estimatepriority $x ret.append(float(fee[0])) return ret print get_estimates() from IPython.display import clear_output import threading raw_data = [] def gather_data(MAX = 5 * 60 * 2): clear_output() if len(raw_data) >= MAX or gather_data.stop: print("gather_data finished") sys.stdout.flush() return threading.Timer(gather_data.delay, gather_data, args=(MAX,)).start() raw_data.append(get_estimates()) rem = (MAX - len(raw_data)) * gather_data.delay / 60. print "collected %d of %d (ETA: %.2f [min])" % (len(raw_data), MAX, rem) print "last row: %s" % raw_data[-1] sys.stdout.flush() gather_data.delay = 30 # [secs] gather_data.stop = False gather_data.stop = False raw_data = [] gather_data() gather_data.stop = True from itertools import repeat cols = zip(repeat("info"), ["time", "blockcount", "mempool_size"]) N = (len(raw_data[0]) - len(cols))//2 + 1 cols.extend(zip(repeat("estimate_fee"), range(1,N))) cols.extend(zip(repeat("estimate_priority"), range(1,N))) cols = pd.MultiIndex.from_tuples(cols) data = pd.DataFrame(raw_data, columns = cols) # index on the left, time series like data = data.set_index(("info", "time")) # rename the index data.index.name = "time" # -1 is the marker for missing data data.estimate_fee = data.estimate_fee.replace(-1, np.nan) data.estimate_priority = data.estimate_priority.replace(-1, np.nan) data timestamp = datetime.utcnow().strftime("%Y%m%d_%H%M") data.to_hdf("float_fee_%s.hdf5" % timestamp, "txfee", mode="w", format="table", complevel=5, complib="blosc") !ls -1 *.hdf5 data = pd.read_hdf("float_fee_20140715_2056.hdf5", "txfee") bc = data["info", "blockcount"] diffs = bc.diff() # just an offset by one to make it look better diffs = diffs.shift(-1) # get the timestamps where new blocks have been found new_block_times = bc[diffs > 0].index ef = data.estimate_fee ymax = ef.values.max()*1.1 ax = ef.plot(legend=False, colormap="copper", lw=2, ylim=(0, ymax)) ax.vlines(new_block_times, 0, ymax, color="green") mps = data["info", "mempool_size"] ax = mps.plot(ylim=0) ax.vlines(new_block_times, 0, mps.values.max()*1.1, color="green") _ = plt.ylabel("Mempool size") fig = plt.figure() for line in data.estimate_fee.iterrows(): plt.plot(line[1:][0], color="k", alpha=.01) q = [0., .1, .25, .5, .75, .9, 1.] quantiles = data.estimate_fee.quantile(q).T print quantiles fig = plt.figure() ax = fig.add_subplot(1,1,1) qax = ax.matshow(quantiles.as_matrix().T, interpolation='nearest') fig.colorbar(qax, shrink=.5) ax.set_yticklabels([''] + q) ax.set_xticklabels([''] + list(quantiles.index.tolist())) quantiles.plot(colormap="brg") _ = plt.title("Quantiles at %s" % q) ep = data.estimate_priority ymin, ymax = ep.values.min()*.5, ep.values.max()*2 ax = ep.plot(legend=False, colormap="copper", ylim=(ymin, ymax), lw=2, logy=True) ax.vlines(new_block_times, ymin, ymax, color="green") _ = plt.ylabel("Priority") _ = plt.title("Priority, logarithmic")