from __future__ import division
import json, sys
from datetime import datetime
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib
matplotlib.rcParams["figure.figsize"] = (15, 7)

%%sh
# Print the commit hash checked out in /opt/bitcoin/git
# (presumably the source tree of the bitcoind being queried -- confirm).
cd /opt/bitcoin/git
git rev-parse HEAD

%%sh
# Print the node's fee estimate for every target from 1 to 25.
for target in $(seq 1 25); do
    bitcoin-cli estimatefee "$target"
done

def get_estimates(n = 25):
    ret = [datetime.utcnow()]
    
    bc = !bitcoin-cli getblockcount
    ret.append(int(bc[0]))
    
    mempool = !bitcoin-cli getrawmempool
    mempool_size = len(json.loads("".join(mempool)))
    ret.append(mempool_size)
    # estimate fee
    for x in range(1, n + 1):
        fee = !bitcoin-cli estimatefee $x
        ret.append(float(fee[0]))
    # estimate priority
    for x in range(1, n + 1):
        fee = !bitcoin-cli estimatepriority $x
        ret.append(float(fee[0]))
    return ret

# Smoke test: take one snapshot and show it.
# Single-argument print(...) prints the same on Python 2 and also parses on Python 3.
print(get_estimates())

from IPython.display import clear_output
import threading
raw_data = []
def gather_data(MAX = 5 * 60 * 2):
    """Collect one sample into ``raw_data`` and schedule the next call.

    Each call clears the cell output, then either finishes (``raw_data``
    already holds ``MAX`` rows, or ``gather_data.stop`` is true) or starts a
    ``threading.Timer`` that calls this function again after
    ``gather_data.delay`` seconds and appends the result of
    ``get_estimates()`` to the global ``raw_data`` list.

    The function attributes ``gather_data.delay`` (seconds) and
    ``gather_data.stop`` must be assigned before the first call.

    MAX -- number of rows at which collection stops.
    """
    clear_output()
    if len(raw_data) >= MAX or gather_data.stop:
        print("gather_data finished")
        sys.stdout.flush()
        return
    # The timer is started before sampling, so the next call stays scheduled
    # even if get_estimates() raises, and the interval does not grow by the
    # time the sampling takes.
    threading.Timer(gather_data.delay, gather_data, args=(MAX,)).start()
    raw_data.append(get_estimates())
    # remaining time in minutes, assuming one sample per delay interval
    rem = (MAX - len(raw_data)) * gather_data.delay / 60.
    # print(...) with a single argument: same output as the Python 2 print
    # statement, consistent with the call above, and valid on Python 3.
    print("collected %d of %d (ETA: %.2f [min])" % (len(raw_data), MAX, rem))
    print("last row: %s" % raw_data[-1])
    sys.stdout.flush()

# Settings are stored as attributes on the function object and read on each call.
gather_data.delay = 30 # [secs]
gather_data.stop = False

# Start a fresh run: clear the stop flag, drop previously collected rows and
# take the first sample (gather_data schedules the following ones itself).
gather_data.stop = False
raw_data = []
gather_data()

# Raising the flag makes the next scheduled gather_data call finish without
# sampling. NOTE(review): presumably meant to be run by hand as its own
# notebook cell -- executed right after the call above it ends the run after
# a single sample.
gather_data.stop = True

from itertools import repeat
# Column labels are (group, name) pairs in the same order as the values of a
# raw_data row: three "info" columns, then the fee and priority estimates.
# list(...) is required because the result is measured and extended below;
# a bare zip() is only a list on Python 2.
cols = list(zip(repeat("info"), ["time", "blockcount", "mempool_size"]))
# The values after the info columns are split evenly between the two estimate
# groups; N is one past the number of estimates per group, for range(1, N).
N = (len(raw_data[0]) - len(cols))//2 + 1
cols.extend(zip(repeat("estimate_fee"), range(1,N)))
cols.extend(zip(repeat("estimate_priority"), range(1,N)))
cols = pd.MultiIndex.from_tuples(cols)

# Build the frame and turn the ("info", "time") column into the row index,
# so the frame reads like a time series.
data = pd.DataFrame(raw_data, columns=cols).set_index(("info", "time"))
# give the index a plain name
data.index.name = "time"
# a value of -1 stands for "no data": store it as NaN in both estimate groups
for group in ("estimate_fee", "estimate_priority"):
    data[group] = data[group].replace(-1, np.nan)
data

timestamp = datetime.utcnow().strftime("%Y%m%d_%H%M")
data.to_hdf("float_fee_%s.hdf5" % timestamp,
            "txfee",
            mode="w",
            format="table",
            complevel=5, complib="blosc")

!ls -1 *.hdf5

# Work on a previously saved run rather than the rows collected above.
data = pd.read_hdf("float_fee_20140715_2056.hdf5", "txfee")

bc = data[("info", "blockcount")]
# Change in block count between consecutive samples, moved up by one row;
# the one-row offset is cosmetic only (it makes the markers look better).
diffs = bc.diff().shift(-1)
# sample times at which the (shifted) block count difference is positive
new_block_times = bc[diffs > 0].index

# Fee estimates over time, one line per target, with a green vertical line
# at every new-block time.
ef = data.estimate_fee
# nanmax instead of max(): the frame can hold NaN (the -1 "no data" marker is
# replaced by NaN when the frame is built), and a plain max() over values
# containing NaN is NaN, which would make the y-limits NaN.
ymax = np.nanmax(ef.values)*1.1
ax = ef.plot(legend=False, colormap="copper", lw=2, ylim=(0, ymax))
ax.vlines(new_block_times, 0, ymax, color="green")

# Mempool size over time, with a green vertical line at every new-block time.
mps = data[("info", "mempool_size")]
# marker height: 10% above the largest observed mempool size
mps_top = mps.values.max()*1.1
ax = mps.plot(ylim=0)
ax.vlines(new_block_times, 0, mps_top, color="green")
_ = plt.ylabel("Mempool size")

fig = plt.figure()
# One curve per sample (fee estimate against target), each drawn in black
# and almost fully transparent (alpha=.01).
for stamp, curve in data.estimate_fee.iterrows():
    plt.plot(curve, color="k", alpha=.01)

# Quantiles of each fee-estimate column over the whole run; after the
# transpose, rows are targets and columns are the quantile levels in q.
q = [0., .1, .25, .5, .75, .9, 1.]
quantiles = data.estimate_fee.quantile(q).T
# single-argument print(...): same output on Python 2, valid on Python 3
print(quantiles)

# Heat map of the quantile table: one row per quantile level, one column
# per target.
fig = plt.figure()
ax = fig.add_subplot(1,1,1)
# .values yields the same ndarray as as_matrix() did, but as_matrix() was
# deprecated and later removed from pandas (1.0); .values works on old and
# new versions alike.
qax = ax.matshow(quantiles.values.T, interpolation='nearest')
fig.colorbar(qax, shrink=.5)
# NOTE(review): the leading '' presumably pads a tick located before the
# first row/column -- confirm against the actual tick positions.
ax.set_yticklabels([''] + q)
ax.set_xticklabels([''] + quantiles.index.tolist())

# Same quantile table as line plot: one line per quantile level.
quantile_title = "Quantiles at %s" % q
quantiles.plot(colormap="brg")
_ = plt.title(quantile_title)

# Priority estimates over time on a logarithmic y-axis, with a green
# vertical line at every new-block time.
ep = data.estimate_priority
# nanmin/nanmax instead of min()/max(): the frame can hold NaN (the -1
# "no data" marker is replaced by NaN when the frame is built), and plain
# min()/max() over values containing NaN give NaN axis limits.
ymin, ymax = np.nanmin(ep.values)*.5, np.nanmax(ep.values)*2
ax = ep.plot(legend=False, colormap="copper", ylim=(ymin, ymax), lw=2, logy=True)
ax.vlines(new_block_times, ymin, ymax, color="green")
_ = plt.ylabel("Priority")
_ = plt.title("Priority, logarithmic")