import matplotlib.pyplot as plt
import numpy as np
import tempfile
import uuid
import os
%matplotlib notebook
# Build a noisy sine wave to use as demo data: 10000 evenly spaced sample
# points on [0, 10], with uniform noise in [0, 0.1) added on top of sin(x).
x = np.linspace(0, 10, num=10000)
y = np.sin(x) + 0.1 * np.random.random(x.shape)
# Plot the in-memory data so it can be compared by eye with the copy that
# later comes back out of filestore.
fig, ax = plt.subplots()
ax.set_title('data before going in to filestore')
ax.plot(x, y)
plt.show()
# Write both arrays into the system temp directory.  Every file name is a
# freshly generated uuid4, so a collision with an existing file is not a
# practical concern.
data_dir = tempfile.gettempdir()
x_datapath = os.path.join(data_dir, '{}.npy'.format(uuid.uuid4()))
y_datapath = os.path.join(data_dir, '{}.npy'.format(uuid.uuid4()))
np.save(x_datapath, x)
np.save(y_datapath, y)
print('x data path = {}'.format(x_datapath))
print('y data path = {}'.format(y_datapath))
# Identifiers for the two data sets.  These are new uuids, independent of
# the file names generated above.
x_uid = str(uuid.uuid4())
y_uid = str(uuid.uuid4())
x data path = /tmp/66b3b303-3c5f-41e4-9e9d-c324635ed37a.npy
y data path = /tmp/4ae119f2-799f-4032-8eb8-3fe9bf036067.npy
import filestore.api as fsapi
from filestore.handlers import NpyHandler
# `spec` names the file format.  It is the identifier stored with each
# resource; presumably it is what later links the resource to the handler
# registered for that format.
spec = 'npy'
# Insert the filestore records for each data set: a resource document for
# the file on disk, then a datum document that attaches the data set's uid
# to that resource.  The x data set is inserted first, then the y data set.
for datapath, datum_uid in ((x_datapath, x_uid), (y_datapath, y_uid)):
    resource_document = fsapi.insert_resource(spec, datapath)
    datum_document = fsapi.insert_datum(resource_document, datum_uid)
/home/edill/miniconda/envs/nikea3/lib/python3.4/importlib/_bootstrap.py:1161: UserWarning: Module bson was already imported from /home/edill/miniconda/envs/nikea3/lib/python3.4/site-packages/bson/__init__.py, but /home/edill/miniconda/envs/nikea3/lib/python3.4/site-packages/mongoengine-0.8.7-py3.4.egg is being added to sys.path
  spec.loader.load_module(spec.name)
# Here is where the payoff happens for using this framework:
# all you have to keep track of is the uid for the datum
# document, not the filepath. Client code can then keep track
# of this uid and use the retrieve api to get the contents of
# the file back in ram.
# Make sure that the 'npy' spec has a registered handler
fsapi.register_handler('npy', NpyHandler)
# use the retrieve function to get the data back in ram
x_from_filestore = fsapi.retrieve(x_uid)
y_from_filestore = fsapi.retrieve(y_uid)
# Plot the retrieved data; it should look the same as the plot of the
# data before it went into filestore.
fig, ax = plt.subplots()
ax.plot(x_from_filestore, y_from_filestore)
ax.set_title('data after going in to filestore')
plt.show()
# Report the total *absolute* element-wise difference.  A plain sum of the
# signed differences can come out as 0.0 even when the arrays differ,
# because positive and negative errors cancel; with the absolute value,
# 0.0 is printed only when every element round-tripped unchanged.
print('difference between x before and after it went into filestore = '
      '{}'.format(np.sum(np.abs(x - x_from_filestore))))
print('difference between y before and after it went into filestore = '
      '{}'.format(np.sum(np.abs(y - y_from_filestore))))
difference between x before and after it went into filestore = 0.0
difference between y before and after it went into filestore = 0.0