import itertools as itl

import numpy as np

k = 10


def sample_after(stream, k):
    """Return a uniform random sample of k elements drawn without replacement.

    The sample is built with reservoir sampling in a single pass over
    ``stream``, so only ``k`` elements are held in memory at any time. This
    makes the function usable when the elements of ``stream`` cannot all fit
    into memory.

    Parameters
    ----------
    stream : iterable
        Source of elements. May be a one-shot iterator/generator or a
        re-iterable container (list, range, ...); it is traversed exactly
        once.
    k : int
        Number of elements to sample. Must be non-negative.

    Returns
    -------
    numpy.ndarray
        Array holding the sampled elements. If ``stream`` yields fewer than
        ``k`` elements, all of them are returned (in stream order).
    """
    # Obtain a single iterator up front. Without this, a re-iterable input
    # (list, range, ...) would be restarted by the loop below and its first
    # k elements would be seen twice, biasing the sample and allowing
    # duplicates.
    stream = iter(stream)

    # Keep the reservoir as a plain list so replacement elements are stored
    # unchanged; a NumPy array sized from the first k items would silently
    # truncate longer strings or cast floats to ints.
    reservoir = list(itl.islice(stream, k))

    # ``seen`` is the 1-based position of ``item`` in the stream.
    for seen, item in enumerate(stream, k + 1):
        # Uniform slot in [0, seen); NumPy's upper bound is exclusive. The
        # new item enters the reservoir with probability k / seen.
        slot = np.random.randint(0, seen)
        if slot < k:
            reservoir[slot] = item

    return np.array(reservoir)


if __name__ == "__main__":
    # Demo run; guarded so that importing this module does not trigger a
    # billion-element pass over the stream.
    try:
        xrange
    except NameError:  # Python 3: range is already lazy
        xrange = range
    sample_after(xrange(1000000000), k)