%matplotlib inline
import datetime
import matplotlib.pyplot as plt
def plot_hundred_microseconds(year, ax=None, num_ticks=5, micros=20):
    """Plot a zigzag over a short window starting at January 1 midnight of year.

    The window holds ``num_ticks`` points spaced ``micros`` microseconds
    apart; with the default arguments it covers a hundred microseconds.

    year -- year whose January 1, 00:00:00 is the first x value.
    ax -- axes to draw on; ``plt.gca()`` is used when omitted.
    num_ticks -- number of points to plot.
    micros -- spacing between consecutive points, in microseconds.

    Returns the axes that were drawn on.
    """
    start = datetime.datetime(year, 1, 1)
    # Offsets are added as timedeltas rather than passed as the constructor's
    # microsecond argument, which only accepts 0..999999; this keeps windows
    # of a second or longer from raising ValueError.
    xs = [start + datetime.timedelta(microseconds=i * micros)
          for i in range(num_ticks)]
    # Alternate 0/1 so that every point is a visible corner of the line.
    ys = [i % 2 for i in range(num_ticks)]
    if ax is None:
        ax = plt.gca()
    ax.plot(xs, ys)
    # Slant the date labels so the long timestamps do not overlap.
    ax.figure.autofmt_xdate()
    return ax
Python datetime objects converted to 64-bit floating point numbers (as is done for plotting) have less microsecond precision the farther the date is from 0 AD (see, for example, PEP 410):
# Five points spaced 20 microseconds apart, starting at 2014-01-01 00:00:00.
plot_hundred_microseconds(2014, micros=20)
<matplotlib.axes._subplots.AxesSubplot at 0x2c408d0>
For better microsecond (but not nanosecond) precision without having to write custom datetime classes and matplotlib handlers on our own, we could just introduce the hack of shifting our year closer to 0 AD. (The tick formatters show %f due to this issue)
# Same 20-microsecond spacing, but with the year shifted to 14.
plot_hundred_microseconds(14, micros=20)
<matplotlib.axes._subplots.AxesSubplot at 0x7f9548328bd0>
If we import pandas, then microsecond-level resolution will not be displayed at all, since pandas attempts to overload the default date locator of the x-axis with one only capable of millisecond resolution. (And we aren't even using pandas features!) This is partially because the change introducing the pandas millisecond locator occurred before the change introducing the matplotlib microsecond locator, and partially because of the strange decision by pandas to quietly register its own default unit converters. These unit converters set the formatter/locators to the millisecond-resolution ones from pandas.
# Only the import matters here; no pandas API is called in this cell.
import pandas
ax = plot_hundred_microseconds(2014, micros=20)
# Inspect which locator object now drives the x-axis major ticks.
ax.xaxis.get_major_locator()
<pandas.tseries.converter.PandasAutoDateLocator at 0x383f590>
So even in a larger time range (tens of milliseconds), a limited number of ticks will be displayed, and they will be formatted with floating point rounding errors:
# Five points spaced 20000 microseconds (20 ms) apart, i.e. an 80 ms window.
plot_hundred_microseconds(2014, micros=20000)
<matplotlib.axes._subplots.AxesSubplot at 0x3821e50>
To get around this, we will define our own custom microsecond formatter class which (1) builds on the default matplotlib automatic date formatter and (2) rounds the microseconds to the nearest thousand microseconds, assuming that the current plot is zoomed out far enough (say > 1 ms viewing range).
from dateutil.relativedelta import relativedelta
import six
import matplotlib.dates as dates
def format_microseconds(dt, pos=None, locator=None):
    """Format a matplotlib date number as ``HH:MM:SS.ffffff``.

    When a locator is supplied and its view limits span more than 1000
    microseconds, a value lying within a small epsilon of a whole
    millisecond is snapped onto that millisecond. This hides the
    64-bit floating point error carried by the date number.

    dt -- date number, converted with ``dates.num2date``.
    pos -- tick position; accepted for the formatter signature, unused.
    locator -- optional locator providing ``viewlim_to_dt()``; without
        it no snapping is performed.
    """
    snap_tolerance = 15
    snap_to_millisecond = False

    if locator is not None:
        lo, hi = locator.viewlim_to_dt()
        if lo > hi:
            # Inverted axis: order the limits before measuring the span.
            lo, hi = hi, lo
        span = relativedelta(hi, lo)
        coarse_parts = (span.years, span.months, span.days,
                        span.hours, span.minutes, span.seconds)
        # Any non-zero coarse component means the view covers at least a
        # second, so the exact microsecond count no longer matters.
        visible_micros = 1e6 if any(coarse_parts) else span.microseconds
        snap_to_millisecond = visible_micros > 1000

    moment = dates.num2date(dt)
    if snap_to_millisecond:
        # TODO: maybe snap to a smaller multiple of microseconds than a
        # thousand? With a tolerance of 15, even 100 might be okay.
        remainder = moment.microsecond % 1000
        if remainder < snap_tolerance:
            moment = moment - datetime.timedelta(microseconds=remainder)
        elif remainder > 1000 - snap_tolerance:
            moment = moment + datetime.timedelta(microseconds=1000 - remainder)
    return moment.strftime('%H:%M:%S.%f')
class AutoMicrosecondFormatter(dates.AutoDateFormatter):
    """Automatic date formatter whose finest scale rounds microseconds.

    Refer to the AutoDateFormatter documentation:
    https://github.com/matplotlib/matplotlib/blob/master/lib/matplotlib/dates.py
    We adjust the default microsecond tick formatting by rounding, using
    ``format_microseconds`` for the smallest entry in ``self.scaled``.
    """

    def __init__(self, locator=None, tz=None, defaultfmt='%Y-%m-%d'):
        """Create the formatter.

        locator -- locator queried for the tick unit; a fresh
            ``dates.AutoDateLocator`` is created when omitted.
        tz -- time zone forwarded to the base class.
        defaultfmt -- format used when no entry of ``self.scaled`` applies.
        """
        self._auto_locator = dates.AutoDateLocator() if locator is None else locator
        super(AutoMicrosecondFormatter, self).__init__(self._auto_locator, tz, defaultfmt)
        # Keys are compared against the locator unit in __call__; values are
        # either strftime format strings or a callable formatter.
        self.scaled = {365.0: '%Y',
                       30.0: '%b %Y',
                       1.0: '%b %d %Y',
                       1. / 24.: '%H:%M:%S',
                       1. / (24. * 60 * 60 * 1000): format_microseconds}

    def __call__(self, x, pos=None):
        """Return the tick label for date number ``x``.

        This callback does the same thing as the underlying
        matplotlib.dates.AutoDateFormatter implementation, except that it
        also passes the locator to the formatter when the formatter is a
        function. The locator contains information such as the current
        viewing window limit range.

        Raises TypeError if the selected entry is neither a string nor
        a callable.
        """
        locator_unit_scale = float(self._locator._get_unit())
        fmt = self.defaultfmt
        # Pick the first scale which is greater than or equal to the
        # locator unit; fall back to defaultfmt when none qualifies.
        for possible_scale in sorted(self.scaled):
            if possible_scale >= locator_unit_scale:
                fmt = self.scaled[possible_scale]
                break
        if isinstance(fmt, six.string_types):
            self._formatter = dates.DateFormatter(fmt, self._tz)
            result = self._formatter(x, pos)
        elif six.callable(fmt):
            result = fmt(x, pos, locator=self._locator)
        else:
            # str has no ``formatter`` method; ``format`` is required so that
            # the intended TypeError is raised rather than AttributeError.
            raise TypeError('Unexpected type passed to {!r}.'.format(self))
        return result
# Re-plot the 20 ms-spaced series, then explicitly install an
# AutoDateLocator and the AutoMicrosecondFormatter on the x-axis.
ax = plot_hundred_microseconds(2014, micros=20000)
locator = dates.AutoDateLocator()
ax.xaxis.set_major_locator(locator)
# The formatter receives the same locator instance that drives the axis.
ax.xaxis.set_major_formatter(AutoMicrosecondFormatter(locator))
Unfortunately, this does not help us much at tens-of-microseconds precision. Question: is there some way we can overload matplotlib's conversion of dates to 64-bit floating point numbers, in order to get evenly distributed ticks? For example, could the date conversion change the year to 0 AD when we are plotting an intraday time series?
# Same locator/formatter setup, applied to the 20-microsecond-spaced series.
ax = plot_hundred_microseconds(2014, micros=20)
locator = dates.AutoDateLocator()
ax.xaxis.set_major_locator(locator)
# The formatter receives the same locator instance that drives the axis.
ax.xaxis.set_major_formatter(AutoMicrosecondFormatter(locator))