import re
from operator import itemgetter
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
We parse the table of times, which was copied from the PDF file into a TXT file.
# Parse the text dump into two dicts keyed by swimmer name:
#   times[name] -> the times listed under that swimmer's header row
#   lanes[name] -> the lane number taken from the header row
times = {}
lanes = {}
# Header row: "<number> <number> <UPPERCASE NAME ...>". The first number is
# not used; the second is read as the lane and group 3 as the name.
pattern_header = r"^([0-9]+) ([0-9]+) ([A-Z]+ ([A-Z]{2,})*)"
# Time row: a line holding nothing but one decimal number.
pattern_time = r"^([0-9]+\.[0-9]+)$"
with open('1500_women_free.txt', 'r') as f:
    for line in f:
        mh = re.match(pattern_header, line)
        if mh:
            # A header starts a new swimmer; later time rows attach to it.
            name = mh.group(3)
            times[name] = []
            lanes[name] = int(mh.group(2))
        mt = re.match(pattern_time, line)
        if mt:
            # Time rows belong to the most recently seen header.
            times[name].append(float(mt.group(1)))
# Convert each list to a NumPy array so it can be sliced and averaged later.
# dict.items() is used instead of the Python-2-only dict.iteritems() so this
# cell runs on both Python 2 and Python 3.
times = {name: np.array(t) for name, t in times.items()}
Let's sort the names by lane.
# Swimmer names ordered by ascending lane number. dict.items() replaces the
# Python-2-only dict.iteritems() so this line also runs on Python 3.
names = [name for name, _ in sorted(lanes.items(), key=itemgetter(1))]
For each swimmer, we plot the lap times, split into forward (blue) and backward (red) lengths.
# One panel per swimmer on a 2x4 grid: even-indexed times are drawn in blue,
# odd-indexed times in red.
plt.figure(figsize=(12, 4))
for panel, swimmer in enumerate(names, 1):
    laps = times[swimmer]
    plt.subplot(2, 4, panel)
    plt.plot(laps[::2], 'o-b')
    plt.plot(laps[1::2], 'o-r')
    plt.xlabel('Length number')
    plt.ylabel('Time')
    plt.title("{0:s} (lane {1:d})".format(swimmer, lanes[swimmer]))
# Adjust spacing once all panels exist so labels and titles do not overlap.
plt.tight_layout()
We immediately observe that the forward length takes longer for the swimmers in lanes 1-4, whereas the return length takes longer in lanes 5-8. Let's look at the average difference between forward and backward times.
# Bar chart of (mean of even-indexed times) - (mean of odd-indexed times),
# one bar per swimmer in lane order.
plt.figure(figsize=(12, 4))
# NOTE: from here on `lanes` is the array 1..N of bar positions, no longer the
# name -> lane dict built while parsing.
lanes = np.arange(1, len(lanes) + 1)
differences = []
for swimmer in names:
    laps = times[swimmer]
    differences.append(laps[::2].mean() - laps[1::2].mean())
plt.bar(lanes, differences)
plt.xlabel('Lane')
# Trailing semicolon keeps the notebook from echoing the returned Text object.
plt.ylabel('Average difference between forward and backward ways');
The monotonic dependence of this time difference on the lane is troubling to say the least, even though we cannot draw conclusions directly from this basic analysis.