import os

import matplotlib.pyplot as plt
import numpy
import numpy as np

# One directory of '*.time' files per benchmarked machine.
datadirs = ['../execublog1/data-athyra/',
            '../execublog1/data-ec2-i1/',
            '../execublog1/data-ec2-i2/',
            '../execublog1/data-ec2-i3/']

# Human-readable label for each entry of ``datadirs`` (same order).
names = ['our server',
         'm2.2xlarge',
         'm2.2xlarge/1TB EBS 100 IOPS',
         'm2.4xlarge']

# Sort key deciding the left-to-right order of the machines in every plot.
order = ['a', 'b', 'd', 'c']             # note: different zones!

# Colours and legend entries of the four tools, in plotting order.
BAR_COLORS = ('r', 'y', 'g', 'b')
LEGEND_NAMES = ('Jellyfish', 'DSK (no threads!)', 'Tallymer', 'khmer (1% fp)')

# Plain-matplotlib equivalent of the IPython pylab helper ``figsize(12, 6)``.
plt.rcParams['figure.figsize'] = [12, 6]

# Show the working directory (stands in for the IPython ``pwd`` magic); the
# relative paths above are resolved against it.
print(os.getcwd())


def _parse_time_line(line):
    """Parse one timing summary line into ``(user, system, wall_seconds)``.

    The line is expected to contain ``<float>user``, ``<float>system`` and
    ``[[h:]m:]s elapsed`` fields -- presumably the default output of GNU
    ``time``; TODO confirm against the actual '*.time' files.

    Raises ``ValueError``/``IndexError`` when a field cannot be parsed and
    ``AssertionError`` when the elapsed field has more than three
    colon-separated parts.
    """
    user = float(line.split('user')[0])
    system = float(line.split('system')[0].split()[-1])

    walltime = line.split('elapsed')[0].split()[-1].rsplit(':')
    assert len(walltime) <= 3

    hours = 0.
    minutes = 0.
    if len(walltime) == 3:
        hours = float(walltime[0])
        minutes = float(walltime[1])
    elif len(walltime) == 2:
        minutes = float(walltime[0])
    # The seconds are always the LAST field.  Indexing with [1] (as the code
    # did before) is only right for 'm:s'; it re-reads the minutes for
    # 'h:m:s' and raises IndexError for a bare seconds value.
    seconds = float(walltime[-1])

    return user, system, hours * 60 * 60 + minutes * 60 + seconds


def get_time_mem(filename):
    """Return ``(wall_seconds, max_memory)`` read from a 'time' output file.

    The first line containing 'system' is parsed.  The memory value is the
    number found between 'avgdata ' and 'max' on that line, returned as a
    float in whatever unit the file uses.

    Raises ``Exception(filename)`` when no line contains 'system'.
    """
    with open(filename) as handle:
        for line in handle:
            line = line.rstrip()
            if 'system' in line:
                _, _, wall_seconds = _parse_time_line(line)
                mem = line.split('avgdata ')[1].split('max')[0]
                return float(wall_seconds), float(mem)

    raise Exception(filename)


def _get_time_all(filename):
    """Return ``(system, user, wall_seconds)`` read from a 'time' output file.

    Note the order: system time comes first, then user time.

    Raises ``Exception(filename)`` when no line contains 'system'.
    """
    with open(filename) as handle:
        for line in handle:
            line = line.rstrip()
            if 'system' in line:
                user, system, wall_seconds = _parse_time_line(line)
                return float(system), float(user), float(wall_seconds)

    raise Exception(filename)


def get_time_all(*filenames):
    """Sum ``(system, user, wall_seconds)`` over several 'time' output files."""
    aa, bb, cc = 0., 0., 0.
    for filename in filenames:
        a, b, c = _get_time_all(filename)
        aa += a
        bb += b
        cc += c
    return aa, bb, cc


def get_wall_ratio(triple):
    """Return user time divided by wall time for a ``get_time_all`` triple."""
    return triple[1] / triple[2]


def get_sys_ratio(triple):
    """Return system time divided by wall time for a ``get_time_all`` triple."""
    return triple[0] / triple[2]


def plot_grouped_bars(series, labels, title, ylim, legend_loc, format_height,
                      outfile):
    """Draw one grouped bar chart (one group per machine) and save it.

    series        -- four equally long value lists, in the order of
                     ``LEGEND_NAMES`` / ``BAR_COLORS``
    labels        -- x tick labels, one per group
    title         -- plot title
    ylim          -- upper limit of the y axis (lower limit is 0)
    legend_loc    -- matplotlib legend location string
    format_height -- callable turning a bar height into its text label
    outfile       -- path of the image file to write
    """
    ind = np.arange(len(labels))   # the x locations for the groups
    width = 0.2                    # the width of the bars

    fig = plt.figure()
    ax = fig.add_subplot(111)
    rects = [ax.bar(ind + position * width, values, width, color=color)
             for position, (values, color)
             in enumerate(zip(series, BAR_COLORS))]

    # add some text: axis label, title, ticks and legend
    # NOTE(review): the y label is 'Time (s)' on every plot, including the
    # unitless user/wall ratio plot -- kept as in the original.
    ax.set_ylabel('Time (s)')
    ax.set_title(title)
    ax.set_xticks(ind + 2 * width)
    ax.set_xticklabels(labels)
    ax.set_ylim(0, ylim)
    ax.legend([group[0] for group in rects], LEGEND_NAMES, loc=legend_loc)

    # attach a text label slightly above every bar
    for group in rects:
        for rect in group:
            height = rect.get_height()
            ax.text(rect.get_x() + rect.get_width() / 2., 1.05 * height,
                    format_height(height), ha='center', va='bottom')

    #plt.show()
    plt.savefig(outfile)


# ---------------------------------------------------------------------------
# Plot 1: wall-clock time per tool and machine
# ---------------------------------------------------------------------------

jelly = {}
khmer = {}
dsk = {}
tally = {}

for datadir in datadirs:
    # Tallymer and Jellyfish each run in two steps; their times are added.
    tally[datadir] = get_time_mem(datadir + 'mkindex_5_part1_22.time')[0] + \
                     get_time_mem(datadir + 'suffix_5_part1.time')[0]
    jelly[datadir] = get_time_mem(datadir + 'jelly_5_22.time1')[0] + \
                     get_time_mem(datadir + 'jelly_5_22.time2')[0]
    khmer[datadir] = get_time_mem(datadir + 'bloom_5_1_22.time1')[0]
    dsk[datadir] = get_time_mem(datadir + 'dsk_5_22.time')[0]

print(jelly)
print(khmer)
print(dsk)

jelly_y = []
dsk_y = []
khmer_y = []
tally_y = []
labels = []

for _, label, dirname in sorted(zip(order, names, datadirs)):
    print(dirname)
    labels.append(label)
    jelly_y.append(jelly[dirname])
    dsk_y.append(dsk[dirname])
    khmer_y.append(khmer[dirname])
    tally_y.append(tally[dirname])

plot_grouped_bars((jelly_y, dsk_y, tally_y, khmer_y), labels,
                  'Time to generate abundance histograms of 48.7m reads',
                  20000, 'upper left',
                  lambda height: '%d' % int(height),
                  '../execublog1/time.png')


# ---------------------------------------------------------------------------
# System / user / wall times and their ratios
# ---------------------------------------------------------------------------

jelly2 = {}
khmer2 = {}
dsk2 = {}
tally2 = {}

for datadir in datadirs:
    tally2[datadir] = get_time_all(datadir + 'mkindex_5_part1_22.time',
                                   datadir + 'suffix_5_part1.time')
    jelly2[datadir] = get_time_all(datadir + 'jelly_5_22.time1',
                                   datadir + 'jelly_5_22.time2')
    khmer2[datadir] = get_time_all(datadir + 'bloom_5_1_22.time1')
    dsk2[datadir] = get_time_all(datadir + 'dsk_5_22.time')

jelly_sysr = {}
khmer_sysr = {}
dsk_sysr = {}
tally_sysr = {}

jelly_wallr = {}
khmer_wallr = {}
dsk_wallr = {}
tally_wallr = {}

for datadir in datadirs:
    jelly_sysr[datadir] = get_sys_ratio(jelly2[datadir])
    jelly_wallr[datadir] = get_wall_ratio(jelly2[datadir])

    khmer_sysr[datadir] = get_sys_ratio(khmer2[datadir])
    khmer_wallr[datadir] = get_wall_ratio(khmer2[datadir])

    dsk_sysr[datadir] = get_sys_ratio(dsk2[datadir])
    dsk_wallr[datadir] = get_wall_ratio(dsk2[datadir])

    tally_sysr[datadir] = get_sys_ratio(tally2[datadir])
    tally_wallr[datadir] = get_wall_ratio(tally2[datadir])

# '%s' formatting emits the same text as the Python 2 statement
# ``print 'jelly', jelly_sysr`` and also works on Python 3.
print('jelly %s' % (jelly_sysr,))
print('dsk %s' % (dsk_sysr,))
print('tally %s' % (tally_sysr,))
print('khmer %s' % (khmer_sysr,))

print(jelly2)
print(khmer2)
print(dsk2)
print(tally2)

print(jelly_wallr)
print(dsk_wallr)
print(tally_wallr)
print(khmer_wallr)


# ---------------------------------------------------------------------------
# Plot 2: ratio of user time to wall time
# ---------------------------------------------------------------------------

jelly_w = []
dsk_w = []
khmer_w = []
tally_w = []
labels = []

for _, label, dirname in sorted(zip(order, names, datadirs)):
    print(dirname)
    labels.append(label)
    jelly_w.append(jelly_wallr[dirname])
    dsk_w.append(dsk_wallr[dirname])
    khmer_w.append(khmer_wallr[dirname])
    tally_w.append(tally_wallr[dirname])

plot_grouped_bars((jelly_w, dsk_w, tally_w, khmer_w), labels,
                  'ratio of usertime to walltime',
                  10, 'upper right',
                  lambda height: '%1.2f' % (height + 0.005),
                  '../execublog1/usertime_ratio.png')


# ---------------------------------------------------------------------------
# Plot 3: system time
# ---------------------------------------------------------------------------

jelly_z = []
dsk_z = []
khmer_z = []
tally_z = []
labels = []

for _, label, dirname in sorted(zip(order, names, datadirs)):
    print(dirname)
    labels.append(label)
    # element 0 of a get_time_all() triple is the system time
    jelly_z.append(jelly2[dirname][0])
    dsk_z.append(dsk2[dirname][0])
    khmer_z.append(khmer2[dirname][0])
    tally_z.append(tally2[dirname][0])

plot_grouped_bars((jelly_z, dsk_z, tally_z, khmer_z), labels,
                  'system time',
                  450, 'upper left',
                  lambda height: '%.1f' % (height + 0.05),
                  '../execublog1/system_time.png')