%cd g3/run
import getData
reload(getData)
matplotlib.rc('font', size=12, weight='normal')
matplotlib.rc('legend', fontsize=10)
matplotlib.rc('xtick', direction='in')
matplotlib.rc('xtick.major', size=4)
rot = 50
colors = ['#3377bb', '#33bb77', '#bb7733', '#bb3377']
from matplotlib.font_manager import FontProperties
font = FontProperties()
font.set_size('small')
--------------------------------------------------------------------------- ImportError Traceback (most recent call last) <ipython-input-1-95d87d2108eb> in <module>() 1 get_ipython().magic(u'cd g3/run') ----> 2 import getData 3 reload(getData) 4 matplotlib.rc('font', size=12, weight='normal') 5 matplotlib.rc('legend', fontsize=10) ImportError: No module named getData
[Errno 2] No such file or directory: 'g3/run'
# Benchmark names and, index for index, the input size used for each one.
# getBms() pairs the two lists with zip(), so they must stay the same length.
bms = [
    'backprop', 'bfs', 'gaussian', 'hotspot', 'lud',  # kmeans, lavaMD, cfd
    'nn', 'nw', 'pathfinder', 'srad', 'sort',
]
sizes = [
    'simlarge', 'simlarge', 'simsmall', 'simlarge', 'sim1day',
    'simlarge', 'simlarge', 'simlarge', 'simlarge', 'simlarge',
]
def getBms(path):
%cd $path
b = []
for name,size in zip(bms, sizes):
bm = getData.Benchmark('rodinia-nocopy')
bm.name = name
bm.biggest = size
bm.fullsystem = True
b.append(bm)
getData.setupBenchmarks(b)
return b
# Load the benchmark list once per results directory. Each name below is a
# notebook global read by the plotting cells that follow; the trailing notes
# say how those cells label the configuration.
b128_0_0 = getBms('g3/run/128-0-0')  # plotted as "Design 2"
b64_0_8k = getBms('g3/run/64-0-8k')  # plotted as "Design 3"
b64_1024_0 = getBms('g3/run/64-1024-0')  # plotted as "Shared L2"
b32_512_8k = getBms('g3/run/32-512-8k')  # plotted as "Shared L2 PWC"
bbaseline = getBms('g3/run/baseline')  # reference for "relative to ideal MMU" plots
bbaseline2 = getBms('g3/run/inf-inf-inf')  # only used by the printStats cell
b64_0_inf = getBms('g3/run/64-0-inf')  # plotted as "Ideal PWC"
#b128_0_0_infpw = getBms('g3/run/128-0-0-inf-pw') #simlarge for sort, bfs, nw (&1day) run out of memory
bprefetch = getBms('g3/run/64-0-8k-p')  # compared against b64_0_8k ("Design 3")
bpercu_128 = getBms('g3/run/perCU-128')  # plotted as "Design 1"
# getBms() leaves the working directory inside the last results directory it
# visited, so change directory again before the figure cells save their output.
%cd g3/run
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-2-982dec044c86> in <module>() 15 getData.setupBenchmarks(b) 16 return b ---> 17 b128_0_0 = getBms('g3/run/128-0-0') 18 b64_0_8k = getBms('g3/run/64-0-8k') 19 b64_1024_0 = getBms('g3/run/64-1024-0') <ipython-input-2-982dec044c86> in getBms(path) 8 b = [] 9 for name,size in zip(bms, sizes): ---> 10 bm = getData.Benchmark('rodinia-nocopy') 11 bm.name = name 12 bm.biggest = size NameError: global name 'getData' is not defined
[Errno 2] No such file or directory: 'g3/run/128-0-0'
allaccesses = sum([getData.allMemAccessesPKC(bm) for bm in bbaseline])/len(bbaseline)
laneaccesses = sum([getData.memAccessesPKC(bm) for bm in bbaseline])/len(bbaseline)
globalaccesses = sum([getData.tlbPKCycle(bm) for bm in bbaseline])/len(bbaseline)
first = True
e = .2
w = .4
gap = .2
figure(figsize=(7,4))
for bm in bbaseline:
if first:
first = False
bar(e, getData.allMemAccessesPKC(bm), width=w, color=colors[0], label="Shared mem lane instructions")
bar(e, getData.memAccessesPKC(bm), width=w, color=colors[1], label="Global mem lane instructions")
e += w
bar(e, getData.tlbPKCycle(bm), width=w, color=colors[2], label="Global memory accesses")
e += w
else:
bar(e, getData.allMemAccessesPKC(bm), width=w, color=colors[0])
bar(e, getData.memAccessesPKC(bm), width=w, color=colors[1])
e += w
bar(e, getData.tlbPKCycle(bm), width=w, color=colors[2])
e += w
e += gap
e += gap + w*2
bar(e, allaccesses, width=w, color=colors[0])
bar(e, laneaccesses, width=w, color=colors[1])
e += w
bar(e, globalaccesses, width=w, color=colors[2])
e += w
bmNames = [bm.name for bm in bbaseline] + ['', 'Average']
bmNames = ['particlefilter' if i=='particlefilter_naive' else i for i in bmNames]
tw = w*2
xticks(arange(len(bmNames))*(tw+gap)+gap+(tw)/2, bmNames, rotation=rot)
#legend(loc='upper left', ncol=1)
#ylim((0,1500))
xlim((0,12.5))
ylabel('Operations per thousand cycles')
print "Average memory lane instructions PKC:", allaccesses
print "average shared lane:", (sum([getData.allMemAccessesPKC(bm) for bm in bbaseline])-sum([getData.memAccessesPKC(bm) for bm in bbaseline]))/len(bbaseline)
print "average global lane:", laneaccesses
print "average accesses:", globalaccesses
print "one global memory accesses for every", laneaccesses/globalaccesses, "global memory lane instructions"
print "This is a", 1-globalaccesses/laneaccesses, "reduction", "(", laneaccesses/globalaccesses, "x )"
print "Average global mem accesses PKC:", globalaccesses
print "Max global mem accesses PKC:", max([getData.tlbPKCycle(bm) for bm in bbaseline])
tight_layout()
savefig('mem_ops_pkc.png', dpi=600)
Average memory lane instructions PKC: 602.503665734 average shared lane: 334.053721174 average global lane: 268.449944559 average accesses: 39.3116536215 one global memory accesses for every 6.82876246173 global memory lane instructions This is a 0.853560582081 reduction ( 6.82876246173 x ) Average global mem accesses PKC: 39.3116536215 Max global mem accesses PKC: 117.048785325
I suspect the problem here is that, with so many simultaneous requests, requests from one CU get queued behind requests from another CU.
timebaseline = array([float(bm.totalActiveCycles) for bm in bbaseline])
d1 = timebaseline/array([bm.totalActiveCycles for bm in bpercu_128])
d2 = timebaseline/array([bm.totalActiveCycles for bm in b128_0_0])
d3 = timebaseline/array([bm.totalActiveCycles for bm in b64_0_8k])
first = True
e = .2
w = .4
gap = .2
figure(figsize=(7,4))
for i in range(len(timebaseline)):
if first:
first = False
bar(e, d1[i], width=w, color=colors[0], label="Design 1")
e += w
bar(e, d2[i], width=w, color=colors[1], label="Design 2")
e += w
bar(e, d3[i], width=w, color=colors[2], label="Design 3")
e += w
else:
bar(e, d1[i], width=w, color=colors[0])
e += w
bar(e, d2[i], width=w, color=colors[1])
e += w
bar(e, d3[i], width=w, color=colors[2])
e += w
e += gap
e += gap + w*3
bar(e, float(sum(timebaseline))/sum([bm.totalActiveCycles for bm in bpercu_128]), width=w, color=colors[0])
e += w
bar(e, float(sum(timebaseline))/sum([bm.totalActiveCycles for bm in b128_0_0]), width=w, color=colors[1])
e += w
bar(e, float(sum(timebaseline))/sum([bm.totalActiveCycles for bm in b64_0_8k]), width=w, color=colors[2])
e += w
bmNames = [bm.name for bm in bbaseline] + ['', 'Average']
bmNames = ['particlefilter' if i=='particlefilter_naive' else i for i in bmNames]
tw = w*3
xticks(arange(len(bmNames))*(tw+gap)+gap+(tw)/2, bmNames, rotation=60)
legend(prop=font, ncol=3)
ylabel('Performance relative\nto ideal MMU', multialignment='center')
hlines(1, 0, 19, color='grey', linewidth=3)
print "Worst case for design 3:", 1-min(d3)
d1avg = float(sum(timebaseline))/sum([bm.totalActiveCycles for bm in bpercu_128])
print "Average d1:", d1avg, "slowdown", 1/d1avg
print "Average d2:", float(sum(timebaseline))/sum([bm.totalActiveCycles for bm in b128_0_0])
print "Average d3:", float(sum(timebaseline))/sum([bm.totalActiveCycles for bm in b64_0_8k])
tight_layout()
savefig('rel_perf.png', dpi=600)
print timebaseline/array([bm.totalActiveCycles for bm in bpercu_128])
Worst case for design 3: 0.125905544128 Average d1: 0.3034547249 slowdown 3.29538451026 Average d2: 0.29981480886 Average d3: 0.986396429335 [ 0.09885423 0.05300799 0.91710031 0.32362244 0.84591207 0.21621842 0.47121307 0.10630337 0.08068128 0.27030214]
e = .2
w = .4
gap = .2
first = True
figure(figsize=(7,4))
for bm in bpercu_128:
if first:
# add +.01 if you get an error
bar(e, bm.getStat('m5stats', 'system.gpu.shader_mmu.concurrentWalks::mean'), width=w, color=colors[0], label="Average", log=True)
e += w
bar(e, bm.getStat('m5stats', 'system.gpu.shader_mmu.concurrentWalks').mx, width=w, color=colors[1], label="Maximum", log=True)
first = False
else:
bar(e, bm.getStat('m5stats', 'system.gpu.shader_mmu.concurrentWalks::mean'), width=w, color=colors[0], log=True)
e += w
bar(e, bm.getStat('m5stats', 'system.gpu.shader_mmu.concurrentWalks').mx, width=w, color=colors[1], log=True)
e += w
e += gap
e += w*2+gap
weighted = [bm.getStat('m5stats', 'system.gpu.shader_mmu.concurrentWalks::mean') for bm in bpercu_128]
weightedmx = [bm.getStat('m5stats', 'system.gpu.shader_mmu.concurrentWalks').mx for bm in bpercu_128]
bar(e, float(sum(weighted))/len(bpercu_128), width=w, color=colors[0], log=True)
e += w
bar(e, float(sum(weightedmx))/len(bpercu_128), width=w, color=colors[1], log=True)
yscale('log')
grid(True, axis='y', ls='--')
bmNames = [bm.name for bm in bbaseline] + ['', 'Average']
bmNames = ['particlefilter' if i=='particlefilter_naive' else i for i in bmNames]
xticks(arange(len(bmNames))*(w*2+gap)+gap+(w*2)/2, bmNames, rotation=60)
xlim((0,12.5))
ylim((1,1500))
hlines(2, 0, 13, color='red', linewidth=3)
legend(loc="upper left", ncol=2)
ylabel('Page walk queue size (log scale)')
tight_layout()
savefig('pw_queue.png', dpi=600)
print "Average average:", float(sum(weighted))/len(bpercu_128)
print "Max average:", max(weighted)
print "Average max:", float(sum(weightedmx))/len(bpercu_128)
print "NOTE: THIS IS NOT WEIGHTED ANYMORE"6.22
Average average: 60.3987777 Max average: 139.726087 Average max: 277.3 NOTE: THIS IS NOT WEIGHTED ANYMORE
timebaseline = array([float(bm.totalActiveCycles) for bm in bpercu_128])
d2 = timebaseline/array([bm.totalActiveCycles for bm in b128_0_0])
d3 = timebaseline/array([bm.totalActiveCycles for bm in b64_0_8k])
e = .2
w = .4
gap = .2
figure(figsize=(7,4))
lim = 10
first = True
for i in range(len(timebaseline)):
if first:
first = False
bar(e, 1, width=w, color=colors[0], label="Design 1")
e += w
bar(e, d2[i], width=w, color=colors[1], label="Design 2")
e += w
bar(e, d3[i], width=w, color=colors[2], label="Design 3")
e += w
else:
bar(e, 1, width=w, color=colors[0])
e += w
bar(e, d2[i], width=w, color=colors[1])
e += w
bar(e, d3[i], width=w, color=colors[2])
e += w
e += gap
if d3[i] > lim:
annotate(str(int(d3[i]))+'x', xy=((arange(len(bmNames))*(tw+gap)+gap+(tw)/2)[i], lim), \
bbox=dict(boxstyle='round,pad=0.5', fc=colors[2], alpha=0.3), \
xytext = (19, -15), textcoords = 'offset points')
if d2[i] > lim:
annotate(str(int(d2[i]))+'x', xy=((arange(len(bmNames))*(tw+gap)+gap+(tw)/2)[i], lim), \
bbox=dict(boxstyle='round,pad=0.5', fc=colors[1], alpha=0.3), \
xytext = (-16, -15), textcoords = 'offset points')
e += gap + w*3
bar(e, 1, width=w, color=colors[0])
e += w
bar(e, float(sum(timebaseline))/sum([bm.totalActiveCycles for bm in b128_0_0]), width=w, color=colors[1])
e += w
bar(e, float(sum(timebaseline))/sum([bm.totalActiveCycles for bm in b64_0_8k]), width=w, color=colors[2])
e += w
ylim((0,lim))
xlim((0,17))
hlines(1, 0, 17, color='grey', linewidth=3)
bmNames = [bm.name for bm in bbaseline] + ['', 'Average']
bmNames = ['particlefilter' if i=='particlefilter_naive' else i for i in bmNames]
tw = w*3
xticks(arange(len(bmNames))*(tw+gap)+gap+(tw)/2, bmNames, rotation=60)
ylabel('Speedup relative to Design 1')
legend()
tight_layout()
savefig('rel_perf_1_2_3.png', dpi=600)
print "Average speedup D1:", float(sum(timebaseline))/sum([bm.totalActiveCycles for bm in b128_0_0])
print "Average speedup D2:", float(sum(timebaseline))/sum([bm.totalActiveCycles for bm in b64_0_8k])
Average speedup D1: 0.988005077063 Average speedup D2: 3.25055551421
e = .2
w = .4
gap = .2
figure(figsize=(7,4))
for bm in b128_0_0:
bar(e, (1-getData.tlbHitRate(bm))*100, width=w, color=colors[0])
e += w
e += gap
e += w+gap
misses = sum([getData.totalTLBMiss(bm) for bm in b128_0_0])
hits = sum([getData.totalTLBHit(bm) for bm in b128_0_0])
bar(e, float(misses)/(hits+misses) * 100, width=w, color=colors[0])
bmNames = [bm.name for bm in bbaseline] + ['', 'Average']
bmNames = ['particlefilter' if i=='particlefilter_naive' else i for i in bmNames]
xticks(arange(len(bmNames))*(w+gap)+gap+(w)/2, bmNames, rotation=60)
ylabel('Miss rate')
print "Average miss rate:", float(misses)/(hits+misses)
print "Max miss rate:", max([1-getData.tlbHitRate(bm) for bm in b128_0_0])
from matplotlib.ticker import FuncFormatter
gca().yaxis.set_major_formatter(FuncFormatter(lambda y, position: str(y)+'%'))
tight_layout()
savefig('miss_rate.png', dpi=600)
Average miss rate: 0.290589595693 Max miss rate: 0.671991013389
e = .2
w = .4
gap = .2
figure(figsize=(7,4))
for bm in b128_0_0:
bar(e, getData.tlbMissPKC(bm), width=w, color=colors[0])
e += w
e += gap
e += w+gap
misses = sum([getData.totalTLBMiss(bm) for bm in b128_0_0])
hits = sum([getData.totalTLBHit(bm) for bm in b128_0_0])
cycles = sum([bm.totalActiveCycles for bm in b128_0_0])
bar(e, float(misses)/(cycles)*1000, width=w, color=colors[0])
bmNames = [bm.name for bm in bbaseline] + ['', 'Average']
bmNames = ['particlefilter' if i=='particlefilter_naive' else i for i in bmNames]
xticks(arange(len(bmNames))*(w+gap)+gap+(w)/2, bmNames, rotation=60)
ylabel('Misses per thousand cycles')
print "Average misses PKC:", float(misses)/(cycles)*1000
print "Max misses PKC:", max([getData.tlbMissPKC(bm) for bm in b128_0_0])
tight_layout()
savefig('miss_pkc.png', dpi=600)
Average misses PKC: 1.42145965551 Max misses PKC: 12.9517447675
# Figure: mean of the shader MMU 'l2Sharers' statistic per benchmark for the
# 64-1024-0 runs. There is no average group in this figure.
# Bar geometry: e is the left edge of the next bar.
e = .2
w = .4
gap = .2
figure(figsize=(7,4))
for bm in b64_1024_0:
    mean_sharers = bm.getStat('m5stats', 'system.gpu.shader_mmu.l2Sharers::mean')
    bar(e, mean_sharers, width=w, color=colors[0])
    e += w
    e += gap

bmNames = [bm.name for bm in bbaseline]
bmNames = ['particlefilter' if label == 'particlefilter_naive' else label for label in bmNames]
tick_centers = arange(len(bmNames))*(w+gap)+gap+(w)/2
xticks(tick_centers, bmNames, rotation=rot)
ylim((0,16))
ylabel("CUs sharing L2 TLB entry")
tight_layout()
savefig('sharing.png', dpi=600)
timebaseline = array([float(bm.totalActiveCycles) for bm in bbaseline])
l21 = timebaseline/array([bm.totalActiveCycles for bm in b64_1024_0])
l22 = timebaseline/array([bm.totalActiveCycles for bm in b32_512_8k])
idealpwc = timebaseline/array([bm.totalActiveCycles for bm in b64_0_inf])
first = True
e = .2
w = .4
gap = .2
figure(figsize=(7,4))
for i in range(len(timebaseline)):
if first:
first = False
bar(e, l21[i], width=w, color=colors[0], label="Shared L2")
e += w
bar(e, l22[i], width=w, color=colors[1], label="Shared L2 PWC")
e += w
bar(e, idealpwc[i], width=w, color=colors[2], label="Ideal PWC")
e += w
else:
bar(e, l21[i], width=w, color=colors[0])
e += w
bar(e, l22[i], width=w, color=colors[1])
e += w
bar(e, idealpwc[i], width=w, color=colors[2])
e += w
e += gap
e += gap + w*3
bar(e, float(sum(timebaseline))/sum([bm.totalActiveCycles for bm in b64_1024_0]), width=w, color=colors[0])
e += w
bar(e, float(sum(timebaseline))/sum([bm.totalActiveCycles for bm in b32_512_8k]), width=w, color=colors[1])
e += w
bar(e, float(sum(timebaseline))/sum([bm.totalActiveCycles for bm in b64_0_inf]), width=w, color=colors[2])
e += w
bmNames = [bm.name for bm in bbaseline] + ['', 'Average']
bmNames = ['particlefilter' if i=='particlefilter_naive' else i for i in bmNames]
tw = w*3
xticks(arange(len(bmNames))*(tw+gap)+gap+(tw)/2, bmNames, rotation=rot)
legend(prop=font, ncol=3)
ylim((0,1.3))
ylabel('Performance relative\nto ideal MMU', multialignment='center')
hlines(1, 0, 19, color='grey', linewidth=3)
tight_layout()
savefig('rel_perf_other.png', dpi=600)
print "BFS with 32-512-8k:", l22[1]
print "BFS with 64-0-inf:", idealpwc[1]
print "NW with 64-0-inf:", idealpwc[6]
print "average shared l2 pwc:", float(sum(timebaseline))/sum([bm.totalActiveCycles for bm in b32_512_8k])
print "average with ideal:", float(sum(timebaseline))/sum([bm.totalActiveCycles for bm in b64_0_inf])
%pwd
BFS with 32-512-8k: 0.892865086929 BFS with 64-0-inf: 0.962639565301 NW with 64-0-inf: 0.993998526389 average shared l2 pwc: 0.987681541834 average with ideal: 0.997207642569
timebaseline = array([float(bm.totalActiveCycles) for bm in b64_0_8k])
pref = timebaseline/array([bm.totalActiveCycles for bm in bprefetch])
first = True
e = .2
w = .4
gap = .2
figure(figsize=(7,4))
for i in range(len(timebaseline)):
if first:
first = False
bar(e, pref[i], width=w, color=colors[0])
e += w
else:
bar(e, pref[i], width=w, color=colors[0])
e += w
e += gap
e += gap + w
bar(e, float(sum(timebaseline))/sum([bm.totalActiveCycles for bm in bprefetch]), width=w, color=colors[0])
e += w
bmNames = [bm.name for bm in bbaseline] + ['', 'Average']
bmNames = ['particlefilter' if i=='particlefilter_naive' else i for i in bmNames]
tw = w
xticks(arange(len(bmNames))*(tw+gap)+gap+(tw)/2, bmNames, rotation=rot)
legend(prop=font)
ylim((.8,1.2))
ylabel('Performance relative\nto Design 3', multialignment='center')
#hlines(1, 0, 19, color='grey', linewidth=3)
tight_layout()
#savefig('rel_perf_other.png', dpi=600)
print "Average relative performance:", float(sum(timebaseline))/sum([bm.totalActiveCycles for bm in bprefetch])
Average relative performance: 0.990918545043
energy = [1, float(13)/16, float(74)/16, float(20)/16]
area = [1.0198384, 0.8442249, 0.881394158690578, 0.791155158690578]
area = [i/area[0] for i in area]
w = .4
g = .1
e = g
figure(figsize=(8,4))
subplot(122)
for i in range(4):
bar(e, energy[i], width=w, color=colors[0])
e += w + g
tw = w+g
xticks(arange(4)*(tw)+tw/2+g/2, ["Design 2", "Design 3", "Shared L2", "Shared L2\nPWC"], rotation=rot)
ylabel('MMU dynamic energy\nrelative to Design 2', multialignment='center')
title('(b) Dynamic Energy\nof the MMU')
subplot(121)
e = g
for i in range(4):
bar(e, area[i], width=w, color=colors[1])
e += w + g
xticks(arange(4)*(tw)+tw/2+g/2, ["Design 2", "Design 3", "Shared L2", "Shared L2\nPWC"], rotation=rot)
ylabel('MMU area relative\nto Design 2', multialignment='center')
title('(a) Area')
tight_layout()
for i in range(4):
print ["Design 2", "Shared L2", "Design 3", "Shared L2 PWC"][i], '\t', energy[i], '\t', area[i]
savefig('energy_area.png', dpi=600)
Design 2 1 1.0 Shared L2 0.8125 0.827802620494 Design 3 4.625 0.864248844416 Shared L2 PWC 1.25 0.77576521799
misses = sum([getData.totalTLBMiss(bm) for bm in b64_1024_0])
hits = sum([getData.totalTLBHit(bm) for bm in b64_1024_0])
print "Average miss rate for 64_1024_0:", float(misses)/(misses+hits)
Average miss rate for 64_1024_0: 0.268029268781
# Print, per benchmark of the inf-inf-inf runs, the shader MMU page-walk count
# multiplied by 4 and divided by 1024.
# NOTE(review): presumably 4 KB pages expressed in MB -- confirm.
# Kept as a lambda on purpose: the recorded output uses "<lambda>" as the
# column header, which a named function would change.
scaled_pagewalks = lambda bm: bm.getStat('m5stats', 'system.gpu.shader_mmu.numPagewalks')*4/float(1024)
getData.printStats(bbaseline2, [scaled_pagewalks])
<lambda> rodinia-nocopy backprop simlarge 74.03 rodinia-nocopy bfs simlarge 37.22 rodinia-nocopy gaussian simsmall 0.34 rodinia-nocopy hotspot simlarge 12.01 rodinia-nocopy lud sim1day 4.02 rodinia-nocopy nn simlarge 0.5 rodinia-nocopy nw simlarge 128.06 rodinia-nocopy pathfinder simlarge 38.54 rodinia-nocopy srad simlarge 96.01 rodinia-nocopy sort simlarge 208.01
# Side-by-side comparison tables of several statistics across configurations.
# `stats` and `labels` are not read by the calls below.
stats = [lambda bm: bm.totalActiveCycles, lambda bm: bm.ipc, getData.totalTLBMiss, getData.tlbHitRate]
labels = ['Active cycles', 'IPC', 'TLB Misses', 'TLB Hit rate']
# NOTE(review): this rebinds `bms`, which getBms() zips with `sizes`; calling
# getBms() again after this cell will not iterate over benchmark names.
# NOTE(review): the only assignment of b128_0_0_infpw visible in this notebook
# is commented out, so this line raises NameError unless it is defined
# elsewhere -- confirm.
bms = [b128_0_0, b64_0_8k, b128_0_0_infpw, bbaseline]
names = ["128-0-0", "64-0-8k", "b128_0_0_infpw", "baseline"]
getData.compareStat(lambda bm: bm.sumShaderStat('lsq.tlbMissLatency').avg, bms, "Average miss latency", names)
getData.compareStat(lambda bm: bm.ipc, bms, "IPC", names)
# This table reports totalActiveCycles; its title was previously a copy-pasted
# "IPC", which produced two tables with the same heading.
getData.compareStat(lambda bm: bm.totalActiveCycles, bms, "Active cycles", names)
getData.compareStat(getData.avgPFLatency, bms, "Average page fault latency", names)
getData.compareStat(lambda bm: bm.getStat('m5stats', 'system.gpu.shader_mmu.concurrentWalks::mean'), bms, "Outstanding walks", names)
Average miss latency 128-0-0 64-0-8k b128_0_0_infpw baseline backprop 672.57(1.00) 141.07(0.21) 701.27(1.04) 215.37(0.32) bfs 4653.36(1.00) 27.03(0.01) 4779.42(1.03) 23.20(0.00) gaussian 2636.10(1.00) 25.11(0.01) 2791.59(1.06) 4.40(0.00) hotspot 763.89(1.00) 47.21(0.06) 763.89(1.00) 15.69(0.02) kmeans 966.50(1.00) 303.31(0.31) 949.45(0.98) 191.60(0.20) lud 1664.61(1.00) 26.73(0.02) 1661.83(1.00) 5.18(0.00) nn 360.28(1.00) 34.77(0.10) 378.12(1.05) 7.38(0.02) nw 4649.40(1.00) 40.06(0.01) 4893.22(1.05) 5.52(0.00) pathfinder 1067.05(1.00) 43.31(0.04) 1067.05(1.00) 12.76(0.01) srad 3510.11(1.00) 42.35(0.01) 3401.47(0.97) 19.91(0.01) sort 3133.56(1.00) 37.82(0.01) 3461.47(1.10) 6.08(0.00) IPC 128-0-0 64-0-8k b128_0_0_infpw baseline backprop 8.25(1.00) 8.73(1.06) 8.24(1.00) 8.72(1.06) bfs 0.33(1.00) 4.51(13.64) 0.33(0.99) 4.76(14.39) gaussian 9.13(1.00) 9.52(1.04) 9.14(1.00) 9.71(1.06) hotspot 11.55(1.00) 12.94(1.12) 11.55(1.00) 13.10(1.13) kmeans 4.71(1.00) 5.36(1.14) 4.73(1.00) 5.43(1.15) lud 0.69(1.00) 0.71(1.02) 0.69(1.00) 0.71(1.02) nn 1.16(1.00) 2.03(1.75) 1.11(0.95) 2.16(1.86) nw 0.38(1.00) 0.56(1.45) 0.38(0.99) 0.56(1.46) pathfinder 9.65(1.00) 9.65(1.00) 9.65(1.00) 9.63(1.00) srad 2.79(1.00) 8.98(3.21) 2.86(1.02) 9.07(3.24) sort 2.11(1.00) 3.09(1.46) 2.22(1.05) 3.13(1.49) IPC 128-0-0 64-0-8k b128_0_0_infpw baseline backprop 181186631.00(1.00) 171247075.00(0.95) 181560363.00(1.00) 171392143.00(0.95) bfs 1403330598.00(1.00) 102892913.00(0.07) 1419789346.00(1.01) 97502165.00(0.07) gaussian 24629610.00(1.00) 23605026.00(0.96) 24581706.00(1.00) 23158993.00(0.94) hotspot 9497092.00(1.00) 8478401.00(0.89) 9497092.00(1.00) 8371003.00(0.88) kmeans 293847.00(1.00) 257797.00(0.88) 292589.00(1.00) 254602.00(0.87) lud 3396552714.00(1.00) 3330494121.00(0.98) 3396704318.00(1.00) 3329397094.00(0.98) nn 920193.00(1.00) 525783.00(0.57) 965320.00(1.05) 495495.00(0.54) nw 34366669.00(1.00) 23717763.00(0.69) 34555829.00(1.01) 23482290.00(0.68) pathfinder 10633699.00(1.00) 
10635493.00(1.00) 10633699.00(1.00) 10651209.00(1.00) srad 53050321.00(1.00) 16517235.00(0.31) 51816627.00(0.98) 16352609.00(0.31) sort 5020225.00(1.00) 3428705.00(0.68) 4773737.00(0.95) 3379317.00(0.67) Average page fault latency 128-0-0 64-0-8k b128_0_0_infpw baseline backprop 4558.64(1.00) 4356.24(0.96) 4547.00(1.00) 4363.61(0.96) bfs -- -- -- -- gaussian -- -- -- -- hotspot -- -- -- -- kmeans 4069.00(1.00) 3300.00(0.81) 4110.00(1.01) 3301.00(0.81) lud 1497.00(1.00) 1499.00(1.00) 1499.00(1.00) 1499.00(1.00) nn -- -- -- -- nw -- -- -- -- pathfinder -- -- -- -- srad -- -- -- -- sort -- -- -- -- Outstanding walks 128-0-0 64-0-8k b128_0_0_infpw baseline backprop 3.70(1.00) 0.17(0.05) 3.77(1.02) 0.09(0.02) bfs 29.01(1.00) 3.02(0.10) 55.45(1.91) 0.10(0.00) gaussian 20.35(1.00) 0.89(0.04) 20.06(0.99) 4.52(0.22) hotspot 9.72(1.00) 1.07(0.11) 9.72(1.00) 0.48(0.05) kmeans 0.09(1.00) 0.07(0.77) 0.09(1.00) 0.09(1.00) lud 15.04(1.00) 3.09(0.21) 16.89(1.12) 1.46(0.10) nn 2.65(1.00) 0.21(0.08) 2.88(1.09) 0.14(0.05) nw 19.60(1.00) 10.58(0.54) 31.80(1.62) 6.29(0.32) pathfinder 10.53(1.00) 0.45(0.04) 10.53(1.00) 0.18(0.02) srad 25.69(1.00) 3.46(0.13) 29.20(1.14) 1.06(0.04) sort 28.18(1.00) 3.23(0.11) 49.43(1.75) 2.87(0.10)