This IPython notebook can be used to generate the plots in the following paper:
@inproceedings{yosinski_2014_NIPS,
title={How transferable are features in deep neural networks?},
author={Yosinski, Jason and Clune, Jeff and Bengio, Yoshua and Lipson, Hod},
booktitle={Advances in Neural Information Processing Systems 27 (NIPS '14)},
editor = {Z. Ghahramani and M. Welling and C. Cortes and N.D. Lawrence and K.Q. Weinberger},
publisher = {Curran Associates, Inc.},
pages = {3320--3328},
year={2014}
}
For more information and code, please visit http://yosinski.com/transfer
from pylab import *
%matplotlib inline
# Don't use Type 3 fonts when saving pdf (42 = TrueType; Type 3 bitmap
# fonts are rejected by many publication venues)
matplotlib.rcParams['pdf.fonttype'] = 42
from collections import defaultdict
import glob
import gzip
Define paths here
# Directory to load results from
results_root = '../results/'
# Directory to save plots in; set to None to skip saving plots entirely
#plot_dir = None
plot_dir = '.'
Function definitions for reading data/etc.
def figsize(width, height):
    """Set the default matplotlib figure size to (width, height) inches."""
    from pylab import rcParams
    rcParams['figure.figsize'] = (width, height)
def insert_most_recent(dictionary, key, value, timestamp):
    """Store (value, timestamp) under key, keeping only the most recent entry.

    An existing entry is overwritten only when the new timestamp is strictly
    newer. Timestamps are compared as strings, which orders correctly for the
    fixed-width caffe log timestamps parsed below.
    """
    if key in dictionary:
        stored_value, stored_timestamp = dictionary[key]
        if timestamp > stored_timestamp:
            dictionary[key] = (value, timestamp)
    else:
        dictionary[key] = (value, timestamp)

def read_info(filenames):
    """Parse caffe solver logs into per-iteration dicts.

    Returns four dicts -- (loss_train, lr, test_score_0, test_score_1) --
    each mapping iteration number -> (value, timestamp). When multiple logs
    cover the same iteration, only the most recently logged value is kept
    (via insert_most_recent). Files ending in '.gz' are read through gzip.
    """
    loss_train = {}
    lr = {}
    test_score_0 = {}
    test_score_1 = {}
    for filename in filenames:
        expecting_test_lines = 0
        testing_iter = None
        if filename[-3:] == '.gz':
            ff = gzip.open(filename)
        else:
            ff = open(filename)
        try:
            for line in ff:
                # We need to deal with lines like the following:
                # I0311 23:51:37.270727 24248 solver.cpp:207] Iteration 230020, lr = 0.0001
                # I0311 23:51:37.433928 24248 solver.cpp:65] Iteration 230020, loss = 1.56229
                #
                # I0311 14:41:38.035086 19955 solver.cpp:87] Iteration 236000, Testing net
                # I0311 14:43:52.531891 19955 solver.cpp:114] Test score #0: 0.56976
                # I0311 14:43:52.535956 19955 solver.cpp:114] Test score #1: 1.83616
                fields = line.split()
                if len(fields) <= 4:
                    continue
                timestamp = fields[0] + ' ' + fields[1]
                if expecting_test_lines > 0:
                    # We're one of the lines just after the 'Testing net' line
                    # OLD: demand scores (caused a problem once)
                    #assert 'Test score #' in line, 'Unexpected pattern found'
                    # NEW: just stop looking
                    if not 'Test score #' in line:
                        expecting_test_lines = 0
                        testing_iter = None
                        continue
                    if 'Test score #0:' in line:
                        insert_most_recent(test_score_0, testing_iter, float(fields[7]), timestamp)
                    elif 'Test score #1:' in line:
                        insert_most_recent(test_score_1, testing_iter, float(fields[7]), timestamp)
                    else:
                        raise Exception('Expected test score 0 or 1.')
                    expecting_test_lines -= 1
                    if expecting_test_lines == 0:
                        testing_iter = None
                elif fields[4] == 'Iteration':
                    iteration = int(fields[5].strip(','))
                    if fields[6] == 'lr':
                        insert_most_recent(lr, iteration, float(fields[8]), timestamp)
                    elif fields[6] == 'loss':
                        insert_most_recent(loss_train, iteration, float(fields[8]), timestamp)
                    elif ' '.join(fields[6:8]) == 'Testing net':
                        testing_iter = iteration
                        expecting_test_lines = 2
        finally:
            # Bug fix: the original never closed its handles, leaking one
            # open file per log read.
            ff.close()
    return loss_train, lr, test_score_0, test_score_1
def convert_dict(dd):
    """Convert a {key: (value, timestamp)} dict to sorted parallel arrays.

    Returns (keys, values) as arrays ordered by key; timestamps are dropped.
    """
    sorted_keys = sorted(dd.keys())
    most_recent_values = [dd[kk][0] for kk in sorted_keys]
    return array(sorted_keys), array(most_recent_values)
def load_results(filename_pattern):
    """Read every log file matching filename_pattern into {'idx','val'} records."""
    logfiles = glob.glob(filename_pattern)
    converted = [convert_dict(dd) for dd in read_info(logfiles)]
    ret = dict()
    # read_info returns (train loss, learning rate, valid top-1 acc, valid loss)
    names = ('train_loss', 'lr', 'test_score_0', 'test_score_1')
    for name, (idx, val) in zip(names, converted):
        ret[name] = {'idx': idx, 'val': val}
    # Also make test_score_* available under more friendly names:
    # score #0 is top-1 accuracy on the validation set, #1 the validation loss
    ret['valid_loss'] = ret['test_score_1']
    ret['valid_top1_acc'] = ret['test_score_0']
    return ret
def mean_highest(record, number=10):
    """Mean of the `number` largest entries in record['val']."""
    return sort(record['val'])[-number:].mean()
def mean_lowest(record, number=10):
    """Mean of the `number` smallest entries in record['val']."""
    return sort(record['val'])[:number].mean()
def get_mean_best(results, number = 10):
    """Group per-run summary stats by layer number.

    The layer is the last character of the run key (e.g. '1A1A_1_4' -> 4);
    keys with a non-numeric final character (the base runs) map to layer 0.
    Returns {layer: {stat_name: array of per-run values}}.
    """
    ret = {}
    for key, result in results.iteritems():
        try:
            layer = int(key[-1])
        except ValueError:
            layer = 0
        if result['train_loss']['val'].shape[0] == 0:
            # Skip runs with no recorded iterations
            continue
        stats = ret.setdefault(layer, defaultdict(list))
        best_acc = mean_highest(result['valid_top1_acc'], number=number)
        stats['train_loss'].append(mean_lowest(result['train_loss'], number=number))
        stats['valid_loss'].append(mean_lowest(result['valid_loss'], number=number))
        stats['valid_top1_acc'].append(best_acc)
        stats['valid_top1_err'].append(1 - best_acc)
    # Convert the accumulated lists to arrays
    for layer_stats in ret.itervalues():
        for name in layer_stats.keys():
            layer_stats[name] = array(layer_stats[name])
    return ret
def get_mean_best_single(results, number = 10):
    """Like get_mean_best, but keyed by the full run key instead of layer.

    Values stay as plain lists (no array conversion here).
    """
    ret = {}
    for key, result in results.iteritems():
        stats = ret.setdefault(key, defaultdict(list))
        best_acc = mean_highest(result['valid_top1_acc'], number=number)
        stats['train_loss'].append(mean_lowest(result['train_loss'], number=number))
        stats['valid_loss'].append(mean_lowest(result['valid_loss'], number=number))
        stats['valid_top1_acc'].append(best_acc)
        stats['valid_top1_err'].append(1 - best_acc)
    return ret
def load_globbed_results(regexps, key_last_n, must_be_in=None):
print 'Loading results from', regexps
paths = []
for regexp in regexps:
paths.extend(glob.glob(results_root + regexp))
if must_be_in:
must_be_in_paths = [results_root + dir for dir in must_be_in]
filtered_paths = [path for path in paths if path in must_be_in_paths]
print ' filtered %d paths down to %d' % (len(paths), len(filtered_paths))
paths = filtered_paths
results = {}
for path in paths:
if path in seen:
raise Exception('Already loaded %s' % path)
seen.add(path)
key = path[-key_last_n:] # something like '1A1A_1_4'
result = load_results('%s/*.[eo]*.gz' % path)
results[key] = result
print ' loaded %d results' % len(paths)
for path in paths:
print ' ', path
return results
# We load directories by specifying patterns to match to the directory name
# (shell glob patterns, despite the 'regexps' parameter name).
# We keep track of which we've seen as a sanity check to make sure that each directory
# only matches one pattern.
seen = set()
# Base splits: networks trained from scratch on one half of the classes
results_base = load_globbed_results(['*half??'], 2)
# Self-fer A->A and B->B
results_selffer = load_globbed_results(['*transfer?A?A_1_?', '*transfer?B?B_1_?'], 8)
# transfer A->B and B->A
results_transfer = load_globbed_results(['*transfer?A?B_1_?', '*transfer?B?A_1_?'], 8)
# Self-fer A->A and B->B, fine tuned
results_selffer_ft = load_globbed_results(['*transfer-ft?A?A_1_?', '*transfer-ft?B?B_1_?'], 8)
# Transfer A->B and B->A, fine tuned
results_transfer_ft = load_globbed_results(['*transfer-ft?A?B_1_?', '*transfer-ft?B?A_1_?'], 8)
# Transfer for the Natman splits
# As originally created, A is the natural half and B is the man-made half.
# Note that in the paper, we switched the order to make it less confusing.
# The natural dataset (here: A, in paper: B) performs better, perhaps due to easier task or less classes
results_nm_base = load_globbed_results(['*halfnatman?'], 1)
results_nm_transfer = load_globbed_results(['*transfernatmanAB_1_?', '*transfernatmanBA_1_?'], 6)
# Reduced dataset size results (suffix is examples per class)
results_reduced = load_globbed_results(['reduced-????'], 4)
Loading results from ['*half??'] loaded 8 results ../results/half0A ../results/half0B ../results/half1A ../results/half1B ../results/half2A ../results/half2B ../results/half3A ../results/half3B Loading results from ['*transfer?A?A_1_?', '*transfer?B?B_1_?'] loaded 28 results ../results/transfer0A0A_1_1 ../results/transfer0A0A_1_2 ../results/transfer0A0A_1_3 ../results/transfer0A0A_1_4 ../results/transfer0A0A_1_5 ../results/transfer0A0A_1_6 ../results/transfer0A0A_1_7 ../results/transfer1A1A_1_1 ../results/transfer1A1A_1_2 ../results/transfer1A1A_1_3 ../results/transfer1A1A_1_4 ../results/transfer1A1A_1_5 ../results/transfer1A1A_1_6 ../results/transfer1A1A_1_7 ../results/transfer2A2A_1_1 ../results/transfer2A2A_1_2 ../results/transfer2A2A_1_3 ../results/transfer2A2A_1_4 ../results/transfer2A2A_1_5 ../results/transfer2A2A_1_6 ../results/transfer2A2A_1_7 ../results/transfer3A3A_1_1 ../results/transfer3A3A_1_2 ../results/transfer3A3A_1_3 ../results/transfer3A3A_1_4 ../results/transfer3A3A_1_5 ../results/transfer3A3A_1_6 ../results/transfer3A3A_1_7 Loading results from ['*transfer?A?B_1_?', '*transfer?B?A_1_?'] loaded 56 results ../results/transfer0A0B_1_1 ../results/transfer0A0B_1_2 ../results/transfer0A0B_1_3 ../results/transfer0A0B_1_4 ../results/transfer0A0B_1_5 ../results/transfer0A0B_1_6 ../results/transfer0A0B_1_7 ../results/transfer1A1B_1_1 ../results/transfer1A1B_1_2 ../results/transfer1A1B_1_3 ../results/transfer1A1B_1_4 ../results/transfer1A1B_1_5 ../results/transfer1A1B_1_6 ../results/transfer1A1B_1_7 ../results/transfer2A2B_1_1 ../results/transfer2A2B_1_2 ../results/transfer2A2B_1_3 ../results/transfer2A2B_1_4 ../results/transfer2A2B_1_5 ../results/transfer2A2B_1_6 ../results/transfer2A2B_1_7 ../results/transfer3A3B_1_1 ../results/transfer3A3B_1_2 ../results/transfer3A3B_1_3 ../results/transfer3A3B_1_4 ../results/transfer3A3B_1_5 ../results/transfer3A3B_1_6 ../results/transfer3A3B_1_7 ../results/transfer0B0A_1_1 ../results/transfer0B0A_1_2 
../results/transfer0B0A_1_3 ../results/transfer0B0A_1_4 ../results/transfer0B0A_1_5 ../results/transfer0B0A_1_6 ../results/transfer0B0A_1_7 ../results/transfer1B1A_1_1 ../results/transfer1B1A_1_2 ../results/transfer1B1A_1_3 ../results/transfer1B1A_1_4 ../results/transfer1B1A_1_5 ../results/transfer1B1A_1_6 ../results/transfer1B1A_1_7 ../results/transfer2B2A_1_1 ../results/transfer2B2A_1_2 ../results/transfer2B2A_1_3 ../results/transfer2B2A_1_4 ../results/transfer2B2A_1_5 ../results/transfer2B2A_1_6 ../results/transfer2B2A_1_7 ../results/transfer3B3A_1_1 ../results/transfer3B3A_1_2 ../results/transfer3B3A_1_3 ../results/transfer3B3A_1_4 ../results/transfer3B3A_1_5 ../results/transfer3B3A_1_6 ../results/transfer3B3A_1_7 Loading results from ['*transfer-ft?A?A_1_?', '*transfer-ft?B?B_1_?'] loaded 14 results ../results/transfer-ft0A0A_1_1 ../results/transfer-ft0A0A_1_2 ../results/transfer-ft0A0A_1_3 ../results/transfer-ft0A0A_1_4 ../results/transfer-ft0A0A_1_5 ../results/transfer-ft0A0A_1_6 ../results/transfer-ft0A0A_1_7 ../results/transfer-ft1A1A_1_1 ../results/transfer-ft1A1A_1_2 ../results/transfer-ft1A1A_1_3 ../results/transfer-ft1A1A_1_4 ../results/transfer-ft1A1A_1_5 ../results/transfer-ft1A1A_1_6 ../results/transfer-ft1A1A_1_7 Loading results from ['*transfer-ft?A?B_1_?', '*transfer-ft?B?A_1_?'] loaded 28 results ../results/transfer-ft0A0B_1_1 ../results/transfer-ft0A0B_1_2 ../results/transfer-ft0A0B_1_3 ../results/transfer-ft0A0B_1_4 ../results/transfer-ft0A0B_1_5 ../results/transfer-ft0A0B_1_6 ../results/transfer-ft0A0B_1_7 ../results/transfer-ft1A1B_1_1 ../results/transfer-ft1A1B_1_2 ../results/transfer-ft1A1B_1_3 ../results/transfer-ft1A1B_1_4 ../results/transfer-ft1A1B_1_5 ../results/transfer-ft1A1B_1_6 ../results/transfer-ft1A1B_1_7 ../results/transfer-ft0B0A_1_1 ../results/transfer-ft0B0A_1_2 ../results/transfer-ft0B0A_1_3 ../results/transfer-ft0B0A_1_4 ../results/transfer-ft0B0A_1_5 ../results/transfer-ft0B0A_1_6 ../results/transfer-ft0B0A_1_7 
../results/transfer-ft1B1A_1_1 ../results/transfer-ft1B1A_1_2 ../results/transfer-ft1B1A_1_3 ../results/transfer-ft1B1A_1_4 ../results/transfer-ft1B1A_1_5 ../results/transfer-ft1B1A_1_6 ../results/transfer-ft1B1A_1_7 Loading results from ['*halfnatman?'] loaded 2 results ../results/halfnatmanA ../results/halfnatmanB Loading results from ['*transfernatmanAB_1_?', '*transfernatmanBA_1_?'] loaded 14 results ../results/transfernatmanAB_1_1 ../results/transfernatmanAB_1_2 ../results/transfernatmanAB_1_3 ../results/transfernatmanAB_1_4 ../results/transfernatmanAB_1_5 ../results/transfernatmanAB_1_6 ../results/transfernatmanAB_1_7 ../results/transfernatmanBA_1_1 ../results/transfernatmanBA_1_2 ../results/transfernatmanBA_1_3 ../results/transfernatmanBA_1_4 ../results/transfernatmanBA_1_5 ../results/transfernatmanBA_1_6 ../results/transfernatmanBA_1_7 Loading results from ['reduced-????'] loaded 12 results ../results/reduced-0001 ../results/reduced-0002 ../results/reduced-0005 ../results/reduced-0010 ../results/reduced-0025 ../results/reduced-0050 ../results/reduced-0100 ../results/reduced-0250 ../results/reduced-0500 ../results/reduced-0750 ../results/reduced-1000 ../results/reduced-1300
# Summarize each run by the mean of its last_n best checkpoint values
last_n = 10
mb_base = get_mean_best(results_base, last_n)
mb_selffer = get_mean_best(results_selffer, last_n)
mb_transfer = get_mean_best(results_transfer, last_n)
mb_selffer_ft = get_mean_best(results_selffer_ft, last_n)
mb_transfer_ft = get_mean_best(results_transfer_ft, last_n)
mb_nm_base = get_mean_best(results_nm_base, last_n)
mb_nm_transfer = get_mean_best(results_nm_transfer, last_n)
# Split by target A vs B
results_nm_base_A = dict((k, v) for k, v in results_nm_base.iteritems() if k == 'A')
results_nm_base_B = dict((k, v) for k, v in results_nm_base.iteritems() if k == 'B')
results_nm_transfer_A = dict((k, v) for k, v in results_nm_transfer.iteritems() if k[1] == 'A') # transfer TO A
results_nm_transfer_B = dict((k, v) for k, v in results_nm_transfer.iteritems() if k[1] == 'B') # transfer TO B
mb_nm_base_A = get_mean_best(results_nm_base_A, last_n)
mb_nm_base_B = get_mean_best(results_nm_base_B, last_n)
mb_nm_transfer_A = get_mean_best(results_nm_transfer_A, last_n)
mb_nm_transfer_B = get_mean_best(results_nm_transfer_B, last_n)
# Reduced results are keyed by run name rather than by layer number
mb_reduced = get_mean_best_single(results_reduced, last_n)
# Quick-look scatter: base nets (black, layer 0), selffer (blue) and
# transfer (red) runs at layers 1-7.
figsize(15,8)
hold(True)  # NOTE(review): pylab hold() is deprecated/removed in newer matplotlib -- confirm version
handles = {}
# 0*array broadcasts the x position to one point per run
plot(0*mb_base[0]['valid_top1_acc'],
     mb_base[0]['valid_top1_acc'],
     'o', color = 'k', markersize=10)
for layer in range(1,8):
    vals = mb_selffer[layer]['valid_top1_acc']
    plot(layer + 0*vals, vals, 'o', color = 'b', markersize=10)
for layer in range(1,8):
    vals = mb_transfer[layer]['valid_top1_acc']
    # +.1 offsets the red points so the two groups don't overlap
    plot(layer + 0*vals + .1, vals, 'o', color = 'r', markersize=10)
# Main transfer figure: per-run markers for base, selffer, transfer, and
# their fine-tuned (+) variants, saved as result_transfer.pdf.
figsize(15,8)
rcParams.update({'font.size': 16})
msize = 14   # circle marker size
msized = 12  # diamond marker size
msizep = 8   # '+' overlay marker size
hh = {}  # plot handles, collected for the legend below
# White circles at layer 0: base networks
hh['k'],=plot(.0 + 0*mb_base[0]['valid_top1_acc'],
              mb_base[0]['valid_top1_acc'],
              'o', color = 'w', mew=2, markersize=msize)
# Blue circles: selffer BnB (offset left so groups don't overlap)
for layer in range(1,8):
    vals = mb_selffer[layer]['valid_top1_acc']
    hh['b'],=plot(-.25+layer + 0*vals, vals, 'o', color = 'b', markersize=msize)
# Red diamonds: transfer AnB
for layer in range(1,8):
    vals = mb_transfer[layer]['valid_top1_acc']
    hh['r'],=plot(.09+layer + 0*vals, vals, 'D', color = 'r', markersize=msized)
# Light blue circles with black '+': selffer BnB+ (fine tuned)
for layer in range(1,8):
    vals = mb_selffer_ft[layer]['valid_top1_acc']
    hh['bft1'],=plot(-.09+layer + 0*vals, vals, 'o', color = '#aaaaff', markersize=msize)
    hh['bft2'],=plot(-.09+layer + 0*vals, vals, '+', color = 'k', mew=2, markersize=msizep)
# Light red diamonds with black '+': transfer AnB+ (fine tuned)
for layer in range(1,8):
    vals = mb_transfer_ft[layer]['valid_top1_acc']
    hh['rft1'],=plot(.27+layer + 0*vals, vals, 'D', color = '#ffaaaa', markersize=msized)
    hh['rft2'],=plot(.27+layer + 0*vals, vals, ls='', marker='+', color = 'k', mew=2, markersize=msizep)
# Fix the x limits, keep the autoscaled y limits
ax=axis()
axis((-.2, 7.3) + ax[2:4])
print 'axis is', axis()
# Tuples of handles render as combined legend entries (marker + '+')
legend([hh['k'], hh['b'], (hh['bft1'],hh['bft2']), hh['r'], (hh['rft1'],hh['rft2'])],
       ['baseB', 'selffer BnB', 'selffer BnB$^+$', 'transfer AnB', 'transfer AnB$^+$'],
       loc='lower left', numpoints=1)
#xlabel('n - layer at which network is chopped and retrained', fontsize=16)
ylabel('Top-1 accuracy (higher is better)')
if plot_dir != None:
    savefig('%s/result_transfer.pdf' % plot_dir)
axis is (-0.20000000000000001, 7.2999999999999998, 0.52000000000000002, 0.66000000000000003)
# Now plot version with just lines
def getline(mb):
    """Per-layer mean accuracy: base value at index 0, then layers 1-7 from mb."""
    line = [mb_base[0]['valid_top1_acc'].mean()]
    for layer in range(1, 8):
        line.append(mb[layer]['valid_top1_acc'].mean())
    return line
def just_lines(with_text):
    """Plot the mean-accuracy lines version of the transfer figure.

    with_text: if True, overlay the numbered interpretation annotations.
    Reads mb_base, mb_selffer, mb_transfer, mb_*_ft, and msize from module scope.
    """
    figsize(15,8)
    hh = {}
    base_mean = mb_base[0]['valid_top1_acc'].mean()
    # Dashed black reference line at the base-network accuracy
    hh['k'], =plot([0,7], [base_mean,base_mean], '--', color='k', markersize=10, lw=2)
    hh['b'], =plot(range(8), getline(mb_selffer), color='b', lw=2)
    hh['r'],=plot(range(8), getline(mb_transfer), color='r', lw=2)
    hh['bft'], =plot(range(8), getline(mb_selffer_ft), color='#aaaaff', lw=2)
    hh['rft'],=plot(range(8), getline(mb_transfer_ft), color='#ffaaaa', lw=2)
    # White circle marking the base point at layer 0
    plot(0, base_mean, 'o', color='w', mew=2, markersize=msize)
    # Shaded bands between the reference line and the curves
    fill_between(range(8), base_mean, getline(mb_transfer_ft), color='#ffeeee')
    fill_between(range(8), base_mean, getline(mb_selffer), color='#eeeeff')
    fill_between(range(8), getline(mb_selffer), getline(mb_transfer), color='#ffcccc')
    if with_text:
        # Two annotation wordings were drafted; 'alternate' selects one
        alternate=True
        if alternate:
            text(2.5,.649,'5: Transfer + fine-tuning improves generalization', fontsize=20)
            text(2.5,.632,'3: Fine-tuning recovers co-adapted interactions', fontsize=20)
        else:
            text(2.5,.633,'5: Transfer + fine-tuning helps generalization', fontsize=20)
        if alternate:
            text(4.7,.606,'2: Performance drops\n due to fragile\n co-adaptation', ha='center', fontsize=20)
        else:
            text(4.8,.603,'2: Co-adapted neural\ninteractions\nlost', ha='center', fontsize=20)
        if alternate:
            text(6.9,.583,'4: Performance\ndrops due to\n representation\nspecificity', ha='right', fontsize=20)
        else:
            text(5.3,.586,'4: Representation\n specificity', fontsize=20)
    # Fix x limits and trim .005 off the top of the autoscaled y range
    ax=axis()
    axis((-.2, 7.3) + ax[2:3] + (ax[3]-.005,))
    #legend([hh['k'], hh['b'], hh['bft'], hh['r'], hh['rft']],
    #       ['baseA', 'AnA', 'AnA+', 'BnA', 'BnA+'],
    #       loc='lower left')
    xlabel('Layer $n$ at which network is chopped and retrained', fontsize=16)
    ylabel('Top-1 accuracy (higher is better)')
# Render the annotated and annotation-free versions and save each one
just_lines(with_text = True)
if plot_dir != None:
    savefig('%s/result_transfer_lines.pdf' % plot_dir)
just_lines(with_text = False)
if plot_dir != None:
    savefig('%s/result_transfer_lines_notext.pdf' % plot_dir)
def just_lines_incremental(step = 0):
    """Build the lines figure up one element at a time (presentation slides).

    step: 0-6, selecting which subset of elements to draw. Saves one PDF per
    step (result_transfer_pres_NN.pdf) when plot_dir is set.
    """
    figsize(15,8)
    hh = {}
    msize = 14
    msized = 12
    msizep = 8
    base_mean = mb_base[0]['valid_top1_acc'].mean()
    # Mean lines appear from step 1 on
    if step in (1,2,3,4,5,6):
        hh['kd'], =plot([0,7], [base_mean,base_mean], '--', color='k', markersize=10, lw=2)
        hh['b'], =plot(range(8), getline(mb_selffer), color='b', lw=2)
        hh['r'],=plot(range(8), getline(mb_transfer), color='r', lw=2)
    if step in (4,5,6):
        hh['bft'], =plot(range(8), getline(mb_selffer_ft), color='#aaaaff', lw=2)
    if step in (6,):
        hh['rft'],=plot(range(8), getline(mb_transfer_ft), color='#ffaaaa', lw=2)
    if step in (2,3,4,5,6):
        hh['k'], = plot(0, base_mean, 'o', color='w', mew=2, markersize=msize)
    if step in (6,):
        fill_between(range(8), base_mean, getline(mb_transfer_ft), color='#ffeeee')
    if step in (1,2,3,4,5,6):
        fill_between(range(8), base_mean, getline(mb_selffer), color='#eeeeff')
        fill_between(range(8), getline(mb_selffer), getline(mb_transfer), color='#ffcccc')
    # Individual run markers appear only in the early steps
    if step in (0,1):
        hh['k'],=plot(.0 + 0*mb_base[0]['valid_top1_acc'],
                      mb_base[0]['valid_top1_acc'],
                      'o', color = 'w', mew=2, markersize=msize)
    if step in (0,1):
        for layer in range(1,8):
            vals = mb_selffer[layer]['valid_top1_acc']
            hh['b'],=plot(-.09+layer + 0*vals, vals, 'o', color = 'b', markersize=msize)
        for layer in range(1,8):
            vals = mb_transfer[layer]['valid_top1_acc']
            hh['r'],=plot(.09+layer + 0*vals, vals, 'D', color = 'r', markersize=msized)
    if step in (3,):
        for layer in range(1,8):
            vals = mb_selffer_ft[layer]['valid_top1_acc']
            hh['bft1'],=plot(-.09+layer + 0*vals, vals, 'o', color = '#aaaaff', markersize=msize)
            hh['bft2'],=plot(-.09+layer + 0*vals, vals, '+', color = 'k', mew=2, markersize=msizep)
    if step in (5,):
        for layer in range(1,8):
            vals = mb_transfer_ft[layer]['valid_top1_acc']
            hh['rft1'],=plot(.09+layer + 0*vals, vals, 'D', color = '#ffaaaa', markersize=msized)
            hh['rft2'],=plot(.09+layer + 0*vals, vals, ls='', marker='+', color = 'k', mew=2, markersize=msizep)
    # Fixed axis limits so every step lines up when flipped through as slides
    axis((-0.20000000000000001, 7.2999999999999998, 0.52000000000000002, 0.66000000000000003))
    # Legend contents depend on which handles exist at this step
    if step in (0,):
        legend([hh['k'], hh['b'], hh['r']],
               ['baseB', 'selffer BnB', 'transfer AnB'],
               loc='lower left', numpoints=1)
    elif step in (1,):
        legend([(hh['kd'],hh['k']), hh['b'], hh['r']],
               ['baseB', 'selffer BnB', 'transfer AnB'],
               loc='lower left', numpoints=1)
    elif step in (2,):
        legend([(hh['kd'],hh['k']), hh['b'], hh['r']],
               ['baseB', 'selffer BnB', 'transfer AnB'],
               loc='lower left', numpoints=1)
    elif step in (3,):
        legend([(hh['kd'],hh['k']), hh['b'], (hh['bft1'],hh['bft2']), hh['r']],
               ['baseB', 'selffer BnB', 'selffer BnB$^+$', 'transfer AnB'],
               loc='lower left', numpoints=1)
    elif step in (4,):
        legend([(hh['kd'],hh['k']), hh['b'], hh['bft'], hh['r']],
               ['baseB', 'selffer BnB', 'selffer BnB$^+$', 'transfer AnB'],
               loc='lower left', numpoints=1)
    elif step in (5,):
        legend([(hh['kd'],hh['k']), hh['b'], hh['bft'], hh['r'], (hh['rft1'],hh['rft2'])],
               ['baseB', 'selffer BnB', 'selffer BnB$^+$', 'transfer AnB', 'transfer AnB$^+$'],
               loc='lower left', numpoints=1)
    else:
        legend([(hh['kd'],hh['k']), hh['b'], hh['bft'], hh['r'], hh['rft']],
               ['baseB', 'selffer BnB', 'selffer BnB$^+$', 'transfer AnB', 'transfer AnB$^+$'],
               loc='lower left', numpoints=1)
    ylabel('Top-1 accuracy (higher is better)')
    xlabel('Layer $n$ at which network is chopped and retrained', fontsize=16)
    if plot_dir != None:
        savefig('%s/result_transfer_pres_%02d.pdf' % (plot_dir, step))
# Render every step of the incremental build-up figure
for step in range(7):
    just_lines_incremental(step)
# Load the full base network and the nolearn_1_N runs (presumably N frozen
# random layers, per the 'Random, untrained filters' plot below -- confirm)
result_base = load_results(results_root + 'netbase/*.[eo]*.gz')
print 'Iterations base:', result_base['valid_loss']['idx'].max()
result_nolearn_1_x = {}
for ii in range(1, 7+1):
    result_nolearn_1_x[ii] = load_results(results_root + 'nolearn_1_%d/*.[eo]*.gz' % ii)
    # Sanity check: every run should reach the same final iteration
    eg = result_nolearn_1_x[ii]['valid_loss']['idx']
    print 'Iterations result_nolearn_1_x[%d]:' % ii, len(eg), eg.max()
print 'Available fields are:'
print result_base.keys()
Iterations base: 450000 Iterations result_nolearn_1_x[1]: 450 450000 Iterations result_nolearn_1_x[2]: 450 450000 Iterations result_nolearn_1_x[3]: 450 450000 Iterations result_nolearn_1_x[4]: 450 450000 Iterations result_nolearn_1_x[5]: 450 450000 Iterations result_nolearn_1_x[6]: 450 450000 Iterations result_nolearn_1_x[7]: 450 450000 Available fields are: ['train_loss', 'valid_top1_acc', 'valid_loss', 'lr', 'test_score_0', 'test_score_1']
# Collect summary stats: index 0 = base net, indices 1-7 = nolearn_1_N runs
train_loss = []
train_loss.append(mean_lowest(result_base['train_loss']))
train_loss.extend([mean_lowest(result_nolearn_1_x[ll]['train_loss']) for ll in range(1,8)])
valid_loss = []
valid_loss.append(mean_lowest(result_base['valid_loss']))
valid_loss.extend([mean_lowest(result_nolearn_1_x[ll]['valid_loss']) for ll in range(1,8)])
# NOTE: despite its name, valid_acc holds top-1 ERROR (1 - accuracy)
valid_acc = []
valid_acc.append(1-mean_highest(result_base['valid_top1_acc']))
valid_acc.extend([1-mean_highest(result_nolearn_1_x[ll]['valid_top1_acc']) for ll in range(1,8)])
figsize(5,3)
subplot(2,1,1)
plot(arange(8), train_loss, 'bo-')
plot(arange(8), valid_loss, 'ro-')
legend(('train loss', 'valid loss'), loc='best')
title('loss')
subplot(2,1,2)
plot(arange(8), valid_acc, 'ro-')
title('top1 err')
# Separate figure: the same data flipped back to accuracy
figure()
_=plot(arange(8), 1-array(valid_acc), 'ro-')
# Version with A and B separate (A = natural half, B = man-made half,
# per the loading-cell comments above)
figsize(6,4)
# Circles: target A
vals = mb_nm_base_A[0]['valid_top1_acc']
plot(0*vals, vals,
     'o', color = 'k', markersize=10)
for layer in range(1,8):
    vals = mb_nm_transfer_A[layer]['valid_top1_acc']
    plot(layer + 0*vals, vals, 'o', color = 'r', markersize=10)
# Squares: target B
vals = mb_nm_base_B[0]['valid_top1_acc']
plot(0*vals, vals,
     's', color = 'k', markersize=10)
for layer in range(1,8):
    vals = mb_nm_transfer_B[layer]['valid_top1_acc']
    plot(layer + 0*vals, vals, 's', color = 'r', markersize=10)
ax=axis()
_=axis((-.2, 7.3) + ax[2:4])
Print exact accuracies
# Print exact accuracies per layer for both transfer directions.
# [0] grabs the single run per (direction, layer) -- presumably one run
# each after the A/B split above; confirm against the directory listing.
print 'Accuracy with target dataset "Man-made"'
print ' base ', mb_nm_base_B[0]['valid_top1_acc'][0]
for ii in range(1,8):
    print ' layer', ii, mb_nm_transfer_B[ii]['valid_top1_acc'][0]
print 'Accuracy with target dataset "Natural"'
print ' base ', mb_nm_base_A[0]['valid_top1_acc'][0]
for ii in range(1,8):
    print ' layer', ii, mb_nm_transfer_A[ii]['valid_top1_acc'][0]
Accuracy with target dataset "Man-made" base 0.537244 layer 1 0.5392359 layer 2 0.52142 layer 3 0.500754 layer 4 0.461626 layer 5 0.409802 layer 6 0.374248 layer 7 0.291372 Accuracy with target dataset "Natural" base 0.6533991 layer 1 0.6571475 layer 2 0.6505914 layer 3 0.6250239 layer 4 0.5823855 layer 5 0.5219 layer 6 0.48505 layer 7 0.399026
# Three-panel combined figure, saved as result_random_nm_combined.pdf
figsize(14,10)
# TOP LEFT: man-made/natural split accuracies
subplot2grid((3,2), (0,0))
clr='#ff8800'
# Per-layer accuracy arrays for target A...
vals_nmA=[]
vals_nmA.append((mb_nm_base_A[0]['valid_top1_acc']))
for layer in range(1,8):
    vals_nmA.append((mb_nm_transfer_A[layer]['valid_top1_acc']))
vals_nmA = array(vals_nmA)
# ...and target B
vals_nmB=[]
vals_nmB.append((mb_nm_base_B[0]['valid_top1_acc']))
for layer in range(1,8):
    vals_nmB.append((mb_nm_transfer_B[layer]['valid_top1_acc']))
vals_nmB = array(vals_nmB)
plot(range(8), vals_nmA, '-', lw=2, color=clr)
plot(range(8), vals_nmB, '-', lw=2, color=clr)
# Markers over the lines: white circles at layer 0, hexagons at layers 1-7
vals = mb_nm_base_A[0]['valid_top1_acc']
plot(0*vals, vals,
     'o', color = 'w', mew=2, markersize=msize)
for layer in range(1,8):
    vals = mb_nm_transfer_A[layer]['valid_top1_acc']
    plot(layer + 0*vals, vals, 'h', color = clr, markersize=msize)
vals = mb_nm_base_B[0]['valid_top1_acc']
plot(0*vals, vals,
     'o', color = 'w', mew=2, markersize=msize)
for layer in range(1,8):
    vals = mb_nm_transfer_B[layer]['valid_top1_acc']
    plot(layer + 0*vals, vals, 'h', color = clr, markersize=msize)
ax=axis()
axis((-.3, 7.3) + ax[2:3] + (ax[3]+.001,))
title('Man-made/Natural split')
ylabel('Top-1 accuracy')
# TOP RIGHT: random untrained filters
# (valid_acc holds top-1 error, so 1-x converts back to accuracy)
subplot2grid((3,2), (0,1))
plot(arange(0,8), 1-array(valid_acc)[0:8], '-', color='y', lw=2, markersize=msize)
plot(arange(0,1), 1-array(valid_acc)[0:1], 'o', color='w', mew=2, markersize=msize)
plot(arange(1,8), 1-array(valid_acc)[1:8], 'v', color='y', markersize=msize)
ax=axis()
axis((-.3, 7.3) + ax[2:3] + (ax[3]+.01,))
title('Random, untrained filters')
# BOTTOM: all three experiments plotted relative to their own layer-0 value
# (hh carries over from the earlier figure cell)
subplot2grid((3,2), (1,0), colspan=2, rowspan=2)
# Random
hh['r1'], = plot(arange(0,8), -(array(valid_acc)[0:]-array(valid_acc)[0]), 'y-', lw=2, markersize=msize)
hh['r2'], = plot(arange(1,8), -(array(valid_acc)[1:]-array(valid_acc)[0]), 'yv', lw=2, markersize=msize)
# Natman: mean of the A-target and B-target curves
vals=[]
vals.append((mb_nm_base_A[0]['valid_top1_acc'] + mb_nm_base_B[0]['valid_top1_acc']) / 2)
for layer in range(1,8):
    vals.append((mb_nm_transfer_A[layer]['valid_top1_acc'] + mb_nm_transfer_B[layer]['valid_top1_acc']) / 2)
hh['nm1'], = plot(arange(0,8), array(vals)[0:]-array(vals)[0], '-', color = clr, lw=2, markersize=msize)
hh['nm2'], = plot(arange(1,8), array(vals)[1:]-array(vals)[0], 'h', color = clr, lw=2, markersize=msize)
# Random A/B splits: mean AnB transfer curve
# (getline returns a list; subtracting the numpy scalar vals[0] coerces it)
vals = getline(mb_transfer)
hh['s1'], = plot(range(0,8), vals[0:]-vals[0], '-', color='r', lw=2, markersize=msize)
hh['s2'], = plot(range(1,8), vals[1:]-vals[0], 'D', color='r', lw=2, markersize=msized)
# Zero reference marker
hh['o'], = plot(0, 0, 'ow', mew=2, markersize=msize)
ax=axis()
axis((-.2, 7.3, -.3, .03))
xlabel('Layer $n$ at which network is chopped and retrained', fontsize=16)
ylabel('Relative top-1 accuracy (higher is better)')
legend([hh['o'], (hh['s1'],hh['s2']), (hh['nm1'],hh['nm2']), (hh['r1'],hh['r2']),],
       ['reference', 'mean AnB, random splits', 'mean AnB, m/n split', 'random features'],
       bbox_to_anchor=(.79,.39),
       numpoints=1)
if plot_dir != None:
    savefig('%s/result_random_nm_combined.pdf' % plot_dir)
# Reduced dataset size: keys are zero-padded examples-per-class strings
# ('0001'...'1300'), so lexicographic sort is also numeric sort
reduced_sorted = sorted(mb_reduced.iteritems(), key=lambda key_value: key_value[0])
figsize(12,10)
# Reduced
reduced_n_examples = array([int(item[0]) for item in reduced_sorted])
reduced_acc = array([item[1]['valid_top1_acc'][0] for item in reduced_sorted])
for ii in reversed(range(len(reduced_n_examples))):
    print '%05s' % reduced_n_examples[ii], reduced_acc[ii]
# Linear-x panel on top, log-log panel below; saved as result_reduced.pdf
subplot2grid((2,1), (0,0))
plot(reduced_n_examples, reduced_acc, 'o-', lw=2, ms=msize)
ax=axis()
axis(ax[0:1] + (1350,) + ax[2:4])
ylabel('Top-1 accuracy')
xlabel('Number of examples per class (linear)')
subplot2grid((2,1), (1,0))
loglog(reduced_n_examples, reduced_acc, 'o-', lw=2, ms=msize)
ax=axis()
axis(ax[0:1] + (1350,) + ax[2:4])
ylabel('Top-1 accuracy')
xlabel('Number of examples per class (log)')
if plot_dir != None:
    savefig('%s/result_reduced.pdf' % plot_dir)
1300 0.5517618 1000 0.5409419 750 0.5147 500 0.475682 250 0.384284 100 0.001106 50 0.00111 25 0.001078 10 0.001068 5 0.001084 2 0.004448 1 0.003796
To attempt to mitigate the effects of fragile co-adaptation, we can try initializing layers beyond the frozen layers with the originally found solution (the weights from the base model). Here we try one and two layers beyond. Of course, this solution only works if the subsequent layers of the target model are the same size as the base model, which may often not be the case.
# Hard-coded accuracies from the "preserve k layers past the freeze point"
# runs, indexed by freeze layer N = 1..7 (or 1..6 for preserve-2).
# Selffer, first N layers frozen, last 8-N randomly initialized
bnb = array([0.61664, 0.61608, 0.61196, 0.597479, 0.58788, 0.610799, 0.6154])
# Transfer, as above
anb = array([0.635239, 0.631279, 0.628439, 0.6112, 0.58516, 0.59108, 0.5516])
# Selffer, first N layers frozen, 1 layer of weights copied from base and fine tuned, last 8-N-1 = 7-N randomly initialized
bnb_p1 = array([0.61944, 0.62212, 0.614279, 0.60448, 0.58748, 0.612, 0.61532])
# Transfer, as above
anb_p1 = array([0.6382, 0.638039, 0.63024, 0.61428, 0.587679, 0.59092, 0.55204])
# Selffer, first N layers frozen, 2 layers of weights copied from base and fine tuned, last 8-N-2 = 6-N randomly initialized
bnb_p2 = array([0.62848, 0.62164, 0.614, 0.6038, 0.585199, 0.61256])
# Transfer, as above
anb_p2 = array([0.645599, 0.640999, 0.62892, 0.61616, 0.58716, 0.590919])
to7 = range(1,8)  # layers 1-7 (curves with 7 points)
to6 = range(1,7)  # layers 1-6 (the preserve-2 curves have one fewer point)
plot(to7, bnb, 'b', lw=2)
plot(to7, bnb_p1, 'b--', lw=2)
plot(to6, bnb_p2, 'b:', lw=2)
plot(to7, anb, 'r', lw=2)
plot(to7, anb_p1, 'r--', lw=2)
plot(to6, anb_p2, 'r:', lw=2)
legend(('BnB', 'BnB preserve 1', 'BnB preserve 2', 'AnB', 'AnB preserve 1', 'AnB preserve 2'),
       loc='lower left')
ylabel('accuracy')
xlabel('layer number')
# Typo fixed: 'preseved' -> 'preserved'
title('transfer and selffer with frozen features plus 0, 1, or 2 layers of preserved\nfeatures above freeze point')
if plot_dir != None:
    savefig('%s/result_preserved.pdf' % plot_dir)
Conclusion: at least with this training regimen, preserving 1 or 2 layers beyond the freeze point does not seem to prevent co-adaptation. However, a different training schedule might. For example, for N = 4, one could copy and freeze the first 6 layers, train the last two to convergence, and only then release the 5th and 6th layers, once again training to convergence. As mentioned above, this requires the target model to have the same layers sizes in the 5th and 6th layers, which will not always be desirable.
For more information and code, please visit http://yosinski.com/transfer