#!/usr/bin/env python
# coding: utf-8

# ## CIFAR 10

# In[2]:

get_ipython().run_line_magic('matplotlib', 'inline')
get_ipython().run_line_magic('reload_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')

# NOTE: this is the only import in the script; `os`, `np`, `plt`, `torch`,
# `nn`, `F` and the fastai helpers used below are all expected to be
# re-exported by this star import.
from fastai.conv_learner import *

PATH = 'data/cifar/'
os.makedirs(PATH, exist_ok=True)

# You can get the data via:
#
#     wget http://pjreddie.com/media/files/cifar.tgz

# In[3]:

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse',
           'ship', 'truck')
# Pair of per-channel arrays handed to `tfms_from_stats` in `get_data`
# (presumably the dataset's channel means and standard deviations).
stats = (np.array([0.4914, 0.48216, 0.44653]),
         np.array([0.24703, 0.24349, 0.26159]))


# In[4]:

def to_label_subdirs(path, subdirs, classes, labelfn):
    """Move every file in ``path/<subdir>`` into ``path/<subdir>/<label>/``.

    Args:
        path: root directory that contains the sub-directories.
        subdirs: iterable of sub-directory names to process.
        classes: unused; kept so existing callers keep working.
        labelfn: callable mapping a file name to its label. Files for which
            it returns a falsy value are left where they are.

    Entries that are not regular files (for example label directories created
    by an earlier run) are skipped, so calling this twice is harmless.
    """
    for subdir in subdirs:
        folder = os.path.join(path, subdir)
        for fname in os.listdir(folder):
            src = os.path.join(folder, fname)
            if not os.path.isfile(src):
                continue
            label = labelfn(fname)
            if not label:
                continue
            # os.renames creates the missing label directory on the way.
            os.renames(src, os.path.join(folder, label, fname))


# In[5]:

# The label is the part of the file name between the first '_' and the
# first '.'.
to_label_subdirs(PATH, 'train test'.split(), classes,
                 lambda f: f[f.find('_') + 1 : f.find('.')])


# In[6]:

def get_data(sz, bs):
    """Build the image data object for image size ``sz`` and batch size ``bs``.

    Transforms are derived from the module-level ``stats`` with a random flip
    as the only augmentation and a padding of ``sz // 8``; the ``test``
    folder is passed as the validation set name.
    """
    tfms = tfms_from_stats(stats, sz, aug_tfms=[RandomFlip()], pad=sz // 8)
    return ImageClassifierData.from_paths(PATH, val_name='test', tfms=tfms,
                                          bs=bs)


# In[7]:

bs = 256

# ### Look at data

# In[8]:

data = get_data(32, 4)

# In[9]:

x, y = next(iter(data.trn_dl))

# In[10]:

plt.imshow(data.trn_ds.denorm(x)[0])

# In[11]:

plt.imshow(data.trn_ds.denorm(x)[1])

# ## Fully connected model

# In[12]:

data = get_data(32, bs)

# In[13]:

lr = 1e-2

# From [this notebook](https://github.com/KeremTurgutlu/deeplearning/blob/master/Exploring%20Optimizers.ipynb) by our student Kerem Turgutlu:


# In[14]:

class SimpleNet(nn.Module):
    """Fully connected network: one ``nn.Linear`` per consecutive size pair."""

    def __init__(self, layers):
        """``layers`` lists the layer widths, input size first."""
        super().__init__()
        self.layers = nn.ModuleList([
            nn.Linear(n_in, n_out) for n_in, n_out in zip(layers, layers[1:])])

    def forward(self, x):
        """Flatten each sample, run the linear stack, return log-softmax."""
        x = x.view(x.size(0), -1)
        for layer in self.layers:
            l_x = layer(x)
            x = F.relu(l_x)
        # The last layer's pre-ReLU output is what goes into log_softmax.
        return F.log_softmax(l_x, dim=-1)


# In[15]:

learn = ConvLearner.from_model_data(SimpleNet([32*32*3, 40, 10]), data)

# In[16]:

learn, [o.numel() for o in learn.model.parameters()]

# In[17]:

learn.summary()

# In[18]:

learn.lr_find()

# In[19]:

learn.sched.plot()

# In[20]:

get_ipython().run_line_magic('time', 'learn.fit(lr, 2)')

# In[21]:

get_ipython().run_line_magic('time', 'learn.fit(lr, 2, cycle_len=1)')

# ## CNN


# In[22]:

class ConvNet(nn.Module):
    """Stack of stride-2 3x3 convolutions, global max pool, linear head."""

    def __init__(self, layers, c):
        """``layers`` lists channel counts (input first); ``c`` is the
        number of outputs of the final linear layer."""
        super().__init__()
        self.layers = nn.ModuleList([
            nn.Conv2d(n_in, n_out, kernel_size=3, stride=2)
            for n_in, n_out in zip(layers, layers[1:])])
        self.pool = nn.AdaptiveMaxPool2d(1)
        self.out = nn.Linear(layers[-1], c)

    def forward(self, x):
        """Return log-softmax scores for a batch of images."""
        for layer in self.layers:
            x = F.relu(layer(x))
        x = self.pool(x)
        x = x.view(x.size(0), -1)
        return F.log_softmax(self.out(x), dim=-1)


# In[23]:

learn = ConvLearner.from_model_data(ConvNet([3, 20, 40, 80], 10), data)

# In[24]:

learn.summary()

# In[25]:

learn.lr_find(end_lr=100)

# In[26]:

learn.sched.plot()

# In[27]:

get_ipython().run_line_magic('time', 'learn.fit(1e-1, 2)')

# In[28]:

get_ipython().run_line_magic('time', 'learn.fit(1e-1, 4, cycle_len=1)')

# ## Refactored


# In[29]:

class ConvLayer(nn.Module):
    """3x3, stride-2, padding-1 convolution followed by ReLU."""

    def __init__(self, ni, nf):
        """``ni`` / ``nf`` are the input / output channel counts."""
        super().__init__()
        self.conv = nn.Conv2d(ni, nf, kernel_size=3, stride=2, padding=1)

    def forward(self, x):
        return F.relu(self.conv(x))


# In[30]:

class ConvNet2(nn.Module):
    """Same idea as ``ConvNet`` but assembled from ``ConvLayer`` modules."""

    def __init__(self, layers, c):
        """``layers`` lists channel counts (input first); ``c`` is the
        number of outputs of the final linear layer."""
        super().__init__()
        self.layers = nn.ModuleList([
            ConvLayer(n_in, n_out) for n_in, n_out in zip(layers, layers[1:])])
        self.out = nn.Linear(layers[-1], c)

    def forward(self, x):
        """Return log-softmax scores for a batch of images."""
        for layer in self.layers:
            x = layer(x)
        x = F.adaptive_max_pool2d(x, 1)
        x = x.view(x.size(0), -1)
        return F.log_softmax(self.out(x), dim=-1)


# In[31]:

learn = ConvLearner.from_model_data(ConvNet2([3, 20, 40, 80], 10), data)

# In[32]:

learn.summary()

# In[33]:

get_ipython().run_line_magic('time', 'learn.fit(1e-1, 2)')

# In[34]:

get_ipython().run_line_magic('time', 'learn.fit(1e-1, 2, cycle_len=1)')

# ## BatchNorm


# In[35]:

class BnLayer(nn.Module):
    """Convolution + ReLU followed by a hand-written per-channel normalisation.

    The activations are normalised with the per-channel mean and standard
    deviation of the most recent training batch, then scaled by the learnable
    ``m`` and shifted by the learnable ``a``.
    """

    def __init__(self, ni, nf, stride=2, kernel_size=3, eps=1e-5):
        """
        Args:
            ni: number of input channels.
            nf: number of output channels.
            stride: convolution stride.
            kernel_size: convolution kernel size.
            eps: lower bound applied to the per-channel standard deviation
                before dividing, so a constant channel cannot produce NaN.
        """
        super().__init__()
        # NOTE: padding is 1 whatever the kernel size, so kernels larger
        # than 3 shrink the feature map even at stride 1.
        self.conv = nn.Conv2d(ni, nf, kernel_size=kernel_size, stride=stride,
                              bias=False, padding=1)
        self.a = nn.Parameter(torch.zeros(nf, 1, 1))
        self.m = nn.Parameter(torch.ones(nf, 1, 1))
        self.eps = eps
        # Statistics of the last batch they were computed on; None until the
        # first forward pass.
        self.means = None
        self.stds = None

    def forward(self, x):
        """Apply conv + ReLU, then normalise, scale and shift per channel."""
        x = F.relu(self.conv(x))
        # Refresh the statistics while training. Also compute them when none
        # exist yet, so that evaluating an untrained layer does not fail.
        if self.training or self.means is None:
            x_chan = x.transpose(0, 1).contiguous().view(x.size(1), -1)
            self.means = x_chan.mean(1)[:, None, None]
            self.stds = x_chan.std(1)[:, None, None]
        # A channel that is all zeros after the ReLU has std == 0; without
        # the clamp that would give 0 / 0 = NaN.
        return (x - self.means) / self.stds.clamp(min=self.eps) * self.m + self.a


# In[36]:

class ConvBnNet(nn.Module):
    """5x5 stem convolution, a stack of ``BnLayer``s, global max pool, linear
    head."""

    def __init__(self, layers, c):
        """``layers`` lists the channel counts of the ``BnLayer`` stack; its
        first entry has to match the 10 channels produced by the stem.
        ``c`` is the number of outputs."""
        super().__init__()
        self.conv1 = nn.Conv2d(3, 10, kernel_size=5, stride=1, padding=2)
        self.layers = nn.ModuleList([
            BnLayer(n_in, n_out) for n_in, n_out in zip(layers, layers[1:])])
        self.out = nn.Linear(layers[-1], c)

    def forward(self, x):
        """Return log-softmax scores for a batch of images."""
        x = self.conv1(x)
        for layer in self.layers:
            x = layer(x)
        x = F.adaptive_max_pool2d(x, 1)
        x = x.view(x.size(0), -1)
        return F.log_softmax(self.out(x), dim=-1)


# In[37]:

learn = ConvLearner.from_model_data(ConvBnNet([10, 20, 40, 80, 160], 10), data)

# In[38]:

learn.summary()

# In[39]:

get_ipython().run_line_magic('time', 'learn.fit(3e-2, 2)')

# In[40]:

get_ipython().run_line_magic('time', 'learn.fit(1e-1, 4, cycle_len=1)')

# ## Deep BatchNorm


# In[41]:

class ConvBnNet2(nn.Module):
    """``ConvBnNet`` with an extra stride-1 ``BnLayer`` after every stride-2
    one."""

    def __init__(self, layers, c):
        """``layers`` lists the channel counts (first entry must match the
        10-channel stem); ``c`` is the number of outputs."""
        super().__init__()
        self.conv1 = nn.Conv2d(3, 10, kernel_size=5, stride=1, padding=2)
        self.layers = nn.ModuleList([
            BnLayer(n_in, n_out) for n_in, n_out in zip(layers, layers[1:])])
        # Stride-1 layers that keep the channel count unchanged.
        self.layers2 = nn.ModuleList([
            BnLayer(nf, nf, 1) for nf in layers[1:]])
        self.out = nn.Linear(layers[-1], c)

    def forward(self, x):
        """Return log-softmax scores for a batch of images."""
        x = self.conv1(x)
        for down, same in zip(self.layers, self.layers2):
            x = down(x)
            x = same(x)
        x = F.adaptive_max_pool2d(x, 1)
        x = x.view(x.size(0), -1)
        return F.log_softmax(self.out(x), dim=-1)


# In[42]:

learn = ConvLearner.from_model_data(ConvBnNet2([10, 20, 40, 80, 160], 10), data)

# In[43]:

get_ipython().run_line_magic('time', 'learn.fit(1e-2, 2)')

# In[44]:

get_ipython().run_line_magic('time', 'learn.fit(1e-2, 2, cycle_len=1)')

# ## Resnet


# In[45]:

class ResnetLayer(BnLayer):
    """``BnLayer`` whose output is added to its own input (skip connection)."""

    def forward(self, x):
        return x + super().forward(x)


# In[46]:

class Resnet(nn.Module):
    """Stem convolution, then groups of one stride-2 ``BnLayer`` followed by
    two stride-1 ``ResnetLayer``s, global max pool and a linear head."""

    def __init__(self, layers, c):
        """``layers`` lists the channel counts (first entry must match the
        10-channel stem); ``c`` is the number of outputs."""
        super().__init__()
        self.conv1 = nn.Conv2d(3, 10, kernel_size=5, stride=1, padding=2)
        self.layers = nn.ModuleList([
            BnLayer(n_in, n_out) for n_in, n_out in zip(layers, layers[1:])])
        self.layers2 = nn.ModuleList([
            ResnetLayer(nf, nf, 1) for nf in layers[1:]])
        self.layers3 = nn.ModuleList([
            ResnetLayer(nf, nf, 1) for nf in layers[1:]])
        self.out = nn.Linear(layers[-1], c)

    def forward(self, x):
        """Return log-softmax scores for a batch of images."""
        x = self.conv1(x)
        for down, res_a, res_b in zip(self.layers, self.layers2, self.layers3):
            x = res_b(res_a(down(x)))
        x = F.adaptive_max_pool2d(x, 1)
        x = x.view(x.size(0), -1)
        return F.log_softmax(self.out(x), dim=-1)


# In[47]:

learn = ConvLearner.from_model_data(Resnet([10, 20, 40, 80, 160], 10), data)

# In[48]:

wd = 1e-5

# In[49]:

get_ipython().run_line_magic('time', 'learn.fit(1e-2, 2, wds=wd)')

# In[50]:

get_ipython().run_line_magic('time', 'learn.fit(1e-2, 3, cycle_len=1, cycle_mult=2, wds=wd)')

# In[51]:

get_ipython().run_line_magic('time', 'learn.fit(1e-2, 4, cycle_len=4, wds=wd)')

# ## Resnet 2


# In[52]:

class Resnet2(nn.Module):
    """``Resnet`` variant with a 7x7 ``BnLayer`` stem and dropout before the
    linear head."""

    def __init__(self, layers, c, p=0.5):
        """``layers`` lists the channel counts (first entry must match the
        16-channel stem); ``c`` is the number of outputs; ``p`` is the
        dropout probability."""
        super().__init__()
        self.conv1 = BnLayer(3, 16, stride=1, kernel_size=7)
        self.layers = nn.ModuleList([
            BnLayer(n_in, n_out) for n_in, n_out in zip(layers, layers[1:])])
        self.layers2 = nn.ModuleList([
            ResnetLayer(nf, nf, 1) for nf in layers[1:]])
        self.layers3 = nn.ModuleList([
            ResnetLayer(nf, nf, 1) for nf in layers[1:]])
        self.out = nn.Linear(layers[-1], c)
        self.drop = nn.Dropout(p)

    def forward(self, x):
        """Return log-softmax scores for a batch of images."""
        x = self.conv1(x)
        for down, res_a, res_b in zip(self.layers, self.layers2, self.layers3):
            x = res_b(res_a(down(x)))
        x = F.adaptive_max_pool2d(x, 1)
        x = x.view(x.size(0), -1)
        x = self.drop(x)
        return F.log_softmax(self.out(x), dim=-1)


# In[53]:

learn = ConvLearner.from_model_data(Resnet2([16, 32, 64, 128, 256], 10, 0.2), data)

# In[54]:

wd = 1e-6

# In[55]:

get_ipython().run_line_magic('time', 'learn.fit(1e-2, 2, wds=wd)')

# In[56]:

get_ipython().run_line_magic('time', 'learn.fit(1e-2, 3, cycle_len=1, cycle_mult=2, wds=wd)')

# In[57]:

get_ipython().run_line_magic('time', 'learn.fit(1e-2, 4, cycle_len=4, wds=wd)')