#!/usr/bin/env python
# coding: utf-8

# ## CIFAR 10

# In[2]:


# Notebook setup: inline matplotlib output and automatic reloading of edited modules.
get_ipython().run_line_magic('matplotlib', 'inline')
get_ipython().run_line_magic('reload_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')

# NOTE: this is the only import in the file, so every unqualified name used below
# (os, np, plt, torch, nn, F, ConvLearner, ...) has to come from this star import.
from fastai.conv_learner import *
# Root directory of the dataset; created here so later cells can rely on it existing.
PATH = 'data/cifar/'
os.makedirs(PATH, exist_ok=True)


# You can get the data via:
# 
#     wget http://pjreddie.com/media/files/cifar.tgz

# In[3]:


# Class names. Only passed to to_label_subdirs below, which accepts but does not read them.
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
# Pair of 3-element arrays handed to tfms_from_stats in get_data.
# Presumably per-channel (mean, std) used for normalisation -- TODO confirm against fastai.
stats = (np.array([ 0.4914 ,  0.48216,  0.44653]), np.array([ 0.24703,  0.24349,  0.26159]))


# In[4]:


def to_label_subdirs(path, subdirs, classes, labelfn):
    """Move every file in `path/<subdir>` into `path/<subdir>/<label>/`.

    path:    root directory containing the sub-directories.
    subdirs: iterable of sub-directory names to process (e.g. 'train', 'test').
    classes: accepted for interface compatibility; not consulted here.
    labelfn: callable mapping a file name to its label; a falsy result
             leaves the file where it is.

    Only regular files directly inside each sub-directory are considered, so
    running the function a second time leaves already-sorted files untouched.
    """
    for subdir in subdirs:
        base = os.path.join(path, subdir)
        for fname in os.listdir(base):
            src = os.path.join(base, fname)
            # Directories (including label folders created earlier) are skipped.
            label = labelfn(fname) if os.path.isfile(src) else None
            if label:
                # os.renames creates the label directory on demand.
                os.renames(src, os.path.join(base, label, fname))


# In[5]:


# The label is the part of the file name after the first '_' and before the first '.'.
to_label_subdirs(PATH, 'train test'.split(), classes, lambda f: f[f.find('_') + 1 : f.find('.')])


# In[6]:


def get_data(sz,bs):
    """Return the ImageClassifierData for the folders under PATH.

    sz: image size passed to the transforms; the padding is derived from it as sz // 8.
    bs: batch size passed to ImageClassifierData.from_paths.

    The 'test' folder is used as the validation set.
    """
    augmentations = [RandomFlip()]
    padding = sz // 8
    transforms = tfms_from_stats(stats, sz, aug_tfms=augmentations, pad=padding)
    return ImageClassifierData.from_paths(PATH, val_name='test', tfms=transforms, bs=bs)


# In[7]:


# Batch size used for all training runs below.
bs=256


# ### Look at data

# In[8]:


# Tiny batch size (4): this data object is only used to pull and display a couple of images.
data = get_data(32, 4)


# In[9]:


# Grab one batch from the training data loader.
x, y = next(iter(data.trn_dl))


# In[10]:


# denorm is applied before plotting; presumably it undoes the normalisation
# and returns an array imshow can display -- TODO confirm.
plt.imshow(data.trn_ds.denorm(x)[0])


# In[11]:


plt.imshow(data.trn_ds.denorm(x)[1])


# ## Fully connected model

# In[12]:


# Rebuild the data object with the real training batch size.
data = get_data(32, bs)


# In[13]:


# Learning rate used for the fully connected model.
lr = 1e-2


# From [this notebook](https://github.com/KeremTurgutlu/deeplearning/blob/master/Exploring%20Optimizers.ipynb) by our student Kerem Turgutlu:

# In[14]:


class SimpleNet(nn.Module):
    """Fully connected classifier: Linear layers with ReLU in between, log-softmax output.

    layers: sequence of layer widths, input width first and output width last,
            e.g. [32*32*3, 40, 10] builds Linear(3072, 40) -> Linear(40, 10).
            At least two widths are required.

    Raises ValueError if fewer than two widths are given (previously this only
    surfaced as an UnboundLocalError inside forward).
    """
    def __init__(self, layers):
        super().__init__()
        if len(layers) < 2:
            raise ValueError(
                'SimpleNet needs at least an input and an output width, got %r' % (layers,))
        self.layers = nn.ModuleList([
            nn.Linear(n_in, n_out) for n_in, n_out in zip(layers, layers[1:])])

    def forward(self, x):
        """Flatten each sample to a vector and return per-class log-probabilities."""
        x = x.view(x.size(0), -1)
        *hidden, last = self.layers
        for l in hidden:
            x = F.relu(l(x))
        # No ReLU after the final layer: its raw output feeds log_softmax.
        return F.log_softmax(last(x), dim=-1)


# In[15]:


# Input width 32*32*3: get_data was called with sz=32, and SimpleNet flattens each image
# (3 channels assumed, matching the 3-element stats arrays); 10 outputs, one per class name.
learn = ConvLearner.from_model_data(SimpleNet([32*32*3, 40, 10]), data)


# In[16]:


# Display the learner together with the element count of every parameter tensor.
learn, [o.numel() for o in learn.model.parameters()]


# In[17]:


learn.summary()


# In[18]:


# Learning-rate finder run; the next cell plots what it recorded.
learn.lr_find()


# In[19]:


learn.sched.plot()


# In[20]:


# %time reports how long the training call takes.
get_ipython().run_line_magic('time', 'learn.fit(lr, 2)')


# In[21]:


# Same call with cycle_len=1 added.
get_ipython().run_line_magic('time', 'learn.fit(lr, 2, cycle_len=1)')


# ## CNN

# In[22]:


class ConvNet(nn.Module):
    """Stack of stride-2 3x3 convolutions, global max pool, then a linear classifier.

    layers: channel counts, input channels first; consecutive pairs define the convolutions.
    c:      number of outputs of the final linear layer.
    """
    def __init__(self, layers, c):
        super().__init__()
        channel_pairs = zip(layers, layers[1:])
        self.layers = nn.ModuleList(
            [nn.Conv2d(n_in, n_out, kernel_size=3, stride=2) for n_in, n_out in channel_pairs])
        self.pool = nn.AdaptiveMaxPool2d(1)
        self.out = nn.Linear(layers[-1], c)

    def forward(self, x):
        """Return log-probabilities of shape (batch, c)."""
        for conv in self.layers:
            x = F.relu(conv(x))
        # Pool each channel down to 1x1, then drop the spatial dimensions.
        pooled = self.pool(x)
        flat = pooled.view(pooled.size(0), -1)
        return F.log_softmax(self.out(flat), dim=-1)


# In[23]:


# Three stride-2 convolutions (3 -> 20 -> 40 -> 80 channels) and a 10-way output.
learn = ConvLearner.from_model_data(ConvNet([3, 20, 40, 80], 10), data)


# In[24]:


learn.summary()


# In[25]:


# Learning-rate finder run with end_lr raised to 100; the next cell plots the result.
learn.lr_find(end_lr=100)


# In[26]:


learn.sched.plot()


# In[27]:


# Train at learning rate 1e-1; %time reports the elapsed time.
get_ipython().run_line_magic('time', 'learn.fit(1e-1, 2)')


# In[28]:


get_ipython().run_line_magic('time', 'learn.fit(1e-1, 4, cycle_len=1)')


# ## Refactored

# In[29]:


class ConvLayer(nn.Module):
    """A 3x3, stride-2, padding-1 convolution followed by ReLU.

    ni: number of input channels.
    nf: number of output channels (filters).
    """
    def __init__(self, ni, nf):
        super().__init__()
        self.conv = nn.Conv2d(ni, nf, kernel_size=3, stride=2, padding=1)

    def forward(self, x):
        """Apply the convolution, then the ReLU non-linearity."""
        activations = self.conv(x)
        return F.relu(activations)


# In[30]:


class ConvNet2(nn.Module):
    """ConvNet rebuilt from ConvLayer blocks, using the functional max pool.

    layers: channel counts, input channels first; consecutive pairs define the ConvLayers.
    c:      number of outputs of the final linear layer.
    """
    def __init__(self, layers, c):
        super().__init__()
        blocks = [ConvLayer(n_in, n_out) for n_in, n_out in zip(layers, layers[1:])]
        self.layers = nn.ModuleList(blocks)
        self.out = nn.Linear(layers[-1], c)

    def forward(self, x):
        """Return log-probabilities of shape (batch, c)."""
        for block in self.layers:
            x = block(x)
        # Global max pool to 1x1 per channel, then flatten to (batch, channels).
        pooled = F.adaptive_max_pool2d(x, 1)
        flat = pooled.view(pooled.size(0), -1)
        return F.log_softmax(self.out(flat), dim=-1)


# In[31]:


# Same channel progression as the ConvNet run above, now built from ConvLayer blocks.
learn = ConvLearner.from_model_data(ConvNet2([3, 20, 40, 80], 10), data)


# In[32]:


learn.summary()


# In[33]:


# Train at learning rate 1e-1; %time reports the elapsed time.
get_ipython().run_line_magic('time', 'learn.fit(1e-1, 2)')


# In[34]:


get_ipython().run_line_magic('time', 'learn.fit(1e-1, 2, cycle_len=1)')


# ## BatchNorm

# In[35]:


class BnLayer(nn.Module):
    """Conv -> ReLU -> hand-rolled batch normalisation with learnable scale and shift.

    ni:          number of input channels.
    nf:          number of output channels (filters).
    stride:      stride of the convolution.
    kernel_size: kernel size of the convolution. The padding stays fixed at 1,
                 so only kernel_size=3 preserves the spatial size at stride 1.
    eps:         small constant added to the standard deviation so that a channel
                 that is constant over the batch (for example all zeros after the
                 ReLU) yields 0 instead of NaN.

    In training mode each channel is normalised with the mean and standard
    deviation of the current batch, and those statistics are stored. In eval mode
    the statistics of the most recent training batch are reused (there is no
    running average).

    The statistics live in the registered buffers `means` and `stds`, so they
    follow the module across devices, are saved in `state_dict()`, and have a
    defined value (0 and 1) before the first training batch. Checkpoints written
    without these two buffers need `load_state_dict(..., strict=False)`.
    """
    def __init__(self, ni, nf, stride=2, kernel_size=3, eps=1e-5):
        super().__init__()
        self.conv = nn.Conv2d(ni, nf, kernel_size=kernel_size, stride=stride,
                              bias=False, padding=1)
        # Learnable per-channel shift (a) and scale (m), broadcast over height and width.
        self.a = nn.Parameter(torch.zeros(nf, 1, 1))
        self.m = nn.Parameter(torch.ones(nf, 1, 1))
        self.eps = eps
        self.register_buffer('means', torch.zeros(nf, 1, 1))
        self.register_buffer('stds', torch.ones(nf, 1, 1))

    def forward(self, x):
        """Return the normalised, scaled and shifted activations."""
        x = F.relu(self.conv(x))
        if self.training:
            # One row per channel, holding every batch and spatial position.
            x_chan = x.transpose(0, 1).contiguous().view(x.size(1), -1)
            means = x_chan.mean(1)[:, None, None]
            stds = x_chan.std(1)[:, None, None]
            # Store detached copies: eval only needs the values, and keeping the
            # graph-attached tensors would hold on to the last batch's autograd graph.
            self.means = means.detach()
            self.stds = stds.detach()
        else:
            means, stds = self.means, self.stds
        return (x - means) / (stds + self.eps) * self.m + self.a


# In[36]:


class ConvBnNet(nn.Module):
    """5x5 stem convolution, a stack of BnLayers, global max pool, linear classifier.

    layers: channel counts for the BnLayer stack. layers[0] is also the width of the
            stem, so the stem always matches the first BnLayer (it used to be fixed
            at 10, which only worked when layers[0] == 10).
    c:      number of outputs of the final linear layer.
    """
    def __init__(self, layers, c):
        super().__init__()
        # Stride-1 stem with padding 2: keeps the spatial size of the 3-channel input.
        self.conv1 = nn.Conv2d(3, layers[0], kernel_size=5, stride=1, padding=2)
        self.layers = nn.ModuleList([BnLayer(n_in, n_out)
            for n_in, n_out in zip(layers, layers[1:])])
        self.out = nn.Linear(layers[-1], c)

    def forward(self, x):
        """Return log-probabilities of shape (batch, c)."""
        x = self.conv1(x)
        for l in self.layers: x = l(x)
        # Global max pool to 1x1 per channel, then flatten to (batch, channels).
        x = F.adaptive_max_pool2d(x, 1)
        x = x.view(x.size(0), -1)
        return F.log_softmax(self.out(x), dim=-1)


# In[37]:


# The first width (10) matches the 10 output channels of ConvBnNet's stem convolution.
learn = ConvLearner.from_model_data(ConvBnNet([10, 20, 40, 80, 160], 10), data)


# In[38]:


learn.summary()


# In[39]:


# First run at learning rate 3e-2; %time reports the elapsed time.
get_ipython().run_line_magic('time', 'learn.fit(3e-2, 2)')


# In[40]:


# Second run at the higher learning rate 1e-1, with cycle_len=1.
get_ipython().run_line_magic('time', 'learn.fit(1e-1, 4, cycle_len=1)')


# ## Deep BatchNorm

# In[41]:


class ConvBnNet2(nn.Module):
    """Deeper ConvBnNet: each stride-2 BnLayer is followed by a stride-1 BnLayer.

    layers: channel counts for the stride-2 stack. layers[0] is also the width of the
            stem, so the stem always matches the first BnLayer (it used to be fixed
            at 10, which only worked when layers[0] == 10).
    c:      number of outputs of the final linear layer.
    """
    def __init__(self, layers, c):
        super().__init__()
        self.conv1 = nn.Conv2d(3, layers[0], kernel_size=5, stride=1, padding=2)
        # Stride-2 layers change the channel count ...
        self.layers = nn.ModuleList([BnLayer(n_in, n_out)
            for n_in, n_out in zip(layers, layers[1:])])
        # ... and each is paired with a stride-1 layer that keeps it.
        self.layers2 = nn.ModuleList([BnLayer(nf, nf, 1) for nf in layers[1:]])
        self.out = nn.Linear(layers[-1], c)

    def forward(self, x):
        """Return log-probabilities of shape (batch, c)."""
        x = self.conv1(x)
        for l,l2 in zip(self.layers, self.layers2):
            x = l(x)
            x = l2(x)
        # Global max pool to 1x1 per channel, then flatten to (batch, channels).
        x = F.adaptive_max_pool2d(x, 1)
        x = x.view(x.size(0), -1)
        return F.log_softmax(self.out(x), dim=-1)


# In[42]:


# Same widths as the ConvBnNet run; ConvBnNet2 adds a stride-1 BnLayer after each stride-2 one.
learn = ConvLearner.from_model_data(ConvBnNet2([10, 20, 40, 80, 160], 10), data)


# In[43]:


# Train at learning rate 1e-2; %time reports the elapsed time.
get_ipython().run_line_magic('time', 'learn.fit(1e-2, 2)')


# In[44]:


get_ipython().run_line_magic('time', 'learn.fit(1e-2, 2, cycle_len=1)')


# ## Resnet

# In[45]:


class ResnetLayer(BnLayer):
    """BnLayer with a skip connection: the output is input + BnLayer(input).

    The addition needs the BnLayer output to be broadcast-compatible with the
    input; every use in this file passes ni == nf and stride 1.
    """
    def forward(self, x):
        residual = super().forward(x)
        return x + residual


# In[46]:


class Resnet(nn.Module):
    """Stem convolution, then groups of one stride-2 BnLayer plus two ResnetLayers.

    layers: channel counts for the stride-2 stack. layers[0] is also the width of the
            stem, so the stem always matches the first BnLayer (it used to be fixed
            at 10, which only worked when layers[0] == 10).
    c:      number of outputs of the final linear layer.
    """
    def __init__(self, layers, c):
        super().__init__()
        self.conv1 = nn.Conv2d(3, layers[0], kernel_size=5, stride=1, padding=2)
        # Stride-2 layers change the channel count ...
        self.layers = nn.ModuleList([BnLayer(n_in, n_out)
            for n_in, n_out in zip(layers, layers[1:])])
        # ... and are each followed by two stride-1 residual layers that keep it.
        self.layers2 = nn.ModuleList([ResnetLayer(nf, nf, 1) for nf in layers[1:]])
        self.layers3 = nn.ModuleList([ResnetLayer(nf, nf, 1) for nf in layers[1:]])
        self.out = nn.Linear(layers[-1], c)

    def forward(self, x):
        """Return log-probabilities of shape (batch, c)."""
        x = self.conv1(x)
        for l,l2,l3 in zip(self.layers, self.layers2, self.layers3):
            x = l3(l2(l(x)))
        # Global max pool to 1x1 per channel, then flatten to (batch, channels).
        x = F.adaptive_max_pool2d(x, 1)
        x = x.view(x.size(0), -1)
        return F.log_softmax(self.out(x), dim=-1)


# In[47]:


# The first width (10) matches the 10 output channels of Resnet's stem convolution.
learn = ConvLearner.from_model_data(Resnet([10, 20, 40, 80, 160], 10), data)


# In[48]:


# Passed as wds= to every fit call for this model.
wd = 1e-5


# In[49]:


# Train at learning rate 1e-2; %time reports the elapsed time.
get_ipython().run_line_magic('time', 'learn.fit(1e-2, 2, wds=wd)')


# In[50]:


# Same learning rate, now with cycle_len=1 and cycle_mult=2.
get_ipython().run_line_magic('time', 'learn.fit(1e-2, 3, cycle_len=1, cycle_mult=2, wds=wd)')


# In[51]:


# Final run with cycle_len=4.
get_ipython().run_line_magic('time', 'learn.fit(1e-2, 4, cycle_len=4, wds=wd)')


# ## Resnet 2

# In[52]:


class Resnet2(nn.Module):
    """Wider Resnet variant with a BnLayer stem and dropout before the classifier.

    layers: channel counts for the stride-2 stack. layers[0] is also the width of the
            stem, so the stem always matches the first BnLayer (it used to be fixed
            at 16, which only worked when layers[0] == 16).
    c:      number of outputs of the final linear layer.
    p:      dropout probability applied to the pooled features.
    """
    def __init__(self, layers, c, p=0.5):
        super().__init__()
        # BnLayer pads by 1 whatever the kernel size, so this 7x7 stride-1 stem
        # trims 4 pixels from each spatial dimension (e.g. 32 -> 28).
        self.conv1 = BnLayer(3, layers[0], stride=1, kernel_size=7)
        # Stride-2 layers change the channel count ...
        self.layers = nn.ModuleList([BnLayer(n_in, n_out)
            for n_in, n_out in zip(layers, layers[1:])])
        # ... and are each followed by two stride-1 residual layers that keep it.
        self.layers2 = nn.ModuleList([ResnetLayer(nf, nf, 1) for nf in layers[1:]])
        self.layers3 = nn.ModuleList([ResnetLayer(nf, nf, 1) for nf in layers[1:]])
        self.out = nn.Linear(layers[-1], c)
        self.drop = nn.Dropout(p)

    def forward(self, x):
        """Return log-probabilities of shape (batch, c)."""
        x = self.conv1(x)
        for l,l2,l3 in zip(self.layers, self.layers2, self.layers3):
            x = l3(l2(l(x)))
        # Global max pool to 1x1 per channel, then flatten to (batch, channels).
        x = F.adaptive_max_pool2d(x, 1)
        x = x.view(x.size(0), -1)
        x = self.drop(x)
        return F.log_softmax(self.out(x), dim=-1)


# In[53]:


# The first width (16) matches the 16 output channels of Resnet2's stem; dropout p=0.2.
learn = ConvLearner.from_model_data(Resnet2([16, 32, 64, 128, 256], 10, 0.2), data)


# In[54]:


# Smaller value than the 1e-5 used for the previous model; passed as wds= below.
wd = 1e-6


# In[55]:


# Train at learning rate 1e-2; %time reports the elapsed time.
get_ipython().run_line_magic('time', 'learn.fit(1e-2, 2, wds=wd)')


# In[56]:


# Same learning rate, now with cycle_len=1 and cycle_mult=2.
get_ipython().run_line_magic('time', 'learn.fit(1e-2, 3, cycle_len=1, cycle_mult=2, wds=wd)')


# In[57]:


# Final run with cycle_len=4.
get_ipython().run_line_magic('time', 'learn.fit(1e-2, 4, cycle_len=4, wds=wd)')