In [1]:
# Written by Litu Rout
In [2]:
import numpy as np
import matplotlib.pyplot as plt
import random

%matplotlib inline
In [3]:
plt.rcParams.update({'font.size': 20})

Prepare Paired Training Data

In [4]:
path = './input/MNIST/'

x_train = np.load(path+'train_data.npy')
y_train = np.load(path+'train_label.npy')

x_test = np.load(path+'test_data.npy')
y_test = np.load(path+'test_label.npy')

# rescale pixels from [0, 255] to [-0.5, 0.5]
x_train = x_train/255 - 0.5
x_test = x_test/255 - 0.5

x_train.shape, y_train.shape, x_test.shape, y_test.shape
Out[4]:
((60000, 28, 28), (60000,), (10000, 28, 28), (10000,))
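If the .npy dumps under ./input/MNIST/ are unavailable, equivalent arrays can be rebuilt from torchvision's MNIST dataset. A minimal sketch, assuming torchvision is installed and may download the data; the `_alt` names are placeholders so the original arrays are not clobbered:

# Sketch: rebuild the same raw arrays from torchvision (not part of the original run).
from torchvision.datasets import MNIST
mnist_train = MNIST('./input', train=True, download=True)
mnist_test = MNIST('./input', train=False, download=True)
x_train_alt = mnist_train.data.numpy()      # (60000, 28, 28), uint8
y_train_alt = mnist_train.targets.numpy()   # (60000,)
x_test_alt = mnist_test.data.numpy()        # (10000, 28, 28)
y_test_alt = mnist_test.targets.numpy()     # (10000,)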
In [5]:
# flatten each 28x28 image into a length-784 vector (with a singleton middle axis)
X_train = x_train.reshape(60000,1,784)
X_test = x_test.reshape(10000,1,784)

X_train.shape, X_test.shape
Out[5]:
((60000, 1, 784), (10000, 1, 784))
In [6]:
# add 0.5 back to undo the centering shift before display
plt.imshow(x_train[0]+0.5, cmap='gray')
plt.title('Label: '+str(y_train[0]))
Out[6]:
Text(0.5, 1.0, 'Label: 5')
In [7]:
# one-hot encode the training labels: row i gets a 1 at column y_train[i]
Y_train = np.zeros((60000,10))
Y_train[np.arange(60000),y_train] = 1
Y_train[0]
Out[7]:
array([0., 0., 0., 0., 0., 1., 0., 0., 0., 0.])
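The scatter assignment above is one way to one-hot encode; indexing the rows of a 10x10 identity matrix gives the same result. A small equivalence check (a sketch, not part of the original run):

# Sketch: one-hot encoding via identity-matrix row lookup.
Y_train_alt = np.eye(10)[y_train]
assert (Y_train_alt == Y_train).all()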
In [8]:
plt.imshow(x_test[0]+0.5,cmap='gray')
plt.title('Label: '+str(y_test[0]))
Out[8]:
Text(0.5, 1.0, 'Label: 7')
In [9]:
# one-hot encode the test labels in the same way
Y_test = np.zeros((10000,10))
Y_test[np.arange(10000),y_test] = 1

Y_test[0]
Out[9]:
array([0., 0., 0., 0., 0., 0., 0., 1., 0., 0.])

Build Function Approximator

In [10]:
import torch
import torch.optim as optim
import torch.nn as nn
from torchsummary import summary
import torch.nn.functional as F
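The notebook does not fix random seeds, so reruns will not reproduce the exact numbers below. A minimal sketch for seeding every RNG in play, if reproducibility is wanted:

# Optional: seed all RNGs (not done in the original run, so outputs below will vary on rerun).
seed = 0
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)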
In [11]:
hidden_units_id = 10  # hidden layer width = 2**10 = 1024
In [12]:
class Model(nn.Module):
    def __init__(self, in_features=784, hidden_units=2**hidden_units_id, out_features=10):
        super(Model, self).__init__()
        self.in_features = in_features
        self.hidden_units = hidden_units
        self.out_features = out_features

        # one-hidden-layer MLP: 784 -> 1024 -> 10
        self.u = nn.Linear(in_features=self.in_features, out_features=self.hidden_units)
        self.v = nn.Linear(in_features=self.hidden_units, out_features=self.out_features)

    def forward(self, x):
        h = F.relu(self.u(x))  # hidden representation
        y = self.v(h)          # linear read-out, no output activation
        return y

model = Model().cuda()
summary(model,(1,784))
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Linear-1              [-1, 1, 1024]         803,840
            Linear-2                [-1, 1, 10]          10,250
================================================================
Total params: 814,090
Trainable params: 814,090
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.01
Params size (MB): 3.11
Estimated Total Size (MB): 3.12
----------------------------------------------------------------
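The parameter counts in the summary follow directly from the layer shapes: each nn.Linear holds in_features*out_features weights plus out_features biases.

# Checking the summary's parameter counts by hand.
u_params = 784 * 1024 + 1024   # Linear-1: 803,840
v_params = 1024 * 10 + 10      # Linear-2: 10,250
print(u_params + v_params)     # 814,090, matching 'Total params' above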
In [13]:
X_train.shape,Y_train.shape
Out[13]:
((60000, 1, 784), (60000, 10))

Start Training

In [14]:
# Hyper-parameters
learning_rate = 1e-2
epochs = 1001
batch_size = 200
iterations = X_train.shape[0]//batch_size

# Optimizer
opt = optim.SGD(model.parameters(), lr=learning_rate)

# Loss function
loss = nn.MSELoss()

# Create a list to store the error at intermediate steps
errors = []

# Training loop
for epoch in range(epochs):
    err = []
    for iteration in range(iterations):
        # fetch a random mini-batch (indices resampled independently each iteration)
        batch_indexes = random.sample(range(X_train.shape[0]), batch_size)
        x_batch = torch.FloatTensor(X_train[batch_indexes]).cuda()
        y_batch = torch.FloatTensor(Y_train[batch_indexes]).unsqueeze(dim=1).cuda()

        # forward pass through the function approximator
        y_pred_batch = model(x_batch)

        # compute the mean-squared error against the one-hot targets
        error = loss(y_pred_batch, y_batch)

        # accumulate error at intermediate steps
        err.append(error.item())

        # zero previously computed gradients
        opt.zero_grad()

        # compute new gradients
        error.backward()

        # update parameters
        opt.step()

    # monitor learning progress
    errors.append(np.mean(err))
    if epoch % 100 == 0:
        print("Epoch: ", epoch, "\tError: ", errors[-1])
Epoch:  0 	Error:  0.07821032625933488
Epoch:  100 	Error:  0.01855660560230414
Epoch:  200 	Error:  0.014260896059374015
Epoch:  300 	Error:  0.01235302557858328
Epoch:  400 	Error:  0.011114649654676517
Epoch:  500 	Error:  0.010151809410502513
Epoch:  600 	Error:  0.009391724673720698
Epoch:  700 	Error:  0.0087497780409952
Epoch:  800 	Error:  0.008373267337058982
Epoch:  900 	Error:  0.00784061488850663
Epoch:  1000 	Error:  0.007541161480670174
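Training here regresses one-hot targets under MSE. A classification-style alternative would be cross-entropy on the integer labels; a sketch of a single loss evaluation under that choice (not what was run above):

# Hypothetical alternative loss: cross-entropy on integer class labels.
ce_loss = nn.CrossEntropyLoss()
logits = model(x_batch).squeeze(dim=1)                     # (batch, 10)
targets = torch.LongTensor(y_train[batch_indexes]).cuda()  # integer labels for the same batch
ce_error = ce_loss(logits, targets)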
In [15]:
plt.plot(errors,'yd-')
plt.grid()
plt.xlabel('No. of Epochs')
plt.ylabel('Error')
Out[15]:
Text(0, 0.5, 'Error')

Inference

In [16]:
# run the whole test set through the model in mini-batches
x_test_full = torch.FloatTensor(X_test)
y_test_full = np.zeros((X_test.shape[0],10))

test_batch = 200

for i in range(0, x_test_full.shape[0], test_batch):
    i_end = min(i + test_batch, x_test_full.shape[0])
    y_test_full[i:i_end] = model(x_test_full[i:i_end].cuda()).squeeze(dim=1).cpu().detach().numpy()
y_test_full.shape
Out[16]:
(10000, 10)
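The batched loop above relies on .detach() to drop the autograd graph; wrapping inference in torch.no_grad() avoids building it in the first place. A sketch that pushes the whole test set through at once (assuming it fits in GPU memory):

# Sketch: graph-free inference in one shot.
with torch.no_grad():
    preds = model(x_test_full.cuda()).squeeze(dim=1).cpu().numpy()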
In [17]:
# mean-squared error over the full test set
((Y_test-y_test_full)**2).mean()
Out[17]:
0.00874489401692303
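The number above is the regression error; classification accuracy follows from comparing argmax predictions against the integer labels. A sketch (its value is not recorded in the original run):

# Fraction of test digits whose argmax prediction matches the true label.
accuracy = (np.argmax(y_test_full, axis=1) == y_test).mean()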
In [18]:
test_id = 11  # pick an arbitrary test example to inspect
y_test_full[test_id], np.argmax(y_test_full[test_id])
Out[18]:
(array([ 0.01969169, -0.06020579, -0.05183589, -0.02231752, -0.01335832,
        -0.02508897,  0.87721282,  0.01019056,  0.08771875,  0.1387412 ]), 6)
In [19]:
Y_test[test_id], np.argmax(Y_test[test_id])
Out[19]:
(array([0., 0., 0., 0., 0., 0., 1., 0., 0., 0.]), 6)
In [20]:
plt.imshow(x_test[test_id]+0.5,cmap='gray')
plt.title('Predicted: '+str(np.argmax(y_test_full[test_id])))
Out[20]:
Text(0.5, 1.0, 'Predicted: 6')
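Beyond single examples, the same arrays make it easy to pull out every misclassified digit for inspection. A sketch (the specific indices depend on the trained weights):

# Sketch: indices of all misclassified test digits.
wrong = np.where(np.argmax(y_test_full, axis=1) != y_test)[0]
print(len(wrong), wrong[:10])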