In [1]:
# Written by Litu Rout
In [2]:
import numpy as np
import matplotlib.pyplot as plt
import random

%matplotlib inline
In [3]:
plt.rcParams.update({'font.size': 20})

Prepare Paired Training Data

In [4]:
path = './input/MNIST/'

x_train = np.load(path+'train_data.npy')
y_train = np.load(path+'train_label.npy')

x_test = np.load(path+'test_data.npy')
y_test = np.load(path+'test_label.npy')

# rescale pixels from [0, 255] to [-0.5, 0.5]
x_train = x_train/255 - 0.5
x_test = x_test/255 - 0.5

x_train.shape, y_train.shape, x_test.shape, y_test.shape
Out[4]:
((60000, 28, 28), (60000,), (10000, 28, 28), (10000,))
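If the .npy dumps under ./input/MNIST/ are unavailable, equivalent arrays can be rebuilt from torchvision's MNIST dataset. A minimal sketch, assuming torchvision is installed and may download the data; the `_alt` names are placeholders so the original arrays are not clobbered:

# Sketch: rebuild the same raw arrays from torchvision (not part of the original run).
from torchvision.datasets import MNIST
mnist_train = MNIST('./input', train=True, download=True)
mnist_test = MNIST('./input', train=False, download=True)
x_train_alt = mnist_train.data.numpy()      # (60000, 28, 28), uint8
y_train_alt = mnist_train.targets.numpy()   # (60000,)
x_test_alt = mnist_test.data.numpy()        # (10000, 28, 28)
y_test_alt = mnist_test.targets.numpy()     # (10000,)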
In [5]:
# flatten each 28x28 image into a length-784 vector (with a singleton middle axis)
X_train = x_train.reshape(60000,1,784)
X_test = x_test.reshape(10000,1,784)

X_train.shape, X_test.shape
Out[5]:
((60000, 1, 784), (10000, 1, 784))
In [6]:
# add 0.5 back to undo the centering shift before display
plt.imshow(x_train[0]+0.5, cmap='gray')
plt.title('Label: '+str(y_train[0]))
Out[6]:
Text(0.5, 1.0, 'Label: 5')
In [7]:
# one-hot encode the training labels: row i gets a 1 at column y_train[i]
Y_train = np.zeros((60000,10))
Y_train[np.arange(60000),y_train] = 1
Y_train[0]
Out[7]:
array([0., 0., 0., 0., 0., 1., 0., 0., 0., 0.])
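The scatter assignment above is one way to one-hot encode; indexing the rows of a 10x10 identity matrix gives the same result. A small equivalence check (a sketch, not part of the original run):

# Sketch: one-hot encoding via identity-matrix row lookup.
Y_train_alt = np.eye(10)[y_train]
assert (Y_train_alt == Y_train).all()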
In [8]:
plt.imshow(x_test[0]+0.5,cmap='gray')
plt.title('Label: '+str(y_test[0]))
Out[8]:
Text(0.5, 1.0, 'Label: 7')
In [9]:
# one-hot encode the test labels in the same way
Y_test = np.zeros((10000,10))
Y_test[np.arange(10000),y_test] = 1

Y_test[0]
Out[9]:
array([0., 0., 0., 0., 0., 0., 0., 1., 0., 0.])

Build Function Approximator

In [10]:
import torch
import torch.optim as optim
import torch.nn as nn
from torchsummary import summary
import torch.nn.functional as F
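The notebook does not fix random seeds, so reruns will not reproduce the exact numbers below. A minimal sketch for seeding every RNG in play, if reproducibility is wanted:

# Optional: seed all RNGs (not done in the original run, so outputs below will vary on rerun).
seed = 0
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)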
In [11]:
hidden_units_id = 10  # hidden layer width = 2**10 = 1024
In [12]:
class Model(nn.Module):
    def __init__(self, in_features=784, hidden_units=2**hidden_units_id, out_features=10):
        super(Model, self).__init__()
        self.in_features = in_features
        self.hidden_units = hidden_units
        self.out_features = out_features

        # one-hidden-layer MLP: 784 -> 1024 -> 10
        self.u = nn.Linear(in_features=self.in_features, out_features=self.hidden_units)
        self.v = nn.Linear(in_features=self.hidden_units, out_features=self.out_features)

    def forward(self, x):
        h = F.relu(self.u(x))  # hidden representation
        y = self.v(h)          # linear read-out, no output activation
        return y

model = Model().cuda()
summary(model,(1,784))
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Linear-1              [-1, 1, 1024]         803,840
            Linear-2                [-1, 1, 10]          10,250
================================================================
Total params: 814,090
Trainable params: 814,090
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.01
Params size (MB): 3.11
Estimated Total Size (MB): 3.12
----------------------------------------------------------------
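The parameter counts in the summary follow directly from the layer shapes: each nn.Linear holds in_features*out_features weights plus out_features biases.

# Checking the summary's parameter counts by hand.
u_params = 784 * 1024 + 1024   # Linear-1: 803,840
v_params = 1024 * 10 + 10      # Linear-2: 10,250
print(u_params + v_params)     # 814,090, matching 'Total params' above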
In [13]:
X_train.shape,Y_train.shape
Out[13]:
((60000, 1, 784), (60000, 10))

Start Training

In [14]:
# Hyper-parameters
learning_rate = 1e-2
epochs = 1001
batch_size = 200
iterations = X_train.shape[0]//batch_size

# Optimizer
opt = optim.SGD(model.parameters(), lr=learning_rate)

# Loss function
loss = nn.MSELoss()

# Create a list to store the error at intermediate steps
errors = []

# Training loop
for epoch in range(epochs):
    err = []
    for iteration in range(iterations):
        # fetch a random mini-batch (indices resampled independently each iteration)
        batch_indexes = random.sample(range(X_train.shape[0]), batch_size)
        x_batch = torch.FloatTensor(X_train[batch_indexes]).cuda()
        y_batch = torch.FloatTensor(Y_train[batch_indexes]).unsqueeze(dim=1).cuda()

        # forward pass through the function approximator
        y_pred_batch = model(x_batch)

        # compute the mean-squared error against the one-hot targets
        error = loss(y_pred_batch, y_batch)

        # accumulate error at intermediate steps
        err.append(error.item())

        # zero previously computed gradients
        opt.zero_grad()

        # compute new gradients
        error.backward()

        # update parameters
        opt.step()

    # monitor learning progress
    errors.append(np.mean(err))
    if epoch % 100 == 0:
        print("Epoch: ", epoch, "\tError: ", errors[-1])
Epoch:  0 	Error:  0.07821032625933488
Epoch:  100 	Error:  0.01855660560230414
Epoch:  200 	Error:  0.014260896059374015
Epoch:  300 	Error:  0.01235302557858328
Epoch:  400 	Error:  0.011114649654676517
Epoch:  500 	Error:  0.010151809410502513
Epoch:  600 	Error:  0.009391724673720698
Epoch:  700 	Error:  0.0087497780409952
Epoch:  800 	Error:  0.008373267337058982
Epoch:  900 	Error:  0.00784061488850663
Epoch:  1000 	Error:  0.007541161480670174
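Training here regresses one-hot targets under MSE. A classification-style alternative would be cross-entropy on the integer labels; a sketch of a single loss evaluation under that choice (not what was run above):

# Hypothetical alternative loss: cross-entropy on integer class labels.
ce_loss = nn.CrossEntropyLoss()
logits = model(x_batch).squeeze(dim=1)                     # (batch, 10)
targets = torch.LongTensor(y_train[batch_indexes]).cuda()  # integer labels for the same batch
ce_error = ce_loss(logits, targets)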
In [15]:
plt.plot(errors,'yd-')
plt.grid()
plt.xlabel('No. of Epochs')
plt.ylabel('Error')
Out[15]:
Text(0, 0.5, 'Error')

Inference

In [16]:
# run the whole test set through the model in mini-batches
x_test_full = torch.FloatTensor(X_test)
y_test_full = np.zeros((X_test.shape[0],10))

test_batch = 200

for i in range(0, x_test_full.shape[0], test_batch):
    i_end = min(i + test_batch, x_test_full.shape[0])
    y_test_full[i:i_end] = model(x_test_full[i:i_end].cuda()).squeeze(dim=1).cpu().detach().numpy()
y_test_full.shape
Out[16]:
(10000, 10)
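The batched loop above relies on .detach() to drop the autograd graph; wrapping inference in torch.no_grad() avoids building it in the first place. A sketch that pushes the whole test set through at once (assuming it fits in GPU memory):

# Sketch: graph-free inference in one shot.
with torch.no_grad():
    preds = model(x_test_full.cuda()).squeeze(dim=1).cpu().numpy()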
In [17]:
# mean-squared error over the full test set
((Y_test-y_test_full)**2).mean()
Out[17]:
0.00874489401692303
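The number above is the regression error; classification accuracy follows from comparing argmax predictions against the integer labels. A sketch (its value is not recorded in the original run):

# Fraction of test digits whose argmax prediction matches the true label.
accuracy = (np.argmax(y_test_full, axis=1) == y_test).mean()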
In [18]:
test_id = 11  # pick an arbitrary test example to inspect
y_test_full[test_id], np.argmax(y_test_full[test_id])
Out[18]:
(array([ 0.01969169, -0.06020579, -0.05183589, -0.02231752, -0.01335832,
        -0.02508897,  0.87721282,  0.01019056,  0.08771875,  0.1387412 ]), 6)
In [19]:
Y_test[test_id], np.argmax(Y_test[test_id])
Out[19]:
(array([0., 0., 0., 0., 0., 0., 1., 0., 0., 0.]), 6)
In [20]:
plt.imshow(x_test[test_id]+0.5,cmap='gray')
plt.title('Predicted: '+str(np.argmax(y_test_full[test_id])))
Out[20]:
Text(0.5, 1.0, 'Predicted: 6')
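Beyond single examples, the same arrays make it easy to pull out every misclassified digit for inspection. A sketch (the specific indices depend on the trained weights):

# Sketch: indices of all misclassified test digits.
wrong = np.where(np.argmax(y_test_full, axis=1) != y_test)[0]
print(len(wrong), wrong[:10])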