import numpy as np
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import matplotlib as mpl
%config InlineBackend.figure_format = 'retina'
%config InlineBackend.print_figure_kwargs={'facecolor' : "w"}
This notebook is inspired by Andrew Ng's amazing Coursera course on Deep Learning. The dataset we will be using to train the model is MNIST, which is one of the default datasets in PyTorch.
device = 'cpu' #torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
cpu
#Import MNIST dataset
train_dataset = torchvision.datasets.MNIST(root='data/',
                                            train=True,
                                            transform=torchvision.transforms.ToTensor(),
                                            download=True)

val_dataset = torchvision.datasets.MNIST(root='data/',
                                         train=False,
                                         transform=torchvision.transforms.ToTensor(),
                                         download=True)

input_tensor, label = train_dataset[0]
print('MNIST dataset with {} train data and {} test data'.format(len(train_dataset), len(val_dataset)))
print('Type of data in dataset: {} AND {}'.format(type(input_tensor), type(label)))
print('Input tensor image dimensions: {}'.format(input_tensor.shape))
MNIST dataset with 60000 train data and 10000 test data
Type of data in dataset: <class 'torch.Tensor'> AND <class 'int'>
Input tensor image dimensions: torch.Size([1, 28, 28])
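As a quick sanity check (not in the original notebook), the ToTensor transform converts each PIL image to a float tensor and scales the pixel values into the range [0, 1], which we can verify on the sample we just pulled out:

#Sketch: verify the value range produced by ToTensor (assumes input_tensor and label from the cell above)
print('Pixel value range: min = {}, max = {}'.format(input_tensor.min().item(), input_tensor.max().item()))
print('Label of first training sample: {}'.format(label))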
#Model hyper-parameters for the fully connected neural network
input_size = 784 # Image input for the digits - 28 x 28 x 1 (W-H-C), flattened before being fed into the NN
num_hidden_layers = 1
hidden_layer_size = 50
num_classes = 10
num_epochs = 50
batch_size = 64
learning_rate = 10e-4
#Convert dataset to a dataloader class for ease of doing batching and SGD operations
from torch.utils.data import Dataset, DataLoader

train_loader = DataLoader(dataset = train_dataset,
                          batch_size = batch_size,
                          shuffle = True,
                          num_workers = 2)

test_loader = DataLoader(dataset = val_dataset,
                         batch_size = batch_size,
                         num_workers = 2)
#Take a look at one batch
examples = iter(train_loader)
samples, labels = next(examples)
print(samples.shape, labels.shape)

#Plotting first 4 digits in the dataset:
for i in range(4):
    plt.subplot(2, 2, i+1)
    plt.imshow(samples[i][0], cmap=mpl.cm.binary, interpolation="nearest")
    plt.title('Digit:{}'.format(labels[i]))
    plt.axis("off");
torch.Size([64, 1, 28, 28]) torch.Size([64])
Above, we defined a batch size of 64 for the training dataset, so each batch of samples has shape 64 x 1 x 28 x 28.
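Each 1 x 28 x 28 image has to be flattened into a 784-dimensional vector before it can be fed to the fully connected network. A minimal sketch of that reshape, using the samples batch from the cell above:

#Sketch: flatten a batch of images from (batch, 1, 28, 28) to (batch, 784)
flattened = samples.view(-1, 28*28)
print(flattened.shape)   #expected: torch.Size([64, 784])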
#Define a model
class NeuralNet(nn.Module):
    def __init__(self, input_size, num_hidden_layers, hidden_layer_size, num_classes):
        super(NeuralNet, self).__init__()
        self.L1 = nn.Linear(in_features = input_size, out_features = hidden_layer_size)
        self.relu = nn.ReLU()
        self.num_hidden_layers = num_hidden_layers
        if (self.num_hidden_layers - 1) > 1:
            self.L_hidden = nn.ModuleList( [nn.Linear(in_features = hidden_layer_size, out_features = hidden_layer_size) for _ in range(num_hidden_layers - 1)] )
            self.relu_hidden = nn.ModuleList( [nn.ReLU() for _ in range(num_hidden_layers - 1)] )
        else:
            self.L2 = nn.Linear(in_features = hidden_layer_size, out_features = hidden_layer_size)
        self.L_out = nn.Linear(in_features = hidden_layer_size, out_features = num_classes)

    def forward(self, x):
        out = self.relu(self.L1(x))

        if (self.num_hidden_layers - 1) > 1:
            for L_hidden, relu_hidden in zip(self.L_hidden, self.relu_hidden):
                out = relu_hidden(L_hidden(out))
        else:
            out = self.relu(self.L2(out))

        out = self.L_out(out) #No softmax or cross-entropy activation, just the output of the final linear transformation
        return out
model = NeuralNet(input_size=input_size,
                  num_hidden_layers=num_hidden_layers,
                  hidden_layer_size=hidden_layer_size,
                  num_classes=num_classes)
model
NeuralNet(
(L1): Linear(in_features=784, out_features=50, bias=True)
(relu): ReLU()
(L2): Linear(in_features=50, out_features=50, bias=True)
(L_out): Linear(in_features=50, out_features=10, bias=True)
)
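As a quick sanity check (not part of the original notebook), we can run a dummy forward pass with one flattened batch to confirm that the raw logits come out with shape (batch_size, num_classes):

#Sketch: dummy forward pass to verify the output shape (assumes model and samples from the cells above)
with torch.no_grad():
    logits = model(samples.view(-1, 28*28))
print(logits.shape)   #expected: torch.Size([64, 10])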
CrossEntropyLoss in PyTorch combines LogSoftmax and NLLLoss in a single class.
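A minimal sketch of that equivalence (not from the original notebook): CrossEntropyLoss applied to raw logits gives the same value as NLLLoss applied to log-softmax outputs.

#Sketch: CrossEntropyLoss(logits, targets) == NLLLoss(LogSoftmax(logits), targets)
logits = torch.randn(4, 10)           #4 samples, 10 classes, raw (unnormalized) scores
targets = torch.tensor([0, 3, 9, 1])  #ground-truth class indices

ce = nn.CrossEntropyLoss()(logits, targets)
nll = nn.NLLLoss()(nn.LogSoftmax(dim=1)(logits), targets)
print(ce.item(), nll.item())          #the two values should match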
#Loss and optimizer
criterion = nn.CrossEntropyLoss() #This implements the softmax activation for us, so it is not implemented in the model
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
#Training loop
total_batches = len(train_loader)
losses = []
epochs = []
for epoch in range(num_epochs):
    running_loss = 0
    batch_count = 0
    for i, (image_tensors, labels) in enumerate(train_loader):
        #image tensor = 64, 1, 28, 28 --> 64, 784 input needed
        image_input_to_NN = image_tensors.view(-1, 28*28).to(device)
        labels = labels.to(device)

        #Forward pass
        outputs = model(image_input_to_NN)
        loss = criterion(outputs, labels)

        running_loss += loss.item()
        batch_count += 1

        #Backward pass
        optimizer.zero_grad() #Detach and flush the gradients
        loss.backward() #Backward gradients evaluation
        optimizer.step() #Update the weights/parameters of the NN

        if epoch % 10 == 0 and (i+1) % 500 == 0:
            print(f'epoch {epoch+1} / {num_epochs}, batch {i+1}/{total_batches}, loss = {loss.item():.4f}')

    loss_per_epoch = running_loss / batch_count
    epochs.append(epoch)
    losses.append(loss_per_epoch)
epoch 1 / 50, batch 500/938, loss = 0.2568
epoch 11 / 50, batch 500/938, loss = 0.0431
epoch 21 / 50, batch 500/938, loss = 0.0141
epoch 31 / 50, batch 500/938, loss = 0.0032
epoch 41 / 50, batch 500/938, loss = 0.0518
fig, ax = plt.subplots(1, 1, figsize=(10, 10))
ax.plot(epochs, losses)
plt.title('Loss Curve (Training)')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss Value')
Text(0, 0.5, 'Loss Value')
#Test
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    for images, labels in test_loader:
        images = images.view(-1, 28*28).to(device)
        labels = labels.to(device)
        outputs = model(images)

        _, predictions = torch.max(outputs, 1)
        n_samples += labels.shape[0]
        n_correct += (predictions == labels).sum().item() #Count the correct predictions in this batch

    acc = 100 * n_correct / n_samples
    print(f'Accuracy = {acc:.2f}%')
Accuracy = 97.54%
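As a small extension of the evaluation above (not in the original notebook), the same loop can be broken down by digit to see which classes the network finds hardest; this sketch assumes model, test_loader, device, and num_classes from the cells above.

#Sketch: per-class accuracy on the test set
class_correct = [0] * num_classes
class_total = [0] * num_classes
with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images.view(-1, 28*28).to(device))
        _, predictions = torch.max(outputs, 1)
        for label, prediction in zip(labels.to(device), predictions):
            class_total[label.item()] += 1
            class_correct[label.item()] += int(prediction.item() == label.item())

for digit in range(num_classes):
    print('Digit {}: {:.2f}% accuracy'.format(digit, 100 * class_correct[digit] / class_total[digit]))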