How to build a CNN with PyTorch

Convolutional neural networks (CNNs) have transformed the field of computer vision, enabling machines to interpret visual data with remarkable accuracy. PyTorch is a popular deep learning framework that provides a powerful platform for building and training models. In this Answer, we will build a simple CNN with PyTorch and train it on the MNIST dataset for handwritten digit recognition.

Getting started with PyTorch

First, we'll install PyTorch in our Python environment using the following command:

pip3 install torch torchvision
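To confirm the installation succeeded, we can print the installed version from the command line (the exact version printed will vary by environment):

python3 -c "import torch; print(torch.__version__)"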

Once installed, we can follow these steps to build a CNN model:

  1. Import the necessary modules:

import torch # Import the PyTorch library
import torch.nn as nn # Import the neural network module
import torch.optim as optim # Import the optimization module
import torchvision # Import the torchvision library containing datasets, model architectures, etc.
import torchvision.transforms as transforms # Import the module for image transformations
  2. Define a simple CNN architecture using PyTorch’s nn.Module:

# Define a custom class
class SimpleCNN(nn.Module):
    def __init__(self):
        # Call the parent class constructor
        super(SimpleCNN, self).__init__()
        # Define the first convolutional layer with its parameters
        self.conv_layer1 = nn.Conv2d(in_channels=1, out_channels=16, padding=1, kernel_size=3, stride=1)
        # Define the second convolutional layer with its parameters
        self.conv_layer2 = nn.Conv2d(in_channels=16, out_channels=32, padding=1, kernel_size=3, stride=1)
        # Define the first fully connected layer
        self.fc_layer1 = nn.Linear(32 * 7 * 7, 128)
        # Define the second fully connected layer
        self.fc_layer2 = nn.Linear(128, 10)

    def forward(self, x):
        # Apply ReLU to the output of the first conv layer
        x = torch.relu(self.conv_layer1(x))
        # Perform max pooling operation
        x = torch.max_pool2d(x, kernel_size=2, stride=2)
        # Apply ReLU to the output of the second conv layer
        x = torch.relu(self.conv_layer2(x))
        # Perform max pooling again
        x = torch.max_pool2d(x, kernel_size=2, stride=2)
        # Flatten the output for the fully connected layers
        x = x.view(-1, 32 * 7 * 7)
        # Apply ReLU to the output of the first fully connected layer
        x = torch.relu(self.fc_layer1(x))
        # Apply the second fully connected layer
        x = self.fc_layer2(x)
        return x

The model architecture diagram for the code above is shown below:

Visual representation of the model architecture
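To see why fc_layer1 expects 32 * 7 * 7 input features, we can trace the spatial dimensions with a dummy MNIST-sized input. This is an optional sanity check, not part of the tutorial steps:

# Optional sanity check: each conv layer preserves the 28x28 size (padding=1, kernel=3, stride=1),
# and each 2x2 max pool halves it: 28 -> 14 -> 7
dummy = torch.randn(1, 1, 28, 28)  # A batch of one 1-channel 28x28 image
model = SimpleCNN()
out = torch.max_pool2d(torch.relu(model.conv_layer1(dummy)), kernel_size=2, stride=2)
print(out.shape)  # torch.Size([1, 16, 14, 14])
out = torch.max_pool2d(torch.relu(model.conv_layer2(out)), kernel_size=2, stride=2)
print(out.shape)  # torch.Size([1, 32, 7, 7]) -> 32 * 7 * 7 features after flattening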
  3. Load and preprocess the MNIST dataset using PyTorch’s built-in methods:

# Transformations to be applied to the data
transform_data = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
# Load the MNIST data and apply the transformations defined above
trainset_data = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform_data)
# DataLoader for the training dataset
train_loader = torch.utils.data.DataLoader(trainset_data, batch_size=64, shuffle=True)
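As a quick check, we can pull one mini-batch from the loader to confirm the tensor shapes and the normalized value range. Note that Normalize((0.5,), (0.5,)) maps pixel values from [0, 1] to roughly [-1, 1]:

# Optional: inspect one mini-batch from the DataLoader
images, labels = next(iter(train_loader))
print(images.shape)  # torch.Size([64, 1, 28, 28])
print(labels.shape)  # torch.Size([64])
print(images.min().item(), images.max().item())  # Roughly -1.0 to 1.0 after normalization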
  4. Initialize the CNN network, the cross-entropy loss function, and the Adam optimizer:

# Create an instance of the model
net = SimpleCNN()
# Define the loss function
criterion = nn.CrossEntropyLoss()
# Define the optimizer used for updating the parameters; Adam is used here
optimizer = optim.Adam(net.parameters(), lr=0.001)
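If a GPU is available, we can optionally move the model to it before training. This is a minimal sketch, not part of the original steps; the tensors in each batch would then need the same treatment:

# Optional: use a GPU if one is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net.to(device)
# Inside the training loop, each batch would also need:
# inputs, labels = inputs.to(device), labels.to(device)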
  5. Train the CNN network:

num_epochs = 5
# Iterate over each epoch
for epoch in range(num_epochs):
    running_loss = 0.0
    # Iterate over batches
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        optimizer.zero_grad() # Reset gradients from the previous step
        outputs = net(inputs) # Forward pass
        loss = criterion(outputs, labels) # Calculate the loss
        loss.backward() # Backward pass
        optimizer.step() # Update the parameters (weights)
        running_loss += loss.item()
        if i % 100 == 99: # Print every 100 mini-batches
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0
print('Training completed successfully.')
  6. Evaluate the CNN network to observe the model’s performance:

# Evaluate the model
correct = 0 # Initialize counters
total = 0
# Disable gradient computation for efficiency
with torch.no_grad():
    # Iterate over the batches
    for images, labels in train_loader:
        # Forward pass
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        # Count the number of correct predictions
        correct += (predicted == labels).sum().item()
# Calculate the accuracy
accuracy = 100 * correct / total
print('Accuracy of the model on the train dataset: {:.2f}%'.format(accuracy))
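Note that the loop above measures accuracy on the training data, which overstates how well the model generalizes. A minimal sketch for evaluating on the held-out MNIST test split instead, reusing the same transforms, could look like this:

# Sketch: evaluate on the MNIST test split instead of the training data
testset_data = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform_data)
test_loader = torch.utils.data.DataLoader(testset_data, batch_size=64, shuffle=False)

correct, total = 0, 0
with torch.no_grad():
    for images, labels in test_loader:
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print('Accuracy on the test dataset: {:.2f}%'.format(100 * correct / total))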

During training, we iterate over mini-batches of data, compute the loss, perform backpropagation, and update the model parameters using the optimizer. The complete program is shown below:

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# Define a CNN model
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv_layer1 = nn.Conv2d(in_channels=1, out_channels=16, padding=1, kernel_size=3, stride=1)
        self.conv_layer2 = nn.Conv2d(in_channels=16, out_channels=32, padding=1, kernel_size=3, stride=1)
        self.fc_layer1 = nn.Linear(32 * 7 * 7, 128)
        self.fc_layer2 = nn.Linear(128, 10)

    def forward(self, x):
        x = torch.relu(self.conv_layer1(x))
        x = torch.max_pool2d(x, stride=2, kernel_size=2)
        x = torch.relu(self.conv_layer2(x))
        x = torch.max_pool2d(x, stride=2, kernel_size=2)
        x = x.view(-1, 32 * 7 * 7)
        x = torch.relu(self.fc_layer1(x))
        x = self.fc_layer2(x)
        return x


# Load the MNIST dataset
transform_data = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
trainset_data = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform_data)
train_loader = torch.utils.data.DataLoader(trainset_data, batch_size=64, shuffle=True)

# Initialize the network
net = SimpleCNN()

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

# Train the network
num_epochs = 5
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 100 == 99:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0

print('Training completed successfully.')


# Evaluate the model
correct = 0
total = 0
with torch.no_grad():
    for images, labels in train_loader:
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print('Accuracy of the model on the train dataset: {:.2f}%'.format(accuracy))


Complete code for building and training a CNN model
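Once training finishes, we may want to persist the learned weights for later use. A minimal sketch using PyTorch’s state_dict mechanism follows; the file name simple_cnn.pth is arbitrary:

# Save the trained weights (file name is arbitrary)
torch.save(net.state_dict(), 'simple_cnn.pth')

# Later: recreate the architecture and load the weights back
restored = SimpleCNN()
restored.load_state_dict(torch.load('simple_cnn.pth'))
restored.eval()  # Switch to evaluation mode for inference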

Conclusion

We covered the fundamentals of building a CNN with PyTorch and training it on the MNIST dataset. By understanding the architecture of CNNs and leveraging PyTorch’s powerful tools, we can create sophisticated deep learning models for various computer vision tasks.
