MSA on PyTorch Network trained on MNIST¶
This is an example of how to perform Multi-perturbation Shapley value Analysis (MSA) on a PyTorch neural network. We train a three-layer (input, hidden, output) network with 32 neurons in the hidden layer, and use MSA to analyse how much each hidden neuron contributes to accurately predicting the classes.
%load_ext autoreload
%autoreload 2
# Imports
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
# ---------
from msapy import msa, utils as ut
# ---------
from functools import partial
from typing import Union, Optional, List
CM = 1 / 2.54
SEED = 42
RNG = np.random.default_rng(SEED)
FIGPATH = "figures/mnist/"
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as T
from torch.utils.data import DataLoader, TensorDataset
from torchvision import datasets
from sklearn.metrics import accuracy_score, confusion_matrix
# Device configuration
device = torch.device('cpu')
device
device(type='cpu')
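This example keeps everything on the CPU. If a GPU is available you could instead pick the device dynamically; a minimal sketch is shown below, left commented out so the rest of the notebook stays on the CPU (the note in the MSA section explains why CPU can still be preferable for the analysis itself).
# Optional alternative (not used here): prefer CUDA when available
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')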
Loading Data¶
# Flatten each image into a 784-vector and scale the pixel values to [0, 1]
mnist_transforms = T.Lambda(lambda x: torch.flatten(x, 1) / 255)
train_data = datasets.MNIST(
    root='data',
    train=True,
    transform=mnist_transforms,
    download=True,
)
test_data = datasets.MNIST(
    root='data',
    train=False,
    transform=mnist_transforms,
)
# Apply the transform once to the raw tensors so every batch is already
# flattened, normalised, and on `device`
train_data = TensorDataset(mnist_transforms(train_data.data.to(device)), train_data.targets.to(device))
test_data = TensorDataset(mnist_transforms(test_data.data.to(device)), test_data.targets.to(device))
trainloader = DataLoader(train_data,
                         batch_size=64,
                         shuffle=True)
testloader = DataLoader(test_data,
                        batch_size=1024)
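As a quick sanity check (optional), each sample should now be a flattened 784-vector with values in [0, 1]:
x0, y0 = train_data[0]
print(x0.shape, x0.dtype, float(x0.min()), float(x0.max()))
# expected: torch.Size([784]) torch.float32 0.0 1.0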
Model Definition¶
class MNISTNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(28 * 28, 32)
        self.layer2 = nn.Sequential(nn.LeakyReLU(),
                                    nn.Linear(32, 10))

    def forward(self, x: torch.Tensor, lesion_idx: Optional[Union[int, List[int]]] = None) -> torch.Tensor:
        """Forward pass that calculates the scores for each class.

        Args:
            x (torch.Tensor): data of shape [batch_size, 28*28]
            lesion_idx (Optional[Union[int, List[int]]], optional): the neuron(s) to lesion
                in the hidden layer. Defaults to None, i.e. no lesioning is performed.

        Returns:
            torch.Tensor: scores for each class
        """
        out = self.layer1(x)
        if lesion_idx is not None:  # `is not None` so that lesioning neuron 0 also works
            out[:, lesion_idx] = 0  # zero out the lesioned neuron(s)
        return self.layer2(out)
model = MNISTNet().to(device)
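A quick optional check that lesioning behaves as intended: zeroing a hidden unit should change the logits, even for neuron 0 (which is why the forward pass tests `lesion_idx is not None` rather than truthiness).
with torch.no_grad():
    x = torch.rand(2, 28 * 28, device=device)
    # typically False: zeroing the neuron changes the logits
    print(torch.allclose(model(x), model(x, lesion_idx=0)))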
Training the Model¶
loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = 0.01)
for epoch in range(2):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = loss_func(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 400 == 399:  # print every 400 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 400:.3f}')
            running_loss = 0.0
print('Finished Training')
print('Finished Training')
[1,   400] loss: 0.343
[1,   800] loss: 0.215
[2,   400] loss: 0.157
[2,   800] loss: 0.160
Finished Training
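Since the MSA below takes a while, it can be handy to persist the trained weights so the analysis can be rerun without retraining. A minimal sketch (the filename is illustrative):
torch.save(model.state_dict(), "mnist_net.pt")  # illustrative path
# later: model.load_state_dict(torch.load("mnist_net.pt"))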
MSA¶
@torch.no_grad()
def evaluate_model(lesion_idx: Optional[Union[int, List[int]]] = None, num_batches: int = -1, score_fn=accuracy_score):
    """Return the score of the model on the test dataset.

    Args:
        lesion_idx (Optional[Union[int, List[int]]], optional): the neuron(s) to lesion
            in the hidden layer. Defaults to None, i.e. no lesioning is performed.
        num_batches (int, optional): the number of batches to test the model on.
            Defaults to -1, i.e. all data.
        score_fn (callable, optional): metric taking (targets, predictions).
            Defaults to accuracy_score.

    Returns:
        float: test score
    """
    targets = []
    preds = []
    for i, data in enumerate(testloader):
        images, labels = data
        # calculate outputs by running images through the network
        outputs = model(images, lesion_idx)
        # the class with the highest score is what we choose as prediction
        _, predicted = torch.max(outputs, 1)
        preds.append(predicted)
        targets.append(labels)
        if i == (num_batches - 1):
            break
    # pass targets first so that score functions expecting (y_true, y_pred) work
    return score_fn(torch.cat(targets).cpu(), torch.cat(preds).cpu())
def accuracy_each_class(targets, preds):
    # diagonal = correct predictions per true class; row sums = samples per true class
    matrix = confusion_matrix(targets, preds)
    return matrix.diagonal() / (matrix.sum(axis=1) + 1e-6)
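To see what accuracy_each_class computes, here is a small hand-worked example: the diagonal of the confusion matrix counts the correct predictions per true class, and dividing by the row sums gives the per-class accuracy (recall).
# Toy example with 3 classes and hand-picked labels
toy_targets = [0, 0, 1, 1, 2, 2]
toy_preds = [0, 1, 1, 1, 2, 0]
print(accuracy_each_class(toy_targets, toy_preds))
# class 0: 1/2 correct, class 1: 2/2, class 2: 1/2 -> ~[0.5, 1.0, 0.5]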
print(f"the accuracy of the model on the first batch without lesioning is: {evaluate_model(num_batches=1)}")
the accuracy of the model on the first batch without lesioning is: 0.9580078125
ground_truth_elements = list(range(32))  # indices of the neurons in the hidden layer
print(f'total number of possible lesions: {2**len(ground_truth_elements)}')
total number of possible lesions: 4294967296
Now we run MSA to calculate the contribution of each neuron.
Note: you might want to keep the device set to cpu while running MSA if the data would otherwise have to be copied to the GPU at every iteration, since that transfer can become a bottleneck. If your data is already on the GPU, then it's not a problem.
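For instance, a minimal sketch of what that move could look like (a no-op here, since everything in this notebook already lives on the CPU):
# Hypothetical: if the model was trained on a GPU, bring everything back to the
# CPU once before MSA instead of paying a transfer on every lesion evaluation
model = model.to('cpu')
test_data = TensorDataset(test_data.tensors[0].cpu(), test_data.tensors[1].cpu())
testloader = DataLoader(test_data, batch_size=1024)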
shapley_table = msa.interface(
    elements=ground_truth_elements,
    n_permutations=1000,  # you might want to increase this for better estimates
    objective_function=partial(evaluate_model, score_fn=accuracy_score, num_batches=1),  # only the first batch to save time; the batch size is 1024, i.e. ~100 images per class
    rng=RNG)
shapley_table.plot_shapley_ranks(150, xlabel="Shapley values", ylabel="Elements",
                                 title="Shapley values for neurons in hidden layer", savepath=f"{FIGPATH}Shapley.pdf")
We can also analyse the contribution of each neuron to each class by changing the objective function to return the per-class accuracies instead of the average accuracy over the whole dataset. Notice that msa.interface then returns one set of Shapley values per class, so the resulting table can be indexed by digit.
shapley_table = msa.interface(
    elements=ground_truth_elements,
    n_permutations=1000,  # you might want to increase this for better estimates
    objective_function=partial(evaluate_model, score_fn=accuracy_each_class, num_batches=1),  # only the first batch to save time; the batch size is 1024, i.e. ~100 images per class
    n_parallel_games=-1,  # parallelised over all CPU cores
    rng=RNG)
shapley_table_digit0 = shapley_table.iloc[0]
shapley_table_digit0.plot(kind='barh', figsize=(15, 8), xlabel="Shapley values", ylabel="Elements",
                          title="Shapley values for neurons in hidden layer (digit 0)")
<AxesSubplot:title={'center':'Shapley values for neurons in hidden layer (digit 0)'}, ylabel='Shapley values'>
shapley_table_digit8 = shapley_table.iloc[8]
shapley_table_digit8.plot(kind='barh', figsize=(15, 8), xlabel="Shapley values", ylabel="Elements",
                          title="Shapley values for neurons in hidden layer (digit 8)")
<AxesSubplot:title={'center':'Shapley values for neurons in hidden layer (digit 8)'}, ylabel='Shapley values'>
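Since seaborn is already imported, one possible follow-up is to look at all ten classes at once. Assuming shapley_table behaves like a DataFrame with one row per digit and one column per hidden neuron (consistent with the .iloc indexing above), a heatmap gives a compact overview:
plt.figure(figsize=(12, 5))
sns.heatmap(shapley_table, cmap="coolwarm", center=0)  # diverging map centred at 0
plt.xlabel("Hidden neuron")
plt.ylabel("Digit class")
plt.title("Per-class Shapley values of the hidden neurons")
plt.savefig(f"{FIGPATH}Shapley_heatmap.pdf")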