GNN 需要可解释性

GNNExplainer的直观解释

## GNNExplainer

GNNExplainer 是一种与模型无关的基于扰动的方法，可以为任何基于图的机器学习任务上的任何基于 GNN 的模型的预测提供可解释的报告。

GNNExplainer 学习边和节点特征的软掩码，然后通过掩码的优化来解释预测。

GNNExplainer 会获取输入图并识别紧凑的子图结构和在预测中起关键作用的一小部分节点特征。

GNNExplainer 通过生成能够传递关键语义的掩码来捕获重要的输入特征，使掩码后的输入产生与原始预测相近的预测。它学习边和节点特征的软掩码，并通过优化这些掩码来解释预测。

## GNNExplainer 示例

explain_node() 用于节点分类：它学习并返回一个节点特征掩码和一个边掩码，这两个掩码对解释 GNN 在该节点上所做的预测起着至关重要的作用。

```python
# Import Library
import numpy as np
import pandas as pd
import os
import torch_geometric.transforms as T
from torch_geometric.datasets import Planetoid
import matplotlib.pyplot as plt
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, GNNExplainer
import torch_geometric
from torch_geometric.utils import to_networkx
# Load the Pubmed Planetoid dataset; it holds a single graph, taken as `data`.
dataset = Planetoid(root='.', name="Pubmed")
data = dataset[0]
#Set the device dynamically
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Create batches with neighbor sampling
# NeighborLoader is not among the imports at the top of this script, so it is
# imported here next to its only use.
from torch_geometric.loader import NeighborLoader

train_loader = NeighborLoader(
    data,
    # One entry per GNN layer: neighbors sampled at hop 1, then at hop 2.
    num_neighbors=[5, 10],
    batch_size=16,
    # Seed the mini-batches from the training nodes only.
    input_nodes=data.train_mask,
)
# Define the GCN model
class Net(torch.nn.Module):
    """Two-layer GCN that outputs per-node log-probabilities over the classes.

    Layer sizes are read from the module-level ``dataset``
    (``num_features`` -> 16 -> ``num_classes``). The model carries its own
    optimizer because ``train_nn`` reads it from ``model.optimizer``.
    """

    def __init__(self):
        super().__init__()

        self.conv1 = GCNConv(dataset.num_features, 16, normalize=False)
        self.conv2 = GCNConv(16, dataset.num_classes, normalize=False)
        # Must be created after the layers so that self.parameters() sees them.
        # lr / weight_decay are the usual GCN defaults; tune as needed.
        self.optimizer = torch.optim.Adam(
            self.parameters(), lr=0.01, weight_decay=5e-4
        )

    def forward(self, x, edge_index):
        """Return log-softmax class scores for every node in the input graph."""
        x = F.relu(self.conv1(x, edge_index))
        # Dropout is only active while the module is in training mode.
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)


model = Net().to(device)
def accuracy(pred_y, y):
    """Return the fraction of entries of ``pred_y`` equal to ``y`` as a float.

    An empty selection (for example a mini-batch that contains no validation
    nodes) yields 0.0 instead of NaN, so that running sums of accuracies are
    not poisoned by a single empty batch.
    """
    if len(y) == 0:
        return 0.0
    return ((pred_y == y).sum() / len(y)).item()
# define the function to Train the model
def train_nn(model, x, edge_index, epochs):
    """Train ``model`` on mini-batches drawn from the module-level ``train_loader``.

    Args:
        model: network exposing an ``optimizer`` attribute; called as
            ``model(batch.x, batch.edge_index)``.
        x: unused; kept so existing calls keep working (inputs come from the
            loader batches).
        edge_index: unused; kept for the same reason as ``x``.
        epochs: the loop runs over ``range(epochs + 1)``, i.e. epochs
            0..``epochs`` inclusive.

    Prints averaged train/validation loss and accuracy every 10 epochs.
    Validation figures are computed from the same forward pass as training
    (dropout active), so they are only a rough progress indicator.
    """
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = model.optimizer
    # Batches are moved to wherever the model's weights live.
    model_device = next(model.parameters()).device
    model.train()
    for epoch in range(epochs + 1):
        total_loss = 0
        acc = 0
        val_loss = 0
        val_acc = 0
        # Train on batches
        for batch in train_loader:
            batch = batch.to(model_device)
            optimizer.zero_grad()

            out = model(batch.x, batch.edge_index)
            loss = criterion(out[batch.train_mask], batch.y[batch.train_mask])

            # .item() keeps the running sum from holding on to the autograd graph.
            total_loss += loss.item()
            acc += accuracy(out[batch.train_mask].argmax(dim=1),
                            batch.y[batch.train_mask])
            loss.backward()
            optimizer.step()
            # Validation
            # A sampled batch may contain no validation nodes; the loss over an
            # empty selection would be NaN, so such batches are skipped.
            if batch.val_mask.any():
                val_loss += criterion(out[batch.val_mask],
                                      batch.y[batch.val_mask]).item()
                val_acc += accuracy(out[batch.val_mask].argmax(dim=1),
                                    batch.y[batch.val_mask])
        # Print metrics every 10 epochs
        if epoch % 10 == 0:
            print(f'Epoch {epoch:>3} | Train Loss: {total_loss/len(train_loader):.3f} '
                  f'| Train Acc: {acc/len(train_loader)*100:>6.2f}% | Val Loss: '
                  f'{val_loss/len(train_loader):.2f} | Val Acc: '
                  f'{val_acc/len(train_loader)*100:.2f}%')
def test(model, data):
"""Evaluate the model on test set and print the accuracy score."""
model.eval()
out = model(data.x, data.edge_index)
return acc
# Train the Model
train_nn(model, data.x, data.edge_index, 200)
# Test
print(f'\nGCN test accuracy: {test(model, data)*100:.2f}%\n')
# Explain the GCN for node
node_idx = 20
# Keep the explainer inputs on the same device as the model.
x, edge_index = data.x.to(device), data.edge_index.to(device)
# Pass the model to explain to GNNExplainer
# NOTE(review): torch_geometric.nn.GNNExplainer is the legacy explainer API;
# newer PyG releases appear to expose torch_geometric.explain.Explainer
# instead -- confirm against the installed version.
# return_type='log_prob' matches Net.forward, which ends in log_softmax.
explainer = GNNExplainer(model, epochs=100, return_type='log_prob')
#returns a node feature mask and an edge mask that play a crucial role to explain the prediction made by the GNN for node 20
node_feat_mask, edge_mask = explainer.explain_node(node_idx, x, edge_index)
ax, G = explainer.visualize_subgraph(node_idx, edge_index, edge_mask,
                                     y=data.y.to(device))
plt.show()
print("Ground Truth label for node: ",node_idx, " is ", data.y.numpy()[node_idx])
out = torch.softmax(model(x, edge_index), dim=1).argmax(dim=1)
print("Prediction for node ",node_idx, "is " ,out[node_idx].cpu().detach().numpy().squeeze())
```

explain_graph() 用于图分类：它学习并返回一个节点特征掩码和一个边掩码，这两个掩码在解释 GNN 对一个图的预测时起着至关重要的作用。

```python
# Import libraries
import numpy as np
import pandas as pd
import os
import torch_geometric.transforms as T
from torch_geometric.datasets import Planetoid
import matplotlib.pyplot as plt
import torch
import torch.nn.functional as F
from torch_geometric.nn import GraphConv
import torch_geometric
from torch.nn import Parameter
from torch_geometric.nn.conv import MessagePassing
import urllib.request
import tarfile
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, GNNExplainer
from torch_geometric.nn import global_mean_pool
from torch_geometric.datasets import TUDataset
dataset = TUDataset(root='data/TUDataset', name='MUTAG')
# print details about the graph
print(f'Dataset: {dataset}:')
print("Number of Graphs: ",len(dataset))
print("Number of Freatures: ", dataset.num_features)
print("Number of Classes: ", dataset.num_classes)
data= dataset[0]
print(data)
print("No. of nodes: ", data.num_nodes)
print("No. of Edges: ", data.num_edges)
print(f'Average node degree: {data.num_edges / data.num_nodes:.2f}')
print(f'Has isolated nodes: {data.has_isolated_nodes()}')
print(f'Has self-loops: {data.has_self_loops()}')
print(f'Is undirected: {data.is_undirected()}')
# Create train and test dataset
# Fixed seed so the shuffle (and therefore the split) is reproducible.
torch.manual_seed(12345)
dataset = dataset.shuffle()
train_dataset = dataset[:50]
test_dataset = dataset[50:]
print(f'Number of training graphs: {len(train_dataset)}')
print(f'Number of test graphs: {len(test_dataset)}')
# Graphs in graph classification datasets are usually small, so a good idea is
# to batch the graphs before inputting them into a Graph Neural Network to
# guarantee full GPU utilization.
# In PyTorch Geometric, adjacency matrices are stacked in a diagonal fashion
# (creating a giant graph that holds multiple isolated subgraphs), and node
# and target features are simply concatenated in the node dimension.
# DataLoader is not among the imports at the top of this script, so it is
# imported here next to its only use.
from torch_geometric.loader import DataLoader

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
# Show what each training mini-batch looks like (this rebinds `data` to a batch).
for step, data in enumerate(train_loader):
    print(f'Step {step + 1}:')
    print('=======')
    print(f'Number of graphs in the current batch: {data.num_graphs}')
    print(data)
    print()
# Build the model
class GNN(torch.nn.Module):
    """Graph classifier: three GraphConv layers, mean pooling, linear head.

    Input and output sizes are read from the module-level ``dataset``;
    ``hidden_channels`` sets the width of every intermediate layer.
    ``forward`` returns raw (un-normalized) class scores, one row per graph.
    """

    def __init__(self, hidden_channels):
        super().__init__()
        # Seed before any layer is created so weight initialization is repeatable.
        torch.manual_seed(12345)
        in_channels = dataset.num_node_features
        out_channels = dataset.num_classes
        self.conv1 = GraphConv(in_channels, hidden_channels)
        self.conv2 = GraphConv(hidden_channels, hidden_channels)
        self.conv3 = GraphConv(hidden_channels, hidden_channels)
        self.lin = Linear(hidden_channels, out_channels)

    def forward(self, x, edge_index, batch):
        # Node embeddings: ReLU after the first two convolutions only.
        h = self.conv1(x, edge_index).relu()
        h = self.conv2(h, edge_index).relu()
        h = self.conv3(h, edge_index)

        # Readout: average the node embeddings of each graph in the batch.
        pooled = global_mean_pool(h, batch)

        # Classification head; dropout is active only in training mode.
        pooled = F.dropout(pooled, p=0.5, training=self.training)
        return self.lin(pooled)
model = GNN(hidden_channels=64)
print(model)
# set the optimizer
# train() below calls optimizer.step(), so the optimizer must exist at module level.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
# set the loss function
# GNN.forward returns raw class scores, which is what CrossEntropyLoss expects.
criterion = torch.nn.CrossEntropyLoss()
# Creating the function to train the model
def train():
    """Run one training epoch over the module-level ``train_loader``.

    Uses the module-level ``model``, ``criterion`` and ``optimizer``.
    """
    model.train()
    for data in train_loader:  # Iterate in batches over the training dataset.
        optimizer.zero_grad()  # Clear gradients left over from the previous step.
        out = model(data.x, data.edge_index, data.batch)  # Perform a single forward pass.
        loss = criterion(out, data.y)  # Compute the loss.
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.
# function to test the model
def test(loader):
    """Return the module-level ``model``'s accuracy over all graphs in ``loader``.

    The result is the number of correctly classified graphs divided by
    ``len(loader.dataset)``.
    """
    model.eval()
    correct = 0
    # No gradients are needed for evaluation.
    with torch.no_grad():
        for data in loader:  # Iterate in batches over the training/test dataset.
            out = model(data.x, data.edge_index, data.batch)
            pred = out.argmax(dim=1)  # Use the class with highest probability.
            correct += int((pred == data.y).sum())  # Check against ground-truth labels.
    return correct / len(loader.dataset)  # Derive ratio of correct predictions.
# Train the model for 159 epochs (epochs 1..159) and report accuracy every 10 epochs
for epoch in range(1, 160):
    train()
    # Evaluating after every epoch also leaves the model in eval mode once the
    # loop finishes, which is the state wanted for the explanation step.
    train_acc = test(train_loader)
    test_acc = test(test_loader)
    if epoch % 10 == 0:
        print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}')
#Explain the Graph
# NOTE(review): torch_geometric.nn.GNNExplainer is the legacy explainer API;
# newer PyG releases appear to expose torch_geometric.explain.Explainer
# instead -- confirm against the installed version.
# GNN.forward returns raw class scores (no softmax), hence return_type='raw'.
explainer = GNNExplainer(model, epochs=100, return_type='raw')
data = dataset[0]
# Learn the node-feature mask and the edge mask for the whole graph.
node_feat_mask, edge_mask = explainer.explain_graph(data.x, data.edge_index)
# node_idx=-1 asks for the whole graph to be drawn rather than one node's neighborhood.
ax, G = explainer.visualize_subgraph(-1, data.edge_index, edge_mask, data.y)
plt.show()
```

https://avoid.overfit.cn/post/3a01457fe6094941a2bca2961f742dce