Note
Click here to download the full example code or to run this example in your browser via Binder
XAI Functions¶
ACTM Performer: CSU team
Author: Elizabeth A. Barnes (eabarnes@colostate.edu) and Antonios Mamalakis (amamalak@colostate.edu)
Here we provide a clean code snippet to implement XAI methods to explain AI models.
Specifically, we provide the code to compute the gradients and the integrated gradients (with a fixed baseline or averaged over randomly sampled baselines) of a specific model output with respect to the corresponding input (local explanation).
To execute the snippet:
Step 1: Download the code in a Jupyter notebook format, using the corresponding option below.
Step 2: Integrate the snippet into your code and run it.
Step 3: Define the object “model” (i.e., the machine learning model that you want to explain) and call the XAI function you want to use.
#.............................................
# IMPORT STATEMENTS
#.............................................
#Handling data
import numpy as np
#machine learning package
import tensorflow as tf
#.............................................
# XAI functions
#.............................................
# Before calling these functions in your notebook, make sure you have defined the object "model".
# The "model" is the machine learning model (e.g., neural network) that you want to explain.
def get_gradients(inputs, top_pred_idx=None):
    """Differentiate the model output with respect to its input.

    Relies on an object named ``model`` being defined at module level
    before this function is called.

    Args:
        inputs: 2D/3D/4D matrix of samples. It is cast to ``tf.float32``
            before being passed to the model.
        top_pred_idx: (optional) Index along the second axis of the model
            output to explain, e.g. the predicted label in a
            classification problem. Leave as ``None`` for regression so
            that the full model output is differentiated.

    Returns:
        Gradients of the (selected) predictions w.r.t. ``inputs``.
    """
    watched_inputs = tf.cast(inputs, tf.float32)

    with tf.GradientTape() as grad_tape:
        # The inputs are a plain tensor rather than a trainable variable,
        # so the tape is asked explicitly to track them.
        grad_tape.watch(watched_inputs)

        # Forward pass in inference mode, recorded on the tape.
        target = model(watched_inputs, training=False)

        # For classification, keep only the requested class. The slice is
        # taken while the tape is still recording so that it is part of
        # the differentiated computation.
        if top_pred_idx is not None:
            target = target[:, top_pred_idx]

    # Gradient of the model output with respect to the input samples.
    return grad_tape.gradient(target, watched_inputs)
def get_integrated_gradients(inputs, baseline=None, num_steps=50, top_pred_idx=None):
    """Computes Integrated Gradients for a prediction.

    Gradients are evaluated at ``num_steps + 1`` equally spaced points on
    the straight line from ``baseline`` to ``inputs``, averaged with the
    trapezoidal rule and finally scaled by ``inputs - baseline``.

    Args:
        inputs (array-like): 2D/3D/4D matrix of samples. Converted to a
            ``float32`` NumPy array.
        baseline (array-like): (optional) The baseline to start the
            interpolation from. It is combined with ``inputs`` through
            NumPy broadcasting. If not provided, zeros with the shape of a
            single sample (``inputs.shape[1:]``) are used.
        num_steps: Number of interpolation steps between the baseline
            and the input used in the computation of integrated gradients.
            The number of steps determines the integral approximation
            error. Must be at least 1. By default, num_steps is set to 50.
        top_pred_idx: (optional) Predicted label for the inputs
            if classification problem. If regression,
            do not include.

    Returns:
        Integrated gradients w.r.t. the inputs.

    Raises:
        ValueError: If ``num_steps`` is smaller than 1.
    """
    # Fail early with a clear message instead of a ZeroDivisionError
    # (num_steps == 0) or an empty interpolation (num_steps < 0).
    if num_steps < 1:
        raise ValueError("num_steps must be at least 1, got {}".format(num_steps))

    # np.asarray (rather than .astype) also accepts lists and tensors,
    # matching what get_gradients accepts.
    inputs = np.asarray(inputs, dtype=np.float32)

    # If baseline is not provided, start with zeros
    # having same size as a single input sample.
    if baseline is None:
        baseline = np.zeros(inputs.shape[1:], dtype=np.float32)
    else:
        baseline = np.asarray(baseline, dtype=np.float32)

    # 1. Do interpolation: one copy of the whole batch per step, moving
    #    linearly from the baseline (step 0) to the inputs (last step).
    delta = inputs - baseline
    interpolated_inputs = np.array(
        [baseline + (step / num_steps) * delta for step in range(num_steps + 1)],
        dtype=np.float32,
    )

    # 2. Get the gradients at every interpolation step.
    grads = tf.convert_to_tensor(
        [get_gradients(x_data, top_pred_idx=top_pred_idx) for x_data in interpolated_inputs],
        dtype=tf.float32,
    )

    # 3. Approximate the integral using the trapezoidal rule:
    #    average neighbouring steps, then average over all intervals.
    grads = (grads[:-1] + grads[1:]) / 2.0
    avg_grads = tf.reduce_mean(grads, axis=0)

    # 4. Calculate integrated gradients and return.
    integrated_grads = delta * avg_grads
    return integrated_grads
def random_baseline_integrated_gradients(inputs, num_steps=50, num_runs=5, top_pred_idx=None):
    """Averages integrated gradients over randomly drawn baselines.

    For each run a baseline with the shape of a single sample is assembled:
    every entry along its first axis is copied from the same position of a
    randomly chosen sample of ``inputs``. Integrated gradients are computed
    against that baseline and the results of all runs are averaged.

    Args:
        inputs (ndarray): 2D/3D/4D matrix of samples
        num_steps: Number of interpolation steps between the baseline
            and the input, forwarded to ``get_integrated_gradients``.
            By default, num_steps is set to 50.
        num_runs: number of random baselines to generate
        top_pred_idx: (optional) Predicted label for the inputs
            if classification problem. If regression,
            do not include.

    Returns:
        Integrated gradients averaged over the `num_runs` random baselines
    """
    input_shape = np.shape(inputs)
    per_run_attributions = []

    for _ in range(num_runs):
        # Build one random baseline shaped like a single sample.
        random_baseline = np.zeros(input_shape[1:])
        candidate_samples = np.arange(0, input_shape[0])
        for position in range(random_baseline.shape[0]):
            # Pick a random sample and borrow its value(s) at this position.
            donor = np.random.choice(candidate_samples)
            random_baseline[position] = inputs[donor, position]

        # Integrated gradients of all inputs relative to this baseline.
        per_run_attributions.append(
            get_integrated_gradients(
                inputs=inputs,
                baseline=random_baseline,
                num_steps=num_steps,
                top_pred_idx=top_pred_idx,
            )
        )

    # Stack the runs and average across them.
    stacked_runs = tf.convert_to_tensor(per_run_attributions)
    return tf.reduce_mean(stacked_runs, axis=0)
Total running time of the script: ( 0 minutes 0.000 seconds)