{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# XAI Functions\n\n* **ACTM Performer:** CSU team\n* **Authors:** Elizabeth A. Barnes (eabarnes@colostate.edu) and Antonios Mamalakis (amamalak@colostate.edu)\n\nHere we provide a clean code snippet that implements XAI methods for explaining AI models. \n\nSpecifically, we provide the code to compute the gradients and the integrated gradients (with a fixed or a randomly sampled baseline) of a specific model output with respect to the corresponding input (local explanation).\n\nTo execute the snippet:\n\n**Step 1:** Download the code in a Jupyter notebook format, using the corresponding option below.\n\n**Step 2:** Integrate the snippet into your code and run it. \n\n**Step 3:** Define the object \"model\" (i.e., the machine learning model that you want to explain) and call the XAI function you want to use. \n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "#.............................................\n",
        "# IMPORT STATEMENTS\n",
        "#.............................................\n",
        "\n",
        "# Handling data\n",
        "import numpy as np\n",
        "\n",
        "# Machine learning package\n",
        "import tensorflow as tf\n",
        "\n",
        "\n",
        "#.............................................\n",
        "# XAI functions\n",
        "#.............................................\n",
        "\n",
        "# Every function below takes an optional `model` argument: the machine\n",
        "# learning model (e.g., neural network) that you want to explain.\n",
        "# When it is omitted, the functions fall back to a notebook-level object\n",
        "# named `model`, so define that object before calling them that way.\n",
        "\n",
        "def _resolve_model(model=None):\n",
        "    \"\"\"Returns the model to explain.\n",
        "\n",
        "    Args:\n",
        "        model: (optional) model passed by the caller.\n",
        "\n",
        "    Returns:\n",
        "        `model` if it is not None, otherwise the notebook-level object\n",
        "        named `model`.\n",
        "\n",
        "    Raises:\n",
        "        NameError: if no model is passed and no global `model` exists.\n",
        "    \"\"\"\n",
        "    if model is not None:\n",
        "        return model\n",
        "    try:\n",
        "        # Looked up at call time, so the global may be defined after\n",
        "        # these functions are.\n",
        "        return globals()['model']\n",
        "    except KeyError:\n",
        "        raise NameError(\n",
        "            'No model to explain: pass model=... or define a global '\n",
        "            'object named model.'\n",
        "        ) from None\n",
        "\n",
        "\n",
        "def get_gradients(inputs, top_pred_idx=None, model=None):\n",
        "    \"\"\"Computes the gradients of the model output w.r.t. the inputs.\n",
        "\n",
        "    Args:\n",
        "        inputs: batch of samples (array-like or tensor); cast to float32.\n",
        "        top_pred_idx: (optional) index of the output column to explain\n",
        "            (e.g., the predicted class) for classification problems.\n",
        "            If regression, do not include.\n",
        "        model: (optional) model to explain, called as\n",
        "            `model(inputs, training=False)`. Defaults to the\n",
        "            notebook-level object named `model`.\n",
        "\n",
        "    Returns:\n",
        "        Gradients of the (selected) predictions w.r.t. `inputs`, as\n",
        "        returned by `tf.GradientTape.gradient`.\n",
        "\n",
        "    Raises:\n",
        "        NameError: if no model is passed and no global `model` exists.\n",
        "    \"\"\"\n",
        "    model = _resolve_model(model)\n",
        "    inputs = tf.cast(inputs, tf.float32)\n",
        "\n",
        "    with tf.GradientTape() as tape:\n",
        "        # `inputs` is a plain tensor rather than a tf.Variable, so the\n",
        "        # tape only tracks it when asked to explicitly.\n",
        "        tape.watch(inputs)\n",
        "\n",
        "        # Run the forward pass and record its operations on the tape.\n",
        "        preds = model(inputs, training=False)\n",
        "\n",
        "        # For classification, keep only the requested output column.\n",
        "        if top_pred_idx is not None:\n",
        "            preds = preds[:, top_pred_idx]\n",
        "\n",
        "    # Differentiate the predictions with respect to the inputs\n",
        "    # (not the trainable variables, and there is no loss involved).\n",
        "    return tape.gradient(preds, inputs)\n",
        "\n",
        "\n",
        "def get_integrated_gradients(inputs, baseline=None, num_steps=50,\n",
        "                             top_pred_idx=None, model=None):\n",
        "    \"\"\"Computes Integrated Gradients for a prediction.\n",
        "\n",
        "    Args:\n",
        "        inputs: batch of samples (array-like or tensor) with the sample\n",
        "            axis first; converted to a float32 array.\n",
        "        baseline: (optional) reference input where the interpolation\n",
        "            starts; it must broadcast against `inputs`. Defaults to\n",
        "            zeros with the shape of a single sample.\n",
        "        num_steps: number of interpolation steps between the baseline\n",
        "            and the inputs; it controls the error of the integral\n",
        "            approximation. Must be at least 1. Defaults to 50.\n",
        "        top_pred_idx: (optional) index of the output column to explain\n",
        "            (e.g., the predicted class) for classification problems.\n",
        "            If regression, do not include.\n",
        "        model: (optional) model to explain. Defaults to the\n",
        "            notebook-level object named `model`.\n",
        "\n",
        "    Returns:\n",
        "        Integrated gradients w.r.t. `inputs`.\n",
        "\n",
        "    Raises:\n",
        "        ValueError: if `num_steps` is smaller than 1.\n",
        "        NameError: if no model is passed and no global `model` exists.\n",
        "    \"\"\"\n",
        "    if num_steps < 1:\n",
        "        # 0 would divide by zero below; negative values would silently\n",
        "        # produce an empty path and a meaningless result.\n",
        "        raise ValueError('num_steps must be a positive integer.')\n",
        "    model = _resolve_model(model)\n",
        "\n",
        "    # np.asarray (rather than .astype) also accepts tensors and lists.\n",
        "    inputs = np.asarray(inputs, dtype=np.float32)\n",
        "    if baseline is None:\n",
        "        # Start from zeros having the same size as one input sample.\n",
        "        baseline = np.zeros(inputs.shape[1:], dtype=np.float32)\n",
        "    else:\n",
        "        baseline = np.asarray(baseline, dtype=np.float32)\n",
        "\n",
        "    # 1. Straight-line path from the baseline to the inputs.\n",
        "    delta = inputs - baseline\n",
        "\n",
        "    # 2. Get the gradients at num_steps + 1 evenly spaced points of the\n",
        "    #    path. Each point is interpolated on the fly so that the whole\n",
        "    #    path never has to be held in memory at once.\n",
        "    grads = [\n",
        "        get_gradients(\n",
        "            baseline + (step / num_steps) * delta,\n",
        "            top_pred_idx=top_pred_idx,\n",
        "            model=model)\n",
        "        for step in range(num_steps + 1)\n",
        "    ]\n",
        "    grads = tf.convert_to_tensor(grads, dtype=tf.float32)\n",
        "\n",
        "    # 3. Approximate the integral using the trapezoidal rule.\n",
        "    grads = (grads[:-1] + grads[1:]) / 2.0\n",
        "    avg_grads = tf.reduce_mean(grads, axis=0)\n",
        "\n",
        "    # 4. Calculate integrated gradients and return.\n",
        "    return delta * avg_grads\n",
        "\n",
        "\n",
        "def random_baseline_integrated_gradients(inputs, num_steps=50, num_runs=5,\n",
        "                                         top_pred_idx=None, model=None,\n",
        "                                         rng=None):\n",
        "    \"\"\"Averages Integrated Gradients over several random baselines.\n",
        "\n",
        "    Each baseline has the shape of a single sample and is assembled\n",
        "    slice by slice along its first axis: slice `i` is copied from slice\n",
        "    `i` of a sample drawn at random from `inputs`.\n",
        "\n",
        "    Args:\n",
        "        inputs: batch of samples (array-like or tensor) with the sample\n",
        "            axis first and at least one more axis.\n",
        "        num_steps: number of interpolation steps between each baseline\n",
        "            and the inputs; it controls the error of the integral\n",
        "            approximation. Defaults to 50.\n",
        "        num_runs: number of random baselines to generate. Must be at\n",
        "            least 1. Defaults to 5.\n",
        "        top_pred_idx: (optional) index of the output column to explain\n",
        "            (e.g., the predicted class) for classification problems.\n",
        "            If regression, do not include.\n",
        "        model: (optional) model to explain. Defaults to the\n",
        "            notebook-level object named `model`.\n",
        "        rng: (optional) random generator exposing `choice`, e.g. the\n",
        "            result of `np.random.default_rng(seed)`, for reproducible\n",
        "            baselines. Defaults to NumPy's global random state.\n",
        "\n",
        "    Returns:\n",
        "        Integrated gradients averaged over the `num_runs` baselines.\n",
        "\n",
        "    Raises:\n",
        "        ValueError: if `num_runs` is smaller than 1 or `inputs` has\n",
        "            fewer than 2 dimensions.\n",
        "        NameError: if no model is passed and no global `model` exists.\n",
        "    \"\"\"\n",
        "    if num_runs < 1:\n",
        "        raise ValueError('num_runs must be a positive integer.')\n",
        "    inputs = np.asarray(inputs)\n",
        "    if inputs.ndim < 2:\n",
        "        raise ValueError('inputs must have a sample axis plus at least one more axis.')\n",
        "    model = _resolve_model(model)\n",
        "    sampler = np.random if rng is None else rng\n",
        "\n",
        "    sample_ids = np.arange(inputs.shape[0])\n",
        "\n",
        "    # 1. List to keep track of Integrated Gradients (IG) for all baselines.\n",
        "    integrated_grads = []\n",
        "\n",
        "    # 2. Get the integrated gradients for all the baselines.\n",
        "    for _ in range(num_runs):\n",
        "        baseline = np.zeros(inputs.shape[1:])\n",
        "        # One draw per slice, kept as a loop so that results seeded\n",
        "        # through np.random.seed stay the same as before.\n",
        "        for i in range(baseline.shape[0]):\n",
        "            j = sampler.choice(sample_ids)\n",
        "            baseline[i] = inputs[j, i]\n",
        "\n",
        "        igrads = get_integrated_gradients(\n",
        "            inputs=inputs,\n",
        "            baseline=baseline,\n",
        "            num_steps=num_steps,\n",
        "            top_pred_idx=top_pred_idx,\n",
        "            model=model)\n",
        "        integrated_grads.append(igrads)\n",
        "\n",
        "    # 3. Return the integrated gradients averaged over the baselines.\n",
        "    integrated_grads = tf.convert_to_tensor(integrated_grads)\n",
        "    return tf.reduce_mean(integrated_grads, axis=0)"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.7.9"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}