diff --git a/demo/scenarios/1_evidence.ipynb b/demo/scenarios/1_evidence.ipynb deleted file mode 100644 index ba9a8384..00000000 --- a/demo/scenarios/1_evidence.ipynb +++ /dev/null @@ -1,841 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 2. Collect Evidence\n", - "\n", - "In the second phase of SDMT, we collect _evidence_ to attest that the model realizes the properties specified in the previous phase.\n", - "\n", - "We define and instantiate `Measurement`s to generate this evidence. Each individual piece of evidence is a `Value`. Once `Value`s are produced, we can persist them to an _artifact store_ to maintain our evidence across sessions. " - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Initialize MLTE Context\n", - "\n", - "MLTE contains a global context that manages the currently active _session_. Initializing the context tells MLTE how to store all of the artifacts that it produces." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "from mlte.session import set_context, set_store\n", - "\n", - "store_path = os.path.join(os.getcwd(), \"store\")\n", - "os.makedirs(\n", - "    store_path, exist_ok=True\n", - ")  # Create the folder if it does not already exist.\n", - "\n", - "set_context(\"OxfordFlower\", \"0.0.1\")\n", - "set_store(f\"local://{store_path}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Define the different folders that will be used as input or output for the data-gathering process." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "from pathlib import Path\n", - "\n", - "# The path at which datasets are stored\n", - "DATASETS_DIR = Path.cwd() / \"data\"\n", - "\n", - "# The path at which model files are stored\n", - "MODELS_DIR = Path.cwd() / \"model\"\n", - "\n", - "# The path at which media is stored\n", - "MEDIA_DIR = Path.cwd() / \"media\"\n", - "os.makedirs(MEDIA_DIR, exist_ok=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Download the model that will be used for some of these measurements." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--2024-09-26 16:36:36-- https://docs.google.com/uc?export=download&id=15kAII1kOPGIAI46OP01ecNkq4tdf5yXw\n", - "Resolving cloudproxy.sei.cmu.edu (cloudproxy.sei.cmu.edu)... 100.64.1.3\n", - "Connecting to cloudproxy.sei.cmu.edu (cloudproxy.sei.cmu.edu)|100.64.1.3|:80... connected.\n", - "Proxy request sent, awaiting response... 303 See Other\n", - "Location: https://drive.usercontent.google.com/download?id=15kAII1kOPGIAI46OP01ecNkq4tdf5yXw&export=download [following]\n", - "--2024-09-26 16:36:36-- https://drive.usercontent.google.com/download?id=15kAII1kOPGIAI46OP01ecNkq4tdf5yXw&export=download\n", - "Connecting to cloudproxy.sei.cmu.edu (cloudproxy.sei.cmu.edu)|100.64.1.3|:80... connected.\n", - "Proxy request sent, awaiting response... 
200 OK\n", - "Length: 103401752 (99M) [application/octet-stream]\n", - "Saving to: ‘./model/model_f_a.h5’\n", - "\n", - "./model/model_f_a.h 100%[===================>] 98.61M 2.67MB/s in 39s \n", - "\n", - "2024-09-26 16:37:21 (2.55 MB/s) - ‘./model/model_f_a.h5’ saved [103401752/103401752]\n", - "\n" - ] - } - ], - "source": [ - "!sh get_model.sh" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In the next sections, we will define additional functions and gather evidence for the different QA scenarios." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Fairness QAS Measurements\n", - "\n", - "Evidence collected in this section checks for the Fairness scenario defined in the previous step. Note that some functions will be loaded from external Python files." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# General functions.\n", - "\n", - "import garden\n", - "import numpy as np\n", - "\n", - "\n", - "def load_data(data_folder: str):\n", - "    \"\"\"Loads all garden data results and taxonomy categories.\"\"\"\n", - "    df_results = garden.load_base_results(data_folder)\n", - "    df_results.head()\n", - "\n", - "    # Load the taxonomic data and merge with results.\n", - "    df_info = garden.load_taxonomy(data_folder)\n", - "    df_results.rename(columns={\"label\": \"Label\"}, inplace=True)\n", - "    df_all = garden.merge_taxonomy_with_results(df_results, df_info)\n", - "\n", - "    return df_info, df_all\n", - "\n", - "\n", - "def split_data(df_info, df_all):\n", - "    \"\"\"Splits the data into 3 different populations to evaluate them.\"\"\"\n", - "    df_gardenpop = df_info.copy()\n", - "    df_gardenpop[\"Population1\"] = (\n", - "        np.around(\n", - "            np.random.dirichlet(np.ones(df_gardenpop.shape[0]), size=1)[0],\n", - "            decimals=3,\n", - "        )\n", - "        * 1000\n", - "    ).astype(int)\n", - "    df_gardenpop[\"Population2\"] = (\n", - "        np.around(\n", - "            np.random.dirichlet(np.ones(df_gardenpop.shape[0]), size=1)[0],\n", - "            decimals=3,\n", - "        )\n", - "        * 1000\n", - "    ).astype(int)\n", - "    df_gardenpop[\"Population3\"] = (\n", - "        np.around(\n", - "            np.random.dirichlet(np.ones(df_gardenpop.shape[0]), size=1)[0],\n", - "            decimals=3,\n", - "        )\n", - "        * 1000\n", - "    ).astype(int)\n", - "    df_gardenpop\n", - "\n", - "    # Build populations from the test data set that match the garden compositions.\n", - "    from random import choices\n", - "\n", - "    # Build 3 gardens with populations of 1000.\n", - "    pop_names = [\"Population1\", \"Population2\", \"Population3\"]\n", - "    gardenpops = np.zeros((3, 1000), int)\n", - "    gardenmems = np.zeros((3, 1000), int)\n", - "\n", - "    for j in range(1000):\n", - "        for i in range(len(df_gardenpop)):\n", - "            my_flower = df_gardenpop.iloc[i][\"Common Name\"]\n", - "\n", - "            for g in range(3):\n", - "                n_choices = df_gardenpop.iloc[i][pop_names[g]]\n", - "                my_choices = df_all[df_all[\"Common Name\"] == my_flower][\n", - "                    \"model correct\"\n", - "                ].to_list()\n", - "                my_selection = choices(my_choices, k=n_choices)\n", - "\n", - "                gardenpops[g][j] += sum(my_selection)\n", - "                gardenmems[g][j] += len(my_selection)\n", - "\n", - "    gardenpops\n", - "\n", - "    return gardenpops, gardenmems\n", - "\n", - "\n", - "def calculate_model_performance_acc(gardenpops, gardenmems):\n", - "    \"\"\"Get accuracy of models across the garden populations.\"\"\"\n", - "    gardenacc = np.zeros((3, 1000), float)\n", - "    for i in range(1000):\n", - "        for g in range(3):\n", - "            
gardenacc[g][i] = gardenpops[g][i] / gardenmems[g][i]\n", - "    gardenacc\n", - "\n", - "    model_performance_acc = []\n", - "    for g in range(3):\n", - "        avg = round(np.average(gardenacc[g][:]), 3)\n", - "        std = round(np.std(gardenacc[g][:]), 3)\n", - "        min_acc = round(np.amin(gardenacc[g][:]), 3)\n", - "        max_acc = round(np.amax(gardenacc[g][:]), 3)\n", - "        model_performance_acc.append(round(avg, 3))\n", - "\n", - "        print(\"%1d %1.3f %1.3f %1.3f %1.3f\" % (g, avg, std, min_acc, max_acc))\n", - "\n", - "    return model_performance_acc" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Prepare the data. For this section, instead of executing the model, we will use CSV files containing the results of an already executed run of the model.\n", - "data = load_data(DATASETS_DIR)\n", - "populations = split_data(data[0], data[1])" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In this first example, we simply wrap the output of our accuracy calculation in the custom `MultipleAccuracy` value type, to cope with output that is not supported by an MLTE built-in `Value`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from values.multiple_accuracy import MultipleAccuracy\n", - "from mlte.measurement.external_measurement import ExternalMeasurement\n", - "\n", - "# Evaluate accuracy; the identifier has to be the same one defined in the Spec.\n", - "accuracy_measurement = ExternalMeasurement(\n", - "    \"accuracy across gardens\", MultipleAccuracy, calculate_model_performance_acc\n", - ")\n", - "accuracy = accuracy_measurement.evaluate(populations[0], populations[1])\n", - "\n", - "# Inspect value\n", - "print(accuracy)\n", - "\n", - "# Save to artifact store\n", - "accuracy.save(force=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Robustness QAS Measurements\n", - "\n", - "Evidence collected in this section checks for the Robustness scenarios."
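 - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The measurements in this and the following sections reuse the evidence-collection pattern shown above: wrap a plain Python function in an `ExternalMeasurement`, `evaluate()` it with the function's arguments, and `save()` the resulting value to the artifact store. The cell below is a minimal sketch of that pattern; the identifier and wrapped function are illustrative only (they are not defined in the Spec), and we assume the built-in `Integer` value type can be paired with `ExternalMeasurement` in this way." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Minimal sketch of the ExternalMeasurement pattern; the identifier and\n", - "# wrapped function are illustrative only and not part of the Spec.\n", - "from mlte.measurement.external_measurement import ExternalMeasurement\n", - "from mlte.value.types.integer import Integer\n", - "\n", - "\n", - "def count_rows(df) -> int:\n", - "    \"\"\"Toy stand-in for a real metric computation.\"\"\"\n", - "    return int(df.shape[0])\n", - "\n", - "\n", - "example_measurement = ExternalMeasurement(\n", - "    \"example row count\", Integer, count_rows\n", - ")\n", - "example_value: Integer = example_measurement.evaluate(data[1])\n", - "print(example_value)\n", - "# example_value.save(force=True)  # Only save if wanted in the artifact store." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We first define general helper functions for the robustness analysis."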
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# General functions.\n", - "import pandas as pd\n", - "\n", - "\n", - "def calculate_base_accuracy(df_results: pd.DataFrame) -> pd.DataFrame:\n", - "    # Calculate the base model accuracy result per data label\n", - "    df_pos = (\n", - "        df_results[df_results[\"model correct\"] == True].groupby(\"label\").count()\n", - "    )\n", - "    df_pos.drop(columns=[\"prediced_label\"], inplace=True)\n", - "    df_neg = (\n", - "        df_results[df_results[\"model correct\"] == False]\n", - "        .groupby(\"label\")\n", - "        .count()\n", - "    )\n", - "    df_neg.drop(columns=[\"prediced_label\"], inplace=True)\n", - "    df_neg.rename(columns={\"model correct\": \"model incorrect\"}, inplace=True)\n", - "    df_res = df_pos.merge(\n", - "        df_neg, right_on=\"label\", left_on=\"label\", how=\"outer\"\n", - "    )\n", - "    df_res.fillna(0, inplace=True)\n", - "    df_res[\"model acc\"] = df_res[\"model correct\"] / (\n", - "        df_res[\"model correct\"] + df_res[\"model incorrect\"]\n", - "    )\n", - "    df_res[\"count\"] = df_res[\"model correct\"] + df_res[\"model incorrect\"]\n", - "    df_res.drop(columns=[\"model correct\", \"model incorrect\"], inplace=True)\n", - "    df_res.head()\n", - "\n", - "    return df_res\n", - "\n", - "\n", - "def calculate_accuracy_per_set(\n", - "    data_folder: str, df_results: pd.DataFrame, df_res: pd.DataFrame\n", - ") -> pd.DataFrame:\n", - "    # Calculate the model accuracy per data label for each blurred data set\n", - "    base_filename = \"FlowerModelv1_TestSetResults\"\n", - "    ext_filename = \".csv\"\n", - "    set_filename = [\"_blur2x8\", \"_blur5x8\", \"_blur0x8\", \"_noR\", \"_noG\", \"_noB\"]\n", - "\n", - "    col_root = \"model acc\"\n", - "\n", - "    for fs in set_filename:\n", - "        filename = os.path.join(data_folder, base_filename + fs + ext_filename)\n", - "        colname = col_root + fs\n", - "\n", - "        df_temp = pd.read_csv(filename)\n", - "        df_temp.drop(columns=[\"Unnamed: 0\"], inplace=True)\n", - "\n", - "        df_pos = (\n", - "            df_temp[df_temp[\"model correct\"] == True].groupby(\"label\").count()\n", - "        )\n", - "        df_pos.drop(columns=[\"prediced_label\"], inplace=True)\n", - "        # Count incorrect predictions in the same blurred set, matching df_pos.\n", - "        df_neg = (\n", - "            df_temp[df_temp[\"model correct\"] == False]\n", - "            .groupby(\"label\")\n", - "            .count()\n", - "        )\n", - "        df_neg.drop(columns=[\"prediced_label\"], inplace=True)\n", - "        df_neg.rename(\n", - "            columns={\"model correct\": \"model incorrect\"}, inplace=True\n", - "        )\n", - "        df_res2 = df_pos.merge(\n", - "            df_neg, right_on=\"label\", left_on=\"label\", how=\"outer\"\n", - "        )\n", - "        df_res2.fillna(0, inplace=True)\n", - "\n", - "        df_res2[colname] = df_res2[\"model correct\"] / (\n", - "            df_res2[\"model correct\"] + df_res2[\"model incorrect\"]\n", - "        )\n", - "        df_res2.drop(columns=[\"model correct\", \"model incorrect\"], inplace=True)\n", - "\n", - "        df_res = df_res.merge(\n", - "            df_res2, right_on=\"label\", left_on=\"label\", how=\"outer\"\n", - "        )\n", - "\n", - "    df_res.head()\n", - "    return df_res\n", - "\n", - "\n", - "def print_model_accuracy(df_res: pd.DataFrame, key: str, name: str):\n", - "    model_acc = sum(df_res[key] * df_res[\"count\"]) / sum(df_res[\"count\"])\n", - "    print(name, model_acc)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Prepare all data. 
As in the case above, we will use CSV files that contain the results of a previous execution of the model.\n", - "df_results = garden.load_base_results(DATASETS_DIR)\n", - "df_res = calculate_base_accuracy(df_results)\n", - "df_res = calculate_accuracy_per_set(DATASETS_DIR, df_results, df_res)\n", - "df_info = garden.load_taxonomy(DATASETS_DIR)\n", - "df_all = garden.merge_taxonomy_with_results(df_res, df_info, \"label\", \"Label\")\n", - "\n", - "# Fill in missing model accuracy data.\n", - "df_all[\"model acc_noR\"] = df_all[\"model acc_noR\"].fillna(0)\n", - "df_all[\"model acc_noG\"] = df_all[\"model acc_noG\"].fillna(0)\n", - "df_all[\"model acc_noB\"] = df_all[\"model acc_noB\"].fillna(0)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now we take the actual measurements. First, we simply view the model accuracy across blurs." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# View changes in model accuracy.\n", - "print_model_accuracy(df_res, \"model acc\", \"base model accuracy\")\n", - "print_model_accuracy(\n", - "    df_res, \"model acc_blur2x8\", \"model accuracy with 2x8 blur\"\n", - ")\n", - "print_model_accuracy(\n", - "    df_res, \"model acc_blur5x8\", \"model accuracy with 5x8 blur\"\n", - ")\n", - "print_model_accuracy(\n", - "    df_res, \"model acc_blur0x8\", \"model accuracy with 0x8 blur\"\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Measure the ranksums (p-value) for all blur cases, using `scipy.stats.ranksums` and the `ExternalMeasurement` wrapper." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import scipy.stats\n", - "\n", - "from values.ranksums import RankSums\n", - "from mlte.measurement.external_measurement import ExternalMeasurement\n", - "\n", - "my_blur = [\"2x8\", \"5x8\", \"0x8\"]\n", - "for i in range(len(my_blur)):\n", - "    # Define measurements.\n", - "    ranksum_measurement = ExternalMeasurement(\n", - "        f\"ranksums blur{my_blur[i]}\", RankSums, scipy.stats.ranksums\n", - "    )\n", - "\n", - "    # Evaluate.\n", - "    ranksum: RankSums = ranksum_measurement.evaluate(\n", - "        df_res[\"model acc\"], df_res[f\"model acc_blur{my_blur[i]}\"]\n", - "    )\n", - "\n", - "    # Inspect values\n", - "    print(ranksum)\n", - "\n", - "    # Save to artifact store\n", - "    ranksum.save(force=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now to the next part of the question: is this equal across the phylogenetic groups?" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "First we will check the effect of blur for Clade 2."
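 - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As a reminder, `scipy.stats.ranksums` performs a Wilcoxon rank-sum test and returns a test statistic and a two-sided p-value; a small p-value suggests the two samples come from distributions with different locations. The quick sanity check below uses synthetic arrays, purely for illustration." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Illustration of what scipy.stats.ranksums returns; these arrays are\n", - "# synthetic and used purely for demonstration.\n", - "import numpy as np\n", - "import scipy.stats\n", - "\n", - "rng = np.random.default_rng(0)\n", - "baseline_acc = rng.normal(0.9, 0.05, size=100)\n", - "degraded_acc = rng.normal(0.7, 0.05, size=100)\n", - "\n", - "print(scipy.stats.ranksums(baseline_acc, baseline_acc))  # p-value of 1.0\n", - "print(scipy.stats.ranksums(baseline_acc, degraded_acc))  # p-value near 0.0" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "With that interpretation in mind, we compare the per-label blur deltas across the Clade 2 populations."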
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from typing import List\n", - "\n", - "from values.multiple_ranksums import MultipleRanksums\n", - "\n", - "# Use the initial result's blur columns to analyze the effect of blur.\n", - "df_all[\"delta_2x8\"] = df_all[\"model acc\"] - df_all[\"model acc_blur2x8\"]\n", - "df_all[\"delta_5x8\"] = df_all[\"model acc\"] - df_all[\"model acc_blur5x8\"]\n", - "df_all[\"delta_0x8\"] = df_all[\"model acc\"] - df_all[\"model acc_blur0x8\"]\n", - "\n", - "pops = df_all[\"Clade2\"].unique().tolist()\n", - "blurs = [\n", - "    \"delta_2x8\",\n", - "    \"delta_5x8\",\n", - "    \"delta_0x8\",\n", - "]\n", - "\n", - "ranksums: List = []\n", - "for i in range(len(blurs)):\n", - "    for pop1 in pops:\n", - "        for pop2 in pops:\n", - "            ranksum_measurement = ExternalMeasurement(\n", - "                f\"ranksums clade2 {pop1}-{pop2} blur{blurs[i]}\",\n", - "                RankSums,\n", - "                scipy.stats.ranksums,\n", - "            )\n", - "            ranksum: RankSums = ranksum_measurement.evaluate(\n", - "                df_all[df_all[\"Clade2\"] == pop1][blurs[i]],\n", - "                df_all[df_all[\"Clade2\"] == pop2][blurs[i]],\n", - "            )\n", - "            print(f\"blur {blurs[i]}: {ranksum}\")\n", - "            ranksums.append({ranksum.identifier: ranksum.array})\n", - "\n", - "multiple_ranksums_meas = ExternalMeasurement(\n", - "    \"multiple ranksums for clade2\", MultipleRanksums, lambda x: x\n", - ")\n", - "multiple_ranksums: MultipleRanksums = multiple_ranksums_meas.evaluate(ranksums)\n", - "multiple_ranksums.num_pops = len(pops)\n", - "multiple_ranksums.save(force=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now we check between clade 2 and clade 3." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df_now = (\n", - "    df_all[[\"Clade2\", \"Clade 3\"]]\n", - "    .copy()\n", - "    .groupby([\"Clade2\", \"Clade 3\"])\n", - "    .count()\n", - "    .reset_index()\n", - ")\n", - "ps1 = df_now[\"Clade2\"].to_list()\n", - "ps2 = df_now[\"Clade 3\"].to_list()\n", - "print(df_now)\n", - "\n", - "ranksums: List = []\n", - "for k in range(len(blurs)):\n", - "    print(\"\\n\", blurs[k])\n", - "    for i in range(len(ps1)):\n", - "        p1c1 = ps1[i]\n", - "        p1c2 = ps2[i]\n", - "        for j in range(len(ps1)):\n", - "            p2c1 = ps1[j]\n", - "            p2c2 = ps2[j]\n", - "            # Only compare population pairs that have data for this blur.\n", - "            if (\n", - "                len(\n", - "                    df_all[\n", - "                        (df_all[\"Clade2\"] == p1c1) & (df_all[\"Clade 3\"] == p1c2)\n", - "                    ][blurs[k]]\n", - "                )\n", - "                > 0\n", - "                or len(\n", - "                    df_all[\n", - "                        (df_all[\"Clade2\"] == p2c1) & (df_all[\"Clade 3\"] == p2c2)\n", - "                    ][blurs[k]]\n", - "                )\n", - "                > 0\n", - "            ):\n", - "                ranksum_measurement = ExternalMeasurement(\n", - "                    f\"ranksums {p1c1}-{p1c2} - {p2c1}-{p2c2} blur{blurs[k]}\",\n", - "                    RankSums,\n", - "                    scipy.stats.ranksums,\n", - "                )\n", - "                ranksum: RankSums = ranksum_measurement.evaluate(\n", - "                    df_all[\n", - "                        (df_all[\"Clade2\"] == p1c1) & (df_all[\"Clade 3\"] == p1c2)\n", - "                    ][blurs[k]],\n", - "                    df_all[\n", - "                        (df_all[\"Clade2\"] == p2c1) & (df_all[\"Clade 3\"] == p2c2)\n", - "                    ][blurs[k]],\n", - "                )\n", - "                ranksums.append({ranksum.identifier: ranksum.array})\n", - "\n", - "multiple_ranksums_meas = ExternalMeasurement(\n", - "    \"multiple ranksums between clade2 and 3\", MultipleRanksums, lambda x: x\n", - ")\n", - "multiple_ranksums: MultipleRanksums = multiple_ranksums_meas.evaluate(ranksums)\n", - "multiple_ranksums.num_pops = len(ps1)\n", - "multiple_ranksums.save(force=True)" - ] - }, - { - "cell_type": "markdown", - 
"metadata": {}, - "source": [ - "### Performance QAS Measurements\n", - "\n", - "Now we collect stored, CPU and memory usage data when predicting with the model, for the Performance scenario. NOTE: the version of tensorflow used in this demo requires running it under Python 3.9 or higher." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# This is the external script that will load and run the model for inference/prediction.\n", - "script = Path.cwd() / \"model_predict.py\"\n", - "args = [\n", - " \"--images\",\n", - " DATASETS_DIR,\n", - " \"--model\",\n", - " MODELS_DIR / \"model_f3_a.json\",\n", - " \"--weights\",\n", - " MODELS_DIR / \"model_f_a.h5\",\n", - "]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from mlte.measurement.storage import LocalObjectSize\n", - "from mlte.value.types.integer import Integer\n", - "\n", - "store_measurement = LocalObjectSize(\"model size\")\n", - "size: Integer = store_measurement.evaluate(MODELS_DIR)\n", - "print(size)\n", - "size.save(force=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from mlte.measurement.process_measurement import ProcessMeasurement\n", - "from mlte.measurement.cpu import LocalProcessCPUUtilization, CPUStatistics\n", - "\n", - "cpu_measurement = LocalProcessCPUUtilization(\"predicting cpu\")\n", - "cpu_stats: CPUStatistics = cpu_measurement.evaluate(\n", - " ProcessMeasurement.start_script(script, args)\n", - ")\n", - "print(cpu_stats)\n", - "cpu_stats.save(force=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from mlte.measurement.memory import (\n", - " LocalProcessMemoryConsumption,\n", - " MemoryStatistics,\n", - ")\n", - "\n", - "mem_measurement = LocalProcessMemoryConsumption(\"predicting memory\")\n", - "mem_stats: MemoryStatistics = mem_measurement.evaluate(\n", - " ProcessMeasurement.start_script(script, args)\n", - ")\n", - "print(mem_stats)\n", - "mem_stats.save(force=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Interpretability QAS Measurements.\n", - "\n", - "Now we proceed to gather data about the Interpretability of the model, for the corresponding scenario. NOTE: the version of tensorflow used in this demo requires running it under Python 3.9 or higher." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model_filename = (\n", - "    MODELS_DIR / \"model_f3_a.json\"\n", - ")  # The json file of the model to load\n", - "weights_filename = MODELS_DIR / \"model_f_a.h5\"  # The weights file for the model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from model_analysis import *\n", - "\n", - "# Load the model.\n", - "loaded_model = load_model(model_filename, weights_filename)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Load and show the image.\n", - "\n", - "flower_img = \"flower3.jpg\"  # Filename of flower image to use, public domain image adapted from: https://commons.wikimedia.org/wiki/File:Beautiful_white_flower_in_garden.jpg\n", - "flower_idx = (\n", - "    42  # Classifier index of the associated flower (see OxfordFlower102Labels.csv)\n", - ")\n", - "\n", - "im = read_image(os.path.join(DATASETS_DIR, flower_img))\n", - "\n", - "plt.imshow(im)\n", - "plt.axis(\"off\")\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "predictions = run_model(im, loaded_model)\n", - "\n", - "baseline, alphas = generate_baseline_and_alphas()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "interpolated_images = interpolate_images(\n", - "    baseline=baseline, image=im, alphas=alphas\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig = plt.figure(figsize=(20, 20))\n", - "\n", - "i = 0\n", - "for alpha, image in zip(alphas[0::10], interpolated_images[0::10]):\n", - "    i += 1\n", - "    plt.subplot(1, len(alphas[0::10]), i)\n", - "    plt.title(f\"alpha: {alpha:.1f}\")\n", - "    plt.imshow(image)\n", - "    plt.axis(\"off\")\n", - "\n", - "plt.tight_layout()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "path_gradients = compute_gradients(\n", - "    loaded_model=loaded_model,\n", - "    images=interpolated_images,\n", - "    target_class_idx=flower_idx,\n", - ")\n", - "print(path_gradients.shape)\n", - "\n", - "ig = integral_approximation(gradients=path_gradients)\n", - "print(ig.shape)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ig_attributions = integrated_gradients(\n", - "    baseline=baseline,\n", - "    image=im,\n", - "    target_class_idx=flower_idx,\n", - "    loaded_model=loaded_model,\n", - "    m_steps=240,\n", - ")\n", - "print(ig_attributions.shape)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fig = plot_img_attributions(\n", - "    image=im,\n", - "    baseline=baseline,\n", - "    target_class_idx=flower_idx,\n", - "    loaded_model=loaded_model,\n", - "    m_steps=240,\n", - "    cmap=plt.cm.inferno,\n", - "    overlay_alpha=0.4,\n", - ")\n", - "\n", - "plt.savefig(MEDIA_DIR / \"attributions.png\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from mlte.measurement.external_measurement import ExternalMeasurement\n", - "from mlte.value.types.image import Image\n", - "\n", - "# Save to MLTE store.\n", - "img_collector = ExternalMeasurement(\"image attributions\", Image)\n", - "img = 
img_collector.ingest(MEDIA_DIR / \"attributions.png\")\n", - "img.save(force=True)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.4" - }, - "vscode": { - "interpreter": { - "hash": "82adda432962015d5f71beb9387a99f24d390514e497c776c87ff3434daf7312" - } - } - }, - "nbformat": 4, - "nbformat_minor": 4 -}