Demo: fixed issues with interoperability, resilience and monitorability

mlte-team · Oct 4, 2024 · 29b737c · 29b737c
1 parent e6b8a4e
commit 29b737c
Show file tree

Hide file tree

Showing 4 changed files with 34 additions and 25 deletions.
diff --git a/demo/scenarios/1_requirements.ipynb b/demo/scenarios/1_requirements.ipynb
@@ -37,7 +37,7 @@
     "  * The application that runs on the loaned device should indicate the main features that were used to recognize the flower, as part of the educational experience. The app will display the image highlighting the most informative features in flower identification, in addition to the flower name. The original test data set can be used. The model needs to return evidence, in this case a heat map implementing the Integrated Gradients algorithm, showing the pixels that were most informative in the classification decision. This evidence should be returned with each inference. \n",
     "\n",
     "* **Functional Correctness - Accuracy**\n",
-    "  * The model receives receives a picture taken at the garden and can identify it correctly at least 90% of the time during normal operation. \n",
+    "  * The model receives a picture taken at the garden and can identify it correctly at least 90% of the time during normal operation.\n",
     "\n",
     "* **Functional Correctness - Input and Output Specification**\n",
     "  * The model reads inputs and provides outputs according to established input and output specifications during normal operation. During test execution all data in the test dataset produces an output that conforms to the output specification.\n",
@@ -69,7 +69,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -96,7 +96,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -208,7 +208,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -496,7 +496,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -532,7 +532,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -613,13 +613,16 @@
     "        Interoperability(\n",
     "            \"Model output format must conform to specified format\"\n",
     "        ): {\n",
-    "            \"output format validation\": String.contains(\n",
-    "                \"Model - Output Validation Error\"\n",
-    "            )\n",
+    "            \"input format validation success\": String.contains(\n",
+    "                \"Model - Input Validation Pass\"\n",
+    "            ),\n",
+    "            \"output format validation success\": String.contains(\n",
+    "                \"Model - Output Validation Pass\"\n",
+    "            ),\n",
     "        },\n",
     "        # Resilience: Input Validation\n",
     "        Resilience(\"Model inputs must conform to specified format\"): {\n",
-    "            \"input format validation\": String.contains(\n",
+    "            \"input format validation error\": String.contains(\n",
     "                \"Model - Input Validation Error\"\n",
     "            )\n",
     "        },\n",

diff --git a/demo/scenarios/2f_evidence_interoperability.ipynb b/demo/scenarios/2f_evidence_interoperability.ipynb
@@ -22,7 +22,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -41,7 +41,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -76,16 +76,20 @@
     "from mlte.measurement.external_measurement import ExternalMeasurement\n",
     "from demo.scenarios.values.string import String\n",
     "\n",
-    "# Evaluate, identifier has to be the same one defined in the Spec.\n",
+    "# Check input format is okay.\n",
     "measurement = ExternalMeasurement(\n",
-    "    \"output format validation\", String, run_and_get_log\n",
+    "    \"input format validation success\", String, run_and_get_log\n",
     ")\n",
     "result = measurement.evaluate()\n",
-    "\n",
-    "# Inspect value\n",
     "print(result)\n",
+    "result.save(force=True)\n",
     "\n",
-    "# Save to artifact store\n",
+    "# Check output format is okay.\n",
+    "measurement = ExternalMeasurement(\n",
+    "    \"output format validation success\", String, run_and_get_log\n",
+    ")\n",
+    "result = measurement.evaluate()\n",
+    "print(result)\n",
     "result.save(force=True)"
    ]
   }

diff --git a/demo/scenarios/2g_evidence_resilience.ipynb b/demo/scenarios/2g_evidence_resilience.ipynb
@@ -22,7 +22,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -41,7 +41,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -52,7 +52,7 @@
     "def run_and_get_log() -> str:\n",
     "    \"\"\"Runs the model and gets the log.\"\"\"\n",
     "    model_predict.run_model(\n",
-    "        SAMPLE_DATASET_DIR, MODEL_FILE_PATH, MODEL_WEIGHTS_PATH\n",
+    "        OOD_DATASET_DIR, MODEL_FILE_PATH, MODEL_WEIGHTS_PATH\n",
     "    )\n",
     "\n",
     "    return model_predict.load_log()"
@@ -78,7 +78,7 @@
     "\n",
     "# Evaluate, identifier has to be the same one defined in the Spec.\n",
     "measurement = ExternalMeasurement(\n",
-    "    \"input format validation\", String, run_and_get_log\n",
+    "    \"input format validation error\", String, run_and_get_log\n",
     ")\n",
     "result = measurement.evaluate()\n",
     "\n",

diff --git a/demo/scenarios/model_predict.py b/demo/scenarios/model_predict.py
@@ -117,10 +117,12 @@ def run_model(image_folder_path, model_file, weights_file):
         image_np = image.numpy()
         print(image_np.shape)
         if image_np.shape[-1] == 3:
-            print_and_log("Model - Input Validation Okay - RGB image loaded")
-        else: 
-            print_and_log(f"Model - Input Validation Error - RGB image expected but  has wrong number of channels")
-           # Not sure if this is the best way to deal with the spec: "input specification it will generate the output "N/A"
+            print_and_log("Model - Input Validation Pass - RGB image loaded")
+        else:
+            print_and_log(
+                f"Model - Input Validation Error - RGB image expected but  has wrong number of channels"
+            )
+            # Not sure if this is the best way to deal with the spec: "input specification it will generate the output "N/A"
             break
         # OOD
         r_avg = image_np[:, :, 0].mean()