From 9f1f11a2e7d4b32e85c0fc5770014f4f675ebb00 Mon Sep 17 00:00:00 2001
From: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
Date: Thu, 7 Dec 2023 15:09:30 +0100
Subject: [PATCH] Show new failing tests in a more clear way in slack report
 (#27881)

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
---
 .github/workflows/self-scheduled.yml |   4 +-
 utils/notification_service.py        | 139 +++++++++++++++++++++++----
 2 files changed, 120 insertions(+), 23 deletions(-)

diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml
index 4a04cb14ac7bb3..8d3c23d01e8018 100644
--- a/.github/workflows/self-scheduled.yml
+++ b/.github/workflows/self-scheduled.yml
@@ -494,5 +494,5 @@ jobs:
         if: ${{ always() }}
         uses: actions/upload-artifact@v3
         with:
-          name: test_failure_tables
-          path: test_failure_tables
+          name: prev_ci_results
+          path: prev_ci_results
diff --git a/utils/notification_service.py b/utils/notification_service.py
index 17ce31c59a076b..d232809a64eead 100644
--- a/utils/notification_service.py
+++ b/utils/notification_service.py
@@ -100,7 +100,13 @@ def dicts_to_sum(objects: Union[Dict[str, Dict], List[dict]]):
 
 class Message:
     def __init__(
-        self, title: str, ci_title: str, model_results: Dict, additional_results: Dict, selected_warnings: List = None
+        self,
+        title: str,
+        ci_title: str,
+        model_results: Dict,
+        additional_results: Dict,
+        selected_warnings: List = None,
+        prev_ci_artifacts=None,
     ):
         self.title = title
         self.ci_title = ci_title
@@ -150,6 +156,8 @@ def __init__(
             selected_warnings = []
         self.selected_warnings = selected_warnings
 
+        self.prev_ci_artifacts = prev_ci_artifacts
+
     @property
     def time(self) -> str:
         all_results = [*self.model_results.values(), *self.additional_results.values()]
@@ -398,8 +406,6 @@ def per_model_sum(model_category_dict):
 
         # Save the complete (i.e. no truncation) failure tables (of the current workflow run)
         # (to be uploaded as artifacts)
-        if not os.path.isdir(os.path.join(os.getcwd(), "test_failure_tables")):
-            os.makedirs(os.path.join(os.getcwd(), "test_failure_tables"))
 
         model_failures_report = prepare_reports(
             title="These following model modules had failures",
@@ -407,7 +413,7 @@ def per_model_sum(model_category_dict):
             reports=sorted_model_reports,
             to_truncate=False,
         )
-        file_path = os.path.join(os.getcwd(), "test_failure_tables/model_failures_report.txt")
+        file_path = os.path.join(os.getcwd(), "prev_ci_results/model_failures_report.txt")
         with open(file_path, "w", encoding="UTF-8") as fp:
             fp.write(model_failures_report)
 
@@ -417,27 +423,18 @@ def per_model_sum(model_category_dict):
             reports=sorted_module_reports,
             to_truncate=False,
         )
-        file_path = os.path.join(os.getcwd(), "test_failure_tables/module_failures_report.txt")
+        file_path = os.path.join(os.getcwd(), "prev_ci_results/module_failures_report.txt")
         with open(file_path, "w", encoding="UTF-8") as fp:
             fp.write(module_failures_report)
 
-        target_workflow = "huggingface/transformers/.github/workflows/self-scheduled.yml@refs/heads/main"
-        if os.environ.get("CI_WORKFLOW_REF") == target_workflow:
-            # Get the last previously completed CI's failure tables
-            artifact_names = ["test_failure_tables"]
-            output_dir = os.path.join(os.getcwd(), "previous_reports")
-            os.makedirs(output_dir, exist_ok=True)
-            prev_tables = get_last_daily_ci_reports(
-                artifact_names=artifact_names, output_dir=output_dir, token=os.environ["ACCESS_REPO_INFO_TOKEN"]
-            )
-
-            # if the last run produces artifact named `test_failure_tables`
+        if self.prev_ci_artifacts is not None:
+            # if the last run produces artifact named `prev_ci_results`
             if (
-                "test_failure_tables" in prev_tables
-                and "model_failures_report.txt" in prev_tables["test_failure_tables"]
+                "prev_ci_results" in self.prev_ci_artifacts
+                and "model_failures_report.txt" in self.prev_ci_artifacts["prev_ci_results"]
             ):
                 # Compute the difference of the previous/current (model failure) table
-                prev_model_failures = prev_tables["test_failure_tables"]["model_failures_report.txt"]
+                prev_model_failures = self.prev_ci_artifacts["prev_ci_results"]["model_failures_report.txt"]
                 entries_changed = self.compute_diff_for_failure_reports(model_failures_report, prev_model_failures)
                 if len(entries_changed) > 0:
                     # Save the complete difference
@@ -447,7 +444,7 @@ def per_model_sum(model_category_dict):
                         reports=entries_changed,
                         to_truncate=False,
                     )
-                    file_path = os.path.join(os.getcwd(), "test_failure_tables/changed_model_failures_report.txt")
+                    file_path = os.path.join(os.getcwd(), "prev_ci_results/changed_model_failures_report.txt")
                     with open(file_path, "w", encoding="UTF-8") as fp:
                         fp.write(diff_report)
 
@@ -513,6 +510,10 @@ def payload(self) -> str:
         if len(self.selected_warnings) > 0:
             blocks.append(self.warnings)
 
+        new_failure_blocks = self.get_new_model_failure_blocks(with_header=False)
+        if len(new_failure_blocks) > 0:
+            blocks.extend(new_failure_blocks)
+
         return json.dumps(blocks)
 
     @staticmethod
@@ -632,6 +633,64 @@ def get_reply_blocks(self, job_name, job_result, failures, device, text):
             {"type": "section", "text": {"type": "mrkdwn", "text": failure_text}},
         ]
 
+    def get_new_model_failure_blocks(self, with_header=True):
+        sorted_dict = sorted(self.model_results.items(), key=lambda t: t[0])
+
+        prev_model_results = {}
+        if (
+            "prev_ci_results" in self.prev_ci_artifacts
+            and "model_results.json" in self.prev_ci_artifacts["prev_ci_results"]
+        ):
+            prev_model_results = json.loads(self.prev_ci_artifacts["prev_ci_results"]["model_results.json"])
+
+        all_failure_lines = {}
+        for job, job_result in sorted_dict:
+            if len(job_result["failures"]):
+                devices = sorted(job_result["failures"].keys(), reverse=True)
+                for device in devices:
+                    failures = job_result["failures"][device]
+                    prev_error_lines = {}
+                    if job in prev_model_results and device in prev_model_results[job]["failures"]:
+                        prev_error_lines = {error["line"] for error in prev_model_results[job]["failures"][device]}
+
+                    url = None
+                    if job_result["job_link"] is not None and job_result["job_link"][device] is not None:
+                        url = job_result["job_link"][device]
+
+                    for idx, error in enumerate(failures):
+                        if error["line"] in prev_error_lines:
+                            continue
+
+                        new_text = f'{error["line"]}\n\n'
+
+                        if new_text not in all_failure_lines:
+                            all_failure_lines[new_text] = []
+
+                        all_failure_lines[new_text].append(f"<{url}|{device}>" if url is not None else device)
+
+        MAX_ERROR_TEXT = 3000 - len("[Truncated]") - len("```New model failures```\n\n")
+        failure_text = ""
+        for line, devices in all_failure_lines.items():
+            new_text = failure_text + f"{'|'.join(devices)} gpu\n{line}"
+            if len(new_text) > MAX_ERROR_TEXT:
+                # `failure_text` here has length <= 3000
+                failure_text = failure_text + "[Truncated]"
+                break
+            # `failure_text` here has length <= MAX_ERROR_TEXT
+            failure_text = new_text
+
+        blocks = []
+        if failure_text:
+            if with_header:
+                blocks.append(
+                    {"type": "header", "text": {"type": "plain_text", "text": "New model failures", "emoji": True}}
+                )
+            else:
+                failure_text = f"*New model failures*\n\n{failure_text}"
+            blocks.append({"type": "section", "text": {"type": "mrkdwn", "text": failure_text}})
+
+        return blocks
+
     def post_reply(self):
         if self.thread_ts is None:
             raise ValueError("Can only post reply if a post has been made.")
@@ -681,6 +740,20 @@ def post_reply(self):
 
                     time.sleep(1)
 
+        blocks = self.get_new_model_failure_blocks()
+        if blocks:
+            print("Sending the following reply")
+            print(json.dumps({"blocks": blocks}))
+
+            client.chat_postMessage(
+                channel=os.environ["CI_SLACK_REPORT_CHANNEL_ID"],
+                text="Results for new failures",
+                blocks=blocks,
+                thread_ts=self.thread_ts["ts"],
+            )
+
+            time.sleep(1)
+
 
 def retrieve_artifact(artifact_path: str, gpu: Optional[str]):
     if gpu not in [None, "single", "multi"]:
@@ -1045,7 +1118,31 @@ def prepare_reports(title, header, reports, to_truncate=True):
         with open(os.path.join(directory, "selected_warnings.json")) as fp:
             selected_warnings = json.load(fp)
 
-    message = Message(title, ci_title, model_results, additional_results, selected_warnings=selected_warnings)
+    if not os.path.isdir(os.path.join(os.getcwd(), "prev_ci_results")):
+        os.makedirs(os.path.join(os.getcwd(), "prev_ci_results"))
+
+    with open("prev_ci_results/model_results.json", "w", encoding="UTF-8") as fp:
+        json.dump(model_results, fp, indent=4, ensure_ascii=False)
+
+    prev_ci_artifacts = None
+    target_workflow = "huggingface/transformers/.github/workflows/self-scheduled.yml@refs/heads/main"
+    if os.environ.get("CI_WORKFLOW_REF") == target_workflow:
+        # Get the last previously completed CI's failure tables
+        artifact_names = ["prev_ci_results"]
+        output_dir = os.path.join(os.getcwd(), "previous_reports")
+        os.makedirs(output_dir, exist_ok=True)
+        prev_ci_artifacts = get_last_daily_ci_reports(
+            artifact_names=artifact_names, output_dir=output_dir, token=os.environ["ACCESS_REPO_INFO_TOKEN"]
+        )
+
+    message = Message(
+        title,
+        ci_title,
+        model_results,
+        additional_results,
+        selected_warnings=selected_warnings,
+        prev_ci_artifacts=prev_ci_artifacts,
+    )
 
     # send report only if there is any failure (for push CI)
     if message.n_failures or (ci_event != "push" and not ci_event.startswith("Push CI (AMD)")):