From 025f471a17085ba8bc3011f4062d8f8177da6571 Mon Sep 17 00:00:00 2001 From: cccs-rs Date: Fri, 22 Sep 2023 16:10:01 +0000 Subject: [PATCH] Explain reason for phishing heuristic if raised --- document_preview/document_preview.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/document_preview/document_preview.py b/document_preview/document_preview.py index 9c3e0fb..fc02899 100644 --- a/document_preview/document_preview.py +++ b/document_preview/document_preview.py @@ -5,7 +5,7 @@ from assemblyline_v4_service.common.base import ServiceBase from assemblyline_v4_service.common.request import ServiceRequest as Request -from assemblyline_v4_service.common.result import Result, ResultImageSection +from assemblyline_v4_service.common.result import Heuristic, Result, ResultImageSection, ResultTextSection from natsort import natsorted from pdf2image import convert_from_path, pdfinfo_from_path @@ -132,7 +132,7 @@ def execute(self, request): # Create an image gallery section to show the renderings if any("output" in s for s in os.listdir(self.working_directory)): previews = [s for s in os.listdir(self.working_directory) if "output" in s] - image_section = ResultImageSection(request, "Successfully extracted the preview.") + image_section = ResultImageSection(request, "Preview Image(s)") run_ocr_on_first_n_pages = request.get_param("run_ocr_on_first_n_pages") for i, preview in enumerate(natsorted(previews)): # Trigger OCR on the first N pages as specified in the submission @@ -154,7 +154,12 @@ def execute(self, request): try: if pdfinfo_from_path(request.file_path)["Pages"] == 1 and "click" in ocr_content.lower(): # Suspected document is part of a phishing campaign - image_section.set_heuristic(2) + ResultTextSection( + "Suspected Phishing", + body='Single-paged document containing the term "click"', + heuristic=Heuristic(2), + parent=result, + ) except Exception: # There was a problem fetching the page count from the PDF, move on.. pass