From 38060fbe7abf027a4fd4aee6b4d2269c1553ee7f Mon Sep 17 00:00:00 2001 From: I748376 Date: Thu, 4 Jul 2024 12:43:01 +0000 Subject: [PATCH] adds a check using regex to remove delimiters if they are returned by the LLM --- prospector/llm/llm_service.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/prospector/llm/llm_service.py b/prospector/llm/llm_service.py index 3804a1012..cbc4e69e4 100644 --- a/prospector/llm/llm_service.py +++ b/prospector/llm/llm_service.py @@ -1,3 +1,5 @@ +import re + import validators from langchain_core.language_models.llms import LLM from langchain_core.output_parsers import StrOutputParser @@ -59,6 +61,12 @@ def get_repository_url(self, advisory_description, advisory_references) -> str: ) logger.info(f"LLM returned the following URL: {url}") + # delimiters are often returned by the LLM; remove them if that is the case + pattern = r"\s*(https?://[^\s]+)\s*" + match = re.search(pattern, url) + if match: + return match.group(1) + if not validators.url(url): raise TypeError(f"LLM returned invalid URL: {url}")