-
Notifications
You must be signed in to change notification settings - Fork 205
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'master' into fix-draft
- Loading branch information
Showing
11 changed files
with
309 additions
and
16 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
Empty file.
100 changes: 100 additions & 0 deletions
100
wiki/wiki/report/wiki_broken_links/test_broken_link_checker.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
# Copyright (c) 2024, Frappe and Contributors | ||
# See license.txt | ||
|
||
from unittest.mock import patch | ||
|
||
import frappe | ||
from frappe.tests.utils import FrappeTestCase | ||
|
||
from wiki.wiki.report.wiki_broken_links.wiki_broken_links import execute, get_broken_links | ||
|
||
# URL fixtures referenced by the markdown fixture and by the assertions in this
# module. The "broken" external hosts use a made-up TLD (presumably so they can
# never resolve); the internal URLs are site-relative paths.
WORKING_EXTERNAL_URL = "https://frappe.io"
BROKEN_EXTERNAL_URL = "https://frappewiki.notavalidtld"
BROKEN_IMG_URL = "https://img.notavalidtld/failed.jpeg"
WORKING_INTERNAL_URL = "/api/method/ping"
BROKEN_INTERNAL_URL = "/api/method/ring"
|
||
|
||
def internal_to_external_urls(internal_url: str) -> str:
    """Map an internal test URL onto an external one with the same health.

    Used as the ``side_effect`` of a patched ``get_url``: the known-good
    internal route becomes the working external URL, and every other input
    becomes the broken external URL.
    """
    is_known_good = internal_url == WORKING_INTERNAL_URL
    return WORKING_EXTERNAL_URL if is_known_good else BROKEN_EXTERNAL_URL
|
||
|
||
# Markdown fixture used as the content of the test Wiki Page. It contains a
# broken and a working external link, a working and a broken relative link,
# a hash link and an image pointing at a broken URL.
TEST_MD_WITH_BROKEN_LINK = f"""
## Hello
This is a test for a [broken link]({BROKEN_EXTERNAL_URL}).
This is a [valid link]({WORKING_EXTERNAL_URL}).
And [this is a correct relative link]({WORKING_INTERNAL_URL}).
And [this is an incorrect relative link]({BROKEN_INTERNAL_URL}).
This [hash link](#hash-link) should be ignored.
![Broken Image]({BROKEN_IMG_URL})
"""
|
||
|
||
class TestWikiBrokenLinkChecker(FrappeTestCase):
    """Tests for the Wiki Broken Links report and ``get_broken_links``."""

    def setUp(self):
        """Create one Wiki Page holding the markdown fixture and one Wiki Space."""
        # Remove existing pages so the report rows come only from the page below.
        frappe.db.delete("Wiki Page")

        page_values = {
            "doctype": "Wiki Page",
            "content": TEST_MD_WITH_BROKEN_LINK,
            "title": "My Wiki Page",
            "route": "test-wiki-page-route",
        }
        self.test_wiki_page = frappe.get_doc(page_values).insert()

        space_values = {"doctype": "Wiki Space", "route": "test-ws-route"}
        self.test_wiki_space = frappe.get_doc(space_values).insert()

    def tearDown(self):
        """Roll back the database changes made by the test."""
        frappe.db.rollback()

    def _assert_rows_for_test_page(self, rows, expected_links):
        """Assert that ``rows`` lists exactly ``expected_links`` for the test page, in order."""
        self.assertEqual(len(rows), len(expected_links))
        for row, expected_link in zip(rows, expected_links):
            self.assertEqual(row["wiki_page"], self.test_wiki_page.name)
            self.assertEqual(row["broken_link"], expected_link)

    def test_returns_correct_broken_links(self):
        """With default arguments the fixture yields two broken links."""
        found = get_broken_links(TEST_MD_WITH_BROKEN_LINK)
        self.assertEqual(len(found), 2)

    def test_wiki_broken_link_report(self):
        """Without filters the report lists only the broken external link."""
        _, rows = execute()
        self.assertEqual(len(rows), 1)
        self.assertEqual(rows[0]["broken_link"], BROKEN_EXTERNAL_URL)

    def test_wiki_broken_link_report_with_wiki_space_filter(self):
        """The wiki_space filter only reports pages listed in that space's sidebar."""
        # The space has no sidebar entries yet, so nothing is reported.
        _, rows = execute({"wiki_space": self.test_wiki_space.name})
        self.assertEqual(len(rows), 0)

        sidebar_entry = {"wiki_page": self.test_wiki_page, "parent_label": "Test Parent Label"}
        self.test_wiki_space.append("wiki_sidebars", sidebar_entry)
        self.test_wiki_space.save()

        _, rows = execute({"wiki_space": self.test_wiki_space.name})
        self._assert_rows_for_test_page(rows, [BROKEN_EXTERNAL_URL])

    def test_wiki_broken_link_report_with_image_filter(self):
        """With check_images set, the broken image URL is reported after the broken link."""
        _, rows = execute({"check_images": 1})
        self._assert_rows_for_test_page(rows, [BROKEN_EXTERNAL_URL, BROKEN_IMG_URL])

    @patch.object(frappe.utils.data, "get_url", side_effect=internal_to_external_urls)
    def test_wiki_broken_link_report_with_internal_links(self, _get_url):
        """With check_internal_links set, the broken relative URL is reported as written."""
        # get_url is patched so internal links resolve to valid/invalid
        # external URLs instead of internal ones during the test.
        _, rows = execute({"check_internal_links": 1})
        self._assert_rows_for_test_page(rows, [BROKEN_EXTERNAL_URL, BROKEN_INTERNAL_URL])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
// Copyright (c) 2024, Frappe and contributors | ||
// For license information, please see license.txt | ||
|
||
// Filter definitions for the "Wiki Broken Links" query report.
frappe.query_reports["Wiki Broken Links"] = {
	filters: [
		{
			// Link field pointing at a Wiki Space document.
			fieldname: "wiki_space",
			label: __("Wiki Space"),
			fieldtype: "Link",
			options: "Wiki Space",
		},
		{
			// Checkbox, checked by default.
			fieldname: "check_images",
			label: __("Include images?"),
			fieldtype: "Check",
			default: 1,
		},
		{
			// Checkbox, unchecked by default.
			fieldname: "check_internal_links",
			label: __("Include internal links?"),
			fieldtype: "Check",
			default: 0,
		},
	],
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
{ | ||
"add_total_row": 0, | ||
"columns": [], | ||
"creation": "2024-12-11 14:43:18.799835", | ||
"disabled": 0, | ||
"docstatus": 0, | ||
"doctype": "Report", | ||
"filters": [], | ||
"idx": 0, | ||
"is_standard": "Yes", | ||
"letterhead": null, | ||
"modified": "2024-12-11 18:58:14.479423", | ||
"modified_by": "Administrator", | ||
"module": "Wiki", | ||
"name": "Wiki Broken Links", | ||
"owner": "Administrator", | ||
"prepared_report": 1, | ||
"ref_doctype": "Wiki Page", | ||
"report_name": "Wiki Broken Links", | ||
"report_type": "Script Report", | ||
"roles": [ | ||
{ | ||
"role": "System Manager" | ||
}, | ||
{ | ||
"role": "Wiki Approver" | ||
} | ||
], | ||
"timeout": 0 | ||
} |
132 changes: 132 additions & 0 deletions
132
wiki/wiki/report/wiki_broken_links/wiki_broken_links.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,132 @@ | ||
# Copyright (c) 2024, Frappe and contributors | ||
# For license information, please see license.txt | ||
|
||
import frappe | ||
import requests | ||
from bs4 import BeautifulSoup | ||
from frappe import _ | ||
|
||
|
||
def execute(filters: dict | None = None):
    """Entry point of the report: return its columns and data.

    The framework calls this whenever the report is refreshed or a filter is
    updated, passing the current filter values as a dictionary.

    Args:
        filters: Report filter values, or ``None`` when no filters are given.

    Returns:
        A ``(columns, data)`` tuple built by ``get_columns`` and ``get_data``.
    """
    return get_columns(), get_data(filters)
|
||
|
||
def get_columns() -> list[dict]:
    """Describe the report's columns.

    Each column is a field definition dict, in the same shape as a DocType
    field definition.

    Returns:
        The "Wiki Page" link column followed by the "Broken Link" URL column.
    """
    wiki_page_column = {
        "label": _("Wiki Page"),
        "fieldname": "wiki_page",
        "fieldtype": "Link",
        "options": "Wiki Page",
        "width": 200,
    }
    broken_link_column = {
        "label": _("Broken Link"),
        "fieldname": "broken_link",
        "fieldtype": "Data",
        "options": "URL",
        "width": 400,
    }
    return [wiki_page_column, broken_link_column]
|
||
|
||
def get_data(filters: dict | None = None) -> list[dict]:
    """Return the report rows: one row per broken link found on a wiki page.

    Args:
        filters: Optional report filters. Recognised keys:
            ``wiki_space`` restricts the check to pages listed as
            "Wiki Group Item" children of that Wiki Space;
            ``check_images`` also checks image sources when truthy;
            ``check_internal_links`` also checks relative URLs when truthy.

    Returns:
        A list of dicts with the keys ``broken_link`` and ``wiki_page``.
    """
    filters = filters or {}

    # Query only the set of pages that is actually needed, instead of always
    # loading every Wiki Page and discarding the result when a space is given.
    wiki_space = filters.get("wiki_space")
    if wiki_space:
        wiki_pages = frappe.db.get_all(
            "Wiki Group Item",
            fields=["wiki_page as name", "wiki_page.content as content"],
            filters={"parent": wiki_space, "parenttype": "Wiki Space"},
        )
    else:
        wiki_pages = frappe.db.get_all("Wiki Page", fields=["name", "content"])

    # Both options are off unless explicitly enabled through the filters.
    include_images = bool(filters.get("check_images"))
    check_internal_links = bool(filters.get("check_internal_links"))

    data = []
    for page in wiki_pages:
        broken_links_for_page = get_broken_links(page["content"], include_images, check_internal_links)
        data.extend({"broken_link": link, "wiki_page": page["name"]} for link in broken_links_for_page)

    return data
|
||
|
||
def get_broken_links(
    md_content: str, include_images: bool = True, include_relative_urls: bool = False
) -> list[str]:
    """Return the URLs in a markdown document that fail the broken-link check.

    The markdown is rendered to HTML and every ``<a>`` element (and every
    ``<img>`` element when ``include_images`` is set) is inspected.

    Args:
        md_content: Markdown source to scan.
        include_images: Also check the ``src`` of images.
        include_relative_urls: Also check URLs starting with ``/``. They are
            converted to absolute URLs with ``frappe.utils.data.get_url``
            before being requested.

    Returns:
        The broken URLs exactly as they are written in the document, in
        document order (links first, then images).
    """
    html = frappe.utils.md_to_html(md_content)
    soup = BeautifulSoup(html, "html.parser")

    elements = soup.find_all("a")
    if include_images:
        elements += soup.find_all("img")

    broken_links = []
    for el in elements:
        url = el.attrs.get("href") or el.attrs.get("src")

        # An <a> without href (e.g. a named anchor) or an <img> without src has
        # nothing to check; without this guard the helpers below would be
        # called with None and raise AttributeError.
        if not url:
            continue

        if is_hash_link(url):
            continue

        target = url
        if is_relative_url(url):
            if not include_relative_urls:
                continue
            # Request the absolute URL, but report the URL as written.
            target = frappe.utils.data.get_url(url)

        if is_broken_link(target):
            broken_links.append(url)

    return broken_links
|
||
|
||
def is_relative_url(url: str) -> bool:
    """Return True when ``url`` starts with ``/``."""
    root_prefix = "/"
    return url.startswith(root_prefix)
|
||
|
||
def is_hash_link(url: str) -> bool:
    """Return True when ``url`` starts with ``#``."""
    fragment_prefix = "#"
    return url.startswith(fragment_prefix)
|
||
|
||
def is_broken_link(url: str) -> bool:
    """Return True when ``url`` should be reported as broken.

    A URL is broken when its status code is 400 or above, or when obtaining
    the status code raises any exception.
    """
    try:
        return get_request_status_code(url) >= 400
    except Exception:
        # Any failure to obtain a status code counts as a broken link.
        return True
|
||
|
||
def get_request_status_code(url: str) -> int:
    """Send a HEAD request to ``url`` and return the response's status code.

    The request is made with certificate verification disabled and a
    5 second timeout.
    """
    return requests.head(url, verify=False, timeout=5).status_code
Oops, something went wrong.