From bbf96a45bafb07bf81d0a966464f93af73f9e876 Mon Sep 17 00:00:00 2001 From: wzdnzd Date: Sun, 10 Nov 2024 10:03:12 +0800 Subject: [PATCH] enhance decode and status check --- subscribe/airport.py | 18 ++++++++++++++++-- subscribe/crawl.py | 6 +++++- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/subscribe/airport.py b/subscribe/airport.py index 6aa4f9753..977a0803f 100644 --- a/subscribe/airport.py +++ b/subscribe/airport.py @@ -3,6 +3,7 @@ # @Author : wzdnzd # @Time : 2022-07-15 +import base64 import concurrent.futures import json import os @@ -622,6 +623,14 @@ def parse( ) return [] + @staticmethod + def check_protocol(link: str) -> bool: + return re.match( + r"^(vmess|trojan|ss|ssr|vless|hysteria|hysteria2|tuic|snell)://[a-zA-Z0-9:.?+=@%&#_\-/]{10,}", + utils.trim(link).replace("\r", ""), + flags=re.I, + ) + @staticmethod def decode( text: str, program: str, artifact: str = "", ignore: bool = False, special: bool = False, throw: bool = False @@ -647,9 +656,10 @@ def clean_text(document: str) -> str: if not text: return [] + is_b64encode, is_json = False, False if ( - utils.isb64encode(text) - or (text.startswith("{") and text.endswith("}")) + (is_b64encode := utils.isb64encode(text)) + or (is_json := (text.startswith("{") and text.endswith("}"))) or not re.search(r"^proxies:([\s\r\n]+)?$", text, flags=re.MULTILINE) ): artifact = utils.trim(text=artifact) @@ -659,6 +669,10 @@ def clean_text(document: str) -> str: v2ray_file = os.path.join(PATH, "subconverter", f"{artifact}.txt") clash_file = os.path.join(PATH, "subconverter", f"{artifact}.yaml") + # base64 encoding if all lines start with valid protocol + if not is_b64encode and not is_json and all(AirPort.check_protocol(x) for x in text.split("\n") if x): + text = base64.b64encode(text.encode(encoding="UTF8")).decode(encoding="UTF8") + try: with open(v2ray_file, "w+", encoding="UTF8") as f: f.write(text) diff --git a/subscribe/crawl.py b/subscribe/crawl.py index 49f5f52a8..eaf3ba5ec 100644 --- a/subscribe/crawl.py +++ b/subscribe/crawl.py @@ -1083,7 +1083,7 @@ def extract_subscribes( limits, collections, proxies = max(1, limits), {}, [] sub_regex = r"https?://(?:[a-zA-Z0-9\u4e00-\u9fa5\-]+\.)+[a-zA-Z0-9\u4e00-\u9fa5\-]+(?:(?:(?:/index.php)?/api/v1/client/subscribe\?token=[a-zA-Z0-9]{16,32})|(?:/link/[a-zA-Z0-9]+\?(?:sub|mu|clash)=\d))|https://jmssub\.net/members/getsub\.php\?service=\d+&id=[a-zA-Z0-9\-]{36}(?:\S+)?" extra_regex = r"https?://(?:[a-zA-Z0-9\u4e00-\u9fa5\-]+\.)+[a-zA-Z0-9\u4e00-\u9fa5\-]+/sub\?(?:\S+)?target=\S+" - protocal_regex = r"(?:vmess|trojan|ss|ssr|snell|hysteria2|vless|hysteria)://[a-zA-Z0-9:.?+=@%&#_\-/]{10,}" + protocal_regex = r"(?:vmess|trojan|ss|ssr|snell|hysteria2|vless|hysteria|tuic)://[a-zA-Z0-9:.?+=@%&#_\-/]{10,}" regex = f"{sub_regex}|{extra_regex}" @@ -1278,6 +1278,10 @@ def check_status( yaml.add_multi_constructor("str", lambda loader, suffix, node: str(node.value), Loader=yaml.SafeLoader) proxies = yaml.load(content, Loader=yaml.FullLoader).get("proxies", []) except: + if all(airport.AirPort.check_protocol(x) for x in content.split("\n") if x): + return True, False + + # TODO: 如果配置文件为 singbox、quanx、loon、surge等,需要解析出代理节点信息,并判断是否过期 proxies = [] if proxies is None or len(proxies) == 0: