From fbaaffc464128d269748bc5edcaeb73885db2164 Mon Sep 17 00:00:00 2001 From: hect0x7 <93357912+hect0x7@users.noreply.github.com> Date: Fri, 23 Feb 2024 00:17:50 +0800 Subject: [PATCH] =?UTF-8?q?v2.5.6:=20=E6=9B=B4=E6=96=B0=E7=A6=81=E6=BC=AB?= =?UTF-8?q?=E5=8F=91=E5=B8=83=E9=A1=B5URl;=20=E4=BC=98=E5=8C=96=E6=AD=A3?= =?UTF-8?q?=E5=88=99=E8=A1=A8=E8=BE=BE=E5=BC=8F=E7=9A=84=E9=80=82=E9=85=8D?= =?UTF-8?q?;=20=E6=94=AF=E6=8C=81=E9=80=9A=E8=BF=87github=E7=BD=91?= =?UTF-8?q?=E5=9D=80=E8=8E=B7=E5=8F=96=E7=A6=81=E6=BC=AB=E7=BD=91=E9=A1=B5?= =?UTF-8?q?=E5=9F=9F=E5=90=8D;=20=E4=BC=98=E5=8C=96=E4=BB=A3=E7=A0=81?= =?UTF-8?q?=E5=92=8C=E6=96=87=E6=A1=A3.=20(#212)=20(#213)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- assets/docs/sources/option_file_syntax.md | 2 +- assets/docs/sources/tutorial/11_log_custom.md | 2 +- assets/option/option_test_api.yml | 2 +- assets/option/option_test_html.yml | 2 +- assets/option/option_workflow_download.yml | 2 +- src/jmcomic/__init__.py | 2 +- src/jmcomic/api.py | 14 +++++- src/jmcomic/jm_client_impl.py | 34 +++++++------- src/jmcomic/jm_client_interface.py | 11 +++-- src/jmcomic/jm_config.py | 37 ++++++++++++++- src/jmcomic/jm_downloader.py | 46 +++++++++++++------ src/jmcomic/jm_entity.py | 18 +++++--- src/jmcomic/jm_plugin.py | 4 +- src/jmcomic/jm_toolkit.py | 42 ++++++++++------- tests/test_jmcomic/test_jm_api.py | 5 +- 15 files changed, 153 insertions(+), 70 deletions(-) diff --git a/assets/docs/sources/option_file_syntax.md b/assets/docs/sources/option_file_syntax.md index f8872763..66124426 100644 --- a/assets/docs/sources/option_file_syntax.md +++ b/assets/docs/sources/option_file_syntax.md @@ -127,7 +127,7 @@ plugins: - plugin: client_proxy # 客户端实现类代理插件,不建议非开发人员使用 kwargs: - proxy_client_key: cl_proxy_future # 代理类的client_key + proxy_client_key: photo_concurrent_fetcher_proxy # 代理类的client_key whitelist: [ api, ] # 白名单,当client.impl匹配白名单时才代理 - plugin: auto_set_browser_cookies # 自动获取浏览器cookies,详见插件类 diff --git a/assets/docs/sources/tutorial/11_log_custom.md b/assets/docs/sources/tutorial/11_log_custom.md index 9c7c0a29..bf56b91d 100644 --- a/assets/docs/sources/tutorial/11_log_custom.md +++ b/assets/docs/sources/tutorial/11_log_custom.md @@ -46,7 +46,7 @@ plugins: - plugin: client_proxy # 提高移动端的请求效率的插件 log: false # 插件自身不打印日志 kwargs: - proxy_client_key: cl_proxy_future + proxy_client_key: photo_concurrent_fetcher_proxy whitelist: [ api, ] ``` diff --git a/assets/option/option_test_api.yml b/assets/option/option_test_api.yml index 5f2f5f95..343fa699 100644 --- a/assets/option/option_test_api.yml +++ b/assets/option/option_test_api.yml @@ -24,5 +24,5 @@ plugins: - plugin: client_proxy kwargs: - proxy_client_key: cl_proxy_future + proxy_client_key: photo_concurrent_fetcher_proxy whitelist: [ api, ] \ No newline at end of file diff --git a/assets/option/option_test_html.yml b/assets/option/option_test_html.yml index fbb68860..53dfecb4 100644 --- a/assets/option/option_test_html.yml +++ b/assets/option/option_test_html.yml @@ -25,5 +25,5 @@ plugins: - plugin: client_proxy kwargs: - proxy_client_key: cl_proxy_future + proxy_client_key: photo_concurrent_fetcher_proxy whitelist: [ api, ] \ No newline at end of file diff --git a/assets/option/option_workflow_download.yml b/assets/option/option_workflow_download.yml index 92310e39..5b469916 100644 --- a/assets/option/option_workflow_download.yml +++ b/assets/option/option_workflow_download.yml @@ -18,7 +18,7 @@ plugins: - plugin: client_proxy # 提高移动端的请求效率的插件 kwargs: - proxy_client_key: cl_proxy_future + proxy_client_key: photo_concurrent_fetcher_proxy whitelist: [ api, ] - plugin: login # 登录插件 diff --git a/src/jmcomic/__init__.py b/src/jmcomic/__init__.py index fc1156c5..ebf62ba7 100644 --- a/src/jmcomic/__init__.py +++ b/src/jmcomic/__init__.py @@ -2,7 +2,7 @@ # 被依赖方 <--- 使用方 # config <--- entity <--- toolkit <--- client <--- option <--- downloader -__version__ = '2.5.5' +__version__ = '2.5.6' from .api import * from .jm_plugin import * diff --git a/src/jmcomic/api.py b/src/jmcomic/api.py index 058ff41f..34e826d3 100644 --- a/src/jmcomic/api.py +++ b/src/jmcomic/api.py @@ -100,7 +100,7 @@ def new_downloader(option=None, downloader=None) -> JmDownloader: return downloader(option) -def create_option(filepath): +def create_option_by_file(filepath): return JmModuleConfig.option_class().from_file(filepath) @@ -110,4 +110,14 @@ def create_option_by_env(env_name='JM_OPTION_PATH'): filepath = get_env(env_name, None) ExceptionTool.require_true(filepath is not None, f'未配置环境变量: {env_name},请配置为option的文件路径') - return create_option(filepath) + return create_option_by_file(filepath) + + +def create_option_by_str(text: str, mode=None): + if mode is None: + mode = PackerUtil.mode_yml + data = PackerUtil.unpack_by_str(text, mode)[0] + return JmModuleConfig.option_class().construct(data) + + +create_option = create_option_by_file diff --git a/src/jmcomic/jm_client_impl.py b/src/jmcomic/jm_client_impl.py index a9d1415a..a18c03d0 100644 --- a/src/jmcomic/jm_client_impl.py +++ b/src/jmcomic/jm_client_impl.py @@ -79,9 +79,9 @@ def request_with_retry(self, """ if domain_index >= len(self.domain_list): return self.fallback(request, url, domain_index, retry_count, **kwargs) - + url_backup = url - + if url.startswith('/'): # path → url domain = self.domain_list[domain_index] @@ -976,10 +976,14 @@ def get_cookies(self): return cookies -class FutureClientProxy(JmcomicClient): +class PhotoConcurrentFetcherProxy(JmcomicClient): """ - 在Client上做了一层线程池封装来实现异步,对外仍然暴露JmcomicClient的接口,可以看作Client的代理。 - 除了使用线程池做异步,还通过加锁和缓存结果,实现同一个请求不会被多个线程发出,减少开销 + 为了解决 JmApiClient.get_photo_detail 方法的排队调用问题, + 即在访问完photo的接口后,需要另外排队访问获取album和scramble_id的接口。 + + 这三个接口可以并发请求,这样可以提高效率。 + + 此Proxy代理了get_photo_detail,实现了并发请求这三个接口,然后组装返回值返回photo。 可通过插件 ClientProxyPlugin 启用本类,配置如下: ```yml @@ -987,10 +991,10 @@ class FutureClientProxy(JmcomicClient): after_init: - plugin: client_proxy kwargs: - proxy_client_key: cl_proxy_future + proxy_client_key: photo_concurrent_fetcher_proxy ``` """ - client_key = 'cl_proxy_future' + client_key = 'photo_concurrent_fetcher_proxy' class FutureWrapper: def __init__(self, future, after_done_callback): @@ -1024,16 +1028,15 @@ def __init__(self, executors = ThreadPoolExecutor(max_workers) self.executors = executors - self.future_dict: Dict[str, FutureClientProxy.FutureWrapper] = {} + self.future_dict: Dict[str, PhotoConcurrentFetcherProxy.FutureWrapper] = {} from threading import Lock self.lock = Lock() def route_notimpl_method_to_internal_client(self, client): - impl_methods = str_to_set(''' + proxy_methods = str_to_set(''' get_album_detail get_photo_detail - search ''') # 获取对象的所有属性和方法的名称列表 @@ -1043,7 +1046,7 @@ def route_notimpl_method_to_internal_client(self, client): # 判断是否为方法(可调用对象) if (not method.startswith('_') and callable(getattr(client, method)) - and method not in impl_methods + and method not in proxy_methods ): setattr(self, method, getattr(client, method)) @@ -1055,15 +1058,19 @@ def get_album_detail(self, album_id) -> JmAlbumDetail: def get_future(self, cache_key, task): if cache_key in self.future_dict: + # cache hit, means that a same task is running return self.future_dict[cache_key] with self.lock: if cache_key in self.future_dict: return self.future_dict[cache_key] + # after future done, remove it from future_dict. + # cache depends on self.client instead of self.future_dict future = self.FutureWrapper(self.executors.submit(task), after_done_callback=lambda: self.future_dict.pop(cache_key, None) ) + self.future_dict[cache_key] = future return future @@ -1115,8 +1122,3 @@ def get_photo_detail(self, photo_id, fetch_album=True, fetch_scramble_id=True) - photo.scramble_id = scramble_id return photo - - def search(self, search_query: str, page: int, main_tag: int, order_by: str, time: str) -> JmSearchPage: - cache_key = f'search_query_{search_query}_page_{page}_main_tag_{main_tag}_order_by_{order_by}_time_{time}' - future = self.get_future(cache_key, task=lambda: self.client.search(search_query, page, main_tag, order_by, time)) - return future.result() diff --git a/src/jmcomic/jm_client_interface.py b/src/jmcomic/jm_client_interface.py index b6486392..d3f64f2f 100644 --- a/src/jmcomic/jm_client_interface.py +++ b/src/jmcomic/jm_client_interface.py @@ -469,11 +469,14 @@ def get_cache_dict(self) -> Optional[Dict]: def of_api_url(self, api_path, domain): raise NotImplementedError - def get_html_domain(self, postman=None): - return JmModuleConfig.get_html_domain(postman or self.get_root_postman()) + def get_html_domain(self): + return JmModuleConfig.get_html_domain(self.get_root_postman()) - def get_html_domain_all(self, postman=None): - return JmModuleConfig.get_html_domain_all(postman or self.get_root_postman()) + def get_html_domain_all(self): + return JmModuleConfig.get_html_domain_all(self.get_root_postman()) + + def get_html_domain_all_via_github(self): + return JmModuleConfig.get_html_domain_all_via_github(self.get_root_postman()) # noinspection PyMethodMayBeStatic def do_page_iter(self, params: dict, page: int, get_page_method): diff --git a/src/jmcomic/jm_config.py b/src/jmcomic/jm_config.py index 02aacfed..479277ac 100644 --- a/src/jmcomic/jm_config.py +++ b/src/jmcomic/jm_config.py @@ -81,7 +81,7 @@ class JmModuleConfig: # 网站相关 PROT = "https://" JM_REDIRECT_URL = f'{PROT}jm365.work/3YeBdF' # 永久網域,怕走失的小伙伴收藏起来 - JM_PUB_URL = f'{PROT}jmcomic.ltd' + JM_PUB_URL = f'{PROT}jmcomic-fb.vip' JM_CDN_IMAGE_URL_TEMPLATE = PROT + 'cdn-msp.{domain}/media/photos/{photo_id}/{index:05}{suffix}' # index 从1开始 JM_IMAGE_SUFFIX = ['.jpg', '.webp', '.png', '.gif'] @@ -259,6 +259,41 @@ def get_html_domain_all(cls, postman=None): cls.jm_log('module.html_domain_all', f'获取禁漫网页全部域名: [{resp.url}] → {domain_list}') return domain_list + @classmethod + def get_html_domain_all_via_github(cls, + postman=None, + template='https://jmcmomic.github.io/go/{}.html', + index_range=(300, 309) + ): + """ + 通过禁漫官方的github号的repo获取最新的禁漫域名 + https://github.com/jmcmomic/jmcmomic.github.io + """ + postman = postman or cls.new_postman(headers={ + 'authority': 'github.com', + 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 ' + 'Safari/537.36' + }) + domain_set = set() + + def fetch_domain(url): + resp = postman.get(url, allow_redirects=False) + text = resp.text + from .jm_toolkit import JmcomicText + for domain in JmcomicText.analyse_jm_pub_html(text): + if domain.startswith('jm365'): + continue + domain_set.add(domain) + + from common import multi_thread_launcher + + multi_thread_launcher( + iter_objs=[template.format(i) for i in range(*index_range)], + apply_each_obj_func=fetch_domain, + ) + + return domain_set + @classmethod def new_html_headers(cls, domain='18comic.vip'): """ diff --git a/src/jmcomic/jm_downloader.py b/src/jmcomic/jm_downloader.py index 3445988e..76fce3ee 100644 --- a/src/jmcomic/jm_downloader.py +++ b/src/jmcomic/jm_downloader.py @@ -169,11 +169,11 @@ def client_for_photo(self, jm_photo_id) -> JmcomicClient: def before_album(self, album: JmAlbumDetail): super().before_album(album) self.all_downloaded.setdefault(album, {}) - - def before_photo(self, photo: JmPhotoDetail): - super().before_photo(photo) - self.all_downloaded.setdefault(photo.from_album, {}) - self.all_downloaded[photo.from_album].setdefault(photo, []) + self.option.call_all_plugin( + 'before_album', + album=album, + downloader=self, + ) def after_album(self, album: JmAlbumDetail): super().after_album(album) @@ -183,6 +183,16 @@ def after_album(self, album: JmAlbumDetail): downloader=self, ) + def before_photo(self, photo: JmPhotoDetail): + super().before_photo(photo) + self.all_downloaded.setdefault(photo.from_album, {}) + self.all_downloaded[photo.from_album].setdefault(photo, []) + self.option.call_all_plugin( + 'before_photo', + photo=photo, + downloader=self, + ) + def after_photo(self, photo: JmPhotoDetail): super().after_photo(photo) self.option.call_all_plugin( @@ -191,12 +201,25 @@ def after_photo(self, photo: JmPhotoDetail): downloader=self, ) + def before_image(self, image: JmImageDetail, img_save_path): + super().before_image(image, img_save_path) + self.option.call_all_plugin( + 'before_image', + image=image, + downloader=self, + ) + def after_image(self, image: JmImageDetail, img_save_path): super().after_image(image, img_save_path) photo = image.from_photo album = photo.from_album self.all_downloaded.get(album).get(photo).append((img_save_path, image)) + self.option.call_all_plugin( + 'after_image', + image=image, + downloader=self, + ) # 下面是对with语法的支持 @@ -219,28 +242,23 @@ def use(cls, *args, **kwargs): class DoNotDownloadImage(JmDownloader): """ - 本类仅用于测试 - - 用法: - - JmModuleConfig.CLASS_DOWNLOADER = DoNotDownloadImage + 不会下载任何图片的Downloader,用作测试 """ def download_by_image_detail(self, image: JmImageDetail, client: JmcomicClient): # ensure make dir self.option.decide_image_filepath(image) - pass class JustDownloadSpecificCountImage(JmDownloader): + """ + 只下载特定数量图片的Downloader,用作测试 + """ from threading import Lock count_lock = Lock() count = 0 - def __init__(self, option: JmOption) -> None: - super().__init__(option) - def download_by_image_detail(self, image: JmImageDetail, client: JmcomicClient): # ensure make dir self.option.decide_image_filepath(image) diff --git a/src/jmcomic/jm_entity.py b/src/jmcomic/jm_entity.py index a225c9c7..a86c7487 100644 --- a/src/jmcomic/jm_entity.py +++ b/src/jmcomic/jm_entity.py @@ -91,17 +91,17 @@ def authoroname(self): """ authoroname = author + oname - 比较好识别的一种本子名称方式 + 个人认为识别度比较高的本子名称,一眼看去就能获取到本子的关键信息 - 具体格式: f'【author】{oname}' + 具体格式: '【author】oname' 示例: - 原本子名:喂我吃吧 老師! [欶瀾漢化組] [BLVEFO9] たべさせて、せんせい! (ブルーアーカイブ) [中國翻譯] [無修正] + Pname:喂我吃吧 老師! [欶瀾漢化組] [BLVEFO9] たべさせて、せんせい! (ブルーアーカイブ) [中國翻譯] [無修正] - authoroname:【BLVEFO9】喂我吃吧 老師! + Pauthoroname:【BLVEFO9】喂我吃吧 老師! - :return: 返回作者名+作品原名,格式为: '【author】{oname}' + :return: 返回作者名+本子原始名称,格式为: '【author】oname' """ return f'【{self.author}】{self.oname}' @@ -109,12 +109,16 @@ def authoroname(self): def idoname(self): """ 类似 authoroname - :return: '[id] {oname}' + + :return: '[id] oname' """ return f'[{self.id}] {self.oname}' def __str__(self): - return f'{self.__class__.__name__}({self.id}-{self.title})' + return f'{self.__class__.__name__}' \ + '{' \ + f'{self.id}: {self.title}'\ + '}' @classmethod def __alias__(cls): diff --git a/src/jmcomic/jm_plugin.py b/src/jmcomic/jm_plugin.py index 082ea860..f1814199 100644 --- a/src/jmcomic/jm_plugin.py +++ b/src/jmcomic/jm_plugin.py @@ -814,8 +814,6 @@ def invoke(self, if self.running is True: return - self.running = True - # 服务器的代码位于一个独立库:plugin_jm_server,需要独立安装 # 源代码仓库:https://github.com/hect0x7/plugin-jm-server try: @@ -842,6 +840,7 @@ def blocking_run_server(): # 不是主线程,return return self.warning_wrong_usage_of_debug() else: + self.running = True # 是主线程,启动服务器 blocking_run_server() @@ -849,6 +848,7 @@ def blocking_run_server(): # 非debug模式,开新线程启动 threading.Thread(target=blocking_run_server, daemon=True).start() atexit_register(self.wait_server_stop) + self.running = True def warning_wrong_usage_of_debug(self): self.log('注意!当配置debug=True时,请确保当前插件是在主线程中被调用。\n' diff --git a/src/jmcomic/jm_toolkit.py b/src/jmcomic/jm_toolkit.py index f38c6311..62c3c84c 100644 --- a/src/jmcomic/jm_toolkit.py +++ b/src/jmcomic/jm_toolkit.py @@ -357,17 +357,16 @@ class JmPageTool: # 用来缩减html的长度 pattern_html_search_shorten_for = compile(r'