From fbaaffc464128d269748bc5edcaeb73885db2164 Mon Sep 17 00:00:00 2001
From: hect0x7 <93357912+hect0x7@users.noreply.github.com>
Date: Fri, 23 Feb 2024 00:17:50 +0800
Subject: [PATCH] =?UTF-8?q?v2.5.6:=20=E6=9B=B4=E6=96=B0=E7=A6=81=E6=BC=AB?=
 =?UTF-8?q?=E5=8F=91=E5=B8=83=E9=A1=B5URl;=20=E4=BC=98=E5=8C=96=E6=AD=A3?=
 =?UTF-8?q?=E5=88=99=E8=A1=A8=E8=BE=BE=E5=BC=8F=E7=9A=84=E9=80=82=E9=85=8D?=
 =?UTF-8?q?;=20=E6=94=AF=E6=8C=81=E9=80=9A=E8=BF=87github=E7=BD=91?=
 =?UTF-8?q?=E5=9D=80=E8=8E=B7=E5=8F=96=E7=A6=81=E6=BC=AB=E7=BD=91=E9=A1=B5?=
 =?UTF-8?q?=E5=9F=9F=E5=90=8D;=20=E4=BC=98=E5=8C=96=E4=BB=A3=E7=A0=81?=
 =?UTF-8?q?=E5=92=8C=E6=96=87=E6=A1=A3.=20(#212)=20(#213)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 assets/docs/sources/option_file_syntax.md     |  2 +-
 assets/docs/sources/tutorial/11_log_custom.md |  2 +-
 assets/option/option_test_api.yml             |  2 +-
 assets/option/option_test_html.yml            |  2 +-
 assets/option/option_workflow_download.yml    |  2 +-
 src/jmcomic/__init__.py                       |  2 +-
 src/jmcomic/api.py                            | 14 +++++-
 src/jmcomic/jm_client_impl.py                 | 34 +++++++-------
 src/jmcomic/jm_client_interface.py            | 11 +++--
 src/jmcomic/jm_config.py                      | 37 ++++++++++++++-
 src/jmcomic/jm_downloader.py                  | 46 +++++++++++++------
 src/jmcomic/jm_entity.py                      | 18 +++++---
 src/jmcomic/jm_plugin.py                      |  4 +-
 src/jmcomic/jm_toolkit.py                     | 42 ++++++++++-------
 tests/test_jmcomic/test_jm_api.py             |  5 +-
 15 files changed, 153 insertions(+), 70 deletions(-)

diff --git a/assets/docs/sources/option_file_syntax.md b/assets/docs/sources/option_file_syntax.md
index f8872763..66124426 100644
--- a/assets/docs/sources/option_file_syntax.md
+++ b/assets/docs/sources/option_file_syntax.md
@@ -127,7 +127,7 @@ plugins:
 
     - plugin: client_proxy # 客户端实现类代理插件，不建议非开发人员使用
       kwargs:
-        proxy_client_key: cl_proxy_future # 代理类的client_key
+        proxy_client_key: photo_concurrent_fetcher_proxy # 代理类的client_key
         whitelist: [ api, ] # 白名单，当client.impl匹配白名单时才代理
 
     - plugin: auto_set_browser_cookies # 自动获取浏览器cookies，详见插件类
diff --git a/assets/docs/sources/tutorial/11_log_custom.md b/assets/docs/sources/tutorial/11_log_custom.md
index 9c7c0a29..bf56b91d 100644
--- a/assets/docs/sources/tutorial/11_log_custom.md
+++ b/assets/docs/sources/tutorial/11_log_custom.md
@@ -46,7 +46,7 @@ plugins:
     - plugin: client_proxy # 提高移动端的请求效率的插件
       log: false # 插件自身不打印日志
       kwargs:
-        proxy_client_key: cl_proxy_future
+        proxy_client_key: photo_concurrent_fetcher_proxy
         whitelist: [ api, ]
 ```
 
diff --git a/assets/option/option_test_api.yml b/assets/option/option_test_api.yml
index 5f2f5f95..343fa699 100644
--- a/assets/option/option_test_api.yml
+++ b/assets/option/option_test_api.yml
@@ -24,5 +24,5 @@ plugins:
 
     - plugin: client_proxy
       kwargs:
-        proxy_client_key: cl_proxy_future
+        proxy_client_key: photo_concurrent_fetcher_proxy
         whitelist: [ api, ]
\ No newline at end of file
diff --git a/assets/option/option_test_html.yml b/assets/option/option_test_html.yml
index fbb68860..53dfecb4 100644
--- a/assets/option/option_test_html.yml
+++ b/assets/option/option_test_html.yml
@@ -25,5 +25,5 @@ plugins:
 
     - plugin: client_proxy
       kwargs:
-        proxy_client_key: cl_proxy_future
+        proxy_client_key: photo_concurrent_fetcher_proxy
         whitelist: [ api, ]
\ No newline at end of file
diff --git a/assets/option/option_workflow_download.yml b/assets/option/option_workflow_download.yml
index 92310e39..5b469916 100644
--- a/assets/option/option_workflow_download.yml
+++ b/assets/option/option_workflow_download.yml
@@ -18,7 +18,7 @@ plugins:
 
     - plugin: client_proxy # 提高移动端的请求效率的插件
       kwargs:
-        proxy_client_key: cl_proxy_future
+        proxy_client_key: photo_concurrent_fetcher_proxy
         whitelist: [ api, ]
 
     - plugin: login # 登录插件
diff --git a/src/jmcomic/__init__.py b/src/jmcomic/__init__.py
index fc1156c5..ebf62ba7 100644
--- a/src/jmcomic/__init__.py
+++ b/src/jmcomic/__init__.py
@@ -2,7 +2,7 @@
 # 被依赖方 <--- 使用方
 # config <--- entity <--- toolkit <--- client <--- option <--- downloader
 
-__version__ = '2.5.5'
+__version__ = '2.5.6'
 
 from .api import *
 from .jm_plugin import *
diff --git a/src/jmcomic/api.py b/src/jmcomic/api.py
index 058ff41f..34e826d3 100644
--- a/src/jmcomic/api.py
+++ b/src/jmcomic/api.py
@@ -100,7 +100,7 @@ def new_downloader(option=None, downloader=None) -> JmDownloader:
     return downloader(option)
 
 
-def create_option(filepath):
+def create_option_by_file(filepath):  # renamed from create_option
     return JmModuleConfig.option_class().from_file(filepath)
 
 
@@ -110,4 +110,14 @@ def create_option_by_env(env_name='JM_OPTION_PATH'):
     filepath = get_env(env_name, None)
     ExceptionTool.require_true(filepath is not None,
                                f'未配置环境变量: {env_name}，请配置为option的文件路径')
-    return create_option(filepath)
+    return create_option_by_file(filepath)
+
+
+def create_option_by_str(text: str, mode=None):
+    if mode is None:
+        mode = PackerUtil.mode_yml  # default mode when the caller passes none
+    data = PackerUtil.unpack_by_str(text, mode)[0]  # keep only the first element of the unpack result
+    return JmModuleConfig.option_class().construct(data)
+
+
+create_option = create_option_by_file  # keeps the pre-rename name usable
diff --git a/src/jmcomic/jm_client_impl.py b/src/jmcomic/jm_client_impl.py
index a9d1415a..a18c03d0 100644
--- a/src/jmcomic/jm_client_impl.py
+++ b/src/jmcomic/jm_client_impl.py
@@ -79,9 +79,9 @@ def request_with_retry(self,
         """
         if domain_index >= len(self.domain_list):
             return self.fallback(request, url, domain_index, retry_count, **kwargs)
-        
+
         url_backup = url
-        
+
         if url.startswith('/'):
             # path → url
             domain = self.domain_list[domain_index]
@@ -976,10 +976,14 @@ def get_cookies(self):
         return cookies
 
 
-class FutureClientProxy(JmcomicClient):
+class PhotoConcurrentFetcherProxy(JmcomicClient):
     """
-    在Client上做了一层线程池封装来实现异步，对外仍然暴露JmcomicClient的接口，可以看作Client的代理。
-    除了使用线程池做异步，还通过加锁和缓存结果，实现同一个请求不会被多个线程发出，减少开销
+    为了解决 JmApiClient.get_photo_detail 方法的排队调用问题，
+    即在访问完photo的接口后，需要另外排队访问获取album和scramble_id的接口。
+
+    这三个接口可以并发请求，这样可以提高效率。
+
+    此Proxy代理了get_photo_detail，实现了并发请求这三个接口，然后组装返回值返回photo。
 
     可通过插件 ClientProxyPlugin 启用本类，配置如下:
     ```yml
@@ -987,10 +991,10 @@ class FutureClientProxy(JmcomicClient):
       after_init:
         - plugin: client_proxy
           kwargs:
-            proxy_client_key: cl_proxy_future
+            proxy_client_key: photo_concurrent_fetcher_proxy
     ```
     """
-    client_key = 'cl_proxy_future'
+    client_key = 'photo_concurrent_fetcher_proxy'
 
     class FutureWrapper:
         def __init__(self, future, after_done_callback):
@@ -1024,16 +1028,15 @@ def __init__(self,
             executors = ThreadPoolExecutor(max_workers)
 
         self.executors = executors
-        self.future_dict: Dict[str, FutureClientProxy.FutureWrapper] = {}
+        self.future_dict: Dict[str, PhotoConcurrentFetcherProxy.FutureWrapper] = {}
         from threading import Lock
         self.lock = Lock()
 
     def route_notimpl_method_to_internal_client(self, client):
 
-        impl_methods = str_to_set('''
+        proxy_methods = str_to_set('''
         get_album_detail
         get_photo_detail
-        search
         ''')
 
         # 获取对象的所有属性和方法的名称列表
@@ -1043,7 +1046,7 @@ def route_notimpl_method_to_internal_client(self, client):
             # 判断是否为方法（可调用对象）
             if (not method.startswith('_')
                     and callable(getattr(client, method))
-                    and method not in impl_methods
+                    and method not in proxy_methods
             ):
                 setattr(self, method, getattr(client, method))
 
@@ -1055,15 +1058,19 @@ def get_album_detail(self, album_id) -> JmAlbumDetail:
 
     def get_future(self, cache_key, task):
         if cache_key in self.future_dict:
+            # cache hit: an identical task is already in flight, so reuse its future
             return self.future_dict[cache_key]
 
         with self.lock:
             if cache_key in self.future_dict:
                 return self.future_dict[cache_key]
 
+            # once the future is done, its entry is removed from future_dict;
+            # result caching is left to self.client, not to self.future_dict
             future = self.FutureWrapper(self.executors.submit(task),
                                         after_done_callback=lambda: self.future_dict.pop(cache_key, None)
                                         )
+
             self.future_dict[cache_key] = future
             return future
 
@@ -1115,8 +1122,3 @@ def get_photo_detail(self, photo_id, fetch_album=True, fetch_scramble_id=True) -
             photo.scramble_id = scramble_id
 
         return photo
-
-    def search(self, search_query: str, page: int, main_tag: int, order_by: str, time: str) -> JmSearchPage:
-        cache_key = f'search_query_{search_query}_page_{page}_main_tag_{main_tag}_order_by_{order_by}_time_{time}'
-        future = self.get_future(cache_key, task=lambda: self.client.search(search_query, page, main_tag, order_by, time))
-        return future.result()
diff --git a/src/jmcomic/jm_client_interface.py b/src/jmcomic/jm_client_interface.py
index b6486392..d3f64f2f 100644
--- a/src/jmcomic/jm_client_interface.py
+++ b/src/jmcomic/jm_client_interface.py
@@ -469,11 +469,14 @@ def get_cache_dict(self) -> Optional[Dict]:
     def of_api_url(self, api_path, domain):
         raise NotImplementedError
 
-    def get_html_domain(self, postman=None):
-        return JmModuleConfig.get_html_domain(postman or self.get_root_postman())
+    def get_html_domain(self):
+        return JmModuleConfig.get_html_domain(self.get_root_postman())  # always uses this client's root postman
 
-    def get_html_domain_all(self, postman=None):
-        return JmModuleConfig.get_html_domain_all(postman or self.get_root_postman())
+    def get_html_domain_all(self):
+        return JmModuleConfig.get_html_domain_all(self.get_root_postman())  # always uses this client's root postman
+
+    def get_html_domain_all_via_github(self):
+        return JmModuleConfig.get_html_domain_all_via_github(self.get_root_postman())  # delegates with this client's root postman
 
     # noinspection PyMethodMayBeStatic
     def do_page_iter(self, params: dict, page: int, get_page_method):
diff --git a/src/jmcomic/jm_config.py b/src/jmcomic/jm_config.py
index 02aacfed..479277ac 100644
--- a/src/jmcomic/jm_config.py
+++ b/src/jmcomic/jm_config.py
@@ -81,7 +81,7 @@ class JmModuleConfig:
     # 网站相关
     PROT = "https://"
     JM_REDIRECT_URL = f'{PROT}jm365.work/3YeBdF'  # 永久網域，怕走失的小伙伴收藏起来
-    JM_PUB_URL = f'{PROT}jmcomic.ltd'
+    JM_PUB_URL = f'{PROT}jmcomic-fb.vip'
     JM_CDN_IMAGE_URL_TEMPLATE = PROT + 'cdn-msp.{domain}/media/photos/{photo_id}/{index:05}{suffix}'  # index 从1开始
     JM_IMAGE_SUFFIX = ['.jpg', '.webp', '.png', '.gif']
 
@@ -259,6 +259,41 @@ def get_html_domain_all(cls, postman=None):
         cls.jm_log('module.html_domain_all', f'获取禁漫网页全部域名: [{resp.url}] → {domain_list}')
         return domain_list
 
+    @classmethod
+    def get_html_domain_all_via_github(cls,
+                                       postman=None,
+                                       template='https://jmcmomic.github.io/go/{}.html',
+                                       index_range=(300, 309)  # fed to range(*index_range), so the end index is exclusive
+                                       ):
+        """
+        Fetch the latest JM domains from the repo of JM's official GitHub account and return them as a set.
+        https://github.com/jmcmomic/jmcmomic.github.io
+        """
+        postman = postman or cls.new_postman(headers={
+            'authority': 'github.com',  # NOTE(review): the default template targets jmcmomic.github.io, not github.com - confirm this value
+            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 '
+                          'Safari/537.36'
+        })
+        domain_set = set()
+
+        def fetch_domain(url):
+            resp = postman.get(url, allow_redirects=False)  # redirects are not followed; the page body itself is parsed
+            text = resp.text
+            from .jm_toolkit import JmcomicText  # function-scope import; presumably avoids a circular import (toolkit depends on config) - verify
+            for domain in JmcomicText.analyse_jm_pub_html(text):
+                if domain.startswith('jm365'):  # jm365* domains are left out of the result
+                    continue
+                domain_set.add(domain)
+
+        from common import multi_thread_launcher
+
+        multi_thread_launcher(
+            iter_objs=[template.format(i) for i in range(*index_range)],  # one URL per index
+            apply_each_obj_func=fetch_domain,
+        )
+
+        return domain_set
+
     @classmethod
     def new_html_headers(cls, domain='18comic.vip'):
         """
diff --git a/src/jmcomic/jm_downloader.py b/src/jmcomic/jm_downloader.py
index 3445988e..76fce3ee 100644
--- a/src/jmcomic/jm_downloader.py
+++ b/src/jmcomic/jm_downloader.py
@@ -169,11 +169,11 @@ def client_for_photo(self, jm_photo_id) -> JmcomicClient:
     def before_album(self, album: JmAlbumDetail):
         super().before_album(album)
         self.all_downloaded.setdefault(album, {})
-
-    def before_photo(self, photo: JmPhotoDetail):
-        super().before_photo(photo)
-        self.all_downloaded.setdefault(photo.from_album, {})
-        self.all_downloaded[photo.from_album].setdefault(photo, [])
+        self.option.call_all_plugin(  # plugin hook, mirrors the call in after_album
+            'before_album',
+            album=album,
+            downloader=self,
+        )
 
     def after_album(self, album: JmAlbumDetail):
         super().after_album(album)
@@ -183,6 +183,16 @@ def after_album(self, album: JmAlbumDetail):
             downloader=self,
         )
 
+    def before_photo(self, photo: JmPhotoDetail):
+        super().before_photo(photo)
+        self.all_downloaded.setdefault(photo.from_album, {})  # create the album entry if it is missing
+        self.all_downloaded[photo.from_album].setdefault(photo, [])  # list that after_image appends to
+        self.option.call_all_plugin(  # plugin hook, mirrors the call in after_photo
+            'before_photo',
+            photo=photo,
+            downloader=self,
+        )
+
     def after_photo(self, photo: JmPhotoDetail):
         super().after_photo(photo)
         self.option.call_all_plugin(
@@ -191,12 +201,25 @@ def after_photo(self, photo: JmPhotoDetail):
             downloader=self,
         )
 
+    def before_image(self, image: JmImageDetail, img_save_path):
+        super().before_image(image, img_save_path)
+        self.option.call_all_plugin(  # NOTE(review): img_save_path is not forwarded to plugins - confirm this is intended
+            'before_image',
+            image=image,
+            downloader=self,
+        )
+
     def after_image(self, image: JmImageDetail, img_save_path):
         super().after_image(image, img_save_path)
         photo = image.from_photo
         album = photo.from_album
 
         self.all_downloaded.get(album).get(photo).append((img_save_path, image))
+        self.option.call_all_plugin(  # NOTE(review): img_save_path is not forwarded to plugins - confirm this is intended
+            'after_image',
+            image=image,
+            downloader=self,
+        )
 
     # 下面是对with语法的支持
 
@@ -219,28 +242,23 @@ def use(cls, *args, **kwargs):
 
 class DoNotDownloadImage(JmDownloader):
     """
-    本类仅用于测试
-
-    用法：
-
-    JmModuleConfig.CLASS_DOWNLOADER = DoNotDownloadImage
+    Downloader that never downloads any image; intended for testing.
     """
 
     def download_by_image_detail(self, image: JmImageDetail, client: JmcomicClient):
         # ensure make dir
         self.option.decide_image_filepath(image)
-        pass
 
 
 class JustDownloadSpecificCountImage(JmDownloader):
+    """
+    只下载特定数量图片的Downloader，用作测试
+    """
     from threading import Lock
 
     count_lock = Lock()
     count = 0
 
-    def __init__(self, option: JmOption) -> None:
-        super().__init__(option)
-
     def download_by_image_detail(self, image: JmImageDetail, client: JmcomicClient):
         # ensure make dir
         self.option.decide_image_filepath(image)
diff --git a/src/jmcomic/jm_entity.py b/src/jmcomic/jm_entity.py
index a225c9c7..a86c7487 100644
--- a/src/jmcomic/jm_entity.py
+++ b/src/jmcomic/jm_entity.py
@@ -91,17 +91,17 @@ def authoroname(self):
         """
         authoroname = author + oname
 
-        比较好识别的一种本子名称方式
+        个人认为识别度比较高的本子名称，一眼看去就能获取到本子的关键信息
 
-        具体格式: f'【author】{oname}'
+        具体格式: '【author】oname'
 
         示例:
 
-        原本子名：喂我吃吧 老師! [欶瀾漢化組] [BLVEFO9] たべさせて、せんせい! (ブルーアーカイブ) [中國翻譯] [無修正]
+        Pname：喂我吃吧 老師! [欶瀾漢化組] [BLVEFO9] たべさせて、せんせい! (ブルーアーカイブ) [中國翻譯] [無修正]
 
-        authoroname：【BLVEFO9】喂我吃吧 老師!
+        Pauthoroname：【BLVEFO9】喂我吃吧 老師!
 
-        :return: 返回作者名+作品原名，格式为: '【author】{oname}'
+        :return: 返回作者名+本子原始名称，格式为: '【author】oname'
         """
         return f'【{self.author}】{self.oname}'
 
@@ -109,12 +109,16 @@ def authoroname(self):
     def idoname(self):
         """
         类似 authoroname
-        :return: '[id] {oname}'
+
+        :return: '[id] oname'
         """
         return f'[{self.id}] {self.oname}'
 
     def __str__(self):
-        return f'{self.__class__.__name__}({self.id}-{self.title})'
+        return f'{self.__class__.__name__}' \
+               '{' \
+               f'{self.id}: {self.title}'\
+               '}'  # renders as ClassName{id: title}
 
     @classmethod
     def __alias__(cls):
diff --git a/src/jmcomic/jm_plugin.py b/src/jmcomic/jm_plugin.py
index 082ea860..f1814199 100644
--- a/src/jmcomic/jm_plugin.py
+++ b/src/jmcomic/jm_plugin.py
@@ -814,8 +814,6 @@ def invoke(self,
             if self.running is True:
                 return
 
-            self.running = True
-
             # 服务器的代码位于一个独立库：plugin_jm_server，需要独立安装
             # 源代码仓库：https://github.com/hect0x7/plugin-jm-server
             try:
@@ -842,6 +840,7 @@ def blocking_run_server():
                     # 不是主线程，return
                     return self.warning_wrong_usage_of_debug()
                 else:
+                    self.running = True
                     # 是主线程，启动服务器
                     blocking_run_server()
 
@@ -849,6 +848,7 @@ def blocking_run_server():
                 # 非debug模式，开新线程启动
                 threading.Thread(target=blocking_run_server, daemon=True).start()
                 atexit_register(self.wait_server_stop)
+                self.running = True
 
     def warning_wrong_usage_of_debug(self):
         self.log('注意！当配置debug=True时，请确保当前插件是在主线程中被调用。\n'
diff --git a/src/jmcomic/jm_toolkit.py b/src/jmcomic/jm_toolkit.py
index f38c6311..62c3c84c 100644
--- a/src/jmcomic/jm_toolkit.py
+++ b/src/jmcomic/jm_toolkit.py
@@ -357,17 +357,16 @@ class JmPageTool:
     # 用来缩减html的长度
     pattern_html_search_shorten_for = compile(r'<div class="well well-sm">([\s\S]*)<div class="row">')
 
-    # 用来提取搜索页面的的album的信息
+    # 用来提取搜索页面的album的信息
     pattern_html_search_album_info_list = compile(
         r'<a href="/album/(\d+)/[\s\S]*?title="(.*?)"([\s\S]*?)<div class="title-truncate tags .*>([\s\S]*?)</div>'
     )
 
-    # 用来提取分类页面的的album的信息
+    # 用来提取分类页面的album的信息
     pattern_html_category_album_info_list = compile(
-        r'<a href="/album/(\d+)/[^>]*>[\s\S]*?title="(.*?)"[^>]*>'
-        r'\n</a>\n'
-        r'<div class="label-loveicon">'
-        r'([\s\S]*?)'
+        r'<a href="/album/(\d+)/[^>]*>[^>]*?'
+        r'title="(.*?)"[^>]*>[ \n]*</a>[ \n]*'
+        r'<div class="label-loveicon">([\s\S]*?)'
         r'<div class="clearfix">'
     )
 
@@ -755,23 +754,34 @@ def decode_and_save(cls,
 
         # 创建新的解密图片
         img_decode = Image.new("RGB", (w, h))
-        remainder = h % num
-        copyW = w
+        over = h % num
         for i in range(num):
-            copyH = math.floor(h / num)
-            py = copyH * i
-            y = h - (copyH * (i + 1)) - remainder
+            move = math.floor(h / num)
+            y_src = h - (move * (i + 1)) - over
+            y_dst = move * i
 
             if i == 0:
-                copyH += remainder
+                move += over
             else:
-                py += remainder
+                y_dst += over
 
             img_decode.paste(
-                img_src.crop((0, y, copyW, y + copyH)),
-                (0, py, copyW, py + copyH)
+                img_src.crop((
+                    0, y_src,
+                    w, y_src + move
+                )),
+                (
+                    0, y_dst,
+                    w, y_dst + move
+                )
             )
 
+            # save every step result
+            # cls.save_image(img_decode, change_file_name(
+            #     decoded_save_path,
+            #     f'{of_file_name(decoded_save_path, trim_suffix=True)}_{i}{of_file_suffix(decoded_save_path)}'
+            # ))
+
         # 保存到新的解密文件
         cls.save_image(img_decode, decoded_save_path)
 
@@ -867,7 +877,7 @@ def decode_resp_data(cls,
         """
         解密接口返回值
 
-        :param data: data = resp.json()['data]
+        :param data: resp.json()['data']
         :param ts: 时间戳
         :param secret: 密钥
         :return: json格式的字符串
diff --git a/tests/test_jmcomic/test_jm_api.py b/tests/test_jmcomic/test_jm_api.py
index f54a8012..078d3ce4 100644
--- a/tests/test_jmcomic/test_jm_api.py
+++ b/tests/test_jmcomic/test_jm_api.py
@@ -43,10 +43,11 @@ def test_batch(self):
         ret2 = jmcomic.download_album((e for e in album_ls), self.option)
         self.assertEqual(len(ret2), len(album_ls), 'Generator')
 
-    def test_get_jmcomic_url(self):
+    def test_get_jmcomic_domain(self):
         func_list = {
             self.client.get_html_domain,
             self.client.get_html_domain_all,
+            self.client.get_html_domain_all_via_github,
             # JmModuleConfig.get_jmcomic_url,
             # JmModuleConfig.get_jmcomic_domain_all,
         }
@@ -55,7 +56,7 @@ def test_get_jmcomic_url(self):
 
         def run_func_async(func):
             try:
-                func()
+                print(func())
             except BaseException as e:
                 exception_list.append(e)
                 traceback_print_exec()