Skip to content

Commit

Permalink
v2.5.6: 更新禁漫发布页URL; 优化正则表达式的适配; 支持通过github网址获取禁漫网页域名; 优化代码和文档. (hect0…
Browse files Browse the repository at this point in the history
  • Loading branch information
hect0x7 authored Feb 22, 2024
1 parent 9a3d0f1 commit fbaaffc
Show file tree
Hide file tree
Showing 15 changed files with 153 additions and 70 deletions.
2 changes: 1 addition & 1 deletion assets/docs/sources/option_file_syntax.md
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ plugins:

- plugin: client_proxy # 客户端实现类代理插件,不建议非开发人员使用
kwargs:
proxy_client_key: cl_proxy_future # 代理类的client_key
proxy_client_key: photo_concurrent_fetcher_proxy # 代理类的client_key
whitelist: [ api, ] # 白名单,当client.impl匹配白名单时才代理

- plugin: auto_set_browser_cookies # 自动获取浏览器cookies,详见插件类
Expand Down
2 changes: 1 addition & 1 deletion assets/docs/sources/tutorial/11_log_custom.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ plugins:
- plugin: client_proxy # 提高移动端的请求效率的插件
log: false # 插件自身不打印日志
kwargs:
proxy_client_key: cl_proxy_future
proxy_client_key: photo_concurrent_fetcher_proxy
whitelist: [ api, ]
```
Expand Down
2 changes: 1 addition & 1 deletion assets/option/option_test_api.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,5 @@ plugins:

- plugin: client_proxy
kwargs:
proxy_client_key: cl_proxy_future
proxy_client_key: photo_concurrent_fetcher_proxy
whitelist: [ api, ]
2 changes: 1 addition & 1 deletion assets/option/option_test_html.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,5 @@ plugins:

- plugin: client_proxy
kwargs:
proxy_client_key: cl_proxy_future
proxy_client_key: photo_concurrent_fetcher_proxy
whitelist: [ api, ]
2 changes: 1 addition & 1 deletion assets/option/option_workflow_download.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ plugins:

- plugin: client_proxy # 提高移动端的请求效率的插件
kwargs:
proxy_client_key: cl_proxy_future
proxy_client_key: photo_concurrent_fetcher_proxy
whitelist: [ api, ]

- plugin: login # 登录插件
Expand Down
2 changes: 1 addition & 1 deletion src/jmcomic/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# 被依赖方 <--- 使用方
# config <--- entity <--- toolkit <--- client <--- option <--- downloader

__version__ = '2.5.5'
__version__ = '2.5.6'

from .api import *
from .jm_plugin import *
Expand Down
14 changes: 12 additions & 2 deletions src/jmcomic/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def new_downloader(option=None, downloader=None) -> JmDownloader:
return downloader(option)


def create_option(filepath):
def create_option_by_file(filepath):
return JmModuleConfig.option_class().from_file(filepath)


Expand All @@ -110,4 +110,14 @@ def create_option_by_env(env_name='JM_OPTION_PATH'):
filepath = get_env(env_name, None)
ExceptionTool.require_true(filepath is not None,
f'未配置环境变量: {env_name},请配置为option的文件路径')
return create_option(filepath)
return create_option_by_file(filepath)


def create_option_by_str(text: str, mode=None):
    """
    Build an option object from configuration text rather than a file.

    :param text: serialized option content
    :param mode: format handed to PackerUtil.unpack_by_str;
                 PackerUtil.mode_yml is used when None
    :return: result of construct() on JmModuleConfig.option_class(),
             fed with the first element returned by the unpack call
    """
    unpack_mode = PackerUtil.mode_yml if mode is None else mode
    data = PackerUtil.unpack_by_str(text, unpack_mode)[0]

    option_class = JmModuleConfig.option_class()
    return option_class.construct(data)


create_option = create_option_by_file
34 changes: 18 additions & 16 deletions src/jmcomic/jm_client_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,9 @@ def request_with_retry(self,
"""
if domain_index >= len(self.domain_list):
return self.fallback(request, url, domain_index, retry_count, **kwargs)

url_backup = url

if url.startswith('/'):
# path → url
domain = self.domain_list[domain_index]
Expand Down Expand Up @@ -976,21 +976,25 @@ def get_cookies(self):
return cookies


class FutureClientProxy(JmcomicClient):
class PhotoConcurrentFetcherProxy(JmcomicClient):
"""
在Client上做了一层线程池封装来实现异步,对外仍然暴露JmcomicClient的接口,可以看作Client的代理。
除了使用线程池做异步,还通过加锁和缓存结果,实现同一个请求不会被多个线程发出,减少开销
为了解决 JmApiClient.get_photo_detail 方法的排队调用问题,
即在访问完photo的接口后,需要另外排队访问获取album和scramble_id的接口。
这三个接口可以并发请求,这样可以提高效率。
此Proxy代理了get_photo_detail,实现了并发请求这三个接口,然后组装返回值返回photo。
可通过插件 ClientProxyPlugin 启用本类,配置如下:
```yml
plugins:
after_init:
- plugin: client_proxy
kwargs:
proxy_client_key: cl_proxy_future
proxy_client_key: photo_concurrent_fetcher_proxy
```
"""
client_key = 'cl_proxy_future'
client_key = 'photo_concurrent_fetcher_proxy'

class FutureWrapper:
def __init__(self, future, after_done_callback):
Expand Down Expand Up @@ -1024,16 +1028,15 @@ def __init__(self,
executors = ThreadPoolExecutor(max_workers)

self.executors = executors
self.future_dict: Dict[str, FutureClientProxy.FutureWrapper] = {}
self.future_dict: Dict[str, PhotoConcurrentFetcherProxy.FutureWrapper] = {}
from threading import Lock
self.lock = Lock()

def route_notimpl_method_to_internal_client(self, client):

impl_methods = str_to_set('''
proxy_methods = str_to_set('''
get_album_detail
get_photo_detail
search
''')

# 获取对象的所有属性和方法的名称列表
Expand All @@ -1043,7 +1046,7 @@ def route_notimpl_method_to_internal_client(self, client):
# 判断是否为方法(可调用对象)
if (not method.startswith('_')
and callable(getattr(client, method))
and method not in impl_methods
and method not in proxy_methods
):
setattr(self, method, getattr(client, method))

Expand All @@ -1055,15 +1058,19 @@ def get_album_detail(self, album_id) -> JmAlbumDetail:

def get_future(self, cache_key, task):
    """
    Return the FutureWrapper registered for cache_key, submitting task
    to self.executors first if none is registered yet.

    :param cache_key: key identifying the task in self.future_dict
    :param task: zero-argument callable submitted to self.executors
    :return: the FutureWrapper stored under cache_key
    """
    # First check happens without taking the lock.
    if cache_key in self.future_dict:
        # cache hit, means that a same task is running
        return self.future_dict[cache_key]

    with self.lock:
        # Check again while holding the lock: another thread may have
        # registered the same key between the first check and here.
        if cache_key in self.future_dict:
            return self.future_dict[cache_key]

        # after future done, remove it from future_dict.
        # cache depends on self.client instead of self.future_dict
        future = self.FutureWrapper(self.executors.submit(task),
                                    after_done_callback=lambda: self.future_dict.pop(cache_key, None)
                                    )

        self.future_dict[cache_key] = future
        return future

Expand Down Expand Up @@ -1115,8 +1122,3 @@ def get_photo_detail(self, photo_id, fetch_album=True, fetch_scramble_id=True) -
photo.scramble_id = scramble_id

return photo

def search(self, search_query: str, page: int, main_tag: int, order_by: str, time: str) -> JmSearchPage:
cache_key = f'search_query_{search_query}_page_{page}_main_tag_{main_tag}_order_by_{order_by}_time_{time}'
future = self.get_future(cache_key, task=lambda: self.client.search(search_query, page, main_tag, order_by, time))
return future.result()
11 changes: 7 additions & 4 deletions src/jmcomic/jm_client_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -469,11 +469,14 @@ def get_cache_dict(self) -> Optional[Dict]:
def of_api_url(self, api_path, domain):
raise NotImplementedError

def get_html_domain(self, postman=None):
return JmModuleConfig.get_html_domain(postman or self.get_root_postman())
def get_html_domain(self):
return JmModuleConfig.get_html_domain(self.get_root_postman())

def get_html_domain_all(self, postman=None):
return JmModuleConfig.get_html_domain_all(postman or self.get_root_postman())
def get_html_domain_all(self):
return JmModuleConfig.get_html_domain_all(self.get_root_postman())

def get_html_domain_all_via_github(self):
    """
    Delegate to JmModuleConfig.get_html_domain_all_via_github,
    passing this client's root postman.
    """
    root_postman = self.get_root_postman()
    return JmModuleConfig.get_html_domain_all_via_github(root_postman)

# noinspection PyMethodMayBeStatic
def do_page_iter(self, params: dict, page: int, get_page_method):
Expand Down
37 changes: 36 additions & 1 deletion src/jmcomic/jm_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ class JmModuleConfig:
# 网站相关
PROT = "https://"
JM_REDIRECT_URL = f'{PROT}jm365.work/3YeBdF' # 永久網域,怕走失的小伙伴收藏起来
JM_PUB_URL = f'{PROT}jmcomic.ltd'
JM_PUB_URL = f'{PROT}jmcomic-fb.vip'
JM_CDN_IMAGE_URL_TEMPLATE = PROT + 'cdn-msp.{domain}/media/photos/{photo_id}/{index:05}{suffix}' # index 从1开始
JM_IMAGE_SUFFIX = ['.jpg', '.webp', '.png', '.gif']

Expand Down Expand Up @@ -259,6 +259,41 @@ def get_html_domain_all(cls, postman=None):
cls.jm_log('module.html_domain_all', f'获取禁漫网页全部域名: [{resp.url}] → {domain_list}')
return domain_list

@classmethod
def get_html_domain_all_via_github(cls,
                                   postman=None,
                                   template='https://jmcmomic.github.io/go/{}.html',
                                   index_range=(300, 309)
                                   ):
    """
    Fetch the latest JM domains through the repo of JM's official GitHub account.
    https://github.com/jmcmomic/jmcmomic.github.io

    :param postman: object used for the GET requests; when falsy, one is
                    created via cls.new_postman with browser-like headers
    :param template: url template, formatted with each page index
    :param index_range: arguments unpacked into range() to produce the page indexes
    :return: set of domains parsed from the pages, excluding those starting with 'jm365'
    """
    if not postman:
        postman = cls.new_postman(headers={
            'authority': 'github.com',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 '
                          'Safari/537.36'
        })

    # filled by fetch_domain, which is run once per url by the launcher below
    domain_set = set()

    def fetch_domain(url):
        # redirects are not followed; the body of the page itself is parsed
        html = postman.get(url, allow_redirects=False).text
        from .jm_toolkit import JmcomicText
        for domain in JmcomicText.analyse_jm_pub_html(html):
            if not domain.startswith('jm365'):
                domain_set.add(domain)

    url_list = list(map(template.format, range(*index_range)))

    from common import multi_thread_launcher

    multi_thread_launcher(
        iter_objs=url_list,
        apply_each_obj_func=fetch_domain,
    )

    return domain_set

@classmethod
def new_html_headers(cls, domain='18comic.vip'):
"""
Expand Down
46 changes: 32 additions & 14 deletions src/jmcomic/jm_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,11 +169,11 @@ def client_for_photo(self, jm_photo_id) -> JmcomicClient:
def before_album(self, album: JmAlbumDetail):
super().before_album(album)
self.all_downloaded.setdefault(album, {})

def before_photo(self, photo: JmPhotoDetail):
super().before_photo(photo)
self.all_downloaded.setdefault(photo.from_album, {})
self.all_downloaded[photo.from_album].setdefault(photo, [])
self.option.call_all_plugin(
'before_album',
album=album,
downloader=self,
)

def after_album(self, album: JmAlbumDetail):
super().after_album(album)
Expand All @@ -183,6 +183,16 @@ def after_album(self, album: JmAlbumDetail):
downloader=self,
)

def before_photo(self, photo: JmPhotoDetail):
    """
    Register the photo under its album in self.all_downloaded,
    then invoke the 'before_photo' plugins.
    """
    super().before_photo(photo)

    # make sure album -> photo -> [] exists before any image is recorded
    album_record = self.all_downloaded.setdefault(photo.from_album, {})
    album_record.setdefault(photo, [])

    self.option.call_all_plugin('before_photo', photo=photo, downloader=self)

def after_photo(self, photo: JmPhotoDetail):
super().after_photo(photo)
self.option.call_all_plugin(
Expand All @@ -191,12 +201,25 @@ def after_photo(self, photo: JmPhotoDetail):
downloader=self,
)

def before_image(self, image: JmImageDetail, img_save_path):
    """
    Run the parent hook, then invoke the 'before_image' plugins
    with the image and this downloader.
    """
    super().before_image(image, img_save_path)

    plugin_kwargs = dict(image=image, downloader=self)
    self.option.call_all_plugin('before_image', **plugin_kwargs)

def after_image(self, image: JmImageDetail, img_save_path):
    """
    Record (img_save_path, image) under the image's album and photo in
    self.all_downloaded, then invoke the 'after_image' plugins.

    :param image: JmImageDetail passed on to the parent hook and the plugins
    :param img_save_path: save path passed on to the parent hook and stored in the record
    """
    super().after_image(image, img_save_path)
    photo = image.from_photo
    album = photo.from_album

    # setdefault instead of chained .get(): if the album or photo was never
    # registered (before_album / before_photo not called for it), chained
    # .get() returns None and fails with an opaque AttributeError.
    downloaded = self.all_downloaded.setdefault(album, {}).setdefault(photo, [])
    downloaded.append((img_save_path, image))

    self.option.call_all_plugin(
        'after_image',
        image=image,
        downloader=self,
    )

# 下面是对with语法的支持

Expand All @@ -219,28 +242,23 @@ def use(cls, *args, **kwargs):

class DoNotDownloadImage(JmDownloader):
"""
本类仅用于测试
用法:
JmModuleConfig.CLASS_DOWNLOADER = DoNotDownloadImage
不会下载任何图片的Downloader,用作测试
"""

def download_by_image_detail(self, image: JmImageDetail, client: JmcomicClient):
# ensure make dir
self.option.decide_image_filepath(image)
pass


class JustDownloadSpecificCountImage(JmDownloader):
"""
只下载特定数量图片的Downloader,用作测试
"""
from threading import Lock

count_lock = Lock()
count = 0

def __init__(self, option: JmOption) -> None:
super().__init__(option)

def download_by_image_detail(self, image: JmImageDetail, client: JmcomicClient):
# ensure make dir
self.option.decide_image_filepath(image)
Expand Down
18 changes: 11 additions & 7 deletions src/jmcomic/jm_entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,30 +91,34 @@ def authoroname(self):
"""
authoroname = author + oname
比较好识别的一种本子名称方式
个人认为识别度比较高的本子名称,一眼看去就能获取到本子的关键信息
具体格式: f'【author】{oname}'
具体格式: '【author】oname'
示例:
原本子名:喂我吃吧 老師! [欶瀾漢化組] [BLVEFO9] たべさせて、せんせい! (ブルーアーカイブ) [中國翻譯] [無修正]
Pname:喂我吃吧 老師! [欶瀾漢化組] [BLVEFO9] たべさせて、せんせい! (ブルーアーカイブ) [中國翻譯] [無修正]
authoroname:【BLVEFO9】喂我吃吧 老師!
Pauthoroname:【BLVEFO9】喂我吃吧 老師!
:return: 返回作者名+作品原名,格式为: '【author】{oname}'
:return: 返回作者名+本子原始名称,格式为: '【author】oname'
"""
return f'【{self.author}】{self.oname}'

@property
def idoname(self):
"""
类似 authoroname
:return: '[id] {oname}'
:return: '[id] oname'
"""
return f'[{self.id}] {self.oname}'

def __str__(self):
return f'{self.__class__.__name__}({self.id}-{self.title})'
return f'{self.__class__.__name__}' \
'{' \
f'{self.id}: {self.title}'\
'}'

@classmethod
def __alias__(cls):
Expand Down
Loading

0 comments on commit fbaaffc

Please sign in to comment.