From e59b6d0b483ff9360d7ce6710d77ac5638f7669e Mon Sep 17 00:00:00 2001 From: shingyu Date: Mon, 13 Nov 2023 23:06:10 +0800 Subject: [PATCH] =?UTF-8?q?feat(epub):=20=E6=96=B0=E5=A2=9Eepub=E6=A8=A1?= =?UTF-8?q?=E5=BC=8F=E4=B8=93=E7=94=A8=E8=8E=B7=E5=8F=96=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/epub_bookinfo.py | 101 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 src/epub_bookinfo.py diff --git a/src/epub_bookinfo.py b/src/epub_bookinfo.py new file mode 100644 index 0000000..765b3aa --- /dev/null +++ b/src/epub_bookinfo.py @@ -0,0 +1,101 @@ + +# 开发者注意: +# 七猫网页在点击类名为:tab-inner 的”作品目录“按钮后 +# 才会显示目录内容 +# TODO: 实现获取EPUB格式需要的书籍信息 + +import asyncio +import os +import public as p +from bs4 import BeautifulSoup +import re + +# 设置镜像下载地址 +os.environ["PYPPETEER_DOWNLOAD_HOST"] = "https://mirrors.huaweicloud.com" +from pyppeteer import launch # noqa: E402 + + +async def get_book_info(url): + # 创建一个Pyppeteer的Browser实例 + browser = await launch() + + # 创建一个新的页面 + page = await browser.newPage() + + # 访问网页 + await page.goto(url) + + # 等待加载完成 + await page.waitForSelector('.tab-inner') + +# ==================== 获取简介 ==================== + + # 在获取目录前,先获取小说简介 + html = await page.content() + soup = BeautifulSoup(html, "html.parser") + intro = soup.find('p', class_='intro').get_text().replace(' ', '\n') + +# ==================== 获取简介结束 ==================== + + # 模拟点击目录按钮,切换网页内容 + # 在页面上执行JavaScript代码,模拟点击目录 + await page.evaluate('''() => { + var elements = document.getElementsByClassName('tab-inner'); + for(var i=0; i