diff --git a/docs/includes/site.md b/docs/includes/site.md index 3ccd6977..c2128742 100644 --- a/docs/includes/site.md +++ b/docs/includes/site.md @@ -16,3 +16,4 @@ | DeviantArt | | | ✔ | | Lofter | | | ✔ | | Kemono.party | | | ✔ | +| Misskey | Any Misskey instance | | ✔ | diff --git a/docs/includes/site.zh.md b/docs/includes/site.zh.md index 0fd3bfac..5b3cd686 100644 --- a/docs/includes/site.zh.md +++ b/docs/includes/site.zh.md @@ -16,3 +16,4 @@ | DeviantArt | | | ✔ | | Lofter | | | ✔ | | Kemono.party | | | ✔ | +| Misskey | 各种 Misskey 实例 | | ✔ | diff --git a/docs/site/misskey.md b/docs/site/misskey.md new file mode 100644 index 00000000..fe188aaf --- /dev/null +++ b/docs/site/misskey.md @@ -0,0 +1,57 @@ +# Misskey Notes + +From any Misskey instance. + +## Customizing Storage Path & File Name + +For more information, refer to [Customizing Storage Path & File Name](./index.md/#customizing-storage-path--file-name). + +### MISSKEY_FILE_PATH + +:material-lightbulb-on: Optional, defaults to `Misskey` + +Storage path for downloaded images. + +### MISSKEY_FILE_NAME + +:material-lightbulb-on: Optional, defaults to `{id}_{index} - {filename} - {user[name]}({user[username]})` + +File name for downloaded images. + +### Available Variables + +_Only commonly used ones are listed._ + +```json +{ + "id": "", + "createdAt": "2023-10-05T23:10:13.016Z", + "userId": "", + "user": { + "id": "", + "name": "", + "username": "", + "host": "" + }, + "text": "", + "fileIds": [""], + "files": [ + { + "id": "", + "createdAt": "2023-10-05T23:10:16.445Z", + "name": "", + "type": "image/webp", + "md5": "", + "size": 800000, + "isSensitive": false, + "properties": { + "width": 2048, + "height": 1969 + }, + "url": "", + "thumbnailUrl": "" + } + ], + "uri": "" // Only available when the note is from a remote instance.
+} +``` diff --git a/docs/site/misskey_zh.md b/docs/site/misskey_zh.md new file mode 100644 index 00000000..2cc32658 --- /dev/null +++ b/docs/site/misskey_zh.md @@ -0,0 +1,57 @@ +# Misskey Notes + +从任何 Misskey 实例获取。 + +## 自定义存储路径和文件名 + +更多信息请查阅 [自定义存储路径和文件名](./index.zh.md/#customizing-storage-path--file-name)。 + +### MISSKEY_FILE_PATH + +:material-lightbulb-on: 可选,默认为 `Misskey` + +存储路径。 + +### MISSKEY_FILE_NAME + +:material-lightbulb-on: 可选,默认为 `{id}_{index} - {filename} - {user[name]}({user[username]})` + +文件名称。 + +### 可用变量 + +_此处只列出常用项。_ + +```json +{ + "id": "", + "createdAt": "2023-10-05T23:10:13.016Z", + "userId": "<用户 id>", + "user": { + "id": "<用户 id>", + "name": "<用户展示名称>", + "username": "<用户名>", + "host": "<用户所在实例 URL>" + }, + "text": "", + "fileIds": ["<文件 id>"], + "files": [ + { + "id": "<文件 id>", + "createdAt": "2023-10-05T23:10:16.445Z", + "name": "<文件名>", + "type": "image/webp", + "md5": "<文件 md5>", + "size": 800000, + "isSensitive": false, + "properties": { + "width": 2048, + "height": 1969 + }, + "url": "<文件 URL>", + "thumbnailUrl": "<缩略图 URL>" + } + ], + "uri": "<该 note 在源实例的 URL>" // Only available when the note is from a remote instance.
class Misskey:
    """Fetch notes from a Misskey instance and convert them to Nazurin models."""

    @network_retry
    async def get_note(self, site_url: str, note_id: str) -> Note:
        """Fetch a note from a Misskey instance.

        :param site_url: Host of the instance, used as ``https://{site_url}``.
        :param note_id: ID of the note, sent as ``noteId`` to ``/api/notes/show``.
        :return: The validated note.
        :raises NazurinError: If the instance answers with an error status,
            the payload fails validation, or the note's visibility is
            neither ``public`` nor ``home``.
        """
        api = f"https://{site_url}/api/notes/show"
        payload = {"noteId": note_id}

        async with Request() as request:
            async with request.post(url=api, json=payload) as response:
                # A 400 response carries a structured error object; surface
                # its message and code instead of a bare HTTP status.
                if response.status == 400:
                    result = await response.json()
                    error = result["error"]
                    raise NazurinError(
                        f"Error: {error['message']} ({error['code']})"
                    ) from None
                try:
                    response.raise_for_status()
                except ClientResponseError as err:
                    raise NazurinError(err) from None

                data = await response.json()
                try:
                    note = Note.model_validate(data)
                except ValidationError as err:
                    raise NazurinError(err) from None
                if note.visibility not in ["public", "home"]:
                    raise NazurinError("Note is not public.")
                # The note must be handed back to the caller: ``fetch``
                # passes the result straight to ``parse_note``.
                return note

    def build_caption(self, note: Note, site_url: str) -> Caption:
        """Build the caption for a note.

        :param note: The note to describe.
        :param site_url: Host of the instance the note was requested from.
        :return: Caption with ``url``, ``author`` and ``text``, plus
            ``original_url`` when the note has a ``uri``.
        """
        url = f"https://{site_url}/notes/{note.id}"
        user = note.user
        # ``name`` is optional in the model; fall back to the username so
        # the caption never renders the literal string "None".
        display_name = user.name or user.username
        caption = {
            "url": url,
            "author": f"{display_name} #{user.username}",
            "text": note.text,
        }
        # URL from the original instance
        if note.uri is not None:
            caption["original_url"] = note.uri
        return Caption(caption)

    async def get_video(self, file: NoteFile, destination: str, filename: str) -> File:
        """Return a ``File`` for a video/GIF attachment, converting if needed.

        ``video/mp4`` and ``image/gif`` attachments are referenced by their
        remote URL as-is. Any other type is downloaded and passed through
        FFmpeg (stream copy) into an ``.mp4`` file under ``destination``.

        :param file: The attachment from the note.
        :param destination: Storage destination for the resulting file.
        :param filename: Target file name, including extension.
        :return: The file to be stored.
        :raises NazurinError: If FFmpeg exits with a non-zero status.
        """
        if file.type in ["video/mp4", "image/gif"]:
            return File(filename, file.url, destination)

        # NOTE(review): async_wrap presumably runs this blocking call off the
        # event loop; confirm against nazurin.utils.decorators.
        @async_wrap
        def convert(source: File, output: File):
            source_path = Path(source.path).as_posix()
            # Copy video and audio streams
            args = [
                "ffmpeg",
                "-i",
                source_path,
                "-vcodec",
                "copy",
                "-acodec",
                "copy",
                "-y",
                output.path,
            ]
            cmd = shlex.join(args)
            logger.info("Calling FFmpeg with command: {}", cmd)
            try:
                # The captured output is only needed on failure, where it is
                # available on the exception; do not rebind ``output``.
                subprocess.check_output(args, stderr=subprocess.STDOUT, shell=False)
            except subprocess.CalledProcessError as error:
                logger.error(
                    "FFmpeg failed with code {}, output:\n {}",
                    error.returncode,
                    error.output.decode(),
                )
                raise NazurinError("Failed to convert video to mp4.") from None

        ori_video = File(filename, file.url)
        async with Request() as session:
            await ori_video.download(session)
        stem, _ = os.path.splitext(filename)
        video = File(stem + ".mp4", "", destination)
        await convert(ori_video, video)
        return video

    async def parse_note(self, note: Note, site_url: str) -> Illust:
        """Build caption and get images.

        Attachments without a URL are skipped. Non-GIF images are collected
        into an ``Illust``.

        NOTE: the first video or GIF attachment short-circuits the loop and
        is returned on its own as an ``Ugoira``; images collected before it
        are not included in that result.

        :param note: The note to parse.
        :param site_url: Host of the instance the note was requested from.
        :return: An ``Ugoira`` for a video/GIF note, otherwise an ``Illust``.
        """
        # Build note caption
        caption = self.build_caption(note, site_url)

        images: List[Image] = []
        files: List[File] = []
        for index, file in enumerate(note.files):
            if not file.url:
                continue
            destination, filename = self.get_storage_dest(note, file, index)
            file_type = file.type
            is_gif = file_type.endswith("gif")
            if file_type.startswith("image") and not is_gif:
                images.append(
                    Image(
                        filename,
                        file.url,
                        destination,
                        file.thumbnailUrl,
                        file.size,
                        file.properties.width,
                        file.properties.height,
                    )
                )
            elif file_type.startswith("video") or is_gif:
                return Ugoira(
                    await self.get_video(file, destination, filename),
                    caption,
                    note.model_dump(),
                )

        return Illust(images, caption, note.model_dump(), files)

    async def fetch(self, site_url: str, note_id: str) -> Illust:
        """Fetch a note and parse it into an ``Illust`` (or ``Ugoira``).

        :param site_url: Host of the instance.
        :param note_id: ID of the note.
        :raises NazurinError: Propagated from ``get_note`` / ``get_video``.
        """
        note = await self.get_note(site_url, note_id)
        return await self.parse_note(note, site_url)

    @staticmethod
    def get_storage_dest(note: Note, file: NoteFile, index: int) -> Tuple[str, str]:
        """
        Format destination and filename.

        The configured ``DESTINATION`` and ``FILENAME`` templates are
        formatted with the note's user, the file's properties and a few
        convenience keys; the original extension is appended to the name.

        :param note: The note the file belongs to.
        :param file: The attachment being stored.
        :param index: Position of the attachment within the note's files.
        :return: ``(destination, filename)``.
        """
        created_at = fromisoformat(note.createdAt)
        filename, extension = os.path.splitext(file.name)
        context = {
            "user": note.user.model_dump(),
            **file.properties.model_dump(),
            "md5": file.md5,
            # Human-friendly filename, without extension
            "filename": filename,
            "index": index,
            "id": note.id,
            "created_at": created_at,
            "extension": extension,
        }
        return (
            DESTINATION.format_map(context),
            FILENAME.format_map(context) + extension,
        )
async def handle(match) -> Illust:
    """Fetch the note referenced by a matched URL and store its metadata.

    Group 1 of ``match`` is used as the instance host and group 2 as the
    note ID. The fetched metadata is stamped with the collection time and
    inserted into the ``COLLECTION`` collection, keyed by the note ID.
    """
    site_url, post_id = match.group(1, 2)
    # Resolve the collection before fetching, as the original flow does.
    collection = Database().driver().collection(COLLECTION)

    illust = await Misskey().fetch(site_url, post_id)
    illust.metadata["collected_at"] = time()
    await collection.insert(str(post_id), illust.metadata)
    return illust