From 9f0d403e1bd175be83b39a809370673a7df55acd Mon Sep 17 00:00:00 2001 From: tarepan Date: Wed, 29 May 2024 18:21:39 +0900 Subject: [PATCH] =?UTF-8?q?=E6=95=B4=E7=90=86:=20`CoreAdapter.speakers`=20?= =?UTF-8?q?=E5=87=BA=E5=8A=9B=E3=82=92=E3=83=A2=E3=83=87=E3=83=AB=E5=8C=96?= =?UTF-8?q?=20(#1260)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * refactor: `CoreSpeaker` をコアへ移設 * refactor: `CoreAdapter.speakers` 出力をパース --------- Co-authored-by: Hiroshiba Kazuyuki --- voicevox_engine/app/routers/speaker.py | 3 +-- voicevox_engine/core/core_adapter.py | 33 ++++++++++++++++++++--- voicevox_engine/metas/MetasStore.py | 37 ++++---------------------- 3 files changed, 36 insertions(+), 37 deletions(-) diff --git a/voicevox_engine/app/routers/speaker.py b/voicevox_engine/app/routers/speaker.py index 080b6624a..3f5c5f48b 100644 --- a/voicevox_engine/app/routers/speaker.py +++ b/voicevox_engine/app/routers/speaker.py @@ -1,7 +1,6 @@ """話者情報機能を提供する API Router""" import base64 -import json import traceback from pathlib import Path from typing import Annotated, Literal @@ -76,7 +75,7 @@ def _speaker_info( # 該当話者を検索する speakers = parse_obj_as( - list[Speaker], json.loads(core_manager.get_core(core_version).speakers) + list[Speaker], core_manager.get_core(core_version).speakers ) speakers = filter_speakers_and_styles(speakers, speaker_or_singer) speaker = next( diff --git a/voicevox_engine/core/core_adapter.py b/voicevox_engine/core/core_adapter.py index 61981c089..95fe0725e 100644 --- a/voicevox_engine/core/core_adapter.py +++ b/voicevox_engine/core/core_adapter.py @@ -1,13 +1,39 @@ import json import threading from dataclasses import dataclass +from typing import Literal, NewType import numpy as np from numpy.typing import NDArray +from pydantic import BaseModel, Field from ..metas.Metas import StyleId from .core_wrapper import CoreWrapper, OldCoreError +CoreStyleId = NewType("CoreStyleId", int) +CoreStyleType = Literal["talk", "singing_teacher", "frame_decode", "sing"] + + +class CoreSpeakerStyle(BaseModel): + """ + 話者のスタイル情報 + """ + + name: str + id: CoreStyleId + type: CoreStyleType | None = Field(default="talk") + + +class CoreSpeaker(BaseModel): + """ + コアに含まれる話者情報 + """ + + name: str + speaker_uuid: str + styles: list[CoreSpeakerStyle] + version: str = Field("話者のバージョン") + @dataclass(frozen=True) class DeviceSupport: @@ -34,9 +60,10 @@ def default_sampling_rate(self) -> int: return self.core.default_sampling_rate @property - def speakers(self) -> str: - """話者情報(json文字列)""" - return self.core.metas() + def speakers(self) -> list[CoreSpeaker]: + """話者情報""" + metas = self.core.metas() + return [CoreSpeaker(**speaker) for speaker in json.loads(metas)] @property def supported_devices(self) -> DeviceSupport | None: diff --git a/voicevox_engine/metas/MetasStore.py b/voicevox_engine/metas/MetasStore.py index d5655a6c2..da8860334 100644 --- a/voicevox_engine/metas/MetasStore.py +++ b/voicevox_engine/metas/MetasStore.py @@ -1,10 +1,11 @@ import json from copy import deepcopy from pathlib import Path -from typing import TYPE_CHECKING, Literal, NewType +from typing import Literal from pydantic import BaseModel, Field +from voicevox_engine.core.core_adapter import CoreAdapter, CoreSpeakerStyle from voicevox_engine.metas.Metas import ( Speaker, SpeakerStyle, @@ -13,25 +14,8 @@ StyleType, ) -if TYPE_CHECKING: - from voicevox_engine.core.core_adapter import CoreAdapter - -_CoreStyleId = NewType("_CoreStyleId", int) -_CoreStyleType = Literal["talk", "singing_teacher", "frame_decode", "sing"] - - -class _CoreSpeakerStyle(BaseModel): - """ - 話者のスタイル情報 - """ - - name: str - id: _CoreStyleId - type: _CoreStyleType | None = Field(default="talk") - - -def cast_styles(cores: list[_CoreSpeakerStyle]) -> list[SpeakerStyle]: +def cast_styles(cores: list[CoreSpeakerStyle]) -> list[SpeakerStyle]: """コアから取得したスタイル情報をエンジン形式へキャストする。""" return [ SpeakerStyle(name=core.name, id=StyleId(core.id), type=core.type) @@ -39,17 +23,6 @@ def cast_styles(cores: list[_CoreSpeakerStyle]) -> list[SpeakerStyle]: ] -class _CoreSpeaker(BaseModel): - """ - コアに含まれる話者情報 - """ - - name: str - speaker_uuid: str - styles: list[_CoreSpeakerStyle] - version: str = Field("話者のバージョン") - - class _EngineSpeaker(BaseModel): """ エンジンに含まれる話者情報 @@ -82,7 +55,7 @@ def __init__(self, engine_speakers_path: Path) -> None: # FIXME: engineではなくlist[CoreSpeaker]を渡す形にすることで # TTSEngineによる循環importを修正する - def load_combined_metas(self, core: "CoreAdapter") -> list[Speaker]: + def load_combined_metas(self, core: CoreAdapter) -> list[Speaker]: """ コアに含まれる話者メタ情報とエンジンに含まれる話者メタ情報を統合 Parameters @@ -95,7 +68,7 @@ def load_combined_metas(self, core: "CoreAdapter") -> list[Speaker]: エンジンとコアに含まれる話者メタ情報 """ # コアに含まれる話者メタ情報の収集 - core_metas = [_CoreSpeaker(**speaker) for speaker in json.loads(core.speakers)] + core_metas = core.speakers # エンジンに含まれる話者メタ情報との統合 return [ Speaker(