diff --git a/test/unit/user_dict/test_user_dict.py b/test/unit/user_dict/test_user_dict.py index 35d13c9cf..8515eb66f 100644 --- a/test/unit/user_dict/test_user_dict.py +++ b/test/unit/user_dict/test_user_dict.py @@ -8,7 +8,7 @@ from voicevox_engine.user_dict.model import UserDictWord, WordTypes from voicevox_engine.user_dict.user_dict_manager import UserDictionary from voicevox_engine.user_dict.user_dict_word import ( - MAX_PRIORITY, + USER_DICT_MAX_PRIORITY, UserDictInputError, WordProperty, create_word, @@ -201,7 +201,7 @@ def test_delete_word_valid_id(tmp_path: Path) -> None: def test_priority() -> None: for pos in part_of_speech_data: - for i in range(MAX_PRIORITY + 1): + for i in range(USER_DICT_MAX_PRIORITY + 1): assert ( create_word( WordProperty( diff --git a/voicevox_engine/app/routers/user_dict.py b/voicevox_engine/app/routers/user_dict.py index 9a10e84d1..a1a918d93 100644 --- a/voicevox_engine/app/routers/user_dict.py +++ b/voicevox_engine/app/routers/user_dict.py @@ -9,8 +9,8 @@ from voicevox_engine.user_dict.model import UserDictWord, WordTypes from voicevox_engine.user_dict.user_dict_manager import UserDictionary from voicevox_engine.user_dict.user_dict_word import ( - MAX_PRIORITY, - MIN_PRIORITY, + USER_DICT_MAX_PRIORITY, + USER_DICT_MIN_PRIORITY, UserDictInputError, WordProperty, ) @@ -58,11 +58,14 @@ def add_user_dict_word( priority: Annotated[ int | SkipJsonSchema[None], Query( - ge=MIN_PRIORITY, - le=MAX_PRIORITY, + ge=USER_DICT_MIN_PRIORITY, + le=USER_DICT_MAX_PRIORITY, description="単語の優先度(0から10までの整数)。数字が大きいほど優先度が高くなる。1から9までの値を指定することを推奨", # "SkipJsonSchema[None]"の副作用でスキーマーが欠落する問題に対するワークアラウンド - json_schema_extra={"maximum": MAX_PRIORITY, "minimum": MIN_PRIORITY}, + json_schema_extra={ + "maximum": USER_DICT_MAX_PRIORITY, + "minimum": USER_DICT_MIN_PRIORITY, + }, ), ] = None, ) -> str: @@ -112,11 +115,14 @@ def rewrite_user_dict_word( priority: Annotated[ int | SkipJsonSchema[None], Query( - ge=MIN_PRIORITY, - le=MAX_PRIORITY, + ge=USER_DICT_MIN_PRIORITY, + le=USER_DICT_MAX_PRIORITY, description="単語の優先度(0から10までの整数)。数字が大きいほど優先度が高くなる。1から9までの値を指定することを推奨。", # "SkipJsonSchema[None]"の副作用でスキーマーが欠落する問題に対するワークアラウンド - json_schema_extra={"maximum": MAX_PRIORITY, "minimum": MIN_PRIORITY}, + json_schema_extra={ + "maximum": USER_DICT_MAX_PRIORITY, + "minimum": USER_DICT_MIN_PRIORITY, + }, ), ] = None, ) -> None: diff --git a/voicevox_engine/user_dict/user_dict_word.py b/voicevox_engine/user_dict/user_dict_word.py index 3ddf3e033..378e951b8 100644 --- a/voicevox_engine/user_dict/user_dict_word.py +++ b/voicevox_engine/user_dict/user_dict_word.py @@ -12,11 +12,8 @@ WordTypes, ) -MIN_PRIORITY = USER_DICT_MIN_PRIORITY -MAX_PRIORITY = USER_DICT_MAX_PRIORITY - -class PartOfSpeechDetail(BaseModel): +class _PartOfSpeechDetail(BaseModel): """ 品詞ごとの情報 """ @@ -32,136 +29,58 @@ class PartOfSpeechDetail(BaseModel): accent_associative_rules: list[str] = Field(title="アクセント結合規則の一覧") -part_of_speech_data: dict[WordTypes, PartOfSpeechDetail] = { - WordTypes.PROPER_NOUN: PartOfSpeechDetail( +_costs_proper_noun = [-988, 3488, 4768, 6048, 7328, 8609, 8734, 8859, 8984, 9110, 14176] +_costs_common_noun = [-4445, 49, 1473, 2897, 4321, 5746, 6554, 7362, 8170, 8979, 15001] +_costs_verb = [3100, 6160, 6360, 6561, 6761, 6962, 7414, 7866, 8318, 8771, 13433] +_costs_adjective = [1527, 3266, 3561, 3857, 4153, 4449, 5149, 5849, 6549, 7250, 10001] +_costs_suffix = [4399, 5373, 6041, 6710, 7378, 8047, 9440, 10834, 12228, 13622, 15847] + + +part_of_speech_data: dict[WordTypes, _PartOfSpeechDetail] = { + WordTypes.PROPER_NOUN: _PartOfSpeechDetail( part_of_speech="名詞", part_of_speech_detail_1="固有名詞", part_of_speech_detail_2="一般", part_of_speech_detail_3="*", context_id=1348, - cost_candidates=[ - -988, - 3488, - 4768, - 6048, - 7328, - 8609, - 8734, - 8859, - 8984, - 9110, - 14176, - ], - accent_associative_rules=[ - "*", - "C1", - "C2", - "C3", - "C4", - "C5", - ], + cost_candidates=_costs_proper_noun, + accent_associative_rules=["*", "C1", "C2", "C3", "C4", "C5"], ), - WordTypes.COMMON_NOUN: PartOfSpeechDetail( + WordTypes.COMMON_NOUN: _PartOfSpeechDetail( part_of_speech="名詞", part_of_speech_detail_1="一般", part_of_speech_detail_2="*", part_of_speech_detail_3="*", context_id=1345, - cost_candidates=[ - -4445, - 49, - 1473, - 2897, - 4321, - 5746, - 6554, - 7362, - 8170, - 8979, - 15001, - ], - accent_associative_rules=[ - "*", - "C1", - "C2", - "C3", - "C4", - "C5", - ], + cost_candidates=_costs_common_noun, + accent_associative_rules=["*", "C1", "C2", "C3", "C4", "C5"], ), - WordTypes.VERB: PartOfSpeechDetail( + WordTypes.VERB: _PartOfSpeechDetail( part_of_speech="動詞", part_of_speech_detail_1="自立", part_of_speech_detail_2="*", part_of_speech_detail_3="*", context_id=642, - cost_candidates=[ - 3100, - 6160, - 6360, - 6561, - 6761, - 6962, - 7414, - 7866, - 8318, - 8771, - 13433, - ], - accent_associative_rules=[ - "*", - ], + cost_candidates=_costs_verb, + accent_associative_rules=["*"], ), - WordTypes.ADJECTIVE: PartOfSpeechDetail( + WordTypes.ADJECTIVE: _PartOfSpeechDetail( part_of_speech="形容詞", part_of_speech_detail_1="自立", part_of_speech_detail_2="*", part_of_speech_detail_3="*", context_id=20, - cost_candidates=[ - 1527, - 3266, - 3561, - 3857, - 4153, - 4449, - 5149, - 5849, - 6549, - 7250, - 10001, - ], - accent_associative_rules=[ - "*", - ], + cost_candidates=_costs_adjective, + accent_associative_rules=["*"], ), - WordTypes.SUFFIX: PartOfSpeechDetail( + WordTypes.SUFFIX: _PartOfSpeechDetail( part_of_speech="名詞", part_of_speech_detail_1="接尾", part_of_speech_detail_2="一般", part_of_speech_detail_3="*", context_id=1358, - cost_candidates=[ - 4399, - 5373, - 6041, - 6710, - 7378, - 8047, - 9440, - 10834, - 12228, - 13622, - 15847, - ], - accent_associative_rules=[ - "*", - "C1", - "C2", - "C3", - "C4", - "C5", - ], + cost_candidates=_costs_suffix, + accent_associative_rules=["*", "C1", "C2", "C3", "C4", "C5"], ), } @@ -188,7 +107,7 @@ def create_word(word_property: WordProperty) -> UserDictWord: priority: int | None = word_property.priority if priority is None: priority = 5 - if not MIN_PRIORITY <= priority <= MAX_PRIORITY: + if not USER_DICT_MIN_PRIORITY <= priority <= USER_DICT_MAX_PRIORITY: raise UserDictInputError("優先度の値が無効です") pos_detail = part_of_speech_data[word_type] @@ -230,10 +149,13 @@ def cost2priority(context_id: int, cost: int) -> int: # cost_candidatesの中にある値で最も近い値を元にpriorityを返す # 参考: https://qiita.com/Krypf/items/2eada91c37161d17621d # この関数とpriority2cost関数によって、辞書ファイルのcostを操作しても最も近いpriorityのcostに上書きされる - return MAX_PRIORITY - np.argmin(np.abs(np.array(cost_candidates) - cost)).item() + return ( + USER_DICT_MAX_PRIORITY + - np.argmin(np.abs(np.array(cost_candidates) - cost)).item() + ) def priority2cost(context_id: int, priority: int) -> int: - assert MIN_PRIORITY <= priority <= MAX_PRIORITY + assert USER_DICT_MIN_PRIORITY <= priority <= USER_DICT_MAX_PRIORITY cost_candidates = _search_cost_candidates(context_id) - return cost_candidates[MAX_PRIORITY - priority] + return cost_candidates[USER_DICT_MAX_PRIORITY - priority]