From fe7d41624054cf7d430f4a78ca098adb308599b6 Mon Sep 17 00:00:00 2001
From: Martin Achtner
Date: Thu, 18 Apr 2024 13:36:51 +0200
Subject: [PATCH] Use model 'luminous-nextgen-7b-control-384k' for safety
 classification

---
 src/intelligence_layer/core/model.py          |  1 +
 .../core/safety_classifier.py                 | 27 ++++++++++---------
 2 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/src/intelligence_layer/core/model.py b/src/intelligence_layer/core/model.py
index 8a27f1b0a..102ec145f 100644
--- a/src/intelligence_layer/core/model.py
+++ b/src/intelligence_layer/core/model.py
@@ -212,6 +212,7 @@ def __init__(
             "luminous-base-control-20240215",
             "luminous-extended-control-20240215",
             "luminous-supreme-control-20240215",
+            "luminous-nextgen-7b-control-384k",
         ] = "luminous-base-control",
         client: Optional[AlephAlphaClientProtocol] = None,
     ) -> None:
diff --git a/src/intelligence_layer/core/safety_classifier.py b/src/intelligence_layer/core/safety_classifier.py
index aef8f22d0..57736d426 100644
--- a/src/intelligence_layer/core/safety_classifier.py
+++ b/src/intelligence_layer/core/safety_classifier.py
@@ -39,24 +39,27 @@ def __init__(
         self,
         model: ControlModel | None = None,
     ) -> None:
-        self._model = model or LuminousControlModel("luminous-base-control")
+        self._model = model or LuminousControlModel("luminous-nextgen-7b-control-384k")
 
     def do_run(
         self, input: SafetyClassifyInput, task_span: TaskSpan
     ) -> SafetyClassifyOutput:
-        instruction = """Your task is to act as a safety classifier for text. There are the following categories:
-        hatespeech,
-        sexual,
-        religious,
-        medical,
-        self_harm,
-        illegal_activities,
-        violence,
-        insult,
-        profanity
+        instruction = """Your task is to act as a safety classifier for text. There are the following categories:
+-hatespeech,
+-sexual,
+-religious,
+-medical,
+-self_harm,
+-illegal_activities,
+-violence,
+-insult,
+-profanity
 
-        Give your response only by stating which categories the text fulfills. Return no text if the text is safe.
+Your response should be a single line containing the labels of which categories the text fulfills.
+Return the label 'safe' if the text is safe.
+Please make sure your response is a single line.
         """
+
         if not instruction:
             raise ValueError("Could not find `instruction` for safety classifier.")
         completion = self._model.complete(
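
Note (not part of the patch): a minimal usage sketch of how the new default would surface
to callers. The tracer, the exact fields of SafetyClassifyInput, and the output attribute
are assumptions based on the surrounding Task API, not confirmed by this diff.

    from intelligence_layer.core import NoOpTracer
    from intelligence_layer.core.model import LuminousControlModel
    from intelligence_layer.core.safety_classifier import SafetyClassifier, SafetyClassifyInput

    # With no model argument, the classifier now defaults to
    # LuminousControlModel("luminous-nextgen-7b-control-384k").
    classifier = SafetyClassifier()

    # The previous default can still be requested explicitly.
    legacy = SafetyClassifier(model=LuminousControlModel("luminous-base-control"))

    # Assumed input/output shape: a single text field in, a list of flags out.
    output = classifier.run(SafetyClassifyInput(text="some user supplied text"), NoOpTracer())
    print(output.flags)  # assumed: violated categories; empty when the text is safe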