Skip to content

Commit

Permalink
Fix default llama2 system prompt usage
Browse files Browse the repository at this point in the history
  • Loading branch information
xenova committed Dec 16, 2023
1 parent cd8ab6f commit d9b63ae
Showing 1 changed file with 18 additions and 0 deletions.
18 changes: 18 additions & 0 deletions src/tokenizers.js
Original file line number Diff line number Diff line change
Expand Up @@ -2926,6 +2926,24 @@ export class BloomTokenizer extends GPT2Tokenizer { // NOTE: `GPT2Tokenizer` to
}
/**
 * Tokenizer for Llama models. Extends the base tokenizer with a chat template
 * that can optionally inject Llama 2's default system prompt when the model's
 * tokenizer config requests it (`use_default_system_prompt`).
 */
export class LlamaTokenizer extends PreTrainedTokenizer {
    // Jinja chat template with two placeholders (`USE_DEFAULT_PROMPT`,
    // `DEFAULT_SYSTEM_MESSAGE`) that are substituted in `default_chat_template`.
    _default_chat_template = `{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif USE_DEFAULT_PROMPT == true and not '<<SYS>>' in messages[0]['content'] %}{% set loop_messages = messages %}{% set system_message = 'DEFAULT_SYSTEM_MESSAGE' %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<<SYS>>\n' + system_message + '\n<</SYS>>\n\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'system' %}{{ '<<SYS>>\n' + content.strip() + '\n<</SYS>>\n\n' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + eos_token }}{% endif %}{% endfor %}`

    // Llama 2's original default system prompt; spliced into the template only
    // when `use_default_system_prompt` is enabled and no system message is given.
    DEFAULT_SYSTEM_PROMPT =
        "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your " +
        "answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure " +
        "that your responses are socially unbiased and positive in nature.\n\n" +
        "If a question does not make any sense, or is not factually coherent, explain why instead of answering something not " +
        "correct. If you don't know the answer to a question, please don't share false information."

    /**
     * @param {Object} tokenizerJSON The JSON of the tokenizer.
     * @param {Object} tokenizerConfig The config of the tokenizer; may carry
     *   `use_default_system_prompt` (defaults to `false` when absent).
     */
    constructor(tokenizerJSON, tokenizerConfig) {
        super(tokenizerJSON, tokenizerConfig);
        const configuredFlag = tokenizerConfig.use_default_system_prompt;
        // `??` so an explicit `false` in the config is respected as-is.
        this.use_default_system_prompt = configuredFlag ?? false;
    }

    /**
     * The chat template with both placeholders resolved for this tokenizer.
     * @returns {string} The finalized Jinja chat template.
     */
    get default_chat_template() {
        const promptFlag = this.use_default_system_prompt ? 'true' : 'false';
        // Escape newlines and single quotes so the prompt is a valid
        // single-quoted Jinja string literal inside the template.
        const escapedPrompt = this.DEFAULT_SYSTEM_PROMPT
            .replaceAll('\n', '\\n')
            .replaceAll("'", "\\'");
        let template = super.default_chat_template;
        template = template.replaceAll('USE_DEFAULT_PROMPT', promptFlag);
        return template.replaceAll('DEFAULT_SYSTEM_MESSAGE', escapedPrompt);
    }
}
/**
 * Tokenizer for CodeLlama models. Inherits everything from `LlamaTokenizer`,
 * including its chat template and default-system-prompt handling.
 */
export class CodeLlamaTokenizer extends LlamaTokenizer { } // NOTE: `LlamaTokenizer` to get the correct chat template

Expand Down

0 comments on commit d9b63ae

Please sign in to comment.