Showing 11 changed files with 239 additions and 241 deletions.
`convert-tokenizer-hf.py`:

```diff
@@ -1,71 +1,103 @@
 import sys
 import json
 import os
 from sentencepiece import SentencePieceProcessor
 writer = __import__('tokenizer-writer')

 def openJson(path):
     with open(path, 'r', encoding='utf-8') as file:
         return json.load(file)

+class TokensResolver:
+    def __init__(self, dirPath, tokenizerConfig):
+        self.dirPath = dirPath
+        self.tokenizerConfig = tokenizerConfig
+        self.bosId = None
+        self.eosId = None
+        self.tokens = []
+        self.scores = []
+
+    def resolvePreTrainedTokenizerFast(self):
+        tokenizer = openJson(os.path.join(self.dirPath, 'tokenizer.json'))
+        assert(tokenizer['model']['type'] == 'BPE')
+
+        i = 0
+        for token in tokenizer['model']['vocab'].keys():
+            assert(tokenizer['model']['vocab'][token] == i)
+            self.tokens.append(token.encode('utf8'))
+            self.scores.append(-float(i))
+            i += 1
+        if ('added_tokens' in tokenizer):
+            for at in tokenizer['added_tokens']:
+                assert(at['id'] == i)
+                self.tokens.append(at['content'].encode('utf8'))
+                self.scores.append(-float(i))
+                if (at['content'] == self.tokenizerConfig['bos_token']):
+                    self.bosId = i
+                if (at['content'] == self.tokenizerConfig['eos_token']):
+                    self.eosId = i
+                i += 1
+
+    def resolveLlamaTokenizer(self):
+        modelPath = os.path.join(self.dirPath, 'tokenizer.model')
+        processor = SentencePieceProcessor(model_file=modelPath)
+
+        assert processor.vocab_size() == processor.get_piece_size()
+        self.bosId = processor.bos_id()
+        self.eosId = processor.eos_id()
+
+        vocabSize = processor.vocab_size()
+        for i in range(vocabSize):
+            t = processor.id_to_piece(i)
+            s = processor.get_score(i)
+            t = t.replace('▁', ' ') # sentencepiece uses this character as whitespace
+            b = t.encode('utf-8')
+            self.tokens.append(b)
+            self.scores.append(s)
+
+    def resolve(self):
+        cls = self.tokenizerConfig['tokenizer_class']
+        if (cls == 'PreTrainedTokenizerFast'):
+            return self.resolvePreTrainedTokenizerFast()
+        if (cls == 'LlamaTokenizer'):
+            return self.resolveLlamaTokenizer()
+        raise Exception(f'Tokenizer {cls} is not supported')
+
 def printUsage():
     print('Usage: python convert-tokenizer-hf.py <tokenizerFolderPath> <name>')
     print()
     print('Options:')
-    print('  <tokenizerFolderPath> The path to the folder with tokenizer.json and tokenizer_config.json')
+    print('  <tokenizerFolderPath> The path to the folder with tokenizer_config.json')
     print('  <name>                The name of the tokenizer (e.g. "llama3")')

 if __name__ == '__main__':
     if (len(sys.argv) < 2):
         printUsage()
         exit(1)

     dirPath = sys.argv[1]
     name = sys.argv[2]
     tokenizerConfig = openJson(os.path.join(dirPath, 'tokenizer_config.json'))
-    tokenizer = openJson(os.path.join(dirPath, 'tokenizer.json'))
-
-    assert(tokenizerConfig['tokenizer_class'] == 'PreTrainedTokenizerFast')
-    assert(tokenizer['model']['type'] == 'BPE')
-    i = 0
-    tokens = []
-    scores = []
-    bosId = None
-    eosId = None
-    for token in tokenizer['model']['vocab'].keys():
-        assert(tokenizer['model']['vocab'][token] == i)
-        tokens.append(token.encode('utf8'))
-        scores.append(-float(i))
-        i += 1
-    if ('added_tokens' in tokenizer):
-        for at in tokenizer['added_tokens']:
-            assert(at['id'] == i)
-            tokens.append(at['content'].encode('utf8'))
-            scores.append(-float(i))
-            if (at['content'] == tokenizerConfig['bos_token']):
-                bosId = i
-            if (at['content'] == tokenizerConfig['eos_token']):
-                eosId = i
-            i += 1
+    resolver = TokensResolver(dirPath, tokenizerConfig)
+    resolver.resolve()

+    print(f'bosId: {resolver.bosId} ({resolver.tokens[resolver.bosId]})')
+    print(f'eosId: {resolver.eosId} ({resolver.tokens[resolver.eosId]})')

-    templateChat = None
+    chatTemplate = None
+    chatExtraStop = None
     if ('chat_template' in tokenizerConfig):
         template = tokenizerConfig['chat_template']
         print('⭐ Found chat template:')
         print()
         print(template.replace('\n', '\\n'))
         print()
-        print('⭐ To create the tokenizer file you need to manually specify chat template values. Enter \\n for new line.')
-        templateChat = {}
-        templateKeys = ['chat_message_start', 'chat_role_start', 'chat_role_end', 'chat_message_end', 'chat_generation_prompt', 'chat_extra_stop']
-        for key in templateKeys:
-            value = input(f'⏩ Enter value for chat template key "{key}":\n')
-            templateChat[key] = value.replace('\\n', '\n')
+        chatTemplate = tokenizerConfig['chat_template'].encode('utf-8')
+        value = input('⏩ Enter value for chat extra stop (enter to skip): ')
+        if (value != ''):
+            chatExtraStop = value.encode('utf-8')

     outputFileName = f'dllama_tokenizer_{name}.t'
     with open(outputFileName, 'wb') as outputFile:
         writer.writeTokenizer(outputFile, {
-            'bos_id': bosId,
-            'eos_id': eosId,
-            'chat_eos_id': eosId,
-        }, templateChat, tokens, scores)
+            'bos_id': resolver.bosId,
+            'eos_id': resolver.eosId,
+            'chat_eos_id': resolver.eosId,
+        }, resolver.tokens, resolver.scores, chatTemplate, chatExtraStop)
     print(f'✅ Created {outputFileName}')
```
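Per the usage text above, the script takes a tokenizer folder and a name, and writes `dllama_tokenizer_<name>.t` to the working directory. A minimal invocation might look like this; the folder path below is a hypothetical example, and it should point at a directory containing `tokenizer_config.json` plus either `tokenizer.json` (for `PreTrainedTokenizerFast`) or `tokenizer.model` (for `LlamaTokenizer`):

```bash
# Hypothetical path; adjust to where the Hugging Face tokenizer files live.
python convert-tokenizer-hf.py path/to/Meta-Llama-3-8B-Instruct llama3
# Produces dllama_tokenizer_llama3.t in the current directory.
```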
(Diffs for the remaining changed files, including one deleted file, are not rendered here.)
The Distributed Llama API readme:

````diff
@@ -1,13 +1,13 @@
 # Distributed Llama API

-This is an early version of the server that is compatible with the OpenAi API. It supports only the `/v1/chat/completions` endpoint. Currently it's adjusted to the Llama 3 8B Instruct only.
+This is an early version of the server that is compatible with the OpenAI API. It supports only the `/v1/chat/completions` endpoint. To run this server you need a chat model and a tokenizer with chat support.

 How to run?

-1. Download the model and the tokenizer from [here](https://huggingface.co/Azamorn/Meta-Llama-3-8B-Instruct-Distributed).
+1. Download the model and the tokenizer from [here](https://huggingface.co/b4rtaz/Llama-3-8B-Q40-Instruct-Distributed-Llama).
 2. Run the server with the following command:
 ```bash
-./dllama-api --model converter/dllama_original_q40.bin --tokenizer converter/dllama-llama3-tokenizer.t --weights-float-type q40 --buffer-float-type q80 --nthreads 4
+./dllama-api --model converter/dllama_model_lama3_instruct_q40.m --tokenizer converter/dllama_tokenizer_llama3.t --weights-float-type q40 --buffer-float-type q80 --nthreads 4
 ```

 Check the [chat-api-client.js](../../../examples/chat-api-client.js) file to see how to use the API from a NodeJS application.
````
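Because the endpoint follows the OpenAI chat-completions format, any OpenAI-style client can talk to it. Here is a minimal sketch with `curl`; the host and port are assumptions (substitute whatever address your `dllama-api` instance actually binds to), and the `model` field is illustrative since the server serves the single model it was started with:

```bash
# Assumed address; replace with the host/port your dllama-api instance listens on.
curl http://localhost:9990/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "llama3",
    "messages": [
      {"role": "user", "content": "Hello!"}
    ]
  }'
```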