llama : vocab
ggml-ci
ggerganov committed Jan 8, 2025
1 parent be9a25f commit 9d6f9df
Showing 6 changed files with 355 additions and 359 deletions.
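Every hunk in this commit follows the same pattern: vocabulary helpers that were free functions taking a const llama_vocab & (llama_token_is_eog_impl, llama_byte_to_token_impl, llama_tokenize_internal, llama_detokenize, llama_token_to_piece_impl, llama_token_eot_impl, llama_model_vocab_type_name) are now called as members of llama_vocab. The sketch below reconstructs that member interface purely from the call sites changed in this diff; the member names match the diff, but parameter types, names, and the default argument are inferred, and the real declarations live in the vocab header, not here.

```cpp
// Hypothetical reconstruction of the llama_vocab member interface, inferred
// only from the call sites in this commit; not the actual llama.cpp declarations.
#include <cstdint>
#include <string>
#include <vector>

using llama_token = std::int32_t;

struct llama_vocab_sketch {
    bool token_is_eog(llama_token id) const;                             // was llama_token_is_eog_impl(vocab, id)
    llama_token byte_to_token(char ch) const;                            // was llama_byte_to_token_impl(vocab, ch)
    std::vector<llama_token> tokenize(const std::string & raw_text,
                                      bool add_special,
                                      bool parse_special = false) const; // was llama_tokenize_internal(...)
    std::string detokenize(const std::vector<llama_token> & tokens,
                           bool special) const;                          // was llama_detokenize(vocab, tokens, special)
    int32_t token_to_piece(llama_token token, char * buf, int32_t len,
                           int32_t lstrip, bool special) const;          // was llama_token_to_piece_impl(...)
    llama_token token_eot() const;                                       // was llama_token_eot_impl(vocab)
    std::string type_name() const;                                       // was llama_model_vocab_type_name(vocab.type)
};
```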
4 changes: 2 additions & 2 deletions src/llama-grammar.cpp
@@ -1094,7 +1094,7 @@ void llama_grammar_apply_impl(const struct llama_grammar & grammar, llama_token_
const llama_token id = cur_p->data[i].id;
const std::string & piece = grammar.vocab->cache_token_to_piece.at(id);

- if (llama_token_is_eog_impl(*grammar.vocab, id)) {
+ if (grammar.vocab->token_is_eog(id)) {
if (!allow_eog) {
cur_p->data[i].logit = -INFINITY;
}
@@ -1115,7 +1115,7 @@ void llama_grammar_accept_impl(struct llama_grammar & grammar, llama_token token) {
void llama_grammar_accept_impl(struct llama_grammar & grammar, llama_token token) {
GGML_ASSERT(grammar.vocab != nullptr);

- if (llama_token_is_eog_impl(*grammar.vocab, token)) {
+ if (grammar.vocab->token_is_eog(token)) {
for (const auto & stack : grammar.stacks) {
if (stack.empty()) {
return;
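In both grammar call sites the vocab is only asked one question: is a given candidate an end-of-generation (EOG) token, i.e. an EOS/EOT-style stop token? The member call replaces the free function, but the gating logic is unchanged: EOG candidates are masked with -INFINITY unless the grammar currently allows stopping. A minimal, self-contained illustration of that step, using simplified stand-in types rather than the real llama.cpp ones:

```cpp
// Simplified illustration of the EOG gating in llama_grammar_apply_impl.
// `candidate` and `is_eog` are stand-ins; only the masking idea is shown.
#include <cmath>
#include <cstdint>
#include <functional>
#include <vector>

using llama_token = std::int32_t;

struct candidate {
    llama_token id;
    float       logit;
};

// force EOG candidates out of the sampling distribution while stopping is not allowed
void mask_eog(std::vector<candidate> & cur,
              const std::function<bool(llama_token)> & is_eog,
              bool allow_eog) {
    for (auto & c : cur) {
        if (is_eog(c.id) && !allow_eog) {
            c.logit = -INFINITY;
        }
    }
}

int main() {
    std::vector<candidate> cur = { { 0, 1.5f }, { 2, 0.7f } };
    mask_eog(cur, [](llama_token id) { return id == 2; }, /*allow_eog=*/false); // pretend token 2 is EOG
    return cur[1].logit == -INFINITY ? 0 : 1;
}
```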
8 changes: 4 additions & 4 deletions src/llama-model.cpp
@@ -1586,19 +1586,19 @@ void llama_model::load_vocab(llama_model_loader & ml) {
// determine the newline token: LLaMA "<0x0A>" == 10 == '\n', Falcon 193 == '\n'
if (vocab.type == LLAMA_VOCAB_TYPE_SPM) {
try {
- vocab.linefeed_id = llama_byte_to_token_impl(vocab, '\n');
+ vocab.linefeed_id = vocab.byte_to_token('\n');
} catch (const std::exception & e) {
LLAMA_LOG_WARN("%s: SPM vocabulary, but newline token not found: %s! Using special_pad_id instead.", __func__, e.what());
vocab.linefeed_id = vocab.special_pad_id;
}
} else if (vocab.type == LLAMA_VOCAB_TYPE_WPM) {
vocab.linefeed_id = vocab.special_pad_id;
} else if (vocab.type == LLAMA_VOCAB_TYPE_RWKV) {
- const std::vector<int> ids = llama_tokenize_internal(vocab, "\n", false);
+ const std::vector<int> ids = vocab.tokenize("\n", false);
GGML_ASSERT(!ids.empty() && "model vocab missing newline token");
vocab.linefeed_id = ids[0];
} else {
- const std::vector<int> ids = llama_tokenize_internal(vocab, "\xC4\x8A", false); // U+010A
+ const std::vector<int> ids = vocab.tokenize("\xC4\x8A", false); // U+010A

//GGML_ASSERT(!ids.empty() && "model vocab missing newline token");
if (ids.empty()) {
@@ -4190,7 +4190,7 @@ void llama_model::print_info() const {

// hparams
LLAMA_LOG_INFO("%s: arch = %s\n", __func__, arch_name().c_str());
LLAMA_LOG_INFO("%s: vocab type = %s\n", __func__, llama_model_vocab_type_name(vocab.type));
LLAMA_LOG_INFO("%s: vocab type = %s\n", __func__, vocab.type_name().c_str());
LLAMA_LOG_INFO("%s: n_vocab = %u\n", __func__, hparams.n_vocab);
LLAMA_LOG_INFO("%s: n_merges = %u\n", __func__, (int) vocab.bpe_ranks.size());
LLAMA_LOG_INFO("%s: vocab_only = %d\n", __func__, hparams.vocab_only);
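A detail worth noting in the load_vocab hunk: the final fallback locates the newline token by tokenizing "\xC4\x8A", the UTF-8 encoding of U+010A ('Ċ'). GPT-2-style byte-level BPE vocabularies remap non-printable bytes onto printable code points, and the byte 0x0A ('\n') ends up at U+010A under that convention, which is why this string is used as the probe. The snippet below is only a sanity check of that encoding, not llama.cpp code:

```cpp
// Check that U+010A (where byte-level BPE places '\n') encodes to 0xC4 0x8A in UTF-8.
#include <cstdint>
#include <cstdio>
#include <string>

// minimal UTF-8 encoder, valid for code points below U+0800
static std::string utf8(uint32_t cp) {
    std::string out;
    if (cp < 0x80) {
        out += char(cp);
    } else {
        out += char(0xC0 | (cp >> 6));
        out += char(0x80 | (cp & 0x3F));
    }
    return out;
}

int main() {
    // '\n' is byte 10; the GPT-2 byte-to-unicode table shifts it to 256 + 10 = 0x010A
    const uint32_t cp = 0x0A + 0x100;
    const std::string s = utf8(cp);
    std::printf("U+%04X -> %02X %02X\n", (unsigned) cp,
                (unsigned) (unsigned char) s[0], (unsigned) (unsigned char) s[1]); // prints C4 8A
    return 0;
}
```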
22 changes: 11 additions & 11 deletions src/llama-sampling.cpp
@@ -1664,7 +1664,7 @@ struct llama_sampler_dry {
// Ported from Koboldcpp, original PR: https://github.com/LostRuins/koboldcpp/pull/982 (Original author: pi6am)
static void get_overlapping_token_sequences(const llama_vocab & vocab, const std::string& str, std::unordered_multimap<llama_token, std::vector<llama_token>>& token_sequences, int max_tail_len = -1) {
for (llama_token token_id = 0; token_id < (llama_token)vocab.n_vocab; token_id++) {
- std::string word = llama_detokenize(vocab, {token_id}, true);
+ std::string word = vocab.detokenize({token_id}, true);
if (word.find(str) != std::string::npos) {
token_sequences.emplace(token_id, std::vector<llama_token>());
} else {
@@ -1681,7 +1681,7 @@ static void get_overlapping_token_sequences(const llama_vocab & vocab, const std
}
}
if (match) {
- std::vector<llama_token> tokenization = llama_tokenize_internal(vocab, str.substr(i), false, false);
+ std::vector<llama_token> tokenization = vocab.tokenize(str.substr(i), false, false);
if (max_tail_len >= 0 && tokenization.size() > (size_t)max_tail_len) {
tokenization.resize(max_tail_len);
}
@@ -2153,7 +2153,7 @@ static void llama_sampler_infill_apply(struct llama_sampler * smpl, llama_token_
float p_eog_sum = 0.0f;

for (size_t i = 0; i < cur_p->size; ++i) {
- if (llama_token_is_eog_impl(*ctx->vocab, cur_p->data[i].id)) {
+ if (ctx->vocab->token_is_eog(cur_p->data[i].id)) {
p_eog_sum += cur_p->data[i].p;
} else {
p_txt_sum += cur_p->data[i].p;
@@ -2175,7 +2175,7 @@ static void llama_sampler_infill_apply(struct llama_sampler * smpl, llama_token_
float p_sum = 0.0f;

for (size_t i = 0; i < size_org; ++i) {
- if (llama_token_is_eog_impl(*ctx->vocab, cur_p->data[i].id)) {
+ if (ctx->vocab->token_is_eog(cur_p->data[i].id)) {
p_sum += cur_p->data[i].p;

cur_p->data[cur_p->size++] = cur_p->data[i];
@@ -2203,17 +2203,17 @@ static void llama_sampler_infill_apply(struct llama_sampler * smpl, llama_token_
continue;
}

- int len0 = llama_token_to_piece_impl(*ctx->vocab, cur_p->data[i0].id, ctx->buf0.data(), ctx->buf0.size(), 0, false);
+ int len0 = ctx->vocab->token_to_piece(cur_p->data[i0].id, ctx->buf0.data(), ctx->buf0.size(), 0, false);
if (len0 < 0) {
ctx->buf0.resize(len0);
- len0 = llama_token_to_piece_impl(*ctx->vocab, cur_p->data[i0].id, ctx->buf0.data(), ctx->buf0.size(), 0, false);
+ len0 = ctx->vocab->token_to_piece(cur_p->data[i0].id, ctx->buf0.data(), ctx->buf0.size(), 0, false);
assert(len0 > 0);
}

- int len1 = llama_token_to_piece_impl(*ctx->vocab, cur_p->data[i1].id, ctx->buf1.data(), ctx->buf1.size(), 0, false);
+ int len1 = ctx->vocab->token_to_piece(cur_p->data[i1].id, ctx->buf1.data(), ctx->buf1.size(), 0, false);
if (len1 < 0) {
ctx->buf1.resize(len1);
- len1 = llama_token_to_piece_impl(*ctx->vocab, cur_p->data[i1].id, ctx->buf1.data(), ctx->buf1.size(), 0, false);
+ len1 = ctx->vocab->token_to_piece(cur_p->data[i1].id, ctx->buf1.data(), ctx->buf1.size(), 0, false);
assert(len1 > 0);
}

@@ -2248,7 +2248,7 @@ static void llama_sampler_infill_apply(struct llama_sampler * smpl, llama_token_
LOG_DBG_CUR("%s: n_combined = %zu, applying thold = %.3f\n", __func__, n_combined, thold);

for (size_t i = 0; i < size_org; ++i) {
- const bool is_eog = llama_token_is_eog_impl(*ctx->vocab, cur_p->data[i].id);
+ const bool is_eog = ctx->vocab->token_is_eog(cur_p->data[i].id);

if (cur_p->data[i].p < thold && !is_eog) {
continue;
@@ -2269,7 +2269,7 @@ static void llama_sampler_infill_apply(struct llama_sampler * smpl, llama_token_
// if no non-EOG tokens are left -> reduce cur_p to single EOT token
if (n_non_eog == 0) {
cur_p->size = 1;
- cur_p->data[0].id = llama_token_eot_impl(*ctx->vocab);
+ cur_p->data[0].id = ctx->vocab->token_eot();
cur_p->data[0].logit = 1.0f;

return;
@@ -2291,7 +2291,7 @@ static void llama_sampler_infill_apply(struct llama_sampler * smpl, llama_token_
LOG_DBG_CUR("%s: applying thold = %.3f\n", __func__, thold);

for (size_t i = 0; i < size_org; ++i) {
- const bool is_eog = llama_token_is_eog_impl(*ctx->vocab, cur_p->data[i].id);
+ const bool is_eog = ctx->vocab->token_is_eog(cur_p->data[i].id);

if (cur_p->data[i].p < thold && !is_eog) {
continue;
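The infill hunks above also show the buffer convention around token_to_piece: a negative return value means the scratch buffer was too small, so the caller resizes and calls again (the public llama_token_to_piece API follows the same convention, with the magnitude of the negative value being the required length). A hedged sketch of that retry pattern, with piece_fn standing in for vocab->token_to_piece:

```cpp
// Sketch of the resize-and-retry pattern used with token_to_piece in the
// infill sampler. `piece_fn` is a stand-in for vocab->token_to_piece; this
// assumes the usual llama.cpp convention that a negative return value is the
// negated required buffer length.
#include <cassert>
#include <cstdint>
#include <functional>
#include <string>
#include <vector>

using llama_token = std::int32_t;
using piece_fn_t  = std::function<int32_t(llama_token, char *, int32_t)>;

std::string token_piece(const piece_fn_t & piece_fn, llama_token id, std::vector<char> & buf) {
    int32_t len = piece_fn(id, buf.data(), (int32_t) buf.size());
    if (len < 0) {
        buf.resize((size_t) -len);                         // grow to the required size
        len = piece_fn(id, buf.data(), (int32_t) buf.size());
        assert(len > 0);
    }
    return std::string(buf.data(), (size_t) len);
}
```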