Commit: wip
l3utterfly committed Jan 16, 2024
1 parent ad76c51 commit 15253f0
Showing 1 changed file (llama.cpp) with 66 additions and 0 deletions.
@@ -8060,6 +8060,72 @@ llama_token llama_sample_token_greedy(struct llama_context * ctx, llama_token_da
return result;
}

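// Dynamic-temperature ("entropy") sampling: measure the Shannon entropy of the
// softmax distribution over the candidates, normalize it by the maximum
// possible entropy, and map the result into [min_temp, max_temp] to obtain a
// per-step temperature. Confident (low-entropy) distributions get a low
// temperature; uncertain (high-entropy) ones get a high temperature. Note
// that the fixed `temp` argument is currently unused.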
void llama_sample_entropy(struct llama_context * ctx, llama_token_data_array * candidates_p, float temp, float min_temp = 0.0f, float max_temp = 2.0f) {
const int64_t t_start_sample_us = ggml_time_us();

llama_sample_softmax(ctx, candidates_p);

float exponent_val = 1.0f;

// Calculate entropy of the softmax probabilities
float entropy = 0.0f;
for (size_t i = 0; i < candidates_p->size; ++i) {
float prob = candidates_p->data[i].p;
if (prob > 0.0f) { // Ensure no log(0)
entropy -= prob * logf(prob);
}
}
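    // H = -sum_i p_i * ln(p_i), in nats: 0 for a one-hot distribution and
    // maximal for a uniform one.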

// Calculate maximum possible entropy
float max_entropy = -logf(1.0f / candidates_p->size);
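    // -logf(1/N) == logf(N) is the entropy of a uniform distribution over the
    // N candidates, so normalized_entropy below falls in [0, 1].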

    // Guard against division by zero (max_entropy is 0 only when there is a
    // single candidate, in which case entropy is 0 as well)
    if (max_entropy == 0.0f) {
        max_entropy = 1.0f; // normalized_entropy then stays 0
    }

// Normalize the entropy
float normalized_entropy = entropy / max_entropy;

// Map the normalized entropy to the desired temperature range using the power function
float dyn_temp = min_temp + (max_temp - min_temp) * powf(normalized_entropy, exponent_val);
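    // Example: with min_temp = 0, max_temp = 2 and exponent_val = 1 the map is
    // linear, so normalized_entropy = 0.5 gives dyn_temp = 1.0. A fully peaked
    // distribution gives dyn_temp = 0, which would divide the logits by zero
    // below, so callers likely want min_temp > 0 in practice.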

    // create our probability table (note: `probs` is not used below)
    std::vector<float> probs;
    probs.reserve(candidates_p->size);
    for (size_t i = 0; i < candidates_p->size; ++i) {
        probs.push_back(candidates_p->data[i].p);
    }

// Apply the dynamically calculated temperature scaling
for (size_t i = 0; i < candidates_p->size; ++i) {
candidates_p->data[i].logit /= dyn_temp;
}
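    // Dividing every logit by dyn_temp sharpens the distribution when
    // dyn_temp < 1 and flattens it when dyn_temp > 1.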

// Re-compute softmax probabilities after scaling logits with dynamic temperature
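    // llama_sample_softmax leaves the candidates sorted by logit in descending
    // order, so data[0] holds the maximum; subtracting it before exp() is the
    // usual log-sum-exp trick for numerical stability.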
double max_l_double = candidates_p->data[0].logit;
double cum_sum_double = 0.0;
for (size_t i = 0; i < candidates_p->size; ++i) {
double p = exp(candidates_p->data[i].logit - max_l_double);
candidates_p->data[i].p = p; // Store the scaled probability
cum_sum_double += p;
}
for (size_t i = 0; i < candidates_p->size; ++i) {
candidates_p->data[i].p /= cum_sum_double; // Re-normalize the probabilities
}

    // // TODO: hide these print statements unless debugging
// // Print the updated top 25 probabilities after temperature scaling
// printf("\nUpdated Top 25 Probabilities After Dynamic Temperature Scaling (in percentages):\n");
// for (size_t i = 0; i < 25 && i < candidates_p->size; ++i) {
// printf("Token %zu: %f%%\n", i + 1, candidates_p->data[i].p * 100.0f);
// }

if (ctx) {
ctx->t_sample_us += ggml_time_us() - t_start_sample_us;
}
}
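
// Illustrative usage (hypothetical; this wip commit does not yet wire the
// function into the public sampling API):
//
//     llama_sample_entropy(ctx, &candidates, /*temp=*/1.0f, /*min_temp=*/0.1f, /*max_temp=*/2.0f);
//     llama_token tok = llama_sample_token(ctx, &candidates);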

llama_token llama_sample_token(struct llama_context * ctx, llama_token_data_array * candidates) {
GGML_ASSERT(ctx);

