Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

llama : remove Tail-Free sampling #10071

Merged
merged 1 commit into from
Oct 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 1 addition & 8 deletions common/arg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -943,13 +943,6 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
params.sparams.min_p = std::stof(value);
}
).set_sparam());
add_opt(common_arg(
{"--tfs"}, "N",
string_format("tail free sampling, parameter z (default: %.1f, 1.0 = disabled)", (double)params.sparams.tfs_z),
[](common_params & params, const std::string & value) {
params.sparams.tfs_z = std::stof(value);
}
).set_sparam());
add_opt(common_arg(
{"--xtc-probability"}, "N",
string_format("xtc probability (default: %.1f, 0.0 = disabled)", (double)params.sparams.xtc_probability),
Expand Down Expand Up @@ -1074,7 +1067,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
).set_sparam());
add_opt(common_arg(
{"--mirostat"}, "N",
string_format("use Mirostat sampling.\nTop K, Nucleus, Tail Free and Locally Typical samplers are ignored if used.\n"
string_format("use Mirostat sampling.\nTop K, Nucleus and Locally Typical samplers are ignored if used.\n"
"(default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)", params.sparams.mirostat),
[](common_params & params, int value) {
params.sparams.mirostat = value;
Expand Down
1 change: 0 additions & 1 deletion common/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2090,7 +2090,6 @@ void yaml_dump_non_result_info(FILE * stream, const common_params & params, cons
const std::vector<float> tensor_split_vector(params.tensor_split, params.tensor_split + llama_max_devices());
yaml_dump_vector_float(stream, "tensor_split", tensor_split_vector);

fprintf(stream, "tfs: %f # default: 1.0\n", sparams.tfs_z);
fprintf(stream, "threads: %d # default: %u\n", params.cpuparams.n_threads, std::thread::hardware_concurrency());
fprintf(stream, "top_k: %d # default: 40\n", sparams.top_k);
fprintf(stream, "top_p: %f # default: 0.95\n", sparams.top_p);
Expand Down
4 changes: 1 addition & 3 deletions common/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ enum common_sampler_type {
COMMON_SAMPLER_TYPE_TOP_K = 2,
COMMON_SAMPLER_TYPE_TOP_P = 3,
COMMON_SAMPLER_TYPE_MIN_P = 4,
COMMON_SAMPLER_TYPE_TFS_Z = 5,
//COMMON_SAMPLER_TYPE_TFS_Z = 5,
COMMON_SAMPLER_TYPE_TYPICAL_P = 6,
COMMON_SAMPLER_TYPE_TEMPERATURE = 7,
COMMON_SAMPLER_TYPE_XTC = 8,
Expand All @@ -113,7 +113,6 @@ struct common_sampler_params {
float min_p = 0.05f; // 0.0 = disabled
float xtc_probability = 0.00f; // 0.0 = disabled
float xtc_threshold = 0.10f; // > 0.5 disables XTC
float tfs_z = 1.00f; // 1.0 = disabled
float typ_p = 1.00f; // typical_p, 1.0 = disabled
float temp = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities
float dynatemp_range = 0.00f; // 0.0 = disabled
Expand All @@ -139,7 +138,6 @@ struct common_sampler_params {
std::vector<enum common_sampler_type> samplers = {
COMMON_SAMPLER_TYPE_DRY,
COMMON_SAMPLER_TYPE_TOP_K,
COMMON_SAMPLER_TYPE_TFS_Z,
COMMON_SAMPLER_TYPE_TYPICAL_P,
COMMON_SAMPLER_TYPE_TOP_P,
COMMON_SAMPLER_TYPE_MIN_P,
Expand Down
13 changes: 2 additions & 11 deletions common/sampling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,11 +131,11 @@ std::string common_sampler_params::print() const {
snprintf(result, sizeof(result),
"\trepeat_last_n = %d, repeat_penalty = %.3f, frequency_penalty = %.3f, presence_penalty = %.3f\n"
"\tdry_multiplier = %.3f, dry_base = %.3f, dry_allowed_length = %d, dry_penalty_last_n = %d\n"
"\ttop_k = %d, tfs_z = %.3f, top_p = %.3f, min_p = %.3f, xtc_probability = %.3f, xtc_threshold = %.3f, typical_p = %.3f, temp = %.3f\n"
"\ttop_k = %d, top_p = %.3f, min_p = %.3f, xtc_probability = %.3f, xtc_threshold = %.3f, typical_p = %.3f, temp = %.3f\n"
"\tmirostat = %d, mirostat_lr = %.3f, mirostat_ent = %.3f",
penalty_last_n, penalty_repeat, penalty_freq, penalty_present,
dry_multiplier, dry_base, dry_allowed_length, dry_penalty_last_n,
top_k, tfs_z, top_p, min_p, xtc_probability, xtc_threshold, typ_p, temp,
top_k, top_p, min_p, xtc_probability, xtc_threshold, typ_p, temp,
mirostat, mirostat_eta, mirostat_tau);

return std::string(result);
Expand Down Expand Up @@ -199,9 +199,6 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
case COMMON_SAMPLER_TYPE_XTC:
llama_sampler_chain_add(result->chain, llama_sampler_init_xtc (params.xtc_probability, params.xtc_threshold, params.min_keep, params.seed));
break;
case COMMON_SAMPLER_TYPE_TFS_Z:
llama_sampler_chain_add(result->chain, llama_sampler_init_tail_free(params.tfs_z, params.min_keep));
break;
case COMMON_SAMPLER_TYPE_TYPICAL_P:
llama_sampler_chain_add(result->chain, llama_sampler_init_typical (params.typ_p, params.min_keep));
break;
Expand Down Expand Up @@ -373,7 +370,6 @@ char common_sampler_type_to_chr(enum common_sampler_type cnstr) {
switch (cnstr) {
case COMMON_SAMPLER_TYPE_DRY: return 'd';
case COMMON_SAMPLER_TYPE_TOP_K: return 'k';
case COMMON_SAMPLER_TYPE_TFS_Z: return 'f';
case COMMON_SAMPLER_TYPE_TYPICAL_P: return 'y';
case COMMON_SAMPLER_TYPE_TOP_P: return 'p';
case COMMON_SAMPLER_TYPE_MIN_P: return 'm';
Expand All @@ -388,7 +384,6 @@ std::string common_sampler_type_to_str(enum common_sampler_type cnstr) {
switch (cnstr) {
case COMMON_SAMPLER_TYPE_DRY: return "dry";
case COMMON_SAMPLER_TYPE_TOP_K: return "top_k";
case COMMON_SAMPLER_TYPE_TFS_Z: return "tfs_z";
case COMMON_SAMPLER_TYPE_TYPICAL_P: return "typ_p";
case COMMON_SAMPLER_TYPE_TOP_P: return "top_p";
case COMMON_SAMPLER_TYPE_MIN_P: return "min_p";
Expand All @@ -406,7 +401,6 @@ std::vector<common_sampler_type> common_sampler_types_from_names(const std::vect
{ "top_p", COMMON_SAMPLER_TYPE_TOP_P },
{ "typ_p", COMMON_SAMPLER_TYPE_TYPICAL_P },
{ "min_p", COMMON_SAMPLER_TYPE_MIN_P },
{ "tfs_z", COMMON_SAMPLER_TYPE_TFS_Z },
{ "temperature", COMMON_SAMPLER_TYPE_TEMPERATURE },
{ "xtc", COMMON_SAMPLER_TYPE_XTC },
{ "infill", COMMON_SAMPLER_TYPE_INFILL },
Expand All @@ -423,8 +417,6 @@ std::vector<common_sampler_type> common_sampler_types_from_names(const std::vect
{ "typ-p", COMMON_SAMPLER_TYPE_TYPICAL_P },
{ "typ", COMMON_SAMPLER_TYPE_TYPICAL_P },
{ "min-p", COMMON_SAMPLER_TYPE_MIN_P },
{ "tfs-z", COMMON_SAMPLER_TYPE_TFS_Z },
{ "tfs", COMMON_SAMPLER_TYPE_TFS_Z },
{ "temp", COMMON_SAMPLER_TYPE_TEMPERATURE },
};

Expand Down Expand Up @@ -452,7 +444,6 @@ std::vector<common_sampler_type> common_sampler_types_from_chars(const std::stri
std::unordered_map<char, common_sampler_type> sampler_name_map = {
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_DRY), COMMON_SAMPLER_TYPE_DRY },
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TOP_K), COMMON_SAMPLER_TYPE_TOP_K },
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TFS_Z), COMMON_SAMPLER_TYPE_TFS_Z },
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TYPICAL_P), COMMON_SAMPLER_TYPE_TYPICAL_P },
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TOP_P), COMMON_SAMPLER_TYPE_TOP_P },
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_MIN_P), COMMON_SAMPLER_TYPE_MIN_P },
Expand Down
8 changes: 0 additions & 8 deletions examples/main/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -235,14 +235,6 @@ The Min-P sampling method was designed as an alternative to Top-P, and aims to e

Example usage: `--min-p 0.05`

### Tail-Free Sampling (TFS)

- `--tfs N`: Enable tail free sampling with parameter z (default: 1.0, 1.0 = disabled).

Tail-free sampling (TFS) is a text generation technique that aims to reduce the impact of less likely tokens, which may be less relevant, less coherent, or nonsensical, on the output. Similar to Top-P it tries to determine the bulk of the most likely tokens dynamically. But TFS filters out logits based on the second derivative of their probabilities. Adding tokens is stopped after the sum of the second derivatives reaches the parameter z. In short: TFS looks at how quickly the probabilities of the tokens decrease and cuts off the tail of unlikely tokens using the parameter z. Typical values for z are in the range of 0.9 to 0.95. A value of 1.0 would include all tokens and thus disables the effect of TFS.

Example usage: `--tfs 0.95`

### Locally Typical Sampling

- `--typical N`: Enable locally typical sampling with parameter p (default: 1.0, 1.0 = disabled).
Expand Down
11 changes: 3 additions & 8 deletions examples/server/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ The project is under active development, and we are [looking for feedback and co

| Argument | Explanation |
| -------- | ----------- |
| `--samplers SAMPLERS` | samplers that will be used for generation in the order, separated by ';'<br/>(default: top_k;tfs_z;typ_p;top_p;min_p;temperature) |
| `--samplers SAMPLERS` | samplers that will be used for generation in the order, separated by ';'<br/>(default: top_k;typ_p;top_p;min_p;temperature) |
| `-s, --seed SEED` | RNG seed (default: -1, use random seed for -1) |
| `--sampling-seq SEQUENCE` | simplified sequence for samplers that will be used (default: kfypmt) |
| `--ignore-eos` | ignore end of stream token and continue generating (implies --logit-bias EOS-inf) |
Expand All @@ -108,7 +108,6 @@ The project is under active development, and we are [looking for feedback and co
| `--top-k N` | top-k sampling (default: 40, 0 = disabled) |
| `--top-p N` | top-p sampling (default: 0.9, 1.0 = disabled) |
| `--min-p N` | min-p sampling (default: 0.1, 0.0 = disabled) |
| `--tfs N` | tail free sampling, parameter z (default: 1.0, 1.0 = disabled) |
| `--typical N` | locally typical sampling, parameter p (default: 1.0, 1.0 = disabled) |
| `--repeat-last-n N` | last n tokens to consider for penalize (default: 64, 0 = disabled, -1 = ctx_size) |
| `--repeat-penalty N` | penalize repeat sequence of tokens (default: 1.0, 1.0 = disabled) |
Expand All @@ -121,7 +120,7 @@ The project is under active development, and we are [looking for feedback and co
| `--dry-sequence-breaker STRING` | add sequence breaker for DRY sampling, clearing out default breakers (`['\n', ':', '"', '*']`) in the process; use `"none"` to not use any sequence breakers
| `--dynatemp-range N` | dynamic temperature range (default: 0.0, 0.0 = disabled) |
| `--dynatemp-exp N` | dynamic temperature exponent (default: 1.0) |
| `--mirostat N` | use Mirostat sampling.<br/>Top K, Nucleus, Tail Free and Locally Typical samplers are ignored if used.<br/>(default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0) |
| `--mirostat N` | use Mirostat sampling.<br/>Top K, Nucleus and Locally Typical samplers are ignored if used.<br/>(default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0) |
| `--mirostat-lr N` | Mirostat learning rate, parameter eta (default: 0.1) |
| `--mirostat-ent N` | Mirostat target entropy, parameter tau (default: 5.0) |
| `-l, --logit-bias TOKEN_ID(+/-)BIAS` | modifies the likelihood of token appearing in the completion,<br/>i.e. `--logit-bias 15043+1` to increase likelihood of token ' Hello',<br/>or `--logit-bias 15043-1` to decrease likelihood of token ' Hello' |
Expand Down Expand Up @@ -360,8 +359,6 @@ node index.js
`stop`: Specify a JSON array of stopping strings.
These words will not be included in the completion, so make sure to add them to the prompt for the next iteration. Default: `[]`

`tfs_z`: Enable tail free sampling with parameter z. Default: `1.0`, which is disabled.

`typical_p`: Enable locally typical sampling with parameter p. Default: `1.0`, which is disabled.

`repeat_penalty`: Control the repetition of token sequences in the generated text. Default: `1.1`
Expand Down Expand Up @@ -412,7 +409,7 @@ node index.js

`cache_prompt`: Re-use KV cache from a previous request if possible. This way the common prefix does not have to be re-processed, only the suffix that differs between the requests. Because (depending on the backend) the logits are **not** guaranteed to be bit-for-bit identical for different batch sizes (prompt processing vs. token generation) enabling this option can cause nondeterministic results. Default: `false`

`samplers`: The order the samplers should be applied in. An array of strings representing sampler type names. If a sampler is not set, it will not be used. If a sampler is specified more than once, it will be applied multiple times. Default: `["top_k", "tfs_z", "typical_p", "top_p", "min_p", "temperature"]` - these are all the available values.
`samplers`: The order the samplers should be applied in. An array of strings representing sampler type names. If a sampler is not set, it will not be used. If a sampler is specified more than once, it will be applied multiple times. Default: `["top_k", "typical_p", "top_p", "min_p", "temperature"]` - these are all the available values.

**Response format**

Expand Down Expand Up @@ -738,7 +735,6 @@ Example:
"repeat_penalty": 1.100000023841858,
"samplers": [
"top_k",
"tfs_z",
"typical_p",
"top_p",
"min_p",
Expand All @@ -752,7 +748,6 @@ Example:
"stream": false,
"task_id": 0,
"temperature": 0.0,
"tfs_z": 1.0,
"top_k": 40,
"top_p": 0.949999988079071,
"typical_p": 1.0
Expand Down
3 changes: 0 additions & 3 deletions examples/server/public/index-new.html
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@
min_p: 0.05, // 0 = disabled; recommended for non-english: ~ 0.4
xtc_probability: 0.0, // 0 = disabled;
xtc_threshold: 0.1, // > 0.5 disables XTC;
tfs_z: 1.0, // 1.0 = disabled
typical_p: 1.0, // 1.0 = disabled
presence_penalty: 0.0, // 0.0 = disabled
frequency_penalty: 0.0, // 0.0 = disabled
Expand Down Expand Up @@ -847,7 +846,6 @@
${FloatField({ label: "DRY Base", title: "Set the DRY repetition penalty base value. Default is 1.75", max: 3.0, min: 1.0, name: "dry_base", step: 0.01, value: params.value.dry_base })}
${IntField({ label: "DRY Allowed Length", title: "Tokens that extend repetition beyond this receive exponentially increasing penalty. Default is 2", max: 10, min: 1, step: 1, name: "dry_allowed_length", value: params.value.dry_allowed_length })}
${IntField({ label: "DRY Penalty Last N", title: "How many tokens to scan for repetitions. Default is -1, where 0 is disabled and -1 is context size", max: 2048, min: -1, step: 16, name: "dry_penalty_last_n", value: params.value.dry_penalty_last_n })}
${FloatField({ label: "TFS-Z", title: "Activates tail-free sampling, a method used to limit the prediction of tokens that are too frequent. The parameter z controls the strength of this limitation. A value of 1.0 means that this function is deactivated.", max: 1.0, min: 0.0, name: "tfs_z", step: 0.01, value: params.value.tfs_z })}
${IntField({ label: "Min Keep", title: "If greater than 0, samplers are forced to return N possible tokens at minimum. Default is 0", max: 10, min: 0, name: "min_keep", value: params.value.min_keep })}
</fieldset>

Expand Down Expand Up @@ -1147,7 +1145,6 @@ <h2>llama.cpp</h2>
xtc_probability: { snapValue: 0.0, snapRangeMultiplier: 4 },
xtc_threshold: { snapValue: 0.5, snapRangeMultiplier: 4 },
top_p: { snapValue: 1.0, snapRangeMultiplier: 4 },
tfs_z: { snapValue: 1.0, snapRangeMultiplier: 4 },
typical_p: { snapValue: 1.0, snapRangeMultiplier: 4 },
repeat_penalty: { snapValue: 1.0, snapRangeMultiplier: 4 },
presence_penalty: { snapValue: 0.0, snapRangeMultiplier: 4 },
Expand Down
2 changes: 0 additions & 2 deletions examples/server/public/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,6 @@
min_p: 0.05, // 0 = disabled
xtc_probability: 0.0, // 0 = disabled;
xtc_threshold: 0.1, // > 0.5 disables XTC;
tfs_z: 1.0, // 1.0 = disabled
typical_p: 1.0, // 1.0 = disabled
presence_penalty: 0.0, // 0.0 = disabled
frequency_penalty: 0.0, // 0.0 = disabled
Expand Down Expand Up @@ -1015,7 +1014,6 @@
<details>
<summary>More options</summary>
<fieldset class="two">
${FloatField({ label: "TFS-Z", max: 1.0, min: 0.0, name: "tfs_z", step: 0.01, value: params.value.tfs_z })}
${FloatField({ label: "Typical P", max: 1.0, min: 0.0, name: "typical_p", step: 0.01, value: params.value.typical_p })}
${FloatField({ label: "Presence penalty", max: 1.0, min: 0.0, name: "presence_penalty", step: 0.01, value: params.value.presence_penalty })}
${FloatField({ label: "Frequency penalty", max: 1.0, min: 0.0, name: "frequency_penalty", step: 0.01, value: params.value.frequency_penalty })}
Expand Down
2 changes: 0 additions & 2 deletions examples/server/server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -809,7 +809,6 @@ struct server_context {
slot.sparams.min_p = json_value(data, "min_p", default_sparams.min_p);
slot.sparams.xtc_probability = json_value(data, "xtc_probability", default_sparams.xtc_probability);
slot.sparams.xtc_threshold = json_value(data, "xtc_threshold", default_sparams.xtc_threshold);
slot.sparams.tfs_z = json_value(data, "tfs_z", default_sparams.tfs_z);
slot.sparams.typ_p = json_value(data, "typical_p", default_sparams.typ_p);
slot.sparams.temp = json_value(data, "temperature", default_sparams.temp);
slot.sparams.dynatemp_range = json_value(data, "dynatemp_range", default_sparams.dynatemp_range);
Expand Down Expand Up @@ -1149,7 +1148,6 @@ struct server_context {
{"min_p", slot.sparams.min_p},
{"xtc_probability", slot.sparams.xtc_probability},
{"xtc_threshold", slot.sparams.xtc_threshold},
{"tfs_z", slot.sparams.tfs_z},
{"typical_p", slot.sparams.typ_p},
{"repeat_last_n", slot.sparams.penalty_last_n},
{"repeat_penalty", slot.sparams.penalty_repeat},
Expand Down
2 changes: 0 additions & 2 deletions examples/server/themes/buttons-top/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,6 @@
top_k: 40, // <= 0 to use vocab size
top_p: 0.95, // 1.0 = disabled
min_p: 0.05, // 0 = disabled
tfs_z: 1.0, // 1.0 = disabled
typical_p: 1.0, // 1.0 = disabled
presence_penalty: 0.0, // 0.0 = disabled
frequency_penalty: 0.0, // 0.0 = disabled
Expand Down Expand Up @@ -788,7 +787,6 @@
<details>
<summary>More options</summary>
<fieldset class="two">
${FloatField({ label: "TFS-Z", max: 1.0, min: 0.0, name: "tfs_z", step: 0.01, value: params.value.tfs_z })}
${FloatField({ label: "Typical P", max: 1.0, min: 0.0, name: "typical_p", step: 0.01, value: params.value.typical_p })}
${FloatField({ label: "Presence penalty", max: 1.0, min: 0.0, name: "presence_penalty", step: 0.01, value: params.value.presence_penalty })}
${FloatField({ label: "Frequency penalty", max: 1.0, min: 0.0, name: "frequency_penalty", step: 0.01, value: params.value.frequency_penalty })}
Expand Down
2 changes: 0 additions & 2 deletions examples/server/themes/wild/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,6 @@
top_k: 40, // <= 0 to use vocab size
top_p: 0.95, // 1.0 = disabled
min_p: 0.05, // 0 = disabled
tfs_z: 1.0, // 1.0 = disabled
typical_p: 1.0, // 1.0 = disabled
presence_penalty: 0.0, // 0.0 = disabled
frequency_penalty: 0.0, // 0.0 = disabled
Expand Down Expand Up @@ -791,7 +790,6 @@
<details>
<summary>More options</summary>
<fieldset class="two">
${FloatField({ label: "TFS-Z", max: 1.0, min: 0.0, name: "tfs_z", step: 0.01, value: params.value.tfs_z })}
${FloatField({ label: "Typical P", max: 1.0, min: 0.0, name: "typical_p", step: 0.01, value: params.value.typical_p })}
${FloatField({ label: "Presence penalty", max: 1.0, min: 0.0, name: "presence_penalty", step: 0.01, value: params.value.presence_penalty })}
${FloatField({ label: "Frequency penalty", max: 1.0, min: 0.0, name: "frequency_penalty", step: 0.01, value: params.value.frequency_penalty })}
Expand Down
Loading
Loading