From 8be172ce0fd8b8a1a620e43626284dfb626d517b Mon Sep 17 00:00:00 2001
From: macie
Date: Tue, 2 Jan 2024 22:38:45 +0100
Subject: [PATCH] feat: Turn off sampling by default

`llama.cpp` offers multiple sampling methods. To get predictable results
we will use temperature and min_p only.
---
 llama/client.go | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/llama/client.go b/llama/client.go
index eb6f258..3288e84 100644
--- a/llama/client.go
+++ b/llama/client.go
@@ -28,6 +28,8 @@ type completionRequest struct {
 	TopP            float32 `json:"top_p"`
 	Seed            int     `json:"seed"`
 	WithoutNewlines bool    `json:"penalize_nl"`
+	RepeatPenalty   float32 `json:"repeat_penalty"`
+	RepeatLastN     int     `json:"repeat_last_n"`
 	PredictNum      int     `json:"n_predict"`
 	Streaming       bool    `json:"stream"`
 }
@@ -61,12 +63,14 @@ func (c *Client) Complete(ctx context.Context, p Prompt) (chan string, error) {
 	}
 	req := completionRequest{
 		Prompt:          p.String(),
-		Temp:            c.Options.Temp,
-		TopK:            40,
-		MinP:            c.Options.MinP,
-		TopP:            0.0,
-		Seed:            -1,
-		PredictNum:      150,
+		Temp:            c.Options.Temp, // 1.0 means disable
+		TopK:            0,              // 0 means disable
+		MinP:            c.Options.MinP, // 0.0 means disable
+		TopP:            1.0,            // 1.0 means disable
+		Seed:            5489,           // -1 means random seed
+		PredictNum:      -1,             // -1 means infinite
+		RepeatPenalty:   1.0,            // 1.0 means disable
+		RepeatLastN:     0,              // 0 means disable
 		Streaming:       true,
 		WithoutNewlines: false,
 	}
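
For reference, below is a sketch of the request body these defaults produce. The struct mirrors `completionRequest` from `llama/client.go`, but the JSON tags for `Temp`, `TopK`, and `MinP` are not visible in the hunks above, so `temperature`, `top_k`, and `min_p` are assumptions based on llama.cpp's `/completion` API; the `Prompt`, `Temp`, and `MinP` values are placeholders standing in for `p.String()` and whatever `c.Options` carries:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// completionRequest mirrors the struct patched above. The tags for
// Temp, TopK, and MinP are assumed (they are not shown in the diff)
// to match the llama.cpp /completion API.
type completionRequest struct {
	Prompt          string  `json:"prompt"`
	Temp            float32 `json:"temperature"` // assumed tag
	TopK            int     `json:"top_k"`       // assumed tag
	MinP            float32 `json:"min_p"`       // assumed tag
	TopP            float32 `json:"top_p"`
	Seed            int     `json:"seed"`
	WithoutNewlines bool    `json:"penalize_nl"`
	RepeatPenalty   float32 `json:"repeat_penalty"`
	RepeatLastN     int     `json:"repeat_last_n"`
	PredictNum      int     `json:"n_predict"`
	Streaming       bool    `json:"stream"`
}

func main() {
	// The new defaults: every sampler except temperature and min_p is
	// set to its "disabled" value, and the seed is pinned.
	req := completionRequest{
		Prompt:          "Hello",
		Temp:            0.8,  // placeholder for c.Options.Temp
		TopK:            0,    // disabled
		MinP:            0.05, // placeholder for c.Options.MinP
		TopP:            1.0,  // disabled
		Seed:            5489, // fixed, so repeated runs are reproducible
		PredictNum:      -1,   // no generation limit
		RepeatPenalty:   1.0,  // disabled
		RepeatLastN:     0,    // disabled
		Streaming:       true,
		WithoutNewlines: false,
	}

	body, err := json.MarshalIndent(req, "", "  ")
	if err != nil {
		panic(err)
	}
	fmt.Println(string(body))
}
```

With top-k, top-p, and the repetition penalties neutralized and the seed fixed, the output distribution is shaped only by `Temp` and `MinP`, which is exactly what the commit message promises. (Incidentally, 5489 is the default seed of the MT19937 Mersenne Twister.)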