From 8be172ce0fd8b8a1a620e43626284dfb626d517b Mon Sep 17 00:00:00 2001
From: macie
Date: Tue, 2 Jan 2024 22:38:45 +0100
Subject: [PATCH] feat: Turn off sampling by default

`llama.cpp` offers multiple sampling methods. To get predictable results
we will use temperature and min_p only.
---
 llama/client.go | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/llama/client.go b/llama/client.go
index eb6f258..3288e84 100644
--- a/llama/client.go
+++ b/llama/client.go
@@ -28,6 +28,8 @@ type completionRequest struct {
 	TopP            float32 `json:"top_p"`
 	Seed            int     `json:"seed"`
 	WithoutNewlines bool    `json:"penalize_nl"`
+	RepeatPenalty   float32 `json:"repeat_penalty"`
+	RepeatLastN     int     `json:"repeat_last_n"`
 	PredictNum      int     `json:"n_predict"`
 	Streaming       bool    `json:"stream"`
 }
@@ -61,12 +63,14 @@ func (c *Client) Complete(ctx context.Context, p Prompt) (chan string, error) {
 	}
 	req := completionRequest{
 		Prompt:          p.String(),
-		Temp:            c.Options.Temp,
-		TopK:            40,
-		MinP:            c.Options.MinP,
-		TopP:            0.0,
-		Seed:            -1,
-		PredictNum:      150,
+		Temp:            c.Options.Temp, // 1.0 means disable
+		TopK:            0,              // 0 means disable
+		MinP:            c.Options.MinP, // 0.0 means disable
+		TopP:            1.0,            // 1.0 means disable
+		Seed:            5489,           // -1 means random seed
+		PredictNum:      -1,             // -1 means infinite
+		RepeatPenalty:   1.0,            // 1.0 means disable
+		RepeatLastN:     0,              // 0 means disable
 		Streaming:       true,
 		WithoutNewlines: false,
 	}
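
For reference, below is a sketch of the request body these defaults produce. The struct mirrors `completionRequest` from `llama/client.go`, but the JSON tags for `Temp`, `TopK`, and `MinP` are not visible in the hunks above, so `temperature`, `top_k`, and `min_p` are assumptions based on llama.cpp's `/completion` API; the `Prompt`, `Temp`, and `MinP` values are placeholders standing in for `p.String()` and whatever `c.Options` carries:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// completionRequest mirrors the struct patched above. The tags for
// Temp, TopK, and MinP are assumed (they are not shown in the diff)
// to match the llama.cpp /completion API.
type completionRequest struct {
	Prompt          string  `json:"prompt"`
	Temp            float32 `json:"temperature"` // assumed tag
	TopK            int     `json:"top_k"`       // assumed tag
	MinP            float32 `json:"min_p"`       // assumed tag
	TopP            float32 `json:"top_p"`
	Seed            int     `json:"seed"`
	WithoutNewlines bool    `json:"penalize_nl"`
	RepeatPenalty   float32 `json:"repeat_penalty"`
	RepeatLastN     int     `json:"repeat_last_n"`
	PredictNum      int     `json:"n_predict"`
	Streaming       bool    `json:"stream"`
}

func main() {
	// The new defaults: every sampler except temperature and min_p is
	// set to its "disabled" value, and the seed is pinned.
	req := completionRequest{
		Prompt:          "Hello",
		Temp:            0.8,  // placeholder for c.Options.Temp
		TopK:            0,    // disabled
		MinP:            0.05, // placeholder for c.Options.MinP
		TopP:            1.0,  // disabled
		Seed:            5489, // fixed, so repeated runs are reproducible
		PredictNum:      -1,   // no generation limit
		RepeatPenalty:   1.0,  // disabled
		RepeatLastN:     0,    // disabled
		Streaming:       true,
		WithoutNewlines: false,
	}

	body, err := json.MarshalIndent(req, "", "  ")
	if err != nil {
		panic(err)
	}
	fmt.Println(string(body))
}
```

With top-k, top-p, and the repetition penalties neutralized and the seed fixed, the output distribution is shaped only by `Temp` and `MinP`, which is exactly what the commit message promises. (Incidentally, 5489 is the default seed of the MT19937 Mersenne Twister.)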