From d930724e82cd5368796e95456a2893ad174a062d Mon Sep 17 00:00:00 2001 From: drbh Date: Fri, 21 Jun 2024 14:28:26 -0400 Subject: [PATCH] feat: sort cuda graphs in descending order (#2104) --- server/text_generation_server/models/globals.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/server/text_generation_server/models/globals.py b/server/text_generation_server/models/globals.py index 11a9f030faf..970a673b179 100644 --- a/server/text_generation_server/models/globals.py +++ b/server/text_generation_server/models/globals.py @@ -15,6 +15,13 @@ else: cuda_graphs = None + +# sorting the cuda graphs in descending order (largest first) +# reduces memory usage +if cuda_graphs is not None: + cuda_graphs.sort(reverse=True) + + CUDA_GRAPHS = cuda_graphs # This is overridden at model loading.