Update available types for cache_type_k and cache_type_v (#134)

I've tested every type listed in [kv_cache_type_from_str](https://github.com/ngxson/wllama/blob/ac7dc45c2d4a99867eea589e9a30650015f8f52d/actions.hpp#L61-L78) and confirmed that all of them work. Tested with the SmolLM2 360M Instruct model.
ngxson · Dec 3, 2024 · 4ff6b5f · 4ff6b5f
1 parent ac7dc45
commit 4ff6b5f
Showing 1 changed file with 2 additions and 2 deletions.
diff --git a/src/wllama.ts b/src/wllama.ts
@@ -88,8 +88,8 @@ export interface LoadModelConfig {
   yarn_orig_ctx?: number;
   // TODO: add group attention
   // optimizations
-  cache_type_k?: 'f16' | 'q8_0' | 'q4_0';
-  cache_type_v?: 'f16';
+  cache_type_k?: 'f32' | 'f16' | 'q8_0' | 'q5_1' | 'q5_0' | 'q4_1' | 'q4_0';
+  cache_type_v?: 'f32' | 'f16' | 'q8_0' | 'q5_1' | 'q5_0' | 'q4_1' | 'q4_0';
 }
 
 export interface SamplingConfig {