ggerganov · stduhpf · Sep 24, 2024 · Sep 24, 2024 · Sep 24, 2024 · Sep 24, 2024
diff --git a/common/arg.cpp b/common/arg.cpp
@@ -1633,7 +1633,7 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
             params.cvector_outfile = value;
             params.lora_outfile = value;
         }
-    ).set_examples({LLAMA_EXAMPLE_IMATRIX, LLAMA_EXAMPLE_CVECTOR_GENERATOR, LLAMA_EXAMPLE_EXPORT_LORA}));
+    ).set_examples({LLAMA_EXAMPLE_IMATRIX, LLAMA_EXAMPLE_CVECTOR_GENERATOR, LLAMA_EXAMPLE_EXPORT_LORA, LLAMA_EXAMPLE_COMPRESS}));
     add_opt(llama_arg(
         {"-ofreq", "--output-frequency"}, "N",
         format("output the imatrix every N iterations (default: %d)", params.n_out_freq),
@@ -1938,6 +1938,24 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
             else { std::invalid_argument("invalid value"); }
         }
     ).set_examples({LLAMA_EXAMPLE_BENCH}));
+    add_opt(llama_arg(
+        {"--compression_header_size"}, "N",
+        // derive the advertised default from params (as --output-frequency does) so the help text cannot drift
+        format("Number of tokens to keep in header (default: %d)", params.num_tokens_header),
+        [](gpt_params & params, int value){ params.num_tokens_header = value; }
+    ).set_examples({LLAMA_EXAMPLE_COMPRESS}));
+    // Select the task for the compress example; compress_mode stores it as an int:
+    // 0 = test (the default), 1 = compress, 2 = expand.
+    add_opt(llama_arg(
+        {"--mode"}, "{compress,expand,test}",
+        "What task to run (default: test)",
+        [](gpt_params & params,  const std::string & value){
+            if      (value == "test")     { params.compress_mode = 0; }
+            else if (value == "compress") { params.compress_mode = 1; }
+            else if (value == "expand")   { params.compress_mode = 2; }
+            // the exception has to be thrown; a bare temporary would silently accept bad input
+            else { throw std::invalid_argument("invalid value"); }
+        }).set_examples({LLAMA_EXAMPLE_COMPRESS}));
     add_opt(llama_arg(
         {"--log-disable"},
         "Log disable",

diff --git a/common/common.h b/common/common.h
@@ -80,6 +80,7 @@ enum llama_example {
     LLAMA_EXAMPLE_PARALLEL,
+    LLAMA_EXAMPLE_COMPRESS, // added before LLAMA_EXAMPLE_COUNT so COUNT stays the final enumerator (presumably the number of examples)
 
     LLAMA_EXAMPLE_COUNT,
 };
 
 enum gpt_sampler_type {
@@ -340,6 +341,9 @@ struct gpt_params {
 
     // batched-bench params
     bool batched_bench_output_jsonl = false;
+
+    int num_tokens_header = 1;
+    int compress_mode = 0;
 };
 
 // call once at the start of a program if it uses libcommon

diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
@@ -17,6 +17,7 @@ else()
     add_subdirectory(batched-bench)
     add_subdirectory(batched)
     add_subdirectory(benchmark)
+    add_subdirectory(compress)
     add_subdirectory(convert-llama2c-to-ggml)
     add_subdirectory(embedding)
     add_subdirectory(eval-callback)

diff --git a/examples/compress/CMakeLists.txt b/examples/compress/CMakeLists.txt
@@ -0,0 +1,5 @@
+# Build the llama-compress example executable from compress.cpp and install it as a runtime target.
+set(TARGET llama-compress)
+add_executable(${TARGET} compress.cpp)
+install(TARGETS ${TARGET} RUNTIME)
+# Link against the common and llama libraries plus the thread library found by CMake; require C++11.
+target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
+target_compile_features(${TARGET} PRIVATE cxx_std_11)
diff --git a/examples/compress/README.md b/examples/compress/README.md
@@ -0,0 +1,3 @@
+# llama.cpp/examples/compress
+
+Demonstration of LLM-based natural language compression.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		# llama.cpp/examples/compress

		Demonstration of LLM-based natural language compression.