
usage stats and crash reports #2220

Merged 27 commits on Jul 19, 2024
53 changes: 47 additions & 6 deletions Cargo.lock

Some generated files are not rendered by default.

17 changes: 17 additions & 0 deletions docs/source/basic_tutorials/launcher.md
@@ -424,6 +424,23 @@ Options:

          [env: LORA_ADAPTERS=]

```
## DISABLE_USAGE_STATS
```shell
      --disable-usage-stats
          Disable sending of all usage statistics

          [env: DISABLE_USAGE_STATS=]

```
## DISABLE_CRASH_REPORTS
```shell
      --disable-crash-reports
          Disable sending of crash reports, but allow anonymous usage statistics

          [env: DISABLE_CRASH_REPORTS=]

```
## HELP
```shell
...
```
73 changes: 73 additions & 0 deletions docs/source/usage_statistics.md
@@ -0,0 +1,73 @@

# Collection of Usage Statistics

Text Generation Inference collects anonymous usage statistics to help us improve the service. The collected data is used to improve TGI and to understand what causes failures; it is collected transparently, and any sensitive information is omitted.

Data is sent twice: once on server startup and once on server shutdown. Usage statistics are also enabled only when TGI is running in Docker, to avoid collecting data when TGI runs directly on the host machine.
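As a purely hypothetical sketch of how such a gate can work (the actual check lives in the usage-stats code and may differ), one common way to detect a Docker environment is the `/.dockerenv` file that the Docker runtime creates at the container root:

```rust
use std::path::Path;

// Hypothetical check: Docker creates /.dockerenv inside containers, so its
// presence is a common (if imperfect) signal of running under Docker.
fn running_in_docker() -> bool {
    Path::new("/.dockerenv").exists()
}

fn main() {
    if running_in_docker() {
        println!("container detected: usage statistics stay enabled");
    } else {
        println!("host machine: usage statistics are not collected");
    }
}
```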

## What data is collected

The code that collects the data is available [here](https://github.com/huggingface/text-generation-inference/blob/main/router/src/usage_stats.rs).
As of release 2.1.2, this is an example of the data collected:

- From the TGI configuration:
```json
{
  "event_type": "start",
  "disable_grammar_support": false,
  "max_batch_prefill_tokens": 4096,
  "max_batch_size": null,
  "max_batch_total_tokens": null,
  "max_best_of": 2,
  "max_client_batch_size": 4,
  "max_concurrent_requests": 128,
  "max_input_tokens": 1024,
  "max_stop_sequences": 4,
  "max_top_n_tokens": 5,
  "max_total_tokens": 2048,
  "max_waiting_tokens": 20,
  "messages_api_enabled": false,
  "model_config": {
    "model_type": "Bloom"
  },
  "revision": null,
  "tokenizer_class": "BloomTokenizerFast",
  "validation_workers": 2,
  "waiting_served_ratio": 1.2,
  "docker_label": "latest",
  "git_sha": "cfc118704880453d29bcbe4fbbd91dda501cf5fe",
  "nvidia_env": {
    "name": "NVIDIA A10G",
    "pci_bus_id": "00000000:00:1E.0",
    "driver_version": "535.183.01",
    "pstate": "P8",
    "pcie_link_gen_max": "4",
    "pcie_link_gen_current": "1",
    "temperature_gpu": "31",
    "utilization_gpu": "0 %",
    "utilization_memory": "0 %",
    "memory_total": "23028 MiB",
    "memory_free": "22515 MiB",
    "memory_used": "0 MiB",
    "reset_status_reset_required": "No",
    "reset_status_drain_and_reset_recommended": "No",
    "compute_cap": "8.6",
    "ecc_errors_corrected_volatile_total": "0",
    "mig_mode_current": "[N/A]",
    "power_draw_instant": "10.86 W",
    "power_limit": "300.00 W"
  },
  "system_env": {
    "cpu_count": 16,
    "cpu_type": "AMD EPYC 7R32",
    "total_memory": 66681196544,
    "architecture": "x86_64",
    "platform": "linux-unix-x86_64"
  }
}

```

## How to opt-out

You can opt out by passing the `--disable-usage-stats` flag to the `text-generation-launcher` command. This disables all usage statistics. You can also pass `--disable-crash-reports`, which disables sending specific crash reports but still allows anonymous usage statistics.
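For example (a sketch; `bigscience/bloom-560m` is just a placeholder model, and either option can also be set through its environment variable):

```shell
# Disable all usage statistics with the CLI flag
text-generation-launcher --model-id bigscience/bloom-560m --disable-usage-stats

# Keep anonymous usage statistics but disable crash reports,
# this time via the environment variable
DISABLE_CRASH_REPORTS=true text-generation-launcher --model-id bigscience/bloom-560m
```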
16 changes: 16 additions & 0 deletions launcher/src/main.rs
@@ -457,6 +457,14 @@ struct Args {
    /// startup that will be available to callers via the `adapter_id` field in a request.
    #[clap(long, env)]
    lora_adapters: Option<String>,

    /// Disable sending of all usage statistics
    #[clap(default_value = "false", long, env)]
    disable_usage_stats: bool,

    /// Disable sending of crash reports, but allow anonymous usage statistics
    #[clap(default_value = "false", long, env)]
    disable_crash_reports: bool,
}

#[derive(Debug)]
@@ -1201,6 +1209,14 @@ fn spawn_webserver(
        args.model_id,
    ];

    // Pass usage stats flags to router
    if args.disable_usage_stats {
        router_args.push("--disable-usage-stats".to_string());
    }
    if args.disable_crash_reports {
        router_args.push("--disable-crash-reports".to_string());
    }

    // Grammar support
    if args.disable_grammar_support {
        router_args.push("--disable-grammar-support".to_string());
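The router end of this handshake presumably declares matching clap flags; a hypothetical sketch of that mirror-image definition follows (the real argument struct lives in `router/src/main.rs` and is not shown in this diff):

```rust
// Requires clap with the "derive" and "env" features enabled.
use clap::Parser;

// Hypothetical mirror of the launcher flags on the router side;
// the actual definitions in router/src/main.rs may differ.
#[derive(Parser, Debug)]
struct RouterArgs {
    #[clap(default_value = "false", long, env)]
    disable_usage_stats: bool,
    #[clap(default_value = "false", long, env)]
    disable_crash_reports: bool,
}

fn main() {
    let args = RouterArgs::parse();
    println!(
        "usage stats disabled: {}, crash reports disabled: {}",
        args.disable_usage_stats, args.disable_crash_reports
    );
}
```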
4 changes: 4 additions & 0 deletions router/Cargo.toml
@@ -52,6 +52,10 @@ regex = "1.10.3"
once_cell = "1.19.0"
image = "0.25.1"
base64 = { workspace = true }
sysinfo = "0.30.13"
uuid = { version = "1.9.1", default-features = false, features = ["v4", "fast-rng", "macro-diagnostics"] }
csv = "1.3.0"


[build-dependencies]
vergen = { version = "8.2.5", features = ["build", "git", "gitcl"] }
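The new dependencies hint at how the payload shown earlier is assembled: `sysinfo` can gather the `system_env` fields and `uuid` can mint an anonymous per-run identifier. A minimal sketch under those assumptions (not the actual `usage_stats.rs` code):

```rust
// Sketch only: approximates how the `system_env` block shown earlier
// could be gathered; TGI's usage_stats.rs may differ in detail.
use sysinfo::System;
use uuid::Uuid;

fn main() {
    let mut sys = System::new_all();
    sys.refresh_all();

    // Anonymous identifier for this run, never tied to a user identity
    let session_id = Uuid::new_v4();

    let cpu_count = sys.cpus().len();
    let cpu_type = sys
        .cpus()
        .first()
        .map(|cpu| cpu.brand().to_string())
        .unwrap_or_default();
    let total_memory = sys.total_memory(); // bytes in sysinfo 0.30
    let architecture = std::env::consts::ARCH;

    println!(
        "session={session_id} cpus={cpu_count} cpu_type={cpu_type} \
         memory={total_memory} arch={architecture}"
    );
}
```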
8 changes: 5 additions & 3 deletions router/src/lib.rs
@@ -7,6 +7,8 @@ mod validation;
#[cfg(feature = "kserve")]
mod kserve;

pub mod usage_stats;

use serde::{Deserialize, Serialize};
use tracing::warn;
use utoipa::ToSchema;
@@ -40,13 +42,13 @@ pub struct HubModelInfo {
    pub pipeline_tag: Option<String>,
}

-#[derive(Debug, Clone, Deserialize, PartialEq)]
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ChatTemplate {
    name: String,
    template: String,
}

-#[derive(Debug, Clone, Deserialize, PartialEq)]
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(untagged)]
pub enum ChatTemplateVersions {
    Single(String),
@@ -55,7 +57,7 @@ pub enum ChatTemplateVersions {

use std::path::Path;

-#[derive(Debug, Clone, Deserialize, Default)]
+#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct HubTokenizerConfig {
    pub chat_template: Option<ChatTemplateVersions>,
    pub completion_template: Option<String>,
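These `Serialize` derives exist so the router can embed the tokenizer and chat-template configuration in the usage-stats payload. A minimal sketch of the idea using `serde_json` (already a router dependency); the struct is redeclared here so the sketch is self-contained, and the literal values are purely illustrative:

```rust
// Requires serde with the "derive" feature, plus serde_json.
use serde::{Deserialize, Serialize};

// Same shape as the router's ChatTemplate, redeclared for the sketch.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
struct ChatTemplate {
    name: String,
    template: String,
}

fn main() -> Result<(), serde_json::Error> {
    let template = ChatTemplate {
        name: "default".to_string(),
        template: "{{ messages }}".to_string(),
    };
    // With Serialize derived, the config can become part of the JSON
    // event that the usage-stats module sends.
    println!("{}", serde_json::to_string_pretty(&template)?);
    Ok(())
}
```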