fix intel gpu peak mem
sunjiweiswift committed Nov 14, 2024
1 parent d769d90 commit 5fb782a
Showing 1 changed file with 4 additions and 1 deletion.
torchao/_models/llama/generate.py: 4 additions & 1 deletion
@@ -428,7 +428,10 @@ def callback(x):
 
     tokpersec = torch.mean(torch.tensor(aggregate_metrics['tokens_per_sec'])).item()
     bandwidth = model_size * tokpersec
-    mem = torch.cuda.max_memory_reserved() /1e9
+    if device == "cuda":
+        mem = torch.cuda.max_memory_reserved() /1e9
+    elif device == "xpu":
+        mem = torch.xpu.max_memory_reserved() /1e9
     print(f"Average tokens/sec: {tokpersec:.2f}")
     if batch_size > 1:
         print(f"Average tokens/sec including batches {batch_size*tokpersec:.2f}")
