fix intel gpu peak mem
sunjiweiswift committed Nov 14, 2024
1 parent d769d90 commit 5fb782a
Showing 1 changed file with 4 additions and 1 deletion.
torchao/_models/llama/generate.py: 4 additions & 1 deletion
@@ -428,7 +428,10 @@ def callback(x):
 
     tokpersec = torch.mean(torch.tensor(aggregate_metrics['tokens_per_sec'])).item()
     bandwidth = model_size * tokpersec
-    mem = torch.cuda.max_memory_reserved() /1e9
+    if device == "cuda":
+        mem = torch.cuda.max_memory_reserved() /1e9
+    elif device == "xpu":
+        mem = torch.xpu.max_memory_reserved() /1e9
     print(f"Average tokens/sec: {tokpersec:.2f}")
     if batch_size > 1:
         print(f"Average tokens/sec including batches {batch_size*tokpersec:.2f}")
