Update run_llm_inference.sh with recommended models.

PiperOrigin-RevId: 698802697
google-ai-edge · Nov 21, 2024 · df03022 · df03022
1 parent 2fab0dd
commit df03022
Showing 1 changed file with 6 additions and 0 deletions.
diff --git a/run_llm_inference.sh b/run_llm_inference.sh
@@ -2,6 +2,12 @@
 
 # This is a simple script to run LLM inference on Android via the MediaPipe
 # LLM inference engine.
+#
+# This script can run transformer-based LLMs packaged in *.task or *.bin
+# format. We recommend using `gemma2-2b-it-cpu-int8.task` (from
+# https://www.kaggle.com/models/google/gemma-2/tfLite/gemma2-2b-it-cpu-int8) or
+# the smaller `gemma-1.1-2b-it-cpu-int4.bin` model (from
+# https://www.kaggle.com/models/google/gemma/tfLite/gemma-1.1-2b-it-cpu-int4).
 
 MODEL_FILENAME="gemma2-2b-it-cpu-int8.task"
 ADB_WORK_DIR="/data/local/tmp"