diff --git a/.gitignore b/.gitignore
index 5ae030200f897a..24efa85629b2c4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -129,3 +129,6 @@ poetry.toml
 
 # Scripts
 !/scripts/install-oneapi.bat
+
+# Test models for lora adapters
+/reduce-llms-for-testing
diff --git a/tests/test_lora_conversion_and_inference.sh b/tests/test_lora_conversion_and_inference.sh
new file mode 100755
index 00000000000000..76d293d3c44531
--- /dev/null
+++ b/tests/test_lora_conversion_and_inference.sh
@@ -0,0 +1,172 @@
+#!/bin/bash
+#
+# End-to-end test of LoRA conversion and inference on small test models.
+#
+# For every "<model name> <size>" entry in `params` the script:
+#   1. converts the base safetensors model to GGUF (convert_hf_to_gguf.py),
+#   2. converts the LoRA adapter to GGUF (convert_lora_to_gguf.py),
+#   3. merges the adapter into the base model (llama-export-lora),
+#   4. runs llama-cli on the base model, on the base model with the adapter
+#      passed via --lora, and on the merged model, then checks that each
+#      output matches the beginning of its reference text.
+#
+# Run it from the directory that contains the conversion scripts and the
+# llama-cli / llama-export-lora binaries; all of them are invoked by
+# relative path. Set PYTHON to override the interpreter (default: python3).
+set -euo pipefail
+
+# Interpreter used for both conversion scripts.
+PYTHON=${PYTHON:-python3}
+
+# Models to test, one "<model name> <size>" pair per entry. The pair maps to
+# the directory $MODELS_REPO/<model name>/size=<size>.
+declare -a params=(
+    # "Gemma2ForCausalLM 64" TODO Add this model
+    "LlamaForCausalLM 64"
+    "Phi3ForCausalLM 64"
+)
+
+MODELS_REPO=reduce-llms-for-testing
+MODELS_REPO_URL=https://huggingface.co/ltoniazzi/$MODELS_REPO
+
+# Clone the Hugging Face repository if the directory does not exist
+if [[ ! -d "$MODELS_REPO" ]]; then
+    echo "Cloning the Hugging Face repository..."
+    git clone "$MODELS_REPO_URL"
+else
+    echo "Repository already exists. Skipping clone."
+fi
+
+# Full reference texts. These are never modified: every model is compared
+# against a prefix cut from the complete text, so one model's output length
+# cannot affect the expectation used for the next model.
+EXPECTED_BASE_FULL=$(cat "$MODELS_REPO/data/pale_blue_dot.txt")
+EXPECTED_LORA_FULL=$(cat "$MODELS_REPO/data/bohemian_rhapsody.txt")
+readonly EXPECTED_BASE_FULL EXPECTED_LORA_FULL
+
+# Per-model summaries, printed once every model has passed
+results=()
+
+# Print $1 with its leading whitespace removed.
+trim_leading_whitespace() {
+    local input_string="$1"
+    printf '%s\n' "${input_string#"${input_string%%[![:space:]]*}"}"
+}
+
+# Print the first N characters of $1, where N is the length of $2.
+extract_starting_substring() {
+    local reference_string="$1"
+    local target_string="$2"
+
+    printf '%s\n' "${reference_string:0:${#target_string}}"
+}
+
+# Exit with status 1 unless an output matches the start of its reference.
+#   $1 - model name (used in messages)
+#   $2 - output label (used in messages)
+#   $3 - output, leading whitespace already removed
+#   $4 - full reference text
+check_output_prefix() {
+    local model_name=$1
+    local label=$2
+    local output=$3
+    local reference=$4
+    local expected
+
+    # An empty output equals the empty prefix of any reference, so it has
+    # to be rejected explicitly.
+    if [[ -z "$output" ]]; then
+        echo "Error: $model_name $label is empty." >&2
+        exit 1
+    fi
+
+    expected=$(extract_starting_substring "$reference" "$output")
+    if [[ "$output" != "$expected" ]]; then
+        echo "Error: $model_name $label does not start with the expected string." >&2
+        printf 'Out=%s\n\nExp=%s\n' "$output" "$expected" >&2
+        exit 1
+    fi
+}
+
+# Convert, merge and run one model, then record its outputs in `results`.
+#   $1 - model name (directory under $MODELS_REPO)
+#   $2 - size (selects the size=<size> subdirectory)
+# Exits the script with status 1 on the first output mismatch.
+run_conversion_and_inference_lora() {
+    local model_name=$1
+    local size_matrix=$2
+    local model_dir="$MODELS_REPO/$model_name/size=$size_matrix"
+    local base_gguf="$model_dir/base/Base-F32.gguf"
+    local merged_gguf="$model_dir/base/Base-F32-lora-merged.gguf"
+    local lora_gguf="$model_dir/lora/Lora-F32-LoRA.gguf"
+    local output_base output_lora_hot output_lora_merged summary
+
+    # Convert safetensors to gguf
+    echo "Running convert_hf_to_gguf.py for $model_name with size $size_matrix..."
+    "$PYTHON" convert_hf_to_gguf.py "$model_dir/base" \
+        --outfile "$base_gguf" \
+        --outtype f32
+
+    # No --outfile is passed here; the later steps read the adapter from
+    # $lora_gguf, so the converter is expected to write it there.
+    echo "Running convert_lora_to_gguf.py for $model_name with size $size_matrix..."
+    "$PYTHON" convert_lora_to_gguf.py "$model_dir/lora" \
+        --base "$model_dir/base" \
+        --outtype f32
+
+    echo "Running llama-export-lora with lora for $model_name with size $size_matrix..."
+    ./llama-export-lora \
+        -m "$base_gguf" \
+        -o "$merged_gguf" \
+        --lora "$lora_gguf"
+
+    # Run inference. The seed and temperature are fixed for every run.
+    echo "Running llama-cli without lora for $model_name with size $size_matrix..."
+    output_base=$(./llama-cli -m "$base_gguf" \
+        -p "Look again at that dot." -n 50 --seed 42 --temp 0)
+
+    echo "Running llama-cli with lora for $model_name with size $size_matrix..."
+    output_lora_hot=$(./llama-cli -m "$base_gguf" \
+        --lora "$lora_gguf" \
+        -p "I see a little silhouetto" -n 50 --seed 42 --temp 0)
+
+    echo "Running llama-cli with exported lora for $model_name with size $size_matrix..."
+    output_lora_merged=$(./llama-cli -m "$merged_gguf" \
+        -p "I see a little silhouetto" -n 50 --seed 42 --temp 0)
+
+    # Remove initial white space before comparing
+    output_base=$(trim_leading_whitespace "$output_base")
+    output_lora_hot=$(trim_leading_whitespace "$output_lora_hot")
+    output_lora_merged=$(trim_leading_whitespace "$output_lora_merged")
+
+    # Compare each output with the start of its reference text
+    check_output_prefix "$model_name" OUTPUT_BASE \
+        "$output_base" "$EXPECTED_BASE_FULL"
+    check_output_prefix "$model_name" OUTPUT_LORA_HOT \
+        "$output_lora_hot" "$EXPECTED_LORA_FULL"
+    check_output_prefix "$model_name" OUTPUT_LORA_MERGED \
+        "$output_lora_merged" "$EXPECTED_LORA_FULL"
+
+    # Store the formatted result. The outputs are passed as printf arguments
+    # so that backslashes in model output are printed literally.
+    printf -v summary \
+        '\n\033[1mResults for %s with size %s:\033[0m\n\033[32m  • Base:\n%s\n\033[34m  • Lora hot:\n%s\n\033[36m  • Lora merged:\n%s\n\033[0m' \
+        "$model_name" "$size_matrix" \
+        "$output_base" "$output_lora_hot" "$output_lora_merged"
+    results+=("$summary")
+
+    echo "All steps completed for $model_name with size $size_matrix!"
+}
+
+# Run test for each model
+for param in "${params[@]}"; do
+    # Each entry is "<model name> <size>"
+    read -r model_name size_matrix <<< "$param"
+    run_conversion_and_inference_lora "$model_name" "$size_matrix"
+done
+
+# Print all collected results
+printf '\n\n\033[1mSummary of All Results:\033[0m\n'
+for result in "${results[@]}"; do
+    printf '%s\n' "$result"
+done