diff --git a/python/src/aiconfig/eval/promptfoo/README.md b/python/src/aiconfig/eval/promptfoo/README.md
new file mode 100644
index 000000000..9d1c2a7ec
--- /dev/null
+++ b/python/src/aiconfig/eval/promptfoo/README.md
@@ -0,0 +1,30 @@
+# Promptfoo integration
+
+Use case: I'm a SWE who wants to run my AIConfig against a set of test cases specified in a config file. Each test case has an input and a success condition of my choosing.
+
+## Philosophy / design
+
+Promptfoo has a clean interface (both inputs and outputs) for this use case. Tests are specified in a YAML file, and the whole suite runs with a single command. The same config file makes it easy to connect your test suite to an AIConfig with a small amount of code.
+
+## How-to guide
+
+1. Write your test cases in a Promptfoo config file. See examples/travel/travel_promptfooconfig.yaml for an example.
+2. Define an AIConfig test suite settings file. It holds the prompt name and the path to your aiconfig; see examples/travel/travel_aiconfig_test_suite_settings.json for an example, or the sketch below.
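+
+A minimal sketch of the settings file (the two fields, `prompt_name` and `aiconfig_path`, are exactly what `run_aiconfig.py` reads):
+
+```
+{
+  "prompt_name": "get_activities",
+  "aiconfig_path": "travel_parametrized_for_testing.aiconfig.json"
+}
+```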
+
+3. Set your provider to point to run_aiconfig.py, with your settings file as the argument (see examples/travel/travel_promptfooconfig.yaml), like this:
+
+```
+providers:
+  - exec:python ../../run_aiconfig.py ./travel_aiconfig_test_suite_settings.json
+```
+
+4. Export your provider API key if needed so it's available to subprocess environments:
+   `export OPENAI_API_KEY=...`
+
+5. Make sure your shell environment (including subshells) contains a python3 executable called `python` on its path. One way to do this is to set up an anaconda/miniconda environment.
+
+6. Run `npx promptfoo@latest eval -c path/to/promptfooconfig.yaml`.
+   You should see one passing test and one failing test.
+
+### Debugging / Troubleshooting
+
+Help, something went wrong!
+-> Try adding the `--verbose` flag at the end of the `npx` command, e.g. `npx promptfoo@latest eval -c path/to/promptfooconfig.yaml --verbose`.
\ No newline at end of file
diff --git a/python/src/aiconfig/eval/promptfoo/examples/travel/travel_aiconfig_test_suite_settings.json b/python/src/aiconfig/eval/promptfoo/examples/travel/travel_aiconfig_test_suite_settings.json
new file mode 100644
index 000000000..eaa60e745
--- /dev/null
+++ b/python/src/aiconfig/eval/promptfoo/examples/travel/travel_aiconfig_test_suite_settings.json
@@ -0,0 +1,4 @@
+{
+  "prompt_name": "get_activities",
+  "aiconfig_path": "travel_parametrized_for_testing.aiconfig.json"
+}
diff --git a/python/src/aiconfig/eval/promptfoo/examples/travel/travel_parametrized_for_testing.aiconfig.json b/python/src/aiconfig/eval/promptfoo/examples/travel/travel_parametrized_for_testing.aiconfig.json
new file mode 100644
index 000000000..555a571cd
--- /dev/null
+++ b/python/src/aiconfig/eval/promptfoo/examples/travel/travel_parametrized_for_testing.aiconfig.json
@@ -0,0 +1,36 @@
+{
+  "name": "NYC Trip Planner",
+  "description": "Intrepid explorer with ChatGPT and AIConfig",
+  "schema_version": "latest",
+  "metadata": {
+    "models": {
+      "gpt-3.5-turbo": {
+        "model": "gpt-3.5-turbo",
+        "top_p": 1,
+        "temperature": 1
+      },
+      "gpt-4": {
+        "model": "gpt-4",
+        "max_tokens": 3000,
+        "system_prompt": "You are an expert travel coordinator with exquisite taste."
+      }
+    },
+    "default_model": "gpt-3.5-turbo"
+  },
+  "prompts": [
+    {
+      "name": "get_activities",
+      "input": "{{the_query}}"
+    },
+    {
+      "name": "gen_itinerary",
+      "input": "Generate an itinerary ordered by {{order_by}} for these activities: {{get_activities.output}}.",
+      "metadata": {
+        "model": "gpt-4",
+        "parameters": {
+          "order_by": "geographic location"
+        }
+      }
+    }
+  ]
+}
diff --git a/python/src/aiconfig/eval/promptfoo/examples/travel/travel_promptfooconfig.yaml b/python/src/aiconfig/eval/promptfoo/examples/travel/travel_promptfooconfig.yaml
new file mode 100644
index 000000000..95513b5bf
--- /dev/null
+++ b/python/src/aiconfig/eval/promptfoo/examples/travel/travel_promptfooconfig.yaml
@@ -0,0 +1,16 @@
+prompts: ["{{question}}"]
+providers:
+  - exec:python ../../run_aiconfig.py ./travel_aiconfig_test_suite_settings.json
+tests:
+  - description: "Test that the output contains the expected substring"
+    vars:
+      question: Empire State Building is on fifth avenue. What is the cross street?
+    assert:
+      - type: python
+        value: output.lower().find('34th street') != -1
+  - description: "Test that the output equals the expected value"
+    vars:
+      question: "which is the best borough?"
+    assert:
+      - type: python
+        value: output == "Brooklyn"
diff --git a/python/src/aiconfig/eval/promptfoo/run_aiconfig.py b/python/src/aiconfig/eval/promptfoo/run_aiconfig.py
new file mode 100644
index 000000000..36aed54ce
--- /dev/null
+++ b/python/src/aiconfig/eval/promptfoo/run_aiconfig.py
@@ -0,0 +1,41 @@
+import asyncio
+import json
+import os
+import sys
+from typing import Any
+
+import openai
+from dotenv import load_dotenv
+
+from aiconfig import AIConfigRuntime
+
+
+async def main():
+    # argv[1] is the settings path from the providers entry; promptfoo's
+    # exec provider appends the rendered test prompt as the final argument.
+    settings_path = sys.argv[1]
+    settings = _load_settings(settings_path)
+
+    question = sys.argv[2]
+    prompt_name = settings["prompt_name"]
+    aiconfig_path = settings["aiconfig_path"]
+
+    load_dotenv()
+    openai.api_key = os.getenv("OPENAI_API_KEY")
+    runtime = AIConfigRuntime.load(aiconfig_path)
+
+    params = {
+        "the_query": question,
+    }
+
+    result = await runtime.run(prompt_name, params)
+    final_output = runtime.get_output_text(prompt_name, result[0])
+    # Promptfoo treats stdout as the provider's completion.
+    print(final_output)
+
+
+def _load_settings(settings_path: str) -> dict[str, Any]:
+    with open(settings_path) as f:
+        settings = json.load(f)
+    return settings
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/typescript/scripts/eval/promptfoo/README.md b/typescript/scripts/eval/promptfoo/README.md
new file mode 100644
index 000000000..6cfc58f01
--- /dev/null
+++ b/typescript/scripts/eval/promptfoo/README.md
@@ -0,0 +1,30 @@
+# Promptfoo integration
+
+Use case: I'm a SWE who wants to run my AIConfig against a set of test cases specified in a config file. Each test case has an input and a success condition of my choosing.
+
+## Philosophy / design
+
+Promptfoo has a clean interface (both inputs and outputs) for this use case. Tests are specified in a YAML file, and the whole suite runs with a single command. The same config file makes it easy to connect your test suite to an AIConfig with a small amount of code.
+
+## How-to guide
+
+1. Write your test cases in a Promptfoo config file. See examples/travel/travel_promptfooconfig.yaml for an example.
+2. Define an AIConfig test suite settings file. It holds the prompt name and the path to your aiconfig; see examples/travel/travel_aiconfig_test_suite_settings.json for an example, or the sketch below.
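+
+A minimal sketch of the settings file (the two fields, `prompt_name` and `aiconfig_path`, are exactly what `run_aiconfig.ts` reads; note the script resolves `aiconfig_path` relative to its own directory):
+
+```
+{
+  "prompt_name": "get_activities",
+  "aiconfig_path": "examples/travel/travel_parametrized_for_testing.aiconfig.json"
+}
+```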
+
+3. Set your provider to point to run_aiconfig.ts, with your settings file as the argument (see examples/travel/travel_promptfooconfig.yaml), like this:
+
+```
+providers:
+  - exec:npx ts-node ../../run_aiconfig.ts ./travel_aiconfig_test_suite_settings.json
+```
+
+4. Export your provider API key if needed so it's available to subprocess environments:
+   `export OPENAI_API_KEY=...`
+
+5. Run `cd typescript; npx promptfoo@latest eval -c path/to/promptfooconfig.yaml`.
+   You should see one passing test and one failing test.
+
+### Debugging / Troubleshooting
+
+Help, something went wrong!
+-> Try adding the `--verbose` flag at the end of the `npx` command, e.g. `npx promptfoo@latest eval -c path/to/promptfooconfig.yaml --verbose`.
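+-> You can also run the provider script directly, outside Promptfoo, to isolate failures. A sketch, assuming you run it from this directory (run_aiconfig.ts takes the settings path followed by the question):
+
+```
+npx ts-node run_aiconfig.ts ./examples/travel/travel_aiconfig_test_suite_settings.json "which is the best borough?"
+```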
diff --git a/typescript/scripts/eval/promptfoo/examples/travel/travel_aiconfig_test_suite_settings.json b/typescript/scripts/eval/promptfoo/examples/travel/travel_aiconfig_test_suite_settings.json
new file mode 100644
index 000000000..6cb5eb3a3
--- /dev/null
+++ b/typescript/scripts/eval/promptfoo/examples/travel/travel_aiconfig_test_suite_settings.json
@@ -0,0 +1,4 @@
+{
+  "prompt_name": "get_activities",
+  "aiconfig_path": "examples/travel/travel_parametrized_for_testing.aiconfig.json"
+}
diff --git a/typescript/scripts/eval/promptfoo/examples/travel/travel_parametrized_for_testing.aiconfig.json b/typescript/scripts/eval/promptfoo/examples/travel/travel_parametrized_for_testing.aiconfig.json
new file mode 100644
index 000000000..555a571cd
--- /dev/null
+++ b/typescript/scripts/eval/promptfoo/examples/travel/travel_parametrized_for_testing.aiconfig.json
@@ -0,0 +1,36 @@
+{
+  "name": "NYC Trip Planner",
+  "description": "Intrepid explorer with ChatGPT and AIConfig",
+  "schema_version": "latest",
+  "metadata": {
+    "models": {
+      "gpt-3.5-turbo": {
+        "model": "gpt-3.5-turbo",
+        "top_p": 1,
+        "temperature": 1
+      },
+      "gpt-4": {
+        "model": "gpt-4",
+        "max_tokens": 3000,
+        "system_prompt": "You are an expert travel coordinator with exquisite taste."
+      }
+    },
+    "default_model": "gpt-3.5-turbo"
+  },
+  "prompts": [
+    {
+      "name": "get_activities",
+      "input": "{{the_query}}"
+    },
+    {
+      "name": "gen_itinerary",
+      "input": "Generate an itinerary ordered by {{order_by}} for these activities: {{get_activities.output}}.",
+      "metadata": {
+        "model": "gpt-4",
+        "parameters": {
+          "order_by": "geographic location"
+        }
+      }
+    }
+  ]
+}
diff --git a/typescript/scripts/eval/promptfoo/examples/travel/travel_promptfooconfig.yaml b/typescript/scripts/eval/promptfoo/examples/travel/travel_promptfooconfig.yaml
new file mode 100644
index 000000000..dc1f20cf7
--- /dev/null
+++ b/typescript/scripts/eval/promptfoo/examples/travel/travel_promptfooconfig.yaml
@@ -0,0 +1,16 @@
+prompts: ["{{question}}"]
+providers:
+  - exec:npx ts-node ../../run_aiconfig.ts ./travel_aiconfig_test_suite_settings.json
+tests:
+  - description: "Test that the output contains the expected substring"
+    vars:
+      question: Empire State Building is on fifth avenue. What is the cross street?
+    assert:
+      - type: python
+        value: output.lower().find('34th street') != -1
+  - description: "Test that the output equals the expected value"
+    vars:
+      question: "which is the best borough?"
+    assert:
+      - type: python
+        value: output == "Brooklyn"
diff --git a/typescript/scripts/eval/promptfoo/run_aiconfig.ts b/typescript/scripts/eval/promptfoo/run_aiconfig.ts
new file mode 100644
index 000000000..026118770
--- /dev/null
+++ b/typescript/scripts/eval/promptfoo/run_aiconfig.ts
@@ -0,0 +1,31 @@
+import { AIConfigRuntime } from "../../../lib/config";
+import fs from "fs";
+import path from "path";
+
+async function main() {
+  // argv[2] is the settings path from the providers entry; promptfoo's
+  // exec provider appends the rendered test prompt as the final argument.
+  const settings_path = process.argv[2];
+  const question = process.argv[3];
+  const settings = _load_settings(settings_path);
+  const prompt_name = settings["prompt_name"];
+  const aiconfig_path = settings["aiconfig_path"];
+
+  // Resolve the aiconfig relative to this script so it works regardless
+  // of the working directory promptfoo invokes it from.
+  const fullAIConfigPath = path.join(__dirname, aiconfig_path);
+  const runtime = AIConfigRuntime.load(fullAIConfigPath);
+  const params = {
+    the_query: question,
+  };
+  const result = await runtime.run(prompt_name, params);
+
+  // run() may return a single output or an array; take the first either way.
+  const r0 = Array.isArray(result) ? result[0] : result;
+  const final_output = runtime.getOutputText(prompt_name, r0);
+  // Promptfoo treats stdout as the provider's completion.
+  console.log(final_output);
+}
+
+function _load_settings(settings_path: string) {
+  const rawdata = fs.readFileSync(settings_path, "utf-8");
+  const settings = JSON.parse(rawdata);
+  return settings;
+}
+
+main().catch((err) => {
+  console.error(err);
+  process.exit(1);
+});