-
Notifications
You must be signed in to change notification settings - Fork 81
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[AIC-py] promptfoo integration prototype
Make a custom provider using AIConfig and configure it with promptfoo yaml. Run (TS connector): `npx promptfoo@latest eval -c typescript/scripts/eval/promptfoo/examples/travel/travel_promptfooconfig.yaml` Run (Py connector): `npx promptfoo@latest eval -c python/src/aiconfig/eval/promptfoo/examples/travel/travel_promptfooconfig.yaml` <img width="1919" alt="image" src="https://github.com/lastmile-ai/aiconfig/assets/148090348/c1e7c6a3-2d4e-4799-bfa0-131dca06c5f6">
- Loading branch information
1 parent
802ce1d
commit b566b9c
Showing
10 changed files
with
244 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# Promptfoo integration | ||
|
||
Use case: I'm a SWE who wants to run my AIConfig against a set of test cases specified in a config file. Each test case has the input and a success condition of my choosing. | ||
|
||
## Philosophy / design | ||
|
||
Promptfoo has a pretty nice interface (both inputs and outputs) for addressing this use case. Tests are specified in a YAML file, and the test suite can be run with a simple command. The same config file makes it easy to connect your test suite to an AIConfig with a small amount of code. | |
|
||
## How-to guide | ||
|
||
1. Write your test cases in a Promptfoo config file. See examples/travel/travel_promptfooconfig.yaml as an example. | |
2. Define an AIConfig test suite settings file. It should have the prompt name and path to your aiconfig. See examples/travel/travel_aiconfig_test_suite_settings.json for example. | ||
3. Set your provider to point to run_aiconfig.py with your settings file as the argument. For example, see examples/travel/travel_promptfooconfig.yaml: | |
|
||
``` | ||
providers: | ||
- exec:python ../../run_aiconfig.py ./travel_aiconfig_test_suite_settings.json | ||
``` | ||
|
||
4. Export your provider API key, if needed, so that it is available to subprocess environments: | |
`export OPENAI_API_KEY=...` | ||
|
||
5. Make sure your shell environment (including subshells) contains a python3 executable called `python` on its path. One way to do this is to set up an anaconda/miniconda environment. | ||
|
||
6. Run `npx promptfoo@latest eval -c path/to/promptfooconfig.yaml` | ||
You should see one passing test and one failing test. | ||
|
||
### Debugging / Troubleshooting | ||
Help, something went wrong! | ||
-> Try adding --verbose flag at the end of the `npx` command. |
4 changes: 4 additions & 0 deletions
4
python/src/aiconfig/eval/promptfoo/examples/travel/travel_aiconfig_test_suite_settings.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
{ | ||
"prompt_name": "get_activities", | ||
"aiconfig_path": "travel_parametrized_for_testing.aiconfig.json" | ||
} |
36 changes: 36 additions & 0 deletions
36
...src/aiconfig/eval/promptfoo/examples/travel/travel_parametrized_for_testing.aiconfig.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
{ | ||
"name": "NYC Trip Planner", | ||
"description": "Intrepid explorer with ChatGPT and AIConfig", | ||
"schema_version": "latest", | ||
"metadata": { | ||
"models": { | ||
"gpt-3.5-turbo": { | ||
"model": "gpt-3.5-turbo", | ||
"top_p": 1, | ||
"temperature": 1 | ||
}, | ||
"gpt-4": { | ||
"model": "gpt-4", | ||
"max_tokens": 3000, | ||
"system_prompt": "You are an expert travel coordinator with exquisite taste." | ||
} | ||
}, | ||
"default_model": "gpt-3.5-turbo" | ||
}, | ||
"prompts": [ | ||
{ | ||
"name": "get_activities", | ||
"input": "{{the_query}}" | ||
}, | ||
{ | ||
"name": "gen_itinerary", | ||
"input": "Generate an itinerary ordered by {{order_by}} for these activities: {{get_activities.output}}.", | ||
"metadata": { | ||
"model": "gpt-4", | ||
"parameters": { | ||
"order_by": "geographic location" | ||
} | ||
} | ||
} | ||
] | ||
} |
16 changes: 16 additions & 0 deletions
16
python/src/aiconfig/eval/promptfoo/examples/travel/travel_promptfooconfig.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
prompts: ["{{question}}"] | ||
providers: | ||
- exec:python ../../run_aiconfig.py ./travel_aiconfig_test_suite_settings.json | ||
tests: | ||
- description: "Test if output is equal to the expected value" | ||
vars: | ||
question: Empire State Building is on fifth avenue. What is the cross street? | ||
assert: | ||
- type: python | ||
value: output.lower().find('34th street') != -1 | ||
- description: "Test if output is equal to the expected value" | ||
vars: | ||
question: "which is the best borough?" | ||
assert: | ||
- type: python | ||
value: output == "Brooklyn" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
import asyncio | ||
import os | ||
|
||
import openai | ||
from dotenv import load_dotenv | ||
|
||
from aiconfig import AIConfigRuntime | ||
import sys | ||
import json | ||
from typing import Any | ||
|
||
|
||
async def main():
    """Run a single AIConfig prompt and print its text output to stdout.

    Command-line arguments:
        sys.argv[1]: path to a JSON settings file providing the
            ``prompt_name`` and ``aiconfig_path`` keys.
        sys.argv[2]: the question; it is bound to the ``the_query``
            parameter of the prompt.

    Exits with a non-zero status and a message on stderr when an argument is
    missing or when the run yields no output.
    """
    if len(sys.argv) < 3:
        # The example promptfoo config only lists the settings file; the
        # question is presumably appended by promptfoo's exec provider.
        sys.exit(f"usage: {sys.argv[0]} <settings.json> <question>")

    settings_path, question = sys.argv[1], sys.argv[2]
    settings = _load_settings(settings_path)
    prompt_name = settings["prompt_name"]
    aiconfig_path = settings["aiconfig_path"]

    # Pick up OPENAI_API_KEY from a .env file as well as the real environment.
    load_dotenv()
    openai.api_key = os.getenv("OPENAI_API_KEY")
    runtime = AIConfigRuntime.load(aiconfig_path)

    result = await runtime.run(prompt_name, {"the_query": question})
    if not result:
        # Fail with a readable message instead of an IndexError on result[0].
        sys.exit(f"prompt {prompt_name!r} produced no output")

    print(runtime.get_output_text(prompt_name, result[0]))
|
||
|
||
def _load_settings(settings_path: str) -> dict[str, Any]: | ||
with open(settings_path) as f: | ||
settings = json.load(f) | ||
return settings | ||
|
||
|
||
if __name__ == "__main__":
    # Script entry point: run the async main() to completion.
    asyncio.run(main())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# Promptfoo integration | ||
|
||
Use case: I'm a SWE who wants to run my AIConfig against a set of test cases specified in a config file. Each test case has the input and a success condition of my choosing. | ||
|
||
## Philosophy / design | ||
|
||
Promptfoo has a pretty nice interface (both inputs and outputs) for addressing this use case. Tests are specified in a YAML file, and the test suite can be run with a simple command. The same config file makes it easy to connect your test suite to an AIConfig with a small amount of code. | |
|
||
## How-to guide | ||
|
||
1. Write your test cases in a Promptfoo config file. See examples/travel/travel_promptfooconfig.yaml as an example. | |
2. Define an AIConfig test suite settings file. It should have the prompt name and path to your aiconfig. See examples/travel/travel_aiconfig_test_suite_settings.json for example. | ||
3. Set your provider to point to run_aiconfig.ts with your settings file as the argument. For example, see examples/travel/travel_promptfooconfig.yaml: | |
|
||
``` | ||
providers: | ||
- exec:npx ts-node ../../run_aiconfig.ts ./travel_aiconfig_test_suite_settings.json | ||
``` | ||
|
||
4. Export your provider API key, if needed, so that it is available to subprocess environments: | |
`export OPENAI_API_KEY=...` | ||
|
||
5. Run `cd typescript; npx promptfoo@latest eval -c path/to/promptfooconfig.yaml` | ||
You should see one passing test and one failing test. | ||
|
||
### Debugging / Troubleshooting | ||
|
||
Help, something went wrong! | ||
-> Try adding --verbose flag at the end of the `npx` command. |
4 changes: 4 additions & 0 deletions
4
typescript/scripts/eval/promptfoo/examples/travel/travel_aiconfig_test_suite_settings.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
{ | ||
"prompt_name": "get_activities", | ||
"aiconfig_path": "examples/travel/travel_parametrized_for_testing.aiconfig.json" | ||
} |
36 changes: 36 additions & 0 deletions
36
...ript/scripts/eval/promptfoo/examples/travel/travel_parametrized_for_testing.aiconfig.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
{ | ||
"name": "NYC Trip Planner", | ||
"description": "Intrepid explorer with ChatGPT and AIConfig", | ||
"schema_version": "latest", | ||
"metadata": { | ||
"models": { | ||
"gpt-3.5-turbo": { | ||
"model": "gpt-3.5-turbo", | ||
"top_p": 1, | ||
"temperature": 1 | ||
}, | ||
"gpt-4": { | ||
"model": "gpt-4", | ||
"max_tokens": 3000, | ||
"system_prompt": "You are an expert travel coordinator with exquisite taste." | ||
} | ||
}, | ||
"default_model": "gpt-3.5-turbo" | ||
}, | ||
"prompts": [ | ||
{ | ||
"name": "get_activities", | ||
"input": "{{the_query}}" | ||
}, | ||
{ | ||
"name": "gen_itinerary", | ||
"input": "Generate an itinerary ordered by {{order_by}} for these activities: {{get_activities.output}}.", | ||
"metadata": { | ||
"model": "gpt-4", | ||
"parameters": { | ||
"order_by": "geographic location" | ||
} | ||
} | ||
} | ||
] | ||
} |
16 changes: 16 additions & 0 deletions
16
typescript/scripts/eval/promptfoo/examples/travel/travel_promptfooconfig.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
prompts: ["{{question}}"] | ||
providers: | ||
- exec:npx ts-node ../../run_aiconfig.ts ./travel_aiconfig_test_suite_settings.json | ||
tests: | ||
- description: "Test if output is equal to the expected value" | ||
vars: | ||
question: Empire State Building is on fifth avenue. What is the cross street? | ||
assert: | ||
- type: python | ||
value: output.lower().find('34th street') != -1 | ||
- description: "Test if output is equal to the expected value" | ||
vars: | ||
question: "which is the best borough?" | ||
assert: | ||
- type: python | ||
value: output == "Brooklyn" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
import { AIConfigRuntime } from "../../../lib/config"; | ||
import fs from "fs"; | ||
import path from "path"; | ||
|
||
/**
 * Runs a single AIConfig prompt and prints its text output to stdout.
 *
 * Command-line arguments:
 * - `process.argv[2]`: path to a JSON settings file providing the
 *   `prompt_name` and `aiconfig_path` fields.
 * - `process.argv[3]`: the question; it is bound to the `the_query`
 *   parameter of the prompt.
 *
 * @throws Error when either command-line argument is missing.
 */
async function main(): Promise<void> {
  const [settingsPath, question] = process.argv.slice(2);
  if (settingsPath === undefined || question === undefined) {
    throw new Error(
      "usage: ts-node run_aiconfig.ts <settings.json> <question>"
    );
  }

  const settings = _load_settings(settingsPath);
  const promptName = settings["prompt_name"];
  const aiconfigPath = settings["aiconfig_path"];

  // resolve() anchors relative paths at this script's directory (as join()
  // did) but leaves an absolute aiconfig_path intact instead of mangling it.
  const fullAIConfigPath = path.resolve(__dirname, aiconfigPath);
  const runtime = AIConfigRuntime.load(fullAIConfigPath);

  const result = await runtime.run(promptName, { the_query: question });

  // run() may yield a single output or a list of outputs; use the first one.
  const firstOutput = Array.isArray(result) ? result[0] : result;
  const finalOutput: string = runtime.getOutputText(promptName, firstOutput);
  console.log(finalOutput);
}
|
||
/**
 * Loads and validates the test-suite settings from a JSON file.
 *
 * @param settings_path - Path to a JSON file whose top-level value is an
 *   object with string fields `prompt_name` and `aiconfig_path`.
 * @returns The parsed settings object (any additional keys are preserved).
 * @throws Error if the file cannot be read, is not valid JSON, is not a JSON
 *   object, or lacks either required string field.
 */
function _load_settings(settings_path: string): {
  prompt_name: string;
  aiconfig_path: string;
} {
  const rawData = fs.readFileSync(settings_path, "utf-8");
  // JSON.parse returns `any`; narrow from `unknown` before trusting it.
  const parsed: unknown = JSON.parse(rawData);
  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
    throw new Error(
      `Settings file ${settings_path} must contain a JSON object`
    );
  }

  const record = parsed as Record<string, unknown>;
  const { prompt_name, aiconfig_path } = record;
  if (typeof prompt_name !== "string" || typeof aiconfig_path !== "string") {
    throw new Error(
      `Settings file ${settings_path} must define string fields "prompt_name" and "aiconfig_path"`
    );
  }
  return { ...record, prompt_name, aiconfig_path };
}
|
||
// Script entry point. Handle rejection explicitly so a failed run is logged
// and reported through a non-zero exit code instead of a floating promise.
main().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});