Skip to content

Commit

Permalink
[AIC-py] promptfoo integration prototype
Browse files Browse the repository at this point in the history
Make a custom provider using AIConfig and configure it with promptfoo yaml.

Run (TS connector):
`npx promptfoo@latest eval -c typescript/scripts/eval/promptfoo/examples/travel/travel_promptfooconfig.yaml`

Run (Py connector):
`npx promptfoo@latest eval -c python/src/aiconfig/eval/promptfoo/examples/travel/travel_promptfooconfig.yaml`

<img width="1919" alt="image" src="https://github.com/lastmile-ai/aiconfig/assets/148090348/c1e7c6a3-2d4e-4799-bfa0-131dca06c5f6">
  • Loading branch information
jonathanlastmileai committed Dec 4, 2023
1 parent 802ce1d commit b566b9c
Show file tree
Hide file tree
Showing 10 changed files with 244 additions and 0 deletions.
30 changes: 30 additions & 0 deletions python/src/aiconfig/eval/promptfoo/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Promptfoo integration

Use case: I'm a SWE who wants to run my AIConfig against a set of test cases specified in a config file. Each test case has the input and a success condition of my choosing.

## Philosophy / design

Promptfoo has a pretty nice interface (both inputs and outputs) for addressing the use case. Tests are specified in a yaml file and the test suite can be run with a simple command. The same config file makes it easy to connect your test suite to an AIConfig with a small amount of code.

## How-to guide

1. Write your test cases in a Promptfoo config file. See examples/travel/travel_promptfooconfig.yaml as an example.
2. Define an AIConfig test suite settings file. It should have the prompt name and path to your aiconfig. See examples/travel/travel_aiconfig_test_suite_settings.json for example.
3. Set your provider to point to run_aiconfig.py with your settings file as the argument. For example, see examples/travel/travel_promptfooconfig.yaml. Like this:

```
providers:
- exec:python ../../run_aiconfig.py ./travel_aiconfig_test_suite_settings.json
```

4. export your provider API key if needed so it's available to subprocess environments:
`export OPENAI_API_KEY=...`

5. Make sure your shell environment (including subshells) contains a python3 executable called `python` on its path. One way to do this is to set up an anaconda/miniconda environment.

6. Run `npx promptfoo@latest eval -c path/to/promptfooconfig.yaml`
You should see one passing test and one failing test.

### Debugging / Troubleshooting
Help, something went wrong!
-> Try adding --verbose flag at the end of the `npx` command.
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"prompt_name": "get_activities",
"aiconfig_path": "travel_parametrized_for_testing.aiconfig.json"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
{
"name": "NYC Trip Planner",
"description": "Intrepid explorer with ChatGPT and AIConfig",
"schema_version": "latest",
"metadata": {
"models": {
"gpt-3.5-turbo": {
"model": "gpt-3.5-turbo",
"top_p": 1,
"temperature": 1
},
"gpt-4": {
"model": "gpt-4",
"max_tokens": 3000,
"system_prompt": "You are an expert travel coordinator with exquisite taste."
}
},
"default_model": "gpt-3.5-turbo"
},
"prompts": [
{
"name": "get_activities",
"input": "{{the_query}}"
},
{
"name": "gen_itinerary",
"input": "Generate an itinerary ordered by {{order_by}} for these activities: {{get_activities.output}}.",
"metadata": {
"model": "gpt-4",
"parameters": {
"order_by": "geographic location"
}
}
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
prompts: ["{{question}}"]
providers:
- exec:python ../../run_aiconfig.py ./travel_aiconfig_test_suite_settings.json
tests:
- description: "Test if output is equal to the expected value"
vars:
question: Empire State Building is on fifth avenue. What is the cross street?
assert:
- type: python
value: output.lower().find('34th street') != -1
- description: "Test if output is equal to the expected value"
vars:
question: "which is the best borough?"
assert:
- type: python
value: output == "Brooklyn"
41 changes: 41 additions & 0 deletions python/src/aiconfig/eval/promptfoo/run_aiconfig.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import asyncio
import os

import openai
from dotenv import load_dotenv

from aiconfig import AIConfigRuntime
import sys
import json
from typing import Any


async def main():
    """Run one AIConfig prompt for a promptfoo test case and print its output.

    Expected command line: ``run_aiconfig.py <settings.json> <question>``.
    The settings file must contain ``prompt_name`` and ``aiconfig_path``.
    The question is passed to the prompt as the ``the_query`` parameter, and
    the text of the first result is printed to stdout.

    Raises:
        SystemExit: if the settings path or the question argument is missing.
    """
    # Fail with a usage message rather than a bare IndexError traceback when
    # the script is started without both positional arguments. Extra trailing
    # arguments are tolerated and ignored.
    if len(sys.argv) < 3:
        sys.exit("usage: run_aiconfig.py <settings.json> <question>")

    settings_path = sys.argv[1]
    question = sys.argv[2]

    settings = _load_settings(settings_path)
    prompt_name = settings["prompt_name"]
    aiconfig_path = settings["aiconfig_path"]

    # Pick up a local .env file (if any) before reading the API key.
    load_dotenv()
    openai.api_key = os.getenv("OPENAI_API_KEY")
    runtime = AIConfigRuntime.load(aiconfig_path)

    params = {
        "the_query": question,
    }

    result = await runtime.run(prompt_name, params)
    # Only the first element of the run result is reported.
    final_output = runtime.get_output_text(prompt_name, result[0])
    print(final_output)


def _load_settings(settings_path: str) -> dict[str, Any]:
    """Read the JSON settings file at *settings_path* and return its contents.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the platform's default locale encoding.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    with open(settings_path, encoding="utf-8") as f:
        return json.load(f)


# Script entry point: drive the async main() to completion when this file is
# executed directly (it is not run on import).
if __name__ == "__main__":
    asyncio.run(main())
30 changes: 30 additions & 0 deletions typescript/scripts/eval/promptfoo/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Promptfoo integration

Use case: I'm a SWE who wants to run my AIConfig against a set of test cases specified in a config file. Each test case has the input and a success condition of my choosing.

## Philosophy / design

Promptfoo has a pretty nice interface (both inputs and outputs) for addressing the use case. Tests are specified in a yaml file and the test suite can be run with a simple command. The same config file makes it easy to connect your test suite to an AIConfig with a small amount of code.

## How-to guide

1. Write your test cases in a Promptfoo config file. See examples/travel/travel_promptfooconfig.yaml as an example.
2. Define an AIConfig test suite settings file. It should have the prompt name and path to your aiconfig. See examples/travel/travel_aiconfig_test_suite_settings.json for example.
3. Set your provider to point to run_aiconfig.ts with your settings file as the argument. For example, see examples/travel/travel_promptfooconfig.yaml. Like this:

```
providers:
- exec:npx ts-node ../../run_aiconfig.ts ./travel_aiconfig_test_suite_settings.json
```

4. export your provider API key if needed so it's available to subprocess environments:
`export OPENAI_API_KEY=...`

5. Run `cd typescript; npx promptfoo@latest eval -c path/to/promptfooconfig.yaml`
You should see one passing test and one failing test.

### Debugging / Troubleshooting

Help, something went wrong!
-> Try adding --verbose flag at the end of the `npx` command.
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"prompt_name": "get_activities",
"aiconfig_path": "examples/travel/travel_parametrized_for_testing.aiconfig.json"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
{
"name": "NYC Trip Planner",
"description": "Intrepid explorer with ChatGPT and AIConfig",
"schema_version": "latest",
"metadata": {
"models": {
"gpt-3.5-turbo": {
"model": "gpt-3.5-turbo",
"top_p": 1,
"temperature": 1
},
"gpt-4": {
"model": "gpt-4",
"max_tokens": 3000,
"system_prompt": "You are an expert travel coordinator with exquisite taste."
}
},
"default_model": "gpt-3.5-turbo"
},
"prompts": [
{
"name": "get_activities",
"input": "{{the_query}}"
},
{
"name": "gen_itinerary",
"input": "Generate an itinerary ordered by {{order_by}} for these activities: {{get_activities.output}}.",
"metadata": {
"model": "gpt-4",
"parameters": {
"order_by": "geographic location"
}
}
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
prompts: ["{{question}}"]
providers:
- exec:npx ts-node ../../run_aiconfig.ts ./travel_aiconfig_test_suite_settings.json
tests:
- description: "Test if output is equal to the expected value"
vars:
question: Empire State Building is on fifth avenue. What is the cross street?
assert:
- type: python
value: output.lower().find('34th street') != -1
- description: "Test if output is equal to the expected value"
vars:
question: "which is the best borough?"
assert:
- type: python
value: output == "Brooklyn"
31 changes: 31 additions & 0 deletions typescript/scripts/eval/promptfoo/run_aiconfig.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import { AIConfigRuntime } from "../../../lib/config";
import fs from "fs";
import path from "path";

/**
 * Run one AIConfig prompt for a promptfoo test case and print its output.
 *
 * Expected command line: `run_aiconfig.ts <settings.json> <question>`.
 * The settings file must contain `prompt_name` and `aiconfig_path`; the
 * question is passed to the prompt as the `the_query` parameter.
 *
 * @throws Error if the settings path or the question argument is missing.
 */
async function main() {
  const settings_path = process.argv[2];
  const question = process.argv[3];
  // Reject a missing argument up front instead of letting `undefined`
  // flow into the file read or the prompt parameters.
  if (settings_path === undefined || question === undefined) {
    throw new Error("usage: run_aiconfig.ts <settings.json> <question>");
  }

  const settings = _load_settings(settings_path);
  const prompt_name = settings["prompt_name"];
  const aiconfig_path = settings["aiconfig_path"];

  // The aiconfig path from the settings file is resolved against this
  // script's directory, not the current working directory.
  const fullAIConfigPath = path.join(__dirname, aiconfig_path);
  const runtime = AIConfigRuntime.load(fullAIConfigPath);
  const params = {
    the_query: question,
  };
  const result = await runtime.run(prompt_name, params);

  // Accept either a single output or an array of outputs; report the first.
  const r0 = Array.isArray(result) ? result[0] : result;
  const final_output: string = runtime.getOutputText(prompt_name, r0);
  console.log(final_output);
}

/**
 * Read the file at `settings_path` as UTF-8 text and return it parsed as JSON.
 */
function _load_settings(settings_path: string) {
  const contents = fs.readFileSync(settings_path, "utf-8");
  return JSON.parse(contents);
}

// Surface a failed run on stderr and exit non-zero instead of leaving the
// promise returned by main() unhandled. exitCode (rather than process.exit)
// lets pending stdout writes finish.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});

0 comments on commit b566b9c

Please sign in to comment.