Skip to content

Commit

Permalink
Add data challenges (Significant-Gravitas#5361)
Browse files Browse the repository at this point in the history
Signed-off-by: Merwane Hamadi <[email protected]>
  • Loading branch information
waynehamadi authored Sep 27, 2023
1 parent 6f289e6 commit 793ff1c
Show file tree
Hide file tree
Showing 15 changed files with 620 additions and 25 deletions.
4 changes: 0 additions & 4 deletions benchmark/agbenchmark/agent_api_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,6 @@ async def run_api_agent(
raise TimeoutError("Time limit exceeded")
if not step or step.is_last:
steps_remaining = False
if os.getenv("IS_MOCK"):
time.sleep(
1
) # will help with the integration of the "polling updates" feature since the mock agent is too fast.
# if we're calling a mock agent, we "cheat" and give the correct artifacts to pass the tests
if os.getenv("IS_MOCK"):
await upload_artifacts(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
id,name,timestamp
3,Alice,2023-09-25 14:10:00
1,Bob,2023-09-24 12:05:00
2,Charlie,2023-09-24 12:10:00
4,David,2023-09-26 16:20:00
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
id,name,timestamp
1,Bob,2023-09-24 12:05:00
2,Charlie,2023-09-24 12:10:00
3,Alice,2023-09-25 14:10:00
4,David,2023-09-26 16:20:00
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
{
"category": [
"data"
],
"cutoff": 60,
"dependencies": [
"TestReadFile"
],
"eval_id": "db4654d7-fc97-4290-ab27-a710c2b5ce15",
"ground": {
"answer": "The csv sorted by date",
"eval": {
"type": "file"
},
"files": [
"output.csv"
],
"should_contain": [
"id,name,timestamp\n1,Bob,2023-09-24 12:05:00\n2,Charlie,2023-09-24 12:10:00\n3,Alice,2023-09-25 14:10:00\n4,David,2023-09-26 16:20:00"
]
},
"info": {
"description": "Tests if the agent can sort a csv",
"difficulty": "basic",
"side_effects": [
""
]
},
"name": "SortCsv",
"task": "Sort the input.csv by the 'timestamp' column and write the new csv in the output.csv file. The order of the columns should be preserved."
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
Item
Banana
Leaf
Sky
Sunflower
Grass
Jeans
Lemon
Tree
Ocean
Daisy
Fern
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
Item, Color
Banana, Yellow
Leaf, Green
Sky, Blue
Sunflower, Yellow
Grass, Green
Jeans, Blue
Lemon, Yellow
Tree, Green
Ocean, Blue
Daisy, Yellow
Fern, Green
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
{
"category": [
"data"
],
"cutoff": 60,
"dependencies": [
"TestSortCsv"
],
"eval_id": "6c58e229-aa22-4c4f-a053-4a78931ad41e",
"ground": {
"answer": "The csv labelled",
"eval": {
"type": "file"
},
"files": [
"output.csv"
],
"should_contain": [
"Item, Color\nBanana, Yellow\nLeaf, Green\nSky, Blue\nSunflower, Yellow\nGrass, Green\nJeans, Blue\nLemon, Yellow\nTree, Green\nOcean, Blue\nDaisy, Yellow\nFern, Green"
]
},
"info": {
"description": "Tests if the agent can label data in a csv",
"difficulty": "basic",
"side_effects": [
""
]
},
"name": "LabelData",
"task": "The csv 'input.csv' has many items. Create a 'Color' column for these items and classify them as either 'blue', 'green', or 'yellow' depending on what the most likely color is. Preserve the order of the rows. The color column should be the second column. Write the output in output.csv"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
ID,Name,Age
101,John,28
102,Alice,34
103,Bob,45
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
ID,Occupation,Salary
101,Engineer,80000
102,Doctor,120000
103,Lawyer,95000
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Age,ID,Name,Occupation,Salary
28,101,John,Engineer,80000
34,102,Alice,Doctor,120000
45,103,Bob,Lawyer,95000
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
{
"category": [
"data"
],
"cutoff": 60,
"dependencies": [
"TestLabelData"
],
"eval_id": "d5f04342-983f-45a4-b84a-fe8d96863375",
"ground": {
"answer": "The csv data is combined",
"eval": {
"type": "file"
},
"files": [
"output.csv"
],
"should_contain": [
"Age,ID,Name,Occupation,Salary\n28,101,John,Engineer,80000\n34,102,Alice,Doctor,120000\n45,103,Bob,Lawyer,95000"
]
},
"info": {
"description": "Tests if the agent can combine data from a csv",
"difficulty": "intermediate",
"side_effects": [
""
]
},
"name": "CombineCsv",
"task": "The csvs 'file1.csv' and 'file2.csv' both have a column 'ID'. Combine these 2 csvs using the 'ID' column. Sort the rows by ID in ascending order and the columns alphabetically. Write the output in output.csv"
}
129 changes: 129 additions & 0 deletions benchmark/frontend/public/graph.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@
"id": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestBattleship::test_method[challenge_data0]",
"to": "agbenchmark/generate_test.py::TestBattleship::test_method[challenge_data0]"
},
{
"arrows": "to",
"from": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]",
"id": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]",
"to": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]"
},
{
"arrows": "to",
"from": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]",
Expand Down Expand Up @@ -83,6 +89,18 @@
"from": "agbenchmark/generate_test.py::TestRevenueRetrieval::test_method[challenge_data0]",
"id": "agbenchmark/generate_test.py::TestRevenueRetrieval::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestRevenueRetrieval2::test_method[challenge_data0]",
"to": "agbenchmark/generate_test.py::TestRevenueRetrieval2::test_method[challenge_data0]"
},
{
"arrows": "to",
"from": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]",
"id": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]",
"to": "agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]"
},
{
"arrows": "to",
"from": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]",
"id": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]",
"to": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]"
}
],
"nodes": [
Expand Down Expand Up @@ -574,6 +592,117 @@
"label": "RevenueRetrieval2",
"shape": "dot"
},
{
"color": "grey",
"data": {
"category": [
"data"
],
"cutoff": 60,
"dependencies": [
"TestSortCsv"
],
"eval_id": "6c58e229-aa22-4c4f-a053-4a78931ad41e",
"ground": {
"answer": "The csv labelled",
"eval": {
"type": "file"
},
"files": [
"output.csv"
],
"should_contain": [
"Item, Color\nBanana, Yellow\nLeaf, Green\nSky, Blue\nSunflower, Yellow\nGrass, Green\nJeans, Blue\nLemon, Yellow\nTree, Green\nOcean, Blue\nDaisy, Yellow\nFern, Green"
]
},
"info": {
"description": "Tests if the agent can label data in a csv",
"difficulty": "basic",
"side_effects": [
""
]
},
"name": "TestLabelData",
"task": "The csv 'input.csv' has many items. Create a 'Color' column for these items and classify them as either 'blue', 'green', or 'yellow' depending on what the most likely color is. Preserve the order of the rows. The color column should be the second column. Write the output in output.csv"
},
"id": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]",
"label": "LabelData",
"shape": "dot"
},
{
"color": "grey",
"data": {
"category": [
"data"
],
"cutoff": 60,
"dependencies": [
"TestReadFile"
],
"eval_id": "db4654d7-fc97-4290-ab27-a710c2b5ce15",
"ground": {
"answer": "The csv sorted by date",
"eval": {
"type": "file"
},
"files": [
"output.csv"
],
"should_contain": [
"id,name,timestamp\n1,Bob,2023-09-24 12:05:00\n2,Charlie,2023-09-24 12:10:00\n3,Alice,2023-09-25 14:10:00\n4,David,2023-09-26 16:20:00"
]
},
"info": {
"description": "Tests if the agent can sort a csv",
"difficulty": "basic",
"side_effects": [
""
]
},
"name": "TestSortCsv",
"task": "Sort the input.csv by the 'timestamp' column and write the new csv in the output.csv file. The order of the columns should be preserved."
},
"id": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]",
"label": "SortCsv",
"shape": "dot"
},
{
"color": "grey",
"data": {
"category": [
"data"
],
"cutoff": 60,
"dependencies": [
"TestLabelData"
],
"eval_id": "d5f04342-983f-45a4-b84a-fe8d96863375",
"ground": {
"answer": "The csv data is combined",
"eval": {
"type": "file"
},
"files": [
"output.csv"
],
"should_contain": [
"Age,ID,Name,Occupation,Salary\n28,101,John,Engineer,80000\n34,102,Alice,Doctor,120000\n45,103,Bob,Lawyer,95000"
]
},
"info": {
"description": "Tests if the agent can combine data from a csv",
"difficulty": "intermediate",
"side_effects": [
""
]
},
"name": "TestCombineCsv",
"task": "The csvs 'file1.csv' and 'file2.csv' both have a column 'ID'. Combine these 2 csvs using the 'ID' column. Sort the rows by ID in ascending order and the columns alphabetically. Write the output in output.csv"
},
"id": "agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]",
"label": "CombineCsv",
"shape": "dot"
},
{
"color": "grey",
"data": {
Expand Down
Loading

0 comments on commit 793ff1c

Please sign in to comment.