From 793ff1c163bb0f9bd3e0c788b4978b8dc193ba6a Mon Sep 17 00:00:00 2001 From: merwanehamadi Date: Wed, 27 Sep 2023 10:47:34 -0700 Subject: [PATCH] Add data challenges (#5361) Signed-off-by: Merwane Hamadi --- benchmark/agbenchmark/agent_api_interface.py | 4 - .../data/1_sort_csv/artifacts_in/input.csv | 5 + .../data/1_sort_csv/artifacts_out/output.csv | 5 + .../verticals/data/1_sort_csv/data.json | 31 +++ .../data/2_label_data/artifacts_in/input.csv | 12 + .../2_label_data/artifacts_out/output.csv | 12 + .../verticals/data/2_label_data/data.json | 31 +++ .../data/3_combine_csv/artifacts_in/file1.csv | 4 + .../data/3_combine_csv/artifacts_in/file2.csv | 4 + .../3_combine_csv/artifacts_out/output.csv | 4 + .../verticals/data/3_combine_csv/data.json | 31 +++ benchmark/frontend/public/graph.json | 129 +++++++++++ frontend/assets/data_tree_structure.json | 214 +++++++++++++++++- frontend/assets/tree_structure.json | 129 +++++++++++ frontend/pubspec.lock | 30 +-- 15 files changed, 620 insertions(+), 25 deletions(-) create mode 100644 benchmark/agbenchmark/challenges/verticals/data/1_sort_csv/artifacts_in/input.csv create mode 100644 benchmark/agbenchmark/challenges/verticals/data/1_sort_csv/artifacts_out/output.csv create mode 100644 benchmark/agbenchmark/challenges/verticals/data/1_sort_csv/data.json create mode 100644 benchmark/agbenchmark/challenges/verticals/data/2_label_data/artifacts_in/input.csv create mode 100644 benchmark/agbenchmark/challenges/verticals/data/2_label_data/artifacts_out/output.csv create mode 100644 benchmark/agbenchmark/challenges/verticals/data/2_label_data/data.json create mode 100644 benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/artifacts_in/file1.csv create mode 100644 benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/artifacts_in/file2.csv create mode 100644 benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/artifacts_out/output.csv create mode 100644 benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/data.json diff --git a/benchmark/agbenchmark/agent_api_interface.py b/benchmark/agbenchmark/agent_api_interface.py index ce6ab6494fc5..f35ef81a95a5 100644 --- a/benchmark/agbenchmark/agent_api_interface.py +++ b/benchmark/agbenchmark/agent_api_interface.py @@ -50,10 +50,6 @@ async def run_api_agent( raise TimeoutError("Time limit exceeded") if not step or step.is_last: steps_remaining = False - if os.getenv("IS_MOCK"): - time.sleep( - 1 - ) # will help with the integration og the "polling updates" features since mock agent is too fast. # if we're calling a mock agent, we "cheat" and give the correct artifacts to pass the tests if os.getenv("IS_MOCK"): await upload_artifacts( diff --git a/benchmark/agbenchmark/challenges/verticals/data/1_sort_csv/artifacts_in/input.csv b/benchmark/agbenchmark/challenges/verticals/data/1_sort_csv/artifacts_in/input.csv new file mode 100644 index 000000000000..a52510f189da --- /dev/null +++ b/benchmark/agbenchmark/challenges/verticals/data/1_sort_csv/artifacts_in/input.csv @@ -0,0 +1,5 @@ +id,name,timestamp +3,Alice,2023-09-25 14:10:00 +1,Bob,2023-09-24 12:05:00 +2,Charlie,2023-09-24 12:10:00 +4,David,2023-09-26 16:20:00 diff --git a/benchmark/agbenchmark/challenges/verticals/data/1_sort_csv/artifacts_out/output.csv b/benchmark/agbenchmark/challenges/verticals/data/1_sort_csv/artifacts_out/output.csv new file mode 100644 index 000000000000..6cac77333370 --- /dev/null +++ b/benchmark/agbenchmark/challenges/verticals/data/1_sort_csv/artifacts_out/output.csv @@ -0,0 +1,5 @@ +id,name,timestamp +1,Bob,2023-09-24 12:05:00 +2,Charlie,2023-09-24 12:10:00 +3,Alice,2023-09-25 14:10:00 +4,David,2023-09-26 16:20:00 diff --git a/benchmark/agbenchmark/challenges/verticals/data/1_sort_csv/data.json b/benchmark/agbenchmark/challenges/verticals/data/1_sort_csv/data.json new file mode 100644 index 000000000000..8515af89fc65 --- /dev/null +++ b/benchmark/agbenchmark/challenges/verticals/data/1_sort_csv/data.json @@ -0,0 +1,31 @@ +{ + "category": [ + "data" + ], + "cutoff": 60, + "dependencies": [ + "TestReadFile" + ], + "eval_id": "db4654d7-fc97-4290-ab27-a710c2b5ce15", + "ground": { + "answer": "The csv sorted by date", + "eval": { + "type": "file" + }, + "files": [ + "output.csv" + ], + "should_contain": [ + "id,name,timestamp\n1,Bob,2023-09-24 12:05:00\n2,Charlie,2023-09-24 12:10:00\n3,Alice,2023-09-25 14:10:00\n4,David,2023-09-26 16:20:00" + ] + }, + "info": { + "description": "Tests if the agent can sort a csv", + "difficulty": "basic", + "side_effects": [ + "" + ] + }, + "name": "SortCsv", + "task": "Sort the input.csv by the 'timestamp' column and write the new csv in the output.csv file. The order of the columns should be preserved." +} diff --git a/benchmark/agbenchmark/challenges/verticals/data/2_label_data/artifacts_in/input.csv b/benchmark/agbenchmark/challenges/verticals/data/2_label_data/artifacts_in/input.csv new file mode 100644 index 000000000000..ae4ca502b4ab --- /dev/null +++ b/benchmark/agbenchmark/challenges/verticals/data/2_label_data/artifacts_in/input.csv @@ -0,0 +1,12 @@ +Item +Banana +Leaf +Sky +Sunflower +Grass +Jeans +Lemon +Tree +Ocean +Daisy +Fern diff --git a/benchmark/agbenchmark/challenges/verticals/data/2_label_data/artifacts_out/output.csv b/benchmark/agbenchmark/challenges/verticals/data/2_label_data/artifacts_out/output.csv new file mode 100644 index 000000000000..7b19bce982c6 --- /dev/null +++ b/benchmark/agbenchmark/challenges/verticals/data/2_label_data/artifacts_out/output.csv @@ -0,0 +1,12 @@ +Item, Color +Banana, Yellow +Leaf, Green +Sky, Blue +Sunflower, Yellow +Grass, Green +Jeans, Blue +Lemon, Yellow +Tree, Green +Ocean, Blue +Daisy, Yellow +Fern, Green diff --git a/benchmark/agbenchmark/challenges/verticals/data/2_label_data/data.json b/benchmark/agbenchmark/challenges/verticals/data/2_label_data/data.json new file mode 100644 index 000000000000..a024a6eb8a85 --- /dev/null +++ b/benchmark/agbenchmark/challenges/verticals/data/2_label_data/data.json @@ -0,0 +1,31 @@ +{ + "category": [ + "data" + ], + "cutoff": 60, + "dependencies": [ + "TestSortCsv" + ], + "eval_id": "6c58e229-aa22-4c4f-a053-4a78931ad41e", + "ground": { + "answer": "The csv labelled", + "eval": { + "type": "file" + }, + "files": [ + "output.csv" + ], + "should_contain": [ + "Item, Color\nBanana, Yellow\nLeaf, Green\nSky, Blue\nSunflower, Yellow\nGrass, Green\nJeans, Blue\nLemon, Yellow\nTree, Green\nOcean, Blue\nDaisy, Yellow\nFern, Green" + ] + }, + "info": { + "description": "Tests if the agent can sort a csv", + "difficulty": "basic", + "side_effects": [ + "" + ] + }, + "name": "LabelData", + "task": "The csv 'input.csv' has many items. create a 'Color' column for these items and classify them as either 'blue', 'green', or 'yellow' depending on what the most likely color is. Preserve the order of the rows. The color column should be the second column. Write the output in output.csv" +} diff --git a/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/artifacts_in/file1.csv b/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/artifacts_in/file1.csv new file mode 100644 index 000000000000..fe552d6774ef --- /dev/null +++ b/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/artifacts_in/file1.csv @@ -0,0 +1,4 @@ +ID,Name,Age +101,John,28 +102,Alice,34 +103,Bob,45 diff --git a/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/artifacts_in/file2.csv b/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/artifacts_in/file2.csv new file mode 100644 index 000000000000..685e24f4b9f0 --- /dev/null +++ b/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/artifacts_in/file2.csv @@ -0,0 +1,4 @@ +ID,Occupation,Salary +101,Engineer,80000 +102,Doctor,120000 +103,Lawyer,95000 diff --git a/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/artifacts_out/output.csv b/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/artifacts_out/output.csv new file mode 100644 index 000000000000..8afe84bf0147 --- /dev/null +++ b/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/artifacts_out/output.csv @@ -0,0 +1,4 @@ +Age,ID,Name,Occupation,Salary +28,101,John,Engineer,80000 +34,102,Alice,Doctor,120000 +45,103,Bob,Lawyer,95000 diff --git a/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/data.json b/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/data.json new file mode 100644 index 000000000000..0739fe031815 --- /dev/null +++ b/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/data.json @@ -0,0 +1,31 @@ +{ + "category": [ + "data" + ], + "cutoff": 60, + "dependencies": [ + "TestLabelData" + ], + "eval_id": "d5f04342-983f-45a4-b84a-fe8d96863375", + "ground": { + "answer": "The csv data is combined", + "eval": { + "type": "file" + }, + "files": [ + "output.csv" + ], + "should_contain": [ + "Age,ID,Name,Occupation,Salary\n28,101,John,Engineer,80000\n34,102,Alice,Doctor,120000\n45,103,Bob,Lawyer,95000" + ] + }, + "info": { + "description": "Tests if the agent can combine data from a csv", + "difficulty": "intermediate", + "side_effects": [ + "" + ] + }, + "name": "CombineCsv", + "task": "The csvs 'file1.csv' and 'file2.csv' both have a column 'ID'. Combine these 2 csvs using the 'ID' column. Sort the rows by ID in ascending order and the columns alphabetically. Write the output in output.csv" +} diff --git a/benchmark/frontend/public/graph.json b/benchmark/frontend/public/graph.json index 8116dacfdae3..f5401f48c6fe 100644 --- a/benchmark/frontend/public/graph.json +++ b/benchmark/frontend/public/graph.json @@ -12,6 +12,12 @@ "id": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestBattleship::test_method[challenge_data0]", "to": "agbenchmark/generate_test.py::TestBattleship::test_method[challenge_data0]" }, + { + "arrows": "to", + "from": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]", + "id": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]", + "to": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]" + }, { "arrows": "to", "from": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]", @@ -83,6 +89,18 @@ "from": "agbenchmark/generate_test.py::TestRevenueRetrieval::test_method[challenge_data0]", "id": "agbenchmark/generate_test.py::TestRevenueRetrieval::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestRevenueRetrieval2::test_method[challenge_data0]", "to": "agbenchmark/generate_test.py::TestRevenueRetrieval2::test_method[challenge_data0]" + }, + { + "arrows": "to", + "from": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]", + "id": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]", + "to": "agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]" + }, + { + "arrows": "to", + "from": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]", + "id": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]", + "to": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]" } ], "nodes": [ @@ -574,6 +592,117 @@ "label": "RevenueRetrieval2", "shape": "dot" }, + { + "color": "grey", + "data": { + "category": [ + "data" + ], + "cutoff": 60, + "dependencies": [ + "TestSortCsv" + ], + "eval_id": "6c58e229-aa22-4c4f-a053-4a78931ad41e", + "ground": { + "answer": "The csv labelled", + "eval": { + "type": "file" + }, + "files": [ + "output.csv" + ], + "should_contain": [ + "Item, Color\nBanana, Yellow\nLeaf, Green\nSky, Blue\nSunflower, Yellow\nGrass, Green\nJeans, Blue\nLemon, Yellow\nTree, Green\nOcean, Blue\nDaisy, Yellow\nFern, Green" + ] + }, + "info": { + "description": "Tests if the agent can sort a csv", + "difficulty": "basic", + "side_effects": [ + "" + ] + }, + "name": "TestLabelData", + "task": "The csv 'input.csv' has many items. create a 'Color' column for these items and classify them as either 'blue', 'green', or 'yellow' depending on what the most likely color is. Preserve the order of the rows. The color column should be the second column. Write the output in output.csv" + }, + "id": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]", + "label": "LabelData", + "shape": "dot" + }, + { + "color": "grey", + "data": { + "category": [ + "data" + ], + "cutoff": 60, + "dependencies": [ + "TestReadFile" + ], + "eval_id": "db4654d7-fc97-4290-ab27-a710c2b5ce15", + "ground": { + "answer": "The csv sorted by date", + "eval": { + "type": "file" + }, + "files": [ + "output.csv" + ], + "should_contain": [ + "id,name,timestamp\n1,Bob,2023-09-24 12:05:00\n2,Charlie,2023-09-24 12:10:00\n3,Alice,2023-09-25 14:10:00\n4,David,2023-09-26 16:20:00" + ] + }, + "info": { + "description": "Tests if the agent can sort a csv", + "difficulty": "basic", + "side_effects": [ + "" + ] + }, + "name": "TestSortCsv", + "task": "Sort the input.csv by the 'timestamp' column and write the new csv in the output.csv file. The order of the columns should be preserved." + }, + "id": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]", + "label": "SortCsv", + "shape": "dot" + }, + { + "color": "grey", + "data": { + "category": [ + "data" + ], + "cutoff": 60, + "dependencies": [ + "TestLabelData" + ], + "eval_id": "d5f04342-983f-45a4-b84a-fe8d96863375", + "ground": { + "answer": "The csv data is combined", + "eval": { + "type": "file" + }, + "files": [ + "output.csv" + ], + "should_contain": [ + "Age,ID,Name,Occupation,Salary\n28,101,John,Engineer,80000\n34,102,Alice,Doctor,120000\n45,103,Bob,Lawyer,95000" + ] + }, + "info": { + "description": "Tests if the agent can combine data from a csv", + "difficulty": "intermediate", + "side_effects": [ + "" + ] + }, + "name": "TestCombineCsv", + "task": "The csvs 'file1.csv' and 'file2.csv' both have a column 'ID'. Combine these 2 csvs using the 'ID' column. Sort the rows by ID and the columns alphabetically. Write the output in output.csv" + }, + "id": "agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]", + "label": "CombineCsv", + "shape": "dot" + }, { "color": "grey", "data": { diff --git a/frontend/assets/data_tree_structure.json b/frontend/assets/data_tree_structure.json index 1f1a2ad1390c..48432d2d32a2 100644 --- a/frontend/assets/data_tree_structure.json +++ b/frontend/assets/data_tree_structure.json @@ -1,4 +1,214 @@ { - "edges": [], - "nodes": [] + "edges": [ + { + "arrows": "to", + "from": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]", + "id": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]", + "to": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]" + }, + { + "arrows": "to", + "from": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]", + "id": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]", + "to": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]" + }, + { + "arrows": "to", + "from": "agbenchmark/generate_test.py::TestWriteFile::test_method[challenge_data0]", + "id": "agbenchmark/generate_test.py::TestWriteFile::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]", + "to": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]" + }, + { + "arrows": "to", + "from": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]", + "id": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]", + "to": "agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]" + } + ], + "nodes": [ + { + "color": "grey", + "data": { + "category": [ + "data" + ], + "cutoff": 60, + "dependencies": [ + "TestSortCsv" + ], + "eval_id": "6c58e229-aa22-4c4f-a053-4a78931ad41e", + "ground": { + "answer": "The csv labelled", + "eval": { + "type": "file" + }, + "files": [ + "output.csv" + ], + "should_contain": [ + "Item, Color\nBanana, Yellow\nLeaf, Green\nSky, Blue\nSunflower, Yellow\nGrass, Green\nJeans, Blue\nLemon, Yellow\nTree, Green\nOcean, Blue\nDaisy, Yellow\nFern, Green" + ] + }, + "info": { + "description": "Tests if the agent can sort a csv", + "difficulty": "basic", + "side_effects": [ + "" + ] + }, + "name": "TestLabelData", + "task": "The csv 'input.csv' has many items. create a 'Color' column for these items and classify them as either 'blue', 'green', or 'yellow' depending on what the most likely color is. Preserve the order of the rows. The color column should be the second column. Write the output in output.csv" + }, + "id": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]", + "label": "LabelData", + "shape": "dot" + }, + { + "color": "grey", + "data": { + "category": [ + "data" + ], + "cutoff": 60, + "dependencies": [ + "TestReadFile" + ], + "eval_id": "db4654d7-fc97-4290-ab27-a710c2b5ce15", + "ground": { + "answer": "The csv sorted by date", + "eval": { + "type": "file" + }, + "files": [ + "output.csv" + ], + "should_contain": [ + "id,name,timestamp\n1,Bob,2023-09-24 12:05:00\n2,Charlie,2023-09-24 12:10:00\n3,Alice,2023-09-25 14:10:00\n4,David,2023-09-26 16:20:00" + ] + }, + "info": { + "description": "Tests if the agent can sort a csv", + "difficulty": "basic", + "side_effects": [ + "" + ] + }, + "name": "TestSortCsv", + "task": "Sort the input.csv by the 'timestamp' column and write the new csv in the output.csv file. The order of the columns should be preserved." + }, + "id": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]", + "label": "SortCsv", + "shape": "dot" + }, + { + "color": "grey", + "data": { + "category": [ + "interface" + ], + "cutoff": 60, + "dependencies": [ + "TestWriteFile" + ], + "eval_id": "261ccfaa-02a2-4c1a-8a56-c76c66f7dba1", + "ground": { + "answer": "The content of output.txt should be 'Hello World!'", + "eval": { + "type": "file" + }, + "files": [ + "output.txt" + ], + "should_contain": [ + "Hello World!" + ] + }, + "info": { + "description": "tests the ability for an agent to read a file.", + "difficulty": "interface", + "side_effects": [ + "" + ] + }, + "name": "TestReadFile", + "task": "Read the file called file_to_read.txt and write its content to a file called output.txt" + }, + "id": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]", + "label": "ReadFile", + "shape": "dot" + }, + { + "color": "grey", + "data": { + "category": [ + "interface" + ], + "cutoff": 60, + "dependencies": [], + "eval_id": "81b64bf9-2b6a-4ac8-bcd2-8bfe36244ac0", + "ground": { + "answer": "The word 'Washington', printed to a .txt file named anything", + "eval": { + "type": "file" + }, + "files": [ + ".txt" + ], + "should_contain": [ + "Washington" + ], + "should_not_contain": [] + }, + "info": { + "description": "Tests the agents ability to write to a file", + "difficulty": "interface", + "side_effects": [ + "" + ] + }, + "name": "TestWriteFile", + "task": "Write the word 'Washington' to a .txt file" + }, + "id": "agbenchmark/generate_test.py::TestWriteFile::test_method[challenge_data0]", + "label": "WriteFile", + "shape": "dot" + }, + { + "color": "grey", + "data": { + "category": [ + "data" + ], + "cutoff": 60, + "dependencies": [ + "TestLabelData" + ], + "eval_id": "d5f04342-983f-45a4-b84a-fe8d96863375", + "ground": { + "answer": "The csv data is combined", + "eval": { + "type": "file" + }, + "files": [ + "output.csv" + ], + "should_contain": [ + "Age,ID,Name,Occupation,Salary\n28,101,John,Engineer,80000\n34,102,Alice,Doctor,120000\n45,103,Bob,Lawyer,95000" + ] + }, + "info": { + "description": "Tests if the agent can combine data from a csv", + "difficulty": "intermediate", + "side_effects": [ + "" + ] + }, + "name": "TestCombineCsv", + "task": "The csvs 'file1.csv' and 'file2.csv' both have a column 'ID'. Combine these 2 csvs using the 'ID' column. Sort the rows by ID and the columns alphabetically. Write the output in output.csv" + }, + "id": "agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]", + "label": "CombineCsv", + "shape": "dot" + } + ] } diff --git a/frontend/assets/tree_structure.json b/frontend/assets/tree_structure.json index 8116dacfdae3..f5401f48c6fe 100644 --- a/frontend/assets/tree_structure.json +++ b/frontend/assets/tree_structure.json @@ -12,6 +12,12 @@ "id": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestBattleship::test_method[challenge_data0]", "to": "agbenchmark/generate_test.py::TestBattleship::test_method[challenge_data0]" }, + { + "arrows": "to", + "from": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]", + "id": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]", + "to": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]" + }, { "arrows": "to", "from": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]", @@ -83,6 +89,18 @@ "from": "agbenchmark/generate_test.py::TestRevenueRetrieval::test_method[challenge_data0]", "id": "agbenchmark/generate_test.py::TestRevenueRetrieval::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestRevenueRetrieval2::test_method[challenge_data0]", "to": "agbenchmark/generate_test.py::TestRevenueRetrieval2::test_method[challenge_data0]" + }, + { + "arrows": "to", + "from": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]", + "id": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]", + "to": "agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]" + }, + { + "arrows": "to", + "from": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]", + "id": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]", + "to": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]" } ], "nodes": [ @@ -574,6 +592,117 @@ "label": "RevenueRetrieval2", "shape": "dot" }, + { + "color": "grey", + "data": { + "category": [ + "data" + ], + "cutoff": 60, + "dependencies": [ + "TestSortCsv" + ], + "eval_id": "6c58e229-aa22-4c4f-a053-4a78931ad41e", + "ground": { + "answer": "The csv labelled", + "eval": { + "type": "file" + }, + "files": [ + "output.csv" + ], + "should_contain": [ + "Item, Color\nBanana, Yellow\nLeaf, Green\nSky, Blue\nSunflower, Yellow\nGrass, Green\nJeans, Blue\nLemon, Yellow\nTree, Green\nOcean, Blue\nDaisy, Yellow\nFern, Green" + ] + }, + "info": { + "description": "Tests if the agent can sort a csv", + "difficulty": "basic", + "side_effects": [ + "" + ] + }, + "name": "TestLabelData", + "task": "The csv 'input.csv' has many items. create a 'Color' column for these items and classify them as either 'blue', 'green', or 'yellow' depending on what the most likely color is. Preserve the order of the rows. The color column should be the second column. Write the output in output.csv" + }, + "id": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]", + "label": "LabelData", + "shape": "dot" + }, + { + "color": "grey", + "data": { + "category": [ + "data" + ], + "cutoff": 60, + "dependencies": [ + "TestReadFile" + ], + "eval_id": "db4654d7-fc97-4290-ab27-a710c2b5ce15", + "ground": { + "answer": "The csv sorted by date", + "eval": { + "type": "file" + }, + "files": [ + "output.csv" + ], + "should_contain": [ + "id,name,timestamp\n1,Bob,2023-09-24 12:05:00\n2,Charlie,2023-09-24 12:10:00\n3,Alice,2023-09-25 14:10:00\n4,David,2023-09-26 16:20:00" + ] + }, + "info": { + "description": "Tests if the agent can sort a csv", + "difficulty": "basic", + "side_effects": [ + "" + ] + }, + "name": "TestSortCsv", + "task": "Sort the input.csv by the 'timestamp' column and write the new csv in the output.csv file. The order of the columns should be preserved." + }, + "id": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]", + "label": "SortCsv", + "shape": "dot" + }, + { + "color": "grey", + "data": { + "category": [ + "data" + ], + "cutoff": 60, + "dependencies": [ + "TestLabelData" + ], + "eval_id": "d5f04342-983f-45a4-b84a-fe8d96863375", + "ground": { + "answer": "The csv data is combined", + "eval": { + "type": "file" + }, + "files": [ + "output.csv" + ], + "should_contain": [ + "Age,ID,Name,Occupation,Salary\n28,101,John,Engineer,80000\n34,102,Alice,Doctor,120000\n45,103,Bob,Lawyer,95000" + ] + }, + "info": { + "description": "Tests if the agent can combine data from a csv", + "difficulty": "intermediate", + "side_effects": [ + "" + ] + }, + "name": "TestCombineCsv", + "task": "The csvs 'file1.csv' and 'file2.csv' both have a column 'ID'. Combine these 2 csvs using the 'ID' column. Sort the rows by ID and the columns alphabetically. Write the output in output.csv" + }, + "id": "agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]", + "label": "CombineCsv", + "shape": "dot" + }, { "color": "grey", "data": { diff --git a/frontend/pubspec.lock b/frontend/pubspec.lock index 33d60d70adb2..abadca4ad8a0 100644 --- a/frontend/pubspec.lock +++ b/frontend/pubspec.lock @@ -45,10 +45,10 @@ packages: dependency: "direct main" description: name: collection - sha256: f092b211a4319e98e5ff58223576de6c2803db36221657b46c82574721240687 + sha256: "4a07be6cb69c84d677a6c3096fcf960cc3285a8330b4603e0d463d15d9bd934c" url: "https://pub.dev" source: hosted - version: "1.17.2" + version: "1.17.1" crypto: dependency: transitive description: @@ -292,18 +292,18 @@ packages: dependency: transitive description: name: matcher - sha256: "1803e76e6653768d64ed8ff2e1e67bea3ad4b923eb5c56a295c3e634bad5960e" + sha256: "6501fbd55da300384b768785b83e5ce66991266cec21af89ab9ae7f5ce1c4cbb" url: "https://pub.dev" source: hosted - version: "0.12.16" + version: "0.12.15" material_color_utilities: dependency: transitive description: name: material_color_utilities - sha256: "9528f2f296073ff54cb9fee677df673ace1218163c3bc7628093e7eed5203d41" + sha256: d92141dc6fe1dad30722f9aa826c7fbc896d021d792f80678280601aff8cf724 url: "https://pub.dev" source: hosted - version: "0.5.0" + version: "0.2.0" meta: dependency: transitive description: @@ -449,10 +449,10 @@ packages: dependency: transitive description: name: source_span - sha256: "53e943d4206a5e30df338fd4c6e7a077e02254531b138a15aec3bd143c1a8b3c" + sha256: dd904f795d4b4f3b870833847c461801f6750a9fa8e61ea5ac53f9422b31f250 url: "https://pub.dev" source: hosted - version: "1.10.0" + version: "1.9.1" sprintf: dependency: transitive description: @@ -497,10 +497,10 @@ packages: dependency: transitive description: name: test_api - sha256: "75760ffd7786fffdfb9597c35c5b27eaeec82be8edfb6d71d32651128ed7aab8" + sha256: eb6ac1540b26de412b3403a163d919ba86f6a973fe6cc50ae3541b80092fdcfb url: "https://pub.dev" source: hosted - version: "0.6.0" + version: "0.5.1" typed_data: dependency: transitive description: @@ -589,14 +589,6 @@ packages: url: "https://pub.dev" source: hosted version: "2.1.4" - web: - dependency: transitive - description: - name: web - sha256: dc8ccd225a2005c1be616fe02951e2e342092edf968cf0844220383757ef8f10 - url: "https://pub.dev" - source: hosted - version: "0.1.4-beta" win32: dependency: transitive description: @@ -614,5 +606,5 @@ packages: source: hosted version: "1.0.3" sdks: - dart: ">=3.1.0-185.0.dev <4.0.0" + dart: ">=3.0.0 <4.0.0" flutter: ">=3.10.0"