From 793ff1c163bb0f9bd3e0c788b4978b8dc193ba6a Mon Sep 17 00:00:00 2001
From: merwanehamadi <merwanehamadi@gmail.com>
Date: Wed, 27 Sep 2023 10:47:34 -0700
Subject: [PATCH] Add data challenges (#5361)

Signed-off-by: Merwane Hamadi <merwanehamadi@gmail.com>
---
 benchmark/agbenchmark/agent_api_interface.py  |   4 -
 .../data/1_sort_csv/artifacts_in/input.csv    |   5 +
 .../data/1_sort_csv/artifacts_out/output.csv  |   5 +
 .../verticals/data/1_sort_csv/data.json       |  31 +++
 .../data/2_label_data/artifacts_in/input.csv  |  12 +
 .../2_label_data/artifacts_out/output.csv     |  12 +
 .../verticals/data/2_label_data/data.json     |  31 +++
 .../data/3_combine_csv/artifacts_in/file1.csv |   4 +
 .../data/3_combine_csv/artifacts_in/file2.csv |   4 +
 .../3_combine_csv/artifacts_out/output.csv    |   4 +
 .../verticals/data/3_combine_csv/data.json    |  31 +++
 benchmark/frontend/public/graph.json          | 129 +++++++++++
 frontend/assets/data_tree_structure.json      | 214 +++++++++++++++++-
 frontend/assets/tree_structure.json           | 129 +++++++++++
 frontend/pubspec.lock                         |  30 +--
 15 files changed, 620 insertions(+), 25 deletions(-)
 create mode 100644 benchmark/agbenchmark/challenges/verticals/data/1_sort_csv/artifacts_in/input.csv
 create mode 100644 benchmark/agbenchmark/challenges/verticals/data/1_sort_csv/artifacts_out/output.csv
 create mode 100644 benchmark/agbenchmark/challenges/verticals/data/1_sort_csv/data.json
 create mode 100644 benchmark/agbenchmark/challenges/verticals/data/2_label_data/artifacts_in/input.csv
 create mode 100644 benchmark/agbenchmark/challenges/verticals/data/2_label_data/artifacts_out/output.csv
 create mode 100644 benchmark/agbenchmark/challenges/verticals/data/2_label_data/data.json
 create mode 100644 benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/artifacts_in/file1.csv
 create mode 100644 benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/artifacts_in/file2.csv
 create mode 100644 benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/artifacts_out/output.csv
 create mode 100644 benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/data.json

diff --git a/benchmark/agbenchmark/agent_api_interface.py b/benchmark/agbenchmark/agent_api_interface.py
index ce6ab6494fc5..f35ef81a95a5 100644
--- a/benchmark/agbenchmark/agent_api_interface.py
+++ b/benchmark/agbenchmark/agent_api_interface.py
@@ -50,10 +50,6 @@ async def run_api_agent(
                 raise TimeoutError("Time limit exceeded")
             if not step or step.is_last:
                 steps_remaining = False
-            if os.getenv("IS_MOCK"):
-                time.sleep(
-                    1
-                )  # will help with the integration og the "polling updates" features since mock agent is too fast.
         # if we're calling a mock agent, we "cheat" and give the correct artifacts to pass the tests
         if os.getenv("IS_MOCK"):
             await upload_artifacts(
diff --git a/benchmark/agbenchmark/challenges/verticals/data/1_sort_csv/artifacts_in/input.csv b/benchmark/agbenchmark/challenges/verticals/data/1_sort_csv/artifacts_in/input.csv
new file mode 100644
index 000000000000..a52510f189da
--- /dev/null
+++ b/benchmark/agbenchmark/challenges/verticals/data/1_sort_csv/artifacts_in/input.csv
@@ -0,0 +1,5 @@
+id,name,timestamp
+3,Alice,2023-09-25 14:10:00
+1,Bob,2023-09-24 12:05:00
+2,Charlie,2023-09-24 12:10:00
+4,David,2023-09-26 16:20:00
diff --git a/benchmark/agbenchmark/challenges/verticals/data/1_sort_csv/artifacts_out/output.csv b/benchmark/agbenchmark/challenges/verticals/data/1_sort_csv/artifacts_out/output.csv
new file mode 100644
index 000000000000..6cac77333370
--- /dev/null
+++ b/benchmark/agbenchmark/challenges/verticals/data/1_sort_csv/artifacts_out/output.csv
@@ -0,0 +1,5 @@
+id,name,timestamp
+1,Bob,2023-09-24 12:05:00
+2,Charlie,2023-09-24 12:10:00
+3,Alice,2023-09-25 14:10:00
+4,David,2023-09-26 16:20:00
diff --git a/benchmark/agbenchmark/challenges/verticals/data/1_sort_csv/data.json b/benchmark/agbenchmark/challenges/verticals/data/1_sort_csv/data.json
new file mode 100644
index 000000000000..8515af89fc65
--- /dev/null
+++ b/benchmark/agbenchmark/challenges/verticals/data/1_sort_csv/data.json
@@ -0,0 +1,31 @@
+{
+    "category": [
+        "data"
+    ],
+    "cutoff": 60,
+    "dependencies": [
+        "TestReadFile"
+    ],
+    "eval_id": "db4654d7-fc97-4290-ab27-a710c2b5ce15",
+    "ground": {
+        "answer": "The csv sorted by date",
+        "eval": {
+            "type": "file"
+        },
+        "files": [
+            "output.csv"
+        ],
+        "should_contain": [
+            "id,name,timestamp\n1,Bob,2023-09-24 12:05:00\n2,Charlie,2023-09-24 12:10:00\n3,Alice,2023-09-25 14:10:00\n4,David,2023-09-26 16:20:00"
+        ]
+    },
+    "info": {
+        "description": "Tests if the agent can sort a csv",
+        "difficulty": "basic",
+        "side_effects": [
+            ""
+        ]
+    },
+    "name": "SortCsv",
+    "task": "Sort the input.csv by the 'timestamp' column and write the new csv in the output.csv file. The order of the columns should be preserved."
+}
diff --git a/benchmark/agbenchmark/challenges/verticals/data/2_label_data/artifacts_in/input.csv b/benchmark/agbenchmark/challenges/verticals/data/2_label_data/artifacts_in/input.csv
new file mode 100644
index 000000000000..ae4ca502b4ab
--- /dev/null
+++ b/benchmark/agbenchmark/challenges/verticals/data/2_label_data/artifacts_in/input.csv
@@ -0,0 +1,12 @@
+Item
+Banana
+Leaf
+Sky
+Sunflower
+Grass
+Jeans
+Lemon
+Tree
+Ocean
+Daisy
+Fern
diff --git a/benchmark/agbenchmark/challenges/verticals/data/2_label_data/artifacts_out/output.csv b/benchmark/agbenchmark/challenges/verticals/data/2_label_data/artifacts_out/output.csv
new file mode 100644
index 000000000000..7b19bce982c6
--- /dev/null
+++ b/benchmark/agbenchmark/challenges/verticals/data/2_label_data/artifacts_out/output.csv
@@ -0,0 +1,12 @@
+Item, Color
+Banana, Yellow
+Leaf, Green
+Sky, Blue
+Sunflower, Yellow
+Grass, Green
+Jeans, Blue
+Lemon, Yellow
+Tree, Green
+Ocean, Blue
+Daisy, Yellow
+Fern, Green
diff --git a/benchmark/agbenchmark/challenges/verticals/data/2_label_data/data.json b/benchmark/agbenchmark/challenges/verticals/data/2_label_data/data.json
new file mode 100644
index 000000000000..a024a6eb8a85
--- /dev/null
+++ b/benchmark/agbenchmark/challenges/verticals/data/2_label_data/data.json
@@ -0,0 +1,31 @@
+{
+    "category": [
+        "data"
+    ],
+    "cutoff": 60,
+    "dependencies": [
+        "TestSortCsv"
+    ],
+    "eval_id": "6c58e229-aa22-4c4f-a053-4a78931ad41e",
+    "ground": {
+        "answer": "The csv labelled",
+        "eval": {
+            "type": "file"
+        },
+        "files": [
+            "output.csv"
+        ],
+        "should_contain": [
+            "Item, Color\nBanana, Yellow\nLeaf, Green\nSky, Blue\nSunflower, Yellow\nGrass, Green\nJeans, Blue\nLemon, Yellow\nTree, Green\nOcean, Blue\nDaisy, Yellow\nFern, Green"
+        ]
+    },
+    "info": {
+        "description": "Tests if the agent can label data in a csv",
+        "difficulty": "basic",
+        "side_effects": [
+            ""
+        ]
+    },
+    "name": "LabelData",
+    "task": "The csv 'input.csv' has many items. create a 'Color' column for these items and classify them as either 'blue', 'green', or 'yellow' depending on what the most likely color is. Preserve the order of the rows. The color column should be the second column. Write the output in output.csv"
+}
diff --git a/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/artifacts_in/file1.csv b/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/artifacts_in/file1.csv
new file mode 100644
index 000000000000..fe552d6774ef
--- /dev/null
+++ b/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/artifacts_in/file1.csv
@@ -0,0 +1,4 @@
+ID,Name,Age
+101,John,28
+102,Alice,34
+103,Bob,45
diff --git a/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/artifacts_in/file2.csv b/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/artifacts_in/file2.csv
new file mode 100644
index 000000000000..685e24f4b9f0
--- /dev/null
+++ b/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/artifacts_in/file2.csv
@@ -0,0 +1,4 @@
+ID,Occupation,Salary
+101,Engineer,80000
+102,Doctor,120000
+103,Lawyer,95000
diff --git a/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/artifacts_out/output.csv b/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/artifacts_out/output.csv
new file mode 100644
index 000000000000..8afe84bf0147
--- /dev/null
+++ b/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/artifacts_out/output.csv
@@ -0,0 +1,4 @@
+Age,ID,Name,Occupation,Salary
+28,101,John,Engineer,80000
+34,102,Alice,Doctor,120000
+45,103,Bob,Lawyer,95000
diff --git a/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/data.json b/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/data.json
new file mode 100644
index 000000000000..0739fe031815
--- /dev/null
+++ b/benchmark/agbenchmark/challenges/verticals/data/3_combine_csv/data.json
@@ -0,0 +1,31 @@
+{
+    "category": [
+        "data"
+    ],
+    "cutoff": 60,
+    "dependencies": [
+        "TestLabelData"
+    ],
+    "eval_id": "d5f04342-983f-45a4-b84a-fe8d96863375",
+    "ground": {
+        "answer": "The csv data is combined",
+        "eval": {
+            "type": "file"
+        },
+        "files": [
+            "output.csv"
+        ],
+        "should_contain": [
+            "Age,ID,Name,Occupation,Salary\n28,101,John,Engineer,80000\n34,102,Alice,Doctor,120000\n45,103,Bob,Lawyer,95000"
+        ]
+    },
+    "info": {
+        "description": "Tests if the agent can combine data from a csv",
+        "difficulty": "intermediate",
+        "side_effects": [
+            ""
+        ]
+    },
+    "name": "CombineCsv",
+    "task": "The csvs 'file1.csv' and 'file2.csv' both have a column 'ID'. Combine these 2 csvs using the 'ID' column. Sort the rows by ID in ascending order and the columns alphabetically. Write the output in output.csv"
+}
diff --git a/benchmark/frontend/public/graph.json b/benchmark/frontend/public/graph.json
index 8116dacfdae3..f5401f48c6fe 100644
--- a/benchmark/frontend/public/graph.json
+++ b/benchmark/frontend/public/graph.json
@@ -12,6 +12,12 @@
             "id": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestBattleship::test_method[challenge_data0]",
             "to": "agbenchmark/generate_test.py::TestBattleship::test_method[challenge_data0]"
         },
+        {
+            "arrows": "to",
+            "from": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]",
+            "id": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]",
+            "to": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]"
+        },
         {
             "arrows": "to",
             "from": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]",
@@ -83,6 +89,18 @@
             "from": "agbenchmark/generate_test.py::TestRevenueRetrieval::test_method[challenge_data0]",
             "id": "agbenchmark/generate_test.py::TestRevenueRetrieval::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestRevenueRetrieval2::test_method[challenge_data0]",
             "to": "agbenchmark/generate_test.py::TestRevenueRetrieval2::test_method[challenge_data0]"
+        },
+        {
+            "arrows": "to",
+            "from": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]",
+            "id": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]",
+            "to": "agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]"
+        },
+        {
+            "arrows": "to",
+            "from": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]",
+            "id": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]",
+            "to": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]"
         }
     ],
     "nodes": [
@@ -574,6 +592,117 @@
             "label": "RevenueRetrieval2",
             "shape": "dot"
         },
+        {
+            "color": "grey",
+            "data": {
+                "category": [
+                    "data"
+                ],
+                "cutoff": 60,
+                "dependencies": [
+                    "TestSortCsv"
+                ],
+                "eval_id": "6c58e229-aa22-4c4f-a053-4a78931ad41e",
+                "ground": {
+                    "answer": "The csv labelled",
+                    "eval": {
+                        "type": "file"
+                    },
+                    "files": [
+                        "output.csv"
+                    ],
+                    "should_contain": [
+                        "Item, Color\nBanana, Yellow\nLeaf, Green\nSky, Blue\nSunflower, Yellow\nGrass, Green\nJeans, Blue\nLemon, Yellow\nTree, Green\nOcean, Blue\nDaisy, Yellow\nFern, Green"
+                    ]
+                },
+                "info": {
+                    "description": "Tests if the agent can label data in a csv",
+                    "difficulty": "basic",
+                    "side_effects": [
+                        ""
+                    ]
+                },
+                "name": "TestLabelData",
+                "task": "The csv 'input.csv' has many items. create a 'Color' column for these items and classify them as either 'blue', 'green', or 'yellow' depending on what the most likely color is. Preserve the order of the rows. The color column should be the second column. Write the output in output.csv"
+            },
+            "id": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]",
+            "label": "LabelData",
+            "shape": "dot"
+        },
+        {
+            "color": "grey",
+            "data": {
+                "category": [
+                    "data"
+                ],
+                "cutoff": 60,
+                "dependencies": [
+                    "TestReadFile"
+                ],
+                "eval_id": "db4654d7-fc97-4290-ab27-a710c2b5ce15",
+                "ground": {
+                    "answer": "The csv sorted by date",
+                    "eval": {
+                        "type": "file"
+                    },
+                    "files": [
+                        "output.csv"
+                    ],
+                    "should_contain": [
+                        "id,name,timestamp\n1,Bob,2023-09-24 12:05:00\n2,Charlie,2023-09-24 12:10:00\n3,Alice,2023-09-25 14:10:00\n4,David,2023-09-26 16:20:00"
+                    ]
+                },
+                "info": {
+                    "description": "Tests if the agent can sort a csv",
+                    "difficulty": "basic",
+                    "side_effects": [
+                        ""
+                    ]
+                },
+                "name": "TestSortCsv",
+                "task": "Sort the input.csv by the 'timestamp' column and write the new csv in the output.csv file. The order of the columns should be preserved."
+            },
+            "id": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]",
+            "label": "SortCsv",
+            "shape": "dot"
+        },
+        {
+            "color": "grey",
+            "data": {
+                "category": [
+                    "data"
+                ],
+                "cutoff": 60,
+                "dependencies": [
+                    "TestLabelData"
+                ],
+                "eval_id": "d5f04342-983f-45a4-b84a-fe8d96863375",
+                "ground": {
+                    "answer": "The csv data is combined",
+                    "eval": {
+                        "type": "file"
+                    },
+                    "files": [
+                        "output.csv"
+                    ],
+                    "should_contain": [
+                        "Age,ID,Name,Occupation,Salary\n28,101,John,Engineer,80000\n34,102,Alice,Doctor,120000\n45,103,Bob,Lawyer,95000"
+                    ]
+                },
+                "info": {
+                    "description": "Tests if the agent can combine data from a csv",
+                    "difficulty": "intermediate",
+                    "side_effects": [
+                        ""
+                    ]
+                },
+                "name": "TestCombineCsv",
+                "task": "The csvs 'file1.csv' and 'file2.csv' both have a column 'ID'. Combine these 2 csvs using the 'ID' column. Sort the rows by ID in ascending order and the columns alphabetically. Write the output in output.csv"
+            },
+            "id": "agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]",
+            "label": "CombineCsv",
+            "shape": "dot"
+        },
         {
             "color": "grey",
             "data": {
diff --git a/frontend/assets/data_tree_structure.json b/frontend/assets/data_tree_structure.json
index 1f1a2ad1390c..48432d2d32a2 100644
--- a/frontend/assets/data_tree_structure.json
+++ b/frontend/assets/data_tree_structure.json
@@ -1,4 +1,214 @@
 {
-    "edges": [],
-    "nodes": []
+    "edges": [
+        {
+            "arrows": "to",
+            "from": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]",
+            "id": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]",
+            "to": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]"
+        },
+        {
+            "arrows": "to",
+            "from": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]",
+            "id": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]",
+            "to": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]"
+        },
+        {
+            "arrows": "to",
+            "from": "agbenchmark/generate_test.py::TestWriteFile::test_method[challenge_data0]",
+            "id": "agbenchmark/generate_test.py::TestWriteFile::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]",
+            "to": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]"
+        },
+        {
+            "arrows": "to",
+            "from": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]",
+            "id": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]",
+            "to": "agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]"
+        }
+    ],
+    "nodes": [
+        {
+            "color": "grey",
+            "data": {
+                "category": [
+                    "data"
+                ],
+                "cutoff": 60,
+                "dependencies": [
+                    "TestSortCsv"
+                ],
+                "eval_id": "6c58e229-aa22-4c4f-a053-4a78931ad41e",
+                "ground": {
+                    "answer": "The csv labelled",
+                    "eval": {
+                        "type": "file"
+                    },
+                    "files": [
+                        "output.csv"
+                    ],
+                    "should_contain": [
+                        "Item, Color\nBanana, Yellow\nLeaf, Green\nSky, Blue\nSunflower, Yellow\nGrass, Green\nJeans, Blue\nLemon, Yellow\nTree, Green\nOcean, Blue\nDaisy, Yellow\nFern, Green"
+                    ]
+                },
+                "info": {
+                    "description": "Tests if the agent can label data in a csv",
+                    "difficulty": "basic",
+                    "side_effects": [
+                        ""
+                    ]
+                },
+                "name": "TestLabelData",
+                "task": "The csv 'input.csv' has many items. create a 'Color' column for these items and classify them as either 'blue', 'green', or 'yellow' depending on what the most likely color is. Preserve the order of the rows. The color column should be the second column. Write the output in output.csv"
+            },
+            "id": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]",
+            "label": "LabelData",
+            "shape": "dot"
+        },
+        {
+            "color": "grey",
+            "data": {
+                "category": [
+                    "data"
+                ],
+                "cutoff": 60,
+                "dependencies": [
+                    "TestReadFile"
+                ],
+                "eval_id": "db4654d7-fc97-4290-ab27-a710c2b5ce15",
+                "ground": {
+                    "answer": "The csv sorted by date",
+                    "eval": {
+                        "type": "file"
+                    },
+                    "files": [
+                        "output.csv"
+                    ],
+                    "should_contain": [
+                        "id,name,timestamp\n1,Bob,2023-09-24 12:05:00\n2,Charlie,2023-09-24 12:10:00\n3,Alice,2023-09-25 14:10:00\n4,David,2023-09-26 16:20:00"
+                    ]
+                },
+                "info": {
+                    "description": "Tests if the agent can sort a csv",
+                    "difficulty": "basic",
+                    "side_effects": [
+                        ""
+                    ]
+                },
+                "name": "TestSortCsv",
+                "task": "Sort the input.csv by the 'timestamp' column and write the new csv in the output.csv file. The order of the columns should be preserved."
+            },
+            "id": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]",
+            "label": "SortCsv",
+            "shape": "dot"
+        },
+        {
+            "color": "grey",
+            "data": {
+                "category": [
+                    "interface"
+                ],
+                "cutoff": 60,
+                "dependencies": [
+                    "TestWriteFile"
+                ],
+                "eval_id": "261ccfaa-02a2-4c1a-8a56-c76c66f7dba1",
+                "ground": {
+                    "answer": "The content of output.txt should be 'Hello World!'",
+                    "eval": {
+                        "type": "file"
+                    },
+                    "files": [
+                        "output.txt"
+                    ],
+                    "should_contain": [
+                        "Hello World!"
+                    ]
+                },
+                "info": {
+                    "description": "tests the ability for an agent to read a file.",
+                    "difficulty": "interface",
+                    "side_effects": [
+                        ""
+                    ]
+                },
+                "name": "TestReadFile",
+                "task": "Read the file called file_to_read.txt and write its content to a file called output.txt"
+            },
+            "id": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]",
+            "label": "ReadFile",
+            "shape": "dot"
+        },
+        {
+            "color": "grey",
+            "data": {
+                "category": [
+                    "interface"
+                ],
+                "cutoff": 60,
+                "dependencies": [],
+                "eval_id": "81b64bf9-2b6a-4ac8-bcd2-8bfe36244ac0",
+                "ground": {
+                    "answer": "The word 'Washington', printed to a .txt file named anything",
+                    "eval": {
+                        "type": "file"
+                    },
+                    "files": [
+                        ".txt"
+                    ],
+                    "should_contain": [
+                        "Washington"
+                    ],
+                    "should_not_contain": []
+                },
+                "info": {
+                    "description": "Tests the agents ability to write to a file",
+                    "difficulty": "interface",
+                    "side_effects": [
+                        ""
+                    ]
+                },
+                "name": "TestWriteFile",
+                "task": "Write the word 'Washington' to a .txt file"
+            },
+            "id": "agbenchmark/generate_test.py::TestWriteFile::test_method[challenge_data0]",
+            "label": "WriteFile",
+            "shape": "dot"
+        },
+        {
+            "color": "grey",
+            "data": {
+                "category": [
+                    "data"
+                ],
+                "cutoff": 60,
+                "dependencies": [
+                    "TestLabelData"
+                ],
+                "eval_id": "d5f04342-983f-45a4-b84a-fe8d96863375",
+                "ground": {
+                    "answer": "The csv data is combined",
+                    "eval": {
+                        "type": "file"
+                    },
+                    "files": [
+                        "output.csv"
+                    ],
+                    "should_contain": [
+                        "Age,ID,Name,Occupation,Salary\n28,101,John,Engineer,80000\n34,102,Alice,Doctor,120000\n45,103,Bob,Lawyer,95000"
+                    ]
+                },
+                "info": {
+                    "description": "Tests if the agent can combine data from a csv",
+                    "difficulty": "intermediate",
+                    "side_effects": [
+                        ""
+                    ]
+                },
+                "name": "TestCombineCsv",
+                "task": "The csvs 'file1.csv' and 'file2.csv' both have a column 'ID'. Combine these 2 csvs using the 'ID' column. Sort the rows by ID in ascending order and the columns alphabetically. Write the output in output.csv"
+            },
+            "id": "agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]",
+            "label": "CombineCsv",
+            "shape": "dot"
+        }
+    ]
 }
diff --git a/frontend/assets/tree_structure.json b/frontend/assets/tree_structure.json
index 8116dacfdae3..f5401f48c6fe 100644
--- a/frontend/assets/tree_structure.json
+++ b/frontend/assets/tree_structure.json
@@ -12,6 +12,12 @@
             "id": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestBattleship::test_method[challenge_data0]",
             "to": "agbenchmark/generate_test.py::TestBattleship::test_method[challenge_data0]"
         },
+        {
+            "arrows": "to",
+            "from": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]",
+            "id": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]",
+            "to": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]"
+        },
         {
             "arrows": "to",
             "from": "agbenchmark/generate_test.py::TestReadFile::test_method[challenge_data0]",
@@ -83,6 +89,18 @@
             "from": "agbenchmark/generate_test.py::TestRevenueRetrieval::test_method[challenge_data0]",
             "id": "agbenchmark/generate_test.py::TestRevenueRetrieval::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestRevenueRetrieval2::test_method[challenge_data0]",
             "to": "agbenchmark/generate_test.py::TestRevenueRetrieval2::test_method[challenge_data0]"
+        },
+        {
+            "arrows": "to",
+            "from": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]",
+            "id": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]",
+            "to": "agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]"
+        },
+        {
+            "arrows": "to",
+            "from": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]",
+            "id": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]_to_agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]",
+            "to": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]"
         }
     ],
     "nodes": [
@@ -574,6 +592,117 @@
             "label": "RevenueRetrieval2",
             "shape": "dot"
         },
+        {
+            "color": "grey",
+            "data": {
+                "category": [
+                    "data"
+                ],
+                "cutoff": 60,
+                "dependencies": [
+                    "TestSortCsv"
+                ],
+                "eval_id": "6c58e229-aa22-4c4f-a053-4a78931ad41e",
+                "ground": {
+                    "answer": "The csv labelled",
+                    "eval": {
+                        "type": "file"
+                    },
+                    "files": [
+                        "output.csv"
+                    ],
+                    "should_contain": [
+                        "Item, Color\nBanana, Yellow\nLeaf, Green\nSky, Blue\nSunflower, Yellow\nGrass, Green\nJeans, Blue\nLemon, Yellow\nTree, Green\nOcean, Blue\nDaisy, Yellow\nFern, Green"
+                    ]
+                },
+                "info": {
+                    "description": "Tests if the agent can label data in a csv",
+                    "difficulty": "basic",
+                    "side_effects": [
+                        ""
+                    ]
+                },
+                "name": "TestLabelData",
+                "task": "The csv 'input.csv' has many items. create a 'Color' column for these items and classify them as either 'blue', 'green', or 'yellow' depending on what the most likely color is. Preserve the order of the rows. The color column should be the second column. Write the output in output.csv"
+            },
+            "id": "agbenchmark/generate_test.py::TestLabelData::test_method[challenge_data0]",
+            "label": "LabelData",
+            "shape": "dot"
+        },
+        {
+            "color": "grey",
+            "data": {
+                "category": [
+                    "data"
+                ],
+                "cutoff": 60,
+                "dependencies": [
+                    "TestReadFile"
+                ],
+                "eval_id": "db4654d7-fc97-4290-ab27-a710c2b5ce15",
+                "ground": {
+                    "answer": "The csv sorted by date",
+                    "eval": {
+                        "type": "file"
+                    },
+                    "files": [
+                        "output.csv"
+                    ],
+                    "should_contain": [
+                        "id,name,timestamp\n1,Bob,2023-09-24 12:05:00\n2,Charlie,2023-09-24 12:10:00\n3,Alice,2023-09-25 14:10:00\n4,David,2023-09-26 16:20:00"
+                    ]
+                },
+                "info": {
+                    "description": "Tests if the agent can sort a csv",
+                    "difficulty": "basic",
+                    "side_effects": [
+                        ""
+                    ]
+                },
+                "name": "TestSortCsv",
+                "task": "Sort the input.csv by the 'timestamp' column and write the new csv in the output.csv file. The order of the columns should be preserved."
+            },
+            "id": "agbenchmark/generate_test.py::TestSortCsv::test_method[challenge_data0]",
+            "label": "SortCsv",
+            "shape": "dot"
+        },
+        {
+            "color": "grey",
+            "data": {
+                "category": [
+                    "data"
+                ],
+                "cutoff": 60,
+                "dependencies": [
+                    "TestLabelData"
+                ],
+                "eval_id": "d5f04342-983f-45a4-b84a-fe8d96863375",
+                "ground": {
+                    "answer": "The csv data is combined",
+                    "eval": {
+                        "type": "file"
+                    },
+                    "files": [
+                        "output.csv"
+                    ],
+                    "should_contain": [
+                        "Age,ID,Name,Occupation,Salary\n28,101,John,Engineer,80000\n34,102,Alice,Doctor,120000\n45,103,Bob,Lawyer,95000"
+                    ]
+                },
+                "info": {
+                    "description": "Tests if the agent can combine data from a csv",
+                    "difficulty": "intermediate",
+                    "side_effects": [
+                        ""
+                    ]
+                },
+                "name": "TestCombineCsv",
+                "task": "The csvs 'file1.csv' and 'file2.csv' both have a column 'ID'. Combine these 2 csvs using the 'ID' column. Sort the rows by ID in ascending order and the columns alphabetically. Write the output in output.csv"
+            },
+            "id": "agbenchmark/generate_test.py::TestCombineCsv::test_method[challenge_data0]",
+            "label": "CombineCsv",
+            "shape": "dot"
+        },
         {
             "color": "grey",
             "data": {
diff --git a/frontend/pubspec.lock b/frontend/pubspec.lock
index 33d60d70adb2..abadca4ad8a0 100644
--- a/frontend/pubspec.lock
+++ b/frontend/pubspec.lock
@@ -45,10 +45,10 @@ packages:
     dependency: "direct main"
     description:
       name: collection
-      sha256: f092b211a4319e98e5ff58223576de6c2803db36221657b46c82574721240687
+      sha256: "4a07be6cb69c84d677a6c3096fcf960cc3285a8330b4603e0d463d15d9bd934c"
       url: "https://pub.dev"
     source: hosted
-    version: "1.17.2"
+    version: "1.17.1"
   crypto:
     dependency: transitive
     description:
@@ -292,18 +292,18 @@ packages:
     dependency: transitive
     description:
       name: matcher
-      sha256: "1803e76e6653768d64ed8ff2e1e67bea3ad4b923eb5c56a295c3e634bad5960e"
+      sha256: "6501fbd55da300384b768785b83e5ce66991266cec21af89ab9ae7f5ce1c4cbb"
       url: "https://pub.dev"
     source: hosted
-    version: "0.12.16"
+    version: "0.12.15"
   material_color_utilities:
     dependency: transitive
     description:
       name: material_color_utilities
-      sha256: "9528f2f296073ff54cb9fee677df673ace1218163c3bc7628093e7eed5203d41"
+      sha256: d92141dc6fe1dad30722f9aa826c7fbc896d021d792f80678280601aff8cf724
       url: "https://pub.dev"
     source: hosted
-    version: "0.5.0"
+    version: "0.2.0"
   meta:
     dependency: transitive
     description:
@@ -449,10 +449,10 @@ packages:
     dependency: transitive
     description:
       name: source_span
-      sha256: "53e943d4206a5e30df338fd4c6e7a077e02254531b138a15aec3bd143c1a8b3c"
+      sha256: dd904f795d4b4f3b870833847c461801f6750a9fa8e61ea5ac53f9422b31f250
       url: "https://pub.dev"
     source: hosted
-    version: "1.10.0"
+    version: "1.9.1"
   sprintf:
     dependency: transitive
     description:
@@ -497,10 +497,10 @@ packages:
     dependency: transitive
     description:
       name: test_api
-      sha256: "75760ffd7786fffdfb9597c35c5b27eaeec82be8edfb6d71d32651128ed7aab8"
+      sha256: eb6ac1540b26de412b3403a163d919ba86f6a973fe6cc50ae3541b80092fdcfb
       url: "https://pub.dev"
     source: hosted
-    version: "0.6.0"
+    version: "0.5.1"
   typed_data:
     dependency: transitive
     description:
@@ -589,14 +589,6 @@ packages:
       url: "https://pub.dev"
     source: hosted
     version: "2.1.4"
-  web:
-    dependency: transitive
-    description:
-      name: web
-      sha256: dc8ccd225a2005c1be616fe02951e2e342092edf968cf0844220383757ef8f10
-      url: "https://pub.dev"
-    source: hosted
-    version: "0.1.4-beta"
   win32:
     dependency: transitive
     description:
@@ -614,5 +606,5 @@ packages:
     source: hosted
     version: "1.0.3"
 sdks:
-  dart: ">=3.1.0-185.0.dev <4.0.0"
+  dart: ">=3.0.0 <4.0.0"
   flutter: ">=3.10.0"