Feature/iterate on prompt (#472)

* checkin prompt progress * further cleanup
emrgnt-cmplxty · Aug 9, 2023 · e58452e · e58452e
1 parent efb79ed
commit e58452e
Show file tree

Hide file tree

Showing 6 changed files with 95 additions and 44 deletions.
diff --git a/automata/experimental/tools/builders/py_interpreter.py b/automata/experimental/tools/builders/py_interpreter.py
@@ -3,7 +3,7 @@
 import contextlib
 import io
 import logging
-from typing import List, Tuple
+from typing import List, Optional, Tuple
 
 # Import the entire symbol module so that we can properly patch convert_to_ast_object
 from automata.agent import (
@@ -93,9 +93,21 @@ def set_tests(self, code: str, overwrite: bool = True) -> str:
             self.test_context = []
         code = self._clean_markdown(code)
         try:
+            result: Optional[str] = None
             ast.parse(code)
+            if self.code_context != PyInterpreter.DEFAULT_CODE_CONTEXT.split(
+                "\n"
+            ):
+                code = "\n".join(self.code_context) + "\n" + code
+                status, result = self._attempt_execution(code)
+                if not status:
+                    return result
             self.test_context.extend(code.split("\n"))
-            return PyInterpreter.SUCCESS_STRING
+            return (
+                f"{PyInterpreter.SUCCESS_STRING}\nresult = {result}"
+                if result is not None
+                else PyInterpreter.SUCCESS_STRING
+            )
         except Exception as e:
             return f"Execution failed with error '{e}'."
 
@@ -117,10 +129,11 @@ def set_code(self, code: str, overwrite: bool = True) -> Tuple[bool, str]:
         return status, result
 
     def set_code_and_run_tests(self, code: str, overwrite: bool = True) -> str:
-        """Set hte code and then run the local tests"""
+        """Set the code and then run the local tests"""
         status, result = self.set_code(code, overwrite)
+        result = f"Code Exec Result:\n{result}"
         if status:
-            result += "\n" + self._run_tests()
+            result += "\n" + f"Test Exec Result:\n{self._run_tests()}"
         return result
 
     def _run_tests(self) -> str:

diff --git a/automata/tests/unit/py_interpreter/test_py_interpreter.py b/automata/tests/unit/py_interpreter/test_py_interpreter.py
@@ -75,7 +75,10 @@ def test_py_interpreter_empty_code_and_tests():
     assert status
     assert result == PyInterpreter.SUCCESS_STRING
     result = interpreter.set_tests("```python\n\n```")
-    assert result == PyInterpreter.SUCCESS_STRING
+    assert (
+        result
+        == f"{PyInterpreter.SUCCESS_STRING}\nresult = {PyInterpreter.SUCCESS_STRING}"
+    )
 
 
 def test_py_interpreter_non_python_code():

diff --git a/research/study_leetcode/constants.py b/research/study_leetcode/constants.py
@@ -31,91 +31,120 @@
 # agent prompts
 SOLVER_SYSTEM_PROMPT = textwrap.dedent(
     """
-  You are Automata Master, an advanced autonomous software architect developed by OpenAI. You are specifically designed to operate within local Python repositories. With your capability to understand and process natural language instructions, you perform tasks efficiently using your available functions. When you have completed your task, return the final result to the user as soon as possible via the `call_termination` function.
+    You are Automata Master, an advanced autonomous software architect developed by OpenAI. With your capability to understand and process natural language instructions, you perform tasks efficiently using your available functions.
 
-  Persistently execute multiple actions until you have amassed enough information to ensure a high likelihood of successfully completing the given task. Use ReAct + CoT reasoning to improve your likelihood of success.
 
-  In case you are not familiar with ReAct, this involves executing actions which follow the Thoughts --> Action --> Observation --> Thoughts --> Action --> chain demonstrated below:
+    Persistently execute multiple actions until you have amassed enough information to ensure a high likelihood of successfully completing the given task. Use ReAct + CoT reasoning to improve your likelihood of success. When you have completed your task, return the final result to the user as soon as possible via the `call_termination` function.
 
 
   **Example Pattern**
 
     *User*
       content:
-        Please carry out the following instruction "Determine how to best use Automata".
+        You are given two non-empty linked lists representing two non-negative integers. 
+        # ... (Omitted problem description) ...
 
     *Assistant*
       content:
         Thoughts: 
-          I should start by searching for the most relevant documentation. To accomplish this I will first retrieve the top matches for "Automata". 
-          
-         After retrieving the relevant matches, I will proceed to retrieving the most relevant documentation. After this, I will retrieve relevant code snippets and return the result to the user.
+          This problem requires constructing a linked list and handling carry-over during addition. I will set up test cases.
 
         Action:
-          I will call `search-top-matches` to see the most relevant matches to 'Automata'.
+          I will call `py-set-tests` to establish the environment testbed.
 
       function_call:
         {
-          'name': "search-top-matches",
-          'arguments': '{"query": "Automata"}'
+          'name': "py-set-tests",
+          'arguments': '{"code": "```python\\n# ... (Test code with real test case) ...```"}'
         }
 
     *User*
       content:
         Observation:
-          ...
+          Test Setup Successful.
 
     *Assistant*
       content:
         Thoughts:
-          I should ...
+          This problem requires constructing a linked list and handling carry-over during addition. The task can be elegantly solved by initializing a dummy linked list to store the sum and a carry variable to handle overflow between digits.
 
-        Action:
-          I will ...
+          Key Steps:
+          1. Initialize a dummy linked list with a value of 0, and a 'carry' variable set to 0.
+          2. Traverse both linked lists simultaneously, summing corresponding digits along with the carry.
+          3. The sum for each position is calculated as `sum = l1 + l2 + carry`.
+          4. The carry for the next step is calculated as `carry = sum // 10` (integer division).
+          5. The result linked list is constructed using the remainder `sum % 10`.
+          6. Continue until both linked lists are traversed, and any remaining carry is also added.
 
-      function_call:
-        ...
+          This approach ensures that the linked lists are effectively added together, handling any varying lengths and carry-over between digits.
+
+          Time Complexity: O(max(N, M)) where N is the length of l1 and M is the length of l2.
+          Space Complexity: O(max(N, M)) as we construct a new linked list for the result.
 
-    ...CONVERSATION CONTINUES...
-    
-    *Assistant*
-      content:
-        Thoughts:
-          We have sufficient information to return the correct result.
-        
         Action:
-          I will call `call_termination` to return the result.
-      
+          I will call `py-set-code-and-run-tests` to set the solution and run the tests.
+
       function_call:
         {
-          'name': 'call_termination', 
-          'arguments': '{"result": "```python\\nclass  SymbolDocEmbeddingHandler(SymbolEmbeddingHandler):\\n...CODE CONTINUES...```"}'
+          'name': "py-set-code-and-run-tests",
+          'arguments': '{"code": "```python\\nclass Solution:\\n  def addTwoNumbers(self, l1: Optional[ListNode], l2: Optional[ListNode]) -> Optional[ListNode]:\\n    # ... (Rest of the implementation) ...\\n```"}'
         }
 
+    *User*
+      content:
+        Observation:
+          Code Exec Result:
+          Execution Successful.
 
+          Test Exec Result:
+          executing for l1 = [2,4,3], l2 = [5,6,4], found result = [7,0,8]
+          # ... (Further tests) ...
 
-  Note, the examples are only provided above to give necessary context around the operating procedure. In production, the string '...CODE CONTINUES...' will be replaced with actual code. Documentation can be helpful in preserving token space and actions, so take advantage of this functionality. However, raw source code must be accessed at times, but when doing so attempt to retrieve a specific method whenever possible. Lastly, note that this is a production environment and that you will be graded on your ability to successfully exeute the exact request provided by the user. Please keep this in mind as you carry out the task.
 
+        # ... (Continued interaction) ...
 
-"""
+
+      *Assistant*
+        content:
+          Thoughts:
+            All tests have passed and the algorithm has been optimized. We can now be confident that a correct solution has been obtained.
+
+          Action:
+            I will call `call_termination` to return the result.
+        
+        function_call:
+          {
+            'name': 'call_termination', 
+            'arguments': '{"result": "```python\\nclass Solution:\\n  def addTwoNumbers(self, l1: Optional[ListNode], l2: Optional[ListNode]) -> Optional[ListNode]:\\n    # Final implementation goes here```"}'
+          }
+
+    Note, the examples are only provided above to give necessary context around the operating procedure. In production, placeholders such as '# ... (Rest of the implementation) ...' will be replaced with actual code. Documentation can be helpful in preserving token space and actions, so take advantage of this functionality. However, raw source code must be accessed at times, but when doing so attempt to retrieve a specific method whenever possible. Lastly, note that this is a production environment and that you will be graded on your ability to successfully execute the exact request provided by the user. Please keep this in mind as you carry out the task.
+
+    """
 )
 
 
 SOLVER_INSTRUCTIONS = """
 You are tasked with solving the following problem with an algorithm implemented in python:
 {PROBLEM_STATEMENT}
 
-To solve this, start by querying the solution oracle for the most similar solution.
+As an advanced autonomous software architect, Automata Master is expected to uphold high standards of reliability, which includes robust error handling and the production of quality code.
+
+1.) Start by querying the solution oracle to obtain the most similar solution.
+
+2.) Analyze the oracle response. If needed, perform further queries for related solutions, like `Solving Dijkstra's algorithm`.
+
+3.) Write four unique test cases which your final solution must pass. 
 
-Next, analyze hte provided response and then proceed to devise three unique test cases which will be used to test your final solution. 
+4.) Plan a step by step approach for implementing your algorithmic solution.
 
-Afterwards, in your next planning sequence you should outline a step by step approach for implementing your solution.
+5.) Write your solution using `py-set-code-and-run-tests`, iterate until all tests are passed.
 
-Then, proceed to write your algorithm and test it against the pre-selected test examples. 
+6.) Optimize the algorithm if possible. Because this is a LeetCode problem, it is likely that a relatively efficient solution exists.
 
-If your algorithm passes the tests, consider whether or not optimization is warrented. Because this is a leetcode problem, it is likely that a relatively efficient solution exists. If your algorithm fails the test cases, then proceed to modify it until all test cases are passed. 
+7.) Finally, return the result as a python markdown snippet using `call_termination`.
 
-Finally, return the final result as a python markdown snippet using `call_termination`. Lastly, remember that passed newline chars should be double-escaped, like \\n.
+Reminder: when passing code snippets, newline chars should be double-escaped, like \\n.
 """
 
 

diff --git a/research/study_leetcode/leetcode_problems_loader.py b/research/study_leetcode/leetcode_problems_loader.py
@@ -20,12 +20,13 @@ def get_problem_header(self, idx: int) -> str:
     def get_problem_context(self, idx: int) -> str:
         """Retrieve a problem by its index."""
         row = self.data.iloc[idx]
-        return f"Title:{row['question_title']}\n\nDescription:\n{row['description']}\n\nNote, your final solution MUST conform to the snippet shown here - {row['python3_snippet']}"
+        return f"Title:{row['question_title']}\n\nDescription:\n{row['description']}\n\nNote, your final solution MUST conform to the snippet shown here - ```python\\n{row['python3_snippet']}```"
 
     def get_problem_id_slug(self, idx: int) -> Tuple[int, str]:
         """Retrieve a problem by its index."""
         row = self.data.iloc[idx]
         return (
             int(row["frontend_question_id"]),
+            int(row["question_id"]),
             row["question_slug"],
         )
diff --git a/research/study_leetcode/leetcode_solutions_finder.py b/research/study_leetcode/leetcode_solutions_finder.py
@@ -127,7 +127,7 @@ def find_best_match_and_explanation(self, query: str) -> str:
                 int(
                     MAX_TOKENS
                     / examples_tokens_consumed
-                    * 0.9
+                    * 0.8
                     * len(examples_formatted)
                 ),
                 len(examples_formatted),

diff --git a/research/study_leetcode/run_automata_problem_solver.py b/research/study_leetcode/run_automata_problem_solver.py
@@ -105,6 +105,7 @@ def main():  # sourcery skip: docstrings-for-functions
                 problem_context,
                 (
                     problem_id,
+                    backend_problem_id,
                     problem_slug,
                 ),
             ) = (
@@ -175,12 +176,16 @@ def main():  # sourcery skip: docstrings-for-functions
             configure_logging("DEBUG")
             result = agent.run()
 
-            code = result.split("```python")[1].split("```")[0]
+            code = (
+                result.split("```python")[1]
+                .split("```")[0]
+                .replace("\\n", "\n")
+            )
             lang = ProgrammingLanguage.PYTHON3
             sub = LeetCodeSubmission(
                 code=code,
                 lang=lang,
-                question_id=problem_id,
+                question_id=backend_problem_id,
                 question_slug=problem_slug,
             )