
Commit

attempt
AlexCuadron committed Nov 13, 2024
1 parent a9e346a commit 413caa6
Showing 8 changed files with 139 additions and 138 deletions.
22 changes: 8 additions & 14 deletions evaluation/swe_bench/run_infer.py
@@ -47,6 +47,7 @@
'CodeActAgent': codeact_user_response,
'CodeActSWEAgent': codeact_user_response,
'SupervisorAgent': codeact_user_response,
'DelegatorAgent': codeact_user_response,
}


@@ -69,6 +70,13 @@ def get_instruction(instance: pd.Series, metadata: EvalMetadata):
f'--- BEGIN HINTS ---\n{instance.hints_text}\n--- END HINTS ---\n'
)
instruction += CODEACT_SWE_PROMPT.format(workspace_dir_name=workspace_dir_name)
elif metadata.agent_class == 'DelegatorAgent':
instruction = (
f"I've uploaded a python code repository in the directory {workspace_dir_name}. Consider the following PR description:\n\n"
f'<pr_description>\n'
f'{instance.problem_statement}\n'
'</pr_description>\n\n'
)
else:
# Instruction based on Anthropic's official trajectory
# https://github.com/eschluntz/swe-bench-experiments/tree/main/evaluation/verified/20241022_tools_claude-3-5-sonnet-updated/trajs
@@ -92,20 +100,6 @@ def get_instruction(instance: pd.Series, metadata: EvalMetadata):
"Your thinking should be thorough and so it's fine if it's very long.\n"
)

instruction += (
'<IMPORTANT>\n'
'- You MUST generate only one action per turn!\n'
'- A patch is a set of changes to the source code of the codebase that you are given\n'
'- You MUST generate a patch that attempts to fix the issue described in the <pr_description>\n'
'</IMPORTANT>\n'
)

if RUN_WITH_BROWSING:
instruction += (
'<IMPORTANT!>\n'
'You SHOULD NEVER attempt to browse the web. '
'</IMPORTANT!>\n'
)
return instruction


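For reference, here is a toy rendering of the new DelegatorAgent branch in get_instruction above. The instance fields and workspace directory below are made-up sample values for illustration, not taken from the benchmark:

import pandas as pd

# Sample values for illustration only; real instances come from the SWE-bench dataset.
instance = pd.Series(
    {'problem_statement': 'TypeError raised when calling foo() with no arguments.'}
)
workspace_dir_name = 'astropy__astropy__5.0'

# Mirrors the f-string built in the DelegatorAgent branch of get_instruction.
instruction = (
    f"I've uploaded a python code repository in the directory {workspace_dir_name}. Consider the following PR description:\n\n"
    f'<pr_description>\n'
    f'{instance.problem_statement}\n'
    '</pr_description>\n\n'
)
print(instruction)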
21 changes: 10 additions & 11 deletions openhands/agenthub/codeact_agent/codeact_agent.py
@@ -73,7 +73,7 @@ class CodeActAgent(Agent):
JupyterRequirement(),
]
obs_prefix = 'OBSERVATION:\n'
when_to_stop = 6
when_to_stop = -1
number_of_events = -1

def __init__(
@@ -363,16 +363,6 @@ def step(self, state: State) -> Action:
outputs={'fixed': True, 'trayectory': serialized_messages}
)

# if we've reached the max number of iterations, go back for an evaluation on the approach
if self.when_to_stop > 0 and state.local_iteration % self.when_to_stop == 0:
messages = self._get_messages(state)
serialized_messages = [
msg.model_dump() for msg in messages
] # Serialize each Message object
return AgentFinishAction(
outputs={'trayectory': serialized_messages, 'fixed': False}
)

# prepare what we want to send to the LLM
messages = self._get_messages(state)
params: dict = {
@@ -390,6 +380,15 @@ def step(self, state: State) -> Action:
]
response = self.llm.completion(**params)

# if we've reached the max number of iterations, go back for an evaluation on the approach
if self.when_to_stop > 0 and state.local_iteration % self.when_to_stop == 0:
return AgentFinishAction(
outputs={
'response': response['choices'][0]['message']['content'],
'fixed': False,
}
)

if self.function_calling_active:
actions = codeact_function_calling.response_to_actions(response)
for action in actions:
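The diff above moves the when_to_stop check to after the LLM call, so the delegating agent receives the model's latest raw answer instead of a serialized trajectory. A minimal sketch of that checkpoint pattern, using toy names (ToyCodeActAgent, fake_llm_completion) rather than the real OpenHands classes:

def fake_llm_completion(messages):
    # Stand-in for self.llm.completion(**params); returns a litellm-style dict.
    return {'choices': [{'message': {'content': f'step {len(messages)} reasoning...'}}]}

class ToyCodeActAgent:
    def __init__(self, when_to_stop: int = -1):
        # when_to_stop <= 0 disables the periodic hand-back, mirroring the new default of -1.
        self.when_to_stop = when_to_stop
        self.local_iteration = 0

    def step(self, messages):
        self.local_iteration += 1
        response = fake_llm_completion(messages)
        # Every `when_to_stop` iterations, finish and hand the latest answer back
        # to the supervisor with fixed=False so it can evaluate the approach.
        if self.when_to_stop > 0 and self.local_iteration % self.when_to_stop == 0:
            return {
                'finish': True,
                'response': response['choices'][0]['message']['content'],
                'fixed': False,
            }
        return {'finish': False, 'response': response['choices'][0]['message']['content']}

if __name__ == '__main__':
    agent = ToyCodeActAgent(when_to_stop=2)
    for i in range(4):
        print(agent.step(['msg'] * (i + 1)))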
1 change: 0 additions & 1 deletion openhands/agenthub/delegator_agent/agent.py
@@ -49,7 +49,6 @@ def step(self, state: State) -> Action:

if not isinstance(last_observation, AgentDelegateObservation):
raise Exception('Last observation is not an AgentDelegateObservation')

goal, _ = state.get_current_user_intent()
if self.current_delegate == 'study':
self.current_delegate = 'coder'
8 changes: 7 additions & 1 deletion openhands/agenthub/micro/coder/prompt.md
@@ -21,7 +21,13 @@ Do NOT finish until you have completed the tasks.

## History
{{ instructions.history_truncated }}
{{ history_to_json(state.history, max_events=20) }}
{% for event in state.history[-20:] %}
{% if event.source == "agent" %}
Agent: {{ event.action }} - {{ event.content if event.content else event.observation }}
{% else %}
User: {{ event.content if event.content else event.observation }}
{% endif %}
{% endfor %}

## Format
{{ instructions.format.action }}
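The same replacement of history_to_json(...) with an inline Jinja loop appears in the two prompt files that follow. A quick standalone sketch of how such a loop renders with jinja2; the Event fields here (source, action, content, observation) are assumptions inferred from the template, not the actual OpenHands event schema:

from dataclasses import dataclass
from jinja2 import Template

@dataclass
class Event:
    source: str
    action: str = ''
    content: str = ''
    observation: str = ''

# Same structure as the loop added to the prompt templates above.
template = Template(
    '{% for event in history[-20:] %}\n'
    '{% if event.source == "agent" %}'
    'Agent: {{ event.action }} - {{ event.content if event.content else event.observation }}\n'
    '{% else %}'
    'User: {{ event.content if event.content else event.observation }}\n'
    '{% endif %}'
    '{% endfor %}'
)

history = [
    Event(source='user', content='Fix the failing test in utils.py'),
    Event(source='agent', action='run', content='', observation='pytest: 1 failed'),
]
print(template.render(history=history))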
8 changes: 7 additions & 1 deletion openhands/agenthub/micro/study_repo_for_task/prompt.md
@@ -24,7 +24,13 @@ implement the solution. If the codebase is empty, you should call the `finish` a

## History
{{ instructions.history_truncated }}
{{ history_to_json(state.history, max_events=20) }}
{% for event in state.history[-20:] %}
{% if event.source == "agent" %}
Agent: {{ event.action }} - {{ event.content if event.content else event.observation }}
{% else %}
User: {{ event.content if event.content else event.observation }}
{% endif %}
{% endfor %}

## Format
{{ instructions.format.action }}
8 changes: 7 additions & 1 deletion openhands/agenthub/micro/verifier/prompt.md
@@ -22,7 +22,13 @@ explaining what the problem is.

## History
{{ instructions.history_truncated }}
{{ history_to_json(state.history, max_events=20) }}
{% for event in state.history[-20:] %}
{% if event.source == "agent" %}
Agent: {{ event.action }} - {{ event.content if event.content else event.observation }}
{% else %}
User: {{ event.content if event.content else event.observation }}
{% endif %}
{% endfor %}

## Format
{{ instructions.format.action }}
92 changes: 34 additions & 58 deletions openhands/agenthub/supervisor_agent/agent.py
@@ -2,16 +2,15 @@
import re
from typing import Any, Dict, List

from openhands.agenthub.supervisor_agent.prompt import (
get_prompt,
)
from openhands.agenthub.supervisor_agent.prompt import code_act_agent_prompt, get_prompt
from openhands.controller.agent import Agent
from openhands.controller.state.state import State
from openhands.core.config import AgentConfig
from openhands.core.config.llm_config import LLMConfig
from openhands.core.message import Message, TextContent
from openhands.events.action import Action, AgentDelegateAction, AgentFinishAction
from openhands.events.observation.delegate import AgentDelegateObservation
from openhands.events.observation.observation import Observation
from openhands.llm.llm import LLM
from openhands.runtime.plugins.agent_skills import AgentSkillsRequirement
from openhands.runtime.plugins.jupyter import JupyterRequirement
@@ -34,6 +33,7 @@ class SupervisorAgent(Agent):
task: str = ''
test_command: str = ''
time_to_stop: int = 60 # Every 60 iterations, we stop and evaluate the approach
phase: int = 0

sandbox_plugins: list[PluginRequirement] = [
# NOTE: AgentSkillsRequirement need to go before JupyterRequirement, since
@@ -56,7 +56,7 @@ def __init__(self, llm: LLM, config: AgentConfig):
- llm (LLM): The llm to be used by this agent
"""
llm_config = LLMConfig(
model='openai/o1-mini', api_key='REDACTED', temperature=1.0
model='openai/o1-preview', api_key='REDACTED', temperature=1.0
)
llm = LLM(llm_config)
# TODO: Remove this once we have a real AgentConfig
@@ -70,77 +70,53 @@ def __init__(self, llm: LLM, config: AgentConfig):
def step(self, state: State) -> Action:
self.logger.debug('Starting step with state: %s', state)
self.logger.debug('LLM config: %s', self.llm_config)
last_observation = state.history[-1]
last_observation: Observation | None = None
for event in reversed(state.history):
if isinstance(event, Observation):
last_observation = event
break

task, _ = state.get_current_user_intent()
self.task = task or ''

# import pdb; pdb.set_trace()
# Try CodeActAgent first if we haven't tried it yet
if not self.tried_direct_code:
prompt = get_prompt(self.task, [], 'initial')
raw_response = self.get_response(prompt)
match = re.search(
r'<augmented_pr_description>(.*?)</augmented_pr_description>',
raw_response,
re.DOTALL,
)
self.augmented_task = match.group(1).strip('"') if match else self.task
self.tried_direct_code = True
if self.phase == 0:
self.phase += 1
prompt = get_prompt(self.task, None, 'high_level_task')
return AgentDelegateAction(
agent='CodeActAgent',
inputs={
'task': self.task,
'augmented_task': self.augmented_task,
'when_to_stop': self.time_to_stop,
'task': prompt,
'when_to_stop': 1,
},
)

if not isinstance(last_observation, AgentDelegateObservation):
raise ValueError('Last observation is not an AgentDelegateObservation')
return AgentFinishAction()

if not last_observation.outputs.get('fixed', False):
trayectory: List[Dict] = last_observation.outputs['trayectory']
deserialized_trajectory = [
Message(
role=msg_dict['role'],
content=[
TextContent(text=content_text)
for content_text in [
msg_dict['content'][0]['text']
if isinstance(msg_dict['content'], list)
else msg_dict['content']
]
],
tool_call_id=msg_dict.get('tool_call_id'),
name=msg_dict.get('name'),
)
for msg_dict in trayectory
]
# import pdb; pdb.set_trace()
prompt = get_prompt(self.task, deserialized_trajectory, 'right_track')
raw_response = self.get_response(prompt)
match = re.search(r'<answer>(.*?)</answer>', raw_response, re.DOTALL)
if match and 'yes' in match.group(1).lower():
return AgentDelegateAction(
agent='CodeActAgent',
inputs={
'task': self.task,
'trayectory': trayectory,
'when_to_stop': self.time_to_stop,
},
)
# pdb.set_trace()
prompt = get_prompt(self.task, deserialized_trajectory, 'refactor')
response: str = last_observation.outputs['response']
match = re.search(
r'<requirements>(.*?)</requirements>', str(response), re.DOTALL
)
self.requirements = match.group(1).strip('"') if match else ''

self.phase += 1
prompt = get_prompt(
self.task, None, 'initial', requirements=self.requirements
)
raw_response = self.get_response(prompt)
match = re.search(r'<next_step>(.*?)</next_step>', raw_response, re.DOTALL)
next_step = match.group(1).strip('"') if match else ''
self.logger.debug('Suggested approach: %s', next_step)
match = re.search(
r'<steps>(.*?)</steps>',
raw_response,
re.DOTALL,
)
steps = match.group(1).strip('"') if match else self.task

return AgentDelegateAction(
agent='CodeActAgent',
inputs={
'task': self.task,
'trayectory': trayectory,
'next_step': next_step,
'next_step': code_act_agent_prompt % {'steps': steps},
'when_to_stop': self.time_to_stop,
},
)
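The phased flow above leans on one recurring idiom: the supervisor asks the delegate (or its own LLM) for a reply wrapped in XML-style tags, pulls the pieces out with re.DOTALL searches, and substitutes them into a %-style delegate prompt. A small self-contained sketch of that idiom; the prompt text, extract_tag helper, and sample reply are illustrative assumptions, not code from the repository:

import re

# Hypothetical stand-in for code_act_agent_prompt; only the %(steps)s substitution
# mirrors the diff above.
CODE_ACT_PROMPT = (
    'Follow these steps to fix the issue:\n%(steps)s\n'
    'Generate a patch when you are done.'
)

def extract_tag(raw_response: str, tag: str, default: str = '') -> str:
    # Equivalent to the re.search(r'<tag>(.*?)</tag>', ..., re.DOTALL) calls in step().
    match = re.search(rf'<{tag}>(.*?)</{tag}>', raw_response, re.DOTALL)
    return match.group(1).strip('"') if match else default

raw = (
    'Here is my analysis.\n'
    '<requirements>The fix must keep the public API unchanged.</requirements>\n'
    '<steps>1. Reproduce the bug. 2. Patch utils.py. 3. Re-run the tests.</steps>'
)

requirements = extract_tag(raw, 'requirements')
steps = extract_tag(raw, 'steps', default='(no steps found)')
print(requirements)
print(CODE_ACT_PROMPT % {'steps': steps})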
(diff for the remaining changed file was not loaded)
