
Commit

attempt
AlexCuadron committed Nov 13, 2024
1 parent a9e346a commit 413caa6
Showing 8 changed files with 139 additions and 138 deletions.
22 changes: 8 additions & 14 deletions evaluation/swe_bench/run_infer.py
@@ -47,6 +47,7 @@
'CodeActAgent': codeact_user_response,
'CodeActSWEAgent': codeact_user_response,
'SupervisorAgent': codeact_user_response,
'DelegatorAgent': codeact_user_response,
}


@@ -69,6 +70,13 @@ def get_instruction(instance: pd.Series, metadata: EvalMetadata):
f'--- BEGIN HINTS ---\n{instance.hints_text}\n--- END HINTS ---\n'
)
instruction += CODEACT_SWE_PROMPT.format(workspace_dir_name=workspace_dir_name)
elif metadata.agent_class == 'DelegatorAgent':
instruction = (
f"I've uploaded a python code repository in the directory {workspace_dir_name}. Consider the following PR description:\n\n"
f'<pr_description>\n'
f'{instance.problem_statement}\n'
'</pr_description>\n\n'
)
else:
# Instruction based on Anthropic's official trajectory
# https://github.com/eschluntz/swe-bench-experiments/tree/main/evaluation/verified/20241022_tools_claude-3-5-sonnet-updated/trajs
@@ -92,20 +100,6 @@ def get_instruction(instance: pd.Series, metadata: EvalMetadata):
"Your thinking should be thorough and so it's fine if it's very long.\n"
)

instruction += (
'<IMPORTANT>\n'
'- You MUST generate only one action per turn!\n'
'- A patch is a set of changes to the source code of the codebase that you are given\n'
'- You MUST generate a patch that attempts to fix the issue described in the <pr_description>\n'
'</IMPORTANT>\n'
)

if RUN_WITH_BROWSING:
instruction += (
'<IMPORTANT!>\n'
'You SHOULD NEVER attempt to browse the web. '
'</IMPORTANT!>\n'
)
return instruction


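For reference, here is a toy rendering of the new DelegatorAgent branch in get_instruction above. The instance fields and workspace directory below are made-up sample values for illustration, not taken from the benchmark:

import pandas as pd

# Sample values for illustration only; real instances come from the SWE-bench dataset.
instance = pd.Series(
    {'problem_statement': 'TypeError raised when calling foo() with no arguments.'}
)
workspace_dir_name = 'astropy__astropy__5.0'

# Mirrors the f-string built in the DelegatorAgent branch of get_instruction.
instruction = (
    f"I've uploaded a python code repository in the directory {workspace_dir_name}. Consider the following PR description:\n\n"
    f'<pr_description>\n'
    f'{instance.problem_statement}\n'
    '</pr_description>\n\n'
)
print(instruction)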
21 changes: 10 additions & 11 deletions openhands/agenthub/codeact_agent/codeact_agent.py
@@ -73,7 +73,7 @@ class CodeActAgent(Agent):
JupyterRequirement(),
]
obs_prefix = 'OBSERVATION:\n'
when_to_stop = 6
when_to_stop = -1
number_of_events = -1

def __init__(
@@ -363,16 +363,6 @@ def step(self, state: State) -> Action:
outputs={'fixed': True, 'trayectory': serialized_messages}
)

# if we've reached the max number of iterations, go back for an evaluation on the approach
if self.when_to_stop > 0 and state.local_iteration % self.when_to_stop == 0:
messages = self._get_messages(state)
serialized_messages = [
msg.model_dump() for msg in messages
] # Serialize each Message object
return AgentFinishAction(
outputs={'trayectory': serialized_messages, 'fixed': False}
)

# prepare what we want to send to the LLM
messages = self._get_messages(state)
params: dict = {
@@ -390,6 +380,15 @@ def step(self, state: State) -> Action:
]
response = self.llm.completion(**params)

# if we've reached the max number of iterations, go back for an evaluation on the approach
if self.when_to_stop > 0 and state.local_iteration % self.when_to_stop == 0:
return AgentFinishAction(
outputs={
'response': response['choices'][0]['message']['content'],
'fixed': False,
}
)

if self.function_calling_active:
actions = codeact_function_calling.response_to_actions(response)
for action in actions:
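The diff above moves the when_to_stop check to after the LLM call, so the delegating agent receives the model's latest raw answer instead of a serialized trajectory. A minimal sketch of that checkpoint pattern, using toy names (ToyCodeActAgent, fake_llm_completion) rather than the real OpenHands classes:

def fake_llm_completion(messages):
    # Stand-in for self.llm.completion(**params); returns a litellm-style dict.
    return {'choices': [{'message': {'content': f'step {len(messages)} reasoning...'}}]}

class ToyCodeActAgent:
    def __init__(self, when_to_stop: int = -1):
        # when_to_stop <= 0 disables the periodic hand-back, mirroring the new default of -1.
        self.when_to_stop = when_to_stop
        self.local_iteration = 0

    def step(self, messages):
        self.local_iteration += 1
        response = fake_llm_completion(messages)
        # Every `when_to_stop` iterations, finish and hand the latest answer back
        # to the supervisor with fixed=False so it can evaluate the approach.
        if self.when_to_stop > 0 and self.local_iteration % self.when_to_stop == 0:
            return {
                'finish': True,
                'response': response['choices'][0]['message']['content'],
                'fixed': False,
            }
        return {'finish': False, 'response': response['choices'][0]['message']['content']}

if __name__ == '__main__':
    agent = ToyCodeActAgent(when_to_stop=2)
    for i in range(4):
        print(agent.step(['msg'] * (i + 1)))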
1 change: 0 additions & 1 deletion openhands/agenthub/delegator_agent/agent.py
@@ -49,7 +49,6 @@ def step(self, state: State) -> Action:

if not isinstance(last_observation, AgentDelegateObservation):
raise Exception('Last observation is not an AgentDelegateObservation')

goal, _ = state.get_current_user_intent()
if self.current_delegate == 'study':
self.current_delegate = 'coder'
8 changes: 7 additions & 1 deletion openhands/agenthub/micro/coder/prompt.md
@@ -21,7 +21,13 @@ Do NOT finish until you have completed the tasks.

## History
{{ instructions.history_truncated }}
{{ history_to_json(state.history, max_events=20) }}
{% for event in state.history[-20:] %}
{% if event.source == "agent" %}
Agent: {{ event.action }} - {{ event.content if event.content else event.observation }}
{% else %}
User: {{ event.content if event.content else event.observation }}
{% endif %}
{% endfor %}

## Format
{{ instructions.format.action }}
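The same replacement of history_to_json(...) with an inline Jinja loop appears in the two prompt files that follow. A quick standalone sketch of how such a loop renders with jinja2; the Event fields here (source, action, content, observation) are assumptions inferred from the template, not the actual OpenHands event schema:

from dataclasses import dataclass
from jinja2 import Template

@dataclass
class Event:
    source: str
    action: str = ''
    content: str = ''
    observation: str = ''

# Same structure as the loop added to the prompt templates above.
template = Template(
    '{% for event in history[-20:] %}\n'
    '{% if event.source == "agent" %}'
    'Agent: {{ event.action }} - {{ event.content if event.content else event.observation }}\n'
    '{% else %}'
    'User: {{ event.content if event.content else event.observation }}\n'
    '{% endif %}'
    '{% endfor %}'
)

history = [
    Event(source='user', content='Fix the failing test in utils.py'),
    Event(source='agent', action='run', content='', observation='pytest: 1 failed'),
]
print(template.render(history=history))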
8 changes: 7 additions & 1 deletion openhands/agenthub/micro/study_repo_for_task/prompt.md
@@ -24,7 +24,13 @@ implement the solution. If the codebase is empty, you should call the `finish` a

## History
{{ instructions.history_truncated }}
{{ history_to_json(state.history, max_events=20) }}
{% for event in state.history[-20:] %}
{% if event.source == "agent" %}
Agent: {{ event.action }} - {{ event.content if event.content else event.observation }}
{% else %}
User: {{ event.content if event.content else event.observation }}
{% endif %}
{% endfor %}

## Format
{{ instructions.format.action }}
8 changes: 7 additions & 1 deletion openhands/agenthub/micro/verifier/prompt.md
@@ -22,7 +22,13 @@ explaining what the problem is.

## History
{{ instructions.history_truncated }}
{{ history_to_json(state.history, max_events=20) }}
{% for event in state.history[-20:] %}
{% if event.source == "agent" %}
Agent: {{ event.action }} - {{ event.content if event.content else event.observation }}
{% else %}
User: {{ event.content if event.content else event.observation }}
{% endif %}
{% endfor %}

## Format
{{ instructions.format.action }}
92 changes: 34 additions & 58 deletions openhands/agenthub/supervisor_agent/agent.py
@@ -2,16 +2,15 @@
import re
from typing import Any, Dict, List

from openhands.agenthub.supervisor_agent.prompt import (
get_prompt,
)
from openhands.agenthub.supervisor_agent.prompt import code_act_agent_prompt, get_prompt
from openhands.controller.agent import Agent
from openhands.controller.state.state import State
from openhands.core.config import AgentConfig
from openhands.core.config.llm_config import LLMConfig
from openhands.core.message import Message, TextContent
from openhands.events.action import Action, AgentDelegateAction, AgentFinishAction
from openhands.events.observation.delegate import AgentDelegateObservation
from openhands.events.observation.observation import Observation
from openhands.llm.llm import LLM
from openhands.runtime.plugins.agent_skills import AgentSkillsRequirement
from openhands.runtime.plugins.jupyter import JupyterRequirement
@@ -34,6 +33,7 @@ class SupervisorAgent(Agent):
task: str = ''
test_command: str = ''
time_to_stop: int = 60 # Every 60 iterations, we stop and evaluate the approach
phase: int = 0

sandbox_plugins: list[PluginRequirement] = [
# NOTE: AgentSkillsRequirement need to go before JupyterRequirement, since
@@ -56,7 +56,7 @@ def __init__(self, llm: LLM, config: AgentConfig):
- llm (LLM): The llm to be used by this agent
"""
llm_config = LLMConfig(
model='openai/o1-mini', api_key='REDACTED', temperature=1.0
model='openai/o1-preview', api_key='REDACTED', temperature=1.0
)
llm = LLM(llm_config)
# TODO: Remove this once we have a real AgentConfig
@@ -70,77 +70,53 @@ def __init__(self, llm: LLM, config: AgentConfig):
def step(self, state: State) -> Action:
self.logger.debug('Starting step with state: %s', state)
self.logger.debug('LLM config: %s', self.llm_config)
last_observation = state.history[-1]
last_observation: Observation | None = None
for event in reversed(state.history):
if isinstance(event, Observation):
last_observation = event
break

task, _ = state.get_current_user_intent()
self.task = task or ''

# import pdb; pdb.set_trace()
# Try CodeActAgent first if we haven't tried it yet
if not self.tried_direct_code:
prompt = get_prompt(self.task, [], 'initial')
raw_response = self.get_response(prompt)
match = re.search(
r'<augmented_pr_description>(.*?)</augmented_pr_description>',
raw_response,
re.DOTALL,
)
self.augmented_task = match.group(1).strip('"') if match else self.task
self.tried_direct_code = True
if self.phase == 0:
self.phase += 1
prompt = get_prompt(self.task, None, 'high_level_task')
return AgentDelegateAction(
agent='CodeActAgent',
inputs={
'task': self.task,
'augmented_task': self.augmented_task,
'when_to_stop': self.time_to_stop,
'task': prompt,
'when_to_stop': 1,
},
)

if not isinstance(last_observation, AgentDelegateObservation):
raise ValueError('Last observation is not an AgentDelegateObservation')
return AgentFinishAction()

if not last_observation.outputs.get('fixed', False):
trayectory: List[Dict] = last_observation.outputs['trayectory']
deserialized_trajectory = [
Message(
role=msg_dict['role'],
content=[
TextContent(text=content_text)
for content_text in [
msg_dict['content'][0]['text']
if isinstance(msg_dict['content'], list)
else msg_dict['content']
]
],
tool_call_id=msg_dict.get('tool_call_id'),
name=msg_dict.get('name'),
)
for msg_dict in trayectory
]
# import pdb; pdb.set_trace()
prompt = get_prompt(self.task, deserialized_trajectory, 'right_track')
raw_response = self.get_response(prompt)
match = re.search(r'<answer>(.*?)</answer>', raw_response, re.DOTALL)
if match and 'yes' in match.group(1).lower():
return AgentDelegateAction(
agent='CodeActAgent',
inputs={
'task': self.task,
'trayectory': trayectory,
'when_to_stop': self.time_to_stop,
},
)
# pdb.set_trace()
prompt = get_prompt(self.task, deserialized_trajectory, 'refactor')
response: str = last_observation.outputs['response']
match = re.search(
r'<requirements>(.*?)</requirements>', str(response), re.DOTALL
)
self.requirements = match.group(1).strip('"') if match else ''

self.phase += 1
prompt = get_prompt(
self.task, None, 'initial', requirements=self.requirements
)
raw_response = self.get_response(prompt)
match = re.search(r'<next_step>(.*?)</next_step>', raw_response, re.DOTALL)
next_step = match.group(1).strip('"') if match else ''
self.logger.debug('Suggested approach: %s', next_step)
match = re.search(
r'<steps>(.*?)</steps>',
raw_response,
re.DOTALL,
)
steps = match.group(1).strip('"') if match else self.task

return AgentDelegateAction(
agent='CodeActAgent',
inputs={
'task': self.task,
'trayectory': trayectory,
'next_step': next_step,
'next_step': code_act_agent_prompt % {'steps': steps},
'when_to_stop': self.time_to_stop,
},
)
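The phased flow above leans on one recurring idiom: the supervisor asks the delegate (or its own LLM) for a reply wrapped in XML-style tags, pulls the pieces out with re.DOTALL searches, and substitutes them into a %-style delegate prompt. A small self-contained sketch of that idiom; the prompt text, extract_tag helper, and sample reply are illustrative assumptions, not code from the repository:

import re

# Hypothetical stand-in for code_act_agent_prompt; only the %(steps)s substitution
# mirrors the diff above.
CODE_ACT_PROMPT = (
    'Follow these steps to fix the issue:\n%(steps)s\n'
    'Generate a patch when you are done.'
)

def extract_tag(raw_response: str, tag: str, default: str = '') -> str:
    # Equivalent to the re.search(r'<tag>(.*?)</tag>', ..., re.DOTALL) calls in step().
    match = re.search(rf'<{tag}>(.*?)</{tag}>', raw_response, re.DOTALL)
    return match.group(1).strip('"') if match else default

raw = (
    'Here is my analysis.\n'
    '<requirements>The fix must keep the public API unchanged.</requirements>\n'
    '<steps>1. Reproduce the bug. 2. Patch utils.py. 3. Re-run the tests.</steps>'
)

requirements = extract_tag(raw, 'requirements')
steps = extract_tag(raw, 'steps', default='(no steps found)')
print(requirements)
print(CODE_ACT_PROMPT % {'steps': steps})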
(diff for the remaining changed file was not loaded)
