diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py index 91d04a75ef6a..2926d7d5e459 100644 --- a/openhands/agenthub/codeact_agent/codeact_agent.py +++ b/openhands/agenthub/codeact_agent/codeact_agent.py @@ -1,4 +1,3 @@ -import json import os from collections import deque from itertools import islice @@ -13,6 +12,7 @@ from openhands.core.config.llm_config import LLMConfig from openhands.core.logger import openhands_logger as logger from openhands.core.message import ImageContent, Message, TextContent +from openhands.core.utils import json from openhands.events.action import ( Action, AgentDelegateAction, @@ -73,6 +73,8 @@ class CodeActAgent(Agent): JupyterRequirement(), ] obs_prefix = 'OBSERVATION:\n' + when_to_stop = 6 + number_of_events = -1 def __init__( self, @@ -85,6 +87,7 @@ def __init__( - llm (LLM): The llm to be used by this agent """ + # import pdb; pdb.set_trace() llm_config = LLMConfig( model='litellm_proxy/claude-3-5-sonnet-20241022', api_key='REDACTED', @@ -93,10 +96,9 @@ def __init__( ) llm = LLM(llm_config) # TODO: Remove this once we have a real AgentConfig - config = AgentConfig(llm_config='o1-mini') + config = AgentConfig() super().__init__(llm, config) self.reset() - self.micro_agent = ( MicroAgent( os.path.join( @@ -343,6 +345,11 @@ def step(self, state: State) -> Action: - MessageAction(content) - Message action to run (e.g. ask for clarification) - AgentFinishAction() - end the interaction """ + + # If this agent has a supervisor, we need to get the time to stop from the supervisor + if self.when_to_stop < 0 and state.inputs.get('when_to_stop', None): + self.when_to_stop = state.inputs['when_to_stop'] + # Continue with pending actions if any if self.pending_actions: return self.pending_actions.popleft() @@ -350,7 +357,21 @@ def step(self, state: State) -> Action: # if we're done, go back last_user_message = state.get_last_user_message() if last_user_message and last_user_message.strip() == '/exit': - return AgentFinishAction() + messages = self._get_messages(state) + serialized_messages = [msg.model_dump() for msg in messages] + return AgentFinishAction( + outputs={'fixed': True, 'trayectory': serialized_messages} + ) + + # if we've reached the max number of iterations, go back for an evaluation on the approach + if self.when_to_stop > 0 and state.local_iteration % self.when_to_stop == 0: + messages = self._get_messages(state) + serialized_messages = [ + msg.model_dump() for msg in messages + ] # Serialize each Message object + return AgentFinishAction( + outputs={'trayectory': serialized_messages, 'fixed': False} + ) # prepare what we want to send to the LLM messages = self._get_messages(state) @@ -409,17 +430,60 @@ def _get_messages(self, state: State) -> list[Message]: - Messages from the same role are combined to prevent consecutive same-role messages - For Anthropic models, specific messages are cached according to their documentation """ - messages: list[Message] = [ - Message( - role='system', - content=[ - TextContent( - text=self.system_prompt, - cache_prompt=self.llm.is_caching_prompt_active(), # Cache system prompt - ) - ], + # import pdb; pdb.set_trace() + messages: list[Message] = [] + trayectory = state.inputs.get('trayectory', '') + # If there is no trayectory, its the first time we are seeing the task + if not trayectory: + messages.append( + Message( + role='system', + content=[ + TextContent( + text=self.system_prompt, + cache_prompt=self.llm.is_caching_prompt_active(), # Cache system 
prompt + ) + ], + ) ) - ] + if state.inputs.get('task', '') != '': + # During AgentDelegation the history is empty, so we add the task as the user message. + messages.append( + Message( + role='user', + content=[TextContent(text=state.inputs['task'])], + ) + ) + + if state.inputs.get('augmented_task', ''): + messages.append( + Message( + role='user', + content=[TextContent(text=state.inputs['augmented_task'])], + ) + ) + else: + # If there is a previous trayectory, we restore it. + deserialized_trajectory = [ + Message( + role='user', + content=[ + TextContent(text=content_text) + for content_text in [ + msg_dict['content'][0]['text'] + if isinstance(msg_dict['content'], list) + else msg_dict['content'] + ] + if content_text # Skip empty content + ], + tool_call_id=msg_dict.get('tool_call_id'), + name=msg_dict.get('name'), + ) + for msg_dict in trayectory + if msg_dict.get('content') # Skip messages with no content + ] + messages.extend(deserialized_trajectory) + if self.initial_user_message: messages.append( Message( @@ -431,7 +495,9 @@ def _get_messages(self, state: State) -> list[Message]: pending_tool_call_action_messages: dict[str, Message] = {} tool_call_id_to_message: dict[str, Message] = {} events = list(state.history) - for event in events: + if self.number_of_events < 0: + self.number_of_events = len(events) + for i, event in enumerate(events): # create a regular message from an event if isinstance(event, Action): messages_to_add = self.get_action_message( @@ -446,6 +512,14 @@ def _get_messages(self, state: State) -> list[Message]: else: raise ValueError(f'Unknown event type: {type(event)}') + if i == self.number_of_events and state.inputs.get('next_step', ''): + messages_to_add = [ + Message( + role='user', + content=[TextContent(text=state.inputs['next_step'])], + ) + ] + # Check pending tool call action messages and see if they are complete _response_ids_to_remove = [] for ( @@ -488,6 +562,13 @@ def _get_messages(self, state: State) -> list[Message]: else: messages.append(message) + if self.number_of_events == len(events) and state.inputs.get('next_step', ''): + messages.append( + Message( + role='user', content=[TextContent(text=state.inputs['next_step'])] + ) + ) + if self.llm.is_caching_prompt_active(): # NOTE: this is only needed for anthropic # following logic here: diff --git a/openhands/agenthub/codeact_agent/function_calling.py b/openhands/agenthub/codeact_agent/function_calling.py index 1799478601bd..3a888f2e11b1 100644 --- a/openhands/agenthub/codeact_agent/function_calling.py +++ b/openhands/agenthub/codeact_agent/function_calling.py @@ -13,6 +13,7 @@ ) from openhands.core.logger import openhands_logger as logger +from openhands.core.message import Message from openhands.events.action import ( Action, AgentDelegateAction, @@ -448,7 +449,11 @@ def combine_thought(action: Action, thought: str) -> Action: return action -def response_to_actions(response: ModelResponse) -> list[Action]: +def response_to_actions( + response: ModelResponse, messages: list[Message] | None = None +) -> list[Action]: + if messages is None: + messages = [] actions: list[Action] = [] assert len(response.choices) == 1, 'Only one choice is supported for now' assistant_msg = response.choices[0].message @@ -481,7 +486,9 @@ def response_to_actions(response: ModelResponse) -> list[Action]: inputs=arguments, ) elif tool_call.function.name == 'finish': - action = AgentFinishAction() + action = AgentFinishAction( + outputs={'fixed': True, 'trayectory': messages} + ) elif tool_call.function.name == 
'edit_file': action = FileEditAction(**arguments) elif tool_call.function.name == 'str_replace_editor': diff --git a/openhands/agenthub/supervisor_agent/agent.py b/openhands/agenthub/supervisor_agent/agent.py index 722d7365cb3a..96e04348581f 100644 --- a/openhands/agenthub/supervisor_agent/agent.py +++ b/openhands/agenthub/supervisor_agent/agent.py @@ -1,8 +1,8 @@ import logging -from typing import Any, Dict, List, Literal, Union +import re +from typing import Any, Dict, List from openhands.agenthub.supervisor_agent.prompt import ( - TASK_TYPE_ISSUE, get_prompt, ) from openhands.controller.agent import Agent @@ -10,11 +10,12 @@ from openhands.core.config import AgentConfig from openhands.core.config.llm_config import LLMConfig from openhands.core.message import Message, TextContent -from openhands.core.utils import json from openhands.events.action import Action, AgentDelegateAction, AgentFinishAction -from openhands.events.action.agent import AgentRejectAction from openhands.events.observation.delegate import AgentDelegateObservation from openhands.llm.llm import LLM +from openhands.runtime.plugins.agent_skills import AgentSkillsRequirement +from openhands.runtime.plugins.jupyter import JupyterRequirement +from openhands.runtime.plugins.requirement import PluginRequirement class SupervisorAgent(Agent): @@ -32,7 +33,21 @@ class SupervisorAgent(Agent): does_it_needs_a_test: bool = False task: str = '' test_command: str = '' - phase: Literal['search', 'summary', 'code'] = 'search' + time_to_stop: int = 60 # Every 60 iterations, we stop and evaluate the approach + + sandbox_plugins: list[PluginRequirement] = [ + # NOTE: AgentSkillsRequirement need to go before JupyterRequirement, since + # AgentSkillsRequirement provides a lot of Python functions, + # and it needs to be initialized before Jupyter for Jupyter to use those functions. 
+ AgentSkillsRequirement(), + JupyterRequirement(), + ] + + # Add class attribute for tried_direct_code + tried_direct_code: bool = False + + # Add class attribute for augmented_task + augmented_task: str = '' def __init__(self, llm: LLM, config: AgentConfig): """Initialize the Supervisor Agent with an LLM @@ -55,122 +70,85 @@ def __init__(self, llm: LLM, config: AgentConfig): def step(self, state: State) -> Action: self.logger.debug('Starting step with state: %s', state) self.logger.debug('LLM config: %s', self.llm_config) - - if len(self.suggested_approaches) == 0: - self.suggested_approaches = self.get_suggested_approaches(state) - self.suggested_approach_index += 1 - - last_observation = state.history[-1] if state.history else None - if isinstance(last_observation, AgentDelegateObservation): - self.results[self.phase].append(last_observation.outputs.get('output', '')) - - if self.suggested_approach_index < len(self.suggested_approaches): - # Delegate to the SearcherAgent as we need to gather more information - return self.delegate_to_agent( - 'SearcherAgent', - self.task, - self.suggested_approaches[self.suggested_approach_index].get( - 'suggested_approach', [] - ), + last_observation = state.history[-1] + task, _ = state.get_current_user_intent() + self.task = task or '' + + # import pdb; pdb.set_trace() + # Try CodeActAgent first if we haven't tried it yet + if not self.tried_direct_code: + prompt = get_prompt(self.task, [], 'initial') + raw_response = self.get_response(prompt) + match = re.search( + r'(.*?)', + raw_response, + re.DOTALL, ) - - if self.phase == 'search': - condensed_information = self.ask_llm( - self.task, 'summary', self.results[self.phase] + self.augmented_task = match.group(1).strip('"') if match else self.task + self.tried_direct_code = True + return AgentDelegateAction( + agent='CodeActAgent', + inputs={ + 'task': self.task, + 'augmented_task': self.augmented_task, + 'when_to_stop': self.time_to_stop, + }, ) - if condensed_information and len(condensed_information) > 0: - first_result = condensed_information[0] - if first_result.get('summary', '') != '': - self.phase = 'summary' - self.condensed_information = first_result.get('summary', '') - else: - suggested_approach: str | list[str] = first_result.get( - 'suggested_approach', [] - ) - self.results['search'].append(suggested_approach) - return self.delegate_to_agent( - 'SearcherAgent', self.task, suggested_approach - ) - if self.phase == 'summary': - if not self.does_it_needs_a_test: - test_check = self.ask_llm(self.task, 'code', self.condensed_information) - first_check = ( - test_check[0] if test_check and len(test_check) > 0 else {} - ) - self.does_it_needs_a_test = ( - first_check.get('suggested_approach', '') == TASK_TYPE_ISSUE + if not isinstance(last_observation, AgentDelegateObservation): + raise ValueError('Last observation is not an AgentDelegateObservation') + + if not last_observation.outputs.get('fixed', False): + trayectory: List[Dict] = last_observation.outputs['trayectory'] + deserialized_trajectory = [ + Message( + role=msg_dict['role'], + content=[ + TextContent(text=content_text) + for content_text in [ + msg_dict['content'][0]['text'] + if isinstance(msg_dict['content'], list) + else msg_dict['content'] + ] + ], + tool_call_id=msg_dict.get('tool_call_id'), + name=msg_dict.get('name'), ) - self.phase = 'code' - if self.does_it_needs_a_test: - self.current_delegate = 'TesterAgent' - return AgentDelegateAction( - agent='TesterAgent', - inputs={ - 'task': self.task, - 'summary': 
self.condensed_information, - }, - ) - if self.phase == 'code': - if ( - self.does_it_needs_a_test - and last_observation is not None - and isinstance(last_observation, AgentDelegateObservation) - ): - self.test_command = last_observation.outputs.get('output', '') + for msg_dict in trayectory + ] + # import pdb; pdb.set_trace() + prompt = get_prompt(self.task, deserialized_trajectory, 'right_track') + raw_response = self.get_response(prompt) + match = re.search(r'(.*?)', raw_response, re.DOTALL) + if match and 'yes' in match.group(1).lower(): return AgentDelegateAction( - agent='CoderAgent', + agent='CodeActAgent', inputs={ 'task': self.task, - 'summary': self.condensed_information, - 'test_command': self.test_command, + 'trayectory': trayectory, + 'when_to_stop': self.time_to_stop, }, ) - + # pdb.set_trace() + prompt = get_prompt(self.task, deserialized_trajectory, 'refactor') + raw_response = self.get_response(prompt) + match = re.search(r'(.*?)', raw_response, re.DOTALL) + next_step = match.group(1).strip('"') if match else '' + self.logger.debug('Suggested approach: %s', next_step) + return AgentDelegateAction( + agent='CodeActAgent', + inputs={ + 'task': self.task, + 'trayectory': trayectory, + 'next_step': next_step, + 'when_to_stop': self.time_to_stop, + }, + ) return AgentFinishAction() - def get_suggested_approaches(self, state: State): - self.logger.debug('No suggested approaches found, breaking down task.') - task, _ = state.get_current_user_intent() - if not task: - return [] - self.task = task - suggested_approaches = self.ask_llm(self.task, 'search') - self.logger.debug('Suggested approaches: %s', self.suggested_approaches) - if not suggested_approaches: - return AgentRejectAction() - return suggested_approaches - - def delegate_to_agent( - self, agent_name: str, task: str, suggested_approach: Union[str, List[str]] - ) -> AgentDelegateAction: - self.logger.debug(f'Delegating to agent: {agent_name}') - self.current_delegate = agent_name - # Join the list of strings with newlines if it's a list - approach = ( - '\n'.join(suggested_approach) - if isinstance(suggested_approach, list) - else suggested_approach - ) - return AgentDelegateAction( - agent=agent_name, inputs={'task': task, 'suggested_approach': approach} - ) - - def ask_llm( - self, task: str, phase: str, search_results: Union[str, List[str]] = '' - ) -> List[Dict[str, str]]: - # Format search_results as one item per line if it's a list - if isinstance(search_results, list): - search_results = '\n'.join(search_results) - prompt = get_prompt(task, phase, search_results) - return self.get_response(prompt) - - def get_response(self, prompt: str) -> List[Dict[str, str]]: - content = [TextContent(text=prompt)] - message = Message(role='user', content=content) + def get_response(self, prompt: str) -> str: + message = Message(role='user', content=[TextContent(text=prompt)]) response = self.llm.completion( messages=self.llm.format_messages_for_llm(message) ) - if isinstance(response, list): - return json.loads(response[0]['message']['content']) - return json.loads(response['choices'][0]['message']['content']) + return response['choices'][0]['message']['content'] diff --git a/openhands/agenthub/supervisor_agent/prompt.py b/openhands/agenthub/supervisor_agent/prompt.py index 4d8e68a92df9..f2a032eeddf3 100644 --- a/openhands/agenthub/supervisor_agent/prompt.py +++ b/openhands/agenthub/supervisor_agent/prompt.py @@ -1,3 +1,5 @@ +from openhands.core.message import Message, TextContent + HISTORY_SIZE = 20 # General Description, the 
goal is to devise a manager that is able to iterate if the solution has not been found yet.
@@ -6,329 +8,203 @@
 # 2. Implementing the solution.
 # Then the manager needs to check if the issue has been fixed, if not, it needs to iterate.
 general_description = """
-You are a strategic planner AI in a software development team. You have a team of agents
-who will complete the tasks you give them. Each agent is an expert in a specific area,
-but it can only focus on one very specific sub-task at a time.
+You are a helpful assistant that provides DETAILED guidance on how to fix an issue in a codebase.
+"""
-Your goal is to complete the following task:
-%(task)s
+side_effects_description = """
+You are a helpful assistant that provides creative insights into the side-effects of changes made.
+<approach>
+%(approach)s
+</approach>
+Imagine that the changes described in the <approach> have been implemented.
+Now this feature is being used. During the usage of this feature, what are the parts of the codebase that could be affected?
+Your thinking should be thorough and so it's fine if it's very long.
+ALWAYS output all your reasoning, be as detailed as possible.
+
+
+- Documentation has been taken into account, so you should not mention it in any way!
+- Testing has been taken into account, so you should not mention it in any way!
+- Be aware of consistency issues!
+- Provide ONLY the related functions. (e.g. if the <approach> mentions the write function, then generate the read function).
+
+
+EXAMPLE:
+<approach>
+The changes require changing how the data is stored.
+</approach>
+After implementing those changes:
+- The parser functions that read the data might need to be updated to adapt to the new format.
+"""
-This task is very complex, it requires careful planning and thinking.
-In order to properly complete the task, there are two phases:
-- Search: exploring the codebase, finding the relevant details. (e.g. what is the root cause of the issue?)
-- Summary: summarising the information you have gathered.
-- Code: implementing the solution. (e.g. how to fix the issue?)
+initial_prompt = """
-As a strategic manager, your goal is to create a suggested approach for phase %(phase)s.
+I am trying to fix the following issue:
-## Detailed Suggested Approaches
-Generate several detailed suggested approaches that will be used by your agents to complete the task.
-Each agent will be assigned one of the suggested approaches and will bring you back feedback.
-So, be creative and think of as many different approaches as possible.
-You are trying to HELP the agents complete the task, you MUST be AS DETAILED AS POSSIBLE.
+%(task)s
+
+Try to imagine with all details how you would fix the issue. What is the root cause of the issue?
+Consider opposite scenarios (e.g. if the issue involves writing to a file, consider what happens when the file is read).
+Consider edge cases (e.g. what if the file doesn't exist?).
+
+I've already taken care of all changes to any of the test files described in the issue. This means you DON'T have to think about the testing logic or any of the tests in any way!
+The idea is to make the minimal changes to non-test files in the /workspace directory to ensure the issue is resolved.
+
+How would you fix the issue described above with the least amount of steps? Generate the augmented issue description with the least amount of steps to fix the issue, enclosed in between <augmented_pr_description> and </augmented_pr_description> tags.
+Each step MUST be very detailed as to why it is needed.
+Your thinking should be thorough and so it's fine if it's very long.
+Be as detailed as possible.
+
+Documentation has been taken into account, so you should not repeat it in the <augmented_pr_description>.
+Testing has been taken into account, so you should not repeat it in the <augmented_pr_description>. You can create new tests, but never use existing tests.
+ALWAYS output all your reasoning, be as detailed as possible.
+
+Follow this structure:
+1. As a first step, it might be a good idea to explore the repo to familiarize yourself with its structure.
+   - Files to explore, parts of the codebase I should focus on, keywords to look for...
+   - Extended reasoning...
+2. Create a script to reproduce the error and execute it to confirm that the error is reproducible
+   - Ensure that when executing the script, you get the error described in the issue
+   - Suggested code to reproduce the error, keeping in mind the side-effects described in the previous step, so that the error and side-effects are reproducible
+   - Extended reasoning...
+3. Edit the source code of the repo to resolve the issue
+   - Suggest what files to change and code SUGGESTIONS, trying to fix the issue with the least amount of changes.
+   - Keep in mind for the code suggestions that I might need to change some other functions to prevent the side-effects described in the previous steps.
+   - Extended reasoning...
+4. Rerun your reproduce script and confirm that the error is fixed!
+
+
+One step MUST be to recreate the issue and ensure that the error log is the same as the one described in the issue.
+
+
+Example:
+<augmented_pr_description>
+</augmented_pr_description>
+
+REMEMBER: you ARE ONLY suggesting steps to fix the issue, do NOT be assertive, use the language of a suggestion.
+"""
+right_track_prompt = """
-condense_information_prompt = """
-Previously, your agents were tasked to gather information about the codebase.
-They have now returned their findings.
+I am trying to fix an issue by following the steps described in the <augmented_pr_description>.
+I keep track of everything I did in the <approach> below.
-As a strategic manager, your job is to look CAREFULLY at the information they have gathered.
-You need to make sure you have a good understanding of the codebase, and the potential solutions
-to the task.
+<approach>
+%(approach)s
+</approach>
-## Information Gathered
-%(search_results)s
+Take a step back and reconsider everything I have done in the <approach>.
+Your thinking should be thorough and so it's fine if it's very long.
+Can you help me identify if I am on the right track?
-## Summary
-Do you think you have enough information to complete the task?
-If not, you need to request more information from the agents.
-Return a list of 1 JSON describing what extra information you would need and the suggested approach to gather that information.
-[
-    {
-        "suggested_approach": [""]
-    }
-]
-If you have enough information, you need to summarise the information you have gathered.
-How would you explain this to a new joiner to the team?
-Where would you point them to?
-Provide a detailed step by step guide.
-Remember, the agents DON'T have access to the internet. Every task must be conducted OFFLINE.
-The agents have cloned the repo, so they can open files, browse the code, interact with it...
-In the information gathered, there might be some repeated information, or some information
-that is actually not relevant.
-You need to be able to distinguish what is relevant, and what is not.
-In the information you have gathered, there might be file names, function names, class names. You MUST include
-them in the summary, so the agents know where to look.
-Generate a list of 1 JSON with the following format:
-[
-    {
-        "summary": [""]
-    }
-]
-
-IMPORTANT: Be VERY VERY VERY SPECIFIC.
-IMPORTANT: Include the file names, function names, class names, code blocks, in the step by step guide.
-IMPORTANT: Generate as many steps as possible.
+
+- If there are many code changes, I am probably not on the right track.
+- Only reply with yes or no, enclosed in between <answer> and </answer> tags
+"""
-# Constants for task type choices
-TASK_TYPE_ISSUE = 'yes, the task is an issue that needs to be replicated'
-TASK_TYPE_FEATURE = 'no, the task is a new feature that needs to be implemented'
+refactor_prompt = """
+The assistant is super CREATIVE and always thinks of different ways of approaching the problem.
-does_it_needs_a_test_prompt = (
-    """
-As a strategic manager, you need to judge if the task is an issue that needs to be replicated first
-or if it is a new feature that just needs to be implemented.
-
-Your agents have already gathered information about the codebase.
-
-## Information Gathered
-%(search_results)s
-
-Think CAREFULLY before answering.
-What do you think is the best course of action?
-IMPORTANT: You MUST return a list of 1 JSON with the following format:
-[
-    {
-        "suggested_approach": [""]
-    }
-]
+I am trying to fix an issue by following the steps described in the <augmented_pr_description>.
+I keep track of everything I did in the <approach> below.
-IMPORTANT: You MUST choose one of the two options.
+<approach>
+%(approach)s
+</approach>
+
+Take a step back and reconsider everything I have done in the <approach>.
+The idea is to make the minimal changes to non-test files in the /workspace directory to ensure the issue is resolved.
+I believe my approach is not the best one, so can you suggest what my IMMEDIATE next step should be? (You can suggest reverting the changes and trying something else)
+Your thinking should be thorough and so it's fine if it's very long.
+If possible, suggest ONLY code changes and the reasoning behind those changes.
+Do not use assertive language, use the language of a suggestion.
+REMEMBER: I might have written too many lines of code, so it might be better to discard those changes and start again.
+
+
+- Reply with the suggested approach enclosed in between <next_step> and </next_step> tags
+"""
-)
-initial_prompt = """
-You MUST ONLY generate a list of JSONs:
-
-[
-    {
-        "suggested_approach": [""]
-    },
-    {
-        "suggested_approach": [""]
-    },
-]
-
-Suggested approaches MUST be independent.
-You MUST generate at least 1 suggested approach.
-IMPORTANT: the agents DON'T have access to the internet. Every task must be conducted OFFLINE.
-The agents have cloned the repo, so they can open files, browse the code, interact with it...
-The goal of phase 1, exploring the codebase, finding the relevant details is ONLY to collect information.
-Be as HELPFUL and DETAILED as possible.
-Use the suggested approach to guide the agents in their exploration of the codebase.
-They MUST interact with the environment:
-- Open as many files as needed to gather as much information as possible.
-- Read every piece of code that might be relevant to the task, summarise what does it do.
-- Decide which functions are important to the task, understand how they are used and how they are called.
-
-Remember that the agents can use a Python environment with <execute_ipython>, e.g.:
-<execute_ipython>
-print("Hello World!")
-</execute_ipython>
-
-They can execute bash commands wrapped with <execute_bash>, e.g. <execute_bash> ls </execute_bash>.
-If a bash command returns exit code `-1`, this means the process is not yet finished.
-They must then send a second <execute_bash>. The second <execute_bash> can be empty
-(which will retrieve any additional logs), or it can contain text to be sent to STDIN of the running process,
-or it can contain the text `ctrl+c` to interrupt the process.
- -For commands that may run indefinitely, the output should be redirected to a file and the command run -in the background, e.g. python3 app.py > server.log 2>&1 & -If a command execution result says "Command timed out. Sending SIGINT to the process", -the assistant should retry running the command in the background. - -Be VERY VERY SPECIFIC. - ----- START OF EXAMPLE ---- - -## TASK - -" -Enable quiet mode/no-verbose in CLI for use in pre-commit hook There seems to be only an option to increase the level of verbosity when using -SQLFluff [CLI](https://docs.sqlfluff.com/en/stable/cli.html), not to limit it further. It would be great to have an option to further limit the amount of prints when running -`sqlfluff fix`, especially in combination with deployment using a pre-commit hook. For example, only print the return status and the number of fixes applied, similar to how it -is when using `black` in a pre-commit hook: ![image](https://user-images.githubusercontent.com/10177212/140480676-dc98d00b-4383-44f2-bb90-3301a6eedec2.png) This hides the potentially -long list of fixes that are being applied to the SQL files, which can get quite verbose. -" - -## YOUR RESPONSE: - -[ - { - "suggested_approach": [ - "1. Open the SQLFluff codebase and navigate to the CLI module, likely located in 'src/sqlfluff/cli/'.", - "2. Locate the file responsible for parsing command-line arguments, such as 'commands.py' or 'cli.py'.", - "3. Examine how the '--verbose' flag is implemented in the code.", - "4. Identify if there is an existing '--quiet' or '--no-verbose' option.", - "5. Understand how verbosity levels are set and managed within the CLI code.", - "6. Look for any variables or settings that control the default verbosity level.", - "7. Determine how the '--verbose' flag increases verbosity and see if a similar mechanism can decrease verbosity.", - "8. Note down any functions or methods that output information to the console.", - "9. Identify how these functions can be controlled via verbosity levels.", - "10. Summarize findings and consider how to implement a '--quiet' flag." - ] - }, - { - "suggested_approach": [ - "1. Investigate the logging configuration in SQLFluff, possibly located in 'src/sqlfluff/core/logger.py' or similar.", - "2. Understand how logging levels are set (e.g., DEBUG, INFO, WARNING, ERROR).", - "3. Examine if the logging levels are affected by CLI arguments.", - "4. Identify where in the code the logging configuration is initialized based on user input.", - "5. Check if there is a way to adjust the logging level via a CLI option.", - "6. Determine if adding a '--quiet' flag can set the logging level to WARNING or ERROR to suppress INFO messages.", - "7. Note the changes needed in the logging setup to support a quiet mode.", - "8. Identify all logging statements that may need to respect the new logging level.", - "9. Consider the impact on existing functionality and ensure that critical messages are still displayed.", - "10. Summarize how logging can be adjusted to implement a quiet mode." - ] - }, - { - "suggested_approach": [ - "1. Analyze how output to the console is handled throughout the codebase.", - "2. Identify the functions used for outputting messages, such as 'click.echo', 'print', or custom wrapper functions.", - "3. Trace where these output functions are called in the code, especially during 'sqlfluff fix' execution.", - "4. Determine if there is a centralized output function or if output is scattered across multiple functions.", - "5. 
Assess whether output functions can be modified to check a verbosity level before printing.", - "6. Consider creating or modifying a wrapper function that respects a verbosity or quiet setting.", - "7. Identify any messages that should always be displayed, regardless of verbosity settings (e.g., errors).", - "8. Note the locations in the code where changes need to be made to control output.", - "9. Evaluate the feasibility of implementing a quiet mode by adjusting output functions.", - "10. Summarize the steps required to control output at the source." - ] - }, - { - "suggested_approach": [ - "1. Explore the configuration options available in SQLFluff by examining the configuration parser code, possibly in 'src/sqlfluff/core/config.py'.", - "2. Look for existing configuration parameters related to verbosity or output control.", - "3. Determine how configuration files (like '.sqlfluff') are parsed and applied.", - "4. Assess if a new configuration option can be introduced to control verbosity levels.", - "5. Identify how this configuration option can be read and applied during runtime.", - "6. Check if the CLI options can override configuration file settings for verbosity.", - "7. Map out the code changes required to implement and support a new configuration option.", - "8. Ensure that the new configuration integrates smoothly with existing settings.", - "9. Consider user documentation and how users would be informed about the new option.", - "10. Summarize the process of adding a verbosity control via configuration files." - ] - }, - { - "suggested_approach": [ - "1. Examine the implementation of the 'sqlfluff fix' command to understand its workflow.", - "2. Identify where the command generates output and how that output is formatted.", - "3. Determine if 'sqlfluff fix' has different output modes or formats based on context.", - "4. Check if the command detects when it's running in a pre-commit hook or similar environment.", - "5. Consider if output suppression can be contextually applied when running in certain environments.", - "6. Identify any existing mechanisms for output control based on execution context.", - "7. Explore how the 'black' formatter handles output suppression in pre-commit hooks.", - "8. Analyze if similar techniques can be applied within SQLFluff's codebase.", - "9. Note any dependencies or external factors that influence output generation.", - "10. Summarize how context-aware output control can be implemented." - ] - } -] - - ----- END OF EXAMPLE ---- - - ---- START OF EXAMPLE 2 --- - -## TASK -" -ModelChain.prepare_inputs can succeed with missing dhi From the docstring for `ModelChain.prepare_inputs()` -I believe the method should fail if `weather` does not have a `dhi` column. The validation checks for `'ghi'` twice, -but not `'dhi`' https://github.com/pvlib/pvlib-python/blob/11c356f9a89fc88b4d3ff368ce1aae170a97ebd7/pvlib/modelchain.py#L1136 -" - -## YOUR RESPONSE: - -[ - { - "suggested_approach": [ - "1. Open the file pvlib/modelchain.py and locate the ModelChain.prepare_inputs method. Carefully read through the method's code, focusing on the section where it validates the weather DataFrame columns, specifically around line 1136.", - "2. Identify the validation checks for the weather DataFrame. Note whether it checks for the presence of 'dhi' or mistakenly checks for 'ghi' twice.", - "3. Examine the docstring of ModelChain.prepare_inputs to understand the expected behavior when dhi is missing from the weather data.", - "4. 
Investigate any helper functions called within prepare_inputs that handle irradiance data, such as methods for inferring missing components.", - "5. Review the unit tests related to prepare_inputs in pvlib/tests/test_modelchain.py to see if cases with missing dhi are covered.", - "6. Use the Python environment to simulate calling prepare_inputs with weather data missing the dhi column and observe the outcome.", - "", - "import pvlib", - "from pvlib import modelchain, location, pvsystem", - "import pandas as pd", - "mc = modelchain.ModelChain(pvsystem.PVSystem(), location.Location(32.2, -110.9))", - "weather = pd.DataFrame({'ghi': [1000], 'dni': [800]})", - "mc.prepare_inputs(weather)", - "", - "7. Document any discrepancies between the code and the documentation, and note any unexpected behaviors." - ] - }, - { - "suggested_approach": [ - "1. Generate a flowchart of the prepare_inputs method to understand its logic and how it processes the weather DataFrame.", - "2. Open pvlib/modelchain.py and trace each step within prepare_inputs, paying attention to how it handles missing data.", - "3. Look for any conditional statements that manage cases where dhi is not provided and see if alternative calculations are performed or if an error is raised.", - "4. Explore related methods like complete_irradiance or irradiance.get_total_irradiance to see how missing components are handled.", - "5. Test different weather DataFrame scenarios in the Python environment to observe how prepare_inputs behaves with various missing columns.", - "", - "import pvlib", - "from pvlib import modelchain, location, pvsystem", - "import pandas as pd", - "mc = modelchain.ModelChain(pvsystem.PVSystem(), location.Location(32.2, -110.9))", - "# Weather data missing 'dhi'", - "weather_missing_dhi = pd.DataFrame({'ghi': [1000], 'dni': [800]})", - "mc.prepare_inputs(weather_missing_dhi)", - "# Weather data missing 'ghi'", - "weather_missing_ghi = pd.DataFrame({'dhi': [200], 'dni': [800]})", - "mc.prepare_inputs(weather_missing_ghi)", - "", - "6. Record the outcomes and any exceptions raised to determine if the method behaves as intended." - ] - }, - { - "suggested_approach": [ - "1. Analyze the git commit history for modelchain.py to identify when the validation issue was introduced.", - "", - "cd pvlib-python", - "git log -L 1136,1140 /modelchain.py", - "", - "2. Review the changes in each commit affecting the validation checks in prepare_inputs.", - "3. Open the relevant commits and examine the differences in the validation code.", - "4. Check for any related issues or pull requests in the repository's local clone that discuss missing dhi validation.", - "5. Look into the test coverage reports (if available locally) to see if the validation logic is adequately tested.", - "6. Summarize findings on whether the issue is a recent regression or an existing oversight." - ] - } -] - ---- END OF EXAMPLE 2 --- - ---- YOUR TURN --- - -## TASK -%(task)s +critical_prompt = """ +The assistant is super CREATIVE, it considers every possible scenario that is DIFFERENT from the ones described in the . + +I believe I have fixed the issue described in the following the steps described in the + +%(approach)s + -## YOUR RESPONSE: +After fixing the issue, there might be some side-effects that we need to consider. +(e.g. if we fix the way data is written, then we might need to modify the way data is read) +Your thinking should be thorough and so it's fine if it's very long. 
+
+
+- Only reply with ONE side-effect, enclosed in between <side_effect> and </side_effect> tags and starting with the phrase "Have you considered..."
+- If you think everything is covered, just reply with "everything is covered" enclosed in between <side_effect> and </side_effect> tags
+"""
-def get_prompt(task: str, phase: str, search_results: str = '') -> str:
-    if phase == 'search':
-        base_prompt = general_description + initial_prompt
-    elif phase == 'summary':
-        base_prompt = general_description + condense_information_prompt
+def format_conversation(trajectory: list[Message]) -> str:
+    """Format a conversation history into a readable string.
+
+    Args:
+        trajectory: List of Message objects containing conversation turns
-    formatted_prompt = base_prompt % {
+    Returns:
+        Formatted string representing the conversation
+    """
+    formatted_parts = []
+
+    for message in trajectory:
+        role = message.role
+        # Join all TextContent messages together
+        content_text = ' '.join(
+            item.text for item in message.content if isinstance(item, TextContent)
+        )
+
+        if content_text.strip():  # Only add non-empty content
+            formatted_parts.append(f'{role}: {content_text}\n')
+
+    return '\n'.join(formatted_parts)
+
+
+def get_prompt(
+    task: str,
+    trajectory: list[Message],
+    prompt_type: str = 'initial',
+    augmented_task: str = '',
+) -> str:
+    """Format and return the appropriate prompt based on prompt_type.
+
+    Args:
+        task: The task description
+        trajectory: List of Message objects containing conversation history
+        prompt_type: Type of prompt to return ("initial", "right_track", "refactor", or "critical")
+        augmented_task: The augmented task description
+    Returns:
+        Formatted prompt string
+    """
+    # If approach is a conversation history, format it
+    if trajectory:
+        approach = format_conversation(trajectory)
+    else:
+        approach = ''
+
+    # Select the appropriate prompt template
+    if prompt_type == 'initial':
+        template = initial_prompt
+    elif prompt_type == 'right_track':
+        template = right_track_prompt
+    elif prompt_type == 'refactor':
+        template = refactor_prompt
+    elif prompt_type == 'critical':
+        template = critical_prompt
+    else:
+        raise ValueError(f'Unknown prompt type: {prompt_type}')
+
+    # Format the selected template with the task and approach
+    formatted_prompt = general_description + template % {
         'task': task,
-        'phase': phase,
-        'search_results': search_results,
+        'approach': approach,
+        'augmented_pr_description': augmented_task,
     }
-    # Add instruction to not include json formatting
-    formatted_prompt += '\n\nIMPORTANT: Do not include ```json at the start or ``` at the end of your response. Just return the raw JSON list.'
-    return formatted_prompt
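For reference, a minimal usage sketch of the new prompt helpers. This is a hypothetical illustration, not part of the patch: it assumes the module paths introduced in this diff are importable, the task and trajectory text are made up, and the Message/TextContent construction mirrors SupervisorAgent.get_response.

from openhands.agenthub.supervisor_agent.prompt import format_conversation, get_prompt
from openhands.core.message import Message, TextContent

# A tiny, made-up delegate trajectory; in practice the SupervisorAgent rebuilds
# this from the serialized 'trayectory' returned by the CodeActAgent delegate.
trajectory = [
    Message(role='user', content=[TextContent(text='Fix the off-by-one error in the pagination helper.')]),
    Message(role='assistant', content=[TextContent(text='I opened pagination.py and adjusted the slice bounds.')]),
]

# format_conversation flattens the Message objects into "role: text" blocks.
print(format_conversation(trajectory))

# get_prompt prepends general_description and fills the selected template;
# 'right_track' asks the LLM whether the recorded approach is still on track.
prompt = get_prompt(
    task='Fix the off-by-one error in the pagination helper.',
    trajectory=trajectory,
    prompt_type='right_track',
)
print(prompt)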