From a951c7c883a30477e060f0cdc9c20290d0e27bc2 Mon Sep 17 00:00:00 2001 From: ishan Date: Fri, 25 Oct 2024 17:26:50 -0500 Subject: [PATCH] ui changes --- anthropic_client.py | 29 +++++++- window.py | 172 ++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 192 insertions(+), 9 deletions(-) diff --git a/anthropic_client.py b/anthropic_client.py index 5fd8f6a..fa6c43a 100644 --- a/anthropic_client.py +++ b/anthropic_client.py @@ -1,7 +1,8 @@ import anthropic -from anthropic.types.beta import BetaMessage +from anthropic.types.beta import BetaMessage, BetaTextBlock, BetaToolUseBlock import os from dotenv import load_dotenv +import logging class AnthropicClient: def __init__(self): @@ -9,7 +10,11 @@ def __init__(self): self.api_key = os.getenv("ANTHROPIC_API_KEY") if not self.api_key: raise ValueError("ANTHROPIC_API_KEY not found in environment variables") - self.client = anthropic.Anthropic(api_key=self.api_key) + + try: + self.client = anthropic.Anthropic(api_key=self.api_key) + except Exception as e: + raise ValueError(f"Failed to initialize Anthropic client: {str(e)}") def get_next_action(self, run_history) -> BetaMessage: try: @@ -57,10 +62,28 @@ def get_next_action(self, run_history) -> BetaMessage: } ], messages=cleaned_history, - system="The user will ask you to perform a task and you should use their computer to do so. After each step, take a screenshot and carefully evaluate if you have achieved the right outcome. Explicitly show your thinking: 'I have evaluated step X...' If not correct, try again. Only when you confirm a step was executed correctly should you move on to the next one. Note that you have to click into the browser address bar before typing a URL. You should always call a tool! Always return a tool call. Remember call the finish_run tool when you have achieved the goal of the task. Do not explain you have finished the task, just call the tool. Use keyboard shortcuts to navigate whenever possible.", + system="The user will ask you to perform a task and you should use their computer to do so. After each step, take a screenshot and carefully evaluate if you have achieved the right outcome. Explicitly show your thinking: 'I have evaluated step X...' If not correct, try again. Only when you confirm a step was executed correctly should you move on to the next one. Note that you have to click into the browser address bar before typing a URL. You should always call a tool! Always return a tool call. Remember call the finish_run tool when you have achieved the goal of the task. Do not explain you have finished the task, just call the tool. Use keyboard shortcuts to navigate whenever possible. Please remember to take a screenshot before any clicks.", betas=["computer-use-2024-10-22"], ) + + # If Claude responds with just text (no tool use), create a finish_run action with the message + has_tool_use = any(isinstance(content, BetaToolUseBlock) for content in response.content) + if not has_tool_use: + text_content = next((content.text for content in response.content if isinstance(content, BetaTextBlock)), "") + # Create a synthetic tool use block for finish_run + response.content.append(BetaToolUseBlock( + id="synthetic_finish", + type="tool_use", + name="finish_run", + input={ + "success": False, + "error": f"Claude needs more information: {text_content}" + } + )) + logging.info(f"Added synthetic finish_run for text-only response: {text_content}") + return response + except anthropic.APIError as e: raise Exception(f"API Error: {str(e)}") except Exception as e: diff --git a/window.py b/window.py index 2989c1e..df20a93 100644 --- a/window.py +++ b/window.py @@ -1,8 +1,11 @@ from PyQt6.QtWidgets import (QMainWindow, QVBoxLayout, QHBoxLayout, QWidget, QTextEdit, - QPushButton, QLabel, QProgressBar, QSystemTrayIcon, QMenu, QApplication) + QPushButton, QLabel, QProgressBar, QSystemTrayIcon, QMenu, QApplication, QDialog, QLineEdit) from PyQt6.QtCore import Qt, QPoint, pyqtSignal, QThread -from PyQt6.QtGui import QFont, QIcon, QColor, QPalette, QFontDatabase +from PyQt6.QtGui import QFont, QIcon, QColor, QPalette, QFontDatabase, QKeySequence, QShortcut import qtawesome as qta +from store import Store # Add this import +from anthropic_client import AnthropicClient # Add this import + class AgentThread(QThread): update_signal = pyqtSignal(str) @@ -22,13 +25,90 @@ def __init__(self, store, anthropic_client): self.store = store self.anthropic_client = anthropic_client + # Check if API key is missing + if self.store.error and "ANTHROPIC_API_KEY not found" in self.store.error: + self.show_api_key_dialog() + self.setWindowTitle("Grunty 👨🏽‍💻") - self.setGeometry(100, 100, 350, 600) + self.setGeometry(100, 100, 400, 700) self.setWindowFlags(Qt.WindowType.FramelessWindowHint | Qt.WindowType.WindowStaysOnTopHint) self.setup_ui() self.setup_tray() + self.setup_shortcuts() + + def show_api_key_dialog(self): + dialog = QDialog(self) + dialog.setWindowTitle("API Key Required") + dialog.setFixedWidth(400) + + layout = QVBoxLayout() + + # Icon and title + title_layout = QHBoxLayout() + icon_label = QLabel() + icon_label.setPixmap(qta.icon('fa5s.key', color='#4CAF50').pixmap(32, 32)) + title_layout.addWidget(icon_label) + title_label = QLabel("Anthropic API Key Required") + title_label.setStyleSheet("font-size: 16px; font-weight: bold; color: #4CAF50;") + title_layout.addWidget(title_label) + layout.addLayout(title_layout) + # Description + desc_label = QLabel("Please enter your Anthropic API key to continue. You can find this in your Anthropic dashboard.") + desc_label.setWordWrap(True) + desc_label.setStyleSheet("color: #666; margin: 10px 0;") + layout.addWidget(desc_label) + + # API Key input + self.api_key_input = QLineEdit() + self.api_key_input.setPlaceholderText("sk-ant-...") + self.api_key_input.setStyleSheet(""" + QLineEdit { + padding: 10px; + border: 2px solid #4CAF50; + border-radius: 5px; + font-size: 14px; + } + """) + layout.addWidget(self.api_key_input) + + # Save button + save_btn = QPushButton("Save API Key") + save_btn.setStyleSheet(""" + QPushButton { + background-color: #4CAF50; + color: white; + border: none; + padding: 10px; + border-radius: 5px; + font-size: 14px; + font-weight: bold; + } + QPushButton:hover { + background-color: #45a049; + } + """) + save_btn.clicked.connect(lambda: self.save_api_key(dialog)) + layout.addWidget(save_btn) + + dialog.setLayout(layout) + dialog.exec() + + def save_api_key(self, dialog): + api_key = self.api_key_input.text().strip() + if not api_key: + return + + # Save to .env file + with open('.env', 'w') as f: + f.write(f'ANTHROPIC_API_KEY={api_key}') + + # Reinitialize the store and anthropic client + self.store = Store() + self.anthropic_client = AnthropicClient() + dialog.accept() + def setup_ui(self): central_widget = QWidget() self.setCentralWidget(central_widget) @@ -205,11 +285,29 @@ def agent_finished(self): def update_log(self, message): if message.startswith("Assistant:"): - self.action_log.append(f'

{message}

') + icon = qta.icon('fa5s.robot', color='#4CAF50') + pixmap = icon.pixmap(32, 32) + icon_html = f'' + self.action_log.append(f''' +
+ {icon_html} +
+

{message}

+
+
+ ''') elif message.startswith("Assistant action:"): - self.action_log.append(f'

{message}

') + icon = qta.icon('fa5s.cogs', color='#2196F3') + pixmap = icon.pixmap(24, 24) + icon_html = f'' + self.action_log.append(f''' +
+ {icon_html} +

{message}

+
+ ''') else: - self.action_log.append(f'

{message}

') + self.action_log.append(f'

{message}

') # Scroll to the bottom of the log self.action_log.verticalScrollBar().setValue(self.action_log.verticalScrollBar().maximum()) @@ -235,3 +333,65 @@ def closeEvent(self, event): def quit_application(self): self.tray_icon.hide() QApplication.quit() + + def pixmap_to_base64(self, pixmap): + from PyQt6.QtCore import QByteArray, QBuffer + import base64 + + byte_array = QByteArray() + buffer = QBuffer(byte_array) + buffer.open(QBuffer.OpenModeFlag.WriteOnly) + pixmap.save(buffer, 'PNG') + + return base64.b64encode(byte_array.data()).decode() + + def setup_shortcuts(self): + # Send message with Ctrl+Enter or just Enter + send_shortcut = QShortcut(QKeySequence("Ctrl+Return"), self) + send_shortcut.activated.connect(self.run_agent) + + enter_shortcut = QShortcut(QKeySequence("Return"), self) + enter_shortcut.activated.connect(self.handle_return) + + # Clear input with Escape + clear_shortcut = QShortcut(QKeySequence("Escape"), self) + clear_shortcut.activated.connect(self.clear_input) + + # Focus input with Ctrl+L + focus_shortcut = QShortcut(QKeySequence("Ctrl+L"), self) + focus_shortcut.activated.connect(lambda: self.input_area.setFocus()) + + # Stop agent with Ctrl+S + stop_shortcut = QShortcut(QKeySequence("Ctrl+S"), self) + stop_shortcut.activated.connect(self.stop_agent) + + # Toggle window visibility with Ctrl+H + toggle_shortcut = QShortcut(QKeySequence("Ctrl+H"), self) + toggle_shortcut.activated.connect(self.toggle_visibility) + + # Clear log with Ctrl+K + clear_log_shortcut = QShortcut(QKeySequence("Ctrl+K"), self) + clear_log_shortcut.activated.connect(lambda: self.action_log.clear()) + + def handle_return(self): + # If Shift is not pressed when Enter is hit, send the message + if not QApplication.keyboardModifiers() & Qt.KeyboardModifier.ShiftModifier: + # Only run if there's text and the run button is enabled + if self.run_button.isEnabled(): + self.run_agent() + else: + # If Shift+Enter, insert a newline + cursor = self.input_area.textCursor() + cursor.insertText('\n') + + def clear_input(self): + self.input_area.clear() + self.input_area.setFocus() + + def toggle_visibility(self): + if self.isVisible(): + self.hide() + else: + self.show() + self.raise_() + self.activateWindow()