Skip to content

Commit

Permalink
fix all the pyright errors
Browse files — browse the repository at this point in the history
  • Loading branch information
toshok committed Oct 1, 2024
1 parent 54e1cb4 commit f8c3160
Show file tree
Hide file tree
Showing 8 changed files with 45 additions and 23 deletions.
8 changes: 4 additions & 4 deletions sweagent/investigations/google_drive.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import io
import os
from typing import Callable
from typing import Callable, Any

from google.oauth2.service_account import Credentials
from googleapiclient.discovery import build
Expand All @@ -12,7 +12,7 @@

from sweagent.investigations.constants import DRIVE_DEFAULT_USER_INDEX

google_drive_service: any = None
google_drive_service: Any = None


def get_google_drive_folder_href(id: str) -> str:
Expand All @@ -24,7 +24,7 @@ def get_absolute_path(relative_path: str) -> str:
return os.path.join(script_dir, relative_path)


def get_google_drive_service() -> any:
def get_google_drive_service() -> Any:
global google_drive_service
if google_drive_service:
return google_drive_service
Expand Down Expand Up @@ -125,7 +125,7 @@ def get_or_create_drive_folder(parent_folder_id: str, relative_path: list[str])


def download_drive_file(
dest_path: str, drive_file_id: str, drive_file_size: int = None
dest_path: str, drive_file_id: str, drive_file_size: int | None = None
):
skipped: bool = False
service = get_google_drive_service()
Expand Down
22 changes: 11 additions & 11 deletions sweagent/investigations/google_drive_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ def __init__(self):
self.downloaded_files = 0
self.skipped_files = 0
self.failed_files = 0
self.progress_bar = None
self.files_to_download = []

def download_folder_but_slow(
Expand All @@ -32,6 +31,9 @@ def download_folder_but_slow(

print("Calculating total files and size...")
drive_item_id = get_drive_file_id(drive_parent_folder_id, [folder_name])
if drive_item_id is None:
print(f"Could not find folder {folder_name} in parent folder with ID {drive_parent_folder_id}")
return False
with tqdm(
total=0, unit=" items", bar_format="{desc}{bar:10}", leave=False
) as pbar:
Expand All @@ -42,16 +44,14 @@ def download_folder_but_slow(
print(f"Total files: {self.total_files}")
print(f"Total size: {self.total_size / (1024*1024):.2f} MB")

self.progress_bar = tqdm(
with tqdm(
total=len(self.files_to_download), unit=" files", desc="Overall Progress"
)

self.download_files()
) as pbar:
self._download_files(pbar)

self.progress_bar.close()
return True

def calculate_total_size(self, item_id: str, is_folder: bool, pbar, dest_path: str):
def calculate_total_size(self, item_id: str, is_folder: bool, pbar: tqdm, dest_path: str):
service = get_google_drive_service()
if is_folder:
self._calculate_folder_size(item_id, pbar, dest_path)
Expand All @@ -69,7 +69,7 @@ def calculate_total_size(self, item_id: str, is_folder: bool, pbar, dest_path: s

pbar.update(1)

def _calculate_folder_size(self, folder_id: str, pbar, dest_path: str):
def _calculate_folder_size(self, folder_id: str, pbar: tqdm, dest_path: str):
service = get_google_drive_service()
query = f"'{folder_id}' in parents"
results = (
Expand All @@ -93,7 +93,7 @@ def _calculate_folder_size(self, folder_id: str, pbar, dest_path: str):

pbar.update(1)

def download_files(self):
def _download_files(self, pbar: tqdm):
service = get_google_drive_service()
for file_id, dest_path, file_size in self.files_to_download:
try:
Expand All @@ -107,12 +107,12 @@ def download_files(self):
self.downloaded_files += 1
self.processed_files += 1
self.processed_size += file_size
self.progress_bar.update(1)
pbar.update(1)
except HttpError as error:
print(f"Error downloading file {dest_path}: {str(error)}")
self.failed_files += 1
self.processed_files += 1
self.progress_bar.update(1)
pbar.update(1)

print(f"Downloaded: {self.downloaded_files}, Skipped: {self.skipped_files}, Failed: {self.failed_files}")

Expand Down
13 changes: 11 additions & 2 deletions sweagent/investigations/instance_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import hashlib
import sys
from typing import Union

import pandas as pd

Expand All @@ -17,8 +18,12 @@ def get_swe_bench_instance_markdown(instance_id: str):
df = get_swe_bench_data()

# Select the specific row
specific_row = df[df['instance_id'] == instance_id]
specific_row: Union[pd.DataFrame, pd.Series] = df[df['instance_id'] == instance_id]

if isinstance(specific_row, pd.Series):
# Convert to DataFrame if it's a Series
specific_row = specific_row.to_frame().T

if specific_row.empty:
return "No data found for the given instance_id."

Expand All @@ -45,7 +50,11 @@ def generate_cached_image_id(instance_id: str, environment_setup: str = "no_setu
df = get_swe_bench_data()

# Find the row with the matching instance_id
row = df[df["instance_id"] == instance_id]
row: Union[pd.DataFrame, pd.Series] = df[df["instance_id"] == instance_id]

if isinstance(row, pd.Series):
# Convert to DataFrame if it's a Series
row = row.to_frame().T

if row.empty:
msg = f"No data found for instance_id: {instance_id}"
Expand Down
4 changes: 2 additions & 2 deletions sweagent/investigations/local_paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def recursive_move(src, dst):

elif src_path.is_dir():
for item in src_path.iterdir():
recursive_move(item, dst_path / item.relative_to(src_path), missing_files)
recursive_move(item, dst_path / item.relative_to(src_path))
src_path.rmdir()
# print(f"Removed empty directory: {src_path}")

Expand Down Expand Up @@ -216,7 +216,7 @@ def disentangle_prediction_run_logs(self) -> None:
disentangled_files = self.disentangle_raw_run_log_file(run_log_path)
all_disentangled_files.extend(disentangled_files)

print(f"Disentangled {len(disentangled_files)} instance log files.")
print(f"Disentangled {len(all_disentangled_files)} instance log files.")


LocalPathsT = TypeVar("LocalPathsT", bound="LocalPaths")
Expand Down
2 changes: 1 addition & 1 deletion sweagent/investigations/lock_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def __init__(self, label: str, path: str, content: str = "") -> None:
def had_lock(self) -> bool:
    """Return the current value of this object's ``locked`` attribute."""
    lock_state = self.locked
    return lock_state

def _lock_file_path(self, label: str = None) -> str:
def _lock_file_path(self, label: str | None = None) -> str:
parent_path = os.path.dirname(self.path)
target_name = os.path.basename(self.path)
return f"{parent_path}/{label or self.label}.{target_name}.lock"
Expand Down
14 changes: 13 additions & 1 deletion sweagent/investigations/run_logs_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,23 +16,35 @@ def __init__(self, run_name: str) -> None:

def download_instance_prediction_run_log(self, instance_id: str):
    """Download this run's sanitized prediction run log for one instance.

    Resolves the Drive folder
    ``<run_name>/trajectories/<SANITIZED_RUN_LOGS_FOLDER_NAME>`` under the
    repro-data root, then asks ``drive_download_files`` for the file whose
    name matches the instance's run-log name.

    Args:
        instance_id: Instance whose run log should be fetched.

    Returns:
        Whatever ``drive_download_files`` returns, or an empty list when the
        folder lookup yields ``None``.
    """
    folder_id = get_drive_file_id(
        DRIVE_REPRO_DATA_ROOT_FOLDER_ID,
        [self.run_name, "trajectories", SANITIZED_RUN_LOGS_FOLDER_NAME],
    )
    if folder_id is not None:
        log_name = get_instance_run_log_name(instance_id)
        name_query = f"name='{log_name}'"
        return drive_download_files(
            folder_id, name_query, self.get_prediction_run_log_path
        )
    # should we raise an error here?
    return []

def download_instance_prediction_trajectory_json(self, instance_id: str):
    """Download this run's ``<instance_id>.traj`` trajectory file.

    Resolves the Drive folder ``<run_name>/trajectories`` under the
    repro-data root and asks ``drive_download_files`` for the file named
    ``<instance_id>.traj`` inside it.

    Args:
        instance_id: Instance whose trajectory file should be fetched.

    Returns:
        Whatever ``drive_download_files`` returns, or an empty list when the
        folder lookup yields ``None``.
    """
    trajectories_id = get_drive_file_id(
        DRIVE_REPRO_DATA_ROOT_FOLDER_ID, [self.run_name, "trajectories"]
    )
    if trajectories_id is not None:
        name_query = f"name='{instance_id}.traj'"
        return drive_download_files(
            trajectories_id, name_query, self.get_prediction_trajectories_path
        )
    # should we raise an error here?
    return []

def get_instance_eval_folder_href(self, instance_id: str) -> str | None:
    """Return a Drive link to this run's evaluation-log folder for one instance.

    Resolves ``<run_name>/evaluation_logs/<instance_id>`` under the repro-data
    root folder and converts the resulting folder ID into an href.

    Args:
        instance_id: Instance whose evaluation folder should be linked.

    Returns:
        The href produced by ``get_google_drive_folder_href``, or ``None``
        when the folder lookup yields ``None``.
    """
    folder_id = get_drive_file_id(
        DRIVE_REPRO_DATA_ROOT_FOLDER_ID,
        [self.run_name, "evaluation_logs", instance_id],
    )
    if folder_id is None:
        # An href is a string, so "no folder" is signalled with None rather
        # than the empty list the download_* methods return; both are falsy,
        # so truthiness checks in callers behave the same.
        # should we raise an error here?
        return None
    return get_google_drive_folder_href(folder_id)

def download_eval_instance_patch(self, instance_id: str):
folder_id = get_drive_file_id(DRIVE_REPRO_DATA_ROOT_FOLDER_ID, [self.run_name, "evaluation_logs", instance_id])
if folder_id is None:
# should we raise an error here?
return []
file_name = "patch.diff"
def local_path_fn(_fname: str) -> str:
return self.get_local_run_path(f"{instance_id}-patch.diff")
return self.get_run_path(f"{instance_id}-patch.diff")
return drive_download_files(folder_id, f"name='{file_name}'", local_path_fn)

def download_entire_run(self):
Expand Down
2 changes: 1 addition & 1 deletion sweagent/investigations/summarize_instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ def make_relative_path(fpath: str):
return os.path.relpath(fpath, get_investigation_data_folder())

def summarize_instance(instance_id: str):
nlnl = "\n\n"
run_data = []
for run_name in RUN_NAMES:
syncer = RunLogsSync(run_name)
Expand All @@ -56,7 +57,6 @@ def summarize_instance(instance_id: str):
# eval_folder_href = syncer.get_instance_eval_folder_href(instance_id)
eval_test_meta_log = syncer.get_eval_meta_log(instance_id)
eval_test_output = syncer.get_eval_test_output_log(instance_id)
nlnl = "\n\n"
run_data.append(f"""
### {run_name}
Expand Down
3 changes: 2 additions & 1 deletion tdd/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import os
from pathlib import Path
from typing import Any

from instances import instance_id_list

Expand All @@ -28,7 +29,7 @@
def trajectory_dir():
    """Build the relative trajectory directory path for the current settings.

    The path is ``trajectories/<login>/<run-name>``, where ``<login>`` comes
    from ``os.getlogin()`` and ``<run-name>`` joins the module-level model,
    dataset stem, config, temperature, top-p, cost limit and install-env
    values with double underscores.
    """
    # Resolve the login first, matching the left-to-right evaluation of the
    # single f-string this replaces.
    login = os.getlogin()
    run_parts = [
        f"{model_name}",
        f"{Path(dataset_name).stem}",
        f"{config}",
        f"t-{temperature:.2f}",
        f"p-{top_p:.2f}",
        f"c-{per_instance_cost_limit_usd:.2f}",
        f"{install_env}-1",
    ]
    return f"trajectories/{login}/" + "__".join(run_parts)

def run_python_cmd(cmd: str, args: dict[str, str]):
def run_python_cmd(cmd: str, args: dict[str, Any]):
# convert args to a string
args_str = " ".join([f"--{k} {v}" for k, v in args.items()])
# execute the command
Expand Down

0 comments on commit f8c3160

Please sign in to comment.