Skip to content

Commit

Permalink
tweak: uniresizer: adding report interval arg to make logs simpler
Browse files Browse the repository at this point in the history
  • Loading branch information
trojblue committed Mar 5, 2024
1 parent 7c533f6 commit 282407d
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 7 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "unibox"
version = "0.3.15"
version = "0.3.16"
description = "Unibox is a tool that aims to provide a unified interface for various common daily operations"
authors = ["yada <[email protected]>"]
license = "MIT"
Expand Down
41 changes: 35 additions & 6 deletions unibox/utils/uni_resizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from PIL import Image
from pathlib import Path
from tqdm.auto import tqdm
import time

import unibox
from unibox import UniLoader, UniLogger
Expand Down Expand Up @@ -174,13 +175,38 @@ def _resize_single_image_task(self, og_rel_image_path: str) -> None:
self.logger.error(f"Error saving image {dst_file_path}. Skipping...")

@staticmethod
def _execute_resize_tasks(tasks: List[Tuple], max_workers: int, report_interval: int = 5) -> None:
    """
    Execute tasks using ProcessPoolExecutor, with throttled tqdm progress updates.

    Args:
        tasks: tuples of ``(callable, *args)``; each one is submitted to the
            pool as ``callable(*args)``.
        max_workers: forwarded to ``ProcessPoolExecutor(max_workers=...)``.
        report_interval: minimum number of seconds between two progress bar
            updates while tasks are completing.

    Note:
        Futures are only waited on; their results are never retrieved here,
        so an exception raised inside a task is not re-raised by this method.
    """
    total = len(tasks)

    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(task, *args) for task, *args in tasks]

        # The context manager guarantees the bar is closed even if the loop
        # below is interrupted.
        with tqdm(total=total, desc=f"[interval={report_interval}] Resizing images") as pbar:
            completed = 0
            # Monotonic clock: interval measurement must not be affected by
            # wall-clock adjustments.
            last_update_time = time.monotonic()

            for _ in as_completed(futures):
                completed += 1
                now = time.monotonic()

                # Only touch the bar once per `report_interval` seconds to
                # keep log output short.
                if now - last_update_time >= report_interval:
                    pbar.update(completed - pbar.n)
                    last_update_time = now

            # Whatever finished since the last throttled update is shown now,
            # so the bar always ends at 100%.
            pbar.n = total
            pbar.refresh()

def get_resize_jobs(self) -> List[str]:
"""
Expand Down Expand Up @@ -209,7 +235,7 @@ def get_resize_jobs(self) -> List[str]:

return todo_image_files

def execute_resize_jobs(self, image_files: List[str]) -> None:
def execute_resize_jobs(self, image_files: List[str], simplified_print=True) -> None:
"""
Execute resizing tasks for the given list of image files.
Expand All @@ -219,7 +245,10 @@ def execute_resize_jobs(self, image_files: List[str]) -> None:
tasks = [(self._resize_single_image_task, og_rel_image_path) for og_rel_image_path in image_files]

self.logger.info(f"Resizing {len(tasks)} images...")
self._execute_resize_tasks(tasks, self.max_workers)
if simplified_print:
self._execute_resize_tasks(tasks, self.max_workers, report_interval=5)
else:
self._execute_resize_tasks(tasks, self.max_workers, report_interval=1)


if __name__ == '__main__':
Expand Down

0 comments on commit 282407d

Please sign in to comment.