Merge branch 'main' into feature/better_yoeo_versions_20245
Flova committed Dec 18, 2024
2 parents dbe2372 + d0393c1 commit f7abd77
Showing 13 changed files with 1,794 additions and 1,327 deletions.
20 changes: 9 additions & 11 deletions .github/workflows/main.yml
@@ -7,23 +7,21 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-22.04, ubuntu-20.04, windows-latest]
os: [ubuntu-24.04, ubuntu-22.04, windows-latest]
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4

- name: Install Poetry
run: pipx install poetry

- name: Set up Python
uses: actions/setup-python@v1
uses: actions/setup-python@v4
with:
python-version: 3.8

- name: Upgrade pip
run: python3 -m pip install --upgrade pip

- name: Install Poetry
run: pip3 install poetry --user
python-version: 3.x
cache: "poetry"

- name: Install Dependencies
run: poetry install
run: poetry install --with dev

# Prints the help pages of all scripts to see if the imports etc. work
- name: Test the help pages
6 changes: 6 additions & 0 deletions class_config/colored_robots.yaml
@@ -0,0 +1,6 @@
group_classes:
- robot_red
- robot_blue
- robot_unknown

surrogate_class: robot
2 changes: 2 additions & 0 deletions class_config/default.yaml
@@ -0,0 +1,2 @@
group_classes:
surrogate_class: ""
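The two new class_config files above control how detection classes are grouped at inference and evaluation time: colored_robots.yaml collapses the three robot colour classes into one surrogate "robot" class, while default.yaml leaves everything ungrouped. Below is a minimal, illustrative sketch of how such a file can be interpreted; the helper name load_group_mapping and the example class list are assumptions, not the actual ClassConfig API.

# Illustrative sketch only: reads a class_config YAML (like colored_robots.yaml
# above) and maps the IDs of all grouped detection classes onto the surrogate class.
import yaml

def load_group_mapping(config_path: str, class_names: list[str]) -> dict[int, str]:
    with open(config_path) as f:
        config = yaml.safe_load(f)
    grouped = config.get("group_classes") or []      # e.g. robot_red, robot_blue, robot_unknown
    surrogate = config.get("surrogate_class") or ""  # e.g. "robot"
    return {class_names.index(name): surrogate for name in grouped if name in class_names}

# With colored_robots.yaml all robot_* detections share the surrogate class "robot",
# so non-maximum suppression can treat them as a single group:
names = ["ball", "goalpost", "robot_blue", "robot_red", "robot_unknown"]
print(load_group_mapping("class_config/colored_robots.yaml", names))  # -> {3: 'robot', 2: 'robot', 4: 'robot'}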
2,367 changes: 1,208 additions & 1,159 deletions poetry.lock

Large diffs are not rendered by default.

22 changes: 11 additions & 11 deletions pyproject.toml
@@ -1,25 +1,25 @@
[tool.poetry]
name = "YOEO"
version = "1.4.3"
version = "1.6.0"
description = "A hybrid CNN for object detection and semantic segmentation"
authors = ["Florian Vahl <[email protected]>", "Jan Gutsche <[email protected]>"]

[tool.poetry.dependencies]
python = ">=3.8,<4.0"
numpy = "^1.21.1"
torch = ">=1.10.1, <1.13.0"
torchvision = ">=0.8.2"
python = ">=3.10,<4.0"
matplotlib = "^3.3.3"
tensorboard = "^2.12.2"
terminaltables = "^3.1.10"
Pillow = "^9.1.0"
tqdm = "^4.64.1"
imgaug = "^0.4.0"
numpy = "^1.26"
torch = "^2.5.1"
torchvision = "^0.20.1"
pillow = "^11.0.0"
torchsummary = "^1.5.1"
PyYAML = "^6.0"
opencv-python = "^4.5.2"
tqdm = "^4.67.0"
opencv-python = "^4.10.0.84"
pyyaml = "^6.0.2"
tensorboard = "^2.18.0"

[tool.poetry.dev-dependencies]
[tool.poetry.group.dev.dependencies]
onnxruntime = "^1.14.0"
profilehooks = "^1.12.0"
onnx = "^1.9.0"
89 changes: 52 additions & 37 deletions yoeo/detect.py
@@ -12,12 +12,14 @@
from torch.utils.data import DataLoader
from torch.autograd import Variable

from typing import Optional, List
from typing import Optional

from imgaug.augmentables.segmaps import SegmentationMapsOnImage

from yoeo.models import load_model
from yoeo.utils.utils import load_classes, rescale_boxes, non_max_suppression, print_environment_info, rescale_segmentation
from yoeo.utils.class_config import ClassConfig
from yoeo.utils.dataclasses import ClassNames, GroupConfig
from yoeo.utils.utils import rescale_boxes, non_max_suppression, print_environment_info, rescale_segmentation
from yoeo.utils.datasets import ImageFolder
from yoeo.utils.transforms import Resize, DEFAULT_TRANSFORMS

@@ -26,9 +28,9 @@
from matplotlib.ticker import NullLocator


def detect_directory(model_path, weights_path, img_path, classes, output_path,
def detect_directory(model_path, weights_path, img_path, class_config: ClassConfig, output_path,
batch_size=8, img_size=416, n_cpu=8, conf_thres=0.5, nms_thres=0.5,
robot_class_ids: Optional[List[int]] = None):
):
"""Detects objects on all images in specified directory and saves output images with drawn detections.
:param model_path: Path to model definition file (.cfg)
@@ -37,8 +39,8 @@ def detect_directory(model_path, weights_path, img_path, classes, output_path,
:type weights_path: str
:param img_path: Path to directory with images to inference
:type img_path: str
:param classes: List of class names
:type classes: [str]
:param class_config: Class configuration
:type class_config: ClassConfig
:param output_path: Path to output directory
:type output_path: str
:param batch_size: Size of each image batch, defaults to 8
@@ -51,8 +53,6 @@ def detect_directory(model_path, weights_path, img_path, classes, output_path,
:type conf_thres: float, optional
:param nms_thres: IOU threshold for non-maximum suppression, defaults to 0.5
:type nms_thres: float, optional
:param robot_class_ids: List of class IDs of robot classes if multiple robot classes exist.
:type robot_class_ids: List[int], optional
"""
dataloader = _create_data_loader(img_path, batch_size, img_size, n_cpu)
model = load_model(model_path, weights_path)
@@ -63,29 +63,36 @@ def detect_directory(model_path, weights_path, img_path, classes, output_path,
output_path,
conf_thres,
nms_thres,
robot_class_ids=robot_class_ids
class_config.get_group_config()
)
_draw_and_save_output_images(
img_detections, segmentations, imgs, img_size, output_path, classes)
img_detections, segmentations, imgs, img_size, output_path, class_config.get_ungrouped_det_class_names())

print(f"---- Detections were saved to: '{output_path}' ----")


def detect_image(model, image, img_size=416, conf_thres=0.5, nms_thres=0.5, robot_class_ids: Optional[List[int]] = None):
def detect_image(model,
image: np.ndarray,
img_size: int = 416,
conf_thres: float = 0.5,
nms_thres: float = 0.5,
group_config: Optional[GroupConfig] = None
):
"""Inferences one image with model.
:param model: Model for inference
:type model: models.Darknet
:param image: Image to inference
:type image: nd.array
:type image: np.ndarray
:param img_size: Size of each image dimension for yolo, defaults to 416
:type img_size: int, optional
:type img_size: int
:param conf_thres: Object confidence threshold, defaults to 0.5
:type conf_thres: float, optional
:type conf_thres: float
:param nms_thres: IOU threshold for non-maximum suppression, defaults to 0.5
:type nms_thres: float, optional
:param robot_class_ids: List of class IDs of robot classes if multiple robot classes exist.
:type robot_class_ids: List[int], optional
:type nms_thres: float
:param group_config: GroupConfiguration for this model (optional, defaults to None)
:type group_config: Optional[GroupConfig]
:return: Detections on image with each detection in the format: [x1, y1, x2, y2, confidence, class], Segmentation as 2d numpy array with the corresponding class id in each cell

(Check failure, GitHub Actions / linter, on line 96 in yoeo/detect.py: line too long (179 > 150 characters))
:rtype: nd.array, nd.array
"""
@@ -105,13 +112,24 @@ def detect_image(model, image, img_size=416, conf_thres=0.5, nms_thres=0.5, robo
# Get detections
with torch.no_grad():
detections, segmentations = model(input_img)
detections = non_max_suppression(detections, conf_thres, nms_thres, robot_class_ids=robot_class_ids)
detections = non_max_suppression(
prediction=detections,
conf_thres=conf_thres,
iou_thres=nms_thres,
group_config=group_config
)
detections = rescale_boxes(detections[0], img_size, image.shape[0:2])
segmentations = rescale_segmentation(segmentations, image.shape[0:2])
return detections.numpy(), segmentations.cpu().detach().numpy()


def detect(model, dataloader, output_path, conf_thres, nms_thres, robot_class_ids: Optional[List[int]] = None):
def detect(model,
dataloader: DataLoader,
output_path: str,
conf_thres: float = 0.5,
nms_thres: float = 0.5,
group_config: Optional[GroupConfig] = None
):

(Check failure, GitHub Actions / linter, on line 132 in yoeo/detect.py: closing bracket does not match visual indentation)
"""Inferences images with model.
:param model: Model for inference
@@ -121,11 +139,12 @@ def detect(model, dataloader, output_path, conf_thres, nms_thres, robot_class_id
:param output_path: Path to output directory
:type output_path: str
:param conf_thres: Object confidence threshold, defaults to 0.5
:type conf_thres: float, optional
:type conf_thres: float
:param nms_thres: IOU threshold for non-maximum suppression, defaults to 0.5
:type nms_thres: float, optional
:param robot_class_ids: List of class IDs of robot classes if multiple robot classes exist.
:type robot_class_ids: List[int], optional
:type nms_thres: float
:param group_config: GroupConfig for this model (optional, defaults to None)
:type group_config: Optional[GroupConfig]
:return: List of detections. The coordinates are given for the padded image that is provided by the dataloader.
Use `utils.rescale_boxes` to transform them into the desired input image coordinate system (before it was transformed by the dataloader),
List of input image paths
@@ -149,7 +168,12 @@ def detect(model, dataloader, output_path, conf_thres, nms_thres, robot_class_id
# Get detections
with torch.no_grad():
detections, segmentations = model(input_imgs)
detections = non_max_suppression(detections, conf_thres, nms_thres, robot_class_ids=robot_class_ids)
detections = non_max_suppression(
prediction=detections,
conf_thres=conf_thres,
iou_thres=nms_thres,
group_config=group_config
)

# Store image and detections
img_detections.extend(detections)
@@ -310,33 +334,24 @@ def run():
parser.add_argument("--n_cpu", type=int, default=8, help="Number of cpu threads to use during batch generation")
parser.add_argument("--conf_thres", type=float, default=0.5, help="Object confidence threshold")
parser.add_argument("--nms_thres", type=float, default=0.4, help="IOU threshold for non-maximum suppression")
parser.add_argument("--multiple_robot_classes", action="store_true",
help="If multiple robot classes exist and nms shall be performed across all robot classes")
parser.add_argument("--class_config", type=str, default="class_config/default.yaml", help="Class configuration for evaluation")
args = parser.parse_args()
print(f"Command line arguments: {args}")

# Extract class names from file
classes = load_classes(args.classes)['detection'] # List of class names

robot_class_ids = None
if args.multiple_robot_classes:
robot_class_ids = []
for idx, c in enumerate(classes):
if "robot" in c:
robot_class_ids.append(idx)
class_names = ClassNames.load_from(args.classes)
class_config = ClassConfig.load_from(args.class_config, class_names)

detect_directory(
args.model,
args.weights,
args.images,
classes,
class_config,
args.output,
batch_size=args.batch_size,
img_size=args.img_size,
n_cpu=args.n_cpu,
conf_thres=args.conf_thres,
nms_thres=args.nms_thres,
robot_class_ids=robot_class_ids
)


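For context, the reworked detect.py replaces the ad-hoc robot_class_ids handling with the ClassNames/ClassConfig pair used in run() above. The following is a hedged usage sketch for single-image inference: load_model, ClassNames.load_from, ClassConfig.load_from, detect_image and get_group_config are taken from the diff, while all file paths and the OpenCV-based image loading are placeholders and assumptions.

# Hedged sketch of single-image inference with the new API; paths are placeholders
# and the BGR -> RGB conversion is an assumption.
import cv2

from yoeo.detect import detect_image
from yoeo.models import load_model
from yoeo.utils.class_config import ClassConfig
from yoeo.utils.dataclasses import ClassNames

class_names = ClassNames.load_from("config/yoeo_names.yaml")  # placeholder path
class_config = ClassConfig.load_from("class_config/colored_robots.yaml", class_names)

model = load_model("config/yoeo.cfg", "weights/yoeo.pth")  # placeholder paths
image = cv2.cvtColor(cv2.imread("data/samples/example.png"), cv2.COLOR_BGR2RGB)

detections, segmentation = detect_image(
    model,
    image,
    img_size=416,
    conf_thres=0.5,
    nms_thres=0.5,
    group_config=class_config.get_group_config(),  # groups e.g. the robot_* classes during NMS
)
print(detections.shape, segmentation.shape)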
69 changes: 39 additions & 30 deletions yoeo/scripts/createYOEOLabelsFromTORSO-21.py
@@ -8,14 +8,6 @@
from tqdm import tqdm


# Available classes for YOEO
CLASSES = {
'bb_classes': ['ball', 'goalpost', 'robot'],
'segmentation_classes': ['background', 'lines', 'field'],
'skip_classes': ['obstacle', 'L-Intersection', 'X-Intersection', 'T-Intersection']
}


def range_limited_float_type_0_to_1(arg):
"""Type function for argparse - a float within some predefined bounds
Derived from 'https://stackoverflow.com/questions/55324449/how-to-specify-a-minimum-or-maximum-float-value-with-argparse/55410582#55410582'.
@@ -37,8 +29,16 @@ def range_limited_float_type_0_to_1(arg):
parser.add_argument("--skip-blurred", action="store_true", help="Skip blurred labels")
parser.add_argument("--skip-concealed", action="store_true", help="Skip concealed labels")
parser.add_argument("--skip-classes", nargs="+", default=[], help="These bounding box classes will be skipped")
parser.add_argument("--robots-with-team-colors", action="store_true", help="The robot class will be subdivided into subclasses, one for each team color (currently either 'blue', 'red' or 'unknown').")
args = parser.parse_args()

# Available classes for YOEO
CLASSES = {
'bb_classes': ['ball', 'goalpost', 'robot'] if not args.robots_with_team_colors else ['ball', 'goalpost', 'robot_blue', 'robot_red', 'robot_unknown'],
'segmentation_classes': ['background', 'lines', 'field'],
'skip_classes': ['obstacle', 'L-Intersection', 'X-Intersection', 'T-Intersection'],
}

# Remove skipped classes from CLASSES list
for skip_class in args.skip_classes:
if skip_class in CLASSES['bb_classes']:
@@ -122,33 +122,42 @@ def range_limited_float_type_0_to_1(arg):
annotations = []

for annotation in image_data['annotations']:
# Skip annotations that are not in the image
if not annotation['in_image']:
continue

# Derive the class name of the current annotation
class_name = annotation['type']
if args.robots_with_team_colors and class_name == 'robot':
class_name += f"_{annotation['color']}"

# Skip annotations that are not bounding boxes, belong to a skipped class, or are blurred/concealed if the user chose to skip those
if (annotation['type'] in CLASSES['segmentation_classes'] or # Handled by segmentations
annotation['type'] in CLASSES['skip_classes'] or # Skip this annotation class
if (class_name in CLASSES['segmentation_classes'] or # Handled by segmentations
class_name in CLASSES['skip_classes'] or # Skip this annotation class
(args.skip_blurred and annotation.get('blurred', False)) or
(args.skip_concealed and annotation.get('concealed', False))):
continue
elif annotation['type'] in CLASSES['bb_classes']: # Handle bounding boxes
if annotation['in_image']: # If annotation is not in image, do nothing
min_x = min(map(lambda x: x[0], annotation['vector']))
max_x = max(map(lambda x: x[0], annotation['vector']))
min_y = min(map(lambda x: x[1], annotation['vector']))
max_y = max(map(lambda x: x[1], annotation['vector']))

annotation_width = max_x - min_x
annotation_height = max_y - min_y
relative_annotation_width = annotation_width / img_width
relative_annotation_height = annotation_height / img_height

center_x = min_x + (annotation_width / 2)
center_y = min_y + (annotation_height / 2)
relative_center_x = center_x / img_width
relative_center_y = center_y / img_height

classID = CLASSES['bb_classes'].index(annotation['type']) # Derive classID from index in predefined classes
annotations.append(f"{classID} {relative_center_x} {relative_center_y} {relative_annotation_width} {relative_annotation_height}")
elif class_name in CLASSES['bb_classes']: # Handle bounding boxes
min_x = min(map(lambda x: x[0], annotation['vector']))
max_x = max(map(lambda x: x[0], annotation['vector']))
min_y = min(map(lambda x: x[1], annotation['vector']))
max_y = max(map(lambda x: x[1], annotation['vector']))

annotation_width = max_x - min_x
annotation_height = max_y - min_y
relative_annotation_width = annotation_width / img_width
relative_annotation_height = annotation_height / img_height

center_x = min_x + (annotation_width / 2)
center_y = min_y + (annotation_height / 2)
relative_center_x = center_x / img_width
relative_center_y = center_y / img_height

# Derive classID from index in predefined classes
classID = CLASSES['bb_classes'].index(class_name)
annotations.append(f"{classID} {relative_center_x} {relative_center_y} {relative_annotation_width} {relative_annotation_height}")
else:
print(f"The annotation type '{annotation['type']}' is not supported. Image: '{img_name_with_extension}'")
print(f"The annotation type '{class_name}' is not supported. Image: '{img_name_with_extension}'")

# Store bounding box annotations in .txt file
with open(os.path.join(labels_dir, img_name_without_extension + ".txt"), "w") as output:
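To make the label conversion above concrete, here is a small worked example of the YOLO-style normalisation applied to a single TORSO-21 annotation; the image size, polygon and class list are made-up values.

# Worked example (made-up values) of the bounding-box normalisation above.
img_width, img_height = 1920, 1080
vector = [(300, 400), (500, 400), (500, 700), (300, 700)]  # hypothetical robot_red annotation

min_x, max_x = min(p[0] for p in vector), max(p[0] for p in vector)
min_y, max_y = min(p[1] for p in vector), max(p[1] for p in vector)

relative_annotation_width = (max_x - min_x) / img_width        # 200 / 1920 ≈ 0.104
relative_annotation_height = (max_y - min_y) / img_height      # 300 / 1080 ≈ 0.278
relative_center_x = (min_x + (max_x - min_x) / 2) / img_width  # 400 / 1920 ≈ 0.208
relative_center_y = (min_y + (max_y - min_y) / 2) / img_height # 550 / 1080 ≈ 0.509

bb_classes = ['ball', 'goalpost', 'robot_blue', 'robot_red', 'robot_unknown']
class_id = bb_classes.index('robot_red')  # 3
print(f"{class_id} {relative_center_x} {relative_center_y} "
      f"{relative_annotation_width} {relative_annotation_height}")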
(The remaining changed files in this commit are not shown.)
