diff --git a/.gitignore b/.gitignore
index 184a610..8f073c2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,3 +15,7 @@
 checkpoints/
 logs/
 .python-version
+
+*.onnx
+*.pth
+**/tuning_records.json
diff --git a/README.md b/README.md
index b3ee34e..1b6765e 100644
--- a/README.md
+++ b/README.md
@@ -70,6 +70,14 @@ You can adjust the log directory using `--logdir <path>` when running `tensorboa
 
 #### Classes
 Add class names to `data/custom/yoeo_names.yaml`.
+Run the following command to adapt the model configuration file (cfg) to the new number of classes:
+
+```bash
+poetry run yoeo-customize-cfg -c config/yoeo.cfg -d config/custom.data -o config/yoeo-custom.cfg
+```
+
+This adjusts the detection and segmentation layers of the model to the number of classes in your dataset.
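+
+For reference, `yoeo-customize-cfg` expects the names file to provide both class lists; a minimal `data/custom/yoeo_names.yaml` could look like this (the class names below are placeholders):
+
+```yaml
+detection:
+  - ball
+  - goalpost
+  - robot
+segmentation:
+  - background
+  - lines
+  - field
+```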
+
 
 #### Image Folder
 Move the images of your dataset to `data/custom/images/`.
diff --git a/config/create_custom_model.sh b/config/create_custom_model.sh
deleted file mode 100755
index eba2ebe..0000000
--- a/config/create_custom_model.sh
+++ /dev/null
@@ -1,794 +0,0 @@
-#!/bin/bash
-
-NUM_CLASSES=$1
-
-echo "
-[net]
-# Testing
-#batch=1
-#subdivisions=1
-# Training
-batch=16
-subdivisions=1
-width=416
-height=416
-channels=3
-momentum=0.9
-decay=0.0005
-angle=0
-saturation = 1.5
-exposure = 1.5
-hue=.1
-
-learning_rate=0.001
-burn_in=1000
-max_batches = 500200
-policy=steps
-steps=400000,450000
-scales=.1,.1
-
-[convolutional]
-batch_normalize=1
-filters=32
-size=3
-stride=1
-pad=1
-activation=leaky
-
-# Downsample
-
-[convolutional]
-batch_normalize=1
-filters=64
-size=3
-stride=2
-pad=1
-activation=leaky
-
-[convolutional]
-batch_normalize=1
-filters=32
-size=1
-stride=1
-pad=1
-activation=leaky
-
-[convolutional]
-batch_normalize=1
-filters=64
-size=3
-stride=1
-pad=1
-activation=leaky
-
-[shortcut]
-from=-3
-activation=linear
-
-# Downsample
-
-[convolutional]
-batch_normalize=1
-filters=128
-size=3
-stride=2
-pad=1
-activation=leaky
-
-[convolutional]
-batch_normalize=1
-filters=64
-size=1
-stride=1
-pad=1
-activation=leaky
-
-[convolutional]
-batch_normalize=1
-filters=128
-size=3
-stride=1
-pad=1
-activation=leaky
-
-[shortcut]
-from=-3
-activation=linear
-
-[convolutional]
-batch_normalize=1
-filters=64
-size=1
-stride=1
-pad=1
-activation=leaky
-
-[convolutional]
-batch_normalize=1
-filters=128
-size=3
-stride=1
-pad=1
-activation=leaky
-
-[shortcut]
-from=-3
-activation=linear
-
-# Downsample
-
-[convolutional]
-batch_normalize=1
-filters=256
-size=3
-stride=2
-pad=1
-activation=leaky
-
-[convolutional]
-batch_normalize=1
-filters=128
-size=1
-stride=1
-pad=1
-activation=leaky
-
-[convolutional]
-batch_normalize=1
-filters=256
-size=3
-stride=1
-pad=1
-activation=leaky
-
-[shortcut]
-from=-3
-activation=linear
-
-[convolutional]
-batch_normalize=1
-filters=128
-size=1
-stride=1
-pad=1
-activation=leaky
-
-[convolutional]
-batch_normalize=1
-filters=256
-size=3
-stride=1
-pad=1
-activation=leaky
-
-[shortcut]
-from=-3
-activation=linear
-
-[convolutional]
-batch_normalize=1
-filters=128
-size=1
-stride=1
-pad=1
-activation=leaky
-
-[convolutional]
-batch_normalize=1
-filters=256
-size=3
-stride=1
-pad=1
-activation=leaky
-
-[shortcut]
-from=-3
-activation=linear
-
-[convolutional]
-batch_normalize=1
-filters=128
-size=1
-stride=1
-pad=1
-activation=leaky
-
-[convolutional]
-batch_normalize=1
-filters=256
-size=3
-stride=1
-pad=1
-activation=leaky
-
-[shortcut]
-from=-3
-activation=linear
-
-
-[convolutional]
-batch_normalize=1
-filters=128
-size=1
-stride=1
-pad=1
-activation=leaky
-
-[convolutional]
-batch_normalize=1
-filters=256
-size=3
-stride=1
-pad=1
-activation=leaky
-
-[shortcut]
-from=-3
-activation=linear
-
-[convolutional]
-batch_normalize=1
-filters=128
-size=1
-stride=1
-pad=1
-activation=leaky
-
-[convolutional]
-batch_normalize=1
-filters=256
-size=3
-stride=1
-pad=1
-activation=leaky
-
-[shortcut]
-from=-3
-activation=linear
-
-[convolutional]
-batch_normalize=1
-filters=128
-size=1
-stride=1
-pad=1
-activation=leaky
-
-[convolutional]
-batch_normalize=1
-filters=256
-size=3
-stride=1
-pad=1
-activation=leaky
-
-[shortcut]
-from=-3
-activation=linear
-
-[convolutional]
-batch_normalize=1
-filters=128
-size=1
-stride=1
-pad=1
-activation=leaky
-
-[convolutional]
-batch_normalize=1
-filters=256
-size=3
-stride=1
-pad=1
-activation=leaky
-
-[shortcut]
-from=-3
-activation=linear
-
-# Downsample
-
-[convolutional]
-batch_normalize=1
-filters=512
-size=3
-stride=2
-pad=1
-activation=leaky
-
-[convolutional]
-batch_normalize=1
-filters=256
-size=1
-stride=1
-pad=1
-activation=leaky
-
-[convolutional]
-batch_normalize=1
-filters=512
-size=3
-stride=1
-pad=1
-activation=leaky
-
-[shortcut]
-from=-3
-activation=linear
-
-
-[convolutional]
-batch_normalize=1
-filters=256
-size=1
-stride=1
-pad=1
-activation=leaky
-
-[convolutional]
-batch_normalize=1
-filters=512
-size=3
-stride=1
-pad=1
-activation=leaky
-
-[shortcut]
-from=-3
-activation=linear
-
-
-[convolutional]
-batch_normalize=1
-filters=256
-size=1
-stride=1
-pad=1
-activation=leaky
-
-[convolutional]
-batch_normalize=1
-filters=512
-size=3
-stride=1
-pad=1
-activation=leaky
-
-[shortcut]
-from=-3
-activation=linear
-
-
-[convolutional]
-batch_normalize=1
-filters=256
-size=1
-stride=1
-pad=1
-activation=leaky
-
-[convolutional]
-batch_normalize=1
-filters=512
-size=3
-stride=1
-pad=1
-activation=leaky
-
-[shortcut]
-from=-3
-activation=linear
-
-[convolutional]
-batch_normalize=1
-filters=256
-size=1
-stride=1
-pad=1
-activation=leaky
-
-[convolutional]
-batch_normalize=1
-filters=512
-size=3
-stride=1
-pad=1
-activation=leaky
-
-[shortcut]
-from=-3
-activation=linear
-
-
-[convolutional]
-batch_normalize=1
-filters=256
-size=1
-stride=1
-pad=1
-activation=leaky
-
-[convolutional]
-batch_normalize=1
-filters=512
-size=3
-stride=1
-pad=1
-activation=leaky
-
-[shortcut]
-from=-3
-activation=linear
-
-
-[convolutional]
-batch_normalize=1
-filters=256
-size=1
-stride=1
-pad=1
-activation=leaky
-
-[convolutional]
-batch_normalize=1
-filters=512
-size=3
-stride=1
-pad=1
-activation=leaky
-
-[shortcut]
-from=-3
-activation=linear
-
-[convolutional]
-batch_normalize=1
-filters=256
-size=1
-stride=1
-pad=1
-activation=leaky
-
-[convolutional]
-batch_normalize=1
-filters=512
-size=3
-stride=1
-pad=1
-activation=leaky
-
-[shortcut]
-from=-3
-activation=linear
-
-# Downsample
-
-[convolutional]
-batch_normalize=1
-filters=1024
-size=3
-stride=2
-pad=1
-activation=leaky
-
-[convolutional]
-batch_normalize=1
-filters=512
-size=1
-stride=1
-pad=1
-activation=leaky
-
-[convolutional]
-batch_normalize=1
-filters=1024
-size=3
-stride=1
-pad=1
-activation=leaky
-
-[shortcut]
-from=-3
-activation=linear
-
-[convolutional]
-batch_normalize=1
-filters=512
-size=1
-stride=1
-pad=1
-activation=leaky
-
-[convolutional]
-batch_normalize=1
-filters=1024
-size=3
-stride=1
-pad=1
-activation=leaky
-
-[shortcut]
-from=-3
-activation=linear
-
-[convolutional]
-batch_normalize=1
-filters=512
-size=1
-stride=1
-pad=1
-activation=leaky
-
-[convolutional]
-batch_normalize=1
-filters=1024
-size=3
-stride=1
-pad=1
-activation=leaky
-
-[shortcut]
-from=-3
-activation=linear
-
-[convolutional]
-batch_normalize=1
-filters=512
-size=1
-stride=1
-pad=1
-activation=leaky
-
-[convolutional]
-batch_normalize=1
-filters=1024
-size=3
-stride=1
-pad=1
-activation=leaky
-
-[shortcut]
-from=-3
-activation=linear
-
-######################
-
-[convolutional]
-batch_normalize=1
-filters=512
-size=1
-stride=1
-pad=1
-activation=leaky
-
-[convolutional]
-batch_normalize=1
-size=3
-stride=1
-pad=1
-filters=1024
-activation=leaky
-
-[convolutional]
-batch_normalize=1
-filters=512
-size=1
-stride=1
-pad=1
-activation=leaky
-
-[convolutional]
-batch_normalize=1
-size=3
-stride=1
-pad=1
-filters=1024
-activation=leaky
-
-[convolutional]
-batch_normalize=1
-filters=512
-size=1
-stride=1
-pad=1
-activation=leaky
-
-[convolutional]
-batch_normalize=1
-size=3
-stride=1
-pad=1
-filters=1024
-activation=leaky
-
-[convolutional]
-size=1
-stride=1
-pad=1
-filters=$(expr 3 \* $(expr $NUM_CLASSES \+ 5))
-activation=linear
-
-
-[yolo]
-mask = 6,7,8
-anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
-classes=$NUM_CLASSES
-num=9
-jitter=.3
-ignore_thresh = .7
-truth_thresh = 1
-random=1
-
-
-[route]
-layers = -4
-
-[convolutional]
-batch_normalize=1
-filters=256
-size=1
-stride=1
-pad=1
-activation=leaky
-
-[upsample]
-stride=2
-
-[route]
-layers = -1, 61
-
-
-
-[convolutional]
-batch_normalize=1
-filters=256
-size=1
-stride=1
-pad=1
-activation=leaky
-
-[convolutional]
-batch_normalize=1
-size=3
-stride=1
-pad=1
-filters=512
-activation=leaky
-
-[convolutional]
-batch_normalize=1
-filters=256
-size=1
-stride=1
-pad=1
-activation=leaky
-
-[convolutional]
-batch_normalize=1
-size=3
-stride=1
-pad=1
-filters=512
-activation=leaky
-
-[convolutional]
-batch_normalize=1
-filters=256
-size=1
-stride=1
-pad=1
-activation=leaky
-
-[convolutional]
-batch_normalize=1
-size=3
-stride=1
-pad=1
-filters=512
-activation=leaky
-
-[convolutional]
-size=1
-stride=1
-pad=1
-filters=$(expr 3 \* $(expr $NUM_CLASSES \+ 5))
-activation=linear
-
-
-[yolo]
-mask = 3,4,5
-anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
-classes=$NUM_CLASSES
-num=9
-jitter=.3
-ignore_thresh = .7
-truth_thresh = 1
-random=1
-
-
-
-[route]
-layers = -4
-
-[convolutional]
-batch_normalize=1
-filters=128
-size=1
-stride=1
-pad=1
-activation=leaky
-
-[upsample]
-stride=2
-
-[route]
-layers = -1, 36
-
-
-
-[convolutional]
-batch_normalize=1
-filters=128
-size=1
-stride=1
-pad=1
-activation=leaky
-
-[convolutional]
-batch_normalize=1
-size=3
-stride=1
-pad=1
-filters=256
-activation=leaky
-
-[convolutional]
-batch_normalize=1
-filters=128
-size=1
-stride=1
-pad=1
-activation=leaky
-
-[convolutional]
-batch_normalize=1
-size=3
-stride=1
-pad=1
-filters=256
-activation=leaky
-
-[convolutional]
-batch_normalize=1
-filters=128
-size=1
-stride=1
-pad=1
-activation=leaky
-
-[convolutional]
-batch_normalize=1
-size=3
-stride=1
-pad=1
-filters=256
-activation=leaky
-
-[convolutional]
-size=1
-stride=1
-pad=1
-filters=$(expr 3 \* $(expr $NUM_CLASSES \+ 5))
-activation=linear
-
-
-[yolo]
-mask = 0,1,2
-anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
-classes=$NUM_CLASSES
-num=9
-jitter=.3
-ignore_thresh = .7
-truth_thresh = 1
-random=1
-" >> yolov3-custom.cfg
diff --git a/pyproject.toml b/pyproject.toml
index 1cd1093..ad15414 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "YOEO"
-version = "1.6.0"
+version = "1.6.1"
 description = "A hybrid CNN for object detection and semantic segmentation"
 authors = ["Florian Vahl ", "Jan Gutsche "]
 
@@ -36,3 +36,4 @@ yoeo-test = "yoeo.test:run"
 yoeo-to-onnx = "yoeo.scripts.convertPyTorchModelToONNX:run"
 yoeo-onnx-to-openvino = "yoeo.scripts.convertONNXModelToOpenVinoIR:run"
 yoeo-onnx-to-tvm = "yoeo.scripts.convertONNXModelToTVM:run"
+yoeo-customize-cfg = "yoeo.scripts.customizeCfg:run"
"yoeo.test:run" yoeo-to-onnx = "yoeo.scripts.convertPyTorchModelToONNX:run" yoeo-onnx-to-openvino = "yoeo.scripts.convertONNXModelToOpenVinoIR:run" yoeo-onnx-to-tvm = "yoeo.scripts.convertONNXModelToTVM:run" +yoeo-custiomize-cfg = "yoeo.scripts.customizeCfg:run" diff --git a/yoeo/models.py b/yoeo/models.py index 1b69cd8..784627c 100644 --- a/yoeo/models.py +++ b/yoeo/models.py @@ -335,7 +335,7 @@ def load_model(model_path, weights_path=None): if weights_path: if weights_path.endswith(".pth"): # Load checkpoint weights - model.load_state_dict(torch.load(weights_path, map_location=device)) + model.load_state_dict(torch.load(weights_path, map_location=device, weights_only=True)) else: # Load darknet weights model.load_darknet_weights(weights_path) diff --git a/yoeo/scripts/customizeCfg.py b/yoeo/scripts/customizeCfg.py new file mode 100644 index 0000000..9aa3584 --- /dev/null +++ b/yoeo/scripts/customizeCfg.py @@ -0,0 +1,131 @@ +#!/usr/bin/env python3 + +""" +This file takes a given model architecture configuration file (cfg / toml) +and adapts it to the classes of a given dataset defined in a yaml file. +""" + +import argparse +import yaml +from yoeo.utils.parse_config import ( + parse_model_config, + write_model_config, + parse_data_config, +) + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Customize a model architecture configuration file to a dataset" + ) + parser.add_argument( + "--cfg", + "-c", + type=str, + default="config/yoeo.cfg", + help="Path to the model architecture configuration file", + ) + parser.add_argument( + "--dataset", + "-d", + type=str, + default="config/custom.data", + help="Path to the dataset configuration file", + ) + parser.add_argument( + "--output", + "-o", + type=str, + default="config/yoeo-custom.cfg", + help="Path to the output model architecture configuration file", + ) + return parser.parse_args() + + +def run(): + args = parse_args() + + # Load the dataset configuration + dataset_config = parse_data_config(args.dataset) + + # Load the class names from the dataset configuration + with open(dataset_config["names"], "r") as f: + class_names = yaml.safe_load(f) + + # Validate the dataset configuration + assert ( + "detection" in class_names + ), "Dataset configuration file must contain a 'detection' key listing all the object classes" + assert isinstance( + class_names["detection"], list + ), "The 'detection' key in the dataset configuration file must be a list" + assert ( + len(class_names["detection"]) > 0 + ), "The 'detection' key in the dataset configuration file must contain at least one class" + assert all( + isinstance(c, str) for c in class_names["detection"] + ), "All classes in the 'detection' key must be strings" + assert ( + "segmentation" in class_names + ), "Dataset configuration file must contain a 'segmentation' key listing all the segmentation classes" + assert isinstance( + class_names["segmentation"], list + ), "The 'segmentation' key in the dataset configuration file must be a list" + assert ( + len(class_names["segmentation"]) > 0 + ), "The 'segmentation' key in the dataset configuration file must contain at least one class" + assert all( + isinstance(c, str) for c in class_names["segmentation"] + ), "All classes in the 'segmentation' key must be strings" + + number_of_object_detection_classes = len(class_names["detection"]) + number_of_segmentation_classes = len(class_names["segmentation"]) + + print( + f"Found {number_of_object_detection_classes} object detection classes and 
diff --git a/yoeo/scripts/customizeCfg.py b/yoeo/scripts/customizeCfg.py
new file mode 100644
index 0000000..9aa3584
--- /dev/null
+++ b/yoeo/scripts/customizeCfg.py
@@ -0,0 +1,131 @@
+#!/usr/bin/env python3
+
+"""
+This script takes a given model architecture configuration file (cfg)
+and adapts it to the classes of a given dataset defined in a YAML file.
+"""
+
+import argparse
+import yaml
+from yoeo.utils.parse_config import (
+    parse_model_config,
+    write_model_config,
+    parse_data_config,
+)
+
+
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(
+        description="Customize a model architecture configuration file to a dataset"
+    )
+    parser.add_argument(
+        "--cfg",
+        "-c",
+        type=str,
+        default="config/yoeo.cfg",
+        help="Path to the model architecture configuration file",
+    )
+    parser.add_argument(
+        "--dataset",
+        "-d",
+        type=str,
+        default="config/custom.data",
+        help="Path to the dataset configuration file",
+    )
+    parser.add_argument(
+        "--output",
+        "-o",
+        type=str,
+        default="config/yoeo-custom.cfg",
+        help="Path to the output model architecture configuration file",
+    )
+    return parser.parse_args()
+
+
+def run():
+    args = parse_args()
+
+    # Load the dataset configuration
+    dataset_config = parse_data_config(args.dataset)
+
+    # Load the class names from the dataset configuration
+    with open(dataset_config["names"], "r") as f:
+        class_names = yaml.safe_load(f)
+
+    # Validate the dataset configuration
+    assert (
+        "detection" in class_names
+    ), "Dataset configuration file must contain a 'detection' key listing all the object classes"
+    assert isinstance(
+        class_names["detection"], list
+    ), "The 'detection' key in the dataset configuration file must be a list"
+    assert (
+        len(class_names["detection"]) > 0
+    ), "The 'detection' key in the dataset configuration file must contain at least one class"
+    assert all(
+        isinstance(c, str) for c in class_names["detection"]
+    ), "All classes in the 'detection' key must be strings"
+    assert (
+        "segmentation" in class_names
+    ), "Dataset configuration file must contain a 'segmentation' key listing all the segmentation classes"
+    assert isinstance(
+        class_names["segmentation"], list
+    ), "The 'segmentation' key in the dataset configuration file must be a list"
+    assert (
+        len(class_names["segmentation"]) > 0
+    ), "The 'segmentation' key in the dataset configuration file must contain at least one class"
+    assert all(
+        isinstance(c, str) for c in class_names["segmentation"]
+    ), "All classes in the 'segmentation' key must be strings"
+
+    number_of_object_detection_classes = len(class_names["detection"])
+    number_of_segmentation_classes = len(class_names["segmentation"])
+
+    print(
+        f"Found {number_of_object_detection_classes} object detection classes "
+        f"and {number_of_segmentation_classes} segmentation classes"
+    )
+
+    # Load the model configuration
+    model_architecture = parse_model_config(args.cfg)
+
+    # Search for all yolo and seg layers in the model configuration and
+    # adapt their number of classes as well as the number of filters
+    # in the preceding convolutional layer
+    for i, layer in enumerate(model_architecture):
+        if layer["type"] == "yolo":
+            # Adapt the number of classes
+            layer["classes"] = number_of_object_detection_classes
+            # Adapt the number of filters in the preceding convolutional layer
+            assert (
+                i > 0
+            ), "Yolo layer cannot be the first layer in the model architecture"
+            prev_layer = model_architecture[i - 1]
+            assert prev_layer.get("filters") is not None, (
+                "Yolo layer must be preceded by a convolutional layer for this script to work; "
+                "for more complex architectures, adapt the configuration manually"
+            )
+            prev_layer["filters"] = (number_of_object_detection_classes + 5) * len(
+                layer["mask"].split(",")
+            )
+        if layer["type"] == "seg":
+            # Adapt the number of classes
+            layer["classes"] = number_of_segmentation_classes
+            # Adapt the number of filters in the preceding convolutional layer
+            assert (
+                i > 0
+            ), "Seg layer cannot be the first layer in the model architecture"
+            prev_layer = model_architecture[i - 1]
+            assert prev_layer.get("filters") is not None, (
+                "Seg layer must be preceded by a convolutional layer for this script to work; "
+                "for more complex architectures, adapt the configuration manually"
+            )
+            prev_layer["filters"] = number_of_segmentation_classes
+
+    # Write the adapted model configuration to the output file
+    write_model_config(model_architecture, args.output)
+
+    print(f"Model architecture adapted and saved to {args.output}")
+
+
+if __name__ == "__main__":
+    run()
diff --git a/yoeo/utils/parse_config.py b/yoeo/utils/parse_config.py
index 4c9fa7f..df307a6 100644
--- a/yoeo/utils/parse_config.py
+++ b/yoeo/utils/parse_config.py
@@ -1,6 +1,7 @@
+from typing import Any
 
 
-def parse_model_config(path):
+def parse_model_config(path: str) -> list[dict[str, str]]:
     """Parses the yolo-v3 layer configuration file and returns module definitions"""
     file = open(path, 'r')
     lines = file.read().split('\n')
@@ -21,6 +22,17 @@
     return module_defs
 
 
+def write_model_config(module_defs: list[dict[str, Any]], path: str):
+    """Writes module definitions to the given cfg file"""
+    with open(path, 'w') as f:
+        for module_def in module_defs:
+            f.write(f"[{module_def['type']}]\n")
+            for key, value in module_def.items():
+                # Skip the type key (already written) and implicit batch_normalize=0 defaults
+                if key != 'type' and not (key == 'batch_normalize' and int(value) == 0):
+                    f.write(f"{key}={value}\n")
+            f.write("\n")
+
+
 def parse_data_config(path):
     """Parses the data configuration file"""
     options = dict()
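Taken together, `parse_model_config` and the new `write_model_config` allow a quick consistency check of a customized cfg: every `[yolo]` head should carry the new class count and its preceding convolutional layer `(classes + 5) * len(mask)` filters, while each `[seg]` head gets one filter per segmentation class. A sketch of such a check, assuming the package layout from this diff (the cfg path is illustrative):

```python
from yoeo.utils.parse_config import parse_model_config

# Parse the cfg produced by yoeo-customize-cfg and verify the head wiring.
modules = parse_model_config("config/yoeo-custom.cfg")  # illustrative path
for i, layer in enumerate(modules):
    if layer["type"] == "yolo":
        # Detection head: one (x, y, w, h, objectness + classes) slot per anchor in the mask
        expected = (int(layer["classes"]) + 5) * len(layer["mask"].split(","))
        assert int(modules[i - 1]["filters"]) == expected, f"yolo head at layer {i} is miswired"
    elif layer["type"] == "seg":
        # Segmentation head: one output channel per segmentation class
        assert int(modules[i - 1]["filters"]) == int(layer["classes"]), f"seg head at layer {i} is miswired"
print("All detection and segmentation heads are consistent")
```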