diff --git a/config/yoeo_light_decoder_deconv.cfg b/config/yoeo_light_decoder_deconv.cfg new file mode 100644 index 0000000..f0b551e --- /dev/null +++ b/config/yoeo_light_decoder_deconv.cfg @@ -0,0 +1,312 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=4 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.0001 +burn_in=100 +max_batches = 4000 +policy=steps +steps=50000,60000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1 +groups=2 +group_id=1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1,-2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -6,-1 + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1 +groups=2 +group_id=1 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1,-2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -6,-1 + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1 +groups=2 +group_id=1 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1,-2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -6,-1 + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=8 +activation=linear + +[yolo] +mask = 0 +anchors = 100, 100 +classes=3 +num=1 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 24 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=8 +activation=linear + +[yolo] +mask = 0 +anchors = 100, 100 +classes=3 +num=1 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + +[route] +layers = 18 + +[deconvolutional] +batch_normalize=1 +filters=128 +size=2 +stride=2 +pad=0 +activation=leaky + +[route] +layers = -1, 10 + +[deconvolutional] +batch_normalize=1 +filters=64 +size=2 +stride=2 +pad=0 +activation=leaky + +[route] +layers = -1, 2 + +[deconvolutional] +batch_normalize=1 +filters=32 +size=2 +stride=2 +pad=0 +activation=leaky + +[route] +layers = -1, 0 + +[deconvolutional] +batch_normalize=1 +filters=16 +size=2 +stride=2 +pad=0 +activation=linear + +[seg] +classes=3 diff --git a/config/yoeo_medium_decoder_deconv.cfg b/config/yoeo_medium_decoder_deconv.cfg new file mode 100644 index 0000000..45df2c2 --- /dev/null +++ b/config/yoeo_medium_decoder_deconv.cfg @@ -0,0 +1,356 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=4 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.0001 +burn_in=100 +max_batches = 4000 +policy=steps +steps=50000,60000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1 +groups=2 +group_id=1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1,-2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -6,-1 + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1 +groups=2 +group_id=1 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1,-2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -6,-1 + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1 +groups=2 +group_id=1 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1,-2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -6,-1 + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=8 +activation=linear + +[yolo] +mask = 0 +anchors = 100, 100 +classes=3 +num=1 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 24 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=8 +activation=linear + +[yolo] +mask = 0 +anchors = 100, 100 +classes=3 +num=1 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + +[route] +layers = 18 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-2 + +[deconvolutional] +batch_normalize=1 +filters=128 +size=2 +stride=2 +pad=0 +activation=leaky + +[route] +layers = -1, 10 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-2 + +[deconvolutional] +batch_normalize=1 +filters=64 +size=2 +stride=2 +pad=0 +activation=leaky + +[route] +layers = -1, 2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-2 + +[deconvolutional] +batch_normalize=1 +filters=32 +size=2 +stride=2 +pad=0 +activation=leaky + +[route] +layers = -1, 0 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-2 + +[deconvolutional] +batch_normalize=1 +filters=3 +size=2 +stride=2 +pad=0 +activation=linear + +[seg] +classes=3 diff --git a/config/yoeo_medium_decoder_deconv_context.cfg b/config/yoeo_medium_decoder_deconv_context.cfg new file mode 100644 index 0000000..cd1803d --- /dev/null +++ b/config/yoeo_medium_decoder_deconv_context.cfg @@ -0,0 +1,381 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=4 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.0001 +burn_in=100 +max_batches = 4000 +policy=steps +steps=50000,60000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1 +groups=2 +group_id=1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1,-2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -6,-1 + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1 +groups=2 +group_id=1 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1,-2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -6,-1 + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1 +groups=2 +group_id=1 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1,-2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -6,-1 + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=8 +activation=linear + +[yolo] +mask = 0 +anchors = 100, 100 +classes=3 +num=1 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 24 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=8 +activation=linear + +[yolo] +mask = 0 +anchors = 100, 100 +classes=3 +num=1 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + +[route] +layers = 18 +groups=2 +group_id=1 + +[maxpool] +size=13 +stride=13 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=13 + +[route] +layers = 18 +groups=2 +group_id=0 + +[route] +layers = -1, -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-2 + +[deconvolutional] +batch_normalize=1 +filters=128 +size=2 +stride=2 +pad=0 +activation=leaky + +[route] +layers = -1, 10 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-2 + +[deconvolutional] +batch_normalize=1 +filters=64 +size=2 +stride=2 +pad=0 +activation=leaky + +[route] +layers = -1, 2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-2 + +[deconvolutional] +batch_normalize=1 +filters=32 +size=2 +stride=2 +pad=0 +activation=leaky + +[route] +layers = -1, 0 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-2 + +[deconvolutional] +batch_normalize=1 +filters=3 +size=2 +stride=2 +pad=0 +activation=linear + +[seg] +classes=3 diff --git a/yoeo/models.py b/yoeo/models.py index 1b69cd8..cd964c0 100644 --- a/yoeo/models.py +++ b/yoeo/models.py @@ -60,8 +60,40 @@ def create_modules(module_defs): nn.BatchNorm2d(filters, momentum=0.1, eps=1e-5)) if module_def["activation"] == "leaky": modules.add_module(f"leaky_{module_i}", nn.LeakyReLU(0.1)) - if module_def["activation"] == "mish": + elif module_def["activation"] == "mish": modules.add_module(f"mish_{module_i}", Mish()) + elif module_def["activation"] == "linear": + pass + else: + raise ValueError(f"Unknown activation: {module_def['activation']}") + + elif module_def["type"] == "deconvolutional": + bn = int(module_def["batch_normalize"]) + filters = int(module_def["filters"]) + kernel_size = int(module_def["size"]) + pad = int(module_def["pad"]) + modules.add_module( + f"deconv_{module_i}", + nn.ConvTranspose2d( + in_channels=output_filters[-1], + out_channels=filters, + kernel_size=kernel_size, + stride=int(module_def["stride"]), + padding=pad, + bias=not bn, + ), + ) + if bn: + modules.add_module(f"batch_norm_{module_i}", + nn.BatchNorm2d(filters, momentum=0.1, eps=1e-5)) + if module_def["activation"] == "leaky": + modules.add_module(f"leaky_{module_i}", nn.LeakyReLU(0.1)) + elif module_def["activation"] == "mish": + modules.add_module(f"mish_{module_i}", Mish()) + elif module_def["activation"] == "linear": + pass + else: + raise ValueError(f"Unknown activation: {module_def['activation']}") elif module_def["type"] == "maxpool": kernel_size = int(module_def["size"]) @@ -197,10 +229,9 @@ def __init__(self, config_path): def forward(self, x, bb_targets=None, mask_targets=None): img_size = x.size(2) - loss = 0 layer_outputs, yolo_outputs, segmentation_outputs = [], [], [] for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)): - if module_def["type"] in ["convolutional", "upsample", "maxpool"]: + if module_def["type"] in ["convolutional", "deconvolutional", "upsample", "maxpool"]: x = module(x) elif module_def["type"] == "route": combined_outputs = torch.cat([layer_outputs[int(layer_i)] for layer_i in module_def["layers"].split(",")], 1)