diff --git a/Server.openvino.dockerfile b/Server.openvino.dockerfile
index 8561940..e3f36b6 100644
--- a/Server.openvino.dockerfile
+++ b/Server.openvino.dockerfile
@@ -3,8 +3,8 @@ FROM openvino/ubuntu18_runtime:latest
 RUN mkdir /home/openvino/app && mkdir /home/openvino/app/snapshots
 WORKDIR /home/openvino/app
 COPY inference_openvino.py /home/openvino/app/inference_openvino.py
-COPY snapshots/resnet50_liza_alert_v1_interface.bin /home/openvino/app/snapshots/resnet50_liza_alert_v1_interface.bin
-COPY snapshots/resnet50_liza_alert_v1_interface.xml /home/openvino/app/snapshots/resnet50_liza_alert_v1_interface.xml
+COPY snapshots/lacmus_v5_interface.bin /home/openvino/app/snapshots/lacmus_v5_interface.bin
+COPY snapshots/lacmus_v5_interface.xml /home/openvino/app/snapshots/lacmus_v5_interface.xml
 
 
 RUN pip3 install flask pybase64
diff --git a/app/api/v0/routes/predict.py b/app/api/v0/routes/predict.py
index af17bc1..457aec6 100644
--- a/app/api/v0/routes/predict.py
+++ b/app/api/v0/routes/predict.py
@@ -1,6 +1,7 @@
 from fastapi import APIRouter, HTTPException, File, UploadFile
-from core.api_models.common import Object, Result
+from core.api_models.common import Result
 from core.ml.retina import Model
+
 
 model = Model()
 model.load()
@@ -13,18 +14,7 @@ async def predict_on_image(image: UploadFile = File(...)) -> Result:
     try:
         image_bytes = await image.read()
 
-        predicts = model.infer(in_data=image_bytes)
-        result = Result(objects=[])
-        for predict in predicts['objects']:
-            obj = Object(
-                label = predict['label'],
-                xmax = predict['xmax'],
-                xmin = predict['xmin'],
-                ymax = predict['ymax'],
-                ymin = predict['ymin'],
-                score = predict['score']
-            )
-            result.objects.append(obj)
-        return result
+        predicts = await model.infer(in_data=image_bytes)
+        return Result(objects=predicts)
     except Exception as ex:
         raise HTTPException(status_code=500, detail=str(ex))
\ No newline at end of file
diff --git a/app/core/api_models/common.py b/app/core/api_models/common.py
index 6aab6c4..c856874 100644
--- a/app/core/api_models/common.py
+++ b/app/core/api_models/common.py
@@ -1,19 +1,16 @@
 from pydantic import BaseModel
-from fastapi_utils.enums import StrEnum
-from core.config import get_config
 from typing import List
-from enum import auto
 
 class Pong(BaseModel):
     pong: str = "Lacmus web API, version X.Y.Z"
 
-class Object(BaseModel):
+class Prediction(BaseModel):
     xmin: int
-    xmax: int
     ymin: int
+    xmax: int
     ymax: int
-    label: str
     score: float
+    label: str = 'Pedestrian'
 
 class Result(BaseModel):
-    objects: List[Object] = None
\ No newline at end of file
+    objects: List[Prediction] = None
\ No newline at end of file
diff --git a/app/core/ml/retina.py b/app/core/ml/retina.py
index 4f8186c..8bf4c58 100644
--- a/app/core/ml/retina.py
+++ b/app/core/ml/retina.py
@@ -4,6 +4,8 @@ from keras_retinanet.utils.image import preprocess_image, resize_image
 from core.config import WorkerConfig, get_config
 from core.ml.enum import InferTypeEnum
 
+from core.api_models.common import Prediction
+from typing import List
 import os
 import cv2
 import time
@@ -37,7 +39,7 @@ def load(self) -> None:
             f"\timage max side: {self.config.max_side}\n",
             flush=True
         )
-    def infer(self, in_data: bytes) -> dict:
+    async def infer(self, in_data: bytes) -> List[Prediction]:
         # pre-processing
         img_bytes = np.asarray(bytearray(in_data), dtype=np.uint8)
         image = cv2.imdecode(img_bytes, cv2.IMREAD_COLOR)
@@ -51,25 +53,56 @@ def infer(self, in_data: bytes) -> dict:
         # post-processing
         boxes /= scale
 
-        objects = []
-        result = {
-            'objects': objects
-        }
+        result_bboxes: List[Prediction] = []
+
+        # filter detections
         for box, score, label in zip(boxes[0], scores[0], labels[0]):
-            if score < 0.5:
+            if score < 0.15:
                 break
+
             b = np.array(box.astype(int)).astype(int)
-            # x1 y1 x2 y2
-            obj = {
-                'label': self.config.labels[label],
-                'xmin': b[0],
-                'ymin': b[1],
-                'xmax': b[2],
-                'ymax': b[3],
-                'score': score
-            }
-            objects.append(obj)
-        return result
+            # x0 y0 x1 y1
+            tagret = Prediction(
+                xmin=b[0],
+                ymin=b[1],
+                xmax=b[2],
+                ymax=b[3],
+                score=score,
+                label=self.config.labels[label]
+            )
+            is_merged = False
+
+            for res in result_bboxes:
+                if res.label != tagret.label:
+                    continue
+
+                if res.xmin <= tagret.xmin and res.xmax >= tagret.xmin:
+                    res.xmax = max(res.xmax, tagret.xmax)
+                    is_merged = True
+                if res.xmin <= tagret.xmax and res.xmax >= tagret.xmax:
+                    res.xmin = min(res.xmin, tagret.xmin)
+                    is_merged = True
+                if res.ymin <= tagret.ymin and res.ymax >= tagret.ymin:
+                    res.ymax = max(res.ymax, tagret.ymax)
+                    is_merged = True
+                if res.ymin <= tagret.ymax and res.ymax >= tagret.ymax:
+                    res.ymin = min(res.ymin, tagret.ymin)
+                    is_merged = True
+                if tagret.xmin <= res.xmin and tagret.xmax >= res.xmax:
+                    res.xmax = max(res.xmax, tagret.xmax)
+                    res.xmin = min(res.xmin, tagret.xmin)
+                    is_merged = True
+                if tagret.ymin <= res.ymin and tagret.ymax >= res.ymax:
+                    res.ymax = max(res.ymax, tagret.ymax)
+                    res.ymin = min(res.ymin, tagret.ymin)
+                    is_merged = True
+
+                if is_merged:
+                    res.score = max(res.score, tagret.score)
+
+            if not is_merged:
+                result_bboxes.append(tagret)
+        return result_bboxes
 
 
     def _setup_gpu(self, gpu_id: int) -> None:
diff --git a/inference.py b/inference.py
index b138054..c7a9a82 100644
--- a/inference.py
+++ b/inference.py
@@ -161,7 +161,7 @@ def parse_args(args):
     """ Parse the arguments. """
     parser = argparse.ArgumentParser(description='Evaluation script for a RetinaNet network.')
 
-    parser.add_argument('--model', help='Path to RetinaNet model.', default=os.path.join('snapshots', 'resnet50_liza_alert_v5_interface.h5'))
+    parser.add_argument('--model', help='Path to RetinaNet model.', default=os.path.join('snapshots', 'lacmus_v5_interface.h5'))
     parser.add_argument('--gpu', help='Visile gpu device. Set to -1 if CPU', type=int, default=0)
 
     return parser.parse_args(args)
diff --git a/inference_openvino.py b/inference_openvino.py
index abc06b0..f969333 100644
--- a/inference_openvino.py
+++ b/inference_openvino.py
@@ -12,6 +12,22 @@
 app = Flask(__name__)
 
 
+class Prediction:
+    def __init__(self,
+                 xmin: int,
+                 ymin: int,
+                 xmax: int,
+                 ymax: int,
+                 score: float,
+                 label: str) -> None:
+
+        self.xmin: int = xmin
+        self.ymin: int = ymin
+        self.xmax: int = xmax
+        self.ymax: int = ymax
+        self.score: float = score
+        self.label: str = label
+
 def decode_openvino_detections(detections, input_shape = (800, 1333)):
     """
     Converts openvino detections to understandable format
@@ -130,12 +146,11 @@ def predict_image():
 
 
 def run_detection_image(OpenVinoExecutable, InputLayer, OutputLayer, h, w, labels_to_names, data):
-    print("start predict...")
     start_time = time.time()
     imgdata = pybase64.b64decode(data)
     file_bytes = np.asarray(bytearray(imgdata), dtype=np.uint8)
     image = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)
-    image, scale = resize_image(image)
+    image, scale = resize_image(image, min_side=min(h, w), max_side=max(h, w))
     image = create_blank(image, w, h)
     image = preprocess_image(image)
     image = image.transpose((2, 0, 1))
@@ -150,24 +165,67 @@ def run_detection_image(OpenVinoExecutable, InputLayer, OutputLayer, h, w, label
     reaponse = {
         'objects': objects
     }
-
-    # visualize detections
+    result_bboxes: List[Prediction] = []
+
+    # filter detections
     for box, score, label in zip(boxes[0], scores[0], labels[0]):
-        # scores are sorted so we can break
-        if score < 0.4:
+        if score < 0.15:
             break
+
         b = np.array(box.astype(int)).astype(int)
-        # x1 y1 x2 y2
+        # x0 y0 x1 y1
+        tagret = Prediction(
+            xmin=b[0],
+            ymin=b[1],
+            xmax=b[2],
+            ymax=b[3],
+            score=score,
+            label=labels_to_names[label]
+        )
+        is_merged = False
+
+        for res in result_bboxes:
+            if res.label != tagret.label:
+                continue
+
+            if res.xmin <= tagret.xmin and res.xmax >= tagret.xmin:
+                res.xmax = max(res.xmax, tagret.xmax)
+                is_merged = True
+            if res.xmin <= tagret.xmax and res.xmax >= tagret.xmax:
+                res.xmin = min(res.xmin, tagret.xmin)
+                is_merged = True
+            if res.ymin <= tagret.ymin and res.ymax >= tagret.ymin:
+                res.ymax = max(res.ymax, tagret.ymax)
+                is_merged = True
+            if res.ymin <= tagret.ymax and res.ymax >= tagret.ymax:
+                res.ymin = min(res.ymin, tagret.ymin)
+                is_merged = True
+            if tagret.xmin <= res.xmin and tagret.xmax >= res.xmax:
+                res.xmax = max(res.xmax, tagret.xmax)
+                res.xmin = min(res.xmin, tagret.xmin)
+                is_merged = True
+            if tagret.ymin <= res.ymin and tagret.ymax >= res.ymax:
+                res.ymax = max(res.ymax, tagret.ymax)
+                res.ymin = min(res.ymin, tagret.ymin)
+                is_merged = True
+
+            if is_merged:
+                res.score = max(res.score, tagret.score)
+
+        if not is_merged:
+            result_bboxes.append(tagret)
+
+    # visualize detections
+    for res in result_bboxes:
         obj = {
-            'name': labels_to_names[label],
-            'score': str(score),
-            'xmin': str(b[0]),
-            'ymin': str(b[1]),
-            'xmax': str(b[2]),
-            'ymax': str(b[3])
+            'name': res.label,
+            'score': str(res.score),
+            'xmin': str(res.xmin),
+            'ymin': str(res.ymin),
+            'xmax': str(res.xmax),
+            'ymax': str(res.ymax)
         }
         objects.append(obj)
-
     reaponse_json = json.dumps(reaponse)
     print("done in {} s".format(time.time() - start_time), flush=True)
     return reaponse_json
@@ -203,13 +261,13 @@ def parse_args(args):
         '--bin',
        help='path to bin openVINO inference model',
         type=str,
-        default=os.path.join('snapshots', 'resnet50_liza_alert_v1_interface.bin')
+        default=os.path.join('snapshots', 'lacmus_v5_interface.bin')
     )
     parser.add_argument(
         '--xml',
         help='path to xml model sheme',
         type=str,
-        default=os.path.join('snapshots', 'resnet50_liza_alert_v1_interface.xml')
+        default=os.path.join('snapshots', 'lacmus_v5_interface.xml')
     )
 
     return parser.parse_args(args)
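
Reviewer note, not part of the patch: the merge pass added in app/core/ml/retina.py and duplicated in inference_openvino.py greedily grows an already-accepted box of the same label whenever a new detection overlaps it, keeping the higher score. The standalone sketch below illustrates the idea with a simplified 2-D intersection test; the merge_boxes helper, the Box dataclass, and the sample detections are invented for illustration, and the per-axis conditions in the actual diff differ slightly from this simplification.

# sketch only: simplified variant of the merge pass from Model.infer / run_detection_image
from dataclasses import dataclass
from typing import List

@dataclass
class Box:  # stands in for the Prediction model from app/core/api_models/common.py
    xmin: int
    ymin: int
    xmax: int
    ymax: int
    score: float
    label: str = 'Pedestrian'

def merge_boxes(detections: List[Box]) -> List[Box]:
    """Union same-label boxes that intersect, keeping the higher score."""
    merged: List[Box] = []
    for target in detections:
        hit = None
        for res in merged:
            if res.label != target.label:
                continue
            x_overlap = target.xmin <= res.xmax and target.xmax >= res.xmin
            y_overlap = target.ymin <= res.ymax and target.ymax >= res.ymin
            if x_overlap and y_overlap:
                hit = res
                break
        if hit is None:
            merged.append(target)
        else:
            hit.xmin, hit.ymin = min(hit.xmin, target.xmin), min(hit.ymin, target.ymin)
            hit.xmax, hit.ymax = max(hit.xmax, target.xmax), max(hit.ymax, target.ymax)
            hit.score = max(hit.score, target.score)
    return merged

if __name__ == '__main__':
    detections = [
        Box(10, 10, 50, 50, 0.90),
        Box(40, 20, 80, 60, 0.70),     # overlaps the first box -> merged into 10..80 x 10..60
        Box(200, 200, 230, 240, 0.20)  # no overlap -> kept as-is
    ]
    print(merge_boxes(detections))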