Merge pull request #131 from lacmus-foundation/inference_v2
upd web app
gosha20777 authored Dec 29, 2020
2 parents f1cae0d + cd65753 commit f1dd0da
Showing 9 changed files with 218 additions and 113 deletions.
4 changes: 2 additions & 2 deletions Server.openvino.dockerfile
@@ -3,8 +3,8 @@ FROM openvino/ubuntu18_runtime:latest
RUN mkdir /home/openvino/app && mkdir /home/openvino/app/snapshots
WORKDIR /home/openvino/app
COPY inference_openvino.py /home/openvino/app/inference_openvino.py
COPY snapshots/resnet50_liza_alert_v1_interface.bin /home/openvino/app/snapshots/resnet50_liza_alert_v1_interface.bin
COPY snapshots/resnet50_liza_alert_v1_interface.xml /home/openvino/app/snapshots/resnet50_liza_alert_v1_interface.xml
COPY snapshots/lacmus_v5_interface.bin /home/openvino/app/snapshots/lacmus_v5_interface.bin
COPY snapshots/lacmus_v5_interface.xml /home/openvino/app/snapshots/lacmus_v5_interface.xml

RUN pip3 install flask pybase64

18 changes: 4 additions & 14 deletions app/api/v0/routes/predict.py
@@ -1,6 +1,7 @@
from fastapi import APIRouter, HTTPException, File, UploadFile
from core.api_models.common import Object, Result
from core.api_models.common import Result
from core.ml.retina import Model

model = Model()
model.load()

@@ -13,18 +14,7 @@ async def predict_on_image(image: UploadFile = File(...)) -> Result:

try:
image_bytes = await image.read()
predicts = model.infer(in_data=image_bytes)
result = Result(objects=[])
for predict in predicts['objects']:
obj = Object(
label = predict['label'],
xmax = predict['xmax'],
xmin = predict['xmin'],
ymax = predict['ymax'],
ymin = predict['ymin'],
score = predict['score']
)
result.objects.append(obj)
return result
predicts = await model.infer(in_data=image_bytes)
return Result(objects=predicts)
except Exception as ex:
raise HTTPException(status_code=500, detail=str(ex))
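With the endpoint now returning the model's Prediction list directly, a client call stays the same as before; a minimal sketch (the host, port, and route prefix are assumptions and may differ in the actual deployment):

# Sketch: posting an image to the prediction endpoint.
# "http://localhost:5000/api/v0/predict" is an assumed URL, not taken from this repo.
import requests

with open("drone_photo.jpg", "rb") as f:
    response = requests.post(
        "http://localhost:5000/api/v0/predict",
        files={"image": ("drone_photo.jpg", f, "image/jpeg")},
    )
response.raise_for_status()

# Expected shape of the response body, per the Result/Prediction models:
# {"objects": [{"xmin": ..., "ymin": ..., "xmax": ..., "ymax": ..., "score": ..., "label": "Pedestrian"}]}
for obj in response.json()["objects"]:
    print(obj["label"], obj["score"])
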
11 changes: 4 additions & 7 deletions app/core/api_models/common.py
@@ -1,19 +1,16 @@
from pydantic import BaseModel
from fastapi_utils.enums import StrEnum
from core.config import get_config
from typing import List
from enum import auto

class Pong(BaseModel):
pong: str = "Lacmus web API, version X.Y.Z"

class Object(BaseModel):
class Prediction(BaseModel):
xmin: int
xmax: int
ymin: int
xmax: int
ymax: int
label: str
score: float
label: str = 'Pedestrian'

class Result(BaseModel):
objects: List[Object] = None
objects: List[Prediction] = None
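For reference, the renamed models behave like any other pydantic schema; a small sketch (the classes are re-declared here so the snippet is self-contained, mirroring the file above):

# Sketch: the Prediction/Result models as declared above, exercised standalone.
from typing import List
from pydantic import BaseModel

class Prediction(BaseModel):
    xmin: int
    ymin: int
    xmax: int
    ymax: int
    score: float
    label: str = 'Pedestrian'

class Result(BaseModel):
    objects: List[Prediction] = None

# label falls back to 'Pedestrian' when omitted
box = Prediction(xmin=10, ymin=20, xmax=110, ymax=220, score=0.42)
print(Result(objects=[box]).json())
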
67 changes: 50 additions & 17 deletions app/core/ml/retina.py
@@ -4,6 +4,8 @@
from keras_retinanet.utils.image import preprocess_image, resize_image
from core.config import WorkerConfig, get_config
from core.ml.enum import InferTypeEnum
from core.api_models.common import Prediction
from typing import List
import os
import cv2
import time
@@ -37,7 +39,7 @@ def load(self) -> None:
f"\timage max side: {self.config.max_side}\n", flush=True
)

def infer(self, in_data: bytes) -> dict:
async def infer(self, in_data: bytes) -> List[Prediction]:
# pre-processing
img_bytes = np.asarray(bytearray(in_data), dtype=np.uint8)
image = cv2.imdecode(img_bytes, cv2.IMREAD_COLOR)
@@ -51,25 +53,56 @@ def infer(self, in_data: bytes) -> dict:

# post-processing
boxes /= scale
objects = []
result = {
'objects': objects
}
result_bboxes: List[Prediction] = []

# filter detections
for box, score, label in zip(boxes[0], scores[0], labels[0]):
if score < 0.5:
if score < 0.15:
break

b = np.array(box.astype(int)).astype(int)
# x1 y1 x2 y2
obj = {
'label': self.config.labels[label],
'xmin': b[0],
'ymin': b[1],
'xmax': b[2],
'ymax': b[3],
'score': score
}
objects.append(obj)
return result
# x0 y0 x1 y1
target = Prediction(
xmin=b[0],
ymin=b[1],
xmax=b[2],
ymax=b[3],
score=score,
label=self.config.labels[label]
)
is_merged = False

for res in result_bboxes:
if res.label != target.label:
continue

if res.xmin <= target.xmin and res.xmax >= target.xmin:
res.xmax = max(res.xmax, target.xmax)
is_merged = True
if res.xmin <= target.xmax and res.xmax >= target.xmax:
res.xmin = min(res.xmin, target.xmin)
is_merged = True
if res.ymin <= target.ymin and res.ymax >= target.ymin:
res.ymax = max(res.ymax, target.ymax)
is_merged = True
if res.ymin <= target.ymax and res.ymax >= target.ymax:
res.ymin = min(res.ymin, target.ymin)
is_merged = True
if target.xmin <= res.xmin and target.xmax >= res.xmax:
res.xmax = max(res.xmax, target.xmax)
res.xmin = min(res.xmin, target.xmin)
is_merged = True
if target.ymin <= res.ymin and target.ymax >= res.ymax:
res.ymax = max(res.ymax, target.ymax)
res.ymin = min(res.ymin, target.ymin)
is_merged = True

if is_merged:
res.score = max(res.score, target.score)

if not is_merged:
result_bboxes.append(target)
return result_bboxes


def _setup_gpu(self, gpu_id: int) -> None:
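Note that infer() is now a coroutine returning the merged Prediction list (overlapping boxes of the same label are folded together and keep the highest score), so callers outside FastAPI need an event loop. A minimal sketch, assuming the app package is importable and a test image exists on disk:

# Sketch: driving the now-async Model.infer() outside of FastAPI.
import asyncio

from core.ml.retina import Model

async def main() -> None:
    model = Model()
    model.load()
    with open("test_image.jpg", "rb") as f:  # "test_image.jpg" is an assumed file name
        image_bytes = f.read()
    predictions = await model.infer(in_data=image_bytes)
    for p in predictions:
        print(p.label, p.score, (p.xmin, p.ymin, p.xmax, p.ymax))

if __name__ == "__main__":
    asyncio.run(main())
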
72 changes: 17 additions & 55 deletions cli_inference.py
@@ -45,6 +45,20 @@ def parse_args(args):
required=False,
default=3
)
parser.add_argument(
'--height',
help='inference image height',
type=int,
required=False,
default=2100
)
parser.add_argument(
'--width',
help='inference image width',
type=int,
required=False,
default=2100
)
parser.add_argument(
'--gpu',
help='use gpu',
@@ -58,60 +72,6 @@ def create_model(backbone_name, num_classes=1):
model = backbone_factory.retinanet(num_classes)
return models.convert_model(model)

def compute_resize_scale(image_shape, min_side=800, max_side=1333):
""" Compute an image scale such that the image size is constrained to min_side and max_side.
Args
min_side: The image's min side will be equal to min_side after resizing.
max_side: If after resizing the image's max side is above max_side, resize until the max side is equal to max_side.
Returns
A resizing scale.
"""
(rows, cols, _) = image_shape

smallest_side = min(rows, cols)

# rescale the image so the smallest side is min_side
scale = min_side / smallest_side

# check if the largest side is now greater than max_side, which can happen
# when images have a large aspect ratio
largest_side = max(rows, cols)
if largest_side * scale > max_side:
scale = max_side / largest_side

return scale

def preprocess_image(x, mode='caffe'):
""" Preprocess an image by subtracting the ImageNet mean.
Args
x: np.array of shape (None, None, 3) or (3, None, None).
mode: One of "caffe" or "tf".
- caffe: will zero-center each color channel with
respect to the ImageNet dataset, without scaling.
- tf: will scale pixels between -1 and 1, sample-wise.
Returns
The input with the ImageNet mean subtracted.
"""
# mostly identical to "https://github.com/keras-team/keras-applications/blob/master/keras_applications/imagenet_utils.py"
# except for converting RGB -> BGR since we assume BGR already

# convert always to float32 to keep compatibility with opencv
x = x.astype(np.float32)

if mode == 'tf':
x /= 127.5
x -= 1.
elif mode == 'caffe':
x[..., 0] -= 103.939
x[..., 1] -= 116.779
x[..., 2] -= 123.68

return x

def setup_gpu(gpu_id: int):
if gpu_id == -1:
tf.config.experimental.set_visible_devices([], 'GPU')
@@ -135,6 +95,8 @@ def main(args=None):
img_fn = args.img
predict_count = args.count
backbone = args.backbone
min_side = min(args.height, args.width)
max_side = max(args.height, args.width)

print("loading model...")
if args.gpu:
@@ -147,7 +109,7 @@
start_time = time.time()

image = cv2.imread(img_fn)
image, scale = resize_image(image)
image, scale = resize_image(image, min_side=min_side, max_side=max_side)
image = preprocess_image(image)
print("prepoocess image at {} s".format(time.time() - start_time))

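The local compute_resize_scale/preprocess_image helpers are dropped, with the new --height/--width flags now feeding resize_image as min_side/max_side. A short worked sketch of the scale this rule produces (the 4000x3000 frame size is illustrative, not from the repo):

# Sketch: the scale rule used by resize_image, following the same
# min_side/max_side logic as the removed compute_resize_scale helper above.
def resize_scale(rows: int, cols: int, min_side: int = 2100, max_side: int = 2100) -> float:
    # rescale so the smallest side becomes min_side...
    scale = min_side / min(rows, cols)
    # ...unless that would push the largest side past max_side
    if max(rows, cols) * scale > max_side:
        scale = max_side / max(rows, cols)
    return scale

# A 4000x3000 frame with the new defaults (--height 2100 --width 2100):
print(resize_scale(3000, 4000))  # 0.525 -> the image is resized to 2100x1575
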
2 changes: 1 addition & 1 deletion cli_inference_openvino.py
@@ -171,7 +171,7 @@ def main(args=None):

# load images
image = cv2.imread(img_fn)
image, scale = resize_image(image)
image, scale = resize_image(image, min_side=min(h, w), max_side=max(h, w))
image = create_blank(image, w, h)
image = preprocess_image(image)

2 changes: 1 addition & 1 deletion inference.py
@@ -161,7 +161,7 @@ def parse_args(args):
""" Parse the arguments.
"""
parser = argparse.ArgumentParser(description='Evaluation script for a RetinaNet network.')
parser.add_argument('--model', help='Path to RetinaNet model.', default=os.path.join('snapshots', 'resnet50_liza_alert_v5_interface.h5'))
parser.add_argument('--model', help='Path to RetinaNet model.', default=os.path.join('snapshots', 'lacmus_v5_interface.h5'))
parser.add_argument('--gpu', help='Visile gpu device. Set to -1 if CPU', type=int, default=0)
return parser.parse_args(args)
