nnstreamer · jaeyun-jung · Aug 9, 2024 · Aug 8, 2024
@@ -3,27 +3,26 @@
 
 ## 1. Yolov5
 
-### Install yolov5
+### Install ultralytics
 
 ```bash
-$ git clone https://github.com/ultralytics/yolov5
-$ cd yolov5
-$ pip install -r requirements.txt
+$ pip install ultralytics
 ```
-And download weight files in https://github.com/ultralytics/yolov5/releases/tag/v7.0
 
-### Export to tflite and torchscript model
+### Export to different model representations
 
-```bash
-$ python export.py --weights=yolov5s.pt --img=320 --include tflite torchscript
-$ ls
-... yolov5s.torchscript yolov5s-fp16.tflite ...
-```
+REF: https://github.com/ultralytics/ultralytics#documentation
+```python3
+from ultralytics import YOLO
+
+# Load a model
+model = YOLO("yolov5su.pt") # load a pretrained model
+
+# Export the model
+model.export(format="tflite", imgsz=320) # export the model to tflite format
+model.export(format="torchscript", imgsz=320) # export the model to torchscript format
+model.export(format="onnx", imgsz=320) # export the model to onnx format
 
-### Export to quantized tflite model
-```bash
-$ python export.py --weights=yolov5s.pt --img=320 --include tflite --int8
-... yolov5s-int8.tflite ...
 ```
 
 Note that the input image size is set to 320px, rather than the default 640px, to increase inference speed. You can choose other weight options (n, s, m, l, x) and input image sizes.
@@ -32,26 +31,26 @@ Note that setting the input image size as 320px, rather than the default 640px t
 
 ```bash
 $ ./gst-launch-object-detection-yolov5-tflite.sh
-$ ./gst-launch-object-detection-yolov5-tflite.sh quantize ## use quantized tflite model
 $ ./gst-launch-object-detection-yolov5-torchscript.sh
+$ ./gst-launch-object-detection-yolov5-tensorrt.sh
 ```
 
 ## 2. Yolov8
 
-### Install yolov8
+### Install ultralytics
 
 ```bash
 $ pip install ultralytics
 ```
 
-### Export to tflite and torchscript model
+### Export to different model representations
 
 REF: https://github.com/ultralytics/ultralytics#documentation
-```python
+```python3
 from ultralytics import YOLO
 
 # Load a model
-model = YOLO("yolov8s.pt") # load a pretrained model
+model = YOLO("yolov8s.pt") # load a pretrained model
 
 # Export the model
 model.export(format="tflite", imgsz=320) # export the model to tflite format

@@ -0,0 +1,22 @@
+#!/usr/bin/env bash
+
+## onnx (onnxruntime)
+gst-launch-1.0 \
+  v4l2src name=cam_src ! videoconvert ! videoscale ! \
+    video/x-raw,width=1000,height=1000,format=RGB,pixel-aspect-ratio=1/1,framerate=30/1 ! tee name=t \
+  t. ! queue leaky=2 max-size-buffers=2 ! videoscale ! \
+    video/x-raw,width=320,height=320,format=RGB ! tensor_converter ! \
+    tensor_transform mode=transpose option=1:2:0:3 ! \
+    tensor_transform mode=arithmetic option=typecast:float32,div:255.0 ! \
+    queue ! tensor_filter framework=onnxruntime model=yolov5su.onnx input=320:320:3:1 inputtype=float32 output=2100:84:1 outputtype=float32 latency=1 ! \
+    tensor_transform mode=transpose option=1:0:2:3 ! \
+    other/tensors,num_tensors=1,types=float32,dimensions=84:2100:1,format=static ! \
+    tensor_decoder mode=bounding_boxes option1=yolov8 option2=coco.txt option3=1 option4=1000:1000 option5=320:320 ! \
+    video/x-raw,width=1000,height=1000,format=RGBA ! mix.sink_0 \
+  t. ! queue leaky=2 max-size-buffers=10 ! mix.sink_1 \
+  compositor name=mix sink_0::zorder=2 sink_1::zorder=1 ! videoconvert ! ximagesink sync=false
+
+## Note that `tensor_transform mode=transpose option=1:2:0:3`
+## changes the data layout from NHWC to NCHW:
+## [1, 320h, 320w, 3] -> [1, 3, 320h, 320w]
+## The NCHW layout is required by the model exported from PyTorch.
@@ -0,0 +1,21 @@
+#!/usr/bin/env bash
+
+## tensorrt
+gst-launch-1.0 \
+  v4l2src name=cam_src ! videoconvert ! videoscale ! \
+    video/x-raw,width=1000,height=1000,format=RGB,pixel-aspect-ratio=1/1,framerate=30/1 ! tee name=t \
+  t. ! queue leaky=2 max-size-buffers=2 ! videoscale ! \
+    video/x-raw,width=320,height=320,format=RGB ! tensor_converter ! \
+    tensor_transform mode=transpose option=1:2:0:3 ! \
+    tensor_transform mode=arithmetic option=typecast:float32,div:255.0 ! \
+    queue ! tensor_filter framework=tensorrt10 model=yolov5su.onnx latency=1 ! \
+    tensor_transform mode=transpose option=1:0:2:3 ! \
+    other/tensors,num_tensors=1,types=float32,dimensions=84:2100:1,format=static ! \
+    tensor_decoder mode=bounding_boxes option1=yolov8 option2=coco.txt option3=1 option4=1000:1000 option5=320:320 ! \
+    video/x-raw,width=1000,height=1000,format=RGBA ! mix.sink_0 \
+  t. ! queue leaky=2 max-size-buffers=10 ! mix.sink_1 \
+  compositor name=mix sink_0::zorder=2 sink_1::zorder=1 ! videoconvert ! ximagesink sync=false
+
+## Note that `tensor_transform mode=transpose option=1:2:0:3`
+## changes the data layout from NHWC to NCHW:
+## [1, 320h, 320w, 3] -> [1, 3, 320h, 320w]
@@ -10,7 +10,7 @@ gst-launch-1.0 \
   t. ! queue leaky=2 max-size-buffers=2 ! videoscale ! \
     video/x-raw,width=320,height=320,format=RGB ! tensor_converter ! \
     tensor_transform mode=arithmetic option=typecast:float32,div:255.0 ! \
-    queue ! tensor_filter framework=tensorflow2-lite model=yolov5s-fp16.tflite custom=Delegate:XNNPACK,NumThreads:4 ! \
+    queue ! tensor_filter framework=tensorflow2-lite model=yolov5su-fp16.tflite custom=Delegate:XNNPACK,NumThreads:4 ! \
     other/tensors,num_tensors=1,types=float32,dimensions=85:6300:1,format=static ! \
     tensor_decoder mode=bounding_boxes option1=yolov5 option2=coco.txt option3=0 option4=1000:1000 option5=320:320 ! \
     video/x-raw,width=1000,height=1000,format=RGBA ! mix.sink_0 \
@@ -26,7 +26,7 @@ gst-launch-1.0 \
     video/x-raw,width=1000,height=1000,format=RGB,pixel-aspect-ratio=1/1,framerate=30/1 ! tee name=t \
   t. ! queue leaky=2 max-size-buffers=2 ! videoscale ! \
     video/x-raw,width=320,height=320,format=RGB ! tensor_converter ! \
-    queue ! tensor_filter framework=tensorflow2-lite model=yolov5s-int8.tflite custom=Delegate:GPU ! \
+    queue ! tensor_filter framework=tensorflow2-lite model=yolov5su-int8.tflite custom=Delegate:GPU ! \
     other/tensors,num_tensors=1,types=uint8,dimensions=85:6300:1,format=static ! \
     tensor_transform mode=arithmetic option=typecast:float32,add:-4.0,mul:0.0051498096 ! \
     queue ! tensor_decoder mode=bounding_boxes option1=yolov5 option2=coco.txt option3=0 option4=1000:1000 option5=320:320 ! \

@@ -8,9 +8,10 @@ gst-launch-1.0 \
     video/x-raw,width=320,height=320,format=RGB ! tensor_converter ! \
     tensor_transform mode=transpose option=1:2:0:3 ! \
     tensor_transform mode=arithmetic option=typecast:float32,div:255.0 ! \
-    queue ! tensor_filter framework=pytorch model=yolov5s.torchscript input=320:320:3:1 inputtype=float32 output=85:6300:1:1 outputtype=float32 latency=1 ! \
-    other/tensors,num_tensors=1,types=float32,dimensions=85:6300:1:1,format=static ! \
-    tensor_decoder mode=bounding_boxes option1=yolov5 option2=coco.txt option3=1 option4=1000:1000 option5=320:320 ! \
+    queue ! tensor_filter framework=pytorch model=yolov5su.torchscript input=320:320:3:1 inputtype=float32 output=2100:84:1 outputtype=float32 latency=1 ! \
+    tensor_transform mode=transpose option=1:0:2:3 ! \
+    other/tensors,num_tensors=1,types=float32,dimensions=84:2100:1,format=static ! \
+    tensor_decoder mode=bounding_boxes option1=yolov8 option2=coco.txt option3=1 option4=1000:1000 option5=320:320 ! \
     video/x-raw,width=1000,height=1000,format=RGBA ! mix.sink_0 \
   t. ! queue leaky=2 max-size-buffers=10 ! mix.sink_1 \
   compositor name=mix sink_0::zorder=2 sink_1::zorder=1 ! videoconvert ! ximagesink sync=false