Yolov8 for image segmentation
Donal BEP progress testing yolov8
Dyan367 committed Oct 19, 2023
1 parent f948868 commit 269d14e
Showing 5 changed files with 346 additions and 0 deletions.
4 changes: 4 additions & 0 deletions ed_sensor_integration/CMakeLists.txt
@@ -179,6 +179,10 @@ install(

add_executable(ed_empty_spot_designator tools/empty_spot_designator.cpp)
target_link_libraries(ed_empty_spot_designator ed_kinect ${catkin_LIBRARIES})

catkin_install_python(PROGRAMS scripts/talker.py scripts/listener.py
DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
)
# ------------------------------------------------------------------------------------------------
# TESTS
# ------------------------------------------------------------------------------------------------
165 changes: 165 additions & 0 deletions ed_sensor_integration/scripts/YOLOv8_single_image.py
@@ -0,0 +1,165 @@
import cv2
from matplotlib import image as img
from ultralytics import YOLO
import numpy as np
import time
def detect(model, img):
    results = model.predict(source=img.copy(), save=False, save_txt=False)
    result = results[0]
    segmentation_contours_idx = []
    for seg in result.masks.xy:
        segment = np.array(seg, dtype=np.int32)
        segmentation_contours_idx.append(segment)
    bboxes = np.array(result.boxes.xyxy.cpu(), dtype="int")
    class_ids = np.array(result.boxes.cls.cpu(), dtype="int")
    # scores = np.array(result.boxes.conf.cpu(), dtype="float").round(2)
    return bboxes, class_ids, segmentation_contours_idx  # , scores
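
# Note (added sketch, not part of the original pipeline): if YOLO detects nothing
# in a frame, result.masks is None and the loop in detect() above would raise an
# AttributeError. A guarded variant could look like this (detect_safe is a
# hypothetical name):
#
# def detect_safe(model, img):
#     result = model.predict(source=img.copy(), save=False, save_txt=False)[0]
#     if result.masks is None:  # no detections in this frame
#         return np.empty((0, 4), dtype=int), np.empty(0, dtype=int), []
#     segments = [np.array(seg, dtype=np.int32) for seg in result.masks.xy]
#     bboxes = np.array(result.boxes.xyxy.cpu(), dtype="int")
#     class_ids = np.array(result.boxes.cls.cpu(), dtype="int")
#     return bboxes, class_ids, segments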

# def brightness(image,brightness_factor): #positive value -> brighter, negative -> darker
# # Convert the image to 32-bit float
# image_float = image.astype(np.float32)
# # Apply the darkness factor to each pixel
# darkened_image_float = image_float + brightness_factor
# # Clip the values to ensure they stay within the valid range [0, 255]
# darkened_image_float = np.clip(darkened_image_float, 0, 255)
# # Convert the result back to 8-bit unsigned integer
# darkened_image = darkened_image_float.astype(np.uint8)
# return darkened_image

# def change_saturation(image, saturation_factor):
# # Convert the image from BGR to HSV color space
# hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

# # Split the HSV image into its components
# h, s, v = cv2.split(hsv_image)

# # Apply the saturation factor
# s = np.clip(s * saturation_factor, 0, 255).astype(np.uint8)

# # Merge the modified components back into an HSV image
# modified_hsv = cv2.merge((h, s, v))

# # Convert the modified HSV image back to BGR color space
# modified_image = cv2.cvtColor(modified_hsv, cv2.COLOR_HSV2BGR)

# return modified_image
# Colours dict (BGR)
colours = {
    'red': (0, 0, 255),
    'green': (0, 255, 0),
    'blue': (255, 0, 0),
    'yellow': (0, 255, 255),
    'purple': (128, 0, 128),
    # Add more colors as needed
}

#Inputs
#image_path = r"C:\Users\Dónal\Desktop\segment_any\images\corner2.jpg"
model_path = "/home/donal/ros/noetic/system/src/ed_sensor_integration/scripts/yolov8n-seg.pt"
device = "cuda"  # note: not passed to the model anywhere in this script

#Loads image, converts to BGR colour channel, darkens image and loads model
#image = img.imread(image_path)
#image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
# darkness_factor = -25 # Decrease the brightness by 50 units
# image = brightness(image,darkness_factor)
# saturation_factor = 0.8
# image = change_saturation(image,saturation_factor)

class_id_to_extract = 0  # COCO class id to keep (0 = 'person'; a dining table is 60)
colours = {
    'yellow': (0, 255, 255),
    'blue': (255, 0, 0),
}

cap = cv2.VideoCapture(0)  # Open the webcam

if not cap.isOpened():
    print("Error: Could not open the camera.")
    exit()

# Load the model once, before the capture loop, so it is not re-created for every frame
model = YOLO(model_path)

while True:
    ret, frame = cap.read()

    if not ret:
        print("Error: Could not read a frame.")
        break

    bboxes, classes, segmentations = detect(model, frame)  # , scores = detect(model, frame)

    # Keep only detections of the selected class (see class_id_to_extract above)
    table_indices = [i for i, class_id in enumerate(classes) if class_id == class_id_to_extract]

    for i in table_indices:
        x, y, x2, y2 = bboxes[i]
        seg = segmentations[i]

        cv2.rectangle(frame, (x, y), (x2, y2), colours['yellow'], 2)
        cv2.polylines(frame, [seg], True, colours['blue'], 2)
        cv2.putText(frame, "Table", (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, colours['yellow'], 2)

    cv2.imshow("Video Feed", frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
# cap = cv2.VideoCapture(0)

# while cap.isOpened():
# success, frame = cap.read()

# if success:
# start = time.perf_counter()

# model = YOLO(model_path)

# end = time.perf_counter()
# total_time = end -start
# dps = 1/ total_time

# #extracts data from model
# bboxes, classes, segmentations, scores = detect(model,frame)

# #Extracting only table mask
# for bbox, class_id, seg, score in zip(bboxes, classes, segmentations, scores):
# (x, y, x2, y2) = bbox
# # Table id from trained dataset is 60
# if class_id == 0:
# cv2.rectangle(frame, (x, y), (x2, y2), colours['yellow'], 2) #Colour is Blue Green Red BGR
# cv2.polylines(frame, [seg], True, colours['blue'], 2)
# # cv2.fillPoly(image, [seg], colours['purple'])
# cv2.putText(frame, "Table", (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, colours['yellow'], 2)
# cv2.imshow("Video Feed",frame)
# if cv2.waitKey(1) & 0xFF == ord('q'):
# break

# # After the loop release the cap object
# cap.release()
# # Destroy all the windows
# cv2.destroyAllWindows()




# model = YOLO(model_path)

# #extracts data from model
# bboxes, classes, segmentations, scores = detect(model,image)

# #Extracting only table mask
# for bbox, class_id, seg, score in zip(bboxes, classes, segmentations, scores):
# (x, y, x2, y2) = bbox
# # Table id from trained dataset is 60
# if class_id == 60:
# cv2.rectangle(image, (x, y), (x2, y2), colours['yellow'], 2) #Colour is Blue Green Red BGR
# cv2.polylines(image, [seg], True, colours['blue'], 2)
# # cv2.fillPoly(image, [seg], colours['purple'])
# cv2.putText(image, "Table", (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, colours['yellow'], 2)

# #Visualization
# cv2.imshow('Result Image', image)
# cv2.waitKey(0)
# cv2.destroyAllWindows()
61 changes: 61 additions & 0 deletions ed_sensor_integration/scripts/YOLOv8_video_optimized.py
@@ -0,0 +1,61 @@
import cv2
from ultralytics import YOLO
import numpy as np
import time

def detect(model, frame):
    results = model(frame)
    result = results[0]
    segmentation_contours_idx = [np.array(seg, dtype=np.int32) for seg in result.masks.xy]
    class_ids = np.array(result.boxes.cls.cpu(), dtype="int")
    return class_ids, segmentation_contours_idx

colours = {
    'yellow': (0, 255, 255),
    'blue': (255, 0, 0),
}

model_path = "/home/donal/ros/noetic/system/src/ed_sensor_integration/scripts/yolov8n-seg.pt"
device = "cpu"
model = YOLO(model_path).to(device)
table_class = 0  # COCO class id to extract: 0 = 'person' (used for webcam testing); a dining table is 60
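# Possible refinement (untested sketch): select the device automatically instead of
# hard-coding "cpu", so the same script uses the GPU when one is available:
#
# import torch
# device = "cuda" if torch.cuda.is_available() else "cpu"
# model = YOLO(model_path).to(device)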

# Detection Loop with webcam
cap = cv2.VideoCapture(0)

if not cap.isOpened():
print("Error: Could not open the camera.")
exit()

# Initialize refresh rate calc
start_time = time.time()
frame_count = 0

while True:
    ret, frame = cap.read()

    if not ret:
        print("Error: Could not read a frame.")
        break
    # Get classes and segments
    classes, segmentations = detect(model, frame)
    # Extract the table segment and add it to the frame
    for class_id, seg in zip(classes, segmentations):
        if class_id == table_class:
            cv2.polylines(frame, [seg], True, colours['blue'], 2)
    # Calculate the refresh rate for segmentation
    frame_count += 1
    end_time = time.time()
    elapsed_time = end_time - start_time
    segmentation_frame_rate = int(frame_count / elapsed_time)
    start_time = end_time
    frame_count = 0
    # Display segmentation refresh rate
    cv2.putText(frame, f"Seg FPS: {segmentation_frame_rate}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
    cv2.imshow("Video Feed", frame)
    # Press q to quit the window
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
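
# Possible ROS integration (untested sketch, analogous to talker.py added in this
# commit): publish each extracted contour as a geometry_msgs/PolygonStamped so other
# nodes can subscribe to the segmentation result. The topic and node names below are
# placeholders.
#
# import rospy
# from geometry_msgs.msg import PolygonStamped, Point32
#
# rospy.init_node('yolov8_segmenter', anonymous=True)
# pub = rospy.Publisher('table_segment', PolygonStamped, queue_size=10)
# msg = PolygonStamped()
# msg.header.stamp = rospy.Time.now()
# msg.polygon.points = [Point32(x=float(px), y=float(py), z=0.0) for px, py in seg]
# pub.publish(msg)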
60 changes: 60 additions & 0 deletions ed_sensor_integration/scripts/listener.py
@@ -0,0 +1,60 @@
#!/usr/bin/env python
# Software License Agreement (BSD License)
#
# Copyright (c) 2008, Willow Garage, Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials provided
# with the distribution.
# * Neither the name of Willow Garage, Inc. nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# Revision $Id$

## Simple listener demo that subscribes to std_msgs/Strings published
## to the 'chatter' topic

import rospy
from std_msgs.msg import String

def callback(data):
    rospy.loginfo(rospy.get_caller_id() + 'I heard %s', data.data)

def listener():

    # In ROS, nodes are uniquely named. If two nodes with the same
    # name are launched, the previous one is kicked off. The
    # anonymous=True flag means that rospy will choose a unique
    # name for our 'listener' node so that multiple listeners can
    # run simultaneously.
    rospy.init_node('listener', anonymous=True)

    rospy.Subscriber('chatter', String, callback)

    # spin() simply keeps python from exiting until this node is stopped
    rospy.spin()

if __name__ == '__main__':
    listener()
56 changes: 56 additions & 0 deletions ed_sensor_integration/scripts/talker.py
@@ -0,0 +1,56 @@
#!/usr/bin/env python
# Software License Agreement (BSD License)
#
# Copyright (c) 2008, Willow Garage, Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials provided
# with the distribution.
# * Neither the name of Willow Garage, Inc. nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# Revision $Id$

## Simple talker demo that publishes std_msgs/Strings messages
## to the 'chatter' topic

import rospy
from std_msgs.msg import String

def talker():
    pub = rospy.Publisher('chatter', String, queue_size=10)
    rospy.init_node('talker', anonymous=True)
    rate = rospy.Rate(10)  # 10hz
    while not rospy.is_shutdown():
        hello_str = "hello world %s" % rospy.get_time()
        rospy.loginfo(hello_str)
        pub.publish(hello_str)
        rate.sleep()

if __name__ == '__main__':
    try:
        talker()
    except rospy.ROSInterruptException:
        pass
