From 269d14efc8f6903d0f38fe3731c5154ef2886ee1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?D=C3=B3nal?=
Date: Thu, 19 Oct 2023 10:21:28 +0200
Subject: [PATCH] YOLOv8 for image segmentation

Donal BEP progress: testing YOLOv8 segmentation on single images and a
webcam feed, plus the stock ROS talker/listener demo nodes.
---
 ed_sensor_integration/CMakeLists.txt      |  4 +
 .../scripts/YOLOv8_single_image.py        | 95 +++++++++++++++
 .../scripts/YOLOv8_video_optimized.py     | 71 ++++++++++++
 ed_sensor_integration/scripts/listener.py | 60 +++++++++++
 ed_sensor_integration/scripts/talker.py   | 56 ++++++++++
 5 files changed, 286 insertions(+)
 create mode 100644 ed_sensor_integration/scripts/YOLOv8_single_image.py
 create mode 100644 ed_sensor_integration/scripts/YOLOv8_video_optimized.py
 create mode 100755 ed_sensor_integration/scripts/listener.py
 create mode 100755 ed_sensor_integration/scripts/talker.py

diff --git a/ed_sensor_integration/CMakeLists.txt b/ed_sensor_integration/CMakeLists.txt
index b150630f..c6796338 100644
--- a/ed_sensor_integration/CMakeLists.txt
+++ b/ed_sensor_integration/CMakeLists.txt
@@ -179,6 +179,10 @@ install(
 add_executable(ed_empty_spot_designator tools/empty_spot_designator.cpp)
 target_link_libraries(ed_empty_spot_designator ed_kinect ${catkin_LIBRARIES})
 
+
+catkin_install_python(PROGRAMS scripts/talker.py scripts/listener.py
+  DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
+)
 # ------------------------------------------------------------------------------------------------
 #                                              TESTS
 # ------------------------------------------------------------------------------------------------
diff --git a/ed_sensor_integration/scripts/YOLOv8_single_image.py b/ed_sensor_integration/scripts/YOLOv8_single_image.py
new file mode 100644
index 00000000..89fa6717
--- /dev/null
+++ b/ed_sensor_integration/scripts/YOLOv8_single_image.py
@@ -0,0 +1,95 @@
+import cv2
+from ultralytics import YOLO
+import numpy as np
+
+
+def detect(model, img):
+    """Run YOLOv8 segmentation and return bounding boxes, class ids and mask contours."""
+    results = model.predict(source=img.copy(), save=False, save_txt=False)
+    result = results[0]
+    # result.masks is None when nothing is detected, so guard before reading it
+    segmentation_contours_idx = []
+    if result.masks is not None:
+        for seg in result.masks.xy:
+            segmentation_contours_idx.append(np.array(seg, dtype=np.int32))
+    bboxes = np.array(result.boxes.xyxy.cpu(), dtype="int")
+    class_ids = np.array(result.boxes.cls.cpu(), dtype="int")
+    # scores = np.array(result.boxes.conf.cpu(), dtype="float").round(2)
+    return bboxes, class_ids, segmentation_contours_idx  # , scores
+
+
+def brightness(image, brightness_factor):
+    """Shift brightness; a positive factor brightens, a negative one darkens."""
+    image_float = image.astype(np.float32) + brightness_factor
+    # Clip to keep pixel values within the valid range [0, 255]
+    return np.clip(image_float, 0, 255).astype(np.uint8)
+
+
+def change_saturation(image, saturation_factor):
+    """Scale the saturation channel of a BGR image."""
+    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
+    h, s, v = cv2.split(hsv_image)
+    s = np.clip(s * saturation_factor, 0, 255).astype(np.uint8)
+    return cv2.cvtColor(cv2.merge((h, s, v)), cv2.COLOR_HSV2BGR)
+
+
+# Colours dict (OpenCV uses BGR channel order)
+colours = {
+    'red': (0, 0, 255),
+    'green': (0, 255, 0),
+    'blue': (255, 0, 0),
+    'yellow': (0, 255, 255),
+    'purple': (128, 0, 128),
+    # Add more colours as needed
+}
+
+# Inputs
+# image_path = r"C:\Users\Dónal\Desktop\segment_any\images\corner2.jpg"
+model_path = "/home/donal/ros/noetic/system/src/ed_sensor_integration/scripts/yolov8n-seg.pt"
+device = "cuda"  # set to "cpu" if no GPU is available
+
+# Optional preprocessing before detection, e.g.:
+# image = brightness(image, -25)         # darken by 25 units
+# image = change_saturation(image, 0.8)  # desaturate slightly
+
+# 0 = person in the default COCO classes; the custom table dataset uses id 60
+class_id_to_extract = 0
+
+# Load the model once, outside the capture loop
+model = YOLO(model_path).to(device)
+
+cap = cv2.VideoCapture(0)  # Open the webcam
+
+if not cap.isOpened():
+    print("Error: Could not open the camera.")
+    exit()
+
+while True:
+    ret, frame = cap.read()
+
+    if not ret:
+        print("Error: Could not read a frame.")
+        break
+
+    bboxes, classes, segmentations = detect(model, frame)
+
+    # Keep only detections of the class we want to extract
+    table_indices = [i for i, class_id in enumerate(classes) if class_id == class_id_to_extract]
+
+    for i in table_indices:
+        x, y, x2, y2 = bboxes[i]
+        seg = segmentations[i]
+
+        cv2.rectangle(frame, (x, y), (x2, y2), colours['yellow'], 2)
+        cv2.polylines(frame, [seg], True, colours['blue'], 2)
+        # The label assumes the custom table dataset
+        cv2.putText(frame, "Table", (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, colours['yellow'], 2)
+
+    cv2.imshow("Video Feed", frame)
+
+    # Press q to quit the window
+    if cv2.waitKey(1) & 0xFF == ord('q'):
+        break
+
+cap.release()
+cv2.destroyAllWindows()
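
For the single-image case that gives the file its name (kept commented out above via image_path), a minimal standalone sketch under the same detect() contract; the weight file and image path are placeholders:

    import cv2
    import numpy as np
    from ultralytics import YOLO

    def detect(model, img):
        # Same contract as detect() in YOLOv8_single_image.py
        result = model.predict(source=img, save=False)[0]
        contours = [] if result.masks is None else [np.array(s, dtype=np.int32) for s in result.masks.xy]
        bboxes = np.array(result.boxes.xyxy.cpu(), dtype="int")
        class_ids = np.array(result.boxes.cls.cpu(), dtype="int")
        return bboxes, class_ids, contours

    model = YOLO("yolov8n-seg.pt")     # ultralytics fetches the weights if absent
    image = cv2.imread("corner2.jpg")  # placeholder path
    if image is None:
        raise SystemExit("Could not read the image.")

    bboxes, classes, segmentations = detect(model, image)
    for (x, y, x2, y2), class_id, seg in zip(bboxes, classes, segmentations):
        if class_id == 0:  # person in COCO; the custom table dataset uses 60
            cv2.rectangle(image, (x, y), (x2, y2), (0, 255, 255), 2)
            cv2.polylines(image, [seg], True, (255, 0, 0), 2)

    cv2.imshow("Result Image", image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
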
diff --git a/ed_sensor_integration/scripts/YOLOv8_video_optimized.py b/ed_sensor_integration/scripts/YOLOv8_video_optimized.py
new file mode 100644
index 00000000..8950266d
--- /dev/null
+++ b/ed_sensor_integration/scripts/YOLOv8_video_optimized.py
@@ -0,0 +1,71 @@
+import cv2
+from ultralytics import YOLO
+import numpy as np
+import time
+
+
+def detect(model, frame):
+    """Run YOLOv8 segmentation on a frame and return class ids and mask contours."""
+    result = model(frame)[0]
+    # result.masks is None when nothing is detected
+    if result.masks is None:
+        return np.empty(0, dtype=int), []
+    segmentation_contours_idx = [np.array(seg, dtype=np.int32) for seg in result.masks.xy]
+    class_ids = np.array(result.boxes.cls.cpu(), dtype="int")
+    return class_ids, segmentation_contours_idx
+
+
+colours = {
+    'yellow': (0, 255, 255),
+    'blue': (255, 0, 0),
+}
+
+model_path = "/home/donal/ros/noetic/system/src/ed_sensor_integration/scripts/yolov8n-seg.pt"
+device = "cpu"
+model = YOLO(model_path).to(device)
+table_class = 0  # 0 = person in the default COCO weights; the custom table dataset uses id 60
+
+# Detection loop with webcam
+cap = cv2.VideoCapture(0)
+
+if not cap.isOpened():
+    print("Error: Could not open the camera.")
+    exit()
+
+# Initialise the refresh-rate calculation
+start_time = time.time()
+frame_count = 0
+
+while True:
+    ret, frame = cap.read()
+
+    if not ret:
+        print("Error: Could not read a frame.")
+        break
+
+    # Get classes and segments
+    classes, segmentations = detect(model, frame)
+
+    # Extract the table segments and draw them on the frame
+    for class_id, seg in zip(classes, segmentations):
+        if class_id == table_class:
+            cv2.polylines(frame, [seg], True, colours['blue'], 2)
+
+    # Per-frame (instantaneous) segmentation refresh rate
+    frame_count += 1
+    end_time = time.time()
+    elapsed_time = end_time - start_time
+    segmentation_frame_rate = int(frame_count / max(elapsed_time, 1e-6))
+    start_time = end_time
+    frame_count = 0
+
+    # Display the segmentation refresh rate
+    cv2.putText(frame, f"Seg FPS: {segmentation_frame_rate}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
+    cv2.imshow("Video Feed", frame)
+
+    # Press q to quit the window
+    if cv2.waitKey(1) & 0xFF == ord('q'):
+        break
+
+cap.release()
+cv2.destroyAllWindows()
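
The loop above resets frame_count every iteration, so the readout is the instantaneous rate of the last frame and will jitter. If a steadier figure is wanted, an exponential moving average is a small change; a sketch, with the 0.9 weight an arbitrary choice:

    import time

    ema_fps = None
    prev = time.time()

    # ...inside the capture loop, after each processed frame:
    now = time.time()
    inst_fps = 1.0 / max(now - prev, 1e-6)  # guard against a zero interval
    prev = now
    ema_fps = inst_fps if ema_fps is None else 0.9 * ema_fps + 0.1 * inst_fps
    # cv2.putText(frame, f"Seg FPS: {ema_fps:.1f}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
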
diff --git a/ed_sensor_integration/scripts/listener.py b/ed_sensor_integration/scripts/listener.py
new file mode 100755
index 00000000..ed27e1e5
--- /dev/null
+++ b/ed_sensor_integration/scripts/listener.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python
+# Software License Agreement (BSD License)
+#
+# Copyright (c) 2008, Willow Garage, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above
+#    copyright notice, this list of conditions and the following
+#    disclaimer in the documentation and/or other materials provided
+#    with the distribution.
+#  * Neither the name of Willow Garage, Inc. nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+# Revision $Id$
+
+## Simple listener demo that subscribes to std_msgs/Strings published
+## to the 'chatter' topic
+
+import rospy
+from std_msgs.msg import String
+
+def callback(data):
+    rospy.loginfo(rospy.get_caller_id() + 'I heard %s', data.data)
+
+def listener():
+
+    # In ROS, nodes are uniquely named. If two nodes with the same
+    # name are launched, the previous one is kicked off. The
+    # anonymous=True flag means that rospy will choose a unique
+    # name for our 'listener' node so that multiple listeners can
+    # run simultaneously.
+    rospy.init_node('listener', anonymous=True)
+
+    rospy.Subscriber('chatter', String, callback)
+
+    # spin() simply keeps python from exiting until this node is stopped
+    rospy.spin()
+
+if __name__ == '__main__':
+    listener()
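
listener.py and talker.py are the stock rospy tutorial nodes, included here as the plumbing that would eventually carry segmentation results. As a hedged sketch of how the two halves could meet, the publisher below pushes each frame's contours onto a 'segmentation' topic as a JSON string; the topic name, message encoding and rate are assumptions, not part of this patch:

    #!/usr/bin/env python
    import json

    import cv2
    import numpy as np
    import rospy
    from std_msgs.msg import String
    from ultralytics import YOLO

    def publish_segments():
        pub = rospy.Publisher('segmentation', String, queue_size=10)
        rospy.init_node('yolo_segmenter', anonymous=True)
        model = YOLO("yolov8n-seg.pt")  # placeholder weights
        cap = cv2.VideoCapture(0)
        rate = rospy.Rate(10)  # 10 Hz
        while not rospy.is_shutdown():
            ret, frame = cap.read()
            if not ret:
                break
            result = model(frame)[0]
            if result.masks is not None:
                # Serialise each mask contour as a nested list of [x, y] points
                contours = [np.array(s, dtype=np.int32).tolist() for s in result.masks.xy]
                pub.publish(json.dumps(contours))
            rate.sleep()
        cap.release()

    if __name__ == '__main__':
        try:
            publish_segments()
        except rospy.ROSInterruptException:
            pass
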
diff --git a/ed_sensor_integration/scripts/talker.py b/ed_sensor_integration/scripts/talker.py
new file mode 100755
index 00000000..033dc449
--- /dev/null
+++ b/ed_sensor_integration/scripts/talker.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python
+# Software License Agreement (BSD License)
+#
+# Copyright (c) 2008, Willow Garage, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above
+#    copyright notice, this list of conditions and the following
+#    disclaimer in the documentation and/or other materials provided
+#    with the distribution.
+#  * Neither the name of Willow Garage, Inc. nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+# Revision $Id$
+
+## Simple talker demo that publishes std_msgs/Strings messages
+## to the 'chatter' topic
+
+import rospy
+from std_msgs.msg import String
+
+def talker():
+    pub = rospy.Publisher('chatter', String, queue_size=10)
+    rospy.init_node('talker', anonymous=True)
+    rate = rospy.Rate(10)  # 10 Hz
+    while not rospy.is_shutdown():
+        hello_str = "hello world %s" % rospy.get_time()
+        rospy.loginfo(hello_str)
+        pub.publish(hello_str)
+        rate.sleep()
+
+if __name__ == '__main__':
+    try:
+        talker()
+    except rospy.ROSInterruptException:
+        pass
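
To smoke-test the talker/listener pair once the workspace is built and sourced (assuming the standard catkin setup), run each command in its own terminal:

    roscore
    rosrun ed_sensor_integration talker.py
    rosrun ed_sensor_integration listener.py

The listener should then log lines like "I heard hello world <timestamp>" for every message the talker publishes.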