diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index acfab7a2821fa..5009b82c73efa 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -199,6 +199,7 @@ planning/sampling_based_planner/autoware_bezier_sampler/** maxime.clement@tier4. planning/sampling_based_planner/autoware_frenet_planner/** maxime.clement@tier4.jp planning/sampling_based_planner/autoware_path_sampler/** maxime.clement@tier4.jp planning/sampling_based_planner/autoware_sampler_common/** maxime.clement@tier4.jp +sensing/autoware_cuda_utils/** amadeusz.szymko.2@tier4.jp kenzo.lobos@tier4.jp manato.hirabayashi@tier4.jp sensing/autoware_gnss_poser/** anh.nguyen.2@tier4.jp kento.yabuuchi.2@tier4.jp masahiro.sakamoto@tier4.jp ryu.yamamoto@tier4.jp shintaro.sakoda@tier4.jp taiki.yamada@tier4.jp yamato.ando@tier4.jp sensing/autoware_image_diagnostics/** dai.nguyen@tier4.jp yoshi.ri@tier4.jp sensing/autoware_image_transport_decompressor/** kenzo.lobos@tier4.jp yukihiro.saito@tier4.jp @@ -209,7 +210,6 @@ sensing/autoware_radar_scan_to_pointcloud2/** satoshi.tanaka@tier4.jp shunsuke.m sensing/autoware_radar_static_pointcloud_filter/** satoshi.tanaka@tier4.jp shunsuke.miura@tier4.jp taekjin.lee@tier4.jp yoshi.ri@tier4.jp sensing/autoware_radar_threshold_filter/** satoshi.tanaka@tier4.jp shunsuke.miura@tier4.jp taekjin.lee@tier4.jp yoshi.ri@tier4.jp sensing/autoware_radar_tracks_noise_filter/** satoshi.tanaka@tier4.jp shunsuke.miura@tier4.jp taekjin.lee@tier4.jp yoshi.ri@tier4.jp -sensing/cuda_utils/** amadeusz.szymko.2@tier4.jp kenzo.lobos@tier4.jp manato.hirabayashi@tier4.jp sensing/livox/autoware_livox_tag_filter/** kenzo.lobos@tier4.jp ryohsuke.mitsudome@tier4.jp sensing/vehicle_velocity_converter/** ryu.yamamoto@tier4.jp simulator/autoware_carla_interface/** maxime.clement@tier4.jp mradityagio@gmail.com diff --git a/perception/autoware_bytetrack/package.xml b/perception/autoware_bytetrack/package.xml index fd30fe00f64df..e22e897007cc0 100644 --- a/perception/autoware_bytetrack/package.xml +++ b/perception/autoware_bytetrack/package.xml @@ -15,10 +15,10 @@ cudnn_cmake_module tensorrt_cmake_module + autoware_cuda_utils autoware_kalman_filter autoware_perception_msgs autoware_tensorrt_common - cuda_utils cv_bridge eigen image_transport diff --git a/perception/autoware_lidar_apollo_instance_segmentation/CMakeLists.txt b/perception/autoware_lidar_apollo_instance_segmentation/CMakeLists.txt index 5da8ed745be15..16ab7ecccae60 100644 --- a/perception/autoware_lidar_apollo_instance_segmentation/CMakeLists.txt +++ b/perception/autoware_lidar_apollo_instance_segmentation/CMakeLists.txt @@ -23,7 +23,7 @@ if(NOT ${autoware_tensorrt_common_FOUND}) ) return() endif() -find_package(cuda_utils REQUIRED) +find_package(autoware_cuda_utils REQUIRED) add_library(${PROJECT_NAME} SHARED src/node.cpp @@ -38,7 +38,7 @@ add_library(${PROJECT_NAME} SHARED target_include_directories(${PROJECT_NAME} PRIVATE "$" "$" - ${cuda_utils_INCLUDE_DIRS} + ${autoware_cuda_utils_INCLUDE_DIRS} ${pcl_conversions_INCLUDE_DIRS} ${autoware_universe_utils_INCLUDE_DIRS} ${autoware_perception_msgs_INCLUDE_DIRS} diff --git a/perception/autoware_lidar_apollo_instance_segmentation/include/autoware/lidar_apollo_instance_segmentation/detector.hpp b/perception/autoware_lidar_apollo_instance_segmentation/include/autoware/lidar_apollo_instance_segmentation/detector.hpp index c724a5ed31706..07e823bdafada 100644 --- a/perception/autoware_lidar_apollo_instance_segmentation/include/autoware/lidar_apollo_instance_segmentation/detector.hpp +++ b/perception/autoware_lidar_apollo_instance_segmentation/include/autoware/lidar_apollo_instance_segmentation/detector.hpp @@ -19,10 +19,10 @@ #include "autoware/lidar_apollo_instance_segmentation/feature_generator.hpp" #include "autoware/lidar_apollo_instance_segmentation/node.hpp" +#include +#include #include #include -#include -#include #include #include @@ -35,10 +35,10 @@ namespace autoware { namespace lidar_apollo_instance_segmentation { -using cuda_utils::CudaUniquePtr; -using cuda_utils::CudaUniquePtrHost; -using cuda_utils::makeCudaStream; -using cuda_utils::StreamUniquePtr; +using autoware::cuda_utils::CudaUniquePtr; +using autoware::cuda_utils::CudaUniquePtrHost; +using autoware::cuda_utils::makeCudaStream; +using autoware::cuda_utils::StreamUniquePtr; class LidarApolloInstanceSegmentation : public LidarInstanceSegmentationInterface { diff --git a/perception/autoware_lidar_apollo_instance_segmentation/package.xml b/perception/autoware_lidar_apollo_instance_segmentation/package.xml index c924701841d7e..0ad1ca50b75f7 100755 --- a/perception/autoware_lidar_apollo_instance_segmentation/package.xml +++ b/perception/autoware_lidar_apollo_instance_segmentation/package.xml @@ -13,10 +13,10 @@ ament_cmake + autoware_cuda_utils autoware_perception_msgs autoware_tensorrt_common autoware_universe_utils - cuda_utils libpcl-all-dev pcl_conversions rclcpp diff --git a/perception/autoware_lidar_apollo_instance_segmentation/src/detector.cpp b/perception/autoware_lidar_apollo_instance_segmentation/src/detector.cpp index a0f48a39ecc6a..86b1bf10c75a1 100644 --- a/perception/autoware_lidar_apollo_instance_segmentation/src/detector.cpp +++ b/perception/autoware_lidar_apollo_instance_segmentation/src/detector.cpp @@ -65,12 +65,12 @@ LidarApolloInstanceSegmentation::LidarApolloInstanceSegmentation(rclcpp::Node * const auto input_dims = trt_common_->getBindingDimensions(0); const auto input_size = std::accumulate(input_dims.d + 1, input_dims.d + input_dims.nbDims, 1, std::multiplies()); - input_d_ = cuda_utils::make_unique(input_size); + input_d_ = autoware::cuda_utils::make_unique(input_size); const auto output_dims = trt_common_->getBindingDimensions(1); output_size_ = std::accumulate( output_dims.d + 1, output_dims.d + output_dims.nbDims, 1, std::multiplies()); - output_d_ = cuda_utils::make_unique(output_size_); - output_h_ = cuda_utils::make_unique_host(output_size_, cudaHostAllocPortable); + output_d_ = autoware::cuda_utils::make_unique(output_size_); + output_h_ = autoware::cuda_utils::make_unique_host(output_size_, cudaHostAllocPortable); // feature map generator: pre process feature_generator_ = std::make_shared( diff --git a/perception/autoware_shape_estimation/include/autoware/shape_estimation/tensorrt_shape_estimator.hpp b/perception/autoware_shape_estimation/include/autoware/shape_estimation/tensorrt_shape_estimator.hpp index e74b3c6e8e809..d39dce65d224d 100644 --- a/perception/autoware_shape_estimation/include/autoware/shape_estimation/tensorrt_shape_estimator.hpp +++ b/perception/autoware_shape_estimation/include/autoware/shape_estimation/tensorrt_shape_estimator.hpp @@ -15,10 +15,10 @@ #ifndef AUTOWARE__SHAPE_ESTIMATION__TENSORRT_SHAPE_ESTIMATOR_HPP_ #define AUTOWARE__SHAPE_ESTIMATION__TENSORRT_SHAPE_ESTIMATOR_HPP_ +#include +#include +#include #include -#include -#include -#include #include #include @@ -34,10 +34,10 @@ namespace autoware::shape_estimation { -using cuda_utils::CudaUniquePtr; -using cuda_utils::CudaUniquePtrHost; -using cuda_utils::makeCudaStream; -using cuda_utils::StreamUniquePtr; +using autoware::cuda_utils::CudaUniquePtr; +using autoware::cuda_utils::CudaUniquePtrHost; +using autoware::cuda_utils::makeCudaStream; +using autoware::cuda_utils::StreamUniquePtr; using autoware_perception_msgs::msg::DetectedObject; using autoware_perception_msgs::msg::DetectedObjects; diff --git a/perception/autoware_shape_estimation/lib/tensorrt_shape_estimator.cpp b/perception/autoware_shape_estimation/lib/tensorrt_shape_estimator.cpp index 05f5d59813c2b..072d3b661b8b3 100644 --- a/perception/autoware_shape_estimation/lib/tensorrt_shape_estimator.cpp +++ b/perception/autoware_shape_estimation/lib/tensorrt_shape_estimator.cpp @@ -41,13 +41,14 @@ TrtShapeEstimator::TrtShapeEstimator( const auto pc_input_dims = trt_common_->getBindingDimensions(0); const auto pc_input_size = std::accumulate( pc_input_dims.d + 1, pc_input_dims.d + pc_input_dims.nbDims, 1, std::multiplies()); - input_pc_d_ = cuda_utils::make_unique(pc_input_size * batch_config[2]); + input_pc_d_ = autoware::cuda_utils::make_unique(pc_input_size * batch_config[2]); batch_size_ = batch_config[2]; const auto one_hot_input_dims = trt_common_->getBindingDimensions(1); const auto one_hot_input_size = std::accumulate( one_hot_input_dims.d + 1, one_hot_input_dims.d + one_hot_input_dims.nbDims, 1, std::multiplies()); - input_one_hot_d_ = cuda_utils::make_unique(one_hot_input_size * batch_config[2]); + input_one_hot_d_ = + autoware::cuda_utils::make_unique(one_hot_input_size * batch_config[2]); const auto stage1_center_out_dims = trt_common_->getBindingDimensions(2); out_s1center_elem_num_ = std::accumulate( @@ -55,18 +56,18 @@ TrtShapeEstimator::TrtShapeEstimator( std::multiplies()); out_s1center_elem_num_ = out_s1center_elem_num_ * batch_config[2]; out_s1center_elem_num_per_batch_ = static_cast(out_s1center_elem_num_ / batch_config[2]); - out_s1center_prob_d_ = cuda_utils::make_unique(out_s1center_elem_num_); + out_s1center_prob_d_ = autoware::cuda_utils::make_unique(out_s1center_elem_num_); out_s1center_prob_h_ = - cuda_utils::make_unique_host(out_s1center_elem_num_, cudaHostAllocPortable); + autoware::cuda_utils::make_unique_host(out_s1center_elem_num_, cudaHostAllocPortable); const auto pred_out_dims = trt_common_->getBindingDimensions(3); out_pred_elem_num_ = std::accumulate( pred_out_dims.d + 1, pred_out_dims.d + pred_out_dims.nbDims, 1, std::multiplies()); out_pred_elem_num_ = out_pred_elem_num_ * batch_config[2]; out_pred_elem_num_per_batch_ = static_cast(out_pred_elem_num_ / batch_config[2]); - out_pred_prob_d_ = cuda_utils::make_unique(out_pred_elem_num_); + out_pred_prob_d_ = autoware::cuda_utils::make_unique(out_pred_elem_num_); out_pred_prob_h_ = - cuda_utils::make_unique_host(out_pred_elem_num_, cudaHostAllocPortable); + autoware::cuda_utils::make_unique_host(out_pred_elem_num_, cudaHostAllocPortable); g_type_mean_size_ = {{4.6344314, 1.9600292, 1.7375569}, {6.936331, 2.5178623, 2.8506238}, {11.194943, 2.9501154, 3.4918275}, {12.275775, 2.9231303, 3.87086}, diff --git a/perception/autoware_shape_estimation/package.xml b/perception/autoware_shape_estimation/package.xml index cad2d93e38f1c..1476f1219624b 100644 --- a/perception/autoware_shape_estimation/package.xml +++ b/perception/autoware_shape_estimation/package.xml @@ -17,10 +17,10 @@ cudnn_cmake_module tensorrt_cmake_module + autoware_cuda_utils autoware_perception_msgs autoware_tensorrt_common autoware_universe_utils - cuda_utils eigen libopencv-dev libpcl-all-dev diff --git a/perception/autoware_tensorrt_classifier/include/autoware/tensorrt_classifier/calibrator.hpp b/perception/autoware_tensorrt_classifier/include/autoware/tensorrt_classifier/calibrator.hpp index 088297f5bf796..0977621894920 100644 --- a/perception/autoware_tensorrt_classifier/include/autoware/tensorrt_classifier/calibrator.hpp +++ b/perception/autoware_tensorrt_classifier/include/autoware/tensorrt_classifier/calibrator.hpp @@ -36,8 +36,8 @@ #ifndef AUTOWARE__TENSORRT_CLASSIFIER__CALIBRATOR_HPP_ #define AUTOWARE__TENSORRT_CLASSIFIER__CALIBRATOR_HPP_ -#include "cuda_utils/cuda_check_error.hpp" -#include "cuda_utils/cuda_unique_ptr.hpp" +#include "autoware/cuda_utils/cuda_check_error.hpp" +#include "autoware/cuda_utils/cuda_unique_ptr.hpp" #include #include diff --git a/perception/autoware_tensorrt_classifier/include/autoware/tensorrt_classifier/tensorrt_classifier.hpp b/perception/autoware_tensorrt_classifier/include/autoware/tensorrt_classifier/tensorrt_classifier.hpp index f5bafbd04cd51..345e663441557 100644 --- a/perception/autoware_tensorrt_classifier/include/autoware/tensorrt_classifier/tensorrt_classifier.hpp +++ b/perception/autoware_tensorrt_classifier/include/autoware/tensorrt_classifier/tensorrt_classifier.hpp @@ -15,10 +15,10 @@ #ifndef AUTOWARE__TENSORRT_CLASSIFIER__TENSORRT_CLASSIFIER_HPP_ #define AUTOWARE__TENSORRT_CLASSIFIER__TENSORRT_CLASSIFIER_HPP_ +#include +#include +#include #include -#include -#include -#include #include #include @@ -27,10 +27,10 @@ namespace autoware::tensorrt_classifier { -using cuda_utils::CudaUniquePtr; -using cuda_utils::CudaUniquePtrHost; -using cuda_utils::makeCudaStream; -using cuda_utils::StreamUniquePtr; +using autoware::cuda_utils::CudaUniquePtr; +using autoware::cuda_utils::CudaUniquePtrHost; +using autoware::cuda_utils::makeCudaStream; +using autoware::cuda_utils::StreamUniquePtr; /** * @class TrtClassifier diff --git a/perception/autoware_tensorrt_classifier/package.xml b/perception/autoware_tensorrt_classifier/package.xml index a3daf27f8dbf2..c91ab392bbe4b 100644 --- a/perception/autoware_tensorrt_classifier/package.xml +++ b/perception/autoware_tensorrt_classifier/package.xml @@ -19,8 +19,8 @@ autoware_cmake + autoware_cuda_utils autoware_tensorrt_common - cuda_utils libopencv-dev rclcpp diff --git a/perception/autoware_tensorrt_classifier/src/tensorrt_classifier.cpp b/perception/autoware_tensorrt_classifier/src/tensorrt_classifier.cpp index 8889406e48eaa..4ca70c2f75618 100644 --- a/perception/autoware_tensorrt_classifier/src/tensorrt_classifier.cpp +++ b/perception/autoware_tensorrt_classifier/src/tensorrt_classifier.cpp @@ -170,13 +170,14 @@ TrtClassifier::TrtClassifier( std::accumulate(input_dims.d + 1, input_dims.d + input_dims.nbDims, 1, std::multiplies()); const auto output_dims = trt_common_->getBindingDimensions(1); - input_d_ = cuda_utils::make_unique(batch_config[2] * input_size); + input_d_ = autoware::cuda_utils::make_unique(batch_config[2] * input_size); out_elem_num_ = std::accumulate( output_dims.d + 1, output_dims.d + output_dims.nbDims, 1, std::multiplies()); out_elem_num_ = out_elem_num_ * batch_config[2]; out_elem_num_per_batch_ = static_cast(out_elem_num_ / batch_config[2]); - out_prob_d_ = cuda_utils::make_unique(out_elem_num_); - out_prob_h_ = cuda_utils::make_unique_host(out_elem_num_, cudaHostAllocPortable); + out_prob_d_ = autoware::cuda_utils::make_unique(out_elem_num_); + out_prob_h_ = + autoware::cuda_utils::make_unique_host(out_elem_num_, cudaHostAllocPortable); if (cuda) { m_cuda = true; diff --git a/perception/autoware_tensorrt_yolox/include/autoware/tensorrt_yolox/calibrator.hpp b/perception/autoware_tensorrt_yolox/include/autoware/tensorrt_yolox/calibrator.hpp index 73f24b644eb2c..3a2bac315a3d0 100644 --- a/perception/autoware_tensorrt_yolox/include/autoware/tensorrt_yolox/calibrator.hpp +++ b/perception/autoware_tensorrt_yolox/include/autoware/tensorrt_yolox/calibrator.hpp @@ -37,8 +37,8 @@ #ifndef AUTOWARE__TENSORRT_YOLOX__CALIBRATOR_HPP_ #define AUTOWARE__TENSORRT_YOLOX__CALIBRATOR_HPP_ -#include "cuda_utils/cuda_check_error.hpp" -#include "cuda_utils/cuda_unique_ptr.hpp" +#include "autoware/cuda_utils/cuda_check_error.hpp" +#include "autoware/cuda_utils/cuda_unique_ptr.hpp" #include #include diff --git a/perception/autoware_tensorrt_yolox/include/autoware/tensorrt_yolox/tensorrt_yolox.hpp b/perception/autoware_tensorrt_yolox/include/autoware/tensorrt_yolox/tensorrt_yolox.hpp index c9f14e818a781..483adfbdf2757 100644 --- a/perception/autoware_tensorrt_yolox/include/autoware/tensorrt_yolox/tensorrt_yolox.hpp +++ b/perception/autoware_tensorrt_yolox/include/autoware/tensorrt_yolox/tensorrt_yolox.hpp @@ -15,10 +15,10 @@ #ifndef AUTOWARE__TENSORRT_YOLOX__TENSORRT_YOLOX_HPP_ #define AUTOWARE__TENSORRT_YOLOX__TENSORRT_YOLOX_HPP_ +#include +#include #include #include -#include -#include #include #include @@ -27,10 +27,10 @@ namespace autoware::tensorrt_yolox { -using cuda_utils::CudaUniquePtr; -using cuda_utils::CudaUniquePtrHost; -using cuda_utils::makeCudaStream; -using cuda_utils::StreamUniquePtr; +using autoware::cuda_utils::CudaUniquePtr; +using autoware::cuda_utils::CudaUniquePtrHost; +using autoware::cuda_utils::makeCudaStream; +using autoware::cuda_utils::StreamUniquePtr; struct Object { diff --git a/perception/autoware_tensorrt_yolox/package.xml b/perception/autoware_tensorrt_yolox/package.xml index b9f2feaf7464c..68cf0a7f2e67e 100644 --- a/perception/autoware_tensorrt_yolox/package.xml +++ b/perception/autoware_tensorrt_yolox/package.xml @@ -19,10 +19,10 @@ cudnn_cmake_module tensorrt_cmake_module + autoware_cuda_utils autoware_object_recognition_utils autoware_perception_msgs autoware_tensorrt_common - cuda_utils cv_bridge image_transport libopencv-dev diff --git a/perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp b/perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp index ab2d772be0c20..451af305a4410 100644 --- a/perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp +++ b/perception/autoware_tensorrt_yolox/src/tensorrt_yolox.cpp @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "cuda_utils/cuda_check_error.hpp" -#include "cuda_utils/cuda_unique_ptr.hpp" +#include "autoware/cuda_utils/cuda_check_error.hpp" +#include "autoware/cuda_utils/cuda_unique_ptr.hpp" #include #include @@ -277,13 +277,14 @@ TrtYoloX::TrtYoloX( std::accumulate(input_dims.d + 1, input_dims.d + input_dims.nbDims, 1, std::multiplies()); if (needs_output_decode_) { const auto output_dims = trt_common_->getBindingDimensions(1); - input_d_ = cuda_utils::make_unique(batch_config[2] * input_size); + input_d_ = autoware::cuda_utils::make_unique(batch_config[2] * input_size); out_elem_num_ = std::accumulate( output_dims.d + 1, output_dims.d + output_dims.nbDims, 1, std::multiplies()); out_elem_num_ = out_elem_num_ * batch_config[2]; out_elem_num_per_batch_ = static_cast(out_elem_num_ / batch_config[2]); - out_prob_d_ = cuda_utils::make_unique(out_elem_num_); - out_prob_h_ = cuda_utils::make_unique_host(out_elem_num_, cudaHostAllocPortable); + out_prob_d_ = autoware::cuda_utils::make_unique(out_elem_num_); + out_prob_h_ = + autoware::cuda_utils::make_unique_host(out_elem_num_, cudaHostAllocPortable); int w = input_dims.d[3]; int h = input_dims.d[2]; int sum_tensors = (w / 8) * (h / 8) + (w / 16) * (h / 16) + (w / 32) * (h / 32); @@ -298,11 +299,13 @@ TrtYoloX::TrtYoloX( } else { const auto out_scores_dims = trt_common_->getBindingDimensions(3); max_detections_ = out_scores_dims.d[1]; - input_d_ = cuda_utils::make_unique(batch_config[2] * input_size); - out_num_detections_d_ = cuda_utils::make_unique(batch_config[2]); - out_boxes_d_ = cuda_utils::make_unique(batch_config[2] * max_detections_ * 4); - out_scores_d_ = cuda_utils::make_unique(batch_config[2] * max_detections_); - out_classes_d_ = cuda_utils::make_unique(batch_config[2] * max_detections_); + input_d_ = autoware::cuda_utils::make_unique(batch_config[2] * input_size); + out_num_detections_d_ = autoware::cuda_utils::make_unique(batch_config[2]); + out_boxes_d_ = + autoware::cuda_utils::make_unique(batch_config[2] * max_detections_ * 4); + out_scores_d_ = autoware::cuda_utils::make_unique(batch_config[2] * max_detections_); + out_classes_d_ = + autoware::cuda_utils::make_unique(batch_config[2] * max_detections_); } if (multitask_) { // Allocate buffer for segmentation @@ -317,9 +320,10 @@ TrtYoloX::TrtYoloX( } segmentation_out_elem_num_per_batch_ = static_cast(segmentation_out_elem_num_ / batch_config[2]); - segmentation_out_prob_d_ = cuda_utils::make_unique(segmentation_out_elem_num_); - segmentation_out_prob_h_ = - cuda_utils::make_unique_host(segmentation_out_elem_num_, cudaHostAllocPortable); + segmentation_out_prob_d_ = + autoware::cuda_utils::make_unique(segmentation_out_elem_num_); + segmentation_out_prob_h_ = autoware::cuda_utils::make_unique_host( + segmentation_out_elem_num_, cudaHostAllocPortable); } if (use_gpu_preprocess) { use_gpu_preprocess_ = true; @@ -394,9 +398,10 @@ void TrtYoloX::initPreprocessBuffer(int width, int height) for (int b = 0; b < batch_size_; b++) { scales_.emplace_back(scale); } - image_buf_h_ = cuda_utils::make_unique_host( + image_buf_h_ = autoware::cuda_utils::make_unique_host( width * height * 3 * batch_size_, cudaHostAllocWriteCombined); - image_buf_d_ = cuda_utils::make_unique(width * height * 3 * batch_size_); + image_buf_d_ = + autoware::cuda_utils::make_unique(width * height * 3 * batch_size_); } if (multitask_) { size_t argmax_out_elem_num = 0; @@ -414,9 +419,9 @@ void TrtYoloX::initPreprocessBuffer(int width, int height) size_t out_elem_num = out_w * out_h * batch_size_; argmax_out_elem_num += out_elem_num; } - argmax_buf_h_ = - cuda_utils::make_unique_host(argmax_out_elem_num, cudaHostAllocPortable); - argmax_buf_d_ = cuda_utils::make_unique(argmax_out_elem_num); + argmax_buf_h_ = autoware::cuda_utils::make_unique_host( + argmax_out_elem_num, cudaHostAllocPortable); + argmax_buf_d_ = autoware::cuda_utils::make_unique(argmax_out_elem_num); } } } @@ -468,10 +473,10 @@ void TrtYoloX::preprocessGpu(const std::vector & images) if (!image_buf_h_) { const float scale = std::min(input_width / image.cols, input_height / image.rows); scales_.emplace_back(scale); - image_buf_h_ = cuda_utils::make_unique_host( + image_buf_h_ = autoware::cuda_utils::make_unique_host( image.cols * image.rows * 3 * batch_size, cudaHostAllocWriteCombined); - image_buf_d_ = - cuda_utils::make_unique(image.cols * image.rows * 3 * batch_size); + image_buf_d_ = autoware::cuda_utils::make_unique( + image.cols * image.rows * 3 * batch_size); } int index = b * image.cols * image.rows * 3; // Copy into pinned memory @@ -496,11 +501,11 @@ void TrtYoloX::preprocessGpu(const std::vector & images) if (multitask_) { if (!argmax_buf_h_) { - argmax_buf_h_ = - cuda_utils::make_unique_host(argmax_out_elem_num, cudaHostAllocPortable); + argmax_buf_h_ = autoware::cuda_utils::make_unique_host( + argmax_out_elem_num, cudaHostAllocPortable); } if (!argmax_buf_d_) { - argmax_buf_d_ = cuda_utils::make_unique(argmax_out_elem_num); + argmax_buf_d_ = autoware::cuda_utils::make_unique(argmax_out_elem_num); } } @@ -606,8 +611,8 @@ void TrtYoloX::preprocessWithRoiGpu( scales_.clear(); if (!roi_h_) { - roi_h_ = cuda_utils::make_unique_host(batch_size, cudaHostAllocWriteCombined); - roi_d_ = cuda_utils::make_unique(batch_size); + roi_h_ = autoware::cuda_utils::make_unique_host(batch_size, cudaHostAllocWriteCombined); + roi_d_ = autoware::cuda_utils::make_unique(batch_size); } for (const auto & image : images) { @@ -616,10 +621,10 @@ void TrtYoloX::preprocessWithRoiGpu( input_height / static_cast(rois[b].height)); scales_.emplace_back(scale); if (!image_buf_h_) { - image_buf_h_ = cuda_utils::make_unique_host( + image_buf_h_ = autoware::cuda_utils::make_unique_host( image.cols * image.rows * 3 * batch_size, cudaHostAllocWriteCombined); - image_buf_d_ = - cuda_utils::make_unique(image.cols * image.rows * 3 * batch_size); + image_buf_d_ = autoware::cuda_utils::make_unique( + image.cols * image.rows * 3 * batch_size); } int index = b * image.cols * image.rows * 3; // Copy into pinned memory @@ -718,8 +723,8 @@ void TrtYoloX::multiScalePreprocessGpu(const cv::Mat & image, const std::vector< scales_.clear(); if (!roi_h_) { - roi_h_ = cuda_utils::make_unique_host(batch_size, cudaHostAllocWriteCombined); - roi_d_ = cuda_utils::make_unique(batch_size); + roi_h_ = autoware::cuda_utils::make_unique_host(batch_size, cudaHostAllocWriteCombined); + roi_d_ = autoware::cuda_utils::make_unique(batch_size); } for (size_t b = 0; b < rois.size(); b++) { @@ -733,9 +738,10 @@ void TrtYoloX::multiScalePreprocessGpu(const cv::Mat & image, const std::vector< roi_h_[b].h = rois[b].height; } if (!image_buf_h_) { - image_buf_h_ = cuda_utils::make_unique_host( + image_buf_h_ = autoware::cuda_utils::make_unique_host( image.cols * image.rows * 3 * 1, cudaHostAllocWriteCombined); - image_buf_d_ = cuda_utils::make_unique(image.cols * image.rows * 3 * 1); + image_buf_d_ = + autoware::cuda_utils::make_unique(image.cols * image.rows * 3 * 1); } int index = 0 * image.cols * image.rows * 3; // Copy into pinned memory diff --git a/perception/autoware_traffic_light_classifier/package.xml b/perception/autoware_traffic_light_classifier/package.xml index 663c7a91204b1..d85ea7aae6d79 100644 --- a/perception/autoware_traffic_light_classifier/package.xml +++ b/perception/autoware_traffic_light_classifier/package.xml @@ -14,9 +14,9 @@ autoware_cmake + autoware_cuda_utils autoware_tensorrt_classifier autoware_tensorrt_common - cuda_utils cv_bridge image_transport libboost-filesystem-dev diff --git a/perception/autoware_traffic_light_classifier/src/classifier/cnn_classifier.hpp b/perception/autoware_traffic_light_classifier/src/classifier/cnn_classifier.hpp index b6338a0d89358..0b61850850db4 100644 --- a/perception/autoware_traffic_light_classifier/src/classifier/cnn_classifier.hpp +++ b/perception/autoware_traffic_light_classifier/src/classifier/cnn_classifier.hpp @@ -17,10 +17,10 @@ #include "classifier_interface.hpp" +#include +#include #include #include -#include -#include #include #include #include @@ -44,10 +44,10 @@ namespace autoware::traffic_light { -using cuda_utils::CudaUniquePtr; -using cuda_utils::CudaUniquePtrHost; -using cuda_utils::makeCudaStream; -using cuda_utils::StreamUniquePtr; +using autoware::cuda_utils::CudaUniquePtr; +using autoware::cuda_utils::CudaUniquePtrHost; +using autoware::cuda_utils::makeCudaStream; +using autoware::cuda_utils::StreamUniquePtr; class CNNClassifier : public ClassifierInterface { diff --git a/sensing/cuda_utils/CMakeLists.txt b/sensing/autoware_cuda_utils/CMakeLists.txt similarity index 58% rename from sensing/cuda_utils/CMakeLists.txt rename to sensing/autoware_cuda_utils/CMakeLists.txt index e5e89ed9cfb3c..f1113905b832b 100644 --- a/sensing/cuda_utils/CMakeLists.txt +++ b/sensing/autoware_cuda_utils/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.5) -project(cuda_utils) +project(autoware_cuda_utils) find_package(autoware_cmake REQUIRED) autoware_package() @@ -7,21 +7,15 @@ autoware_package() find_package(CUDA) if(NOT ${CUDA_FOUND}) - message(WARNING "cuda is not found, so the cuda_utils package won't be built.") + message(WARNING "cuda is not found, so the autoware_cuda_utils package won't be built.") return() endif() -install( - DIRECTORY include/${PROJECT_NAME}/ - DESTINATION include/${PROJECT_NAME} -) - if(BUILD_TESTING) find_package(ament_lint_auto REQUIRED) ament_lint_auto_find_test_dependencies() endif() -ament_export_include_directories(include) ament_export_dependencies(CUDA) ament_auto_package() diff --git a/sensing/cuda_utils/README.md b/sensing/autoware_cuda_utils/README.md similarity index 78% rename from sensing/cuda_utils/README.md rename to sensing/autoware_cuda_utils/README.md index 57dd657f654d5..7cb65ce6bb614 100644 --- a/sensing/cuda_utils/README.md +++ b/sensing/autoware_cuda_utils/README.md @@ -1,4 +1,4 @@ -# cuda_utils +# autoware_cuda_utils ## Purpose diff --git a/sensing/cuda_utils/include/cuda_utils/cuda_check_error.hpp b/sensing/autoware_cuda_utils/include/autoware/cuda_utils/cuda_check_error.hpp similarity index 79% rename from sensing/cuda_utils/include/cuda_utils/cuda_check_error.hpp rename to sensing/autoware_cuda_utils/include/autoware/cuda_utils/cuda_check_error.hpp index 8c2d96eee5f13..6ed6a426ff899 100644 --- a/sensing/cuda_utils/include/cuda_utils/cuda_check_error.hpp +++ b/sensing/autoware_cuda_utils/include/autoware/cuda_utils/cuda_check_error.hpp @@ -17,15 +17,15 @@ // https://creativecommons.org/publicdomain/zero/1.0/deed.en // borrowed from https://proc-cpuinfo.fixstars.com/2019/02/cuda_smart_pointer/ -#ifndef CUDA_UTILS__CUDA_CHECK_ERROR_HPP_ -#define CUDA_UTILS__CUDA_CHECK_ERROR_HPP_ +#ifndef AUTOWARE__CUDA_UTILS__CUDA_CHECK_ERROR_HPP_ +#define AUTOWARE__CUDA_UTILS__CUDA_CHECK_ERROR_HPP_ #include #include #include -namespace cuda_utils +namespace autoware::cuda_utils { template void cuda_check_error(const ::cudaError_t e, F && f, N && n) @@ -37,8 +37,8 @@ void cuda_check_error(const ::cudaError_t e, F && f, N && n) throw std::runtime_error{s.str()}; } } -} // namespace cuda_utils +} // namespace autoware::cuda_utils -#define CHECK_CUDA_ERROR(e) (cuda_utils::cuda_check_error(e, __FILE__, __LINE__)) +#define CHECK_CUDA_ERROR(e) (autoware::cuda_utils::cuda_check_error(e, __FILE__, __LINE__)) -#endif // CUDA_UTILS__CUDA_CHECK_ERROR_HPP_ +#endif // AUTOWARE__CUDA_UTILS__CUDA_CHECK_ERROR_HPP_ diff --git a/sensing/cuda_utils/include/cuda_utils/cuda_unique_ptr.hpp b/sensing/autoware_cuda_utils/include/autoware/cuda_utils/cuda_unique_ptr.hpp similarity index 89% rename from sensing/cuda_utils/include/cuda_utils/cuda_unique_ptr.hpp rename to sensing/autoware_cuda_utils/include/autoware/cuda_utils/cuda_unique_ptr.hpp index 6e01a00d6681c..0d6cdd6d3b4b3 100644 --- a/sensing/cuda_utils/include/cuda_utils/cuda_unique_ptr.hpp +++ b/sensing/autoware_cuda_utils/include/autoware/cuda_utils/cuda_unique_ptr.hpp @@ -17,15 +17,15 @@ // https://creativecommons.org/publicdomain/zero/1.0/deed.en // borrowed from https://proc-cpuinfo.fixstars.com/2019/02/cuda_smart_pointer/ -#ifndef CUDA_UTILS__CUDA_UNIQUE_PTR_HPP_ -#define CUDA_UTILS__CUDA_UNIQUE_PTR_HPP_ +#ifndef AUTOWARE__CUDA_UTILS__CUDA_UNIQUE_PTR_HPP_ +#define AUTOWARE__CUDA_UTILS__CUDA_UNIQUE_PTR_HPP_ -#include "cuda_utils/cuda_check_error.hpp" +#include "autoware/cuda_utils/cuda_check_error.hpp" #include #include -namespace cuda_utils +namespace autoware::cuda_utils { struct CudaDeleter { @@ -76,6 +76,6 @@ CudaUniquePtrHost make_unique_host(unsigned int flag = cudaHostAllocDefault) CHECK_CUDA_ERROR(::cudaHostAlloc(reinterpret_cast(&p), sizeof(T), flag)); return CudaUniquePtrHost{p}; } -} // namespace cuda_utils +} // namespace autoware::cuda_utils -#endif // CUDA_UTILS__CUDA_UNIQUE_PTR_HPP_ +#endif // AUTOWARE__CUDA_UTILS__CUDA_UNIQUE_PTR_HPP_ diff --git a/sensing/cuda_utils/include/cuda_utils/stream_unique_ptr.hpp b/sensing/autoware_cuda_utils/include/autoware/cuda_utils/stream_unique_ptr.hpp similarity index 85% rename from sensing/cuda_utils/include/cuda_utils/stream_unique_ptr.hpp rename to sensing/autoware_cuda_utils/include/autoware/cuda_utils/stream_unique_ptr.hpp index 0f3a1bc6ccebf..d87ba133ad20c 100644 --- a/sensing/cuda_utils/include/cuda_utils/stream_unique_ptr.hpp +++ b/sensing/autoware_cuda_utils/include/autoware/cuda_utils/stream_unique_ptr.hpp @@ -17,14 +17,14 @@ // https://creativecommons.org/publicdomain/zero/1.0/deed.en // borrowed from https://proc-cpuinfo.fixstars.com/2019/02/cuda_smart_pointer/ -#ifndef CUDA_UTILS__STREAM_UNIQUE_PTR_HPP_ -#define CUDA_UTILS__STREAM_UNIQUE_PTR_HPP_ +#ifndef AUTOWARE__CUDA_UTILS__STREAM_UNIQUE_PTR_HPP_ +#define AUTOWARE__CUDA_UTILS__STREAM_UNIQUE_PTR_HPP_ #include #include -namespace cuda_utils +namespace autoware::cuda_utils { struct StreamDeleter { @@ -47,6 +47,6 @@ inline StreamUniquePtr makeCudaStream(const uint32_t flags = cudaStreamDefault) } return stream; } -} // namespace cuda_utils +} // namespace autoware::cuda_utils -#endif // CUDA_UTILS__STREAM_UNIQUE_PTR_HPP_ +#endif // AUTOWARE__CUDA_UTILS__STREAM_UNIQUE_PTR_HPP_ diff --git a/sensing/cuda_utils/package.xml b/sensing/autoware_cuda_utils/package.xml similarity index 95% rename from sensing/cuda_utils/package.xml rename to sensing/autoware_cuda_utils/package.xml index b7690bd9e8cc8..2c4916cbbd0a8 100644 --- a/sensing/cuda_utils/package.xml +++ b/sensing/autoware_cuda_utils/package.xml @@ -1,6 +1,6 @@ - cuda_utils + autoware_cuda_utils 0.0.1 cuda utility library