diff --git a/.gitignore b/.gitignore index 0dca5a1..0107fe4 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,9 @@ cmake_example.egg-info/ dist/ logs/ point_pillars.egg-info/ +logs_*/ +.vscode/ +.ipynb_checkpoints/ +archive/ +visualization/ +logs.zip \ No newline at end of file diff --git a/Analyse_Input_Pipeline.ipynb b/Analyse_Input_Pipeline.ipynb new file mode 100644 index 0000000..0d4039c --- /dev/null +++ b/Analyse_Input_Pipeline.ipynb @@ -0,0 +1,356 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import time\n", + "import numpy as np\n", + "# import tensorflow as tf\n", + "from glob import glob\n", + "import cv2\n", + "\n", + "from config import Parameters\n", + "# from processors import SimpleDataGenerator\n", + "# from custom_processors import AnalyseCustomDataGenerator\n", + "from point_pillars_custom_processors_v2 import AnalyseCustomDataGenerator\n", + "from det3d.pc_kitti_dataset import PCKittiAugmentedDataset\n", + "from tqdm.notebook import trange\n", + "# print(dir(tqdm))\n", + "\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# DATA_ROOT = \"/media/data3/tjtanaa/kitti_dataset/KITTI/object/training\" # TODO make main arg\n", + "DATA_ROOT = \"/media/data3/tjtanaa/kitti_dataset/\" # TODO make main arg\n", + "MODEL_ROOT = \"./logs_Car_Pedestrian_Custom_Dataset_single_process\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "params = Parameters()\n", + "\n", + "gt_database_dir = os.path.join(DATA_ROOT, \"gt_database\")\n", + "\n", + "# training_gen = AnalyseCustomDataGenerator(batch_size=params.batch_size,root_dir = DATA_ROOT,\n", + "# npoints=20000, split='train', classes=list(params.classes_map.keys()), \n", + "# random_select=False, gt_database_dir=None, aug_hard_ratio=0.7)\n", + "\n", 
+ "\n", + "# validation_gen = PCKittiAugmentedDataset(root_dir=DATA_ROOT, \n", + "# npoints=20000, split='val', random_select=False, classes=list(params.classes_map.keys()))" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# # get the min and max (range) of every axis\n", + "# x_max = np.array(-9999.0)\n", + "# x_min = np.array(9999.0)\n", + "# y_max = np.array(-9999.0)\n", + "# y_min = np.array(9999.0)\n", + "# z_max = np.array(-9999.0)\n", + "# z_min = np.array(9999.0)\n", + "# for i in trange(len(validation_gen)):\n", + "# sample = validation_gen[i]\n", + "# x_max = np.max([x_max, np.max(sample['pts_rect'][:,0])])\n", + "# x_min = np.min([x_min, np.min(sample['pts_rect'][:,0])])\n", + "# y_max = np.max([y_max, np.max(sample['pts_rect'][:,1])])\n", + "# y_min = np.min([y_min, np.min(sample['pts_rect'][:,1])])\n", + "# z_max = np.max([z_max, np.max(sample['pts_rect'][:,2])])\n", + "# z_min = np.min([z_min, np.min(sample['pts_rect'][:,2])])\n", + "# print(x_min, x_max)\n", + "# print(y_min, y_max)\n", + "# print(z_min, z_max)\n", + "# # 1.349664568901062 78.88325500488281\n", + "# # -52.52265167236328 50.9976806640625\n", + "# # -1.0496952533721924 3.1484153270721436\n", + " \n", + " \n", + "# # 1.349664568901062 78.88325500488281\n", + "# # -52.52265167236328 50.9976806640625\n", + "# # -1.0496952533721924 3.1484153270721436" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "bc52e70dae13465eaddc01ca8fd85669", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, max=942.0), HTML(value='')))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(1, 12000, 100, 9) (1, 12000, 3)\n", + "[ 6.1949997e+00 -1.4270003e+00 -1.6610003e+00 
-2.9000002e-01\n", + " -1.0499954e-02 -6.4125061e-02 -1.3750792e-03 1.1499977e-01\n", + " 1.3002157e-02]\n", + "[ 6.1959996e+00 -1.4070001e+00 -1.6590002e+00 -2.5000000e-01\n", + " -9.5000267e-03 -4.4124842e-02 6.2501431e-04 1.1599970e-01\n", + " 3.3002377e-02]\n", + "[ 6.1980000e+00 -1.3870002e+00 -1.6590002e+00 -2.6999998e-01\n", + " -7.4996948e-03 -2.4124980e-02 6.2501431e-04 1.1800003e-01\n", + " 5.3002238e-02]\n", + "[ 6.2029991e+00 -1.3670001e+00 -1.6590003e+00 -2.2000000e-01\n", + " -2.5005341e-03 -4.1248798e-03 6.2489510e-04 1.2299919e-01\n", + " 7.3002338e-02]\n", + "[ 6.2049999e+00 -1.3580000e+00 -1.6590003e+00 -3.4000000e-01\n", + " -4.9972534e-04 4.8751831e-03 6.2489510e-04 1.2500000e-01\n", + " 8.2002401e-02]\n", + "[ 6.2149997e+00 -1.3390002e+00 -1.6610001e+00 -2.2999999e-01\n", + " 9.5000267e-03 2.3874998e-02 -1.3749599e-03 1.3499975e-01\n", + " 1.0100222e-01]\n", + "[ 6.2089992e+00 -1.3180001e+00 -1.6580001e+00 -2.8000000e-01\n", + " 3.4995079e-03 4.4875145e-02 1.6250610e-03 1.2899923e-01\n", + " 1.2200236e-01]\n", + "[ 6.2230000e+00 -1.3000002e+00 -1.6610000e+00 -2.9000002e-01\n", + " 1.7500401e-02 6.2875032e-02 -1.3748407e-03 1.4300013e-01\n", + " 1.4000225e-01]\n", + "[0. 0. 0. 0. 0. 0. 0. 0. 0.]\n", + "[0. 0. 0. 0. 0. 0. 0. 0. 
0.]\n", + "\n" + ] + }, + { + "ename": "NameError", + "evalue": "name 'exit' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0;31m# sample = validation_gen[i]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mpillars\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvoxels\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0moccupancy\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mposition\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msize\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mangle\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheading\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mclassification\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 14\u001b[0;31m \u001b[0;34m[\u001b[0m\u001b[0mpts_input\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgt_boxes3d\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msample\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mvalidation_gen\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 15\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mj\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpts_input\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0mx_max\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mx_max\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpts_input\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/tjtanaa/PointPillars/point_pillars_custom_processors_v2.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, batch_id)\u001b[0m\n\u001b[1;32m 379\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpillars_\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 380\u001b[0m \u001b[0;31m# print(np.sum(pillars_ > 0))\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 381\u001b[0;31m \u001b[0mexit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 382\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 383\u001b[0m \u001b[0mpillars\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpillars_\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mNameError\u001b[0m: name 'exit' is not defined" + ] + } + ], + "source": [ + "\n", + "validation_gen = AnalyseCustomDataGenerator(batch_size=params.batch_size,root_dir = DATA_ROOT,\n", + " npoints=20000, split='val', classes=list(params.classes_map.keys()), \n", + " random_select=False, gt_database_dir=None, 
aug_hard_ratio=0.7)\n", + "# get the min and max (range) of every axis\n", + "x_max = np.array(-9999.0)\n", + "x_min = np.array(9999.0)\n", + "y_max = np.array(-9999.0)\n", + "y_min = np.array(9999.0)\n", + "z_max = np.array(-9999.0)\n", + "z_min = np.array(9999.0)\n", + "for i in trange(len(validation_gen)):\n", + "# sample = validation_gen[i]\n", + " [pillars, voxels], [occupancy, position, size, angle, heading, classification], \\\n", + " [pts_input, gt_boxes3d, sample] = validation_gen[i]\n", + " for j in range(len(pts_input)):\n", + " x_max = np.max([x_max, np.max(pts_input[j][:,0])])\n", + " x_min = np.min([x_min, np.min(pts_input[j][:,0])])\n", + " y_max = np.max([y_max, np.max(pts_input[j][:,1])])\n", + " y_min = np.min([y_min, np.min(pts_input[j][:,1])])\n", + " z_max = np.max([z_max, np.max(pts_input[j][:,2])])\n", + " z_min = np.min([z_min, np.min(pts_input[j][:,2])])\n", + "print(x_min, x_max)\n", + "print(y_min, y_max)\n", + "print(z_min, z_max)\n", + "# 1.8940000534057617 70.76299285888672\n", + "# -39.99699783325195 40.195003509521484\n", + "# -3.204000234603882 2.0290002822875977" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# import numba\n", + "\n", + "# [pillars, voxels], \\\n", + "# [occupancy, position, size, angle, heading, classification], \\\n", + "# [pts_input, gt_boxes3d, sample] = validation_gen[8]\n", + "\n", + "\n", + "# @numba.jit(nopython=True)\n", + "# def _points_to_bevmap_reverse_kernel(\n", + "# points,\n", + "# voxel_size,\n", + "# coors_range,\n", + "# coor_to_voxelidx,\n", + "# # coors_2d,\n", + "# bev_map,\n", + "# height_lowers,\n", + "# # density_norm_num=16,\n", + "# with_reflectivity=False,\n", + "# max_voxels=40000):\n", + "# # put all computations to one loop.\n", + "# # we shouldn't create large array in main jit code, otherwise\n", + "# # reduce performance\n", + "# N = points.shape[0]\n", + "# ndim = 3\n", + "# ndim_minus_1 = ndim - 1\n", + 
"# grid_size = (coors_range[3:] - coors_range[:3]) / voxel_size\n", + "# # np.round(grid_size)\n", + "# # grid_size = np.round(grid_size).astype(np.int64)(np.int32)\n", + "# grid_size = np.round(grid_size, 0, grid_size).astype(np.int32)\n", + "# height_slice_size = voxel_size[-1]\n", + "# coor = np.zeros(shape=(3, ), dtype=np.int32) # DHW\n", + "# voxel_num = 0\n", + "# failed = False\n", + "# for i in range(N):\n", + "# failed = False\n", + "# for j in range(ndim):\n", + "# c = np.floor((points[i, j] - coors_range[j]) / voxel_size[j])\n", + "# if c < 0 or c >= grid_size[j]:\n", + "# failed = True\n", + "# break\n", + "# coor[ndim_minus_1 - j] = c\n", + "# if failed:\n", + "# continue\n", + "# voxelidx = coor_to_voxelidx[coor[0], coor[1], coor[2]]\n", + "# if voxelidx == -1:\n", + "# voxelidx = voxel_num\n", + "# if voxel_num >= max_voxels:\n", + "# break\n", + "# voxel_num += 1\n", + "# coor_to_voxelidx[coor[0], coor[1], coor[2]] = voxelidx\n", + "# # coors_2d[voxelidx] = coor[1:]\n", + "# bev_map[-1, coor[1], coor[2]] += 1\n", + "# height_norm = bev_map[coor[0], coor[1], coor[2]]\n", + "# incomimg_height_norm = (\n", + "# points[i, 2] - height_lowers[coor[0]]) / height_slice_size\n", + "# if incomimg_height_norm > height_norm:\n", + "# bev_map[coor[0], coor[1], coor[2]] = incomimg_height_norm\n", + "# if with_reflectivity:\n", + "# bev_map[-2, coor[1], coor[2]] = points[i, 3]\n", + "# # return voxel_num\n", + "\n", + "# def points_to_bev(points,\n", + "# voxel_size,\n", + "# coors_range,\n", + "# with_reflectivity=False,\n", + "# density_norm_num=16,\n", + "# max_voxels=40000):\n", + "# \"\"\"convert kitti points(N, 4) to a bev map. return [C, H, W] map.\n", + "# this function based on algorithm in points_to_voxel.\n", + "# takes 5ms in a reduced pointcloud with voxel_size=[0.1, 0.1, 0.8]\n", + "\n", + "# Args:\n", + "# points: [N, ndim] float tensor. 
points[:, :3] contain xyz points and\n", + "# points[:, 3] contain reflectivity.\n", + "# voxel_size: [3] list/tuple or array, float. xyz, indicate voxel size\n", + "# coors_range: [6] list/tuple or array, float. indicate voxel range.\n", + "# format: xyzxyz, minmax\n", + "# with_reflectivity: bool. if True, will add a intensity map to bev map.\n", + "# Returns:\n", + "# bev_map: [num_height_maps + 1(2), H, W] float tensor. \n", + "# `WARNING`: bev_map[-1] is num_points map, NOT density map, \n", + "# because calculate density map need more time in cpu rather than gpu. \n", + "# if with_reflectivity is True, bev_map[-2] is intensity map. \n", + "# \"\"\"\n", + "# if not isinstance(voxel_size, np.ndarray):\n", + "# voxel_size = np.array(voxel_size, dtype=points.dtype)\n", + "# if not isinstance(coors_range, np.ndarray):\n", + "# coors_range = np.array(coors_range, dtype=points.dtype)\n", + "# voxelmap_shape = (coors_range[3:] - coors_range[:3]) / voxel_size\n", + "# voxelmap_shape = tuple(np.round(voxelmap_shape).astype(np.int32).tolist())\n", + "# voxelmap_shape = voxelmap_shape[::-1] # DHW format\n", + "# coor_to_voxelidx = -np.ones(shape=voxelmap_shape, dtype=np.int32)\n", + "# # coors_2d = np.zeros(shape=(max_voxels, 2), dtype=np.int32)\n", + "# bev_map_shape = list(voxelmap_shape)\n", + "# bev_map_shape[0] += 1\n", + "# height_lowers = np.linspace(\n", + "# coors_range[2], coors_range[5], voxelmap_shape[0], endpoint=False)\n", + "# if with_reflectivity:\n", + "# bev_map_shape[0] += 1\n", + "# bev_map = np.zeros(shape=bev_map_shape, dtype=points.dtype)\n", + "# _points_to_bevmap_reverse_kernel(points, voxel_size, coors_range,\n", + "# coor_to_voxelidx, bev_map, height_lowers,\n", + "# with_reflectivity, max_voxels)\n", + "# # print(voxel_num)\n", + "# return bev_map\n", + "\n", + "# def point_to_vis_bev(points,\n", + "# voxel_size=None,\n", + "# coors_range=None,\n", + "# max_voxels=80000):\n", + "# if voxel_size is None:\n", + "# voxel_size = [0.1, 0.1, 
0.1]\n", + "# if coors_range is None:\n", + "# coors_range = [-50, -50, -3, 50, 50, 1]\n", + "# voxel_size[2] = coors_range[5] - coors_range[2]\n", + "# bev_map = points_to_bev(\n", + "# points, voxel_size, coors_range, max_voxels=max_voxels)\n", + "# height_map = (bev_map[0] * 255).astype(np.uint8)\n", + "# return cv2.cvtColor(height_map, cv2.COLOR_GRAY2RGB)\n", + "\n", + "# img = point_to_vis_bev(pts_input[0], voxel_size=[params.x_step/2, params.y_step/2, 0.01],\n", + "# coors_range=[params.x_min, params.y_min+20.32, params.z_min, \n", + "# params.x_max-40.32, params.y_max-20.32, params.z_max] )\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# fig1 = plt.figure(figsize=(10,10))\n", + "# ax = fig1.add_subplot(111)\n", + "# plt.imshow(img)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/CMakeLists.txt b/CMakeLists.txt index c21696f..0dea901 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,5 @@ cmake_minimum_required(VERSION 3.5) project(point_pillars) add_subdirectory(pybind11) -pybind11_add_module(point_pillars SHARED src/point_pillars.cpp) \ No newline at end of file +pybind11_add_module(point_pillars SHARED src/point_pillars.cpp) +pybind11_add_module(point_pillars_v2 SHARED src/point_pillars_v2.cpp) \ No newline at end of file diff --git a/README.md b/README.md index 54c2ba0..2e415ef 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,10 @@ +# Forked from (https://github.com/tyagi-iiitv/PointPillars.git) [Under Development] + +The files with the v2_2 extensions are scripts that do not do 
class regression. +They only learn to classify a single class. The confidence of objectness is used to determine +whether there is an object of a particular class or not. + + # About Point Pillars Point Pillars is a very famous Deep Neural Network for 3D Object Detection for LiDAR point clouds. With the application of object detection on the LiDAR devices fitted in the self driving cars, Point Pillars focuse on fast inference ~50fps, which was magnitudes above as compared to other networks for 3D Object detection. In this repo, we are trying to develop point pillars in TensorFlow. [Here's](https://medium.com/@a_tyagi/pointpillars-3d-point-clouds-bounding-box-detection-and-tracking-pointnet-pointnet-lasernet-67e26116de5a?source=friends_link&sk=4a27f55f2cea645af39f72117984fd22) a good first post to familiarize yourself with Point Pillars. diff --git a/Visualizing_Point_Pillar_Image.ipynb b/Visualizing_Point_Pillar_Image.ipynb new file mode 100644 index 0000000..8835ae9 --- /dev/null +++ b/Visualizing_Point_Pillar_Image.ipynb @@ -0,0 +1,365 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import time\n", + "import numpy as np\n", + "# import tensorflow as tf\n", + "from glob import glob\n", + "import cv2\n", + "\n", + "from config import Parameters\n", + "# from processors import SimpleDataGenerator\n", + "from custom_processors import AnalyseCustomDataGenerator\n", + "from det3d.pc_kitti_dataset import PCKittiAugmentedDataset\n", + "\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# DATA_ROOT = \"/media/data3/tjtanaa/kitti_dataset/KITTI/object/training\" # TODO make main arg\n", + "DATA_ROOT = \"/media/data3/tjtanaa/kitti_dataset/\" # TODO make main arg\n", + "MODEL_ROOT = \"./logs_Car_Pedestrian_Custom_Dataset_single_process\"" + ] + }, + { + "cell_type": "code", + 
"execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "params = Parameters()\n", + "\n", + "gt_database_dir = os.path.join(DATA_ROOT, \"gt_database\")\n", + "\n", + "training_gen = AnalyseCustomDataGenerator(batch_size=params.batch_size,root_dir = DATA_ROOT,\n", + " npoints=20000, split='train', classes=list(params.classes_map.keys()), \n", + " random_select=False, gt_database_dir=None, aug_hard_ratio=0.7)\n", + "\n", + "validation_gen = AnalyseCustomDataGenerator(batch_size=params.batch_size, root_dir=DATA_ROOT, \n", + " npoints=20000, split='val', random_select=False, classes=list(params.classes_map.keys()))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# # get the min and max (range) of every axis\n", + "# x_max = np.array(-9999.0)\n", + "# x_min = np.array(9999.0)\n", + "# y_max = np.array(-9999.0)\n", + "# y_min = np.array(9999.0)\n", + "# z_max = np.array(-9999.0)\n", + "# z_min = np.array(9999.0)\n", + "# for i in range(len(training_gen)):\n", + "# [pillars, voxels], \\\n", + "# [occupancy, position, size, angle, heading, classification], \\\n", + "# [pts_input, gt_boxes3d, sample] = training_gen[i]\n", + "# for pts in pts_input:\n", + "# # print(type(pts))\n", + "# # print(pts.shape)\n", + "# x_max = np.max([x_max, np.max(pts[:,0])])\n", + "# x_min = np.min([x_min, np.min(pts[:,0])])\n", + "# y_max = np.max([y_max, np.max(pts[:,1])])\n", + "# y_min = np.min([y_min, np.min(pts[:,1])])\n", + "# z_max = np.max([z_max, np.max(pts[:,2])])\n", + "# z_min = np.min([z_min, np.min(pts[:,2])])\n", + "# print(x_min, x_max)\n", + "# print(y_min, y_max)\n", + "# print(z_min, z_max)\n", + "# 1.349664568901062 78.88325500488281\n", + "# -52.52265167236328 50.9976806640625\n", + "# -1.0496952533721924 3.1484153270721436\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING: 
/home/tan/tjtanaa/PointPillars/visualization/custom_processor already exists, operation skipped.\n", + "WARNING: /home/tan/tjtanaa/PointPillars/visualization/custom_processor/data already exists, operation skipped.\n", + "WARNING: /home/tan/tjtanaa/PointPillars/visualization/custom_processor/html already exists, operation skipped.\n", + "36\n", + "37\n", + "38\n", + "39\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAk4AAAJBCAYAAACwDzogAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAUyklEQVR4nO3dT8ild3n/8c/1S2wW6iLiLyGdDDXIFBq7iGUIhUBJF21SN6MLy7iQFIS4SEChiyZudCml2p3CiKEpWNMBFbMobdMguDOZCUEzmaYOTWrGGTKIBW0XKYlXF8+dekyeyVwzz59zHny9YDjnfM99n+c7fOdm3pz7Puep7g4AAFf2/9Y9AQCAg0I4AQAMCScAgCHhBAAwJJwAAIaEEwDA0J6FU1XdW1UvVNW5qnpor34OAMB+qb34Hqequi7JvyX5oyTnkzyd5GPd/fyu/zAAgH1y/R697p1JznX3vydJVT2W5FiSbcOpqnwLJwCw337S3f//anbYq1N1h5K8vPL4/DL2f6rq/qo6VVWn9mgOAABv5z+udoe9esepthn7lXeVuvtEkhOJd5wAgINhr95xOp/k8MrjW5Nc2KOfBQCwL/YqnJ5OcqSqbquq30hyPMnje/SzAAD2xZ6cquvu16rqwST/lOS6JI9095m9+FkAAPtlT76O4Kon4RonAGD/ne7uo1ezg28OBwAYEk4AAEPCCQBgSDgBAAwJJwCAIeEEADAknAAAhoQTAMCQcAIAGBJOAABDwgkAYEg4AQAMCScAgCHhBAAwJJwAAIaEEwDAkHACABgSTgAAQ8IJAGBIOAEADAknAIAh4QQAMCScAACGhBMAwJBwAgAYEk4AAEPCCQBgSDgBAAwJJwCAIeEEADAknAAAhoQTAMCQcAIAGBJOAABDwgkAYEg4AQAMCScAgCHhBAAwJJwAAIaEEwDAkHACABgSTgAAQ8IJAGBIOAEADAknAIAh4QQAMCScAACGhBMAwJBwAgAYEk4AAEPCCQBgSDgBAAwJJwCAIeEEADAknAAAhoQTAMCQcAIAGBJOAABDwgkAYEg4AQAMCScAgCHhBAAwJJwAAIaEEwDAkHACABgSTgAAQ8IJAGBIOAEADAknAIAh4QQAMCScAACGhBMAwJBwAgAYEk4AAEPCCQBgSDgBAAwJJwCAIeEEADAknAAAhoQTAMCQcAIAGBJOAABDwgkAYEg4AQAMCScAgCHhBAAwJJwAAIaEEwDAkHACABgSTgAAQ8IJAGBIOAEADAknAIAh4QQAMCScAACGhBMAwJBwAgAYEk4AAEPCCQBg6Pqd7FxVLyX5eZLXk7zW3Uer6j1J/j7J+5K8lORPu/s/dzZNAID12413nP6wu+/o7qPL44eSPNndR5I8uTwGADjw9uJU3bEkjy73H03y4T34GQAA+26n4dRJ/rmqTlfV/c
vYzd19MUmW25u227Gq7q+qU1V1aodzAADYFzu6xinJXd19oapuSvJEVf3rdMfuPpHkRJJUVe9wHgAAe25H7zh194Xl9lKSbyW5M8krVXVLkiy3l3Y6SQCATXDN4VRV76yqd79xP8kfJ3kuyeNJ7ls2uy/Jt3c6SQCATbCTU3U3J/lWVb3xOn/X3f9YVU8nOVlVn0jyoyQf3fk0AQDWr7rXf3mRa5wAgDU4vfJ1SiO+ORwAYEg4AQAMCScAgCHhBAAwJJwAAIaEEwDAkHACABgSTgAAQ8IJAGBIOAEADAknAIAh4QQAMCScAACGhBMAwJBwAgAYEk4AAEPCCQBgSDgBAAwJJwCAIeEEADAknAAAhoQTAMCQcAIAGBJOAABDwgkAYEg4AQAMCScAgCHhBAAwJJwAAIaEEwDAkHACABgSTgAAQ8IJAGBIOAEADAknAIAh4QQAMCScAACGhBMAwJBwAgAYEk4AAEPCCQBgSDgBAAwJJwCAIeEEADAknAAAhoQTAMCQcAIAGBJOAABDwgkAYEg4AQAMCScAgCHhBAAwJJwAAIaEEwDAkHACABgSTgAAQ8IJAGBIOAEADAknAIAh4QQAMCScAACGhBMAwJBwAgAYEk4AAEPCCQBgSDgBAAwJJwCAIeEEADAknAAAhoQTAMCQcAIAGBJOAABDwgkAYEg4AQAMCScAgCHhBAAwJJwAAIaEEwDAkHACABgSTgAAQ8IJAGBIOAEADAknAIAh4QQAMCScAACGhBMAwJBwAgAYEk4AAEPCCQBgSDgBAAwJJwCAIeEEADAknAAAhoQTAMCQcAIAGBJOAABDwgkAYEg4AQAMCScAgKErhlNVPVJVl6rquZWx91TVE1X1w+X2xpXnHq6qc1X1QlXds1cTBwDYb5N3nP4myb1vGnsoyZPdfSTJk8vjVNXtSY4n+cCyz5eq6rpdmy0AwBpdMZy6+7tJfvqm4WNJHl3uP5rkwyvjj3X3q939YpJzSe7cpbkCAKzVtV7jdHN3X0yS5famZfxQkpdXtju/jL1FVd1fVaeq6tQ1zgEAYF9dv8uvV9uM9XYbdveJJCeSpKq23QYAYJNc6ztOr1TVLUmy3F5axs8nObyy3a1JLlz79AAANse1htPjSe5b7t+X5Nsr48er6oaqui3JkSRP7WyKAACb4Yqn6qrq60nuTvLeqjqf5LNJPp/kZFV9IsmPknw0Sbr7TFWdTPJ8kteSPNDdr+/R3AEA9lV1r//yItc4AQBrcLq7j17NDr45HABgSDgBAAwJJwCAIeEEADAknAAAhoQTAMCQcAIAGBJOAABDwgkAYEg4AQAMCScAgCHhBAAwJJwAAIaEEwDAkHACABgSTgAAQ8IJAGBIOAEADAknAIAh4QQAMCScAACGhBMAwJBwAgAYEk4AAEPCCQBgSDgBAAwJJwCAIeEEADAknAAAhoQTAMCQcAIAGBJOAABDwgkAYEg4AQAMCScAgCHhBAAwJJwAAIaEEwDAkHACABgSTgAAQ8IJAGBIOAEADAknAIAh4QQAMCScAACGhBMAwJBwAgAYEk4AAEPCCQBgSDgBAAwJJwCAIeEEADAknAAAhoQTAMCQcAIAGBJOAABDwgkAYEg4AQAMCScAgCHhBAAwJJwAAIaEEwDAkHACABgSTgAAQ8IJAGBIOAEADAknAIAh4QQAMCScAACGhBMAwJBwAgAYEk4AAEPCCQBgSDgBAAwJJwCAIeEEADAknAAAhoQTAMCQcAIAGBJOAABDwgkAYEg4AQAMCScAgCHhBAAwJJwAAIaEEwDAkHACABgSTgAAQ8IJAGBIOAEADAknAIAh4QQAMCScAACGhBMAwJBwAgAYEk4AAEPCCQBgSDgBAAwJJwCAoSuGU1U9UlWXquq5lbHPVdWPq+rZ5c+HVp57uKrOVdULVXXPXk0cAGC/Td5x+psk924z/tfdfcfy5x+SpKpuT3I8yQeWfb5UVdft1mQBANbpiuHU3d9N8tPh6x1L8l
h3v9rdLyY5l+TOHcwPAGBj7OQapwer6vvLqbwbl7FDSV5e2eb8MvYWVXV/VZ2qqlM7mAMAwL651nD6cpL3J7kjycUkX1jGa5tte7sX6O4T3X20u49e4xwAAPbVNYVTd7/S3a939y+SfCW/PB13PsnhlU1vTXJhZ1MEANgM1xROVXXLysOPJHnjE3ePJzleVTdU1W1JjiR5amdTBADYDNdfaYOq+nqSu5O8t6rOJ/lskrur6o5snYZ7Kcknk6S7z1TVySTPJ3ktyQPd/freTB0AYH9V97aXIO3vJKrWPwkA4NfN6au91to3hwMADAknAIAh4QQAMCScAACGhBMAwJBwAgAYEk4AAEPCCQBgSDgBAAwJJwCAIeEEADAknAAAhoQTAMCQcAIAGBJOAABDwgkAYEg4AQAMCScAgCHhBAAwJJwAAIaEEwDAkHACABgSTgAAQ8IJAGBIOAEADAknAIAh4QQAMCScAACGhBMAwJBwAgAYEk4AAEPCCQBgSDgBAAwJJwCAIeEEADAknAAAhoQTAMCQcAIAGBJOAABDwgkAYEg4AQAMCScAgCHhBAAwJJwAAIaEEwDAkHACABgSTgAAQ8IJAGBIOAEADAknAIAh4QQAMCScAACGhBMAwJBwAgAYEk4AAEPCCQBgSDgBAAwJJwCAIeEEADAknAAAhoQTAMCQcAIAGBJOAABDwgkAYEg4AQAMCScAgCHhBAAwJJwAAIaEEwDAkHACABgSTgAAQ8IJAGBIOAEADAknAIAh4QQAMCScAACGhBMAwJBwAgAYEk4AAEPCCQBgSDgBAAwJJwCAIeEEADAknAAAhoQTAMCQcAIAGBJOAABDwgkAYEg4AQAMCScAgCHhBAAwJJwAAIaEEwDAkHACABgSTgAAQ8IJAGBIOAEADAknAIAh4QQAMHTFcKqqw1X1nao6W1VnqupTy/h7quqJqvrhcnvjyj4PV9W5qnqhqu7Zy78AAMB+mbzj9FqSP+/u30ny+0keqKrbkzyU5MnuPpLkyeVxlueOJ/lAknuTfKmqrtuLyQMA7KcrhlN3X+zuZ5b7P09yNsmhJMeSPLps9miSDy/3jyV5rLtf7e4Xk5xLcuduTxwAYL9d1TVOVfW+JB9M8r0kN3f3xWQrrpLctGx2KMnLK7udX8be/Fr3V9Wpqjp19dMGANh/1083rKp3JflGkk9398+q6rKbbjPWbxnoPpHkxPLab3keAGDTjN5xqqp3ZCuavtbd31yGX6mqW5bnb0lyaRk/n+Twyu63JrmwO9MFAFifyafqKslXk5zt7i+uPPV4kvuW+/cl+fbK+PGquqGqbktyJMlTuzdlAID1mJyquyvJx5P8oKqeXcY+k+TzSU5W1SeS/CjJR5Oku89U1ckkz2frE3kPdPfruz5zAIB9Vt3rv7zINU4AwBqc7u6jV7ODbw4HABgSTgAAQ8IJAGBIOAEADAknAIAh4QQAMCScAACGhBMAwJBwAgAYEk4AAEPCCQBgSDgBAAwJJwCAIeEEADAknAAAhoQTAMCQcAIAGBJOAABDwgkAYEg4AQAMCScAgCHhBAAwJJwAAIaEEwDAkHACABgSTgAAQ8IJAGBIOAEADAknAIAh4QQAMCScAACGhBMAwJBwAgAYEk4AAEPCCQBgSDgBAAwJJwCAIeEEADAknAAAhoQTAMCQcAIAGBJOAABDwgkAYEg4AQAMCScAgCHhBAAwJJwAAIaEEwDAkHACABgSTgAAQ8IJAGBIOAEADAknAIAh4QQAMCScAACGhBMAwJBwAgAYEk4AAEPCCQBgSDgBAAwJJwCAIeEEADAknAAAhoQTAMCQcAIAGBJOAABDwgkAYEg4AQAMCScAgCHhBAAwJJwAAIaEEwDAkHACABgSTgAAQ8IJAGBIOAEADAknAIAh4QQAMCScAACGhBMAwJBwAgAYEk4AAEPCCQBgSDgBAAwJJwCAIeEEADAknAAAhoQTAMCQcAIAGBJOAABDwgkAYEg4AQAMCS
cAgCHhBAAwJJwAAIaEEwDAkHACABgSTgAAQ1cMp6o6XFXfqaqzVXWmqj61jH+uqn5cVc8ufz60ss/DVXWuql6oqnv28i8AALBfrh9s81qSP+/uZ6rq3UlOV9UTy3N/3d1/tbpxVd2e5HiSDyT5zST/UlW/3d2v7+bEAQD22xXfcerui939zHL/50nOJjn0NrscS/JYd7/a3S8mOZfkzt2YLADAOl3VNU5V9b4kH0zyvWXowar6flU9UlU3LmOHkry8stv5vH1oAQAcCONwqqp3JflGkk9398+SfDnJ+5PckeRiki+8sek2u/c2r3d/VZ2qqlNXPWsAgDUYhVNVvSNb0fS17v5mknT3K939enf/IslX8svTceeTHF7Z/dYkF978mt19oruPdvfRnfwFAAD2y+RTdZXkq0nOdvcXV8ZvWdnsI0meW+4/nuR4Vd1QVbclOZLkqd2bMgDAekw+VXdXko8n+UFVPbuMfSbJx6rqjmydhnspySeTpLvPVNXJJM9n6xN5Dww+UfeTJP+93HIwvDfW6yCxXgeL9TpYrNfBsrpev3W1O1f3Wy4/WouqOuW03cFhvQ4W63WwWK+DxXodLDtdL98cDgAwJJwAAIY2KZxOrHsCXBXrdbBYr4PFeh0s1utg2dF6bcw1TgAAm26T3nECANhowgkAYGgjwqmq7q2qF6rqXFU9tO758FZV9VJV/aCqnn3j1+RU1Xuq6omq+uFye+OVXoe9sfy+yEtV9dzK2GXXp6oeXo63F6rqnvXM+tfXZdbrc1X14+UYe7aqPrTynPVak6o6XFXfqaqzVXWmqj61jDu+NtDbrNeuHV9rv8apqq5L8m9J/ihbv67l6SQf6+7n1zoxfkVVvZTkaHf/ZGXsL5P8tLs/vwTvjd39F+ua46+zqvqDJP+V5G+7+3eXsW3Xp6puT/L1bP2apN9M8i9JfnvwRbXsksus1+eS/Fd3/9WbtrVea7T8loxbuvuZqnp3ktNJPpzkz+L42jhvs15/ml06vjbhHac7k5zr7n/v7v9J8liSY2ueEzPHkjy63H80W/84WYPu/m6Sn75p+HLrcyzJY939ane/mORcfvm7JtkHl1mvy7Fea9TdF7v7meX+z5OcTXIojq+N9DbrdTlXvV6bEE6Hkry88vh83v4vyXp0kn+uqtNVdf8ydnN3X0y2/rEmuWlts2M7l1sfx9zmerCqvr+cynvj1I/12hBV9b4kH0zyvTi+Nt6b1ivZpeNrE8KpthnzHQmb567u/r0kf5LkgeVUAweTY24zfTnJ+5PckeRiki8s49ZrA1TVu5J8I8mnu/tnb7fpNmPWa59ts167dnxtQjidT3J45fGtSS6saS5cRndfWG4vJflWtt7KfGU5n/zGeeVL65sh27jc+jjmNlB3v9Ldr3f3L5J8Jb88XWC91qyq3pGt/4S/1t3fXIYdXxtqu/XazeNrE8Lp6SRHquq2qvqNJMeTPL7mObGiqt65XGSXqnpnkj9O8ly21um+ZbP7knx7PTPkMi63Po8nOV5VN1TVbUmOJHlqDfNjxRv/CS8+kq1jLLFea1VVleSrSc529xdXnnJ8baDLrdduHl/X7+6Ur153v1ZVDyb5pyTXJXmku8+seVr8qpuTfGvr32OuT/J33f2PVfV0kpNV9YkkP0ry0TXO8ddaVX09yd1J3ltV55N8Nsnns836dPeZqjqZ5PkkryV5wCd+9tdl1uvuqrojW6cJXkryycR6bYC7knw8yQ+q6tll7DNxfG2qy63Xx3br+Fr71xEAABwUm3CqDgDgQBBOAABDwgkAYEg4AQAMCScAgCHhBAAwJJwAAIb+F/eiKo2m6X3aAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "[pillars, voxels], \\\n", + "[occupancy, position, size, angle, heading, classification], \\\n", + "[pts_input, gt_boxes3d, sample] = validation_gen[9]\n", + "# print(pillars[0].shape)\n", + "# print(voxels[0].shape)\n", + "# print(occupancy[0].shape)\n", + "# # print(occupancy[0,0,:])\n", + "# print(angle[0].shape)\n", + "\n", + "# for i in range(len(occupancy)):\n", + "# print(i, \"l \")\n", + "# print(np.sum(occupancy[i][:,:,0] == 0))\n", + "# print(np.sum(occupancy[i][:,:,0] == 1))\n", + "# print(np.sum(occupancy[i][:,:,0] == -1))\n", + "# print(np.sum(occupancy[i][:,:,1] == 0))\n", + "# print(np.sum(occupancy[i][:,:,1] == 1))\n", + "# print(np.sum(occupancy[i][:,:,1] == -1))\n", + "# print(np.sum(occupancy[i][:,:,2] == 0))\n", + "# print(np.sum(occupancy[i][:,:,2] == 1))\n", + "# print(np.sum(occupancy[i][:,:,2] == -1))\n", + "# print(np.sum(occupancy[i][:,:,3] == 0))\n", + "# print(np.sum(occupancy[i][:,:,3] == 1))\n", + "# print(np.sum(occupancy[i][:,:,3] == -1))\n", + "\n", + "\n", + "fig = plt.figure(figsize=(10,10)) \n", + " \n", + "ax = fig.add_subplot(111) \n", + "ax.imshow(occupancy[0][:,:,3] == 1, cmap = plt.cm.gray, \n", + " interpolation ='nearest') " + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING: /home/tan/tjtanaa/PointPillars/visualization/custom_processor already exists, operation skipped.\n", + "WARNING: /home/tan/tjtanaa/PointPillars/visualization/custom_processor/data already exists, operation skipped.\n", + "WARNING: /home/tan/tjtanaa/PointPillars/visualization/custom_processor/html already exists, operation skipped.\n", + "32\n", + "33\n", + "34\n", + "35\n" + ] + } + ], + "source": [ + "\n", + "import numba\n", + "\n", + "[pillars, voxels], \\\n", + "[occupancy, position, size, angle, heading, 
classification], \\\n", + "[pts_input, gt_boxes3d, sample] = validation_gen[8]\n", + "\n", + "\n", + "@numba.jit(nopython=True)\n", + "def _points_to_bevmap_reverse_kernel(\n", + " points,\n", + " voxel_size,\n", + " coors_range,\n", + " coor_to_voxelidx,\n", + " # coors_2d,\n", + " bev_map,\n", + " height_lowers,\n", + " # density_norm_num=16,\n", + " with_reflectivity=False,\n", + " max_voxels=40000):\n", + " # put all computations to one loop.\n", + " # we shouldn't create large array in main jit code, otherwise\n", + " # reduce performance\n", + " N = points.shape[0]\n", + " ndim = 3\n", + " ndim_minus_1 = ndim - 1\n", + " grid_size = (coors_range[3:] - coors_range[:3]) / voxel_size\n", + " # np.round(grid_size)\n", + " # grid_size = np.round(grid_size).astype(np.int64)(np.int32)\n", + " grid_size = np.round(grid_size, 0, grid_size).astype(np.int32)\n", + " height_slice_size = voxel_size[-1]\n", + " coor = np.zeros(shape=(3, ), dtype=np.int32) # DHW\n", + " voxel_num = 0\n", + " failed = False\n", + " for i in range(N):\n", + " failed = False\n", + " for j in range(ndim):\n", + " c = np.floor((points[i, j] - coors_range[j]) / voxel_size[j])\n", + " if c < 0 or c >= grid_size[j]:\n", + " failed = True\n", + " break\n", + " coor[ndim_minus_1 - j] = c\n", + " if failed:\n", + " continue\n", + " voxelidx = coor_to_voxelidx[coor[0], coor[1], coor[2]]\n", + " if voxelidx == -1:\n", + " voxelidx = voxel_num\n", + " if voxel_num >= max_voxels:\n", + " break\n", + " voxel_num += 1\n", + " coor_to_voxelidx[coor[0], coor[1], coor[2]] = voxelidx\n", + " # coors_2d[voxelidx] = coor[1:]\n", + " bev_map[-1, coor[1], coor[2]] += 1\n", + " height_norm = bev_map[coor[0], coor[1], coor[2]]\n", + " incomimg_height_norm = (\n", + " points[i, 2] - height_lowers[coor[0]]) / height_slice_size\n", + " if incomimg_height_norm > height_norm:\n", + " bev_map[coor[0], coor[1], coor[2]] = incomimg_height_norm\n", + " if with_reflectivity:\n", + " bev_map[-2, coor[1], coor[2]] = points[i, 
3]\n", + " # return voxel_num\n", + "\n", + "def points_to_bev(points,\n", + " voxel_size,\n", + " coors_range,\n", + " with_reflectivity=False,\n", + " density_norm_num=16,\n", + " max_voxels=40000):\n", + " \"\"\"convert kitti points(N, 4) to a bev map. return [C, H, W] map.\n", + " this function based on algorithm in points_to_voxel.\n", + " takes 5ms in a reduced pointcloud with voxel_size=[0.1, 0.1, 0.8]\n", + "\n", + " Args:\n", + " points: [N, ndim] float tensor. points[:, :3] contain xyz points and\n", + " points[:, 3] contain reflectivity.\n", + " voxel_size: [3] list/tuple or array, float. xyz, indicate voxel size\n", + " coors_range: [6] list/tuple or array, float. indicate voxel range.\n", + " format: xyzxyz, minmax\n", + " with_reflectivity: bool. if True, will add a intensity map to bev map.\n", + " Returns:\n", + " bev_map: [num_height_maps + 1(2), H, W] float tensor. \n", + " `WARNING`: bev_map[-1] is num_points map, NOT density map, \n", + " because calculate density map need more time in cpu rather than gpu. \n", + " if with_reflectivity is True, bev_map[-2] is intensity map. 
\n", + " \"\"\"\n", + " if not isinstance(voxel_size, np.ndarray):\n", + " voxel_size = np.array(voxel_size, dtype=points.dtype)\n", + " if not isinstance(coors_range, np.ndarray):\n", + " coors_range = np.array(coors_range, dtype=points.dtype)\n", + " voxelmap_shape = (coors_range[3:] - coors_range[:3]) / voxel_size\n", + " voxelmap_shape = tuple(np.round(voxelmap_shape).astype(np.int32).tolist())\n", + " voxelmap_shape = voxelmap_shape[::-1] # DHW format\n", + " coor_to_voxelidx = -np.ones(shape=voxelmap_shape, dtype=np.int32)\n", + " # coors_2d = np.zeros(shape=(max_voxels, 2), dtype=np.int32)\n", + " bev_map_shape = list(voxelmap_shape)\n", + " bev_map_shape[0] += 1\n", + " height_lowers = np.linspace(\n", + " coors_range[2], coors_range[5], voxelmap_shape[0], endpoint=False)\n", + " if with_reflectivity:\n", + " bev_map_shape[0] += 1\n", + " bev_map = np.zeros(shape=bev_map_shape, dtype=points.dtype)\n", + " _points_to_bevmap_reverse_kernel(points, voxel_size, coors_range,\n", + " coor_to_voxelidx, bev_map, height_lowers,\n", + " with_reflectivity, max_voxels)\n", + " # print(voxel_num)\n", + " return bev_map\n", + "\n", + "def point_to_vis_bev(points,\n", + " voxel_size=None,\n", + " coors_range=None,\n", + " max_voxels=80000):\n", + " if voxel_size is None:\n", + " voxel_size = [0.1, 0.1, 0.1]\n", + " if coors_range is None:\n", + " coors_range = [-50, -50, -3, 50, 50, 1]\n", + " voxel_size[2] = coors_range[5] - coors_range[2]\n", + " bev_map = points_to_bev(\n", + " points, voxel_size, coors_range, max_voxels=max_voxels)\n", + " height_map = (bev_map[0] * 255).astype(np.uint8)\n", + " return cv2.cvtColor(height_map, cv2.COLOR_GRAY2RGB)\n", + "\n", + "img = point_to_vis_bev(pts_input[0], voxel_size=[params.x_step/2, params.y_step/2, 0.01],\n", + " coors_range=[params.x_min, params.y_min+20.32, params.z_min, \n", + " params.x_max-40.32, params.y_max-20.32, params.z_max] )\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, 
+ "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "fig1 = plt.figure(figsize=(10,10))\n", + "ax = fig1.add_subplot(111)\n", + "plt.imshow(img)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/compute_intersection_single_element.ipynb b/compute_intersection_single_element.ipynb new file mode 100644 index 0000000..d158d52 --- /dev/null +++ b/compute_intersection_single_element.ipynb @@ -0,0 +1,389 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import numba\n", + "from numba import jit" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# @jit(nopython=True, parallel=True)\n", + "def compute_intersection_single_element(x1,y1, x2,y2, x3,y3, x4,y4, \n", + " delta_x_tolerance=1e-6, grad_diff_tolerance=1e-6,\n", + " verbose=False):\n", + " \n", + " \n", + " inf_slope_status = np.array([0,0], np.int32)\n", + " parallel_flag = False # for readability\n", + " # check line 1 conditions\n", + " dx_line_1 = x2 - x1\n", + " dy_line_1 = y2 - y1\n", + " if np.abs(dx_line_1) < delta_x_tolerance:\n", + " inf_slope_status[0] = 1\n", + " \n", + " # check line 2 conditions\n", + " dx_line_2 = x4 - x3\n", + " dy_line_2 = y4 - y3\n", + " if np.abs(dx_line_2) < delta_x_tolerance:\n", + " inf_slope_status[1] = 1\n", + " \n", + " # if both have inf slope, they are parallel\n", + " parallel_flag = (np.sum(inf_slope_status) == 2)\n", + " if parallel_flag:\n", + " if 
verbose:\n", + " print(\"compute_intersection_single_element: Lines are parallel and have inf slope\")\n", + " return (-9999,-9999) # this value is chosen as it will be outside \n", + " # of our detection range (to be filtered later)\n", + " \n", + " # since one is inf and the other is not, they are not parallel\n", + " if (np.sum(inf_slope_status) > 0):\n", + " # there is one line that has inf slope\n", + " if (inf_slope_status[0] == 1): # line one has infinite slope\n", + " x = x1 # pick either one point x1 or x2 as they are the \"equal\"\n", + " y = (x - x3)*(y4-y3) / (x4-x3) + y3\n", + " if verbose:\n", + " print(\"compute_intersection_single_element: Line1 has inf slope\")\n", + " return (x, y)\n", + " \n", + " if (inf_slope_status[1] == 1):\n", + " x = x3 # pick either one point x3 or x4 as they are the \"equal\"\n", + " y = (x - x1)*(y2-y1) / (x2-x1) + y1\n", + " if verbose:\n", + " print(\"compute_intersection_single_element: Line2 has inf slope\")\n", + " return (x, y)\n", + " \n", + " else:\n", + " \n", + " # check both line conditions\n", + " m1 = dy_line_1 / dx_line_1\n", + " m2 = dy_line_2 / dx_line_2\n", + "\n", + " # Note: if the two lines are collinear, they are parallel\n", + " if np.abs(m1 - m2) < grad_diff_tolerance:\n", + " parallel_flag = True\n", + "\n", + "\n", + " if parallel_flag:\n", + " if verbose:\n", + " print(\"compute_intersection_single_element: Lines are parallel\")\n", + " return (-9999,-9999) # this value is chosen as it will be outside \n", + " # of our detection range (to be filtered later)\n", + "\n", + " if verbose:\n", + " print(\"compute_intersection_single_element: No line has inf slope\")\n", + "\n", + " x = (m2 *x4 - m1* x2 - y4 + y2) / (m2 - m1)\n", + " y = m2 * (x - x4) + y4\n", + " return (x, y)\n", + " \n", + " raise ValueError # np.sum(inf_slope_status) is abnormal\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "# from PointPillar\n", + "def 
compute_intersection_single_element(x1,y1, x2,y2, x3,y3, x4,y4, verbose=True):\n", + " num = (x1*y2 - y1*x2) * (x3-x4) - (x1-x2) * (x3*y4 - y3*x4);\n", + " den = (x1-x2) * (y3-y4) - (y1-y2) * (x3-x4);\n", + " x = num/(den + 1e-6)\n", + " \n", + " num = (x1*y2 - y1*x2) * (y3-y4) - (y1-y2) * (x3*y4 - y3*x4);\n", + " den = (x1-x2) * (y3-y4) - (y1-y2) * (x3-x4);\n", + " y = num/(den + 1e-6)\n", + " return (x,y)\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "TestCase1i\n", + "(0.0, -8000000.0)\n", + "TestCase2i\n", + "(30000000.0, -30000000.0)\n", + "TestCase3i\n", + "(-0.0, 1.0000000185185187)\n", + "TestCase3ii\n", + "(0.8333333391203704, 1.1666666747685186)\n", + "TestCase4i\n", + "(0.8333333564814821, 1.1666666990740748)\n", + "TestCase4ii\n", + "(0.8333333101851859, 1.1666666342592602)\n", + "TestCase5i\n", + "(5.000000050000001, 2.0000000200000003)\n", + "TestCase5ii\n", + "(4.99999995, 1.9999999800000003)\n", + "TestCase6i\n", + "(5.000000125000003, 2.000000050000001)\n", + "TestCase6ii\n", + "(4.999999875000004, 1.9999999500000014)\n" + ] + } + ], + "source": [ + "# test the compute_intersection_single_element:\n", + "# test cases:\n", + "# 1. two lines have inf slope, both lines are parallel\n", + "# 2. no line has inf slope, both lines are parallel\n", + "# 3. two lines intersect internally, both lines are not parallel, no line has inf slope\n", + "# 4. two lines intersect externally, both lines are not parallel, no line has inf slope\n", + "# 5. two lines intersect internally, both lines are not parallel, 1 line has inf slope\n", + "# 6. two lines intersect externally, both lines are not parallel, 1 line has inf slope\n", + "# 7. two lines intersect internally, both lines are not parallel, one line has 0 slope\n", + "# 8. two lines intersect externally, both lines are not parallel, one line has 0 slope\n", + "# 9. 
two lines intersect internally, both lines are not parallel, 1 line has inf slope, 1 line has 0 slope\n", + "# 10. two lines intersect externally, both lines are not parallel, 1 line has inf slope, 1 line has 0 slope\n", + "\n", + "# case 1 (i)\n", + "class TestCase1i():\n", + " line1 = (-2, 1, -2, 3)\n", + " line2 = (-6, 2, -6, 3)\n", + " \n", + " def __init__(self):\n", + " print(type(self).__name__)\n", + " intersection = compute_intersection_single_element(*self.line1, *self.line2, verbose=True)\n", + " print(intersection)\n", + "\n", + "test1i = TestCase1i()\n", + "\n", + "class TestCase2i():\n", + " line1 = (0, 2, 2, 0)\n", + " line2 = (-3, 0, 0, -3)\n", + " \n", + " def __init__(self):\n", + " print(type(self).__name__)\n", + " intersection = compute_intersection_single_element(*self.line1, *self.line2, verbose=True)\n", + " print(intersection)\n", + "\n", + "test2i = TestCase2i()\n", + "\n", + "\n", + "class TestCase3i():\n", + " line1 = (-2, -1, 1, 2)\n", + " line2 = (-4, 5, 5, -4)\n", + " \n", + " def __init__(self):\n", + " print(type(self).__name__)\n", + " intersection = compute_intersection_single_element(*self.line1, *self.line2, verbose=True)\n", + " print(intersection)\n", + "\n", + "test3i = TestCase3i()\n", + "\n", + "\n", + "\n", + "class TestCase3ii():\n", + " line1 = (-10, -1, 5, 2)\n", + " line2 = (-4, 6, 4, -2)\n", + " \n", + " def __init__(self):\n", + " print(type(self).__name__)\n", + " intersection = compute_intersection_single_element(*self.line1, *self.line2, verbose=True)\n", + " print(intersection)\n", + "\n", + "test3ii = TestCase3ii()\n", + "\n", + "\n", + "\n", + "class TestCase4i():\n", + " line1 = (-10, -1, 5, 2)\n", + " line2 = (-4, 6, -2, 4)\n", + " \n", + " def __init__(self):\n", + " print(type(self).__name__)\n", + " intersection = compute_intersection_single_element(*self.line1, *self.line2, verbose=True)\n", + " print(intersection)\n", + "# intersection = compute_intersection_single_element(*self.line2, *self.line1, 
verbose=True)\n", + "# print(intersection)\n", + "\n", + "test4i = TestCase4i()\n", + "\n", + "class TestCase4ii():\n", + " line1 = (-10, -1, 5, 2)\n", + " line2 = (-2, 4, -4, 6) # reverse the order of points\n", + " \n", + " def __init__(self):\n", + " print(type(self).__name__)\n", + " intersection = compute_intersection_single_element(*self.line1, *self.line2, verbose=True)\n", + " print(intersection)\n", + "# intersection = compute_intersection_single_element(*self.line2, *self.line1, verbose=True)\n", + "# print(intersection)\n", + "\n", + "test4ii = TestCase4ii()\n", + "\n", + "class TestCase5i():\n", + " line1 = (-10, -1, 15, 4)\n", + " line2 = (5, 4, 5, 0) # inf slope\n", + " \n", + " def __init__(self):\n", + " print(type(self).__name__)\n", + " intersection = compute_intersection_single_element(*self.line1, *self.line2, verbose=True)\n", + " print(intersection)\n", + "\n", + "test5i = TestCase5i()\n", + "\n", + "class TestCase5ii():\n", + " line1 = (5, 4, 5, 0) # inf slope\n", + " line2 = (-10, -1, 15, 4)\n", + " \n", + " def __init__(self):\n", + " print(type(self).__name__)\n", + " intersection = compute_intersection_single_element(*self.line1, *self.line2, verbose=True)\n", + " print(intersection)\n", + "\n", + "test5ii = TestCase5ii()\n", + "\n", + "class TestCase6i():\n", + " line1 = (-10, -1, 0, 1)\n", + " line2 = (5, 4, 5, 0) # inf slope\n", + " \n", + " def __init__(self):\n", + " print(type(self).__name__)\n", + " intersection = compute_intersection_single_element(*self.line1, *self.line2, verbose=True)\n", + " print(intersection)\n", + "\n", + "test6i = TestCase6i()\n", + "\n", + "class TestCase6ii():\n", + " line1 = (5, 4, 5, 0) # inf slope\n", + " line2 = (-10, -1, 0, 1)\n", + " \n", + " def __init__(self):\n", + " print(type(self).__name__)\n", + " intersection = compute_intersection_single_element(*self.line1, *self.line2, verbose=True)\n", + " print(intersection)\n", + "\n", + "test6ii = TestCase6ii()" + ] + }, + { + "cell_type": "code", + 
"execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "TestCase7i\n", + "(1.4999999750000004, 3.9999999333333345)\n", + "TestCase7ii\n", + "(1.5000000250000003, 4.000000066666668)\n", + "TestCase8i\n", + "(1.4999999250000036, 3.9999998000000097)\n", + "TestCase9i\n", + "(3.0000000375000004, 4.000000050000001)\n", + "TestCase10i\n", + "(3.0000001500000075, 4.00000020000001)\n" + ] + } + ], + "source": [ + "# Test cases\n", + "# 7. two lines intersect internally, both lines are not parallel, one line has 0 slope\n", + "# 8. two lines intersect externally, both lines are not parallel, one line has 0 slope\n", + "# 9. two lines intersect internally, both lines are not parallel, 1 line has inf slope, 1 line has 0 slope\n", + "# 10. two lines intersect externally, both lines are not parallel, 1 line has inf slope, 1 line has 0 slope\n", + "\n", + "\n", + "class TestCase7i():\n", + " line1 = (-2, -3, 4, 9)\n", + " line2 = (5, 4, 0, 4) # 0 slope\n", + " \n", + " def __init__(self):\n", + " print(type(self).__name__)\n", + " intersection = compute_intersection_single_element(*self.line1, *self.line2, verbose=True)\n", + " print(intersection)\n", + "\n", + "test7i = TestCase7i()\n", + "\n", + "class TestCase7ii():\n", + " line1 = (5, 4, 0, 4) # 0 slope\n", + " line2 = (-2, -3, 4, 9)\n", + " \n", + " def __init__(self):\n", + " print(type(self).__name__)\n", + " intersection = compute_intersection_single_element(*self.line1, *self.line2, verbose=True)\n", + " print(intersection)\n", + "\n", + "test7ii = TestCase7ii()\n", + "\n", + "class TestCase8i():\n", + " line1 = (-2, -3, 0, 1)\n", + " line2 = (5, 4, 0, 4) # 0 slope\n", + " \n", + " def __init__(self):\n", + " print(type(self).__name__)\n", + " intersection = compute_intersection_single_element(*self.line1, *self.line2, verbose=True)\n", + " print(intersection)\n", + "\n", + "test8i = TestCase8i()\n", + "\n", + "\n", + "class TestCase9i():\n", + " line1 = 
(-2, 4, 6, 4)\n", + " line2 = (3, 6, 3, -4) # 0 slope\n", + " \n", + " def __init__(self):\n", + " print(type(self).__name__)\n", + " intersection = compute_intersection_single_element(*self.line1, *self.line2, verbose=True)\n", + " print(intersection)\n", + "\n", + "test9i = TestCase9i()\n", + "\n", + "class TestCase10i():\n", + " line1 = (-2, 4, 0, 4)\n", + " line2 = (3, 6, 3, -4) # 0 slope\n", + " \n", + " def __init__(self):\n", + " print(type(self).__name__)\n", + " intersection = compute_intersection_single_element(*self.line1, *self.line2, verbose=True)\n", + " print(intersection)\n", + "\n", + "test10i = TestCase10i()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/config.py b/config.py index ec65ffd..7337600 100644 --- a/config.py +++ b/config.py @@ -10,8 +10,10 @@ class GridParameters: y_max = 40.32 y_step = 0.16 - z_min = -1.0 - z_max = 3.0 + # z_min = -1.0 + # z_max = 3.0 + z_min = -3.0 + z_max = 1.0 # derived parameters Xn_f = float(x_max - x_min) / x_step @@ -19,13 +21,28 @@ class GridParameters: Xn = int(Xn_f) Yn = int(Yn_f) - def __init__(self): - super(GridParameters, self).__init__() + def __init__(self, **kwargs): + super(GridParameters, self).__init__(**kwargs) class DataParameters: - classes = {"Car": 0, + # classes_map = {"Car": 0, + # "Pedestrian": 1, + # "Person_sitting": 1, + # "Cyclist": 2, + # "Truck": 3, + # "Van": 3, + # "Tram": 3, + # "Misc": 3, + # } + + map_classes = { + 0: "Car", + 1: "Pedestrian" + } + + classes_map = {"Car": 0, "Pedestrian": 1, "Person_sitting": 1, "Cyclist": 2, @@ -35,11 +52,24 @@ class DataParameters: "Misc": 3, } - nb_classes 
= len(np.unique(list(classes.values()))) - assert nb_classes == np.max(np.unique(list(classes.values()))) + 1, 'Starting class indexing at zero.' + nb_classes = len(np.unique(list(classes_map.values()))) + assert nb_classes == np.max(np.unique(list(classes_map.values()))) + 1, 'Starting class indexing at zero.' - def __init__(self): - super(DataParameters, self).__init__() + # classes = {"Car": 0, + # "Pedestrian": 1, + # "Person_sitting": 1, + # "Cyclist": 2, + # "Truck": 3, + # "Van": 3, + # "Tram": 3, + # "Misc": 3, + # } + + # nb_classes = len(np.unique(list(classes.values()))) + # assert nb_classes == np.max(np.unique(list(classes.values()))) + 1, 'Starting class indexing at zero.' + + def __init__(self, **kwargs): + super(DataParameters, self).__init__(**kwargs) class NetworkParameters: @@ -50,7 +80,7 @@ class NetworkParameters: nb_channels = 64 downscaling_factor = 2 - # length, width, height, z-center, orientation + # length (x), width (y), height (z), z-center, orientation anchor_dims = np.array([[3.9, 1.6, 1.56, -1, 0], [3.9, 1.6, 1.56, -1, 1.5708], [0.8, 0.6, 1.73, -0.6, 0], @@ -77,11 +107,11 @@ class NetworkParameters: heading_weight = 0.2 # 0.2 class_weight = 0.5 # 0.2 - def __init__(self): - super(NetworkParameters, self).__init__() + def __init__(self, **kwargs): + super(NetworkParameters, self).__init__(**kwargs) class Parameters(GridParameters, DataParameters, NetworkParameters): - def __init__(self): - super(Parameters, self).__init__() + def __init__(self, **kwargs): + super(Parameters, self).__init__(**kwargs) diff --git a/config_mtr_v1.py b/config_mtr_v1.py new file mode 100644 index 0000000..a1207f5 --- /dev/null +++ b/config_mtr_v1.py @@ -0,0 +1,159 @@ +import numpy as np + + +class GridParameters: + x_min = -10.08 + x_max = 10.08 + x_step = 0.04 + + y_min = -10.08 #-5 + y_max = 10.08 #7.5 + y_step = 0.04 + + # z_min = -1.0 + # z_max = 3.0 + z_min = -1.0 + z_max = 6.0 + + # derived parameters + Xn_f = float(x_max - x_min) / x_step + Yn_f = 
float(y_max - y_min) / y_step + Xn = int(Xn_f) + Yn = int(Yn_f) + + def __init__(self, **kwargs): + super(GridParameters, self).__init__(**kwargs) + + +class DataParameters: + + # classes_map = {"Car": 0, + # "Pedestrian": 1, + # "Person_sitting": 1, + # "Cyclist": 2, + # "Truck": 3, + # "Van": 3, + # "Tram": 3, + # "Misc": 3, + # } + + # for Car and Pedestrian + # map_classes = { + # 0: "Car", + # 1: "Pedestrian" + # } + + # classes_map = {"Car": 0, + # "Pedestrian": 1, + # "Person_sitting": 1, + # # "Cyclist": 2, + # # "Truck": 3, + # # "Van": 3, + # # "Tram": 3, + # # "Misc": 3, + # } + + + # for Car only + map_classes = { + 0: "pedestrian" + } + + classes_map = {"pedestrian": 0 + } + + # # for Pedestrian only + # map_classes = { + # 0: "Pedestrian" + # } + + # classes_map = { + # "Pedestrian": 0, + # "Person_sitting": 0, + # } + + nb_classes = len(np.unique(list(classes_map.values()))) + assert nb_classes == np.max(np.unique(list(classes_map.values()))) + 1, 'Starting class indexing at zero.' + + # classes = {"Car": 0, + # "Pedestrian": 1, + # "Person_sitting": 1, + # "Cyclist": 2, + # "Truck": 3, + # "Van": 3, + # "Tram": 3, + # "Misc": 3, + # } + + # nb_classes = len(np.unique(list(classes.values()))) + # assert nb_classes == np.max(np.unique(list(classes.values()))) + 1, 'Starting class indexing at zero.' 
+ + def __init__(self, **kwargs): + super(DataParameters, self).__init__(**kwargs) + + +class NetworkParameters: + + max_points_per_pillar = 100 + max_pillars = 12000 + nb_features = 9 + nb_channels = 64 + downscaling_factor = 2 + + # length (x), width (y), height (z), z-center, orientation + # for car and pedestrian + # anchor_dims = np.array([[3.9, 1.6, 1.56, -1, 0], + # [3.9, 1.6, 1.56, -1, np.pi/2], + # [0.8, 0.6, 1.73, -0.6, 0], + # [0.8, 0.6, 1.73, -0.6, np.pi/2], + # ], dtype=np.float32).tolist() + + # for car only + # anchor_dims = np.array([[3.9, 1.6, 1.56, -1, 0], + # [3.9, 1.6, 1.56, -1, np.pi/2]], dtype=np.float32).tolist() + + # for pedestrian only + anchor_dims = np.array([ + [0.62, 0.56, 0.7, 1.8, 0], + [0.62, 0.56, 0.7, 1.8, np.pi/2], + [0.62, 0.56, 1.5, 1.63646424, 0], + [0.62, 0.56, 1.5, 1.63646424, np.pi/2], + ], dtype=np.float32).tolist() + nb_dims = 3 + + # for car + # positive_iou_threshold = 0.6 + # negative_iou_threshold = 0.3 + + # for pedestrian + positive_iou_threshold = 0.5 + negative_iou_threshold = 0.35 + + # batch_size = 1 + num_gpus = 1 + batch_size = 4 + total_training_epochs = 160 + # iters_to_decay = 101040. # 15 * 4 * ceil(6733. / 4) --> every 15 epochs on 6733 kitti samples, cf. 
pillar paper + iters_to_decay = 100500 + learning_rate = 2e-4 + decay_rate = 1e-8 + L1 = 0 + L2 = 0 + alpha = 0.25 + gamma = 2.0 + # original pillars paper values + focal_weight = 1.0 # 1.0 + loc_weight = 2.0 # 2.0 + size_weight = 2.0 # 2.0 + angle_weight = 2.0 # 2.0 + heading_weight = 0.2 # 0.2 + class_weight = 0.5 # 0.2 + + def __init__(self, **kwargs): + super(NetworkParameters, self).__init__(**kwargs) + + +class Parameters(GridParameters, DataParameters, NetworkParameters): + + def __init__(self, **kwargs): + super(Parameters, self).__init__(**kwargs) diff --git a/config_v2.py b/config_v2.py new file mode 100644 index 0000000..5ad41a0 --- /dev/null +++ b/config_v2.py @@ -0,0 +1,155 @@ +import numpy as np + + +class GridParameters: + x_min = 0.0 + x_max = 80.64 + x_step = 0.16 + + y_min = -40.32 + y_max = 40.32 + y_step = 0.16 + + # z_min = -1.0 + # z_max = 3.0 + z_min = -3.0 + z_max = 1.0 + + # derived parameters + Xn_f = float(x_max - x_min) / x_step + Yn_f = float(y_max - y_min) / y_step + Xn = int(Xn_f) + Yn = int(Yn_f) + + def __init__(self, **kwargs): + super(GridParameters, self).__init__(**kwargs) + + +class DataParameters: + + # classes_map = {"Car": 0, + # "Pedestrian": 1, + # "Person_sitting": 1, + # "Cyclist": 2, + # "Truck": 3, + # "Van": 3, + # "Tram": 3, + # "Misc": 3, + # } + + # for Car and Pedestrian + # map_classes = { + # 0: "Car", + # 1: "Pedestrian" + # } + + # classes_map = {"Car": 0, + # "Pedestrian": 1, + # "Person_sitting": 1, + # # "Cyclist": 2, + # # "Truck": 3, + # # "Van": 3, + # # "Tram": 3, + # # "Misc": 3, + # } + + + # for Car only + map_classes = { + 0: "Car" + } + + classes_map = {"Car": 0 + } + + # # for Pedestrian only + # map_classes = { + # 0: "Pedestrian" + # } + + # classes_map = { + # "Pedestrian": 0, + # "Person_sitting": 0, + # } + + nb_classes = len(np.unique(list(classes_map.values()))) + assert nb_classes == np.max(np.unique(list(classes_map.values()))) + 1, 'Starting class indexing at zero.' 
+ + # classes = {"Car": 0, + # "Pedestrian": 1, + # "Person_sitting": 1, + # "Cyclist": 2, + # "Truck": 3, + # "Van": 3, + # "Tram": 3, + # "Misc": 3, + # } + + # nb_classes = len(np.unique(list(classes.values()))) + # assert nb_classes == np.max(np.unique(list(classes.values()))) + 1, 'Starting class indexing at zero.' + + def __init__(self, **kwargs): + super(DataParameters, self).__init__(**kwargs) + + +class NetworkParameters: + + max_points_per_pillar = 100 + max_pillars = 12000 + nb_features = 9 + nb_channels = 64 + downscaling_factor = 2 + + # length (x), width (y), height (z), z-center, orientation + # for car and pedestrian + # anchor_dims = np.array([[3.9, 1.6, 1.56, -1, 0], + # [3.9, 1.6, 1.56, -1, np.pi/2], + # [0.8, 0.6, 1.73, -0.6, 0], + # [0.8, 0.6, 1.73, -0.6, np.pi/2], + # ], dtype=np.float32).tolist() + + # for car only + anchor_dims = np.array([[3.9, 1.6, 1.56, -1, 0], + [3.9, 1.6, 1.56, -1, np.pi/2]], dtype=np.float32).tolist() + + # for pedestrian only + # anchor_dims = np.array([[0.8, 0.6, 1.73, -0.6, 0], + # [0.8, 0.6, 1.73, -0.6, np.pi/2], + # ], dtype=np.float32).tolist() + nb_dims = 3 + + # for car + positive_iou_threshold = 0.6 + negative_iou_threshold = 0.3 + + # for pedestrian + # positive_iou_threshold = 0.5 + # negative_iou_threshold = 0.35 + + # batch_size = 1 + batch_size = 4 + total_training_epochs = 160 + # iters_to_decay = 101040. # 15 * 4 * ceil(6733. / 4) --> every 15 epochs on 6733 kitti samples, cf. 
pillar paper + iters_to_decay = 100500 + learning_rate = 2e-4 + decay_rate = 1e-8 + L1 = 0 + L2 = 0 + alpha = 0.25 + gamma = 2.0 + # original pillars paper values + focal_weight = 1.0 # 1.0 + loc_weight = 2.0 # 2.0 + size_weight = 2.0 # 2.0 + angle_weight = 2.0 # 2.0 + heading_weight = 0.2 # 0.2 + class_weight = 0.5 # 0.2 + + def __init__(self, **kwargs): + super(NetworkParameters, self).__init__(**kwargs) + + +class Parameters(GridParameters, DataParameters, NetworkParameters): + + def __init__(self, **kwargs): + super(Parameters, self).__init__(**kwargs) diff --git a/config_v2_2.py b/config_v2_2.py new file mode 100644 index 0000000..5143f05 --- /dev/null +++ b/config_v2_2.py @@ -0,0 +1,156 @@ +import numpy as np + + +class GridParameters: + x_min = 0.0 + x_max = 80.64 + x_step = 0.16 + + y_min = -40.32 + y_max = 40.32 + y_step = 0.16 + + # z_min = -1.0 + # z_max = 3.0 + z_min = -3.0 + z_max = 1.0 + + # derived parameters + Xn_f = float(x_max - x_min) / x_step + Yn_f = float(y_max - y_min) / y_step + Xn = int(Xn_f) + Yn = int(Yn_f) + + def __init__(self, **kwargs): + super(GridParameters, self).__init__(**kwargs) + + +class DataParameters: + + # classes_map = {"Car": 0, + # "Pedestrian": 1, + # "Person_sitting": 1, + # "Cyclist": 2, + # "Truck": 3, + # "Van": 3, + # "Tram": 3, + # "Misc": 3, + # } + + # for Car and Pedestrian + # map_classes = { + # 0: "Car", + # 1: "Pedestrian" + # } + + # classes_map = {"Car": 0, + # "Pedestrian": 1, + # "Person_sitting": 1, + # # "Cyclist": 2, + # # "Truck": 3, + # # "Van": 3, + # # "Tram": 3, + # # "Misc": 3, + # } + + + # for Car only + map_classes = { + 0: "Car" + } + + classes_map = {"Car": 0 + } + + # # for Pedestrian only + # map_classes = { + # 0: "Pedestrian" + # } + + # classes_map = { + # "Pedestrian": 0, + # "Person_sitting": 0, + # } + + nb_classes = len(np.unique(list(classes_map.values()))) + assert nb_classes == np.max(np.unique(list(classes_map.values()))) + 1, 'Starting class indexing at zero.' 
+ + # classes = {"Car": 0, + # "Pedestrian": 1, + # "Person_sitting": 1, + # "Cyclist": 2, + # "Truck": 3, + # "Van": 3, + # "Tram": 3, + # "Misc": 3, + # } + + # nb_classes = len(np.unique(list(classes.values()))) + # assert nb_classes == np.max(np.unique(list(classes.values()))) + 1, 'Starting class indexing at zero.' + + def __init__(self, **kwargs): + super(DataParameters, self).__init__(**kwargs) + + +class NetworkParameters: + + max_points_per_pillar = 100 + max_pillars = 12000 + nb_features = 9 + nb_channels = 64 + downscaling_factor = 2 + + # length (x), width (y), height (z), z-center, orientation + # for car and pedestrian + # anchor_dims = np.array([[3.9, 1.6, 1.56, -1, 0], + # [3.9, 1.6, 1.56, -1, np.pi/2], + # [0.8, 0.6, 1.73, -0.6, 0], + # [0.8, 0.6, 1.73, -0.6, np.pi/2], + # ], dtype=np.float32).tolist() + + # for car only + anchor_dims = np.array([[3.9, 1.6, 1.56, -1, 0], + [3.9, 1.6, 1.56, -1, np.pi/2]], dtype=np.float32).tolist() + + # for pedestrian only + # anchor_dims = np.array([[0.8, 0.6, 1.73, -0.6, 0], + # [0.8, 0.6, 1.73, -0.6, np.pi/2], + # ], dtype=np.float32).tolist() + nb_dims = 3 + + # for car + positive_iou_threshold = 0.6 + negative_iou_threshold = 0.3 + + # for pedestrian + # positive_iou_threshold = 0.5 + # negative_iou_threshold = 0.35 + + # batch_size = 1 + num_gpus = 1 + batch_size = 4 + total_training_epochs = 160 + # iters_to_decay = 101040. # 15 * 4 * ceil(6733. / 4) --> every 15 epochs on 6733 kitti samples, cf. 
pillar paper + iters_to_decay = 100500 + learning_rate = 2e-4 + decay_rate = 1e-8 + L1 = 0 + L2 = 0 + alpha = 0.25 + gamma = 2.0 + # original pillars paper values + focal_weight = 1.0 # 1.0 + loc_weight = 2.0 # 2.0 + size_weight = 2.0 # 2.0 + angle_weight = 2.0 # 2.0 + heading_weight = 0.2 # 0.2 + class_weight = 0.5 # 0.2 + + def __init__(self, **kwargs): + super(NetworkParameters, self).__init__(**kwargs) + + +class Parameters(GridParameters, DataParameters, NetworkParameters): + + def __init__(self, **kwargs): + super(Parameters, self).__init__(**kwargs) diff --git a/inference_utils.py b/inference_utils.py index 20ba7ac..e80b99d 100644 --- a/inference_utils.py +++ b/inference_utils.py @@ -44,6 +44,7 @@ def rotational_nms(set_boxes, confidences, occ_threshold=0.7, nms_iou_thr=0.5): for boxes, confs in zip(set_boxes, confidences): assert len(boxes) == len(confs) indices = cv.dnn.NMSBoxesRotated(boxes, confs, occ_threshold, nms_iou_thr) + print(indices) indices = indices.reshape(len(indices)).tolist() nms_boxes.append([boxes[i] for i in indices]) return nms_boxes @@ -85,9 +86,79 @@ def generate_bboxes_from_pred(occ, pos, siz, ang, hdg, clf, anchor_dims, occ_thr predicted_boxes.append(BBox(bb_x, bb_y, bb_z, bb_length, bb_width, bb_height, bb_yaw, bb_heading, bb_cls, bb_conf)) + return predicted_boxes + +def limit_period(val, offset=0.5, period=np.pi): + return val - np.floor(val / period + offset) * period + +def inverse_yaw_element(bb_yaw): + + + bb_yaw -= np.pi / 2 + while bb_yaw > np.pi: + print("larger than pi") + bb_yaw -= (np.pi * 2) + while bb_yaw < -np.pi: + print("smaller than -pi") + bb_yaw += (np.pi * 2) + + return bb_yaw + + # if bb_yaw > np.pi /2: + # bb_yaw -= 2 * np.pi + + # bb_yaw += np.pi/2 + # return bb_yaw + +def generate_bboxes_from_pred_and_np_array(occ, pos, siz, ang, hdg, clf, anchor_dims, occ_threshold=0.5): + """ Generating the bounding boxes based on the regression targets """ + + # Get only the boxes where occupancy is greater or equal 
threshold. + real_boxes = np.where(occ >= occ_threshold) + # Get the indices of the occupancy array + coordinates = list(zip(real_boxes[0], real_boxes[1], real_boxes[2])) + # Assign anchor dimensions as original bounding box coordinates which will eventually be changed + # according to the predicted regression targets + anchor_dims = anchor_dims + real_anchors = np.random.rand(len(coordinates), len(anchor_dims[0])) + + for i, value in enumerate(real_boxes[2]): + real_anchors[i, ...] = anchor_dims[value] + + # Change the anchor boxes based on regression targets, this is the inverse of the operations given in + # createPillarTargets function (src/PointPillars.cpp) + predicted_boxes = [] + predicted_boxes_list = [] + for i, value in enumerate(coordinates): + # print("coordinate ", i) + real_diag = np.sqrt(np.square(real_anchors[i][0]) + np.square(real_anchors[i][1])) + real_x = value[0] * Parameters.x_step * Parameters.downscaling_factor + Parameters.x_min + real_y = value[1] * Parameters.y_step * Parameters.downscaling_factor + Parameters.y_min + bb_x = pos[value][0] * real_diag + real_x + bb_y = pos[value][1] * real_diag + real_y + bb_z = pos[value][2] * real_anchors[i][2] + real_anchors[i][3] + # print(position[value], real_x, real_y, real_diag) + bb_length = np.exp(siz[value][0]) * real_anchors[i][0] + bb_width = np.exp(siz[value][1]) * real_anchors[i][1] + bb_height = np.exp(siz[value][2]) * real_anchors[i][2] + bb_yaw = -np.arcsin(np.clip(ang[value], -1, 1)) + real_anchors[i][4] + # bb_yaw = np.arcsin(np.clip(ang[value], -1, 1)) + real_anchors[i][4] + # bb_yaw = inverse_yaw_element(bb_yaw) + bb_heading = np.round(hdg[value]) + # print(bb_heading, bb_yaw) + bb_cls = np.argmax(clf[value]) + bb_conf = occ[value] + predicted_boxes.append(BBox(bb_x, bb_y, bb_z, bb_length, bb_width, bb_height, + bb_yaw, bb_heading, bb_cls, bb_conf)) + predicted_boxes_list.append([bb_x, bb_y, bb_z, bb_length, bb_width, bb_height, + bb_yaw, bb_heading, bb_cls, bb_conf]) + + return 
predicted_boxes, np.array(predicted_boxes_list) + + class GroundTruthGenerator(DataProcessor): """ Multiprocessing-safe data generator for training, validation or testing, without fancy augmentation """ diff --git a/inference_utils_mtr_v1.py b/inference_utils_mtr_v1.py new file mode 100644 index 0000000..fa91bd6 --- /dev/null +++ b/inference_utils_mtr_v1.py @@ -0,0 +1,190 @@ +import numpy as np +import cv2 as cv +from typing import List +from config_mtr_v1 import Parameters +from mtr_processors_v1 import DataProcessor + + +class BBox(tuple): + """ bounding box tuple that can easily be accessed while being compatible to cv2 rotational rects """ + + def __new__(cls, bb_x, bb_y, bb_z, bb_length, bb_width, bb_height, bb_yaw, bb_heading, bb_cls, bb_conf): + bbx_tuple = ((float(bb_x), float(bb_y)), (float(bb_length), float(bb_width)), float(np.rad2deg(bb_yaw))) + return super(BBox, cls).__new__(cls, tuple(bbx_tuple)) + + def __init__(self, bb_x, bb_y, bb_z, bb_length, bb_width, bb_height, bb_yaw, bb_heading, bb_cls, bb_conf): + self.x = bb_x + self.y = bb_y + self.z = bb_z + self.length = bb_length + self.width = bb_width + self.height = bb_height + self.yaw = bb_yaw + self.heading = bb_heading + self.cls = bb_cls + self.conf = bb_conf + + def __str__(self): + return "BB | Cls: %s, x: %f, y: %f, l: %f, w: %f, yaw: %f" % ( + self.cls, self.x, self.y, self.length, self.width, self.yaw) + + +def rotational_nms(set_boxes, confidences, occ_threshold=0.7, nms_iou_thr=0.5): + """ rotational NMS + set_boxes = size NSeqs list of size NDet lists of tuples. each tuple has the form ((pos, pos), (size, size), angle) + confidences = size NSeqs list of lists containing NDet floats, i.e. 
one per detection + """ + assert len(set_boxes) == len(confidences) and 0 < occ_threshold < 1 and 0 < nms_iou_thr < 1 + if not len(set_boxes): + return [] + assert (isinstance(set_boxes[0][0][0][0], float) or isinstance(set_boxes[0][0][0][0], int)) and \ + (isinstance(confidences[0][0], float) or isinstance(confidences[0][0], int)) + nms_boxes = [] + for boxes, confs in zip(set_boxes, confidences): + assert len(boxes) == len(confs) + indices = cv.dnn.NMSBoxesRotated(boxes, confs, occ_threshold, nms_iou_thr) + # print(indices) + indices = indices.reshape(len(indices)).tolist() + nms_boxes.append([boxes[i] for i in indices]) + return nms_boxes + + +def generate_bboxes_from_pred(occ, pos, siz, ang, hdg, clf, anchor_dims, occ_threshold=0.5): + """ Generating the bounding boxes based on the regression targets """ + + # Get only the boxes where occupancy is greater or equal threshold. + real_boxes = np.where(occ >= occ_threshold) + # Get the indices of the occupancy array + coordinates = list(zip(real_boxes[0], real_boxes[1], real_boxes[2])) + # Assign anchor dimensions as original bounding box coordinates which will eventually be changed + # according to the predicted regression targets + anchor_dims = anchor_dims + real_anchors = np.random.rand(len(coordinates), len(anchor_dims[0])) + + for i, value in enumerate(real_boxes[2]): + real_anchors[i, ...] 
= anchor_dims[value] + + # Change the anchor boxes based on regression targets, this is the inverse of the operations given in + # createPillarTargets function (src/PointPillars.cpp) + predicted_boxes = [] + for i, value in enumerate(coordinates): + real_diag = np.sqrt(np.square(real_anchors[i][0]) + np.square(real_anchors[i][1])) + real_x = value[0] * Parameters.x_step * Parameters.downscaling_factor + Parameters.x_min + real_y = value[1] * Parameters.y_step * Parameters.downscaling_factor + Parameters.y_min + bb_x = pos[value][0] * real_diag + real_x + bb_y = pos[value][1] * real_diag + real_y + bb_z = pos[value][2] * real_anchors[i][2] + real_anchors[i][3] + # print(position[value], real_x, real_y, real_diag) + bb_length = np.exp(siz[value][0]) * real_anchors[i][0] + bb_width = np.exp(siz[value][1]) * real_anchors[i][1] + bb_height = np.exp(siz[value][2]) * real_anchors[i][2] + bb_yaw = ang[value] + real_anchors[i][4] + # bb_yaw = -np.arcsin(np.clip(ang[value], -1, 1)) + real_anchors[i][4] + bb_heading = np.round(hdg[value]) + bb_cls = np.argmax(clf[value]) + bb_conf = occ[value] + predicted_boxes.append(BBox(bb_x, bb_y, bb_z, bb_length, bb_width, bb_height, + bb_yaw, bb_heading, bb_cls, bb_conf)) + + + return predicted_boxes + + + +def limit_period(val, offset=0.5, period=np.pi): + return val - np.floor(val / period + offset) * period + +def inverse_yaw_element(bb_yaw): + bb_yaw -= np.pi / 2 + while bb_yaw > np.pi: + # print("larger than pi") + bb_yaw -= (np.pi * 2) + while bb_yaw < -np.pi: + # print("smaller than -pi") + bb_yaw += (np.pi * 2) + + return bb_yaw + + # if bb_yaw > np.pi /2: + # bb_yaw -= 2 * np.pi + + # bb_yaw += np.pi/2 + # return bb_yaw + +def generate_bboxes_from_pred_and_np_array(occ, pos, siz, ang, hdg, clf, anchor_dims, occ_threshold=0.5): + """ Generating the bounding boxes based on the regression targets """ + + # Get only the boxes where occupancy is greater or equal threshold. 
+ real_boxes = np.where(occ >= occ_threshold) + # print(occ.shape) + # Get the indices of the occupancy array + coordinates = list(zip(real_boxes[0], real_boxes[1], real_boxes[2])) + # Assign anchor dimensions as original bounding box coordinates which will eventually be changed + # according to the predicted regression targets + anchor_dims = anchor_dims + real_anchors = np.random.rand(len(coordinates), len(anchor_dims[0])) + + for i, value in enumerate(real_boxes[2]): + real_anchors[i, ...] = anchor_dims[value] + + # Change the anchor boxes based on regression targets, this is the inverse of the operations given in + # createPillarTargets function (src/PointPillars.cpp) + predicted_boxes = [] + predicted_boxes_list = [] + for i, value in enumerate(coordinates): + # print("coordinate ", i) + real_diag = np.sqrt(np.square(real_anchors[i][0]) + np.square(real_anchors[i][1])) + real_x = value[0] * Parameters.x_step * Parameters.downscaling_factor + Parameters.x_min + real_y = value[1] * Parameters.y_step * Parameters.downscaling_factor + Parameters.y_min + # print("i: ", i, "\tx: ", real_x, "\ty:", real_y) + # print("i: ", i, "\tx: ", value[0], "\ty:", value[1]) + bb_x = pos[value][0] * real_diag + real_x + bb_y = pos[value][1] * real_diag + real_y + bb_z = pos[value][2] * real_anchors[i][2] + real_anchors[i][3] + # print(position[value], real_x, real_y, real_diag) + bb_length = np.exp(siz[value][0]) * real_anchors[i][0] + bb_width = np.exp(siz[value][1]) * real_anchors[i][1] + bb_height = np.exp(siz[value][2]) * real_anchors[i][2] + bb_heading = np.round(hdg[value]) + bb_yaw = ang[value] + real_anchors[i][4] + # if np.int32(bb_heading) == 0: + # bb_yaw -= np.pi + + bb_cls = np.argmax(clf[value]) + bb_conf = occ[value] + predicted_boxes.append(BBox(bb_x, bb_y, bb_z, bb_length, bb_width, bb_height, + bb_yaw, bb_heading, bb_cls, bb_conf)) + predicted_boxes_list.append([bb_x, bb_y, bb_z, bb_length, bb_width, bb_height, + bb_yaw, bb_heading, bb_cls, bb_conf]) + + return 
predicted_boxes, np.array(predicted_boxes_list) + +def convert_boxes_to_list(set_boxes): + # (B, N) + batch_predicted_boxes_list = [] + for batch_idx in range(len(set_boxes)): + predicted_boxes_list = [] + + for box in set_boxes[batch_idx]: + + predicted_boxes_list.append([box.x, box.y, box.z, box.length, box.width, box.height, + box.yaw, box.heading, box.cls, box.conf]) + + batch_predicted_boxes_list.append(predicted_boxes_list) + return batch_predicted_boxes_list + + +def focal_loss_checker(y_true, y_pred, n_occs=-1): + y_true = np.stack(np.where(y_true == 1)) + if n_occs == -1: + n_occs = y_true.shape[1] + occ_thr = np.sort(y_pred.flatten())[-n_occs] + y_pred = np.stack(np.where(y_pred >= occ_thr)) + p = 0 + for gt in range(y_true.shape[1]): + for pr in range(y_pred.shape[1]): + if np.all(y_true[:, gt] == y_pred[:, pr]): + p += 1 + break + print("#matched gt: ", p, " #unmatched gt: ", y_true.shape[1] - p, " #unmatched pred: ", y_pred.shape[1] - p, + " occupancy threshold: ", occ_thr) diff --git a/inference_utils_v2.py b/inference_utils_v2.py new file mode 100644 index 0000000..a34441a --- /dev/null +++ b/inference_utils_v2.py @@ -0,0 +1,190 @@ +import numpy as np +import cv2 as cv +from typing import List +from config_v2 import Parameters +from point_pillars_custom_processors_v2 import DataProcessor + + +class BBox(tuple): + """ bounding box tuple that can easily be accessed while being compatible to cv2 rotational rects """ + + def __new__(cls, bb_x, bb_y, bb_z, bb_length, bb_width, bb_height, bb_yaw, bb_heading, bb_cls, bb_conf): + bbx_tuple = ((float(bb_x), float(bb_y)), (float(bb_length), float(bb_width)), float(np.rad2deg(bb_yaw))) + return super(BBox, cls).__new__(cls, tuple(bbx_tuple)) + + def __init__(self, bb_x, bb_y, bb_z, bb_length, bb_width, bb_height, bb_yaw, bb_heading, bb_cls, bb_conf): + self.x = bb_x + self.y = bb_y + self.z = bb_z + self.length = bb_length + self.width = bb_width + self.height = bb_height + self.yaw = bb_yaw + self.heading = 
bb_heading + self.cls = bb_cls + self.conf = bb_conf + + def __str__(self): + return "BB | Cls: %s, x: %f, y: %f, l: %f, w: %f, yaw: %f" % ( + self.cls, self.x, self.y, self.length, self.width, self.yaw) + + +def rotational_nms(set_boxes, confidences, occ_threshold=0.7, nms_iou_thr=0.5): + """ rotational NMS + set_boxes = size NSeqs list of size NDet lists of tuples. each tuple has the form ((pos, pos), (size, size), angle) + confidences = size NSeqs list of lists containing NDet floats, i.e. one per detection + """ + assert len(set_boxes) == len(confidences) and 0 < occ_threshold < 1 and 0 < nms_iou_thr < 1 + if not len(set_boxes): + return [] + assert (isinstance(set_boxes[0][0][0][0], float) or isinstance(set_boxes[0][0][0][0], int)) and \ + (isinstance(confidences[0][0], float) or isinstance(confidences[0][0], int)) + nms_boxes = [] + for boxes, confs in zip(set_boxes, confidences): + assert len(boxes) == len(confs) + indices = cv.dnn.NMSBoxesRotated(boxes, confs, occ_threshold, nms_iou_thr) + print(indices) + indices = indices.reshape(len(indices)).tolist() + nms_boxes.append([boxes[i] for i in indices]) + return nms_boxes + + +def generate_bboxes_from_pred(occ, pos, siz, ang, hdg, clf, anchor_dims, occ_threshold=0.5): + """ Generating the bounding boxes based on the regression targets """ + + # Get only the boxes where occupancy is greater or equal threshold. + real_boxes = np.where(occ >= occ_threshold) + # Get the indices of the occupancy array + coordinates = list(zip(real_boxes[0], real_boxes[1], real_boxes[2])) + # Assign anchor dimensions as original bounding box coordinates which will eventually be changed + # according to the predicted regression targets + anchor_dims = anchor_dims + real_anchors = np.random.rand(len(coordinates), len(anchor_dims[0])) + + for i, value in enumerate(real_boxes[2]): + real_anchors[i, ...] 
= anchor_dims[value] + + # Change the anchor boxes based on regression targets, this is the inverse of the operations given in + # createPillarTargets function (src/PointPillars.cpp) + predicted_boxes = [] + for i, value in enumerate(coordinates): + real_diag = np.sqrt(np.square(real_anchors[i][0]) + np.square(real_anchors[i][1])) + real_x = value[0] * Parameters.x_step * Parameters.downscaling_factor + Parameters.x_min + real_y = value[1] * Parameters.y_step * Parameters.downscaling_factor + Parameters.y_min + bb_x = pos[value][0] * real_diag + real_x + bb_y = pos[value][1] * real_diag + real_y + bb_z = pos[value][2] * real_anchors[i][2] + real_anchors[i][3] + # print(position[value], real_x, real_y, real_diag) + bb_length = np.exp(siz[value][0]) * real_anchors[i][0] + bb_width = np.exp(siz[value][1]) * real_anchors[i][1] + bb_height = np.exp(siz[value][2]) * real_anchors[i][2] + bb_yaw = ang[value] + real_anchors[i][4] + # bb_yaw = -np.arcsin(np.clip(ang[value], -1, 1)) + real_anchors[i][4] + bb_heading = np.round(hdg[value]) + bb_cls = np.argmax(clf[value]) + bb_conf = occ[value] + predicted_boxes.append(BBox(bb_x, bb_y, bb_z, bb_length, bb_width, bb_height, + bb_yaw, bb_heading, bb_cls, bb_conf)) + + + return predicted_boxes + + + +def limit_period(val, offset=0.5, period=np.pi): + return val - np.floor(val / period + offset) * period + +def inverse_yaw_element(bb_yaw): + bb_yaw -= np.pi / 2 + while bb_yaw > np.pi: + # print("larger than pi") + bb_yaw -= (np.pi * 2) + while bb_yaw < -np.pi: + # print("smaller than -pi") + bb_yaw += (np.pi * 2) + + return bb_yaw + + # if bb_yaw > np.pi /2: + # bb_yaw -= 2 * np.pi + + # bb_yaw += np.pi/2 + # return bb_yaw + +def generate_bboxes_from_pred_and_np_array(occ, pos, siz, ang, hdg, clf, anchor_dims, occ_threshold=0.5): + """ Generating the bounding boxes based on the regression targets """ + + # Get only the boxes where occupancy is greater or equal threshold. 
+ real_boxes = np.where(occ >= occ_threshold) + # print(occ.shape) + # Get the indices of the occupancy array + coordinates = list(zip(real_boxes[0], real_boxes[1], real_boxes[2])) + # Assign anchor dimensions as original bounding box coordinates which will eventually be changed + # according to the predicted regression targets + anchor_dims = anchor_dims + real_anchors = np.random.rand(len(coordinates), len(anchor_dims[0])) + + for i, value in enumerate(real_boxes[2]): + real_anchors[i, ...] = anchor_dims[value] + + # Change the anchor boxes based on regression targets, this is the inverse of the operations given in + # createPillarTargets function (src/PointPillars.cpp) + predicted_boxes = [] + predicted_boxes_list = [] + for i, value in enumerate(coordinates): + # print("coordinate ", i) + real_diag = np.sqrt(np.square(real_anchors[i][0]) + np.square(real_anchors[i][1])) + real_x = value[0] * Parameters.x_step * Parameters.downscaling_factor + Parameters.x_min + real_y = value[1] * Parameters.y_step * Parameters.downscaling_factor + Parameters.y_min + # print("i: ", i, "\tx: ", real_x, "\ty:", real_y) + # print("i: ", i, "\tx: ", value[0], "\ty:", value[1]) + bb_x = pos[value][0] * real_diag + real_x + bb_y = pos[value][1] * real_diag + real_y + bb_z = pos[value][2] * real_anchors[i][2] + real_anchors[i][3] + # print(position[value], real_x, real_y, real_diag) + bb_length = np.exp(siz[value][0]) * real_anchors[i][0] + bb_width = np.exp(siz[value][1]) * real_anchors[i][1] + bb_height = np.exp(siz[value][2]) * real_anchors[i][2] + bb_heading = np.round(hdg[value]) + bb_yaw = ang[value] + real_anchors[i][4] + # if np.int32(bb_heading) == 0: + # bb_yaw -= np.pi + + bb_cls = np.argmax(clf[value]) + bb_conf = occ[value] + predicted_boxes.append(BBox(bb_x, bb_y, bb_z, bb_length, bb_width, bb_height, + bb_yaw, bb_heading, bb_cls, bb_conf)) + predicted_boxes_list.append([bb_x, bb_y, bb_z, bb_length, bb_width, bb_height, + bb_yaw, bb_heading, bb_cls, bb_conf]) + + return 
predicted_boxes, np.array(predicted_boxes_list) + +def convert_boxes_to_list(set_boxes): + # (B, N) + batch_predicted_boxes_list = [] + for batch_idx in range(len(set_boxes)): + predicted_boxes_list = [] + + for box in set_boxes[batch_idx]: + + predicted_boxes_list.append([box.x, box.y, box.z, box.length, box.width, box.height, + box.yaw, box.heading, box.cls, box.conf]) + + batch_predicted_boxes_list.append(predicted_boxes_list) + return batch_predicted_boxes_list + + +def focal_loss_checker(y_true, y_pred, n_occs=-1): + y_true = np.stack(np.where(y_true == 1)) + if n_occs == -1: + n_occs = y_true.shape[1] + occ_thr = np.sort(y_pred.flatten())[-n_occs] + y_pred = np.stack(np.where(y_pred >= occ_thr)) + p = 0 + for gt in range(y_true.shape[1]): + for pr in range(y_pred.shape[1]): + if np.all(y_true[:, gt] == y_pred[:, pr]): + p += 1 + break + print("#matched gt: ", p, " #unmatched gt: ", y_true.shape[1] - p, " #unmatched pred: ", y_pred.shape[1] - p, + " occupancy threshold: ", occ_thr) diff --git a/inference_utils_v2_2.py b/inference_utils_v2_2.py new file mode 100644 index 0000000..e024624 --- /dev/null +++ b/inference_utils_v2_2.py @@ -0,0 +1,190 @@ +import numpy as np +import cv2 as cv +from typing import List +from config_v2_2 import Parameters +from point_pillars_custom_processors_v2_2 import DataProcessor + + +class BBox(tuple): + """ bounding box tuple that can easily be accessed while being compatible to cv2 rotational rects """ + + def __new__(cls, bb_x, bb_y, bb_z, bb_length, bb_width, bb_height, bb_yaw, bb_heading, bb_cls, bb_conf): + bbx_tuple = ((float(bb_x), float(bb_y)), (float(bb_length), float(bb_width)), float(np.rad2deg(bb_yaw))) + return super(BBox, cls).__new__(cls, tuple(bbx_tuple)) + + def __init__(self, bb_x, bb_y, bb_z, bb_length, bb_width, bb_height, bb_yaw, bb_heading, bb_cls, bb_conf): + self.x = bb_x + self.y = bb_y + self.z = bb_z + self.length = bb_length + self.width = bb_width + self.height = bb_height + self.yaw = bb_yaw + 
self.heading = bb_heading + self.cls = bb_cls + self.conf = bb_conf + + def __str__(self): + return "BB | Cls: %s, x: %f, y: %f, l: %f, w: %f, yaw: %f" % ( + self.cls, self.x, self.y, self.length, self.width, self.yaw) + + +def rotational_nms(set_boxes, confidences, occ_threshold=0.7, nms_iou_thr=0.5): + """ rotational NMS + set_boxes = size NSeqs list of size NDet lists of tuples. each tuple has the form ((pos, pos), (size, size), angle) + confidences = size NSeqs list of lists containing NDet floats, i.e. one per detection + """ + assert len(set_boxes) == len(confidences) and 0 < occ_threshold < 1 and 0 < nms_iou_thr < 1 + if not len(set_boxes): + return [] + assert (isinstance(set_boxes[0][0][0][0], float) or isinstance(set_boxes[0][0][0][0], int)) and \ + (isinstance(confidences[0][0], float) or isinstance(confidences[0][0], int)) + nms_boxes = [] + for boxes, confs in zip(set_boxes, confidences): + assert len(boxes) == len(confs) + indices = cv.dnn.NMSBoxesRotated(boxes, confs, occ_threshold, nms_iou_thr) + print(indices) + indices = indices.reshape(len(indices)).tolist() + nms_boxes.append([boxes[i] for i in indices]) + return nms_boxes + + +def generate_bboxes_from_pred(occ, pos, siz, ang, hdg, clf, anchor_dims, occ_threshold=0.5): + """ Generating the bounding boxes based on the regression targets """ + + # Get only the boxes where occupancy is greater or equal threshold. + real_boxes = np.where(occ >= occ_threshold) + # Get the indices of the occupancy array + coordinates = list(zip(real_boxes[0], real_boxes[1], real_boxes[2])) + # Assign anchor dimensions as original bounding box coordinates which will eventually be changed + # according to the predicted regression targets + anchor_dims = anchor_dims + real_anchors = np.random.rand(len(coordinates), len(anchor_dims[0])) + + for i, value in enumerate(real_boxes[2]): + real_anchors[i, ...] 
= anchor_dims[value] + + # Change the anchor boxes based on regression targets, this is the inverse of the operations given in + # createPillarTargets function (src/PointPillars.cpp) + predicted_boxes = [] + for i, value in enumerate(coordinates): + real_diag = np.sqrt(np.square(real_anchors[i][0]) + np.square(real_anchors[i][1])) + real_x = value[0] * Parameters.x_step * Parameters.downscaling_factor + Parameters.x_min + real_y = value[1] * Parameters.y_step * Parameters.downscaling_factor + Parameters.y_min + bb_x = pos[value][0] * real_diag + real_x + bb_y = pos[value][1] * real_diag + real_y + bb_z = pos[value][2] * real_anchors[i][2] + real_anchors[i][3] + # print(position[value], real_x, real_y, real_diag) + bb_length = np.exp(siz[value][0]) * real_anchors[i][0] + bb_width = np.exp(siz[value][1]) * real_anchors[i][1] + bb_height = np.exp(siz[value][2]) * real_anchors[i][2] + bb_yaw = ang[value] + real_anchors[i][4] + # bb_yaw = -np.arcsin(np.clip(ang[value], -1, 1)) + real_anchors[i][4] + bb_heading = np.round(hdg[value]) + bb_cls = np.argmax(clf[value]) + bb_conf = occ[value] + predicted_boxes.append(BBox(bb_x, bb_y, bb_z, bb_length, bb_width, bb_height, + bb_yaw, bb_heading, bb_cls, bb_conf)) + + + return predicted_boxes + + + +def limit_period(val, offset=0.5, period=np.pi): + return val - np.floor(val / period + offset) * period + +def inverse_yaw_element(bb_yaw): + bb_yaw -= np.pi / 2 + while bb_yaw > np.pi: + # print("larger than pi") + bb_yaw -= (np.pi * 2) + while bb_yaw < -np.pi: + # print("smaller than -pi") + bb_yaw += (np.pi * 2) + + return bb_yaw + + # if bb_yaw > np.pi /2: + # bb_yaw -= 2 * np.pi + + # bb_yaw += np.pi/2 + # return bb_yaw + +def generate_bboxes_from_pred_and_np_array(occ, pos, siz, ang, hdg, clf, anchor_dims, occ_threshold=0.5): + """ Generating the bounding boxes based on the regression targets """ + + # Get only the boxes where occupancy is greater or equal threshold. 
+ real_boxes = np.where(occ >= occ_threshold) + # print(occ.shape) + # Get the indices of the occupancy array + coordinates = list(zip(real_boxes[0], real_boxes[1], real_boxes[2])) + # Assign anchor dimensions as original bounding box coordinates which will eventually be changed + # according to the predicted regression targets + anchor_dims = anchor_dims + real_anchors = np.random.rand(len(coordinates), len(anchor_dims[0])) + + for i, value in enumerate(real_boxes[2]): + real_anchors[i, ...] = anchor_dims[value] + + # Change the anchor boxes based on regression targets, this is the inverse of the operations given in + # createPillarTargets function (src/PointPillars.cpp) + predicted_boxes = [] + predicted_boxes_list = [] + for i, value in enumerate(coordinates): + # print("coordinate ", i) + real_diag = np.sqrt(np.square(real_anchors[i][0]) + np.square(real_anchors[i][1])) + real_x = value[0] * Parameters.x_step * Parameters.downscaling_factor + Parameters.x_min + real_y = value[1] * Parameters.y_step * Parameters.downscaling_factor + Parameters.y_min + # print("i: ", i, "\tx: ", real_x, "\ty:", real_y) + # print("i: ", i, "\tx: ", value[0], "\ty:", value[1]) + bb_x = pos[value][0] * real_diag + real_x + bb_y = pos[value][1] * real_diag + real_y + bb_z = pos[value][2] * real_anchors[i][2] + real_anchors[i][3] + # print(position[value], real_x, real_y, real_diag) + bb_length = np.exp(siz[value][0]) * real_anchors[i][0] + bb_width = np.exp(siz[value][1]) * real_anchors[i][1] + bb_height = np.exp(siz[value][2]) * real_anchors[i][2] + bb_heading = np.round(hdg[value]) + bb_yaw = ang[value] + real_anchors[i][4] + # if np.int32(bb_heading) == 0: + # bb_yaw -= np.pi + + bb_cls = np.argmax(clf[value]) + bb_conf = occ[value] + predicted_boxes.append(BBox(bb_x, bb_y, bb_z, bb_length, bb_width, bb_height, + bb_yaw, bb_heading, bb_cls, bb_conf)) + predicted_boxes_list.append([bb_x, bb_y, bb_z, bb_length, bb_width, bb_height, + bb_yaw, bb_heading, bb_cls, bb_conf]) + + return 
predicted_boxes, np.array(predicted_boxes_list) + +def convert_boxes_to_list(set_boxes): + # (B, N) + batch_predicted_boxes_list = [] + for batch_idx in range(len(set_boxes)): + predicted_boxes_list = [] + + for box in set_boxes[batch_idx]: + + predicted_boxes_list.append([box.x, box.y, box.z, box.length, box.width, box.height, + box.yaw, box.heading, box.cls, box.conf]) + + batch_predicted_boxes_list.append(predicted_boxes_list) + return batch_predicted_boxes_list + + +def focal_loss_checker(y_true, y_pred, n_occs=-1): + y_true = np.stack(np.where(y_true == 1)) + if n_occs == -1: + n_occs = y_true.shape[1] + occ_thr = np.sort(y_pred.flatten())[-n_occs] + y_pred = np.stack(np.where(y_pred >= occ_thr)) + p = 0 + for gt in range(y_true.shape[1]): + for pr in range(y_pred.shape[1]): + if np.all(y_true[:, gt] == y_pred[:, pr]): + p += 1 + break + print("#matched gt: ", p, " #unmatched gt: ", y_true.shape[1] - p, " #unmatched pred: ", y_pred.shape[1] - p, + " occupancy threshold: ", occ_thr) diff --git a/loss.py b/loss.py index 0cb355a..092064c 100644 --- a/loss.py +++ b/loss.py @@ -1,7 +1,7 @@ import tensorflow as tf import tensorflow_probability as tfp from tensorflow.python.keras import backend as K -from config import Parameters +from config_v2 import Parameters class PointPillarNetworkLoss: @@ -47,6 +47,7 @@ def loc_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor): mask = tf.tile(tf.expand_dims(self.mask, -1), [1, 1, 1, 1, 3]) loss = tf.compat.v1.losses.huber_loss(y_true, y_pred, + delta=3.0, reduction="none") masked_loss = tf.boolean_mask(loss, mask) @@ -56,14 +57,25 @@ def size_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor): mask = tf.tile(tf.expand_dims(self.mask, -1), [1, 1, 1, 1, 3]) loss = tf.compat.v1.losses.huber_loss(y_true, y_pred, + delta=3.0, reduction="none") masked_loss = tf.boolean_mask(loss, mask) return self.size_weight * tf.reduce_mean(masked_loss) + def add_sin_difference(self, y_true, y_pred, factor=1.0): + if factor != 1.0: + y_true = factor 
* y_true + y_pred = factor * y_pred + rad_pred_encoding = tf.math.sin(y_pred) * tf.math.cos(y_true) + rad_tg_encoding = tf.math.cos(y_pred) * tf.math.sin(y_true) + return rad_tg_encoding, rad_pred_encoding + def angle_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor): + y_true, y_pred = self.add_sin_difference(y_true, y_pred, self.angle_weight) loss = tf.compat.v1.losses.huber_loss(y_true, y_pred, + delta=3.0, reduction="none") masked_loss = tf.boolean_mask(loss, self.mask) @@ -75,6 +87,7 @@ def heading_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor): return self.heading_weight * tf.reduce_mean(masked_loss) def class_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor): - loss = tf.nn.softmax_cross_entropy_with_logits(labels=y_true, logits=y_pred) - masked_loss = tf.boolean_mask(loss, self.mask) - return self.class_weight * tf.reduce_mean(masked_loss) + return 0 # for now since we are only learning one class, the class label is equivalent to the occupancy + # loss = tf.nn.softmax_cross_entropy_with_logits(labels=y_true, logits=y_pred) + # masked_loss = tf.boolean_mask(loss, self.mask) + # return self.class_weight * tf.reduce_mean(masked_loss) diff --git a/loss_v2.py b/loss_v2.py new file mode 100644 index 0000000..6f272dd --- /dev/null +++ b/loss_v2.py @@ -0,0 +1,87 @@ +import tensorflow as tf +import tensorflow_probability as tfp +from tensorflow.python.keras import backend as K +from config_v2 import Parameters + + +class PointPillarNetworkLoss: + + def __init__(self, params: Parameters): + self.alpha = float(params.alpha) + self.gamma = float(params.gamma) + self.focal_weight = float(params.focal_weight) + self.loc_weight = float(params.loc_weight) + self.size_weight = float(params.size_weight) + self.angle_weight = float(params.angle_weight) + self.heading_weight = float(params.heading_weight) + # self.class_weight = float(params.class_weight) + + def losses(self): + return [self.focal_loss, self.loc_loss, self.size_loss, self.angle_loss, self.heading_loss] + + def 
focal_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor): + """ y_true value from occ in {-1, 0, 1}, i.e. {bad match, neg box, pos box} """ + + self.mask = tf.equal(y_true, 1) + + cross_entropy = K.binary_crossentropy(y_true, y_pred) + + p_t = y_true * y_pred + (tf.subtract(1.0, y_true) * tf.subtract(1.0, y_pred)) + + gamma_factor = tf.pow(1.0 - p_t, self.gamma) + + alpha_factor = y_true * self.alpha + (1.0 - y_true) * (1.0 - self.alpha) + + focal_loss = gamma_factor * alpha_factor * cross_entropy + + neg_mask = tf.equal(y_true, 0) + thr = tfp.stats.percentile(tf.boolean_mask(focal_loss, neg_mask), 90.) + hard_neg_mask = tf.greater(focal_loss, thr) + # mask = tf.logical_or(tf.equal(y_true, 0), tf.equal(y_true, 1)) + mask = tf.logical_or(self.mask, tf.logical_and(neg_mask, hard_neg_mask)) + masked_loss = tf.boolean_mask(focal_loss, mask) + + return self.focal_weight * tf.reduce_mean(masked_loss) + + def loc_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor): + mask = tf.tile(tf.expand_dims(self.mask, -1), [1, 1, 1, 1, 3]) + loss = tf.compat.v1.losses.huber_loss(y_true, + y_pred, + delta=3.0, + reduction="none") + + masked_loss = tf.boolean_mask(loss, mask) + return self.loc_weight * tf.reduce_mean(masked_loss) + + def size_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor): + mask = tf.tile(tf.expand_dims(self.mask, -1), [1, 1, 1, 1, 3]) + loss = tf.compat.v1.losses.huber_loss(y_true, + y_pred, + delta=3.0, + reduction="none") + + masked_loss = tf.boolean_mask(loss, mask) + return self.size_weight * tf.reduce_mean(masked_loss) + + def add_sin_difference(self, y_true, y_pred, factor=1.0): + if factor != 1.0: + y_true = factor * y_true + y_pred = factor * y_pred + rad_pred_encoding = tf.math.sin(y_pred) * tf.math.cos(y_true) + rad_tg_encoding = tf.math.cos(y_pred) * tf.math.sin(y_true) + return rad_tg_encoding, rad_pred_encoding + + def angle_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor): + y_true, y_pred = self.add_sin_difference(y_true, y_pred) + loss = 
tf.compat.v1.losses.huber_loss(y_true, + y_pred, + delta=3.0, + reduction="none") + + masked_loss = tf.boolean_mask(loss, self.mask) + return self.angle_weight * tf.reduce_mean(masked_loss) + + def heading_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor): + loss = K.binary_crossentropy(y_true, y_pred) + masked_loss = tf.boolean_mask(loss, self.mask) + return self.heading_weight * tf.reduce_mean(masked_loss) \ No newline at end of file diff --git a/loss_v2_2.py b/loss_v2_2.py new file mode 100644 index 0000000..611a7ce --- /dev/null +++ b/loss_v2_2.py @@ -0,0 +1,87 @@ +import tensorflow as tf +import tensorflow_probability as tfp +from tensorflow.python.keras import backend as K +from config_v2_2 import Parameters + + +class PointPillarNetworkLoss: + + def __init__(self, params: Parameters): + self.alpha = float(params.alpha) + self.gamma = float(params.gamma) + self.focal_weight = float(params.focal_weight) + self.loc_weight = float(params.loc_weight) + self.size_weight = float(params.size_weight) + self.angle_weight = float(params.angle_weight) + self.heading_weight = float(params.heading_weight) + # self.class_weight = float(params.class_weight) + + def losses(self): + return [self.focal_loss, self.loc_loss, self.size_loss, self.angle_loss, self.heading_loss] + + def focal_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor): + """ y_true value from occ in {-1, 0, 1}, i.e. {bad match, neg box, pos box} """ + + self.mask = tf.equal(y_true, 1) + + cross_entropy = K.binary_crossentropy(y_true, y_pred) + + p_t = y_true * y_pred + (tf.subtract(1.0, y_true) * tf.subtract(1.0, y_pred)) + + gamma_factor = tf.pow(1.0 - p_t, self.gamma) + + alpha_factor = y_true * self.alpha + (1.0 - y_true) * (1.0 - self.alpha) + + focal_loss = gamma_factor * alpha_factor * cross_entropy + + neg_mask = tf.equal(y_true, 0) + thr = tfp.stats.percentile(tf.boolean_mask(focal_loss, neg_mask), 90.) 
+ hard_neg_mask = tf.greater(focal_loss, thr) + # mask = tf.logical_or(tf.equal(y_true, 0), tf.equal(y_true, 1)) + mask = tf.logical_or(self.mask, tf.logical_and(neg_mask, hard_neg_mask)) + masked_loss = tf.boolean_mask(focal_loss, mask) + + return self.focal_weight * tf.reduce_mean(masked_loss) + + def loc_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor): + mask = tf.tile(tf.expand_dims(self.mask, -1), [1, 1, 1, 1, 3]) + loss = tf.compat.v1.losses.huber_loss(y_true, + y_pred, + delta=3.0, + reduction="none") + + masked_loss = tf.boolean_mask(loss, mask) + return self.loc_weight * tf.reduce_mean(masked_loss) + + def size_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor): + mask = tf.tile(tf.expand_dims(self.mask, -1), [1, 1, 1, 1, 3]) + loss = tf.compat.v1.losses.huber_loss(y_true, + y_pred, + delta=3.0, + reduction="none") + + masked_loss = tf.boolean_mask(loss, mask) + return self.size_weight * tf.reduce_mean(masked_loss) + + def add_sin_difference(self, y_true, y_pred, factor=1.0): + if factor != 1.0: + y_true = factor * y_true + y_pred = factor * y_pred + rad_pred_encoding = tf.math.sin(y_pred) * tf.math.cos(y_true) + rad_tg_encoding = tf.math.cos(y_pred) * tf.math.sin(y_true) + return rad_tg_encoding, rad_pred_encoding + + def angle_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor): + y_true, y_pred = self.add_sin_difference(y_true, y_pred) + loss = tf.compat.v1.losses.huber_loss(y_true, + y_pred, + delta=3.0, + reduction="none") + + masked_loss = tf.boolean_mask(loss, self.mask) + return self.angle_weight * tf.reduce_mean(masked_loss) + + def heading_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor): + loss = K.binary_crossentropy(y_true, y_pred) + masked_loss = tf.boolean_mask(loss, self.mask) + return self.heading_weight * tf.reduce_mean(masked_loss) \ No newline at end of file diff --git a/mtr_point_pillars_evaluation_v1.py b/mtr_point_pillars_evaluation_v1.py new file mode 100644 index 0000000..2c0acd5 --- /dev/null +++ b/mtr_point_pillars_evaluation_v1.py 
@@ -0,0 +1,164 @@ + + +import os +os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" +os.environ["CUDA_VISIBLE_DEVICES"] = "1" + +from glob import glob +import numpy as np +import tensorflow as tf +from mtr_processors_v1 import CustomDataGenerator, AnalyseCustomDataGenerator +from inference_utils_mtr_v1 import generate_bboxes_from_pred +from inference_utils_mtr_v1 import focal_loss_checker, rotational_nms, generate_bboxes_from_pred_and_np_array, convert_boxes_to_list +# from readers import KittiDataReader +from config_mtr_v1 import Parameters +from network_v2_2 import build_point_pillar_graph +from datetime import datetime + +from det3d.kitti_dataset.utils.evaluation import save_kitti_format, save_kitti_format_for_evaluation + +from point_viz.converter import PointvizConverter + +DATA_ROOT = "/media/data3/tjtanaa/Project4-MTR" # TODO make main arg +MODEL_ROOT = "./logs_Pedestrian_MTR_No_Early_Stopping_wo_Aug_with_val" +PC_STATISTICS_PATH = "/home/tan/tjtanaa/det3d/det3d/mtr_dataset/point_cloud_statistics" + +occ_threshold = 0.5 +nms_iou_thr=0.5 +# KITTI_EVALUATION_OUTPUT = os.path.join(MODEL_ROOT, "Evaluation") +# if not os.path.exists(KITTI_EVALUATION_OUTPUT): +# os.makedirs(KITTI_EVALUATION_OUTPUT) + + +def limit_period(val, offset=0.5, period=np.pi): + return val - np.floor(val / period + offset) * period + +if __name__ == "__main__": + + params = Parameters() + pillar_net = build_point_pillar_graph(params) + pillar_net.load_weights(os.path.join(MODEL_ROOT, "model.h5")) + pillar_net.summary() + + # save_viz_path = "/home/tan/tjtanaa/PointPillars/visualization/custom_prediction_multiprocessing" + save_viz_path = os.path.join("/home/tan/tjtanaa/PointPillars/visualization", MODEL_ROOT.split('/')[-1]) + # Initialize and setup output directory. 
+ Converter = PointvizConverter(save_viz_path) + + gt_database_dir = os.path.join(DATA_ROOT, "gt_database") + + validation_gen = AnalyseCustomDataGenerator(batch_size=params.batch_size, root_dir=DATA_ROOT, + point_cloud_statistics_path=PC_STATISTICS_PATH, + random_select=False, + npoints=8000, split='test', classes=list(params.classes_map.keys())) + inference_duration = [] + sample_index = 0 # has to be controlled manually to ensure that the sequence number is continuous + + for batch_idx in range(0,len(validation_gen)): + [pillars, voxels], [occupancy_, position_, size_, angle_, heading_], [pts_input, gt_boxes3d] = validation_gen[batch_idx] + + start=datetime.now() + + occupancy, position, size, angle, heading = pillar_net.predict([pillars, voxels]) + + inference_duration.append( datetime.now()-start) + + classification = np.zeros(shape=np.array(occupancy).shape) + classification_ = classification + + set_boxes, confidences = [], [] + loop_range = occupancy_.shape[0] if len(occupancy_.shape) == 4 else 1 + print("batch_idx ", batch_idx, " has ", loop_range, "batch sample", " with occupancy sum: ", np.sum(occupancy)) + for i in range(loop_range): + set_box, predicted_boxes3d = generate_bboxes_from_pred_and_np_array(occupancy[i], position[i], size[i], angle[i], + heading[i], + classification[i], params.anchor_dims, occ_threshold=occ_threshold) + + + _, decoded_gt_boxes3d = generate_bboxes_from_pred_and_np_array(occupancy_[i], position_[i], size_[i], angle_[i], + heading_[i], + classification_[i], params.anchor_dims, occ_threshold=occ_threshold) + + # gt_boxes3d_ = gt_boxes3d[i] + gt_boxes3d_ = decoded_gt_boxes3d + + # print(gt_boxes3d_.shape) + if(len(gt_boxes3d_) == 0): + gt_bbox_params_list = [] + else: + gt_bbox_params = np.stack([gt_boxes3d_[:,3], gt_boxes3d_[:,5], gt_boxes3d_[:,4], + gt_boxes3d_[:,1], gt_boxes3d_[:,2] , + gt_boxes3d_[:,0], + gt_boxes3d_[:,6]], axis=1) + + + gt_bbox_params_list = gt_bbox_params.tolist() + # gt_bbox_params_list = [] + for k in 
range(len(gt_bbox_params_list)): + msg = "%.5f, %s, %.5f"%(decoded_gt_boxes3d[k,9], params.map_classes[int(decoded_gt_boxes3d[k,8])], decoded_gt_boxes3d[k,6]) + # msg = "%.5f, %.5f"%(gt_bbox_params_list[k][3],gt_bbox_params_list[k][5]) + gt_bbox_params_list[k].append("Green") + # gt_bbox_params_list[k].append("1.0") + gt_bbox_params_list[k].append(msg) + + if len(set_box) > 0: + + + # NMS + # set_box + # print("start nms") + confidence = [float(box.conf) for box in set_box] + nms_boxes = rotational_nms([set_box], [confidence], occ_threshold=occ_threshold, nms_iou_thr=nms_iou_thr) + + predicted_boxes3d_list = convert_boxes_to_list(nms_boxes) + + predicted_boxes3d = np.array(predicted_boxes3d_list[0]) + predicted_boxes3d_ = predicted_boxes3d + + print("sample_: ", batch_idx * params.batch_size + i, " has ", predicted_boxes3d_.shape, "predictions") + + bbox_params = np.stack([predicted_boxes3d_[:,3], predicted_boxes3d_[:,5], predicted_boxes3d_[:,4], + predicted_boxes3d_[:,1], predicted_boxes3d_[:,2] , + predicted_boxes3d_[:,0], + predicted_boxes3d_[:,6]], axis=1) + # print("z ", predicted_boxes3d[:,2]) + + bbox_params_list = bbox_params.tolist() + # bbox_labels_conf = [str(predicted_boxes3d[k,9]) for k in range(predicted_boxes3d.shape[0])] + for k in range(predicted_boxes3d.shape[0]): + msg = "%.5f, %s, %.5f"%(predicted_boxes3d[k,9],params.map_classes[int(predicted_boxes3d[k,8])], predicted_boxes3d[k,6]) + bbox_params_list[k].append("Magenta") + bbox_params_list[k].append(msg) + # bbox_params_list[k].append(str(predicted_boxes3d[k,9]) + "=" + params.map_classes[int(predicted_boxes3d[k,8])]) + gt_bbox_params_list.append(bbox_params_list[k]) + + + # # save as kitti format for evaluation + # cur_sample_id = batch_idx * params.batch_size + i + # sample_file_name = validation_gen.sample_id_list[cur_sample_id] + # calib = sample[i]['calib'] + # # cur_boxes3d = cur_boxes3d.cpu().numpy() + + # cur_boxes3d_xyz = calib.lidar_to_rect(predicted_boxes3d[:, 0:3]) + + # cur_boxes3d 
= np.concatenate(( + # cur_boxes3d_xyz[:,0,np.newaxis], # 0 x + # cur_boxes3d_xyz[:,1,np.newaxis] + predicted_boxes3d[:,5,np.newaxis] / 2, # 1 y + # cur_boxes3d_xyz[:,2,np.newaxis], # 2 z + # predicted_boxes3d[:,5,np.newaxis], # 3 l # same as the original label + # predicted_boxes3d[:,4,np.newaxis], # 4 w # same as the original label + # predicted_boxes3d[:,3,np.newaxis], # 5 h # same as the original label + # -predicted_boxes3d[:,6,np.newaxis], # 6 ry + # ), axis=1) + # cur_scores_raw = predicted_boxes3d[:,-1] + # image_shape = validation_gen.get_image_shape(sample_file_name) + # labels_obj = validation_gen.get_label(sample_file_name) + # classes = ['Car' for i in range(len(predicted_boxes3d))] + # save_kitti_format_for_evaluation(sample_index, calib, cur_boxes3d, KITTI_EVALUATION_OUTPUT, cur_scores_raw, image_shape, classes, labels_obj) + sample_index += 1 + + coor = pts_input[i][:,[1,2,0]] + Converter.compile("evaluation_sample_{}".format(batch_idx * params.batch_size+i), coors=coor, intensity=pts_input[i][:,3], + bbox_params=gt_bbox_params_list) + # print("Average runtime speed: ", np.mean(inference_duration[20:])) + diff --git a/mtr_point_pillars_prediction_v1.py b/mtr_point_pillars_prediction_v1.py new file mode 100644 index 0000000..70623d6 --- /dev/null +++ b/mtr_point_pillars_prediction_v1.py @@ -0,0 +1,138 @@ + + +import os +os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" +os.environ["CUDA_VISIBLE_DEVICES"] = "1" + +from glob import glob +import numpy as np +import tensorflow as tf +from mtr_processors_v1 import CustomDataGenerator, AnalyseCustomDataGenerator +from inference_utils_mtr_v1 import generate_bboxes_from_pred +from inference_utils_mtr_v1 import focal_loss_checker, rotational_nms, generate_bboxes_from_pred_and_np_array, convert_boxes_to_list +# from readers import KittiDataReader +from config_mtr_v1 import Parameters +from network_v2_2 import build_point_pillar_graph +from datetime import datetime + +from det3d.kitti_dataset.utils.evaluation 
import save_kitti_format, save_kitti_format_for_evaluation + +from point_viz.converter import PointvizConverter + +DATA_ROOT = "/media/data3/tjtanaa/Project4-MTR" # TODO make main arg +MODEL_ROOT = "./logs_Pedestrian_MTR_No_Early_Stopping_wo_Aug_with_val" +PC_STATISTICS_PATH = "/home/tan/tjtanaa/det3d/det3d/mtr_dataset/point_cloud_statistics" + +occ_threshold = 0.3 +nms_iou_thr=0.1 +# KITTI_EVALUATION_OUTPUT = os.path.join(MODEL_ROOT, "Evaluation") +# if not os.path.exists(KITTI_EVALUATION_OUTPUT): +# os.makedirs(KITTI_EVALUATION_OUTPUT) + + +def limit_period(val, offset=0.5, period=np.pi): + return val - np.floor(val / period + offset) * period + +if __name__ == "__main__": + + params = Parameters() + pillar_net = build_point_pillar_graph(params) + pillar_net.load_weights(os.path.join(MODEL_ROOT, "model.h5")) + pillar_net.summary() + + # save_viz_path = "/home/tan/tjtanaa/PointPillars/visualization/custom_prediction_multiprocessing" + save_viz_path = os.path.join("/home/tan/tjtanaa/PointPillars/visualization", MODEL_ROOT.split('/')[-1]) + # Initialize and setup output directory. 
+ Converter = PointvizConverter(save_viz_path) + + gt_database_dir = os.path.join(DATA_ROOT, "gt_database") + + real_gen = AnalyseCustomDataGenerator(batch_size=params.batch_size, root_dir=DATA_ROOT, + point_cloud_statistics_path=PC_STATISTICS_PATH, + random_select=False, + npoints=8000, split='real', classes=list(params.classes_map.keys())) + inference_duration = [] + sample_index = 0 # has to be controlled manually to ensure that the sequence number is continuous + print("total number of batch: ", len(real_gen)) + for batch_idx in range(0, len(real_gen)): + [pillars, voxels], [pts_input] = real_gen[batch_idx] + + start=datetime.now() + + occupancy, position, size, angle, heading = pillar_net.predict([pillars, voxels]) + + inference_duration.append( datetime.now()-start) + + classification = np.zeros(shape=np.array(occupancy).shape) + classification_ = classification + + set_boxes, confidences = [], [] + loop_range = occupancy.shape[0] if len(occupancy.shape) == 4 else 1 + print("batch_idx ", batch_idx, " has ", loop_range, "batch sample", " with occupancy sum: ", np.sum(occupancy)) + for i in range(loop_range): + set_box, predicted_boxes3d = generate_bboxes_from_pred_and_np_array(occupancy[i], position[i], size[i], angle[i], + heading[i], + classification[i], params.anchor_dims, occ_threshold=occ_threshold) + + gt_bbox_params_list = [] + if len(set_box) > 0: + + + # NMS + # set_box + # print("start nms") + confidence = [float(box.conf) for box in set_box] + nms_boxes = rotational_nms([set_box], [confidence], occ_threshold=occ_threshold, nms_iou_thr=nms_iou_thr) + + predicted_boxes3d_list = convert_boxes_to_list(nms_boxes) + + predicted_boxes3d = np.array(predicted_boxes3d_list[0]) + predicted_boxes3d_ = predicted_boxes3d + + print("sample_: ", batch_idx * params.batch_size + i, " has ", predicted_boxes3d_.shape, "predictions") + + bbox_params = np.stack([predicted_boxes3d_[:,3], predicted_boxes3d_[:,5], predicted_boxes3d_[:,4], + predicted_boxes3d_[:,1], 
predicted_boxes3d_[:,2] , + predicted_boxes3d_[:,0], + predicted_boxes3d_[:,6]], axis=1) + # print("z ", predicted_boxes3d[:,2]) + + bbox_params_list = bbox_params.tolist() + # bbox_labels_conf = [str(predicted_boxes3d[k,9]) for k in range(predicted_boxes3d.shape[0])] + for k in range(predicted_boxes3d.shape[0]): + msg = "%.5f, %s, %.5f"%(predicted_boxes3d[k,9],params.map_classes[int(predicted_boxes3d[k,8])], predicted_boxes3d[k,6]) + bbox_params_list[k].append("Magenta") + bbox_params_list[k].append(msg) + # bbox_params_list[k].append(str(predicted_boxes3d[k,9]) + "=" + params.map_classes[int(predicted_boxes3d[k,8])]) + gt_bbox_params_list.append(bbox_params_list[k]) + + + # # save as kitti format for evaluation + # cur_sample_id = batch_idx * params.batch_size + i + # sample_file_name = validation_gen.sample_id_list[cur_sample_id] + # calib = sample[i]['calib'] + # # cur_boxes3d = cur_boxes3d.cpu().numpy() + + # cur_boxes3d_xyz = calib.lidar_to_rect(predicted_boxes3d[:, 0:3]) + + # cur_boxes3d = np.concatenate(( + # cur_boxes3d_xyz[:,0,np.newaxis], # 0 x + # cur_boxes3d_xyz[:,1,np.newaxis] + predicted_boxes3d[:,5,np.newaxis] / 2, # 1 y + # cur_boxes3d_xyz[:,2,np.newaxis], # 2 z + # predicted_boxes3d[:,5,np.newaxis], # 3 l # same as the original label + # predicted_boxes3d[:,4,np.newaxis], # 4 w # same as the original label + # predicted_boxes3d[:,3,np.newaxis], # 5 h # same as the original label + # -predicted_boxes3d[:,6,np.newaxis], # 6 ry + # ), axis=1) + # cur_scores_raw = predicted_boxes3d[:,-1] + # image_shape = validation_gen.get_image_shape(sample_file_name) + # labels_obj = validation_gen.get_label(sample_file_name) + # classes = ['Car' for i in range(len(predicted_boxes3d))] + # save_kitti_format_for_evaluation(sample_index, calib, cur_boxes3d, KITTI_EVALUATION_OUTPUT, cur_scores_raw, image_shape, classes, labels_obj) + # sample_index += 1 + + coor = pts_input[i][:,[1,2,0]] + Converter.compile("real_sample_{}".format(batch_idx * params.batch_size+i), 
coors=coor, intensity=pts_input[i][:,3], + bbox_params=gt_bbox_params_list) + # print("Average runtime speed: ", np.mean(inference_duration[20:])) + diff --git a/mtr_point_pilllars_training_run_v1.py b/mtr_point_pilllars_training_run_v1.py new file mode 100644 index 0000000..947ec4b --- /dev/null +++ b/mtr_point_pilllars_training_run_v1.py @@ -0,0 +1,102 @@ + +import os +os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" +os.environ["CUDA_VISIBLE_DEVICES"] = "2" +import time +import numpy as np +import tensorflow as tf +from glob import glob + +# from config import Parameters +from config_mtr_v1 import Parameters +from loss_v2_2 import PointPillarNetworkLoss +from network_v2_2 import build_point_pillar_graph +from mtr_processors_v1 import CustomDataGenerator +# from readers import KittiDataReader + +from det3d.mtr_dataset import MTRDatasetBase + +# from point_viz.converter import PointvizConverter + +tf.get_logger().setLevel("ERROR") + + + +# DATA_ROOT = "/media/data3/tjtanaa/kitti_dataset/KITTI/object/training" # TODO make main arg +DATA_ROOT = "/media/data3/tjtanaa/Project4-MTR" # TODO make main arg +MODEL_ROOT = "./logs_Pedestrian_MTR_No_Early_Stopping_wo_Aug_with_val" +PC_STATISTICS_PATH = "/home/tan/tjtanaa/det3d/det3d/mtr_dataset/point_cloud_statistics" + +# from tensorflow.python.client import device_lib +# print(device_lib.list_local_devices()) +# exit() +if __name__ == "__main__": + params = Parameters() + + # gpus = tf.config.experimental.list_physical_devices('GPU') + + pillar_net = build_point_pillar_graph(params) + # pillar_net.load_weights(os.path.join(MODEL_ROOT, "model.h5")) + pillar_net.summary() + # exit() + loss = PointPillarNetworkLoss(params) + + optimizer = tf.keras.optimizers.Adam(lr=params.learning_rate, decay=params.decay_rate) + + pillar_net.compile(optimizer, loss=loss.losses()) + + + train_dataset = MTRDatasetBase(DATA_ROOT, 'train', PC_STATISTICS_PATH) + + gt_database_dir = os.path.join(DATA_ROOT, "gt_database") + # gt_database_dir = None + 
+ training_gen = CustomDataGenerator(batch_size=params.batch_size,root_dir = DATA_ROOT, + point_cloud_statistics_path=PC_STATISTICS_PATH, + npoints=8000, split='train', classes=list(params.classes_map.keys()), + random_select=True, gt_database_dir=gt_database_dir, aug_hard_ratio=0.7) + + validation_gen = CustomDataGenerator(batch_size=params.batch_size, root_dir=DATA_ROOT, + point_cloud_statistics_path=PC_STATISTICS_PATH, + npoints=8000, split='test', classes=list(params.classes_map.keys())) + + + # save_viz_path = "/home/tan/tjtanaa/PointPillars/visualization/custom_processor" + # Initialize and setup output directory. + # Converter = PointvizConverter(save_viz_path) + + + + # bbox_params = self.convert_labels_into_point_viz_format(gt_boxes3d) + # print(bbox_params.shape) + # Converter.compile("custom_sample_{}".format(i), coors=pts_input[:,:3], intensity=pts_input[:,3], + # bbox_params=bbox_params) + + + log_dir = MODEL_ROOT + epoch_to_decay = int( + np.round(params.iters_to_decay / params.batch_size * int(len(training_gen)))) + callbacks = [ + tf.keras.callbacks.TensorBoard(log_dir=log_dir), + tf.keras.callbacks.ModelCheckpoint(filepath=os.path.join(log_dir, "model.h5"), + monitor='val_loss', save_best_only=True), + tf.keras.callbacks.LearningRateScheduler( + lambda epoch, lr: lr * 0.8 if ((epoch % epoch_to_decay == 0) and (epoch != 0)) else lr, verbose=True), + # tf.keras.callbacks.EarlyStopping(patience=20, monitor='val_loss'), + ] + + try: + pillar_net.fit(training_gen, + validation_data = validation_gen, + steps_per_epoch=len(training_gen), + callbacks=callbacks, + use_multiprocessing=True, + max_queue_size = 16, + epochs=int(params.total_training_epochs), + workers=6) + except KeyboardInterrupt: + model_str = "interrupted_%s.h5" % time.strftime("%Y%m%d-%H%M%S") + pillar_net.save(os.path.join(log_dir, model_str)) + print("Interrupt. 
Saving output to %s" % os.path.join(os.getcwd(), log_dir[1:], model_str)) + + diff --git a/mtr_processors_v1.py b/mtr_processors_v1.py new file mode 100644 index 0000000..3a83f07 --- /dev/null +++ b/mtr_processors_v1.py @@ -0,0 +1,379 @@ +from typing import List, Any +import numpy as np +import tensorflow as tf + +from tensorflow.python.keras.utils.data_utils import Sequence + +from config_mtr_v1 import Parameters +# from point_pillars import createPillars, createPillarsTarget +from point_pillars_v2 import createPillars, createPillarsTarget +# from readers import DataReader, Label3D +from sklearn.utils import shuffle +import sys +from det3d.mtr_dataset import MTRDatasetBase +from det3d.mtr_dataset.utils import mtr_utils + +# from point_viz.converter import PointvizConverter +from datetime import datetime + + +def limit_period(val, offset=0.5, period=np.pi): + return val - np.floor(val / period + offset) * period + +def select_best_anchors(arr): + dims = np.indices(arr.shape[1:]) + # arr[..., 0:1] gets the occupancy value from occ in {-1, 0, 1}, i.e. 
{bad match, neg box, pos box} + ind = (np.argmax(arr[..., 0:1], axis=0),) + tuple(dims) + + return arr[ind] + + +class DataProcessor(Parameters): + + def __init__(self, **kwargs): + super(DataProcessor, self).__init__(**kwargs) + anchor_dims = np.array(self.anchor_dims, dtype=np.float32) + self.anchor_dims = anchor_dims[:, 0:3] + self.anchor_z = anchor_dims[:, 3] + self.anchor_yaw = anchor_dims[:, 4] + # Counts may be used to make statistic about how well the anchor boxes fit the objects + self.pos_cnt, self.neg_cnt = 0, 0 + + def make_point_pillars(self, points: np.ndarray): + + assert points.ndim == 2 + assert points.shape[1] == 4 + assert points.dtype == np.float32 + # start=datetime.now() + pillars, indices = createPillars(points, + self.max_points_per_pillar, + self.max_pillars, + self.x_step, + self.y_step, + self.x_min, + self.x_max, + self.y_min, + self.y_max, + self.z_min, + self.z_max, + False) + # print("Create pillar takes : ", datetime.now()-start) + + return pillars, indices + + def make_ground_truth(self, gt_boxes_3d: Any, gt_cls_type_list: List[str]): + """ Generate the ground truth label for each pillars + + Args: + gt_boxes_3d (numpy[float]): A list of floats containing [x, y, z, h, w, l, ry] + gt_cls_type_list (List[str]): A list of floats containing [cls_type] + + Returns: + [type]: [description] + """ + + # filter labels by classes (cars, pedestrians and Trams) + # Label has 4 properties (Classification (0th index of labels file), + # centroid coordinates, dimensions, yaw) + + if len(gt_boxes_3d) == 0: + pX, pY = int(self.Xn / self.downscaling_factor), int(self.Yn / self.downscaling_factor) + a = int(self.anchor_dims.shape[0]) + return np.zeros((pX, pY, a), dtype='float32'), np.zeros((pX, pY, a, self.nb_dims), dtype='float32'), \ + np.zeros((pX, pY, a, self.nb_dims), dtype='float32'), np.zeros((pX, pY, a), dtype='float32'), \ + np.zeros((pX, pY, a), dtype='float32'), np.zeros((pX, pY, a, self.nb_classes), dtype='float64') + + # For each label 
file, generate these properties except for the Don't care class + target_positions = gt_boxes_3d[:,:3] + target_dimension = gt_boxes_3d[:,3:6] # don't have to translate again + target_yaw = gt_boxes_3d[:, 6] + # print(type(self.classes)) + # print(type(self.classes_map)) + # # print(gt_cls_type_list[0]) + # print(self.classes_map[gt_cls_type_list[0]]) + + target_class = np.array([self.classes_map[gt_cls_type_list[k]] for k in range(len(gt_cls_type_list))], dtype=np.int32) + + assert np.all(target_yaw >= -np.pi) & np.all(target_yaw <= np.pi) + assert len(target_positions) == len(target_dimension) == len(target_yaw) == len(target_class) + + # start=datetime.now() + + target, pos, neg = createPillarsTarget(target_positions, + target_dimension, + target_yaw, + target_class, + self.anchor_dims, + self.anchor_z, + self.anchor_yaw, + self.positive_iou_threshold, + self.negative_iou_threshold, + self.nb_classes, + self.downscaling_factor, + self.x_step, + self.y_step, + self.x_min, + self.x_max, + self.y_min, + self.y_max, + self.z_min, + self.z_max, + False) + + # print("Create target takes : ", datetime.now()-start) + + self.pos_cnt += pos + self.neg_cnt += neg + + # return a merged target view for all objects in the ground truth and get categorical labels + + sel = select_best_anchors(target) + ohe = tf.keras.utils.to_categorical(sel[..., 9], num_classes=self.nb_classes, dtype='float64') + # print("self.shape: ", sel[...,0].shape) + + + return sel[..., 0], sel[..., 1:4], sel[..., 4:7], sel[..., 7], sel[..., 8], ohe + + +class CustomDataGenerator(DataProcessor, Sequence, MTRDatasetBase): + """ Multiprocessing-safe data generator for training, validation or testing, without fancy augmentation """ + + def __init__(self, batch_size: int, root_dir:str, point_cloud_statistics_path: str, + npoints:int =16384, split: str ='train', + classes:List[str] =['Car', 'Pedestrian', 'Person_sitting'], random_select:bool =True, + gt_database_dir=None, aug_hard_ratio:float=0.5, **kwargs): 
+ + super(CustomDataGenerator, self).__init__( + root_dir = root_dir, + split = split, + point_cloud_statistics_path = point_cloud_statistics_path, + **kwargs + # batch_size=batch_size, root_dir=root_dir, + # npoints=npoints, split=split, classes=classes, + # random_select=random_select, gt_database_dir=gt_database_dir, + # aug_hard_ratio=aug_hard_ratio, **kwargs + ) + self.batch_size = batch_size + + + def get_sample(self, index): + return super().get_sample(index) + + + def __len__(self): + return len(self.sample_list) // self.batch_size + + def __getitem__(self, batch_id: int): + file_ids = range(batch_id * self.batch_size, self.batch_size * (batch_id + 1)) + pillars = [] + voxels = [] + occupancy = [] + position = [] + size = [] + angle = [] + heading = [] + classification = [] + + + + for i in file_ids: + point_cloud = self.get_lidar_without_background(i) + + pts_features = point_cloud[:, 3:] + + pts_input = np.concatenate([point_cloud[:,:3], pts_features[:,1,np.newaxis]], axis=1) + + # Voxels are the pillar ids + pillars_, voxels_ = self.make_point_pillars(pts_input) + + pillars.append(pillars_) + voxels.append(voxels_) + + + + obj_list = self.get_label(i) # are labels + + gt_boxes3d = np.zeros((obj_list.__len__(), 7), dtype=np.float32) + gt_bbox_params_list = [] + for k, obj in enumerate(obj_list): + gt_boxes3d[k, 0:3], gt_boxes3d[k, 3], gt_boxes3d[k, 4], gt_boxes3d[k, 5], gt_boxes3d[k, 6] \ + = obj.pos, obj.h, obj.w, obj.l, limit_period(obj.ry, offset=0.5, period=2*np.pi) # mtr format + # = obj.pos, obj.h, obj.w, obj.l, obj.ry # kitti + + # print(bboxes3d_[:,:3].shape) + invalid_region_mask = self._get_invalid_region_mask(gt_boxes3d[:,:3]) + gt_boxes3d = gt_boxes3d[~invalid_region_mask,:] + + gt_boxes3d = np.concatenate(( + gt_boxes3d[:,0,np.newaxis], # 0 x + gt_boxes3d[:,1,np.newaxis], # 1 y + gt_boxes3d[:,2,np.newaxis], # 2 z + gt_boxes3d[:,5,np.newaxis], # 3 l # same as the original label + gt_boxes3d[:,4,np.newaxis], # 4 w # same as the original label + 
gt_boxes3d[:,3,np.newaxis], # 5 h # same as the original label + gt_boxes3d[:,6,np.newaxis], # 6 ry + ), axis=1) + + if self.split=='train' or self.split =='test': + occupancy_, position_, size_, angle_, heading_, classification_ = self.make_ground_truth( + gt_boxes3d, ['pedestrian' for i in range(len(gt_boxes3d))]) + + occupancy.append(occupancy_) + position.append(position_) + size.append(size_) + angle.append(angle_) + heading.append(heading_) + classification.append(classification_) + + pillars = np.concatenate(pillars, axis=0) + voxels = np.concatenate(voxels, axis=0) + + if self.split=='train' or self.split =='test': + occupancy = np.array(occupancy) + position = np.array(position) + size = np.array(size) + angle = np.array(angle) + heading = np.array(heading) + classification = np.array(classification) + # return [pillars, voxels], [occupancy, position, size, angle, heading, classification] # network + return [pillars, voxels], [occupancy, position, size, angle, heading] # network_v2 + else: + return [pillars, voxels] + + def on_epoch_end(self): + if self.split=='train' or self.split =='test': + self.sample_list=shuffle(self.sample_list) + + + + +class AnalyseCustomDataGenerator(CustomDataGenerator): + """ Multiprocessing-safe data generator for training, validation or testing, without fancy augmentation """ + + def __init__(self, batch_size: int, root_dir:str, point_cloud_statistics_path: str, + npoints:int =16384, split: str ='train', + classes:List[str] =['Car', 'Pedestrian', 'Person_sitting'], random_select:bool =True, + gt_database_dir=None, aug_hard_ratio:float=0.5, **kwargs): + + super(AnalyseCustomDataGenerator, self).__init__( + batch_size=batch_size, root_dir=root_dir, + point_cloud_statistics_path = point_cloud_statistics_path, + npoints=npoints, split=split, classes=classes, + random_select=random_select, gt_database_dir=gt_database_dir, + aug_hard_ratio=aug_hard_ratio, **kwargs + ) + self.batch_size = batch_size + + + def get_sample(self, 
index): + return super().get_sample(index) + + + def __len__(self): + return len(self.sample_list) // self.batch_size + + def __getitem__(self, batch_id: int): + file_ids = range(batch_id * self.batch_size, self.batch_size * (batch_id + 1)) + pillars = [] + voxels = [] + occupancy = [] + position = [] + size = [] + angle = [] + heading = [] + classification = [] + pts_input_ = [] + gt_boxes3d_ = [] + + + + for i in file_ids: + point_cloud = self.get_lidar_without_background(i) + + pts_features = point_cloud[:, 3:] + + pts_input = np.concatenate([point_cloud[:,:3], pts_features[:,1,np.newaxis]], axis=1) + + # Voxels are the pillar ids + pillars_, voxels_ = self.make_point_pillars(pts_input) + + pillars.append(pillars_) + voxels.append(voxels_) + + + + + if self.split=='train' or self.split =='test': + obj_list = self.get_label(i) # are labels + + gt_boxes3d = np.zeros((obj_list.__len__(), 7), dtype=np.float32) + # gt_bbox_params_list = [] + for k, obj in enumerate(obj_list): + gt_boxes3d[k, 0:3], gt_boxes3d[k, 3], gt_boxes3d[k, 4], gt_boxes3d[k, 5], gt_boxes3d[k, 6] \ + = obj.pos, obj.h, obj.w, obj.l, limit_period(obj.ry, offset=0.5, period=2*np.pi) # mtr format + # = obj.pos, obj.h, obj.w, obj.l, obj.ry # kitti + + # print(bboxes3d_[:,:3].shape) + invalid_region_mask = self._get_invalid_region_mask(gt_boxes3d[:,:3]) + gt_boxes3d = gt_boxes3d[~invalid_region_mask,:] + + # for k in range(len(gt_boxes3d)): + # gt_bbox_params = [gt_boxes3d[k, 5], gt_boxes3d[k, 3], gt_boxes3d[k, 4], + # gt_boxes3d[k, 1], gt_boxes3d[k, 2], gt_boxes3d[k, 0], + # gt_boxes3d[k, 6]] + + # gt_bbox_params_list.append(gt_bbox_params) + + + + + # if gt_boxes3d.__len__() == 0: + # print('No gt object') + # continue + + gt_boxes3d = np.concatenate(( + gt_boxes3d[:,0,np.newaxis], # 0 x + gt_boxes3d[:,1,np.newaxis], # 1 y + gt_boxes3d[:,2,np.newaxis], # 2 z + gt_boxes3d[:,5,np.newaxis], # 3 l # same as the original label + gt_boxes3d[:,4,np.newaxis], # 4 w # same as the original label + 
gt_boxes3d[:,3,np.newaxis], # 5 h # same as the original label + gt_boxes3d[:,6,np.newaxis], # 6 ry + ), axis=1) + + occupancy_, position_, size_, angle_, heading_, classification_ = self.make_ground_truth( + gt_boxes3d, ['pedestrian' for i in range(len(gt_boxes3d))]) + + occupancy.append(occupancy_) + position.append(position_) + size.append(size_) + angle.append(angle_) + heading.append(heading_) + classification.append(classification_) + + gt_boxes3d_.append(gt_boxes3d) + pts_input_.append(pts_input) + elif self.split=='real': + pts_input_.append(pts_input) + + + pillars = np.concatenate(pillars, axis=0) + voxels = np.concatenate(voxels, axis=0) + + if self.split=='train' or self.split =='test': + occupancy = np.array(occupancy) + position = np.array(position) + size = np.array(size) + angle = np.array(angle) + heading = np.array(heading) + classification = np.array(classification) + # return [pillars, voxels], [occupancy, position, size, angle, heading, classification] # network + return [pillars, voxels], [occupancy, position, size, angle, heading], [pts_input_, gt_boxes3d_] # network_v2 + elif self.split=='real': + return [pillars, voxels], [pts_input_] + else: + return [pillars, voxels] + + def on_epoch_end(self): + if self.split=='train' or self.split =='test': + self.sample_list=shuffle(self.sample_list) \ No newline at end of file diff --git a/network.py b/network.py index 4108aec..1404751 100644 --- a/network.py +++ b/network.py @@ -115,4 +115,4 @@ def correct_batch_indices(tensor, batch_size): pillar_net = tf.keras.models.Model([input_pillars, input_indices], [occ, loc, size, angle, heading, clf]) # print(pillar_net.summary()) - return pillar_net + return pillar_net \ No newline at end of file diff --git a/network_v2.py b/network_v2.py new file mode 100644 index 0000000..1fba5e4 --- /dev/null +++ b/network_v2.py @@ -0,0 +1,121 @@ +import tensorflow as tf +import numpy as np +# from config import Parameters +from config_v2 import Parameters + + +def 
build_point_pillar_graph(params: Parameters): + + # extract required parameters + max_pillars = int(params.max_pillars) + max_points = int(params.max_points_per_pillar) + nb_features = int(params.nb_features) + nb_channels = int(params.nb_channels) + batch_size = int(params.batch_size) + image_size = tuple([params.Xn, params.Yn]) + nb_classes = int(params.nb_classes) + nb_anchors = len(params.anchor_dims) + + if tf.keras.backend.image_data_format() == "channels_first": + raise NotImplementedError + else: + input_shape = (max_pillars, max_points, nb_features) + + input_pillars = tf.keras.layers.Input(input_shape, batch_size=batch_size, name="pillars/input") + input_indices = tf.keras.layers.Input((max_pillars, 3), batch_size=batch_size, name="pillars/indices", + dtype=tf.int32) + + def correct_batch_indices(tensor, batch_size): + array = np.zeros((batch_size, max_pillars, 3), dtype=np.float32) + for i in range(batch_size): + array[i, :, 0] = i + return tensor + tf.constant(array, dtype=tf.int32) + + if batch_size > 1: + corrected_indices = tf.keras.layers.Lambda(lambda t: correct_batch_indices(t, batch_size))(input_indices) + else: + corrected_indices = input_indices + + # pillars + x = tf.keras.layers.Conv2D(nb_channels, (1, 1), activation='relu', use_bias=False, name="pillars/conv2d")(input_pillars) + x = tf.keras.layers.BatchNormalization(name="pillars/batchnorm", fused=True, epsilon=1e-3, momentum=0.01)(x) + x = tf.keras.layers.MaxPool2D((1, max_points), name="pillars/maxpooling2d")(x) + + if tf.keras.backend.image_data_format() == "channels_first": + reshape_shape = (nb_channels, max_pillars) + else: + reshape_shape = (max_pillars, nb_channels) + + x = tf.keras.layers.Reshape(reshape_shape, name="pillars/reshape")(x) + pillars = tf.keras.layers.Lambda(lambda inp: tf.scatter_nd(inp[0], inp[1], + (batch_size,) + image_size + (nb_channels,)), + name="pillars/scatter_nd")([corrected_indices, x]) + + # 2d cnn backbone + + # # Block1(S, 4, C) + # Block1(S, 4, C) + x 
= pillars + for n in range(4): + S = (2, 2) if n == 0 else (1, 1) + # # S = (1, 1) # pedestrian + x = tf.keras.layers.Conv2D(nb_channels, (3, 3), strides=S, padding="same", activation="relu", + name="cnn/block1/conv2d%i" % n)(x) + x = tf.keras.layers.BatchNormalization(name="cnn/block1/bn%i" % n, fused=True, epsilon=1e-3, momentum=0.01)(x) + x1 = x + + # Block2(2S, 6, 2C) + for n in range(6): + S = (2, 2) if n == 0 else (1, 1) + x = tf.keras.layers.Conv2D(2 * nb_channels, (3, 3), strides=S, padding="same", activation="relu", + name="cnn/block2/conv2d%i" % n)(x) + x = tf.keras.layers.BatchNormalization(name="cnn/block2/bn%i" % n, fused=True, epsilon=1e-3, momentum=0.01)(x) + x2 = x + + # Block3(4S, 6, 4C) + for n in range(6): + S = (2, 2) if n == 0 else (1, 1) + x = tf.keras.layers.Conv2D(4 * nb_channels, (3, 3), strides=S, padding="same", activation="relu", + name="cnn/block3/conv2d%i" % n)(x) + x = tf.keras.layers.BatchNormalization(name="cnn/block3/bn%i" % n, fused=True, epsilon=1e-3, momentum=0.01)(x) + x3 = x + + + # Up1 (S, S, 2C) + up1 = tf.keras.layers.Conv2DTranspose(2 * nb_channels, (1, 1), strides=(1, 1), padding="same", activation="relu", + name="cnn/up1/conv2dt")(x1) + up1 = tf.keras.layers.BatchNormalization(name="cnn/up1/bn", fused=True, epsilon=1e-3, momentum=0.01)(up1) + + # Up2 (2S, S, 2C) + up2 = tf.keras.layers.Conv2DTranspose(2 * nb_channels, (2, 2), strides=(2, 2), padding="same", activation="relu", + name="cnn/up2/conv2dt")(x2) + up2 = tf.keras.layers.BatchNormalization(name="cnn/up2/bn", fused=True, epsilon=1e-3, momentum=0.01)(up2) + + # Up3 (4S, S, 2C) + up3 = tf.keras.layers.Conv2DTranspose(2 * nb_channels, (4, 4), strides=(4, 4), padding="same", activation="relu", + name="cnn/up3/conv2dt")(x3) + up3 = tf.keras.layers.BatchNormalization(name="cnn/up3/bn", fused=True, epsilon=1e-3, momentum=0.01)(up3) + + # Concat + concat = tf.keras.layers.Concatenate(name="cnn/concatenate")([up1, up2, up3]) + + # Detection head + occ = 
def build_point_pillar_graph(params: Parameters):
    """Build the PointPillars Keras model: pillar encoder, 2D CNN backbone and detection head.

    Args:
        params: Configuration object. The attributes read here are ``max_pillars``,
            ``max_points_per_pillar``, ``nb_features``, ``nb_channels``, ``batch_size``,
            ``Xn``, ``Yn``, ``nb_classes``, ``anchor_dims`` and ``num_gpus``.

    Returns:
        A ``tf.keras.models.Model`` whose inputs are ``[input_pillars, input_indices]``
        and whose outputs are ``[occ, loc, size, angle, heading]``.

    Raises:
        NotImplementedError: If the Keras image data format is ``"channels_first"``.
    """

    # extract required parameters
    max_pillars = int(params.max_pillars)
    max_points = int(params.max_points_per_pillar)
    nb_features = int(params.nb_features)
    nb_channels = int(params.nb_channels)
    batch_size = int(params.batch_size)
    image_size = tuple([params.Xn, params.Yn])
    # nb_classes is only referenced by the commented-out "clf" head below.
    nb_classes = int(params.nb_classes)
    nb_anchors = len(params.anchor_dims)
    num_gpus = int(params.num_gpus)
    # batch_size = batch_size // num_gpus

    # print(batch_size)

    if tf.keras.backend.image_data_format() == "channels_first":
        raise NotImplementedError
    else:
        input_shape = (max_pillars, max_points, nb_features)

    # Both inputs are declared with a fixed batch dimension of `batch_size`.
    input_pillars = tf.keras.layers.Input(input_shape, batch_size=batch_size, name="pillars/input")
    input_indices = tf.keras.layers.Input((max_pillars, 3), batch_size=batch_size, name="pillars/indices",
                                          dtype=tf.int32)
    # print(batch_size, input_indices.shape, input_pillars.shape)

    def correct_batch_indices(tensor, batch_size):
        # Adds i to column 0 of the indices of batch element i. Column 0 addresses the
        # leading (batch) dimension of the scatter_nd target built further down.
        # NOTE(review): this offset array has batch_size // num_gpus rows while the Input
        # layers above are declared with batch_size rows; for num_gpus > 1 the two shapes
        # differ -- confirm the intended per-replica batch handling.
        array = np.zeros((batch_size//num_gpus, max_pillars, 3), dtype=np.float32)
        # print(batch_size, array.shape, input_pillars.shape)
        for i in range(batch_size//num_gpus):
            array[i, :, 0] = i
        return tensor + tf.constant(array, dtype=tf.int32)

    # With a single sample per batch there is nothing to offset.
    if batch_size > 1:
        corrected_indices = tf.keras.layers.Lambda(lambda t: correct_batch_indices(t, batch_size))(input_indices)
    else:
        corrected_indices = input_indices

    # pillars
    # Per-point 1x1 convolution, then max-pooling over the points axis so that each
    # pillar is reduced to a single nb_channels feature vector.
    # NOTE(review): every BatchNormalization in this function uses momentum=0.01; in Keras
    # `momentum` is the decay of the moving statistics -- confirm this value is intended.
    x = tf.keras.layers.Conv2D(nb_channels, (1, 1), activation='relu', use_bias=False, name="pillars/conv2d")(input_pillars)
    x = tf.keras.layers.BatchNormalization(name="pillars/batchnorm", fused=True, epsilon=1e-3, momentum=0.01)(x)
    x = tf.keras.layers.MaxPool2D((1, max_points), name="pillars/maxpooling2d")(x)

    # The "channels_first" branch cannot be reached: that format raises above.
    if tf.keras.backend.image_data_format() == "channels_first":
        reshape_shape = (nb_channels, max_pillars)
    else:
        reshape_shape = (max_pillars, nb_channels)

    x = tf.keras.layers.Reshape(reshape_shape, name="pillars/reshape")(x)
    # Scatter the pillar features into a dense (batch_size, Xn, Yn, nb_channels) tensor
    # at the positions given by the (batch-corrected) indices.
    pillars = tf.keras.layers.Lambda(lambda inp: tf.scatter_nd(inp[0], inp[1],
                                                               (batch_size,) + image_size + (nb_channels,)),
                                     name="pillars/scatter_nd")([corrected_indices, x])

    # 2d cnn backbone

    # # Block1(S, 4, C)
    # Block1(S, 4, C)
    # In each block only the first convolution is strided (2, 2); the rest use stride 1.
    x = pillars
    for n in range(4):
        S = (2, 2) if n == 0 else (1, 1)
        # # S = (1, 1) # pedestrian
        x = tf.keras.layers.Conv2D(nb_channels, (3, 3), strides=S, padding="same", activation="relu",
                                   name="cnn/block1/conv2d%i" % n)(x)
        x = tf.keras.layers.BatchNormalization(name="cnn/block1/bn%i" % n, fused=True, epsilon=1e-3, momentum=0.01)(x)
    x1 = x

    # Block2(2S, 6, 2C)
    for n in range(6):
        S = (2, 2) if n == 0 else (1, 1)
        x = tf.keras.layers.Conv2D(2 * nb_channels, (3, 3), strides=S, padding="same", activation="relu",
                                   name="cnn/block2/conv2d%i" % n)(x)
        x = tf.keras.layers.BatchNormalization(name="cnn/block2/bn%i" % n, fused=True, epsilon=1e-3, momentum=0.01)(x)
    x2 = x

    # Block3(4S, 6, 4C)
    for n in range(6):
        S = (2, 2) if n == 0 else (1, 1)
        x = tf.keras.layers.Conv2D(4 * nb_channels, (3, 3), strides=S, padding="same", activation="relu",
                                   name="cnn/block3/conv2d%i" % n)(x)
        x = tf.keras.layers.BatchNormalization(name="cnn/block3/bn%i" % n, fused=True, epsilon=1e-3, momentum=0.01)(x)
    x3 = x


    # The transposed convolutions use strides 1, 2 and 4 on x1, x2 and x3 respectively,
    # and their outputs are concatenated along the channel axis below.
    # Up1 (S, S, 2C)
    up1 = tf.keras.layers.Conv2DTranspose(2 * nb_channels, (1, 1), strides=(1, 1), padding="same", activation="relu",
                                          name="cnn/up1/conv2dt")(x1)
    up1 = tf.keras.layers.BatchNormalization(name="cnn/up1/bn", fused=True, epsilon=1e-3, momentum=0.01)(up1)

    # Up2 (2S, S, 2C)
    up2 = tf.keras.layers.Conv2DTranspose(2 * nb_channels, (2, 2), strides=(2, 2), padding="same", activation="relu",
                                          name="cnn/up2/conv2dt")(x2)
    up2 = tf.keras.layers.BatchNormalization(name="cnn/up2/bn", fused=True, epsilon=1e-3, momentum=0.01)(up2)

    # Up3 (4S, S, 2C)
    up3 = tf.keras.layers.Conv2DTranspose(2 * nb_channels, (4, 4), strides=(4, 4), padding="same", activation="relu",
                                          name="cnn/up3/conv2dt")(x3)
    up3 = tf.keras.layers.BatchNormalization(name="cnn/up3/bn", fused=True, epsilon=1e-3, momentum=0.01)(up3)

    # Concat
    concat = tf.keras.layers.Concatenate(name="cnn/concatenate")([up1, up2, up3])

    # Detection head
    # Each head is a 1x1 convolution on the shared feature map. loc and size are reshaped
    # to (Xn // 2, Yn // 2, nb_anchors, 3); occ, angle and heading keep nb_anchors channels.
    occ = tf.keras.layers.Conv2D(nb_anchors, (1, 1), name="occupancy/conv2d", activation="sigmoid")(concat)

    loc = tf.keras.layers.Conv2D(nb_anchors * 3, (1, 1), name="loc/conv2d", kernel_initializer=tf.keras.initializers.TruncatedNormal(0, 0.001))(concat)
    loc = tf.keras.layers.Reshape(tuple(i//2 for i in image_size) + (nb_anchors, 3), name="loc/reshape")(loc)

    size = tf.keras.layers.Conv2D(nb_anchors * 3, (1, 1), name="size/conv2d", kernel_initializer=tf.keras.initializers.TruncatedNormal(0, 0.001))(concat)
    size = tf.keras.layers.Reshape(tuple(i//2 for i in image_size) + (nb_anchors, 3), name="size/reshape")(size)

    angle = tf.keras.layers.Conv2D(nb_anchors, (1, 1), name="angle/conv2d")(concat)

    heading = tf.keras.layers.Conv2D(nb_anchors, (1, 1), name="heading/conv2d", activation="sigmoid")(concat)

    # clf = tf.keras.layers.Conv2D(nb_anchors * nb_classes, (1, 1), name="clf/conv2d")(concat)
    # clf = tf.keras.layers.Reshape(tuple(i // 2 for i in image_size) + (nb_anchors, nb_classes), name="clf/reshape")(clf)

    # The classification head is disabled above, so the model exposes five outputs.
    pillar_net = tf.keras.models.Model([input_pillars, input_indices], [occ, loc, size, angle, heading])
#     print(pillar_net.summary())

    return pillar_net
def limit_period(val, offset=0.5, period=np.pi):
    """Wrap ``val`` into a single window of length ``period``.

    For a positive ``period`` the result lies in
    ``[-offset * period, (1 - offset) * period)``.

    Args:
        val: Scalar or numpy array of values (e.g. angles) to wrap.
        offset: Fraction of ``period`` by which the window extends below zero.
        period: Length of the wrapping window.

    Returns:
        ``val`` shifted by a whole number of periods, with the same shape as ``val``.
    """
    # Number of whole periods to remove so the value lands in the target window.
    whole_periods = np.floor(val / period + offset)
    return val - whole_periods * period
gt_boxes3d_ = gt_boxes3d[i] + + # print(gt_boxes3d_.shape) + # gt_bbox_params = np.stack([gt_boxes3d_[:,3], gt_boxes3d_[:,5], gt_boxes3d_[:,4], + # gt_boxes3d_[:,1], gt_boxes3d_[:,2] , + # gt_boxes3d_[:,0], + # gt_boxes3d_[:,6]], axis=1) + + + # gt_bbox_params_list = gt_bbox_params.tolist() + # # gt_bbox_params_list = [] + # # print(gt_bbox_params_list) + # # print(len(gt_bbox_params_list)) + # # print(len(gt_bbox_params_list[0])) + + # for k in range(len(gt_bbox_params_list)): + # msg = "%.5f, %.5f"%(gt_bbox_params_list[k][3],gt_bbox_params_list[k][5]) + # gt_bbox_params_list[k].append("Green") + # gt_bbox_params_list[k].append(msg) + + # if len(gt_set_box) > 0: + # decoded_gt_boxes3d_ = decoded_gt_boxes3d + # # bbox_params = validation_gen.convert_predictions_into_point_viz_format(predicted_boxes3d[:,[1, 2, 0, 5, 3, 4, 6 ]]) + + # print(decoded_gt_boxes3d_.shape) + # # print(predicted_boxes3d_) + # # print(size[i]) + + # bbox_params = np.stack([decoded_gt_boxes3d_[:,3], decoded_gt_boxes3d_[:,5], decoded_gt_boxes3d_[:,4], + # decoded_gt_boxes3d_[:,1], decoded_gt_boxes3d_[:,2] , + # decoded_gt_boxes3d_[:,0], + # decoded_gt_boxes3d_[:,6]], axis=1) + + + # # bbox_params = np.stack([predicted_boxes3d[:,4], predicted_boxes3d[:,5], predicted_boxes3d[:,3], + # # predicted_boxes3d[:,1], -(predicted_boxes3d[:,2] - predicted_boxes3d[:,5] / 2), + # # predicted_boxes3d[:,0], + # # predicted_boxes3d[:,6]], axis=1) + + # bbox_params_list = bbox_params.tolist() + # # bbox_labels_conf = [str(predicted_boxes3d[k,9]) for k in range(predicted_boxes3d.shape[0])] + # for k in range(decoded_gt_boxes3d.shape[0]): + # msg = "%.5f, %.5f"%(bbox_params_list[k][3],bbox_params_list[k][5]) + # # msg = (str(bbox_params_list[k][3:5])) + # bbox_params_list[k].append("Magenta") + # bbox_params_list[k].append(msg) + # # bbox_params_list[k].append(str(decoded_gt_boxes3d[k,9]) + params.map_classes[int(decoded_gt_boxes3d[k,8])]) + # gt_bbox_params_list.append(bbox_params_list[k]) + + # # 
print(gt_bbox_params_list) + # # print(gt_bbox_params.tolist()) + + # coor = pts_input[i][:,[1,2,0]] + # # coor[:,1] *= -1 + # Converter.compile("val_custom_sample_{}".format(batch_idx * params.batch_size+i), coors=coor, intensity=pts_input[i][:,3], + # bbox_params=gt_bbox_params_list) + + # exit() + # set_boxes.append(set_box) + # # set_boxes.append(generate_bboxes_from_pred(occupancy, position, size, angle, heading, + # # classification, params.anchor_dims, occ_threshold=0.1)) + # # confidences.append([float(boxes.conf) for boxes in set_boxes[-1]]) + + # sum_bboxes = 0 + # for h in range(len(set_boxes)): + # sum_bboxes += len(set_boxes[h]) + + # print('Batch ', str(batch_idx) ,': Box predictions with occupancy > occ_thr: ', sum_bboxes) + # print('Scene 1: Box predictions with occupancy > occ_thr: ', len(set_boxes[0])) + # exit() + # print(set_boxes[-1]) + + # # NMS + # nms_boxes = rotational_nms(set_boxes, confidences, occ_threshold=0.7, nms_iou_thr=0.5) + + # print('Scene 1: Boxes after NMS with iou_thr: ', len(nms_boxes[0])) + + # # Do all the further operations on predicted_boxes array, which contains the predicted bounding boxes + # gt_gen = GroundTruthGenerator(data_reader, label_files, calibration_files, network_format=False) + # gt_gen0 = GroundTruthGenerator(data_reader, label_files, calibration_files, network_format=True) + # for seq_boxes, gt_label, gt0 in zip(nms_boxes, gt_gen, gt_gen0): + # print("---------- New Scenario ---------- ") + # focal_loss_checker(gt0[0], occupancy[0], n_occs=-1) + # print("---------- ------------ ---------- ") + # for gt in gt_label: + # print(gt) + # for pred in seq_boxes: + # print(pred) diff --git a/point_pillars_custom_prediction_v2_2.py b/point_pillars_custom_prediction_v2_2.py new file mode 100644 index 0000000..090820b --- /dev/null +++ b/point_pillars_custom_prediction_v2_2.py @@ -0,0 +1,138 @@ +import os +from glob import glob +import numpy as np +import tensorflow as tf +from point_pillars_custom_processors_v2_2 
def limit_period(val, offset=0.5, period=np.pi):
    """Wrap ``val`` into a single window of length ``period``.

    For a positive ``period`` the result lies in
    ``[-offset * period, (1 - offset) * period)``.

    Args:
        val: Scalar or numpy array of values (e.g. angles) to wrap.
        offset: Fraction of ``period`` by which the window extends below zero.
        period: Length of the wrapping window.

    Returns:
        ``val`` shifted by a whole number of periods, with the same shape as ``val``.
    """
    # Number of whole periods to remove so the value lands in the target window.
    whole_periods = np.floor(val / period + offset)
    return val - whole_periods * period
+ Converter = PointvizConverter(save_viz_path) + + + + + + + gt_database_dir = os.path.join(DATA_ROOT, "gt_database") + + # validation_gen = AnalyseCustomDataGenerator(batch_size=params.batch_size,root_dir = DATA_ROOT, + # npoints=20000, split='train', classes=list(params.classes_map.keys()), + # random_select=True, gt_database_dir=None, aug_hard_ratio=0.7) + + validation_gen = AnalyseCustomDataGenerator(batch_size=params.batch_size, root_dir=DATA_ROOT, + npoints=20000, split='train_val_test',random_select=False, classes=list(params.classes_map.keys())) + # validation_gen = AnalyseCustomDataGenerator(batch_size=params.batch_size, root_dir=DATA_ROOT, + # npoints=20000, split='val',random_select=False, classes=list(params.classes_map.keys())) + + inference_duration = [] + + for batch_idx in range(0,10): + [pillars, voxels], [occupancy_, position_, size_, angle_, heading_], [pts_input, gt_boxes3d, sample] = validation_gen[batch_idx] + + # 4 * 12000 * 100 * 9, 502 * 502 * 2 + + # 4 * 20000 * 4 + + start=datetime.now() + + occupancy, position, size, angle, heading = pillar_net.predict([pillars, voxels]) + + inference_duration.append( datetime.now()-start) + + # angle = limit_period(angle, offset=0.5, period=2*np.pi) + + classification = np.zeros(shape=np.array(occupancy).shape) + classification_ = classification + # occupancy[:,:,:,:2] = 0 + + # print(occupancy.shape) + # exit() + + set_boxes, confidences = [], [] + loop_range = occupancy_.shape[0] if len(occupancy_.shape) == 4 else 1 + for i in range(loop_range): + set_box, predicted_boxes3d = generate_bboxes_from_pred_and_np_array(occupancy[i], position[i], size[i], angle[i], + heading[i], + classification[i], params.anchor_dims, occ_threshold=0.5) + + + _, decoded_gt_boxes3d = generate_bboxes_from_pred_and_np_array(occupancy_[i], position_[i], size_[i], angle_[i], + heading_[i], + classification_[i], params.anchor_dims, occ_threshold=0.4) + + # gt_boxes3d_ = gt_boxes3d[i] + gt_boxes3d_ = decoded_gt_boxes3d + + 
print(gt_boxes3d_.shape) + if(len(gt_boxes3d_) == 0): + gt_bbox_params_list = [] + else: + gt_bbox_params = np.stack([gt_boxes3d_[:,3], gt_boxes3d_[:,5], gt_boxes3d_[:,4], + gt_boxes3d_[:,1], gt_boxes3d_[:,2] , + gt_boxes3d_[:,0], + gt_boxes3d_[:,6]], axis=1) + + + gt_bbox_params_list = gt_bbox_params.tolist() + # gt_bbox_params_list = [] + for k in range(len(gt_bbox_params_list)): + msg = "%.5f, %s, %.5f"%(decoded_gt_boxes3d[k,9], params.map_classes[int(decoded_gt_boxes3d[k,8])], decoded_gt_boxes3d[k,6]) + # msg = "%.5f, %.5f"%(gt_bbox_params_list[k][3],gt_bbox_params_list[k][5]) + gt_bbox_params_list[k].append("Green") + # gt_bbox_params_list[k].append("1.0") + gt_bbox_params_list[k].append(msg) + + if len(set_box) > 0: + predicted_boxes3d_ = predicted_boxes3d + # bbox_params = validation_gen.convert_predictions_into_point_viz_format(predicted_boxes3d[:,[1, 2, 0, 5, 3, 4, 6 ]]) + + print("batch_idx: ", batch_idx * params.batch_size + i, " has ", predicted_boxes3d_.shape, "predictions") + # print(predicted_boxes3d_) + # print(size[i]) + + bbox_params = np.stack([predicted_boxes3d_[:,3], predicted_boxes3d_[:,5], predicted_boxes3d_[:,4], + predicted_boxes3d_[:,1], predicted_boxes3d_[:,2] , + predicted_boxes3d_[:,0], + predicted_boxes3d_[:,6]], axis=1) + + + bbox_params_list = bbox_params.tolist() + # bbox_labels_conf = [str(predicted_boxes3d[k,9]) for k in range(predicted_boxes3d.shape[0])] + for k in range(predicted_boxes3d.shape[0]): + msg = "%.5f, %s, %.5f"%(predicted_boxes3d[k,9],params.map_classes[int(predicted_boxes3d[k,8])], predicted_boxes3d[k,6]) + bbox_params_list[k].append("Magenta") + bbox_params_list[k].append(msg) + # bbox_params_list[k].append(str(predicted_boxes3d[k,9]) + "=" + params.map_classes[int(predicted_boxes3d[k,8])]) + gt_bbox_params_list.append(bbox_params_list[k]) + + coor = pts_input[i][:,[1,2,0]] + # coor[:,1] *= -1 + Converter.compile("val_custom_sample_{}".format(batch_idx * params.batch_size+i), coors=coor, 
def select_best_anchors(arr):
    """Collapse the leading axis of ``arr`` by keeping, per cell, the entry with the highest occupancy.

    Feature 0 of the last axis is the occupancy value, taken from {-1, 0, 1},
    i.e. {bad match, neg box, pos box}. For every position of the remaining
    axes (all axes except the first and the last), the index along axis 0 with
    the largest occupancy is chosen and the complete feature vector of that
    entry is returned. Ties go to the lowest index, as with ``np.argmax``.

    Args:
        arr: Array whose first axis enumerates the candidates to choose from
            and whose last axis holds the features, occupancy first.

    Returns:
        A new array of shape ``arr.shape[1:]`` holding the winning entries.
    """
    # Slicing with 0:1 keeps a trailing length-1 axis, so each winner index
    # broadcasts across all features of its cell.
    best = np.argmax(arr[..., 0:1], axis=0)
    # take_along_axis picks along axis 0 without building a dense index grid
    # for every trailing axis (which np.indices would allocate on each call).
    return np.take_along_axis(arr, best[np.newaxis, ...], axis=0)[0]
class CustomDataGenerator(DataProcessor, Sequence, PCKittiAugmentedDataset):
    """Batch generator that turns dataset samples into pillar tensors and training targets.

    Each batch covers ``batch_size`` consecutive sample indices. For the
    ``'train'`` and ``'val'`` splits the targets produced by
    ``make_ground_truth`` are returned as well; for any other split only the
    network inputs are returned.
    """

    def __init__(self, batch_size: int, root_dir: str, npoints: int = 16384, split: str = 'train',
                 classes: List[str] = ['Car', 'Pedestrian', 'Person_sitting'], random_select: bool = True,
                 gt_database_dir=None, aug_hard_ratio: float = 0.5, **kwargs):
        # Every argument is passed on unchanged through the cooperative super() chain.
        super().__init__(
            batch_size=batch_size, root_dir=root_dir,
            npoints=npoints, split=split, classes=classes,
            random_select=random_select, gt_database_dir=gt_database_dir,
            aug_hard_ratio=aug_hard_ratio, **kwargs
        )
        self.batch_size = batch_size
        self.sample_id_list = self.get_sample_id_list()

    def get_sample(self, index):
        """Return the sample at ``index`` as provided by the parent implementation."""
        return super().get_sample(index)

    def __len__(self):
        """Number of complete batches; a trailing partial batch is not counted."""
        return len(self.sample_id_list) // self.batch_size

    def __getitem__(self, batch_id: int):
        """Assemble batch number ``batch_id``.

        Returns:
            ``[pillars, voxels], [occupancy, position, size, angle, heading]`` when
            ``self.split`` is ``'train'`` or ``'val'``, otherwise ``[pillars, voxels]``.
        """
        first_idx = batch_id * self.batch_size
        end_idx = self.batch_size * (batch_id + 1)

        pillar_batch, voxel_batch = [], []
        occ_batch, pos_batch, size_batch = [], [], []
        angle_batch, heading_batch, cls_batch = [], [], []

        for sample_idx in range(first_idx, end_idx):
            sample = self.get_sample(sample_idx)

            # Points: rectified-camera coordinates -> LiDAR coordinates, then the
            # per-point features are appended as extra columns.
            lidar_xyz = sample['calib'].rect_to_lidar(sample['pts_rect'])
            pts_input = np.concatenate((lidar_xyz, sample['pts_features']), axis=1)  # (N, C)

            # Boxes: centres are moved to the LiDAR frame and the columns re-ordered.
            rect_boxes = sample['gt_boxes3d']
            centers = sample['calib'].rect_to_lidar(rect_boxes[:, :3])
            gt_boxes3d = np.stack((
                centers[:, 0],                           # 0 x
                centers[:, 1],                           # 1 y
                centers[:, 2] + rect_boxes[:, 3] / 2,    # 2 z
                rect_boxes[:, 5],                        # 3 l  (same as the original label)
                rect_boxes[:, 4],                        # 4 w  (same as the original label)
                rect_boxes[:, 3],                        # 5 h  (same as the original label)
                -rect_boxes[:, 6],                       # 6 ry
            ), axis=1)

            # "Voxels" are the pillar ids returned alongside the pillar tensor.
            pillars_, voxels_ = self.make_point_pillars(pts_input)
            pillar_batch.append(pillars_)
            voxel_batch.append(voxels_)

            if self.split in ('train', 'val'):
                targets = self.make_ground_truth(gt_boxes3d, sample['gt_cls_type_list'])
                occupancy_, position_, size_, angle_, heading_, classification_ = targets

                occ_batch.append(occupancy_)
                pos_batch.append(position_)
                size_batch.append(size_)
                angle_batch.append(angle_)
                heading_batch.append(heading_)
                cls_batch.append(classification_)

        pillars = np.concatenate(pillar_batch, axis=0)
        voxels = np.concatenate(voxel_batch, axis=0)

        if self.split not in ('train', 'val'):
            return [pillars, voxels]

        occupancy = np.array(occ_batch)
        position = np.array(pos_batch)
        size = np.array(size_batch)
        angle = np.array(angle_batch)
        heading = np.array(heading_batch)
        # Only needed by the six-output variant kept in the comment below.
        classification = np.array(cls_batch)
        # return [pillars, voxels], [occupancy, position, size, angle, heading, classification] # network
        return [pillars, voxels], [occupancy, position, size, angle, heading]  # network_v2

    def on_epoch_end(self):
        """Shuffle the sample id list after each epoch for the 'train' and 'val' splits."""
        if self.split in ('train', 'val'):
            self.sample_id_list = shuffle(self.sample_id_list)
+ sample = self._get_sample(i) + # For each file, dividing the space into a x-y grid to create pillars + pts_lidar = sample['calib'].rect_to_lidar(sample['pts_rect']) + + pts_input = np.concatenate((pts_lidar, sample['pts_features']), axis=1) # (N, C) + + gt_boxes3d_xyz = sample['calib'].rect_to_lidar(sample['gt_boxes3d'][:,:3]) + + + gt_boxes3d = np.concatenate(( + gt_boxes3d_xyz[:,0,np.newaxis], # 0 x + gt_boxes3d_xyz[:,1,np.newaxis], # 1 y + gt_boxes3d_xyz[:,2,np.newaxis] + sample['gt_boxes3d'][:,3,np.newaxis] / 2, # 2 z + sample['gt_boxes3d'][:,5,np.newaxis], # 3 l # same as the original label + sample['gt_boxes3d'][:,4,np.newaxis], # 4 w # same as the original label + sample['gt_boxes3d'][:,3,np.newaxis], # 5 h # same as the original label + -sample['gt_boxes3d'][:,6,np.newaxis], # 6 ry + ), axis=1) + # Voxels are the pillar ids + pillars_, voxels_ = self.make_point_pillars(pts_input) + + pillars.append(pillars_) + voxels.append(voxels_) + + if self.split=='train' or self.split =='val': + if (len(gt_boxes3d) == 0): + print("file id: ", i, " has zero gt label") + occupancy_, position_, size_, angle_, heading_, classification_ = self.make_ground_truth( + gt_boxes3d, sample['gt_cls_type_list']) + + occupancy.append(occupancy_) + position.append(position_) + size.append(size_) + angle.append(angle_) + heading.append(heading_) + classification.append(classification_) + + sample_.append(sample) + gt_boxes3d_.append(gt_boxes3d) + pts_input_.append(pts_input) + + pillars = np.concatenate(pillars, axis=0) + voxels = np.concatenate(voxels, axis=0) + + if self.split=='train' or self.split =='val': + occupancy = np.array(occupancy) + position = np.array(position) + size = np.array(size) + angle = np.array(angle) + heading = np.array(heading) + classification = np.array(classification) + # return [pillars, voxels], [occupancy, position, size, angle, heading, classification], [pts_input_, gt_boxes3d_, sample_] + return [pillars, voxels], [occupancy, position, size, angle, 
def limit_period(val, offset=0.5, period=np.pi):
    """Wrap ``val`` into a single window of length ``period``.

    For a positive ``period`` the result lies in
    ``[-offset * period, (1 - offset) * period)``.

    Args:
        val: Scalar or numpy array of values (e.g. angles) to wrap.
        offset: Fraction of ``period`` by which the window extends below zero.
        period: Length of the wrapping window.

    Returns:
        ``val`` shifted by a whole number of periods, with the same shape as ``val``.
    """
    # Number of whole periods to remove so the value lands in the target window.
    whole_periods = np.floor(val / period + offset)
    return val - whole_periods * period
os.path.join("/home/tan/tjtanaa/PointPillars/visualization", MODEL_ROOT.split('/')[-1]) + # Initialize and setup output directory. + Converter = PointvizConverter(save_viz_path) + + gt_database_dir = os.path.join(DATA_ROOT, "gt_database") + + validation_gen = AnalyseCustomDataGenerator(batch_size=params.batch_size, root_dir=DATA_ROOT, + npoints=20000, split='train_val_test',random_select=False, classes=list(params.classes_map.keys())) + inference_duration = [] + sample_index = 0 # has to be controlled manually to ensure that the sequence number is continuous + + for batch_idx in range(0,len(validation_gen)): + [pillars, voxels], [occupancy_, position_, size_, angle_, heading_], [pts_input, gt_boxes3d, sample] = validation_gen[batch_idx] + + start=datetime.now() + + occupancy, position, size, angle, heading = pillar_net.predict([pillars, voxels]) + + inference_duration.append( datetime.now()-start) + + classification = np.zeros(shape=np.array(occupancy).shape) + classification_ = classification + + set_boxes, confidences = [], [] + loop_range = occupancy_.shape[0] if len(occupancy_.shape) == 4 else 1 + for i in range(loop_range): + set_box, predicted_boxes3d = generate_bboxes_from_pred_and_np_array(occupancy[i], position[i], size[i], angle[i], + heading[i], + classification[i], params.anchor_dims, occ_threshold=0.5) + + + _, decoded_gt_boxes3d = generate_bboxes_from_pred_and_np_array(occupancy_[i], position_[i], size_[i], angle_[i], + heading_[i], + classification_[i], params.anchor_dims, occ_threshold=0.4) + + # gt_boxes3d_ = gt_boxes3d[i] + gt_boxes3d_ = decoded_gt_boxes3d + + print(gt_boxes3d_.shape) + if(len(gt_boxes3d_) == 0): + gt_bbox_params_list = [] + else: + gt_bbox_params = np.stack([gt_boxes3d_[:,3], gt_boxes3d_[:,5], gt_boxes3d_[:,4], + gt_boxes3d_[:,1], gt_boxes3d_[:,2] , + gt_boxes3d_[:,0], + gt_boxes3d_[:,6]], axis=1) + + + gt_bbox_params_list = gt_bbox_params.tolist() + # gt_bbox_params_list = [] + for k in range(len(gt_bbox_params_list)): + msg = 
"%.5f, %s, %.5f"%(decoded_gt_boxes3d[k,9], params.map_classes[int(decoded_gt_boxes3d[k,8])], decoded_gt_boxes3d[k,6]) + # msg = "%.5f, %.5f"%(gt_bbox_params_list[k][3],gt_bbox_params_list[k][5]) + gt_bbox_params_list[k].append("Green") + # gt_bbox_params_list[k].append("1.0") + gt_bbox_params_list[k].append(msg) + + if len(set_box) > 0: + + + # NMS + # set_box + # print("start nms") + confidence = [float(box.conf) for box in set_box] + nms_boxes = rotational_nms([set_box], [confidence], occ_threshold=0.5, nms_iou_thr=0.5) + + predicted_boxes3d_list = convert_boxes_to_list(nms_boxes) + + predicted_boxes3d = np.array(predicted_boxes3d_list[0]) + predicted_boxes3d_ = predicted_boxes3d + + print("batch_idx: ", batch_idx * params.batch_size + i, " has ", predicted_boxes3d_.shape, "predictions") + + bbox_params = np.stack([predicted_boxes3d_[:,3], predicted_boxes3d_[:,5], predicted_boxes3d_[:,4], + predicted_boxes3d_[:,1], predicted_boxes3d_[:,2] , + predicted_boxes3d_[:,0], + predicted_boxes3d_[:,6]], axis=1) + + + bbox_params_list = bbox_params.tolist() + # bbox_labels_conf = [str(predicted_boxes3d[k,9]) for k in range(predicted_boxes3d.shape[0])] + for k in range(predicted_boxes3d.shape[0]): + msg = "%.5f, %s, %.5f"%(predicted_boxes3d[k,9],params.map_classes[int(predicted_boxes3d[k,8])], predicted_boxes3d[k,6]) + bbox_params_list[k].append("Magenta") + bbox_params_list[k].append(msg) + # bbox_params_list[k].append(str(predicted_boxes3d[k,9]) + "=" + params.map_classes[int(predicted_boxes3d[k,8])]) + gt_bbox_params_list.append(bbox_params_list[k]) + + + # save as kitti format for evaluation + cur_sample_id = batch_idx * params.batch_size + i + sample_file_name = validation_gen.sample_id_list[cur_sample_id] + calib = sample[i]['calib'] + # cur_boxes3d = cur_boxes3d.cpu().numpy() + + cur_boxes3d_xyz = calib.lidar_to_rect(predicted_boxes3d[:, 0:3]) + + cur_boxes3d = np.concatenate(( + cur_boxes3d_xyz[:,0,np.newaxis], # 0 x + cur_boxes3d_xyz[:,1,np.newaxis] + 
predicted_boxes3d[:,5,np.newaxis] / 2, # 1 y + cur_boxes3d_xyz[:,2,np.newaxis], # 2 z + predicted_boxes3d[:,5,np.newaxis], # 3 l # same as the original label + predicted_boxes3d[:,4,np.newaxis], # 4 w # same as the original label + predicted_boxes3d[:,3,np.newaxis], # 5 h # same as the original label + -predicted_boxes3d[:,6,np.newaxis], # 6 ry + ), axis=1) + cur_scores_raw = predicted_boxes3d[:,-1] + image_shape = validation_gen.get_image_shape(sample_file_name) + labels_obj = validation_gen.get_label(sample_file_name) + classes = ['Car' for i in range(len(predicted_boxes3d))] + save_kitti_format_for_evaluation(sample_index, calib, cur_boxes3d, KITTI_EVALUATION_OUTPUT, cur_scores_raw, image_shape, classes, labels_obj) + sample_index += 1 + + coor = pts_input[i][:,[1,2,0]] + Converter.compile("evaluation_sample_{}".format(batch_idx * params.batch_size+i), coors=coor, intensity=pts_input[i][:,3], + bbox_params=gt_bbox_params_list) + # print("Average runtime speed: ", np.mean(inference_duration[20:])) + diff --git a/point_pillars_prediction.py b/point_pillars_prediction.py index 58c60a3..75f5406 100644 --- a/point_pillars_prediction.py +++ b/point_pillars_prediction.py @@ -2,13 +2,18 @@ from glob import glob import numpy as np import tensorflow as tf -from processors import SimpleDataGenerator +from processors import SimpleDataGenerator, AnalyseSimpleDataGenerator from inference_utils import generate_bboxes_from_pred, GroundTruthGenerator, focal_loss_checker, rotational_nms +from inference_utils import generate_bboxes_from_pred_and_np_array from readers import KittiDataReader from config import Parameters from network import build_point_pillar_graph +from inference_utils import inverse_yaw_element -DATA_ROOT = "../training" +from point_viz.converter import PointvizConverter + +DATA_ROOT = "/media/data3/tjtanaa/kitti_dataset/KITTI/object/training" +# MODEL_ROOT = "./logs_Car_Pedestrian_Original_2" MODEL_ROOT = "./logs" os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" @@ 
-16,6 +21,11 @@ if __name__ == "__main__": + # save_viz_path = "/home/tan/tjtanaa/PointPillars/visualization/custom_prediction_multiprocessing" + save_viz_path = "/home/tan/tjtanaa/PointPillars/visualization/prediction" + # Initialize and setup output directory. + Converter = PointvizConverter(save_viz_path) + params = Parameters() pillar_net = build_point_pillar_graph(params) pillar_net.load_weights(os.path.join(MODEL_ROOT, "model.h5")) @@ -23,35 +33,67 @@ data_reader = KittiDataReader() - lidar_files = sorted(glob(os.path.join(DATA_ROOT, "velodyne", "*.bin"))) - label_files = sorted(glob(os.path.join(DATA_ROOT, "label_2", "*.txt"))) - calibration_files = sorted(glob(os.path.join(DATA_ROOT, "calib", "*.txt"))) + lidar_files = sorted(glob(os.path.join(DATA_ROOT, "velodyne", "*.bin")))[:100] + print(len(lidar_files)) + print() + label_files = sorted(glob(os.path.join(DATA_ROOT, "label_2", "*.txt")))[:100] + calibration_files = sorted(glob(os.path.join(DATA_ROOT, "calib", "*.txt")))[:100] assert len(lidar_files) == len(label_files) == len(calibration_files), "Input dirs require equal number of files." 
- eval_gen = SimpleDataGenerator(data_reader, params.batch_size, lidar_files, label_files, calibration_files) - - occupancy, position, size, angle, heading, classification = pillar_net.predict(eval_gen, - batch_size=params.batch_size) - set_boxes, confidences = [], [] - loop_range = occupancy.shape[0] if len(occupancy.shape) == 4 else 1 - for i in range(loop_range): - set_boxes.append(generate_bboxes_from_pred(occupancy[i], position[i], size[i], angle[i], heading[i], - classification[i], params.anchor_dims, occ_threshold=0.7)) - confidences.append([float(boxes.conf) for boxes in set_boxes[-1]]) - print('Scene 1: Box predictions with occupancy > occ_thr: ', len(set_boxes[0])) + eval_gen = AnalyseSimpleDataGenerator(data_reader, params.batch_size, lidar_files, label_files, calibration_files) + + + for batch_idx in range(0,10): + [pillars, voxels], [occupancy, position, size, angle, heading, classification], [pts_input, gt_boxes3d] \ + = eval_gen[batch_idx] + + # exit() + + occupancy, position, size, angle, heading, classification = pillar_net.predict([pillars, voxels]) + set_boxes, confidences = [], [] + loop_range = occupancy.shape[0] if len(occupancy.shape) == 4 else 1 + for i in range(loop_range): + set_box, prediction = generate_bboxes_from_pred_and_np_array(occupancy[i], position[i], size[i], angle[i], heading[i], + classification[i], params.anchor_dims, occ_threshold=0.3) + + if len(set_box) == 0: + continue + set_boxes.append(set_box) + # set_boxes.append(generate_bboxes_from_pred(occupancy[i], position[i], size[i], angle[i], heading[i], + # classification[i], params.anchor_dims, occ_threshold=0.3)) + confidences.append([float(boxes.conf) for boxes in set_boxes[-1]]) + + # print(set_boxes[0]) + # print(np.array(set_boxes[0]).shape) + # print(prediction.shape) + gt_boxes3d_ = [] + for j in range(len(gt_boxes3d[i])): + bbox = gt_boxes3d[i][j] + gt_boxes3d_.append([bbox.dimension[1], bbox.dimension[2], bbox.dimension[0], + bbox.centroid[1], bbox.centroid[2] + 
bbox.dimension[2]/2, bbox.centroid[0] + , -bbox.yaw]) + gt_boxes3d_np = np.array(gt_boxes3d_) + print(gt_boxes3d_np.shape) + + Converter.compile("eval_sample_{}".format(batch_idx*params.batch_size + i), coors=pts_input[i][:,[1,2,0]], intensity=pts_input[i][:,3], + bbox_params=gt_boxes3d_np) + # bbox_params=gt_boxes3d_np[:,[3,5,4,1,2,0,6]]) + # print('Scene 1: Box predictions with occupancy > occ_thr: ', len(set_boxes[0])) # NMS - nms_boxes = rotational_nms(set_boxes, confidences, occ_threshold=0.7, nms_iou_thr=0.5) - - print('Scene 1: Boxes after NMS with iou_thr: ', len(nms_boxes[0])) - - # Do all the further operations on predicted_boxes array, which contains the predicted bounding boxes - gt_gen = GroundTruthGenerator(data_reader, label_files, calibration_files, network_format=False) - gt_gen0 = GroundTruthGenerator(data_reader, label_files, calibration_files, network_format=True) - for seq_boxes, gt_label, gt0 in zip(nms_boxes, gt_gen, gt_gen0): - print("---------- New Scenario ---------- ") - focal_loss_checker(gt0[0], occupancy[0], n_occs=-1) - print("---------- ------------ ---------- ") - for gt in gt_label: - print(gt) - for pred in seq_boxes: - print(pred) + # nms_boxes = rotational_nms(set_boxes, confidences, occ_threshold=0.3, nms_iou_thr=0.5) + + # print('Scene 1: Boxes after NMS with iou_thr: ', len(nms_boxes[0])) + + + + # # Do all the further operations on predicted_boxes array, which contains the predicted bounding boxes + # gt_gen = GroundTruthGenerator(data_reader, label_files, calibration_files, network_format=False) + # gt_gen0 = GroundTruthGenerator(data_reader, label_files, calibration_files, network_format=True) + # for seq_boxes, gt_label, gt0 in zip(nms_boxes, gt_gen, gt_gen0): + # print("---------- New Scenario ---------- ") + # focal_loss_checker(gt0[0], occupancy[0], n_occs=-1) + # print("---------- ------------ ---------- ") + # for gt in gt_label: + # print(gt) + # for pred in seq_boxes: + # print(pred) diff --git 
a/point_pillars_test.py b/point_pillars_test.py deleted file mode 100644 index 8cfa872..0000000 --- a/point_pillars_test.py +++ /dev/null @@ -1,70 +0,0 @@ -import unittest -import numpy as np -import tensorflow as tf - -from point_pillars import createPillars, createPillarsTarget, select - - -class PointPillarsTest(unittest.TestCase): - - def setUp(self): - np.random.seed(42) - xy = np.random.randint(-100, 100+1, size=(100000, 2)) - z = np.random.randint(-3, 1+1, size=(100000, 1)) - i = np.random.rand(100000) - self.arr = np.c_[xy, z, i] - assert self.arr.shape == (100000, 4) - - def test_pillar_creation(self): - pillars, indices = createPillars(self.arr, 100, 12000, 0.16, 0.16, 0, 80.64, -40.32, 40.32, -3, 1, True) - - assert pillars.shape == (1, 12000, 100, 7) - assert pillars.dtype == np.float32 - assert indices.shape == (1, 12000, 3) - assert indices.dtype == np.int32 - - session = tf.Session() - pillars = tf.constant(pillars, dtype=tf.float32) - indices = tf.constant(indices, dtype=tf.int32) - feature_map = tf.scatter_nd(indices, tf.reduce_mean(pillars, axis=2), (1, 504, 504, 7))[0] - arr, = session.run([feature_map]) - assert (arr.shape == (504, 504, 7)) - - @staticmethod - def test_pillar_target_creation(): - - dims = np.array([[3.7, 1.6, 1.4], [3.7, 1.6, 1.4], [0.8, 0.6, 1.7]], dtype=np.float32) - posn = np.array([[50, 10, 0], [20, 0, 0], [30, 5, 0]], dtype=np.float32) - yaws = np.array([0, 0, 90], dtype=np.float32) - - target = createPillarsTarget(posn, - dims, - yaws, - np.array([1, 1, 2], dtype=np.int32), - dims[[0, 2]], - np.array([0, 0], dtype=np.float32), - np.array([0, 90], dtype=np.float32), - 0.5, - 0.4, - 10, - 2, - 0.1, - 0.1, - 0, - 80, - -40, - 40, - -3, - 1, - True) - - assert target.shape == (3, 400, 400, 2, 10) - assert (target[..., 0] == 1).sum() == 83 - - selected = target[..., 0:1].argmax(axis=0) - target = select(target, selected) - assert (target.shape == (400, 400, 2, 10)) - - -if __name__ == "__main__": - unittest.main() diff --git 
a/point_pillars_training_custom_run_v2_2.py b/point_pillars_training_custom_run_v2_2.py new file mode 100644 index 0000000..056002a --- /dev/null +++ b/point_pillars_training_custom_run_v2_2.py @@ -0,0 +1,108 @@ +import os +import time +import numpy as np +import tensorflow as tf +from glob import glob + +# from config import Parameters +from config_v2_2 import Parameters +from loss_v2_2 import PointPillarNetworkLoss +from network_v2_2 import build_point_pillar_graph +from point_pillars_custom_processors_v2_2 import CustomDataGenerator +from readers import KittiDataReader + +# from point_viz.converter import PointvizConverter + +tf.get_logger().setLevel("ERROR") + +# DATA_ROOT = "/media/data3/tjtanaa/kitti_dataset/KITTI/object/training" # TODO make main arg +DATA_ROOT = "/media/data3/tjtanaa/kitti_dataset/" # TODO make main arg +# MODEL_ROOT = "./logs_Car_Pedestrian_Custom_Dataset_single_process" +# MODEL_ROOT = "./logs_Car_Pedestrian_Custom_Dataset_No_Early_Stopping_wo_Aug_wo_val" +MODEL_ROOT = "./logs_Car_Custom_Dataset_No_Early_Stopping_Aug_val_new_network_multigpu" + +os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" +os.environ["CUDA_VISIBLE_DEVICES"] = "3" + +if __name__ == "__main__": + params = Parameters() + + # gpus = tf.config.experimental.list_physical_devices('GPU') + + pillar_net = build_point_pillar_graph(params) + # pillar_net.load_weights(os.path.join(MODEL_ROOT, "model.h5")) + pillar_net.summary() + # exit() + loss = PointPillarNetworkLoss(params) + + optimizer = tf.keras.optimizers.Adam(lr=params.learning_rate, decay=params.decay_rate) + + pillar_net.compile(optimizer, loss=loss.losses()) + + + # loss = PointPillarNetworkLoss(params) + + # optimizer = tf.keras.optimizers.Adam(lr=params.learning_rate, decay=params.decay_rate) + + # if len(gpus)>1: + # strategy = tf.distribute.MirroredStrategy(cross_device_ops=tf.distribute.HierarchicalCopyAllReduce()) + # with strategy.scope(): + # pillar_net = build_point_pillar_graph(params) + # # 
pillar_net.load_weights(os.path.join(MODEL_ROOT, "model.h5")) + # pillar_net.compile(optimizer, loss=loss.losses()) + # else: + # pillar_net = build_point_pillar_graph(params) + # # pillar_net.load_weights(os.path.join(MODEL_ROOT, "model.h5")) + # pillar_net.compile(optimizer, loss=loss.losses()) + + # pillar_net.summary() + + + gt_database_dir = os.path.join(DATA_ROOT, "gt_database") + # gt_database_dir = None + + training_gen = CustomDataGenerator(batch_size=params.batch_size,root_dir = DATA_ROOT, + npoints=20000, split='train', classes=list(params.classes_map.keys()), + random_select=True, gt_database_dir=gt_database_dir, aug_hard_ratio=0.7) + + validation_gen = CustomDataGenerator(batch_size=params.batch_size, root_dir=DATA_ROOT, + npoints=20000, split='val', classes=list(params.classes_map.keys())) + + + # save_viz_path = "/home/tan/tjtanaa/PointPillars/visualization/custom_processor" + # Initialize and setup output directory. + # Converter = PointvizConverter(save_viz_path) + + + + # bbox_params = self.convert_labels_into_point_viz_format(gt_boxes3d) + # print(bbox_params.shape) + # Converter.compile("custom_sample_{}".format(i), coors=pts_input[:,:3], intensity=pts_input[:,3], + # bbox_params=bbox_params) + + + log_dir = MODEL_ROOT + epoch_to_decay = int( + np.round(params.iters_to_decay / params.batch_size * int(len(training_gen)))) + callbacks = [ + tf.keras.callbacks.TensorBoard(log_dir=log_dir), + tf.keras.callbacks.ModelCheckpoint(filepath=os.path.join(log_dir, "model.h5"), + monitor='val_loss', save_best_only=True), + tf.keras.callbacks.LearningRateScheduler( + lambda epoch, lr: lr * 0.8 if ((epoch % epoch_to_decay == 0) and (epoch != 0)) else lr, verbose=True), + # tf.keras.callbacks.EarlyStopping(patience=20, monitor='val_loss'), + ] + + try: + pillar_net.fit(training_gen, + validation_data = validation_gen, + steps_per_epoch=len(training_gen), + callbacks=callbacks, + use_multiprocessing=True, + max_queue_size = 16, + 
epochs=int(params.total_training_epochs), + workers=6) + except KeyboardInterrupt: + model_str = "interrupted_%s.h5" % time.strftime("%Y%m%d-%H%M%S") + pillar_net.save(os.path.join(log_dir, model_str)) + print("Interrupt. Saving output to %s" % os.path.join(os.getcwd(), log_dir[1:], model_str)) diff --git a/point_pillars_training_run.py b/point_pillars_training_run.py deleted file mode 100644 index bc1c517..0000000 --- a/point_pillars_training_run.py +++ /dev/null @@ -1,68 +0,0 @@ -import os -import time -import numpy as np -import tensorflow as tf -from glob import glob - -from config import Parameters -from loss import PointPillarNetworkLoss -from network import build_point_pillar_graph -from processors import SimpleDataGenerator -from readers import KittiDataReader - -tf.get_logger().setLevel("ERROR") - -DATA_ROOT = "../training" # TODO make main arg -MODEL_ROOT = "./logs" - -os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" -os.environ["CUDA_VISIBLE_DEVICES"] = "0" - -if __name__ == "__main__": - - params = Parameters() - - pillar_net = build_point_pillar_graph(params) - pillar_net.load_weights(os.path.join(MODEL_ROOT, "model.h5")) - - loss = PointPillarNetworkLoss(params) - - optimizer = tf.keras.optimizers.Adam(lr=params.learning_rate, decay=params.decay_rate) - - pillar_net.compile(optimizer, loss=loss.losses()) - - data_reader = KittiDataReader() - - lidar_files = sorted(glob(os.path.join(DATA_ROOT, "velodyne", "*.bin"))) - label_files = sorted(glob(os.path.join(DATA_ROOT, "label_2", "*.txt"))) - calibration_files = sorted(glob(os.path.join(DATA_ROOT, "calib", "*.txt"))) - assert len(lidar_files) == len(label_files) == len(calibration_files), "Input dirs require equal number of files." 
- validation_len = int(0.3*len(label_files)) - - training_gen = SimpleDataGenerator(data_reader, params.batch_size, lidar_files[:-validation_len], label_files[:-validation_len], calibration_files[:-validation_len]) - validation_gen = SimpleDataGenerator(data_reader, params.batch_size, lidar_files[-validation_len:], label_files[-validation_len:], calibration_files[-validation_len:]) - - log_dir = MODEL_ROOT - epoch_to_decay = int( - np.round(params.iters_to_decay / params.batch_size * int(np.ceil(float(len(label_files)) / params.batch_size)))) - callbacks = [ - tf.keras.callbacks.TensorBoard(log_dir=log_dir), - tf.keras.callbacks.ModelCheckpoint(filepath=os.path.join(log_dir, "model.h5"), - monitor='val_loss', save_best_only=True), - tf.keras.callbacks.LearningRateScheduler( - lambda epoch, lr: lr * 0.8 if ((epoch % epoch_to_decay == 0) and (epoch != 0)) else lr, verbose=True), - tf.keras.callbacks.EarlyStopping(patience=20, monitor='val_loss'), - ] - - try: - pillar_net.fit(training_gen, - validation_data = validation_gen, - steps_per_epoch=len(training_gen), - callbacks=callbacks, - use_multiprocessing=True, - epochs=int(params.total_training_epochs), - workers=6) - except KeyboardInterrupt: - model_str = "interrupted_%s.h5" % time.strftime("%Y%m%d-%H%M%S") - pillar_net.save(os.path.join(log_dir, model_str)) - print("Interrupt. 
Saving output to %s" % os.path.join(os.getcwd(), log_dir[1:], model_str)) diff --git a/point_pillars_visualize_input.py b/point_pillars_visualize_input.py new file mode 100644 index 0000000..838a5c6 --- /dev/null +++ b/point_pillars_visualize_input.py @@ -0,0 +1,142 @@ +import os +from glob import glob +import numpy as np +import tensorflow as tf +from point_pillars_custom_processors_v2 import CustomDataGenerator, AnalyseCustomDataGenerator +from inference_utils_v2 import generate_bboxes_from_pred, GroundTruthGenerator, focal_loss_checker +from inference_utils_v2 import rotational_nms, generate_bboxes_from_pred_and_np_array +from config_v2 import Parameters +from network import build_point_pillar_graph + + +from point_viz.converter import PointvizConverter + +DATA_ROOT = "/media/data3/tjtanaa/kitti_dataset/" +# MODEL_ROOT = "./logs_Car_Pedestrian_Custom_Dataset_single_process" +MODEL_ROOT = "./logs_Car_Pedestrian_Custom_Dataset_No_Early_Stopping_Input_Coordinate_Analysis_v2" + +os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" +os.environ["CUDA_VISIBLE_DEVICES"] = "2" + + +def limit_period(val, offset=0.5, period=np.pi): + return val - np.floor(val / period + offset) * period + +if __name__ == "__main__": + + params = Parameters() + # save_viz_path = "/home/tan/tjtanaa/PointPillars/visualization/custom_prediction_multiprocessing" + # save_viz_path = "/home/tan/tjtanaa/PointPillars/visualization/input_coordinate_analysis_point_pillar_v2_gt_only" + # save_viz_path = "/home/tan/tjtanaa/PointPillars/visualization/input_coordinate_analysis_point_pillar_v2_labels_only" + save_viz_path = "/home/tan/tjtanaa/PointPillars/visualization/pedestrian_input_coordinate_analysis_point_pillar_v2_gt_and_labels" + # Initialize and setup output directory. 
+ Converter = PointvizConverter(save_viz_path) + + gt_database_dir = os.path.join(DATA_ROOT, "gt_database") + + validation_gen = AnalyseCustomDataGenerator(batch_size=params.batch_size, root_dir=DATA_ROOT, + npoints=16384, split='train_val_test',random_select=False, classes=list(params.classes_map.keys())) + + for batch_idx in range(0,20): + [pillars, voxels], [occupancy_, position_, size_, angle_, heading_, classification_], [pts_input, gt_boxes3d, sample] = validation_gen[batch_idx] + + + set_boxes, confidences = [], [] + loop_range = occupancy_.shape[0] if len(occupancy_.shape) == 4 else 1 + for i in range(loop_range): + # set_box, predicted_boxes3d = generate_bboxes_from_pred_and_np_array(occupancy[i], position[i], size[i], angle[i], + # heading[i], + # classification[i], params.anchor_dims, occ_threshold=0.15) + gt_set_box, decoded_gt_boxes3d = generate_bboxes_from_pred_and_np_array(occupancy_[i], position_[i], size_[i], angle_[i], + heading_[i], + classification_[i], params.anchor_dims, occ_threshold=0.5) + + # exit() + gt_boxes3d_ = gt_boxes3d[i] + + print(gt_boxes3d_.shape) + gt_bbox_params = np.stack([gt_boxes3d_[:,3], gt_boxes3d_[:,5], gt_boxes3d_[:,4], + gt_boxes3d_[:,1], gt_boxes3d_[:,2] , + gt_boxes3d_[:,0], + gt_boxes3d_[:,6]], axis=1) + + + gt_bbox_params_list = gt_bbox_params.tolist() + # gt_bbox_params_list = [] + # print(gt_bbox_params_list) + # print(len(gt_bbox_params_list)) + # print(len(gt_bbox_params_list[0])) + + for k in range(len(gt_bbox_params_list)): + msg = "%.5f, %.5f"%(gt_bbox_params_list[k][3],gt_bbox_params_list[k][5]) + gt_bbox_params_list[k].append("Green") + gt_bbox_params_list[k].append(msg) + + if len(gt_set_box) > 0: + decoded_gt_boxes3d_ = decoded_gt_boxes3d + # bbox_params = validation_gen.convert_predictions_into_point_viz_format(predicted_boxes3d[:,[1, 2, 0, 5, 3, 4, 6 ]]) + + print(decoded_gt_boxes3d_.shape) + # print(predicted_boxes3d_) + # print(size[i]) + + bbox_params = np.stack([decoded_gt_boxes3d_[:,3], 
decoded_gt_boxes3d_[:,5], decoded_gt_boxes3d_[:,4], + decoded_gt_boxes3d_[:,1], decoded_gt_boxes3d_[:,2] , + decoded_gt_boxes3d_[:,0], + decoded_gt_boxes3d_[:,6]], axis=1) + + + # bbox_params = np.stack([predicted_boxes3d[:,4], predicted_boxes3d[:,5], predicted_boxes3d[:,3], + # predicted_boxes3d[:,1], -(predicted_boxes3d[:,2] - predicted_boxes3d[:,5] / 2), + # predicted_boxes3d[:,0], + # predicted_boxes3d[:,6]], axis=1) + + bbox_params_list = bbox_params.tolist() + # bbox_labels_conf = [str(predicted_boxes3d[k,9]) for k in range(predicted_boxes3d.shape[0])] + for k in range(decoded_gt_boxes3d.shape[0]): + msg = "%.5f, %.5f"%(bbox_params_list[k][3],bbox_params_list[k][5]) + # msg = (str(bbox_params_list[k][3:5])) + bbox_params_list[k].append("Magenta") + bbox_params_list[k].append(msg) + # bbox_params_list[k].append(str(decoded_gt_boxes3d[k,9]) + params.map_classes[int(decoded_gt_boxes3d[k,8])]) + gt_bbox_params_list.append(bbox_params_list[k]) + + # print(gt_bbox_params_list) + # print(gt_bbox_params.tolist()) + + coor = pts_input[i][:,[1,2,0]] + # coor[:,1] *= -1 + Converter.compile("val_custom_sample_{}".format(batch_idx * params.batch_size+i), coors=coor, intensity=pts_input[i][:,3], + bbox_params=gt_bbox_params_list) + + # exit() + # set_boxes.append(set_box) + # # set_boxes.append(generate_bboxes_from_pred(occupancy, position, size, angle, heading, + # # classification, params.anchor_dims, occ_threshold=0.1)) + # # confidences.append([float(boxes.conf) for boxes in set_boxes[-1]]) + + # sum_bboxes = 0 + # for h in range(len(set_boxes)): + # sum_bboxes += len(set_boxes[h]) + + # print('Batch ', str(batch_idx) ,': Box predictions with occupancy > occ_thr: ', sum_bboxes) + # print('Scene 1: Box predictions with occupancy > occ_thr: ', len(set_boxes[0])) + # exit() + # print(set_boxes[-1]) + + # # NMS + # nms_boxes = rotational_nms(set_boxes, confidences, occ_threshold=0.7, nms_iou_thr=0.5) + + # print('Scene 1: Boxes after NMS with iou_thr: ', len(nms_boxes[0])) 
+ + # # Do all the further operations on predicted_boxes array, which contains the predicted bounding boxes + # gt_gen = GroundTruthGenerator(data_reader, label_files, calibration_files, network_format=False) + # gt_gen0 = GroundTruthGenerator(data_reader, label_files, calibration_files, network_format=True) + # for seq_boxes, gt_label, gt0 in zip(nms_boxes, gt_gen, gt_gen0): + # print("---------- New Scenario ---------- ") + # focal_loss_checker(gt0[0], occupancy[0], n_occs=-1) + # print("---------- ------------ ---------- ") + # for gt in gt_label: + # print(gt) + # for pred in seq_boxes: + # print(pred) diff --git a/processors.py b/processors.py index 05b1412..dee118e 100644 --- a/processors.py +++ b/processors.py @@ -11,6 +11,8 @@ import sys +from point_viz.converter import PointvizConverter + def select_best_anchors(arr): dims = np.indices(arr.shape[1:]) # arr[..., 0:1] gets the occupancy value from occ in {-1, 0, 1}, i.e. {bad match, neg box, pos box} @@ -33,16 +35,30 @@ def __init__(self): def transform_labels_into_lidar_coordinates(labels: List[Label3D], R: np.ndarray, t: np.ndarray): transformed = [] for label in labels: - label.centroid = label.centroid @ np.linalg.inv(R).T - t - label.dimension = label.dimension[[2, 1, 0]] + label.centroid = (label.centroid - t) @ np.linalg.inv(R).T + label.dimension = label.dimension[[2, 1, 0]] # h w l => l ,w ,h label.yaw -= np.pi / 2 while label.yaw < -np.pi: + print("smaller than -pi") label.yaw += (np.pi * 2) while label.yaw > np.pi: + print("larger than pi") label.yaw -= (np.pi * 2) transformed.append(label) return labels + + def convert_labels_into_point_viz_format(self, labels: List[Label3D]): + label_list = [] + + for label in labels: + label_ = [label.dimension[2], label.dimension[0], label.dimension[1]] + label_.extend([label.centroid[0], label.centroid[1], label.centroid[2]]) + label_.extend([label.yaw]) + label_list.append(label_) + + return np.array(label_list) + def make_point_pillars(self, points: 
np.ndarray): assert points.ndim == 2 @@ -69,7 +85,7 @@ def make_ground_truth(self, labels: List[Label3D]): # filter labels by classes (cars, pedestrians and Trams) # Label has 4 properties (Classification (0th index of labels file), # centroid coordinates, dimensions, yaw) - labels = list(filter(lambda x: x.classification in self.classes, labels)) + labels = list(filter(lambda x: x.classification in self.classes_map, labels)) if len(labels) == 0: pX, pY = int(self.Xn / self.downscaling_factor), int(self.Yn / self.downscaling_factor) @@ -82,7 +98,7 @@ def make_ground_truth(self, labels: List[Label3D]): target_positions = np.array([label.centroid for label in labels], dtype=np.float32) target_dimension = np.array([label.dimension for label in labels], dtype=np.float32) target_yaw = np.array([label.yaw for label in labels], dtype=np.float32) - target_class = np.array([self.classes[label.classification] for label in labels], dtype=np.int32) + target_class = np.array([self.classes_map[label.classification] for label in labels], dtype=np.int32) assert np.all(target_yaw >= -np.pi) & np.all(target_yaw <= np.pi) assert len(target_positions) == len(target_dimension) == len(target_yaw) == len(target_class) @@ -151,12 +167,17 @@ def __getitem__(self, batch_id: int): heading = [] classification = [] + # save_viz_path = "/home/tan/tjtanaa/PointPillars/visualization/original_processor" + # # Initialize and setup output directory. + # Converter = PointvizConverter(save_viz_path) + for i in file_ids: lidar = self.data_reader.read_lidar(self.lidar_files[i]) # For each file, dividing the space into a x-y grid to create pillars # Voxels are the pillar ids pillars_, voxels_ = self.make_point_pillars(lidar) + # print(pillars_.shape, voxels_.shape) pillars.append(pillars_) voxels.append(voxels_) @@ -166,11 +187,30 @@ def __getitem__(self, batch_id: int): # Labels are transformed into the lidar coordinate bounding boxes # Label has 7 values, centroid, dimensions and yaw value. 
label_transformed = self.transform_labels_into_lidar_coordinates(label, R, t) + + + # # Pass data and create html files. + # pts_rect = lidar[:,:3] + # intensity = lidar[:,3] + # # sample_info['pts_rect'][:,1] *= -1 # mirror the y axis + # # pts_rect[:,1] *= -1 + # # coors = sample_info['pts_rect'] + # bbox_params = self.convert_labels_into_point_viz_format(label_transformed) + # print(bbox_params) + # Converter.compile("ori_sample_{}".format(i), coors=pts_rect, intensity=intensity, + # bbox_params=bbox_params) + + + # exit() + # These definitions can be found in point_pillars.cpp file # We are splitting a 10 dim vector that contains this information. occupancy_, position_, size_, angle_, heading_, classification_ = self.make_ground_truth( label_transformed) + # print(occupancy_.shape, position_.shape, size_.shape, angle_.shape, heading_.shape, classification_.shape) + # exit() + occupancy.append(occupancy_) position.append(position_) size.append(size_) @@ -197,3 +237,100 @@ def on_epoch_end(self): if self.label_files is not None: self.lidar_files, self.label_files, self.calibration_files = \ shuffle(self.lidar_files, self.label_files, self.calibration_files) + + + +class AnalyseSimpleDataGenerator(DataProcessor, Sequence): + """ Multiprocessing-safe data generator for training, validation or testing, without fancy augmentation """ + + def __init__(self, data_reader: DataReader, batch_size: int, lidar_files: List[str], label_files: List[str] = None, + calibration_files: List[str] = None): + super(AnalyseSimpleDataGenerator, self).__init__() + self.data_reader = data_reader + self.batch_size = batch_size + self.lidar_files = lidar_files + self.label_files = label_files + self.calibration_files = calibration_files + + assert (calibration_files is None and label_files is None) or \ + (calibration_files is not None and label_files is not None) + + if self.calibration_files is not None: + assert len(self.calibration_files) == len(self.lidar_files) + assert 
len(self.label_files) == len(self.lidar_files) + + def __len__(self): + return len(self.lidar_files) // self.batch_size + + def __getitem__(self, batch_id: int): + file_ids = np.arange(batch_id * self.batch_size, self.batch_size * (batch_id + 1)) + # print("inside getitem") + pillars = [] + voxels = [] + occupancy = [] + position = [] + size = [] + angle = [] + heading = [] + classification = [] + pts_input = [] + gt_boxes3d = [] + + save_viz_path = "/home/tan/tjtanaa/PointPillars/visualization/original_processor" + # Initialize and setup output directory. + Converter = PointvizConverter(save_viz_path) + + for i in file_ids: + lidar = self.data_reader.read_lidar(self.lidar_files[i]) + + + Converter.compile("transform_sample_{}".format(i), coors=lidar[:,:3], intensity=lidar[:,3]) + + # For each file, dividing the space into a x-y grid to create pillars + # Voxels are the pillar ids + pillars_, voxels_ = self.make_point_pillars(lidar) + + # print(pillars_.shape, voxels_.shape) + pillars.append(pillars_) + voxels.append(voxels_) + + if self.label_files is not None: + label = self.data_reader.read_label(self.label_files[i]) + R, t = self.data_reader.read_calibration(self.calibration_files[i]) + # Labels are transformed into the lidar coordinate bounding boxes + # Label has 7 values, centroid, dimensions and yaw value. + label_transformed = self.transform_labels_into_lidar_coordinates(label, R, t) + + # These definitions can be found in point_pillars.cpp file + # We are splitting a 10 dim vector that contains this information. 
+ occupancy_, position_, size_, angle_, heading_, classification_ = self.make_ground_truth( + label_transformed) + + occupancy.append(occupancy_) + position.append(position_) + size.append(size_) + angle.append(angle_) + heading.append(heading_) + classification.append(classification_) + pts_input.append(lidar) + gt_boxes3d.append(label_transformed) + + pillars = np.concatenate(pillars, axis=0) + voxels = np.concatenate(voxels, axis=0) + + if self.label_files is not None: + occupancy = np.array(occupancy) + position = np.array(position) + size = np.array(size) + angle = np.array(angle) + heading = np.array(heading) + classification = np.array(classification) + return [pillars, voxels], [occupancy, position, size, angle, heading, classification], [pts_input, gt_boxes3d] + else: + return [pillars, voxels] + + def on_epoch_end(self): + # print("inside epoch") + if self.label_files is not None: + self.lidar_files, self.label_files, self.calibration_files = \ + shuffle(self.lidar_files, self.label_files, self.calibration_files) \ No newline at end of file diff --git a/readers.py b/readers.py index b8ca8d9..c2a4880 100644 --- a/readers.py +++ b/readers.py @@ -8,7 +8,7 @@ class Label3D: def __init__(self, classification: str, centroid: np.ndarray, dimension: np.ndarray, yaw: float): self.classification = classification self.centroid = centroid - self.dimension = dimension + self.dimension = dimension # hwl self.yaw = yaw def __str__(self): diff --git a/src/point_pillars.cpp b/src/point_pillars.cpp index 4c2127d..e30d087 100644 --- a/src/point_pillars.cpp +++ b/src/point_pillars.cpp @@ -7,6 +7,7 @@ #include #include #include +// #include namespace py = pybind11; struct IntPairHash { @@ -26,6 +27,8 @@ struct PillarPoint { float xc; float yc; float zc; + float xp; + float yp; }; pybind11::tuple createPillars(pybind11::array_t points, @@ -63,13 +66,15 @@ pybind11::tuple createPillars(pybind11::array_t points, auto yIndex = static_cast(std::floor((points.at(i, 1) - yMin) / 
yStep)); PillarPoint p = { - points.at(i, 0), - points.at(i, 1), - points.at(i, 2), - points.at(i, 3), - 0, - 0, - 0, + points.at(i, 0), // x + points.at(i, 1), // y + points.at(i, 2), // z + points.at(i, 3), // intensity + 0, // xc + 0, // yc + 0, // zc + 0, // xp + 0, // yp }; map[{xIndex, yIndex}].emplace_back(p); @@ -78,7 +83,7 @@ pybind11::tuple createPillars(pybind11::array_t points, pybind11::array_t tensor; pybind11::array_t indices; - tensor.resize({1, maxPillars, maxPointsPerPillar, 7}); + tensor.resize({1, maxPillars, maxPointsPerPillar, 9}); indices.resize({1, maxPillars, 3}); int pillarId = 0; @@ -123,13 +128,23 @@ pybind11::tuple createPillars(pybind11::array_t points, break; } - tensor.mutable_at(0, pillarId, pointId, 0) = p.x - (xIndex * xStep + xMin); - tensor.mutable_at(0, pillarId, pointId, 1) = p.y - (yIndex * yStep + yMin); - tensor.mutable_at(0, pillarId, pointId, 2) = p.z - zMid; + // tensor.mutable_at(0, pillarId, pointId, 0) = p.x - (xIndex * xStep + xMin); + // tensor.mutable_at(0, pillarId, pointId, 1) = p.y - (yIndex * yStep + yMin); + // tensor.mutable_at(0, pillarId, pointId, 2) = p.z - zMid; + // tensor.mutable_at(0, pillarId, pointId, 3) = p.intensity; + // tensor.mutable_at(0, pillarId, pointId, 4) = p.xc; + // tensor.mutable_at(0, pillarId, pointId, 5) = p.yc; + // tensor.mutable_at(0, pillarId, pointId, 6) = p.zc; + + tensor.mutable_at(0, pillarId, pointId, 0) = p.x; + tensor.mutable_at(0, pillarId, pointId, 1) = p.y; + tensor.mutable_at(0, pillarId, pointId, 2) = p.z; tensor.mutable_at(0, pillarId, pointId, 3) = p.intensity; tensor.mutable_at(0, pillarId, pointId, 4) = p.xc; tensor.mutable_at(0, pillarId, pointId, 5) = p.yc; tensor.mutable_at(0, pillarId, pointId, 6) = p.zc; + tensor.mutable_at(0, pillarId, pointId, 7) = p.x - (xIndex * xStep + xMin); + tensor.mutable_at(0, pillarId, pointId, 8) = p.y - (yIndex * yStep + yMin); pointId++; } @@ -328,6 +343,42 @@ float iou(const BoundingBox3D& box1, return area_overlap / 
(area_poly1 + area_poly2 - area_overlap); } + +// Calculates the IOU between two bounding boxes. +float iou(const BoundingBox3D& box1, + const BoundingBox3D& box2) +{ + const auto& box_as_vector = boundingBox3DToTopDown(box1); + const auto& box_as_vector_2 = boundingBox3DToTopDown(box2); + const auto& clipped_vector = sutherlandHodgmanClip(box_as_vector, box_as_vector_2); + + float area_poly1 = polygonArea(box_as_vector); + float area_poly2 = polygonArea(box_as_vector_2); + float area_overlap = polygonArea(clipped_vector); + + return area_overlap / (area_poly1 + area_poly2 - area_overlap); +} + + +// Calculates the IOU between two bounding boxes. +float iou3D(const BoundingBox3D& box1, + const BoundingBox3D& box2) +{ + const auto& box_as_vector = boundingBox3DToTopDown(box1); + const auto& box_as_vector_2 = boundingBox3DToTopDown(box2); + const auto& clipped_vector = sutherlandHodgmanClip(box_as_vector, box_as_vector_2); + + float volume_poly1 = polygonArea(box_as_vector) * box1.height; + float volume_poly2 = polygonArea(box_as_vector_2) * box2.height; + + float topZ = min(box1.height/2 + box1.z, box2.height/2 + box2.z); + float bottomZ = max( - box1.height/2 + box1.z, - box2.height/2 + box2.z); + + float volume_overlap = polygonArea(clipped_vector) * (topZ - bottomZ); + + return volume_overlap / (volume_poly1 + volume_poly2 - volume_overlap); +} + int clip(int n, int lower, int upper) { return std::max(lower, std::min(n, upper)); } @@ -461,6 +512,8 @@ std::tuple, int, int> createPillarsTarget(const pybind1 float maxIou = 0; BoundingBox3D bestAnchor = {}; int bestAnchorId = 0; + int bestAnchorXId = 0; + int bestAnchorYId = 0; for (int xId = xStart; xId < xEnd; xId++) // Iterate through every box within search diameter // In our example case, from 3 till 8 { @@ -486,6 +539,14 @@ std::tuple, int, int> createPillarsTarget(const pybind1 maxIou = iouOverlap; bestAnchor = anchorBox; bestAnchorId = anchorCount; + bestAnchorXId = xId; + bestAnchorYId = yId; + // 
if(printTime){ + // if(anchorCount == 3){ + // py::print("\nIoU old: " +std::to_string(iouOverlap) + " new: " + std::to_string(maxIou)); + // } + // } + } if (iouOverlap > positiveThreshold) // Accept the Anchor. Add the anchor details to the tensor. @@ -502,7 +563,8 @@ std::tuple, int, int> createPillarsTarget(const pybind1 tensor.mutable_at(objectCount, xId, yId, anchorCount, 5) = std::log(labelBox.width / anchorBox.width); tensor.mutable_at(objectCount, xId, yId, anchorCount, 6) = std::log(labelBox.height / anchorBox.height); - tensor.mutable_at(objectCount, xId, yId, anchorCount, 7) = std::sin(labelBox.yaw - anchorBox.yaw); //delta yaw + // tensor.mutable_at(objectCount, xId, yId, anchorCount, 7) = std::sin(labelBox.yaw - anchorBox.yaw); //delta yaw + tensor.mutable_at(objectCount, xId, yId, anchorCount, 7) = labelBox.yaw - anchorBox.yaw; //delta yaw if (labelBox.yaw > 0) // Is yaw > 0 { tensor.mutable_at(objectCount, xId, yId, anchorCount, 8) = 1; @@ -535,14 +597,20 @@ std::tuple, int, int> createPillarsTarget(const pybind1 negCnt++; if (printTime) { -// std::cout << "\nThere was no sufficiently overlapping anchor anywhere for object " << objectCount << std::endl; -// py::print("There was no sufficiently overlapping anchor anywhere for object " +str(objectCount)); -// std::cout << "Best IOU was " << maxIou << ". Adding the best location regardless of threshold." << std::endl; -// py::print("Best IOU was "+str(maxIou)+" Adding the best location regardless of threshold"); + // std::cout << "\nThere was no sufficiently overlapping anchor anywhere for object " << objectCount << std::endl; + py::print("\nThere was no sufficiently overlapping anchor anywhere for object " +std::to_string(objectCount)); + // std::cout << "Best IOU was " << maxIou << ". Adding the best location regardless of threshold." 
<< std::endl; + py::print("\nBest IOU of anchor " + std::to_string(bestAnchorId) + " was "+std::to_string(maxIou)+" Adding the best location regardless of threshold"); + // py::print("\nBest IOU.x was "+std::to_string(bestAnchor.x)+" "); + // py::print("\nBest IOU.y was "+std::to_string(bestAnchor.y)+" "); + // py::print("\nBest IOU.z was "+std::to_string(bestAnchor.z)+" "); + // py::print("\nBest IOU.ry was "+std::to_string(bestAnchor.yaw)+" "); } - const auto xId = static_cast(std::floor((labelBox.x - xMin) / (xStep * downscalingFactor))); - const auto yId = static_cast(std::floor((labelBox.y - yMin) / (yStep * downscalingFactor))); + const auto xId = bestAnchorXId; + const auto yId = bestAnchorYId; + // const auto xId = static_cast(std::floor((labelBox.x - xMin) / (xStep * downscalingFactor))); + // const auto yId = static_cast(std::floor((labelBox.y - yMin) / (yStep * downscalingFactor))); const float diag = std::sqrt(std::pow(bestAnchor.width, 2) + std::pow(bestAnchor.length, 2)); tensor.mutable_at(objectCount, xId, yId, bestAnchorId, 0) = 1; @@ -555,7 +623,8 @@ std::tuple, int, int> createPillarsTarget(const pybind1 tensor.mutable_at(objectCount, xId, yId, bestAnchorId, 5) = std::log(labelBox.width / bestAnchor.width); tensor.mutable_at(objectCount, xId, yId, bestAnchorId, 6) = std::log(labelBox.height / bestAnchor.height); - tensor.mutable_at(objectCount, xId, yId, bestAnchorId, 7) = std::sin(labelBox.yaw - bestAnchor.yaw); + // tensor.mutable_at(objectCount, xId, yId, bestAnchorId, 7) = std::sin(labelBox.yaw - bestAnchor.yaw); + tensor.mutable_at(objectCount, xId, yId, bestAnchorId, 7) = labelBox.yaw - bestAnchor.yaw; if (labelBox.yaw > 0) { tensor.mutable_at(objectCount, xId, yId, bestAnchorId, 8) = 1; @@ -588,9 +657,14 @@ std::tuple, int, int> createPillarsTarget(const pybind1 return std::make_tuple(tensor, posCnt, negCnt); } +float cmath_sin(float value){ + return std::sin(value); +} + PYBIND11_MODULE(point_pillars, m) { m.def("createPillars", 
&createPillars, "Runs function to create point pillars input tensors"); m.def("createPillarsTarget", &createPillarsTarget, "Runs function to create point pillars output ground truth"); + m.def("cmath_sin", &cmath_sin, "Runs function to compute sine"); } diff --git a/viz.py b/viz.py new file mode 100644 index 0000000..e69de29