From 5824e2d0c71f3d023d2cfa793fa4cced187cfafb Mon Sep 17 00:00:00 2001 From: tjtanaa Date: Wed, 18 Nov 2020 00:55:21 +0800 Subject: [PATCH 1/4] Fixed wrong inputs: rotation, transformation matrix, inference utils, point_pillar_v2.cpp --- .gitignore | 6 + Analyse_Input_Pipeline.ipynb | 356 ++++++++++++ CMakeLists.txt | 3 +- Visualizing_Point_Pillar_Image.ipynb | 365 +++++++++++++ compute_intersection_single_element.ipynb | 389 +++++++++++++ config.py | 58 +- config_v2.py | 118 ++++ inference_utils.py | 71 +++ inference_utils_v2.py | 209 +++++++ loss.py | 11 + network.py | 3 +- point_pillars_custom_prediction.py | 157 ++++++ point_pillars_custom_processors_v2.py | 372 +++++++++++++ point_pillars_prediction.py | 104 ++-- point_pillars_training_custom_run_v2.py | 87 +++ point_pillars_training_run.py | 21 +- point_pillars_visualize_input.py | 145 +++++ processors.py | 145 ++++- readers.py | 2 +- src/point_pillars.cpp | 9 +- src/point_pillars_v2.cpp | 634 ++++++++++++++++++++++ viz.py | 0 22 files changed, 3201 insertions(+), 64 deletions(-) create mode 100644 Analyse_Input_Pipeline.ipynb create mode 100644 Visualizing_Point_Pillar_Image.ipynb create mode 100644 compute_intersection_single_element.ipynb create mode 100644 config_v2.py create mode 100644 inference_utils_v2.py create mode 100644 point_pillars_custom_prediction.py create mode 100644 point_pillars_custom_processors_v2.py create mode 100644 point_pillars_training_custom_run_v2.py create mode 100644 point_pillars_visualize_input.py create mode 100644 src/point_pillars_v2.cpp create mode 100644 viz.py diff --git a/.gitignore b/.gitignore index 0dca5a1..0107fe4 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,9 @@ cmake_example.egg-info/ dist/ logs/ point_pillars.egg-info/ +logs_*/ +.vscode/ +.ipynb_checkpoints/ +archive/ +visualization/ +logs.zip \ No newline at end of file diff --git a/Analyse_Input_Pipeline.ipynb b/Analyse_Input_Pipeline.ipynb new file mode 100644 index 0000000..0d4039c --- /dev/null +++ 
b/Analyse_Input_Pipeline.ipynb @@ -0,0 +1,356 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import time\n", + "import numpy as np\n", + "# import tensorflow as tf\n", + "from glob import glob\n", + "import cv2\n", + "\n", + "from config import Parameters\n", + "# from processors import SimpleDataGenerator\n", + "# from custom_processors import AnalyseCustomDataGenerator\n", + "from point_pillars_custom_processors_v2 import AnalyseCustomDataGenerator\n", + "from det3d.pc_kitti_dataset import PCKittiAugmentedDataset\n", + "from tqdm.notebook import trange\n", + "# print(dir(tqdm))\n", + "\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# DATA_ROOT = \"/media/data3/tjtanaa/kitti_dataset/KITTI/object/training\" # TODO make main arg\n", + "DATA_ROOT = \"/media/data3/tjtanaa/kitti_dataset/\" # TODO make main arg\n", + "MODEL_ROOT = \"./logs_Car_Pedestrian_Custom_Dataset_single_process\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "params = Parameters()\n", + "\n", + "gt_database_dir = os.path.join(DATA_ROOT, \"gt_database\")\n", + "\n", + "# training_gen = AnalyseCustomDataGenerator(batch_size=params.batch_size,root_dir = DATA_ROOT,\n", + "# npoints=20000, split='train', classes=list(params.classes_map.keys()), \n", + "# random_select=False, gt_database_dir=None, aug_hard_ratio=0.7)\n", + "\n", + "\n", + "# validation_gen = PCKittiAugmentedDataset(root_dir=DATA_ROOT, \n", + "# npoints=20000, split='val', random_select=False, classes=list(params.classes_map.keys()))" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# # get the min and max (range) of every axis\n", + "# x_max = np.array(-9999.0)\n", + "# x_min = np.array(9999.0)\n", + 
"# y_max = np.array(-9999.0)\n", + "# y_min = np.array(9999.0)\n", + "# z_max = np.array(-9999.0)\n", + "# z_min = np.array(9999.0)\n", + "# for i in trange(len(validation_gen)):\n", + "# sample = validation_gen[i]\n", + "# x_max = np.max([x_max, np.max(sample['pts_rect'][:,0])])\n", + "# x_min = np.min([x_min, np.min(sample['pts_rect'][:,0])])\n", + "# y_max = np.max([y_max, np.max(sample['pts_rect'][:,1])])\n", + "# y_min = np.min([y_min, np.min(sample['pts_rect'][:,1])])\n", + "# z_max = np.max([z_max, np.max(sample['pts_rect'][:,2])])\n", + "# z_min = np.min([z_min, np.min(sample['pts_rect'][:,2])])\n", + "# print(x_min, x_max)\n", + "# print(y_min, y_max)\n", + "# print(z_min, z_max)\n", + "# # 1.349664568901062 78.88325500488281\n", + "# # -52.52265167236328 50.9976806640625\n", + "# # -1.0496952533721924 3.1484153270721436\n", + " \n", + " \n", + "# # 1.349664568901062 78.88325500488281\n", + "# # -52.52265167236328 50.9976806640625\n", + "# # -1.0496952533721924 3.1484153270721436" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "bc52e70dae13465eaddc01ca8fd85669", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, max=942.0), HTML(value='')))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(1, 12000, 100, 9) (1, 12000, 3)\n", + "[ 6.1949997e+00 -1.4270003e+00 -1.6610003e+00 -2.9000002e-01\n", + " -1.0499954e-02 -6.4125061e-02 -1.3750792e-03 1.1499977e-01\n", + " 1.3002157e-02]\n", + "[ 6.1959996e+00 -1.4070001e+00 -1.6590002e+00 -2.5000000e-01\n", + " -9.5000267e-03 -4.4124842e-02 6.2501431e-04 1.1599970e-01\n", + " 3.3002377e-02]\n", + "[ 6.1980000e+00 -1.3870002e+00 -1.6590002e+00 -2.6999998e-01\n", + " -7.4996948e-03 -2.4124980e-02 6.2501431e-04 1.1800003e-01\n", + " 5.3002238e-02]\n", + "[ 
6.2029991e+00 -1.3670001e+00 -1.6590003e+00 -2.2000000e-01\n", + " -2.5005341e-03 -4.1248798e-03 6.2489510e-04 1.2299919e-01\n", + " 7.3002338e-02]\n", + "[ 6.2049999e+00 -1.3580000e+00 -1.6590003e+00 -3.4000000e-01\n", + " -4.9972534e-04 4.8751831e-03 6.2489510e-04 1.2500000e-01\n", + " 8.2002401e-02]\n", + "[ 6.2149997e+00 -1.3390002e+00 -1.6610001e+00 -2.2999999e-01\n", + " 9.5000267e-03 2.3874998e-02 -1.3749599e-03 1.3499975e-01\n", + " 1.0100222e-01]\n", + "[ 6.2089992e+00 -1.3180001e+00 -1.6580001e+00 -2.8000000e-01\n", + " 3.4995079e-03 4.4875145e-02 1.6250610e-03 1.2899923e-01\n", + " 1.2200236e-01]\n", + "[ 6.2230000e+00 -1.3000002e+00 -1.6610000e+00 -2.9000002e-01\n", + " 1.7500401e-02 6.2875032e-02 -1.3748407e-03 1.4300013e-01\n", + " 1.4000225e-01]\n", + "[0. 0. 0. 0. 0. 0. 0. 0. 0.]\n", + "[0. 0. 0. 0. 0. 0. 0. 0. 0.]\n", + "\n" + ] + }, + { + "ename": "NameError", + "evalue": "name 'exit' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0;31m# sample = validation_gen[i]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mpillars\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvoxels\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0moccupancy\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mposition\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msize\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mangle\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheading\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mclassification\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 14\u001b[0;31m 
\u001b[0;34m[\u001b[0m\u001b[0mpts_input\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgt_boxes3d\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msample\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mvalidation_gen\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 15\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mj\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpts_input\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0mx_max\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mx_max\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpts_input\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/tjtanaa/PointPillars/point_pillars_custom_processors_v2.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, batch_id)\u001b[0m\n\u001b[1;32m 379\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpillars_\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 380\u001b[0m \u001b[0;31m# print(np.sum(pillars_ > 0))\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 
381\u001b[0;31m \u001b[0mexit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 382\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 383\u001b[0m \u001b[0mpillars\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpillars_\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mNameError\u001b[0m: name 'exit' is not defined" + ] + } + ], + "source": [ + "\n", + "validation_gen = AnalyseCustomDataGenerator(batch_size=params.batch_size,root_dir = DATA_ROOT,\n", + " npoints=20000, split='val', classes=list(params.classes_map.keys()), \n", + " random_select=False, gt_database_dir=None, aug_hard_ratio=0.7)\n", + "# get the min and max (range) of every axis\n", + "x_max = np.array(-9999.0)\n", + "x_min = np.array(9999.0)\n", + "y_max = np.array(-9999.0)\n", + "y_min = np.array(9999.0)\n", + "z_max = np.array(-9999.0)\n", + "z_min = np.array(9999.0)\n", + "for i in trange(len(validation_gen)):\n", + "# sample = validation_gen[i]\n", + " [pillars, voxels], [occupancy, position, size, angle, heading, classification], \\\n", + " [pts_input, gt_boxes3d, sample] = validation_gen[i]\n", + " for j in range(len(pts_input)):\n", + " x_max = np.max([x_max, np.max(pts_input[j][:,0])])\n", + " x_min = np.min([x_min, np.min(pts_input[j][:,0])])\n", + " y_max = np.max([y_max, np.max(pts_input[j][:,1])])\n", + " y_min = np.min([y_min, np.min(pts_input[j][:,1])])\n", + " z_max = np.max([z_max, np.max(pts_input[j][:,2])])\n", + " z_min = np.min([z_min, np.min(pts_input[j][:,2])])\n", + "print(x_min, x_max)\n", + "print(y_min, y_max)\n", + "print(z_min, z_max)\n", + "# 1.8940000534057617 70.76299285888672\n", + "# -39.99699783325195 40.195003509521484\n", + "# -3.204000234603882 2.0290002822875977" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# import 
numba\n", + "\n", + "# [pillars, voxels], \\\n", + "# [occupancy, position, size, angle, heading, classification], \\\n", + "# [pts_input, gt_boxes3d, sample] = validation_gen[8]\n", + "\n", + "\n", + "# @numba.jit(nopython=True)\n", + "# def _points_to_bevmap_reverse_kernel(\n", + "# points,\n", + "# voxel_size,\n", + "# coors_range,\n", + "# coor_to_voxelidx,\n", + "# # coors_2d,\n", + "# bev_map,\n", + "# height_lowers,\n", + "# # density_norm_num=16,\n", + "# with_reflectivity=False,\n", + "# max_voxels=40000):\n", + "# # put all computations to one loop.\n", + "# # we shouldn't create large array in main jit code, otherwise\n", + "# # reduce performance\n", + "# N = points.shape[0]\n", + "# ndim = 3\n", + "# ndim_minus_1 = ndim - 1\n", + "# grid_size = (coors_range[3:] - coors_range[:3]) / voxel_size\n", + "# # np.round(grid_size)\n", + "# # grid_size = np.round(grid_size).astype(np.int64)(np.int32)\n", + "# grid_size = np.round(grid_size, 0, grid_size).astype(np.int32)\n", + "# height_slice_size = voxel_size[-1]\n", + "# coor = np.zeros(shape=(3, ), dtype=np.int32) # DHW\n", + "# voxel_num = 0\n", + "# failed = False\n", + "# for i in range(N):\n", + "# failed = False\n", + "# for j in range(ndim):\n", + "# c = np.floor((points[i, j] - coors_range[j]) / voxel_size[j])\n", + "# if c < 0 or c >= grid_size[j]:\n", + "# failed = True\n", + "# break\n", + "# coor[ndim_minus_1 - j] = c\n", + "# if failed:\n", + "# continue\n", + "# voxelidx = coor_to_voxelidx[coor[0], coor[1], coor[2]]\n", + "# if voxelidx == -1:\n", + "# voxelidx = voxel_num\n", + "# if voxel_num >= max_voxels:\n", + "# break\n", + "# voxel_num += 1\n", + "# coor_to_voxelidx[coor[0], coor[1], coor[2]] = voxelidx\n", + "# # coors_2d[voxelidx] = coor[1:]\n", + "# bev_map[-1, coor[1], coor[2]] += 1\n", + "# height_norm = bev_map[coor[0], coor[1], coor[2]]\n", + "# incomimg_height_norm = (\n", + "# points[i, 2] - height_lowers[coor[0]]) / height_slice_size\n", + "# if incomimg_height_norm > 
height_norm:\n", + "# bev_map[coor[0], coor[1], coor[2]] = incomimg_height_norm\n", + "# if with_reflectivity:\n", + "# bev_map[-2, coor[1], coor[2]] = points[i, 3]\n", + "# # return voxel_num\n", + "\n", + "# def points_to_bev(points,\n", + "# voxel_size,\n", + "# coors_range,\n", + "# with_reflectivity=False,\n", + "# density_norm_num=16,\n", + "# max_voxels=40000):\n", + "# \"\"\"convert kitti points(N, 4) to a bev map. return [C, H, W] map.\n", + "# this function based on algorithm in points_to_voxel.\n", + "# takes 5ms in a reduced pointcloud with voxel_size=[0.1, 0.1, 0.8]\n", + "\n", + "# Args:\n", + "# points: [N, ndim] float tensor. points[:, :3] contain xyz points and\n", + "# points[:, 3] contain reflectivity.\n", + "# voxel_size: [3] list/tuple or array, float. xyz, indicate voxel size\n", + "# coors_range: [6] list/tuple or array, float. indicate voxel range.\n", + "# format: xyzxyz, minmax\n", + "# with_reflectivity: bool. if True, will add a intensity map to bev map.\n", + "# Returns:\n", + "# bev_map: [num_height_maps + 1(2), H, W] float tensor. \n", + "# `WARNING`: bev_map[-1] is num_points map, NOT density map, \n", + "# because calculate density map need more time in cpu rather than gpu. \n", + "# if with_reflectivity is True, bev_map[-2] is intensity map. 
\n", + "# \"\"\"\n", + "# if not isinstance(voxel_size, np.ndarray):\n", + "# voxel_size = np.array(voxel_size, dtype=points.dtype)\n", + "# if not isinstance(coors_range, np.ndarray):\n", + "# coors_range = np.array(coors_range, dtype=points.dtype)\n", + "# voxelmap_shape = (coors_range[3:] - coors_range[:3]) / voxel_size\n", + "# voxelmap_shape = tuple(np.round(voxelmap_shape).astype(np.int32).tolist())\n", + "# voxelmap_shape = voxelmap_shape[::-1] # DHW format\n", + "# coor_to_voxelidx = -np.ones(shape=voxelmap_shape, dtype=np.int32)\n", + "# # coors_2d = np.zeros(shape=(max_voxels, 2), dtype=np.int32)\n", + "# bev_map_shape = list(voxelmap_shape)\n", + "# bev_map_shape[0] += 1\n", + "# height_lowers = np.linspace(\n", + "# coors_range[2], coors_range[5], voxelmap_shape[0], endpoint=False)\n", + "# if with_reflectivity:\n", + "# bev_map_shape[0] += 1\n", + "# bev_map = np.zeros(shape=bev_map_shape, dtype=points.dtype)\n", + "# _points_to_bevmap_reverse_kernel(points, voxel_size, coors_range,\n", + "# coor_to_voxelidx, bev_map, height_lowers,\n", + "# with_reflectivity, max_voxels)\n", + "# # print(voxel_num)\n", + "# return bev_map\n", + "\n", + "# def point_to_vis_bev(points,\n", + "# voxel_size=None,\n", + "# coors_range=None,\n", + "# max_voxels=80000):\n", + "# if voxel_size is None:\n", + "# voxel_size = [0.1, 0.1, 0.1]\n", + "# if coors_range is None:\n", + "# coors_range = [-50, -50, -3, 50, 50, 1]\n", + "# voxel_size[2] = coors_range[5] - coors_range[2]\n", + "# bev_map = points_to_bev(\n", + "# points, voxel_size, coors_range, max_voxels=max_voxels)\n", + "# height_map = (bev_map[0] * 255).astype(np.uint8)\n", + "# return cv2.cvtColor(height_map, cv2.COLOR_GRAY2RGB)\n", + "\n", + "# img = point_to_vis_bev(pts_input[0], voxel_size=[params.x_step/2, params.y_step/2, 0.01],\n", + "# coors_range=[params.x_min, params.y_min+20.32, params.z_min, \n", + "# params.x_max-40.32, params.y_max-20.32, params.z_max] )\n", + " " + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# fig1 = plt.figure(figsize=(10,10))\n", + "# ax = fig1.add_subplot(111)\n", + "# plt.imshow(img)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/CMakeLists.txt b/CMakeLists.txt index c21696f..0dea901 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,5 @@ cmake_minimum_required(VERSION 3.5) project(point_pillars) add_subdirectory(pybind11) -pybind11_add_module(point_pillars SHARED src/point_pillars.cpp) \ No newline at end of file +pybind11_add_module(point_pillars SHARED src/point_pillars.cpp) +pybind11_add_module(point_pillars_v2 SHARED src/point_pillars_v2.cpp) \ No newline at end of file diff --git a/Visualizing_Point_Pillar_Image.ipynb b/Visualizing_Point_Pillar_Image.ipynb new file mode 100644 index 0000000..8835ae9 --- /dev/null +++ b/Visualizing_Point_Pillar_Image.ipynb @@ -0,0 +1,365 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import time\n", + "import numpy as np\n", + "# import tensorflow as tf\n", + "from glob import glob\n", + "import cv2\n", + "\n", + "from config import Parameters\n", + "# from processors import SimpleDataGenerator\n", + "from custom_processors import AnalyseCustomDataGenerator\n", + "from det3d.pc_kitti_dataset import PCKittiAugmentedDataset\n", + "\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# DATA_ROOT = 
\"/media/data3/tjtanaa/kitti_dataset/KITTI/object/training\" # TODO make main arg\n", + "DATA_ROOT = \"/media/data3/tjtanaa/kitti_dataset/\" # TODO make main arg\n", + "MODEL_ROOT = \"./logs_Car_Pedestrian_Custom_Dataset_single_process\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "params = Parameters()\n", + "\n", + "gt_database_dir = os.path.join(DATA_ROOT, \"gt_database\")\n", + "\n", + "training_gen = AnalyseCustomDataGenerator(batch_size=params.batch_size,root_dir = DATA_ROOT,\n", + " npoints=20000, split='train', classes=list(params.classes_map.keys()), \n", + " random_select=False, gt_database_dir=None, aug_hard_ratio=0.7)\n", + "\n", + "validation_gen = AnalyseCustomDataGenerator(batch_size=params.batch_size, root_dir=DATA_ROOT, \n", + " npoints=20000, split='val', random_select=False, classes=list(params.classes_map.keys()))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# # get the min and max (range) of every axis\n", + "# x_max = np.array(-9999.0)\n", + "# x_min = np.array(9999.0)\n", + "# y_max = np.array(-9999.0)\n", + "# y_min = np.array(9999.0)\n", + "# z_max = np.array(-9999.0)\n", + "# z_min = np.array(9999.0)\n", + "# for i in range(len(training_gen)):\n", + "# [pillars, voxels], \\\n", + "# [occupancy, position, size, angle, heading, classification], \\\n", + "# [pts_input, gt_boxes3d, sample] = training_gen[i]\n", + "# for pts in pts_input:\n", + "# # print(type(pts))\n", + "# # print(pts.shape)\n", + "# x_max = np.max([x_max, np.max(pts[:,0])])\n", + "# x_min = np.min([x_min, np.min(pts[:,0])])\n", + "# y_max = np.max([y_max, np.max(pts[:,1])])\n", + "# y_min = np.min([y_min, np.min(pts[:,1])])\n", + "# z_max = np.max([z_max, np.max(pts[:,2])])\n", + "# z_min = np.min([z_min, np.min(pts[:,2])])\n", + "# print(x_min, x_max)\n", + "# print(y_min, y_max)\n", + "# print(z_min, z_max)\n", + "# 1.349664568901062 78.88325500488281\n", + "# 
-52.52265167236328 50.9976806640625\n", + "# -1.0496952533721924 3.1484153270721436\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING: /home/tan/tjtanaa/PointPillars/visualization/custom_processor already exists, operation skipped.\n", + "WARNING: /home/tan/tjtanaa/PointPillars/visualization/custom_processor/data already exists, operation skipped.\n", + "WARNING: /home/tan/tjtanaa/PointPillars/visualization/custom_processor/html already exists, operation skipped.\n", + "36\n", + "37\n", + "38\n", + "39\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAk4AAAJBCAYAAACwDzogAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAUyklEQVR4nO3dT8ild3n/8c/1S2wW6iLiLyGdDDXIFBq7iGUIhUBJF21SN6MLy7iQFIS4SEChiyZudCml2p3CiKEpWNMBFbMobdMguDOZCUEzmaYOTWrGGTKIBW0XKYlXF8+dekyeyVwzz59zHny9YDjnfM99n+c7fOdm3pz7Puep7g4AAFf2/9Y9AQCAg0I4AQAMCScAgCHhBAAwJJwAAIaEEwDA0J6FU1XdW1UvVNW5qnpor34OAMB+qb34Hqequi7JvyX5oyTnkzyd5GPd/fyu/zAAgH1y/R697p1JznX3vydJVT2W5FiSbcOpqnwLJwCw337S3f//anbYq1N1h5K8vPL4/DL2f6rq/qo6VVWn9mgOAABv5z+udoe9esepthn7lXeVuvtEkhOJd5wAgINhr95xOp/k8MrjW5Nc2KOfBQCwL/YqnJ5OcqSqbquq30hyPMnje/SzAAD2xZ6cquvu16rqwST/lOS6JI9095m9+FkAAPtlT76O4Kon4RonAGD/ne7uo1ezg28OBwAYEk4AAEPCCQBgSDgBAAwJJwCAIeEEADAknAAAhoQTAMCQcAIAGBJOAABDwgkAYEg4AQAMCScAgCHhBAAwJJwAAIaEEwDAkHACABgSTgAAQ8IJAGBIOAEADAknAIAh4QQAMCScAACGhBMAwJBwAgAYEk4AAEPCCQBgSDgBAAwJJwCAIeEEADAknAAAhoQTAMCQcAIAGBJOAABDwgkAYEg4AQAMCScAgCHhBAAwJJwAAIaEEwDAkHACABgSTgAAQ8IJAGBIOAEADAknAIAh4QQAMCScAACGhBMAwJBwAgAYEk4AAEPCCQBgSDgBAAwJJwCAIeEEADAknAAAhoQTAMCQcAIAGBJOAABDwgkAYEg4AQAMCScAgCHhBAAwJJwAAIaEEwDAkHACABgSTgAAQ8IJAGBIOAEADAknAIAh4QQAMCScAACGhBMAwJBwAgAYEk4AAEPCCQBgSDgBAAwJJwCAIeEE
ADAknAAAhoQTAMCQcAIAGBJOAABDwgkAYEg4AQAMCScAgCHhBAAwJJwAAIaEEwDAkHACABgSTgAAQ8IJAGBIOAEADAknAIAh4QQAMCScAACGhBMAwJBwAgAYEk4AAEPCCQBg6Pqd7FxVLyX5eZLXk7zW3Uer6j1J/j7J+5K8lORPu/s/dzZNAID12413nP6wu+/o7qPL44eSPNndR5I8uTwGADjw9uJU3bEkjy73H03y4T34GQAA+26n4dRJ/rmqTlfV/cvYzd19MUmW25u227Gq7q+qU1V1aodzAADYFzu6xinJXd19oapuSvJEVf3rdMfuPpHkRJJUVe9wHgAAe25H7zh194Xl9lKSbyW5M8krVXVLkiy3l3Y6SQCATXDN4VRV76yqd79xP8kfJ3kuyeNJ7ls2uy/Jt3c6SQCATbCTU3U3J/lWVb3xOn/X3f9YVU8nOVlVn0jyoyQf3fk0AQDWr7rXf3mRa5wAgDU4vfJ1SiO+ORwAYEg4AQAMCScAgCHhBAAwJJwAAIaEEwDAkHACABgSTgAAQ8IJAGBIOAEADAknAIAh4QQAMCScAACGhBMAwJBwAgAYEk4AAEPCCQBgSDgBAAwJJwCAIeEEADAknAAAhoQTAMCQcAIAGBJOAABDwgkAYEg4AQAMCScAgCHhBAAwJJwAAIaEEwDAkHACABgSTgAAQ8IJAGBIOAEADAknAIAh4QQAMCScAACGhBMAwJBwAgAYEk4AAEPCCQBgSDgBAAwJJwCAIeEEADAknAAAhoQTAMCQcAIAGBJOAABDwgkAYEg4AQAMCScAgCHhBAAwJJwAAIaEEwDAkHACABgSTgAAQ8IJAGBIOAEADAknAIAh4QQAMCScAACGhBMAwJBwAgAYEk4AAEPCCQBgSDgBAAwJJwCAIeEEADAknAAAhoQTAMCQcAIAGBJOAABDwgkAYEg4AQAMCScAgCHhBAAwJJwAAIaEEwDAkHACABgSTgAAQ8IJAGBIOAEADAknAIAh4QQAMCScAACGhBMAwJBwAgAYEk4AAEPCCQBgSDgBAAwJJwCAIeEEADAknAAAhoQTAMCQcAIAGBJOAABDwgkAYEg4AQAMCScAgKErhlNVPVJVl6rquZWx91TVE1X1w+X2xpXnHq6qc1X1QlXds1cTBwDYb5N3nP4myb1vGnsoyZPdfSTJk8vjVNXtSY4n+cCyz5eq6rpdmy0AwBpdMZy6+7tJfvqm4WNJHl3uP5rkwyvjj3X3q939YpJzSe7cpbkCAKzVtV7jdHN3X0yS5famZfxQkpdXtju/jL1FVd1fVaeq6tQ1zgEAYF9dv8uvV9uM9XYbdveJJCeSpKq23QYAYJNc6ztOr1TVLUmy3F5axs8nObyy3a1JLlz79AAANse1htPjSe5b7t+X5Nsr48er6oaqui3JkSRP7WyKAACb4Yqn6qrq60nuTvLeqjqf5LNJPp/kZFV9IsmPknw0Sbr7TFWdTPJ8kteSPNDdr+/R3AEA9lV1r//yItc4AQBrcLq7j17NDr45HABgSDgBAAwJJwCAIeEEADAknAAAhoQTAMCQcAIAGBJOAABDwgkAYEg4AQAMCScAgCHhBAAwJJwAAIaEEwDAkHACABgSTgAAQ8IJAGBIOAEADAknAIAh4QQAMCScAACGhBMAwJBwAgAYEk4AAEPCCQBgSDgBAAwJJwCAIeEEADAknAAAhoQTAMCQcAIAGBJOAABDwgkAYEg4AQAMCScAgCHhBAAwJJwAAIaEEwDAkHACABgSTgAAQ8IJAGBIOAEADAknAIAh4QQAMCScAACGhBMAwJBwAgAYEk4AAEPCCQBgSDgBAAwJJwCAIeEEADAknAAAhoQTAMCQcAIAGBJOAABDwgkAYEg4AQAMCScAgCHhBAAwJJwAAIaEEwDAkHACABgSTgAAQ8IJAGBIOAEADAknAIAh4QQAMCScAACGhBMAwJBwAgAYEk4AAEPCCQBgSDgBAAwJJwCAIeEEADAknAAA
hoQTAMCQcAIAGBJOAABDwgkAYEg4AQAMCScAgCHhBAAwJJwAAIaEEwDAkHACABgSTgAAQ8IJAGBIOAEADAknAIAh4QQAMCScAACGhBMAwJBwAgAYEk4AAEPCCQBgSDgBAAwJJwCAoSuGU1U9UlWXquq5lbHPVdWPq+rZ5c+HVp57uKrOVdULVXXPXk0cAGC/Td5x+psk924z/tfdfcfy5x+SpKpuT3I8yQeWfb5UVdft1mQBANbpiuHU3d9N8tPh6x1L8lh3v9rdLyY5l+TOHcwPAGBj7OQapwer6vvLqbwbl7FDSV5e2eb8MvYWVXV/VZ2qqlM7mAMAwL651nD6cpL3J7kjycUkX1jGa5tte7sX6O4T3X20u49e4xwAAPbVNYVTd7/S3a939y+SfCW/PB13PsnhlU1vTXJhZ1MEANgM1xROVXXLysOPJHnjE3ePJzleVTdU1W1JjiR5amdTBADYDNdfaYOq+nqSu5O8t6rOJ/lskrur6o5snYZ7Kcknk6S7z1TVySTPJ3ktyQPd/freTB0AYH9V97aXIO3vJKrWPwkA4NfN6au91to3hwMADAknAIAh4QQAMCScAACGhBMAwJBwAgAYEk4AAEPCCQBgSDgBAAwJJwCAIeEEADAknAAAhoQTAMCQcAIAGBJOAABDwgkAYEg4AQAMCScAgCHhBAAwJJwAAIaEEwDAkHACABgSTgAAQ8IJAGBIOAEADAknAIAh4QQAMCScAACGhBMAwJBwAgAYEk4AAEPCCQBgSDgBAAwJJwCAIeEEADAknAAAhoQTAMCQcAIAGBJOAABDwgkAYEg4AQAMCScAgCHhBAAwJJwAAIaEEwDAkHACABgSTgAAQ8IJAGBIOAEADAknAIAh4QQAMCScAACGhBMAwJBwAgAYEk4AAEPCCQBgSDgBAAwJJwCAIeEEADAknAAAhoQTAMCQcAIAGBJOAABDwgkAYEg4AQAMCScAgCHhBAAwJJwAAIaEEwDAkHACABgSTgAAQ8IJAGBIOAEADAknAIAh4QQAMCScAACGhBMAwJBwAgAYEk4AAEPCCQBgSDgBAAwJJwCAIeEEADAknAAAhoQTAMCQcAIAGBJOAABDwgkAYEg4AQAMCScAgCHhBAAwJJwAAIaEEwDAkHACABgSTgAAQ8IJAGBIOAEADAknAIAh4QQAMHTFcKqqw1X1nao6W1VnqupTy/h7quqJqvrhcnvjyj4PV9W5qnqhqu7Zy78AAMB+mbzj9FqSP+/u30ny+0keqKrbkzyU5MnuPpLkyeVxlueOJ/lAknuTfKmqrtuLyQMA7KcrhlN3X+zuZ5b7P09yNsmhJMeSPLps9miSDy/3jyV5rLtf7e4Xk5xLcuduTxwAYL9d1TVOVfW+JB9M8r0kN3f3xWQrrpLctGx2KMnLK7udX8be/Fr3V9Wpqjp19dMGANh/1083rKp3JflGkk9398+q6rKbbjPWbxnoPpHkxPLab3keAGDTjN5xqqp3ZCuavtbd31yGX6mqW5bnb0lyaRk/n+Twyu63JrmwO9MFAFifyafqKslXk5zt7i+uPPV4kvuW+/cl+fbK+PGquqGqbktyJMlTuzdlAID1mJyquyvJx5P8oKqeXcY+k+TzSU5W1SeS/CjJR5Oku89U1ckkz2frE3kPdPfruz5zAIB9Vt3rv7zINU4AwBqc7u6jV7ODbw4HABgSTgAAQ8IJAGBIOAEADAknAIAh4QQAMCScAACGhBMAwJBwAgAYEk4AAEPCCQBgSDgBAAwJJwCAIeEEADAknAAAhoQTAMCQcAIAGBJOAABDwgkAYEg4AQAMCScAgCHhBAAwJJwAAIaEEwDAkHACABgSTgAAQ8IJAGBIOAEADAknAIAh4QQAMCScAACGhBMAwJBwAgAYEk4AAEPCCQBgSDgBAAwJJwCAIeEEADAknAAAhoQTAMCQcAIAGBJOAABDwgkAYEg4AQAMCScAgCHhBAAwJJwAAIaEEwDAkHACABgSTgAA
Q8IJAGBIOAEADAknAIAh4QQAMCScAACGhBMAwJBwAgAYEk4AAEPCCQBgSDgBAAwJJwCAIeEEADAknAAAhoQTAMCQcAIAGBJOAABDwgkAYEg4AQAMCScAgCHhBAAwJJwAAIaEEwDAkHACABgSTgAAQ8IJAGBIOAEADAknAIAh4QQAMCScAACGhBMAwJBwAgAYEk4AAEPCCQBgSDgBAAwJJwCAIeEEADAknAAAhoQTAMCQcAIAGBJOAABDwgkAYEg4AQAMCScAgCHhBAAwJJwAAIaEEwDAkHACABgSTgAAQ1cMp6o6XFXfqaqzVXWmqj61jH+uqn5cVc8ufz60ss/DVXWuql6oqnv28i8AALBfrh9s81qSP+/uZ6rq3UlOV9UTy3N/3d1/tbpxVd2e5HiSDyT5zST/UlW/3d2v7+bEAQD22xXfcerui939zHL/50nOJjn0NrscS/JYd7/a3S8mOZfkzt2YLADAOl3VNU5V9b4kH0zyvWXowar6flU9UlU3LmOHkry8stv5vH1oAQAcCONwqqp3JflGkk9398+SfDnJ+5PckeRiki+8sek2u/c2r3d/VZ2qqlNXPWsAgDUYhVNVvSNb0fS17v5mknT3K939enf/IslX8svTceeTHF7Z/dYkF978mt19oruPdvfRnfwFAAD2y+RTdZXkq0nOdvcXV8ZvWdnsI0meW+4/nuR4Vd1QVbclOZLkqd2bMgDAekw+VXdXko8n+UFVPbuMfSbJx6rqjmydhnspySeTpLvPVNXJJM9n6xN5Dww+UfeTJP+93HIwvDfW6yCxXgeL9TpYrNfBsrpev3W1O1f3Wy4/WouqOuW03cFhvQ4W63WwWK+DxXodLDtdL98cDgAwJJwAAIY2KZxOrHsCXBXrdbBYr4PFeh0s1utg2dF6bcw1TgAAm26T3nECANhowgkAYGgjwqmq7q2qF6rqXFU9tO758FZV9VJV/aCqnn3j1+RU1Xuq6omq+uFye+OVXoe9sfy+yEtV9dzK2GXXp6oeXo63F6rqnvXM+tfXZdbrc1X14+UYe7aqPrTynPVak6o6XFXfqaqzVXWmqj61jDu+NtDbrNeuHV9rv8apqq5L8m9J/ihbv67l6SQf6+7n1zoxfkVVvZTkaHf/ZGXsL5P8tLs/vwTvjd39F+ua46+zqvqDJP+V5G+7+3eXsW3Xp6puT/L1bP2apN9M8i9JfnvwRbXsksus1+eS/Fd3/9WbtrVea7T8loxbuvuZqnp3ktNJPpzkz+L42jhvs15/ml06vjbhHac7k5zr7n/v7v9J8liSY2ueEzPHkjy63H80W/84WYPu/m6Sn75p+HLrcyzJY939ane/mORcfvm7JtkHl1mvy7Fea9TdF7v7meX+z5OcTXIojq+N9DbrdTlXvV6bEE6Hkry88vh83v4vyXp0kn+uqtNVdf8ydnN3X0y2/rEmuWlts2M7l1sfx9zmerCqvr+cynvj1I/12hBV9b4kH0zyvTi+Nt6b1ivZpeNrE8KpthnzHQmb567u/r0kf5LkgeVUAweTY24zfTnJ+5PckeRiki8s49ZrA1TVu5J8I8mnu/tnb7fpNmPWa59ts167dnxtQjidT3J45fGtSS6saS5cRndfWG4vJflWtt7KfGU5n/zGeeVL65sh27jc+jjmNlB3v9Ldr3f3L5J8Jb88XWC91qyq3pGt/4S/1t3fXIYdXxtqu/XazeNrE8Lp6SRHquq2qvqNJMeTPL7mObGiqt65XGSXqnpnkj9O8ly21um+ZbP7knx7PTPkMi63Po8nOV5VN1TVbUmOJHlqDfNjxRv/CS8+kq1jLLFea1VVleSrSc529xdXnnJ8baDLrdduHl/X7+6Ur153v1ZVDyb5pyTXJXmku8+seVr8qpuTfGvr32OuT/J33f2PVfV0kpNV9YkkP0ry0TXO8ddaVX09yd1J3ltV55N8Nsnns836dPeZqjqZ5PkkryV5wCd+9tdl1uvuqrojW6cJXkryycR6
bYC7knw8yQ+q6tll7DNxfG2qy63Xx3br+Fr71xEAABwUm3CqDgDgQBBOAABDwgkAYEg4AQAMCScAgCHhBAAwJJwAAIb+F/eiKo2m6X3aAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "[pillars, voxels], \\\n", + "[occupancy, position, size, angle, heading, classification], \\\n", + "[pts_input, gt_boxes3d, sample] = validation_gen[9]\n", + "# print(pillars[0].shape)\n", + "# print(voxels[0].shape)\n", + "# print(occupancy[0].shape)\n", + "# # print(occupancy[0,0,:])\n", + "# print(angle[0].shape)\n", + "\n", + "# for i in range(len(occupancy)):\n", + "# print(i, \"l \")\n", + "# print(np.sum(occupancy[i][:,:,0] == 0))\n", + "# print(np.sum(occupancy[i][:,:,0] == 1))\n", + "# print(np.sum(occupancy[i][:,:,0] == -1))\n", + "# print(np.sum(occupancy[i][:,:,1] == 0))\n", + "# print(np.sum(occupancy[i][:,:,1] == 1))\n", + "# print(np.sum(occupancy[i][:,:,1] == -1))\n", + "# print(np.sum(occupancy[i][:,:,2] == 0))\n", + "# print(np.sum(occupancy[i][:,:,2] == 1))\n", + "# print(np.sum(occupancy[i][:,:,2] == -1))\n", + "# print(np.sum(occupancy[i][:,:,3] == 0))\n", + "# print(np.sum(occupancy[i][:,:,3] == 1))\n", + "# print(np.sum(occupancy[i][:,:,3] == -1))\n", + "\n", + "\n", + "fig = plt.figure(figsize=(10,10)) \n", + " \n", + "ax = fig.add_subplot(111) \n", + "ax.imshow(occupancy[0][:,:,3] == 1, cmap = plt.cm.gray, \n", + " interpolation ='nearest') " + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING: /home/tan/tjtanaa/PointPillars/visualization/custom_processor already exists, operation skipped.\n", + "WARNING: /home/tan/tjtanaa/PointPillars/visualization/custom_processor/data already exists, operation skipped.\n", + "WARNING: /home/tan/tjtanaa/PointPillars/visualization/custom_processor/html already exists, operation skipped.\n", + "32\n", + "33\n", + "34\n", + "35\n" + ] + } + ], + "source": [ + "\n", + "import numba\n", + "\n", + "[pillars, voxels], \\\n", + "[occupancy, position, size, angle, heading, 
classification], \\\n", + "[pts_input, gt_boxes3d, sample] = validation_gen[8]\n", + "\n", + "\n", + "@numba.jit(nopython=True)\n", + "def _points_to_bevmap_reverse_kernel(\n", + " points,\n", + " voxel_size,\n", + " coors_range,\n", + " coor_to_voxelidx,\n", + " # coors_2d,\n", + " bev_map,\n", + " height_lowers,\n", + " # density_norm_num=16,\n", + " with_reflectivity=False,\n", + " max_voxels=40000):\n", + " # put all computations to one loop.\n", + " # we shouldn't create large array in main jit code, otherwise\n", + " # reduce performance\n", + " N = points.shape[0]\n", + " ndim = 3\n", + " ndim_minus_1 = ndim - 1\n", + " grid_size = (coors_range[3:] - coors_range[:3]) / voxel_size\n", + " # np.round(grid_size)\n", + " # grid_size = np.round(grid_size).astype(np.int64)(np.int32)\n", + " grid_size = np.round(grid_size, 0, grid_size).astype(np.int32)\n", + " height_slice_size = voxel_size[-1]\n", + " coor = np.zeros(shape=(3, ), dtype=np.int32) # DHW\n", + " voxel_num = 0\n", + " failed = False\n", + " for i in range(N):\n", + " failed = False\n", + " for j in range(ndim):\n", + " c = np.floor((points[i, j] - coors_range[j]) / voxel_size[j])\n", + " if c < 0 or c >= grid_size[j]:\n", + " failed = True\n", + " break\n", + " coor[ndim_minus_1 - j] = c\n", + " if failed:\n", + " continue\n", + " voxelidx = coor_to_voxelidx[coor[0], coor[1], coor[2]]\n", + " if voxelidx == -1:\n", + " voxelidx = voxel_num\n", + " if voxel_num >= max_voxels:\n", + " break\n", + " voxel_num += 1\n", + " coor_to_voxelidx[coor[0], coor[1], coor[2]] = voxelidx\n", + " # coors_2d[voxelidx] = coor[1:]\n", + " bev_map[-1, coor[1], coor[2]] += 1\n", + " height_norm = bev_map[coor[0], coor[1], coor[2]]\n", + " incomimg_height_norm = (\n", + " points[i, 2] - height_lowers[coor[0]]) / height_slice_size\n", + " if incomimg_height_norm > height_norm:\n", + " bev_map[coor[0], coor[1], coor[2]] = incomimg_height_norm\n", + " if with_reflectivity:\n", + " bev_map[-2, coor[1], coor[2]] = points[i, 
3]\n", + " # return voxel_num\n", + "\n", + "def points_to_bev(points,\n", + " voxel_size,\n", + " coors_range,\n", + " with_reflectivity=False,\n", + " density_norm_num=16,\n", + " max_voxels=40000):\n", + " \"\"\"convert kitti points(N, 4) to a bev map. return [C, H, W] map.\n", + " this function based on algorithm in points_to_voxel.\n", + " takes 5ms in a reduced pointcloud with voxel_size=[0.1, 0.1, 0.8]\n", + "\n", + " Args:\n", + " points: [N, ndim] float tensor. points[:, :3] contain xyz points and\n", + " points[:, 3] contain reflectivity.\n", + " voxel_size: [3] list/tuple or array, float. xyz, indicate voxel size\n", + " coors_range: [6] list/tuple or array, float. indicate voxel range.\n", + " format: xyzxyz, minmax\n", + " with_reflectivity: bool. if True, will add a intensity map to bev map.\n", + " Returns:\n", + " bev_map: [num_height_maps + 1(2), H, W] float tensor. \n", + " `WARNING`: bev_map[-1] is num_points map, NOT density map, \n", + " because calculate density map need more time in cpu rather than gpu. \n", + " if with_reflectivity is True, bev_map[-2] is intensity map. 
\n", + " \"\"\"\n", + " if not isinstance(voxel_size, np.ndarray):\n", + " voxel_size = np.array(voxel_size, dtype=points.dtype)\n", + " if not isinstance(coors_range, np.ndarray):\n", + " coors_range = np.array(coors_range, dtype=points.dtype)\n", + " voxelmap_shape = (coors_range[3:] - coors_range[:3]) / voxel_size\n", + " voxelmap_shape = tuple(np.round(voxelmap_shape).astype(np.int32).tolist())\n", + " voxelmap_shape = voxelmap_shape[::-1] # DHW format\n", + " coor_to_voxelidx = -np.ones(shape=voxelmap_shape, dtype=np.int32)\n", + " # coors_2d = np.zeros(shape=(max_voxels, 2), dtype=np.int32)\n", + " bev_map_shape = list(voxelmap_shape)\n", + " bev_map_shape[0] += 1\n", + " height_lowers = np.linspace(\n", + " coors_range[2], coors_range[5], voxelmap_shape[0], endpoint=False)\n", + " if with_reflectivity:\n", + " bev_map_shape[0] += 1\n", + " bev_map = np.zeros(shape=bev_map_shape, dtype=points.dtype)\n", + " _points_to_bevmap_reverse_kernel(points, voxel_size, coors_range,\n", + " coor_to_voxelidx, bev_map, height_lowers,\n", + " with_reflectivity, max_voxels)\n", + " # print(voxel_num)\n", + " return bev_map\n", + "\n", + "def point_to_vis_bev(points,\n", + " voxel_size=None,\n", + " coors_range=None,\n", + " max_voxels=80000):\n", + " if voxel_size is None:\n", + " voxel_size = [0.1, 0.1, 0.1]\n", + " if coors_range is None:\n", + " coors_range = [-50, -50, -3, 50, 50, 1]\n", + " voxel_size[2] = coors_range[5] - coors_range[2]\n", + " bev_map = points_to_bev(\n", + " points, voxel_size, coors_range, max_voxels=max_voxels)\n", + " height_map = (bev_map[0] * 255).astype(np.uint8)\n", + " return cv2.cvtColor(height_map, cv2.COLOR_GRAY2RGB)\n", + "\n", + "img = point_to_vis_bev(pts_input[0], voxel_size=[params.x_step/2, params.y_step/2, 0.01],\n", + " coors_range=[params.x_min, params.y_min+20.32, params.z_min, \n", + " params.x_max-40.32, params.y_max-20.32, params.z_max] )\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, 
+ "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "fig1 = plt.figure(figsize=(10,10))\n", + "ax = fig1.add_subplot(111)\n", + "plt.imshow(img)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/compute_intersection_single_element.ipynb b/compute_intersection_single_element.ipynb new file mode 100644 index 0000000..d158d52 --- /dev/null +++ b/compute_intersection_single_element.ipynb @@ -0,0 +1,389 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import numba\n", + "from numba import jit" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# @jit(nopython=True, parallel=True)\n", + "def compute_intersection_single_element(x1,y1, x2,y2, x3,y3, x4,y4, \n", + " delta_x_tolerance=1e-6, grad_diff_tolerance=1e-6,\n", + " verbose=False):\n", + " \n", + " \n", + " inf_slope_status = np.array([0,0], np.int32)\n", + " parallel_flag = False # for readability\n", + " # check line 1 conditions\n", + " dx_line_1 = x2 - x1\n", + " dy_line_1 = y2 - y1\n", + " if np.abs(dx_line_1) < delta_x_tolerance:\n", + " inf_slope_status[0] = 1\n", + " \n", + " # check line 2 conditions\n", + " dx_line_2 = x4 - x3\n", + " dy_line_2 = y4 - y3\n", + " if np.abs(dx_line_2) < delta_x_tolerance:\n", + " inf_slope_status[1] = 1\n", + " \n", + " # if both have inf slope, they are parallel\n", + " parallel_flag = (np.sum(inf_slope_status) == 2)\n", + " if parallel_flag:\n", + " if 
verbose:\n", + " print(\"compute_intersection_single_element: Lines are parallel and have inf slope\")\n", + " return (-9999,-9999) # this value is chosen as it will be outside \n", + " # of our detection range (to be filtered later)\n", + " \n", + " # since one is inf and the other is not, they are not parallel\n", + " if (np.sum(inf_slope_status) > 0):\n", + " # there is one line that has inf slope\n", + " if (inf_slope_status[0] == 1): # line one has infinite slope\n", + " x = x1 # pick either one point x1 or x2 as they are the \"equal\"\n", + " y = (x - x3)*(y4-y3) / (x4-x3) + y3\n", + " if verbose:\n", + " print(\"compute_intersection_single_element: Line1 has inf slope\")\n", + " return (x, y)\n", + " \n", + " if (inf_slope_status[1] == 1):\n", + " x = x3 # pick either one point x3 or x4 as they are the \"equal\"\n", + " y = (x - x1)*(y2-y1) / (x2-x1) + y1\n", + " if verbose:\n", + " print(\"compute_intersection_single_element: Line2 has inf slope\")\n", + " return (x, y)\n", + " \n", + " else:\n", + " \n", + " # check both line conditions\n", + " m1 = dy_line_1 / dx_line_1\n", + " m2 = dy_line_2 / dx_line_2\n", + "\n", + " # Note: if the two lines are collinear, they are parallel\n", + " if np.abs(m1 - m2) < grad_diff_tolerance:\n", + " parallel_flag = True\n", + "\n", + "\n", + " if parallel_flag:\n", + " if verbose:\n", + " print(\"compute_intersection_single_element: Lines are parallel\")\n", + " return (-9999,-9999) # this value is chosen as it will be outside \n", + " # of our detection range (to be filtered later)\n", + "\n", + " if verbose:\n", + " print(\"compute_intersection_single_element: No line has inf slope\")\n", + "\n", + " x = (m2 *x4 - m1* x2 - y4 + y2) / (m2 - m1)\n", + " y = m2 * (x - x4) + y4\n", + " return (x, y)\n", + " \n", + " raise ValueError # np.sum(inf_slope_status) is abnormal\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "# from PointPillar\n", + "def 
compute_intersection_single_element(x1,y1, x2,y2, x3,y3, x4,y4, verbose=True):\n", + " num = (x1*y2 - y1*x2) * (x3-x4) - (x1-x2) * (x3*y4 - y3*x4);\n", + " den = (x1-x2) * (y3-y4) - (y1-y2) * (x3-x4);\n", + " x = num/(den + 1e-6)\n", + " \n", + " num = (x1*y2 - y1*x2) * (y3-y4) - (y1-y2) * (x3*y4 - y3*x4);\n", + " den = (x1-x2) * (y3-y4) - (y1-y2) * (x3-x4);\n", + " y = num/(den + 1e-6)\n", + " return (x,y)\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "TestCase1i\n", + "(0.0, -8000000.0)\n", + "TestCase2i\n", + "(30000000.0, -30000000.0)\n", + "TestCase3i\n", + "(-0.0, 1.0000000185185187)\n", + "TestCase3ii\n", + "(0.8333333391203704, 1.1666666747685186)\n", + "TestCase4i\n", + "(0.8333333564814821, 1.1666666990740748)\n", + "TestCase4ii\n", + "(0.8333333101851859, 1.1666666342592602)\n", + "TestCase5i\n", + "(5.000000050000001, 2.0000000200000003)\n", + "TestCase5ii\n", + "(4.99999995, 1.9999999800000003)\n", + "TestCase6i\n", + "(5.000000125000003, 2.000000050000001)\n", + "TestCase6ii\n", + "(4.999999875000004, 1.9999999500000014)\n" + ] + } + ], + "source": [ + "# test the compute_intersection_single_element:\n", + "# test cases:\n", + "# 1. two lines have inf slope, both lines are parallel\n", + "# 2. no line has inf slope, both lines are parallel\n", + "# 3. two lines intersect internally, both lines are not parallel, no line has inf slope\n", + "# 4. two lines intersect externally, both lines are not parallel, no line has inf slope\n", + "# 5. two lines intersect internally, both lines are not parallel, 1 line has inf slope\n", + "# 6. two lines intersect externally, both lines are not parallel, 1 line has inf slope\n", + "# 7. two lines intersect internally, both lines are not parallel, one line has 0 slope\n", + "# 8. two lines intersect externally, both lines are not parallel, one line has 0 slope\n", + "# 9. 
two lines intersect internally, both lines are not parallel, 1 line has inf slope, 1 line has 0 slope\n", + "# 10. two lines intersect externally, both lines are not parallel, 1 line has inf slope, 1 line has 0 slope\n", + "\n", + "# case 1 (i)\n", + "class TestCase1i():\n", + " line1 = (-2, 1, -2, 3)\n", + " line2 = (-6, 2, -6, 3)\n", + " \n", + " def __init__(self):\n", + " print(type(self).__name__)\n", + " intersection = compute_intersection_single_element(*self.line1, *self.line2, verbose=True)\n", + " print(intersection)\n", + "\n", + "test1i = TestCase1i()\n", + "\n", + "class TestCase2i():\n", + " line1 = (0, 2, 2, 0)\n", + " line2 = (-3, 0, 0, -3)\n", + " \n", + " def __init__(self):\n", + " print(type(self).__name__)\n", + " intersection = compute_intersection_single_element(*self.line1, *self.line2, verbose=True)\n", + " print(intersection)\n", + "\n", + "test2i = TestCase2i()\n", + "\n", + "\n", + "class TestCase3i():\n", + " line1 = (-2, -1, 1, 2)\n", + " line2 = (-4, 5, 5, -4)\n", + " \n", + " def __init__(self):\n", + " print(type(self).__name__)\n", + " intersection = compute_intersection_single_element(*self.line1, *self.line2, verbose=True)\n", + " print(intersection)\n", + "\n", + "test3i = TestCase3i()\n", + "\n", + "\n", + "\n", + "class TestCase3ii():\n", + " line1 = (-10, -1, 5, 2)\n", + " line2 = (-4, 6, 4, -2)\n", + " \n", + " def __init__(self):\n", + " print(type(self).__name__)\n", + " intersection = compute_intersection_single_element(*self.line1, *self.line2, verbose=True)\n", + " print(intersection)\n", + "\n", + "test3ii = TestCase3ii()\n", + "\n", + "\n", + "\n", + "class TestCase4i():\n", + " line1 = (-10, -1, 5, 2)\n", + " line2 = (-4, 6, -2, 4)\n", + " \n", + " def __init__(self):\n", + " print(type(self).__name__)\n", + " intersection = compute_intersection_single_element(*self.line1, *self.line2, verbose=True)\n", + " print(intersection)\n", + "# intersection = compute_intersection_single_element(*self.line2, *self.line1, 
verbose=True)\n", + "# print(intersection)\n", + "\n", + "test4i = TestCase4i()\n", + "\n", + "class TestCase4ii():\n", + " line1 = (-10, -1, 5, 2)\n", + " line2 = (-2, 4, -4, 6) # reverse the order of points\n", + " \n", + " def __init__(self):\n", + " print(type(self).__name__)\n", + " intersection = compute_intersection_single_element(*self.line1, *self.line2, verbose=True)\n", + " print(intersection)\n", + "# intersection = compute_intersection_single_element(*self.line2, *self.line1, verbose=True)\n", + "# print(intersection)\n", + "\n", + "test4ii = TestCase4ii()\n", + "\n", + "class TestCase5i():\n", + " line1 = (-10, -1, 15, 4)\n", + " line2 = (5, 4, 5, 0) # inf slope\n", + " \n", + " def __init__(self):\n", + " print(type(self).__name__)\n", + " intersection = compute_intersection_single_element(*self.line1, *self.line2, verbose=True)\n", + " print(intersection)\n", + "\n", + "test5i = TestCase5i()\n", + "\n", + "class TestCase5ii():\n", + " line1 = (5, 4, 5, 0) # inf slope\n", + " line2 = (-10, -1, 15, 4)\n", + " \n", + " def __init__(self):\n", + " print(type(self).__name__)\n", + " intersection = compute_intersection_single_element(*self.line1, *self.line2, verbose=True)\n", + " print(intersection)\n", + "\n", + "test5ii = TestCase5ii()\n", + "\n", + "class TestCase6i():\n", + " line1 = (-10, -1, 0, 1)\n", + " line2 = (5, 4, 5, 0) # inf slope\n", + " \n", + " def __init__(self):\n", + " print(type(self).__name__)\n", + " intersection = compute_intersection_single_element(*self.line1, *self.line2, verbose=True)\n", + " print(intersection)\n", + "\n", + "test6i = TestCase6i()\n", + "\n", + "class TestCase6ii():\n", + " line1 = (5, 4, 5, 0) # inf slope\n", + " line2 = (-10, -1, 0, 1)\n", + " \n", + " def __init__(self):\n", + " print(type(self).__name__)\n", + " intersection = compute_intersection_single_element(*self.line1, *self.line2, verbose=True)\n", + " print(intersection)\n", + "\n", + "test6ii = TestCase6ii()" + ] + }, + { + "cell_type": "code", + 
"execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "TestCase7i\n", + "(1.4999999750000004, 3.9999999333333345)\n", + "TestCase7ii\n", + "(1.5000000250000003, 4.000000066666668)\n", + "TestCase8i\n", + "(1.4999999250000036, 3.9999998000000097)\n", + "TestCase9i\n", + "(3.0000000375000004, 4.000000050000001)\n", + "TestCase10i\n", + "(3.0000001500000075, 4.00000020000001)\n" + ] + } + ], + "source": [ + "# Test cases\n", + "# 7. two lines intersect internally, both lines are not parallel, one line has 0 slope\n", + "# 8. two lines intersect externally, both lines are not parallel, one line has 0 slope\n", + "# 9. two lines intersect internally, both lines are not parallel, 1 line has inf slope, 1 line has 0 slope\n", + "# 10. two lines intersect externally, both lines are not parallel, 1 line has inf slope, 1 line has 0 slope\n", + "\n", + "\n", + "class TestCase7i():\n", + " line1 = (-2, -3, 4, 9)\n", + " line2 = (5, 4, 0, 4) # 0 slope\n", + " \n", + " def __init__(self):\n", + " print(type(self).__name__)\n", + " intersection = compute_intersection_single_element(*self.line1, *self.line2, verbose=True)\n", + " print(intersection)\n", + "\n", + "test7i = TestCase7i()\n", + "\n", + "class TestCase7ii():\n", + " line1 = (5, 4, 0, 4) # 0 slope\n", + " line2 = (-2, -3, 4, 9)\n", + " \n", + " def __init__(self):\n", + " print(type(self).__name__)\n", + " intersection = compute_intersection_single_element(*self.line1, *self.line2, verbose=True)\n", + " print(intersection)\n", + "\n", + "test7ii = TestCase7ii()\n", + "\n", + "class TestCase8i():\n", + " line1 = (-2, -3, 0, 1)\n", + " line2 = (5, 4, 0, 4) # 0 slope\n", + " \n", + " def __init__(self):\n", + " print(type(self).__name__)\n", + " intersection = compute_intersection_single_element(*self.line1, *self.line2, verbose=True)\n", + " print(intersection)\n", + "\n", + "test8i = TestCase8i()\n", + "\n", + "\n", + "class TestCase9i():\n", + " line1 = 
(-2, 4, 6, 4)\n", + " line2 = (3, 6, 3, -4) # 0 slope\n", + " \n", + " def __init__(self):\n", + " print(type(self).__name__)\n", + " intersection = compute_intersection_single_element(*self.line1, *self.line2, verbose=True)\n", + " print(intersection)\n", + "\n", + "test9i = TestCase9i()\n", + "\n", + "class TestCase10i():\n", + " line1 = (-2, 4, 0, 4)\n", + " line2 = (3, 6, 3, -4) # 0 slope\n", + " \n", + " def __init__(self):\n", + " print(type(self).__name__)\n", + " intersection = compute_intersection_single_element(*self.line1, *self.line2, verbose=True)\n", + " print(intersection)\n", + "\n", + "test10i = TestCase10i()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/config.py b/config.py index ec65ffd..7337600 100644 --- a/config.py +++ b/config.py @@ -10,8 +10,10 @@ class GridParameters: y_max = 40.32 y_step = 0.16 - z_min = -1.0 - z_max = 3.0 + # z_min = -1.0 + # z_max = 3.0 + z_min = -3.0 + z_max = 1.0 # derived parameters Xn_f = float(x_max - x_min) / x_step @@ -19,13 +21,28 @@ class GridParameters: Xn = int(Xn_f) Yn = int(Yn_f) - def __init__(self): - super(GridParameters, self).__init__() + def __init__(self, **kwargs): + super(GridParameters, self).__init__(**kwargs) class DataParameters: - classes = {"Car": 0, + # classes_map = {"Car": 0, + # "Pedestrian": 1, + # "Person_sitting": 1, + # "Cyclist": 2, + # "Truck": 3, + # "Van": 3, + # "Tram": 3, + # "Misc": 3, + # } + + map_classes = { + 0: "Car", + 1: "Pedestrian" + } + + classes_map = {"Car": 0, "Pedestrian": 1, "Person_sitting": 1, "Cyclist": 2, @@ -35,11 +52,24 @@ class DataParameters: "Misc": 3, } - nb_classes 
= len(np.unique(list(classes.values()))) - assert nb_classes == np.max(np.unique(list(classes.values()))) + 1, 'Starting class indexing at zero.' + nb_classes = len(np.unique(list(classes_map.values()))) + assert nb_classes == np.max(np.unique(list(classes_map.values()))) + 1, 'Starting class indexing at zero.' - def __init__(self): - super(DataParameters, self).__init__() + # classes = {"Car": 0, + # "Pedestrian": 1, + # "Person_sitting": 1, + # "Cyclist": 2, + # "Truck": 3, + # "Van": 3, + # "Tram": 3, + # "Misc": 3, + # } + + # nb_classes = len(np.unique(list(classes.values()))) + # assert nb_classes == np.max(np.unique(list(classes.values()))) + 1, 'Starting class indexing at zero.' + + def __init__(self, **kwargs): + super(DataParameters, self).__init__(**kwargs) class NetworkParameters: @@ -50,7 +80,7 @@ class NetworkParameters: nb_channels = 64 downscaling_factor = 2 - # length, width, height, z-center, orientation + # length (x), width (y), height (z), z-center, orientation anchor_dims = np.array([[3.9, 1.6, 1.56, -1, 0], [3.9, 1.6, 1.56, -1, 1.5708], [0.8, 0.6, 1.73, -0.6, 0], @@ -77,11 +107,11 @@ class NetworkParameters: heading_weight = 0.2 # 0.2 class_weight = 0.5 # 0.2 - def __init__(self): - super(NetworkParameters, self).__init__() + def __init__(self, **kwargs): + super(NetworkParameters, self).__init__(**kwargs) class Parameters(GridParameters, DataParameters, NetworkParameters): - def __init__(self): - super(Parameters, self).__init__() + def __init__(self, **kwargs): + super(Parameters, self).__init__(**kwargs) diff --git a/config_v2.py b/config_v2.py new file mode 100644 index 0000000..b477acf --- /dev/null +++ b/config_v2.py @@ -0,0 +1,118 @@ +import numpy as np + + +class GridParameters: + x_min = 0.0 + x_max = 80.64 + x_step = 0.16 + + y_min = -40.32 + y_max = 40.32 + y_step = 0.16 + + # z_min = -1.0 + # z_max = 3.0 + z_min = -3.0 + z_max = 1.0 + + # derived parameters + Xn_f = float(x_max - x_min) / x_step + Yn_f = float(y_max - y_min) / 
y_step + Xn = int(Xn_f) + Yn = int(Yn_f) + + def __init__(self, **kwargs): + super(GridParameters, self).__init__(**kwargs) + + +class DataParameters: + + # classes_map = {"Car": 0, + # "Pedestrian": 1, + # "Person_sitting": 1, + # "Cyclist": 2, + # "Truck": 3, + # "Van": 3, + # "Tram": 3, + # "Misc": 3, + # } + + map_classes = { + 0: "Car", + 1: "Pedestrian" + } + + classes_map = {"Car": 0, + "Pedestrian": 1, + "Person_sitting": 1, + # "Cyclist": 2, + # "Truck": 3, + # "Van": 3, + # "Tram": 3, + # "Misc": 3, + } + + nb_classes = len(np.unique(list(classes_map.values()))) + assert nb_classes == np.max(np.unique(list(classes_map.values()))) + 1, 'Starting class indexing at zero.' + + # classes = {"Car": 0, + # "Pedestrian": 1, + # "Person_sitting": 1, + # "Cyclist": 2, + # "Truck": 3, + # "Van": 3, + # "Tram": 3, + # "Misc": 3, + # } + + # nb_classes = len(np.unique(list(classes.values()))) + # assert nb_classes == np.max(np.unique(list(classes.values()))) + 1, 'Starting class indexing at zero.' + + def __init__(self, **kwargs): + super(DataParameters, self).__init__(**kwargs) + + +class NetworkParameters: + + max_points_per_pillar = 100 + max_pillars = 12000 + nb_features = 9 + nb_channels = 64 + downscaling_factor = 2 + + # length (x), width (y), height (z), z-center, orientation + anchor_dims = np.array([[3.9, 1.6, 1.56, -1, 0], + [3.9, 1.6, 1.56, -1, np.pi/2], + [0.8, 0.6, 1.73, -0.6, 0], + [0.8, 0.6, 1.73, -0.6, np.pi/2], + ], dtype=np.float32).tolist() + nb_dims = 3 + + positive_iou_threshold = 0.6 + negative_iou_threshold = 0.3 + # batch_size = 1 + batch_size = 4 + total_training_epochs = 160 + iters_to_decay = 101040. # 15 * 4 * ceil(6733. / 4) --> every 15 epochs on 6733 kitti samples, cf. 
pillar paper + learning_rate = 2e-4 + decay_rate = 1e-8 + L1 = 0 + L2 = 0 + alpha = 0.25 + gamma = 2.0 + # original pillars paper values + focal_weight = 3.0 # 1.0 + loc_weight = 2.0 # 2.0 + size_weight = 2.0 # 2.0 + angle_weight = 1.0 # 2.0 + heading_weight = 0.2 # 0.2 + class_weight = 0.5 # 0.2 + + def __init__(self, **kwargs): + super(NetworkParameters, self).__init__(**kwargs) + + +class Parameters(GridParameters, DataParameters, NetworkParameters): + + def __init__(self, **kwargs): + super(Parameters, self).__init__(**kwargs) diff --git a/inference_utils.py b/inference_utils.py index 20ba7ac..e80b99d 100644 --- a/inference_utils.py +++ b/inference_utils.py @@ -44,6 +44,7 @@ def rotational_nms(set_boxes, confidences, occ_threshold=0.7, nms_iou_thr=0.5): for boxes, confs in zip(set_boxes, confidences): assert len(boxes) == len(confs) indices = cv.dnn.NMSBoxesRotated(boxes, confs, occ_threshold, nms_iou_thr) + print(indices) indices = indices.reshape(len(indices)).tolist() nms_boxes.append([boxes[i] for i in indices]) return nms_boxes @@ -85,9 +86,79 @@ def generate_bboxes_from_pred(occ, pos, siz, ang, hdg, clf, anchor_dims, occ_thr predicted_boxes.append(BBox(bb_x, bb_y, bb_z, bb_length, bb_width, bb_height, bb_yaw, bb_heading, bb_cls, bb_conf)) + return predicted_boxes + +def limit_period(val, offset=0.5, period=np.pi): + return val - np.floor(val / period + offset) * period + +def inverse_yaw_element(bb_yaw): + + + bb_yaw -= np.pi / 2 + while bb_yaw > np.pi: + print("larger than pi") + bb_yaw -= (np.pi * 2) + while bb_yaw < -np.pi: + print("smaller than -pi") + bb_yaw += (np.pi * 2) + + return bb_yaw + + # if bb_yaw > np.pi /2: + # bb_yaw -= 2 * np.pi + + # bb_yaw += np.pi/2 + # return bb_yaw + +def generate_bboxes_from_pred_and_np_array(occ, pos, siz, ang, hdg, clf, anchor_dims, occ_threshold=0.5): + """ Generating the bounding boxes based on the regression targets """ + + # Get only the boxes where occupancy is greater or equal threshold. 
+ real_boxes = np.where(occ >= occ_threshold) + # Get the indices of the occupancy array + coordinates = list(zip(real_boxes[0], real_boxes[1], real_boxes[2])) + # Assign anchor dimensions as original bounding box coordinates which will eventually be changed + # according to the predicted regression targets + anchor_dims = anchor_dims + real_anchors = np.random.rand(len(coordinates), len(anchor_dims[0])) + + for i, value in enumerate(real_boxes[2]): + real_anchors[i, ...] = anchor_dims[value] + + # Change the anchor boxes based on regression targets, this is the inverse of the operations given in + # createPillarTargets function (src/PointPillars.cpp) + predicted_boxes = [] + predicted_boxes_list = [] + for i, value in enumerate(coordinates): + # print("coordinate ", i) + real_diag = np.sqrt(np.square(real_anchors[i][0]) + np.square(real_anchors[i][1])) + real_x = value[0] * Parameters.x_step * Parameters.downscaling_factor + Parameters.x_min + real_y = value[1] * Parameters.y_step * Parameters.downscaling_factor + Parameters.y_min + bb_x = pos[value][0] * real_diag + real_x + bb_y = pos[value][1] * real_diag + real_y + bb_z = pos[value][2] * real_anchors[i][2] + real_anchors[i][3] + # print(position[value], real_x, real_y, real_diag) + bb_length = np.exp(siz[value][0]) * real_anchors[i][0] + bb_width = np.exp(siz[value][1]) * real_anchors[i][1] + bb_height = np.exp(siz[value][2]) * real_anchors[i][2] + bb_yaw = -np.arcsin(np.clip(ang[value], -1, 1)) + real_anchors[i][4] + # bb_yaw = np.arcsin(np.clip(ang[value], -1, 1)) + real_anchors[i][4] + # bb_yaw = inverse_yaw_element(bb_yaw) + bb_heading = np.round(hdg[value]) + # print(bb_heading, bb_yaw) + bb_cls = np.argmax(clf[value]) + bb_conf = occ[value] + predicted_boxes.append(BBox(bb_x, bb_y, bb_z, bb_length, bb_width, bb_height, + bb_yaw, bb_heading, bb_cls, bb_conf)) + predicted_boxes_list.append([bb_x, bb_y, bb_z, bb_length, bb_width, bb_height, + bb_yaw, bb_heading, bb_cls, bb_conf]) + + return predicted_boxes, 
np.array(predicted_boxes_list) + + class GroundTruthGenerator(DataProcessor): """ Multiprocessing-safe data generator for training, validation or testing, without fancy augmentation """ diff --git a/inference_utils_v2.py b/inference_utils_v2.py new file mode 100644 index 0000000..685b1ec --- /dev/null +++ b/inference_utils_v2.py @@ -0,0 +1,209 @@ +import numpy as np +import cv2 as cv +from typing import List +from config_v2 import Parameters +from readers import DataReader +from point_pillars_custom_processors_v2 import DataProcessor + + +class BBox(tuple): + """ bounding box tuple that can easily be accessed while being compatible to cv2 rotational rects """ + + def __new__(cls, bb_x, bb_y, bb_z, bb_length, bb_width, bb_height, bb_yaw, bb_heading, bb_cls, bb_conf): + bbx_tuple = ((float(bb_x), float(bb_y)), (float(bb_length), float(bb_width)), float(np.rad2deg(bb_yaw))) + return super(BBox, cls).__new__(cls, tuple(bbx_tuple)) + + def __init__(self, bb_x, bb_y, bb_z, bb_length, bb_width, bb_height, bb_yaw, bb_heading, bb_cls, bb_conf): + self.x = bb_x + self.y = bb_y + self.z = bb_z + self.length = bb_length + self.width = bb_width + self.height = bb_height + self.yaw = bb_yaw + self.heading = bb_heading + self.cls = bb_cls + self.conf = bb_conf + + def __str__(self): + return "BB | Cls: %s, x: %f, y: %f, l: %f, w: %f, yaw: %f" % ( + self.cls, self.x, self.y, self.length, self.width, self.yaw) + + +def rotational_nms(set_boxes, confidences, occ_threshold=0.7, nms_iou_thr=0.5): + """ rotational NMS + set_boxes = size NSeqs list of size NDet lists of tuples. each tuple has the form ((pos, pos), (size, size), angle) + confidences = size NSeqs list of lists containing NDet floats, i.e. 
one per detection + """ + assert len(set_boxes) == len(confidences) and 0 < occ_threshold < 1 and 0 < nms_iou_thr < 1 + if not len(set_boxes): + return [] + assert (isinstance(set_boxes[0][0][0][0], float) or isinstance(set_boxes[0][0][0][0], int)) and \ + (isinstance(confidences[0][0], float) or isinstance(confidences[0][0], int)) + nms_boxes = [] + for boxes, confs in zip(set_boxes, confidences): + assert len(boxes) == len(confs) + indices = cv.dnn.NMSBoxesRotated(boxes, confs, occ_threshold, nms_iou_thr) + print(indices) + indices = indices.reshape(len(indices)).tolist() + nms_boxes.append([boxes[i] for i in indices]) + return nms_boxes + + +def generate_bboxes_from_pred(occ, pos, siz, ang, hdg, clf, anchor_dims, occ_threshold=0.5): + """ Generating the bounding boxes based on the regression targets """ + + # Get only the boxes where occupancy is greater or equal threshold. + real_boxes = np.where(occ >= occ_threshold) + # Get the indices of the occupancy array + coordinates = list(zip(real_boxes[0], real_boxes[1], real_boxes[2])) + # Assign anchor dimensions as original bounding box coordinates which will eventually be changed + # according to the predicted regression targets + anchor_dims = anchor_dims + real_anchors = np.random.rand(len(coordinates), len(anchor_dims[0])) + + for i, value in enumerate(real_boxes[2]): + real_anchors[i, ...] 
= anchor_dims[value] + + # Change the anchor boxes based on regression targets, this is the inverse of the operations given in + # createPillarTargets function (src/PointPillars.cpp) + predicted_boxes = [] + for i, value in enumerate(coordinates): + real_diag = np.sqrt(np.square(real_anchors[i][0]) + np.square(real_anchors[i][1])) + real_x = value[0] * Parameters.x_step * Parameters.downscaling_factor + Parameters.x_min + real_y = value[1] * Parameters.y_step * Parameters.downscaling_factor + Parameters.y_min + bb_x = pos[value][0] * real_diag + real_x + bb_y = pos[value][1] * real_diag + real_y + bb_z = pos[value][2] * real_anchors[i][2] + real_anchors[i][3] + # print(position[value], real_x, real_y, real_diag) + bb_length = np.exp(siz[value][0]) * real_anchors[i][0] + bb_width = np.exp(siz[value][1]) * real_anchors[i][1] + bb_height = np.exp(siz[value][2]) * real_anchors[i][2] + bb_yaw = ang[value] + real_anchors[i][4] + # bb_yaw = -np.arcsin(np.clip(ang[value], -1, 1)) + real_anchors[i][4] + bb_heading = np.round(hdg[value]) + bb_cls = np.argmax(clf[value]) + bb_conf = occ[value] + predicted_boxes.append(BBox(bb_x, bb_y, bb_z, bb_length, bb_width, bb_height, + bb_yaw, bb_heading, bb_cls, bb_conf)) + + + return predicted_boxes + + + +def limit_period(val, offset=0.5, period=np.pi): + return val - np.floor(val / period + offset) * period + +def inverse_yaw_element(bb_yaw): + + + bb_yaw -= np.pi / 2 + while bb_yaw > np.pi: + print("larger than pi") + bb_yaw -= (np.pi * 2) + while bb_yaw < -np.pi: + print("smaller than -pi") + bb_yaw += (np.pi * 2) + + return bb_yaw + + # if bb_yaw > np.pi /2: + # bb_yaw -= 2 * np.pi + + # bb_yaw += np.pi/2 + # return bb_yaw + +def generate_bboxes_from_pred_and_np_array(occ, pos, siz, ang, hdg, clf, anchor_dims, occ_threshold=0.5): + """ Generating the bounding boxes based on the regression targets """ + + # Get only the boxes where occupancy is greater or equal threshold. 
+ real_boxes = np.where(occ >= occ_threshold) + # print(occ.shape) + # Get the indices of the occupancy array + coordinates = list(zip(real_boxes[0], real_boxes[1], real_boxes[2])) + # Assign anchor dimensions as original bounding box coordinates which will eventually be changed + # according to the predicted regression targets + anchor_dims = anchor_dims + real_anchors = np.random.rand(len(coordinates), len(anchor_dims[0])) + + for i, value in enumerate(real_boxes[2]): + real_anchors[i, ...] = anchor_dims[value] + + # Change the anchor boxes based on regression targets, this is the inverse of the operations given in + # createPillarTargets function (src/PointPillars.cpp) + predicted_boxes = [] + predicted_boxes_list = [] + for i, value in enumerate(coordinates): + # print("coordinate ", i) + real_diag = np.sqrt(np.square(real_anchors[i][0]) + np.square(real_anchors[i][1])) + real_x = value[0] * Parameters.x_step * Parameters.downscaling_factor + Parameters.x_min + real_y = value[1] * Parameters.y_step * Parameters.downscaling_factor + Parameters.y_min + # print("i: ", i, "\tx: ", real_x, "\ty:", real_y) + # print("i: ", i, "\tx: ", value[0], "\ty:", value[1]) + bb_x = pos[value][0] * real_diag + real_x + bb_y = pos[value][1] * real_diag + real_y + bb_z = pos[value][2] * real_anchors[i][2] + real_anchors[i][3] + # print(position[value], real_x, real_y, real_diag) + bb_length = np.exp(siz[value][0]) * real_anchors[i][0] + bb_width = np.exp(siz[value][1]) * real_anchors[i][1] + bb_height = np.exp(siz[value][2]) * real_anchors[i][2] + bb_heading = np.round(hdg[value]) + bb_yaw = ang[value] + real_anchors[i][4] + # if np.int32(bb_heading) == 0: + # bb_yaw -= np.pi + + bb_cls = np.argmax(clf[value]) + bb_conf = occ[value] + predicted_boxes.append(BBox(bb_x, bb_y, bb_z, bb_length, bb_width, bb_height, + bb_yaw, bb_heading, bb_cls, bb_conf)) + predicted_boxes_list.append([bb_x, bb_y, bb_z, bb_length, bb_width, bb_height, + bb_yaw, bb_heading, bb_cls, bb_conf]) + + return 
predicted_boxes, np.array(predicted_boxes_list) + + +class GroundTruthGenerator(DataProcessor): + """ Multiprocessing-safe data generator for training, validation or testing, without fancy augmentation """ + + def __init__(self, data_reader: DataReader, label_files: List[str], calibration_files: List[str] = None, + network_format: bool = False): + super(GroundTruthGenerator, self).__init__() + self.data_reader = data_reader + self.label_files = label_files + self.calibration_files = calibration_files + self.network_format = network_format + + def __len__(self): + return len(self.label_files) + + def __getitem__(self, file_id: int): + label = self.data_reader.read_label(self.label_files[file_id]) + R, t = self.data_reader.read_calibration(self.calibration_files[file_id]) + label_transformed = self.transform_labels_into_lidar_coordinates(label, R, t) + if self.network_format: + occupancy, position, size, angle, heading, classification = self.make_ground_truth(label_transformed) + occupancy = np.array(occupancy) + position = np.array(position) + size = np.array(size) + angle = np.array(angle) + heading = np.array(heading) + classification = np.array(classification) + return [occupancy, position, size, angle, heading, classification] + return label_transformed + + +def focal_loss_checker(y_true, y_pred, n_occs=-1): + y_true = np.stack(np.where(y_true == 1)) + if n_occs == -1: + n_occs = y_true.shape[1] + occ_thr = np.sort(y_pred.flatten())[-n_occs] + y_pred = np.stack(np.where(y_pred >= occ_thr)) + p = 0 + for gt in range(y_true.shape[1]): + for pr in range(y_pred.shape[1]): + if np.all(y_true[:, gt] == y_pred[:, pr]): + p += 1 + break + print("#matched gt: ", p, " #unmatched gt: ", y_true.shape[1] - p, " #unmatched pred: ", y_pred.shape[1] - p, + " occupancy threshold: ", occ_thr) diff --git a/loss.py b/loss.py index 0cb355a..bd9aa21 100644 --- a/loss.py +++ b/loss.py @@ -61,7 +61,18 @@ def size_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor): masked_loss = 
tf.boolean_mask(loss, mask) return self.size_weight * tf.reduce_mean(masked_loss) + + + def add_sin_difference(self, y_true, y_pred, factor=1.0): + if factor != 1.0: + y_true = factor * y_true + y_pred = factor * y_pred + rad_pred_encoding = tf.math.sin(y_pred) * tf.math.cos(y_true) + rad_tg_encoding = tf.math.cos(y_pred) * tf.math.sin(y_true) + return rad_tg_encoding, rad_pred_encoding + def angle_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor): + y_true, y_pred = self.add_sin_difference(y_true, y_pred, self.angle_weight) loss = tf.compat.v1.losses.huber_loss(y_true, y_pred, reduction="none") diff --git a/network.py b/network.py index 4108aec..3d697eb 100644 --- a/network.py +++ b/network.py @@ -1,6 +1,7 @@ import tensorflow as tf import numpy as np -from config import Parameters +# from config import Parameters +from config_v2 import Parameters def build_point_pillar_graph(params: Parameters): diff --git a/point_pillars_custom_prediction.py b/point_pillars_custom_prediction.py new file mode 100644 index 0000000..96cc9e6 --- /dev/null +++ b/point_pillars_custom_prediction.py @@ -0,0 +1,157 @@ +import os +from glob import glob +import numpy as np +import tensorflow as tf +# from processors import SimpleDataGenerator +# from custom_processors import CustomDataGenerator, AnalyseCustomDataGenerator +from point_pillars_custom_processors_v2 import CustomDataGenerator, AnalyseCustomDataGenerator +from inference_utils import generate_bboxes_from_pred, GroundTruthGenerator, focal_loss_checker, rotational_nms, generate_bboxes_from_pred_and_np_array +from readers import KittiDataReader +from config import Parameters +from network import build_point_pillar_graph + + +from point_viz.converter import PointvizConverter + +DATA_ROOT = "/media/data3/tjtanaa/kitti_dataset/" +# MODEL_ROOT = "./logs_Car_Pedestrian_Custom_Dataset_single_process" +MODEL_ROOT = "./logs_Car_Pedestrian_Custom_Dataset_No_Early_Stopping_Input_Coordinate_Analysis_v2" + +os.environ["CUDA_DEVICE_ORDER"] = 
"PCI_BUS_ID" +os.environ["CUDA_VISIBLE_DEVICES"] = "2" + + +def limit_period(val, offset=0.5, period=np.pi): + return val - np.floor(val / period + offset) * period + +if __name__ == "__main__": + + params = Parameters() + # pillar_net = build_point_pillar_graph(params) + # pillar_net.load_weights(os.path.join(MODEL_ROOT, "model.h5")) + # pillar_net.summary() + + # data_reader = KittiDataReader() + + # lidar_files = sorted(glob(os.path.join(DATA_ROOT, "velodyne", "*.bin"))) + # label_files = sorted(glob(os.path.join(DATA_ROOT, "label_2", "*.txt"))) + # calibration_files = sorted(glob(os.path.join(DATA_ROOT, "calib", "*.txt"))) + # assert len(lidar_files) == len(label_files) == len(calibration_files), "Input dirs require equal number of files." + # eval_gen = SimpleDataGenerator(data_reader, params.batch_size, lidar_files, label_files, calibration_files) + + + + # save_viz_path = "/home/tan/tjtanaa/PointPillars/visualization/custom_prediction_multiprocessing" + save_viz_path = "/home/tan/tjtanaa/PointPillars/visualization/custom_prediction_input_coordinate_analysis_point_pillar_v2" + # Initialize and setup output directory. 
+ Converter = PointvizConverter(save_viz_path) + + + + + + + gt_database_dir = os.path.join(DATA_ROOT, "gt_database") + + # validation_gen = AnalyseCustomDataGenerator(batch_size=params.batch_size,root_dir = DATA_ROOT, + # npoints=20000, split='train', classes=list(params.classes_map.keys()), + # random_select=True, gt_database_dir=None, aug_hard_ratio=0.7) + + validation_gen = AnalyseCustomDataGenerator(batch_size=params.batch_size, root_dir=DATA_ROOT, + npoints=20000, split='val',random_select=False, classes=list(params.classes_map.keys())) + + for batch_idx in range(0,20): + [pillars, voxels], [occupancy_, position_, size_, angle_, heading_, classification_], [pts_input, gt_boxes3d, sample] = validation_gen[batch_idx] + + # occupancy, position, size, angle, heading, classification = pillar_net.predict([pillars, voxels]) + + # angle = limit_period(angle, offset=0.5, period=2*np.pi) + + + # occupancy[:,:,:,:2] = 0 + + # print(occupancy.shape) + # exit() + + set_boxes, confidences = [], [] + loop_range = occupancy_.shape[0] if len(occupancy_.shape) == 4 else 1 + for i in range(loop_range): + # set_box, predicted_boxes3d = generate_bboxes_from_pred_and_np_array(occupancy[i], position[i], size[i], angle[i], + # heading[i], + # classification[i], params.anchor_dims, occ_threshold=0.15) + + gt_boxes3d_ = gt_boxes3d[i] + + print(gt_boxes3d_.shape) + gt_bbox_params = np.stack([gt_boxes3d_[:,3], gt_boxes3d_[:,5], gt_boxes3d_[:,4], + gt_boxes3d_[:,1], gt_boxes3d_[:,2] , + gt_boxes3d_[:,0], + gt_boxes3d_[:,6]], axis=1) + + + gt_bbox_params_list = gt_bbox_params.tolist() + for k in range(len(gt_bbox_params_list)): + gt_bbox_params_list[k].append("Green") + gt_bbox_params_list[k].append("1.0") + + # if len(set_box) > 0: + # predicted_boxes3d_ = predicted_boxes3d + # # bbox_params = validation_gen.convert_predictions_into_point_viz_format(predicted_boxes3d[:,[1, 2, 0, 5, 3, 4, 6 ]]) + + # # print(predicted_boxes3d_.shape) + # # print(predicted_boxes3d_) + # # print(size[i]) + 
+ # bbox_params = np.stack([predicted_boxes3d_[:,3], predicted_boxes3d_[:,5], predicted_boxes3d_[:,4], + # predicted_boxes3d_[:,1], (predicted_boxes3d_[:,2] - predicted_boxes3d_[:,5] / 2) , + # predicted_boxes3d_[:,0], + # predicted_boxes3d_[:,6]], axis=1) + + + # # bbox_params = np.stack([predicted_boxes3d[:,4], predicted_boxes3d[:,5], predicted_boxes3d[:,3], + # # predicted_boxes3d[:,1], -(predicted_boxes3d[:,2] - predicted_boxes3d[:,5] / 2), + # # predicted_boxes3d[:,0], + # # predicted_boxes3d[:,6]], axis=1) + + # bbox_params_list = bbox_params.tolist() + # # bbox_labels_conf = [str(predicted_boxes3d[k,9]) for k in range(predicted_boxes3d.shape[0])] + # for k in range(predicted_boxes3d.shape[0]): + # bbox_params_list[k].append("Magenta") + # bbox_params_list[k].append(str(predicted_boxes3d[k,9]) + params.map_classes[int(predicted_boxes3d[k,8])]) + # gt_bbox_params_list.append(bbox_params_list[k]) + + coor = pts_input[i][:,[1,2,0]] + # coor[:,1] *= -1 + Converter.compile("train_custom_sample_{}".format(batch_idx * params.batch_size+i), coors=coor, intensity=pts_input[i][:,3], + bbox_params=gt_bbox_params_list) + + # set_boxes.append(set_box) + # # set_boxes.append(generate_bboxes_from_pred(occupancy, position, size, angle, heading, + # # classification, params.anchor_dims, occ_threshold=0.1)) + # # confidences.append([float(boxes.conf) for boxes in set_boxes[-1]]) + + # sum_bboxes = 0 + # for h in range(len(set_boxes)): + # sum_bboxes += len(set_boxes[h]) + + # print('Batch ', str(batch_idx) ,': Box predictions with occupancy > occ_thr: ', sum_bboxes) + # print('Scene 1: Box predictions with occupancy > occ_thr: ', len(set_boxes[0])) + # exit() + # print(set_boxes[-1]) + + # # NMS + # nms_boxes = rotational_nms(set_boxes, confidences, occ_threshold=0.7, nms_iou_thr=0.5) + + # print('Scene 1: Boxes after NMS with iou_thr: ', len(nms_boxes[0])) + + # # Do all the further operations on predicted_boxes array, which contains the predicted bounding boxes + # gt_gen = 
GroundTruthGenerator(data_reader, label_files, calibration_files, network_format=False) + # gt_gen0 = GroundTruthGenerator(data_reader, label_files, calibration_files, network_format=True) + # for seq_boxes, gt_label, gt0 in zip(nms_boxes, gt_gen, gt_gen0): + # print("---------- New Scenario ---------- ") + # focal_loss_checker(gt0[0], occupancy[0], n_occs=-1) + # print("---------- ------------ ---------- ") + # for gt in gt_label: + # print(gt) + # for pred in seq_boxes: + # print(pred) diff --git a/point_pillars_custom_processors_v2.py b/point_pillars_custom_processors_v2.py new file mode 100644 index 0000000..3c825c4 --- /dev/null +++ b/point_pillars_custom_processors_v2.py @@ -0,0 +1,372 @@ +from typing import List, Any +import numpy as np +import tensorflow as tf + +from tensorflow.python.keras.utils.data_utils import Sequence + +from config_v2 import Parameters +# from point_pillars import createPillars, createPillarsTarget +from point_pillars_v2 import createPillars, createPillarsTarget +# from readers import DataReader, Label3D +from sklearn.utils import shuffle +import sys + +from det3d.pc_kitti_dataset import PCKittiAugmentedDataset + +from point_viz.converter import PointvizConverter + + +def select_best_anchors(arr): + dims = np.indices(arr.shape[1:]) + # arr[..., 0:1] gets the occupancy value from occ in {-1, 0, 1}, i.e. 
{bad match, neg box, pos box} + ind = (np.argmax(arr[..., 0:1], axis=0),) + tuple(dims) + + return arr[ind] + + +class DataProcessor(Parameters): + + def __init__(self, **kwargs): + super(DataProcessor, self).__init__(**kwargs) + anchor_dims = np.array(self.anchor_dims, dtype=np.float32) + self.anchor_dims = anchor_dims[:, 0:3] + self.anchor_z = anchor_dims[:, 3] + self.anchor_yaw = anchor_dims[:, 4] + # Counts may be used to make statistic about how well the anchor boxes fit the objects + self.pos_cnt, self.neg_cnt = 0, 0 + + def make_point_pillars(self, points: np.ndarray): + + assert points.ndim == 2 + assert points.shape[1] == 4 + assert points.dtype == np.float32 + + pillars, indices = createPillars(points, + self.max_points_per_pillar, + self.max_pillars, + self.x_step, + self.y_step, + self.x_min, + self.x_max, + self.y_min, + self.y_max, + self.z_min, + self.z_max, + False) + + return pillars, indices + + def make_ground_truth(self, gt_boxes_3d: Any, gt_cls_type_list: List[str]): + """ Generate the ground truth label for each pillars + + Args: + gt_boxes_3d (numpy[float]): A list of floats containing [x, y, z, h, w, l, ry] + gt_cls_type_list (List[str]): A list of floats containing [cls_type] + + Returns: + [type]: [description] + """ + + # filter labels by classes (cars, pedestrians and Trams) + # Label has 4 properties (Classification (0th index of labels file), + # centroid coordinates, dimensions, yaw) + # labels = list(filter(lambda x: x.classification in self.classes, labels)) + + + + if len(gt_boxes_3d) == 0: + pX, pY = int(self.Xn / self.downscaling_factor), int(self.Yn / self.downscaling_factor) + a = int(self.anchor_dims.shape[0]) + return np.zeros((pX, pY, a), dtype='float32'), np.zeros((pX, pY, a, self.nb_dims), dtype='float32'), \ + np.zeros((pX, pY, a, self.nb_dims), dtype='float32'), np.zeros((pX, pY, a), dtype='float32'), \ + np.zeros((pX, pY, a, self.nb_classes), dtype='float64') + + # For each label file, generate these properties except 
for the Don't care class + + # target_positions = np.array([label.centroid for label in labels], dtype=np.float32) + # target_dimension = np.array([label.dimension for label in labels], dtype=np.float32) + # target_yaw = np.array([label.yaw for label in labels], dtype=np.float32) + # target_class = np.array([self.classes[label.classification] for label in labels], dtype=np.int32) + + target_positions = gt_boxes_3d[:,:3] + target_dimension = gt_boxes_3d[:,3:6] # don't have to translate again + target_yaw = gt_boxes_3d[:, 6] + # print(type(self.classes)) + # print(type(self.classes_map)) + # # print(gt_cls_type_list[0]) + # print(self.classes_map[gt_cls_type_list[0]]) + + target_class = np.array([self.classes_map[gt_cls_type_list[k]] for k in range(len(gt_cls_type_list))], dtype=np.int32) + + assert np.all(target_yaw >= -np.pi) & np.all(target_yaw <= np.pi) + assert len(target_positions) == len(target_dimension) == len(target_yaw) == len(target_class) + + target, pos, neg = createPillarsTarget(target_positions, + target_dimension, + target_yaw, + target_class, + self.anchor_dims, + self.anchor_z, + self.anchor_yaw, + self.positive_iou_threshold, + self.negative_iou_threshold, + self.nb_classes, + self.downscaling_factor, + self.x_step, + self.y_step, + self.x_min, + self.x_max, + self.y_min, + self.y_max, + self.z_min, + self.z_max, + False) + self.pos_cnt += pos + self.neg_cnt += neg + + # return a merged target view for all objects in the ground truth and get categorical labels + # print("target.shape: ", target.shape) + sel = select_best_anchors(target) + ohe = tf.keras.utils.to_categorical(sel[..., 9], num_classes=self.nb_classes, dtype='float64') + # print("self.shape: ", sel[...,0].shape) + + return sel[..., 0], sel[..., 1:4], sel[..., 4:7], sel[..., 7], sel[..., 8], ohe + + +class CustomDataGenerator(DataProcessor, Sequence, PCKittiAugmentedDataset): + """ Multiprocessing-safe data generator for training, validation or testing, without fancy augmentation """ + 
+ def __init__(self, batch_size: int, root_dir:str, npoints:int =16384, split: str ='train', + classes:List[str] =['Car', 'Pedestrian', 'Person_sitting'], random_select:bool =True, + gt_database_dir=None, aug_hard_ratio:float=0.5, **kwargs): + + super(CustomDataGenerator, self).__init__( + batch_size=batch_size, root_dir=root_dir, + npoints=npoints, split=split, classes=classes, + random_select=random_select, gt_database_dir=gt_database_dir, + aug_hard_ratio=aug_hard_ratio, **kwargs + ) + # self.data_reader = data_reader + self.batch_size = batch_size + self.sample_id_list=self.get_sample_id_list() + self.split = split + + + def get_sample(self, index): + return super().get_sample(index) + + + def __len__(self): + return len(self.sample_id_list) // self.batch_size + + def __getitem__(self, batch_id: int): + file_ids = range(batch_id * self.batch_size, self.batch_size * (batch_id + 1)) + # print("inside getitem") + pillars = [] + voxels = [] + occupancy = [] + position = [] + size = [] + angle = [] + heading = [] + classification = [] + + + + for i in file_ids: + sample = self.get_sample(i) + # For each file, dividing the space into a x-y grid to create pillars + pts_lidar = sample['calib'].rect_to_lidar(sample['pts_rect']) + pts_input = np.concatenate((pts_lidar, sample['pts_features']), axis=1) # (N, C) + + gt_boxes3d_xyz = sample['calib'].rect_to_lidar(sample['gt_boxes3d'][:,:3]) + + gt_boxes3d = np.concatenate(( + gt_boxes3d_xyz[:,0,np.newaxis], # 0 x + gt_boxes3d_xyz[:,1,np.newaxis], # 1 y + gt_boxes3d_xyz[:,2,np.newaxis] + sample['gt_boxes3d'][:,3,np.newaxis] / 2, # 2 z + sample['gt_boxes3d'][:,5,np.newaxis], # 3 l # same as the original label + sample['gt_boxes3d'][:,4,np.newaxis], # 4 w # same as the original label + sample['gt_boxes3d'][:,3,np.newaxis], # 5 h # same as the original label + -sample['gt_boxes3d'][:,6,np.newaxis], # 6 ry + ), axis=1) + + # Voxels are the pillar ids + pillars_, voxels_ = self.make_point_pillars(pts_input) + + 
pillars.append(pillars_) + voxels.append(voxels_) + + + if self.split=='train' or self.split =='val': + occupancy_, position_, size_, angle_, heading_, classification_ = self.make_ground_truth( + gt_boxes3d, sample['gt_cls_type_list']) + + occupancy.append(occupancy_) + position.append(position_) + size.append(size_) + angle.append(angle_) + heading.append(heading_) + classification.append(classification_) + + pillars = np.concatenate(pillars, axis=0) + voxels = np.concatenate(voxels, axis=0) + + if self.split=='train' or self.split =='val': + occupancy = np.array(occupancy) + position = np.array(position) + size = np.array(size) + angle = np.array(angle) + heading = np.array(heading) + classification = np.array(classification) + return [pillars, voxels], [occupancy, position, size, angle, heading, classification] + else: + return [pillars, voxels] + + def on_epoch_end(self): + # print("inside epoch") + if self.split=='train' or self.split =='val': + self.sample_id_list=shuffle(self.sample_id_list) + + +class AnalyseCustomDataGenerator(CustomDataGenerator): + """ Multiprocessing-safe data generator for training, validation or testing, without fancy augmentation """ + + def __init__(self, batch_size: int, root_dir:str, npoints:int =16384, split: str ='train', + classes:List[str] =['Car', 'Pedestrian', 'Person_sitting'], random_select:bool =True, + gt_database_dir=None, aug_hard_ratio:float=0.5, **kwargs): + + super(AnalyseCustomDataGenerator, self).__init__( + batch_size=batch_size, root_dir=root_dir, + npoints=npoints, split=split, classes=classes, + random_select=random_select, gt_database_dir=gt_database_dir, + aug_hard_ratio=aug_hard_ratio, **kwargs + ) + # self.data_reader = data_reader + self.batch_size = batch_size + self.sample_id_list=self.get_sample_id_list() + self.split = split + + + def get_sample(self, index): + return super().get_sample(index) + + + + # def convert_labels_into_point_viz_format(self, gt_boxes3d): + # gt_boxes3d = 
gt_boxes3d[:,[3,4,5,0,1,2, 6]] # [xyz,3l4w5h,ry] => [3l,5h,4w] + # gt_boxes3d[:,5] -= (gt_boxes3d[:,2] /2) + # return gt_boxes3d + + def __len__(self): + return len(self.sample_id_list) // self.batch_size + + def __getitem__(self, batch_id: int): + file_ids = range(batch_id * self.batch_size, self.batch_size * (batch_id + 1)) + # print("inside getitem") + pillars = [] + voxels = [] + occupancy = [] + position = [] + size = [] + angle = [] + heading = [] + classification = [] + + pts_input_ = [] + gt_boxes3d_ = [] + sample_ = [] + + # save_viz_path = "/home/tan/tjtanaa/PointPillars/visualization/custom_processor" + # # Initialize and setup output directory. + # Converter = PointvizConverter(save_viz_path) + + for i in file_ids: + # print(i) + # print(type(i)) + sample = self.get_sample(i) + # For each file, dividing the space into a x-y grid to create pillars + pts_lidar = sample['calib'].rect_to_lidar(sample['pts_rect']) + # print(pts_lidar.shape) + + pts_input = np.concatenate((pts_lidar, sample['pts_features']), axis=1) # (N, C) + + gt_boxes3d_xyz = sample['calib'].rect_to_lidar(sample['gt_boxes3d'][:,:3]) + + # print(gt_boxes3d_xyz.shape) + + gt_boxes3d = np.concatenate(( + gt_boxes3d_xyz[:,0,np.newaxis], # 0 x + gt_boxes3d_xyz[:,1,np.newaxis], # 1 y + gt_boxes3d_xyz[:,2,np.newaxis] + sample['gt_boxes3d'][:,3,np.newaxis] / 2, # 2 z + sample['gt_boxes3d'][:,5,np.newaxis], # 3 l # same as the original label + sample['gt_boxes3d'][:,4,np.newaxis], # 4 w # same as the original label + sample['gt_boxes3d'][:,3,np.newaxis], # 5 h # same as the original label + -sample['gt_boxes3d'][:,6,np.newaxis], # 6 ry + ), axis=1) + + # print(type(gt_boxes3d)) + # gt_boxes3d = self.limit_yaw(gt_boxes3d) + + # bbox_params = self.convert_labels_into_point_viz_format(gt_boxes3d) + # print(bbox_params.shape) + # Converter.compile("custom_sample_{}".format(i), coors=pts_input[:,:3], intensity=pts_input[:,3], + # bbox_params=bbox_params) + + + # exit() + + # print(pts_input.shape) + # 
Voxels are the pillar ids + pillars_, voxels_ = self.make_point_pillars(pts_input) + + print(pillars_.shape, voxels_.shape) + # for i in range(10): + # print(pillars_[0,0,i,:]) + # print(np.sum(pillars_ > 0)) + # exit() + + pillars.append(pillars_) + voxels.append(voxels_) + + # print(sample['gt_cls_type_list']) + + if self.split=='train' or self.split =='val': + occupancy_, position_, size_, angle_, heading_, classification_ = self.make_ground_truth( + gt_boxes3d, sample['gt_cls_type_list']) + + # print(occupancy_.shape, position_.shape, size_.shape, angle_.shape, heading_.shape, classification_.shape) + + + occupancy.append(occupancy_) + position.append(position_) + size.append(size_) + angle.append(angle_) + heading.append(heading_) + classification.append(classification_) + + sample_.append(sample) + gt_boxes3d_.append(gt_boxes3d) + pts_input_.append(pts_input) + + # exit() + + pillars = np.concatenate(pillars, axis=0) + voxels = np.concatenate(voxels, axis=0) + + if self.split=='train' or self.split =='val': + occupancy = np.array(occupancy) + position = np.array(position) + size = np.array(size) + angle = np.array(angle) + heading = np.array(heading) + classification = np.array(classification) + return [pillars, voxels], [occupancy, position, size, angle, heading, classification], [pts_input_, gt_boxes3d_, sample_] + else: + return [pillars, voxels] + + def on_epoch_end(self): + # print("inside epoch") + if self.split=='train' or self.split =='val': + self.sample_id_list=shuffle(self.sample_id_list) + \ No newline at end of file diff --git a/point_pillars_prediction.py b/point_pillars_prediction.py index 58c60a3..75f5406 100644 --- a/point_pillars_prediction.py +++ b/point_pillars_prediction.py @@ -2,13 +2,18 @@ from glob import glob import numpy as np import tensorflow as tf -from processors import SimpleDataGenerator +from processors import SimpleDataGenerator, AnalyseSimpleDataGenerator from inference_utils import generate_bboxes_from_pred, 
GroundTruthGenerator, focal_loss_checker, rotational_nms +from inference_utils import generate_bboxes_from_pred_and_np_array from readers import KittiDataReader from config import Parameters from network import build_point_pillar_graph +from inference_utils import inverse_yaw_element -DATA_ROOT = "../training" +from point_viz.converter import PointvizConverter + +DATA_ROOT = "/media/data3/tjtanaa/kitti_dataset/KITTI/object/training" +# MODEL_ROOT = "./logs_Car_Pedestrian_Original_2" MODEL_ROOT = "./logs" os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" @@ -16,6 +21,11 @@ if __name__ == "__main__": + # save_viz_path = "/home/tan/tjtanaa/PointPillars/visualization/custom_prediction_multiprocessing" + save_viz_path = "/home/tan/tjtanaa/PointPillars/visualization/prediction" + # Initialize and setup output directory. + Converter = PointvizConverter(save_viz_path) + params = Parameters() pillar_net = build_point_pillar_graph(params) pillar_net.load_weights(os.path.join(MODEL_ROOT, "model.h5")) @@ -23,35 +33,67 @@ data_reader = KittiDataReader() - lidar_files = sorted(glob(os.path.join(DATA_ROOT, "velodyne", "*.bin"))) - label_files = sorted(glob(os.path.join(DATA_ROOT, "label_2", "*.txt"))) - calibration_files = sorted(glob(os.path.join(DATA_ROOT, "calib", "*.txt"))) + lidar_files = sorted(glob(os.path.join(DATA_ROOT, "velodyne", "*.bin")))[:100] + print(len(lidar_files)) + print() + label_files = sorted(glob(os.path.join(DATA_ROOT, "label_2", "*.txt")))[:100] + calibration_files = sorted(glob(os.path.join(DATA_ROOT, "calib", "*.txt")))[:100] assert len(lidar_files) == len(label_files) == len(calibration_files), "Input dirs require equal number of files." 
- eval_gen = SimpleDataGenerator(data_reader, params.batch_size, lidar_files, label_files, calibration_files) - - occupancy, position, size, angle, heading, classification = pillar_net.predict(eval_gen, - batch_size=params.batch_size) - set_boxes, confidences = [], [] - loop_range = occupancy.shape[0] if len(occupancy.shape) == 4 else 1 - for i in range(loop_range): - set_boxes.append(generate_bboxes_from_pred(occupancy[i], position[i], size[i], angle[i], heading[i], - classification[i], params.anchor_dims, occ_threshold=0.7)) - confidences.append([float(boxes.conf) for boxes in set_boxes[-1]]) - print('Scene 1: Box predictions with occupancy > occ_thr: ', len(set_boxes[0])) + eval_gen = AnalyseSimpleDataGenerator(data_reader, params.batch_size, lidar_files, label_files, calibration_files) + + + for batch_idx in range(0,10): + [pillars, voxels], [occupancy, position, size, angle, heading, classification], [pts_input, gt_boxes3d] \ + = eval_gen[batch_idx] + + # exit() + + occupancy, position, size, angle, heading, classification = pillar_net.predict([pillars, voxels]) + set_boxes, confidences = [], [] + loop_range = occupancy.shape[0] if len(occupancy.shape) == 4 else 1 + for i in range(loop_range): + set_box, prediction = generate_bboxes_from_pred_and_np_array(occupancy[i], position[i], size[i], angle[i], heading[i], + classification[i], params.anchor_dims, occ_threshold=0.3) + + if len(set_box) == 0: + continue + set_boxes.append(set_box) + # set_boxes.append(generate_bboxes_from_pred(occupancy[i], position[i], size[i], angle[i], heading[i], + # classification[i], params.anchor_dims, occ_threshold=0.3)) + confidences.append([float(boxes.conf) for boxes in set_boxes[-1]]) + + # print(set_boxes[0]) + # print(np.array(set_boxes[0]).shape) + # print(prediction.shape) + gt_boxes3d_ = [] + for j in range(len(gt_boxes3d[i])): + bbox = gt_boxes3d[i][j] + gt_boxes3d_.append([bbox.dimension[1], bbox.dimension[2], bbox.dimension[0], + bbox.centroid[1], bbox.centroid[2] + 
bbox.dimension[2]/2, bbox.centroid[0] + , -bbox.yaw]) + gt_boxes3d_np = np.array(gt_boxes3d_) + print(gt_boxes3d_np.shape) + + Converter.compile("eval_sample_{}".format(batch_idx*params.batch_size + i), coors=pts_input[i][:,[1,2,0]], intensity=pts_input[i][:,3], + bbox_params=gt_boxes3d_np) + # bbox_params=gt_boxes3d_np[:,[3,5,4,1,2,0,6]]) + # print('Scene 1: Box predictions with occupancy > occ_thr: ', len(set_boxes[0])) # NMS - nms_boxes = rotational_nms(set_boxes, confidences, occ_threshold=0.7, nms_iou_thr=0.5) - - print('Scene 1: Boxes after NMS with iou_thr: ', len(nms_boxes[0])) - - # Do all the further operations on predicted_boxes array, which contains the predicted bounding boxes - gt_gen = GroundTruthGenerator(data_reader, label_files, calibration_files, network_format=False) - gt_gen0 = GroundTruthGenerator(data_reader, label_files, calibration_files, network_format=True) - for seq_boxes, gt_label, gt0 in zip(nms_boxes, gt_gen, gt_gen0): - print("---------- New Scenario ---------- ") - focal_loss_checker(gt0[0], occupancy[0], n_occs=-1) - print("---------- ------------ ---------- ") - for gt in gt_label: - print(gt) - for pred in seq_boxes: - print(pred) + # nms_boxes = rotational_nms(set_boxes, confidences, occ_threshold=0.3, nms_iou_thr=0.5) + + # print('Scene 1: Boxes after NMS with iou_thr: ', len(nms_boxes[0])) + + + + # # Do all the further operations on predicted_boxes array, which contains the predicted bounding boxes + # gt_gen = GroundTruthGenerator(data_reader, label_files, calibration_files, network_format=False) + # gt_gen0 = GroundTruthGenerator(data_reader, label_files, calibration_files, network_format=True) + # for seq_boxes, gt_label, gt0 in zip(nms_boxes, gt_gen, gt_gen0): + # print("---------- New Scenario ---------- ") + # focal_loss_checker(gt0[0], occupancy[0], n_occs=-1) + # print("---------- ------------ ---------- ") + # for gt in gt_label: + # print(gt) + # for pred in seq_boxes: + # print(pred) diff --git 
a/point_pillars_training_custom_run_v2.py b/point_pillars_training_custom_run_v2.py new file mode 100644 index 0000000..196003d --- /dev/null +++ b/point_pillars_training_custom_run_v2.py @@ -0,0 +1,87 @@ +import os +import time +import numpy as np +import tensorflow as tf +from glob import glob + +# from config import Parameters +from config_v2 import Parameters +from loss import PointPillarNetworkLoss +from network import build_point_pillar_graph +# from processors import SimpleDataGenerator +# from custom_processors import CustomDataGenerator +from point_pillars_custom_processors_v2 import CustomDataGenerator +from readers import KittiDataReader + +# from point_viz.converter import PointvizConverter + +tf.get_logger().setLevel("ERROR") + +# DATA_ROOT = "/media/data3/tjtanaa/kitti_dataset/KITTI/object/training" # TODO make main arg +DATA_ROOT = "/media/data3/tjtanaa/kitti_dataset/" # TODO make main arg +# MODEL_ROOT = "./logs_Car_Pedestrian_Custom_Dataset_single_process" +MODEL_ROOT = "./logs_Car_Pedestrian_Custom_Dataset_No_Early_Stopping_wo_Aug_wo_val" + +os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" +os.environ["CUDA_VISIBLE_DEVICES"] = "2" + +if __name__ == "__main__": + + params = Parameters() + + pillar_net = build_point_pillar_graph(params) + # pillar_net.load_weights(os.path.join(MODEL_ROOT, "model.h5")) + + loss = PointPillarNetworkLoss(params) + + optimizer = tf.keras.optimizers.Adam(lr=params.learning_rate, decay=params.decay_rate) + + pillar_net.compile(optimizer, loss=loss.losses()) + + # gt_database_dir = os.path.join(DATA_ROOT, "gt_database") + gt_database_dir = None + + training_gen = CustomDataGenerator(batch_size=params.batch_size,root_dir = DATA_ROOT, + npoints=20000, split='train', classes=list(params.classes_map.keys()), + random_select=True, gt_database_dir=None, aug_hard_ratio=0.7) + + # validation_gen = CustomDataGenerator(batch_size=params.batch_size, root_dir=DATA_ROOT, + # npoints=20000, split='val', 
classes=list(params.classes_map.keys())) + + + # save_viz_path = "/home/tan/tjtanaa/PointPillars/visualization/custom_processor" + # Initialize and setup output directory. + # Converter = PointvizConverter(save_viz_path) + + + + # bbox_params = self.convert_labels_into_point_viz_format(gt_boxes3d) + # print(bbox_params.shape) + # Converter.compile("custom_sample_{}".format(i), coors=pts_input[:,:3], intensity=pts_input[:,3], + # bbox_params=bbox_params) + + + log_dir = MODEL_ROOT + epoch_to_decay = int( + np.round(params.iters_to_decay / params.batch_size * int(len(training_gen)))) + callbacks = [ + tf.keras.callbacks.TensorBoard(log_dir=log_dir), + tf.keras.callbacks.ModelCheckpoint(filepath=os.path.join(log_dir, "model.h5"), + monitor='loss', save_best_only=True), + tf.keras.callbacks.LearningRateScheduler( + lambda epoch, lr: lr * 0.8 if ((epoch % epoch_to_decay == 0) and (epoch != 0)) else lr, verbose=True), + # tf.keras.callbacks.EarlyStopping(patience=20, monitor='val_loss'), + ] + + try: + pillar_net.fit(training_gen, + # validation_data = validation_gen, + steps_per_epoch=len(training_gen), + callbacks=callbacks, + use_multiprocessing=True, + epochs=int(params.total_training_epochs), + workers=6) + except KeyboardInterrupt: + model_str = "interrupted_%s.h5" % time.strftime("%Y%m%d-%H%M%S") + pillar_net.save(os.path.join(log_dir, model_str)) + print("Interrupt. 
Saving output to %s" % os.path.join(os.getcwd(), log_dir[1:], model_str)) diff --git a/point_pillars_training_run.py b/point_pillars_training_run.py index bc1c517..2dd1c27 100644 --- a/point_pillars_training_run.py +++ b/point_pillars_training_run.py @@ -8,22 +8,23 @@ from loss import PointPillarNetworkLoss from network import build_point_pillar_graph from processors import SimpleDataGenerator +# from custom_processors import CustomDataGenerator from readers import KittiDataReader tf.get_logger().setLevel("ERROR") -DATA_ROOT = "../training" # TODO make main arg -MODEL_ROOT = "./logs" +DATA_ROOT = "/media/data3/tjtanaa/kitti_dataset/KITTI/object/training" # TODO make main arg +MODEL_ROOT = "./logs_Car_Pedestrian_Original_2" os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" -os.environ["CUDA_VISIBLE_DEVICES"] = "0" +os.environ["CUDA_VISIBLE_DEVICES"] = "2" if __name__ == "__main__": params = Parameters() pillar_net = build_point_pillar_graph(params) - pillar_net.load_weights(os.path.join(MODEL_ROOT, "model.h5")) + # pillar_net.load_weights(os.path.join(MODEL_ROOT, "model.h5")) loss = PointPillarNetworkLoss(params) @@ -43,8 +44,12 @@ validation_gen = SimpleDataGenerator(data_reader, params.batch_size, lidar_files[-validation_len:], label_files[-validation_len:], calibration_files[-validation_len:]) log_dir = MODEL_ROOT + # epoch_to_decay = int( + # np.round(params.iters_to_decay / params.batch_size * int(np.ceil(float(len(label_files)) / params.batch_size)))) + epoch_to_decay = int( - np.round(params.iters_to_decay / params.batch_size * int(np.ceil(float(len(label_files)) / params.batch_size)))) + np.round(params.iters_to_decay / params.batch_size * int(len(training_gen)))) + callbacks = [ tf.keras.callbacks.TensorBoard(log_dir=log_dir), tf.keras.callbacks.ModelCheckpoint(filepath=os.path.join(log_dir, "model.h5"), @@ -59,9 +64,9 @@ validation_data = validation_gen, steps_per_epoch=len(training_gen), callbacks=callbacks, - use_multiprocessing=True, - 
epochs=int(params.total_training_epochs), - workers=6) + # use_multiprocessing=True, + epochs=int(params.total_training_epochs)) + # workers=6) except KeyboardInterrupt: model_str = "interrupted_%s.h5" % time.strftime("%Y%m%d-%H%M%S") pillar_net.save(os.path.join(log_dir, model_str)) diff --git a/point_pillars_visualize_input.py b/point_pillars_visualize_input.py new file mode 100644 index 0000000..91ce836 --- /dev/null +++ b/point_pillars_visualize_input.py @@ -0,0 +1,145 @@ +import os +from glob import glob +import numpy as np +import tensorflow as tf +# from processors import SimpleDataGenerator +# from custom_processors import CustomDataGenerator, AnalyseCustomDataGenerator +from point_pillars_custom_processors_v2 import CustomDataGenerator, AnalyseCustomDataGenerator +from inference_utils_v2 import generate_bboxes_from_pred, GroundTruthGenerator, focal_loss_checker +from inference_utils_v2 import rotational_nms, generate_bboxes_from_pred_and_np_array +from readers import KittiDataReader +from config_v2 import Parameters +from network import build_point_pillar_graph + + +from point_viz.converter import PointvizConverter + +DATA_ROOT = "/media/data3/tjtanaa/kitti_dataset/" +# MODEL_ROOT = "./logs_Car_Pedestrian_Custom_Dataset_single_process" +MODEL_ROOT = "./logs_Car_Pedestrian_Custom_Dataset_No_Early_Stopping_Input_Coordinate_Analysis_v2" + +os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" +os.environ["CUDA_VISIBLE_DEVICES"] = "2" + + +def limit_period(val, offset=0.5, period=np.pi): + return val - np.floor(val / period + offset) * period + +if __name__ == "__main__": + + params = Parameters() + # save_viz_path = "/home/tan/tjtanaa/PointPillars/visualization/custom_prediction_multiprocessing" + # save_viz_path = "/home/tan/tjtanaa/PointPillars/visualization/input_coordinate_analysis_point_pillar_v2_gt_only" + # save_viz_path = "/home/tan/tjtanaa/PointPillars/visualization/input_coordinate_analysis_point_pillar_v2_labels_only" + save_viz_path = 
"/home/tan/tjtanaa/PointPillars/visualization/input_coordinate_analysis_point_pillar_v2_gt_and_labels" + # Initialize and setup output directory. + Converter = PointvizConverter(save_viz_path) + + gt_database_dir = os.path.join(DATA_ROOT, "gt_database") + + validation_gen = AnalyseCustomDataGenerator(batch_size=params.batch_size, root_dir=DATA_ROOT, + npoints=16384, split='val',random_select=False, classes=list(params.classes_map.keys())) + + for batch_idx in range(0,20): + [pillars, voxels], [occupancy_, position_, size_, angle_, heading_, classification_], [pts_input, gt_boxes3d, sample] = validation_gen[batch_idx] + + + set_boxes, confidences = [], [] + loop_range = occupancy_.shape[0] if len(occupancy_.shape) == 4 else 1 + for i in range(loop_range): + # set_box, predicted_boxes3d = generate_bboxes_from_pred_and_np_array(occupancy[i], position[i], size[i], angle[i], + # heading[i], + # classification[i], params.anchor_dims, occ_threshold=0.15) + gt_set_box, decoded_gt_boxes3d = generate_bboxes_from_pred_and_np_array(occupancy_[i], position_[i], size_[i], angle_[i], + heading_[i], + classification_[i], params.anchor_dims, occ_threshold=0.5) + + # exit() + gt_boxes3d_ = gt_boxes3d[i] + + print(gt_boxes3d_.shape) + gt_bbox_params = np.stack([gt_boxes3d_[:,3], gt_boxes3d_[:,5], gt_boxes3d_[:,4], + gt_boxes3d_[:,1], gt_boxes3d_[:,2] , + gt_boxes3d_[:,0], + gt_boxes3d_[:,6]], axis=1) + + + gt_bbox_params_list = gt_bbox_params.tolist() + # gt_bbox_params_list = [] + # print(gt_bbox_params_list) + # print(len(gt_bbox_params_list)) + # print(len(gt_bbox_params_list[0])) + + for k in range(len(gt_bbox_params_list)): + msg = "%.5f, %.5f"%(gt_bbox_params_list[k][3],gt_bbox_params_list[k][5]) + gt_bbox_params_list[k].append("Green") + gt_bbox_params_list[k].append(msg) + + if len(gt_set_box) > 0: + decoded_gt_boxes3d_ = decoded_gt_boxes3d + # bbox_params = validation_gen.convert_predictions_into_point_viz_format(predicted_boxes3d[:,[1, 2, 0, 5, 3, 4, 6 ]]) + + 
print(decoded_gt_boxes3d_.shape) + # print(predicted_boxes3d_) + # print(size[i]) + + bbox_params = np.stack([decoded_gt_boxes3d_[:,3], decoded_gt_boxes3d_[:,5], decoded_gt_boxes3d_[:,4], + decoded_gt_boxes3d_[:,1], decoded_gt_boxes3d_[:,2] , + decoded_gt_boxes3d_[:,0], + decoded_gt_boxes3d_[:,6]], axis=1) + + + # bbox_params = np.stack([predicted_boxes3d[:,4], predicted_boxes3d[:,5], predicted_boxes3d[:,3], + # predicted_boxes3d[:,1], -(predicted_boxes3d[:,2] - predicted_boxes3d[:,5] / 2), + # predicted_boxes3d[:,0], + # predicted_boxes3d[:,6]], axis=1) + + bbox_params_list = bbox_params.tolist() + # bbox_labels_conf = [str(predicted_boxes3d[k,9]) for k in range(predicted_boxes3d.shape[0])] + for k in range(decoded_gt_boxes3d.shape[0]): + msg = "%.5f, %.5f"%(bbox_params_list[k][3],bbox_params_list[k][5]) + # msg = (str(bbox_params_list[k][3:5])) + bbox_params_list[k].append("Magenta") + bbox_params_list[k].append(msg) + # bbox_params_list[k].append(str(decoded_gt_boxes3d[k,9]) + params.map_classes[int(decoded_gt_boxes3d[k,8])]) + gt_bbox_params_list.append(bbox_params_list[k]) + + # print(gt_bbox_params_list) + # print(gt_bbox_params.tolist()) + + coor = pts_input[i][:,[1,2,0]] + # coor[:,1] *= -1 + Converter.compile("val_custom_sample_{}".format(batch_idx * params.batch_size+i), coors=coor, intensity=pts_input[i][:,3], + bbox_params=gt_bbox_params_list) + + # exit() + # set_boxes.append(set_box) + # # set_boxes.append(generate_bboxes_from_pred(occupancy, position, size, angle, heading, + # # classification, params.anchor_dims, occ_threshold=0.1)) + # # confidences.append([float(boxes.conf) for boxes in set_boxes[-1]]) + + # sum_bboxes = 0 + # for h in range(len(set_boxes)): + # sum_bboxes += len(set_boxes[h]) + + # print('Batch ', str(batch_idx) ,': Box predictions with occupancy > occ_thr: ', sum_bboxes) + # print('Scene 1: Box predictions with occupancy > occ_thr: ', len(set_boxes[0])) + # exit() + # print(set_boxes[-1]) + + # # NMS + # nms_boxes = 
rotational_nms(set_boxes, confidences, occ_threshold=0.7, nms_iou_thr=0.5) + + # print('Scene 1: Boxes after NMS with iou_thr: ', len(nms_boxes[0])) + + # # Do all the further operations on predicted_boxes array, which contains the predicted bounding boxes + # gt_gen = GroundTruthGenerator(data_reader, label_files, calibration_files, network_format=False) + # gt_gen0 = GroundTruthGenerator(data_reader, label_files, calibration_files, network_format=True) + # for seq_boxes, gt_label, gt0 in zip(nms_boxes, gt_gen, gt_gen0): + # print("---------- New Scenario ---------- ") + # focal_loss_checker(gt0[0], occupancy[0], n_occs=-1) + # print("---------- ------------ ---------- ") + # for gt in gt_label: + # print(gt) + # for pred in seq_boxes: + # print(pred) diff --git a/processors.py b/processors.py index 05b1412..dee118e 100644 --- a/processors.py +++ b/processors.py @@ -11,6 +11,8 @@ import sys +from point_viz.converter import PointvizConverter + def select_best_anchors(arr): dims = np.indices(arr.shape[1:]) # arr[..., 0:1] gets the occupancy value from occ in {-1, 0, 1}, i.e. 
{bad match, neg box, pos box} @@ -33,16 +35,30 @@ def __init__(self): def transform_labels_into_lidar_coordinates(labels: List[Label3D], R: np.ndarray, t: np.ndarray): transformed = [] for label in labels: - label.centroid = label.centroid @ np.linalg.inv(R).T - t - label.dimension = label.dimension[[2, 1, 0]] + label.centroid = (label.centroid - t) @ np.linalg.inv(R).T + label.dimension = label.dimension[[2, 1, 0]] # h w l => l ,w ,h label.yaw -= np.pi / 2 while label.yaw < -np.pi: + print("smaller than -pi") label.yaw += (np.pi * 2) while label.yaw > np.pi: + print("larger than pi") label.yaw -= (np.pi * 2) transformed.append(label) return labels + + def convert_labels_into_point_viz_format(self, labels: List[Label3D]): + label_list = [] + + for label in labels: + label_ = [label.dimension[2], label.dimension[0], label.dimension[1]] + label_.extend([label.centroid[0], label.centroid[1], label.centroid[2]]) + label_.extend([label.yaw]) + label_list.append(label_) + + return np.array(label_list) + def make_point_pillars(self, points: np.ndarray): assert points.ndim == 2 @@ -69,7 +85,7 @@ def make_ground_truth(self, labels: List[Label3D]): # filter labels by classes (cars, pedestrians and Trams) # Label has 4 properties (Classification (0th index of labels file), # centroid coordinates, dimensions, yaw) - labels = list(filter(lambda x: x.classification in self.classes, labels)) + labels = list(filter(lambda x: x.classification in self.classes_map, labels)) if len(labels) == 0: pX, pY = int(self.Xn / self.downscaling_factor), int(self.Yn / self.downscaling_factor) @@ -82,7 +98,7 @@ def make_ground_truth(self, labels: List[Label3D]): target_positions = np.array([label.centroid for label in labels], dtype=np.float32) target_dimension = np.array([label.dimension for label in labels], dtype=np.float32) target_yaw = np.array([label.yaw for label in labels], dtype=np.float32) - target_class = np.array([self.classes[label.classification] for label in labels], 
dtype=np.int32) + target_class = np.array([self.classes_map[label.classification] for label in labels], dtype=np.int32) assert np.all(target_yaw >= -np.pi) & np.all(target_yaw <= np.pi) assert len(target_positions) == len(target_dimension) == len(target_yaw) == len(target_class) @@ -151,12 +167,17 @@ def __getitem__(self, batch_id: int): heading = [] classification = [] + # save_viz_path = "/home/tan/tjtanaa/PointPillars/visualization/original_processor" + # # Initialize and setup output directory. + # Converter = PointvizConverter(save_viz_path) + for i in file_ids: lidar = self.data_reader.read_lidar(self.lidar_files[i]) # For each file, dividing the space into a x-y grid to create pillars # Voxels are the pillar ids pillars_, voxels_ = self.make_point_pillars(lidar) + # print(pillars_.shape, voxels_.shape) pillars.append(pillars_) voxels.append(voxels_) @@ -166,11 +187,30 @@ def __getitem__(self, batch_id: int): # Labels are transformed into the lidar coordinate bounding boxes # Label has 7 values, centroid, dimensions and yaw value. label_transformed = self.transform_labels_into_lidar_coordinates(label, R, t) + + + # # Pass data and create html files. + # pts_rect = lidar[:,:3] + # intensity = lidar[:,3] + # # sample_info['pts_rect'][:,1] *= -1 # mirror the y axis + # # pts_rect[:,1] *= -1 + # # coors = sample_info['pts_rect'] + # bbox_params = self.convert_labels_into_point_viz_format(label_transformed) + # print(bbox_params) + # Converter.compile("ori_sample_{}".format(i), coors=pts_rect, intensity=intensity, + # bbox_params=bbox_params) + + + # exit() + # These definitions can be found in point_pillars.cpp file # We are splitting a 10 dim vector that contains this information. 
occupancy_, position_, size_, angle_, heading_, classification_ = self.make_ground_truth( label_transformed) + # print(occupancy_.shape, position_.shape, size_.shape, angle_.shape, heading_.shape, classification_.shape) + # exit() + occupancy.append(occupancy_) position.append(position_) size.append(size_) @@ -197,3 +237,100 @@ def on_epoch_end(self): if self.label_files is not None: self.lidar_files, self.label_files, self.calibration_files = \ shuffle(self.lidar_files, self.label_files, self.calibration_files) + + + +class AnalyseSimpleDataGenerator(DataProcessor, Sequence): + """ Multiprocessing-safe data generator for training, validation or testing, without fancy augmentation """ + + def __init__(self, data_reader: DataReader, batch_size: int, lidar_files: List[str], label_files: List[str] = None, + calibration_files: List[str] = None): + super(AnalyseSimpleDataGenerator, self).__init__() + self.data_reader = data_reader + self.batch_size = batch_size + self.lidar_files = lidar_files + self.label_files = label_files + self.calibration_files = calibration_files + + assert (calibration_files is None and label_files is None) or \ + (calibration_files is not None and label_files is not None) + + if self.calibration_files is not None: + assert len(self.calibration_files) == len(self.lidar_files) + assert len(self.label_files) == len(self.lidar_files) + + def __len__(self): + return len(self.lidar_files) // self.batch_size + + def __getitem__(self, batch_id: int): + file_ids = np.arange(batch_id * self.batch_size, self.batch_size * (batch_id + 1)) + # print("inside getitem") + pillars = [] + voxels = [] + occupancy = [] + position = [] + size = [] + angle = [] + heading = [] + classification = [] + pts_input = [] + gt_boxes3d = [] + + save_viz_path = "/home/tan/tjtanaa/PointPillars/visualization/original_processor" + # Initialize and setup output directory. 
+ Converter = PointvizConverter(save_viz_path) + + for i in file_ids: + lidar = self.data_reader.read_lidar(self.lidar_files[i]) + + + Converter.compile("transform_sample_{}".format(i), coors=lidar[:,:3], intensity=lidar[:,3]) + + # For each file, dividing the space into a x-y grid to create pillars + # Voxels are the pillar ids + pillars_, voxels_ = self.make_point_pillars(lidar) + + # print(pillars_.shape, voxels_.shape) + pillars.append(pillars_) + voxels.append(voxels_) + + if self.label_files is not None: + label = self.data_reader.read_label(self.label_files[i]) + R, t = self.data_reader.read_calibration(self.calibration_files[i]) + # Labels are transformed into the lidar coordinate bounding boxes + # Label has 7 values, centroid, dimensions and yaw value. + label_transformed = self.transform_labels_into_lidar_coordinates(label, R, t) + + # These definitions can be found in point_pillars.cpp file + # We are splitting a 10 dim vector that contains this information. + occupancy_, position_, size_, angle_, heading_, classification_ = self.make_ground_truth( + label_transformed) + + occupancy.append(occupancy_) + position.append(position_) + size.append(size_) + angle.append(angle_) + heading.append(heading_) + classification.append(classification_) + pts_input.append(lidar) + gt_boxes3d.append(label_transformed) + + pillars = np.concatenate(pillars, axis=0) + voxels = np.concatenate(voxels, axis=0) + + if self.label_files is not None: + occupancy = np.array(occupancy) + position = np.array(position) + size = np.array(size) + angle = np.array(angle) + heading = np.array(heading) + classification = np.array(classification) + return [pillars, voxels], [occupancy, position, size, angle, heading, classification], [pts_input, gt_boxes3d] + else: + return [pillars, voxels] + + def on_epoch_end(self): + # print("inside epoch") + if self.label_files is not None: + self.lidar_files, self.label_files, self.calibration_files = \ + shuffle(self.lidar_files, self.label_files, 
self.calibration_files) \ No newline at end of file diff --git a/readers.py b/readers.py index b8ca8d9..c2a4880 100644 --- a/readers.py +++ b/readers.py @@ -8,7 +8,7 @@ class Label3D: def __init__(self, classification: str, centroid: np.ndarray, dimension: np.ndarray, yaw: float): self.classification = classification self.centroid = centroid - self.dimension = dimension + self.dimension = dimension # hwl self.yaw = yaw def __str__(self): diff --git a/src/point_pillars.cpp b/src/point_pillars.cpp index 4c2127d..6451f6c 100644 --- a/src/point_pillars.cpp +++ b/src/point_pillars.cpp @@ -7,6 +7,7 @@ #include #include #include +// #include namespace py = pybind11; struct IntPairHash { @@ -535,10 +536,10 @@ std::tuple, int, int> createPillarsTarget(const pybind1 negCnt++; if (printTime) { -// std::cout << "\nThere was no sufficiently overlapping anchor anywhere for object " << objectCount << std::endl; -// py::print("There was no sufficiently overlapping anchor anywhere for object " +str(objectCount)); -// std::cout << "Best IOU was " << maxIou << ". Adding the best location regardless of threshold." << std::endl; -// py::print("Best IOU was "+str(maxIou)+" Adding the best location regardless of threshold"); + // std::cout << "\nThere was no sufficiently overlapping anchor anywhere for object " << objectCount << std::endl; + // py::print("There was no sufficiently overlapping anchor anywhere for object " +std::to_string(objectCount)); + // std::cout << "Best IOU was " << maxIou << ". Adding the best location regardless of threshold." 
<< std::endl; + // py::print("Best IOU was "+std::to_string(maxIou)+" Adding the best location regardless of threshold"); } const auto xId = static_cast(std::floor((labelBox.x - xMin) / (xStep * downscalingFactor))); diff --git a/src/point_pillars_v2.cpp b/src/point_pillars_v2.cpp new file mode 100644 index 0000000..85a240e --- /dev/null +++ b/src/point_pillars_v2.cpp @@ -0,0 +1,634 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +// #include +namespace py = pybind11; + +struct IntPairHash { + std::size_t operator()(const std::pair &p) const { + assert(sizeof(std::size_t)>=8); + //Shift first integer over to make room for the second integer. The two are + //then packed side by side. + return (((uint64_t)p.first)<<32) | ((uint64_t)p.second); + } +}; + +struct PillarPoint { + float x; + float y; + float z; + float intensity; + float xc; + float yc; + float zc; + float xp; + float yp; +}; + +pybind11::tuple createPillars(pybind11::array_t points, + int maxPointsPerPillar, + int maxPillars, + float xStep, + float yStep, + float xMin, + float xMax, + float yMin, + float yMax, + float zMin, + float zMax, + bool printTime = false) +{ + std::chrono::high_resolution_clock::time_point t1 = std::chrono::high_resolution_clock::now(); + + if (points.ndim() != 2 || points.shape()[1] != 4) + { + throw std::runtime_error("numpy array with shape (n, 4) expected (n being the number of points)"); + } + + std::unordered_map, std::vector, IntPairHash> map; + + for (int i = 0; i < points.shape()[0]; ++i) + { + if ((points.at(i, 0) < xMin) || (points.at(i, 0) >= xMax) || \ + (points.at(i, 1) < yMin) || (points.at(i, 1) >= yMax) || \ + (points.at(i, 2) < zMin) || (points.at(i, 2) >= zMax)) + { + continue; + } + + auto xIndex = static_cast(std::floor((points.at(i, 0) - xMin) / xStep)); + auto yIndex = static_cast(std::floor((points.at(i, 1) - yMin) / yStep)); + + PillarPoint p = { + points.at(i, 0), // x + points.at(i, 1), // y + points.at(i, 
2), // z + points.at(i, 3), // intensity + 0, // xc + 0, // yc + 0, // zc + 0, // xp + 0, // yp + }; + + map[{xIndex, yIndex}].emplace_back(p); + } + + pybind11::array_t tensor; + pybind11::array_t indices; + + tensor.resize({1, maxPillars, maxPointsPerPillar, 9}); + indices.resize({1, maxPillars, 3}); + + int pillarId = 0; + for (auto& pair: map) + { + if (pillarId >= maxPillars) + { + break; + } + + float xMean = 0; + float yMean = 0; + float zMean = 0; + for (const auto& p: pair.second) + { + xMean += p.x; + yMean += p.y; + zMean += p.z; + } + xMean /= pair.second.size(); + yMean /= pair.second.size(); + zMean /= pair.second.size(); + + for (auto& p: pair.second) + { + p.xc = p.x - xMean; + p.yc = p.y - yMean; + p.zc = p.z - zMean; + } + + auto xIndex = static_cast(std::floor((xMean - xMin) / xStep)); + auto yIndex = static_cast(std::floor((yMean - yMin) / yStep)); + auto zMid = (zMax - zMin) * 0.5f; + indices.mutable_at(0, pillarId, 1) = xIndex; + indices.mutable_at(0, pillarId, 2) = yIndex; + + int pointId = 0; + for (const auto& p: pair.second) + { + if (pointId >= maxPointsPerPillar) + { + break; + } + + // tensor.mutable_at(0, pillarId, pointId, 0) = p.x - (xIndex * xStep + xMin); + // tensor.mutable_at(0, pillarId, pointId, 1) = p.y - (yIndex * yStep + yMin); + // tensor.mutable_at(0, pillarId, pointId, 2) = p.z - zMid; + // tensor.mutable_at(0, pillarId, pointId, 3) = p.intensity; + // tensor.mutable_at(0, pillarId, pointId, 4) = p.xc; + // tensor.mutable_at(0, pillarId, pointId, 5) = p.yc; + // tensor.mutable_at(0, pillarId, pointId, 6) = p.zc; + + tensor.mutable_at(0, pillarId, pointId, 0) = p.x; + tensor.mutable_at(0, pillarId, pointId, 1) = p.y; + tensor.mutable_at(0, pillarId, pointId, 2) = p.z; + tensor.mutable_at(0, pillarId, pointId, 3) = p.intensity; + tensor.mutable_at(0, pillarId, pointId, 4) = p.xc; + tensor.mutable_at(0, pillarId, pointId, 5) = p.yc; + tensor.mutable_at(0, pillarId, pointId, 6) = p.zc; + tensor.mutable_at(0, pillarId, 
pointId, 7) = p.x - (xIndex * xStep + xMin); + tensor.mutable_at(0, pillarId, pointId, 8) = p.y - (yIndex * yStep + yMin); + + pointId++; + } + + pillarId++; + } + + pybind11::tuple result = pybind11::make_tuple(tensor, indices); + + std::chrono::high_resolution_clock::time_point t2 = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast( t2 - t1 ).count(); + if (printTime) + std::cout << "createPillars took: " << static_cast(duration) / 1e6 << " seconds" << std::endl; + + return result; +} + +struct BoundingBox3D +{ + float x; + float y; + float z; + float length; + float width; + float height; + float yaw; + float classId; +}; + +struct Point2D { + float x; + float y; +}; + +typedef std::vector Polyline2D; + +// Returns x-value of point of intersection of two lines +float xIntersect(float x1, float y1, float x2, float y2, + float x3, float y3, float x4, float y4) +{ + float num = (x1*y2 - y1*x2) * (x3-x4) - (x1-x2) * (x3*y4 - y3*x4); + float den = (x1-x2) * (y3-y4) - (y1-y2) * (x3-x4); + return num/den; +} + +// Returns y-value of point of intersection of two lines +float yIntersect(float x1, float y1, float x2, float y2, + float x3, float y3, float x4, float y4) +{ + float num = (x1*y2 - y1*x2) * (y3-y4) - (y1-y2) * (x3*y4 - y3*x4); + float den = (x1-x2) * (y3-y4) - (y1-y2) * (x3-x4); + return num/den; +} + +// Returns area of polygon using the shoelace method +float polygonArea(const Polyline2D &polygon) +{ + float area = 0.0; + + size_t j = polygon.size()-1; + for (size_t i = 0; i < polygon.size(); i++) + { + area += (polygon[j].x + polygon[i].x) * (polygon[j].y - polygon[i].y); + j = i; // j is previous vertex to i + } + + return std::abs(area / 2.0); // Return absolute value +} + +float rotatedX(float x, float y, float angle) +{ + return x * std::cos(angle) - y * std::sin(angle); +} + +float rotatedY(float x, float y, float angle) +{ + return x * std::sin(angle) + y * std::cos(angle); +} + +// Construct bounding box in 2D, 
coordinates are returned in clockwise order +Polyline2D boundingBox3DToTopDown(const BoundingBox3D &box1) +{ + Polyline2D box; + box.push_back({rotatedX(-0.5 * box1.width, 0.5 * box1.length, + box1.yaw) + box1.x, + rotatedY(-0.5 * box1.width, 0.5 * box1.length, + box1.yaw) + box1.y}); + + box.push_back({rotatedX(0.5 * box1.width, 0.5 * box1.length, + box1.yaw) + box1.x, + rotatedY(0.5 * box1.width, 0.5 * box1.length, + box1.yaw) + box1.y}); + + box.push_back({rotatedX(0.5 * box1.width, -0.5 * box1.length, + box1.yaw) + box1.x, + rotatedY(0.5 * box1.width, -0.5 * box1.length, + box1.yaw) + box1.y}); + + box.push_back({rotatedX(-0.5 * box1.width, -0.5 * box1.length, + box1.yaw) + box1.x, + rotatedY(-0.5 * box1.width, -0.5 * box1.length, + box1.yaw) + box1.y}); + + return box; +} + +// This functions clips all the edges w.r.t one Clip edge of clipping area +// Returns a clipped polygon... +Polyline2D clip(const Polyline2D &poly_points, + float x1, + float y1, + float x2, + float y2) +{ + Polyline2D new_points; + + for (size_t i = 0; i < poly_points.size(); i++) + { + // (ix,iy),(kx,ky) are the co-ordinate values of the points + // i and k form a line in polygon + size_t k = (i+1) % poly_points.size(); + float ix = poly_points[i].x, iy = poly_points[i].y; + float kx = poly_points[k].x, ky = poly_points[k].y; + + // Calculating position of first point w.r.t. clipper line + float i_pos = (x2-x1) * (iy-y1) - (y2-y1) * (ix-x1); + + // Calculating position of second point w.r.t. 
clipper line + float k_pos = (x2-x1) * (ky-y1) - (y2-y1) * (kx-x1); + + // Case 1 : When both points are inside + if (i_pos < 0 && k_pos < 0) + { + //Only second point is added + new_points.push_back({kx,ky}); + } + + // Case 2: When only first point is outside + else if (i_pos >= 0 && k_pos < 0) + { + // Point of intersection with edge + // and the second point is added + new_points.push_back({xIntersect(x1, y1, x2, y2, ix, iy, kx, ky), + yIntersect(x1, y1, x2, y2, ix, iy, kx, ky)}); + new_points.push_back({kx,ky}); + + } + + // Case 3: When only second point is outside + else if (i_pos < 0 && k_pos >= 0) + { + //Only point of intersection with edge is added + new_points.push_back({xIntersect(x1, y1, x2, y2, ix, iy, kx, ky), + yIntersect(x1, y1, x2, y2, ix, iy, kx, ky)}); + + } + // Case 4: When both points are outside + else + { + //No points are added + } + } + + return new_points; +} + +// Implements Sutherland–Hodgman algorithm +// Returns a polygon with the intersection between two polygons. 
+Polyline2D sutherlandHodgmanClip(const Polyline2D &poly_points_vector, + const Polyline2D &clipper_points) +{ + Polyline2D clipped_poly_points_vector = poly_points_vector; + for (size_t i=0; i, int, int> createPillarsTarget(const pybind11::array_t& objectPositions, + const pybind11::array_t& objectDimensions, + const pybind11::array_t& objectYaws, + const pybind11::array_t& objectClassIds, + const pybind11::array_t& anchorDimensions, + const pybind11::array_t& anchorZHeights, + const pybind11::array_t& anchorYaws, + float positiveThreshold, + float negativeThreshold, + int nbClasses, + int downscalingFactor, + float xStep, + float yStep, + float xMin, + float xMax, + float yMin, + float yMax, + float zMin, + float zMax, + bool printTime = false) +{ + + std::chrono::high_resolution_clock::time_point t1 = std::chrono::high_resolution_clock::now(); + + const auto xSize = static_cast(std::floor((xMax - xMin) / (xStep * downscalingFactor))); +// py::print("xSize", xSize); + const auto ySize = static_cast(std::floor((yMax - yMin) / (yStep * downscalingFactor))); +// py::print("ySize", ySize); + + const int nbAnchors = anchorDimensions.shape()[0]; //4 Number of anchors +// py::print("nbAnchors", nbAnchors); +// Anchor length + + if (nbAnchors <= 0) + { + throw std::runtime_error("Anchor length is zero"); + } + + const int nbObjects = objectDimensions.shape()[0]; //6 Number of labels inside a label.txt file +// BB dimensions from the label file + if (nbObjects <= 0) + { + throw std::runtime_error("Object length is zero"); + } +// py::print("nbObjects", nbObjects); + + // parse numpy arrays +// Preparing the anchor bounding box + std::vector anchorBoxes = {}; + std::vector anchorDiagonals; + for (int i = 0; i < nbAnchors; ++i) + { + BoundingBox3D anchorBox = {}; + anchorBox.x = 0; + anchorBox.y = 0; + anchorBox.length = anchorDimensions.at(i, 0); + anchorBox.width = anchorDimensions.at(i, 1); + anchorBox.height = anchorDimensions.at(i, 2); + anchorBox.z = 
anchorZHeights.at(i); + anchorBox.yaw = anchorYaws.at(i); + anchorBoxes.emplace_back(anchorBox); // Appends a new anchorBox to the AnchorBoxes container + // Note that anchor box doesn't have a classId as of now. + anchorDiagonals.emplace_back(std::sqrt(std::pow(anchorBox.width, 2) + std::pow(anchorBox.length, 2))); + } + +// Preparing the label bounding box + std::vector labelBoxes = {}; + for (int i = 0; i < nbObjects; ++i) + { + float x = objectPositions.at(i, 0); + float y = objectPositions.at(i, 1); + if (x < xMin | x > xMax | y < yMin | y > yMax) + { + continue; + } + BoundingBox3D labelBox = {}; + labelBox.x = x; + labelBox.y = y; + labelBox.z = objectPositions.at(i, 2); + labelBox.length = objectDimensions.at(i, 0); + labelBox.width = objectDimensions.at(i, 1); + labelBox.height = objectDimensions.at(i, 2); + labelBox.yaw = objectYaws.at(i); + labelBox.classId = objectClassIds.at(i); + labelBoxes.emplace_back(labelBox); + } + + pybind11::array_t tensor; + tensor.resize({nbObjects, xSize, ySize, nbAnchors, 10}); //Tensor of size (6,252,252,4,10) for first file + + pybind11::buffer_info tensor_buffer = tensor.request(); + float *ptr1 = (float *) tensor_buffer.ptr; + // Zero filling the tensor. Every element is presently zero + for (size_t idx = 0; idx < nbObjects * xSize * ySize * nbAnchors * 10; idx++) + { + ptr1[idx] = 0; + } + + int posCnt = 0; + int negCnt = 0; + int objectCount = 0; + if (printTime) + { +// std::cout << "Received " << labelBoxes.size() << " objects" << std::endl; +// py::print("Received "+str(labelBoxes.size())+" objects"); + } + for (const auto& labelBox: labelBoxes) //For every label box which is a 3d bounding box + { + // zone-in on potential spatial area of interest + // Length of (width,length) axis diagonal. 
+ float objectDiameter = std::sqrt(std::pow(labelBox.width, 2) + std::pow(labelBox.length, 2)); + // Offset = Number of grid boxes that can fit on the object diameter + const auto x_offset = static_cast(std::ceil(objectDiameter / (xStep * downscalingFactor))); + const auto y_offset = static_cast(std::ceil(objectDiameter / (yStep * downscalingFactor))); + // Xc = Number of grid boxes that can fit between Xmin (Ymin) and Label's x (y) coordinate + const auto xC = static_cast(std::floor((labelBox.x - xMin) / (xStep * downscalingFactor))); + const auto yC = static_cast(std::floor((labelBox.y - yMin) / (yStep * downscalingFactor))); + // X(Y)Start = Start from Xc (Yc) - Number of boxes in object's diameter. + // For example the object is located at 5 unites and is 2 unites long. Then X(Y)start will begin + // the search from 3 + const auto xStart = clip(xC - x_offset, 0, xSize); + const auto yStart = clip(yC - y_offset, 0, ySize); + // Similarly end the search at 8 units. Because the object cannot extend beyond that. 
+ const auto xEnd = clip(xC + x_offset, 0, xSize); + const auto yEnd = clip(yC + y_offset, 0, ySize); + + float maxIou = 0; + BoundingBox3D bestAnchor = {}; + int bestAnchorId = 0; + int bestAnchorXId = 0; + int bestAnchorYId = 0; + for (int xId = xStart; xId < xEnd; xId++) // Iterate through every box within search diameter + // In our example case, from 3 till 8 + { + const float x = xId * xStep * downscalingFactor + xMin; + // Getting the real world x coordinate + for (int yId = yStart; yId < yEnd; yId++) // Iterate through every box within search diamter in y axis + { + const float y = yId * yStep * downscalingFactor + yMin; + // Get the real world y coordinates + int anchorCount = 0; + for (auto& anchorBox: anchorBoxes) // For every anchor box (4 in our case) + // Note that we are checking every anchor box for every label in the file + { + anchorBox.x = x; // Assign the real world x and y coordinate to the anchor box + anchorBox.y = y; // Note that anchor boxes originally didn't have Xs and Ys. + // This is because we need ot check them along the X-Y grid. + // However, they did have a z value attached to them. + + const float iouOverlap = iou(anchorBox, labelBox); // Get IOU between two 3D boxes. + + if (maxIou < iouOverlap) + { + maxIou = iouOverlap; + bestAnchor = anchorBox; + bestAnchorId = anchorCount; + bestAnchorXId = xId; + bestAnchorYId = yId; + // if(printTime){ + // if(anchorCount == 3){ + // py::print("\nIoU old: " +std::to_string(iouOverlap) + " new: " + std::to_string(maxIou)); + // } + // } + + } + + if (iouOverlap > positiveThreshold) // Accept the Anchor. Add the anchor details to the tensor. 
+ { + // Tensor at CurrentObject Id, xth grid cell, yth grid cell, currentAnchor, 0 + tensor.mutable_at(objectCount, xId, yId, anchorCount, 0) = 1; + + auto diag = anchorDiagonals[anchorCount]; + tensor.mutable_at(objectCount, xId, yId, anchorCount, 1) = (labelBox.x - anchorBox.x) / diag; // delta x,y,z + tensor.mutable_at(objectCount, xId, yId, anchorCount, 2) = (labelBox.y - anchorBox.y) / diag; + tensor.mutable_at(objectCount, xId, yId, anchorCount, 3) = (labelBox.z - anchorBox.z) / anchorBox.height; + + tensor.mutable_at(objectCount, xId, yId, anchorCount, 4) = std::log(labelBox.length / anchorBox.length); // delta l,w,h + tensor.mutable_at(objectCount, xId, yId, anchorCount, 5) = std::log(labelBox.width / anchorBox.width); + tensor.mutable_at(objectCount, xId, yId, anchorCount, 6) = std::log(labelBox.height / anchorBox.height); + + // tensor.mutable_at(objectCount, xId, yId, anchorCount, 7) = std::sin(labelBox.yaw - anchorBox.yaw); //delta yaw + tensor.mutable_at(objectCount, xId, yId, anchorCount, 7) = labelBox.yaw - anchorBox.yaw; //delta yaw + if (labelBox.yaw > 0) // Is yaw > 0 + { + tensor.mutable_at(objectCount, xId, yId, anchorCount, 8) = 1; + } + else + { + tensor.mutable_at(objectCount, xId, yId, anchorCount, 8) = 0; + } + + tensor.mutable_at(objectCount, xId, yId, anchorCount, 9) = labelBox.classId; + + } + else if (iouOverlap < negativeThreshold) + { + tensor.mutable_at(objectCount, xId, yId, anchorCount, 0) = 0; + } + else + { + tensor.mutable_at(objectCount, xId, yId, anchorCount, 0) = -1; + } + + anchorCount++; + } + } + } + + if (maxIou < positiveThreshold) // Comparing maxIOU for that object obtained after checking with every anchor box + // If none of the anchors passed the threshold, then we place the best anchor details for that object. 
+ { + negCnt++; + // if (printTime) + // { + // // std::cout << "\nThere was no sufficiently overlapping anchor anywhere for object " << objectCount << std::endl; + // py::print("\nThere was no sufficiently overlapping anchor anywhere for object " +std::to_string(objectCount)); + // // std::cout << "Best IOU was " << maxIou << ". Adding the best location regardless of threshold." << std::endl; + // py::print("\nBest IOU was "+std::to_string(maxIou)+" Adding the best location regardless of threshold"); + // py::print("\nBest IOU.x was "+std::to_string(bestAnchor.x)+" "); + // py::print("\nBest IOU.y was "+std::to_string(bestAnchor.y)+" "); + // py::print("\nBest IOU.z was "+std::to_string(bestAnchor.z)+" "); + // py::print("\nBest IOU.ry was "+std::to_string(bestAnchor.yaw)+" "); + // } + + const auto xId = bestAnchorXId; + const auto yId = bestAnchorYId; + // const auto xId = static_cast(std::floor((labelBox.x - xMin) / (xStep * downscalingFactor))); + // const auto yId = static_cast(std::floor((labelBox.y - yMin) / (yStep * downscalingFactor))); + const float diag = std::sqrt(std::pow(bestAnchor.width, 2) + std::pow(bestAnchor.length, 2)); + + tensor.mutable_at(objectCount, xId, yId, bestAnchorId, 0) = 1; + + tensor.mutable_at(objectCount, xId, yId, bestAnchorId, 1) = (labelBox.x - bestAnchor.x) / diag; + tensor.mutable_at(objectCount, xId, yId, bestAnchorId, 2) = (labelBox.y - bestAnchor.y) / diag; + tensor.mutable_at(objectCount, xId, yId, bestAnchorId, 3) = (labelBox.z - bestAnchor.z) / bestAnchor.height; + + tensor.mutable_at(objectCount, xId, yId, bestAnchorId, 4) = std::log(labelBox.length / bestAnchor.length); + tensor.mutable_at(objectCount, xId, yId, bestAnchorId, 5) = std::log(labelBox.width / bestAnchor.width); + tensor.mutable_at(objectCount, xId, yId, bestAnchorId, 6) = std::log(labelBox.height / bestAnchor.height); + + // tensor.mutable_at(objectCount, xId, yId, bestAnchorId, 7) = std::sin(labelBox.yaw - bestAnchor.yaw); + 
tensor.mutable_at(objectCount, xId, yId, bestAnchorId, 7) = labelBox.yaw - bestAnchor.yaw; + if (labelBox.yaw > 0) + { + tensor.mutable_at(objectCount, xId, yId, bestAnchorId, 8) = 1; + } + else + { + tensor.mutable_at(objectCount, xId, yId, bestAnchorId, 8) = 0; + } +// Class id is the classification label (0,1,2,3) + tensor.mutable_at(objectCount, xId, yId, bestAnchorId, 9) = labelBox.classId; + } + else + { + posCnt++; + if (printTime) + { + std::cout << "\nAt least 1 anchor was positively matched for object " << objectCount << std::endl; + std::cout << "Best IOU was " << maxIou << "." << std::endl; + } + } + + objectCount++; + } + + std::chrono::high_resolution_clock::time_point t2 = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast( t2 - t1 ).count(); + if (printTime) + std::cout << "createPillarsTarget took: " << static_cast(duration) / 1e6 << " seconds" << std::endl; + + return std::make_tuple(tensor, posCnt, negCnt); +} + +float cmath_sin(float value){ + return std::sin(value); +} + + +PYBIND11_MODULE(point_pillars_v2, m) +{ + m.def("createPillars", &createPillars, "Runs function to create point pillars input tensors"); + m.def("createPillarsTarget", &createPillarsTarget, "Runs function to create point pillars output ground truth"); + m.def("cmath_sin", &cmath_sin, "Runs function to compute sine"); +} diff --git a/viz.py b/viz.py new file mode 100644 index 0000000..e69de29 From a8aa0b5b00e66e692a74dcdb343e7507bca14181 Mon Sep 17 00:00:00 2001 From: tjtanaa Date: Fri, 20 Nov 2020 23:16:37 +0800 Subject: [PATCH 2/4] Replicate Original Model Architecture from Point Pillar Official Source Code --- README.md | 7 + config_v2.py | 73 ++- config_v2_2.py | 155 ++++++ loss.py | 14 +- loss_v2.py | 87 +++ loss_v2_2.py | 87 +++ network.py | 5 +- network_v2.py | 121 +++++ network_v2_2.py | 121 +++++ point_pillars_check_input.py | 148 +++++ point_pillars_custom_prediction.py | 96 ++-- point_pillars_custom_prediction_v2_2.py | 165 
++++++ point_pillars_custom_processors_v2.py | 38 +- point_pillars_custom_processors_v2_2.py | 380 +++++++++++++ point_pillars_training_custom_run_v2.py | 7 +- point_pillars_training_custom_run_v2_2.py | 87 +++ point_pillars_visualize_input.py | 4 +- src/point_pillars.cpp | 107 +++- src/point_pillars_v2.cpp | 634 ---------------------- 19 files changed, 1590 insertions(+), 746 deletions(-) create mode 100644 config_v2_2.py create mode 100644 loss_v2.py create mode 100644 loss_v2_2.py create mode 100644 network_v2.py create mode 100644 network_v2_2.py create mode 100644 point_pillars_check_input.py create mode 100644 point_pillars_custom_prediction_v2_2.py create mode 100644 point_pillars_custom_processors_v2_2.py create mode 100644 point_pillars_training_custom_run_v2_2.py delete mode 100644 src/point_pillars_v2.cpp diff --git a/README.md b/README.md index 54c2ba0..2e415ef 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,10 @@ +# Forked from (https://github.com/tyagi-iiitv/PointPillars.git) [Under Development] + +The files with the v2_2 suffix are scripts that do not perform class regression. +They only learn to classify a single class. The confidence of objectness is used to determine +whether there is an object of a particular class or not. + + # About Point Pillars Point Pillars is a very famous Deep Neural Network for 3D Object Detection for LiDAR point clouds. With the application of object detection on the LiDAR devices fitted in the self driving cars, Point Pillars focuse on fast inference ~50fps, which was magnitudes above as compared to other networks for 3D Object detection. In this repo, we are trying to develop point pillars in TensorFlow. [Here's](https://medium.com/@a_tyagi/pointpillars-3d-point-clouds-bounding-box-detection-and-tracking-pointnet-pointnet-lasernet-67e26116de5a?source=friends_link&sk=4a27f55f2cea645af39f72117984fd22) a good first post to familiarize yourself with Point Pillars. 
diff --git a/config_v2.py b/config_v2.py index b477acf..5ad41a0 100644 --- a/config_v2.py +++ b/config_v2.py @@ -37,21 +37,41 @@ class DataParameters: # "Misc": 3, # } + # for Car and Pedestrian + # map_classes = { + # 0: "Car", + # 1: "Pedestrian" + # } + + # classes_map = {"Car": 0, + # "Pedestrian": 1, + # "Person_sitting": 1, + # # "Cyclist": 2, + # # "Truck": 3, + # # "Van": 3, + # # "Tram": 3, + # # "Misc": 3, + # } + + + # for Car only map_classes = { - 0: "Car", - 1: "Pedestrian" + 0: "Car" } - classes_map = {"Car": 0, - "Pedestrian": 1, - "Person_sitting": 1, - # "Cyclist": 2, - # "Truck": 3, - # "Van": 3, - # "Tram": 3, - # "Misc": 3, + classes_map = {"Car": 0 } + # # for Pedestrian only + # map_classes = { + # 0: "Pedestrian" + # } + + # classes_map = { + # "Pedestrian": 0, + # "Person_sitting": 0, + # } + nb_classes = len(np.unique(list(classes_map.values()))) assert nb_classes == np.max(np.unique(list(classes_map.values()))) + 1, 'Starting class indexing at zero.' @@ -81,19 +101,36 @@ class NetworkParameters: downscaling_factor = 2 # length (x), width (y), height (z), z-center, orientation + # for car and pedestrian + # anchor_dims = np.array([[3.9, 1.6, 1.56, -1, 0], + # [3.9, 1.6, 1.56, -1, np.pi/2], + # [0.8, 0.6, 1.73, -0.6, 0], + # [0.8, 0.6, 1.73, -0.6, np.pi/2], + # ], dtype=np.float32).tolist() + + # for car only anchor_dims = np.array([[3.9, 1.6, 1.56, -1, 0], - [3.9, 1.6, 1.56, -1, np.pi/2], - [0.8, 0.6, 1.73, -0.6, 0], - [0.8, 0.6, 1.73, -0.6, np.pi/2], - ], dtype=np.float32).tolist() - nb_dims = 3 + [3.9, 1.6, 1.56, -1, np.pi/2]], dtype=np.float32).tolist() + # for pedestrian only + # anchor_dims = np.array([[0.8, 0.6, 1.73, -0.6, 0], + # [0.8, 0.6, 1.73, -0.6, np.pi/2], + # ], dtype=np.float32).tolist() + nb_dims = 3 + + # for car positive_iou_threshold = 0.6 negative_iou_threshold = 0.3 + + # for pedestrian + # positive_iou_threshold = 0.5 + # negative_iou_threshold = 0.35 + # batch_size = 1 batch_size = 4 total_training_epochs = 160 - 
iters_to_decay = 101040. # 15 * 4 * ceil(6733. / 4) --> every 15 epochs on 6733 kitti samples, cf. pillar paper + # iters_to_decay = 101040. # 15 * 4 * ceil(6733. / 4) --> every 15 epochs on 6733 kitti samples, cf. pillar paper + iters_to_decay = 100500 learning_rate = 2e-4 decay_rate = 1e-8 L1 = 0 @@ -101,10 +138,10 @@ class NetworkParameters: alpha = 0.25 gamma = 2.0 # original pillars paper values - focal_weight = 3.0 # 1.0 + focal_weight = 1.0 # 1.0 loc_weight = 2.0 # 2.0 size_weight = 2.0 # 2.0 - angle_weight = 1.0 # 2.0 + angle_weight = 2.0 # 2.0 heading_weight = 0.2 # 0.2 class_weight = 0.5 # 0.2 diff --git a/config_v2_2.py b/config_v2_2.py new file mode 100644 index 0000000..5ad41a0 --- /dev/null +++ b/config_v2_2.py @@ -0,0 +1,155 @@ +import numpy as np + + +class GridParameters: + x_min = 0.0 + x_max = 80.64 + x_step = 0.16 + + y_min = -40.32 + y_max = 40.32 + y_step = 0.16 + + # z_min = -1.0 + # z_max = 3.0 + z_min = -3.0 + z_max = 1.0 + + # derived parameters + Xn_f = float(x_max - x_min) / x_step + Yn_f = float(y_max - y_min) / y_step + Xn = int(Xn_f) + Yn = int(Yn_f) + + def __init__(self, **kwargs): + super(GridParameters, self).__init__(**kwargs) + + +class DataParameters: + + # classes_map = {"Car": 0, + # "Pedestrian": 1, + # "Person_sitting": 1, + # "Cyclist": 2, + # "Truck": 3, + # "Van": 3, + # "Tram": 3, + # "Misc": 3, + # } + + # for Car and Pedestrian + # map_classes = { + # 0: "Car", + # 1: "Pedestrian" + # } + + # classes_map = {"Car": 0, + # "Pedestrian": 1, + # "Person_sitting": 1, + # # "Cyclist": 2, + # # "Truck": 3, + # # "Van": 3, + # # "Tram": 3, + # # "Misc": 3, + # } + + + # for Car only + map_classes = { + 0: "Car" + } + + classes_map = {"Car": 0 + } + + # # for Pedestrian only + # map_classes = { + # 0: "Pedestrian" + # } + + # classes_map = { + # "Pedestrian": 0, + # "Person_sitting": 0, + # } + + nb_classes = len(np.unique(list(classes_map.values()))) + assert nb_classes == np.max(np.unique(list(classes_map.values()))) + 1, 
'Starting class indexing at zero.' + + # classes = {"Car": 0, + # "Pedestrian": 1, + # "Person_sitting": 1, + # "Cyclist": 2, + # "Truck": 3, + # "Van": 3, + # "Tram": 3, + # "Misc": 3, + # } + + # nb_classes = len(np.unique(list(classes.values()))) + # assert nb_classes == np.max(np.unique(list(classes.values()))) + 1, 'Starting class indexing at zero.' + + def __init__(self, **kwargs): + super(DataParameters, self).__init__(**kwargs) + + +class NetworkParameters: + + max_points_per_pillar = 100 + max_pillars = 12000 + nb_features = 9 + nb_channels = 64 + downscaling_factor = 2 + + # length (x), width (y), height (z), z-center, orientation + # for car and pedestrian + # anchor_dims = np.array([[3.9, 1.6, 1.56, -1, 0], + # [3.9, 1.6, 1.56, -1, np.pi/2], + # [0.8, 0.6, 1.73, -0.6, 0], + # [0.8, 0.6, 1.73, -0.6, np.pi/2], + # ], dtype=np.float32).tolist() + + # for car only + anchor_dims = np.array([[3.9, 1.6, 1.56, -1, 0], + [3.9, 1.6, 1.56, -1, np.pi/2]], dtype=np.float32).tolist() + + # for pedestrian only + # anchor_dims = np.array([[0.8, 0.6, 1.73, -0.6, 0], + # [0.8, 0.6, 1.73, -0.6, np.pi/2], + # ], dtype=np.float32).tolist() + nb_dims = 3 + + # for car + positive_iou_threshold = 0.6 + negative_iou_threshold = 0.3 + + # for pedestrian + # positive_iou_threshold = 0.5 + # negative_iou_threshold = 0.35 + + # batch_size = 1 + batch_size = 4 + total_training_epochs = 160 + # iters_to_decay = 101040. # 15 * 4 * ceil(6733. / 4) --> every 15 epochs on 6733 kitti samples, cf. 
pillar paper + iters_to_decay = 100500 + learning_rate = 2e-4 + decay_rate = 1e-8 + L1 = 0 + L2 = 0 + alpha = 0.25 + gamma = 2.0 + # original pillars paper values + focal_weight = 1.0 # 1.0 + loc_weight = 2.0 # 2.0 + size_weight = 2.0 # 2.0 + angle_weight = 2.0 # 2.0 + heading_weight = 0.2 # 0.2 + class_weight = 0.5 # 0.2 + + def __init__(self, **kwargs): + super(NetworkParameters, self).__init__(**kwargs) + + +class Parameters(GridParameters, DataParameters, NetworkParameters): + + def __init__(self, **kwargs): + super(Parameters, self).__init__(**kwargs) diff --git a/loss.py b/loss.py index bd9aa21..092064c 100644 --- a/loss.py +++ b/loss.py @@ -1,7 +1,7 @@ import tensorflow as tf import tensorflow_probability as tfp from tensorflow.python.keras import backend as K -from config import Parameters +from config_v2 import Parameters class PointPillarNetworkLoss: @@ -47,6 +47,7 @@ def loc_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor): mask = tf.tile(tf.expand_dims(self.mask, -1), [1, 1, 1, 1, 3]) loss = tf.compat.v1.losses.huber_loss(y_true, y_pred, + delta=3.0, reduction="none") masked_loss = tf.boolean_mask(loss, mask) @@ -56,13 +57,12 @@ def size_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor): mask = tf.tile(tf.expand_dims(self.mask, -1), [1, 1, 1, 1, 3]) loss = tf.compat.v1.losses.huber_loss(y_true, y_pred, + delta=3.0, reduction="none") masked_loss = tf.boolean_mask(loss, mask) return self.size_weight * tf.reduce_mean(masked_loss) - - def add_sin_difference(self, y_true, y_pred, factor=1.0): if factor != 1.0: y_true = factor * y_true @@ -75,6 +75,7 @@ def angle_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor): y_true, y_pred = self.add_sin_difference(y_true, y_pred, self.angle_weight) loss = tf.compat.v1.losses.huber_loss(y_true, y_pred, + delta=3.0, reduction="none") masked_loss = tf.boolean_mask(loss, self.mask) @@ -86,6 +87,7 @@ def heading_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor): return self.heading_weight * tf.reduce_mean(masked_loss) def 
class_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor): - loss = tf.nn.softmax_cross_entropy_with_logits(labels=y_true, logits=y_pred) - masked_loss = tf.boolean_mask(loss, self.mask) - return self.class_weight * tf.reduce_mean(masked_loss) + return 0 # for now since we are only learning one class, the class label is equivalent to the occupancy + # loss = tf.nn.softmax_cross_entropy_with_logits(labels=y_true, logits=y_pred) + # masked_loss = tf.boolean_mask(loss, self.mask) + # return self.class_weight * tf.reduce_mean(masked_loss) diff --git a/loss_v2.py b/loss_v2.py new file mode 100644 index 0000000..6f272dd --- /dev/null +++ b/loss_v2.py @@ -0,0 +1,87 @@ +import tensorflow as tf +import tensorflow_probability as tfp +from tensorflow.python.keras import backend as K +from config_v2 import Parameters + + +class PointPillarNetworkLoss: + + def __init__(self, params: Parameters): + self.alpha = float(params.alpha) + self.gamma = float(params.gamma) + self.focal_weight = float(params.focal_weight) + self.loc_weight = float(params.loc_weight) + self.size_weight = float(params.size_weight) + self.angle_weight = float(params.angle_weight) + self.heading_weight = float(params.heading_weight) + # self.class_weight = float(params.class_weight) + + def losses(self): + return [self.focal_loss, self.loc_loss, self.size_loss, self.angle_loss, self.heading_loss] + + def focal_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor): + """ y_true value from occ in {-1, 0, 1}, i.e. {bad match, neg box, pos box} """ + + self.mask = tf.equal(y_true, 1) + + cross_entropy = K.binary_crossentropy(y_true, y_pred) + + p_t = y_true * y_pred + (tf.subtract(1.0, y_true) * tf.subtract(1.0, y_pred)) + + gamma_factor = tf.pow(1.0 - p_t, self.gamma) + + alpha_factor = y_true * self.alpha + (1.0 - y_true) * (1.0 - self.alpha) + + focal_loss = gamma_factor * alpha_factor * cross_entropy + + neg_mask = tf.equal(y_true, 0) + thr = tfp.stats.percentile(tf.boolean_mask(focal_loss, neg_mask), 90.) 
+ hard_neg_mask = tf.greater(focal_loss, thr) + # mask = tf.logical_or(tf.equal(y_true, 0), tf.equal(y_true, 1)) + mask = tf.logical_or(self.mask, tf.logical_and(neg_mask, hard_neg_mask)) + masked_loss = tf.boolean_mask(focal_loss, mask) + + return self.focal_weight * tf.reduce_mean(masked_loss) + + def loc_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor): + mask = tf.tile(tf.expand_dims(self.mask, -1), [1, 1, 1, 1, 3]) + loss = tf.compat.v1.losses.huber_loss(y_true, + y_pred, + delta=3.0, + reduction="none") + + masked_loss = tf.boolean_mask(loss, mask) + return self.loc_weight * tf.reduce_mean(masked_loss) + + def size_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor): + mask = tf.tile(tf.expand_dims(self.mask, -1), [1, 1, 1, 1, 3]) + loss = tf.compat.v1.losses.huber_loss(y_true, + y_pred, + delta=3.0, + reduction="none") + + masked_loss = tf.boolean_mask(loss, mask) + return self.size_weight * tf.reduce_mean(masked_loss) + + def add_sin_difference(self, y_true, y_pred, factor=1.0): + if factor != 1.0: + y_true = factor * y_true + y_pred = factor * y_pred + rad_pred_encoding = tf.math.sin(y_pred) * tf.math.cos(y_true) + rad_tg_encoding = tf.math.cos(y_pred) * tf.math.sin(y_true) + return rad_tg_encoding, rad_pred_encoding + + def angle_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor): + y_true, y_pred = self.add_sin_difference(y_true, y_pred) + loss = tf.compat.v1.losses.huber_loss(y_true, + y_pred, + delta=3.0, + reduction="none") + + masked_loss = tf.boolean_mask(loss, self.mask) + return self.angle_weight * tf.reduce_mean(masked_loss) + + def heading_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor): + loss = K.binary_crossentropy(y_true, y_pred) + masked_loss = tf.boolean_mask(loss, self.mask) + return self.heading_weight * tf.reduce_mean(masked_loss) \ No newline at end of file diff --git a/loss_v2_2.py b/loss_v2_2.py new file mode 100644 index 0000000..611a7ce --- /dev/null +++ b/loss_v2_2.py @@ -0,0 +1,87 @@ +import tensorflow as tf +import 
tensorflow_probability as tfp +from tensorflow.python.keras import backend as K +from config_v2_2 import Parameters + + +class PointPillarNetworkLoss: + + def __init__(self, params: Parameters): + self.alpha = float(params.alpha) + self.gamma = float(params.gamma) + self.focal_weight = float(params.focal_weight) + self.loc_weight = float(params.loc_weight) + self.size_weight = float(params.size_weight) + self.angle_weight = float(params.angle_weight) + self.heading_weight = float(params.heading_weight) + # self.class_weight = float(params.class_weight) + + def losses(self): + return [self.focal_loss, self.loc_loss, self.size_loss, self.angle_loss, self.heading_loss] + + def focal_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor): + """ y_true value from occ in {-1, 0, 1}, i.e. {bad match, neg box, pos box} """ + + self.mask = tf.equal(y_true, 1) + + cross_entropy = K.binary_crossentropy(y_true, y_pred) + + p_t = y_true * y_pred + (tf.subtract(1.0, y_true) * tf.subtract(1.0, y_pred)) + + gamma_factor = tf.pow(1.0 - p_t, self.gamma) + + alpha_factor = y_true * self.alpha + (1.0 - y_true) * (1.0 - self.alpha) + + focal_loss = gamma_factor * alpha_factor * cross_entropy + + neg_mask = tf.equal(y_true, 0) + thr = tfp.stats.percentile(tf.boolean_mask(focal_loss, neg_mask), 90.) 
+ hard_neg_mask = tf.greater(focal_loss, thr) + # mask = tf.logical_or(tf.equal(y_true, 0), tf.equal(y_true, 1)) + mask = tf.logical_or(self.mask, tf.logical_and(neg_mask, hard_neg_mask)) + masked_loss = tf.boolean_mask(focal_loss, mask) + + return self.focal_weight * tf.reduce_mean(masked_loss) + + def loc_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor): + mask = tf.tile(tf.expand_dims(self.mask, -1), [1, 1, 1, 1, 3]) + loss = tf.compat.v1.losses.huber_loss(y_true, + y_pred, + delta=3.0, + reduction="none") + + masked_loss = tf.boolean_mask(loss, mask) + return self.loc_weight * tf.reduce_mean(masked_loss) + + def size_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor): + mask = tf.tile(tf.expand_dims(self.mask, -1), [1, 1, 1, 1, 3]) + loss = tf.compat.v1.losses.huber_loss(y_true, + y_pred, + delta=3.0, + reduction="none") + + masked_loss = tf.boolean_mask(loss, mask) + return self.size_weight * tf.reduce_mean(masked_loss) + + def add_sin_difference(self, y_true, y_pred, factor=1.0): + if factor != 1.0: + y_true = factor * y_true + y_pred = factor * y_pred + rad_pred_encoding = tf.math.sin(y_pred) * tf.math.cos(y_true) + rad_tg_encoding = tf.math.cos(y_pred) * tf.math.sin(y_true) + return rad_tg_encoding, rad_pred_encoding + + def angle_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor): + y_true, y_pred = self.add_sin_difference(y_true, y_pred) + loss = tf.compat.v1.losses.huber_loss(y_true, + y_pred, + delta=3.0, + reduction="none") + + masked_loss = tf.boolean_mask(loss, self.mask) + return self.angle_weight * tf.reduce_mean(masked_loss) + + def heading_loss(self, y_true: tf.Tensor, y_pred: tf.Tensor): + loss = K.binary_crossentropy(y_true, y_pred) + masked_loss = tf.boolean_mask(loss, self.mask) + return self.heading_weight * tf.reduce_mean(masked_loss) \ No newline at end of file diff --git a/network.py b/network.py index 3d697eb..1404751 100644 --- a/network.py +++ b/network.py @@ -1,7 +1,6 @@ import tensorflow as tf import numpy as np -# from config import 
Parameters -from config_v2 import Parameters +from config import Parameters def build_point_pillar_graph(params: Parameters): @@ -116,4 +115,4 @@ def correct_batch_indices(tensor, batch_size): pillar_net = tf.keras.models.Model([input_pillars, input_indices], [occ, loc, size, angle, heading, clf]) # print(pillar_net.summary()) - return pillar_net + return pillar_net \ No newline at end of file diff --git a/network_v2.py b/network_v2.py new file mode 100644 index 0000000..4e1deed --- /dev/null +++ b/network_v2.py @@ -0,0 +1,121 @@ +import tensorflow as tf +import numpy as np +# from config import Parameters +from config_v2 import Parameters + + +def build_point_pillar_graph(params: Parameters): + + # extract required parameters + max_pillars = int(params.max_pillars) + max_points = int(params.max_points_per_pillar) + nb_features = int(params.nb_features) + nb_channels = int(params.nb_channels) + batch_size = int(params.batch_size) + image_size = tuple([params.Xn, params.Yn]) + nb_classes = int(params.nb_classes) + nb_anchors = len(params.anchor_dims) + + if tf.keras.backend.image_data_format() == "channels_first": + raise NotImplementedError + else: + input_shape = (max_pillars, max_points, nb_features) + + input_pillars = tf.keras.layers.Input(input_shape, batch_size=batch_size, name="pillars/input") + input_indices = tf.keras.layers.Input((max_pillars, 3), batch_size=batch_size, name="pillars/indices", + dtype=tf.int32) + + def correct_batch_indices(tensor, batch_size): + array = np.zeros((batch_size, max_pillars, 3), dtype=np.float32) + for i in range(batch_size): + array[i, :, 0] = i + return tensor + tf.constant(array, dtype=tf.int32) + + if batch_size > 1: + corrected_indices = tf.keras.layers.Lambda(lambda t: correct_batch_indices(t, batch_size))(input_indices) + else: + corrected_indices = input_indices + + # pillars + x = tf.keras.layers.Conv2D(nb_channels, (1, 1), activation='relu', use_bias=False, name="pillars/conv2d")(input_pillars) + x = 
tf.keras.layers.BatchNormalization(name="pillars/batchnorm", fused=True, epsilon=1e-3, momentum=0.01)(x) + x = tf.keras.layers.MaxPool2D((1, max_points), name="pillars/maxpooling2d")(x) + + if tf.keras.backend.image_data_format() == "channels_first": + reshape_shape = (nb_channels, max_pillars) + else: + reshape_shape = (max_pillars, nb_channels) + + x = tf.keras.layers.Reshape(reshape_shape, name="pillars/reshape")(x) + pillars = tf.keras.layers.Lambda(lambda inp: tf.scatter_nd(inp[0], inp[1], + (batch_size,) + image_size + (nb_channels,)), + name="pillars/scatter_nd")([corrected_indices, x]) + + # 2d cnn backbone + + # # Block1(S, 4, C) + # Block1(S, 4, C) + x = pillars + for n in range(4): + S = (2, 2) if n == 0 else (1, 1) + # # S = (1, 1) # pedestrian + x = tf.keras.layers.Conv2D(nb_channels, (3, 3), strides=S, padding="same", activation="relu", + name="cnn/block1/conv2d%i" % n)(x) + x = tf.keras.layers.BatchNormalization(name="cnn/block1/bn%i" % n, fused=True, epsilon=1e-3, momentum=0.01)(x) + x1 = x + + # Block2(2S, 6, 2C) + for n in range(6): + S = (2, 2) if n == 0 else (1, 1) + x = tf.keras.layers.Conv2D(2 * nb_channels, (3, 3), strides=S, padding="same", activation="relu", + name="cnn/block2/conv2d%i" % n)(x) + x = tf.keras.layers.BatchNormalization(name="cnn/block2/bn%i" % n, fused=True, epsilon=1e-3, momentum=0.01)(x) + x2 = x + + # Block3(4S, 6, 4C) + for n in range(6): + S = (2, 2) if n == 0 else (1, 1) + x = tf.keras.layers.Conv2D(4 * nb_channels, (3, 3), strides=S, padding="same", activation="relu", + name="cnn/block3/conv2d%i" % n)(x) + x = tf.keras.layers.BatchNormalization(name="cnn/block3/bn%i" % n, fused=True, epsilon=1e-3, momentum=0.01)(x) + x3 = x + + + # Up1 (S, S, 2C) + up1 = tf.keras.layers.Conv2DTranspose(2 * nb_channels, (1, 1), strides=(1, 1), padding="same", activation="relu", + name="cnn/up1/conv2dt")(x1) + up1 = tf.keras.layers.BatchNormalization(name="cnn/up1/bn", fused=True, epsilon=1e-3, momentum=0.01)(up1) + + # Up2 (2S, S, 2C) 
+ up2 = tf.keras.layers.Conv2DTranspose(2 * nb_channels, (2, 2), strides=(2, 2), padding="same", activation="relu", + name="cnn/up2/conv2dt")(x2) + up2 = tf.keras.layers.BatchNormalization(name="cnn/up2/bn", fused=True, epsilon=1e-3, momentum=0.01)(up2) + + # Up3 (4S, S, 2C) + up3 = tf.keras.layers.Conv2DTranspose(2 * nb_channels, (4, 4), strides=(4, 4), padding="same", activation="relu", + name="cnn/up3/conv2dt")(x3) + up3 = tf.keras.layers.BatchNormalization(name="cnn/up3/bn", fused=True, epsilon=1e-3, momentum=0.01)(up3) + + # Concat + concat = tf.keras.layers.Concatenate(name="cnn/concatenate")([up1, up2, up3]) + + # Detection head + occ = tf.keras.layers.Conv2D(nb_anchors, (1, 1), name="occupancy/conv2d", activation="sigmoid")(concat) + + loc = tf.keras.layers.Conv2D(nb_anchors * 3, (1, 1), name="loc/conv2d", kernel_initializer=tf.keras.initializers.TruncatedNormal(0, 0.001))(concat) + loc = tf.keras.layers.Reshape(tuple(i//2 for i in image_size) + (nb_anchors, 3), name="loc/reshape")(loc) + + size = tf.keras.layers.Conv2D(nb_anchors * 3, (1, 1), name="size/conv2d", kernel_initializer=tf.keras.initializers.TruncatedNormal(0, 0.001))(concat) + size = tf.keras.layers.Reshape(tuple(i//2 for i in image_size) + (nb_anchors, 3), name="size/reshape")(size) + + angle = tf.keras.layers.Conv2D(nb_anchors, (1, 1), name="angle/conv2d")(concat) + + heading = tf.keras.layers.Conv2D(nb_anchors, (1, 1), name="heading/conv2d", activation="sigmoid")(concat) + + # clf = tf.keras.layers.Conv2D(nb_anchors * nb_classes, (1, 1), name="clf/conv2d")(concat) + # clf = tf.keras.layers.Reshape(tuple(i // 2 for i in image_size) + (nb_anchors, nb_classes), name="clf/reshape")(clf) + + pillar_net = tf.keras.models.Model([input_pillars, input_indices], [occ, loc, size, angle, heading]) +# print(pillar_net.summary()) + + return pillar_net diff --git a/network_v2_2.py b/network_v2_2.py new file mode 100644 index 0000000..f30b6aa --- /dev/null +++ b/network_v2_2.py @@ -0,0 +1,121 @@ +import 
tensorflow as tf +import numpy as np +# from config import Parameters +from config_v2_2 import Parameters + + +def build_point_pillar_graph(params: Parameters): + + # extract required parameters + max_pillars = int(params.max_pillars) + max_points = int(params.max_points_per_pillar) + nb_features = int(params.nb_features) + nb_channels = int(params.nb_channels) + batch_size = int(params.batch_size) + image_size = tuple([params.Xn, params.Yn]) + nb_classes = int(params.nb_classes) + nb_anchors = len(params.anchor_dims) + + if tf.keras.backend.image_data_format() == "channels_first": + raise NotImplementedError + else: + input_shape = (max_pillars, max_points, nb_features) + + input_pillars = tf.keras.layers.Input(input_shape, batch_size=batch_size, name="pillars/input") + input_indices = tf.keras.layers.Input((max_pillars, 3), batch_size=batch_size, name="pillars/indices", + dtype=tf.int32) + + def correct_batch_indices(tensor, batch_size): + array = np.zeros((batch_size, max_pillars, 3), dtype=np.float32) + for i in range(batch_size): + array[i, :, 0] = i + return tensor + tf.constant(array, dtype=tf.int32) + + if batch_size > 1: + corrected_indices = tf.keras.layers.Lambda(lambda t: correct_batch_indices(t, batch_size))(input_indices) + else: + corrected_indices = input_indices + + # pillars + x = tf.keras.layers.Conv2D(nb_channels, (1, 1), activation='relu', use_bias=False, name="pillars/conv2d")(input_pillars) + x = tf.keras.layers.BatchNormalization(name="pillars/batchnorm", fused=True, epsilon=1e-3, momentum=0.01)(x) + x = tf.keras.layers.MaxPool2D((1, max_points), name="pillars/maxpooling2d")(x) + + if tf.keras.backend.image_data_format() == "channels_first": + reshape_shape = (nb_channels, max_pillars) + else: + reshape_shape = (max_pillars, nb_channels) + + x = tf.keras.layers.Reshape(reshape_shape, name="pillars/reshape")(x) + pillars = tf.keras.layers.Lambda(lambda inp: tf.scatter_nd(inp[0], inp[1], + (batch_size,) + image_size + (nb_channels,)), + 
name="pillars/scatter_nd")([corrected_indices, x]) + + # 2d cnn backbone + + # # Block1(S, 4, C) + # Block1(S, 4, C) + x = pillars + for n in range(4): + S = (2, 2) if n == 0 else (1, 1) + # # S = (1, 1) # pedestrian + x = tf.keras.layers.Conv2D(nb_channels, (3, 3), strides=S, padding="same", activation="relu", + name="cnn/block1/conv2d%i" % n)(x) + x = tf.keras.layers.BatchNormalization(name="cnn/block1/bn%i" % n, fused=True, epsilon=1e-3, momentum=0.01)(x) + x1 = x + + # Block2(2S, 6, 2C) + for n in range(6): + S = (2, 2) if n == 0 else (1, 1) + x = tf.keras.layers.Conv2D(2 * nb_channels, (3, 3), strides=S, padding="same", activation="relu", + name="cnn/block2/conv2d%i" % n)(x) + x = tf.keras.layers.BatchNormalization(name="cnn/block2/bn%i" % n, fused=True, epsilon=1e-3, momentum=0.01)(x) + x2 = x + + # Block3(4S, 6, 4C) + for n in range(6): + S = (2, 2) if n == 0 else (1, 1) + x = tf.keras.layers.Conv2D(4 * nb_channels, (3, 3), strides=S, padding="same", activation="relu", + name="cnn/block3/conv2d%i" % n)(x) + x = tf.keras.layers.BatchNormalization(name="cnn/block3/bn%i" % n, fused=True, epsilon=1e-3, momentum=0.01)(x) + x3 = x + + + # Up1 (S, S, 2C) + up1 = tf.keras.layers.Conv2DTranspose(2 * nb_channels, (1, 1), strides=(1, 1), padding="same", activation="relu", + name="cnn/up1/conv2dt")(x1) + up1 = tf.keras.layers.BatchNormalization(name="cnn/up1/bn", fused=True, epsilon=1e-3, momentum=0.01)(up1) + + # Up2 (2S, S, 2C) + up2 = tf.keras.layers.Conv2DTranspose(2 * nb_channels, (2, 2), strides=(2, 2), padding="same", activation="relu", + name="cnn/up2/conv2dt")(x2) + up2 = tf.keras.layers.BatchNormalization(name="cnn/up2/bn", fused=True, epsilon=1e-3, momentum=0.01)(up2) + + # Up3 (4S, S, 2C) + up3 = tf.keras.layers.Conv2DTranspose(2 * nb_channels, (4, 4), strides=(4, 4), padding="same", activation="relu", + name="cnn/up3/conv2dt")(x3) + up3 = tf.keras.layers.BatchNormalization(name="cnn/up3/bn", fused=True, epsilon=1e-3, momentum=0.01)(up3) + + # Concat + 
concat = tf.keras.layers.Concatenate(name="cnn/concatenate")([up1, up2, up3]) + + # Detection head + occ = tf.keras.layers.Conv2D(nb_anchors, (1, 1), name="occupancy/conv2d", activation="sigmoid")(concat) + + loc = tf.keras.layers.Conv2D(nb_anchors * 3, (1, 1), name="loc/conv2d", kernel_initializer=tf.keras.initializers.TruncatedNormal(0, 0.001))(concat) + loc = tf.keras.layers.Reshape(tuple(i//2 for i in image_size) + (nb_anchors, 3), name="loc/reshape")(loc) + + size = tf.keras.layers.Conv2D(nb_anchors * 3, (1, 1), name="size/conv2d", kernel_initializer=tf.keras.initializers.TruncatedNormal(0, 0.001))(concat) + size = tf.keras.layers.Reshape(tuple(i//2 for i in image_size) + (nb_anchors, 3), name="size/reshape")(size) + + angle = tf.keras.layers.Conv2D(nb_anchors, (1, 1), name="angle/conv2d")(concat) + + heading = tf.keras.layers.Conv2D(nb_anchors, (1, 1), name="heading/conv2d", activation="sigmoid")(concat) + + # clf = tf.keras.layers.Conv2D(nb_anchors * nb_classes, (1, 1), name="clf/conv2d")(concat) + # clf = tf.keras.layers.Reshape(tuple(i // 2 for i in image_size) + (nb_anchors, nb_classes), name="clf/reshape")(clf) + + pillar_net = tf.keras.models.Model([input_pillars, input_indices], [occ, loc, size, angle, heading]) +# print(pillar_net.summary()) + + return pillar_net diff --git a/point_pillars_check_input.py b/point_pillars_check_input.py new file mode 100644 index 0000000..8ab59f6 --- /dev/null +++ b/point_pillars_check_input.py @@ -0,0 +1,148 @@ +import os +from glob import glob +import numpy as np +import tensorflow as tf +# from processors import SimpleDataGenerator +# from custom_processors import CustomDataGenerator, AnalyseCustomDataGenerator +from point_pillars_custom_processors_v2 import CustomDataGenerator, AnalyseCustomDataGenerator +from inference_utils_v2 import generate_bboxes_from_pred, GroundTruthGenerator, focal_loss_checker +from inference_utils_v2 import rotational_nms, generate_bboxes_from_pred_and_np_array +from readers import 
KittiDataReader +from config_v2 import Parameters +from network import build_point_pillar_graph + + +from point_viz.converter import PointvizConverter + +DATA_ROOT = "/media/data3/tjtanaa/kitti_dataset/" +# MODEL_ROOT = "./logs_Car_Pedestrian_Custom_Dataset_single_process" +MODEL_ROOT = "./logs_Car_Pedestrian_Custom_Dataset_No_Early_Stopping_Input_Coordinate_Analysis_v2" + +os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" +os.environ["CUDA_VISIBLE_DEVICES"] = "2" + + +def limit_period(val, offset=0.5, period=np.pi): + return val - np.floor(val / period + offset) * period + +if __name__ == "__main__": + + params = Parameters() + # save_viz_path = "/home/tan/tjtanaa/PointPillars/visualization/custom_prediction_multiprocessing" + # save_viz_path = "/home/tan/tjtanaa/PointPillars/visualization/input_coordinate_analysis_point_pillar_v2_gt_only" + # save_viz_path = "/home/tan/tjtanaa/PointPillars/visualization/input_coordinate_analysis_point_pillar_v2_labels_only" + save_viz_path = "/home/tan/tjtanaa/PointPillars/visualization/pedestrian_input_coordinate_analysis_point_pillar_v2_gt_and_labels" + # Initialize and setup output directory. 
+ Converter = PointvizConverter(save_viz_path) + + gt_database_dir = os.path.join(DATA_ROOT, "gt_database") + + validation_gen = AnalyseCustomDataGenerator(batch_size=params.batch_size, root_dir=DATA_ROOT, + npoints=16384, split='train_val_test',random_select=False, classes=list(params.classes_map.keys())) + + for sample_id in validation_gen.sample_id_list: + print(sample_id) + + # for batch_idx in range(0,20): + # [pillars, voxels], [occupancy_, position_, size_, angle_, heading_, classification_], [pts_input, gt_boxes3d, sample] = validation_gen[batch_idx] + + + # set_boxes, confidences = [], [] + # loop_range = occupancy_.shape[0] if len(occupancy_.shape) == 4 else 1 + # for i in range(loop_range): + # # set_box, predicted_boxes3d = generate_bboxes_from_pred_and_np_array(occupancy[i], position[i], size[i], angle[i], + # # heading[i], + # # classification[i], params.anchor_dims, occ_threshold=0.15) + # gt_set_box, decoded_gt_boxes3d = generate_bboxes_from_pred_and_np_array(occupancy_[i], position_[i], size_[i], angle_[i], + # heading_[i], + # classification_[i], params.anchor_dims, occ_threshold=0.5) + + # # exit() + # gt_boxes3d_ = gt_boxes3d[i] + + # print(gt_boxes3d_.shape) + # gt_bbox_params = np.stack([gt_boxes3d_[:,3], gt_boxes3d_[:,5], gt_boxes3d_[:,4], + # gt_boxes3d_[:,1], gt_boxes3d_[:,2] , + # gt_boxes3d_[:,0], + # gt_boxes3d_[:,6]], axis=1) + + + # gt_bbox_params_list = gt_bbox_params.tolist() + # # gt_bbox_params_list = [] + # # print(gt_bbox_params_list) + # # print(len(gt_bbox_params_list)) + # # print(len(gt_bbox_params_list[0])) + + # for k in range(len(gt_bbox_params_list)): + # msg = "%.5f, %.5f"%(gt_bbox_params_list[k][3],gt_bbox_params_list[k][5]) + # gt_bbox_params_list[k].append("Green") + # gt_bbox_params_list[k].append(msg) + + # if len(gt_set_box) > 0: + # decoded_gt_boxes3d_ = decoded_gt_boxes3d + # # bbox_params = validation_gen.convert_predictions_into_point_viz_format(predicted_boxes3d[:,[1, 2, 0, 5, 3, 4, 6 ]]) + + # 
print(decoded_gt_boxes3d_.shape) + # # print(predicted_boxes3d_) + # # print(size[i]) + + # bbox_params = np.stack([decoded_gt_boxes3d_[:,3], decoded_gt_boxes3d_[:,5], decoded_gt_boxes3d_[:,4], + # decoded_gt_boxes3d_[:,1], decoded_gt_boxes3d_[:,2] , + # decoded_gt_boxes3d_[:,0], + # decoded_gt_boxes3d_[:,6]], axis=1) + + + # # bbox_params = np.stack([predicted_boxes3d[:,4], predicted_boxes3d[:,5], predicted_boxes3d[:,3], + # # predicted_boxes3d[:,1], -(predicted_boxes3d[:,2] - predicted_boxes3d[:,5] / 2), + # # predicted_boxes3d[:,0], + # # predicted_boxes3d[:,6]], axis=1) + + # bbox_params_list = bbox_params.tolist() + # # bbox_labels_conf = [str(predicted_boxes3d[k,9]) for k in range(predicted_boxes3d.shape[0])] + # for k in range(decoded_gt_boxes3d.shape[0]): + # msg = "%.5f, %.5f"%(bbox_params_list[k][3],bbox_params_list[k][5]) + # # msg = (str(bbox_params_list[k][3:5])) + # bbox_params_list[k].append("Magenta") + # bbox_params_list[k].append(msg) + # # bbox_params_list[k].append(str(decoded_gt_boxes3d[k,9]) + params.map_classes[int(decoded_gt_boxes3d[k,8])]) + # gt_bbox_params_list.append(bbox_params_list[k]) + + # # print(gt_bbox_params_list) + # # print(gt_bbox_params.tolist()) + + # coor = pts_input[i][:,[1,2,0]] + # # coor[:,1] *= -1 + # Converter.compile("val_custom_sample_{}".format(batch_idx * params.batch_size+i), coors=coor, intensity=pts_input[i][:,3], + # bbox_params=gt_bbox_params_list) + + # exit() + # set_boxes.append(set_box) + # # set_boxes.append(generate_bboxes_from_pred(occupancy, position, size, angle, heading, + # # classification, params.anchor_dims, occ_threshold=0.1)) + # # confidences.append([float(boxes.conf) for boxes in set_boxes[-1]]) + + # sum_bboxes = 0 + # for h in range(len(set_boxes)): + # sum_bboxes += len(set_boxes[h]) + + # print('Batch ', str(batch_idx) ,': Box predictions with occupancy > occ_thr: ', sum_bboxes) + # print('Scene 1: Box predictions with occupancy > occ_thr: ', len(set_boxes[0])) + # exit() + # 
print(set_boxes[-1]) + + # # NMS + # nms_boxes = rotational_nms(set_boxes, confidences, occ_threshold=0.7, nms_iou_thr=0.5) + + # print('Scene 1: Boxes after NMS with iou_thr: ', len(nms_boxes[0])) + + # # Do all the further operations on predicted_boxes array, which contains the predicted bounding boxes + # gt_gen = GroundTruthGenerator(data_reader, label_files, calibration_files, network_format=False) + # gt_gen0 = GroundTruthGenerator(data_reader, label_files, calibration_files, network_format=True) + # for seq_boxes, gt_label, gt0 in zip(nms_boxes, gt_gen, gt_gen0): + # print("---------- New Scenario ---------- ") + # focal_loss_checker(gt0[0], occupancy[0], n_occs=-1) + # print("---------- ------------ ---------- ") + # for gt in gt_label: + # print(gt) + # for pred in seq_boxes: + # print(pred) diff --git a/point_pillars_custom_prediction.py b/point_pillars_custom_prediction.py index 96cc9e6..43e1003 100644 --- a/point_pillars_custom_prediction.py +++ b/point_pillars_custom_prediction.py @@ -5,20 +5,20 @@ # from processors import SimpleDataGenerator # from custom_processors import CustomDataGenerator, AnalyseCustomDataGenerator from point_pillars_custom_processors_v2 import CustomDataGenerator, AnalyseCustomDataGenerator -from inference_utils import generate_bboxes_from_pred, GroundTruthGenerator, focal_loss_checker, rotational_nms, generate_bboxes_from_pred_and_np_array +from inference_utils_v2 import generate_bboxes_from_pred, GroundTruthGenerator +from inference_utils_v2 import focal_loss_checker, rotational_nms, generate_bboxes_from_pred_and_np_array from readers import KittiDataReader -from config import Parameters +from config_v2 import Parameters from network import build_point_pillar_graph from point_viz.converter import PointvizConverter DATA_ROOT = "/media/data3/tjtanaa/kitti_dataset/" -# MODEL_ROOT = "./logs_Car_Pedestrian_Custom_Dataset_single_process" -MODEL_ROOT = 
"./logs_Car_Pedestrian_Custom_Dataset_No_Early_Stopping_Input_Coordinate_Analysis_v2" +MODEL_ROOT = "./logs_Car_Pedestrian_Custom_Dataset_No_Early_Stopping_wo_Aug_wo_val" os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" -os.environ["CUDA_VISIBLE_DEVICES"] = "2" +os.environ["CUDA_VISIBLE_DEVICES"] = "0" def limit_period(val, offset=0.5, period=np.pi): @@ -27,22 +27,13 @@ def limit_period(val, offset=0.5, period=np.pi): if __name__ == "__main__": params = Parameters() - # pillar_net = build_point_pillar_graph(params) - # pillar_net.load_weights(os.path.join(MODEL_ROOT, "model.h5")) - # pillar_net.summary() - - # data_reader = KittiDataReader() - - # lidar_files = sorted(glob(os.path.join(DATA_ROOT, "velodyne", "*.bin"))) - # label_files = sorted(glob(os.path.join(DATA_ROOT, "label_2", "*.txt"))) - # calibration_files = sorted(glob(os.path.join(DATA_ROOT, "calib", "*.txt"))) - # assert len(lidar_files) == len(label_files) == len(calibration_files), "Input dirs require equal number of files." - # eval_gen = SimpleDataGenerator(data_reader, params.batch_size, lidar_files, label_files, calibration_files) - - + pillar_net = build_point_pillar_graph(params) + pillar_net.load_weights(os.path.join(MODEL_ROOT, "model.h5")) + pillar_net.summary() + exit() # save_viz_path = "/home/tan/tjtanaa/PointPillars/visualization/custom_prediction_multiprocessing" - save_viz_path = "/home/tan/tjtanaa/PointPillars/visualization/custom_prediction_input_coordinate_analysis_point_pillar_v2" + save_viz_path = os.path.join("/home/tan/tjtanaa/PointPillars/visualization", MODEL_ROOT.split('/')[-1]) # Initialize and setup output directory. 
Converter = PointvizConverter(save_viz_path) @@ -59,11 +50,13 @@ def limit_period(val, offset=0.5, period=np.pi): validation_gen = AnalyseCustomDataGenerator(batch_size=params.batch_size, root_dir=DATA_ROOT, npoints=20000, split='val',random_select=False, classes=list(params.classes_map.keys())) + # validation_gen = AnalyseCustomDataGenerator(batch_size=params.batch_size, root_dir=DATA_ROOT, + # npoints=20000, split='val',random_select=False, classes=list(params.classes_map.keys())) for batch_idx in range(0,20): [pillars, voxels], [occupancy_, position_, size_, angle_, heading_, classification_], [pts_input, gt_boxes3d, sample] = validation_gen[batch_idx] - # occupancy, position, size, angle, heading, classification = pillar_net.predict([pillars, voxels]) + occupancy, position, size, angle, heading, classification = pillar_net.predict([pillars, voxels]) # angle = limit_period(angle, offset=0.5, period=2*np.pi) @@ -76,13 +69,19 @@ def limit_period(val, offset=0.5, period=np.pi): set_boxes, confidences = [], [] loop_range = occupancy_.shape[0] if len(occupancy_.shape) == 4 else 1 for i in range(loop_range): - # set_box, predicted_boxes3d = generate_bboxes_from_pred_and_np_array(occupancy[i], position[i], size[i], angle[i], - # heading[i], - # classification[i], params.anchor_dims, occ_threshold=0.15) + set_box, predicted_boxes3d = generate_bboxes_from_pred_and_np_array(occupancy[i], position[i], size[i], angle[i], + heading[i], + classification[i], params.anchor_dims, occ_threshold=0.5) - gt_boxes3d_ = gt_boxes3d[i] - print(gt_boxes3d_.shape) + _, decoded_gt_boxes3d = generate_bboxes_from_pred_and_np_array(occupancy_[i], position_[i], size_[i], angle_[i], + heading_[i], + classification_[i], params.anchor_dims, occ_threshold=0.4) + + # gt_boxes3d_ = gt_boxes3d[i] + gt_boxes3d_ = decoded_gt_boxes3d + + # print(gt_boxes3d_.shape) gt_bbox_params = np.stack([gt_boxes3d_[:,3], gt_boxes3d_[:,5], gt_boxes3d_[:,4], gt_boxes3d_[:,1], gt_boxes3d_[:,2] , gt_boxes3d_[:,0], @@ 
-90,39 +89,40 @@ def limit_period(val, offset=0.5, period=np.pi): gt_bbox_params_list = gt_bbox_params.tolist() + # gt_bbox_params_list = [] for k in range(len(gt_bbox_params_list)): + msg = "%.5f, %s, %.5f"%(decoded_gt_boxes3d[k,9], params.map_classes[int(decoded_gt_boxes3d[k,8])], decoded_gt_boxes3d[k,6]) + # msg = "%.5f, %.5f"%(gt_bbox_params_list[k][3],gt_bbox_params_list[k][5]) gt_bbox_params_list[k].append("Green") - gt_bbox_params_list[k].append("1.0") - - # if len(set_box) > 0: - # predicted_boxes3d_ = predicted_boxes3d - # # bbox_params = validation_gen.convert_predictions_into_point_viz_format(predicted_boxes3d[:,[1, 2, 0, 5, 3, 4, 6 ]]) + # gt_bbox_params_list[k].append("1.0") + gt_bbox_params_list[k].append(msg) - # # print(predicted_boxes3d_.shape) - # # print(predicted_boxes3d_) - # # print(size[i]) + if len(set_box) > 0: + predicted_boxes3d_ = predicted_boxes3d + # bbox_params = validation_gen.convert_predictions_into_point_viz_format(predicted_boxes3d[:,[1, 2, 0, 5, 3, 4, 6 ]]) - # bbox_params = np.stack([predicted_boxes3d_[:,3], predicted_boxes3d_[:,5], predicted_boxes3d_[:,4], - # predicted_boxes3d_[:,1], (predicted_boxes3d_[:,2] - predicted_boxes3d_[:,5] / 2) , - # predicted_boxes3d_[:,0], - # predicted_boxes3d_[:,6]], axis=1) + print("batch_idx: ", batch_idx * params.batch_size + i, " has ", predicted_boxes3d_.shape, "predictions") + # print(predicted_boxes3d_) + # print(size[i]) + bbox_params = np.stack([predicted_boxes3d_[:,3], predicted_boxes3d_[:,5], predicted_boxes3d_[:,4], + predicted_boxes3d_[:,1], predicted_boxes3d_[:,2] , + predicted_boxes3d_[:,0], + predicted_boxes3d_[:,6]], axis=1) - # # bbox_params = np.stack([predicted_boxes3d[:,4], predicted_boxes3d[:,5], predicted_boxes3d[:,3], - # # predicted_boxes3d[:,1], -(predicted_boxes3d[:,2] - predicted_boxes3d[:,5] / 2), - # # predicted_boxes3d[:,0], - # # predicted_boxes3d[:,6]], axis=1) - # bbox_params_list = bbox_params.tolist() - # # bbox_labels_conf = [str(predicted_boxes3d[k,9]) for 
k in range(predicted_boxes3d.shape[0])] - # for k in range(predicted_boxes3d.shape[0]): - # bbox_params_list[k].append("Magenta") - # bbox_params_list[k].append(str(predicted_boxes3d[k,9]) + params.map_classes[int(predicted_boxes3d[k,8])]) - # gt_bbox_params_list.append(bbox_params_list[k]) + bbox_params_list = bbox_params.tolist() + # bbox_labels_conf = [str(predicted_boxes3d[k,9]) for k in range(predicted_boxes3d.shape[0])] + for k in range(predicted_boxes3d.shape[0]): + msg = "%.5f, %s, %.5f"%(predicted_boxes3d[k,9],params.map_classes[int(predicted_boxes3d[k,8])], predicted_boxes3d[k,6]) + bbox_params_list[k].append("Magenta") + bbox_params_list[k].append(msg) + # bbox_params_list[k].append(str(predicted_boxes3d[k,9]) + "=" + params.map_classes[int(predicted_boxes3d[k,8])]) + gt_bbox_params_list.append(bbox_params_list[k]) coor = pts_input[i][:,[1,2,0]] # coor[:,1] *= -1 - Converter.compile("train_custom_sample_{}".format(batch_idx * params.batch_size+i), coors=coor, intensity=pts_input[i][:,3], + Converter.compile("val_custom_sample_{}".format(batch_idx * params.batch_size+i), coors=coor, intensity=pts_input[i][:,3], bbox_params=gt_bbox_params_list) # set_boxes.append(set_box) diff --git a/point_pillars_custom_prediction_v2_2.py b/point_pillars_custom_prediction_v2_2.py new file mode 100644 index 0000000..ac942d3 --- /dev/null +++ b/point_pillars_custom_prediction_v2_2.py @@ -0,0 +1,165 @@ +import os +from glob import glob +import numpy as np +import tensorflow as tf +from point_pillars_custom_processors_v2_2 import CustomDataGenerator, AnalyseCustomDataGenerator +from inference_utils_v2 import generate_bboxes_from_pred, GroundTruthGenerator +from inference_utils_v2 import focal_loss_checker, rotational_nms, generate_bboxes_from_pred_and_np_array +from readers import KittiDataReader +from config_v2_2 import Parameters +from network_v2_2 import build_point_pillar_graph +from datetime import datetime + +from point_viz.converter import PointvizConverter + 
+DATA_ROOT = "/media/data3/tjtanaa/kitti_dataset/" +MODEL_ROOT = "./logs_Car_Custom_Dataset_No_Early_Stopping_wo_Aug_wo_val_new_network" + +os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" +os.environ["CUDA_VISIBLE_DEVICES"] = "3" + + +def limit_period(val, offset=0.5, period=np.pi): + return val - np.floor(val / period + offset) * period + +if __name__ == "__main__": + + params = Parameters() + pillar_net = build_point_pillar_graph(params) + pillar_net.load_weights(os.path.join(MODEL_ROOT, "model.h5")) + pillar_net.summary() + + # save_viz_path = "/home/tan/tjtanaa/PointPillars/visualization/custom_prediction_multiprocessing" + save_viz_path = os.path.join("/home/tan/tjtanaa/PointPillars/visualization", MODEL_ROOT.split('/')[-1]) + # Initialize and setup output directory. + Converter = PointvizConverter(save_viz_path) + + + + + + + gt_database_dir = os.path.join(DATA_ROOT, "gt_database") + + # validation_gen = AnalyseCustomDataGenerator(batch_size=params.batch_size,root_dir = DATA_ROOT, + # npoints=20000, split='train', classes=list(params.classes_map.keys()), + # random_select=True, gt_database_dir=None, aug_hard_ratio=0.7) + + validation_gen = AnalyseCustomDataGenerator(batch_size=params.batch_size, root_dir=DATA_ROOT, + npoints=20000, split='train_val_test',random_select=False, classes=list(params.classes_map.keys())) + # validation_gen = AnalyseCustomDataGenerator(batch_size=params.batch_size, root_dir=DATA_ROOT, + # npoints=20000, split='val',random_select=False, classes=list(params.classes_map.keys())) + + inference_duration = [] + + for batch_idx in range(0,70): + [pillars, voxels], [occupancy_, position_, size_, angle_, heading_], [pts_input, gt_boxes3d, sample] = validation_gen[batch_idx] + + + start=datetime.now() + + occupancy, position, size, angle, heading = pillar_net.predict([pillars, voxels]) + + inference_duration.append( datetime.now()-start) + + # angle = limit_period(angle, offset=0.5, period=2*np.pi) + + classification = 
np.zeros(shape=np.array(occupancy).shape) + classification_ = classification + # occupancy[:,:,:,:2] = 0 + + # print(occupancy.shape) + # exit() + + set_boxes, confidences = [], [] + loop_range = occupancy_.shape[0] if len(occupancy_.shape) == 4 else 1 + for i in range(loop_range): + set_box, predicted_boxes3d = generate_bboxes_from_pred_and_np_array(occupancy[i], position[i], size[i], angle[i], + heading[i], + classification[i], params.anchor_dims, occ_threshold=0.5) + + + _, decoded_gt_boxes3d = generate_bboxes_from_pred_and_np_array(occupancy_[i], position_[i], size_[i], angle_[i], + heading_[i], + classification_[i], params.anchor_dims, occ_threshold=0.4) + + # gt_boxes3d_ = gt_boxes3d[i] + gt_boxes3d_ = decoded_gt_boxes3d + + print(gt_boxes3d_.shape) + if(len(gt_boxes3d_) == 0): + gt_bbox_params_list = [] + else: + gt_bbox_params = np.stack([gt_boxes3d_[:,3], gt_boxes3d_[:,5], gt_boxes3d_[:,4], + gt_boxes3d_[:,1], gt_boxes3d_[:,2] , + gt_boxes3d_[:,0], + gt_boxes3d_[:,6]], axis=1) + + + gt_bbox_params_list = gt_bbox_params.tolist() + # gt_bbox_params_list = [] + for k in range(len(gt_bbox_params_list)): + msg = "%.5f, %s, %.5f"%(decoded_gt_boxes3d[k,9], params.map_classes[int(decoded_gt_boxes3d[k,8])], decoded_gt_boxes3d[k,6]) + # msg = "%.5f, %.5f"%(gt_bbox_params_list[k][3],gt_bbox_params_list[k][5]) + gt_bbox_params_list[k].append("Green") + # gt_bbox_params_list[k].append("1.0") + gt_bbox_params_list[k].append(msg) + + if len(set_box) > 0: + predicted_boxes3d_ = predicted_boxes3d + # bbox_params = validation_gen.convert_predictions_into_point_viz_format(predicted_boxes3d[:,[1, 2, 0, 5, 3, 4, 6 ]]) + + print("batch_idx: ", batch_idx * params.batch_size + i, " has ", predicted_boxes3d_.shape, "predictions") + # print(predicted_boxes3d_) + # print(size[i]) + + bbox_params = np.stack([predicted_boxes3d_[:,3], predicted_boxes3d_[:,5], predicted_boxes3d_[:,4], + predicted_boxes3d_[:,1], predicted_boxes3d_[:,2] , + predicted_boxes3d_[:,0], + 
predicted_boxes3d_[:,6]], axis=1) + + + bbox_params_list = bbox_params.tolist() + # bbox_labels_conf = [str(predicted_boxes3d[k,9]) for k in range(predicted_boxes3d.shape[0])] + for k in range(predicted_boxes3d.shape[0]): + msg = "%.5f, %s, %.5f"%(predicted_boxes3d[k,9],params.map_classes[int(predicted_boxes3d[k,8])], predicted_boxes3d[k,6]) + bbox_params_list[k].append("Magenta") + bbox_params_list[k].append(msg) + # bbox_params_list[k].append(str(predicted_boxes3d[k,9]) + "=" + params.map_classes[int(predicted_boxes3d[k,8])]) + gt_bbox_params_list.append(bbox_params_list[k]) + + coor = pts_input[i][:,[1,2,0]] + # coor[:,1] *= -1 + Converter.compile("val_custom_sample_{}".format(batch_idx * params.batch_size+i), coors=coor, intensity=pts_input[i][:,3], + bbox_params=gt_bbox_params_list) + print("Average runtime speed: ", np.mean(inference_duration[20:])) + # set_boxes.append(set_box) + # # set_boxes.append(generate_bboxes_from_pred(occupancy, position, size, angle, heading, + # # classification, params.anchor_dims, occ_threshold=0.1)) + # # confidences.append([float(boxes.conf) for boxes in set_boxes[-1]]) + + # sum_bboxes = 0 + # for h in range(len(set_boxes)): + # sum_bboxes += len(set_boxes[h]) + + # print('Batch ', str(batch_idx) ,': Box predictions with occupancy > occ_thr: ', sum_bboxes) + # print('Scene 1: Box predictions with occupancy > occ_thr: ', len(set_boxes[0])) + # exit() + # print(set_boxes[-1]) + + # # NMS + # nms_boxes = rotational_nms(set_boxes, confidences, occ_threshold=0.7, nms_iou_thr=0.5) + + # print('Scene 1: Boxes after NMS with iou_thr: ', len(nms_boxes[0])) + + # # Do all the further operations on predicted_boxes array, which contains the predicted bounding boxes + # gt_gen = GroundTruthGenerator(data_reader, label_files, calibration_files, network_format=False) + # gt_gen0 = GroundTruthGenerator(data_reader, label_files, calibration_files, network_format=True) + # for seq_boxes, gt_label, gt0 in zip(nms_boxes, gt_gen, gt_gen0): + # 
print("---------- New Scenario ---------- ") + # focal_loss_checker(gt0[0], occupancy[0], n_occs=-1) + # print("---------- ------------ ---------- ") + # for gt in gt_label: + # print(gt) + # for pred in seq_boxes: + # print(pred) diff --git a/point_pillars_custom_processors_v2.py b/point_pillars_custom_processors_v2.py index 3c825c4..ae2b17c 100644 --- a/point_pillars_custom_processors_v2.py +++ b/point_pillars_custom_processors_v2.py @@ -79,7 +79,7 @@ def make_ground_truth(self, gt_boxes_3d: Any, gt_cls_type_list: List[str]): a = int(self.anchor_dims.shape[0]) return np.zeros((pX, pY, a), dtype='float32'), np.zeros((pX, pY, a, self.nb_dims), dtype='float32'), \ np.zeros((pX, pY, a, self.nb_dims), dtype='float32'), np.zeros((pX, pY, a), dtype='float32'), \ - np.zeros((pX, pY, a, self.nb_classes), dtype='float64') + np.zeros((pX, pY, a), dtype='float32'), np.zeros((pX, pY, a, self.nb_classes), dtype='float64') # For each label file, generate these properties except for the Don't care class @@ -127,8 +127,11 @@ def make_ground_truth(self, gt_boxes_3d: Any, gt_cls_type_list: List[str]): # return a merged target view for all objects in the ground truth and get categorical labels # print("target.shape: ", target.shape) sel = select_best_anchors(target) + # print("self.shape: ", sel[...,0].shape) ohe = tf.keras.utils.to_categorical(sel[..., 9], num_classes=self.nb_classes, dtype='float64') # print("self.shape: ", sel[...,0].shape) + # print("ohe.shape: ", ohe.shape) + # print("sel[8].shape: ",sel[..., 8].shape) return sel[..., 0], sel[..., 1:4], sel[..., 4:7], sel[..., 7], sel[..., 8], ohe @@ -149,7 +152,8 @@ def __init__(self, batch_size: int, root_dir:str, npoints:int =16384, split: str # self.data_reader = data_reader self.batch_size = batch_size self.sample_id_list=self.get_sample_id_list() - self.split = split + # self.split = split + # print("CustomDataGenerator: " ,self.split) def get_sample(self, index): @@ -219,13 +223,15 @@ def __getitem__(self, batch_id: 
int): angle = np.array(angle) heading = np.array(heading) classification = np.array(classification) - return [pillars, voxels], [occupancy, position, size, angle, heading, classification] + # return [pillars, voxels], [occupancy, position, size, angle, heading, classification] # network + return [pillars, voxels], [occupancy, position, size, angle, heading] # network_v2 else: return [pillars, voxels] def on_epoch_end(self): # print("inside epoch") if self.split=='train' or self.split =='val': + # pass self.sample_id_list=shuffle(self.sample_id_list) @@ -245,19 +251,15 @@ def __init__(self, batch_size: int, root_dir:str, npoints:int =16384, split: str # self.data_reader = data_reader self.batch_size = batch_size self.sample_id_list=self.get_sample_id_list() - self.split = split + # self.split = split + # self.split = split + # print("AnalyseCustomDataGenerator: " ,self.split) - def get_sample(self, index): + def _get_sample(self, index): return super().get_sample(index) - - # def convert_labels_into_point_viz_format(self, gt_boxes3d): - # gt_boxes3d = gt_boxes3d[:,[3,4,5,0,1,2, 6]] # [xyz,3l4w5h,ry] => [3l,5h,4w] - # gt_boxes3d[:,5] -= (gt_boxes3d[:,2] /2) - # return gt_boxes3d - def __len__(self): return len(self.sample_id_list) // self.batch_size @@ -284,7 +286,7 @@ def __getitem__(self, batch_id: int): for i in file_ids: # print(i) # print(type(i)) - sample = self.get_sample(i) + sample = self._get_sample(i) # For each file, dividing the space into a x-y grid to create pillars pts_lidar = sample['calib'].rect_to_lidar(sample['pts_rect']) # print(pts_lidar.shape) @@ -320,7 +322,7 @@ def __getitem__(self, batch_id: int): # Voxels are the pillar ids pillars_, voxels_ = self.make_point_pillars(pts_input) - print(pillars_.shape, voxels_.shape) + # print(pillars_.shape, voxels_.shape) # for i in range(10): # print(pillars_[0,0,i,:]) # print(np.sum(pillars_ > 0)) @@ -330,10 +332,15 @@ def __getitem__(self, batch_id: int): voxels.append(voxels_) # 
print(sample['gt_cls_type_list']) - + # print("split: ", self.split) if self.split=='train' or self.split =='val': + # print(len(gt_boxes3d), ", ", len(sample['gt_cls_type_list'])) + if (len(gt_boxes3d) == 0): + print("file id: ", i, " has zero gt label") occupancy_, position_, size_, angle_, heading_, classification_ = self.make_ground_truth( gt_boxes3d, sample['gt_cls_type_list']) + # print(len(a)) + # if # print(occupancy_.shape, position_.shape, size_.shape, angle_.shape, heading_.shape, classification_.shape) @@ -361,7 +368,8 @@ def __getitem__(self, batch_id: int): angle = np.array(angle) heading = np.array(heading) classification = np.array(classification) - return [pillars, voxels], [occupancy, position, size, angle, heading, classification], [pts_input_, gt_boxes3d_, sample_] + # return [pillars, voxels], [occupancy, position, size, angle, heading, classification], [pts_input_, gt_boxes3d_, sample_] + return [pillars, voxels], [occupancy, position, size, angle, heading], [pts_input_, gt_boxes3d_, sample_] # new network else: return [pillars, voxels] diff --git a/point_pillars_custom_processors_v2_2.py b/point_pillars_custom_processors_v2_2.py new file mode 100644 index 0000000..ae2b17c --- /dev/null +++ b/point_pillars_custom_processors_v2_2.py @@ -0,0 +1,380 @@ +from typing import List, Any +import numpy as np +import tensorflow as tf + +from tensorflow.python.keras.utils.data_utils import Sequence + +from config_v2 import Parameters +# from point_pillars import createPillars, createPillarsTarget +from point_pillars_v2 import createPillars, createPillarsTarget +# from readers import DataReader, Label3D +from sklearn.utils import shuffle +import sys + +from det3d.pc_kitti_dataset import PCKittiAugmentedDataset + +from point_viz.converter import PointvizConverter + + +def select_best_anchors(arr): + dims = np.indices(arr.shape[1:]) + # arr[..., 0:1] gets the occupancy value from occ in {-1, 0, 1}, i.e. 
{bad match, neg box, pos box} + ind = (np.argmax(arr[..., 0:1], axis=0),) + tuple(dims) + + return arr[ind] + + +class DataProcessor(Parameters): + + def __init__(self, **kwargs): + super(DataProcessor, self).__init__(**kwargs) + anchor_dims = np.array(self.anchor_dims, dtype=np.float32) + self.anchor_dims = anchor_dims[:, 0:3] + self.anchor_z = anchor_dims[:, 3] + self.anchor_yaw = anchor_dims[:, 4] + # Counts may be used to make statistic about how well the anchor boxes fit the objects + self.pos_cnt, self.neg_cnt = 0, 0 + + def make_point_pillars(self, points: np.ndarray): + + assert points.ndim == 2 + assert points.shape[1] == 4 + assert points.dtype == np.float32 + + pillars, indices = createPillars(points, + self.max_points_per_pillar, + self.max_pillars, + self.x_step, + self.y_step, + self.x_min, + self.x_max, + self.y_min, + self.y_max, + self.z_min, + self.z_max, + False) + + return pillars, indices + + def make_ground_truth(self, gt_boxes_3d: Any, gt_cls_type_list: List[str]): + """ Generate the ground truth labels for each pillar + + Args: + gt_boxes_3d (numpy[float]): An array of boxes, each row containing [x, y, z, l, w, h, ry] + gt_cls_type_list (List[str]): A list of strings containing the class type of each box + + Returns: + tuple: (occupancy, position, size, angle, heading, classification) target arrays + """ + + # filter labels by classes (cars, pedestrians and Trams) + # Label has 4 properties (Classification (0th index of labels file), + # centroid coordinates, dimensions, yaw) + # labels = list(filter(lambda x: x.classification in self.classes, labels)) + + + + if len(gt_boxes_3d) == 0: + pX, pY = int(self.Xn / self.downscaling_factor), int(self.Yn / self.downscaling_factor) + a = int(self.anchor_dims.shape[0]) + return np.zeros((pX, pY, a), dtype='float32'), np.zeros((pX, pY, a, self.nb_dims), dtype='float32'), \ + np.zeros((pX, pY, a, self.nb_dims), dtype='float32'), np.zeros((pX, pY, a), dtype='float32'), \ + np.zeros((pX, pY, a), dtype='float32'), np.zeros((pX, pY, a, self.nb_classes), dtype='float64') + + # For each label 
file, generate these properties except for the Don't care class + + # target_positions = np.array([label.centroid for label in labels], dtype=np.float32) + # target_dimension = np.array([label.dimension for label in labels], dtype=np.float32) + # target_yaw = np.array([label.yaw for label in labels], dtype=np.float32) + # target_class = np.array([self.classes[label.classification] for label in labels], dtype=np.int32) + + target_positions = gt_boxes_3d[:,:3] + target_dimension = gt_boxes_3d[:,3:6] # don't have to translate again + target_yaw = gt_boxes_3d[:, 6] + # print(type(self.classes)) + # print(type(self.classes_map)) + # # print(gt_cls_type_list[0]) + # print(self.classes_map[gt_cls_type_list[0]]) + + target_class = np.array([self.classes_map[gt_cls_type_list[k]] for k in range(len(gt_cls_type_list))], dtype=np.int32) + + assert np.all(target_yaw >= -np.pi) & np.all(target_yaw <= np.pi) + assert len(target_positions) == len(target_dimension) == len(target_yaw) == len(target_class) + + target, pos, neg = createPillarsTarget(target_positions, + target_dimension, + target_yaw, + target_class, + self.anchor_dims, + self.anchor_z, + self.anchor_yaw, + self.positive_iou_threshold, + self.negative_iou_threshold, + self.nb_classes, + self.downscaling_factor, + self.x_step, + self.y_step, + self.x_min, + self.x_max, + self.y_min, + self.y_max, + self.z_min, + self.z_max, + False) + self.pos_cnt += pos + self.neg_cnt += neg + + # return a merged target view for all objects in the ground truth and get categorical labels + # print("target.shape: ", target.shape) + sel = select_best_anchors(target) + # print("self.shape: ", sel[...,0].shape) + ohe = tf.keras.utils.to_categorical(sel[..., 9], num_classes=self.nb_classes, dtype='float64') + # print("self.shape: ", sel[...,0].shape) + # print("ohe.shape: ", ohe.shape) + # print("sel[8].shape: ",sel[..., 8].shape) + + return sel[..., 0], sel[..., 1:4], sel[..., 4:7], sel[..., 7], sel[..., 8], ohe + + +class 
CustomDataGenerator(DataProcessor, Sequence, PCKittiAugmentedDataset): + """ Multiprocessing-safe data generator for training, validation or testing, without fancy augmentation """ + + def __init__(self, batch_size: int, root_dir:str, npoints:int =16384, split: str ='train', + classes:List[str] =['Car', 'Pedestrian', 'Person_sitting'], random_select:bool =True, + gt_database_dir=None, aug_hard_ratio:float=0.5, **kwargs): + + super(CustomDataGenerator, self).__init__( + batch_size=batch_size, root_dir=root_dir, + npoints=npoints, split=split, classes=classes, + random_select=random_select, gt_database_dir=gt_database_dir, + aug_hard_ratio=aug_hard_ratio, **kwargs + ) + # self.data_reader = data_reader + self.batch_size = batch_size + self.sample_id_list=self.get_sample_id_list() + # self.split = split + # print("CustomDataGenerator: " ,self.split) + + + def get_sample(self, index): + return super().get_sample(index) + + + def __len__(self): + return len(self.sample_id_list) // self.batch_size + + def __getitem__(self, batch_id: int): + file_ids = range(batch_id * self.batch_size, self.batch_size * (batch_id + 1)) + # print("inside getitem") + pillars = [] + voxels = [] + occupancy = [] + position = [] + size = [] + angle = [] + heading = [] + classification = [] + + + + for i in file_ids: + sample = self.get_sample(i) + # For each file, dividing the space into a x-y grid to create pillars + pts_lidar = sample['calib'].rect_to_lidar(sample['pts_rect']) + pts_input = np.concatenate((pts_lidar, sample['pts_features']), axis=1) # (N, C) + + gt_boxes3d_xyz = sample['calib'].rect_to_lidar(sample['gt_boxes3d'][:,:3]) + + gt_boxes3d = np.concatenate(( + gt_boxes3d_xyz[:,0,np.newaxis], # 0 x + gt_boxes3d_xyz[:,1,np.newaxis], # 1 y + gt_boxes3d_xyz[:,2,np.newaxis] + sample['gt_boxes3d'][:,3,np.newaxis] / 2, # 2 z + sample['gt_boxes3d'][:,5,np.newaxis], # 3 l # same as the original label + sample['gt_boxes3d'][:,4,np.newaxis], # 4 w # same as the original label + 
sample['gt_boxes3d'][:,3,np.newaxis], # 5 h # same as the original label + -sample['gt_boxes3d'][:,6,np.newaxis], # 6 ry + ), axis=1) + + # Voxels are the pillar ids + pillars_, voxels_ = self.make_point_pillars(pts_input) + + pillars.append(pillars_) + voxels.append(voxels_) + + + if self.split=='train' or self.split =='val': + occupancy_, position_, size_, angle_, heading_, classification_ = self.make_ground_truth( + gt_boxes3d, sample['gt_cls_type_list']) + + occupancy.append(occupancy_) + position.append(position_) + size.append(size_) + angle.append(angle_) + heading.append(heading_) + classification.append(classification_) + + pillars = np.concatenate(pillars, axis=0) + voxels = np.concatenate(voxels, axis=0) + + if self.split=='train' or self.split =='val': + occupancy = np.array(occupancy) + position = np.array(position) + size = np.array(size) + angle = np.array(angle) + heading = np.array(heading) + classification = np.array(classification) + # return [pillars, voxels], [occupancy, position, size, angle, heading, classification] # network + return [pillars, voxels], [occupancy, position, size, angle, heading] # network_v2 + else: + return [pillars, voxels] + + def on_epoch_end(self): + # print("inside epoch") + if self.split=='train' or self.split =='val': + # pass + self.sample_id_list=shuffle(self.sample_id_list) + + +class AnalyseCustomDataGenerator(CustomDataGenerator): + """ Multiprocessing-safe data generator for training, validation or testing, without fancy augmentation """ + + def __init__(self, batch_size: int, root_dir:str, npoints:int =16384, split: str ='train', + classes:List[str] =['Car', 'Pedestrian', 'Person_sitting'], random_select:bool =True, + gt_database_dir=None, aug_hard_ratio:float=0.5, **kwargs): + + super(AnalyseCustomDataGenerator, self).__init__( + batch_size=batch_size, root_dir=root_dir, + npoints=npoints, split=split, classes=classes, + random_select=random_select, gt_database_dir=gt_database_dir, + 
aug_hard_ratio=aug_hard_ratio, **kwargs + ) + # self.data_reader = data_reader + self.batch_size = batch_size + self.sample_id_list=self.get_sample_id_list() + # self.split = split + # self.split = split + # print("AnalyseCustomDataGenerator: " ,self.split) + + + def _get_sample(self, index): + return super().get_sample(index) + + + def __len__(self): + return len(self.sample_id_list) // self.batch_size + + def __getitem__(self, batch_id: int): + file_ids = range(batch_id * self.batch_size, self.batch_size * (batch_id + 1)) + # print("inside getitem") + pillars = [] + voxels = [] + occupancy = [] + position = [] + size = [] + angle = [] + heading = [] + classification = [] + + pts_input_ = [] + gt_boxes3d_ = [] + sample_ = [] + + # save_viz_path = "/home/tan/tjtanaa/PointPillars/visualization/custom_processor" + # # Initialize and setup output directory. + # Converter = PointvizConverter(save_viz_path) + + for i in file_ids: + # print(i) + # print(type(i)) + sample = self._get_sample(i) + # For each file, dividing the space into a x-y grid to create pillars + pts_lidar = sample['calib'].rect_to_lidar(sample['pts_rect']) + # print(pts_lidar.shape) + + pts_input = np.concatenate((pts_lidar, sample['pts_features']), axis=1) # (N, C) + + gt_boxes3d_xyz = sample['calib'].rect_to_lidar(sample['gt_boxes3d'][:,:3]) + + # print(gt_boxes3d_xyz.shape) + + gt_boxes3d = np.concatenate(( + gt_boxes3d_xyz[:,0,np.newaxis], # 0 x + gt_boxes3d_xyz[:,1,np.newaxis], # 1 y + gt_boxes3d_xyz[:,2,np.newaxis] + sample['gt_boxes3d'][:,3,np.newaxis] / 2, # 2 z + sample['gt_boxes3d'][:,5,np.newaxis], # 3 l # same as the original label + sample['gt_boxes3d'][:,4,np.newaxis], # 4 w # same as the original label + sample['gt_boxes3d'][:,3,np.newaxis], # 5 h # same as the original label + -sample['gt_boxes3d'][:,6,np.newaxis], # 6 ry + ), axis=1) + + # print(type(gt_boxes3d)) + # gt_boxes3d = self.limit_yaw(gt_boxes3d) + + # bbox_params = self.convert_labels_into_point_viz_format(gt_boxes3d) + # 
print(bbox_params.shape) + # Converter.compile("custom_sample_{}".format(i), coors=pts_input[:,:3], intensity=pts_input[:,3], + # bbox_params=bbox_params) + + + # exit() + + # print(pts_input.shape) + # Voxels are the pillar ids + pillars_, voxels_ = self.make_point_pillars(pts_input) + + # print(pillars_.shape, voxels_.shape) + # for i in range(10): + # print(pillars_[0,0,i,:]) + # print(np.sum(pillars_ > 0)) + # exit() + + pillars.append(pillars_) + voxels.append(voxels_) + + # print(sample['gt_cls_type_list']) + # print("split: ", self.split) + if self.split=='train' or self.split =='val': + # print(len(gt_boxes3d), ", ", len(sample['gt_cls_type_list'])) + if (len(gt_boxes3d) == 0): + print("file id: ", i, " has zero gt label") + occupancy_, position_, size_, angle_, heading_, classification_ = self.make_ground_truth( + gt_boxes3d, sample['gt_cls_type_list']) + # print(len(a)) + # if + + # print(occupancy_.shape, position_.shape, size_.shape, angle_.shape, heading_.shape, classification_.shape) + + + occupancy.append(occupancy_) + position.append(position_) + size.append(size_) + angle.append(angle_) + heading.append(heading_) + classification.append(classification_) + + sample_.append(sample) + gt_boxes3d_.append(gt_boxes3d) + pts_input_.append(pts_input) + + # exit() + + pillars = np.concatenate(pillars, axis=0) + voxels = np.concatenate(voxels, axis=0) + + if self.split=='train' or self.split =='val': + occupancy = np.array(occupancy) + position = np.array(position) + size = np.array(size) + angle = np.array(angle) + heading = np.array(heading) + classification = np.array(classification) + # return [pillars, voxels], [occupancy, position, size, angle, heading, classification], [pts_input_, gt_boxes3d_, sample_] + return [pillars, voxels], [occupancy, position, size, angle, heading], [pts_input_, gt_boxes3d_, sample_] # new network + else: + return [pillars, voxels] + + def on_epoch_end(self): + # print("inside epoch") + if self.split=='train' or self.split 
=='val': + self.sample_id_list=shuffle(self.sample_id_list) + \ No newline at end of file diff --git a/point_pillars_training_custom_run_v2.py b/point_pillars_training_custom_run_v2.py index 196003d..1293f32 100644 --- a/point_pillars_training_custom_run_v2.py +++ b/point_pillars_training_custom_run_v2.py @@ -20,10 +20,11 @@ # DATA_ROOT = "/media/data3/tjtanaa/kitti_dataset/KITTI/object/training" # TODO make main arg DATA_ROOT = "/media/data3/tjtanaa/kitti_dataset/" # TODO make main arg # MODEL_ROOT = "./logs_Car_Pedestrian_Custom_Dataset_single_process" -MODEL_ROOT = "./logs_Car_Pedestrian_Custom_Dataset_No_Early_Stopping_wo_Aug_wo_val" +# MODEL_ROOT = "./logs_Car_Pedestrian_Custom_Dataset_No_Early_Stopping_wo_Aug_wo_val" +MODEL_ROOT = "./logs_Pedestrian_Custom_Dataset_No_Early_Stopping_wo_Aug_wo_val" os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" -os.environ["CUDA_VISIBLE_DEVICES"] = "2" +os.environ["CUDA_VISIBLE_DEVICES"] = "1" if __name__ == "__main__": @@ -42,7 +43,7 @@ gt_database_dir = None training_gen = CustomDataGenerator(batch_size=params.batch_size,root_dir = DATA_ROOT, - npoints=20000, split='train', classes=list(params.classes_map.keys()), + npoints=20000, split='train_val', classes=list(params.classes_map.keys()), random_select=True, gt_database_dir=None, aug_hard_ratio=0.7) # validation_gen = CustomDataGenerator(batch_size=params.batch_size, root_dir=DATA_ROOT, diff --git a/point_pillars_training_custom_run_v2_2.py b/point_pillars_training_custom_run_v2_2.py new file mode 100644 index 0000000..c7b4206 --- /dev/null +++ b/point_pillars_training_custom_run_v2_2.py @@ -0,0 +1,87 @@ +import os +import time +import numpy as np +import tensorflow as tf +from glob import glob + +# from config import Parameters +from config_v2_2 import Parameters +from loss_v2_2 import PointPillarNetworkLoss +from network_v2_2 import build_point_pillar_graph +from point_pillars_custom_processors_v2_2 import CustomDataGenerator +from readers import KittiDataReader + +# from 
point_viz.converter import PointvizConverter + +tf.get_logger().setLevel("ERROR") + +# DATA_ROOT = "/media/data3/tjtanaa/kitti_dataset/KITTI/object/training" # TODO make main arg +DATA_ROOT = "/media/data3/tjtanaa/kitti_dataset/" # TODO make main arg +# MODEL_ROOT = "./logs_Car_Pedestrian_Custom_Dataset_single_process" +# MODEL_ROOT = "./logs_Car_Pedestrian_Custom_Dataset_No_Early_Stopping_wo_Aug_wo_val" +MODEL_ROOT = "./logs_Car_Custom_Dataset_No_Early_Stopping_wo_Aug_wo_val_new_network" + +os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" +os.environ["CUDA_VISIBLE_DEVICES"] = "1" + +if __name__ == "__main__": + params = Parameters() + + pillar_net = build_point_pillar_graph(params) + # pillar_net.load_weights(os.path.join(MODEL_ROOT, "model.h5")) + pillar_net.summary() + # exit() + loss = PointPillarNetworkLoss(params) + + optimizer = tf.keras.optimizers.Adam(lr=params.learning_rate, decay=params.decay_rate) + + pillar_net.compile(optimizer, loss=loss.losses()) + + gt_database_dir = os.path.join(DATA_ROOT, "gt_database") + # gt_database_dir = None + + training_gen = CustomDataGenerator(batch_size=params.batch_size,root_dir = DATA_ROOT, + npoints=20000, split='train_val', classes=list(params.classes_map.keys()), + random_select=True, gt_database_dir=gt_database_dir, aug_hard_ratio=0.7) + + validation_gen = CustomDataGenerator(batch_size=params.batch_size, root_dir=DATA_ROOT, + npoints=20000, split='train_val_test', classes=list(params.classes_map.keys())) + + + # save_viz_path = "/home/tan/tjtanaa/PointPillars/visualization/custom_processor" + # Initialize and setup output directory. 
+ # Converter = PointvizConverter(save_viz_path) + + + + # bbox_params = self.convert_labels_into_point_viz_format(gt_boxes3d) + # print(bbox_params.shape) + # Converter.compile("custom_sample_{}".format(i), coors=pts_input[:,:3], intensity=pts_input[:,3], + # bbox_params=bbox_params) + + + log_dir = MODEL_ROOT + epoch_to_decay = int( + np.round(params.iters_to_decay / params.batch_size * int(len(training_gen)))) + callbacks = [ + tf.keras.callbacks.TensorBoard(log_dir=log_dir), + tf.keras.callbacks.ModelCheckpoint(filepath=os.path.join(log_dir, "model.h5"), + monitor='val_loss', save_best_only=True), + tf.keras.callbacks.LearningRateScheduler( + lambda epoch, lr: lr * 0.8 if ((epoch % epoch_to_decay == 0) and (epoch != 0)) else lr, verbose=True), + # tf.keras.callbacks.EarlyStopping(patience=20, monitor='val_loss'), + ] + + try: + pillar_net.fit(training_gen, + validation_data = validation_gen, + steps_per_epoch=len(training_gen), + callbacks=callbacks, + use_multiprocessing=True, + # max_queue_size = 16, + epochs=int(params.total_training_epochs), + workers=6) + except KeyboardInterrupt: + model_str = "interrupted_%s.h5" % time.strftime("%Y%m%d-%H%M%S") + pillar_net.save(os.path.join(log_dir, model_str)) + print("Interrupt. 
Saving output to %s" % os.path.join(os.getcwd(), log_dir[1:], model_str)) diff --git a/point_pillars_visualize_input.py b/point_pillars_visualize_input.py index 91ce836..090c3f5 100644 --- a/point_pillars_visualize_input.py +++ b/point_pillars_visualize_input.py @@ -31,14 +31,14 @@ def limit_period(val, offset=0.5, period=np.pi): # save_viz_path = "/home/tan/tjtanaa/PointPillars/visualization/custom_prediction_multiprocessing" # save_viz_path = "/home/tan/tjtanaa/PointPillars/visualization/input_coordinate_analysis_point_pillar_v2_gt_only" # save_viz_path = "/home/tan/tjtanaa/PointPillars/visualization/input_coordinate_analysis_point_pillar_v2_labels_only" - save_viz_path = "/home/tan/tjtanaa/PointPillars/visualization/input_coordinate_analysis_point_pillar_v2_gt_and_labels" + save_viz_path = "/home/tan/tjtanaa/PointPillars/visualization/pedestrian_input_coordinate_analysis_point_pillar_v2_gt_and_labels" # Initialize and setup output directory. Converter = PointvizConverter(save_viz_path) gt_database_dir = os.path.join(DATA_ROOT, "gt_database") validation_gen = AnalyseCustomDataGenerator(batch_size=params.batch_size, root_dir=DATA_ROOT, - npoints=16384, split='val',random_select=False, classes=list(params.classes_map.keys())) + npoints=16384, split='train_val_test',random_select=False, classes=list(params.classes_map.keys())) for batch_idx in range(0,20): [pillars, voxels], [occupancy_, position_, size_, angle_, heading_, classification_], [pts_input, gt_boxes3d, sample] = validation_gen[batch_idx] diff --git a/src/point_pillars.cpp b/src/point_pillars.cpp index 6451f6c..e30d087 100644 --- a/src/point_pillars.cpp +++ b/src/point_pillars.cpp @@ -27,6 +27,8 @@ struct PillarPoint { float xc; float yc; float zc; + float xp; + float yp; }; pybind11::tuple createPillars(pybind11::array_t points, @@ -64,13 +66,15 @@ pybind11::tuple createPillars(pybind11::array_t points, auto yIndex = static_cast(std::floor((points.at(i, 1) - yMin) / yStep)); PillarPoint p = { - 
points.at(i, 0), - points.at(i, 1), - points.at(i, 2), - points.at(i, 3), - 0, - 0, - 0, + points.at(i, 0), // x + points.at(i, 1), // y + points.at(i, 2), // z + points.at(i, 3), // intensity + 0, // xc + 0, // yc + 0, // zc + 0, // xp + 0, // yp }; map[{xIndex, yIndex}].emplace_back(p); @@ -79,7 +83,7 @@ pybind11::tuple createPillars(pybind11::array_t points, pybind11::array_t tensor; pybind11::array_t indices; - tensor.resize({1, maxPillars, maxPointsPerPillar, 7}); + tensor.resize({1, maxPillars, maxPointsPerPillar, 9}); indices.resize({1, maxPillars, 3}); int pillarId = 0; @@ -124,13 +128,23 @@ pybind11::tuple createPillars(pybind11::array_t points, break; } - tensor.mutable_at(0, pillarId, pointId, 0) = p.x - (xIndex * xStep + xMin); - tensor.mutable_at(0, pillarId, pointId, 1) = p.y - (yIndex * yStep + yMin); - tensor.mutable_at(0, pillarId, pointId, 2) = p.z - zMid; + // tensor.mutable_at(0, pillarId, pointId, 0) = p.x - (xIndex * xStep + xMin); + // tensor.mutable_at(0, pillarId, pointId, 1) = p.y - (yIndex * yStep + yMin); + // tensor.mutable_at(0, pillarId, pointId, 2) = p.z - zMid; + // tensor.mutable_at(0, pillarId, pointId, 3) = p.intensity; + // tensor.mutable_at(0, pillarId, pointId, 4) = p.xc; + // tensor.mutable_at(0, pillarId, pointId, 5) = p.yc; + // tensor.mutable_at(0, pillarId, pointId, 6) = p.zc; + + tensor.mutable_at(0, pillarId, pointId, 0) = p.x; + tensor.mutable_at(0, pillarId, pointId, 1) = p.y; + tensor.mutable_at(0, pillarId, pointId, 2) = p.z; tensor.mutable_at(0, pillarId, pointId, 3) = p.intensity; tensor.mutable_at(0, pillarId, pointId, 4) = p.xc; tensor.mutable_at(0, pillarId, pointId, 5) = p.yc; tensor.mutable_at(0, pillarId, pointId, 6) = p.zc; + tensor.mutable_at(0, pillarId, pointId, 7) = p.x - (xIndex * xStep + xMin); + tensor.mutable_at(0, pillarId, pointId, 8) = p.y - (yIndex * yStep + yMin); pointId++; } @@ -329,6 +343,42 @@ float iou(const BoundingBox3D& box1, return area_overlap / (area_poly1 + area_poly2 - 
area_overlap); } + +// Calculates the IOU between two bounding boxes. +float iou(const BoundingBox3D& box1, + const BoundingBox3D& box2) +{ + const auto& box_as_vector = boundingBox3DToTopDown(box1); + const auto& box_as_vector_2 = boundingBox3DToTopDown(box2); + const auto& clipped_vector = sutherlandHodgmanClip(box_as_vector, box_as_vector_2); + + float area_poly1 = polygonArea(box_as_vector); + float area_poly2 = polygonArea(box_as_vector_2); + float area_overlap = polygonArea(clipped_vector); + + return area_overlap / (area_poly1 + area_poly2 - area_overlap); +} + + +// Calculates the IOU between two bounding boxes. +float iou3D(const BoundingBox3D& box1, + const BoundingBox3D& box2) +{ + const auto& box_as_vector = boundingBox3DToTopDown(box1); + const auto& box_as_vector_2 = boundingBox3DToTopDown(box2); + const auto& clipped_vector = sutherlandHodgmanClip(box_as_vector, box_as_vector_2); + + float volume_poly1 = polygonArea(box_as_vector) * box1.height; + float volume_poly2 = polygonArea(box_as_vector_2) * box2.height; + + float topZ = min(box1.height/2 + box1.z, box2.height/2 + box2.z); + float bottomZ = max( - box1.height/2 + box1.z, - box2.height/2 + box2.z); + + float volume_overlap = polygonArea(clipped_vector) * (topZ - bottomZ); + + return volume_overlap / (volume_poly1 + volume_poly2 - volume_overlap); +} + int clip(int n, int lower, int upper) { return std::max(lower, std::min(n, upper)); } @@ -462,6 +512,8 @@ std::tuple, int, int> createPillarsTarget(const pybind1 float maxIou = 0; BoundingBox3D bestAnchor = {}; int bestAnchorId = 0; + int bestAnchorXId = 0; + int bestAnchorYId = 0; for (int xId = xStart; xId < xEnd; xId++) // Iterate through every box within search diameter // In our example case, from 3 till 8 { @@ -487,6 +539,14 @@ std::tuple, int, int> createPillarsTarget(const pybind1 maxIou = iouOverlap; bestAnchor = anchorBox; bestAnchorId = anchorCount; + bestAnchorXId = xId; + bestAnchorYId = yId; + // if(printTime){ + // if(anchorCount == 
3){ + // py::print("\nIoU old: " +std::to_string(iouOverlap) + " new: " + std::to_string(maxIou)); + // } + // } + } if (iouOverlap > positiveThreshold) // Accept the Anchor. Add the anchor details to the tensor. @@ -503,7 +563,8 @@ std::tuple, int, int> createPillarsTarget(const pybind1 tensor.mutable_at(objectCount, xId, yId, anchorCount, 5) = std::log(labelBox.width / anchorBox.width); tensor.mutable_at(objectCount, xId, yId, anchorCount, 6) = std::log(labelBox.height / anchorBox.height); - tensor.mutable_at(objectCount, xId, yId, anchorCount, 7) = std::sin(labelBox.yaw - anchorBox.yaw); //delta yaw + // tensor.mutable_at(objectCount, xId, yId, anchorCount, 7) = std::sin(labelBox.yaw - anchorBox.yaw); //delta yaw + tensor.mutable_at(objectCount, xId, yId, anchorCount, 7) = labelBox.yaw - anchorBox.yaw; //delta yaw if (labelBox.yaw > 0) // Is yaw > 0 { tensor.mutable_at(objectCount, xId, yId, anchorCount, 8) = 1; @@ -537,13 +598,19 @@ std::tuple, int, int> createPillarsTarget(const pybind1 if (printTime) { // std::cout << "\nThere was no sufficiently overlapping anchor anywhere for object " << objectCount << std::endl; - // py::print("There was no sufficiently overlapping anchor anywhere for object " +std::to_string(objectCount)); + py::print("\nThere was no sufficiently overlapping anchor anywhere for object " +std::to_string(objectCount)); // std::cout << "Best IOU was " << maxIou << ". Adding the best location regardless of threshold." 
<< std::endl; - // py::print("Best IOU was "+std::to_string(maxIou)+" Adding the best location regardless of threshold"); + py::print("\nBest IOU of anchor " + std::to_string(bestAnchorId) + " was "+std::to_string(maxIou)+" Adding the best location regardless of threshold"); + // py::print("\nBest IOU.x was "+std::to_string(bestAnchor.x)+" "); + // py::print("\nBest IOU.y was "+std::to_string(bestAnchor.y)+" "); + // py::print("\nBest IOU.z was "+std::to_string(bestAnchor.z)+" "); + // py::print("\nBest IOU.ry was "+std::to_string(bestAnchor.yaw)+" "); } - const auto xId = static_cast(std::floor((labelBox.x - xMin) / (xStep * downscalingFactor))); - const auto yId = static_cast(std::floor((labelBox.y - yMin) / (yStep * downscalingFactor))); + const auto xId = bestAnchorXId; + const auto yId = bestAnchorYId; + // const auto xId = static_cast(std::floor((labelBox.x - xMin) / (xStep * downscalingFactor))); + // const auto yId = static_cast(std::floor((labelBox.y - yMin) / (yStep * downscalingFactor))); const float diag = std::sqrt(std::pow(bestAnchor.width, 2) + std::pow(bestAnchor.length, 2)); tensor.mutable_at(objectCount, xId, yId, bestAnchorId, 0) = 1; @@ -556,7 +623,8 @@ std::tuple, int, int> createPillarsTarget(const pybind1 tensor.mutable_at(objectCount, xId, yId, bestAnchorId, 5) = std::log(labelBox.width / bestAnchor.width); tensor.mutable_at(objectCount, xId, yId, bestAnchorId, 6) = std::log(labelBox.height / bestAnchor.height); - tensor.mutable_at(objectCount, xId, yId, bestAnchorId, 7) = std::sin(labelBox.yaw - bestAnchor.yaw); + // tensor.mutable_at(objectCount, xId, yId, bestAnchorId, 7) = std::sin(labelBox.yaw - bestAnchor.yaw); + tensor.mutable_at(objectCount, xId, yId, bestAnchorId, 7) = labelBox.yaw - bestAnchor.yaw; if (labelBox.yaw > 0) { tensor.mutable_at(objectCount, xId, yId, bestAnchorId, 8) = 1; @@ -589,9 +657,14 @@ std::tuple, int, int> createPillarsTarget(const pybind1 return std::make_tuple(tensor, posCnt, negCnt); } +float cmath_sin(float 
value){ + return std::sin(value); +} + PYBIND11_MODULE(point_pillars, m) { m.def("createPillars", &createPillars, "Runs function to create point pillars input tensors"); m.def("createPillarsTarget", &createPillarsTarget, "Runs function to create point pillars output ground truth"); + m.def("cmath_sin", &cmath_sin, "Runs function to compute sine"); } diff --git a/src/point_pillars_v2.cpp b/src/point_pillars_v2.cpp deleted file mode 100644 index 85a240e..0000000 --- a/src/point_pillars_v2.cpp +++ /dev/null @@ -1,634 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -// #include -namespace py = pybind11; - -struct IntPairHash { - std::size_t operator()(const std::pair &p) const { - assert(sizeof(std::size_t)>=8); - //Shift first integer over to make room for the second integer. The two are - //then packed side by side. - return (((uint64_t)p.first)<<32) | ((uint64_t)p.second); - } -}; - -struct PillarPoint { - float x; - float y; - float z; - float intensity; - float xc; - float yc; - float zc; - float xp; - float yp; -}; - -pybind11::tuple createPillars(pybind11::array_t points, - int maxPointsPerPillar, - int maxPillars, - float xStep, - float yStep, - float xMin, - float xMax, - float yMin, - float yMax, - float zMin, - float zMax, - bool printTime = false) -{ - std::chrono::high_resolution_clock::time_point t1 = std::chrono::high_resolution_clock::now(); - - if (points.ndim() != 2 || points.shape()[1] != 4) - { - throw std::runtime_error("numpy array with shape (n, 4) expected (n being the number of points)"); - } - - std::unordered_map, std::vector, IntPairHash> map; - - for (int i = 0; i < points.shape()[0]; ++i) - { - if ((points.at(i, 0) < xMin) || (points.at(i, 0) >= xMax) || \ - (points.at(i, 1) < yMin) || (points.at(i, 1) >= yMax) || \ - (points.at(i, 2) < zMin) || (points.at(i, 2) >= zMax)) - { - continue; - } - - auto xIndex = static_cast(std::floor((points.at(i, 0) - xMin) / xStep)); - auto yIndex = 
static_cast(std::floor((points.at(i, 1) - yMin) / yStep)); - - PillarPoint p = { - points.at(i, 0), // x - points.at(i, 1), // y - points.at(i, 2), // z - points.at(i, 3), // intensity - 0, // xc - 0, // yc - 0, // zc - 0, // xp - 0, // yp - }; - - map[{xIndex, yIndex}].emplace_back(p); - } - - pybind11::array_t tensor; - pybind11::array_t indices; - - tensor.resize({1, maxPillars, maxPointsPerPillar, 9}); - indices.resize({1, maxPillars, 3}); - - int pillarId = 0; - for (auto& pair: map) - { - if (pillarId >= maxPillars) - { - break; - } - - float xMean = 0; - float yMean = 0; - float zMean = 0; - for (const auto& p: pair.second) - { - xMean += p.x; - yMean += p.y; - zMean += p.z; - } - xMean /= pair.second.size(); - yMean /= pair.second.size(); - zMean /= pair.second.size(); - - for (auto& p: pair.second) - { - p.xc = p.x - xMean; - p.yc = p.y - yMean; - p.zc = p.z - zMean; - } - - auto xIndex = static_cast(std::floor((xMean - xMin) / xStep)); - auto yIndex = static_cast(std::floor((yMean - yMin) / yStep)); - auto zMid = (zMax - zMin) * 0.5f; - indices.mutable_at(0, pillarId, 1) = xIndex; - indices.mutable_at(0, pillarId, 2) = yIndex; - - int pointId = 0; - for (const auto& p: pair.second) - { - if (pointId >= maxPointsPerPillar) - { - break; - } - - // tensor.mutable_at(0, pillarId, pointId, 0) = p.x - (xIndex * xStep + xMin); - // tensor.mutable_at(0, pillarId, pointId, 1) = p.y - (yIndex * yStep + yMin); - // tensor.mutable_at(0, pillarId, pointId, 2) = p.z - zMid; - // tensor.mutable_at(0, pillarId, pointId, 3) = p.intensity; - // tensor.mutable_at(0, pillarId, pointId, 4) = p.xc; - // tensor.mutable_at(0, pillarId, pointId, 5) = p.yc; - // tensor.mutable_at(0, pillarId, pointId, 6) = p.zc; - - tensor.mutable_at(0, pillarId, pointId, 0) = p.x; - tensor.mutable_at(0, pillarId, pointId, 1) = p.y; - tensor.mutable_at(0, pillarId, pointId, 2) = p.z; - tensor.mutable_at(0, pillarId, pointId, 3) = p.intensity; - tensor.mutable_at(0, pillarId, pointId, 4) = p.xc; - 
tensor.mutable_at(0, pillarId, pointId, 5) = p.yc; - tensor.mutable_at(0, pillarId, pointId, 6) = p.zc; - tensor.mutable_at(0, pillarId, pointId, 7) = p.x - (xIndex * xStep + xMin); - tensor.mutable_at(0, pillarId, pointId, 8) = p.y - (yIndex * yStep + yMin); - - pointId++; - } - - pillarId++; - } - - pybind11::tuple result = pybind11::make_tuple(tensor, indices); - - std::chrono::high_resolution_clock::time_point t2 = std::chrono::high_resolution_clock::now(); - auto duration = std::chrono::duration_cast( t2 - t1 ).count(); - if (printTime) - std::cout << "createPillars took: " << static_cast(duration) / 1e6 << " seconds" << std::endl; - - return result; -} - -struct BoundingBox3D -{ - float x; - float y; - float z; - float length; - float width; - float height; - float yaw; - float classId; -}; - -struct Point2D { - float x; - float y; -}; - -typedef std::vector Polyline2D; - -// Returns x-value of point of intersection of two lines -float xIntersect(float x1, float y1, float x2, float y2, - float x3, float y3, float x4, float y4) -{ - float num = (x1*y2 - y1*x2) * (x3-x4) - (x1-x2) * (x3*y4 - y3*x4); - float den = (x1-x2) * (y3-y4) - (y1-y2) * (x3-x4); - return num/den; -} - -// Returns y-value of point of intersection of two lines -float yIntersect(float x1, float y1, float x2, float y2, - float x3, float y3, float x4, float y4) -{ - float num = (x1*y2 - y1*x2) * (y3-y4) - (y1-y2) * (x3*y4 - y3*x4); - float den = (x1-x2) * (y3-y4) - (y1-y2) * (x3-x4); - return num/den; -} - -// Returns area of polygon using the shoelace method -float polygonArea(const Polyline2D &polygon) -{ - float area = 0.0; - - size_t j = polygon.size()-1; - for (size_t i = 0; i < polygon.size(); i++) - { - area += (polygon[j].x + polygon[i].x) * (polygon[j].y - polygon[i].y); - j = i; // j is previous vertex to i - } - - return std::abs(area / 2.0); // Return absolute value -} - -float rotatedX(float x, float y, float angle) -{ - return x * std::cos(angle) - y * std::sin(angle); -} - 
-float rotatedY(float x, float y, float angle) -{ - return x * std::sin(angle) + y * std::cos(angle); -} - -// Construct bounding box in 2D, coordinates are returned in clockwise order -Polyline2D boundingBox3DToTopDown(const BoundingBox3D &box1) -{ - Polyline2D box; - box.push_back({rotatedX(-0.5 * box1.width, 0.5 * box1.length, - box1.yaw) + box1.x, - rotatedY(-0.5 * box1.width, 0.5 * box1.length, - box1.yaw) + box1.y}); - - box.push_back({rotatedX(0.5 * box1.width, 0.5 * box1.length, - box1.yaw) + box1.x, - rotatedY(0.5 * box1.width, 0.5 * box1.length, - box1.yaw) + box1.y}); - - box.push_back({rotatedX(0.5 * box1.width, -0.5 * box1.length, - box1.yaw) + box1.x, - rotatedY(0.5 * box1.width, -0.5 * box1.length, - box1.yaw) + box1.y}); - - box.push_back({rotatedX(-0.5 * box1.width, -0.5 * box1.length, - box1.yaw) + box1.x, - rotatedY(-0.5 * box1.width, -0.5 * box1.length, - box1.yaw) + box1.y}); - - return box; -} - -// This functions clips all the edges w.r.t one Clip edge of clipping area -// Returns a clipped polygon... -Polyline2D clip(const Polyline2D &poly_points, - float x1, - float y1, - float x2, - float y2) -{ - Polyline2D new_points; - - for (size_t i = 0; i < poly_points.size(); i++) - { - // (ix,iy),(kx,ky) are the co-ordinate values of the points - // i and k form a line in polygon - size_t k = (i+1) % poly_points.size(); - float ix = poly_points[i].x, iy = poly_points[i].y; - float kx = poly_points[k].x, ky = poly_points[k].y; - - // Calculating position of first point w.r.t. clipper line - float i_pos = (x2-x1) * (iy-y1) - (y2-y1) * (ix-x1); - - // Calculating position of second point w.r.t. 
clipper line - float k_pos = (x2-x1) * (ky-y1) - (y2-y1) * (kx-x1); - - // Case 1 : When both points are inside - if (i_pos < 0 && k_pos < 0) - { - //Only second point is added - new_points.push_back({kx,ky}); - } - - // Case 2: When only first point is outside - else if (i_pos >= 0 && k_pos < 0) - { - // Point of intersection with edge - // and the second point is added - new_points.push_back({xIntersect(x1, y1, x2, y2, ix, iy, kx, ky), - yIntersect(x1, y1, x2, y2, ix, iy, kx, ky)}); - new_points.push_back({kx,ky}); - - } - - // Case 3: When only second point is outside - else if (i_pos < 0 && k_pos >= 0) - { - //Only point of intersection with edge is added - new_points.push_back({xIntersect(x1, y1, x2, y2, ix, iy, kx, ky), - yIntersect(x1, y1, x2, y2, ix, iy, kx, ky)}); - - } - // Case 4: When both points are outside - else - { - //No points are added - } - } - - return new_points; -} - -// Implements Sutherland–Hodgman algorithm -// Returns a polygon with the intersection between two polygons. 
-Polyline2D sutherlandHodgmanClip(const Polyline2D &poly_points_vector, - const Polyline2D &clipper_points) -{ - Polyline2D clipped_poly_points_vector = poly_points_vector; - for (size_t i=0; i, int, int> createPillarsTarget(const pybind11::array_t& objectPositions, - const pybind11::array_t& objectDimensions, - const pybind11::array_t& objectYaws, - const pybind11::array_t& objectClassIds, - const pybind11::array_t& anchorDimensions, - const pybind11::array_t& anchorZHeights, - const pybind11::array_t& anchorYaws, - float positiveThreshold, - float negativeThreshold, - int nbClasses, - int downscalingFactor, - float xStep, - float yStep, - float xMin, - float xMax, - float yMin, - float yMax, - float zMin, - float zMax, - bool printTime = false) -{ - - std::chrono::high_resolution_clock::time_point t1 = std::chrono::high_resolution_clock::now(); - - const auto xSize = static_cast(std::floor((xMax - xMin) / (xStep * downscalingFactor))); -// py::print("xSize", xSize); - const auto ySize = static_cast(std::floor((yMax - yMin) / (yStep * downscalingFactor))); -// py::print("ySize", ySize); - - const int nbAnchors = anchorDimensions.shape()[0]; //4 Number of anchors -// py::print("nbAnchors", nbAnchors); -// Anchor length - - if (nbAnchors <= 0) - { - throw std::runtime_error("Anchor length is zero"); - } - - const int nbObjects = objectDimensions.shape()[0]; //6 Number of labels inside a label.txt file -// BB dimensions from the label file - if (nbObjects <= 0) - { - throw std::runtime_error("Object length is zero"); - } -// py::print("nbObjects", nbObjects); - - // parse numpy arrays -// Preparing the anchor bounding box - std::vector anchorBoxes = {}; - std::vector anchorDiagonals; - for (int i = 0; i < nbAnchors; ++i) - { - BoundingBox3D anchorBox = {}; - anchorBox.x = 0; - anchorBox.y = 0; - anchorBox.length = anchorDimensions.at(i, 0); - anchorBox.width = anchorDimensions.at(i, 1); - anchorBox.height = anchorDimensions.at(i, 2); - anchorBox.z = 
anchorZHeights.at(i); - anchorBox.yaw = anchorYaws.at(i); - anchorBoxes.emplace_back(anchorBox); // Appends a new anchorBox to the AnchorBoxes container - // Note that anchor box doesn't have a classId as of now. - anchorDiagonals.emplace_back(std::sqrt(std::pow(anchorBox.width, 2) + std::pow(anchorBox.length, 2))); - } - -// Preparing the label bounding box - std::vector labelBoxes = {}; - for (int i = 0; i < nbObjects; ++i) - { - float x = objectPositions.at(i, 0); - float y = objectPositions.at(i, 1); - if (x < xMin | x > xMax | y < yMin | y > yMax) - { - continue; - } - BoundingBox3D labelBox = {}; - labelBox.x = x; - labelBox.y = y; - labelBox.z = objectPositions.at(i, 2); - labelBox.length = objectDimensions.at(i, 0); - labelBox.width = objectDimensions.at(i, 1); - labelBox.height = objectDimensions.at(i, 2); - labelBox.yaw = objectYaws.at(i); - labelBox.classId = objectClassIds.at(i); - labelBoxes.emplace_back(labelBox); - } - - pybind11::array_t tensor; - tensor.resize({nbObjects, xSize, ySize, nbAnchors, 10}); //Tensor of size (6,252,252,4,10) for first file - - pybind11::buffer_info tensor_buffer = tensor.request(); - float *ptr1 = (float *) tensor_buffer.ptr; - // Zero filling the tensor. Every element is presently zero - for (size_t idx = 0; idx < nbObjects * xSize * ySize * nbAnchors * 10; idx++) - { - ptr1[idx] = 0; - } - - int posCnt = 0; - int negCnt = 0; - int objectCount = 0; - if (printTime) - { -// std::cout << "Received " << labelBoxes.size() << " objects" << std::endl; -// py::print("Received "+str(labelBoxes.size())+" objects"); - } - for (const auto& labelBox: labelBoxes) //For every label box which is a 3d bounding box - { - // zone-in on potential spatial area of interest - // Length of (width,length) axis diagonal. 
- float objectDiameter = std::sqrt(std::pow(labelBox.width, 2) + std::pow(labelBox.length, 2)); - // Offset = Number of grid boxes that can fit on the object diameter - const auto x_offset = static_cast(std::ceil(objectDiameter / (xStep * downscalingFactor))); - const auto y_offset = static_cast(std::ceil(objectDiameter / (yStep * downscalingFactor))); - // Xc = Number of grid boxes that can fit between Xmin (Ymin) and Label's x (y) coordinate - const auto xC = static_cast(std::floor((labelBox.x - xMin) / (xStep * downscalingFactor))); - const auto yC = static_cast(std::floor((labelBox.y - yMin) / (yStep * downscalingFactor))); - // X(Y)Start = Start from Xc (Yc) - Number of boxes in object's diameter. - // For example the object is located at 5 unites and is 2 unites long. Then X(Y)start will begin - // the search from 3 - const auto xStart = clip(xC - x_offset, 0, xSize); - const auto yStart = clip(yC - y_offset, 0, ySize); - // Similarly end the search at 8 units. Because the object cannot extend beyond that. 
- const auto xEnd = clip(xC + x_offset, 0, xSize); - const auto yEnd = clip(yC + y_offset, 0, ySize); - - float maxIou = 0; - BoundingBox3D bestAnchor = {}; - int bestAnchorId = 0; - int bestAnchorXId = 0; - int bestAnchorYId = 0; - for (int xId = xStart; xId < xEnd; xId++) // Iterate through every box within search diameter - // In our example case, from 3 till 8 - { - const float x = xId * xStep * downscalingFactor + xMin; - // Getting the real world x coordinate - for (int yId = yStart; yId < yEnd; yId++) // Iterate through every box within search diamter in y axis - { - const float y = yId * yStep * downscalingFactor + yMin; - // Get the real world y coordinates - int anchorCount = 0; - for (auto& anchorBox: anchorBoxes) // For every anchor box (4 in our case) - // Note that we are checking every anchor box for every label in the file - { - anchorBox.x = x; // Assign the real world x and y coordinate to the anchor box - anchorBox.y = y; // Note that anchor boxes originally didn't have Xs and Ys. - // This is because we need ot check them along the X-Y grid. - // However, they did have a z value attached to them. - - const float iouOverlap = iou(anchorBox, labelBox); // Get IOU between two 3D boxes. - - if (maxIou < iouOverlap) - { - maxIou = iouOverlap; - bestAnchor = anchorBox; - bestAnchorId = anchorCount; - bestAnchorXId = xId; - bestAnchorYId = yId; - // if(printTime){ - // if(anchorCount == 3){ - // py::print("\nIoU old: " +std::to_string(iouOverlap) + " new: " + std::to_string(maxIou)); - // } - // } - - } - - if (iouOverlap > positiveThreshold) // Accept the Anchor. Add the anchor details to the tensor. 
- { - // Tensor at CurrentObject Id, xth grid cell, yth grid cell, currentAnchor, 0 - tensor.mutable_at(objectCount, xId, yId, anchorCount, 0) = 1; - - auto diag = anchorDiagonals[anchorCount]; - tensor.mutable_at(objectCount, xId, yId, anchorCount, 1) = (labelBox.x - anchorBox.x) / diag; // delta x,y,z - tensor.mutable_at(objectCount, xId, yId, anchorCount, 2) = (labelBox.y - anchorBox.y) / diag; - tensor.mutable_at(objectCount, xId, yId, anchorCount, 3) = (labelBox.z - anchorBox.z) / anchorBox.height; - - tensor.mutable_at(objectCount, xId, yId, anchorCount, 4) = std::log(labelBox.length / anchorBox.length); // delta l,w,h - tensor.mutable_at(objectCount, xId, yId, anchorCount, 5) = std::log(labelBox.width / anchorBox.width); - tensor.mutable_at(objectCount, xId, yId, anchorCount, 6) = std::log(labelBox.height / anchorBox.height); - - // tensor.mutable_at(objectCount, xId, yId, anchorCount, 7) = std::sin(labelBox.yaw - anchorBox.yaw); //delta yaw - tensor.mutable_at(objectCount, xId, yId, anchorCount, 7) = labelBox.yaw - anchorBox.yaw; //delta yaw - if (labelBox.yaw > 0) // Is yaw > 0 - { - tensor.mutable_at(objectCount, xId, yId, anchorCount, 8) = 1; - } - else - { - tensor.mutable_at(objectCount, xId, yId, anchorCount, 8) = 0; - } - - tensor.mutable_at(objectCount, xId, yId, anchorCount, 9) = labelBox.classId; - - } - else if (iouOverlap < negativeThreshold) - { - tensor.mutable_at(objectCount, xId, yId, anchorCount, 0) = 0; - } - else - { - tensor.mutable_at(objectCount, xId, yId, anchorCount, 0) = -1; - } - - anchorCount++; - } - } - } - - if (maxIou < positiveThreshold) // Comparing maxIOU for that object obtained after checking with every anchor box - // If none of the anchors passed the threshold, then we place the best anchor details for that object. 
- { - negCnt++; - // if (printTime) - // { - // // std::cout << "\nThere was no sufficiently overlapping anchor anywhere for object " << objectCount << std::endl; - // py::print("\nThere was no sufficiently overlapping anchor anywhere for object " +std::to_string(objectCount)); - // // std::cout << "Best IOU was " << maxIou << ". Adding the best location regardless of threshold." << std::endl; - // py::print("\nBest IOU was "+std::to_string(maxIou)+" Adding the best location regardless of threshold"); - // py::print("\nBest IOU.x was "+std::to_string(bestAnchor.x)+" "); - // py::print("\nBest IOU.y was "+std::to_string(bestAnchor.y)+" "); - // py::print("\nBest IOU.z was "+std::to_string(bestAnchor.z)+" "); - // py::print("\nBest IOU.ry was "+std::to_string(bestAnchor.yaw)+" "); - // } - - const auto xId = bestAnchorXId; - const auto yId = bestAnchorYId; - // const auto xId = static_cast(std::floor((labelBox.x - xMin) / (xStep * downscalingFactor))); - // const auto yId = static_cast(std::floor((labelBox.y - yMin) / (yStep * downscalingFactor))); - const float diag = std::sqrt(std::pow(bestAnchor.width, 2) + std::pow(bestAnchor.length, 2)); - - tensor.mutable_at(objectCount, xId, yId, bestAnchorId, 0) = 1; - - tensor.mutable_at(objectCount, xId, yId, bestAnchorId, 1) = (labelBox.x - bestAnchor.x) / diag; - tensor.mutable_at(objectCount, xId, yId, bestAnchorId, 2) = (labelBox.y - bestAnchor.y) / diag; - tensor.mutable_at(objectCount, xId, yId, bestAnchorId, 3) = (labelBox.z - bestAnchor.z) / bestAnchor.height; - - tensor.mutable_at(objectCount, xId, yId, bestAnchorId, 4) = std::log(labelBox.length / bestAnchor.length); - tensor.mutable_at(objectCount, xId, yId, bestAnchorId, 5) = std::log(labelBox.width / bestAnchor.width); - tensor.mutable_at(objectCount, xId, yId, bestAnchorId, 6) = std::log(labelBox.height / bestAnchor.height); - - // tensor.mutable_at(objectCount, xId, yId, bestAnchorId, 7) = std::sin(labelBox.yaw - bestAnchor.yaw); - 
tensor.mutable_at(objectCount, xId, yId, bestAnchorId, 7) = labelBox.yaw - bestAnchor.yaw; - if (labelBox.yaw > 0) - { - tensor.mutable_at(objectCount, xId, yId, bestAnchorId, 8) = 1; - } - else - { - tensor.mutable_at(objectCount, xId, yId, bestAnchorId, 8) = 0; - } -// Class id is the classification label (0,1,2,3) - tensor.mutable_at(objectCount, xId, yId, bestAnchorId, 9) = labelBox.classId; - } - else - { - posCnt++; - if (printTime) - { - std::cout << "\nAt least 1 anchor was positively matched for object " << objectCount << std::endl; - std::cout << "Best IOU was " << maxIou << "." << std::endl; - } - } - - objectCount++; - } - - std::chrono::high_resolution_clock::time_point t2 = std::chrono::high_resolution_clock::now(); - auto duration = std::chrono::duration_cast( t2 - t1 ).count(); - if (printTime) - std::cout << "createPillarsTarget took: " << static_cast(duration) / 1e6 << " seconds" << std::endl; - - return std::make_tuple(tensor, posCnt, negCnt); -} - -float cmath_sin(float value){ - return std::sin(value); -} - - -PYBIND11_MODULE(point_pillars_v2, m) -{ - m.def("createPillars", &createPillars, "Runs function to create point pillars input tensors"); - m.def("createPillarsTarget", &createPillarsTarget, "Runs function to create point pillars output ground truth"); - m.def("cmath_sin", &cmath_sin, "Runs function to compute sine"); -} From 4ab1d55599245c950ccc62a0e16bfe6294d0f280 Mon Sep 17 00:00:00 2001 From: tjtanaa Date: Mon, 23 Nov 2020 15:57:26 +0800 Subject: [PATCH 3/4] Evaluation; evaluate the output using official cpp code --- inference_utils_v2.py | 19 ++- point_pillars_custom_processors_v2_2.py | 62 +--------- point_pillars_evaluation_v2_2.py | 158 ++++++++++++++++++++++++ 3 files changed, 175 insertions(+), 64 deletions(-) create mode 100644 point_pillars_evaluation_v2_2.py diff --git a/inference_utils_v2.py b/inference_utils_v2.py index 685b1ec..3d5eb4a 100644 --- a/inference_utils_v2.py +++ b/inference_utils_v2.py @@ -96,14 +96,12 @@ def 
limit_period(val, offset=0.5, period=np.pi): return val - np.floor(val / period + offset) * period def inverse_yaw_element(bb_yaw): - - bb_yaw -= np.pi / 2 while bb_yaw > np.pi: - print("larger than pi") + # print("larger than pi") bb_yaw -= (np.pi * 2) while bb_yaw < -np.pi: - print("smaller than -pi") + # print("smaller than -pi") bb_yaw += (np.pi * 2) return bb_yaw @@ -162,6 +160,19 @@ def generate_bboxes_from_pred_and_np_array(occ, pos, siz, ang, hdg, clf, anchor_ return predicted_boxes, np.array(predicted_boxes_list) +def convert_boxes_to_list(set_boxes): + # (B, N) + batch_predicted_boxes_list = [] + for batch_idx in range(len(set_boxes)): + predicted_boxes_list = [] + + for box in set_boxes[batch_idx]: + + predicted_boxes_list.append([box.x, box.y, box.z, box.length, box.width, box.height, + box.yaw, box.heading, box.cls, box.conf]) + + batch_predicted_boxes_list.append(predicted_boxes_list) + return batch_predicted_boxes_list class GroundTruthGenerator(DataProcessor): """ Multiprocessing-safe data generator for training, validation or testing, without fancy augmentation """ diff --git a/point_pillars_custom_processors_v2_2.py b/point_pillars_custom_processors_v2_2.py index ae2b17c..2c0886c 100644 --- a/point_pillars_custom_processors_v2_2.py +++ b/point_pillars_custom_processors_v2_2.py @@ -70,9 +70,6 @@ def make_ground_truth(self, gt_boxes_3d: Any, gt_cls_type_list: List[str]): # filter labels by classes (cars, pedestrians and Trams) # Label has 4 properties (Classification (0th index of labels file), # centroid coordinates, dimensions, yaw) - # labels = list(filter(lambda x: x.classification in self.classes, labels)) - - if len(gt_boxes_3d) == 0: pX, pY = int(self.Xn / self.downscaling_factor), int(self.Yn / self.downscaling_factor) @@ -82,12 +79,6 @@ def make_ground_truth(self, gt_boxes_3d: Any, gt_cls_type_list: List[str]): np.zeros((pX, pY, a), dtype='float32'), np.zeros((pX, pY, a, self.nb_classes), dtype='float64') # For each label file, generate 
these properties except for the Don't care class - - # target_positions = np.array([label.centroid for label in labels], dtype=np.float32) - # target_dimension = np.array([label.dimension for label in labels], dtype=np.float32) - # target_yaw = np.array([label.yaw for label in labels], dtype=np.float32) - # target_class = np.array([self.classes[label.classification] for label in labels], dtype=np.int32) - target_positions = gt_boxes_3d[:,:3] target_dimension = gt_boxes_3d[:,3:6] # don't have to translate again target_yaw = gt_boxes_3d[:, 6] @@ -125,13 +116,11 @@ def make_ground_truth(self, gt_boxes_3d: Any, gt_cls_type_list: List[str]): self.neg_cnt += neg # return a merged target view for all objects in the ground truth and get categorical labels - # print("target.shape: ", target.shape) + sel = select_best_anchors(target) - # print("self.shape: ", sel[...,0].shape) ohe = tf.keras.utils.to_categorical(sel[..., 9], num_classes=self.nb_classes, dtype='float64') # print("self.shape: ", sel[...,0].shape) - # print("ohe.shape: ", ohe.shape) - # print("sel[8].shape: ",sel[..., 8].shape) + return sel[..., 0], sel[..., 1:4], sel[..., 4:7], sel[..., 7], sel[..., 8], ohe @@ -149,11 +138,8 @@ def __init__(self, batch_size: int, root_dir:str, npoints:int =16384, split: str random_select=random_select, gt_database_dir=gt_database_dir, aug_hard_ratio=aug_hard_ratio, **kwargs ) - # self.data_reader = data_reader self.batch_size = batch_size self.sample_id_list=self.get_sample_id_list() - # self.split = split - # print("CustomDataGenerator: " ,self.split) def get_sample(self, index): @@ -165,7 +151,6 @@ def __len__(self): def __getitem__(self, batch_id: int): file_ids = range(batch_id * self.batch_size, self.batch_size * (batch_id + 1)) - # print("inside getitem") pillars = [] voxels = [] occupancy = [] @@ -229,9 +214,7 @@ def __getitem__(self, batch_id: int): return [pillars, voxels] def on_epoch_end(self): - # print("inside epoch") if self.split=='train' or self.split =='val': 
- # pass self.sample_id_list=shuffle(self.sample_id_list) @@ -251,9 +234,6 @@ def __init__(self, batch_size: int, root_dir:str, npoints:int =16384, split: str # self.data_reader = data_reader self.batch_size = batch_size self.sample_id_list=self.get_sample_id_list() - # self.split = split - # self.split = split - # print("AnalyseCustomDataGenerator: " ,self.split) def _get_sample(self, index): @@ -265,7 +245,6 @@ def __len__(self): def __getitem__(self, batch_id: int): file_ids = range(batch_id * self.batch_size, self.batch_size * (batch_id + 1)) - # print("inside getitem") pillars = [] voxels = [] occupancy = [] @@ -279,23 +258,15 @@ def __getitem__(self, batch_id: int): gt_boxes3d_ = [] sample_ = [] - # save_viz_path = "/home/tan/tjtanaa/PointPillars/visualization/custom_processor" - # # Initialize and setup output directory. - # Converter = PointvizConverter(save_viz_path) - for i in file_ids: - # print(i) - # print(type(i)) sample = self._get_sample(i) # For each file, dividing the space into a x-y grid to create pillars pts_lidar = sample['calib'].rect_to_lidar(sample['pts_rect']) - # print(pts_lidar.shape) pts_input = np.concatenate((pts_lidar, sample['pts_features']), axis=1) # (N, C) gt_boxes3d_xyz = sample['calib'].rect_to_lidar(sample['gt_boxes3d'][:,:3]) - # print(gt_boxes3d_xyz.shape) gt_boxes3d = np.concatenate(( gt_boxes3d_xyz[:,0,np.newaxis], # 0 x @@ -306,44 +277,17 @@ def __getitem__(self, batch_id: int): sample['gt_boxes3d'][:,3,np.newaxis], # 5 h # same as the original label -sample['gt_boxes3d'][:,6,np.newaxis], # 6 ry ), axis=1) - - # print(type(gt_boxes3d)) - # gt_boxes3d = self.limit_yaw(gt_boxes3d) - - # bbox_params = self.convert_labels_into_point_viz_format(gt_boxes3d) - # print(bbox_params.shape) - # Converter.compile("custom_sample_{}".format(i), coors=pts_input[:,:3], intensity=pts_input[:,3], - # bbox_params=bbox_params) - - - # exit() - - # print(pts_input.shape) # Voxels are the pillar ids pillars_, voxels_ = 
self.make_point_pillars(pts_input) - # print(pillars_.shape, voxels_.shape) - # for i in range(10): - # print(pillars_[0,0,i,:]) - # print(np.sum(pillars_ > 0)) - # exit() - pillars.append(pillars_) voxels.append(voxels_) - # print(sample['gt_cls_type_list']) - # print("split: ", self.split) if self.split=='train' or self.split =='val': - # print(len(gt_boxes3d), ", ", len(sample['gt_cls_type_list'])) if (len(gt_boxes3d) == 0): print("file id: ", i, " has zero gt label") occupancy_, position_, size_, angle_, heading_, classification_ = self.make_ground_truth( gt_boxes3d, sample['gt_cls_type_list']) - # print(len(a)) - # if - - # print(occupancy_.shape, position_.shape, size_.shape, angle_.shape, heading_.shape, classification_.shape) - occupancy.append(occupancy_) position.append(position_) @@ -356,8 +300,6 @@ def __getitem__(self, batch_id: int): gt_boxes3d_.append(gt_boxes3d) pts_input_.append(pts_input) - # exit() - pillars = np.concatenate(pillars, axis=0) voxels = np.concatenate(voxels, axis=0) diff --git a/point_pillars_evaluation_v2_2.py b/point_pillars_evaluation_v2_2.py new file mode 100644 index 0000000..01fd5aa --- /dev/null +++ b/point_pillars_evaluation_v2_2.py @@ -0,0 +1,158 @@ + + +import os +from glob import glob +import numpy as np +import tensorflow as tf +from point_pillars_custom_processors_v2_2 import CustomDataGenerator, AnalyseCustomDataGenerator +from inference_utils_v2 import generate_bboxes_from_pred, GroundTruthGenerator +from inference_utils_v2 import focal_loss_checker, rotational_nms, generate_bboxes_from_pred_and_np_array, convert_boxes_to_list +from readers import KittiDataReader +from config_v2_2 import Parameters +from network_v2_2 import build_point_pillar_graph +from datetime import datetime + +from det3d.kitti_dataset.utils.evaluation import save_kitti_format, save_kitti_format_for_evaluation + +from point_viz.converter import PointvizConverter + +DATA_ROOT = "/media/data3/tjtanaa/kitti_dataset/" +MODEL_ROOT = 
"./logs_Car_Custom_Dataset_No_Early_Stopping_wo_Aug_wo_val_new_network" + +KITTI_EVALUATION_OUTPUT = os.path.join(MODEL_ROOT, "Evaluation") +if not os.path.exists(KITTI_EVALUATION_OUTPUT): + os.makedirs(KITTI_EVALUATION_OUTPUT) + +os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" +os.environ["CUDA_VISIBLE_DEVICES"] = "3" + + +def limit_period(val, offset=0.5, period=np.pi): + return val - np.floor(val / period + offset) * period + +if __name__ == "__main__": + + params = Parameters() + pillar_net = build_point_pillar_graph(params) + pillar_net.load_weights(os.path.join(MODEL_ROOT, "model.h5")) + pillar_net.summary() + + # save_viz_path = "/home/tan/tjtanaa/PointPillars/visualization/custom_prediction_multiprocessing" + save_viz_path = os.path.join("/home/tan/tjtanaa/PointPillars/visualization", MODEL_ROOT.split('/')[-1]) + # Initialize and setup output directory. + Converter = PointvizConverter(save_viz_path) + + gt_database_dir = os.path.join(DATA_ROOT, "gt_database") + + validation_gen = AnalyseCustomDataGenerator(batch_size=params.batch_size, root_dir=DATA_ROOT, + npoints=20000, split='train_val_test',random_select=False, classes=list(params.classes_map.keys())) + inference_duration = [] + sample_index = 0 # has to be controlled manually to ensure that the sequence number is continuous + + for batch_idx in range(0,len(validation_gen)): + [pillars, voxels], [occupancy_, position_, size_, angle_, heading_], [pts_input, gt_boxes3d, sample] = validation_gen[batch_idx] + + start=datetime.now() + + occupancy, position, size, angle, heading = pillar_net.predict([pillars, voxels]) + + inference_duration.append( datetime.now()-start) + + classification = np.zeros(shape=np.array(occupancy).shape) + classification_ = classification + + set_boxes, confidences = [], [] + loop_range = occupancy_.shape[0] if len(occupancy_.shape) == 4 else 1 + for i in range(loop_range): + set_box, predicted_boxes3d = generate_bboxes_from_pred_and_np_array(occupancy[i], position[i], size[i], 
angle[i], + heading[i], + classification[i], params.anchor_dims, occ_threshold=0.5) + + + _, decoded_gt_boxes3d = generate_bboxes_from_pred_and_np_array(occupancy_[i], position_[i], size_[i], angle_[i], + heading_[i], + classification_[i], params.anchor_dims, occ_threshold=0.4) + + # gt_boxes3d_ = gt_boxes3d[i] + gt_boxes3d_ = decoded_gt_boxes3d + + print(gt_boxes3d_.shape) + if(len(gt_boxes3d_) == 0): + gt_bbox_params_list = [] + else: + gt_bbox_params = np.stack([gt_boxes3d_[:,3], gt_boxes3d_[:,5], gt_boxes3d_[:,4], + gt_boxes3d_[:,1], gt_boxes3d_[:,2] , + gt_boxes3d_[:,0], + gt_boxes3d_[:,6]], axis=1) + + + gt_bbox_params_list = gt_bbox_params.tolist() + # gt_bbox_params_list = [] + for k in range(len(gt_bbox_params_list)): + msg = "%.5f, %s, %.5f"%(decoded_gt_boxes3d[k,9], params.map_classes[int(decoded_gt_boxes3d[k,8])], decoded_gt_boxes3d[k,6]) + # msg = "%.5f, %.5f"%(gt_bbox_params_list[k][3],gt_bbox_params_list[k][5]) + gt_bbox_params_list[k].append("Green") + # gt_bbox_params_list[k].append("1.0") + gt_bbox_params_list[k].append(msg) + + if len(set_box) > 0: + + + # NMS + # set_box + # print("start nms") + confidence = [float(box.conf) for box in set_box] + nms_boxes = rotational_nms([set_box], [confidence], occ_threshold=0.5, nms_iou_thr=0.5) + + predicted_boxes3d_list = convert_boxes_to_list(nms_boxes) + + predicted_boxes3d = np.array(predicted_boxes3d_list[0]) + predicted_boxes3d_ = predicted_boxes3d + + print("batch_idx: ", batch_idx * params.batch_size + i, " has ", predicted_boxes3d_.shape, "predictions") + + bbox_params = np.stack([predicted_boxes3d_[:,3], predicted_boxes3d_[:,5], predicted_boxes3d_[:,4], + predicted_boxes3d_[:,1], predicted_boxes3d_[:,2] , + predicted_boxes3d_[:,0], + predicted_boxes3d_[:,6]], axis=1) + + + bbox_params_list = bbox_params.tolist() + # bbox_labels_conf = [str(predicted_boxes3d[k,9]) for k in range(predicted_boxes3d.shape[0])] + for k in range(predicted_boxes3d.shape[0]): + msg = "%.5f, %s, 
%.5f"%(predicted_boxes3d[k,9],params.map_classes[int(predicted_boxes3d[k,8])], predicted_boxes3d[k,6]) + bbox_params_list[k].append("Magenta") + bbox_params_list[k].append(msg) + # bbox_params_list[k].append(str(predicted_boxes3d[k,9]) + "=" + params.map_classes[int(predicted_boxes3d[k,8])]) + gt_bbox_params_list.append(bbox_params_list[k]) + + + # save as kitti format for evaluation + cur_sample_id = batch_idx * params.batch_size + i + sample_file_name = validation_gen.sample_id_list[cur_sample_id] + calib = sample[i]['calib'] + # cur_boxes3d = cur_boxes3d.cpu().numpy() + + cur_boxes3d_xyz = calib.lidar_to_rect(predicted_boxes3d[:, 0:3]) + + cur_boxes3d = np.concatenate(( + cur_boxes3d_xyz[:,0,np.newaxis], # 0 x + cur_boxes3d_xyz[:,1,np.newaxis] + predicted_boxes3d[:,5,np.newaxis] / 2, # 1 y + cur_boxes3d_xyz[:,2,np.newaxis], # 2 z + predicted_boxes3d[:,5,np.newaxis], # 3 l # same as the original label + predicted_boxes3d[:,4,np.newaxis], # 4 w # same as the original label + predicted_boxes3d[:,3,np.newaxis], # 5 h # same as the original label + -predicted_boxes3d[:,6,np.newaxis], # 6 ry + ), axis=1) + cur_scores_raw = predicted_boxes3d[:,-1] + image_shape = validation_gen.get_image_shape(sample_file_name) + labels_obj = validation_gen.get_label(sample_file_name) + classes = ['Car' for i in range(len(predicted_boxes3d))] + save_kitti_format_for_evaluation(sample_index, calib, cur_boxes3d, KITTI_EVALUATION_OUTPUT, cur_scores_raw, image_shape, classes, labels_obj) + sample_index += 1 + + coor = pts_input[i][:,[1,2,0]] + Converter.compile("evaluation_sample_{}".format(batch_idx * params.batch_size+i), coors=coor, intensity=pts_input[i][:,3], + bbox_params=gt_bbox_params_list) + # print("Average runtime speed: ", np.mean(inference_duration[20:])) + From 78c8d8f907ba364136b6615986ac80dbec81390d Mon Sep 17 00:00:00 2001 From: tjtanaa Date: Wed, 25 Nov 2020 13:40:49 +0800 Subject: [PATCH 4/4] Added (single class no augmentation) mtr training, evaluation, and prediction 
script --- config_mtr_v1.py | 159 ++++++++++++ config_v2_2.py | 3 +- inference_utils_mtr_v1.py | 190 ++++++++++++++ inference_utils_v2.py | 30 --- inference_utils_v2_2.py | 190 ++++++++++++++ mtr_point_pillars_evaluation_v1.py | 164 ++++++++++++ mtr_point_pillars_prediction_v1.py | 138 ++++++++++ ...y => mtr_point_pilllars_training_run_v1.py | 62 +++-- ...m_processors_v2.py => mtr_processors_v1.py | 243 +++++++++--------- network_v2.py | 4 +- network_v2_2.py | 12 +- point_pillars_custom_prediction.py | 157 ----------- point_pillars_custom_prediction_v2_2.py | 41 +-- point_pillars_custom_processors_v2_2.py | 9 +- point_pillars_evaluation_v2_2.py | 6 +- point_pillars_test.py | 70 ----- point_pillars_training_custom_run_v2_2.py | 31 ++- point_pillars_training_run.py | 73 ------ point_pillars_visualize_input.py | 3 - 19 files changed, 1057 insertions(+), 528 deletions(-) create mode 100644 config_mtr_v1.py create mode 100644 inference_utils_mtr_v1.py create mode 100644 inference_utils_v2_2.py create mode 100644 mtr_point_pillars_evaluation_v1.py create mode 100644 mtr_point_pillars_prediction_v1.py rename point_pillars_training_custom_run_v2.py => mtr_point_pilllars_training_run_v1.py (59%) rename point_pillars_custom_processors_v2.py => mtr_processors_v1.py (62%) delete mode 100644 point_pillars_custom_prediction.py delete mode 100644 point_pillars_test.py delete mode 100644 point_pillars_training_run.py diff --git a/config_mtr_v1.py b/config_mtr_v1.py new file mode 100644 index 0000000..a1207f5 --- /dev/null +++ b/config_mtr_v1.py @@ -0,0 +1,159 @@ +import numpy as np + + +class GridParameters: + x_min = -10.08 + x_max = 10.08 + x_step = 0.04 + + y_min = -10.08 #-5 + y_max = 10.08 #7.5 + y_step = 0.04 + + # z_min = -1.0 + # z_max = 3.0 + z_min = -1.0 + z_max = 6.0 + + # derived parameters + Xn_f = float(x_max - x_min) / x_step + Yn_f = float(y_max - y_min) / y_step + Xn = int(Xn_f) + Yn = int(Yn_f) + + def __init__(self, **kwargs): + super(GridParameters, 
self).__init__(**kwargs) + + +class DataParameters: + + # classes_map = {"Car": 0, + # "Pedestrian": 1, + # "Person_sitting": 1, + # "Cyclist": 2, + # "Truck": 3, + # "Van": 3, + # "Tram": 3, + # "Misc": 3, + # } + + # for Car and Pedestrian + # map_classes = { + # 0: "Car", + # 1: "Pedestrian" + # } + + # classes_map = {"Car": 0, + # "Pedestrian": 1, + # "Person_sitting": 1, + # # "Cyclist": 2, + # # "Truck": 3, + # # "Van": 3, + # # "Tram": 3, + # # "Misc": 3, + # } + + + # for Car only + map_classes = { + 0: "pedestrian" + } + + classes_map = {"pedestrian": 0 + } + + # # for Pedestrian only + # map_classes = { + # 0: "Pedestrian" + # } + + # classes_map = { + # "Pedestrian": 0, + # "Person_sitting": 0, + # } + + nb_classes = len(np.unique(list(classes_map.values()))) + assert nb_classes == np.max(np.unique(list(classes_map.values()))) + 1, 'Starting class indexing at zero.' + + # classes = {"Car": 0, + # "Pedestrian": 1, + # "Person_sitting": 1, + # "Cyclist": 2, + # "Truck": 3, + # "Van": 3, + # "Tram": 3, + # "Misc": 3, + # } + + # nb_classes = len(np.unique(list(classes.values()))) + # assert nb_classes == np.max(np.unique(list(classes.values()))) + 1, 'Starting class indexing at zero.' 
+ + def __init__(self, **kwargs): + super(DataParameters, self).__init__(**kwargs) + + +class NetworkParameters: + + max_points_per_pillar = 100 + max_pillars = 12000 + nb_features = 9 + nb_channels = 64 + downscaling_factor = 2 + + # length (x), width (y), height (z), z-center, orientation + # for car and pedestrian + # anchor_dims = np.array([[3.9, 1.6, 1.56, -1, 0], + # [3.9, 1.6, 1.56, -1, np.pi/2], + # [0.8, 0.6, 1.73, -0.6, 0], + # [0.8, 0.6, 1.73, -0.6, np.pi/2], + # ], dtype=np.float32).tolist() + + # for car only + # anchor_dims = np.array([[3.9, 1.6, 1.56, -1, 0], + # [3.9, 1.6, 1.56, -1, np.pi/2]], dtype=np.float32).tolist() + + # for pedestrian only + anchor_dims = np.array([ + [0.62, 0.56, 0.7, 1.8, 0], + [0.62, 0.56, 0.7, 1.8, np.pi/2], + [0.62, 0.56, 1.5, 1.63646424, 0], + [0.62, 0.56, 1.5, 1.63646424, np.pi/2], + ], dtype=np.float32).tolist() + nb_dims = 3 + + # for car + # positive_iou_threshold = 0.6 + # negative_iou_threshold = 0.3 + + # for pedestrian + positive_iou_threshold = 0.5 + negative_iou_threshold = 0.35 + + # batch_size = 1 + num_gpus = 1 + batch_size = 4 + total_training_epochs = 160 + # iters_to_decay = 101040. # 15 * 4 * ceil(6733. / 4) --> every 15 epochs on 6733 kitti samples, cf. 
pillar paper + iters_to_decay = 100500 + learning_rate = 2e-4 + decay_rate = 1e-8 + L1 = 0 + L2 = 0 + alpha = 0.25 + gamma = 2.0 + # original pillars paper values + focal_weight = 1.0 # 1.0 + loc_weight = 2.0 # 2.0 + size_weight = 2.0 # 2.0 + angle_weight = 2.0 # 2.0 + heading_weight = 0.2 # 0.2 + class_weight = 0.5 # 0.2 + + def __init__(self, **kwargs): + super(NetworkParameters, self).__init__(**kwargs) + + +class Parameters(GridParameters, DataParameters, NetworkParameters): + + def __init__(self, **kwargs): + super(Parameters, self).__init__(**kwargs) diff --git a/config_v2_2.py b/config_v2_2.py index 5ad41a0..5143f05 100644 --- a/config_v2_2.py +++ b/config_v2_2.py @@ -127,7 +127,8 @@ class NetworkParameters: # negative_iou_threshold = 0.35 # batch_size = 1 - batch_size = 4 + num_gpus = 1 + batch_size = 4 total_training_epochs = 160 # iters_to_decay = 101040. # 15 * 4 * ceil(6733. / 4) --> every 15 epochs on 6733 kitti samples, cf. pillar paper iters_to_decay = 100500 diff --git a/inference_utils_mtr_v1.py b/inference_utils_mtr_v1.py new file mode 100644 index 0000000..fa91bd6 --- /dev/null +++ b/inference_utils_mtr_v1.py @@ -0,0 +1,190 @@ +import numpy as np +import cv2 as cv +from typing import List +from config_mtr_v1 import Parameters +from mtr_processors_v1 import DataProcessor + + +class BBox(tuple): + """ bounding box tuple that can easily be accessed while being compatible to cv2 rotational rects """ + + def __new__(cls, bb_x, bb_y, bb_z, bb_length, bb_width, bb_height, bb_yaw, bb_heading, bb_cls, bb_conf): + bbx_tuple = ((float(bb_x), float(bb_y)), (float(bb_length), float(bb_width)), float(np.rad2deg(bb_yaw))) + return super(BBox, cls).__new__(cls, tuple(bbx_tuple)) + + def __init__(self, bb_x, bb_y, bb_z, bb_length, bb_width, bb_height, bb_yaw, bb_heading, bb_cls, bb_conf): + self.x = bb_x + self.y = bb_y + self.z = bb_z + self.length = bb_length + self.width = bb_width + self.height = bb_height + self.yaw = bb_yaw + self.heading = bb_heading + 
self.cls = bb_cls + self.conf = bb_conf + + def __str__(self): + return "BB | Cls: %s, x: %f, y: %f, l: %f, w: %f, yaw: %f" % ( + self.cls, self.x, self.y, self.length, self.width, self.yaw) + + +def rotational_nms(set_boxes, confidences, occ_threshold=0.7, nms_iou_thr=0.5): + """ rotational NMS + set_boxes = size NSeqs list of size NDet lists of tuples. each tuple has the form ((pos, pos), (size, size), angle) + confidences = size NSeqs list of lists containing NDet floats, i.e. one per detection + """ + assert len(set_boxes) == len(confidences) and 0 < occ_threshold < 1 and 0 < nms_iou_thr < 1 + if not len(set_boxes): + return [] + assert (isinstance(set_boxes[0][0][0][0], float) or isinstance(set_boxes[0][0][0][0], int)) and \ + (isinstance(confidences[0][0], float) or isinstance(confidences[0][0], int)) + nms_boxes = [] + for boxes, confs in zip(set_boxes, confidences): + assert len(boxes) == len(confs) + indices = cv.dnn.NMSBoxesRotated(boxes, confs, occ_threshold, nms_iou_thr) + # print(indices) + indices = indices.reshape(len(indices)).tolist() + nms_boxes.append([boxes[i] for i in indices]) + return nms_boxes + + +def generate_bboxes_from_pred(occ, pos, siz, ang, hdg, clf, anchor_dims, occ_threshold=0.5): + """ Generating the bounding boxes based on the regression targets """ + + # Get only the boxes where occupancy is greater or equal threshold. + real_boxes = np.where(occ >= occ_threshold) + # Get the indices of the occupancy array + coordinates = list(zip(real_boxes[0], real_boxes[1], real_boxes[2])) + # Assign anchor dimensions as original bounding box coordinates which will eventually be changed + # according to the predicted regression targets + anchor_dims = anchor_dims + real_anchors = np.random.rand(len(coordinates), len(anchor_dims[0])) + + for i, value in enumerate(real_boxes[2]): + real_anchors[i, ...] 
= anchor_dims[value] + + # Change the anchor boxes based on regression targets, this is the inverse of the operations given in + # createPillarTargets function (src/PointPillars.cpp) + predicted_boxes = [] + for i, value in enumerate(coordinates): + real_diag = np.sqrt(np.square(real_anchors[i][0]) + np.square(real_anchors[i][1])) + real_x = value[0] * Parameters.x_step * Parameters.downscaling_factor + Parameters.x_min + real_y = value[1] * Parameters.y_step * Parameters.downscaling_factor + Parameters.y_min + bb_x = pos[value][0] * real_diag + real_x + bb_y = pos[value][1] * real_diag + real_y + bb_z = pos[value][2] * real_anchors[i][2] + real_anchors[i][3] + # print(position[value], real_x, real_y, real_diag) + bb_length = np.exp(siz[value][0]) * real_anchors[i][0] + bb_width = np.exp(siz[value][1]) * real_anchors[i][1] + bb_height = np.exp(siz[value][2]) * real_anchors[i][2] + bb_yaw = ang[value] + real_anchors[i][4] + # bb_yaw = -np.arcsin(np.clip(ang[value], -1, 1)) + real_anchors[i][4] + bb_heading = np.round(hdg[value]) + bb_cls = np.argmax(clf[value]) + bb_conf = occ[value] + predicted_boxes.append(BBox(bb_x, bb_y, bb_z, bb_length, bb_width, bb_height, + bb_yaw, bb_heading, bb_cls, bb_conf)) + + + return predicted_boxes + + + +def limit_period(val, offset=0.5, period=np.pi): + return val - np.floor(val / period + offset) * period + +def inverse_yaw_element(bb_yaw): + bb_yaw -= np.pi / 2 + while bb_yaw > np.pi: + # print("larger than pi") + bb_yaw -= (np.pi * 2) + while bb_yaw < -np.pi: + # print("smaller than -pi") + bb_yaw += (np.pi * 2) + + return bb_yaw + + # if bb_yaw > np.pi /2: + # bb_yaw -= 2 * np.pi + + # bb_yaw += np.pi/2 + # return bb_yaw + +def generate_bboxes_from_pred_and_np_array(occ, pos, siz, ang, hdg, clf, anchor_dims, occ_threshold=0.5): + """ Generating the bounding boxes based on the regression targets """ + + # Get only the boxes where occupancy is greater or equal threshold. 
+ real_boxes = np.where(occ >= occ_threshold) + # print(occ.shape) + # Get the indices of the occupancy array + coordinates = list(zip(real_boxes[0], real_boxes[1], real_boxes[2])) + # Assign anchor dimensions as original bounding box coordinates which will eventually be changed + # according to the predicted regression targets + anchor_dims = anchor_dims + real_anchors = np.random.rand(len(coordinates), len(anchor_dims[0])) + + for i, value in enumerate(real_boxes[2]): + real_anchors[i, ...] = anchor_dims[value] + + # Change the anchor boxes based on regression targets, this is the inverse of the operations given in + # createPillarTargets function (src/PointPillars.cpp) + predicted_boxes = [] + predicted_boxes_list = [] + for i, value in enumerate(coordinates): + # print("coordinate ", i) + real_diag = np.sqrt(np.square(real_anchors[i][0]) + np.square(real_anchors[i][1])) + real_x = value[0] * Parameters.x_step * Parameters.downscaling_factor + Parameters.x_min + real_y = value[1] * Parameters.y_step * Parameters.downscaling_factor + Parameters.y_min + # print("i: ", i, "\tx: ", real_x, "\ty:", real_y) + # print("i: ", i, "\tx: ", value[0], "\ty:", value[1]) + bb_x = pos[value][0] * real_diag + real_x + bb_y = pos[value][1] * real_diag + real_y + bb_z = pos[value][2] * real_anchors[i][2] + real_anchors[i][3] + # print(position[value], real_x, real_y, real_diag) + bb_length = np.exp(siz[value][0]) * real_anchors[i][0] + bb_width = np.exp(siz[value][1]) * real_anchors[i][1] + bb_height = np.exp(siz[value][2]) * real_anchors[i][2] + bb_heading = np.round(hdg[value]) + bb_yaw = ang[value] + real_anchors[i][4] + # if np.int32(bb_heading) == 0: + # bb_yaw -= np.pi + + bb_cls = np.argmax(clf[value]) + bb_conf = occ[value] + predicted_boxes.append(BBox(bb_x, bb_y, bb_z, bb_length, bb_width, bb_height, + bb_yaw, bb_heading, bb_cls, bb_conf)) + predicted_boxes_list.append([bb_x, bb_y, bb_z, bb_length, bb_width, bb_height, + bb_yaw, bb_heading, bb_cls, bb_conf]) + + return 
predicted_boxes, np.array(predicted_boxes_list) + +def convert_boxes_to_list(set_boxes): + # (B, N) + batch_predicted_boxes_list = [] + for batch_idx in range(len(set_boxes)): + predicted_boxes_list = [] + + for box in set_boxes[batch_idx]: + + predicted_boxes_list.append([box.x, box.y, box.z, box.length, box.width, box.height, + box.yaw, box.heading, box.cls, box.conf]) + + batch_predicted_boxes_list.append(predicted_boxes_list) + return batch_predicted_boxes_list + + +def focal_loss_checker(y_true, y_pred, n_occs=-1): + y_true = np.stack(np.where(y_true == 1)) + if n_occs == -1: + n_occs = y_true.shape[1] + occ_thr = np.sort(y_pred.flatten())[-n_occs] + y_pred = np.stack(np.where(y_pred >= occ_thr)) + p = 0 + for gt in range(y_true.shape[1]): + for pr in range(y_pred.shape[1]): + if np.all(y_true[:, gt] == y_pred[:, pr]): + p += 1 + break + print("#matched gt: ", p, " #unmatched gt: ", y_true.shape[1] - p, " #unmatched pred: ", y_pred.shape[1] - p, + " occupancy threshold: ", occ_thr) diff --git a/inference_utils_v2.py b/inference_utils_v2.py index 3d5eb4a..a34441a 100644 --- a/inference_utils_v2.py +++ b/inference_utils_v2.py @@ -2,7 +2,6 @@ import cv2 as cv from typing import List from config_v2 import Parameters -from readers import DataReader from point_pillars_custom_processors_v2 import DataProcessor @@ -174,35 +173,6 @@ def convert_boxes_to_list(set_boxes): batch_predicted_boxes_list.append(predicted_boxes_list) return batch_predicted_boxes_list -class GroundTruthGenerator(DataProcessor): - """ Multiprocessing-safe data generator for training, validation or testing, without fancy augmentation """ - - def __init__(self, data_reader: DataReader, label_files: List[str], calibration_files: List[str] = None, - network_format: bool = False): - super(GroundTruthGenerator, self).__init__() - self.data_reader = data_reader - self.label_files = label_files - self.calibration_files = calibration_files - self.network_format = network_format - - def __len__(self): - 
return len(self.label_files) - - def __getitem__(self, file_id: int): - label = self.data_reader.read_label(self.label_files[file_id]) - R, t = self.data_reader.read_calibration(self.calibration_files[file_id]) - label_transformed = self.transform_labels_into_lidar_coordinates(label, R, t) - if self.network_format: - occupancy, position, size, angle, heading, classification = self.make_ground_truth(label_transformed) - occupancy = np.array(occupancy) - position = np.array(position) - size = np.array(size) - angle = np.array(angle) - heading = np.array(heading) - classification = np.array(classification) - return [occupancy, position, size, angle, heading, classification] - return label_transformed - def focal_loss_checker(y_true, y_pred, n_occs=-1): y_true = np.stack(np.where(y_true == 1)) diff --git a/inference_utils_v2_2.py b/inference_utils_v2_2.py new file mode 100644 index 0000000..e024624 --- /dev/null +++ b/inference_utils_v2_2.py @@ -0,0 +1,190 @@ +import numpy as np +import cv2 as cv +from typing import List +from config_v2_2 import Parameters +from point_pillars_custom_processors_v2_2 import DataProcessor + + +class BBox(tuple): + """ bounding box tuple that can easily be accessed while being compatible to cv2 rotational rects """ + + def __new__(cls, bb_x, bb_y, bb_z, bb_length, bb_width, bb_height, bb_yaw, bb_heading, bb_cls, bb_conf): + bbx_tuple = ((float(bb_x), float(bb_y)), (float(bb_length), float(bb_width)), float(np.rad2deg(bb_yaw))) + return super(BBox, cls).__new__(cls, tuple(bbx_tuple)) + + def __init__(self, bb_x, bb_y, bb_z, bb_length, bb_width, bb_height, bb_yaw, bb_heading, bb_cls, bb_conf): + self.x = bb_x + self.y = bb_y + self.z = bb_z + self.length = bb_length + self.width = bb_width + self.height = bb_height + self.yaw = bb_yaw + self.heading = bb_heading + self.cls = bb_cls + self.conf = bb_conf + + def __str__(self): + return "BB | Cls: %s, x: %f, y: %f, l: %f, w: %f, yaw: %f" % ( + self.cls, self.x, self.y, self.length, self.width, 
self.yaw) + + +def rotational_nms(set_boxes, confidences, occ_threshold=0.7, nms_iou_thr=0.5): + """ rotational NMS + set_boxes = size NSeqs list of size NDet lists of tuples. each tuple has the form ((pos, pos), (size, size), angle) + confidences = size NSeqs list of lists containing NDet floats, i.e. one per detection + """ + assert len(set_boxes) == len(confidences) and 0 < occ_threshold < 1 and 0 < nms_iou_thr < 1 + if not len(set_boxes): + return [] + assert (isinstance(set_boxes[0][0][0][0], float) or isinstance(set_boxes[0][0][0][0], int)) and \ + (isinstance(confidences[0][0], float) or isinstance(confidences[0][0], int)) + nms_boxes = [] + for boxes, confs in zip(set_boxes, confidences): + assert len(boxes) == len(confs) + indices = cv.dnn.NMSBoxesRotated(boxes, confs, occ_threshold, nms_iou_thr) + print(indices) + indices = indices.reshape(len(indices)).tolist() + nms_boxes.append([boxes[i] for i in indices]) + return nms_boxes + + +def generate_bboxes_from_pred(occ, pos, siz, ang, hdg, clf, anchor_dims, occ_threshold=0.5): + """ Generating the bounding boxes based on the regression targets """ + + # Get only the boxes where occupancy is greater or equal threshold. + real_boxes = np.where(occ >= occ_threshold) + # Get the indices of the occupancy array + coordinates = list(zip(real_boxes[0], real_boxes[1], real_boxes[2])) + # Assign anchor dimensions as original bounding box coordinates which will eventually be changed + # according to the predicted regression targets + anchor_dims = anchor_dims + real_anchors = np.random.rand(len(coordinates), len(anchor_dims[0])) + + for i, value in enumerate(real_boxes[2]): + real_anchors[i, ...] 
= anchor_dims[value] + + # Change the anchor boxes based on regression targets, this is the inverse of the operations given in + # createPillarTargets function (src/PointPillars.cpp) + predicted_boxes = [] + for i, value in enumerate(coordinates): + real_diag = np.sqrt(np.square(real_anchors[i][0]) + np.square(real_anchors[i][1])) + real_x = value[0] * Parameters.x_step * Parameters.downscaling_factor + Parameters.x_min + real_y = value[1] * Parameters.y_step * Parameters.downscaling_factor + Parameters.y_min + bb_x = pos[value][0] * real_diag + real_x + bb_y = pos[value][1] * real_diag + real_y + bb_z = pos[value][2] * real_anchors[i][2] + real_anchors[i][3] + # print(position[value], real_x, real_y, real_diag) + bb_length = np.exp(siz[value][0]) * real_anchors[i][0] + bb_width = np.exp(siz[value][1]) * real_anchors[i][1] + bb_height = np.exp(siz[value][2]) * real_anchors[i][2] + bb_yaw = ang[value] + real_anchors[i][4] + # bb_yaw = -np.arcsin(np.clip(ang[value], -1, 1)) + real_anchors[i][4] + bb_heading = np.round(hdg[value]) + bb_cls = np.argmax(clf[value]) + bb_conf = occ[value] + predicted_boxes.append(BBox(bb_x, bb_y, bb_z, bb_length, bb_width, bb_height, + bb_yaw, bb_heading, bb_cls, bb_conf)) + + + return predicted_boxes + + + +def limit_period(val, offset=0.5, period=np.pi): + return val - np.floor(val / period + offset) * period + +def inverse_yaw_element(bb_yaw): + bb_yaw -= np.pi / 2 + while bb_yaw > np.pi: + # print("larger than pi") + bb_yaw -= (np.pi * 2) + while bb_yaw < -np.pi: + # print("smaller than -pi") + bb_yaw += (np.pi * 2) + + return bb_yaw + + # if bb_yaw > np.pi /2: + # bb_yaw -= 2 * np.pi + + # bb_yaw += np.pi/2 + # return bb_yaw + +def generate_bboxes_from_pred_and_np_array(occ, pos, siz, ang, hdg, clf, anchor_dims, occ_threshold=0.5): + """ Generating the bounding boxes based on the regression targets """ + + # Get only the boxes where occupancy is greater or equal threshold. 
+ real_boxes = np.where(occ >= occ_threshold) + # print(occ.shape) + # Get the indices of the occupancy array + coordinates = list(zip(real_boxes[0], real_boxes[1], real_boxes[2])) + # Assign anchor dimensions as original bounding box coordinates which will eventually be changed + # according to the predicted regression targets + anchor_dims = anchor_dims + real_anchors = np.random.rand(len(coordinates), len(anchor_dims[0])) + + for i, value in enumerate(real_boxes[2]): + real_anchors[i, ...] = anchor_dims[value] + + # Change the anchor boxes based on regression targets, this is the inverse of the operations given in + # createPillarTargets function (src/PointPillars.cpp) + predicted_boxes = [] + predicted_boxes_list = [] + for i, value in enumerate(coordinates): + # print("coordinate ", i) + real_diag = np.sqrt(np.square(real_anchors[i][0]) + np.square(real_anchors[i][1])) + real_x = value[0] * Parameters.x_step * Parameters.downscaling_factor + Parameters.x_min + real_y = value[1] * Parameters.y_step * Parameters.downscaling_factor + Parameters.y_min + # print("i: ", i, "\tx: ", real_x, "\ty:", real_y) + # print("i: ", i, "\tx: ", value[0], "\ty:", value[1]) + bb_x = pos[value][0] * real_diag + real_x + bb_y = pos[value][1] * real_diag + real_y + bb_z = pos[value][2] * real_anchors[i][2] + real_anchors[i][3] + # print(position[value], real_x, real_y, real_diag) + bb_length = np.exp(siz[value][0]) * real_anchors[i][0] + bb_width = np.exp(siz[value][1]) * real_anchors[i][1] + bb_height = np.exp(siz[value][2]) * real_anchors[i][2] + bb_heading = np.round(hdg[value]) + bb_yaw = ang[value] + real_anchors[i][4] + # if np.int32(bb_heading) == 0: + # bb_yaw -= np.pi + + bb_cls = np.argmax(clf[value]) + bb_conf = occ[value] + predicted_boxes.append(BBox(bb_x, bb_y, bb_z, bb_length, bb_width, bb_height, + bb_yaw, bb_heading, bb_cls, bb_conf)) + predicted_boxes_list.append([bb_x, bb_y, bb_z, bb_length, bb_width, bb_height, + bb_yaw, bb_heading, bb_cls, bb_conf]) + + return 
predicted_boxes, np.array(predicted_boxes_list) + +def convert_boxes_to_list(set_boxes): + # (B, N) + batch_predicted_boxes_list = [] + for batch_idx in range(len(set_boxes)): + predicted_boxes_list = [] + + for box in set_boxes[batch_idx]: + + predicted_boxes_list.append([box.x, box.y, box.z, box.length, box.width, box.height, + box.yaw, box.heading, box.cls, box.conf]) + + batch_predicted_boxes_list.append(predicted_boxes_list) + return batch_predicted_boxes_list + + +def focal_loss_checker(y_true, y_pred, n_occs=-1): + y_true = np.stack(np.where(y_true == 1)) + if n_occs == -1: + n_occs = y_true.shape[1] + occ_thr = np.sort(y_pred.flatten())[-n_occs] + y_pred = np.stack(np.where(y_pred >= occ_thr)) + p = 0 + for gt in range(y_true.shape[1]): + for pr in range(y_pred.shape[1]): + if np.all(y_true[:, gt] == y_pred[:, pr]): + p += 1 + break + print("#matched gt: ", p, " #unmatched gt: ", y_true.shape[1] - p, " #unmatched pred: ", y_pred.shape[1] - p, + " occupancy threshold: ", occ_thr) diff --git a/mtr_point_pillars_evaluation_v1.py b/mtr_point_pillars_evaluation_v1.py new file mode 100644 index 0000000..2c0acd5 --- /dev/null +++ b/mtr_point_pillars_evaluation_v1.py @@ -0,0 +1,164 @@ + + +import os +os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" +os.environ["CUDA_VISIBLE_DEVICES"] = "1" + +from glob import glob +import numpy as np +import tensorflow as tf +from mtr_processors_v1 import CustomDataGenerator, AnalyseCustomDataGenerator +from inference_utils_mtr_v1 import generate_bboxes_from_pred +from inference_utils_mtr_v1 import focal_loss_checker, rotational_nms, generate_bboxes_from_pred_and_np_array, convert_boxes_to_list +# from readers import KittiDataReader +from config_mtr_v1 import Parameters +from network_v2_2 import build_point_pillar_graph +from datetime import datetime + +from det3d.kitti_dataset.utils.evaluation import save_kitti_format, save_kitti_format_for_evaluation + +from point_viz.converter import PointvizConverter + +DATA_ROOT = 
"/media/data3/tjtanaa/Project4-MTR" # TODO make main arg +MODEL_ROOT = "./logs_Pedestrian_MTR_No_Early_Stopping_wo_Aug_with_val" +PC_STATISTICS_PATH = "/home/tan/tjtanaa/det3d/det3d/mtr_dataset/point_cloud_statistics" + +occ_threshold = 0.5 +nms_iou_thr=0.5 +# KITTI_EVALUATION_OUTPUT = os.path.join(MODEL_ROOT, "Evaluation") +# if not os.path.exists(KITTI_EVALUATION_OUTPUT): +# os.makedirs(KITTI_EVALUATION_OUTPUT) + + +def limit_period(val, offset=0.5, period=np.pi): + return val - np.floor(val / period + offset) * period + +if __name__ == "__main__": + + params = Parameters() + pillar_net = build_point_pillar_graph(params) + pillar_net.load_weights(os.path.join(MODEL_ROOT, "model.h5")) + pillar_net.summary() + + # save_viz_path = "/home/tan/tjtanaa/PointPillars/visualization/custom_prediction_multiprocessing" + save_viz_path = os.path.join("/home/tan/tjtanaa/PointPillars/visualization", MODEL_ROOT.split('/')[-1]) + # Initialize and setup output directory. + Converter = PointvizConverter(save_viz_path) + + gt_database_dir = os.path.join(DATA_ROOT, "gt_database") + + validation_gen = AnalyseCustomDataGenerator(batch_size=params.batch_size, root_dir=DATA_ROOT, + point_cloud_statistics_path=PC_STATISTICS_PATH, + random_select=False, + npoints=8000, split='test', classes=list(params.classes_map.keys())) + inference_duration = [] + sample_index = 0 # has to be controlled manually to ensure that the sequence number is continuous + + for batch_idx in range(0,len(validation_gen)): + [pillars, voxels], [occupancy_, position_, size_, angle_, heading_], [pts_input, gt_boxes3d] = validation_gen[batch_idx] + + start=datetime.now() + + occupancy, position, size, angle, heading = pillar_net.predict([pillars, voxels]) + + inference_duration.append( datetime.now()-start) + + classification = np.zeros(shape=np.array(occupancy).shape) + classification_ = classification + + set_boxes, confidences = [], [] + loop_range = occupancy_.shape[0] if len(occupancy_.shape) == 4 else 1 + 
print("batch_idx ", batch_idx, " has ", loop_range, "batch sample", " with occupancy sum: ", np.sum(occupancy)) + for i in range(loop_range): + set_box, predicted_boxes3d = generate_bboxes_from_pred_and_np_array(occupancy[i], position[i], size[i], angle[i], + heading[i], + classification[i], params.anchor_dims, occ_threshold=occ_threshold) + + + _, decoded_gt_boxes3d = generate_bboxes_from_pred_and_np_array(occupancy_[i], position_[i], size_[i], angle_[i], + heading_[i], + classification_[i], params.anchor_dims, occ_threshold=occ_threshold) + + # gt_boxes3d_ = gt_boxes3d[i] + gt_boxes3d_ = decoded_gt_boxes3d + + # print(gt_boxes3d_.shape) + if(len(gt_boxes3d_) == 0): + gt_bbox_params_list = [] + else: + gt_bbox_params = np.stack([gt_boxes3d_[:,3], gt_boxes3d_[:,5], gt_boxes3d_[:,4], + gt_boxes3d_[:,1], gt_boxes3d_[:,2] , + gt_boxes3d_[:,0], + gt_boxes3d_[:,6]], axis=1) + + + gt_bbox_params_list = gt_bbox_params.tolist() + # gt_bbox_params_list = [] + for k in range(len(gt_bbox_params_list)): + msg = "%.5f, %s, %.5f"%(decoded_gt_boxes3d[k,9], params.map_classes[int(decoded_gt_boxes3d[k,8])], decoded_gt_boxes3d[k,6]) + # msg = "%.5f, %.5f"%(gt_bbox_params_list[k][3],gt_bbox_params_list[k][5]) + gt_bbox_params_list[k].append("Green") + # gt_bbox_params_list[k].append("1.0") + gt_bbox_params_list[k].append(msg) + + if len(set_box) > 0: + + + # NMS + # set_box + # print("start nms") + confidence = [float(box.conf) for box in set_box] + nms_boxes = rotational_nms([set_box], [confidence], occ_threshold=occ_threshold, nms_iou_thr=nms_iou_thr) + + predicted_boxes3d_list = convert_boxes_to_list(nms_boxes) + + predicted_boxes3d = np.array(predicted_boxes3d_list[0]) + predicted_boxes3d_ = predicted_boxes3d + + print("sample_: ", batch_idx * params.batch_size + i, " has ", predicted_boxes3d_.shape, "predictions") + + bbox_params = np.stack([predicted_boxes3d_[:,3], predicted_boxes3d_[:,5], predicted_boxes3d_[:,4], + predicted_boxes3d_[:,1], predicted_boxes3d_[:,2] , + 
predicted_boxes3d_[:,0], + predicted_boxes3d_[:,6]], axis=1) + # print("z ", predicted_boxes3d[:,2]) + + bbox_params_list = bbox_params.tolist() + # bbox_labels_conf = [str(predicted_boxes3d[k,9]) for k in range(predicted_boxes3d.shape[0])] + for k in range(predicted_boxes3d.shape[0]): + msg = "%.5f, %s, %.5f"%(predicted_boxes3d[k,9],params.map_classes[int(predicted_boxes3d[k,8])], predicted_boxes3d[k,6]) + bbox_params_list[k].append("Magenta") + bbox_params_list[k].append(msg) + # bbox_params_list[k].append(str(predicted_boxes3d[k,9]) + "=" + params.map_classes[int(predicted_boxes3d[k,8])]) + gt_bbox_params_list.append(bbox_params_list[k]) + + + # # save as kitti format for evaluation + # cur_sample_id = batch_idx * params.batch_size + i + # sample_file_name = validation_gen.sample_id_list[cur_sample_id] + # calib = sample[i]['calib'] + # # cur_boxes3d = cur_boxes3d.cpu().numpy() + + # cur_boxes3d_xyz = calib.lidar_to_rect(predicted_boxes3d[:, 0:3]) + + # cur_boxes3d = np.concatenate(( + # cur_boxes3d_xyz[:,0,np.newaxis], # 0 x + # cur_boxes3d_xyz[:,1,np.newaxis] + predicted_boxes3d[:,5,np.newaxis] / 2, # 1 y + # cur_boxes3d_xyz[:,2,np.newaxis], # 2 z + # predicted_boxes3d[:,5,np.newaxis], # 3 l # same as the original label + # predicted_boxes3d[:,4,np.newaxis], # 4 w # same as the original label + # predicted_boxes3d[:,3,np.newaxis], # 5 h # same as the original label + # -predicted_boxes3d[:,6,np.newaxis], # 6 ry + # ), axis=1) + # cur_scores_raw = predicted_boxes3d[:,-1] + # image_shape = validation_gen.get_image_shape(sample_file_name) + # labels_obj = validation_gen.get_label(sample_file_name) + # classes = ['Car' for i in range(len(predicted_boxes3d))] + # save_kitti_format_for_evaluation(sample_index, calib, cur_boxes3d, KITTI_EVALUATION_OUTPUT, cur_scores_raw, image_shape, classes, labels_obj) + sample_index += 1 + + coor = pts_input[i][:,[1,2,0]] + Converter.compile("evaluation_sample_{}".format(batch_idx * params.batch_size+i), coors=coor, 
intensity=pts_input[i][:,3], + bbox_params=gt_bbox_params_list) + # print("Average runtime speed: ", np.mean(inference_duration[20:])) + diff --git a/mtr_point_pillars_prediction_v1.py b/mtr_point_pillars_prediction_v1.py new file mode 100644 index 0000000..70623d6 --- /dev/null +++ b/mtr_point_pillars_prediction_v1.py @@ -0,0 +1,138 @@ + + +import os +os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" +os.environ["CUDA_VISIBLE_DEVICES"] = "1" + +from glob import glob +import numpy as np +import tensorflow as tf +from mtr_processors_v1 import CustomDataGenerator, AnalyseCustomDataGenerator +from inference_utils_mtr_v1 import generate_bboxes_from_pred +from inference_utils_mtr_v1 import focal_loss_checker, rotational_nms, generate_bboxes_from_pred_and_np_array, convert_boxes_to_list +# from readers import KittiDataReader +from config_mtr_v1 import Parameters +from network_v2_2 import build_point_pillar_graph +from datetime import datetime + +from det3d.kitti_dataset.utils.evaluation import save_kitti_format, save_kitti_format_for_evaluation + +from point_viz.converter import PointvizConverter + +DATA_ROOT = "/media/data3/tjtanaa/Project4-MTR" # TODO make main arg +MODEL_ROOT = "./logs_Pedestrian_MTR_No_Early_Stopping_wo_Aug_with_val" +PC_STATISTICS_PATH = "/home/tan/tjtanaa/det3d/det3d/mtr_dataset/point_cloud_statistics" + +occ_threshold = 0.3 +nms_iou_thr=0.1 +# KITTI_EVALUATION_OUTPUT = os.path.join(MODEL_ROOT, "Evaluation") +# if not os.path.exists(KITTI_EVALUATION_OUTPUT): +# os.makedirs(KITTI_EVALUATION_OUTPUT) + + +def limit_period(val, offset=0.5, period=np.pi): + return val - np.floor(val / period + offset) * period + +if __name__ == "__main__": + + params = Parameters() + pillar_net = build_point_pillar_graph(params) + pillar_net.load_weights(os.path.join(MODEL_ROOT, "model.h5")) + pillar_net.summary() + + # save_viz_path = "/home/tan/tjtanaa/PointPillars/visualization/custom_prediction_multiprocessing" + save_viz_path = 
os.path.join("/home/tan/tjtanaa/PointPillars/visualization", MODEL_ROOT.split('/')[-1]) + # Initialize and setup output directory. + Converter = PointvizConverter(save_viz_path) + + gt_database_dir = os.path.join(DATA_ROOT, "gt_database") + + real_gen = AnalyseCustomDataGenerator(batch_size=params.batch_size, root_dir=DATA_ROOT, + point_cloud_statistics_path=PC_STATISTICS_PATH, + random_select=False, + npoints=8000, split='real', classes=list(params.classes_map.keys())) + inference_duration = [] + sample_index = 0 # has to be controlled manually to ensure that the sequence number is continuous + print("total number of batch: ", len(real_gen)) + for batch_idx in range(0, len(real_gen)): + [pillars, voxels], [pts_input] = real_gen[batch_idx] + + start=datetime.now() + + occupancy, position, size, angle, heading = pillar_net.predict([pillars, voxels]) + + inference_duration.append( datetime.now()-start) + + classification = np.zeros(shape=np.array(occupancy).shape) + classification_ = classification + + set_boxes, confidences = [], [] + loop_range = occupancy.shape[0] if len(occupancy.shape) == 4 else 1 + print("batch_idx ", batch_idx, " has ", loop_range, "batch sample", " with occupancy sum: ", np.sum(occupancy)) + for i in range(loop_range): + set_box, predicted_boxes3d = generate_bboxes_from_pred_and_np_array(occupancy[i], position[i], size[i], angle[i], + heading[i], + classification[i], params.anchor_dims, occ_threshold=occ_threshold) + + gt_bbox_params_list = [] + if len(set_box) > 0: + + + # NMS + # set_box + # print("start nms") + confidence = [float(box.conf) for box in set_box] + nms_boxes = rotational_nms([set_box], [confidence], occ_threshold=occ_threshold, nms_iou_thr=nms_iou_thr) + + predicted_boxes3d_list = convert_boxes_to_list(nms_boxes) + + predicted_boxes3d = np.array(predicted_boxes3d_list[0]) + predicted_boxes3d_ = predicted_boxes3d + + print("sample_: ", batch_idx * params.batch_size + i, " has ", predicted_boxes3d_.shape, "predictions") + + 
bbox_params = np.stack([predicted_boxes3d_[:,3], predicted_boxes3d_[:,5], predicted_boxes3d_[:,4], + predicted_boxes3d_[:,1], predicted_boxes3d_[:,2] , + predicted_boxes3d_[:,0], + predicted_boxes3d_[:,6]], axis=1) + # print("z ", predicted_boxes3d[:,2]) + + bbox_params_list = bbox_params.tolist() + # bbox_labels_conf = [str(predicted_boxes3d[k,9]) for k in range(predicted_boxes3d.shape[0])] + for k in range(predicted_boxes3d.shape[0]): + msg = "%.5f, %s, %.5f"%(predicted_boxes3d[k,9],params.map_classes[int(predicted_boxes3d[k,8])], predicted_boxes3d[k,6]) + bbox_params_list[k].append("Magenta") + bbox_params_list[k].append(msg) + # bbox_params_list[k].append(str(predicted_boxes3d[k,9]) + "=" + params.map_classes[int(predicted_boxes3d[k,8])]) + gt_bbox_params_list.append(bbox_params_list[k]) + + + # # save as kitti format for evaluation + # cur_sample_id = batch_idx * params.batch_size + i + # sample_file_name = validation_gen.sample_id_list[cur_sample_id] + # calib = sample[i]['calib'] + # # cur_boxes3d = cur_boxes3d.cpu().numpy() + + # cur_boxes3d_xyz = calib.lidar_to_rect(predicted_boxes3d[:, 0:3]) + + # cur_boxes3d = np.concatenate(( + # cur_boxes3d_xyz[:,0,np.newaxis], # 0 x + # cur_boxes3d_xyz[:,1,np.newaxis] + predicted_boxes3d[:,5,np.newaxis] / 2, # 1 y + # cur_boxes3d_xyz[:,2,np.newaxis], # 2 z + # predicted_boxes3d[:,5,np.newaxis], # 3 l # same as the original label + # predicted_boxes3d[:,4,np.newaxis], # 4 w # same as the original label + # predicted_boxes3d[:,3,np.newaxis], # 5 h # same as the original label + # -predicted_boxes3d[:,6,np.newaxis], # 6 ry + # ), axis=1) + # cur_scores_raw = predicted_boxes3d[:,-1] + # image_shape = validation_gen.get_image_shape(sample_file_name) + # labels_obj = validation_gen.get_label(sample_file_name) + # classes = ['Car' for i in range(len(predicted_boxes3d))] + # save_kitti_format_for_evaluation(sample_index, calib, cur_boxes3d, KITTI_EVALUATION_OUTPUT, cur_scores_raw, image_shape, classes, labels_obj) + # 
sample_index += 1 + + coor = pts_input[i][:,[1,2,0]] + Converter.compile("real_sample_{}".format(batch_idx * params.batch_size+i), coors=coor, intensity=pts_input[i][:,3], + bbox_params=gt_bbox_params_list) + # print("Average runtime speed: ", np.mean(inference_duration[20:])) + diff --git a/point_pillars_training_custom_run_v2.py b/mtr_point_pilllars_training_run_v1.py similarity index 59% rename from point_pillars_training_custom_run_v2.py rename to mtr_point_pilllars_training_run_v1.py index 1293f32..947ec4b 100644 --- a/point_pillars_training_custom_run_v2.py +++ b/mtr_point_pilllars_training_run_v1.py @@ -1,53 +1,64 @@ + import os +os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" +os.environ["CUDA_VISIBLE_DEVICES"] = "2" import time import numpy as np import tensorflow as tf from glob import glob # from config import Parameters -from config_v2 import Parameters -from loss import PointPillarNetworkLoss -from network import build_point_pillar_graph -# from processors import SimpleDataGenerator -# from custom_processors import CustomDataGenerator -from point_pillars_custom_processors_v2 import CustomDataGenerator -from readers import KittiDataReader +from config_mtr_v1 import Parameters +from loss_v2_2 import PointPillarNetworkLoss +from network_v2_2 import build_point_pillar_graph +from mtr_processors_v1 import CustomDataGenerator +# from readers import KittiDataReader + +from det3d.mtr_dataset import MTRDatasetBase # from point_viz.converter import PointvizConverter tf.get_logger().setLevel("ERROR") -# DATA_ROOT = "/media/data3/tjtanaa/kitti_dataset/KITTI/object/training" # TODO make main arg -DATA_ROOT = "/media/data3/tjtanaa/kitti_dataset/" # TODO make main arg -# MODEL_ROOT = "./logs_Car_Pedestrian_Custom_Dataset_single_process" -# MODEL_ROOT = "./logs_Car_Pedestrian_Custom_Dataset_No_Early_Stopping_wo_Aug_wo_val" -MODEL_ROOT = "./logs_Pedestrian_Custom_Dataset_No_Early_Stopping_wo_Aug_wo_val" -os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" 
-os.environ["CUDA_VISIBLE_DEVICES"] = "1" -if __name__ == "__main__": +# DATA_ROOT = "/media/data3/tjtanaa/kitti_dataset/KITTI/object/training" # TODO make main arg +DATA_ROOT = "/media/data3/tjtanaa/Project4-MTR" # TODO make main arg +MODEL_ROOT = "./logs_Pedestrian_MTR_No_Early_Stopping_wo_Aug_with_val" +PC_STATISTICS_PATH = "/home/tan/tjtanaa/det3d/det3d/mtr_dataset/point_cloud_statistics" +# from tensorflow.python.client import device_lib +# print(device_lib.list_local_devices()) +# exit() +if __name__ == "__main__": params = Parameters() + # gpus = tf.config.experimental.list_physical_devices('GPU') + pillar_net = build_point_pillar_graph(params) # pillar_net.load_weights(os.path.join(MODEL_ROOT, "model.h5")) - + pillar_net.summary() + # exit() loss = PointPillarNetworkLoss(params) optimizer = tf.keras.optimizers.Adam(lr=params.learning_rate, decay=params.decay_rate) pillar_net.compile(optimizer, loss=loss.losses()) + - # gt_database_dir = os.path.join(DATA_ROOT, "gt_database") - gt_database_dir = None + train_dataset = MTRDatasetBase(DATA_ROOT, 'train', PC_STATISTICS_PATH) + + gt_database_dir = os.path.join(DATA_ROOT, "gt_database") + # gt_database_dir = None training_gen = CustomDataGenerator(batch_size=params.batch_size,root_dir = DATA_ROOT, - npoints=20000, split='train_val', classes=list(params.classes_map.keys()), - random_select=True, gt_database_dir=None, aug_hard_ratio=0.7) + point_cloud_statistics_path=PC_STATISTICS_PATH, + npoints=8000, split='train', classes=list(params.classes_map.keys()), + random_select=True, gt_database_dir=gt_database_dir, aug_hard_ratio=0.7) - # validation_gen = CustomDataGenerator(batch_size=params.batch_size, root_dir=DATA_ROOT, - # npoints=20000, split='val', classes=list(params.classes_map.keys())) + validation_gen = CustomDataGenerator(batch_size=params.batch_size, root_dir=DATA_ROOT, + point_cloud_statistics_path=PC_STATISTICS_PATH, + npoints=8000, split='test', classes=list(params.classes_map.keys())) # save_viz_path = 
"/home/tan/tjtanaa/PointPillars/visualization/custom_processor" @@ -68,7 +79,7 @@ callbacks = [ tf.keras.callbacks.TensorBoard(log_dir=log_dir), tf.keras.callbacks.ModelCheckpoint(filepath=os.path.join(log_dir, "model.h5"), - monitor='loss', save_best_only=True), + monitor='val_loss', save_best_only=True), tf.keras.callbacks.LearningRateScheduler( lambda epoch, lr: lr * 0.8 if ((epoch % epoch_to_decay == 0) and (epoch != 0)) else lr, verbose=True), # tf.keras.callbacks.EarlyStopping(patience=20, monitor='val_loss'), @@ -76,13 +87,16 @@ try: pillar_net.fit(training_gen, - # validation_data = validation_gen, + validation_data = validation_gen, steps_per_epoch=len(training_gen), callbacks=callbacks, use_multiprocessing=True, + max_queue_size = 16, epochs=int(params.total_training_epochs), workers=6) except KeyboardInterrupt: model_str = "interrupted_%s.h5" % time.strftime("%Y%m%d-%H%M%S") pillar_net.save(os.path.join(log_dir, model_str)) print("Interrupt. Saving output to %s" % os.path.join(os.getcwd(), log_dir[1:], model_str)) + + diff --git a/point_pillars_custom_processors_v2.py b/mtr_processors_v1.py similarity index 62% rename from point_pillars_custom_processors_v2.py rename to mtr_processors_v1.py index ae2b17c..3a83f07 100644 --- a/point_pillars_custom_processors_v2.py +++ b/mtr_processors_v1.py @@ -4,17 +4,21 @@ from tensorflow.python.keras.utils.data_utils import Sequence -from config_v2 import Parameters +from config_mtr_v1 import Parameters # from point_pillars import createPillars, createPillarsTarget from point_pillars_v2 import createPillars, createPillarsTarget # from readers import DataReader, Label3D from sklearn.utils import shuffle import sys +from det3d.mtr_dataset import MTRDatasetBase +from det3d.mtr_dataset.utils import mtr_utils -from det3d.pc_kitti_dataset import PCKittiAugmentedDataset +# from point_viz.converter import PointvizConverter +from datetime import datetime -from point_viz.converter import PointvizConverter +def limit_period(val, 
offset=0.5, period=np.pi): + return val - np.floor(val / period + offset) * period def select_best_anchors(arr): dims = np.indices(arr.shape[1:]) @@ -40,7 +44,7 @@ def make_point_pillars(self, points: np.ndarray): assert points.ndim == 2 assert points.shape[1] == 4 assert points.dtype == np.float32 - + # start=datetime.now() pillars, indices = createPillars(points, self.max_points_per_pillar, self.max_pillars, @@ -53,6 +57,7 @@ def make_point_pillars(self, points: np.ndarray): self.z_min, self.z_max, False) + # print("Create pillar takes : ", datetime.now()-start) return pillars, indices @@ -70,9 +75,6 @@ def make_ground_truth(self, gt_boxes_3d: Any, gt_cls_type_list: List[str]): # filter labels by classes (cars, pedestrians and Trams) # Label has 4 properties (Classification (0th index of labels file), # centroid coordinates, dimensions, yaw) - # labels = list(filter(lambda x: x.classification in self.classes, labels)) - - if len(gt_boxes_3d) == 0: pX, pY = int(self.Xn / self.downscaling_factor), int(self.Yn / self.downscaling_factor) @@ -82,12 +84,6 @@ def make_ground_truth(self, gt_boxes_3d: Any, gt_cls_type_list: List[str]): np.zeros((pX, pY, a), dtype='float32'), np.zeros((pX, pY, a, self.nb_classes), dtype='float64') # For each label file, generate these properties except for the Don't care class - - # target_positions = np.array([label.centroid for label in labels], dtype=np.float32) - # target_dimension = np.array([label.dimension for label in labels], dtype=np.float32) - # target_yaw = np.array([label.yaw for label in labels], dtype=np.float32) - # target_class = np.array([self.classes[label.classification] for label in labels], dtype=np.int32) - target_positions = gt_boxes_3d[:,:3] target_dimension = gt_boxes_3d[:,3:6] # don't have to translate again target_yaw = gt_boxes_3d[:, 6] @@ -101,6 +97,8 @@ def make_ground_truth(self, gt_boxes_3d: Any, gt_cls_type_list: List[str]): assert np.all(target_yaw >= -np.pi) & np.all(target_yaw <= np.pi) assert 
len(target_positions) == len(target_dimension) == len(target_yaw) == len(target_class) + # start=datetime.now() + target, pos, neg = createPillarsTarget(target_positions, target_dimension, target_yaw, @@ -121,39 +119,41 @@ def make_ground_truth(self, gt_boxes_3d: Any, gt_cls_type_list: List[str]): self.z_min, self.z_max, False) + + # print("Create target takes : ", datetime.now()-start) + self.pos_cnt += pos self.neg_cnt += neg # return a merged target view for all objects in the ground truth and get categorical labels - # print("target.shape: ", target.shape) + sel = select_best_anchors(target) - # print("self.shape: ", sel[...,0].shape) ohe = tf.keras.utils.to_categorical(sel[..., 9], num_classes=self.nb_classes, dtype='float64') # print("self.shape: ", sel[...,0].shape) - # print("ohe.shape: ", ohe.shape) - # print("sel[8].shape: ",sel[..., 8].shape) + return sel[..., 0], sel[..., 1:4], sel[..., 4:7], sel[..., 7], sel[..., 8], ohe -class CustomDataGenerator(DataProcessor, Sequence, PCKittiAugmentedDataset): +class CustomDataGenerator(DataProcessor, Sequence, MTRDatasetBase): """ Multiprocessing-safe data generator for training, validation or testing, without fancy augmentation """ - def __init__(self, batch_size: int, root_dir:str, npoints:int =16384, split: str ='train', + def __init__(self, batch_size: int, root_dir:str, point_cloud_statistics_path: str, + npoints:int =16384, split: str ='train', classes:List[str] =['Car', 'Pedestrian', 'Person_sitting'], random_select:bool =True, gt_database_dir=None, aug_hard_ratio:float=0.5, **kwargs): super(CustomDataGenerator, self).__init__( - batch_size=batch_size, root_dir=root_dir, - npoints=npoints, split=split, classes=classes, - random_select=random_select, gt_database_dir=gt_database_dir, - aug_hard_ratio=aug_hard_ratio, **kwargs + root_dir = root_dir, + split = split, + point_cloud_statistics_path = point_cloud_statistics_path, + **kwargs + # batch_size=batch_size, root_dir=root_dir, + # npoints=npoints, 
split=split, classes=classes, + # random_select=random_select, gt_database_dir=gt_database_dir, + # aug_hard_ratio=aug_hard_ratio, **kwargs ) - # self.data_reader = data_reader self.batch_size = batch_size - self.sample_id_list=self.get_sample_id_list() - # self.split = split - # print("CustomDataGenerator: " ,self.split) def get_sample(self, index): @@ -161,11 +161,10 @@ def get_sample(self, index): def __len__(self): - return len(self.sample_id_list) // self.batch_size + return len(self.sample_list) // self.batch_size def __getitem__(self, batch_id: int): file_ids = range(batch_id * self.batch_size, self.batch_size * (batch_id + 1)) - # print("inside getitem") pillars = [] voxels = [] occupancy = [] @@ -178,22 +177,11 @@ def __getitem__(self, batch_id: int): for i in file_ids: - sample = self.get_sample(i) - # For each file, dividing the space into a x-y grid to create pillars - pts_lidar = sample['calib'].rect_to_lidar(sample['pts_rect']) - pts_input = np.concatenate((pts_lidar, sample['pts_features']), axis=1) # (N, C) + point_cloud = self.get_lidar_without_background(i) - gt_boxes3d_xyz = sample['calib'].rect_to_lidar(sample['gt_boxes3d'][:,:3]) + pts_features = point_cloud[:, 3:] - gt_boxes3d = np.concatenate(( - gt_boxes3d_xyz[:,0,np.newaxis], # 0 x - gt_boxes3d_xyz[:,1,np.newaxis], # 1 y - gt_boxes3d_xyz[:,2,np.newaxis] + sample['gt_boxes3d'][:,3,np.newaxis] / 2, # 2 z - sample['gt_boxes3d'][:,5,np.newaxis], # 3 l # same as the original label - sample['gt_boxes3d'][:,4,np.newaxis], # 4 w # same as the original label - sample['gt_boxes3d'][:,3,np.newaxis], # 5 h # same as the original label - -sample['gt_boxes3d'][:,6,np.newaxis], # 6 ry - ), axis=1) + pts_input = np.concatenate([point_cloud[:,:3], pts_features[:,1,np.newaxis]], axis=1) # Voxels are the pillar ids pillars_, voxels_ = self.make_point_pillars(pts_input) @@ -201,10 +189,34 @@ def __getitem__(self, batch_id: int): pillars.append(pillars_) voxels.append(voxels_) + + + obj_list = self.get_label(i) 
# are labels + + gt_boxes3d = np.zeros((obj_list.__len__(), 7), dtype=np.float32) + gt_bbox_params_list = [] + for k, obj in enumerate(obj_list): + gt_boxes3d[k, 0:3], gt_boxes3d[k, 3], gt_boxes3d[k, 4], gt_boxes3d[k, 5], gt_boxes3d[k, 6] \ + = obj.pos, obj.h, obj.w, obj.l, limit_period(obj.ry, offset=0.5, period=2*np.pi) # mtr format + # = obj.pos, obj.h, obj.w, obj.l, obj.ry # kitti + + # print(bboxes3d_[:,:3].shape) + invalid_region_mask = self._get_invalid_region_mask(gt_boxes3d[:,:3]) + gt_boxes3d = gt_boxes3d[~invalid_region_mask,:] + + gt_boxes3d = np.concatenate(( + gt_boxes3d[:,0,np.newaxis], # 0 x + gt_boxes3d[:,1,np.newaxis], # 1 y + gt_boxes3d[:,2,np.newaxis], # 2 z + gt_boxes3d[:,5,np.newaxis], # 3 l # same as the original label + gt_boxes3d[:,4,np.newaxis], # 4 w # same as the original label + gt_boxes3d[:,3,np.newaxis], # 5 h # same as the original label + gt_boxes3d[:,6,np.newaxis], # 6 ry + ), axis=1) - if self.split=='train' or self.split =='val': + if self.split=='train' or self.split =='test': occupancy_, position_, size_, angle_, heading_, classification_ = self.make_ground_truth( - gt_boxes3d, sample['gt_cls_type_list']) + gt_boxes3d, ['pedestrian' for i in range(len(gt_boxes3d))]) occupancy.append(occupancy_) position.append(position_) @@ -216,7 +228,7 @@ def __getitem__(self, batch_id: int): pillars = np.concatenate(pillars, axis=0) voxels = np.concatenate(voxels, axis=0) - if self.split=='train' or self.split =='val': + if self.split=='train' or self.split =='test': occupancy = np.array(occupancy) position = np.array(position) size = np.array(size) @@ -229,43 +241,39 @@ def __getitem__(self, batch_id: int): return [pillars, voxels] def on_epoch_end(self): - # print("inside epoch") - if self.split=='train' or self.split =='val': - # pass - self.sample_id_list=shuffle(self.sample_id_list) + if self.split=='train' or self.split =='test': + self.sample_list=shuffle(self.sample_list) + + class AnalyseCustomDataGenerator(CustomDataGenerator): """ 
Multiprocessing-safe data generator for training, validation or testing, without fancy augmentation """ - def __init__(self, batch_size: int, root_dir:str, npoints:int =16384, split: str ='train', + def __init__(self, batch_size: int, root_dir:str, point_cloud_statistics_path: str, + npoints:int =16384, split: str ='train', classes:List[str] =['Car', 'Pedestrian', 'Person_sitting'], random_select:bool =True, gt_database_dir=None, aug_hard_ratio:float=0.5, **kwargs): super(AnalyseCustomDataGenerator, self).__init__( batch_size=batch_size, root_dir=root_dir, + point_cloud_statistics_path = point_cloud_statistics_path, npoints=npoints, split=split, classes=classes, random_select=random_select, gt_database_dir=gt_database_dir, aug_hard_ratio=aug_hard_ratio, **kwargs ) - # self.data_reader = data_reader self.batch_size = batch_size - self.sample_id_list=self.get_sample_id_list() - # self.split = split - # self.split = split - # print("AnalyseCustomDataGenerator: " ,self.split) - def _get_sample(self, index): + def get_sample(self, index): return super().get_sample(index) def __len__(self): - return len(self.sample_id_list) // self.batch_size + return len(self.sample_list) // self.batch_size def __getitem__(self, batch_id: int): file_ids = range(batch_id * self.batch_size, self.batch_size * (batch_id + 1)) - # print("inside getitem") pillars = [] voxels = [] occupancy = [] @@ -274,76 +282,67 @@ def __getitem__(self, batch_id: int): angle = [] heading = [] classification = [] - pts_input_ = [] gt_boxes3d_ = [] - sample_ = [] - - # save_viz_path = "/home/tan/tjtanaa/PointPillars/visualization/custom_processor" - # # Initialize and setup output directory. 
- # Converter = PointvizConverter(save_viz_path) - for i in file_ids: - # print(i) - # print(type(i)) - sample = self._get_sample(i) - # For each file, dividing the space into a x-y grid to create pillars - pts_lidar = sample['calib'].rect_to_lidar(sample['pts_rect']) - # print(pts_lidar.shape) - - pts_input = np.concatenate((pts_lidar, sample['pts_features']), axis=1) # (N, C) - gt_boxes3d_xyz = sample['calib'].rect_to_lidar(sample['gt_boxes3d'][:,:3]) - # print(gt_boxes3d_xyz.shape) - - gt_boxes3d = np.concatenate(( - gt_boxes3d_xyz[:,0,np.newaxis], # 0 x - gt_boxes3d_xyz[:,1,np.newaxis], # 1 y - gt_boxes3d_xyz[:,2,np.newaxis] + sample['gt_boxes3d'][:,3,np.newaxis] / 2, # 2 z - sample['gt_boxes3d'][:,5,np.newaxis], # 3 l # same as the original label - sample['gt_boxes3d'][:,4,np.newaxis], # 4 w # same as the original label - sample['gt_boxes3d'][:,3,np.newaxis], # 5 h # same as the original label - -sample['gt_boxes3d'][:,6,np.newaxis], # 6 ry - ), axis=1) + for i in file_ids: + point_cloud = self.get_lidar_without_background(i) - # print(type(gt_boxes3d)) - # gt_boxes3d = self.limit_yaw(gt_boxes3d) + pts_features = point_cloud[:, 3:] - # bbox_params = self.convert_labels_into_point_viz_format(gt_boxes3d) - # print(bbox_params.shape) - # Converter.compile("custom_sample_{}".format(i), coors=pts_input[:,:3], intensity=pts_input[:,3], - # bbox_params=bbox_params) - - - # exit() + pts_input = np.concatenate([point_cloud[:,:3], pts_features[:,1,np.newaxis]], axis=1) - # print(pts_input.shape) # Voxels are the pillar ids pillars_, voxels_ = self.make_point_pillars(pts_input) - # print(pillars_.shape, voxels_.shape) - # for i in range(10): - # print(pillars_[0,0,i,:]) - # print(np.sum(pillars_ > 0)) - # exit() - pillars.append(pillars_) voxels.append(voxels_) - # print(sample['gt_cls_type_list']) - # print("split: ", self.split) - if self.split=='train' or self.split =='val': - # print(len(gt_boxes3d), ", ", len(sample['gt_cls_type_list'])) - if (len(gt_boxes3d) == 0): 
- print("file id: ", i, " has zero gt label") - occupancy_, position_, size_, angle_, heading_, classification_ = self.make_ground_truth( - gt_boxes3d, sample['gt_cls_type_list']) - # print(len(a)) - # if + + + + if self.split=='train' or self.split =='test': + obj_list = self.get_label(i) # are labels + + gt_boxes3d = np.zeros((obj_list.__len__(), 7), dtype=np.float32) + # gt_bbox_params_list = [] + for k, obj in enumerate(obj_list): + gt_boxes3d[k, 0:3], gt_boxes3d[k, 3], gt_boxes3d[k, 4], gt_boxes3d[k, 5], gt_boxes3d[k, 6] \ + = obj.pos, obj.h, obj.w, obj.l, limit_period(obj.ry, offset=0.5, period=2*np.pi) # mtr format + # = obj.pos, obj.h, obj.w, obj.l, obj.ry # kitti + + # print(bboxes3d_[:,:3].shape) + invalid_region_mask = self._get_invalid_region_mask(gt_boxes3d[:,:3]) + gt_boxes3d = gt_boxes3d[~invalid_region_mask,:] + + # for k in range(len(gt_boxes3d)): + # gt_bbox_params = [gt_boxes3d[k, 5], gt_boxes3d[k, 3], gt_boxes3d[k, 4], + # gt_boxes3d[k, 1], gt_boxes3d[k, 2], gt_boxes3d[k, 0], + # gt_boxes3d[k, 6]] - # print(occupancy_.shape, position_.shape, size_.shape, angle_.shape, heading_.shape, classification_.shape) + # gt_bbox_params_list.append(gt_bbox_params) + + + + # if gt_boxes3d.__len__() == 0: + # print('No gt object') + # continue + + gt_boxes3d = np.concatenate(( + gt_boxes3d[:,0,np.newaxis], # 0 x + gt_boxes3d[:,1,np.newaxis], # 1 y + gt_boxes3d[:,2,np.newaxis], # 2 z + gt_boxes3d[:,5,np.newaxis], # 3 l # same as the original label + gt_boxes3d[:,4,np.newaxis], # 4 w # same as the original label + gt_boxes3d[:,3,np.newaxis], # 5 h # same as the original label + gt_boxes3d[:,6,np.newaxis], # 6 ry + ), axis=1) + + occupancy_, position_, size_, angle_, heading_, classification_ = self.make_ground_truth( + gt_boxes3d, ['pedestrian' for i in range(len(gt_boxes3d))]) occupancy.append(occupancy_) position.append(position_) @@ -352,29 +351,29 @@ def __getitem__(self, batch_id: int): heading.append(heading_) classification.append(classification_) - 
sample_.append(sample) gt_boxes3d_.append(gt_boxes3d) pts_input_.append(pts_input) + elif self.split=='real': + pts_input_.append(pts_input) - # exit() pillars = np.concatenate(pillars, axis=0) voxels = np.concatenate(voxels, axis=0) - if self.split=='train' or self.split =='val': + if self.split=='train' or self.split =='test': occupancy = np.array(occupancy) position = np.array(position) size = np.array(size) angle = np.array(angle) heading = np.array(heading) classification = np.array(classification) - # return [pillars, voxels], [occupancy, position, size, angle, heading, classification], [pts_input_, gt_boxes3d_, sample_] - return [pillars, voxels], [occupancy, position, size, angle, heading], [pts_input_, gt_boxes3d_, sample_] # new network + # return [pillars, voxels], [occupancy, position, size, angle, heading, classification] # network + return [pillars, voxels], [occupancy, position, size, angle, heading], [pts_input_, gt_boxes3d_] # network_v2 + elif self.split=='real': + return [pillars, voxels], [pts_input_] else: return [pillars, voxels] def on_epoch_end(self): - # print("inside epoch") - if self.split=='train' or self.split =='val': - self.sample_id_list=shuffle(self.sample_id_list) - \ No newline at end of file + if self.split=='train' or self.split =='test': + self.sample_list=shuffle(self.sample_list) \ No newline at end of file diff --git a/network_v2.py b/network_v2.py index 4e1deed..1fba5e4 100644 --- a/network_v2.py +++ b/network_v2.py @@ -112,8 +112,8 @@ def correct_batch_indices(tensor, batch_size): heading = tf.keras.layers.Conv2D(nb_anchors, (1, 1), name="heading/conv2d", activation="sigmoid")(concat) - # clf = tf.keras.layers.Conv2D(nb_anchors * nb_classes, (1, 1), name="clf/conv2d")(concat) - # clf = tf.keras.layers.Reshape(tuple(i // 2 for i in image_size) + (nb_anchors, nb_classes), name="clf/reshape")(clf) + clf = tf.keras.layers.Conv2D(nb_anchors * nb_classes, (1, 1), name="clf/conv2d")(concat) + clf = tf.keras.layers.Reshape(tuple(i 
// 2 for i in image_size) + (nb_anchors, nb_classes), name="clf/reshape")(clf) pillar_net = tf.keras.models.Model([input_pillars, input_indices], [occ, loc, size, angle, heading]) # print(pillar_net.summary()) diff --git a/network_v2_2.py b/network_v2_2.py index f30b6aa..7298525 100644 --- a/network_v2_2.py +++ b/network_v2_2.py @@ -11,10 +11,14 @@ def build_point_pillar_graph(params: Parameters): max_points = int(params.max_points_per_pillar) nb_features = int(params.nb_features) nb_channels = int(params.nb_channels) - batch_size = int(params.batch_size) + batch_size = int(params.batch_size) image_size = tuple([params.Xn, params.Yn]) nb_classes = int(params.nb_classes) nb_anchors = len(params.anchor_dims) + num_gpus = int(params.num_gpus) + # batch_size = batch_size // num_gpus + + # print(batch_size) if tf.keras.backend.image_data_format() == "channels_first": raise NotImplementedError @@ -24,10 +28,12 @@ def build_point_pillar_graph(params: Parameters): input_pillars = tf.keras.layers.Input(input_shape, batch_size=batch_size, name="pillars/input") input_indices = tf.keras.layers.Input((max_pillars, 3), batch_size=batch_size, name="pillars/indices", dtype=tf.int32) + # print(batch_size, input_indices.shape, input_pillars.shape) def correct_batch_indices(tensor, batch_size): - array = np.zeros((batch_size, max_pillars, 3), dtype=np.float32) - for i in range(batch_size): + array = np.zeros((batch_size//num_gpus, max_pillars, 3), dtype=np.float32) + # print(batch_size, array.shape, input_pillars.shape) + for i in range(batch_size//num_gpus): array[i, :, 0] = i return tensor + tf.constant(array, dtype=tf.int32) diff --git a/point_pillars_custom_prediction.py b/point_pillars_custom_prediction.py deleted file mode 100644 index 43e1003..0000000 --- a/point_pillars_custom_prediction.py +++ /dev/null @@ -1,157 +0,0 @@ -import os -from glob import glob -import numpy as np -import tensorflow as tf -# from processors import SimpleDataGenerator -# from custom_processors 
import CustomDataGenerator, AnalyseCustomDataGenerator -from point_pillars_custom_processors_v2 import CustomDataGenerator, AnalyseCustomDataGenerator -from inference_utils_v2 import generate_bboxes_from_pred, GroundTruthGenerator -from inference_utils_v2 import focal_loss_checker, rotational_nms, generate_bboxes_from_pred_and_np_array -from readers import KittiDataReader -from config_v2 import Parameters -from network import build_point_pillar_graph - - -from point_viz.converter import PointvizConverter - -DATA_ROOT = "/media/data3/tjtanaa/kitti_dataset/" -MODEL_ROOT = "./logs_Car_Pedestrian_Custom_Dataset_No_Early_Stopping_wo_Aug_wo_val" - -os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" -os.environ["CUDA_VISIBLE_DEVICES"] = "0" - - -def limit_period(val, offset=0.5, period=np.pi): - return val - np.floor(val / period + offset) * period - -if __name__ == "__main__": - - params = Parameters() - pillar_net = build_point_pillar_graph(params) - pillar_net.load_weights(os.path.join(MODEL_ROOT, "model.h5")) - pillar_net.summary() - - exit() - # save_viz_path = "/home/tan/tjtanaa/PointPillars/visualization/custom_prediction_multiprocessing" - save_viz_path = os.path.join("/home/tan/tjtanaa/PointPillars/visualization", MODEL_ROOT.split('/')[-1]) - # Initialize and setup output directory. 
- Converter = PointvizConverter(save_viz_path) - - - - - - - gt_database_dir = os.path.join(DATA_ROOT, "gt_database") - - # validation_gen = AnalyseCustomDataGenerator(batch_size=params.batch_size,root_dir = DATA_ROOT, - # npoints=20000, split='train', classes=list(params.classes_map.keys()), - # random_select=True, gt_database_dir=None, aug_hard_ratio=0.7) - - validation_gen = AnalyseCustomDataGenerator(batch_size=params.batch_size, root_dir=DATA_ROOT, - npoints=20000, split='val',random_select=False, classes=list(params.classes_map.keys())) - # validation_gen = AnalyseCustomDataGenerator(batch_size=params.batch_size, root_dir=DATA_ROOT, - # npoints=20000, split='val',random_select=False, classes=list(params.classes_map.keys())) - - for batch_idx in range(0,20): - [pillars, voxels], [occupancy_, position_, size_, angle_, heading_, classification_], [pts_input, gt_boxes3d, sample] = validation_gen[batch_idx] - - occupancy, position, size, angle, heading, classification = pillar_net.predict([pillars, voxels]) - - # angle = limit_period(angle, offset=0.5, period=2*np.pi) - - - # occupancy[:,:,:,:2] = 0 - - # print(occupancy.shape) - # exit() - - set_boxes, confidences = [], [] - loop_range = occupancy_.shape[0] if len(occupancy_.shape) == 4 else 1 - for i in range(loop_range): - set_box, predicted_boxes3d = generate_bboxes_from_pred_and_np_array(occupancy[i], position[i], size[i], angle[i], - heading[i], - classification[i], params.anchor_dims, occ_threshold=0.5) - - - _, decoded_gt_boxes3d = generate_bboxes_from_pred_and_np_array(occupancy_[i], position_[i], size_[i], angle_[i], - heading_[i], - classification_[i], params.anchor_dims, occ_threshold=0.4) - - # gt_boxes3d_ = gt_boxes3d[i] - gt_boxes3d_ = decoded_gt_boxes3d - - # print(gt_boxes3d_.shape) - gt_bbox_params = np.stack([gt_boxes3d_[:,3], gt_boxes3d_[:,5], gt_boxes3d_[:,4], - gt_boxes3d_[:,1], gt_boxes3d_[:,2] , - gt_boxes3d_[:,0], - gt_boxes3d_[:,6]], axis=1) - - - gt_bbox_params_list = 
gt_bbox_params.tolist() - # gt_bbox_params_list = [] - for k in range(len(gt_bbox_params_list)): - msg = "%.5f, %s, %.5f"%(decoded_gt_boxes3d[k,9], params.map_classes[int(decoded_gt_boxes3d[k,8])], decoded_gt_boxes3d[k,6]) - # msg = "%.5f, %.5f"%(gt_bbox_params_list[k][3],gt_bbox_params_list[k][5]) - gt_bbox_params_list[k].append("Green") - # gt_bbox_params_list[k].append("1.0") - gt_bbox_params_list[k].append(msg) - - if len(set_box) > 0: - predicted_boxes3d_ = predicted_boxes3d - # bbox_params = validation_gen.convert_predictions_into_point_viz_format(predicted_boxes3d[:,[1, 2, 0, 5, 3, 4, 6 ]]) - - print("batch_idx: ", batch_idx * params.batch_size + i, " has ", predicted_boxes3d_.shape, "predictions") - # print(predicted_boxes3d_) - # print(size[i]) - - bbox_params = np.stack([predicted_boxes3d_[:,3], predicted_boxes3d_[:,5], predicted_boxes3d_[:,4], - predicted_boxes3d_[:,1], predicted_boxes3d_[:,2] , - predicted_boxes3d_[:,0], - predicted_boxes3d_[:,6]], axis=1) - - - bbox_params_list = bbox_params.tolist() - # bbox_labels_conf = [str(predicted_boxes3d[k,9]) for k in range(predicted_boxes3d.shape[0])] - for k in range(predicted_boxes3d.shape[0]): - msg = "%.5f, %s, %.5f"%(predicted_boxes3d[k,9],params.map_classes[int(predicted_boxes3d[k,8])], predicted_boxes3d[k,6]) - bbox_params_list[k].append("Magenta") - bbox_params_list[k].append(msg) - # bbox_params_list[k].append(str(predicted_boxes3d[k,9]) + "=" + params.map_classes[int(predicted_boxes3d[k,8])]) - gt_bbox_params_list.append(bbox_params_list[k]) - - coor = pts_input[i][:,[1,2,0]] - # coor[:,1] *= -1 - Converter.compile("val_custom_sample_{}".format(batch_idx * params.batch_size+i), coors=coor, intensity=pts_input[i][:,3], - bbox_params=gt_bbox_params_list) - - # set_boxes.append(set_box) - # # set_boxes.append(generate_bboxes_from_pred(occupancy, position, size, angle, heading, - # # classification, params.anchor_dims, occ_threshold=0.1)) - # # confidences.append([float(boxes.conf) for boxes in 
set_boxes[-1]]) - - # sum_bboxes = 0 - # for h in range(len(set_boxes)): - # sum_bboxes += len(set_boxes[h]) - - # print('Batch ', str(batch_idx) ,': Box predictions with occupancy > occ_thr: ', sum_bboxes) - # print('Scene 1: Box predictions with occupancy > occ_thr: ', len(set_boxes[0])) - # exit() - # print(set_boxes[-1]) - - # # NMS - # nms_boxes = rotational_nms(set_boxes, confidences, occ_threshold=0.7, nms_iou_thr=0.5) - - # print('Scene 1: Boxes after NMS with iou_thr: ', len(nms_boxes[0])) - - # # Do all the further operations on predicted_boxes array, which contains the predicted bounding boxes - # gt_gen = GroundTruthGenerator(data_reader, label_files, calibration_files, network_format=False) - # gt_gen0 = GroundTruthGenerator(data_reader, label_files, calibration_files, network_format=True) - # for seq_boxes, gt_label, gt0 in zip(nms_boxes, gt_gen, gt_gen0): - # print("---------- New Scenario ---------- ") - # focal_loss_checker(gt0[0], occupancy[0], n_occs=-1) - # print("---------- ------------ ---------- ") - # for gt in gt_label: - # print(gt) - # for pred in seq_boxes: - # print(pred) diff --git a/point_pillars_custom_prediction_v2_2.py b/point_pillars_custom_prediction_v2_2.py index ac942d3..090820b 100644 --- a/point_pillars_custom_prediction_v2_2.py +++ b/point_pillars_custom_prediction_v2_2.py @@ -3,8 +3,8 @@ import numpy as np import tensorflow as tf from point_pillars_custom_processors_v2_2 import CustomDataGenerator, AnalyseCustomDataGenerator -from inference_utils_v2 import generate_bboxes_from_pred, GroundTruthGenerator -from inference_utils_v2 import focal_loss_checker, rotational_nms, generate_bboxes_from_pred_and_np_array +from inference_utils_v2_2 import generate_bboxes_from_pred +from inference_utils_v2_2 import focal_loss_checker, rotational_nms, generate_bboxes_from_pred_and_np_array from readers import KittiDataReader from config_v2_2 import Parameters from network_v2_2 import build_point_pillar_graph @@ -52,9 +52,12 @@ def 
limit_period(val, offset=0.5, period=np.pi): inference_duration = [] - for batch_idx in range(0,70): + for batch_idx in range(0,10): [pillars, voxels], [occupancy_, position_, size_, angle_, heading_], [pts_input, gt_boxes3d, sample] = validation_gen[batch_idx] + # 4 * 12000 * 100 * 9, 502 * 502 * 2 + + # 4 * 20000 * 4 start=datetime.now() @@ -132,34 +135,4 @@ def limit_period(val, offset=0.5, period=np.pi): # coor[:,1] *= -1 Converter.compile("val_custom_sample_{}".format(batch_idx * params.batch_size+i), coors=coor, intensity=pts_input[i][:,3], bbox_params=gt_bbox_params_list) - print("Average runtime speed: ", np.mean(inference_duration[20:])) - # set_boxes.append(set_box) - # # set_boxes.append(generate_bboxes_from_pred(occupancy, position, size, angle, heading, - # # classification, params.anchor_dims, occ_threshold=0.1)) - # # confidences.append([float(boxes.conf) for boxes in set_boxes[-1]]) - - # sum_bboxes = 0 - # for h in range(len(set_boxes)): - # sum_bboxes += len(set_boxes[h]) - - # print('Batch ', str(batch_idx) ,': Box predictions with occupancy > occ_thr: ', sum_bboxes) - # print('Scene 1: Box predictions with occupancy > occ_thr: ', len(set_boxes[0])) - # exit() - # print(set_boxes[-1]) - - # # NMS - # nms_boxes = rotational_nms(set_boxes, confidences, occ_threshold=0.7, nms_iou_thr=0.5) - - # print('Scene 1: Boxes after NMS with iou_thr: ', len(nms_boxes[0])) - - # # Do all the further operations on predicted_boxes array, which contains the predicted bounding boxes - # gt_gen = GroundTruthGenerator(data_reader, label_files, calibration_files, network_format=False) - # gt_gen0 = GroundTruthGenerator(data_reader, label_files, calibration_files, network_format=True) - # for seq_boxes, gt_label, gt0 in zip(nms_boxes, gt_gen, gt_gen0): - # print("---------- New Scenario ---------- ") - # focal_loss_checker(gt0[0], occupancy[0], n_occs=-1) - # print("---------- ------------ ---------- ") - # for gt in gt_label: - # print(gt) - # for pred in seq_boxes: - 
# print(pred) + print("Average runtime speed: ", np.mean(inference_duration[4:])) \ No newline at end of file diff --git a/point_pillars_custom_processors_v2_2.py b/point_pillars_custom_processors_v2_2.py index 2c0886c..656dc21 100644 --- a/point_pillars_custom_processors_v2_2.py +++ b/point_pillars_custom_processors_v2_2.py @@ -14,6 +14,7 @@ from det3d.pc_kitti_dataset import PCKittiAugmentedDataset from point_viz.converter import PointvizConverter +from datetime import datetime def select_best_anchors(arr): @@ -40,7 +41,7 @@ def make_point_pillars(self, points: np.ndarray): assert points.ndim == 2 assert points.shape[1] == 4 assert points.dtype == np.float32 - + # start=datetime.now() pillars, indices = createPillars(points, self.max_points_per_pillar, self.max_pillars, @@ -53,6 +54,7 @@ def make_point_pillars(self, points: np.ndarray): self.z_min, self.z_max, False) + # print("Create pillar takes : ", datetime.now()-start) return pillars, indices @@ -92,6 +94,8 @@ def make_ground_truth(self, gt_boxes_3d: Any, gt_cls_type_list: List[str]): assert np.all(target_yaw >= -np.pi) & np.all(target_yaw <= np.pi) assert len(target_positions) == len(target_dimension) == len(target_yaw) == len(target_class) + # start=datetime.now() + target, pos, neg = createPillarsTarget(target_positions, target_dimension, target_yaw, @@ -112,6 +116,9 @@ def make_ground_truth(self, gt_boxes_3d: Any, gt_cls_type_list: List[str]): self.z_min, self.z_max, False) + + # print("Create target takes : ", datetime.now()-start) + self.pos_cnt += pos self.neg_cnt += neg diff --git a/point_pillars_evaluation_v2_2.py b/point_pillars_evaluation_v2_2.py index 01fd5aa..88e87ae 100644 --- a/point_pillars_evaluation_v2_2.py +++ b/point_pillars_evaluation_v2_2.py @@ -5,9 +5,9 @@ import numpy as np import tensorflow as tf from point_pillars_custom_processors_v2_2 import CustomDataGenerator, AnalyseCustomDataGenerator -from inference_utils_v2 import generate_bboxes_from_pred, GroundTruthGenerator -from 
inference_utils_v2 import focal_loss_checker, rotational_nms, generate_bboxes_from_pred_and_np_array, convert_boxes_to_list -from readers import KittiDataReader +from inference_utils_v2_2 import generate_bboxes_from_pred +from inference_utils_v2_2 import focal_loss_checker, rotational_nms, generate_bboxes_from_pred_and_np_array, convert_boxes_to_list +# from readers import KittiDataReader from config_v2_2 import Parameters from network_v2_2 import build_point_pillar_graph from datetime import datetime diff --git a/point_pillars_test.py b/point_pillars_test.py deleted file mode 100644 index 8cfa872..0000000 --- a/point_pillars_test.py +++ /dev/null @@ -1,70 +0,0 @@ -import unittest -import numpy as np -import tensorflow as tf - -from point_pillars import createPillars, createPillarsTarget, select - - -class PointPillarsTest(unittest.TestCase): - - def setUp(self): - np.random.seed(42) - xy = np.random.randint(-100, 100+1, size=(100000, 2)) - z = np.random.randint(-3, 1+1, size=(100000, 1)) - i = np.random.rand(100000) - self.arr = np.c_[xy, z, i] - assert self.arr.shape == (100000, 4) - - def test_pillar_creation(self): - pillars, indices = createPillars(self.arr, 100, 12000, 0.16, 0.16, 0, 80.64, -40.32, 40.32, -3, 1, True) - - assert pillars.shape == (1, 12000, 100, 7) - assert pillars.dtype == np.float32 - assert indices.shape == (1, 12000, 3) - assert indices.dtype == np.int32 - - session = tf.Session() - pillars = tf.constant(pillars, dtype=tf.float32) - indices = tf.constant(indices, dtype=tf.int32) - feature_map = tf.scatter_nd(indices, tf.reduce_mean(pillars, axis=2), (1, 504, 504, 7))[0] - arr, = session.run([feature_map]) - assert (arr.shape == (504, 504, 7)) - - @staticmethod - def test_pillar_target_creation(): - - dims = np.array([[3.7, 1.6, 1.4], [3.7, 1.6, 1.4], [0.8, 0.6, 1.7]], dtype=np.float32) - posn = np.array([[50, 10, 0], [20, 0, 0], [30, 5, 0]], dtype=np.float32) - yaws = np.array([0, 0, 90], dtype=np.float32) - - target = 
createPillarsTarget(posn, - dims, - yaws, - np.array([1, 1, 2], dtype=np.int32), - dims[[0, 2]], - np.array([0, 0], dtype=np.float32), - np.array([0, 90], dtype=np.float32), - 0.5, - 0.4, - 10, - 2, - 0.1, - 0.1, - 0, - 80, - -40, - 40, - -3, - 1, - True) - - assert target.shape == (3, 400, 400, 2, 10) - assert (target[..., 0] == 1).sum() == 83 - - selected = target[..., 0:1].argmax(axis=0) - target = select(target, selected) - assert (target.shape == (400, 400, 2, 10)) - - -if __name__ == "__main__": - unittest.main() diff --git a/point_pillars_training_custom_run_v2_2.py b/point_pillars_training_custom_run_v2_2.py index c7b4206..056002a 100644 --- a/point_pillars_training_custom_run_v2_2.py +++ b/point_pillars_training_custom_run_v2_2.py @@ -19,14 +19,16 @@ DATA_ROOT = "/media/data3/tjtanaa/kitti_dataset/" # TODO make main arg # MODEL_ROOT = "./logs_Car_Pedestrian_Custom_Dataset_single_process" # MODEL_ROOT = "./logs_Car_Pedestrian_Custom_Dataset_No_Early_Stopping_wo_Aug_wo_val" -MODEL_ROOT = "./logs_Car_Custom_Dataset_No_Early_Stopping_wo_Aug_wo_val_new_network" +MODEL_ROOT = "./logs_Car_Custom_Dataset_No_Early_Stopping_Aug_val_new_network_multigpu" os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" -os.environ["CUDA_VISIBLE_DEVICES"] = "1" +os.environ["CUDA_VISIBLE_DEVICES"] = "3" if __name__ == "__main__": params = Parameters() + # gpus = tf.config.experimental.list_physical_devices('GPU') + pillar_net = build_point_pillar_graph(params) # pillar_net.load_weights(os.path.join(MODEL_ROOT, "model.h5")) pillar_net.summary() @@ -37,15 +39,34 @@ pillar_net.compile(optimizer, loss=loss.losses()) + + # loss = PointPillarNetworkLoss(params) + + # optimizer = tf.keras.optimizers.Adam(lr=params.learning_rate, decay=params.decay_rate) + + # if len(gpus)>1: + # strategy = tf.distribute.MirroredStrategy(cross_device_ops=tf.distribute.HierarchicalCopyAllReduce()) + # with strategy.scope(): + # pillar_net = build_point_pillar_graph(params) + # # 
pillar_net.load_weights(os.path.join(MODEL_ROOT, "model.h5")) + # pillar_net.compile(optimizer, loss=loss.losses()) + # else: + # pillar_net = build_point_pillar_graph(params) + # # pillar_net.load_weights(os.path.join(MODEL_ROOT, "model.h5")) + # pillar_net.compile(optimizer, loss=loss.losses()) + + # pillar_net.summary() + + gt_database_dir = os.path.join(DATA_ROOT, "gt_database") # gt_database_dir = None training_gen = CustomDataGenerator(batch_size=params.batch_size,root_dir = DATA_ROOT, - npoints=20000, split='train_val', classes=list(params.classes_map.keys()), + npoints=20000, split='train', classes=list(params.classes_map.keys()), random_select=True, gt_database_dir=gt_database_dir, aug_hard_ratio=0.7) validation_gen = CustomDataGenerator(batch_size=params.batch_size, root_dir=DATA_ROOT, - npoints=20000, split='train_val_test', classes=list(params.classes_map.keys())) + npoints=20000, split='val', classes=list(params.classes_map.keys())) # save_viz_path = "/home/tan/tjtanaa/PointPillars/visualization/custom_processor" @@ -78,7 +99,7 @@ steps_per_epoch=len(training_gen), callbacks=callbacks, use_multiprocessing=True, - # max_queue_size = 16, + max_queue_size = 16, epochs=int(params.total_training_epochs), workers=6) except KeyboardInterrupt: diff --git a/point_pillars_training_run.py b/point_pillars_training_run.py deleted file mode 100644 index 2dd1c27..0000000 --- a/point_pillars_training_run.py +++ /dev/null @@ -1,73 +0,0 @@ -import os -import time -import numpy as np -import tensorflow as tf -from glob import glob - -from config import Parameters -from loss import PointPillarNetworkLoss -from network import build_point_pillar_graph -from processors import SimpleDataGenerator -# from custom_processors import CustomDataGenerator -from readers import KittiDataReader - -tf.get_logger().setLevel("ERROR") - -DATA_ROOT = "/media/data3/tjtanaa/kitti_dataset/KITTI/object/training" # TODO make main arg -MODEL_ROOT = "./logs_Car_Pedestrian_Original_2" - 
-os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" -os.environ["CUDA_VISIBLE_DEVICES"] = "2" - -if __name__ == "__main__": - - params = Parameters() - - pillar_net = build_point_pillar_graph(params) - # pillar_net.load_weights(os.path.join(MODEL_ROOT, "model.h5")) - - loss = PointPillarNetworkLoss(params) - - optimizer = tf.keras.optimizers.Adam(lr=params.learning_rate, decay=params.decay_rate) - - pillar_net.compile(optimizer, loss=loss.losses()) - - data_reader = KittiDataReader() - - lidar_files = sorted(glob(os.path.join(DATA_ROOT, "velodyne", "*.bin"))) - label_files = sorted(glob(os.path.join(DATA_ROOT, "label_2", "*.txt"))) - calibration_files = sorted(glob(os.path.join(DATA_ROOT, "calib", "*.txt"))) - assert len(lidar_files) == len(label_files) == len(calibration_files), "Input dirs require equal number of files." - validation_len = int(0.3*len(label_files)) - - training_gen = SimpleDataGenerator(data_reader, params.batch_size, lidar_files[:-validation_len], label_files[:-validation_len], calibration_files[:-validation_len]) - validation_gen = SimpleDataGenerator(data_reader, params.batch_size, lidar_files[-validation_len:], label_files[-validation_len:], calibration_files[-validation_len:]) - - log_dir = MODEL_ROOT - # epoch_to_decay = int( - # np.round(params.iters_to_decay / params.batch_size * int(np.ceil(float(len(label_files)) / params.batch_size)))) - - epoch_to_decay = int( - np.round(params.iters_to_decay / params.batch_size * int(len(training_gen)))) - - callbacks = [ - tf.keras.callbacks.TensorBoard(log_dir=log_dir), - tf.keras.callbacks.ModelCheckpoint(filepath=os.path.join(log_dir, "model.h5"), - monitor='val_loss', save_best_only=True), - tf.keras.callbacks.LearningRateScheduler( - lambda epoch, lr: lr * 0.8 if ((epoch % epoch_to_decay == 0) and (epoch != 0)) else lr, verbose=True), - tf.keras.callbacks.EarlyStopping(patience=20, monitor='val_loss'), - ] - - try: - pillar_net.fit(training_gen, - validation_data = validation_gen, - 
steps_per_epoch=len(training_gen), - callbacks=callbacks, - # use_multiprocessing=True, - epochs=int(params.total_training_epochs)) - # workers=6) - except KeyboardInterrupt: - model_str = "interrupted_%s.h5" % time.strftime("%Y%m%d-%H%M%S") - pillar_net.save(os.path.join(log_dir, model_str)) - print("Interrupt. Saving output to %s" % os.path.join(os.getcwd(), log_dir[1:], model_str)) diff --git a/point_pillars_visualize_input.py b/point_pillars_visualize_input.py index 090c3f5..838a5c6 100644 --- a/point_pillars_visualize_input.py +++ b/point_pillars_visualize_input.py @@ -2,12 +2,9 @@ from glob import glob import numpy as np import tensorflow as tf -# from processors import SimpleDataGenerator -# from custom_processors import CustomDataGenerator, AnalyseCustomDataGenerator from point_pillars_custom_processors_v2 import CustomDataGenerator, AnalyseCustomDataGenerator from inference_utils_v2 import generate_bboxes_from_pred, GroundTruthGenerator, focal_loss_checker from inference_utils_v2 import rotational_nms, generate_bboxes_from_pred_and_np_array -from readers import KittiDataReader from config_v2 import Parameters from network import build_point_pillar_graph