diff --git a/PointRCNN/lib/.DS_Store b/PointRCNN/lib/.DS_Store new file mode 100644 index 0000000..3e07ce9 Binary files /dev/null and b/PointRCNN/lib/.DS_Store differ diff --git a/PointRCNN/lib/config.py b/PointRCNN/lib/config.py new file mode 100644 index 0000000..2cfe5ae --- /dev/null +++ b/PointRCNN/lib/config.py @@ -0,0 +1,259 @@ +from easydict import EasyDict as edict +import numpy as np + + +__C = edict() +cfg = __C + +# 0. basic config +__C.TAG = 'default' +__C.CLASSES = 'Car' + +__C.INCLUDE_SIMILAR_TYPE = False + +# config of augmentation +__C.AUG_DATA = True +__C.AUG_METHOD_LIST = ['rotation', 'scaling', 'flip'] +__C.AUG_METHOD_PROB = [0.5, 0.5, 0.5] +__C.AUG_ROT_RANGE = 18 + +__C.GT_AUG_ENABLED = False +__C.GT_EXTRA_NUM = 15 +__C.GT_AUG_RAND_NUM = False +__C.GT_AUG_APPLY_PROB = 0.75 +__C.GT_AUG_HARD_RATIO = 0.6 + +__C.PC_REDUCE_BY_RANGE = True +__C.PC_AREA_SCOPE = np.array([[-40, 40], + [-1, 3], + [0, 70.4]]) # x, y, z scope in rect camera coords + +__C.CLS_MEAN_SIZE = np.array([[1.52, 1.63, 3.88]], dtype=np.float32) + + +# 1. config of rpn network +__C.RPN = edict() +__C.RPN.ENABLED = True +__C.RPN.FIXED = False + +__C.RPN.USE_INTENSITY = True + +# config of bin-based loss +__C.RPN.LOC_XZ_FINE = False +__C.RPN.LOC_SCOPE = 3.0 +__C.RPN.LOC_BIN_SIZE = 0.5 +__C.RPN.NUM_HEAD_BIN = 12 + +# config of network structure +__C.RPN.BACKBONE = 'pointnet2_msg' + +__C.RPN.USE_BN = True +__C.RPN.NUM_POINTS = 16384 + +__C.RPN.SA_CONFIG = edict() +__C.RPN.SA_CONFIG.NPOINTS = [4096, 1024, 256, 64] +__C.RPN.SA_CONFIG.RADIUS = [[0.1, 0.5], [0.5, 1.0], [1.0, 2.0], [2.0, 4.0]] +__C.RPN.SA_CONFIG.NSAMPLE = [[16, 32], [16, 32], [16, 32], [16, 32]] +__C.RPN.SA_CONFIG.MLPS = [[[16, 16, 32], [32, 32, 64]], + [[64, 64, 128], [64, 96, 128]], + [[128, 196, 256], [128, 196, 256]], + [[256, 256, 512], [256, 384, 512]]] +__C.RPN.FP_MLPS = [[128, 128], [256, 256], [512, 512], [512, 512]] +__C.RPN.CLS_FC = [128] +__C.RPN.REG_FC = [128] +__C.RPN.DP_RATIO = 0.5 + +# config of training +__C.RPN.LOSS_CLS = 'DiceLoss' +__C.RPN.FG_WEIGHT = 15 +__C.RPN.FOCAL_ALPHA = [0.25, 0.75] +__C.RPN.FOCAL_GAMMA = 2.0 +__C.RPN.REG_LOSS_WEIGHT = [1.0, 1.0, 1.0, 1.0] +__C.RPN.LOSS_WEIGHT = [1.0, 1.0] +__C.RPN.NMS_TYPE = 'normal' # normal, rotate + +# config of testing +__C.RPN.SCORE_THRESH = 0.3 + + +# 2. 
config of rcnn network +__C.RCNN = edict() +__C.RCNN.ENABLED = False + +# config of input +__C.RCNN.USE_RPN_FEATURES = True +__C.RCNN.USE_MASK = True +__C.RCNN.MASK_TYPE = 'seg' +__C.RCNN.USE_INTENSITY = False +__C.RCNN.USE_DEPTH = True +__C.RCNN.USE_SEG_SCORE = False +__C.RCNN.ROI_SAMPLE_JIT = False +__C.RCNN.ROI_FG_AUG_TIMES = 10 + +__C.RCNN.REG_AUG_METHOD = 'multiple' # multiple, single, normal +__C.RCNN.POOL_EXTRA_WIDTH = 1.0 + +# config of bin-based loss +__C.RCNN.LOC_SCOPE = 1.5 +__C.RCNN.LOC_BIN_SIZE = 0.5 +__C.RCNN.NUM_HEAD_BIN = 9 +__C.RCNN.LOC_Y_BY_BIN = False +__C.RCNN.LOC_Y_SCOPE = 0.5 +__C.RCNN.LOC_Y_BIN_SIZE = 0.25 +__C.RCNN.SIZE_RES_ON_ROI = False + +# config of network structure +__C.RCNN.USE_BN = False +__C.RCNN.DP_RATIO = 0.0 + +__C.RCNN.BACKBONE = 'pointnet' # pointnet, pointsift +__C.RCNN.XYZ_UP_LAYER = [128, 128] + +__C.RCNN.NUM_POINTS = 512 +__C.RCNN.SA_CONFIG = edict() +__C.RCNN.SA_CONFIG.NPOINTS = [128, 32, -1] +__C.RCNN.SA_CONFIG.RADIUS = [0.2, 0.4, 100] +__C.RCNN.SA_CONFIG.NSAMPLE = [64, 64, 64] +__C.RCNN.SA_CONFIG.MLPS = [[128, 128, 128], + [128, 128, 256], + [256, 256, 512]] +__C.RCNN.CLS_FC = [256, 256] +__C.RCNN.REG_FC = [256, 256] + +# config of training +__C.RCNN.LOSS_CLS = 'BinaryCrossEntropy' +__C.RCNN.FOCAL_ALPHA = [0.25, 0.75] +__C.RCNN.FOCAL_GAMMA = 2.0 +__C.RCNN.CLS_WEIGHT = np.array([1.0, 1.0, 1.0], dtype=np.float32) +__C.RCNN.CLS_FG_THRESH = 0.6 +__C.RCNN.CLS_BG_THRESH = 0.45 +__C.RCNN.CLS_BG_THRESH_LO = 0.05 +__C.RCNN.REG_FG_THRESH = 0.55 +__C.RCNN.FG_RATIO = 0.5 +__C.RCNN.ROI_PER_IMAGE = 64 +__C.RCNN.HARD_BG_RATIO = 0.6 + +# config of testing +__C.RCNN.SCORE_THRESH = 0.3 +__C.RCNN.NMS_THRESH = 0.1 + + +# general training config +__C.TRAIN = edict() +__C.TRAIN.SPLIT = 'train' +__C.TRAIN.VAL_SPLIT = 'smallval' + +__C.TRAIN.LR = 0.002 +__C.TRAIN.LR_CLIP = 0.00001 +__C.TRAIN.LR_DECAY = 0.5 +__C.TRAIN.DECAY_STEP_LIST = [50, 100, 150, 200, 250, 300] +__C.TRAIN.LR_WARMUP = False +__C.TRAIN.WARMUP_MIN = 0.0002 +__C.TRAIN.WARMUP_EPOCH = 5 + +__C.TRAIN.DEPTH_LR = 0.002 + +__C.TRAIN.BN_MOMENTUM = 0.9 +__C.TRAIN.BN_DECAY = 0.5 +__C.TRAIN.BNM_CLIP = 0.01 +__C.TRAIN.BN_DECAY_STEP_LIST = [50, 100, 150, 200, 250, 300] + +__C.TRAIN.OPTIMIZER = 'adam' +__C.TRAIN.WEIGHT_DECAY = 0.0 # "L2 regularization coeff [default: 0.0]" +__C.TRAIN.MOMENTUM = 0.9 + +__C.TRAIN.MOMS = [0.95, 0.85] +__C.TRAIN.DIV_FACTOR = 10.0 +__C.TRAIN.PCT_START = 0.4 + +__C.TRAIN.GRAD_NORM_CLIP = 1.0 + +__C.TRAIN.RPN_PRE_NMS_TOP_N = 12000 +__C.TRAIN.RPN_POST_NMS_TOP_N = 2048 +__C.TRAIN.RPN_NMS_THRESH = 0.85 +__C.TRAIN.RPN_DISTANCE_BASED_PROPOSE = True + + +__C.TEST = edict() +__C.TEST.SPLIT = 'val' +__C.TEST.RPN_PRE_NMS_TOP_N = 9000 +__C.TEST.RPN_POST_NMS_TOP_N = 300 +__C.TEST.RPN_NMS_THRESH = 0.7 +__C.TEST.RPN_DISTANCE_BASED_PROPOSE = True + + +def cfg_from_file(filename): + """Load a config file and merge it into the default options.""" + import yaml + with open(filename, 'r') as f: + yaml_cfg = edict(yaml.load(f)) + + _merge_a_into_b(yaml_cfg, __C) + + +def _merge_a_into_b(a, b): + """Merge config dictionary a into config dictionary b, clobbering the + options in b whenever they are also specified in a. + """ + if type(a) is not edict: + return + + for k, v in a.items(): + # a must specify keys that are in b + if k not in b: + raise KeyError('{} is not a valid config key'.format(k)) + # the types must match, too + old_type = type(b[k]) + if old_type is not type(v): + if isinstance(b[k], np.ndarray): + v = np.array(v, dtype=b[k].dtype) + else: + raise ValueError(('Type mismatch ({} vs. 
{}) ' + 'for config key: {}').format(type(b[k]), type(v), k)) + # recursively merge dicts + if type(v) is edict: + try: + _merge_a_into_b(a[k], b[k]) + except: + print(('Error under config key: {}'.format(k))) + raise + else: + b[k] = v + + +def cfg_from_list(cfg_list): + """Set config keys via list (e.g., from command line).""" + from ast import literal_eval + assert len(cfg_list) % 2 == 0 + for k, v in zip(cfg_list[0::2], cfg_list[1::2]): + key_list = k.split('.') + d = __C + for subkey in key_list[:-1]: + assert subkey in d + d = d[subkey] + subkey = key_list[-1] + assert subkey in d + try: + value = literal_eval(v) + except: + # handle the case when v is a string literal + value = v + assert type(value) == type(d[subkey]), \ + 'type {} does not match original type {}'.format(type(value), type(d[subkey])) + d[subkey] = value + + +def save_config_to_file(cfg, pre='cfg', logger=None): + for key, val in cfg.items(): + if isinstance(cfg[key], edict): + if logger is not None: + logger.info('\n%s.%s = edict()' % (pre, key)) + else: + print('\n%s.%s = edict()' % (pre, key)) + save_config_to_file(cfg[key], pre=pre + '.' + key, logger=logger) + continue + + if logger is not None: + logger.info('%s.%s: %s' % (pre, key, val)) + else: + print('%s.%s: %s' % (pre, key, val)) diff --git a/PointRCNN/lib/datasets/kitti_dataset.py b/PointRCNN/lib/datasets/kitti_dataset.py new file mode 100644 index 0000000..9c3717b --- /dev/null +++ b/PointRCNN/lib/datasets/kitti_dataset.py @@ -0,0 +1,87 @@ +import os +import numpy as np +import torch.utils.data as torch_data +import lib.utils.calibration as calibration +import lib.utils.kitti_utils as kitti_utils +from PIL import Image +import pdb + + +class KittiDataset(torch_data.Dataset): + def __init__(self, root_dir, split='train'): + self.split = split + is_test = self.split == 'test' + self.imageset_dir = os.path.join(root_dir, 'KITTI', 'object', 'testing' if is_test else 'training') + + split_dir = os.path.join(root_dir, 'KITTI', 'ImageSets', split + '.txt') + self.image_idx_list = [x.strip() for x in open(split_dir).readlines()] + self.num_sample = self.image_idx_list.__len__() + + self.image_dir = os.path.join(self.imageset_dir, 'image_2') + self.image3_dir = os.path.join(self.imageset_dir, 'image_3') + self.depth_dir = os.path.join(self.imageset_dir, 'depth_map') + + self.lidar_dir = os.path.join(self.imageset_dir, 'velodyne') + self.calib_dir = os.path.join(self.imageset_dir, 'calib') + self.label_dir = os.path.join(self.imageset_dir, 'label_2') + self.plane_dir = os.path.join(self.imageset_dir, 'planes') + + def get_image(self, idx, left_image=True): + if left_image: + img_file = os.path.join(self.image_dir, '%06d.png' % idx) + else: + img_file = os.path.join(self.image3_dir, '%06d.png' % idx) + assert os.path.exists(img_file) + + img = Image.open(img_file).convert('RGB') + return img + + def get_image_shape(self, idx): + img_file = os.path.join(self.image_dir, '%06d.png' % idx) + assert os.path.exists(img_file) + im = Image.open(img_file) + width, height = im.size + return height, width, 3 + + def get_lidar(self, idx): + lidar_file = os.path.join(self.lidar_dir, '%06d.bin' % idx) + assert os.path.exists(lidar_file) + return np.fromfile(lidar_file, dtype=np.float32).reshape(-1, 4) + + def get_depth(self, idx): + depth_file = os.path.join(self.depth_dir, '%06d.npy' % idx) + assert os.path.exists(depth_file) + return np.load(depth_file).astype(np.float32) + + def get_calib(self, idx): + calib_file = os.path.join(self.calib_dir, '%06d.txt' % idx) + assert 
os.path.exists(calib_file) + return calibration.Calibration(calib_file) + + def get_label(self, idx): + label_file = os.path.join(self.label_dir, '%06d.txt' % idx) + assert os.path.exists(label_file) + return kitti_utils.get_objects_from_label(label_file) + + + + def get_road_plane(self, idx): + plane_file = os.path.join(self.plane_dir, '%06d.txt' % idx) + with open(plane_file, 'r') as f: + lines = f.readlines() + lines = [float(i) for i in lines[3].split()] + plane = np.asarray(lines) + + # Ensure normal is always facing up, this is in the rectified camera coordinate + if plane[1] > 0: + plane = -plane + + norm = np.linalg.norm(plane[0:3]) + plane = plane / norm + return plane + + def __len__(self): + raise NotImplementedError + + def __getitem__(self, item): + raise NotImplementedError diff --git a/PointRCNN/lib/datasets/kitti_depth_rcnn_dataset.py b/PointRCNN/lib/datasets/kitti_depth_rcnn_dataset.py new file mode 100644 index 0000000..bf36b4f --- /dev/null +++ b/PointRCNN/lib/datasets/kitti_depth_rcnn_dataset.py @@ -0,0 +1,1089 @@ +import numpy as np +import os +import pickle +import torch + +from lib.datasets.kitti_dataset import KittiDataset +import lib.utils.kitti_utils as kitti_utils +import lib.utils.roipool3d.roipool3d_utils as roipool3d_utils +from lib.config import cfg + + + +class KittiRCNNDataset(KittiDataset): + def __init__(self, root_dir, npoints=16384, split='train', classes='Car', mode='TRAIN', random_select=True, + logger=None, rcnn_training_roi_dir=None, rcnn_training_feature_dir=None, rcnn_eval_roi_dir=None, + rcnn_eval_feature_dir=None, gt_database_dir=None, pseudo_lidar=False): + super().__init__(root_dir=root_dir, split=split) + if classes == 'Car': + self.classes = ('Background', 'Car') + aug_scene_root_dir = os.path.join(root_dir, 'KITTI', 'aug_scene') + elif classes == 'People': + self.classes = ('Background', 'Pedestrian', 'Cyclist') + elif classes == 'Pedestrian': + self.classes = ('Background', 'Pedestrian') + aug_scene_root_dir = os.path.join(root_dir, 'KITTI', 'aug_scene_ped') + elif classes == 'Cyclist': + self.classes = ('Background', 'Cyclist') + aug_scene_root_dir = os.path.join(root_dir, 'KITTI', 'aug_scene_cyclist') + else: + assert False, "Invalid classes: %s" % classes + + self.num_class = self.classes.__len__() + + self.npoints = npoints + self.sample_id_list = [] + self.random_select = random_select + self.logger = logger + + if split == 'train_aug': + self.aug_label_dir = os.path.join(aug_scene_root_dir, 'training', 'aug_label') + self.aug_pts_dir = os.path.join(aug_scene_root_dir, 'training', 'rectified_data') + else: + self.aug_label_dir = os.path.join(aug_scene_root_dir, 'training', 'aug_label') + self.aug_pts_dir = os.path.join(aug_scene_root_dir, 'training', 'rectified_data') + + # for rcnn training + self.rcnn_training_bbox_list = [] + self.rpn_feature_list = {} + self.pos_bbox_list = [] + self.neg_bbox_list = [] + self.far_neg_bbox_list = [] + self.rcnn_eval_roi_dir = rcnn_eval_roi_dir + self.rcnn_eval_feature_dir = rcnn_eval_feature_dir + self.rcnn_training_roi_dir = rcnn_training_roi_dir + self.rcnn_training_feature_dir = rcnn_training_feature_dir + + self.gt_database = None + + if not self.random_select: + self.logger.warning('random select is False') + + assert mode in ['TRAIN', 'EVAL', 'TEST'], 'Invalid mode: %s' % mode + self.mode = mode + + if cfg.RPN.ENABLED: + if gt_database_dir is not None: + self.gt_database = pickle.load(open(gt_database_dir, 'rb')) + + if cfg.GT_AUG_HARD_RATIO > 0: + easy_list, hard_list = [], [] + for k in 
range(self.gt_database.__len__()): + obj = self.gt_database[k] + if obj['points'].shape[0] > 100: + easy_list.append(obj) + else: + hard_list.append(obj) + self.gt_database = [easy_list, hard_list] + logger.info('Loading gt_database(easy(pt_num>100): %d, hard(pt_num<=100): %d) from %s' + % (len(easy_list), len(hard_list), gt_database_dir)) + else: + logger.info('Loading gt_database(%d) from %s' % (len(self.gt_database), gt_database_dir)) + + if mode == 'TRAIN': + self.preprocess_rpn_training_data() + else: + self.sample_id_list = [int(sample_id) for sample_id in self.image_idx_list] + self.logger.info('Load testing samples from %s' % self.imageset_dir) + self.logger.info('Done: total test samples %d' % len(self.sample_id_list)) + elif cfg.RCNN.ENABLED: + for idx in range(0, self.num_sample): + sample_id = int(self.image_idx_list[idx]) + obj_list = self.filtrate_objects(self.get_label(sample_id)) + if len(obj_list) == 0: + # logger.info('No gt classes: %06d' % sample_id) + continue + self.sample_id_list.append(sample_id) + + print('Done: filter %s results for rcnn training: %d / %d\n' % + (self.mode, len(self.sample_id_list), len(self.image_idx_list))) + + def preprocess_rpn_training_data(self): + """ + Discard samples which don't have current classes, which will not be used for training. + Valid sample_id is stored in self.sample_id_list + """ + self.logger.info('Loading %s samples from %s ...' % (self.mode, self.label_dir)) + for idx in range(0, self.num_sample): + sample_id = int(self.image_idx_list[idx]) + obj_list = self.filtrate_objects(self.get_label(sample_id)) + if len(obj_list) == 0: + # self.logger.info('No gt classes: %06d' % sample_id) + continue + self.sample_id_list.append(sample_id) + + self.logger.info('Done: filter %s results: %d / %d\n' % (self.mode, len(self.sample_id_list), + len(self.image_idx_list))) + + def get_label(self, idx): + if idx < 10000: + label_file = os.path.join(self.label_dir, '%06d.txt' % idx) + else: + label_file = os.path.join(self.aug_label_dir, '%06d.txt' % idx) + + assert os.path.exists(label_file) + return kitti_utils.get_objects_from_label(label_file) + + def get_image(self, idx, left_image=True): + return super().get_image(idx % 10000, left_image) + + def get_image_shape(self, idx): + return super().get_image_shape(idx % 10000) + + def get_calib(self, idx): + return super().get_calib(idx % 10000) + + def get_road_plane(self, idx): + return super().get_road_plane(idx % 10000) + + @staticmethod + def get_rpn_features(rpn_feature_dir, idx): + rpn_feature_file = os.path.join(rpn_feature_dir, '%06d.npy' % idx) + rpn_xyz_file = os.path.join(rpn_feature_dir, '%06d_xyz.npy' % idx) + rpn_intensity_file = os.path.join(rpn_feature_dir, '%06d_intensity.npy' % idx) + if cfg.RCNN.USE_SEG_SCORE: + rpn_seg_file = os.path.join(rpn_feature_dir, '%06d_rawscore.npy' % idx) + rpn_seg_score = np.load(rpn_seg_file).reshape(-1) + rpn_seg_score = torch.sigmoid(torch.from_numpy(rpn_seg_score)).numpy() + else: + rpn_seg_file = os.path.join(rpn_feature_dir, '%06d_seg.npy' % idx) + rpn_seg_score = np.load(rpn_seg_file).reshape(-1) + return np.load(rpn_xyz_file), np.load(rpn_feature_file), np.load(rpn_intensity_file).reshape(-1), rpn_seg_score + + def filtrate_objects(self, obj_list): + """ + Discard objects which are not in self.classes (or its similar classes) + :param obj_list: list + :return: list + """ + type_whitelist = self.classes + if self.mode == 'TRAIN' and cfg.INCLUDE_SIMILAR_TYPE: + type_whitelist = list(self.classes) + if 'Car' in self.classes: + 
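+# With INCLUDE_SIMILAR_TYPE, KITTI's look-alike classes are kept as ground
+# truth during training: 'Van' objects count for 'Car', and 'Person_sitting'
+# counts for 'Pedestrian'.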
type_whitelist.append('Van') + if 'Pedestrian' in self.classes: # or 'Cyclist' in self.classes: + type_whitelist.append('Person_sitting') + + valid_obj_list = [] + for obj in obj_list: + if obj.cls_type not in type_whitelist: # rm Van, 20180928 + continue + if self.mode == 'TRAIN' and cfg.PC_REDUCE_BY_RANGE and (self.check_pc_range(obj.pos) is False): + continue + valid_obj_list.append(obj) + return valid_obj_list + + @staticmethod + def filtrate_dc_objects(obj_list): + valid_obj_list = [] + for obj in obj_list: + if obj.cls_type in ['DontCare']: + continue + valid_obj_list.append(obj) + + return valid_obj_list + + @staticmethod + def check_pc_range(xyz): + """ + :param xyz: [x, y, z] + :return: + """ + x_range, y_range, z_range = cfg.PC_AREA_SCOPE + if (x_range[0] <= xyz[0] <= x_range[1]) and (y_range[0] <= xyz[1] <= y_range[1]) and \ + (z_range[0] <= xyz[2] <= z_range[1]): + return True + return False + + @staticmethod + def get_valid_flag(pts_rect, pts_img, pts_rect_depth, img_shape): + """ + Valid point should be in the image (and in the PC_AREA_SCOPE) + :param pts_rect: + :param pts_img: + :param pts_rect_depth: + :param img_shape: + :return: + """ + val_flag_1 = np.logical_and(pts_img[:, 0] >= 0, pts_img[:, 0] < img_shape[1]) + val_flag_2 = np.logical_and(pts_img[:, 1] >= 0, pts_img[:, 1] < img_shape[0]) + val_flag_merge = np.logical_and(val_flag_1, val_flag_2) + pts_valid_flag = np.logical_and(val_flag_merge, pts_rect_depth >= 0) + + if cfg.PC_REDUCE_BY_RANGE: + x_range, y_range, z_range = cfg.PC_AREA_SCOPE + pts_x, pts_y, pts_z = pts_rect[:, 0], pts_rect[:, 1], pts_rect[:, 2] + range_flag = (pts_x >= x_range[0]) & (pts_x <= x_range[1]) \ + & (pts_y >= y_range[0]) & (pts_y <= y_range[1]) \ + & (pts_z >= z_range[0]) & (pts_z <= z_range[1]) + pts_valid_flag = pts_valid_flag & range_flag + return pts_valid_flag + + def __len__(self): + if cfg.RPN.ENABLED: + return len(self.sample_id_list) + elif cfg.RCNN.ENABLED: + if self.mode == 'TRAIN': + return len(self.sample_id_list) + else: + return len(self.image_idx_list) + else: + raise NotImplementedError + + def __getitem__(self, index): + if cfg.RPN.ENABLED: + return self.get_rpn_sample(index) + elif cfg.RCNN.ENABLED: + if self.mode == 'TRAIN': + if cfg.RCNN.ROI_SAMPLE_JIT: + return self.get_rcnn_sample_jit(index) + else: + return self.get_rcnn_training_sample_batch(index) + else: + return self.get_proposal_from_file(index) + else: + raise NotImplementedError + + def get_rpn_sample(self, index): + sample_id = int(self.sample_id_list[index]) + if sample_id < 10000: + calib = self.get_calib(sample_id) + img_left = self.get_image(sample_id % 10000, left_image=True) + img_right = self.get_image(sample_id % 10000, left_image=False) + # img_shape = self.get_image_shape(sample_id) + W, H = img_left.size + depth = self.get_depth(sample_id) + + # Pad depth to constant shape for batching + top_pad = 384 - H + right_pad = 1248 - W + depth = np.pad(depth, ((top_pad, 0), (0, right_pad)), 'constant', constant_values=0) + + sample_info = {'sample_id': sample_id, 'random_select': self.random_select} + + if self.mode == 'TEST': + sample_info['left_image'] = img_left + sample_info['right_image'] = img_right + sample_info['gt_depth'] = depth + sample_info['calib'] = calib + return sample_info + + gt_obj_list = self.filtrate_objects(self.get_label(sample_id)) + gt_boxes3d = kitti_utils.objs_to_boxes3d(gt_obj_list) + + gt_alpha = np.zeros((gt_obj_list.__len__()), dtype=np.float32) + for k, obj in enumerate(gt_obj_list): + gt_alpha[k] = obj.alpha + + 
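+# gt_alpha is the KITTI observation angle: with beta = arctan2(z, x),
+# alpha = -sign(beta) * pi/2 + beta + ry. Unlike ry, alpha is invariant when
+# the whole scene is rotated about the camera y-axis, so data_augmentation()
+# (defined below) uses it to recover the heading after a rotation:
+# new_ry = sign(beta') * pi/2 + alpha - beta'.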
aug_gt_boxes3d = gt_boxes3d.copy() + + if cfg.RPN.FIXED: + sample_info['left_image'] = img_left + sample_info['right_image'] = img_right + sample_info['gt_depth'] = depth + sample_info['calib'] = calib + sample_info['gt_boxes3d'] = aug_gt_boxes3d + return sample_info + + sample_info['left_image'] = img_left + sample_info['right_image'] = img_right + sample_info['gt_depth'] = depth + sample_info['calib'] = calib + sample_info['gt_boxes3d'] = aug_gt_boxes3d + return sample_info + + # @staticmethod + # def depth_from_lidar(pc_velo, H, W, calib): + # pts_2d, _ = calib.lidar_to_img(pc_velo) + # fov_inds = (pts_2d[:, 0] < W - 1) & (pts_2d[:, 0] >= 0) & \ + # (pts_2d[:, 1] < H - 1) & (pts_2d[:, 1] >= 0) + # fov_inds = fov_inds & (pc_velo[:, 0] > 2) + # imgfov_pc_velo = pc_velo[fov_inds, :] + # imgfov_pts_2d = pts_2d[fov_inds, :] + + # imgfov_pc_rect = calib.lidar_to_rect(imgfov_pc_velo) + # depth_map = np.zeros((H, W)) - 1 + # imgfov_pts_2d = np.round(imgfov_pts_2d).astype(int) + + # depth_map[imgfov_pts_2d[:, 1], imgfov_pts_2d[:, 0]] = imgfov_pc_rect[:, 2] + + # return depth_map + + @staticmethod + def generate_rpn_training_labels(pts_rect, gt_boxes3d): + cls_label = np.zeros((pts_rect.shape[0]), dtype=np.int32) + reg_label = np.zeros((pts_rect.shape[0], 7), dtype=np.float32) # dx, dy, dz, ry, h, w, l + gt_corners = kitti_utils.boxes3d_to_corners3d(gt_boxes3d, rotate=True) + extend_gt_boxes3d = kitti_utils.enlarge_box3d(gt_boxes3d, extra_width=0.2) + extend_gt_corners = kitti_utils.boxes3d_to_corners3d(extend_gt_boxes3d, rotate=True) + for k in range(gt_boxes3d.shape[0]): + box_corners = gt_corners[k] + fg_pt_flag = kitti_utils.in_hull(pts_rect, box_corners) + fg_pts_rect = pts_rect[fg_pt_flag] + cls_label[fg_pt_flag] = 1 + + # enlarge the bbox3d, ignore nearby points + extend_box_corners = extend_gt_corners[k] + fg_enlarge_flag = kitti_utils.in_hull(pts_rect, extend_box_corners) + ignore_flag = np.logical_xor(fg_pt_flag, fg_enlarge_flag) + cls_label[ignore_flag] = -1 + + # pixel offset of object center + center3d = gt_boxes3d[k][0:3].copy() # (x, y, z) + center3d[1] -= gt_boxes3d[k][3] / 2 + reg_label[fg_pt_flag, 0:3] = center3d - fg_pts_rect # Now y is the true center of 3d box 20180928 + + # size and angle encoding + reg_label[fg_pt_flag, 3] = gt_boxes3d[k][3] # h + reg_label[fg_pt_flag, 4] = gt_boxes3d[k][4] # w + reg_label[fg_pt_flag, 5] = gt_boxes3d[k][5] # l + reg_label[fg_pt_flag, 6] = gt_boxes3d[k][6] # ry + + return cls_label, reg_label + + def rotate_box3d_along_y(self, box3d, rot_angle): + old_x, old_z, ry = box3d[0], box3d[2], box3d[6] + old_beta = np.arctan2(old_z, old_x) + alpha = -np.sign(old_beta) * np.pi / 2 + old_beta + ry + + box3d = kitti_utils.rotate_pc_along_y(box3d.reshape(1, 7), rot_angle=rot_angle)[0] + new_x, new_z = box3d[0], box3d[2] + new_beta = np.arctan2(new_z, new_x) + box3d[6] = np.sign(new_beta) * np.pi / 2 + alpha - new_beta + + return box3d + + def apply_gt_aug_to_one_scene(self, sample_id, pts_rect, pts_intensity, all_gt_boxes3d): + """ + :param pts_rect: (N, 3) + :param all_gt_boxex3d: (M2, 7) + :return: + """ + assert self.gt_database is not None + # extra_gt_num = np.random.randint(10, 15) + # try_times = 50 + if cfg.GT_AUG_RAND_NUM: + extra_gt_num = np.random.randint(10, cfg.GT_EXTRA_NUM) + else: + extra_gt_num = cfg.GT_EXTRA_NUM + try_times = 100 + cnt = 0 + cur_gt_boxes3d = all_gt_boxes3d.copy() + cur_gt_boxes3d[:, 4] += 0.5 # TODO: consider different objects + cur_gt_boxes3d[:, 5] += 0.5 # enlarge new added box to avoid too nearby boxes + 
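+# The 0.5 m width/length padding above (mirrored on each sampled box below)
+# is a spacing buffer: a candidate object is accepted only if its enlarged box
+# has essentially zero 3D IoU with every enlarged existing box
+# (iou3d.max() < 1e-8), so injected objects never abut the originals. Accepted
+# objects are also re-seated on the road plane a*x + b*y + c*z + d = 0, i.e.
+# y_ground = (-d - a*x - c*z) / b, before their points are merged in.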
cur_gt_corners = kitti_utils.boxes3d_to_corners3d(cur_gt_boxes3d) + + extra_gt_obj_list = [] + extra_gt_boxes3d_list = [] + new_pts_list, new_pts_intensity_list = [], [] + src_pts_flag = np.ones(pts_rect.shape[0], dtype=np.int32) + + road_plane = self.get_road_plane(sample_id) + a, b, c, d = road_plane + + while try_times > 0: + if cnt > extra_gt_num: + break + + try_times -= 1 + if cfg.GT_AUG_HARD_RATIO > 0: + p = np.random.rand() + if p > cfg.GT_AUG_HARD_RATIO: + # use easy sample + rand_idx = np.random.randint(0, len(self.gt_database[0])) + new_gt_dict = self.gt_database[0][rand_idx] + else: + # use hard sample + rand_idx = np.random.randint(0, len(self.gt_database[1])) + new_gt_dict = self.gt_database[1][rand_idx] + else: + rand_idx = np.random.randint(0, self.gt_database.__len__()) + new_gt_dict = self.gt_database[rand_idx] + + new_gt_box3d = new_gt_dict['gt_box3d'].copy() + new_gt_points = new_gt_dict['points'].copy() + new_gt_intensity = new_gt_dict['intensity'].copy() + new_gt_obj = new_gt_dict['obj'] + center = new_gt_box3d[0:3] + if cfg.PC_REDUCE_BY_RANGE and (self.check_pc_range(center) is False): + continue + + if new_gt_points.__len__() < 5: # too few points + continue + + # put it on the road plane + cur_height = (-d - a * center[0] - c * center[2]) / b + move_height = new_gt_box3d[1] - cur_height + new_gt_box3d[1] -= move_height + new_gt_points[:, 1] -= move_height + new_gt_obj.pos[1] -= move_height + + new_enlarged_box3d = new_gt_box3d.copy() + new_enlarged_box3d[4] += 0.5 + new_enlarged_box3d[5] += 0.5 # enlarge new added box to avoid too nearby boxes + + cnt += 1 + new_corners = kitti_utils.boxes3d_to_corners3d(new_enlarged_box3d.reshape(1, 7)) + iou3d = kitti_utils.get_iou3d(new_corners, cur_gt_corners) + valid_flag = iou3d.max() < 1e-8 + if not valid_flag: + continue + + enlarged_box3d = new_gt_box3d.copy() + enlarged_box3d[3] += 2 # remove the points above and below the object + + boxes_pts_mask_list = roipool3d_utils.pts_in_boxes3d_cpu( + torch.from_numpy(pts_rect), torch.from_numpy(enlarged_box3d.reshape(1, 7))) + pt_mask_flag = (boxes_pts_mask_list[0].numpy() == 1) + src_pts_flag[pt_mask_flag] = 0 # remove the original points which are inside the new box + + new_pts_list.append(new_gt_points) + new_pts_intensity_list.append(new_gt_intensity) + cur_gt_boxes3d = np.concatenate((cur_gt_boxes3d, new_enlarged_box3d.reshape(1, 7)), axis=0) + cur_gt_corners = np.concatenate((cur_gt_corners, new_corners), axis=0) + extra_gt_boxes3d_list.append(new_gt_box3d.reshape(1, 7)) + extra_gt_obj_list.append(new_gt_obj) + + if new_pts_list.__len__() == 0: + return False, pts_rect, pts_intensity, None, None + + extra_gt_boxes3d = np.concatenate(extra_gt_boxes3d_list, axis=0) + # remove original points and add new points + pts_rect = pts_rect[src_pts_flag == 1] + pts_intensity = pts_intensity[src_pts_flag == 1] + new_pts_rect = np.concatenate(new_pts_list, axis=0) + new_pts_intensity = np.concatenate(new_pts_intensity_list, axis=0) + pts_rect = np.concatenate((pts_rect, new_pts_rect), axis=0) + pts_intensity = np.concatenate((pts_intensity, new_pts_intensity), axis=0) + + return True, pts_rect, pts_intensity, extra_gt_boxes3d, extra_gt_obj_list + + def data_augmentation(self, aug_pts_rect, aug_gt_boxes3d, gt_alpha, sample_id=None, mustaug=False, stage=1): + """ + :param aug_pts_rect: (N, 3) + :param aug_gt_boxes3d: (N, 7) + :param gt_alpha: (N) + :return: + """ + aug_list = cfg.AUG_METHOD_LIST + aug_enable = 1 - np.random.rand(3) + if mustaug is True: + aug_enable[0] = -1 + 
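+# Each method fires when aug_enable[i] < cfg.AUG_METHOD_PROB[i]; because
+# aug_enable = 1 - rand(3) lies in (0, 1], forcing an entry to -1 guarantees
+# that method runs. mustaug thus always applies rotation and scaling, while
+# the flip stays probabilistic.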
aug_enable[1] = -1 + aug_method = [] + if 'rotation' in aug_list and aug_enable[0] < cfg.AUG_METHOD_PROB[0]: + angle = np.random.uniform(-np.pi / cfg.AUG_ROT_RANGE, np.pi / cfg.AUG_ROT_RANGE) + aug_pts_rect = kitti_utils.rotate_pc_along_y(aug_pts_rect, rot_angle=angle) + if stage == 1: + # xyz change, hwl unchange + aug_gt_boxes3d = kitti_utils.rotate_pc_along_y(aug_gt_boxes3d, rot_angle=angle) + + # calculate the ry after rotation + x, z = aug_gt_boxes3d[:, 0], aug_gt_boxes3d[:, 2] + beta = np.arctan2(z, x) + new_ry = np.sign(beta) * np.pi / 2 + gt_alpha - beta + aug_gt_boxes3d[:, 6] = new_ry # TODO: not in [-np.pi / 2, np.pi / 2] + elif stage == 2: + # for debug stage-2, this implementation has little float precision difference with the above one + assert aug_gt_boxes3d.shape[0] == 2 + aug_gt_boxes3d[0] = self.rotate_box3d_along_y(aug_gt_boxes3d[0], angle) + aug_gt_boxes3d[1] = self.rotate_box3d_along_y(aug_gt_boxes3d[1], angle) + else: + raise NotImplementedError + + aug_method.append(['rotation', angle]) + + if 'scaling' in aug_list and aug_enable[1] < cfg.AUG_METHOD_PROB[1]: + scale = np.random.uniform(0.95, 1.05) + aug_pts_rect = aug_pts_rect * scale + aug_gt_boxes3d[:, 0:6] = aug_gt_boxes3d[:, 0:6] * scale + aug_method.append(['scaling', scale]) + + if 'flip' in aug_list and aug_enable[2] < cfg.AUG_METHOD_PROB[2]: + # flip horizontal + aug_pts_rect[:, 0] = -aug_pts_rect[:, 0] + aug_gt_boxes3d[:, 0] = -aug_gt_boxes3d[:, 0] + # flip orientation: ry > 0: pi - ry, ry < 0: -pi - ry + if stage == 1: + aug_gt_boxes3d[:, 6] = np.sign(aug_gt_boxes3d[:, 6]) * np.pi - aug_gt_boxes3d[:, 6] + elif stage == 2: + assert aug_gt_boxes3d.shape[0] == 2 + aug_gt_boxes3d[0, 6] = np.sign(aug_gt_boxes3d[0, 6]) * np.pi - aug_gt_boxes3d[0, 6] + aug_gt_boxes3d[1, 6] = np.sign(aug_gt_boxes3d[1, 6]) * np.pi - aug_gt_boxes3d[1, 6] + else: + raise NotImplementedError + + aug_method.append('flip') + + return aug_pts_rect, aug_gt_boxes3d, aug_method + + def get_rcnn_sample_info(self, roi_info): + sample_id, gt_box3d = roi_info['sample_id'], roi_info['gt_box3d'] + rpn_xyz, rpn_features, rpn_intensity, seg_mask = self.rpn_feature_list[sample_id] + + # augmentation original roi by adding noise + roi_box3d = self.aug_roi_by_noise(roi_info) + + # point cloud pooling based on roi_box3d + pooled_boxes3d = kitti_utils.enlarge_box3d(roi_box3d.reshape(1, 7), cfg.RCNN.POOL_EXTRA_WIDTH) + + boxes_pts_mask_list = roipool3d_utils.pts_in_boxes3d_cpu(torch.from_numpy(rpn_xyz), + torch.from_numpy(pooled_boxes3d)) + pt_mask_flag = (boxes_pts_mask_list[0].numpy() == 1) + cur_pts = rpn_xyz[pt_mask_flag].astype(np.float32) + + # data augmentation + aug_pts = cur_pts.copy() + aug_gt_box3d = gt_box3d.copy().astype(np.float32) + aug_roi_box3d = roi_box3d.copy() + if cfg.AUG_DATA and self.mode == 'TRAIN': + # calculate alpha by ry + temp_boxes3d = np.concatenate([aug_roi_box3d.reshape(1, 7), aug_gt_box3d.reshape(1, 7)], axis=0) + temp_x, temp_z, temp_ry = temp_boxes3d[:, 0], temp_boxes3d[:, 2], temp_boxes3d[:, 6] + temp_beta = np.arctan2(temp_z, temp_x).astype(np.float64) + temp_alpha = -np.sign(temp_beta) * np.pi / 2 + temp_beta + temp_ry + + # data augmentation + aug_pts, aug_boxes3d, aug_method = self.data_augmentation(aug_pts, temp_boxes3d, temp_alpha, mustaug=True, stage=2) + aug_roi_box3d, aug_gt_box3d = aug_boxes3d[0], aug_boxes3d[1] + aug_gt_box3d = aug_gt_box3d.astype(gt_box3d.dtype) + + # Pool input points + valid_mask = 1 # whether the input is valid + + if aug_pts.shape[0] == 0: + pts_features = np.zeros((1, 128), 
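+# Empty-pool fallback: when the enlarged RoI catches no points, a single
+# zeroed point/feature is fabricated and valid_mask is cleared; below, that
+# forces cls_label = -1 and reg_valid_mask = 0, excluding the sample from
+# both losses.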
dtype=np.float32) + input_channel = 3 + int(cfg.RCNN.USE_INTENSITY) + int(cfg.RCNN.USE_MASK) + int(cfg.RCNN.USE_DEPTH) + pts_input = np.zeros((1, input_channel), dtype=np.float32) + valid_mask = 0 + else: + pts_features = rpn_features[pt_mask_flag].astype(np.float32) + pts_intensity = rpn_intensity[pt_mask_flag].astype(np.float32) + + pts_input_list = [aug_pts, pts_intensity.reshape(-1, 1)] + if cfg.RCNN.USE_INTENSITY: + pts_input_list = [aug_pts, pts_intensity.reshape(-1, 1)] + else: + pts_input_list = [aug_pts] + + if cfg.RCNN.USE_MASK: + if cfg.RCNN.MASK_TYPE == 'seg': + pts_mask = seg_mask[pt_mask_flag].astype(np.float32) + elif cfg.RCNN.MASK_TYPE == 'roi': + pts_mask = roipool3d_utils.pts_in_boxes3d_cpu(torch.from_numpy(aug_pts), + torch.from_numpy(aug_roi_box3d.reshape(1, 7))) + pts_mask = (pts_mask[0].numpy() == 1).astype(np.float32) + else: + raise NotImplementedError + + pts_input_list.append(pts_mask.reshape(-1, 1)) + + if cfg.RCNN.USE_DEPTH: + pts_depth = np.linalg.norm(aug_pts, axis=1, ord=2) + pts_depth_norm = (pts_depth / 70.0) - 0.5 + pts_input_list.append(pts_depth_norm.reshape(-1, 1)) + + pts_input = np.concatenate(pts_input_list, axis=1) # (N, C) + + aug_gt_corners = kitti_utils.boxes3d_to_corners3d(aug_gt_box3d.reshape(-1, 7)) + aug_roi_corners = kitti_utils.boxes3d_to_corners3d(aug_roi_box3d.reshape(-1, 7)) + iou3d = kitti_utils.get_iou3d(aug_roi_corners, aug_gt_corners) + cur_iou = iou3d[0][0] + + # regression valid mask + reg_valid_mask = 1 if cur_iou >= cfg.RCNN.REG_FG_THRESH and valid_mask == 1 else 0 + + # classification label + cls_label = 1 if cur_iou > cfg.RCNN.CLS_FG_THRESH else 0 + if cfg.RCNN.CLS_BG_THRESH < cur_iou < cfg.RCNN.CLS_FG_THRESH or valid_mask == 0: + cls_label = -1 + + # canonical transform and sampling + pts_input_ct, gt_box3d_ct = self.canonical_transform(pts_input, aug_roi_box3d, aug_gt_box3d) + pts_input_ct, pts_features = self.rcnn_input_sample(pts_input_ct, pts_features) + + sample_info = {'sample_id': sample_id, + 'pts_input': pts_input_ct, + 'pts_features': pts_features, + 'cls_label': cls_label, + 'reg_valid_mask': reg_valid_mask, + 'gt_boxes3d_ct': gt_box3d_ct, + 'roi_boxes3d': aug_roi_box3d, + 'roi_size': aug_roi_box3d[3:6], + 'gt_boxes3d': aug_gt_box3d} + + return sample_info + + @staticmethod + def canonical_transform(pts_input, roi_box3d, gt_box3d): + roi_ry = roi_box3d[6] % (2 * np.pi) # 0 ~ 2pi + roi_center = roi_box3d[0:3] + # shift to center + pts_input[:, [0, 1, 2]] = pts_input[:, [0, 1, 2]] - roi_center + gt_box3d_ct = np.copy(gt_box3d) + gt_box3d_ct[0:3] = gt_box3d_ct[0:3] - roi_center + # rotate to the direction of head + gt_box3d_ct = kitti_utils.rotate_pc_along_y(gt_box3d_ct.reshape(1, 7), roi_ry).reshape(7) + gt_box3d_ct[6] = gt_box3d_ct[6] - roi_ry + pts_input = kitti_utils.rotate_pc_along_y(pts_input, roi_ry) + + return pts_input, gt_box3d_ct + + @staticmethod + def canonical_transform_batch(pts_input, roi_boxes3d, gt_boxes3d): + """ + :param pts_input: (N, npoints, 3 + C) + :param roi_boxes3d: (N, 7) + :param gt_boxes3d: (N, 7) + :return: + """ + roi_ry = roi_boxes3d[:, 6] % (2 * np.pi) # 0 ~ 2pi + roi_center = roi_boxes3d[:, 0:3] + # shift to center + pts_input[:, :, [0, 1, 2]] = pts_input[:, :, [0, 1, 2]] - roi_center.reshape(-1, 1, 3) + gt_boxes3d_ct = np.copy(gt_boxes3d) + gt_boxes3d_ct[:, 0:3] = gt_boxes3d_ct[:, 0:3] - roi_center + # rotate to the direction of head + gt_boxes3d_ct = kitti_utils.rotate_pc_along_y_torch(torch.from_numpy(gt_boxes3d_ct.reshape(-1, 1, 7)), + torch.from_numpy(roi_ry)).numpy().reshape(-1, 
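+# Canonical transform: points and GT boxes are re-expressed in each RoI's
+# local frame (translated to the RoI centre, rotated by roi_ry about y), so
+# the RCNN head regresses residuals in a pose-normalised coordinate system.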
7) + gt_boxes3d_ct[:, 6] = gt_boxes3d_ct[:, 6] - roi_ry + pts_input = kitti_utils.rotate_pc_along_y_torch(torch.from_numpy(pts_input), torch.from_numpy(roi_ry)).numpy() + + return pts_input, gt_boxes3d_ct + + @staticmethod + def rcnn_input_sample(pts_input, pts_features): + choice = np.random.choice(pts_input.shape[0], cfg.RCNN.NUM_POINTS, replace=True) + + if pts_input.shape[0] < cfg.RCNN.NUM_POINTS: + choice[:pts_input.shape[0]] = np.arange(pts_input.shape[0]) + np.random.shuffle(choice) + pts_input = pts_input[choice] + pts_features = pts_features[choice] + + return pts_input, pts_features + + def aug_roi_by_noise(self, roi_info): + """ + add noise to original roi to get aug_box3d + :param roi_info: + :return: + """ + roi_box3d, gt_box3d = roi_info['roi_box3d'], roi_info['gt_box3d'] + original_iou = roi_info['iou3d'] + temp_iou = cnt = 0 + pos_thresh = min(cfg.RCNN.REG_FG_THRESH, cfg.RCNN.CLS_FG_THRESH) + gt_corners = kitti_utils.boxes3d_to_corners3d(gt_box3d.reshape(-1, 7)) + aug_box3d = roi_box3d + while temp_iou < pos_thresh and cnt < 10: + if roi_info['type'] == 'gt': + aug_box3d = self.random_aug_box3d(roi_box3d) # GT, must random + else: + if np.random.rand() < 0.2: + aug_box3d = roi_box3d # p=0.2 to keep the original roi box + else: + aug_box3d = self.random_aug_box3d(roi_box3d) + aug_corners = kitti_utils.boxes3d_to_corners3d(aug_box3d.reshape(-1, 7)) + iou3d = kitti_utils.get_iou3d(aug_corners, gt_corners) + temp_iou = iou3d[0][0] + cnt += 1 + if original_iou < pos_thresh: # original bg, break + break + return aug_box3d + + @staticmethod + def random_aug_box3d(box3d): + """ + :param box3d: (7) [x, y, z, h, w, l, ry] + random shift, scale, orientation + """ + if cfg.RCNN.REG_AUG_METHOD == 'single': + pos_shift = (np.random.rand(3) - 0.5) # [-0.5 ~ 0.5] + hwl_scale = (np.random.rand(3) - 0.5) / (0.5 / 0.15) + 1.0 # + angle_rot = (np.random.rand(1) - 0.5) / (0.5 / (np.pi / 12)) # [-pi/12 ~ pi/12] + + aug_box3d = np.concatenate([box3d[0:3] + pos_shift, box3d[3:6] * hwl_scale, + box3d[6:7] + angle_rot]) + return aug_box3d + elif cfg.RCNN.REG_AUG_METHOD == 'multiple': + # pos_range, hwl_range, angle_range, mean_iou + range_config = [[0.2, 0.1, np.pi / 12, 0.7], + [0.3, 0.15, np.pi / 12, 0.6], + [0.5, 0.15, np.pi / 9, 0.5], + [0.8, 0.15, np.pi / 6, 0.3], + [1.0, 0.15, np.pi / 3, 0.2]] + idx = np.random.randint(len(range_config)) + + pos_shift = ((np.random.rand(3) - 0.5) / 0.5) * range_config[idx][0] + hwl_scale = ((np.random.rand(3) - 0.5) / 0.5) * range_config[idx][1] + 1.0 + angle_rot = ((np.random.rand(1) - 0.5) / 0.5) * range_config[idx][2] + + aug_box3d = np.concatenate([box3d[0:3] + pos_shift, box3d[3:6] * hwl_scale, box3d[6:7] + angle_rot]) + return aug_box3d + elif cfg.RCNN.REG_AUG_METHOD == 'normal': + x_shift = np.random.normal(loc=0, scale=0.3) + y_shift = np.random.normal(loc=0, scale=0.2) + z_shift = np.random.normal(loc=0, scale=0.3) + h_shift = np.random.normal(loc=0, scale=0.25) + w_shift = np.random.normal(loc=0, scale=0.15) + l_shift = np.random.normal(loc=0, scale=0.5) + ry_shift = ((np.random.rand() - 0.5) / 0.5) * np.pi / 12 + + aug_box3d = np.array([box3d[0] + x_shift, box3d[1] + y_shift, box3d[2] + z_shift, box3d[3] + h_shift, + box3d[4] + w_shift, box3d[5] + l_shift, box3d[6] + ry_shift]) + return aug_box3d + else: + raise NotImplementedError + + def get_proposal_from_file(self, index): + sample_id = int(self.image_idx_list[index]) + proposal_file = os.path.join(self.rcnn_eval_roi_dir, '%06d.txt' % sample_id) + roi_obj_list = 
kitti_utils.get_objects_from_label(proposal_file) + + rpn_xyz, rpn_features, rpn_intensity, seg_mask = self.get_rpn_features(self.rcnn_eval_feature_dir, sample_id) + pts_rect, pts_rpn_features, pts_intensity = rpn_xyz, rpn_features, rpn_intensity + + roi_box3d_list, roi_scores = [], [] + for obj in roi_obj_list: + box3d = np.array([obj.pos[0], obj.pos[1], obj.pos[2], obj.h, obj.w, obj.l, obj.ry], dtype=np.float32) + roi_box3d_list.append(box3d.reshape(1, 7)) + roi_scores.append(obj.score) + + roi_boxes3d = np.concatenate(roi_box3d_list, axis=0) # (N, 7) + roi_scores = np.array(roi_scores, dtype=np.float32) # (N) + + if cfg.RCNN.ROI_SAMPLE_JIT: + sample_dict = {'sample_id': sample_id, + 'rpn_xyz': rpn_xyz, + 'rpn_features': rpn_features, + 'seg_mask': seg_mask, + 'roi_boxes3d': roi_boxes3d, + 'roi_scores': roi_scores, + 'pts_depth': np.linalg.norm(rpn_xyz, ord=2, axis=1)} + + if self.mode != 'TEST': + gt_obj_list = self.filtrate_objects(self.get_label(sample_id)) + gt_boxes3d = kitti_utils.objs_to_boxes3d(gt_obj_list) + + roi_corners = kitti_utils.boxes3d_to_corners3d(roi_boxes3d) + gt_corners = kitti_utils.boxes3d_to_corners3d(gt_boxes3d) + iou3d = kitti_utils.get_iou3d(roi_corners, gt_corners) + if gt_boxes3d.shape[0] > 0: + gt_iou = iou3d.max(axis=1) + else: + gt_iou = np.zeros(roi_boxes3d.shape[0]).astype(np.float32) + + sample_dict['gt_boxes3d'] = gt_boxes3d + sample_dict['gt_iou'] = gt_iou + return sample_dict + + if cfg.RCNN.USE_INTENSITY: + pts_extra_input_list = [pts_intensity.reshape(-1, 1), seg_mask.reshape(-1, 1)] + else: + pts_extra_input_list = [seg_mask.reshape(-1, 1)] + + if cfg.RCNN.USE_DEPTH: + cur_depth = np.linalg.norm(pts_rect, axis=1, ord=2) + cur_depth_norm = (cur_depth / 70.0) - 0.5 + pts_extra_input_list.append(cur_depth_norm.reshape(-1, 1)) + + pts_extra_input = np.concatenate(pts_extra_input_list, axis=1) + pts_input, pts_features = roipool3d_utils.roipool3d_cpu(roi_boxes3d, pts_rect, pts_rpn_features, + pts_extra_input, cfg.RCNN.POOL_EXTRA_WIDTH, + sampled_pt_num=cfg.RCNN.NUM_POINTS) + + sample_dict = {'sample_id': sample_id, + 'pts_input': pts_input, + 'pts_features': pts_features, + 'roi_boxes3d': roi_boxes3d, + 'roi_scores': roi_scores, + 'roi_size': roi_boxes3d[:, 3:6]} + + if self.mode == 'TEST': + return sample_dict + + gt_obj_list = self.filtrate_objects(self.get_label(sample_id)) + gt_boxes3d = np.zeros((gt_obj_list.__len__(), 7), dtype=np.float32) + + for k, obj in enumerate(gt_obj_list): + gt_boxes3d[k, 0:3], gt_boxes3d[k, 3], gt_boxes3d[k, 4], gt_boxes3d[k, 5], gt_boxes3d[k, 6] \ + = obj.pos, obj.h, obj.w, obj.l, obj.ry + + if gt_boxes3d.__len__() == 0: + gt_iou = np.zeros((roi_boxes3d.shape[0]), dtype=np.float32) + else: + roi_corners = kitti_utils.boxes3d_to_corners3d(roi_boxes3d) + gt_corners = kitti_utils.boxes3d_to_corners3d(gt_boxes3d) + iou3d = kitti_utils.get_iou3d(roi_corners, gt_corners) + gt_iou = iou3d.max(axis=1) + sample_dict['gt_boxes3d'] = gt_boxes3d + sample_dict['gt_iou'] = gt_iou + + return sample_dict + + def get_rcnn_training_sample_batch(self, index): + sample_id = int(self.sample_id_list[index]) + rpn_xyz, rpn_features, rpn_intensity, seg_mask = \ + self.get_rpn_features(self.rcnn_training_feature_dir, sample_id) + + # load rois and gt_boxes3d for this sample + roi_file = os.path.join(self.rcnn_training_roi_dir, '%06d.txt' % sample_id) + roi_obj_list = kitti_utils.get_objects_from_label(roi_file) + roi_boxes3d = kitti_utils.objs_to_boxes3d(roi_obj_list) + # roi_scores = kitti_utils.objs_to_scores(roi_obj_list) + + gt_obj_list = 
self.filtrate_objects(self.get_label(sample_id)) + gt_boxes3d = kitti_utils.objs_to_boxes3d(gt_obj_list) + + # calculate original iou + iou3d = kitti_utils.get_iou3d(kitti_utils.boxes3d_to_corners3d(roi_boxes3d), + kitti_utils.boxes3d_to_corners3d(gt_boxes3d)) + max_overlaps, gt_assignment = iou3d.max(axis=1), iou3d.argmax(axis=1) + max_iou_of_gt, roi_assignment = iou3d.max(axis=0), iou3d.argmax(axis=0) + roi_assignment = roi_assignment[max_iou_of_gt > 0].reshape(-1) + + # sample fg, easy_bg, hard_bg + fg_rois_per_image = int(np.round(cfg.RCNN.FG_RATIO * cfg.RCNN.ROI_PER_IMAGE)) + fg_thresh = min(cfg.RCNN.REG_FG_THRESH, cfg.RCNN.CLS_FG_THRESH) + fg_inds = np.nonzero(max_overlaps >= fg_thresh)[0] + fg_inds = np.concatenate((fg_inds, roi_assignment), axis=0) # consider the roi which has max_overlaps with gt as fg + + easy_bg_inds = np.nonzero((max_overlaps < cfg.RCNN.CLS_BG_THRESH_LO))[0] + hard_bg_inds = np.nonzero((max_overlaps < cfg.RCNN.CLS_BG_THRESH) & + (max_overlaps >= cfg.RCNN.CLS_BG_THRESH_LO))[0] + + fg_num_rois = fg_inds.size + bg_num_rois = hard_bg_inds.size + easy_bg_inds.size + + if fg_num_rois > 0 and bg_num_rois > 0: + # sampling fg + fg_rois_per_this_image = min(fg_rois_per_image, fg_num_rois) + rand_num = np.random.permutation(fg_num_rois) + fg_inds = fg_inds[rand_num[:fg_rois_per_this_image]] + + # sampling bg + bg_rois_per_this_image = cfg.RCNN.ROI_PER_IMAGE - fg_rois_per_this_image + bg_inds = self.sample_bg_inds(hard_bg_inds, easy_bg_inds, bg_rois_per_this_image) + + elif fg_num_rois > 0 and bg_num_rois == 0: + # sampling fg + rand_num = np.floor(np.random.rand(cfg.RCNN.ROI_PER_IMAGE ) * fg_num_rois) + rand_num = torch.from_numpy(rand_num).type_as(gt_boxes3d).long() + fg_inds = fg_inds[rand_num] + fg_rois_per_this_image = cfg.RCNN.ROI_PER_IMAGE + bg_rois_per_this_image = 0 + elif bg_num_rois > 0 and fg_num_rois == 0: + # sampling bg + bg_rois_per_this_image = cfg.RCNN.ROI_PER_IMAGE + bg_inds = self.sample_bg_inds(hard_bg_inds, easy_bg_inds, bg_rois_per_this_image) + fg_rois_per_this_image = 0 + else: + import pdb + pdb.set_trace() + raise NotImplementedError + + # augment the rois by noise + roi_list, roi_iou_list, roi_gt_list = [], [], [] + if fg_rois_per_this_image > 0: + fg_rois_src = roi_boxes3d[fg_inds].copy() + gt_of_fg_rois = gt_boxes3d[gt_assignment[fg_inds]] + fg_rois, fg_iou3d = self.aug_roi_by_noise_batch(fg_rois_src, gt_of_fg_rois, aug_times=10) + roi_list.append(fg_rois) + roi_iou_list.append(fg_iou3d) + roi_gt_list.append(gt_of_fg_rois) + + if bg_rois_per_this_image > 0: + bg_rois_src = roi_boxes3d[bg_inds].copy() + gt_of_bg_rois = gt_boxes3d[gt_assignment[bg_inds]] + bg_rois, bg_iou3d = self.aug_roi_by_noise_batch(bg_rois_src, gt_of_bg_rois, aug_times=1) + roi_list.append(bg_rois) + roi_iou_list.append(bg_iou3d) + roi_gt_list.append(gt_of_bg_rois) + + rois = np.concatenate(roi_list, axis=0) + iou_of_rois = np.concatenate(roi_iou_list, axis=0) + gt_of_rois = np.concatenate(roi_gt_list, axis=0) + + # collect extra features for point cloud pooling + if cfg.RCNN.USE_INTENSITY: + pts_extra_input_list = [rpn_intensity.reshape(-1, 1), seg_mask.reshape(-1, 1)] + else: + pts_extra_input_list = [seg_mask.reshape(-1, 1)] + + if cfg.RCNN.USE_DEPTH: + pts_depth = (np.linalg.norm(rpn_xyz, ord=2, axis=1) / 70.0) - 0.5 + pts_extra_input_list.append(pts_depth.reshape(-1, 1)) + pts_extra_input = np.concatenate(pts_extra_input_list, axis=1) + + pts_input, pts_features, pts_empty_flag = roipool3d_utils.roipool3d_cpu(rois, rpn_xyz, rpn_features, + pts_extra_input, + 
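+# roipool3d_cpu crops, for each RoI enlarged by POOL_EXTRA_WIDTH, a fixed
+# sample of cfg.RCNN.NUM_POINTS points with their RPN features;
+# pts_empty_flag marks RoIs that caught no points at all.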
cfg.RCNN.POOL_EXTRA_WIDTH, + sampled_pt_num=cfg.RCNN.NUM_POINTS, + canonical_transform=False) + + # data augmentation + if cfg.AUG_DATA and self.mode == 'TRAIN': + for k in range(rois.__len__()): + aug_pts = pts_input[k, :, 0:3].copy() + aug_gt_box3d = gt_of_rois[k].copy() + aug_roi_box3d = rois[k].copy() + + # calculate alpha by ry + temp_boxes3d = np.concatenate([aug_roi_box3d.reshape(1, 7), aug_gt_box3d.reshape(1, 7)], axis=0) + temp_x, temp_z, temp_ry = temp_boxes3d[:, 0], temp_boxes3d[:, 2], temp_boxes3d[:, 6] + temp_beta = np.arctan2(temp_z, temp_x).astype(np.float64) + temp_alpha = -np.sign(temp_beta) * np.pi / 2 + temp_beta + temp_ry + + # data augmentation + aug_pts, aug_boxes3d, aug_method = self.data_augmentation(aug_pts, temp_boxes3d, temp_alpha, + mustaug=True, stage=2) + + # assign to original data + pts_input[k, :, 0:3] = aug_pts + rois[k] = aug_boxes3d[0] + gt_of_rois[k] = aug_boxes3d[1] + + valid_mask = (pts_empty_flag == 0).astype(np.int32) + + # regression valid mask + reg_valid_mask = (iou_of_rois > cfg.RCNN.REG_FG_THRESH).astype(np.int32) & valid_mask + + # classification label + cls_label = (iou_of_rois > cfg.RCNN.CLS_FG_THRESH).astype(np.int32) + invalid_mask = (iou_of_rois > cfg.RCNN.CLS_BG_THRESH) & (iou_of_rois < cfg.RCNN.CLS_FG_THRESH) + cls_label[invalid_mask] = -1 + cls_label[valid_mask == 0] = -1 + + # canonical transform and sampling + pts_input_ct, gt_boxes3d_ct = self.canonical_transform_batch(pts_input, rois, gt_of_rois) + + sample_info = {'sample_id': sample_id, + 'pts_input': pts_input_ct, + 'pts_features': pts_features, + 'cls_label': cls_label, + 'reg_valid_mask': reg_valid_mask, + 'gt_boxes3d_ct': gt_boxes3d_ct, + 'roi_boxes3d': rois, + 'roi_size': rois[:, 3:6], + 'gt_boxes3d': gt_of_rois} + + return sample_info + + def sample_bg_inds(self, hard_bg_inds, easy_bg_inds, bg_rois_per_this_image): + if hard_bg_inds.size > 0 and easy_bg_inds.size > 0: + hard_bg_rois_num = int(bg_rois_per_this_image * cfg.RCNN.HARD_BG_RATIO) + easy_bg_rois_num = bg_rois_per_this_image - hard_bg_rois_num + + # sampling hard bg + rand_num = np.floor(np.random.rand(hard_bg_rois_num) * hard_bg_inds.size).astype(np.int32) + hard_bg_inds = hard_bg_inds[rand_num] + # sampling easy bg + rand_num = np.floor(np.random.rand(easy_bg_rois_num) * easy_bg_inds.size).astype(np.int32) + easy_bg_inds = easy_bg_inds[rand_num] + + bg_inds = np.concatenate([hard_bg_inds, easy_bg_inds], axis=0) + elif hard_bg_inds.size > 0 and easy_bg_inds.size == 0: + hard_bg_rois_num = bg_rois_per_this_image + # sampling hard bg + rand_num = np.floor(np.random.rand(hard_bg_rois_num) * hard_bg_inds.size).astype(np.int32) + bg_inds = hard_bg_inds[rand_num] + elif hard_bg_inds.size == 0 and easy_bg_inds.size > 0: + easy_bg_rois_num = bg_rois_per_this_image + # sampling easy bg + rand_num = np.floor(np.random.rand(easy_bg_rois_num) * easy_bg_inds.size).astype(np.int32) + bg_inds = easy_bg_inds[rand_num] + else: + raise NotImplementedError + + return bg_inds + + def aug_roi_by_noise_batch(self, roi_boxes3d, gt_boxes3d, aug_times=10): + """ + :param roi_boxes3d: (N, 7) + :param gt_boxes3d: (N, 7) + :return: + """ + iou_of_rois = np.zeros(roi_boxes3d.shape[0], dtype=np.float32) + for k in range(roi_boxes3d.__len__()): + temp_iou = cnt = 0 + roi_box3d = roi_boxes3d[k] + gt_box3d = gt_boxes3d[k] + pos_thresh = min(cfg.RCNN.REG_FG_THRESH, cfg.RCNN.CLS_FG_THRESH) + gt_corners = kitti_utils.boxes3d_to_corners3d(gt_box3d.reshape(1, 7)) + aug_box3d = roi_box3d + while temp_iou < pos_thresh and cnt < aug_times: + if 
np.random.rand() < 0.2: + aug_box3d = roi_box3d # p=0.2 to keep the original roi box + else: + aug_box3d = self.random_aug_box3d(roi_box3d) + aug_corners = kitti_utils.boxes3d_to_corners3d(aug_box3d.reshape(1, 7)) + iou3d = kitti_utils.get_iou3d(aug_corners, gt_corners) + temp_iou = iou3d[0][0] + cnt += 1 + roi_boxes3d[k] = aug_box3d + iou_of_rois[k] = temp_iou + return roi_boxes3d, iou_of_rois + + def get_rcnn_sample_jit(self, index): + sample_id = int(self.sample_id_list[index]) + rpn_xyz, rpn_features, rpn_intensity, seg_mask = \ + self.get_rpn_features(self.rcnn_training_feature_dir, sample_id) + + # load rois and gt_boxes3d for this sample + roi_file = os.path.join(self.rcnn_training_roi_dir, '%06d.txt' % sample_id) + roi_obj_list = kitti_utils.get_objects_from_label(roi_file) + roi_boxes3d = kitti_utils.objs_to_boxes3d(roi_obj_list) + # roi_scores = kitti_utils.objs_to_scores(roi_obj_list) + + gt_obj_list = self.filtrate_objects(self.get_label(sample_id)) + gt_boxes3d = kitti_utils.objs_to_boxes3d(gt_obj_list) + + sample_info = {'sample_id': sample_id, + 'rpn_xyz': rpn_xyz, + 'rpn_features': rpn_features, + 'rpn_intensity': rpn_intensity, + 'seg_mask': seg_mask, + 'roi_boxes3d': roi_boxes3d, + 'gt_boxes3d': gt_boxes3d, + 'pts_depth': np.linalg.norm(rpn_xyz, ord=2, axis=1)} + + return sample_info + + def collate_batch(self, batch): + if self.mode != 'TRAIN' and cfg.RCNN.ENABLED and not cfg.RPN.ENABLED: + assert batch.__len__() == 1 + return batch[0] + + batch_size = batch.__len__() + ans_dict = {} + + for key in batch[0].keys(): + if cfg.RPN.ENABLED and key == 'gt_boxes3d' or \ + (cfg.RCNN.ENABLED and cfg.RCNN.ROI_SAMPLE_JIT and key in ['gt_boxes3d', 'roi_boxes3d']): + max_gt = 0 + for k in range(batch_size): + max_gt = max(max_gt, batch[k][key].__len__()) + batch_gt_boxes3d = np.zeros((batch_size, max_gt, 7), dtype=np.float32) + for i in range(batch_size): + batch_gt_boxes3d[i, :batch[i][key].__len__(), :] = batch[i][key] + ans_dict[key] = batch_gt_boxes3d + continue + + if isinstance(batch[0][key], np.ndarray): + if batch_size == 1: + ans_dict[key] = batch[0][key][np.newaxis, ...] + else: + ans_dict[key] = np.concatenate([batch[k][key][np.newaxis, ...] 
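+# collate_batch pads gt_boxes3d (and roi_boxes3d in ROI_SAMPLE_JIT mode) to
+# the largest count in the batch above; every other ndarray field is simply
+# stacked along a new leading batch axis here.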
for k in range(batch_size)], axis=0) + + else: + ans_dict[key] = [batch[k][key] for k in range(batch_size)] + if isinstance(batch[0][key], int): + ans_dict[key] = np.array(ans_dict[key], dtype=np.int32) + elif isinstance(batch[0][key], float): + ans_dict[key] = np.array(ans_dict[key], dtype=np.float32) + + return ans_dict + + +if __name__ == '__main__': + pass diff --git a/PointRCNN/lib/datasets/kitti_rcnn_dataset.py b/PointRCNN/lib/datasets/kitti_rcnn_dataset.py new file mode 100644 index 0000000..0babc7b --- /dev/null +++ b/PointRCNN/lib/datasets/kitti_rcnn_dataset.py @@ -0,0 +1,1146 @@ +import numpy as np +import os +import pickle +import torch +import pdb + +from lib.datasets.kitti_dataset import KittiDataset +import lib.utils.kitti_utils as kitti_utils +import lib.utils.roipool3d.roipool3d_utils as roipool3d_utils +from lib.config import cfg + + +class KittiRCNNDataset(KittiDataset): + def __init__(self, root_dir, npoints=16384, split='train', classes='Car', mode='TRAIN', random_select=True, + logger=None, rcnn_training_roi_dir=None, rcnn_training_feature_dir=None, rcnn_eval_roi_dir=None, + rcnn_eval_feature_dir=None, gt_database_dir=None, pseudo_lidar=False): + super().__init__(root_dir=root_dir, split=split) + if classes == 'Car': + self.classes = ('Background', 'Car') + aug_scene_root_dir = os.path.join(root_dir, 'KITTI', 'aug_scene') + elif classes == 'People': + self.classes = ('Background', 'Pedestrian', 'Cyclist') + elif classes == 'Pedestrian': + self.classes = ('Background', 'Pedestrian') + aug_scene_root_dir = os.path.join(root_dir, 'KITTI', 'aug_scene_ped') + elif classes == 'Cyclist': + self.classes = ('Background', 'Cyclist') + aug_scene_root_dir = os.path.join(root_dir, 'KITTI', 'aug_scene_cyclist') + else: + assert False, "Invalid classes: %s" % classes + + if pseudo_lidar: + self.lidar_dir = os.path.join(self.lidar_dir, '../sdn_fix_bin_sparse') + self.plane_dir = os.path.join(self.plane_dir, '../sdn_fix_bin_planes') + + self.num_class = self.classes.__len__() + + self.npoints = npoints + self.sample_id_list = [] + self.random_select = random_select + self.logger = logger + + if split == 'train_aug': + self.aug_label_dir = os.path.join(aug_scene_root_dir, 'training', 'aug_label') + self.aug_pts_dir = os.path.join(aug_scene_root_dir, 'training', 'rectified_data') + else: + self.aug_label_dir = os.path.join(aug_scene_root_dir, 'training', 'aug_label') + self.aug_pts_dir = os.path.join(aug_scene_root_dir, 'training', 'rectified_data') + + # for rcnn training + self.rcnn_training_bbox_list = [] + self.rpn_feature_list = {} + self.pos_bbox_list = [] + self.neg_bbox_list = [] + self.far_neg_bbox_list = [] + self.rcnn_eval_roi_dir = rcnn_eval_roi_dir + self.rcnn_eval_feature_dir = rcnn_eval_feature_dir + self.rcnn_training_roi_dir = rcnn_training_roi_dir + self.rcnn_training_feature_dir = rcnn_training_feature_dir + + self.gt_database = None + + if not self.random_select: + self.logger.warning('random select is False') + + assert mode in ['TRAIN', 'EVAL', 'TEST'], 'Invalid mode: %s' % mode + self.mode = mode + + if cfg.RPN.ENABLED: + if gt_database_dir is not None: + self.gt_database = pickle.load(open(gt_database_dir, 'rb')) + + if cfg.GT_AUG_HARD_RATIO > 0: + easy_list, hard_list = [], [] + for k in range(self.gt_database.__len__()): + obj = self.gt_database[k] + if obj['points'].shape[0] > 100: + easy_list.append(obj) + else: + hard_list.append(obj) + self.gt_database = [easy_list, hard_list] + logger.info('Loading gt_database(easy(pt_num>100): %d, hard(pt_num<=100): %d) 
from %s' + % (len(easy_list), len(hard_list), gt_database_dir)) + else: + logger.info('Loading gt_database(%d) from %s' % (len(self.gt_database), gt_database_dir)) + + if mode == 'TRAIN': + self.preprocess_rpn_training_data() + else: + self.sample_id_list = [int(sample_id) for sample_id in self.image_idx_list] + self.logger.info('Load testing samples from %s' % self.imageset_dir) + self.logger.info('Done: total test samples %d' % len(self.sample_id_list)) + elif cfg.RCNN.ENABLED: + for idx in range(0, self.num_sample): + sample_id = int(self.image_idx_list[idx]) + obj_list = self.filtrate_objects(self.get_label(sample_id)) + if len(obj_list) == 0: + # logger.info('No gt classes: %06d' % sample_id) + continue + self.sample_id_list.append(sample_id) + + print('Done: filter %s results for rcnn training: %d / %d\n' % + (self.mode, len(self.sample_id_list), len(self.image_idx_list))) + + def preprocess_rpn_training_data(self): + """ + Discard samples which don't have current classes, which will not be used for training. + Valid sample_id is stored in self.sample_id_list + """ + self.logger.info('Loading %s samples from %s ...' % (self.mode, self.label_dir)) + for idx in range(0, self.num_sample): + sample_id = int(self.image_idx_list[idx]) + obj_list = self.filtrate_objects(self.get_label(sample_id)) + if len(obj_list) == 0: + # self.logger.info('No gt classes: %06d' % sample_id) + continue + self.sample_id_list.append(sample_id) + + self.logger.info('Done: filter %s results: %d / %d\n' % (self.mode, len(self.sample_id_list), + len(self.image_idx_list))) + + def get_label(self, idx): + if idx < 10000: + label_file = os.path.join(self.label_dir, '%06d.txt' % idx) + else: + label_file = os.path.join(self.aug_label_dir, '%06d.txt' % idx) + + assert os.path.exists(label_file) + return kitti_utils.get_objects_from_label(label_file) + + def get_image(self, idx): + return super().get_image(idx % 10000) + + def get_image_shape(self, idx): + return super().get_image_shape(idx % 10000) + + def get_calib(self, idx): + return super().get_calib(idx % 10000) + + def get_road_plane(self, idx): + return super().get_road_plane(idx % 10000) + + @staticmethod + def get_rpn_features(rpn_feature_dir, idx): + rpn_feature_file = os.path.join(rpn_feature_dir, '%06d.npy' % idx) + rpn_xyz_file = os.path.join(rpn_feature_dir, '%06d_xyz.npy' % idx) + rpn_intensity_file = os.path.join(rpn_feature_dir, '%06d_intensity.npy' % idx) + if cfg.RCNN.USE_SEG_SCORE: + rpn_seg_file = os.path.join(rpn_feature_dir, '%06d_rawscore.npy' % idx) + rpn_seg_score = np.load(rpn_seg_file).reshape(-1) + rpn_seg_score = torch.sigmoid(torch.from_numpy(rpn_seg_score)).numpy() + else: + rpn_seg_file = os.path.join(rpn_feature_dir, '%06d_seg.npy' % idx) + rpn_seg_score = np.load(rpn_seg_file).reshape(-1) + return np.load(rpn_xyz_file), np.load(rpn_feature_file), np.load(rpn_intensity_file).reshape(-1), rpn_seg_score + + def filtrate_objects(self, obj_list): + """ + Discard objects which are not in self.classes (or its similar classes) + :param obj_list: list + :return: list + """ + type_whitelist = self.classes + if self.mode == 'TRAIN' and cfg.INCLUDE_SIMILAR_TYPE: + type_whitelist = list(self.classes) + if 'Car' in self.classes: + type_whitelist.append('Van') + if 'Pedestrian' in self.classes: # or 'Cyclist' in self.classes: + type_whitelist.append('Person_sitting') + + valid_obj_list = [] + for obj in obj_list: + if obj.cls_type not in type_whitelist: # rm Van, 20180928 + continue + if self.mode == 'TRAIN' and cfg.PC_REDUCE_BY_RANGE and 
(self.check_pc_range(obj.pos) is False): + continue + valid_obj_list.append(obj) + return valid_obj_list + + @staticmethod + def filtrate_dc_objects(obj_list): + valid_obj_list = [] + for obj in obj_list: + if obj.cls_type in ['DontCare']: + continue + valid_obj_list.append(obj) + + return valid_obj_list + + @staticmethod + def check_pc_range(xyz): + """ + :param xyz: [x, y, z] + :return: + """ + x_range, y_range, z_range = cfg.PC_AREA_SCOPE + if (x_range[0] <= xyz[0] <= x_range[1]) and (y_range[0] <= xyz[1] <= y_range[1]) and \ + (z_range[0] <= xyz[2] <= z_range[1]): + return True + return False + + @staticmethod + def get_valid_flag(pts_rect, pts_img, pts_rect_depth, img_shape): + """ + Valid point should be in the image (and in the PC_AREA_SCOPE) + :param pts_rect: + :param pts_img: + :param pts_rect_depth: + :param img_shape: + :return: + """ + val_flag_1 = np.logical_and(pts_img[:, 0] >= 0, pts_img[:, 0] < img_shape[1]) + val_flag_2 = np.logical_and(pts_img[:, 1] >= 0, pts_img[:, 1] < img_shape[0]) + val_flag_merge = np.logical_and(val_flag_1, val_flag_2) + pts_valid_flag = np.logical_and(val_flag_merge, pts_rect_depth >= 0) + + if cfg.PC_REDUCE_BY_RANGE: + x_range, y_range, z_range = cfg.PC_AREA_SCOPE + pts_x, pts_y, pts_z = pts_rect[:, 0], pts_rect[:, 1], pts_rect[:, 2] + range_flag = (pts_x >= x_range[0]) & (pts_x <= x_range[1]) \ + & (pts_y >= y_range[0]) & (pts_y <= y_range[1]) \ + & (pts_z >= z_range[0]) & (pts_z <= z_range[1]) + pts_valid_flag = pts_valid_flag & range_flag + return pts_valid_flag + + def __len__(self): + if cfg.RPN.ENABLED: + return len(self.sample_id_list) + elif cfg.RCNN.ENABLED: + if self.mode == 'TRAIN': + return len(self.sample_id_list) + else: + return len(self.image_idx_list) + else: + raise NotImplementedError + + def __getitem__(self, index): + if cfg.RPN.ENABLED: + return self.get_rpn_sample(index) + elif cfg.RCNN.ENABLED: + if self.mode == 'TRAIN': + if cfg.RCNN.ROI_SAMPLE_JIT: + return self.get_rcnn_sample_jit(index) + else: + return self.get_rcnn_training_sample_batch(index) + else: + return self.get_proposal_from_file(index) + else: + raise NotImplementedError + + def get_rpn_sample(self, index): + sample_id = int(self.sample_id_list[index]) + if sample_id < 10000: + calib = self.get_calib(sample_id) + # img = self.get_image(sample_id) + img_shape = self.get_image_shape(sample_id) + pts_lidar = self.get_lidar(sample_id) + + # get valid point (projected points should be in image) + pts_rect = calib.lidar_to_rect(pts_lidar[:, 0:3]) + pts_intensity = pts_lidar[:, 3] + else: + calib = self.get_calib(sample_id % 10000) + # img = self.get_image(sample_id % 10000) + img_shape = self.get_image_shape(sample_id % 10000) + + pts_file = os.path.join(self.aug_pts_dir, '%06d.bin' % sample_id) + assert os.path.exists(pts_file), '%s' % pts_file + aug_pts = np.fromfile(pts_file, dtype=np.float32).reshape(-1, 4) + pts_rect, pts_intensity = aug_pts[:, 0:3], aug_pts[:, 3] + + pts_img, pts_rect_depth = calib.rect_to_img(pts_rect) + pts_valid_flag = self.get_valid_flag(pts_rect, pts_img, pts_rect_depth, img_shape) + + pts_rect = pts_rect[pts_valid_flag][:, 0:3] + pts_intensity = pts_intensity[pts_valid_flag] + + if cfg.GT_AUG_ENABLED and self.mode == 'TRAIN': + # all labels for checking overlapping + all_gt_obj_list = self.filtrate_dc_objects(self.get_label(sample_id)) + all_gt_boxes3d = kitti_utils.objs_to_boxes3d(all_gt_obj_list) + + gt_aug_flag = False + if np.random.rand() < cfg.GT_AUG_APPLY_PROB: + # augment one scene + gt_aug_flag, pts_rect, pts_intensity, 
extra_gt_boxes3d, extra_gt_obj_list = \ + self.apply_gt_aug_to_one_scene(sample_id, pts_rect, pts_intensity, all_gt_boxes3d) + + # generate inputs + if self.mode == 'TRAIN' or self.random_select: + if self.npoints < len(pts_rect): + pts_depth = pts_rect[:, 2] + pts_near_flag = pts_depth < 40.0 + far_idxs_choice = np.where(pts_near_flag == 0)[0] + near_idxs = np.where(pts_near_flag == 1)[0] + near_idxs_choice = np.random.choice(near_idxs, self.npoints - len(far_idxs_choice), replace=False) + + choice = np.concatenate((near_idxs_choice, far_idxs_choice), axis=0) \ + if len(far_idxs_choice) > 0 else near_idxs_choice + np.random.shuffle(choice) + else: + choice = np.arange(0, len(pts_rect), dtype=np.int32) + if self.npoints > len(pts_rect): + extra_choice = np.random.choice(choice, self.npoints - len(pts_rect), replace=False) + choice = np.concatenate((choice, extra_choice), axis=0) + np.random.shuffle(choice) + + ret_pts_rect = pts_rect[choice, :] + ret_pts_intensity = pts_intensity[choice] - 0.5 # translate intensity to [-0.5, 0.5] + else: + ret_pts_rect = pts_rect + ret_pts_intensity = pts_intensity - 0.5 + + pts_features = [ret_pts_intensity.reshape(-1, 1)] + ret_pts_features = np.concatenate(pts_features, axis=1) if pts_features.__len__() > 1 else pts_features[0] + + sample_info = {'sample_id': sample_id, 'random_select': self.random_select} + + if self.mode == 'TEST': + if cfg.RPN.USE_INTENSITY: + pts_input = np.concatenate((ret_pts_rect, ret_pts_features), axis=1) # (N, C) + else: + pts_input = ret_pts_rect + sample_info['pts_input'] = pts_input + sample_info['pts_rect'] = ret_pts_rect + sample_info['pts_features'] = ret_pts_features + return sample_info + + gt_obj_list = self.filtrate_objects(self.get_label(sample_id)) + if cfg.GT_AUG_ENABLED and self.mode == 'TRAIN' and gt_aug_flag: + gt_obj_list.extend(extra_gt_obj_list) + gt_boxes3d = kitti_utils.objs_to_boxes3d(gt_obj_list) + + gt_alpha = np.zeros((gt_obj_list.__len__()), dtype=np.float32) + for k, obj in enumerate(gt_obj_list): + gt_alpha[k] = obj.alpha + + # data augmentation + aug_pts_rect = ret_pts_rect.copy() + aug_gt_boxes3d = gt_boxes3d.copy() + if cfg.AUG_DATA and self.mode == 'TRAIN': + aug_pts_rect, aug_gt_boxes3d, aug_method = self.data_augmentation(aug_pts_rect, aug_gt_boxes3d, gt_alpha, + sample_id) + sample_info['aug_method'] = aug_method + + # prepare input + if cfg.RPN.USE_INTENSITY: + pts_input = np.concatenate((aug_pts_rect, ret_pts_features), axis=1) # (N, C) + else: + pts_input = aug_pts_rect + + if cfg.RPN.FIXED: + sample_info['pts_input'] = pts_input + sample_info['pts_rect'] = aug_pts_rect + sample_info['pts_features'] = ret_pts_features + sample_info['gt_boxes3d'] = aug_gt_boxes3d + return sample_info + + # generate training labels + rpn_cls_label, rpn_reg_label = self.generate_rpn_training_labels(aug_pts_rect, aug_gt_boxes3d) + sample_info['pts_input'] = pts_input + sample_info['pts_rect'] = aug_pts_rect + sample_info['pts_features'] = ret_pts_features + sample_info['rpn_cls_label'] = rpn_cls_label + sample_info['rpn_reg_label'] = rpn_reg_label + sample_info['gt_boxes3d'] = aug_gt_boxes3d + return sample_info + + @staticmethod + def generate_rpn_training_labels(pts_rect, gt_boxes3d): + cls_label = np.zeros((pts_rect.shape[0]), dtype=np.int32) + reg_label = np.zeros((pts_rect.shape[0], 7), dtype=np.float32) # dx, dy, dz, ry, h, w, l + gt_corners = kitti_utils.boxes3d_to_corners3d(gt_boxes3d, rotate=True) + extend_gt_boxes3d = kitti_utils.enlarge_box3d(gt_boxes3d, extra_width=0.2) + extend_gt_corners = 
kitti_utils.boxes3d_to_corners3d(extend_gt_boxes3d, rotate=True) + for k in range(gt_boxes3d.shape[0]): + box_corners = gt_corners[k] + fg_pt_flag = kitti_utils.in_hull(pts_rect, box_corners) + fg_pts_rect = pts_rect[fg_pt_flag] + cls_label[fg_pt_flag] = 1 + + # enlarge the bbox3d, ignore nearby points + extend_box_corners = extend_gt_corners[k] + fg_enlarge_flag = kitti_utils.in_hull(pts_rect, extend_box_corners) + ignore_flag = np.logical_xor(fg_pt_flag, fg_enlarge_flag) + cls_label[ignore_flag] = -1 + + # pixel offset of object center + center3d = gt_boxes3d[k][0:3].copy() # (x, y, z) + center3d[1] -= gt_boxes3d[k][3] / 2 + reg_label[fg_pt_flag, 0:3] = center3d - fg_pts_rect # Now y is the true center of 3d box 20180928 + + # size and angle encoding + reg_label[fg_pt_flag, 3] = gt_boxes3d[k][3] # h + reg_label[fg_pt_flag, 4] = gt_boxes3d[k][4] # w + reg_label[fg_pt_flag, 5] = gt_boxes3d[k][5] # l + reg_label[fg_pt_flag, 6] = gt_boxes3d[k][6] # ry + + return cls_label, reg_label + + def rotate_box3d_along_y(self, box3d, rot_angle): + old_x, old_z, ry = box3d[0], box3d[2], box3d[6] + old_beta = np.arctan2(old_z, old_x) + alpha = -np.sign(old_beta) * np.pi / 2 + old_beta + ry + + box3d = kitti_utils.rotate_pc_along_y(box3d.reshape(1, 7), rot_angle=rot_angle)[0] + new_x, new_z = box3d[0], box3d[2] + new_beta = np.arctan2(new_z, new_x) + box3d[6] = np.sign(new_beta) * np.pi / 2 + alpha - new_beta + + return box3d + + def apply_gt_aug_to_one_scene(self, sample_id, pts_rect, pts_intensity, all_gt_boxes3d): + """ + :param pts_rect: (N, 3) + :param all_gt_boxex3d: (M2, 7) + :return: + """ + assert self.gt_database is not None + # extra_gt_num = np.random.randint(10, 15) + # try_times = 50 + if cfg.GT_AUG_RAND_NUM: + extra_gt_num = np.random.randint(10, cfg.GT_EXTRA_NUM) + else: + extra_gt_num = cfg.GT_EXTRA_NUM + try_times = 100 + cnt = 0 + cur_gt_boxes3d = all_gt_boxes3d.copy() + cur_gt_boxes3d[:, 4] += 0.5 # TODO: consider different objects + cur_gt_boxes3d[:, 5] += 0.5 # enlarge new added box to avoid too nearby boxes + cur_gt_corners = kitti_utils.boxes3d_to_corners3d(cur_gt_boxes3d) + + extra_gt_obj_list = [] + extra_gt_boxes3d_list = [] + new_pts_list, new_pts_intensity_list = [], [] + src_pts_flag = np.ones(pts_rect.shape[0], dtype=np.int32) + + road_plane = self.get_road_plane(sample_id) + a, b, c, d = road_plane + + while try_times > 0: + if cnt > extra_gt_num: + break + + try_times -= 1 + if cfg.GT_AUG_HARD_RATIO > 0: + p = np.random.rand() + if p > cfg.GT_AUG_HARD_RATIO: + # use easy sample + rand_idx = np.random.randint(0, len(self.gt_database[0])) + new_gt_dict = self.gt_database[0][rand_idx] + else: + # use hard sample + rand_idx = np.random.randint(0, len(self.gt_database[1])) + new_gt_dict = self.gt_database[1][rand_idx] + else: + rand_idx = np.random.randint(0, self.gt_database.__len__()) + new_gt_dict = self.gt_database[rand_idx] + + new_gt_box3d = new_gt_dict['gt_box3d'].copy() + new_gt_points = new_gt_dict['points'].copy() + new_gt_intensity = new_gt_dict['intensity'].copy() + new_gt_obj = new_gt_dict['obj'] + center = new_gt_box3d[0:3] + if cfg.PC_REDUCE_BY_RANGE and (self.check_pc_range(center) is False): + continue + + if new_gt_points.__len__() < 5: # too few points + continue + + # put it on the road plane + cur_height = (-d - a * center[0] - c * center[2]) / b + move_height = new_gt_box3d[1] - cur_height + new_gt_box3d[1] -= move_height + new_gt_points[:, 1] -= move_height + new_gt_obj.pos[1] -= move_height + + new_enlarged_box3d = new_gt_box3d.copy() + 
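+ # The road plane is a*x + b*y + c*z + d = 0 in rect camera coords, so
+ # cur_height above solves the plane for y at the box center's (x, z). A
+ # KITTI box's y coordinate is its bottom face (y points down in rect
+ # coords), so shifting the box, its points and its label object by
+ # move_height rests the sampled object on the road surface. Worked
+ # example: plane y = 1.6 (a=c=0, b=1, d=-1.6) and a box at y = 1.2 gives
+ # move_height = -0.4, i.e. the object is moved down by 0.4 m.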
new_enlarged_box3d[4] += 0.5 + new_enlarged_box3d[5] += 0.5 # enlarge new added box to avoid too nearby boxes + + cnt += 1 + new_corners = kitti_utils.boxes3d_to_corners3d(new_enlarged_box3d.reshape(1, 7)) + iou3d = kitti_utils.get_iou3d(new_corners, cur_gt_corners) + valid_flag = iou3d.max() < 1e-8 + if not valid_flag: + continue + + enlarged_box3d = new_gt_box3d.copy() + enlarged_box3d[3] += 2 # remove the points above and below the object + + boxes_pts_mask_list = roipool3d_utils.pts_in_boxes3d_cpu( + torch.from_numpy(pts_rect), torch.from_numpy(enlarged_box3d.reshape(1, 7))) + pt_mask_flag = (boxes_pts_mask_list[0].numpy() == 1) + src_pts_flag[pt_mask_flag] = 0 # remove the original points which are inside the new box + + new_pts_list.append(new_gt_points) + new_pts_intensity_list.append(new_gt_intensity) + cur_gt_boxes3d = np.concatenate((cur_gt_boxes3d, new_enlarged_box3d.reshape(1, 7)), axis=0) + cur_gt_corners = np.concatenate((cur_gt_corners, new_corners), axis=0) + extra_gt_boxes3d_list.append(new_gt_box3d.reshape(1, 7)) + extra_gt_obj_list.append(new_gt_obj) + + if new_pts_list.__len__() == 0: + return False, pts_rect, pts_intensity, None, None + + extra_gt_boxes3d = np.concatenate(extra_gt_boxes3d_list, axis=0) + # remove original points and add new points + pts_rect = pts_rect[src_pts_flag == 1] + pts_intensity = pts_intensity[src_pts_flag == 1] + new_pts_rect = np.concatenate(new_pts_list, axis=0) + new_pts_intensity = np.concatenate(new_pts_intensity_list, axis=0) + pts_rect = np.concatenate((pts_rect, new_pts_rect), axis=0) + pts_intensity = np.concatenate((pts_intensity, new_pts_intensity), axis=0) + + return True, pts_rect, pts_intensity, extra_gt_boxes3d, extra_gt_obj_list + + def data_augmentation(self, aug_pts_rect, aug_gt_boxes3d, gt_alpha, sample_id=None, mustaug=False, stage=1): + """ + :param aug_pts_rect: (N, 3) + :param aug_gt_boxes3d: (N, 7) + :param gt_alpha: (N) + :return: + """ + aug_list = cfg.AUG_METHOD_LIST + aug_enable = 1 - np.random.rand(3) + if mustaug is True: + aug_enable[0] = -1 + aug_enable[1] = -1 + aug_method = [] + if 'rotation' in aug_list and aug_enable[0] < cfg.AUG_METHOD_PROB[0]: + angle = np.random.uniform(-np.pi / cfg.AUG_ROT_RANGE, np.pi / cfg.AUG_ROT_RANGE) + aug_pts_rect = kitti_utils.rotate_pc_along_y(aug_pts_rect, rot_angle=angle) + if stage == 1: + # xyz change, hwl unchange + aug_gt_boxes3d = kitti_utils.rotate_pc_along_y(aug_gt_boxes3d, rot_angle=angle) + + # calculate the ry after rotation + x, z = aug_gt_boxes3d[:, 0], aug_gt_boxes3d[:, 2] + beta = np.arctan2(z, x) + new_ry = np.sign(beta) * np.pi / 2 + gt_alpha - beta + aug_gt_boxes3d[:, 6] = new_ry # TODO: not in [-np.pi / 2, np.pi / 2] + elif stage == 2: + # for debug stage-2, this implementation has little float precision difference with the above one + assert aug_gt_boxes3d.shape[0] == 2 + aug_gt_boxes3d[0] = self.rotate_box3d_along_y(aug_gt_boxes3d[0], angle) + aug_gt_boxes3d[1] = self.rotate_box3d_along_y(aug_gt_boxes3d[1], angle) + else: + raise NotImplementedError + + aug_method.append(['rotation', angle]) + + if 'scaling' in aug_list and aug_enable[1] < cfg.AUG_METHOD_PROB[1]: + scale = np.random.uniform(0.95, 1.05) + aug_pts_rect = aug_pts_rect * scale + aug_gt_boxes3d[:, 0:6] = aug_gt_boxes3d[:, 0:6] * scale + aug_method.append(['scaling', scale]) + + if 'flip' in aug_list and aug_enable[2] < cfg.AUG_METHOD_PROB[2]: + # flip horizontal + aug_pts_rect[:, 0] = -aug_pts_rect[:, 0] + aug_gt_boxes3d[:, 0] = -aug_gt_boxes3d[:, 0] + # flip orientation: ry > 0: pi - ry, ry < 
0: -pi - ry + if stage == 1: + aug_gt_boxes3d[:, 6] = np.sign(aug_gt_boxes3d[:, 6]) * np.pi - aug_gt_boxes3d[:, 6] + elif stage == 2: + assert aug_gt_boxes3d.shape[0] == 2 + aug_gt_boxes3d[0, 6] = np.sign(aug_gt_boxes3d[0, 6]) * np.pi - aug_gt_boxes3d[0, 6] + aug_gt_boxes3d[1, 6] = np.sign(aug_gt_boxes3d[1, 6]) * np.pi - aug_gt_boxes3d[1, 6] + else: + raise NotImplementedError + + aug_method.append('flip') + + return aug_pts_rect, aug_gt_boxes3d, aug_method + + def get_rcnn_sample_info(self, roi_info): + sample_id, gt_box3d = roi_info['sample_id'], roi_info['gt_box3d'] + rpn_xyz, rpn_features, rpn_intensity, seg_mask = self.rpn_feature_list[sample_id] + + # augmentation original roi by adding noise + roi_box3d = self.aug_roi_by_noise(roi_info) + + # point cloud pooling based on roi_box3d + pooled_boxes3d = kitti_utils.enlarge_box3d(roi_box3d.reshape(1, 7), cfg.RCNN.POOL_EXTRA_WIDTH) + + boxes_pts_mask_list = roipool3d_utils.pts_in_boxes3d_cpu(torch.from_numpy(rpn_xyz), + torch.from_numpy(pooled_boxes3d)) + pt_mask_flag = (boxes_pts_mask_list[0].numpy() == 1) + cur_pts = rpn_xyz[pt_mask_flag].astype(np.float32) + + # data augmentation + aug_pts = cur_pts.copy() + aug_gt_box3d = gt_box3d.copy().astype(np.float32) + aug_roi_box3d = roi_box3d.copy() + if cfg.AUG_DATA and self.mode == 'TRAIN': + # calculate alpha by ry + temp_boxes3d = np.concatenate([aug_roi_box3d.reshape(1, 7), aug_gt_box3d.reshape(1, 7)], axis=0) + temp_x, temp_z, temp_ry = temp_boxes3d[:, 0], temp_boxes3d[:, 2], temp_boxes3d[:, 6] + temp_beta = np.arctan2(temp_z, temp_x).astype(np.float64) + temp_alpha = -np.sign(temp_beta) * np.pi / 2 + temp_beta + temp_ry + + # data augmentation + aug_pts, aug_boxes3d, aug_method = self.data_augmentation(aug_pts, temp_boxes3d, temp_alpha, mustaug=True, stage=2) + aug_roi_box3d, aug_gt_box3d = aug_boxes3d[0], aug_boxes3d[1] + aug_gt_box3d = aug_gt_box3d.astype(gt_box3d.dtype) + + # Pool input points + valid_mask = 1 # whether the input is valid + + if aug_pts.shape[0] == 0: + pts_features = np.zeros((1, 128), dtype=np.float32) + input_channel = 3 + int(cfg.RCNN.USE_INTENSITY) + int(cfg.RCNN.USE_MASK) + int(cfg.RCNN.USE_DEPTH) + pts_input = np.zeros((1, input_channel), dtype=np.float32) + valid_mask = 0 + else: + pts_features = rpn_features[pt_mask_flag].astype(np.float32) + pts_intensity = rpn_intensity[pt_mask_flag].astype(np.float32) + + pts_input_list = [aug_pts, pts_intensity.reshape(-1, 1)] + if cfg.RCNN.USE_INTENSITY: + pts_input_list = [aug_pts, pts_intensity.reshape(-1, 1)] + else: + pts_input_list = [aug_pts] + + if cfg.RCNN.USE_MASK: + if cfg.RCNN.MASK_TYPE == 'seg': + pts_mask = seg_mask[pt_mask_flag].astype(np.float32) + elif cfg.RCNN.MASK_TYPE == 'roi': + pts_mask = roipool3d_utils.pts_in_boxes3d_cpu(torch.from_numpy(aug_pts), + torch.from_numpy(aug_roi_box3d.reshape(1, 7))) + pts_mask = (pts_mask[0].numpy() == 1).astype(np.float32) + else: + raise NotImplementedError + + pts_input_list.append(pts_mask.reshape(-1, 1)) + + if cfg.RCNN.USE_DEPTH: + pts_depth = np.linalg.norm(aug_pts, axis=1, ord=2) + pts_depth_norm = (pts_depth / 70.0) - 0.5 + pts_input_list.append(pts_depth_norm.reshape(-1, 1)) + + pts_input = np.concatenate(pts_input_list, axis=1) # (N, C) + + aug_gt_corners = kitti_utils.boxes3d_to_corners3d(aug_gt_box3d.reshape(-1, 7)) + aug_roi_corners = kitti_utils.boxes3d_to_corners3d(aug_roi_box3d.reshape(-1, 7)) + iou3d = kitti_utils.get_iou3d(aug_roi_corners, aug_gt_corners) + cur_iou = iou3d[0][0] + + # regression valid mask + reg_valid_mask = 1 if cur_iou >= 
cfg.RCNN.REG_FG_THRESH and valid_mask == 1 else 0 + + # classification label + cls_label = 1 if cur_iou > cfg.RCNN.CLS_FG_THRESH else 0 + if cfg.RCNN.CLS_BG_THRESH < cur_iou < cfg.RCNN.CLS_FG_THRESH or valid_mask == 0: + cls_label = -1 + + # canonical transform and sampling + pts_input_ct, gt_box3d_ct = self.canonical_transform(pts_input, aug_roi_box3d, aug_gt_box3d) + pts_input_ct, pts_features = self.rcnn_input_sample(pts_input_ct, pts_features) + + sample_info = {'sample_id': sample_id, + 'pts_input': pts_input_ct, + 'pts_features': pts_features, + 'cls_label': cls_label, + 'reg_valid_mask': reg_valid_mask, + 'gt_boxes3d_ct': gt_box3d_ct, + 'roi_boxes3d': aug_roi_box3d, + 'roi_size': aug_roi_box3d[3:6], + 'gt_boxes3d': aug_gt_box3d} + + return sample_info + + @staticmethod + def canonical_transform(pts_input, roi_box3d, gt_box3d): + roi_ry = roi_box3d[6] % (2 * np.pi) # 0 ~ 2pi + roi_center = roi_box3d[0:3] + # shift to center + pts_input[:, [0, 1, 2]] = pts_input[:, [0, 1, 2]] - roi_center + gt_box3d_ct = np.copy(gt_box3d) + gt_box3d_ct[0:3] = gt_box3d_ct[0:3] - roi_center + # rotate to the direction of head + gt_box3d_ct = kitti_utils.rotate_pc_along_y(gt_box3d_ct.reshape(1, 7), roi_ry).reshape(7) + gt_box3d_ct[6] = gt_box3d_ct[6] - roi_ry + pts_input = kitti_utils.rotate_pc_along_y(pts_input, roi_ry) + + return pts_input, gt_box3d_ct + + @staticmethod + def canonical_transform_batch(pts_input, roi_boxes3d, gt_boxes3d): + """ + :param pts_input: (N, npoints, 3 + C) + :param roi_boxes3d: (N, 7) + :param gt_boxes3d: (N, 7) + :return: + """ + roi_ry = roi_boxes3d[:, 6] % (2 * np.pi) # 0 ~ 2pi + roi_center = roi_boxes3d[:, 0:3] + # shift to center + pts_input[:, :, [0, 1, 2]] = pts_input[:, :, [0, 1, 2]] - roi_center.reshape(-1, 1, 3) + gt_boxes3d_ct = np.copy(gt_boxes3d) + gt_boxes3d_ct[:, 0:3] = gt_boxes3d_ct[:, 0:3] - roi_center + # rotate to the direction of head + gt_boxes3d_ct = kitti_utils.rotate_pc_along_y_torch(torch.from_numpy(gt_boxes3d_ct.reshape(-1, 1, 7)), + torch.from_numpy(roi_ry)).numpy().reshape(-1, 7) + gt_boxes3d_ct[:, 6] = gt_boxes3d_ct[:, 6] - roi_ry + pts_input = kitti_utils.rotate_pc_along_y_torch(torch.from_numpy(pts_input), torch.from_numpy(roi_ry)).numpy() + + return pts_input, gt_boxes3d_ct + + @staticmethod + def rcnn_input_sample(pts_input, pts_features): + choice = np.random.choice(pts_input.shape[0], cfg.RCNN.NUM_POINTS, replace=True) + + if pts_input.shape[0] < cfg.RCNN.NUM_POINTS: + choice[:pts_input.shape[0]] = np.arange(pts_input.shape[0]) + np.random.shuffle(choice) + pts_input = pts_input[choice] + pts_features = pts_features[choice] + + return pts_input, pts_features + + def aug_roi_by_noise(self, roi_info): + """ + add noise to original roi to get aug_box3d + :param roi_info: + :return: + """ + roi_box3d, gt_box3d = roi_info['roi_box3d'], roi_info['gt_box3d'] + original_iou = roi_info['iou3d'] + temp_iou = cnt = 0 + pos_thresh = min(cfg.RCNN.REG_FG_THRESH, cfg.RCNN.CLS_FG_THRESH) + gt_corners = kitti_utils.boxes3d_to_corners3d(gt_box3d.reshape(-1, 7)) + aug_box3d = roi_box3d + while temp_iou < pos_thresh and cnt < 10: + if roi_info['type'] == 'gt': + aug_box3d = self.random_aug_box3d(roi_box3d) # GT, must random + else: + if np.random.rand() < 0.2: + aug_box3d = roi_box3d # p=0.2 to keep the original roi box + else: + aug_box3d = self.random_aug_box3d(roi_box3d) + aug_corners = kitti_utils.boxes3d_to_corners3d(aug_box3d.reshape(-1, 7)) + iou3d = kitti_utils.get_iou3d(aug_corners, gt_corners) + temp_iou = iou3d[0][0] + cnt += 1 + if original_iou < 
pos_thresh: # original bg, break + break + return aug_box3d + + @staticmethod + def random_aug_box3d(box3d): + """ + :param box3d: (7) [x, y, z, h, w, l, ry] + random shift, scale, orientation + """ + if cfg.RCNN.REG_AUG_METHOD == 'single': + pos_shift = (np.random.rand(3) - 0.5) # [-0.5 ~ 0.5] + hwl_scale = (np.random.rand(3) - 0.5) / (0.5 / 0.15) + 1.0 # + angle_rot = (np.random.rand(1) - 0.5) / (0.5 / (np.pi / 12)) # [-pi/12 ~ pi/12] + + aug_box3d = np.concatenate([box3d[0:3] + pos_shift, box3d[3:6] * hwl_scale, + box3d[6:7] + angle_rot]) + return aug_box3d + elif cfg.RCNN.REG_AUG_METHOD == 'multiple': + # pos_range, hwl_range, angle_range, mean_iou + range_config = [[0.2, 0.1, np.pi / 12, 0.7], + [0.3, 0.15, np.pi / 12, 0.6], + [0.5, 0.15, np.pi / 9, 0.5], + [0.8, 0.15, np.pi / 6, 0.3], + [1.0, 0.15, np.pi / 3, 0.2]] + idx = np.random.randint(len(range_config)) + + pos_shift = ((np.random.rand(3) - 0.5) / 0.5) * range_config[idx][0] + hwl_scale = ((np.random.rand(3) - 0.5) / 0.5) * range_config[idx][1] + 1.0 + angle_rot = ((np.random.rand(1) - 0.5) / 0.5) * range_config[idx][2] + + aug_box3d = np.concatenate([box3d[0:3] + pos_shift, box3d[3:6] * hwl_scale, box3d[6:7] + angle_rot]) + return aug_box3d + elif cfg.RCNN.REG_AUG_METHOD == 'normal': + x_shift = np.random.normal(loc=0, scale=0.3) + y_shift = np.random.normal(loc=0, scale=0.2) + z_shift = np.random.normal(loc=0, scale=0.3) + h_shift = np.random.normal(loc=0, scale=0.25) + w_shift = np.random.normal(loc=0, scale=0.15) + l_shift = np.random.normal(loc=0, scale=0.5) + ry_shift = ((np.random.rand() - 0.5) / 0.5) * np.pi / 12 + + aug_box3d = np.array([box3d[0] + x_shift, box3d[1] + y_shift, box3d[2] + z_shift, box3d[3] + h_shift, + box3d[4] + w_shift, box3d[5] + l_shift, box3d[6] + ry_shift]) + return aug_box3d + else: + raise NotImplementedError + + def get_proposal_from_file(self, index): + sample_id = int(self.image_idx_list[index]) + proposal_file = os.path.join(self.rcnn_eval_roi_dir, '%06d.txt' % sample_id) + roi_obj_list = kitti_utils.get_objects_from_label(proposal_file) + + rpn_xyz, rpn_features, rpn_intensity, seg_mask = self.get_rpn_features(self.rcnn_eval_feature_dir, sample_id) + pts_rect, pts_rpn_features, pts_intensity = rpn_xyz, rpn_features, rpn_intensity + + roi_box3d_list, roi_scores = [], [] + for obj in roi_obj_list: + box3d = np.array([obj.pos[0], obj.pos[1], obj.pos[2], obj.h, obj.w, obj.l, obj.ry], dtype=np.float32) + roi_box3d_list.append(box3d.reshape(1, 7)) + roi_scores.append(obj.score) + + roi_boxes3d = np.concatenate(roi_box3d_list, axis=0) # (N, 7) + roi_scores = np.array(roi_scores, dtype=np.float32) # (N) + + if cfg.RCNN.ROI_SAMPLE_JIT: + sample_dict = {'sample_id': sample_id, + 'rpn_xyz': rpn_xyz, + 'rpn_features': rpn_features, + 'seg_mask': seg_mask, + 'roi_boxes3d': roi_boxes3d, + 'roi_scores': roi_scores, + 'pts_depth': np.linalg.norm(rpn_xyz, ord=2, axis=1)} + + if self.mode != 'TEST': + gt_obj_list = self.filtrate_objects(self.get_label(sample_id)) + gt_boxes3d = kitti_utils.objs_to_boxes3d(gt_obj_list) + + roi_corners = kitti_utils.boxes3d_to_corners3d(roi_boxes3d) + gt_corners = kitti_utils.boxes3d_to_corners3d(gt_boxes3d) + iou3d = kitti_utils.get_iou3d(roi_corners, gt_corners) + if gt_boxes3d.shape[0] > 0: + gt_iou = iou3d.max(axis=1) + else: + gt_iou = np.zeros(roi_boxes3d.shape[0]).astype(np.float32) + + sample_dict['gt_boxes3d'] = gt_boxes3d + sample_dict['gt_iou'] = gt_iou + return sample_dict + + if cfg.RCNN.USE_INTENSITY: + pts_extra_input_list = [pts_intensity.reshape(-1, 1), 
seg_mask.reshape(-1, 1)] + else: + pts_extra_input_list = [seg_mask.reshape(-1, 1)] + + if cfg.RCNN.USE_DEPTH: + cur_depth = np.linalg.norm(pts_rect, axis=1, ord=2) + cur_depth_norm = (cur_depth / 70.0) - 0.5 + pts_extra_input_list.append(cur_depth_norm.reshape(-1, 1)) + + pts_extra_input = np.concatenate(pts_extra_input_list, axis=1) + pts_input, pts_features = roipool3d_utils.roipool3d_cpu(roi_boxes3d, pts_rect, pts_rpn_features, + pts_extra_input, cfg.RCNN.POOL_EXTRA_WIDTH, + sampled_pt_num=cfg.RCNN.NUM_POINTS) + + sample_dict = {'sample_id': sample_id, + 'pts_input': pts_input, + 'pts_features': pts_features, + 'roi_boxes3d': roi_boxes3d, + 'roi_scores': roi_scores, + 'roi_size': roi_boxes3d[:, 3:6]} + + if self.mode == 'TEST': + return sample_dict + + gt_obj_list = self.filtrate_objects(self.get_label(sample_id)) + gt_boxes3d = np.zeros((gt_obj_list.__len__(), 7), dtype=np.float32) + + for k, obj in enumerate(gt_obj_list): + gt_boxes3d[k, 0:3], gt_boxes3d[k, 3], gt_boxes3d[k, 4], gt_boxes3d[k, 5], gt_boxes3d[k, 6] \ + = obj.pos, obj.h, obj.w, obj.l, obj.ry + + if gt_boxes3d.__len__() == 0: + gt_iou = np.zeros((roi_boxes3d.shape[0]), dtype=np.float32) + else: + roi_corners = kitti_utils.boxes3d_to_corners3d(roi_boxes3d) + gt_corners = kitti_utils.boxes3d_to_corners3d(gt_boxes3d) + iou3d = kitti_utils.get_iou3d(roi_corners, gt_corners) + gt_iou = iou3d.max(axis=1) + sample_dict['gt_boxes3d'] = gt_boxes3d + sample_dict['gt_iou'] = gt_iou + + return sample_dict + + def get_rcnn_training_sample_batch(self, index): + sample_id = int(self.sample_id_list[index]) + rpn_xyz, rpn_features, rpn_intensity, seg_mask = \ + self.get_rpn_features(self.rcnn_training_feature_dir, sample_id) + + # load rois and gt_boxes3d for this sample + roi_file = os.path.join(self.rcnn_training_roi_dir, '%06d.txt' % sample_id) + roi_obj_list = kitti_utils.get_objects_from_label(roi_file) + roi_boxes3d = kitti_utils.objs_to_boxes3d(roi_obj_list) + # roi_scores = kitti_utils.objs_to_scores(roi_obj_list) + + gt_obj_list = self.filtrate_objects(self.get_label(sample_id)) + gt_boxes3d = kitti_utils.objs_to_boxes3d(gt_obj_list) + + # calculate original iou + iou3d = kitti_utils.get_iou3d(kitti_utils.boxes3d_to_corners3d(roi_boxes3d), + kitti_utils.boxes3d_to_corners3d(gt_boxes3d)) + max_overlaps, gt_assignment = iou3d.max(axis=1), iou3d.argmax(axis=1) + max_iou_of_gt, roi_assignment = iou3d.max(axis=0), iou3d.argmax(axis=0) + roi_assignment = roi_assignment[max_iou_of_gt > 0].reshape(-1) + + # sample fg, easy_bg, hard_bg + fg_rois_per_image = int(np.round(cfg.RCNN.FG_RATIO * cfg.RCNN.ROI_PER_IMAGE)) + fg_thresh = min(cfg.RCNN.REG_FG_THRESH, cfg.RCNN.CLS_FG_THRESH) + fg_inds = np.nonzero(max_overlaps >= fg_thresh)[0] + fg_inds = np.concatenate((fg_inds, roi_assignment), axis=0) # consider the roi which has max_overlaps with gt as fg + + easy_bg_inds = np.nonzero((max_overlaps < cfg.RCNN.CLS_BG_THRESH_LO))[0] + hard_bg_inds = np.nonzero((max_overlaps < cfg.RCNN.CLS_BG_THRESH) & + (max_overlaps >= cfg.RCNN.CLS_BG_THRESH_LO))[0] + + fg_num_rois = fg_inds.size + bg_num_rois = hard_bg_inds.size + easy_bg_inds.size + + if fg_num_rois > 0 and bg_num_rois > 0: + # sampling fg + fg_rois_per_this_image = min(fg_rois_per_image, fg_num_rois) + rand_num = np.random.permutation(fg_num_rois) + fg_inds = fg_inds[rand_num[:fg_rois_per_this_image]] + + # sampling bg + bg_rois_per_this_image = cfg.RCNN.ROI_PER_IMAGE - fg_rois_per_this_image + bg_inds = self.sample_bg_inds(hard_bg_inds, easy_bg_inds, bg_rois_per_this_image) + + elif 
fg_num_rois > 0 and bg_num_rois == 0: + # sampling fg + rand_num = np.floor(np.random.rand(cfg.RCNN.ROI_PER_IMAGE ) * fg_num_rois) + rand_num = torch.from_numpy(rand_num).type_as(gt_boxes3d).long() + fg_inds = fg_inds[rand_num] + fg_rois_per_this_image = cfg.RCNN.ROI_PER_IMAGE + bg_rois_per_this_image = 0 + elif bg_num_rois > 0 and fg_num_rois == 0: + # sampling bg + bg_rois_per_this_image = cfg.RCNN.ROI_PER_IMAGE + bg_inds = self.sample_bg_inds(hard_bg_inds, easy_bg_inds, bg_rois_per_this_image) + fg_rois_per_this_image = 0 + else: + import pdb + pdb.set_trace() + raise NotImplementedError + + # augment the rois by noise + roi_list, roi_iou_list, roi_gt_list = [], [], [] + if fg_rois_per_this_image > 0: + fg_rois_src = roi_boxes3d[fg_inds].copy() + gt_of_fg_rois = gt_boxes3d[gt_assignment[fg_inds]] + fg_rois, fg_iou3d = self.aug_roi_by_noise_batch(fg_rois_src, gt_of_fg_rois, aug_times=10) + roi_list.append(fg_rois) + roi_iou_list.append(fg_iou3d) + roi_gt_list.append(gt_of_fg_rois) + + if bg_rois_per_this_image > 0: + bg_rois_src = roi_boxes3d[bg_inds].copy() + gt_of_bg_rois = gt_boxes3d[gt_assignment[bg_inds]] + bg_rois, bg_iou3d = self.aug_roi_by_noise_batch(bg_rois_src, gt_of_bg_rois, aug_times=1) + roi_list.append(bg_rois) + roi_iou_list.append(bg_iou3d) + roi_gt_list.append(gt_of_bg_rois) + + rois = np.concatenate(roi_list, axis=0) + iou_of_rois = np.concatenate(roi_iou_list, axis=0) + gt_of_rois = np.concatenate(roi_gt_list, axis=0) + + # collect extra features for point cloud pooling + if cfg.RCNN.USE_INTENSITY: + pts_extra_input_list = [rpn_intensity.reshape(-1, 1), seg_mask.reshape(-1, 1)] + else: + pts_extra_input_list = [seg_mask.reshape(-1, 1)] + + if cfg.RCNN.USE_DEPTH: + pts_depth = (np.linalg.norm(rpn_xyz, ord=2, axis=1) / 70.0) - 0.5 + pts_extra_input_list.append(pts_depth.reshape(-1, 1)) + pts_extra_input = np.concatenate(pts_extra_input_list, axis=1) + + pts_input, pts_features, pts_empty_flag = roipool3d_utils.roipool3d_cpu(rois, rpn_xyz, rpn_features, + pts_extra_input, + cfg.RCNN.POOL_EXTRA_WIDTH, + sampled_pt_num=cfg.RCNN.NUM_POINTS, + canonical_transform=False) + + # data augmentation + if cfg.AUG_DATA and self.mode == 'TRAIN': + for k in range(rois.__len__()): + aug_pts = pts_input[k, :, 0:3].copy() + aug_gt_box3d = gt_of_rois[k].copy() + aug_roi_box3d = rois[k].copy() + + # calculate alpha by ry + temp_boxes3d = np.concatenate([aug_roi_box3d.reshape(1, 7), aug_gt_box3d.reshape(1, 7)], axis=0) + temp_x, temp_z, temp_ry = temp_boxes3d[:, 0], temp_boxes3d[:, 2], temp_boxes3d[:, 6] + temp_beta = np.arctan2(temp_z, temp_x).astype(np.float64) + temp_alpha = -np.sign(temp_beta) * np.pi / 2 + temp_beta + temp_ry + + # data augmentation + aug_pts, aug_boxes3d, aug_method = self.data_augmentation(aug_pts, temp_boxes3d, temp_alpha, + mustaug=True, stage=2) + + # assign to original data + pts_input[k, :, 0:3] = aug_pts + rois[k] = aug_boxes3d[0] + gt_of_rois[k] = aug_boxes3d[1] + + valid_mask = (pts_empty_flag == 0).astype(np.int32) + + # regression valid mask + reg_valid_mask = (iou_of_rois > cfg.RCNN.REG_FG_THRESH).astype(np.int32) & valid_mask + + # classification label + cls_label = (iou_of_rois > cfg.RCNN.CLS_FG_THRESH).astype(np.int32) + invalid_mask = (iou_of_rois > cfg.RCNN.CLS_BG_THRESH) & (iou_of_rois < cfg.RCNN.CLS_FG_THRESH) + cls_label[invalid_mask] = -1 + cls_label[valid_mask == 0] = -1 + + # canonical transform and sampling + pts_input_ct, gt_boxes3d_ct = self.canonical_transform_batch(pts_input, rois, gt_of_rois) + + sample_info = {'sample_id': sample_id, + 
'pts_input': pts_input_ct, + 'pts_features': pts_features, + 'cls_label': cls_label, + 'reg_valid_mask': reg_valid_mask, + 'gt_boxes3d_ct': gt_boxes3d_ct, + 'roi_boxes3d': rois, + 'roi_size': rois[:, 3:6], + 'gt_boxes3d': gt_of_rois} + + return sample_info + + def sample_bg_inds(self, hard_bg_inds, easy_bg_inds, bg_rois_per_this_image): + if hard_bg_inds.size > 0 and easy_bg_inds.size > 0: + hard_bg_rois_num = int(bg_rois_per_this_image * cfg.RCNN.HARD_BG_RATIO) + easy_bg_rois_num = bg_rois_per_this_image - hard_bg_rois_num + + # sampling hard bg + rand_num = np.floor(np.random.rand(hard_bg_rois_num) * hard_bg_inds.size).astype(np.int32) + hard_bg_inds = hard_bg_inds[rand_num] + # sampling easy bg + rand_num = np.floor(np.random.rand(easy_bg_rois_num) * easy_bg_inds.size).astype(np.int32) + easy_bg_inds = easy_bg_inds[rand_num] + + bg_inds = np.concatenate([hard_bg_inds, easy_bg_inds], axis=0) + elif hard_bg_inds.size > 0 and easy_bg_inds.size == 0: + hard_bg_rois_num = bg_rois_per_this_image + # sampling hard bg + rand_num = np.floor(np.random.rand(hard_bg_rois_num) * hard_bg_inds.size).astype(np.int32) + bg_inds = hard_bg_inds[rand_num] + elif hard_bg_inds.size == 0 and easy_bg_inds.size > 0: + easy_bg_rois_num = bg_rois_per_this_image + # sampling easy bg + rand_num = np.floor(np.random.rand(easy_bg_rois_num) * easy_bg_inds.size).astype(np.int32) + bg_inds = easy_bg_inds[rand_num] + else: + raise NotImplementedError + + return bg_inds + + def aug_roi_by_noise_batch(self, roi_boxes3d, gt_boxes3d, aug_times=10): + """ + :param roi_boxes3d: (N, 7) + :param gt_boxes3d: (N, 7) + :return: + """ + iou_of_rois = np.zeros(roi_boxes3d.shape[0], dtype=np.float32) + for k in range(roi_boxes3d.__len__()): + temp_iou = cnt = 0 + roi_box3d = roi_boxes3d[k] + gt_box3d = gt_boxes3d[k] + pos_thresh = min(cfg.RCNN.REG_FG_THRESH, cfg.RCNN.CLS_FG_THRESH) + gt_corners = kitti_utils.boxes3d_to_corners3d(gt_box3d.reshape(1, 7)) + aug_box3d = roi_box3d + while temp_iou < pos_thresh and cnt < aug_times: + if np.random.rand() < 0.2: + aug_box3d = roi_box3d # p=0.2 to keep the original roi box + else: + aug_box3d = self.random_aug_box3d(roi_box3d) + aug_corners = kitti_utils.boxes3d_to_corners3d(aug_box3d.reshape(1, 7)) + iou3d = kitti_utils.get_iou3d(aug_corners, gt_corners) + temp_iou = iou3d[0][0] + cnt += 1 + roi_boxes3d[k] = aug_box3d + iou_of_rois[k] = temp_iou + return roi_boxes3d, iou_of_rois + + def get_rcnn_sample_jit(self, index): + sample_id = int(self.sample_id_list[index]) + rpn_xyz, rpn_features, rpn_intensity, seg_mask = \ + self.get_rpn_features(self.rcnn_training_feature_dir, sample_id) + + # load rois and gt_boxes3d for this sample + roi_file = os.path.join(self.rcnn_training_roi_dir, '%06d.txt' % sample_id) + roi_obj_list = kitti_utils.get_objects_from_label(roi_file) + roi_boxes3d = kitti_utils.objs_to_boxes3d(roi_obj_list) + # roi_scores = kitti_utils.objs_to_scores(roi_obj_list) + + gt_obj_list = self.filtrate_objects(self.get_label(sample_id)) + gt_boxes3d = kitti_utils.objs_to_boxes3d(gt_obj_list) + + sample_info = {'sample_id': sample_id, + 'rpn_xyz': rpn_xyz, + 'rpn_features': rpn_features, + 'rpn_intensity': rpn_intensity, + 'seg_mask': seg_mask, + 'roi_boxes3d': roi_boxes3d, + 'gt_boxes3d': gt_boxes3d, + 'pts_depth': np.linalg.norm(rpn_xyz, ord=2, axis=1)} + + return sample_info + + def collate_batch(self, batch): + if self.mode != 'TRAIN' and cfg.RCNN.ENABLED and not cfg.RPN.ENABLED: + assert batch.__len__() == 1 + return batch[0] + + batch_size = batch.__len__() + ans_dict = {} + + 
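+ # gt_boxes3d (and roi_boxes3d in the ROI_SAMPLE_JIT path) are padded to
+ # the per-batch maximum below because samples carry different numbers of
+ # objects: with max_gt = 3, a two-box sample becomes a (3, 7) array whose
+ # last row is all zeros. Consumers are expected to strip these zero rows
+ # again, as generate_rpn_sample() in lib/net/batch_utils.py does.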
for key in batch[0].keys(): + if cfg.RPN.ENABLED and key == 'gt_boxes3d' or \ + (cfg.RCNN.ENABLED and cfg.RCNN.ROI_SAMPLE_JIT and key in ['gt_boxes3d', 'roi_boxes3d']): + max_gt = 0 + for k in range(batch_size): + max_gt = max(max_gt, batch[k][key].__len__()) + batch_gt_boxes3d = np.zeros((batch_size, max_gt, 7), dtype=np.float32) + for i in range(batch_size): + batch_gt_boxes3d[i, :batch[i][key].__len__(), :] = batch[i][key] + ans_dict[key] = batch_gt_boxes3d + continue + + if isinstance(batch[0][key], np.ndarray): + if batch_size == 1: + ans_dict[key] = batch[0][key][np.newaxis, ...] + else: + ans_dict[key] = np.concatenate([batch[k][key][np.newaxis, ...] for k in range(batch_size)], axis=0) + + else: + ans_dict[key] = [batch[k][key] for k in range(batch_size)] + if isinstance(batch[0][key], int): + ans_dict[key] = np.array(ans_dict[key], dtype=np.int32) + elif isinstance(batch[0][key], float): + ans_dict[key] = np.array(ans_dict[key], dtype=np.float32) + + return ans_dict + + +if __name__ == '__main__': + pass diff --git a/PointRCNN/lib/net/batch_utils.py b/PointRCNN/lib/net/batch_utils.py new file mode 100644 index 0000000..5552a74 --- /dev/null +++ b/PointRCNN/lib/net/batch_utils.py @@ -0,0 +1,186 @@ +import torch as torch +import numpy as np +from lib.config import cfg +import lib.utils.kitti_utils as kitti_utils +import kitti_util + + +def pointrcnn_transform(pts_lidar, calib, PRCNN_npoints, sample_id, mode='TRAIN', random_select=True): + + pts_rect = calib.lidar_to_rect(pts_lidar[:, 0:3]) + pts_intensity = pts_lidar[:, 3] + + if cfg.PC_REDUCE_BY_RANGE: + with torch.no_grad(): + x_range, y_range, z_range = cfg.PC_AREA_SCOPE + pts_x, pts_y, pts_z = pts_rect[:, 0], pts_rect[:, 1], pts_rect[:, 2] + range_flag = (pts_x >= x_range[0]) & (pts_x <= x_range[1]) \ + & (pts_y >= y_range[0]) & (pts_y <= y_range[1]) \ + & (pts_z >= z_range[0]) & (pts_z <= z_range[1]) + pts_valid_flag = range_flag + + pts_rect = pts_rect[pts_valid_flag][:, 0:3] + pts_intensity = pts_intensity[pts_valid_flag] + + # print('Length of sparse generated points: {}'.format(len(pts_rect))) + + # generate inputs + if mode == 'TRAIN' or random_select: + # Sample the number of points to a fixed value `PRCNN_npoints` for PointNet + if PRCNN_npoints < len(pts_rect): + pts_depth = pts_rect[:, 2] + pts_near_flag = pts_depth < 40.0 + far_idxs_choice = torch.nonzero(pts_near_flag == 0).flatten() + near_idxs = torch.nonzero(pts_near_flag == 1).flatten() + + idx = torch.randperm(near_idxs.size(0))[ + :PRCNN_npoints - len(far_idxs_choice)] + near_idxs_choice = near_idxs[idx] + + choice = torch.cat((near_idxs_choice, far_idxs_choice), dim=0) \ + if len(far_idxs_choice) > 0 else near_idxs_choice + choice = choice[torch.randperm(choice.shape[0])] + else: + choice = torch.arange(0, len(pts_rect), dtype=torch.long) + + if PRCNN_npoints > len(pts_rect): + times = PRCNN_npoints // len(pts_rect) + rem = PRCNN_npoints % len(pts_rect) + + idx = torch.randperm(choice.size(0))[:rem] + extra_choice = choice[idx] + + if times == 1: + choice = torch.cat((choice, extra_choice), dim=0) + else: + if len(extra_choice) == 0: + choice = choice.repeat(times) + else: + choice = torch.cat((choice.repeat(times), extra_choice), dim=0) + #print("Sample id: {}".format(sample_id)) + #print("Original pts size: {}".format(len(pts_rect))) + + choice = choice[torch.randperm(choice.shape[0])] + + ret_pts_rect = pts_rect[choice, :] + if len(ret_pts_rect) != PRCNN_npoints: + #print("Original pts size: {}".format(len(pts_rect))) + #print(len(ret_pts_rect)) + pass + + 
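+ # At this point `choice` normally holds exactly PRCNN_npoints indices:
+ # when PRCNN_npoints > len(pts_rect) the cloud was tiled
+ # PRCNN_npoints // len(pts_rect) times and topped up with
+ # PRCNN_npoints % len(pts_rect) random duplicates (e.g. 3 points and
+ # PRCNN_npoints = 8 -> two full copies plus two extras). The length
+ # check just above only passes silently because the near/far split in
+ # the downsampling branch can deviate from the target in rare scenes.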
# translate intensity to [-0.5, 0.5] + ret_pts_intensity = pts_intensity[choice] - 0.5 + else: + ret_pts_rect = pts_rect + ret_pts_intensity = pts_intensity - 0.5 + + pts_features = [ret_pts_intensity.reshape(-1, 1)] + ret_pts_features = torch.cat( + pts_features, dim=1) if pts_features.__len__() > 1 else pts_features[0] + + if cfg.RPN.USE_INTENSITY: + pts_input = torch.cat( + (ret_pts_rect, ret_pts_features), dim=1) # (N, C) + else: + pts_input = ret_pts_rect + + return pts_input, ret_pts_rect, ret_pts_features + + +def generate_rpn_sample(pts_lidar, sample_info, idx, PRCNN_npoints, mode='TRAIN'): + calib = kitti_util.Calib(sample_info['calib'][idx]) + + pts_input, ret_pts_rect, ret_pts_features = pointrcnn_transform( + pts_lidar, calib, PRCNN_npoints, sample_id=sample_info['sample_id'][idx], mode=mode, random_select=sample_info['random_select'][idx]) + + sample = {} + if cfg.RPN.FIXED: + sample['pts_input'] = pts_input + sample['pts_rect'] = ret_pts_rect + sample['pts_features'] = ret_pts_features + return sample + + gt_boxes3d = sample_info['gt_boxes3d'][idx] + # Filter zero rows added by batching + gt_boxes3d = gt_boxes3d[~np.all(gt_boxes3d == 0, axis=1)] + + # generate training labels + # Not doing backprop here for now + rpn_cls_label, rpn_reg_label = generate_rpn_training_labels( + ret_pts_rect.cpu().detach().numpy(), gt_boxes3d) + + sample['pts_input'] = pts_input + sample['pts_rect'] = ret_pts_rect + sample['pts_features'] = ret_pts_features + sample['rpn_cls_label'] = rpn_cls_label + sample['rpn_reg_label'] = rpn_reg_label + + return sample + + +def get_detector_batch(points, batch, mode='TRAIN'): + """ + Input: + points: Pointcloud generated by Depth Network (In lidar coordinates) [Batch, N] + batch: Data batch containing left and right image features + + Returns: Updated batch with features for the pointcloud + Has three new fields: pts_input, pts_rect, pts_features + """ + batch_size = len(batch['sample_id']) + assert len(points) == batch_size + + samples = [generate_rpn_sample(pts_lidar, batch, idx, cfg.RPN.NUM_POINTS, mode) + for idx, pts_lidar in enumerate(points)] + + for key in samples[0].keys(): + if isinstance(samples[0][key], np.ndarray): + if batch_size == 1: + batch[key] = samples[0][key][np.newaxis, ...] + else: + batch[key] = np.concatenate( + [samples[k][key][np.newaxis, ...] 
for k in range(batch_size)], axis=0) + + elif isinstance(samples[0][key], torch.Tensor): + if batch_size == 1: + batch[key] = torch.unsqueeze(samples[0][key], dim=0) + else: + batch[key] = torch.cat( + [samples[k][key].unsqueeze_(0) for k in range(batch_size)], dim=0) + return batch + + +def generate_rpn_training_labels(pts_rect, gt_boxes3d): + cls_label = np.zeros((pts_rect.shape[0]), dtype=np.int32) + # dx, dy, dz, ry, h, w, l + reg_label = np.zeros((pts_rect.shape[0], 7), dtype=np.float32) + gt_corners = kitti_utils.boxes3d_to_corners3d(gt_boxes3d, rotate=True) + extend_gt_boxes3d = kitti_utils.enlarge_box3d( + gt_boxes3d, extra_width=0.2) + extend_gt_corners = kitti_utils.boxes3d_to_corners3d( + extend_gt_boxes3d, rotate=True) + for k in range(gt_boxes3d.shape[0]): + box_corners = gt_corners[k] + fg_pt_flag = kitti_utils.in_hull(pts_rect, box_corners) + fg_pts_rect = pts_rect[fg_pt_flag] + cls_label[fg_pt_flag] = 1 + + # enlarge the bbox3d, ignore nearby points + extend_box_corners = extend_gt_corners[k] + fg_enlarge_flag = kitti_utils.in_hull(pts_rect, extend_box_corners) + ignore_flag = np.logical_xor(fg_pt_flag, fg_enlarge_flag) + cls_label[ignore_flag] = -1 + + # pixel offset of object center + center3d = gt_boxes3d[k][0:3].copy() # (x, y, z) + center3d[1] -= gt_boxes3d[k][3] / 2 + # Now y is the true center of 3d box 20180928 + reg_label[fg_pt_flag, 0:3] = center3d - fg_pts_rect + + # size and angle encoding + reg_label[fg_pt_flag, 3] = gt_boxes3d[k][3] # h + reg_label[fg_pt_flag, 4] = gt_boxes3d[k][4] # w + reg_label[fg_pt_flag, 5] = gt_boxes3d[k][5] # l + reg_label[fg_pt_flag, 6] = gt_boxes3d[k][6] # ry + + return cls_label, reg_label diff --git a/PointRCNN/lib/net/depth_net.py b/PointRCNN/lib/net/depth_net.py new file mode 100644 index 0000000..c49dc8a --- /dev/null +++ b/PointRCNN/lib/net/depth_net.py @@ -0,0 +1,411 @@ +import torch +import torch.optim as optim +import torch.nn as nn +import torch.nn.functional as F +import torchvision.transforms as transforms +from torch.optim.lr_scheduler import StepLR, MultiStepLR +from torch_scatter import scatter_max + +import math +import random +import numpy as np +from depth_network import logger +import os +import shutil +from depth_network.models import * +import kitti_util +import batch_utils + +from PIL import Image +from tensorboardX import SummaryWriter +import ipdb + + +def loader(path): + return Image.open(path).convert('RGB') + + +def dynamic_baseline(calib): + P3 = calib.P3 + P = calib.P2 + baseline = P3[0, 3] / (-P3[0, 0]) - P[0, 3] / (-P[0, 0]) + return baseline + + +class DepthModel(): + def __init__(self, maxdisp, down, maxdepth, pretrain, save_tag, mode='TRAIN', dynamic_bs=False, + lr=0.001, mgpus=False, lr_stepsize=[10, 20], lr_gamma=0.1): + + result_dir = os.path.join('../', 'output', 'depth', save_tag) + # set logger + log = logger.setup_logger(os.path.join(result_dir, 'training.log')) + + # set tensorboard + writer = SummaryWriter(result_dir + '/tensorboardx') + + model = stackhourglass(maxdisp, down=down, maxdepth=maxdepth) + + # Number of parameters + log.info('Number of model parameters: {}'.format( + sum([p.data.nelement() for p in model.parameters()]))) + if mgpus or mode == 'TEST': + model = nn.DataParallel(model) + model = model.cuda() + + torch.backends.cudnn.benchmark = True + + # Optimizer + optimizer = optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.999)) + scheduler = MultiStepLR( + optimizer, milestones=lr_stepsize, gamma=lr_gamma) + + if pretrain is not None: + if os.path.isfile(pretrain): + 
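+ # Checkpoints written from an nn.DataParallel-wrapped model store every
+ # weight under a 'module.' key prefix, so a bare single-GPU model needs
+ # strip_prefix() applied before load_state_dict(); that is why the two
+ # loading branches below differ.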
log.info("=> loading pretrain '{}'".format(pretrain)) + checkpoint = torch.load(pretrain) + if mgpus or mode == 'TEST': + model.load_state_dict(checkpoint['state_dict']) + else: + model.load_state_dict(self.strip_prefix(checkpoint['state_dict'])) + optimizer.load_state_dict(checkpoint['optimizer']) + + else: + log.info( + '[Attention]: Do not find checkpoint {}'.format(pretrain)) + + optimizer.param_groups[0]['lr'] = lr + + self.optimizer = optimizer + self.scheduler = scheduler + self.net = model + self.dynamic_bs = dynamic_bs + self.mode = mode + self.result_dir = result_dir + + normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]) + self.img_transform = transforms.Compose([ + transforms.ToTensor(), + normalize + ]) + + def load_data(self, batch_left_img, batch_right_img, batch_gt_depth, batch_calib): + left_imgs, right_imgs, calibs = [], [], [] + for left_img, right_img, calib in zip( + batch_left_img, batch_right_img, batch_calib): + if self.dynamic_bs: + calib = calib.P2[0, 0] * dynamic_baseline(calib) + else: + calib = calib.P2[0, 0] * 0.54 + + calib = torch.tensor(calib) + left_img = self.img_transform(left_img) + right_img = self.img_transform(right_img) + + # pad to (384, 1248) + C, H, W = left_img.shape + top_pad = 384 - H + right_pad = 1248 - W + left_img = F.pad( + left_img, (0, right_pad, top_pad, 0), "constant", 0) + right_img = F.pad( + right_img, (0, right_pad, top_pad, 0), "constant", 0) + + left_imgs.append(left_img) + right_imgs.append(right_img) + calibs.append(calib) + + left_img = torch.stack(left_imgs) + right_img = torch.stack(right_imgs) + calib = torch.stack(calibs) + + gt_depth = torch.from_numpy(batch_gt_depth).cuda(non_blocking=True) + + return left_img.float(), right_img.float(), gt_depth.float(), calib.float() + + def train(self, batch, start=2.0, max_high=1.0): + imgL, imgR, gt_depth, calib = self.load_data( + batch['left_image'], batch['right_image'], batch['gt_depth'], batch['calib']) + imgL, imgR, gt_depth, calib = imgL.cuda(), imgR.cuda(), gt_depth.cuda(), calib.cuda() + + # --------- + mask = (gt_depth >= 1) * (gt_depth <= 80) + mask.detach_() + #print('mask', torch.sum(mask).float()/(mask.size()[0]*mask.size()[1]*mask.size()[2])) + # ---- + + output1, output2, output3 = self.net(imgL, imgR, calib) + output3 = torch.squeeze(output3, 1) + + def hook_fn(grad): + print(grad.size()) + a = (grad == 0).float() + rate = 100 * torch.sum(a) / (grad.size()[0] * grad.size()[1] * grad.size()[2]) + print('depth_map', rate, torch.mean(grad)/(rate/100), torch.max(grad), torch.min(grad)) + print('one_norm', torch.sum(torch.abs(grad))) + + loss = 0.5 * F.smooth_l1_loss(output1[mask], gt_depth[mask], size_average=True) + 0.7 * F.smooth_l1_loss( + output2[mask], gt_depth[mask], size_average=True) + F.smooth_l1_loss(output3[mask], gt_depth[mask], + size_average=True) + + points = [] + for depth, calib_info, image, sample_id in zip( + output3, batch['calib'], batch['left_image'], batch['sample_id']): + calib_info = kitti_util.Calib(calib_info) + W, H = image.size + depth = depth[-H:, :W] + cloud = depth_to_pcl(calib_info, depth, max_high=max_high) + cloud = filter_cloud(cloud, image, calib_info) + cloud = transform(cloud, calib_info, sparse_type='angular_min', start=2.0) + # save_pcl(cloud, 'points/sparse_{}'.format(sample_id)) + points.append(cloud) + + det_batch = batch_utils.get_detector_batch(points, batch, mode='TRAIN') + return loss, det_batch + + def eval(self, batch, max_high=1.0): + imgL, imgR, gt_depth, calib = self.load_data( + 
batch['left_image'], batch['right_image'], batch['gt_depth'], batch['calib']) + imgL, imgR, gt_depth, calib = imgL.cuda(), imgR.cuda(), gt_depth.cuda(), calib.cuda() + + # --------- + mask = (gt_depth >= 1) * (gt_depth <= 80) + mask.detach_() + #print('mask', torch.sum(mask).float() / (mask.size()[0] * mask.size()[1] * mask.size()[2])) + # ---- + + with torch.no_grad(): + output3 = self.net(imgL, imgR, calib) + output3 = torch.squeeze(output3, 1) + #loss = F.smooth_l1_loss(output3[mask], gt_depth[mask], size_average=True) + loss = 0 + + points = [] + for depth, calib_info, image, sample_id in zip( + output3, batch['calib'], batch['left_image'], batch['sample_id']): + calib_info = kitti_util.Calib(calib_info) + W, H = image.size + + depth = depth[-H:, :W] + cloud = depth_to_pcl(calib_info, depth, max_high=max_high) + cloud = filter_cloud(cloud, image, calib_info) + cloud = transform(cloud, calib_info, sparse_type='angular_min', start=2.0) + points.append(cloud) + + det_batch = batch_utils.get_detector_batch(points, batch, mode='TEST') + + return loss, det_batch + + def save_checkpoint(self, epoch, is_best=False, filename='checkpoint.pth.tar'): + save_dir = os.path.join(self.result_dir, 'ckpt') + if not os.path.exists(save_dir): + os.mkdir(save_dir) + + best_RMSE = 0 # TODO: Add RMSE loss + state = { + 'epoch': epoch + 1, + 'arch': 'stackhourglass', + 'state_dict': self.net.state_dict(), + 'best_RMSE': best_RMSE, + 'scheduler': self.scheduler.state_dict(), + 'optimizer': self.optimizer.state_dict(), + } + torch.save(state, save_dir + '/' + filename) + if is_best: + shutil.copyfile(save_dir + '/' + filename, + save_dir + '/model_best.pth.tar') + + #shutil.copyfile(save_dir + '/' + filename, save_dir + + # '/checkpoint_{}.pth.tar'.format(epoch+1)) + + def strip_prefix(self, state_dict, prefix='module.'): + if not all(key.startswith(prefix) for key in state_dict.keys()): + return state_dict + stripped_state_dict = {} + for key in list(state_dict.keys()): + stripped_state_dict[key.replace(prefix, '')] = state_dict.pop(key) + return stripped_state_dict + + +def depth_to_pcl(calib, depth, max_high=1.): + rows, cols = depth.shape + c, r = torch.meshgrid(torch.arange(0., cols, device='cuda'), + torch.arange(0., rows, device='cuda')) + points = torch.stack([c.t(), r.t(), depth], dim=0) + points = points.reshape((3, -1)) + points = points.t() + cloud = calib.img_to_lidar(points[:, 0], points[:, 1], points[:, 2]) + valid = (cloud[:, 0] >= 0) & (cloud[:, 2] < max_high) + lidar = cloud[valid] + + # pad 1 in the intensity dimension + lidar = torch.cat( + [lidar, torch.ones((lidar.shape[0], 1), device='cuda')], 1) + lidar = lidar.float() + return lidar + + +def transform(points, calib_info, sparse_type, start=2.): + if sparse_type == 'angular': + points = random_sparse_angular(points) + if sparse_type == 'angular_min': + points = nearest_sparse_angular(points, start) + if sparse_type == 'angular_numpy': + points = points.cpu().numpy() + points = pto_ang_map(points).astype(np.float32) + points = torch.from_numpy(points).cuda() + + return points + + +def filter_cloud(velo_points, image, calib): + W, H = image.size + _, _, valid_inds_fov = get_lidar_in_image_fov( + velo_points[:, :3], calib, 0, 0, W, H, True) + velo_points = velo_points[valid_inds_fov] + + # depth, width, height + valid_inds = (velo_points[:, 0] < 120) & \ + (velo_points[:, 0] >= 0) & \ + (velo_points[:, 1] < 50) & \ + (velo_points[:, 1] >= -50) & \ + (velo_points[:, 2] < 1.5) & \ + (velo_points[:, 2] >= -2.5) + velo_points = 
velo_points[valid_inds] + return velo_points + + +def gen_ang_map(velo_points, start=2., H=64, W=512, device='cuda'): + dtheta = math.radians(0.4 * 64.0 / H) + dphi = math.radians(90.0 / W) + + x, y, z, i = velo_points[:, 0], velo_points[:, + 1], velo_points[:, 2], velo_points[:, 3] + + d = torch.sqrt(x ** 2 + y ** 2 + z ** 2) + r = torch.sqrt(x ** 2 + y ** 2) + d[d == 0] = 0.000001 + r[r == 0] = 0.000001 + phi = math.radians(45.) - torch.asin(y / r) + phi_ = (phi / dphi).long() + phi_ = torch.clamp(phi_, 0, W - 1) + + theta = math.radians(start) - torch.asin(z / d) + theta_ = (theta / dtheta).long() + theta_ = torch.clamp(theta_, 0, H - 1) + return [theta_, phi_] + + +def random_sparse_angular(velo_points, H=64, W=512, slice=1, device='cuda'): + """ + :param velo_points: Pointcloud of size [N, 4] + :param H: the row num of depth map, could be 64(default), 32, 16 + :param W: the col num of depth map + :param slice: output every slice lines + """ + + with torch.no_grad(): + theta_, phi_ = gen_ang_map(velo_points, H=64, W=512, device=device) + + depth_map = - torch.ones((H, W, 4), device=device) + + depth_map = depth_map + velo_points = velo_points + x, y, z, i = velo_points[:, 0], velo_points[:, 1], velo_points[:, 2], velo_points[:, 3] + theta_, phi_ = theta_, phi_ + + # Currently, does random subsample (maybe keep the points with min distance) + depth_map[theta_, phi_, 0] = x + depth_map[theta_, phi_, 1] = y + depth_map[theta_, phi_, 2] = z + depth_map[theta_, phi_, 3] = i + depth_map = depth_map.cuda() + + depth_map = depth_map[0:: slice, :, :] + depth_map = depth_map.reshape((-1, 4)) + return depth_map[depth_map[:, 0] != -1.0] + + + +def pto_ang_map(velo_points, H=64, W=512, slice=1): + """ + :param H: the row num of depth map, could be 64(default), 32, 16 + :param W: the col num of depth map + :param slice: output every slice lines + """ + +# np.random.shuffle(velo_points) + dtheta = np.radians(0.4 * 3.0 / H) + dphi = np.radians(90.0 / W) + + x, y, z, i = velo_points[:, 0], velo_points[:, 1], velo_points[:, 2], velo_points[:, 3] + + d = np.sqrt(x ** 2 + y ** 2 + z ** 2) + r = np.sqrt(x ** 2 + y ** 2) + d[d == 0] = 0.000001 + r[r == 0] = 0.000001 + phi = np.radians(45.) - np.arcsin(y / r) + phi_ = (phi / dphi).astype(int) + phi_[phi_ < 0] = 0 + phi_[phi_ >= W] = W - 1 + + theta = np.radians(2.) 
- np.arcsin(z / d) + theta_ = (theta / dtheta).astype(int) + theta_[theta_ < 0] = 0 + theta_[theta_ >= H] = H - 1 + + depth_map = - np.ones((H, W, 4)) + depth_map[theta_, phi_] = velo_points + + depth_map = depth_map[0::slice, :, :] + depth_map = depth_map.reshape((-1, 4)) + depth_map = depth_map[depth_map[:, 0] != -1.0] + return depth_map + + +def nearest_sparse_angular(velo_points, start=2., H=64, W=512, slice=1, device='cuda'): + """ + :param H: the row num of depth map, could be 64(default), 32, 16 + :param W: the col num of depth map + :param slice: output every slice lines + """ + + with torch.no_grad(): + theta_, phi_ = gen_ang_map(velo_points, start, H, W, device=device) + + depth_map = - torch.ones((H, W, 4), device=device) + depth_map = min_dist_subsample(velo_points, theta_, phi_, H, W, device='cuda') + # depth_map = depth_map[0::slice, :, :] + depth_map = depth_map.reshape((-1, 4)) + sparse_points = depth_map[depth_map[:, 0] != -1.0] + return sparse_points + + +def min_dist_subsample(velo_points, theta_, phi_, H, W, device='cuda'): + N = velo_points.shape[0] + + idx = theta_ * W + phi_ # phi_ in range [0, W-1] + depth = torch.arange(0, N, device='cuda') + + sampled_depth, argmin = scatter_max(depth, idx) + mask = argmin[argmin != -1] + return velo_points[mask] + + +def save_pcl(point_cloud, path='point'): + point_cloud = point_cloud.detach().cpu() + np.save(path, point_cloud) + + +def get_lidar_in_image_fov(pc_velo, calib, xmin, ymin, xmax, ymax, + return_more=False, clip_distance=2.0): + ''' Filter lidar points, keep those in image FOV ''' + pts_2d, pts_rect_depth = calib.lidar_to_img(pc_velo) + fov_inds = (pts_2d[:, 0] < xmax) & (pts_2d[:, 0] >= xmin) & \ + (pts_2d[:, 1] < ymax) & (pts_2d[:, 1] >= ymin) + fov_inds = fov_inds & (pc_velo[:, 0] > clip_distance) + imgfov_pc_velo = pc_velo[fov_inds, :] + if return_more: + return imgfov_pc_velo, pts_2d, fov_inds + else: + return imgfov_pc_velo diff --git a/PointRCNN/lib/net/kitti_util.py b/PointRCNN/lib/net/kitti_util.py new file mode 100755 index 0000000..912c161 --- /dev/null +++ b/PointRCNN/lib/net/kitti_util.py @@ -0,0 +1,87 @@ +import torch + + +class Calib(object): + def __init__(self, calib): + + self.P2 = torch.from_numpy(calib.P2).cuda() # 3 x 4 + self.P3 = torch.from_numpy(calib.P3).cuda() # 3 x 4 + self.R0 = torch.from_numpy(calib.R0).cuda() # 3 x 3 + self.V2C = torch.from_numpy(calib.V2C).cuda() # 3 x 4 + self.C2V = torch.from_numpy(calib.C2V).cuda() + + # Camera intrinsics and extrinsics + self.cu = self.P2[0, 2] + self.cv = self.P2[1, 2] + self.fu = self.P2[0, 0] + self.fv = self.P2[1, 1] + self.tx = self.P2[0, 3] / (-self.fu) + self.ty = self.P2[1, 3] / (-self.fv) + + def cart_to_hom(self, pts): + """ + :param pts: (N, 3 or 2) + :return pts_hom: (N, 4 or 3) + """ + ones = torch.ones((pts.shape[0], 1), dtype=torch.float32).cuda() + pts_hom = torch.cat((pts, ones), dim=1) + return pts_hom + + def rect_to_lidar(self, pts_rect): + """ + :param pts_rect: (N, 3) + :return pts_lidar: (N, 3) + """ + pts_hom = self.cart_to_hom(torch.matmul( + pts_rect, torch.inverse(self.R0.t()))) + pts_rect = torch.matmul(pts_hom, self.C2V.t()) + return pts_rect + + def lidar_to_rect(self, pts_lidar): + """ + :param pts_lidar: (N, 3) + :return pts_rect: (N, 3) + """ + pts_lidar_hom = self.cart_to_hom(pts_lidar) + pts_rect = torch.matmul( + pts_lidar_hom, torch.matmul(self.V2C.t(), self.R0.t())) + # pts_rect = reduce(np.dot, (pts_lidar_hom, self.V2C.T, self.R0.T)) + return pts_rect + + def rect_to_img(self, pts_rect): + """ + :param pts_rect: 
(N, 3) + :return pts_img: (N, 2) + """ + pts_rect_hom = self.cart_to_hom(pts_rect) + pts_2d_hom = torch.matmul(pts_rect_hom, self.P2.t()) + pts_img = (pts_2d_hom[:, 0:2].t() / pts_rect_hom[:, 2]).t() # (N, 2) + pts_rect_depth = pts_2d_hom[:, 2] - \ + self.P2.t()[3, 2] # depth in rect camera coord + return pts_img, pts_rect_depth + + def lidar_to_img(self, pts_lidar): + """ + :param pts_lidar: (N, 3) + :return pts_img: (N, 2) + """ + pts_rect = self.lidar_to_rect(pts_lidar) + pts_img, pts_depth = self.rect_to_img(pts_rect) + return pts_img, pts_depth + + def img_to_rect(self, u, v, depth_rect): + """ + :param u: (N) + :param v: (N) + :param depth_rect: (N) + :return: + """ + x = ((u - self.cu) * depth_rect) / self.fu + self.tx + y = ((v - self.cv) * depth_rect) / self.fv + self.ty + pts_rect = torch.cat( + (x.reshape(-1, 1), y.reshape(-1, 1), depth_rect.reshape(-1, 1)), dim=1) + return pts_rect + + def img_to_lidar(self, u, v, depth_rect): + pts_rect = self.img_to_rect(u, v, depth_rect) + return self.rect_to_lidar(pts_rect) diff --git a/PointRCNN/lib/net/point_rcnn.py b/PointRCNN/lib/net/point_rcnn.py new file mode 100644 index 0000000..a78a83e --- /dev/null +++ b/PointRCNN/lib/net/point_rcnn.py @@ -0,0 +1,72 @@ +import torch +import torch.nn as nn +from lib.net.rpn import RPN +from lib.net.rcnn_net import RCNNNet +from lib.config import cfg +import pdb + + +class PointRCNN(nn.Module): + def __init__(self, num_classes, use_xyz=True, mode='TRAIN'): + super().__init__() + + assert cfg.RPN.ENABLED or cfg.RCNN.ENABLED + + if cfg.RPN.ENABLED: + self.rpn = RPN(use_xyz=use_xyz, mode=mode) + + if cfg.RCNN.ENABLED: + rcnn_input_channels = 128 # channels of rpn features + if cfg.RCNN.BACKBONE == 'pointnet': + self.rcnn_net = RCNNNet(num_classes=num_classes, input_channels=rcnn_input_channels, use_xyz=use_xyz) + elif cfg.RCNN.BACKBONE == 'pointsift': + pass + else: + raise NotImplementedError + + def forward(self, input_data): + if cfg.RPN.ENABLED: + output = {} + # rpn inference + with torch.set_grad_enabled((not cfg.RPN.FIXED) and self.training): + if cfg.RPN.FIXED: + self.rpn.eval() + rpn_output = self.rpn(input_data) + output.update(rpn_output) + + # rcnn inference + if cfg.RCNN.ENABLED: + #with torch.no_grad(): + rpn_cls, rpn_reg = rpn_output['rpn_cls'], rpn_output['rpn_reg'] + backbone_xyz, backbone_features = rpn_output['backbone_xyz'], rpn_output['backbone_features'] + + rpn_scores_raw = rpn_cls[:, :, 0] + rpn_scores_norm = torch.sigmoid(rpn_scores_raw) + seg_mask = (rpn_scores_norm > cfg.RPN.SCORE_THRESH).float() + pts_depth = torch.norm(backbone_xyz, p=2, dim=2) + + # proposal layer + rois, roi_scores_raw = self.rpn.proposal_layer(rpn_scores_raw, rpn_reg, backbone_xyz) # (B, M, 7) + + output['rois'] = rois + output['roi_scores_raw'] = roi_scores_raw + output['seg_result'] = seg_mask + + rcnn_input_info = {'rpn_xyz': backbone_xyz, + 'rpn_features': backbone_features.permute((0, 2, 1)), + 'seg_mask': seg_mask, + 'roi_boxes3d': rois, + 'pts_depth': pts_depth} + if self.training: + rcnn_input_info['gt_boxes3d'] = input_data['gt_boxes3d'] + + + rcnn_output = self.rcnn_net(rcnn_input_info) + output.update(rcnn_output) + + elif cfg.RCNN.ENABLED: + output = self.rcnn_net(input_data) + else: + raise NotImplementedError + + return output diff --git a/PointRCNN/lib/net/pointnet2_msg.py b/PointRCNN/lib/net/pointnet2_msg.py new file mode 100644 index 0000000..569f332 --- /dev/null +++ b/PointRCNN/lib/net/pointnet2_msg.py @@ -0,0 +1,70 @@ +import torch +import torch.nn as nn +from 
pointnet2_lib.pointnet2.pointnet2_modules import PointnetFPModule, PointnetSAModuleMSG +from lib.config import cfg + + +def get_model(input_channels=6, use_xyz=True): + return Pointnet2MSG(input_channels=input_channels, use_xyz=use_xyz) + + +class Pointnet2MSG(nn.Module): + def __init__(self, input_channels=6, use_xyz=True): + super().__init__() + + self.SA_modules = nn.ModuleList() + channel_in = input_channels + + skip_channel_list = [input_channels] + for k in range(cfg.RPN.SA_CONFIG.NPOINTS.__len__()): + mlps = cfg.RPN.SA_CONFIG.MLPS[k].copy() + channel_out = 0 + for idx in range(mlps.__len__()): + mlps[idx] = [channel_in] + mlps[idx] + channel_out += mlps[idx][-1] + + self.SA_modules.append( + PointnetSAModuleMSG( + npoint=cfg.RPN.SA_CONFIG.NPOINTS[k], + radii=cfg.RPN.SA_CONFIG.RADIUS[k], + nsamples=cfg.RPN.SA_CONFIG.NSAMPLE[k], + mlps=mlps, + use_xyz=use_xyz, + bn=cfg.RPN.USE_BN + ) + ) + skip_channel_list.append(channel_out) + channel_in = channel_out + + self.FP_modules = nn.ModuleList() + + for k in range(cfg.RPN.FP_MLPS.__len__()): + pre_channel = cfg.RPN.FP_MLPS[k + 1][-1] if k + 1 < len(cfg.RPN.FP_MLPS) else channel_out + self.FP_modules.append( + PointnetFPModule(mlp=[pre_channel + skip_channel_list[k]] + cfg.RPN.FP_MLPS[k]) + ) + + def _break_up_pc(self, pc): + xyz = pc[..., 0:3].contiguous() + features = ( + pc[..., 3:].transpose(1, 2).contiguous() + if pc.size(-1) > 3 else None + ) + + return xyz, features + + def forward(self, pointcloud: torch.cuda.FloatTensor): + xyz, features = self._break_up_pc(pointcloud) + + l_xyz, l_features = [xyz], [features] + for i in range(len(self.SA_modules)): + li_xyz, li_features = self.SA_modules[i](l_xyz[i], l_features[i]) + l_xyz.append(li_xyz) + l_features.append(li_features) + + for i in range(-1, -(len(self.FP_modules) + 1), -1): + l_features[i - 1] = self.FP_modules[i]( + l_xyz[i - 1], l_xyz[i], l_features[i - 1], l_features[i] + ) + + return l_xyz[0], l_features[0] diff --git a/PointRCNN/lib/net/rcnn_net.py b/PointRCNN/lib/net/rcnn_net.py new file mode 100644 index 0000000..9348efe --- /dev/null +++ b/PointRCNN/lib/net/rcnn_net.py @@ -0,0 +1,192 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from pointnet2_lib.pointnet2.pointnet2_modules import PointnetSAModule +from lib.rpn.proposal_target_layer import ProposalTargetLayer +import pointnet2_lib.pointnet2.pytorch_utils as pt_utils +import lib.utils.loss_utils as loss_utils +from lib.config import cfg + +import lib.utils.kitti_utils as kitti_utils +import lib.utils.roipool3d.roipool3d_utils as roipool3d_utils + +import pdb + +class RCNNNet(nn.Module): + def __init__(self, num_classes, input_channels=0, use_xyz=True): + super().__init__() + + self.SA_modules = nn.ModuleList() + channel_in = input_channels + + if cfg.RCNN.USE_RPN_FEATURES: + self.rcnn_input_channel = 3 + int(cfg.RCNN.USE_INTENSITY) + int(cfg.RCNN.USE_MASK) + int(cfg.RCNN.USE_DEPTH) + self.xyz_up_layer = pt_utils.SharedMLP([self.rcnn_input_channel] + cfg.RCNN.XYZ_UP_LAYER, + bn=cfg.RCNN.USE_BN) + c_out = cfg.RCNN.XYZ_UP_LAYER[-1] + self.merge_down_layer = pt_utils.SharedMLP([c_out * 2, c_out], bn=cfg.RCNN.USE_BN) + + for k in range(cfg.RCNN.SA_CONFIG.NPOINTS.__len__()): + mlps = [channel_in] + cfg.RCNN.SA_CONFIG.MLPS[k] + + npoint = cfg.RCNN.SA_CONFIG.NPOINTS[k] if cfg.RCNN.SA_CONFIG.NPOINTS[k] != -1 else None + self.SA_modules.append( + PointnetSAModule( + npoint=npoint, + radius=cfg.RCNN.SA_CONFIG.RADIUS[k], + nsample=cfg.RCNN.SA_CONFIG.NSAMPLE[k], + mlp=mlps, + use_xyz=use_xyz, + 
bn=cfg.RCNN.USE_BN + ) + ) + channel_in = mlps[-1] + + # classification layer + cls_channel = 1 if num_classes == 2 else num_classes + cls_layers = [] + pre_channel = channel_in + for k in range(0, cfg.RCNN.CLS_FC.__len__()): + cls_layers.append(pt_utils.Conv1d(pre_channel, cfg.RCNN.CLS_FC[k], bn=cfg.RCNN.USE_BN)) + pre_channel = cfg.RCNN.CLS_FC[k] + cls_layers.append(pt_utils.Conv1d(pre_channel, cls_channel, activation=None)) + if cfg.RCNN.DP_RATIO >= 0: + cls_layers.insert(1, nn.Dropout(cfg.RCNN.DP_RATIO)) + self.cls_layer = nn.Sequential(*cls_layers) + + if cfg.RCNN.LOSS_CLS == 'SigmoidFocalLoss': + self.cls_loss_func = loss_utils.SigmoidFocalClassificationLoss(alpha=cfg.RCNN.FOCAL_ALPHA[0], + gamma=cfg.RCNN.FOCAL_GAMMA) + elif cfg.RCNN.LOSS_CLS == 'BinaryCrossEntropy': + self.cls_loss_func = F.binary_cross_entropy + elif cfg.RCNN.LOSS_CLS == 'CrossEntropy': + cls_weight = torch.from_numpy(cfg.RCNN.CLS_WEIGHT).float() + self.cls_loss_func = nn.CrossEntropyLoss(ignore_index=-1, reduce=False, weight=cls_weight) + else: + raise NotImplementedError + + # regression layer + per_loc_bin_num = int(cfg.RCNN.LOC_SCOPE / cfg.RCNN.LOC_BIN_SIZE) * 2 + loc_y_bin_num = int(cfg.RCNN.LOC_Y_SCOPE / cfg.RCNN.LOC_Y_BIN_SIZE) * 2 + reg_channel = per_loc_bin_num * 4 + cfg.RCNN.NUM_HEAD_BIN * 2 + 3 + reg_channel += (1 if not cfg.RCNN.LOC_Y_BY_BIN else loc_y_bin_num * 2) + + reg_layers = [] + pre_channel = channel_in + for k in range(0, cfg.RCNN.REG_FC.__len__()): + reg_layers.append(pt_utils.Conv1d(pre_channel, cfg.RCNN.REG_FC[k], bn=cfg.RCNN.USE_BN)) + pre_channel = cfg.RCNN.REG_FC[k] + reg_layers.append(pt_utils.Conv1d(pre_channel, reg_channel, activation=None)) + if cfg.RCNN.DP_RATIO >= 0: + reg_layers.insert(1, nn.Dropout(cfg.RCNN.DP_RATIO)) + self.reg_layer = nn.Sequential(*reg_layers) + + self.proposal_target_layer = ProposalTargetLayer() + self.init_weights(weight_init='xavier') + + def init_weights(self, weight_init='xavier'): + if weight_init == 'kaiming': + init_func = nn.init.kaiming_normal_ + elif weight_init == 'xavier': + init_func = nn.init.xavier_normal_ + elif weight_init == 'normal': + init_func = nn.init.normal_ + else: + raise NotImplementedError + + for m in self.modules(): + if isinstance(m, nn.Conv2d) or isinstance(m, nn.Conv1d): + if weight_init == 'normal': + init_func(m.weight, mean=0, std=0.001) + else: + init_func(m.weight) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + nn.init.normal_(self.reg_layer[-1].conv.weight, mean=0, std=0.001) + + def _break_up_pc(self, pc): + xyz = pc[..., 0:3].contiguous() + features = ( + pc[..., 3:].transpose(1, 2).contiguous() + if pc.size(-1) > 3 else None + ) + + return xyz, features + + def forward(self, input_data): + """ + :param input_data: input dict + :return: + """ + #pdb.set_trace() + if cfg.RCNN.ROI_SAMPLE_JIT: + if self.training: + #with torch.no_grad(): + target_dict = self.proposal_target_layer(input_data) + + pts_input = torch.cat((target_dict['sampled_pts'], target_dict['pts_feature']), dim=2) + target_dict['pts_input'] = pts_input + else: + rpn_xyz, rpn_features = input_data['rpn_xyz'], input_data['rpn_features'] + batch_rois = input_data['roi_boxes3d'] + if cfg.RCNN.USE_INTENSITY: + pts_extra_input_list = [input_data['rpn_intensity'].unsqueeze(dim=2), + input_data['seg_mask'].unsqueeze(dim=2)] + else: + pts_extra_input_list = [input_data['seg_mask'].unsqueeze(dim=2)] + + if cfg.RCNN.USE_DEPTH: + pts_depth = input_data['pts_depth'] / 70.0 - 0.5 + pts_extra_input_list.append(pts_depth.unsqueeze(dim=2)) + pts_extra_input = 
torch.cat(pts_extra_input_list, dim=2) + + pts_feature = torch.cat((pts_extra_input, rpn_features), dim=2) + pooled_features, pooled_empty_flag = \ + roipool3d_utils.roipool3d_gpu(rpn_xyz, pts_feature, batch_rois, cfg.RCNN.POOL_EXTRA_WIDTH, + sampled_pt_num=cfg.RCNN.NUM_POINTS) + + # canonical transformation + batch_size = batch_rois.shape[0] + roi_center = batch_rois[:, :, 0:3] + pooled_features[:, :, :, 0:3] -= roi_center.unsqueeze(dim=2) + for k in range(batch_size): + pooled_features[k, :, :, 0:3] = kitti_utils.rotate_pc_along_y_torch(pooled_features[k, :, :, 0:3], + batch_rois[k, :, 6]) + + pts_input = pooled_features.view(-1, pooled_features.shape[2], pooled_features.shape[3]) + else: + pts_input = input_data['pts_input'] + target_dict = {} + target_dict['pts_input'] = input_data['pts_input'] + target_dict['roi_boxes3d'] = input_data['roi_boxes3d'] + if self.training: + target_dict['cls_label'] = input_data['cls_label'] + target_dict['reg_valid_mask'] = input_data['reg_valid_mask'] + target_dict['gt_of_rois'] = input_data['gt_boxes3d_ct'] + + xyz, features = self._break_up_pc(pts_input) + + if cfg.RCNN.USE_RPN_FEATURES: + xyz_input = pts_input[..., 0:self.rcnn_input_channel].transpose(1, 2).unsqueeze(dim=3) + xyz_feature = self.xyz_up_layer(xyz_input) + + rpn_feature = pts_input[..., self.rcnn_input_channel:].transpose(1, 2).unsqueeze(dim=3) + + merged_feature = torch.cat((xyz_feature, rpn_feature), dim=1) + merged_feature = self.merge_down_layer(merged_feature) + l_xyz, l_features = [xyz], [merged_feature.squeeze(dim=3)] + else: + l_xyz, l_features = [xyz], [features] + + for i in range(len(self.SA_modules)): + li_xyz, li_features = self.SA_modules[i](l_xyz[i], l_features[i]) + l_xyz.append(li_xyz) + l_features.append(li_features) + + rcnn_cls = self.cls_layer(l_features[-1]).transpose(1, 2).contiguous().squeeze(dim=1) # (B, 1 or 2) + rcnn_reg = self.reg_layer(l_features[-1]).transpose(1, 2).contiguous().squeeze(dim=1) # (B, C) + ret_dict = {'rcnn_cls': rcnn_cls, 'rcnn_reg': rcnn_reg} + + if self.training: + ret_dict.update(target_dict) + return ret_dict diff --git a/PointRCNN/lib/net/rpn.py b/PointRCNN/lib/net/rpn.py new file mode 100644 index 0000000..7e453f2 --- /dev/null +++ b/PointRCNN/lib/net/rpn.py @@ -0,0 +1,86 @@ +import torch.nn as nn +import torch.nn.functional as F +import numpy as np +from lib.rpn.proposal_layer import ProposalLayer +import pointnet2_lib.pointnet2.pytorch_utils as pt_utils +import lib.utils.loss_utils as loss_utils +from lib.config import cfg +import importlib +import pdb +import torch + + +class RPN(nn.Module): + def __init__(self, use_xyz=True, mode='TRAIN'): + super().__init__() + self.training_mode = (mode == 'TRAIN') + + MODEL = importlib.import_module(cfg.RPN.BACKBONE) + self.backbone_net = MODEL.get_model(input_channels=int(cfg.RPN.USE_INTENSITY), use_xyz=use_xyz) + + # classification branch + cls_layers = [] + pre_channel = cfg.RPN.FP_MLPS[0][-1] + for k in range(0, cfg.RPN.CLS_FC.__len__()): + cls_layers.append(pt_utils.Conv1d(pre_channel, cfg.RPN.CLS_FC[k], bn=cfg.RPN.USE_BN)) + pre_channel = cfg.RPN.CLS_FC[k] + cls_layers.append(pt_utils.Conv1d(pre_channel, 1, activation=None)) + if cfg.RPN.DP_RATIO >= 0: + cls_layers.insert(1, nn.Dropout(cfg.RPN.DP_RATIO)) + self.rpn_cls_layer = nn.Sequential(*cls_layers) + + # regression branch + per_loc_bin_num = int(cfg.RPN.LOC_SCOPE / cfg.RPN.LOC_BIN_SIZE) * 2 + if cfg.RPN.LOC_XZ_FINE: + reg_channel = per_loc_bin_num * 4 + cfg.RPN.NUM_HEAD_BIN * 2 + 3 + else: + reg_channel = per_loc_bin_num * 2 + 
cfg.RPN.NUM_HEAD_BIN * 2 + 3 + reg_channel += 1 # reg y + + reg_layers = [] + pre_channel = cfg.RPN.FP_MLPS[0][-1] + for k in range(0, cfg.RPN.REG_FC.__len__()): + reg_layers.append(pt_utils.Conv1d(pre_channel, cfg.RPN.REG_FC[k], bn=cfg.RPN.USE_BN)) + pre_channel = cfg.RPN.REG_FC[k] + reg_layers.append(pt_utils.Conv1d(pre_channel, reg_channel, activation=None)) + if cfg.RPN.DP_RATIO >= 0: + reg_layers.insert(1, nn.Dropout(cfg.RPN.DP_RATIO)) + self.rpn_reg_layer = nn.Sequential(*reg_layers) + + if cfg.RPN.LOSS_CLS == 'DiceLoss': + self.rpn_cls_loss_func = loss_utils.DiceLoss(ignore_target=-1) + elif cfg.RPN.LOSS_CLS == 'SigmoidFocalLoss': + self.rpn_cls_loss_func = loss_utils.SigmoidFocalClassificationLoss(alpha=cfg.RPN.FOCAL_ALPHA[0], + gamma=cfg.RPN.FOCAL_GAMMA) + elif cfg.RPN.LOSS_CLS == 'BinaryCrossEntropy': + self.rpn_cls_loss_func = F.binary_cross_entropy + else: + raise NotImplementedError + + self.proposal_layer = ProposalLayer(mode=mode) + self.init_weights() + + def init_weights(self): + if cfg.RPN.LOSS_CLS in ['SigmoidFocalLoss']: + pi = 0.01 + nn.init.constant_(self.rpn_cls_layer[2].conv.bias, -np.log((1 - pi) / pi)) + + nn.init.normal_(self.rpn_reg_layer[-1].conv.weight, mean=0, std=0.001) + + def forward(self, input_data): + """ + :param input_data: dict (point_cloud) + :return: + """ + #pdb.set_trace() + pts_input = input_data['pts_input'] + backbone_xyz, backbone_features = self.backbone_net(pts_input) # (B, N, 3), (B, C, N) (N=16384, C=128) + + rpn_cls = self.rpn_cls_layer(backbone_features).transpose(1, 2).contiguous() # (B, N, 1) + rpn_reg = self.rpn_reg_layer(backbone_features).transpose(1, 2).contiguous() # (B, N, C) (N=16384, C=76) + + ret_dict = {'rpn_cls': rpn_cls, 'rpn_reg': rpn_reg, + 'backbone_xyz': backbone_xyz, 'backbone_features': backbone_features} + + return ret_dict + diff --git a/PointRCNN/lib/net/train_functions.py b/PointRCNN/lib/net/train_functions.py new file mode 100644 index 0000000..4c69944 --- /dev/null +++ b/PointRCNN/lib/net/train_functions.py @@ -0,0 +1,216 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import lib.utils.loss_utils as loss_utils +from lib.config import cfg +from collections import namedtuple +import pdb + +def model_joint_fn_decorator(): + ModelReturn = namedtuple("ModelReturn", ['loss', 'tb_dict', 'disp_dict']) + MEAN_SIZE = torch.from_numpy(cfg.CLS_MEAN_SIZE[0]).cuda() + + def model_fn(model, data): + if cfg.RPN.ENABLED: + pts_rect, pts_features, pts_input = data['pts_rect'], data['pts_features'], data['pts_input'] + gt_boxes3d = data['gt_boxes3d'] + + if not cfg.RPN.FIXED: + rpn_cls_label, rpn_reg_label = data['rpn_cls_label'], data['rpn_reg_label'] + rpn_cls_label = torch.from_numpy(rpn_cls_label).cuda(non_blocking=True).long() + rpn_reg_label = torch.from_numpy(rpn_reg_label).cuda(non_blocking=True).float() + + inputs = pts_input.float() + gt_boxes3d = torch.from_numpy(gt_boxes3d).cuda(non_blocking=True).float() + input_data = {'pts_input': inputs, 'gt_boxes3d': gt_boxes3d} + else: + input_data = {} + for key, val in data.items(): + if key != 'sample_id': + input_data[key] = torch.from_numpy(val).contiguous().cuda(non_blocking=True).float() + if not cfg.RCNN.ROI_SAMPLE_JIT: + pts_input = torch.cat((input_data['pts_input'], input_data['pts_features']), dim=-1) + input_data['pts_input'] = pts_input + + ret_dict = model(input_data) + + tb_dict = {} + disp_dict = {} + loss = 0 + if cfg.RPN.ENABLED and not cfg.RPN.FIXED: + rpn_cls, rpn_reg = ret_dict['rpn_cls'], ret_dict['rpn_reg'] + rpn_loss = 
get_rpn_loss(model, rpn_cls, rpn_reg, rpn_cls_label, rpn_reg_label, tb_dict) + loss += rpn_loss + disp_dict['rpn_loss'] = rpn_loss.item() + + if cfg.RCNN.ENABLED: + rcnn_loss = get_rcnn_loss(model, ret_dict, tb_dict) + disp_dict['rcnn_loss'] = tb_dict['rcnn_loss'] + disp_dict['reg_fg_sum'] = tb_dict['rcnn_reg_fg'] + loss += rcnn_loss + + disp_dict['loss'] = loss.item() + + return ModelReturn(loss, tb_dict, disp_dict) + + def get_rpn_loss(model, rpn_cls, rpn_reg, rpn_cls_label, rpn_reg_label, tb_dict): + if isinstance(model, nn.DataParallel): + rpn_cls_loss_func = model.module.rpn.rpn_cls_loss_func + else: + rpn_cls_loss_func = model.rpn.rpn_cls_loss_func + + rpn_cls_label_flat = rpn_cls_label.view(-1) + rpn_cls_flat = rpn_cls.view(-1) + fg_mask = (rpn_cls_label_flat > 0) + + # RPN classification loss + if cfg.RPN.LOSS_CLS == 'DiceLoss': + rpn_loss_cls = rpn_cls_loss_func(rpn_cls, rpn_cls_label_flat) + + elif cfg.RPN.LOSS_CLS == 'SigmoidFocalLoss': + rpn_cls_target = (rpn_cls_label_flat > 0).float() + pos = (rpn_cls_label_flat > 0).float() + neg = (rpn_cls_label_flat == 0).float() + cls_weights = pos + neg + pos_normalizer = pos.sum() + cls_weights = cls_weights / torch.clamp(pos_normalizer, min=1.0) + rpn_loss_cls = rpn_cls_loss_func(rpn_cls_flat, rpn_cls_target, cls_weights) + rpn_loss_cls_pos = (rpn_loss_cls * pos).sum() + rpn_loss_cls_neg = (rpn_loss_cls * neg).sum() + rpn_loss_cls = rpn_loss_cls.sum() + tb_dict['rpn_loss_cls_pos'] = rpn_loss_cls_pos.item() + tb_dict['rpn_loss_cls_neg'] = rpn_loss_cls_neg.item() + + elif cfg.RPN.LOSS_CLS == 'BinaryCrossEntropy': + weight = rpn_cls_flat.new(rpn_cls_flat.shape[0]).fill_(1.0) + weight[fg_mask] = cfg.RPN.FG_WEIGHT + rpn_cls_label_target = (rpn_cls_label_flat > 0).float() + batch_loss_cls = F.binary_cross_entropy(torch.sigmoid(rpn_cls_flat), rpn_cls_label_target, + weight=weight, reduction='none') + cls_valid_mask = (rpn_cls_label_flat >= 0).float() + rpn_loss_cls = (batch_loss_cls * cls_valid_mask).sum() / torch.clamp(cls_valid_mask.sum(), min=1.0) + else: + raise NotImplementedError + + # RPN regression loss + point_num = rpn_reg.size(0) * rpn_reg.size(1) + fg_sum = fg_mask.long().sum().item() + if fg_sum != 0: + loss_loc, loss_angle, loss_size, reg_loss_dict = \ + loss_utils.get_reg_loss(rpn_reg.view(point_num, -1)[fg_mask], + rpn_reg_label.view(point_num, 7)[fg_mask], + loc_scope=cfg.RPN.LOC_SCOPE, + loc_bin_size=cfg.RPN.LOC_BIN_SIZE, + num_head_bin=cfg.RPN.NUM_HEAD_BIN, + anchor_size=MEAN_SIZE, + get_xz_fine=cfg.RPN.LOC_XZ_FINE, + get_y_by_bin=False, + get_ry_fine=False) + + loss_size = 3 * loss_size # consistent with old codes + rpn_loss_reg = loss_loc + loss_angle + loss_size + else: + loss_loc = loss_angle = loss_size = rpn_loss_reg = rpn_loss_cls * 0 + + rpn_loss = rpn_loss_cls * cfg.RPN.LOSS_WEIGHT[0] + rpn_loss_reg * cfg.RPN.LOSS_WEIGHT[1] + + tb_dict.update({'rpn_loss_cls': rpn_loss_cls.item(), 'rpn_loss_reg': rpn_loss_reg.item(), + 'rpn_loss': rpn_loss.item(), 'rpn_fg_sum': fg_sum, 'rpn_loss_loc': loss_loc.item(), + 'rpn_loss_angle': loss_angle.item(), 'rpn_loss_size': loss_size.item()}) + + return rpn_loss + + def get_rcnn_loss(model, ret_dict, tb_dict): + rcnn_cls, rcnn_reg = ret_dict['rcnn_cls'], ret_dict['rcnn_reg'] + + cls_label = ret_dict['cls_label'].float() + reg_valid_mask = ret_dict['reg_valid_mask'] + roi_boxes3d = ret_dict['roi_boxes3d'] + roi_size = roi_boxes3d[:, 3:6] + gt_boxes3d_ct = ret_dict['gt_of_rois'] + pts_input = ret_dict['pts_input'] + + # rcnn classification loss + if isinstance(model, nn.DataParallel): 
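+            # nn.DataParallel wraps the network, so submodules such as
+            # rcnn_net live under model.module; unwrap it before fetching
+            # the classification loss function.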
+            cls_loss_func = model.module.rcnn_net.cls_loss_func
+        else:
+            cls_loss_func = model.rcnn_net.cls_loss_func
+
+        cls_label_flat = cls_label.view(-1)
+
+        if cfg.RCNN.LOSS_CLS == 'SigmoidFocalLoss':
+            rcnn_cls_flat = rcnn_cls.view(-1)
+
+            cls_target = (cls_label_flat > 0).float()
+            pos = (cls_label_flat > 0).float()
+            neg = (cls_label_flat == 0).float()
+            cls_weights = pos + neg
+            pos_normalizer = pos.sum()
+            cls_weights = cls_weights / torch.clamp(pos_normalizer, min=1.0)
+
+            rcnn_loss_cls = cls_loss_func(rcnn_cls_flat, cls_target, cls_weights)
+            rcnn_loss_cls_pos = (rcnn_loss_cls * pos).sum()
+            rcnn_loss_cls_neg = (rcnn_loss_cls * neg).sum()
+            rcnn_loss_cls = rcnn_loss_cls.sum()
+            tb_dict['rcnn_loss_cls_pos'] = rcnn_loss_cls_pos.item()
+            tb_dict['rcnn_loss_cls_neg'] = rcnn_loss_cls_neg.item()
+
+        elif cfg.RCNN.LOSS_CLS == 'BinaryCrossEntropy':
+            rcnn_cls_flat = rcnn_cls.view(-1)
+            batch_loss_cls = F.binary_cross_entropy(torch.sigmoid(rcnn_cls_flat), cls_label_flat, reduction='none')
+            cls_valid_mask = (cls_label_flat >= 0).float()
+            rcnn_loss_cls = (batch_loss_cls * cls_valid_mask).sum() / torch.clamp(cls_valid_mask.sum(), min=1.0)
+
+        elif cfg.RCNN.LOSS_CLS == 'CrossEntropy':
+            rcnn_cls_reshape = rcnn_cls.view(rcnn_cls.shape[0], -1)
+            cls_target = cls_label_flat.long()
+            cls_valid_mask = (cls_label_flat >= 0).float()
+
+            batch_loss_cls = cls_loss_func(rcnn_cls_reshape, cls_target)
+            normalizer = torch.clamp(cls_valid_mask.sum(), min=1.0)
+            rcnn_loss_cls = (batch_loss_cls.mean(dim=1) * cls_valid_mask).sum() / normalizer
+
+        else:
+            raise NotImplementedError
+
+        # rcnn regression loss
+        batch_size = pts_input.shape[0]
+        fg_mask = (reg_valid_mask > 0)
+        fg_sum = fg_mask.long().sum().item()
+        if fg_sum != 0:
+            all_anchor_size = roi_size
+            anchor_size = all_anchor_size[fg_mask] if cfg.RCNN.SIZE_RES_ON_ROI else MEAN_SIZE
+
+            loss_loc, loss_angle, loss_size, reg_loss_dict = \
+                loss_utils.get_reg_loss(rcnn_reg.view(batch_size, -1)[fg_mask],
+                                        gt_boxes3d_ct.view(batch_size, 7)[fg_mask],
+                                        loc_scope=cfg.RCNN.LOC_SCOPE,
+                                        loc_bin_size=cfg.RCNN.LOC_BIN_SIZE,
+                                        num_head_bin=cfg.RCNN.NUM_HEAD_BIN,
+                                        anchor_size=anchor_size,
+                                        get_xz_fine=True, get_y_by_bin=cfg.RCNN.LOC_Y_BY_BIN,
+                                        loc_y_scope=cfg.RCNN.LOC_Y_SCOPE, loc_y_bin_size=cfg.RCNN.LOC_Y_BIN_SIZE,
+                                        get_ry_fine=True)
+
+            loss_size = 3 * loss_size  # consistent with old codes
+            rcnn_loss_reg = loss_loc + loss_angle + loss_size
+            tb_dict.update(reg_loss_dict)
+        else:
+            loss_loc = loss_angle = loss_size = rcnn_loss_reg = rcnn_loss_cls * 0
+
+        rcnn_loss = rcnn_loss_cls + rcnn_loss_reg
+        tb_dict['rcnn_loss_cls'] = rcnn_loss_cls.item()
+        tb_dict['rcnn_loss_reg'] = rcnn_loss_reg.item()
+        tb_dict['rcnn_loss'] = rcnn_loss.item()
+
+        tb_dict['rcnn_loss_loc'] = loss_loc.item()
+        tb_dict['rcnn_loss_angle'] = loss_angle.item()
+        tb_dict['rcnn_loss_size'] = loss_size.item()
+
+        tb_dict['rcnn_cls_fg'] = (cls_label > 0).sum().item()
+        tb_dict['rcnn_cls_bg'] = (cls_label == 0).sum().item()
+        tb_dict['rcnn_reg_fg'] = reg_valid_mask.sum().item()
+
+        return rcnn_loss
+
+    return model_fn
diff --git a/PointRCNN/lib/rpn/proposal_layer.py b/PointRCNN/lib/rpn/proposal_layer.py
new file mode 100644
index 0000000..9c90224
--- /dev/null
+++ b/PointRCNN/lib/rpn/proposal_layer.py
@@ -0,0 +1,147 @@
+import torch
+import torch.nn as nn
+from lib.utils.bbox_transform import decode_bbox_target
+from lib.config import cfg
+import lib.utils.kitti_utils as kitti_utils
+import lib.utils.iou3d.iou3d_utils as iou3d_utils
+import pdb
+
+
+class ProposalLayer(nn.Module):
+    def __init__(self, 
mode='TRAIN'):
+        super().__init__()
+        self.mode = mode
+        self.MEAN_SIZE = torch.from_numpy(cfg.CLS_MEAN_SIZE[0]).cuda()
+
+    def forward(self, rpn_scores, rpn_reg, xyz):
+        """
+        :param rpn_scores: (B, N)
+        :param rpn_reg: (B, N, C)
+        :param xyz: (B, N, 3)
+        :return bbox3d: (B, M, 7)
+        """
+        # pdb.set_trace()
+        batch_size = xyz.shape[0]
+        proposals = decode_bbox_target(xyz.view(-1, 3), rpn_reg.view(-1, rpn_reg.shape[-1]),
+                                       anchor_size=self.MEAN_SIZE,
+                                       loc_scope=cfg.RPN.LOC_SCOPE,
+                                       loc_bin_size=cfg.RPN.LOC_BIN_SIZE,
+                                       num_head_bin=cfg.RPN.NUM_HEAD_BIN,
+                                       get_xz_fine=cfg.RPN.LOC_XZ_FINE,
+                                       get_y_by_bin=False,
+                                       get_ry_fine=False)  # (N, 7)
+        proposals[:, 1] = proposals[:, 1] + proposals[:, 3] / 2  # set y as the center of bottom
+        proposals = proposals.view(batch_size, -1, 7)
+
+        scores = rpn_scores
+        _, sorted_idxs = torch.sort(scores, dim=1, descending=True)
+
+        batch_size = scores.size(0)
+        ret_bbox3d = scores.new(batch_size, cfg[self.mode].RPN_POST_NMS_TOP_N, 7).zero_()
+        ret_scores = scores.new(batch_size, cfg[self.mode].RPN_POST_NMS_TOP_N).zero_()
+        for k in range(batch_size):
+            scores_single = scores[k]
+            proposals_single = proposals[k]
+            order_single = sorted_idxs[k]
+
+            if cfg[self.mode].RPN_DISTANCE_BASED_PROPOSE:
+                scores_single, proposals_single = self.distance_based_proposal(scores_single, proposals_single,
+                                                                               order_single)
+            else:
+                scores_single, proposals_single = self.score_based_proposal(scores_single, proposals_single,
+                                                                            order_single)
+
+            proposals_tot = proposals_single.size(0)
+            ret_bbox3d[k, :proposals_tot] = proposals_single
+            ret_scores[k, :proposals_tot] = scores_single
+
+        return ret_bbox3d, ret_scores
+
+    def distance_based_proposal(self, scores, proposals, order):
+        """
+        propose rois in two areas based on the distance
+        :param scores: (N)
+        :param proposals: (N, 7)
+        :param order: (N)
+        """
+        nms_range_list = [0, 40.0, 80.0]
+        pre_tot_top_n = cfg[self.mode].RPN_PRE_NMS_TOP_N
+        pre_top_n_list = [0, int(pre_tot_top_n * 0.7), pre_tot_top_n - int(pre_tot_top_n * 0.7)]
+        post_tot_top_n = cfg[self.mode].RPN_POST_NMS_TOP_N
+        post_top_n_list = [0, int(post_tot_top_n * 0.7), post_tot_top_n - int(post_tot_top_n * 0.7)]
+
+        scores_single_list, proposals_single_list = [], []
+
+        # sort by score
+        scores_ordered = scores[order]
+        proposals_ordered = proposals[order]
+
+        dist = proposals_ordered[:, 2]
+        first_mask = (dist > nms_range_list[0]) & (dist <= nms_range_list[1])
+        for i in range(1, len(nms_range_list)):
+            # get proposal distance mask
+            dist_mask = ((dist > nms_range_list[i - 1]) & (dist <= nms_range_list[i]))
+
+            if dist_mask.sum() != 0:
+                # this area has points
+                # reduce by mask
+                cur_scores = scores_ordered[dist_mask]
+                cur_proposals = proposals_ordered[dist_mask]
+
+                # fetch pre nms top K
+                cur_scores = cur_scores[:pre_top_n_list[i]]
+                cur_proposals = cur_proposals[:pre_top_n_list[i]]
+            else:
+                assert i == 2, '%d' % i
+                # this area doesn't have any points, so use rois of first area
+                cur_scores = scores_ordered[first_mask]
+                cur_proposals = proposals_ordered[first_mask]
+
+                # fetch top K of first area
+                cur_scores = cur_scores[pre_top_n_list[i - 1]:][:pre_top_n_list[i]]
+                cur_proposals = cur_proposals[pre_top_n_list[i - 1]:][:pre_top_n_list[i]]
+
+            # oriented nms
+            boxes_bev = kitti_utils.boxes3d_to_bev_torch(cur_proposals)
+            if cfg.RPN.NMS_TYPE == 'rotate':
+                keep_idx = iou3d_utils.nms_gpu(boxes_bev, cur_scores, cfg[self.mode].RPN_NMS_THRESH)
+            elif cfg.RPN.NMS_TYPE == 'normal':
+                keep_idx = iou3d_utils.nms_normal_gpu(boxes_bev, cur_scores, cfg[self.mode].RPN_NMS_THRESH)
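+            # 'rotate' suppresses overlaps with the oriented BEV IoU kernel
+            # (iou3d_cuda.nms_gpu); 'normal' runs plain NMS on the same BEV
+            # boxes without rotation handling (iou3d_cuda.nms_normal_gpu).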
+            else:
+                raise NotImplementedError
+
+            # Fetch post nms top k
+            keep_idx = keep_idx[:post_top_n_list[i]]
+
+            scores_single_list.append(cur_scores[keep_idx])
+            proposals_single_list.append(cur_proposals[keep_idx])
+
+        scores_single = torch.cat(scores_single_list, dim=0)
+        proposals_single = torch.cat(proposals_single_list, dim=0)
+        return scores_single, proposals_single
+
+    def score_based_proposal(self, scores, proposals, order):
+        """
+        propose rois based on their classification scores
+        :param scores: (N)
+        :param proposals: (N, 7)
+        :param order: (N)
+        """
+        # sort by score
+        scores_ordered = scores[order]
+        proposals_ordered = proposals[order]
+
+        # pre nms top K
+        cur_scores = scores_ordered[:cfg[self.mode].RPN_PRE_NMS_TOP_N]
+        cur_proposals = proposals_ordered[:cfg[self.mode].RPN_PRE_NMS_TOP_N]
+
+        boxes_bev = kitti_utils.boxes3d_to_bev_torch(cur_proposals)
+        keep_idx = iou3d_utils.nms_gpu(boxes_bev, cur_scores, cfg[self.mode].RPN_NMS_THRESH)
+
+        # Fetch post nms top k
+        keep_idx = keep_idx[:cfg[self.mode].RPN_POST_NMS_TOP_N]
+
+        return cur_scores[keep_idx], cur_proposals[keep_idx]
+
+
+
diff --git a/PointRCNN/lib/rpn/proposal_target_layer.py b/PointRCNN/lib/rpn/proposal_target_layer.py
new file mode 100644
index 0000000..9410703
--- /dev/null
+++ b/PointRCNN/lib/rpn/proposal_target_layer.py
@@ -0,0 +1,359 @@
+import torch
+import torch.nn as nn
+import numpy as np
+from lib.config import cfg
+import lib.utils.kitti_utils as kitti_utils
+import lib.utils.roipool3d.roipool3d_utils as roipool3d_utils
+import lib.utils.simple_roipool3d.simple_roipool3d_utils as simple_roipool3d_utils
+import lib.utils.iou3d.iou3d_utils as iou3d_utils
+import pdb
+
+
+class ProposalTargetLayer(nn.Module):
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, input_dict):
+        roi_boxes3d, gt_boxes3d = input_dict['roi_boxes3d'], input_dict['gt_boxes3d']
+
+        batch_rois, batch_gt_of_rois, batch_roi_iou = self.sample_rois_for_rcnn(roi_boxes3d, gt_boxes3d)
+
+        rpn_xyz, rpn_features = input_dict['rpn_xyz'], input_dict['rpn_features']
+        if cfg.RCNN.USE_INTENSITY:
+            pts_extra_input_list = [input_dict['rpn_intensity'].unsqueeze(dim=2),
+                                    input_dict['seg_mask'].unsqueeze(dim=2)]
+        else:
+            pts_extra_input_list = [input_dict['seg_mask'].unsqueeze(dim=2)]
+
+        if cfg.RCNN.USE_DEPTH:
+            pts_depth = input_dict['pts_depth'] / 70.0 - 0.5
+            pts_extra_input_list.append(pts_depth.unsqueeze(dim=2))
+        pts_extra_input = torch.cat(pts_extra_input_list, dim=2)
+
+        # point cloud pooling
+        pts_feature = torch.cat((pts_extra_input, rpn_features), dim=2)
+
+        ori_pts_idx, ori_pooled_empty_flag = \
+            simple_roipool3d_utils.simple_roipool3d_gpu(rpn_xyz, pts_feature, batch_rois, cfg.RCNN.POOL_EXTRA_WIDTH,
+                                                        sampled_pt_num=cfg.RCNN.NUM_POINTS)
+
+        pts_idx = ori_pts_idx.clone().detach()
+        pooled_empty_flag = ori_pooled_empty_flag.clone().detach()
+        pooled_empty_flag = pooled_empty_flag.unsqueeze(2).unsqueeze(3).float()
+
+        batch_size, num_box, pts_num = pts_idx.size()[0], pts_idx.size()[1], pts_idx.size()[2]
+        fea_ch = pts_feature.size()[2]
+
+        pts_idx = pts_idx.view(batch_size, -1, 1)  # [batch_size, num_box * 512, 1]
+        pts_idx = pts_idx.repeat(1, 1, fea_ch + 3)  # [batch_size, num_box * 512, ch + 3]
+
+        # [batch_size, num_box * 512, ch + 3]
+        pooled_features = torch.gather(torch.cat([rpn_xyz, pts_feature], dim=2), 1, pts_idx.long())
+        pooled_features = pooled_features.view(batch_size, num_box, pts_num, -1)
+        pooled_features = pooled_features * (1 - pooled_empty_flag)
+
+        sampled_pts, sampled_features = pooled_features[:, :, :, 0:3], 
pooled_features[:, :, :, 3:] + + # data augmentation + if cfg.AUG_DATA: + # data augmentation + sampled_pts, batch_rois, batch_gt_of_rois = \ + self.data_augmentation(sampled_pts, batch_rois, batch_gt_of_rois) + + # canonical transformation + batch_size = batch_rois.shape[0] + roi_ry = batch_rois[:, :, 6] % (2 * np.pi) + roi_center = batch_rois[:, :, 0:3] + sampled_pts = sampled_pts - roi_center.unsqueeze(dim=2) # (B, M, 512, 3) + batch_gt_of_rois[:, :, 0:3] = batch_gt_of_rois[:, :, 0:3] - roi_center + batch_gt_of_rois[:, :, 6] = batch_gt_of_rois[:, :, 6] - roi_ry + + for k in range(batch_size): + sampled_pts[k] = kitti_utils.rotate_pc_along_y_torch(sampled_pts[k], batch_rois[k, :, 6]) + batch_gt_of_rois[k] = kitti_utils.rotate_pc_along_y_torch(batch_gt_of_rois[k].unsqueeze(dim=1), + roi_ry[k]).squeeze(dim=1) + + # regression valid mask + valid_mask = (ori_pooled_empty_flag == 0) + reg_valid_mask = ((batch_roi_iou > cfg.RCNN.REG_FG_THRESH) & valid_mask).long() + + # classification label + batch_cls_label = (batch_roi_iou > cfg.RCNN.CLS_FG_THRESH).long() + invalid_mask = (batch_roi_iou > cfg.RCNN.CLS_BG_THRESH) & (batch_roi_iou < cfg.RCNN.CLS_FG_THRESH) + batch_cls_label[valid_mask == 0] = -1 + batch_cls_label[invalid_mask > 0] = -1 + + output_dict = {'sampled_pts': sampled_pts.view(-1, cfg.RCNN.NUM_POINTS, 3), + 'pts_feature': sampled_features.view(-1, cfg.RCNN.NUM_POINTS, sampled_features.shape[3]), + 'cls_label': batch_cls_label.view(-1), + 'reg_valid_mask': reg_valid_mask.view(-1), + 'gt_of_rois': batch_gt_of_rois.view(-1, 7), + 'gt_iou': batch_roi_iou.view(-1), + 'roi_boxes3d': batch_rois.view(-1, 7)} + + return output_dict + + def sample_rois_for_rcnn(self, roi_boxes3d, gt_boxes3d): + """ + :param roi_boxes3d: (B, M, 7) + :param gt_boxes3d: (B, N, 8) [x, y, z, h, w, l, ry, cls] + :return + batch_rois: (B, N, 7) + batch_gt_of_rois: (B, N, 8) + batch_roi_iou: (B, N) + """ + batch_size = roi_boxes3d.size(0) + + fg_rois_per_image = int(np.round(cfg.RCNN.FG_RATIO * cfg.RCNN.ROI_PER_IMAGE)) + + batch_rois = gt_boxes3d.new(batch_size, cfg.RCNN.ROI_PER_IMAGE, 7).zero_() + batch_gt_of_rois = gt_boxes3d.new(batch_size, cfg.RCNN.ROI_PER_IMAGE, 7).zero_() + batch_roi_iou = gt_boxes3d.new(batch_size, cfg.RCNN.ROI_PER_IMAGE).zero_() + + for idx in range(batch_size): + cur_roi, cur_gt = roi_boxes3d[idx], gt_boxes3d[idx] + + k = cur_gt.__len__() - 1 + while cur_gt[k].sum() == 0: + k -= 1 + cur_gt = cur_gt[:k + 1] + + # include gt boxes in the candidate rois + iou3d = iou3d_utils.boxes_iou3d_gpu(cur_roi, cur_gt[:, 0:7]) # (M, N) + + max_overlaps, gt_assignment = torch.max(iou3d, dim=1) + + # sample fg, easy_bg, hard_bg + fg_thresh = min(cfg.RCNN.REG_FG_THRESH, cfg.RCNN.CLS_FG_THRESH) + fg_inds = torch.nonzero((max_overlaps >= fg_thresh)).view(-1) + + # TODO: this will mix the fg and bg when CLS_BG_THRESH_LO < iou < CLS_BG_THRESH + # fg_inds = torch.cat((fg_inds, roi_assignment), dim=0) # consider the roi which has max_iou with gt as fg + + easy_bg_inds = torch.nonzero((max_overlaps < cfg.RCNN.CLS_BG_THRESH_LO)).view(-1) + hard_bg_inds = torch.nonzero((max_overlaps < cfg.RCNN.CLS_BG_THRESH) & + (max_overlaps >= cfg.RCNN.CLS_BG_THRESH_LO)).view(-1) + + fg_num_rois = fg_inds.numel() + bg_num_rois = hard_bg_inds.numel() + easy_bg_inds.numel() + + if fg_num_rois > 0 and bg_num_rois > 0: + # sampling fg + fg_rois_per_this_image = min(fg_rois_per_image, fg_num_rois) + + rand_num = torch.from_numpy(np.random.permutation(fg_num_rois)).type_as(gt_boxes3d).long() + fg_inds = 
fg_inds[rand_num[:fg_rois_per_this_image]] + + # sampling bg + bg_rois_per_this_image = cfg.RCNN.ROI_PER_IMAGE - fg_rois_per_this_image + bg_inds = self.sample_bg_inds(hard_bg_inds, easy_bg_inds, bg_rois_per_this_image) + + elif fg_num_rois > 0 and bg_num_rois == 0: + # sampling fg + rand_num = np.floor(np.random.rand(cfg.RCNN.ROI_PER_IMAGE) * fg_num_rois) + rand_num = torch.from_numpy(rand_num).type_as(gt_boxes3d).long() + fg_inds = fg_inds[rand_num] + fg_rois_per_this_image = cfg.RCNN.ROI_PER_IMAGE + bg_rois_per_this_image = 0 + elif bg_num_rois > 0 and fg_num_rois == 0: + # sampling bg + bg_rois_per_this_image = cfg.RCNN.ROI_PER_IMAGE + bg_inds = self.sample_bg_inds(hard_bg_inds, easy_bg_inds, bg_rois_per_this_image) + + fg_rois_per_this_image = 0 + else: + import pdb + pdb.set_trace() + raise NotImplementedError + + # augment the rois by noise + roi_list, roi_iou_list, roi_gt_list = [], [], [] + if fg_rois_per_this_image > 0: + fg_rois_src = cur_roi[fg_inds] + gt_of_fg_rois = cur_gt[gt_assignment[fg_inds]] + iou3d_src = max_overlaps[fg_inds] + fg_rois, fg_iou3d = self.aug_roi_by_noise_torch(fg_rois_src, gt_of_fg_rois, iou3d_src, + aug_times=cfg.RCNN.ROI_FG_AUG_TIMES) + roi_list.append(fg_rois) + roi_iou_list.append(fg_iou3d) + roi_gt_list.append(gt_of_fg_rois) + + if bg_rois_per_this_image > 0: + bg_rois_src = cur_roi[bg_inds] + gt_of_bg_rois = cur_gt[gt_assignment[bg_inds]] + iou3d_src = max_overlaps[bg_inds] + aug_times = 1 if cfg.RCNN.ROI_FG_AUG_TIMES > 0 else 0 + bg_rois, bg_iou3d = self.aug_roi_by_noise_torch(bg_rois_src, gt_of_bg_rois, iou3d_src, + aug_times=aug_times) + roi_list.append(bg_rois) + roi_iou_list.append(bg_iou3d) + roi_gt_list.append(gt_of_bg_rois) + + rois = torch.cat(roi_list, dim=0) + iou_of_rois = torch.cat(roi_iou_list, dim=0) + gt_of_rois = torch.cat(roi_gt_list, dim=0) + + batch_rois[idx] = rois + batch_gt_of_rois[idx] = gt_of_rois + batch_roi_iou[idx] = iou_of_rois + + return batch_rois, batch_gt_of_rois, batch_roi_iou + + def sample_bg_inds(self, hard_bg_inds, easy_bg_inds, bg_rois_per_this_image): + if hard_bg_inds.numel() > 0 and easy_bg_inds.numel() > 0: + hard_bg_rois_num = int(bg_rois_per_this_image * cfg.RCNN.HARD_BG_RATIO) + easy_bg_rois_num = bg_rois_per_this_image - hard_bg_rois_num + + # sampling hard bg + rand_idx = torch.randint(low=0, high=hard_bg_inds.numel(), size=(hard_bg_rois_num,)).long() + hard_bg_inds = hard_bg_inds[rand_idx] + + # sampling easy bg + rand_idx = torch.randint(low=0, high=easy_bg_inds.numel(), size=(easy_bg_rois_num,)).long() + easy_bg_inds = easy_bg_inds[rand_idx] + + bg_inds = torch.cat([hard_bg_inds, easy_bg_inds], dim=0) + elif hard_bg_inds.numel() > 0 and easy_bg_inds.numel() == 0: + hard_bg_rois_num = bg_rois_per_this_image + # sampling hard bg + rand_idx = torch.randint(low=0, high=hard_bg_inds.numel(), size=(hard_bg_rois_num,)).long() + bg_inds = hard_bg_inds[rand_idx] + elif hard_bg_inds.numel() == 0 and easy_bg_inds.numel() > 0: + easy_bg_rois_num = bg_rois_per_this_image + # sampling easy bg + rand_idx = torch.randint(low=0, high=easy_bg_inds.numel(), size=(easy_bg_rois_num,)).long() + bg_inds = easy_bg_inds[rand_idx] + else: + raise NotImplementedError + + return bg_inds + + def aug_roi_by_noise_torch(self, roi_boxes3d, gt_boxes3d, iou3d_src, aug_times=10): + iou_of_rois = torch.zeros(roi_boxes3d.shape[0]).type_as(gt_boxes3d) + pos_thresh = min(cfg.RCNN.REG_FG_THRESH, cfg.RCNN.CLS_FG_THRESH) + + for k in range(roi_boxes3d.shape[0]): + temp_iou = cnt = 0 + roi_box3d = roi_boxes3d[k] + + gt_box3d = 
gt_boxes3d[k].view(1, 7)
+            aug_box3d = roi_box3d
+            keep = True
+            while temp_iou < pos_thresh and cnt < aug_times:
+                if np.random.rand() < 0.2:
+                    aug_box3d = roi_box3d  # p=0.2 to keep the original roi box
+                    keep = True
+                else:
+                    aug_box3d = self.random_aug_box3d(roi_box3d)
+                    keep = False
+                aug_box3d = aug_box3d.view((1, 7))
+                iou3d = iou3d_utils.boxes_iou3d_gpu(aug_box3d, gt_box3d)
+                temp_iou = iou3d[0][0]
+                cnt += 1
+            roi_boxes3d[k] = aug_box3d.view(-1)
+            if cnt == 0 or keep:
+                iou_of_rois[k] = iou3d_src[k]
+            else:
+                iou_of_rois[k] = temp_iou
+        return roi_boxes3d, iou_of_rois
+
+    @staticmethod
+    def random_aug_box3d(box3d):
+        """
+        :param box3d: (7) [x, y, z, h, w, l, ry]
+        random shift, scale, orientation
+        """
+        if cfg.RCNN.REG_AUG_METHOD == 'single':
+            pos_shift = (torch.rand(3, device=box3d.device) - 0.5)  # [-0.5 ~ 0.5]
+            hwl_scale = (torch.rand(3, device=box3d.device) - 0.5) / (0.5 / 0.15) + 1.0  # [0.85 ~ 1.15]
+            angle_rot = (torch.rand(1, device=box3d.device) - 0.5) / (0.5 / (np.pi / 12))  # [-pi/12 ~ pi/12]
+            aug_box3d = torch.cat([box3d[0:3] + pos_shift, box3d[3:6] * hwl_scale, box3d[6:7] + angle_rot], dim=0)
+            return aug_box3d
+        elif cfg.RCNN.REG_AUG_METHOD == 'multiple':
+            # pos_range, hwl_range, angle_range, mean_iou
+            range_config = [[0.2, 0.1, np.pi / 12, 0.7],
+                            [0.3, 0.15, np.pi / 12, 0.6],
+                            [0.5, 0.15, np.pi / 9, 0.5],
+                            [0.8, 0.15, np.pi / 6, 0.3],
+                            [1.0, 0.15, np.pi / 3, 0.2]]
+            idx = torch.randint(low=0, high=len(range_config), size=(1,))[0].long()
+
+            pos_shift = ((torch.rand(3, device=box3d.device) - 0.5) / 0.5) * range_config[idx][0]
+            hwl_scale = ((torch.rand(3, device=box3d.device) - 0.5) / 0.5) * range_config[idx][1] + 1.0
+            angle_rot = ((torch.rand(1, device=box3d.device) - 0.5) / 0.5) * range_config[idx][2]
+
+            aug_box3d = torch.cat([box3d[0:3] + pos_shift, box3d[3:6] * hwl_scale, box3d[6:7] + angle_rot], dim=0)
+            return aug_box3d
+        elif cfg.RCNN.REG_AUG_METHOD == 'normal':
+            x_shift = np.random.normal(loc=0, scale=0.3)
+            y_shift = np.random.normal(loc=0, scale=0.2)
+            z_shift = np.random.normal(loc=0, scale=0.3)
+            h_shift = np.random.normal(loc=0, scale=0.25)
+            w_shift = np.random.normal(loc=0, scale=0.15)
+            l_shift = np.random.normal(loc=0, scale=0.5)
+            # draw ry uniformly in [-pi/12, pi/12] with numpy, consistent with
+            # the other shifts (the original called torch.rand() with no size)
+            ry_shift = np.random.uniform(-np.pi / 12, np.pi / 12)
+
+            aug_box3d = np.array([box3d[0] + x_shift, box3d[1] + y_shift, box3d[2] + z_shift, box3d[3] + h_shift,
+                                  box3d[4] + w_shift, box3d[5] + l_shift, box3d[6] + ry_shift], dtype=np.float32)
+            aug_box3d = torch.from_numpy(aug_box3d).type_as(box3d)
+            return aug_box3d
+        else:
+            raise NotImplementedError
+
+    def data_augmentation(self, pts, rois, gt_of_rois):
+        """
+        :param pts: (B, M, 512, 3)
+        :param rois: (B, M, 
7)
+        :param gt_of_rois: (B, M, 7)
+        :return:
+        """
+        batch_size, boxes_num = pts.shape[0], pts.shape[1]
+
+        # rotation augmentation
+        angles = ((torch.rand((batch_size, boxes_num), device=pts.device) - 0.5) / 0.5) * (np.pi / cfg.AUG_ROT_RANGE)
+
+        # calculate gt alpha from gt_of_rois
+        temp_x, temp_z, temp_ry = gt_of_rois[:, :, 0], gt_of_rois[:, :, 2], gt_of_rois[:, :, 6]
+        temp_beta = torch.atan2(temp_z, temp_x)
+        gt_alpha = -torch.sign(temp_beta) * np.pi / 2 + temp_beta + temp_ry  # (B, M)
+
+        temp_x, temp_z, temp_ry = rois[:, :, 0], rois[:, :, 2], rois[:, :, 6]
+        temp_beta = torch.atan2(temp_z, temp_x)
+        roi_alpha = -torch.sign(temp_beta) * np.pi / 2 + temp_beta + temp_ry  # (B, M)
+
+        for k in range(batch_size):
+            pts[k] = kitti_utils.rotate_pc_along_y_torch(pts[k], angles[k])
+            gt_of_rois[k] = kitti_utils.rotate_pc_along_y_torch(gt_of_rois[k].unsqueeze(dim=1), angles[k]).squeeze(dim=1)
+            rois[k] = kitti_utils.rotate_pc_along_y_torch(rois[k].unsqueeze(dim=1), angles[k]).squeeze(dim=1)
+
+        # calculate the ry after rotation
+        temp_x, temp_z = gt_of_rois[:, :, 0], gt_of_rois[:, :, 2]
+        temp_beta = torch.atan2(temp_z, temp_x)
+        gt_of_rois[:, :, 6] = torch.sign(temp_beta) * np.pi / 2 + gt_alpha - temp_beta
+
+        temp_x, temp_z = rois[:, :, 0], rois[:, :, 2]
+        temp_beta = torch.atan2(temp_z, temp_x)
+        rois[:, :, 6] = torch.sign(temp_beta) * np.pi / 2 + roi_alpha - temp_beta
+
+        # scaling augmentation
+        scales = 1 + ((torch.rand((batch_size, boxes_num), device=pts.device) - 0.5) / 0.5) * 0.05
+        pts = pts * scales.unsqueeze(dim=2).unsqueeze(dim=3)
+        gt_of_rois[:, :, 0:6] = gt_of_rois[:, :, 0:6] * scales.unsqueeze(dim=2)
+        rois[:, :, 0:6] = rois[:, :, 0:6] * scales.unsqueeze(dim=2)
+
+        # flip augmentation
+        flip_flag = torch.sign(torch.rand((batch_size, boxes_num), device=pts.device) - 0.5)
+        pts[:, :, :, 0] = pts[:, :, :, 0] * flip_flag.unsqueeze(dim=2)
+        gt_of_rois[:, :, 0] = gt_of_rois[:, :, 0] * flip_flag
+        # flip orientation: ry > 0: pi - ry, ry < 0: -pi - ry
+        src_ry = gt_of_rois[:, :, 6]
+        ry = (flip_flag == 1).float() * src_ry + (flip_flag == -1).float() * (torch.sign(src_ry) * np.pi - src_ry)
+        gt_of_rois[:, :, 6] = ry
+
+        rois[:, :, 0] = rois[:, :, 0] * flip_flag
+        # flip orientation: ry > 0: pi - ry, ry < 0: -pi - ry
+        src_ry = rois[:, :, 6]
+        ry = (flip_flag == 1).float() * src_ry + (flip_flag == -1).float() * (torch.sign(src_ry) * np.pi - src_ry)
+        rois[:, :, 6] = ry
+
+        return pts, rois, gt_of_rois
diff --git a/PointRCNN/lib/utils/bbox_transform.py b/PointRCNN/lib/utils/bbox_transform.py
new file mode 100644
index 0000000..8ba8bd9
--- /dev/null
+++ b/PointRCNN/lib/utils/bbox_transform.py
@@ -0,0 +1,121 @@
+import torch
+import numpy as np
+
+
+def rotate_pc_along_y_torch(pc, rot_angle):
+    """
+    :param pc: (N, 3 + C)
+    :param rot_angle: (N)
+    :return:
+    """
+    cosa = torch.cos(rot_angle).view(-1, 1)
+    sina = torch.sin(rot_angle).view(-1, 1)
+
+    row_1 = torch.cat([cosa, -sina], dim=1)
+    row_2 = torch.cat([sina, cosa], dim=1)
+    R = torch.cat((row_1.unsqueeze(dim=1), row_2.unsqueeze(dim=1)), dim=1)  # (N, 2, 2)
+
+    pc_temp = pc[:, [0, 2]].unsqueeze(dim=1)  # (N, 1, 2)
+
+    pc[:, [0, 2]] = torch.matmul(pc_temp, R.permute(0, 2, 1)).squeeze(dim=1)
+    return pc
+
+
+def decode_bbox_target(roi_box3d, pred_reg, loc_scope, loc_bin_size, num_head_bin, anchor_size,
+                       get_xz_fine=True, get_y_by_bin=False, loc_y_scope=0.5, loc_y_bin_size=0.25, get_ry_fine=False):
+    """
+    :param roi_box3d: (N, 7)
+    :param pred_reg: (N, C)
+    :param loc_scope:
+    :param loc_bin_size:
+    :param num_head_bin:
+    :param 
anchor_size: + :param get_xz_fine: + :param get_y_by_bin: + :param loc_y_scope: + :param loc_y_bin_size: + :param get_ry_fine: + :return: + """ + anchor_size = anchor_size.to(roi_box3d.get_device()) + per_loc_bin_num = int(loc_scope / loc_bin_size) * 2 + loc_y_bin_num = int(loc_y_scope / loc_y_bin_size) * 2 + + # recover xz localization + x_bin_l, x_bin_r = 0, per_loc_bin_num + z_bin_l, z_bin_r = per_loc_bin_num, per_loc_bin_num * 2 + start_offset = z_bin_r + + x_bin = torch.argmax(pred_reg[:, x_bin_l: x_bin_r], dim=1) + z_bin = torch.argmax(pred_reg[:, z_bin_l: z_bin_r], dim=1) + + pos_x = x_bin.float() * loc_bin_size + loc_bin_size / 2 - loc_scope + pos_z = z_bin.float() * loc_bin_size + loc_bin_size / 2 - loc_scope + + if get_xz_fine: + x_res_l, x_res_r = per_loc_bin_num * 2, per_loc_bin_num * 3 + z_res_l, z_res_r = per_loc_bin_num * 3, per_loc_bin_num * 4 + start_offset = z_res_r + + x_res_norm = torch.gather(pred_reg[:, x_res_l: x_res_r], dim=1, index=x_bin.unsqueeze(dim=1)).squeeze(dim=1) + z_res_norm = torch.gather(pred_reg[:, z_res_l: z_res_r], dim=1, index=z_bin.unsqueeze(dim=1)).squeeze(dim=1) + x_res = x_res_norm * loc_bin_size + z_res = z_res_norm * loc_bin_size + + pos_x += x_res + pos_z += z_res + + # recover y localization + if get_y_by_bin: + y_bin_l, y_bin_r = start_offset, start_offset + loc_y_bin_num + y_res_l, y_res_r = y_bin_r, y_bin_r + loc_y_bin_num + start_offset = y_res_r + + y_bin = torch.argmax(pred_reg[:, y_bin_l: y_bin_r], dim=1) + y_res_norm = torch.gather(pred_reg[:, y_res_l: y_res_r], dim=1, index=y_bin.unsqueeze(dim=1)).squeeze(dim=1) + y_res = y_res_norm * loc_y_bin_size + pos_y = y_bin.float() * loc_y_bin_size + loc_y_bin_size / 2 - loc_y_scope + y_res + pos_y = pos_y + roi_box3d[:, 1] + else: + y_offset_l, y_offset_r = start_offset, start_offset + 1 + start_offset = y_offset_r + + pos_y = roi_box3d[:, 1] + pred_reg[:, y_offset_l] + + # recover ry rotation + ry_bin_l, ry_bin_r = start_offset, start_offset + num_head_bin + ry_res_l, ry_res_r = ry_bin_r, ry_bin_r + num_head_bin + + ry_bin = torch.argmax(pred_reg[:, ry_bin_l: ry_bin_r], dim=1) + ry_res_norm = torch.gather(pred_reg[:, ry_res_l: ry_res_r], dim=1, index=ry_bin.unsqueeze(dim=1)).squeeze(dim=1) + if get_ry_fine: + # divide pi/2 into several bins + angle_per_class = (np.pi / 2) / num_head_bin + ry_res = ry_res_norm * (angle_per_class / 2) + ry = (ry_bin.float() * angle_per_class + angle_per_class / 2) + ry_res - np.pi / 4 + else: + angle_per_class = (2 * np.pi) / num_head_bin + ry_res = ry_res_norm * (angle_per_class / 2) + + # bin_center is (0, 30, 60, 90, 120, ..., 270, 300, 330) + ry = (ry_bin.float() * angle_per_class + ry_res) % (2 * np.pi) + ry[ry > np.pi] -= 2 * np.pi + + # recover size + size_res_l, size_res_r = ry_res_r, ry_res_r + 3 + assert size_res_r == pred_reg.shape[1] + + size_res_norm = pred_reg[:, size_res_l: size_res_r] + hwl = size_res_norm * anchor_size + anchor_size + + # shift to original coords + roi_center = roi_box3d[:, 0:3] + shift_ret_box3d = torch.cat((pos_x.view(-1, 1), pos_y.view(-1, 1), pos_z.view(-1, 1), hwl, ry.view(-1, 1)), dim=1) + ret_box3d = shift_ret_box3d + if roi_box3d.shape[1] == 7: + roi_ry = roi_box3d[:, 6] + ret_box3d = rotate_pc_along_y_torch(shift_ret_box3d, - roi_ry) + ret_box3d[:, 6] += roi_ry + ret_box3d[:, [0, 2]] += roi_center[:, [0, 2]] + + return ret_box3d diff --git a/PointRCNN/lib/utils/calibration.py b/PointRCNN/lib/utils/calibration.py new file mode 100644 index 0000000..66a0421 --- /dev/null +++ b/PointRCNN/lib/utils/calibration.py @@ -0,0 
+1,162 @@ +import numpy as np +import os + + +def get_calib_from_file(calib_file): + with open(calib_file) as f: + lines = f.readlines() + + obj = lines[2].strip().split(' ')[1:] + P2 = np.array(obj, dtype=np.float32) + obj = lines[3].strip().split(' ')[1:] + P3 = np.array(obj, dtype=np.float32) + obj = lines[4].strip().split(' ')[1:] + R0 = np.array(obj, dtype=np.float32) + obj = lines[5].strip().split(' ')[1:] + Tr_velo_to_cam = np.array(obj, dtype=np.float32) + + return {'P2': P2.reshape(3, 4), + 'P3': P3.reshape(3, 4), + 'R0': R0.reshape(3, 3), + 'Tr_velo2cam': Tr_velo_to_cam.reshape(3, 4)} + + +class Calibration(object): + def __init__(self, calib_file): + if isinstance(calib_file, str): + calib = get_calib_from_file(calib_file) + else: + calib = calib_file + + self.P2 = calib['P2'] # 3 x 4 + self.P3 = calib['P3'] # 3 x 4 + self.R0 = calib['R0'] # 3 x 3 + self.V2C = calib['Tr_velo2cam'] # 3 x 4 + self.C2V = inverse_rigid_trans(self.V2C) + + # Camera intrinsics and extrinsics + self.cu = self.P2[0, 2] + self.cv = self.P2[1, 2] + self.fu = self.P2[0, 0] + self.fv = self.P2[1, 1] + self.tx = self.P2[0, 3] / (-self.fu) + self.ty = self.P2[1, 3] / (-self.fv) + + def cart_to_hom(self, pts): + """ + :param pts: (N, 3 or 2) + :return pts_hom: (N, 4 or 3) + """ + pts_hom = np.hstack((pts, np.ones((pts.shape[0], 1), dtype=np.float32))) + return pts_hom + + def rect_to_lidar(self, pts_rect): + """ + :param pts_rect: (N, 3) + :return pts_lidar: (N, 3) + """ + pts_hom = self.cart_to_hom(np.dot(pts_rect, np.linalg.inv(self.R0.T))) + pts_rect = np.dot(pts_hom, self.C2V.T) + # pts_rect = reduce(np.dot, (pts_lidar_hom, self.V2C.T, self.R0.T)) + return pts_rect + + def lidar_to_rect(self, pts_lidar): + """ + :param pts_lidar: (N, 3) + :return pts_rect: (N, 3) + """ + pts_lidar_hom = self.cart_to_hom(pts_lidar) + pts_rect = np.dot(pts_lidar_hom, np.dot(self.V2C.T, self.R0.T)) + # pts_rect = reduce(np.dot, (pts_lidar_hom, self.V2C.T, self.R0.T)) + return pts_rect + + def rect_to_img(self, pts_rect): + """ + :param pts_rect: (N, 3) + :return pts_img: (N, 2) + """ + pts_rect_hom = self.cart_to_hom(pts_rect) + pts_2d_hom = np.dot(pts_rect_hom, self.P2.T) + pts_img = (pts_2d_hom[:, 0:2].T / pts_rect_hom[:, 2]).T # (N, 2) + pts_rect_depth = pts_2d_hom[:, 2] - self.P2.T[3, 2] # depth in rect camera coord + return pts_img, pts_rect_depth + + def lidar_to_img(self, pts_lidar): + """ + :param pts_lidar: (N, 3) + :return pts_img: (N, 2) + """ + pts_rect = self.lidar_to_rect(pts_lidar) + pts_img, pts_depth = self.rect_to_img(pts_rect) + return pts_img, pts_depth + + def img_to_rect(self, u, v, depth_rect): + """ + :param u: (N) + :param v: (N) + :param depth_rect: (N) + :return: + """ + x = ((u - self.cu) * depth_rect) / self.fu + self.tx + y = ((v - self.cv) * depth_rect) / self.fv + self.ty + pts_rect = np.concatenate((x.reshape(-1, 1), y.reshape(-1, 1), depth_rect.reshape(-1, 1)), axis=1) + return pts_rect + + def depthmap_to_rect(self, depth_map): + """ + :param depth_map: (H, W), depth_map + :return: + """ + x_range = np.arange(0, depth_map.shape[1]) + y_range = np.arange(0, depth_map.shape[0]) + x_idxs, y_idxs = np.meshgrid(x_range, y_range) + x_idxs, y_idxs = x_idxs.reshape(-1), y_idxs.reshape(-1) + depth = depth_map[y_idxs, x_idxs] + pts_rect = self.img_to_rect(x_idxs, y_idxs, depth) + return pts_rect, x_idxs, y_idxs + + def corners3d_to_img_boxes(self, corners3d): + """ + :param corners3d: (N, 8, 3) corners in rect coordinate + :return: boxes: (None, 4) [x1, y1, x2, y2] in rgb coordinate + :return: 
boxes_corner: (None, 8) [xi, yi] in rgb coordinate + """ + sample_num = corners3d.shape[0] + corners3d_hom = np.concatenate((corners3d, np.ones((sample_num, 8, 1))), axis=2) # (N, 8, 4) + + img_pts = np.matmul(corners3d_hom, self.P2.T) # (N, 8, 3) + + x, y = img_pts[:, :, 0] / img_pts[:, :, 2], img_pts[:, :, 1] / img_pts[:, :, 2] + x1, y1 = np.min(x, axis=1), np.min(y, axis=1) + x2, y2 = np.max(x, axis=1), np.max(y, axis=1) + + boxes = np.concatenate((x1.reshape(-1, 1), y1.reshape(-1, 1), x2.reshape(-1, 1), y2.reshape(-1, 1)), axis=1) + boxes_corner = np.concatenate((x.reshape(-1, 8, 1), y.reshape(-1, 8, 1)), axis=2) + + return boxes, boxes_corner + + def camera_dis_to_rect(self, u, v, d): + """ + Can only process valid u, v, d, which means u, v can not beyond the image shape, reprojection error 0.02 + :param u: (N) + :param v: (N) + :param d: (N), the distance between camera and 3d points, d^2 = x^2 + y^2 + z^2 + :return: + """ + assert self.fu == self.fv, '%.8f != %.8f' % (self.fu, self.fv) + fd = np.sqrt((u - self.cu)**2 + (v - self.cv)**2 + self.fu**2) + x = ((u - self.cu) * d) / fd + self.tx + y = ((v - self.cv) * d) / fd + self.ty + z = np.sqrt(d**2 - x**2 - y**2) + pts_rect = np.concatenate((x.reshape(-1, 1), y.reshape(-1, 1), z.reshape(-1, 1)), axis=1) + return pts_rect + + +def inverse_rigid_trans(Tr): + ''' Inverse a rigid body transform matrix (3x4 as [R|t]) + [R'|-R't; 0|1] + ''' + inv_Tr = np.zeros_like(Tr) # 3x4 + inv_Tr[0:3, 0:3] = np.transpose(Tr[0:3, 0:3]) + inv_Tr[0:3, 3] = np.dot(-np.transpose(Tr[0:3, 0:3]), Tr[0:3, 3]) + return inv_Tr diff --git a/PointRCNN/lib/utils/iou3d/iou3d_utils.py b/PointRCNN/lib/utils/iou3d/iou3d_utils.py new file mode 100644 index 0000000..fa8bd82 --- /dev/null +++ b/PointRCNN/lib/utils/iou3d/iou3d_utils.py @@ -0,0 +1,91 @@ +import torch +import iou3d_cuda +import lib.utils.kitti_utils as kitti_utils + + +def boxes_iou_bev(boxes_a, boxes_b): + """ + :param boxes_a: (M, 5) + :param boxes_b: (N, 5) + :return: + ans_iou: (M, N) + """ + + ans_iou = torch.cuda.FloatTensor(torch.Size((boxes_a.shape[0], boxes_b.shape[0]))).zero_() + + iou3d_cuda.boxes_iou_bev_gpu(boxes_a.contiguous(), boxes_b.contiguous(), ans_iou) + + return ans_iou + + +def boxes_iou3d_gpu(boxes_a, boxes_b): + """ + :param boxes_a: (N, 7) [x, y, z, h, w, l, ry] + :param boxes_b: (M, 7) [x, y, z, h, w, l, ry] + :return: + ans_iou: (M, N) + """ + boxes_a_bev = kitti_utils.boxes3d_to_bev_torch(boxes_a) + boxes_b_bev = kitti_utils.boxes3d_to_bev_torch(boxes_b) + + # bev overlap + overlaps_bev = torch.cuda.FloatTensor(torch.Size((boxes_a.shape[0], boxes_b.shape[0]))).zero_() # (N, M) + iou3d_cuda.boxes_overlap_bev_gpu(boxes_a_bev.contiguous(), boxes_b_bev.contiguous(), overlaps_bev) + + # height overlap + boxes_a_height_min = (boxes_a[:, 1] - boxes_a[:, 3]).view(-1, 1) + boxes_a_height_max = boxes_a[:, 1].view(-1, 1) + boxes_b_height_min = (boxes_b[:, 1] - boxes_b[:, 3]).view(1, -1) + boxes_b_height_max = boxes_b[:, 1].view(1, -1) + + max_of_min = torch.max(boxes_a_height_min, boxes_b_height_min) + min_of_max = torch.min(boxes_a_height_max, boxes_b_height_max) + overlaps_h = torch.clamp(min_of_max - max_of_min, min=0) + + # 3d iou + overlaps_3d = overlaps_bev * overlaps_h + + vol_a = (boxes_a[:, 3] * boxes_a[:, 4] * boxes_a[:, 5]).view(-1, 1) + vol_b = (boxes_b[:, 3] * boxes_b[:, 4] * boxes_b[:, 5]).view(1, -1) + + iou3d = overlaps_3d / torch.clamp(vol_a + vol_b - overlaps_3d, min=1e-7) + + return iou3d + + +def nms_gpu(boxes, scores, thresh): + """ + :param boxes: (N, 5) [x1, y1, x2, 
y2, ry]
+    :param scores: (N)
+    :param thresh:
+    :return:
+    """
+    # areas = (x2 - x1) * (y2 - y1)
+    order = scores.sort(0, descending=True)[1]
+
+    boxes = boxes[order].contiguous()
+
+    keep = torch.LongTensor(boxes.size(0))
+    num_out = iou3d_cuda.nms_gpu(boxes, keep, thresh)
+    return order[keep[:num_out].cuda()].contiguous()
+
+
+def nms_normal_gpu(boxes, scores, thresh):
+    """
+    :param boxes: (N, 5) [x1, y1, x2, y2, ry]
+    :param scores: (N)
+    :param thresh:
+    :return:
+    """
+    # areas = (x2 - x1) * (y2 - y1)
+    order = scores.sort(0, descending=True)[1]
+
+    boxes = boxes[order].contiguous()
+
+    keep = torch.LongTensor(boxes.size(0))
+    num_out = iou3d_cuda.nms_normal_gpu(boxes, keep, thresh)
+    return order[keep[:num_out].cuda()].contiguous()
+
+
+if __name__ == '__main__':
+    pass
diff --git a/PointRCNN/lib/utils/iou3d/setup.py b/PointRCNN/lib/utils/iou3d/setup.py
new file mode 100644
index 0000000..f43517c
--- /dev/null
+++ b/PointRCNN/lib/utils/iou3d/setup.py
@@ -0,0 +1,14 @@
+from setuptools import setup
+from torch.utils.cpp_extension import BuildExtension, CUDAExtension
+
+setup(
+    name='iou3d',
+    ext_modules=[
+        CUDAExtension('iou3d_cuda', [
+            'src/iou3d.cpp',
+            'src/iou3d_kernel.cu',
+        ],
+        extra_compile_args={'cxx': ['-g'],
+                            'nvcc': ['-O2']})
+    ],
+    cmdclass={'build_ext': BuildExtension})
diff --git a/PointRCNN/lib/utils/iou3d/src/iou3d.cpp b/PointRCNN/lib/utils/iou3d/src/iou3d.cpp
new file mode 100644
index 0000000..7ac6272
--- /dev/null
+++ b/PointRCNN/lib/utils/iou3d/src/iou3d.cpp
@@ -0,0 +1,180 @@
+#include <torch/serialize/tensor.h>
+#include <torch/extension.h>
+#include <vector>
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+
+#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
+#define CHECK_CONTIGUOUS(x) AT_CHECK(x.is_contiguous(), #x, " must be contiguous ")
+#define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x)
+
+#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
+
+#define CHECK_ERROR(ans) { gpuAssert((ans), __FILE__, __LINE__); }
+inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
+{
+    if (code != cudaSuccess)
+    {
+        fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
+        if (abort) exit(code);
+    }
+}
+
+const int THREADS_PER_BLOCK_NMS = sizeof(unsigned long long) * 8;
+
+
+void boxesoverlapLauncher(const int num_a, const float *boxes_a, const int num_b, const float *boxes_b, float *ans_overlap);
+void boxesioubevLauncher(const int num_a, const float *boxes_a, const int num_b, const float *boxes_b, float *ans_iou);
+void nmsLauncher(const float *boxes, unsigned long long * mask, int boxes_num, float nms_overlap_thresh);
+void nmsNormalLauncher(const float *boxes, unsigned long long * mask, int boxes_num, float nms_overlap_thresh);
+
+int boxes_overlap_bev_gpu(at::Tensor boxes_a, at::Tensor boxes_b, at::Tensor ans_overlap){
+    // params boxes_a: (N, 5) [x1, y1, x2, y2, ry]
+    // params boxes_b: (M, 5)
+    // params ans_overlap: (N, M)
+
+    CHECK_INPUT(boxes_a);
+    CHECK_INPUT(boxes_b);
+    CHECK_INPUT(ans_overlap);
+
+    int num_a = boxes_a.size(0);
+    int num_b = boxes_b.size(0);
+
+    const float * boxes_a_data = boxes_a.data<float>();
+    const float * boxes_b_data = boxes_b.data<float>();
+    float * ans_overlap_data = ans_overlap.data<float>();
+
+    boxesoverlapLauncher(num_a, boxes_a_data, num_b, boxes_b_data, ans_overlap_data);
+
+    return 1;
+}
+
+int boxes_iou_bev_gpu(at::Tensor boxes_a, at::Tensor boxes_b, at::Tensor ans_iou){
+    // params boxes_a: (N, 5) [x1, y1, x2, y2, ry]
+    // params boxes_b: (M, 5)
+    // params ans_overlap: (N, M)
+
+    CHECK_INPUT(boxes_a);
+    CHECK_INPUT(boxes_b);
+int boxes_iou_bev_gpu(at::Tensor boxes_a, at::Tensor boxes_b, at::Tensor ans_iou){
+    // params boxes_a: (N, 5) [x1, y1, x2, y2, ry]
+    // params boxes_b: (M, 5)
+    // params ans_iou: (N, M)
+
+    CHECK_INPUT(boxes_a);
+    CHECK_INPUT(boxes_b);
+    CHECK_INPUT(ans_iou);
+
+    int num_a = boxes_a.size(0);
+    int num_b = boxes_b.size(0);
+
+    const float * boxes_a_data = boxes_a.data<float>();
+    const float * boxes_b_data = boxes_b.data<float>();
+    float * ans_iou_data = ans_iou.data<float>();
+
+    boxesioubevLauncher(num_a, boxes_a_data, num_b, boxes_b_data, ans_iou_data);
+
+    return 1;
+}
+
+int nms_gpu(at::Tensor boxes, at::Tensor keep, float nms_overlap_thresh){
+    // params boxes: (N, 5) [x1, y1, x2, y2, ry]
+    // params keep: (N)
+
+    CHECK_INPUT(boxes);
+    CHECK_CONTIGUOUS(keep);
+
+    int boxes_num = boxes.size(0);
+    const float * boxes_data = boxes.data<float>();
+    long * keep_data = keep.data<long>();
+
+    const int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS);
+
+    unsigned long long *mask_data = NULL;
+    CHECK_ERROR(cudaMalloc((void**)&mask_data, boxes_num * col_blocks * sizeof(unsigned long long)));
+    nmsLauncher(boxes_data, mask_data, boxes_num, nms_overlap_thresh);
+
+    // unsigned long long mask_cpu[boxes_num * col_blocks];
+    // unsigned long long *mask_cpu = new unsigned long long [boxes_num * col_blocks];
+    std::vector<unsigned long long> mask_cpu(boxes_num * col_blocks);
+
+//    printf("boxes_num=%d, col_blocks=%d\n", boxes_num, col_blocks);
+    CHECK_ERROR(cudaMemcpy(&mask_cpu[0], mask_data, boxes_num * col_blocks * sizeof(unsigned long long),
+                           cudaMemcpyDeviceToHost));
+
+    cudaFree(mask_data);
+
+    unsigned long long remv_cpu[col_blocks];
+    memset(remv_cpu, 0, col_blocks * sizeof(unsigned long long));
+
+    int num_to_keep = 0;
+
+    for (int i = 0; i < boxes_num; i++){
+        int nblock = i / THREADS_PER_BLOCK_NMS;
+        int inblock = i % THREADS_PER_BLOCK_NMS;
+
+        if (!(remv_cpu[nblock] & (1ULL << inblock))){
+            keep_data[num_to_keep++] = i;
+            unsigned long long *p = &mask_cpu[0] + i * col_blocks;
+            for (int j = nblock; j < col_blocks; j++){
+                remv_cpu[j] |= p[j];
+            }
+        }
+    }
+    if ( cudaSuccess != cudaGetLastError() ) printf( "Error!\n" );
+
+    return num_to_keep;
+}
+
+
+int nms_normal_gpu(at::Tensor boxes, at::Tensor keep, float nms_overlap_thresh){
+    // params boxes: (N, 5) [x1, y1, x2, y2, ry]
+    // params keep: (N)
+
+    CHECK_INPUT(boxes);
+    CHECK_CONTIGUOUS(keep);
+
+    int boxes_num = boxes.size(0);
+    const float * boxes_data = boxes.data<float>();
+    long * keep_data = keep.data<long>();
+
+    const int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS);
+
+    unsigned long long *mask_data = NULL;
+    CHECK_ERROR(cudaMalloc((void**)&mask_data, boxes_num * col_blocks * sizeof(unsigned long long)));
+    nmsNormalLauncher(boxes_data, mask_data, boxes_num, nms_overlap_thresh);
+
+    // unsigned long long mask_cpu[boxes_num * col_blocks];
+    // unsigned long long *mask_cpu = new unsigned long long [boxes_num * col_blocks];
+    std::vector<unsigned long long> mask_cpu(boxes_num * col_blocks);
+
+//    printf("boxes_num=%d, col_blocks=%d\n", boxes_num, col_blocks);
+    CHECK_ERROR(cudaMemcpy(&mask_cpu[0], mask_data, boxes_num * col_blocks * sizeof(unsigned long long),
+                           cudaMemcpyDeviceToHost));
+
+    cudaFree(mask_data);
+
+    unsigned long long remv_cpu[col_blocks];
+    memset(remv_cpu, 0, col_blocks * sizeof(unsigned long long));
+
+    int num_to_keep = 0;
+
+    for (int i = 0; i < boxes_num; i++){
+        int nblock = i / THREADS_PER_BLOCK_NMS;
+        int inblock = i % THREADS_PER_BLOCK_NMS;
+
+        if (!(remv_cpu[nblock] & (1ULL << inblock))){
+            keep_data[num_to_keep++] = i;
+            unsigned long long *p = &mask_cpu[0] + i * col_blocks;
+            for (int j = nblock; j < col_blocks; j++){
+                remv_cpu[j] |= p[j];
+            }
+        }
+    }
+    if ( cudaSuccess != cudaGetLastError() ) printf( "Error!\n" );
+
+    return num_to_keep;
+}
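+
+// Note on the scheme shared by nms_gpu and nms_normal_gpu above: the launcher fills,
+// for every box i, ceil(N / 64) 64-bit words whose set bits mark the lower-scored
+// boxes that i overlaps beyond the threshold; the host-side loop then walks boxes in
+// descending score order and keeps a box only if no already-kept box suppresses it.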
m.def("boxes_overlap_bev_gpu", &boxes_overlap_bev_gpu, "oriented boxes overlap"); + m.def("boxes_iou_bev_gpu", &boxes_iou_bev_gpu, "oriented boxes iou"); + m.def("nms_gpu", &nms_gpu, "oriented nms gpu"); + m.def("nms_normal_gpu", &nms_normal_gpu, "nms gpu"); +} + diff --git a/PointRCNN/lib/utils/iou3d/src/iou3d_kernel.cu b/PointRCNN/lib/utils/iou3d/src/iou3d_kernel.cu new file mode 100644 index 0000000..328a557 --- /dev/null +++ b/PointRCNN/lib/utils/iou3d/src/iou3d_kernel.cu @@ -0,0 +1,387 @@ +/* +3D IoU Calculation and Rotated NMS(modified from 2D NMS written by others) +Written by Shaoshuai Shi +All Rights Reserved 2018. +*/ + +#include +#define THREADS_PER_BLOCK 16 +#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0)) + +//#define DEBUG +const int THREADS_PER_BLOCK_NMS = sizeof(unsigned long long) * 8; +const float EPS = 1e-8; +struct Point { + float x, y; + __device__ Point() {} + __device__ Point(double _x, double _y){ + x = _x, y = _y; + } + + __device__ void set(float _x, float _y){ + x = _x; y = _y; + } + + __device__ Point operator +(const Point &b)const{ + return Point(x + b.x, y + b.y); + } + + __device__ Point operator -(const Point &b)const{ + return Point(x - b.x, y - b.y); + } +}; + +__device__ inline float cross(const Point &a, const Point &b){ + return a.x * b.y - a.y * b.x; +} + +__device__ inline float cross(const Point &p1, const Point &p2, const Point &p0){ + return (p1.x - p0.x) * (p2.y - p0.y) - (p2.x - p0.x) * (p1.y - p0.y); +} + +__device__ int check_rect_cross(const Point &p1, const Point &p2, const Point &q1, const Point &q2){ + int ret = min(p1.x,p2.x) <= max(q1.x,q2.x) && + min(q1.x,q2.x) <= max(p1.x,p2.x) && + min(p1.y,p2.y) <= max(q1.y,q2.y) && + min(q1.y,q2.y) <= max(p1.y,p2.y); + return ret; +} + +__device__ inline int check_in_box2d(const float *box, const Point &p){ + //params: box (5) [x1, y1, x2, y2, angle] + const float MARGIN = 1e-5; + + float center_x = (box[0] + box[2]) / 2; + float center_y = (box[1] + box[3]) / 2; + float angle_cos = cos(-box[4]), angle_sin = sin(-box[4]); // rotate the point in the opposite direction of box + float rot_x = (p.x - center_x) * angle_cos + (p.y - center_y) * angle_sin + center_x; + float rot_y = -(p.x - center_x) * angle_sin + (p.y - center_y) * angle_cos + center_y; +#ifdef DEBUG + printf("box: (%.3f, %.3f, %.3f, %.3f, %.3f)\n", box[0], box[1], box[2], box[3], box[4]); + printf("center: (%.3f, %.3f), cossin(%.3f, %.3f), src(%.3f, %.3f), rot(%.3f, %.3f)\n", center_x, center_y, + angle_cos, angle_sin, p.x, p.y, rot_x, rot_y); +#endif + return (rot_x > box[0] - MARGIN && rot_x < box[2] + MARGIN && rot_y > box[1] - MARGIN && rot_y < box[3] + MARGIN); +} + +__device__ inline int intersection(const Point &p1, const Point &p0, const Point &q1, const Point &q0, Point &ans){ + // fast exclusion + if (check_rect_cross(p0, p1, q0, q1) == 0) return 0; + + // check cross standing + float s1 = cross(q0, p1, p0); + float s2 = cross(p1, q1, p0); + float s3 = cross(p0, q1, q0); + float s4 = cross(q1, p1, q0); + + if (!(s1 * s2 > 0 && s3 * s4 > 0)) return 0; + + // calculate intersection of two lines + float s5 = cross(q1, p1, p0); + if(fabs(s5 - s1) > EPS){ + ans.x = (s5 * q0.x - s1 * q1.x) / (s5 - s1); + ans.y = (s5 * q0.y - s1 * q1.y) / (s5 - s1); + + } + else{ + float a0 = p0.y - p1.y, b0 = p1.x - p0.x, c0 = p0.x * p1.y - p1.x * p0.y; + float a1 = q0.y - q1.y, b1 = q1.x - q0.x, c1 = q0.x * q1.y - q1.x * q0.y; + float D = a0 * b1 - a1 * b0; + + ans.x = (b0 * c1 - b1 * c0) / D; + ans.y = (a1 * c0 - a0 * c1) / D; + } + + return 1; +} 
+
+__device__ inline void rotate_around_center(const Point &center, const float angle_cos, const float angle_sin, Point &p){
+    float new_x = (p.x - center.x) * angle_cos + (p.y - center.y) * angle_sin + center.x;
+    float new_y = -(p.x - center.x) * angle_sin + (p.y - center.y) * angle_cos + center.y;
+    p.set(new_x, new_y);
+}
+
+__device__ inline int point_cmp(const Point &a, const Point &b, const Point &center){
+    return atan2(a.y - center.y, a.x - center.x) > atan2(b.y - center.y, b.x - center.x);
+}
+
+__device__ inline float box_overlap(const float *box_a, const float *box_b){
+    // params: box_a (5) [x1, y1, x2, y2, angle]
+    // params: box_b (5) [x1, y1, x2, y2, angle]
+
+    float a_x1 = box_a[0], a_y1 = box_a[1], a_x2 = box_a[2], a_y2 = box_a[3], a_angle = box_a[4];
+    float b_x1 = box_b[0], b_y1 = box_b[1], b_x2 = box_b[2], b_y2 = box_b[3], b_angle = box_b[4];
+
+    Point center_a((a_x1 + a_x2) / 2, (a_y1 + a_y2) / 2);
+    Point center_b((b_x1 + b_x2) / 2, (b_y1 + b_y2) / 2);
+#ifdef DEBUG
+    printf("a: (%.3f, %.3f, %.3f, %.3f, %.3f), b: (%.3f, %.3f, %.3f, %.3f, %.3f)\n", a_x1, a_y1, a_x2, a_y2, a_angle,
+           b_x1, b_y1, b_x2, b_y2, b_angle);
+    printf("center a: (%.3f, %.3f), b: (%.3f, %.3f)\n", center_a.x, center_a.y, center_b.x, center_b.y);
+#endif
+
+    Point box_a_corners[5];
+    box_a_corners[0].set(a_x1, a_y1);
+    box_a_corners[1].set(a_x2, a_y1);
+    box_a_corners[2].set(a_x2, a_y2);
+    box_a_corners[3].set(a_x1, a_y2);
+
+    Point box_b_corners[5];
+    box_b_corners[0].set(b_x1, b_y1);
+    box_b_corners[1].set(b_x2, b_y1);
+    box_b_corners[2].set(b_x2, b_y2);
+    box_b_corners[3].set(b_x1, b_y2);
+
+    // get oriented corners
+    float a_angle_cos = cos(a_angle), a_angle_sin = sin(a_angle);
+    float b_angle_cos = cos(b_angle), b_angle_sin = sin(b_angle);
+
+    for (int k = 0; k < 4; k++){
+#ifdef DEBUG
+        printf("before corner %d: a(%.3f, %.3f), b(%.3f, %.3f) \n", k, box_a_corners[k].x, box_a_corners[k].y, box_b_corners[k].x, box_b_corners[k].y);
+#endif
+        rotate_around_center(center_a, a_angle_cos, a_angle_sin, box_a_corners[k]);
+        rotate_around_center(center_b, b_angle_cos, b_angle_sin, box_b_corners[k]);
+#ifdef DEBUG
+        printf("corner %d: a(%.3f, %.3f), b(%.3f, %.3f) \n", k, box_a_corners[k].x, box_a_corners[k].y, box_b_corners[k].x, box_b_corners[k].y);
+#endif
+    }
+
+    box_a_corners[4] = box_a_corners[0];
+    box_b_corners[4] = box_b_corners[0];
+
+    // get intersection of lines
+    Point cross_points[16];
+    Point poly_center;
+    int cnt = 0, flag = 0;
+
+    poly_center.set(0, 0);
+    for (int i = 0; i < 4; i++){
+        for (int j = 0; j < 4; j++){
+            flag = intersection(box_a_corners[i + 1], box_a_corners[i], box_b_corners[j + 1], box_b_corners[j], cross_points[cnt]);
+            if (flag){
+                poly_center = poly_center + cross_points[cnt];
+                cnt++;
+            }
+        }
+    }
+
+    // check corners
+    for (int k = 0; k < 4; k++){
+        if (check_in_box2d(box_a, box_b_corners[k])){
+            poly_center = poly_center + box_b_corners[k];
+            cross_points[cnt] = box_b_corners[k];
+            cnt++;
+        }
+        if (check_in_box2d(box_b, box_a_corners[k])){
+            poly_center = poly_center + box_a_corners[k];
+            cross_points[cnt] = box_a_corners[k];
+            cnt++;
+        }
+    }
+
+    poly_center.x /= cnt;
+    poly_center.y /= cnt;
+
+    // sort the points of polygon
+    Point temp;
+    for (int j = 0; j < cnt - 1; j++){
+        for (int i = 0; i < cnt - j - 1; i++){
+            if (point_cmp(cross_points[i], cross_points[i + 1], poly_center)){
+                temp = cross_points[i];
+                cross_points[i] = cross_points[i + 1];
+                cross_points[i + 1] = temp;
+            }
+        }
+    }
+
+#ifdef DEBUG
+    printf("cnt=%d\n", cnt);
+    for (int i = 0; i < cnt; i++){
printf("All cross point %d: (%.3f, %.3f)\n", i, cross_points[i].x, cross_points[i].y); + } +#endif + + // get the overlap areas + float area = 0; + for (int k = 0; k < cnt - 1; k++){ + area += cross(cross_points[k] - cross_points[0], cross_points[k + 1] - cross_points[0]); + } + + return fabs(area) / 2.0; +} + +__device__ inline float iou_bev(const float *box_a, const float *box_b){ + // params: box_a (5) [x1, y1, x2, y2, angle] + // params: box_b (5) [x1, y1, x2, y2, angle] + float sa = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1]); + float sb = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1]); + float s_overlap = box_overlap(box_a, box_b); + return s_overlap / fmaxf(sa + sb - s_overlap, EPS); +} + +__global__ void boxes_overlap_kernel(const int num_a, const float *boxes_a, const int num_b, const float *boxes_b, float *ans_overlap){ + const int a_idx = blockIdx.y * THREADS_PER_BLOCK + threadIdx.y; + const int b_idx = blockIdx.x * THREADS_PER_BLOCK + threadIdx.x; + + if (a_idx >= num_a || b_idx >= num_b){ + return; + } + const float * cur_box_a = boxes_a + a_idx * 5; + const float * cur_box_b = boxes_b + b_idx * 5; + float s_overlap = box_overlap(cur_box_a, cur_box_b); + ans_overlap[a_idx * num_b + b_idx] = s_overlap; +} + +__global__ void boxes_iou_bev_kernel(const int num_a, const float *boxes_a, const int num_b, const float *boxes_b, float *ans_iou){ + const int a_idx = blockIdx.y * THREADS_PER_BLOCK + threadIdx.y; + const int b_idx = blockIdx.x * THREADS_PER_BLOCK + threadIdx.x; + + if (a_idx >= num_a || b_idx >= num_b){ + return; + } + + const float * cur_box_a = boxes_a + a_idx * 5; + const float * cur_box_b = boxes_b + b_idx * 5; + float cur_iou_bev = iou_bev(cur_box_a, cur_box_b); + ans_iou[a_idx * num_b + b_idx] = cur_iou_bev; +} + +__global__ void nms_kernel(const int boxes_num, const float nms_overlap_thresh, + const float *boxes, unsigned long long *mask){ + //params: boxes (N, 5) [x1, y1, x2, y2, ry] + //params: mask (N, N/THREADS_PER_BLOCK_NMS) + + const int row_start = blockIdx.y; + const int col_start = blockIdx.x; + + // if (row_start > col_start) return; + + const int row_size = fminf(boxes_num - row_start * THREADS_PER_BLOCK_NMS, THREADS_PER_BLOCK_NMS); + const int col_size = fminf(boxes_num - col_start * THREADS_PER_BLOCK_NMS, THREADS_PER_BLOCK_NMS); + + __shared__ float block_boxes[THREADS_PER_BLOCK_NMS * 5]; + + if (threadIdx.x < col_size) { + block_boxes[threadIdx.x * 5 + 0] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 0]; + block_boxes[threadIdx.x * 5 + 1] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 1]; + block_boxes[threadIdx.x * 5 + 2] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 2]; + block_boxes[threadIdx.x * 5 + 3] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 3]; + block_boxes[threadIdx.x * 5 + 4] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 4]; + } + __syncthreads(); + + if (threadIdx.x < row_size) { + const int cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x; + const float *cur_box = boxes + cur_box_idx * 5; + + int i = 0; + unsigned long long t = 0; + int start = 0; + if (row_start == col_start) { + start = threadIdx.x + 1; + } + for (i = start; i < col_size; i++) { + if (iou_bev(cur_box, block_boxes + i * 5) > nms_overlap_thresh){ + t |= 1ULL << i; + } + } + const int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS); + mask[cur_box_idx * col_blocks + col_start] = t; + } +} + + +__device__ inline float iou_normal(float const * const a, float const * 
const b) {
+    float left = fmaxf(a[0], b[0]), right = fminf(a[2], b[2]);
+    float top = fmaxf(a[1], b[1]), bottom = fminf(a[3], b[3]);
+    float width = fmaxf(right - left, 0.f), height = fmaxf(bottom - top, 0.f);
+    float interS = width * height;
+    float Sa = (a[2] - a[0]) * (a[3] - a[1]);
+    float Sb = (b[2] - b[0]) * (b[3] - b[1]);
+    return interS / fmaxf(Sa + Sb - interS, EPS);
+}
+
+
+__global__ void nms_normal_kernel(const int boxes_num, const float nms_overlap_thresh,
+                                  const float *boxes, unsigned long long *mask){
+    //params: boxes (N, 5) [x1, y1, x2, y2, ry]
+    //params: mask (N, N/THREADS_PER_BLOCK_NMS)
+
+    const int row_start = blockIdx.y;
+    const int col_start = blockIdx.x;
+
+    // if (row_start > col_start) return;
+
+    const int row_size = fminf(boxes_num - row_start * THREADS_PER_BLOCK_NMS, THREADS_PER_BLOCK_NMS);
+    const int col_size = fminf(boxes_num - col_start * THREADS_PER_BLOCK_NMS, THREADS_PER_BLOCK_NMS);
+
+    __shared__ float block_boxes[THREADS_PER_BLOCK_NMS * 5];
+
+    if (threadIdx.x < col_size) {
+        block_boxes[threadIdx.x * 5 + 0] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 0];
+        block_boxes[threadIdx.x * 5 + 1] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 1];
+        block_boxes[threadIdx.x * 5 + 2] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 2];
+        block_boxes[threadIdx.x * 5 + 3] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 3];
+        block_boxes[threadIdx.x * 5 + 4] = boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 4];
+    }
+    __syncthreads();
+
+    if (threadIdx.x < row_size) {
+        const int cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x;
+        const float *cur_box = boxes + cur_box_idx * 5;
+
+        int i = 0;
+        unsigned long long t = 0;
+        int start = 0;
+        if (row_start == col_start) {
+            start = threadIdx.x + 1;
+        }
+        for (i = start; i < col_size; i++) {
+            if (iou_normal(cur_box, block_boxes + i * 5) > nms_overlap_thresh){
+                t |= 1ULL << i;
+            }
+        }
+        const int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS);
+        mask[cur_box_idx * col_blocks + col_start] = t;
+    }
+}
+
+
+
+
+
+void boxesoverlapLauncher(const int num_a, const float *boxes_a, const int num_b, const float *boxes_b, float *ans_overlap){
+
+    dim3 blocks(DIVUP(num_b, THREADS_PER_BLOCK), DIVUP(num_a, THREADS_PER_BLOCK));  // blockIdx.x(col), blockIdx.y(row)
+    dim3 threads(THREADS_PER_BLOCK, THREADS_PER_BLOCK);
+
+    boxes_overlap_kernel<<<blocks, threads>>>(num_a, boxes_a, num_b, boxes_b, ans_overlap);
+#ifdef DEBUG
+    cudaDeviceSynchronize();  // for using printf in kernel function
+#endif
+}
+
+void boxesioubevLauncher(const int num_a, const float *boxes_a, const int num_b, const float *boxes_b, float *ans_iou){
+
+    dim3 blocks(DIVUP(num_b, THREADS_PER_BLOCK), DIVUP(num_a, THREADS_PER_BLOCK));  // blockIdx.x(col), blockIdx.y(row)
+    dim3 threads(THREADS_PER_BLOCK, THREADS_PER_BLOCK);
+
+    boxes_iou_bev_kernel<<<blocks, threads>>>(num_a, boxes_a, num_b, boxes_b, ans_iou);
+}
+
+
+void nmsLauncher(const float *boxes, unsigned long long * mask, int boxes_num, float nms_overlap_thresh){
+    dim3 blocks(DIVUP(boxes_num, THREADS_PER_BLOCK_NMS),
+                DIVUP(boxes_num, THREADS_PER_BLOCK_NMS));
+    dim3 threads(THREADS_PER_BLOCK_NMS);
+    nms_kernel<<<blocks, threads>>>(boxes_num, nms_overlap_thresh, boxes, mask);
+}
+
+
+void nmsNormalLauncher(const float *boxes, unsigned long long * mask, int boxes_num, float nms_overlap_thresh){
+    dim3 blocks(DIVUP(boxes_num, THREADS_PER_BLOCK_NMS),
+                DIVUP(boxes_num, THREADS_PER_BLOCK_NMS));
+    dim3 threads(THREADS_PER_BLOCK_NMS);
+    nms_normal_kernel<<<blocks, threads>>>(boxes_num, 
nms_overlap_thresh, boxes, mask); +} diff --git a/PointRCNN/lib/utils/kitti_utils.py b/PointRCNN/lib/utils/kitti_utils.py new file mode 100644 index 0000000..4304db4 --- /dev/null +++ b/PointRCNN/lib/utils/kitti_utils.py @@ -0,0 +1,235 @@ +import numpy as np +from scipy.spatial import Delaunay +import scipy +import lib.utils.object3d as object3d +import torch + + +def get_objects_from_label(label_file): + with open(label_file, 'r') as f: + lines = f.readlines() + objects = [object3d.Object3d(line) for line in lines] + return objects + + +def dist_to_plane(plane, points): + """ + Calculates the signed distance from a 3D plane to each point in a list of points + :param plane: (a, b, c, d) + :param points: (N, 3) + :return: (N), signed distance of each point to the plane + """ + a, b, c, d = plane + + points = np.array(points) + x = points[:, 0] + y = points[:, 1] + z = points[:, 2] + + return (a*x + b*y + c*z + d) / np.sqrt(a**2 + b**2 + c**2) + + +def rotate_pc_along_y(pc, rot_angle): + """ + params pc: (N, 3+C), (N, 3) is in the rectified camera coordinate + params rot_angle: rad scalar + Output pc: updated pc with XYZ rotated + """ + cosval = np.cos(rot_angle) + sinval = np.sin(rot_angle) + rotmat = np.array([[cosval, -sinval], [sinval, cosval]]) + pc[:, [0, 2]] = np.dot(pc[:, [0, 2]], np.transpose(rotmat)) + return pc + + +def rotate_pc_along_y_torch(pc, rot_angle): + """ + :param pc: (N, 512, 3 + C) + :param rot_angle: (N) + :return: + TODO: merge with rotate_pc_along_y_torch in bbox_transform.py + """ + cosa = torch.cos(rot_angle).view(-1, 1) # (N, 1) + sina = torch.sin(rot_angle).view(-1, 1) # (N, 1) + + raw_1 = torch.cat([cosa, -sina], dim=1) # (N, 2) + raw_2 = torch.cat([sina, cosa], dim=1) # (N, 2) + R = torch.cat((raw_1.unsqueeze(dim=1), raw_2.unsqueeze(dim=1)), dim=1) # (N, 2, 2) + + pc_temp = pc[:, :, [0, 2]] # (N, 512, 2) + + pc[:, :, [0, 2]] = torch.matmul(pc_temp, R.permute(0, 2, 1)) # (N, 512, 2) + + return pc + + +def boxes3d_to_corners3d(boxes3d, rotate=True): + """ + :param boxes3d: (N, 7) [x, y, z, h, w, l, ry] + :param rotate: + :return: corners3d: (N, 8, 3) + """ + boxes_num = boxes3d.shape[0] + h, w, l = boxes3d[:, 3], boxes3d[:, 4], boxes3d[:, 5] + x_corners = np.array([l / 2., l / 2., -l / 2., -l / 2., l / 2., l / 2., -l / 2., -l / 2.], dtype=np.float32).T # (N, 8) + z_corners = np.array([w / 2., -w / 2., -w / 2., w / 2., w / 2., -w / 2., -w / 2., w / 2.], dtype=np.float32).T # (N, 8) + + y_corners = np.zeros((boxes_num, 8), dtype=np.float32) + y_corners[:, 4:8] = -h.reshape(boxes_num, 1).repeat(4, axis=1) # (N, 8) + + if rotate: + ry = boxes3d[:, 6] + zeros, ones = np.zeros(ry.size, dtype=np.float32), np.ones(ry.size, dtype=np.float32) + rot_list = np.array([[np.cos(ry), zeros, -np.sin(ry)], + [zeros, ones, zeros], + [np.sin(ry), zeros, np.cos(ry)]]) # (3, 3, N) + R_list = np.transpose(rot_list, (2, 0, 1)) # (N, 3, 3) + + temp_corners = np.concatenate((x_corners.reshape(-1, 8, 1), y_corners.reshape(-1, 8, 1), + z_corners.reshape(-1, 8, 1)), axis=2) # (N, 8, 3) + rotated_corners = np.matmul(temp_corners, R_list) # (N, 8, 3) + x_corners, y_corners, z_corners = rotated_corners[:, :, 0], rotated_corners[:, :, 1], rotated_corners[:, :, 2] + + x_loc, y_loc, z_loc = boxes3d[:, 0], boxes3d[:, 1], boxes3d[:, 2] + + x = x_loc.reshape(-1, 1) + x_corners.reshape(-1, 8) + y = y_loc.reshape(-1, 1) + y_corners.reshape(-1, 8) + z = z_loc.reshape(-1, 1) + z_corners.reshape(-1, 8) + + corners = np.concatenate((x.reshape(-1, 8, 1), y.reshape(-1, 8, 1), z.reshape(-1, 8, 1)), axis=2) + 
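+    # Corner layout: indices 0-3 trace the bottom face (y = 0 in the object frame) and
+    # indices 4-7 the top face (y = -h); the rectified KITTI camera y-axis points
+    # downward, so the top face has the smaller y values.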
+ return corners.astype(np.float32) + + +def boxes3d_to_corners3d_torch(boxes3d, flip=False): + """ + :param boxes3d: (N, 7) [x, y, z, h, w, l, ry] + :return: corners_rotated: (N, 8, 3) + """ + boxes_num = boxes3d.shape[0] + h, w, l, ry = boxes3d[:, 3:4], boxes3d[:, 4:5], boxes3d[:, 5:6], boxes3d[:, 6:7] + if flip: + ry = ry + np.pi + centers = boxes3d[:, 0:3] + zeros = torch.cuda.FloatTensor(boxes_num, 1).fill_(0) + ones = torch.cuda.FloatTensor(boxes_num, 1).fill_(1) + + x_corners = torch.cat([l / 2., l / 2., -l / 2., -l / 2., l / 2., l / 2., -l / 2., -l / 2.], dim=1) # (N, 8) + y_corners = torch.cat([zeros, zeros, zeros, zeros, -h, -h, -h, -h], dim=1) # (N, 8) + z_corners = torch.cat([w / 2., -w / 2., -w / 2., w / 2., w / 2., -w / 2., -w / 2., w / 2.], dim=1) # (N, 8) + corners = torch.cat((x_corners.unsqueeze(dim=1), y_corners.unsqueeze(dim=1), z_corners.unsqueeze(dim=1)), dim=1) # (N, 3, 8) + + cosa, sina = torch.cos(ry), torch.sin(ry) + raw_1 = torch.cat([cosa, zeros, sina], dim=1) + raw_2 = torch.cat([zeros, ones, zeros], dim=1) + raw_3 = torch.cat([-sina, zeros, cosa], dim=1) + R = torch.cat((raw_1.unsqueeze(dim=1), raw_2.unsqueeze(dim=1), raw_3.unsqueeze(dim=1)), dim=1) # (N, 3, 3) + + corners_rotated = torch.matmul(R, corners) # (N, 3, 8) + corners_rotated = corners_rotated + centers.unsqueeze(dim=2).expand(-1, -1, 8) + corners_rotated = corners_rotated.permute(0, 2, 1) + return corners_rotated + + +def boxes3d_to_bev_torch(boxes3d): + """ + :param boxes3d: (N, 7) [x, y, z, h, w, l, ry] + :return: + boxes_bev: (N, 5) [x1, y1, x2, y2, ry] + """ + boxes_bev = boxes3d.new(torch.Size((boxes3d.shape[0], 5))) + + cu, cv = boxes3d[:, 0], boxes3d[:, 2] + half_l, half_w = boxes3d[:, 5] / 2, boxes3d[:, 4] / 2 + boxes_bev[:, 0], boxes_bev[:, 1] = cu - half_l, cv - half_w + boxes_bev[:, 2], boxes_bev[:, 3] = cu + half_l, cv + half_w + boxes_bev[:, 4] = boxes3d[:, 6] + return boxes_bev + + +def enlarge_box3d(boxes3d, extra_width): + """ + :param boxes3d: (N, 7) [x, y, z, h, w, l, ry] + """ + if isinstance(boxes3d, np.ndarray): + large_boxes3d = boxes3d.copy() + else: + large_boxes3d = boxes3d.clone() + large_boxes3d[:, 3:6] += extra_width * 2 + large_boxes3d[:, 1] += extra_width + return large_boxes3d + + +def in_hull(p, hull): + """ + :param p: (N, K) test points + :param hull: (M, K) M corners of a box + :return (N) bool + """ + try: + if not isinstance(hull, Delaunay): + hull = Delaunay(hull) + flag = hull.find_simplex(p) >= 0 + except scipy.spatial.qhull.QhullError: + print('Warning: not a hull %s' % str(hull)) + flag = np.zeros(p.shape[0], dtype=np.bool) + + return flag + + +def objs_to_boxes3d(obj_list): + boxes3d = np.zeros((obj_list.__len__(), 7), dtype=np.float32) + for k, obj in enumerate(obj_list): + boxes3d[k, 0:3], boxes3d[k, 3], boxes3d[k, 4], boxes3d[k, 5], boxes3d[k, 6] \ + = obj.pos, obj.h, obj.w, obj.l, obj.ry + return boxes3d + + +def objs_to_scores(obj_list): + scores = np.zeros((obj_list.__len__()), dtype=np.float32) + for k, obj in enumerate(obj_list): + scores[k] = obj.score + return scores + + +def get_iou3d(corners3d, query_corners3d, need_bev=False): + """ + :param corners3d: (N, 8, 3) in rect coords + :param query_corners3d: (M, 8, 3) + :return: + """ + from shapely.geometry import Polygon + A, B = corners3d, query_corners3d + N, M = A.shape[0], B.shape[0] + iou3d = np.zeros((N, M), dtype=np.float32) + iou_bev = np.zeros((N, M), dtype=np.float32) + + # for height overlap, since y face down, use the negative y + min_h_a = -A[:, 0:4, 1].sum(axis=1) / 4.0 + max_h_a = 
-A[:, 4:8, 1].sum(axis=1) / 4.0 + min_h_b = -B[:, 0:4, 1].sum(axis=1) / 4.0 + max_h_b = -B[:, 4:8, 1].sum(axis=1) / 4.0 + + for i in range(N): + for j in range(M): + max_of_min = np.max([min_h_a[i], min_h_b[j]]) + min_of_max = np.min([max_h_a[i], max_h_b[j]]) + h_overlap = np.max([0, min_of_max - max_of_min]) + if h_overlap == 0: + continue + + bottom_a, bottom_b = Polygon(A[i, 0:4, [0, 2]].T), Polygon(B[j, 0:4, [0, 2]].T) + if bottom_a.is_valid and bottom_b.is_valid: + # check is valid, A valid Polygon may not possess any overlapping exterior or interior rings. + bottom_overlap = bottom_a.intersection(bottom_b).area + else: + bottom_overlap = 0. + overlap3d = bottom_overlap * h_overlap + union3d = bottom_a.area * (max_h_a[i] - min_h_a[i]) + bottom_b.area * (max_h_b[j] - min_h_b[j]) - overlap3d + iou3d[i][j] = overlap3d / union3d + iou_bev[i][j] = bottom_overlap / (bottom_a.area + bottom_b.area - bottom_overlap) + + if need_bev: + return iou3d, iou_bev + + return iou3d diff --git a/PointRCNN/lib/utils/loss_utils.py b/PointRCNN/lib/utils/loss_utils.py new file mode 100644 index 0000000..c371125 --- /dev/null +++ b/PointRCNN/lib/utils/loss_utils.py @@ -0,0 +1,234 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np +import pdb + + +class DiceLoss(nn.Module): + def __init__(self, ignore_target=-1): + super().__init__() + self.ignore_target = ignore_target + + def forward(self, input, target): + """ + :param input: (N), logit + :param target: (N), {0, 1} + :return: + """ + input = torch.sigmoid(input.view(-1)) + target = target.float().view(-1) + mask = (target != self.ignore_target).float() + return 1.0 - (torch.min(input, target) * mask).sum() / torch.clamp((torch.max(input, target) * mask).sum(), min=1.0) + + +class SigmoidFocalClassificationLoss(nn.Module): + """Sigmoid focal cross entropy loss. + Focal loss down-weights well classified examples and focusses on the hard + examples. See https://arxiv.org/pdf/1708.02002.pdf for the loss definition. + """ + def __init__(self, gamma=2.0, alpha=0.25): + """Constructor. + Args: + gamma: exponent of the modulating factor (1 - p_t) ^ gamma. + alpha: optional alpha weighting factor to balance positives vs negatives. + all_zero_negative: bool. if True, will treat all zero as background. + else, will treat first label as background. only affect alpha. + """ + super().__init__() + self._alpha = alpha + self._gamma = gamma + + def forward(self, + prediction_tensor, + target_tensor, + weights): + """Compute loss function. + + Args: + prediction_tensor: A float tensor of shape [batch_size, num_anchors, + num_classes] representing the predicted logits for each class + target_tensor: A float tensor of shape [batch_size, num_anchors, + num_classes] representing one-hot encoded classification targets + weights: a float tensor of shape [batch_size, num_anchors] + class_indices: (Optional) A 1-D integer tensor of class indices. + If provided, computes loss only for the specified class indices. + + Returns: + loss: a float tensor of shape [batch_size, num_anchors, num_classes] + representing the value of the loss function. 
+ """ + per_entry_cross_ent = (_sigmoid_cross_entropy_with_logits( + labels=target_tensor, logits=prediction_tensor)) + prediction_probabilities = torch.sigmoid(prediction_tensor) + p_t = ((target_tensor * prediction_probabilities) + + ((1 - target_tensor) * (1 - prediction_probabilities))) + modulating_factor = 1.0 + if self._gamma: + modulating_factor = torch.pow(1.0 - p_t, self._gamma) + alpha_weight_factor = 1.0 + if self._alpha is not None: + alpha_weight_factor = (target_tensor * self._alpha + (1 - target_tensor) * (1 - self._alpha)) + + focal_cross_entropy_loss = (modulating_factor * alpha_weight_factor * per_entry_cross_ent) + return focal_cross_entropy_loss * weights + + +def _sigmoid_cross_entropy_with_logits(logits, labels): + # to be compatible with tensorflow, we don't use ignore_idx + loss = torch.clamp(logits, min=0) - logits * labels.type_as(logits) + loss += torch.log1p(torch.exp(-torch.abs(logits))) + # transpose_param = [0] + [param[-1]] + param[1:-1] + # logits = logits.permute(*transpose_param) + # loss_ftor = nn.NLLLoss(reduce=False) + # loss = loss_ftor(F.logsigmoid(logits), labels) + return loss + + +def get_reg_loss(pred_reg, reg_label, loc_scope, loc_bin_size, num_head_bin, anchor_size, + get_xz_fine=True, get_y_by_bin=False, loc_y_scope=0.5, loc_y_bin_size=0.25, get_ry_fine=False): + + """ + Bin-based 3D bounding boxes regression loss. See https://arxiv.org/abs/1812.04244 for more details. + + :param pred_reg: (N, C) + :param reg_label: (N, 7) [dx, dy, dz, h, w, l, ry] + :param loc_scope: constant + :param loc_bin_size: constant + :param num_head_bin: constant + :param anchor_size: (N, 3) or (3) + :param get_xz_fine: + :param get_y_by_bin: + :param loc_y_scope: + :param loc_y_bin_size: + :param get_ry_fine: + :return: + """ + per_loc_bin_num = int(loc_scope / loc_bin_size) * 2 + loc_y_bin_num = int(loc_y_scope / loc_y_bin_size) * 2 + + reg_loss_dict = {} + loc_loss = 0 + + # xz localization loss + x_offset_label, y_offset_label, z_offset_label = reg_label[:, 0], reg_label[:, 1], reg_label[:, 2] + x_shift = torch.clamp(x_offset_label + loc_scope, 0, loc_scope * 2 - 1e-3) + z_shift = torch.clamp(z_offset_label + loc_scope, 0, loc_scope * 2 - 1e-3) + x_bin_label = (x_shift / loc_bin_size).floor().long() + z_bin_label = (z_shift / loc_bin_size).floor().long() + + x_bin_l, x_bin_r = 0, per_loc_bin_num + z_bin_l, z_bin_r = per_loc_bin_num, per_loc_bin_num * 2 + start_offset = z_bin_r + + loss_x_bin = F.cross_entropy(pred_reg[:, x_bin_l: x_bin_r], x_bin_label) + loss_z_bin = F.cross_entropy(pred_reg[:, z_bin_l: z_bin_r], z_bin_label) + reg_loss_dict['loss_x_bin'] = loss_x_bin.item() + reg_loss_dict['loss_z_bin'] = loss_z_bin.item() + loc_loss += loss_x_bin + loss_z_bin + + if get_xz_fine: + x_res_l, x_res_r = per_loc_bin_num * 2, per_loc_bin_num * 3 + z_res_l, z_res_r = per_loc_bin_num * 3, per_loc_bin_num * 4 + start_offset = z_res_r + + x_res_label = x_shift - (x_bin_label.float() * loc_bin_size + loc_bin_size / 2) + z_res_label = z_shift - (z_bin_label.float() * loc_bin_size + loc_bin_size / 2) + x_res_norm_label = x_res_label / loc_bin_size + z_res_norm_label = z_res_label / loc_bin_size + + x_bin_onehot = torch.cuda.FloatTensor(x_bin_label.size(0), per_loc_bin_num).zero_() + x_bin_onehot.scatter_(1, x_bin_label.view(-1, 1).long(), 1) + z_bin_onehot = torch.cuda.FloatTensor(z_bin_label.size(0), per_loc_bin_num).zero_() + z_bin_onehot.scatter_(1, z_bin_label.view(-1, 1).long(), 1) + + loss_x_res = F.smooth_l1_loss((pred_reg[:, x_res_l: x_res_r] * 
x_bin_onehot).sum(dim=1), x_res_norm_label) + loss_z_res = F.smooth_l1_loss((pred_reg[:, z_res_l: z_res_r] * z_bin_onehot).sum(dim=1), z_res_norm_label) + reg_loss_dict['loss_x_res'] = loss_x_res.item() + reg_loss_dict['loss_z_res'] = loss_z_res.item() + loc_loss += loss_x_res + loss_z_res + + # y localization loss + if get_y_by_bin: + y_bin_l, y_bin_r = start_offset, start_offset + loc_y_bin_num + y_res_l, y_res_r = y_bin_r, y_bin_r + loc_y_bin_num + start_offset = y_res_r + + y_shift = torch.clamp(y_offset_label + loc_y_scope, 0, loc_y_scope * 2 - 1e-3) + y_bin_label = (y_shift / loc_y_bin_size).floor().long() + y_res_label = y_shift - (y_bin_label.float() * loc_y_bin_size + loc_y_bin_size / 2) + y_res_norm_label = y_res_label / loc_y_bin_size + + y_bin_onehot = torch.cuda.FloatTensor(y_bin_label.size(0), loc_y_bin_num).zero_() + y_bin_onehot.scatter_(1, y_bin_label.view(-1, 1).long(), 1) + + loss_y_bin = F.cross_entropy(pred_reg[:, y_bin_l: y_bin_r], y_bin_label) + loss_y_res = F.smooth_l1_loss((pred_reg[:, y_res_l: y_res_r] * y_bin_onehot).sum(dim=1), y_res_norm_label) + + reg_loss_dict['loss_y_bin'] = loss_y_bin.item() + reg_loss_dict['loss_y_res'] = loss_y_res.item() + + loc_loss += loss_y_bin + loss_y_res + else: + y_offset_l, y_offset_r = start_offset, start_offset + 1 + start_offset = y_offset_r + + loss_y_offset = F.smooth_l1_loss(pred_reg[:, y_offset_l: y_offset_r].sum(dim=1), y_offset_label) + reg_loss_dict['loss_y_offset'] = loss_y_offset.item() + loc_loss += loss_y_offset + + # angle loss + ry_bin_l, ry_bin_r = start_offset, start_offset + num_head_bin + ry_res_l, ry_res_r = ry_bin_r, ry_bin_r + num_head_bin + + ry_label = reg_label[:, 6] + + if get_ry_fine: + # divide pi/2 into several bins + angle_per_class = (np.pi / 2) / num_head_bin + + ry_label = ry_label % (2 * np.pi) # 0 ~ 2pi + opposite_flag = (ry_label > np.pi * 0.5) & (ry_label < np.pi * 1.5) + ry_label[opposite_flag] = (ry_label[opposite_flag] + np.pi) % (2 * np.pi) # (0 ~ pi/2, 3pi/2 ~ 2pi) + shift_angle = (ry_label + np.pi * 0.5) % (2 * np.pi) # (0 ~ pi) + + shift_angle = torch.clamp(shift_angle - np.pi * 0.25, min=1e-3, max=np.pi * 0.5 - 1e-3) # (0, pi/2) + + # bin center is (5, 10, 15, ..., 85) + ry_bin_label = (shift_angle / angle_per_class).floor().long() + ry_res_label = shift_angle - (ry_bin_label.float() * angle_per_class + angle_per_class / 2) + ry_res_norm_label = ry_res_label / (angle_per_class / 2) + + else: + # divide 2pi into several bins + angle_per_class = (2 * np.pi) / num_head_bin + heading_angle = ry_label % (2 * np.pi) # 0 ~ 2pi + + shift_angle = (heading_angle + angle_per_class / 2) % (2 * np.pi) + ry_bin_label = (shift_angle / angle_per_class).floor().long() + ry_res_label = shift_angle - (ry_bin_label.float() * angle_per_class + angle_per_class / 2) + ry_res_norm_label = ry_res_label / (angle_per_class / 2) + + ry_bin_onehot = torch.cuda.FloatTensor(ry_bin_label.size(0), num_head_bin).zero_() + ry_bin_onehot.scatter_(1, ry_bin_label.view(-1, 1).long(), 1) + loss_ry_bin = F.cross_entropy(pred_reg[:, ry_bin_l:ry_bin_r], ry_bin_label) + loss_ry_res = F.smooth_l1_loss((pred_reg[:, ry_res_l: ry_res_r] * ry_bin_onehot).sum(dim=1), ry_res_norm_label) + + reg_loss_dict['loss_ry_bin'] = loss_ry_bin.item() + reg_loss_dict['loss_ry_res'] = loss_ry_res.item() + angle_loss = loss_ry_bin + loss_ry_res + + # size loss + size_res_l, size_res_r = ry_res_r, ry_res_r + 3 + assert pred_reg.shape[1] == size_res_r, '%d vs %d' % (pred_reg.shape[1], size_res_r) + + size_res_norm_label = (reg_label[:, 3:6] - 
anchor_size) / anchor_size + size_res_norm = pred_reg[:, size_res_l:size_res_r] + size_loss = F.smooth_l1_loss(size_res_norm, size_res_norm_label) + + # Total regression loss + reg_loss_dict['loss_loc'] = loc_loss + reg_loss_dict['loss_angle'] = angle_loss + reg_loss_dict['loss_size'] = size_loss + + return loc_loss, angle_loss, size_loss, reg_loss_dict diff --git a/PointRCNN/lib/utils/object3d.py b/PointRCNN/lib/utils/object3d.py new file mode 100644 index 0000000..c0784a9 --- /dev/null +++ b/PointRCNN/lib/utils/object3d.py @@ -0,0 +1,103 @@ +import numpy as np + + +def cls_type_to_id(cls_type): + type_to_id = {'Car': 1, 'Pedestrian': 2, 'Cyclist': 3, 'Van': 4} + if cls_type not in type_to_id.keys(): + return -1 + return type_to_id[cls_type] + + +class Object3d(object): + def __init__(self, line): + label = line.strip().split(' ') + self.src = line + self.cls_type = label[0] + self.cls_id = cls_type_to_id(self.cls_type) + self.trucation = float(label[1]) + self.occlusion = float(label[2]) # 0:fully visible 1:partly occluded 2:largely occluded 3:unknown + self.alpha = float(label[3]) + self.box2d = np.array((float(label[4]), float(label[5]), float(label[6]), float(label[7])), dtype=np.float32) + self.h = float(label[8]) + self.w = float(label[9]) + self.l = float(label[10]) + self.pos = np.array((float(label[11]), float(label[12]), float(label[13])), dtype=np.float32) + self.dis_to_cam = np.linalg.norm(self.pos) + self.ry = float(label[14]) + self.score = float(label[15]) if label.__len__() == 16 else -1.0 + self.level_str = None + self.level = self.get_obj_level() + + def get_obj_level(self): + height = float(self.box2d[3]) - float(self.box2d[1]) + 1 + + if height >= 40 and self.trucation <= 0.15 and self.occlusion <= 0: + self.level_str = 'Easy' + return 1 # Easy + elif height >= 25 and self.trucation <= 0.3 and self.occlusion <= 1: + self.level_str = 'Moderate' + return 2 # Moderate + elif height >= 25 and self.trucation <= 0.5 and self.occlusion <= 2: + self.level_str = 'Hard' + return 3 # Hard + else: + self.level_str = 'UnKnown' + return 4 + + def generate_corners3d(self): + """ + generate corners3d representation for this object + :return corners_3d: (8, 3) corners of box3d in camera coord + """ + l, h, w = self.l, self.h, self.w + x_corners = [l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2, -l / 2, -l / 2] + y_corners = [0, 0, 0, 0, -h, -h, -h, -h] + z_corners = [w / 2, -w / 2, -w / 2, w / 2, w / 2, -w / 2, -w / 2, w / 2] + + R = np.array([[np.cos(self.ry), 0, np.sin(self.ry)], + [0, 1, 0], + [-np.sin(self.ry), 0, np.cos(self.ry)]]) + corners3d = np.vstack([x_corners, y_corners, z_corners]) # (3, 8) + corners3d = np.dot(R, corners3d).T + corners3d = corners3d + self.pos + return corners3d + + def to_bev_box2d(self, oblique=True, voxel_size=0.1): + """ + :param bev_shape: (2) for bev shape (h, w), => (y_max, x_max) in image + :param voxel_size: float, 0.1m + :param oblique: + :return: box2d (4, 2)/ (4) in image coordinate + """ + if oblique: + corners3d = self.generate_corners3d() + xz_corners = corners3d[0:4, [0, 2]] + box2d = np.zeros((4, 2), dtype=np.int32) + box2d[:, 0] = ((xz_corners[:, 0] - Object3d.MIN_XZ[0]) / voxel_size).astype(np.int32) + box2d[:, 1] = Object3d.BEV_SHAPE[0] - 1 - ((xz_corners[:, 1] - Object3d.MIN_XZ[1]) / voxel_size).astype(np.int32) + box2d[:, 0] = np.clip(box2d[:, 0], 0, Object3d.BEV_SHAPE[1]) + box2d[:, 1] = np.clip(box2d[:, 1], 0, Object3d.BEV_SHAPE[0]) + else: + box2d = np.zeros(4, dtype=np.int32) + # discrete_center = np.floor((self.pos / 
voxel_size)).astype(np.int32) + cu = np.floor((self.pos[0] - Object3d.MIN_XZ[0]) / voxel_size).astype(np.int32) + cv = Object3d.BEV_SHAPE[0] - 1 - ((self.pos[2] - Object3d.MIN_XZ[1]) / voxel_size).astype(np.int32) + half_l, half_w = int(self.l / voxel_size / 2), int(self.w / voxel_size / 2) + box2d[0], box2d[1] = cu - half_l, cv - half_w + box2d[2], box2d[3] = cu + half_l, cv + half_w + + return box2d + + def to_str(self): + print_str = '%s %.3f %.3f %.3f box2d: %s hwl: [%.3f %.3f %.3f] pos: %s ry: %.3f' \ + % (self.cls_type, self.trucation, self.occlusion, self.alpha, self.box2d, self.h, self.w, self.l, + self.pos, self.ry) + return print_str + + def to_kitti_format(self): + kitti_str = '%s %.2f %d %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f' \ + % (self.cls_type, self.trucation, int(self.occlusion), self.alpha, self.box2d[0], self.box2d[1], + self.box2d[2], self.box2d[3], self.h, self.w, self.l, self.pos[0], self.pos[1], self.pos[2], + self.ry) + return kitti_str + diff --git a/PointRCNN/lib/utils/roipool3d/roipool3d_utils.py b/PointRCNN/lib/utils/roipool3d/roipool3d_utils.py new file mode 100644 index 0000000..0182cf2 --- /dev/null +++ b/PointRCNN/lib/utils/roipool3d/roipool3d_utils.py @@ -0,0 +1,112 @@ +import torch +import roipool3d_cuda +import numpy as np +import lib.utils.kitti_utils as kitti_utils + + +def roipool3d_gpu(pts, pts_feature, boxes3d, pool_extra_width, sampled_pt_num=512): + """ + :param pts: (B, N, 3) + :param pts_feature: (B, N, C) + :param boxes3d: (B, M, 7) + :param pool_extra_width: float + :param sampled_pt_num: int + :return: + pooled_features: (B, M, 512, 3 + C) + pooled_empty_flag: (B, M) + """ + batch_size, boxes_num, feature_len = pts.shape[0], boxes3d.shape[1], pts_feature.shape[2] + pooled_boxes3d = kitti_utils.enlarge_box3d(boxes3d.view(-1, 7), pool_extra_width).view(batch_size, -1, 7) + + pooled_features = torch.cuda.FloatTensor(torch.Size((batch_size, boxes_num, + sampled_pt_num, 3 + feature_len))).zero_() + pooled_empty_flag = torch.cuda.IntTensor(torch.Size((batch_size, boxes_num))).zero_() + + roipool3d_cuda.forward(pts.contiguous(), pooled_boxes3d.contiguous(), + pts_feature.contiguous(), pooled_features, pooled_empty_flag) + + return pooled_features, pooled_empty_flag + + +def pts_in_boxes3d_cpu(pts, boxes3d): + """ + :param pts: (N, 3) in rect-camera coords + :param boxes3d: (M, 7) + :return: boxes_pts_mask_list: (M), list with [(N), (N), ..] 
+ """ + if not pts.is_cuda: + pts = pts.float().contiguous() + boxes3d = boxes3d.float().contiguous() + pts_flag = torch.LongTensor(torch.Size((boxes3d.size(0), pts.size(0)))) # (M, N) + roipool3d_cuda.pts_in_boxes3d_cpu(pts_flag, pts, boxes3d) + + boxes_pts_mask_list = [] + for k in range(0, boxes3d.shape[0]): + cur_mask = pts_flag[k] > 0 + boxes_pts_mask_list.append(cur_mask) + return boxes_pts_mask_list + else: + raise NotImplementedError + + +def roipool_pc_cpu(pts, pts_feature, boxes3d, sampled_pt_num): + """ + :param pts: (N, 3) + :param pts_feature: (N, C) + :param boxes3d: (M, 7) + :param sampled_pt_num: int + :return: + """ + pts = pts.cpu().float().contiguous() + pts_feature = pts_feature.cpu().float().contiguous() + boxes3d = boxes3d.cpu().float().contiguous() + assert pts.shape[0] == pts_feature.shape[0] and pts.shape[1] == 3, '%s %s' % (pts.shape, pts_feature.shape) + assert pts.is_cuda is False + pooled_pts = torch.FloatTensor(torch.Size((boxes3d.shape[0], sampled_pt_num, 3))).zero_() + pooled_features = torch.FloatTensor(torch.Size((boxes3d.shape[0], sampled_pt_num, pts_feature.shape[1]))).zero_() + pooled_empty_flag = torch.LongTensor(boxes3d.shape[0]).zero_() + roipool3d_cuda.roipool3d_cpu(pts, boxes3d, pts_feature, pooled_pts, pooled_features, pooled_empty_flag) + return pooled_pts, pooled_features, pooled_empty_flag + + +def roipool3d_cpu(boxes3d, pts, pts_feature, pts_extra_input, pool_extra_width, sampled_pt_num=512, + canonical_transform=True): + """ + :param boxes3d: (N, 7) + :param pts: (N, 3) + :param pts_feature: (N, C) + :param pts_extra_input: (N, C2) + :param pool_extra_width: constant + :param sampled_pt_num: constant + :return: + """ + pooled_boxes3d = kitti_utils.enlarge_box3d(boxes3d, pool_extra_width) + + pts_feature_all = np.concatenate((pts_extra_input, pts_feature), axis=1) + + # Note: if pooled_empty_flag[i] > 0, the pooled_pts[i], pooled_features[i] will be zero + pooled_pts, pooled_features, pooled_empty_flag = \ + roipool_pc_cpu(torch.from_numpy(pts), torch.from_numpy(pts_feature_all), + torch.from_numpy(pooled_boxes3d), sampled_pt_num) + + extra_input_len = pts_extra_input.shape[1] + sampled_pts_input = torch.cat((pooled_pts, pooled_features[:, :, 0:extra_input_len]), dim=2).numpy() + sampled_pts_feature = pooled_features[:, :, extra_input_len:].numpy() + + if canonical_transform: + # Translate to the roi coordinates + roi_ry = boxes3d[:, 6] % (2 * np.pi) # 0~2pi + roi_center = boxes3d[:, 0:3] + + # shift to center + sampled_pts_input[:, :, 0:3] = sampled_pts_input[:, :, 0:3] - roi_center[:, np.newaxis, :] + for k in range(sampled_pts_input.shape[0]): + sampled_pts_input[k] = kitti_utils.rotate_pc_along_y(sampled_pts_input[k], roi_ry[k]) + + return sampled_pts_input, sampled_pts_feature + + return sampled_pts_input, sampled_pts_feature, pooled_empty_flag.numpy() + + +if __name__ == '__main__': + pass diff --git a/PointRCNN/lib/utils/roipool3d/setup.py b/PointRCNN/lib/utils/roipool3d/setup.py new file mode 100644 index 0000000..c7099a6 --- /dev/null +++ b/PointRCNN/lib/utils/roipool3d/setup.py @@ -0,0 +1,14 @@ +from setuptools import setup +from torch.utils.cpp_extension import BuildExtension, CUDAExtension + +setup( + name='roipool3d', + ext_modules=[ + CUDAExtension('roipool3d_cuda', [ + 'src/roipool3d.cpp', + 'src/roipool3d_kernel.cu', + ], + extra_compile_args={'cxx': ['-g'], + 'nvcc': ['-O2']}) + ], + cmdclass={'build_ext': BuildExtension}) diff --git a/PointRCNN/lib/utils/roipool3d/src/roipool3d.cpp 
b/PointRCNN/lib/utils/roipool3d/src/roipool3d.cpp
new file mode 100644
index 0000000..ac002af
--- /dev/null
+++ b/PointRCNN/lib/utils/roipool3d/src/roipool3d.cpp
@@ -0,0 +1,204 @@
+#include <torch/serialize/tensor.h>
+#include <torch/extension.h>
+
+
+#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
+#define CHECK_CONTIGUOUS(x) AT_CHECK(x.is_contiguous(), #x, " must be contiguous ")
+#define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x)
+
+void roipool3dLauncher_slow(int batch_size, int pts_num, int boxes_num, int feature_in_len, int sampled_pts_num,
+                            const float *xyz, const float *boxes3d, const float *pts_feature, float *pooled_features, int *pooled_empty_flag);
+
+void roipool3dLauncher(int batch_size, int pts_num, int boxes_num, int feature_in_len, int sampled_pts_num,
+                       const float *xyz, const float *boxes3d, const float *pts_feature, float *pooled_features, int *pooled_empty_flag);
+
+int roipool3d_gpu_slow(at::Tensor xyz, at::Tensor boxes3d, at::Tensor pts_feature, at::Tensor pooled_features, at::Tensor pooled_empty_flag){
+    // params xyz: (B, N, 3)
+    // params boxes3d: (B, M, 7)
+    // params pts_feature: (B, N, C)
+    // params pooled_features: (B, M, 512, 3+C)
+    // params pooled_empty_flag: (B, M)
+    CHECK_INPUT(xyz);
+    CHECK_INPUT(boxes3d);
+    CHECK_INPUT(pts_feature);
+    CHECK_INPUT(pooled_features);
+    CHECK_INPUT(pooled_empty_flag);
+
+    int batch_size = xyz.size(0);
+    int pts_num = xyz.size(1);
+    int boxes_num = boxes3d.size(1);
+    int feature_in_len = pts_feature.size(2);
+    int sampled_pts_num = pooled_features.size(2);
+
+
+    const float * xyz_data = xyz.data<float>();
+    const float * boxes3d_data = boxes3d.data<float>();
+    const float * pts_feature_data = pts_feature.data<float>();
+    float * pooled_features_data = pooled_features.data<float>();
+    int * pooled_empty_flag_data = pooled_empty_flag.data<int>();
+
+    roipool3dLauncher_slow(batch_size, pts_num, boxes_num, feature_in_len, sampled_pts_num,
+                           xyz_data, boxes3d_data, pts_feature_data, pooled_features_data, pooled_empty_flag_data);
+
+    return 1;
+}
+
+
+
+int roipool3d_gpu(at::Tensor xyz, at::Tensor boxes3d, at::Tensor pts_feature, at::Tensor pooled_features, at::Tensor pooled_empty_flag){
+    // params xyz: (B, N, 3)
+    // params boxes3d: (B, M, 7)
+    // params pts_feature: (B, N, C)
+    // params pooled_features: (B, M, 512, 3+C)
+    // params pooled_empty_flag: (B, M)
+    CHECK_INPUT(xyz);
+    CHECK_INPUT(boxes3d);
+    CHECK_INPUT(pts_feature);
+    CHECK_INPUT(pooled_features);
+    CHECK_INPUT(pooled_empty_flag);
+
+    int batch_size = xyz.size(0);
+    int pts_num = xyz.size(1);
+    int boxes_num = boxes3d.size(1);
+    int feature_in_len = pts_feature.size(2);
+    int sampled_pts_num = pooled_features.size(2);
+
+
+    const float * xyz_data = xyz.data<float>();
+    const float * boxes3d_data = boxes3d.data<float>();
+    const float * pts_feature_data = pts_feature.data<float>();
+    float * pooled_features_data = pooled_features.data<float>();
+    int * pooled_empty_flag_data = pooled_empty_flag.data<int>();
+
+    roipool3dLauncher(batch_size, pts_num, boxes_num, feature_in_len, sampled_pts_num,
+                      xyz_data, boxes3d_data, pts_feature_data, pooled_features_data, pooled_empty_flag_data);
+
+
+
+    return 1;
+}
+
+
+int pt_in_box3d_cpu(float x, float y, float z, float cx, float bottom_y, float cz, float h, float w, float l, float angle){
+    float max_dis = 10.0, x_rot, z_rot, cosa, sina, cy;
+    int in_flag;
+    cy = bottom_y - h / 2.0;
+    if ((fabsf(x - cx) > max_dis) || (fabsf(y - cy) > h / 2.0) || (fabsf(z - cz) > max_dis)){
+        return 0;
+    }
+    cosa = cos(angle); sina = sin(angle);
+    x_rot = (x - cx) * cosa + (z - cz) * (-sina);
+    z_rot = (x - cx) * 
sina + (z - cz) * cosa;
+
+    in_flag = (x_rot >= -l / 2.0) & (x_rot <= l / 2.0) & (z_rot >= -w / 2.0) & (z_rot <= w / 2.0);
+    return in_flag;
+}
+
+int pts_in_boxes3d_cpu(at::Tensor pts_flag, at::Tensor pts, at::Tensor boxes3d){
+    // param pts_flag: (M, N), 0 or 1
+    // param pts: (N, 3)
+    // param boxes3d: (M, 7) [x, y, z, h, w, l, ry]
+
+    CHECK_CONTIGUOUS(pts_flag);
+    CHECK_CONTIGUOUS(pts);
+    CHECK_CONTIGUOUS(boxes3d);
+
+    long boxes_num = boxes3d.size(0);
+    long pts_num = pts.size(0);
+
+    long * pts_flag_flat = pts_flag.data<long>();
+    float * pts_flat = pts.data<float>();
+    float * boxes3d_flat = boxes3d.data<float>();
+
+    memset(pts_flag_flat, 0, boxes_num * pts_num * sizeof(long));
+
+    int i, j, cur_in_flag;
+    for (i = 0; i < boxes_num; i++){
+        for (j = 0; j < pts_num; j++){
+            cur_in_flag = pt_in_box3d_cpu(pts_flat[j * 3], pts_flat[j * 3 + 1], pts_flat[j * 3 + 2], boxes3d_flat[i * 7],
+                                          boxes3d_flat[i * 7 + 1], boxes3d_flat[i * 7 + 2], boxes3d_flat[i * 7 + 3],
+                                          boxes3d_flat[i * 7 + 4], boxes3d_flat[i * 7 + 5], boxes3d_flat[i * 7 + 6]);
+            pts_flag_flat[i * pts_num + j] = cur_in_flag;
+        }
+    }
+    return 1;
+}
+
+int roipool3d_cpu(at::Tensor pts, at::Tensor boxes3d, at::Tensor pts_feature, at::Tensor pooled_pts,
+                  at::Tensor pooled_features, at::Tensor pooled_empty_flag){
+    // param pts: (N, 3) [x, y, z]
+    // param boxes3d: (M, 7) [x, y, z, h, w, l, ry]
+    // param pts_feature: (N, C)
+    // param pooled_pts: (M, 512, 3)
+    // param pooled_features: (M, 512, C)
+    CHECK_CONTIGUOUS(pts);
+    CHECK_CONTIGUOUS(boxes3d);
+    CHECK_CONTIGUOUS(pts_feature);
+    CHECK_CONTIGUOUS(pooled_pts);
+    CHECK_CONTIGUOUS(pooled_features);
+    CHECK_CONTIGUOUS(pooled_empty_flag);
+
+    long boxes_num = boxes3d.size(0);
+    long pts_num = pts.size(0);
+    long feature_len = pts_feature.size(1);
+    long sampled_pts_num = pooled_pts.size(1);
+
+    float * pts_flat = pts.data<float>();
+    float * boxes3d_flat = boxes3d.data<float>();
+    float * pts_feature_flat = pts_feature.data<float>();
+    float * pooled_pts_flat = pooled_pts.data<float>();
+    float * pooled_features_flat = pooled_features.data<float>();
+    long * pooled_empty_flag_flat = pooled_empty_flag.data<long>();
+
+    memset(pooled_empty_flag_flat, 0, boxes_num * sizeof(long));
+
+    int i, j, k, cnt, temp_idx, duplicate_idx, cur_in_flag;
+    for (i = 0; i < boxes_num; i++){
+        cnt = 0;
+        for (j = 0; j < pts_num; j++){
+            cur_in_flag = pt_in_box3d_cpu(pts_flat[j * 3], pts_flat[j * 3 + 1], pts_flat[j * 3 + 2], boxes3d_flat[i * 7],
+                                          boxes3d_flat[i * 7 + 1], boxes3d_flat[i * 7 + 2], boxes3d_flat[i * 7 + 3],
+                                          boxes3d_flat[i * 7 + 4], boxes3d_flat[i * 7 + 5], boxes3d_flat[i * 7 + 6]);
+
+            if (cur_in_flag){
+                if (cnt < sampled_pts_num){
+                    temp_idx = i * sampled_pts_num * 3 + cnt * 3;
+                    for (k = 0; k < 3; k++) pooled_pts_flat[temp_idx + k] = pts_flat[j * 3 + k];
+                    temp_idx = i * sampled_pts_num * feature_len + cnt * feature_len;
+                    for (k = 0; k < feature_len; k++) pooled_features_flat[temp_idx + k] = pts_feature_flat[j * feature_len + k];
+                    cnt++;
+                }
+                else break;
+            }
+        }
+
+        if (cnt == 0){
+            // no points in this box
+            pooled_empty_flag_flat[i] = 1;
+        }
+        else if (cnt < sampled_pts_num){
+            // duplicate same points
+            duplicate_idx = 0;
+            for (j = cnt; j < sampled_pts_num; j++){
+                temp_idx = i * sampled_pts_num * 3 + j * 3;
+                duplicate_idx = i * sampled_pts_num * 3 + (j % cnt) * 3;
+                for (k = 0; k < 3; k++) pooled_pts_flat[temp_idx + k] = pooled_pts_flat[duplicate_idx + k];
+                temp_idx = i * sampled_pts_num * feature_len + j * feature_len;
+                duplicate_idx = i * sampled_pts_num * feature_len + (j % cnt) * feature_len;
+                for (k = 0; k < feature_len; k++){
+                    pooled_features_flat[temp_idx + k] = pooled_features_flat[duplicate_idx + k];
+                }
+            }
+        }
+    }
+    return 1;
+}
+
+
+PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
+    m.def("pts_in_boxes3d_cpu", &pts_in_boxes3d_cpu, "pts_in_boxes3d_cpu");
+    m.def("roipool3d_cpu", &roipool3d_cpu, "roipool3d_cpu");
+    m.def("forward", &roipool3d_gpu, "roipool3d forward (CUDA)");
+    m.def("forward_slow", &roipool3d_gpu_slow, "roipool3d forward (CUDA)");
+}
+
diff --git a/PointRCNN/lib/utils/roipool3d/src/roipool3d_kernel.cu b/PointRCNN/lib/utils/roipool3d/src/roipool3d_kernel.cu
new file mode 100644
index 0000000..a3dc0ff
--- /dev/null
+++ b/PointRCNN/lib/utils/roipool3d/src/roipool3d_kernel.cu
@@ -0,0 +1,237 @@
+/*
+Point cloud feature pooling
+Written by Shaoshuai Shi
+All Rights Reserved 2018.
+*/
+
+#include <math.h>
+#include <stdio.h>
+
+#define THREADS_PER_BLOCK 256
+#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
+// #define DEBUG
+
+__device__ inline int pt_in_box3d(float x, float y, float z, float cx, float bottom_y, float cz, float h, float w,
+                                  float l, float angle, float max_dis){
+    float x_rot, z_rot, cosa, sina, cy;
+    int in_flag;
+    cy = bottom_y - h / 2.0;
+    if ((fabsf(x - cx) > max_dis) || (fabsf(y - cy) > h / 2.0) || (fabsf(z - cz) > max_dis)){
+        return 0;
+    }
+    cosa = cos(angle); sina = sin(angle);
+    x_rot = (x - cx) * cosa + (z - cz) * (-sina);
+    z_rot = (x - cx) * sina + (z - cz) * cosa;
+
+    in_flag = (x_rot >= -l / 2.0) & (x_rot <= l / 2.0) & (z_rot >= -w / 2.0) & (z_rot <= w / 2.0);
+    return in_flag;
+}
+
+
+__global__ void roipool3d_forward(int batch_size, int pts_num, int boxes_num, int feature_in_len, int sampled_pts_num,
+                                  const float *xyz, const float *boxes3d, const float *pts_feature,
+                                  float *pooled_features, int *pooled_empty_flag){
+    // params xyz: (B, N, 3)
+    // params boxes3d: (B, M, 7)
+    // params pts_feature: (B, N, C)
+    // params pooled_features: (B, M, 512, 3+C)
+    // params pooled_empty_flag: (B, M)
+
+    int boxes_idx = blockIdx.x * blockDim.x + threadIdx.x;
+    if (boxes_idx >= boxes_num){
+        return;
+    }
+
+    for (int i = 0; i < batch_size; i++){
+        int cnt = 0;
+        for (int k = 0; k < pts_num; k++){
+            int pt_offset = i * pts_num * 3 + k * 3;
+            int box_offset = i * boxes_num * 7 + boxes_idx * 7;
+
+            int cur_in_flag = pt_in_box3d(xyz[pt_offset], xyz[pt_offset + 1], xyz[pt_offset + 2], boxes3d[box_offset],
+                                          boxes3d[box_offset + 1], boxes3d[box_offset + 2], boxes3d[box_offset + 3],
+                                          boxes3d[box_offset + 4], boxes3d[box_offset + 5], boxes3d[box_offset + 6], 10.0);
+            if (cur_in_flag){
+                if (cnt < sampled_pts_num){
+                    int feature_out_offset = i * boxes_num * sampled_pts_num * (3 + feature_in_len) +
+                                             boxes_idx * sampled_pts_num * (3 + feature_in_len) +
+                                             cnt * (3 + feature_in_len);
+
+                    int feature_in_offset = i * pts_num * feature_in_len + k * feature_in_len;
+
+                    // copy xyz
+                    for (int j = 0; j < 3; j++)
+                        pooled_features[feature_out_offset + j] = xyz[pt_offset + j];
+
+                    // copy feature
+                    for (int j = 0; j < feature_in_len; j++)
+                        pooled_features[feature_out_offset + 3 + j] = pts_feature[feature_in_offset + j];
+
+                    cnt++;
+                }
+                else break;
+            }
+        }
+
+        if (cnt == 0){
+            pooled_empty_flag[i * boxes_num + boxes_idx] = 1;
+        }
+        else if (cnt < sampled_pts_num){
+            // duplicate same points for sampling
+            for (int k = cnt; k < sampled_pts_num; k++){
+                int duplicate_idx = k % cnt;
+                int src_offset = i * boxes_num * sampled_pts_num * (3 + feature_in_len) +
+                                 boxes_idx * sampled_pts_num * (3 + feature_in_len) +
+                                 duplicate_idx * (3 + feature_in_len);
+                int dst_offset = i * boxes_num * sampled_pts_num * (3 + 
feature_in_len) + + boxes_idx * sampled_pts_num * (3 + feature_in_len) + + k * (3 + feature_in_len); + for (int j = 0; j < 3 + feature_in_len; j++) + pooled_features[dst_offset + j] = pooled_features[src_offset + j]; + } + } + } +} + + +__global__ void assign_pts_to_box3d(int batch_size, int pts_num, int boxes_num, const float *xyz, const float *boxes3d, int *pts_assign){ + // params xyz: (B, N, 3) + // params boxes3d: (B, M, 7) + // params pts_assign: (B, N, M): idx of the corresponding box3d, -1 means background points + int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; + int box_idx = blockIdx.y; + int bs_idx = blockIdx.z; + + if (pt_idx >= pts_num || box_idx >= boxes_num || bs_idx >= batch_size){ + return; + } + int assign_idx = bs_idx * pts_num * boxes_num + pt_idx * boxes_num + box_idx; + pts_assign[assign_idx] = 0; + + int box_offset = bs_idx * boxes_num * 7 + box_idx * 7; + int pt_offset = bs_idx * pts_num * 3 + pt_idx * 3; + + int cur_in_flag = pt_in_box3d(xyz[pt_offset], xyz[pt_offset + 1], xyz[pt_offset + 2], boxes3d[box_offset], + boxes3d[box_offset + 1], boxes3d[box_offset + 2], boxes3d[box_offset + 3], + boxes3d[box_offset + 4], boxes3d[box_offset + 5], boxes3d[box_offset + 6], 10.0); + + pts_assign[assign_idx] = cur_in_flag; + // printf("bs=%d, pt=%d, in=%d\n", bs_idx, pt_idx, pts_assign[bs_idx * pts_num + pt_idx]); +} + + +__global__ void get_pooled_idx(int batch_size, int pts_num, int boxes_num, int sampled_pts_num, + const int *pts_assign, int *pts_idx, int *pooled_empty_flag){ + // params xyz: (B, N, 3) + // params pts_feature: (B, N, C) + // params pts_assign: (B, N) + // params pts_idx: (B, M, 512) + // params pooled_empty_flag: (B, M) + + int boxes_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (boxes_idx >= boxes_num){ + return; + } + + int bs_idx = blockIdx.y; + + int cnt = 0; + for (int k = 0; k < pts_num; k++){ + if (pts_assign[bs_idx * pts_num * boxes_num + k * boxes_num + boxes_idx]){ + if (cnt < sampled_pts_num){ + pts_idx[bs_idx * boxes_num * sampled_pts_num + boxes_idx * sampled_pts_num + cnt] = k; + cnt++; + } + else break; + } + } + + if (cnt == 0){ + pooled_empty_flag[bs_idx * boxes_num + boxes_idx] = 1; + } + else if (cnt < sampled_pts_num){ + // duplicate same points for sampling + for (int k = cnt; k < sampled_pts_num; k++){ + int duplicate_idx = k % cnt; + int base_offset = bs_idx * boxes_num * sampled_pts_num + boxes_idx * sampled_pts_num; + pts_idx[base_offset + k] = pts_idx[base_offset + duplicate_idx]; + } + } +} + + +__global__ void roipool3d_forward(int batch_size, int pts_num, int boxes_num, int feature_in_len, int sampled_pts_num, + const float *xyz, const int *pts_idx, const float *pts_feature, + float *pooled_features, int *pooled_empty_flag){ + // params xyz: (B, N, 3) + // params pts_idx: (B, M, 512) + // params pts_feature: (B, N, C) + // params pooled_features: (B, M, 512, 3+C) + // params pooled_empty_flag: (B, M) + + int sample_pt_idx = blockIdx.x * blockDim.x + threadIdx.x; + int box_idx = blockIdx.y; + int bs_idx = blockIdx.z; + + if (sample_pt_idx >= sampled_pts_num || box_idx >= boxes_num || bs_idx >= batch_size){ + return; + } + + if (pooled_empty_flag[bs_idx * boxes_num + box_idx]){ + return; + } + + int temp_idx = bs_idx * boxes_num * sampled_pts_num + box_idx * sampled_pts_num + sample_pt_idx; + int src_pt_idx = pts_idx[temp_idx]; + int dst_feature_offset = temp_idx * (3 + feature_in_len); + + for (int j = 0; j < 3; j++) + pooled_features[dst_feature_offset + j] = xyz[bs_idx * pts_num * 3 + src_pt_idx * 3 + j]; + + int 
+
+
+__global__ void roipool3d_forward(int batch_size, int pts_num, int boxes_num, int feature_in_len, int sampled_pts_num,
+                                  const float *xyz, const int *pts_idx, const float *pts_feature,
+                                  float *pooled_features, int *pooled_empty_flag){
+    // params xyz: (B, N, 3)
+    // params pts_idx: (B, M, 512)
+    // params pts_feature: (B, N, C)
+    // params pooled_features: (B, M, 512, 3+C)
+    // params pooled_empty_flag: (B, M)
+
+    int sample_pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
+    int box_idx = blockIdx.y;
+    int bs_idx = blockIdx.z;
+
+    if (sample_pt_idx >= sampled_pts_num || box_idx >= boxes_num || bs_idx >= batch_size){
+        return;
+    }
+
+    if (pooled_empty_flag[bs_idx * boxes_num + box_idx]){
+        return;
+    }
+
+    int temp_idx = bs_idx * boxes_num * sampled_pts_num + box_idx * sampled_pts_num + sample_pt_idx;
+    int src_pt_idx = pts_idx[temp_idx];
+    int dst_feature_offset = temp_idx * (3 + feature_in_len);
+
+    for (int j = 0; j < 3; j++)
+        pooled_features[dst_feature_offset + j] = xyz[bs_idx * pts_num * 3 + src_pt_idx * 3 + j];
+
+    int src_feature_offset = bs_idx * pts_num * feature_in_len + src_pt_idx * feature_in_len;
+    for (int j = 0; j < feature_in_len; j++)
+        pooled_features[dst_feature_offset + 3 + j] = pts_feature[src_feature_offset + j];
+}
+
+
+void roipool3dLauncher_slow(int batch_size, int pts_num, int boxes_num, int feature_in_len, int sampled_pts_num,
+                            const float *xyz, const float *boxes3d, const float *pts_feature, float *pooled_features, int *pooled_empty_flag){
+    roipool3d_forward<<<DIVUP(boxes_num, THREADS_PER_BLOCK), THREADS_PER_BLOCK>>>(batch_size, pts_num, boxes_num, feature_in_len, sampled_pts_num,
+                                                                                  xyz, boxes3d, pts_feature, pooled_features, pooled_empty_flag);
+
+#ifdef DEBUG
+    cudaDeviceSynchronize();  // for using printf in kernel function
+#endif
+}
+
+
+void roipool3dLauncher(int batch_size, int pts_num, int boxes_num, int feature_in_len, int sampled_pts_num,
+                       const float *xyz, const float *boxes3d, const float *pts_feature, float *pooled_features, int *pooled_empty_flag){
+
+    // printf("batch_size=%d, pts_num=%d, boxes_num=%d\n", batch_size, pts_num, boxes_num);
+    int *pts_assign = NULL;
+    cudaMalloc(&pts_assign, batch_size * pts_num * boxes_num * sizeof(int));  // (batch_size, N, M)
+    // cudaMemset(&pts_assign, -1, batch_size * pts_num * boxes_num * sizeof(int));
+
+    dim3 blocks(DIVUP(pts_num, THREADS_PER_BLOCK), boxes_num, batch_size);  // blockIdx.x(col), blockIdx.y(row)
+    dim3 threads(THREADS_PER_BLOCK);
+    assign_pts_to_box3d<<<blocks, threads>>>(batch_size, pts_num, boxes_num, xyz, boxes3d, pts_assign);
+
+    int *pts_idx = NULL;
+    cudaMalloc(&pts_idx, batch_size * boxes_num * sampled_pts_num * sizeof(int));  // (batch_size, M, sampled_pts_num)
+
+    dim3 blocks2(DIVUP(boxes_num, THREADS_PER_BLOCK), batch_size);  // blockIdx.x(col), blockIdx.y(row)
+    get_pooled_idx<<<blocks2, threads>>>(batch_size, pts_num, boxes_num, sampled_pts_num, pts_assign, pts_idx, pooled_empty_flag);
+
+    dim3 blocks_pool(DIVUP(sampled_pts_num, THREADS_PER_BLOCK), boxes_num, batch_size);
+    roipool3d_forward<<<blocks_pool, threads>>>(batch_size, pts_num, boxes_num, feature_in_len, sampled_pts_num,
+                                                xyz, pts_idx, pts_feature, pooled_features, pooled_empty_flag);
+
+    cudaFree(pts_assign);
+    cudaFree(pts_idx);
+
+#ifdef DEBUG
+    cudaDeviceSynchronize();  // for using printf in kernel function
+#endif
+}
\ No newline at end of file
diff --git a/PointRCNN/lib/utils/simple_roipool3d/setup.py b/PointRCNN/lib/utils/simple_roipool3d/setup.py
new file mode 100644
index 0000000..76081d3
--- /dev/null
+++ b/PointRCNN/lib/utils/simple_roipool3d/setup.py
@@ -0,0 +1,14 @@
+from setuptools import setup
+from torch.utils.cpp_extension import BuildExtension, CUDAExtension
+
+setup(
+    name='simple_roipool3d',
+    ext_modules=[
+        CUDAExtension('simple_roipool3d_cuda', [
+            'src/simple_roipool3d.cpp',
+            'src/simple_roipool3d_kernel.cu',
+        ],
+        extra_compile_args={'cxx': ['-g'],
+                            'nvcc': ['-O2']})
+    ],
+    cmdclass={'build_ext': BuildExtension})
diff --git a/PointRCNN/lib/utils/simple_roipool3d/simple_roipool3d_utils.py b/PointRCNN/lib/utils/simple_roipool3d/simple_roipool3d_utils.py
new file mode 100644
index 0000000..7783cc9
--- /dev/null
+++ b/PointRCNN/lib/utils/simple_roipool3d/simple_roipool3d_utils.py
@@ -0,0 +1,111 @@
+import torch
+import simple_roipool3d_cuda
+import roipool3d_cuda  # the CPU helpers below call into the original roipool3d extension
+import numpy as np
+import lib.utils.kitti_utils as kitti_utils
+
+
+def simple_roipool3d_gpu(pts, pts_feature, boxes3d, pool_extra_width, sampled_pt_num=512):
+    """
+    :param pts: (B, N, 3)
+    :param pts_feature: (B, N, C)
+    :param boxes3d: (B, M, 7)
+    :param pool_extra_width: float
+    :param sampled_pt_num: int
+    :return:
+        pts_idx: (B, M, 512), indices of the points pooled for each box
+        pooled_empty_flag: (B, M)
+    """
+    batch_size, boxes_num, feature_len = pts.shape[0], boxes3d.shape[1], pts_feature.shape[2]
+    pooled_boxes3d = kitti_utils.enlarge_box3d(boxes3d.view(-1, 7), pool_extra_width).view(batch_size, -1, 7)
+
+    pts_idx = torch.cuda.IntTensor(torch.Size((batch_size, boxes_num, sampled_pt_num))).zero_()
+    pooled_empty_flag = torch.cuda.IntTensor(torch.Size((batch_size, boxes_num))).zero_()
+
+    simple_roipool3d_cuda.forward(pts.contiguous(), pooled_boxes3d.contiguous(),
+                                  pts_feature.contiguous(), pts_idx, pooled_empty_flag)
+
+    return pts_idx, pooled_empty_flag
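+# Note: unlike roipool3d_gpu, this simple variant returns per-box point *indices* rather
+# than materialized features; the caller gathers whatever it needs. A minimal gather
+# sketch under that assumption (idx_long, pooled_xyz and pooled_feat are hypothetical):
+#   idx_long = pts_idx.long().view(batch_size, -1)                    # (B, M*512)
+#   pooled_xyz = torch.gather(pts, 1, idx_long.unsqueeze(-1).expand(-1, -1, 3))
+#   pooled_feat = torch.gather(pts_feature, 1,
+#                              idx_long.unsqueeze(-1).expand(-1, -1, feature_len))
+#   pooled_xyz = pooled_xyz.view(batch_size, boxes_num, sampled_pt_num, 3)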
+
+
+def pts_in_boxes3d_cpu(pts, boxes3d):
+    """
+    :param pts: (N, 3) in rect-camera coords
+    :param boxes3d: (M, 7)
+    :return: boxes_pts_mask_list: (M), list with [(N), (N), ..]
+    """
+    if not pts.is_cuda:
+        pts = pts.float().contiguous()
+        boxes3d = boxes3d.float().contiguous()
+        pts_flag = torch.LongTensor(torch.Size((boxes3d.size(0), pts.size(0))))  # (M, N)
+        roipool3d_cuda.pts_in_boxes3d_cpu(pts_flag, pts, boxes3d)
+
+        boxes_pts_mask_list = []
+        for k in range(0, boxes3d.shape[0]):
+            cur_mask = pts_flag[k] > 0
+            boxes_pts_mask_list.append(cur_mask)
+        return boxes_pts_mask_list
+    else:
+        raise NotImplementedError
+
+
+def roipool_pc_cpu(pts, pts_feature, boxes3d, sampled_pt_num):
+    """
+    :param pts: (N, 3)
+    :param pts_feature: (N, C)
+    :param boxes3d: (M, 7)
+    :param sampled_pt_num: int
+    :return:
+    """
+    pts = pts.cpu().float().contiguous()
+    pts_feature = pts_feature.cpu().float().contiguous()
+    boxes3d = boxes3d.cpu().float().contiguous()
+    assert pts.shape[0] == pts_feature.shape[0] and pts.shape[1] == 3, '%s %s' % (pts.shape, pts_feature.shape)
+    assert pts.is_cuda is False
+    pooled_pts = torch.FloatTensor(torch.Size((boxes3d.shape[0], sampled_pt_num, 3))).zero_()
+    pooled_features = torch.FloatTensor(torch.Size((boxes3d.shape[0], sampled_pt_num, pts_feature.shape[1]))).zero_()
+    pooled_empty_flag = torch.LongTensor(boxes3d.shape[0]).zero_()
+    roipool3d_cuda.roipool3d_cpu(pts, boxes3d, pts_feature, pooled_pts, pooled_features, pooled_empty_flag)
+    return pooled_pts, pooled_features, pooled_empty_flag
+
+
+def roipool3d_cpu(boxes3d, pts, pts_feature, pts_extra_input, pool_extra_width, sampled_pt_num=512,
+                  canonical_transform=True):
+    """
+    :param boxes3d: (M, 7)
+    :param pts: (N, 3)
+    :param pts_feature: (N, C)
+    :param pts_extra_input: (N, C2)
+    :param pool_extra_width: constant
+    :param sampled_pt_num: constant
+    :return:
+    """
+    pooled_boxes3d = kitti_utils.enlarge_box3d(boxes3d, pool_extra_width)
+
+    pts_feature_all = np.concatenate((pts_extra_input, pts_feature), axis=1)
+
+    # Note: if pooled_empty_flag[i] > 0, the pooled_pts[i], pooled_features[i] will be zero
+    pooled_pts, pooled_features, pooled_empty_flag = \
+        roipool_pc_cpu(torch.from_numpy(pts), torch.from_numpy(pts_feature_all),
+                       torch.from_numpy(pooled_boxes3d), sampled_pt_num)
+
+    extra_input_len = pts_extra_input.shape[1]
+    sampled_pts_input = torch.cat((pooled_pts, pooled_features[:, :, 0:extra_input_len]), dim=2).numpy()
+    sampled_pts_feature = pooled_features[:, :, extra_input_len:].numpy()
+
+    if canonical_transform:
+        # Translate to the roi coordinates
+        roi_ry = boxes3d[:, 6] % (2 * np.pi)  # 0 ~ 2pi
+        roi_center = boxes3d[:, 0:3]
+
+        # shift to center
+        sampled_pts_input[:, :, 0:3] = sampled_pts_input[:, :, 0:3] - roi_center[:, np.newaxis, :]
+        for k in range(sampled_pts_input.shape[0]):
+            sampled_pts_input[k] = kitti_utils.rotate_pc_along_y(sampled_pts_input[k], roi_ry[k])
+
+        return sampled_pts_input, sampled_pts_feature
+
+    return sampled_pts_input, sampled_pts_feature, pooled_empty_flag.numpy()
+
+
+if __name__ == '__main__':
+    pass
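+    # A minimal smoke test, assuming both extensions are built and a CUDA device is
+    # available (shapes and values below are hypothetical, not from the original file):
+    # if torch.cuda.is_available():
+    #     pts = torch.rand(1, 1024, 3).cuda() * 40.0
+    #     feats = torch.rand(1, 1024, 128).cuda()
+    #     boxes = torch.tensor([[[10.0, 1.0, 20.0, 1.5, 1.6, 3.9, 0.3]]]).cuda()
+    #     pts_idx, empty = simple_roipool3d_gpu(pts, feats, boxes, pool_extra_width=1.0)
+    #     print(pts_idx.shape, empty.shape)  # torch.Size([1, 1, 512]) torch.Size([1, 1])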
diff --git a/PointRCNN/lib/utils/simple_roipool3d/src/simple_roipool3d.cpp b/PointRCNN/lib/utils/simple_roipool3d/src/simple_roipool3d.cpp
new file mode 100644
index 0000000..fe11bda
--- /dev/null
+++ b/PointRCNN/lib/utils/simple_roipool3d/src/simple_roipool3d.cpp
@@ -0,0 +1,200 @@
+#include <torch/serialize/tensor.h>
+#include <torch/extension.h>
+
+
+#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
+#define CHECK_CONTIGUOUS(x) AT_CHECK(x.is_contiguous(), #x, " must be contiguous ")
+#define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x)
+
+void roipool3dLauncher_slow(int batch_size, int pts_num, int boxes_num, int feature_in_len, int sampled_pts_num,
+                            const float *xyz, const float *boxes3d, const float *pts_feature, float *pooled_features, int *pooled_empty_flag);
+
+void roipool3dLauncher(int batch_size, int pts_num, int boxes_num, int feature_in_len, int sampled_pts_num,
+                       const float *xyz, const float *boxes3d, int *pts_idx, int *pooled_empty_flag);
+
+int roipool3d_gpu_slow(at::Tensor xyz, at::Tensor boxes3d, at::Tensor pts_feature, at::Tensor pooled_features, at::Tensor pooled_empty_flag){
+    // params xyz: (B, N, 3)
+    // params boxes3d: (B, M, 7)
+    // params pts_feature: (B, N, C)
+    // params pooled_features: (B, M, 512, 3+C)
+    // params pooled_empty_flag: (B, M)
+    CHECK_INPUT(xyz);
+    CHECK_INPUT(boxes3d);
+    CHECK_INPUT(pts_feature);
+    CHECK_INPUT(pooled_features);
+    CHECK_INPUT(pooled_empty_flag);
+
+    int batch_size = xyz.size(0);
+    int pts_num = xyz.size(1);
+    int boxes_num = boxes3d.size(1);
+    int feature_in_len = pts_feature.size(2);
+    int sampled_pts_num = pooled_features.size(2);
+
+    const float * xyz_data = xyz.data<float>();
+    const float * boxes3d_data = boxes3d.data<float>();
+    const float * pts_feature_data = pts_feature.data<float>();
+    float * pooled_features_data = pooled_features.data<float>();
+    int * pooled_empty_flag_data = pooled_empty_flag.data<int>();
+
+    roipool3dLauncher_slow(batch_size, pts_num, boxes_num, feature_in_len, sampled_pts_num,
+                           xyz_data, boxes3d_data, pts_feature_data, pooled_features_data, pooled_empty_flag_data);
+
+    return 1;
+}
+
+
+int roipool3d_gpu(at::Tensor xyz, at::Tensor boxes3d, at::Tensor pts_feature, at::Tensor pts_idx, at::Tensor pooled_empty_flag){
+    // params xyz: (B, N, 3)
+    // params boxes3d: (B, M, 7)
+    // params pts_feature: (B, N, C)
+    // params pts_idx: (B, M, 512)
+    // params pooled_empty_flag: (B, M)
+    CHECK_INPUT(xyz);
+    CHECK_INPUT(boxes3d);
+    CHECK_INPUT(pts_feature);
+    CHECK_INPUT(pts_idx);
+    CHECK_INPUT(pooled_empty_flag);
+
+    int batch_size = xyz.size(0);
+    int pts_num = xyz.size(1);
+    int boxes_num = boxes3d.size(1);
+    int feature_in_len = pts_feature.size(2);
+    int sampled_pts_num = pts_idx.size(2);
+
+    const float * xyz_data = xyz.data<float>();
+    const float * boxes3d_data = boxes3d.data<float>();
+    //const float * pts_feature_data = pts_feature.data<float>();
+    //float * pooled_features_data = pooled_features.data<float>();
+    int * pts_idx_data = pts_idx.data<int>();
+    int * pooled_empty_flag_data = pooled_empty_flag.data<int>();
+
+    roipool3dLauncher(batch_size, pts_num, boxes_num, feature_in_len, sampled_pts_num,
+                      xyz_data, boxes3d_data, pts_idx_data, pooled_empty_flag_data);
+
+    return 1;
+}
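+
+// Note: boxes3d rows follow the KITTI label convention (x, y, z, h, w, l, ry) in
+// rect-camera coordinates, where (x, y, z) is the bottom-face center and y points down;
+// hence the test below recovers the geometric center with cy = bottom_y - h / 2 before
+// rotating the point into the box frame about the y axis.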
+
+int pt_in_box3d_cpu(float x, float y, float z, float cx, float bottom_y, float cz, float h, float w, float l, float angle){
+    float max_dis = 10.0, x_rot, z_rot, cosa, sina, cy;
+    int in_flag;
+    cy = bottom_y - h / 2.0;
+    if ((fabsf(x - cx) > max_dis) || (fabsf(y - cy) > h / 2.0) || (fabsf(z - cz) > max_dis)){
+        return 0;
+    }
+    cosa = cos(angle); sina = sin(angle);
+    x_rot = (x - cx) * cosa + (z - cz) * (-sina);
+    z_rot = (x - cx) * sina + (z - cz) * cosa;
+
+    in_flag = (x_rot >= -l / 2.0) & (x_rot <= l / 2.0) & (z_rot >= -w / 2.0) & (z_rot <= w / 2.0);
+    return in_flag;
+}
+
+int pts_in_boxes3d_cpu(at::Tensor pts_flag, at::Tensor pts, at::Tensor boxes3d){
+    // param pts_flag: (M, N), 0 or 1
+    // param pts: (N, 3)
+    // param boxes3d: (M, 7) [x, y, z, h, w, l, ry]
+
+    CHECK_CONTIGUOUS(pts_flag);
+    CHECK_CONTIGUOUS(pts);
+    CHECK_CONTIGUOUS(boxes3d);
+
+    long boxes_num = boxes3d.size(0);
+    long pts_num = pts.size(0);
+
+    long * pts_flag_flat = pts_flag.data<long>();
+    float * pts_flat = pts.data<float>();
+    float * boxes3d_flat = boxes3d.data<float>();
+
+    memset(pts_flag_flat, 0, boxes_num * pts_num * sizeof(long));
+
+    int i, j, cur_in_flag;
+    for (i = 0; i < boxes_num; i++){
+        for (j = 0; j < pts_num; j++){
+            cur_in_flag = pt_in_box3d_cpu(pts_flat[j * 3], pts_flat[j * 3 + 1], pts_flat[j * 3 + 2], boxes3d_flat[i * 7],
+                                          boxes3d_flat[i * 7 + 1], boxes3d_flat[i * 7 + 2], boxes3d_flat[i * 7 + 3],
+                                          boxes3d_flat[i * 7 + 4], boxes3d_flat[i * 7 + 5], boxes3d_flat[i * 7 + 6]);
+            pts_flag_flat[i * pts_num + j] = cur_in_flag;
+        }
+    }
+    return 1;
+}
+
+int roipool3d_cpu(at::Tensor pts, at::Tensor boxes3d, at::Tensor pts_feature, at::Tensor pooled_pts,
+                  at::Tensor pooled_features, at::Tensor pooled_empty_flag){
+    // param pts: (N, 3) [x, y, z]
+    // param boxes3d: (M, 7) [x, y, z, h, w, l, ry]
+    // param pts_feature: (N, C)
+    // param pooled_pts: (M, 512, 3)
+    // param pooled_features: (M, 512, C)
+    CHECK_CONTIGUOUS(pts);
+    CHECK_CONTIGUOUS(boxes3d);
+    CHECK_CONTIGUOUS(pts_feature);
+    CHECK_CONTIGUOUS(pooled_pts);
+    CHECK_CONTIGUOUS(pooled_features);
+    CHECK_CONTIGUOUS(pooled_empty_flag);
+
+    long boxes_num = boxes3d.size(0);
+    long pts_num = pts.size(0);
+    long feature_len = pts_feature.size(1);
+    long sampled_pts_num = pooled_pts.size(1);
+
+    float * pts_flat = pts.data<float>();
+    float * boxes3d_flat = boxes3d.data<float>();
+    float * pts_feature_flat = pts_feature.data<float>();
+    float * pooled_pts_flat = pooled_pts.data<float>();
+    float * pooled_features_flat = pooled_features.data<float>();
+    long * pooled_empty_flag_flat = pooled_empty_flag.data<long>();
+
+    memset(pooled_empty_flag_flat, 0, boxes_num * sizeof(long));
+
+    int i, j, k, cnt, temp_idx, duplicate_idx, cur_in_flag;
+    for (i = 0; i < boxes_num; i++){
+        cnt = 0;
+        for (j = 0; j < pts_num; j++){
+            cur_in_flag = pt_in_box3d_cpu(pts_flat[j * 3], pts_flat[j * 3 + 1], pts_flat[j * 3 + 2], boxes3d_flat[i * 7],
+                                          boxes3d_flat[i * 7 + 1], boxes3d_flat[i * 7 + 2], boxes3d_flat[i * 7 + 3],
+                                          boxes3d_flat[i * 7 + 4], boxes3d_flat[i * 7 + 5], boxes3d_flat[i * 7 + 6]);
+
+            if (cur_in_flag){
+                if (cnt < sampled_pts_num){
+                    temp_idx = i * sampled_pts_num * 3 + cnt * 3;
+                    for (k = 0; k < 3; k++) pooled_pts_flat[temp_idx + k] = pts_flat[j * 3 + k];
+                    temp_idx = i * sampled_pts_num * feature_len + cnt * feature_len;
+                    for (k = 0; k < feature_len; k++) pooled_features_flat[temp_idx + k] = pts_feature_flat[j * feature_len + k];
+                    cnt++;
+                }
+                else break;
+            }
+        }
+
+        if (cnt == 0){
+            // no points in this box
+            pooled_empty_flag_flat[i] = 1;
+        }
+        else if (cnt < sampled_pts_num){
+            // duplicate same points
+            duplicate_idx = 0;
+            for (j = cnt; j < sampled_pts_num; j++){
+                temp_idx = i * sampled_pts_num * 3 + j * 3;
+                duplicate_idx = i * sampled_pts_num * 3 + (j % cnt) * 3;
+                for (k = 0; k < 3; k++) pooled_pts_flat[temp_idx + k] = pooled_pts_flat[duplicate_idx + k];
+                temp_idx = i * sampled_pts_num * feature_len + j * feature_len;
+                duplicate_idx = i * sampled_pts_num * feature_len + (j % cnt) * feature_len;
+                for (k = 0; k < feature_len; k++){
+                    pooled_features_flat[temp_idx + k] = pooled_features_flat[duplicate_idx + k];
+                }
+            }
+        }
+    }
+    return 1;
+}
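+
+// Note: when a box contains 0 < cnt < sampled_pts_num points, the remaining slots are
+// filled by cycling over the points already pooled (index j % cnt); e.g. with cnt = 3
+// and sampled_pts_num = 8 the slots hold points 0 1 2 0 1 2 0 1. Downstream consumers
+// therefore always see a fixed-size sample per box, with repeats for sparse boxes.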
+
+
+PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
+    m.def("forward", &roipool3d_gpu, "roipool3d forward (CUDA)");
+}
+
diff --git a/PointRCNN/lib/utils/simple_roipool3d/src/simple_roipool3d_kernel.cu b/PointRCNN/lib/utils/simple_roipool3d/src/simple_roipool3d_kernel.cu
new file mode 100644
index 0000000..42f1669
--- /dev/null
+++ b/PointRCNN/lib/utils/simple_roipool3d/src/simple_roipool3d_kernel.cu
@@ -0,0 +1,237 @@
+/*
+Point cloud feature pooling
+Written by Shaoshuai Shi
+All Rights Reserved 2018.
+*/
+
+#include <math.h>
+#include <stdio.h>
+
+#define THREADS_PER_BLOCK 256
+#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
+// #define DEBUG
+
+__device__ inline int pt_in_box3d(float x, float y, float z, float cx, float bottom_y, float cz, float h, float w,
+                                  float l, float angle, float max_dis){
+    float x_rot, z_rot, cosa, sina, cy;
+    int in_flag;
+    cy = bottom_y - h / 2.0;
+    if ((fabsf(x - cx) > max_dis) || (fabsf(y - cy) > h / 2.0) || (fabsf(z - cz) > max_dis)){
+        return 0;
+    }
+    cosa = cos(angle); sina = sin(angle);
+    x_rot = (x - cx) * cosa + (z - cz) * (-sina);
+    z_rot = (x - cx) * sina + (z - cz) * cosa;
+
+    in_flag = (x_rot >= -l / 2.0) & (x_rot <= l / 2.0) & (z_rot >= -w / 2.0) & (z_rot <= w / 2.0);
+    return in_flag;
+}
+
+
+__global__ void roipool3d_forward(int batch_size, int pts_num, int boxes_num, int feature_in_len, int sampled_pts_num,
+                                  const float *xyz, const float *boxes3d, const float *pts_feature,
+                                  float *pooled_features, int *pooled_empty_flag){
+    // params xyz: (B, N, 3)
+    // params boxes3d: (B, M, 7)
+    // params pts_feature: (B, N, C)
+    // params pooled_features: (B, M, 512, 3+C)
+    // params pooled_empty_flag: (B, M)
+
+    int boxes_idx = blockIdx.x * blockDim.x + threadIdx.x;
+    if (boxes_idx >= boxes_num){
+        return;
+    }
+
+    for (int i = 0; i < batch_size; i++){
+        int cnt = 0;
+        for (int k = 0; k < pts_num; k++){
+            int pt_offset = i * pts_num * 3 + k * 3;
+            int box_offset = i * boxes_num * 7 + boxes_idx * 7;
+
+            int cur_in_flag = pt_in_box3d(xyz[pt_offset], xyz[pt_offset + 1], xyz[pt_offset + 2], boxes3d[box_offset],
+                                          boxes3d[box_offset + 1], boxes3d[box_offset + 2], boxes3d[box_offset + 3],
+                                          boxes3d[box_offset + 4], boxes3d[box_offset + 5], boxes3d[box_offset + 6], 10.0);
+            if (cur_in_flag){
+                if (cnt < sampled_pts_num){
+                    int feature_out_offset = i * boxes_num * sampled_pts_num * (3 + feature_in_len) +
+                                             boxes_idx * sampled_pts_num * (3 + feature_in_len) +
+                                             cnt * (3 + feature_in_len);
+
+                    int feature_in_offset = i * pts_num * feature_in_len + k * feature_in_len;
+
+                    // copy xyz
+                    for (int j = 0; j < 3; j++)
+                        pooled_features[feature_out_offset + j] = xyz[pt_offset + j];
+
+                    // copy feature
+                    for (int j = 0; j < feature_in_len; j++)
+                        pooled_features[feature_out_offset + 3 + j] = pts_feature[feature_in_offset + j];
+
+                    cnt++;
+                }
+                else break;
+            }
+        }
+
+        if (cnt == 0){
+            pooled_empty_flag[i * boxes_num + boxes_idx] = 1;
+        }
+        else if (cnt < sampled_pts_num){
+            // duplicate same points for sampling
+            for (int k = cnt; k < sampled_pts_num; k++){
+                int duplicate_idx = k % cnt;
+                int src_offset = i * boxes_num * sampled_pts_num * (3 + feature_in_len) +
+                                 boxes_idx * sampled_pts_num * (3 + feature_in_len) +
+                                 duplicate_idx * (3 + feature_in_len);
+                int dst_offset = i * boxes_num * sampled_pts_num * (3 + feature_in_len) +
+                                 boxes_idx * sampled_pts_num * (3 + feature_in_len) +
+                                 k * (3 + feature_in_len);
+                for (int j = 0; j < 3 + feature_in_len; j++)
+                    pooled_features[dst_offset + j] = pooled_features[src_offset + j];
+            }
+        }
+    }
+}
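+// Note: the device code above (and the two kernels below) duplicates
+// roipool3d_kernel.cu; only the launcher at the bottom of this file differs, stopping
+// after the index-compaction step instead of materializing pooled features.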
+
+
+__global__ void assign_pts_to_box3d(int batch_size, int pts_num, int boxes_num, const float *xyz, const float *boxes3d, int *pts_assign){
+    // params xyz: (B, N, 3)
+    // params boxes3d: (B, M, 7)
+    // params pts_assign: (B, N, M): 1 if the point falls inside the box, 0 otherwise
+    int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
+    int box_idx = blockIdx.y;
+    int bs_idx = blockIdx.z;
+
+    if (pt_idx >= pts_num || box_idx >= boxes_num || bs_idx >= batch_size){
+        return;
+    }
+    int assign_idx = bs_idx * pts_num * boxes_num + pt_idx * boxes_num + box_idx;
+    pts_assign[assign_idx] = 0;
+
+    int box_offset = bs_idx * boxes_num * 7 + box_idx * 7;
+    int pt_offset = bs_idx * pts_num * 3 + pt_idx * 3;
+
+    int cur_in_flag = pt_in_box3d(xyz[pt_offset], xyz[pt_offset + 1], xyz[pt_offset + 2], boxes3d[box_offset],
+                                  boxes3d[box_offset + 1], boxes3d[box_offset + 2], boxes3d[box_offset + 3],
+                                  boxes3d[box_offset + 4], boxes3d[box_offset + 5], boxes3d[box_offset + 6], 10.0);
+
+    pts_assign[assign_idx] = cur_in_flag;
+    // printf("bs=%d, pt=%d, in=%d\n", bs_idx, pt_idx, pts_assign[bs_idx * pts_num + pt_idx]);
+}
+
+
+__global__ void get_pooled_idx(int batch_size, int pts_num, int boxes_num, int sampled_pts_num,
+                               const int *pts_assign, int *pts_idx, int *pooled_empty_flag){
+    // params pts_assign: (B, N, M)
+    // params pts_idx: (B, M, 512)
+    // params pooled_empty_flag: (B, M)
+
+    int boxes_idx = blockIdx.x * blockDim.x + threadIdx.x;
+    if (boxes_idx >= boxes_num){
+        return;
+    }
+
+    int bs_idx = blockIdx.y;
+
+    int cnt = 0;
+    for (int k = 0; k < pts_num; k++){
+        if (pts_assign[bs_idx * pts_num * boxes_num + k * boxes_num + boxes_idx]){
+            if (cnt < sampled_pts_num){
+                pts_idx[bs_idx * boxes_num * sampled_pts_num + boxes_idx * sampled_pts_num + cnt] = k;
+                cnt++;
+            }
+            else break;
+        }
+    }
+
+    if (cnt == 0){
+        pooled_empty_flag[bs_idx * boxes_num + boxes_idx] = 1;
+    }
+    else if (cnt < sampled_pts_num){
+        // duplicate same points for sampling
+        for (int k = cnt; k < sampled_pts_num; k++){
+            int duplicate_idx = k % cnt;
+            int base_offset = bs_idx * boxes_num * sampled_pts_num + boxes_idx * sampled_pts_num;
+            pts_idx[base_offset + k] = pts_idx[base_offset + duplicate_idx];
+        }
+    }
+}
+
+
+__global__ void roipool3d_forward(int batch_size, int pts_num, int boxes_num, int feature_in_len, int sampled_pts_num,
+                                  const float *xyz, const int *pts_idx, const float *pts_feature,
+                                  float *pooled_features, int *pooled_empty_flag){
+    // params xyz: (B, N, 3)
+    // params pts_idx: (B, M, 512)
+    // params pts_feature: (B, N, C)
+    // params pooled_features: (B, M, 512, 3+C)
+    // params pooled_empty_flag: (B, M)
+
+    int sample_pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
+    int box_idx = blockIdx.y;
+    int bs_idx = blockIdx.z;
+
+    if (sample_pt_idx >= sampled_pts_num || box_idx >= boxes_num || bs_idx >= batch_size){
+        return;
+    }
+
+    if (pooled_empty_flag[bs_idx * boxes_num + box_idx]){
+        return;
+    }
+
+    int temp_idx = bs_idx * boxes_num * sampled_pts_num + box_idx * sampled_pts_num + sample_pt_idx;
+    int src_pt_idx = pts_idx[temp_idx];
+    int dst_feature_offset = temp_idx * (3 + feature_in_len);
+
+    for (int j = 0; j < 3; j++)
+        pooled_features[dst_feature_offset + j] = xyz[bs_idx * pts_num * 3 + src_pt_idx * 3 + j];
+
+    int src_feature_offset = bs_idx * pts_num * feature_in_len + src_pt_idx * feature_in_len;
+    for (int j = 0; j < feature_in_len; j++)
+        pooled_features[dst_feature_offset + 3 + j] = pts_feature[src_feature_offset + j];
+}
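+// Note: in this simple variant the pooling kernel above is compiled but never launched;
+// the launcher below stops after get_pooled_idx, returning only pts_idx and
+// pooled_empty_flag (the corresponding launch is left commented out).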
+
+
+void roipool3dLauncher_slow(int batch_size, int pts_num, int boxes_num, int feature_in_len, int sampled_pts_num,
+                            const float *xyz, const float *boxes3d, const float *pts_feature, float *pooled_features, int *pooled_empty_flag){
+    roipool3d_forward<<<DIVUP(boxes_num, THREADS_PER_BLOCK), THREADS_PER_BLOCK>>>(batch_size, pts_num, boxes_num, feature_in_len, sampled_pts_num,
+                                                                                  xyz, boxes3d, pts_feature, pooled_features, pooled_empty_flag);
+
+#ifdef DEBUG
+    cudaDeviceSynchronize();  // for using printf in kernel function
+#endif
+}
+
+
+void roipool3dLauncher(int batch_size, int pts_num, int boxes_num, int feature_in_len, int sampled_pts_num,
+                       const float *xyz, const float *boxes3d, int *pts_idx, int *pooled_empty_flag){
+
+    // printf("batch_size=%d, pts_num=%d, boxes_num=%d\n", batch_size, pts_num, boxes_num);
+    int *pts_assign = NULL;
+    cudaMalloc(&pts_assign, batch_size * pts_num * boxes_num * sizeof(int));  // (batch_size, N, M)
+    // cudaMemset(&pts_assign, -1, batch_size * pts_num * boxes_num * sizeof(int));
+
+    dim3 blocks(DIVUP(pts_num, THREADS_PER_BLOCK), boxes_num, batch_size);  // blockIdx.x(col), blockIdx.y(row)
+    dim3 threads(THREADS_PER_BLOCK);
+    assign_pts_to_box3d<<<blocks, threads>>>(batch_size, pts_num, boxes_num, xyz, boxes3d, pts_assign);
+
+    //int *pts_idx = NULL;
+    //cudaMalloc(&pts_idx, batch_size * boxes_num * sampled_pts_num * sizeof(int));  // (batch_size, M, sampled_pts_num)
+
+    dim3 blocks2(DIVUP(boxes_num, THREADS_PER_BLOCK), batch_size);  // blockIdx.x(col), blockIdx.y(row)
+    get_pooled_idx<<<blocks2, threads>>>(batch_size, pts_num, boxes_num, sampled_pts_num, pts_assign, pts_idx, pooled_empty_flag);
+
+    //dim3 blocks_pool(DIVUP(sampled_pts_num, THREADS_PER_BLOCK), boxes_num, batch_size);
+    //roipool3d_forward<<<blocks_pool, threads>>>(batch_size, pts_num, boxes_num, feature_in_len, sampled_pts_num,
+    //                                            xyz, pts_idx, pts_feature, pooled_features, pooled_empty_flag);
+
+    cudaFree(pts_assign);
+    //cudaFree(pts_idx);
+
+#ifdef DEBUG
+    cudaDeviceSynchronize();  // for using printf in kernel function
+#endif
+}
\ No newline at end of file