diff --git a/forge/test/models/pytorch/multimodal/trajectron/test_trajectron.py b/forge/test/models/pytorch/multimodal/trajectron/test_trajectron.py new file mode 100644 index 000000000..a13ffb3d3 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/test_trajectron.py @@ -0,0 +1,225 @@ +# # SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC + +# # SPDX-License-Identifier: Apache-2.0 + +import sys +sys.path.append("forge/test/models/pytorch/multimodal/trajectron/trajectron/") +import pytest +import forge +from test.models.pytorch.multimodal.trajectron.trajectron.model import Trajectron +from test.models.pytorch.multimodal.trajectron.trajectron.model.model_registrar import ModelRegistrar +from test.models.pytorch.multimodal.trajectron.trajectron.model.dataset import EnvironmentDataset, collate, get_timesteps_data +from forge.verify.compare import compare_with_golden +import os +import json +import dill +import torch +import torch.nn as nn +import numpy as np +from typing import Any +import torch.nn.utils.rnn as rnn +import pytest + + +def load_hyperparams(): + conf_path = "forge/test/models/pytorch/multimodal/trajectron/trajectron/config/config.json" + with open(conf_path, 'r', encoding='utf-8') as conf_json: + hyperparams = json.load(conf_json) + + # Set Default values + hyperparams['scene_freq_mult_eval'] = False + hyperparams['node_freq_mult_eval'] = False + hyperparams['edge_encoding'] = False + hyperparams['incl_robot_node'] = False + hyperparams['use_map_encoding'] = False + + hyperparams['edge_addition_filter'] = [1, 1] + hyperparams['edge_removal_filter'] = [1, 1] + + return hyperparams + +def load_env(): + eval_data_path = "forge/test/models/pytorch/multimodal/trajectron/trajectron/dataset_envs/eth_val.pkl" + with open(eval_data_path, 'rb') as f: + eval_env = dill.load(f, encoding='latin1') + return eval_env + + +class TrajectronWrapper(nn.Module): + def __init__(self, model_dir: str, hyperparams: dict[str, Any], env: Any, scene_index: int, num_samples: int = 1, z_mode: bool = True, gmm_mode: bool = True, all_z_sep: bool = False, full_dist: bool = False): + super().__init__() + + # Build Model registrar + if not os.path.exists(model_dir): + os.makedirs(model_dir, exist_ok=False) + model_config_path = model_dir + "/config.json" + if not os.path.exists(model_config_path): + with open(model_config_path, 'w') as conf_json: + json.dump(hyperparams, conf_json) + model_registrar = ModelRegistrar(model_dir, "cpu") + + # Build Trajectron Model + self.model = Trajectron(model_registrar=model_registrar, hyperparams=hyperparams, log_writer=None, device="cpu") + self.model.set_environment(env=env) + + self.model_dir = model_dir + self.hyperparams = hyperparams + self.env = env + + assert len(self.env.NodeType) == 1 + self.node_type = self.env.NodeType[0] + + self.scene_index = scene_index + self.num_samples = num_samples + self.z_mode = z_mode + self.gmm_mode = gmm_mode + self.all_z_sep = all_z_sep + self.full_dist = full_dist + + def _build_packed_sequence(self, packed_sequence_data, packed_sequence_batch_sizes, packed_sequence_sorted_indices, packed_sequence_unsorted_indices): + packed_sequence = torch.nn.utils.rnn.PackedSequence( + data=packed_sequence_data.squeeze(), + batch_sizes=packed_sequence_batch_sizes.squeeze(), + sorted_indices=packed_sequence_sorted_indices.squeeze(), + unsorted_indices=packed_sequence_unsorted_indices.squeeze(), + ) + return packed_sequence + + def forward(self, x, x_st_t, packed_sequence_data, packed_sequence_batch_sizes, 
packed_sequence_sorted_indices, packed_sequence_unsorted_indices, first_history_index): + neighbors_data_st = None + neighbors_edge_value = None + robot_traj_st_t = None + map = None + + ph = self.hyperparams['prediction_horizon'] + + packed_x_st_t = self._build_packed_sequence(packed_sequence_data, packed_sequence_batch_sizes, packed_sequence_sorted_indices, packed_sequence_unsorted_indices) + + model = self.model.node_models_dict[self.node_type] + predictions = model.predict( + inputs=x, + inputs_st=x_st_t, # Pack and send this + packed_inputs_st=packed_x_st_t, + first_history_indices=first_history_index, + neighbors=neighbors_data_st, + neighbors_edge_value=neighbors_edge_value, + robot=robot_traj_st_t, + map=map, + prediction_horizon=ph, + num_samples=self.num_samples, + z_mode=self.z_mode, + gmm_mode=self.gmm_mode, + full_dist=self.full_dist, + all_z_sep=self.all_z_sep + ) + + return predictions + + def eval(self): + super().eval() + self.model.eval() + + def get_input_batch(self, scene): + ph = self.hyperparams['prediction_horizon'] + timesteps = scene.sample_timesteps(1, min_future_timesteps=ph) + + min_future_timesteps = ph + min_history_timesteps = 1 + + node_type = self.node_type + assert node_type in self.model.pred_state + model = self.model.node_models_dict[node_type] + + # Get Input data for node type and given timesteps + batch = get_timesteps_data(env=self.env, scene=scene, t=timesteps, node_type=node_type, state=self.model.state, + pred_state=self.model.pred_state, edge_types=model.edge_types, + min_ht=min_history_timesteps, max_ht=self.model.max_ht, min_ft=min_future_timesteps, + max_ft=min_future_timesteps, hyperparams=self.hyperparams) + + assert batch is not None + + (first_history_index, + x_t, y_t, x_st_t, y_st_t, + neighbors_data_st, + neighbors_edge_value, + robot_traj_st_t, + map), nodes, timesteps_o = batch + + device = self.model.device + x = x_t.to(device) + x_st_t = x_st_t.to(device) + if robot_traj_st_t is not None: + robot_traj_st_t = robot_traj_st_t.to(device) + + if type(map) == torch.Tensor: + map = map.to(device) + + return (x, x_st_t, first_history_index, neighbors_data_st, neighbors_edge_value, robot_traj_st_t, map), (nodes, timesteps_o) + + +def pack_input_sequences(sequences, lower_indices = None, upper_indices = None, total_length=None): + bs, tf = sequences.shape[:2] + if lower_indices is None: + lower_indices = torch.zeros(bs, dtype=torch.int) + if upper_indices is None: + upper_indices = torch.ones(bs, dtype=torch.int) * (tf - 1) + if total_length is None: + total_length = max(upper_indices) + 1 + # This is done so that we can just pass in self.prediction_timesteps + # (which we want to INCLUDE, so this will exclude the next timestep). 
+ inclusive_break_indices = upper_indices + 1 + + pad_list = list() + for i, seq_len in enumerate(inclusive_break_indices): + pad_list.append(sequences[i, lower_indices[i]:seq_len]) + + packed_seqs = rnn.pack_sequence(pad_list, enforce_sorted=False) + + return packed_seqs + + +def get_packed_sequence_values(packed_sequence): + values = ( + packed_sequence.data.unsqueeze(0).unsqueeze(0), + packed_sequence.batch_sizes.unsqueeze(0), + packed_sequence.sorted_indices.unsqueeze(0), + packed_sequence.unsorted_indices.unsqueeze(0), + ) + return values +@pytest.mark.nightly +@pytest.mark.model_analysis +def test_trajectronpp_pytorch(): + env = load_env() + hyperparams = load_hyperparams() + model_dir = "forge/test/models/pytorch/multimodal/trajectron/trajectron/model_dir" + + + # Build Pytorch Model + pt_model = TrajectronWrapper(model_dir=model_dir, hyperparams=hyperparams, env=env, scene_index=0) + pt_model.eval() + + scene = env.scenes[0] + inputs_batch = pt_model.get_input_batch(scene=scene) + + (x, x_st_t, first_history_index, neighbors_data_st, neighbors_edge_value, robot_traj_st_t, map), (nodes, timesteps_o) = inputs_batch + + packed_x_st_t = pack_input_sequences(x_st_t, lower_indices=first_history_index) + (packed_sequence_data, packed_sequence_batch_sizes, packed_sequence_sorted_indices, packed_sequence_unsorted_indices) = get_packed_sequence_values(packed_x_st_t) + + + + assert neighbors_data_st is None + assert neighbors_edge_value is None + assert robot_traj_st_t is None + assert map is None + # Run CPU Inference + output = pt_model(x, x_st_t, packed_sequence_data, packed_sequence_batch_sizes, packed_sequence_sorted_indices, packed_sequence_unsorted_indices, first_history_index) + inputs = [x, x_st_t, packed_sequence_data, packed_sequence_batch_sizes, packed_sequence_sorted_indices, packed_sequence_unsorted_indices, first_history_index] + compiled_model = forge.compile(pt_model,inputs) + co_out = compiled_model(*inputs) + fw_out = pt_model(*inputs) + + co_out = [co.to("cpu") for co in co_out] + fw_out = [fw_out] if isinstance(fw_out, torch.Tensor) else fw_out + + assert all([compare_with_golden(golden=fo, calculated=co, pcc=0.99) for fo, co in zip(fw_out, co_out)]) diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/__init__.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/__init__.py new file mode 100644 index 000000000..638673905 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/__init__.py @@ -0,0 +1 @@ +from model import Trajectron \ No newline at end of file diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/argument_parser.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/argument_parser.py new file mode 100644 index 000000000..3eb5f047b --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/argument_parser.py @@ -0,0 +1,172 @@ +import argparse + +parser = argparse.ArgumentParser() +parser.add_argument("--conf", + help="path to json config file for hyperparameters", + type=str, + default='../config/config.json') + +parser.add_argument("--debug", + help="disable all disk writing processes.", + action='store_true') + +parser.add_argument("--preprocess_workers", + help="number of processes to spawn for preprocessing", + type=int, + default=0) + + +# Model Parameters +parser.add_argument("--offline_scene_graph", + help="whether to precompute the scene graphs offline, options are 'no' and 'yes'", + type=str, + default='yes') + 
+parser.add_argument("--dynamic_edges", + help="whether to use dynamic edges or not, options are 'no' and 'yes'", + type=str, + default='yes') + +parser.add_argument("--edge_state_combine_method", + help="the method to use for combining edges of the same type", + type=str, + default='sum') + +parser.add_argument("--edge_influence_combine_method", + help="the method to use for combining edge influences", + type=str, + default='attention') + +parser.add_argument('--edge_addition_filter', + nargs='+', + help="what scaling to use for edges as they're created", + type=float, + default=[0.25, 0.5, 0.75, 1.0]) # We don't automatically pad left with 0.0, if you want a sharp + # and short edge addition, then you need to have a 0.0 at the + # beginning, e.g. [0.0, 1.0]. + +parser.add_argument('--edge_removal_filter', + nargs='+', + help="what scaling to use for edges as they're removed", + type=float, + default=[1.0, 0.0]) # We don't automatically pad right with 0.0, if you want a sharp drop off like + # the default, then you need to have a 0.0 at the end. + +parser.add_argument('--override_attention_radius', + action='append', + help='Specify one attention radius to override. E.g. "PEDESTRIAN VEHICLE 10.0"', + default=[]) + +parser.add_argument('--incl_robot_node', + help="whether to include a robot node in the graph or simply model all agents", + action='store_true') + +parser.add_argument('--map_encoding', + help="Whether to use map encoding or not", + action='store_true') + +parser.add_argument('--augment', + help="Whether to augment the scene during training", + action='store_true') + +parser.add_argument('--node_freq_mult_train', + help="Whether to use frequency multiplying of nodes during training", + action='store_true') + +parser.add_argument('--node_freq_mult_eval', + help="Whether to use frequency multiplying of nodes during evaluation", + action='store_true') + +parser.add_argument('--scene_freq_mult_train', + help="Whether to use frequency multiplying of nodes during training", + action='store_true') + +parser.add_argument('--scene_freq_mult_eval', + help="Whether to use frequency multiplying of nodes during evaluation", + action='store_true') + +parser.add_argument('--scene_freq_mult_viz', + help="Whether to use frequency multiplying of nodes during evaluation", + action='store_true') + +parser.add_argument('--no_edge_encoding', + help="Whether to use neighbors edge encoding", + action='store_true') + +# Data Parameters +parser.add_argument("--data_dir", + help="what dir to look in for data", + type=str, + default='../experiments/processed') + +parser.add_argument("--train_data_dict", + help="what file to load for training data", + type=str, + default='train.pkl') + +parser.add_argument("--eval_data_dict", + help="what file to load for evaluation data", + type=str, + default='val.pkl') + +parser.add_argument("--log_dir", + help="what dir to save training information (i.e., saved models, logs, etc)", + type=str, + default='../experiments/logs') + +parser.add_argument("--log_tag", + help="tag for the log folder", + type=str, + default='') + +parser.add_argument('--device', + help='what device to perform training on', + type=str, + default='cuda:0') + +parser.add_argument("--eval_device", + help="what device to use during evaluation", + type=str, + default=None) + +# Training Parameters +parser.add_argument("--train_epochs", + help="number of iterations to train for", + type=int, + default=1) + +parser.add_argument('--batch_size', + help='training batch size', + type=int, + default=256) + 
+parser.add_argument('--eval_batch_size', + help='evaluation batch size', + type=int, + default=256) + +parser.add_argument('--k_eval', + help='how many samples to take during evaluation', + type=int, + default=25) + +parser.add_argument('--seed', + help='manual seed to use, default is 123', + type=int, + default=123) + +parser.add_argument('--eval_every', + help='how often to evaluate during training, never if None', + type=int, + default=1) + +parser.add_argument('--vis_every', + help='how often to visualize during training, never if None', + type=int, + default=1) + +parser.add_argument('--save_every', + help='how often to save during training, never if None', + type=int, + default=1) +args = parser.parse_args() diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/config/config.json b/forge/test/models/pytorch/multimodal/trajectron/trajectron/config/config.json new file mode 100644 index 000000000..3dc5d35d1 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/config/config.json @@ -0,0 +1,90 @@ +{ + + "batch_size": 256, + "grad_clip": 1.0, + + "learning_rate_style": "exp", + "learning_rate": 0.001, + "min_learning_rate": 0.00001, + "learning_decay_rate": 0.9999, + + "prediction_horizon": 12, + "minimum_history_length": 1, + "maximum_history_length": 8, + + "map_encoder": { + "PEDESTRIAN": { + "heading_state_index": 5, + "patch_size": [50, 10, 50, 90], + "map_channels": 3, + "hidden_channels": [10, 20, 10, 1], + "output_size": 32, + "masks": [5, 5, 5, 5], + "strides": [1, 1, 1, 1], + "dropout": 0.5 + } + }, + + "k": 1, + "k_eval": 1, + + "kl_min": 0.07, + "kl_weight": 100.0, + "kl_weight_start": 0, + "kl_decay_rate": 0.99995, + "kl_crossover": 400, + "kl_sigmoid_divisor": 4, + + "rnn_kwargs": { + "dropout_keep_prob": 0.75 + }, + "MLP_dropout_keep_prob": 0.9, + "enc_rnn_dim_edge": 32, + "enc_rnn_dim_edge_influence": 32, + "enc_rnn_dim_history": 32, + "enc_rnn_dim_future": 32, + "dec_rnn_dim": 128, + + "q_z_xy_MLP_dims": null, + "p_z_x_MLP_dims": 32, + "GMM_components": 1, + + "log_p_yt_xz_max": 6, + + "N": 1, + "K": 25, + + "tau_init": 2.0, + "tau_final": 0.05, + "tau_decay_rate": 0.997, + + "use_z_logit_clipping": true, + "z_logit_clip_start": 0.05, + "z_logit_clip_final": 5.0, + "z_logit_clip_crossover": 300, + "z_logit_clip_divisor": 5, + + "dynamic": { + "PEDESTRIAN": { + "name": "SingleIntegrator", + "distribution": true, + "limits": {} + } + }, + + "state": { + "PEDESTRIAN": { + "position": ["x", "y"], + "velocity": ["x", "y"], + "acceleration": ["x", "y"] + } + }, + + "pred_state": { + "PEDESTRIAN": { + "position": ["x", "y"] + } + }, + + "log_histograms": false +} \ No newline at end of file diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/config/nuScenes.json b/forge/test/models/pytorch/multimodal/trajectron/trajectron/config/nuScenes.json new file mode 100644 index 000000000..919ea6be1 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/config/nuScenes.json @@ -0,0 +1,109 @@ +{ + + "batch_size": 256, + "grad_clip": 1.0, + + "learning_rate_style": "exp", + "learning_rate": 0.003, + "min_learning_rate": 0.00001, + "learning_decay_rate": 0.9999, + + "prediction_horizon": 6, + "minimum_history_length": 1, + "maximum_history_length": 8, + + "map_encoder": { + "VEHICLE": { + "heading_state_index": 6, + "patch_size": [50, 10, 50, 90], + "map_channels": 3, + "hidden_channels": [10, 20, 10, 1], + "output_size": 32, + "masks": [5, 5, 5, 3], + "strides": [2, 2, 1, 1], + "dropout": 0.5 + } + }, + 
+ "k": 1, + "k_eval": 1, + + "kl_min": 0.07, + "kl_weight": 100.0, + "kl_weight_start": 0, + "kl_decay_rate": 0.99995, + "kl_crossover": 400, + "kl_sigmoid_divisor": 4, + + "rnn_kwargs": { + "dropout_keep_prob": 0.75 + }, + "MLP_dropout_keep_prob": 0.9, + "enc_rnn_dim_edge": 32, + "enc_rnn_dim_edge_influence": 32, + "enc_rnn_dim_history": 32, + "enc_rnn_dim_future": 32, + "dec_rnn_dim": 128, + + "q_z_xy_MLP_dims": null, + "p_z_x_MLP_dims": 32, + "GMM_components": 1, + + "log_p_yt_xz_max": 6, + + "N": 1, + "K": 25, + + "tau_init": 2.0, + "tau_final": 0.05, + "tau_decay_rate": 0.997, + + "use_z_logit_clipping": true, + "z_logit_clip_start": 0.05, + "z_logit_clip_final": 5.0, + "z_logit_clip_crossover": 300, + "z_logit_clip_divisor": 5, + + "dynamic": { + "PEDESTRIAN": { + "name": "SingleIntegrator", + "distribution": true, + "limits": {} + }, + "VEHICLE": { + "name": "Unicycle", + "distribution": true, + "limits": { + "max_a": 4, + "min_a": -5, + "max_heading_change": 0.7, + "min_heading_change": -0.7 + } + } + }, + + "state": { + "PEDESTRIAN": { + "position": ["x", "y"], + "velocity": ["x", "y"], + "acceleration": ["x", "y"] + }, + "VEHICLE": { + "position": ["x", "y"], + "velocity": ["x", "y"], + "acceleration": ["x", "y"], + "heading": ["°", "d°"] + } + }, + + "pred_state": { + "VEHICLE": { + "position": ["x", "y"] + }, + "PEDESTRIAN": { + "position": ["x", "y"] + } + }, + + "log_histograms": false +} \ No newline at end of file diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/dataset_envs/eth_val.pkl b/forge/test/models/pytorch/multimodal/trajectron/trajectron/dataset_envs/eth_val.pkl new file mode 100644 index 000000000..8afc0d7c0 Binary files /dev/null and b/forge/test/models/pytorch/multimodal/trajectron/trajectron/dataset_envs/eth_val.pkl differ diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/__init__.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/__init__.py new file mode 100644 index 000000000..9ad06818f --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/__init__.py @@ -0,0 +1,8 @@ +from .data_structures import RingBuffer, SingleHeaderNumpyArray, DoubleHeaderNumpyArray +from .scene import Scene +from .node import Node +from .scene_graph import TemporalSceneGraph, SceneGraph +from .environment import Environment +from .node_type import NodeTypeEnum +from .data_utils import derivative_of +from .map import GeometricMap diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/data_structures.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/data_structures.py new file mode 100644 index 000000000..d16a9ea19 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/data_structures.py @@ -0,0 +1,277 @@ +import numpy as np +import pandas as pd +from collections.abc import Sequence +from collections import OrderedDict + + +class RingBuffer(Sequence): + def __init__(self, capacity, dtype=float, allow_overwrite=True): + """ + Create a new ring buffer with the given capacity and element type. + Code copy-pasted from: https://github.com/eric-wieser/numpy_ringbuffer + + Parameters + ---------- + capacity: int + The maximum capacity of the ring buffer + dtype: data-type, optional + Desired type of buffer elements. 
Use a type like (float, 2) to + produce a buffer with shape (N, 2) + allow_overwrite: bool + If false, throw an IndexError when trying to append to an already + full buffer + """ + self._arr = np.full(capacity, np.nan, dtype) + self._left_index = 0 + self._right_index = 0 + self._capacity = capacity + self._allow_overwrite = allow_overwrite + + def _unwrap(self): + """ Copy the data from this buffer into unwrapped form """ + return np.concatenate(( + self._arr[self._left_index:min(self._right_index, self._capacity)], + self._arr[:max(self._right_index - self._capacity, 0)] + )) + + def _fix_indices(self): + """ + Enforce our invariant that 0 <= self._left_index < self._capacity + """ + if self._left_index >= self._capacity: + self._left_index -= self._capacity + self._right_index -= self._capacity + elif self._left_index < 0: + self._left_index += self._capacity + self._right_index += self._capacity + + @property + def is_full(self): + """ True if there is no more space in the buffer """ + return len(self) == self._capacity + + # numpy compatibility + def __array__(self): + return self._unwrap() + + @property + def dtype(self): + return self._arr.dtype + + @property + def shape(self): + return (len(self),) + self._arr.shape[1:] + + # these mirror methods from deque + @property + def maxlen(self): + return self._capacity + + def append(self, value): + if self.is_full: + if not self._allow_overwrite: + raise IndexError('append to a full RingBuffer with overwrite disabled') + elif not len(self): + return + else: + self._left_index += 1 + + self._arr[self._right_index % self._capacity] = value + self._right_index += 1 + self._fix_indices() + + def appendleft(self, value): + if self.is_full: + if not self._allow_overwrite: + raise IndexError('append to a full RingBuffer with overwrite disabled') + elif not len(self): + return + else: + self._right_index -= 1 + + self._left_index -= 1 + self._fix_indices() + self._arr[self._left_index] = value + + def pop(self): + if len(self) == 0: + raise IndexError("pop from an empty RingBuffer") + self._right_index -= 1 + self._fix_indices() + res = self._arr[self._right_index % self._capacity] + return res + + def popleft(self): + if len(self) == 0: + raise IndexError("pop from an empty RingBuffer") + res = self._arr[self._left_index] + self._left_index += 1 + self._fix_indices() + return res + + def extend(self, values): + lv = len(values) + if len(self) + lv > self._capacity: + if not self._allow_overwrite: + raise IndexError('extend a RingBuffer such that it would overflow, with overwrite disabled') + elif not len(self): + return + if lv >= self._capacity: + # wipe the entire array! - this may not be threadsafe + self._arr[...] = values[-self._capacity:] + self._right_index = self._capacity + self._left_index = 0 + return + + ri = self._right_index % self._capacity + sl1 = np.s_[ri:min(ri + lv, self._capacity)] + sl2 = np.s_[:max(ri + lv - self._capacity, 0)] + self._arr[sl1] = values[:sl1.stop - sl1.start] + self._arr[sl2] = values[sl1.stop - sl1.start:] + self._right_index += lv + + self._left_index = max(self._left_index, self._right_index - self._capacity) + self._fix_indices() + + def extendleft(self, values): + lv = len(values) + if len(self) + lv > self._capacity: + if not self._allow_overwrite: + raise IndexError('extend a RingBuffer such that it would overflow, with overwrite disabled') + elif not len(self): + return + if lv >= self._capacity: + # wipe the entire array! - this may not be threadsafe + self._arr[...] 
= values[:self._capacity] + self._right_index = self._capacity + self._left_index = 0 + return + + self._left_index -= lv + self._fix_indices() + li = self._left_index + sl1 = np.s_[li:min(li + lv, self._capacity)] + sl2 = np.s_[:max(li + lv - self._capacity, 0)] + self._arr[sl1] = values[:sl1.stop - sl1.start] + self._arr[sl2] = values[sl1.stop - sl1.start:] + + self._right_index = min(self._right_index, self._left_index + self._capacity) + + # implement Sequence methods + def __len__(self): + return self._right_index - self._left_index + + def __getitem__(self, item): + # handle simple (b[1]) and basic (b[np.array([1, 2, 3])]) fancy indexing specially + if not isinstance(item, tuple): + item_arr = np.asarray(item) + if issubclass(item_arr.dtype.type, np.integer): + item_arr = (item_arr + self._left_index) % self._capacity + return self._arr[item_arr] + + # for everything else, get it right at the expense of efficiency + return self._unwrap()[item] + + def __iter__(self): + # alarmingly, this is comparable in speed to using itertools.chain + return iter(self._unwrap()) + + # Everything else + def __repr__(self): + return ''.format(np.asarray(self)) + + +class DoubleHeaderNumpyArray(object): + def __init__(self, data: np.ndarray, header: list): + """ + Data Structure mirroring some functionality of double indexed pandas DataFrames. + Indexing options are: + [:, (header1, header2)] + [:, [(header1, header2), (header1, header2)]] + [:, {header1: [header21, header22]}] + + A SingleHeaderNumpyArray can is returned if an element of the first header is querried as attribut: + doubleHeaderNumpyArray.position -> SingleHeaderNumpyArray + + :param data: The numpy array. + :param header: The double header structure as list of tuples [(header11, header21), (header11, header22) ...] + """ + self.data = data + self.header = header + self.double_header_lookup = OrderedDict() + self.tree_header_lookup = OrderedDict() + for i, header_item in enumerate(header): + self.double_header_lookup[header_item] = i + if header_item[0] not in self.tree_header_lookup: + self.tree_header_lookup[header_item[0]] = dict() + self.tree_header_lookup[header_item[0]][header_item[1]] = i + + def __mul__(self, other): + return DoubleHeaderNumpyArray(self.data * other, self.header) + + def get_single_header_array(self, h1: str, rows=slice(None, None, None)): + data_integer_indices = list() + h2_list = list() + for h2 in self.tree_header_lookup[h1]: + data_integer_indices.append(self.tree_header_lookup[h1][h2]) + h2_list.append(h2) + return SingleHeaderNumpyArray(self.data[rows, data_integer_indices], h2_list) + + def __getitem__(self, item): + rows, columns = item + data_integer_indices = list() + if type(columns) is dict: + for h1, h2s in columns.items(): + for h2 in h2s: + data_integer_indices.append(self.double_header_lookup[(h1, h2)]) + return self.data[rows, data_integer_indices] + elif type(columns) is list: + for column in columns: + assert type(column) is tuple, "If Index is list it hast to be list of double header tuples." + data_integer_indices.append(self.double_header_lookup[column]) + return self.data[rows, data_integer_indices] + elif type(columns) is tuple: + return self.data[rows, self.double_header_lookup[columns]] + else: + assert type(item) is str, "Index must be str, list of tuples or dict of tree structure." 
+ return self.get_single_header_array(item, rows=rows) + + def __getattr__(self, item): + if not item.startswith('_'): + if item in self.tree_header_lookup.keys(): + return self.get_single_header_array(item) + else: + try: + return self.data.__getattribute__(item) + except AttributeError: + return super().__getattribute__(item) + else: + return super().__getattribute__(item) + + +class SingleHeaderNumpyArray(object): + def __init__(self, data: np.ndarray, header: list): + self.data = data + self.header_lookup = OrderedDict({h: i for i, h in enumerate(header)}) + + def __getitem__(self, item): + rows, columns = item + data_integer_indices = list() + if type(columns) is list or type(columns) is tuple: + for column in columns: + data_integer_indices.append(self.header_lookup[column]) + else: + data_integer_indices = self.header_lookup[columns] + return self.data[rows, data_integer_indices] + + def __getattr__(self, item): + if not item.startswith('_'): + if item in self.header_lookup.keys(): + return self[:, item] + else: + try: + return self.data.__getattribute__(item) + except AttributeError: + return super().__getattribute__(item) + else: + return super().__getattribute__(item) diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/data_utils.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/data_utils.py new file mode 100644 index 000000000..72c7ec86b --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/data_utils.py @@ -0,0 +1,33 @@ +import numpy as np + + +def make_continuous_copy(alpha): + alpha = (alpha + np.pi) % (2.0 * np.pi) - np.pi + continuous_x = np.zeros_like(alpha) + continuous_x[0] = alpha[0] + for i in range(1, len(alpha)): + if not (np.sign(alpha[i]) == np.sign(alpha[i - 1])) and np.abs(alpha[i]) > np.pi / 2: + continuous_x[i] = continuous_x[i - 1] + ( + alpha[i] - alpha[i - 1]) - np.sign( + (alpha[i] - alpha[i - 1])) * 2 * np.pi + else: + continuous_x[i] = continuous_x[i - 1] + (alpha[i] - alpha[i - 1]) + + return continuous_x + + +def derivative_of(x, dt=1, radian=False): + if radian: + x = make_continuous_copy(x) + + not_nan_mask = ~np.isnan(x) + masked_x = x[not_nan_mask] + + if masked_x.shape[-1] < 2: + return np.zeros_like(x) + + dx = np.full_like(x, np.nan) + dx[not_nan_mask] = np.ediff1d(masked_x, to_begin=(masked_x[1] - masked_x[0])) / dt + + return dx + diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/environment.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/environment.py new file mode 100644 index 000000000..24ebb679f --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/environment.py @@ -0,0 +1,64 @@ +import json +import numpy as np +from itertools import product +from .node_type import NodeTypeEnum + + +class Environment(object): + def __init__(self, node_type_list, standardization, scenes=None, attention_radius=None, robot_type=None): + self.scenes = scenes + self.node_type_list = node_type_list + self.attention_radius = attention_radius + self.NodeType = NodeTypeEnum(node_type_list) + self.robot_type = robot_type + + self.standardization = standardization + self.standardize_param_memo = dict() + + self._scenes_resample_prop = None + + def get_edge_types(self): + return list(product(self.NodeType, repeat=2)) + + def get_standardize_params(self, state, node_type): + memo_key = (json.dumps(state), node_type) + if memo_key in self.standardize_param_memo: + return 
self.standardize_param_memo[memo_key] + + standardize_mean_list = list() + standardize_std_list = list() + for entity, dims in state.items(): + for dim in dims: + standardize_mean_list.append(self.standardization[node_type][entity][dim]['mean']) + standardize_std_list.append(self.standardization[node_type][entity][dim]['std']) + standardize_mean = np.stack(standardize_mean_list) + standardize_std = np.stack(standardize_std_list) + + self.standardize_param_memo[memo_key] = (standardize_mean, standardize_std) + return standardize_mean, standardize_std + + def standardize(self, array, state, node_type, mean=None, std=None): + if mean is None and std is None: + mean, std = self.get_standardize_params(state, node_type) + elif mean is None and std is not None: + mean, _ = self.get_standardize_params(state, node_type) + elif mean is not None and std is None: + _, std = self.get_standardize_params(state, node_type) + return np.where(np.isnan(array), np.array(np.nan), (array - mean) / std) + + def unstandardize(self, array, state, node_type, mean=None, std=None): + if mean is None and std is None: + mean, std = self.get_standardize_params(state, node_type) + elif mean is None and std is not None: + mean, _ = self.get_standardize_params(state, node_type) + elif mean is not None and std is None: + _, std = self.get_standardize_params(state, node_type) + return array * std + mean + + @property + def scenes_resample_prop(self): + if self._scenes_resample_prop is None: + self._scenes_resample_prop = np.array([scene.resample_prob for scene in self.scenes]) + self._scenes_resample_prop = self._scenes_resample_prop / np.sum(self._scenes_resample_prop) + return self._scenes_resample_prop + diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/map.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/map.py new file mode 100644 index 000000000..d1b527d51 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/map.py @@ -0,0 +1,185 @@ +import torch +import numpy as np +from model.dataset.homography_warper import get_rotation_matrix2d, warp_affine_crop + + +class Map(object): + def __init__(self, data, homography, description=None): + self.data = data + self.homography = homography + self.description = description + + def as_image(self): + raise NotImplementedError + + def get_cropped_maps(self, world_pts, patch_size, rotation=None, device='cpu'): + raise NotImplementedError + + def to_map_points(self, scene_pts): + raise NotImplementedError + + +class GeometricMap(Map): + """ + A Geometric Map is a int tensor of shape [layers, x, y]. The homography must transform a point in scene + coordinates to the respective point in map coordinates. + + :param data: Numpy array of shape [layers, x, y] + :param homography: Numpy array of shape [3, 3] + """ + def __init__(self, data, homography, description=None): + #assert isinstance(data.dtype, np.floating), "Geometric Maps must be float values." + super(GeometricMap, self).__init__(data, homography, description=description) + + self._last_padding = None + self._last_padded_map = None + self._torch_map = None + + def torch_map(self, device): + if self._torch_map is not None: + return self._torch_map + self._torch_map = torch.tensor(self.data, dtype=torch.uint8, device=device) + return self._torch_map + + def as_image(self): + # We have to transpose x and y to rows and columns. 
Assumes origin is lower left for image + # Also we move the channels to the last dimension + return (np.transpose(self.data, (2, 1, 0))).astype(np.uint) + + def get_padded_map(self, padding_x, padding_y, device): + if self._last_padding == (padding_x, padding_y): + return self._last_padded_map + else: + self._last_padding = (padding_x, padding_y) + self._last_padded_map = torch.full((self.data.shape[0], + self.data.shape[1] + 2 * padding_x, + self.data.shape[2] + 2 * padding_y), + False, dtype=torch.uint8) + self._last_padded_map[..., padding_x:-padding_x, padding_y:-padding_y] = self.torch_map(device) + return self._last_padded_map + + @staticmethod + def batch_rotate(map_batched, centers, angles, out_height, out_width): + """ + As the input is a map and the warp_affine works on an image coordinate system we would have to + flip the y axis updown, negate the angles, and flip it back after transformation. + This, however, is the same as not flipping at and not negating the radian. + + :param map_batched: + :param centers: + :param angles: + :param out_height: + :param out_width: + :return: + """ + M = get_rotation_matrix2d(centers, angles, torch.ones_like(angles)) + rotated_map_batched = warp_affine_crop(map_batched, centers, M, + dsize=(out_height, out_width), padding_mode='zeros') + + return rotated_map_batched + + @classmethod + def get_cropped_maps_from_scene_map_batch(cls, maps, scene_pts, patch_size, rotation=None, device='cpu'): + """ + Returns rotated patches of each map around the transformed scene points. + ___________________ + | | | + | |ps[3] | + | | | + | | | + | o|__________| + | | ps[2] | + | | | + |_______|__________| + ps = patch_size + + :param maps: List of GeometricMap objects [bs] + :param scene_pts: Scene points: [bs, 2] + :param patch_size: Extracted Patch size after rotation: [-x, -y, +x, +y] + :param rotation: Rotations in degrees: [bs] + :param device: Device on which the rotated tensors should be returned. + :return: Rotated and cropped tensor patches. 
+ """ + batch_size = scene_pts.shape[0] + lat_size = 2 * np.max((patch_size[0], patch_size[2])) + long_size = 2 * np.max((patch_size[1], patch_size[3])) + assert lat_size % 2 == 0, "Patch width must be divisible by 2" + assert long_size % 2 == 0, "Patch length must be divisible by 2" + lat_size_half = lat_size // 2 + long_size_half = long_size // 2 + + context_padding_x = int(np.ceil(np.sqrt(2) * lat_size)) + context_padding_y = int(np.ceil(np.sqrt(2) * long_size)) + + centers = torch.tensor([s_map.to_map_points(scene_pts[np.newaxis, i]) for i, s_map in enumerate(maps)], + dtype=torch.long, device=device).squeeze(dim=1) \ + + torch.tensor([context_padding_x, context_padding_y], device=device, dtype=torch.long) + + padded_map = [s_map.get_padded_map(context_padding_x, context_padding_y, device=device) for s_map in maps] + + padded_map_batched = torch.stack([padded_map[i][..., + centers[i, 0] - context_padding_x: centers[i, 0] + context_padding_x, + centers[i, 1] - context_padding_y: centers[i, 1] + context_padding_y] + for i in range(centers.shape[0])], dim=0) + + center_patches = torch.tensor([[context_padding_y, context_padding_x]], + dtype=torch.int, + device=device).repeat(batch_size, 1) + + if rotation is not None: + angles = torch.Tensor(rotation) + else: + angles = torch.zeros(batch_size) + + rotated_map_batched = cls.batch_rotate(padded_map_batched/255., + center_patches.float(), + angles, + long_size, + lat_size) + + del padded_map_batched + + return rotated_map_batched[..., + long_size_half - patch_size[1]:(long_size_half + patch_size[3]), + lat_size_half - patch_size[0]:(lat_size_half + patch_size[2])] + + def get_cropped_maps(self, scene_pts, patch_size, rotation=None, device='cpu'): + """ + Returns rotated patches of the map around the transformed scene points. + ___________________ + | | | + | |ps[3] | + | | | + | | | + | o|__________| + | | ps[2] | + | | | + |_______|__________| + ps = patch_size + + :param scene_pts: Scene points: [bs, 2] + :param patch_size: Extracted Patch size after rotation: [-lat, -long, +lat, +long] + :param rotation: Rotations in degrees: [bs] + :param device: Device on which the rotated tensors should be returned. + :return: Rotated and cropped tensor patches. 
+ """ + return self.get_cropped_maps_from_scene_map_batch([self]*scene_pts.shape[0], scene_pts, + patch_size, rotation=rotation, device=device) + + def to_map_points(self, scene_pts): + org_shape = None + if len(scene_pts.shape) > 2: + org_shape = scene_pts.shape + scene_pts = scene_pts.reshape((-1, 2)) + N, dims = scene_pts.shape + points_with_one = np.ones((dims + 1, N)) + points_with_one[:dims] = scene_pts.T + map_points = (self.homography @ points_with_one).T[..., :dims] + if org_shape is not None: + map_points = map_points.reshape(org_shape) + return map_points + + +class ImageMap(Map): # TODO Implement for image maps -> watch flipped coordinate system + def __init__(self): + raise NotImplementedError \ No newline at end of file diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/node.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/node.py new file mode 100644 index 000000000..a160412f1 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/node.py @@ -0,0 +1,240 @@ +import random +import numpy as np +import pandas as pd +from .data_structures import DoubleHeaderNumpyArray +# from ncls import NCLS + + +class Node(object): + def __init__(self, node_type, node_id, data, length=None, width=None, height=None, first_timestep=0, + is_robot=False, description="", frequency_multiplier=1, non_aug_node=None): + self.type = node_type + self.id = node_id + self.length = length + self.width = width + self.height = height + self.first_timestep = first_timestep + self.non_aug_node = non_aug_node + + if data is not None: + if isinstance(data, pd.DataFrame): + self.data = DoubleHeaderNumpyArray(data.values, list(data.columns)) + elif isinstance(data, DoubleHeaderNumpyArray): + self.data = data + else: + self.data = None + + self.is_robot = is_robot + self._last_timestep = None + self.description = description + self.frequency_multiplier = frequency_multiplier + + self.forward_in_time_on_next_override = False + + def __eq__(self, other): + return ((isinstance(other, self.__class__) + or isinstance(self, other.__class__)) + and self.id == other.id + and self.type == other.type) + + def __ne__(self, other): + return not self.__eq__(other) + + def __hash__(self): + return hash((self.type, self.id)) + + def __repr__(self): + return '/'.join([self.type.name, self.id]) + + def overwrite_data(self, data, header, forward_in_time_on_next_overwrite=False): + """ + This function hard overwrites the data matrix. When using it you have to make sure that the columns + in the new data matrix correspond to the old structure. As well as setting first_timestep. + + :param data: New data matrix + :param forward_in_time_on_next_overwrite: On the !!NEXT!! call of overwrite_data first_timestep will be increased. + :return: None + """ + if header is None: + self.data.data = data + else: + self.data = DoubleHeaderNumpyArray(data, header) + + self._last_timestep = None + if self.forward_in_time_on_next_override: + self.first_timestep += 1 + self.forward_in_time_on_next_override = forward_in_time_on_next_overwrite + + def scene_ts_to_node_ts(self, scene_ts) -> (np.ndarray, int, int): + """ + Transforms timestamp from scene into timeframe of node data. + + :param scene_ts: Scene timesteps + :return: ts: Transformed timesteps, paddingl: Number of timesteps in scene range which are not available in + node data before data is available. 
paddingu: Number of timesteps in scene range which are not + available in node data after data is available. + """ + paddingl = (self.first_timestep - scene_ts[0]).clip(0) + paddingu = (scene_ts[1] - self.last_timestep).clip(0) + ts = np.array(scene_ts).clip(min=self.first_timestep, max=self.last_timestep) - self.first_timestep + return ts, paddingl, paddingu + + def history_points_at(self, ts) -> int: + """ + Number of history points in trajectory. Timestep is exclusive. + + :param ts: Scene timestep where the number of history points are queried. + :return: Number of history timesteps. + """ + return ts - self.first_timestep + + def get(self, tr_scene, state, padding=np.nan) -> np.ndarray: + """ + Returns a time range of multiple properties of the node. + + :param tr_scene: The timestep range (inklusive). + :param state: The state description for which the properties are returned. + :param padding: The value which should be used for padding if not enough information is available. + :return: Array of node property values. + """ + if tr_scene.size == 1: + tr_scene = np.array([tr_scene[0], tr_scene[0]]) + length = tr_scene[1] - tr_scene[0] + 1 # tr is inclusive + tr, paddingl, paddingu = self.scene_ts_to_node_ts(tr_scene) + data_array = self.data[tr[0]:tr[1] + 1, state] + padded_data_array = np.full((length, data_array.shape[1]), fill_value=padding) + padded_data_array[paddingl:length - paddingu] = data_array + return padded_data_array + + @property + def timesteps(self) -> int: + """ + Number of available timesteps for node. + + :return: Number of available timesteps. + """ + return self.data.shape[0] + + @property + def last_timestep(self) -> int: + """ + Nodes last timestep in the Scene. + + :return: Nodes last timestep. + """ + if self._last_timestep is None: + self._last_timestep = self.first_timestep + self.timesteps - 1 + return self._last_timestep + + +class MultiNode(Node): + def __init__(self, node_type, node_id, nodes_list, is_robot=False): + super(MultiNode, self).__init__(node_type, node_id, data=None, is_robot=is_robot) + self.nodes_list = nodes_list + for node in self.nodes_list: + node.is_robot = is_robot + + self.first_timestep = min(node.first_timestep for node in self.nodes_list) + self._last_timestep = max(node.last_timestep for node in self.nodes_list) + + starts = np.array([node.first_timestep for node in self.nodes_list], dtype=np.int64) + ends = np.array([node.last_timestep for node in self.nodes_list], dtype=np.int64) + ids = np.arange(len(self.nodes_list), dtype=np.int64) + self.interval_tree = NCLS(starts, ends, ids) + + @staticmethod + def find_non_overlapping_nodes(nodes_list, min_timesteps=1) -> list: + """ + Greedily finds a set of non-overlapping nodes in the provided scene. + + :return: A list of non-overlapping nodes. 
+ """ + non_overlapping_nodes = list() + nodes = sorted(nodes_list, key=lambda n: n.last_timestep) + current_time = 0 + for node in nodes: + if node.first_timestep >= current_time and node.timesteps >= min_timesteps: + # Include the node + non_overlapping_nodes.append(node) + current_time = node.last_timestep + + return non_overlapping_nodes + + def get_node_at_timesteps(self, scene_ts) -> Node: + possible_node_ranges = list(self.interval_tree.find_overlap(scene_ts[0], scene_ts[1] + 1)) + if not possible_node_ranges: + return Node(node_type=self.type, + node_id='EMPTY', + data=self.nodes_list[0].data * np.nan, + is_robot=self.is_robot) + + node_idx = random.choice(possible_node_ranges)[2] + return self.nodes_list[node_idx] + + def scene_ts_to_node_ts(self, scene_ts) -> (Node, np.ndarray, int, int): + """ + Transforms timestamp from scene into timeframe of node data. + + :param scene_ts: Scene timesteps + :return: ts: Transformed timesteps, paddingl: Number of timesteps in scene range which are not available in + node data before data is available. paddingu: Number of timesteps in scene range which are not + available in node data after data is available. + """ + possible_node_ranges = list(self.interval_tree.find_overlap(scene_ts[0], scene_ts[1] + 1)) + if not possible_node_ranges: + return None, None, None, None + + node_idx = random.choice(possible_node_ranges)[2] + node = self.nodes_list[node_idx] + + paddingl = (node.first_timestep - scene_ts[0]).clip(0) + paddingu = (scene_ts[1] - node.last_timestep).clip(0) + ts = np.array(scene_ts).clip(min=node.first_timestep, max=node.last_timestep) - node.first_timestep + return node, ts, paddingl, paddingu + + def get(self, tr_scene, state, padding=np.nan) -> np.ndarray: + if tr_scene.size == 1: + tr_scene = np.array([tr_scene, tr_scene]) + length = tr_scene[1] - tr_scene[0] + 1 # tr is inclusive + + node, tr, paddingl, paddingu = self.scene_ts_to_node_ts(tr_scene) + if node is None: + state_length = sum([len(entity_dims) for entity_dims in state.values()]) + return np.full((length, state_length), fill_value=padding) + + data_array = node.data[tr[0]:tr[1] + 1, state] + padded_data_array = np.full((length, data_array.shape[1]), fill_value=padding) + padded_data_array[paddingl:length - paddingu] = data_array + return padded_data_array + + def get_all(self, tr_scene, state, padding=np.nan) -> np.ndarray: + # Assumption here is that the user is asking for all of the data in this MultiNode and to return it within a + # full scene-sized output array. + assert tr_scene.size == 2 and tr_scene[0] == 0 and self.last_timestep <= tr_scene[1] + length = tr_scene[1] - tr_scene[0] + 1 # tr is inclusive + state_length = sum([len(entity_dims) for entity_dims in state.values()]) + padded_data_array = np.full((length, state_length), fill_value=padding) + for node in self.nodes_list: + padded_data_array[node.first_timestep:node.last_timestep + 1] = node.data[:, state] + + return padded_data_array + + def history_points_at(self, ts) -> int: + """ + Number of history points in trajectory. Timestep is exclusive. + + :param ts: Scene timestep where the number of history points are queried. + :return: Number of history timesteps. + """ + node_idx = next(self.interval_tree.find_overlap(ts, ts + 1))[2] + node = self.nodes_list[node_idx] + return ts - node.first_timestep + + @property + def timesteps(self) -> int: + """ + Number of available timesteps for node. + + :return: Number of available timesteps. 
+ """ + return self._last_timestep - self.first_timestep + 1 diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/node_type.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/node_type.py new file mode 100644 index 000000000..a44917a22 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/node_type.py @@ -0,0 +1,35 @@ +class NodeType(object): + def __init__(self, name, value): + self.name = name + self.value = value + + def __repr__(self): + return self.name + + def __eq__(self, other): + if type(other) == str and self.name == other: + return True + else: + return isinstance(other, self.__class__) and self.name == other.name + + def __ne__(self, other): + return not self.__eq__(other) + + def __hash__(self): + return hash(self.name) + + def __add__(self, other): + return self.name + other + + +class NodeTypeEnum(list): + def __init__(self, node_type_list): + self.node_type_list = node_type_list + node_types = [NodeType(name, node_type_list.index(name) + 1) for name in node_type_list] + super().__init__(node_types) + + def __getattr__(self, name): + if not name.startswith('_') and name in object.__getattribute__(self, "node_type_list"): + return self[object.__getattribute__(self, "node_type_list").index(name)] + else: + return object.__getattribute__(self, name) diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/scene.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/scene.py new file mode 100644 index 000000000..299148278 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/scene.py @@ -0,0 +1,219 @@ +import copy +import numpy as np +from .scene_graph import TemporalSceneGraph, SceneGraph +from .node import MultiNode + + +class Scene(object): + def __init__(self, timesteps, map=None, dt=1, name="", frequency_multiplier=1, aug_func=None, non_aug_scene=None): + self.map = map + self.timesteps = timesteps + self.dt = dt + self.name = name + + self.nodes = [] + + self.robot = None + + self.temporal_scene_graph = None + + self.frequency_multiplier = frequency_multiplier + + self.description = "" + + self.aug_func = aug_func + self.non_aug_scene = non_aug_scene + + def add_robot_from_nodes(self, robot_type): + scenes = [self] + if hasattr(self, 'augmented'): + scenes += self.augmented + + for scn in scenes: + nodes_list = [node for node in scn.nodes if node.type == robot_type] + non_overlapping_nodes = MultiNode.find_non_overlapping_nodes(nodes_list, min_timesteps=3) + scn.robot = MultiNode(robot_type, 'ROBOT', non_overlapping_nodes, is_robot=True) + + for node in non_overlapping_nodes: + scn.nodes.remove(node) + scn.nodes.append(scn.robot) + + def get_clipped_input_dict(self, timestep, state): + input_dict = dict() + existing_nodes = self.get_nodes_clipped_at_time(timesteps=np.array([timestep]), + state=state) + tr_scene = np.array([timestep, timestep]) + for node in existing_nodes: + input_dict[node] = node.get(tr_scene, state[node.type]) + + return input_dict + + def get_scene_graph(self, + timestep, + attention_radius=None, + edge_addition_filter=None, + edge_removal_filter=None) -> SceneGraph: + """ + Returns the Scene Graph for a given timestep. If the Temporal Scene Graph was pre calculated, + the temporal scene graph is sliced. Otherwise the scene graph is calculated on the spot. + + :param timestep: Timestep for which the scene graph is returned. 
+ :param attention_radius: Attention radius for each node type permutation. (Only online) + :param edge_addition_filter: Filter for adding edges (Only online) + :param edge_removal_filter: Filter for removing edges (Only online) + :return: Scene Graph for given timestep. + """ + if self.temporal_scene_graph is None: + timestep_range = np.array([timestep - len(edge_removal_filter), timestep]) + node_pos_dict = dict() + present_nodes = self.present_nodes(np.array([timestep])) + + for node in present_nodes[timestep]: + node_pos_dict[node] = np.squeeze(node.get(timestep_range, {'position': ['x', 'y']})) + tsg = TemporalSceneGraph.create_from_temp_scene_dict(node_pos_dict, + attention_radius, + duration=(len(edge_removal_filter) + 1), + edge_addition_filter=edge_addition_filter, + edge_removal_filter=edge_removal_filter + ) + + return tsg.to_scene_graph(t=len(edge_removal_filter), + t_hist=len(edge_removal_filter), + t_fut=len(edge_addition_filter)) + else: + return self.temporal_scene_graph.to_scene_graph(timestep, + len(edge_removal_filter), + len(edge_addition_filter)) + + def calculate_scene_graph(self, + attention_radius, + edge_addition_filter=None, + edge_removal_filter=None) -> None: + """ + Calculate the Temporal Scene Graph for the entire Scene. + + :param attention_radius: Attention radius for each node type permutation. + :param edge_addition_filter: Filter for adding edges. + :param edge_removal_filter: Filter for removing edges. + :return: None + """ + timestep_range = np.array([0, self.timesteps-1]) + node_pos_dict = dict() + + for node in self.nodes: + if type(node) is MultiNode: + node_pos_dict[node] = np.squeeze(node.get_all(timestep_range, {'position': ['x', 'y']})) + else: + node_pos_dict[node] = np.squeeze(node.get(timestep_range, {'position': ['x', 'y']})) + + self.temporal_scene_graph = TemporalSceneGraph.create_from_temp_scene_dict(node_pos_dict, + attention_radius, + duration=self.timesteps, + edge_addition_filter=edge_addition_filter, + edge_removal_filter=edge_removal_filter) + + def duration(self): + """ + Calculates the duration of the scene. + + :return: Duration of the scene in s. + """ + return self.timesteps * self.dt + + def present_nodes(self, + timesteps, + type=None, + min_history_timesteps=0, + min_future_timesteps=0, + return_robot=True) -> dict: + """ + Finds all present nodes in the scene at a given timestemp + + :param timesteps: Timestep(s) for which all present nodes should be returned + :param type: Node type which should be returned. If None all node types are returned. + :param min_history_timesteps: Minimum history timesteps of a node to be returned. + :param min_future_timesteps: Minimum future timesteps of a node to be returned. + :param return_robot: Return a node if it is the robot. + :return: Dictionary with timesteps as keys and list of nodes as value. 
+ """ + + present_nodes = {} + + for node in self.nodes: + if node.is_robot and not return_robot: + continue + if type is None or node.type == type: + lower_bound = timesteps - min_history_timesteps + upper_bound = timesteps + min_future_timesteps + mask = (node.first_timestep <= lower_bound) & (upper_bound <= node.last_timestep) + if mask.any(): + timestep_indices_present = np.nonzero(mask)[0] + for timestep_index_present in timestep_indices_present: + if timesteps[timestep_index_present] in present_nodes.keys(): + present_nodes[timesteps[timestep_index_present]].append(node) + else: + present_nodes[timesteps[timestep_index_present]] = [node] + + return present_nodes + + def get_nodes_clipped_at_time(self, timesteps, state): + clipped_nodes = list() + + existing_nodes = self.present_nodes(timesteps) + all_nodes = set().union(*existing_nodes.values()) + if not all_nodes: + return clipped_nodes + + tr_scene = np.array([timesteps.min(), timesteps.max()]) + data_header_memo = dict() + for node in all_nodes: + if isinstance(node, MultiNode): + copied_node = copy.deepcopy(node.get_node_at_timesteps(tr_scene)) + copied_node.id = self.robot.id + else: + copied_node = copy.deepcopy(node) + + clipped_value = node.get(tr_scene, state[node.type]) + + if node.type not in data_header_memo: + data_header = list() + for quantity, values in state[node.type].items(): + for value in values: + data_header.append((quantity, value)) + + data_header_memo[node.type] = data_header + + copied_node.overwrite_data(clipped_value, data_header_memo[node.type]) + copied_node.first_timestep = tr_scene[0] + + clipped_nodes.append(copied_node) + + return clipped_nodes + + def sample_timesteps(self, batch_size, min_future_timesteps=0) -> np.ndarray: + """ + Sample a batch size of possible timesteps for the scene. + + :param batch_size: Number of timesteps to sample. + :param min_future_timesteps: Minimum future timesteps in the scene for a timestep to be returned. + :return: Numpy Array of sampled timesteps. + """ + if batch_size > self.timesteps: + batch_size = self.timesteps + return np.random.choice(np.arange(0, self.timesteps-min_future_timesteps), size=batch_size, replace=False) + + def augment(self): + if self.aug_func is not None: + return self.aug_func(self) + else: + return self + + def get_node_by_id(self, id): + for node in self.nodes: + if node.id == id: + return node + + def __repr__(self): + return f"Scene: Duration: {self.duration()}s," \ + f" Nodes: {len(self.nodes)}," \ + f" Map: {'Yes' if self.map is not None else 'No'}." 
diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/scene_graph.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/scene_graph.py new file mode 100644 index 000000000..1113bd4d1 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/scene_graph.py @@ -0,0 +1,493 @@ +import numpy as np +from scipy.spatial.distance import pdist, squareform +import scipy.signal as ss +from collections import defaultdict +import warnings +from .node import Node + + +class Edge(object): + def __init__(self, curr_node, other_node): + self.id = self.get_edge_id(curr_node, other_node) + self.type = self.get_edge_type(curr_node, other_node) + self.curr_node = curr_node + self.other_node = other_node + + @staticmethod + def get_edge_id(n1, n2): + raise NotImplementedError("Use one of the Edge subclasses!") + + @staticmethod + def get_str_from_types(nt1, nt2): + raise NotImplementedError("Use one of the Edge subclasses!") + + @staticmethod + def get_edge_type(n1, n2): + raise NotImplementedError("Use one of the Edge subclasses!") + + def __eq__(self, other): + return (isinstance(other, self.__class__) + and self.id == other.id) + + def __ne__(self, other): + return not self.__eq__(other) + + def __hash__(self): + return hash(self.id) + + def __repr__(self): + return self.id + + +class UndirectedEdge(Edge): + def __init__(self, curr_node, other_node): + super(UndirectedEdge, self).__init__(curr_node, other_node) + + @staticmethod + def get_edge_id(n1, n2): + return '-'.join(sorted([str(n1), str(n2)])) + + @staticmethod + def get_str_from_types(nt1, nt2): + return '-'.join(sorted([nt1.name, nt2.name])) + + @staticmethod + def get_edge_type(n1, n2): + return '-'.join(sorted([n1.type.name, n2.type.name])) + + +class DirectedEdge(Edge): + def __init__(self, curr_node, other_node): + super(DirectedEdge, self).__init__(curr_node, other_node) + + @staticmethod + def get_edge_id(n1, n2): + return '->'.join([str(n1), str(n2)]) + + @staticmethod + def get_str_from_types(nt1, nt2): + return '->'.join([nt1.name, nt2.name]) + + @staticmethod + def get_edge_type(n1, n2): + return '->'.join([n1.type.name, n2.type.name]) + + +class TemporalSceneGraph(object): + def __init__(self, + edge_radius, + nodes=None, + adj_cube=np.zeros((1, 0, 0)), + weight_cube=np.zeros((1, 0, 0)), + node_type_mat=np.zeros((0, 0)), + edge_scaling=None): + self.edge_radius = edge_radius + self.nodes = nodes + if nodes is None: + self.nodes = np.array([]) + self.adj_cube = adj_cube + self.weight_cube = weight_cube + self.node_type_mat = node_type_mat + self.adj_mat = np.max(self.adj_cube, axis=0).clip(max=1.0) + self.edge_scaling = edge_scaling + self.node_index_lookup = None + self.calculate_node_index_lookup() + + def calculate_node_index_lookup(self): + node_index_lookup = dict() + for i, node in enumerate(self.nodes): + node_index_lookup[node] = i + + self.node_index_lookup = node_index_lookup + + def get_num_edges(self, t=0): + return np.sum(self.adj_cube[t]) // 2 + + def get_index(self, node): + return self.node_index_lookup[node] + + @classmethod + def create_from_temp_scene_dict(cls, + scene_temp_dict, + attention_radius, + duration=1, + edge_addition_filter=None, + edge_removal_filter=None, + online=False): + """ + Construct a spatiotemporal graph from node positions in a dataset. + + :param scene_temp_dict: Dict with all nodes in scene as keys and np.ndarray with positions as value + :param attention_radius: Attention radius dict. 
+ :param duration: Temporal duration of the graph. + :param edge_addition_filter: - + :param edge_removal_filter: - + :return: TemporalSceneGraph + """ + + nodes = scene_temp_dict.keys() + N = len(nodes) + total_timesteps = duration + + if N == 0: + return TemporalSceneGraph(attention_radius) + + position_cube = np.full((total_timesteps, N, 2), np.nan) + + adj_cube = np.zeros((total_timesteps, N, N), dtype=np.int8) + dist_cube = np.zeros((total_timesteps, N, N), dtype=np.float) + + node_type_mat = np.zeros((N, N), dtype=np.int8) + node_attention_mat = np.zeros((N, N), dtype=np.float) + + for node_idx, node in enumerate(nodes): + if online: + # RingBuffers do not have a fixed constant size. Instead, they grow up to their capacity. Thus, + # we need to fill the values preceding the RingBuffer values with NaNs to make them fill the + # position_cube. + position_cube[-scene_temp_dict[node].shape[0]:, node_idx] = scene_temp_dict[node] + else: + position_cube[:, node_idx] = scene_temp_dict[node] + + node_type_mat[:, node_idx] = node.type.value + for node_idx_from, node_from in enumerate(nodes): + node_attention_mat[node_idx_from, node_idx] = attention_radius[(node_from.type, node.type)] + + np.fill_diagonal(node_type_mat, 0) + + for timestep in range(position_cube.shape[0]): + dists = squareform(pdist(position_cube[timestep], metric='euclidean')) + + # Put a 1 for all agent pairs which are closer than the edge_radius. + # Can produce a warning as dists can be nan if no data for node is available. + # This is accepted as nan <= x evaluates to False + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + adj_matrix = (dists <= node_attention_mat).astype(np.int8) * node_type_mat + + # Remove self-loops. + np.fill_diagonal(adj_matrix, 0) + + adj_cube[timestep] = adj_matrix + dist_cube[timestep] = dists + + dist_cube[np.isnan(dist_cube)] = 0. + weight_cube = np.divide(1., + dist_cube, + out=np.zeros_like(dist_cube), + where=(dist_cube > 0.)) + edge_scaling = None + if edge_addition_filter is not None and edge_removal_filter is not None: + edge_scaling = cls.calculate_edge_scaling(adj_cube, edge_addition_filter, edge_removal_filter) + tsg = cls(attention_radius, + np.array(list(nodes)), + adj_cube, weight_cube, + node_type_mat, + edge_scaling=edge_scaling) + return tsg + + @staticmethod + def calculate_edge_scaling(adj_cube, edge_addition_filter, edge_removal_filter): + shifted_right = np.pad(adj_cube, ((len(edge_addition_filter) - 1, 0), (0, 0), (0, 0)), 'constant', constant_values=0) + + new_edges = np.minimum( + ss.convolve(shifted_right, np.reshape(edge_addition_filter, (-1, 1, 1)), 'full'), 1. + )[(len(edge_addition_filter) - 1):-(len(edge_addition_filter) - 1)] + + new_edges[adj_cube == 0] = 0 + + result = np.minimum( + ss.convolve(new_edges, np.reshape(edge_removal_filter, (-1, 1, 1)), 'full'), 1. + )[:-(len(edge_removal_filter) - 1)] + + return result + + def to_scene_graph(self, t, t_hist=0, t_fut=0): + """ + Creates a Scene Graph from a Temporal Scene Graph + + :param t: Time in Temporal Scene Graph for which Scene Graph is created. + :param t_hist: Number of history timesteps which are considered to form edges in Scene Graph. + :param t_fut: Number of future timesteps which are considered to form edges in Scene Graph. 
+ :return: SceneGraph + """ + lower_t = np.clip(t-t_hist, a_min=0, a_max=None) + higher_t = np.clip(t + t_fut + 1, a_min=None, a_max=self.adj_cube.shape[0] + 1) + adj_mat = np.max(self.adj_cube[lower_t:higher_t], axis=0) + weight_mat = np.max(self.weight_cube[lower_t:higher_t], axis=0) + return SceneGraph(self.edge_radius, + self.nodes, + adj_mat, + weight_mat, + self.node_type_mat, + self.node_index_lookup, + edge_scaling=self.edge_scaling[t] if self.edge_scaling is not None else None) + + +class SceneGraph(object): + def __init__(self, + edge_radius, + nodes=None, + adj_mat=np.zeros((0, 0)), + weight_mat=np.zeros((0, 0)), + node_type_mat=np.zeros((0, 0)), + node_index_lookup=None, + edge_scaling=None): + self.edge_radius = edge_radius + self.nodes = nodes + if nodes is None: + self.nodes = np.array([]) + self.node_type_mat = node_type_mat + self.adj_mat = adj_mat + self.weight_mat = weight_mat + self.edge_scaling = edge_scaling + self.node_index_lookup = node_index_lookup + + def get_index(self, node): + return self.node_index_lookup[node] + + def get_num_edges(self): + return np.sum(self.adj_mat) // 2 + + def get_neighbors(self, node, node_type): + """ + Get all neighbors of a node. + + :param node: Node for which all neighbors are returned. + :param node_type: Specifies node types which are returned. + :return: List of all neighbors. + """ + node_index = self.get_index(node) + connection_mask = self.get_connection_mask(node_index) + mask = ((self.node_type_mat[node_index] == node_type.value) * connection_mask) + return self.nodes[mask] + + def get_edge_scaling(self, node=None): + if node is None: + return self.edge_scaling + else: + node_index = self.get_index(node) + connection_mask = self.get_connection_mask(node_index) + return self.edge_scaling[node_index, connection_mask] + + def get_edge_weight(self, node=None): + if node is None: + return self.weight_mat + else: + node_index = self.get_index(node) + connection_mask = self.get_connection_mask(node_index) + return self.weight_mat[node_index, connection_mask] + + def get_connection_mask(self, node_index): + if self.edge_scaling is None: # We do not use edge scaling + return self.adj_mat[node_index] > 0. 
+ else: + return self.edge_scaling[node_index] > 1e-2 + + def __sub__(self, other): + new_nodes = [node for node in self.nodes if node not in other.nodes] + removed_nodes = [node for node in other.nodes if node not in self.nodes] + + our_types = set(node.type for node in self.nodes) + other_types = set(node.type for node in other.nodes) + all_node_types = our_types | other_types + + new_neighbors = defaultdict(lambda: defaultdict(set)) + for node in self.nodes: + if node in removed_nodes: + continue + + if node in other.nodes: + for node_type in all_node_types: + new_items = set(self.get_neighbors(node, node_type)) - set(other.get_neighbors(node, node_type)) + if len(new_items) > 0: + new_neighbors[node][DirectedEdge.get_edge_type(node, Node(node_type, None, None))] = new_items + else: + for node_type in our_types: + neighbors = self.get_neighbors(node, node_type) + if len(neighbors) > 0: + new_neighbors[node][DirectedEdge.get_edge_type(node, Node(node_type, None, None))] = set(neighbors) + + removed_neighbors = defaultdict(lambda: defaultdict(set)) + for node in other.nodes: + if node in removed_nodes: + continue + + if node in self.nodes: + for node_type in all_node_types: + removed_items = set(other.get_neighbors(node, node_type)) - set(self.get_neighbors(node, node_type)) + if len(removed_items) > 0: + removed_neighbors[node][DirectedEdge.get_edge_type(node, Node(node_type, None, None))] = removed_items + else: + for node_type in other_types: + neighbors = other.get_neighbors(node, node_type) + if len(neighbors) > 0: + removed_neighbors[node][DirectedEdge.get_edge_type(node, Node(node_type, None, None))] = set(neighbors) + + return new_nodes, removed_nodes, new_neighbors, removed_neighbors + + +if __name__ == '__main__': + from environment import NodeTypeEnum + import time + + # # # # # # # # # # # # # # # # # + # Testing edge mask calculation # + # # # # # # # # # # # # # # # # # + B = np.array([[0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0], + [1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0], + [1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0], + [1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0]])[:, :, np.newaxis, np.newaxis] + print(B.shape) + + edge_addition_filter = [0.25, 0.5, 0.75, 1.0] + edge_removal_filter = [1.0, 0.5, 0.0] + for i in range(B.shape[0]): + A = B[i] # (time, N, N) + + print(A[:, 0, 0]) + + start = time.time() + new_edges = np.minimum(ss.convolve(A, np.reshape(edge_addition_filter, (-1, 1, 1)), 'full'), 1.)[(len(edge_addition_filter) - 1):] + old_edges = np.minimum(ss.convolve(A, np.reshape(edge_removal_filter, (-1, 1, 1)), 'full'), 1.)[:-(len(edge_removal_filter) - 1)] + res = np.minimum(new_edges + old_edges, 1.)[:, 0, 0] + end = time.time() + print(end - start) + print(res) + + start = time.time() + res = TemporalSceneGraph.calculate_edge_scaling(A, edge_addition_filter, edge_removal_filter)[:, 0, 0] + end = time.time() + print(end - start) + print(res) + + print('-'*40) + + # # # # # # # # # # # # # # # + # Testing graph subtraction # + # # # # # # # # # # # # # # # + print('\n' + '-' * 40 + '\n') + + node_type_list = ['PEDESTRIAN', + 'BICYCLE', + 'VEHICLE'] + nte = NodeTypeEnum(node_type_list) + + attention_radius = dict() + attention_radius[(nte.PEDESTRIAN, nte.PEDESTRIAN)] = 5.0 + attention_radius[(nte.PEDESTRIAN, nte.VEHICLE)] = 20.0 + attention_radius[(nte.PEDESTRIAN, nte.BICYCLE)] = 10.0 + attention_radius[(nte.VEHICLE, nte.PEDESTRIAN)] = 20.0 + attention_radius[(nte.VEHICLE, nte.VEHICLE)] = 20.0 + attention_radius[(nte.VEHICLE, nte.BICYCLE)] = 20.0 + attention_radius[(nte.BICYCLE, 
nte.PEDESTRIAN)] = 10.0 + attention_radius[(nte.BICYCLE, nte.VEHICLE)] = 20.0 + attention_radius[(nte.BICYCLE, nte.BICYCLE)] = 10.0 + + scene_dict1 = {Node(nte.PEDESTRIAN, node_id='1'): np.array([1, 0]), + Node(nte.PEDESTRIAN, node_id='2'): np.array([0, 1])} + sg1 = TemporalSceneGraph.create_from_temp_scene_dict( + scene_dict1, + attention_radius=attention_radius, + duration=1, + edge_addition_filter=[0.25, 0.5, 0.75, 1.0], + edge_removal_filter=[1.0, 0.0]).to_scene_graph(t=0) + + scene_dict2 = {Node(nte.PEDESTRIAN, node_id='1'): np.array([1, 0]), + Node(nte.PEDESTRIAN, node_id='2'): np.array([1, 1])} + sg2 = TemporalSceneGraph.create_from_temp_scene_dict( + scene_dict2, + attention_radius=attention_radius, + duration=1, + edge_addition_filter=[0.25, 0.5, 0.75, 1.0], + edge_removal_filter=[1.0, 0.0]).to_scene_graph(t=0) + + new_nodes, removed_nodes, new_neighbors, removed_neighbors = sg2 - sg1 + print('New Nodes:', new_nodes) + print('Removed Nodes:', removed_nodes) + print('New Neighbors:', new_neighbors) + print('Removed Neighbors:', removed_neighbors) + + # # # # # # # # # # # # # # # + print('\n' + '-' * 40 + '\n') + + scene_dict1 = {Node(nte.PEDESTRIAN, node_id='1'): np.array([1, 0]), + Node(nte.PEDESTRIAN, node_id='2'): np.array([0, 1])} + sg1 = TemporalSceneGraph.create_from_temp_scene_dict( + scene_dict1, + attention_radius=attention_radius, + duration=1, + edge_addition_filter=[0.25, 0.5, 0.75, 1.0], + edge_removal_filter=[1.0, 0.0]).to_scene_graph(t=0) + + scene_dict2 = {Node(nte.PEDESTRIAN, node_id='1'): np.array([1, 0]), + Node(nte.PEDESTRIAN, node_id='2'): np.array([1, 1]), + Node(nte.PEDESTRIAN, node_id='3'): np.array([20, 1])} + sg2 = TemporalSceneGraph.create_from_temp_scene_dict( + scene_dict2, + attention_radius=attention_radius, + duration=1, + edge_addition_filter=[0.25, 0.5, 0.75, 1.0], + edge_removal_filter=[1.0, 0.0]).to_scene_graph(t=0) + + new_nodes, removed_nodes, new_neighbors, removed_neighbors = sg2 - sg1 + print('New Nodes:', new_nodes) + print('Removed Nodes:', removed_nodes) + print('New Neighbors:', new_neighbors) + print('Removed Neighbors:', removed_neighbors) + + # # # # # # # # # # # # # # # + print('\n' + '-' * 40 + '\n') + + scene_dict1 = {Node(nte.PEDESTRIAN, node_id='1'): np.array([1, 0]), + Node(nte.PEDESTRIAN, node_id='2'): np.array([0, 1])} + sg1 = TemporalSceneGraph.create_from_temp_scene_dict( + scene_dict1, + attention_radius=attention_radius, + duration=1, + edge_addition_filter=[0.25, 0.5, 0.75, 1.0], + edge_removal_filter=[1.0, 0.0]).to_scene_graph(t=0) + + scene_dict2 = {Node(nte.PEDESTRIAN, node_id='1'): np.array([1, 0]), + Node(nte.PEDESTRIAN, node_id='2'): np.array([10, 1]), + Node(nte.PEDESTRIAN, node_id='3'): np.array([20, 1])} + sg2 = TemporalSceneGraph.create_from_temp_scene_dict( + scene_dict2, + attention_radius=attention_radius, + duration=1, + edge_addition_filter=[0.25, 0.5, 0.75, 1.0], + edge_removal_filter=[1.0, 0.0]).to_scene_graph(t=0) + + new_nodes, removed_nodes, new_neighbors, removed_neighbors = sg2 - sg1 + print('New Nodes:', new_nodes) + print('Removed Nodes:', removed_nodes) + print('New Neighbors:', new_neighbors) + print('Removed Neighbors:', removed_neighbors) + + # # # # # # # # # # # # # # # + print('\n' + '-' * 40 + '\n') + + scene_dict1 = {Node(nte.PEDESTRIAN, node_id='1'): np.array([0, 0]), + Node(nte.PEDESTRIAN, node_id='2'): np.array([0, 1])} + sg1 = TemporalSceneGraph.create_from_temp_scene_dict( + scene_dict1, + attention_radius=attention_radius, + duration=1, + edge_addition_filter=[0.25, 0.5, 0.75, 
1.0], + edge_removal_filter=[1.0, 0.0]).to_scene_graph(t=0) + + scene_dict2 = {Node(nte.PEDESTRIAN, node_id='2'): np.array([10, 1]), + Node(nte.PEDESTRIAN, node_id='3'): np.array([12, 1]), + Node(nte.PEDESTRIAN, node_id='4'): np.array([13, 1])} + sg2 = TemporalSceneGraph.create_from_temp_scene_dict( + scene_dict2, + attention_radius=attention_radius, + duration=1, + edge_addition_filter=[0.25, 0.5, 0.75, 1.0], + edge_removal_filter=[1.0, 0.0]).to_scene_graph(t=0) + + new_nodes, removed_nodes, new_neighbors, removed_neighbors = sg2 - sg1 + print('New Nodes:', new_nodes) + print('Removed Nodes:', removed_nodes) + print('New Neighbors:', new_neighbors) + print('Removed Neighbors:', removed_neighbors) diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/evaluation/__init__.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/evaluation/__init__.py new file mode 100644 index 000000000..6674ebbeb --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/evaluation/__init__.py @@ -0,0 +1 @@ +from .evaluation import compute_batch_statistics, log_batch_errors, print_batch_errors \ No newline at end of file diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/evaluation/evaluation.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/evaluation/evaluation.py new file mode 100644 index 000000000..8e5a643c2 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/evaluation/evaluation.py @@ -0,0 +1,140 @@ +import numpy as np +from scipy.interpolate import RectBivariateSpline +from scipy.ndimage import binary_dilation +from scipy.stats import gaussian_kde +from utils import prediction_output_to_trajectories +import visualization +from matplotlib import pyplot as plt + + +def compute_ade(predicted_trajs, gt_traj): + error = np.linalg.norm(predicted_trajs - gt_traj, axis=-1) + ade = np.mean(error, axis=-1) + return ade.flatten() + + +def compute_fde(predicted_trajs, gt_traj): + final_error = np.linalg.norm(predicted_trajs[:, :, -1] - gt_traj[-1], axis=-1) + return final_error.flatten() + + +def compute_kde_nll(predicted_trajs, gt_traj): + kde_ll = 0. 
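+ # Descriptive note: a Gaussian KDE is fit to the predicted sample positions at every
+ # (batch, timestep) pair and the ground-truth point is scored under it; the per-timestep
+ # log-densities are clipped from below, averaged over timesteps and batches, and negated.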
+ log_pdf_lower_bound = -20 + num_timesteps = gt_traj.shape[0] + num_batches = predicted_trajs.shape[0] + + for batch_num in range(num_batches): + for timestep in range(num_timesteps): + try: + kde = gaussian_kde(predicted_trajs[batch_num, :, timestep].T) + pdf = np.clip(kde.logpdf(gt_traj[timestep].T), a_min=log_pdf_lower_bound, a_max=None)[0] + kde_ll += pdf / (num_timesteps * num_batches) + except np.linalg.LinAlgError: + kde_ll = np.nan + + return -kde_ll + + +def compute_obs_violations(predicted_trajs, map): + obs_map = map.data + + interp_obs_map = RectBivariateSpline(range(obs_map.shape[1]), + range(obs_map.shape[0]), + binary_dilation(obs_map.T, iterations=4), + kx=1, ky=1) + + old_shape = predicted_trajs.shape + pred_trajs_map = map.to_map_points(predicted_trajs.reshape((-1, 2))) + + traj_obs_values = interp_obs_map(pred_trajs_map[:, 0], pred_trajs_map[:, 1], grid=False) + traj_obs_values = traj_obs_values.reshape((old_shape[0], old_shape[1])) + num_viol_trajs = np.sum(traj_obs_values.max(axis=1) > 0, dtype=float) + + return num_viol_trajs + + +def compute_batch_statistics(prediction_output_dict, + dt, + max_hl, + ph, + node_type_enum, + kde=True, + obs=False, + map=None, + prune_ph_to_future=False, + best_of=False): + + (prediction_dict, + _, + futures_dict) = prediction_output_to_trajectories(prediction_output_dict, + dt, + max_hl, + ph, + prune_ph_to_future=prune_ph_to_future) + + batch_error_dict = dict() + for node_type in node_type_enum: + batch_error_dict[node_type] = {'ade': list(), 'fde': list(), 'kde': list(), 'obs_viols': list()} + + for t in prediction_dict.keys(): + for node in prediction_dict[t].keys(): + ade_errors = compute_ade(prediction_dict[t][node], futures_dict[t][node]) + fde_errors = compute_fde(prediction_dict[t][node], futures_dict[t][node]) + if kde: + kde_ll = compute_kde_nll(prediction_dict[t][node], futures_dict[t][node]) + else: + kde_ll = 0 + if obs: + obs_viols = compute_obs_violations(prediction_dict[t][node], map) + else: + obs_viols = 0 + if best_of: + ade_errors = np.min(ade_errors, keepdims=True) + fde_errors = np.min(fde_errors, keepdims=True) + kde_ll = np.min(kde_ll) + batch_error_dict[node.type]['ade'].extend(list(ade_errors)) + batch_error_dict[node.type]['fde'].extend(list(fde_errors)) + batch_error_dict[node.type]['kde'].extend([kde_ll]) + batch_error_dict[node.type]['obs_viols'].extend([obs_viols]) + + return batch_error_dict + + +def log_batch_errors(batch_errors_list, log_writer, namespace, curr_iter, bar_plot=[], box_plot=[]): + for node_type in batch_errors_list[0].keys(): + for metric in batch_errors_list[0][node_type].keys(): + metric_batch_error = [] + for batch_errors in batch_errors_list: + metric_batch_error.extend(batch_errors[node_type][metric]) + + if len(metric_batch_error) > 0: + log_writer.add_histogram(f"{node_type.name}/{namespace}/{metric}", metric_batch_error, curr_iter) + log_writer.add_scalar(f"{node_type.name}/{namespace}/{metric}_mean", np.mean(metric_batch_error), curr_iter) + log_writer.add_scalar(f"{node_type.name}/{namespace}/{metric}_median", np.median(metric_batch_error), curr_iter) + + if metric in bar_plot: + pd = {'dataset': [namespace] * len(metric_batch_error), + metric: metric_batch_error} + kde_barplot_fig, ax = plt.subplots(figsize=(5, 5)) + visualization.visualization_utils.plot_barplots(ax, pd, 'dataset', metric) + log_writer.add_figure(f"{node_type.name}/{namespace}/{metric}_bar_plot", kde_barplot_fig, curr_iter) + + if metric in box_plot: + mse_fde_pd = {'dataset': [namespace] * 
len(metric_batch_error), + metric: metric_batch_error} + fig, ax = plt.subplots(figsize=(5, 5)) + visualization.visualization_utils.plot_boxplots(ax, mse_fde_pd, 'dataset', metric) + log_writer.add_figure(f"{node_type.name}/{namespace}/{metric}_box_plot", fig, curr_iter) + + +def print_batch_errors(batch_errors_list, namespace, curr_iter): + for node_type in batch_errors_list[0].keys(): + for metric in batch_errors_list[0][node_type].keys(): + metric_batch_error = [] + for batch_errors in batch_errors_list: + metric_batch_error.extend(batch_errors[node_type][metric]) + + if len(metric_batch_error) > 0: + print(f"{curr_iter}: {node_type.name}/{namespace}/{metric}_mean", np.mean(metric_batch_error)) + print(f"{curr_iter}: {node_type.name}/{namespace}/{metric}_median", np.median(metric_batch_error)) diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/__init__.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/__init__.py new file mode 100644 index 000000000..2b3ee24cd --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/__init__.py @@ -0,0 +1,2 @@ +from model.trajectron import Trajectron +from model.mgcvae import MultimodalGenerativeCVAE diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/components/__init__.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/components/__init__.py new file mode 100644 index 000000000..116a37caf --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/components/__init__.py @@ -0,0 +1,4 @@ +from .discrete_latent import DiscreteLatent +from .gmm2d import GMM2D +from .map_encoder import CNNMapEncoder +from .additive_attention import AdditiveAttention, TemporallyBatchedAdditiveAttention diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/components/additive_attention.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/components/additive_attention.py new file mode 100644 index 000000000..93623242b --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/components/additive_attention.py @@ -0,0 +1,67 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class AdditiveAttention(nn.Module): + # Implementing the attention module of Bahdanau et al. 2015 where + # score(h_j, s_(i-1)) = v . 
tanh(W_1 h_j + W_2 s_(i-1)) + def __init__(self, encoder_hidden_state_dim, decoder_hidden_state_dim, internal_dim=None): + super(AdditiveAttention, self).__init__() + + if internal_dim is None: + internal_dim = int((encoder_hidden_state_dim + decoder_hidden_state_dim) / 2) + + self.w1 = nn.Linear(encoder_hidden_state_dim, internal_dim, bias=False) + self.w2 = nn.Linear(decoder_hidden_state_dim, internal_dim, bias=False) + self.v = nn.Linear(internal_dim, 1, bias=False) + + def score(self, encoder_state, decoder_state): + # encoder_state is of shape (batch, enc_dim) + # decoder_state is of shape (batch, dec_dim) + # return value should be of shape (batch, 1) + return self.v(torch.tanh(self.w1(encoder_state) + self.w2(decoder_state))) + + def forward(self, encoder_states, decoder_state): + # encoder_states is of shape (batch, num_enc_states, enc_dim) + # decoder_state is of shape (batch, dec_dim) + score_vec = torch.cat([self.score(encoder_states[:, i], decoder_state) for i in range(encoder_states.shape[1])], + dim=1) + # score_vec is of shape (batch, num_enc_states) + + attention_probs = torch.unsqueeze(F.softmax(score_vec, dim=1), dim=2) + # attention_probs is of shape (batch, num_enc_states, 1) + + final_context_vec = torch.sum(attention_probs * encoder_states, dim=1) + # final_context_vec is of shape (batch, enc_dim) + + return final_context_vec, attention_probs + + +class TemporallyBatchedAdditiveAttention(AdditiveAttention): + # Implementing the attention module of Bahdanau et al. 2015 where + # score(h_j, s_(i-1)) = v . tanh(W_1 h_j + W_2 s_(i-1)) + def __init__(self, encoder_hidden_state_dim, decoder_hidden_state_dim, internal_dim=None): + super(TemporallyBatchedAdditiveAttention, self).__init__(encoder_hidden_state_dim, + decoder_hidden_state_dim, + internal_dim) + + def score(self, encoder_state, decoder_state): + # encoder_state is of shape (batch, num_enc_states, max_time, enc_dim) + # decoder_state is of shape (batch, max_time, dec_dim) + # return value should be of shape (batch, num_enc_states, max_time, 1) + return self.v(torch.tanh(self.w1(encoder_state) + torch.unsqueeze(self.w2(decoder_state), dim=1))) + + def forward(self, encoder_states, decoder_state): + # encoder_states is of shape (batch, num_enc_states, max_time, enc_dim) + # decoder_state is of shape (batch, max_time, dec_dim) + score_vec = self.score(encoder_states, decoder_state) + # score_vec is of shape (batch, num_enc_states, max_time, 1) + + attention_probs = F.softmax(score_vec, dim=1) + # attention_probs is of shape (batch, num_enc_states, max_time, 1) + + final_context_vec = torch.sum(attention_probs * encoder_states, dim=1) + # final_context_vec is of shape (batch, max_time, enc_dim) + + return final_context_vec, torch.squeeze(torch.transpose(attention_probs, 1, 2), dim=3) diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/components/discrete_latent.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/components/discrete_latent.py new file mode 100644 index 000000000..b7ae68b00 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/components/discrete_latent.py @@ -0,0 +1,109 @@ +import torch +import torch.distributions as td +import numpy as np +from ..model_utils import ModeKeys + + +class DiscreteLatent(object): + def __init__(self, hyperparams, device): + self.hyperparams = hyperparams + self.z_dim = hyperparams['N'] * hyperparams['K'] + self.N = hyperparams['N'] + self.K = hyperparams['K'] + self.kl_min = hyperparams['kl_min'] + 
self.device = device + self.temp = None # filled in by MultimodalGenerativeCVAE.set_annealing_params + self.z_logit_clip = None # filled in by MultimodalGenerativeCVAE.set_annealing_params + self.p_dist = None # filled in by MultimodalGenerativeCVAE.encoder + self.q_dist = None # filled in by MultimodalGenerativeCVAE.encoder + + def dist_from_h(self, h, mode): + logits_separated = torch.reshape(h, (-1, self.N, self.K)) + logits_separated_mean_zero = logits_separated - torch.mean(logits_separated, dim=-1, keepdim=True) + if self.z_logit_clip is not None and mode == ModeKeys.TRAIN: + c = self.z_logit_clip + logits = torch.clamp(logits_separated_mean_zero, min=-c, max=c) + else: + logits = logits_separated_mean_zero + + return td.OneHotCategorical(logits=logits) + + def sample_q(self, num_samples, mode): + bs = self.p_dist.probs.size()[0] + num_components = self.N * self.K + z_NK = torch.from_numpy(self.all_one_hot_combinations(self.N, self.K)).float().to(self.device).repeat(num_samples, bs) + return torch.reshape(z_NK, (num_samples * num_components, -1, self.z_dim)) + + def sample_p(self, num_samples, mode, most_likely_z=False, full_dist=True, all_z_sep=False): + num_components = 1 + if full_dist: + bs = self.p_dist.probs.size()[0] + z_NK = torch.from_numpy(self.all_one_hot_combinations(self.N, self.K)).float().to(self.device).repeat(num_samples, bs) + num_components = self.K ** self.N + k = num_samples * num_components + elif all_z_sep: + bs = self.p_dist.probs.size()[0] + z_NK = torch.from_numpy(self.all_one_hot_combinations(self.N, self.K)).float().to(self.device).repeat(1, bs) + k = self.K ** self.N + num_samples = k + elif most_likely_z: + # Sampling the most likely z from p(z|x). + eye_mat = torch.eye(self.p_dist.event_shape[-1], device=self.device) + argmax_idxs = torch.argmax(self.p_dist.probs, dim=2) + z_NK = torch.unsqueeze(eye_mat[argmax_idxs], dim=0).expand(num_samples, -1, -1, -1) + k = num_samples + else: + z_NK = self.p_dist.sample((num_samples,)) + k = num_samples + + if mode == ModeKeys.PREDICT: + return torch.reshape(z_NK, (k, -1, self.N * self.K)), num_samples, num_components + else: + return torch.reshape(z_NK, (k, -1, self.N * self.K)) + + def kl_q_p(self, log_writer=None, prefix=None, curr_iter=None): + kl_separated = td.kl_divergence(self.q_dist, self.p_dist) + if len(kl_separated.size()) < 2: + kl_separated = torch.unsqueeze(kl_separated, dim=0) + + kl_minibatch = torch.mean(kl_separated, dim=0, keepdim=True) + + if log_writer is not None: + log_writer.add_scalar(prefix + '/true_kl', torch.sum(kl_minibatch), curr_iter) + + if self.kl_min > 0: + kl_lower_bounded = torch.clamp(kl_minibatch, min=self.kl_min) + kl = torch.sum(kl_lower_bounded) + else: + kl = torch.sum(kl_minibatch) + + return kl + + def q_log_prob(self, z): + k = z.size()[0] + z_NK = torch.reshape(z, [k, -1, self.N, self.K]) + return torch.sum(self.q_dist.log_prob(z_NK), dim=2) + + def p_log_prob(self, z): + k = z.size()[0] + z_NK = torch.reshape(z, [k, -1, self.N, self.K]) + return torch.sum(self.p_dist.log_prob(z_NK), dim=2) + + def get_p_dist_probs(self): + return self.p_dist.probs + + @staticmethod + def all_one_hot_combinations(N, K): + return np.eye(K).take(np.reshape(np.indices([K] * N), [N, -1]).T, axis=0).reshape(-1, N * K) # [K**N, N*K] + + def summarize_for_tensorboard(self, log_writer, prefix, curr_iter): + log_writer.add_histogram(prefix + "/latent/p_z_x", self.p_dist.probs, curr_iter) + log_writer.add_histogram(prefix + "/latent/q_z_xy", self.q_dist.probs, curr_iter) + 
+ log_writer.add_histogram(prefix + "/latent/p_z_x_logits", self.p_dist.logits, curr_iter) + log_writer.add_histogram(prefix + "/latent/q_z_xy_logits", self.q_dist.logits, curr_iter) + if self.z_dim <= 9: + for i in range(self.N): + for j in range(self.K): + log_writer.add_histogram(prefix + "/latent/q_z_xy_logit{0}{1}".format(i, j), + self.q_dist.logits[:, i, j], + curr_iter) diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/components/gmm2d.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/components/gmm2d.py new file mode 100644 index 000000000..37e373398 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/components/gmm2d.py @@ -0,0 +1,158 @@ +import torch +import torch.distributions as td +import numpy as np +from ..model_utils import to_one_hot + + +class GMM2D(td.Distribution): + r""" + Gaussian Mixture Model using 2D Multivariate Gaussians for each of its N components: + Cholesky decomposition and affine transformation for sampling: + + .. math:: Z \sim N(0, I) + + .. math:: S = \mu + LZ + + .. math:: S \sim N(\mu, \Sigma) \rightarrow N(\mu, LL^T) + + where :math:`L = chol(\Sigma)` and + + .. math:: \Sigma = \left[ {\begin{array}{cc} \sigma^2_x & \rho \sigma_x \sigma_y \\ \rho \sigma_x \sigma_y & \sigma^2_y \\ \end{array} } \right] + + such that + + .. math:: L = chol(\Sigma) = \left[ {\begin{array}{cc} \sigma_x & 0 \\ \rho \sigma_y & \sigma_y \sqrt{1-\rho^2} \\ \end{array} } \right] + + :param log_pis: Log Mixing Proportions :math:`log(\pi)`. [..., N] + :param mus: Mixture Components mean :math:`\mu`. [..., N * 2] + :param log_sigmas: Log Standard Deviations :math:`log(\sigma_d)`. [..., N * 2] + :param corrs: Cholesky factor of correlation :math:`\rho`. [..., N] + :param clip_lo: Clips the lower end of the standard deviation. + :param clip_hi: Clips the upper end of the standard deviation. 
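+
+ Example (illustrative sketch; shapes follow the parameter docs above, here with N = 2 components):
+
+ >>> gmm = GMM2D(log_pis=torch.zeros(1, 1, 1, 2), mus=torch.zeros(1, 1, 1, 4),
+ ... log_sigmas=torch.zeros(1, 1, 1, 4), corrs=torch.zeros(1, 1, 1, 2))
+ >>> sample = gmm.rsample() # one 2D position per batch element, shape (1, 1, 1, 2)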
+ """ + def __init__(self, log_pis, mus, log_sigmas, corrs): + super(GMM2D, self).__init__(batch_shape=log_pis.shape[0], event_shape=log_pis.shape[1:]) + self.components = log_pis.shape[-1] + self.dimensions = 2 + self.device = log_pis.device + + log_pis = torch.clamp(log_pis, min=-1e5) + self.log_pis = log_pis - torch.logsumexp(log_pis, dim=-1, keepdim=True) # [..., N] + self.mus = self.reshape_to_components(mus) # [..., N, 2] + self.log_sigmas = self.reshape_to_components(log_sigmas) # [..., N, 2] + self.sigmas = torch.exp(self.log_sigmas) # [..., N, 2] + self.one_minus_rho2 = 1 - corrs**2 # [..., N] + self.one_minus_rho2 = torch.clamp(self.one_minus_rho2, min=1e-5, max=1) # otherwise log can be nan + self.corrs = corrs # [..., N] + + self.L = torch.stack([torch.stack([self.sigmas[..., 0], torch.zeros_like(self.log_pis)], dim=-1), + torch.stack([self.sigmas[..., 1] * self.corrs, + self.sigmas[..., 1] * torch.sqrt(self.one_minus_rho2)], + dim=-1)], + dim=-2) + + self.pis_cat_dist = td.Categorical(logits=log_pis) + + @classmethod + def from_log_pis_mus_cov_mats(cls, log_pis, mus, cov_mats): + corrs_sigma12 = cov_mats[..., 0, 1] + sigma_1 = torch.clamp(cov_mats[..., 0, 0], min=1e-8) + sigma_2 = torch.clamp(cov_mats[..., 1, 1], min=1e-8) + sigmas = torch.stack([torch.sqrt(sigma_1), torch.sqrt(sigma_2)], dim=-1) + log_sigmas = torch.log(sigmas) + corrs = corrs_sigma12 / (torch.prod(sigmas, dim=-1)) + return cls(log_pis, mus, log_sigmas, corrs) + + def rsample(self, sample_shape=torch.Size()): + """ + Generates a sample_shape shaped reparameterized sample or sample_shape + shaped batch of reparameterized samples if the distribution parameters + are batched. + + :param sample_shape: Shape of the samples + :return: Samples from the GMM. + """ + mvn_samples = (self.mus + + torch.squeeze( + torch.matmul(self.L, + torch.unsqueeze( + torch.randn(size=sample_shape + self.mus.shape, device=self.device), + dim=-1) + ), + dim=-1)) + component_cat_samples = self.pis_cat_dist.sample(sample_shape) + selector = torch.unsqueeze(to_one_hot(component_cat_samples, self.components), dim=-1) + return torch.sum(mvn_samples*selector, dim=-2) + + def log_prob(self, value): + r""" + Calculates the log probability of a value using the PDF for bivariate normal distributions: + + .. math:: + f(x | \mu, \sigma, \rho)={\frac {1}{2\pi \sigma _{x}\sigma _{y}{\sqrt {1-\rho ^{2}}}}}\exp + \left(-{\frac {1}{2(1-\rho ^{2})}}\left[{\frac {(x-\mu _{x})^{2}}{\sigma _{x}^{2}}}+ + {\frac {(y-\mu _{y})^{2}}{\sigma _{y}^{2}}}-{\frac {2\rho (x-\mu _{x})(y-\mu _{y})} + {\sigma _{x}\sigma _{y}}}\right]\right) + + :param value: The log probability density function is evaluated at those values. 
+ :return: Log probability + """ + # x: [..., 2] + value = torch.unsqueeze(value, dim=-2) # [..., 1, 2] + dx = value - self.mus # [..., N, 2] + + exp_nominator = ((torch.sum((dx/self.sigmas)**2, dim=-1) # first and second term of exp nominator + - 2*self.corrs*torch.prod(dx, dim=-1)/torch.prod(self.sigmas, dim=-1))) # [..., N] + + component_log_p = -(2*np.log(2*np.pi) + + torch.log(self.one_minus_rho2) + + 2*torch.sum(self.log_sigmas, dim=-1) + + exp_nominator/self.one_minus_rho2) / 2 + + return torch.logsumexp(self.log_pis + component_log_p, dim=-1) + + def get_for_node_at_time(self, n, t): + return self.__class__(self.log_pis[:, n:n+1, t:t+1], self.mus[:, n:n+1, t:t+1], + self.log_sigmas[:, n:n+1, t:t+1], self.corrs[:, n:n+1, t:t+1]) + + def mode(self): + """ + Calculates the mode of the GMM by calculating probabilities of a 2D mesh grid + + :param required_accuracy: Accuracy of the meshgrid + :return: Mode of the GMM + """ + if self.mus.shape[-2] > 1: + samp, bs, time, comp, _ = self.mus.shape + assert samp == 1, "For taking the mode only one sample makes sense." + mode_node_list = [] + for n in range(bs): + mode_t_list = [] + for t in range(time): + nt_gmm = self.get_for_node_at_time(n, t) + x_min = self.mus[:, n, t, :, 0].min() + x_max = self.mus[:, n, t, :, 0].max() + y_min = self.mus[:, n, t, :, 1].min() + y_max = self.mus[:, n, t, :, 1].max() + search_grid = torch.stack(torch.meshgrid([torch.arange(x_min, x_max, 0.01), + torch.arange(y_min, y_max, 0.01)]), dim=2 + ).view(-1, 2).float().to(self.device) + + ll_score = nt_gmm.log_prob(search_grid) + argmax = torch.argmax(ll_score.squeeze(), dim=0) + mode_t_list.append(search_grid[argmax]) + mode_node_list.append(torch.stack(mode_t_list, dim=0)) + return torch.stack(mode_node_list, dim=0).unsqueeze(dim=0) + return torch.squeeze(self.mus, dim=-2) + + def reshape_to_components(self, tensor): + if len(tensor.shape) == 5: + return tensor + return torch.reshape(tensor, list(tensor.shape[:-1]) + [self.components, self.dimensions]) + + def get_covariance_matrix(self): + cov = self.corrs * torch.prod(self.sigmas, dim=-1) + E = torch.stack([torch.stack([self.sigmas[..., 0]**2, cov], dim=-1), + torch.stack([cov, self.sigmas[..., 1]**2], dim=-1)], + dim=-2) + return E diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/components/graph_attention.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/components/graph_attention.py new file mode 100644 index 000000000..fc8d89a03 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/components/graph_attention.py @@ -0,0 +1,58 @@ +import warnings +import math +import numbers +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.nn import init, Parameter + + +class GraphMultiTypeAttention(nn.Module): + def __init__(self, in_features, hidden_features, out_features, bias=True, types=1): + super(GraphMultiTypeAttention, self).__init__() + self.types = types + self.in_features = in_features + self.out_features = out_features + self.node_self_loop_weight = Parameter(torch.Tensor(hidden_features, in_features[0])) + + self.weight_per_type = nn.ParameterList() + for i in range(types): + self.weight_per_type.append(Parameter(torch.Tensor(hidden_features, in_features[i]))) + if bias: + self.bias = Parameter(torch.Tensor(hidden_features)) + else: + self.register_parameter('bias', None) + + self.linear_to_out = nn.Linear(hidden_features, out_features, bias=bias) + + self.reset_parameters() + + def 
reset_parameters(self): + for weight in self.weight_per_type: + bound = 1 / math.sqrt(weight.size(1)) + init.uniform_(weight, -bound, bound) + bound = 1 / math.sqrt(self.node_self_loop_weight.size(1)) + init.uniform_(self.node_self_loop_weight, -bound, bound) + if self.bias is not None: + init.uniform_(self.bias, -bound, bound) + + def forward(self, inputs, types, edge_weights): + weight_list = list() + for i, type in enumerate(types): + weight_list.append((edge_weights[i] / len(edge_weights)) * self.weight_per_type[type].T) + weight_list.append(self.node_self_loop_weight.T) + weight = torch.cat(weight_list, dim=0) + stacked_input = torch.cat(inputs, dim=-1) + output = stacked_input.matmul(weight) + + output = output + + if self.bias is not None: + output += self.bias + + return torch.relu(self.linear_to_out(torch.relu(output))) + + def extra_repr(self): + return 'in_features={}, hidden_features={},, out_features={}, types={}, bias={}'.format( + self.in_features, self.hidden_features, self.out_features, self.types, self.bias is not None + ) \ No newline at end of file diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/components/map_encoder.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/components/map_encoder.py new file mode 100644 index 000000000..27d6e1d36 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/components/map_encoder.py @@ -0,0 +1,28 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class CNNMapEncoder(nn.Module): + def __init__(self, map_channels, hidden_channels, output_size, masks, strides, patch_size): + super(CNNMapEncoder, self).__init__() + self.convs = nn.ModuleList() + patch_size_x = patch_size[0] + patch_size[2] + patch_size_y = patch_size[1] + patch_size[3] + input_size = (map_channels, patch_size_x, patch_size_y) + x_dummy = torch.ones(input_size).unsqueeze(0) * torch.tensor(float('nan')) + + for i, hidden_size in enumerate(hidden_channels): + self.convs.append(nn.Conv2d(map_channels if i == 0 else hidden_channels[i-1], + hidden_channels[i], masks[i], + stride=strides[i])) + x_dummy = self.convs[i](x_dummy) + + self.fc = nn.Linear(x_dummy.numel(), output_size) + + def forward(self, x, training): + for conv in self.convs: + x = F.leaky_relu(conv(x), 0.2) + x = torch.flatten(x, start_dim=1) + x = self.fc(x) + return x diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dataset/__init__.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dataset/__init__.py new file mode 100644 index 000000000..a01f88e8c --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dataset/__init__.py @@ -0,0 +1,2 @@ +from .dataset import EnvironmentDataset, NodeTypeDataset +from .preprocessing import collate, get_node_timestep_data, get_timesteps_data, restore, get_relative_robot_traj diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dataset/dataset.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dataset/dataset.py new file mode 100644 index 000000000..ef361826c --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dataset/dataset.py @@ -0,0 +1,76 @@ +from torch.utils import data +import numpy as np +from .preprocessing import get_node_timestep_data + + +class EnvironmentDataset(object): + def __init__(self, env, state, pred_state, node_freq_mult, scene_freq_mult, hyperparams, **kwargs): + self.env = env + self.state = state + 
self.pred_state = pred_state + self.hyperparams = hyperparams + self.max_ht = self.hyperparams['maximum_history_length'] + self.max_ft = kwargs['min_future_timesteps'] + self.node_type_datasets = list() + self._augment = False + for node_type in env.NodeType: + if node_type not in hyperparams['pred_state']: + continue + self.node_type_datasets.append(NodeTypeDataset(env, node_type, state, pred_state, node_freq_mult, + scene_freq_mult, hyperparams, **kwargs)) + + @property + def augment(self): + return self._augment + + @augment.setter + def augment(self, value): + self._augment = value + for node_type_dataset in self.node_type_datasets: + node_type_dataset.augment = value + + def __iter__(self): + return iter(self.node_type_datasets) + + +class NodeTypeDataset(data.Dataset): + def __init__(self, env, node_type, state, pred_state, node_freq_mult, + scene_freq_mult, hyperparams, augment=False, **kwargs): + self.env = env + self.state = state + self.pred_state = pred_state + self.hyperparams = hyperparams + self.max_ht = self.hyperparams['maximum_history_length'] + self.max_ft = kwargs['min_future_timesteps'] + + self.augment = augment + + self.node_type = node_type + self.index = self.index_env(node_freq_mult, scene_freq_mult, **kwargs) + self.len = len(self.index) + self.edge_types = [edge_type for edge_type in env.get_edge_types() if edge_type[0] is node_type] + + def index_env(self, node_freq_mult, scene_freq_mult, **kwargs): + index = list() + for scene in self.env.scenes: + present_node_dict = scene.present_nodes(np.arange(0, scene.timesteps), type=self.node_type, **kwargs) + for t, nodes in present_node_dict.items(): + for node in nodes: + index += [(scene, t, node)] *\ + (scene.frequency_multiplier if scene_freq_mult else 1) *\ + (node.frequency_multiplier if node_freq_mult else 1) + + return index + + def __len__(self): + return self.len + + def __getitem__(self, i): + (scene, t, node) = self.index[i] + + if self.augment: + scene = scene.augment() + node = scene.get_node_by_id(node.id) + + return get_node_timestep_data(self.env, scene, t, node, self.state, self.pred_state, + self.edge_types, self.max_ht, self.max_ft, self.hyperparams) diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dataset/homography_warper.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dataset/homography_warper.py new file mode 100644 index 000000000..885ab5f9a --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dataset/homography_warper.py @@ -0,0 +1,471 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from typing import Tuple, Optional + + +pi = torch.tensor(3.14159265358979323846) + + +def deg2rad(tensor: torch.Tensor) -> torch.Tensor: + r"""Function that converts angles from degrees to radians. + Args: + tensor (torch.Tensor): Tensor of arbitrary shape. + Returns: + torch.Tensor: tensor with same shape as input. + """ + if not isinstance(tensor, torch.Tensor): + raise TypeError("Input type is not a torch.Tensor. Got {}".format( + type(tensor))) + + return tensor * pi.to(tensor.device).type(tensor.dtype) / 180. + + +def angle_to_rotation_matrix(angle: torch.Tensor) -> torch.Tensor: + """ + Creates a rotation matrix out of angles in degrees + Args: + angle: (torch.Tensor): tensor of angles in degrees, any shape. + Returns: + torch.Tensor: tensor of *x2x2 rotation matrices. 
+ Shape: + - Input: :math:`(*)` + - Output: :math:`(*, 2, 2)` + Example: + >>> input = torch.rand(1, 3) # Nx3 + >>> output = kornia.angle_to_rotation_matrix(input) # Nx3x2x2 + """ + ang_rad = deg2rad(angle) + cos_a: torch.Tensor = torch.cos(ang_rad) + sin_a: torch.Tensor = torch.sin(ang_rad) + return torch.stack([cos_a, sin_a, -sin_a, cos_a], dim=-1).view(*angle.shape, 2, 2) + + +def get_rotation_matrix2d( + center: torch.Tensor, + angle: torch.Tensor, + scale: torch.Tensor) -> torch.Tensor: + r"""Calculates an affine matrix of 2D rotation. + The function calculates the following matrix: + .. math:: + \begin{bmatrix} + \alpha & \beta & (1 - \alpha) \cdot \text{x} + - \beta \cdot \text{y} \\ + -\beta & \alpha & \beta \cdot \text{x} + + (1 - \alpha) \cdot \text{y} + \end{bmatrix} + where + .. math:: + \alpha = \text{scale} \cdot cos(\text{radian}) \\ + \beta = \text{scale} \cdot sin(\text{radian}) + The transformation maps the rotation center to itself + If this is not the target, adjust the shift. + Args: + center (Tensor): center of the rotation in the source image. + angle (Tensor): rotation radian in degrees. Positive values mean + counter-clockwise rotation (the coordinate origin is assumed to + be the top-left corner). + scale (Tensor): isotropic scale factor. + Returns: + Tensor: the affine matrix of 2D rotation. + Shape: + - Input: :math:`(B, 2)`, :math:`(B)` and :math:`(B)` + - Output: :math:`(B, 2, 3)` + Example: + >>> center = torch.zeros(1, 2) + >>> scale = torch.ones(1) + >>> radian = 45. * torch.ones(1) + >>> M = kornia.get_rotation_matrix2d(center, radian, scale) + tensor([[[ 0.7071, 0.7071, 0.0000], + [-0.7071, 0.7071, 0.0000]]]) + """ + if not torch.is_tensor(center): + raise TypeError("Input center type is not a torch.Tensor. Got {}" + .format(type(center))) + if not torch.is_tensor(angle): + raise TypeError("Input radian type is not a torch.Tensor. Got {}" + .format(type(angle))) + if not torch.is_tensor(scale): + raise TypeError("Input scale type is not a torch.Tensor. Got {}" + .format(type(scale))) + if not (len(center.shape) == 2 and center.shape[1] == 2): + raise ValueError("Input center must be a Bx2 tensor. Got {}" + .format(center.shape)) + if not len(angle.shape) == 1: + raise ValueError("Input radian must be a B tensor. Got {}" + .format(angle.shape)) + if not len(scale.shape) == 1: + raise ValueError("Input scale must be a B tensor. Got {}" + .format(scale.shape)) + if not (center.shape[0] == angle.shape[0] == scale.shape[0]): + raise ValueError("Inputs must have same batch size dimension. Got {}" + .format(center.shape, angle.shape, scale.shape)) + # convert radian and apply scale + scaled_rotation: torch.Tensor = angle_to_rotation_matrix(angle) * scale.view(-1, 1, 1) + alpha: torch.Tensor = scaled_rotation[:, 0, 0] + beta: torch.Tensor = scaled_rotation[:, 0, 1] + + # unpack the center to x, y coordinates + x: torch.Tensor = center[..., 0] + y: torch.Tensor = center[..., 1] + + # create output tensor + batch_size: int = center.shape[0] + M: torch.Tensor = torch.zeros( + batch_size, 2, 3, device=center.device, dtype=center.dtype) + M[..., 0:2, 0:2] = scaled_rotation + M[..., 0, 2] = (torch.tensor(1.) - alpha) * x - beta * y + M[..., 1, 2] = beta * x + (torch.tensor(1.) - alpha) * y + return M + +def convert_points_to_homogeneous(points: torch.Tensor) -> torch.Tensor: + r"""Function that converts points from Euclidean to homogeneous space. 
+ Examples:: + >>> input = torch.rand(2, 4, 3) # BxNx3 + >>> output = kornia.convert_points_to_homogeneous(input) # BxNx4 + """ + if not isinstance(points, torch.Tensor): + raise TypeError("Input type is not a torch.Tensor. Got {}".format( + type(points))) + if len(points.shape) < 2: + raise ValueError("Input must be at least a 2D tensor. Got {}".format( + points.shape)) + + return torch.nn.functional.pad(points, [0, 1], "constant", 1.0) + + +def convert_points_from_homogeneous( + points: torch.Tensor, eps: float = 1e-8) -> torch.Tensor: + r"""Function that converts points from homogeneous to Euclidean space. + Examples:: + >>> input = torch.rand(2, 4, 3) # BxNx3 + >>> output = kornia.convert_points_from_homogeneous(input) # BxNx2 + """ + if not isinstance(points, torch.Tensor): + raise TypeError("Input type is not a torch.Tensor. Got {}".format( + type(points))) + + if len(points.shape) < 2: + raise ValueError("Input must be at least a 2D tensor. Got {}".format( + points.shape)) + + # we check for points at infinity + z_vec: torch.Tensor = points[..., -1:] + + # set the results of division by zeror/near-zero to 1.0 + # follow the convention of opencv: + # https://github.com/opencv/opencv/pull/14411/files + mask: torch.Tensor = torch.abs(z_vec) > eps + scale: torch.Tensor = torch.ones_like(z_vec).masked_scatter_( + mask, torch.tensor(1.0).to(points.device) / z_vec[mask]) + + return scale * points[..., :-1] + +def transform_points(trans_01: torch.Tensor, + points_1: torch.Tensor) -> torch.Tensor: + r"""Function that applies transformations to a set of points. + Args: + trans_01 (torch.Tensor): tensor for transformations of shape + :math:`(B, D+1, D+1)`. + points_1 (torch.Tensor): tensor of points of shape :math:`(B, N, D)`. + Returns: + torch.Tensor: tensor of N-dimensional points. + Shape: + - Output: :math:`(B, N, D)` + Examples: + >>> points_1 = torch.rand(2, 4, 3) # BxNx3 + >>> trans_01 = torch.eye(4).view(1, 4, 4) # Bx4x4 + >>> points_0 = kornia.transform_points(trans_01, points_1) # BxNx3 + """ + if not torch.is_tensor(trans_01) or not torch.is_tensor(points_1): + raise TypeError("Input type is not a torch.Tensor") + if not trans_01.device == points_1.device: + raise TypeError("Tensor must be in the same device") + if not trans_01.shape[0] == points_1.shape[0] and trans_01.shape[0] != 1: + raise ValueError("Input batch size must be the same for both tensors or 1") + if not trans_01.shape[-1] == (points_1.shape[-1] + 1): + raise ValueError("Last input dimensions must differe by one unit") + # to homogeneous + points_1_h = convert_points_to_homogeneous(points_1) # BxNxD+1 + # transform coordinates + points_0_h = torch.matmul( + trans_01.unsqueeze(1), points_1_h.unsqueeze(-1)) + points_0_h = torch.squeeze(points_0_h, dim=-1) + # to euclidean + points_0 = convert_points_from_homogeneous(points_0_h) # BxNxD + return points_0 + + +def multi_linspace(a, b, num, endpoint=True, device='cpu', dtype=torch.float): + """This function is just like np.linspace, but will create linearly + spaced vectors from a start to end vector. + Inputs: + a - Start vector. + b - End vector. + num - Number of samples to generate. Default is 50. Must be above 0. + endpoint - If True, b is the last sample. + Otherwise, it is not included. Default is True. 
+ """ + + return a[..., None] + (b-a)[..., None]/(num-endpoint) * torch.arange(num, device=device, dtype=dtype) + + +def create_batched_meshgrid( + x_min: torch.Tensor, + y_min: torch.Tensor, + x_max: torch.Tensor, + y_max: torch.Tensor, + height: int, + width: int, + device: Optional[torch.device] = torch.device('cpu')) -> torch.Tensor: + """Generates a coordinate grid for an image. + When the flag `normalized_coordinates` is set to True, the grid is + normalized to be in the range [-1,1] to be consistent with the pytorch + function grid_sample. + http://pytorch.org/docs/master/nn.html#torch.nn.functional.grid_sample + Args: + height (int): the image height (rows). + width (int): the image width (cols). + normalized_coordinates (Optional[bool]): whether to normalize + coordinates in the range [-1, 1] in order to be consistent with the + PyTorch function grid_sample. + Return: + torch.Tensor: returns a grid tensor with shape :math:`(1, H, W, 2)`. + """ + # generate coordinates + xs = multi_linspace(x_min, x_max, width, device=device, dtype=torch.float) + ys = multi_linspace(y_min, y_max, height, device=device, dtype=torch.float) + + # generate grid by stacking coordinates + bs = x_min.shape[0] + batched_grid_i_list = list() + for i in range(bs): + batched_grid_i_list.append(torch.stack(torch.meshgrid([xs[i], ys[i]])).transpose(1, 2)) # 2xHxW + batched_grid: torch.Tensor = torch.stack(batched_grid_i_list, dim=0) + return batched_grid.permute(0, 2, 3, 1) # BxHxWx2 + + +def homography_warp(patch_src: torch.Tensor, + centers: torch.Tensor, + dst_homo_src: torch.Tensor, + dsize: Tuple[int, int], + mode: str = 'bilinear', + padding_mode: str = 'zeros') -> torch.Tensor: + r"""Function that warps image patchs or tensors by homographies. + See :class:`~kornia.geometry.warp.HomographyWarper` for details. + Args: + patch_src (torch.Tensor): The image or tensor to warp. Should be from + source of shape :math:`(N, C, H, W)`. + dst_homo_src (torch.Tensor): The homography or stack of homographies + from source to destination of shape + :math:`(N, 3, 3)`. + dsize (Tuple[int, int]): The height and width of the image to warp. + mode (str): interpolation mode to calculate output values + 'bilinear' | 'nearest'. Default: 'bilinear'. + padding_mode (str): padding mode for outside grid values + 'zeros' | 'border' | 'reflection'. Default: 'zeros'. + Return: + torch.Tensor: Patch sampled at locations from source to destination. + Example: + >>> input = torch.rand(1, 3, 32, 32) + >>> homography = torch.eye(3).view(1, 3, 3) + >>> output = kornia.homography_warp(input, homography, (32, 32)) + """ + + out_height, out_width = dsize + image_height, image_width = patch_src.shape[-2:] + x_min = 2. * (centers[..., 0] - out_width/2) / image_width - 1. + y_min = 2. * (centers[..., 1] - out_height/2) / image_height - 1. + x_max = 2. * (centers[..., 0] + out_width/2) / image_width - 1. + y_max = 2. * (centers[..., 1] + out_height/2) / image_height - 1. 
+ warper = HomographyWarper(x_min, y_min, x_max, y_max, out_height, out_width, mode, padding_mode) + return warper(patch_src, dst_homo_src) + + +def normal_transform_pixel(height, width): + + tr_mat = torch.Tensor([[1.0, 0.0, -1.0], + [0.0, 1.0, -1.0], + [0.0, 0.0, 1.0]]) # 1x3x3 + + tr_mat[0, 0] = tr_mat[0, 0] * 2.0 / (width - 1.0) + tr_mat[1, 1] = tr_mat[1, 1] * 2.0 / (height - 1.0) + + tr_mat = tr_mat.unsqueeze(0) + + return tr_mat + + +def src_norm_to_dst_norm(dst_pix_trans_src_pix: torch.Tensor, + dsize_src: Tuple[int, int], dsize_dst: Tuple[int, int]) -> torch.Tensor: + # source and destination sizes + src_h, src_w = dsize_src + dst_h, dst_w = dsize_dst + # the devices and types + device: torch.device = dst_pix_trans_src_pix.device + dtype: torch.dtype = dst_pix_trans_src_pix.dtype + # compute the transformation pixel/norm for src/dst + src_norm_trans_src_pix: torch.Tensor = normal_transform_pixel( + src_h, src_w).to(device, dtype) + src_pix_trans_src_norm = torch.inverse(src_norm_trans_src_pix) + dst_norm_trans_dst_pix: torch.Tensor = normal_transform_pixel( + dst_h, dst_w).to(device, dtype) + # compute chain transformations + dst_norm_trans_src_norm: torch.Tensor = ( + dst_norm_trans_dst_pix @ (dst_pix_trans_src_pix @ src_pix_trans_src_norm) + ) + return dst_norm_trans_src_norm + + +def transform_warp_impl(src: torch.Tensor, centers: torch.Tensor, dst_pix_trans_src_pix: torch.Tensor, + dsize_src: Tuple[int, int], dsize_dst: Tuple[int, int], + grid_mode: str, padding_mode: str) -> torch.Tensor: + """Compute the transform in normalized cooridnates and perform the warping. + """ + dst_norm_trans_src_norm: torch.Tensor = src_norm_to_dst_norm( + dst_pix_trans_src_pix, dsize_src, dsize_src) + + src_norm_trans_dst_norm = torch.inverse(dst_norm_trans_src_norm) + return homography_warp(src, centers, src_norm_trans_dst_norm, dsize_dst, grid_mode, padding_mode) + + +class HomographyWarper(nn.Module): + r"""Warps image patches or tensors by homographies. + .. math:: + X_{dst} = H_{src}^{\{dst\}} * X_{src} + Args: + height (int): The height of the image to warp. + width (int): The width of the image to warp. + mode (str): interpolation mode to calculate output values + 'bilinear' | 'nearest'. Default: 'bilinear'. + padding_mode (str): padding mode for outside grid values + 'zeros' | 'border' | 'reflection'. Default: 'zeros'. + """ + + def __init__( + self, + x_min: torch.Tensor, + y_min: torch.Tensor, + x_max: torch.Tensor, + y_max: torch.Tensor, + height: int, + width: int, + mode: str = 'bilinear', + padding_mode: str = 'zeros') -> None: + super(HomographyWarper, self).__init__() + self.width: int = width + self.height: int = height + self.mode: str = mode + self.padding_mode: str = padding_mode + + # create base grid to compute the flow + self.grid: torch.Tensor = create_batched_meshgrid(x_min, y_min, x_max, y_max, height, width) + + def warp_grid(self, dst_homo_src: torch.Tensor) -> torch.Tensor: + r"""Computes the grid to warp the coordinates grid by an homography. + Args: + dst_homo_src (torch.Tensor): Homography or homographies (stacked) to + transform all points in the grid. Shape of the + homography has to be :math:`(N, 3, 3)`. + Returns: + torch.Tensor: the transformed grid of shape :math:`(N, H, W, 2)`. 
+ """ + batch_size: int = dst_homo_src.shape[0] + device: torch.device = dst_homo_src.device + dtype: torch.dtype = dst_homo_src.dtype + # expand grid to match the input batch size + grid: torch.Tensor = self.grid + if len(dst_homo_src.shape) == 3: # local homography case + dst_homo_src = dst_homo_src.view(batch_size, 1, 3, 3) # NxHxWx3x3 + # perform the actual grid transformation, + # the grid is copied to input device and casted to the same type + flow: torch.Tensor = transform_points( + dst_homo_src, grid.to(device).to(dtype)) # NxHxWx2 + return flow.view(batch_size, self.height, self.width, 2) # NxHxWx2 + + def forward( # type: ignore + self, + patch_src: torch.Tensor, + dst_homo_src: torch.Tensor) -> torch.Tensor: + r"""Warps an image or tensor from source into reference frame. + Args: + patch_src (torch.Tensor): The image or tensor to warp. + Should be from source. + dst_homo_src (torch.Tensor): The homography or stack of homographies + from source to destination. The homography assumes normalized + coordinates [-1, 1]. + Return: + torch.Tensor: Patch sampled at locations from source to destination. + Shape: + - Input: :math:`(N, C, H, W)` and :math:`(N, 3, 3)` + - Output: :math:`(N, C, H, W)` + Example: + >>> input = torch.rand(1, 3, 32, 32) + >>> homography = torch.eye(3).view(1, 3, 3) + >>> warper = kornia.HomographyWarper(32, 32) + >>> output = warper(input, homography) # NxCxHxW + """ + if not dst_homo_src.device == patch_src.device: + raise TypeError("Patch and homography must be on the same device. \ + Got patch.device: {} dst_H_src.device: {}." + .format(patch_src.device, dst_homo_src.device)) + + return F.grid_sample(patch_src, self.warp_grid(dst_homo_src), # type: ignore + mode=self.mode, padding_mode=self.padding_mode, align_corners=True) + + +def warp_affine_crop(src: torch.Tensor, centers: torch.Tensor, M: torch.Tensor, + dsize: Tuple[int, int], flags: str = 'bilinear', + padding_mode: str = 'zeros') -> torch.Tensor: + r"""Applies an affine transformation to a tensor. + + The function warp_affine transforms the source tensor using + the specified matrix: + + .. math:: + \text{dst}(x, y) = \text{src} \left( M_{11} x + M_{12} y + M_{13} , + M_{21} x + M_{22} y + M_{23} \right ) + + Args: + src (torch.Tensor): input tensor of shape :math:`(B, C, H, W)`. + M (torch.Tensor): affine transformation of shape :math:`(B, 2, 3)`. + dsize (Tuple[int, int]): size of the output image (height, width). + mode (str): interpolation mode to calculate output values + 'bilinear' | 'nearest'. Default: 'bilinear'. + padding_mode (str): padding mode for outside grid values + 'zeros' | 'border' | 'reflection'. Default: 'zeros'. + + Returns: + torch.Tensor: the warped tensor. + + Shape: + - Output: :math:`(B, C, H, W)` + + .. note:: + See a working example `here `__. + """ + if not torch.is_tensor(src): + raise TypeError("Input src type is not a torch.Tensor. Got {}" + .format(type(src))) + + if not torch.is_tensor(M): + raise TypeError("Input M type is not a torch.Tensor. Got {}" + .format(type(M))) + + if not len(src.shape) == 4: + raise ValueError("Input src must be a BxCxHxW tensor. Got {}" + .format(src.shape)) + + if not (len(M.shape) == 3 or M.shape[-2:] == (2, 3)): + raise ValueError("Input M must be a Bx2x3 tensor. 
Got {}" + .format(src.shape)) + + # we generate a 3x3 transformation matrix from 2x3 affine + M_3x3: torch.Tensor = F.pad(M, [0, 0, 0, 1, 0, 0], + mode="constant", value=0) + M_3x3[:, 2, 2] += 1.0 + + # launches the warper + h, w = src.shape[-2:] + return transform_warp_impl(src, centers, M_3x3, (h, w), dsize, flags, padding_mode) \ No newline at end of file diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dataset/preprocessing.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dataset/preprocessing.py new file mode 100644 index 000000000..844d3068c --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dataset/preprocessing.py @@ -0,0 +1,234 @@ +import torch +import numpy as np +import collections.abc +from torch.utils.data._utils.collate import default_collate +import dill +container_abcs = collections.abc + + +def restore(data): + """ + In case we dilled some structures to share between multiple process this function will restore them. + If the data input are not bytes we assume it was not dilled in the first place + + :param data: Possibly dilled data structure + :return: Un-dilled data structure + """ + if type(data) is bytes: + return dill.loads(data) + return data + + +def collate(batch): + if len(batch) == 0: + return batch + elem = batch[0] + if elem is None: + return None + elif isinstance(elem, container_abcs.Sequence): + if len(elem) == 4: # We assume those are the maps, map points, headings and patch_size + scene_map, scene_pts, heading_angle, patch_size = zip(*batch) + if heading_angle[0] is None: + heading_angle = None + else: + heading_angle = torch.Tensor(heading_angle) + map = scene_map[0].get_cropped_maps_from_scene_map_batch(scene_map, + scene_pts=torch.Tensor(scene_pts), + patch_size=patch_size[0], + rotation=heading_angle) + return map + transposed = zip(*batch) + return [collate(samples) for samples in transposed] + elif isinstance(elem, container_abcs.Mapping): + # We have to dill the neighbors structures. Otherwise each tensor is put into + # shared memory separately -> slow, file pointer overhead + # we only do this in multiprocessing + neighbor_dict = {key: [d[key] for d in batch] for key in elem} + return dill.dumps(neighbor_dict) if torch.utils.data.get_worker_info() else neighbor_dict + return default_collate(batch) + + +def get_relative_robot_traj(env, state, node_traj, robot_traj, node_type, robot_type): + # TODO: We will have to make this more generic if robot_type != node_type + # Make Robot State relative to node + _, std = env.get_standardize_params(state[robot_type], node_type=robot_type) + std[0:2] = env.attention_radius[(node_type, robot_type)] + robot_traj_st = env.standardize(robot_traj, + state[robot_type], + node_type=robot_type, + mean=node_traj, + std=std) + robot_traj_st_t = torch.tensor(robot_traj_st, dtype=torch.float) + + return robot_traj_st_t + + +def get_node_timestep_data(env, scene, t, node, state, pred_state, + edge_types, max_ht, max_ft, hyperparams, + scene_graph=None): + """ + Pre-processes the data for a single batch element: node state over time for a specific time in a specific scene + as well as the neighbour data for it. 
+ + :param env: Environment + :param scene: Scene + :param t: Timestep in scene + :param node: Node + :param state: Specification of the node state + :param pred_state: Specification of the prediction state + :param edge_types: List of all Edge Types for which neighbours are pre-processed + :param max_ht: Maximum history timesteps + :param max_ft: Maximum future timesteps (prediction horizon) + :param hyperparams: Model hyperparameters + :param scene_graph: If scene graph was already computed for this scene and time you can pass it here + :return: Batch Element + """ + + # Node + timestep_range_x = np.array([t - max_ht, t]) + timestep_range_y = np.array([t + 1, t + max_ft]) + + x = node.get(timestep_range_x, state[node.type]) + y = node.get(timestep_range_y, pred_state[node.type]) + first_history_index = (max_ht - node.history_points_at(t)).clip(0) + + _, std = env.get_standardize_params(state[node.type], node.type) + std[0:2] = env.attention_radius[(node.type, node.type)] + rel_state = np.zeros_like(x[0]) + rel_state[0:2] = np.array(x)[-1, 0:2] + x_st = env.standardize(x, state[node.type], node.type, mean=rel_state, std=std) + if list(pred_state[node.type].keys())[0] == 'position': # If we predict position we do it relative to current pos + y_st = env.standardize(y, pred_state[node.type], node.type, mean=rel_state[0:2]) + else: + y_st = env.standardize(y, pred_state[node.type], node.type) + + x_t = torch.tensor(x, dtype=torch.float) + y_t = torch.tensor(y, dtype=torch.float) + x_st_t = torch.tensor(x_st, dtype=torch.float) + y_st_t = torch.tensor(y_st, dtype=torch.float) + + # Neighbors + neighbors_data_st = None + neighbors_edge_value = None + if hyperparams['edge_encoding']: + # Scene Graph + scene_graph = scene.get_scene_graph(t, + env.attention_radius, + hyperparams['edge_addition_filter'], + hyperparams['edge_removal_filter']) if scene_graph is None else scene_graph + + neighbors_data_st = dict() + neighbors_edge_value = dict() + for edge_type in edge_types: + neighbors_data_st[edge_type] = list() + # We get all nodes which are connected to the current node for the current timestep + connected_nodes = scene_graph.get_neighbors(node, edge_type[1]) + + if hyperparams['dynamic_edges'] == 'yes': + # We get the edge masks for the current node at the current timestep + edge_masks = torch.tensor(scene_graph.get_edge_scaling(node), dtype=torch.float) + neighbors_edge_value[edge_type] = edge_masks + + for connected_node in connected_nodes: + neighbor_state_np = connected_node.get(np.array([t - max_ht, t]), + state[connected_node.type], + padding=0.0) + + # Make State relative to node where neighbor and node have same state + _, std = env.get_standardize_params(state[connected_node.type], node_type=connected_node.type) + std[0:2] = env.attention_radius[edge_type] + equal_dims = np.min((neighbor_state_np.shape[-1], x.shape[-1])) + rel_state = np.zeros_like(neighbor_state_np) + rel_state[:, ..., :equal_dims] = x[-1, ..., :equal_dims] + neighbor_state_np_st = env.standardize(neighbor_state_np, + state[connected_node.type], + node_type=connected_node.type, + mean=rel_state, + std=std) + + neighbor_state = torch.tensor(neighbor_state_np_st, dtype=torch.float) + neighbors_data_st[edge_type].append(neighbor_state) + + # Robot + robot_traj_st_t = None + if hyperparams['incl_robot_node']: + timestep_range_r = np.array([t, t + max_ft]) + if scene.non_aug_scene is not None: + robot = scene.get_node_by_id(scene.non_aug_scene.robot.id) + else: + robot = scene.robot + robot_type = robot.type + robot_traj = 
robot.get(timestep_range_r, state[robot_type], padding=0.0) + node_state = np.zeros_like(robot_traj[0]) + node_state[:x.shape[1]] = x[-1] + robot_traj_st_t = get_relative_robot_traj(env, state, node_state, robot_traj, node.type, robot_type) + + # Map + map_tuple = None + if hyperparams['use_map_encoding']: + if node.type in hyperparams['map_encoder']: + if node.non_aug_node is not None: + x = node.non_aug_node.get(np.array([t]), state[node.type]) + me_hyp = hyperparams['map_encoder'][node.type] + if 'heading_state_index' in me_hyp: + heading_state_index = me_hyp['heading_state_index'] + # We have to rotate the map in the opposit direction of the agent to match them + if type(heading_state_index) is list: # infer from velocity or heading vector + heading_angle = -np.arctan2(x[-1, heading_state_index[1]], + x[-1, heading_state_index[0]]) * 180 / np.pi + else: + heading_angle = -x[-1, heading_state_index] * 180 / np.pi + else: + heading_angle = None + + scene_map = scene.map[node.type] + map_point = x[-1, :2] + + + patch_size = hyperparams['map_encoder'][node.type]['patch_size'] + map_tuple = (scene_map, map_point, heading_angle, patch_size) + + return (first_history_index, x_t, y_t, x_st_t, y_st_t, neighbors_data_st, + neighbors_edge_value, robot_traj_st_t, map_tuple) + + +def get_timesteps_data(env, scene, t, node_type, state, pred_state, + edge_types, min_ht, max_ht, min_ft, max_ft, hyperparams): + """ + Puts together the inputs for ALL nodes in a given scene and timestep in it. + + :param env: Environment + :param scene: Scene + :param t: Timestep in scene + :param node_type: Node Type of nodes for which the data shall be pre-processed + :param state: Specification of the node state + :param pred_state: Specification of the prediction state + :param edge_types: List of all Edge Types for which neighbors are pre-processed + :param max_ht: Maximum history timesteps + :param max_ft: Maximum future timesteps (prediction horizon) + :param hyperparams: Model hyperparameters + :return: + """ + nodes_per_ts = scene.present_nodes(t, + type=node_type, + min_history_timesteps=min_ht, + min_future_timesteps=max_ft, + return_robot=not hyperparams['incl_robot_node']) + batch = list() + nodes = list() + out_timesteps = list() + for timestep in nodes_per_ts.keys(): + scene_graph = scene.get_scene_graph(timestep, + env.attention_radius, + hyperparams['edge_addition_filter'], + hyperparams['edge_removal_filter']) + present_nodes = nodes_per_ts[timestep] + for node in present_nodes: + nodes.append(node) + out_timesteps.append(timestep) + batch.append(get_node_timestep_data(env, scene, timestep, node, state, pred_state, + edge_types, max_ht, max_ft, hyperparams, + scene_graph=scene_graph)) + if len(out_timesteps) == 0: + return None + return collate(batch), nodes, out_timesteps diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dynamics/__init__.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dynamics/__init__.py new file mode 100644 index 000000000..5853fbb5f --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dynamics/__init__.py @@ -0,0 +1,4 @@ +from model.dynamics.dynamic import Dynamic +from model.dynamics.single_integrator import SingleIntegrator +from model.dynamics.unicycle import Unicycle +from model.dynamics.linear import Linear diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dynamics/dynamic.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dynamics/dynamic.py new 
file mode 100644 index 000000000..6b03e13fd --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dynamics/dynamic.py @@ -0,0 +1,30 @@ + + +class Dynamic(object): + def __init__(self, dt, dyn_limits, device, model_registrar, xz_size, node_type): + self.dt = dt + self.device = device + self.dyn_limits = dyn_limits + self.initial_conditions = None + self.model_registrar = model_registrar + self.node_type = node_type + self.init_constants() + self.create_graph(xz_size) + + def set_initial_condition(self, init_con): + self.initial_conditions = init_con + + def init_constants(self): + pass + + def create_graph(self, xz_size): + pass + + def integrate_samples(self, s, x): + raise NotImplementedError + + def integrate_distribution(self, dist, x): + raise NotImplementedError + + def create_graph(self, xz_size): + pass diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dynamics/linear.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dynamics/linear.py new file mode 100644 index 000000000..bce54c1d6 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dynamics/linear.py @@ -0,0 +1,12 @@ +from ..dynamics import Dynamic + + +class Linear(Dynamic): + def init_constants(self): + pass + + def integrate_samples(self, v, x): + return v + + def integrate_distribution(self, v_dist, x): + return v_dist \ No newline at end of file diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dynamics/single_integrator.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dynamics/single_integrator.py new file mode 100644 index 000000000..984f5fa68 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dynamics/single_integrator.py @@ -0,0 +1,64 @@ +import torch +from model.dynamics import Dynamic +from utils import block_diag +from model.components import GMM2D + + +class SingleIntegrator(Dynamic): + def init_constants(self): + self.F = torch.eye(4, device=self.device, dtype=torch.float32) + self.F[0:2, 2:] = torch.eye(2, device=self.device, dtype=torch.float32) * self.dt + self.F_t = self.F.transpose(-2, -1) + + def integrate_samples(self, v, x=None): + """ + Integrates deterministic samples of velocity. + + :param v: Velocity samples + :param x: Not used for SI. + :return: Position samples + """ + p_0 = self.initial_conditions['pos'].unsqueeze(1) + return torch.cumsum(v, dim=2) * self.dt + p_0 + + def integrate_distribution(self, v_dist, x=None): + r""" + Integrates the GMM velocity distribution to a distribution over position. + The Kalman Equations are used. + + .. math:: \mu_{t+1} =\textbf{F} \mu_{t} + + .. math:: \mathbf{\Sigma}_{t+1}={\textbf {F}} \mathbf{\Sigma}_{t} {\textbf {F}}^{T} + + .. math:: + \textbf{F} = \left[ + \begin{array}{cccc} + \sigma_x^2 & \rho_p \sigma_x \sigma_y & 0 & 0 \\ + \rho_p \sigma_x \sigma_y & \sigma_y^2 & 0 & 0 \\ + 0 & 0 & \sigma_{v_x}^2 & \rho_v \sigma_{v_x} \sigma_{v_y} \\ + 0 & 0 & \rho_v \sigma_{v_x} \sigma_{v_y} & \sigma_{v_y}^2 \\ + \end{array} + \right]_{t} + + :param v_dist: Joint GMM Distribution over velocity in x and y direction. + :param x: Not used for SI. + :return: Joint GMM Distribution over position in x and y direction. 
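+ Example (sketch; assumes set_initial_condition() was called beforehand with the
+ current position under the 'pos' key, as the encoder does):
+ >>> pos_dist = dynamic.integrate_distribution(v_dist) # GMM2D over positions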
+ """ + p_0 = self.initial_conditions['pos'].unsqueeze(1) + ph = v_dist.mus.shape[-3] + sample_batch_dim = list(v_dist.mus.shape[0:2]) + pos_dist_sigma_matrix_list = [] + + pos_mus = p_0[:, None] + torch.cumsum(v_dist.mus, dim=2) * self.dt + + vel_dist_sigma_matrix = v_dist.get_covariance_matrix() + pos_dist_sigma_matrix_t = torch.zeros(sample_batch_dim + [v_dist.components, 2, 2], device=self.device) + + for t in range(ph): + vel_sigma_matrix_t = vel_dist_sigma_matrix[:, :, t] + full_sigma_matrix_t = block_diag([pos_dist_sigma_matrix_t, vel_sigma_matrix_t]) + pos_dist_sigma_matrix_t = self.F[..., :2, :].matmul(full_sigma_matrix_t.matmul(self.F_t)[..., :2]) + pos_dist_sigma_matrix_list.append(pos_dist_sigma_matrix_t) + + pos_dist_sigma_matrix = torch.stack(pos_dist_sigma_matrix_list, dim=2) + return GMM2D.from_log_pis_mus_cov_mats(v_dist.log_pis, pos_mus, pos_dist_sigma_matrix) \ No newline at end of file diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dynamics/unicycle.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dynamics/unicycle.py new file mode 100644 index 000000000..1a83c4274 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dynamics/unicycle.py @@ -0,0 +1,234 @@ +import torch +import torch.nn as nn +from model.dynamics import Dynamic +from utils import block_diag +from model.components import GMM2D + + +class Unicycle(Dynamic): + def init_constants(self): + self.F_s = torch.eye(4, device=self.device, dtype=torch.float32) + self.F_s[0:2, 2:] = torch.eye(2, device=self.device, dtype=torch.float32) * self.dt + self.F_s_t = self.F_s.transpose(-2, -1) + + def create_graph(self, xz_size): + model_if_absent = nn.Linear(xz_size + 1, 1) + self.p0_model = self.model_registrar.get_model(f"{self.node_type}/unicycle_initializer", model_if_absent) + + def dynamic(self, x, u): + r""" + TODO: Boris: Add docstring + :param x: + :param u: + :return: + """ + x_p = x[0] + y_p = x[1] + phi = x[2] + v = x[3] + dphi = u[0] + a = u[1] + + mask = torch.abs(dphi) <= 1e-2 + dphi = ~mask * dphi + (mask) * 1 + + phi_p_omega_dt = phi + dphi * self.dt + dsin_domega = (torch.sin(phi_p_omega_dt) - torch.sin(phi)) / dphi + dcos_domega = (torch.cos(phi_p_omega_dt) - torch.cos(phi)) / dphi + + d1 = torch.stack([(x_p + + (a / dphi) * dcos_domega + + v * dsin_domega + + (a / dphi) * torch.sin(phi_p_omega_dt) * self.dt), + (y_p + - v * dcos_domega + + (a / dphi) * dsin_domega + - (a / dphi) * torch.cos(phi_p_omega_dt) * self.dt), + phi + dphi * self.dt, + v + a * self.dt], dim=0) + d2 = torch.stack([x_p + v * torch.cos(phi) * self.dt + (a / 2) * torch.cos(phi) * self.dt ** 2, + y_p + v * torch.sin(phi) * self.dt + (a / 2) * torch.sin(phi) * self.dt ** 2, + phi * torch.ones_like(a), + v + a * self.dt], dim=0) + return torch.where(~mask, d1, d2) + + def integrate_samples(self, control_samples, x=None): + r""" + TODO: Boris: Add docstring + :param x: + :param u: + :return: + """ + ph = control_samples.shape[-2] + p_0 = self.initial_conditions['pos'].unsqueeze(1) + v_0 = self.initial_conditions['vel'].unsqueeze(1) + + # In case the input is batched because of the robot in online use we repeat this to match the batch size of x. 
+ if p_0.size()[0] != x.size()[0]: + p_0 = p_0.repeat(x.size()[0], 1, 1) + v_0 = v_0.repeat(x.size()[0], 1, 1) + + phi_0 = torch.atan2(v_0[..., 1], v_0[..., 0]) + + phi_0 = phi_0 + torch.tanh(self.p0_model(torch.cat((x, phi_0), dim=-1))) + + u = torch.stack([control_samples[..., 0], control_samples[..., 1]], dim=0) + x = torch.stack([p_0[..., 0], p_0[..., 1], phi_0, torch.norm(v_0, dim=-1)], dim = 0).squeeze(dim=-1) + + mus_list = [] + for t in range(ph): + x = self.dynamic(x, u[..., t]) + mus_list.append(torch.stack((x[0], x[1]), dim=-1)) + + pos_mus = torch.stack(mus_list, dim=2) + return pos_mus + + def compute_control_jacobian(self, sample_batch_dim, components, x, u): + r""" + TODO: Boris: Add docstring + :param x: + :param u: + :return: + """ + F = torch.zeros(sample_batch_dim + [components, 4, 2], + device=self.device, + dtype=torch.float32) + + phi = x[2] + v = x[3] + dphi = u[0] + a = u[1] + + mask = torch.abs(dphi) <= 1e-2 + dphi = ~mask * dphi + (mask) * 1 + + phi_p_omega_dt = phi + dphi * self.dt + dsin_domega = (torch.sin(phi_p_omega_dt) - torch.sin(phi)) / dphi + dcos_domega = (torch.cos(phi_p_omega_dt) - torch.cos(phi)) / dphi + + F[..., 0, 0] = ((v / dphi) * torch.cos(phi_p_omega_dt) * self.dt + - (v / dphi) * dsin_domega + - (2 * a / dphi ** 2) * torch.sin(phi_p_omega_dt) * self.dt + - (2 * a / dphi ** 2) * dcos_domega + + (a / dphi) * torch.cos(phi_p_omega_dt) * self.dt ** 2) + F[..., 0, 1] = (1 / dphi) * dcos_domega + (1 / dphi) * torch.sin(phi_p_omega_dt) * self.dt + + F[..., 1, 0] = ((v / dphi) * dcos_domega + - (2 * a / dphi ** 2) * dsin_domega + + (2 * a / dphi ** 2) * torch.cos(phi_p_omega_dt) * self.dt + + (v / dphi) * torch.sin(phi_p_omega_dt) * self.dt + + (a / dphi) * torch.sin(phi_p_omega_dt) * self.dt ** 2) + F[..., 1, 1] = (1 / dphi) * dsin_domega - (1 / dphi) * torch.cos(phi_p_omega_dt) * self.dt + + F[..., 2, 0] = self.dt + + F[..., 3, 1] = self.dt + + F_sm = torch.zeros(sample_batch_dim + [components, 4, 2], + device=self.device, + dtype=torch.float32) + + F_sm[..., 0, 1] = (torch.cos(phi) * self.dt ** 2) / 2 + + F_sm[..., 1, 1] = (torch.sin(phi) * self.dt ** 2) / 2 + + F_sm[..., 3, 1] = self.dt + + return torch.where(~mask.unsqueeze(-1).unsqueeze(-1), F, F_sm) + + def compute_jacobian(self, sample_batch_dim, components, x, u): + r""" + TODO: Boris: Add docstring + :param x: + :param u: + :return: + """ + one = torch.tensor(1) + F = torch.zeros(sample_batch_dim + [components, 4, 4], + device=self.device, + dtype=torch.float32) + + phi = x[2] + v = x[3] + dphi = u[0] + a = u[1] + + mask = torch.abs(dphi) <= 1e-2 + dphi = ~mask * dphi + (mask) * 1 + + phi_p_omega_dt = phi + dphi * self.dt + dsin_domega = (torch.sin(phi_p_omega_dt) - torch.sin(phi)) / dphi + dcos_domega = (torch.cos(phi_p_omega_dt) - torch.cos(phi)) / dphi + + F[..., 0, 0] = one + F[..., 1, 1] = one + F[..., 2, 2] = one + F[..., 3, 3] = one + + F[..., 0, 2] = v * dcos_domega - (a / dphi) * dsin_domega + (a / dphi) * torch.cos(phi_p_omega_dt) * self.dt + F[..., 0, 3] = dsin_domega + + F[..., 1, 2] = v * dsin_domega + (a / dphi) * dcos_domega + (a / dphi) * torch.sin(phi_p_omega_dt) * self.dt + F[..., 1, 3] = -dcos_domega + + F_sm = torch.zeros(sample_batch_dim + [components, 4, 4], + device=self.device, + dtype=torch.float32) + + F_sm[..., 0, 0] = one + F_sm[..., 1, 1] = one + F_sm[..., 2, 2] = one + F_sm[..., 3, 3] = one + + F_sm[..., 0, 2] = -v * torch.sin(phi) * self.dt - (a * torch.sin(phi) * self.dt ** 2) / 2 + F_sm[..., 0, 3] = torch.cos(phi) * self.dt + + F_sm[..., 1, 2] = v * 
torch.cos(phi) * self.dt + (a * torch.cos(phi) * self.dt ** 2) / 2 + F_sm[..., 1, 3] = torch.sin(phi) * self.dt + + return torch.where(~mask.unsqueeze(-1).unsqueeze(-1), F, F_sm) + + def integrate_distribution(self, control_dist_dphi_a, x): + r""" + TODO: Boris: Add docstring + :param x: + :param u: + :return: + """ + sample_batch_dim = list(control_dist_dphi_a.mus.shape[0:2]) + ph = control_dist_dphi_a.mus.shape[-3] + p_0 = self.initial_conditions['pos'].unsqueeze(1) + v_0 = self.initial_conditions['vel'].unsqueeze(1) + + # In case the input is batched because of the robot in online use we repeat this to match the batch size of x. + if p_0.size()[0] != x.size()[0]: + p_0 = p_0.repeat(x.size()[0], 1, 1) + v_0 = v_0.repeat(x.size()[0], 1, 1) + + phi_0 = torch.atan2(v_0[..., 1], v_0[..., 0]) + + phi_0 = phi_0 + torch.tanh(self.p0_model(torch.cat((x, phi_0), dim=-1))) + + dist_sigma_matrix = control_dist_dphi_a.get_covariance_matrix() + pos_dist_sigma_matrix_t = torch.zeros(sample_batch_dim + [control_dist_dphi_a.components, 4, 4], + device=self.device) + + u = torch.stack([control_dist_dphi_a.mus[..., 0], control_dist_dphi_a.mus[..., 1]], dim=0) + x = torch.stack([p_0[..., 0], p_0[..., 1], phi_0, torch.norm(v_0, dim=-1)], dim=0) + + pos_dist_sigma_matrix_list = [] + mus_list = [] + for t in range(ph): + F_t = self.compute_jacobian(sample_batch_dim, control_dist_dphi_a.components, x, u[:, :, :, t]) + G_t = self.compute_control_jacobian(sample_batch_dim, control_dist_dphi_a.components, x, u[:, :, :, t]) + dist_sigma_matrix_t = dist_sigma_matrix[:, :, t] + pos_dist_sigma_matrix_t = (F_t.matmul(pos_dist_sigma_matrix_t.matmul(F_t.transpose(-2, -1))) + + G_t.matmul(dist_sigma_matrix_t.matmul(G_t.transpose(-2, -1)))) + pos_dist_sigma_matrix_list.append(pos_dist_sigma_matrix_t[..., :2, :2]) + + x = self.dynamic(x, u[:, :, :, t]) + mus_list.append(torch.stack((x[0], x[1]), dim=-1)) + + pos_dist_sigma_matrix = torch.stack(pos_dist_sigma_matrix_list, dim=2) + pos_mus = torch.stack(mus_list, dim=2) + return GMM2D.from_log_pis_mus_cov_mats(control_dist_dphi_a.log_pis, pos_mus, pos_dist_sigma_matrix) diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/mgcvae.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/mgcvae.py new file mode 100644 index 000000000..b731e89e8 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/mgcvae.py @@ -0,0 +1,1161 @@ +import warnings +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim +from model.components import * +from model.model_utils import * +import model.dynamics as dynamic_module +from environment.scene_graph import DirectedEdge + + +class MultimodalGenerativeCVAE(torch.nn.Module): + def __init__(self, + env, + node_type, + model_registrar, + hyperparams, + device, + edge_types, + log_writer=None): + super().__init__() + self.hyperparams = hyperparams + self.env = env + self.node_type = node_type + self.model_registrar = model_registrar + self.log_writer = log_writer + self.device = device + self.edge_types = [edge_type for edge_type in edge_types if edge_type[0] is node_type] + self.curr_iter = 0 + + self.node_modules = dict() + self.node_modules = torch.nn.ModuleDict() + + self.min_hl = self.hyperparams['minimum_history_length'] + self.max_hl = self.hyperparams['maximum_history_length'] + self.ph = self.hyperparams['prediction_horizon'] + self.state = self.hyperparams['state'] + self.pred_state = self.hyperparams['pred_state'][node_type] + self.state_length 
= int(np.sum([len(entity_dims) for entity_dims in self.state[node_type].values()])) + if self.hyperparams['incl_robot_node']: + self.robot_state_length = int( + np.sum([len(entity_dims) for entity_dims in self.state[env.robot_type].values()]) + ) + self.pred_state_length = int(np.sum([len(entity_dims) for entity_dims in self.pred_state.values()])) + + edge_types_str = [DirectedEdge.get_str_from_types(*edge_type) for edge_type in self.edge_types] + self.create_graphical_model(edge_types_str) + + dynamic_class = getattr(dynamic_module, hyperparams['dynamic'][self.node_type]['name']) + dyn_limits = hyperparams['dynamic'][self.node_type]['limits'] + self.dynamic = dynamic_class(self.env.scenes[0].dt, dyn_limits, device, + self.model_registrar, self.x_size, self.node_type) + + def eval(self): + super().eval() + for key in self.node_modules.keys(): + self.node_modules[key].eval() + + def set_curr_iter(self, curr_iter): + self.curr_iter = curr_iter + + def add_submodule(self, name, model_if_absent): + self.node_modules[name] = self.model_registrar.get_model(name, model_if_absent) + + def clear_submodules(self): + self.node_modules.clear() + + def create_node_models(self): + ############################ + # Node History Encoder # + ############################ + self.add_submodule(self.node_type + '/node_history_encoder', + model_if_absent=nn.LSTM(input_size=self.state_length, + hidden_size=self.hyperparams['enc_rnn_dim_history'], + batch_first=True)) + + ########################### + # Node Future Encoder # + ########################### + # We'll create this here, but then later check if in training mode. + # Based on that, we'll factor this into the computation graph (or not). + self.add_submodule(self.node_type + '/node_future_encoder', + model_if_absent=nn.LSTM(input_size=self.pred_state_length, + hidden_size=self.hyperparams['enc_rnn_dim_future'], + bidirectional=True, + batch_first=True)) + # These are related to how you initialize states for the node future encoder. + self.add_submodule(self.node_type + '/node_future_encoder/initial_h', + model_if_absent=nn.Linear(self.state_length, + self.hyperparams['enc_rnn_dim_future'])) + self.add_submodule(self.node_type + '/node_future_encoder/initial_c', + model_if_absent=nn.Linear(self.state_length, + self.hyperparams['enc_rnn_dim_future'])) + + ############################ + # Robot Future Encoder # + ############################ + # We'll create this here, but then later check if we're next to the robot. + # Based on that, we'll factor this into the computation graph (or not). + if self.hyperparams['incl_robot_node']: + self.add_submodule('robot_future_encoder', + model_if_absent=nn.LSTM(input_size=self.robot_state_length, + hidden_size=self.hyperparams['enc_rnn_dim_future'], + bidirectional=True, + batch_first=True)) + # These are related to how you initialize states for the robot future encoder. + self.add_submodule('robot_future_encoder/initial_h', + model_if_absent=nn.Linear(self.robot_state_length, + self.hyperparams['enc_rnn_dim_future'])) + self.add_submodule('robot_future_encoder/initial_c', + model_if_absent=nn.Linear(self.robot_state_length, + self.hyperparams['enc_rnn_dim_future'])) + + if self.hyperparams['edge_encoding']: + ############################## + # Edge Influence Encoder # + ############################## + # NOTE: The edge influence encoding happens during calls + # to forward or incremental_forward, so we don't create + # a model for it here for the max and sum variants. 
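+ # Only the 'bi-rnn' and 'attention' variants need trainable submodules here;
+ # 'sum', 'mean' and 'max' are computed directly in encode_total_edge_influence.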
+ if self.hyperparams['edge_influence_combine_method'] == 'bi-rnn': + self.add_submodule(self.node_type + '/edge_influence_encoder', + model_if_absent=nn.LSTM(input_size=self.hyperparams['enc_rnn_dim_edge'], + hidden_size=self.hyperparams['enc_rnn_dim_edge_influence'], + bidirectional=True, + batch_first=True)) + + # Four times because we're trying to mimic a bi-directional + # LSTM's output (which, here, is c and h from both ends). + self.eie_output_dims = 4 * self.hyperparams['enc_rnn_dim_edge_influence'] + + elif self.hyperparams['edge_influence_combine_method'] == 'attention': + # Chose additive attention because of https://arxiv.org/pdf/1703.03906.pdf + # We calculate an attention context vector using the encoded edges as the "encoder" + # (that we attend _over_) + # and the node history encoder representation as the "decoder state" (that we attend _on_). + self.add_submodule(self.node_type + '/edge_influence_encoder', + model_if_absent=AdditiveAttention( + encoder_hidden_state_dim=self.hyperparams['enc_rnn_dim_edge_influence'], + decoder_hidden_state_dim=self.hyperparams['enc_rnn_dim_history'])) + + self.eie_output_dims = self.hyperparams['enc_rnn_dim_edge_influence'] + + ################### + # Map Encoder # + ################### + if self.hyperparams['use_map_encoding']: + if self.node_type in self.hyperparams['map_encoder']: + me_params = self.hyperparams['map_encoder'][self.node_type] + self.add_submodule(self.node_type + '/map_encoder', + model_if_absent=CNNMapEncoder(me_params['map_channels'], + me_params['hidden_channels'], + me_params['output_size'], + me_params['masks'], + me_params['strides'], + me_params['patch_size'])) + + ################################ + # Discrete Latent Variable # + ################################ + self.latent = DiscreteLatent(self.hyperparams, self.device) + + ###################################################################### + # Various Fully-Connected Layers from Encoder to Latent Variable # + ###################################################################### + # Node History Encoder + x_size = self.hyperparams['enc_rnn_dim_history'] + if self.hyperparams['edge_encoding']: + # Edge Encoder + x_size += self.eie_output_dims + if self.hyperparams['incl_robot_node']: + # Future Conditional Encoder + x_size += 4 * self.hyperparams['enc_rnn_dim_future'] + if self.hyperparams['use_map_encoding'] and self.node_type in self.hyperparams['map_encoder']: + # Map Encoder + x_size += self.hyperparams['map_encoder'][self.node_type]['output_size'] + + z_size = self.hyperparams['N'] * self.hyperparams['K'] + + if self.hyperparams['p_z_x_MLP_dims'] is not None: + self.add_submodule(self.node_type + '/p_z_x', + model_if_absent=nn.Linear(x_size, self.hyperparams['p_z_x_MLP_dims'])) + hx_size = self.hyperparams['p_z_x_MLP_dims'] + else: + hx_size = x_size + + self.add_submodule(self.node_type + '/hx_to_z', + model_if_absent=nn.Linear(hx_size, self.latent.z_dim)) + + if self.hyperparams['q_z_xy_MLP_dims'] is not None: + self.add_submodule(self.node_type + '/q_z_xy', + # Node Future Encoder + model_if_absent=nn.Linear(x_size + 4 * self.hyperparams['enc_rnn_dim_future'], + self.hyperparams['q_z_xy_MLP_dims'])) + hxy_size = self.hyperparams['q_z_xy_MLP_dims'] + else: + # Node Future Encoder + hxy_size = x_size + 4 * self.hyperparams['enc_rnn_dim_future'] + + self.add_submodule(self.node_type + '/hxy_to_z', + model_if_absent=nn.Linear(hxy_size, self.latent.z_dim)) + + #################### + # Decoder LSTM # + #################### + if 
self.hyperparams['incl_robot_node']: + decoder_input_dims = self.pred_state_length + self.robot_state_length + z_size + x_size + else: + decoder_input_dims = self.pred_state_length + z_size + x_size + + self.add_submodule(self.node_type + '/decoder/state_action', + model_if_absent=nn.Sequential( + nn.Linear(self.state_length, self.pred_state_length))) + + self.add_submodule(self.node_type + '/decoder/rnn_cell', + model_if_absent=nn.GRUCell(decoder_input_dims, self.hyperparams['dec_rnn_dim'])) + self.add_submodule(self.node_type + '/decoder/initial_h', + model_if_absent=nn.Linear(z_size + x_size, self.hyperparams['dec_rnn_dim'])) + + ################### + # Decoder GMM # + ################### + self.add_submodule(self.node_type + '/decoder/proj_to_GMM_log_pis', + model_if_absent=nn.Linear(self.hyperparams['dec_rnn_dim'], + self.hyperparams['GMM_components'])) + self.add_submodule(self.node_type + '/decoder/proj_to_GMM_mus', + model_if_absent=nn.Linear(self.hyperparams['dec_rnn_dim'], + self.hyperparams['GMM_components'] * self.pred_state_length)) + self.add_submodule(self.node_type + '/decoder/proj_to_GMM_log_sigmas', + model_if_absent=nn.Linear(self.hyperparams['dec_rnn_dim'], + self.hyperparams['GMM_components'] * self.pred_state_length)) + self.add_submodule(self.node_type + '/decoder/proj_to_GMM_corrs', + model_if_absent=nn.Linear(self.hyperparams['dec_rnn_dim'], + self.hyperparams['GMM_components'])) + + self.x_size = x_size + self.z_size = z_size + + def create_edge_models(self, edge_types): + for edge_type in edge_types: + neighbor_state_length = int( + np.sum([len(entity_dims) for entity_dims in self.state[edge_type.split('->')[1]].values()])) + if self.hyperparams['edge_state_combine_method'] == 'pointnet': + self.add_submodule(edge_type + '/pointnet_encoder', + model_if_absent=nn.Sequential( + nn.Linear(self.state_length, 2 * self.state_length), + nn.ReLU(), + nn.Linear(2 * self.state_length, 2 * self.state_length), + nn.ReLU())) + + edge_encoder_input_size = 2 * self.state_length + self.state_length + + elif self.hyperparams['edge_state_combine_method'] == 'attention': + self.add_submodule(self.node_type + '/edge_attention_combine', + model_if_absent=TemporallyBatchedAdditiveAttention( + encoder_hidden_state_dim=self.state_length, + decoder_hidden_state_dim=self.state_length)) + edge_encoder_input_size = self.state_length + neighbor_state_length + + else: + edge_encoder_input_size = self.state_length + neighbor_state_length + + self.add_submodule(edge_type + '/edge_encoder', + model_if_absent=nn.LSTM(input_size=edge_encoder_input_size, + hidden_size=self.hyperparams['enc_rnn_dim_edge'], + batch_first=True)) + + def create_graphical_model(self, edge_types): + """ + Creates or queries all trainable components. + + :param edge_types: List containing strings for all possible edge types for the node type. 
+ :return: None + """ + self.clear_submodules() + + ############################ + # Everything but Edges # + ############################ + self.create_node_models() + + ##################### + # Edge Encoders # + ##################### + if self.hyperparams['edge_encoding']: + self.create_edge_models(edge_types) + + for name, module in self.node_modules.items(): + module.to(self.device) + + def create_new_scheduler(self, name, annealer, annealer_kws, creation_condition=True): + value_scheduler = None + rsetattr(self, name + '_scheduler', value_scheduler) + if creation_condition: + annealer_kws['device'] = self.device + value_annealer = annealer(annealer_kws) + rsetattr(self, name + '_annealer', value_annealer) + + # This is the value that we'll update on each call of + # step_annealers(). + rsetattr(self, name, value_annealer(0).clone().detach()) + dummy_optimizer = optim.Optimizer([rgetattr(self, name)], {'lr': value_annealer(0).clone().detach()}) + rsetattr(self, name + '_optimizer', dummy_optimizer) + + value_scheduler = CustomLR(dummy_optimizer, + value_annealer) + rsetattr(self, name + '_scheduler', value_scheduler) + + self.schedulers.append(value_scheduler) + self.annealed_vars.append(name) + + def set_annealing_params(self): + self.schedulers = list() + self.annealed_vars = list() + + self.create_new_scheduler(name='kl_weight', + annealer=sigmoid_anneal, + annealer_kws={ + 'start': self.hyperparams['kl_weight_start'], + 'finish': self.hyperparams['kl_weight'], + 'center_step': self.hyperparams['kl_crossover'], + 'steps_lo_to_hi': self.hyperparams['kl_crossover'] / self.hyperparams[ + 'kl_sigmoid_divisor'] + }) + + self.create_new_scheduler(name='latent.temp', + annealer=exp_anneal, + annealer_kws={ + 'start': self.hyperparams['tau_init'], + 'finish': self.hyperparams['tau_final'], + 'rate': self.hyperparams['tau_decay_rate'] + }) + + self.create_new_scheduler(name='latent.z_logit_clip', + annealer=sigmoid_anneal, + annealer_kws={ + 'start': self.hyperparams['z_logit_clip_start'], + 'finish': self.hyperparams['z_logit_clip_final'], + 'center_step': self.hyperparams['z_logit_clip_crossover'], + 'steps_lo_to_hi': self.hyperparams['z_logit_clip_crossover'] / self.hyperparams[ + 'z_logit_clip_divisor'] + }, + creation_condition=self.hyperparams['use_z_logit_clipping']) + + def step_annealers(self): + # This should manage all of the step-wise changed + # parameters automatically. + for idx, annealed_var in enumerate(self.annealed_vars): + if rgetattr(self, annealed_var + '_scheduler') is not None: + # First we step the scheduler. + with warnings.catch_warnings(): # We use a dummy optimizer: Warning because no .step() was called on it + warnings.simplefilter("ignore") + rgetattr(self, annealed_var + '_scheduler').step() + + # Then we set the annealed vars' value. 
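+ # (The dummy optimizer's learning rate acts as the container for the annealed
+ # value, so param_groups[0]['lr'] below holds the value after the scheduler step.)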
+ rsetattr(self, annealed_var, rgetattr(self, annealed_var + '_optimizer').param_groups[0]['lr']) + + self.summarize_annealers() + + def summarize_annealers(self): + if self.log_writer is not None: + for annealed_var in self.annealed_vars: + if rgetattr(self, annealed_var) is not None: + self.log_writer.add_scalar('%s/%s' % (str(self.node_type), annealed_var.replace('.', '/')), + rgetattr(self, annealed_var), self.curr_iter) + + def obtain_encoded_tensors(self, + mode, + inputs, + inputs_st, + packed_inputs_st, + labels, + labels_st, + first_history_indices, + neighbors, + neighbors_edge_value, + robot, + map) -> (torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor): + """ + Encodes input and output tensors for node and robot. + + :param mode: Mode in which the model is operated. E.g. Train, Eval, Predict. + :param inputs: Input tensor including the state for each agent over time [bs, t, state]. + :param inputs_st: Standardized input tensor. + :param labels: Label tensor including the label output for each agent over time [bs, t, pred_state]. + :param labels_st: Standardized label tensor. + :param first_history_indices: First timestep (index) in scene for which data is available for a node [bs] + :param neighbors: Preprocessed dict (indexed by edge type) of list of neighbor states over time. + [[bs, t, neighbor state]] + :param neighbors_edge_value: Preprocessed edge values for all neighbor nodes [[N]] + :param robot: Standardized robot state over time. [bs, t, robot_state] + :param map: Tensor of Map information. [bs, channels, x, y] + :return: tuple(x, x_nr_t, y_e, y_r, y, n_s_t0) + WHERE + - x: Encoded input / condition tensor to the CVAE x_e. + - x_r_t: Robot state (if robot is in scene). + - y_e: Encoded label / future of the node. + - y_r: Encoded future of the robot. + - y: Label / future of the node. + - n_s_t0: Standardized current state of the node. 
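+ Note: this copy returns the encoded node history directly (see the early
+ ``return`` after the history encoder below); the remaining encoding steps from
+ the original implementation are left in place but are not executed.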
+ """ + + x, x_r_t, y_e, y_r, y = None, None, None, None, None + initial_dynamics = dict() + + batch_size = inputs.shape[0] + + ######################################### + # Provide basic information to encoders # + ######################################### + node_history = inputs + node_present_state = inputs[:, -1] + node_pos = inputs[:, -1, 0:2] + node_vel = inputs[:, -1, 2:4] + + node_history_st = packed_inputs_st + node_present_state_st = inputs_st[:, -1] + node_pos_st = inputs_st[:, -1, 0:2] + node_vel_st = inputs_st[:, -1, 2:4] + + n_s_t0 = node_present_state_st + + initial_dynamics['pos'] = node_pos + initial_dynamics['vel'] = node_vel + + self.dynamic.set_initial_condition(initial_dynamics) + + if self.hyperparams['incl_robot_node']: + x_r_t, y_r = robot[..., 0, :], robot[..., 1:, :] + + ################## + # Encode History # + ################## + node_history_encoded = self.encode_node_history(mode, + node_history_st, + first_history_indices) + + return node_history_encoded + + ################## + # Encode Present # + ################## + node_present = node_present_state_st # [bs, state_dim] + + ################## + # Encode Future # + ################## + if mode != ModeKeys.PREDICT: + y = labels_st + + ############################## + # Encode Node Edges per Type # + ############################## + if self.hyperparams['edge_encoding']: + node_edges_encoded = list() + for edge_type in self.edge_types: + # Encode edges for given edge type + encoded_edges_type = self.encode_edge(mode, + node_history, + node_history_st, + edge_type, + neighbors[edge_type], + neighbors_edge_value[edge_type], + first_history_indices) + node_edges_encoded.append(encoded_edges_type) # List of [bs/nbs, enc_rnn_dim] + ##################### + # Encode Node Edges # + ##################### + total_edge_influence = self.encode_total_edge_influence(mode, + node_edges_encoded, + node_history_encoded, + batch_size) + + ################ + # Map Encoding # + ################ + if self.hyperparams['use_map_encoding'] and self.node_type in self.hyperparams['map_encoder']: + if self.log_writer and (self.curr_iter + 1) % 500 == 0: + map_clone = map.clone() + map_patch = self.hyperparams['map_encoder'][self.node_type]['patch_size'] + map_clone[:, :, map_patch[1] - 5:map_patch[1] + 5, map_patch[0] - 5:map_patch[0] + 5] = 1. + self.log_writer.add_images(f"{self.node_type}/cropped_maps", map_clone, + self.curr_iter, dataformats='NCWH') + + encoded_map = self.node_modules[self.node_type + '/map_encoder'](map * 2. - 1., (mode == ModeKeys.TRAIN)) + do = self.hyperparams['map_encoder'][self.node_type]['dropout'] + encoded_map = F.dropout(encoded_map, do, training=(mode == ModeKeys.TRAIN)) + + ###################################### + # Concatenate Encoder Outputs into x # + ###################################### + x_concat_list = list() + + # Every node has an edge-influence encoder (which could just be zero). + if self.hyperparams['edge_encoding']: + x_concat_list.append(total_edge_influence) # [bs/nbs, 4*enc_rnn_dim] + + # Every node has a history encoder. 
+ x_concat_list.append(node_history_encoded) # [bs/nbs, enc_rnn_dim_history] + + if self.hyperparams['incl_robot_node']: + robot_future_encoder = self.encode_robot_future(mode, x_r_t, y_r) + x_concat_list.append(robot_future_encoder) + + if self.hyperparams['use_map_encoding'] and self.node_type in self.hyperparams['map_encoder']: + if self.log_writer: + self.log_writer.add_scalar(f"{self.node_type}/encoded_map_max", + torch.max(torch.abs(encoded_map)), self.curr_iter) + x_concat_list.append(encoded_map) + + x = torch.cat(x_concat_list, dim=1) + + if mode == ModeKeys.TRAIN or mode == ModeKeys.EVAL: + y_e = self.encode_node_future(mode, node_present, y) + + return x, x_r_t, y_e, y_r, y, n_s_t0 + + def encode_node_history(self, mode, node_hist, first_history_indices): + """ + Encodes the nodes history. + + :param mode: Mode in which the model is operated. E.g. Train, Eval, Predict. + :param node_hist: Historic and current state of the node. [bs, mhl, state] + :param first_history_indices: First timestep (index) in scene for which data is available for a node [bs] + :return: Encoded node history tensor. [bs, enc_rnn_dim] + """ + outputs = run_lstm_on_variable_length_seqs(self.node_modules[self.node_type + '/node_history_encoder'], + # outputs, _ = run_lstm_on_variable_length_seqs(self.node_modules[self.node_type + '/node_history_encoder'], + original_seqs=node_hist, + lower_indices=first_history_indices) + + return outputs + + outputs = F.dropout(outputs, + p=1. - self.hyperparams['rnn_kwargs']['dropout_keep_prob'], + training=(mode == ModeKeys.TRAIN)) # [bs, max_time, enc_rnn_dim] + + last_index_per_sequence = -(first_history_indices + 1) + + return outputs[torch.arange(first_history_indices.shape[0]), last_index_per_sequence] + + def encode_edge(self, + mode, + node_history, + node_history_st, + edge_type, + neighbors, + neighbors_edge_value, + first_history_indices): + + max_hl = self.hyperparams['maximum_history_length'] + + edge_states_list = list() # list of [#of neighbors, max_ht, state_dim] + for i, neighbor_states in enumerate(neighbors): # Get neighbors for timestep in batch + if len(neighbor_states) == 0: # There are no neighbors for edge type # TODO necessary? + neighbor_state_length = int( + np.sum([len(entity_dims) for entity_dims in self.state[edge_type[1]].values()]) + ) + edge_states_list.append(torch.zeros((1, max_hl + 1, neighbor_state_length), device=self.device)) + else: + edge_states_list.append(torch.stack(neighbor_states, dim=0).to(self.device)) + + if self.hyperparams['edge_state_combine_method'] == 'sum': + # Used in Structural-RNN to combine edges as well. + op_applied_edge_states_list = list() + for neighbors_state in edge_states_list: + op_applied_edge_states_list.append(torch.sum(neighbors_state, dim=0)) + combined_neighbors = torch.stack(op_applied_edge_states_list, dim=0) + if self.hyperparams['dynamic_edges'] == 'yes': + # Should now be (bs, time, 1) + op_applied_edge_mask_list = list() + for edge_value in neighbors_edge_value: + op_applied_edge_mask_list.append(torch.clamp(torch.sum(edge_value.to(self.device), + dim=0, keepdim=True), max=1.)) + combined_edge_masks = torch.stack(op_applied_edge_mask_list, dim=0) + + elif self.hyperparams['edge_state_combine_method'] == 'max': + # Used in NLP, e.g. max over word embeddings in a sentence. 
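+ # Note: torch.max(tensor, dim=0) returns a (values, indices) pair, so the result
+ # below would need its .values field before torch.stack if this branch is used.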
+ op_applied_edge_states_list = list() + for neighbors_state in edge_states_list: + op_applied_edge_states_list.append(torch.max(neighbors_state, dim=0)) + combined_neighbors = torch.stack(op_applied_edge_states_list, dim=0) + if self.hyperparams['dynamic_edges'] == 'yes': + # Should now be (bs, time, 1) + op_applied_edge_mask_list = list() + for edge_value in neighbors_edge_value: + op_applied_edge_mask_list.append(torch.clamp(torch.max(edge_value.to(self.device), + dim=0, keepdim=True), max=1.)) + combined_edge_masks = torch.stack(op_applied_edge_mask_list, dim=0) + + elif self.hyperparams['edge_state_combine_method'] == 'mean': + # Used in NLP, e.g. mean over word embeddings in a sentence. + op_applied_edge_states_list = list() + for neighbors_state in edge_states_list: + op_applied_edge_states_list.append(torch.mean(neighbors_state, dim=0)) + combined_neighbors = torch.stack(op_applied_edge_states_list, dim=0) + if self.hyperparams['dynamic_edges'] == 'yes': + # Should now be (bs, time, 1) + op_applied_edge_mask_list = list() + for edge_value in neighbors_edge_value: + op_applied_edge_mask_list.append(torch.clamp(torch.mean(edge_value.to(self.device), + dim=0, keepdim=True), max=1.)) + combined_edge_masks = torch.stack(op_applied_edge_mask_list, dim=0) + + joint_history = torch.cat([combined_neighbors, node_history_st], dim=-1) + + outputs, _ = run_lstm_on_variable_length_seqs( + self.node_modules[DirectedEdge.get_str_from_types(*edge_type) + '/edge_encoder'], + original_seqs=joint_history, + lower_indices=first_history_indices + ) + + outputs = F.dropout(outputs, + p=1. - self.hyperparams['rnn_kwargs']['dropout_keep_prob'], + training=(mode == ModeKeys.TRAIN)) # [bs, max_time, enc_rnn_dim] + + last_index_per_sequence = -(first_history_indices + 1) + ret = outputs[torch.arange(last_index_per_sequence.shape[0]), last_index_per_sequence] + if self.hyperparams['dynamic_edges'] == 'yes': + return ret * combined_edge_masks + else: + return ret + + def encode_total_edge_influence(self, mode, encoded_edges, node_history_encoder, batch_size): + if self.hyperparams['edge_influence_combine_method'] == 'sum': + stacked_encoded_edges = torch.stack(encoded_edges, dim=0) + combined_edges = torch.sum(stacked_encoded_edges, dim=0) + + elif self.hyperparams['edge_influence_combine_method'] == 'mean': + stacked_encoded_edges = torch.stack(encoded_edges, dim=0) + combined_edges = torch.mean(stacked_encoded_edges, dim=0) + + elif self.hyperparams['edge_influence_combine_method'] == 'max': + stacked_encoded_edges = torch.stack(encoded_edges, dim=0) + combined_edges = torch.max(stacked_encoded_edges, dim=0) + + elif self.hyperparams['edge_influence_combine_method'] == 'bi-rnn': + if len(encoded_edges) == 0: + combined_edges = torch.zeros((batch_size, self.eie_output_dims), device=self.device) + + else: + # axis=1 because then we get size [batch_size, max_time, depth] + encoded_edges = torch.stack(encoded_edges, dim=1) + + _, state = self.node_modules[self.node_type + '/edge_influence_encoder'](encoded_edges) + combined_edges = unpack_RNN_state(state) + combined_edges = F.dropout(combined_edges, + p=1. 
- self.hyperparams['rnn_kwargs']['dropout_keep_prob'], + training=(mode == ModeKeys.TRAIN)) + + elif self.hyperparams['edge_influence_combine_method'] == 'attention': + # Used in Social Attention (https://arxiv.org/abs/1710.04689) + if len(encoded_edges) == 0: + combined_edges = torch.zeros((batch_size, self.eie_output_dims), device=self.device) + + else: + # axis=1 because then we get size [batch_size, max_time, depth] + encoded_edges = torch.stack(encoded_edges, dim=1) + combined_edges, _ = self.node_modules[self.node_type + '/edge_influence_encoder'](encoded_edges, + node_history_encoder) + combined_edges = F.dropout(combined_edges, + p=1. - self.hyperparams['rnn_kwargs']['dropout_keep_prob'], + training=(mode == ModeKeys.TRAIN)) + + return combined_edges + + def encode_node_future(self, mode, node_present, node_future) -> torch.Tensor: + """ + Encodes the node future (during training) using a bi-directional LSTM + + :param mode: Mode in which the model is operated. E.g. Train, Eval, Predict. + :param node_present: Current state of the node. [bs, state] + :param node_future: Future states of the node. [bs, ph, state] + :return: Encoded future. + """ + initial_h_model = self.node_modules[self.node_type + '/node_future_encoder/initial_h'] + initial_c_model = self.node_modules[self.node_type + '/node_future_encoder/initial_c'] + + # Here we're initializing the forward hidden states, + # but zeroing the backward ones. + initial_h = initial_h_model(node_present) + initial_h = torch.stack([initial_h, torch.zeros_like(initial_h, device=self.device)], dim=0) + + initial_c = initial_c_model(node_present) + initial_c = torch.stack([initial_c, torch.zeros_like(initial_c, device=self.device)], dim=0) + + initial_state = (initial_h, initial_c) + + _, state = self.node_modules[self.node_type + '/node_future_encoder'](node_future, initial_state) + state = unpack_RNN_state(state) + state = F.dropout(state, + p=1. - self.hyperparams['rnn_kwargs']['dropout_keep_prob'], + training=(mode == ModeKeys.TRAIN)) + + return state + + def encode_robot_future(self, mode, robot_present, robot_future) -> torch.Tensor: + """ + Encodes the robot future (during training) using a bi-directional LSTM + + :param mode: Mode in which the model is operated. E.g. Train, Eval, Predict. + :param robot_present: Current state of the robot. [bs, state] + :param robot_future: Future states of the robot. [bs, ph, state] + :return: Encoded future. + """ + initial_h_model = self.node_modules['robot_future_encoder/initial_h'] + initial_c_model = self.node_modules['robot_future_encoder/initial_c'] + + # Here we're initializing the forward hidden states, + # but zeroing the backward ones. + initial_h = initial_h_model(robot_present) + initial_h = torch.stack([initial_h, torch.zeros_like(initial_h, device=self.device)], dim=0) + + initial_c = initial_c_model(robot_present) + initial_c = torch.stack([initial_c, torch.zeros_like(initial_c, device=self.device)], dim=0) + + initial_state = (initial_h, initial_c) + + _, state = self.node_modules['robot_future_encoder'](robot_future, initial_state) + state = unpack_RNN_state(state) + state = F.dropout(state, + p=1. - self.hyperparams['rnn_kwargs']['dropout_keep_prob'], + training=(mode == ModeKeys.TRAIN)) + + return state + + def q_z_xy(self, mode, x, y_e) -> torch.Tensor: + r""" + .. math:: q_\phi(z \mid \mathbf{x}_i, \mathbf{y}_i) + + :param mode: Mode in which the model is operated. E.g. Train, Eval, Predict. + :param x: Input / Condition tensor. + :param y_e: Encoded future tensor. 
+ :return: Latent distribution of the CVAE. + """ + xy = torch.cat([x, y_e], dim=1) + + if self.hyperparams['q_z_xy_MLP_dims'] is not None: + dense = self.node_modules[self.node_type + '/q_z_xy'] + h = F.dropout(F.relu(dense(xy)), + p=1. - self.hyperparams['MLP_dropout_keep_prob'], + training=(mode == ModeKeys.TRAIN)) + + else: + h = xy + + to_latent = self.node_modules[self.node_type + '/hxy_to_z'] + return self.latent.dist_from_h(to_latent(h), mode) + + def p_z_x(self, mode, x): + r""" + .. math:: p_\theta(z \mid \mathbf{x}_i) + + :param mode: Mode in which the model is operated. E.g. Train, Eval, Predict. + :param x: Input / Condition tensor. + :return: Latent distribution of the CVAE. + """ + if self.hyperparams['p_z_x_MLP_dims'] is not None: + dense = self.node_modules[self.node_type + '/p_z_x'] + h = F.dropout(F.relu(dense(x)), + p=1. - self.hyperparams['MLP_dropout_keep_prob'], + training=(mode == ModeKeys.TRAIN)) + + else: + h = x + + to_latent = self.node_modules[self.node_type + '/hx_to_z'] + return self.latent.dist_from_h(to_latent(h), mode) + + def project_to_GMM_params(self, tensor) -> (torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor): + """ + Projects tensor to parameters of a GMM with N components and D dimensions. + + :param tensor: Input tensor. + :return: tuple(log_pis, mus, log_sigmas, corrs) + WHERE + - log_pis: Weight (logarithm) of each GMM component. [N] + - mus: Mean of each GMM component. [N, D] + - log_sigmas: Standard Deviation (logarithm) of each GMM component. [N, D] + - corrs: Correlation between the GMM components. [N] + """ + log_pis = self.node_modules[self.node_type + '/decoder/proj_to_GMM_log_pis'](tensor) + mus = self.node_modules[self.node_type + '/decoder/proj_to_GMM_mus'](tensor) + log_sigmas = self.node_modules[self.node_type + '/decoder/proj_to_GMM_log_sigmas'](tensor) + corrs = torch.tanh(self.node_modules[self.node_type + '/decoder/proj_to_GMM_corrs'](tensor)) + return log_pis, mus, log_sigmas, corrs + + def p_y_xz(self, mode, x, x_nr_t, y_r, n_s_t0, z_stacked, prediction_horizon, + num_samples, num_components=1, gmm_mode=False): + r""" + .. math:: p_\psi(\mathbf{y}_i \mid \mathbf{x}_i, z) + + :param mode: Mode in which the model is operated. E.g. Train, Eval, Predict. + :param x: Input / Condition tensor. + :param x_nr_t: Joint state of node and robot (if robot is in scene). + :param y: Future tensor. + :param y_r: Encoded future tensor. + :param n_s_t0: Standardized current state of the node. + :param z_stacked: Stacked latent state. [num_samples_z * num_samples_gmm, bs, latent_state] + :param prediction_horizon: Number of prediction timesteps. + :param num_samples: Number of samples from the latent space. + :param num_components: Number of GMM components. + :param gmm_mode: If True: The mode of the GMM is sampled. + :return: GMM2D. If mode is Predict, also samples from the GMM. 
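+ Note: z_stacked is flattened to (num_samples * num_components * bs, z_dim) and x
+ is tiled to match, so every latent sample is decoded against the same condition.
+ Example (sketch; assumes mode == ModeKeys.PREDICT):
+ >>> y_dist, sampled_future = model.p_y_xz(ModeKeys.PREDICT, x, x_nr_t, y_r, n_s_t0, z_stacked, ph, num_samples=1, gmm_mode=True)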
+ """ + ph = prediction_horizon + pred_dim = self.pred_state_length + + z = torch.reshape(z_stacked, (-1, self.latent.z_dim)) + zx = torch.cat([z, x.repeat(num_samples * num_components, 1)], dim=1) + + cell = self.node_modules[self.node_type + '/decoder/rnn_cell'] + initial_h_model = self.node_modules[self.node_type + '/decoder/initial_h'] + + initial_state = initial_h_model(zx) + + log_pis, mus, log_sigmas, corrs, a_sample = [], [], [], [], [] + + # Infer initial action state for node from current state + a_0 = self.node_modules[self.node_type + '/decoder/state_action'](n_s_t0) + + state = initial_state + if self.hyperparams['incl_robot_node']: + input_ = torch.cat([zx, + a_0.repeat(num_samples * num_components, 1), + x_nr_t.repeat(num_samples * num_components, 1)], dim=1) + else: + input_ = torch.cat([zx, a_0.repeat(num_samples * num_components, 1)], dim=1) + + for j in range(ph): + h_state = cell(input_, state) + log_pi_t, mu_t, log_sigma_t, corr_t = self.project_to_GMM_params(h_state) + + gmm = GMM2D(log_pi_t, mu_t, log_sigma_t, corr_t) # [k;bs, pred_dim] + + if mode == ModeKeys.PREDICT and gmm_mode: + a_t = gmm.mode() + else: + a_t = gmm.rsample() + + if num_components > 1: + if mode == ModeKeys.PREDICT: + log_pis.append(self.latent.p_dist.logits.repeat(num_samples, 1, 1)) + else: + log_pis.append(self.latent.q_dist.logits.repeat(num_samples, 1, 1)) + else: + log_pis.append( + torch.ones_like(corr_t.reshape(num_samples, num_components, -1).permute(0, 2, 1).reshape(-1, 1)) + ) + + mus.append( + mu_t.reshape( + num_samples, num_components, -1, 2 + ).permute(0, 2, 1, 3).reshape(-1, 2 * num_components) + ) + log_sigmas.append( + log_sigma_t.reshape( + num_samples, num_components, -1, 2 + ).permute(0, 2, 1, 3).reshape(-1, 2 * num_components)) + corrs.append( + corr_t.reshape( + num_samples, num_components, -1 + ).permute(0, 2, 1).reshape(-1, num_components)) + + if self.hyperparams['incl_robot_node']: + dec_inputs = [zx, a_t, y_r[:, j].repeat(num_samples * num_components, 1)] + else: + dec_inputs = [zx, a_t] + input_ = torch.cat(dec_inputs, dim=1) + state = h_state + + log_pis = torch.stack(log_pis, dim=1) + mus = torch.stack(mus, dim=1) + log_sigmas = torch.stack(log_sigmas, dim=1) + corrs = torch.stack(corrs, dim=1) + + a_dist = GMM2D(torch.reshape(log_pis, [num_samples, -1, ph, num_components]), + torch.reshape(mus, [num_samples, -1, ph, num_components * pred_dim]), + torch.reshape(log_sigmas, [num_samples, -1, ph, num_components * pred_dim]), + torch.reshape(corrs, [num_samples, -1, ph, num_components])) + + if self.hyperparams['dynamic'][self.node_type]['distribution']: + y_dist = self.dynamic.integrate_distribution(a_dist, x) + else: + y_dist = a_dist + + if mode == ModeKeys.PREDICT: + if gmm_mode: + a_sample = a_dist.mode() + else: + a_sample = a_dist.rsample() + sampled_future = self.dynamic.integrate_samples(a_sample, x) + return y_dist, sampled_future + else: + return y_dist + + def encoder(self, mode, x, y_e, num_samples=None): + """ + Encoder of the CVAE. + + :param mode: Mode in which the model is operated. E.g. Train, Eval, Predict. + :param x: Input / Condition tensor. + :param y_e: Encoded future tensor. + :param num_samples: Number of samples from the latent space during Prediction. + :return: tuple(z, kl_obj) + WHERE + - z: Samples from the latent space. 
+ - kl_obj: KL Divergenze between q and p + """ + if mode == ModeKeys.TRAIN: + sample_ct = self.hyperparams['k'] + elif mode == ModeKeys.EVAL: + sample_ct = self.hyperparams['k_eval'] + elif mode == ModeKeys.PREDICT: + sample_ct = num_samples + if num_samples is None: + raise ValueError("num_samples cannot be None with mode == PREDICT.") + + self.latent.q_dist = self.q_z_xy(mode, x, y_e) + self.latent.p_dist = self.p_z_x(mode, x) + + z = self.latent.sample_q(sample_ct, mode) + + if mode == ModeKeys.TRAIN: + kl_obj = self.latent.kl_q_p(self.log_writer, '%s' % str(self.node_type), self.curr_iter) + if self.log_writer is not None: + self.log_writer.add_scalar('%s/%s' % (str(self.node_type), 'kl'), kl_obj, self.curr_iter) + else: + kl_obj = None + + return z, kl_obj + + def decoder(self, mode, x, x_nr_t, y, y_r, n_s_t0, z, labels, prediction_horizon, num_samples): + """ + Decoder of the CVAE. + + :param mode: Mode in which the model is operated. E.g. Train, Eval, Predict. + :param x: Input / Condition tensor. + :param x: Input / Condition tensor. + :param x_nr_t: Joint state of node and robot (if robot is in scene). + :param y: Future tensor. + :param y_r: Encoded future tensor. + :param n_s_t0: Standardized current state of the node. + :param z: Stacked latent state. + :param prediction_horizon: Number of prediction timesteps. + :param num_samples: Number of samples from the latent space. + :return: Log probability of y over p. + """ + + num_components = self.hyperparams['N'] * self.hyperparams['K'] + y_dist = self.p_y_xz(mode, x, x_nr_t, y_r, n_s_t0, z, + prediction_horizon, num_samples, num_components=num_components) + log_p_yt_xz = torch.clamp(y_dist.log_prob(labels), max=self.hyperparams['log_p_yt_xz_max']) + if self.hyperparams['log_histograms'] and self.log_writer is not None: + self.log_writer.add_histogram('%s/%s' % (str(self.node_type), 'log_p_yt_xz'), log_p_yt_xz, self.curr_iter) + + log_p_y_xz = torch.sum(log_p_yt_xz, dim=2) + return log_p_y_xz + + def train_loss(self, + inputs, + inputs_st, + first_history_indices, + labels, + labels_st, + neighbors, + neighbors_edge_value, + robot, + map, + prediction_horizon) -> torch.Tensor: + """ + Calculates the training loss for a batch. + + :param inputs: Input tensor including the state for each agent over time [bs, t, state]. + :param inputs_st: Standardized input tensor. + :param first_history_indices: First timestep (index) in scene for which data is available for a node [bs] + :param labels: Label tensor including the label output for each agent over time [bs, t, pred_state]. + :param labels_st: Standardized label tensor. + :param neighbors: Preprocessed dict (indexed by edge type) of list of neighbor states over time. + [[bs, t, neighbor state]] + :param neighbors_edge_value: Preprocessed edge values for all neighbor nodes [[N]] + :param robot: Standardized robot state over time. [bs, t, robot_state] + :param map: Tensor of Map information. [bs, channels, x, y] + :param prediction_horizon: Number of prediction timesteps. 
+ :return: Scalar tensor -> nll loss + """ + mode = ModeKeys.TRAIN + + x, x_nr_t, y_e, y_r, y, n_s_t0 = self.obtain_encoded_tensors(mode=mode, + inputs=inputs, + inputs_st=inputs_st, + labels=labels, + labels_st=labels_st, + first_history_indices=first_history_indices, + neighbors=neighbors, + neighbors_edge_value=neighbors_edge_value, + robot=robot, + map=map) + + z, kl = self.encoder(mode, x, y_e) + log_p_y_xz = self.decoder(mode, x, x_nr_t, y, y_r, n_s_t0, z, + labels, # Loss is calculated on unstandardized label + prediction_horizon, + self.hyperparams['k']) + + log_p_y_xz_mean = torch.mean(log_p_y_xz, dim=0) # [nbs] + log_likelihood = torch.mean(log_p_y_xz_mean) + + mutual_inf_q = mutual_inf_mc(self.latent.q_dist) + mutual_inf_p = mutual_inf_mc(self.latent.p_dist) + + ELBO = log_likelihood - self.kl_weight * kl + 1. * mutual_inf_p + loss = -ELBO + + if self.hyperparams['log_histograms'] and self.log_writer is not None: + self.log_writer.add_histogram('%s/%s' % (str(self.node_type), 'log_p_y_xz'), + log_p_y_xz_mean, + self.curr_iter) + + if self.log_writer is not None: + self.log_writer.add_scalar('%s/%s' % (str(self.node_type), 'mutual_information_q'), + mutual_inf_q, + self.curr_iter) + self.log_writer.add_scalar('%s/%s' % (str(self.node_type), 'mutual_information_p'), + mutual_inf_p, + self.curr_iter) + self.log_writer.add_scalar('%s/%s' % (str(self.node_type), 'log_likelihood'), + log_likelihood, + self.curr_iter) + self.log_writer.add_scalar('%s/%s' % (str(self.node_type), 'loss'), + loss, + self.curr_iter) + if self.hyperparams['log_histograms']: + self.latent.summarize_for_tensorboard(self.log_writer, str(self.node_type), self.curr_iter) + return loss + + def eval_loss(self, + inputs, + inputs_st, + first_history_indices, + labels, + labels_st, + neighbors, + neighbors_edge_value, + robot, + map, + prediction_horizon) -> torch.Tensor: + """ + Calculates the evaluation loss for a batch. + + :param inputs: Input tensor including the state for each agent over time [bs, t, state]. + :param inputs_st: Standardized input tensor. + :param first_history_indices: First timestep (index) in scene for which data is available for a node [bs] + :param labels: Label tensor including the label output for each agent over time [bs, t, pred_state]. + :param labels_st: Standardized label tensor. + :param neighbors: Preprocessed dict (indexed by edge type) of list of neighbor states over time. + [[bs, t, neighbor state]] + :param neighbors_edge_value: Preprocessed edge values for all neighbor nodes [[N]] + :param robot: Standardized robot state over time. [bs, t, robot_state] + :param map: Tensor of Map information. [bs, channels, x, y] + :param prediction_horizon: Number of prediction timesteps. 
+ :return: tuple(nll_q_is, nll_p, nll_exact, nll_sampled) + """ + + mode = ModeKeys.EVAL + + x, x_nr_t, y_e, y_r, y, n_s_t0 = self.obtain_encoded_tensors(mode=mode, + inputs=inputs, + inputs_st=inputs_st, + labels=labels, + labels_st=labels_st, + first_history_indices=first_history_indices, + neighbors=neighbors, + neighbors_edge_value=neighbors_edge_value, + robot=robot, + map=map) + + num_components = self.hyperparams['N'] * self.hyperparams['K'] + ### Importance sampled NLL estimate + z, _ = self.encoder(mode, x, y_e) # [k_eval, nbs, N*K] + z = self.latent.sample_p(1, mode, full_dist=True) + y_dist, _ = self.p_y_xz(ModeKeys.PREDICT, x, x_nr_t, y_r, n_s_t0, z, + prediction_horizon, num_samples=1, num_components=num_components) + # We use unstandardized labels to compute the loss + log_p_yt_xz = torch.clamp(y_dist.log_prob(labels), max=self.hyperparams['log_p_yt_xz_max']) + log_p_y_xz = torch.sum(log_p_yt_xz, dim=2) + log_p_y_xz_mean = torch.mean(log_p_y_xz, dim=0) # [nbs] + log_likelihood = torch.mean(log_p_y_xz_mean) + nll = -log_likelihood + + return nll + + def predict(self, + inputs, + inputs_st, + packed_inputs_st, + first_history_indices, + neighbors, + neighbors_edge_value, + robot, + map, + prediction_horizon, + num_samples, + z_mode=False, + gmm_mode=False, + full_dist=True, + all_z_sep=False): + """ + Predicts the future of a batch of nodes. + + :param inputs: Input tensor including the state for each agent over time [bs, t, state]. + :param inputs_st: Standardized input tensor. + :param first_history_indices: First timestep (index) in scene for which data is available for a node [bs] + :param neighbors: Preprocessed dict (indexed by edge type) of list of neighbor states over time. + [[bs, t, neighbor state]] + :param neighbors_edge_value: Preprocessed edge values for all neighbor nodes [[N]] + :param robot: Standardized robot state over time. [bs, t, robot_state] + :param map: Tensor of Map information. [bs, channels, x, y] + :param prediction_horizon: Number of prediction timesteps. + :param num_samples: Number of samples from the latent space. + :param z_mode: If True: Select the most likely latent state. + :param gmm_mode: If True: The mode of the GMM is sampled. + :param all_z_sep: Samples each latent mode individually without merging them into a GMM. + :param full_dist: Samples all latent states and merges them into a GMM as output. 
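+        :param packed_inputs_st: Standardized input sequences pre-packed by the caller (e.g. a torch.nn.utils.rnn.PackedSequence); forwarded unchanged to obtain_encoded_tensors.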
+ :return: + """ + mode = ModeKeys.PREDICT + + # x, x_nr_t, _, y_r, _, n_s_t0 = self.obtain_encoded_tensors(mode=mode, + out = self.obtain_encoded_tensors(mode=mode, + inputs=inputs, + inputs_st=inputs_st, + packed_inputs_st=packed_inputs_st, + labels=None, + labels_st=None, + first_history_indices=first_history_indices, + neighbors=neighbors, + neighbors_edge_value=neighbors_edge_value, + robot=robot, + map=map) + # return x, n_s_t0 + return out + + self.latent.p_dist = self.p_z_x(mode, x) + z, num_samples, num_components = self.latent.sample_p(num_samples, + mode, + most_likely_z=z_mode, + full_dist=full_dist, + all_z_sep=all_z_sep) + + _, our_sampled_future = self.p_y_xz(mode, x, x_nr_t, y_r, n_s_t0, z, + prediction_horizon, + num_samples, + num_components, + gmm_mode) + + return our_sampled_future diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/model_registrar.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/model_registrar.py new file mode 100644 index 000000000..111a8ab3e --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/model_registrar.py @@ -0,0 +1,75 @@ +import os +import torch +import torch.nn as nn + + +def get_model_device(model): + return next(model.parameters()).device + + +class ModelRegistrar(nn.Module): + def __init__(self, model_dir, device): + super(ModelRegistrar, self).__init__() + self.model_dict = nn.ModuleDict() + self.model_dir = model_dir + self.device = device + + def forward(self): + raise NotImplementedError('Although ModelRegistrar is a nn.Module, it is only to store parameters.') + + def get_model(self, name, model_if_absent=None): + # 4 cases: name in self.model_dict and model_if_absent is None (OK) + # name in self.model_dict and model_if_absent is not None (OK) + # name not in self.model_dict and model_if_absent is not None (OK) + # name not in self.model_dict and model_if_absent is None (NOT OK) + + if name in self.model_dict: + return self.model_dict[name] + + elif model_if_absent is not None: + self.model_dict[name] = model_if_absent.to(self.device) + return self.model_dict[name] + + else: + raise ValueError(f'{name} was never initialized in this Registrar!') + + def get_name_match(self, name): + ret_model_list = nn.ModuleList() + for key in self.model_dict.keys(): + if name in key: + ret_model_list.append(self.model_dict[key]) + return ret_model_list + + def get_all_but_name_match(self, name): + ret_model_list = nn.ModuleList() + for key in self.model_dict.keys(): + if name not in key: + ret_model_list.append(self.model_dict[key]) + return ret_model_list + + def print_model_names(self): + print(self.model_dict.keys()) + + def save_models(self, curr_iter): + # Create the model directiory if it's not present. 
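+        # Ensure the save directory exists before torch.save writes into it
+        # (os.makedirs with exist_ok=True is a no-op when it is already there).
+        os.makedirs(self.model_dir, exist_ok=True)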
+ save_path = os.path.join(self.model_dir, + 'model_registrar-%d.pt' % curr_iter) + + torch.save(self.model_dict, save_path) + + def load_models(self, iter_num): + self.model_dict.clear() + + save_path = os.path.join(self.model_dir, + 'model_registrar-%d.pt' % iter_num) + + print('') + print('Loading from ' + save_path) + self.model_dict = torch.load(save_path, map_location=self.device) + print('Loaded!') + print('') + + def to(self, device): + for name, model in self.model_dict.items(): + if get_model_device(model) != device: + model.to(device) diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/model_utils.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/model_utils.py new file mode 100644 index 000000000..688d99261 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/model_utils.py @@ -0,0 +1,130 @@ +import torch +import torch.nn.utils.rnn as rnn +from enum import Enum +import functools +import numpy as np +import math + + +class ModeKeys(Enum): + TRAIN = 1 + EVAL = 2 + PREDICT = 3 + + +def cyclical_lr(stepsize, min_lr=3e-4, max_lr=3e-3, decay=1.): + # Lambda function to calculate the LR + lr_lambda = lambda it: min_lr + (max_lr - min_lr) * relative(it, stepsize) * decay**it + + # Additional function to see where on the cycle we are + def relative(it, stepsize): + cycle = math.floor(1 + it / (2 * stepsize)) + x = abs(it / stepsize - 2 * cycle + 1) + return max(0, (1 - x)) + + return lr_lambda + + +def to_one_hot(labels, n_labels): + return torch.eye(n_labels, device=labels.device)[labels] + + +def exp_anneal(anneal_kws): + device = anneal_kws['device'] + start = torch.tensor(anneal_kws['start'], device=device) + finish = torch.tensor(anneal_kws['finish'], device=device) + rate = torch.tensor(anneal_kws['rate'], device=device) + return lambda step: finish - (finish - start)*torch.pow(rate, torch.tensor(step, dtype=torch.float, device=device)) + + +def sigmoid_anneal(anneal_kws): + device = anneal_kws['device'] + start = torch.tensor(anneal_kws['start'], device=device) + finish = torch.tensor(anneal_kws['finish'], device=device) + center_step = torch.tensor(anneal_kws['center_step'], device=device, dtype=torch.float) + steps_lo_to_hi = torch.tensor(anneal_kws['steps_lo_to_hi'], device=device, dtype=torch.float) + return lambda step: start + (finish - start)*torch.sigmoid((torch.tensor(float(step), device=device) - center_step) * (1./steps_lo_to_hi)) + + +class CustomLR(torch.optim.lr_scheduler.LambdaLR): + def __init__(self, optimizer, lr_lambda, last_epoch=-1): + super(CustomLR, self).__init__(optimizer, lr_lambda, last_epoch) + + def get_lr(self): + return [lmbda(self.last_epoch) + for lmbda, base_lr in zip(self.lr_lambdas, self.base_lrs)] + + +def mutual_inf_mc(x_dist): + dist = x_dist.__class__ + H_y = dist(probs=x_dist.probs.mean(dim=0)).entropy() + return (H_y - x_dist.entropy().mean(dim=0)).sum() + + +def run_lstm_on_variable_length_seqs(lstm_module, original_seqs, lower_indices=None, upper_indices=None, total_length=None): + # breakpoint() + # bs, tf = original_seqs.shape[:2] + # if lower_indices is None: + # lower_indices = torch.zeros(bs, dtype=torch.int) + # if upper_indices is None: + # upper_indices = torch.ones(bs, dtype=torch.int) * (tf - 1) + # if total_length is None: + # total_length = max(upper_indices) + 1 + # # This is done so that we can just pass in self.prediction_timesteps + # # (which we want to INCLUDE, so this will exclude the next timestep). 
+ # inclusive_break_indices = upper_indices + 1 + + # pad_list = list() + # for i, seq_len in enumerate(inclusive_break_indices): + # pad_list.append(original_seqs[i, lower_indices[i]:seq_len]) + + # packed_seqs = rnn.pack_sequence(pad_list, enforce_sorted=False) + # return packed_seqs # TypeError: int() argument must be a string, a bytes-like object or a real number, not 'Any' + + packed_seqs = original_seqs + packed_output, (h_n, c_n) = lstm_module(packed_seqs) + return packed_output # TypeError: object of type 'Call' has no len() + output, _ = rnn.pad_packed_sequence(packed_output, + batch_first=True, + total_length=total_length) + + return output, (h_n, c_n) + + +def extract_subtensor_per_batch_element(tensor, indices): + batch_idxs = torch.arange(start=0, end=len(indices)) + + batch_idxs = batch_idxs[~torch.isnan(indices)] + indices = indices[~torch.isnan(indices)] + if indices.size == 0: + return None + else: + indices = indices.long() + if tensor.is_cuda: + batch_idxs = batch_idxs.to(tensor.get_device()) + indices = indices.to(tensor.get_device()) + return tensor[batch_idxs, indices] + + +def unpack_RNN_state(state_tuple): + # PyTorch returned LSTM states have 3 dims: + # (num_layers * num_directions, batch, hidden_size) + + state = torch.cat(state_tuple, dim=0).permute(1, 0, 2) + # Now state is (batch, 2 * num_layers * num_directions, hidden_size) + + state_size = state.size() + return torch.reshape(state, (-1, state_size[1] * state_size[2])) + + +def rsetattr(obj, attr, val): + pre, _, post = attr.rpartition('.') + return setattr(rgetattr(obj, pre) if pre else obj, post, val) + + +# using wonder's beautiful simplification: +# https://stackoverflow.com/questions/31174295/getattr-and-setattr-on-nested-objects/31174427?noredirect=1#comment86638618_31174427 +def rgetattr(obj, attr, *args): + def _getattr(obj, attr): + return getattr(obj, attr, *args) + return functools.reduce(_getattr, [obj] + attr.split('.')) diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/online/__init__.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/online/__init__.py new file mode 100644 index 000000000..a1c907062 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/online/__init__.py @@ -0,0 +1,2 @@ +from .online_trajectron import OnlineTrajectron +from .online_mgcvae import OnlineMultimodalGenerativeCVAE diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/online/online_mgcvae.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/online/online_mgcvae.py new file mode 100644 index 000000000..c614c37a4 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/online/online_mgcvae.py @@ -0,0 +1,430 @@ +import warnings +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np +from collections import defaultdict, Counter +from model.components import * +from model.model_utils import * +from model.dataset import get_relative_robot_traj +import model.dynamics as dynamic_module +from model.mgcvae import MultimodalGenerativeCVAE +from environment.scene_graph import DirectedEdge +from environment.node_type import NodeType + + +class OnlineMultimodalGenerativeCVAE(MultimodalGenerativeCVAE): + def __init__(self, + env, + node, + model_registrar, + hyperparams, + device): + self.hyperparams = hyperparams + self.node = node + self.node_type = self.node.type + + if len(env.scenes) != 1: + raise ValueError("Passed in Environment has number of 
scenes != 1") + self.robot = env.scenes[0].robot + self.model_registrar = model_registrar + self.device = device + + self.node_modules = dict() + self.env = env + self.scene_graph = None + + self.state = self.hyperparams['state'] + self.pred_state = self.hyperparams['pred_state'][self.node.type] + self.state_length = int(np.sum([len(entity_dims) for entity_dims in self.state[self.node.type].values()])) + if self.hyperparams['incl_robot_node']: + self.robot_state_length = int( + np.sum([len(entity_dims) for entity_dims in self.state[self.robot.type].values()])) + self.pred_state_length = int(np.sum([len(entity_dims) for entity_dims in self.pred_state.values()])) + + self.curr_hidden_states = dict() + self.edge_types = Counter() + + self.create_graphical_model() + + dynamic_class = getattr(dynamic_module, self.hyperparams['dynamic'][self.node_type]['name']) + dyn_limits = hyperparams['dynamic'][self.node_type]['limits'] + self.dynamic = dynamic_class(self.env.scenes[0].dt, dyn_limits, device, + self.model_registrar, self.x_size, self.node_type) + + def create_graphical_model(self): + """ + Creates or queries all trainable components. + + :return: None + """ + self.clear_submodules() + + ############################ + # Everything but Edges # + ############################ + self.create_node_models() + + for name, module in self.node_modules.items(): + module.to(self.device) + + def update_graph(self, new_scene_graph, new_neighbors, removed_neighbors): + self.scene_graph = new_scene_graph + + if self.node in new_neighbors: + for edge_type, new_neighbor_nodes in new_neighbors[self.node].items(): + self.add_edge_model(edge_type) + self.edge_types += Counter({edge_type: len(new_neighbor_nodes)}) + + if self.node in removed_neighbors: + for edge_type, removed_neighbor_nodes in removed_neighbors[self.node].items(): + self.remove_edge_model(edge_type) + self.edge_types -= Counter({edge_type: len(removed_neighbor_nodes)}) + + def get_edge_to(self, other_node): + return DirectedEdge(self.node, other_node) + + def add_edge_model(self, edge_type): + if self.hyperparams['edge_encoding']: + if edge_type + '/edge_encoder' not in self.node_modules: + neighbor_state_length = int( + np.sum([len(entity_dims) for entity_dims in + self.state[self._get_other_node_type_from_edge(edge_type)].values()])) + if self.hyperparams['edge_state_combine_method'] == 'pointnet': + self.add_submodule(edge_type + '/pointnet_encoder', + model_if_absent=nn.Sequential( + nn.Linear(self.state_length, 2 * self.state_length), + nn.ReLU(), + nn.Linear(2 * self.state_length, 2 * self.state_length), + nn.ReLU())) + + edge_encoder_input_size = 2 * self.state_length + self.state_length + + elif self.hyperparams['edge_state_combine_method'] == 'attention': + self.add_submodule(self.node.type + '/edge_attention_combine', + model_if_absent=TemporallyBatchedAdditiveAttention( + encoder_hidden_state_dim=self.state_length, + decoder_hidden_state_dim=self.state_length)) + edge_encoder_input_size = self.state_length + neighbor_state_length + + else: + edge_encoder_input_size = self.state_length + neighbor_state_length + + self.add_submodule(edge_type + '/edge_encoder', + model_if_absent=nn.LSTM(input_size=edge_encoder_input_size, + hidden_size=self.hyperparams['enc_rnn_dim_edge'], + batch_first=True)) + + def _get_other_node_type_from_edge(self, edge_type_str): + n2_type_str = edge_type_str.split('->')[1] + return NodeType(n2_type_str, self.env.node_type_list.index(n2_type_str) + 1) + + def _get_edge_type_from_str(self, edge_type_str): + 
n1_type_str, n2_type_str = edge_type_str.split('->') + return (NodeType(n1_type_str, self.env.node_type_list.index(n1_type_str) + 1), + NodeType(n2_type_str, self.env.node_type_list.index(n2_type_str) + 1)) + + def remove_edge_model(self, edge_type): + if self.hyperparams['edge_encoding']: + if len(self.scene_graph.get_neighbors(self.node, self._get_other_node_type_from_edge(edge_type))) == 0: + del self.node_modules[edge_type + '/edge_encoder'] + + def obtain_encoded_tensors(self, + mode, + inputs, + inputs_st, + inputs_np, + robot_present_and_future, + maps): + x, x_r_t, y_r = None, None, None + batch_size = 1 + + our_inputs = inputs[self.node] + our_inputs_st = inputs_st[self.node] + + initial_dynamics = dict() + initial_dynamics['pos'] = our_inputs[:, 0:2] # TODO: Generalize + initial_dynamics['vel'] = our_inputs[:, 2:4] # TODO: Generalize + self.dynamic.set_initial_condition(initial_dynamics) + + ######################################### + # Provide basic information to encoders # + ######################################### + if self.hyperparams['incl_robot_node'] and self.robot is not None: + robot_present_and_future_st = get_relative_robot_traj(self.env, self.state, + our_inputs, robot_present_and_future, + self.node.type, self.robot.type) + x_r_t = robot_present_and_future_st[..., 0, :] + y_r = robot_present_and_future_st[..., 1:, :] + + ################## + # Encode History # + ################## + node_history_encoded = self.encode_node_history(our_inputs_st) + + ############################## + # Encode Node Edges per Type # + ############################## + total_edge_influence = None + if self.hyperparams['edge_encoding']: + node_edges_encoded = list() + for edge_type in self.edge_types: + connected_nodes_batched = list() + edge_masks_batched = list() + + # We get all nodes which are connected to the current node for the current timestep + connected_nodes_batched.append(self.scene_graph.get_neighbors(self.node, + self._get_other_node_type_from_edge( + edge_type))) + + if self.hyperparams['dynamic_edges'] == 'yes': + # We get the edge masks for the current node at the current timestep + edge_masks_for_node = self.scene_graph.get_edge_scaling(self.node) + edge_masks_batched.append(torch.tensor(edge_masks_for_node, dtype=torch.float, device=self.device)) + + # Encode edges for given edge type + encoded_edges_type = self.encode_edge(inputs, + inputs_st, + inputs_np, + edge_type, + connected_nodes_batched, + edge_masks_batched) + node_edges_encoded.append(encoded_edges_type) # List of [bs/nbs, enc_rnn_dim] + + ##################### + # Encode Node Edges # + ##################### + total_edge_influence = self.encode_total_edge_influence(mode, + node_edges_encoded, + node_history_encoded, + batch_size) + + self.TD = {'node_history_encoded': node_history_encoded, + 'total_edge_influence': total_edge_influence} + + ################ + # Map Encoding # + ################ + if self.hyperparams['use_map_encoding'] and self.node_type in self.hyperparams['map_encoder']: + if self.node not in maps: + # This means the node was removed (it is only being kept around because of the edge removal filter). + me_params = self.hyperparams['map_encoder'][self.node_type] + self.TD['encoded_map'] = torch.zeros((1, me_params['output_size'])) + else: + encoded_map = self.node_modules[self.node_type + '/map_encoder'](maps[self.node] * 2. 
- 1., + (mode == ModeKeys.TRAIN)) + do = self.hyperparams['map_encoder'][self.node_type]['dropout'] + encoded_map = F.dropout(encoded_map, do, training=(mode == ModeKeys.TRAIN)) + self.TD['encoded_map'] = encoded_map + + ###################################### + # Concatenate Encoder Outputs into x # + ###################################### + return self.create_encoder_rep(mode, self.TD, x_r_t, y_r) + + def create_encoder_rep(self, mode, + TD, + robot_present_st, + robot_future_st): + # Unpacking TD + node_history_encoded = TD['node_history_encoded'] + if self.hyperparams['edge_encoding']: + total_edge_influence = TD['total_edge_influence'] + if self.hyperparams['use_map_encoding'] and self.node_type in self.hyperparams['map_encoder']: + encoded_map = TD['encoded_map'] + + if (self.hyperparams['incl_robot_node'] + and self.robot is not None + and robot_future_st is not None + and robot_present_st is not None): + robot_future_encoder = self.encode_robot_future(mode, robot_present_st, robot_future_st) + + # Tiling for multiple samples + # This tiling is done because: + # a) we must consider the prediction case where there are many candidate robot future actions, + # b) the edge and history encoders are all the same regardless of which candidate future robot action + # we're evaluating. + node_history_encoded = TD['node_history_encoded'].repeat(robot_future_st.size()[0], 1) + if self.hyperparams['edge_encoding']: + total_edge_influence = TD['total_edge_influence'].repeat(robot_future_st.size()[0], 1) + if self.hyperparams['use_map_encoding'] and self.node_type in self.hyperparams['map_encoder']: + encoded_map = TD['encoded_map'].repeat(robot_future_st.size()[0], 1) + + elif self.hyperparams['incl_robot_node'] and self.robot is not None: + # Four times because we're trying to mimic a bi-directional RNN's output (which is c and h from both ends). + robot_future_encoder = torch.zeros([1, 4 * self.hyperparams['enc_rnn_dim_future']], device=self.device) + + x_concat_list = list() + + # Every node has an edge-influence encoder (which could just be zero). + if self.hyperparams['edge_encoding']: + x_concat_list.append(total_edge_influence) # [bs/nbs, 4*enc_rnn_dim] + + # Every node has a history encoder. 
+ x_concat_list.append(node_history_encoded) # [bs/nbs, enc_rnn_dim_history] + + if self.hyperparams['incl_robot_node'] and self.robot is not None: + x_concat_list.append(robot_future_encoder) # [bs/nbs, 4*enc_rnn_dim_history] + + if self.hyperparams['use_map_encoding'] and self.node_type in self.hyperparams['map_encoder']: + x_concat_list.append(encoded_map) # [bs/nbs, CNN output size] + + return torch.cat(x_concat_list, dim=1) + + def encode_node_history(self, inputs_st): + new_state = torch.unsqueeze(inputs_st, dim=1) # [bs, 1, state_dim] + if self.node.type + '/node_history_encoder' not in self.curr_hidden_states: + outputs, self.curr_hidden_states[self.node.type + '/node_history_encoder'] = self.node_modules[ + self.node.type + '/node_history_encoder'](new_state) + else: + outputs, self.curr_hidden_states[self.node.type + '/node_history_encoder'] = self.node_modules[ + self.node.type + '/node_history_encoder'](new_state, self.curr_hidden_states[ + self.node.type + '/node_history_encoder']) + + return outputs[:, 0, :] + + def encode_edge(self, inputs, inputs_st, inputs_np, edge_type, connected_nodes, edge_masks): + edge_type_tuple = self._get_edge_type_from_str(edge_type) + edge_states_list = list() # list of [#of neighbors, max_ht, state_dim] + neighbor_states = list() + + orig_rel_state = inputs[self.node].cpu().numpy() + for node in connected_nodes[0]: + neighbor_state_np = inputs_np[node] + + # Make State relative to node + _, std = self.env.get_standardize_params(self.state[node.type], node_type=node.type) + std[0:2] = self.env.attention_radius[edge_type_tuple] + + # TODO: This all makes the unsafe assumption that the first n dims + # refer to the same quantities even for different agent types! + equal_dims = np.min((neighbor_state_np.shape[-1], orig_rel_state.shape[-1])) + rel_state = np.zeros_like(neighbor_state_np) + rel_state[..., :equal_dims] = orig_rel_state[..., :equal_dims] + neighbor_state_np_st = self.env.standardize(neighbor_state_np, + self.state[node.type], + node_type=node.type, + mean=rel_state, + std=std) + + neighbor_state = torch.tensor(neighbor_state_np_st).float().to(self.device) + neighbor_states.append(neighbor_state) + + if len(neighbor_states) == 0: # There are no neighbors for edge type # TODO necessary? + neighbor_state_length = int(np.sum([len(entity_dims) for entity_dims in self.state[edge_type[1]].values()])) + edge_states_list.append(torch.zeros((1, 1, neighbor_state_length), device=self.device)) + else: + edge_states_list.append(torch.stack(neighbor_states, dim=0)) + + if self.hyperparams['edge_state_combine_method'] == 'sum': + # Used in Structural-RNN to combine edges as well. + op_applied_edge_states_list = list() + for neighbors_state in edge_states_list: + op_applied_edge_states_list.append(torch.sum(neighbors_state, dim=0)) + combined_neighbors = torch.stack(op_applied_edge_states_list, dim=0) + if self.hyperparams['dynamic_edges'] == 'yes': + # Should now be (bs, time, 1) + op_applied_edge_mask_list = list() + for edge_mask in edge_masks: + op_applied_edge_mask_list.append(torch.clamp(torch.sum(edge_mask, dim=0, keepdim=True), max=1.)) + combined_edge_masks = torch.stack(op_applied_edge_mask_list, dim=0) + + elif self.hyperparams['edge_state_combine_method'] == 'max': + # Used in NLP, e.g. max over word embeddings in a sentence. 
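+                # Element-wise max over the neighbor dimension; note that torch.max(..., dim=0)
+                # returns a (values, indices) namedtuple rather than a bare tensor.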
+ op_applied_edge_states_list = list() + for neighbors_state in edge_states_list: + op_applied_edge_states_list.append(torch.max(neighbors_state, dim=0)) + combined_neighbors = torch.stack(op_applied_edge_states_list, dim=0) + if self.hyperparams['dynamic_edges'] == 'yes': + # Should now be (bs, time, 1) + op_applied_edge_mask_list = list() + for edge_mask in edge_masks: + op_applied_edge_mask_list.append(torch.clamp(torch.max(edge_mask, dim=0, keepdim=True), max=1.)) + combined_edge_masks = torch.stack(op_applied_edge_mask_list, dim=0) + + elif self.hyperparams['edge_state_combine_method'] == 'mean': + # Used in NLP, e.g. mean over word embeddings in a sentence. + op_applied_edge_states_list = list() + for neighbors_state in edge_states_list: + op_applied_edge_states_list.append(torch.mean(neighbors_state, dim=0)) + combined_neighbors = torch.stack(op_applied_edge_states_list, dim=0) + if self.hyperparams['dynamic_edges'] == 'yes': + # Should now be (bs, time, 1) + op_applied_edge_mask_list = list() + for edge_mask in edge_masks: + op_applied_edge_mask_list.append(torch.clamp(torch.mean(edge_mask, dim=0, keepdim=True), max=1.)) + combined_edge_masks = torch.stack(op_applied_edge_mask_list, dim=0) + + joint_history = torch.cat([combined_neighbors, torch.unsqueeze(inputs_st[self.node], dim=0)], dim=-1) + + if edge_type + '/edge_encoder' not in self.curr_hidden_states: + outputs, self.curr_hidden_states[edge_type + '/edge_encoder'] = self.node_modules[ + edge_type + '/edge_encoder'](joint_history) + else: + outputs, self.curr_hidden_states[edge_type + '/edge_encoder'] = self.node_modules[ + edge_type + '/edge_encoder'](joint_history, self.curr_hidden_states[edge_type + '/edge_encoder']) + + if self.hyperparams['dynamic_edges'] == 'yes': + return outputs[:, 0, :] * combined_edge_masks + else: + return outputs[:, 0, :] # [bs, enc_rnn_dim] + + def encoder_forward(self, inputs, inputs_st, inputs_np, robot_present_and_future=None, maps=None): + # Always predicting with the online model. + mode = ModeKeys.PREDICT + + self.x = self.obtain_encoded_tensors(mode, + inputs, + inputs_st, + inputs_np, + robot_present_and_future, + maps) + self.n_s_t0 = inputs_st[self.node] + + self.latent.p_dist = self.p_z_x(mode, self.x) + + # robot_future_st is optional here since you can use the same one from encoder_forward, + # but if it's given then we'll re-run that part of the model (if the node is adjacent to the robot). + def decoder_forward(self, prediction_horizon, + num_samples, + robot_present_and_future=None, + z_mode=False, + gmm_mode=False, + full_dist=False, + all_z_sep=False): + # Always predicting with the online model. 
+ mode = ModeKeys.PREDICT + + x_nr_t, y_r = None, None + if (self.hyperparams['incl_robot_node'] + and self.robot is not None + and robot_present_and_future is not None): + our_inputs = torch.tensor(self.node.get(np.array([self.node.last_timestep]), + self.state[self.node.type], + padding=0.0), + dtype=torch.float, + device=self.device) + robot_present_and_future_st = get_relative_robot_traj(self.env, self.state, + our_inputs, robot_present_and_future, + self.node.type, self.robot.type) + x_nr_t = robot_present_and_future_st[..., 0, :] + y_r = robot_present_and_future_st[..., 1:, :] + self.x = self.create_encoder_rep(mode, self.TD, x_nr_t, y_r) + self.latent.p_dist = self.p_z_x(mode, self.x) + + # Making sure n_s_t0 has the same batch size as x_nr_t + self.n_s_t0 = self.n_s_t0[[0]].repeat(x_nr_t.size()[0], 1) + + z, num_samples, num_components = self.latent.sample_p(num_samples, + mode, + most_likely_z=z_mode, + full_dist=full_dist, + all_z_sep=all_z_sep) + + y_dist, our_sampled_future = self.p_y_xz(mode, self.x, x_nr_t, y_r, self.n_s_t0, z, + prediction_horizon, + num_samples, + num_components, + gmm_mode) + + return y_dist, our_sampled_future diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/online/online_trajectron.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/online/online_trajectron.py new file mode 100644 index 000000000..f1c5063be --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/online/online_trajectron.py @@ -0,0 +1,304 @@ +import torch +import numpy as np +from collections import Counter +from model.trajectron import Trajectron +from model.online.online_mgcvae import OnlineMultimodalGenerativeCVAE +from model.model_utils import ModeKeys +from environment import RingBuffer, TemporalSceneGraph, SceneGraph, derivative_of + + +class OnlineTrajectron(Trajectron): + def __init__(self, model_registrar, + hyperparams, device): + super(OnlineTrajectron, self).__init__(model_registrar=model_registrar, + hyperparams=hyperparams, + log_writer=False, + device=device) + self.node_data = dict() + self.scene_graph = None + self.RING_CAPACITY = max(len(self.hyperparams['edge_removal_filter']), + len(self.hyperparams['edge_addition_filter']), + self.hyperparams['maximum_history_length']) + 1 + self.rel_states = dict() + self.removed_nodes = Counter() + + def __repr__(self): + return f"OnlineTrajectron(# nodes: {len(self.nodes)}, device: {self.device}, hyperparameters: {str(self.hyperparams)}) " + + def _add_node_model(self, node): + if node in self.nodes: + raise ValueError('%s was already added to this graph!' % str(node)) + + self.nodes.add(node) + self.node_models_dict[node] = OnlineMultimodalGenerativeCVAE(self.env, + node, + self.model_registrar, + self.hyperparams, + self.device) + + def update_removed_nodes(self): + for node in list(self.removed_nodes.keys()): + if self.removed_nodes[node] >= len(self.hyperparams['edge_removal_filter']): + del self.node_data[node] + del self.removed_nodes[node] + + def _remove_node_model(self, node): + if node not in self.nodes: + raise ValueError('%s is not in this graph!' % str(node)) + + self.nodes.remove(node) + del self.node_models_dict[node] + + def set_environment(self, env, init_timestep=0): + self.env = env + self.scene_graph = SceneGraph(edge_radius=self.env.attention_radius) + self.nodes.clear() + self.node_data.clear() + self.node_models_dict.clear() + + # Fast-forwarding ourselves to the initial timestep, without running any of the underlying models. 
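+        # run_models=False below: only the per-node observation buffers and the scene graph are
+        # brought up to date; no encoder or decoder passes are executed during this replay.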
+ for timestep in range(init_timestep + 1): + self.incremental_forward(self.env.scenes[0].get_clipped_input_dict(timestep, self.hyperparams['state']), + maps=None, run_models=False) + + def incremental_forward(self, new_inputs_dict, + maps, + prediction_horizon=0, + num_samples=0, + robot_present_and_future=None, + z_mode=False, + gmm_mode=False, + full_dist=False, + all_z_sep=False, + run_models=True): + # The way this function works is by appending the new datapoints to the + # ends of each of the LSTMs in the graph. Then, we recalculate the + # encoder's output vector h_x and feed that into the decoder to sample new outputs. + mode = ModeKeys.PREDICT + + # No grad since we're predicting always, as evidenced by the line above. + with torch.no_grad(): + for node, new_input in new_inputs_dict.items(): + if node not in self.node_data: + self.node_data[node] = RingBuffer(capacity=self.RING_CAPACITY, + dtype=(float, sum(len(self.state[node.type][k]) + for k in self.state[node.type]))) + self.node_data[node].append(new_input) + + if node in self.removed_nodes: + del self.removed_nodes[node] + + # Nodes in self.node_data that aren't in new_inputs_dict were just removed. + newly_removed_nodes = (set(self.node_data.keys()) - set(self.removed_nodes.keys())) - set( + new_inputs_dict.keys()) + + # We update self.removed_nodes with the newly removed nodes as well as all existing removed nodes to get + # the time since their last removal increased by one. + self.removed_nodes.update(newly_removed_nodes | set(self.removed_nodes.keys())) + + # For any nodes that are older than the length of the edge_removal_filter, we can safely clear their data. + self.update_removed_nodes() + + # Any remaining removed nodes that aren't yet old enough for data clearing simply have NaNs appended so + # that when it's passed through the LSTMs, the hidden state keeps propagating but the input plays no role + # (the NaNs get converted to zeros later on). + for node in self.removed_nodes: + self.node_data[node].append(np.full((1, self.node_data[node].shape[1]), np.nan)) + + for node in self.node_data: + node.overwrite_data(self.node_data[node], None, + forward_in_time_on_next_overwrite=(self.node_data[node].shape[0] + == self.RING_CAPACITY)) + + temp_scene_dict = {k: v[:, 0:2] for k, v in self.node_data.items()} + if not temp_scene_dict: + new_scene_graph = SceneGraph(edge_radius=self.env.attention_radius) + else: + new_scene_graph = TemporalSceneGraph.create_from_temp_scene_dict( + temp_scene_dict, + self.env.attention_radius, + duration=self.RING_CAPACITY, + edge_addition_filter=self.hyperparams['edge_addition_filter'], + edge_removal_filter=self.hyperparams['edge_removal_filter'], + online=True).to_scene_graph(t=self.RING_CAPACITY - 1) + + if self.hyperparams['dynamic_edges'] == 'yes': + new_nodes, removed_nodes, new_neighbors, removed_neighbors = new_scene_graph - self.scene_graph + + # Aside from updating the scene graph, this for loop updates the graph model + # structure of all affected nodes. + not_removed_nodes = [node for node in self.nodes if node not in removed_nodes] + self.scene_graph = new_scene_graph + for node in not_removed_nodes: + self.node_models_dict[node].update_graph(new_scene_graph, new_neighbors, removed_neighbors) + + # These next 2 for loops add or remove entire node models. 
+ for node in new_nodes: + if (node.is_robot and self.hyperparams['incl_robot_node']) or node.type not in self.pred_state.keys(): + # Only deal with Models for NodeTypes we want to predict + continue + + self._add_node_model(node) + self.node_models_dict[node].update_graph(new_scene_graph, new_neighbors, removed_neighbors) + + for node in removed_nodes: + if (node.is_robot and self.hyperparams['incl_robot_node']) or node.type not in self.pred_state.keys(): + continue + + self._remove_node_model(node) + + # This actually updates the node models with the newly observed data. + if run_models: + inputs = dict() + inputs_st = dict() + inputs_np = dict() + + iter_list = list(self.node_models_dict.keys()) + [node for node in new_inputs_dict + if node.type not in self.pred_state.keys()] + if self.env.scenes[0].robot is not None: + iter_list.append(self.env.scenes[0].robot) + + for node in iter_list: + input_np = node.get(np.array([node.last_timestep, node.last_timestep]), self.state[node.type]) + + _, std = self.env.get_standardize_params(self.state[node.type.name], node.type) + std[0:2] = self.env.attention_radius[(node.type, node.type)] + rel_state = np.zeros_like(input_np) + rel_state[:, 0:2] = input_np[:, 0:2] + input_st = self.env.standardize(input_np, + self.state[node.type.name], + node.type, + mean=rel_state) + self.rel_states[node] = rel_state + + # Converting NaNs to zeros. + input_np[np.isnan(input_np)] = 0 + input_st[np.isnan(input_st)] = 0 + + # Convert to torch tensors + inputs[node] = torch.tensor(input_np, dtype=torch.float, device=self.device) + inputs_st[node] = torch.tensor(input_st, dtype=torch.float, device=self.device) + inputs_np[node] = input_np + + # We want tensors of shape (1, ph + 1, state_dim) where the first 1 is the batch size. + if (self.hyperparams['incl_robot_node'] + and self.env.scenes[0].robot is not None + and robot_present_and_future is not None): + if len(robot_present_and_future.shape) == 2: + robot_present_and_future = robot_present_and_future[np.newaxis, :] + + assert robot_present_and_future.shape[1] == prediction_horizon + 1 + robot_present_and_future = torch.tensor(robot_present_and_future, + dtype=torch.float, device=self.device) + + for node in self.node_models_dict: + self.node_models_dict[node].encoder_forward(inputs, + inputs_st, + inputs_np, + robot_present_and_future, + maps) + + # If num_predicted_timesteps or num_samples == 0 then do not run the decoder at all, + # just update the encoder LSTMs. 
+ if prediction_horizon == 0 or num_samples == 0: + return + + return self.sample_model(prediction_horizon, + num_samples, + robot_present_and_future=robot_present_and_future, + z_mode=z_mode, + gmm_mode=gmm_mode, + full_dist=full_dist, + all_z_sep=all_z_sep) + + def _run_decoder(self, node, + num_predicted_timesteps, + num_samples, + robot_present_and_future=None, + z_mode=False, + gmm_mode=False, + full_dist=False, + all_z_sep=False): + model = self.node_models_dict[node] + prediction_dist, predictions_uns = model.decoder_forward(num_predicted_timesteps, + num_samples, + robot_present_and_future=robot_present_and_future, + z_mode=z_mode, + gmm_mode=gmm_mode, + full_dist=full_dist, + all_z_sep=all_z_sep) + + predictions_np = predictions_uns.cpu().detach().numpy() + + # Return will be of shape (batch_size, num_samples, num_predicted_timesteps, 2) + return prediction_dist, np.transpose(predictions_np, (1, 0, 2, 3)) + + def sample_model(self, num_predicted_timesteps, + num_samples, + robot_present_and_future=None, + z_mode=False, + gmm_mode=False, + full_dist=False, + all_z_sep=False): + # Just start from the encoder output (minus the + # robot future) and get num_samples of + # num_predicted_timesteps-length trajectories. + if num_predicted_timesteps == 0 or num_samples == 0: + return + + mode = ModeKeys.PREDICT + + # We want tensors of shape (1, ph + 1, state_dim) where the first 1 is the batch size. + if self.hyperparams['incl_robot_node'] and self.env.scenes[ + 0].robot is not None and robot_present_and_future is not None: + if len(robot_present_and_future.shape) == 2: + robot_present_and_future = robot_present_and_future[np.newaxis, :] + + assert robot_present_and_future.shape[1] == num_predicted_timesteps + 1 + + # No grad since we're predicting always, as evidenced by the line above. + with torch.no_grad(): + predictions_dict = dict() + prediction_dists = dict() + for node in set(self.nodes) - set(self.removed_nodes.keys()): + if node.is_robot: + continue + + prediction_dists[node], predictions_dict[node] = self._run_decoder(node, num_predicted_timesteps, + num_samples, + robot_present_and_future, + z_mode, + gmm_mode, + full_dist, + all_z_sep) + + return prediction_dists, predictions_dict + + def forward(self, init_env, + init_timestep, + input_dicts, # After the initial environment + num_predicted_timesteps, + num_samples, + robot_present_and_future=None, + z_mode=False, + gmm_mode=False, + full_dist=False, + all_z_sep=False): + # This is the standard forward prediction function, + # if you have some historical data and just want to + # predict forward some number of timesteps. + + # Setting us back to the initial scene graph we had. + self.set_environment(init_env, init_timestep) + + # Looping through and applying updates to the model. 
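+        # Each replay below is encoder-only (prediction_horizon and num_samples keep their
+        # defaults of 0); trajectories are sampled once afterwards via sample_model.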
+ for i in range(len(input_dicts)): + self.incremental_forward(input_dicts[i]) + + return self.sample_model(num_predicted_timesteps, + num_samples, + robot_present_and_future=robot_present_and_future, + z_mode=z_mode, + gmm_mode=gmm_mode, + full_dist=full_dist, + all_z_sep=all_z_sep) diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/trajectron.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/trajectron.py new file mode 100644 index 000000000..eccde3eb2 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/trajectron.py @@ -0,0 +1,201 @@ +import torch +import numpy as np +from model.mgcvae import MultimodalGenerativeCVAE +from model.dataset import get_timesteps_data, restore + + +class Trajectron(torch.nn.Module): + def __init__(self, model_registrar, + hyperparams, log_writer, + device): + super(Trajectron, self).__init__() + self.hyperparams = hyperparams + self.log_writer = log_writer + self.device = device + self.curr_iter = 0 + + self.model_registrar = model_registrar + # self.node_models_dict = dict() + self.node_models_dict = torch.nn.ModuleDict() + self.nodes = set() + + self.env = None + + self.min_ht = self.hyperparams['minimum_history_length'] + self.max_ht = self.hyperparams['maximum_history_length'] + self.ph = self.hyperparams['prediction_horizon'] + self.state = self.hyperparams['state'] + self.state_length = dict() + for state_type in self.state.keys(): + self.state_length[state_type] = int( + np.sum([len(entity_dims) for entity_dims in self.state[state_type].values()]) + ) + self.pred_state = self.hyperparams['pred_state'] + + def eval(self): + super().eval() + for key in self.node_models_dict.keys(): + self.node_models_dict[key].eval() + + def set_environment(self, env): + self.env = env + + self.node_models_dict.clear() + edge_types = env.get_edge_types() + + for node_type in env.NodeType: + # Only add a Model for NodeTypes we want to predict + if node_type in self.pred_state.keys(): + self.node_models_dict[str(node_type)] = MultimodalGenerativeCVAE(env, + node_type, + self.model_registrar, + self.hyperparams, + self.device, + edge_types, + log_writer=self.log_writer) + + def set_curr_iter(self, curr_iter): + self.curr_iter = curr_iter + for node_str, model in self.node_models_dict.items(): + model.set_curr_iter(curr_iter) + + def set_annealing_params(self): + for node_str, model in self.node_models_dict.items(): + model.set_annealing_params() + + def step_annealers(self, node_type=None): + if node_type is None: + for node_type in self.node_models_dict: + self.node_models_dict[node_type].step_annealers() + else: + self.node_models_dict[node_type].step_annealers() + + def train_loss(self, batch, node_type): + (first_history_index, + x_t, y_t, x_st_t, y_st_t, + neighbors_data_st, + neighbors_edge_value, + robot_traj_st_t, + map) = batch + + x = x_t.to(self.device) + y = y_t.to(self.device) + x_st_t = x_st_t.to(self.device) + y_st_t = y_st_t.to(self.device) + if robot_traj_st_t is not None: + robot_traj_st_t = robot_traj_st_t.to(self.device) + if type(map) == torch.Tensor: + map = map.to(self.device) + + # Run forward pass + model = self.node_models_dict[node_type] + loss = model.train_loss(inputs=x, + inputs_st=x_st_t, + first_history_indices=first_history_index, + labels=y, + labels_st=y_st_t, + neighbors=restore(neighbors_data_st), + neighbors_edge_value=restore(neighbors_edge_value), + robot=robot_traj_st_t, + map=map, + prediction_horizon=self.ph) + + return loss + + def eval_loss(self, 
batch, node_type): + (first_history_index, + x_t, y_t, x_st_t, y_st_t, + neighbors_data_st, + neighbors_edge_value, + robot_traj_st_t, + map) = batch + + x = x_t.to(self.device) + y = y_t.to(self.device) + x_st_t = x_st_t.to(self.device) + y_st_t = y_st_t.to(self.device) + if robot_traj_st_t is not None: + robot_traj_st_t = robot_traj_st_t.to(self.device) + if type(map) == torch.Tensor: + map = map.to(self.device) + + # Run forward pass + model = self.node_models_dict[node_type] + nll = model.eval_loss(inputs=x, + inputs_st=x_st_t, + first_history_indices=first_history_index, + labels=y, + labels_st=y_st_t, + neighbors=restore(neighbors_data_st), + neighbors_edge_value=restore(neighbors_edge_value), + robot=robot_traj_st_t, + map=map, + prediction_horizon=self.ph) + + return nll.cpu().detach().numpy() + + def predict(self, + scene, + timesteps, + ph, + num_samples=1, + min_future_timesteps=0, + min_history_timesteps=1, + z_mode=False, + gmm_mode=False, + full_dist=True, + all_z_sep=False): + + predictions_dict = {} + for node_type in self.env.NodeType: + if node_type not in self.pred_state: + continue + + model = self.node_models_dict[node_type] + + # Get Input data for node type and given timesteps + batch = get_timesteps_data(env=self.env, scene=scene, t=timesteps, node_type=node_type, state=self.state, + pred_state=self.pred_state, edge_types=model.edge_types, + min_ht=min_history_timesteps, max_ht=self.max_ht, min_ft=min_future_timesteps, + max_ft=min_future_timesteps, hyperparams=self.hyperparams) + # There are no nodes of type present for timestep + if batch is None: + continue + (first_history_index, + x_t, y_t, x_st_t, y_st_t, + neighbors_data_st, + neighbors_edge_value, + robot_traj_st_t, + map), nodes, timesteps_o = batch + + x = x_t.to(self.device) + x_st_t = x_st_t.to(self.device) + if robot_traj_st_t is not None: + robot_traj_st_t = robot_traj_st_t.to(self.device) + if type(map) == torch.Tensor: + map = map.to(self.device) + + # Run forward pass + predictions = model.predict(inputs=x, + inputs_st=x_st_t, + first_history_indices=first_history_index, + neighbors=neighbors_data_st, + neighbors_edge_value=neighbors_edge_value, + robot=robot_traj_st_t, + map=map, + prediction_horizon=ph, + num_samples=num_samples, + z_mode=z_mode, + gmm_mode=gmm_mode, + full_dist=full_dist, + all_z_sep=all_z_sep) + + predictions_np = predictions.cpu().detach().numpy() + + # Assign predictions to node + for i, ts in enumerate(timesteps_o): + if ts not in predictions_dict.keys(): + predictions_dict[ts] = dict() + predictions_dict[ts][nodes[i]] = np.transpose(predictions_np[:, [i]], (1, 0, 2, 3)) + + return predictions_dict diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model_dir/config.json b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model_dir/config.json new file mode 100644 index 000000000..f38943d36 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model_dir/config.json @@ -0,0 +1 @@ +{"batch_size": 256, "grad_clip": 1.0, "learning_rate_style": "exp", "learning_rate": 0.001, "min_learning_rate": 1e-05, "learning_decay_rate": 0.9999, "prediction_horizon": 12, "minimum_history_length": 1, "maximum_history_length": 8, "map_encoder": {"PEDESTRIAN": {"heading_state_index": 5, "patch_size": [50, 10, 50, 90], "map_channels": 3, "hidden_channels": [10, 20, 10, 1], "output_size": 32, "masks": [5, 5, 5, 5], "strides": [1, 1, 1, 1], "dropout": 0.5}}, "k": 1, "k_eval": 1, "kl_min": 0.07, "kl_weight": 100.0, "kl_weight_start": 0, 
"kl_decay_rate": 0.99995, "kl_crossover": 400, "kl_sigmoid_divisor": 4, "rnn_kwargs": {"dropout_keep_prob": 0.75}, "MLP_dropout_keep_prob": 0.9, "enc_rnn_dim_edge": 32, "enc_rnn_dim_edge_influence": 32, "enc_rnn_dim_history": 32, "enc_rnn_dim_future": 32, "dec_rnn_dim": 128, "q_z_xy_MLP_dims": null, "p_z_x_MLP_dims": 32, "GMM_components": 1, "log_p_yt_xz_max": 6, "N": 1, "K": 25, "tau_init": 2.0, "tau_final": 0.05, "tau_decay_rate": 0.997, "use_z_logit_clipping": true, "z_logit_clip_start": 0.05, "z_logit_clip_final": 5.0, "z_logit_clip_crossover": 300, "z_logit_clip_divisor": 5, "dynamic": {"PEDESTRIAN": {"name": "SingleIntegrator", "distribution": true, "limits": {}}}, "state": {"PEDESTRIAN": {"position": ["x", "y"], "velocity": ["x", "y"], "acceleration": ["x", "y"]}}, "pred_state": {"PEDESTRIAN": {"position": ["x", "y"]}}, "log_histograms": false, "scene_freq_mult_eval": false, "node_freq_mult_eval": false, "edge_encoding": false, "incl_robot_node": false, "use_map_encoding": false} \ No newline at end of file diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/test/__init__.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/test/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/test/test_data_structures.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/test/test_data_structures.py new file mode 100644 index 000000000..6da777c01 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/test/test_data_structures.py @@ -0,0 +1,48 @@ +import numpy as np +import pandas as pd +from data import SingleHeaderNumpyArray, DoubleHeaderNumpyArray + + +def test_single_header_numpy_array(): + x = np.random.rand(10) + y = np.random.rand(10) + + array = SingleHeaderNumpyArray(np.stack((x, y), axis=-1), ['x', 'y']) + + assert (array[:, 'x'] == x).all() + assert (array[:, 'y'] == y).all() + assert (array[3:7, 'y'] == y[3:7]).all() + assert (array.x == x).all() + assert (array.y == y).all() + + +def test_double_header_numpy_array(): + x = np.random.rand(10) + y = np.random.rand(10) + vx = np.random.rand(10) + vy = np.random.rand(10) + + data_dict = {('position', 'x'): x, + ('position', 'y'): y, + ('velocity', 'x'): vx, + ('velocity', 'y'): vy} + + data_columns = pd.MultiIndex.from_product([['position', 'velocity'], ['x', 'y']]) + + node_data = pd.DataFrame(data_dict, columns=data_columns) + + array = DoubleHeaderNumpyArray(node_data.values, list(node_data.columns)) + + test_header_dict = {'position': ['x', 'y'], 'velocity': ['y']} + + assert (array[:, ('position', 'x')] == x).all() + assert (array[:, ('velocity', 'y')] == vy).all() + assert (array[4:7, ('velocity', 'y')] == vy[4:7]).all() + assert (array[:, [('position', 'x'), ('velocity', 'y')]] == np.stack((x, vy), axis=-1)).all() + assert (array[:, [('position', 'y'), ('velocity', 'x')]] == np.stack((y, vx), axis=-1)).all() + assert (array[2:6, [('position', 'y'), ('velocity', 'x')]] == np.stack((y, vx), axis=-1)[2:6]).all() + assert (array[:, test_header_dict] == np.stack((x, y, vy), axis=-1)).all() + assert (array[1:8, test_header_dict] == np.stack((x, y, vy), axis=-1)[1:8]).all() + assert (array.position.x == x).all() + + diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/test_online.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/test_online.py new file mode 100644 index 000000000..3e6cae7e4 --- /dev/null +++ 
b/forge/test/models/pytorch/multimodal/trajectron/trajectron/test_online.py @@ -0,0 +1,238 @@ +import os +import time +import json +import torch +import dill +import random +import pathlib +import evaluation +import numpy as np +import visualization as vis +from argument_parser import args +from model.online.online_trajectron import OnlineTrajectron +from model.model_registrar import ModelRegistrar +from environment import Environment, Scene +import matplotlib.pyplot as plt + +if not torch.cuda.is_available() or args.device == 'cpu': + args.device = torch.device('cpu') +else: + if torch.cuda.device_count() == 1: + # If you have CUDA_VISIBLE_DEVICES set, which you should, + # then this will prevent leftover flag arguments from + # messing with the device allocation. + args.device = 'cuda:0' + + args.device = torch.device(args.device) + +if args.eval_device is None: + args.eval_device = 'cpu' + +if args.seed is not None: + random.seed(args.seed) + np.random.seed(args.seed) + torch.manual_seed(args.seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed_all(args.seed) + + +def create_online_env(env, hyperparams, scene_idx, init_timestep): + test_scene = env.scenes[scene_idx] + + online_scene = Scene(timesteps=init_timestep + 1, + map=test_scene.map, + dt=test_scene.dt) + online_scene.nodes = test_scene.get_nodes_clipped_at_time( + timesteps=np.arange(init_timestep - hyperparams['maximum_history_length'], + init_timestep + 1), + state=hyperparams['state']) + online_scene.robot = test_scene.robot + online_scene.calculate_scene_graph(attention_radius=env.attention_radius, + edge_addition_filter=hyperparams['edge_addition_filter'], + edge_removal_filter=hyperparams['edge_removal_filter']) + + return Environment(node_type_list=env.node_type_list, + standardization=env.standardization, + scenes=[online_scene], + attention_radius=env.attention_radius, + robot_type=env.robot_type) + + +def get_maps_for_input(input_dict, scene, hyperparams): + scene_maps = list() + scene_pts = list() + heading_angles = list() + patch_sizes = list() + nodes_with_maps = list() + for node in input_dict: + if node.type in hyperparams['map_encoder']: + x = input_dict[node] + me_hyp = hyperparams['map_encoder'][node.type] + if 'heading_state_index' in me_hyp: + heading_state_index = me_hyp['heading_state_index'] + # We have to rotate the map in the opposit direction of the agent to match them + if type(heading_state_index) is list: # infer from velocity or heading vector + heading_angle = -np.arctan2(x[-1, heading_state_index[1]], + x[-1, heading_state_index[0]]) * 180 / np.pi + else: + heading_angle = -x[-1, heading_state_index] * 180 / np.pi + else: + heading_angle = None + + scene_map = scene.map[node.type] + map_point = x[-1, :2] + + patch_size = hyperparams['map_encoder'][node.type]['patch_size'] + + scene_maps.append(scene_map) + scene_pts.append(map_point) + heading_angles.append(heading_angle) + patch_sizes.append(patch_size) + nodes_with_maps.append(node) + + if heading_angles[0] is None: + heading_angles = None + else: + heading_angles = torch.Tensor(heading_angles) + + maps = scene_maps[0].get_cropped_maps_from_scene_map_batch(scene_maps, + scene_pts=torch.Tensor(scene_pts), + patch_size=patch_sizes[0], + rotation=heading_angles) + + maps_dict = {node: maps[[i]] for i, node in enumerate(nodes_with_maps)} + return maps_dict + + +def main(): + # Choose one of the model directory names under the experiment/*/models folders. 
+ # Possibilities are 'vel_ee', 'int_ee', 'int_ee_me', or 'robot' + model_dir = os.path.join(args.log_dir, 'int_ee') + + # Load hyperparameters from json + config_file = os.path.join(model_dir, args.conf) + if not os.path.exists(config_file): + raise ValueError('Config json not found!') + with open(config_file, 'r') as conf_json: + hyperparams = json.load(conf_json) + + # Add hyperparams from arguments + hyperparams['dynamic_edges'] = args.dynamic_edges + hyperparams['edge_state_combine_method'] = args.edge_state_combine_method + hyperparams['edge_influence_combine_method'] = args.edge_influence_combine_method + hyperparams['edge_addition_filter'] = args.edge_addition_filter + hyperparams['edge_removal_filter'] = args.edge_removal_filter + hyperparams['batch_size'] = args.batch_size + hyperparams['k_eval'] = args.k_eval + hyperparams['offline_scene_graph'] = args.offline_scene_graph + hyperparams['incl_robot_node'] = args.incl_robot_node + hyperparams['edge_encoding'] = not args.no_edge_encoding + hyperparams['use_map_encoding'] = args.map_encoding + + output_save_dir = os.path.join(model_dir, 'pred_figs') + pathlib.Path(output_save_dir).mkdir(parents=True, exist_ok=True) + + eval_data_path = os.path.join(args.data_dir, args.eval_data_dict) + with open(eval_data_path, 'rb') as f: + eval_env = dill.load(f, encoding='latin1') + + if eval_env.robot_type is None and hyperparams['incl_robot_node']: + eval_env.robot_type = eval_env.NodeType[0] # TODO: Make more general, allow the user to specify? + for scene in eval_env.scenes: + scene.add_robot_from_nodes(eval_env.robot_type) + + print('Loaded data from %s' % (eval_data_path,)) + + # Creating a dummy environment with a single scene that contains information about the world. + # When using this code, feel free to use whichever scene index or initial timestep you wish. + scene_idx = 0 + + # You need to have at least acceleration, so you want 2 timesteps of prior data, e.g. [0, 1], + # so that you can immediately start incremental inference from the 3rd timestep onwards. + init_timestep = 1 + + eval_scene = eval_env.scenes[scene_idx] + online_env = create_online_env(eval_env, hyperparams, scene_idx, init_timestep) + + model_registrar = ModelRegistrar(model_dir, args.eval_device) + model_registrar.load_models(iter_num=12) + + trajectron = OnlineTrajectron(model_registrar, + hyperparams, + args.eval_device) + + # If you want to see what different robot futures do to the predictions, uncomment this line as well as + # related "... += adjustment" lines below. + # adjustment = np.stack([np.arange(13)/float(i*2.0) for i in range(6, 12)], axis=1) + + # Here's how you'd incrementally run the model, e.g. with streaming data. 
+ trajectron.set_environment(online_env, init_timestep) + + for timestep in range(init_timestep + 1, eval_scene.timesteps): + input_dict = eval_scene.get_clipped_input_dict(timestep, hyperparams['state']) + + maps = None + if hyperparams['use_map_encoding']: + maps = get_maps_for_input(input_dict, eval_scene, hyperparams) + + robot_present_and_future = None + if eval_scene.robot is not None and hyperparams['incl_robot_node']: + robot_present_and_future = eval_scene.robot.get(np.array([timestep, + timestep + hyperparams['prediction_horizon']]), + hyperparams['state'][eval_scene.robot.type], + padding=0.0) + robot_present_and_future = np.stack([robot_present_and_future, robot_present_and_future], axis=0) + # robot_present_and_future += adjustment + + start = time.time() + dists, preds = trajectron.incremental_forward(input_dict, + maps, + prediction_horizon=6, + num_samples=1, + robot_present_and_future=robot_present_and_future, + full_dist=True) + end = time.time() + print("t=%d: took %.2f s (= %.2f Hz) w/ %d nodes and %d edges" % (timestep, end - start, + 1. / (end - start), len(trajectron.nodes), + trajectron.scene_graph.get_num_edges())) + + detailed_preds_dict = dict() + for node in eval_scene.nodes: + if node in preds: + detailed_preds_dict[node] = preds[node] + + fig, ax = plt.subplots() + vis.visualize_distribution(ax, + dists) + vis.visualize_prediction(ax, + {timestep: preds}, + eval_scene.dt, + hyperparams['maximum_history_length'], + hyperparams['prediction_horizon']) + + if eval_scene.robot is not None and hyperparams['incl_robot_node']: + robot_for_plotting = eval_scene.robot.get(np.array([timestep, + timestep + hyperparams['prediction_horizon']]), + hyperparams['state'][eval_scene.robot.type]) + # robot_for_plotting += adjustment + + ax.plot(robot_for_plotting[1:, 1], robot_for_plotting[1:, 0], + color='r', + linewidth=1.0, alpha=1.0) + + # Current Node Position + circle = plt.Circle((robot_for_plotting[0, 1], + robot_for_plotting[0, 0]), + 0.3, + facecolor='r', + edgecolor='k', + lw=0.5, + zorder=3) + ax.add_artist(circle) + + fig.savefig(os.path.join(output_save_dir, f'pred_{timestep}.pdf'), dpi=300) + plt.close(fig) + + +if __name__ == '__main__': + main() diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/train.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/train.py new file mode 100644 index 000000000..0e40b5bdc --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/train.py @@ -0,0 +1,440 @@ +import torch +from torch import nn, optim, utils +import numpy as np +import os +import time +import dill +import json +import random +import pathlib +import warnings +from tqdm import tqdm +import visualization +import evaluation +import matplotlib.pyplot as plt +from argument_parser import args +from model.trajectron import Trajectron +from model.model_registrar import ModelRegistrar +from model.model_utils import cyclical_lr +from model.dataset import EnvironmentDataset, collate +from tensorboardX import SummaryWriter +# torch.autograd.set_detect_anomaly(True) + +if not torch.cuda.is_available() or args.device == 'cpu': + args.device = torch.device('cpu') +else: + if torch.cuda.device_count() == 1: + # If you have CUDA_VISIBLE_DEVICES set, which you should, + # then this will prevent leftover flag arguments from + # messing with the device allocation. 
+ args.device = 'cuda:0' + + args.device = torch.device(args.device) + +if args.eval_device is None: + args.eval_device = torch.device('cpu') + +# This is needed for memory pinning using a DataLoader (otherwise memory is pinned to cuda:0 by default) +torch.cuda.set_device(args.device) + +if args.seed is not None: + random.seed(args.seed) + np.random.seed(args.seed) + torch.manual_seed(args.seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed_all(args.seed) + + +def main(): + # Load hyperparameters from json + if not os.path.exists(args.conf): + print('Config json not found!') + with open(args.conf, 'r', encoding='utf-8') as conf_json: + hyperparams = json.load(conf_json) + + # Add hyperparams from arguments + hyperparams['dynamic_edges'] = args.dynamic_edges + hyperparams['edge_state_combine_method'] = args.edge_state_combine_method + hyperparams['edge_influence_combine_method'] = args.edge_influence_combine_method + hyperparams['edge_addition_filter'] = args.edge_addition_filter + hyperparams['edge_removal_filter'] = args.edge_removal_filter + hyperparams['batch_size'] = args.batch_size + hyperparams['k_eval'] = args.k_eval + hyperparams['offline_scene_graph'] = args.offline_scene_graph + hyperparams['incl_robot_node'] = args.incl_robot_node + hyperparams['node_freq_mult_train'] = args.node_freq_mult_train + hyperparams['node_freq_mult_eval'] = args.node_freq_mult_eval + hyperparams['scene_freq_mult_train'] = args.scene_freq_mult_train + hyperparams['scene_freq_mult_eval'] = args.scene_freq_mult_eval + hyperparams['scene_freq_mult_viz'] = args.scene_freq_mult_viz + hyperparams['edge_encoding'] = not args.no_edge_encoding + hyperparams['use_map_encoding'] = args.map_encoding + hyperparams['augment'] = args.augment + hyperparams['override_attention_radius'] = args.override_attention_radius + + print('-----------------------') + print('| TRAINING PARAMETERS |') + print('-----------------------') + print('| batch_size: %d' % args.batch_size) + print('| device: %s' % args.device) + print('| eval_device: %s' % args.eval_device) + print('| Offline Scene Graph Calculation: %s' % args.offline_scene_graph) + print('| EE state_combine_method: %s' % args.edge_state_combine_method) + print('| EIE scheme: %s' % args.edge_influence_combine_method) + print('| dynamic_edges: %s' % args.dynamic_edges) + print('| robot node: %s' % args.incl_robot_node) + print('| edge_addition_filter: %s' % args.edge_addition_filter) + print('| edge_removal_filter: %s' % args.edge_removal_filter) + print('| MHL: %s' % hyperparams['minimum_history_length']) + print('| PH: %s' % hyperparams['prediction_horizon']) + print('-----------------------') + + log_writer = None + model_dir = None + if not args.debug: + # Create the log and model directiory if they're not present. 
+ model_dir = os.path.join(args.log_dir, + 'models_' + time.strftime('%d_%b_%Y_%H_%M_%S', time.localtime()) + args.log_tag) + pathlib.Path(model_dir).mkdir(parents=True, exist_ok=True) + + # Save config to model directory + with open(os.path.join(model_dir, 'config.json'), 'w') as conf_json: + json.dump(hyperparams, conf_json) + + log_writer = SummaryWriter(log_dir=model_dir) + + # Load training and evaluation environments and scenes + train_scenes = [] + train_data_path = os.path.join(args.data_dir, args.train_data_dict) + with open(train_data_path, 'rb') as f: + train_env = dill.load(f, encoding='latin1') + + for attention_radius_override in args.override_attention_radius: + node_type1, node_type2, attention_radius = attention_radius_override.split(' ') + train_env.attention_radius[(node_type1, node_type2)] = float(attention_radius) + + if train_env.robot_type is None and hyperparams['incl_robot_node']: + train_env.robot_type = train_env.NodeType[0] # TODO: Make more general, allow the user to specify? + for scene in train_env.scenes: + scene.add_robot_from_nodes(train_env.robot_type) + + train_scenes = train_env.scenes + train_scenes_sample_probs = train_env.scenes_freq_mult_prop if args.scene_freq_mult_train else None + + train_dataset = EnvironmentDataset(train_env, + hyperparams['state'], + hyperparams['pred_state'], + scene_freq_mult=hyperparams['scene_freq_mult_train'], + node_freq_mult=hyperparams['node_freq_mult_train'], + hyperparams=hyperparams, + min_history_timesteps=hyperparams['minimum_history_length'], + min_future_timesteps=hyperparams['prediction_horizon'], + return_robot=not args.incl_robot_node) + train_data_loader = dict() + for node_type_data_set in train_dataset: + if len(node_type_data_set) == 0: + continue + + node_type_dataloader = utils.data.DataLoader(node_type_data_set, + collate_fn=collate, + pin_memory=False if args.device is 'cpu' else True, + batch_size=args.batch_size, + shuffle=True, + num_workers=args.preprocess_workers) + train_data_loader[node_type_data_set.node_type] = node_type_dataloader + + print(f"Loaded training data from {train_data_path}") + + eval_scenes = [] + eval_scenes_sample_probs = None + if args.eval_every is not None: + eval_data_path = os.path.join(args.data_dir, args.eval_data_dict) + with open(eval_data_path, 'rb') as f: + eval_env = dill.load(f, encoding='latin1') + + for attention_radius_override in args.override_attention_radius: + node_type1, node_type2, attention_radius = attention_radius_override.split(' ') + eval_env.attention_radius[(node_type1, node_type2)] = float(attention_radius) + + if eval_env.robot_type is None and hyperparams['incl_robot_node']: + eval_env.robot_type = eval_env.NodeType[0] # TODO: Make more general, allow the user to specify? 
+ for scene in eval_env.scenes: + scene.add_robot_from_nodes(eval_env.robot_type) + + eval_scenes = eval_env.scenes + eval_scenes_sample_probs = eval_env.scenes_freq_mult_prop if args.scene_freq_mult_eval else None + + eval_dataset = EnvironmentDataset(eval_env, + hyperparams['state'], + hyperparams['pred_state'], + scene_freq_mult=hyperparams['scene_freq_mult_eval'], + node_freq_mult=hyperparams['node_freq_mult_eval'], + hyperparams=hyperparams, + min_history_timesteps=hyperparams['minimum_history_length'], + min_future_timesteps=hyperparams['prediction_horizon'], + return_robot=not args.incl_robot_node) + eval_data_loader = dict() + for node_type_data_set in eval_dataset: + if len(node_type_data_set) == 0: + continue + + node_type_dataloader = utils.data.DataLoader(node_type_data_set, + collate_fn=collate, + pin_memory=False if args.eval_device is 'cpu' else True, + batch_size=args.eval_batch_size, + shuffle=True, + num_workers=args.preprocess_workers) + eval_data_loader[node_type_data_set.node_type] = node_type_dataloader + + print(f"Loaded evaluation data from {eval_data_path}") + + # Offline Calculate Scene Graph + if hyperparams['offline_scene_graph'] == 'yes': + print(f"Offline calculating scene graphs") + for i, scene in enumerate(train_scenes): + scene.calculate_scene_graph(train_env.attention_radius, + hyperparams['edge_addition_filter'], + hyperparams['edge_removal_filter']) + print(f"Created Scene Graph for Training Scene {i}") + + for i, scene in enumerate(eval_scenes): + scene.calculate_scene_graph(eval_env.attention_radius, + hyperparams['edge_addition_filter'], + hyperparams['edge_removal_filter']) + print(f"Created Scene Graph for Evaluation Scene {i}") + + model_registrar = ModelRegistrar(model_dir, args.device) + + trajectron = Trajectron(model_registrar, + hyperparams, + log_writer, + args.device) + + trajectron.set_environment(train_env) + trajectron.set_annealing_params() + print('Created Training Model.') + + eval_trajectron = None + if args.eval_every is not None or args.vis_every is not None: + eval_trajectron = Trajectron(model_registrar, + hyperparams, + log_writer, + args.eval_device) + eval_trajectron.set_environment(eval_env) + eval_trajectron.set_annealing_params() + print('Created Evaluation Model.') + + optimizer = dict() + lr_scheduler = dict() + for node_type in train_env.NodeType: + if node_type not in hyperparams['pred_state']: + continue + optimizer[node_type] = optim.Adam([{'params': model_registrar.get_all_but_name_match('map_encoder').parameters()}, + {'params': model_registrar.get_name_match('map_encoder').parameters(), 'lr':0.0008}], lr=hyperparams['learning_rate']) + # Set Learning Rate + if hyperparams['learning_rate_style'] == 'const': + lr_scheduler[node_type] = optim.lr_scheduler.ExponentialLR(optimizer[node_type], gamma=1.0) + elif hyperparams['learning_rate_style'] == 'exp': + lr_scheduler[node_type] = optim.lr_scheduler.ExponentialLR(optimizer[node_type], + gamma=hyperparams['learning_decay_rate']) + + ################################# + # TRAINING # + ################################# + curr_iter_node_type = {node_type: 0 for node_type in train_data_loader.keys()} + for epoch in range(1, args.train_epochs + 1): + model_registrar.to(args.device) + train_dataset.augment = args.augment + for node_type, data_loader in train_data_loader.items(): + curr_iter = curr_iter_node_type[node_type] + pbar = tqdm(data_loader, ncols=80) + for batch in pbar: + trajectron.set_curr_iter(curr_iter) + trajectron.step_annealers(node_type) + 
optimizer[node_type].zero_grad() + train_loss = trajectron.train_loss(batch, node_type) + pbar.set_description(f"Epoch {epoch}, {node_type} L: {train_loss.item():.2f}") + train_loss.backward() + # Clipping gradients. + if hyperparams['grad_clip'] is not None: + nn.utils.clip_grad_value_(model_registrar.parameters(), hyperparams['grad_clip']) + optimizer[node_type].step() + + # Stepping forward the learning rate scheduler and annealers. + lr_scheduler[node_type].step() + + if not args.debug: + log_writer.add_scalar(f"{node_type}/train/learning_rate", + lr_scheduler[node_type].get_lr()[0], + curr_iter) + log_writer.add_scalar(f"{node_type}/train/loss", train_loss, curr_iter) + + curr_iter += 1 + curr_iter_node_type[node_type] = curr_iter + train_dataset.augment = False + if args.eval_every is not None or args.vis_every is not None: + eval_trajectron.set_curr_iter(epoch) + + ################################# + # VISUALIZATION # + ################################# + if args.vis_every is not None and not args.debug and epoch % args.vis_every == 0 and epoch > 0: + max_hl = hyperparams['maximum_history_length'] + ph = hyperparams['prediction_horizon'] + with torch.no_grad(): + # Predict random timestep to plot for train data set + if args.scene_freq_mult_viz: + scene = np.random.choice(train_scenes, p=train_scenes_sample_probs) + else: + scene = np.random.choice(train_scenes) + timestep = scene.sample_timesteps(1, min_future_timesteps=ph) + predictions = trajectron.predict(scene, + timestep, + ph, + min_future_timesteps=ph, + z_mode=True, + gmm_mode=True, + all_z_sep=False, + full_dist=False) + + # Plot predicted timestep for random scene + fig, ax = plt.subplots(figsize=(10, 10)) + visualization.visualize_prediction(ax, + predictions, + scene.dt, + max_hl=max_hl, + ph=ph, + map=scene.map['VISUALIZATION'] if scene.map is not None else None) + ax.set_title(f"{scene.name}-t: {timestep}") + log_writer.add_figure('train/prediction', fig, epoch) + + model_registrar.to(args.eval_device) + # Predict random timestep to plot for eval data set + if args.scene_freq_mult_viz: + scene = np.random.choice(eval_scenes, p=eval_scenes_sample_probs) + else: + scene = np.random.choice(eval_scenes) + timestep = scene.sample_timesteps(1, min_future_timesteps=ph) + predictions = eval_trajectron.predict(scene, + timestep, + ph, + num_samples=20, + min_future_timesteps=ph, + z_mode=False, + full_dist=False) + + # Plot predicted timestep for random scene + fig, ax = plt.subplots(figsize=(10, 10)) + visualization.visualize_prediction(ax, + predictions, + scene.dt, + max_hl=max_hl, + ph=ph, + map=scene.map['VISUALIZATION'] if scene.map is not None else None) + ax.set_title(f"{scene.name}-t: {timestep}") + log_writer.add_figure('eval/prediction', fig, epoch) + + # Predict random timestep to plot for eval data set + predictions = eval_trajectron.predict(scene, + timestep, + ph, + min_future_timesteps=ph, + z_mode=True, + gmm_mode=True, + all_z_sep=True, + full_dist=False) + + # Plot predicted timestep for random scene + fig, ax = plt.subplots(figsize=(10, 10)) + visualization.visualize_prediction(ax, + predictions, + scene.dt, + max_hl=max_hl, + ph=ph, + map=scene.map['VISUALIZATION'] if scene.map is not None else None) + ax.set_title(f"{scene.name}-t: {timestep}") + log_writer.add_figure('eval/prediction_all_z', fig, epoch) + + ################################# + # EVALUATION # + ################################# + if args.eval_every is not None and not args.debug and epoch % args.eval_every == 0 and epoch > 0: + max_hl = 
hyperparams['maximum_history_length'] + ph = hyperparams['prediction_horizon'] + model_registrar.to(args.eval_device) + with torch.no_grad(): + # Calculate evaluation loss + for node_type, data_loader in eval_data_loader.items(): + eval_loss = [] + print(f"Starting Evaluation @ epoch {epoch} for node type: {node_type}") + pbar = tqdm(data_loader, ncols=80) + for batch in pbar: + eval_loss_node_type = eval_trajectron.eval_loss(batch, node_type) + pbar.set_description(f"Epoch {epoch}, {node_type} L: {eval_loss_node_type.item():.2f}") + eval_loss.append({node_type: {'nll': [eval_loss_node_type]}}) + del batch + + evaluation.log_batch_errors(eval_loss, + log_writer, + f"{node_type}/eval_loss", + epoch) + + # Predict batch timesteps for evaluation dataset evaluation + eval_batch_errors = [] + for scene in tqdm(eval_scenes, desc='Sample Evaluation', ncols=80): + timesteps = scene.sample_timesteps(args.eval_batch_size) + + predictions = eval_trajectron.predict(scene, + timesteps, + ph, + num_samples=50, + min_future_timesteps=ph, + full_dist=False) + + eval_batch_errors.append(evaluation.compute_batch_statistics(predictions, + scene.dt, + max_hl=max_hl, + ph=ph, + node_type_enum=eval_env.NodeType, + map=scene.map)) + + evaluation.log_batch_errors(eval_batch_errors, + log_writer, + 'eval', + epoch, + bar_plot=['kde'], + box_plot=['ade', 'fde']) + + # Predict maximum likelihood batch timesteps for evaluation dataset evaluation + eval_batch_errors_ml = [] + for scene in tqdm(eval_scenes, desc='MM Evaluation', ncols=80): + timesteps = scene.sample_timesteps(scene.timesteps) + + predictions = eval_trajectron.predict(scene, + timesteps, + ph, + num_samples=1, + min_future_timesteps=ph, + z_mode=True, + gmm_mode=True, + full_dist=False) + + eval_batch_errors_ml.append(evaluation.compute_batch_statistics(predictions, + scene.dt, + max_hl=max_hl, + ph=ph, + map=scene.map, + node_type_enum=eval_env.NodeType, + kde=False)) + + evaluation.log_batch_errors(eval_batch_errors_ml, + log_writer, + 'eval/ml', + epoch) + + if args.save_every is not None and args.debug is False and epoch % args.save_every == 0: + model_registrar.save_models(epoch) + + +if __name__ == '__main__': + main() diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/utils/__init__.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/utils/__init__.py new file mode 100644 index 000000000..b50caf009 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/utils/__init__.py @@ -0,0 +1,3 @@ +from .trajectory_utils import prediction_output_to_trajectories +from .matrix_utils import block_diag, tile +from .os_utils import maybe_makedirs \ No newline at end of file diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/utils/matrix_utils.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/utils/matrix_utils.py new file mode 100644 index 000000000..87c4efac3 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/utils/matrix_utils.py @@ -0,0 +1,41 @@ +import numpy as np +import torch + + +def attach_dim(v, n_dim_to_prepend=0, n_dim_to_append=0): + return v.reshape( + torch.Size([1] * n_dim_to_prepend) + + v.shape + + torch.Size([1] * n_dim_to_append)) + + +def block_diag(m): + """ + Make a block diagonal matrix along dim=-3 + EXAMPLE: + block_diag(torch.ones(4,3,2)) + should give a 12 x 8 matrix with blocks of 3 x 2 ones. + Prepend batch dimensions if needed. + You can also give a list of matrices. 
+ :type m: torch.Tensor, list + :rtype: torch.Tensor + """ + if type(m) is list: + m = torch.cat([m1.unsqueeze(-3) for m1 in m], -3) + + d = m.dim() + n = m.shape[-3] + siz0 = m.shape[:-3] + siz1 = m.shape[-2:] + m2 = m.unsqueeze(-2) + eye = attach_dim(torch.eye(n, device=m.device).unsqueeze(-2), d - 3, 1) + return (m2 * eye).reshape(siz0 + torch.Size(torch.tensor(siz1) * n)) + + +def tile(a, dim, n_tile, device='cpu'): + init_dim = a.size(dim) + repeat_idx = [1] * a.dim() + repeat_idx[dim] = n_tile + a = a.repeat(*(repeat_idx)) + order_index = torch.LongTensor(np.concatenate([init_dim * np.arange(n_tile) + i for i in range(init_dim)])).to(device) + return torch.index_select(a, dim, order_index) \ No newline at end of file diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/utils/os_utils.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/utils/os_utils.py new file mode 100644 index 000000000..038342680 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/utils/os_utils.py @@ -0,0 +1,16 @@ +import os + + +def maybe_makedirs(path_to_create): + """This function will create a directory, unless it exists already, + at which point the function will return. + The exception handling is necessary as it prevents a race condition + from occurring. + Inputs: + path_to_create - A string path to a directory you'd like created. + """ + try: + os.makedirs(path_to_create) + except OSError: + if not os.path.isdir(path_to_create): + raise \ No newline at end of file diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/utils/trajectory_utils.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/utils/trajectory_utils.py new file mode 100644 index 000000000..e355822cf --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/utils/trajectory_utils.py @@ -0,0 +1,48 @@ +import numpy as np + + +def prediction_output_to_trajectories(prediction_output_dict, + dt, + max_h, + ph, + map=None, + prune_ph_to_future=False): + + prediction_timesteps = prediction_output_dict.keys() + + output_dict = dict() + histories_dict = dict() + futures_dict = dict() + + for t in prediction_timesteps: + histories_dict[t] = dict() + output_dict[t] = dict() + futures_dict[t] = dict() + prediction_nodes = prediction_output_dict[t].keys() + for node in prediction_nodes: + predictions_output = prediction_output_dict[t][node] + position_state = {'position': ['x', 'y']} + + history = node.get(np.array([t - max_h, t]), position_state) # History includes current pos + history = history[~np.isnan(history.sum(axis=1))] + + future = node.get(np.array([t + 1, t + ph]), position_state) + future = future[~np.isnan(future.sum(axis=1))] + + if prune_ph_to_future: + predictions_output = predictions_output[:, :, :future.shape[0]] + if predictions_output.shape[2] == 0: + continue + + trajectory = predictions_output + + if map is None: + histories_dict[t][node] = history + output_dict[t][node] = trajectory + futures_dict[t][node] = future + else: + histories_dict[t][node] = map.to_map_points(history) + output_dict[t][node] = map.to_map_points(trajectory) + futures_dict[t][node] = map.to_map_points(future) + + return output_dict, histories_dict, futures_dict diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/visualization/__init__.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/visualization/__init__.py new file mode 100644 index 000000000..1f92021f3 --- /dev/null +++ 
b/forge/test/models/pytorch/multimodal/trajectron/trajectron/visualization/__init__.py @@ -0,0 +1,2 @@ +from .visualization import visualize_prediction, visualize_distribution +from .visualization_utils import plot_boxplots \ No newline at end of file diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/visualization/visualization.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/visualization/visualization.py new file mode 100644 index 000000000..08e1fef90 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/visualization/visualization.py @@ -0,0 +1,130 @@ +from utils import prediction_output_to_trajectories +from scipy import linalg +import matplotlib.pyplot as plt +import matplotlib.patches as patches +import matplotlib.patheffects as pe +import numpy as np +import seaborn as sns + + +def plot_trajectories(ax, + prediction_dict, + histories_dict, + futures_dict, + line_alpha=0.7, + line_width=0.2, + edge_width=2, + circle_edge_width=0.5, + node_circle_size=0.3, + batch_num=0, + kde=False): + + cmap = ['k', 'b', 'y', 'g', 'r'] + + for node in histories_dict: + history = histories_dict[node] + future = futures_dict[node] + predictions = prediction_dict[node] + + if np.isnan(history[-1]).any(): + continue + + ax.plot(history[:, 0], history[:, 1], 'k--') + + for sample_num in range(prediction_dict[node].shape[1]): + + if kde and predictions.shape[1] >= 50: + line_alpha = 0.2 + for t in range(predictions.shape[2]): + sns.kdeplot(predictions[batch_num, :, t, 0], predictions[batch_num, :, t, 1], + ax=ax, shade=True, shade_lowest=False, + color=np.random.choice(cmap), alpha=0.8) + + ax.plot(predictions[batch_num, sample_num, :, 0], predictions[batch_num, sample_num, :, 1], + color=cmap[node.type.value], + linewidth=line_width, alpha=line_alpha) + + ax.plot(future[:, 0], + future[:, 1], + 'w--', + path_effects=[pe.Stroke(linewidth=edge_width, foreground='k'), pe.Normal()]) + + # Current Node Position + circle = plt.Circle((history[-1, 0], + history[-1, 1]), + node_circle_size, + facecolor='g', + edgecolor='k', + lw=circle_edge_width, + zorder=3) + ax.add_artist(circle) + + ax.axis('equal') + + +def visualize_prediction(ax, + prediction_output_dict, + dt, + max_hl, + ph, + robot_node=None, + map=None, + **kwargs): + + prediction_dict, histories_dict, futures_dict = prediction_output_to_trajectories(prediction_output_dict, + dt, + max_hl, + ph, + map=map) + + assert(len(prediction_dict.keys()) <= 1) + if len(prediction_dict.keys()) == 0: + return + ts_key = list(prediction_dict.keys())[0] + + prediction_dict = prediction_dict[ts_key] + histories_dict = histories_dict[ts_key] + futures_dict = futures_dict[ts_key] + + if map is not None: + ax.imshow(map.as_image(), origin='lower', alpha=0.5) + plot_trajectories(ax, prediction_dict, histories_dict, futures_dict, **kwargs) + + +def visualize_distribution(ax, + prediction_distribution_dict, + map=None, + pi_threshold=0.05, + **kwargs): + if map is not None: + ax.imshow(map.as_image(), origin='lower', alpha=0.5) + + for node, pred_dist in prediction_distribution_dict.items(): + if pred_dist.mus.shape[:2] != (1, 1): + return + + means = pred_dist.mus.squeeze().cpu().numpy() + covs = pred_dist.get_covariance_matrix().squeeze().cpu().numpy() + pis = pred_dist.pis_cat_dist.probs.squeeze().cpu().numpy() + + for timestep in range(means.shape[0]): + for z_val in range(means.shape[1]): + mean = means[timestep, z_val] + covar = covs[timestep, z_val] + pi = pis[timestep, z_val] + + if pi < pi_threshold: +
continue + + v, w = linalg.eigh(covar) + v = 2. * np.sqrt(2.) * np.sqrt(v) + u = w[0] / linalg.norm(w[0]) + + # Plot an ellipse to show the Gaussian component + angle = np.arctan(u[1] / u[0]) + angle = 180. * angle / np.pi # convert to degrees + ell = patches.Ellipse(mean, v[0], v[1], 180. + angle, color='blue' if node.type.name == 'VEHICLE' else 'orange') + ell.set_edgecolor(None) + ell.set_clip_box(ax.bbox) + ell.set_alpha(pi/10) + ax.add_artist(ell) diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/visualization/visualization_utils.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/visualization/visualization_utils.py new file mode 100644 index 000000000..8ad700ee2 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/visualization/visualization_utils.py @@ -0,0 +1,20 @@ +import numpy as np +import pandas as pd +import seaborn as sns + + +def plot_boxplots(ax, perf_dict_for_pd, x_label, y_label): + perf_df = pd.DataFrame.from_dict(perf_dict_for_pd) + our_mean_color = sns.color_palette("muted")[9] + marker_size = 7 + mean_markers = 'X' + with sns.color_palette("muted"): + sns.boxplot(x=x_label, y=y_label, data=perf_df, ax=ax, showfliers=False) + ax.plot([0], [np.mean(perf_df[y_label])], color=our_mean_color, marker=mean_markers, + markeredgecolor='#545454', markersize=marker_size, zorder=10) + + +def plot_barplots(ax, perf_dict_for_pd, x_label, y_label): + perf_df = pd.DataFrame.from_dict(perf_dict_for_pd) + with sns.color_palette("muted"): + sns.barplot(x=x_label, y=y_label, ax=ax, data=perf_df) \ No newline at end of file
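A quick sanity-check sketch for the matrix_utils helpers added in this diff. It is not part of the patch; it only assumes the trajectron directory is on sys.path (so that `utils` resolves to the package above, not some other module of the same name) and that torch is installed. The expected shape follows the block_diag docstring example.

import sys
sys.path.append("forge/test/models/pytorch/multimodal/trajectron/trajectron/")

import torch
from utils import block_diag, tile  # re-exported by utils/__init__.py above

# Four 3x2 blocks stacked along dim=-3 become a 12x8 block-diagonal matrix.
m = torch.ones(4, 3, 2)
assert block_diag(m).shape == torch.Size([12, 8])

# tile repeats each slice along `dim` n_tile times, keeping slices grouped together.
a = torch.tensor([[1, 2], [3, 4]])
print(tile(a, dim=0, n_tile=2))
# tensor([[1, 2],
#         [1, 2],
#         [3, 4],
#         [3, 4]])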
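Similarly, a minimal, hypothetical usage sketch for plot_boxplots from visualization_utils.py: the helper takes any DataFrame-convertible mapping whose keys include the x_label and y_label columns, draws a seaborn boxplot without fliers, and overlays the overall mean as an X marker. The column names, values, and output filename below are made up for illustration; the same sys.path assumption as above applies, plus matplotlib, seaborn, and scipy being installed.

import sys
sys.path.append("forge/test/models/pytorch/multimodal/trajectron/trajectron/")

import numpy as np
import matplotlib.pyplot as plt
from visualization import plot_boxplots  # re-exported by visualization/__init__.py above

# Made-up per-sample displacement errors for two labels, just to exercise the helper.
perf_dict = {
    "method": ["ours"] * 50 + ["baseline"] * 50,
    "ade": list(np.random.rand(50) * 0.4) + list(np.random.rand(50) * 0.7),
}

fig, ax = plt.subplots()
plot_boxplots(ax, perf_dict, x_label="method", y_label="ade")
fig.savefig("ade_boxplot.png", dpi=150)
plt.close(fig)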