diff --git a/forge/test/models/pytorch/multimodal/trajectron/test_trajectron.py b/forge/test/models/pytorch/multimodal/trajectron/test_trajectron.py new file mode 100644 index 000000000..c675387be --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/test_trajectron.py @@ -0,0 +1,307 @@ +# # SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC + +# # SPDX-License-Identifier: Apache-2.0 + +import sys + +sys.path.append("forge/test/models/pytorch/multimodal/trajectron/trajectron/") +import pytest +import forge +from test.models.pytorch.multimodal.trajectron.trajectron.model import Trajectron +from test.models.pytorch.multimodal.trajectron.trajectron.model.model_registrar import ModelRegistrar +from test.models.pytorch.multimodal.trajectron.trajectron.model.dataset import ( + EnvironmentDataset, + collate, + get_timesteps_data, +) +from forge.verify.compare import compare_with_golden +import os +import json +import dill +import torch +import torch.nn as nn +import numpy as np +from typing import Any +import torch.nn.utils.rnn as rnn +import pytest + + +def load_hyperparams(): + conf_path = "forge/test/models/pytorch/multimodal/trajectron/trajectron/config/config.json" + with open(conf_path, "r", encoding="utf-8") as conf_json: + hyperparams = json.load(conf_json) + + # Set Default values + hyperparams["scene_freq_mult_eval"] = False + hyperparams["node_freq_mult_eval"] = False + hyperparams["edge_encoding"] = False + hyperparams["incl_robot_node"] = False + hyperparams["use_map_encoding"] = False + + hyperparams["edge_addition_filter"] = [1, 1] + hyperparams["edge_removal_filter"] = [1, 1] + + return hyperparams + + +def load_env(): + eval_data_path = "forge/test/models/pytorch/multimodal/trajectron/trajectron/dataset_envs/eth_val.pkl" + with open(eval_data_path, "rb") as f: + eval_env = dill.load(f, encoding="latin1") + return eval_env + + +class TrajectronWrapper(nn.Module): + def __init__( + self, + model_dir: str, + hyperparams: dict[str, Any], + env: Any, + scene_index: int, + num_samples: int = 1, + z_mode: bool = True, + gmm_mode: bool = True, + all_z_sep: bool = False, + full_dist: bool = False, + ): + super().__init__() + + # Build Model registrar + if not os.path.exists(model_dir): + os.makedirs(model_dir, exist_ok=False) + model_config_path = model_dir + "/config.json" + if not os.path.exists(model_config_path): + with open(model_config_path, "w") as conf_json: + json.dump(hyperparams, conf_json) + model_registrar = ModelRegistrar(model_dir, "cpu") + + # Build Trajectron Model + self.model = Trajectron(model_registrar=model_registrar, hyperparams=hyperparams, log_writer=None, device="cpu") + self.model.set_environment(env=env) + + self.model_dir = model_dir + self.hyperparams = hyperparams + self.env = env + + assert len(self.env.NodeType) == 1 + self.node_type = self.env.NodeType[0] + + self.scene_index = scene_index + self.num_samples = num_samples + self.z_mode = z_mode + self.gmm_mode = gmm_mode + self.all_z_sep = all_z_sep + self.full_dist = full_dist + + def _build_packed_sequence( + self, + packed_sequence_data, + packed_sequence_batch_sizes, + packed_sequence_sorted_indices, + packed_sequence_unsorted_indices, + ): + packed_sequence = torch.nn.utils.rnn.PackedSequence( + data=packed_sequence_data.squeeze(), + batch_sizes=packed_sequence_batch_sizes.squeeze(), + sorted_indices=packed_sequence_sorted_indices.squeeze(), + unsorted_indices=packed_sequence_unsorted_indices.squeeze(), + ) + return packed_sequence + + def forward( + self, + x, + x_st_t, + 
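+        # The PackedSequence is passed in as its four underlying tensors so that forward()
+        # only receives plain tensors; it is rebuilt via _build_packed_sequence below.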
packed_sequence_data, + packed_sequence_batch_sizes, + packed_sequence_sorted_indices, + packed_sequence_unsorted_indices, + first_history_index, + ): + neighbors_data_st = None + neighbors_edge_value = None + robot_traj_st_t = None + map = None + + ph = self.hyperparams["prediction_horizon"] + + packed_x_st_t = self._build_packed_sequence( + packed_sequence_data, + packed_sequence_batch_sizes, + packed_sequence_sorted_indices, + packed_sequence_unsorted_indices, + ) + + model = self.model.node_models_dict[self.node_type] + predictions = model.predict( + inputs=x, + inputs_st=x_st_t, # Pack and send this + packed_inputs_st=packed_x_st_t, + first_history_indices=first_history_index, + neighbors=neighbors_data_st, + neighbors_edge_value=neighbors_edge_value, + robot=robot_traj_st_t, + map=map, + prediction_horizon=ph, + num_samples=self.num_samples, + z_mode=self.z_mode, + gmm_mode=self.gmm_mode, + full_dist=self.full_dist, + all_z_sep=self.all_z_sep, + ) + + return predictions + + def eval(self): + super().eval() + self.model.eval() + + def get_input_batch(self, scene): + ph = self.hyperparams["prediction_horizon"] + timesteps = scene.sample_timesteps(1, min_future_timesteps=ph) + + min_future_timesteps = ph + min_history_timesteps = 1 + + node_type = self.node_type + assert node_type in self.model.pred_state + model = self.model.node_models_dict[node_type] + + # Get Input data for node type and given timesteps + batch = get_timesteps_data( + env=self.env, + scene=scene, + t=timesteps, + node_type=node_type, + state=self.model.state, + pred_state=self.model.pred_state, + edge_types=model.edge_types, + min_ht=min_history_timesteps, + max_ht=self.model.max_ht, + min_ft=min_future_timesteps, + max_ft=min_future_timesteps, + hyperparams=self.hyperparams, + ) + + assert batch is not None + + ( + ( + first_history_index, + x_t, + y_t, + x_st_t, + y_st_t, + neighbors_data_st, + neighbors_edge_value, + robot_traj_st_t, + map, + ), + nodes, + timesteps_o, + ) = batch + + device = self.model.device + x = x_t.to(device) + x_st_t = x_st_t.to(device) + if robot_traj_st_t is not None: + robot_traj_st_t = robot_traj_st_t.to(device) + + if type(map) == torch.Tensor: + map = map.to(device) + + return (x, x_st_t, first_history_index, neighbors_data_st, neighbors_edge_value, robot_traj_st_t, map), ( + nodes, + timesteps_o, + ) + + +def pack_input_sequences(sequences, lower_indices=None, upper_indices=None, total_length=None): + bs, tf = sequences.shape[:2] + if lower_indices is None: + lower_indices = torch.zeros(bs, dtype=torch.int) + if upper_indices is None: + upper_indices = torch.ones(bs, dtype=torch.int) * (tf - 1) + if total_length is None: + total_length = max(upper_indices) + 1 + # This is done so that we can just pass in self.prediction_timesteps + # (which we want to INCLUDE, so this will exclude the next timestep). 
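+    # Convert the inclusive upper bound into an exclusive slice end so the last timestep is kept.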
+ inclusive_break_indices = upper_indices + 1 + + pad_list = list() + for i, seq_len in enumerate(inclusive_break_indices): + pad_list.append(sequences[i, lower_indices[i] : seq_len]) + + packed_seqs = rnn.pack_sequence(pad_list, enforce_sorted=False) + + return packed_seqs + + +def get_packed_sequence_values(packed_sequence): + values = ( + packed_sequence.data.unsqueeze(0).unsqueeze(0), + packed_sequence.batch_sizes.unsqueeze(0), + packed_sequence.sorted_indices.unsqueeze(0), + packed_sequence.unsorted_indices.unsqueeze(0), + ) + return values + + +@pytest.mark.nightly +@pytest.mark.model_analysis +def test_trajectronpp_pytorch(): + env = load_env() + hyperparams = load_hyperparams() + model_dir = "forge/test/models/pytorch/multimodal/trajectron/trajectron/model_dir" + + # Build Pytorch Model + pt_model = TrajectronWrapper(model_dir=model_dir, hyperparams=hyperparams, env=env, scene_index=0) + pt_model.eval() + + scene = env.scenes[0] + inputs_batch = pt_model.get_input_batch(scene=scene) + + (x, x_st_t, first_history_index, neighbors_data_st, neighbors_edge_value, robot_traj_st_t, map), ( + nodes, + timesteps_o, + ) = inputs_batch + + packed_x_st_t = pack_input_sequences(x_st_t, lower_indices=first_history_index) + ( + packed_sequence_data, + packed_sequence_batch_sizes, + packed_sequence_sorted_indices, + packed_sequence_unsorted_indices, + ) = get_packed_sequence_values(packed_x_st_t) + + assert neighbors_data_st is None + assert neighbors_edge_value is None + assert robot_traj_st_t is None + assert map is None + # Run CPU Inference + output = pt_model( + x, + x_st_t, + packed_sequence_data, + packed_sequence_batch_sizes, + packed_sequence_sorted_indices, + packed_sequence_unsorted_indices, + first_history_index, + ) + inputs = [ + x, + x_st_t, + packed_sequence_data, + packed_sequence_batch_sizes, + packed_sequence_sorted_indices, + packed_sequence_unsorted_indices, + first_history_index, + ] + compiled_model = forge.compile(pt_model, inputs) + co_out = compiled_model(*inputs) + fw_out = pt_model(*inputs) + + co_out = [co.to("cpu") for co in co_out] + fw_out = [fw_out] if isinstance(fw_out, torch.Tensor) else fw_out + + assert all([compare_with_golden(golden=fo, calculated=co, pcc=0.99) for fo, co in zip(fw_out, co_out)]) diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/__init__.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/__init__.py new file mode 100644 index 000000000..e7543593d --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/__init__.py @@ -0,0 +1,4 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 +from model import Trajectron diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/argument_parser.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/argument_parser.py new file mode 100644 index 000000000..526e95fb3 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/argument_parser.py @@ -0,0 +1,138 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 +import argparse + +parser = argparse.ArgumentParser() +parser.add_argument( + "--conf", help="path to json config file for hyperparameters", type=str, default="../config/config.json" +) + +parser.add_argument("--debug", help="disable all disk writing processes.", action="store_true") + +parser.add_argument("--preprocess_workers", help="number of processes to spawn for preprocessing", type=int, default=0) + + +# 
Model Parameters +parser.add_argument( + "--offline_scene_graph", + help="whether to precompute the scene graphs offline, options are 'no' and 'yes'", + type=str, + default="yes", +) + +parser.add_argument( + "--dynamic_edges", help="whether to use dynamic edges or not, options are 'no' and 'yes'", type=str, default="yes" +) + +parser.add_argument( + "--edge_state_combine_method", + help="the method to use for combining edges of the same type", + type=str, + default="sum", +) + +parser.add_argument( + "--edge_influence_combine_method", + help="the method to use for combining edge influences", + type=str, + default="attention", +) + +parser.add_argument( + "--edge_addition_filter", + nargs="+", + help="what scaling to use for edges as they're created", + type=float, + default=[0.25, 0.5, 0.75, 1.0], +) # We don't automatically pad left with 0.0, if you want a sharp +# and short edge addition, then you need to have a 0.0 at the +# beginning, e.g. [0.0, 1.0]. + +parser.add_argument( + "--edge_removal_filter", + nargs="+", + help="what scaling to use for edges as they're removed", + type=float, + default=[1.0, 0.0], +) # We don't automatically pad right with 0.0, if you want a sharp drop off like +# the default, then you need to have a 0.0 at the end. + +parser.add_argument( + "--override_attention_radius", + action="append", + help='Specify one attention radius to override. E.g. "PEDESTRIAN VEHICLE 10.0"', + default=[], +) + +parser.add_argument( + "--incl_robot_node", + help="whether to include a robot node in the graph or simply model all agents", + action="store_true", +) + +parser.add_argument("--map_encoding", help="Whether to use map encoding or not", action="store_true") + +parser.add_argument("--augment", help="Whether to augment the scene during training", action="store_true") + +parser.add_argument( + "--node_freq_mult_train", help="Whether to use frequency multiplying of nodes during training", action="store_true" +) + +parser.add_argument( + "--node_freq_mult_eval", help="Whether to use frequency multiplying of nodes during evaluation", action="store_true" +) + +parser.add_argument( + "--scene_freq_mult_train", help="Whether to use frequency multiplying of nodes during training", action="store_true" +) + +parser.add_argument( + "--scene_freq_mult_eval", + help="Whether to use frequency multiplying of nodes during evaluation", + action="store_true", +) + +parser.add_argument( + "--scene_freq_mult_viz", help="Whether to use frequency multiplying of nodes during evaluation", action="store_true" +) + +parser.add_argument("--no_edge_encoding", help="Whether to use neighbors edge encoding", action="store_true") + +# Data Parameters +parser.add_argument("--data_dir", help="what dir to look in for data", type=str, default="../experiments/processed") + +parser.add_argument("--train_data_dict", help="what file to load for training data", type=str, default="train.pkl") + +parser.add_argument("--eval_data_dict", help="what file to load for evaluation data", type=str, default="val.pkl") + +parser.add_argument( + "--log_dir", + help="what dir to save training information (i.e., saved models, logs, etc)", + type=str, + default="../experiments/logs", +) + +parser.add_argument("--log_tag", help="tag for the log folder", type=str, default="") + +parser.add_argument("--device", help="what device to perform training on", type=str, default="cuda:0") + +parser.add_argument("--eval_device", help="what device to use during evaluation", type=str, default=None) + +# Training Parameters 
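+# Note: the Forge test (test_trajectron.py) reads its hyperparameters from config/config.json and
+# never invokes this parser; the arguments below appear to be kept for parity with the upstream
+# Trajectron++ training code.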
+parser.add_argument("--train_epochs", help="number of iterations to train for", type=int, default=1) + +parser.add_argument("--batch_size", help="training batch size", type=int, default=256) + +parser.add_argument("--eval_batch_size", help="evaluation batch size", type=int, default=256) + +parser.add_argument("--k_eval", help="how many samples to take during evaluation", type=int, default=25) + +parser.add_argument("--seed", help="manual seed to use, default is 123", type=int, default=123) + +parser.add_argument("--eval_every", help="how often to evaluate during training, never if None", type=int, default=1) + +parser.add_argument("--vis_every", help="how often to visualize during training, never if None", type=int, default=1) + +parser.add_argument("--save_every", help="how often to save during training, never if None", type=int, default=1) +args = parser.parse_args() diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/config/config.json b/forge/test/models/pytorch/multimodal/trajectron/trajectron/config/config.json new file mode 100644 index 000000000..fca815729 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/config/config.json @@ -0,0 +1,90 @@ +{ + + "batch_size": 256, + "grad_clip": 1.0, + + "learning_rate_style": "exp", + "learning_rate": 0.001, + "min_learning_rate": 0.00001, + "learning_decay_rate": 0.9999, + + "prediction_horizon": 12, + "minimum_history_length": 1, + "maximum_history_length": 8, + + "map_encoder": { + "PEDESTRIAN": { + "heading_state_index": 5, + "patch_size": [50, 10, 50, 90], + "map_channels": 3, + "hidden_channels": [10, 20, 10, 1], + "output_size": 32, + "masks": [5, 5, 5, 5], + "strides": [1, 1, 1, 1], + "dropout": 0.5 + } + }, + + "k": 1, + "k_eval": 1, + + "kl_min": 0.07, + "kl_weight": 100.0, + "kl_weight_start": 0, + "kl_decay_rate": 0.99995, + "kl_crossover": 400, + "kl_sigmoid_divisor": 4, + + "rnn_kwargs": { + "dropout_keep_prob": 0.75 + }, + "MLP_dropout_keep_prob": 0.9, + "enc_rnn_dim_edge": 32, + "enc_rnn_dim_edge_influence": 32, + "enc_rnn_dim_history": 32, + "enc_rnn_dim_future": 32, + "dec_rnn_dim": 128, + + "q_z_xy_MLP_dims": null, + "p_z_x_MLP_dims": 32, + "GMM_components": 1, + + "log_p_yt_xz_max": 6, + + "N": 1, + "K": 25, + + "tau_init": 2.0, + "tau_final": 0.05, + "tau_decay_rate": 0.997, + + "use_z_logit_clipping": true, + "z_logit_clip_start": 0.05, + "z_logit_clip_final": 5.0, + "z_logit_clip_crossover": 300, + "z_logit_clip_divisor": 5, + + "dynamic": { + "PEDESTRIAN": { + "name": "SingleIntegrator", + "distribution": true, + "limits": {} + } + }, + + "state": { + "PEDESTRIAN": { + "position": ["x", "y"], + "velocity": ["x", "y"], + "acceleration": ["x", "y"] + } + }, + + "pred_state": { + "PEDESTRIAN": { + "position": ["x", "y"] + } + }, + + "log_histograms": false +} diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/config/nuScenes.json b/forge/test/models/pytorch/multimodal/trajectron/trajectron/config/nuScenes.json new file mode 100644 index 000000000..acebf8e1e --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/config/nuScenes.json @@ -0,0 +1,109 @@ +{ + + "batch_size": 256, + "grad_clip": 1.0, + + "learning_rate_style": "exp", + "learning_rate": 0.003, + "min_learning_rate": 0.00001, + "learning_decay_rate": 0.9999, + + "prediction_horizon": 6, + "minimum_history_length": 1, + "maximum_history_length": 8, + + "map_encoder": { + "VEHICLE": { + "heading_state_index": 6, + "patch_size": [50, 10, 50, 90], + "map_channels": 3, + 
"hidden_channels": [10, 20, 10, 1], + "output_size": 32, + "masks": [5, 5, 5, 3], + "strides": [2, 2, 1, 1], + "dropout": 0.5 + } + }, + + "k": 1, + "k_eval": 1, + + "kl_min": 0.07, + "kl_weight": 100.0, + "kl_weight_start": 0, + "kl_decay_rate": 0.99995, + "kl_crossover": 400, + "kl_sigmoid_divisor": 4, + + "rnn_kwargs": { + "dropout_keep_prob": 0.75 + }, + "MLP_dropout_keep_prob": 0.9, + "enc_rnn_dim_edge": 32, + "enc_rnn_dim_edge_influence": 32, + "enc_rnn_dim_history": 32, + "enc_rnn_dim_future": 32, + "dec_rnn_dim": 128, + + "q_z_xy_MLP_dims": null, + "p_z_x_MLP_dims": 32, + "GMM_components": 1, + + "log_p_yt_xz_max": 6, + + "N": 1, + "K": 25, + + "tau_init": 2.0, + "tau_final": 0.05, + "tau_decay_rate": 0.997, + + "use_z_logit_clipping": true, + "z_logit_clip_start": 0.05, + "z_logit_clip_final": 5.0, + "z_logit_clip_crossover": 300, + "z_logit_clip_divisor": 5, + + "dynamic": { + "PEDESTRIAN": { + "name": "SingleIntegrator", + "distribution": true, + "limits": {} + }, + "VEHICLE": { + "name": "Unicycle", + "distribution": true, + "limits": { + "max_a": 4, + "min_a": -5, + "max_heading_change": 0.7, + "min_heading_change": -0.7 + } + } + }, + + "state": { + "PEDESTRIAN": { + "position": ["x", "y"], + "velocity": ["x", "y"], + "acceleration": ["x", "y"] + }, + "VEHICLE": { + "position": ["x", "y"], + "velocity": ["x", "y"], + "acceleration": ["x", "y"], + "heading": ["°", "d°"] + } + }, + + "pred_state": { + "VEHICLE": { + "position": ["x", "y"] + }, + "PEDESTRIAN": { + "position": ["x", "y"] + } + }, + + "log_histograms": false +} diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/dataset_envs/eth_val.pkl b/forge/test/models/pytorch/multimodal/trajectron/trajectron/dataset_envs/eth_val.pkl new file mode 100644 index 000000000..8afc0d7c0 Binary files /dev/null and b/forge/test/models/pytorch/multimodal/trajectron/trajectron/dataset_envs/eth_val.pkl differ diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/__init__.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/__init__.py new file mode 100644 index 000000000..422f33fc3 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/__init__.py @@ -0,0 +1,11 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 +from .data_structures import RingBuffer, SingleHeaderNumpyArray, DoubleHeaderNumpyArray +from .scene import Scene +from .node import Node +from .scene_graph import TemporalSceneGraph, SceneGraph +from .environment import Environment +from .node_type import NodeTypeEnum +from .data_utils import derivative_of +from .map import GeometricMap diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/data_structures.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/data_structures.py new file mode 100644 index 000000000..20e50e83e --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/data_structures.py @@ -0,0 +1,282 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 +import numpy as np +import pandas as pd +from collections.abc import Sequence +from collections import OrderedDict + + +class RingBuffer(Sequence): + def __init__(self, capacity, dtype=float, allow_overwrite=True): + """ + Create a new ring buffer with the given capacity and element type. 
+ Code copy-pasted from: https://github.com/eric-wieser/numpy_ringbuffer + + Parameters + ---------- + capacity: int + The maximum capacity of the ring buffer + dtype: data-type, optional + Desired type of buffer elements. Use a type like (float, 2) to + produce a buffer with shape (N, 2) + allow_overwrite: bool + If false, throw an IndexError when trying to append to an already + full buffer + """ + self._arr = np.full(capacity, np.nan, dtype) + self._left_index = 0 + self._right_index = 0 + self._capacity = capacity + self._allow_overwrite = allow_overwrite + + def _unwrap(self): + """Copy the data from this buffer into unwrapped form""" + return np.concatenate( + ( + self._arr[self._left_index : min(self._right_index, self._capacity)], + self._arr[: max(self._right_index - self._capacity, 0)], + ) + ) + + def _fix_indices(self): + """ + Enforce our invariant that 0 <= self._left_index < self._capacity + """ + if self._left_index >= self._capacity: + self._left_index -= self._capacity + self._right_index -= self._capacity + elif self._left_index < 0: + self._left_index += self._capacity + self._right_index += self._capacity + + @property + def is_full(self): + """True if there is no more space in the buffer""" + return len(self) == self._capacity + + # numpy compatibility + def __array__(self): + return self._unwrap() + + @property + def dtype(self): + return self._arr.dtype + + @property + def shape(self): + return (len(self),) + self._arr.shape[1:] + + # these mirror methods from deque + @property + def maxlen(self): + return self._capacity + + def append(self, value): + if self.is_full: + if not self._allow_overwrite: + raise IndexError("append to a full RingBuffer with overwrite disabled") + elif not len(self): + return + else: + self._left_index += 1 + + self._arr[self._right_index % self._capacity] = value + self._right_index += 1 + self._fix_indices() + + def appendleft(self, value): + if self.is_full: + if not self._allow_overwrite: + raise IndexError("append to a full RingBuffer with overwrite disabled") + elif not len(self): + return + else: + self._right_index -= 1 + + self._left_index -= 1 + self._fix_indices() + self._arr[self._left_index] = value + + def pop(self): + if len(self) == 0: + raise IndexError("pop from an empty RingBuffer") + self._right_index -= 1 + self._fix_indices() + res = self._arr[self._right_index % self._capacity] + return res + + def popleft(self): + if len(self) == 0: + raise IndexError("pop from an empty RingBuffer") + res = self._arr[self._left_index] + self._left_index += 1 + self._fix_indices() + return res + + def extend(self, values): + lv = len(values) + if len(self) + lv > self._capacity: + if not self._allow_overwrite: + raise IndexError("extend a RingBuffer such that it would overflow, with overwrite disabled") + elif not len(self): + return + if lv >= self._capacity: + # wipe the entire array! - this may not be threadsafe + self._arr[...] 
= values[-self._capacity :] + self._right_index = self._capacity + self._left_index = 0 + return + + ri = self._right_index % self._capacity + sl1 = np.s_[ri : min(ri + lv, self._capacity)] + sl2 = np.s_[: max(ri + lv - self._capacity, 0)] + self._arr[sl1] = values[: sl1.stop - sl1.start] + self._arr[sl2] = values[sl1.stop - sl1.start :] + self._right_index += lv + + self._left_index = max(self._left_index, self._right_index - self._capacity) + self._fix_indices() + + def extendleft(self, values): + lv = len(values) + if len(self) + lv > self._capacity: + if not self._allow_overwrite: + raise IndexError("extend a RingBuffer such that it would overflow, with overwrite disabled") + elif not len(self): + return + if lv >= self._capacity: + # wipe the entire array! - this may not be threadsafe + self._arr[...] = values[: self._capacity] + self._right_index = self._capacity + self._left_index = 0 + return + + self._left_index -= lv + self._fix_indices() + li = self._left_index + sl1 = np.s_[li : min(li + lv, self._capacity)] + sl2 = np.s_[: max(li + lv - self._capacity, 0)] + self._arr[sl1] = values[: sl1.stop - sl1.start] + self._arr[sl2] = values[sl1.stop - sl1.start :] + + self._right_index = min(self._right_index, self._left_index + self._capacity) + + # implement Sequence methods + def __len__(self): + return self._right_index - self._left_index + + def __getitem__(self, item): + # handle simple (b[1]) and basic (b[np.array([1, 2, 3])]) fancy indexing specially + if not isinstance(item, tuple): + item_arr = np.asarray(item) + if issubclass(item_arr.dtype.type, np.integer): + item_arr = (item_arr + self._left_index) % self._capacity + return self._arr[item_arr] + + # for everything else, get it right at the expense of efficiency + return self._unwrap()[item] + + def __iter__(self): + # alarmingly, this is comparable in speed to using itertools.chain + return iter(self._unwrap()) + + # Everything else + def __repr__(self): + return "".format(np.asarray(self)) + + +class DoubleHeaderNumpyArray(object): + def __init__(self, data: np.ndarray, header: list): + """ + Data Structure mirroring some functionality of double indexed pandas DataFrames. + Indexing options are: + [:, (header1, header2)] + [:, [(header1, header2), (header1, header2)]] + [:, {header1: [header21, header22]}] + + A SingleHeaderNumpyArray can is returned if an element of the first header is querried as attribut: + doubleHeaderNumpyArray.position -> SingleHeaderNumpyArray + + :param data: The numpy array. + :param header: The double header structure as list of tuples [(header11, header21), (header11, header22) ...] 
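+
+        Illustrative usage, assuming ('position', 'x') and ('position', 'y') header entries:
+            arr[:, ('position', 'x')]                       # one column
+            arr[:, [('position', 'x'), ('position', 'y')]]  # several columns
+            arr[:, {'position': ['x', 'y']}]                # tree-style lookup
+            arr.position                                    # SingleHeaderNumpyArray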
+ """ + self.data = data + self.header = header + self.double_header_lookup = OrderedDict() + self.tree_header_lookup = OrderedDict() + for i, header_item in enumerate(header): + self.double_header_lookup[header_item] = i + if header_item[0] not in self.tree_header_lookup: + self.tree_header_lookup[header_item[0]] = dict() + self.tree_header_lookup[header_item[0]][header_item[1]] = i + + def __mul__(self, other): + return DoubleHeaderNumpyArray(self.data * other, self.header) + + def get_single_header_array(self, h1: str, rows=slice(None, None, None)): + data_integer_indices = list() + h2_list = list() + for h2 in self.tree_header_lookup[h1]: + data_integer_indices.append(self.tree_header_lookup[h1][h2]) + h2_list.append(h2) + return SingleHeaderNumpyArray(self.data[rows, data_integer_indices], h2_list) + + def __getitem__(self, item): + rows, columns = item + data_integer_indices = list() + if type(columns) is dict: + for h1, h2s in columns.items(): + for h2 in h2s: + data_integer_indices.append(self.double_header_lookup[(h1, h2)]) + return self.data[rows, data_integer_indices] + elif type(columns) is list: + for column in columns: + assert type(column) is tuple, "If Index is list it hast to be list of double header tuples." + data_integer_indices.append(self.double_header_lookup[column]) + return self.data[rows, data_integer_indices] + elif type(columns) is tuple: + return self.data[rows, self.double_header_lookup[columns]] + else: + assert type(item) is str, "Index must be str, list of tuples or dict of tree structure." + return self.get_single_header_array(item, rows=rows) + + def __getattr__(self, item): + if not item.startswith("_"): + if item in self.tree_header_lookup.keys(): + return self.get_single_header_array(item) + else: + try: + return self.data.__getattribute__(item) + except AttributeError: + return super().__getattribute__(item) + else: + return super().__getattribute__(item) + + +class SingleHeaderNumpyArray(object): + def __init__(self, data: np.ndarray, header: list): + self.data = data + self.header_lookup = OrderedDict({h: i for i, h in enumerate(header)}) + + def __getitem__(self, item): + rows, columns = item + data_integer_indices = list() + if type(columns) is list or type(columns) is tuple: + for column in columns: + data_integer_indices.append(self.header_lookup[column]) + else: + data_integer_indices = self.header_lookup[columns] + return self.data[rows, data_integer_indices] + + def __getattr__(self, item): + if not item.startswith("_"): + if item in self.header_lookup.keys(): + return self[:, item] + else: + try: + return self.data.__getattribute__(item) + except AttributeError: + return super().__getattribute__(item) + else: + return super().__getattribute__(item) diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/data_utils.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/data_utils.py new file mode 100644 index 000000000..f8c5d1ff9 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/data_utils.py @@ -0,0 +1,35 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 +import numpy as np + + +def make_continuous_copy(alpha): + alpha = (alpha + np.pi) % (2.0 * np.pi) - np.pi + continuous_x = np.zeros_like(alpha) + continuous_x[0] = alpha[0] + for i in range(1, len(alpha)): + if not (np.sign(alpha[i]) == np.sign(alpha[i - 1])) and np.abs(alpha[i]) > np.pi / 2: + continuous_x[i] = ( + continuous_x[i - 1] + (alpha[i] - 
alpha[i - 1]) - np.sign((alpha[i] - alpha[i - 1])) * 2 * np.pi + ) + else: + continuous_x[i] = continuous_x[i - 1] + (alpha[i] - alpha[i - 1]) + + return continuous_x + + +def derivative_of(x, dt=1, radian=False): + if radian: + x = make_continuous_copy(x) + + not_nan_mask = ~np.isnan(x) + masked_x = x[not_nan_mask] + + if masked_x.shape[-1] < 2: + return np.zeros_like(x) + + dx = np.full_like(x, np.nan) + dx[not_nan_mask] = np.ediff1d(masked_x, to_begin=(masked_x[1] - masked_x[0])) / dt + + return dx diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/environment.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/environment.py new file mode 100644 index 000000000..48bf80d4f --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/environment.py @@ -0,0 +1,66 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 +import json +import numpy as np +from itertools import product +from .node_type import NodeTypeEnum + + +class Environment(object): + def __init__(self, node_type_list, standardization, scenes=None, attention_radius=None, robot_type=None): + self.scenes = scenes + self.node_type_list = node_type_list + self.attention_radius = attention_radius + self.NodeType = NodeTypeEnum(node_type_list) + self.robot_type = robot_type + + self.standardization = standardization + self.standardize_param_memo = dict() + + self._scenes_resample_prop = None + + def get_edge_types(self): + return list(product(self.NodeType, repeat=2)) + + def get_standardize_params(self, state, node_type): + memo_key = (json.dumps(state), node_type) + if memo_key in self.standardize_param_memo: + return self.standardize_param_memo[memo_key] + + standardize_mean_list = list() + standardize_std_list = list() + for entity, dims in state.items(): + for dim in dims: + standardize_mean_list.append(self.standardization[node_type][entity][dim]["mean"]) + standardize_std_list.append(self.standardization[node_type][entity][dim]["std"]) + standardize_mean = np.stack(standardize_mean_list) + standardize_std = np.stack(standardize_std_list) + + self.standardize_param_memo[memo_key] = (standardize_mean, standardize_std) + return standardize_mean, standardize_std + + def standardize(self, array, state, node_type, mean=None, std=None): + if mean is None and std is None: + mean, std = self.get_standardize_params(state, node_type) + elif mean is None and std is not None: + mean, _ = self.get_standardize_params(state, node_type) + elif mean is not None and std is None: + _, std = self.get_standardize_params(state, node_type) + return np.where(np.isnan(array), np.array(np.nan), (array - mean) / std) + + def unstandardize(self, array, state, node_type, mean=None, std=None): + if mean is None and std is None: + mean, std = self.get_standardize_params(state, node_type) + elif mean is None and std is not None: + mean, _ = self.get_standardize_params(state, node_type) + elif mean is not None and std is None: + _, std = self.get_standardize_params(state, node_type) + return array * std + mean + + @property + def scenes_resample_prop(self): + if self._scenes_resample_prop is None: + self._scenes_resample_prop = np.array([scene.resample_prob for scene in self.scenes]) + self._scenes_resample_prop = self._scenes_resample_prop / np.sum(self._scenes_resample_prop) + return self._scenes_resample_prop diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/map.py 
b/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/map.py new file mode 100644 index 000000000..47cbb84ca --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/map.py @@ -0,0 +1,201 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 +import torch +import numpy as np +from model.dataset.homography_warper import get_rotation_matrix2d, warp_affine_crop + + +class Map(object): + def __init__(self, data, homography, description=None): + self.data = data + self.homography = homography + self.description = description + + def as_image(self): + raise NotImplementedError + + def get_cropped_maps(self, world_pts, patch_size, rotation=None, device="cpu"): + raise NotImplementedError + + def to_map_points(self, scene_pts): + raise NotImplementedError + + +class GeometricMap(Map): + """ + A Geometric Map is a int tensor of shape [layers, x, y]. The homography must transform a point in scene + coordinates to the respective point in map coordinates. + + :param data: Numpy array of shape [layers, x, y] + :param homography: Numpy array of shape [3, 3] + """ + + def __init__(self, data, homography, description=None): + # assert isinstance(data.dtype, np.floating), "Geometric Maps must be float values." + super(GeometricMap, self).__init__(data, homography, description=description) + + self._last_padding = None + self._last_padded_map = None + self._torch_map = None + + def torch_map(self, device): + if self._torch_map is not None: + return self._torch_map + self._torch_map = torch.tensor(self.data, dtype=torch.uint8, device=device) + return self._torch_map + + def as_image(self): + # We have to transpose x and y to rows and columns. Assumes origin is lower left for image + # Also we move the channels to the last dimension + return (np.transpose(self.data, (2, 1, 0))).astype(np.uint) + + def get_padded_map(self, padding_x, padding_y, device): + if self._last_padding == (padding_x, padding_y): + return self._last_padded_map + else: + self._last_padding = (padding_x, padding_y) + self._last_padded_map = torch.full( + (self.data.shape[0], self.data.shape[1] + 2 * padding_x, self.data.shape[2] + 2 * padding_y), + False, + dtype=torch.uint8, + ) + self._last_padded_map[..., padding_x:-padding_x, padding_y:-padding_y] = self.torch_map(device) + return self._last_padded_map + + @staticmethod + def batch_rotate(map_batched, centers, angles, out_height, out_width): + """ + As the input is a map and the warp_affine works on an image coordinate system we would have to + flip the y axis updown, negate the angles, and flip it back after transformation. + This, however, is the same as not flipping at and not negating the radian. + + :param map_batched: + :param centers: + :param angles: + :param out_height: + :param out_width: + :return: + """ + M = get_rotation_matrix2d(centers, angles, torch.ones_like(angles)) + rotated_map_batched = warp_affine_crop( + map_batched, centers, M, dsize=(out_height, out_width), padding_mode="zeros" + ) + + return rotated_map_batched + + @classmethod + def get_cropped_maps_from_scene_map_batch(cls, maps, scene_pts, patch_size, rotation=None, device="cpu"): + """ + Returns rotated patches of each map around the transformed scene points. 
+ ___________________ + | | | + | |ps[3] | + | | | + | | | + | o|__________| + | | ps[2] | + | | | + |_______|__________| + ps = patch_size + + :param maps: List of GeometricMap objects [bs] + :param scene_pts: Scene points: [bs, 2] + :param patch_size: Extracted Patch size after rotation: [-x, -y, +x, +y] + :param rotation: Rotations in degrees: [bs] + :param device: Device on which the rotated tensors should be returned. + :return: Rotated and cropped tensor patches. + """ + batch_size = scene_pts.shape[0] + lat_size = 2 * np.max((patch_size[0], patch_size[2])) + long_size = 2 * np.max((patch_size[1], patch_size[3])) + assert lat_size % 2 == 0, "Patch width must be divisible by 2" + assert long_size % 2 == 0, "Patch length must be divisible by 2" + lat_size_half = lat_size // 2 + long_size_half = long_size // 2 + + context_padding_x = int(np.ceil(np.sqrt(2) * lat_size)) + context_padding_y = int(np.ceil(np.sqrt(2) * long_size)) + + centers = torch.tensor( + [s_map.to_map_points(scene_pts[np.newaxis, i]) for i, s_map in enumerate(maps)], + dtype=torch.long, + device=device, + ).squeeze(dim=1) + torch.tensor([context_padding_x, context_padding_y], device=device, dtype=torch.long) + + padded_map = [s_map.get_padded_map(context_padding_x, context_padding_y, device=device) for s_map in maps] + + padded_map_batched = torch.stack( + [ + padded_map[i][ + ..., + centers[i, 0] - context_padding_x : centers[i, 0] + context_padding_x, + centers[i, 1] - context_padding_y : centers[i, 1] + context_padding_y, + ] + for i in range(centers.shape[0]) + ], + dim=0, + ) + + center_patches = torch.tensor([[context_padding_y, context_padding_x]], dtype=torch.int, device=device).repeat( + batch_size, 1 + ) + + if rotation is not None: + angles = torch.Tensor(rotation) + else: + angles = torch.zeros(batch_size) + + rotated_map_batched = cls.batch_rotate( + padded_map_batched / 255.0, center_patches.float(), angles, long_size, lat_size + ) + + del padded_map_batched + + return rotated_map_batched[ + ..., + long_size_half - patch_size[1] : (long_size_half + patch_size[3]), + lat_size_half - patch_size[0] : (lat_size_half + patch_size[2]), + ] + + def get_cropped_maps(self, scene_pts, patch_size, rotation=None, device="cpu"): + """ + Returns rotated patches of the map around the transformed scene points. + ___________________ + | | | + | |ps[3] | + | | | + | | | + | o|__________| + | | ps[2] | + | | | + |_______|__________| + ps = patch_size + + :param scene_pts: Scene points: [bs, 2] + :param patch_size: Extracted Patch size after rotation: [-lat, -long, +lat, +long] + :param rotation: Rotations in degrees: [bs] + :param device: Device on which the rotated tensors should be returned. + :return: Rotated and cropped tensor patches. 
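+
+        Illustrative call, assuming scene_pts of shape [bs, 2] and the config's pedestrian patch size:
+            patches = geometric_map.get_cropped_maps(scene_pts, patch_size=[50, 10, 50, 90])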
+ """ + return self.get_cropped_maps_from_scene_map_batch( + [self] * scene_pts.shape[0], scene_pts, patch_size, rotation=rotation, device=device + ) + + def to_map_points(self, scene_pts): + org_shape = None + if len(scene_pts.shape) > 2: + org_shape = scene_pts.shape + scene_pts = scene_pts.reshape((-1, 2)) + N, dims = scene_pts.shape + points_with_one = np.ones((dims + 1, N)) + points_with_one[:dims] = scene_pts.T + map_points = (self.homography @ points_with_one).T[..., :dims] + if org_shape is not None: + map_points = map_points.reshape(org_shape) + return map_points + + +class ImageMap(Map): # TODO Implement for image maps -> watch flipped coordinate system + def __init__(self): + raise NotImplementedError diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/node.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/node.py new file mode 100644 index 000000000..b27b3ce17 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/node.py @@ -0,0 +1,256 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 +import random +import numpy as np +import pandas as pd +from .data_structures import DoubleHeaderNumpyArray + +# from ncls import NCLS + + +class Node(object): + def __init__( + self, + node_type, + node_id, + data, + length=None, + width=None, + height=None, + first_timestep=0, + is_robot=False, + description="", + frequency_multiplier=1, + non_aug_node=None, + ): + self.type = node_type + self.id = node_id + self.length = length + self.width = width + self.height = height + self.first_timestep = first_timestep + self.non_aug_node = non_aug_node + + if data is not None: + if isinstance(data, pd.DataFrame): + self.data = DoubleHeaderNumpyArray(data.values, list(data.columns)) + elif isinstance(data, DoubleHeaderNumpyArray): + self.data = data + else: + self.data = None + + self.is_robot = is_robot + self._last_timestep = None + self.description = description + self.frequency_multiplier = frequency_multiplier + + self.forward_in_time_on_next_override = False + + def __eq__(self, other): + return ( + (isinstance(other, self.__class__) or isinstance(self, other.__class__)) + and self.id == other.id + and self.type == other.type + ) + + def __ne__(self, other): + return not self.__eq__(other) + + def __hash__(self): + return hash((self.type, self.id)) + + def __repr__(self): + return "/".join([self.type.name, self.id]) + + def overwrite_data(self, data, header, forward_in_time_on_next_overwrite=False): + """ + This function hard overwrites the data matrix. When using it you have to make sure that the columns + in the new data matrix correspond to the old structure. As well as setting first_timestep. + + :param data: New data matrix + :param forward_in_time_on_next_overwrite: On the !!NEXT!! call of overwrite_data first_timestep will be increased. + :return: None + """ + if header is None: + self.data.data = data + else: + self.data = DoubleHeaderNumpyArray(data, header) + + self._last_timestep = None + if self.forward_in_time_on_next_override: + self.first_timestep += 1 + self.forward_in_time_on_next_override = forward_in_time_on_next_overwrite + + def scene_ts_to_node_ts(self, scene_ts) -> (np.ndarray, int, int): + """ + Transforms timestamp from scene into timeframe of node data. 
+ + :param scene_ts: Scene timesteps + :return: ts: Transformed timesteps, paddingl: Number of timesteps in scene range which are not available in + node data before data is available. paddingu: Number of timesteps in scene range which are not + available in node data after data is available. + """ + paddingl = (self.first_timestep - scene_ts[0]).clip(0) + paddingu = (scene_ts[1] - self.last_timestep).clip(0) + ts = np.array(scene_ts).clip(min=self.first_timestep, max=self.last_timestep) - self.first_timestep + return ts, paddingl, paddingu + + def history_points_at(self, ts) -> int: + """ + Number of history points in trajectory. Timestep is exclusive. + + :param ts: Scene timestep where the number of history points are queried. + :return: Number of history timesteps. + """ + return ts - self.first_timestep + + def get(self, tr_scene, state, padding=np.nan) -> np.ndarray: + """ + Returns a time range of multiple properties of the node. + + :param tr_scene: The timestep range (inklusive). + :param state: The state description for which the properties are returned. + :param padding: The value which should be used for padding if not enough information is available. + :return: Array of node property values. + """ + if tr_scene.size == 1: + tr_scene = np.array([tr_scene[0], tr_scene[0]]) + length = tr_scene[1] - tr_scene[0] + 1 # tr is inclusive + tr, paddingl, paddingu = self.scene_ts_to_node_ts(tr_scene) + data_array = self.data[tr[0] : tr[1] + 1, state] + padded_data_array = np.full((length, data_array.shape[1]), fill_value=padding) + padded_data_array[paddingl : length - paddingu] = data_array + return padded_data_array + + @property + def timesteps(self) -> int: + """ + Number of available timesteps for node. + + :return: Number of available timesteps. + """ + return self.data.shape[0] + + @property + def last_timestep(self) -> int: + """ + Nodes last timestep in the Scene. + + :return: Nodes last timestep. + """ + if self._last_timestep is None: + self._last_timestep = self.first_timestep + self.timesteps - 1 + return self._last_timestep + + +class MultiNode(Node): + def __init__(self, node_type, node_id, nodes_list, is_robot=False): + super(MultiNode, self).__init__(node_type, node_id, data=None, is_robot=is_robot) + self.nodes_list = nodes_list + for node in self.nodes_list: + node.is_robot = is_robot + + self.first_timestep = min(node.first_timestep for node in self.nodes_list) + self._last_timestep = max(node.last_timestep for node in self.nodes_list) + + starts = np.array([node.first_timestep for node in self.nodes_list], dtype=np.int64) + ends = np.array([node.last_timestep for node in self.nodes_list], dtype=np.int64) + ids = np.arange(len(self.nodes_list), dtype=np.int64) + self.interval_tree = NCLS(starts, ends, ids) + + @staticmethod + def find_non_overlapping_nodes(nodes_list, min_timesteps=1) -> list: + """ + Greedily finds a set of non-overlapping nodes in the provided scene. + + :return: A list of non-overlapping nodes. 
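+        Nodes are sorted by last_timestep and greedily kept whenever their first_timestep is at or
+        after the previously kept node's last_timestep (classic interval scheduling).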
+ """ + non_overlapping_nodes = list() + nodes = sorted(nodes_list, key=lambda n: n.last_timestep) + current_time = 0 + for node in nodes: + if node.first_timestep >= current_time and node.timesteps >= min_timesteps: + # Include the node + non_overlapping_nodes.append(node) + current_time = node.last_timestep + + return non_overlapping_nodes + + def get_node_at_timesteps(self, scene_ts) -> Node: + possible_node_ranges = list(self.interval_tree.find_overlap(scene_ts[0], scene_ts[1] + 1)) + if not possible_node_ranges: + return Node( + node_type=self.type, node_id="EMPTY", data=self.nodes_list[0].data * np.nan, is_robot=self.is_robot + ) + + node_idx = random.choice(possible_node_ranges)[2] + return self.nodes_list[node_idx] + + def scene_ts_to_node_ts(self, scene_ts) -> (Node, np.ndarray, int, int): + """ + Transforms timestamp from scene into timeframe of node data. + + :param scene_ts: Scene timesteps + :return: ts: Transformed timesteps, paddingl: Number of timesteps in scene range which are not available in + node data before data is available. paddingu: Number of timesteps in scene range which are not + available in node data after data is available. + """ + possible_node_ranges = list(self.interval_tree.find_overlap(scene_ts[0], scene_ts[1] + 1)) + if not possible_node_ranges: + return None, None, None, None + + node_idx = random.choice(possible_node_ranges)[2] + node = self.nodes_list[node_idx] + + paddingl = (node.first_timestep - scene_ts[0]).clip(0) + paddingu = (scene_ts[1] - node.last_timestep).clip(0) + ts = np.array(scene_ts).clip(min=node.first_timestep, max=node.last_timestep) - node.first_timestep + return node, ts, paddingl, paddingu + + def get(self, tr_scene, state, padding=np.nan) -> np.ndarray: + if tr_scene.size == 1: + tr_scene = np.array([tr_scene, tr_scene]) + length = tr_scene[1] - tr_scene[0] + 1 # tr is inclusive + + node, tr, paddingl, paddingu = self.scene_ts_to_node_ts(tr_scene) + if node is None: + state_length = sum([len(entity_dims) for entity_dims in state.values()]) + return np.full((length, state_length), fill_value=padding) + + data_array = node.data[tr[0] : tr[1] + 1, state] + padded_data_array = np.full((length, data_array.shape[1]), fill_value=padding) + padded_data_array[paddingl : length - paddingu] = data_array + return padded_data_array + + def get_all(self, tr_scene, state, padding=np.nan) -> np.ndarray: + # Assumption here is that the user is asking for all of the data in this MultiNode and to return it within a + # full scene-sized output array. + assert tr_scene.size == 2 and tr_scene[0] == 0 and self.last_timestep <= tr_scene[1] + length = tr_scene[1] - tr_scene[0] + 1 # tr is inclusive + state_length = sum([len(entity_dims) for entity_dims in state.values()]) + padded_data_array = np.full((length, state_length), fill_value=padding) + for node in self.nodes_list: + padded_data_array[node.first_timestep : node.last_timestep + 1] = node.data[:, state] + + return padded_data_array + + def history_points_at(self, ts) -> int: + """ + Number of history points in trajectory. Timestep is exclusive. + + :param ts: Scene timestep where the number of history points are queried. + :return: Number of history timesteps. + """ + node_idx = next(self.interval_tree.find_overlap(ts, ts + 1))[2] + node = self.nodes_list[node_idx] + return ts - node.first_timestep + + @property + def timesteps(self) -> int: + """ + Number of available timesteps for node. + + :return: Number of available timesteps. 
+ """ + return self._last_timestep - self.first_timestep + 1 diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/node_type.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/node_type.py new file mode 100644 index 000000000..1513e487a --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/node_type.py @@ -0,0 +1,38 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 +class NodeType(object): + def __init__(self, name, value): + self.name = name + self.value = value + + def __repr__(self): + return self.name + + def __eq__(self, other): + if type(other) == str and self.name == other: + return True + else: + return isinstance(other, self.__class__) and self.name == other.name + + def __ne__(self, other): + return not self.__eq__(other) + + def __hash__(self): + return hash(self.name) + + def __add__(self, other): + return self.name + other + + +class NodeTypeEnum(list): + def __init__(self, node_type_list): + self.node_type_list = node_type_list + node_types = [NodeType(name, node_type_list.index(name) + 1) for name in node_type_list] + super().__init__(node_types) + + def __getattr__(self, name): + if not name.startswith("_") and name in object.__getattribute__(self, "node_type_list"): + return self[object.__getattribute__(self, "node_type_list").index(name)] + else: + return object.__getattribute__(self, name) diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/scene.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/scene.py new file mode 100644 index 000000000..38430e607 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/scene.py @@ -0,0 +1,218 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 +import copy +import numpy as np +from .scene_graph import TemporalSceneGraph, SceneGraph +from .node import MultiNode + + +class Scene(object): + def __init__(self, timesteps, map=None, dt=1, name="", frequency_multiplier=1, aug_func=None, non_aug_scene=None): + self.map = map + self.timesteps = timesteps + self.dt = dt + self.name = name + + self.nodes = [] + + self.robot = None + + self.temporal_scene_graph = None + + self.frequency_multiplier = frequency_multiplier + + self.description = "" + + self.aug_func = aug_func + self.non_aug_scene = non_aug_scene + + def add_robot_from_nodes(self, robot_type): + scenes = [self] + if hasattr(self, "augmented"): + scenes += self.augmented + + for scn in scenes: + nodes_list = [node for node in scn.nodes if node.type == robot_type] + non_overlapping_nodes = MultiNode.find_non_overlapping_nodes(nodes_list, min_timesteps=3) + scn.robot = MultiNode(robot_type, "ROBOT", non_overlapping_nodes, is_robot=True) + + for node in non_overlapping_nodes: + scn.nodes.remove(node) + scn.nodes.append(scn.robot) + + def get_clipped_input_dict(self, timestep, state): + input_dict = dict() + existing_nodes = self.get_nodes_clipped_at_time(timesteps=np.array([timestep]), state=state) + tr_scene = np.array([timestep, timestep]) + for node in existing_nodes: + input_dict[node] = node.get(tr_scene, state[node.type]) + + return input_dict + + def get_scene_graph( + self, timestep, attention_radius=None, edge_addition_filter=None, edge_removal_filter=None + ) -> SceneGraph: + """ + Returns the Scene Graph for a given timestep. 
If the Temporal Scene Graph was pre calculated, + the temporal scene graph is sliced. Otherwise the scene graph is calculated on the spot. + + :param timestep: Timestep for which the scene graph is returned. + :param attention_radius: Attention radius for each node type permutation. (Only online) + :param edge_addition_filter: Filter for adding edges (Only online) + :param edge_removal_filter: Filter for removing edges (Only online) + :return: Scene Graph for given timestep. + """ + if self.temporal_scene_graph is None: + timestep_range = np.array([timestep - len(edge_removal_filter), timestep]) + node_pos_dict = dict() + present_nodes = self.present_nodes(np.array([timestep])) + + for node in present_nodes[timestep]: + node_pos_dict[node] = np.squeeze(node.get(timestep_range, {"position": ["x", "y"]})) + tsg = TemporalSceneGraph.create_from_temp_scene_dict( + node_pos_dict, + attention_radius, + duration=(len(edge_removal_filter) + 1), + edge_addition_filter=edge_addition_filter, + edge_removal_filter=edge_removal_filter, + ) + + return tsg.to_scene_graph( + t=len(edge_removal_filter), t_hist=len(edge_removal_filter), t_fut=len(edge_addition_filter) + ) + else: + return self.temporal_scene_graph.to_scene_graph( + timestep, len(edge_removal_filter), len(edge_addition_filter) + ) + + def calculate_scene_graph(self, attention_radius, edge_addition_filter=None, edge_removal_filter=None) -> None: + """ + Calculate the Temporal Scene Graph for the entire Scene. + + :param attention_radius: Attention radius for each node type permutation. + :param edge_addition_filter: Filter for adding edges. + :param edge_removal_filter: Filter for removing edges. + :return: None + """ + timestep_range = np.array([0, self.timesteps - 1]) + node_pos_dict = dict() + + for node in self.nodes: + if type(node) is MultiNode: + node_pos_dict[node] = np.squeeze(node.get_all(timestep_range, {"position": ["x", "y"]})) + else: + node_pos_dict[node] = np.squeeze(node.get(timestep_range, {"position": ["x", "y"]})) + + self.temporal_scene_graph = TemporalSceneGraph.create_from_temp_scene_dict( + node_pos_dict, + attention_radius, + duration=self.timesteps, + edge_addition_filter=edge_addition_filter, + edge_removal_filter=edge_removal_filter, + ) + + def duration(self): + """ + Calculates the duration of the scene. + + :return: Duration of the scene in s. + """ + return self.timesteps * self.dt + + def present_nodes( + self, timesteps, type=None, min_history_timesteps=0, min_future_timesteps=0, return_robot=True + ) -> dict: + """ + Finds all present nodes in the scene at a given timestemp + + :param timesteps: Timestep(s) for which all present nodes should be returned + :param type: Node type which should be returned. If None all node types are returned. + :param min_history_timesteps: Minimum history timesteps of a node to be returned. + :param min_future_timesteps: Minimum future timesteps of a node to be returned. + :param return_robot: Return a node if it is the robot. + :return: Dictionary with timesteps as keys and list of nodes as value. 
+ """ + + present_nodes = {} + + for node in self.nodes: + if node.is_robot and not return_robot: + continue + if type is None or node.type == type: + lower_bound = timesteps - min_history_timesteps + upper_bound = timesteps + min_future_timesteps + mask = (node.first_timestep <= lower_bound) & (upper_bound <= node.last_timestep) + if mask.any(): + timestep_indices_present = np.nonzero(mask)[0] + for timestep_index_present in timestep_indices_present: + if timesteps[timestep_index_present] in present_nodes.keys(): + present_nodes[timesteps[timestep_index_present]].append(node) + else: + present_nodes[timesteps[timestep_index_present]] = [node] + + return present_nodes + + def get_nodes_clipped_at_time(self, timesteps, state): + clipped_nodes = list() + + existing_nodes = self.present_nodes(timesteps) + all_nodes = set().union(*existing_nodes.values()) + if not all_nodes: + return clipped_nodes + + tr_scene = np.array([timesteps.min(), timesteps.max()]) + data_header_memo = dict() + for node in all_nodes: + if isinstance(node, MultiNode): + copied_node = copy.deepcopy(node.get_node_at_timesteps(tr_scene)) + copied_node.id = self.robot.id + else: + copied_node = copy.deepcopy(node) + + clipped_value = node.get(tr_scene, state[node.type]) + + if node.type not in data_header_memo: + data_header = list() + for quantity, values in state[node.type].items(): + for value in values: + data_header.append((quantity, value)) + + data_header_memo[node.type] = data_header + + copied_node.overwrite_data(clipped_value, data_header_memo[node.type]) + copied_node.first_timestep = tr_scene[0] + + clipped_nodes.append(copied_node) + + return clipped_nodes + + def sample_timesteps(self, batch_size, min_future_timesteps=0) -> np.ndarray: + """ + Sample a batch size of possible timesteps for the scene. + + :param batch_size: Number of timesteps to sample. + :param min_future_timesteps: Minimum future timesteps in the scene for a timestep to be returned. + :return: Numpy Array of sampled timesteps. + """ + if batch_size > self.timesteps: + batch_size = self.timesteps + return np.random.choice(np.arange(0, self.timesteps - min_future_timesteps), size=batch_size, replace=False) + + def augment(self): + if self.aug_func is not None: + return self.aug_func(self) + else: + return self + + def get_node_by_id(self, id): + for node in self.nodes: + if node.id == id: + return node + + def __repr__(self): + return ( + f"Scene: Duration: {self.duration()}s," + f" Nodes: {len(self.nodes)}," + f" Map: {'Yes' if self.map is not None else 'No'}." 
+ ) diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/scene_graph.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/scene_graph.py new file mode 100644 index 000000000..63c15bb2c --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/environment/scene_graph.py @@ -0,0 +1,536 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 +import numpy as np +from scipy.spatial.distance import pdist, squareform +import scipy.signal as ss +from collections import defaultdict +import warnings +from .node import Node + + +class Edge(object): + def __init__(self, curr_node, other_node): + self.id = self.get_edge_id(curr_node, other_node) + self.type = self.get_edge_type(curr_node, other_node) + self.curr_node = curr_node + self.other_node = other_node + + @staticmethod + def get_edge_id(n1, n2): + raise NotImplementedError("Use one of the Edge subclasses!") + + @staticmethod + def get_str_from_types(nt1, nt2): + raise NotImplementedError("Use one of the Edge subclasses!") + + @staticmethod + def get_edge_type(n1, n2): + raise NotImplementedError("Use one of the Edge subclasses!") + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.id == other.id + + def __ne__(self, other): + return not self.__eq__(other) + + def __hash__(self): + return hash(self.id) + + def __repr__(self): + return self.id + + +class UndirectedEdge(Edge): + def __init__(self, curr_node, other_node): + super(UndirectedEdge, self).__init__(curr_node, other_node) + + @staticmethod + def get_edge_id(n1, n2): + return "-".join(sorted([str(n1), str(n2)])) + + @staticmethod + def get_str_from_types(nt1, nt2): + return "-".join(sorted([nt1.name, nt2.name])) + + @staticmethod + def get_edge_type(n1, n2): + return "-".join(sorted([n1.type.name, n2.type.name])) + + +class DirectedEdge(Edge): + def __init__(self, curr_node, other_node): + super(DirectedEdge, self).__init__(curr_node, other_node) + + @staticmethod + def get_edge_id(n1, n2): + return "->".join([str(n1), str(n2)]) + + @staticmethod + def get_str_from_types(nt1, nt2): + return "->".join([nt1.name, nt2.name]) + + @staticmethod + def get_edge_type(n1, n2): + return "->".join([n1.type.name, n2.type.name]) + + +class TemporalSceneGraph(object): + def __init__( + self, + edge_radius, + nodes=None, + adj_cube=np.zeros((1, 0, 0)), + weight_cube=np.zeros((1, 0, 0)), + node_type_mat=np.zeros((0, 0)), + edge_scaling=None, + ): + self.edge_radius = edge_radius + self.nodes = nodes + if nodes is None: + self.nodes = np.array([]) + self.adj_cube = adj_cube + self.weight_cube = weight_cube + self.node_type_mat = node_type_mat + self.adj_mat = np.max(self.adj_cube, axis=0).clip(max=1.0) + self.edge_scaling = edge_scaling + self.node_index_lookup = None + self.calculate_node_index_lookup() + + def calculate_node_index_lookup(self): + node_index_lookup = dict() + for i, node in enumerate(self.nodes): + node_index_lookup[node] = i + + self.node_index_lookup = node_index_lookup + + def get_num_edges(self, t=0): + return np.sum(self.adj_cube[t]) // 2 + + def get_index(self, node): + return self.node_index_lookup[node] + + @classmethod + def create_from_temp_scene_dict( + cls, + scene_temp_dict, + attention_radius, + duration=1, + edge_addition_filter=None, + edge_removal_filter=None, + online=False, + ): + """ + Construct a spatiotemporal graph from node positions in a dataset. 
+ + :param scene_temp_dict: Dict with all nodes in scene as keys and np.ndarray with positions as value + :param attention_radius: Attention radius dict. + :param duration: Temporal duration of the graph. + :param edge_addition_filter: - + :param edge_removal_filter: - + :return: TemporalSceneGraph + """ + + nodes = scene_temp_dict.keys() + N = len(nodes) + total_timesteps = duration + + if N == 0: + return TemporalSceneGraph(attention_radius) + + position_cube = np.full((total_timesteps, N, 2), np.nan) + + adj_cube = np.zeros((total_timesteps, N, N), dtype=np.int8) + dist_cube = np.zeros((total_timesteps, N, N), dtype=np.float) + + node_type_mat = np.zeros((N, N), dtype=np.int8) + node_attention_mat = np.zeros((N, N), dtype=np.float) + + for node_idx, node in enumerate(nodes): + if online: + # RingBuffers do not have a fixed constant size. Instead, they grow up to their capacity. Thus, + # we need to fill the values preceding the RingBuffer values with NaNs to make them fill the + # position_cube. + position_cube[-scene_temp_dict[node].shape[0] :, node_idx] = scene_temp_dict[node] + else: + position_cube[:, node_idx] = scene_temp_dict[node] + + node_type_mat[:, node_idx] = node.type.value + for node_idx_from, node_from in enumerate(nodes): + node_attention_mat[node_idx_from, node_idx] = attention_radius[(node_from.type, node.type)] + + np.fill_diagonal(node_type_mat, 0) + + for timestep in range(position_cube.shape[0]): + dists = squareform(pdist(position_cube[timestep], metric="euclidean")) + + # Put a 1 for all agent pairs which are closer than the edge_radius. + # Can produce a warning as dists can be nan if no data for node is available. + # This is accepted as nan <= x evaluates to False + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + adj_matrix = (dists <= node_attention_mat).astype(np.int8) * node_type_mat + + # Remove self-loops. + np.fill_diagonal(adj_matrix, 0) + + adj_cube[timestep] = adj_matrix + dist_cube[timestep] = dists + + dist_cube[np.isnan(dist_cube)] = 0.0 + weight_cube = np.divide(1.0, dist_cube, out=np.zeros_like(dist_cube), where=(dist_cube > 0.0)) + edge_scaling = None + if edge_addition_filter is not None and edge_removal_filter is not None: + edge_scaling = cls.calculate_edge_scaling(adj_cube, edge_addition_filter, edge_removal_filter) + tsg = cls( + attention_radius, np.array(list(nodes)), adj_cube, weight_cube, node_type_mat, edge_scaling=edge_scaling + ) + return tsg + + @staticmethod + def calculate_edge_scaling(adj_cube, edge_addition_filter, edge_removal_filter): + shifted_right = np.pad( + adj_cube, ((len(edge_addition_filter) - 1, 0), (0, 0), (0, 0)), "constant", constant_values=0 + ) + + new_edges = np.minimum(ss.convolve(shifted_right, np.reshape(edge_addition_filter, (-1, 1, 1)), "full"), 1.0)[ + (len(edge_addition_filter) - 1) : -(len(edge_addition_filter) - 1) + ] + + new_edges[adj_cube == 0] = 0 + + result = np.minimum(ss.convolve(new_edges, np.reshape(edge_removal_filter, (-1, 1, 1)), "full"), 1.0)[ + : -(len(edge_removal_filter) - 1) + ] + + return result + + def to_scene_graph(self, t, t_hist=0, t_fut=0): + """ + Creates a Scene Graph from a Temporal Scene Graph + + :param t: Time in Temporal Scene Graph for which Scene Graph is created. + :param t_hist: Number of history timesteps which are considered to form edges in Scene Graph. + :param t_fut: Number of future timesteps which are considered to form edges in Scene Graph. 
+ :return: SceneGraph + """ + lower_t = np.clip(t - t_hist, a_min=0, a_max=None) + higher_t = np.clip(t + t_fut + 1, a_min=None, a_max=self.adj_cube.shape[0] + 1) + adj_mat = np.max(self.adj_cube[lower_t:higher_t], axis=0) + weight_mat = np.max(self.weight_cube[lower_t:higher_t], axis=0) + return SceneGraph( + self.edge_radius, + self.nodes, + adj_mat, + weight_mat, + self.node_type_mat, + self.node_index_lookup, + edge_scaling=self.edge_scaling[t] if self.edge_scaling is not None else None, + ) + + +class SceneGraph(object): + def __init__( + self, + edge_radius, + nodes=None, + adj_mat=np.zeros((0, 0)), + weight_mat=np.zeros((0, 0)), + node_type_mat=np.zeros((0, 0)), + node_index_lookup=None, + edge_scaling=None, + ): + self.edge_radius = edge_radius + self.nodes = nodes + if nodes is None: + self.nodes = np.array([]) + self.node_type_mat = node_type_mat + self.adj_mat = adj_mat + self.weight_mat = weight_mat + self.edge_scaling = edge_scaling + self.node_index_lookup = node_index_lookup + + def get_index(self, node): + return self.node_index_lookup[node] + + def get_num_edges(self): + return np.sum(self.adj_mat) // 2 + + def get_neighbors(self, node, node_type): + """ + Get all neighbors of a node. + + :param node: Node for which all neighbors are returned. + :param node_type: Specifies node types which are returned. + :return: List of all neighbors. + """ + node_index = self.get_index(node) + connection_mask = self.get_connection_mask(node_index) + mask = (self.node_type_mat[node_index] == node_type.value) * connection_mask + return self.nodes[mask] + + def get_edge_scaling(self, node=None): + if node is None: + return self.edge_scaling + else: + node_index = self.get_index(node) + connection_mask = self.get_connection_mask(node_index) + return self.edge_scaling[node_index, connection_mask] + + def get_edge_weight(self, node=None): + if node is None: + return self.weight_mat + else: + node_index = self.get_index(node) + connection_mask = self.get_connection_mask(node_index) + return self.weight_mat[node_index, connection_mask] + + def get_connection_mask(self, node_index): + if self.edge_scaling is None: # We do not use edge scaling + return self.adj_mat[node_index] > 0.0 + else: + return self.edge_scaling[node_index] > 1e-2 + + def __sub__(self, other): + new_nodes = [node for node in self.nodes if node not in other.nodes] + removed_nodes = [node for node in other.nodes if node not in self.nodes] + + our_types = set(node.type for node in self.nodes) + other_types = set(node.type for node in other.nodes) + all_node_types = our_types | other_types + + new_neighbors = defaultdict(lambda: defaultdict(set)) + for node in self.nodes: + if node in removed_nodes: + continue + + if node in other.nodes: + for node_type in all_node_types: + new_items = set(self.get_neighbors(node, node_type)) - set(other.get_neighbors(node, node_type)) + if len(new_items) > 0: + new_neighbors[node][DirectedEdge.get_edge_type(node, Node(node_type, None, None))] = new_items + else: + for node_type in our_types: + neighbors = self.get_neighbors(node, node_type) + if len(neighbors) > 0: + new_neighbors[node][DirectedEdge.get_edge_type(node, Node(node_type, None, None))] = set( + neighbors + ) + + removed_neighbors = defaultdict(lambda: defaultdict(set)) + for node in other.nodes: + if node in removed_nodes: + continue + + if node in self.nodes: + for node_type in all_node_types: + removed_items = set(other.get_neighbors(node, node_type)) - set(self.get_neighbors(node, node_type)) + if len(removed_items) > 0: + 
removed_neighbors[node][ + DirectedEdge.get_edge_type(node, Node(node_type, None, None)) + ] = removed_items + else: + for node_type in other_types: + neighbors = other.get_neighbors(node, node_type) + if len(neighbors) > 0: + removed_neighbors[node][DirectedEdge.get_edge_type(node, Node(node_type, None, None))] = set( + neighbors + ) + + return new_nodes, removed_nodes, new_neighbors, removed_neighbors + + +if __name__ == "__main__": + from environment import NodeTypeEnum + import time + + # # # # # # # # # # # # # # # # # + # Testing edge mask calculation # + # # # # # # # # # # # # # # # # # + B = np.array( + [ + [0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0], + [1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0], + [1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0], + [1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0], + ] + )[:, :, np.newaxis, np.newaxis] + print(B.shape) + + edge_addition_filter = [0.25, 0.5, 0.75, 1.0] + edge_removal_filter = [1.0, 0.5, 0.0] + for i in range(B.shape[0]): + A = B[i] # (time, N, N) + + print(A[:, 0, 0]) + + start = time.time() + new_edges = np.minimum(ss.convolve(A, np.reshape(edge_addition_filter, (-1, 1, 1)), "full"), 1.0)[ + (len(edge_addition_filter) - 1) : + ] + old_edges = np.minimum(ss.convolve(A, np.reshape(edge_removal_filter, (-1, 1, 1)), "full"), 1.0)[ + : -(len(edge_removal_filter) - 1) + ] + res = np.minimum(new_edges + old_edges, 1.0)[:, 0, 0] + end = time.time() + print(end - start) + print(res) + + start = time.time() + res = TemporalSceneGraph.calculate_edge_scaling(A, edge_addition_filter, edge_removal_filter)[:, 0, 0] + end = time.time() + print(end - start) + print(res) + + print("-" * 40) + + # # # # # # # # # # # # # # # + # Testing graph subtraction # + # # # # # # # # # # # # # # # + print("\n" + "-" * 40 + "\n") + + node_type_list = ["PEDESTRIAN", "BICYCLE", "VEHICLE"] + nte = NodeTypeEnum(node_type_list) + + attention_radius = dict() + attention_radius[(nte.PEDESTRIAN, nte.PEDESTRIAN)] = 5.0 + attention_radius[(nte.PEDESTRIAN, nte.VEHICLE)] = 20.0 + attention_radius[(nte.PEDESTRIAN, nte.BICYCLE)] = 10.0 + attention_radius[(nte.VEHICLE, nte.PEDESTRIAN)] = 20.0 + attention_radius[(nte.VEHICLE, nte.VEHICLE)] = 20.0 + attention_radius[(nte.VEHICLE, nte.BICYCLE)] = 20.0 + attention_radius[(nte.BICYCLE, nte.PEDESTRIAN)] = 10.0 + attention_radius[(nte.BICYCLE, nte.VEHICLE)] = 20.0 + attention_radius[(nte.BICYCLE, nte.BICYCLE)] = 10.0 + + scene_dict1 = { + Node(nte.PEDESTRIAN, node_id="1"): np.array([1, 0]), + Node(nte.PEDESTRIAN, node_id="2"): np.array([0, 1]), + } + sg1 = TemporalSceneGraph.create_from_temp_scene_dict( + scene_dict1, + attention_radius=attention_radius, + duration=1, + edge_addition_filter=[0.25, 0.5, 0.75, 1.0], + edge_removal_filter=[1.0, 0.0], + ).to_scene_graph(t=0) + + scene_dict2 = { + Node(nte.PEDESTRIAN, node_id="1"): np.array([1, 0]), + Node(nte.PEDESTRIAN, node_id="2"): np.array([1, 1]), + } + sg2 = TemporalSceneGraph.create_from_temp_scene_dict( + scene_dict2, + attention_radius=attention_radius, + duration=1, + edge_addition_filter=[0.25, 0.5, 0.75, 1.0], + edge_removal_filter=[1.0, 0.0], + ).to_scene_graph(t=0) + + new_nodes, removed_nodes, new_neighbors, removed_neighbors = sg2 - sg1 + print("New Nodes:", new_nodes) + print("Removed Nodes:", removed_nodes) + print("New Neighbors:", new_neighbors) + print("Removed Neighbors:", removed_neighbors) + + # # # # # # # # # # # # # # # + print("\n" + "-" * 40 + "\n") + + scene_dict1 = { + Node(nte.PEDESTRIAN, node_id="1"): np.array([1, 0]), + Node(nte.PEDESTRIAN, node_id="2"): np.array([0, 1]), + } 
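Alongside the demo above, a minimal standalone sketch of how an adjacency matrix falls out of pairwise distances and an attention radius may be useful; it mirrors the thresholding done in create_from_temp_scene_dict, but the positions and the single radius are hypothetical and only one node type is assumed.

```python
import numpy as np
from scipy.spatial.distance import pdist, squareform

# Three hypothetical pedestrian positions and one attention radius.
positions = np.array([[1.0, 0.0], [0.0, 1.0], [20.0, 1.0]])
attention_radius = 5.0

# Pairwise Euclidean distances, thresholded by the radius, self-loops removed.
dists = squareform(pdist(positions, metric="euclidean"))
adj = (dists <= attention_radius).astype(np.int8)
np.fill_diagonal(adj, 0)
print(adj)
# [[0 1 0]
#  [1 0 0]
#  [0 0 0]]
```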
+ sg1 = TemporalSceneGraph.create_from_temp_scene_dict( + scene_dict1, + attention_radius=attention_radius, + duration=1, + edge_addition_filter=[0.25, 0.5, 0.75, 1.0], + edge_removal_filter=[1.0, 0.0], + ).to_scene_graph(t=0) + + scene_dict2 = { + Node(nte.PEDESTRIAN, node_id="1"): np.array([1, 0]), + Node(nte.PEDESTRIAN, node_id="2"): np.array([1, 1]), + Node(nte.PEDESTRIAN, node_id="3"): np.array([20, 1]), + } + sg2 = TemporalSceneGraph.create_from_temp_scene_dict( + scene_dict2, + attention_radius=attention_radius, + duration=1, + edge_addition_filter=[0.25, 0.5, 0.75, 1.0], + edge_removal_filter=[1.0, 0.0], + ).to_scene_graph(t=0) + + new_nodes, removed_nodes, new_neighbors, removed_neighbors = sg2 - sg1 + print("New Nodes:", new_nodes) + print("Removed Nodes:", removed_nodes) + print("New Neighbors:", new_neighbors) + print("Removed Neighbors:", removed_neighbors) + + # # # # # # # # # # # # # # # + print("\n" + "-" * 40 + "\n") + + scene_dict1 = { + Node(nte.PEDESTRIAN, node_id="1"): np.array([1, 0]), + Node(nte.PEDESTRIAN, node_id="2"): np.array([0, 1]), + } + sg1 = TemporalSceneGraph.create_from_temp_scene_dict( + scene_dict1, + attention_radius=attention_radius, + duration=1, + edge_addition_filter=[0.25, 0.5, 0.75, 1.0], + edge_removal_filter=[1.0, 0.0], + ).to_scene_graph(t=0) + + scene_dict2 = { + Node(nte.PEDESTRIAN, node_id="1"): np.array([1, 0]), + Node(nte.PEDESTRIAN, node_id="2"): np.array([10, 1]), + Node(nte.PEDESTRIAN, node_id="3"): np.array([20, 1]), + } + sg2 = TemporalSceneGraph.create_from_temp_scene_dict( + scene_dict2, + attention_radius=attention_radius, + duration=1, + edge_addition_filter=[0.25, 0.5, 0.75, 1.0], + edge_removal_filter=[1.0, 0.0], + ).to_scene_graph(t=0) + + new_nodes, removed_nodes, new_neighbors, removed_neighbors = sg2 - sg1 + print("New Nodes:", new_nodes) + print("Removed Nodes:", removed_nodes) + print("New Neighbors:", new_neighbors) + print("Removed Neighbors:", removed_neighbors) + + # # # # # # # # # # # # # # # + print("\n" + "-" * 40 + "\n") + + scene_dict1 = { + Node(nte.PEDESTRIAN, node_id="1"): np.array([0, 0]), + Node(nte.PEDESTRIAN, node_id="2"): np.array([0, 1]), + } + sg1 = TemporalSceneGraph.create_from_temp_scene_dict( + scene_dict1, + attention_radius=attention_radius, + duration=1, + edge_addition_filter=[0.25, 0.5, 0.75, 1.0], + edge_removal_filter=[1.0, 0.0], + ).to_scene_graph(t=0) + + scene_dict2 = { + Node(nte.PEDESTRIAN, node_id="2"): np.array([10, 1]), + Node(nte.PEDESTRIAN, node_id="3"): np.array([12, 1]), + Node(nte.PEDESTRIAN, node_id="4"): np.array([13, 1]), + } + sg2 = TemporalSceneGraph.create_from_temp_scene_dict( + scene_dict2, + attention_radius=attention_radius, + duration=1, + edge_addition_filter=[0.25, 0.5, 0.75, 1.0], + edge_removal_filter=[1.0, 0.0], + ).to_scene_graph(t=0) + + new_nodes, removed_nodes, new_neighbors, removed_neighbors = sg2 - sg1 + print("New Nodes:", new_nodes) + print("Removed Nodes:", removed_nodes) + print("New Neighbors:", new_neighbors) + print("Removed Neighbors:", removed_neighbors) diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/evaluation/__init__.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/evaluation/__init__.py new file mode 100644 index 000000000..91ce29390 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/evaluation/__init__.py @@ -0,0 +1,4 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 +from .evaluation import compute_batch_statistics, 
log_batch_errors, print_batch_errors diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/evaluation/evaluation.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/evaluation/evaluation.py new file mode 100644 index 000000000..fac4a45eb --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/evaluation/evaluation.py @@ -0,0 +1,142 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 +import numpy as np +from scipy.interpolate import RectBivariateSpline +from scipy.ndimage import binary_dilation +from scipy.stats import gaussian_kde +from utils import prediction_output_to_trajectories +import visualization +from matplotlib import pyplot as plt + + +def compute_ade(predicted_trajs, gt_traj): + error = np.linalg.norm(predicted_trajs - gt_traj, axis=-1) + ade = np.mean(error, axis=-1) + return ade.flatten() + + +def compute_fde(predicted_trajs, gt_traj): + final_error = np.linalg.norm(predicted_trajs[:, :, -1] - gt_traj[-1], axis=-1) + return final_error.flatten() + + +def compute_kde_nll(predicted_trajs, gt_traj): + kde_ll = 0.0 + log_pdf_lower_bound = -20 + num_timesteps = gt_traj.shape[0] + num_batches = predicted_trajs.shape[0] + + for batch_num in range(num_batches): + for timestep in range(num_timesteps): + try: + kde = gaussian_kde(predicted_trajs[batch_num, :, timestep].T) + pdf = np.clip(kde.logpdf(gt_traj[timestep].T), a_min=log_pdf_lower_bound, a_max=None)[0] + kde_ll += pdf / (num_timesteps * num_batches) + except np.linalg.LinAlgError: + kde_ll = np.nan + + return -kde_ll + + +def compute_obs_violations(predicted_trajs, map): + obs_map = map.data + + interp_obs_map = RectBivariateSpline( + range(obs_map.shape[1]), range(obs_map.shape[0]), binary_dilation(obs_map.T, iterations=4), kx=1, ky=1 + ) + + old_shape = predicted_trajs.shape + pred_trajs_map = map.to_map_points(predicted_trajs.reshape((-1, 2))) + + traj_obs_values = interp_obs_map(pred_trajs_map[:, 0], pred_trajs_map[:, 1], grid=False) + traj_obs_values = traj_obs_values.reshape((old_shape[0], old_shape[1])) + num_viol_trajs = np.sum(traj_obs_values.max(axis=1) > 0, dtype=float) + + return num_viol_trajs + + +def compute_batch_statistics( + prediction_output_dict, + dt, + max_hl, + ph, + node_type_enum, + kde=True, + obs=False, + map=None, + prune_ph_to_future=False, + best_of=False, +): + + (prediction_dict, _, futures_dict) = prediction_output_to_trajectories( + prediction_output_dict, dt, max_hl, ph, prune_ph_to_future=prune_ph_to_future + ) + + batch_error_dict = dict() + for node_type in node_type_enum: + batch_error_dict[node_type] = {"ade": list(), "fde": list(), "kde": list(), "obs_viols": list()} + + for t in prediction_dict.keys(): + for node in prediction_dict[t].keys(): + ade_errors = compute_ade(prediction_dict[t][node], futures_dict[t][node]) + fde_errors = compute_fde(prediction_dict[t][node], futures_dict[t][node]) + if kde: + kde_ll = compute_kde_nll(prediction_dict[t][node], futures_dict[t][node]) + else: + kde_ll = 0 + if obs: + obs_viols = compute_obs_violations(prediction_dict[t][node], map) + else: + obs_viols = 0 + if best_of: + ade_errors = np.min(ade_errors, keepdims=True) + fde_errors = np.min(fde_errors, keepdims=True) + kde_ll = np.min(kde_ll) + batch_error_dict[node.type]["ade"].extend(list(ade_errors)) + batch_error_dict[node.type]["fde"].extend(list(fde_errors)) + batch_error_dict[node.type]["kde"].extend([kde_ll]) + batch_error_dict[node.type]["obs_viols"].extend([obs_viols]) + + return 
batch_error_dict + + +def log_batch_errors(batch_errors_list, log_writer, namespace, curr_iter, bar_plot=[], box_plot=[]): + for node_type in batch_errors_list[0].keys(): + for metric in batch_errors_list[0][node_type].keys(): + metric_batch_error = [] + for batch_errors in batch_errors_list: + metric_batch_error.extend(batch_errors[node_type][metric]) + + if len(metric_batch_error) > 0: + log_writer.add_histogram(f"{node_type.name}/{namespace}/{metric}", metric_batch_error, curr_iter) + log_writer.add_scalar( + f"{node_type.name}/{namespace}/{metric}_mean", np.mean(metric_batch_error), curr_iter + ) + log_writer.add_scalar( + f"{node_type.name}/{namespace}/{metric}_median", np.median(metric_batch_error), curr_iter + ) + + if metric in bar_plot: + pd = {"dataset": [namespace] * len(metric_batch_error), metric: metric_batch_error} + kde_barplot_fig, ax = plt.subplots(figsize=(5, 5)) + visualization.visualization_utils.plot_barplots(ax, pd, "dataset", metric) + log_writer.add_figure(f"{node_type.name}/{namespace}/{metric}_bar_plot", kde_barplot_fig, curr_iter) + + if metric in box_plot: + mse_fde_pd = {"dataset": [namespace] * len(metric_batch_error), metric: metric_batch_error} + fig, ax = plt.subplots(figsize=(5, 5)) + visualization.visualization_utils.plot_boxplots(ax, mse_fde_pd, "dataset", metric) + log_writer.add_figure(f"{node_type.name}/{namespace}/{metric}_box_plot", fig, curr_iter) + + +def print_batch_errors(batch_errors_list, namespace, curr_iter): + for node_type in batch_errors_list[0].keys(): + for metric in batch_errors_list[0][node_type].keys(): + metric_batch_error = [] + for batch_errors in batch_errors_list: + metric_batch_error.extend(batch_errors[node_type][metric]) + + if len(metric_batch_error) > 0: + print(f"{curr_iter}: {node_type.name}/{namespace}/{metric}_mean", np.mean(metric_batch_error)) + print(f"{curr_iter}: {node_type.name}/{namespace}/{metric}_median", np.median(metric_batch_error)) diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/__init__.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/__init__.py new file mode 100644 index 000000000..be76653b0 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/__init__.py @@ -0,0 +1,5 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 +from model.trajectron import Trajectron +from model.mgcvae import MultimodalGenerativeCVAE diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/components/__init__.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/components/__init__.py new file mode 100644 index 000000000..ebf3ee86c --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/components/__init__.py @@ -0,0 +1,7 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 +from .discrete_latent import DiscreteLatent +from .gmm2d import GMM2D +from .map_encoder import CNNMapEncoder +from .additive_attention import AdditiveAttention, TemporallyBatchedAdditiveAttention diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/components/additive_attention.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/components/additive_attention.py new file mode 100644 index 000000000..0d1ec7f2d --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/components/additive_attention.py @@ -0,0 +1,71 @@ +# SPDX-FileCopyrightText: (c) 
2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class AdditiveAttention(nn.Module): + # Implementing the attention module of Bahdanau et al. 2015 where + # score(h_j, s_(i-1)) = v . tanh(W_1 h_j + W_2 s_(i-1)) + def __init__(self, encoder_hidden_state_dim, decoder_hidden_state_dim, internal_dim=None): + super(AdditiveAttention, self).__init__() + + if internal_dim is None: + internal_dim = int((encoder_hidden_state_dim + decoder_hidden_state_dim) / 2) + + self.w1 = nn.Linear(encoder_hidden_state_dim, internal_dim, bias=False) + self.w2 = nn.Linear(decoder_hidden_state_dim, internal_dim, bias=False) + self.v = nn.Linear(internal_dim, 1, bias=False) + + def score(self, encoder_state, decoder_state): + # encoder_state is of shape (batch, enc_dim) + # decoder_state is of shape (batch, dec_dim) + # return value should be of shape (batch, 1) + return self.v(torch.tanh(self.w1(encoder_state) + self.w2(decoder_state))) + + def forward(self, encoder_states, decoder_state): + # encoder_states is of shape (batch, num_enc_states, enc_dim) + # decoder_state is of shape (batch, dec_dim) + score_vec = torch.cat( + [self.score(encoder_states[:, i], decoder_state) for i in range(encoder_states.shape[1])], dim=1 + ) + # score_vec is of shape (batch, num_enc_states) + + attention_probs = torch.unsqueeze(F.softmax(score_vec, dim=1), dim=2) + # attention_probs is of shape (batch, num_enc_states, 1) + + final_context_vec = torch.sum(attention_probs * encoder_states, dim=1) + # final_context_vec is of shape (batch, enc_dim) + + return final_context_vec, attention_probs + + +class TemporallyBatchedAdditiveAttention(AdditiveAttention): + # Implementing the attention module of Bahdanau et al. 2015 where + # score(h_j, s_(i-1)) = v . 
tanh(W_1 h_j + W_2 s_(i-1)) + def __init__(self, encoder_hidden_state_dim, decoder_hidden_state_dim, internal_dim=None): + super(TemporallyBatchedAdditiveAttention, self).__init__( + encoder_hidden_state_dim, decoder_hidden_state_dim, internal_dim + ) + + def score(self, encoder_state, decoder_state): + # encoder_state is of shape (batch, num_enc_states, max_time, enc_dim) + # decoder_state is of shape (batch, max_time, dec_dim) + # return value should be of shape (batch, num_enc_states, max_time, 1) + return self.v(torch.tanh(self.w1(encoder_state) + torch.unsqueeze(self.w2(decoder_state), dim=1))) + + def forward(self, encoder_states, decoder_state): + # encoder_states is of shape (batch, num_enc_states, max_time, enc_dim) + # decoder_state is of shape (batch, max_time, dec_dim) + score_vec = self.score(encoder_states, decoder_state) + # score_vec is of shape (batch, num_enc_states, max_time, 1) + + attention_probs = F.softmax(score_vec, dim=1) + # attention_probs is of shape (batch, num_enc_states, max_time, 1) + + final_context_vec = torch.sum(attention_probs * encoder_states, dim=1) + # final_context_vec is of shape (batch, max_time, enc_dim) + + return final_context_vec, torch.squeeze(torch.transpose(attention_probs, 1, 2), dim=3) diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/components/discrete_latent.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/components/discrete_latent.py new file mode 100644 index 000000000..222d826b1 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/components/discrete_latent.py @@ -0,0 +1,122 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 +import torch +import torch.distributions as td +import numpy as np +from ..model_utils import ModeKeys + + +class DiscreteLatent(object): + def __init__(self, hyperparams, device): + self.hyperparams = hyperparams + self.z_dim = hyperparams["N"] * hyperparams["K"] + self.N = hyperparams["N"] + self.K = hyperparams["K"] + self.kl_min = hyperparams["kl_min"] + self.device = device + self.temp = None # filled in by MultimodalGenerativeCVAE.set_annealing_params + self.z_logit_clip = None # filled in by MultimodalGenerativeCVAE.set_annealing_params + self.p_dist = None # filled in by MultimodalGenerativeCVAE.encoder + self.q_dist = None # filled in by MultimodalGenerativeCVAE.encoder + + def dist_from_h(self, h, mode): + logits_separated = torch.reshape(h, (-1, self.N, self.K)) + logits_separated_mean_zero = logits_separated - torch.mean(logits_separated, dim=-1, keepdim=True) + if self.z_logit_clip is not None and mode == ModeKeys.TRAIN: + c = self.z_logit_clip + logits = torch.clamp(logits_separated_mean_zero, min=-c, max=c) + else: + logits = logits_separated_mean_zero + + return td.OneHotCategorical(logits=logits) + + def sample_q(self, num_samples, mode): + bs = self.p_dist.probs.size()[0] + num_components = self.N * self.K + z_NK = ( + torch.from_numpy(self.all_one_hot_combinations(self.N, self.K)) + .float() + .to(self.device) + .repeat(num_samples, bs) + ) + return torch.reshape(z_NK, (num_samples * num_components, -1, self.z_dim)) + + def sample_p(self, num_samples, mode, most_likely_z=False, full_dist=True, all_z_sep=False): + num_components = 1 + if full_dist: + bs = self.p_dist.probs.size()[0] + z_NK = ( + torch.from_numpy(self.all_one_hot_combinations(self.N, self.K)) + .float() + .to(self.device) + .repeat(num_samples, bs) + ) + num_components = self.K**self.N + k = 
num_samples * num_components + elif all_z_sep: + bs = self.p_dist.probs.size()[0] + z_NK = torch.from_numpy(self.all_one_hot_combinations(self.N, self.K)).float().to(self.device).repeat(1, bs) + k = self.K**self.N + num_samples = k + elif most_likely_z: + # Sampling the most likely z from p(z|x). + eye_mat = torch.eye(self.p_dist.event_shape[-1], device=self.device) + argmax_idxs = torch.argmax(self.p_dist.probs, dim=2) + z_NK = torch.unsqueeze(eye_mat[argmax_idxs], dim=0).expand(num_samples, -1, -1, -1) + k = num_samples + else: + z_NK = self.p_dist.sample((num_samples,)) + k = num_samples + + if mode == ModeKeys.PREDICT: + return torch.reshape(z_NK, (k, -1, self.N * self.K)), num_samples, num_components + else: + return torch.reshape(z_NK, (k, -1, self.N * self.K)) + + def kl_q_p(self, log_writer=None, prefix=None, curr_iter=None): + kl_separated = td.kl_divergence(self.q_dist, self.p_dist) + if len(kl_separated.size()) < 2: + kl_separated = torch.unsqueeze(kl_separated, dim=0) + + kl_minibatch = torch.mean(kl_separated, dim=0, keepdim=True) + + if log_writer is not None: + log_writer.add_scalar(prefix + "/true_kl", torch.sum(kl_minibatch), curr_iter) + + if self.kl_min > 0: + kl_lower_bounded = torch.clamp(kl_minibatch, min=self.kl_min) + kl = torch.sum(kl_lower_bounded) + else: + kl = torch.sum(kl_minibatch) + + return kl + + def q_log_prob(self, z): + k = z.size()[0] + z_NK = torch.reshape(z, [k, -1, self.N, self.K]) + return torch.sum(self.q_dist.log_prob(z_NK), dim=2) + + def p_log_prob(self, z): + k = z.size()[0] + z_NK = torch.reshape(z, [k, -1, self.N, self.K]) + return torch.sum(self.p_dist.log_prob(z_NK), dim=2) + + def get_p_dist_probs(self): + return self.p_dist.probs + + @staticmethod + def all_one_hot_combinations(N, K): + return np.eye(K).take(np.reshape(np.indices([K] * N), [N, -1]).T, axis=0).reshape(-1, N * K) # [K**N, N*K] + + def summarize_for_tensorboard(self, log_writer, prefix, curr_iter): + log_writer.add_histogram(prefix + "/latent/p_z_x", self.p_dist.probs, curr_iter) + log_writer.add_histogram(prefix + "/latent/q_z_xy", self.q_dist.probs, curr_iter) + log_writer.add_histogram(prefix + "/latent/p_z_x_logits", self.p_dist.logits, curr_iter) + log_writer.add_histogram(prefix + "/latent/q_z_xy_logits", self.q_dist.logits, curr_iter) + if self.z_dim <= 9: + for i in range(self.N): + for j in range(self.K): + log_writer.add_histogram( + prefix + "/latent/q_z_xy_logit{0}{1}".format(i, j), self.q_dist.logits[:, i, j], curr_iter + ) diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/components/gmm2d.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/components/gmm2d.py new file mode 100644 index 000000000..999c0a303 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/components/gmm2d.py @@ -0,0 +1,187 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 +import torch +import torch.distributions as td +import numpy as np +from ..model_utils import to_one_hot + + +class GMM2D(td.Distribution): + r""" + Gaussian Mixture Model using 2D Multivariate Gaussians each of as N components: + Cholesky decompesition and affine transformation for sampling: + + .. math:: Z \sim N(0, I) + + .. math:: S = \mu + LZ + + .. math:: S \sim N(\mu, \Sigma) \rightarrow N(\mu, LL^T) + + where :math:`L = chol(\Sigma)` and + + .. 
math:: \Sigma = \left[ {\begin{array}{cc} \sigma^2_x & \rho \sigma_x \sigma_y \\ \rho \sigma_x \sigma_y & \sigma^2_y \\ \end{array} } \right] + + such that + + .. math:: L = chol(\Sigma) = \left[ {\begin{array}{cc} \sigma_x & 0 \\ \rho \sigma_y & \sigma_y \sqrt{1-\rho^2} \\ \end{array} } \right] + + :param log_pis: Log Mixing Proportions :math:`log(\pi)`. [..., N] + :param mus: Mixture Components mean :math:`\mu`. [..., N * 2] + :param log_sigmas: Log Standard Deviations :math:`log(\sigma_d)`. [..., N * 2] + :param corrs: Cholesky factor of correlation :math:`\rho`. [..., N] + :param clip_lo: Clips the lower end of the standard deviation. + :param clip_hi: Clips the upper end of the standard deviation. + """ + + def __init__(self, log_pis, mus, log_sigmas, corrs): + super(GMM2D, self).__init__(batch_shape=log_pis.shape[0], event_shape=log_pis.shape[1:]) + self.components = log_pis.shape[-1] + self.dimensions = 2 + self.device = log_pis.device + + log_pis = torch.clamp(log_pis, min=-1e5) + self.log_pis = log_pis - torch.logsumexp(log_pis, dim=-1, keepdim=True) # [..., N] + self.mus = self.reshape_to_components(mus) # [..., N, 2] + self.log_sigmas = self.reshape_to_components(log_sigmas) # [..., N, 2] + self.sigmas = torch.exp(self.log_sigmas) # [..., N, 2] + self.one_minus_rho2 = 1 - corrs**2 # [..., N] + self.one_minus_rho2 = torch.clamp(self.one_minus_rho2, min=1e-5, max=1) # otherwise log can be nan + self.corrs = corrs # [..., N] + + self.L = torch.stack( + [ + torch.stack([self.sigmas[..., 0], torch.zeros_like(self.log_pis)], dim=-1), + torch.stack( + [self.sigmas[..., 1] * self.corrs, self.sigmas[..., 1] * torch.sqrt(self.one_minus_rho2)], dim=-1 + ), + ], + dim=-2, + ) + + self.pis_cat_dist = td.Categorical(logits=log_pis) + + @classmethod + def from_log_pis_mus_cov_mats(cls, log_pis, mus, cov_mats): + corrs_sigma12 = cov_mats[..., 0, 1] + sigma_1 = torch.clamp(cov_mats[..., 0, 0], min=1e-8) + sigma_2 = torch.clamp(cov_mats[..., 1, 1], min=1e-8) + sigmas = torch.stack([torch.sqrt(sigma_1), torch.sqrt(sigma_2)], dim=-1) + log_sigmas = torch.log(sigmas) + corrs = corrs_sigma12 / (torch.prod(sigmas, dim=-1)) + return cls(log_pis, mus, log_sigmas, corrs) + + def rsample(self, sample_shape=torch.Size()): + """ + Generates a sample_shape shaped reparameterized sample or sample_shape + shaped batch of reparameterized samples if the distribution parameters + are batched. + + :param sample_shape: Shape of the samples + :return: Samples from the GMM. + """ + mvn_samples = self.mus + torch.squeeze( + torch.matmul( + self.L, torch.unsqueeze(torch.randn(size=sample_shape + self.mus.shape, device=self.device), dim=-1) + ), + dim=-1, + ) + component_cat_samples = self.pis_cat_dist.sample(sample_shape) + selector = torch.unsqueeze(to_one_hot(component_cat_samples, self.components), dim=-1) + return torch.sum(mvn_samples * selector, dim=-2) + + def log_prob(self, value): + r""" + Calculates the log probability of a value using the PDF for bivariate normal distributions: + + .. math:: + f(x | \mu, \sigma, \rho)={\frac {1}{2\pi \sigma _{x}\sigma _{y}{\sqrt {1-\rho ^{2}}}}}\exp + \left(-{\frac {1}{2(1-\rho ^{2})}}\left[{\frac {(x-\mu _{x})^{2}}{\sigma _{x}^{2}}}+ + {\frac {(y-\mu _{y})^{2}}{\sigma _{y}^{2}}}-{\frac {2\rho (x-\mu _{x})(y-\mu _{y})} + {\sigma _{x}\sigma _{y}}}\right]\right) + + :param value: The log probability density function is evaluated at those values. 
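A quick numeric cross-check of the bivariate-normal density quoted in this docstring: the sketch below evaluates the closed form against the equivalent covariance-matrix form, using illustrative values for the mean, standard deviations, and correlation (plain NumPy, not the GMM2D API itself).

```python
import numpy as np

# Hypothetical single-component parameters and an evaluation point.
mu = np.array([0.0, 0.0])
sigma_x, sigma_y, rho = 1.0, 2.0, 0.5
x = np.array([1.0, 1.0])

# Closed-form bivariate normal log-density, as in the docstring formula.
dx, dy = x - mu
quad = (dx / sigma_x) ** 2 + (dy / sigma_y) ** 2 - 2 * rho * dx * dy / (sigma_x * sigma_y)
log_p = -np.log(2 * np.pi * sigma_x * sigma_y * np.sqrt(1 - rho ** 2)) - quad / (2 * (1 - rho ** 2))

# Same density via the covariance matrix, as a cross-check.
cov = np.array([[sigma_x ** 2, rho * sigma_x * sigma_y],
                [rho * sigma_x * sigma_y, sigma_y ** 2]])
log_p_ref = (-np.log(2 * np.pi) - 0.5 * np.log(np.linalg.det(cov))
             - 0.5 * (x - mu) @ np.linalg.inv(cov) @ (x - mu))
print(np.isclose(log_p, log_p_ref))  # True
```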
+ :return: Log probability + """ + # x: [..., 2] + value = torch.unsqueeze(value, dim=-2) # [..., 1, 2] + dx = value - self.mus # [..., N, 2] + + exp_nominator = torch.sum( + (dx / self.sigmas) ** 2, dim=-1 + ) - 2 * self.corrs * torch.prod( # first and second term of exp nominator + dx, dim=-1 + ) / torch.prod( + self.sigmas, dim=-1 + ) # [..., N] + + component_log_p = ( + -( + 2 * np.log(2 * np.pi) + + torch.log(self.one_minus_rho2) + + 2 * torch.sum(self.log_sigmas, dim=-1) + + exp_nominator / self.one_minus_rho2 + ) + / 2 + ) + + return torch.logsumexp(self.log_pis + component_log_p, dim=-1) + + def get_for_node_at_time(self, n, t): + return self.__class__( + self.log_pis[:, n : n + 1, t : t + 1], + self.mus[:, n : n + 1, t : t + 1], + self.log_sigmas[:, n : n + 1, t : t + 1], + self.corrs[:, n : n + 1, t : t + 1], + ) + + def mode(self): + """ + Calculates the mode of the GMM by calculating probabilities of a 2D mesh grid + + :param required_accuracy: Accuracy of the meshgrid + :return: Mode of the GMM + """ + if self.mus.shape[-2] > 1: + samp, bs, time, comp, _ = self.mus.shape + assert samp == 1, "For taking the mode only one sample makes sense." + mode_node_list = [] + for n in range(bs): + mode_t_list = [] + for t in range(time): + nt_gmm = self.get_for_node_at_time(n, t) + x_min = self.mus[:, n, t, :, 0].min() + x_max = self.mus[:, n, t, :, 0].max() + y_min = self.mus[:, n, t, :, 1].min() + y_max = self.mus[:, n, t, :, 1].max() + search_grid = ( + torch.stack( + torch.meshgrid([torch.arange(x_min, x_max, 0.01), torch.arange(y_min, y_max, 0.01)]), dim=2 + ) + .view(-1, 2) + .float() + .to(self.device) + ) + + ll_score = nt_gmm.log_prob(search_grid) + argmax = torch.argmax(ll_score.squeeze(), dim=0) + mode_t_list.append(search_grid[argmax]) + mode_node_list.append(torch.stack(mode_t_list, dim=0)) + return torch.stack(mode_node_list, dim=0).unsqueeze(dim=0) + return torch.squeeze(self.mus, dim=-2) + + def reshape_to_components(self, tensor): + if len(tensor.shape) == 5: + return tensor + return torch.reshape(tensor, list(tensor.shape[:-1]) + [self.components, self.dimensions]) + + def get_covariance_matrix(self): + cov = self.corrs * torch.prod(self.sigmas, dim=-1) + E = torch.stack( + [ + torch.stack([self.sigmas[..., 0] ** 2, cov], dim=-1), + torch.stack([cov, self.sigmas[..., 1] ** 2], dim=-1), + ], + dim=-2, + ) + return E diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/components/graph_attention.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/components/graph_attention.py new file mode 100644 index 000000000..6c9516753 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/components/graph_attention.py @@ -0,0 +1,61 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 +import warnings +import math +import numbers +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.nn import init, Parameter + + +class GraphMultiTypeAttention(nn.Module): + def __init__(self, in_features, hidden_features, out_features, bias=True, types=1): + super(GraphMultiTypeAttention, self).__init__() + self.types = types + self.in_features = in_features + self.out_features = out_features + self.node_self_loop_weight = Parameter(torch.Tensor(hidden_features, in_features[0])) + + self.weight_per_type = nn.ParameterList() + for i in range(types): + self.weight_per_type.append(Parameter(torch.Tensor(hidden_features, in_features[i]))) + if bias: + self.bias 
= Parameter(torch.Tensor(hidden_features)) + else: + self.register_parameter("bias", None) + + self.linear_to_out = nn.Linear(hidden_features, out_features, bias=bias) + + self.reset_parameters() + + def reset_parameters(self): + for weight in self.weight_per_type: + bound = 1 / math.sqrt(weight.size(1)) + init.uniform_(weight, -bound, bound) + bound = 1 / math.sqrt(self.node_self_loop_weight.size(1)) + init.uniform_(self.node_self_loop_weight, -bound, bound) + if self.bias is not None: + init.uniform_(self.bias, -bound, bound) + + def forward(self, inputs, types, edge_weights): + weight_list = list() + for i, type in enumerate(types): + weight_list.append((edge_weights[i] / len(edge_weights)) * self.weight_per_type[type].T) + weight_list.append(self.node_self_loop_weight.T) + weight = torch.cat(weight_list, dim=0) + stacked_input = torch.cat(inputs, dim=-1) + output = stacked_input.matmul(weight) + + output = output + + if self.bias is not None: + output += self.bias + + return torch.relu(self.linear_to_out(torch.relu(output))) + + def extra_repr(self): + return "in_features={}, hidden_features={},, out_features={}, types={}, bias={}".format( + self.in_features, self.hidden_features, self.out_features, self.types, self.bias is not None + ) diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/components/map_encoder.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/components/map_encoder.py new file mode 100644 index 000000000..369be7db4 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/components/map_encoder.py @@ -0,0 +1,33 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class CNNMapEncoder(nn.Module): + def __init__(self, map_channels, hidden_channels, output_size, masks, strides, patch_size): + super(CNNMapEncoder, self).__init__() + self.convs = nn.ModuleList() + patch_size_x = patch_size[0] + patch_size[2] + patch_size_y = patch_size[1] + patch_size[3] + input_size = (map_channels, patch_size_x, patch_size_y) + x_dummy = torch.ones(input_size).unsqueeze(0) * torch.tensor(float("nan")) + + for i, hidden_size in enumerate(hidden_channels): + self.convs.append( + nn.Conv2d( + map_channels if i == 0 else hidden_channels[i - 1], hidden_channels[i], masks[i], stride=strides[i] + ) + ) + x_dummy = self.convs[i](x_dummy) + + self.fc = nn.Linear(x_dummy.numel(), output_size) + + def forward(self, x, training): + for conv in self.convs: + x = F.leaky_relu(conv(x), 0.2) + x = torch.flatten(x, start_dim=1) + x = self.fc(x) + return x diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dataset/__init__.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dataset/__init__.py new file mode 100644 index 000000000..e0d2ee4c0 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dataset/__init__.py @@ -0,0 +1,5 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 +from .dataset import EnvironmentDataset, NodeTypeDataset +from .preprocessing import collate, get_node_timestep_data, get_timesteps_data, restore, get_relative_robot_traj diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dataset/dataset.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dataset/dataset.py new file mode 100644 index 000000000..4769eae88 --- /dev/null +++ 
b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dataset/dataset.py @@ -0,0 +1,95 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 +from torch.utils import data +import numpy as np +from .preprocessing import get_node_timestep_data + + +class EnvironmentDataset(object): + def __init__(self, env, state, pred_state, node_freq_mult, scene_freq_mult, hyperparams, **kwargs): + self.env = env + self.state = state + self.pred_state = pred_state + self.hyperparams = hyperparams + self.max_ht = self.hyperparams["maximum_history_length"] + self.max_ft = kwargs["min_future_timesteps"] + self.node_type_datasets = list() + self._augment = False + for node_type in env.NodeType: + if node_type not in hyperparams["pred_state"]: + continue + self.node_type_datasets.append( + NodeTypeDataset( + env, node_type, state, pred_state, node_freq_mult, scene_freq_mult, hyperparams, **kwargs + ) + ) + + @property + def augment(self): + return self._augment + + @augment.setter + def augment(self, value): + self._augment = value + for node_type_dataset in self.node_type_datasets: + node_type_dataset.augment = value + + def __iter__(self): + return iter(self.node_type_datasets) + + +class NodeTypeDataset(data.Dataset): + def __init__( + self, env, node_type, state, pred_state, node_freq_mult, scene_freq_mult, hyperparams, augment=False, **kwargs + ): + self.env = env + self.state = state + self.pred_state = pred_state + self.hyperparams = hyperparams + self.max_ht = self.hyperparams["maximum_history_length"] + self.max_ft = kwargs["min_future_timesteps"] + + self.augment = augment + + self.node_type = node_type + self.index = self.index_env(node_freq_mult, scene_freq_mult, **kwargs) + self.len = len(self.index) + self.edge_types = [edge_type for edge_type in env.get_edge_types() if edge_type[0] is node_type] + + def index_env(self, node_freq_mult, scene_freq_mult, **kwargs): + index = list() + for scene in self.env.scenes: + present_node_dict = scene.present_nodes(np.arange(0, scene.timesteps), type=self.node_type, **kwargs) + for t, nodes in present_node_dict.items(): + for node in nodes: + index += ( + [(scene, t, node)] + * (scene.frequency_multiplier if scene_freq_mult else 1) + * (node.frequency_multiplier if node_freq_mult else 1) + ) + + return index + + def __len__(self): + return self.len + + def __getitem__(self, i): + (scene, t, node) = self.index[i] + + if self.augment: + scene = scene.augment() + node = scene.get_node_by_id(node.id) + + return get_node_timestep_data( + self.env, + scene, + t, + node, + self.state, + self.pred_state, + self.edge_types, + self.max_ht, + self.max_ft, + self.hyperparams, + ) diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dataset/homography_warper.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dataset/homography_warper.py new file mode 100644 index 000000000..5cc3a5f4d --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dataset/homography_warper.py @@ -0,0 +1,467 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 +import torch +import torch.nn as nn +import torch.nn.functional as F +from typing import Tuple, Optional + + +pi = torch.tensor(3.14159265358979323846) + + +def deg2rad(tensor: torch.Tensor) -> torch.Tensor: + r"""Function that converts angles from degrees to radians. + Args: + tensor (torch.Tensor): Tensor of arbitrary shape. 
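As a small aside, the degree-to-radian conversion and the 2x2 rotation construction used by angle_to_rotation_matrix can be sanity-checked with a 90 degree angle; the snippet below is a standalone sketch with an illustrative input, not a call into this module.

```python
import math
import torch

# 90 degrees -> pi/2 radians -> rotation matrix ~ [[0, 1], [-1, 0]],
# stacked in the same [cos, sin, -sin, cos] order as angle_to_rotation_matrix.
angle = torch.tensor([90.0])
ang_rad = angle * math.pi / 180.0
cos_a, sin_a = torch.cos(ang_rad), torch.sin(ang_rad)
rot = torch.stack([cos_a, sin_a, -sin_a, cos_a], dim=-1).view(1, 2, 2)
print(rot)
```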
+ Returns: + torch.Tensor: tensor with same shape as input. + """ + if not isinstance(tensor, torch.Tensor): + raise TypeError("Input type is not a torch.Tensor. Got {}".format(type(tensor))) + + return tensor * pi.to(tensor.device).type(tensor.dtype) / 180.0 + + +def angle_to_rotation_matrix(angle: torch.Tensor) -> torch.Tensor: + """ + Creates a rotation matrix out of angles in degrees + Args: + angle: (torch.Tensor): tensor of angles in degrees, any shape. + Returns: + torch.Tensor: tensor of *x2x2 rotation matrices. + Shape: + - Input: :math:`(*)` + - Output: :math:`(*, 2, 2)` + Example: + >>> input = torch.rand(1, 3) # Nx3 + >>> output = kornia.angle_to_rotation_matrix(input) # Nx3x2x2 + """ + ang_rad = deg2rad(angle) + cos_a: torch.Tensor = torch.cos(ang_rad) + sin_a: torch.Tensor = torch.sin(ang_rad) + return torch.stack([cos_a, sin_a, -sin_a, cos_a], dim=-1).view(*angle.shape, 2, 2) + + +def get_rotation_matrix2d(center: torch.Tensor, angle: torch.Tensor, scale: torch.Tensor) -> torch.Tensor: + r"""Calculates an affine matrix of 2D rotation. + The function calculates the following matrix: + .. math:: + \begin{bmatrix} + \alpha & \beta & (1 - \alpha) \cdot \text{x} + - \beta \cdot \text{y} \\ + -\beta & \alpha & \beta \cdot \text{x} + + (1 - \alpha) \cdot \text{y} + \end{bmatrix} + where + .. math:: + \alpha = \text{scale} \cdot cos(\text{radian}) \\ + \beta = \text{scale} \cdot sin(\text{radian}) + The transformation maps the rotation center to itself + If this is not the target, adjust the shift. + Args: + center (Tensor): center of the rotation in the source image. + angle (Tensor): rotation radian in degrees. Positive values mean + counter-clockwise rotation (the coordinate origin is assumed to + be the top-left corner). + scale (Tensor): isotropic scale factor. + Returns: + Tensor: the affine matrix of 2D rotation. + Shape: + - Input: :math:`(B, 2)`, :math:`(B)` and :math:`(B)` + - Output: :math:`(B, 2, 3)` + Example: + >>> center = torch.zeros(1, 2) + >>> scale = torch.ones(1) + >>> radian = 45. * torch.ones(1) + >>> M = kornia.get_rotation_matrix2d(center, radian, scale) + tensor([[[ 0.7071, 0.7071, 0.0000], + [-0.7071, 0.7071, 0.0000]]]) + """ + if not torch.is_tensor(center): + raise TypeError("Input center type is not a torch.Tensor. Got {}".format(type(center))) + if not torch.is_tensor(angle): + raise TypeError("Input radian type is not a torch.Tensor. Got {}".format(type(angle))) + if not torch.is_tensor(scale): + raise TypeError("Input scale type is not a torch.Tensor. Got {}".format(type(scale))) + if not (len(center.shape) == 2 and center.shape[1] == 2): + raise ValueError("Input center must be a Bx2 tensor. Got {}".format(center.shape)) + if not len(angle.shape) == 1: + raise ValueError("Input radian must be a B tensor. Got {}".format(angle.shape)) + if not len(scale.shape) == 1: + raise ValueError("Input scale must be a B tensor. Got {}".format(scale.shape)) + if not (center.shape[0] == angle.shape[0] == scale.shape[0]): + raise ValueError( + "Inputs must have same batch size dimension. 
Got {}".format(center.shape, angle.shape, scale.shape) + ) + # convert radian and apply scale + scaled_rotation: torch.Tensor = angle_to_rotation_matrix(angle) * scale.view(-1, 1, 1) + alpha: torch.Tensor = scaled_rotation[:, 0, 0] + beta: torch.Tensor = scaled_rotation[:, 0, 1] + + # unpack the center to x, y coordinates + x: torch.Tensor = center[..., 0] + y: torch.Tensor = center[..., 1] + + # create output tensor + batch_size: int = center.shape[0] + M: torch.Tensor = torch.zeros(batch_size, 2, 3, device=center.device, dtype=center.dtype) + M[..., 0:2, 0:2] = scaled_rotation + M[..., 0, 2] = (torch.tensor(1.0) - alpha) * x - beta * y + M[..., 1, 2] = beta * x + (torch.tensor(1.0) - alpha) * y + return M + + +def convert_points_to_homogeneous(points: torch.Tensor) -> torch.Tensor: + r"""Function that converts points from Euclidean to homogeneous space. + Examples:: + >>> input = torch.rand(2, 4, 3) # BxNx3 + >>> output = kornia.convert_points_to_homogeneous(input) # BxNx4 + """ + if not isinstance(points, torch.Tensor): + raise TypeError("Input type is not a torch.Tensor. Got {}".format(type(points))) + if len(points.shape) < 2: + raise ValueError("Input must be at least a 2D tensor. Got {}".format(points.shape)) + + return torch.nn.functional.pad(points, [0, 1], "constant", 1.0) + + +def convert_points_from_homogeneous(points: torch.Tensor, eps: float = 1e-8) -> torch.Tensor: + r"""Function that converts points from homogeneous to Euclidean space. + Examples:: + >>> input = torch.rand(2, 4, 3) # BxNx3 + >>> output = kornia.convert_points_from_homogeneous(input) # BxNx2 + """ + if not isinstance(points, torch.Tensor): + raise TypeError("Input type is not a torch.Tensor. Got {}".format(type(points))) + + if len(points.shape) < 2: + raise ValueError("Input must be at least a 2D tensor. Got {}".format(points.shape)) + + # we check for points at infinity + z_vec: torch.Tensor = points[..., -1:] + + # set the results of division by zeror/near-zero to 1.0 + # follow the convention of opencv: + # https://github.com/opencv/opencv/pull/14411/files + mask: torch.Tensor = torch.abs(z_vec) > eps + scale: torch.Tensor = torch.ones_like(z_vec).masked_scatter_( + mask, torch.tensor(1.0).to(points.device) / z_vec[mask] + ) + + return scale * points[..., :-1] + + +def transform_points(trans_01: torch.Tensor, points_1: torch.Tensor) -> torch.Tensor: + r"""Function that applies transformations to a set of points. + Args: + trans_01 (torch.Tensor): tensor for transformations of shape + :math:`(B, D+1, D+1)`. + points_1 (torch.Tensor): tensor of points of shape :math:`(B, N, D)`. + Returns: + torch.Tensor: tensor of N-dimensional points. 
+ Shape: + - Output: :math:`(B, N, D)` + Examples: + >>> points_1 = torch.rand(2, 4, 3) # BxNx3 + >>> trans_01 = torch.eye(4).view(1, 4, 4) # Bx4x4 + >>> points_0 = kornia.transform_points(trans_01, points_1) # BxNx3 + """ + if not torch.is_tensor(trans_01) or not torch.is_tensor(points_1): + raise TypeError("Input type is not a torch.Tensor") + if not trans_01.device == points_1.device: + raise TypeError("Tensor must be in the same device") + if not trans_01.shape[0] == points_1.shape[0] and trans_01.shape[0] != 1: + raise ValueError("Input batch size must be the same for both tensors or 1") + if not trans_01.shape[-1] == (points_1.shape[-1] + 1): + raise ValueError("Last input dimensions must differe by one unit") + # to homogeneous + points_1_h = convert_points_to_homogeneous(points_1) # BxNxD+1 + # transform coordinates + points_0_h = torch.matmul(trans_01.unsqueeze(1), points_1_h.unsqueeze(-1)) + points_0_h = torch.squeeze(points_0_h, dim=-1) + # to euclidean + points_0 = convert_points_from_homogeneous(points_0_h) # BxNxD + return points_0 + + +def multi_linspace(a, b, num, endpoint=True, device="cpu", dtype=torch.float): + """This function is just like np.linspace, but will create linearly + spaced vectors from a start to end vector. + Inputs: + a - Start vector. + b - End vector. + num - Number of samples to generate. Default is 50. Must be above 0. + endpoint - If True, b is the last sample. + Otherwise, it is not included. Default is True. + """ + + return a[..., None] + (b - a)[..., None] / (num - endpoint) * torch.arange(num, device=device, dtype=dtype) + + +def create_batched_meshgrid( + x_min: torch.Tensor, + y_min: torch.Tensor, + x_max: torch.Tensor, + y_max: torch.Tensor, + height: int, + width: int, + device: Optional[torch.device] = torch.device("cpu"), +) -> torch.Tensor: + """Generates a coordinate grid for an image. + When the flag `normalized_coordinates` is set to True, the grid is + normalized to be in the range [-1,1] to be consistent with the pytorch + function grid_sample. + http://pytorch.org/docs/master/nn.html#torch.nn.functional.grid_sample + Args: + height (int): the image height (rows). + width (int): the image width (cols). + normalized_coordinates (Optional[bool]): whether to normalize + coordinates in the range [-1, 1] in order to be consistent with the + PyTorch function grid_sample. + Return: + torch.Tensor: returns a grid tensor with shape :math:`(1, H, W, 2)`. + """ + # generate coordinates + xs = multi_linspace(x_min, x_max, width, device=device, dtype=torch.float) + ys = multi_linspace(y_min, y_max, height, device=device, dtype=torch.float) + + # generate grid by stacking coordinates + bs = x_min.shape[0] + batched_grid_i_list = list() + for i in range(bs): + batched_grid_i_list.append(torch.stack(torch.meshgrid([xs[i], ys[i]])).transpose(1, 2)) # 2xHxW + batched_grid: torch.Tensor = torch.stack(batched_grid_i_list, dim=0) + return batched_grid.permute(0, 2, 3, 1) # BxHxWx2 + + +def homography_warp( + patch_src: torch.Tensor, + centers: torch.Tensor, + dst_homo_src: torch.Tensor, + dsize: Tuple[int, int], + mode: str = "bilinear", + padding_mode: str = "zeros", +) -> torch.Tensor: + r"""Function that warps image patchs or tensors by homographies. + See :class:`~kornia.geometry.warp.HomographyWarper` for details. + Args: + patch_src (torch.Tensor): The image or tensor to warp. Should be from + source of shape :math:`(N, C, H, W)`. 
+ dst_homo_src (torch.Tensor): The homography or stack of homographies + from source to destination of shape + :math:`(N, 3, 3)`. + dsize (Tuple[int, int]): The height and width of the image to warp. + mode (str): interpolation mode to calculate output values + 'bilinear' | 'nearest'. Default: 'bilinear'. + padding_mode (str): padding mode for outside grid values + 'zeros' | 'border' | 'reflection'. Default: 'zeros'. + Return: + torch.Tensor: Patch sampled at locations from source to destination. + Example: + >>> input = torch.rand(1, 3, 32, 32) + >>> homography = torch.eye(3).view(1, 3, 3) + >>> output = kornia.homography_warp(input, homography, (32, 32)) + """ + + out_height, out_width = dsize + image_height, image_width = patch_src.shape[-2:] + x_min = 2.0 * (centers[..., 0] - out_width / 2) / image_width - 1.0 + y_min = 2.0 * (centers[..., 1] - out_height / 2) / image_height - 1.0 + x_max = 2.0 * (centers[..., 0] + out_width / 2) / image_width - 1.0 + y_max = 2.0 * (centers[..., 1] + out_height / 2) / image_height - 1.0 + warper = HomographyWarper(x_min, y_min, x_max, y_max, out_height, out_width, mode, padding_mode) + return warper(patch_src, dst_homo_src) + + +def normal_transform_pixel(height, width): + + tr_mat = torch.Tensor([[1.0, 0.0, -1.0], [0.0, 1.0, -1.0], [0.0, 0.0, 1.0]]) # 1x3x3 + + tr_mat[0, 0] = tr_mat[0, 0] * 2.0 / (width - 1.0) + tr_mat[1, 1] = tr_mat[1, 1] * 2.0 / (height - 1.0) + + tr_mat = tr_mat.unsqueeze(0) + + return tr_mat + + +def src_norm_to_dst_norm( + dst_pix_trans_src_pix: torch.Tensor, dsize_src: Tuple[int, int], dsize_dst: Tuple[int, int] +) -> torch.Tensor: + # source and destination sizes + src_h, src_w = dsize_src + dst_h, dst_w = dsize_dst + # the devices and types + device: torch.device = dst_pix_trans_src_pix.device + dtype: torch.dtype = dst_pix_trans_src_pix.dtype + # compute the transformation pixel/norm for src/dst + src_norm_trans_src_pix: torch.Tensor = normal_transform_pixel(src_h, src_w).to(device, dtype) + src_pix_trans_src_norm = torch.inverse(src_norm_trans_src_pix) + dst_norm_trans_dst_pix: torch.Tensor = normal_transform_pixel(dst_h, dst_w).to(device, dtype) + # compute chain transformations + dst_norm_trans_src_norm: torch.Tensor = dst_norm_trans_dst_pix @ (dst_pix_trans_src_pix @ src_pix_trans_src_norm) + return dst_norm_trans_src_norm + + +def transform_warp_impl( + src: torch.Tensor, + centers: torch.Tensor, + dst_pix_trans_src_pix: torch.Tensor, + dsize_src: Tuple[int, int], + dsize_dst: Tuple[int, int], + grid_mode: str, + padding_mode: str, +) -> torch.Tensor: + """Compute the transform in normalized cooridnates and perform the warping.""" + dst_norm_trans_src_norm: torch.Tensor = src_norm_to_dst_norm(dst_pix_trans_src_pix, dsize_src, dsize_src) + + src_norm_trans_dst_norm = torch.inverse(dst_norm_trans_src_norm) + return homography_warp(src, centers, src_norm_trans_dst_norm, dsize_dst, grid_mode, padding_mode) + + +class HomographyWarper(nn.Module): + r"""Warps image patches or tensors by homographies. + .. math:: + X_{dst} = H_{src}^{\{dst\}} * X_{src} + Args: + height (int): The height of the image to warp. + width (int): The width of the image to warp. + mode (str): interpolation mode to calculate output values + 'bilinear' | 'nearest'. Default: 'bilinear'. + padding_mode (str): padding mode for outside grid values + 'zeros' | 'border' | 'reflection'. Default: 'zeros'. 
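Conceptually, the warper builds a normalized sampling grid, pushes it through the homography, and hands the result to grid_sample. The sketch below walks through that pipeline with an identity homography on a tiny patch; it assumes a recent PyTorch (torch.meshgrid with indexing=...), uses illustrative shapes, and is not the exact code path of this class.

```python
import torch
import torch.nn.functional as F

src = torch.arange(16.0).view(1, 1, 4, 4)     # hypothetical 1x1x4x4 patch
H = torch.eye(3).view(1, 3, 3)                # identity homography

# Normalized [-1, 1] sampling grid in (x, y) order, as grid_sample expects.
ys, xs = torch.meshgrid(torch.linspace(-1, 1, 4), torch.linspace(-1, 1, 4), indexing="ij")
grid = torch.stack([xs, ys], dim=-1).unsqueeze(0)             # (1, H, W, 2)

# Warp the grid with the homography (homogeneous coords), then sample.
grid_h = F.pad(grid, [0, 1], "constant", 1.0)                 # (1, H, W, 3)
warped = (H.view(1, 1, 1, 3, 3) @ grid_h.unsqueeze(-1)).squeeze(-1)
warped = warped[..., :2] / warped[..., 2:]
out = F.grid_sample(src, warped, mode="bilinear", padding_mode="zeros", align_corners=True)
print(torch.allclose(out, src))  # True: identity homography leaves the patch unchanged
```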
+ """ + + def __init__( + self, + x_min: torch.Tensor, + y_min: torch.Tensor, + x_max: torch.Tensor, + y_max: torch.Tensor, + height: int, + width: int, + mode: str = "bilinear", + padding_mode: str = "zeros", + ) -> None: + super(HomographyWarper, self).__init__() + self.width: int = width + self.height: int = height + self.mode: str = mode + self.padding_mode: str = padding_mode + + # create base grid to compute the flow + self.grid: torch.Tensor = create_batched_meshgrid(x_min, y_min, x_max, y_max, height, width) + + def warp_grid(self, dst_homo_src: torch.Tensor) -> torch.Tensor: + r"""Computes the grid to warp the coordinates grid by an homography. + Args: + dst_homo_src (torch.Tensor): Homography or homographies (stacked) to + transform all points in the grid. Shape of the + homography has to be :math:`(N, 3, 3)`. + Returns: + torch.Tensor: the transformed grid of shape :math:`(N, H, W, 2)`. + """ + batch_size: int = dst_homo_src.shape[0] + device: torch.device = dst_homo_src.device + dtype: torch.dtype = dst_homo_src.dtype + # expand grid to match the input batch size + grid: torch.Tensor = self.grid + if len(dst_homo_src.shape) == 3: # local homography case + dst_homo_src = dst_homo_src.view(batch_size, 1, 3, 3) # NxHxWx3x3 + # perform the actual grid transformation, + # the grid is copied to input device and casted to the same type + flow: torch.Tensor = transform_points(dst_homo_src, grid.to(device).to(dtype)) # NxHxWx2 + return flow.view(batch_size, self.height, self.width, 2) # NxHxWx2 + + def forward(self, patch_src: torch.Tensor, dst_homo_src: torch.Tensor) -> torch.Tensor: # type: ignore + r"""Warps an image or tensor from source into reference frame. + Args: + patch_src (torch.Tensor): The image or tensor to warp. + Should be from source. + dst_homo_src (torch.Tensor): The homography or stack of homographies + from source to destination. The homography assumes normalized + coordinates [-1, 1]. + Return: + torch.Tensor: Patch sampled at locations from source to destination. + Shape: + - Input: :math:`(N, C, H, W)` and :math:`(N, 3, 3)` + - Output: :math:`(N, C, H, W)` + Example: + >>> input = torch.rand(1, 3, 32, 32) + >>> homography = torch.eye(3).view(1, 3, 3) + >>> warper = kornia.HomographyWarper(32, 32) + >>> output = warper(input, homography) # NxCxHxW + """ + if not dst_homo_src.device == patch_src.device: + raise TypeError( + "Patch and homography must be on the same device. \ + Got patch.device: {} dst_H_src.device: {}.".format( + patch_src.device, dst_homo_src.device + ) + ) + + return F.grid_sample( + patch_src, + self.warp_grid(dst_homo_src), # type: ignore + mode=self.mode, + padding_mode=self.padding_mode, + align_corners=True, + ) + + +def warp_affine_crop( + src: torch.Tensor, + centers: torch.Tensor, + M: torch.Tensor, + dsize: Tuple[int, int], + flags: str = "bilinear", + padding_mode: str = "zeros", +) -> torch.Tensor: + r"""Applies an affine transformation to a tensor. + + The function warp_affine transforms the source tensor using + the specified matrix: + + .. math:: + \text{dst}(x, y) = \text{src} \left( M_{11} x + M_{12} y + M_{13} , + M_{21} x + M_{22} y + M_{23} \right ) + + Args: + src (torch.Tensor): input tensor of shape :math:`(B, C, H, W)`. + M (torch.Tensor): affine transformation of shape :math:`(B, 2, 3)`. + dsize (Tuple[int, int]): size of the output image (height, width). + mode (str): interpolation mode to calculate output values + 'bilinear' | 'nearest'. Default: 'bilinear'. 
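The forward pass above ultimately reduces to F.grid_sample over a warped grid. As a sanity sketch (shapes and values are illustrative, not taken from the patch), sampling with an identity grid in normalized coordinates returns the original patch:

import torch
import torch.nn.functional as F

patch = torch.rand(1, 3, 8, 8)
ys, xs = torch.meshgrid(torch.linspace(-1, 1, 8), torch.linspace(-1, 1, 8), indexing="ij")
grid = torch.stack((xs, ys), dim=-1).unsqueeze(0)  # 1xHxWx2, (x, y) ordering as grid_sample expects
warped = F.grid_sample(patch, grid, mode="bilinear", padding_mode="zeros", align_corners=True)
assert torch.allclose(warped, patch, atol=1e-5)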
+ padding_mode (str): padding mode for outside grid values + 'zeros' | 'border' | 'reflection'. Default: 'zeros'. + + Returns: + torch.Tensor: the warped tensor. + + Shape: + - Output: :math:`(B, C, H, W)` + + .. note:: + See a working example `here `__. + """ + if not torch.is_tensor(src): + raise TypeError("Input src type is not a torch.Tensor. Got {}".format(type(src))) + + if not torch.is_tensor(M): + raise TypeError("Input M type is not a torch.Tensor. Got {}".format(type(M))) + + if not len(src.shape) == 4: + raise ValueError("Input src must be a BxCxHxW tensor. Got {}".format(src.shape)) + + if not (len(M.shape) == 3 or M.shape[-2:] == (2, 3)): + raise ValueError("Input M must be a Bx2x3 tensor. Got {}".format(src.shape)) + + # we generate a 3x3 transformation matrix from 2x3 affine + M_3x3: torch.Tensor = F.pad(M, [0, 0, 0, 1, 0, 0], mode="constant", value=0) + M_3x3[:, 2, 2] += 1.0 + + # launches the warper + h, w = src.shape[-2:] + return transform_warp_impl(src, centers, M_3x3, (h, w), dsize, flags, padding_mode) diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dataset/preprocessing.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dataset/preprocessing.py new file mode 100644 index 000000000..21d42d9b6 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dataset/preprocessing.py @@ -0,0 +1,261 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 +import torch +import numpy as np +import collections.abc +from torch.utils.data._utils.collate import default_collate +import dill + +container_abcs = collections.abc + + +def restore(data): + """ + In case we dilled some structures to share between multiple process this function will restore them. + If the data input are not bytes we assume it was not dilled in the first place + + :param data: Possibly dilled data structure + :return: Un-dilled data structure + """ + if type(data) is bytes: + return dill.loads(data) + return data + + +def collate(batch): + if len(batch) == 0: + return batch + elem = batch[0] + if elem is None: + return None + elif isinstance(elem, container_abcs.Sequence): + if len(elem) == 4: # We assume those are the maps, map points, headings and patch_size + scene_map, scene_pts, heading_angle, patch_size = zip(*batch) + if heading_angle[0] is None: + heading_angle = None + else: + heading_angle = torch.Tensor(heading_angle) + map = scene_map[0].get_cropped_maps_from_scene_map_batch( + scene_map, scene_pts=torch.Tensor(scene_pts), patch_size=patch_size[0], rotation=heading_angle + ) + return map + transposed = zip(*batch) + return [collate(samples) for samples in transposed] + elif isinstance(elem, container_abcs.Mapping): + # We have to dill the neighbors structures. 
Otherwise each tensor is put into + # shared memory separately -> slow, file pointer overhead + # we only do this in multiprocessing + neighbor_dict = {key: [d[key] for d in batch] for key in elem} + return dill.dumps(neighbor_dict) if torch.utils.data.get_worker_info() else neighbor_dict + return default_collate(batch) + + +def get_relative_robot_traj(env, state, node_traj, robot_traj, node_type, robot_type): + # TODO: We will have to make this more generic if robot_type != node_type + # Make Robot State relative to node + _, std = env.get_standardize_params(state[robot_type], node_type=robot_type) + std[0:2] = env.attention_radius[(node_type, robot_type)] + robot_traj_st = env.standardize(robot_traj, state[robot_type], node_type=robot_type, mean=node_traj, std=std) + robot_traj_st_t = torch.tensor(robot_traj_st, dtype=torch.float) + + return robot_traj_st_t + + +def get_node_timestep_data( + env, scene, t, node, state, pred_state, edge_types, max_ht, max_ft, hyperparams, scene_graph=None +): + """ + Pre-processes the data for a single batch element: node state over time for a specific time in a specific scene + as well as the neighbour data for it. + + :param env: Environment + :param scene: Scene + :param t: Timestep in scene + :param node: Node + :param state: Specification of the node state + :param pred_state: Specification of the prediction state + :param edge_types: List of all Edge Types for which neighbours are pre-processed + :param max_ht: Maximum history timesteps + :param max_ft: Maximum future timesteps (prediction horizon) + :param hyperparams: Model hyperparameters + :param scene_graph: If scene graph was already computed for this scene and time you can pass it here + :return: Batch Element + """ + + # Node + timestep_range_x = np.array([t - max_ht, t]) + timestep_range_y = np.array([t + 1, t + max_ft]) + + x = node.get(timestep_range_x, state[node.type]) + y = node.get(timestep_range_y, pred_state[node.type]) + first_history_index = (max_ht - node.history_points_at(t)).clip(0) + + _, std = env.get_standardize_params(state[node.type], node.type) + std[0:2] = env.attention_radius[(node.type, node.type)] + rel_state = np.zeros_like(x[0]) + rel_state[0:2] = np.array(x)[-1, 0:2] + x_st = env.standardize(x, state[node.type], node.type, mean=rel_state, std=std) + if list(pred_state[node.type].keys())[0] == "position": # If we predict position we do it relative to current pos + y_st = env.standardize(y, pred_state[node.type], node.type, mean=rel_state[0:2]) + else: + y_st = env.standardize(y, pred_state[node.type], node.type) + + x_t = torch.tensor(x, dtype=torch.float) + y_t = torch.tensor(y, dtype=torch.float) + x_st_t = torch.tensor(x_st, dtype=torch.float) + y_st_t = torch.tensor(y_st, dtype=torch.float) + + # Neighbors + neighbors_data_st = None + neighbors_edge_value = None + if hyperparams["edge_encoding"]: + # Scene Graph + scene_graph = ( + scene.get_scene_graph( + t, env.attention_radius, hyperparams["edge_addition_filter"], hyperparams["edge_removal_filter"] + ) + if scene_graph is None + else scene_graph + ) + + neighbors_data_st = dict() + neighbors_edge_value = dict() + for edge_type in edge_types: + neighbors_data_st[edge_type] = list() + # We get all nodes which are connected to the current node for the current timestep + connected_nodes = scene_graph.get_neighbors(node, edge_type[1]) + + if hyperparams["dynamic_edges"] == "yes": + # We get the edge masks for the current node at the current timestep + edge_masks = torch.tensor(scene_graph.get_edge_scaling(node), 
dtype=torch.float) + neighbors_edge_value[edge_type] = edge_masks + + for connected_node in connected_nodes: + neighbor_state_np = connected_node.get( + np.array([t - max_ht, t]), state[connected_node.type], padding=0.0 + ) + + # Make State relative to node where neighbor and node have same state + _, std = env.get_standardize_params(state[connected_node.type], node_type=connected_node.type) + std[0:2] = env.attention_radius[edge_type] + equal_dims = np.min((neighbor_state_np.shape[-1], x.shape[-1])) + rel_state = np.zeros_like(neighbor_state_np) + rel_state[:, ..., :equal_dims] = x[-1, ..., :equal_dims] + neighbor_state_np_st = env.standardize( + neighbor_state_np, + state[connected_node.type], + node_type=connected_node.type, + mean=rel_state, + std=std, + ) + + neighbor_state = torch.tensor(neighbor_state_np_st, dtype=torch.float) + neighbors_data_st[edge_type].append(neighbor_state) + + # Robot + robot_traj_st_t = None + if hyperparams["incl_robot_node"]: + timestep_range_r = np.array([t, t + max_ft]) + if scene.non_aug_scene is not None: + robot = scene.get_node_by_id(scene.non_aug_scene.robot.id) + else: + robot = scene.robot + robot_type = robot.type + robot_traj = robot.get(timestep_range_r, state[robot_type], padding=0.0) + node_state = np.zeros_like(robot_traj[0]) + node_state[: x.shape[1]] = x[-1] + robot_traj_st_t = get_relative_robot_traj(env, state, node_state, robot_traj, node.type, robot_type) + + # Map + map_tuple = None + if hyperparams["use_map_encoding"]: + if node.type in hyperparams["map_encoder"]: + if node.non_aug_node is not None: + x = node.non_aug_node.get(np.array([t]), state[node.type]) + me_hyp = hyperparams["map_encoder"][node.type] + if "heading_state_index" in me_hyp: + heading_state_index = me_hyp["heading_state_index"] + # We have to rotate the map in the opposit direction of the agent to match them + if type(heading_state_index) is list: # infer from velocity or heading vector + heading_angle = ( + -np.arctan2(x[-1, heading_state_index[1]], x[-1, heading_state_index[0]]) * 180 / np.pi + ) + else: + heading_angle = -x[-1, heading_state_index] * 180 / np.pi + else: + heading_angle = None + + scene_map = scene.map[node.type] + map_point = x[-1, :2] + + patch_size = hyperparams["map_encoder"][node.type]["patch_size"] + map_tuple = (scene_map, map_point, heading_angle, patch_size) + + return ( + first_history_index, + x_t, + y_t, + x_st_t, + y_st_t, + neighbors_data_st, + neighbors_edge_value, + robot_traj_st_t, + map_tuple, + ) + + +def get_timesteps_data( + env, scene, t, node_type, state, pred_state, edge_types, min_ht, max_ht, min_ft, max_ft, hyperparams +): + """ + Puts together the inputs for ALL nodes in a given scene and timestep in it. 
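The heading computation above turns a velocity vector into a rotation in degrees with the sign flipped so the map can be rotated back against the agent's heading; a tiny numeric sketch (the velocity values are made up):

import numpy as np

vx, vy = 1.0, 1.0  # agent moving diagonally towards +x, +y
heading_angle = -np.arctan2(vy, vx) * 180 / np.pi
# -> -45.0 degrees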
+ + :param env: Environment + :param scene: Scene + :param t: Timestep in scene + :param node_type: Node Type of nodes for which the data shall be pre-processed + :param state: Specification of the node state + :param pred_state: Specification of the prediction state + :param edge_types: List of all Edge Types for which neighbors are pre-processed + :param max_ht: Maximum history timesteps + :param max_ft: Maximum future timesteps (prediction horizon) + :param hyperparams: Model hyperparameters + :return: + """ + nodes_per_ts = scene.present_nodes( + t, + type=node_type, + min_history_timesteps=min_ht, + min_future_timesteps=max_ft, + return_robot=not hyperparams["incl_robot_node"], + ) + batch = list() + nodes = list() + out_timesteps = list() + for timestep in nodes_per_ts.keys(): + scene_graph = scene.get_scene_graph( + timestep, env.attention_radius, hyperparams["edge_addition_filter"], hyperparams["edge_removal_filter"] + ) + present_nodes = nodes_per_ts[timestep] + for node in present_nodes: + nodes.append(node) + out_timesteps.append(timestep) + batch.append( + get_node_timestep_data( + env, + scene, + timestep, + node, + state, + pred_state, + edge_types, + max_ht, + max_ft, + hyperparams, + scene_graph=scene_graph, + ) + ) + if len(out_timesteps) == 0: + return None + return collate(batch), nodes, out_timesteps diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dynamics/__init__.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dynamics/__init__.py new file mode 100644 index 000000000..968fc5653 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dynamics/__init__.py @@ -0,0 +1,7 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 +from model.dynamics.dynamic import Dynamic +from model.dynamics.single_integrator import SingleIntegrator +from model.dynamics.unicycle import Unicycle +from model.dynamics.linear import Linear diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dynamics/dynamic.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dynamics/dynamic.py new file mode 100644 index 000000000..4fd3b6b0f --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dynamics/dynamic.py @@ -0,0 +1,33 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 + + +class Dynamic(object): + def __init__(self, dt, dyn_limits, device, model_registrar, xz_size, node_type): + self.dt = dt + self.device = device + self.dyn_limits = dyn_limits + self.initial_conditions = None + self.model_registrar = model_registrar + self.node_type = node_type + self.init_constants() + self.create_graph(xz_size) + + def set_initial_condition(self, init_con): + self.initial_conditions = init_con + + def init_constants(self): + pass + + def create_graph(self, xz_size): + pass + + def integrate_samples(self, s, x): + raise NotImplementedError + + def integrate_distribution(self, dist, x): + raise NotImplementedError + + def create_graph(self, xz_size): + pass diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dynamics/linear.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dynamics/linear.py new file mode 100644 index 000000000..228df8008 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dynamics/linear.py @@ -0,0 +1,15 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# 
SPDX-License-Identifier: Apache-2.0 +from ..dynamics import Dynamic + + +class Linear(Dynamic): + def init_constants(self): + pass + + def integrate_samples(self, v, x): + return v + + def integrate_distribution(self, v_dist, x): + return v_dist diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dynamics/single_integrator.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dynamics/single_integrator.py new file mode 100644 index 000000000..cb2cfeb2e --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dynamics/single_integrator.py @@ -0,0 +1,67 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 +import torch +from model.dynamics import Dynamic +from utils import block_diag +from model.components import GMM2D + + +class SingleIntegrator(Dynamic): + def init_constants(self): + self.F = torch.eye(4, device=self.device, dtype=torch.float32) + self.F[0:2, 2:] = torch.eye(2, device=self.device, dtype=torch.float32) * self.dt + self.F_t = self.F.transpose(-2, -1) + + def integrate_samples(self, v, x=None): + """ + Integrates deterministic samples of velocity. + + :param v: Velocity samples + :param x: Not used for SI. + :return: Position samples + """ + p_0 = self.initial_conditions["pos"].unsqueeze(1) + return torch.cumsum(v, dim=2) * self.dt + p_0 + + def integrate_distribution(self, v_dist, x=None): + r""" + Integrates the GMM velocity distribution to a distribution over position. + The Kalman Equations are used. + + .. math:: \mu_{t+1} =\textbf{F} \mu_{t} + + .. math:: \mathbf{\Sigma}_{t+1}={\textbf {F}} \mathbf{\Sigma}_{t} {\textbf {F}}^{T} + + .. math:: + \textbf{F} = \left[ + \begin{array}{cccc} + \sigma_x^2 & \rho_p \sigma_x \sigma_y & 0 & 0 \\ + \rho_p \sigma_x \sigma_y & \sigma_y^2 & 0 & 0 \\ + 0 & 0 & \sigma_{v_x}^2 & \rho_v \sigma_{v_x} \sigma_{v_y} \\ + 0 & 0 & \rho_v \sigma_{v_x} \sigma_{v_y} & \sigma_{v_y}^2 \\ + \end{array} + \right]_{t} + + :param v_dist: Joint GMM Distribution over velocity in x and y direction. + :param x: Not used for SI. + :return: Joint GMM Distribution over position in x and y direction. 
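integrate_samples above boils down to a cumulative sum of velocities scaled by dt and offset by the current position. A minimal numeric sketch (shapes simplified, no extra sample dimension; all values are made up):

import torch

dt = 0.4
p_0 = torch.tensor([[0.0, 0.0]])                           # (bs=1, 2) current position
v = torch.tensor([[[1.0, 0.0], [1.0, 0.0], [0.0, 2.0]]])   # (bs=1, ph=3, 2) velocity samples
positions = torch.cumsum(v, dim=1) * dt + p_0.unsqueeze(1)
# -> [[[0.4, 0.0], [0.8, 0.0], [0.8, 0.8]]]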
+ """ + p_0 = self.initial_conditions["pos"].unsqueeze(1) + ph = v_dist.mus.shape[-3] + sample_batch_dim = list(v_dist.mus.shape[0:2]) + pos_dist_sigma_matrix_list = [] + + pos_mus = p_0[:, None] + torch.cumsum(v_dist.mus, dim=2) * self.dt + + vel_dist_sigma_matrix = v_dist.get_covariance_matrix() + pos_dist_sigma_matrix_t = torch.zeros(sample_batch_dim + [v_dist.components, 2, 2], device=self.device) + + for t in range(ph): + vel_sigma_matrix_t = vel_dist_sigma_matrix[:, :, t] + full_sigma_matrix_t = block_diag([pos_dist_sigma_matrix_t, vel_sigma_matrix_t]) + pos_dist_sigma_matrix_t = self.F[..., :2, :].matmul(full_sigma_matrix_t.matmul(self.F_t)[..., :2]) + pos_dist_sigma_matrix_list.append(pos_dist_sigma_matrix_t) + + pos_dist_sigma_matrix = torch.stack(pos_dist_sigma_matrix_list, dim=2) + return GMM2D.from_log_pis_mus_cov_mats(v_dist.log_pis, pos_mus, pos_dist_sigma_matrix) diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dynamics/unicycle.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dynamics/unicycle.py new file mode 100644 index 000000000..b46820063 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/dynamics/unicycle.py @@ -0,0 +1,239 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 +import torch +import torch.nn as nn +from model.dynamics import Dynamic +from utils import block_diag +from model.components import GMM2D + + +class Unicycle(Dynamic): + def init_constants(self): + self.F_s = torch.eye(4, device=self.device, dtype=torch.float32) + self.F_s[0:2, 2:] = torch.eye(2, device=self.device, dtype=torch.float32) * self.dt + self.F_s_t = self.F_s.transpose(-2, -1) + + def create_graph(self, xz_size): + model_if_absent = nn.Linear(xz_size + 1, 1) + self.p0_model = self.model_registrar.get_model(f"{self.node_type}/unicycle_initializer", model_if_absent) + + def dynamic(self, x, u): + r""" + TODO: Boris: Add docstring + :param x: + :param u: + :return: + """ + x_p = x[0] + y_p = x[1] + phi = x[2] + v = x[3] + dphi = u[0] + a = u[1] + + mask = torch.abs(dphi) <= 1e-2 + dphi = ~mask * dphi + (mask) * 1 + + phi_p_omega_dt = phi + dphi * self.dt + dsin_domega = (torch.sin(phi_p_omega_dt) - torch.sin(phi)) / dphi + dcos_domega = (torch.cos(phi_p_omega_dt) - torch.cos(phi)) / dphi + + d1 = torch.stack( + [ + (x_p + (a / dphi) * dcos_domega + v * dsin_domega + (a / dphi) * torch.sin(phi_p_omega_dt) * self.dt), + (y_p - v * dcos_domega + (a / dphi) * dsin_domega - (a / dphi) * torch.cos(phi_p_omega_dt) * self.dt), + phi + dphi * self.dt, + v + a * self.dt, + ], + dim=0, + ) + d2 = torch.stack( + [ + x_p + v * torch.cos(phi) * self.dt + (a / 2) * torch.cos(phi) * self.dt**2, + y_p + v * torch.sin(phi) * self.dt + (a / 2) * torch.sin(phi) * self.dt**2, + phi * torch.ones_like(a), + v + a * self.dt, + ], + dim=0, + ) + return torch.where(~mask, d1, d2) + + def integrate_samples(self, control_samples, x=None): + r""" + TODO: Boris: Add docstring + :param x: + :param u: + :return: + """ + ph = control_samples.shape[-2] + p_0 = self.initial_conditions["pos"].unsqueeze(1) + v_0 = self.initial_conditions["vel"].unsqueeze(1) + + # In case the input is batched because of the robot in online use we repeat this to match the batch size of x. 
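The dynamic step above guards against division by a near-zero turn rate by substituting 1 before dividing and letting torch.where pick the straight-line branch afterwards; a standalone sketch of that pattern (the values and stand-in branches are illustrative):

import torch

dphi = torch.tensor([0.0, 0.5])
mask = torch.abs(dphi) <= 1e-2           # True where the turn rate is effectively zero
safe_dphi = ~mask * dphi + mask * 1      # -> [1.0, 0.5], no division by zero below
turning = 1.0 / safe_dphi                # only meaningful where mask is False
straight = torch.full_like(dphi, 99.0)   # stand-in for the straight-line expression
result = torch.where(~mask, turning, straight)
# -> [99.0, 2.0]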
+ if p_0.size()[0] != x.size()[0]: + p_0 = p_0.repeat(x.size()[0], 1, 1) + v_0 = v_0.repeat(x.size()[0], 1, 1) + + phi_0 = torch.atan2(v_0[..., 1], v_0[..., 0]) + + phi_0 = phi_0 + torch.tanh(self.p0_model(torch.cat((x, phi_0), dim=-1))) + + u = torch.stack([control_samples[..., 0], control_samples[..., 1]], dim=0) + x = torch.stack([p_0[..., 0], p_0[..., 1], phi_0, torch.norm(v_0, dim=-1)], dim=0).squeeze(dim=-1) + + mus_list = [] + for t in range(ph): + x = self.dynamic(x, u[..., t]) + mus_list.append(torch.stack((x[0], x[1]), dim=-1)) + + pos_mus = torch.stack(mus_list, dim=2) + return pos_mus + + def compute_control_jacobian(self, sample_batch_dim, components, x, u): + r""" + TODO: Boris: Add docstring + :param x: + :param u: + :return: + """ + F = torch.zeros(sample_batch_dim + [components, 4, 2], device=self.device, dtype=torch.float32) + + phi = x[2] + v = x[3] + dphi = u[0] + a = u[1] + + mask = torch.abs(dphi) <= 1e-2 + dphi = ~mask * dphi + (mask) * 1 + + phi_p_omega_dt = phi + dphi * self.dt + dsin_domega = (torch.sin(phi_p_omega_dt) - torch.sin(phi)) / dphi + dcos_domega = (torch.cos(phi_p_omega_dt) - torch.cos(phi)) / dphi + + F[..., 0, 0] = ( + (v / dphi) * torch.cos(phi_p_omega_dt) * self.dt + - (v / dphi) * dsin_domega + - (2 * a / dphi**2) * torch.sin(phi_p_omega_dt) * self.dt + - (2 * a / dphi**2) * dcos_domega + + (a / dphi) * torch.cos(phi_p_omega_dt) * self.dt**2 + ) + F[..., 0, 1] = (1 / dphi) * dcos_domega + (1 / dphi) * torch.sin(phi_p_omega_dt) * self.dt + + F[..., 1, 0] = ( + (v / dphi) * dcos_domega + - (2 * a / dphi**2) * dsin_domega + + (2 * a / dphi**2) * torch.cos(phi_p_omega_dt) * self.dt + + (v / dphi) * torch.sin(phi_p_omega_dt) * self.dt + + (a / dphi) * torch.sin(phi_p_omega_dt) * self.dt**2 + ) + F[..., 1, 1] = (1 / dphi) * dsin_domega - (1 / dphi) * torch.cos(phi_p_omega_dt) * self.dt + + F[..., 2, 0] = self.dt + + F[..., 3, 1] = self.dt + + F_sm = torch.zeros(sample_batch_dim + [components, 4, 2], device=self.device, dtype=torch.float32) + + F_sm[..., 0, 1] = (torch.cos(phi) * self.dt**2) / 2 + + F_sm[..., 1, 1] = (torch.sin(phi) * self.dt**2) / 2 + + F_sm[..., 3, 1] = self.dt + + return torch.where(~mask.unsqueeze(-1).unsqueeze(-1), F, F_sm) + + def compute_jacobian(self, sample_batch_dim, components, x, u): + r""" + TODO: Boris: Add docstring + :param x: + :param u: + :return: + """ + one = torch.tensor(1) + F = torch.zeros(sample_batch_dim + [components, 4, 4], device=self.device, dtype=torch.float32) + + phi = x[2] + v = x[3] + dphi = u[0] + a = u[1] + + mask = torch.abs(dphi) <= 1e-2 + dphi = ~mask * dphi + (mask) * 1 + + phi_p_omega_dt = phi + dphi * self.dt + dsin_domega = (torch.sin(phi_p_omega_dt) - torch.sin(phi)) / dphi + dcos_domega = (torch.cos(phi_p_omega_dt) - torch.cos(phi)) / dphi + + F[..., 0, 0] = one + F[..., 1, 1] = one + F[..., 2, 2] = one + F[..., 3, 3] = one + + F[..., 0, 2] = v * dcos_domega - (a / dphi) * dsin_domega + (a / dphi) * torch.cos(phi_p_omega_dt) * self.dt + F[..., 0, 3] = dsin_domega + + F[..., 1, 2] = v * dsin_domega + (a / dphi) * dcos_domega + (a / dphi) * torch.sin(phi_p_omega_dt) * self.dt + F[..., 1, 3] = -dcos_domega + + F_sm = torch.zeros(sample_batch_dim + [components, 4, 4], device=self.device, dtype=torch.float32) + + F_sm[..., 0, 0] = one + F_sm[..., 1, 1] = one + F_sm[..., 2, 2] = one + F_sm[..., 3, 3] = one + + F_sm[..., 0, 2] = -v * torch.sin(phi) * self.dt - (a * torch.sin(phi) * self.dt**2) / 2 + F_sm[..., 0, 3] = torch.cos(phi) * self.dt + + F_sm[..., 1, 2] = v * torch.cos(phi) * self.dt + (a * 
torch.cos(phi) * self.dt**2) / 2 + F_sm[..., 1, 3] = torch.sin(phi) * self.dt + + return torch.where(~mask.unsqueeze(-1).unsqueeze(-1), F, F_sm) + + def integrate_distribution(self, control_dist_dphi_a, x): + r""" + TODO: Boris: Add docstring + :param x: + :param u: + :return: + """ + sample_batch_dim = list(control_dist_dphi_a.mus.shape[0:2]) + ph = control_dist_dphi_a.mus.shape[-3] + p_0 = self.initial_conditions["pos"].unsqueeze(1) + v_0 = self.initial_conditions["vel"].unsqueeze(1) + + # In case the input is batched because of the robot in online use we repeat this to match the batch size of x. + if p_0.size()[0] != x.size()[0]: + p_0 = p_0.repeat(x.size()[0], 1, 1) + v_0 = v_0.repeat(x.size()[0], 1, 1) + + phi_0 = torch.atan2(v_0[..., 1], v_0[..., 0]) + + phi_0 = phi_0 + torch.tanh(self.p0_model(torch.cat((x, phi_0), dim=-1))) + + dist_sigma_matrix = control_dist_dphi_a.get_covariance_matrix() + pos_dist_sigma_matrix_t = torch.zeros( + sample_batch_dim + [control_dist_dphi_a.components, 4, 4], device=self.device + ) + + u = torch.stack([control_dist_dphi_a.mus[..., 0], control_dist_dphi_a.mus[..., 1]], dim=0) + x = torch.stack([p_0[..., 0], p_0[..., 1], phi_0, torch.norm(v_0, dim=-1)], dim=0) + + pos_dist_sigma_matrix_list = [] + mus_list = [] + for t in range(ph): + F_t = self.compute_jacobian(sample_batch_dim, control_dist_dphi_a.components, x, u[:, :, :, t]) + G_t = self.compute_control_jacobian(sample_batch_dim, control_dist_dphi_a.components, x, u[:, :, :, t]) + dist_sigma_matrix_t = dist_sigma_matrix[:, :, t] + pos_dist_sigma_matrix_t = F_t.matmul(pos_dist_sigma_matrix_t.matmul(F_t.transpose(-2, -1))) + G_t.matmul( + dist_sigma_matrix_t.matmul(G_t.transpose(-2, -1)) + ) + pos_dist_sigma_matrix_list.append(pos_dist_sigma_matrix_t[..., :2, :2]) + + x = self.dynamic(x, u[:, :, :, t]) + mus_list.append(torch.stack((x[0], x[1]), dim=-1)) + + pos_dist_sigma_matrix = torch.stack(pos_dist_sigma_matrix_list, dim=2) + pos_mus = torch.stack(mus_list, dim=2) + return GMM2D.from_log_pis_mus_cov_mats(control_dist_dphi_a.log_pis, pos_mus, pos_dist_sigma_matrix) diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/mgcvae.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/mgcvae.py new file mode 100644 index 000000000..c05e86229 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/mgcvae.py @@ -0,0 +1,1240 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 +import warnings +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim +from model.components import * +from model.model_utils import * +import model.dynamics as dynamic_module +from environment.scene_graph import DirectedEdge + + +class MultimodalGenerativeCVAE(torch.nn.Module): + def __init__(self, env, node_type, model_registrar, hyperparams, device, edge_types, log_writer=None): + super().__init__() + self.hyperparams = hyperparams + self.env = env + self.node_type = node_type + self.model_registrar = model_registrar + self.log_writer = log_writer + self.device = device + self.edge_types = [edge_type for edge_type in edge_types if edge_type[0] is node_type] + self.curr_iter = 0 + + self.node_modules = dict() + self.node_modules = torch.nn.ModuleDict() + + self.min_hl = self.hyperparams["minimum_history_length"] + self.max_hl = self.hyperparams["maximum_history_length"] + self.ph = self.hyperparams["prediction_horizon"] + self.state = self.hyperparams["state"] + self.pred_state = 
self.hyperparams["pred_state"][node_type] + self.state_length = int(np.sum([len(entity_dims) for entity_dims in self.state[node_type].values()])) + if self.hyperparams["incl_robot_node"]: + self.robot_state_length = int( + np.sum([len(entity_dims) for entity_dims in self.state[env.robot_type].values()]) + ) + self.pred_state_length = int(np.sum([len(entity_dims) for entity_dims in self.pred_state.values()])) + + edge_types_str = [DirectedEdge.get_str_from_types(*edge_type) for edge_type in self.edge_types] + self.create_graphical_model(edge_types_str) + + dynamic_class = getattr(dynamic_module, hyperparams["dynamic"][self.node_type]["name"]) + dyn_limits = hyperparams["dynamic"][self.node_type]["limits"] + self.dynamic = dynamic_class( + self.env.scenes[0].dt, dyn_limits, device, self.model_registrar, self.x_size, self.node_type + ) + + def eval(self): + super().eval() + for key in self.node_modules.keys(): + self.node_modules[key].eval() + + def set_curr_iter(self, curr_iter): + self.curr_iter = curr_iter + + def add_submodule(self, name, model_if_absent): + self.node_modules[name] = self.model_registrar.get_model(name, model_if_absent) + + def clear_submodules(self): + self.node_modules.clear() + + def create_node_models(self): + ############################ + # Node History Encoder # + ############################ + self.add_submodule( + self.node_type + "/node_history_encoder", + model_if_absent=nn.LSTM( + input_size=self.state_length, hidden_size=self.hyperparams["enc_rnn_dim_history"], batch_first=True + ), + ) + + ########################### + # Node Future Encoder # + ########################### + # We'll create this here, but then later check if in training mode. + # Based on that, we'll factor this into the computation graph (or not). + self.add_submodule( + self.node_type + "/node_future_encoder", + model_if_absent=nn.LSTM( + input_size=self.pred_state_length, + hidden_size=self.hyperparams["enc_rnn_dim_future"], + bidirectional=True, + batch_first=True, + ), + ) + # These are related to how you initialize states for the node future encoder. + self.add_submodule( + self.node_type + "/node_future_encoder/initial_h", + model_if_absent=nn.Linear(self.state_length, self.hyperparams["enc_rnn_dim_future"]), + ) + self.add_submodule( + self.node_type + "/node_future_encoder/initial_c", + model_if_absent=nn.Linear(self.state_length, self.hyperparams["enc_rnn_dim_future"]), + ) + + ############################ + # Robot Future Encoder # + ############################ + # We'll create this here, but then later check if we're next to the robot. + # Based on that, we'll factor this into the computation graph (or not). + if self.hyperparams["incl_robot_node"]: + self.add_submodule( + "robot_future_encoder", + model_if_absent=nn.LSTM( + input_size=self.robot_state_length, + hidden_size=self.hyperparams["enc_rnn_dim_future"], + bidirectional=True, + batch_first=True, + ), + ) + # These are related to how you initialize states for the robot future encoder. 
+ self.add_submodule( + "robot_future_encoder/initial_h", + model_if_absent=nn.Linear(self.robot_state_length, self.hyperparams["enc_rnn_dim_future"]), + ) + self.add_submodule( + "robot_future_encoder/initial_c", + model_if_absent=nn.Linear(self.robot_state_length, self.hyperparams["enc_rnn_dim_future"]), + ) + + if self.hyperparams["edge_encoding"]: + ############################## + # Edge Influence Encoder # + ############################## + # NOTE: The edge influence encoding happens during calls + # to forward or incremental_forward, so we don't create + # a model for it here for the max and sum variants. + if self.hyperparams["edge_influence_combine_method"] == "bi-rnn": + self.add_submodule( + self.node_type + "/edge_influence_encoder", + model_if_absent=nn.LSTM( + input_size=self.hyperparams["enc_rnn_dim_edge"], + hidden_size=self.hyperparams["enc_rnn_dim_edge_influence"], + bidirectional=True, + batch_first=True, + ), + ) + + # Four times because we're trying to mimic a bi-directional + # LSTM's output (which, here, is c and h from both ends). + self.eie_output_dims = 4 * self.hyperparams["enc_rnn_dim_edge_influence"] + + elif self.hyperparams["edge_influence_combine_method"] == "attention": + # Chose additive attention because of https://arxiv.org/pdf/1703.03906.pdf + # We calculate an attention context vector using the encoded edges as the "encoder" + # (that we attend _over_) + # and the node history encoder representation as the "decoder state" (that we attend _on_). + self.add_submodule( + self.node_type + "/edge_influence_encoder", + model_if_absent=AdditiveAttention( + encoder_hidden_state_dim=self.hyperparams["enc_rnn_dim_edge_influence"], + decoder_hidden_state_dim=self.hyperparams["enc_rnn_dim_history"], + ), + ) + + self.eie_output_dims = self.hyperparams["enc_rnn_dim_edge_influence"] + + ################### + # Map Encoder # + ################### + if self.hyperparams["use_map_encoding"]: + if self.node_type in self.hyperparams["map_encoder"]: + me_params = self.hyperparams["map_encoder"][self.node_type] + self.add_submodule( + self.node_type + "/map_encoder", + model_if_absent=CNNMapEncoder( + me_params["map_channels"], + me_params["hidden_channels"], + me_params["output_size"], + me_params["masks"], + me_params["strides"], + me_params["patch_size"], + ), + ) + + ################################ + # Discrete Latent Variable # + ################################ + self.latent = DiscreteLatent(self.hyperparams, self.device) + + ###################################################################### + # Various Fully-Connected Layers from Encoder to Latent Variable # + ###################################################################### + # Node History Encoder + x_size = self.hyperparams["enc_rnn_dim_history"] + if self.hyperparams["edge_encoding"]: + # Edge Encoder + x_size += self.eie_output_dims + if self.hyperparams["incl_robot_node"]: + # Future Conditional Encoder + x_size += 4 * self.hyperparams["enc_rnn_dim_future"] + if self.hyperparams["use_map_encoding"] and self.node_type in self.hyperparams["map_encoder"]: + # Map Encoder + x_size += self.hyperparams["map_encoder"][self.node_type]["output_size"] + + z_size = self.hyperparams["N"] * self.hyperparams["K"] + + if self.hyperparams["p_z_x_MLP_dims"] is not None: + self.add_submodule( + self.node_type + "/p_z_x", model_if_absent=nn.Linear(x_size, self.hyperparams["p_z_x_MLP_dims"]) + ) + hx_size = self.hyperparams["p_z_x_MLP_dims"] + else: + hx_size = x_size + + self.add_submodule(self.node_type + "/hx_to_z", 
model_if_absent=nn.Linear(hx_size, self.latent.z_dim)) + + if self.hyperparams["q_z_xy_MLP_dims"] is not None: + self.add_submodule( + self.node_type + "/q_z_xy", + # Node Future Encoder + model_if_absent=nn.Linear( + x_size + 4 * self.hyperparams["enc_rnn_dim_future"], self.hyperparams["q_z_xy_MLP_dims"] + ), + ) + hxy_size = self.hyperparams["q_z_xy_MLP_dims"] + else: + # Node Future Encoder + hxy_size = x_size + 4 * self.hyperparams["enc_rnn_dim_future"] + + self.add_submodule(self.node_type + "/hxy_to_z", model_if_absent=nn.Linear(hxy_size, self.latent.z_dim)) + + #################### + # Decoder LSTM # + #################### + if self.hyperparams["incl_robot_node"]: + decoder_input_dims = self.pred_state_length + self.robot_state_length + z_size + x_size + else: + decoder_input_dims = self.pred_state_length + z_size + x_size + + self.add_submodule( + self.node_type + "/decoder/state_action", + model_if_absent=nn.Sequential(nn.Linear(self.state_length, self.pred_state_length)), + ) + + self.add_submodule( + self.node_type + "/decoder/rnn_cell", + model_if_absent=nn.GRUCell(decoder_input_dims, self.hyperparams["dec_rnn_dim"]), + ) + self.add_submodule( + self.node_type + "/decoder/initial_h", + model_if_absent=nn.Linear(z_size + x_size, self.hyperparams["dec_rnn_dim"]), + ) + + ################### + # Decoder GMM # + ################### + self.add_submodule( + self.node_type + "/decoder/proj_to_GMM_log_pis", + model_if_absent=nn.Linear(self.hyperparams["dec_rnn_dim"], self.hyperparams["GMM_components"]), + ) + self.add_submodule( + self.node_type + "/decoder/proj_to_GMM_mus", + model_if_absent=nn.Linear( + self.hyperparams["dec_rnn_dim"], self.hyperparams["GMM_components"] * self.pred_state_length + ), + ) + self.add_submodule( + self.node_type + "/decoder/proj_to_GMM_log_sigmas", + model_if_absent=nn.Linear( + self.hyperparams["dec_rnn_dim"], self.hyperparams["GMM_components"] * self.pred_state_length + ), + ) + self.add_submodule( + self.node_type + "/decoder/proj_to_GMM_corrs", + model_if_absent=nn.Linear(self.hyperparams["dec_rnn_dim"], self.hyperparams["GMM_components"]), + ) + + self.x_size = x_size + self.z_size = z_size + + def create_edge_models(self, edge_types): + for edge_type in edge_types: + neighbor_state_length = int( + np.sum([len(entity_dims) for entity_dims in self.state[edge_type.split("->")[1]].values()]) + ) + if self.hyperparams["edge_state_combine_method"] == "pointnet": + self.add_submodule( + edge_type + "/pointnet_encoder", + model_if_absent=nn.Sequential( + nn.Linear(self.state_length, 2 * self.state_length), + nn.ReLU(), + nn.Linear(2 * self.state_length, 2 * self.state_length), + nn.ReLU(), + ), + ) + + edge_encoder_input_size = 2 * self.state_length + self.state_length + + elif self.hyperparams["edge_state_combine_method"] == "attention": + self.add_submodule( + self.node_type + "/edge_attention_combine", + model_if_absent=TemporallyBatchedAdditiveAttention( + encoder_hidden_state_dim=self.state_length, decoder_hidden_state_dim=self.state_length + ), + ) + edge_encoder_input_size = self.state_length + neighbor_state_length + + else: + edge_encoder_input_size = self.state_length + neighbor_state_length + + self.add_submodule( + edge_type + "/edge_encoder", + model_if_absent=nn.LSTM( + input_size=edge_encoder_input_size, + hidden_size=self.hyperparams["enc_rnn_dim_edge"], + batch_first=True, + ), + ) + + def create_graphical_model(self, edge_types): + """ + Creates or queries all trainable components. 
+ + :param edge_types: List containing strings for all possible edge types for the node type. + :return: None + """ + self.clear_submodules() + + ############################ + # Everything but Edges # + ############################ + self.create_node_models() + + ##################### + # Edge Encoders # + ##################### + if self.hyperparams["edge_encoding"]: + self.create_edge_models(edge_types) + + for name, module in self.node_modules.items(): + module.to(self.device) + + def create_new_scheduler(self, name, annealer, annealer_kws, creation_condition=True): + value_scheduler = None + rsetattr(self, name + "_scheduler", value_scheduler) + if creation_condition: + annealer_kws["device"] = self.device + value_annealer = annealer(annealer_kws) + rsetattr(self, name + "_annealer", value_annealer) + + # This is the value that we'll update on each call of + # step_annealers(). + rsetattr(self, name, value_annealer(0).clone().detach()) + dummy_optimizer = optim.Optimizer([rgetattr(self, name)], {"lr": value_annealer(0).clone().detach()}) + rsetattr(self, name + "_optimizer", dummy_optimizer) + + value_scheduler = CustomLR(dummy_optimizer, value_annealer) + rsetattr(self, name + "_scheduler", value_scheduler) + + self.schedulers.append(value_scheduler) + self.annealed_vars.append(name) + + def set_annealing_params(self): + self.schedulers = list() + self.annealed_vars = list() + + self.create_new_scheduler( + name="kl_weight", + annealer=sigmoid_anneal, + annealer_kws={ + "start": self.hyperparams["kl_weight_start"], + "finish": self.hyperparams["kl_weight"], + "center_step": self.hyperparams["kl_crossover"], + "steps_lo_to_hi": self.hyperparams["kl_crossover"] / self.hyperparams["kl_sigmoid_divisor"], + }, + ) + + self.create_new_scheduler( + name="latent.temp", + annealer=exp_anneal, + annealer_kws={ + "start": self.hyperparams["tau_init"], + "finish": self.hyperparams["tau_final"], + "rate": self.hyperparams["tau_decay_rate"], + }, + ) + + self.create_new_scheduler( + name="latent.z_logit_clip", + annealer=sigmoid_anneal, + annealer_kws={ + "start": self.hyperparams["z_logit_clip_start"], + "finish": self.hyperparams["z_logit_clip_final"], + "center_step": self.hyperparams["z_logit_clip_crossover"], + "steps_lo_to_hi": self.hyperparams["z_logit_clip_crossover"] / self.hyperparams["z_logit_clip_divisor"], + }, + creation_condition=self.hyperparams["use_z_logit_clipping"], + ) + + def step_annealers(self): + # This should manage all of the step-wise changed + # parameters automatically. + for idx, annealed_var in enumerate(self.annealed_vars): + if rgetattr(self, annealed_var + "_scheduler") is not None: + # First we step the scheduler. + with warnings.catch_warnings(): # We use a dummy optimizer: Warning because no .step() was called on it + warnings.simplefilter("ignore") + rgetattr(self, annealed_var + "_scheduler").step() + + # Then we set the annealed vars' value. 
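create_new_scheduler above stores each annealed scalar as the learning rate of a dummy optimizer so that a standard LR scheduler can update it each step. A rough equivalent with stock PyTorch classes (SGD and LambdaLR stand in for the project's dummy Optimizer and CustomLR; the schedule itself is made up):

import torch

dummy_param = torch.nn.Parameter(torch.zeros(1))
dummy_opt = torch.optim.SGD([dummy_param], lr=1.0)
sched = torch.optim.lr_scheduler.LambdaLR(dummy_opt, lr_lambda=lambda step: min(1.0, step / 100.0))

for step in range(3):
    sched.step()  # torch warns that optimizer.step() was never called, hence the warning filter above
    kl_weight = dummy_opt.param_groups[0]["lr"]  # read the annealed value back, as step_annealers() does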
+ rsetattr(self, annealed_var, rgetattr(self, annealed_var + "_optimizer").param_groups[0]["lr"]) + + self.summarize_annealers() + + def summarize_annealers(self): + if self.log_writer is not None: + for annealed_var in self.annealed_vars: + if rgetattr(self, annealed_var) is not None: + self.log_writer.add_scalar( + "%s/%s" % (str(self.node_type), annealed_var.replace(".", "/")), + rgetattr(self, annealed_var), + self.curr_iter, + ) + + def obtain_encoded_tensors( + self, + mode, + inputs, + inputs_st, + packed_inputs_st, + labels, + labels_st, + first_history_indices, + neighbors, + neighbors_edge_value, + robot, + map, + ) -> (torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor): + """ + Encodes input and output tensors for node and robot. + + :param mode: Mode in which the model is operated. E.g. Train, Eval, Predict. + :param inputs: Input tensor including the state for each agent over time [bs, t, state]. + :param inputs_st: Standardized input tensor. + :param labels: Label tensor including the label output for each agent over time [bs, t, pred_state]. + :param labels_st: Standardized label tensor. + :param first_history_indices: First timestep (index) in scene for which data is available for a node [bs] + :param neighbors: Preprocessed dict (indexed by edge type) of list of neighbor states over time. + [[bs, t, neighbor state]] + :param neighbors_edge_value: Preprocessed edge values for all neighbor nodes [[N]] + :param robot: Standardized robot state over time. [bs, t, robot_state] + :param map: Tensor of Map information. [bs, channels, x, y] + :return: tuple(x, x_nr_t, y_e, y_r, y, n_s_t0) + WHERE + - x: Encoded input / condition tensor to the CVAE x_e. + - x_r_t: Robot state (if robot is in scene). + - y_e: Encoded label / future of the node. + - y_r: Encoded future of the robot. + - y: Label / future of the node. + - n_s_t0: Standardized current state of the node. 
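The body below slices the last timestep of the [bs, t, state] input into position and velocity, assuming the first four state dimensions are (x, y, x_dot, y_dot). An illustrative shape-only sketch (dimensions made up):

import torch

bs, t, state_dim = 2, 8, 6
inputs = torch.rand(bs, t, state_dim)
node_present_state = inputs[:, -1]   # (bs, state_dim) current full state
node_pos = inputs[:, -1, 0:2]        # (bs, 2) current position
node_vel = inputs[:, -1, 2:4]        # (bs, 2) current velocity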
+ """ + + x, x_r_t, y_e, y_r, y = None, None, None, None, None + initial_dynamics = dict() + + batch_size = inputs.shape[0] + + ######################################### + # Provide basic information to encoders # + ######################################### + node_history = inputs + node_present_state = inputs[:, -1] + node_pos = inputs[:, -1, 0:2] + node_vel = inputs[:, -1, 2:4] + + node_history_st = packed_inputs_st + node_present_state_st = inputs_st[:, -1] + node_pos_st = inputs_st[:, -1, 0:2] + node_vel_st = inputs_st[:, -1, 2:4] + + n_s_t0 = node_present_state_st + + initial_dynamics["pos"] = node_pos + initial_dynamics["vel"] = node_vel + + self.dynamic.set_initial_condition(initial_dynamics) + + if self.hyperparams["incl_robot_node"]: + x_r_t, y_r = robot[..., 0, :], robot[..., 1:, :] + + ################## + # Encode History # + ################## + node_history_encoded = self.encode_node_history(mode, node_history_st, first_history_indices) + + return node_history_encoded + + ################## + # Encode Present # + ################## + node_present = node_present_state_st # [bs, state_dim] + + ################## + # Encode Future # + ################## + if mode != ModeKeys.PREDICT: + y = labels_st + + ############################## + # Encode Node Edges per Type # + ############################## + if self.hyperparams["edge_encoding"]: + node_edges_encoded = list() + for edge_type in self.edge_types: + # Encode edges for given edge type + encoded_edges_type = self.encode_edge( + mode, + node_history, + node_history_st, + edge_type, + neighbors[edge_type], + neighbors_edge_value[edge_type], + first_history_indices, + ) + node_edges_encoded.append(encoded_edges_type) # List of [bs/nbs, enc_rnn_dim] + ##################### + # Encode Node Edges # + ##################### + total_edge_influence = self.encode_total_edge_influence( + mode, node_edges_encoded, node_history_encoded, batch_size + ) + + ################ + # Map Encoding # + ################ + if self.hyperparams["use_map_encoding"] and self.node_type in self.hyperparams["map_encoder"]: + if self.log_writer and (self.curr_iter + 1) % 500 == 0: + map_clone = map.clone() + map_patch = self.hyperparams["map_encoder"][self.node_type]["patch_size"] + map_clone[:, :, map_patch[1] - 5 : map_patch[1] + 5, map_patch[0] - 5 : map_patch[0] + 5] = 1.0 + self.log_writer.add_images( + f"{self.node_type}/cropped_maps", map_clone, self.curr_iter, dataformats="NCWH" + ) + + encoded_map = self.node_modules[self.node_type + "/map_encoder"](map * 2.0 - 1.0, (mode == ModeKeys.TRAIN)) + do = self.hyperparams["map_encoder"][self.node_type]["dropout"] + encoded_map = F.dropout(encoded_map, do, training=(mode == ModeKeys.TRAIN)) + + ###################################### + # Concatenate Encoder Outputs into x # + ###################################### + x_concat_list = list() + + # Every node has an edge-influence encoder (which could just be zero). + if self.hyperparams["edge_encoding"]: + x_concat_list.append(total_edge_influence) # [bs/nbs, 4*enc_rnn_dim] + + # Every node has a history encoder. 
+ x_concat_list.append(node_history_encoded) # [bs/nbs, enc_rnn_dim_history] + + if self.hyperparams["incl_robot_node"]: + robot_future_encoder = self.encode_robot_future(mode, x_r_t, y_r) + x_concat_list.append(robot_future_encoder) + + if self.hyperparams["use_map_encoding"] and self.node_type in self.hyperparams["map_encoder"]: + if self.log_writer: + self.log_writer.add_scalar( + f"{self.node_type}/encoded_map_max", torch.max(torch.abs(encoded_map)), self.curr_iter + ) + x_concat_list.append(encoded_map) + + x = torch.cat(x_concat_list, dim=1) + + if mode == ModeKeys.TRAIN or mode == ModeKeys.EVAL: + y_e = self.encode_node_future(mode, node_present, y) + + return x, x_r_t, y_e, y_r, y, n_s_t0 + + def encode_node_history(self, mode, node_hist, first_history_indices): + """ + Encodes the nodes history. + + :param mode: Mode in which the model is operated. E.g. Train, Eval, Predict. + :param node_hist: Historic and current state of the node. [bs, mhl, state] + :param first_history_indices: First timestep (index) in scene for which data is available for a node [bs] + :return: Encoded node history tensor. [bs, enc_rnn_dim] + """ + outputs = run_lstm_on_variable_length_seqs( + self.node_modules[self.node_type + "/node_history_encoder"], + # outputs, _ = run_lstm_on_variable_length_seqs(self.node_modules[self.node_type + '/node_history_encoder'], + original_seqs=node_hist, + lower_indices=first_history_indices, + ) + + return outputs + + outputs = F.dropout( + outputs, p=1.0 - self.hyperparams["rnn_kwargs"]["dropout_keep_prob"], training=(mode == ModeKeys.TRAIN) + ) # [bs, max_time, enc_rnn_dim] + + last_index_per_sequence = -(first_history_indices + 1) + + return outputs[torch.arange(first_history_indices.shape[0]), last_index_per_sequence] + + def encode_edge( + self, mode, node_history, node_history_st, edge_type, neighbors, neighbors_edge_value, first_history_indices + ): + + max_hl = self.hyperparams["maximum_history_length"] + + edge_states_list = list() # list of [#of neighbors, max_ht, state_dim] + for i, neighbor_states in enumerate(neighbors): # Get neighbors for timestep in batch + if len(neighbor_states) == 0: # There are no neighbors for edge type # TODO necessary? + neighbor_state_length = int( + np.sum([len(entity_dims) for entity_dims in self.state[edge_type[1]].values()]) + ) + edge_states_list.append(torch.zeros((1, max_hl + 1, neighbor_state_length), device=self.device)) + else: + edge_states_list.append(torch.stack(neighbor_states, dim=0).to(self.device)) + + if self.hyperparams["edge_state_combine_method"] == "sum": + # Used in Structural-RNN to combine edges as well. + op_applied_edge_states_list = list() + for neighbors_state in edge_states_list: + op_applied_edge_states_list.append(torch.sum(neighbors_state, dim=0)) + combined_neighbors = torch.stack(op_applied_edge_states_list, dim=0) + if self.hyperparams["dynamic_edges"] == "yes": + # Should now be (bs, time, 1) + op_applied_edge_mask_list = list() + for edge_value in neighbors_edge_value: + op_applied_edge_mask_list.append( + torch.clamp(torch.sum(edge_value.to(self.device), dim=0, keepdim=True), max=1.0) + ) + combined_edge_masks = torch.stack(op_applied_edge_mask_list, dim=0) + + elif self.hyperparams["edge_state_combine_method"] == "max": + # Used in NLP, e.g. max over word embeddings in a sentence. 
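The history and edge encoders in this class pick each sequence's last valid LSTM output by counting back from the padded end with -(first_history_index + 1). A standalone sketch of that gather (the toy tensors are illustrative):

import torch

bs, max_time, enc_dim = 3, 5, 4
outputs = torch.arange(bs * max_time * enc_dim, dtype=torch.float).view(bs, max_time, enc_dim)
first_history_indices = torch.tensor([0, 2, 4])   # number of missing leading timesteps per sequence
last_index_per_sequence = -(first_history_indices + 1)
last_hidden = outputs[torch.arange(bs), last_index_per_sequence]  # (bs, enc_dim)
# row 0 reads timestep -1, row 1 timestep -3, row 2 timestep -5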
+ op_applied_edge_states_list = list() + for neighbors_state in edge_states_list: + op_applied_edge_states_list.append(torch.max(neighbors_state, dim=0)) + combined_neighbors = torch.stack(op_applied_edge_states_list, dim=0) + if self.hyperparams["dynamic_edges"] == "yes": + # Should now be (bs, time, 1) + op_applied_edge_mask_list = list() + for edge_value in neighbors_edge_value: + op_applied_edge_mask_list.append( + torch.clamp(torch.max(edge_value.to(self.device), dim=0, keepdim=True), max=1.0) + ) + combined_edge_masks = torch.stack(op_applied_edge_mask_list, dim=0) + + elif self.hyperparams["edge_state_combine_method"] == "mean": + # Used in NLP, e.g. mean over word embeddings in a sentence. + op_applied_edge_states_list = list() + for neighbors_state in edge_states_list: + op_applied_edge_states_list.append(torch.mean(neighbors_state, dim=0)) + combined_neighbors = torch.stack(op_applied_edge_states_list, dim=0) + if self.hyperparams["dynamic_edges"] == "yes": + # Should now be (bs, time, 1) + op_applied_edge_mask_list = list() + for edge_value in neighbors_edge_value: + op_applied_edge_mask_list.append( + torch.clamp(torch.mean(edge_value.to(self.device), dim=0, keepdim=True), max=1.0) + ) + combined_edge_masks = torch.stack(op_applied_edge_mask_list, dim=0) + + joint_history = torch.cat([combined_neighbors, node_history_st], dim=-1) + + outputs, _ = run_lstm_on_variable_length_seqs( + self.node_modules[DirectedEdge.get_str_from_types(*edge_type) + "/edge_encoder"], + original_seqs=joint_history, + lower_indices=first_history_indices, + ) + + outputs = F.dropout( + outputs, p=1.0 - self.hyperparams["rnn_kwargs"]["dropout_keep_prob"], training=(mode == ModeKeys.TRAIN) + ) # [bs, max_time, enc_rnn_dim] + + last_index_per_sequence = -(first_history_indices + 1) + ret = outputs[torch.arange(last_index_per_sequence.shape[0]), last_index_per_sequence] + if self.hyperparams["dynamic_edges"] == "yes": + return ret * combined_edge_masks + else: + return ret + + def encode_total_edge_influence(self, mode, encoded_edges, node_history_encoder, batch_size): + if self.hyperparams["edge_influence_combine_method"] == "sum": + stacked_encoded_edges = torch.stack(encoded_edges, dim=0) + combined_edges = torch.sum(stacked_encoded_edges, dim=0) + + elif self.hyperparams["edge_influence_combine_method"] == "mean": + stacked_encoded_edges = torch.stack(encoded_edges, dim=0) + combined_edges = torch.mean(stacked_encoded_edges, dim=0) + + elif self.hyperparams["edge_influence_combine_method"] == "max": + stacked_encoded_edges = torch.stack(encoded_edges, dim=0) + combined_edges = torch.max(stacked_encoded_edges, dim=0) + + elif self.hyperparams["edge_influence_combine_method"] == "bi-rnn": + if len(encoded_edges) == 0: + combined_edges = torch.zeros((batch_size, self.eie_output_dims), device=self.device) + + else: + # axis=1 because then we get size [batch_size, max_time, depth] + encoded_edges = torch.stack(encoded_edges, dim=1) + + _, state = self.node_modules[self.node_type + "/edge_influence_encoder"](encoded_edges) + combined_edges = unpack_RNN_state(state) + combined_edges = F.dropout( + combined_edges, + p=1.0 - self.hyperparams["rnn_kwargs"]["dropout_keep_prob"], + training=(mode == ModeKeys.TRAIN), + ) + + elif self.hyperparams["edge_influence_combine_method"] == "attention": + # Used in Social Attention (https://arxiv.org/abs/1710.04689) + if len(encoded_edges) == 0: + combined_edges = torch.zeros((batch_size, self.eie_output_dims), device=self.device) + + else: + # axis=1 because then we get 
size [batch_size, max_time, depth] + encoded_edges = torch.stack(encoded_edges, dim=1) + combined_edges, _ = self.node_modules[self.node_type + "/edge_influence_encoder"]( + encoded_edges, node_history_encoder + ) + combined_edges = F.dropout( + combined_edges, + p=1.0 - self.hyperparams["rnn_kwargs"]["dropout_keep_prob"], + training=(mode == ModeKeys.TRAIN), + ) + + return combined_edges + + def encode_node_future(self, mode, node_present, node_future) -> torch.Tensor: + """ + Encodes the node future (during training) using a bi-directional LSTM + + :param mode: Mode in which the model is operated. E.g. Train, Eval, Predict. + :param node_present: Current state of the node. [bs, state] + :param node_future: Future states of the node. [bs, ph, state] + :return: Encoded future. + """ + initial_h_model = self.node_modules[self.node_type + "/node_future_encoder/initial_h"] + initial_c_model = self.node_modules[self.node_type + "/node_future_encoder/initial_c"] + + # Here we're initializing the forward hidden states, + # but zeroing the backward ones. + initial_h = initial_h_model(node_present) + initial_h = torch.stack([initial_h, torch.zeros_like(initial_h, device=self.device)], dim=0) + + initial_c = initial_c_model(node_present) + initial_c = torch.stack([initial_c, torch.zeros_like(initial_c, device=self.device)], dim=0) + + initial_state = (initial_h, initial_c) + + _, state = self.node_modules[self.node_type + "/node_future_encoder"](node_future, initial_state) + state = unpack_RNN_state(state) + state = F.dropout( + state, p=1.0 - self.hyperparams["rnn_kwargs"]["dropout_keep_prob"], training=(mode == ModeKeys.TRAIN) + ) + + return state + + def encode_robot_future(self, mode, robot_present, robot_future) -> torch.Tensor: + """ + Encodes the robot future (during training) using a bi-directional LSTM + + :param mode: Mode in which the model is operated. E.g. Train, Eval, Predict. + :param robot_present: Current state of the robot. [bs, state] + :param robot_future: Future states of the robot. [bs, ph, state] + :return: Encoded future. + """ + initial_h_model = self.node_modules["robot_future_encoder/initial_h"] + initial_c_model = self.node_modules["robot_future_encoder/initial_c"] + + # Here we're initializing the forward hidden states, + # but zeroing the backward ones. + initial_h = initial_h_model(robot_present) + initial_h = torch.stack([initial_h, torch.zeros_like(initial_h, device=self.device)], dim=0) + + initial_c = initial_c_model(robot_present) + initial_c = torch.stack([initial_c, torch.zeros_like(initial_c, device=self.device)], dim=0) + + initial_state = (initial_h, initial_c) + + _, state = self.node_modules["robot_future_encoder"](robot_future, initial_state) + state = unpack_RNN_state(state) + state = F.dropout( + state, p=1.0 - self.hyperparams["rnn_kwargs"]["dropout_keep_prob"], training=(mode == ModeKeys.TRAIN) + ) + + return state + + def q_z_xy(self, mode, x, y_e) -> torch.Tensor: + r""" + .. math:: q_\phi(z \mid \mathbf{x}_i, \mathbf{y}_i) + + :param mode: Mode in which the model is operated. E.g. Train, Eval, Predict. + :param x: Input / Condition tensor. + :param y_e: Encoded future tensor. + :return: Latent distribution of the CVAE. 
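Mechanically, q_z_xy below concatenates the condition x with the encoded future y_e and projects to the latent logits. A minimal sketch with made-up sizes (N=5, K=5 giving z_dim=25 is only an example):

import torch
import torch.nn as nn

bs, x_size, future_enc, z_dim = 4, 32, 16, 25
x = torch.rand(bs, x_size)
y_e = torch.rand(bs, future_enc)
hxy_to_z = nn.Linear(x_size + future_enc, z_dim)
latent_logits = hxy_to_z(torch.cat([x, y_e], dim=1))  # (bs, z_dim) logits for the discrete latent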
+ """ + xy = torch.cat([x, y_e], dim=1) + + if self.hyperparams["q_z_xy_MLP_dims"] is not None: + dense = self.node_modules[self.node_type + "/q_z_xy"] + h = F.dropout( + F.relu(dense(xy)), p=1.0 - self.hyperparams["MLP_dropout_keep_prob"], training=(mode == ModeKeys.TRAIN) + ) + + else: + h = xy + + to_latent = self.node_modules[self.node_type + "/hxy_to_z"] + return self.latent.dist_from_h(to_latent(h), mode) + + def p_z_x(self, mode, x): + r""" + .. math:: p_\theta(z \mid \mathbf{x}_i) + + :param mode: Mode in which the model is operated. E.g. Train, Eval, Predict. + :param x: Input / Condition tensor. + :return: Latent distribution of the CVAE. + """ + if self.hyperparams["p_z_x_MLP_dims"] is not None: + dense = self.node_modules[self.node_type + "/p_z_x"] + h = F.dropout( + F.relu(dense(x)), p=1.0 - self.hyperparams["MLP_dropout_keep_prob"], training=(mode == ModeKeys.TRAIN) + ) + + else: + h = x + + to_latent = self.node_modules[self.node_type + "/hx_to_z"] + return self.latent.dist_from_h(to_latent(h), mode) + + def project_to_GMM_params(self, tensor) -> (torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor): + """ + Projects tensor to parameters of a GMM with N components and D dimensions. + + :param tensor: Input tensor. + :return: tuple(log_pis, mus, log_sigmas, corrs) + WHERE + - log_pis: Weight (logarithm) of each GMM component. [N] + - mus: Mean of each GMM component. [N, D] + - log_sigmas: Standard Deviation (logarithm) of each GMM component. [N, D] + - corrs: Correlation between the GMM components. [N] + """ + log_pis = self.node_modules[self.node_type + "/decoder/proj_to_GMM_log_pis"](tensor) + mus = self.node_modules[self.node_type + "/decoder/proj_to_GMM_mus"](tensor) + log_sigmas = self.node_modules[self.node_type + "/decoder/proj_to_GMM_log_sigmas"](tensor) + corrs = torch.tanh(self.node_modules[self.node_type + "/decoder/proj_to_GMM_corrs"](tensor)) + return log_pis, mus, log_sigmas, corrs + + def p_y_xz( + self, mode, x, x_nr_t, y_r, n_s_t0, z_stacked, prediction_horizon, num_samples, num_components=1, gmm_mode=False + ): + r""" + .. math:: p_\psi(\mathbf{y}_i \mid \mathbf{x}_i, z) + + :param mode: Mode in which the model is operated. E.g. Train, Eval, Predict. + :param x: Input / Condition tensor. + :param x_nr_t: Joint state of node and robot (if robot is in scene). + :param y: Future tensor. + :param y_r: Encoded future tensor. + :param n_s_t0: Standardized current state of the node. + :param z_stacked: Stacked latent state. [num_samples_z * num_samples_gmm, bs, latent_state] + :param prediction_horizon: Number of prediction timesteps. + :param num_samples: Number of samples from the latent space. + :param num_components: Number of GMM components. + :param gmm_mode: If True: The mode of the GMM is sampled. + :return: GMM2D. If mode is Predict, also samples from the GMM. 
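The decoder below rolls a GRUCell forward over the prediction horizon and projects each hidden state to distribution parameters, feeding a sample back in as the next action. A rough sketch with illustrative dimensions and a plain diagonal Gaussian in place of GMM2D:

import torch
import torch.nn as nn

bs, zx_dim, rnn_dim, ph, pred_dim = 4, 40, 64, 6, 2
zx = torch.rand(bs, zx_dim)
a_t = torch.zeros(bs, pred_dim)           # last action; the real model seeds this from the current state
cell = nn.GRUCell(zx_dim + pred_dim, rnn_dim)
proj_mu = nn.Linear(rnn_dim, pred_dim)
proj_log_sigma = nn.Linear(rnn_dim, pred_dim)

state = torch.zeros(bs, rnn_dim)
mus = []
for _ in range(ph):
    state = cell(torch.cat([zx, a_t], dim=1), state)
    mu_t, log_sigma_t = proj_mu(state), proj_log_sigma(state)
    a_t = mu_t + torch.randn_like(mu_t) * log_sigma_t.exp()  # reparameterized sample fed back in
    mus.append(mu_t)
mus = torch.stack(mus, dim=1)             # (bs, ph, pred_dim) per-step means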
+ """ + ph = prediction_horizon + pred_dim = self.pred_state_length + + z = torch.reshape(z_stacked, (-1, self.latent.z_dim)) + zx = torch.cat([z, x.repeat(num_samples * num_components, 1)], dim=1) + + cell = self.node_modules[self.node_type + "/decoder/rnn_cell"] + initial_h_model = self.node_modules[self.node_type + "/decoder/initial_h"] + + initial_state = initial_h_model(zx) + + log_pis, mus, log_sigmas, corrs, a_sample = [], [], [], [], [] + + # Infer initial action state for node from current state + a_0 = self.node_modules[self.node_type + "/decoder/state_action"](n_s_t0) + + state = initial_state + if self.hyperparams["incl_robot_node"]: + input_ = torch.cat( + [zx, a_0.repeat(num_samples * num_components, 1), x_nr_t.repeat(num_samples * num_components, 1)], dim=1 + ) + else: + input_ = torch.cat([zx, a_0.repeat(num_samples * num_components, 1)], dim=1) + + for j in range(ph): + h_state = cell(input_, state) + log_pi_t, mu_t, log_sigma_t, corr_t = self.project_to_GMM_params(h_state) + + gmm = GMM2D(log_pi_t, mu_t, log_sigma_t, corr_t) # [k;bs, pred_dim] + + if mode == ModeKeys.PREDICT and gmm_mode: + a_t = gmm.mode() + else: + a_t = gmm.rsample() + + if num_components > 1: + if mode == ModeKeys.PREDICT: + log_pis.append(self.latent.p_dist.logits.repeat(num_samples, 1, 1)) + else: + log_pis.append(self.latent.q_dist.logits.repeat(num_samples, 1, 1)) + else: + log_pis.append( + torch.ones_like(corr_t.reshape(num_samples, num_components, -1).permute(0, 2, 1).reshape(-1, 1)) + ) + + mus.append( + mu_t.reshape(num_samples, num_components, -1, 2).permute(0, 2, 1, 3).reshape(-1, 2 * num_components) + ) + log_sigmas.append( + log_sigma_t.reshape(num_samples, num_components, -1, 2) + .permute(0, 2, 1, 3) + .reshape(-1, 2 * num_components) + ) + corrs.append(corr_t.reshape(num_samples, num_components, -1).permute(0, 2, 1).reshape(-1, num_components)) + + if self.hyperparams["incl_robot_node"]: + dec_inputs = [zx, a_t, y_r[:, j].repeat(num_samples * num_components, 1)] + else: + dec_inputs = [zx, a_t] + input_ = torch.cat(dec_inputs, dim=1) + state = h_state + + log_pis = torch.stack(log_pis, dim=1) + mus = torch.stack(mus, dim=1) + log_sigmas = torch.stack(log_sigmas, dim=1) + corrs = torch.stack(corrs, dim=1) + + a_dist = GMM2D( + torch.reshape(log_pis, [num_samples, -1, ph, num_components]), + torch.reshape(mus, [num_samples, -1, ph, num_components * pred_dim]), + torch.reshape(log_sigmas, [num_samples, -1, ph, num_components * pred_dim]), + torch.reshape(corrs, [num_samples, -1, ph, num_components]), + ) + + if self.hyperparams["dynamic"][self.node_type]["distribution"]: + y_dist = self.dynamic.integrate_distribution(a_dist, x) + else: + y_dist = a_dist + + if mode == ModeKeys.PREDICT: + if gmm_mode: + a_sample = a_dist.mode() + else: + a_sample = a_dist.rsample() + sampled_future = self.dynamic.integrate_samples(a_sample, x) + return y_dist, sampled_future + else: + return y_dist + + def encoder(self, mode, x, y_e, num_samples=None): + """ + Encoder of the CVAE. + + :param mode: Mode in which the model is operated. E.g. Train, Eval, Predict. + :param x: Input / Condition tensor. + :param y_e: Encoded future tensor. + :param num_samples: Number of samples from the latent space during Prediction. + :return: tuple(z, kl_obj) + WHERE + - z: Samples from the latent space. 
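+                 (``hyperparams['k']`` samples in TRAIN mode, ``hyperparams['k_eval']`` in EVAL mode,
+                 and the caller-supplied ``num_samples`` in PREDICT mode)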
+ - kl_obj: KL Divergence between q and p
+ """
+ if mode == ModeKeys.TRAIN:
+ sample_ct = self.hyperparams["k"]
+ elif mode == ModeKeys.EVAL:
+ sample_ct = self.hyperparams["k_eval"]
+ elif mode == ModeKeys.PREDICT:
+ sample_ct = num_samples
+ if num_samples is None:
+ raise ValueError("num_samples cannot be None with mode == PREDICT.")
+
+ self.latent.q_dist = self.q_z_xy(mode, x, y_e)
+ self.latent.p_dist = self.p_z_x(mode, x)
+
+ z = self.latent.sample_q(sample_ct, mode)
+
+ if mode == ModeKeys.TRAIN:
+ kl_obj = self.latent.kl_q_p(self.log_writer, "%s" % str(self.node_type), self.curr_iter)
+ if self.log_writer is not None:
+ self.log_writer.add_scalar("%s/%s" % (str(self.node_type), "kl"), kl_obj, self.curr_iter)
+ else:
+ kl_obj = None
+
+ return z, kl_obj
+
+ def decoder(self, mode, x, x_nr_t, y, y_r, n_s_t0, z, labels, prediction_horizon, num_samples):
+ """
+ Decoder of the CVAE.
+
+ :param mode: Mode in which the model is operated. E.g. Train, Eval, Predict.
+ :param x: Input / Condition tensor.
+ :param x_nr_t: Joint state of node and robot (if robot is in scene).
+ :param y: Future tensor.
+ :param y_r: Encoded future tensor.
+ :param n_s_t0: Standardized current state of the node.
+ :param z: Stacked latent state.
+ :param labels: Label tensor against which the log probability is evaluated. [bs, t, pred_state]
+ :param prediction_horizon: Number of prediction timesteps.
+ :param num_samples: Number of samples from the latent space.
+ :return: Log probability of y over p.
+ """
+
+ num_components = self.hyperparams["N"] * self.hyperparams["K"]
+ y_dist = self.p_y_xz(
+ mode, x, x_nr_t, y_r, n_s_t0, z, prediction_horizon, num_samples, num_components=num_components
+ )
+ log_p_yt_xz = torch.clamp(y_dist.log_prob(labels), max=self.hyperparams["log_p_yt_xz_max"])
+ if self.hyperparams["log_histograms"] and self.log_writer is not None:
+ self.log_writer.add_histogram("%s/%s" % (str(self.node_type), "log_p_yt_xz"), log_p_yt_xz, self.curr_iter)
+
+ log_p_y_xz = torch.sum(log_p_yt_xz, dim=2)
+ return log_p_y_xz
+
+ def train_loss(
+ self,
+ inputs,
+ inputs_st,
+ first_history_indices,
+ labels,
+ labels_st,
+ neighbors,
+ neighbors_edge_value,
+ robot,
+ map,
+ prediction_horizon,
+ ) -> torch.Tensor:
+ """
+ Calculates the training loss for a batch.
+
+ :param inputs: Input tensor including the state for each agent over time [bs, t, state].
+ :param inputs_st: Standardized input tensor.
+ :param first_history_indices: First timestep (index) in scene for which data is available for a node [bs]
+ :param labels: Label tensor including the label output for each agent over time [bs, t, pred_state].
+ :param labels_st: Standardized label tensor.
+ :param neighbors: Preprocessed dict (indexed by edge type) of list of neighbor states over time.
+ [[bs, t, neighbor state]]
+ :param neighbors_edge_value: Preprocessed edge values for all neighbor nodes [[N]]
+ :param robot: Standardized robot state over time. [bs, t, robot_state]
+ :param map: Tensor of Map information. [bs, channels, x, y]
+ :param prediction_horizon: Number of prediction timesteps.
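+        The returned loss is the negative ELBO: -(E[log p(y | x, z)] - kl_weight * KL(q(z | x, y) || p(z | x)) + I_p),
+        where I_p is the Monte-Carlo mutual-information estimate computed from the prior p(z | x) via mutual_inf_mc.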
+ :return: Scalar tensor -> nll loss + """ + mode = ModeKeys.TRAIN + + x, x_nr_t, y_e, y_r, y, n_s_t0 = self.obtain_encoded_tensors( + mode=mode, + inputs=inputs, + inputs_st=inputs_st, + labels=labels, + labels_st=labels_st, + first_history_indices=first_history_indices, + neighbors=neighbors, + neighbors_edge_value=neighbors_edge_value, + robot=robot, + map=map, + ) + + z, kl = self.encoder(mode, x, y_e) + log_p_y_xz = self.decoder( + mode, + x, + x_nr_t, + y, + y_r, + n_s_t0, + z, + labels, # Loss is calculated on unstandardized label + prediction_horizon, + self.hyperparams["k"], + ) + + log_p_y_xz_mean = torch.mean(log_p_y_xz, dim=0) # [nbs] + log_likelihood = torch.mean(log_p_y_xz_mean) + + mutual_inf_q = mutual_inf_mc(self.latent.q_dist) + mutual_inf_p = mutual_inf_mc(self.latent.p_dist) + + ELBO = log_likelihood - self.kl_weight * kl + 1.0 * mutual_inf_p + loss = -ELBO + + if self.hyperparams["log_histograms"] and self.log_writer is not None: + self.log_writer.add_histogram( + "%s/%s" % (str(self.node_type), "log_p_y_xz"), log_p_y_xz_mean, self.curr_iter + ) + + if self.log_writer is not None: + self.log_writer.add_scalar( + "%s/%s" % (str(self.node_type), "mutual_information_q"), mutual_inf_q, self.curr_iter + ) + self.log_writer.add_scalar( + "%s/%s" % (str(self.node_type), "mutual_information_p"), mutual_inf_p, self.curr_iter + ) + self.log_writer.add_scalar( + "%s/%s" % (str(self.node_type), "log_likelihood"), log_likelihood, self.curr_iter + ) + self.log_writer.add_scalar("%s/%s" % (str(self.node_type), "loss"), loss, self.curr_iter) + if self.hyperparams["log_histograms"]: + self.latent.summarize_for_tensorboard(self.log_writer, str(self.node_type), self.curr_iter) + return loss + + def eval_loss( + self, + inputs, + inputs_st, + first_history_indices, + labels, + labels_st, + neighbors, + neighbors_edge_value, + robot, + map, + prediction_horizon, + ) -> torch.Tensor: + """ + Calculates the evaluation loss for a batch. + + :param inputs: Input tensor including the state for each agent over time [bs, t, state]. + :param inputs_st: Standardized input tensor. + :param first_history_indices: First timestep (index) in scene for which data is available for a node [bs] + :param labels: Label tensor including the label output for each agent over time [bs, t, pred_state]. + :param labels_st: Standardized label tensor. + :param neighbors: Preprocessed dict (indexed by edge type) of list of neighbor states over time. + [[bs, t, neighbor state]] + :param neighbors_edge_value: Preprocessed edge values for all neighbor nodes [[N]] + :param robot: Standardized robot state over time. [bs, t, robot_state] + :param map: Tensor of Map information. [bs, channels, x, y] + :param prediction_horizon: Number of prediction timesteps. 
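+        The NLL is estimated from a single latent sample drawn from the full distribution p(z | x).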
+ :return: Scalar negative log-likelihood (NLL) estimate for the batch.
+ """
+
+ mode = ModeKeys.EVAL
+
+ x, x_nr_t, y_e, y_r, y, n_s_t0 = self.obtain_encoded_tensors(
+ mode=mode,
+ inputs=inputs,
+ inputs_st=inputs_st,
+ labels=labels,
+ labels_st=labels_st,
+ first_history_indices=first_history_indices,
+ neighbors=neighbors,
+ neighbors_edge_value=neighbors_edge_value,
+ robot=robot,
+ map=map,
+ )
+
+ num_components = self.hyperparams["N"] * self.hyperparams["K"]
+ ### Importance sampled NLL estimate
+ z, _ = self.encoder(mode, x, y_e) # [k_eval, nbs, N*K]
+ z = self.latent.sample_p(1, mode, full_dist=True)
+ y_dist, _ = self.p_y_xz(
+ ModeKeys.PREDICT,
+ x,
+ x_nr_t,
+ y_r,
+ n_s_t0,
+ z,
+ prediction_horizon,
+ num_samples=1,
+ num_components=num_components,
+ )
+ # We use unstandardized labels to compute the loss
+ log_p_yt_xz = torch.clamp(y_dist.log_prob(labels), max=self.hyperparams["log_p_yt_xz_max"])
+ log_p_y_xz = torch.sum(log_p_yt_xz, dim=2)
+ log_p_y_xz_mean = torch.mean(log_p_y_xz, dim=0) # [nbs]
+ log_likelihood = torch.mean(log_p_y_xz_mean)
+ nll = -log_likelihood
+
+ return nll
+
+ def predict(
+ self,
+ inputs,
+ inputs_st,
+ packed_inputs_st,
+ first_history_indices,
+ neighbors,
+ neighbors_edge_value,
+ robot,
+ map,
+ prediction_horizon,
+ num_samples,
+ z_mode=False,
+ gmm_mode=False,
+ full_dist=True,
+ all_z_sep=False,
+ ):
+ """
+ Predicts the future of a batch of nodes.
+
+ :param inputs: Input tensor including the state for each agent over time [bs, t, state].
+ :param inputs_st: Standardized input tensor.
+ :param packed_inputs_st: PackedSequence built from the standardized inputs by the caller and passed through to obtain_encoded_tensors.
+ :param first_history_indices: First timestep (index) in scene for which data is available for a node [bs]
+ :param neighbors: Preprocessed dict (indexed by edge type) of list of neighbor states over time.
+ [[bs, t, neighbor state]]
+ :param neighbors_edge_value: Preprocessed edge values for all neighbor nodes [[N]]
+ :param robot: Standardized robot state over time. [bs, t, robot_state]
+ :param map: Tensor of Map information. [bs, channels, x, y]
+ :param prediction_horizon: Number of prediction timesteps.
+ :param num_samples: Number of samples from the latent space.
+ :param z_mode: If True: Select the most likely latent state.
+ :param gmm_mode: If True: The mode of the GMM is sampled.
+ :param all_z_sep: Samples each latent mode individually without merging them into a GMM.
+ :param full_dist: Samples all latent states and merges them into a GMM as output.
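+        Note: in this test variant the tuple returned by ``obtain_encoded_tensors`` is returned directly,
+        and the latent sampling / decoding code further below is left unreachable.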
+ :return: + """ + mode = ModeKeys.PREDICT + + # x, x_nr_t, _, y_r, _, n_s_t0 = self.obtain_encoded_tensors(mode=mode, + out = self.obtain_encoded_tensors( + mode=mode, + inputs=inputs, + inputs_st=inputs_st, + packed_inputs_st=packed_inputs_st, + labels=None, + labels_st=None, + first_history_indices=first_history_indices, + neighbors=neighbors, + neighbors_edge_value=neighbors_edge_value, + robot=robot, + map=map, + ) + # return x, n_s_t0 + return out + + self.latent.p_dist = self.p_z_x(mode, x) + z, num_samples, num_components = self.latent.sample_p( + num_samples, mode, most_likely_z=z_mode, full_dist=full_dist, all_z_sep=all_z_sep + ) + + _, our_sampled_future = self.p_y_xz( + mode, x, x_nr_t, y_r, n_s_t0, z, prediction_horizon, num_samples, num_components, gmm_mode + ) + + return our_sampled_future diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/model_registrar.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/model_registrar.py new file mode 100644 index 000000000..d5aaf1966 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/model_registrar.py @@ -0,0 +1,76 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 +import os +import torch +import torch.nn as nn + + +def get_model_device(model): + return next(model.parameters()).device + + +class ModelRegistrar(nn.Module): + def __init__(self, model_dir, device): + super(ModelRegistrar, self).__init__() + self.model_dict = nn.ModuleDict() + self.model_dir = model_dir + self.device = device + + def forward(self): + raise NotImplementedError("Although ModelRegistrar is a nn.Module, it is only to store parameters.") + + def get_model(self, name, model_if_absent=None): + # 4 cases: name in self.model_dict and model_if_absent is None (OK) + # name in self.model_dict and model_if_absent is not None (OK) + # name not in self.model_dict and model_if_absent is not None (OK) + # name not in self.model_dict and model_if_absent is None (NOT OK) + + if name in self.model_dict: + return self.model_dict[name] + + elif model_if_absent is not None: + self.model_dict[name] = model_if_absent.to(self.device) + return self.model_dict[name] + + else: + raise ValueError(f"{name} was never initialized in this Registrar!") + + def get_name_match(self, name): + ret_model_list = nn.ModuleList() + for key in self.model_dict.keys(): + if name in key: + ret_model_list.append(self.model_dict[key]) + return ret_model_list + + def get_all_but_name_match(self, name): + ret_model_list = nn.ModuleList() + for key in self.model_dict.keys(): + if name not in key: + ret_model_list.append(self.model_dict[key]) + return ret_model_list + + def print_model_names(self): + print(self.model_dict.keys()) + + def save_models(self, curr_iter): + # Create the model directiory if it's not present. 
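+        # Minimal sketch of the directory creation the comment above describes (assumes model_dir may not
+        # exist yet; exist_ok makes this a no-op when it already does).
+        os.makedirs(self.model_dir, exist_ok=True)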
+ save_path = os.path.join(self.model_dir, "model_registrar-%d.pt" % curr_iter) + + torch.save(self.model_dict, save_path) + + def load_models(self, iter_num): + self.model_dict.clear() + + save_path = os.path.join(self.model_dir, "model_registrar-%d.pt" % iter_num) + + print("") + print("Loading from " + save_path) + self.model_dict = torch.load(save_path, map_location=self.device) + print("Loaded!") + print("") + + def to(self, device): + for name, model in self.model_dict.items(): + if get_model_device(model) != device: + model.to(device) diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/model_utils.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/model_utils.py new file mode 100644 index 000000000..19c4cf3ec --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/model_utils.py @@ -0,0 +1,137 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 +import torch +import torch.nn.utils.rnn as rnn +from enum import Enum +import functools +import numpy as np +import math + + +class ModeKeys(Enum): + TRAIN = 1 + EVAL = 2 + PREDICT = 3 + + +def cyclical_lr(stepsize, min_lr=3e-4, max_lr=3e-3, decay=1.0): + # Lambda function to calculate the LR + lr_lambda = lambda it: min_lr + (max_lr - min_lr) * relative(it, stepsize) * decay**it + + # Additional function to see where on the cycle we are + def relative(it, stepsize): + cycle = math.floor(1 + it / (2 * stepsize)) + x = abs(it / stepsize - 2 * cycle + 1) + return max(0, (1 - x)) + + return lr_lambda + + +def to_one_hot(labels, n_labels): + return torch.eye(n_labels, device=labels.device)[labels] + + +def exp_anneal(anneal_kws): + device = anneal_kws["device"] + start = torch.tensor(anneal_kws["start"], device=device) + finish = torch.tensor(anneal_kws["finish"], device=device) + rate = torch.tensor(anneal_kws["rate"], device=device) + return lambda step: finish - (finish - start) * torch.pow( + rate, torch.tensor(step, dtype=torch.float, device=device) + ) + + +def sigmoid_anneal(anneal_kws): + device = anneal_kws["device"] + start = torch.tensor(anneal_kws["start"], device=device) + finish = torch.tensor(anneal_kws["finish"], device=device) + center_step = torch.tensor(anneal_kws["center_step"], device=device, dtype=torch.float) + steps_lo_to_hi = torch.tensor(anneal_kws["steps_lo_to_hi"], device=device, dtype=torch.float) + return lambda step: start + (finish - start) * torch.sigmoid( + (torch.tensor(float(step), device=device) - center_step) * (1.0 / steps_lo_to_hi) + ) + + +class CustomLR(torch.optim.lr_scheduler.LambdaLR): + def __init__(self, optimizer, lr_lambda, last_epoch=-1): + super(CustomLR, self).__init__(optimizer, lr_lambda, last_epoch) + + def get_lr(self): + return [lmbda(self.last_epoch) for lmbda, base_lr in zip(self.lr_lambdas, self.base_lrs)] + + +def mutual_inf_mc(x_dist): + dist = x_dist.__class__ + H_y = dist(probs=x_dist.probs.mean(dim=0)).entropy() + return (H_y - x_dist.entropy().mean(dim=0)).sum() + + +def run_lstm_on_variable_length_seqs( + lstm_module, original_seqs, lower_indices=None, upper_indices=None, total_length=None +): + # breakpoint() + # bs, tf = original_seqs.shape[:2] + # if lower_indices is None: + # lower_indices = torch.zeros(bs, dtype=torch.int) + # if upper_indices is None: + # upper_indices = torch.ones(bs, dtype=torch.int) * (tf - 1) + # if total_length is None: + # total_length = max(upper_indices) + 1 + # # This is done so that we can just pass in self.prediction_timesteps 
+ # # (which we want to INCLUDE, so this will exclude the next timestep). + # inclusive_break_indices = upper_indices + 1 + + # pad_list = list() + # for i, seq_len in enumerate(inclusive_break_indices): + # pad_list.append(original_seqs[i, lower_indices[i]:seq_len]) + + # packed_seqs = rnn.pack_sequence(pad_list, enforce_sorted=False) + # return packed_seqs # TypeError: int() argument must be a string, a bytes-like object or a real number, not 'Any' + + packed_seqs = original_seqs + packed_output, (h_n, c_n) = lstm_module(packed_seqs) + return packed_output # TypeError: object of type 'Call' has no len() + output, _ = rnn.pad_packed_sequence(packed_output, batch_first=True, total_length=total_length) + + return output, (h_n, c_n) + + +def extract_subtensor_per_batch_element(tensor, indices): + batch_idxs = torch.arange(start=0, end=len(indices)) + + batch_idxs = batch_idxs[~torch.isnan(indices)] + indices = indices[~torch.isnan(indices)] + if indices.size == 0: + return None + else: + indices = indices.long() + if tensor.is_cuda: + batch_idxs = batch_idxs.to(tensor.get_device()) + indices = indices.to(tensor.get_device()) + return tensor[batch_idxs, indices] + + +def unpack_RNN_state(state_tuple): + # PyTorch returned LSTM states have 3 dims: + # (num_layers * num_directions, batch, hidden_size) + + state = torch.cat(state_tuple, dim=0).permute(1, 0, 2) + # Now state is (batch, 2 * num_layers * num_directions, hidden_size) + + state_size = state.size() + return torch.reshape(state, (-1, state_size[1] * state_size[2])) + + +def rsetattr(obj, attr, val): + pre, _, post = attr.rpartition(".") + return setattr(rgetattr(obj, pre) if pre else obj, post, val) + + +# using wonder's beautiful simplification: +# https://stackoverflow.com/questions/31174295/getattr-and-setattr-on-nested-objects/31174427?noredirect=1#comment86638618_31174427 +def rgetattr(obj, attr, *args): + def _getattr(obj, attr): + return getattr(obj, attr, *args) + + return functools.reduce(_getattr, [obj] + attr.split(".")) diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/online/__init__.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/online/__init__.py new file mode 100644 index 000000000..e8fa6b337 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/online/__init__.py @@ -0,0 +1,5 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 +from .online_trajectron import OnlineTrajectron +from .online_mgcvae import OnlineMultimodalGenerativeCVAE diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/online/online_mgcvae.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/online/online_mgcvae.py new file mode 100644 index 000000000..624ebf426 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/online/online_mgcvae.py @@ -0,0 +1,428 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 +import warnings +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np +from collections import defaultdict, Counter +from model.components import * +from model.model_utils import * +from model.dataset import get_relative_robot_traj +import model.dynamics as dynamic_module +from model.mgcvae import MultimodalGenerativeCVAE +from environment.scene_graph import DirectedEdge +from environment.node_type import NodeType + + +class 
OnlineMultimodalGenerativeCVAE(MultimodalGenerativeCVAE): + def __init__(self, env, node, model_registrar, hyperparams, device): + self.hyperparams = hyperparams + self.node = node + self.node_type = self.node.type + + if len(env.scenes) != 1: + raise ValueError("Passed in Environment has number of scenes != 1") + self.robot = env.scenes[0].robot + self.model_registrar = model_registrar + self.device = device + + self.node_modules = dict() + self.env = env + self.scene_graph = None + + self.state = self.hyperparams["state"] + self.pred_state = self.hyperparams["pred_state"][self.node.type] + self.state_length = int(np.sum([len(entity_dims) for entity_dims in self.state[self.node.type].values()])) + if self.hyperparams["incl_robot_node"]: + self.robot_state_length = int( + np.sum([len(entity_dims) for entity_dims in self.state[self.robot.type].values()]) + ) + self.pred_state_length = int(np.sum([len(entity_dims) for entity_dims in self.pred_state.values()])) + + self.curr_hidden_states = dict() + self.edge_types = Counter() + + self.create_graphical_model() + + dynamic_class = getattr(dynamic_module, self.hyperparams["dynamic"][self.node_type]["name"]) + dyn_limits = hyperparams["dynamic"][self.node_type]["limits"] + self.dynamic = dynamic_class( + self.env.scenes[0].dt, dyn_limits, device, self.model_registrar, self.x_size, self.node_type + ) + + def create_graphical_model(self): + """ + Creates or queries all trainable components. + + :return: None + """ + self.clear_submodules() + + ############################ + # Everything but Edges # + ############################ + self.create_node_models() + + for name, module in self.node_modules.items(): + module.to(self.device) + + def update_graph(self, new_scene_graph, new_neighbors, removed_neighbors): + self.scene_graph = new_scene_graph + + if self.node in new_neighbors: + for edge_type, new_neighbor_nodes in new_neighbors[self.node].items(): + self.add_edge_model(edge_type) + self.edge_types += Counter({edge_type: len(new_neighbor_nodes)}) + + if self.node in removed_neighbors: + for edge_type, removed_neighbor_nodes in removed_neighbors[self.node].items(): + self.remove_edge_model(edge_type) + self.edge_types -= Counter({edge_type: len(removed_neighbor_nodes)}) + + def get_edge_to(self, other_node): + return DirectedEdge(self.node, other_node) + + def add_edge_model(self, edge_type): + if self.hyperparams["edge_encoding"]: + if edge_type + "/edge_encoder" not in self.node_modules: + neighbor_state_length = int( + np.sum( + [ + len(entity_dims) + for entity_dims in self.state[self._get_other_node_type_from_edge(edge_type)].values() + ] + ) + ) + if self.hyperparams["edge_state_combine_method"] == "pointnet": + self.add_submodule( + edge_type + "/pointnet_encoder", + model_if_absent=nn.Sequential( + nn.Linear(self.state_length, 2 * self.state_length), + nn.ReLU(), + nn.Linear(2 * self.state_length, 2 * self.state_length), + nn.ReLU(), + ), + ) + + edge_encoder_input_size = 2 * self.state_length + self.state_length + + elif self.hyperparams["edge_state_combine_method"] == "attention": + self.add_submodule( + self.node.type + "/edge_attention_combine", + model_if_absent=TemporallyBatchedAdditiveAttention( + encoder_hidden_state_dim=self.state_length, decoder_hidden_state_dim=self.state_length + ), + ) + edge_encoder_input_size = self.state_length + neighbor_state_length + + else: + edge_encoder_input_size = self.state_length + neighbor_state_length + + self.add_submodule( + edge_type + "/edge_encoder", + model_if_absent=nn.LSTM( + 
input_size=edge_encoder_input_size, + hidden_size=self.hyperparams["enc_rnn_dim_edge"], + batch_first=True, + ), + ) + + def _get_other_node_type_from_edge(self, edge_type_str): + n2_type_str = edge_type_str.split("->")[1] + return NodeType(n2_type_str, self.env.node_type_list.index(n2_type_str) + 1) + + def _get_edge_type_from_str(self, edge_type_str): + n1_type_str, n2_type_str = edge_type_str.split("->") + return ( + NodeType(n1_type_str, self.env.node_type_list.index(n1_type_str) + 1), + NodeType(n2_type_str, self.env.node_type_list.index(n2_type_str) + 1), + ) + + def remove_edge_model(self, edge_type): + if self.hyperparams["edge_encoding"]: + if len(self.scene_graph.get_neighbors(self.node, self._get_other_node_type_from_edge(edge_type))) == 0: + del self.node_modules[edge_type + "/edge_encoder"] + + def obtain_encoded_tensors(self, mode, inputs, inputs_st, inputs_np, robot_present_and_future, maps): + x, x_r_t, y_r = None, None, None + batch_size = 1 + + our_inputs = inputs[self.node] + our_inputs_st = inputs_st[self.node] + + initial_dynamics = dict() + initial_dynamics["pos"] = our_inputs[:, 0:2] # TODO: Generalize + initial_dynamics["vel"] = our_inputs[:, 2:4] # TODO: Generalize + self.dynamic.set_initial_condition(initial_dynamics) + + ######################################### + # Provide basic information to encoders # + ######################################### + if self.hyperparams["incl_robot_node"] and self.robot is not None: + robot_present_and_future_st = get_relative_robot_traj( + self.env, self.state, our_inputs, robot_present_and_future, self.node.type, self.robot.type + ) + x_r_t = robot_present_and_future_st[..., 0, :] + y_r = robot_present_and_future_st[..., 1:, :] + + ################## + # Encode History # + ################## + node_history_encoded = self.encode_node_history(our_inputs_st) + + ############################## + # Encode Node Edges per Type # + ############################## + total_edge_influence = None + if self.hyperparams["edge_encoding"]: + node_edges_encoded = list() + for edge_type in self.edge_types: + connected_nodes_batched = list() + edge_masks_batched = list() + + # We get all nodes which are connected to the current node for the current timestep + connected_nodes_batched.append( + self.scene_graph.get_neighbors(self.node, self._get_other_node_type_from_edge(edge_type)) + ) + + if self.hyperparams["dynamic_edges"] == "yes": + # We get the edge masks for the current node at the current timestep + edge_masks_for_node = self.scene_graph.get_edge_scaling(self.node) + edge_masks_batched.append(torch.tensor(edge_masks_for_node, dtype=torch.float, device=self.device)) + + # Encode edges for given edge type + encoded_edges_type = self.encode_edge( + inputs, inputs_st, inputs_np, edge_type, connected_nodes_batched, edge_masks_batched + ) + node_edges_encoded.append(encoded_edges_type) # List of [bs/nbs, enc_rnn_dim] + + ##################### + # Encode Node Edges # + ##################### + total_edge_influence = self.encode_total_edge_influence( + mode, node_edges_encoded, node_history_encoded, batch_size + ) + + self.TD = {"node_history_encoded": node_history_encoded, "total_edge_influence": total_edge_influence} + + ################ + # Map Encoding # + ################ + if self.hyperparams["use_map_encoding"] and self.node_type in self.hyperparams["map_encoder"]: + if self.node not in maps: + # This means the node was removed (it is only being kept around because of the edge removal filter). 
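+                # A zero map encoding of the expected output size is used instead of running the map encoder.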
+ me_params = self.hyperparams["map_encoder"][self.node_type] + self.TD["encoded_map"] = torch.zeros((1, me_params["output_size"])) + else: + encoded_map = self.node_modules[self.node_type + "/map_encoder"]( + maps[self.node] * 2.0 - 1.0, (mode == ModeKeys.TRAIN) + ) + do = self.hyperparams["map_encoder"][self.node_type]["dropout"] + encoded_map = F.dropout(encoded_map, do, training=(mode == ModeKeys.TRAIN)) + self.TD["encoded_map"] = encoded_map + + ###################################### + # Concatenate Encoder Outputs into x # + ###################################### + return self.create_encoder_rep(mode, self.TD, x_r_t, y_r) + + def create_encoder_rep(self, mode, TD, robot_present_st, robot_future_st): + # Unpacking TD + node_history_encoded = TD["node_history_encoded"] + if self.hyperparams["edge_encoding"]: + total_edge_influence = TD["total_edge_influence"] + if self.hyperparams["use_map_encoding"] and self.node_type in self.hyperparams["map_encoder"]: + encoded_map = TD["encoded_map"] + + if ( + self.hyperparams["incl_robot_node"] + and self.robot is not None + and robot_future_st is not None + and robot_present_st is not None + ): + robot_future_encoder = self.encode_robot_future(mode, robot_present_st, robot_future_st) + + # Tiling for multiple samples + # This tiling is done because: + # a) we must consider the prediction case where there are many candidate robot future actions, + # b) the edge and history encoders are all the same regardless of which candidate future robot action + # we're evaluating. + node_history_encoded = TD["node_history_encoded"].repeat(robot_future_st.size()[0], 1) + if self.hyperparams["edge_encoding"]: + total_edge_influence = TD["total_edge_influence"].repeat(robot_future_st.size()[0], 1) + if self.hyperparams["use_map_encoding"] and self.node_type in self.hyperparams["map_encoder"]: + encoded_map = TD["encoded_map"].repeat(robot_future_st.size()[0], 1) + + elif self.hyperparams["incl_robot_node"] and self.robot is not None: + # Four times because we're trying to mimic a bi-directional RNN's output (which is c and h from both ends). + robot_future_encoder = torch.zeros([1, 4 * self.hyperparams["enc_rnn_dim_future"]], device=self.device) + + x_concat_list = list() + + # Every node has an edge-influence encoder (which could just be zero). + if self.hyperparams["edge_encoding"]: + x_concat_list.append(total_edge_influence) # [bs/nbs, 4*enc_rnn_dim] + + # Every node has a history encoder. 
+ x_concat_list.append(node_history_encoded) # [bs/nbs, enc_rnn_dim_history] + + if self.hyperparams["incl_robot_node"] and self.robot is not None: + x_concat_list.append(robot_future_encoder) # [bs/nbs, 4*enc_rnn_dim_history] + + if self.hyperparams["use_map_encoding"] and self.node_type in self.hyperparams["map_encoder"]: + x_concat_list.append(encoded_map) # [bs/nbs, CNN output size] + + return torch.cat(x_concat_list, dim=1) + + def encode_node_history(self, inputs_st): + new_state = torch.unsqueeze(inputs_st, dim=1) # [bs, 1, state_dim] + if self.node.type + "/node_history_encoder" not in self.curr_hidden_states: + outputs, self.curr_hidden_states[self.node.type + "/node_history_encoder"] = self.node_modules[ + self.node.type + "/node_history_encoder" + ](new_state) + else: + outputs, self.curr_hidden_states[self.node.type + "/node_history_encoder"] = self.node_modules[ + self.node.type + "/node_history_encoder" + ](new_state, self.curr_hidden_states[self.node.type + "/node_history_encoder"]) + + return outputs[:, 0, :] + + def encode_edge(self, inputs, inputs_st, inputs_np, edge_type, connected_nodes, edge_masks): + edge_type_tuple = self._get_edge_type_from_str(edge_type) + edge_states_list = list() # list of [#of neighbors, max_ht, state_dim] + neighbor_states = list() + + orig_rel_state = inputs[self.node].cpu().numpy() + for node in connected_nodes[0]: + neighbor_state_np = inputs_np[node] + + # Make State relative to node + _, std = self.env.get_standardize_params(self.state[node.type], node_type=node.type) + std[0:2] = self.env.attention_radius[edge_type_tuple] + + # TODO: This all makes the unsafe assumption that the first n dims + # refer to the same quantities even for different agent types! + equal_dims = np.min((neighbor_state_np.shape[-1], orig_rel_state.shape[-1])) + rel_state = np.zeros_like(neighbor_state_np) + rel_state[..., :equal_dims] = orig_rel_state[..., :equal_dims] + neighbor_state_np_st = self.env.standardize( + neighbor_state_np, self.state[node.type], node_type=node.type, mean=rel_state, std=std + ) + + neighbor_state = torch.tensor(neighbor_state_np_st).float().to(self.device) + neighbor_states.append(neighbor_state) + + if len(neighbor_states) == 0: # There are no neighbors for edge type # TODO necessary? + neighbor_state_length = int(np.sum([len(entity_dims) for entity_dims in self.state[edge_type[1]].values()])) + edge_states_list.append(torch.zeros((1, 1, neighbor_state_length), device=self.device)) + else: + edge_states_list.append(torch.stack(neighbor_states, dim=0)) + + if self.hyperparams["edge_state_combine_method"] == "sum": + # Used in Structural-RNN to combine edges as well. + op_applied_edge_states_list = list() + for neighbors_state in edge_states_list: + op_applied_edge_states_list.append(torch.sum(neighbors_state, dim=0)) + combined_neighbors = torch.stack(op_applied_edge_states_list, dim=0) + if self.hyperparams["dynamic_edges"] == "yes": + # Should now be (bs, time, 1) + op_applied_edge_mask_list = list() + for edge_mask in edge_masks: + op_applied_edge_mask_list.append(torch.clamp(torch.sum(edge_mask, dim=0, keepdim=True), max=1.0)) + combined_edge_masks = torch.stack(op_applied_edge_mask_list, dim=0) + + elif self.hyperparams["edge_state_combine_method"] == "max": + # Used in NLP, e.g. max over word embeddings in a sentence. 
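+            # Note: torch.max with a dim argument returns a (values, indices) named tuple; only the values
+            # tensor is meant to be stacked below.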
+ op_applied_edge_states_list = list() + for neighbors_state in edge_states_list: + op_applied_edge_states_list.append(torch.max(neighbors_state, dim=0)) + combined_neighbors = torch.stack(op_applied_edge_states_list, dim=0) + if self.hyperparams["dynamic_edges"] == "yes": + # Should now be (bs, time, 1) + op_applied_edge_mask_list = list() + for edge_mask in edge_masks: + op_applied_edge_mask_list.append(torch.clamp(torch.max(edge_mask, dim=0, keepdim=True), max=1.0)) + combined_edge_masks = torch.stack(op_applied_edge_mask_list, dim=0) + + elif self.hyperparams["edge_state_combine_method"] == "mean": + # Used in NLP, e.g. mean over word embeddings in a sentence. + op_applied_edge_states_list = list() + for neighbors_state in edge_states_list: + op_applied_edge_states_list.append(torch.mean(neighbors_state, dim=0)) + combined_neighbors = torch.stack(op_applied_edge_states_list, dim=0) + if self.hyperparams["dynamic_edges"] == "yes": + # Should now be (bs, time, 1) + op_applied_edge_mask_list = list() + for edge_mask in edge_masks: + op_applied_edge_mask_list.append(torch.clamp(torch.mean(edge_mask, dim=0, keepdim=True), max=1.0)) + combined_edge_masks = torch.stack(op_applied_edge_mask_list, dim=0) + + joint_history = torch.cat([combined_neighbors, torch.unsqueeze(inputs_st[self.node], dim=0)], dim=-1) + + if edge_type + "/edge_encoder" not in self.curr_hidden_states: + outputs, self.curr_hidden_states[edge_type + "/edge_encoder"] = self.node_modules[ + edge_type + "/edge_encoder" + ](joint_history) + else: + outputs, self.curr_hidden_states[edge_type + "/edge_encoder"] = self.node_modules[ + edge_type + "/edge_encoder" + ](joint_history, self.curr_hidden_states[edge_type + "/edge_encoder"]) + + if self.hyperparams["dynamic_edges"] == "yes": + return outputs[:, 0, :] * combined_edge_masks + else: + return outputs[:, 0, :] # [bs, enc_rnn_dim] + + def encoder_forward(self, inputs, inputs_st, inputs_np, robot_present_and_future=None, maps=None): + # Always predicting with the online model. + mode = ModeKeys.PREDICT + + self.x = self.obtain_encoded_tensors(mode, inputs, inputs_st, inputs_np, robot_present_and_future, maps) + self.n_s_t0 = inputs_st[self.node] + + self.latent.p_dist = self.p_z_x(mode, self.x) + + # robot_future_st is optional here since you can use the same one from encoder_forward, + # but if it's given then we'll re-run that part of the model (if the node is adjacent to the robot). + def decoder_forward( + self, + prediction_horizon, + num_samples, + robot_present_and_future=None, + z_mode=False, + gmm_mode=False, + full_dist=False, + all_z_sep=False, + ): + # Always predicting with the online model. 
+ mode = ModeKeys.PREDICT + + x_nr_t, y_r = None, None + if self.hyperparams["incl_robot_node"] and self.robot is not None and robot_present_and_future is not None: + our_inputs = torch.tensor( + self.node.get(np.array([self.node.last_timestep]), self.state[self.node.type], padding=0.0), + dtype=torch.float, + device=self.device, + ) + robot_present_and_future_st = get_relative_robot_traj( + self.env, self.state, our_inputs, robot_present_and_future, self.node.type, self.robot.type + ) + x_nr_t = robot_present_and_future_st[..., 0, :] + y_r = robot_present_and_future_st[..., 1:, :] + self.x = self.create_encoder_rep(mode, self.TD, x_nr_t, y_r) + self.latent.p_dist = self.p_z_x(mode, self.x) + + # Making sure n_s_t0 has the same batch size as x_nr_t + self.n_s_t0 = self.n_s_t0[[0]].repeat(x_nr_t.size()[0], 1) + + z, num_samples, num_components = self.latent.sample_p( + num_samples, mode, most_likely_z=z_mode, full_dist=full_dist, all_z_sep=all_z_sep + ) + + y_dist, our_sampled_future = self.p_y_xz( + mode, self.x, x_nr_t, y_r, self.n_s_t0, z, prediction_horizon, num_samples, num_components, gmm_mode + ) + + return y_dist, our_sampled_future diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/online/online_trajectron.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/online/online_trajectron.py new file mode 100644 index 000000000..73de7ed89 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/online/online_trajectron.py @@ -0,0 +1,343 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 +import torch +import numpy as np +from collections import Counter +from model.trajectron import Trajectron +from model.online.online_mgcvae import OnlineMultimodalGenerativeCVAE +from model.model_utils import ModeKeys +from environment import RingBuffer, TemporalSceneGraph, SceneGraph, derivative_of + + +class OnlineTrajectron(Trajectron): + def __init__(self, model_registrar, hyperparams, device): + super(OnlineTrajectron, self).__init__( + model_registrar=model_registrar, hyperparams=hyperparams, log_writer=False, device=device + ) + self.node_data = dict() + self.scene_graph = None + self.RING_CAPACITY = ( + max( + len(self.hyperparams["edge_removal_filter"]), + len(self.hyperparams["edge_addition_filter"]), + self.hyperparams["maximum_history_length"], + ) + + 1 + ) + self.rel_states = dict() + self.removed_nodes = Counter() + + def __repr__(self): + return f"OnlineTrajectron(# nodes: {len(self.nodes)}, device: {self.device}, hyperparameters: {str(self.hyperparams)}) " + + def _add_node_model(self, node): + if node in self.nodes: + raise ValueError("%s was already added to this graph!" % str(node)) + + self.nodes.add(node) + self.node_models_dict[node] = OnlineMultimodalGenerativeCVAE( + self.env, node, self.model_registrar, self.hyperparams, self.device + ) + + def update_removed_nodes(self): + for node in list(self.removed_nodes.keys()): + if self.removed_nodes[node] >= len(self.hyperparams["edge_removal_filter"]): + del self.node_data[node] + del self.removed_nodes[node] + + def _remove_node_model(self, node): + if node not in self.nodes: + raise ValueError("%s is not in this graph!" 
% str(node)) + + self.nodes.remove(node) + del self.node_models_dict[node] + + def set_environment(self, env, init_timestep=0): + self.env = env + self.scene_graph = SceneGraph(edge_radius=self.env.attention_radius) + self.nodes.clear() + self.node_data.clear() + self.node_models_dict.clear() + + # Fast-forwarding ourselves to the initial timestep, without running any of the underlying models. + for timestep in range(init_timestep + 1): + self.incremental_forward( + self.env.scenes[0].get_clipped_input_dict(timestep, self.hyperparams["state"]), + maps=None, + run_models=False, + ) + + def incremental_forward( + self, + new_inputs_dict, + maps, + prediction_horizon=0, + num_samples=0, + robot_present_and_future=None, + z_mode=False, + gmm_mode=False, + full_dist=False, + all_z_sep=False, + run_models=True, + ): + # The way this function works is by appending the new datapoints to the + # ends of each of the LSTMs in the graph. Then, we recalculate the + # encoder's output vector h_x and feed that into the decoder to sample new outputs. + mode = ModeKeys.PREDICT + + # No grad since we're predicting always, as evidenced by the line above. + with torch.no_grad(): + for node, new_input in new_inputs_dict.items(): + if node not in self.node_data: + self.node_data[node] = RingBuffer( + capacity=self.RING_CAPACITY, + dtype=(float, sum(len(self.state[node.type][k]) for k in self.state[node.type])), + ) + self.node_data[node].append(new_input) + + if node in self.removed_nodes: + del self.removed_nodes[node] + + # Nodes in self.node_data that aren't in new_inputs_dict were just removed. + newly_removed_nodes = (set(self.node_data.keys()) - set(self.removed_nodes.keys())) - set( + new_inputs_dict.keys() + ) + + # We update self.removed_nodes with the newly removed nodes as well as all existing removed nodes to get + # the time since their last removal increased by one. + self.removed_nodes.update(newly_removed_nodes | set(self.removed_nodes.keys())) + + # For any nodes that are older than the length of the edge_removal_filter, we can safely clear their data. + self.update_removed_nodes() + + # Any remaining removed nodes that aren't yet old enough for data clearing simply have NaNs appended so + # that when it's passed through the LSTMs, the hidden state keeps propagating but the input plays no role + # (the NaNs get converted to zeros later on). + for node in self.removed_nodes: + self.node_data[node].append(np.full((1, self.node_data[node].shape[1]), np.nan)) + + for node in self.node_data: + node.overwrite_data( + self.node_data[node], + None, + forward_in_time_on_next_overwrite=(self.node_data[node].shape[0] == self.RING_CAPACITY), + ) + + temp_scene_dict = {k: v[:, 0:2] for k, v in self.node_data.items()} + if not temp_scene_dict: + new_scene_graph = SceneGraph(edge_radius=self.env.attention_radius) + else: + new_scene_graph = TemporalSceneGraph.create_from_temp_scene_dict( + temp_scene_dict, + self.env.attention_radius, + duration=self.RING_CAPACITY, + edge_addition_filter=self.hyperparams["edge_addition_filter"], + edge_removal_filter=self.hyperparams["edge_removal_filter"], + online=True, + ).to_scene_graph(t=self.RING_CAPACITY - 1) + + if self.hyperparams["dynamic_edges"] == "yes": + new_nodes, removed_nodes, new_neighbors, removed_neighbors = new_scene_graph - self.scene_graph + + # Aside from updating the scene graph, this for loop updates the graph model + # structure of all affected nodes. 
+ not_removed_nodes = [node for node in self.nodes if node not in removed_nodes] + self.scene_graph = new_scene_graph + for node in not_removed_nodes: + self.node_models_dict[node].update_graph(new_scene_graph, new_neighbors, removed_neighbors) + + # These next 2 for loops add or remove entire node models. + for node in new_nodes: + if ( + node.is_robot and self.hyperparams["incl_robot_node"] + ) or node.type not in self.pred_state.keys(): + # Only deal with Models for NodeTypes we want to predict + continue + + self._add_node_model(node) + self.node_models_dict[node].update_graph(new_scene_graph, new_neighbors, removed_neighbors) + + for node in removed_nodes: + if ( + node.is_robot and self.hyperparams["incl_robot_node"] + ) or node.type not in self.pred_state.keys(): + continue + + self._remove_node_model(node) + + # This actually updates the node models with the newly observed data. + if run_models: + inputs = dict() + inputs_st = dict() + inputs_np = dict() + + iter_list = list(self.node_models_dict.keys()) + [ + node for node in new_inputs_dict if node.type not in self.pred_state.keys() + ] + if self.env.scenes[0].robot is not None: + iter_list.append(self.env.scenes[0].robot) + + for node in iter_list: + input_np = node.get(np.array([node.last_timestep, node.last_timestep]), self.state[node.type]) + + _, std = self.env.get_standardize_params(self.state[node.type.name], node.type) + std[0:2] = self.env.attention_radius[(node.type, node.type)] + rel_state = np.zeros_like(input_np) + rel_state[:, 0:2] = input_np[:, 0:2] + input_st = self.env.standardize(input_np, self.state[node.type.name], node.type, mean=rel_state) + self.rel_states[node] = rel_state + + # Converting NaNs to zeros. + input_np[np.isnan(input_np)] = 0 + input_st[np.isnan(input_st)] = 0 + + # Convert to torch tensors + inputs[node] = torch.tensor(input_np, dtype=torch.float, device=self.device) + inputs_st[node] = torch.tensor(input_st, dtype=torch.float, device=self.device) + inputs_np[node] = input_np + + # We want tensors of shape (1, ph + 1, state_dim) where the first 1 is the batch size. + if ( + self.hyperparams["incl_robot_node"] + and self.env.scenes[0].robot is not None + and robot_present_and_future is not None + ): + if len(robot_present_and_future.shape) == 2: + robot_present_and_future = robot_present_and_future[np.newaxis, :] + + assert robot_present_and_future.shape[1] == prediction_horizon + 1 + robot_present_and_future = torch.tensor( + robot_present_and_future, dtype=torch.float, device=self.device + ) + + for node in self.node_models_dict: + self.node_models_dict[node].encoder_forward( + inputs, inputs_st, inputs_np, robot_present_and_future, maps + ) + + # If num_predicted_timesteps or num_samples == 0 then do not run the decoder at all, + # just update the encoder LSTMs. 
+ if prediction_horizon == 0 or num_samples == 0: + return + + return self.sample_model( + prediction_horizon, + num_samples, + robot_present_and_future=robot_present_and_future, + z_mode=z_mode, + gmm_mode=gmm_mode, + full_dist=full_dist, + all_z_sep=all_z_sep, + ) + + def _run_decoder( + self, + node, + num_predicted_timesteps, + num_samples, + robot_present_and_future=None, + z_mode=False, + gmm_mode=False, + full_dist=False, + all_z_sep=False, + ): + model = self.node_models_dict[node] + prediction_dist, predictions_uns = model.decoder_forward( + num_predicted_timesteps, + num_samples, + robot_present_and_future=robot_present_and_future, + z_mode=z_mode, + gmm_mode=gmm_mode, + full_dist=full_dist, + all_z_sep=all_z_sep, + ) + + predictions_np = predictions_uns.cpu().detach().numpy() + + # Return will be of shape (batch_size, num_samples, num_predicted_timesteps, 2) + return prediction_dist, np.transpose(predictions_np, (1, 0, 2, 3)) + + def sample_model( + self, + num_predicted_timesteps, + num_samples, + robot_present_and_future=None, + z_mode=False, + gmm_mode=False, + full_dist=False, + all_z_sep=False, + ): + # Just start from the encoder output (minus the + # robot future) and get num_samples of + # num_predicted_timesteps-length trajectories. + if num_predicted_timesteps == 0 or num_samples == 0: + return + + mode = ModeKeys.PREDICT + + # We want tensors of shape (1, ph + 1, state_dim) where the first 1 is the batch size. + if ( + self.hyperparams["incl_robot_node"] + and self.env.scenes[0].robot is not None + and robot_present_and_future is not None + ): + if len(robot_present_and_future.shape) == 2: + robot_present_and_future = robot_present_and_future[np.newaxis, :] + + assert robot_present_and_future.shape[1] == num_predicted_timesteps + 1 + + # No grad since we're predicting always, as evidenced by the line above. + with torch.no_grad(): + predictions_dict = dict() + prediction_dists = dict() + for node in set(self.nodes) - set(self.removed_nodes.keys()): + if node.is_robot: + continue + + prediction_dists[node], predictions_dict[node] = self._run_decoder( + node, + num_predicted_timesteps, + num_samples, + robot_present_and_future, + z_mode, + gmm_mode, + full_dist, + all_z_sep, + ) + + return prediction_dists, predictions_dict + + def forward( + self, + init_env, + init_timestep, + input_dicts, # After the initial environment + num_predicted_timesteps, + num_samples, + robot_present_and_future=None, + z_mode=False, + gmm_mode=False, + full_dist=False, + all_z_sep=False, + ): + # This is the standard forward prediction function, + # if you have some historical data and just want to + # predict forward some number of timesteps. + + # Setting us back to the initial scene graph we had. + self.set_environment(init_env, init_timestep) + + # Looping through and applying updates to the model. 
+ for i in range(len(input_dicts)): + self.incremental_forward(input_dicts[i]) + + return self.sample_model( + num_predicted_timesteps, + num_samples, + robot_present_and_future=robot_present_and_future, + z_mode=z_mode, + gmm_mode=gmm_mode, + full_dist=full_dist, + all_z_sep=all_z_sep, + ) diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/trajectron.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/trajectron.py new file mode 100644 index 000000000..333a6b671 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model/trajectron.py @@ -0,0 +1,241 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 +import torch +import numpy as np +from model.mgcvae import MultimodalGenerativeCVAE +from model.dataset import get_timesteps_data, restore + + +class Trajectron(torch.nn.Module): + def __init__(self, model_registrar, hyperparams, log_writer, device): + super(Trajectron, self).__init__() + self.hyperparams = hyperparams + self.log_writer = log_writer + self.device = device + self.curr_iter = 0 + + self.model_registrar = model_registrar + # self.node_models_dict = dict() + self.node_models_dict = torch.nn.ModuleDict() + self.nodes = set() + + self.env = None + + self.min_ht = self.hyperparams["minimum_history_length"] + self.max_ht = self.hyperparams["maximum_history_length"] + self.ph = self.hyperparams["prediction_horizon"] + self.state = self.hyperparams["state"] + self.state_length = dict() + for state_type in self.state.keys(): + self.state_length[state_type] = int( + np.sum([len(entity_dims) for entity_dims in self.state[state_type].values()]) + ) + self.pred_state = self.hyperparams["pred_state"] + + def eval(self): + super().eval() + for key in self.node_models_dict.keys(): + self.node_models_dict[key].eval() + + def set_environment(self, env): + self.env = env + + self.node_models_dict.clear() + edge_types = env.get_edge_types() + + for node_type in env.NodeType: + # Only add a Model for NodeTypes we want to predict + if node_type in self.pred_state.keys(): + self.node_models_dict[str(node_type)] = MultimodalGenerativeCVAE( + env, + node_type, + self.model_registrar, + self.hyperparams, + self.device, + edge_types, + log_writer=self.log_writer, + ) + + def set_curr_iter(self, curr_iter): + self.curr_iter = curr_iter + for node_str, model in self.node_models_dict.items(): + model.set_curr_iter(curr_iter) + + def set_annealing_params(self): + for node_str, model in self.node_models_dict.items(): + model.set_annealing_params() + + def step_annealers(self, node_type=None): + if node_type is None: + for node_type in self.node_models_dict: + self.node_models_dict[node_type].step_annealers() + else: + self.node_models_dict[node_type].step_annealers() + + def train_loss(self, batch, node_type): + ( + first_history_index, + x_t, + y_t, + x_st_t, + y_st_t, + neighbors_data_st, + neighbors_edge_value, + robot_traj_st_t, + map, + ) = batch + + x = x_t.to(self.device) + y = y_t.to(self.device) + x_st_t = x_st_t.to(self.device) + y_st_t = y_st_t.to(self.device) + if robot_traj_st_t is not None: + robot_traj_st_t = robot_traj_st_t.to(self.device) + if type(map) == torch.Tensor: + map = map.to(self.device) + + # Run forward pass + model = self.node_models_dict[node_type] + loss = model.train_loss( + inputs=x, + inputs_st=x_st_t, + first_history_indices=first_history_index, + labels=y, + labels_st=y_st_t, + neighbors=restore(neighbors_data_st), + 
neighbors_edge_value=restore(neighbors_edge_value), + robot=robot_traj_st_t, + map=map, + prediction_horizon=self.ph, + ) + + return loss + + def eval_loss(self, batch, node_type): + ( + first_history_index, + x_t, + y_t, + x_st_t, + y_st_t, + neighbors_data_st, + neighbors_edge_value, + robot_traj_st_t, + map, + ) = batch + + x = x_t.to(self.device) + y = y_t.to(self.device) + x_st_t = x_st_t.to(self.device) + y_st_t = y_st_t.to(self.device) + if robot_traj_st_t is not None: + robot_traj_st_t = robot_traj_st_t.to(self.device) + if type(map) == torch.Tensor: + map = map.to(self.device) + + # Run forward pass + model = self.node_models_dict[node_type] + nll = model.eval_loss( + inputs=x, + inputs_st=x_st_t, + first_history_indices=first_history_index, + labels=y, + labels_st=y_st_t, + neighbors=restore(neighbors_data_st), + neighbors_edge_value=restore(neighbors_edge_value), + robot=robot_traj_st_t, + map=map, + prediction_horizon=self.ph, + ) + + return nll.cpu().detach().numpy() + + def predict( + self, + scene, + timesteps, + ph, + num_samples=1, + min_future_timesteps=0, + min_history_timesteps=1, + z_mode=False, + gmm_mode=False, + full_dist=True, + all_z_sep=False, + ): + + predictions_dict = {} + for node_type in self.env.NodeType: + if node_type not in self.pred_state: + continue + + model = self.node_models_dict[node_type] + + # Get Input data for node type and given timesteps + batch = get_timesteps_data( + env=self.env, + scene=scene, + t=timesteps, + node_type=node_type, + state=self.state, + pred_state=self.pred_state, + edge_types=model.edge_types, + min_ht=min_history_timesteps, + max_ht=self.max_ht, + min_ft=min_future_timesteps, + max_ft=min_future_timesteps, + hyperparams=self.hyperparams, + ) + # There are no nodes of type present for timestep + if batch is None: + continue + ( + ( + first_history_index, + x_t, + y_t, + x_st_t, + y_st_t, + neighbors_data_st, + neighbors_edge_value, + robot_traj_st_t, + map, + ), + nodes, + timesteps_o, + ) = batch + + x = x_t.to(self.device) + x_st_t = x_st_t.to(self.device) + if robot_traj_st_t is not None: + robot_traj_st_t = robot_traj_st_t.to(self.device) + if type(map) == torch.Tensor: + map = map.to(self.device) + + # Run forward pass + predictions = model.predict( + inputs=x, + inputs_st=x_st_t, + first_history_indices=first_history_index, + neighbors=neighbors_data_st, + neighbors_edge_value=neighbors_edge_value, + robot=robot_traj_st_t, + map=map, + prediction_horizon=ph, + num_samples=num_samples, + z_mode=z_mode, + gmm_mode=gmm_mode, + full_dist=full_dist, + all_z_sep=all_z_sep, + ) + + predictions_np = predictions.cpu().detach().numpy() + + # Assign predictions to node + for i, ts in enumerate(timesteps_o): + if ts not in predictions_dict.keys(): + predictions_dict[ts] = dict() + predictions_dict[ts][nodes[i]] = np.transpose(predictions_np[:, [i]], (1, 0, 2, 3)) + + return predictions_dict diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/model_dir/config.json b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model_dir/config.json new file mode 100644 index 000000000..bf417f081 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/model_dir/config.json @@ -0,0 +1 @@ +{"batch_size": 256, "grad_clip": 1.0, "learning_rate_style": "exp", "learning_rate": 0.001, "min_learning_rate": 1e-05, "learning_decay_rate": 0.9999, "prediction_horizon": 12, "minimum_history_length": 1, "maximum_history_length": 8, "map_encoder": {"PEDESTRIAN": {"heading_state_index": 5, "patch_size": 
[50, 10, 50, 90], "map_channels": 3, "hidden_channels": [10, 20, 10, 1], "output_size": 32, "masks": [5, 5, 5, 5], "strides": [1, 1, 1, 1], "dropout": 0.5}}, "k": 1, "k_eval": 1, "kl_min": 0.07, "kl_weight": 100.0, "kl_weight_start": 0, "kl_decay_rate": 0.99995, "kl_crossover": 400, "kl_sigmoid_divisor": 4, "rnn_kwargs": {"dropout_keep_prob": 0.75}, "MLP_dropout_keep_prob": 0.9, "enc_rnn_dim_edge": 32, "enc_rnn_dim_edge_influence": 32, "enc_rnn_dim_history": 32, "enc_rnn_dim_future": 32, "dec_rnn_dim": 128, "q_z_xy_MLP_dims": null, "p_z_x_MLP_dims": 32, "GMM_components": 1, "log_p_yt_xz_max": 6, "N": 1, "K": 25, "tau_init": 2.0, "tau_final": 0.05, "tau_decay_rate": 0.997, "use_z_logit_clipping": true, "z_logit_clip_start": 0.05, "z_logit_clip_final": 5.0, "z_logit_clip_crossover": 300, "z_logit_clip_divisor": 5, "dynamic": {"PEDESTRIAN": {"name": "SingleIntegrator", "distribution": true, "limits": {}}}, "state": {"PEDESTRIAN": {"position": ["x", "y"], "velocity": ["x", "y"], "acceleration": ["x", "y"]}}, "pred_state": {"PEDESTRIAN": {"position": ["x", "y"]}}, "log_histograms": false, "scene_freq_mult_eval": false, "node_freq_mult_eval": false, "edge_encoding": false, "incl_robot_node": false, "use_map_encoding": false} diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/test/__init__.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/test/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/test/test_data_structures.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/test/test_data_structures.py new file mode 100644 index 000000000..e840fda99 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/test/test_data_structures.py @@ -0,0 +1,46 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 +import numpy as np +import pandas as pd +from data import SingleHeaderNumpyArray, DoubleHeaderNumpyArray + + +def test_single_header_numpy_array(): + x = np.random.rand(10) + y = np.random.rand(10) + + array = SingleHeaderNumpyArray(np.stack((x, y), axis=-1), ["x", "y"]) + + assert (array[:, "x"] == x).all() + assert (array[:, "y"] == y).all() + assert (array[3:7, "y"] == y[3:7]).all() + assert (array.x == x).all() + assert (array.y == y).all() + + +def test_double_header_numpy_array(): + x = np.random.rand(10) + y = np.random.rand(10) + vx = np.random.rand(10) + vy = np.random.rand(10) + + data_dict = {("position", "x"): x, ("position", "y"): y, ("velocity", "x"): vx, ("velocity", "y"): vy} + + data_columns = pd.MultiIndex.from_product([["position", "velocity"], ["x", "y"]]) + + node_data = pd.DataFrame(data_dict, columns=data_columns) + + array = DoubleHeaderNumpyArray(node_data.values, list(node_data.columns)) + + test_header_dict = {"position": ["x", "y"], "velocity": ["y"]} + + assert (array[:, ("position", "x")] == x).all() + assert (array[:, ("velocity", "y")] == vy).all() + assert (array[4:7, ("velocity", "y")] == vy[4:7]).all() + assert (array[:, [("position", "x"), ("velocity", "y")]] == np.stack((x, vy), axis=-1)).all() + assert (array[:, [("position", "y"), ("velocity", "x")]] == np.stack((y, vx), axis=-1)).all() + assert (array[2:6, [("position", "y"), ("velocity", "x")]] == np.stack((y, vx), axis=-1)[2:6]).all() + assert (array[:, test_header_dict] == np.stack((x, y, vy), axis=-1)).all() + assert (array[1:8, test_header_dict] == np.stack((x, y, vy), axis=-1)[1:8]).all() + assert (array.position.x 
== x).all()
diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/test_online.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/test_online.py
new file mode 100644
index 000000000..123b8e87e
--- /dev/null
+++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/test_online.py
@@ -0,0 +1,252 @@
+# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+#
+# SPDX-License-Identifier: Apache-2.0
+import os
+import time
+import json
+import torch
+import dill
+import random
+import pathlib
+import evaluation
+import numpy as np
+import visualization as vis
+from argument_parser import args
+from model.online.online_trajectron import OnlineTrajectron
+from model.model_registrar import ModelRegistrar
+from environment import Environment, Scene
+import matplotlib.pyplot as plt
+
+if not torch.cuda.is_available() or args.device == "cpu":
+    args.device = torch.device("cpu")
+else:
+    if torch.cuda.device_count() == 1:
+        # If you have CUDA_VISIBLE_DEVICES set, which you should,
+        # then this will prevent leftover flag arguments from
+        # messing with the device allocation.
+        args.device = "cuda:0"
+
+    args.device = torch.device(args.device)
+
+if args.eval_device is None:
+    args.eval_device = "cpu"
+
+if args.seed is not None:
+    random.seed(args.seed)
+    np.random.seed(args.seed)
+    torch.manual_seed(args.seed)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed_all(args.seed)
+
+
+def create_online_env(env, hyperparams, scene_idx, init_timestep):
+    test_scene = env.scenes[scene_idx]
+
+    online_scene = Scene(timesteps=init_timestep + 1, map=test_scene.map, dt=test_scene.dt)
+    online_scene.nodes = test_scene.get_nodes_clipped_at_time(
+        timesteps=np.arange(init_timestep - hyperparams["maximum_history_length"], init_timestep + 1),
+        state=hyperparams["state"],
+    )
+    online_scene.robot = test_scene.robot
+    online_scene.calculate_scene_graph(
+        attention_radius=env.attention_radius,
+        edge_addition_filter=hyperparams["edge_addition_filter"],
+        edge_removal_filter=hyperparams["edge_removal_filter"],
+    )
+
+    return Environment(
+        node_type_list=env.node_type_list,
+        standardization=env.standardization,
+        scenes=[online_scene],
+        attention_radius=env.attention_radius,
+        robot_type=env.robot_type,
+    )
+
+
+def get_maps_for_input(input_dict, scene, hyperparams):
+    scene_maps = list()
+    scene_pts = list()
+    heading_angles = list()
+    patch_sizes = list()
+    nodes_with_maps = list()
+    for node in input_dict:
+        if node.type in hyperparams["map_encoder"]:
+            x = input_dict[node]
+            me_hyp = hyperparams["map_encoder"][node.type]
+            if "heading_state_index" in me_hyp:
+                heading_state_index = me_hyp["heading_state_index"]
+                # We have to rotate the map in the opposite direction of the agent to match them
+                if type(heading_state_index) is list:  # infer from velocity or heading vector
+                    heading_angle = (
+                        -np.arctan2(x[-1, heading_state_index[1]], x[-1, heading_state_index[0]]) * 180 / np.pi
+                    )
+                else:
+                    heading_angle = -x[-1, heading_state_index] * 180 / np.pi
+            else:
+                heading_angle = None
+
+            scene_map = scene.map[node.type]
+            map_point = x[-1, :2]
+
+            patch_size = hyperparams["map_encoder"][node.type]["patch_size"]
+
+            scene_maps.append(scene_map)
+            scene_pts.append(map_point)
+            heading_angles.append(heading_angle)
+            patch_sizes.append(patch_size)
+            nodes_with_maps.append(node)
+
+    if heading_angles[0] is None:
+        heading_angles = None
+    else:
+        heading_angles = torch.Tensor(heading_angles)
+
+    maps = scene_maps[0].get_cropped_maps_from_scene_map_batch(
+        scene_maps,
scene_pts=torch.Tensor(scene_pts), patch_size=patch_sizes[0], rotation=heading_angles + ) + + maps_dict = {node: maps[[i]] for i, node in enumerate(nodes_with_maps)} + return maps_dict + + +def main(): + # Choose one of the model directory names under the experiment/*/models folders. + # Possibilities are 'vel_ee', 'int_ee', 'int_ee_me', or 'robot' + model_dir = os.path.join(args.log_dir, "int_ee") + + # Load hyperparameters from json + config_file = os.path.join(model_dir, args.conf) + if not os.path.exists(config_file): + raise ValueError("Config json not found!") + with open(config_file, "r") as conf_json: + hyperparams = json.load(conf_json) + + # Add hyperparams from arguments + hyperparams["dynamic_edges"] = args.dynamic_edges + hyperparams["edge_state_combine_method"] = args.edge_state_combine_method + hyperparams["edge_influence_combine_method"] = args.edge_influence_combine_method + hyperparams["edge_addition_filter"] = args.edge_addition_filter + hyperparams["edge_removal_filter"] = args.edge_removal_filter + hyperparams["batch_size"] = args.batch_size + hyperparams["k_eval"] = args.k_eval + hyperparams["offline_scene_graph"] = args.offline_scene_graph + hyperparams["incl_robot_node"] = args.incl_robot_node + hyperparams["edge_encoding"] = not args.no_edge_encoding + hyperparams["use_map_encoding"] = args.map_encoding + + output_save_dir = os.path.join(model_dir, "pred_figs") + pathlib.Path(output_save_dir).mkdir(parents=True, exist_ok=True) + + eval_data_path = os.path.join(args.data_dir, args.eval_data_dict) + with open(eval_data_path, "rb") as f: + eval_env = dill.load(f, encoding="latin1") + + if eval_env.robot_type is None and hyperparams["incl_robot_node"]: + eval_env.robot_type = eval_env.NodeType[0] # TODO: Make more general, allow the user to specify? + for scene in eval_env.scenes: + scene.add_robot_from_nodes(eval_env.robot_type) + + print("Loaded data from %s" % (eval_data_path,)) + + # Creating a dummy environment with a single scene that contains information about the world. + # When using this code, feel free to use whichever scene index or initial timestep you wish. + scene_idx = 0 + + # You need to have at least acceleration, so you want 2 timesteps of prior data, e.g. [0, 1], + # so that you can immediately start incremental inference from the 3rd timestep onwards. + init_timestep = 1 + + eval_scene = eval_env.scenes[scene_idx] + online_env = create_online_env(eval_env, hyperparams, scene_idx, init_timestep) + + model_registrar = ModelRegistrar(model_dir, args.eval_device) + model_registrar.load_models(iter_num=12) + + trajectron = OnlineTrajectron(model_registrar, hyperparams, args.eval_device) + + # If you want to see what different robot futures do to the predictions, uncomment this line as well as + # related "... += adjustment" lines below. + # adjustment = np.stack([np.arange(13)/float(i*2.0) for i in range(6, 12)], axis=1) + + # Here's how you'd incrementally run the model, e.g. with streaming data. 
+ trajectron.set_environment(online_env, init_timestep) + + for timestep in range(init_timestep + 1, eval_scene.timesteps): + input_dict = eval_scene.get_clipped_input_dict(timestep, hyperparams["state"]) + + maps = None + if hyperparams["use_map_encoding"]: + maps = get_maps_for_input(input_dict, eval_scene, hyperparams) + + robot_present_and_future = None + if eval_scene.robot is not None and hyperparams["incl_robot_node"]: + robot_present_and_future = eval_scene.robot.get( + np.array([timestep, timestep + hyperparams["prediction_horizon"]]), + hyperparams["state"][eval_scene.robot.type], + padding=0.0, + ) + robot_present_and_future = np.stack([robot_present_and_future, robot_present_and_future], axis=0) + # robot_present_and_future += adjustment + + start = time.time() + dists, preds = trajectron.incremental_forward( + input_dict, + maps, + prediction_horizon=6, + num_samples=1, + robot_present_and_future=robot_present_and_future, + full_dist=True, + ) + end = time.time() + print( + "t=%d: took %.2f s (= %.2f Hz) w/ %d nodes and %d edges" + % ( + timestep, + end - start, + 1.0 / (end - start), + len(trajectron.nodes), + trajectron.scene_graph.get_num_edges(), + ) + ) + + detailed_preds_dict = dict() + for node in eval_scene.nodes: + if node in preds: + detailed_preds_dict[node] = preds[node] + + fig, ax = plt.subplots() + vis.visualize_distribution(ax, dists) + vis.visualize_prediction( + ax, + {timestep: preds}, + eval_scene.dt, + hyperparams["maximum_history_length"], + hyperparams["prediction_horizon"], + ) + + if eval_scene.robot is not None and hyperparams["incl_robot_node"]: + robot_for_plotting = eval_scene.robot.get( + np.array([timestep, timestep + hyperparams["prediction_horizon"]]), + hyperparams["state"][eval_scene.robot.type], + ) + # robot_for_plotting += adjustment + + ax.plot(robot_for_plotting[1:, 1], robot_for_plotting[1:, 0], color="r", linewidth=1.0, alpha=1.0) + + # Current Node Position + circle = plt.Circle( + (robot_for_plotting[0, 1], robot_for_plotting[0, 0]), + 0.3, + facecolor="r", + edgecolor="k", + lw=0.5, + zorder=3, + ) + ax.add_artist(circle) + + fig.savefig(os.path.join(output_save_dir, f"pred_{timestep}.pdf"), dpi=300) + plt.close(fig) + + +if __name__ == "__main__": + main() diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/train.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/train.py new file mode 100644 index 000000000..274e3e0de --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/train.py @@ -0,0 +1,452 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 +import torch +from torch import nn, optim, utils +import numpy as np +import os +import time +import dill +import json +import random +import pathlib +import warnings +from tqdm import tqdm +import visualization +import evaluation +import matplotlib.pyplot as plt +from argument_parser import args +from model.trajectron import Trajectron +from model.model_registrar import ModelRegistrar +from model.model_utils import cyclical_lr +from model.dataset import EnvironmentDataset, collate +from tensorboardX import SummaryWriter + +# torch.autograd.set_detect_anomaly(True) + +if not torch.cuda.is_available() or args.device == "cpu": + args.device = torch.device("cpu") +else: + if torch.cuda.device_count() == 1: + # If you have CUDA_VISIBLE_DEVICES set, which you should, + # then this will prevent leftover flag arguments from + # messing with the device allocation. 
+        args.device = "cuda:0"
+
+    args.device = torch.device(args.device)
+
+if args.eval_device is None:
+    args.eval_device = torch.device("cpu")
+
+# This is needed for memory pinning using a DataLoader (otherwise memory is pinned to cuda:0 by default)
+torch.cuda.set_device(args.device)
+
+if args.seed is not None:
+    random.seed(args.seed)
+    np.random.seed(args.seed)
+    torch.manual_seed(args.seed)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed_all(args.seed)
+
+
+def main():
+    # Load hyperparameters from json
+    if not os.path.exists(args.conf):
+        raise ValueError("Config json not found!")
+    with open(args.conf, "r", encoding="utf-8") as conf_json:
+        hyperparams = json.load(conf_json)
+
+    # Add hyperparams from arguments
+    hyperparams["dynamic_edges"] = args.dynamic_edges
+    hyperparams["edge_state_combine_method"] = args.edge_state_combine_method
+    hyperparams["edge_influence_combine_method"] = args.edge_influence_combine_method
+    hyperparams["edge_addition_filter"] = args.edge_addition_filter
+    hyperparams["edge_removal_filter"] = args.edge_removal_filter
+    hyperparams["batch_size"] = args.batch_size
+    hyperparams["k_eval"] = args.k_eval
+    hyperparams["offline_scene_graph"] = args.offline_scene_graph
+    hyperparams["incl_robot_node"] = args.incl_robot_node
+    hyperparams["node_freq_mult_train"] = args.node_freq_mult_train
+    hyperparams["node_freq_mult_eval"] = args.node_freq_mult_eval
+    hyperparams["scene_freq_mult_train"] = args.scene_freq_mult_train
+    hyperparams["scene_freq_mult_eval"] = args.scene_freq_mult_eval
+    hyperparams["scene_freq_mult_viz"] = args.scene_freq_mult_viz
+    hyperparams["edge_encoding"] = not args.no_edge_encoding
+    hyperparams["use_map_encoding"] = args.map_encoding
+    hyperparams["augment"] = args.augment
+    hyperparams["override_attention_radius"] = args.override_attention_radius
+
+    print("-----------------------")
+    print("| TRAINING PARAMETERS |")
+    print("-----------------------")
+    print("| batch_size: %d" % args.batch_size)
+    print("| device: %s" % args.device)
+    print("| eval_device: %s" % args.eval_device)
+    print("| Offline Scene Graph Calculation: %s" % args.offline_scene_graph)
+    print("| EE state_combine_method: %s" % args.edge_state_combine_method)
+    print("| EIE scheme: %s" % args.edge_influence_combine_method)
+    print("| dynamic_edges: %s" % args.dynamic_edges)
+    print("| robot node: %s" % args.incl_robot_node)
+    print("| edge_addition_filter: %s" % args.edge_addition_filter)
+    print("| edge_removal_filter: %s" % args.edge_removal_filter)
+    print("| MHL: %s" % hyperparams["minimum_history_length"])
+    print("| PH: %s" % hyperparams["prediction_horizon"])
+    print("-----------------------")
+
+    log_writer = None
+    model_dir = None
+    if not args.debug:
+        # Create the log and model directory if they're not present.
+        model_dir = os.path.join(
+            args.log_dir, "models_" + time.strftime("%d_%b_%Y_%H_%M_%S", time.localtime()) + args.log_tag
+        )
+        pathlib.Path(model_dir).mkdir(parents=True, exist_ok=True)
+
+        # Save config to model directory
+        with open(os.path.join(model_dir, "config.json"), "w") as conf_json:
+            json.dump(hyperparams, conf_json)
+
+        log_writer = SummaryWriter(log_dir=model_dir)
+
+    # Load training and evaluation environments and scenes
+    train_scenes = []
+    train_data_path = os.path.join(args.data_dir, args.train_data_dict)
+    with open(train_data_path, "rb") as f:
+        train_env = dill.load(f, encoding="latin1")
+
+    for attention_radius_override in args.override_attention_radius:
+        node_type1, node_type2, attention_radius = attention_radius_override.split(" ")
+        train_env.attention_radius[(node_type1, node_type2)] = float(attention_radius)
+
+    if train_env.robot_type is None and hyperparams["incl_robot_node"]:
+        train_env.robot_type = train_env.NodeType[0]  # TODO: Make more general, allow the user to specify?
+        for scene in train_env.scenes:
+            scene.add_robot_from_nodes(train_env.robot_type)
+
+    train_scenes = train_env.scenes
+    train_scenes_sample_probs = train_env.scenes_freq_mult_prop if args.scene_freq_mult_train else None
+
+    train_dataset = EnvironmentDataset(
+        train_env,
+        hyperparams["state"],
+        hyperparams["pred_state"],
+        scene_freq_mult=hyperparams["scene_freq_mult_train"],
+        node_freq_mult=hyperparams["node_freq_mult_train"],
+        hyperparams=hyperparams,
+        min_history_timesteps=hyperparams["minimum_history_length"],
+        min_future_timesteps=hyperparams["prediction_horizon"],
+        return_robot=not args.incl_robot_node,
+    )
+    train_data_loader = dict()
+    for node_type_data_set in train_dataset:
+        if len(node_type_data_set) == 0:
+            continue
+
+        node_type_dataloader = utils.data.DataLoader(
+            node_type_data_set,
+            collate_fn=collate,
+            pin_memory=False if str(args.device) == "cpu" else True,
+            batch_size=args.batch_size,
+            shuffle=True,
+            num_workers=args.preprocess_workers,
+        )
+        train_data_loader[node_type_data_set.node_type] = node_type_dataloader
+
+    print(f"Loaded training data from {train_data_path}")
+
+    eval_scenes = []
+    eval_scenes_sample_probs = None
+    if args.eval_every is not None:
+        eval_data_path = os.path.join(args.data_dir, args.eval_data_dict)
+        with open(eval_data_path, "rb") as f:
+            eval_env = dill.load(f, encoding="latin1")
+
+        for attention_radius_override in args.override_attention_radius:
+            node_type1, node_type2, attention_radius = attention_radius_override.split(" ")
+            eval_env.attention_radius[(node_type1, node_type2)] = float(attention_radius)
+
+        if eval_env.robot_type is None and hyperparams["incl_robot_node"]:
+            eval_env.robot_type = eval_env.NodeType[0]  # TODO: Make more general, allow the user to specify?
+            for scene in eval_env.scenes:
+                scene.add_robot_from_nodes(eval_env.robot_type)
+
+        eval_scenes = eval_env.scenes
+        eval_scenes_sample_probs = eval_env.scenes_freq_mult_prop if args.scene_freq_mult_eval else None
+
+        eval_dataset = EnvironmentDataset(
+            eval_env,
+            hyperparams["state"],
+            hyperparams["pred_state"],
+            scene_freq_mult=hyperparams["scene_freq_mult_eval"],
+            node_freq_mult=hyperparams["node_freq_mult_eval"],
+            hyperparams=hyperparams,
+            min_history_timesteps=hyperparams["minimum_history_length"],
+            min_future_timesteps=hyperparams["prediction_horizon"],
+            return_robot=not args.incl_robot_node,
+        )
+        eval_data_loader = dict()
+        for node_type_data_set in eval_dataset:
+            if len(node_type_data_set) == 0:
+                continue
+
+            node_type_dataloader = utils.data.DataLoader(
+                node_type_data_set,
+                collate_fn=collate,
+                pin_memory=False if str(args.eval_device) == "cpu" else True,
+                batch_size=args.eval_batch_size,
+                shuffle=True,
+                num_workers=args.preprocess_workers,
+            )
+            eval_data_loader[node_type_data_set.node_type] = node_type_dataloader
+
+        print(f"Loaded evaluation data from {eval_data_path}")
+
+    # Offline Calculate Scene Graph
+    if hyperparams["offline_scene_graph"] == "yes":
+        print(f"Offline calculating scene graphs")
+        for i, scene in enumerate(train_scenes):
+            scene.calculate_scene_graph(
+                train_env.attention_radius, hyperparams["edge_addition_filter"], hyperparams["edge_removal_filter"]
+            )
+            print(f"Created Scene Graph for Training Scene {i}")
+
+        for i, scene in enumerate(eval_scenes):
+            scene.calculate_scene_graph(
+                eval_env.attention_radius, hyperparams["edge_addition_filter"], hyperparams["edge_removal_filter"]
+            )
+            print(f"Created Scene Graph for Evaluation Scene {i}")
+
+    model_registrar = ModelRegistrar(model_dir, args.device)
+
+    trajectron = Trajectron(model_registrar, hyperparams, log_writer, args.device)
+
+    trajectron.set_environment(train_env)
+    trajectron.set_annealing_params()
+    print("Created Training Model.")
+
+    eval_trajectron = None
+    if args.eval_every is not None or args.vis_every is not None:
+        eval_trajectron = Trajectron(model_registrar, hyperparams, log_writer, args.eval_device)
+        eval_trajectron.set_environment(eval_env)
+        eval_trajectron.set_annealing_params()
+    print("Created Evaluation Model.")
+
+    optimizer = dict()
+    lr_scheduler = dict()
+    for node_type in train_env.NodeType:
+        if node_type not in hyperparams["pred_state"]:
+            continue
+        optimizer[node_type] = optim.Adam(
+            [
+                {"params": model_registrar.get_all_but_name_match("map_encoder").parameters()},
+                {"params": model_registrar.get_name_match("map_encoder").parameters(), "lr": 0.0008},
+            ],
+            lr=hyperparams["learning_rate"],
+        )
+        # Set Learning Rate
+        if hyperparams["learning_rate_style"] == "const":
+            lr_scheduler[node_type] = optim.lr_scheduler.ExponentialLR(optimizer[node_type], gamma=1.0)
+        elif hyperparams["learning_rate_style"] == "exp":
+            lr_scheduler[node_type] = optim.lr_scheduler.ExponentialLR(
+                optimizer[node_type], gamma=hyperparams["learning_decay_rate"]
+            )
+
+    #################################
+    #           TRAINING            #
+    #################################
+    curr_iter_node_type = {node_type: 0 for node_type in train_data_loader.keys()}
+    for epoch in range(1, args.train_epochs + 1):
+        model_registrar.to(args.device)
+        train_dataset.augment = args.augment
+        for node_type, data_loader in train_data_loader.items():
+            curr_iter = curr_iter_node_type[node_type]
+            pbar = tqdm(data_loader, ncols=80)
+            for batch in pbar:
+                trajectron.set_curr_iter(curr_iter)
trajectron.step_annealers(node_type) + optimizer[node_type].zero_grad() + train_loss = trajectron.train_loss(batch, node_type) + pbar.set_description(f"Epoch {epoch}, {node_type} L: {train_loss.item():.2f}") + train_loss.backward() + # Clipping gradients. + if hyperparams["grad_clip"] is not None: + nn.utils.clip_grad_value_(model_registrar.parameters(), hyperparams["grad_clip"]) + optimizer[node_type].step() + + # Stepping forward the learning rate scheduler and annealers. + lr_scheduler[node_type].step() + + if not args.debug: + log_writer.add_scalar( + f"{node_type}/train/learning_rate", lr_scheduler[node_type].get_lr()[0], curr_iter + ) + log_writer.add_scalar(f"{node_type}/train/loss", train_loss, curr_iter) + + curr_iter += 1 + curr_iter_node_type[node_type] = curr_iter + train_dataset.augment = False + if args.eval_every is not None or args.vis_every is not None: + eval_trajectron.set_curr_iter(epoch) + + ################################# + # VISUALIZATION # + ################################# + if args.vis_every is not None and not args.debug and epoch % args.vis_every == 0 and epoch > 0: + max_hl = hyperparams["maximum_history_length"] + ph = hyperparams["prediction_horizon"] + with torch.no_grad(): + # Predict random timestep to plot for train data set + if args.scene_freq_mult_viz: + scene = np.random.choice(train_scenes, p=train_scenes_sample_probs) + else: + scene = np.random.choice(train_scenes) + timestep = scene.sample_timesteps(1, min_future_timesteps=ph) + predictions = trajectron.predict( + scene, + timestep, + ph, + min_future_timesteps=ph, + z_mode=True, + gmm_mode=True, + all_z_sep=False, + full_dist=False, + ) + + # Plot predicted timestep for random scene + fig, ax = plt.subplots(figsize=(10, 10)) + visualization.visualize_prediction( + ax, + predictions, + scene.dt, + max_hl=max_hl, + ph=ph, + map=scene.map["VISUALIZATION"] if scene.map is not None else None, + ) + ax.set_title(f"{scene.name}-t: {timestep}") + log_writer.add_figure("train/prediction", fig, epoch) + + model_registrar.to(args.eval_device) + # Predict random timestep to plot for eval data set + if args.scene_freq_mult_viz: + scene = np.random.choice(eval_scenes, p=eval_scenes_sample_probs) + else: + scene = np.random.choice(eval_scenes) + timestep = scene.sample_timesteps(1, min_future_timesteps=ph) + predictions = eval_trajectron.predict( + scene, timestep, ph, num_samples=20, min_future_timesteps=ph, z_mode=False, full_dist=False + ) + + # Plot predicted timestep for random scene + fig, ax = plt.subplots(figsize=(10, 10)) + visualization.visualize_prediction( + ax, + predictions, + scene.dt, + max_hl=max_hl, + ph=ph, + map=scene.map["VISUALIZATION"] if scene.map is not None else None, + ) + ax.set_title(f"{scene.name}-t: {timestep}") + log_writer.add_figure("eval/prediction", fig, epoch) + + # Predict random timestep to plot for eval data set + predictions = eval_trajectron.predict( + scene, + timestep, + ph, + min_future_timesteps=ph, + z_mode=True, + gmm_mode=True, + all_z_sep=True, + full_dist=False, + ) + + # Plot predicted timestep for random scene + fig, ax = plt.subplots(figsize=(10, 10)) + visualization.visualize_prediction( + ax, + predictions, + scene.dt, + max_hl=max_hl, + ph=ph, + map=scene.map["VISUALIZATION"] if scene.map is not None else None, + ) + ax.set_title(f"{scene.name}-t: {timestep}") + log_writer.add_figure("eval/prediction_all_z", fig, epoch) + + ################################# + # EVALUATION # + ################################# + if args.eval_every is not None and not 
args.debug and epoch % args.eval_every == 0 and epoch > 0: + max_hl = hyperparams["maximum_history_length"] + ph = hyperparams["prediction_horizon"] + model_registrar.to(args.eval_device) + with torch.no_grad(): + # Calculate evaluation loss + for node_type, data_loader in eval_data_loader.items(): + eval_loss = [] + print(f"Starting Evaluation @ epoch {epoch} for node type: {node_type}") + pbar = tqdm(data_loader, ncols=80) + for batch in pbar: + eval_loss_node_type = eval_trajectron.eval_loss(batch, node_type) + pbar.set_description(f"Epoch {epoch}, {node_type} L: {eval_loss_node_type.item():.2f}") + eval_loss.append({node_type: {"nll": [eval_loss_node_type]}}) + del batch + + evaluation.log_batch_errors(eval_loss, log_writer, f"{node_type}/eval_loss", epoch) + + # Predict batch timesteps for evaluation dataset evaluation + eval_batch_errors = [] + for scene in tqdm(eval_scenes, desc="Sample Evaluation", ncols=80): + timesteps = scene.sample_timesteps(args.eval_batch_size) + + predictions = eval_trajectron.predict( + scene, timesteps, ph, num_samples=50, min_future_timesteps=ph, full_dist=False + ) + + eval_batch_errors.append( + evaluation.compute_batch_statistics( + predictions, scene.dt, max_hl=max_hl, ph=ph, node_type_enum=eval_env.NodeType, map=scene.map + ) + ) + + evaluation.log_batch_errors( + eval_batch_errors, log_writer, "eval", epoch, bar_plot=["kde"], box_plot=["ade", "fde"] + ) + + # Predict maximum likelihood batch timesteps for evaluation dataset evaluation + eval_batch_errors_ml = [] + for scene in tqdm(eval_scenes, desc="MM Evaluation", ncols=80): + timesteps = scene.sample_timesteps(scene.timesteps) + + predictions = eval_trajectron.predict( + scene, + timesteps, + ph, + num_samples=1, + min_future_timesteps=ph, + z_mode=True, + gmm_mode=True, + full_dist=False, + ) + + eval_batch_errors_ml.append( + evaluation.compute_batch_statistics( + predictions, + scene.dt, + max_hl=max_hl, + ph=ph, + map=scene.map, + node_type_enum=eval_env.NodeType, + kde=False, + ) + ) + + evaluation.log_batch_errors(eval_batch_errors_ml, log_writer, "eval/ml", epoch) + + if args.save_every is not None and args.debug is False and epoch % args.save_every == 0: + model_registrar.save_models(epoch) + + +if __name__ == "__main__": + main() diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/utils/__init__.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/utils/__init__.py new file mode 100644 index 000000000..9200d8dee --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/utils/__init__.py @@ -0,0 +1,6 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 +from .trajectory_utils import prediction_output_to_trajectories +from .matrix_utils import block_diag, tile +from .os_utils import maybe_makedirs diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/utils/matrix_utils.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/utils/matrix_utils.py new file mode 100644 index 000000000..cb32abc44 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/utils/matrix_utils.py @@ -0,0 +1,43 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 +import numpy as np +import torch + + +def attach_dim(v, n_dim_to_prepend=0, n_dim_to_append=0): + return v.reshape(torch.Size([1] * n_dim_to_prepend) + v.shape + torch.Size([1] * n_dim_to_append)) + + +def block_diag(m): + """ + Make a block diagonal matrix along 
dim=-3 + EXAMPLE: + block_diag(torch.ones(4,3,2)) + should give a 12 x 8 matrix with blocks of 3 x 2 ones. + Prepend batch dimensions if needed. + You can also give a list of matrices. + :type m: torch.Tensor, list + :rtype: torch.Tensor + """ + if type(m) is list: + m = torch.cat([m1.unsqueeze(-3) for m1 in m], -3) + + d = m.dim() + n = m.shape[-3] + siz0 = m.shape[:-3] + siz1 = m.shape[-2:] + m2 = m.unsqueeze(-2) + eye = attach_dim(torch.eye(n, device=m.device).unsqueeze(-2), d - 3, 1) + return (m2 * eye).reshape(siz0 + torch.Size(torch.tensor(siz1) * n)) + + +def tile(a, dim, n_tile, device="cpu"): + init_dim = a.size(dim) + repeat_idx = [1] * a.dim() + repeat_idx[dim] = n_tile + a = a.repeat(*(repeat_idx)) + order_index = torch.LongTensor(np.concatenate([init_dim * np.arange(n_tile) + i for i in range(init_dim)])).to( + device + ) + return torch.index_select(a, dim, order_index) diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/utils/os_utils.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/utils/os_utils.py new file mode 100644 index 000000000..5acd68d77 --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/utils/os_utils.py @@ -0,0 +1,19 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 +import os + + +def maybe_makedirs(path_to_create): + """This function will create a directory, unless it exists already, + at which point the function will return. + The exception handling is necessary as it prevents a race condition + from occurring. + Inputs: + path_to_create - A string path to a directory you'd like created. + """ + try: + os.makedirs(path_to_create) + except OSError: + if not os.path.isdir(path_to_create): + raise diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/utils/trajectory_utils.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/utils/trajectory_utils.py new file mode 100644 index 000000000..588151c6e --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/utils/trajectory_utils.py @@ -0,0 +1,46 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 +import numpy as np + + +def prediction_output_to_trajectories(prediction_output_dict, dt, max_h, ph, map=None, prune_ph_to_future=False): + + prediction_timesteps = prediction_output_dict.keys() + + output_dict = dict() + histories_dict = dict() + futures_dict = dict() + + for t in prediction_timesteps: + histories_dict[t] = dict() + output_dict[t] = dict() + futures_dict[t] = dict() + prediction_nodes = prediction_output_dict[t].keys() + for node in prediction_nodes: + predictions_output = prediction_output_dict[t][node] + position_state = {"position": ["x", "y"]} + + history = node.get(np.array([t - max_h, t]), position_state) # History includes current pos + history = history[~np.isnan(history.sum(axis=1))] + + future = node.get(np.array([t + 1, t + ph]), position_state) + future = future[~np.isnan(future.sum(axis=1))] + + if prune_ph_to_future: + predictions_output = predictions_output[:, :, : future.shape[0]] + if predictions_output.shape[2] == 0: + continue + + trajectory = predictions_output + + if map is None: + histories_dict[t][node] = history + output_dict[t][node] = trajectory + futures_dict[t][node] = future + else: + histories_dict[t][node] = map.to_map_points(history) + output_dict[t][node] = map.to_map_points(trajectory) + futures_dict[t][node] = map.to_map_points(future) + + return output_dict, 
histories_dict, futures_dict
diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/visualization/__init__.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/visualization/__init__.py
new file mode 100644
index 000000000..d8b5b2027
--- /dev/null
+++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/visualization/__init__.py
@@ -0,0 +1,5 @@
+# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+#
+# SPDX-License-Identifier: Apache-2.0
+from .visualization import visualize_prediction, visualize_distribution
+from .visualization_utils import plot_boxplots
diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/visualization/visualization.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/visualization/visualization.py
new file mode 100644
index 000000000..5c1547ef1
--- /dev/null
+++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/visualization/visualization.py
@@ -0,0 +1,137 @@
+# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+#
+# SPDX-License-Identifier: Apache-2.0
+from utils import prediction_output_to_trajectories
+from scipy import linalg
+import matplotlib.pyplot as plt
+import matplotlib.patches as patches
+import matplotlib.patheffects as pe
+import numpy as np
+import seaborn as sns
+
+
+def plot_trajectories(
+    ax,
+    prediction_dict,
+    histories_dict,
+    futures_dict,
+    line_alpha=0.7,
+    line_width=0.2,
+    edge_width=2,
+    circle_edge_width=0.5,
+    node_circle_size=0.3,
+    batch_num=0,
+    kde=False,
+):
+
+    cmap = ["k", "b", "y", "g", "r"]
+
+    for node in histories_dict:
+        history = histories_dict[node]
+        future = futures_dict[node]
+        predictions = prediction_dict[node]
+
+        if np.isnan(history[-1]).any():
+            continue
+
+        ax.plot(history[:, 0], history[:, 1], "k--")
+
+        for sample_num in range(prediction_dict[node].shape[1]):
+
+            if kde and predictions.shape[1] >= 50:
+                line_alpha = 0.2
+                for t in range(predictions.shape[2]):
+                    sns.kdeplot(
+                        predictions[batch_num, :, t, 0],
+                        predictions[batch_num, :, t, 1],
+                        ax=ax,
+                        shade=True,
+                        shade_lowest=False,
+                        color=np.random.choice(cmap),
+                        alpha=0.8,
+                    )
+
+            ax.plot(
+                predictions[batch_num, sample_num, :, 0],
+                predictions[batch_num, sample_num, :, 1],
+                color=cmap[node.type.value],
+                linewidth=line_width,
+                alpha=line_alpha,
+            )
+
+            ax.plot(
+                future[:, 0],
+                future[:, 1],
+                "w--",
+                path_effects=[pe.Stroke(linewidth=edge_width, foreground="k"), pe.Normal()],
+            )
+
+            # Current Node Position
+            circle = plt.Circle(
+                (history[-1, 0], history[-1, 1]),
+                node_circle_size,
+                facecolor="g",
+                edgecolor="k",
+                lw=circle_edge_width,
+                zorder=3,
+            )
+            ax.add_artist(circle)
+
+    ax.axis("equal")
+
+
+def visualize_prediction(ax, prediction_output_dict, dt, max_hl, ph, robot_node=None, map=None, **kwargs):
+
+    prediction_dict, histories_dict, futures_dict = prediction_output_to_trajectories(
+        prediction_output_dict, dt, max_hl, ph, map=map
+    )
+
+    assert len(prediction_dict.keys()) <= 1
+    if len(prediction_dict.keys()) == 0:
+        return
+    ts_key = list(prediction_dict.keys())[0]
+
+    prediction_dict = prediction_dict[ts_key]
+    histories_dict = histories_dict[ts_key]
+    futures_dict = futures_dict[ts_key]
+
+    if map is not None:
+        ax.imshow(map.as_image(), origin="lower", alpha=0.5)
+    plot_trajectories(ax, prediction_dict, histories_dict, futures_dict, **kwargs)
+
+
+def visualize_distribution(ax, prediction_distribution_dict, map=None, pi_threshold=0.05, **kwargs):
+    if map is not None:
+        ax.imshow(map.as_image(), origin="lower", alpha=0.5)
+
+    for node, pred_dist in
prediction_distribution_dict.items(): + if pred_dist.mus.shape[:2] != (1, 1): + return + + means = pred_dist.mus.squeeze().cpu().numpy() + covs = pred_dist.get_covariance_matrix().squeeze().cpu().numpy() + pis = pred_dist.pis_cat_dist.probs.squeeze().cpu().numpy() + + for timestep in range(means.shape[0]): + for z_val in range(means.shape[1]): + mean = means[timestep, z_val] + covar = covs[timestep, z_val] + pi = pis[timestep, z_val] + + if pi < pi_threshold: + continue + + v, w = linalg.eigh(covar) + v = 2.0 * np.sqrt(2.0) * np.sqrt(v) + u = w[0] / linalg.norm(w[0]) + + # Plot an ellipse to show the Gaussian component + angle = np.arctan(u[1] / u[0]) + angle = 180.0 * angle / np.pi # convert to degrees + ell = patches.Ellipse( + mean, v[0], v[1], 180.0 + angle, color="blue" if node.type.name == "VEHICLE" else "orange" + ) + ell.set_edgecolor(None) + ell.set_clip_box(ax.bbox) + ell.set_alpha(pi / 10) + ax.add_artist(ell) diff --git a/forge/test/models/pytorch/multimodal/trajectron/trajectron/visualization/visualization_utils.py b/forge/test/models/pytorch/multimodal/trajectron/trajectron/visualization/visualization_utils.py new file mode 100644 index 000000000..a12b8a2eb --- /dev/null +++ b/forge/test/models/pytorch/multimodal/trajectron/trajectron/visualization/visualization_utils.py @@ -0,0 +1,30 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 +import numpy as np +import pandas as pd +import seaborn as sns + + +def plot_boxplots(ax, perf_dict_for_pd, x_label, y_label): + perf_df = pd.DataFrame.from_dict(perf_dict_for_pd) + our_mean_color = sns.color_palette("muted")[9] + marker_size = 7 + mean_markers = "X" + with sns.color_palette("muted"): + sns.boxplot(x=x_label, y=y_label, data=perf_df, ax=ax, showfliers=False) + ax.plot( + [0], + [np.mean(perf_df[y_label])], + color=our_mean_color, + marker=mean_markers, + markeredgecolor="#545454", + markersize=marker_size, + zorder=10, + ) + + +def plot_barplots(ax, perf_dict_for_pd, x_label, y_label): + perf_df = pd.DataFrame.from_dict(perf_dict_for_pd) + with sns.color_palette("muted"): + sns.barplot(x=x_label, y=y_label, ax=ax, data=perf_df)
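As a quick sanity check of the plotting helpers added above, the following standalone sketch (not part of the diff) shows one way plot_boxplots could be driven with synthetic error statistics. It assumes the vendored trajectron directory is on sys.path, as the top-level test file arranges, and that pandas, seaborn, and matplotlib are installed; the metric name "ade" and the values are illustrative only.

import sys

sys.path.append("forge/test/models/pytorch/multimodal/trajectron/trajectron/")

import numpy as np
import matplotlib.pyplot as plt

from visualization import plot_boxplots  # re-exported from visualization_utils via visualization/__init__.py

# Synthetic per-sample displacement errors, shaped the way plot_boxplots expects:
# a dict of equal-length columns that pandas can turn into a DataFrame.
perf_dict = {"dataset": ["eth_val"] * 20, "ade": np.random.rand(20).tolist()}

fig, ax = plt.subplots(figsize=(4, 4))
plot_boxplots(ax, perf_dict, x_label="dataset", y_label="ade")
fig.savefig("ade_boxplot.png", dpi=150)
plt.close(fig)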