diff --git a/roach/config/config_agent.yaml b/roach/config/config_agent.yaml new file mode 100644 index 0000000..dc49aaf --- /dev/null +++ b/roach/config/config_agent.yaml @@ -0,0 +1,64 @@ +entry_point: roach.rl_birdview_agent:RlBirdviewAgent +wb_run_path: null +wb_ckpt_step: null +env_wrapper: + entry_point: roach.utils.rl_birdview_wrapper:RlBirdviewWrapper + kwargs: + input_states: + - control + - vel_xy + acc_as_action: true +policy: + entry_point: roach.models.ppo_policy:PpoPolicy + kwargs: + policy_head_arch: + - 256 + - 256 + value_head_arch: + - 256 + - 256 + features_extractor_entry_point: roach.models.torch_layers:XtMaCNN + features_extractor_kwargs: + states_neurons: + - 256 + - 256 + distribution_entry_point: roach.models.distributions:BetaDistribution + distribution_kwargs: + dist_init: null +training: + entry_point: roach.models.ppo:PPO + kwargs: + learning_rate: 1.0e-05 + n_steps_total: 12288 + batch_size: 256 + n_epochs: 20 + gamma: 0.99 + gae_lambda: 0.9 + clip_range: 0.2 + clip_range_vf: null + ent_coef: 0.01 + explore_coef: 0.05 + vf_coef: 0.5 + max_grad_norm: 0.5 + target_kl: 0.01 + update_adv: false + lr_schedule_step: 8 +obs_configs: + birdview: + module: birdview.chauffeurnet + width_in_pixels: 192 + pixels_ev_to_bottom: 0 + pixels_per_meter: 5.0 + history_idx: + - -1 + - -1 + - -1 + - -1 + scale_bbox: true + scale_mask_col: 1.0 + speed: + module: actor_state.speed + control: + module: actor_state.control + velocity: + module: actor_state.velocity diff --git a/roach/config/config_agent_original.yaml b/roach/config/config_agent_original.yaml new file mode 100644 index 0000000..ef831ef --- /dev/null +++ b/roach/config/config_agent_original.yaml @@ -0,0 +1,64 @@ +entry_point: roach.rl_birdview_agent:RlBirdviewAgent +wb_run_path: null +wb_ckpt_step: null +env_wrapper: + entry_point: roach.utils.rl_birdview_wrapper:RlBirdviewWrapper + kwargs: + input_states: + - control + - vel_xy + acc_as_action: true +policy: + entry_point: roach.models.ppo_policy:PpoPolicy + kwargs: + policy_head_arch: + - 256 + - 256 + value_head_arch: + - 256 + - 256 + features_extractor_entry_point: roach.models.torch_layers:XtMaCNN + features_extractor_kwargs: + states_neurons: + - 256 + - 256 + distribution_entry_point: roach.models.distributions:BetaDistribution + distribution_kwargs: + dist_init: null +training: + entry_point: roach.models.ppo:PPO + kwargs: + learning_rate: 1.0e-05 + n_steps_total: 12288 + batch_size: 256 + n_epochs: 20 + gamma: 0.99 + gae_lambda: 0.9 + clip_range: 0.2 + clip_range_vf: null + ent_coef: 0.01 + explore_coef: 0.05 + vf_coef: 0.5 + max_grad_norm: 0.5 + target_kl: 0.01 + update_adv: false + lr_schedule_step: 8 +obs_configs: + birdview: + module: birdview.chauffeurnet + width_in_pixels: 192 + pixels_ev_to_bottom: 40 + pixels_per_meter: 5.0 + history_idx: + - -16 + - -11 + - -6 + - -1 + scale_bbox: true + scale_mask_col: 1.0 + speed: + module: actor_state.speed + control: + module: actor_state.control + velocity: + module: actor_state.velocity diff --git a/roach/criteria/blocked.py b/roach/criteria/blocked.py new file mode 100644 index 0000000..334e581 --- /dev/null +++ b/roach/criteria/blocked.py @@ -0,0 +1,30 @@ +import numpy as np + + +class Blocked(): + + def __init__(self, speed_threshold=0.1, below_threshold_max_time=90.0): + self._speed_threshold = speed_threshold + self._below_threshold_max_time = below_threshold_max_time + self._time_last_valid_state = None + + def tick(self, vehicle, timestamp): + info = None + linear_speed = 
self._calculate_speed(vehicle.get_velocity()) + + if linear_speed < self._speed_threshold and self._time_last_valid_state: + if (timestamp['relative_simulation_time'] - self._time_last_valid_state) > self._below_threshold_max_time: + # The actor has been "blocked" for too long + ev_loc = vehicle.get_location() + info = { + 'step': timestamp['step'], + 'simulation_time': timestamp['relative_simulation_time'], + 'ev_loc': [ev_loc.x, ev_loc.y, ev_loc.z] + } + else: + self._time_last_valid_state = timestamp['relative_simulation_time'] + return info + + @staticmethod + def _calculate_speed(carla_velocity): + return np.linalg.norm([carla_velocity.x, carla_velocity.y]) diff --git a/roach/criteria/collision.py b/roach/criteria/collision.py new file mode 100644 index 0000000..f2f41e4 --- /dev/null +++ b/roach/criteria/collision.py @@ -0,0 +1,117 @@ +import carla +import weakref +import numpy as np + + +class Collision(): + def __init__(self, vehicle, carla_world, intensity_threshold=0.0, + min_area_of_collision=3, max_area_of_collision=5, max_id_time=5): + blueprint = carla_world.get_blueprint_library().find('sensor.other.collision') + self._collision_sensor = carla_world.spawn_actor(blueprint, carla.Transform(), attach_to=vehicle) + self._collision_sensor.listen(lambda event: self._on_collision(weakref.ref(self), event)) + self._collision_info = None + + self.registered_collisions = [] + self.last_id = None + self.collision_time = None + + # If closer than this distance, the collision is ignored + self._min_area_of_collision = min_area_of_collision + # If further than this distance, the area is forgotten + self._max_area_of_collision = max_area_of_collision + # Amount of time the last collision if is remembered + self._max_id_time = max_id_time + # intensity_threshold, LBC uses 400, leaderboard does not use it (set to 0) + self._intensity_threshold = intensity_threshold + + def tick(self, vehicle, timestamp): + ev_loc = vehicle.get_location() + new_registered_collisions = [] + # Loops through all the previous registered collisions + for collision_location in self.registered_collisions: + distance = ev_loc.distance(collision_location) + # If far away from a previous collision, forget it + if distance <= self._max_area_of_collision: + new_registered_collisions.append(collision_location) + + self.registered_collisions = new_registered_collisions + + if self.last_id and timestamp['relative_simulation_time'] - self.collision_time > self._max_id_time: + self.last_id = None + + info = self._collision_info + self._collision_info = None + if info is not None: + info['step'] -= timestamp['start_frame'] + info['simulation_time'] -= timestamp['start_simulation_time'] + return info + + @staticmethod + def _on_collision(weakself, event): + self = weakself() + if not self: + return + # Ignore the current one if it's' the same id as before + if self.last_id == event.other_actor.id: + return + # Ignore if it's too close to a previous collision (avoid micro collisions) + ev_loc = event.actor.get_transform().location + for collision_location in self.registered_collisions: + if ev_loc.distance(collision_location) <= self._min_area_of_collision: + return + # Ignore if its intensity is smaller than self._intensity_threshold + impulse = event.normal_impulse + intensity = np.linalg.norm([impulse.x, impulse.y, impulse.z]) + if intensity < self._intensity_threshold: + return + + # collision_type + if ('static' in event.other_actor.type_id or 'traffic' in event.other_actor.type_id) \ + and 'sidewalk' not in 
event.other_actor.type_id: + collision_type = 0 # TrafficEventType.COLLISION_STATIC + elif 'vehicle' in event.other_actor.type_id: + collision_type = 1 # TrafficEventType.COLLISION_VEHICLE + elif 'walker' in event.other_actor.type_id: + collision_type = 2 # TrafficEventType.COLLISION_PEDESTRIAN + else: + collision_type = -1 + + # write to info, all quantities in in world coordinate + event_loc = event.transform.location + event_rot = event.transform.rotation + oa_loc = event.other_actor.get_transform().location + oa_rot = event.other_actor.get_transform().rotation + oa_vel = event.other_actor.get_velocity() + ev_rot = event.actor.get_transform().rotation + ev_vel = event.actor.get_velocity() + + self._collision_info = { + 'step': event.frame, + 'simulation_time': event.timestamp, + 'collision_type': collision_type, + 'other_actor_id': event.other_actor.id, + 'other_actor_type_id': event.other_actor.type_id, + 'intensity': intensity, + 'normal_impulse': [impulse.x, impulse.y, impulse.z], + 'event_loc': [event_loc.x, event_loc.y, event_loc.z], + 'event_rot': [event_rot.roll, event_rot.pitch, event_rot.yaw], + 'ev_loc': [ev_loc.x, ev_loc.y, ev_loc.z], + 'ev_rot': [ev_rot.roll, ev_rot.pitch, ev_rot.yaw], + 'ev_vel': [ev_vel.x, ev_vel.y, ev_vel.z], + 'oa_loc': [oa_loc.x, oa_loc.y, oa_loc.z], + 'oa_rot': [oa_rot.roll, oa_rot.pitch, oa_rot.yaw], + 'oa_vel': [oa_vel.x, oa_vel.y, oa_vel.z] + } + + self.collision_time = event.timestamp + + self.registered_collisions.append(ev_loc) + + # Number 0: static objects -> ignore it + if event.other_actor.id != 0: + self.last_id = event.other_actor.id + + def clean(self): + self._collision_sensor.stop() + self._collision_sensor.destroy() + self._collision_sensor = None diff --git a/roach/criteria/encounter_light.py b/roach/criteria/encounter_light.py new file mode 100644 index 0000000..e5ab4ac --- /dev/null +++ b/roach/criteria/encounter_light.py @@ -0,0 +1,26 @@ +from carla_gym.utils.traffic_light import TrafficLightHandler + + +class EncounterLight(): + + def __init__(self, dist_threshold=7.5): + self._last_light_id = None + self._dist_threshold = dist_threshold + + def tick(self, vehicle, timestamp): + info = None + + light_state, light_loc, light_id = TrafficLightHandler.get_light_state( + vehicle, dist_threshold=self._dist_threshold) + + if light_id is not None: + if light_id != self._last_light_id: + self._last_light_id = light_id + info = { + 'step': timestamp['step'], + 'simulation_time': timestamp['relative_simulation_time'], + 'id': light_id, + 'tl_loc': light_loc.tolist() + } + + return info diff --git a/roach/criteria/outside_route_lane.py b/roach/criteria/outside_route_lane.py new file mode 100644 index 0000000..117aff9 --- /dev/null +++ b/roach/criteria/outside_route_lane.py @@ -0,0 +1,99 @@ +import carla +from carla_gym.utils.transforms import cast_angle + + +class OutsideRouteLane(): + + def __init__(self, carla_map, vehicle_loc, + allowed_out_distance=1.3, max_allowed_vehicle_angle=120.0, max_allowed_waypint_angle=150.0): + self._map = carla_map + self._pre_ego_waypoint = self._map.get_waypoint(vehicle_loc) + + self._allowed_out_distance = allowed_out_distance + self._max_allowed_vehicle_angle = max_allowed_vehicle_angle + self._max_allowed_waypint_angle = max_allowed_waypint_angle + + self._outside_lane_active = False + self._wrong_lane_active = False + self._last_road_id = None + self._last_lane_id = None + + def tick(self, vehicle, timestamp, distance_traveled): + ev_loc = vehicle.get_location() + ev_yaw = 
vehicle.get_transform().rotation.yaw + self._is_outside_driving_lanes(ev_loc) + self._is_at_wrong_lane(ev_loc, ev_yaw) + + info = None + if self._outside_lane_active or self._wrong_lane_active: + info = { + 'step': timestamp['step'], + 'simulation_time': timestamp['relative_simulation_time'], + 'ev_loc': [ev_loc.x, ev_loc.y, ev_loc.z], + 'distance_traveled': distance_traveled, + 'outside_lane': self._outside_lane_active, + 'wrong_lane': self._wrong_lane_active + } + return info + + def _is_outside_driving_lanes(self, location): + """ + Detects if the ego_vehicle is outside driving/parking lanes + """ + + current_driving_wp = self._map.get_waypoint(location, lane_type=carla.LaneType.Driving, project_to_road=True) + current_parking_wp = self._map.get_waypoint(location, lane_type=carla.LaneType.Parking, project_to_road=True) + + driving_distance = location.distance(current_driving_wp.transform.location) + if current_parking_wp is not None: # Some towns have no parking + parking_distance = location.distance(current_parking_wp.transform.location) + else: + parking_distance = float('inf') + + if driving_distance >= parking_distance: + distance = parking_distance + lane_width = current_parking_wp.lane_width + else: + distance = driving_distance + lane_width = current_driving_wp.lane_width + + self._outside_lane_active = distance > (lane_width / 2 + self._allowed_out_distance) + + def _is_at_wrong_lane(self, location, yaw): + """ + Detects if the ego_vehicle has invaded a wrong driving lane + """ + + current_waypoint = self._map.get_waypoint(location, lane_type=carla.LaneType.Driving, project_to_road=True) + current_lane_id = current_waypoint.lane_id + current_road_id = current_waypoint.road_id + + # Lanes and roads are too chaotic at junctions + if current_waypoint.is_junction: + self._wrong_lane_active = False + elif self._last_road_id != current_road_id or self._last_lane_id != current_lane_id: + + # Route direction can be considered continuous, except after exiting a junction. + if self._pre_ego_waypoint.is_junction: + # cast angle to [-180, +180) + vehicle_lane_angle = cast_angle( + current_waypoint.transform.rotation.yaw - yaw) + + self._wrong_lane_active = abs(vehicle_lane_angle) > self._max_allowed_vehicle_angle + + else: + # Check for a big gap in waypoint directions. + waypoint_angle = cast_angle( + current_waypoint.transform.rotation.yaw - self._pre_ego_waypoint.transform.rotation.yaw) + + if abs(waypoint_angle) >= self._max_allowed_waypint_angle: + # Is the ego vehicle going back to the lane, or going out? 
Take the opposite + self._wrong_lane_active = not bool(self._wrong_lane_active) + else: + # Changing to a lane with the same direction + self._wrong_lane_active = False + + # Remember the last state + self._last_lane_id = current_lane_id + self._last_road_id = current_road_id + self._pre_ego_waypoint = current_waypoint diff --git a/roach/criteria/route_deviation.py b/roach/criteria/route_deviation.py new file mode 100644 index 0000000..1990db7 --- /dev/null +++ b/roach/criteria/route_deviation.py @@ -0,0 +1,34 @@ +class RouteDeviation(): + + def __init__(self, offroad_min=15, offroad_max=30, max_route_percentage=0.3): + self._offroad_min = offroad_min + self._offroad_max = offroad_max + self._max_route_percentage = max_route_percentage + self._out_route_distance = 0.0 + + def tick(self, vehicle, timestamp, ref_waypoint, distance_traveled, route_length): + ev_loc = vehicle.get_location() + + distance = ev_loc.distance(ref_waypoint.transform.location) + + # fail if off_route is True + off_route_max = distance > self._offroad_max + + # fail if off_safe_route more than 30% of total route length + off_route_min = False + if distance > self._offroad_min: + self._out_route_distance += distance_traveled + out_route_percentage = self._out_route_distance / route_length + if out_route_percentage > self._max_route_percentage: + off_route_min = True + + info = None + if off_route_max or off_route_min: + info = { + 'step': timestamp['step'], + 'simulation_time': timestamp['relative_simulation_time'], + 'ev_loc': [ev_loc.x, ev_loc.y, ev_loc.z], + 'off_route_max': off_route_max, + 'off_route_min': off_route_min + } + return info diff --git a/roach/criteria/run_red_light.py b/roach/criteria/run_red_light.py new file mode 100644 index 0000000..908cba5 --- /dev/null +++ b/roach/criteria/run_red_light.py @@ -0,0 +1,64 @@ +import carla +import shapely.geometry +from carla_gym.utils.traffic_light import TrafficLightHandler + + +class RunRedLight(): + + def __init__(self, carla_map, distance_light=30): + self._map = carla_map + self._last_red_light_id = None + self._distance_light = distance_light + + def tick(self, vehicle, timestamp): + ev_tra = vehicle.get_transform() + ev_loc = ev_tra.location + ev_dir = ev_tra.get_forward_vector() + ev_extent = vehicle.bounding_box.extent.x + + tail_close_pt = ev_tra.transform(carla.Location(x=-0.8 * ev_extent)) + tail_far_pt = ev_tra.transform(carla.Location(x=-ev_extent - 1.0)) + tail_wp = self._map.get_waypoint(tail_far_pt) + + info = None + for idx_tl in range(TrafficLightHandler.num_tl): + traffic_light = TrafficLightHandler.list_tl_actor[idx_tl] + tl_tv_loc = TrafficLightHandler.list_tv_loc[idx_tl] + if tl_tv_loc.distance(ev_loc) > self._distance_light: + continue + if traffic_light.state != carla.TrafficLightState.Red: + continue + if self._last_red_light_id and self._last_red_light_id == traffic_light.id: + continue + + for idx_wp in range(len(TrafficLightHandler.list_stopline_wps[idx_tl])): + wp = TrafficLightHandler.list_stopline_wps[idx_tl][idx_wp] + wp_dir = wp.transform.get_forward_vector() + dot_ve_wp = ev_dir.x * wp_dir.x + ev_dir.y * wp_dir.y + ev_dir.z * wp_dir.z + + if tail_wp.road_id == wp.road_id and tail_wp.lane_id == wp.lane_id and dot_ve_wp > 0: + # This light is red and is affecting our lane + stop_left_loc, stop_right_loc = TrafficLightHandler.list_stopline_vtx[idx_tl][idx_wp] + # Is the vehicle traversing the stop line? 
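+                    # The ego tail segment (tail_close_pt -> tail_far_pt) is intersected with the
+                    # stop-line segment in 2D; a non-empty intersection means the line was crossed.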
+ if self._is_vehicle_crossing_line((tail_close_pt, tail_far_pt), (stop_left_loc, stop_right_loc)): + tl_loc = traffic_light.get_location() + # loc_in_ev = trans_utils.loc_global_to_ref(tl_loc, ev_tra) + self._last_red_light_id = traffic_light.id + info = { + 'step': timestamp['step'], + 'simulation_time': timestamp['relative_simulation_time'], + 'id': traffic_light.id, + 'tl_loc': [tl_loc.x, tl_loc.y, tl_loc.z], + 'ev_loc': [ev_loc.x, ev_loc.y, ev_loc.z] + } + return info + + @staticmethod + def _is_vehicle_crossing_line(seg1, seg2): + """ + check if vehicle crosses a line segment + """ + line1 = shapely.geometry.LineString([(seg1[0].x, seg1[0].y), (seg1[1].x, seg1[1].y)]) + line2 = shapely.geometry.LineString([(seg2[0].x, seg2[0].y), (seg2[1].x, seg2[1].y)]) + inter = line1.intersection(line2) + return not inter.is_empty diff --git a/roach/criteria/run_stop_sign.py b/roach/criteria/run_stop_sign.py new file mode 100644 index 0000000..b13b32a --- /dev/null +++ b/roach/criteria/run_stop_sign.py @@ -0,0 +1,157 @@ +import carla +import numpy as np + + +class RunStopSign(): + + def __init__(self, carla_world, proximity_threshold=50.0, speed_threshold=0.1, waypoint_step=1.0): + self._map = carla_world.get_map() + self._proximity_threshold = proximity_threshold + self._speed_threshold = speed_threshold + self._waypoint_step = waypoint_step + + all_actors = carla_world.get_actors() + self._list_stop_signs = [] + for _actor in all_actors: + if 'traffic.stop' in _actor.type_id: + self._list_stop_signs.append(_actor) + + self._target_stop_sign = None + self._stop_completed = False + self._affected_by_stop = False + + def tick(self, vehicle, timestamp): + info = None + ev_loc = vehicle.get_location() + ev_f_vec = vehicle.get_transform().get_forward_vector() + + if self._target_stop_sign is None: + self._target_stop_sign = self._scan_for_stop_sign(vehicle.get_transform()) + if self._target_stop_sign is not None: + stop_loc = self._target_stop_sign.get_location() + # info = { + # 'event': 'encounter', + # 'step': timestamp['step'], + # 'simulation_time': timestamp['relative_simulation_time'], + # 'id': self._target_stop_sign.id, + # 'stop_loc': [stop_loc.x, stop_loc.y, stop_loc.z], + # 'ev_loc': [ev_loc.x, ev_loc.y, ev_loc.z] + # } + else: + # we were in the middle of dealing with a stop sign + if not self._stop_completed: + # did the ego-vehicle stop? + current_speed = self._calculate_speed(vehicle.get_velocity()) + if current_speed < self._speed_threshold: + self._stop_completed = True + + if not self._affected_by_stop: + stop_t = self._target_stop_sign.get_transform() + transformed_tv = stop_t.transform(self._target_stop_sign.trigger_volume.location) + stop_extent = self._target_stop_sign.trigger_volume.extent + if self.point_inside_boundingbox(ev_loc, transformed_tv, stop_extent): + self._affected_by_stop = True + + if not self.is_affected_by_stop(ev_loc, self._target_stop_sign): + # is the vehicle out of the influence of this stop sign now? + if not self._stop_completed and self._affected_by_stop: + # did we stop? 
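+                    # The ego vehicle left the stop sign's influence without ever coming to a halt,
+                    # i.e. it ran the stop sign.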
+ stop_loc = self._target_stop_sign.get_transform().location + # info = { + # 'event': 'run', + # 'step': timestamp['step'], + # 'simulation_time': timestamp['relative_simulation_time'], + # 'id': self._target_stop_sign.id, + # 'stop_loc': [stop_loc.x, stop_loc.y, stop_loc.z], + # 'ev_loc': [ev_loc.x, ev_loc.y, ev_loc.z] + # } + # reset state + self._target_stop_sign = None + self._stop_completed = False + self._affected_by_stop = False + + # return info + + def _scan_for_stop_sign(self, vehicle_transform): + target_stop_sign = None + + ve_dir = vehicle_transform.get_forward_vector() + + wp = self._map.get_waypoint(vehicle_transform.location) + wp_dir = wp.transform.get_forward_vector() + + dot_ve_wp = ve_dir.x * wp_dir.x + ve_dir.y * wp_dir.y + ve_dir.z * wp_dir.z + + if dot_ve_wp > 0: # Ignore all when going in a wrong lane + for stop_sign in self._list_stop_signs: + if self.is_affected_by_stop(vehicle_transform.location, stop_sign): + # this stop sign is affecting the vehicle + target_stop_sign = stop_sign + break + + return target_stop_sign + + def is_affected_by_stop(self, vehicle_loc, stop, multi_step=20): + """ + Check if the given actor is affected by the stop + """ + affected = False + # first we run a fast coarse test + stop_t = stop.get_transform() + stop_location = stop_t.location + if stop_location.distance(vehicle_loc) > self._proximity_threshold: + return affected + + transformed_tv = stop_t.transform(stop.trigger_volume.location) + + # slower and accurate test based on waypoint's horizon and geometric test + list_locations = [vehicle_loc] + waypoint = self._map.get_waypoint(vehicle_loc) + for _ in range(multi_step): + if waypoint: + next_wps = waypoint.next(self._waypoint_step) + if not next_wps: + break + waypoint = next_wps[0] + if not waypoint: + break + list_locations.append(waypoint.transform.location) + + for actor_location in list_locations: + if self.point_inside_boundingbox(actor_location, transformed_tv, stop.trigger_volume.extent): + affected = True + + return affected + + @staticmethod + def _calculate_speed(carla_velocity): + return np.linalg.norm([carla_velocity.x, carla_velocity.y]) + + @staticmethod + def point_inside_boundingbox(point, bb_center, bb_extent): + """ + X + :param point: + :param bb_center: + :param bb_extent: + :return: + """ + # bugfix slim bbox + bb_extent.x = max(bb_extent.x, bb_extent.y) + bb_extent.y = max(bb_extent.x, bb_extent.y) + + # pylint: disable=invalid-name + A = carla.Vector2D(bb_center.x - bb_extent.x, bb_center.y - bb_extent.y) + B = carla.Vector2D(bb_center.x + bb_extent.x, bb_center.y - bb_extent.y) + D = carla.Vector2D(bb_center.x - bb_extent.x, bb_center.y + bb_extent.y) + M = carla.Vector2D(point.x, point.y) + + AB = B - A + AD = D - A + AM = M - A + am_ab = AM.x * AB.x + AM.y * AB.y + ab_ab = AB.x * AB.x + AB.y * AB.y + am_ad = AM.x * AD.x + AM.y * AD.y + ad_ad = AD.x * AD.x + AD.y * AD.y + + return am_ab > 0 and am_ab < ab_ab and am_ad > 0 and am_ad < ad_ad diff --git a/roach/log/ckpt_11833344.pth b/roach/log/ckpt_11833344.pth new file mode 100644 index 0000000..49737d4 Binary files /dev/null and b/roach/log/ckpt_11833344.pth differ diff --git a/roach/models/distributions.py b/roach/models/distributions.py new file mode 100644 index 0000000..19a1867 --- /dev/null +++ b/roach/models/distributions.py @@ -0,0 +1,278 @@ +from typing import Optional, Tuple +import torch as th +import torch.nn as nn +from torch.distributions import Beta, Normal +from torch.nn import functional as F +import numpy as np + + +def 
sum_independent_dims(tensor: th.Tensor) -> th.Tensor: + if len(tensor.shape) > 1: + tensor = tensor.sum(dim=1) + else: + tensor = tensor.sum() + return tensor + + +class DiagGaussianDistribution(): + def __init__(self, action_dim: int, dist_init=None, action_dependent_std=False): + self.distribution = None + self.action_dim = action_dim + self.dist_init = dist_init + self.action_dependent_std = action_dependent_std + + self.low = None + self.high = None + self.log_std_max = 2 + self.log_std_min = -20 + + # [mu, log_std], [0, 1] + self.acc_exploration_dist = { + 'go': th.FloatTensor([0.66, -3]), + 'stop': th.FloatTensor([-0.66, -3]) + } + self.steer_exploration_dist = { + 'turn': th.FloatTensor([0.0, -1]), + 'straight': th.FloatTensor([3.0, 3.0]) + } + + if th.cuda.is_available(): + self.device = 'cuda' + else: + self.device = 'cpu' + + def proba_distribution_net(self, latent_dim: int) -> Tuple[nn.Module, nn.Parameter]: + mean_actions = nn.Linear(latent_dim, self.action_dim) + if self.action_dependent_std: + log_std = nn.Linear(latent_dim, self.action_dim) + else: + log_std = nn.Parameter(-2.0*th.ones(self.action_dim), requires_grad=True) + + if self.dist_init is not None: + # log_std.weight.data.fill_(0.01) + # mean_actions.weight.data.fill_(0.01) + # acc/steer + mean_actions.bias.data[0] = self.dist_init[0][0] + mean_actions.bias.data[1] = self.dist_init[1][0] + if self.action_dependent_std: + log_std.bias.data[0] = self.dist_init[0][1] + log_std.bias.data[1] = self.dist_init[1][1] + else: + init_tensor = th.FloatTensor([self.dist_init[0][1], self.dist_init[1][1]]) + log_std = nn.Parameter(init_tensor, requires_grad=True) + + return mean_actions, log_std + + def proba_distribution(self, mean_actions: th.Tensor, log_std: th.Tensor) -> "DiagGaussianDistribution": + if self.action_dependent_std: + log_std = th.clamp(log_std, self.log_std_min, self.log_std_max) + action_std = th.ones_like(mean_actions) * log_std.exp() + self.distribution = Normal(mean_actions, action_std) + return self + + def log_prob(self, actions: th.Tensor) -> th.Tensor: + log_prob = self.distribution.log_prob(actions) + return sum_independent_dims(log_prob) + + def entropy_loss(self) -> th.Tensor: + entropy_loss = -1.0 * self.distribution.entropy() + return th.mean(entropy_loss) + + def exploration_loss(self, exploration_suggests) -> th.Tensor: + # [('stop'/'go'/None, 'turn'/'straight'/None)] + # (batch_size, action_dim) + mu = self.distribution.loc.detach().clone() + sigma = self.distribution.scale.detach().clone() + + for i, (acc_suggest, steer_suggest) in enumerate(exploration_suggests): + if acc_suggest != '': + mu[i, 0] = self.acc_exploration_dist[acc_suggest][0] + sigma[i, 0] = self.acc_exploration_dist[acc_suggest][1] + if steer_suggest != '': + mu[i, 1] = self.steer_exploration_dist[steer_suggest][0] + sigma[i, 1] = self.steer_exploration_dist[steer_suggest][1] + + dist_ent = Normal(mu, sigma) + + exploration_loss = th.distributions.kl_divergence(dist_ent, self.distribution) + return th.mean(exploration_loss) + + def sample(self) -> th.Tensor: + return self.distribution.rsample() + + def mode(self) -> th.Tensor: + return self.distribution.mean + + def get_actions(self, deterministic: bool = False) -> th.Tensor: + if deterministic: + return self.mode() + return self.sample() + + +class SquashedDiagGaussianDistribution(): + def __init__(self, action_dim: int, log_std_init: float = 0.0, action_dependent_std=False): + self.distribution = None + + self.action_dim = action_dim + self.log_std_init = log_std_init + 
self.eps = 1e-7 + self.action_dependent_std = action_dependent_std + + self.low = -1.0 + self.high = 1.0 + self.log_std_max = 2 + self.log_std_min = -20 + + self.gaussian_actions = None + + def proba_distribution_net(self, latent_dim: int): + mean_actions = nn.Linear(latent_dim, self.action_dim) + if self.action_dependent_std: + log_std = nn.Linear(latent_dim, self.action_dim) + else: + log_std = nn.Parameter(th.ones(self.action_dim) * self.log_std_init, requires_grad=True) + return mean_actions, log_std + + def proba_distribution(self, mean_actions: th.Tensor, log_std: th.Tensor): + if self.action_dependent_std: + log_std = th.clamp(log_std, self.log_std_min, self.log_std_max) + action_std = th.ones_like(mean_actions) * log_std.exp() + self.distribution = Normal(mean_actions, action_std) + return self + + def log_prob(self, actions: th.Tensor, gaussian_actions: Optional[th.Tensor] = None) -> th.Tensor: + # Inverse tanh + if gaussian_actions is None: + gaussian_actions = th.clamp(actions, min=-1.0 + self.eps, max=1.0 - self.eps) + gaussian_actions = 0.5 * (gaussian_actions.log1p() - (-gaussian_actions).log1p()) + + # Log likelihood for a Gaussian distribution + log_prob = self.distribution.log_prob(gaussian_actions) + log_prob = sum_independent_dims(log_prob) + + # sb3 correction + # log_prob -= th.sum(th.log(1 - actions ** 2 + self.eps), dim=1) + # spinning-up correction + log_prob -= (2*(np.log(2) - gaussian_actions - F.softplus(-2*gaussian_actions))).sum(axis=1) + return log_prob + + def entropy(self) -> Optional[th.Tensor]: + return None + + def sample(self) -> th.Tensor: + return th.tanh(self.distribution.rsample()) + + def mode(self) -> th.Tensor: + return th.tanh(self.distribution.mean) + + def get_actions(self, deterministic: bool = False) -> th.Tensor: + if deterministic: + return self.mode() + return self.sample() + + +class BetaDistribution(): + def __init__(self, action_dim=2, dist_init=None): + assert action_dim == 2 + + self.distribution = None + self.action_dim = action_dim + self.dist_init = dist_init + self.low = 0.0 + self.high = 1.0 + + # [beta, alpha], [0, 1] + self.acc_exploration_dist = { + # [1, 2.5] + # [1.5, 1.0] + 'go': th.FloatTensor([1.0, 2.5]), + 'stop': th.FloatTensor([1.5, 1.0]) + } + self.steer_exploration_dist = { + 'turn': th.FloatTensor([1.0, 1.0]), + 'straight': th.FloatTensor([3.0, 3.0]) + } + + if th.cuda.is_available(): + self.device = 'cuda' + else: + self.device = 'cpu' + + def proba_distribution_net(self, latent_dim: int) -> Tuple[nn.Module, nn.Module]: + + linear_alpha = nn.Linear(latent_dim, self.action_dim) + linear_beta = nn.Linear(latent_dim, self.action_dim) + + if self.dist_init is not None: + # linear_alpha.weight.data.fill_(0.01) + # linear_beta.weight.data.fill_(0.01) + # acc + linear_alpha.bias.data[0] = self.dist_init[0][1] + linear_beta.bias.data[0] = self.dist_init[0][0] + # steer + linear_alpha.bias.data[1] = self.dist_init[1][1] + linear_beta.bias.data[1] = self.dist_init[1][0] + + alpha = nn.Sequential(linear_alpha, nn.Softplus()) + beta = nn.Sequential(linear_beta, nn.Softplus()) + return alpha, beta + + def proba_distribution(self, alpha, beta): + self.distribution = Beta(alpha, beta) + return self + + def log_prob(self, actions: th.Tensor) -> th.Tensor: + log_prob = self.distribution.log_prob(actions) + return sum_independent_dims(log_prob) + + def entropy_loss(self) -> th.Tensor: + entropy_loss = -1.0 * self.distribution.entropy() + return th.mean(entropy_loss) + + def exploration_loss(self, exploration_suggests) -> th.Tensor: 
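+        # For batch entries with a non-empty suggestion, overwrite the Beta parameters with the
+        # corresponding hand-crafted exploration prior and penalise the KL divergence from it.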
+ # [('stop'/'go'/None, 'turn'/'straight'/None)] + # (batch_size, action_dim) + alpha = self.distribution.concentration1.detach().clone() + beta = self.distribution.concentration0.detach().clone() + + for i, (acc_suggest, steer_suggest) in enumerate(exploration_suggests): + if acc_suggest != '': + beta[i, 0] = self.acc_exploration_dist[acc_suggest][0] + alpha[i, 0] = self.acc_exploration_dist[acc_suggest][1] + if steer_suggest != '': + beta[i, 1] = self.steer_exploration_dist[steer_suggest][0] + alpha[i, 1] = self.steer_exploration_dist[steer_suggest][1] + + dist_ent = Beta(alpha, beta) + + exploration_loss = th.distributions.kl_divergence(self.distribution, dist_ent) + return th.mean(exploration_loss) + + def sample(self) -> th.Tensor: + # Reparametrization trick to pass gradients + return self.distribution.rsample() + + def mode(self) -> th.Tensor: + alpha = self.distribution.concentration1 + beta = self.distribution.concentration0 + x = th.zeros_like(alpha) + x[:, 1] += 0.5 + mask1 = (alpha > 1) & (beta > 1) + x[mask1] = (alpha[mask1]-1)/(alpha[mask1]+beta[mask1]-2) + + mask2 = (alpha <= 1) & (beta > 1) + x[mask2] = 0.0 + + mask3 = (alpha > 1) & (beta <= 1) + x[mask3] = 1.0 + + # mean + mask4 = (alpha <= 1) & (beta <= 1) + x[mask4] = self.distribution.mean[mask4] + + return x + + def get_actions(self, deterministic: bool = False) -> th.Tensor: + if deterministic: + return self.mode() + return self.sample() diff --git a/roach/models/ppo.py b/roach/models/ppo.py new file mode 100644 index 0000000..269f7ab --- /dev/null +++ b/roach/models/ppo.py @@ -0,0 +1,277 @@ +import time +import torch as th +import numpy as np +from collections import deque +from torch.nn import functional as F + +from stable_baselines3.common.vec_env import VecEnv +from stable_baselines3.common.callbacks import BaseCallback +from stable_baselines3.common.utils import explained_variance + +from .ppo_buffer import PpoBuffer + + +class PPO(): + def __init__(self, policy, env, + learning_rate: float = 1e-5, + n_steps_total: int = 8192, + batch_size: int = 256, + n_epochs: int = 20, + gamma: float = 0.99, + gae_lambda: float = 0.9, + clip_range: float = 0.2, + clip_range_vf: float = None, + ent_coef: float = 0.05, + explore_coef: float = 0.05, + vf_coef: float = 0.5, + max_grad_norm: float = 0.5, + target_kl: float = 0.01, + update_adv=False, + lr_schedule_step=None, + start_num_timesteps: int = 0): + + self.policy = policy + self.env = env + self.learning_rate = learning_rate + self.n_steps_total = n_steps_total + self.n_steps = n_steps_total//env.num_envs + self.batch_size = batch_size + self.n_epochs = n_epochs + self.gamma = gamma + self.gae_lambda = gae_lambda + self.clip_range = clip_range + self.clip_range_vf = clip_range_vf + self.ent_coef = ent_coef + self.explore_coef = explore_coef + self.vf_coef = vf_coef + self.max_grad_norm = max_grad_norm + self.target_kl = target_kl + self.update_adv = update_adv + self.lr_schedule_step = lr_schedule_step + self.start_num_timesteps = start_num_timesteps + self.num_timesteps = start_num_timesteps + + self._last_obs = None + self._last_dones = None + self.ep_stat_buffer = None + + self.buffer = PpoBuffer(self.n_steps, self.env.observation_space, self.env.action_space, + gamma=self.gamma, gae_lambda=self.gae_lambda, n_envs=self.env.num_envs) + self.policy = self.policy.to(self.policy.device) + + model_parameters = filter(lambda p: p.requires_grad, self.policy.parameters()) + total_params = sum([np.prod(p.size()) for p in model_parameters]) + print(f'trainable parameters: 
{total_params/1000000:.2f}M') + + def collect_rollouts(self, env: VecEnv, callback: BaseCallback, + rollout_buffer: PpoBuffer, n_rollout_steps: int) -> bool: + assert self._last_obs is not None, "No previous observation was provided" + n_steps = 0 + rollout_buffer.reset() + + self.action_statistics = [] + self.mu_statistics = [] + self.sigma_statistics = [] + + while n_steps < n_rollout_steps: + actions, values, log_probs, mu, sigma, _ = self.policy.forward(self._last_obs) + self.action_statistics.append(actions) + self.mu_statistics.append(mu) + self.sigma_statistics.append(sigma) + + new_obs, rewards, dones, infos = env.step(actions) + + if callback.on_step() is False: + return False + + # update_info_buffer + for i in np.where(dones)[0]: + self.ep_stat_buffer.append(infos[i]['episode_stat']) + + n_steps += 1 + self.num_timesteps += env.num_envs + + rollout_buffer.add(self._last_obs, actions, rewards, self._last_dones, values, log_probs, mu, sigma, infos) + self._last_obs = new_obs + self._last_dones = dones + + last_values = self.policy.forward_value(self._last_obs) + rollout_buffer.compute_returns_and_advantage(last_values, dones=self._last_dones) + + return True + + def train(self): + for param_group in self.policy.optimizer.param_groups: + param_group["lr"] = self.learning_rate + + entropy_losses, exploration_losses, pg_losses, value_losses, losses = [], [], [], [], [] + clip_fractions = [] + approx_kl_divs = [] + + # train for gradient_steps epochs + epoch = 0 + data_len = int(self.buffer.buffer_size * self.buffer.n_envs / self.batch_size) + for epoch in range(self.n_epochs): + approx_kl_divs = [] + # Do a complete pass on the rollout buffer + self.buffer.start_caching(self.batch_size) + # while self.buffer.sample_queue.qsize() < 3: + # time.sleep(0.01) + for i in range(data_len): + + if self.buffer.sample_queue.empty(): + while self.buffer.sample_queue.empty(): + # print(f'buffer_empty: {self.buffer.sample_queue.qsize()}') + time.sleep(0.01) + rollout_data = self.buffer.sample_queue.get() + + values, log_prob, entropy_loss, exploration_loss, distribution = self.policy.evaluate_actions( + rollout_data.observations, rollout_data.actions, rollout_data.exploration_suggests, + detach_values=False) + # Normalize advantage + advantages = rollout_data.advantages + # advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8) + + # ratio between old and new policy, should be one at the first iteration + ratio = th.exp(log_prob - rollout_data.old_log_prob) + + # clipped surrogate loss + policy_loss_1 = advantages * ratio + policy_loss_2 = advantages * th.clamp(ratio, 1 - self.clip_range, 1 + self.clip_range) + policy_loss = -th.min(policy_loss_1, policy_loss_2).mean() + + # Logging + clip_fraction = th.mean((th.abs(ratio - 1) > self.clip_range).float()).item() + clip_fractions.append(clip_fraction) + + if self.clip_range_vf is None: + # No clipping + values_pred = values + else: + # Clip the different between old and new value + # NOTE: this depends on the reward scaling + values_pred = rollout_data.old_values + th.clamp(values - rollout_data.old_values, + -self.clip_range_vf, self.clip_range_vf) + # Value loss using the TD(gae_lambda) target + value_loss = F.mse_loss(rollout_data.returns, values_pred) + + loss = policy_loss + self.vf_coef * value_loss \ + + self.ent_coef * entropy_loss + self.explore_coef * exploration_loss + + losses.append(loss.item()) + pg_losses.append(policy_loss.item()) + value_losses.append(value_loss.item()) + entropy_losses.append(entropy_loss.item()) 
+ exploration_losses.append(exploration_loss.item()) + + # Optimization step + self.policy.optimizer.zero_grad() + loss.backward() + # Clip grad norm + th.nn.utils.clip_grad_norm_(self.policy.parameters(), self.max_grad_norm) + self.policy.optimizer.step() + + with th.no_grad(): + old_distribution = self.policy.action_dist.proba_distribution( + rollout_data.old_mu, rollout_data.old_sigma) + kl_div = th.distributions.kl_divergence(old_distribution.distribution, distribution) + + approx_kl_divs.append(kl_div.mean().item()) + + if self.target_kl is not None and np.mean(approx_kl_divs) > 1.5 * self.target_kl: + if self.lr_schedule_step is not None: + self.kl_early_stop += 1 + if self.kl_early_stop >= self.lr_schedule_step: + self.learning_rate *= 0.5 + self.kl_early_stop = 0 + break + + # update advantages + if self.update_adv: + self.buffer.update_values(self.policy) + last_values = self.policy.forward_value(self._last_obs) + self.buffer.compute_returns_and_advantage(last_values, dones=self._last_dones) + + explained_var = explained_variance(self.buffer.returns.flatten(), self.buffer.values.flatten()) + + # Logs + self.train_debug = { + "train/entropy_loss": np.mean(entropy_losses), + "train/exploration_loss": np.mean(exploration_losses), + "train/policy_gradient_loss": np.mean(pg_losses), + "train/value_loss": np.mean(value_losses), + "train/last_epoch_kl": np.mean(approx_kl_divs), + "train/clip_fraction": np.mean(clip_fractions), + "train/loss": np.mean(losses), + "train/explained_variance": explained_var, + "train/clip_range": self.clip_range, + "train/train_epoch": epoch, + "train/learning_rate": self.learning_rate + } + + def learn(self, total_timesteps, callback=None, seed=2021): + # reset env seed + self.env.action_space.seed(seed) + self.env.observation_space.seed(seed) + self.env.seed(seed) + + self.start_time = time.time() + + self.kl_early_stop = 0 + self.t_train_values = 0.0 + + self.ep_stat_buffer = deque(maxlen=100) + self._last_obs = self.env.reset() + self._last_dones = np.zeros((self.env.num_envs,), dtype=np.bool) + + callback.init_callback(self) + + callback.on_training_start(locals(), globals()) + + while self.num_timesteps < total_timesteps: + callback.on_rollout_start() + t0 = time.time() + self.policy = self.policy.train() + continue_training = self.collect_rollouts(self.env, callback, self.buffer, self.n_steps) + self.t_rollout = time.time() - t0 + callback.on_rollout_end() + + if continue_training is False: + break + + t0 = time.time() + self.train() + self.t_train = time.time() - t0 + callback.on_training_end() + + return self + + def _get_init_kwargs(self): + init_kwargs = dict( + learning_rate=self.learning_rate, + n_steps_total=self.n_steps_total, + batch_size=self.batch_size, + n_epochs=self.n_epochs, + gamma=self.gamma, + gae_lambda=self.gae_lambda, + clip_range=self.clip_range, + clip_range_vf=self.clip_range_vf, + ent_coef=self.ent_coef, + explore_coef=self.explore_coef, + vf_coef=self.vf_coef, + max_grad_norm=self.max_grad_norm, + target_kl=self.target_kl, + update_adv=self.update_adv, + lr_schedule_step=self.lr_schedule_step, + start_num_timesteps=self.num_timesteps, + ) + return init_kwargs + + def save(self, path: str) -> None: + th.save({'policy_state_dict': self.policy.state_dict(), + 'policy_init_kwargs': self.policy.get_init_kwargs(), + 'train_init_kwargs': self._get_init_kwargs()}, + path) + + def get_env(self): + return self.env diff --git a/roach/models/ppo_buffer.py b/roach/models/ppo_buffer.py new file mode 100644 index 0000000..a2c6678 --- 
/dev/null +++ b/roach/models/ppo_buffer.py @@ -0,0 +1,261 @@ +from gym import spaces +import numpy as np +from typing import Optional, Generator, NamedTuple, Dict, List +import torch as th +from stable_baselines3.common.vec_env.base_vec_env import tile_images +import cv2 +import time +from threading import Thread +import queue + +COLORS = [ + [46, 52, 54], + [136, 138, 133], + [255, 0, 255], + [0, 255, 255], + [0, 0, 255], + [255, 0, 0], + [255, 255, 0], + [255, 255, 255] +] + + +class PpoBufferSamples(NamedTuple): + observations: Dict[str, th.Tensor] + actions: th.Tensor + old_values: th.Tensor + old_log_prob: th.Tensor + old_mu: th.Tensor + old_sigma: th.Tensor + advantages: th.Tensor + returns: th.Tensor + exploration_suggests: List[tuple] + + +class PpoBuffer(): + def __init__(self, buffer_size: int, observation_space: spaces.Space, action_space: spaces.Space, + gae_lambda: float = 1, gamma: float = 0.99, n_envs: int = 1): + + self.buffer_size = buffer_size + self.observation_space = observation_space + self.action_space = action_space + self.gae_lambda = gae_lambda + self.gamma = gamma + self.n_envs = n_envs + self.reset() + + self.pos = 0 + self.full = False + if th.cuda.is_available(): + self.device = 'cuda' + else: + self.device = 'cpu' + + self.sample_queue = queue.Queue() + + def reset(self) -> None: + self.observations = {} + for k, s in self.observation_space.spaces.items(): + self.observations[k] = np.zeros((self.buffer_size, self.n_envs,)+s.shape, dtype=s.dtype) + # int(np.prod(self.action_space.shape)) + self.actions = np.zeros((self.buffer_size, self.n_envs)+self.action_space.shape, dtype=np.float32) + self.rewards = np.zeros((self.buffer_size, self.n_envs), dtype=np.float32) + self.returns = np.zeros((self.buffer_size, self.n_envs), dtype=np.float32) + self.advantages = np.zeros((self.buffer_size, self.n_envs), dtype=np.float32) + self.dones = np.zeros((self.buffer_size, self.n_envs), dtype=np.float32) + self.values = np.zeros((self.buffer_size, self.n_envs), dtype=np.float32) + self.log_probs = np.zeros((self.buffer_size, self.n_envs), dtype=np.float32) + self.mus = np.zeros((self.buffer_size, self.n_envs)+self.action_space.shape, dtype=np.float32) + self.sigmas = np.zeros((self.buffer_size, self.n_envs)+self.action_space.shape, dtype=np.float32) + self.exploration_suggests = np.zeros((self.buffer_size, self.n_envs), dtype=[('acc', 'U10'), ('steer', 'U10')]) + + self.reward_debugs = [[] for i in range(self.n_envs)] + self.terminal_debugs = [[] for i in range(self.n_envs)] + + self.pos = 0 + self.full = False + + def compute_returns_and_advantage(self, last_value: th.Tensor, dones: np.ndarray) -> None: + last_gae_lam = 0 + for step in reversed(range(self.buffer_size)): + if step == self.buffer_size - 1: + next_non_terminal = 1.0 - dones + next_value = last_value + # spinning up return calculation + # self.returns[step] = self.rewards[step] + self.gamma * last_value * next_non_terminal + else: + next_non_terminal = 1.0 - self.dones[step + 1] + next_value = self.values[step + 1] + # spinning up return calculation + # self.returns[step] = self.rewards[step] + self.gamma * self.returns[step+1] * next_non_terminal + delta = self.rewards[step] + self.gamma * next_value * next_non_terminal - self.values[step] + last_gae_lam = delta + self.gamma * self.gae_lambda * next_non_terminal * last_gae_lam + self.advantages[step] = last_gae_lam + + # sb3 return + self.returns = self.advantages + self.values + + def add(self, + obs_dict: Dict[str, np.ndarray], + action: np.ndarray, + 
reward: np.ndarray, + done: np.ndarray, + value: np.ndarray, + log_prob: np.ndarray, + mu: np.ndarray, + sigma: np.ndarray, + infos) -> None: + + for k, v in obs_dict.items(): + self.observations[k][self.pos] = v + self.actions[self.pos] = action + self.rewards[self.pos] = reward + self.dones[self.pos] = done + self.values[self.pos] = value + self.log_probs[self.pos] = log_prob + self.mus[self.pos] = mu + self.sigmas[self.pos] = sigma + + for i in range(self.n_envs): + self.reward_debugs[i].append(infos[i]['reward_debug']['debug_texts']) + self.terminal_debugs[i].append(infos[i]['terminal_debug']['debug_texts']) + + n_steps = infos[i]['terminal_debug']['exploration_suggest']['n_steps'] + if n_steps > 0: + n_start = max(0, self.pos-n_steps) + self.exploration_suggests[n_start:self.pos, i] = \ + infos[i]['terminal_debug']['exploration_suggest']['suggest'] + + self.pos += 1 + if self.pos == self.buffer_size: + self.full = True + + def update_values(self, policy): + for i in range(self.buffer_size): + obs_dict = {} + for k in self.observations.keys(): + obs_dict[k] = self.observations[k][i] + values = policy.forward_value(obs_dict) + self.values[i] = values + + def get(self, batch_size: Optional[int] = None) -> Generator[PpoBufferSamples, None, None]: + assert self.full, '' + indices = np.random.permutation(self.buffer_size * self.n_envs) + # Prepare the data + for tensor in ['actions', 'values', 'log_probs', 'advantages', 'returns', + 'mus', 'sigmas', 'exploration_suggests']: + self.__dict__['flat_'+tensor] = self.flatten(self.__dict__[tensor]) + self.flat_observations = {} + for k in self.observations.keys(): + self.flat_observations[k] = self.flatten(self.observations[k]) + + # spinning up: the next two lines implement the advantage normalization trick + adv_mean = np.mean(self.advantages) + adv_std = np.std(self.advantages) + np.finfo(np.float32).eps + self.advantages = (self.advantages - adv_mean) / adv_std + + # Return everything, don't create minibatches + if batch_size is None: + batch_size = self.buffer_size * self.n_envs + + start_idx = 0 + while start_idx < self.buffer_size * self.n_envs: + yield self._get_samples(indices[start_idx:start_idx + batch_size]) + start_idx += batch_size + + def _get_samples(self, batch_inds: np.ndarray) -> PpoBufferSamples: + def to_torch(x): + return th.as_tensor(x).to(self.device) + # return th.from_numpy(x.astype(np.float32)).to(self.device) + + obs_dict = {} + for k in self.observations.keys(): + obs_dict[k] = to_torch(self.flat_observations[k][batch_inds]) + + data = (self.flat_actions[batch_inds], + self.flat_values[batch_inds], + self.flat_log_probs[batch_inds], + self.flat_mus[batch_inds], + self.flat_sigmas[batch_inds], + self.flat_advantages[batch_inds], + self.flat_returns[batch_inds] + ) + + data_torch = (obs_dict,) + tuple(map(to_torch, data)) + (self.flat_exploration_suggests[batch_inds],) + return PpoBufferSamples(*data_torch) + + @staticmethod + def flatten(arr: np.ndarray) -> np.ndarray: + shape = arr.shape + # if len(shape) < 3: + # return arr.swapaxes(0, 1).reshape(shape[0] * shape[1]) + # else: + return arr.reshape(shape[0] * shape[1], *shape[2:]) + + def render(self): + assert self.full, '' + list_render = [] + + _, _, c, h, w = self.observations['birdview'].shape + vis_idx = np.array([0, 1, 2, 6, 10, 14]) + + for i in range(self.buffer_size): + im_envs = [] + for j in range(self.n_envs): + + masks = self.observations['birdview'][i, j, vis_idx, :, :] > 100 + + im_birdview = np.zeros([h, w, 3], dtype=np.uint8) + for idx_c in 
range(len(vis_idx)): + im_birdview[masks[idx_c]] = COLORS[idx_c] + + im = np.zeros([h, w*2, 3], dtype=np.uint8) + im[:h, :w] = im_birdview + + action_str = np.array2string(self.actions[i, j], precision=1, separator=',', suppress_small=True) + state_str = np.array2string(self.observations['state'][i, j], + precision=1, separator=',', suppress_small=True) + + reward = self.rewards[i, j] + ret = self.returns[i, j] + advantage = self.advantages[i, j] + done = int(self.dones[i, j]) + value = self.values[i, j] + log_prob = self.log_probs[i, j] + + txt_1 = f'v:{value:5.2f} p:{log_prob:5.2f} a{action_str}' + im = cv2.putText(im, txt_1, (2, 12), cv2.FONT_HERSHEY_SIMPLEX, 0.35, (255, 255, 255), 1) + txt_2 = f'{done} {state_str}' + im = cv2.putText(im, txt_2, (2, 24), cv2.FONT_HERSHEY_SIMPLEX, 0.35, (255, 255, 255), 1) + txt_3 = f'rw:{reward:5.2f} rt:{ret:5.2f} a:{advantage:5.2f}' + im = cv2.putText(im, txt_3, (2, 36), cv2.FONT_HERSHEY_SIMPLEX, 0.35, (255, 255, 255), 1) + + for i_txt, txt in enumerate(self.reward_debugs[j][i] + self.terminal_debugs[j][i]): + im = cv2.putText(im, txt, (w, (i_txt+1)*15), cv2.FONT_HERSHEY_SIMPLEX, 0.35, (255, 255, 255), 1) + + im_envs.append(im) + + big_im = tile_images(im_envs) + list_render.append(big_im) + + return list_render + + def start_caching(self, batch_size): + thread1 = Thread(target=self.cache_to_cuda, args=(batch_size,)) + thread1.start() + + def cache_to_cuda(self, batch_size): + self.sample_queue.queue.clear() + + for rollout_data in self.get(batch_size): + while self.sample_queue.qsize() >= 2: + time.sleep(0.01) + self.sample_queue.put(rollout_data) + + def size(self) -> int: + """ + :return: (int) The current size of the buffer + """ + if self.full: + return self.buffer_size + return self.pos diff --git a/roach/models/ppo_policy.py b/roach/models/ppo_policy.py new file mode 100644 index 0000000..0b56a42 --- /dev/null +++ b/roach/models/ppo_policy.py @@ -0,0 +1,244 @@ +from typing import Union, Dict, Tuple, Any +from functools import partial +import gym +import torch as th +import torch.nn as nn +import numpy as np + +from roach.utils.config_utils import load_entry_point + + +class PpoPolicy(nn.Module): + + def __init__(self, + observation_space: gym.spaces.Space, + action_space: gym.spaces.Space, + policy_head_arch=[256, 256], + value_head_arch=[256, 256], + features_extractor_entry_point=None, + features_extractor_kwargs={}, + distribution_entry_point=None, + distribution_kwargs={}): + + super(PpoPolicy, self).__init__() + self.observation_space = observation_space + self.action_space = action_space + self.features_extractor_entry_point = features_extractor_entry_point + self.features_extractor_kwargs = features_extractor_kwargs + self.distribution_entry_point = distribution_entry_point + self.distribution_kwargs = distribution_kwargs + + if th.cuda.is_available(): + self.device = 'cuda' + else: + self.device = 'cpu' + + self.optimizer_class = th.optim.Adam + self.optimizer_kwargs = {'eps': 1e-5} + + features_extractor_entry_point = features_extractor_entry_point.replace("agents.rl_birdview","roach") + features_extractor_class = load_entry_point(features_extractor_entry_point) + self.features_extractor = features_extractor_class(observation_space, **features_extractor_kwargs) + + distribution_entry_point = distribution_entry_point.replace("agents.rl_birdview","roach") + distribution_class = load_entry_point(distribution_entry_point) + self.action_dist = distribution_class(int(np.prod(action_space.shape)), **distribution_kwargs) + + if 
'StateDependentNoiseDistribution' in distribution_entry_point: + self.use_sde = True + self.sde_sample_freq = 4 + else: + self.use_sde = False + self.sde_sample_freq = None + + # best_so_far + # self.net_arch = [dict(pi=[256, 128, 64], vf=[128, 64])] + self.policy_head_arch = list(policy_head_arch) + self.value_head_arch = list(value_head_arch) + self.activation_fn = nn.ReLU + self.ortho_init = False + self._build() + + def reset_noise(self, n_envs: int = 1) -> None: + assert self.use_sde, 'reset_noise() is only available when using gSDE' + self.action_dist.sample_weights(self.dist_sigma, batch_size=n_envs) + + def _build(self) -> None: + last_layer_dim_pi = self.features_extractor.features_dim + policy_net = [] + for layer_size in self.policy_head_arch: + policy_net.append(nn.Linear(last_layer_dim_pi, layer_size)) + policy_net.append(self.activation_fn()) + last_layer_dim_pi = layer_size + self.policy_head = nn.Sequential(*policy_net).to(self.device) + # mu->alpha/mean, sigma->beta/log_std (nn.Module, nn.Parameter) + self.dist_mu, self.dist_sigma = self.action_dist.proba_distribution_net(last_layer_dim_pi) + last_layer_dim_vf = self.features_extractor.features_dim + value_net = [] + for layer_size in self.value_head_arch: + value_net.append(nn.Linear(last_layer_dim_vf, layer_size)) + value_net.append(self.activation_fn()) + last_layer_dim_vf = layer_size + value_net.append(nn.Linear(last_layer_dim_vf, 1)) + self.value_head = nn.Sequential(*value_net).to(self.device) + # Init weights: use orthogonal initialization + # with small initial weight for the output + if self.ortho_init: + # TODO: check for features_extractor + # Values from stable-baselines. + # feature_extractor/mlp values are + # originally from openai/baselines (default gains/init_scales). 
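+            # Gains for orthogonal init of each head; applied via module.apply(init_weights) below.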
+ module_gains = { + # self.features_extractor: np.sqrt(2), + self.policy_head: np.sqrt(2), + self.value_head: np.sqrt(2) + # self.action_net: 0.01, + } + for module, gain in module_gains.items(): + module.apply(partial(self.init_weights, gain=gain)) + + self.optimizer = self.optimizer_class(self.parameters(), **self.optimizer_kwargs) + + def _get_features(self, birdview: th.Tensor, state: th.Tensor) -> th.Tensor: + """ + :param birdview: th.Tensor (num_envs, frame_stack*channel, height, width) + :param state: th.Tensor (num_envs, state_dim) + """ + birdview = birdview.float() / 255.0 + features = self.features_extractor(birdview, state) + return features + + def _get_action_dist_from_features(self, features: th.Tensor): + latent_pi = self.policy_head(features) + mu = self.dist_mu(latent_pi) + if isinstance(self.dist_sigma, nn.Parameter): + sigma = self.dist_sigma + else: + sigma = self.dist_sigma(latent_pi) + return self.action_dist.proba_distribution(mu, sigma), mu.detach().cpu().numpy(), sigma.detach().cpu().numpy() + + def evaluate_actions(self, obs_dict: Dict[str, th.Tensor], actions: th.Tensor, exploration_suggests, + detach_values=False): + features = self._get_features(**obs_dict) + + if detach_values: + detached_features = features.detach() + values = self.value_head(detached_features) + else: + values = self.value_head(features) + + distribution, mu, sigma = self._get_action_dist_from_features(features) + actions = self.scale_action(actions) + log_prob = distribution.log_prob(actions) + return values.flatten(), log_prob, distribution.entropy_loss(), \ + distribution.exploration_loss(exploration_suggests), distribution.distribution + + def evaluate_values(self, obs_dict: Dict[str, th.Tensor]): + features = self._get_features(**obs_dict) + values = self.value_head(features) + distribution, mu, sigma = self._get_action_dist_from_features(features) + return values.flatten(), distribution.distribution + + def forward(self, obs_dict: Dict[str, np.ndarray], deterministic: bool = False, clip_action: bool = False, only_feature: bool = False, feature_input: np.ndarray = None): + ''' + used in collect_rollouts(), do not clamp actions + ''' + with th.no_grad(): + if feature_input is None: + obs_tensor_dict = dict([(k, th.as_tensor(v).to(self.device).unsqueeze(0)) for k, v in obs_dict.items()]) + features = self._get_features(**obs_tensor_dict) + else: + features = th.tensor(feature_input).to(self.device) + if only_feature: + return features.cpu().numpy() + values = self.value_head(features) + distribution, mu, sigma = self._get_action_dist_from_features(features) + actions = distribution.get_actions(deterministic=deterministic) + log_prob = distribution.log_prob(actions) + actions = actions.cpu().numpy() + actions = self.unscale_action(actions) + if clip_action: + actions = np.clip(actions, self.action_space.low, self.action_space.high) + values = values.cpu().numpy().flatten() + log_prob = log_prob.cpu().numpy() + features = features.cpu().numpy() + return actions, values, log_prob, mu, sigma, features + + def forward_value(self, obs_dict: Dict[str, np.ndarray]) -> np.ndarray: + with th.no_grad(): + obs_tensor_dict = dict([(k, th.as_tensor(v).to(self.device)) for k, v in obs_dict.items()]) + features = self._get_features(**obs_tensor_dict) + values = self.value_head(features) + values = values.cpu().numpy().flatten() + return values + + def forward_policy(self, obs_dict: Dict[str, np.ndarray]) -> np.ndarray: + with th.no_grad(): + obs_tensor_dict = dict([(k, th.as_tensor(v).to(self.device)) 
for k, v in obs_dict.items()]) + features = self._get_features(**obs_tensor_dict) + distribution, mu, sigma = self._get_action_dist_from_features(features) + return mu, sigma + + def scale_action(self, action: th.Tensor, eps=1e-7) -> th.Tensor: + # input action \in [a_low, a_high] + # output action \in [d_low+eps, d_high-eps] + d_low, d_high = self.action_dist.low, self.action_dist.high # scalar + + if d_low is not None and d_high is not None: + a_low = th.as_tensor(self.action_space.low.astype(np.float32)).to(action.device) + a_high = th.as_tensor(self.action_space.high.astype(np.float32)).to(action.device) + action = (action-a_low)/(a_high-a_low) * (d_high-d_low) + d_low + action = th.clamp(action, d_low+eps, d_high-eps) + return action + + def unscale_action(self, action: np.ndarray, eps=0.0) -> np.ndarray: + # input action \in [d_low, d_high] + # output action \in [a_low+eps, a_high-eps] + d_low, d_high = self.action_dist.low, self.action_dist.high # scalar + + if d_low is not None and d_high is not None: + # batch_size = action.shape[0] + a_low, a_high = self.action_space.low, self.action_space.high + # same shape as action [batch_size, action_dim] + # a_high = np.tile(self.action_space.high, [batch_size, 1]) + action = (action-d_low)/(d_high-d_low) * (a_high-a_low) + a_low + # action = np.clip(action, a_low+eps, a_high-eps) + return action + + def get_init_kwargs(self) -> Dict[str, Any]: + init_kwargs = dict( + observation_space=self.observation_space, + action_space=self.action_space, + policy_head_arch=self.policy_head_arch, + value_head_arch=self.value_head_arch, + features_extractor_entry_point=self.features_extractor_entry_point, + features_extractor_kwargs=self.features_extractor_kwargs, + distribution_entry_point=self.distribution_entry_point, + distribution_kwargs=self.distribution_kwargs, + ) + return init_kwargs + + @classmethod + def load(cls, path): + if th.cuda.is_available(): + device = 'cuda' + else: + device = 'cpu' + saved_variables = th.load(path, map_location=device) + # Create policy object + model = cls(**saved_variables['policy_init_kwargs']) + + # Load weights + model.load_state_dict(saved_variables['policy_state_dict']) + model.to(device) + return model, saved_variables['train_init_kwargs'] + + @staticmethod + def init_weights(module: nn.Module, gain: float = 1) -> None: + """ + Orthogonal initialization (used in PPO and A2C) + """ + if isinstance(module, (nn.Linear, nn.Conv2d)): + nn.init.orthogonal_(module.weight, gain=gain) + if module.bias is not None: + module.bias.data.fill_(0.0) diff --git a/roach/models/torch_layers.py b/roach/models/torch_layers.py new file mode 100644 index 0000000..ef18091 --- /dev/null +++ b/roach/models/torch_layers.py @@ -0,0 +1,114 @@ +"""Policies: abstract base class and concrete implementations.""" + +import torch as th +import torch.nn as nn +import numpy as np + +from . 
import torch_util as tu + + +class XtMaCNN(nn.Module): + ''' + Inspired by https://github.com/xtma/pytorch_car_caring + ''' + + def __init__(self, observation_space, features_dim=256, states_neurons=[256]): + super().__init__() + self.features_dim = features_dim + + n_input_channels = observation_space['birdview'].shape[0] + + self.cnn = nn.Sequential( + nn.Conv2d(n_input_channels, 8, kernel_size=5, stride=2), + nn.ReLU(), + nn.Conv2d(8, 16, kernel_size=5, stride=2), + nn.ReLU(), + nn.Conv2d(16, 32, kernel_size=5, stride=2), + nn.ReLU(), + nn.Conv2d(32, 64, kernel_size=3, stride=2), + nn.ReLU(), + nn.Conv2d(64, 128, kernel_size=3, stride=2), + nn.ReLU(), + nn.Conv2d(128, 256, kernel_size=3, stride=1), + nn.ReLU(), + nn.Flatten(), + ) + # Compute shape by doing one forward pass + with th.no_grad(): + n_flatten = self.cnn(th.as_tensor(observation_space['birdview'].sample()[None]).float()).shape[1] + + self.linear = nn.Sequential(nn.Linear(n_flatten+states_neurons[-1], 512), nn.ReLU(), + nn.Linear(512, features_dim), nn.ReLU()) + + states_neurons = [observation_space['state'].shape[0]] + states_neurons + self.state_linear = [] + for i in range(len(states_neurons)-1): + self.state_linear.append(nn.Linear(states_neurons[i], states_neurons[i+1])) + self.state_linear.append(nn.ReLU()) + self.state_linear = nn.Sequential(*self.state_linear) + + self.apply(self._weights_init) + + @staticmethod + def _weights_init(m): + if isinstance(m, nn.Conv2d): + nn.init.xavier_uniform_(m.weight, gain=nn.init.calculate_gain('relu')) + nn.init.constant_(m.bias, 0.1) + + def forward(self, birdview, state): + x = self.cnn(birdview) + latent_state = self.state_linear(state) + + # latent_state = state.repeat(1, state.shape[1]*256) + + x = th.cat((x, latent_state), dim=1) + x = self.linear(x) + return x + + +class ImpalaCNN(nn.Module): + def __init__(self, observation_space, chans=(16, 32, 32, 64, 64), states_neurons=[256], + features_dim=256, nblock=2, batch_norm=False, final_relu=True): + # (16, 32, 32) + super().__init__() + self.features_dim = features_dim + self.final_relu = final_relu + + # image encoder + curshape = observation_space['birdview'].shape + s = 1 / np.sqrt(len(chans)) # per stack scale + self.stacks = nn.ModuleList() + for outchan in chans: + stack = tu.CnnDownStack(curshape[0], nblock=nblock, outchan=outchan, scale=s, batch_norm=batch_norm) + self.stacks.append(stack) + curshape = stack.output_shape(curshape) + + # dense after concatenate + n_image_latent = tu.intprod(curshape) + self.dense = tu.NormedLinear(n_image_latent+states_neurons[-1], features_dim, scale=1.4) + + # state encoder + states_neurons = [observation_space['state'].shape[0]] + states_neurons + self.state_linear = [] + for i in range(len(states_neurons)-1): + self.state_linear.append(tu.NormedLinear(states_neurons[i], states_neurons[i+1])) + self.state_linear.append(nn.ReLU()) + self.state_linear = nn.Sequential(*self.state_linear) + + def forward(self, birdview, state): + # birdview: [b, c, h, w] + # x = x.to(dtype=th.float32) / self.scale_ob + + for layer in self.stacks: + birdview = layer(birdview) + + x = th.flatten(birdview, 1) + x = th.relu(x) + + latent_state = self.state_linear(state) + + x = th.cat((x, latent_state), dim=1) + x = self.dense(x) + if self.final_relu: + x = th.relu(x) + return x diff --git a/roach/models/torch_util.py b/roach/models/torch_util.py new file mode 100644 index 0000000..262ce2b --- /dev/null +++ b/roach/models/torch_util.py @@ -0,0 +1,104 @@ +import torch as th +from torch import nn +import math 
+import torch.nn.functional as F + + +def NormedLinear(*args, scale=1.0, dtype=th.float32, **kwargs): + """ + nn.Linear but with normalized fan-in init + """ + out = nn.Linear(*args, **kwargs) + out.weight.data *= scale / out.weight.norm(dim=1, p=2, keepdim=True) + if kwargs.get("bias", True): + out.bias.data *= 0 + return out + + +def NormedConv2d(*args, scale=1, **kwargs): + """ + nn.Conv2d but with normalized fan-in init + """ + out = nn.Conv2d(*args, **kwargs) + out.weight.data *= scale / out.weight.norm(dim=(1, 2, 3), p=2, keepdim=True) + if kwargs.get("bias", True): + out.bias.data *= 0 + return out + + +def intprod(xs): + """ + Product of a sequence of integers + """ + out = 1 + for x in xs: + out *= x + return out + + +class CnnBasicBlock(nn.Module): + """ + Residual basic block (without batchnorm), as in ImpalaCNN + Preserves channel number and shape + """ + + def __init__(self, inchan, scale=1, batch_norm=False): + super().__init__() + self.inchan = inchan + self.batch_norm = batch_norm + s = math.sqrt(scale) + self.conv0 = NormedConv2d(self.inchan, self.inchan, 3, padding=1, scale=s) + self.conv1 = NormedConv2d(self.inchan, self.inchan, 3, padding=1, scale=s) + if self.batch_norm: + self.bn0 = nn.BatchNorm2d(self.inchan) + self.bn1 = nn.BatchNorm2d(self.inchan) + + def residual(self, x): + # inplace should be False for the first relu, so that it does not change the input, + # which will be used for skip connection. + # getattr is for backwards compatibility with loaded models + if getattr(self, "batch_norm", False): + x = self.bn0(x) + x = F.relu(x, inplace=False) + x = self.conv0(x) + if getattr(self, "batch_norm", False): + x = self.bn1(x) + x = F.relu(x, inplace=True) + x = self.conv1(x) + return x + + def forward(self, x): + return x + self.residual(x) + + +class CnnDownStack(nn.Module): + """ + Downsampling stack from Impala CNN + """ + + def __init__(self, inchan, nblock, outchan, scale=1, pool=True, **kwargs): + super().__init__() + self.inchan = inchan + self.outchan = outchan + self.pool = pool + self.firstconv = NormedConv2d(inchan, outchan, 3, padding=1) + s = scale / math.sqrt(nblock) + self.blocks = nn.ModuleList( + [CnnBasicBlock(outchan, scale=s, **kwargs) for _ in range(nblock)] + ) + + def forward(self, x): + x = self.firstconv(x) + if getattr(self, "pool", True): + x = F.max_pool2d(x, kernel_size=3, stride=2, padding=1) + for block in self.blocks: + x = block(x) + return x + + def output_shape(self, inshape): + c, h, w = inshape + assert c == self.inchan + if getattr(self, "pool", True): + return (self.outchan, (h + 1) // 2, (w + 1) // 2) + else: + return (self.outchan, h, w) \ No newline at end of file diff --git a/roach/obs_manager/actor_state/control.py b/roach/obs_manager/actor_state/control.py new file mode 100644 index 0000000..8e0b5af --- /dev/null +++ b/roach/obs_manager/actor_state/control.py @@ -0,0 +1,27 @@ +import numpy as np +from gym import spaces + +from roach.obs_manager.obs_manager import ObsManagerBase + + +class ObsManager(): + + def __init__(self, obs_configs): + self._parent_actor = None + def attach_ego_vehicle(self, parent_actor): + self._parent_actor = parent_actor + + def get_observation(self): + control = self._parent_actor.vehicle.get_control() + speed_limit = self._parent_actor.vehicle.get_speed_limit() / 3.6 * 0.8 + obs = { + 'throttle': np.array([control.throttle], dtype=np.float32), + 'steer': np.array([control.steer], dtype=np.float32), + 'brake': np.array([control.brake], dtype=np.float32), + 'gear': np.array([control.gear], 
dtype=np.float32), + 'speed_limit': np.array([speed_limit], dtype=np.float32), + } + return obs + + def clean(self): + self._parent_actor = None diff --git a/roach/obs_manager/actor_state/route.py b/roach/obs_manager/actor_state/route.py new file mode 100644 index 0000000..5052e1a --- /dev/null +++ b/roach/obs_manager/actor_state/route.py @@ -0,0 +1,71 @@ +import numpy as np +from gym import spaces + +from carla_gym.core.obs_manager.obs_manager import ObsManagerBase +import carla_gym.utils.transforms as trans_utils + + +class ObsManager(ObsManagerBase): + + def __init__(self, obs_configs): + self._parent_actor = None + self._route_steps = 5 + super(ObsManager, self).__init__() + + def _define_obs_space(self): + self.obs_space = spaces.Dict({ + 'lateral_dist': spaces.Box(low=0.0, high=2.0, shape=(1,), dtype=np.float32), + 'angle_diff': spaces.Box(low=-2.0, high=2.0, shape=(1,), dtype=np.float32), + 'route_locs': spaces.Box(low=-5.0, high=5.0, shape=(self._route_steps*2,), dtype=np.float32), + 'dist_remaining': spaces.Box(low=0.0, high=100, shape=(1,), dtype=np.float32) + }) + + def attach_ego_vehicle(self, parent_actor): + self._parent_actor = parent_actor + + def get_observation(self): + ev_transform = self._parent_actor.vehicle.get_transform() + route_plan = self._parent_actor.route_plan + + # lateral_dist + waypoint, road_option = route_plan[0] + wp_transform = waypoint.transform + + d_vec = ev_transform.location - wp_transform.location + np_d_vec = np.array([d_vec.x, d_vec.y], dtype=np.float32) + wp_unit_forward = wp_transform.rotation.get_forward_vector() + np_wp_unit_right = np.array([-wp_unit_forward.y, wp_unit_forward.x], dtype=np.float32) + + lateral_dist = np.abs(np.dot(np_wp_unit_right, np_d_vec)) + lateral_dist = np.clip(lateral_dist, 0, 2) + + # angle_diff + angle_diff = np.deg2rad(np.abs(trans_utils.cast_angle(ev_transform.rotation.yaw - wp_transform.rotation.yaw))) + angle_diff = np.clip(angle_diff, -2, 2) + + # route_locs + location_list = [] + route_length = len(route_plan) + for i in range(self._route_steps): + if i < route_length: + waypoint, road_option = route_plan[i] + else: + waypoint, road_option = route_plan[-1] + + wp_location_world_coord = waypoint.transform.location + wp_location_actor_coord = trans_utils.loc_global_to_ref(wp_location_world_coord, ev_transform) + location_list += [wp_location_actor_coord.x, wp_location_actor_coord.y] + + # dist_remaining_in_km + dist_remaining_in_km = (self._parent_actor.route_length - self._parent_actor.route_completed) / 1000.0 + + obs = { + 'lateral_dist': np.array([lateral_dist], dtype=np.float32), + 'angle_diff': np.array([angle_diff], dtype=np.float32), + 'route_locs': np.array(location_list, dtype=np.float32), + 'dist_remaining': np.array([dist_remaining_in_km], dtype=np.float32) + } + return obs + + def clean(self): + self._parent_actor = None diff --git a/roach/obs_manager/actor_state/speed.py b/roach/obs_manager/actor_state/speed.py new file mode 100644 index 0000000..dae1040 --- /dev/null +++ b/roach/obs_manager/actor_state/speed.py @@ -0,0 +1,46 @@ +import numpy as np +from gym import spaces + +from carla_gym.core.obs_manager.obs_manager import ObsManagerBase + + +class ObsManager(ObsManagerBase): + """ + in m/s + """ + + def __init__(self, obs_configs): + self._parent_actor = None + super(ObsManager, self).__init__() + + def _define_obs_space(self): + self.obs_space = spaces.Dict({ + 'speed': spaces.Box(low=-10.0, high=30.0, shape=(1,), dtype=np.float32), + 'speed_xy': spaces.Box(low=-10.0, high=30.0, shape=(1,), 
dtype=np.float32), + 'forward_speed': spaces.Box(low=-10.0, high=30.0, shape=(1,), dtype=np.float32) + }) + + def attach_ego_vehicle(self, parent_actor): + self._parent_actor = parent_actor + + def get_observation(self): + velocity = self._parent_actor.vehicle.get_velocity() + transform = self._parent_actor.vehicle.get_transform() + forward_vec = transform.get_forward_vector() + + np_vel = np.array([velocity.x, velocity.y, velocity.z]) + np_fvec = np.array([forward_vec.x, forward_vec.y, forward_vec.z]) + + speed = np.linalg.norm(np_vel) + speed_xy = np.linalg.norm(np_vel[0:2]) + forward_speed = np.dot(np_vel, np_fvec) + + obs = { + 'speed': np.array([speed], dtype=np.float32), + 'speed_xy': np.array([speed_xy], dtype=np.float32), + 'forward_speed': np.array([forward_speed], dtype=np.float32) + } + return obs + + def clean(self): + self._parent_actor = None diff --git a/roach/obs_manager/actor_state/velocity.py b/roach/obs_manager/actor_state/velocity.py new file mode 100644 index 0000000..dae6c03 --- /dev/null +++ b/roach/obs_manager/actor_state/velocity.py @@ -0,0 +1,43 @@ +import numpy as np +from gym import spaces + +from carla_gym.core.obs_manager.obs_manager import ObsManagerBase +import carla_gym.utils.transforms as trans_utils + + +class ObsManager(ObsManagerBase): + + def __init__(self, obs_configs): + super(ObsManager, self).__init__() + + def _define_obs_space(self): + # acc_x, acc_y: m/s2 + # vel_x, vel_y: m/s + # vel_angular z: rad/s + self.obs_space = spaces.Dict({ + 'acc_xy': spaces.Box(low=-1e3, high=1e3, shape=(2,), dtype=np.float32), + 'vel_xy': spaces.Box(low=-1e2, high=1e2, shape=(2,), dtype=np.float32), + 'vel_ang_z': spaces.Box(low=-1e3, high=1e3, shape=(1,), dtype=np.float32) + }) + + def attach_ego_vehicle(self, parent_actor): + self._parent_actor = parent_actor + + def get_observation(self): + ev_transform = self._parent_actor.vehicle.get_transform() + acc_w = self._parent_actor.vehicle.get_acceleration() + vel_w = self._parent_actor.vehicle.get_velocity() + ang_w = self._parent_actor.vehicle.get_angular_velocity() + + acc_ev = trans_utils.vec_global_to_ref(acc_w, ev_transform.rotation) + vel_ev = trans_utils.vec_global_to_ref(vel_w, ev_transform.rotation) + + obs = { + 'acc_xy': np.array([acc_ev.x, acc_ev.y], dtype=np.float32), + 'vel_xy': np.array([vel_ev.x, vel_ev.y], dtype=np.float32), + 'vel_ang_z': np.array([ang_w.z], dtype=np.float32) + } + return obs + + def clean(self): + self._parent_actor = None diff --git a/roach/obs_manager/birdview/chauffeurnet.py b/roach/obs_manager/birdview/chauffeurnet.py new file mode 100644 index 0000000..d7aed43 --- /dev/null +++ b/roach/obs_manager/birdview/chauffeurnet.py @@ -0,0 +1,290 @@ +import numpy as np +import carla +from gym import spaces +import cv2 as cv +from collections import deque +from pathlib import Path +import h5py + +from roach.utils.traffic_light import TrafficLightHandler + +COLOR_ALUMINIUM_5 = (255, 255, 255) # road +COLOR_BLACK = (0, 0, 0) # background black +COLOR_BLUE = (0, 0, 255) # vehicle blue +COLOR_CYAN = (0, 255, 255) # walker +COLOR_RED = (255, 0, 0) # tl_red +COLOR_Lane = (0, 255, 0) # lane green +COLOR_YELLOW = (255, 255, 0) # tl_yellow + +COLOR_WHITE = COLOR_ALUMINIUM_5 # ego-vehicle +COLOR_GREEN = COLOR_ALUMINIUM_5 # tl_green +COLOR_MAGENTA = COLOR_ALUMINIUM_5 # divider +COLOR_MAGENTA_2 = COLOR_ALUMINIUM_5 # divider +COLOR_YELLOW_2 = COLOR_ALUMINIUM_5 # stop mask + + +def tint(color, factor): + r, g, b = color + r = int(r + (255-r) * factor) + g = int(g + (255-g) * factor) + b = int(b + 
(255-b) * factor) + r = min(r, 255) + g = min(g, 255) + b = min(b, 255) + return (r, g, b) + + +class ObsManager(): + def __init__(self, obs_configs, criteria_stop=None): + self._width = int(obs_configs['width_in_pixels']) + self._pixels_ev_to_bottom = obs_configs['pixels_ev_to_bottom'] + self._pixels_per_meter = obs_configs['pixels_per_meter'] + self._history_idx = obs_configs['history_idx'] + self._scale_bbox = obs_configs.get('scale_bbox', True) + self._scale_mask_col = obs_configs.get('scale_mask_col', 1.1) + + self._history_queue = deque(maxlen=20) + + self._image_channels = 3 + self._masks_channels = 3 + 3*len(self._history_idx) + self._parent_actor = None + self._world = None + + self._map_dir = Path(__file__).resolve().parent / 'maps' + + self._criteria_stop =criteria_stop + + super(ObsManager, self).__init__() + + def attach_ego_vehicle(self, ego_vehicle): + self._parent_actor = ego_vehicle + self._world = self._parent_actor.get_world() + + maps_h5_path = self._map_dir / (self._world.get_map().name + '.h5') + with h5py.File(maps_h5_path, 'r', libver='latest', swmr=True) as hf: + self._road = np.array(hf['road'], dtype=np.uint8) + self._lane_marking_all = np.array(hf['lane_marking_all'], dtype=np.uint8) + self._lane_marking_white_broken = np.array(hf['lane_marking_white_broken'], dtype=np.uint8) + + self._world_offset = np.array(hf.attrs['world_offset_in_meters'], dtype=np.float32) + assert np.isclose(self._pixels_per_meter, float(hf.attrs['pixels_per_meter'])) + + self._distance_threshold = np.ceil(self._width / self._pixels_per_meter) + + @staticmethod + def _get_stops(criteria_stop): + stop_sign = criteria_stop._target_stop_sign + stops = [] + if (stop_sign is not None) and (not criteria_stop._stop_completed): + bb_loc = carla.Location(stop_sign.trigger_volume.location) + bb_ext = carla.Vector3D(stop_sign.trigger_volume.extent) + bb_ext.x = max(bb_ext.x, bb_ext.y) + bb_ext.y = max(bb_ext.x, bb_ext.y) + trans = stop_sign.get_transform() + stops = [(carla.Transform(trans.location, trans.rotation), bb_loc, bb_ext)] + return stops + + def get_observation(self, route_plan): + ev_transform = self._parent_actor.get_transform() + ev_loc = ev_transform.location + ev_rot = ev_transform.rotation + ev_bbox = self._parent_actor.bounding_box + + def is_within_distance(w): + c_distance = abs(ev_loc.x - w.location.x) < self._distance_threshold \ + and abs(ev_loc.y - w.location.y) < self._distance_threshold \ + and abs(ev_loc.z - w.location.z) < 8.0 + c_ev = abs(ev_loc.x - w.location.x) < 1.0 and abs(ev_loc.y - w.location.y) < 1.0 + return c_distance and (not c_ev) + + + vehicle_bbox_list = self._world.get_level_bbs(carla.CityObjectLabel.Vehicles) + walker_bbox_list = self._world.get_level_bbs(carla.CityObjectLabel.Pedestrians) + if self._scale_bbox: + vehicles = self._get_surrounding_actors(vehicle_bbox_list, is_within_distance, 1.0) + walkers = self._get_surrounding_actors(walker_bbox_list, is_within_distance, 2.0) + else: + vehicles = self._get_surrounding_actors(vehicle_bbox_list, is_within_distance) + walkers = self._get_surrounding_actors(walker_bbox_list, is_within_distance) + + tl_green = TrafficLightHandler.get_stopline_vtx(ev_loc, 0) + tl_yellow = TrafficLightHandler.get_stopline_vtx(ev_loc, 1) + tl_red = TrafficLightHandler.get_stopline_vtx(ev_loc, 2) + stops = self._get_stops(self._criteria_stop) + + self._history_queue.append((vehicles, walkers, tl_green, tl_yellow, tl_red, stops)) + + M_warp = self._get_warp_transform(ev_loc, ev_rot) + + # objects with history + vehicle_masks, 
walker_masks, tl_green_masks, tl_yellow_masks, tl_red_masks, stop_masks \ + = self._get_history_masks(M_warp) + + # road_mask, lane_mask + road_mask = cv.warpAffine(self._road, M_warp, (self._width, self._width)).astype(np.bool) + lane_mask_all = cv.warpAffine(self._lane_marking_all, M_warp, (self._width, self._width)).astype(np.bool) + lane_mask_broken = cv.warpAffine(self._lane_marking_white_broken, M_warp, + (self._width, self._width)).astype(np.bool) + + # route_mask + route_mask = np.zeros([self._width, self._width], dtype=np.uint8) + route_in_pixel = np.array([[self._world_to_pixel(wp.transform.location)] + for wp, _ in route_plan[0:80]]) + route_warped = cv.transform(route_in_pixel, M_warp) + cv.polylines(route_mask, [np.round(route_warped).astype(np.int32)], False, 1, thickness=16) + route_mask = route_mask.astype(np.bool) + + # ev_mask + ev_mask = self._get_mask_from_actor_list([(ev_transform, ev_bbox.location, ev_bbox.extent)], M_warp) + ev_mask_col = self._get_mask_from_actor_list([(ev_transform, ev_bbox.location, + ev_bbox.extent*self._scale_mask_col)], M_warp) + # render + image = np.zeros([self._width, self._width, 3], dtype=np.uint8) + image[road_mask] = COLOR_ALUMINIUM_5 + image[route_mask] = COLOR_Lane + image[lane_mask_all] = COLOR_MAGENTA + image[lane_mask_broken] = COLOR_MAGENTA_2 + + h_len = len(self._history_idx)-1 + for i, mask in enumerate(stop_masks): + image[mask] = tint(COLOR_YELLOW_2, (h_len-i)*1.0) + for i, mask in enumerate(tl_green_masks): + image[mask] = tint(COLOR_GREEN, (h_len-i)*1.0) + for i, mask in enumerate(tl_yellow_masks): + image[mask] = tint(COLOR_YELLOW, (h_len-i)*1.0) + for i, mask in enumerate(tl_red_masks): + image[mask] = tint(COLOR_RED, (h_len-i)*1.0) + + for i, mask in enumerate(vehicle_masks): + image[mask] = tint(COLOR_BLUE, (h_len-i)*1.0) + for i, mask in enumerate(walker_masks): + image[mask] = tint(COLOR_CYAN, (h_len-i)*1.0) + + image[ev_mask] = COLOR_WHITE + # image[obstacle_mask] = COLOR_BLUE + + # masks + c_road = road_mask * 255 + c_route = route_mask * 255 + c_lane = lane_mask_all * 255 + c_lane[lane_mask_broken] = 120 + + # masks with history + c_tl_history = [] + for i in range(len(self._history_idx)): + c_tl = np.zeros([self._width, self._width], dtype=np.uint8) + c_tl[tl_green_masks[i]] = 80 + c_tl[tl_yellow_masks[i]] = 170 + c_tl[tl_red_masks[i]] = 255 + c_tl[stop_masks[i]] = 255 + c_tl_history.append(c_tl) + + c_vehicle_history = [m*255 for m in vehicle_masks] + c_walker_history = [m*255 for m in walker_masks] + + masks = np.stack((c_road, c_route, c_lane, *c_vehicle_history, *c_walker_history, *c_tl_history), axis=2) + masks = np.transpose(masks, [2, 0, 1]) + + obs_dict = {'rendered': image, 'masks': masks} + + # self._parent_actor.collision_px = np.any(ev_mask_col & walker_masks[-1]) + + return obs_dict + + def _get_history_masks(self, M_warp): + qsize = len(self._history_queue) + vehicle_masks, walker_masks, tl_green_masks, tl_yellow_masks, tl_red_masks, stop_masks = [], [], [], [], [], [] + for idx in self._history_idx: + idx = max(idx, -1 * qsize) + + vehicles, walkers, tl_green, tl_yellow, tl_red, stops = self._history_queue[idx] + + vehicle_masks.append(self._get_mask_from_actor_list(vehicles, M_warp)) + walker_masks.append(self._get_mask_from_actor_list(walkers, M_warp)) + tl_green_masks.append(self._get_mask_from_stopline_vtx(tl_green, M_warp)) + tl_yellow_masks.append(self._get_mask_from_stopline_vtx(tl_yellow, M_warp)) + tl_red_masks.append(self._get_mask_from_stopline_vtx(tl_red, M_warp)) + 
stop_masks.append(self._get_mask_from_actor_list(stops, M_warp)) + + return vehicle_masks, walker_masks, tl_green_masks, tl_yellow_masks, tl_red_masks, stop_masks + + def _get_mask_from_stopline_vtx(self, stopline_vtx, M_warp): + mask = np.zeros([self._width, self._width], dtype=np.uint8) + for sp_locs in stopline_vtx: + stopline_in_pixel = np.array([[self._world_to_pixel(x)] for x in sp_locs]) + stopline_warped = cv.transform(stopline_in_pixel, M_warp) + stopline_warped = np.round(stopline_warped).astype(np.int32) + cv.line(mask, tuple(stopline_warped[0, 0]), tuple(stopline_warped[1, 0]), + color=1, thickness=6) + return mask.astype(np.bool) + + def _get_mask_from_actor_list(self, actor_list, M_warp): + mask = np.zeros([self._width, self._width], dtype=np.uint8) + for actor_transform, bb_loc, bb_ext in actor_list: + + corners = [carla.Location(x=-bb_ext.x, y=-bb_ext.y), + carla.Location(x=bb_ext.x, y=-bb_ext.y), + carla.Location(x=bb_ext.x, y=0), + carla.Location(x=bb_ext.x, y=bb_ext.y), + carla.Location(x=-bb_ext.x, y=bb_ext.y)] + corners = [bb_loc + corner for corner in corners] + + corners = [actor_transform.transform(corner) for corner in corners] + corners_in_pixel = np.array([[self._world_to_pixel(corner)] for corner in corners]) + corners_warped = cv.transform(corners_in_pixel, M_warp) + + cv.fillConvexPoly(mask, np.round(corners_warped).astype(np.int32), 1) + return mask.astype(np.bool) + + @staticmethod + def _get_surrounding_actors(bbox_list, criterium, scale=None): + actors = [] + for bbox in bbox_list: + is_within_distance = criterium(bbox) + if is_within_distance: + bb_loc = carla.Location() + bb_ext = carla.Vector3D(bbox.extent) + if scale is not None: + bb_ext = bb_ext * scale + bb_ext.x = max(bb_ext.x, 0.8) + bb_ext.y = max(bb_ext.y, 0.8) + + actors.append((carla.Transform(bbox.location, bbox.rotation), bb_loc, bb_ext)) + return actors + + def _get_warp_transform(self, ev_loc, ev_rot): + ev_loc_in_px = self._world_to_pixel(ev_loc) + yaw = np.deg2rad(ev_rot.yaw) + + forward_vec = np.array([np.cos(yaw), np.sin(yaw)]) + right_vec = np.array([np.cos(yaw + 0.5*np.pi), np.sin(yaw + 0.5*np.pi)]) + + bottom_left = ev_loc_in_px - self._pixels_ev_to_bottom * forward_vec - (0.5*self._width) * right_vec + top_left = ev_loc_in_px + (self._width-self._pixels_ev_to_bottom) * forward_vec - (0.5*self._width) * right_vec + top_right = ev_loc_in_px + (self._width-self._pixels_ev_to_bottom) * forward_vec + (0.5*self._width) * right_vec + + src_pts = np.stack((bottom_left, top_left, top_right), axis=0).astype(np.float32) + dst_pts = np.array([[0, self._width-1], + [0, 0], + [self._width-1, 0]], dtype=np.float32) + return cv.getAffineTransform(src_pts, dst_pts) + + def _world_to_pixel(self, location, projective=False): + """Converts the world coordinates to pixel coordinates""" + x = self._pixels_per_meter * (location.x - self._world_offset[0]) + y = self._pixels_per_meter * (location.y - self._world_offset[1]) + + if projective: + p = np.array([x, y, 1], dtype=np.float32) + else: + p = np.array([x, y], dtype=np.float32) + return p + + def _world_to_pixel_width(self, width): + """Converts the world units to pixel units""" + return self._pixels_per_meter * width + + def clean(self): + self._parent_actor = None + self._world = None + self._history_queue.clear() diff --git a/roach/obs_manager/birdview/hdmap_generate.py b/roach/obs_manager/birdview/hdmap_generate.py new file mode 100644 index 0000000..229299a --- /dev/null +++ b/roach/obs_manager/birdview/hdmap_generate.py @@ -0,0 +1,33 @@ +import 
numpy as np +import carla +from gym import spaces +import cv2 as cv +from collections import deque +from pathlib import Path +import h5py + + +COLOR_BLACK = (0, 0, 0) +COLOR_RED = (255, 0, 0) +COLOR_GREEN = (0, 255, 0) +COLOR_BLUE = (0, 0, 255) +COLOR_CYAN = (0, 255, 255) +COLOR_MAGENTA = (255, 0, 255) +COLOR_MAGENTA_2 = (255, 140, 255) +COLOR_YELLOW = (255, 255, 0) +COLOR_YELLOW_2 = (160, 160, 0) +COLOR_WHITE = (255, 255, 255) +COLOR_ALUMINIUM_0 = (238, 238, 236) +COLOR_ALUMINIUM_3 = (136, 138, 133) +COLOR_ALUMINIUM_5 = (46, 52, 54) + +def tint(color, factor): + r, g, b = color + r = int(r + (255-r) * factor) + g = int(g + (255-g) * factor) + b = int(b + (255-b) * factor) + r = min(r, 255) + g = min(g, 255) + b = min(b, 255) + return (r, g, b) + diff --git a/roach/obs_manager/birdview/maps/Town01.h5 b/roach/obs_manager/birdview/maps/Town01.h5 new file mode 100644 index 0000000..c752c1a Binary files /dev/null and b/roach/obs_manager/birdview/maps/Town01.h5 differ diff --git a/roach/obs_manager/birdview/maps/Town02.h5 b/roach/obs_manager/birdview/maps/Town02.h5 new file mode 100644 index 0000000..ca7f2fb Binary files /dev/null and b/roach/obs_manager/birdview/maps/Town02.h5 differ diff --git a/roach/obs_manager/birdview/maps/Town03.h5 b/roach/obs_manager/birdview/maps/Town03.h5 new file mode 100644 index 0000000..8bb4fd8 Binary files /dev/null and b/roach/obs_manager/birdview/maps/Town03.h5 differ diff --git a/roach/obs_manager/birdview/maps/Town04.h5 b/roach/obs_manager/birdview/maps/Town04.h5 new file mode 100644 index 0000000..de25dba Binary files /dev/null and b/roach/obs_manager/birdview/maps/Town04.h5 differ diff --git a/roach/obs_manager/birdview/maps/Town05.h5 b/roach/obs_manager/birdview/maps/Town05.h5 new file mode 100644 index 0000000..2a511a5 Binary files /dev/null and b/roach/obs_manager/birdview/maps/Town05.h5 differ diff --git a/roach/obs_manager/birdview/maps/Town06.h5 b/roach/obs_manager/birdview/maps/Town06.h5 new file mode 100644 index 0000000..62ee8b4 Binary files /dev/null and b/roach/obs_manager/birdview/maps/Town06.h5 differ diff --git a/roach/obs_manager/birdview/maps/Town07.h5 b/roach/obs_manager/birdview/maps/Town07.h5 new file mode 100644 index 0000000..2331d1e Binary files /dev/null and b/roach/obs_manager/birdview/maps/Town07.h5 differ diff --git a/roach/obs_manager/birdview/maps/Town10HD.h5 b/roach/obs_manager/birdview/maps/Town10HD.h5 new file mode 100644 index 0000000..be8576a Binary files /dev/null and b/roach/obs_manager/birdview/maps/Town10HD.h5 differ diff --git a/roach/rl_birdview_agent.py b/roach/rl_birdview_agent.py new file mode 100644 index 0000000..3bd2215 --- /dev/null +++ b/roach/rl_birdview_agent.py @@ -0,0 +1,58 @@ +import logging +import numpy as np +from omegaconf import OmegaConf +import copy + +from roach.utils.config_utils import load_entry_point + + +class RlBirdviewAgent(): + def __init__(self, path_to_conf_file='config_agent.yaml', ckpt = None): + + self._render_dict = None + self.supervision_dict = None + self._ckpt = ckpt + self.setup(path_to_conf_file) + + def setup(self, path_to_conf_file): + cfg = OmegaConf.load(path_to_conf_file) + cfg = OmegaConf.to_container(cfg) + + self._obs_configs = cfg['obs_configs'] + self._train_cfg = cfg['training'] + + + self._policy_class = load_entry_point(cfg['policy']['entry_point']) + self._policy_kwargs = cfg['policy']['kwargs'] + if self._ckpt is None: + self._policy = None + else: + self._policy, self._train_cfg['kwargs'] = self._policy_class.load(self._ckpt) + self._policy = self._policy.eval() 
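+            # load() restores the policy weights and the training kwargs stored in the
+            # checkpoint ('policy_state_dict' / 'train_init_kwargs'); eval() switches the
+            # restored policy to inference mode before it is used in run_step().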
+ + self._wrapper_class = load_entry_point(cfg['env_wrapper']['entry_point']) + self._wrapper_kwargs = cfg['env_wrapper']['kwargs'] + + def run_step(self, input_data, timestamp): + input_data = copy.deepcopy(input_data) + + policy_input = self._wrapper_class.process_obs(input_data, self._wrapper_kwargs['input_states'], train=False) + + actions, values, log_probs, mu, sigma, features = self._policy.forward( + policy_input, deterministic=True, clip_action=True) + control = self._wrapper_class.process_act(actions, self._wrapper_kwargs['acc_as_action'], train=False) + self.supervision_dict = { + 'action': np.array([control.throttle, control.steer, control.brake], dtype=np.float32), + 'value': values[0], + 'action_mu': mu[0], + 'action_sigma': sigma[0], + 'features': features[0], + 'speed': input_data['speed']['forward_speed'] + } + self.supervision_dict = copy.deepcopy(self.supervision_dict) + + return control + + @property + def obs_configs(self): + return self._obs_configs diff --git a/roach/utils/config_utils.py b/roach/utils/config_utils.py new file mode 100644 index 0000000..12de43c --- /dev/null +++ b/roach/utils/config_utils.py @@ -0,0 +1,148 @@ +from importlib import import_module +import json +from pathlib import Path +import socket +import xml.etree.ElementTree as ET +import h5py +import carla +import numpy as np +import hydra + + +def check_h5_maps(env_configs, obs_configs, carla_sh_path): + pixels_per_meter = None + for agent_id, obs_cfg in obs_configs.items(): + for k,v in obs_cfg.items(): + if 'birdview' in v['module']: + pixels_per_meter = float(v['pixels_per_meter']) + + if pixels_per_meter is None: + # agent does not require birdview map as observation + return + + save_dir = Path(hydra.utils.get_original_cwd()) / 'carla_gym/core/obs_manager/birdview/maps' + txt_command = f'Please run map generation script from project root directory. \n' \ + f'\033[93m' \ + f'python -m carla_gym.utils.birdview_map ' \ + f'--save_dir {save_dir} --pixels_per_meter {pixels_per_meter:.2f} ' \ + f'--carla_sh_path {carla_sh_path}' \ + f'\033[0m' + + # check if pixels_per_meter match + for env_cfg in env_configs: + carla_map = env_cfg['env_configs']['carla_map'] + hf_file_path = save_dir / (carla_map+'.h5') + + file_exists = hf_file_path.exists() + if file_exists: + map_hf = h5py.File(hf_file_path, 'r') + hf_pixels_per_meter = float(map_hf.attrs['pixels_per_meter']) + map_hf.close() + pixels_per_meter_match = np.isclose(hf_pixels_per_meter, pixels_per_meter) + txt_assert = f'pixel_per_meter mismatch between h5 file ({hf_pixels_per_meter}) '\ + f'and obs_config ({pixels_per_meter}). ' + else: + txt_assert = f'{hf_file_path} does not exists. 
' + pixels_per_meter_match = False + + assert file_exists and pixels_per_meter_match, txt_assert + txt_command + + +def load_entry_point(name): + mod_name, attr_name = name.split(":") + mod = import_module(mod_name) + fn = getattr(mod, attr_name) + return fn + + +def load_obs_configs(agent_configs_dict): + obs_configs = {} + for actor_id, cfg in agent_configs_dict.items(): + obs_configs[actor_id] = json.load(open(cfg['path_to_conf_file'], 'r'))['obs_configs'] + return obs_configs + + +def init_agents(agent_configs_dict, **kwargs): + agents_dict = {} + for actor_id, cfg in agent_configs_dict.items(): + AgentClass = load_entry_point(cfg['entry_point']) + agents_dict[actor_id] = AgentClass(cfg['path_to_conf_file'], **kwargs) + return agents_dict + + +def parse_routes_file(routes_xml_filename): + route_descriptions_dict = {} + tree = ET.parse(routes_xml_filename) + + for route in tree.iter("route"): + + route_id = int(route.attrib['id']) + + route_descriptions_dict[route_id] = {} + + for actor_type in ['ego_vehicle', 'scenario_actor']: + route_descriptions_dict[route_id][actor_type+'s'] = {} + for actor in route.iter(actor_type): + actor_id = actor.attrib['id'] + + waypoint_list = [] # the list of waypoints that can be found on this route for this actor + for waypoint in actor.iter('waypoint'): + location = carla.Location( + x=float(waypoint.attrib['x']), + y=float(waypoint.attrib['y']), + z=float(waypoint.attrib['z'])) + rotation = carla.Rotation( + roll=float(waypoint.attrib['roll']), + pitch=float(waypoint.attrib['pitch']), + yaw=float(waypoint.attrib['yaw'])) + waypoint_list.append(carla.Transform(location, rotation)) + + route_descriptions_dict[route_id][actor_type+'s'][actor_id] = waypoint_list + + return route_descriptions_dict + + +def get_single_route(routes_xml_filename, route_id): + tree = ET.parse(routes_xml_filename) + route = tree.find(f'.//route[@id="{route_id}"]') + + route_dict = {} + for actor_type in ['ego_vehicle', 'scenario_actor']: + route_dict[actor_type+'s'] = {} + for actor in route.iter(actor_type): + actor_id = actor.attrib['id'] + + waypoint_list = [] # the list of waypoints that can be found on this route for this actor + for waypoint in actor.iter('waypoint'): + location = carla.Location( + x=float(waypoint.attrib['x']), + y=float(waypoint.attrib['y']), + z=float(waypoint.attrib['z'])) + rotation = carla.Rotation( + roll=float(waypoint.attrib['roll']), + pitch=float(waypoint.attrib['pitch']), + yaw=float(waypoint.attrib['yaw'])) + waypoint_list.append(carla.Transform(location, rotation)) + + route_dict[actor_type+'s'][actor_id] = waypoint_list + return route_dict + + +def to_camel_case(snake_str, init_capital=False): + # agent_class_str = to_camel_case(agent_module_str.split('.')[-1], init_capital=True) + components = snake_str.split('_') + if init_capital: + init_letter = components[0].title() + else: + init_letter = components[0] + return init_letter + ''.join(x.title() for x in components[1:]) + + +def get_free_tcp_port(): + s = socket.socket() + s.bind(("", 0)) # Request the sys to provide a free port dynamically + server_port = s.getsockname()[1] + s.close() + # 2000 works fine for now + server_port = 2000 + return server_port diff --git a/roach/utils/expert_noiser.py b/roach/utils/expert_noiser.py new file mode 100644 index 0000000..d2dafb3 --- /dev/null +++ b/roach/utils/expert_noiser.py @@ -0,0 +1,167 @@ +import time +import random + + +class ExpertNoiser(object): + + # define frequency into noise events per minute + # define the amount_of_time of setting 
the noise + + def __init__(self, noise_type, frequency=15, intensity=10, min_noise_time_amount=2.0): + + # self.variable_type = variable + self.noise_type = noise_type + self.frequency = frequency + self.noise_being_set = False + self.noise_start_time = time.time() + self.noise_end_time = time.time() + 1 + self.min_noise_time_amount = min_noise_time_amount + self.noise_time_amount = min_noise_time_amount + float(random.randint(50, 200) / 100.0) + self.second_counter = time.time() + self.steer_noise_time = 0 + self.intensity = intensity + random.randint(-2, 2) + self.remove_noise = False + self.current_noise_mean = 0 + + def set_noise(self): + + if self.noise_type == 'Spike' or self.noise_type == 'Throttle': + + # flip between positive and negative + coin = random.randint(0, 1) + if coin == 0: # negative + self.current_noise_mean = 0.001 # -random.gauss(0.05,0.02) + else: # positive + self.current_noise_mean = -0.001 # random.gauss(0.05,0.02) + + def get_noise(self): + + if self.noise_type == 'Spike' or self.noise_type == 'Throttle': + if self.current_noise_mean > 0: + + return min(0.55, + self.current_noise_mean + ( + time.time() - self.noise_start_time) * 0.03 * self.intensity) + else: + + return max(-0.55, + self.current_noise_mean - ( + time.time() - self.noise_start_time) * 0.03 * self.intensity) + + def get_noise_removing(self): + # print 'REMOVING' + added_noise = (self.noise_end_time - self.noise_start_time) * 0.02 * self.intensity + # print added_noise + if self.noise_type == 'Spike' or self.noise_type == 'Throttle': + if self.current_noise_mean > 0: + added_noise = min(0.55, added_noise + self.current_noise_mean) + return added_noise - (time.time() - self.noise_end_time) * 0.03 * self.intensity + else: + added_noise = max(-0.55, self.current_noise_mean - added_noise) + return added_noise + (time.time() - self.noise_end_time) * 0.03 * self.intensity + + def is_time_for_noise(self, steer): + + # Count Seconds + second_passed = False + if time.time() - self.second_counter >= 1.0: + second_passed = True + self.second_counter = time.time() + + if time.time() - self.noise_start_time >= self.noise_time_amount and not self.remove_noise and self.noise_being_set: + self.noise_being_set = False + self.remove_noise = True + self.noise_end_time = time.time() + + if self.noise_being_set: + return True + + if self.remove_noise: + # print "TIME REMOVING ",(time.time()-self.noise_end_time) + if (time.time() - self.noise_end_time) > ( + self.noise_time_amount): # if half the amount put passed + self.remove_noise = False + self.noise_time_amount = self.min_noise_time_amount + float( + random.randint(50, 200) / 100.0) + return False + else: + return True + + if second_passed and not self.noise_being_set: + # Ok, if noise is not being set but a second passed... 
we may start puting more + + seed = random.randint(0, 60) + if seed < self.frequency: + if not self.noise_being_set: + self.noise_being_set = True + self.set_noise() + self.steer_noise_time = steer + self.noise_start_time = time.time() + return True + else: + return False + + else: + return False + + def set_noise_exist(self, noise_exist): + self.noise_being_set = noise_exist + + def compute_noise(self, action, speed): + + # noisy_action = action + if self.noise_type == 'None': + return action, False, False + + if self.noise_type == 'Spike': + + if self.is_time_for_noise(action.steer): + steer = action.steer + + if self.remove_noise: + steer_noisy = max( + min(steer + self.get_noise_removing() * (25 / (2.3 * speed + 5)), 1), -1) + + else: + steer_noisy = max(min(steer + self.get_noise() * (25 / (2.3 * speed + 5)), 1), + -1) + + noisy_action = action + + noisy_action.steer = steer_noisy + + return noisy_action, False, not self.remove_noise + + else: + return action, False, False + + if self.noise_type == 'Throttle': + if self.is_time_for_noise(action.throttle): + throttle_noisy = action.throttle + brake_noisy = action.brake + + if self.remove_noise: + # print(" Throttle noise removing", self.get_noise_removing()) + noise = self.get_noise_removing() + if noise > 0: + throttle_noisy = max(min(throttle_noisy + noise, 1), 0) + else: + brake_noisy = max(min(brake_noisy + -noise, 1), 0) + + else: + + # print(" Throttle noise ", self.get_noise()) + noise = self.get_noise() + if noise > 0: + throttle_noisy = max(min(throttle_noisy + noise, 1), 0) + else: + brake_noisy = max(min(brake_noisy + -noise, 1), 0) + + noisy_action = action + noisy_action.throttle = throttle_noisy + noisy_action.brake = brake_noisy + + # print 'timefornosie' + return noisy_action, False, not self.remove_noise + else: + return action, False, False diff --git a/roach/utils/rl_birdview_wrapper.py b/roach/utils/rl_birdview_wrapper.py new file mode 100644 index 0000000..2070eb3 --- /dev/null +++ b/roach/utils/rl_birdview_wrapper.py @@ -0,0 +1,142 @@ +import gym +import numpy as np +import cv2 +import carla + +eval_num_zombie_vehicles = { + 'Town01': 120, + 'Town02': 70, + 'Town03': 70, + 'Town04': 150, + 'Town05': 120, + 'Town06': 120 +} +eval_num_zombie_walkers = { + 'Town01': 120, + 'Town02': 70, + 'Town03': 70, + 'Town04': 80, + 'Town05': 120, + 'Town06': 80 +} + +class RlBirdviewWrapper(gym.Wrapper): + def __init__(self, env, input_states=[], acc_as_action=False): + assert len(env._obs_configs) == 1 + self._ev_id = list(env._obs_configs.keys())[0] + self._input_states = input_states + self._acc_as_action = acc_as_action + self._render_dict = {} + + state_spaces = [] + if 'speed' in self._input_states: + state_spaces.append(env.observation_space[self._ev_id]['speed']['speed_xy']) + if 'speed_limit' in self._input_states: + state_spaces.append(env.observation_space[self._ev_id]['control']['speed_limit']) + if 'control' in self._input_states: + state_spaces.append(env.observation_space[self._ev_id]['control']['throttle']) + state_spaces.append(env.observation_space[self._ev_id]['control']['steer']) + state_spaces.append(env.observation_space[self._ev_id]['control']['brake']) + state_spaces.append(env.observation_space[self._ev_id]['control']['gear']) + if 'acc_xy' in self._input_states: + state_spaces.append(env.observation_space[self._ev_id]['velocity']['acc_xy']) + if 'vel_xy' in self._input_states: + state_spaces.append(env.observation_space[self._ev_id]['velocity']['vel_xy']) + if 'vel_ang_z' in self._input_states: + 
state_spaces.append(env.observation_space[self._ev_id]['velocity']['vel_ang_z']) + + state_low = np.concatenate([s.low for s in state_spaces]) + state_high = np.concatenate([s.high for s in state_spaces]) + + env.observation_space = gym.spaces.Dict( + {'state': gym.spaces.Box(low=state_low, high=state_high, dtype=np.float32), + 'birdview': env.observation_space[self._ev_id]['birdview']['masks']}) + + if self._acc_as_action: + # act: acc(throttle/brake), steer + env.action_space = gym.spaces.Box(low=np.array([-1, -1]), high=np.array([1, 1]), dtype=np.float32) + else: + # act: throttle, steer, brake + env.action_space = gym.spaces.Box(low=np.array([0, -1, 0]), high=np.array([1, 1, 1]), dtype=np.float32) + + super(RlBirdviewWrapper, self).__init__(env) + + self.eval_mode = False + + def step(self, action): + action_ma = {self._ev_id: self.process_act(action, self._acc_as_action)} + + obs_ma, reward_ma, done_ma, info_ma = self.env.step(action_ma) + + obs = self.process_obs(obs_ma[self._ev_id], self._input_states) + reward = reward_ma[self._ev_id] + done = done_ma[self._ev_id] + info = info_ma[self._ev_id] + + self._render_dict = { + 'timestamp': self.env.timestamp, + 'obs': self._render_dict['prev_obs'], + 'prev_obs': obs, + 'im_render': self._render_dict['prev_im_render'], + 'prev_im_render': obs_ma[self._ev_id]['birdview']['rendered'], + 'action': action, + 'reward_debug': info['reward_debug'], + 'terminal_debug': info['terminal_debug'] + } + return obs, reward, done, info + + + @staticmethod + def process_obs(obs, input_states, train=True): + + state_list = [] + if 'speed' in input_states: + state_list.append(obs['speed']['speed_xy']) + if 'speed_limit' in input_states: + state_list.append(obs['control']['speed_limit']) + if 'control' in input_states: + state_list.append(obs['control']['throttle']) + state_list.append(obs['control']['steer']) + state_list.append(obs['control']['brake']) + state_list.append(obs['control']['gear']/5.0) + if 'acc_xy' in input_states: + state_list.append(obs['velocity']['acc_xy']) + if 'vel_xy' in input_states: + state_list.append(obs['velocity']['vel_xy']) + if 'vel_ang_z' in input_states: + state_list.append(obs['velocity']['vel_ang_z']) + + state = np.concatenate(state_list) + + birdview = obs['birdview']['masks'] + + if not train: + birdview = np.expand_dims(birdview, 0) + state = np.expand_dims(state, 0) + + obs_dict = { + 'state': state.astype(np.float32), + 'birdview': birdview + } + return obs_dict + + @staticmethod + def process_act(action, acc_as_action, train=True): + if not train: + action = action[0] + if acc_as_action: + acc, steer = action.astype(np.float64) + if acc >= 0.0: + throttle = acc + brake = 0.0 + else: + throttle = 0.0 + brake = np.abs(acc) + else: + throttle, steer, brake = action.astype(np.float64) + + throttle = np.clip(throttle, 0, 1) + steer = np.clip(steer, -1, 1) + brake = np.clip(brake, 0, 1) + control = carla.VehicleControl(throttle=throttle, steer=steer, brake=brake) + return control diff --git a/roach/utils/traffic_light.py b/roach/utils/traffic_light.py new file mode 100644 index 0000000..2625092 --- /dev/null +++ b/roach/utils/traffic_light.py @@ -0,0 +1,199 @@ +from collections import deque +import carla +import numpy as np +import roach.utils.transforms as trans_utils + + +def _get_traffic_light_waypoints(traffic_light, carla_map): + """ + get area of a given traffic light + adapted from "carla-simulator/scenario_runner/srunner/scenariomanager/scenarioatomics/atomic_criteria.py" + """ + base_transform = 
traffic_light.get_transform() + tv_loc = traffic_light.trigger_volume.location + tv_ext = traffic_light.trigger_volume.extent + + # Discretize the trigger box into points + x_values = np.arange(-0.9 * tv_ext.x, 0.9 * tv_ext.x, 1.0) # 0.9 to avoid crossing to adjacent lanes + area = [] + for x in x_values: + point_location = base_transform.transform(tv_loc + carla.Location(x=x)) + area.append(point_location) + + # Get the waypoints of these points, removing duplicates + ini_wps = [] + for pt in area: + wpx = carla_map.get_waypoint(pt) + # As x_values are arranged in order, only the last one has to be checked + if not ini_wps or ini_wps[-1].road_id != wpx.road_id or ini_wps[-1].lane_id != wpx.lane_id: + ini_wps.append(wpx) + + # Leaderboard: Advance them until the intersection + stopline_wps = [] + stopline_vertices = [] + junction_wps = [] + for wpx in ini_wps: + # Below: just use trigger volume, otherwise it's on the zebra lines. + # stopline_wps.append(wpx) + # vec_forward = wpx.transform.get_forward_vector() + # vec_right = carla.Vector3D(x=-vec_forward.y, y=vec_forward.x, z=0) + + # loc_left = wpx.transform.location - 0.4 * wpx.lane_width * vec_right + # loc_right = wpx.transform.location + 0.4 * wpx.lane_width * vec_right + # stopline_vertices.append([loc_left, loc_right]) + + while not wpx.is_intersection: + next_wp = wpx.next(0.5)[0] + if next_wp and not next_wp.is_intersection: + wpx = next_wp + else: + break + junction_wps.append(wpx) + + stopline_wps.append(wpx) + vec_forward = wpx.transform.get_forward_vector() + vec_right = carla.Vector3D(x=-vec_forward.y, y=vec_forward.x, z=0) + + loc_left = wpx.transform.location - 0.4 * wpx.lane_width * vec_right + loc_right = wpx.transform.location + 0.4 * wpx.lane_width * vec_right + stopline_vertices.append([loc_left, loc_right]) + + # all paths at junction for this traffic light + junction_paths = [] + path_wps = [] + wp_queue = deque(junction_wps) + while len(wp_queue) > 0: + current_wp = wp_queue.pop() + path_wps.append(current_wp) + next_wps = current_wp.next(1.0) + for next_wp in next_wps: + if next_wp.is_junction: + wp_queue.append(next_wp) + else: + junction_paths.append(path_wps) + path_wps = [] + + return carla.Location(base_transform.transform(tv_loc)), stopline_wps, stopline_vertices, junction_paths + + +class TrafficLightHandler: + num_tl = 0 + list_tl_actor = [] + list_tv_loc = [] + list_stopline_wps = [] + list_stopline_vtx = [] + list_junction_paths = [] + carla_map = None + + @staticmethod + def reset(world): + TrafficLightHandler.carla_map = world.get_map() + + TrafficLightHandler.num_tl = 0 + TrafficLightHandler.list_tl_actor = [] + TrafficLightHandler.list_tv_loc = [] + TrafficLightHandler.list_stopline_wps = [] + TrafficLightHandler.list_stopline_vtx = [] + TrafficLightHandler.list_junction_paths = [] + + all_actors = world.get_actors() + for _actor in all_actors: + if 'traffic_light' in _actor.type_id: + tv_loc, stopline_wps, stopline_vtx, junction_paths = _get_traffic_light_waypoints( + _actor, TrafficLightHandler.carla_map) + + TrafficLightHandler.list_tl_actor.append(_actor) + TrafficLightHandler.list_tv_loc.append(tv_loc) + TrafficLightHandler.list_stopline_wps.append(stopline_wps) + TrafficLightHandler.list_stopline_vtx.append(stopline_vtx) + TrafficLightHandler.list_junction_paths.append(junction_paths) + + TrafficLightHandler.num_tl += 1 + + @staticmethod + def get_light_state(vehicle, offset=0.0, dist_threshold=15.0): + ''' + vehicle: carla.Vehicle + ''' + vec_tra = vehicle.get_transform() + veh_dir = 
vec_tra.get_forward_vector() + + hit_loc = vec_tra.transform(carla.Location(x=offset)) + hit_wp = TrafficLightHandler.carla_map.get_waypoint(hit_loc) + + light_loc = None + light_state = None + light_id = None + for i in range(TrafficLightHandler.num_tl): + traffic_light = TrafficLightHandler.list_tl_actor[i] + tv_loc = 0.5*TrafficLightHandler.list_stopline_wps[i][0].transform.location \ + + 0.5*TrafficLightHandler.list_stopline_wps[i][-1].transform.location + + distance = np.sqrt((tv_loc.x-hit_loc.x)**2 + (tv_loc.y-hit_loc.y)**2) + if distance > dist_threshold: + continue + + for wp in TrafficLightHandler.list_stopline_wps[i]: + + wp_dir = wp.transform.get_forward_vector() + dot_ve_wp = veh_dir.x * wp_dir.x + veh_dir.y * wp_dir.y + veh_dir.z * wp_dir.z + + wp_1 = wp.previous(4.0)[0] + same_road = (hit_wp.road_id == wp.road_id) and (hit_wp.lane_id == wp.lane_id) + same_road_1 = (hit_wp.road_id == wp_1.road_id) and (hit_wp.lane_id == wp_1.lane_id) + + # if (wp.road_id != wp_1.road_id) or (wp.lane_id != wp_1.lane_id): + # print(f'Traffic Light Problem: {wp.road_id}={wp_1.road_id}, {wp.lane_id}={wp_1.lane_id}') + + if (same_road or same_road_1) and dot_ve_wp > 0: + # This light is red and is affecting our lane + loc_in_ev = trans_utils.loc_global_to_ref(wp.transform.location, vec_tra) + light_loc = np.array([loc_in_ev.x, loc_in_ev.y, loc_in_ev.z], dtype=np.float32) + light_state = traffic_light.state + light_id = traffic_light.id + break + + return light_state, light_loc, light_id + + @staticmethod + def get_junctoin_paths(veh_loc, color=0, dist_threshold=50.0): + if color == 0: + tl_state = carla.TrafficLightState.Green + elif color == 1: + tl_state = carla.TrafficLightState.Yellow + elif color == 2: + tl_state = carla.TrafficLightState.Red + + junctoin_paths = [] + for i in range(TrafficLightHandler.num_tl): + traffic_light = TrafficLightHandler.list_tl_actor[i] + tv_loc = TrafficLightHandler.list_tv_loc[i] + if tv_loc.distance(veh_loc) > dist_threshold: + continue + if traffic_light.state != tl_state: + continue + + junctoin_paths += TrafficLightHandler.list_junction_paths[i] + + return junctoin_paths + + @staticmethod + def get_stopline_vtx(veh_loc, color, dist_threshold=50.0): + if color == 0: + tl_state = carla.TrafficLightState.Green + elif color == 1: + tl_state = carla.TrafficLightState.Yellow + elif color == 2: + tl_state = carla.TrafficLightState.Red + + stopline_vtx = [] + for i in range(TrafficLightHandler.num_tl): + traffic_light = TrafficLightHandler.list_tl_actor[i] + tv_loc = TrafficLightHandler.list_tv_loc[i] + if tv_loc.distance(veh_loc) > dist_threshold: + continue + if traffic_light.state != tl_state: + continue + stopline_vtx += TrafficLightHandler.list_stopline_vtx[i] + + return stopline_vtx diff --git a/roach/utils/transforms.py b/roach/utils/transforms.py new file mode 100644 index 0000000..48fd80f --- /dev/null +++ b/roach/utils/transforms.py @@ -0,0 +1,128 @@ +import numpy as np +import carla + + +def loc_global_to_ref(target_loc_in_global, ref_trans_in_global): + """ + :param target_loc_in_global: carla.Location in global coordinate (world, actor) + :param ref_trans_in_global: carla.Transform in global coordinate (world, actor) + :return: carla.Location in ref coordinate + """ + x = target_loc_in_global.x - ref_trans_in_global.location.x + y = target_loc_in_global.y - ref_trans_in_global.location.y + z = target_loc_in_global.z - ref_trans_in_global.location.z + vec_in_global = carla.Vector3D(x=x, y=y, z=z) + vec_in_ref = vec_global_to_ref(vec_in_global, 
ref_trans_in_global.rotation) + + target_loc_in_ref = carla.Location(x=vec_in_ref.x, y=vec_in_ref.y, z=vec_in_ref.z) + return target_loc_in_ref + + +def vec_global_to_ref(target_vec_in_global, ref_rot_in_global): + """ + :param target_vec_in_global: carla.Vector3D in global coordinate (world, actor) + :param ref_rot_in_global: carla.Rotation in global coordinate (world, actor) + :return: carla.Vector3D in ref coordinate + """ + R = carla_rot_to_mat(ref_rot_in_global) + np_vec_in_global = np.array([[target_vec_in_global.x], + [target_vec_in_global.y], + [target_vec_in_global.z]]) + np_vec_in_ref = R.T.dot(np_vec_in_global) + target_vec_in_ref = carla.Vector3D(x=np_vec_in_ref[0, 0], y=np_vec_in_ref[1, 0], z=np_vec_in_ref[2, 0]) + return target_vec_in_ref + + +def rot_global_to_ref(target_rot_in_global, ref_rot_in_global): + target_roll_in_ref = cast_angle(target_rot_in_global.roll - ref_rot_in_global.roll) + target_pitch_in_ref = cast_angle(target_rot_in_global.pitch - ref_rot_in_global.pitch) + target_yaw_in_ref = cast_angle(target_rot_in_global.yaw - ref_rot_in_global.yaw) + + target_rot_in_ref = carla.Rotation(roll=target_roll_in_ref, pitch=target_pitch_in_ref, yaw=target_yaw_in_ref) + return target_rot_in_ref + +def rot_ref_to_global(target_rot_in_ref, ref_rot_in_global): + target_roll_in_global = cast_angle(target_rot_in_ref.roll + ref_rot_in_global.roll) + target_pitch_in_global = cast_angle(target_rot_in_ref.pitch + ref_rot_in_global.pitch) + target_yaw_in_global = cast_angle(target_rot_in_ref.yaw + ref_rot_in_global.yaw) + + target_rot_in_global = carla.Rotation(roll=target_roll_in_global, pitch=target_pitch_in_global, yaw=target_yaw_in_global) + return target_rot_in_global + + +def carla_rot_to_mat(carla_rotation): + """ + Transform rpy in carla.Rotation to rotation matrix in np.array + + :param carla_rotation: carla.Rotation + :return: np.array rotation matrix + """ + roll = np.deg2rad(carla_rotation.roll) + pitch = np.deg2rad(carla_rotation.pitch) + yaw = np.deg2rad(carla_rotation.yaw) + + yaw_matrix = np.array([ + [np.cos(yaw), -np.sin(yaw), 0], + [np.sin(yaw), np.cos(yaw), 0], + [0, 0, 1] + ]) + pitch_matrix = np.array([ + [np.cos(pitch), 0, -np.sin(pitch)], + [0, 1, 0], + [np.sin(pitch), 0, np.cos(pitch)] + ]) + roll_matrix = np.array([ + [1, 0, 0], + [0, np.cos(roll), np.sin(roll)], + [0, -np.sin(roll), np.cos(roll)] + ]) + + rotation_matrix = yaw_matrix.dot(pitch_matrix).dot(roll_matrix) + return rotation_matrix + +def get_loc_rot_vel_in_ev(actor_list, ev_transform, get_acceleration = False, origin = False): + location, rotation, absolute_velocity = [], [], [] + if get_acceleration: + absolute_acceleration = [] + if origin: + origin_velocity = [] + origin_acceleration = [] + for actor in actor_list: + # location + location_in_world = actor.get_transform().location + location_in_ev = loc_global_to_ref(location_in_world, ev_transform) + location.append([location_in_ev.x, location_in_ev.y, location_in_ev.z]) + # rotation + rotation_in_world = actor.get_transform().rotation + rotation_in_ev = rot_global_to_ref(rotation_in_world, ev_transform.rotation) + rotation.append([rotation_in_ev.roll, rotation_in_ev.pitch, rotation_in_ev.yaw]) + # velocity + vel_in_world = actor.get_velocity() + vel_in_ev = vec_global_to_ref(vel_in_world, ev_transform.rotation) + absolute_velocity.append([vel_in_ev.x, vel_in_ev.y, vel_in_ev.z]) + if get_acceleration: + # acceleration + acc_in_world = actor.get_acceleration() + acc_in_ev = vec_global_to_ref(acc_in_world, ev_transform.rotation) + 
absolute_acceleration.append([acc_in_ev.x, acc_in_ev.y, acc_in_ev.z]) + if origin: + origin_velocity.append([vel_in_world.x, vel_in_world.y, vel_in_world.z]) + origin_acceleration.append([acc_in_world.x, acc_in_world.y, acc_in_world.z]) + if get_acceleration: + if origin: + return location, rotation, absolute_velocity, absolute_acceleration, origin_velocity, origin_acceleration + return location, rotation, absolute_velocity, absolute_acceleration + return np.array(location), np.array(rotation), np.array(absolute_velocity) + +def get_loc_rot_in_global(actor_list): + location, rotation = [], [] + for actor in actor_list: + location_in_world = actor.get_transform().location + location.append([location_in_world.x, location_in_world.y, location_in_world.z]) + rotation_in_world = actor.get_transform().rotation + rotation.append([rotation_in_world.roll, rotation_in_world.pitch, rotation_in_world.yaw]) + return np.array(location), np.array(rotation) + +def cast_angle(x): + # cast angle to [-180, +180) + return (x+180.0)%360.0-180.0 \ No newline at end of file diff --git a/roach/utils/wandb_callback.py b/roach/utils/wandb_callback.py new file mode 100644 index 0000000..74694ad --- /dev/null +++ b/roach/utils/wandb_callback.py @@ -0,0 +1,211 @@ +import numpy as np +import time +from pathlib import Path +import wandb +from stable_baselines3.common.callbacks import BaseCallback +from gym.wrappers.monitoring.video_recorder import ImageEncoder +from omegaconf import OmegaConf + + +class WandbCallback(BaseCallback): + def __init__(self, cfg, vec_env): + super(WandbCallback, self).__init__(verbose=1) + + # save_dir = Path.cwd() + # self._save_dir = save_dir + self._video_path = Path('video') + self._video_path.mkdir(parents=True, exist_ok=True) + self._ckpt_dir = Path('ckpt') + self._ckpt_dir.mkdir(parents=True, exist_ok=True) + + # wandb.init(project=cfg.wb_project, dir=save_dir, name=cfg.wb_runname) + wandb.init(project=cfg.wb_project, name=cfg.wb_name, notes=cfg.wb_notes, tags=cfg.wb_tags) + wandb.config.update(OmegaConf.to_container(cfg)) + + wandb.save('./config_agent.yaml') + wandb.save('.hydra/*') + + self.vec_env = vec_env + + self._eval_step = int(1e5) + self._buffer_step = int(1e5) + + def _init_callback(self): + self.n_epoch = 0 + self._last_time_buffer = self.model.num_timesteps + self._last_time_eval = self.model.num_timesteps + + def _on_step(self) -> bool: + return True + + def _on_training_start(self) -> None: + pass + + def _on_rollout_start(self): + # self.model._last_obs = self.model.env.reset() + pass + + def _on_training_end(self) -> None: + print(f'n_epoch: {self.n_epoch}, num_timesteps: {self.model.num_timesteps}') + # save time + time_elapsed = time.time() - self.model.start_time + wandb.log({ + 'time/n_epoch': self.n_epoch, + 'time/sec_per_epoch': time_elapsed / (self.n_epoch+1), + 'time/fps': (self.model.num_timesteps-self.model.start_num_timesteps) / time_elapsed, + 'time/train': self.model.t_train, + 'time/train_values': self.model.t_train_values, + 'time/rollout': self.model.t_rollout + }, step=self.model.num_timesteps) + wandb.log(self.model.train_debug, step=self.model.num_timesteps) + + # evaluate and save checkpoint + if (self.model.num_timesteps - self._last_time_eval) >= self._eval_step: + self._last_time_eval = self.model.num_timesteps + # evaluate + eval_video_path = (self._video_path / f'eval_{self.model.num_timesteps}.mp4').as_posix() + avg_ep_stat, ep_events = self.evaluate_policy(self.vec_env, self.model.policy, eval_video_path) + # log to wandb + 
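+            # wandb.Video uploads the mp4 that evaluate_policy just wrote; encoding
+            # the video assumes ffmpeg is available (see the conda install hint in
+            # evaluate_policy below).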
wandb.log({f'video/{self.model.num_timesteps}': wandb.Video(eval_video_path)}, + step=self.model.num_timesteps) + wandb.log(avg_ep_stat, step=self.model.num_timesteps) + # save events + # eval_json_path = (video_path / f'event_{self.model.num_timesteps}.json').as_posix() + # with open(eval_json_path, 'w') as fd: + # json.dump(ep_events, fd, indent=4, sort_keys=False) + # wandb.save(eval_json_path) + + ckpt_path = (self._ckpt_dir / f'ckpt_{self.model.num_timesteps}.pth').as_posix() + self.model.save(ckpt_path) + wandb.save(f'./{ckpt_path}') + self.n_epoch += 1 + + # CONFIGHACK: curriculum + # num_zombies = {} + # for i in range(self.vec_env.num_envs): + # env_all_tasks = self.vec_env.get_attr('all_tasks',indices=i)[0] + # num_zombies[f'train/n_veh/{i}'] = env_all_tasks[0]['num_zombie_vehicles'] + # num_zombies[f'train/n_ped/{i}'] = env_all_tasks[0]['num_zombie_walkers'] + # if wandb.config['curriculum']: + # if avg_ep_stat['eval/route_completed_in_km'] > 1.0: + # # and avg_ep_stat['eval/red_light']>0: + # for env_task in env_all_tasks: + # env_task['num_zombie_vehicles'] += 10 + # env_task['num_zombie_walkers'] += 10 + # self.vec_env.set_attr('all_tasks', env_all_tasks, indices=i) + + # wandb.log(num_zombies, step=self.model.num_timesteps) + + def _on_rollout_end(self): + wandb.log({'time/rollout': self.model.t_rollout}, step=self.model.num_timesteps) + + # save rollout statistics + avg_ep_stat = self.get_avg_ep_stat(self.model.ep_stat_buffer, prefix='rollout/') + wandb.log(avg_ep_stat, step=self.model.num_timesteps) + + # action, mu, sigma histogram + action_statistics = np.array(self.model.action_statistics) + mu_statistics = np.array(self.model.mu_statistics) + sigma_statistics = np.array(self.model.sigma_statistics) + n_action = action_statistics.shape[-1] + action_statistics = action_statistics.reshape(-1, n_action) + mu_statistics = mu_statistics.reshape(-1, n_action) + sigma_statistics = sigma_statistics.reshape(-1, n_action) + + for i in range(n_action): + # path_str = (self._save_dir/f'action{i}.csv').as_posix() + # np.savetxt(path_str, action_statistics[:, i], delimiter=',') + # wandb.save(path_str) + wandb.log({f'action[{i}]': wandb.Histogram(action_statistics[:, i])}, step=self.model.num_timesteps) + wandb.log({f'alpha[{i}]': wandb.Histogram(mu_statistics[:, i])}, step=self.model.num_timesteps) + wandb.log({f'beta[{i}]': wandb.Histogram(sigma_statistics[:, i])}, step=self.model.num_timesteps) + + # render buffer + if (self.model.num_timesteps - self._last_time_buffer) >= self._buffer_step: + self._last_time_buffer = self.model.num_timesteps + buffer_video_path = (self._video_path / f'buffer_{self.model.num_timesteps}.mp4').as_posix() + + list_buffer_im = self.model.buffer.render() + encoder = ImageEncoder(buffer_video_path, list_buffer_im[0].shape, 30, 30) + for im in list_buffer_im: + encoder.capture_frame(im) + encoder.close() + encoder = None + + wandb.log({f'buffer/{self.model.num_timesteps}': wandb.Video(buffer_video_path)}, + step=self.model.num_timesteps) + + @staticmethod + def evaluate_policy(env, policy, video_path, min_eval_steps=3000): + policy = policy.eval() + t0 = time.time() + for i in range(env.num_envs): + env.set_attr('eval_mode', True, indices=i) + obs = env.reset() + + list_render = [] + ep_stat_buffer = [] + ep_events = {} + for i in range(env.num_envs): + ep_events[f'venv_{i}'] = [] + + n_step = 0 + n_timeout = 0 + env_done = np.array([False]*env.num_envs) + # while n_step < min_eval_steps: + while n_step < min_eval_steps or not np.all(env_done): + 
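+            # Deterministic rollout: keep stepping the vectorized envs until at least
+            # `min_eval_steps` steps have been collected and every env has finished an
+            # episode; one frame per step is rendered for the evaluation video.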
actions, values, log_probs, mu, sigma, _ = policy.forward(obs, deterministic=True, clip_action=True) + obs, reward, done, info = env.step(actions) + + for i in range(env.num_envs): + env.set_attr('action_value', values[i], indices=i) + env.set_attr('action_log_probs', log_probs[i], indices=i) + env.set_attr('action_mu', mu[i], indices=i) + env.set_attr('action_sigma', sigma[i], indices=i) + + list_render.append(env.render(mode='rgb_array')) + + n_step += 1 + env_done |= done + + for i in np.where(done)[0]: + ep_stat_buffer.append(info[i]['episode_stat']) + ep_events[f'venv_{i}'].append(info[i]['episode_event']) + n_timeout += int(info[i]['timeout']) + + # conda install x264=='1!152.20180717' ffmpeg=4.0.2 -c conda-forge + encoder = ImageEncoder(video_path, list_render[0].shape, 30, 30) + for im in list_render: + encoder.capture_frame(im) + encoder.close() + + avg_ep_stat = WandbCallback.get_avg_ep_stat(ep_stat_buffer, prefix='eval/') + avg_ep_stat['eval/eval_timeout'] = n_timeout + + duration = time.time() - t0 + avg_ep_stat['time/t_eval'] = duration + avg_ep_stat['time/fps_eval'] = n_step * env.num_envs / duration + + for i in range(env.num_envs): + env.set_attr('eval_mode', False, indices=i) + obs = env.reset() + return avg_ep_stat, ep_events + + @staticmethod + def get_avg_ep_stat(ep_stat_buffer, prefix=''): + avg_ep_stat = {} + if len(ep_stat_buffer) > 0: + for ep_info in ep_stat_buffer: + for k, v in ep_info.items(): + k_avg = f'{prefix}{k}' + if k_avg in avg_ep_stat: + avg_ep_stat[k_avg] += v + else: + avg_ep_stat[k_avg] = v + + n_episodes = float(len(ep_stat_buffer)) + for k in avg_ep_stat.keys(): + avg_ep_stat[k] /= n_episodes + avg_ep_stat[f'{prefix}n_episodes'] = n_episodes + + return avg_ep_stat
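The averaging done by get_avg_ep_stat is easy to sanity-check in isolation. The snippet below is illustrative only and not part of the patch; it assumes the packages imported by wandb_callback.py (wandb, stable_baselines3, gym, omegaconf) are installed so the module imports, and it uses the stat keys that appear in the commented-out curriculum block ('route_completed_in_km', 'red_light') purely as example data.

from roach.utils.wandb_callback import WandbCallback

# Two hypothetical per-episode stat dicts, shaped like the entries that
# evaluate_policy collects from info[i]['episode_stat'].
ep_stat_buffer = [
    {'route_completed_in_km': 1.2, 'red_light': 0.0},
    {'route_completed_in_km': 0.8, 'red_light': 1.0},
]

avg = WandbCallback.get_avg_ep_stat(ep_stat_buffer, prefix='eval/')
print(avg)
# {'eval/route_completed_in_km': 1.0, 'eval/red_light': 0.5, 'eval/n_episodes': 2.0}

As a BaseCallback subclass, the callback itself is presumably attached in the usual stable-baselines3 way, i.e. passed to the PPO model's learn(callback=...) call during training.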