diff --git a/image_0.png b/image_0.png
index 7fd439e1d..874f8f32f 100644
Binary files a/image_0.png and b/image_0.png differ
diff --git a/image_1.png b/image_1.png
index bdabc7479..303729689 100644
Binary files a/image_1.png and b/image_1.png differ
diff --git a/image_2.png b/image_2.png
index ad118a751..7d386f85f 100644
Binary files a/image_2.png and b/image_2.png differ
diff --git a/image_3.png b/image_3.png
index 2d6dd6a89..07385ca6d 100644
Binary files a/image_3.png and b/image_3.png differ
diff --git a/image_4.png b/image_4.png
index b3e3b8b76..40b36bfea 100644
Binary files a/image_4.png and b/image_4.png differ
diff --git a/image_5.png b/image_5.png
index 9b227df31..de471a98a 100644
Binary files a/image_5.png and b/image_5.png differ
diff --git a/predicators/perception/spot_perceiver.py b/predicators/perception/spot_perceiver.py
index db98b07ac..1b498e4ab 100644
--- a/predicators/perception/spot_perceiver.py
+++ b/predicators/perception/spot_perceiver.py
@@ -4,11 +4,13 @@
 import time
 from pathlib import Path
 from typing import Dict, List, Optional, Set, Collection
+import json
 
 import imageio.v2 as iio
 import numpy as np
 from bosdyn.client import math_helpers
 from matplotlib import pyplot as plt
+import PIL.Image
 
 from predicators import utils
 from predicators.envs import BaseEnv, get_or_create_env
@@ -569,8 +571,7 @@ def _create_goal(self, state: State,
             GroundAtom(Inside, [cup, plastic_bin]),
         }
         object_name_to_object = {}
-        self._parse_vlm_goal_from_state(state, goal_description, object_name_to_object)
-        import ipdb; ipdb.set_trace()
+        return self._parse_vlm_goal_from_state(state, goal_description, object_name_to_object)
         if goal_description == "know container as empty":
             cup = Object("cup", _container_type)
             ContainingFoodKnown = pred_name_to_pred["ContainingFoodKnown"]
@@ -722,9 +723,8 @@ def _parse_vlm_goal_from_state(
         object_names = set(id_to_obj)
         prompt_prefix = self._get_language_goal_prompt_prefix(object_names)
         prompt = prompt_prefix + f"\n# {language_goal}"
-        import ipdb; ipdb.set_trace()
         image_list = [
-            PIL.Image.fromarray(v.rotated_rgb) for _, v in rgbds.items()
+            PIL.Image.fromarray(v.rotated_rgb) for _, v in state.camera_images.items()
         ]
         responses = vlm.sample_completions(
             prompt=prompt,
@@ -734,10 +734,14 @@
             num_completions=1,
         )
         response = responses[0]
 
-        import ipdb; ipdb.set_trace()
         # Currently assumes that the LLM is perfect. In the future, will need
         # to handle various errors and perhaps query the LLM for multiple
         # responses until we find one that can be parsed.
-        goal_spec = json.loads(response)
+        try:
+            goal_spec = json.loads(response)
+        except json.JSONDecodeError:
+            # Fallback: the VLM may wrap its answer in a ```json ... ``` fence.
+            goal_spec = json.loads(response.replace('```json', '').replace('```', ''))
+
         return self._curr_env._parse_goal_from_json(goal_spec, id_to_obj)
diff --git a/predicators/spot_utils/perception/object_perception.py b/predicators/spot_utils/perception/object_perception.py
index fc9f4e1fe..448aa03b5 100644
--- a/predicators/spot_utils/perception/object_perception.py
+++ b/predicators/spot_utils/perception/object_perception.py
@@ -216,7 +216,6 @@ def query_vlm(full_prompt, image_list):
 
         votes = [result[i] for result in results]
         final_results.append(votes.count(True) > votes.count(False))
 
-    import ipdb; ipdb.set_trace()
 
     return final_results