From 8fd338780e5a1c066aa1d12c4d9913ee2af6c373 Mon Sep 17 00:00:00 2001
From: nkumar-bdaii
Date: Thu, 16 May 2024 18:10:55 -0400
Subject: [PATCH] should be good to go!

---
 .../spot_utils/perception/object_detection.py | 122 +-----------------
 1 file changed, 4 insertions(+), 118 deletions(-)

diff --git a/predicators/spot_utils/perception/object_detection.py b/predicators/spot_utils/perception/object_detection.py
index 6757e8f324..ee315c70bf 100644
--- a/predicators/spot_utils/perception/object_detection.py
+++ b/predicators/spot_utils/perception/object_detection.py
@@ -686,10 +686,9 @@ def display_camera_detections(artifacts: Dict[str, Any],
         "right_fisheye_image",
         "frontright_fisheye_image",
     ]
-    TEST_APRIL_TAG_ID = 408
     TEST_LANGUAGE_DESCRIPTIONS = [
-        "small basketball toy/stuffed toy basketball/small orange ball",
-        "small football toy/stuffed toy football/small brown ball",
+        "potted plant",
+        "green apple/tennis ball",
     ]
 
     def _run_manual_test() -> None:
@@ -720,18 +719,10 @@ def _run_manual_test() -> None:
         localizer = SpotLocalizer(robot, path, lease_client, lease_keepalive)
         rgbds = capture_images(robot, localizer, TEST_CAMERAS)
 
-        # Detect the april tag and brush.
-        april_tag_id: ObjectDetectionID = AprilTagObjectDetectionID(
-            TEST_APRIL_TAG_ID)
         language_ids: List[ObjectDetectionID] = [
             LanguageObjectDetectionID(d) for d in TEST_LANGUAGE_DESCRIPTIONS
         ]
-        known_static_id: ObjectDetectionID = KnownStaticObjectDetectionID(
-            "imaginary_box",
-            math_helpers.SE3Pose(-5, 0, 0, rot=math_helpers.Quat()))
-        object_ids: List[ObjectDetectionID] = [april_tag_id, known_static_id
-                                               ] + language_ids
-        detections, artifacts = detect_objects(object_ids, rgbds)
+        detections, artifacts = detect_objects(language_ids, rgbds)
 
         for obj_id, detection in detections.items():
             print(f"Detected {obj_id} at {detection}")
@@ -741,109 +732,4 @@ def _run_manual_test() -> None:
         visualize_all_artifacts(artifacts, detections_outfile,
                                 no_detections_outfile)
 
-    def _run_pythonic_bowl_test() -> None:
-        # Test for using an arbitrary python function to detect objects,
-        # which in this case uses a combination of vision-language and
-        # colored-based detection to find a bowl that has blue tape on the
-        # bottom. The tape is used to crudely orient the bowl. Like the
-        # previous test, this one assumes that the bowl is within view.
-        # Put inside a function to avoid variable scoping issues.
-        args = utils.parse_args(env_required=False,
-                                seed_required=False,
-                                approach_required=False)
-        utils.update_config(args)
-
-        # Get constants.
-        hostname = CFG.spot_robot_ip
-        path = get_graph_nav_dir()
-
-        # First, capture images.
-        sdk = create_standard_sdk('SpotCameraTestClient')
-        robot = sdk.create_robot(hostname)
-        authenticate(robot)
-        verify_estop(robot)
-        lease_client = robot.ensure_client(LeaseClient.default_service_name)
-        lease_client.take()
-        lease_client = robot.ensure_client(LeaseClient.default_service_name)
-        lease_client.take()
-        lease_keepalive = LeaseKeepAlive(lease_client,
-                                         must_acquire=True,
-                                         return_at_exit=True)
-
-        assert path.exists()
-        localizer = SpotLocalizer(robot, path, lease_client, lease_keepalive)
-        rgbds = capture_images(robot, localizer)
-
-        def _detect_bowl(
-            rgbds: Dict[str, RGBDImageWithContext]
-        ) -> Optional[math_helpers.SE3Pose]:
-            # ONLY use the hand camera (which we assume is looking down)
-            # because otherwise it's impossible to see the top/bottom.
-            hand_camera = "hand_color_image"
-            assert hand_camera in rgbds
-            rgbds = {hand_camera: rgbds[hand_camera]}
-            # Start by using vision-language.
-            language_id = LanguageObjectDetectionID("large cup")
-            detections, artifacts = detect_objects([language_id], rgbds)
-            if not detections:
-                return None
-            # Crop using the bounding box. If there were multiple detections,
-            # choose the highest scoring one.
-            obj_id_to_img_detections = artifacts["language"][
-                "object_id_to_img_detections"]
-            img_detections = obj_id_to_img_detections[language_id]
-            assert len(img_detections) > 0
-            best_seg_bb: Optional[SegmentedBoundingBox] = None
-            best_seg_bb_score = -np.inf
-            best_camera: Optional[str] = None
-            for camera, seg_bb in img_detections.items():
-                if seg_bb.score > best_seg_bb_score:
-                    best_seg_bb_score = seg_bb.score
-                    best_seg_bb = seg_bb
-                    best_camera = camera
-            assert best_camera is not None
-            assert best_seg_bb is not None
-            x1, y1, x2, y2 = best_seg_bb.bounding_box
-            x_min, x_max = min(x1, x2), max(x1, x2)
-            y_min, y_max = min(y1, y2), max(y1, y2)
-            best_rgb = rgbds[best_camera].rgb
-            height, width = best_rgb.shape[:2]
-            r_min = min(max(int(y_min), 0), height)
-            r_max = min(max(int(y_max), 0), height)
-            c_min = min(max(int(x_min), 0), width)
-            c_max = min(max(int(x_max), 0), width)
-            cropped_img = best_rgb[r_min:r_max, c_min:c_max]
-            # Look for the blue tape inside the bounding box.
-            lo, hi = ((0, 130, 130), (130, 255, 255))
-            centroid = find_color_based_centroid(cropped_img, lo, hi)
-            blue_tape_found = (centroid is not None)
-            # If the blue tape was found, assume that the bowl is oriented
-            # upside-down; otherwise, it's right-side up.
-            if blue_tape_found:
-                roll = np.pi
-                print("Detected blue tape; bowl is upside-down!")
-            else:
-                roll = 0.0
-                print("Did NOT detect blue tape; bowl is right side-up!")
-            rot = math_helpers.Quat.from_roll(roll)
-            # Use the x, y, z from vision-language.
-            vision_language_pose = detections[language_id]
-            pose = math_helpers.SE3Pose(x=vision_language_pose.x,
-                                        y=vision_language_pose.y,
-                                        z=vision_language_pose.z,
-                                        rot=rot)
-            return pose
-
-        bowl_id = PythonicObjectDetectionID("bowl", _detect_bowl)
-        detections, artifacts = detect_objects([bowl_id], rgbds)
-        for obj_id, detection in detections.items():
-            print(f"Detected {obj_id} at {detection}")
-
-        # Visualize the artifacts.
-        detections_outfile = Path(".") / "object_detection_artifacts.png"
-        no_detections_outfile = Path(".") / "no_detection_artifacts.png"
-        visualize_all_artifacts(artifacts, detections_outfile,
-                                no_detections_outfile)
-
-    _run_manual_test()
-    _run_pythonic_bowl_test()
+    _run_manual_test()
\ No newline at end of file
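
Note for reviewers (appended after the diff; not applied by git am): with this
patch, the manual test exercises only the language-based detection path; the
AprilTag, known-static-object, and pythonic bowl tests are removed. Below is a
minimal sketch of the remaining flow, assuming the predicators repo is
importable and that rgbds was already captured on the robot via capture_images
as in _run_manual_test. The wrapper name detect_from_descriptions is an
illustrative assumption, not code from this patch; the imported names are the
ones visible in the diff above.

    from typing import Dict, List

    # Assumed import path: the module edited by this patch.
    from predicators.spot_utils.perception.object_detection import (
        LanguageObjectDetectionID, ObjectDetectionID, detect_objects)


    def detect_from_descriptions(rgbds: Dict[str, "RGBDImageWithContext"]):
        """Hypothetical wrapper mirroring the simplified _run_manual_test.

        rgbds maps camera names to RGBDImageWithContext images, as produced
        by capture_images(robot, localizer, TEST_CAMERAS).
        """
        descriptions = ["potted plant", "green apple/tennis ball"]
        language_ids: List[ObjectDetectionID] = [
            LanguageObjectDetectionID(d) for d in descriptions
        ]
        # detect_objects returns (detections, artifacts): detected poses
        # keyed by ObjectDetectionID, plus artifacts that can be rendered
        # with visualize_all_artifacts.
        return detect_objects(language_ids, rgbds)

To run the manual test on hardware, invoke the module directly with the robot
reachable at CFG.spot_robot_ip (the flag name below is inferred from that
config field, so treat it as an assumption) and the described objects placed
in view of the cameras listed in TEST_CAMERAS:

    python predicators/spot_utils/perception/object_detection.py \
        --spot_robot_ip <ROBOT_IP>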