Merge branch 'master' of https://github.com/Learning-and-Intelligent-… #98

Merged · 4 commits · Aug 17, 2023
1 change: 1 addition & 0 deletions .gitignore
@@ -27,6 +27,7 @@ tests/_fake_results
predicators/envs/assets/task_jsons/spot_bike_env/last.json
spot_perception_outputs
spot_perception_debug_dir/
sas_plan

# Jetbrains IDEs
.idea/
6 changes: 6 additions & 0 deletions mypy.ini
@@ -112,3 +112,9 @@ ignore_missing_imports = True

[mypy-playsound.*]
ignore_missing_imports = True

[mypy-gymnasium.*]
ignore_missing_imports = True

[mypy-gymnasium_robotics.*]
ignore_missing_imports = True
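For context (not part of the diff): each stanza tells mypy to stop reporting missing type stubs for one third-party package, here the new gymnasium dependencies. The per-import alternative, as a minimal sketch, would be an inline ignore:

import gymnasium  # type: ignore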
111 changes: 92 additions & 19 deletions predicators/approaches/active_sampler_learning_approach.py
@@ -12,7 +12,9 @@

import abc
import logging
from typing import Any, Callable, Dict, List, Optional, Sequence, Set, Tuple
from collections import defaultdict
from typing import Any, Callable, DefaultDict, Dict, List, Optional, \
Sequence, Set, Tuple

import dill as pkl
import numpy as np
@@ -21,13 +23,14 @@
from predicators import utils
from predicators.approaches.online_nsrt_learning_approach import \
OnlineNSRTLearningApproach
from predicators.competence_models import SkillCompetenceModel
from predicators.explorers import BaseExplorer, create_explorer
from predicators.ml_models import BinaryClassifier, BinaryClassifierEnsemble, \
KNeighborsClassifier, MLPBinaryClassifier, MLPRegressor
from predicators.settings import CFG
from predicators.structs import NSRT, Array, GroundAtom, LowLevelTrajectory, \
NSRTSampler, Object, ParameterizedOption, Predicate, Segment, State, \
Task, Type, _GroundNSRT, _GroundSTRIPSOperator, _Option
Metrics, NSRTSampler, Object, ParameterizedOption, Predicate, Segment, \
State, Task, Type, _GroundNSRT, _GroundSTRIPSOperator, _Option

# Dataset for sampler learning: includes (s, option, s', label) per param opt.
_OptionSamplerDataset = List[Tuple[State, _Option, State, Any]]
@@ -54,6 +57,8 @@ def __init__(self, initial_predicates: Set[Predicate],
# successfully reached their effects or not). Updated in-place by the
# explorer when CFG.explorer is active_sampler_explorer.
self._ground_op_hist: Dict[_GroundSTRIPSOperator, List[bool]] = {}
self._competence_models: Dict[_GroundSTRIPSOperator,
SkillCompetenceModel] = {}
self._last_seen_segment_traj_idx = -1

# For certain methods, we may want the NSRTs used for exploration to
@@ -62,10 +67,33 @@
# NSRTs to samplers to be used at exploration time.
self._nsrt_to_explorer_sampler: Dict[NSRT, NSRTSampler] = {}

# Record which train tasks have been seen during exploration so far.
self._seen_train_task_idxs: Set[int] = set()

self._default_cost = -np.log(utils.beta_bernoulli_posterior([]).mean())

@classmethod
def get_name(cls) -> str:
return "active_sampler_learning"

def _run_task_plan(
self, task: Task, nsrts: Set[NSRT], preds: Set[Predicate],
timeout: float, seed: int, **kwargs: Any
) -> Tuple[List[_GroundNSRT], List[Set[GroundAtom]], Metrics]:
# Add ground operator competence for competence-aware planning.
ground_op_costs = {
o: -np.log(m.get_current_competence())
for o, m in self._competence_models.items()
}
return super()._run_task_plan(task,
nsrts,
preds,
timeout,
seed,
ground_op_costs=ground_op_costs,
default_cost=self._default_cost,
**kwargs)
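For intuition (a note on the override above, not part of the diff): each ground operator's planning cost is the negative log of its estimated competence, so skills that reliably succeed are cheap to include in plans and unreliable ones are expensive. With no history, the default cost is the negative log of an empty Beta-Bernoulli posterior's mean, which is -log(0.5) ≈ 0.693 if the prior is uniform. A minimal sketch of the mapping:

import numpy as np

for competence in (0.9, 0.5, 0.1):
    print(f"competence={competence:.1f} -> cost={-np.log(competence):.3f}")
# competence=0.9 -> cost=0.105
# competence=0.5 -> cost=0.693
# competence=0.1 -> cost=2.303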

def _create_explorer(self) -> BaseExplorer:
# Geometrically increase the length of exploration.
b = CFG.active_sampler_learning_explore_length_base
@@ -81,8 +109,10 @@ def _create_explorer(self) -> BaseExplorer:
self._get_current_nsrts(),
self._option_model,
ground_op_hist=self._ground_op_hist,
competence_models=self._competence_models,
max_steps_before_termination=max_steps,
nsrt_to_explorer_sampler=self._nsrt_to_explorer_sampler)
nsrt_to_explorer_sampler=self._nsrt_to_explorer_sampler,
seen_train_task_idxs=self._seen_train_task_idxs)
return explorer
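To illustrate the geometric schedule mentioned in the comment above, here is a hypothetical sketch assuming max_steps = b ** (1 + cycle); the exact exponent used in the codebase may differ:

b = 2  # stand-in for CFG.active_sampler_learning_explore_length_base
for cycle in range(4):
    print(cycle, b**(1 + cycle))  # 0 -> 2, 1 -> 4, 2 -> 8, 3 -> 16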

def load(self, online_learning_cycle: Optional[int]) -> None:
@@ -92,9 +122,11 @@ def load(self, online_learning_cycle: Optional[int]) -> None:
save_dict = pkl.load(f)
self._sampler_data = save_dict["sampler_data"]
self._ground_op_hist = save_dict["ground_op_hist"]
self._competence_models = save_dict["competence_models"]
self._last_seen_segment_traj_idx = save_dict[
"last_seen_segment_traj_idx"]
self._nsrt_to_explorer_sampler = save_dict["nsrt_to_explorer_sampler"]
self._seen_train_task_idxs = save_dict["seen_train_task_idxs"]
self._online_learning_cycle = CFG.skip_until_cycle + 1

def _learn_nsrts(self, trajectories: List[LowLevelTrajectory],
@@ -111,6 +143,9 @@ def _learn_nsrts(self, trajectories: List[LowLevelTrajectory],
self._update_sampler_data()
# Re-learn samplers. Updates the NSRTs.
self._learn_wrapped_samplers(online_learning_cycle)
# Advance the competence models.
for competence_model in self._competence_models.values():
competence_model.advance_cycle()
# Save the things we need other than the NSRTs, which were already
# saved in the above call to self._learn_nsrts()
save_path = utils.get_approach_save_path_str()
@@ -119,14 +154,18 @@ def _learn_nsrts(self, trajectories: List[LowLevelTrajectory],
{
"sampler_data": self._sampler_data,
"ground_op_hist": self._ground_op_hist,
"competence_models": self._competence_models,
"last_seen_segment_traj_idx":
self._last_seen_segment_traj_idx,
"nsrt_to_explorer_sampler": self._nsrt_to_explorer_sampler,
"seen_train_task_idxs": self._seen_train_task_idxs,
}, f)

def _update_sampler_data(self) -> None:
start_idx = self._last_seen_segment_traj_idx + 1
new_trajs = self._segmented_trajs[start_idx:]
ground_op_to_num_data: DefaultDict[_GroundSTRIPSOperator,
int] = defaultdict(int)
for segmented_traj in new_trajs:
self._last_seen_segment_traj_idx += 1
just_made_incorrect_pick = False
@@ -170,6 +209,18 @@ def _update_sampler_data(self) -> None:
if o.parent not in self._sampler_data:
self._sampler_data[o.parent] = []
self._sampler_data[o.parent].append((s, o, ns, label))
ground_nsrt = utils.option_to_ground_nsrt(o, self._nsrts)
ground_op_to_num_data[ground_nsrt.op] += 1
# Save competence models.
for ground_op, model in self._competence_models.items():
approach_save_path = utils.get_approach_save_path_str()
save_path = "_".join([
approach_save_path, f"{ground_op.name}{ground_op.objects}",
f"{self._online_learning_cycle}.competence"
])
with open(save_path, "wb") as f:
pkl.dump(model, f)
logging.info(f"Saved competence model to {save_path}.")

def _check_option_success(self, option: _Option, segment: Segment) -> bool:
ground_nsrt = utils.option_to_ground_nsrt(option, self._nsrts)
@@ -321,9 +372,14 @@ def _learn_nsrt_sampler(self, nsrt_data: _OptionSamplerDataset,
# Easiest way to access the base sampler.
base_sampler = nsrt._sampler # pylint: disable=protected-access
score_fn = _classifier_to_score_fn(classifier, nsrt)
wrapped_sampler = _wrap_sampler(base_sampler, score_fn)

return (wrapped_sampler, wrapped_sampler)
wrapped_sampler_test = _wrap_sampler(base_sampler,
score_fn,
strategy="greedy")
wrapped_sampler_exploration = _wrap_sampler(
base_sampler,
score_fn,
strategy=CFG.active_sampler_learning_exploration_sample_strategy)
return (wrapped_sampler_test, wrapped_sampler_exploration)


class _ClassifierEnsembleWrappedSamplerLearner(_WrappedSamplerLearner):
@@ -375,13 +431,18 @@ def _learn_nsrt_sampler(self, nsrt_data: _OptionSamplerDataset,
test_score_fn = _classifier_ensemble_to_score_fn(classifier,
nsrt,
test_time=True)
test_wrapped_sampler = _wrap_sampler(base_sampler, test_score_fn)
wrapped_sampler_test = _wrap_sampler(base_sampler,
test_score_fn,
strategy="greedy")
explore_score_fn = _classifier_ensemble_to_score_fn(classifier,
nsrt,
test_time=False)
explore_wrapped_sampler = _wrap_sampler(base_sampler, explore_score_fn)
wrapped_sampler_exploration = _wrap_sampler(
base_sampler,
explore_score_fn,
strategy=CFG.active_sampler_learning_exploration_sample_strategy)

return (test_wrapped_sampler, explore_wrapped_sampler)
return (wrapped_sampler_test, wrapped_sampler_exploration)


class _FittedQWrappedSamplerLearner(_WrappedSamplerLearner):
@@ -433,8 +494,14 @@ def _learn_nsrt_sampler(self, nsrt_data: _OptionSamplerDataset,
score_fn = _regressor_to_score_fn(regressor, nsrt)
# Save the score function for use in later target computation.
self._next_nsrt_score_fns[nsrt] = score_fn
wrapped_sampler = _wrap_sampler(base_sampler, score_fn)
return (wrapped_sampler, wrapped_sampler)
wrapped_sampler_test = _wrap_sampler(base_sampler,
score_fn,
strategy="greedy")
wrapped_sampler_exploration = _wrap_sampler(
base_sampler,
score_fn,
strategy=CFG.active_sampler_learning_exploration_sample_strategy)
return (wrapped_sampler_test, wrapped_sampler_exploration)

def _predict(self, state: State, option: _Option) -> float:
"""Predict Q(s, a)."""
Expand All @@ -443,7 +510,7 @@ def _predict(self, state: State, option: _Option) -> float:
ground_nsrt = utils.option_to_ground_nsrt(option, self._nsrts)
# Special case: we haven't seen any data for the parent NSRT, so we
# haven't learned a score function for it.
if ground_nsrt.parent not in self._nsrt_score_fns:
if ground_nsrt.parent not in self._nsrt_score_fns: # pragma: no cover
return 0.0
score_fn = self._nsrt_score_fns[ground_nsrt.parent]
return score_fn(state, ground_nsrt.objects, [option.params])[0]
@@ -503,10 +570,8 @@ def _fit_regressor(self, nsrt_data: _OptionSamplerDataset) -> MLPRegressor:


# Helper functions.
def _wrap_sampler(
base_sampler: NSRTSampler,
score_fn: _ScoreFn,
) -> NSRTSampler:
def _wrap_sampler(base_sampler: NSRTSampler, score_fn: _ScoreFn,
strategy: str) -> NSRTSampler:
"""Create a wrapped sampler that uses a score function to select among
candidates from a base sampler."""

@@ -517,8 +582,16 @@ def _sample(state: State, goal: Set[GroundAtom], rng: np.random.Generator,
for _ in range(CFG.active_sampler_learning_num_samples)
]
scores = score_fn(state, objects, samples)
# For now, just pick the best scoring sample.
idx = np.argmax(scores)
if strategy in ["greedy", "epsilon_greedy"]:
idx = int(np.argmax(scores))
if strategy == "epsilon_greedy" and rng.uniform(
) <= CFG.active_sampler_learning_exploration_epsilon:
# Pick a random sample instead, following the epsilon-greedy strategy.
idx = rng.integers(0, len(scores))
else:
raise NotImplementedError(
    f"Exploration strategy {strategy} is not implemented.")
return samples[idx]

return _sample
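The wrapper above draws CFG.active_sampler_learning_num_samples candidates from the base sampler, scores them all, and returns the argmax; under epsilon_greedy it occasionally returns a uniformly random candidate instead. A self-contained sketch of that selection rule, with an assumed epsilon value:

import numpy as np

rng = np.random.default_rng(0)
scores = [0.2, 0.9, 0.5]
epsilon = 0.1  # stand-in for CFG.active_sampler_learning_exploration_epsilon
idx = int(np.argmax(scores))  # greedy choice: index 1
if rng.uniform() <= epsilon:
    # Explore: override the greedy pick with a uniform random index.
    idx = int(rng.integers(0, len(scores)))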
2 changes: 1 addition & 1 deletion predicators/approaches/bridge_policy_approach.py
@@ -272,7 +272,7 @@ def learn_from_interaction_results(
traj = response.teacher_traj
assert traj is not None
atom_traj = [utils.abstract(s, preds) for s in traj.states]
segmented_traj = segment_trajectory((traj, atom_traj))
segmented_traj = segment_trajectory(traj, preds, atom_traj)
if not segmented_traj:
assert len(atom_traj) == 1
states = [traj.states[0]]
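A note on the API change above, which recurs in the files below: segment_trajectory now takes the low-level trajectory and the predicate set directly, plus an optional precomputed atom sequence, instead of a (trajectory, atom_sequence) tuple. The two call patterns in this diff, with names taken from the call site above:

# With atoms already computed (e.g. via utils.abstract):
segmented_traj = segment_trajectory(traj, preds, atom_traj)
# Without; atoms are derived from preds as needed:
segmented_traj = segment_trajectory(traj, preds)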
9 changes: 4 additions & 5 deletions predicators/approaches/gnn_option_policy_approach.py
@@ -43,15 +43,14 @@ def _generate_data_from_dataset(
self, dataset: Dataset
) -> List[Tuple[State, Set[GroundAtom], Set[GroundAtom], _Option]]:
data = []
ground_atom_dataset = utils.create_ground_atom_dataset(
dataset.trajectories, self._initial_predicates)
# In this approach, we never learned any NSRTs, so we just call
# segment_trajectory() to segment the given dataset.
segmented_trajs = [
segment_trajectory(traj) for traj in ground_atom_dataset
segment_trajectory(traj, self._initial_predicates)
for traj in dataset.trajectories
]
for segment_traj, (ll_traj, _) in zip(segmented_trajs,
ground_atom_dataset):
for segment_traj, ll_traj in zip(segmented_trajs,
dataset.trajectories):
if not ll_traj.is_demo:
continue
goal = self._train_tasks[ll_traj.train_task_idx].goal
21 changes: 11 additions & 10 deletions predicators/approaches/grammar_search_invention_approach.py
@@ -21,8 +21,8 @@
from predicators.predicate_search_score_functions import \
_PredicateSearchScoreFunction, create_score_function
from predicators.settings import CFG
from predicators.structs import Dataset, GroundAtom, GroundAtomTrajectory, \
Object, ParameterizedOption, Predicate, Segment, State, Task, Type
from predicators.structs import Dataset, GroundAtomTrajectory, Object, \
ParameterizedOption, Predicate, Segment, State, Task, Type

################################################################################
# Programmatic classifiers #
@@ -614,10 +614,8 @@ def __post_init__(self) -> None:
# states in each segment, which we store into
# self._state_sequences.
for traj in self.dataset.trajectories:
dummy_atoms_seq: List[Set[GroundAtom]] = [
set() for _ in range(len(traj.states))
]
seg_traj = segment_trajectory((traj, dummy_atoms_seq))
# The init_atoms and final_atoms are not used.
seg_traj = segment_trajectory(traj, predicates=set())
state_seq = utils.segment_trajectory_to_state_sequence(
seg_traj)
self._state_sequences.append(state_seq)
@@ -861,10 +859,11 @@ def _get_successors(
# preconditions.
logging.info("\nFiltering out predicates that don't appear in "
"preconditions...")
pruned_atom_data = utils.prune_ground_atom_dataset(
atom_dataset, kept_predicates | initial_predicates)
preds = kept_predicates | initial_predicates
pruned_atom_data = utils.prune_ground_atom_dataset(atom_dataset, preds)
segmented_trajs = [
segment_trajectory(traj) for traj in pruned_atom_data
segment_trajectory(ll_traj, set(preds), atom_seq=atom_seq)
for (ll_traj, atom_seq) in pruned_atom_data
]
low_level_trajs = [ll_traj for ll_traj, _ in pruned_atom_data]
preds_in_preconds = set()
@@ -900,8 +899,10 @@ def _select_predicates_by_clustering(
assert dataset.annotations is not None and len(
dataset.annotations) == len(dataset.trajectories)
assert CFG.segmenter == "option_changes"
preds = set(initial_predicates)
segmented_trajs = [
segment_trajectory(traj) for traj in atom_dataset
segment_trajectory(ll_traj, preds, atom_seq)
for ll_traj, atom_seq in atom_dataset
]
assert len(segmented_trajs) == len(dataset.annotations)
# First, get the set of all ground truth operator names.
11 changes: 8 additions & 3 deletions predicators/approaches/nsrt_learning_approach.py
@@ -18,8 +18,8 @@
from predicators.nsrt_learning.nsrt_learning_main import learn_nsrts_from_data
from predicators.planning import task_plan, task_plan_grounding
from predicators.settings import CFG
from predicators.structs import NSRT, Dataset, LowLevelTrajectory, \
ParameterizedOption, Predicate, Segment, Task, Type
from predicators.structs import NSRT, Dataset, GroundAtomTrajectory, \
LowLevelTrajectory, ParameterizedOption, Predicate, Segment, Task, Type


class NSRTLearningApproach(BilevelPlanningApproach):
@@ -62,6 +62,11 @@ def _learn_nsrts(self, trajectories: List[LowLevelTrajectory],
online_learning_cycle=online_learning_cycle)
# If CFG.load_atoms is set, then try to create a GroundAtomTrajectory
# by loading sets of GroundAtoms directly from a saved file.
# By default, we don't create a full ground atom dataset, since
# doing so requires calling abstract on all states, including states
# that might ultimately just be in the middle of segments. When
# options take many steps, this makes a big time/space difference.
ground_atom_dataset: Optional[List[GroundAtomTrajectory]] = None
if CFG.load_atoms:
os.makedirs(CFG.data_dir, exist_ok=True)
# Check that the dataset file was previously saved.
Expand All @@ -82,7 +87,7 @@ def _learn_nsrts(self, trajectories: List[LowLevelTrajectory],
(traj, [set(atoms) for atoms in ground_atom_seq]))
else:
raise ValueError(f"Cannot load ground atoms: {dataset_fname}")
else:
elif CFG.save_atoms:
# Apply predicates to data, producing a dataset of abstract states.
ground_atom_dataset = utils.create_ground_atom_dataset(
trajectories, self._get_current_predicates())
1 change: 1 addition & 0 deletions predicators/args.py
@@ -37,6 +37,7 @@ def create_arg_parser(env_required: bool = True,
parser.add_argument("--restart_learning", action="store_true")
parser.add_argument("--load_data", action="store_true")
parser.add_argument("--load_atoms", action="store_true")
parser.add_argument("--save_atoms", action="store_true")
parser.add_argument("--skip_until_cycle", default=-1, type=int)
parser.add_argument("--experiment_id", default="", type=str)
parser.add_argument("--load_experiment_id", default="", type=str)