Merge branch 'master' of https://github.com/Learning-and-Intelligent-… #98

Merged · 4 commits · Aug 17, 2023
1 change: 1 addition & 0 deletions .gitignore
@@ -27,6 +27,7 @@ tests/_fake_results
predicators/envs/assets/task_jsons/spot_bike_env/last.json
spot_perception_outputs
spot_perception_debug_dir/
sas_plan

# Jetbrains IDEs
.idea/
6 changes: 6 additions & 0 deletions mypy.ini
@@ -112,3 +112,9 @@ ignore_missing_imports = True

[mypy-playsound.*]
ignore_missing_imports = True

[mypy-gymnasium.*]
ignore_missing_imports = True

[mypy-gymnasium_robotics.*]
ignore_missing_imports = True
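For context (not part of the diff): each stanza tells mypy to stop reporting missing type stubs for one third-party package, here the new gymnasium dependencies. The per-import alternative, as a minimal sketch, would be an inline ignore:

import gymnasium  # type: ignore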
111 changes: 92 additions & 19 deletions predicators/approaches/active_sampler_learning_approach.py
@@ -12,7 +12,9 @@

import abc
import logging
from typing import Any, Callable, Dict, List, Optional, Sequence, Set, Tuple
from collections import defaultdict
from typing import Any, Callable, DefaultDict, Dict, List, Optional, \
Sequence, Set, Tuple

import dill as pkl
import numpy as np
@@ -21,13 +23,14 @@
from predicators import utils
from predicators.approaches.online_nsrt_learning_approach import \
OnlineNSRTLearningApproach
from predicators.competence_models import SkillCompetenceModel
from predicators.explorers import BaseExplorer, create_explorer
from predicators.ml_models import BinaryClassifier, BinaryClassifierEnsemble, \
KNeighborsClassifier, MLPBinaryClassifier, MLPRegressor
from predicators.settings import CFG
from predicators.structs import NSRT, Array, GroundAtom, LowLevelTrajectory, \
NSRTSampler, Object, ParameterizedOption, Predicate, Segment, State, \
Task, Type, _GroundNSRT, _GroundSTRIPSOperator, _Option
Metrics, NSRTSampler, Object, ParameterizedOption, Predicate, Segment, \
State, Task, Type, _GroundNSRT, _GroundSTRIPSOperator, _Option

# Dataset for sampler learning: includes (s, option, s', label) per param opt.
_OptionSamplerDataset = List[Tuple[State, _Option, State, Any]]
@@ -54,6 +57,8 @@ def __init__(self, initial_predicates: Set[Predicate],
# successfully reached their effects or not). Updated in-place by the
# explorer when CFG.explorer is active_sampler_explorer.
self._ground_op_hist: Dict[_GroundSTRIPSOperator, List[bool]] = {}
self._competence_models: Dict[_GroundSTRIPSOperator,
SkillCompetenceModel] = {}
self._last_seen_segment_traj_idx = -1

# For certain methods, we may want the NSRTs used for exploration to
@@ -62,10 +67,33 @@
# NSRTs to samplers to be used at exploration time.
self._nsrt_to_explorer_sampler: Dict[NSRT, NSRTSampler] = {}

# Record which train tasks have been seen during exploration so far.
self._seen_train_task_idxs: Set[int] = set()

self._default_cost = -np.log(utils.beta_bernoulli_posterior([]).mean())

@classmethod
def get_name(cls) -> str:
return "active_sampler_learning"

def _run_task_plan(
self, task: Task, nsrts: Set[NSRT], preds: Set[Predicate],
timeout: float, seed: int, **kwargs: Any
) -> Tuple[List[_GroundNSRT], List[Set[GroundAtom]], Metrics]:
# Add ground operator competence for competence-aware planning.
ground_op_costs = {
o: -np.log(m.get_current_competence())
for o, m in self._competence_models.items()
}
return super()._run_task_plan(task,
nsrts,
preds,
timeout,
seed,
ground_op_costs=ground_op_costs,
default_cost=self._default_cost,
**kwargs)
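For intuition (a note on the override above, not part of the diff): each ground operator's planning cost is the negative log of its estimated competence, so skills that reliably succeed are cheap to include in plans and unreliable ones are expensive. With no history, the default cost is the negative log of an empty Beta-Bernoulli posterior's mean, which is -log(0.5) ≈ 0.693 if the prior is uniform. A minimal sketch of the mapping:

import numpy as np

for competence in (0.9, 0.5, 0.1):
    print(f"competence={competence:.1f} -> cost={-np.log(competence):.3f}")
# competence=0.9 -> cost=0.105
# competence=0.5 -> cost=0.693
# competence=0.1 -> cost=2.303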

def _create_explorer(self) -> BaseExplorer:
# Geometrically increase the length of exploration.
b = CFG.active_sampler_learning_explore_length_base
@@ -81,8 +109,10 @@ def _create_explorer(self) -> BaseExplorer:
self._get_current_nsrts(),
self._option_model,
ground_op_hist=self._ground_op_hist,
competence_models=self._competence_models,
max_steps_before_termination=max_steps,
nsrt_to_explorer_sampler=self._nsrt_to_explorer_sampler)
nsrt_to_explorer_sampler=self._nsrt_to_explorer_sampler,
seen_train_task_idxs=self._seen_train_task_idxs)
return explorer
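To illustrate the geometric schedule mentioned in the comment above, here is a hypothetical sketch assuming max_steps = b ** (1 + cycle); the exact exponent used in the codebase may differ:

b = 2  # stand-in for CFG.active_sampler_learning_explore_length_base
for cycle in range(4):
    print(cycle, b**(1 + cycle))  # 0 -> 2, 1 -> 4, 2 -> 8, 3 -> 16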

def load(self, online_learning_cycle: Optional[int]) -> None:
@@ -92,9 +122,11 @@ def load(self, online_learning_cycle: Optional[int]) -> None:
save_dict = pkl.load(f)
self._sampler_data = save_dict["sampler_data"]
self._ground_op_hist = save_dict["ground_op_hist"]
self._competence_models = save_dict["competence_models"]
self._last_seen_segment_traj_idx = save_dict[
"last_seen_segment_traj_idx"]
self._nsrt_to_explorer_sampler = save_dict["nsrt_to_explorer_sampler"]
self._seen_train_task_idxs = save_dict["seen_train_task_idxs"]
self._online_learning_cycle = CFG.skip_until_cycle + 1

def _learn_nsrts(self, trajectories: List[LowLevelTrajectory],
@@ -111,6 +143,9 @@ def _learn_nsrts(self, trajectories: List[LowLevelTrajectory],
self._update_sampler_data()
# Re-learn samplers. Updates the NSRTs.
self._learn_wrapped_samplers(online_learning_cycle)
# Advance the competence models.
for competence_model in self._competence_models.values():
competence_model.advance_cycle()
# Save the things we need other than the NSRTs, which were already
# saved in the above call to self._learn_nsrts()
save_path = utils.get_approach_save_path_str()
@@ -119,14 +154,18 @@ def _learn_nsrts(self, trajectories: List[LowLevelTrajectory],
{
"sampler_data": self._sampler_data,
"ground_op_hist": self._ground_op_hist,
"competence_models": self._competence_models,
"last_seen_segment_traj_idx":
self._last_seen_segment_traj_idx,
"nsrt_to_explorer_sampler": self._nsrt_to_explorer_sampler,
"seen_train_task_idxs": self._seen_train_task_idxs,
}, f)

def _update_sampler_data(self) -> None:
start_idx = self._last_seen_segment_traj_idx + 1
new_trajs = self._segmented_trajs[start_idx:]
ground_op_to_num_data: DefaultDict[_GroundSTRIPSOperator,
int] = defaultdict(int)
for segmented_traj in new_trajs:
self._last_seen_segment_traj_idx += 1
just_made_incorrect_pick = False
@@ -170,6 +209,18 @@ def _update_sampler_data(self) -> None:
if o.parent not in self._sampler_data:
self._sampler_data[o.parent] = []
self._sampler_data[o.parent].append((s, o, ns, label))
ground_nsrt = utils.option_to_ground_nsrt(o, self._nsrts)
ground_op_to_num_data[ground_nsrt.op] += 1
# Save competence models.
for ground_op, model in self._competence_models.items():
approach_save_path = utils.get_approach_save_path_str()
save_path = "_".join([
approach_save_path, f"{ground_op.name}{ground_op.objects}",
f"{self._online_learning_cycle}.competence"
])
with open(save_path, "wb") as f:
pkl.dump(model, f)
logging.info(f"Saved competence model to {save_path}.")

def _check_option_success(self, option: _Option, segment: Segment) -> bool:
ground_nsrt = utils.option_to_ground_nsrt(option, self._nsrts)
@@ -321,9 +372,14 @@ def _learn_nsrt_sampler(self, nsrt_data: _OptionSamplerDataset,
# Easiest way to access the base sampler.
base_sampler = nsrt._sampler # pylint: disable=protected-access
score_fn = _classifier_to_score_fn(classifier, nsrt)
wrapped_sampler = _wrap_sampler(base_sampler, score_fn)

return (wrapped_sampler, wrapped_sampler)
wrapped_sampler_test = _wrap_sampler(base_sampler,
score_fn,
strategy="greedy")
wrapped_sampler_exploration = _wrap_sampler(
base_sampler,
score_fn,
strategy=CFG.active_sampler_learning_exploration_sample_strategy)
return (wrapped_sampler_test, wrapped_sampler_exploration)


class _ClassifierEnsembleWrappedSamplerLearner(_WrappedSamplerLearner):
@@ -375,13 +431,18 @@ def _learn_nsrt_sampler(self, nsrt_data: _OptionSamplerDataset,
test_score_fn = _classifier_ensemble_to_score_fn(classifier,
nsrt,
test_time=True)
test_wrapped_sampler = _wrap_sampler(base_sampler, test_score_fn)
wrapped_sampler_test = _wrap_sampler(base_sampler,
test_score_fn,
strategy="greedy")
explore_score_fn = _classifier_ensemble_to_score_fn(classifier,
nsrt,
test_time=False)
explore_wrapped_sampler = _wrap_sampler(base_sampler, explore_score_fn)
wrapped_sampler_exploration = _wrap_sampler(
base_sampler,
explore_score_fn,
strategy=CFG.active_sampler_learning_exploration_sample_strategy)

return (test_wrapped_sampler, explore_wrapped_sampler)
return (wrapped_sampler_test, wrapped_sampler_exploration)


class _FittedQWrappedSamplerLearner(_WrappedSamplerLearner):
@@ -433,8 +494,14 @@ def _learn_nsrt_sampler(self, nsrt_data: _OptionSamplerDataset,
score_fn = _regressor_to_score_fn(regressor, nsrt)
# Save the score function for use in later target computation.
self._next_nsrt_score_fns[nsrt] = score_fn
wrapped_sampler = _wrap_sampler(base_sampler, score_fn)
return (wrapped_sampler, wrapped_sampler)
wrapped_sampler_test = _wrap_sampler(base_sampler,
score_fn,
strategy="greedy")
wrapped_sampler_exploration = _wrap_sampler(
base_sampler,
score_fn,
strategy=CFG.active_sampler_learning_exploration_sample_strategy)
return (wrapped_sampler_test, wrapped_sampler_exploration)

def _predict(self, state: State, option: _Option) -> float:
"""Predict Q(s, a)."""
Expand All @@ -443,7 +510,7 @@ def _predict(self, state: State, option: _Option) -> float:
ground_nsrt = utils.option_to_ground_nsrt(option, self._nsrts)
# Special case: we haven't seen any data for the parent NSRT, so we
# haven't learned a score function for it.
if ground_nsrt.parent not in self._nsrt_score_fns:
if ground_nsrt.parent not in self._nsrt_score_fns: # pragma: no cover
return 0.0
score_fn = self._nsrt_score_fns[ground_nsrt.parent]
return score_fn(state, ground_nsrt.objects, [option.params])[0]
@@ -503,10 +570,8 @@ def _fit_regressor(self, nsrt_data: _OptionSamplerDataset) -> MLPRegressor:


# Helper functions.
def _wrap_sampler(
base_sampler: NSRTSampler,
score_fn: _ScoreFn,
) -> NSRTSampler:
def _wrap_sampler(base_sampler: NSRTSampler, score_fn: _ScoreFn,
strategy: str) -> NSRTSampler:
"""Create a wrapped sampler that uses a score function to select among
candidates from a base sampler."""

@@ -517,8 +582,16 @@ def _sample(state: State, goal: Set[GroundAtom], rng: np.random.Generator,
for _ in range(CFG.active_sampler_learning_num_samples)
]
scores = score_fn(state, objects, samples)
# For now, just pick the best scoring sample.
idx = np.argmax(scores)
if strategy in ["greedy", "epsilon_greedy"]:
idx = int(np.argmax(scores))
if strategy == "epsilon_greedy" and rng.uniform(
) <= CFG.active_sampler_learning_exploration_epsilon:
# Pick a random sample instead, following the epsilon-greedy strategy.
idx = rng.integers(0, len(scores))
else:
raise NotImplementedError(
    f"Exploration strategy {strategy} is not implemented.")
return samples[idx]

return _sample
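The wrapper above draws CFG.active_sampler_learning_num_samples candidates from the base sampler, scores them all, and returns the argmax; under epsilon_greedy it occasionally returns a uniformly random candidate instead. A self-contained sketch of that selection rule, with an assumed epsilon value:

import numpy as np

rng = np.random.default_rng(0)
scores = [0.2, 0.9, 0.5]
epsilon = 0.1  # stand-in for CFG.active_sampler_learning_exploration_epsilon
idx = int(np.argmax(scores))  # greedy choice: index 1
if rng.uniform() <= epsilon:
    # Explore: override the greedy pick with a uniform random index.
    idx = int(rng.integers(0, len(scores)))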
2 changes: 1 addition & 1 deletion predicators/approaches/bridge_policy_approach.py
@@ -272,7 +272,7 @@ def learn_from_interaction_results(
traj = response.teacher_traj
assert traj is not None
atom_traj = [utils.abstract(s, preds) for s in traj.states]
segmented_traj = segment_trajectory((traj, atom_traj))
segmented_traj = segment_trajectory(traj, preds, atom_traj)
if not segmented_traj:
assert len(atom_traj) == 1
states = [traj.states[0]]
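A note on the API change above, which recurs in the files below: segment_trajectory now takes the low-level trajectory and the predicate set directly, plus an optional precomputed atom sequence, instead of a (trajectory, atom_sequence) tuple. The two call patterns in this diff, with names taken from the call site above:

# With atoms already computed (e.g. via utils.abstract):
segmented_traj = segment_trajectory(traj, preds, atom_traj)
# Without; atoms are derived from preds as needed:
segmented_traj = segment_trajectory(traj, preds)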
9 changes: 4 additions & 5 deletions predicators/approaches/gnn_option_policy_approach.py
@@ -43,15 +43,14 @@ def _generate_data_from_dataset(
self, dataset: Dataset
) -> List[Tuple[State, Set[GroundAtom], Set[GroundAtom], _Option]]:
data = []
ground_atom_dataset = utils.create_ground_atom_dataset(
dataset.trajectories, self._initial_predicates)
# In this approach, we never learned any NSRTs, so we just call
# segment_trajectory() to segment the given dataset.
segmented_trajs = [
segment_trajectory(traj) for traj in ground_atom_dataset
segment_trajectory(traj, self._initial_predicates)
for traj in dataset.trajectories
]
for segment_traj, (ll_traj, _) in zip(segmented_trajs,
ground_atom_dataset):
for segment_traj, ll_traj in zip(segmented_trajs,
dataset.trajectories):
if not ll_traj.is_demo:
continue
goal = self._train_tasks[ll_traj.train_task_idx].goal
21 changes: 11 additions & 10 deletions predicators/approaches/grammar_search_invention_approach.py
@@ -21,8 +21,8 @@
from predicators.predicate_search_score_functions import \
_PredicateSearchScoreFunction, create_score_function
from predicators.settings import CFG
from predicators.structs import Dataset, GroundAtom, GroundAtomTrajectory, \
Object, ParameterizedOption, Predicate, Segment, State, Task, Type
from predicators.structs import Dataset, GroundAtomTrajectory, Object, \
ParameterizedOption, Predicate, Segment, State, Task, Type

################################################################################
# Programmatic classifiers #
@@ -614,10 +614,8 @@ def __post_init__(self) -> None:
# states in each segment, which we store into
# self._state_sequences.
for traj in self.dataset.trajectories:
dummy_atoms_seq: List[Set[GroundAtom]] = [
set() for _ in range(len(traj.states))
]
seg_traj = segment_trajectory((traj, dummy_atoms_seq))
# The init_atoms and final_atoms are not used.
seg_traj = segment_trajectory(traj, predicates=set())
state_seq = utils.segment_trajectory_to_state_sequence(
seg_traj)
self._state_sequences.append(state_seq)
@@ -861,10 +859,11 @@ def _get_successors(
# preconditions.
logging.info("\nFiltering out predicates that don't appear in "
"preconditions...")
pruned_atom_data = utils.prune_ground_atom_dataset(
atom_dataset, kept_predicates | initial_predicates)
preds = kept_predicates | initial_predicates
pruned_atom_data = utils.prune_ground_atom_dataset(atom_dataset, preds)
segmented_trajs = [
segment_trajectory(traj) for traj in pruned_atom_data
segment_trajectory(ll_traj, set(preds), atom_seq=atom_seq)
for (ll_traj, atom_seq) in pruned_atom_data
]
low_level_trajs = [ll_traj for ll_traj, _ in pruned_atom_data]
preds_in_preconds = set()
@@ -900,8 +899,10 @@ def _select_predicates_by_clustering(
assert dataset.annotations is not None and len(
dataset.annotations) == len(dataset.trajectories)
assert CFG.segmenter == "option_changes"
preds = set(initial_predicates)
segmented_trajs = [
segment_trajectory(traj) for traj in atom_dataset
segment_trajectory(ll_traj, preds, atom_seq)
for ll_traj, atom_seq in atom_dataset
]
assert len(segmented_trajs) == len(dataset.annotations)
# First, get the set of all ground truth operator names.
11 changes: 8 additions & 3 deletions predicators/approaches/nsrt_learning_approach.py
@@ -18,8 +18,8 @@
from predicators.nsrt_learning.nsrt_learning_main import learn_nsrts_from_data
from predicators.planning import task_plan, task_plan_grounding
from predicators.settings import CFG
from predicators.structs import NSRT, Dataset, LowLevelTrajectory, \
ParameterizedOption, Predicate, Segment, Task, Type
from predicators.structs import NSRT, Dataset, GroundAtomTrajectory, \
LowLevelTrajectory, ParameterizedOption, Predicate, Segment, Task, Type


class NSRTLearningApproach(BilevelPlanningApproach):
@@ -62,6 +62,11 @@ def _learn_nsrts(self, trajectories: List[LowLevelTrajectory],
online_learning_cycle=online_learning_cycle)
# If CFG.load_atoms is set, then try to create a GroundAtomTrajectory
# by loading sets of GroundAtoms directly from a saved file.
# By default, we don't create a full ground atom dataset, since
# doing so requires calling abstract on all states, including states
# that might ultimately just be in the middle of segments. When
# options take many steps, this makes a big time/space difference.
ground_atom_dataset: Optional[List[GroundAtomTrajectory]] = None
if CFG.load_atoms:
os.makedirs(CFG.data_dir, exist_ok=True)
# Check that the dataset file was previously saved.
Expand All @@ -82,7 +87,7 @@ def _learn_nsrts(self, trajectories: List[LowLevelTrajectory],
(traj, [set(atoms) for atoms in ground_atom_seq]))
else:
raise ValueError(f"Cannot load ground atoms: {dataset_fname}")
else:
elif CFG.save_atoms:
# Apply predicates to data, producing a dataset of abstract states.
ground_atom_dataset = utils.create_ground_atom_dataset(
trajectories, self._get_current_predicates())
1 change: 1 addition & 0 deletions predicators/args.py
@@ -37,6 +37,7 @@ def create_arg_parser(env_required: bool = True,
parser.add_argument("--restart_learning", action="store_true")
parser.add_argument("--load_data", action="store_true")
parser.add_argument("--load_atoms", action="store_true")
parser.add_argument("--save_atoms", action="store_true")
parser.add_argument("--skip_until_cycle", default=-1, type=int)
parser.add_argument("--experiment_id", default="", type=str)
parser.add_argument("--load_experiment_id", default="", type=str)