GBP optimizer #427 (Draft)

joeaortiz wants to merge 74 commits into main from joe.gbp_optimizer

Commits (74)
1918b45
gbp implementation for pose graph problem, euclidean and lie algebra
joeaortiz Mar 16, 2022
6edf81b
Merge branch 'main' into joe.gbp_optimizer
joeaortiz Mar 16, 2022
31a8874
gbp uses exp_map jacobians
joeaortiz Mar 16, 2022
1a6eae4
gaussian for Manifold rather than Variable class
joeaortiz Mar 16, 2022
a7a6e4d
Merge branch 'main' into joe.gbp_optimizer
joeaortiz Mar 17, 2022
e93aa49
uses proper theseus exp_map jacobian
joeaortiz Apr 4, 2022
aa7062e
gaussian class plus marginals and message class
joeaortiz Apr 5, 2022
4479f3b
updated gaussian class
joeaortiz Apr 7, 2022
fc12a98
message scheduler
joeaortiz Apr 8, 2022
264ed22
added mean damping in lin point space
joeaortiz Apr 13, 2022
d5937b4
Merge branch 'main' into joe.gbp_optimizer
joeaortiz Apr 20, 2022
8092a03
fix in linearise and add ba example
joeaortiz Apr 20, 2022
106b57d
Merge branch 'main' into joe.gbp_optimizer
joeaortiz Apr 20, 2022
4100342
use th.Manifold gaussian
joeaortiz Apr 22, 2022
b26e1a3
ba tests, fixing numerical issues
joeaortiz Apr 25, 2022
b4c387a
ba viewer
joeaortiz May 5, 2022
b75cfca
bundle adjustment trimesh vis
joeaortiz Jun 1, 2022
40575f4
soft huber-like loss on norm of x,y error
joeaortiz Jun 1, 2022
a91754e
remove prints and fix viewer
joeaortiz Jun 6, 2022
32380fb
remove symmetric check
joeaortiz Jun 6, 2022
2aa58a1
ba visualisation and derivatives setup for pgo
joeaortiz Jun 6, 2022
4c91944
lin system damping for ftov msgs
joeaortiz Jun 10, 2022
d1244e8
static dense solver methods
joeaortiz Jun 10, 2022
87cac1e
ba with damping in linear system
joeaortiz Jun 10, 2022
34133be
backward modes
joeaortiz Jun 10, 2022
4263ef5
msgs are class variables to fix implicit backward mode
joeaortiz Jun 10, 2022
7b1bf78
test different backward modes for pgo
joeaortiz Jun 10, 2022
b6f6748
Merge branch 'main' into joe.gbp_optimizer
joeaortiz Jun 10, 2022
6c07c34
fixed copy_impl for reprojection error fn
joeaortiz Jun 11, 2022
d2bd2ae
fix order of args in copy fn
joeaortiz Jun 12, 2022
9f6eb09
used vectorization for part of relin, rename VariableDifference
joeaortiz Jun 12, 2022
78bb120
handles batched problems
joeaortiz Jul 4, 2022
c1b2449
vectorized relinearization and ftov msg passing, schedule class
joeaortiz Jul 5, 2022
c7d9b1c
added missing aux vars to reprojection error cf
joeaortiz Jul 5, 2022
c81210a
Merge branch main into joe.gbp_optimizer
joeaortiz Jul 6, 2022
954a154
vectorized vtof msg passing
joeaortiz Jul 7, 2022
8bcd2cd
handles vectorized inversion with some singular matrices, only comput…
joeaortiz Jul 7, 2022
eaeab00
removed random message schedule
joeaortiz Jul 8, 2022
0700e2e
merge with main
joeaortiz Jul 15, 2022
d8c774a
damping linear system
joeaortiz Jul 18, 2022
ea5173f
local linear damping, fixes for gbp on gpu
joeaortiz Jul 18, 2022
c2507e3
handle loading different format bal file and drop observations
joeaortiz Jul 18, 2022
400091b
gbp check unary factor, fix bug in ba viewer
joeaortiz Jul 20, 2022
9db9612
ba error plot
joeaortiz Jul 20, 2022
12f0b77
fixed are calculation
joeaortiz Jul 22, 2022
255a5b5
tensor for linear system damping, rename message damping
joeaortiz Aug 2, 2022
634ce74
fixes bug where beliefs and factors are created twice
joeaortiz Aug 4, 2022
39be1f1
nesterov, no grad for lm damping, ba batch experiments
joeaortiz Aug 26, 2022
25e87de
nesterov acceleration, two modes
joeaortiz Sep 6, 2022
4e1a4b1
swarm exp
joeaortiz Sep 8, 2022
37f7ed5
learning target for agents
joeaortiz Sep 9, 2022
a6b7274
target character and joint mlp + gbp
joeaortiz Sep 16, 2022
89f0c78
fixed jacobians for gnn factor
joeaortiz Sep 20, 2022
8a75767
implicit backward mode for GBP using GN step
joeaortiz Sep 26, 2022
75c3157
implicit derivatives using gbp and plot backward modes against time
joeaortiz Sep 30, 2022
a7db963
Merge main
joeaortiz Dec 22, 2022
53e4b91
moved into optimizer, removed experiments
joeaortiz Dec 30, 2022
82fc597
Moved import, fixed single wrapper vectorization
joeaortiz Jan 4, 2023
8a01154
update vectorization before truncated steps
joeaortiz Jan 4, 2023
0644e94
Moved bundle adjustment edits to experimental branch
joeaortiz Jan 4, 2023
4a81901
flake8 on github not gitlab
joeaortiz Jan 4, 2023
4489141
Remove nesterov acceleration and timing
joeaortiz Jan 5, 2023
9d5d5f7
End of iter callback, updated mypy version
joeaortiz Jan 6, 2023
748fe38
First attempt at GBP linear solver test
joeaortiz Jan 6, 2023
e597451
Fixed poor conditioning problems with linear test
joeaortiz Jan 6, 2023
c2cebbe
dropout starts later
joeaortiz Jan 6, 2023
d90138f
Merge branch 'main' into joe.gbp_optimizer
joeaortiz Jan 9, 2023
a94225f
Comments and references for understanding GBP code
joeaortiz Jan 13, 2023
b3551fa
Reduced atol threshold for symmetric precision matrix
joeaortiz Jan 13, 2023
aade379
Detach hessian in implicit GBP backward mode
joeaortiz Jan 16, 2023
4c905b9
Fix linearization for truncated, exception for DLM
joeaortiz Jan 16, 2023
4331f71
Fixed bug in linear system damping with vectorization
joeaortiz Jan 17, 2023
633f670
Zero messages correctly when using vectorization
joeaortiz Jan 19, 2023
f9a1c8d
Merged NL optimizer hierarchy refactor
joeaortiz Feb 6, 2023
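
The diffs below add a GaussianBeliefPropagation optimizer and export it as th.GaussianBeliefPropagation. A minimal usage sketch, based only on the API exercised in the new test further down (the tiny two-variable objective here is illustrative, not taken from this PR):

import torch
import theseus as th

# Two 1-D variables: a prior-style factor on x0 and a smoothness factor between
# x0 and x1. Hypothetical toy setup; only the optimizer calls mirror the new test.
x0 = th.Vector(tensor=torch.rand(1, 1), name="x0")
x1 = th.Vector(tensor=torch.rand(1, 1), name="x1")
meas = th.Vector(tensor=torch.ones(1, 1), name="meas")
zero = th.Vector(tensor=torch.zeros(1, 1), name="zero")
w = th.ScaleCostWeight(1.0, name="w")

objective = th.Objective()
objective.add(th.Difference(x0, meas, w))
objective.add(th.Between(x0, x1, zero, w))

optimizer = th.GaussianBeliefPropagation(objective, max_iterations=50, vectorize=True)
objective.update({"x0": x0.tensor, "x1": x1.tensor})
info = optimizer.optimize(
    track_best_solution=True,
    track_err_history=True,
    ftov_msg_damping=0.0,                     # factor-to-variable message damping
    dropout=0.0,                              # random message dropout
    lin_system_damping=torch.tensor([1e-4]),  # damping of the per-factor linear systems
)
print(x0.tensor, x1.tensor, info.best_iter)

The keyword arguments ftov_msg_damping, dropout, and lin_system_damping mirror the options swept in the test; per the test's docstring, none of them should change the converged solution of a linear problem.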
171 changes: 171 additions & 0 deletions tests/optimizer/linear/test_gbp_linear_solver.py
@@ -0,0 +1,171 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import torch

import theseus as th


"""
Build linear 1D surface estimation problem.
Solve using GBP and using matrix inversion and compare answers.
GBP exactly computes the marginal means on convergence.

All the following cases should not affect the converged solution:
- with / without vectorization
- with / without factor to variable message damping
- with / without dropout
- with / without factor linear system damping
"""


def _check_info(info, batch_size, max_iterations, initial_error, objective):
assert info.err_history.shape == (batch_size, max_iterations + 1)
assert info.err_history[:, 0].allclose(initial_error)
assert info.err_history.argmin(dim=1).allclose(info.best_iter + 1)
last_error = objective.error_squared_norm() / 2
last_convergence_idx = info.converged_iter.max().item()
assert info.err_history[:, last_convergence_idx].allclose(last_error)


def run_gbp_linear_solver(
frac_loops,
vectorize=True,
ftov_damping=0.0,
dropout=0.0,
lin_system_damping=torch.tensor([1e-4]),
):
max_iterations = 200

n_variables = 100
batch_size = 1

torch.manual_seed(0)

# initial input tensors
# measurements come from x = sin(t / 10) * t**2 / 250 + 1 with random noise added
ts = torch.arange(n_variables)
true_meas = torch.sin(ts / 10.0) * ts * ts / 250.0 + 1
noisy_meas = true_meas[None, :].repeat(batch_size, 1)
noisy_meas += torch.normal(torch.zeros_like(noisy_meas), 1.0)

variables = []
meas_vars = []
for i in range(n_variables):
variables.append(th.Vector(tensor=torch.rand(batch_size, 1), name=f"x_{i}"))
meas_vars.append(th.Vector(tensor=torch.rand(batch_size, 1), name=f"meas_x{i}"))

objective = th.Objective()

# measurement cost functions
meas_weight = th.ScaleCostWeight(5.0, name="meas_weight")
for var, meas in zip(variables, meas_vars):
objective.add(th.Difference(var, meas, meas_weight))

# smoothness cost functions between adjacent variables
smoothness_weight = th.ScaleCostWeight(2.0, name="smoothness_weight")
zero = th.Vector(tensor=torch.zeros(batch_size, 1), name="zero")
for i in range(n_variables - 1):
objective.add(
th.Between(variables[i], variables[i + 1], zero, smoothness_weight)
)

# difference cost functions between non-adjacent variables to give
# off-diagonal elements in the information matrix
difference_weight = th.ScaleCostWeight(1.0, name="difference_weight")
for i in range(int(n_variables * frac_loops)):
ix1, ix2 = torch.randint(n_variables, (2,))
diff = th.Vector(
tensor=torch.tensor([[true_meas[ix2] - true_meas[ix1]]]), name=f"diff{i}"
)
diff.tensor += torch.normal(torch.zeros(1, 1), 0.2)
objective.add(
th.Between(variables[ix1], variables[ix2], diff, difference_weight)
)

input_tensors = {}
for var in variables:
input_tensors[var.name] = var.tensor
for i in range(len(noisy_meas[0])):
input_tensors[f"meas_x{i}"] = noisy_meas[:, i][:, None]

# Solve with GBP
optimizer = th.GaussianBeliefPropagation(
objective, max_iterations=max_iterations, vectorize=vectorize
)
optimizer.set_params(max_iterations=max_iterations)
objective.update(input_tensors)
initial_error = objective.error_squared_norm() / 2

callback_expected_iter = [0]

def callback(opt_, info_, _, it_):
assert opt_ is optimizer
assert isinstance(info_, th.optimizer.OptimizerInfo)
assert it_ == callback_expected_iter[0]
callback_expected_iter[0] += 1

info = optimizer.optimize(
track_best_solution=True,
track_err_history=True,
end_iter_callback=callback,
ftov_msg_damping=ftov_damping,
dropout=dropout,
lin_system_damping=lin_system_damping,
verbose=True,
)
gbp_solution = [var.tensor.clone() for var in variables]

# Solve with linear solver
objective.update(input_tensors)
linear_optimizer = th.LinearOptimizer(objective, th.CholeskyDenseSolver)
linear_optimizer.optimize(verbose=True)
lin_solution = [var.tensor.clone() for var in variables]

# Solve with Gauss-Newton
# If the problem is poorly conditioned, Gauss-Newton can yield a slightly
# different solution from a single linear solve, so check against both
objective.update(input_tensors)
gn_optimizer = th.GaussNewton(objective, th.CholeskyDenseSolver)
gn_optimizer.optimize(verbose=True)
gn_solution = [var.tensor.clone() for var in variables]

# checks
for x, x_target in zip(gbp_solution, lin_solution):
assert x.allclose(x_target, rtol=1e-3)
for x, x_target in zip(gbp_solution, gn_solution):
assert x.allclose(x_target, rtol=1e-3)
_check_info(info, batch_size, max_iterations, initial_error, objective)

# # Visualise reconstructed surface
# soln_vec = torch.cat(gbp_solution, dim=1)[0]
# import matplotlib.pylab as plt
# plt.scatter(torch.arange(n_variables), soln_vec, label="solution")
# plt.scatter(torch.arange(n_variables), noisy_meas[0], label="meas")
# plt.legend()
# plt.show()


def test_gbp_linear_solver():

# problems with increasing loopiness
# the loopier the problem, the fewer iterations it takes to converge
frac_loops = [0.1, 0.2, 0.5]
for frac in frac_loops:

run_gbp_linear_solver(frac_loops=frac)

# with factor-to-variable message damping; may take too many steps to converge
# run_gbp_linear_solver(frac_loops=frac, ftov_damping=0.1)
# with dropout
run_gbp_linear_solver(frac_loops=frac, dropout=0.1)

# test linear system damping
run_gbp_linear_solver(frac_loops=frac, lin_system_damping=torch.tensor([0.0]))
run_gbp_linear_solver(frac_loops=frac, lin_system_damping=torch.tensor([1e-2]))
run_gbp_linear_solver(frac_loops=frac, lin_system_damping=torch.tensor([1e-6]))

# test without vectorization once
run_gbp_linear_solver(frac_loops=0.5, vectorize=False)
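
The test above can be run on its own with a standard pytest invocation (assuming the repository's usual test dependencies; this command is not part of the diff):

python -m pytest tests/optimizer/linear/test_gbp_linear_solver.py -v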
1 change: 1 addition & 0 deletions theseus/__init__.py
@@ -66,6 +66,7 @@
)
from .optimizer import ( # usort: skip
DenseLinearization,
GaussianBeliefPropagation,
Linearization,
ManifoldGaussian,
OptimizerInfo,
13 changes: 13 additions & 0 deletions theseus/core/objective.py
@@ -111,6 +111,10 @@ def __init__(
# If vectorization is on, this will also handle vectorized containers
self._vectorization_to: Optional[Callable] = None

self.vectorized_cost_fns: Optional[List[CostFunction]] = None
# nested list with the names of the base cost functions wrapped by each vectorized cost function
self.vectorized_cf_names: Optional[List[List[str]]] = None

# If vectorization is on, this gets replaced by a vectorized version
self._retract_method = Objective._retract_base

@@ -682,6 +686,8 @@ def _enable_vectorization(
vectorization_run_fn: Callable,
vectorized_to: Callable,
vectorized_retract_fn: Callable,
vectorized_cost_fns: List[CostFunction],
vectorized_cf_names: List[List[str]],
error_iter_fn: Callable[[], Iterable[CostFunction]],
enabler: Any,
):
@@ -694,6 +700,8 @@
self._vectorization_run = vectorization_run_fn
self._vectorization_to = vectorized_to
self._retract_method = vectorized_retract_fn
self.vectorized_cost_fns = vectorized_cost_fns
self.vectorized_cf_names = vectorized_cf_names
self._get_error_iter = error_iter_fn
self._vectorized = True

@@ -703,6 +711,8 @@ def disable_vectorization(self):
self._vectorization_run = None
self._vectorization_to = None
self._retract_method = Objective._retract_base
self.vectorized_cost_fns = None
self.vectorized_cf_names = None
self._get_error_iter = self._get_error_iter_base
self._vectorized = False

@@ -713,6 +723,9 @@ def vectorized(self):
== (self._vectorized_jacobians_iter is None)
== (self._vectorization_run is None)
== (self._vectorization_to is None)
== (self._retract_method is Objective._retract_base)
== (self.vectorized_cost_fns is None)
== (self.vectorized_cf_names is None)
== (self._get_error_iter == self._get_error_iter_base)
== (self._retract_method == Objective._retract_base)
)
14 changes: 10 additions & 4 deletions theseus/core/vectorizer.py
@@ -116,13 +116,17 @@ def __init__(self, objective: Objective, empty_cuda_cache: bool = False):
_CostFunctionSchema, List[_CostFunctionWrapper]
] = defaultdict(list)

schema_cf_names_dict: Dict[_CostFunctionSchema, List[str]] = defaultdict(list)

# Create wrappers for all cost functions and also get their schemas
for cost_fn in objective.cost_functions.values():
wrapper = _CostFunctionWrapper(cost_fn)
self._cost_fn_wrappers.append(wrapper)
schema = _get_cost_function_schema(cost_fn)
self._schema_dict[schema].append(wrapper)

schema_cf_names_dict[schema].append(cost_fn.name)

# Now create a vectorized cost function for each unique schema
self._vectorized_cost_fns: Dict[_CostFunctionSchema, CostFunction] = {}
for schema in self._schema_dict:
@@ -146,6 +150,8 @@ def __init__(self, objective: Objective, empty_cuda_cache: bool = False):
self._vectorize,
self._to,
self._vectorized_retract_optim_vars,
list(self._vectorized_cost_fns.values()),
list(schema_cf_names_dict.values()),
self._get_vectorized_error_iter,
self,
)
@@ -391,10 +397,10 @@ def _vectorize(
}
ret = [cf for cf_list in schema_dict.values() for cf in cf_list]
for schema, cost_fn_wrappers in schema_dict.items():
if len(cost_fn_wrappers) == 1:
self._handle_singleton_wrapper(schema, cost_fn_wrappers, mode)
else:
self._handle_schema_vectorization(schema, cost_fn_wrappers, mode)
# if len(cost_fn_wrappers) == 1:
# self._handle_singleton_wrapper(schema, cost_fn_wrappers, mode)
# else:
self._handle_schema_vectorization(schema, cost_fn_wrappers, mode)
return ret

def _get_vectorized_error_iter(self) -> Iterable[_CostFunctionWrapper]:
1 change: 1 addition & 0 deletions theseus/optimizer/__init__.py
@@ -9,3 +9,4 @@
from .optimizer import Optimizer, OptimizerInfo
from .sparse_linearization import SparseLinearization
from .variable_ordering import VariableOrdering
from .gbp import GaussianBeliefPropagation, GBPSchedule