TuringLang · THargreaves · Oct 21, 2024 · Aug 9, 2024 · Aug 12, 2024 · Aug 12, 2024
diff --git a/docs/src/extras.md b/docs/src/extras.md
diff --git a/docs/src/index.md b/docs/src/index.md
@@ -62,18 +62,18 @@ using SSMProblems
 
 struct SimpleLatentDynamics <: LatentDynamics end
 
-function distribution(rng::AbstractRNG, dyn::SimpleLatentDynamics, extra::Nothing)
+function distribution(rng::AbstractRNG, dyn::SimpleLatentDynamics; kwargs...)
     return Normal(0.0, 1.0)
 end
 
-function distribution(rng::AbstractRNG, dyn::SimpleLatentDynamics, step::Int, state::Float64, extra::Nothing)
+function distribution(rng::AbstractRNG, dyn::SimpleLatentDynamics, step::Int, state::Float64; kwargs...)
     return Normal(state, 0.1)
 end
 
 struct SimpleObservationProcess <: ObservationProcess end
 
 function distribution(
-    obs::SimpleObservationPRocess, step::Int, state::Float64, observation::Float64, extra::Nothing
+    obs::SimpleObservationPRocess, step::Int, state::Float64, observation::Float64; kwargs...
 )
     return Normal(state, 0.5)
 end
@@ -87,12 +87,11 @@ model = StateSpaceModel(dyn, obs)
 There are a few things to note here:
 
 - Two methods must be defined for the `LatentDynamics`, one containing
-  `step`/`state` arguments and use for transitioning, and one without these,
+  `step`/`state` arguments and used for transitioning, and one without these,
   used for initialisation.
-  the model is time-homogeneous, these are not required in the function bodies.
-- Every function takes an `extra` argument. This is part of the "secret sauce"
-  of `SSMProblems` that allows it to flexibly represent more exotic models
-  without any performance penalty. You can read more about it [here](extras.md).
+- Every function should accept keyword arguments. This is key feature of
+  `SSMProblems` that allows it to flexibly represent more exotic models without
+  any performance penalty. You can read more about it [here](kwargs.md).
 - If your latent dynamics and observation process cannot be represented as a
   `Distribution` object, you may implement specific methods for sampling and
   log-density evaluation as documented below.
@@ -109,8 +108,8 @@ for (i, observation) in enumerate(observations)
     idx = resample(rng, log_weights)
     particles = particles[idx]
     for i in 1:N
-        particles[i] = simulate(rng, dyn, i, particles[i], nothing)
-        log_weights[i] += logdensity(obs, i, particles[i], observation, nothing)
+        particles[i] = simulate(rng, dyn, i, particles[i])
+        log_weights[i] += logdensity(obs, i, particles[i], observation)
     end
 end
 ```

diff --git a/docs/src/kwargs.md b/docs/src/kwargs.md
@@ -0,0 +1,93 @@
+# Control Variables and Extras
+
+All functions that form part of the `SSMProblems` model interface should accept
+keyword arguments.
+
+These argument has multiple uses, but are generally used to pass in additional
+information to the model at inference time. Although this might seem unnecessary
+and clunky for simple models, this addition leads to a great amount of
+flexibility that allows complex and exotic models to be implemented with little
+effort or performance penalty.
+
+If your model does not require any keyword arguments, you do not need to use any
+in your function body (though `; kwargs...` should still be included in the signature).
+
+When forward-simulating, filtering or smoothing from a model, these keyword
+arguments are passed to the SSM definition. Some advanced algorithms such as the
+Rao-Blackwellised particle filter may also introduce additional keyword
+arguments at inference time.
+
+## Use as Control Variables
+
+In simple cases `kwargs` can be used to specify a control (or input) vector as
+is common in [control engineering](https://www.mathworks.com/help/control/ref/ss.html).
+
+In this case, the `simulate` function for the latent dynamics may look like
+this:
+
+```julia
+function simulate(
+    rng::AbstractRNG, 
+    dyn::SimpleLatentDynamics, 
+    step::Int, 
+    state::Float64; 
+    control::Float64,  # new keyword argument
+    kwargs...
+)
+    return state + control + rand(rng, Normal(0.0, 0.1))
+end
+```
+
+## Use as Time Deltas
+
+Keywords are not limited to be used as simple control vectors, and can in fact
+be used to pass in arbitrary additional information to the model at runtime. A
+common use case is when considering data arriving at irregular time intervals.
+In this case, they keyword arguments can be used to pass in the time delta
+between observations.
+
+In this case, the `simulate` function for the latent dynamics may look like
+this: 
+
+```julia
+function simulate(
+    rng::AbstractRNG, 
+    dyn::SimpleLatentDynamics, 
+    step::Int, 
+    state::Float64; 
+    dts::Vector{Float64},  # new keyword argument
+    kwargs...
+)
+    dt = dts[step]
+    return state + dt * rand(rng, Normal(0.1, 1.0))
+end
+```
+
+Note, that it is also possible to store this data in the latent dynamic's struct
+and extract it during a transition (e.g. `dyn.dts[timestep]`). However, this
+approach  has the disadvantage that the control variables must be defined when
+the model is instantiated. Further, this means that re-runs with new control
+variables require a re-instantiation of the model.
+
+Using keyword arguments for control variables allows for a separation between
+the abstract definition of the state space model and the concrete simulation or
+inference given specific data.
+
+## Use with Streaming Data
+
+The de-coupling of model definition and data that comes from using keyword
+arguments makes it easy to use `SSMProblems` with streaming data. As control
+variables arrive, these can be passed to the model distributions via keyword arguments.
+
+## Use in Rao-Blackwellisation
+
+Briefly, a Rao-Blackwellised particle filter is an efficient variant of the
+generic particle filter that can be applied to state space models that have an
+analytically tractable sub-model. The filter behaves as two nested filters, a
+regular particle filter for the outer model, and an analytic filter (e.g. Kalman
+filter) for the inner sub-model.
+
+Since the value of the keyword arguments can be defined at inference time, the
+outer filter can pass information to the inner filter via through these.
+This leads to a clean and generic interface for Rao-Blackwellised filtering,
+which is not possible with other state space model packages.
diff --git a/examples/kalman-filter/script.jl b/examples/kalman-filter/script.jl
@@ -33,6 +33,12 @@ struct LinearGaussianLatentDynamics{T<:Real} <: LatentDynamics{Vector{T}}
     Q::Matrix{T}
 end
 
+# Note, that our specific dynamics should be subtypes of the abstract `LatentDynamics` type.
+# Importantly, consider the type parameters used. Whereas the type parameter(s) of our
+# struct can be whatever is/are most suitable, the type parameter of `LatentDynamics` should
+# reflect the type used to store a given latent state at a given time step. In this case,
+# since we are considering a multi-dimensional problem, we use `Vector{T}`.
+
 # Similarly, the observation process is defined by the following equation:
 # ```
 # y[k] = Hx[k] + v[k],          v[k] ∼ N(0, R)
@@ -44,25 +50,25 @@ struct LinearGaussianObservationProcess{T<:Real} <: ObservationProcess{Vector{T}
 end
 
 # We then define general transition and observation distributions to be used in forward
-# simulation. Since our model is homogenous (doesn't depend on control variables), we use
-# `Nothing` for the type of `extra`.
+# simulation. If our model were time-inhomogenous, we could make our distribution functions
+# depend on the `step` argument or pass in control variables via keyword arguments.
 #
 # Even if we did not require forward simulation (e.g. we were given observations), it is
 # still useful to define these methods as they allow us to run a particle filter on our
 # model with no additional implementation required. Although a Kalman filter would generally
 # be preferred in this linear Gaussian case, it may be of interest to compare the sampling
 # performance with a general particle filter.
 
-function SSMProblems.distribution(model::LinearGaussianLatentDynamics, extra::Nothing)
+function SSMProblems.distribution(model::LinearGaussianLatentDynamics; kwargs...)
     return MvNormal(model.z, model.P)
 end
 function SSMProblems.distribution(
-    model::LinearGaussianLatentDynamics{T}, step::Int, prev_state::Vector{T}, extra::Nothing
+    model::LinearGaussianLatentDynamics{T}, step::Int, prev_state::Vector{T}; kwargs...
 ) where {T}
     return MvNormal(model.Φ * prev_state + model.b, model.Q)
 end
 function SSMProblems.distribution(
-    model::LinearGaussianObservationProcess{T}, step::Int, state::Vector{T}, extra::Nothing
+    model::LinearGaussianObservationProcess{T}, step::Int, state::Vector{T}; kwargs...
 ) where {T}
     return MvNormal(model.H * state, model.R)
 end
@@ -79,23 +85,25 @@ struct KalmanFilter end
 # be used to dispatch to the correct method.
 
 const LinearGaussianSSM{T} = StateSpaceModel{
-    <:LinearGaussianLatentDynamics{T},<:LinearGaussianObservationProcess{T}
+    T,<:LinearGaussianLatentDynamics{T},<:LinearGaussianObservationProcess{T}
 };
 
 # We then define a method for the `sample` function. This is a standardised interface which
 # requires the model we are sampling from, the sampling algorithm as well as the
-# observations and any extras.
+# observations and any keyword arguments which will be passed to the distribution functions.
+#
+# Note, that if our model were time-inhomogenous, we we need our model vectors/matrices
+# (e.g. A, b) to depend on the step variable `t` or the control variables. To make our
+# filtering algorithm generic, we should not handle this within the `sample` function.
+# Instead, we would pass this responsibility to the model by defining functions of the form
+# `calc_A(model, t; kwargs...)` etc.
 
 function AbstractMCMC.sample(
-    model::LinearGaussianSSM{U},
-    ::KalmanFilter,
-    observations::AbstractVector,
-    extra0,
-    extras::AbstractVector,
-) where {U}
+    model::LinearGaussianSSM{MT}, ::KalmanFilter, observations::AbstractVector; kwargs...
+) where {MT}
     T = length(observations)
-    x_filts = Vector{Vector{U}}(undef, T)
-    P_filts = Vector{Matrix{U}}(undef, T)
+    x_filts = Vector{Vector{MT}}(undef, T)
+    P_filts = Vector{Matrix{MT}}(undef, T)
 
     @unpack z, P, Φ, b, Q = model.dyn  ## Extract parameters
     @unpack H, R = model.obs
@@ -122,20 +130,6 @@ function AbstractMCMC.sample(
     return x_filts, P_filts
 end
 
-# In this specific case, however, since our model is homogenous, we do not expect to have
-# any extras passed in. For convenience, we create a method without the `extra0` and
-# `extras` argument which then replaces them with `nothing` and a vector of `nothing`s,
-# respectively. This pattern is not specific to linear Gaussian models or Kalman filters so
-# we define it with general types.
-
-function AbstractMCMC.sample(
-    model::StateSpaceModel, algorithm, observations::AbstractVector
-)
-    extra0 = nothing
-    extras = fill(nothing, length(observations))
-    return sample(model, algorithm, observations, extra0, extras)
-end
-
 # ## Simulation and Filtering
 
 # We define the parameters for our model as so.