Merge pull request #175 from baggepinnen/numimm

improve numerics in IMM
baggepinnen · Dec 21, 2024 · ec95e71 · ec95e71
2 parents 143b370 + 429a2a6
commit ec95e71
Show file tree

Hide file tree

Showing 2 changed files with 31 additions and 14 deletions.
diff --git a/docs/src/beetle_example_imm.md b/docs/src/beetle_example_imm.md
@@ -48,7 +48,7 @@ We then define the probability distributions we need. The IMM filter takes a tra
 dgσ = 1.0 # the deviation of the measurement noise distribution
 dvσ = 0.3 # the deviation of the dynamics noise distribution
 ϕσ  = 0.5
-P = [0.995 0.005; 0.0 1] # Transition probability matrix, we model the search mode as "almost terminal"
+P = [0.995 0.005; 0.0 1] # Transition probability matrix, we model the search mode as "terminal"
 μ = [1.0, 0.0] # Initial mixing probabilities
 R1 = Diagonal([1e-1, 1e-1, dvσ, ϕσ].^2)
 R2 = dgσ^2*I(ny) # Measurement noise covariance matrix
@@ -107,7 +107,7 @@ function get_opt_kf(p)
     R1i = Diagonal(SVector{4}(exp10.(p[1:4])))
     R2i = SMatrix{2,2}(exp10(p[5])*R2)
     d0i = MvNormal(SVector{4, T}(T.(d0.μ)), SMatrix{4,4}(T.(d0.Σ)))
-    modegain = 5+exp10(p[6])
+    modegain = 2+exp10(p[6])
     Pi = SMatrix{2,2, Float64,4}(P)
     # sigmoid(x) = 1/(1+exp(-x))
     # switch_prob = sigmoid(p[7])
@@ -120,7 +120,8 @@ end
 function cost(pars)
     try
         imm = get_opt_kf(pars)
-        ll = loglik(imm, u, y, interact=true) - 1/2*logdet(imm.models[1].R1)
+        T = length(y)
+        ll = loglik(imm, u[1:T], y[1:T], interact=true) - 1/2*logdet(imm.models[1].R1)
         return -ll
     catch e
         # rethrow() # If you only get Inf, you can uncomment this line to see the error message
@@ -133,22 +134,29 @@ Random.seed!(0)
 res = Optim.optimize(
     cost,
     params,
-    ParticleSwarm(),
+    ParticleSwarm(), # Use a gradient-free optimizer. ForwardDiff works, but the algorithm is numerically difficult to compute gradients through and may suffer from overflows in the gradient computation
     Optim.Options(
         show_trace        = true,
         show_every        = 5,
-        iterations        = 200,
+        iterations        = 100,
+        time_limit        = 30,
     ),
 )
 
 imm = get_opt_kf(res.minimizer)
-imm = get_opt_kf([-0.23848249020342335, 0.09510413594186848, -2.7540206342539832, -0.026720713351238334, -5.596193009305194, -25.37645648820617]) #make sure it goes well # hide
+imm = get_opt_kf([-0.1981314138910982, -0.18626406669394405, -2.7342979645906547, 0.17994244691004058, -11.706419070755908, -54.16703441089562]) #make sure it goes well # hide
 
 sol = forward_trajectory(imm, u, y)
 plot(sol.extra', title="Mode (optimized filter)")
 ```
 
-If it went well, the filter should be in mode 1 (the `false` mode) from the start until just before 200 time steps, at which point it should switch to model 2 (`true`). This method of detecting the mode switch of the beetle appears to be somewhat less robust than the particle filter, but is significantly cheaper computationally. 
+If it went well, the filter should be in mode 1 (the `false` mode) from the start until around 200 time steps, at which point it should switch to model 2 (`true`). This method of detecting the mode switch of the beetle appears to be somewhat less robust than the particle filter, but is significantly cheaper computationally. 
 
 The IMM filter does not stick in mode 2 perpetually after having reached it since it never actually becomes fully confident that mode 2 has been reached, but detecting the first switch is sufficient to know that the switch has occurred. 
 
+The log-likelihood of the solution
+```@example beetle_imm
+sol.ll
+```
+
+should be similar to that of the particle-filter in the tutorial [Smoothing the track of a moving beetle](@ref), which was around -1660.
diff --git a/src/imm.jl b/src/imm.jl
@@ -91,11 +91,11 @@ function interact!(imm::IMM)
             @bangbang new_R[j] .= 0 .* new_R[j]
         end
         for i = eachindex(models)
-            μij = P[i,j] * μ[i] / cj[j]
+            μij = calc_μij(P[i,j], μ[i], cj[j])
             @bangbang new_x[j] .+= μij .* models[i].x
         end
         for i = eachindex(models)
-            μij = P[i,j] * μ[i] / cj[j]
+            μij = calc_μij(P[i,j], μ[i], cj[j])
             if !iszero(μij)
                 d = models[i].x - new_x[j]
                 @bangbang new_R[j] .+= symmetrize(μij .* (d * d' .+  models[i].R))
@@ -110,6 +110,10 @@ function interact!(imm::IMM)
     nothing
 end
 
+function calc_μij(P, μ, cj)
+    P*μ/cj # This overflows for Dual numbers when cj has extremely small values
+end
+
 function predict!(imm::IMM, args...; kwargs...)
     for (model,μ) in zip(imm.models, imm.μ)
         # iszero(μ) && continue # Filter has died
@@ -126,7 +130,7 @@ The correct step of the IMM filter corrects each model with the measurements `y`
 
 The returned tuple consists of the sum of the log-likelihood of all models, the vector of individual log-likelihoods and an array of the rest of the return values from the correct step of each model.
 """
-function correct!(imm::IMM, u, y, args...; kwargs...)
+function correct!(imm::IMM, u, y, args...; expnormalize = true, kwargs...)
     (; μ, P, models) = imm
     lls = zeros(eltype(imm.x), length(models))
     rest = []
@@ -141,10 +145,15 @@ function correct!(imm::IMM, u, y, args...; kwargs...)
         push!(rest, others)
     end
     μP = P'μ
-    new_μ = exp.(lls) .* μP
-    μ .= new_μ ./ sum(new_μ)
 
-    sum(lls), lls, rest
+    # Naive formulas
+    # new_μ = exp.(lls) .* μP
+    # μ .= new_μ ./ sum(new_μ)
+
+    # Rewrite exp.(lls) .* μP as exp.(lls .+ log.(μP)), after which we can identify lls .+ log.(μP) as w and μ as we from the particle-filter implementation. This may improve the numerics
+    w = lls .+ log.(μP)
+    ll = logsumexp!(w, μ)
+    return ll, lls, rest
 end
 
 """
@@ -154,7 +163,7 @@ Combine the models of the IMM filter into a single state `imm.x` and covariance
 """
 function combine!(imm::IMM)
     (; μ, x, R, models) = imm
-    @assert sum(μ) ≈ 1.0
+    @assert sum(μ) ≈ 1.0 "sum(μ) = $(sum(μ))"
 
     @bangbang x .= 0 .* x
     @bangbang R .= 0 .* R