diff --git a/docs/src/beetle_example_imm.md b/docs/src/beetle_example_imm.md index 97d10ab..682dd5e 100644 --- a/docs/src/beetle_example_imm.md +++ b/docs/src/beetle_example_imm.md @@ -48,7 +48,7 @@ We then define the probability distributions we need. The IMM filter takes a tra dgσ = 1.0 # the deviation of the measurement noise distribution dvσ = 0.3 # the deviation of the dynamics noise distribution ϕσ = 0.5 -P = [0.995 0.005; 0.0 1] # Transition probability matrix, we model the search mode as "almost terminal" +P = [0.995 0.005; 0.0 1] # Transition probability matrix, we model the search mode as "terminal" μ = [1.0, 0.0] # Initial mixing probabilities R1 = Diagonal([1e-1, 1e-1, dvσ, ϕσ].^2) R2 = dgσ^2*I(ny) # Measurement noise covariance matrix @@ -107,7 +107,7 @@ function get_opt_kf(p) R1i = Diagonal(SVector{4}(exp10.(p[1:4]))) R2i = SMatrix{2,2}(exp10(p[5])*R2) d0i = MvNormal(SVector{4, T}(T.(d0.μ)), SMatrix{4,4}(T.(d0.Σ))) - modegain = 5+exp10(p[6]) + modegain = 2+exp10(p[6]) Pi = SMatrix{2,2, Float64,4}(P) # sigmoid(x) = 1/(1+exp(-x)) # switch_prob = sigmoid(p[7]) @@ -120,7 +120,8 @@ end function cost(pars) try imm = get_opt_kf(pars) - ll = loglik(imm, u, y, interact=true) - 1/2*logdet(imm.models[1].R1) + T = length(y) + ll = loglik(imm, u[1:T], y[1:T], interact=true) - 1/2*logdet(imm.models[1].R1) return -ll catch e # rethrow() # If you only get Inf, you can uncomment this line to see the error message @@ -133,22 +134,29 @@ Random.seed!(0) res = Optim.optimize( cost, params, - ParticleSwarm(), + ParticleSwarm(), # Use a gradient-free optimizer. ForwardDiff works, but the algorithm is numerically difficult to compute gradients through and may suffer from overflows in the gradient computation Optim.Options( show_trace = true, show_every = 5, - iterations = 200, + iterations = 100, + time_limit = 30, ), ) imm = get_opt_kf(res.minimizer) -imm = get_opt_kf([-0.23848249020342335, 0.09510413594186848, -2.7540206342539832, -0.026720713351238334, -5.596193009305194, -25.37645648820617]) #make sure it goes well # hide +imm = get_opt_kf([-0.1981314138910982, -0.18626406669394405, -2.7342979645906547, 0.17994244691004058, -11.706419070755908, -54.16703441089562]) #make sure it goes well # hide sol = forward_trajectory(imm, u, y) plot(sol.extra', title="Mode (optimized filter)") ``` -If it went well, the filter should be in mode 1 (the `false` mode) from the start until just before 200 time steps, at which point it should switch to model 2 (`true`). This method of detecting the mode switch of the beetle appears to be somewhat less robust than the particle filter, but is significantly cheaper computationally. +If it went well, the filter should be in mode 1 (the `false` mode) from the start until around 200 time steps, at which point it should switch to model 2 (`true`). This method of detecting the mode switch of the beetle appears to be somewhat less robust than the particle filter, but is significantly cheaper computationally. The IMM filter does not stick in mode 2 perpetually after having reached it since it never actually becomes fully confident that mode 2 has been reached, but detecting the first switch is sufficient to know that the switch has occurred. +The log-likelihood of the solution +```@example beetle_imm +sol.ll +``` + +should be similar to that of the particle-filter in the tutorial [Smoothing the track of a moving beetle](@ref), which was around -1660. \ No newline at end of file diff --git a/src/imm.jl b/src/imm.jl index 73c8c35..870ffdb 100644 --- a/src/imm.jl +++ b/src/imm.jl @@ -91,11 +91,11 @@ function interact!(imm::IMM) @bangbang new_R[j] .= 0 .* new_R[j] end for i = eachindex(models) - μij = P[i,j] * μ[i] / cj[j] + μij = calc_μij(P[i,j], μ[i], cj[j]) @bangbang new_x[j] .+= μij .* models[i].x end for i = eachindex(models) - μij = P[i,j] * μ[i] / cj[j] + μij = calc_μij(P[i,j], μ[i], cj[j]) if !iszero(μij) d = models[i].x - new_x[j] @bangbang new_R[j] .+= symmetrize(μij .* (d * d' .+ models[i].R)) @@ -110,6 +110,10 @@ function interact!(imm::IMM) nothing end +function calc_μij(P, μ, cj) + P*μ/cj # This overflows for Dual numbers when cj has extremely small values +end + function predict!(imm::IMM, args...; kwargs...) for (model,μ) in zip(imm.models, imm.μ) # iszero(μ) && continue # Filter has died @@ -126,7 +130,7 @@ The correct step of the IMM filter corrects each model with the measurements `y` The returned tuple consists of the sum of the log-likelihood of all models, the vector of individual log-likelihoods and an array of the rest of the return values from the correct step of each model. """ -function correct!(imm::IMM, u, y, args...; kwargs...) +function correct!(imm::IMM, u, y, args...; expnormalize = true, kwargs...) (; μ, P, models) = imm lls = zeros(eltype(imm.x), length(models)) rest = [] @@ -141,10 +145,15 @@ function correct!(imm::IMM, u, y, args...; kwargs...) push!(rest, others) end μP = P'μ - new_μ = exp.(lls) .* μP - μ .= new_μ ./ sum(new_μ) - sum(lls), lls, rest + # Naive formulas + # new_μ = exp.(lls) .* μP + # μ .= new_μ ./ sum(new_μ) + + # Rewrite exp.(lls) .* μP as exp.(lls .+ log.(μP)), after which we can identify lls .+ log.(μP) as w and μ as we from the particle-filter implementation. This may improve the numerics + w = lls .+ log.(μP) + ll = logsumexp!(w, μ) + return ll, lls, rest end """ @@ -154,7 +163,7 @@ Combine the models of the IMM filter into a single state `imm.x` and covariance """ function combine!(imm::IMM) (; μ, x, R, models) = imm - @assert sum(μ) ≈ 1.0 + @assert sum(μ) ≈ 1.0 "sum(μ) = $(sum(μ))" @bangbang x .= 0 .* x @bangbang R .= 0 .* R