Make Gibbs work with step_warmup (#2502)

mhauru · web-flow · commit 1397d694ca8a · 2025-03-11T17:47:55.000Z
* Make Gibbs work with step_warmup

* Bump patch version to 0.36.3

* Fix a Gibbs bug
diff --git a/Project.toml b/Project.toml
@@ -1,6 +1,6 @@
 name = "Turing"
 uuid = "fce5fe82-541a-59a6-adf8-730c64b5f9a0"
-version = "0.36.2"
+version = "0.36.3"
 
 [deps]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
diff --git a/src/mcmc/gibbs.jl b/src/mcmc/gibbs.jl
@@ -405,20 +405,75 @@ end
 
 varinfo(state::GibbsState) = state.vi
 
-function DynamicPPL.initialstep(
+"""
+Initialise a VarInfo for the Gibbs sampler.
+
+This is copied directly from DynamicPPL's src/sampler.jl. It is repeated here to
+support calling both step and step_warmup as the initial step. DynamicPPL initialstep is
+incompatible with step_warmup.
+"""
+function initial_varinfo(rng, model, spl, initial_params)
+    vi = DynamicPPL.default_varinfo(rng, model, spl)
+
+    # Update the parameters if provided.
+    if initial_params !== nothing
+        vi = DynamicPPL.initialize_parameters!!(vi, initial_params, spl, model)
+
+        # Update joint log probability.
+        # This is a quick fix for https://github.com/TuringLang/Turing.jl/issues/1588
+        # and https://github.com/TuringLang/Turing.jl/issues/1563
+        # to avoid that existing variables are resampled
+        vi = last(DynamicPPL.evaluate!!(model, vi, DynamicPPL.DefaultContext()))
+    end
+    return vi
+end
+
+function AbstractMCMC.step(
     rng::Random.AbstractRNG,
     model::DynamicPPL.Model,
-    spl::DynamicPPL.Sampler{<:Gibbs},
-    vi::DynamicPPL.AbstractVarInfo;
+    spl::DynamicPPL.Sampler{<:Gibbs};
     initial_params=nothing,
     kwargs...,
 )
     alg = spl.alg
     varnames = alg.varnames
     samplers = alg.samplers
+    vi = initial_varinfo(rng, model, spl, initial_params)
 
     vi, states = gibbs_initialstep_recursive(
-        rng, model, varnames, samplers, vi; initial_params=initial_params, kwargs...
+        rng,
+        model,
+        AbstractMCMC.step,
+        varnames,
+        samplers,
+        vi;
+        initial_params=initial_params,
+        kwargs...,
+    )
+    return Transition(model, vi), GibbsState(vi, states)
+end
+
+function AbstractMCMC.step_warmup(
+    rng::Random.AbstractRNG,
+    model::DynamicPPL.Model,
+    spl::DynamicPPL.Sampler{<:Gibbs};
+    initial_params=nothing,
+    kwargs...,
+)
+    alg = spl.alg
+    varnames = alg.varnames
+    samplers = alg.samplers
+    vi = initial_varinfo(rng, model, spl, initial_params)
+
+    vi, states = gibbs_initialstep_recursive(
+        rng,
+        model,
+        AbstractMCMC.step_warmup,
+        varnames,
+        samplers,
+        vi;
+        initial_params=initial_params,
+        kwargs...,
     )
     return Transition(model, vi), GibbsState(vi, states)
 end
@@ -427,9 +482,20 @@ end
 Take the first step of MCMC for the first component sampler, and call the same function
 recursively on the remaining samplers, until no samplers remain. Return the global VarInfo
 and a tuple of initial states for all component samplers.
+
+The `step_function` argument should always be either AbstractMCMC.step or
+AbstractMCMC.step_warmup.
 """
 function gibbs_initialstep_recursive(
-    rng, model, varname_vecs, samplers, vi, states=(); initial_params=nothing, kwargs...
+    rng,
+    model,
+    step_function::Function,
+    varname_vecs,
+    samplers,
+    vi,
+    states=();
+    initial_params=nothing,
+    kwargs...,
 )
     # End recursion
     if isempty(varname_vecs) && isempty(samplers)
@@ -450,7 +516,7 @@ function gibbs_initialstep_recursive(
     conditioned_model, context = make_conditional(model, varnames, vi)
 
     # Take initial step with the current sampler.
-    _, new_state = AbstractMCMC.step(
+    _, new_state = step_function(
         rng,
         conditioned_model,
         sampler;
@@ -470,6 +536,7 @@ function gibbs_initialstep_recursive(
     return gibbs_initialstep_recursive(
         rng,
         model,
+        step_function,
         varname_vecs_tail,
         samplers_tail,
         vi,
@@ -493,7 +560,29 @@ function AbstractMCMC.step(
     states = state.states
     @assert length(samplers) == length(state.states)
 
-    vi, states = gibbs_step_recursive(rng, model, varnames, samplers, states, vi; kwargs...)
+    vi, states = gibbs_step_recursive(
+        rng, model, AbstractMCMC.step, varnames, samplers, states, vi; kwargs...
+    )
+    return Transition(model, vi), GibbsState(vi, states)
+end
+
+function AbstractMCMC.step_warmup(
+    rng::Random.AbstractRNG,
+    model::DynamicPPL.Model,
+    spl::DynamicPPL.Sampler{<:Gibbs},
+    state::GibbsState;
+    kwargs...,
+)
+    vi = varinfo(state)
+    alg = spl.alg
+    varnames = alg.varnames
+    samplers = alg.samplers
+    states = state.states
+    @assert length(samplers) == length(state.states)
+
+    vi, states = gibbs_step_recursive(
+        rng, model, AbstractMCMC.step_warmup, varnames, samplers, states, vi; kwargs...
+    )
     return Transition(model, vi), GibbsState(vi, states)
 end
 
@@ -620,10 +709,14 @@ end
 """
 Run a Gibbs step for the first varname/sampler/state tuple, and recursively call the same
 function on the tail, until there are no more samplers left.
+
+The `step_function` argument should always be either AbstractMCMC.step or
+AbstractMCMC.step_warmup.
 """
 function gibbs_step_recursive(
     rng::Random.AbstractRNG,
     model::DynamicPPL.Model,
+    step_function::Function,
     varname_vecs,
     samplers,
     states,
@@ -657,7 +750,7 @@ function gibbs_step_recursive(
     state = setparams_varinfo!!(conditioned_model, sampler, state, vi)
 
     # Take a step with the local sampler.
-    new_state = last(AbstractMCMC.step(rng, conditioned_model, sampler, state; kwargs...))
+    new_state = last(step_function(rng, conditioned_model, sampler, state; kwargs...))
 
     new_vi_local = varinfo(new_state)
     # Merge the latest values for all the variables in the current sampler.
@@ -668,6 +761,7 @@ function gibbs_step_recursive(
     return gibbs_step_recursive(
         rng,
         model,
+        step_function,
         varname_vecs_tail,
         samplers_tail,
         states_tail,
diff --git a/src/mcmc/repeat_sampler.jl b/src/mcmc/repeat_sampler.jl
@@ -60,3 +60,30 @@ function AbstractMCMC.step(
     end
     return transition, state
 end
+
+function AbstractMCMC.step_warmup(
+    rng::Random.AbstractRNG,
+    model::AbstractMCMC.AbstractModel,
+    sampler::RepeatSampler;
+    kwargs...,
+)
+    return AbstractMCMC.step_warmup(rng, model, sampler.sampler; kwargs...)
+end
+
+function AbstractMCMC.step_warmup(
+    rng::Random.AbstractRNG,
+    model::AbstractMCMC.AbstractModel,
+    sampler::RepeatSampler,
+    state;
+    kwargs...,
+)
+    transition, state = AbstractMCMC.step_warmup(
+        rng, model, sampler.sampler, state; kwargs...
+    )
+    for _ in 2:(sampler.num_repeat)
+        transition, state = AbstractMCMC.step_warmup(
+            rng, model, sampler.sampler, state; kwargs...
+        )
+    end
+    return transition, state
+end
diff --git a/test/mcmc/gibbs.jl b/test/mcmc/gibbs.jl
@@ -268,6 +268,102 @@ end
     @test chain1.value == chain2.value
 end
 
+@testset "Gibbs warmup" begin
+    # An inference algorithm, for testing purposes, that records how many warm-up steps
+    # and how many non-warm-up steps have been taken.
+    mutable struct WarmupCounter <: Inference.InferenceAlgorithm
+        warmup_init_count::Int
+        non_warmup_init_count::Int
+        warmup_count::Int
+        non_warmup_count::Int
+
+        WarmupCounter() = new(0, 0, 0, 0)
+    end
+
+    Turing.Inference.drop_space(wuc::WarmupCounter) = wuc
+    Turing.Inference.getspace(::WarmupCounter) = ()
+    Turing.Inference.isgibbscomponent(::WarmupCounter) = true
+
+    # A trivial state that holds nothing but a VarInfo, to be used with WarmupCounter.
+    struct VarInfoState{T}
+        vi::T
+    end
+
+    Turing.Inference.varinfo(state::VarInfoState) = state.vi
+    function Turing.Inference.setparams_varinfo!!(
+        ::DynamicPPL.Model,
+        ::DynamicPPL.Sampler,
+        ::VarInfoState,
+        params::DynamicPPL.AbstractVarInfo,
+    )
+        return VarInfoState(params)
+    end
+
+    function AbstractMCMC.step(
+        ::Random.AbstractRNG,
+        model::DynamicPPL.Model,
+        spl::DynamicPPL.Sampler{<:WarmupCounter};
+        kwargs...,
+    )
+        spl.alg.non_warmup_init_count += 1
+        return Turing.Inference.Transition(nothing, 0.0),
+        VarInfoState(DynamicPPL.VarInfo(model))
+    end
+
+    function AbstractMCMC.step_warmup(
+        ::Random.AbstractRNG,
+        model::DynamicPPL.Model,
+        spl::DynamicPPL.Sampler{<:WarmupCounter};
+        kwargs...,
+    )
+        spl.alg.warmup_init_count += 1
+        return Turing.Inference.Transition(nothing, 0.0),
+        VarInfoState(DynamicPPL.VarInfo(model))
+    end
+
+    function AbstractMCMC.step(
+        ::Random.AbstractRNG,
+        ::DynamicPPL.Model,
+        spl::DynamicPPL.Sampler{<:WarmupCounter},
+        s::VarInfoState;
+        kwargs...,
+    )
+        spl.alg.non_warmup_count += 1
+        return Turing.Inference.Transition(nothing, 0.0), s
+    end
+
+    function AbstractMCMC.step_warmup(
+        ::Random.AbstractRNG,
+        ::DynamicPPL.Model,
+        spl::DynamicPPL.Sampler{<:WarmupCounter},
+        s::VarInfoState;
+        kwargs...,
+    )
+        spl.alg.warmup_count += 1
+        return Turing.Inference.Transition(nothing, 0.0), s
+    end
+
+    @model f() = x ~ Normal()
+    m = f()
+
+    num_samples = 10
+    num_warmup = 3
+    wuc = WarmupCounter()
+    sample(m, Gibbs(:x => wuc), num_samples; num_warmup=num_warmup)
+    @test wuc.warmup_init_count == 1
+    @test wuc.non_warmup_init_count == 0
+    @test wuc.warmup_count == num_warmup
+    @test wuc.non_warmup_count == num_samples - 1
+
+    num_reps = 2
+    wuc = WarmupCounter()
+    sample(m, Gibbs(:x => RepeatSampler(wuc, num_reps)), num_samples; num_warmup=num_warmup)
+    @test wuc.warmup_init_count == 1
+    @test wuc.non_warmup_init_count == 0
+    @test wuc.warmup_count == num_warmup * num_reps
+    @test wuc.non_warmup_count == (num_samples - 1) * num_reps
+end
+
 @testset "Testing gibbs.jl with $adbackend" for adbackend in ADUtils.adbackends
     @info "Starting Gibbs tests with $adbackend"
     @testset "Deprecated Gibbs constructors" begin