diff --git a/.gitignore b/.gitignore index bbea4b3..1424df2 100644 --- a/.gitignore +++ b/.gitignore @@ -38,4 +38,23 @@ exp_*/ *.png # Plot files -*plot.ipynb \ No newline at end of file +*plot.ipynb +*plot.py + +# Save experiment files +*save_experiment.py + +# nohup output +nohup*.out + +learning_cp/tsptw/old_exp/ +learning_cp/tsptw/TSPTW* +# all run files +run*.jl +*.sh +learning_cp/comparison/plots.jl + +# all .out files (nohup output) +*.out + +learning_cp/comparison/tensorboard_logs diff --git a/learning_cp/common/benchmark.jl b/learning_cp/common/benchmark.jl new file mode 100644 index 0000000..9880410 --- /dev/null +++ b/learning_cp/common/benchmark.jl @@ -0,0 +1,142 @@ +# This util evaluates a trained model on n new instances with ILDS(1), ILDS(2), ILDS with given budget and DFS +# Any heuristic using specific state representation should have "specific" in its file name +#include("../common/experiment.jl") +include("../comparison/comparison.jl") +include("utils.jl") + +#include("experiment.jl") +#include("../comparison/comparison.jl") +#include("utils.jl") +using CUDA + +# Parameters to edit +folder = "/home/martom/SeaPearl/SeaPearlZoo/learning_cp/comparison/2022-08-17/exp_MaxCut_100_10_4->10_4_10012-02-09/" + +chosen_features = Dict( +"node_number_of_neighbors" => true, +"constraint_type" => true, +"constraint_activity" => true, +"nb_not_bounded_variable" => true, +"variable_initial_domain_size" => true, +"variable_domain_size" => true, +"variable_is_objective" => true, +"variable_assigned_value" => true, +"variable_is_bound" => true, +"values_raw" => true) + +n = 20 +k = 4 +generator = SeaPearl.MaxCutGenerator(n,k) +n = 10 # Number of instances to evaluate on +budget = 10000 # Budget of visited nodes +has_objective = false # Set it to true if we have to branch on the objective variable +include_dfs = false # Set it to true if you want to evaluate with DFS in addition to ILDS + +# Define your basic heuristics here +threshold = 2*k +MISHeuristic(x; cpmodel=nothing) = length(x.onDomainChange) - 1 < threshold ? 
1 : 0 +heuristic_mis = SeaPearl.BasicHeuristic(MISHeuristic) +selectMax(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.maximum(x.domain) +heuristic_max = SeaPearl.BasicHeuristic(selectMax) +basicHeuristics = OrderedDict( + "random" => SeaPearl.RandomHeuristic() + ) + +function benchmark(folder::String, n::Int, chosen_features, has_objective::Bool, generator, basicHeuristics, include_dfs, budget::Int; verbose=true, ILDS = nothing) + models=[] + models_names=[] + for file in readdir(folder) + if splitext(file)[2] == ".bson" + @load folder * "/" * file model + push!(models, model) + push!(models_names, replace(splitext(file)[1], "model_"=>"")) + end + end + + reward = SeaPearl.GeneralReward + + eval_strategies = SeaPearl.SearchStrategy[] + search_strategy_names = String[] + if typeof(ILDS) == SeaPearl.ILDSearch + for i in 0:ILDS.d + push!(eval_strategies, SeaPearl.ILDSearch(i)) + push!(search_strategy_names, "ILDS"*string(i)) + end + push!(eval_strategies, SeaPearl.ILDSearch(10)) + append!(search_strategy_names, ["ILDSbudget", "DFS"]) + else + eval_strategies = SeaPearl.SearchStrategy[SeaPearl.ILDSearch(0),SeaPearl.ILDSearch(1),SeaPearl.ILDSearch(2),SeaPearl.ILDSearch(10)] + search_strategy_names = ["ILDS0", "ILDS1", "ILDS2", "ILDSbudget", "DFS"] + if include_dfs + push!(eval_strategies,SeaPearl.DFSearch()) + end + end + + agents = [] + valueSelectionArray = SeaPearl.ValueSelection[] + + for (i,model) in enumerate(models) + agent = RL.Agent( + policy=RL.QBasedPolicy( + learner=RL.DQNLearner( + approximator=RL.NeuralNetworkApproximator( + model=model, + optimizer=ADAM() + ) |> cpu, + target_approximator=RL.NeuralNetworkApproximator( + model=model, + optimizer=ADAM() + ) |> cpu, + loss_func=Flux.Losses.huber_loss + ), + explorer=RL.EpsilonGreedyExplorer( + ϵ_stable=0.0 + ) + ), + trajectory=RL.CircularArraySLARTTrajectory( + capacity=1, + state=SeaPearl.DefaultTrajectoryState[] => (), + legal_actions_mask=Vector{Bool} => (1,), + ) + ) + push!(agents,agent) + if occursin("specific",models_names[i]) + if isa(generator, SeaPearl.MaximumIndependentSetGenerator) + state_representation = SeaPearl.MISStateRepresentation{SeaPearl.MISFeaturization,SeaPearl.DefaultTrajectoryState} + elseif isa(generator, SeaPearl.LegacyGraphColoringGenerator) || isa(generator, SeaPearl.HomogeneousGraphColoringGenerator) || isa(generator, SeaPearl.ClusterizedGraphColoringGenerator) || isa(generator, SeaPearl.BarabasiAlbertGraphGenerator) || isa(generator, SeaPearl.ErdosRenyiGraphGenerator) || isa(generator, SeaPearl.WattsStrogatzGraphGenerator) + state_representation = SeaPearl.GraphColoringStateRepresentation{SeaPearl.GraphColoringFeaturization, SeaPearl.DefaultTrajectoryState} + end + push!(valueSelectionArray, SeaPearl.SimpleLearnedHeuristic{state_representation,reward,SeaPearl.FixedOutput}(agent)) + else + state_representation = SeaPearl.HeterogeneousStateRepresentation{SeaPearl.DefaultFeaturization, SeaPearl.HeterogeneousTrajectoryState} + push!(valueSelectionArray, SeaPearl.SimpleLearnedHeuristic{state_representation,reward,SeaPearl.FixedOutput}(agent; chosen_features=chosen_features)) + end + end + append!(valueSelectionArray, collect(values(basicHeuristics))) + append!(models_names, collect(keys(basicHeuristics))) + variableHeuristic = SeaPearl.MinDomainVariableSelection{has_objective}() + evaluator = SeaPearl.SameInstancesEvaluator(valueSelectionArray, generator; nbInstances=n) + folder_names = split(folder, "/") + #println(pwd()) + #println(folder) + if !isdir("../benchmarks/"*folder_names[1]) + 
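+        # Results layout (as set up below): everything is written under ../benchmarks/,
+        # mirroring the experiment folder name, and SeaPearlExtras.storedata produces one
+        # file per (search strategy, heuristic) pair, e.g. "ILDS1_<model or heuristic name>".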
mkdir("../benchmarks/"*folder_names[1]) + end + dir = mkdir("../benchmarks/"*folder) + for (j, search_strategy) in enumerate(eval_strategies) + if search_strategy == SeaPearl.ILDSearch(10) + SeaPearl.setNodesBudget!(evaluator, budget) + end + SeaPearl.evaluate(evaluator, variableHeuristic, search_strategy; verbose = verbose) + eval_metrics = evaluator.metrics + for i in 1:size(eval_metrics)[2] + SeaPearlExtras.storedata(eval_metrics[:, i]; filename= dir *"/"* search_strategy_names[j] * "_" * models_names[i]) + end + # empty evaluator metrics + if search_strategy == SeaPearl.ILDSearch(10) + SeaPearl.resetNodesBudget!(evaluator) + end + empty!(evaluator) + end +end +#benchmark(folder, n, chosen_features, has_objective, generator, basicHeuristics, include_dfs, budget) diff --git a/learning_cp/common/experiment.jl b/learning_cp/common/experiment.jl new file mode 100644 index 0000000..adfadee --- /dev/null +++ b/learning_cp/common/experiment.jl @@ -0,0 +1,147 @@ +using SeaPearl +using SeaPearlExtras +using ReinforcementLearning +const RL = ReinforcementLearning +using Flux +using GeometricFlux +using JSON +using BSON: @save, @load +using Dates +using Random +using LightGraphs +using OrderedCollections +using CircularArrayBuffers +using PyCall +using Pkg +using TensorBoardLogger, Logging + +""" +ENV["PYTHON"] = "/home/x86_64-unknown-linux_ol7-gnu/anaconda-2022.05/bin/python" +using Pkg +Pkg.build("PyCall") +run(`$(PyCall.python) -m pip install matplotlib`) +run(`$(PyCall.python) -m pip install pandas`) +run(`$(PyCall.python) -m pip install seaborn`) +run(`$(PyCall.python) -m pip install ipython`) +""" + +# ------------------- +# ------------------- +# Core function +# ------------------- +# ------------------- + +function trytrain(; nbEpisodes::Int, evalFreq::Int, nbInstances::Int, restartPerInstances::Int=1, generator::SeaPearl.AbstractModelGenerator, variableHeuristic::SeaPearl.AbstractVariableSelection=SeaPearl.MinDomainVariableSelection{false}(), learnedHeuristics::OrderedDict{String,<:SeaPearl.LearnedHeuristic}, basicHeuristics::OrderedDict{String,SeaPearl.BasicHeuristic}, base_name="experiment"::String, exp_name=""::String, out_solver=true::Bool, verbose=false::Bool, nbRandomHeuristics=0::Int, eval_timeout=nothing::Union{Nothing, Int}, training_timeout=nothing::Union{Nothing, Int}, eval_every =nothing::Union{Nothing, Int}, eval_strategy=SeaPearl.DFSearch(), strategy = SeaPearl.DFSearch(), seedTraining = nothing::Union{Nothing, Int}, seedEval = nothing, eval_generator=nothing, logger = nothing, device = cpu) + + + experienceTime = Base.replace("$(round(now(), Dates.Second(3)))", ":" => "-") + date = split(experienceTime, "T")[1] + time = split(experienceTime, "T")[2] + logger =TBLogger("tensorboard_logs/"*exp_name*date*"_"*time, min_level=Logging.Info) + + if !isdir(date) + mkdir(date) + end + dir = mkdir(string(date, "/exp_", exp_name, time)) + lh = last(collect(values(learnedHeuristics))) + code_dir = mkdir(dir*"/code/") + for file in readdir(".") + if isfile(file) + cp(file, code_dir*file) + end + end + + randomHeuristics = Array{SeaPearl.BasicHeuristic}(undef, 0) + for i in 1:nbRandomHeuristics + push!(randomHeuristics, SeaPearl.RandomHeuristic()) + end + + valueSelectionArray = cat(collect(values(learnedHeuristics)), collect(values(basicHeuristics)), randomHeuristics, dims=1) + + if !isnothing(eval_generator) + evaluator = SeaPearl.SameInstancesEvaluator(valueSelectionArray, eval_generator; evalFreq=evalFreq, nbInstances=nbInstances, evalTimeOut = eval_timeout, rng = 
MersenneTwister(seedEval) ) + else + evaluator = SeaPearl.SameInstancesEvaluator(valueSelectionArray, generator; evalFreq=evalFreq, nbInstances=nbInstances, evalTimeOut = eval_timeout, rng = MersenneTwister(seedEval)) + end + metricsArray, eval_metricsArray = SeaPearl.train!( + valueSelectionArray=valueSelectionArray, + generator=generator, + nbEpisodes=nbEpisodes, + strategy=strategy, + eval_strategy = eval_strategy, + variableHeuristic=variableHeuristic, + out_solver=out_solver, + verbose=verbose, + evaluator = evaluator, + training_timeout = training_timeout, + restartPerInstances=restartPerInstances, + rngTraining = MersenneTwister(seedTraining), + eval_every = eval_every, + logger = logger, + device = device, + ) + + #saving model weights + for (key, lh) in learnedHeuristics + if (hasfield(typeof(lh.agent.policy),:approximator)) #PPO + model = Flux.cpu(lh.agent.policy.approximator) + else #DQN + model = Flux.cpu(lh.agent.policy.learner.approximator) + end + @save dir * "/model_" * key * ".bson" model + end + + counter = 0 + for key in keys(learnedHeuristics) + counter += 1 + SeaPearlExtras.storedata(metricsArray[counter]; filename=dir * "/" * base_name * "_training_" * key) + end + + counter = 0 + for key in keys(learnedHeuristics) + counter += 1 + SeaPearlExtras.storedata(eval_metricsArray[:, counter]; filename=dir * "/" * base_name * "_" * key) + end + for key in keys(basicHeuristics) + counter += 1 + SeaPearlExtras.storedata(eval_metricsArray[:, counter]; filename=dir * "/" * base_name * "_" * key) + end + for i = 1:nbRandomHeuristics + SeaPearlExtras.storedata(eval_metricsArray[:, counter+i]; filename=dir * "/" * base_name * "_random$(i)") + end + + py""" + import sys + sys.path.insert(0, "/home/martom/SeaPearl/SeaPearlExtras.jl/src/metrics/basicmetrics/") + from benchmarkPy import * + import plots + import numpy as np + def benchmark(path): + print_all(path +"/") + + def plot(path,eval): + plots.all(path +"/", window=100, estimator=np.mean, ilds= eval) + """ + + py"plot"(dir, eval_strategy != SeaPearl.DFSearch()) + + chosen_features = valueSelectionArray[1].chosen_features + feature_size = [6, 5, 2] + + n = 10 # Number of instances to evaluate on + budget = 1000 # Budget of visited nodes + has_objective = false # Set it to true if we have to branch on the objective variable + include_dfs = (eval_strategy == SeaPearl.DFSearch()) # Set it to true if you want to evaluate with DFS in addition to ILDS + + selectMin(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.minimum(x.domain) + heuristic_min = SeaPearl.BasicHeuristic(selectMin) + basicHeuristics = OrderedDict( + "random" => SeaPearl.RandomHeuristic() + ) + include("../common/benchmark.jl") + Base.invokelatest(benchmark, dir, n, chosen_features, has_objective, generator, basicHeuristics, include_dfs, budget; ILDS = eval_strategy) + + py"benchmark"(dir) + + return metricsArray, eval_metricsArray +end \ No newline at end of file diff --git a/learning_cp/common/performanceprofile.jl b/learning_cp/common/performanceprofile.jl new file mode 100644 index 0000000..d0991f9 --- /dev/null +++ b/learning_cp/common/performanceprofile.jl @@ -0,0 +1,89 @@ +# This util evaluates a trained model on n new instances +include("experiment.jl") +include("../comparison/comparison.jl") +include("utils.jl") + +# Parameters to edit +folder = "../comparison/exp_MIS_70_8_heterogeneous_ffcpnn_50012022-06-17T17-27-00/" +chosen_features = Dict( + "node_number_of_neighbors" => true, + "constraint_type" => true, + "constraint_activity" => true, + 
"nb_not_bounded_variable" => true, + "variable_initial_domain_size" => true, + "variable_domain_size" => true, + "variable_is_objective" => true, + "variable_assigned_value" => true, + "variable_is_bound" => true, + "values_raw" => true) + +generator = SeaPearl.MaximumIndependentSetGenerator(70, 8) +n = 100 +has_objective = true +eval_strategy = SeaPearl.ILDSearch(2) + +selectMax(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.maximum(x.domain) +heuristic_max = SeaPearl.BasicHeuristic(selectMax) +basicHeuristics = OrderedDict( + "max" => heuristic_max, + "random" => SeaPearl.RandomHeuristic() +) + + +function performanceProfile(folder::String, n::Int, chosen_features, has_objective::Bool, generator, basicHeuristics; eval_strategy = SeaPearl.DFSearch(), verbose=true) + models=[] + models_names=[] + for file in readdir(folder) + if splitext(file)[2] == ".bson" + @load folder * "/" * file model + push!(models, model) + push!(models_names, splitext(file)[1]) + end + end + + reward = SeaPearl.GeneralReward + + agents = [] + for model in models + push!(agents,RL.Agent( + policy=RL.QBasedPolicy( + learner=RL.DQNLearner( + approximator=RL.NeuralNetworkApproximator( + model=model, + optimizer=ADAM() + ), + target_approximator=RL.NeuralNetworkApproximator( + model=model, + optimizer=ADAM() + ), + loss_func=Flux.Losses.huber_loss + ), + explorer=RL.EpsilonGreedyExplorer( + ϵ_stable=0.0 + ) + ), + trajectory=RL.CircularArraySLARTTrajectory( + capacity=1, + state=SeaPearl.DefaultTrajectoryState[] => (), + legal_actions_mask=Vector{Bool} => (1,), + ) + )) + end + SR_heterogeneous = SeaPearl.HeterogeneousStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.HeterogeneousTrajectoryState} + valueSelectionArray = SeaPearl.ValueSelection[SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent; chosen_features=chosen_features) for agent in agents] + append!(valueSelectionArray, collect(values(basicHeuristics))) + append!(models_names, collect(keys(basicHeuristics))) + variableHeuristic = SeaPearl.MinDomainVariableSelection{has_objective}() + evaluator = SeaPearl.SameInstancesEvaluator(valueSelectionArray, generator; nbInstances=n, evalTimeOut=20) + SeaPearl.evaluate(evaluator, variableHeuristic, eval_strategy; verbose = verbose) + eval_metrics = evaluator.metrics + folder_names = split(folder, "/") + dir = mkdir("../performanceprofiles/"*folder_names[length(folder_names)-1]) + for i in 1:size(eval_metrics)[2] + SeaPearlExtras.storedata(eval_metrics[:, i]; filename= dir *"/"* models_names[i]) + end +end + +performanceProfile(folder, n, chosen_features, has_objective, generator, basicHeuristics; eval_strategy=eval_strategy) + +nothing \ No newline at end of file diff --git a/learning_cp/common/utils.jl b/learning_cp/common/utils.jl new file mode 100644 index 0000000..0f7a933 --- /dev/null +++ b/learning_cp/common/utils.jl @@ -0,0 +1,766 @@ +using SeaPearl + +############################################################################### +######### Utils +############################################################################### + +get_default_graph_conv_layer(in, out, pool; init = Flux.glorot_uniform) = SeaPearl.GraphConv(in => out, Flux.leakyrelu; pool = pool, init = init) + +function get_default_graph_chain(in, mid, out, n_layers,pool; init = Flux.glorot_uniform ) + @assert n_layers >= 1 + layers = [] + if n_layers == 1 + push!(layers, get_default_graph_conv_layer(in, out, pool; init = init)) + elseif n_layers == 2 + push!(layers, get_default_graph_conv_layer(in, 
mid,pool; init = init)) + push!(layers, get_default_graph_conv_layer(mid, out, pool; init = init)) + else + push!(layers, get_default_graph_conv_layer(in, mid, pool; init = init)) + for i in 2:(n_layers-1) + push!(layers, get_default_graph_conv_layer(mid, mid, pool; init = init)) + end + push!(layers, get_default_graph_conv_layer(mid, out, pool; init = init)) + end + return Flux.Chain(layers...) +end + + +function get_dense_chain(in, mid, out, n_layers, σ=Flux.identity; init = Flux.glorot_uniform ) + @assert n_layers >= 1 + layers = [] + if n_layers == 1 + push!(layers, Flux.Dense(in, out, init= init)) + elseif n_layers == 2 + push!(layers, Flux.Dense(in, mid, σ, init= init)) + push!(layers, Flux.Dense(mid, out, init= init)) + else + push!(layers, Flux.Dense(in, mid, σ, init= init)) + for i in 2:(n_layers-1) + push!(layers, Flux.Dense(mid, mid, σ, init= init)) + end + push!(layers, Flux.Dense(mid, out, init= init)) + end + return Flux.Chain(layers...) +end + +function get_default_cpnn(;feature_size, conv_size, dense_size, output_size, n_layers_graph, n_layers_node, n_layers_output, pool=SeaPearl.sumPooling(), σ=Flux.leakyrelu, init = Flux.glorot_uniform) + return SeaPearl.CPNN( + graphChain=get_default_graph_chain(feature_size, conv_size, conv_size, n_layers_graph, pool; init = init), + nodeChain=get_dense_chain(conv_size, dense_size, dense_size, n_layers_node, σ; init=init), + outputChain=get_dense_chain(dense_size, dense_size, output_size, n_layers_output, σ; init=init) + ) +end + +function get_default_ffcpnn(;feature_size, conv_size, dense_size, output_size, n_layers_graph, n_layers_node, n_layers_output, pool=SeaPearl.meanPooling(), σ=Flux.leakyrelu, init = Flux.glorot_uniform) + return SeaPearl.FullFeaturedCPNN( + graphChain=get_default_graph_chain(feature_size, conv_size, conv_size, n_layers_graph; pool = pool, init = init), + nodeChain=get_dense_chain(conv_size, dense_size, dense_size, n_layers_node, σ; init=init), + outputChain=get_dense_chain(2*dense_size, dense_size, output_size, n_layers_output, σ; init=init) + ) +end + +function get_default_learner(batch_size, update_horizon, min_replay_history, update_freq, target_update_freq, get_default_nn) + return RL.DQNLearner( + approximator=RL.NeuralNetworkApproximator( + model=get_default_nn(), + optimizer=ADAM() + ), + target_approximator=RL.NeuralNetworkApproximator( + model=get_default_nn(), + optimizer=ADAM() + ), + loss_func=Flux.Losses.huber_loss, + batch_size=batch_size, + update_horizon=update_horizon, + min_replay_history=min_replay_history, + update_freq=update_freq, + target_update_freq=target_update_freq, + ) +end + +function get_epsilon_greedy_explorer(decay_steps, ϵ_stable; rng=nothing) + if isnothing(rng) + return RL.EpsilonGreedyExplorer( + ϵ_stable=ϵ_stable, + kind=:exp, + decay_steps=decay_steps, + step=1 + ) + else + return RL.EpsilonGreedyExplorer( + ϵ_stable=ϵ_stable, + kind=:exp, + decay_steps=decay_steps, + step=1, + rng = rng + ) + end +end + +function get_ucb_explorer(c, n_actions) + return RL.UCBExplorer(n_actions; c=c) +end + +function get_softmax_explorer(T_stable, T_init, decay_steps) + return SeaPearl.SoftmaxTDecayExplorer(;T_init=T_init, T_stable=T_stable, decay_steps=decay_steps) +end + +function get_default_slart_trajectory(; capacity, n_actions) + return RL.CircularArraySLARTTrajectory( + capacity=capacity, + state=SeaPearl.DefaultTrajectoryState[] => (), + legal_actions_mask=Vector{Bool} => (n_actions,), + ) +end + +function get_default_sart_trajectory(; capacity) + return 
RL.CircularArraySARTTrajectory( + capacity=capacity, + state=SeaPearl.DefaultTrajectoryState[] => (), + ) +end + +function get_default_agent(;get_explorer, batch_size, update_horizon, min_replay_history, update_freq, target_update_freq, get_default_nn, get_default_trajectory) + return RL.Agent( + policy=RL.QBasedPolicy( + learner=get_default_learner(batch_size, update_horizon, min_replay_history, update_freq, target_update_freq, get_default_nn), + explorer=get_explorer(), + ), + trajectory=get_default_trajectory() + ) +end + + +struct HeterogeneousModel{A,B} + Inputlayer::A + Middlelayers::Vector{B} + + function HeterogeneousModel(Inputlayer, Middlelayers) + return new{typeof(Inputlayer), eltype(Middlelayers)}(Inputlayer,Middlelayers) + end + + function HeterogeneousModel(original_features_size::Vector{Int}, mid::Int, out::Int, n_layers::Int; pool=SeaPearl.meanPooling(), init = Flux.glorot_uniform) + + Middlelayers=SeaPearl.HeterogeneousGraphConv[] + + if n_layers == 1 + Inputlayer = get_heterogeneous_graph_conv_init_layer(original_features_size, out, init = init) + else + Inputlayer = get_heterogeneous_graph_conv_init_layer(original_features_size, mid, init = init) + for i in 1:n_layers - 2 + push!(Middlelayers, get_heterogeneous_graph_conv_layer(mid, mid, original_features_size, pool, init = init)) + end + push!(Middlelayers,get_heterogeneous_graph_conv_layer(mid, out, original_features_size, pool, init = init)) + end + return new{typeof(Inputlayer), eltype(Middlelayers)}(Inputlayer, Middlelayers) + + end +end + +Flux.@functor HeterogeneousModel +""" +function Flux.functor(::Type{<:HeterogeneousModel}, m) + return (m.Inputlayer, m.Middlelayers), ls -> HeterogeneousModel(ls[1], ls[2]) +end +""" +function (m::HeterogeneousModel)(fg) + original_fg = deepcopy(fg) + out = m.Inputlayer(fg) + for layer in m.Middlelayers + out = layer(out, original_fg) + end + return out +end + + + +struct HeterogeneousModel3 + layer1::SeaPearl.HeterogeneousGraphConvInit + layer2::SeaPearl.HeterogeneousGraphConv + layer3::SeaPearl.HeterogeneousGraphConv +end + +function (m::HeterogeneousModel3)(fg) + original_fg = fg + out1 = m.layer1(fg) + out2 = m.layer2(out1, original_fg) + out3 = m.layer3(out2, original_fg) + return out3 +end + +Flux.@functor HeterogeneousModel3 + +struct HeterogeneousModel4 + layer1::SeaPearl.HeterogeneousGraphConvInit + layer2::SeaPearl.HeterogeneousGraphConv + layer3::SeaPearl.HeterogeneousGraphConv + layer4::SeaPearl.HeterogeneousGraphConv +end + +function (m::HeterogeneousModel4)(fg) + original_fg = fg + out1 = m.layer1(fg) + out2 = m.layer2(out1, original_fg) + out3 = m.layer3(out2, original_fg) + out4 = m.layer4(out3, original_fg) + return out4 +end + +Flux.@functor HeterogeneousModel4 + +#= +struct HGTModel1 + layer1::SeaPearl.HeterogeneousGraphConvInit +end + +function (m::HGTModel1)(fg) + out1 = m.layer1(fg) + return out1 +end + +Flux.@functor HGTModel1 + +struct HGTModel2 + layer1::SeaPearl.HeterogeneousGraphConvInit + layer2::SeaPearl.HeterogeneousGraphTransformer +end + +function (m::HGTModel2)(fg) + original_fg = fg + out1 = m.layer1(fg) + out2 = m.layer2(out1) + return out2 +end + +Flux.@functor HGTModel2 + +struct HGTModel3 + layer1::SeaPearl.HeterogeneousGraphConvInit + layer2::SeaPearl.HeterogeneousGraphTransformer + layer3::SeaPearl.HeterogeneousGraphTransformer +end + +function (m::HGTModel3)(fg) + original_fg = fg + out1 = m.layer1(fg) + out2 = m.layer2(out1) + out3 = m.layer3(out2) + return out3 +end + +Flux.@functor HGTModel3 + +struct HGTModel4 + 
layer1::SeaPearl.HeterogeneousGraphConvInit + layer2::SeaPearl.HeterogeneousGraphTransformer + layer3::SeaPearl.HeterogeneousGraphTransformer + layer4::SeaPearl.HeterogeneousGraphTransformer +end + +function (m::HGTModel4)(fg) + original_fg = fg + out1 = m.layer1(fg) + out2 = m.layer2(out1) + out3 = m.layer3(out2) + out4 = m.layer4(out3) + return out4 +end + +Flux.@functor HGTModel4 + +struct HGTModel5 + layer1::SeaPearl.HeterogeneousGraphConvInit + layer2::SeaPearl.HeterogeneousGraphTransformer + layer3::SeaPearl.HeterogeneousGraphTransformer + layer4::SeaPearl.HeterogeneousGraphTransformer + layer5::SeaPearl.HeterogeneousGraphTransformer +end + +function (m::HGTModel5)(fg) + original_fg = fg + out1 = m.layer1(fg) + out2 = m.layer2(out1) + out3 = m.layer3(out2) + out4 = m.layer4(out3) + out5 = m.layer5(out4) + return out5 +end + +Flux.@functor HGTModel5 + +struct HGTModel6 + layer1::SeaPearl.HeterogeneousGraphConvInit + layer2::SeaPearl.HeterogeneousGraphTransformer + layer3::SeaPearl.HeterogeneousGraphTransformer + layer4::SeaPearl.HeterogeneousGraphTransformer + layer5::SeaPearl.HeterogeneousGraphTransformer + layer6::SeaPearl.HeterogeneousGraphTransformer +end + +function (m::HGTModel6)(fg) + original_fg = fg + out1 = m.layer1(fg) + out2 = m.layer2(out1) + out3 = m.layer3(out2) + out4 = m.layer4(out3) + out5 = m.layer5(out4) + out6 = m.layer6(out5) + return out6 +end + +Flux.@functor HGTModel6 +=# +get_heterogeneous_graph_conv_layer(in, out, original_features_size, pool; init = Flux.glorot_uniform) = SeaPearl.HeterogeneousGraphConv(in => out, original_features_size, Flux.leakyrelu; pool = pool, init = init) + +get_heterogeneous_graph_conv_init_layer(original_features_size, out; init = Flux.glorot_uniform) = SeaPearl.HeterogeneousGraphConvInit(original_features_size, out, Flux.leakyrelu, init = init) + +#=function get_heterogeneous_graph_chain(original_features_size, mid, out, n_layers; pool=SeaPearl.meanPooling(), init = Flux.glorot_uniform) + @assert n_layers >= 1 and n_layers <= 6 + if n_layers == 1 + return HeterogeneousModel1( + get_heterogeneous_graph_conv_init_layer(original_features_size, mid, init = init) + ) + elseif n_layers == 2 + return HeterogeneousModel2( + get_heterogeneous_graph_conv_init_layer(original_features_size, mid, init = init), + get_heterogeneous_graph_conv_layer(mid, out, original_features_size, pool, init = init) + ) + elseif n_layers == 3 + return HeterogeneousModel3( + get_heterogeneous_graph_conv_init_layer(original_features_size, mid, init = init), + get_heterogeneous_graph_conv_layer(mid, mid, original_features_size, pool, init = init), + get_heterogeneous_graph_conv_layer(mid, out, original_features_size, pool, init = init), + ) + elseif n_layers == 4 + return HeterogeneousModel4( + get_heterogeneous_graph_conv_init_layer(original_features_size, mid, init = init), + get_heterogeneous_graph_conv_layer(mid, mid, original_features_size, pool, init = init), + get_heterogeneous_graph_conv_layer(mid, mid, original_features_size, pool, init = init), + get_heterogeneous_graph_conv_layer(mid, out, original_features_size, pool, init = init), + ) + elseif n_layers == 5 + return HeterogeneousModel5( + get_heterogeneous_graph_conv_init_layer(original_features_size, mid, init = init), + get_heterogeneous_graph_conv_layer(mid, mid, original_features_size, pool, init = init), + get_heterogeneous_graph_conv_layer(mid, mid, original_features_size, pool, init = init), + get_heterogeneous_graph_conv_layer(mid, mid, original_features_size, pool, init = init), + 
get_heterogeneous_graph_conv_layer(mid, out, original_features_size, pool, init = init) + ) + elseif n_layers == 6 + return HeterogeneousModel6( + get_heterogeneous_graph_conv_init_layer(original_features_size, mid, init = init), + get_heterogeneous_graph_conv_layer(mid, mid, original_features_size, pool, init = init), + get_heterogeneous_graph_conv_layer(mid, mid, original_features_size, pool, init = init), + get_heterogeneous_graph_conv_layer(mid, mid, original_features_size, pool, init = init), + get_heterogeneous_graph_conv_layer(mid, mid, original_features_size, pool, init = init), + get_heterogeneous_graph_conv_layer(mid, out, original_features_size, pool, init = init), + ) + elseif n_layers == 24 + return HeterogeneousModel24( + get_heterogeneous_graph_conv_init_layer(original_features_size, mid, init = init), + get_heterogeneous_graph_conv_layer(mid, mid, original_features_size, pool, init = init), + get_heterogeneous_graph_conv_layer(mid, mid, original_features_size, pool, init = init), + get_heterogeneous_graph_conv_layer(mid, mid, original_features_size, pool, init = init), + get_heterogeneous_graph_conv_layer(mid, mid, original_features_size, pool, init = init), + get_heterogeneous_graph_conv_layer(mid, mid, original_features_size, pool, init = init), + get_heterogeneous_graph_conv_layer(mid, mid, original_features_size, pool, init = init), + get_heterogeneous_graph_conv_layer(mid, mid, original_features_size, pool, init = init), + get_heterogeneous_graph_conv_layer(mid, mid, original_features_size, pool, init = init), get_heterogeneous_graph_conv_layer(mid, mid, original_features_size, pool, init = init), + get_heterogeneous_graph_conv_layer(mid, mid, original_features_size, pool, init = init), + get_heterogeneous_graph_conv_layer(mid, mid, original_features_size, pool, init = init), + get_heterogeneous_graph_conv_layer(mid, mid, original_features_size, pool, init = init), get_heterogeneous_graph_conv_layer(mid, mid, original_features_size, pool, init = init), + get_heterogeneous_graph_conv_layer(mid, mid, original_features_size, pool, init = init), + get_heterogeneous_graph_conv_layer(mid, mid, original_features_size, pool, init = init), + get_heterogeneous_graph_conv_layer(mid, mid, original_features_size, pool, init = init), get_heterogeneous_graph_conv_layer(mid, mid, original_features_size, pool, init = init), + get_heterogeneous_graph_conv_layer(mid, mid, original_features_size, pool, init = init), + get_heterogeneous_graph_conv_layer(mid, mid, original_features_size, pool, init = init), + get_heterogeneous_graph_conv_layer(mid, mid, original_features_size, pool, init = init), + get_heterogeneous_graph_conv_layer(mid, mid, original_features_size, pool, init = init), + get_heterogeneous_graph_conv_layer(mid, mid, original_features_size, pool, init = init), + get_heterogeneous_graph_conv_layer(mid, out, original_features_size, pool, init = init), + ) + end +end +=# + +function get_heterogeneous_graph_chain(original_features_size, mid, out, n_layers; pool=SeaPearl.meanPooling(), init = Flux.glorot_uniform, device = cpu) + @assert n_layers >= 1 + return HeterogeneousModel(original_features_size, mid, out, n_layers; pool= pool, init = init) +end + +get_hgt_layer(dim, heads) = SeaPearl.HeterogeneousGraphTransformer(dim, heads) + +function get_hgt(original_features_size, out, n_layers; heads=4, init = Flux.glorot_uniform) + @assert n_layers >= 1 and n_layers <= 6 + if n_layers == 1 + return HGTModel1( + get_heterogeneous_graph_conv_init_layer(original_features_size, out, init = 
init) + ) + elseif n_layers == 2 + return HGTModel2( + get_heterogeneous_graph_conv_init_layer(original_features_size, out, init = init), + get_hgt_layer(out, heads) + ) + elseif n_layers == 3 + return HGTModel3( + get_heterogeneous_graph_conv_init_layer(original_features_size, out, init = init), + get_hgt_layer(out, heads), + get_hgt_layer(out, heads), + ) + elseif n_layers == 4 + return HGTModel4( + get_heterogeneous_graph_conv_init_layer(original_features_size, out, init = init), + get_hgt_layer(out, heads), + get_hgt_layer(out, heads), + get_hgt_layer(out, heads), + ) + elseif n_layers == 5 + return HGTModel5( + get_heterogeneous_graph_conv_init_layer(original_features_size, out, init = init), + get_hgt_layer(out, heads), + get_hgt_layer(out, heads), + get_hgt_layer(out, heads), + get_hgt_layer(out, heads) + ) + elseif n_layers == 6 + return HGTModel6( + get_heterogeneous_graph_conv_init_layer(original_features_size, out, init = init), + get_hgt_layer(out, heads), + get_hgt_layer(out, heads), + get_hgt_layer(out, heads), + get_hgt_layer(out, heads), + get_hgt_layer(out, heads), + ) + end +end +# struct Heterog, pooleneousModelHGT1 +# layer1::SeaPea, poolrl.HeterogeneousGraphConvInit +# end + +# function (m::HeterogeneousModelHGT1)(fg) +# out1 = m.layer1(fg) +# return ou, poolt1 +# e, poolnd + +# Flux.@functor HeterogeneousModelHGT1 + +# struct HeterogeneousModelHGT2 +# layer1::SeaPearl.HeterogeneousGraphConvInit +# layer2::SeaPearl.HeterogeneousGraphTransformer +# end + +# function (m::HeterogeneousModelHGT2)(fg) +# original_fg = fg +# out1 = m.layer1(fg) +# out2 = m.layer2(out1) +# return out2 +# end + +# Flux.@functor HeterogeneousModelHGT2 + +# struct HeterogeneousModelHGT3 +# layer1::SeaPearl.HeterogeneousGraphConvInit +# layer2::SeaPearl.HeterogeneousGraphTransformer +# layer3::SeaPearl.HeterogeneousGraphTransformer +# end + +# function (m::HeterogeneousModelHGT3)(fg) +# original_fg = fg +# out1 = m.layer1(fg) +# out2 = m.layer2(out1) +# out3 = m.layer3(out2) +# return out3 +# end + +# Flux.@functor HeterogeneousModelHGT3 + +# struct HeterogeneousModelHGT4 +# layer1::SeaPearl.HeterogeneousGraphConvInit +# layer2::SeaPearl.HeterogeneousGraphTransformer +# layer3::SeaPearl.HeterogeneousGraphTransformer +# layer4::SeaPearl.HeterogeneousGraphTransformer +# end + +# function (m::HeterogeneousModelHGT4)(fg) +# original_fg = fg +# out1 = m.layer1(fg) +# out2 = m.layer2(out1) +# out3 = m.layer3(out2) +# out4 = m.layer4(out3) +# return out4 +# end + +# Flux.@functor HeterogeneousModelHGT4 + +# struct HeterogeneousModelHGT5 +# layer1::SeaPearl.HeterogeneousGraphConvInit +# layer2::SeaPearl.HeterogeneousGraphTransformer +# layer3::SeaPearl.HeterogeneousGraphTransformer +# layer4::SeaPearl.HeterogeneousGraphTransformer +# layer5::SeaPearl.HeterogeneousGraphTransformer +# end + +# function (m::HeterogeneousModelHGT5)(fg) +# original_fg = fg +# out1 = m.layer1(fg) +# out2 = m.layer2(out1) +# out3 = m.layer3(out2) +# out4 = m.layer4(out3) +# out5 = m.layer5(out4) +# return out5 +# end + +# Flux.@functor HeterogeneousModelHGT5 + +# struct HeterogeneousModelHGT6 +# layer1::SeaPearl.HeterogeneousGraphConvInit +# layer2::SeaPearl.HeterogeneousGraphTransformer +# layer3::SeaPearl.HeterogeneousGraphTransformer +# layer4::SeaPearl.HeterogeneousGraphTransformer +# layer5::SeaPearl.HeterogeneousGraphTransformer +# layer6::SeaPearl.HeterogeneousGraphTransformer +# end + +# function (m::HeterogeneousModelHGT6)(fg) +# original_fg = fg +# out1 = m.layer1(fg) +# out2 = m.layer2(out1) +# out3 = m.layer3(out2) 
+# out4 = m.layer4(out3) +# out5 = m.layer5(out4) +# out6 = m.layer6(out5) +# return out6 +# end + +# Flux.@functor HeterogeneousModelHGT6 + +# get_heterogeneous_graph_conv_layer_hgt(in, heads) = SeaPearl.HeterogeneousGraphTransformer(in, heads) + +# function get_heterogeneous_graph_chain_hgt(original_features_size, out, n_layers, heads) +# @assert n_layers >= 1 and n_layers <= 6 +# if n_layers == 1 +# return HeterogeneousModelHGT1( +# get_heterogeneous_graph_conv_init_layer(original_features_size, out) +# ) +# elseif n_layers == 2 +# return HeterogeneousModelHGT2( +# get_heterogeneous_graph_conv_init_layer(original_features_size, out), +# get_heterogeneous_graph_conv_layer_hgt(out, heads) +# ) +# elseif n_layers == 3 +# return HeterogeneousModelHGT3( +# get_heterogeneous_graph_conv_init_layer(original_features_size, out), +# get_heterogeneous_graph_conv_layer_hgt(out, heads), +# get_heterogeneous_graph_conv_layer_hgt(out, heads), +# ) +# elseif n_layers == 4 +# return HeterogeneousModelHGT4( +# get_heterogeneous_graph_conv_init_layer(original_features_size, out), +# get_heterogeneous_graph_conv_layer_hgt(out, heads), +# get_heterogeneous_graph_conv_layer_hgt(out, heads), +# get_heterogeneous_graph_conv_layer_hgt(out, heads), +# ) +# elseif n_layers == 5 +# return HeterogeneousModelHGT5( +# get_heterogeneous_graph_conv_init_layer(original_features_size, out), +# get_heterogeneous_graph_conv_layer_hgt(out, heads), +# get_heterogeneous_graph_conv_layer_hgt(out, heads), +# get_heterogeneous_graph_conv_layer_hgt(out, heads), +# get_heterogeneous_graph_conv_layer_hgt(out, heads) +# ) +# elseif n_layers == 6 +# return HeterogeneousModelHGT6( +# get_heterogeneous_graph_conv_init_layer(original_features_size, out), +# get_heterogeneous_graph_conv_layer_hgt(out, heads), +# get_heterogeneous_graph_conv_layer_hgt(out, heads), +# get_heterogeneous_graph_conv_layer_hgt(out, heads), +# get_heterogeneous_graph_conv_layer_hgt(out, heads), +# get_heterogeneous_graph_conv_layer_hgt(out, heads) +# ) +# end +# end + +function get_heterogeneous_cpnn(;feature_size, conv_size=8, dense_size=16, output_size, n_layers_graph=3, n_layers_node=2, n_layers_output=2, pool=SeaPearl.meanPooling(), σ=Flux.leakyrelu, init = Flux.glorot_uniform) + return SeaPearl.HeterogeneousCPNN( + graphChain=get_heterogeneous_graph_chain(feature_size, conv_size, conv_size, n_layers_graph; pool=pool, init = init), + nodeChain=get_dense_chain(conv_size, dense_size, dense_size, n_layers_node, σ, init = init), + outputChain=get_dense_chain(dense_size, dense_size, output_size, n_layers_output, σ, init = init) + ) +end + +function get_heterogeneous_fullfeaturedcpnn(;feature_size, conv_type="gc", conv_size=8, dense_size=16, output_size=1, n_layers_graph=3, n_layers_node=2, n_layers_output=2, pool=SeaPearl.meanPooling(), σ=Flux.leakyrelu, heads=4, init = Flux.glorot_uniform, device = cpu) + if conv_type == "gc" + return SeaPearl.HeterogeneousFullFeaturedCPNN( + get_heterogeneous_graph_chain(feature_size, conv_size, conv_size, n_layers_graph; pool=pool, init = init), + get_dense_chain(conv_size, dense_size, dense_size, n_layers_node, σ, init = init), + Flux.Chain(), + get_dense_chain(2*dense_size, dense_size, output_size, n_layers_output, σ, init = init) + )|> device + elseif conv_type == "hgt" + return SeaPearl.HeterogeneousFullFeaturedCPNN( + get_hgt(feature_size, conv_size, n_layers_graph; heads=heads, init = init), + get_dense_chain(conv_size, dense_size, dense_size, n_layers_node, σ, init = init), + Flux.Chain(), + 
get_dense_chain(2*dense_size, dense_size, output_size, n_layers_output, σ, init = init) + ) + else + error("conv_type unknown!") + end +end + +function get_pretrained_heterogeneous_fullfeaturedcpnn(file_path) + @load file_path model + return deepcopy(model.model) +end + +function get_heterogeneous_ffcpnnv2(;feature_size, conv_size=8, dense_size=16, output_size, n_layers_graph=3, n_layers_output=2, pool=SeaPearl.meanPooling()) + return SeaPearl.HeterogeneousFFCPNNv2( + get_heterogeneous_graph_chain(feature_size, conv_size, dense_size, n_layers_graph; pool=pool), + Flux.Chain(), + get_dense_chain(5*dense_size, dense_size, output_size, n_layers_output) #TODO: fix the 'in' argument (hardcoded) + ) +end + +function get_heterogeneous_ffcpnnv3(;feature_size, conv_size=8, dense_size=16, output_size, n_layers_graph=3, n_layers_output=2, pool=SeaPearl.meanPooling(), σ=Flux.leakyrelu, pooling="mean") + return SeaPearl.HeterogeneousFFCPNNv3( + get_heterogeneous_graph_chain(feature_size, conv_size, dense_size, n_layers_graph; pool=pool), + Flux.Chain(), + get_dense_chain(dense_size, dense_size, output_size, n_layers_output, σ); + pooling=pooling + ) +end + +function get_heterogeneous_ffcpnnv4(;feature_size, conv_size=8, dense_size=16, output_size, n_layers_graph=3, n_layers_node=2, n_layers_output=2, pool=SeaPearl.meanPooling(), σ=Flux.leakyrelu, pooling="mean") + return SeaPearl.HeterogeneousFFCPNNv4( + get_heterogeneous_graph_chain(feature_size, conv_size, dense_size, n_layers_graph; pool=pool), + get_dense_chain(conv_size, dense_size, dense_size, n_layers_node, σ), + Flux.Chain(), + get_dense_chain(dense_size, dense_size, output_size, n_layers_output, σ); + pooling=pooling + ) +end + +function get_heterogeneous_variableoutputcpnn(;feature_size, conv_size=8, dense_size=16, output_size=1, n_layers_graph=3, n_layers_node=2, n_layers_output=2, pool=SeaPearl.meanPooling()) + return SeaPearl.HeterogeneousVariableOutputCPNN( + get_heterogeneous_graph_chain(feature_size, conv_size, conv_size, n_layers_graph; pool=pool), + get_dense_chain(conv_size, dense_size, dense_size, n_layers_node), + get_dense_chain(2*dense_size, dense_size, output_size, n_layers_output) + ) +end + +# function get_heterogeneous_cpnn_hgt(;feature_size, conv_size, dense_size, output_size, n_layers_graph, n_layers_node, n_layers_output, heads) +# return SeaPearl.HeterogeneousCPNN( +# graphChain=get_heterogeneous_graph_chain_hgt(feature_size, conv_size, n_layers_graph, heads), +# nodeChain=get_dense_chain(conv_size, dense_size, dense_size, n_layers_node), +# outputChain=get_dense_chain(dense_size, dense_size, output_size, n_layers_output) +# ) +# end + +function get_heterogeneous_learner(batch_size, update_horizon, min_replay_history, update_freq, target_update_freq, get_heterogeneous_nn, γ) + return RL.DQNLearner( + approximator=RL.NeuralNetworkApproximator( + model=get_heterogeneous_nn(), + optimizer=ADAM() + ), + target_approximator=RL.NeuralNetworkApproximator( + model=get_heterogeneous_nn(), + optimizer=ADAM() + ), + loss_func=Flux.Losses.huber_loss, + batch_size=batch_size, + update_horizon=update_horizon, + min_replay_history=min_replay_history, + update_freq=update_freq, + target_update_freq=target_update_freq, + γ = γ + ) +end + +function get_heterogeneous_slart_trajectory(; capacity, n_actions) + return RL.CircularArraySLARTTrajectory( + capacity=capacity, + state=SeaPearl.HeterogeneousTrajectoryState[] => (), + legal_actions_mask=Vector{Bool} => (n_actions,), + ) +end + +function get_heterogeneous_ppo_trajectory(; capacity, 
n_actions) + return RL.MaskedPPOTrajectory( + capacity=capacity, + state=SeaPearl.HeterogeneousTrajectoryState[] => (), + legal_actions_mask=Matrix{Bool} => (n_actions,1), + action = Vector{Int} => (1,), + action_log_prob=Vector{Float32} => (1,), + reward = Vector{Float32} => (1,), + terminal = Vector{Bool} => (1,), + ) +end + +CircularArrayPSLARTTrajectory(; capacity, kwargs...) = RL.PrioritizedTrajectory( + RL.CircularArraySLARTTrajectory(; capacity = capacity, kwargs...), + RL.SumTree(capacity), +) + +function get_heterogeneous_prioritized_trajectory(; capacity, n_actions) + return CircularArrayPSLARTTrajectory( + capacity=capacity, + state=SeaPearl.HeterogeneousTrajectoryState[] => (), + legal_actions_mask=Vector{Bool} => (n_actions,), + ) +end + +function get_heterogeneous_sart_trajectory(; capacity) + return RL.CircularArraySARTTrajectory( + capacity=capacity, + state=SeaPearl.HeterogeneousTrajectoryState[] => (), + ) +end + +function get_heterogeneous_agent(; get_explorer, batch_size=16, update_horizon, min_replay_history, update_freq=1, target_update_freq=200, γ = 0.999f0, get_heterogeneous_trajectory, get_heterogeneous_nn) + return RL.Agent( + policy=RL.QBasedPolicy( + learner=get_heterogeneous_learner(batch_size, update_horizon, min_replay_history, update_freq, target_update_freq, get_heterogeneous_nn, γ), + explorer=get_explorer(), + ), + trajectory=get_heterogeneous_trajectory() + ) +end + +function get_heterogeneous_agent_priodqn(; get_explorer, batch_size=16, update_horizon, min_replay_history, update_freq=1, target_update_freq=200, get_heterogeneous_prioritized_trajectory, get_heterogeneous_nn) + return RL.Agent( + policy=RL.QBasedPolicy( + learner=get_heterogeneous_learner(batch_size, update_horizon, min_replay_history, update_freq, target_update_freq, get_heterogeneous_nn), + explorer=get_explorer(), + ), + trajectory=get_heterogeneous_prioritized_trajectory() + ) +end + +function get_ppo_approximator(get_heterogeneous_nn_actor,get_heterogeneous_nn_critic) + return( + RL.ActorCritic( + actor =get_heterogeneous_nn_actor(), + critic =get_heterogeneous_nn_critic(), + optimizer = ADAM(), + ) + ) +end + +function get_heterogeneous_agent_ppo(; n_epochs, n_microbatches, critic_loss_weight = 1.0f0, entropy_loss_weight = 0.01f0, update_freq, get_heterogeneous_ppo_trajectory, get_heterogeneous_nn_actor,get_heterogeneous_nn_critic) + return RL.Agent( + policy=RL.PPOPolicy( + approximator = get_ppo_approximator(get_heterogeneous_nn_actor,get_heterogeneous_nn_critic), + γ = 0.99f0, + λ = 0.95f0, + clip_range = 0.2f0, + max_grad_norm = 0.5f0, + n_epochs = n_epochs, + n_microbatches = n_microbatches, + actor_loss_weight = 1.0f0, + critic_loss_weight = critic_loss_weight, + entropy_loss_weight = entropy_loss_weight, + update_freq = update_freq, + ), + trajectory=get_heterogeneous_ppo_trajectory() + ) +end \ No newline at end of file diff --git a/learning_cp/comparison/MIS.jl b/learning_cp/comparison/MIS.jl new file mode 100644 index 0000000..acd564f --- /dev/null +++ b/learning_cp/comparison/MIS.jl @@ -0,0 +1,718 @@ +include("../common/experiment.jl") +include("../common/utils.jl") +include("comparison.jl") + +MISHeuristic(threshold::Int) = SeaPearl.BasicHeuristic((x; cpmodel = nothing) -> length(x.onDomainChange) - 1 < threshold ? 
1 : 0, nothing) + +############################################################################### +######### Experiment Type 4 +######### +######### Supervised vs Simple +############################################################################### + +function experiment_heuristic_heterogeneous_mis(n, k, n_episodes, n_instances; n_layers_graph=3, n_eval=10, reward=SeaPearl.GeneralReward) + generator = SeaPearl.MaximumIndependentSetGenerator(n,k) + + # Basic value-selection heuristic + basicHeuristics = OrderedDict( + "maximum" => SeaPearl.BasicHeuristic() + ) + + experiment_heuristic_heterogeneous(n, n_episodes, n_instances; + chosen_features=nothing, + feature_size = [2, 3, 1], + output_size = 2, + generator = generator, + basicHeuristics = basicHeuristics, + n_layers_graph = n_layers_graph, + n_eval = n_eval, + reward = reward, + type = "mis", + eta_decay_steps = Int(floor(n_episodes/1.5)), + helpValueHeuristic = SeaPearl.BasicHeuristic(), + eta_init = 1.0, + eta_stable = 0.0 + ) +end + +############################################################################### +######### Experiment Type 5 +######### +######### +############################################################################### + +function experiment_explorer_heterogeneous_MIS(chosen_features, feature_size, n, k, n_episodes, n_instances; n_eval=10) + """ + Compares the impact of the number of convolution layers for the heterogeneous representation. + """ + generator = SeaPearl.MaximumIndependentSetGenerator(n,k) + restartPerInstances = 1 + + selectMax(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.maximum(x.domain) + heuristic_max = SeaPearl.BasicHeuristic(selectMax) + basicHeuristics = OrderedDict( + "max" => heuristic_max + ) + + experiment_explorer_heterogeneous( + n, + n, + n_episodes, + n_instances; + feature_size = feature_size, + chosen_features = chosen_features, + output_size = 2, + n_eval=n_eval, + generator, + type = "MIS_"*string(n)*"_"*string(k)*"_explorer_comparison", + basicHeuristics = basicHeuristics, + reward=SeaPearl.GeneralReward, + n_layers_graph=3, + c=2.0) +end + +############################################################################### +######### Experiment Type 6 +######### +######### +############################################################################### +""" +Compares different CPNNs with the heterogeneous representation for the MIS problem. + +""" +function experiment_nn_heterogeneous_MIS(n, k, n_episodes, n_instances, nb_steps_per_episode; n_layers_graph=3, n_eval=10, reward=SeaPearl.GeneralReward) + + MIS_generator = SeaPearl.MaximumIndependentSetGenerator(n,k) + + experiment_nn_heterogeneous(n, nb_steps_per_episode, n_episodes, n_instances; + chosen_features=nothing, + feature_size = [2, 3, 1], + output_size = 2, + generator = MIS_generator, + n_layers_graph = n_layers_graph, + n_eval = n_eval, + reward = reward, + type = "MIS", + decay_steps=2000, + c=2.0, + basicHeuristics=nothing + ) +end + +############################################################################### +######### Experiment Type 8 +######### +######### +############################################################################### + + +function experiment_chosen_features_hetcpnn_MIS(chosen_features_list, n, k, n_episodes, n_instances; n_eval=10) + """ + Compares the impact of the number of convolution layers for the heterogeneous representation. 
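+    Here the candidate feature dictionaries in chosen_features_list are compared on MIS
+    instances from SeaPearl.MaximumIndependentSetGenerator(n, k), with output_size = 2
+    for the binary decision variables.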
+ """ + generator = SeaPearl.MaximumIndependentSetGenerator(n,k) + restartPerInstances = 1 + + experiment_chosen_features_hetcpnn( + n, + n, + n_episodes, + n_instances, + restartPerInstances; + output_size = 2, + generator = generator, + chosen_features_list = chosen_features_list, + type = "MIS_"*string(n)*"_"*string(k) + ) +end +function experiment_chosen_features_hetffcpnn_MIS(chosen_features_list, n, k, n_episodes, n_instances; n_eval=10) + """ + Compares the impact of the number of convolution layers for the heterogeneous representation. + """ + generator = SeaPearl.MaximumIndependentSetGenerator(n,k) + restartPerInstances = 1 + + experiment_chosen_features_hetffcpnn( + n, + n, + n_episodes, + n_instances, + restartPerInstances; + output_size = 2, + generator=generator, + chosen_features_list=chosen_features_list, + type = "MIS_"*string(n)*"_"*string(k) + ) +end + +############################################################################### +######### Experiment Type 9 +######### +######### Transfer Learning +############################################################################### +function experiment_transfer_heterogeneous_mis(n, n_transfered, k, k_transfered, + n_episodes, + n_episodes_transfered, + n_instances; + n_layers_graph=3, + n_eval=10, + n_eval_transfered=10, + reward=SeaPearl.GeneralReward, + decay_steps=2000, + trajectory_capacity=2000, + eval_strategy=eval_strategy) + mis_generator = SeaPearl.MaximumIndependentSetGenerator(n, k) + mis_generator_transfered = SeaPearl.MaximumIndependentSetGenerator(n_transfered, k_transfered) + + # Basic value-selection heuristic + basicHeuristics = OrderedDict( + "maximum" => SeaPearl.BasicHeuristic(), + "mis("* string(2*k_transfered) *")" => MISHeuristic(2*k_transfered) + ) + + experiment_transfer_heterogeneous(n, n_transfered, n_episodes, n_episodes_transfered, n_instances; + chosen_features=nothing, + feature_size = [2, 3, 1], + output_size = 2, + output_size_transfered = 2, + generator = mis_generator, + generator_transfered = mis_generator_transfered, + basicHeuristics = basicHeuristics, + n_layers_graph = n_layers_graph, + n_eval = n_eval, + n_eval_transfered = n_eval_transfered, + reward = reward, + type = "mis", + decay_steps=decay_steps, + trajectory_capacity=trajectory_capacity, + eval_strategy=eval_strategy + ) +end + + +############################################################################### +######### Experiment Type 10 +######### +######### Restart +############################################################################### +function experiment_restart_heterogeneous_mis(n, k, n_episodes, n_instances; + restart_list = [1, 5, 10, 20], + n_layers_graph=3, + n_eval=10, + reward=SeaPearl.GeneralReward, + decay_steps=2000, + trajectory_capacity=2000) + + mis_generator = SeaPearl.MaximumIndependentSetGenerator(n, k) + + experiment_restart_heterogeneous(n, n_episodes, n_instances; + restart_list = restart_list, + feature_size = [2, 3, 1], + output_size = 2, + generator = mis_generator, + n_layers_graph = n_layers_graph, + n_eval = n_eval, + reward = reward, + type = "MIS_"*string(n)*"_"*string(k), + decay_steps=decay_steps, + trajectory_capacity=trajectory_capacity + ) +end + +############################################################################### +######### Experiment Type 11 +######### +######### +############################################################################### +""" +Compares HGT and HeterogeneousGraphConv. 
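+Here HGT refers to SeaPearl.HeterogeneousGraphTransformer layers and GraphConv to
+SeaPearl.HeterogeneousGraphConv layers (cf. get_hgt and get_heterogeneous_graph_chain
+in common/utils.jl).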
+""" + +function experiment_hgt_vs_graphconv_MIS(chosen_features, n, k, n_episodes, n_instances; n_eval=10) + """ + Compares the impact of the number of convolution layers for the heterogeneous representation. + """ + generator = SeaPearl.MaximumIndependentSetGenerator(n,k) + restartPerInstances = 1 + + experiment_hgt_vs_graphconv( + n, + n, + n_episodes, + n_instances, + restartPerInstances; + output_size = 2, + generator = generator, + chosen_features = chosen_features, + type = "MIS_"*string(n)*"_"*string(k) + ) +end + +############################################################################### +######### Simple MIS experiment +######### +######### +############################################################################### + +function experiment_MIS_dfs_dive(n, k, n_episodes, n_instances; chosen_features=nothing, feature_size=nothing, n_eval=20, n_eva = n, k_eva = k,n_layers_graph=3, c=2.0, trajectory_capacity=10000, pool = SeaPearl.meanPooling(), nbRandomHeuristics = 1, eval_timeout = 60, restartPerInstances = 1, seedEval = nothing, training_timeout = 3600, eval_every = 120) + + SR_heterogeneous = SeaPearl.HeterogeneousStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.HeterogeneousTrajectoryState} + n_step_per_episode = Int(round(n//2))+k + decay_step = 200000 + trajectory_capacity = 800*n_step_per_episode + update_horizon = Int(floor(n_step_per_episode//2)) + + if isnothing(chosen_features) + chosen_features = Dict( + "variable_is_bound" => true, + "variable_assigned_value" => true, + "variable_initial_domain_size" => true, + "variable_domain_size" => true, + "variable_is_objective" => true, + "constraint_activity" => true, + "constraint_type" => true, + "nb_not_bounded_variable" => true, + "values_raw" => true, + ) + feature_size = [5, 4, 1] + end + + rngExp = MersenneTwister(seedEval) + init = Flux.glorot_uniform(MersenneTwister(seedEval)) + + agent_dfs= get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=2), + get_explorer = () -> get_epsilon_greedy_explorer(decay_step, 0.1; rng = rngExp ), + batch_size=32, + update_horizon=update_horizon, + min_replay_history=Int(round(16*n_step_per_episode//2)), + update_freq=2, + target_update_freq=7*n_step_per_episode, + get_heterogeneous_nn = () -> get_heterogeneous_fullfeaturedcpnn( + feature_size=feature_size, + conv_size=8, + dense_size=16, + output_size=1, + n_layers_graph=3, + n_layers_node=2, + n_layers_output=2, + pool=pool, + σ=NNlib.leakyrelu, + init = init, + #device =gpu + ), + γ = 0.99f0 + ) + + agent_rbs = deepcopy(agent_dfs) + + learnedHeuristics_dfs = OrderedDict{String,SeaPearl.LearnedHeuristic}() + learnedHeuristics_dfs["dfs"] = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,SeaPearl.DefaultReward,SeaPearl.FixedOutput}(agent_dfs; chosen_features=chosen_features) #Default Reward + learnedHeuristics_rbs = OrderedDict{String,SeaPearl.LearnedHeuristic}() + learnedHeuristics_rbs["rbs"] = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,SeaPearl.ScoreReward,SeaPearl.FixedOutput}(agent_rbs; chosen_features=chosen_features) #Score Reward + + + threshold = 2*k + variableHeuristic = SeaPearl.MinDomainVariableSelection{false}() + MISHeuristic(x; cpmodel=nothing) = length(x.onDomainChange) - 1 < threshold ? 
1 : 0 + heuristic_mis = SeaPearl.BasicHeuristic(MISHeuristic) + selectMax(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.maximum(x.domain) + heuristic_max = SeaPearl.BasicHeuristic(selectMax) + basicHeuristics = OrderedDict( + "MISheuristic" => heuristic_mis, + ) +""" + generator = SeaPearl.MaximumIndependentSetGenerator(n,k) + eval_generator = SeaPearl.MaximumIndependentSetGenerator(n_eva, k_eva) + metricsArray, eval_metricsArray = trytrain( + nbEpisodes=n_episodes, + evalFreq=Int(floor(n_episodes / n_eval)), + nbInstances=n_instances, + restartPerInstances=restartPerInstances, + strategy = SeaPearl.DFSearch(), + eval_strategy = SeaPearl.DFSearch(), + out_solver = false, + generator=generator, + variableHeuristic=variableHeuristic, + learnedHeuristics=learnedHeuristics_dfs, + basicHeuristics=basicHeuristics; + verbose=true, + nbRandomHeuristics=nbRandomHeuristics, + exp_name= "MIS_dfs_"*string(n_episodes)*"_"*string(n)*"_"*string(k)*"_timeout_"*string(training_timeout)*"_eval_every_"*string(eval_every)*"_seedEval_"*string(seedEval)*"_", + eval_generator = eval_generator, + seedEval = seedEval, + training_timeout = training_timeout, + eval_every = eval_every, + ) +""" + println() + + generator = SeaPearl.MaximumIndependentSetGenerator(n,k) + eval_generator = SeaPearl.MaximumIndependentSetGenerator(n_eva, k_eva) + + metricsArray, eval_metricsArray = trytrain( + nbEpisodes=n_episodes, + evalFreq=Int(floor(n_episodes / n_eval)), + nbInstances=n_instances, + restartPerInstances=restartPerInstances, + strategy = SeaPearl.DFSearch(), + eval_strategy = SeaPearl.DFSearch(), + out_solver = true, + generator=generator, + variableHeuristic=variableHeuristic, + learnedHeuristics=learnedHeuristics_rbs, + basicHeuristics=basicHeuristics; + verbose=true, + nbRandomHeuristics=nbRandomHeuristics, + exp_name= "MIS_rbs_"*string(n_episodes)*"_"*string(n)*"_"*string(k)*"_timeout_"*string(training_timeout)*"_eval_every_"*string(eval_every)*"_", + eval_timeout=eval_timeout, + eval_generator = eval_generator, + seedEval = seedEval, + training_timeout = training_timeout, + eval_every = eval_every, + ) + nothing + +end + +############################################################################### +######### Simple MIS experiment +######### +######### +############################################################################### + +function simple_experiment_MIS(n, k, n_episodes, n_instances; chosen_features=nothing, feature_size=nothing, n_eval=25, n_eva = n, k_eva = k,n_layers_graph=3, reward = SeaPearl.GeneralReward, c=2.0, trajectory_capacity = 40000, pool = SeaPearl.meanPooling(), nbRandomHeuristics = 1, eval_timeout = 240, restartPerInstances = 1, seedEval = nothing, device = gpu, batch_size = 64, update_freq = 1, target_update_freq= 100, name = "", numDevice = 0, eval_strategy = SeaPearl.DFSearch()) + """ + Runs a single experiment on MIS + """ + + n_step_per_episode = Int(round(n//2))+k + + update_horizon = Int(round(n_step_per_episode//2)) + + if device == gpu + CUDA.device!(numDevice) + end + + evalFreq=Int(floor(n_episodes / n_eval)) + step_explorer = Int(floor(n_episodes*n_step_per_episode*0.05)) + + + generator = SeaPearl.MaximumIndependentSetGenerator(n,k) + eval_generator = SeaPearl.MaximumIndependentSetGenerator(n_eva, k_eva) + + rngExp = MersenneTwister(seedEval) + init = Flux.glorot_uniform(MersenneTwister(seedEval)) + + + if isnothing(chosen_features) + chosen_features = Dict( + "node_number_of_neighbors" => true, + "constraint_type" => true, + "constraint_activity" => true, + 
"nb_not_bounded_variable" => true, + "variable_initial_domain_size" => true, + "variable_domain_size" => true, + "variable_is_objective" => true, + "variable_assigned_value" => true, + "variable_is_bound" => true, + "values_raw" => true) + feature_size = [6, 5, 2] + end + rngExp = MersenneTwister(seedEval) + init = Flux.glorot_uniform(MersenneTwister(seedEval)) + + SR_heterogeneous = SeaPearl.HeterogeneousStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.HeterogeneousTrajectoryState} + + agent_3 = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=2), + get_explorer = () -> get_epsilon_greedy_explorer(step_explorer, 0.1; rng = rngExp ), + batch_size=batch_size, + update_horizon=update_horizon, + min_replay_history=Int(round(16*n_step_per_episode//2)), + update_freq=update_freq, + target_update_freq=target_update_freq, + get_heterogeneous_nn = () -> get_heterogeneous_fullfeaturedcpnn( + feature_size=feature_size, + conv_size=8, + dense_size=16, + output_size=1, + n_layers_graph=3, + n_layers_node=3, + n_layers_output=2, + pool=pool, + σ=NNlib.leakyrelu, + init = init, + device = device + ), + γ = 0.99f0 + ) + + learned_heuristic_3 = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent_3; chosen_features=chosen_features) + + learnedHeuristics = OrderedDict( + "3layer" => learned_heuristic_3, + ) + + variableHeuristic = SeaPearl.MinDomainVariableSelection{false}() + selectMax(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.maximum(x.domain) + heuristic_max = SeaPearl.BasicHeuristic(selectMax) + basicHeuristics = OrderedDict( + "expert_max" => heuristic_max + ) + + metricsArray, eval_metricsArray = trytrain( + nbEpisodes=n_episodes, + evalFreq=evalFreq, + nbInstances=n_instances, + restartPerInstances=restartPerInstances, + generator=generator, + eval_strategy=eval_strategy, + variableHeuristic=variableHeuristic, + learnedHeuristics=learnedHeuristics, + basicHeuristics=basicHeuristics; + out_solver=true, + verbose=true, + seedEval=seedEval, + nbRandomHeuristics=nbRandomHeuristics, + exp_name=name *'_'* string(n) *"_"*string(n_eva) * "_" * string(n_episodes) * "_"* string(seedEval) * "_", + eval_timeout=eval_timeout, + eval_generator=eval_generator, + ) + nothing + +end + +############################################################################### +######### Comparison of tripartite graph vs specialized graph +######### +######### +############################################################################### +""" +Compares the tripartite graph representation with a specific representation. +""" + +function experiment_tripartite_vs_specific_MIS(n, k, n_episodes, n_instances; n_layers_graph=4, n_eval=10, reward=SeaPearl.GeneralReward) + + MIS_generator = SeaPearl.MaximumIndependentSetGenerator(n,k) + SR_specific = SeaPearl.MISStateRepresentation{SeaPearl.MISFeaturization,SeaPearl.DefaultTrajectoryState} + + # Basic value-selection heuristic + threshold = 2*k + MISHeuristic(x; cpmodel=nothing) = length(x.onDomainChange) - 1 < threshold ? 
1 : 0 + heuristic_mis = SeaPearl.BasicHeuristic(MISHeuristic) + selectMax(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.maximum(x.domain) + heuristic_max = SeaPearl.BasicHeuristic(selectMax) + basicHeuristics = OrderedDict( + "MISheuristic" => heuristic_mis, + "max" => heuristic_max + ) + + chosen_features = Dict( + "node_number_of_neighbors" => true, + "constraint_type" => true, + "constraint_activity" => true, + "nb_not_bounded_variable" => true, + "variable_initial_domain_size" => true, + "variable_domain_size" => true, + "variable_is_objective" => true, + "variable_assigned_value" => true, + "variable_is_bound" => true, + "values_raw" => true) + + nb_steps_per_episode = Int(round(n/2+k)) + experiment_tripartite_vs_specific(n, nb_steps_per_episode, n_episodes, n_instances, SR_specific; + chosen_features = chosen_features, + feature_size = [6, 5, 2], + feature_size_specific = SeaPearl.feature_length(SR_specific), + output_size = 2, + generator = MIS_generator, + n_layers_graph = n_layers_graph, + eval_strategy = SeaPearl.ILDSearch(1), + n_eval = n_eval, + reward = reward, + type = "MIS", + basicHeuristics=basicHeuristics +) +end + +############################################################################### +######### Experiment Type MALIK +######### +######### +############################################################################### + +""" +Compares different RL Agents with the heterogeneous representation for the MIS problem. +""" +function experiment_rl_heterogeneous_mis(n,k, n_episodes, n_instances; n_layers_graph=3, n_eval=10, reward=SeaPearl.GeneralReward) + + mis_generator = SeaPearl.MaximumIndependentSetGenerator(n, k) + + chosen_features = Dict( + "variable_initial_domain_size" => true, + "constraint_type" => true, + "variable_domain_size" => true, + "values_raw" => true) + + feature_size = [2,2,1] + n_step_per_episode = Int(round(n//2))+k + experiment_rl_heterogeneous(n, n_episodes, n_instances; + eval_strategy = SeaPearl.ILDSearch(2), + chosen_features=chosen_features, + feature_size = feature_size, + output_size = 2, + generator = mis_generator, + n_layers_graph = n_layers_graph, + n_eval = n_eval, + reward = reward, + type = "mis", + decay_steps=250*n_step_per_episode, + basicHeuristics=nothing + ) +end + +############################################################################### +######### Reward comparison +######### +######### +############################################################################### + +""" +Compares 3 different rewards +""" +function experiment_general_rewards_mis(n, k, n_episodes, n_instances; n_eval=10) + + generator = SeaPearl.MaximumIndependentSetGenerator(n, k) + # Basic value-selection heuristic + threshold = 2*k + MISHeuristic(x; cpmodel=nothing) = length(x.onDomainChange) - 1 < threshold ? 
1 : 0 + heuristic_mis = SeaPearl.BasicHeuristic(MISHeuristic) + selectMax(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.maximum(x.domain) + heuristic_max = SeaPearl.BasicHeuristic(selectMax) + basicHeuristics = OrderedDict( + "MISheuristic" => heuristic_mis, + "max" => heuristic_max + ) + + chosen_features = Dict( + "node_number_of_neighbors" => true, + "constraint_type" => true, + "constraint_activity" => true, + "nb_not_bounded_variable" => true, + "variable_initial_domain_size" => true, + "variable_domain_size" => true, + "variable_is_objective" => true, + "variable_assigned_value" => true, + "variable_is_bound" => true, + "values_raw" => true) + + feature_size = [6,5,2] + nb_steps_per_episode = Int(round(n//2))+k + experiment_general_rewards( + n, + n_episodes, + n_instances, + nb_steps_per_episode; + feature_size=feature_size, + output_size=2, + n_eval=10, + generator=generator, + chosen_features=chosen_features, + basicHeuristics=basicHeuristics, + ) +end + + +############################################################################### +######### Reward comparison +######### +######### +############################################################################### + +""" +Compares 3 different rewards +""" +function experiment_general_vs_score_rewards_mis(n, k, n_episodes, n_instances; n_eval=10) + + generator = SeaPearl.MaximumIndependentSetGenerator(n, k) + # Basic value-selection heuristic + threshold = 2*k + MISHeuristic(x; cpmodel=nothing) = length(x.onDomainChange) - 1 < threshold ? 1 : 0 + heuristic_mis = SeaPearl.BasicHeuristic(MISHeuristic) + selectMax(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.maximum(x.domain) + heuristic_max = SeaPearl.BasicHeuristic(selectMax) + basicHeuristics = OrderedDict( + "MISheuristic" => heuristic_mis, + "max" => heuristic_max + ) + + chosen_features = Dict( + "node_number_of_neighbors" => true, + "constraint_type" => true, + "constraint_activity" => true, + "nb_not_bounded_variable" => true, + "variable_initial_domain_size" => true, + "variable_domain_size" => true, + "variable_is_objective" => true, + "variable_assigned_value" => true, + "variable_is_bound" => true, + "values_raw" => true) + + feature_size = [6,5,2] + nb_steps_per_episode = Int(round(n//2))+k + experiment_general_vs_score_rewards( + n, + n_episodes, + n_instances, + nb_steps_per_episode; + feature_size=feature_size, + output_size=2, + n_eval=10, + generator=generator, + chosen_features=chosen_features, + basicHeuristics=basicHeuristics, + ) +end + +############################################################################### +######### Experiment Type +######### +######### Chain Transfer Learning +############################################################################### +function experiment_chain_transfer_heterogeneous_mis(n, k, + n_episodes, + n_instances; + n_layers_graph=3, + n_evals, + reward=SeaPearl.GeneralReward, + decay_steps=2000, + trajectory_capacity=2000, + eval_strategy=eval_strategy) + + generators = [] + for i in 1:length(n) + push!(generators, SeaPearl.MaximumIndependentSetGenerator(n[i], k[i])) + end + + # Basic value-selection heuristic + basicHeuristics = OrderedDict( + "maximum" => SeaPearl.BasicHeuristic(), + "mis("* string(2*last(k)) *")" => MISHeuristic(2*last(k)) + ) + + experiment_chain_transfer_heterogeneous(n, n_episodes, n_instances; + chosen_features=nothing, + feature_size = [2, 3, 1], + output_sizes = [2 for i in 1:length(n)], + generators = generators, + basicHeuristics = basicHeuristics, + n_layers_graph = n_layers_graph, + 
n_evals = n_evals, + reward = reward, + type = "mis", + decay_steps=decay_steps, + trajectory_capacity=trajectory_capacity, + eval_strategy=eval_strategy + ) +end \ No newline at end of file diff --git a/learning_cp/comparison/MaxCut.jl b/learning_cp/comparison/MaxCut.jl new file mode 100644 index 0000000..1bd8196 --- /dev/null +++ b/learning_cp/comparison/MaxCut.jl @@ -0,0 +1,365 @@ +include("../common/experiment.jl") +include("../common/utils.jl") +include("comparison.jl") + +using TensorBoardLogger, Logging +import CUDA +############################################################################### +######### DFS / RBS comparison on Max Cut +######### +######### +############################################################################### + +function experiment_Max_Cut_dfs_dive(n, k, n_episodes, n_instances; chosen_features=nothing, feature_size=nothing, n_eval=20, n_eva = n, k_eva = k,n_layers_graph=3, c=2.0, trajectory_capacity=10000, pool = SeaPearl.meanPooling(), nbRandomHeuristics = 1, eval_timeout = 60, restartPerInstances = 1, seedEval = nothing, training_timeout = 3600, eval_every = 120) + + +SR_heterogeneous = SeaPearl.HeterogeneousStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.HeterogeneousTrajectoryState} +n_step_per_episode = n+1 +decay_step = n_step_per_episode*n_episodes*0.6 +trajectory_capacity = 800*n_step_per_episode +update_horizon = Int(floor(n_step_per_episode//2)) + +if isnothing(chosen_features) + chosen_features = Dict( + "node_number_of_neighbors" => true, + "constraint_type" => true, + "constraint_activity" => true, + "nb_not_bounded_variable" => true, + "variable_initial_domain_size" => true, + "variable_domain_size" => true, + "variable_is_objective" => true, + "variable_assigned_value" => true, + "variable_is_bound" => true, + "values_raw" => true) + feature_size = [6, 5, 2] +end +rngExp = MersenneTwister(seedEval) +init = Flux.glorot_uniform(MersenneTwister(seedEval)) + + + agent_dfs= get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=2), + get_explorer = () -> get_epsilon_greedy_explorer(decay_step, 0.1; rng = rngExp ), + batch_size=8, + update_horizon=update_horizon, + min_replay_history=Int(round(16*n_step_per_episode//2)), + update_freq=2, + target_update_freq=7*n_step_per_episode, + get_heterogeneous_nn = () -> get_heterogeneous_fullfeaturedcpnn( + feature_size=feature_size, + conv_size=8, + dense_size=16, + output_size=1, + n_layers_graph=3, + n_layers_node=2, + n_layers_output=2, + pool=pool, + σ=NNlib.leakyrelu, + init = init, + #device =gpu + ), + γ = 0.99f0 + ) + + agent_rbs = deepcopy(agent_dfs) + + learnedHeuristics_dfs = OrderedDict{String,SeaPearl.LearnedHeuristic}() + learnedHeuristics_dfs["dfs"] = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,SeaPearl.DefaultReward,SeaPearl.FixedOutput}(agent_dfs; chosen_features=chosen_features) #Default Reward + learnedHeuristics_rbs = OrderedDict{String,SeaPearl.LearnedHeuristic}() + learnedHeuristics_rbs["rbs"] = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,SeaPearl.ScoreReward,SeaPearl.FixedOutput}(agent_rbs; chosen_features=chosen_features) #Score Reward + + threshold = 2*k + variableHeuristic = SeaPearl.MinDomainVariableSelection{false}() + MISHeuristic(x; cpmodel=nothing) = length(x.onDomainChange) - 1 < threshold ? 
1 : 0 + heuristic_mis = SeaPearl.BasicHeuristic(MISHeuristic) + selectMax(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.maximum(x.domain) + heuristic_max = SeaPearl.BasicHeuristic(selectMax) + basicHeuristics = OrderedDict( + "MaxCutheuristic" => heuristic_mis, + ) + + generator = SeaPearl.MaxCutGenerator(n,k) + eval_generator = SeaPearl.MaxCutGenerator(n_eva, k_eva) + + metricsArray, eval_metricsArray = trytrain( + nbEpisodes=n_episodes, + evalFreq=Int(floor(n_episodes / n_eval)), + nbInstances=n_instances, + restartPerInstances=restartPerInstances, + strategy = SeaPearl.DFSearch(), + eval_strategy = SeaPearl.DFSearch(), + out_solver = false, + generator=generator, + variableHeuristic=variableHeuristic, + learnedHeuristics=learnedHeuristics_dfs, + basicHeuristics=basicHeuristics; + verbose=true, + nbRandomHeuristics=nbRandomHeuristics, + exp_name= "Max_Cut_dfs_"*string(n_episodes)*"_"*string(n)*"_"*string(k)*"_timeout_"*string(training_timeout)*"_eval_every_"*string(eval_every)*"_", + eval_generator = eval_generator, + seedEval = seedEval, + training_timeout = training_timeout, + eval_every = eval_every, + ) + + println() + + generator = SeaPearl.MaxCutGenerator(n,k) + eval_generator = SeaPearl.MaxCutGenerator(n_eva, k_eva) + + metricsArray, eval_metricsArray = trytrain( + nbEpisodes=n_episodes, + evalFreq=Int(floor(n_episodes / n_eval)), + nbInstances=n_instances, + restartPerInstances=restartPerInstances, + strategy = SeaPearl.DFSearch(), + eval_strategy = SeaPearl.DFSearch(), + out_solver = true, + generator=generator, + variableHeuristic=variableHeuristic, + learnedHeuristics=learnedHeuristics_rbs, + basicHeuristics=basicHeuristics; + verbose=true, + nbRandomHeuristics=nbRandomHeuristics, + exp_name= "Max_Cut_rbs_"*string(n_episodes)*"_"*string(n)*"_"*string(k)*"_timeout_"*string(training_timeout)*"_eval_every_"*string(eval_every)*"_", + eval_timeout=eval_timeout, + eval_generator = eval_generator, + seedEval = seedEval, + training_timeout = training_timeout, + eval_every = eval_every, + ) + nothing + +end +############################################################################### +######### Simple MaxCut experiment +######### +######### +############################################################################### + +function simple_experiment_MaxCut(n, k, n_episodes, n_instances; chosen_features=nothing, feature_size=nothing, n_eval=20, n_eva = n, k_eva = k,n_layers_graph=3, reward = SeaPearl.GeneralReward, c=2.0, trajectory_capacity=30000, pool = SeaPearl.meanPooling(), nbRandomHeuristics = 1, eval_timeout = 240, restartPerInstances = 1, seedEval = nothing, device=cpu, batch_size = 64, update_freq = 10, target_update_freq= 500, name = "", numDevice = 0, eval_strategy = SeaPearl.DFSearch()) + + #to change of device : CUDA.device!(i) i is the id tof the GPU being used + + n_step_per_episode = Int(round(n/5)) + update_horizon = Int(round(n_step_per_episode//2)) + + if device == gpu + CUDA.device!(numDevice) + end + + generator = SeaPearl.MaxCutGenerator(n,k) + eval_generator = SeaPearl.MaxCutGenerator(n_eva, k_eva) + + evalFreq=Int(floor(n_episodes / n_eval)) + + step_explorer = Int(floor(n_episodes*n_step_per_episode*0.1 )) + + if isnothing(chosen_features) + chosen_features = Dict( + "node_number_of_neighbors" => true, + "constraint_type" => true, + "constraint_activity" => true, + "nb_not_bounded_variable" => true, + "variable_initial_domain_size" => true, + "variable_domain_size" => true, + "variable_is_objective" => true, + "variable_assigned_value" => true, + 
"variable_is_bound" => true, + "values_raw" => true) + feature_size = [6, 5, 2] + end + + rngExp = MersenneTwister(seedEval) + init = Flux.glorot_uniform(MersenneTwister(seedEval)) + + SR_heterogeneous = SeaPearl.HeterogeneousStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.HeterogeneousTrajectoryState} + + selectMax(x::SeaPearl.BoolVar; cpmodel=nothing) = SeaPearl.maximum(x.domain.inner) + selectMin(x::SeaPearl.BoolVar; cpmodel=nothing) = SeaPearl.minimum(x.domain.inner) + heuristic_max = SeaPearl.BasicHeuristic(selectMax) + heuristic_min = SeaPearl.BasicHeuristic(selectMin) + basicHeuristics = OrderedDict( + "expert_max" => heuristic_max, + "expert_min" => heuristic_min + ) + + agent_3 = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=2), + get_explorer = () -> get_epsilon_greedy_explorer(step_explorer, 0.01; rng = rngExp ), + batch_size=batch_size, + update_horizon=update_horizon, + min_replay_history=Int(round(16*n_step_per_episode//2)), + update_freq=update_freq, + target_update_freq=target_update_freq, + get_heterogeneous_nn = () -> get_heterogeneous_fullfeaturedcpnn( + feature_size=feature_size, + conv_size=8, + dense_size=16, + output_size=1, + n_layers_graph=3, + n_layers_node=3, + n_layers_output=2, + pool=pool, + σ=NNlib.leakyrelu, + init = init, + device = device + ), + γ = 0.99f0 + ) + + learned_heuristic_3 = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent_3; chosen_features=chosen_features) + + learnedHeuristics = OrderedDict( + "3layer" => learned_heuristic_3, + ) + + variableHeuristic = SeaPearl.MinDomainVariableSelection{false}() + + metricsArray, eval_metricsArray = trytrain( + nbEpisodes=n_episodes, + evalFreq=evalFreq, + nbInstances=n_instances, + restartPerInstances=restartPerInstances, + generator=generator, + eval_strategy=eval_strategy, + variableHeuristic=variableHeuristic, + learnedHeuristics=learnedHeuristics, + basicHeuristics=basicHeuristics; + out_solver=true, + verbose=true, + seedEval=seedEval, + nbRandomHeuristics=nbRandomHeuristics, + exp_name=name *'_'* string(n) *"_"*string(n_eva) * "_" * string(n_episodes) * "_"* string(seedEval) * "_", + eval_timeout=eval_timeout, + eval_generator=eval_generator, + device = device + ) + nothing + +end + + +############################################################################### +######### Max cut CPNN vs FFCPNN +######### +######### +############################################################################### + +function simple_MaxCut_cpnn(n, k, n_episodes, n_instances; chosen_features=nothing, feature_size=nothing, n_eval=20, n_eva = n, k_eva = k,n_layers_graph=3, reward = SeaPearl.GeneralReward, c=2.0, trajectory_capacity=5000, pool = SeaPearl.meanPooling(), nbRandomHeuristics = 1, eval_timeout = 60, restartPerInstances = 10, seedEval = nothing, eval_strategy = SeaPearl.ILDSearch(2)) + """ + Runs a single experiment on MIS + """ + n_step_per_episode = n+1 + reward = SeaPearl.GeneralReward + generator = SeaPearl.MaxCutGenerator(n,k) + eval_generator = SeaPearl.MaxCutGenerator(n_eva, k_eva) + + SR_heterogeneous = SeaPearl.HeterogeneousStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.HeterogeneousTrajectoryState} + + trajectory_capacity = 800*n_step_per_episode + update_horizon = Int(round(n_step_per_episode//2)) + learnedHeuristics = OrderedDict{String,SeaPearl.LearnedHeuristic}() + + if isnothing(chosen_features) + chosen_features = Dict( + 
"node_number_of_neighbors" => true, + "constraint_type" => true, + "constraint_activity" => true, + "nb_not_bounded_variable" => true, + "variable_initial_domain_size" => true, + "variable_domain_size" => true, + "variable_is_objective" => true, + "variable_assigned_value" => true, + "variable_is_bound" => true, + "values_raw" => true) + feature_size = [6, 5, 2] + end + rngExp = MersenneTwister(seedEval) + init = Flux.glorot_uniform(MersenneTwister(seedEval)) + + agent_ffcpnn= get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=2), + get_explorer = () -> get_epsilon_greedy_explorer(Int(floor(n_episodes*n_step_per_episode*0.80)), 0.1; rng = rngExp ), + batch_size=32, + update_horizon=update_horizon, + min_replay_history=Int(round(16*n_step_per_episode//2)), + update_freq=4, + target_update_freq=7*n_step_per_episode, + get_heterogeneous_nn = () -> get_heterogeneous_fullfeaturedcpnn( + feature_size=feature_size, + conv_size=16, + dense_size=16, + output_size=1, + n_layers_graph=n_layers_graph, + n_layers_node=3, + n_layers_output=3, + pool=SeaPearl.meanPooling(), + σ=NNlib.leakyrelu, + init = init, + #device =gpu + ), + γ = 0.99f0 + ) + + agent_cpnn= get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=2), + get_explorer = () -> get_epsilon_greedy_explorer(Int(floor(n_episodes*n_step_per_episode*0.80)), 0.1; rng = rngExp ), + batch_size=32, + update_horizon=update_horizon, + min_replay_history=Int(round(16*n_step_per_episode//2)), + update_freq=4, + target_update_freq=7*n_step_per_episode, + get_heterogeneous_nn = () -> get_heterogeneous_cpnn( + feature_size=feature_size, + conv_size=16, + dense_size=16, + output_size=2, + n_layers_graph=n_layers_graph, + n_layers_output=3, + pool=SeaPearl.meanPooling(), + σ=NNlib.leakyrelu, + init = init, + #device =gpu + ), + γ = 0.99f0 + ) + + learned_heuristic_ffcpnn = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent_ffcpnn, chosen_features=chosen_features) + learned_heuristic_cpnn = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent_cpnn; chosen_features=chosen_features) + + learnedHeuristics["ffcpnn"] = learned_heuristic_ffcpnn + learnedHeuristics["cpnn"] = learned_heuristic_cpnn + + variableHeuristic = SeaPearl.MinDomainVariableSelection{false}() + selectMax(x::SeaPearl.BoolVar; cpmodel=nothing) = SeaPearl.maximum(x.domain.inner) + selectMin(x::SeaPearl.BoolVar; cpmodel=nothing) = SeaPearl.minimum(x.domain.inner) + heuristic_max = SeaPearl.BasicHeuristic(selectMax) + heuristic_min = SeaPearl.BasicHeuristic(selectMin) + basicHeuristics = OrderedDict( + "expert_max" => heuristic_max, + "expert_min" => heuristic_min + ) + + metricsArray, eval_metricsArray = trytrain( + nbEpisodes=n_episodes, + evalFreq=Int(floor(n_episodes / n_eval)), + nbInstances=n_instances, + restartPerInstances=restartPerInstances, + eval_strategy = eval_strategy, + #strategy = SeaPearl.DFWBSearch(), + #out_solver = false, + generator=generator, + variableHeuristic=variableHeuristic, + learnedHeuristics=learnedHeuristics, + basicHeuristics=basicHeuristics; + verbose=true, + nbRandomHeuristics=nbRandomHeuristics, + exp_name= "MaxCut_ffcpnn_cpnn_"*string(n_episodes)*"_"*string(n)*"_"*string(k)*"->"*string(n_eva)*"_"*string(k_eva)*"_"* string(n_episodes), + eval_timeout=eval_timeout, + eval_generator = eval_generator, + seedEval = seedEval + ) + 
nothing + +end \ No newline at end of file diff --git a/learning_cp/comparison/Project.toml b/learning_cp/comparison/Project.toml new file mode 100644 index 0000000..93b9dc3 --- /dev/null +++ b/learning_cp/comparison/Project.toml @@ -0,0 +1,20 @@ +[deps] +ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63" +BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" +CircularArrayBuffers = "9de3a189-e0c0-4e15-ba3b-b14b9fb0aec1" +Conda = "8f4d0f93-b110-5947-807f-2305c1781a2d" +Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" +GeometricFlux = "7e08b658-56d3-11e9-2997-919d5b31e4ea" +Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6" +Gtk = "4c0ca9eb-093a-5379-98c5-f87ac0bbbf44" +JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" +LightGraphs = "093fc24a-ae57-5d10-9952-331d41423f4d" +OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" +Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" +PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" +ReinforcementLearning = "158674fc-8238-5cab-b5ba-03dfc80d1318" +Revise = "295af30f-e4ad-537b-8983-00126c2a3abe" +SeaPearl = "c13076dc-bdcd-48ba-bc88-4b44c2587ab3" +SeaPearlExtras = "90978a9a-af5e-4113-9033-e3aa0a1ac968" +Suppressor = "fd094767-a336-5f1f-9728-57cf17d0bbfb" diff --git a/learning_cp/comparison/comparison.jl b/learning_cp/comparison/comparison.jl new file mode 100644 index 0000000..9ff7026 --- /dev/null +++ b/learning_cp/comparison/comparison.jl @@ -0,0 +1,2586 @@ +using SeaPearl + +############################################################################### +######### Global Parameters +######### +######### +############################################################################### + +DEFAULT_CHOSEN_FEATURES = Dict( + "constraint_activity" => true, + "constraint_type" => true, + "variable_initial_domain_size" => true, + "variable_domain_size" => true, + "values_raw" => true, +) + +############################################################################### +######### Experiment Type 1 +######### +######### +############################################################################### +""" +Compare three agents: + - an agent with the default representation and default features; + - an agent with the default representation and chosen features; + - an agent with the heterogeneous representation and chosen features. 
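+All three agents share the same trajectory capacity, explorer schedule, and DQN hyperparameters, so the comparison isolates the effect of the state representation and feature selection.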
+""" +function experiment_representation( + size, + n_episodes, + n_instances; + feature_sizes, + output_size, + generator, + basicHeuristics=nothing, + n_layers_graph=3, + n_eval=10, + reward=SeaPearl.GeneralReward, + type="", + chosen_features=nothing, + trajectory_capacity=2000, + init=Flux.glorot_uniform +) + + SR_default = SeaPearl.DefaultStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.DefaultTrajectoryState} + SR_heterogeneous = SeaPearl.HeterogeneousStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.HeterogeneousTrajectoryState} + + agent_default_default = get_default_agent(; + get_default_trajectory = () -> get_default_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(2000, 0.01), + batch_size=16, + update_horizon=8, + min_replay_history=256, + update_freq=1, + target_update_freq=8, + get_default_nn = () -> get_default_cpnn( + feature_size=feature_sizes[1], + conv_size=8, + dense_size=16, + output_size=output_size, + n_layers_graph=n_layers_graph, + n_layers_node=2, + n_layers_output=2, + init = init + ) + ) + learned_heuristic_default_default = SeaPearl.SimpleLearnedHeuristic{SR_default,reward,SeaPearl.FixedOutput}(agent_default_default) + + if isnothing(chosen_features) + chosen_features = DEFAULT_CHOSEN_FEATURES + end + + agent_default_chosen = get_default_agent(; + get_default_trajectory = () -> get_default_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(2000, 0.01), + batch_size=16, + update_horizon=8, + min_replay_history=256, + update_freq=1, + target_update_freq=8, + get_default_nn = () -> get_default_cpnn( + feature_size=feature_sizes[2], + conv_size=8, + dense_size=16, + output_size=output_size, + n_layers_graph=n_layers_graph, + n_layers_node=2, + n_layers_output=2, + init = init + ) + ) + learned_heuristic_default_chosen = SeaPearl.SimpleLearnedHeuristic{SR_default,reward,SeaPearl.FixedOutput}(agent_default_chosen; chosen_features=chosen_features) + + agent_heterogeneous = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(2000, 0.01), + batch_size=16, + update_horizon=8, + min_replay_history=256, + update_freq=1, + target_update_freq=8, + get_heterogeneous_nn = () -> get_heterogeneous_cpnn( + feature_size=feature_sizes[3], + conv_size=8, + dense_size=16, + output_size=output_size, + n_layers_graph=n_layers_graph, + n_layers_node=2, + n_layers_output=2 + ) + ) + learned_heuristic_heterogeneous = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent_heterogeneous; chosen_features=chosen_features) + + learnedHeuristics = OrderedDict( + "defaultdefault" => learned_heuristic_default_default, + "defaultchosen" => learned_heuristic_default_chosen, + "heterogeneous" => learned_heuristic_heterogeneous, + ) + if isnothing(basicHeuristics) + basicHeuristics = OrderedDict( + "random" => SeaPearl.RandomHeuristic() + ) + end + + # ------------------- + # Variable Heuristic definition + # ------------------- + variableHeuristic = SeaPearl.MinDomainVariableSelection{false}() + + metricsArray, eval_metricsArray = trytrain( + nbEpisodes=n_episodes, + evalFreq=Int(floor(n_episodes / n_eval)), + nbInstances=n_instances, + restartPerInstances=1, + generator=generator, + variableHeuristic=variableHeuristic, + learnedHeuristics=learnedHeuristics, + 
basicHeuristics=basicHeuristics; + out_solver=true, + verbose=false, + nbRandomHeuristics=0, + exp_name= type * "_representation_" * string(n_episodes) * "_" * string(size) * "_" + ) + nothing +end + +############################################################################### +######### Experiment Type 2 +######### +######### +############################################################################### +""" +Compares the impact of the number of convolution layers for the heterogeneous representation. +""" + +function experiment_n_conv( + n_nodes, + n_episodes, + n_instances; + n_eval=10, + generator, + SR, + chosen_features, + feature_size, + type = "", + trajectory_capacity = 2000, + output_size = n_nodes, + reward = SeaPearl.GeneralReward, + ) + + learnedHeuristics = OrderedDict{String,SeaPearl.LearnedHeuristic}() + for i in 1:3 + if SR <: SeaPearl.DefaultStateRepresentation + agent = get_default_agent(; + get_default_trajectory = () -> get_default_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(2000, 0.01), + batch_size=16, + update_horizon=8, + min_replay_history=256, + update_freq=1, + target_update_freq=8, + get_default_nn = () -> get_default_cpnn( + feature_size=feature_size, + conv_size=8, + dense_size=16, + output_size=output_size, + n_layers_graph=i, + n_layers_node=2, + n_layers_output=2, + init = init + ) + ) + else + agent = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(2000, 0.01), + batch_size=16, + update_horizon=8, + min_replay_history=256, + update_freq=1, + target_update_freq=8, + get_heterogeneous_nn = () -> get_heterogeneous_cpnn( + feature_size=feature_size, + conv_size=8, + dense_size=16, + output_size=output_size, + n_layers_graph=i, + n_layers_node=2, + n_layers_output=2 + ) + ) + end + + if !isnothing(chosen_features) + learned_heuristic = SeaPearl.SimpleLearnedHeuristic{SR,SeaPearl.GeneralReward,SeaPearl.FixedOutput}(agent; chosen_features=chosen_features) + else + learned_heuristic = SeaPearl.SimpleLearnedHeuristic{SR,SeaPearl.GeneralReward,SeaPearl.FixedOutput}(agent) + end + learnedHeuristics[type * "_" *string(i)] = learned_heuristic + end + + # Basic value-selection heuristic + selectMin(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.minimum(x.domain) + heuristic_min = SeaPearl.BasicHeuristic(selectMin) + + basicHeuristics = OrderedDict( + "min" => heuristic_min + ) + + # ------------------- + # Variable Heuristic definition + # ------------------- + variableHeuristic = SeaPearl.MinDomainVariableSelection{false}() + + metricsArray, eval_metricsArray = trytrain( + nbEpisodes=n_episodes, + evalFreq=Int(floor(n_episodes / n_eval)), + nbInstances=n_instances, + restartPerInstances=1, + generator=generator, + variableHeuristic=variableHeuristic, + learnedHeuristics=learnedHeuristics, + basicHeuristics=basicHeuristics; + out_solver=true, + verbose=false, + nbRandomHeuristics=0, + exp_name="graphcoloring_n_conv_" * type * "_" * string(n_episodes) * "_" * string(n_nodes) * "_" + ) + nothing +end + +############################################################################### +######### Experiment Type 3 +######### +######### +############################################################################### +""" +Compares the impact of the chosen_features for the heterogeneous representation. 
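+Each element of `chosen_features_list` is expected to provide the feature `Dict` at index 1 and the matching `feature_size` at index 2; one learned heuristic is trained per element.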
+""" +function experiment_chosen_features_heterogeneous( + size, + n_episodes, + n_instances; + output_size = size, + n_layers_graph=3, + n_eval=10, + generator, + chosen_features_list, + type="", + eval_timeout=nothing, + reward=SeaPearl.GeneralReward, + trajectory_capacity=2000 +) + + SR_heterogeneous = SeaPearl.HeterogeneousStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.HeterogeneousTrajectoryState} + + learnedHeuristics = OrderedDict{String,SeaPearl.LearnedHeuristic}() + for i in 1:length(chosen_features_list) + chosen_features = chosen_features_list[i][1] + feature_size = chosen_features_list[i][2] + agent = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(2000, 0.01), + batch_size=16, + update_horizon=8, + min_replay_history=256, + update_freq=1, + target_update_freq=8, + get_heterogeneous_nn = () -> get_heterogeneous_cpnn( + feature_size=feature_size, + conv_size=8, + dense_size=16, + output_size=output_size, + n_layers_graph=n_layers_graph, + n_layers_node=2, + n_layers_output=2 + ) + ) + learned_heuristic = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent; chosen_features=chosen_features) + learnedHeuristics["heterogeneous_" *string(i)] = learned_heuristic + end + + # Basic value-selection heuristic + # selectMin(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.minimum(x.domain) + # heuristic_min = SeaPearl.BasicHeuristic(selectMin) + + basicHeuristics = OrderedDict( + "min" => SeaPearl.RandomHeuristic() + ) + + # ------------------- + # Variable Heuristic definition + # ------------------- + variableHeuristic = SeaPearl.MinDomainVariableSelection{false}() + + metricsArray, eval_metricsArray = trytrain( + nbEpisodes=n_episodes, + evalFreq=Int(floor(n_episodes / n_eval)), + nbInstances=n_instances, + restartPerInstances=1, + generator=generator, + variableHeuristic=variableHeuristic, + learnedHeuristics=learnedHeuristics, + basicHeuristics=basicHeuristics; + out_solver=true, + verbose=false, + nbRandomHeuristics=0, + exp_name= type * "_heterogeneous_chosen_features_" * string(n_episodes) * "_" * string(size) * "_", + eval_timeout=eval_timeout + ) + nothing +end + +############################################################################### +######### Experiment Type 4 +######### +######### +############################################################################### +""" +Compares the simple and the supervised learned heuristic for the heterogeneous representation. 
+""" +function experiment_heuristic_heterogeneous( + size, + n_episodes, + n_instances; + feature_size, + output_size, + n_eval=10, + generator, + type="", + eval_timeout=nothing, + chosen_features=nothing, + basicHeuristics, + reward=SeaPearl.GeneralReward, + trajectory_capacity=2000, + n_layers_graph=3, + eta_init=1.0, + eta_stable=0.1, + eta_decay_steps, + helpValueHeuristic +) + + SR_heterogeneous = SeaPearl.HeterogeneousStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.HeterogeneousTrajectoryState} + + if isnothing(chosen_features) + chosen_features = DEFAULT_CHOSEN_FEATURES + end + + agent_simple = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(2000, 0.01), + batch_size=16, + update_horizon=8, + min_replay_history=256, + update_freq=1, + target_update_freq=8, + get_heterogeneous_nn = () -> get_heterogeneous_cpnn( + feature_size=feature_size, + conv_size=8, + dense_size=16, + output_size=output_size, + n_layers_graph=n_layers_graph, + n_layers_node=2, + n_layers_output=2 + ) + ) + learned_heuristic_simple = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,SeaPearl.GeneralReward,SeaPearl.FixedOutput}(agent_simple; chosen_features=chosen_features) + + agent_supervised = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(2000, 0.01), + batch_size=16, + update_horizon=8, + min_replay_history=256, + update_freq=1, + target_update_freq=200, + get_heterogeneous_nn = () -> get_heterogeneous_cpnn( + feature_size=feature_size, + conv_size=8, + dense_size=16, + output_size=output_size, + n_layers_graph=n_layers_graph, + n_layers_node=2, + n_layers_output=2 + ) + ) + learned_heuristic_supervised = SeaPearl.SupervisedLearnedHeuristic{SR_heterogeneous,SeaPearl.GeneralReward,SeaPearl.FixedOutput}(agent_supervised; chosen_features=chosen_features, eta_init=eta_init, eta_stable=eta_stable, decay_steps=eta_decay_steps, helpValueHeuristic=helpValueHeuristic) + + learnedHeuristics = OrderedDict( + "simple" => learned_heuristic_simple, + "supervised" => learned_heuristic_supervised, + ) + + if isnothing(basicHeuristics) + basicHeuristics = OrderedDict( + "random" => SeaPearl.RandomHeuristic() + ) + end + variableHeuristic = SeaPearl.MinDomainVariableSelection{false}() + + metricsArray, eval_metricsArray = trytrain( + nbEpisodes=n_episodes, + evalFreq=Int(floor(n_episodes / n_eval)), + nbInstances=n_instances, + restartPerInstances=1, + generator=generator, + variableHeuristic=variableHeuristic, + learnedHeuristics=learnedHeuristics, + basicHeuristics=basicHeuristics; + out_solver=true, + verbose=false, + nbRandomHeuristics=0, + exp_name= type * "_heterogeneous_heuristic_" * string(n_episodes) * "_" * string(size) * "_", + eval_timeout=eval_timeout + ) + nothing +end + +############################################################################### +######### Experiment Type 5 +######### +######### +############################################################################### +""" +Compares different action explorers for the heterogeneous representation. 
+""" +function experiment_explorer_heterogeneous( + pb_size, + nb_steps_per_episode, + n_episodes, + n_instances; + feature_size, + output_size, + n_eval=10, + generator, + type="", + eval_timeout=nothing, + chosen_features=nothing, + basicHeuristics, + reward=SeaPearl.GeneralReward, + n_layers_graph=3, + decay_steps=2000, + c=2.0, + trajectory_capacity=2000 +) + + SR_heterogeneous = SeaPearl.HeterogeneousStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.HeterogeneousTrajectoryState} + + if isnothing(chosen_features) + chosen_features = DEFAULT_CHOSEN_FEATURES + end + + agent_epsilon_greedy = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=800*nb_steps_per_episode, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(decay_steps, 0.01), + batch_size=16, + update_horizon=Int(round(nb_steps_per_episode/2)), + min_replay_history=16*Int(round(nb_steps_per_episode/2)), + update_freq=nb_steps_per_episode, + target_update_freq=8*nb_steps_per_episode, + get_heterogeneous_nn = () -> get_heterogeneous_fullfeaturedcpnn( + feature_size=feature_size, + conv_size=8, + dense_size=16, + output_size=1, + n_layers_graph=n_layers_graph, + n_layers_node=2, + n_layers_output=2 + ) + ) + learned_heuristic_epsilon_greedy = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent_epsilon_greedy; chosen_features=chosen_features) + + agent_ucb = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=800*nb_steps_per_episode, n_actions=output_size), + get_explorer = () -> get_ucb_explorer(c, output_size), + batch_size=16, + update_horizon=Int(round(nb_steps_per_episode/2)), + min_replay_history=16*Int(round(nb_steps_per_episode/2)), + update_freq=nb_steps_per_episode, + target_update_freq=8*nb_steps_per_episode, + get_heterogeneous_nn = () -> get_heterogeneous_fullfeaturedcpnn( + feature_size=feature_size, + conv_size=8, + dense_size=16, + output_size=1, + n_layers_graph=n_layers_graph, + n_layers_node=2, + n_layers_output=2 + ) + ) + learned_heuristic_ucb = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent_ucb; chosen_features=chosen_features) + + agent_softmaxTdecay = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=800*nb_steps_per_episode, n_actions=output_size), + get_explorer = () -> get_softmax_explorer(5.0, 0.2, decay_steps), + batch_size=16, + update_horizon=Int(round(nb_steps_per_episode/2)), + min_replay_history=256, + update_freq=nb_steps_per_episode, + target_update_freq=8*nb_steps_per_episode, + get_heterogeneous_nn = () -> get_heterogeneous_fullfeaturedcpnn( + feature_size=feature_size, + conv_size=8, + dense_size=16, + output_size=1, + n_layers_graph=n_layers_graph, + n_layers_node=2, + n_layers_output=2 + ) + ) + learned_heuristic_softmaxTdecay = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent_softmaxTdecay; chosen_features=chosen_features) + + learnedHeuristics = OrderedDict( + "epsilon_greedy" => learned_heuristic_epsilon_greedy, + "ucb" => learned_heuristic_ucb, + "softmaxTdecay" => learned_heuristic_softmaxTdecay + ) + + if isnothing(basicHeuristics) + basicHeuristics = OrderedDict( + "random" => SeaPearl.RandomHeuristic() + ) + end + variableHeuristic = SeaPearl.MinDomainVariableSelection{true}() + + metricsArray, eval_metricsArray = trytrain( + nbEpisodes=n_episodes, + 
evalFreq=Int(floor(n_episodes / n_eval)), + nbInstances=n_instances, + restartPerInstances=1, + generator=generator, + variableHeuristic=variableHeuristic, + learnedHeuristics=learnedHeuristics, + basicHeuristics=basicHeuristics; + eval_strategy=SeaPearl.ILDSearch(2), + out_solver=true, + verbose=true, + nbRandomHeuristics=0, + exp_name= type * "_heterogeneous_explorer_" * string(n_episodes) * "_" * string(pb_size) * "_", + eval_timeout=eval_timeout + ) + nothing +end + +############################################################################### +######### Experiment Type 6 +######### +######### +############################################################################### +""" +Compares different CPNNs for the heterogeneous representation. +""" +function experiment_nn_heterogeneous( + size, + nb_steps_per_episode, + n_episodes, + n_instances; + feature_size, + output_size, + n_eval=10, + generator, + type="", + eval_timeout=nothing, + chosen_features=nothing, + basicHeuristics, + reward=SeaPearl.GeneralReward, + n_layers_graph=3, + c=2.0, + trajectory_capacity=5000, + decay_steps = 1000, + update_horizon = 10, + pool=SeaPearl.sumPooling(), + restartPerInstances=1 +) + + SR_heterogeneous = SeaPearl.HeterogeneousStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.HeterogeneousTrajectoryState} + + if isnothing(chosen_features) + chosen_features = DEFAULT_CHOSEN_FEATURES + end + + agent_fullfeaturedcpnn = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(decay_steps, 0.01), + batch_size=16, + update_horizon=Int(round(nb_steps_per_episode/2)), + min_replay_history=256, + update_freq=2, + target_update_freq=7 * nb_steps_per_episode, + get_heterogeneous_nn = () -> get_heterogeneous_fullfeaturedcpnn( + feature_size=feature_size, + conv_size=8, + dense_size=16, + output_size=1, + n_layers_graph=n_layers_graph, + n_layers_node=2, + n_layers_output=2, + pool=pool + ) + ) + learned_heuristic_fullfeaturedcpnn = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous, reward, SeaPearl.FixedOutput}(agent_fullfeaturedcpnn; chosen_features=chosen_features) + + agent_cpnn = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(decay_steps, 0.01), + batch_size=16, + update_horizon=update_horizon, + min_replay_history=256, + update_freq=2, + target_update_freq=7 * nb_steps_per_episode, + get_heterogeneous_nn = () -> get_heterogeneous_cpnn( + feature_size=feature_size, + conv_size=8, + dense_size=16, + output_size=output_size, + n_layers_graph=n_layers_graph, + n_layers_output=4 + ) + ) + learned_heuristic_cpnn = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent_cpnn; chosen_features=chosen_features) + + + learnedHeuristics = OrderedDict( + "fullfeaturedcpnn" => learned_heuristic_fullfeaturedcpnn, + "cpnn" => learned_heuristic_cpnn + ) + + if isnothing(basicHeuristics) + basicHeuristics = OrderedDict( + "random" => SeaPearl.RandomHeuristic() + ) + end + variableHeuristic = SeaPearl.MinDomainVariableSelection{true}() + + metricsArray, eval_metricsArray = trytrain( + nbEpisodes=n_episodes, + evalFreq=Int(floor(n_episodes / n_eval)), + nbInstances=n_instances, + restartPerInstances=restartPerInstances, + generator=generator, + variableHeuristic=variableHeuristic, + 
learnedHeuristics=learnedHeuristics, + basicHeuristics=basicHeuristics; + out_solver=true, + verbose=true, + nbRandomHeuristics=1, + eval_strategy=SeaPearl.ILDSearch(2), + exp_name= type * "_heterogeneous_cpnn_"*string(size)*"_"* string(n_episodes)*"_"*string(pool)*"_", + eval_timeout=eval_timeout, + ) + nothing +end + +function experiment_nn_heterogeneousv4( + size, + nb_steps_per_episode, + n_episodes, + n_instances; + feature_size, + output_size, + n_eval=10, + generator, + type="", + eval_timeout=nothing, + chosen_features=nothing, + basicHeuristics, + reward=SeaPearl.GeneralReward, + n_layers_graph=3, + c=2.0, + trajectory_capacity=5000, + decay_steps = 1000, + update_horizon = 10, + pool=SeaPearl.sumPooling(), + restartPerInstances=1 +) + + SR_heterogeneous = SeaPearl.HeterogeneousStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.HeterogeneousTrajectoryState} + + if isnothing(chosen_features) + chosen_features = DEFAULT_CHOSEN_FEATURES + end + + agent_fullfeaturedcpnn = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(decay_steps, 0.01), + batch_size=16, + update_horizon=Int(round(nb_steps_per_episode/2)), + min_replay_history=256, + update_freq=2, + target_update_freq=7 * nb_steps_per_episode, + get_heterogeneous_nn = () -> get_heterogeneous_fullfeaturedcpnn( + feature_size=feature_size, + conv_size=8, + dense_size=16, + output_size=1, + n_layers_graph=n_layers_graph, + n_layers_node=2, + n_layers_output=2, + pool=pool + ) + ) + learned_heuristic_fullfeaturedcpnn = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous, reward, SeaPearl.FixedOutput}(agent_fullfeaturedcpnn; chosen_features=chosen_features) + + agent_ffcpnnv4 = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(decay_steps, 0.01), + batch_size=8, + update_horizon=update_horizon, + min_replay_history=256, + update_freq=2, + target_update_freq=7 * nb_steps_per_episode, + get_heterogeneous_nn = () -> get_heterogeneous_ffcpnnv4( + feature_size=feature_size, + conv_size=8, + dense_size=16, + output_size=1, + n_layers_graph=n_layers_graph, + n_layers_output=4 + ) + ) + learned_heuristic_ffcpnnv4 = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent_ffcpnnv4; chosen_features=chosen_features) + + #= + agent_variableoutputcpnn = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(decay_steps, 0.01), + batch_size=16, + update_horizon=update_horizon, + min_replay_history=256, + update_freq=1, + target_update_freq = 7 * nb_steps_per_episode, + get_heterogeneous_nn = () -> get_heterogeneous_variableoutputcpnn( + feature_size=feature_size, + conv_size=8, + dense_size=16, + output_size=1, + n_layers_graph=n_layers_graph, + n_layers_node=2, + n_layers_output=2 + ) + ) + learned_heuristic_variableoutputcpnn = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent_variableoutputcpnn; chosen_features=chosen_features) + =# + + learnedHeuristics = OrderedDict( + "fullfeaturedcpnn" => learned_heuristic_fullfeaturedcpnn, + #"ffcpnnv3" => learned_heuristic_ffcpnnv3 + "new_cpnn" => learned_heuristic_ffcpnnv4 + ) + + if 
isnothing(basicHeuristics) + basicHeuristics = OrderedDict( + "random" => SeaPearl.RandomHeuristic() + ) + end + variableHeuristic = SeaPearl.MinDomainVariableSelection{true}() + + metricsArray, eval_metricsArray = trytrain( + nbEpisodes=n_episodes, + evalFreq=Int(floor(n_episodes / n_eval)), + nbInstances=n_instances, + restartPerInstances=restartPerInstances, + generator=generator, + variableHeuristic=variableHeuristic, + learnedHeuristics=learnedHeuristics, + basicHeuristics=basicHeuristics; + out_solver=true, + verbose=true, + nbRandomHeuristics=1, + eval_strategy=SeaPearl.ILDSearch(2), + exp_name= type * "_heterogeneous_cpnn_" * string(n_episodes), + eval_timeout=eval_timeout + ) + nothing +end + +function experiment_nn_heterogeneous_softmax_explorer( + size, + nb_steps_per_episode, + n_episodes, + n_instances; + feature_size, + output_size, + n_eval=10, + generator, + type="", + eval_timeout=nothing, + chosen_features=nothing, + basicHeuristics, + reward=SeaPearl.GeneralReward, + n_layers_graph=3, + c=2.0, + trajectory_capacity=5000, + decay_steps = 1000, + update_horizon = 10, + pool=SeaPearl.sumPooling(), + restartPerInstances=1 +) + + SR_heterogeneous = SeaPearl.HeterogeneousStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.HeterogeneousTrajectoryState} + + if isnothing(chosen_features) + chosen_features = DEFAULT_CHOSEN_FEATURES + end + + agent_fullfeaturedcpnn = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size), + get_explorer = () -> get_softmax_explorer(5.0, 0.1, decay_steps), + batch_size=16, + update_horizon=Int(round(nb_steps_per_episode/2)), + min_replay_history=256, + update_freq=2, + target_update_freq=7 * nb_steps_per_episode, + get_heterogeneous_nn = () -> get_heterogeneous_fullfeaturedcpnn( + feature_size=feature_size, + conv_size=8, + dense_size=16, + output_size=1, + n_layers_graph=n_layers_graph, + n_layers_node=2, + n_layers_output=2, + pool=pool + ) + ) + learned_heuristic_fullfeaturedcpnn = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous, reward, SeaPearl.FixedOutput}(agent_fullfeaturedcpnn; chosen_features=chosen_features) + + agent_ffcpnnv3 = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(decay_steps, 0.01), + batch_size=16, + update_horizon=update_horizon, + min_replay_history=256, + update_freq=2, + target_update_freq=7 * nb_steps_per_episode, + get_heterogeneous_nn = () -> get_heterogeneous_ffcpnnv3( + feature_size=feature_size, + conv_size=8, + dense_size=16, + output_size=1, + n_layers_graph=n_layers_graph, + n_layers_output=4 + ) + ) + learned_heuristic_ffcpnnv3 = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent_ffcpnnv3; chosen_features=chosen_features) + + + agent_variableoutputcpnn = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(decay_steps, 0.01), + batch_size=16, + update_horizon=update_horizon, + min_replay_history=256, + update_freq=1, + target_update_freq = 7 * nb_steps_per_episode, + get_heterogeneous_nn = () -> get_heterogeneous_variableoutputcpnn( + feature_size=feature_size, + conv_size=8, + dense_size=16, + output_size=1, + n_layers_graph=n_layers_graph, + n_layers_node=2, + 
n_layers_output=2 + ) + ) + learned_heuristic_variableoutputcpnn = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent_variableoutputcpnn; chosen_features=chosen_features) + + + learnedHeuristics = OrderedDict( + "fullfeaturedcpnn" => learned_heuristic_fullfeaturedcpnn, + "ffcpnnv3," => learned_heuristic_ffcpnnv3 + ) + + if isnothing(basicHeuristics) + basicHeuristics = OrderedDict( + "random" => SeaPearl.RandomHeuristic() + ) + end + variableHeuristic = SeaPearl.MinDomainVariableSelection{true}() + + metricsArray, eval_metricsArray = trytrain( + nbEpisodes=n_episodes, + evalFreq=Int(floor(n_episodes / n_eval)), + nbInstances=n_instances, + restartPerInstances=restartPerInstances, + generator=generator, + variableHeuristic=variableHeuristic, + learnedHeuristics=learnedHeuristics, + basicHeuristics=basicHeuristics; + out_solver=true, + verbose=true, + nbRandomHeuristics=1, + eval_strategy=SeaPearl.ILDSearch(2), + exp_name= type * "_heterogeneous_cpnn_" * string(n_episodes), + eval_timeout=eval_timeout + ) + nothing +end + +############################################################################### +######### Experiment Type 7 +######### +######### +############################################################################### +""" +Compares different pooling methods in the CPNN for the heterogeneous representation. +""" +function experiment_pooling_heterogeneous( + size, + n_episodes, + n_instances; + feature_size, + output_size, + n_eval=10, + generator, + type="", + eval_timeout=nothing, + chosen_features=nothing, + basicHeuristics, + reward=SeaPearl.GeneralReward, + n_layers_graph=3, + decay_steps=2000, + c=2.0, + trajectory_capacity=2000 +) + + SR_heterogeneous = SeaPearl.HeterogeneousStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.HeterogeneousTrajectoryState} + + if isnothing(chosen_features) + chosen_features = DEFAULT_CHOSEN_FEATURES + end + + agent_sum = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=2000, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(decay_steps, 0.01), + batch_size=16, + update_horizon=8, + min_replay_history=256, + update_freq=1, + target_update_freq=8, + get_heterogeneous_nn = () -> get_heterogeneous_cpnn( + feature_size=feature_size, + conv_size=8, + dense_size=16, + output_size=output_size, + n_layers_graph=n_layers_graph, + n_layers_node=2, + n_layers_output=2; + pool=SeaPearl.sumPooling() + ) + ) + learned_heuristic_sum = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent_sum; chosen_features=chosen_features) + + agent_mean = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(decay_steps, 0.01), + batch_size=16, + update_horizon=8, + min_replay_history=256, + update_freq=1, + target_update_freq=8, + get_heterogeneous_nn = () -> get_heterogeneous_cpnn( + feature_size=feature_size, + conv_size=8, + dense_size=16, + output_size=output_size, + n_layers_graph=n_layers_graph, + n_layers_node=2, + n_layers_output=2; + pool=SeaPearl.meanPooling() + ) + ) + learned_heuristic_mean = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent_mean; chosen_features=chosen_features) + + learnedHeuristics = OrderedDict( + "sum" => learned_heuristic_sum, + "mean" => learned_heuristic_mean, + ) + + if isnothing(basicHeuristics) + 
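+        # No baseline heuristic supplied: fall back to a random value-selection heuristic.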
basicHeuristics = OrderedDict( + "random" => SeaPearl.RandomHeuristic() + ) + end + variableHeuristic = SeaPearl.MinDomainVariableSelection{false}() + + metricsArray, eval_metricsArray = trytrain( + nbEpisodes=n_episodes, + evalFreq=Int(floor(n_episodes / n_eval)), + nbInstances=n_instances, + restartPerInstances=1, + generator=generator, + variableHeuristic=variableHeuristic, + learnedHeuristics=learnedHeuristics, + basicHeuristics=basicHeuristics; + out_solver=true, + verbose=true, + nbRandomHeuristics=0, + exp_name= type * "_heterogeneous_pooling_" * string(n_episodes) * "_" * string(size) * "_", + eval_timeout=eval_timeout + ) + nothing +end + +############################################################################### +######### Experiment Type 8 +######### +######### +############################################################################### + +""" +Compares different choices of features on HeterogeneousCPNN versus default_default +""" +function experiment_chosen_features_hetcpnn( + size, + n_step_per_episode, + n_episodes, + n_instances, + restartPerInstances; + output_size = size, + update_horizon = Int(round(n_step_per_episode//2)), + n_layers_graph=3, + n_eval=10, + generator, + chosen_features_list, + type="", + eval_timeout=60, + reward=SeaPearl.GeneralReward, + trajectory_capacity=nothing, + basicHeuristics = nothing +) + + SR_heterogeneous = SeaPearl.HeterogeneousStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.HeterogeneousTrajectoryState} + SR_default = SeaPearl.DefaultStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.DefaultTrajectoryState} + trajectory_capacity = 500*n_step_per_episode + learnedHeuristics = OrderedDict{String,SeaPearl.LearnedHeuristic}() + for i in 1:length(chosen_features_list) + chosen_features = chosen_features_list[i][1] + feature_size = chosen_features_list[i][2] + agent_hetcpnn = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(2000, 0.01), + batch_size=32, + update_horizon=update_horizon, + min_replay_history=Int(round(32*n_step_per_episode//2)), + update_freq=1, + target_update_freq=7*n_step_per_episode, + get_heterogeneous_nn = () -> get_heterogeneous_cpnn( + feature_size=feature_size, + conv_size=8, + dense_size=16, + output_size=output_size, + n_layers_graph=n_layers_graph, + n_layers_node=2, + n_layers_output=2 + ) + ) + learned_heuristic_hetcpnn = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent_hetcpnn; chosen_features=chosen_features) + + learnedHeuristics["hetcpnn_" *string(i)] = learned_heuristic_hetcpnn + end + agent_default = get_default_agent(; + get_default_trajectory = () -> get_default_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(n_step_per_episode*300, 0.01), + batch_size=32, + update_horizon=Int(round(n_step_per_episode//2)), + min_replay_history=Int(round(32*n_step_per_episode//2)), + update_freq=1, + target_update_freq=7*n_step_per_episode, + get_default_nn = () -> get_default_cpnn( + feature_size=3, + conv_size=8, + dense_size=16, + output_size=output_size, + n_layers_graph=n_layers_graph, + n_layers_node=2, + n_layers_output=2, + init = init + ) + ) + learned_heuristic_default = SeaPearl.SimpleLearnedHeuristic{SR_default,reward,SeaPearl.FixedOutput}(agent_default) + learnedHeuristics["default"] = learned_heuristic_default + + # 
------------------- + # Variable Heuristic definition + # ------------------- + variableHeuristic = SeaPearl.MinDomainVariableSelection{false}() + + if isnothing(basicHeuristics) + basicHeuristics = OrderedDict( + "random" => SeaPearl.RandomHeuristic() + ) + end + + metricsArray, eval_metricsArray = trytrain( + nbEpisodes=n_episodes, + evalFreq=Int(floor(n_episodes / n_eval)), + nbInstances=n_instances, + restartPerInstances=restartPerInstances, + generator=generator, + variableHeuristic=variableHeuristic, + learnedHeuristics=learnedHeuristics, + basicHeuristics=basicHeuristics; + out_solver=true, + verbose=true, + nbRandomHeuristics=0, + exp_name= type * "_heterogeneous_cpnn_chosen_features_" * string(n_episodes) * "_" * string(size) * "_", + eval_timeout=eval_timeout, + seedTraining = 33 + ) + nothing +end + +function experiment_chosen_features_hetffcpnn( + size, + n_step_per_episode, + n_episodes, + n_instances, + restartPerInstances; + output_size = size, + n_layers_graph=3, + n_eval=10, + generator, + chosen_features_list, + update_horizon = Int(round(n_step_per_episode//2)), + type="", + eval_timeout=60, + reward=SeaPearl.GeneralReward, + trajectory_capacity=nothing, + basicHeuristics = nothing +) + + SR_heterogeneous = SeaPearl.HeterogeneousStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.HeterogeneousTrajectoryState} + SR_default = SeaPearl.DefaultStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.DefaultTrajectoryState} + trajectory_capacity = 700*n_step_per_episode + learnedHeuristics = OrderedDict{String,SeaPearl.LearnedHeuristic}() + for i in 1:length(chosen_features_list) + chosen_features = chosen_features_list[i][1] + feature_size = chosen_features_list[i][2] + agent_hetcpnn = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(400*n_step_per_episode, 0.01), + batch_size=16, + update_horizon=update_horizon, + min_replay_history=16*update_horizon, + update_freq=2, + target_update_freq=7*n_step_per_episode, + get_heterogeneous_nn = () -> get_heterogeneous_fullfeaturedcpnn( + feature_size=feature_size, + conv_size=8, + dense_size=16, + output_size=1, + n_layers_graph=n_layers_graph, + n_layers_node=2, + n_layers_output=2 + ) + ) + learned_heuristic_hetffcpnn = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent_hetcpnn; chosen_features=chosen_features) + learnedHeuristics["hetffcpnn_" *string(i)] = learned_heuristic_hetffcpnn + end + agent_default = get_default_agent(; + get_default_trajectory = () -> get_default_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(n_step_per_episode*400, 0.01), + batch_size=16, + update_horizon=update_horizon, + min_replay_history=update_horizon*16, + update_freq=4, + target_update_freq=7*n_step_per_episode, + get_default_nn = () -> get_default_cpnn( + feature_size=3, + conv_size=8, + dense_size=16, + output_size=output_size, + n_layers_graph=n_layers_graph, + n_layers_node=2, + n_layers_output=2, + init = init + ) + ) + learned_heuristic_default = SeaPearl.SimpleLearnedHeuristic{SR_default,reward,SeaPearl.FixedOutput}(agent_default) + learnedHeuristics["default"] = learned_heuristic_default + + # ------------------- + # Variable Heuristic definition + # ------------------- + variableHeuristic = SeaPearl.MinDomainVariableSelection{false}() + + if isnothing(basicHeuristics) + 
basicHeuristics = OrderedDict( + "random" => SeaPearl.RandomHeuristic() + ) + end + + metricsArray, eval_metricsArray = trytrain( + nbEpisodes=n_episodes, + evalFreq=Int(floor(n_episodes / n_eval)), + nbInstances=n_instances, + restartPerInstances=restartPerInstances, + generator=generator, + variableHeuristic=variableHeuristic, + learnedHeuristics=learnedHeuristics, + basicHeuristics=basicHeuristics; + out_solver=true, + verbose=true, + nbRandomHeuristics=0, + exp_name= type * "_heterogeneous_ffcpnn_chosen_features" * string(n_episodes) * "_" * string(size) * "_", + eval_timeout=eval_timeout + ) + nothing +end + +############################################################################### +######### Experiment Type 9 +######### +######### Transfer Learning +############################################################################### +""" +Tests the impact of transfer learning +""" +function experiment_transfer_heterogeneous( + size, + size_transfered, + n_episodes, + n_episodes_transfered, + n_instances; + feature_size, + output_size, + output_size_transfered, + n_eval=10, + n_eval_transfered=10, + generator, + generator_transfered, + type="", + expParameters=Dict{String,Any}()::Dict{String,Any}, + eval_timeout=nothing, + chosen_features=nothing, + basicHeuristics=nothing, + reward=SeaPearl.GeneralReward, + n_layers_graph=3, + decay_steps=2000, + trajectory_capacity=2000, + update_horizon=8, + min_replay_history=128, + verbose=true, + eval_strategy=SeaPearl.DFSearch(), +) + + SR_heterogeneous = SeaPearl.HeterogeneousStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.HeterogeneousTrajectoryState} + + if isnothing(chosen_features) + chosen_features = DEFAULT_CHOSEN_FEATURES + end + + agent = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(decay_steps, 0.01), + update_horizon=update_horizon, + min_replay_history=min_replay_history, + get_heterogeneous_nn = () -> get_heterogeneous_fullfeaturedcpnn( + feature_size=feature_size, + n_layers_graph=n_layers_graph + ) + ) + learned_heuristic = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent; chosen_features=chosen_features) + + learnedHeuristics = OrderedDict( + "learned" => learned_heuristic, + ) + + if isnothing(basicHeuristics) + basicHeuristics = OrderedDict( + "random" => SeaPearl.RandomHeuristic() + ) + end + + metricsArray, eval_metricsArray = trytrain( + nbEpisodes=n_episodes, + evalFreq=Int(floor(n_episodes / n_eval)), + nbInstances=n_instances, + generator=generator, + learnedHeuristics=learnedHeuristics, + basicHeuristics=basicHeuristics; + verbose=verbose, + exp_name= type * "_transfer_" * string(n_episodes) * "_" * string(size) * "_", + eval_timeout=eval_timeout, + eval_strategy=eval_strategy + ) + + agent_transfer = RL.Agent( + policy= RL.QBasedPolicy( + learner=deepcopy(agent.policy.learner), + explorer= get_epsilon_greedy_explorer(decay_steps, 0.01), + ), + trajectory=get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size_transfered) + ) + agent_transfer.policy.learner.approximator.optimizer.eta = 0.0001 + agent_transfer.policy.learner.target_approximator.optimizer.eta = 0.0001 + learned_heuristic_transfer = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,SeaPearl.GeneralReward,SeaPearl.FixedOutput}(agent_transfer; chosen_features=chosen_features) + + agent = get_heterogeneous_agent(; + 
get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size_transfered), + get_explorer = () -> get_epsilon_greedy_explorer(decay_steps, 0.01), + update_horizon=update_horizon, + min_replay_history=min_replay_history, + get_heterogeneous_nn = () -> get_heterogeneous_fullfeaturedcpnn( + feature_size=feature_size, + n_layers_graph=n_layers_graph + ) + ) + learned_heuristic = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent; chosen_features=chosen_features) + + learnedHeuristics = OrderedDict( + # "new" => learned_heuristic, + "transfer" => learned_heuristic_transfer, + ) + + metricsArray, eval_metricsArray = trytrain( + nbEpisodes=n_episodes_transfered, + evalFreq=Int(floor(n_episodes_transfered / n_eval_transfered)), + nbInstances=n_instances, + generator=generator_transfered, + learnedHeuristics=learnedHeuristics, + basicHeuristics=basicHeuristics; + verbose=verbose, + exp_name=type * "_transfered_" * string(n_episodes_transfered) * "_" * string(size_transfered) * "_", + eval_timeout=eval_timeout, + eval_strategy=eval_strategy + ) + nothing +end + +############################################################################### +######### Experiment Type 10 +######### +######### Restart +############################################################################### +""" +Compares different values of argument `restartPerInstances`` +""" +function experiment_restart_heterogeneous( + size, + n_episodes, + n_instances; + restart_list = [1, 5, 10, 20], + output_size = size, + n_eval=10, + generator, + type="", + eval_timeout=nothing, + verbose = false, + reward=SeaPearl.GeneralReward, + n_layers_graph=3, + trajectory_capacity=2000, + decay_steps = 2000, + update_horizon = 8, + min_replay_history = 128, + feature_size, + chosen_features=nothing, + basicHeuristics=nothing +) + + SR_heterogeneous = SeaPearl.HeterogeneousStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.HeterogeneousTrajectoryState} + + if isnothing(chosen_features) + chosen_features = DEFAULT_CHOSEN_FEATURES + end + + if isnothing(basicHeuristics) + basicHeuristics = OrderedDict( + "random" => SeaPearl.RandomHeuristic() + ) + end + + for i in 1:length(restart_list) + n_restart = restart_list[i] + + learnedHeuristics = OrderedDict{String,SeaPearl.LearnedHeuristic}() + agent = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(decay_steps, 0.01), + update_horizon=update_horizon, + min_replay_history=min_replay_history, + get_heterogeneous_nn = () -> get_heterogeneous_fullfeaturedcpnn( + feature_size=feature_size, + n_layers_graph=n_layers_graph + ) + ) + learned_heuristic = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent; chosen_features=chosen_features) + learnedHeuristics["heterogeneous_" *string(i)] = learned_heuristic + + trytrain( + nbEpisodes=n_episodes, + evalFreq=Int(floor(n_episodes / n_eval)), + nbInstances=n_instances, + restartPerInstances=n_restart, + generator=generator, + learnedHeuristics=learnedHeuristics, + basicHeuristics=basicHeuristics; + verbose=verbose, + exp_name= type * "_restart_" * string(n_restart) * "_" * string(n_episodes) * "_" * string(size) * "_", + eval_timeout=eval_timeout + ) + end + nothing +end + + +############################################################################### +######### Experiment 
Type 11 +######### +######### +############################################################################### +""" +Compares different activation functions on the dense network for the heterogeneous representation. +""" +function experiment_activation_heterogeneous( + size, + n_episodes, + n_instances; + feature_size, + output_size, + n_eval=10, + generator, + type="", + eval_timeout=nothing, + chosen_features=nothing, + basicHeuristics, + reward=SeaPearl.GeneralReward, + n_layers_graph=3, + decay_steps=n_episodes*size*0.8, + c=2.0, + trajectory_capacity=5000, + pool=SeaPearl.sumPooling() +) + + SR_heterogeneous = SeaPearl.HeterogeneousStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.HeterogeneousTrajectoryState} + + if isnothing(chosen_features) + chosen_features = DEFAULT_CHOSEN_FEATURES + end + + agent_fullfeaturedcpnn = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(decay_steps, 0.01), + batch_size=32, + update_horizon=10, + min_replay_history=256, + update_freq=1, + target_update_freq=80, + get_heterogeneous_nn = () -> get_heterogeneous_fullfeaturedcpnn( + feature_size=feature_size, + conv_size=16, + dense_size=16, + output_size=1, + n_layers_graph=n_layers_graph, + n_layers_node=2, + n_layers_output=2, + pool=pool, + σ=NNlib.relu + ) + ) + learned_heuristic_fullfeaturedcpnn = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous, reward, SeaPearl.FixedOutput}(agent_fullfeaturedcpnn; chosen_features=chosen_features) + + agent_ffcpnnv3_relu = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(decay_steps, 0.01), + batch_size=32, + update_horizon=10, + min_replay_history=256, + update_freq=1, + target_update_freq=80, + get_heterogeneous_nn = () -> get_heterogeneous_ffcpnnv3( + feature_size=feature_size, + conv_size=16, + dense_size=16, + output_size=1, + n_layers_graph=n_layers_graph, + n_layers_output=2, + σ=NNlib.relu + ) + ) + learned_heuristic_ffcpnnv3_relu = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent_ffcpnnv3_relu; chosen_features=chosen_features) + + agent_ffcpnnv3_sigmoid = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(decay_steps, 0.01), + batch_size=32, + update_horizon=10, + min_replay_history=256, + update_freq=1, + target_update_freq=80, + get_heterogeneous_nn = () -> get_heterogeneous_ffcpnnv3( + feature_size=feature_size, + conv_size=16, + dense_size=16, + output_size=1, + n_layers_graph=n_layers_graph, + n_layers_output=2, + σ=NNlib.sigmoid + ) + ) + + learned_heuristic_ffcpnnv3_sigmoid = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent_ffcpnnv3_sigmoid; chosen_features=chosen_features) + + agent_ffcpnnv3_leakyrelu = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(decay_steps, 0.01), + batch_size=32, + update_horizon=10, + min_replay_history=256, + update_freq=1, + target_update_freq=80, + get_heterogeneous_nn = () -> get_heterogeneous_ffcpnnv3( + feature_size=feature_size, + 
conv_size=16, + dense_size=16, + output_size=1, + n_layers_graph=n_layers_graph, + n_layers_output=2, + σ=NNlib.leakyrelu + ) + ) + learned_heuristic_ffcpnnv3_leakyrelu = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent_ffcpnnv3_leakyrelu; chosen_features=chosen_features) + + agent_ffcpnnv3_id = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(decay_steps, 0.01), + batch_size=32, + update_horizon=10, + min_replay_history=256, + update_freq=1, + target_update_freq=80, + get_heterogeneous_nn = () -> get_heterogeneous_ffcpnnv3( + feature_size=feature_size, + conv_size=16, + dense_size=16, + output_size=1, + n_layers_graph=n_layers_graph, + n_layers_output=2, + σ=identity + ) + ) + learned_heuristic_ffcpnnv3_id = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent_ffcpnnv3_id; chosen_features=chosen_features) + + agent_ffcpnnv4_leakyrelu = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(decay_steps, 0.01), + batch_size=32, + update_horizon=10, + min_replay_history=256, + update_freq=1, + target_update_freq=80, + get_heterogeneous_nn = () -> get_heterogeneous_ffcpnnv4( + feature_size=feature_size, + conv_size=16, + dense_size=16, + output_size=1, + n_layers_graph=n_layers_graph, + n_layers_output=2, + σ=NNlib.leakyrelu + ) + ) + learned_heuristic_ffcpnnv4_leakyrelu = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent_ffcpnnv4_leakyrelu; chosen_features=chosen_features) + + learnedHeuristics = OrderedDict( + #"cpnn" => learned_heuristic_cpnn, + "fullfeaturedcpnn_relu" => learned_heuristic_fullfeaturedcpnn, + # "variableoutputcpnn" => learned_heuristic_variableoutputcpnn, + # "ffcpnnv2" => learned_heuristic_ffcpnnv2, + "ffcpnnv3_relu" => learned_heuristic_ffcpnnv3_relu, + #"ffcpnnv3_sigmoid" => learned_heuristic_ffcpnnv3_sigmoid, + "ffcpnnv3_leakyrelu" => learned_heuristic_ffcpnnv3_leakyrelu, + "ffcpnnv4_leakyrelu" => learned_heuristic_ffcpnnv4_leakyrelu, + #"ffcpnnv3_id" => learned_heuristic_ffcpnnv3_id, + + + ) + + if isnothing(basicHeuristics) + basicHeuristics = OrderedDict( + "random" => SeaPearl.RandomHeuristic() + ) + end + variableHeuristic = SeaPearl.MinDomainVariableSelection{false}() + + metricsArray, eval_metricsArray = trytrain( + nbEpisodes=n_episodes, + evalFreq=Int(floor(n_episodes / n_eval)), + nbInstances=n_instances, + restartPerInstances=1, + generator=generator, + variableHeuristic=variableHeuristic, + learnedHeuristics=learnedHeuristics, + basicHeuristics=basicHeuristics; + out_solver=true, + verbose=false, + nbRandomHeuristics=0, + exp_name= type * "_heterogeneous_cpnn_" * string(n_episodes) * "_" * string(size) * "_", + eval_timeout=eval_timeout + ) + nothing +end + +############################################################################### +######### Experiment Type 12 +######### +######### +############################################################################### +""" +Compare different pooling functions for the graph features in the different versions of FFCPNN. 
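+The three FFCPNNv3 agents below differ only in the `pooling` keyword passed to `get_heterogeneous_ffcpnnv3` ("max", "mean" or "sum"); the FullFeaturedCPNN baseline keeps the `pool` argument given to this function.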
+""" +function experiment_features_pooling_heterogeneous( + size, + n_episodes, + n_instances; + feature_size, + output_size, + n_eval=10, + generator, + type="", + eval_timeout=nothing, + chosen_features=nothing, + basicHeuristics, + reward=SeaPearl.GeneralReward, + n_layers_graph=3, + decay_steps=n_episodes*size*0.8, + c=2.0, + trajectory_capacity=5000, + pool=SeaPearl.sumPooling() +) + + SR_heterogeneous = SeaPearl.HeterogeneousStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.HeterogeneousTrajectoryState} + + if isnothing(chosen_features) + chosen_features = DEFAULT_CHOSEN_FEATURES + end + + agent_fullfeaturedcpnn = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(decay_steps, 0.01), + batch_size=32, + update_horizon=10, + min_replay_history=256, + update_freq=1, + target_update_freq=80, + get_heterogeneous_nn = () -> get_heterogeneous_fullfeaturedcpnn( + feature_size=feature_size, + conv_size=16, + dense_size=16, + output_size=1, + n_layers_graph=n_layers_graph, + n_layers_node=2, + n_layers_output=2, + pool=pool, + σ=NNlib.leakyrelu + ) + ) + learned_heuristic_fullfeaturedcpnn = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous, reward, SeaPearl.FixedOutput}(agent_fullfeaturedcpnn; chosen_features=chosen_features) + + agent_ffcpnnv3_max = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(decay_steps, 0.01), + batch_size=32, + update_horizon=10, + min_replay_history=256, + update_freq=1, + target_update_freq=80, + get_heterogeneous_nn = () -> get_heterogeneous_ffcpnnv3( + feature_size=feature_size, + conv_size=16, + dense_size=16, + output_size=1, + n_layers_graph=n_layers_graph, + n_layers_output=2, + σ=NNlib.leakyrelu, + pooling="max" + ) + ) + learned_heuristic_ffcpnnv3_max = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent_ffcpnnv3_max; chosen_features=chosen_features) + + agent_ffcpnnv3_mean = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(decay_steps, 0.01), + batch_size=32, + update_horizon=10, + min_replay_history=256, + update_freq=1, + target_update_freq=80, + get_heterogeneous_nn = () -> get_heterogeneous_ffcpnnv3( + feature_size=feature_size, + conv_size=16, + dense_size=16, + output_size=1, + n_layers_graph=n_layers_graph, + n_layers_output=2, + σ=NNlib.leakyrelu, + pooling="mean" + ) + ) + + learned_heuristic_ffcpnnv3_mean = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent_ffcpnnv3_mean; chosen_features=chosen_features) + + agent_ffcpnnv3_sum = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(decay_steps, 0.01), + batch_size=32, + update_horizon=10, + min_replay_history=256, + update_freq=1, + target_update_freq=80, + get_heterogeneous_nn = () -> get_heterogeneous_ffcpnnv3( + feature_size=feature_size, + conv_size=16, + dense_size=16, + output_size=1, + n_layers_graph=n_layers_graph, + n_layers_output=2, + σ=NNlib.leakyrelu, + pooling="sum" + ) + ) + learned_heuristic_ffcpnnv3_sum 
= SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent_ffcpnnv3_sum; chosen_features=chosen_features) + + + learnedHeuristics = OrderedDict( + #"cpnn" => learned_heuristic_cpnn, + "fullfeaturedcpnnu" => learned_heuristic_fullfeaturedcpnn, + # "variableoutputcpnn" => learned_heuristic_variableoutputcpnn, + # "ffcpnnv2" => learned_heuristic_ffcpnnv2, + "ffcpnnv3_max" => learned_heuristic_ffcpnnv3_max, + #"ffcpnnv3_sigmoid" => learned_heuristic_ffcpnnv3_sigmoid, + "ffcpnnv3_mean" => learned_heuristic_ffcpnnv3_mean, + "ffcpnnv3_sum" => learned_heuristic_ffcpnnv3_sum, + #"ffcpnnv3_id" => learned_heuristic_ffcpnnv3_id, + + + ) + + if isnothing(basicHeuristics) + basicHeuristics = OrderedDict( + "random" => SeaPearl.RandomHeuristic() + ) + end + variableHeuristic = SeaPearl.MinDomainVariableSelection{false}() + + metricsArray, eval_metricsArray = trytrain( + nbEpisodes=n_episodes, + evalFreq=Int(floor(n_episodes / n_eval)), + nbInstances=n_instances, + restartPerInstances=1, + generator=generator, + variableHeuristic=variableHeuristic, + learnedHeuristics=learnedHeuristics, + basicHeuristics=basicHeuristics; + out_solver=true, + verbose=false, + nbRandomHeuristics=0, + exp_name= type * "_heterogeneous_cpnn_" * string(n_episodes) * "_" * string(size) * "_", + eval_timeout=eval_timeout + ) + nothing +end + +############################################################################### +######### Experiment Type 11 +######### +######### +############################################################################### +""" +Compares HGT and HeterogeneousGraphConv. +""" + +function experiment_hgt_vs_graphconv( + size, + n_step_per_episode, + n_episodes, + n_instances, + restartPerInstances; + output_size = size, + n_layers_graph=3, + n_eval=10, + generator, + update_horizon = Int(round(n_step_per_episode//2)), + chosen_features, + type="", + eval_timeout=60, + reward=SeaPearl.GeneralReward, + trajectory_capacity=nothing, + basicHeuristics = nothing +) + + SR_heterogeneous = SeaPearl.HeterogeneousStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.HeterogeneousTrajectoryState} + trajectory_capacity = 500*n_step_per_episode + learnedHeuristics = OrderedDict{String,SeaPearl.LearnedHeuristic}() + agent_hetgc = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(250*n_step_per_episode, 0.01), + batch_size=16, + update_horizon=update_horizon, + min_replay_history=Int(round(32*n_step_per_episode//2)), + update_freq=1, + target_update_freq=7*n_step_per_episode, + get_heterogeneous_nn = () -> get_heterogeneous_fullfeaturedcpnn( + feature_size=chosen_features[2], + conv_size=8, + dense_size=16, + output_size=1, + n_layers_graph=n_layers_graph, + n_layers_node=2, + n_layers_output=2 + ) + ) + learned_heuristic_hetgc = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent_hetgc; chosen_features=chosen_features[1]) + learnedHeuristics["hetgc"] = learned_heuristic_hetgc + agent_hgt = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(250*n_step_per_episode, 0.01), + batch_size=16, + update_horizon=update_horizon, + min_replay_history=Int(round(32*n_step_per_episode//2)), + update_freq=1, + target_update_freq=7*n_step_per_episode, + 
get_heterogeneous_nn = () -> get_heterogeneous_fullfeaturedcpnn( + feature_size=chosen_features[2], + conv_type="hgt", + conv_size=8, + heads=2, + dense_size=16, + output_size=1, + n_layers_graph=n_layers_graph, + n_layers_node=2, + n_layers_output=2 + ) + ) + learned_heuristic_hgt = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent_hgt, chosen_features=chosen_features[1]) + learnedHeuristics["hgt"] = learned_heuristic_hgt + + # ------------------- + # Variable Heuristic definition + # ------------------- + variableHeuristic = SeaPearl.MinDomainVariableSelection{true}() + if isnothing(basicHeuristics) + basicHeuristics = OrderedDict( + "random" => SeaPearl.RandomHeuristic() + ) + end + metricsArray, eval_metricsArray = trytrain( + nbEpisodes=n_episodes, + evalFreq=Int(floor(n_episodes / n_eval)), + nbInstances=n_instances, + eval_strategy=SeaPearl.ILDSearch(2), + restartPerInstances=restartPerInstances, + generator=generator, + variableHeuristic=variableHeuristic, + learnedHeuristics=learnedHeuristics, + basicHeuristics=basicHeuristics; + out_solver=true, + verbose=true, + nbRandomHeuristics=0, + exp_name= type * "_hgt_vs_graphconv", + eval_timeout=eval_timeout + ) + nothing +end + +############################################################################### +######### Experiment Type MALIK +######### +######### +############################################################################### +""" +Compares different RL Agents for the heterogeneous representation. +""" + function experiment_rl_heterogeneous( + size, + n_episodes, + n_instances; + eval_strategy = SeaPearl.DFSearch(), + feature_size, + output_size, + n_eval=10, + generator, + type="", + eval_timeout=nothing, + chosen_features=nothing, + basicHeuristics, + reward=SeaPearl.GeneralReward, + n_layers_graph=3, + decay_steps=Int(round(250*size*0.75)), + trajectory_capacity=Int(round(1000*size*0.75)), + pool=SeaPearl.sumPooling() + ) + + SR_heterogeneous = SeaPearl.HeterogeneousStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.HeterogeneousTrajectoryState} + + agent_ffcpnn_dqn = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(decay_steps, 0.01), + batch_size=16, + update_horizon=Int(round(size*0.75)), + min_replay_history=Int(round(16*size*0.75)), + update_freq=1, + target_update_freq=Int(round(8*size*0.75)), + get_heterogeneous_nn = () -> get_heterogeneous_fullfeaturedcpnn( + feature_size=feature_size, + conv_size=8, + dense_size=16, + output_size=1, + n_layers_graph=n_layers_graph, + n_layers_node=2, + n_layers_output=2, + pool=pool + ) + ) + learned_heuristic_ffcpnn_dqn = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous, reward, SeaPearl.FixedOutput}(agent_ffcpnn_dqn; chosen_features=chosen_features) + + # agent_ffcpnn_priodqn = get_heterogeneous_agent_priodqn(; + # get_heterogeneous_prioritized_trajectory = () -> get_heterogeneous_prioritized_trajectory(capacity=trajectory_capacity, n_actions=output_size), + # get_explorer = () -> get_epsilon_greedy_explorer(decay_steps, 0.01), + # batch_size=16, + # update_horizon=Int(round(size*0.75)), + # min_replay_history=Int(round(16*size*0.75)), + # update_freq=1, + # target_update_freq=Int(round(8*size*0.75)), + # get_heterogeneous_nn = () -> get_heterogeneous_fullfeaturedcpnn( + # feature_size=feature_size, + # conv_size=8, + # dense_size=16, + # output_size=1, + # 
n_layers_graph=n_layers_graph, + # n_layers_node=2, + # n_layers_output=2, + # pool=pool + # ) + # ) + # learned_heuristic_ffcpnn_priodqn = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous, reward, SeaPearl.FixedOutput}(agent_ffcpnn_priodqn; chosen_features=chosen_features) + + agent_ffcpnn_ppo = get_heterogeneous_agent_ppo(; + get_heterogeneous_ppo_trajectory = () -> get_heterogeneous_ppo_trajectory(capacity=trajectory_capacity, n_actions=output_size), + n_epochs=4, + n_microbatches=4, + critic_loss_weight = 1.0f0, + entropy_loss_weight = 0.0f0, + update_freq=128, + + get_heterogeneous_nn_actor = () -> get_heterogeneous_fullfeaturedcpnn( + feature_size=feature_size, + conv_size=8, + dense_size=16, + output_size=1, + n_layers_graph=1, + n_layers_node=2, + n_layers_output=2, + pool=pool + ), + + get_heterogeneous_nn_critic = () -> get_heterogeneous_cpnn( + feature_size=feature_size, + conv_size=8, + dense_size=16, + output_size=1, + n_layers_graph=1, + n_layers_node=2, + n_layers_output=2, + pool=pool + ) + ) + learned_heuristic_ffcpnn_ppo = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent_ffcpnn_ppo; chosen_features=chosen_features) + + # agent_ffcpnn_ppo2 = get_heterogeneous_agent_ppo(; + # get_heterogeneous_ppo_trajectory = () -> get_heterogeneous_ppo_trajectory(capacity=trajectory_capacity, n_actions=output_size), + # n_epochs=4, + # n_microbatches=4, + # critic_loss_weight = 0.5f0, + # entropy_loss_weight = 0.0f0, + # update_freq=128, + + # get_heterogeneous_nn_actor = () -> get_heterogeneous_fullfeaturedcpnn( + # feature_size=feature_size, + # conv_size=8, + # dense_size=16, + # output_size=1, + # n_layers_graph=1, + # n_layers_node=2, + # n_layers_output=2, + # pool=pool + # ), + + # get_heterogeneous_nn_critic = () -> get_heterogeneous_cpnn( + # feature_size=feature_size, + # conv_size=8, + # dense_size=16, + # output_size=1, + # n_layers_graph=1, + # n_layers_node=2, + # n_layers_output=2, + # pool=pool + # ) + # ) + + # learned_heuristic_ffcpnn_ppo2 = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent_ffcpnn_ppo2; chosen_features=chosen_features) + + learnedHeuristics = OrderedDict( + "ffcpnn_dqn"* string(pool) => learned_heuristic_ffcpnn_dqn, + # "ffcpnn_priodqn"* string(pool) => learned_heuristic_ffcpnn_priodqn, + "ffcpnn_ppo"* string(pool) => learned_heuristic_ffcpnn_ppo + # "ffcpnn_ppo_critic0.5"* string(pool) => learned_heuristic_ffcpnn_ppo2 + ) + + selectMin(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.minimum(x.domain) + heuristic_min = SeaPearl.BasicHeuristic(selectMin) + + selectMax(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.maximum(x.domain) + heuristic_max = SeaPearl.BasicHeuristic(selectMax) + + if isnothing(basicHeuristics) + basicHeuristics = OrderedDict( + "random" => SeaPearl.RandomHeuristic(), + "min" => heuristic_min, + "max" => heuristic_max + ) + end + variableHeuristic = SeaPearl.MinDomainVariableSelection{false}() + + metricsArray, eval_metricsArray = trytrain( + nbEpisodes=n_episodes, + evalFreq=Int(floor(n_episodes / n_eval)), + eval_strategy = eval_strategy, + nbInstances=n_instances, + restartPerInstances=1, + generator=generator, + variableHeuristic=variableHeuristic, + learnedHeuristics=learnedHeuristics, + basicHeuristics=basicHeuristics; + out_solver=true, + verbose=false, + nbRandomHeuristics=0, + exp_name= type * "_heterogeneous_cpnn_" * string(n_episodes) * "_" * string(size) * "_" * string(pool)* "_", + eval_timeout=eval_timeout + ) + nothing +end + + 
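+# Hedged usage sketch (illustration only, not one of the experiments defined here): a plausible
+# way to drive `experiment_rl_heterogeneous` on small clusterized graph-coloring instances.
+# The generator parameters, episode counts and the (chosen_features, feature_size) pairing are
+# assumptions borrowed from the graph-coloring experiments further down in this patch.
+function sketch_rl_heterogeneous_graphcoloring(; n_episodes=1000, n_instances=10)
+    n_nodes = 20
+    generator = SeaPearl.ClusterizedGraphColoringGenerator(n_nodes, 5, 0.95)
+    chosen_features = Dict(
+        "constraint_type" => true,
+        "variable_initial_domain_size" => true,
+        "values_raw" => true,
+    )
+    experiment_rl_heterogeneous(n_nodes, n_episodes, n_instances;
+        feature_size=[1, 2, 1],
+        output_size=n_nodes,
+        generator=generator,
+        chosen_features=chosen_features,
+        basicHeuristics=nothing,
+        type="graphcoloring",
+    )
+end
+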
+############################################################################### +######### Experiment Type Update Freq +######### +############################################################################### +""" +Compares different values of argument `update_freq` +""" +function experiment_update_freq( + size, + n_episodes, + nb_steps_per_episode, + n_instances; + feature_size, + output_size, + n_eval=10, + generator, + type="", + eval_timeout=nothing, + chosen_features=nothing, + basicHeuristics, + reward=SeaPearl.GeneralReward, + n_layers_graph=3, + decay_steps=n_episodes*size*0.8, + c=2.0, + trajectory_capacity=5000, + pool=SeaPearl.sumPooling() +) + + SR_heterogeneous = SeaPearl.HeterogeneousStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.HeterogeneousTrajectoryState} + + if isnothing(chosen_features) + chosen_features = DEFAULT_CHOSEN_FEATURES + end + + agent_fullfeaturedcpnn_update_freq_1 = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(decay_steps, 0.01), + batch_size=32, + update_horizon=10, + min_replay_history=256, + update_freq=1, + target_update_freq=80, + get_heterogeneous_nn = () -> get_heterogeneous_fullfeaturedcpnn( + feature_size=feature_size, + conv_size=16, + dense_size=16, + output_size=1, + n_layers_graph=n_layers_graph, + n_layers_node=2, + n_layers_output=2, + pool=pool, + σ=NNlib.leakyrelu + ) + ) + learned_heuristic_fullfeaturedcpnn_update_freq_1 = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous, reward, SeaPearl.FixedOutput}(agent_fullfeaturedcpnn_update_freq_1; chosen_features=chosen_features) + + agent_fullfeaturedcpnn_update_freq_2 = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(decay_steps, 0.01), + batch_size=32, + update_horizon=10, + min_replay_history=256, + update_freq=div(nb_steps_per_episode, 2), + target_update_freq=80, + get_heterogeneous_nn = () -> get_heterogeneous_fullfeaturedcpnn( + feature_size=feature_size, + conv_size=16, + dense_size=16, + output_size=1, + n_layers_graph=n_layers_graph, + n_layers_node=2, + n_layers_output=2, + pool=pool, + σ=NNlib.leakyrelu + ) + ) + learned_heuristic_fullfeaturedcpnn_update_freq_2 = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous, reward, SeaPearl.FixedOutput}(agent_fullfeaturedcpnn_update_freq_2; chosen_features=chosen_features) + + agent_fullfeaturedcpnn_update_freq_3 = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(decay_steps, 0.01), + batch_size=32, + update_horizon=10, + min_replay_history=256, + update_freq=nb_steps_per_episode, + target_update_freq=80, + get_heterogeneous_nn = () -> get_heterogeneous_fullfeaturedcpnn( + feature_size=feature_size, + conv_size=16, + dense_size=16, + output_size=1, + n_layers_graph=n_layers_graph, + n_layers_node=2, + n_layers_output=2, + pool=pool, + σ=NNlib.leakyrelu + ) + ) + learned_heuristic_fullfeaturedcpnn_update_freq_3 = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous, reward, SeaPearl.FixedOutput}(agent_fullfeaturedcpnn_update_freq_3; chosen_features=chosen_features) + + learnedHeuristics = OrderedDict( + "update_freq_1" => learned_heuristic_fullfeaturedcpnn_update_freq_1, + 
"update_freq_nb_steps_per_episode_2" => learned_heuristic_fullfeaturedcpnn_update_freq_2, + "update_freq_nb_steps_per_episode" => learned_heuristic_fullfeaturedcpnn_update_freq_3 + + ) + + if isnothing(basicHeuristics) + basicHeuristics = OrderedDict( + "random" => SeaPearl.RandomHeuristic() + ) + end + + variableHeuristic = SeaPearl.MinDomainVariableSelection{false}() + + metricsArray, eval_metricsArray = trytrain( + nbEpisodes=n_episodes, + evalFreq=Int(floor(n_episodes / n_eval)), + nbInstances=n_instances, + restartPerInstances=1, + generator=generator, + variableHeuristic=variableHeuristic, + learnedHeuristics=learnedHeuristics, + basicHeuristics=basicHeuristics; + out_solver=true, + verbose=false, + nbRandomHeuristics=0, + exp_name= type * "_heterogeneous_cpnn_" * string(n_episodes) * "_" * string(size) * "_", + eval_timeout=eval_timeout + ) + nothing +end + +############################################################################### +######### Comparison of tripartite graph vs specialized graph +######### +############################################################################### +""" +Compares the tripartite graph representation with a specific representation. +""" + function experiment_tripartite_vs_specific( + pb_size, + nb_steps_per_episode, + n_episodes, + n_instances, + SR_specific; + feature_size, + feature_size_specific, + output_size, + n_eval=10, + generator, + type="", + eval_timeout=nothing, + eval_strategy = SeaPearl.ILDSearch(2), + chosen_features=nothing, + basicHeuristics, + reward=SeaPearl.GeneralReward, + n_layers_graph=3, + decay_steps=Int(round(nb_steps_per_episode*1000)), + trajectory_capacity=Int(round(nb_steps_per_episode*800)), + ) + + SR_tripartite = SeaPearl.HeterogeneousStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.HeterogeneousTrajectoryState} + + agent_tripartite = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(decay_steps, 0.05), + batch_size=8, + update_horizon=Int(round(nb_steps_per_episode*0.7)), + min_replay_history=Int(round(64*nb_steps_per_episode)), + update_freq=1, + target_update_freq=Int(round(7*nb_steps_per_episode)), + get_heterogeneous_nn = () -> get_heterogeneous_fullfeaturedcpnn( + feature_size=feature_size, + conv_size=8, + dense_size=16, + output_size=1, + n_layers_graph=n_layers_graph, + n_layers_node=2, + n_layers_output=2, + pool=SeaPearl.sumPooling() + ) + ) + + agent_specific = get_default_agent(; + get_default_trajectory = () -> get_default_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(decay_steps, 0.05), + batch_size=8, + update_horizon=Int(round(nb_steps_per_episode*0.7)), + min_replay_history=Int(round(64*nb_steps_per_episode)), + update_freq=1, + target_update_freq=Int(round(7*nb_steps_per_episode)), + get_default_nn = () -> get_default_cpnn( + feature_size=feature_size_specific, + conv_size=8, + dense_size=16, + output_size=output_size, + n_layers_graph=n_layers_graph, + n_layers_node=2, + n_layers_output=2, + pool=SeaPearl.sumPooling() + ) + ) + tripartite_heuristic = SeaPearl.SimpleLearnedHeuristic{SR_tripartite, reward, SeaPearl.FixedOutput}(agent_tripartite; chosen_features=chosen_features) + specific_heuristic = SeaPearl.SimpleLearnedHeuristic{SR_specific, reward, SeaPearl.FixedOutput}(agent_specific) + + learnedHeuristics = OrderedDict( + "tripartite" => 
tripartite_heuristic, + "specific" => specific_heuristic + ) + + + if isnothing(basicHeuristics) + basicHeuristics = OrderedDict( + "random" => SeaPearl.RandomHeuristic() + ) + end + variableHeuristic = SeaPearl.MinDomainVariableSelection{true}() + + metricsArray, eval_metricsArray = trytrain( + nbEpisodes=n_episodes, + evalFreq=Int(floor(n_episodes / n_eval)), + nbInstances=n_instances, + restartPerInstances=1, + generator=generator, + variableHeuristic=variableHeuristic, + learnedHeuristics=learnedHeuristics, + basicHeuristics=basicHeuristics; + out_solver=true, + verbose=false, + eval_strategy=eval_strategy, + nbRandomHeuristics=0, + exp_name= type *"_"* string(pb_size) * "_tripartite_vs_specific_" * string(n_episodes), + eval_timeout=eval_timeout + ) + nothing +end + +############################################################################### +######### GeneralReward comparison experiment (role of \gamma) +######### +############################################################################### +""" +Compares three different values of gamma in GeneralReward +""" + function experiment_general_rewards( + pb_size, + n_episodes, + n_instances, + nb_steps_per_episode; + feature_size, + output_size, + n_eval=10, + generator, + type="", + eval_timeout=60, + eval_strategy = SeaPearl.ILDSearch(2), + chosen_features, + basicHeuristics, + decay_steps=500, + trajectory_capacity=1000, + ) + decay_steps = 2000*nb_steps_per_episode + trajectory_capacity = 1000*nb_steps_per_episode + SR = SeaPearl.HeterogeneousStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.HeterogeneousTrajectoryState} + rewards = [SeaPearl.GeneralReward2,SeaPearl.GeneralReward3] + learnedHeuristics = OrderedDict{String, SeaPearl.LearnedHeuristic}() + for i in 1:2 + agent = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(decay_steps, 0.1), + batch_size=8, + update_horizon=Int(round(nb_steps_per_episode*0.5)), + min_replay_history=Int(round(16*nb_steps_per_episode)), + update_freq=2, + target_update_freq=Int(round(7*nb_steps_per_episode)), + get_heterogeneous_nn = () -> get_heterogeneous_fullfeaturedcpnn( + feature_size=feature_size, + conv_size=8, + dense_size=16, + output_size=1, + n_layers_graph=4, + n_layers_node=2, + n_layers_output=2, + pool = SeaPearl.sumPooling() + ) + ) + heuristic = SeaPearl.SimpleLearnedHeuristic{SR, rewards[i], SeaPearl.FixedOutput}(agent; chosen_features=chosen_features) + learnedHeuristics[replace(string(rewards[i]), "SeaPearl." 
=> "")] = heuristic + end + + if isnothing(basicHeuristics) + basicHeuristics = OrderedDict( + "random" => SeaPearl.RandomHeuristic() + ) + end + variableHeuristic = SeaPearl.MinDomainVariableSelection{true}() + + metricsArray, eval_metricsArray = trytrain( + nbEpisodes=n_episodes, + evalFreq=Int(floor(n_episodes / n_eval)), + nbInstances=n_instances, + restartPerInstances=1, + generator=generator, + variableHeuristic=variableHeuristic, + learnedHeuristics=learnedHeuristics, + basicHeuristics=basicHeuristics; + out_solver=true, + verbose=false, + eval_strategy=eval_strategy, + nbRandomHeuristics=0, + exp_name= type *"_"* string(pb_size) * "_reward_comparison_" * string(n_episodes), + eval_timeout=eval_timeout + ) + nothing +end + +############################################################################### +######### Reward comparison experiment +######### +############################################################################### +""" +Compares GeneralReward and ScoreReward +""" + function experiment_general_vs_score_rewards( + pb_size, + n_episodes, + n_instances, + nb_steps_per_episode; + feature_size, + output_size, + n_eval=10, + generator, + type="", + eval_timeout=nothing, + eval_strategy = SeaPearl.ILDSearch(1), + chosen_features, + basicHeuristics, + decay_steps=500, + trajectory_capacity=1000, + ) + decay_steps = 1000*nb_steps_per_episode + trajectory_capacity = 900*nb_steps_per_episode + SR = SeaPearl.HeterogeneousStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.HeterogeneousTrajectoryState} + learnedHeuristics = OrderedDict{String, SeaPearl.LearnedHeuristic}() + agent_generalreward = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(decay_steps, 0.05), + batch_size=8, + update_horizon=Int(round(nb_steps_per_episode*0.35)), + min_replay_history=Int(round(64*nb_steps_per_episode)), + update_freq=1, + target_update_freq=Int(round(7*nb_steps_per_episode)), + get_heterogeneous_nn = () -> get_heterogeneous_fullfeaturedcpnn( + feature_size=feature_size, + conv_size=8, + dense_size=16, + output_size=1, + n_layers_graph=4, + n_layers_node=2, + n_layers_output=2, + pool=SeaPearl.sumPooling() + ) + ) + heuristic_generalreward = SeaPearl.SimpleLearnedHeuristic{SR, SeaPearl.GeneralReward2, SeaPearl.FixedOutput}(agent_generalreward; chosen_features=chosen_features) + learnedHeuristics["general"] = heuristic_generalreward + + agent_scorereward = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=output_size), + get_explorer = () -> get_epsilon_greedy_explorer(decay_steps, 0.05), + batch_size=8, + update_horizon=Int(round(nb_steps_per_episode)), + min_replay_history=Int(round(64*nb_steps_per_episode)), + update_freq=1, + target_update_freq=Int(round(7*nb_steps_per_episode)), + get_heterogeneous_nn = () -> get_heterogeneous_fullfeaturedcpnn( + feature_size=feature_size, + conv_size=8, + dense_size=16, + output_size=1, + n_layers_graph=4, + n_layers_node=2, + n_layers_output=2, + pool = SeaPearl.sumPooling() + ) + ) + heuristic_scorereward = SeaPearl.SimpleLearnedHeuristic{SR, SeaPearl.ScoreReward, SeaPearl.FixedOutput}(agent_scorereward; chosen_features=chosen_features) + learnedHeuristics["score"] = heuristic_scorereward + agent_scorereward.policy.learner = deepcopy(agent_generalreward.policy.learner) + + + if isnothing(basicHeuristics) 
+ basicHeuristics = OrderedDict( + "random" => SeaPearl.RandomHeuristic() + ) + end + variableHeuristic = SeaPearl.MinDomainVariableSelection{true}() + + metricsArray, eval_metricsArray = trytrain( + nbEpisodes=n_episodes, + evalFreq=Int(floor(n_episodes / n_eval)), + nbInstances=n_instances, + restartPerInstances=1, + generator=generator, + variableHeuristic=variableHeuristic, + learnedHeuristics=learnedHeuristics, + basicHeuristics=basicHeuristics; + out_solver=true, + verbose=false, + eval_strategy=eval_strategy, + nbRandomHeuristics=0, + exp_name= type *"_"* string(pb_size) * "_reward_comparison_" * string(n_episodes), + eval_timeout=eval_timeout + ) + nothing +end + +############################################################################### +######### Experiment Type +######### +######### Chain Transfer Learning +############################################################################### +""" +Tests the impact of transfer learning +""" +function experiment_chain_transfer_heterogeneous( + sizes, + n_episodes, + n_instances; + feature_size, + output_sizes, + n_evals, + generators, + type="", + expParameters=Dict{String,Any}()::Dict{String,Any}, + eval_timeout=nothing, + chosen_features=nothing, + basicHeuristics=nothing, + reward=SeaPearl.GeneralReward, + n_layers_graph=3, + decay_steps=2000, + trajectory_capacity=2000, + update_horizon=8, + min_replay_history=128, + verbose=true, + eval_strategy=SeaPearl.DFSearch(), +) + + SR_heterogeneous = SeaPearl.HeterogeneousStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.HeterogeneousTrajectoryState} + + if isnothing(chosen_features) + chosen_features = DEFAULT_CHOSEN_FEATURES + end + + agent = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=output_sizes[1]), + get_explorer = () -> get_epsilon_greedy_explorer(decay_steps, 0.01), + update_horizon=update_horizon, + min_replay_history=min_replay_history, + get_heterogeneous_nn = () -> get_heterogeneous_fullfeaturedcpnn( + feature_size=feature_size, + n_layers_graph=n_layers_graph + ) + ) + learned_heuristic = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent; chosen_features=chosen_features) + + learnedHeuristics = OrderedDict( + "learned" => learned_heuristic, + ) + + if isnothing(basicHeuristics) + basicHeuristics = OrderedDict( + "random" => SeaPearl.RandomHeuristic() + ) + end + + metricsArray, eval_metricsArray = trytrain( + nbEpisodes=n_episodes[1], + evalFreq=Int(floor(n_episodes[1] / n_evals[1])), + nbInstances=n_instances, + generator=generators[1], + learnedHeuristics=learnedHeuristics, + basicHeuristics=basicHeuristics; + verbose=verbose, + exp_name= type * "_transfer_" * string(n_episodes[1]) * "_" * string(sizes[1]) * "_", + eval_timeout=eval_timeout, + eval_strategy=eval_strategy, + eval_generator = last(generators) + ) + + for i in 2:length(sizes) + agent_transfer = RL.Agent( + policy= RL.QBasedPolicy( + learner=deepcopy(agent.policy.learner), + explorer= get_epsilon_greedy_explorer(decay_steps, 0.01), + ), + trajectory=get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=output_sizes[i]) + ) + agent_transfer.policy.learner.approximator.optimizer.eta = 0.0001 + agent_transfer.policy.learner.target_approximator.optimizer.eta = 0.0001 + learned_heuristic_transfer = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,SeaPearl.GeneralReward,SeaPearl.FixedOutput}(agent_transfer; chosen_features=chosen_features) + + 
learnedHeuristics = OrderedDict( + "transfer" => learned_heuristic_transfer, + ) + + metricsArray, eval_metricsArray = trytrain( + nbEpisodes=n_episodes[i], + evalFreq=Int(floor(n_episodes[i] / n_evals[i])), + nbInstances=n_instances, + generator=generators[i], + learnedHeuristics=learnedHeuristics, + basicHeuristics=basicHeuristics; + verbose=verbose, + exp_name=type * "_transfered(" * string(i) * ")_" * string(n_episodes[i]) * "_" * string(sizes[i]) * "_", + eval_timeout=eval_timeout, + eval_strategy=eval_strategy, + eval_generator = last(generators) + ) + end + nothing +end + diff --git a/learning_cp/comparison/graphcoloring.jl b/learning_cp/comparison/graphcoloring.jl new file mode 100644 index 0000000..25251f1 --- /dev/null +++ b/learning_cp/comparison/graphcoloring.jl @@ -0,0 +1,1118 @@ +include("../common/experiment.jl") +include("../common/utils.jl") +include("comparison.jl") + +using CUDA +############################################################################### +######### simple GC experiment +######### +######### +############################################################################### +############################################################################### +######### simple GC experiment +######### +######### +############################################################################### +function simple_graph_coloring_experiment(n_nodes, n_nodes_eval, n_min_color, density, n_episodes, n_instances; n_layers_graph=3, n_eval=20, reward = SeaPearl.GeneralReward, c=2.0, trajectory_capacity=10000, pool = SeaPearl.sumPooling(), nbRandomHeuristics = 1, eval_timeout = nothing, restartPerInstances = 10, seedEval = nothing) + + n_step_per_episode = n_nodes + update_horizon = Int(round(n_step_per_episode//2)) + + coloring_generator = SeaPearl.ClusterizedGraphColoringGenerator(n_nodes, n_min_color, density) + eval_coloring_generator = SeaPearl.ClusterizedGraphColoringGenerator(n_nodes_eval, n_min_color, density) + + evalFreq=Int(floor(n_episodes / n_eval)) + + + chosen_features = Dict( + "node_number_of_neighbors" => true, + "constraint_type" => true, + "constraint_activity" => true, + "nb_not_bounded_variable" => true, + "variable_initial_domain_size" => true, + "variable_domain_size" => true, + "variable_is_objective" => true, + "variable_assigned_value" => true, + "variable_is_bound" => true, + "values_raw" => true) + + feature_size = [6, 6, 2] + + decay_steps = Int(floor(n_episodes*restartPerInstances*(n_nodes+1)*0.5)) + + rngExp = MersenneTwister(seedEval) + init = Flux.glorot_uniform(MersenneTwister(seedEval)) + + SR_heterogeneous = SeaPearl.HeterogeneousStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.HeterogeneousTrajectoryState} + + # Basic value-selection heuristic + selectMin(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.minimum(x.domain) + heuristic_min = SeaPearl.BasicHeuristic(selectMin) + basicHeuristics = OrderedDict( + "min" => heuristic_min + ) + + rngExp = MersenneTwister(seedEval) + init = Flux.glorot_uniform(MersenneTwister(seedEval)) + + agent_ffcpnn = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=n_nodes), + get_explorer = () -> get_epsilon_greedy_explorer(decay_steps, 0.05; rng = rngExp ), + batch_size=32, + update_horizon=update_horizon, + min_replay_history=Int(round(16*n_step_per_episode//2)), + update_freq=1, + target_update_freq=7*n_step_per_episode, + get_heterogeneous_nn = () -> get_heterogeneous_fullfeaturedcpnn( + 
feature_size=feature_size, + conv_size=8, + dense_size=16, + output_size=1, + n_layers_graph=3, + n_layers_node=2, + n_layers_output=2, + pool=pool, + σ=NNlib.leakyrelu, + init = init + ), + γ = 0.99f0 + ) + + agent_cpnn = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=n_nodes), + get_explorer = () -> get_epsilon_greedy_explorer(decay_steps, 0.01; rng = rngExp ), + batch_size=16, + update_horizon=update_horizon, + min_replay_history=Int(round(16*n_step_per_episode//2)), + update_freq=1, + target_update_freq=8*n_nodes, + get_heterogeneous_nn = () -> get_heterogeneous_cpnn( + feature_size=feature_size, + conv_size=8, + dense_size=8, + output_size=n_nodes, + n_layers_graph=n_layers_graph, + n_layers_output=2, + pool=pool, + σ=NNlib.leakyrelu, + init = init, + #device =gpu + ), + γ = 0.99f0 + ) + + learned_heuristic_ffcpnn = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent_ffcpnn, chosen_features=chosen_features) + #learned_heuristic_control = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent_control; chosen_features=chosen_features) + #learned_heuristic_cpnn = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent_cpnn; chosen_features=chosen_features) + + learnedHeuristics = OrderedDict{String,SeaPearl.LearnedHeuristic}() + + learnedHeuristics["ffcpnn"] = learned_heuristic_ffcpnn + #learnedHeuristics["control"] = learned_heuristic_control + #learnedHeuristics["cpnn"] = learned_heuristic_cpnn + + variableHeuristic = SeaPearl.MinDomainVariableSelection{false}() + + metricsArray, eval_metricsArray = trytrain( + nbEpisodes=n_episodes, + evalFreq=evalFreq, + nbInstances=n_instances, + restartPerInstances=restartPerInstances, + generator=coloring_generator, +# eval_strategy=SeaPearl.ILDSearch(2), + variableHeuristic=variableHeuristic, + learnedHeuristics=learnedHeuristics, + basicHeuristics=basicHeuristics; + out_solver=true, + verbose=true, + nbRandomHeuristics=nbRandomHeuristics, + exp_name="graph_coloring_benchmark" * string(n_nodes) *"_"*string(n_nodes_eval) * "_" * string(n_episodes) * "_", + eval_timeout=eval_timeout, + eval_generator=eval_coloring_generator, + training_timeout = 1800, + #eval_every = 5, + ) + +end +############################################################################### +######### Experiment Type 1 +######### +######### +############################################################################### +""" +Compare three agents: + - an agent with the default representation and default features; + - an agent with the default representation and chosen features; + - an agent with the heterogeneous representation and chosen features. 
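+The three configurations are trained with feature sizes 3, 9 and [2, 3, 1] respectively, forwarded through the `feature_sizes` argument of `experiment_representation` below.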
+""" +function experiment_representation_graphcoloring(n_nodes, n_min_color, density, n_episodes, n_instances; n_layers_graph=3, n_eval=10, reward=SeaPearl.GeneralReward) + coloring_generator = SeaPearl.ClusterizedGraphColoringGenerator(n_nodes, n_min_color, density) + + + experiment_representation(n_nodes, n_episodes, n_instances; + chosen_features=nothing, + feature_sizes = [3, 9, [2, 3, 1]], + output_size = n_nodes, + generator = coloring_generator, + basicHeuristics=nothing, + n_layers_graph=n_layers_graph, + n_eval=n_eval, + reward=reward, + type="graphcoloring", + ) +end + +############################################################################### +######### Experiment Type 2 +######### +######### +############################################################################### +""" +Compares the impact of the number of convolution layers for the heterogeneous representation. +""" +function experiment_heterogeneous_n_conv(n_nodes, n_min_color, density, n_episodes, n_instances; n_eval=10) + coloring_generator = SeaPearl.ClusterizedGraphColoringGenerator(n_nodes, n_min_color, density) + SR_heterogeneous = SeaPearl.HeterogeneousStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.HeterogeneousTrajectoryState} + + chosen_features = Dict( + "constraint_type" => true, + "variable_initial_domain_size" => true, + "values_onehot" => true, + ) + + experiment_n_conv(n_nodes, n_episodes, n_instances; + n_eval=n_eval, + generator=coloring_generator, + SR=SR_heterogeneous, + chosen_features=chosen_features, + feature_size=[1, 2, n_nodes], + type="heterogeneous", + output_size = n_nodes) +end + +""" +Compares the impact of the number of convolution layers for the default representation with chosen features. +""" +function experiment_default_chosen_n_conv(n_nodes, n_min_color, density, n_episodes, n_instances; n_eval=10) + coloring_generator = SeaPearl.ClusterizedGraphColoringGenerator(n_nodes, n_min_color, density) + SR_default = SeaPearl.DefaultStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.DefaultTrajectoryState} + + chosen_features = Dict( + "constraint_type" => true, + "variable_initial_domain_size" => true, + "values_onehot" => true, + ) + + experiment_n_conv(n_nodes, n_episodes, n_instances; + n_eval=n_eval, + generator=coloring_generator, + SR=SR_default, + chosen_features=chosen_features, + feature_size=6 + n_nodes, + type="default_chosen", + output_size = n_nodes) +end + +""" +Compares the impact of the number of convolution layers for the default representation. +""" +function experiment_default_default_n_conv(n_nodes, n_min_color, density, n_episodes, n_instances; n_eval=10) + coloring_generator = SeaPearl.ClusterizedGraphColoringGenerator(n_nodes, n_min_color, density) + SR_default = SeaPearl.DefaultStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.DefaultTrajectoryState} + + experiment_n_conv(n_nodes, n_episodes, n_instances; + n_eval=n_eval, + generator=coloring_generator, + SR=SR_default, + feature_size=3, + chosen_features=nothing, + type="default_default", + output_size = n_nodes) +end + +############################################################################### +######### Experiment Type 3 +######### +######### +############################################################################### +""" +Compares the impact of the chosen features for the heterogeneous representation. 
+""" +function experiment_chosen_features_heterogeneous_graphcoloring(n_nodes, n_min_color, density, n_episodes, n_instances; n_eval=10) + coloring_generator = SeaPearl.ClusterizedGraphColoringGenerator(n_nodes, n_min_color, density) + + chosen_features_list = [ + [ + Dict( + "constraint_type" => true, + "variable_initial_domain_size" => true, + "values_onehot" => true, + ), + [1, 2, n_nodes] + ], + [ + Dict( + "constraint_type" => true, + "variable_initial_domain_size" => true, + "values_raw" => true, + ), + [1, 2, 1] + ], + [ + Dict( + "constraint_type" => true, + "variable_initial_domain_size" => true, + "variable_domain_size" => true, + "values_onehot" => true, + ), + [2, 2, n_nodes] + ], + [ + Dict( + "constraint_activity" => true, + "constraint_type" => true, + "variable_initial_domain_size" => true, + "values_onehot" => true, + ), + [1, 3, n_nodes] + ], + [ + Dict( + "constraint_activity" => true, + "constraint_type" => true, + "variable_initial_domain_size" => true, + "variable_domain_size" => true, + "values_raw" => true, + ), + [2, 3, 1] + ], + [ + Dict( + "constraint_activity" => true, + "constraint_type" => true, + "nb_not_bounded_variable" => true, + "variable_initial_domain_size" => true, + "variable_domain_size" => true, + "variable_is_bound" => true, + "values_raw" => true, + ), + [3, 4, 1] + ], + ] + + + experiment_chosen_features_heterogeneous(n_nodes, n_episodes, n_instances; + n_eval=n_eval, + generator=coloring_generator, + chosen_features_list=chosen_features_list, + type="graphcoloring", + output_size = n_nodes + ) +end + +############################################################################### +######### Experiment Type 4 +######### +######### +############################################################################### +""" +Compares the simple and the supervised learned heuristic for the heterogeneous representation. +""" +function experiment_heuristic_heterogeneous_graphcoloring(n_nodes, n_min_color, density, n_episodes, n_instances; n_layers_graph=3, n_eval=10, reward=SeaPearl.GeneralReward) + coloring_generator = SeaPearl.ClusterizedGraphColoringGenerator(n_nodes, n_min_color, density) + + # Basic value-selection heuristic + selectMin(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.minimum(x.domain) + heuristic_min = SeaPearl.BasicHeuristic(selectMin) + basicHeuristics = OrderedDict( + "min" => heuristic_min + ) + + experiment_heuristic_heterogeneous(n_nodes, n_episodes, n_instances; + chosen_features=nothing, + feature_size = [2, 3, 1], + output_size = n_nodes, + generator = coloring_generator, + basicHeuristics = basicHeuristics, + n_layers_graph = n_layers_graph, + n_eval = n_eval, + reward = reward, + type = "graphcoloring", + eta_decay_steps = Int(floor(n_episodes/1.5)), + helpValueHeuristic = heuristic_min, + eta_init = 1.0, + eta_stable = 0.0 + ) +end + +############################################################################### +######### Experiment Type 5 +######### +######### +############################################################################### + +""" +Compares different action explorers for the heterogeneous representation. 
+""" +function experiment_explorer_heterogeneous_graphcoloring(n_nodes, n_min_color, density, n_episodes, n_instances; n_layers_graph=3, n_eval=10, reward=SeaPearl.GeneralReward) + coloring_generator = SeaPearl.ClusterizedGraphColoringGenerator(n_nodes, n_min_color, density) + + + # Basic value-selection heuristic + selectMin(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.minimum(x.domain) + heuristic_min = SeaPearl.BasicHeuristic(selectMin) + basicHeuristics = OrderedDict( + "min" => heuristic_min + ) + + experiment_explorer_heterogeneous(n_nodes, n_episodes, n_instances; + chosen_features=nothing, + feature_size = [2, 3, 1], + output_size = n_nodes, + generator = coloring_generator, + basicHeuristics = basicHeuristics, + n_layers_graph = n_layers_graph, + n_eval = n_eval, + reward = reward, + type = "graphcoloring", + decay_steps=2000, + c=2.0 + ) +end + +############################################################################### +######### Experiment Type 6 +######### +######### +############################################################################### + +function experiment_nn_heterogeneous_graphcoloring(n_nodes, n_min_color, density, n_episodes, n_instances, n_step_per_episode; n_layers_graph=3, n_eval=10, reward=SeaPearl.GeneralReward, pool = SeaPearl.sumPooling(), restartPerInstances = 1) + """ + Compare agents with different Fullfeatured CPNN pipeline + """ + + coloring_generator = SeaPearl.ClusterizedGraphColoringGenerator(n_nodes, n_min_color, density) + + expParameters = Dict( + :generatorParameters => Dict( + :nbNodes => n_nodes, + :nbMinColor => n_min_color, + :density => density + ), + :pooling => string(pool) + ) + + # Basic value-selection heuristic + selectMin(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.minimum(x.domain) + heuristic_min = SeaPearl.BasicHeuristic(selectMin) + basicHeuristics = OrderedDict( + "min" => heuristic_min + ) + + experiment_nn_heterogeneous(n_nodes, n_step_per_episode, n_episodes, n_instances; + chosen_features=nothing, + feature_size = [2, 3, 1], + output_size = n_nodes, + generator = coloring_generator, + n_layers_graph = n_layers_graph, + n_eval = n_eval, + reward = reward, + type = "graphcoloring", + c=2.0, + basicHeuristics=basicHeuristics, + pool = pool, + restartPerInstances = restartPerInstances + ) +end + +function experiment_nn_heterogeneous_graphcoloringv4(n_nodes, n_min_color, density, n_episodes, n_instances, n_step_per_episode; n_layers_graph=3, n_eval=10, reward=SeaPearl.GeneralReward, pool = SeaPearl.sumPooling()) + """ + Compare agents with different Fullfeatured CPNN pipeline + """ + + coloring_generator = SeaPearl.ClusterizedGraphColoringGenerator(n_nodes, n_min_color, density) + + expParameters = Dict( + :generatorParameters => Dict( + :nbNodes => n_nodes, + :nbMinColor => n_min_color, + :density => density + ), + :pooling => string(pool) + ) + + # Basic value-selection heuristic + selectMin(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.minimum(x.domain) + heuristic_min = SeaPearl.BasicHeuristic(selectMin) + basicHeuristics = OrderedDict( + "min" => heuristic_min + ) + + experiment_nn_heterogeneousv4(n_nodes, n_step_per_episode, n_episodes, n_instances; + chosen_features=nothing, + feature_size = [2, 3, 1], + output_size = n_nodes, + generator = coloring_generator, + n_layers_graph = n_layers_graph, + n_eval = n_eval, + reward = reward, + type = "graphcoloring", + c=2.0, + basicHeuristics=basicHeuristics, + pool = pool + ) +end + +############################################################################### 
+######### Experiment Type 7 +######### +######### +############################################################################### + +""" +Compares different pooling methods in the CPNN for the heterogeneous representation. +""" +function experiment_pooling_heterogeneous_graphcoloring(n_nodes, n_min_color, density, n_episodes, n_instances; n_layers_graph=3, n_eval=10, reward=SeaPearl.GeneralReward) + coloring_generator = SeaPearl.ClusterizedGraphColoringGenerator(n_nodes, n_min_color, density) + + # Basic value-selection heuristic + selectMin(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.minimum(x.domain) + heuristic_min = SeaPearl.BasicHeuristic(selectMin) + basicHeuristics = OrderedDict( + "min" => heuristic_min + ) + + experiment_pooling_heterogeneous(n_nodes, n_episodes, n_instances; + chosen_features=nothing, + feature_size = [2, 3, 1], + output_size = n_nodes, + generator = coloring_generator, + basicHeuristics = basicHeuristics, + n_layers_graph = n_layers_graph, + n_eval = n_eval, + reward = reward, + type = "graphcoloring", + decay_steps=2000, + c=2.0 + ) +end + +############################################################################### +######### Experiment Type 8 +######### +######### +############################################################################### + +""" +Compares different choices of features on HeterogeneousCPNN versus default_default +""" +function experiment_chosen_features_hetcpnn_graphcoloring(chosen_features_list, n_nodes, n_min_color, density, n_episodes, n_instances; n_eval=10) + generator = SeaPearl.ClusterizedGraphColoringGenerator(n_nodes, n_min_color, density) + restartPerInstances = 1 + selectMin(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.minimum(x.domain) + heuristic_min = SeaPearl.BasicHeuristic(selectMin) + basicHeuristics = OrderedDict( + "min" => heuristic_min + ) + + experiment_chosen_features_hetcpnn( + n_nodes, + n_nodes+1, + n_episodes, + n_instances, + restartPerInstances; + basicHeuristics = basicHeuristics, + output_size = n_nodes, + generator=generator, + chosen_features_list=chosen_features_list, + type="graphcoloring_"*string(n_nodes), + ) +end + + +""" +Compares different choices of features on HeterogeneousFullFeaturedCPNN versus default_default +""" +function experiment_chosen_features_hetffcpnn_graphcoloring(chosen_features_list, n_nodes, n_min_color, density, n_episodes, n_instances; n_eval=10) + generator = SeaPearl.ClusterizedGraphColoringGenerator(n_nodes, n_min_color, density) + restartPerInstances = 1 + selectMin(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.minimum(x.domain) + heuristic_min = SeaPearl.BasicHeuristic(selectMin) + basicHeuristics = OrderedDict( + "min" => heuristic_min + ) + + experiment_chosen_features_hetffcpnn( + n_nodes, + n_nodes+1, + n_episodes, + n_instances, + restartPerInstances; + basicHeuristics = basicHeuristics, + output_size = n_nodes, + generator=generator, + chosen_features_list=chosen_features_list, + type="graphcoloring_"*string(n_nodes) + ) +end + +############################################################################### +######### Experiment Type 9 +######### +######### Transfer Learning +############################################################################### +""" +Tests the impact of transfer learning +""" +function experiment_transfer_heterogeneous_graphcoloring(n_nodes, + n_nodes_transfered, + n_min_color, + density, + n_episodes, + n_episodes_transfered, + n_instances; + n_layers_graph=3, + n_eval=10, + n_eval_transfered=10, + 
reward=SeaPearl.GeneralReward, + decay_steps=2000, + trajectory_capacity=2000) + coloring_generator = SeaPearl.ClusterizedGraphColoringGenerator(n_nodes, n_min_color, density) + coloring_generator_transfered = SeaPearl.ClusterizedGraphColoringGenerator(n_nodes_transfered, n_min_color, density) + + + # Basic value-selection heuristic + selectMin(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.minimum(x.domain) + heuristic_min = SeaPearl.BasicHeuristic(selectMin) + basicHeuristics = OrderedDict( + "min" => heuristic_min + ) + + experiment_transfer_heterogeneous(n_nodes, n_nodes_transfered, n_episodes, n_episodes_transfered, n_instances; + chosen_features=nothing, + feature_size = [2, 3, 1], + output_size = n_nodes, + output_size_transfered = n_nodes_transfered, + generator = coloring_generator, + generator_transfered = coloring_generator_transfered, + basicHeuristics = basicHeuristics, + n_layers_graph = n_layers_graph, + n_eval = n_eval, + n_eval_transfered = n_eval_transfered, + reward = reward, + type = "graphcoloring", + decay_steps=decay_steps, + trajectory_capacity=trajectory_capacity + ) +end + +############################################################################### +######### Experiment Type 10 +######### +######### Restart +############################################################################### +""" +Compares different values of argument `restartPerInstances`` +""" + +function experiment_restart_heterogeneous_graphcoloring(n_nodes, n_min_color, density, n_episodes, n_instances; + restart_list = [1, 5, 10, 20], + n_layers_graph=3, + n_eval=10, + reward=SeaPearl.GeneralReward, + decay_steps=2000, + trajectory_capacity=2000) + + coloring_generator = SeaPearl.ClusterizedGraphColoringGenerator(n_nodes, n_min_color, density) + + # Basic value-selection heuristic + selectMin(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.minimum(x.domain) + heuristic_min = SeaPearl.BasicHeuristic(selectMin) + basicHeuristics = OrderedDict( + "min" => heuristic_min + ) + + experiment_restart_heterogeneous(n_nodes, n_episodes, n_instances; + restart_list = restart_list, + feature_size = [2, 3, 1], + output_size = n_nodes, + generator = coloring_generator, + basicHeuristics = basicHeuristics, + n_layers_graph = n_layers_graph, + n_eval = n_eval, + reward = reward, + type = "graphcoloring", + decay_steps=decay_steps, + trajectory_capacity=trajectory_capacity + ) +end + +############################################################################### +######### Experiment Type 11 +######### +######### +############################################################################### +""" +Compare different activation functions. 
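+
+The set of activation functions is chosen inside `experiment_activation_heterogeneous`; this
+wrapper mainly builds the clusterized generator and the `min` baseline heuristic. For example
+(illustrative values):
+
+```julia
+experiment_activation_heterogeneous_graphcoloring(20, 5, 0.95, 3000, 10; pool=SeaPearl.meanPooling())
+```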
+""" +function experiment_activation_heterogeneous_graphcoloring(n_nodes, n_min_color, density, n_episodes, n_instances; + n_layers_graph=3, + n_eval=10, + reward=SeaPearl.GeneralReward, + pool = SeaPearl.sumPooling() + ) + + coloring_generator = SeaPearl.ClusterizedGraphColoringGenerator(n_nodes, n_min_color, density) + + expParameters = Dict( + :generatorParameters => Dict( + :nbNodes => n_nodes, + :nbMinColor => n_min_color, + :density => density + ), + :pooling => string(pool) + ) + + # Basic value-selection heuristic + selectMin(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.minimum(x.domain) + heuristic_min = SeaPearl.BasicHeuristic(selectMin) + basicHeuristics = OrderedDict( + "min" => heuristic_min + ) + + experiment_activation_heterogeneous(n_nodes, n_episodes, n_instances; + chosen_features=nothing, + feature_size = [2, 3, 1], + output_size = n_nodes, + generator = coloring_generator, + n_layers_graph = n_layers_graph, + n_eval = n_eval, + reward = reward, + type = "graphcoloring", + decay_steps=2000, + c=2.0, + basicHeuristics=basicHeuristics, + pool = pool + ) +end + +############################################################################### +######### Experiment Type 12 +######### +######### +############################################################################### +""" +Compare different pooling functions for the graph features in the different versions of FFCPNN. +""" + +function experiment_features_pooling_heterogeneous_graphcoloring(n_nodes, n_min_color, density, n_episodes, n_instances; n_layers_graph=3, n_eval=10, reward=SeaPearl.GeneralReward, pool = SeaPearl.sumPooling()) + coloring_generator = SeaPearl.ClusterizedGraphColoringGenerator(n_nodes, n_min_color, density) + + expParameters = Dict( + :generatorParameters => Dict( + :nbNodes => n_nodes, + :nbMinColor => n_min_color, + :density => density + ), + :pooling => string(pool) + ) + + # Basic value-selection heuristic + selectMin(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.minimum(x.domain) + heuristic_min = SeaPearl.BasicHeuristic(selectMin) + basicHeuristics = OrderedDict( + "min" => heuristic_min + ) + + experiment_features_pooling_heterogeneous(n_nodes, n_episodes, n_instances; + chosen_features=nothing, + feature_size = [2, 3, 1], + output_size = n_nodes, + generator = coloring_generator, + n_layers_graph = n_layers_graph, + n_eval = n_eval, + reward = reward, + type = "graphcoloring", + decay_steps=2000, + c=2.0, + basicHeuristics=basicHeuristics, + pool = pool +) +end + +############################################################################### +######### Simple graphcoloring experiment +######### +######### +############################################################################### +""" +Runs a single experiment on graphcoloring +""" + +function simple_experiment_graphcoloring(n, k, n_episodes, n_instances, chosen_features, feature_size; n_eval=10, eval_timeout=60) + n_step_per_episode = n + reward = SeaPearl.GeneralReward + generator = SeaPearl.BarabasiAlbertGraphGenerator(n,k) + SR_heterogeneous = SeaPearl.HeterogeneousStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.HeterogeneousTrajectoryState} + trajectory_capacity = 800*n_step_per_episode + update_horizon = Int(round(n_step_per_episode//2)) + learnedHeuristics = OrderedDict{String,SeaPearl.LearnedHeuristic}() + agent_hetcpnn = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=n), + get_explorer = () -> 
get_epsilon_greedy_explorer(250*n_step_per_episode, 0.01), + batch_size=16, + update_horizon=update_horizon, + min_replay_history=Int(round(16*n_step_per_episode//2)), + update_freq=n_step_per_episode, + target_update_freq=7*n_step_per_episode, + get_heterogeneous_nn = () -> get_heterogeneous_fullfeaturedcpnn( + feature_size=feature_size, + conv_size=8, + dense_size=16, + output_size=1, + n_layers_graph=3, + n_layers_node=2, + n_layers_output=2 + ) + ) + learned_heuristic_hetffcpnn = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent_hetcpnn; chosen_features=chosen_features) + learnedHeuristics["hetffcpnn"] = learned_heuristic_hetffcpnn + variableHeuristic = SeaPearl.MinDomainVariableSelection{true}() + + selectMin(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.minimum(x.domain) + heuristic_min = SeaPearl.BasicHeuristic(selectMin) + basicHeuristics = OrderedDict( + "min" => heuristic_min + ) + + metricsArray, eval_metricsArray = trytrain( + nbEpisodes=n_episodes, + evalFreq=Int(floor(n_episodes / n_eval)), + nbInstances=n_instances, + restartPerInstances=1, + generator=generator, + variableHeuristic=variableHeuristic, + learnedHeuristics=learnedHeuristics, + basicHeuristics=basicHeuristics; + out_solver=true, + verbose=true, + nbRandomHeuristics=0, + exp_name= "graphcoloring_"*string(n)*"_heterogeneous_ffcpnn_" * string(n_episodes), + eval_timeout=eval_timeout + ) + nothing +end + +############################################################################### +######### Experiment update_freq comparison +######### +######### +############################################################################### +""" +Compares different values of argument `update_freq` +""" +function experiment_update_freq_graphcoloring(n_nodes, n_step_per_episode, n_min_color, density, n_episodes, n_instances; n_layers_graph=3, n_eval=10, reward=SeaPearl.GeneralReward, pool = SeaPearl.sumPooling()) + + coloring_generator = SeaPearl.ClusterizedGraphColoringGenerator(n_nodes, n_min_color, density) + + expParameters = Dict( + :generatorParameters => Dict( + :nbNodes => n_nodes, + :nbMinColor => n_min_color, + :density => density + ), + :pooling => string(pool) + ) + # Basic value-selection heuristic + selectMin(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.minimum(x.domain) + heuristic_min = SeaPearl.BasicHeuristic(selectMin) + basicHeuristics = OrderedDict( + "min" => heuristic_min + ) + experiment_update_freq(n_nodes, n_episodes, n_step_per_episode, n_instances; + chosen_features=nothing, + feature_size = [2, 3, 1], + output_size = n_nodes, + generator = coloring_generator, + n_layers_graph = n_layers_graph, + n_eval = n_eval, + reward = reward, + type = "graphcoloring", + decay_steps=2000, + c=2.0, + basicHeuristics=basicHeuristics, + pool = pool + ) +end + +############################################################################### +######### Comparison of tripartite graph vs specialized graph +######### +######### +############################################################################### +""" +Compares the tripartite graph representation with a specific representation. 
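+
+The tripartite (heterogeneous) encoding uses the `chosen_features` dictionary defined below with
+feature sizes [6, 5, 2], while the specific encoding relies on
+`SeaPearl.GraphColoringStateRepresentation` and its own featurization, whose size is obtained via
+`SeaPearl.feature_length`. Typical use on Barabasi-Albert instances (sizes are placeholders):
+
+```julia
+experiment_tripartite_vs_specific_graphcoloring(20, 4, 3000, 10)
+```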
+""" + +function experiment_tripartite_vs_specific_graphcoloring(n, k, n_episodes, n_instances; n_layers_graph=3, n_eval=10, reward=SeaPearl.GeneralReward) + + coloring_generator = SeaPearl.BarabasiAlbertGraphGenerator(n, k) + SR_specific = SeaPearl.GraphColoringStateRepresentation{SeaPearl.GraphColoringFeaturization,SeaPearl.DefaultTrajectoryState} + + # Basic value-selection heuristic + selectMin(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.minimum(x.domain) + heuristic_min = SeaPearl.BasicHeuristic(selectMin) + basicHeuristics = OrderedDict( + "min" => heuristic_min + ) + + chosen_features = Dict( + "node_number_of_neighbors" => true, + "constraint_type" => true, + "constraint_activity" => true, + "nb_not_bounded_variable" => true, + "variable_initial_domain_size" => true, + "variable_domain_size" => true, + "variable_is_objective" => true, + "variable_assigned_value" => true, + "variable_is_bound" => true, + "values_raw" => true) + + experiment_tripartite_vs_specific(n, n, n_episodes, n_instances, SR_specific; + chosen_features = chosen_features, + feature_size = [6, 5, 2], + feature_size_specific = SeaPearl.feature_length(SR_specific), + output_size = n, + generator = coloring_generator, + n_layers_graph = n_layers_graph, + n_eval = n_eval, + reward = reward, + type = "graphcoloring", + basicHeuristics=basicHeuristics +) +end + +############################################################################### +######### Experiment Type MALIK +######### +######### +############################################################################### + +function experiment_rl_heterogeneous_graphcoloring(n_nodes, n_min_color, density, n_episodes, n_instances; n_layers_graph=3, n_eval=10, reward=SeaPearl.GeneralReward) + """ + Compare rl agents + """ + coloring_generator = SeaPearl.ClusterizedGraphColoringGenerator(n_nodes, n_min_color, density) + chosen_features = Dict( + "variable_initial_domain_size" => true, + "constraint_type" => true, + "variable_domain_size" => true, + "values_raw" => true) + + feature_size = [2,2,1] + + # Basic value-selection heuristic + selectMin(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.minimum(x.domain) + heuristic_min = SeaPearl.BasicHeuristic(selectMin) + basicHeuristics = OrderedDict( + "min" => heuristic_min + ) + n_step_per_episode = Int(round(n_nodes*0.75)) + experiment_rl_heterogeneous(n_nodes, n_episodes, n_instances; + chosen_features=chosen_features, + feature_size = feature_size, + output_size = n_nodes, + generator = coloring_generator, + basicHeuristics = nothing, + n_layers_graph = n_layers_graph, + n_eval = n_eval, + reward = reward, + type = "graphcoloring", + decay_steps=250*n_step_per_episode, + ) +end + +############################################################################### +######### Reward comparison experiment +######### +######### +############################################################################### + +function reward_comparison_graphcoloring(n, density, min_nodes, n_episodes, n_instances, chosen_features, feature_size; n_eval=10, eval_timeout=60) + """ + Runs a single experiment on graphcoloring + """ + n_step_per_episode = n + reward1 = SeaPearl.GeneralReward + reward2 = SeaPearl.CPReward + generator = SeaPearl.ClusterizedGraphColoringGenerator(n,min_nodes,density) + SR_heterogeneous = SeaPearl.HeterogeneousStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.HeterogeneousTrajectoryState} + trajectory_capacity = 800*n_step_per_episode + update_horizon = Int(round(n_step_per_episode//2)) + learnedHeuristics = 
OrderedDict{String,SeaPearl.LearnedHeuristic}() + agent_gen = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=n), + get_explorer = () -> get_epsilon_greedy_explorer(250*n_step_per_episode, 0.01), + batch_size=16, + update_horizon=update_horizon, + min_replay_history=Int(round(16*n_step_per_episode//2)), + update_freq=n_step_per_episode, + target_update_freq=7*n_step_per_episode, + get_heterogeneous_nn = () -> get_heterogeneous_fullfeaturedcpnn( + feature_size=feature_size, + conv_size=8, + dense_size=16, + output_size=1, + n_layers_graph=3, + n_layers_node=2, + n_layers_output=2 + ) + ) + + agent_cp = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=n), + get_explorer = () -> get_epsilon_greedy_explorer(250*n_step_per_episode, 0.01), + batch_size=16, + update_horizon=update_horizon, + min_replay_history=Int(round(16*n_step_per_episode//2)), + update_freq=n_step_per_episode, + target_update_freq=7*n_step_per_episode, + get_heterogeneous_nn = () -> get_heterogeneous_fullfeaturedcpnn( + feature_size=feature_size, + conv_size=8, + dense_size=16, + output_size=1, + n_layers_graph=3, + n_layers_node=2, + n_layers_output=2 + ) + ) + learned_heuristic_gen = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward1,SeaPearl.FixedOutput}(agent_gen; chosen_features=chosen_features) + learnedHeuristics["gen"] = learned_heuristic_gen + learned_heuristic_cp = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward2,SeaPearl.FixedOutput}(agent_cp; chosen_features=chosen_features) + learnedHeuristics["cp"] = learned_heuristic_cp + variableHeuristic = SeaPearl.MinDomainVariableSelection{true}() + + selectMin(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.minimum(x.domain) + heuristic_min = SeaPearl.BasicHeuristic(selectMin) + basicHeuristics = OrderedDict( + "min" => heuristic_min + ) + + metricsArray, eval_metricsArray = trytrain( + nbEpisodes=n_episodes, + evalFreq=Int(floor(n_episodes / n_eval)), + nbInstances=n_instances, + restartPerInstances=1, + generator=generator, + variableHeuristic=variableHeuristic, + learnedHeuristics=learnedHeuristics, + basicHeuristics=basicHeuristics; + out_solver=true, + verbose=true, + nbRandomHeuristics=0, + exp_name= "graphcoloring_"*string(n)*"_heterogeneous_ffcpnn_" * string(n_episodes), + eval_timeout=eval_timeout + ) + nothing +end + + +function transfert_graph_coloring_experiment(n_nodes, n_nodes_eval, k, n_episodes, n_instances; n_layers_graph=3, n_eval=25, reward = SeaPearl.GeneralReward, c=2.0, trajectory_capacity = 30000, pool = SeaPearl.meanPooling(), nbRandomHeuristics = 1, eval_timeout = 240, restartPerInstances = 1, seedEval = nothing, device = gpu, batch_size = 64, update_freq = 10, target_update_freq= 500, name = "", numDevice = 0, eval_strategy = SeaPearl.DFSearch()) + n_step_per_episode = n_nodes + + update_horizon = Int(round(n_step_per_episode//2)) + + if device == gpu + CUDA.device!(numDevice) + end + coloring_generator = SeaPearl.BarabasiAlbertGraphGenerator(n_nodes,k) + eval_coloring_generator = SeaPearl.BarabasiAlbertGraphGenerator(n_nodes_eval,k) + n_min_color = k + density = 0.9 + coloring_generator = SeaPearl.ClusterizedGraphColoringGenerator(n_nodes, n_min_color, density) + eval_coloring_generator = SeaPearl.ClusterizedGraphColoringGenerator(n_nodes_eval, n_min_color, density) + + evalFreq=Int(floor(n_episodes / n_eval)) + + step_explorer = 
Int(floor(n_episodes*n_step_per_episode*0.1 )) + + chosen_features = Dict( + "node_number_of_neighbors" => true, + "constraint_type" => true, + "constraint_activity" => true, + "nb_not_bounded_variable" => true, + "variable_initial_domain_size" => true, + "variable_domain_size" => true, + "variable_is_objective" => true, + "variable_assigned_value" => true, + "variable_is_bound" => true, + "values_raw" => true) + + feature_size = [6, 5, 2] + + rngExp = MersenneTwister(seedEval) + init = Flux.glorot_uniform(MersenneTwister(seedEval)) + + SR_heterogeneous = SeaPearl.HeterogeneousStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.HeterogeneousTrajectoryState} + + # Basic value-selection heuristic + selectMin(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.minimum(x.domain) + heuristic_min = SeaPearl.BasicHeuristic(selectMin) + basicHeuristics = OrderedDict( + "min" => heuristic_min + ) + + agent_3 = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=n_nodes), + get_explorer = () -> get_epsilon_greedy_explorer(step_explorer, 0.05; rng = rngExp ), + batch_size=batch_size, + update_horizon=update_horizon, + min_replay_history=Int(round(16*n_step_per_episode//2)), + update_freq=update_freq, + target_update_freq=target_update_freq, + get_heterogeneous_nn = () -> get_heterogeneous_fullfeaturedcpnn( + feature_size=feature_size, + conv_size=8, + dense_size=16, + output_size=1, + n_layers_graph=3, + n_layers_node=3, + n_layers_output=2, + pool=pool, + σ=NNlib.leakyrelu, + init = init, + device = device + ), + γ = 0.99f0 + ) + + learned_heuristic_3 = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent_3; chosen_features=chosen_features) + + learnedHeuristics = OrderedDict( + "3layer" => learned_heuristic_3, + ) + + variableHeuristic = SeaPearl.MinDomainVariableSelection{false}() + + metricsArray, eval_metricsArray = trytrain( + nbEpisodes=n_episodes, + evalFreq=evalFreq, + nbInstances=n_instances, + restartPerInstances=restartPerInstances, + generator=coloring_generator, + eval_strategy=eval_strategy, + variableHeuristic=variableHeuristic, + learnedHeuristics=learnedHeuristics, + basicHeuristics=basicHeuristics; + out_solver=true, + verbose=false, + seedEval=seedEval, + nbRandomHeuristics=nbRandomHeuristics, + exp_name=name *'_'* string(n_nodes) *"_"*string(n_nodes_eval) * "_" * string(n_episodes) * "_"* string(seedEval) * "_", + eval_timeout=eval_timeout, + eval_generator=eval_coloring_generator, + device = device + ) + +end diff --git a/learning_cp/comparison/jobshop.jl b/learning_cp/comparison/jobshop.jl new file mode 100644 index 0000000..764bd85 --- /dev/null +++ b/learning_cp/comparison/jobshop.jl @@ -0,0 +1,443 @@ +include("../common/experiment.jl") +include("../common/utils.jl") +include("comparison.jl") + +############################################################################### +######### Simple jobshop experiment +######### +######### +############################################################################### + +function simple_experiment_jobshop(n_machines, n_jobs, max_time, n_episodes, n_instances, chosen_features, feature_size; n_eval=10, eval_timeout=60) + """ + Runs a single experiment on the jobshop scheduling problem + """ + n_step_per_episode = Int(round(n_machines*n_jobs*0.75)) + reward = SeaPearl.GeneralReward + generator = SeaPearl.JobShopGenerator(n_machines, n_jobs, max_time) + SR_heterogeneous = 
SeaPearl.HeterogeneousStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.HeterogeneousTrajectoryState} + trajectory_capacity = 3500*n_step_per_episode + update_horizon = n_step_per_episode + learnedHeuristics = OrderedDict{String,SeaPearl.LearnedHeuristic}() + agent = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=max_time), + get_explorer = () -> get_epsilon_greedy_explorer(3000*n_step_per_episode, 0.05), + batch_size=8, + update_horizon=update_horizon, + min_replay_history=160*n_step_per_episode, + update_freq=1, + target_update_freq=7*n_step_per_episode, + get_heterogeneous_nn = () -> get_heterogeneous_fullfeaturedcpnn( + feature_size=feature_size, + conv_size=8, + dense_size=16, + output_size=1, + n_layers_graph=6, + n_layers_node=1, + n_layers_output=2, + pool=SeaPearl.sumPooling() + ) + ) + learned_heuristic = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent; chosen_features=chosen_features) + learnedHeuristics["learning"] = learned_heuristic + variableHeuristic = SeaPearl.MinDomainVariableSelection{false}() + selectMin(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.minimum(x.domain) + heuristic_min = SeaPearl.BasicHeuristic(selectMin) + basicHeuristics = OrderedDict( + "min" => heuristic_min + ) + + metricsArray, eval_metricsArray = trytrain( + nbEpisodes=n_episodes, + evalFreq=Int(floor(n_episodes / n_eval)), + nbInstances=n_instances, + restartPerInstances=5, + eval_strategy = SeaPearl.ILDSearch(2), + generator=generator, + variableHeuristic=variableHeuristic, + learnedHeuristics=learnedHeuristics, + basicHeuristics=basicHeuristics; + out_solver=true, + verbose=true, + nbRandomHeuristics=0, + exp_name= "jobshop_"*string(n_machines)*"_"*string(n_jobs)*"_" * string(max_time), + eval_timeout=eval_timeout + ) + nothing + +end + +function jobshop_retrain(n_machines, n_jobs, max_time, n_episodes, n_instances, chosen_features, feature_size, model_file; n_eval=10, eval_timeout=60) + """ + Fine-tunes a given jobshop model + """ + n_step_per_episode = Int(round(n_machines*n_jobs*0.75)) + reward = SeaPearl.GeneralReward + generator = SeaPearl.JobShopGenerator(n_machines, n_jobs, max_time) + SR_heterogeneous = SeaPearl.HeterogeneousStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.HeterogeneousTrajectoryState} + trajectory_capacity = 3500*n_step_per_episode + update_horizon = n_step_per_episode + learnedHeuristics = OrderedDict{String,SeaPearl.LearnedHeuristic}() + agent = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=max_time), + get_explorer = () -> get_softmax_explorer(0.01, 0.5, 3500*n_step_per_episode), + batch_size=8, + update_horizon=update_horizon, + min_replay_history=160*n_step_per_episode, + update_freq=1, + target_update_freq=7*n_step_per_episode, + get_heterogeneous_nn = () -> get_pretrained_heterogeneous_fullfeaturedcpnn(model_file) + ) + learned_heuristic = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent; chosen_features=chosen_features) + learnedHeuristics["learning"] = learned_heuristic + variableHeuristic = SeaPearl.MinDomainVariableSelection{false}() + selectMin(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.minimum(x.domain) + heuristic_min = SeaPearl.BasicHeuristic(selectMin) + basicHeuristics = OrderedDict( + "min" => heuristic_min + ) + + metricsArray, eval_metricsArray = trytrain( + 
nbEpisodes=n_episodes, + evalFreq=Int(floor(n_episodes / n_eval)), + nbInstances=n_instances, + restartPerInstances=5, + eval_strategy = SeaPearl.ILDSearch(2), + generator=generator, + variableHeuristic=variableHeuristic, + learnedHeuristics=learnedHeuristics, + basicHeuristics=basicHeuristics; + out_solver=true, + verbose=false, + nbRandomHeuristics=0, + exp_name= "jobshop_"*string(n_machines)*"_"*string(n_jobs)*"_" * string(max_time), + eval_timeout=eval_timeout + ) + nothing + +end + +############################################################################### +######### Experiment Type 1 +######### +######### +############################################################################### +""" +Compares HGT and HeterogeneousGraphConv. +""" + +function experiment_hgt_vs_graphconv_jobshop(chosen_features, n_machines, n_jobs, max_time, n_episodes, n_instances; n_eval=10) + """ + Compares the impact of the number of convolution layers for the heterogeneous representation. + """ + generator = SeaPearl.JobShopGenerator(n_machines, n_jobs, max_time) + + experiment_hgt_vs_graphconv( + n_machines*n_jobs, + n_machines*n_jobs, + n_episodes, + n_instances, + 1; + output_size = max_time, + generator = generator, + chosen_features = chosen_features, + type = "jobshop_"*string(n_machines)*"_"*string(n_jobs)*"_"*string(max_time) + ) +end + + +############################################################################### +######### Experiment Type 2 +######### +######### +############################################################################### +""" +Compares different Reward on JobShop +""" + +function experiment_different_reward_jobshop(n_machines, n_jobs, max_time, n_episodes, n_instances; n_layers_graph=3, n_eval=10, pool = SeaPearl.meanPooling()) + """ + Compare three agents: + - an agent with the default representation and default features; + - an agent with the default representation and chosen features; + - an agent with the heterogeneous representation and chosen features. + """ + generator = SeaPearl.JobShopGenerator(n_machines, n_jobs, max_time) + selectMin(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.minimum(x.domain) + heuristic_min = SeaPearl.BasicHeuristic(selectMin) + basicHeuristics = OrderedDict( + "min" => heuristic_min + ) + chosen_features = Dict( + "variable_is_bound" => true, + "variable_assigned_value" => true, + "variable_initial_domain_size" => true, + "variable_domain_size" => true, + "variable_is_objective" => true, + "constraint_activity" => true, + "constraint_type" => true, + "nb_not_bounded_variable" => true, + "values_raw" => true, + ) + + experiment_reward(n_machines*n_jobs, n_episodes, n_instances; + chosen_features=chosen_features, + feature_size = [5, 9, 1], + output_size = max_time, + generator = generator, + basicHeuristics = basicHeuristics, + n_layers_graph = n_layers_graph, + n_eval = n_eval, + type = "jobshop", + c=2.0, + pool = pool) +end + # Basic value-selection heuristic +############################################################################### +######### Experiment Type 6 +######### +######### +############################################################################### + +function experiment_nn_heterogeneous_jobshop(chosen_features, feature_size, n_machines, n_jobs, max_time, n_episodes, n_instances; n_eval=10) + """ + Compares the impact of the number of convolution layers for the heterogeneous representation. 
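+
+    Concretely, this driver forwards to `experiment_nn_heterogeneous` on `SeaPearl.JobShopGenerator`
+    instances, with n_machines*n_jobs variables, roughly 0.75*n_machines*n_jobs branching steps per
+    episode and `max_time` as output size. A sketch of a call, reusing the feature dictionary used
+    elsewhere in this file (feature sizes [5, 9, 1]); the instance dimensions are illustrative:
+
+    ```julia
+    chosen_features = Dict(
+        "variable_is_bound" => true, "variable_assigned_value" => true,
+        "variable_initial_domain_size" => true, "variable_domain_size" => true,
+        "variable_is_objective" => true, "constraint_activity" => true,
+        "constraint_type" => true, "nb_not_bounded_variable" => true,
+        "values_raw" => true,
+    )
+    experiment_nn_heterogeneous_jobshop(chosen_features, [5, 9, 1], 3, 3, 20, 3000, 10)
+    ```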
+ """ + generator = SeaPearl.JobShopGenerator(n_machines, n_jobs, max_time) + + selectMin(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.minimum(x.domain) + heuristic_min = SeaPearl.BasicHeuristic(selectMin) + basicHeuristics = OrderedDict( + "min" => heuristic_min + ) + + experiment_nn_heterogeneous( + n_machines*n_jobs, + Int(round(n_machines*n_jobs*0.75)), + n_episodes, + n_instances; + feature_size=feature_size, + output_size=max_time, + n_eval=n_eval, + generator=generator, + type = "jobshop_"*string(n_machines)*"_"*string(n_jobs)*"_"*string(max_time), + eval_timeout=60, + chosen_features=chosen_features, + basicHeuristics=basicHeuristics, + reward=SeaPearl.GeneralReward, + n_layers_graph=4, + trajectory_capacity = 900*Int(round(n_machines*n_jobs*0.75)), + decay_steps = 500 * Int(round(n_machines*n_jobs*0.75)), + update_horizon = Int(round(Int(round(n_machines*n_jobs*0.75))/2)), + pool=SeaPearl.sumPooling() + ) +end + +function experiment_nn_heterogeneous_jobshop_with_restarts(chosen_features, feature_size, n_machines, n_jobs, max_time, n_episodes, n_instances; n_eval=10) + """ + Compares the impact of the number of convolution layers for the heterogeneous representation. + """ + generator = SeaPearl.JobShopGenerator(n_machines, n_jobs, max_time) + + selectMin(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.minimum(x.domain) + heuristic_min = SeaPearl.BasicHeuristic(selectMin) + basicHeuristics = OrderedDict( + "min" => heuristic_min + ) + + experiment_nn_heterogeneous( + n_machines*n_jobs, + Int(round(n_machines*n_jobs*0.75)), + n_episodes, + n_instances; + feature_size=feature_size, + output_size=max_time, + n_eval=n_eval, + generator=generator, + type = "jobshop_"*string(n_machines)*"_"*string(n_jobs)*"_"*string(max_time), + eval_timeout=60, + chosen_features=chosen_features, + basicHeuristics=basicHeuristics, + reward=SeaPearl.GeneralReward, + n_layers_graph=4, + trajectory_capacity = 900*Int(round(n_machines*n_jobs*0.75)), + decay_steps = 500 * Int(round(n_machines*n_jobs*0.75)), + update_horizon = Int(round(Int(round(n_machines*n_jobs*0.75))/2)), + pool=SeaPearl.sumPooling(), + restartPerInstances=5 + ) +end + +function experiment_nn_heterogeneous_jobshop_high_explo(chosen_features, feature_size, n_machines, n_jobs, max_time, n_episodes, n_instances; n_eval=10) + """ + Compares the impact of the number of convolution layers for the heterogeneous representation. 
+ """ + generator = SeaPearl.JobShopGenerator(n_machines, n_jobs, max_time) + + selectMin(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.minimum(x.domain) + heuristic_min = SeaPearl.BasicHeuristic(selectMin) + basicHeuristics = OrderedDict( + "min" => heuristic_min + ) + + experiment_nn_heterogeneous( + n_machines*n_jobs, + Int(round(n_machines*n_jobs*0.75)), + n_episodes, + n_instances; + feature_size=feature_size, + output_size=max_time, + n_eval=n_eval, + generator=generator, + type = "jobshop_"*string(n_machines)*"_"*string(n_jobs)*"_"*string(max_time), + eval_timeout=60, + chosen_features=chosen_features, + basicHeuristics=basicHeuristics, + reward=SeaPearl.GeneralReward, + n_layers_graph=4, + trajectory_capacity = 900*Int(round(n_machines*n_jobs*0.75)), + decay_steps = 6000 * Int(round(n_machines*n_jobs*0.75)), + update_horizon = Int(round(Int(round(n_machines*n_jobs*0.75))/2)), + pool=SeaPearl.sumPooling() + ) +end + +function experiment_nn_heterogeneous_jobshop_softmax_high_explo(chosen_features, feature_size, n_machines, n_jobs, max_time, n_episodes, n_instances; n_eval=10) + """ + Compares the impact of the number of convolution layers for the heterogeneous representation. + """ + generator = SeaPearl.JobShopGenerator(n_machines, n_jobs, max_time) + + selectMin(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.minimum(x.domain) + heuristic_min = SeaPearl.BasicHeuristic(selectMin) + basicHeuristics = OrderedDict( + "min" => heuristic_min + ) + + experiment_nn_heterogeneous_softmax_explorer( + n_machines*n_jobs, + Int(round(n_machines*n_jobs*0.75)), + n_episodes, + n_instances; + feature_size=feature_size, + output_size=max_time, + n_eval=n_eval, + generator=generator, + type = "jobshop_"*string(n_machines)*"_"*string(n_jobs)*"_"*string(max_time), + eval_timeout=60, + chosen_features=chosen_features, + basicHeuristics=basicHeuristics, + reward=SeaPearl.GeneralReward, + n_layers_graph=4, + trajectory_capacity = 900*Int(round(n_machines*n_jobs*0.75)), + decay_steps = 1000 * Int(round(n_machines*n_jobs*0.75)), + update_horizon = Int(round(Int(round(n_machines*n_jobs*0.75))/2)), + pool=SeaPearl.sumPooling(), + restartPerInstances = 5 + ) +end + + +function comparison_convolution_depth_jobshop(n_machines, n_jobs, max_time, n_episodes, n_instances, chosen_features, feature_size; n_eval=10, eval_timeout=60) + """ + Runs a single experiment on the jobshop scheduling problem + """ + n_step_per_episode = Int(round(n_machines*n_jobs*0.75)) + reward = SeaPearl.GeneralReward + generator = SeaPearl.JobShopSoftDeadlinesGenerator(n_machines, n_jobs, max_time) + SR_heterogeneous = SeaPearl.HeterogeneousStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.HeterogeneousTrajectoryState} + trajectory_capacity = 3500*n_step_per_episode + update_horizon = n_step_per_episode + learnedHeuristics = OrderedDict{String,SeaPearl.LearnedHeuristic}() + agent1 = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=max_time), + get_explorer = () -> get_epsilon_greedy_explorer(3000*n_step_per_episode, 0.05), + batch_size=8, + update_horizon=update_horizon, + min_replay_history=160*n_step_per_episode, + update_freq=1, + target_update_freq=7*n_step_per_episode, + get_heterogeneous_nn = () -> get_heterogeneous_fullfeaturedcpnn( + feature_size=feature_size, + conv_size=8, + dense_size=16, + output_size=1, + n_layers_graph=6, + n_layers_node=1, + n_layers_output=2, + pool=SeaPearl.sumPooling() + ) + ) + learned_heuristic1 = 
SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent1; chosen_features=chosen_features) + learnedHeuristics["learning6"] = learned_heuristic1 + + agent2 = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=max_time), + get_explorer = () -> get_epsilon_greedy_explorer(3000*n_step_per_episode, 0.05), + batch_size=8, + update_horizon=update_horizon, + min_replay_history=160*n_step_per_episode, + update_freq=1, + target_update_freq=7*n_step_per_episode, + get_heterogeneous_nn = () -> get_heterogeneous_fullfeaturedcpnn( + feature_size=feature_size, + conv_size=8, + dense_size=16, + output_size=1, + n_layers_graph=6, + n_layers_node=1, + n_layers_output=2, + pool=SeaPearl.sumPooling() + ) + ) + learned_heuristic2 = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent2; chosen_features=chosen_features) + learnedHeuristics["learning10"] = learned_heuristic2 + + agent3 = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=max_time), + get_explorer = () -> get_epsilon_greedy_explorer(3000*n_step_per_episode, 0.05), + batch_size=8, + update_horizon=update_horizon, + min_replay_history=160*n_step_per_episode, + update_freq=1, + target_update_freq=7*n_step_per_episode, + get_heterogeneous_nn = () -> get_heterogeneous_fullfeaturedcpnn( + feature_size=feature_size, + conv_size=8, + dense_size=16, + output_size=1, + n_layers_graph=24, + n_layers_node=1, + n_layers_output=2, + pool=SeaPearl.sumPooling() + ) + ) + learned_heuristic3 = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent3; chosen_features=chosen_features) + learnedHeuristics["learning24"] = learned_heuristic3 + + variableHeuristic = SeaPearl.MinDomainVariableSelection{false}() + selectMin(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.minimum(x.domain) + heuristic_min = SeaPearl.BasicHeuristic(selectMin) + basicHeuristics = OrderedDict( + "min" => heuristic_min + ) + + metricsArray, eval_metricsArray = trytrain( + nbEpisodes=n_episodes, + evalFreq=Int(floor(n_episodes / n_eval)), + nbInstances=n_instances, + restartPerInstances=5, + eval_strategy = SeaPearl.ILDSearch(2), + generator=generator, + variableHeuristic=variableHeuristic, + learnedHeuristics=learnedHeuristics, + basicHeuristics=basicHeuristics; + out_solver=true, + verbose=true, + nbRandomHeuristics=0, + exp_name= "jobshop_"*string(n_machines)*"_"*string(n_jobs)*"_" * string(max_time), + eval_timeout=eval_timeout + ) + nothing + +end \ No newline at end of file diff --git a/learning_cp/comparison/kep.jl b/learning_cp/comparison/kep.jl new file mode 100644 index 0000000..15006d9 --- /dev/null +++ b/learning_cp/comparison/kep.jl @@ -0,0 +1,415 @@ +include("../common/experiment.jl") +include("../common/utils.jl") +include("comparison.jl") + +function experiment_representation(n_nodes, density, n_episodes, n_instances; n_layers_graph=2, n_eval=10, eval_timeout=60) + """ + Compare three agents: + - an agent with the default representation and default features; + - an agent with the default representation and chosen features; + - an agent with the heterogeneous representation and chosen features. 
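+
+    For KEP all three agents share output_size = 2 (the decision variables are binary), with input
+    feature sizes 3, 11 and [1, 6, 1] respectively. For example (node count and density are
+    placeholders):
+
+    ```julia
+    experiment_representation(50, 0.2, 3000, 10; n_layers_graph=2)
+    ```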
+ """ + kep_generator = SeaPearl.KepGenerator(n_nodes, density) + SR_default = SeaPearl.DefaultStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.DefaultTrajectoryState} + SR_heterogeneous = SeaPearl.HeterogeneousStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.HeterogeneousTrajectoryState} + + agent_default_default = get_default_agent(; + capacity=2000, + decay_steps=2000, + ϵ_stable=0.01, + batch_size=16, + update_horizon=8, + min_replay_history=256, + update_freq=1, + target_update_freq=8, + feature_size=3, + conv_size=8, + dense_size=16, + output_size=2, + n_layers_graph=n_layers_graph, + n_layers_node=2, + n_layers_output=2 + ) + learned_heuristic_default_default = SeaPearl.SimpleLearnedHeuristic{SR_default,SeaPearl.GeneralReward,SeaPearl.FixedOutput}(agent_default_default) + + chosen_features = Dict( + "constraint_type" => true, + "variable_initial_domain_size" => true, + "values_raw" => true, + ) + + agent_default_chosen = get_default_agent(; + capacity=2000, + decay_steps=2000, + ϵ_stable=0.01, + batch_size=16, + update_horizon=8, + min_replay_history=256, + update_freq=1, + target_update_freq=8, + feature_size=11, + conv_size=8, + dense_size=16, + output_size=2, + n_layers_graph=n_layers_graph, + n_layers_node=2, + n_layers_output=2 + ) + learned_heuristic_default_chosen = SeaPearl.SimpleLearnedHeuristic{SR_default,SeaPearl.GeneralReward,SeaPearl.FixedOutput}(agent_default_chosen; chosen_features=chosen_features) + + agent_heterogeneous = get_heterogeneous_agent(; + capacity=2000, + decay_steps=2000, + ϵ_stable=0.01, + batch_size=16, + update_horizon=8, + min_replay_history=256, + update_freq=1, + target_update_freq=8, + feature_size=[1, 6, 1], + conv_size=8, + dense_size=16, + output_size=2, + n_layers_graph=n_layers_graph, + n_layers_node=2, + n_layers_output=2 + ) + learned_heuristic_heterogeneous = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,SeaPearl.GeneralReward,SeaPearl.FixedOutput}(agent_heterogeneous; chosen_features=chosen_features) + + + # Basic value-selection heuristic + selectMin(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.minimum(x.domain) + heuristic_min = SeaPearl.BasicHeuristic(selectMin) + + learnedHeuristics = OrderedDict( + "defaultdefault" => learned_heuristic_default_default, + "defaultchosen" => learned_heuristic_default_chosen, + "heterogeneous" => learned_heuristic_heterogeneous, + ) + basicHeuristics = OrderedDict( + "random" => SeaPearl.RandomHeuristic() + ) + + # ------------------- + # Variable Heuristic definition + # ------------------- + variableHeuristic = SeaPearl.MinDomainVariableSelection{false}() + + + metricsArray, eval_metricsArray = trytrain( + nbEpisodes=n_episodes, + evalFreq=Int(floor(n_episodes / n_eval)), + nbInstances=n_instances, + restartPerInstances=1, + generator=kep_generator, + variableHeuristic=variableHeuristic, + learnedHeuristics=learnedHeuristics, + basicHeuristics=basicHeuristics; + out_solver=true, + verbose=true, + nbRandomHeuristics=0, + exp_name="kep_representation_" * string(n_episodes) * "_" * string(n_nodes) * "_", + eval_timeout=eval_timeout + ) +end + +############################################################################### +######### Experiment Type 3 +######### +######### +############################################################################### + +function experiment_chosen_features_heterogeneous_kep(n_nodes, density, n_episodes, n_instances; n_eval=10, eval_timeout=60) + """ + Compares the impact of the number of convolution layers for the heterogeneous representation. 
+ """ + kep_generator = SeaPearl.KepGenerator(n_nodes, density) + + chosen_features_list = [ + [ + Dict( + "constraint_type" => true, + "variable_initial_domain_size" => true, + "values_onehot" => true, + ), + [1, 6, 2] + ], + [ + Dict( + "constraint_type" => true, + "variable_initial_domain_size" => true, + "values_raw" => true, + ), + [1, 6, 1] + ], + [ + Dict( + "constraint_type" => true, + "variable_initial_domain_size" => true, + "variable_domain_size" => true, + "values_onehot" => true, + ), + [2, 6, 2] + ], + [ + Dict( + "constraint_activity" => true, + "constraint_type" => true, + "variable_initial_domain_size" => true, + "values_onehot" => true, + ), + [1, 7, 2] + ], + [ + Dict( + "constraint_activity" => true, + "constraint_type" => true, + "variable_initial_domain_size" => true, + "variable_domain_size" => true, + "values_raw" => true, + ), + [2, 7, 1] + ], + [ + Dict( + "constraint_activity" => true, + "constraint_type" => true, + "nb_not_bounded_variable" => true, + "variable_initial_domain_size" => true, + "variable_domain_size" => true, + "variable_is_bound" => true, + "values_raw" => true, + ), + [3, 8, 1] + ], + ] + + + experiment_chosen_features_heterogeneous(n_nodes, n_episodes, n_instances; + n_eval=n_eval, + generator=kep_generator, + chosen_features_list=chosen_features_list, + type="kep", + output_size=2, + eval_timeout=eval_timeout) +end +############################################################################### +######### Experiment Type 4 +######### +######### +############################################################################### +function experiment_nn_heterogeneous_kep(n_nodes, density, n_episodes, n_instances; n_layers_graph=3, n_eval=10, eval_timeout=600, reward=SeaPearl.GeneralReward, pool = SeaPearl.sumPooling()) + """ + Compare agents with different Fullfeatured CPNN pipeline + """ + + kep_generator = SeaPearl.KepGenerator(n_nodes, density) + + expParameters = Dict( + :generatorParameters => Dict( + :nbNodes => n_nodes, + :density => density + ), + :pooling => string(pool) + ) + + # Basic value-selection heuristic + selectMax(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.maximum(x.domain) + heuristic_max = SeaPearl.BasicHeuristic(selectMax) + basicHeuristics = OrderedDict( + "max" => heuristic_max + ) + + chosen_features = Dict( + "variable_is_bound" => true, + "variable_assigned_value" => true, + "variable_initial_domain_size" => true, + "variable_domain_size" => true, + "variable_is_objective" => true, + "constraint_activity" => true, + "constraint_type" => true, + "nb_not_bounded_variable" => true, + "values_raw" => true, + ) + + experiment_nn_heterogeneous(n_nodes, n_nodes, n_episodes, n_instances; + #chosen_features=chosen_features, + feature_size = [5, 8, 1], #[2, 7, 1], + output_size = 2, + generator = kep_generator, + n_layers_graph = n_layers_graph, + n_eval = n_eval, + reward = reward, + type = "kep", + c=2.0, + basicHeuristics=basicHeuristics, + pool = pool +) +end +############################################################################### +######### Simple KEP experiment +######### +######### +############################################################################### + +function simple_experiment_kep(n_nodes, density, n_episodes, n_instances; chosen_features=nothing, feature_size=nothing, n_eval=10, n_nodes_eva = n_nodes, density_eva = density, n_layers_graph=3, reward = SeaPearl.GeneralReward, c=2.0, trajectory_capacity=2000, pool = SeaPearl.meanPooling(), nbRandomHeuristics = 1, eval_timeout = 60, restartPerInstances 
= 10, seedEval = nothing) + """ + Runs a single experiment on KEP + """ + reward = SeaPearl.GeneralReward + generator = SeaPearl.KepGenerator(n_nodes, density) + n_step_per_episode = Int(n_nodes/2) + + if isnothing(chosen_features) + chosen_features = Dict( + "variable_is_bound" => true, + "variable_assigned_value" => true, + "variable_initial_domain_size" => true, + "variable_domain_size" => true, + "variable_is_objective" => true, + "constraint_activity" => true, + "constraint_type" => true, + "nb_not_bounded_variable" => true, + "values_raw" => true, + ) + feature_size = [5, 8, 1] + + end + rngExp = MersenneTwister(seedEval) + init = Flux.glorot_uniform(MersenneTwister(seedEval)) + + SR_heterogeneous = SeaPearl.HeterogeneousStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.HeterogeneousTrajectoryState} + + trajectory_capacity = 500*n_step_per_episode + update_horizon = Int(round(n_step_per_episode//2)) + learnedHeuristics = OrderedDict{String,SeaPearl.LearnedHeuristic}() + + agent_24= get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=2), + get_explorer = () -> get_epsilon_greedy_explorer(Int(floor(n_episodes*n_step_per_episode*0.75)), 0.05; rng = rngExp ), + batch_size=16, + update_horizon=update_horizon, + min_replay_history=Int(round(16*n_step_per_episode//2)), + update_freq=1, + target_update_freq=7*n_step_per_episode, + get_heterogeneous_nn = () -> get_heterogeneous_fullfeaturedcpnn( + feature_size=feature_size, + conv_size=8, + dense_size=16, + output_size=1, + n_layers_graph=24, + n_layers_node=3, + n_layers_output=2, + pool=pool, + σ=NNlib.leakyrelu, + init = init, + device = cpu + ), + γ = 0.99f0 + ) + agent_6 = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=2), + get_explorer = () -> get_epsilon_greedy_explorer(Int(floor(n_episodes*n_step_per_episode*0.75)), 0.05; rng = rngExp ), + batch_size=16, + update_horizon=update_horizon, + min_replay_history=Int(round(16*n_step_per_episode//2)), + update_freq=1, + target_update_freq=7*n_step_per_episode, + get_heterogeneous_nn = () -> get_heterogeneous_fullfeaturedcpnn( + feature_size=feature_size, + conv_size=8, + dense_size=16, + output_size=1, + n_layers_graph=6, + n_layers_node=3, + n_layers_output=2, + pool=pool, + σ=NNlib.leakyrelu, + init = init, + device = cpu + ), + γ = 0.99f0 + ) + agent_cpu = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=2), + get_explorer = () -> get_epsilon_greedy_explorer(Int(floor(n_episodes*n_step_per_episode*0.75)), 0.05; rng = rngExp ), + batch_size=256, + update_horizon=update_horizon, + min_replay_history=Int(round(16*n_step_per_episode//2)), + update_freq=4, + target_update_freq=7*n_step_per_episode, + get_heterogeneous_nn = () -> get_heterogeneous_fullfeaturedcpnn( + feature_size=feature_size, + conv_size=8, + dense_size=16, + output_size=1, + n_layers_graph=3, + n_layers_node=2, + n_layers_output=2, + pool=SeaPearl.meanPooling(), + σ=NNlib.leakyrelu, + init = init, + device = cpu + ), + γ = 0.99f0 + ) + agent_gpu = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=2), + get_explorer = () -> get_epsilon_greedy_explorer(Int(floor(n_episodes*n_step_per_episode*0.75)), 0.05; rng = rngExp ), + batch_size=256, + 
update_horizon=update_horizon, + min_replay_history=Int(round(16*n_step_per_episode//2)), + update_freq=4, + target_update_freq=7*n_step_per_episode, + get_heterogeneous_nn = () -> get_heterogeneous_fullfeaturedcpnn( + feature_size=feature_size, + conv_size=8, + dense_size=16, + output_size=1, + n_layers_graph=3, + n_layers_node=2, + n_layers_output=2, + pool=SeaPearl.meanPooling(), + σ=NNlib.leakyrelu, + init = init, + device = gpu + ), + γ = 0.99f0 + ) + + learned_heuristic_24 = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent_24; chosen_features=chosen_features) + learned_heuristic_6 = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent_6; chosen_features=chosen_features) + learned_heuristic_cpu = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent_cpu; chosen_features=chosen_features) + learned_heuristic_gpu = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent_gpu; chosen_features=chosen_features) + + learnedHeuristics["gpu"] = learned_heuristic_gpu + learnedHeuristics["cpu"] = learned_heuristic_cpu + #learnedHeuristics["24layer"] = learned_heuristic_24 + + selectMax(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.maximum(x.domain) + heuristic_max = SeaPearl.BasicHeuristic(selectMax) + basicHeuristics = OrderedDict( + "expert_max" => heuristic_max + ) + + variableHeuristic = SeaPearl.MinDomainVariableSelection{false}() + + metricsArray, eval_metricsArray = trytrain( + nbEpisodes=n_episodes, + evalFreq=Int(floor(n_episodes / n_eval)), + nbInstances=n_instances, + restartPerInstances=restartPerInstances, + eval_strategy = SeaPearl.ILDSearch(0), + generator=generator, + variableHeuristic=variableHeuristic, + learnedHeuristics=learnedHeuristics, + basicHeuristics=basicHeuristics; + out_solver=true, + verbose=true, + nbRandomHeuristics=0, + exp_name= "kep_"*string(n_nodes)*"_"*string(density)*"_"* string(n_episodes), + eval_timeout=eval_timeout + ) + nothing + +end \ No newline at end of file diff --git a/learning_cp/comparison/latin.jl b/learning_cp/comparison/latin.jl new file mode 100644 index 0000000..926634a --- /dev/null +++ b/learning_cp/comparison/latin.jl @@ -0,0 +1,426 @@ +include("../common/experiment.jl") +include("../common/utils.jl") +include("comparison.jl") + +############################################################################### +######### Experiment Type 1 +######### +######### +############################################################################### + +function experiment_representation_latin(board_size, density, n_episodes, n_instances; n_layers_graph=2, n_eval=10, reward=SeaPearl.GeneralReward) + """ + Compare three agents: + - an agent with the default representation and default features; + - an agent with the default representation and chosen features; + - an agent with the heterogeneous representation and chosen features. 
+ """ + + latin_generator = SeaPearl.LatinGenerator(board_size, density) + + + experiment_representation(board_size, n_episodes, n_instances; + chosen_features=nothing, + feature_sizes = [3, 9, [2, 3, 1]], + output_size = board_size, + generator = latin_generator, + basicHeuristics=nothing, + n_layers_graph=n_layers_graph, + n_eval=n_eval, + reward=reward, + type="latin", + ) +end + +############################################################################### +######### Experiment Type 2 +######### +######### +############################################################################### + + +function experiment_heterogeneous_n_conv_latin(board_size, density, n_episodes, n_instances; n_eval=10) + """ + Compares the impact of the number of convolution layers for the heterogeneous representation. + """ + latin_generator = SeaPearl.LatinGenerator(board_size, density) + + SR_heterogeneous = SeaPearl.HeterogeneousStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.HeterogeneousTrajectoryState} + + chosen_features = Dict( + "constraint_type" => true, + "variable_initial_domain_size" => true, + "values_onehot" => true, + ) + + experiment_n_conv(board_size, n_episodes, n_instances; + n_eval=n_eval, + generator=latin_generator, + SR=SR_heterogeneous, + chosen_features=chosen_features, + feature_size=[1, 2, board_size], + type="heterogeneous", + output_size = board_size) +end + +function experiment_default_chosen_n_conv_latin(board_size, density, n_episodes, n_instances; n_eval=10) + """ + Compares the impact of the number of convolution layers for the default representation. + """ + latin_generator = SeaPearl.LatinGenerator(board_size, density) + + SR_default = SeaPearl.DefaultStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.DefaultTrajectoryState} + + chosen_features = Dict( + "constraint_type" => true, + "variable_initial_domain_size" => true, + "values_onehot" => true, + ) + + experiment_n_conv(board_size, n_episodes, n_instances; + n_eval=n_eval, + generator=latin_generator, + SR=SR_default, + chosen_features=chosen_features, + feature_size= 6 + board_size, + type="default_chosen", + output_size = board_size) +end + +function experiment_default_default_n_conv_latin(board_size, density, n_episodes, n_instances; n_eval=10) + """ + Compares the impact of the number of convolution layers for the default representation. + """ + latin_generator = SeaPearl.LatinGenerator(board_size, density) + + SR_default = SeaPearl.DefaultStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.DefaultTrajectoryState} + + + experiment_n_conv(board_size, n_episodes, n_instances; + n_eval=n_eval, + generator=latin_generator, + SR=SR_default, + feature_size=3, + chosen_features=nothing, + type="default_default", + output_size = board_size) +end + +############################################################################### +######### Experiment Type 3 +######### +######### +############################################################################### + + +function experiment_chosen_features_heterogeneous_latin(board_size, density, n_episodes, n_instances; n_eval=10) + """ + Compares the impact of the number of convolution layers for the heterogeneous representation. 
+ """ + latin_generator = SeaPearl.LatinGenerator(board_size, density) + + + chosen_features_list = [ + [ + Dict( + "constraint_type" => true, + "variable_initial_domain_size" => true, + "values_onehot" => true, + ), + [1, 2, board_size] + ], + [ + Dict( + "constraint_type" => true, + "variable_initial_domain_size" => true, + "values_raw" => true, + ), + [1, 2, 1] + ], + [ + Dict( + "constraint_type" => true, + "variable_initial_domain_size" => true, + "variable_domain_size" => true, + "values_onehot" => true, + ), + [2, 2, board_size] + ], + [ + Dict( + "constraint_activity" => true, + "constraint_type" => true, + "variable_initial_domain_size" => true, + "values_onehot" => true, + ), + [1, 3, board_size] + ], + [ + Dict( + "constraint_activity" => true, + "constraint_type" => true, + "variable_initial_domain_size" => true, + "variable_domain_size" => true, + "values_raw" => true, + ), + [2, 3, 1] + ], + [ + Dict( + "constraint_activity" => true, + "constraint_type" => true, + "nb_not_bounded_variable" => true, + "variable_initial_domain_size" => true, + "variable_domain_size" => true, + "variable_is_bound" => true, + "values_raw" => true, + ), + [3, 4, 1] + ], + ] + + experiment_chosen_features_heterogeneous(board_size, n_episodes, n_instances; + n_eval=n_eval, + generator=latin_generator, + chosen_features_list=chosen_features_list, + type="latin", + output_size = board_size + ) +end + +############################################################################### +######### Experiment Type 4 +######### +######### +############################################################################### + + +function experiment_heuristic_heterogeneous_latin(board_size, density, n_episodes, n_instances; n_layers_graph=3, n_eval=10, reward=SeaPearl.GeneralReward) + """ + Compares the simple and the supervised learned heuristic for the heterogeneous representation. + """ + latin_generator = SeaPearl.LatinGenerator(board_size, density) + + # Basic value-selection heuristic + selectMin(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.minimum(x.domain) + heuristic_min = SeaPearl.BasicHeuristic(selectMin) + basicHeuristics = OrderedDict( + "min" => heuristic_min + ) + + experiment_heuristic_heterogeneous(board_size, n_episodes, n_instances; + chosen_features=nothing, + feature_size = [2, 3, 1], + output_size = board_size, + generator = latin_generator, + basicHeuristics = basicHeuristics, + n_layers_graph = n_layers_graph, + n_eval = n_eval, + reward = reward, + type = "latin", + eta_decay_steps = Int(floor(n_episodes/1.5)), + helpValueHeuristic = heuristic_min, + eta_init = 1.0, + eta_stable = 0.0 + ) +end + +############################################################################### +######### Experiment Type 5 +######### +######### +############################################################################### + +function experiment_explorer_heterogeneous_latin(board_size, density, n_episodes, n_instances; n_layers_graph=3, n_eval=10, reward=SeaPearl.GeneralReward) + """ + Compares different action explorers for the heterogeneous representation. 
+    - an agent with the epsilon_greedy explorer
+    - an agent with the upper confidence bound explorer
+    """
+    latin_generator = SeaPearl.LatinGenerator(board_size, density)
+
+    # Basic value-selection heuristic
+    selectMin(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.minimum(x.domain)
+    heuristic_min = SeaPearl.BasicHeuristic(selectMin)
+    basicHeuristics = OrderedDict(
+        "min" => heuristic_min
+    )
+
+    experiment_explorer_heterogeneous(board_size, n_episodes, n_instances;
+        chosen_features=nothing,
+        feature_size = [2, 3, 1],
+        output_size = board_size,
+        generator = latin_generator,
+        basicHeuristics = basicHeuristics,
+        n_layers_graph = n_layers_graph,
+        n_eval = n_eval,
+        reward = reward,
+        type = "latin",
+        decay_steps=2000,
+        c=2.0,
+    )
+end
+
+###############################################################################
+######### Experiment Type 6
+#########
+#########
+###############################################################################
+
+function experiment_nn_heterogeneous_latin(board_size, density, n_episodes, n_instances; n_layers_graph=3, n_eval=10, reward=SeaPearl.GeneralReward, pool = SeaPearl.sumPooling())
+    """
+    Compares different CPNNs for the heterogeneous representation.
+    """
+    latin_generator = SeaPearl.LatinGenerator(board_size, density)
+
+    chosen_features = Dict(
+        "constraint_activity" => true,
+        "constraint_type" => true,
+        "variable_initial_domain_size" => true,
+        "variable_domain_size" => true,
+        "values_raw" => true,
+    )
+
+    # Basic value-selection heuristic
+    selectMin(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.minimum(x.domain)
+    heuristic_min = SeaPearl.BasicHeuristic(selectMin)
+    basicHeuristics = OrderedDict(
+        "min" => heuristic_min
+    )
+
+    experiment_nn_heterogeneous(board_size, n_episodes, n_instances;
+        chosen_features = chosen_features,
+        feature_size = [2, 3, 1],
+        output_size = board_size,
+        generator = latin_generator,
+        n_layers_graph = n_layers_graph,
+        n_eval = n_eval,
+        reward = reward,
+        type = "latin",
+        decay_steps=2000,
+        c=2.0,
+        basicHeuristics=basicHeuristics,
+        pool = pool
+    )
+end
+
+###############################################################################
+######### Experiment Type 8
+#########
+#########
+###############################################################################
+
+function experiment_chosen_features_hetcpnn_latin(chosen_features_list, board_size, density, n_episodes, n_instances; n_eval=10)
+    """
+    Compares the impact of different sets of chosen features for the heterogeneous representation, using the HeterogeneousCPNN architecture.
+    """
+    latin_generator = SeaPearl.LatinGenerator(board_size, density)
+    restartPerInstances = 1
+
+    experiment_chosen_features_hetcpnn(
+        board_size+1,
+        10,
+        n_episodes,
+        n_instances,
+        restartPerInstances;
+        update_horizon=10,
+        output_size = board_size,
+        generator=latin_generator,
+        chosen_features_list=chosen_features_list,
+        type="latin_"*string(board_size),
+    )
+end
+
+function experiment_chosen_features_hetffcpnn_latin(chosen_features_list, board_size, density, n_episodes, n_instances; n_eval=10)
+    """
+    Compares the impact of different sets of chosen features for the heterogeneous representation, using the heterogeneous FullFeaturedCPNN architecture.
+ """ + latin_generator = SeaPearl.LatinGenerator(board_size, density) + restartPerInstances = 1 + + + experiment_chosen_features_hetffcpnn( + board_size+1, + 10, + n_episodes, + n_instances, + restartPerInstances; + update_horizon=10, + output_size = board_size, + generator=latin_generator, + chosen_features_list=chosen_features_list, + type="latin_"*string(board_size), + ) +end + +############################################################################### +######### Experiment Type 9 +######### +######### +############################################################################### + +function experiment_transfer_heterogeneous_latin(board_size, + board_size_transfered, + density, + n_episodes, + n_episodes_transfered, + n_instances; + n_layers_graph=3, + n_eval=10, + n_eval_transfered=10, + reward=SeaPearl.GeneralReward, + decay_steps=2000, + trajectory_capacity=2000) + """ + + """ + latin_generator = SeaPearl.LatinGenerator(board_size, density) + latin_generator_transfered = SeaPearl.LatinGenerator(board_size_transfered, density) + + experiment_transfer_heterogeneous(board_size, board_size_transfered, n_episodes, n_episodes_transfered, n_instances; + chosen_features=nothing, + feature_size = [2, 3, 1], + output_size = board_size, + output_size_transfered = board_size_transfered, + generator = latin_generator, + generator_transfered = latin_generator_transfered, + n_layers_graph = n_layers_graph, + n_eval = n_eval, + n_eval_transfered=n_eval_transfered, + reward = reward, + type = "latin", + decay_steps=decay_steps, + trajectory_capacity=trajectory_capacity, + ) +end + +############################################################################### +######### Experiment Type 10 +######### +######### Restart +############################################################################### +function experiment_restart_heterogeneous_latin(board_size, density, n_episodes, n_instances; + restart_list = [1, 5, 10, 20], + n_layers_graph=3, + n_eval=10, + reward=SeaPearl.GeneralReward, + decay_steps=2000, + trajectory_capacity=2000) + + latin_generator = SeaPearl.LatinGenerator(board_size, density) + + experiment_restart_heterogeneous(board_size, n_episodes, n_instances; + restart_list = restart_list, + feature_size = [2, 3, 1], + output_size = board_size, + generator = latin_generator, + n_layers_graph = n_layers_graph, + n_eval = n_eval, + reward = reward, + type = "latin", + decay_steps=decay_steps, + trajectory_capacity=trajectory_capacity + ) +end diff --git a/learning_cp/comparison/nqueens.jl b/learning_cp/comparison/nqueens.jl new file mode 100644 index 0000000..e97965e --- /dev/null +++ b/learning_cp/comparison/nqueens.jl @@ -0,0 +1,495 @@ +include("../common/experiment.jl") +include("../common/utils.jl") +include("comparison.jl") + +############################################################################### +######### Experiment Type 1 +######### +######### +############################################################################### + +""" +Compares three agents: + - an agent with the default graph representation and default features; + - an agent with the default graph representation and chosen features; + - an agent with the heterogeneous graph representation and chosen features. 
+""" +function experiment_representation_nqueens(board_size, n_episodes, n_instances; n_layers_graph=2, n_eval=10, reward=SeaPearl.GeneralReward) + + nqueens_generator = SeaPearl.NQueensGenerator(board_size) + + experiment_representation(board_size, n_episodes, n_instances; + chosen_features=nothing, + feature_sizes = [3, 12, [2, 6, 1]], + output_size = board_size, + generator = nqueens_generator, + basicHeuristics=nothing, + n_layers_graph=n_layers_graph, + n_eval=n_eval, + reward=reward, + type="nqueens", + ) +end + +############################################################################### +######### Experiment Type 2 +######### +######### +############################################################################### + +""" +Compares the impact of the number of convolution layers for the heterogeneous representation. +""" +function experiment_heterogeneous_n_conv(board_size, n_episodes, n_instances; n_eval=10) + + nqueens_generator = SeaPearl.ClusterizedGraphColoringGenerator(board_size) + SR_heterogeneous = SeaPearl.HeterogeneousStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.HeterogeneousTrajectoryState} + + chosen_features = Dict( + "constraint_type" => true, + "variable_initial_domain_size" => true, + "values_onehot" => true, + ) + + experiment_n_conv(board_size, n_episodes, n_instances; + n_eval=n_eval, + generator=nqueens_generator, + SR=SR_heterogeneous, + chosen_features=chosen_features, + feature_size=[1, 2, board_size], + type="heterogeneous", + output_size = board_size) +end + +""" +Compares the impact of the number of convolution layers for the default representation. +""" +function experiment_default_chosen_n_conv(n_nodes, n_min_color, density, n_episodes, n_instances; n_eval=10) + + nqueens_generator = SeaPearl.ClusterizedGraphColoringGenerator(n_nodes, n_min_color, density) + SR_default = SeaPearl.DefaultStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.DefaultTrajectoryState} + + chosen_features = Dict( + "constraint_type" => true, + "variable_initial_domain_size" => true, + "values_onehot" => true, + ) + + + + experiment_n_conv(board_size, n_episodes, n_instances; + n_eval=n_eval, + generator=nqueens_generator, + SR=SR_default, + chosen_features=chosen_features, + feature_size=6 + n_nodes, + type="default_chosen", + output_size = board_size) +end + +""" +Compares the impact of the number of convolution layers for the default representation. +""" +function experiment_default_default_n_conv(board_size, n_episodes, n_instances; n_eval=10) + + nqueens_generator = SeaPearl.ClusterizedGraphColoringGenerator(board_size) + SR_default = SeaPearl.DefaultStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.DefaultTrajectoryState} + + + experiment_n_conv(board_size, n_episodes, n_instances; + n_eval=n_eval, + generator=nqueens_generator, + SR=SR_default, + feature_size=3, + chosen_features=nothing, + type="default_default", + output_size = board_size) +end + +############################################################################### +######### Experiment Type 3 +######### +######### +############################################################################### +""" +Compares the impact of the chosen features for the heterogeneous representation. 
+""" +function experiment_chosen_features_heterogeneous_nqueens(board_size, n_episodes, n_instances; n_eval=10, reward=SeaPearl.GeneralReward) + + nqueens_generator = SeaPearl.NQueensGenerator(board_size) + + chosen_features_list = [ + [ + Dict( + "constraint_type" => true, + "variable_initial_domain_size" => true, + "values_onehot" => true, + ), + [1, 5, board_size] + ], + [ + Dict( + "constraint_type" => true, + "variable_initial_domain_size" => true, + "values_raw" => true, + ), + [1, 5, 1] + ], + [ + Dict( + "constraint_type" => true, + "variable_initial_domain_size" => true, + "variable_domain_size" => true, + "values_onehot" => true, + ), + [2, 5, board_size] + ], + [ + Dict( + "constraint_activity" => true, + "constraint_type" => true, + "variable_initial_domain_size" => true, + "values_onehot" => true, + ), + [1, 6, board_size] + ], + [ + Dict( + "constraint_activity" => true, + "constraint_type" => true, + "variable_initial_domain_size" => true, + "variable_domain_size" => true, + "values_raw" => true, + ), + [2, 6, 1] + ], + [ + Dict( + "constraint_activity" => true, + "constraint_type" => true, + "nb_not_bounded_variable" => true, + "variable_initial_domain_size" => true, + "variable_domain_size" => true, + "variable_is_bound" => true, + "values_raw" => true, + ), + [3, 7, 1] + ], + ] + + + experiment_chosen_features_heterogeneous(board_size, n_episodes, n_instances; + n_eval=n_eval, + generator=nqueens_generator, + chosen_features_list=chosen_features_list, + type="nqueens", + output_size=board_size, + reward=reward) +end + +############################################################################### +######### Experiment Type 5 +######### +######### +############################################################################### +""" +Compares different action explorers with the heterogeneous representation for the nqueens problem. +""" +function experiment_explorer_heterogeneous_nqueens(board_size, n_episodes, n_instances; n_layers_graph=3, n_eval=10, reward=SeaPearl.GeneralReward) + + nqueens_generator = SeaPearl.NQueensGenerator(board_size) + + experiment_explorer_heterogeneous(board_size, n_episodes, n_instances; + chosen_features=nothing, + feature_size = [2, 6, 1], + output_size = board_size, + generator = nqueens_generator, + n_layers_graph = n_layers_graph, + n_eval = n_eval, + reward = reward, + type = "nqueens", + decay_steps=2000, + c=2.0, + basicHeuristics=nothing + ) +end + +############################################################################### +######### Experiment Type 6 +######### +######### +############################################################################### +""" +Compares different CPNNs with the heterogeneous representation for the nqueens problem. 
+ +""" +function experiment_nn_heterogeneous_nqueens(board_size, n_episodes, n_instances; n_layers_graph=3, n_eval=10, reward=SeaPearl.GeneralReward) + + nqueens_generator = SeaPearl.NQueensGenerator(board_size) + + experiment_nn_heterogeneous(board_size, n_episodes, n_instances; + chosen_features=nothing, + feature_size = [2, 6, 1], + output_size = board_size, + generator = nqueens_generator, + n_layers_graph = n_layers_graph, + n_eval = n_eval, + reward = reward, + type = "nqueens", + decay_steps=2000, + c=2.0, + basicHeuristics=nothing + ) +end + + +############################################################################### +######### Experiment Type 8 +######### +######### +############################################################################### + + +function experiment_chosen_features_hetcpnn_nqueens(chosen_features_list, board_size, n_episodes, n_instances; n_eval=10) + """ + Compares the impact of the number of convolution layers for the heterogeneous representation. + """ + generator = SeaPearl.NQueensGenerator(board_size) + restartPerInstances = 1 + + experiment_chosen_features_hetcpnn( + board_size, + board_size-5, + n_episodes, + n_instances, + restartPerInstances; + output_size = board_size, + update_horizon = board_size-5, + generator = generator, + chosen_features_list = chosen_features_list, + type = "nqueens_"*string(board_size) + ) +end +function experiment_chosen_features_hetffcpnn_nqueens(chosen_features_list, board_size, n_episodes, n_instances; n_eval=10) + """ + Compares the impact of the number of convolution layers for the heterogeneous representation. + """ + generator = SeaPearl.NQueensGenerator(board_size) + restartPerInstances = 1 + + experiment_chosen_features_hetffcpnn( + board_size, + Int(round(board_size*0.7)), + n_episodes, + n_instances, + restartPerInstances; + output_size = board_size, + update_horizon = board_size-5, + generator=generator, + chosen_features_list=chosen_features_list, + type = "nqueens_"*string(board_size) + ) +end + +############################################################################### +######### Experiment Type 9 +######### +######### +############################################################################### + +function experiment_transfer_heterogeneous_nqueens(board_size, + board_size_transfered, + n_episodes, + n_episodes_transfered, + n_instances; + n_layers_graph=3, + n_eval=10, + n_eval_transfered=10, + reward=SeaPearl.GeneralReward, + decay_steps=2000, + trajectory_capacity=2000) + """ + + """ + nqueens_generator = SeaPearl.NQueensGenerator(board_size) + nqueens_generator_transfered = SeaPearl.NQueensGenerator(board_size_transfered) + + experiment_transfer_heterogeneous(board_size, board_size_transfered, n_episodes, n_episodes_transfered, n_instances; + chosen_features=nothing, + feature_size = [2, 6, 1], + output_size = board_size, + output_size_transfered = board_size_transfered, + generator = nqueens_generator, + generator_transfered = nqueens_generator_transfered, + n_layers_graph = n_layers_graph, + n_eval = n_eval, + n_eval_transfered = n_eval_transfered, + reward = reward, + type = "nqueens", + decay_steps=decay_steps, + trajectory_capacity=trajectory_capacity, + ) +end + +############################################################################### +######### Experiment Type 10 +######### +######### Restart +############################################################################### +function experiment_restart_heterogeneous_nqueens(board_size, n_episodes, n_instances; + restart_list = [1, 
5, 10, 20], + n_layers_graph=3, + n_eval=10, + reward=SeaPearl.GeneralReward, + decay_steps=2000, + trajectory_capacity=2000) + + nqueens_generator = SeaPearl.NQueensGenerator(board_size) + + experiment_restart_heterogeneous(board_size, n_episodes, n_instances; + restart_list = restart_list, + feature_size = [2, 6, 1], + output_size = board_size, + generator = nqueens_generator, + n_layers_graph = n_layers_graph, + n_eval = n_eval, + reward = reward, + type = "nqueens", + decay_steps=decay_steps, + trajectory_capacity=trajectory_capacity + ) +end + +############################################################################### +######### Experiment Type11 +######### +######### +############################################################################### +""" +Compares different CPNNs with the heterogeneous representation for the nqueens problem. + +""" +function experiment_activation_heterogeneous_nqueens(board_size, n_episodes, n_instances; n_layers_graph=3, n_eval=10, reward=SeaPearl.GeneralReward) + + nqueens_generator = SeaPearl.NQueensGenerator(board_size) + + experiment_activation_heterogeneous(board_size, n_episodes, n_instances; + chosen_features=nothing, + feature_size = [2, 6, 1], + output_size = board_size, + generator = nqueens_generator, + n_layers_graph = n_layers_graph, + n_eval = n_eval, + reward = reward, + type = "nqueens", + decay_steps=2000, + c=2.0, + basicHeuristics=nothing + ) +end + +############################################################################### +######### Experiment Type MALIK +######### +######### +############################################################################### +""" +Compares different RL Agents with the heterogeneous representation for the nqueens problem. + +""" +function experiment_rl_heterogeneous_nqueens(board_size, n_episodes, n_instances; n_layers_graph=3, n_eval=10, reward=SeaPearl.GeneralReward) + + nqueens_generator = SeaPearl.NQueensGenerator(board_size) + + chosen_features = Dict( + "variable_initial_domain_size" => true, + "constraint_type" => true, + "variable_domain_size" => true, + "values_raw" => true) + + feature_size = [2,5,1] + n_step_per_episode = Int(round(board_size*0.75)) + experiment_rl_heterogeneous(board_size, n_episodes, n_instances; + chosen_features=chosen_features, + feature_size = feature_size, + output_size = board_size, + generator = nqueens_generator, + n_layers_graph = n_layers_graph, + n_eval = n_eval, + reward = reward, + type = "nqueens", + decay_steps=250*n_step_per_episode, + basicHeuristics=nothing + ) +end + +############################################################################### +######### Simple nqueens experiment +######### +######### +############################################################################### + +function simple_experiment_nqueens(n, n_episodes, n_instances, variable_selection, chosen_features, feature_size; n_eval=10, eval_timeout=60) + """ + Runs a single experiment on nqueens + """ + n_step_per_episode = Int(round(n*0.7)) + reward = SeaPearl.GeneralReward + generator = SeaPearl.NQueensGenerator(n) + SR_heterogeneous = SeaPearl.HeterogeneousStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.HeterogeneousTrajectoryState} + trajectory_capacity = 700*n_step_per_episode + update_horizon = n_step_per_episode + learnedHeuristics = OrderedDict{String,SeaPearl.LearnedHeuristic}() + agent_hetcpnn = get_heterogeneous_agent(; + get_heterogeneous_trajectory = () -> get_heterogeneous_slart_trajectory(capacity=trajectory_capacity, n_actions=n), + 
get_explorer = () -> get_epsilon_greedy_explorer(1000*n_step_per_episode, 0.05), + batch_size=16, + update_horizon=update_horizon, + min_replay_history=32*update_horizon, + update_freq=1, + target_update_freq=7*n_step_per_episode, + get_heterogeneous_nn = () -> get_heterogeneous_fullfeaturedcpnn( + conv_type="gc", + feature_size=feature_size, + conv_size=4, + dense_size=4, + output_size=1, + n_layers_graph=3, + n_layers_node=2, + n_layers_output=2 + ) + ) + learned_heuristic_hetffcpnn = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,reward,SeaPearl.FixedOutput}(agent_hetcpnn; chosen_features=chosen_features) + learnedHeuristics["hetffcpnn"] = learned_heuristic_hetffcpnn + variableHeuristic = nothing + if variable_selection == "min" + variableHeuristic = SeaPearl.MinDomainVariableSelection{false}() + elseif variable_selection == "random" + variableHeuristic = SeaPearl.RandomVariableSelection{false}() + else + error("Variable selection method not implemented!") + end + + basicHeuristics = OrderedDict( + "random" => SeaPearl.RandomHeuristic() + ) + + metricsArray, eval_metricsArray = trytrain( + nbEpisodes=n_episodes, + evalFreq=Int(floor(n_episodes / n_eval)), + nbInstances=n_instances, + restartPerInstances=1, + eval_strategy = SeaPearl.ILDSearch(2), + generator=generator, + variableHeuristic=variableHeuristic, + learnedHeuristics=learnedHeuristics, + basicHeuristics=basicHeuristics; + out_solver=true, + verbose=true, + nbRandomHeuristics=0, + exp_name= "nqueens_"*string(n)*"_heterogeneous_ffcpnn_"*string(n_episodes) * "_" * string(size) * "_", + eval_timeout=eval_timeout + ) + nothing + +end \ No newline at end of file diff --git a/learning_cp/comparison/rb.jl b/learning_cp/comparison/rb.jl new file mode 100644 index 0000000..34b2e65 --- /dev/null +++ b/learning_cp/comparison/rb.jl @@ -0,0 +1,115 @@ +include("../common/experiment.jl") +include("../common/utils.jl") +include("comparison.jl") + +function experiment_representation(k, n, α, r, p, n_episodes, n_instances; n_layers_graph=2, n_eval=10) + """ + Compare three agents: + - an agent with the default representation and default features; + - an agent with the default representation and chosen features; + - an agent with the heterogeneous representation and chosen features. 
+ """ + rb_generator = SeaPearl.RBGenerator(k, n, α, r, p) + SR_default = SeaPearl.DefaultStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.DefaultTrajectoryState} + SR_heterogeneous = SeaPearl.HeterogeneousStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.HeterogeneousTrajectoryState} + + agent_default_default = get_default_agent(; + capacity=2000, + decay_steps=2000, + ϵ_stable=0.01, + batch_size=16, + update_horizon=8, + min_replay_history=256, + update_freq=1, + target_update_freq=8, + feature_size=3, + conv_size=8, + dense_size=16, + output_size=rb_generator.d, + n_layers_graph=n_layers_graph, + n_layers_node=2, + n_layers_output=2 + ) + learned_heuristic_default_default = SeaPearl.SimpleLearnedHeuristic{SR_default,SeaPearl.GeneralReward,SeaPearl.FixedOutput}(agent_default_default) + + chosen_features = Dict( + "constraint_type" => true, + "variable_initial_domain_size" => true, + "values_onehot" => true, + ) + + agent_default_chosen = get_default_agent(; + capacity=2000, + decay_steps=2000, + ϵ_stable=0.01, + batch_size=16, + update_horizon=8, + min_replay_history=256, + update_freq=1, + target_update_freq=8, + feature_size=rb_generator.d + 5, + conv_size=8, + dense_size=16, + output_size=rb_generator.d, + n_layers_graph=n_layers_graph, + n_layers_node=2, + n_layers_output=2 + ) + learned_heuristic_default_chosen = SeaPearl.SimpleLearnedHeuristic{SR_default,SeaPearl.GeneralReward,SeaPearl.FixedOutput}(agent_default_chosen; chosen_features=chosen_features) + + agent_heterogeneous = get_heterogeneous_agent(; + capacity=2000, + decay_steps=2000, + ϵ_stable=0.01, + batch_size=16, + update_horizon=8, + min_replay_history=256, + update_freq=1, + target_update_freq=8, + feature_size=[1, 1, rb_generator.d], + conv_size=8, + dense_size=16, + output_size=rb_generator.d, + n_layers_graph=n_layers_graph, + n_layers_node=2, + n_layers_output=2 + ) + learned_heuristic_heterogeneous = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,SeaPearl.GeneralReward,SeaPearl.FixedOutput}(agent_heterogeneous; chosen_features=chosen_features) + + + # Basic value-selection heuristic + selectMin(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.minimum(x.domain) + heuristic_min = SeaPearl.BasicHeuristic(selectMin) + + learnedHeuristics = OrderedDict( + "defaultdefault" => learned_heuristic_default_default, + "defaultchosen" => learned_heuristic_default_chosen, + "heterogeneous" => learned_heuristic_heterogeneous, + ) + basicHeuristics = OrderedDict( + # "min" => heuristic_min, + "random1" => SeaPearl.RandomHeuristic(), + # "random2" => SeaPearl.RandomHeuristic() + ) + + # ------------------- + # Variable Heuristic definition + # ------------------- + # variableHeuristic = SeaPearl.MinDomainVariableSelection{false}() + variableHeuristic = SeaPearl.RandomVariableSelection{false}() + + metricsArray, eval_metricsArray = trytrain( + nbEpisodes=n_episodes, + evalFreq=Int(floor(n_episodes / n_eval)), + nbInstances=n_instances, + restartPerInstances=1, + generator=rb_generator, + variableHeuristic=variableHeuristic, + learnedHeuristics=learnedHeuristics, + basicHeuristics=basicHeuristics; + out_solver=true, + verbose=false, + nbRandomHeuristics=0, + exp_name="rb_representation_" * string(n_episodes) * "_" * string(rb_generator.d) * "_" + ) +end \ No newline at end of file diff --git a/learning_cp/comparison/run.jl b/learning_cp/comparison/run.jl new file mode 100644 index 0000000..abc9ac5 --- /dev/null +++ b/learning_cp/comparison/run.jl @@ -0,0 +1,13 @@ +include("nqueens.jl") +include("graphcoloring.jl") 
+
+# experiment_nn_heterogeneous_nqueens(25, 20001, 1; reward=SeaPearl.GeneralReward)
+# experiment_explorer_heterogeneous_nqueens(20, 10001, 10)
+# experiment_chosen_features_heterogeneous_nqueens(10, 1001, 10; )
+# experiment_pooling_heterogeneous_graphcoloring(10, 5, 0.95, 3001, 10)
+# experiment_chosen_features_heterogeneous_graphcoloring(10, 5, 0.95, 3001, 10)
+# experiment_nn_heterogeneous_graphcoloring(40, 5, 0.95, 4001, 10; n_eval=40)
+# experiment_activation_heterogeneous_graphcoloring(20, 5, 0.95, 3001, 10; n_eval=40)
+# experiment_activation_heterogeneous_nqueens(10, 1001, 1; reward=SeaPearl.GeneralReward)
+# experiment_features_pooling_heterogeneous_graphcoloring(20, 5, 0.95, 3001, 10; n_eval=40)
+experiment_update_freq_graphcoloring(25, 25, 5, .95, 1500, 10)
diff --git a/learning_cp/comparison/tuto_tensorboard.txt b/learning_cp/comparison/tuto_tensorboard.txt
new file mode 100644
index 0000000..3561850
--- /dev/null
+++ b/learning_cp/comparison/tuto_tensorboard.txt
@@ -0,0 +1,37 @@
+SERVER side:
+
+1) Start a CONDA environment
+2) Install TensorBoard in the environment: "pip install tensorboard"
+3) Find the TensorBoard install folder with: "pip show tensorboard"
+4) Go to the TensorBoard install folder and launch the TensorBoard server, specifying the log folder:
+
+Example:
+"
+@thanos.umontreal.cirrelt.lan:~PATH_TO_TENSORBOARD_INSTALL_FILE (1045)>python main.py --logdir="MY_LOG_FOLDER"
+"
+
+The interface is then available at: "http://localhost:6006/"
+
+CLIENT side:
+"""
+using TensorBoardLogger, Logging, Random
+lg=TBLogger("tensorboard_logs/run", min_level=Logging.Info)
+
+logging example:
+
+with_logger(lg) do
+    for i=1:100
+        x0 = 0.5+i/30; s0 = 0.5/(i/20);
+        edges = collect(-5:0.1:5)
+        centers = collect(edges[1:end-1] .+0.05)
+        histvals = [exp(-((c-x0)/s0)^2) for c=centers]
+        data_tuple = (edges, histvals)
+        data_struct = sample_struct(i^2, i^1.5-0.3*i)
+
+        @info "test" i=i j=i^2 dd=rand(10).+0.1*i hh=data_tuple
+        @info "test_2" i=i j=2^i hh=data_tuple log_step_increment=0
+        @info "" my_weird_struct=data_struct log_step_increment=0
+        @debug "debug_msg" this_wont_show_up=i
+    end
+end
\ No newline at end of file
diff --git a/learning_cp/graphcoloring/Project.toml b/learning_cp/graphcoloring/Project.toml
index 43f9bbc..188ee61 100644
--- a/learning_cp/graphcoloring/Project.toml
+++ b/learning_cp/graphcoloring/Project.toml
@@ -1,9 +1,13 @@
 [deps]
+ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
 BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0"
+CircularArrayBuffers = "9de3a189-e0c0-4e15-ba3b-b14b9fb0aec1"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 GeometricFlux = "7e08b658-56d3-11e9-2997-919d5b31e4ea"
+Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6"
 JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
 LightGraphs = "093fc24a-ae57-5d10-9952-331d41423f4d"
+OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
 ReinforcementLearning = "158674fc-8238-5cab-b5ba-03dfc80d1318"
 Revise = "295af30f-e4ad-537b-8983-00126c2a3abe"
 SeaPearl = "c13076dc-bdcd-48ba-bc88-4b44c2587ab3"
diff --git a/learning_cp/graphcoloring/agents.jl b/learning_cp/graphcoloring/agents.jl
deleted file mode 100644
index 5fabbd9..0000000
--- a/learning_cp/graphcoloring/agents.jl
+++ /dev/null
@@ -1,78 +0,0 @@
-# Model definition
-n = coloring_generator.n
-trajectory_capacity = 3000
-
-approximator_model = SeaPearl.CPNN(
-    graphChain = Flux.Chain(
-        SeaPearl.GraphConv(numInFeatures => 16, Flux.leakyrelu),
-        SeaPearl.GraphConv(16 => 16, Flux.leakyrelu), -
SeaPearl.GraphConv(16 => 16, Flux.leakyrelu) - ), - nodeChain = Flux.Chain( - Flux.Dense(16, 32, Flux.leakyrelu), - Flux.Dense(32, 16, Flux.leakyrelu), - ), - #globalChain = Flux.Chain( - ## Flux.Dense(numGlobalFeature, 64, Flux.leakyrelu), - # Flux.Dense(64, 32, Flux.leakyrelu), - # Flux.Dense(32, 16, Flux.leakyrelu), - #), - outputChain = Flux.Chain( - Flux.Dense(16, 16, Flux.leakyrelu), - Flux.Dense(16, n), - )) #|> gpu -target_approximator_model = SeaPearl.CPNN( - graphChain = Flux.Chain( - SeaPearl.GraphConv(numInFeatures => 16, Flux.leakyrelu), - SeaPearl.GraphConv(16 => 16, Flux.leakyrelu), - SeaPearl.GraphConv(16 => 16, Flux.leakyrelu) - ), - nodeChain = Flux.Chain( - Flux.Dense(16, 32, Flux.leakyrelu), - Flux.Dense(32, 16, Flux.leakyrelu), - ), - #globalChain = Flux.Chain( - ## Flux.Dense(numGlobalFeature, 64, Flux.leakyrelu), - # Flux.Dense(64, 32, Flux.leakyrelu), - # Flux.Dense(32, 16, Flux.leakyrelu), - #), - outputChain = Flux.Chain( - Flux.Dense(16, 16, Flux.leakyrelu), - Flux.Dense(16, n), - ) #|> gpu -) #|> gpu - - -agent = RL.Agent( - policy = RL.QBasedPolicy( - learner = RL.DQNLearner( - approximator = RL.NeuralNetworkApproximator( - model = approximator_model, - optimizer = ADAM() - ), - target_approximator = RL.NeuralNetworkApproximator( - model = target_approximator_model, - optimizer = ADAM() - ), - loss_func = Flux.Losses.huber_loss, - γ = 0.99f0, - batch_size = 16, #32, - update_horizon = 4, #what if the number of nodes in a episode is smaller - min_replay_history = 128, - update_freq = 1, - target_update_freq = 200, - ), - explorer = RL.EpsilonGreedyExplorer( - ϵ_stable = 0.01, - kind = :exp, - decay_steps = 3000, - step = 1, - #rng = rng - ) - ), - trajectory = RL.CircularArraySLARTTrajectory( - capacity = trajectory_capacity, - state = SeaPearl.DefaultTrajectoryState[] => (), - legal_actions_mask = Vector{Bool} => (n, ), - ) -) diff --git a/learning_cp/graphcoloring/features.jl b/learning_cp/graphcoloring/features.jl deleted file mode 100644 index d3514ca..0000000 --- a/learning_cp/graphcoloring/features.jl +++ /dev/null @@ -1,39 +0,0 @@ -struct BetterFeaturization <: SeaPearl.AbstractFeaturization end - -function SeaPearl.featurize(sr::SeaPearl.DefaultStateRepresentation{BetterFeaturization,TS}) where TS - g = sr.cplayergraph - features = zeros(Float32, coloring_generator.n+6, nv(g)) - for i in 1:nv(g) - cp_vertex = SeaPearl.cpVertexFromIndex(g, i) - if isa(cp_vertex, SeaPearl.VariableVertex) - features[1,i] = 1. - if g.cpmodel.objective == cp_vertex.variable - features[6, i] = 1. - end - end - if isa(cp_vertex, SeaPearl.ConstraintVertex) - features[2, i] = 1. - constraint = cp_vertex.constraint - if isa(constraint, SeaPearl.NotEqual) - features[4, i] = 1. - end - if isa(constraint, SeaPearl.LessOrEqual) - features[5, i] = 1. - end - end - if isa(cp_vertex, SeaPearl.ValueVertex) - features[3, i] = 1. - value = cp_vertex.value - features[6+value, i] = 1. 
- end - end - features -end - -function SeaPearl.feature_length(::Type{SeaPearl.DefaultStateRepresentation{BetterFeaturization, TS}}) where TS - coloring_generator.n+6 -end - -function SeaPearl.global_feature_length(::Type{SeaPearl.DefaultStateRepresentation{BetterFeaturization, TS}}) where TS - return 0 -end diff --git a/learning_cp/graphcoloring/graphcoloring.jl b/learning_cp/graphcoloring/graphcoloring.jl deleted file mode 100644 index 3ec0125..0000000 --- a/learning_cp/graphcoloring/graphcoloring.jl +++ /dev/null @@ -1,162 +0,0 @@ -using SeaPearl -using SeaPearlExtras -using ReinforcementLearning -const RL = ReinforcementLearning -using Flux -using GeometricFlux -using JSON -using BSON: @save, @load -using Dates -using Random -using LightGraphs - - -# ------------------- -# Experience variables -# ------------------- -nbEpisodes = 1001 -restartPerInstances = 1 -evalFreq = 100 -nbInstances = 50 -nbRandomHeuristics = 1 - -nbNodes = 20 -nbMinColor = 5 -density = 0.95 -# ------------------- -# Generator -# ------------------- -coloring_generator = SeaPearl.ClusterizedGraphColoringGenerator(nbNodes, nbMinColor, density) - -#include("rewards.jl") -include("features.jl") - -# ------------------- -# Internal variables -# ------------------- -featurizationType = BetterFeaturization -rewardType = SeaPearl.CPReward - -SR = SeaPearl.DefaultStateRepresentation{featurizationType, SeaPearl.DefaultTrajectoryState} -numInFeatures = SeaPearl.feature_length(SR) -#numGlobalFeature = SeaPearl.global_feature_length(SR) - -# ------------------- -# Agent definition -# ------------------- -include("agents.jl") - -# ------------------- -# Value Heuristic definition -# ------------------- - -learnedHeuristic = SeaPearl.SimpleLearnedHeuristic{SR, rewardType, SeaPearl.FixedOutput}(agent) -# Basic value-selection heuristic -selectMin(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.minimum(x.domain) -heuristic_min = SeaPearl.BasicHeuristic(selectMin) -function select_random_value(x::SeaPearl.IntVar; cpmodel=nothing) - selected_number = rand(1:length(x.domain)) - i = 1 - for value in x.domain - if i == selected_number - return value - end - i += 1 - end - @assert false "This should not happen" -end - -randomHeuristics = [] -for i in 1:nbRandomHeuristics - push!(randomHeuristics, SeaPearl.BasicHeuristic(select_random_value)) -end - -valueSelectionArray = [learnedHeuristic, heuristic_min] -append!(valueSelectionArray, randomHeuristics) -# ------------------- -# Variable Heuristic definition -# ------------------- -variableSelection = SeaPearl.MinDomainVariableSelection{false}() - -# ------------------- -# ------------------- -# Core function -# ------------------- -# ------------------- - -function trytrain(nbEpisodes::Int) - experienceTime = now() - dir = mkdir(string("exp_",Base.replace("$(round(experienceTime, Dates.Second(3)))",":"=>"-"))) - expParameters = Dict( - :experimentParameters => Dict( - :nbEpisodes => nbEpisodes, - :restartPerInstances => restartPerInstances, - :evalFreq => evalFreq, - :nbInstances => nbInstances, - ), - :generatorParameters => Dict( - :nbNodes => nbNodes, - :nbMinColor => nbMinColor, - :density => density - ), - :nbRandomHeuristics => nbRandomHeuristics, - :Featurization => Dict( - :featurizationType => featurizationType, - #:chosen_features => featurizationType == SeaPearl.FeaturizationHelper ? 
chosen_features : nothing - :chosen_features => nothing - ), - :learnerParameters => Dict( - :model => string(agent.policy.learner.approximator.model), - :gamma => agent.policy.learner.sampler.γ, - :batch_size => agent.policy.learner.sampler.batch_size, - :update_horizon => agent.policy.learner.sampler.n, - :min_replay_history => agent.policy.learner.min_replay_history, - :update_freq => agent.policy.learner.update_freq, - :target_update_freq => agent.policy.learner.target_update_freq, - ), - :explorerParameters => Dict( - :ϵ_stable => agent.policy.explorer.ϵ_stable, - :decay_steps => agent.policy.explorer.decay_steps, - ), - :trajectoryParameters => Dict( - :trajectoryType => typeof(agent.trajectory), - :capacity => trajectory_capacity - ), - :reward => rewardType - ) - open(dir*"/params.json", "w") do file - JSON.print(file, expParameters) - end - - metricsArray, eval_metricsArray = SeaPearl.train!( - valueSelectionArray=valueSelectionArray, - generator=coloring_generator, - nbEpisodes=nbEpisodes, - strategy=SeaPearl.DFSearch(), - variableHeuristic=variableSelection, - out_solver=true, - verbose = false, - evaluator=SeaPearl.SameInstancesEvaluator(valueSelectionArray,coloring_generator; evalFreq = evalFreq, nbInstances = nbInstances), - restartPerInstances = restartPerInstances - ) - - #saving model weights - model = agent.policy.learner.approximator - @save dir*"/model_gc"*string(coloring_generator.n)*".bson" model - - SeaPearlExtras.storedata(metricsArray[1]; filename=dir*"/graph_coloring_$(nbNodes)_traininglearned") - SeaPearlExtras.storedata(metricsArray[2]; filename=dir*"/graph_coloring_$(nbNodes)_traininggreedy") - for i = 1:nbRandomHeuristics - SeaPearlExtras.storedata(metricsArray[2+i]; filename=dir*"/graph_coloring_$(nbNodes)_trainingrandom$(i)") - end - SeaPearlExtras.storedata(eval_metricsArray[:,1]; filename=dir*"/graph_coloring_$(nbNodes)_learned") - SeaPearlExtras.storedata(eval_metricsArray[:,2]; filename=dir*"/graph_coloring_$(nbNodes)_greedy") - for i = 1:nbRandomHeuristics - SeaPearlExtras.storedata(eval_metricsArray[:,i+2]; filename=dir*"/graph_coloring_$(nbNodes)_random$(i)") - end - - return metricsArray, eval_metricsArray -end - -metricsArray, eval_metricsArray = trytrain(nbEpisodes) -nothing diff --git a/learning_cp/graphcoloring/rewards.jl b/learning_cp/graphcoloring/rewards.jl deleted file mode 100644 index 90ffc10..0000000 --- a/learning_cp/graphcoloring/rewards.jl +++ /dev/null @@ -1,29 +0,0 @@ - -mutable struct InspectReward <: SeaPearl.AbstractReward - value::Float32 -end - -InspectReward(model::SeaPearl.CPModel) = InspectReward(0) - -function SeaPearl.set_reward!(::Type{SeaPearl.StepPhase}, lh::SeaPearl.LearnedHeuristic{SR, InspectReward, O}, model::SeaPearl.CPModel, symbol::Union{Nothing, Symbol}) where { - SR <: SeaPearl.AbstractStateRepresentation, - O <: SeaPearl.ActionOutput -} - lh.reward.value += -1 - nothing -end - -function SeaPearl.set_reward!(::Type{SeaPearl.DecisionPhase}, lh::SeaPearl.LearnedHeuristic{SR, InspectReward, O}, model::SeaPearl.CPModel) where { - SR <: SeaPearl.AbstractStateRepresentation, - O <: SeaPearl.ActionOutput -} - lh.reward.value += -1 - nothing -end - -function SeaPearl.set_reward!(::Type{SeaPearl.EndingPhase}, lh::SeaPearl.LearnedHeuristic{SR, InspectReward, O}, model::SeaPearl.CPModel, symbol::Union{Nothing, Symbol}) where { - SR <: SeaPearl.AbstractStateRepresentation, - O <: SeaPearl.ActionOutput -} - nothing -end diff --git a/learning_cp/kep/Project.toml b/learning_cp/kep/Project.toml new file mode 100644 index 
0000000..9215568 --- /dev/null +++ b/learning_cp/kep/Project.toml @@ -0,0 +1,14 @@ +[deps] +ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63" +BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0" +Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" +Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" +GeometricFlux = "7e08b658-56d3-11e9-2997-919d5b31e4ea" +JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" +LightGraphs = "093fc24a-ae57-5d10-9952-331d41423f4d" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +ReinforcementLearning = "158674fc-8238-5cab-b5ba-03dfc80d1318" +Revise = "295af30f-e4ad-537b-8983-00126c2a3abe" +SeaPearl = "c13076dc-bdcd-48ba-bc88-4b44c2587ab3" +SeaPearlExtras = "90978a9a-af5e-4113-9033-e3aa0a1ac968" +Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" diff --git a/learning_cp/maxcut/Project.toml b/learning_cp/maxcut/Project.toml new file mode 100644 index 0000000..188ee61 --- /dev/null +++ b/learning_cp/maxcut/Project.toml @@ -0,0 +1,14 @@ +[deps] +ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63" +BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0" +CircularArrayBuffers = "9de3a189-e0c0-4e15-ba3b-b14b9fb0aec1" +Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" +GeometricFlux = "7e08b658-56d3-11e9-2997-919d5b31e4ea" +Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6" +JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" +LightGraphs = "093fc24a-ae57-5d10-9952-331d41423f4d" +OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" +ReinforcementLearning = "158674fc-8238-5cab-b5ba-03dfc80d1318" +Revise = "295af30f-e4ad-537b-8983-00126c2a3abe" +SeaPearl = "c13076dc-bdcd-48ba-bc88-4b44c2587ab3" +SeaPearlExtras = "90978a9a-af5e-4113-9033-e3aa0a1ac968" diff --git a/learning_cp/mis/Project.toml b/learning_cp/mis/Project.toml new file mode 100644 index 0000000..58bd8c1 --- /dev/null +++ b/learning_cp/mis/Project.toml @@ -0,0 +1,13 @@ +[deps] +BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0" +CircularArrayBuffers = "9de3a189-e0c0-4e15-ba3b-b14b9fb0aec1" +Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" +GeometricFlux = "7e08b658-56d3-11e9-2997-919d5b31e4ea" +Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6" +JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" +LightGraphs = "093fc24a-ae57-5d10-9952-331d41423f4d" +OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" +ReinforcementLearning = "158674fc-8238-5cab-b5ba-03dfc80d1318" +Revise = "295af30f-e4ad-537b-8983-00126c2a3abe" +SeaPearl = "c13076dc-bdcd-48ba-bc88-4b44c2587ab3" +SeaPearlExtras = "90978a9a-af5e-4113-9033-e3aa0a1ac968" diff --git a/learning_cp/nqueens/nqueens.jl b/learning_cp/nqueens/nqueens.jl index 3648f4c..c6d61a5 100644 --- a/learning_cp/nqueens/nqueens.jl +++ b/learning_cp/nqueens/nqueens.jl @@ -1,192 +1,102 @@ -using SeaPearl -using SeaPearlExtras -using ReinforcementLearning -const RL = ReinforcementLearning -using Flux -using GeometricFlux -using BSON: @save, @load -using JSON -using Random -using Dates -using Statistics -using LightGraphs - -include("rewards.jl") -include("features.jl") +include("../common/experiment.jl") # ------------------- -# Generator +# Parameters # ------------------- -board_size = 15 -nqueens_generator = SeaPearl.NQueensGenerator(board_size) +NB_EPISODES = @isdefined(NB_EPISODES) ? NB_EPISODES : 1001 +EVAL_FREQ = @isdefined(EVAL_FREQ) ? EVAL_FREQ : 200 +NB_INSTANCES = @isdefined(NB_INSTANCES) ? NB_INSTANCES : 10 +NB_RANDOM_HEURISTICS = @isdefined(NB_RANDOM_HEURISTICS) ? NB_RANDOM_HEURISTICS : 0 +RESTART_PER_INSTANCES = @isdefined(RESTART_PER_INSTANCES) ? RESTART_PER_INSTANCES : 1 +VERBOSE = @isdefined(VERBOSE) ? 
VERBOSE : false # ------------------- -# Features +# Generator # ------------------- -features_type = BetterFeaturization +board_size = @isdefined(SIZE) ? SIZE : 10 +nqueens_generator = SeaPearl.NQueensGenerator(board_size) -SR = SeaPearl.DefaultStateRepresentation{features_type, SeaPearl.DefaultTrajectoryState} +OUTPUT_SIZE = board_size # ------------------- -# Internal variables +# State Representation # ------------------- -numInFeatures = SeaPearl.feature_length(SR) +SR_default_default = SeaPearl.DefaultStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.DefaultTrajectoryState} +SR_default_chosen = SeaPearl.DefaultStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.DefaultTrajectoryState} +SR_heterogeneous = SeaPearl.HeterogeneousStateRepresentation{SeaPearl.DefaultFeaturization,SeaPearl.HeterogeneousTrajectoryState} -# ------------------- -# Experience variables -# ------------------- -nbEpisodes = 10000 -evalFreq = 1000 -nbInstances = 50 -nbRandomHeuristics = 0 -restartPerInstances = 1 +numInFeaturesDefault = 3 +numInFeaturesDefaultChosen = 9 + board_size +numInFeaturesHeterogeneous = [1, 5, board_size] # ------------------- # Agent definition # ------------------- -include("agents.jl") +include("../common/agents.jl") # ------------------- # Value Heuristic definition # ------------------- -rewardType = SeaPearl.CPReward - -eta_init = 1. -eta_stable = 0.1 -warmup_steps = 50 -decay_steps = 50 - -heuristic_used = "supervised" - -if heuristic_used == "simple" - learnedHeuristic = SeaPearl.SimpleLearnedHeuristic{SR, rewardType, SeaPearl.FixedOutput}(agent) -elseif heuristic_used == "supervised" - learnedHeuristic = SeaPearl.SupervisedLearnedHeuristic{SR, rewardType, SeaPearl.FixedOutput}( - agent, - eta_init=eta_init, - eta_stable=eta_stable, - warmup_steps=warmup_steps, - decay_steps=decay_steps, - rng=MersenneTwister(1234) - ) -end + +chosen_features = Dict( + "constraint_activity" => false, + "constraint_type" => true, + "nb_involved_constraint_propagation" => false, + "nb_not_bounded_variable" => false, + "variable_domain_size" => false, + "variable_initial_domain_size" => true, + "variable_is_bound" => false, + "values_onehot" => true, + "values_raw" => false, +) + +# Learned Heuristic +learnedHeuristic_default_default = SeaPearl.SimpleLearnedHeuristic{SR_default_default,SeaPearl.GeneralReward,SeaPearl.FixedOutput}(agent_default_default) +learnedHeuristic_default_chosen = SeaPearl.SimpleLearnedHeuristic{SR_default_chosen,SeaPearl.GeneralReward,SeaPearl.FixedOutput}(agent_default_chosen; chosen_features=chosen_features) +learnedHeuristic_heterogeneous = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous,SeaPearl.GeneralReward,SeaPearl.FixedOutput}(agent_heterogeneous; chosen_features=chosen_features) # Basic value-selection heuristic selectMin(x::SeaPearl.IntVar; cpmodel=nothing) = SeaPearl.minimum(x.domain) heuristic_min = SeaPearl.BasicHeuristic(selectMin) -function select_random_value(x::SeaPearl.IntVar; cpmodel=nothing) - selected_number = rand(1:length(x.domain)) - i = 1 - for value in x.domain - if i == selected_number - return value - end - i += 1 - end - @assert false "This should not happen" -end - -randomHeuristics = [] -for i in 1:nbRandomHeuristics - push!(randomHeuristics, SeaPearl.BasicHeuristic(select_random_value)) -end - -valueSelectionArray = [learnedHeuristic, heuristic_min] -append!(valueSelectionArray, randomHeuristics) +learnedHeuristics = OrderedDict( + "defaultdefault" => learnedHeuristic_default_default, + "defaultchosen" => 
learnedHeuristic_default_chosen, + "heterogeneous" => learnedHeuristic_heterogeneous +) +basicHeuristics = OrderedDict( + "min" => heuristic_min +) # ------------------- # Variable Heuristic definition # ------------------- -variableSelection = SeaPearl.MinDomainVariableSelection{false}() - -# ------------------- -# ------------------- -# Core function -# ------------------- -# ------------------- - -function trytrain(nbEpisodes::Int) - experienceTime = now() - dir = mkdir(string("exp_", Base.replace("$(round(experienceTime, Dates.Second(3)))", ":" => "-"))) - out_solver = true - expParameters = Dict( - :experimentParameters => Dict( - :nbEpisodes => nbEpisodes, - :restartPerInstances => restartPerInstances, - :evalFreq => evalFreq, - :nbInstances => nbInstances - ), - :generatorParameters => Dict( - :instance => "nqueens", - :boardSize => board_size, - ), - :learnedHeuristic => Dict( - :learnedHeuristicType => typeof(learnedHeuristic), - :eta_init => hasproperty(learnedHeuristic, :eta_init) ? learnedHeuristic.eta_init : nothing, - :eta_stable => hasproperty(learnedHeuristic, :eta_stable) ? learnedHeuristic.eta_stable : nothing, - :warmup_steps => hasproperty(learnedHeuristic, :warmup_steps) ? learnedHeuristic.warmup_steps : nothing, - :decay_steps => hasproperty(learnedHeuristic, :decay_steps) ? learnedHeuristic.decay_steps : nothing, - :rng => hasproperty(learnedHeuristic, :rng) ? {:rngType => typeof(learnedHeuristic.rng), :seed => learnedHeuristic.rng["seed"]} : nothing - ), - :nbRandomHeuristics => nbRandomHeuristics, - :Featurization => Dict( - :featurizationType => features_type, - :chosen_features => nothing - ), - :learnerParameters => Dict( - :model => string(agent.policy.learner.approximator.model), - :gamma => agent.policy.learner.sampler.γ, - :batch_size => agent.policy.learner.sampler.batch_size, - :update_horizon => agent.policy.learner.sampler.n, - :min_replay_history => agent.policy.learner.min_replay_history, - :update_freq => agent.policy.learner.update_freq, - :target_update_freq => agent.policy.learner.target_update_freq - ), - :explorerParameters => Dict( - :ϵ_stable => agent.policy.explorer.ϵ_stable, - :decay_steps => agent.policy.explorer.decay_steps - ), - :trajectoryParameters => Dict( - :trajectoryType => typeof(agent.trajectory), - :capacity => trajectory_capacity - ), - :reward => rewardType - ) - open(dir * "/params.json", "w") do file - JSON.print(file, expParameters) - end - metricsArray, eval_metricsArray = SeaPearl.train!( - valueSelectionArray=valueSelectionArray, - generator=nqueens_generator, - nbEpisodes=nbEpisodes, - strategy=SeaPearl.DFSearch(), - variableHeuristic=variableSelection, - out_solver=out_solver, - verbose=false, - evaluator=SeaPearl.SameInstancesEvaluator(valueSelectionArray, nqueens_generator; evalFreq=evalFreq, nbInstances=nbInstances), - restartPerInstances=1 - ) - - #saving model weights - trained_weights = params(agent.policy.learner.approximator.model) - @save dir * "/model_weights_nqueens_$(board_size).bson" trained_weights - - SeaPearlExtras.storedata(metricsArray[1]; filename=dir * "/nqueens_$(board_size)_training") - SeaPearlExtras.storedata(eval_metricsArray[:, 1]; filename=dir * "/nqueens_$(board_size)_trained") - SeaPearlExtras.storedata(eval_metricsArray[:, 2]; filename=dir * "/nqueens_$(board_size)_min") - for i = 1:nbRandomHeuristics - SeaPearlExtras.storedata(eval_metricsArray[:, i+2]; filename=dir * "/nqueens_$(board_size)_random$(i)") - end - - return metricsArray, eval_metricsArray -end - - - -# ------------------- -# 
------------------- - -metricsArray, eval_metricsArray = trytrain(nbEpisodes) +variableHeuristic = SeaPearl.MinDomainVariableSelection{false}() + +# ------------------- +# Run Experiment +# ------------------- + +expParameters = Dict( + :generatorParameters => Dict( + :boardSize => board_size, + ), +) + +metricsArray, eval_metricsArray = trytrain( + nbEpisodes=NB_EPISODES, + evalFreq=EVAL_FREQ, + nbInstances=NB_INSTANCES, + restartPerInstances=RESTART_PER_INSTANCES, + generator=nqueens_generator, + variableHeuristic=variableHeuristic, + learnedHeuristics=learnedHeuristics, + basicHeuristics=basicHeuristics; + out_solver=true, + verbose=VERBOSE, + expParameters=expParameters, + nbRandomHeuristics=NB_RANDOM_HEURISTICS, + exp_name = string(NB_EPISODES) * "_" * string(board_size) * "_" +) nothing diff --git a/learning_cp/nqueens/rewards.jl b/learning_cp/nqueens/rewards.jl deleted file mode 100644 index 1377a4f..0000000 --- a/learning_cp/nqueens/rewards.jl +++ /dev/null @@ -1,54 +0,0 @@ -#################### -# InspectReward -#################### - -mutable struct InspectReward <: SeaPearl.AbstractReward - value::Float32 -end - -InspectReward(model::SeaPearl.CPModel) = InspectReward(0) - -function SeaPearl.set_reward!(::Type{SeaPearl.StepPhase}, lh::SeaPearl.LearnedHeuristic{SR, InspectReward, O}, model::SeaPearl.CPModel, symbol::Union{Nothing, Symbol}) where { - SR <: SeaPearl.AbstractStateRepresentation, - O <: SeaPearl.ActionOutput -} - lh.reward.value += -1 - nothing -end - -function SeaPearl.set_reward!(::Type{SeaPearl.DecisionPhase}, lh::SeaPearl.LearnedHeuristic{SR, InspectReward, O}, model::SeaPearl.CPModel) where { - SR <: SeaPearl.AbstractStateRepresentation, - O <: SeaPearl.ActionOutput -} - lh.reward.value += -1 - nothing -end - -function SeaPearl.set_reward!(::Type{SeaPearl.EndingPhase}, lh::SeaPearl.LearnedHeuristic{SR, InspectReward, O}, model::SeaPearl.CPModel, symbol::Union{Nothing, Symbol}) where { - SR <: SeaPearl.AbstractStateRepresentation, - O <: SeaPearl.ActionOutput -} - lh.reward.value += 10 - nothing -end - -#################### -# NQueenReward -#################### - -struct NQueenReward <: SeaPearl.AbstractReward end - -function SeaPearl.set_reward!(::Type{SeaPearl.StepPhase}, lh::SeaPearl.LearnedHeuristic{SR,NQueenReward,O}, model::SeaPearl.CPModel, symbol::Union{Nothing,Symbol}) where {SR <: SeaPearl.AbstractStateRepresentation,O <: SeaPearl.ActionOutput} - lh.current_reward -= 1 - nothing -end - -function SeaPearl.set_reward!(::Type{SeaPearl.DecisionPhase}, lh::SeaPearl.LearnedHeuristic{SR,NQueenReward,O}, model::SeaPearl.CPModel) where {SR <: SeaPearl.AbstractStateRepresentation,O <: SeaPearl.ActionOutput} - lh.current_reward -= 1 - nothing -end - -function SeaPearl.set_reward!(::Type{SeaPearl.EndingPhase}, lh::SeaPearl.LearnedHeuristic{SR,NQueenReward,O}, model::SeaPearl.CPModel, symbol::Union{Nothing,Symbol}) where {SR <: SeaPearl.AbstractStateRepresentation,O <: SeaPearl.ActionOutput} - lh.current_reward += 100 / model.statistics.numberOfNodes - nothing -end \ No newline at end of file diff --git a/learning_cp/tsptw/agents_heterogeneous.jl b/learning_cp/tsptw/agents_heterogeneous.jl new file mode 100644 index 0000000..f9caffd --- /dev/null +++ b/learning_cp/tsptw/agents_heterogeneous.jl @@ -0,0 +1,142 @@ +trajectory_capacity = 3000 + +agent = RL.Agent( + policy=RL.QBasedPolicy( + learner=RL.DQNLearner( + approximator=RL.NeuralNetworkApproximator( + model=SeaPearl.CPNN( + graphChain=Flux.Chain( + SeaPearl.GraphConv(numInFeatures => 4, Flux.leakyrelu), + 
SeaPearl.GraphConv(4 => 4, Flux.leakyrelu), + SeaPearl.GraphConv(4 => 4, Flux.leakyrelu) + ), + nodeChain=Flux.Chain( + Flux.Dense(4, 8, Flux.leakyrelu), + Flux.Dense(8, 4, Flux.leakyrelu), + ), + globalChain=Flux.Chain(), + outputChain=Flux.Chain( + Flux.Dense(4, 4, Flux.leakyrelu), + Flux.Dense(4, n_city), + )), + optimizer=ADAM() + ), + target_approximator=RL.NeuralNetworkApproximator( + model=SeaPearl.CPNN( + graphChain=Flux.Chain( + SeaPearl.GraphConv(numInFeatures => 4, Flux.leakyrelu), + SeaPearl.GraphConv(4 => 4, Flux.leakyrelu), + SeaPearl.GraphConv(4 => 4, Flux.leakyrelu) + ), + nodeChain=Flux.Chain( + Flux.Dense(4, 8, Flux.leakyrelu), + Flux.Dense(8, 4, Flux.leakyrelu), + ), + globalChain=Flux.Chain(), + outputChain=Flux.Chain( + Flux.Dense(4, 4, Flux.leakyrelu), + Flux.Dense(4, n_city), + ) + ), + optimizer=ADAM() + ), + loss_func=Flux.Losses.huber_loss, + batch_size=8, + update_horizon=4, + min_replay_history=128, + update_freq=1, + target_update_freq=200, + ), + explorer=RL.EpsilonGreedyExplorer( + ϵ_stable=0.01, + kind = :exp, + decay_steps=3000, + step=1, + #rng = rng + ) + ), + trajectory=RL.CircularArraySLARTTrajectory( + capacity=trajectory_capacity, + state=SeaPearl.DefaultTrajectoryState[] => (), + legal_actions_mask=Vector{Bool} => (n_city,), + ) +) + +struct HeterogeneousModel + layer1::SeaPearl.HeterogeneousGraphConvInit + layer2::SeaPearl.HeterogeneousGraphConv + layer3::SeaPearl.HeterogeneousGraphConv + layer4::SeaPearl.HeterogeneousGraphConv +end + +function HeterogeneousModel() + layer1 = SeaPearl.HeterogeneousGraphConvInit(numInFeatures2, 4, Flux.leakyrelu) + layer2 = SeaPearl.HeterogeneousGraphConv(4 => 4, numInFeatures2, Flux.leakyrelu) + layer3 = SeaPearl.HeterogeneousGraphConv(4 => 4, numInFeatures2, Flux.leakyrelu) + layer4 = SeaPearl.HeterogeneousGraphConv(4 => 4, numInFeatures2, Flux.leakyrelu) + return HeterogeneousModel(layer1,layer2,layer3,layer4) +end + +function (m::HeterogeneousModel)(fg) + original_fg = fg + out1 = m.layer1(fg) + out2 = m.layer2(out1, original_fg) + out3 = m.layer3(out2, original_fg) + out4 = m.layer4(out3, original_fg) + return out4 +end + +Flux.@functor HeterogeneousModel + +agent2 = RL.Agent( + policy=RL.QBasedPolicy( + learner=RL.DQNLearner( + approximator=RL.NeuralNetworkApproximator( + model=SeaPearl.HeterogeneousCPNN( + graphChain=HeterogeneousModel(), + nodeChain=Flux.Chain( + Flux.Dense(4, 8, Flux.leakyrelu), + Flux.Dense(8, 4, Flux.leakyrelu), + ), + globalChain=Flux.Chain(), + outputChain=Flux.Chain( + Flux.Dense(4, 4, Flux.leakyrelu), + Flux.Dense(4, n_city), + )), + optimizer=ADAM() + ), + target_approximator=RL.NeuralNetworkApproximator( + model=SeaPearl.HeterogeneousCPNN( + graphChain=HeterogeneousModel(), + nodeChain=Flux.Chain( + Flux.Dense(4, 8, Flux.leakyrelu), + Flux.Dense(8, 4, Flux.leakyrelu), + ), + globalChain=Flux.Chain(), + outputChain=Flux.Chain( + Flux.Dense(4, 4, Flux.leakyrelu), + Flux.Dense(4, n_city), + ) + ), + optimizer=ADAM() + ), + loss_func=Flux.Losses.huber_loss, + batch_size=8, + update_horizon = 4, + min_replay_history=128, + update_freq=1, + target_update_freq=200, + ), + explorer=RL.EpsilonGreedyExplorer( + ϵ_stable=0.01, + kind = :exp, + decay_steps=3000, + step=1, + ) + ), + trajectory=RL.CircularArraySLARTTrajectory( + capacity=trajectory_capacity, + state=SeaPearl.HeterogeneousTrajectoryState[] => (), + legal_actions_mask=Vector{Bool} => (n_city,), + ) +) \ No newline at end of file diff --git a/learning_cp/tsptw/tsptw.jl b/learning_cp/tsptw/tsptw.jl index 17b4d4d..37b0aae 100644 --- 
a/learning_cp/tsptw/tsptw.jl +++ b/learning_cp/tsptw/tsptw.jl @@ -47,28 +47,10 @@ include("agents.jl") # ------------------- # Value Heuristic definition # ------------------- -heuristic_used = "simple" -rewardType = SeaPearl.TsptwReward - -if heuristic_used == "simple" - learnedHeuristic = SeaPearl.SimpleLearnedHeuristic{SR, rewardType, SeaPearl.VariableOutput}(agent) - -# SupevisedLEarnedHeuristic is not compatible with TsptwStateRepresentation yet - -#=elseif heuristic_used == "supervised" - eta_init = .9 - eta_stable = .1 - warmup_steps = 300 - decay_steps = 700 - - learnedHeuristic = SeaPearl.SupervisedLearnedHeuristic{SR, rewardType, SeaPearl.VariableOutput}( - agent; - eta_init=eta_init, - eta_stable=eta_stable, - warmup_steps=warmup_steps, - decay_steps=decay_steps, - rng=MersenneTwister(1234) - ) =# +if default_representation + learnedHeuristic = SeaPearl.SimpleLearnedHeuristic{SR, SeaPearl.CPReward, SeaPearl.FixedOutput}(agent) +else + learnedHeuristic = SeaPearl.SimpleLearnedHeuristic{SR, SeaPearl.TsptwReward, SeaPearl.VariableOutput}(agent) end include("nearest_heuristic.jl") diff --git a/learning_cp/tsptw/tsptw_featurizationhelper.jl b/learning_cp/tsptw/tsptw_featurizationhelper.jl index 1baa119..18a78bc 100644 --- a/learning_cp/tsptw/tsptw_featurizationhelper.jl +++ b/learning_cp/tsptw/tsptw_featurizationhelper.jl @@ -12,7 +12,7 @@ using Dates # ------------------- # Generator # ------------------- -n_city = 7 +n_city = 10 grid_size = 25 max_tw_gap = 0 max_tw = 100 @@ -51,7 +51,6 @@ nbEpisodes = 2001 evalFreq = 200 nbInstances = 10 nbRandomHeuristics = 1 -restartPerInstances = 1 # ------------------- # Agent definition @@ -128,16 +127,16 @@ function trytrain(nbEpisodes::Int) experienceTime = now() dir = mkdir(string("exp_",Base.replace("$(round(experienceTime, Dates.Second(3)))",":"=>"-"))) expParameters = Dict( - :experimentParameters => Dict( + :experimentParameters => Dict( :nbEpisodes => nbEpisodes, :evalFreq => evalFreq, - :nbInstances => nbInstances + :nbInstances => nbInstances, ), :generatorParameters => Dict( :nCity => n_city, :gridSize => grid_size, :maxTwGap => max_tw_gap, - :maxTw => max_tw + :maxTw => max_tw, ), :nbRandomHeuristics => nbRandomHeuristics, :Featurization => Dict( @@ -151,11 +150,11 @@ function trytrain(nbEpisodes::Int) :update_horizon => agent.policy.learner.sampler.n, :min_replay_history => agent.policy.learner.min_replay_history, :update_freq => agent.policy.learner.update_freq, - :target_update_freq => agent.policy.learner.target_update_freq + :target_update_freq => agent.policy.learner.target_update_freq, ), :explorerParameters => Dict( :ϵ_stable => agent.policy.explorer.ϵ_stable, - :decay_steps => agent.policy.explorer.decay_steps + :decay_steps => agent.policy.explorer.decay_steps, ), :trajectoryParameters => Dict( :trajectoryType => typeof(agent.trajectory), @@ -173,10 +172,9 @@ function trytrain(nbEpisodes::Int) nbEpisodes=nbEpisodes, strategy=SeaPearl.DFSearch(), variableHeuristic=variableSelection, - out_solver=true, - verbose=false, - evaluator=SeaPearl.SameInstancesEvaluator(valueSelectionArray,tsptw_generator; evalFreq = evalFreq, nbInstances = nbInstances), - restartPerInstances=1 + out_solver = true, + verbose = true, + evaluator=SeaPearl.SameInstancesEvaluator(valueSelectionArray,tsptw_generator; evalFreq = evalFreq, nbInstances = nbInstances) ) trained_weights = params(agent.policy.learner.approximator.model) diff --git a/learning_cp/tsptw/tsptw_heterogeneous.jl b/learning_cp/tsptw/tsptw_heterogeneous.jl new file mode 100644 index 
0000000..d3b4d77 --- /dev/null +++ b/learning_cp/tsptw/tsptw_heterogeneous.jl @@ -0,0 +1,167 @@ +using SeaPearl +using SeaPearlExtras +using ReinforcementLearning +const RL = ReinforcementLearning +using Flux +using GeometricFlux +using BSON: @save, @load +using JSON +using Random +using Dates +using Statistics + +# ------------------- +# Generator +# ------------------- + +n_city = 10 +grid_size = 25 +max_tw_gap = 0 +max_tw = 100 + +featurizationType = SeaPearl.DefaultFeaturization +rewardType = SeaPearl.GeneralReward +tsptw_generator = SeaPearl.TsptwGenerator(n_city, grid_size, max_tw_gap, max_tw, true) +SR_default = SeaPearl.DefaultStateRepresentation{featurizationType,SeaPearl.DefaultTrajectoryState} +SR_heterogeneous = SeaPearl.HeterogeneousStateRepresentation{featurizationType,SeaPearl.HeterogeneousTrajectoryState} + +# ------------------- +# Internal variables +# ------------------- +numInFeatures = 3 +numInFeatures2 = [1, 16, 1] + +# ------------------- +# Experience variables +# ------------------- +nbEpisodes = 2001 +evalFreq = 200 +nbInstances = 1 + +# ------------------- +# Agent definition +# ------------------- +include("agents_heterogeneous.jl") + +# ------------------- +# Value Heuristic definition +# ------------------- + +chosen_features = Dict( + "constraint_activity" => false, + "constraint_type" => true, + "nb_involved_constraint_propagation" => false, + "nb_not_bounded_variable" => false, + "variable_domain_size" => false, + "variable_initial_domain_size" => true, + "variable_is_bound" => false, + "values_onehot" => false, + "values_raw" => true, +) + + +learnedHeuristic = SeaPearl.SimpleLearnedHeuristic{SR_default, rewardType, SeaPearl.FixedOutput}(agent) +learnedHeuristic2 = SeaPearl.SimpleLearnedHeuristic{SR_heterogeneous, rewardType, SeaPearl.FixedOutput}(agent2; chosen_features=chosen_features) + +# Basic value-selection heuristic +include("nearest_heuristic.jl") +nearest_heuristic = SeaPearl.BasicHeuristic(select_nearest_neighbor) # Basic value-selection heuristic +nbRandomHeuristics = 0 +valueSelectionArray = [learnedHeuristic, learnedHeuristic2, nearest_heuristic] + +# ------------------- +# Variable Heuristic definition +# ------------------- +struct TsptwVariableSelection{TakeObjective} <: SeaPearl.AbstractVariableSelection{TakeObjective} end +TsptwVariableSelection(;take_objective=false) = TsptwVariableSelection{take_objective}() +function (::TsptwVariableSelection{false})(cpmodel::SeaPearl.CPModel; rng=nothing) + for i in 1:length(keys(cpmodel.variables)) + if haskey(cpmodel.variables, "a_"*string(i)) && !SeaPearl.isbound(cpmodel.variables["a_"*string(i)]) + return cpmodel.variables["a_"*string(i)] + end + end +end +variableSelection = TsptwVariableSelection() + +# ------------------- +# ------------------- +# Core function +# ------------------- +# ------------------- + +function trytrain(nbEpisodes::Int) + experienceTime = now() + dir = mkdir(string("exp_", Base.replace("$(round(experienceTime, Dates.Second(3)))", ":" => "-"))) + expParameters = Dict( + :experimentParameters => Dict( + :nbEpisodes => nbEpisodes, + :evalFreq => evalFreq, + :nbInstances => nbInstances, + ), + :generatorParameters => Dict( + :nCity => n_city, + :gridSize => grid_size, + :maxTwGap => max_tw_gap, + :maxTw => max_tw, + ), + :nbRandomHeuristics => nbRandomHeuristics, + :Featurization => Dict( + :featurizationType => featurizationType, + :chosen_features => chosen_features + ), + :learnerParameters => Dict( + :model => string(agent.policy.learner.approximator.model), + :gamma => 
agent.policy.learner.sampler.γ, + :batch_size => agent.policy.learner.sampler.batch_size, + :update_horizon => agent.policy.learner.sampler.n, + :min_replay_history => agent.policy.learner.min_replay_history, + :update_freq => agent.policy.learner.update_freq, + :target_update_freq => agent.policy.learner.target_update_freq, + ), + :explorerParameters => Dict( + :ϵ_stable => agent.policy.explorer.ϵ_stable, + :decay_steps => agent.policy.explorer.decay_steps, + ), + :trajectoryParameters => Dict( + :trajectoryType => typeof(agent.trajectory), + :capacity => trajectory_capacity + ), + :reward => rewardType + ) + open(dir * "/params.json", "w") do file + JSON.print(file, expParameters) + end + cp("tsptw_heterogeneous.jl", dir*"/tsptw_heterogeneous.jl") + cp("agents_heterogeneous.jl", dir*"/agents_heterogeneous.jl") + metricsArray, eval_metricsArray = SeaPearl.train!( + valueSelectionArray=valueSelectionArray, + generator=tsptw_generator, + nbEpisodes=nbEpisodes, + strategy=SeaPearl.DFSearch(), + variableHeuristic=variableSelection, + out_solver=true, + verbose=true, + evaluator=SeaPearl.SameInstancesEvaluator(valueSelectionArray, tsptw_generator; evalFreq=evalFreq, nbInstances=nbInstances), + restartPerInstances=1 + ) + + # saving model weights + trained_weights = params(agent.policy.learner.approximator.model) + @save dir * "/model_weights_tsptw_$(n_city).bson" trained_weights + + SeaPearlExtras.storedata(metricsArray[1]; filename=dir * "/tsptw_$(n_city)_training") + SeaPearlExtras.storedata(metricsArray[2]; filename=dir * "/tsptw_$(n_city)_training2") + SeaPearlExtras.storedata(eval_metricsArray[:, 1]; filename=dir * "/tsptw_$(n_city)_trained") + SeaPearlExtras.storedata(eval_metricsArray[:, 2]; filename=dir * "/tsptw_$(n_city)_trained2") + SeaPearlExtras.storedata(eval_metricsArray[:, 3]; filename=dir * "/tsptw_$(n_city)_min") + + return metricsArray, eval_metricsArray +end + + + +# ------------------- +# ------------------- + +metricsArray, eval_metricsArray = trytrain(nbEpisodes) +nothing
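
The weights that tsptw_heterogeneous.jl saves with @save can later be restored into a freshly built agent for evaluation. The snippet below is a minimal sketch, not part of the diff: it assumes the setup section of tsptw_heterogeneous.jl (constants plus include("agents_heterogeneous.jl")) has already been run so that `agent` exists with the same architecture, and the experiment folder name is hypothetical.

using Flux
using BSON: @load

# `agent` is assumed to be the untrained agent rebuilt by agents_heterogeneous.jl.
# The experiment folder below is a hypothetical example of a trytrain output directory.
@load "exp_2022-08-17T10-00-00/model_weights_tsptw_10.bson" trained_weights
# Copy the saved parameters into the current model, layer by layer.
Flux.loadparams!(agent.policy.learner.approximator.model, trained_weights)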