diff --git a/experiments/grid_search.jl b/experiments/grid_search.jl index b4d5bf37ed511762be4b367b5383a04a0f22dc8a..79cf2573f7ca9cd9788a8ffd20d6e900a9a0a99f 100644 --- a/experiments/grid_search.jl +++ b/experiments/grid_search.jl @@ -45,6 +45,32 @@ function grid_search( # Save: if !(is_multi_processed(PLZ) && MPI.Comm_rank(PLZ.comm) != 0) - Serialization.serialize(joinpath(grid_search_path, "$(replace(lowercase(dataname), " " => "_")).jls"), outcomes) + Serialization.serialize(joinpath(grid_search_path, "$(replace(lowercase(dataname), " " => "_"))_best.jls"), best_outcome(outcomes)) + Serialization.serialise(joinpath(grid_search_path, "$(replace(lowercase(dataname), " " => "_"))_best_eccco.jls"), best_eccco(outcomes)) + Serialization.serialise(joinpath(grid_search_path, "$(replace(lowercase(dataname), " " => "_"))_best_eccco_Δ.jls"), best_eccco_Δ(outcomes)) end -end \ No newline at end of file +end + +""" + best_outcome(outcomes; generator=["ECCCo", "ECCCo-Δ"], measure=["distance_from_energy", "distance_from_targets"]) + +Returns the best outcome from grid search results. The best outcome is defined as the one with the lowest average rank across all datasets and variables for the specified generator and measure. +""" +function best_outcome(outcomes; generator=["ECCCo", "ECCCo-Δ"], measure=["distance_from_energy", "distance_from_targets"]) + ranks = map(outcomes) do outcome + ranks = avg_generator_rank(outcome; generator=generator, measure=measure) |> + x -> x.avg_rank |> + x -> sum(x) / length(x)[1] + return ranks + end + best_index = argmin(values(ranks)) + best_outcome = ( + params = keys(ranks)[best_index], + outcome = outcomes[best_index] + ) + return best_outcome +end + +best_eccco(outcomes) = best_outcome(outcomes; generator=["ECCCo"], measure=["distance_from_energy", "distance_from_targets"]) + +best_eccco_Δ(outcomes) = best_outcome(outcomes; generator=["ECCCo-Δ"], measure=["distance_from_energy", "distance_from_targets"]) \ No newline at end of file diff --git a/experiments/post_processing/results.jl b/experiments/post_processing/results.jl index eff3de9e41714329489ec9a8f753f490965080ca..8b5606786eef95cd1b0dcb8c7e6a27c63aecfa95 100644 --- a/experiments/post_processing/results.jl +++ b/experiments/post_processing/results.jl @@ -3,15 +3,18 @@ Helper function to quickly filter a benchmark table for the distance from targets: the smaller this distance, the higher the plausibility. """ -function summarise_outcome(outcome::ExperimentOutcome; measure="distance_from_targets", model::Union{Nothing,String}=nothing) +function summarise_outcome(outcome::ExperimentOutcome; measure::Union{Nothing,AbstractArray}=nothing, model::Union{Nothing,String}=nothing) + bmk = outcome.bmk + measure = isnothing(measure) ? unique(bmk().variable) : measure + df = groupby(bmk(), [:dataname, :generator, :model, :variable]) |> x -> combine(x, :value => mean => :mean, :value => std => :std) |> - x -> subset(x, :variable => ByRow(x -> x ==measure)) + x -> subset(x, :variable => ByRow(x -> x ∈ measure)) if !isnothing(model) df = subset(df, :model => ByRow(x -> x == model)) end - sort!(df, [:model, :mean]) + sort!(df, [:model, :variable, :mean]) return df end @@ -20,7 +23,7 @@ end Helper function to quickly filter a benchmark table for the distance from targets: the smaller this distance, the higher the plausibility. """ -plausibility(outcome::ExperimentOutcome; kwrgs...) = summarise_outcome(outcome, measure="distance_from_targets", kwrgs...) +plausibility(outcome::ExperimentOutcome; kwrgs...) = summarise_outcome(outcome, measure=["distance_from_targets"], kwrgs...) """ @@ -28,11 +31,51 @@ plausibility(outcome::ExperimentOutcome; kwrgs...) = summarise_outcome(outcome, Helper function to quickly filter a benchmark table for the distance from energy: the smaller this distance, the higher the faithfulness. """ -faithfulness(outcome::ExperimentOutcome; kwrgs...) = summarise_outcome(outcome, measure="distance_from_energy", kwrgs...) +faithfulness(outcome::ExperimentOutcome; kwrgs...) = summarise_outcome(outcome, measure=["distance_from_energy"], kwrgs...) """ closeness(outcome::ExperimentOutcome) Helper function to quickly filter a benchmark table for the distance from the factual: the smaller this distance, the higher the closeness desideratum. """ -closeness(outcome::ExperimentOutcome; kwrgs...) = summarise_outcome(outcome, measure="distance", kwrgs...) \ No newline at end of file +closeness(outcome::ExperimentOutcome; kwrgs...) = summarise_outcome(outcome, measure=["distance"], kwrgs...) + +""" + generator_rank(outcome::ExperimentOutcome; generator::Union{AbstractArray,Nothing}=nothing, measure::Union{AbstractArray,Nothing}=nothing, model::Union{Nothing,String}=nothing) + +Computes the average rank of a generator across all datasets and variables. +""" +function generator_rank( + outcome::ExperimentOutcome; + generator::Union{AbstractArray,Nothing}=nothing, + measure::Union{AbstractArray,Nothing}=nothing, + model::Union{Nothing,String}=nothing +) + + # Setup: + generator = isnothing(generator) ? collect(keys(outcome.generator_dict)) : generator + bmk = outcome.bmk + measure = isnothing(measure) ? unique(bmk().variable) : measure + + # Compute: + results = summarise_outcome(outcome, measure=measure, model=model) + # Adjust variables for which higher is better: + higher_is_better = [var ∈ ["validity", "redundancy"] for var in results.variable] + results.mean[higher_is_better] .= - results.mean[higher_is_better] + # Compute ranks: + ranked_results = groupby(results, [:dataname, :model, :variable]) |> + x -> combine(x, :mean => sortperm => :rank, :generator) |> + x -> subset(x, :generator => ByRow(x -> x ∈ generator)) |> + x -> groupby(x, [:dataname, :generator, :variable]) |> + x -> combine(x, :rank => mean => :avg_rank) |> + x -> subset(x, :variable => ByRow(x -> x ∈ measure)) + sort!(ranked_results, [:variable, :avg_rank]) + return ranked_results +end + +generator_rank_plausibility(outcome::ExperimentOutcome; kwrgs...) = generator_rank(outcome, measure=["distance_from_targets"], kwrgs...) + +generator_rank_faithfulness(outcome::ExperimentOutcome; kwrgs...) = generator_rank(outcome, measure=["distance_from_energy"], kwrgs...) + +generator_rank_closeness(outcome::ExperimentOutcome; kwrgs...) = generator_rank(outcome, measure=["distance"], kwrgs...) +