diff --git a/.vscode/settings.json b/.vscode/settings.json
index f848dd07ae86a8a83ef94cfaa2a627725db817ac..7a73a41bfdf76d6f793007240d80983a52f15f97 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,3 +1,2 @@
 {
-    "julia.environmentPath": "/Users/paltmeyer/code/ECCCo.jl"
 }
\ No newline at end of file
diff --git a/experiments/grid_search.jl b/experiments/grid_search.jl
index 967cc8a8b5961725c7e3be1059fbf8c372880b0e..0c11b9f293b365bb4e833ab7fb317090ea2bf1fb 100644
--- a/experiments/grid_search.jl
+++ b/experiments/grid_search.jl
@@ -192,11 +192,6 @@ function best_absolute_outcome(
     higher_is_better = [var ∈ ["validity", "redundancy"] for var in evaluation.variable]
     evaluation.value[higher_is_better] .= -evaluation.value[higher_is_better]
 
-    # # Normalise to allow for comparison across measures:
-    # evaluation =
-    #     groupby(evaluation, [:dataname, :variable]) |>
-    #     x -> transform(x, :value => standardize => :value)
-
     # Reconstruct outcome with normalised values:
     bmk = CounterfactualExplanations.Evaluation.Benchmark(evaluation)
     outcome = ExperimentOutcome(exper, model_dict, generator_dict, bmk)
@@ -229,6 +224,13 @@ best_absolute_outcome_eccco(outcomes; kwrgs...) =
 best_absolute_outcome_eccco_Δ(outcomes; kwrgs...) =
     best_absolute_outcome(outcomes; generator = ECCCo_Δ_NAMES, kwrgs...)
 
+"""
+    best_outcome(outcomes)
+
+The best outcome is the one with the minimum average unfaithfulness (`distance_from_energy_l2`), aggregated across all ECCCo generators (`ECCCo_Δ_NAMES`) for the weakest models (`MLP` and `MLP Ensemble`).
+"""
+best_outcome(outcomes; measure = ["distance_from_energy_l2"]) = best_absolute_outcome(outcomes; generator = ECCCo_Δ_NAMES, measure = measure, model = ["MLP", "MLP Ensemble"])
+
 """
     append_best_params!(params::NamedTuple, dataname::String)
 
@@ -252,7 +254,8 @@ function append_best_params!(params::NamedTuple, dataname::String)
                 "$(replace(lowercase(dataname), " " => "_")).jls",
             ),
         )
-        best_params = best_absolute_outcome_eccco_Δ(grid_search_results).params
+        best_params = best_outcome(grid_search_results).params
         params = (; params..., best_params...)
+        @info "Best parameters: $(best_params)"
     end
 end
diff --git a/experiments/post_processing/hypothesis_tests.jl b/experiments/post_processing/hypothesis_tests.jl
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/experiments/post_processing/results.jl b/experiments/post_processing/results.jl
index 81f77140f85ca6d72491041d9bac2d48414803e1..81d291cb291d0f5cb50bab8e4f5695b5370c0e7e 100644
--- a/experiments/post_processing/results.jl
+++ b/experiments/post_processing/results.jl
@@ -13,7 +13,7 @@ function summarise_outcome(
     measure = isnothing(measure) ? unique(bmk().variable) : measure
     df = bmk()
     # If the :run column is missing (single runs), add it:
-    if !(:run ∈ names(df))
+    if !("run" ∈ names(df))
         df.run .= 1
     end
     # Aggregate per run:
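
A note on the `results.jl` fix, since the failure mode is easy to miss: in DataFrames.jl, `names(df)` returns a `Vector{String}`, so the old check `!(:run ∈ names(df))` was always true and `df.run .= 1` ran unconditionally, clobbering the run IDs of genuine multi-run benchmarks. A minimal standalone sketch of the pitfall (not taken from the repo):

```julia
using DataFrames

df = DataFrame(variable = ["validity", "validity"], run = [1, 2])

:run in names(df)           # false: names(df) is a Vector{String}
"run" in names(df)          # true: the corrected check
:run in propertynames(df)   # true: the Symbol-based alternative

# The guarded pattern from the patch; broadcasting assignment creates
# the column when it is missing:
if !("run" in names(df))
    df.run .= 1
end
```

To make the selection rule in the new `best_outcome` docstring concrete, here is an illustrative sketch of "minimum average unfaithfulness for the weakest models". The column names (`:generator`, `:model`, `:variable`, `:value`) and the helper `pick_best` are assumptions for illustration only; `best_absolute_outcome`'s body is not part of this diff:

```julia
using DataFrames, Statistics

# Score each grid-search outcome by its mean unfaithfulness over the
# chosen generators/models, then return the index of the smallest score:
function pick_best(evaluations::Vector{DataFrame}; generators, models, measure)
    scores = map(evaluations) do df
        sub = subset(
            df,
            :generator => ByRow(in(generators)),
            :model => ByRow(in(models)),
            :variable => ByRow(in(measure)),
        )
        mean(sub.value)  # lower is better (unfaithfulness)
    end
    return argmin(scores)
end

# Hypothetical usage mirroring best_outcome's defaults:
# pick_best(evals; generators = ECCCo_Δ_NAMES,
#           models = ["MLP", "MLP Ensemble"],
#           measure = ["distance_from_energy_l2"])
```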