Skip to content
Snippets Groups Projects
Commit 849f24f5 authored by Pat Alt's avatar Pat Alt
Browse files

uh

parent fb3dd417
No related branches found
No related tags found
No related merge requests found
......@@ -141,7 +141,7 @@ chosen_data <- c(
"MNIST",
"GMSC"
)
tab_i <- tab_valid
tab_i <- tab
# Logic:
tab_i <- tab_i[variable %in% measures]
......@@ -155,7 +155,7 @@ col_names <- c(
rep(measure_names,length(chosen_data))
)
caption <- sprintf(
"Results for %s datasets: sample averages +/- one standard deviation over all valid counterfactuals. Best outcomes are highlighted in bold. Asterisks indicate that the given value is more than one (*) or two (**) standard deviations away from the baseline (Wachter). \\label{tab:results-%s} \\newline",
"Results for %s datasets: sample averages +/- one standard deviation across counterfactuals. Best outcomes are highlighted in bold. Asterisks indicate that the given value is more than one (*) or two (**) standard deviations away from the baseline (Wachter). \\label{tab:results-%s} \\newline",
chosen_source,
chosen_source
)
......@@ -192,7 +192,7 @@ chosen_data <- c(
"Moons",
"Circles"
)
tab_i <- tab_valid
tab_i <- tab
# Logic:
tab_i <- tab_i[variable %in% measures]
......@@ -206,7 +206,7 @@ col_names <- c(
rep(measure_names,length(chosen_data))
)
caption <- sprintf(
"Results for %s datasets: sample averages +/- one standard deviation over all valid counterfactuals. Best outcomes are highlighted in bold. Asterisks indicate that the given value is more than one (*) or two (**) standard deviations away from the baseline (Wachter). \\label{tab:results-%s} \\newline",
"Results for %s datasets: sample averages +/- one standard deviation across counterfactuals. Best outcomes are highlighted in bold. Asterisks indicate that the given value is more than one (*) or two (**) standard deviations away from the baseline (Wachter). \\label{tab:results-%s} \\newline",
chosen_source,
chosen_source
)
......
\begin{table}
\caption{Results for real-world datasets: sample averages +/- one standard deviation over all valid counterfactuals. Best outcomes are highlighted in bold. Asterisks indicate that the given value is more than one (*) or two (**) standard deviations away from the baseline (Wachter). \label{tab:results-real-world} \newline}
\caption{Results for real-world datasets: sample averages +/- one standard deviation across counterfactuals. Best outcomes are highlighted in bold. Asterisks indicate that the given value is more than one (*) or two (**) standard deviations away from the baseline (Wachter). \label{tab:results-real-world} \newline}
\centering
\resizebox{\linewidth}{!}{
\begin{tabular}[t]{llcccc}
......@@ -9,37 +9,37 @@
\cmidrule(l{3pt}r{3pt}){3-4} \cmidrule(l{3pt}r{3pt}){5-6}
Model & Generator & Unfaithfulness ↓ & Implausibility ↓ & Unfaithfulness ↓ & Implausibility ↓\\
\midrule
& ECCCo & \textbf{19.27 ± 5.02}** & 314.54 ± 32.54*\hphantom{*} & \textbf{79.16 ± 11.67}** & 18.26 ± 4.92**\\
& ECCCo & \textbf{19.28 ± 5.01}** & 314.76 ± 32.36*\hphantom{*} & \textbf{79.16 ± 11.67}** & 18.26 ± 4.92**\\
& REVISE & 188.54 ± 26.22*\hphantom{*} & \textbf{254.32 ± 41.55}** & 186.40 ± 28.06\hphantom{*}\hphantom{*} & \textbf{5.34 ± 2.38}**\\
& REVISE & 188.70 ± 26.18*\hphantom{*} & \textbf{255.26 ± 41.50}** & 186.40 ± 28.06\hphantom{*}\hphantom{*} & \textbf{5.34 ± 2.38}**\\
& Schut & 199.70 ± 28.43\hphantom{*}\hphantom{*} & 273.01 ± 39.60** & 200.98 ± 28.49\hphantom{*}\hphantom{*} & 6.50 ± 2.01**\\
& Schut & 211.00 ± 27.21\hphantom{*}\hphantom{*} & 286.61 ± 39.85*\hphantom{*} & 200.98 ± 28.49\hphantom{*}\hphantom{*} & 6.50 ± 2.01**\\
\multirow{-4}{*}{\raggedright\arraybackslash JEM} & Wachter & 222.81 ± 26.22\hphantom{*}\hphantom{*} & 361.38 ± 39.55\hphantom{*}\hphantom{*} & 214.08 ± 45.35\hphantom{*}\hphantom{*} & 61.04 ± 2.58\hphantom{*}\hphantom{*}\\
\multirow{-4}{*}{\raggedright\arraybackslash JEM} & Wachter & 222.90 ± 26.56\hphantom{*}\hphantom{*} & 361.88 ± 39.74\hphantom{*}\hphantom{*} & 214.08 ± 45.35\hphantom{*}\hphantom{*} & 61.04 ± 2.58\hphantom{*}\hphantom{*}\\
\cmidrule{1-6}
& ECCCo & \textbf{15.99 ± 3.06}** & 294.72 ± 30.75** & \textbf{83.28 ± 13.26}** & 17.21 ± 4.46**\\
& REVISE & 173.05 ± 20.38** & \textbf{246.20 ± 37.74}** & 194.24 ± 35.41\hphantom{*}\hphantom{*} & \textbf{4.95 ± 1.26}**\\
& REVISE & 173.59 ± 20.65** & \textbf{246.32 ± 37.46}** & 194.24 ± 35.41\hphantom{*}\hphantom{*} & \textbf{4.95 ± 1.26}**\\
& Schut & 186.91 ± 22.98*\hphantom{*} & 264.68 ± 37.58** & 208.45 ± 34.60\hphantom{*}\hphantom{*} & 6.12 ± 1.91**\\
& Schut & 205.33 ± 24.07\hphantom{*}\hphantom{*} & 287.39 ± 39.33*\hphantom{*} & 208.45 ± 34.60\hphantom{*}\hphantom{*} & 6.12 ± 1.91**\\
\multirow{-4}{*}{\raggedright\arraybackslash JEM Ensemble} & Wachter & 217.37 ± 23.93\hphantom{*}\hphantom{*} & 362.91 ± 39.40\hphantom{*}\hphantom{*} & 186.19 ± 33.88\hphantom{*}\hphantom{*} & 60.70 ± 44.32\hphantom{*}\hphantom{*}\\
\multirow{-4}{*}{\raggedright\arraybackslash JEM Ensemble} & Wachter & 217.67 ± 23.78\hphantom{*}\hphantom{*} & 363.23 ± 39.24\hphantom{*}\hphantom{*} & 186.19 ± 33.88\hphantom{*}\hphantom{*} & 60.70 ± 44.32\hphantom{*}\hphantom{*}\\
\cmidrule{1-6}
& ECCCo & \textbf{41.95 ± 6.50}** & 591.58 ± 36.24\hphantom{*}\hphantom{*} & \textbf{75.93 ± 14.27}** & 17.20 ± 3.15\hphantom{*}\hphantom{*}\\
& ECCCo & \textbf{41.95 ± 6.50}** & 591.58 ± 36.24\hphantom{*}\hphantom{*} & \textbf{75.93 ± 14.27}** & 17.20 ± 3.15**\\
& REVISE & 365.69 ± 14.90*\hphantom{*} & 245.36 ± 39.69** & 196.75 ± 41.25\hphantom{*}\hphantom{*} & \textbf{4.84 ± 0.60}**\\
& REVISE & 365.82 ± 15.35*\hphantom{*} & \textbf{249.49 ± 41.55}** & 196.75 ± 41.25\hphantom{*}\hphantom{*} & \textbf{4.84 ± 0.60}**\\
& Schut & 371.12 ± 19.99\hphantom{*}\hphantom{*} & \textbf{245.11 ± 35.72}** & 212.00 ± 41.15\hphantom{*}\hphantom{*} & 6.44 ± 1.34\hphantom{*}\hphantom{*}\\
& Schut & 382.44 ± 17.81\hphantom{*}\hphantom{*} & 285.98 ± 42.48*\hphantom{*} & 212.00 ± 41.15\hphantom{*}\hphantom{*} & 6.44 ± 1.34**\\
\multirow{-4}{*}{\raggedright\arraybackslash MLP} & Wachter & 384.76 ± 16.52\hphantom{*}\hphantom{*} & 359.21 ± 42.03\hphantom{*}\hphantom{*} & 184.03 ± 48.16\hphantom{*}\hphantom{*} & 7.49 ± 0.89\hphantom{*}\hphantom{*}\\
\multirow{-4}{*}{\raggedright\arraybackslash MLP} & Wachter & 386.05 ± 16.60\hphantom{*}\hphantom{*} & 361.83 ± 42.18\hphantom{*}\hphantom{*} & 218.34 ± 53.26\hphantom{*}\hphantom{*} & 45.84 ± 39.39\hphantom{*}\hphantom{*}\\
\cmidrule{1-6}
& ECCCo & \textbf{31.43 ± 3.91}** & 490.88 ± 27.19\hphantom{*}\hphantom{*} & \textbf{73.86 ± 14.63}** & 17.92 ± 4.17\hphantom{*}\hphantom{*}\\
& ECCCo & \textbf{31.43 ± 3.91}** & 490.88 ± 27.19\hphantom{*}\hphantom{*} & \textbf{73.86 ± 14.63}** & 17.92 ± 4.17**\\
& REVISE & 337.21 ± 11.68*\hphantom{*} & \textbf{244.84 ± 37.17}** & 207.21 ± 43.20\hphantom{*}\hphantom{*} & \textbf{5.78 ± 2.10}**\\
& REVISE & 337.74 ± 11.89*\hphantom{*} & \textbf{247.67 ± 38.36}** & 207.21 ± 43.20\hphantom{*}\hphantom{*} & \textbf{5.78 ± 2.10}**\\
& Schut & 344.60 ± 13.64*\hphantom{*} & 252.53 ± 37.92** & 205.36 ± 32.11\hphantom{*}\hphantom{*} & 7.00 ± 2.15*\hphantom{*}\\
& Schut & 359.54 ± 14.52\hphantom{*}\hphantom{*} & 283.99 ± 41.08*\hphantom{*} & 205.36 ± 32.11\hphantom{*}\hphantom{*} & 7.00 ± 2.15**\\
\multirow{-4}{*}{\raggedright\arraybackslash MLP Ensemble} & Wachter & 358.51 ± 13.18\hphantom{*}\hphantom{*} & 352.63 ± 39.93\hphantom{*}\hphantom{*} & 177.20 ± 25.86\hphantom{*}\hphantom{*} & 10.27 ± 3.21\hphantom{*}\hphantom{*}\\
\multirow{-4}{*}{\raggedright\arraybackslash MLP Ensemble} & Wachter & 360.79 ± 14.39\hphantom{*}\hphantom{*} & 357.73 ± 42.55\hphantom{*}\hphantom{*} & 213.71 ± 54.17\hphantom{*}\hphantom{*} & 73.09 ± 64.50\hphantom{*}\hphantom{*}\\
\bottomrule
\end{tabular}}
\end{table}
\begin{table}
\caption{Results for synthetic datasets: sample averages +/- one standard deviation over all valid counterfactuals. Best outcomes are highlighted in bold. Asterisks indicate that the given value is more than one (*) or two (**) standard deviations away from the baseline (Wachter). \label{tab:results-synthetic} \newline}
\caption{Results for synthetic datasets: sample averages +/- one standard deviation across counterfactuals. Best outcomes are highlighted in bold. Asterisks indicate that the given value is more than one (*) or two (**) standard deviations away from the baseline (Wachter). \label{tab:results-synthetic} \newline}
\centering
\resizebox{\linewidth}{!}{
\begin{tabular}[t]{llcccccc}
......@@ -15,21 +15,21 @@ Model & Generator & Unfaithfulness ↓ & Implausibility ↓ & Unfaithfulness ↓
& ECCCo (no EBM) & 0.16 ± 0.11\hphantom{*}\hphantom{*} & 0.34 ± 0.19\hphantom{*}\hphantom{*} & 0.91 ± 0.32\hphantom{*}\hphantom{*} & 1.71 ± 0.25\hphantom{*}\hphantom{*} & 0.70 ± 0.33\hphantom{*}\hphantom{*} & 1.30 ± 0.37\hphantom{*}\hphantom{*}\\
& REVISE & 0.15 ± 0.00** & 0.41 ± 0.01** & 0.78 ± 0.23\hphantom{*}\hphantom{*} & 1.57 ± 0.26\hphantom{*}\hphantom{*} & \textbf{0.33 ± 0.01}** & \textbf{0.64 ± 0.00}**\\
& REVISE & 0.19 ± 0.03\hphantom{*}\hphantom{*} & 0.41 ± 0.01** & 0.78 ± 0.23\hphantom{*}\hphantom{*} & 1.57 ± 0.26\hphantom{*}\hphantom{*} & \textbf{0.48 ± 0.16}*\hphantom{*} & \textbf{0.95 ± 0.32}*\hphantom{*}\\
& Schut & 0.39 ± 0.07\hphantom{*}\hphantom{*} & 0.73 ± 0.17\hphantom{*}\hphantom{*} & 0.66 ± 0.25\hphantom{*}\hphantom{*} & 1.47 ± 0.10** & 0.54 ± 0.43\hphantom{*}\hphantom{*} & 1.28 ± 0.53\hphantom{*}\hphantom{*}\\
& Schut & 0.39 ± 0.07\hphantom{*}\hphantom{*} & 0.73 ± 0.17\hphantom{*}\hphantom{*} & 0.67 ± 0.27\hphantom{*}\hphantom{*} & 1.50 ± 0.22*\hphantom{*} & 0.54 ± 0.43\hphantom{*}\hphantom{*} & 1.28 ± 0.53\hphantom{*}\hphantom{*}\\
\multirow{-6}{*}{\raggedright\arraybackslash JEM} & Wachter & 0.18 ± 0.10\hphantom{*}\hphantom{*} & 0.44 ± 0.17\hphantom{*}\hphantom{*} & 0.78 ± 0.23\hphantom{*}\hphantom{*} & 1.75 ± 0.19\hphantom{*}\hphantom{*} & 0.68 ± 0.34\hphantom{*}\hphantom{*} & 1.33 ± 0.32\hphantom{*}\hphantom{*}\\
\multirow{-6}{*}{\raggedright\arraybackslash JEM} & Wachter & 0.18 ± 0.10\hphantom{*}\hphantom{*} & 0.44 ± 0.17\hphantom{*}\hphantom{*} & 0.80 ± 0.27\hphantom{*}\hphantom{*} & 1.78 ± 0.24\hphantom{*}\hphantom{*} & 0.68 ± 0.34\hphantom{*}\hphantom{*} & 1.33 ± 0.32\hphantom{*}\hphantom{*}\\
\cmidrule{1-8}
& ECCCo & \textbf{0.29 ± 0.05}** & 0.23 ± 0.06** & 0.80 ± 0.62\hphantom{*}\hphantom{*} & 1.69 ± 0.40\hphantom{*}\hphantom{*} & 0.65 ± 0.53\hphantom{*}\hphantom{*} & 1.17 ± 0.41\hphantom{*}\hphantom{*}\\
& ECCCo (no CP) & 0.29 ± 0.05** & \textbf{0.23 ± 0.07}** & \textbf{0.79 ± 0.62}\hphantom{*}\hphantom{*} & 1.68 ± 0.42\hphantom{*}\hphantom{*} & 0.49 ± 0.35\hphantom{*}\hphantom{*} & 1.19 ± 0.44\hphantom{*}\hphantom{*}\\
& ECCCo (no CP) & 0.29 ± 0.05** & \textbf{0.23 ± 0.07}** & \textbf{0.79 ± 0.62}\hphantom{*}\hphantom{*} & 1.68 ± 0.42\hphantom{*}\hphantom{*} & \textbf{0.49 ± 0.35}\hphantom{*}\hphantom{*} & 1.19 ± 0.44\hphantom{*}\hphantom{*}\\
& ECCCo (no EBM) & 0.46 ± 0.05\hphantom{*}\hphantom{*} & 0.28 ± 0.04** & 1.34 ± 0.47\hphantom{*}\hphantom{*} & 1.68 ± 0.47\hphantom{*}\hphantom{*} & 0.84 ± 0.51\hphantom{*}\hphantom{*} & 1.23 ± 0.31\hphantom{*}\hphantom{*}\\
& REVISE & 0.52 ± 0.04\hphantom{*}\hphantom{*} & 0.41 ± 0.01\hphantom{*}\hphantom{*} & 1.45 ± 0.44\hphantom{*}\hphantom{*} & 1.64 ± 0.31\hphantom{*}\hphantom{*} & \textbf{0.06 ± 0.01}** & \textbf{0.64 ± 0.00}**\\
& REVISE & 0.56 ± 0.05\hphantom{*}\hphantom{*} & 0.41 ± 0.01\hphantom{*}\hphantom{*} & 1.45 ± 0.44\hphantom{*}\hphantom{*} & \textbf{1.64 ± 0.31}\hphantom{*}\hphantom{*} & 0.58 ± 0.52\hphantom{*}\hphantom{*} & \textbf{0.95 ± 0.32}\hphantom{*}\hphantom{*}\\
& Schut & 0.43 ± 0.06*\hphantom{*} & 0.47 ± 0.36\hphantom{*}\hphantom{*} & 1.39 ± 0.50\hphantom{*}\hphantom{*} & \textbf{1.59 ± 0.26}\hphantom{*}\hphantom{*} & 0.58 ± 0.37\hphantom{*}\hphantom{*} & 1.23 ± 0.43\hphantom{*}\hphantom{*}\\
& Schut & 0.43 ± 0.06*\hphantom{*} & 0.47 ± 0.36\hphantom{*}\hphantom{*} & 1.45 ± 0.55\hphantom{*}\hphantom{*} & 1.73 ± 0.48\hphantom{*}\hphantom{*} & 0.58 ± 0.37\hphantom{*}\hphantom{*} & 1.23 ± 0.43\hphantom{*}\hphantom{*}\\
\multirow{-6}{*}{\raggedright\arraybackslash MLP} & Wachter & 0.51 ± 0.04\hphantom{*}\hphantom{*} & 0.40 ± 0.08\hphantom{*}\hphantom{*} & 1.32 ± 0.41\hphantom{*}\hphantom{*} & 1.69 ± 0.32\hphantom{*}\hphantom{*} & 0.83 ± 0.50\hphantom{*}\hphantom{*} & 1.24 ± 0.29\hphantom{*}\hphantom{*}\\
\bottomrule
......
No preview for this file type
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment