tables.Rmd

library(data.table)
library(kableExtra)
files <- list.files("artifacts/results/")
dt <- lapply(files[grepl("_benchmark.csv", files)], function(x) {
    fread(file.path("artifacts/results/", x))
})
dt <- Reduce(function(x,y) {rbind(x,y, fill=TRUE)}, dt)
dt[,ce:=NULL]
synth <- c("Moons", "Circles", "Linearly Separable")
dt[,source:=ifelse(dataname %in% synth, "synthetic", "real-world")]
tab <- dt[
    ,
    .(
      value=sprintf("%1.2f (%1.2f)", mean(value), sd(value)),
      val = mean(value),
      std = sd(value)
    ),
    .(dataname, generator, model, variable, source)
]

tab$top_val = F
tab$one_std_wachter = F
tab$two_std_wachter = F

# Measures to be minimized:
min_measures <- c(
  "distance",
  "distance_from_energy",
  "distance_from_targets",
  "set_size_penalty"
)
tab[variable %in% min_measures,top_val:=val==min(val),.(model, dataname, variable)]
tab[variable %in% min_measures,top_val:=ifelse(rep(all(top_val),length(top_val)),F,top_val),.(model, dataname, variable)]
tab[variable %in% min_measures,two_std_wachter:=val+2*std<val[generator=="Wachter"],.(model, dataname, variable)]
tab[variable %in% min_measures,one_std_wachter:=val+1*std<val[generator=="Wachter"],.(model, dataname, variable)]

# Measures to be maximized:
max_measures <- c(
  "validity",
  "redundancy"
)
tab[variable %in% max_measures,top_val:=val==max(val),.(model, dataname, variable)]
tab[variable %in% max_measures,top_val:=ifelse(rep(all(top_val),length(top_val)),F,top_val),.(model, dataname, variable)]
tab[variable %in% max_measures,two_std_wachter:=val-2*std>val[generator=="Wachter"],.(model, dataname, variable)]
tab[variable %in% max_measures,one_std_wachter:=val-1*std>val[generator=="Wachter"],.(model, dataname, variable)]

# Add conditional formatting:
tab[one_std_wachter==T,value:=paste0(value,"*")]
tab[two_std_wachter==T,value:=paste0(value,"*")]
tab$value <- cell_spec(tab$value, "latex", bold=tab$top_val)

# Remove redundant columns:
tab[,val:=NULL]
tab[,std:=NULL]
tab[,top_val:=NULL]
tab[,two_std_wachter:=NULL]
tab[,one_std_wachter:=NULL]
tab_full <- dcast(tab, dataname + model + generator ~ variable)
col_names <- c(
  "Model",
  "Data",
  "Generator",
  "Cost ↓",
  "Unfaithfulness ↓",
  "Implausibility ↓",
  "Redundancy ↑",
  "Uncertainty ↓",
  "Validity ↑"
)
kbl(
  tab_full, caption = "All results for all datasets. Standard deviations across samples are shown in parentheses. Best outcomes are highlighted in bold. Asterisks indicate that the given value is more than one (*) or two (**) standard deviations away from the baseline (Wachter). \\label{tab:results-full} \\newline",
  align = "c", col.names=col_names, booktabs = F, escape=F,
  format="latex"
) %>%
  kable_styling(latex_options = c("scale_down")) %>%
  kable_paper(full_width = F) %>%
  column_spec(1, bold = T) %>%
  collapse_rows(columns = 1:3, latex_hline = "custom", valign = "middle", custom_latex_hline = 1:2) %>%
  save_kable("paper/contents/table_all.tex")
# Choices:
measures <- c("distance_from_energy", "distance_from_targets")
measure_names <- c("Unfaithfulness ↓","Implausibility ↓")
chosen_source <- "real-world"
chosen_data <- c(
  "MNIST",
  "GMSC"
)
tab_i <- tab

# Logic:
tab_i <- tab_i[variable %in% measures]
tab_i <- tab_i[source == chosen_source]
tab_i <- tab_i[dataname %in% chosen_data]
tab_i[,dataname:=factor(dataname,levels=chosen_data)]
tab_i <- dcast(tab_i, model + generator ~ dataname + variable)
col_names <- c(
  "Model",
  "Generator",
  rep(measure_names,length(chosen_data))
)
caption <- sprintf(
  "Results for %s datasets. Standard deviations across samples are shown in parentheses. Best outcomes are highlighted in bold. Asterisks indicate that the given value is more than one (*) or two (**) standard deviations away from the baseline (Wachter). \\label{tab:results-%s} \\newline",
  chosen_source,
  chosen_source
)
file_name <- sprintf(
  "paper/contents/table-%s.tex",
  chosen_source
)
sub_header <- rep(length(measures), length(chosen_data))
names(sub_header) <- chosen_data
header <- c(
  " " = 2, sub_header
)
line_sep <- c(rep("",length(measures)-1),"\\addlinespace")
kbl(
  tab_i, caption = caption,
  align = "c", col.names=col_names, booktabs = T, escape=F,
  format="latex", linesep = line_sep
) %>%
  kable_styling(latex_options = c("scale_down")) %>%
  kable_paper(full_width = F) %>%
  add_header_above(header) %>%
  column_spec(1, bold = T) %>%
  collapse_rows(columns = 1:2, latex_hline = "major", valign = "middle") %>%
  save_kable(file_name)
# Choices:
measures <- c("distance_from_energy", "distance_from_targets")
measure_names <- c("Unfaithfulness ↓","Implausibility ↓")
chosen_source <- "synthetic"
chosen_data <- c(
  "Linearly Separable",
  "Moons",
  "Circles"
)
tab_i <- tab

# Logic:
tab_i <- tab_i[variable %in% measures]
tab_i <- tab_i[source == chosen_source]
tab_i <- tab_i[dataname %in% chosen_data]
tab_i[,dataname:=factor(dataname,levels=chosen_data)]
tab_i <- dcast(tab_i, model + generator ~ dataname + variable)
col_names <- c(
  "Model",
  "Generator",
  rep(measure_names,length(chosen_data))
)
caption <- sprintf(
  "Results for %s datasets. Standard deviations across samples are shown in parentheses. Best outcomes are highlighted in bold. Asterisks indicate that the given value is more than one (*) or two (**) standard deviations away from the baseline (Wachter). \\label{tab:results-%s} \\newline",
  chosen_source,
  chosen_source
)
file_name <- sprintf(
  "paper/contents/table-%s.tex",
  chosen_source
)
sub_header <- rep(length(measures), length(chosen_data))
names(sub_header) <- chosen_data
header <- c(
  " " = 2, sub_header
)
line_sep <- c(rep("",length(measures)-1),"\\addlinespace")
kbl(
  tab_i, caption = caption,
  align = "c", col.names=col_names, booktabs = T, escape=F,
  format="latex", linesep = line_sep
) %>%
  kable_styling(latex_options = c("scale_down")) %>%
  kable_paper(full_width = F) %>%
  add_header_above(header) %>%
  column_spec(1, bold = T) %>%
  collapse_rows(columns = 1:2, latex_hline = "major", valign = "middle") %>%
  save_kable(file_name)